diff options
author | Arthur Heymans <arthur@aheymans.xyz> | 2018-01-25 20:03:42 +0100 |
---|---|---|
committer | Arthur Heymans <arthur@aheymans.xyz> | 2018-04-11 09:56:59 +0000 |
commit | 5fbe788bae15f0d24d56011e8eb8b48c107b7b05 (patch) | |
tree | 9a71d82fcb75915ee5dec6f70834836f4200cdbe | |
parent | e9eb14079c22f00342f8f884791e24d82980c2b4 (diff) |
model_206ax: Use parallel MP init
This patch adds a few southbridge calls needed for parallel MP init.
Moves the smm_relocate() function to smm/gen1/smi.h, since that is
where this function is defined now.
Tested on Thinkpad X220, shaves of ~30ms on a 2 core, 4 threads CPU.
Change-Id: Iacd7bfedfccbc09057e1b7ca3bd03d44a888871d
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
Reviewed-on: https://review.coreboot.org/23432
Reviewed-by: Aaron Durbin <adurbin@chromium.org>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
-rw-r--r-- | src/cpu/intel/fsp_model_206ax/model_206ax.h | 1 | ||||
-rw-r--r-- | src/cpu/intel/model_2065x/model_2065x.h | 1 | ||||
-rw-r--r-- | src/cpu/intel/model_206ax/Kconfig | 1 | ||||
-rw-r--r-- | src/cpu/intel/model_206ax/model_206ax.h | 1 | ||||
-rw-r--r-- | src/cpu/intel/model_206ax/model_206ax_init.c | 161 | ||||
-rw-r--r-- | src/cpu/intel/smm/gen1/smi.h | 11 | ||||
-rw-r--r-- | src/cpu/intel/smm/gen1/smmrelocate.c | 77 | ||||
-rw-r--r-- | src/northbridge/intel/sandybridge/northbridge.c | 2 | ||||
-rw-r--r-- | src/southbridge/intel/bd82x6x/elog.c | 2 | ||||
-rw-r--r-- | src/southbridge/intel/bd82x6x/pch.h | 3 | ||||
-rw-r--r-- | src/southbridge/intel/common/pmutil.h | 1 | ||||
-rw-r--r-- | src/southbridge/intel/common/smi.c | 26 |
12 files changed, 194 insertions, 93 deletions
diff --git a/src/cpu/intel/fsp_model_206ax/model_206ax.h b/src/cpu/intel/fsp_model_206ax/model_206ax.h index e65b370914..3d6166d8e0 100644 --- a/src/cpu/intel/fsp_model_206ax/model_206ax.h +++ b/src/cpu/intel/fsp_model_206ax/model_206ax.h @@ -102,7 +102,6 @@ void intel_model_206ax_finalize_smm(void); /* Configure power limits for turbo mode */ void set_power_limits(u8 power_limit_1_time); int cpu_config_tdp_levels(void); -void smm_relocate(void); #endif #endif diff --git a/src/cpu/intel/model_2065x/model_2065x.h b/src/cpu/intel/model_2065x/model_2065x.h index 8bb3b877e8..861414dcb7 100644 --- a/src/cpu/intel/model_2065x/model_2065x.h +++ b/src/cpu/intel/model_2065x/model_2065x.h @@ -91,7 +91,6 @@ void intel_model_2065x_finalize_smm(void); /* Configure power limits for turbo mode */ void set_power_limits(u8 power_limit_1_time); int cpu_config_tdp_levels(void); -void smm_relocate(void); #endif #endif diff --git a/src/cpu/intel/model_206ax/Kconfig b/src/cpu/intel/model_206ax/Kconfig index b30cfa10c2..cb09d23750 100644 --- a/src/cpu/intel/model_206ax/Kconfig +++ b/src/cpu/intel/model_206ax/Kconfig @@ -23,6 +23,7 @@ config CPU_SPECIFIC_OPTIONS select TSC_SYNC_MFENCE select CPU_INTEL_COMMON select CACHE_RELOCATED_RAMSTAGE_OUTSIDE_CBMEM + select PARALLEL_MP config BOOTBLOCK_CPU_INIT string diff --git a/src/cpu/intel/model_206ax/model_206ax.h b/src/cpu/intel/model_206ax/model_206ax.h index 962b8302eb..e5bc39eab4 100644 --- a/src/cpu/intel/model_206ax/model_206ax.h +++ b/src/cpu/intel/model_206ax/model_206ax.h @@ -121,7 +121,6 @@ void intel_model_206ax_finalize_smm(void); /* Configure power limits for turbo mode */ void set_power_limits(u8 power_limit_1_time); int cpu_config_tdp_levels(void); -void smm_relocate(void); #endif #endif diff --git a/src/cpu/intel/model_206ax/model_206ax_init.c b/src/cpu/intel/model_206ax/model_206ax_init.c index 5c60ed77b5..2b8e55bd77 100644 --- a/src/cpu/intel/model_206ax/model_206ax_init.c +++ b/src/cpu/intel/model_206ax/model_206ax_init.c @@ -15,6 +15,7 @@ * GNU General Public License for more details. */ +#include <assert.h> #include <console/console.h> #include <device/device.h> #include <string.h> @@ -23,6 +24,7 @@ #include <cpu/x86/mtrr.h> #include <cpu/x86/msr.h> #include <cpu/x86/lapic.h> +#include <cpu/x86/mp.h> #include <cpu/intel/microcode.h> #include <cpu/intel/speedstep.h> #include <cpu/intel/turbo.h> @@ -421,83 +423,6 @@ static void configure_mca(void) wrmsr(IA32_MC0_STATUS + (i * 4), msr); } -int cpu_get_apic_id_map(int *apic_id_map) -{ - struct cpuid_result result; - unsigned int threads_per_package, threads_per_core, i, shift = 0; - - /* Logical processors (threads) per core */ - result = cpuid_ext(0xb, 0); - threads_per_core = result.ebx & 0xffff; - - /* Logical processors (threads) per package */ - result = cpuid_ext(0xb, 1); - threads_per_package = result.ebx & 0xffff; - - if (threads_per_core == 1) - shift++; - - for (i = 0; i < threads_per_package && i < CONFIG_MAX_CPUS; i++) - apic_id_map[i] = i << shift; - - return threads_per_package; -} - -/* - * Initialize any extra cores/threads in this package. - */ -static void intel_cores_init(struct device *cpu) -{ - struct cpuid_result result; - unsigned int threads_per_package, threads_per_core, i; - - /* Logical processors (threads) per core */ - result = cpuid_ext(0xb, 0); - threads_per_core = result.ebx & 0xffff; - - /* Logical processors (threads) per package */ - result = cpuid_ext(0xb, 1); - threads_per_package = result.ebx & 0xffff; - - /* Only initialize extra cores from BSP */ - if (cpu->path.apic.apic_id) - return; - - printk(BIOS_DEBUG, "CPU: %u has %u cores, %u threads per core\n", - cpu->path.apic.apic_id, threads_per_package/threads_per_core, - threads_per_core); - - for (i = 1; i < threads_per_package; ++i) { - struct device_path cpu_path; - struct device *new; - - /* Build the CPU device path */ - cpu_path.type = DEVICE_PATH_APIC; - cpu_path.apic.apic_id = - cpu->path.apic.apic_id + i; - - /* Update APIC ID if no hyperthreading */ - if (threads_per_core == 1) - cpu_path.apic.apic_id <<= 1; - - /* Allocate the new CPU device structure */ - new = alloc_dev(cpu->bus, &cpu_path); - if (!new) - continue; - - printk(BIOS_DEBUG, "CPU: %u has core %u\n", - cpu->path.apic.apic_id, - new->path.apic.apic_id); - - /* Start the new CPU */ - if (is_smp_boot() && !start_cpu(new)) { - /* Record the error in cpu? */ - printk(BIOS_ERR, "CPU %u would not start!\n", - new->path.apic.apic_id); - } - } -} - static void model_206ax_init(struct device *cpu) { char processor_name[49]; @@ -505,8 +430,6 @@ static void model_206ax_init(struct device *cpu) /* Turn on caching if we haven't already */ x86_enable_cache(); - intel_update_microcode_from_cbfs(); - /* Clear out pending MCEs */ configure_mca(); @@ -514,10 +437,6 @@ static void model_206ax_init(struct device *cpu) fill_processor_name(processor_name); printk(BIOS_INFO, "CPU: %s.\n", processor_name); - /* Setup MTRRs based on physical address size */ - x86_setup_mtrrs_with_detect(); - x86_mtrr_check(); - /* Setup Page Attribute Tables (PAT) */ // TODO set up PAT @@ -548,9 +467,81 @@ static void model_206ax_init(struct device *cpu) /* Enable Turbo */ enable_turbo(); +} + +/* MP initialization support. */ +static const void *microcode_patch; + +static void pre_mp_init(void) +{ + /* Setup MTRRs based on physical address size. */ + x86_setup_mtrrs_with_detect(); + x86_mtrr_check(); +} + +static int get_cpu_count(void) +{ + struct cpuid_result result; + unsigned int threads_per_package, threads_per_core; + + /* Logical processors (threads) per core */ + result = cpuid_ext(0xb, 0); + threads_per_core = result.ebx & 0xffff; + + ASSERT(threads_per_core != 0); + + /* Logical processors (threads) per package */ + result = cpuid_ext(0xb, 1); + threads_per_package = result.ebx & 0xffff; - /* Start up extra cores */ - intel_cores_init(cpu); + printk(BIOS_DEBUG, "CPU has %u cores, %u threads enabled.\n", + threads_per_package / threads_per_core, threads_per_package); + + return threads_per_package; +} + +static void get_microcode_info(const void **microcode, int *parallel) +{ + microcode_patch = intel_microcode_find(); + *microcode = microcode_patch; + *parallel = 1; +} + +static void per_cpu_smm_trigger(void) +{ + /* Relocate the SMM handler. */ + smm_relocate(); + + /* After SMM relocation a 2nd microcode load is required. */ + intel_microcode_load_unlocked(microcode_patch); +} + +static void post_mp_init(void) +{ + /* Now that all APs have been relocated as well as the BSP let SMIs + * start flowing. */ + southbridge_smm_init(); + + /* Lock down the SMRAM space. */ + smm_lock(); +} + + +static const struct mp_ops mp_ops = { + .pre_mp_init = pre_mp_init, + .get_cpu_count = get_cpu_count, + .get_smm_info = smm_info, + .get_microcode_info = get_microcode_info, + .pre_mp_smm_init = smm_initialize, + .per_cpu_smm_trigger = per_cpu_smm_trigger, + .relocation_handler = smm_relocation_handler, + .post_mp_init = post_mp_init, +}; + +void bsp_init_and_start_aps(struct bus *cpu_bus) +{ + if (mp_init_with_smm(cpu_bus, &mp_ops)) + printk(BIOS_ERR, "MP initialization failure.\n"); } static struct device_operations cpu_dev_ops = { diff --git a/src/cpu/intel/smm/gen1/smi.h b/src/cpu/intel/smm/gen1/smi.h index c328eae91a..a63e731a51 100644 --- a/src/cpu/intel/smm/gen1/smi.h +++ b/src/cpu/intel/smm/gen1/smi.h @@ -11,6 +11,10 @@ * GNU General Public License for more details. */ +#include <device/device.h> + +void bsp_init_and_start_aps(struct bus *cpu_bus); + /* These helpers are for performing SMM relocation. */ void southbridge_smm_init(void); void southbridge_trigger_smi(void); @@ -18,3 +22,10 @@ void southbridge_clear_smi_status(void); u32 northbridge_get_tseg_base(void); int cpu_get_apic_id_map(int *apic_id_map); void northbridge_write_smram(u8 smram); +void smm_info(uintptr_t *perm_smbase, size_t *perm_smsize, + size_t *smm_save_state_size); +void smm_initialize(void); +void southbridge_smm_clear_state(void); +void smm_relocation_handler(int cpu, uintptr_t curr_smbase, + uintptr_t staggered_smbase); +void smm_relocate(void); diff --git a/src/cpu/intel/smm/gen1/smmrelocate.c b/src/cpu/intel/smm/gen1/smmrelocate.c index e80fa31489..4fccd2d9e6 100644 --- a/src/cpu/intel/smm/gen1/smmrelocate.c +++ b/src/cpu/intel/smm/gen1/smmrelocate.c @@ -23,10 +23,12 @@ #include <device/pci.h> #include <cpu/cpu.h> #include <cpu/x86/cache.h> +#include <cpu/x86/mp.h> #include <cpu/x86/msr.h> #include <cpu/x86/mtrr.h> #include <cpu/x86/smm.h> #include <console/console.h> +#include <smp/node.h> #include "smi.h" #define SMRR_SUPPORTED (1 << 11) @@ -292,3 +294,78 @@ void smm_lock(void) northbridge_write_smram(D_LCK | G_SMRAME | C_BASE_SEG); } + +void smm_info(uintptr_t *perm_smbase, size_t *perm_smsize, + size_t *smm_save_state_size) +{ + printk(BIOS_DEBUG, "Setting up SMI for CPU\n"); + + fill_in_relocation_params(&smm_reloc_params); + + setup_ied_area(&smm_reloc_params); + + *perm_smbase = smm_reloc_params.smram_base; + *perm_smsize = smm_reloc_params.smram_size; + *smm_save_state_size = sizeof(em64t101_smm_state_save_area_t); +} + +void smm_initialize(void) +{ + /* Clear the SMM state in the southbridge. */ + southbridge_smm_clear_state(); + + /* + * Run the relocation handler for on the BSP to check and set up + * parallel SMM relocation. + */ + smm_initiate_relocation(); +} + +/* The relocation work is actually performed in SMM context, but the code + * resides in the ramstage module. This occurs by trampolining from the default + * SMRAM entry point to here. */ +void smm_relocation_handler(int cpu, uintptr_t curr_smbase, + uintptr_t staggered_smbase) +{ + msr_t mtrr_cap; + struct smm_relocation_params *relo_params = &smm_reloc_params; + em64t101_smm_state_save_area_t *save_state; + u32 smbase = staggered_smbase; + u32 iedbase = relo_params->ied_base; + + printk(BIOS_DEBUG, "In relocation handler: cpu %d\n", cpu); + + /* Make appropriate changes to the save state map. */ + printk(BIOS_DEBUG, "New SMBASE=0x%08x IEDBASE=0x%08x\n", + smbase, iedbase); + + save_state = (void *)(curr_smbase + SMM_DEFAULT_SIZE - + sizeof(*save_state)); + save_state->smbase = smbase; + save_state->iedbase = iedbase; + + + /* Write EMRR and SMRR MSRs based on indicated support. */ + mtrr_cap = rdmsr(MTRR_CAP_MSR); + if (mtrr_cap.lo & SMRR_SUPPORTED && !IS_ENABLED(CONFIG_HAS_NO_SMRR)) + write_smrr(relo_params); +} + +/* + * The default SMM entry can happen in parallel or serially. If the + * default SMM entry is done in parallel the BSP has already setup + * the saving state to each CPU's MSRs. At least one save state size + * is required for the initial SMM entry for the BSP to determine if + * parallel SMM relocation is even feasible. + */ +void smm_relocate(void) +{ + /* + * If smm_save_state_in_msrs is non-zero then parallel SMM relocation + * shall take place. Run the relocation handler a second time on the + * BSP to do * the final move. For APs, a relocation handler always + * needs to be run. + */ + if (!boot_cpu()) + smm_initiate_relocation(); +} diff --git a/src/northbridge/intel/sandybridge/northbridge.c b/src/northbridge/intel/sandybridge/northbridge.c index 9fed17ecee..2258067596 100644 --- a/src/northbridge/intel/sandybridge/northbridge.c +++ b/src/northbridge/intel/sandybridge/northbridge.c @@ -525,7 +525,7 @@ static const struct pci_driver mc_driver_158 __pci_driver = { static void cpu_bus_init(device_t dev) { - initialize_cpus(dev->link_list); + bsp_init_and_start_aps(dev->link_list); } static struct device_operations cpu_bus_ops = { diff --git a/src/southbridge/intel/bd82x6x/elog.c b/src/southbridge/intel/bd82x6x/elog.c index 814ff80598..96098facb1 100644 --- a/src/southbridge/intel/bd82x6x/elog.c +++ b/src/southbridge/intel/bd82x6x/elog.c @@ -22,7 +22,7 @@ #include <stdint.h> #include <string.h> #include <elog.h> -#include "pch.h" +#include <southbridge/intel/common/pmutils.h> void pch_log_state(void) { diff --git a/src/southbridge/intel/bd82x6x/pch.h b/src/southbridge/intel/bd82x6x/pch.h index ebcb058c0b..9e87ff6b72 100644 --- a/src/southbridge/intel/bd82x6x/pch.h +++ b/src/southbridge/intel/bd82x6x/pch.h @@ -68,9 +68,6 @@ int pch_silicon_revision(void); int pch_silicon_type(void); int pch_silicon_supported(int type, int rev); void pch_iobp_update(u32 address, u32 andvalue, u32 orvalue); -#if IS_ENABLED(CONFIG_ELOG) -void pch_log_state(void); -#endif #else /* __PRE_RAM__ */ void enable_smbus(void); void enable_usb_bar(void); diff --git a/src/southbridge/intel/common/pmutil.h b/src/southbridge/intel/common/pmutil.h index e2b6e5828d..4aa2812ff1 100644 --- a/src/southbridge/intel/common/pmutil.h +++ b/src/southbridge/intel/common/pmutil.h @@ -123,5 +123,6 @@ void southbridge_update_gnvs(u8 apm_cnt, int *smm_done); void southbridge_finalize_all(void); void southbridge_smi_monitor(void); em64t101_smm_state_save_area_t *smi_apmc_find_state_save(u8 cmd); +void pch_log_state(void); #endif /*INTEL_COMMON_PMUTIL_H */ diff --git a/src/southbridge/intel/common/smi.c b/src/southbridge/intel/common/smi.c index deaecb2625..264b5487d8 100644 --- a/src/southbridge/intel/common/smi.c +++ b/src/southbridge/intel/common/smi.c @@ -159,3 +159,29 @@ void smm_setup_structures(void *gnvs, void *tcg, void *smi1) "d" (APM_CNT) ); } + +void southbridge_smm_clear_state(void) +{ + u32 smi_en; + + if (IS_ENABLED(CONFIG_ELOG)) + /* Log events from chipset before clearing */ + pch_log_state(); + + printk(BIOS_DEBUG, "Initializing Southbridge SMI..."); + printk(BIOS_SPEW, " ... pmbase = 0x%04x\n", get_pmbase()); + + smi_en = inl(get_pmbase() + SMI_EN); + if (smi_en & APMC_EN) { + printk(BIOS_INFO, "SMI# handler already enabled?\n"); + return; + } + + printk(BIOS_DEBUG, "\n"); + + /* Dump and clear status registers */ + reset_smi_status(); + reset_pm1_status(); + reset_tco_status(); + reset_gpe0_status(); +} |