summaryrefslogtreecommitdiff
path: root/src/cpu
diff options
context:
space:
mode:
authorArthur Heymans <arthur@aheymans.xyz>2018-01-25 20:03:42 +0100
committerArthur Heymans <arthur@aheymans.xyz>2019-01-22 12:16:18 +0000
commitedbf5d913819726a09cd56bace2d13e74b560ab1 (patch)
treecd33c998831b53e838e255565aba3b97fdbc8a69 /src/cpu
parentd30894b835eb466e6e46c64317edf96e5554b138 (diff)
cpu/intel/model_206ax: Use parallel MP init
This patch adds a few southbridge calls needed for parallel MP init. Moves the smm_relocate() function to smm/gen1/smi.h, since that is where this function is defined now. Tested on Thinkpad X220, shaves off ~30ms on a 2 core, 4 threads CPU. Change-Id: Ia1d547ed4a3cb6746a0222c3e54e94e5848b0dd7 Signed-off-by: Arthur Heymans <arthur@aheymans.xyz> Reviewed-on: https://review.coreboot.org/c/25618 Reviewed-by: Angel Pons <th3fanbus@gmail.com> Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Diffstat (limited to 'src/cpu')
-rw-r--r--src/cpu/intel/model_2065x/model_2065x.h1
-rw-r--r--src/cpu/intel/model_206ax/Kconfig1
-rw-r--r--src/cpu/intel/model_206ax/model_206ax.h2
-rw-r--r--src/cpu/intel/model_206ax/model_206ax_init.c155
-rw-r--r--src/cpu/intel/smm/gen1/smi.h13
-rw-r--r--src/cpu/intel/smm/gen1/smmrelocate.c81
6 files changed, 166 insertions, 87 deletions
diff --git a/src/cpu/intel/model_2065x/model_2065x.h b/src/cpu/intel/model_2065x/model_2065x.h
index 11d86cdf89..1e6a311910 100644
--- a/src/cpu/intel/model_2065x/model_2065x.h
+++ b/src/cpu/intel/model_2065x/model_2065x.h
@@ -77,7 +77,6 @@ void intel_model_2065x_finalize_smm(void);
/* Configure power limits for turbo mode */
void set_power_limits(u8 power_limit_1_time);
int cpu_config_tdp_levels(void);
-void smm_relocate(void);
#endif
#endif
diff --git a/src/cpu/intel/model_206ax/Kconfig b/src/cpu/intel/model_206ax/Kconfig
index b30cfa10c2..cb09d23750 100644
--- a/src/cpu/intel/model_206ax/Kconfig
+++ b/src/cpu/intel/model_206ax/Kconfig
@@ -23,6 +23,7 @@ config CPU_SPECIFIC_OPTIONS
select TSC_SYNC_MFENCE
select CPU_INTEL_COMMON
select CACHE_RELOCATED_RAMSTAGE_OUTSIDE_CBMEM
+ select PARALLEL_MP
config BOOTBLOCK_CPU_INIT
string
diff --git a/src/cpu/intel/model_206ax/model_206ax.h b/src/cpu/intel/model_206ax/model_206ax.h
index 2bf9d32e46..f4c5d9323a 100644
--- a/src/cpu/intel/model_206ax/model_206ax.h
+++ b/src/cpu/intel/model_206ax/model_206ax.h
@@ -22,6 +22,7 @@
/* SandyBridge/IvyBridge bus clock is fixed at 100MHz */
#define SANDYBRIDGE_BCLK 100
+#define CORE_THREAD_COUNT_MSR 0x35
#define MSR_FEATURE_CONFIG 0x13c
#define MSR_FLEX_RATIO 0x194
#define FLEX_RATIO_LOCK (1 << 20)
@@ -109,7 +110,6 @@ void intel_model_206ax_finalize_smm(void);
/* Configure power limits for turbo mode */
void set_power_limits(u8 power_limit_1_time);
int cpu_config_tdp_levels(void);
-void smm_relocate(void);
#endif
int get_platform_id(void);
diff --git a/src/cpu/intel/model_206ax/model_206ax_init.c b/src/cpu/intel/model_206ax/model_206ax_init.c
index 58aabdb198..33ad1e79ae 100644
--- a/src/cpu/intel/model_206ax/model_206ax_init.c
+++ b/src/cpu/intel/model_206ax/model_206ax_init.c
@@ -15,6 +15,7 @@
* GNU General Public License for more details.
*/
+#include <assert.h>
#include <console/console.h>
#include <device/device.h>
#include <string.h>
@@ -24,6 +25,7 @@
#include <cpu/x86/mtrr.h>
#include <cpu/x86/msr.h>
#include <cpu/x86/lapic.h>
+#include <cpu/x86/mp.h>
#include <cpu/intel/microcode.h>
#include <cpu/intel/speedstep.h>
#include <cpu/intel/turbo.h>
@@ -426,83 +428,6 @@ static void configure_mca(void)
wrmsr(IA32_MC0_STATUS + (i * 4), msr);
}
-int cpu_get_apic_id_map(int *apic_id_map)
-{
- struct cpuid_result result;
- unsigned int threads_per_package, threads_per_core, i, shift = 0;
-
- /* Logical processors (threads) per core */
- result = cpuid_ext(0xb, 0);
- threads_per_core = result.ebx & 0xffff;
-
- /* Logical processors (threads) per package */
- result = cpuid_ext(0xb, 1);
- threads_per_package = result.ebx & 0xffff;
-
- if (threads_per_core == 1)
- shift++;
-
- for (i = 0; i < threads_per_package && i < CONFIG_MAX_CPUS; i++)
- apic_id_map[i] = i << shift;
-
- return threads_per_package;
-}
-
-/*
- * Initialize any extra cores/threads in this package.
- */
-static void intel_cores_init(struct device *cpu)
-{
- struct cpuid_result result;
- unsigned int threads_per_package, threads_per_core, i;
-
- /* Logical processors (threads) per core */
- result = cpuid_ext(0xb, 0);
- threads_per_core = result.ebx & 0xffff;
-
- /* Logical processors (threads) per package */
- result = cpuid_ext(0xb, 1);
- threads_per_package = result.ebx & 0xffff;
-
- /* Only initialize extra cores from BSP */
- if (cpu->path.apic.apic_id)
- return;
-
- printk(BIOS_DEBUG, "CPU: %u has %u cores, %u threads per core\n",
- cpu->path.apic.apic_id, threads_per_package/threads_per_core,
- threads_per_core);
-
- for (i = 1; i < threads_per_package; ++i) {
- struct device_path cpu_path;
- struct device *new;
-
- /* Build the CPU device path */
- cpu_path.type = DEVICE_PATH_APIC;
- cpu_path.apic.apic_id =
- cpu->path.apic.apic_id + i;
-
- /* Update APIC ID if no hyperthreading */
- if (threads_per_core == 1)
- cpu_path.apic.apic_id <<= 1;
-
- /* Allocate the new CPU device structure */
- new = alloc_dev(cpu->bus, &cpu_path);
- if (!new)
- continue;
-
- printk(BIOS_DEBUG, "CPU: %u has core %u\n",
- cpu->path.apic.apic_id,
- new->path.apic.apic_id);
-
- /* Start the new CPU */
- if (is_smp_boot() && !start_cpu(new)) {
- /* Record the error in cpu? */
- printk(BIOS_ERR, "CPU %u would not start!\n",
- new->path.apic.apic_id);
- }
- }
-}
-
static void model_206ax_report(void)
{
static const char *const mode[] = {"NOT ", ""};
@@ -536,18 +461,12 @@ static void model_206ax_init(struct device *cpu)
/* Turn on caching if we haven't already */
x86_enable_cache();
- intel_update_microcode_from_cbfs();
-
/* Clear out pending MCEs */
configure_mca();
/* Print infos */
model_206ax_report();
- /* Setup MTRRs based on physical address size */
- x86_setup_mtrrs_with_detect();
- x86_mtrr_check();
-
/* Setup Page Attribute Tables (PAT) */
// TODO set up PAT
@@ -578,9 +497,75 @@ static void model_206ax_init(struct device *cpu)
/* Enable Turbo */
enable_turbo();
+}
+
+/* MP initialization support. */
+static const void *microcode_patch;
- /* Start up extra cores */
- intel_cores_init(cpu);
+static void pre_mp_init(void)
+{
+ /* Setup MTRRs based on physical address size. */
+ x86_setup_mtrrs_with_detect();
+ x86_mtrr_check();
+}
+
+static int get_cpu_count(void)
+{
+ msr_t msr;
+ int num_threads;
+ int num_cores;
+
+ msr = rdmsr(CORE_THREAD_COUNT_MSR);
+ num_threads = (msr.lo >> 0) & 0xffff;
+ num_cores = (msr.lo >> 16) & 0xffff;
+ printk(BIOS_DEBUG, "CPU has %u cores, %u threads enabled.\n",
+ num_cores, num_threads);
+
+ return num_threads;
+}
+
+static void get_microcode_info(const void **microcode, int *parallel)
+{
+ microcode_patch = intel_microcode_find();
+ *microcode = microcode_patch;
+ *parallel = 1;
+}
+
+static void per_cpu_smm_trigger(void)
+{
+ /* Relocate the SMM handler. */
+ smm_relocate();
+
+ /* After SMM relocation a 2nd microcode load is required. */
+ intel_microcode_load_unlocked(microcode_patch);
+}
+
+static void post_mp_init(void)
+{
+ /* Now that all APs have been relocated as well as the BSP let SMIs
+ * start flowing. */
+ southbridge_smm_init();
+
+ /* Lock down the SMRAM space. */
+ smm_lock();
+}
+
+
+static const struct mp_ops mp_ops = {
+ .pre_mp_init = pre_mp_init,
+ .get_cpu_count = get_cpu_count,
+ .get_smm_info = smm_info,
+ .get_microcode_info = get_microcode_info,
+ .pre_mp_smm_init = smm_initialize,
+ .per_cpu_smm_trigger = per_cpu_smm_trigger,
+ .relocation_handler = smm_relocation_handler,
+ .post_mp_init = post_mp_init,
+};
+
+void bsp_init_and_start_aps(struct bus *cpu_bus)
+{
+ if (mp_init_with_smm(cpu_bus, &mp_ops))
+ printk(BIOS_ERR, "MP initialization failure.\n");
}
static struct device_operations cpu_dev_ops = {
diff --git a/src/cpu/intel/smm/gen1/smi.h b/src/cpu/intel/smm/gen1/smi.h
index cc6811fd63..6d08bb3684 100644
--- a/src/cpu/intel/smm/gen1/smi.h
+++ b/src/cpu/intel/smm/gen1/smi.h
@@ -11,6 +11,10 @@
* GNU General Public License for more details.
*/
+#include <device/device.h>
+
+void bsp_init_and_start_aps(struct bus *cpu_bus);
+
/* These helpers are for performing SMM relocation. */
void southbridge_smm_init(void);
void southbridge_trigger_smi(void);
@@ -21,3 +25,12 @@ int cpu_get_apic_id_map(int *apic_id_map);
void northbridge_write_smram(u8 smram);
bool cpu_has_alternative_smrr(void);
+
+/* parallel MP helper functions */
+void smm_info(uintptr_t *perm_smbase, size_t *perm_smsize,
+ size_t *smm_save_state_size);
+void smm_initialize(void);
+void southbridge_smm_clear_state(void);
+void smm_relocation_handler(int cpu, uintptr_t curr_smbase,
+ uintptr_t staggered_smbase);
+void smm_relocate(void);
diff --git a/src/cpu/intel/smm/gen1/smmrelocate.c b/src/cpu/intel/smm/gen1/smmrelocate.c
index 105b9a45ca..4b824a57a5 100644
--- a/src/cpu/intel/smm/gen1/smmrelocate.c
+++ b/src/cpu/intel/smm/gen1/smmrelocate.c
@@ -22,10 +22,12 @@
#include <device/device.h>
#include <device/pci.h>
#include <cpu/x86/cache.h>
+#include <cpu/x86/mp.h>
#include <cpu/x86/msr.h>
#include <cpu/x86/mtrr.h>
#include <cpu/x86/smm.h>
#include <console/console.h>
+#include <smp/node.h>
#include "smi.h"
#define SMRR_SUPPORTED (1 << 11)
@@ -349,3 +351,82 @@ void smm_lock(void)
northbridge_write_smram(D_LCK | G_SMRAME | C_BASE_SEG);
}
+
+void smm_info(uintptr_t *perm_smbase, size_t *perm_smsize,
+ size_t *smm_save_state_size)
+{
+ printk(BIOS_DEBUG, "Setting up SMI for CPU\n");
+
+ fill_in_relocation_params(&smm_reloc_params);
+
+ if (CONFIG_IED_REGION_SIZE != 0)
+ setup_ied_area(&smm_reloc_params);
+
+ *perm_smbase = smm_reloc_params.smram_base;
+ *perm_smsize = smm_reloc_params.smram_size;
+ *smm_save_state_size = sizeof(em64t101_smm_state_save_area_t);
+}
+
+void smm_initialize(void)
+{
+ /* Clear the SMM state in the southbridge. */
+ southbridge_smm_clear_state();
+
+ /*
+ * Run the relocation handler for on the BSP to check and set up
+ * parallel SMM relocation.
+ */
+ smm_initiate_relocation();
+}
+
+/* The relocation work is actually performed in SMM context, but the code
+ * resides in the ramstage module. This occurs by trampolining from the default
+ * SMRAM entry point to here. */
+void smm_relocation_handler(int cpu, uintptr_t curr_smbase,
+ uintptr_t staggered_smbase)
+{
+ msr_t mtrr_cap;
+ struct smm_relocation_params *relo_params = &smm_reloc_params;
+ em64t101_smm_state_save_area_t *save_state;
+ u32 smbase = staggered_smbase;
+ u32 iedbase = relo_params->ied_base;
+
+ printk(BIOS_DEBUG, "In relocation handler: cpu %d\n", cpu);
+
+ /* Make appropriate changes to the save state map. */
+ if (CONFIG_IED_REGION_SIZE != 0)
+ printk(BIOS_DEBUG, "New SMBASE=0x%08x IEDBASE=0x%08x\n",
+ smbase, iedbase);
+ else
+ printk(BIOS_DEBUG, "New SMBASE=0x%08x\n",
+ smbase);
+
+ save_state = (void *)(curr_smbase + SMM_DEFAULT_SIZE -
+ sizeof(*save_state));
+ save_state->smbase = smbase;
+ save_state->iedbase = iedbase;
+
+ /* Write EMRR and SMRR MSRs based on indicated support. */
+ mtrr_cap = rdmsr(MTRR_CAP_MSR);
+ if (mtrr_cap.lo & SMRR_SUPPORTED && relo_params->smrr_mask.lo != 0)
+ write_smrr(relo_params);
+}
+
+/*
+ * The default SMM entry can happen in parallel or serially. If the
+ * default SMM entry is done in parallel the BSP has already setup
+ * the saving state to each CPU's MSRs. At least one save state size
+ * is required for the initial SMM entry for the BSP to determine if
+ * parallel SMM relocation is even feasible.
+ */
+void smm_relocate(void)
+{
+ /*
+ * If smm_save_state_in_msrs is non-zero then parallel SMM relocation
+ * shall take place. Run the relocation handler a second time on the
+ * BSP to do the final move. For APs, a relocation handler always
+ * needs to be run.
+ */
+ if (!boot_cpu())
+ smm_initiate_relocation();
+}