-rw-r--r--  src/cpu/intel/haswell/haswell.h       |   3
-rw-r--r--  src/cpu/intel/haswell/haswell_init.c  |   3
-rw-r--r--  src/cpu/intel/haswell/mp_init.c       |  52
-rw-r--r--  src/cpu/intel/haswell/smmrelocate.c   | 142
4 files changed, 163 insertions, 37 deletions
diff --git a/src/cpu/intel/haswell/haswell.h b/src/cpu/intel/haswell/haswell.h
index 26807e9f3a..a510e7de58 100644
--- a/src/cpu/intel/haswell/haswell.h
+++ b/src/cpu/intel/haswell/haswell.h
@@ -144,6 +144,7 @@ int cpu_config_tdp_levels(void);
/* Returns 0 on success, < 0 on failure. */
int smm_initialize(void);
void smm_initiate_relocation(void);
+void smm_initiate_relocation_parallel(void);
struct bus;
void bsp_init_and_start_aps(struct bus *cpu_bus);
/* Returns 0 on success, < 0 on failure. */
@@ -151,7 +152,7 @@ int setup_ap_init(struct bus *cpu_bus, int *max_cpus,
const void *microcode_patch);
/* Returns 0 on success, < 0 on failure. */
int start_aps(struct bus *cpu_bus, int max_cpus);
-void release_aps_for_smm_relocation(void);
+void release_aps_for_smm_relocation(int do_parallel_relocation);
#endif
#endif
diff --git a/src/cpu/intel/haswell/haswell_init.c b/src/cpu/intel/haswell/haswell_init.c
index 82430b750d..c7f89ee646 100644
--- a/src/cpu/intel/haswell/haswell_init.c
+++ b/src/cpu/intel/haswell/haswell_init.c
@@ -549,9 +549,6 @@ void bsp_init_and_start_aps(struct bus *cpu_bus)
return;
}
- /* Release APs to perform SMM relocation. */
- release_aps_for_smm_relocation();
-
/* After SMM relocation a 2nd microcode load is required. */
intel_microcode_load_unlocked(microcode_patch);
}
diff --git a/src/cpu/intel/haswell/mp_init.c b/src/cpu/intel/haswell/mp_init.c
index 7f15c391d7..c8bd5c22be 100644
--- a/src/cpu/intel/haswell/mp_init.c
+++ b/src/cpu/intel/haswell/mp_init.c
@@ -75,9 +75,16 @@ static device_t cpu_devs[CONFIG_MAX_CPUS];
/* Number of APs that have checked in. */
static atomic_t num_aps;
+/* Number of APs that have relocated their SMM handler. */
+static atomic_t num_aps_relocated_smm;
/* Barrier to stop APs from performing SMM relocation. */
static int smm_relocation_barrier_begin __attribute__ ((aligned (64)));
+static inline void mfence(void)
+{
+ __asm__ __volatile__("mfence\t\n": : :"memory");
+}
+
static inline void wait_for_barrier(volatile int *barrier)
{
while (*barrier == 0) {
@@ -95,13 +102,18 @@ static void ap_wait_for_smm_relocation_begin(void)
wait_for_barrier(&smm_relocation_barrier_begin);
}
+/* This function pointer is used by the non-BSP CPUs to initiate relocation. It
+ * points to either a serial or parallel SMM initiation. */
+static void (*ap_initiate_smm_relocation)(void) = &smm_initiate_relocation;
+
/* Returns 1 if timeout waiting for APs. 0 if target aps found. */
-static int wait_for_aps(int target, int total_delay, int delay_step)
+static int wait_for_aps(atomic_t *val, int target, int total_delay,
+ int delay_step)
{
int timeout = 0;
int delayed = 0;
- while (atomic_read(&num_aps) != target) {
+ while (atomic_read(val) != target) {
udelay(delay_step);
delayed += delay_step;
if (delayed >= total_delay) {
@@ -113,9 +125,19 @@ static int wait_for_aps(int target, int total_delay, int delay_step)
return timeout;
}
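
The reworked wait_for_aps() turns the check-in poll into a generic "wait for an atomic counter to reach a target" helper, so the same loop can also watch num_aps_relocated_smm. Below is a minimal standalone sketch of that pattern, assuming a plain volatile int in place of coreboot's atomic_t and a stub in place of udelay(); it is an illustration, not the coreboot code.

#include <stdio.h>

/* Placeholder for udelay(); the real function busy-waits in microseconds. */
static void stub_udelay(int us) { (void)us; }

/* Poll *val until it reaches target, giving up after total_delay_us.
 * Returns 1 on timeout, 0 once the target count is observed. */
static int wait_for_counter(volatile int *val, int target,
			    int total_delay_us, int delay_step_us)
{
	int delayed = 0;

	while (*val != target) {
		stub_udelay(delay_step_us);
		delayed += delay_step_us;
		if (delayed >= total_delay_us)
			return 1;	/* timed out */
	}
	return 0;			/* everyone counted in */
}

int main(void)
{
	volatile int checked_in = 3;

	/* Target already reached, so this returns 0 immediately. */
	printf("timeout=%d\n", wait_for_counter(&checked_in, 3, 200, 15));
	return 0;
}
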
-void release_aps_for_smm_relocation(void)
+void release_aps_for_smm_relocation(int do_parallel)
{
+ /* Change the AP SMM initiation function, and ensure it is visible
+ * before releasing the APs. */
+ if (do_parallel) {
+ ap_initiate_smm_relocation = &smm_initiate_relocation_parallel;
+ mfence();
+ }
release_barrier(&smm_relocation_barrier_begin);
+ /* Wait up to 100ms for the CPUs to relocate their SMM handlers. */
+ if (wait_for_aps(&num_aps_relocated_smm, atomic_read(&num_aps),
+ 100000 /* 100 ms */, 200 /* us */))
+ printk(BIOS_DEBUG, "Timed out waiting for AP SMM relocation\n");
}
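
release_aps_for_smm_relocation() relies on a publish-then-release ordering: the BSP first stores the chosen initiation routine into the function pointer, issues an mfence so the store is globally visible, and only then opens the barrier the APs spin on. The standalone sketch below illustrates that ordering with made-up names; the "AP" step runs on the same thread purely to keep the example self-contained.

#include <stdio.h>

static void serial_initiate(void)   { puts("serial SMM initiation"); }
static void parallel_initiate(void) { puts("parallel SMM initiation"); }

/* Which routine the APs should call; serial is the default. */
static void (*initiate)(void) = serial_initiate;
static volatile int barrier;

static inline void mfence(void)
{
	__asm__ __volatile__("mfence" : : : "memory");
}

static void bsp_release(int do_parallel)
{
	if (do_parallel) {
		initiate = parallel_initiate;	/* publish... */
		mfence();			/* ...make it visible... */
	}
	barrier = 1;				/* ...then release the APs */
}

static void ap_run(void)
{
	while (!barrier)
		;		/* spin until the BSP opens the barrier */
	initiate();		/* sees the pointer published before the release */
}

int main(void)
{
	bsp_release(1);
	ap_run();		/* single-threaded demo of the ordering */
	return 0;
}
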
/* The mtrr code sets up ROM caching on the BSP, but not the others. However,
@@ -172,7 +194,10 @@ ap_init(unsigned int cpu, void *microcode_ptr)
ap_wait_for_smm_relocation_begin();
- smm_initiate_relocation();
+ ap_initiate_smm_relocation();
+
+ /* Indicate that SMM relocation has occurred on this thread. */
+ atomic_inc(&num_aps_relocated_smm);
/* After SMM relocation a 2nd microcode load is required. */
intel_microcode_load_unlocked(microcode_ptr);
@@ -483,7 +508,7 @@ int start_aps(struct bus *cpu_bus, int ap_count)
printk(BIOS_DEBUG, "done.\n");
}
/* Wait for CPUs to check in up to 200 us. */
- wait_for_aps(ap_count, 200 /* us */, 15 /* us */);
+ wait_for_aps(&num_aps, ap_count, 200 /* us */, 15 /* us */);
/* Send 2nd SIPI */
if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
@@ -507,7 +532,7 @@ int start_aps(struct bus *cpu_bus, int ap_count)
}
/* Wait for CPUs to check in. */
- if (wait_for_aps(ap_count, 10000 /* 10 ms */, 50 /* us */)) {
+ if (wait_for_aps(&num_aps, ap_count, 10000 /* 10 ms */, 50 /* us */)) {
printk(BIOS_DEBUG, "Not all APs checked in: %d/%d.\n",
atomic_read(&num_aps), ap_count);
return -1;
@@ -516,17 +541,12 @@ int start_aps(struct bus *cpu_bus, int ap_count)
return 0;
}
-DECLARE_SPIN_LOCK(smm_relocation_lock);
-
-void smm_initiate_relocation(void)
+void smm_initiate_relocation_parallel(void)
{
- spin_lock(&smm_relocation_lock);
-
if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
printk(BIOS_DEBUG, "Waiting for ICR not to be busy...");
if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
printk(BIOS_DEBUG, "timed out. Aborting.\n");
- spin_unlock(&smm_relocation_lock);
return;
} else
printk(BIOS_DEBUG, "done.\n");
@@ -539,6 +559,14 @@ void smm_initiate_relocation(void)
} else
printk(BIOS_DEBUG, "Relocation complete.\n");
+}
+
+DECLARE_SPIN_LOCK(smm_relocation_lock);
+
+void smm_initiate_relocation(void)
+{
+ spin_lock(&smm_relocation_lock);
+ smm_initiate_relocation_parallel();
spin_unlock(&smm_relocation_lock);
}
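
The refactor makes the lock-free body (smm_initiate_relocation_parallel) the primitive and reduces the original serialized entry point to a thin wrapper that only adds the spinlock. A generic sketch of that wrapper pattern follows, using a GCC test-and-set lock in place of coreboot's DECLARE_SPIN_LOCK/spin_lock; the function names are illustrative.

#include <stdio.h>

/* Stand-in spinlock built on GCC atomics. */
static volatile int lock;

static void lock_acquire(void)
{
	while (__sync_lock_test_and_set(&lock, 1))
		;	/* spin until the lock is free */
}

static void lock_release(void)
{
	__sync_lock_release(&lock);
}

/* Lock-free body: safe to run on all CPUs at once (parallel mode). */
static void do_relocation_parallel(void)
{
	puts("send SMI to self, relocate SMBASE");
}

/* Serialized entry point: the original behavior, now just a wrapper
 * that brackets the parallel body with the lock. */
static void do_relocation_serial(void)
{
	lock_acquire();
	do_relocation_parallel();
	lock_release();
}

int main(void)
{
	do_relocation_serial();
	return 0;
}
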
diff --git a/src/cpu/intel/haswell/smmrelocate.c b/src/cpu/intel/haswell/smmrelocate.c
index 2bf304ebbf..2a322a7f9a 100644
--- a/src/cpu/intel/haswell/smmrelocate.c
+++ b/src/cpu/intel/haswell/smmrelocate.c
@@ -36,6 +36,14 @@
#define EMRRphysMask_MSR 0x1f5
#define UNCORE_EMRRphysBase_MSR 0x2f4
#define UNCORE_EMRRphysMask_MSR 0x2f5
+#define SMM_MCA_CAP_MSR 0x17d
+#define SMM_CPU_SVRSTR_BIT 57
+#define SMM_CPU_SVRSTR_MASK (1 << (SMM_CPU_SVRSTR_BIT - 32))
+#define SMM_FEATURE_CONTROL_MSR 0x4e0
+#define SMM_CPU_SAVE_EN (1 << 1)
+/* SMM save state MSRs */
+#define SMBASE_MSR 0xc20
+#define IEDBASE_MSR 0xc22
#define SMRR_SUPPORTED (1<<11)
#define EMRR_SUPPORTED (1<<12)
@@ -51,6 +59,10 @@ struct smm_relocation_params {
msr_t emrr_mask;
msr_t uncore_emrr_base;
msr_t uncore_emrr_mask;
+ /* The smm_save_state_in_msrs field indicates whether the SMM save
+ * state locations live in MSRs. This tells the CPUs how to adjust
+ * the SMBASE and IEDBASE. */
+ int smm_save_state_in_msrs;
};
/* This gets filled in and used during relocation. */
@@ -82,13 +94,79 @@ static inline void write_uncore_emrr(struct smm_relocation_params *relo_params)
wrmsr(UNCORE_EMRRphysMask_MSR, relo_params->uncore_emrr_mask);
}
+static void update_save_state(int cpu,
+ struct smm_relocation_params *relo_params,
+ const struct smm_runtime *runtime)
+{
+ u32 smbase;
+ u32 iedbase;
+
+ /* The relocated handler runs on all CPUs concurrently. Therefore,
+ * stagger the entry points by adjusting SMBASE downwards by
+ * save state size * CPU num. */
+ smbase = relo_params->smram_base - cpu * runtime->save_state_size;
+ iedbase = relo_params->ied_base;
+
+ printk(BIOS_DEBUG, "New SMBASE=0x%08x IEDBASE=0x%08x\n",
+ smbase, iedbase);
+
+ /* All threads need to set IEDBASE and SMBASE to the relocated
+ * handler region. However, the save state location depends on the
+ * smm_save_state_in_msrs field in the relocation parameters. If
+ * smm_save_state_in_msrs is non-zero then the CPUs are relocating
+ * the SMM handler in parallel, and each CPU's save state area is
+ * located in its respective MSR space. If smm_save_state_in_msrs
+ * is zero then SMM relocation is happening serially, so the
+ * save state is at the same default location for all CPUs. */
+ if (relo_params->smm_save_state_in_msrs) {
+ msr_t smbase_msr;
+ msr_t iedbase_msr;
+
+ smbase_msr.lo = smbase;
+ smbase_msr.hi = 0;
+
+ /* According to the BWG, the IEDBASE MSR takes the address in
+ * bits 63:32. It's not clear why it differs from the SMBASE MSR. */
+ iedbase_msr.lo = 0;
+ iedbase_msr.hi = iedbase;
+
+ wrmsr(SMBASE_MSR, smbase_msr);
+ wrmsr(IEDBASE_MSR, iedbase_msr);
+ } else {
+ em64t101_smm_state_save_area_t *save_state;
+
+ save_state = (void *)(runtime->smbase + SMM_DEFAULT_SIZE -
+ runtime->save_state_size);
+
+ save_state->smbase = smbase;
+ save_state->iedbase = iedbase;
+ }
+}
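
The SMBASE staggering in update_save_state() is simple arithmetic: IEDBASE is shared, but each CPU's SMBASE is lowered by cpu * save_state_size so the concurrently running relocated handlers keep separate save state areas. The worked example below prints the resulting bases; the SMRAM base and save state size are made-up values for illustration only.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint32_t smram_base = 0x7f800000;		/* hypothetical */
	const uint32_t save_state_size = 0x400;		/* hypothetical */
	int cpu;

	/* Each CPU's entry point drops by one save state size. */
	for (cpu = 0; cpu < 4; cpu++) {
		uint32_t smbase = smram_base - cpu * save_state_size;
		printf("cpu %d: SMBASE=0x%08x\n", cpu, (unsigned)smbase);
	}
	return 0;
}
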
+
+/* Returns 1 if SMM MSR save state was set. */
+static int bsp_setup_msr_save_state(struct smm_relocation_params *relo_params)
+{
+ msr_t smm_mca_cap;
+
+ smm_mca_cap = rdmsr(SMM_MCA_CAP_MSR);
+ if (smm_mca_cap.hi & SMM_CPU_SVRSTR_MASK) {
+ msr_t smm_feature_control;
+
+ smm_feature_control = rdmsr(SMM_FEATURE_CONTROL_MSR);
+ smm_feature_control.hi = 0;
+ smm_feature_control.lo |= SMM_CPU_SAVE_EN;
+ wrmsr(SMM_FEATURE_CONTROL_MSR, smm_feature_control);
+ relo_params->smm_save_state_in_msrs = 1;
+ }
+ return relo_params->smm_save_state_in_msrs;
+}
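
bsp_setup_msr_save_state() tests bit 57 of SMM_MCA_CAP through the hi half of coreboot's msr_t, which is why SMM_CPU_SVRSTR_MASK is defined as (1 << (SMM_CPU_SVRSTR_BIT - 32)). The sketch below shows the same hi/lo split on a fabricated MSR value; it is an illustration, not a real rdmsr.

#include <stdio.h>
#include <stdint.h>

#define SMM_CPU_SVRSTR_BIT	57
#define SMM_CPU_SVRSTR_MASK	(1u << (SMM_CPU_SVRSTR_BIT - 32))

int main(void)
{
	/* Fabricated value standing in for rdmsr(SMM_MCA_CAP_MSR). */
	uint64_t msr_value = 1ull << SMM_CPU_SVRSTR_BIT;
	uint32_t lo = (uint32_t)msr_value;
	uint32_t hi = (uint32_t)(msr_value >> 32);

	(void)lo;
	/* Bit 57 of the 64-bit MSR is bit 25 of the hi word. */
	printf("save/restore in MSRs supported: %s\n",
	       (hi & SMM_CPU_SVRSTR_MASK) ? "yes" : "no");
	return 0;
}
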
+
/* The relocation work is actually performed in SMM context, but the code
* resides in the ramstage module. This occurs by trampolining from the default
* SMRAM entry point to here. */
static void __attribute__((cdecl))
cpu_smm_do_relocation(void *arg, int cpu, const struct smm_runtime *runtime)
{
- em64t101_smm_state_save_area_t *save_state;
msr_t mtrr_cap;
struct smm_relocation_params *relo_params = arg;
@@ -100,21 +178,32 @@ cpu_smm_do_relocation(void *arg, int cpu, const struct smm_runtime *runtime)
printk(BIOS_DEBUG, "In relocation handler: cpu %d\n", cpu);
- /* All threads need to set IEDBASE and SMBASE in the save state area.
- * Since one thread runs at a time during the relocation the save state
- * is the same for all cpus. */
- save_state = (void *)(runtime->smbase + SMM_DEFAULT_SIZE -
- runtime->save_state_size);
-
- /* The relocated handler runs with all CPUs concurrently. Therefore
- * stagger the entry points adjusting SMBASE downwards by save state
- * size * CPU num. */
- save_state->smbase = relo_params->smram_base -
- cpu * runtime->save_state_size;
- save_state->iedbase = relo_params->ied_base;
+ /* Determine if the processor supports saving state in MSRs. If so,
+ * enable it before the non-BSPs run so that SMM relocation can occur
+ * in parallel on the non-BSP CPUs. */
+ if (cpu == 0) {
+ /* If smm_save_state_in_msrs is 1 then that means this is the
+ * 2nd time through the relocation handler for the BSP.
+ * Parallel SMM handler relocation is taking place. However,
+ * it is desired to access the other CPUs' save state in the real
+ * SMM handler. Therefore, disable the SMM save state in MSRs
+ * feature. */
+ if (relo_params->smm_save_state_in_msrs) {
+ msr_t smm_feature_control;
+
+ smm_feature_control = rdmsr(SMM_FEATURE_CONTROL_MSR);
+ smm_feature_control.lo &= ~SMM_CPU_SAVE_EN;
+ wrmsr(SMM_FEATURE_CONTROL_MSR, smm_feature_control);
+ } else if (bsp_setup_msr_save_state(relo_params))
+ /* Just return from the relocation handler if MSR save
+ * state is enabled. In that case the BSP will come
+ * back into the relocation handler to set up the new
+ * SMBASE as well as disable SMM save state in MSRs. */
+ return;
+ }
- printk(BIOS_DEBUG, "New SMBASE=0x%08x IEDBASE=0x%08x @ %p\n",
- save_state->smbase, save_state->iedbase, save_state);
+ /* Make appropriate changes to the save state map. */
+ update_save_state(cpu, relo_params, runtime);
/* Write EMRR and SMRR MSRs based on indicated support. */
mtrr_cap = rdmsr(MTRRcap_MSR);
@@ -128,8 +217,6 @@ cpu_smm_do_relocation(void *arg, int cpu, const struct smm_runtime *runtime)
if (cpu == 0)
write_uncore_emrr(relo_params);
}
-
- southbridge_clear_smi_status();
}
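
When MSR-based save state is available, the BSP passes through cpu_smm_do_relocation() twice: the first SMM entry only enables the feature and returns, while the second clears SMM_CPU_SAVE_EN for the runtime handler and performs the actual SMBASE update. The sketch below walks that two-pass control flow with illustrative stand-in names; it is a simplification of the handler, not the handler itself.

#include <stdio.h>

struct relo_state {
	int save_state_in_msrs;
};

/* Pretend SMM_MCA_CAP reports the save/restore-in-MSRs capability. */
static int probe_and_enable_msr_save_state(struct relo_state *s)
{
	s->save_state_in_msrs = 1;
	return s->save_state_in_msrs;
}

static void bsp_relocation_pass(struct relo_state *s)
{
	if (s->save_state_in_msrs) {
		/* 2nd entry: turn the feature back off for the runtime
		 * handler, then update SMBASE/IEDBASE via the MSRs. */
		puts("2nd pass: clear SMM_CPU_SAVE_EN, relocate SMBASE");
		return;
	}
	if (probe_and_enable_msr_save_state(s)) {
		/* 1st entry: feature enabled; defer the real relocation
		 * so the APs can later relocate in parallel. */
		puts("1st pass: MSR save state enabled, deferring");
		return;
	}
	puts("single pass: relocate via the memory save state map");
}

int main(void)
{
	struct relo_state s = { 0 };

	bsp_relocation_pass(&s);	/* first SMM entry */
	bsp_relocation_pass(&s);	/* second SMM entry */
	return 0;
}
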
static u32 northbridge_get_base_reg(device_t dev, int reg)
@@ -199,10 +286,12 @@ static void fill_in_relocation_params(device_t dev,
static int install_relocation_handler(int num_cpus,
struct smm_relocation_params *relo_params)
{
- /* The default SMM entry happens serially at the default location.
- * Therefore, there is only 1 concurrent save state area. Set the
- * stack size to the save state size, and call into the
- * do_relocation handler. */
+ /* The default SMM entry can happen in parallel or serially. If the
+ * default SMM entry is done in parallel, the BSP has already set up
+ * save state handling in each CPU's MSRs. At least one save state
+ * size is required for the initial SMM entry so that the BSP can
+ * determine whether parallel SMM relocation is even feasible. Set
+ * the stack size to the save state size, and call into the
+ * do_relocation handler. */
int save_state_size = sizeof(em64t101_smm_state_save_area_t);
struct smm_loader_params smm_params = {
.per_cpu_stack_size = save_state_size,
@@ -309,6 +398,17 @@ int smm_initialize(void)
/* Run the relocation handler. */
smm_initiate_relocation();
+ /* If smm_save_state_in_msrs is non-zero then parallel SMM relocation
+ * shall take place. Run the relocation handler a second time to do
+ * the final move. */
+ if (smm_reloc_params.smm_save_state_in_msrs) {
+ printk(BIOS_DEBUG, "Doing parallel SMM relocation.\n");
+ release_aps_for_smm_relocation(1);
+ smm_initiate_relocation_parallel();
+ } else {
+ release_aps_for_smm_relocation(0);
+ }
+
/* Lock down the SMRAM space. */
smm_lock();
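
Taken together, the ramstage side added to smm_initialize() decides between the two schemes after the BSP's first (serial) relocation and locks SMRAM only once every CPU has moved its handler. The condensed sketch below mirrors that ordering; all helper names are stand-ins for the coreboot calls.

#include <stdio.h>

struct params { int smm_save_state_in_msrs; };

static void relocate_bsp_serially(struct params *p)
{
	/* 1st SMI: the BSP probes SMM_MCA_CAP and may enable MSR save
	 * state, which flips the flag checked below. */
	p->smm_save_state_in_msrs = 1;	/* pretend the CPU supports it */
}

static void release_aps(int parallel)
{
	printf("release APs, parallel=%d\n", parallel);
}

static void relocate_bsp_parallel(void) { puts("2nd BSP relocation pass"); }
static void lock_smram(void) { puts("SMRAM locked"); }

int main(void)
{
	struct params p = { 0 };

	relocate_bsp_serially(&p);
	if (p.smm_save_state_in_msrs) {
		release_aps(1);		/* APs relocate concurrently */
		relocate_bsp_parallel();
	} else {
		release_aps(0);		/* old serial scheme */
	}
	lock_smram();
	return 0;
}
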