diff options
-rw-r--r-- | src/cpu/intel/haswell/Kconfig | 1 | ||||
-rw-r--r-- | src/cpu/intel/haswell/Makefile.inc | 23 | ||||
-rw-r--r-- | src/cpu/intel/haswell/haswell.h | 9 | ||||
-rw-r--r-- | src/cpu/intel/haswell/haswell_init.c | 145 | ||||
-rw-r--r-- | src/cpu/intel/haswell/mp_init.c | 550 | ||||
-rw-r--r-- | src/cpu/intel/haswell/sipi_header.c | 6 | ||||
-rw-r--r-- | src/cpu/intel/haswell/sipi_vector.S | 190 | ||||
-rw-r--r-- | src/cpu/intel/haswell/smmrelocate.c | 25 |
8 files changed, 845 insertions, 104 deletions
diff --git a/src/cpu/intel/haswell/Kconfig b/src/cpu/intel/haswell/Kconfig index bf0abbcbd6..95d0b18ff2 100644 --- a/src/cpu/intel/haswell/Kconfig +++ b/src/cpu/intel/haswell/Kconfig @@ -11,6 +11,7 @@ config CPU_SPECIFIC_OPTIONS select UDELAY_LAPIC select SMM_TSEG select SMM_MODULES + select RELOCATABLE_MODULES select CPU_MICROCODE_IN_CBFS #select AP_IN_SIPI_WAIT select TSC_SYNC_MFENCE diff --git a/src/cpu/intel/haswell/Makefile.inc b/src/cpu/intel/haswell/Makefile.inc index b2116f2dfe..a19a8c5270 100644 --- a/src/cpu/intel/haswell/Makefile.inc +++ b/src/cpu/intel/haswell/Makefile.inc @@ -1,5 +1,6 @@ ramstage-y += haswell_init.c subdirs-y += ../../x86/name +ramstage-y += mp_init.c romstage-y += romstage.c ramstage-$(CONFIG_GENERATE_ACPI_TABLES) += acpi.c @@ -10,3 +11,25 @@ cpu_microcode-$(CONFIG_CPU_MICROCODE_CBFS_GENERATE) += microcode_blob.c smm-$(CONFIG_HAVE_SMI_HANDLER) += finalize.c cpu_incs += $(src)/cpu/intel/haswell/cache_as_ram.inc + +# AP startub stub +SIPI_ELF=$(obj)/cpu/intel/haswell/sipi_vector.elf +SIPI_BIN=$(SIPI_ELF:.elf=) +SIPI_DOTO=$(SIPI_ELF:.elf=.o) + +ramstage-srcs += $(SIPI_BIN) +rmodules-y += sipi_vector.S +rmodules-y += sipi_header.c + +$(SIPI_DOTO): $(dir $(SIPI_ELF))sipi_vector.rmodules.o $(dir $(SIPI_ELF))sipi_header.rmodules.o + $(CC) $(LDFLAGS) -nostdlib -r -o $@ $^ + +$(eval $(call rmodule_link,$(SIPI_ELF), $(SIPI_ELF:.elf=.o), 0)) + +$(SIPI_BIN): $(SIPI_ELF) + $(OBJCOPY) -O binary $< $@ + +$(SIPI_BIN).ramstage.o: $(SIPI_BIN) + @printf " OBJCOPY $(subst $(obj)/,,$(@))\n" + cd $(dir $@); $(OBJCOPY) -I binary $(notdir $<) -O elf32-i386 -B i386 $(notdir $@) + diff --git a/src/cpu/intel/haswell/haswell.h b/src/cpu/intel/haswell/haswell.h index 733ddd3039..26807e9f3a 100644 --- a/src/cpu/intel/haswell/haswell.h +++ b/src/cpu/intel/haswell/haswell.h @@ -141,8 +141,17 @@ void intel_cpu_haswell_finalize_smm(void); /* Configure power limits for turbo mode */ void set_power_limits(u8 power_limit_1_time); int cpu_config_tdp_levels(void); +/* 
Returns 0 on success, < 0 on failure. */ +int smm_initialize(void); +void smm_initiate_relocation(void); struct bus; void bsp_init_and_start_aps(struct bus *cpu_bus); +/* Returns 0 on succes. < 0 on failure. */ +int setup_ap_init(struct bus *cpu_bus, int *max_cpus, + const void *microcode_patch); +/* Returns 0 on success, < 0 on failure. */ +int start_aps(struct bus *cpu_bus, int max_cpus); +void release_aps_for_smm_relocation(void); #endif #endif diff --git a/src/cpu/intel/haswell/haswell_init.c b/src/cpu/intel/haswell/haswell_init.c index 9e62b31eb9..82430b750d 100644 --- a/src/cpu/intel/haswell/haswell_init.c +++ b/src/cpu/intel/haswell/haswell_init.c @@ -442,71 +442,30 @@ static void configure_mca(void) static unsigned ehci_debug_addr; #endif -/* - * Initialize any extra cores/threads in this package. - */ -static void intel_cores_init(device_t cpu) -{ - struct cpuid_result result; - unsigned threads_per_package, threads_per_core, i; - - /* Logical processors (threads) per core */ - result = cpuid_ext(0xb, 0); - threads_per_core = result.ebx & 0xffff; - - /* Logical processors (threads) per package */ - result = cpuid_ext(0xb, 1); - threads_per_package = result.ebx & 0xffff; - - /* Only initialize extra cores from BSP */ - if (cpu->path.apic.apic_id) - return; - - printk(BIOS_DEBUG, "CPU: %u has %u cores, %u threads per core\n", - cpu->path.apic.apic_id, threads_per_package/threads_per_core, - threads_per_core); - - for (i = 1; i < threads_per_package; ++i) { - struct device_path cpu_path; - device_t new; - - /* Build the cpu device path */ - cpu_path.type = DEVICE_PATH_APIC; - cpu_path.apic.apic_id = - cpu->path.apic.apic_id + i; - - /* Update APIC ID if no hyperthreading */ - if (threads_per_core == 1) - cpu_path.apic.apic_id <<= 1; - - /* Allocate the new cpu device structure */ - new = alloc_dev(cpu->bus, &cpu_path); - if (!new) - continue; - - printk(BIOS_DEBUG, "CPU: %u has core %u\n", - cpu->path.apic.apic_id, - new->path.apic.apic_id); - -#if CONFIG_SMP 
&& CONFIG_MAX_CPUS > 1 - /* Start the new cpu */ - if (!start_cpu(new)) { - /* Record the error in cpu? */ - printk(BIOS_ERR, "CPU %u would not start!\n", - new->path.apic.apic_id); - } -#endif - } -} - -static void bsp_init_before_ap_bringup(void) +static void bsp_init_before_ap_bringup(struct bus *cpu_bus) { + struct device_path cpu_path; + struct cpu_info *info; char processor_name[49]; /* Print processor name */ fill_processor_name(processor_name); printk(BIOS_INFO, "CPU: %s.\n", processor_name); + /* Ensure the local apic is enabled */ + enable_lapic(); + + /* Set the device path of the boot cpu. */ + cpu_path.type = DEVICE_PATH_APIC; + cpu_path.apic.apic_id = lapicid(); + + /* Find the device structure for the boot cpu. */ + info = cpu_info(); + info->cpu = alloc_find_dev(cpu_bus, &cpu_path); + + if (info->index != 0) + printk(BIOS_CRIT, "BSP index(%d) != 0!\n", info->index); + #if CONFIG_USBDEBUG // Is this caution really needed? if(!ehci_debug_addr) @@ -523,23 +482,12 @@ static void bsp_init_before_ap_bringup(void) set_ehci_debug(ehci_debug_addr); #endif - enable_lapic(); -} - -static void ap_init(device_t cpu) -{ - /* Microcode needs to be loaded before caching is enabled. */ - intel_update_microcode_from_cbfs(); - - /* Turn on caching if we haven't already */ - x86_enable_cache(); - x86_setup_fixed_mtrrs(); - x86_setup_var_mtrrs(cpuid_eax(0x80000008) & 0xff, 2); - - enable_lapic(); + /* Call through the cpu driver's initialization. */ + cpu_initialize(0); } -static void cpu_common_init(device_t cpu) +/* All CPUs including BSP will run the following function. */ +static void haswell_init(device_t cpu) { /* Clear out pending MCEs */ configure_mca(); @@ -572,33 +520,40 @@ static void cpu_common_init(device_t cpu) void bsp_init_and_start_aps(struct bus *cpu_bus) { + int max_cpus; + int num_aps; + const void *microcode_patch; + /* Perform any necesarry BSP initialization before APs are brought up. 
* This call alos allows the BSP to prepare for any secondary effects * from calling cpu_initialize() such as smm_init(). */ - bsp_init_before_ap_bringup(); - - /* - * This calls into the gerneic initialize_cpus() which attempts to - * start APs on the APIC bus in the devicetree. No APs get started - * because there is only the BSP and a placeholder (disabled) in the - * devicetree. initialize_cpus() also does SMM initialization by way - * of smm_init(). It will eventually call cpu_initialize(0) which calls - * dev_ops->init(). For Haswell the dev_ops->init() starts up the APs - * by way of intel_cores_init(). - */ - initialize_cpus(cpu_bus); -} + bsp_init_before_ap_bringup(cpu_bus); -static void haswell_init(device_t cpu) -{ - if (cpu->path.apic.apic_id == 0) { - cpu_common_init(cpu); - /* Start up extra cores */ - intel_cores_init(cpu); - } else { - ap_init(cpu); - cpu_common_init(cpu); + microcode_patch = intel_microcode_find(); + + /* This needs to be called after the mtrr setup so the BSP mtrrs + * can be mirrored by the APs. */ + if (setup_ap_init(cpu_bus, &max_cpus, microcode_patch)) { + printk(BIOS_CRIT, "AP setup initialization failed. " + "No APs will be brought up.\n"); + return; + } + + num_aps = max_cpus - 1; + if (start_aps(cpu_bus, num_aps)) { + printk(BIOS_CRIT, "AP startup failed. Trying to continue.\n"); } + + if (smm_initialize()) { + printk(BIOS_CRIT, "SMM Initialiazation failed...\n"); + return; + } + + /* Release APs to perform SMM relocation. */ + release_aps_for_smm_relocation(); + + /* After SMM relocation a 2nd microcode load is required. */ + intel_microcode_load_unlocked(microcode_patch); } static struct device_operations cpu_dev_ops = { diff --git a/src/cpu/intel/haswell/mp_init.c b/src/cpu/intel/haswell/mp_init.c new file mode 100644 index 0000000000..b1567ba3e4 --- /dev/null +++ b/src/cpu/intel/haswell/mp_init.c @@ -0,0 +1,550 @@ +/* + * This file is part of the coreboot project. 
+ * + * Copyright (C) 2013 ChromeOS Authors + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of + * the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + */ + +#include <console/console.h> +#include <stdint.h> +#include <rmodule.h> +#include <arch/cpu.h> +#include <cpu/cpu.h> +#include <cpu/intel/microcode.h> +#include <cpu/x86/cache.h> +#include <cpu/x86/lapic.h> +#include <cpu/x86/msr.h> +#include <cpu/x86/mtrr.h> +#include <cpu/x86/smm.h> +#include <delay.h> +#include <device/device.h> +#include <device/path.h> +#include <lib.h> +#include <smp/atomic.h> +#include <smp/spinlock.h> +#include "haswell.h" + +/* This needs to match the layout in the .module_parametrs section. */ +struct sipi_params { + u16 gdtlimit; + u32 gdt; + u16 unused; + u32 idt_ptr; + u32 stack_top; + u32 stack_size; + u32 microcode_ptr; + u32 msr_table_ptr; + u32 msr_count; + u32 c_handler; + u32 c_handler_arg; + u8 apic_to_cpu_num[CONFIG_MAX_CPUS]; +} __attribute__((packed)); + +/* This also needs to match the assembly code for saved MSR encoding. */ +struct saved_msr { + u32 index; + u32 lo; + u32 hi; +} __attribute__((packed)); + + +/* The sipi vector rmodule is included in the ramstage using 'objdump -B'. */ +extern char _binary_sipi_vector_start[]; +/* These symbols are defined in c_start.S. */ +extern char gdt[]; +extern char gdt_limit[]; +extern char idtarg[]; + +/* This table keeps track of each CPU's APIC id. 
*/ +static u8 apic_id_table[CONFIG_MAX_CPUS]; +static device_t cpu_devs[CONFIG_MAX_CPUS]; + +/* Number of APs that have checked in. */ +static atomic_t num_aps; +/* Barrier to stop APs from performing SMM relocation. */ +static int smm_relocation_barrier_begin __attribute__ ((aligned (64))); + +static inline void wait_for_barrier(volatile int *barrier) +{ + while (*barrier == 0) { + asm ("pause"); + } +} + +static inline void release_barrier(volatile int *barrier) +{ + *barrier = 1; +} + +static void ap_wait_for_smm_relocation_begin(void) +{ + wait_for_barrier(&smm_relocation_barrier_begin); +} + + +/* Returns 1 if timeout waiting for APs. 0 if target aps found. */ +static int wait_for_aps(int target, int total_delay, int delay_step) +{ + int timeout = 0; + int delayed = 0; + while (atomic_read(&num_aps) != target) { + udelay(delay_step); + delayed += delay_step; + if (delayed >= total_delay) { + timeout = 1; + break; + } + } + + return timeout; +} + +void release_aps_for_smm_relocation(void) +{ + release_barrier(&smm_relocation_barrier_begin); +} + +/* The mtrr code sets up ROM caching on the BSP, but not the others. However, + * the boot loader payload disables this. In order for Linux not to complain + * ensure the caching is disabled for the APs before going to sleep. */ +static void cleanup_rom_caching(void) +{ +#if CONFIG_CACHE_ROM + msr_t msr; + unsigned int last_var_mtrr; + + msr = rdmsr(MTRRcap_MSR); + last_var_mtrr = (msr.lo & 0xff) - 1; + + /* Check if the MTRR is valid. */ + msr = rdmsr(MTRRphysMask_MSR(last_var_mtrr)); + if ((msr.lo & MTRRphysMaskValid) == 0) + return; + msr = rdmsr(MTRRphysBase_MSR(last_var_mtrr)); + /* Assume that if the MTRR is of write protected type, the MTRR is used + * to cache the ROM.
*/ + if ((msr.lo & MTRR_NUM_TYPES) == MTRR_TYPE_WRPROT) { + msr.lo = msr.hi = 0; + disable_cache(); + wrmsr(MTRRphysMask_MSR(last_var_mtrr), msr); + wrmsr(MTRRphysBase_MSR(last_var_mtrr), msr); + enable_cache(); + } +#endif +} + +/* By the time APs call ap_init() caching has been setup, and microcode has + * been loaded. */ +static void ap_init(unsigned int cpu, void *microcode_ptr) +{ + struct cpu_info *info; + + /* Signal that the AP has arrived. */ + atomic_inc(&num_aps); + + /* Ensure the local apic is enabled */ + enable_lapic(); + + info = cpu_info(); + info->index = cpu; + info->cpu = cpu_devs[cpu]; + + apic_id_table[info->index] = lapicid(); + info->cpu->path.apic.apic_id = apic_id_table[info->index]; + + /* Call through the cpu driver's initialization. */ + cpu_initialize(info->index); + + ap_wait_for_smm_relocation_begin(); + + smm_initiate_relocation(); + + /* After SMM relocation a 2nd microcode load is required. */ + intel_microcode_load_unlocked(microcode_ptr); + + /* Cleanup ROM caching. */ + cleanup_rom_caching(); + + /* FIXME(adurbin): park CPUs properly -- preferably somewhere in a + * reserved part of memory that the OS cannot get to. */ + stop_this_cpu(); +} + +static void setup_default_sipi_vector_params(struct sipi_params *sp) +{ + int i; + + sp->gdt = (u32)&gdt; + sp->gdtlimit = (u32)&gdt_limit; + sp->idt_ptr = (u32)&idtarg; + sp->stack_size = CONFIG_STACK_SIZE; + sp->stack_top = (u32)&_estack; + /* Adjust the stack top to take into account cpu_info. */ + sp->stack_top -= sizeof(struct cpu_info); + /* Default to linear APIC id space. 
*/ + for (i = 0; i < CONFIG_MAX_CPUS; i++) + sp->apic_to_cpu_num[i] = i; +} + +#define NUM_FIXED_MTRRS 11 +static unsigned int fixed_mtrrs[NUM_FIXED_MTRRS] = { + MTRRfix64K_00000_MSR, MTRRfix16K_80000_MSR, MTRRfix16K_A0000_MSR, + MTRRfix4K_C0000_MSR, MTRRfix4K_C8000_MSR, MTRRfix4K_D0000_MSR, + MTRRfix4K_D8000_MSR, MTRRfix4K_E0000_MSR, MTRRfix4K_E8000_MSR, + MTRRfix4K_F0000_MSR, MTRRfix4K_F8000_MSR, +}; + +static inline struct saved_msr *save_msr(int index, struct saved_msr *entry) +{ + msr_t msr; + + msr = rdmsr(index); + entry->index = index; + entry->lo = msr.lo; + entry->hi = msr.hi; + + /* Return the next entry. */ + entry++; + return entry; +} + +static int save_bsp_msrs(char *start, int size) +{ + int msr_count; + int num_var_mtrrs; + struct saved_msr *msr_entry; + int i; + msr_t msr; + + /* Determine number of MTRRs need to be saved. */ + msr = rdmsr(MTRRcap_MSR); + num_var_mtrrs = msr.lo & 0xff; + + /* 2 * num_var_mtrrs for base and mask. +1 for IA32_MTRR_DEF_TYPE. */ + msr_count = 2 * num_var_mtrrs + NUM_FIXED_MTRRS + 1; + + if ((msr_count * sizeof(struct saved_msr)) > size) { + printk(BIOS_CRIT, "Cannot mirror all %d msrs.\n", msr_count); + return -1; + } + + msr_entry = (void *)start; + for (i = 0; i < NUM_FIXED_MTRRS; i++) { + msr_entry = save_msr(fixed_mtrrs[i], msr_entry); + } + + for (i = 0; i < num_var_mtrrs; i++) { + msr_entry = save_msr(MTRRphysBase_MSR(i), msr_entry); + msr_entry = save_msr(MTRRphysMask_MSR(i), msr_entry); + } + + msr_entry = save_msr(MTRRdefType_MSR, msr_entry); + + return msr_count; +} + +/* The SIPI vector is loaded at the SMM_DEFAULT_BASE. The reason is at the + * memory range is already reserved so the OS cannot use it. That region is + * free to use for AP bringup before SMM is initialized. 
*/ +static u32 sipi_vector_location = SMM_DEFAULT_BASE; +static int sipi_vector_location_size = SMM_DEFAULT_SIZE; + +static int load_sipi_vector(const void *microcode_patch) +{ + struct rmodule sipi_mod; + int module_size; + int num_msrs; + struct sipi_params *sp; + char *mod_loc = (void *)sipi_vector_location; + const int loc_size = sipi_vector_location_size; + + if (rmodule_parse(&_binary_sipi_vector_start, &sipi_mod)) { + printk(BIOS_CRIT, "Unable to parse sipi module.\n"); + return -1; + } + + if (rmodule_entry_offset(&sipi_mod) != 0) { + printk(BIOS_CRIT, "SIPI module entry offset is not 0!\n"); + return -1; + } + + if (rmodule_load_alignment(&sipi_mod) != 4096) { + printk(BIOS_CRIT, "SIPI module load alignment(%d) != 4096.\n", + rmodule_load_alignment(&sipi_mod)); + return -1; + } + + module_size = rmodule_memory_size(&sipi_mod); + + /* Align to 4 bytes. */ + module_size += 3; + module_size &= ~3; + + if (module_size > loc_size) { + printk(BIOS_CRIT, "SIPI module size (%d) > region size (%d).\n", + module_size, loc_size); + return -1; + } + + num_msrs = save_bsp_msrs(&mod_loc[module_size], loc_size - module_size); + + if (num_msrs < 0) { + printk(BIOS_CRIT, "Error mirroring BSP's msrs.\n"); + return -1; + } + + if (rmodule_load(mod_loc, &sipi_mod)) { + printk(BIOS_CRIT, "Unable to load SIPI module.\n"); + return -1; + } + + sp = rmodule_parameters(&sipi_mod); + + if (sp == NULL) { + printk(BIOS_CRIT, "SIPI module has no parameters.\n"); + return -1; + } + + setup_default_sipi_vector_params(sp); + /* Setup MSR table. */ + sp->msr_table_ptr = (u32)&mod_loc[module_size]; + sp->msr_count = num_msrs; + /* Provide pointer to microcode patch. */ + sp->microcode_ptr = (u32)microcode_patch; + /* The microcode pointer is passed on through to the c handler so + * that it can be loaded again after SMM relocation. 
*/ + sp->c_handler_arg = (u32)microcode_patch; + sp->c_handler = (u32)&ap_init; + + /* Make sure SIPI vector hits RAM so the APs that come up will see + * the startup code even if the caches are disabled. */ + wbinvd(); + + return 0; +} + +static int allocate_cpu_devices(struct bus *cpu_bus, int *total_hw_threads) +{ + int i; + int num_threads; + int num_cores; + int max_cpus; + struct cpu_info *info; + msr_t msr; + + info = cpu_info(); + cpu_devs[info->index] = info->cpu; + apic_id_table[info->index] = info->cpu->path.apic.apic_id; + + msr = rdmsr(CORE_THREAD_COUNT_MSR); + num_threads = (msr.lo >> 0) & 0xffff; + num_cores = (msr.lo >> 16) & 0xffff; + printk(BIOS_DEBUG, "CPU has %u cores, %u threads enabled.\n", + num_cores, num_threads); + + max_cpus = num_threads; + *total_hw_threads = num_threads; + if (num_threads > CONFIG_MAX_CPUS) { + printk(BIOS_CRIT, "CPU count(%d) exceeds CONFIG_MAX_CPUS(%d)\n", + num_threads, CONFIG_MAX_CPUS); + max_cpus = CONFIG_MAX_CPUS; + } + + for (i = 1; i < max_cpus; i++) { + struct device_path cpu_path; + device_t new; + + /* Build the cpu device path */ + cpu_path.type = DEVICE_PATH_APIC; + cpu_path.apic.apic_id = info->cpu->path.apic.apic_id + i; + + /* Allocate the new cpu device structure */ + new = alloc_find_dev(cpu_bus, &cpu_path); + if (new == NULL) { + printk(BIOS_CRIT, "Could not allocte cpu device\n"); + max_cpus--; + } + cpu_devs[i] = new; + } + + return max_cpus; +} + +int setup_ap_init(struct bus *cpu_bus, int *max_cpus, + const void *microcode_patch) +{ + int num_cpus; + int hw_threads; + + /* Default to currently running CPU. */ + num_cpus = allocate_cpu_devices(cpu_bus, &hw_threads); + + /* Load the SIPI vector. */ + if (load_sipi_vector(microcode_patch)) + return -1; + + *max_cpus = num_cpus; + + if (num_cpus < hw_threads) { + printk(BIOS_CRIT, + "ERROR: More HW threads (%d) than support (%d).\n", + hw_threads, num_cpus); + return -1; + } + + return 0; +} + +/* Returns 1 for timeout. 0 on success. 
*/ +static int apic_wait_timeout(int total_delay, int delay_step) +{ + int total = 0; + int timeout = 0; + + while (lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY) { + udelay(delay_step); + total += delay_step; + if (total >= total_delay) { + timeout = 1; + break; + } + } + + return timeout; +} + +int start_aps(struct bus *cpu_bus, int ap_count) +{ + int sipi_vector; + + if (ap_count == 0) + return 0; + + /* The vector is sent as a 4k aligned address in one byte. */ + sipi_vector = sipi_vector_location >> 12; + + if (sipi_vector > 256) { + printk(BIOS_CRIT, "SIPI vector too large! 0x%08x\n", + sipi_vector); + return -1; + } + + printk(BIOS_DEBUG, "Attempting to start %d APs\n", ap_count); + + if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) { + printk(BIOS_DEBUG, "Waiting for ICR not to be busy..."); + if (apic_wait_timeout(1000 /* 1 ms */, 50)) { + printk(BIOS_DEBUG, "timed out. Aborting.\n"); + return -1; + } else + printk(BIOS_DEBUG, "done.\n"); + } + + /* Send INIT IPI to all but self. */ + lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0)); + lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT | + LAPIC_DM_INIT); + printk(BIOS_DEBUG, "Waiting for INIT to complete..."); + + /* Wait for 10 ms to complete. */ + if (apic_wait_timeout(10000 /* 10 ms */, 100 /* us */)) { + printk(BIOS_DEBUG, "timed out. Bailing. \n"); + return -1; + } else { + printk(BIOS_DEBUG, "done.\n"); + } + + /* Send 1st SIPI */ + if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) { + printk(BIOS_DEBUG, "Waiting for ICR not to be busy..."); + if (apic_wait_timeout(1000 /* 1 ms */, 50)) { + printk(BIOS_DEBUG, "timed out. 
Aborting.\n"); + return -1; + } else + printk(BIOS_DEBUG, "done.\n"); + } + + lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0)); + lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT | + LAPIC_DM_STARTUP | sipi_vector); + printk(BIOS_DEBUG, "Waiting for 1st SIPI to complete..."); + if (apic_wait_timeout(10000 /* 10 ms */, 50 /* us */)) { + printk(BIOS_DEBUG, "timed out.\n"); + return -1; + } else { + printk(BIOS_DEBUG, "done.\n"); + } + /* Wait for CPUs to check in up to 200 us. */ + wait_for_aps(ap_count, 200 /* us */, 15 /* us */); + + /* Send 2nd SIPI */ + if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) { + printk(BIOS_DEBUG, "Waiting for ICR not to be busy..."); + if (apic_wait_timeout(1000 /* 1 ms */, 50)) { + printk(BIOS_DEBUG, "timed out. Aborting.\n"); + return -1; + } else + printk(BIOS_DEBUG, "done.\n"); + } + + lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0)); + lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT | + LAPIC_DM_STARTUP | sipi_vector); + printk(BIOS_DEBUG, "Waiting for 2nd SIPI to complete..."); + if (apic_wait_timeout(10000 /* 10 ms */, 50 /* us */)) { + printk(BIOS_DEBUG, "timed out.\n"); + return -1; + } else { + printk(BIOS_DEBUG, "done.\n"); + } + + /* Wait for CPUs to check in. */ + if (wait_for_aps(ap_count, 10000 /* 10 ms */, 50 /* us */)) { + printk(BIOS_DEBUG, "Not all APs checked in: %d/%d.\n", + atomic_read(&num_aps), ap_count); + return -1; + } + + return 0; +} + +DECLARE_SPIN_LOCK(smm_relocation_lock); + +void smm_initiate_relocation(void) +{ + spin_lock(&smm_relocation_lock); + + if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) { + printk(BIOS_DEBUG, "Waiting for ICR not to be busy..."); + if (apic_wait_timeout(1000 /* 1 ms */, 50)) { + printk(BIOS_DEBUG, "timed out. 
Aborting.\n"); + spin_unlock(&smm_relocation_lock); + return; + } else + printk(BIOS_DEBUG, "done.\n"); + } + + lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(lapicid())); + lapic_write_around(LAPIC_ICR, LAPIC_INT_ASSERT | LAPIC_DM_SMI); + if (apic_wait_timeout(1000 /* 1 ms */, 100 /* us */)) { + printk(BIOS_DEBUG, "SMI Relocation timed out.\n"); + } else + printk(BIOS_DEBUG, "Relocation complete.\n"); + + spin_unlock(&smm_relocation_lock); +} + diff --git a/src/cpu/intel/haswell/sipi_header.c b/src/cpu/intel/haswell/sipi_header.c new file mode 100644 index 0000000000..846a82d7c2 --- /dev/null +++ b/src/cpu/intel/haswell/sipi_header.c @@ -0,0 +1,6 @@ +#include <rmodule.h> + + +extern void *ap_start; + +DEFINE_RMODULE_HEADER(sipi_vector_header, ap_start, RMODULE_TYPE_SIPI_VECTOR); diff --git a/src/cpu/intel/haswell/sipi_vector.S b/src/cpu/intel/haswell/sipi_vector.S new file mode 100644 index 0000000000..664a9ee8ee --- /dev/null +++ b/src/cpu/intel/haswell/sipi_vector.S @@ -0,0 +1,190 @@ +/* + * This file is part of the coreboot project. + * + * Copyright (C) 2013 ChromeOS Authors + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of + * the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + */ + +/* The SIPI vector is responsible for initializing the APs in the sytem. It + * loads microcode, sets up MSRs, and enables caching before calling into + * C code. 
*/ + +/* These segment selectors need to match the gdt entries in c_start.S. */ +#define CODE_SEG 0x10 +#define DATA_SEG 0x18 + +#define IA32_UPDT_TRIG 0x79 +#define IA32_BIOS_SIGN_ID 0x8b + +.section ".module_parameters", "aw", @progbits +ap_start_params: +gdtaddr: +.word 0 /* limit */ +.long 0 /* table */ +.word 0 /* unused */ +idt_ptr: +.long 0 +stack_top: +.long 0 +stack_size: +.long 0 +microcode_ptr: +.long 0 +msr_table_ptr: +.long 0 +msr_count: +.long 0 +c_handler: +.long 0 +c_handler_arg: +.long 0 +apic_to_cpu_num: +.fill CONFIG_MAX_CPUS,1,0xff + +.text +.code16 +.global ap_start +ap_start: + cli + xorl %eax, %eax + movl %eax, %cr3 /* Invalidate TLB*/ + + /* On hyper threaded cpus, invalidating the cache here is + * very very bad. Don't. + */ + + /* setup the data segment */ + movw %cs, %ax + movw %ax, %ds + + /* The gdtaddr needs to be relative to the data segment in order + * to properly dereference it. The .text section comes first in an + * rmodule so ap_start can be used as a proxy for the load address. */ + movl $(gdtaddr), %ebx + sub $(ap_start), %ebx + + data32 lgdt (%ebx) + + movl %cr0, %eax + andl $0x7FFAFFD1, %eax /* PG,AM,WP,NE,TS,EM,MP = 0 */ + orl $0x60000001, %eax /* CD, NW, PE = 1 */ + movl %eax, %cr0 + + ljmpl $CODE_SEG, $1f +1: + .code32 + movw $DATA_SEG, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + movw %ax, %fs + movw %ax, %gs + + /* Load the Interrupt descriptor table */ + mov idt_ptr, %ebx + lidt (%ebx) + + /* The CPU number is calculated by reading the initial APIC id. */ + mov $1, %eax + cpuid + /* Default APIC id in ebx[31:24]. Move it to bl. */ + bswap %ebx + mov $(apic_to_cpu_num), %eax + xor %ecx, %ecx + +1: + cmp (%eax, %ecx, 1), %bl + je 1f + inc %ecx + cmp $CONFIG_MAX_CPUS, %ecx + jne 1b + + /* This is bad. No CPU number found. However, the BSP should have set up + * the AP handler properly. Just park the CPU. */ + mov $0x80, %dx + movw $0xdead, %ax + outw %ax, %dx + jmp halt_jump +1: + /* Setup stacks for each CPU.
*/ + movl stack_size, %eax + mul %ecx + movl stack_top, %edx + subl %eax, %edx + mov %edx, %esp + /* Save cpu number. */ + mov %ecx, %esi + + /* Determine if one should check microcode versions. */ + mov microcode_ptr, %edi + test %edi, %edi + jz 1f /* Bypass if no microde exists. */ + + /* Get the Microcode version. */ + mov $1, %eax + cpuid + mov $IA32_BIOS_SIGN_ID, %ecx + rdmsr + /* If something already loaded skip loading again. */ + test %edx, %edx + jnz 1f + + /* Load new microcode. */ + mov $IA32_UPDT_TRIG, %ecx + xor %edx, %edx + mov %edi, %eax + /* The microcode pointer is passed in pointing to the header. Adjust + * pointer to reflect the payload (header size is 48 bytes). */ + add $48, %eax + pusha + wrmsr + popa + +1: + /* + * Load MSRs. Each entry in the table consists of: + * 0: index, + * 4: value[31:0] + * 8: value[63:32] + */ + mov msr_table_ptr, %edi + mov msr_count, %ebx + test %ebx, %ebx + jz 1f +load_msr: + mov (%edi), %ecx + mov 4(%edi), %eax + mov 8(%edi), %edx + wrmsr + add $12, %edi + dec %ebx + jnz load_msr + +1: + /* Enable caching. */ + mov %cr0, %eax + and $0x9fffffff, %eax /* CD, NW = 0 */ + mov %eax, %cr0 + + /* c_handler(cpu_num, *c_handler_arg) */ + push c_handler_arg + push %esi /* cpu_num */ + mov c_handler, %eax + call *%eax +halt_jump: + hlt + jmp halt_jump diff --git a/src/cpu/intel/haswell/smmrelocate.c b/src/cpu/intel/haswell/smmrelocate.c index 1ccc9bb5b3..4312d7953e 100644 --- a/src/cpu/intel/haswell/smmrelocate.c +++ b/src/cpu/intel/haswell/smmrelocate.c @@ -23,6 +23,7 @@ #include <device/pci.h> #include <cpu/cpu.h> #include <cpu/x86/cache.h> +#include <cpu/x86/lapic.h> #include <cpu/x86/msr.h> #include <cpu/x86/mtrr.h> #include <cpu/x86/smm.h> @@ -297,24 +298,30 @@ static int cpu_smm_setup(void) return 0; } -void smm_init(void) +int smm_initialize(void) { /* Return early if CPU SMM setup failed. 
*/ if (cpu_smm_setup()) - return; + return -1; southbridge_smm_init(); - /* Initiate first SMI to kick off SMM-context relocation. Note: this - * SMI being triggered here queues up an SMI in the APs which are in - * wait-for-SIPI state. Once an AP gets an SIPI it will service the SMI - * at the SMM_DEFAULT_BASE before jumping to startup vector. */ - southbridge_trigger_smi(); - - printk(BIOS_DEBUG, "Relocation complete.\n"); + /* Run the relocation handler. */ + smm_initiate_relocation(); /* Lock down the SMRAM space. */ smm_lock(); + + return 0; +} + +void smm_init(void) +{ + /* smm_init() is normally called from initialize_cpus() in + * lapic_cpu_init.c. However, that path is no longer used. Don't reuse + * the function name because that would cause confusion. + * The smm_initialize() function above is used to setup SMM at the + * appropriate time. */ } void smm_lock(void) |