diff options
-rw-r--r-- | src/cpu/x86/Kconfig | 7 | ||||
-rw-r--r-- | src/cpu/x86/Makefile.inc | 25 | ||||
-rw-r--r-- | src/cpu/x86/mp_init.c | 586 | ||||
-rw-r--r-- | src/cpu/x86/sipi_header.c | 6 | ||||
-rw-r--r-- | src/cpu/x86/sipi_vector.S | 192 | ||||
-rw-r--r-- | src/include/cpu/x86/mp.h | 120 |
6 files changed, 936 insertions, 0 deletions
diff --git a/src/cpu/x86/Kconfig b/src/cpu/x86/Kconfig index c64a8e477b..21282c321a 100644 --- a/src/cpu/x86/Kconfig +++ b/src/cpu/x86/Kconfig @@ -115,3 +115,10 @@ config X86_AMD_FIXED_MTRRS help This option informs the MTRR code to use the RdMem and WrMem fields in the fixed MTRR MSRs. + +config PARALLEL_MP + def_bool n + help + This option uses common MP infrastructure for bringing up APs + in parallel. It additionally provides a more flexible mechanism + for sequencing the steps of bringing up the APs. diff --git a/src/cpu/x86/Makefile.inc b/src/cpu/x86/Makefile.inc index ca755dec5d..d5bc2fd219 100644 --- a/src/cpu/x86/Makefile.inc +++ b/src/cpu/x86/Makefile.inc @@ -1,2 +1,27 @@ romstage-$(CONFIG_EARLY_CBMEM_INIT) += car.c romstage-$(CONFIG_HAVE_ACPI_RESUME) += car.c + +subdirs-$(CONFIG_PARALLEL_MP) += name +ramstage-$(CONFIG_PARALLEL_MP) += mp_init.c + +SIPI_ELF=$(obj)/cpu/x86/sipi_vector.elf +SIPI_BIN=$(SIPI_ELF:.elf=) +SIPI_DOTO=$(SIPI_ELF:.elf=.o) + +ifeq ($(CONFIG_PARALLEL_MP),y) +ramstage-srcs += $(SIPI_BIN) +endif +rmodules-$(CONFIG_PARALLEL_MP) += sipi_vector.S +rmodules-$(CONFIG_PARALLEL_MP) += sipi_header.c + +$(SIPI_DOTO): $(dir $(SIPI_ELF))sipi_vector.rmodules.o $(dir $(SIPI_ELF))sipi_header.rmodules.o + $(CC) $(LDFLAGS) -nostdlib -r -o $@ $^ + +$(eval $(call rmodule_link,$(SIPI_ELF), $(SIPI_ELF:.elf=.o), 0)) + +$(SIPI_BIN): $(SIPI_ELF) + $(OBJCOPY) -O binary $< $@ + +$(SIPI_BIN).ramstage.o: $(SIPI_BIN) + @printf " OBJCOPY $(subst $(obj)/,,$(@))\n" + cd $(dir $@); $(OBJCOPY) -I binary $(notdir $<) -O elf32-i386 -B i386 $(notdir $@) diff --git a/src/cpu/x86/mp_init.c b/src/cpu/x86/mp_init.c new file mode 100644 index 0000000000..242d656144 --- /dev/null +++ b/src/cpu/x86/mp_init.c @@ -0,0 +1,586 @@ +/* + * This file is part of the coreboot project. + * + * Copyright (C) 2013 Google Inc. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of + * the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + */ + +#include <console/console.h> +#include <stdint.h> +#include <rmodule.h> +#include <arch/cpu.h> +#include <cpu/cpu.h> +#include <cpu/intel/microcode.h> +#include <cpu/x86/cache.h> +#include <cpu/x86/lapic.h> +#include <cpu/x86/name.h> +#include <cpu/x86/msr.h> +#include <cpu/x86/mtrr.h> +#include <cpu/x86/smm.h> +#include <cpu/x86/mp.h> +#include <delay.h> +#include <device/device.h> +#include <device/path.h> +#include <lib.h> +#include <smp/atomic.h> +#include <smp/spinlock.h> +#include <thread.h> + +#define MAX_APIC_IDS 256 +/* This needs to match the layout in the .module_parameters section. */ +struct sipi_params { + uint16_t gdtlimit; + uint32_t gdt; + uint16_t unused; + uint32_t idt_ptr; + uint32_t stack_top; + uint32_t stack_size; + uint32_t microcode_lock; /* 0xffffffff means parallel loading. */ + uint32_t microcode_ptr; + uint32_t msr_table_ptr; + uint32_t msr_count; + uint32_t c_handler; + atomic_t ap_count; +} __attribute__((packed)); + +/* This also needs to match the assembly code for saved MSR encoding. */ +struct saved_msr { + uint32_t index; + uint32_t lo; + uint32_t hi; +} __attribute__((packed)); + + +/* The sipi vector rmodule is included in the ramstage using 'objcopy -B'. */ +extern char _binary_sipi_vector_start[]; +/* These symbols are defined in c_start.S. 
*/ +extern char gdt[]; +extern char gdt_end[]; +extern char idtarg[]; + +/* The SIPI vector is loaded at the SMM_DEFAULT_BASE. The reason is that the + * memory range is already reserved so the OS cannot use it. That region is + * free to use for AP bringup before SMM is initialized. */ +static const uint32_t sipi_vector_location = SMM_DEFAULT_BASE; +static const int sipi_vector_location_size = SMM_DEFAULT_SIZE; + +struct mp_flight_plan { + int num_records; + struct mp_flight_record *records; +}; + +static struct mp_flight_plan mp_info; + +struct cpu_map { + device_t dev; + int apic_id; +}; + +/* Keep track of apic and device structure for each cpu. */ +static struct cpu_map cpus[CONFIG_MAX_CPUS]; + +static inline void barrier_wait(atomic_t *b) +{ + while (atomic_read(b) == 0) { + asm ("pause"); + } + mfence(); +} + +static inline void release_barrier(atomic_t *b) +{ + mfence(); + atomic_set(b, 1); +} + +/* Returns 1 if timeout waiting for APs. 0 if target aps found. */ +static int wait_for_aps(atomic_t *val, int target, int total_delay, + int delay_step) +{ + int timeout = 0; + int delayed = 0; + while (atomic_read(val) != target) { + udelay(delay_step); + delayed += delay_step; + if (delayed >= total_delay) { + timeout = 1; + break; + } + } + + return timeout; +} + +static void ap_do_flight_plan(void) +{ + int i; + + for (i = 0; i < mp_info.num_records; i++) { + struct mp_flight_record *rec = &mp_info.records[i]; + + atomic_inc(&rec->cpus_entered); + barrier_wait(&rec->barrier); + + if (rec->ap_call != NULL) { + rec->ap_call(rec->ap_arg); + } + } +} + +/* By the time APs call ap_init() caching has been setup, and microcode has + * been loaded. 
*/ +static void asmlinkage ap_init(unsigned int cpu) +{ + struct cpu_info *info; + int apic_id; + + /* Ensure the local apic is enabled */ + enable_lapic(); + + info = cpu_info(); + info->index = cpu; + info->cpu = cpus[cpu].dev; + thread_init_cpu_info_non_bsp(info); + + apic_id = lapicid(); + info->cpu->path.apic.apic_id = apic_id; + cpus[cpu].apic_id = apic_id; + + printk(BIOS_INFO, "AP: slot %d apic_id %x.\n", cpu, apic_id); + + /* Walk the flight plan */ + ap_do_flight_plan(); + + /* Park the AP. */ + stop_this_cpu(); +} + +static void setup_default_sipi_vector_params(struct sipi_params *sp) +{ + sp->gdt = (uint32_t)&gdt; + sp->gdtlimit = (uint32_t)&gdt_end - (u32)&gdt - 1; + sp->idt_ptr = (uint32_t)&idtarg; + sp->stack_size = CONFIG_STACK_SIZE; + sp->stack_top = (uint32_t)&_estack; + /* Adjust the stack top to take into account cpu_info. */ + sp->stack_top -= sizeof(struct cpu_info); +} + +#define NUM_FIXED_MTRRS 11 +static const unsigned int fixed_mtrrs[NUM_FIXED_MTRRS] = { + MTRRfix64K_00000_MSR, MTRRfix16K_80000_MSR, MTRRfix16K_A0000_MSR, + MTRRfix4K_C0000_MSR, MTRRfix4K_C8000_MSR, MTRRfix4K_D0000_MSR, + MTRRfix4K_D8000_MSR, MTRRfix4K_E0000_MSR, MTRRfix4K_E8000_MSR, + MTRRfix4K_F0000_MSR, MTRRfix4K_F8000_MSR, +}; + +static inline struct saved_msr *save_msr(int index, struct saved_msr *entry) +{ + msr_t msr; + + msr = rdmsr(index); + entry->index = index; + entry->lo = msr.lo; + entry->hi = msr.hi; + + /* Return the next entry. */ + entry++; + return entry; +} + +static int save_bsp_msrs(char *start, int size) +{ + int msr_count; + int num_var_mtrrs; + struct saved_msr *msr_entry; + int i; + msr_t msr; + + /* Determine number of MTRRs need to be saved. */ + msr = rdmsr(MTRRcap_MSR); + num_var_mtrrs = msr.lo & 0xff; + + /* 2 * num_var_mtrrs for base and mask. +1 for IA32_MTRR_DEF_TYPE. 
*/ + msr_count = 2 * num_var_mtrrs + NUM_FIXED_MTRRS + 1; + + if ((msr_count * sizeof(struct saved_msr)) > size) { + printk(BIOS_CRIT, "Cannot mirror all %d msrs.\n", msr_count); + return -1; + } + + msr_entry = (void *)start; + for (i = 0; i < NUM_FIXED_MTRRS; i++) { + msr_entry = save_msr(fixed_mtrrs[i], msr_entry); + } + + for (i = 0; i < num_var_mtrrs; i++) { + msr_entry = save_msr(MTRRphysBase_MSR(i), msr_entry); + msr_entry = save_msr(MTRRphysMask_MSR(i), msr_entry); + } + + msr_entry = save_msr(MTRRdefType_MSR, msr_entry); + + return msr_count; +} + +static atomic_t *load_sipi_vector(struct mp_params *mp_params) +{ + struct rmodule sipi_mod; + int module_size; + int num_msrs; + struct sipi_params *sp; + char *mod_loc = (void *)sipi_vector_location; + const int loc_size = sipi_vector_location_size; + atomic_t *ap_count = NULL; + + if (rmodule_parse(&_binary_sipi_vector_start, &sipi_mod)) { + printk(BIOS_CRIT, "Unable to parse sipi module.\n"); + return ap_count; + } + + if (rmodule_entry_offset(&sipi_mod) != 0) { + printk(BIOS_CRIT, "SIPI module entry offset is not 0!\n"); + return ap_count; + } + + if (rmodule_load_alignment(&sipi_mod) != 4096) { + printk(BIOS_CRIT, "SIPI module load alignment(%d) != 4096.\n", + rmodule_load_alignment(&sipi_mod)); + return ap_count; + } + + module_size = rmodule_memory_size(&sipi_mod); + + /* Align to 4 bytes. 
*/ + module_size = ALIGN(module_size, 4); + + if (module_size > loc_size) { + printk(BIOS_CRIT, "SIPI module size (%d) > region size (%d).\n", + module_size, loc_size); + return ap_count; + } + + num_msrs = save_bsp_msrs(&mod_loc[module_size], loc_size - module_size); + + if (num_msrs < 0) { + printk(BIOS_CRIT, "Error mirroring BSP's msrs.\n"); + return ap_count; + } + + if (rmodule_load(mod_loc, &sipi_mod)) { + printk(BIOS_CRIT, "Unable to load SIPI module.\n"); + return ap_count; + } + + sp = rmodule_parameters(&sipi_mod); + + if (sp == NULL) { + printk(BIOS_CRIT, "SIPI module has no parameters.\n"); + return ap_count; + } + + setup_default_sipi_vector_params(sp); + /* Setup MSR table. */ + sp->msr_table_ptr = (uint32_t)&mod_loc[module_size]; + sp->msr_count = num_msrs; + /* Provide pointer to microcode patch. */ + sp->microcode_ptr = (uint32_t)mp_params->microcode_pointer; + /* Pass on ability to load microcode in parallel. NOTE(review): the SIPI stub skips the lock when microcode_lock is 0xffffffff, so this mapping looks inverted -- confirm. */ + if (mp_params->parallel_microcode_load) { + sp->microcode_lock = 0; + } else { + sp->microcode_lock = ~0; + } + sp->c_handler = (uint32_t)&ap_init; + ap_count = &sp->ap_count; + atomic_set(ap_count, 0); + + return ap_count; +} + +static int allocate_cpu_devices(struct bus *cpu_bus, struct mp_params *p) +{ + int i; + int max_cpus; + struct cpu_info *info; + + max_cpus = p->num_cpus; + if (max_cpus > CONFIG_MAX_CPUS) { + printk(BIOS_CRIT, "CPU count(%d) exceeds CONFIG_MAX_CPUS(%d)\n", + max_cpus, CONFIG_MAX_CPUS); + max_cpus = CONFIG_MAX_CPUS; + } + + info = cpu_info(); + for (i = 1; i < max_cpus; i++) { + struct device_path cpu_path; + device_t new; + int apic_id; + + /* Build the cpu device path */ + cpu_path.type = DEVICE_PATH_APIC; + + /* Assuming linear APIC space allocation. 
*/ + apic_id = info->cpu->path.apic.apic_id + i; + if (p->adjust_apic_id != NULL) { + apic_id = p->adjust_apic_id(i, apic_id); + } + cpu_path.apic.apic_id = apic_id; + + /* Allocate the new cpu device structure */ + new = alloc_find_dev(cpu_bus, &cpu_path); + if (new == NULL) { + printk(BIOS_CRIT, "Could not allocte cpu device\n"); + max_cpus--; + } + cpus[i].dev = new; + } + + return max_cpus; +} + +/* Returns 1 for timeout. 0 on success. */ +static int apic_wait_timeout(int total_delay, int delay_step) +{ + int total = 0; + int timeout = 0; + + while (lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY) { + udelay(delay_step); + total += delay_step; + if (total >= total_delay) { + timeout = 1; + break; + } + } + + return timeout; +} + +static int start_aps(struct bus *cpu_bus, int ap_count, atomic_t *num_aps) +{ + int sipi_vector; + /* Max location is 4KiB below 1MiB */ + const int max_vector_loc = ((1 << 20) - (1 << 12)) >> 12; + + if (ap_count == 0) + return 0; + + /* The vector is sent as a 4k aligned address in one byte. */ + sipi_vector = sipi_vector_location >> 12; + + if (sipi_vector > max_vector_loc) { + printk(BIOS_CRIT, "SIPI vector too large! 0x%08x\n", + sipi_vector); + return -1; + } + + printk(BIOS_DEBUG, "Attempting to start %d APs\n", ap_count); + + if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) { + printk(BIOS_DEBUG, "Waiting for ICR not to be busy..."); + if (apic_wait_timeout(1000 /* 1 ms */, 50)) { + printk(BIOS_DEBUG, "timed out. Aborting.\n"); + return -1; + } else + printk(BIOS_DEBUG, "done.\n"); + } + + /* Send INIT IPI to all but self. 
*/ + lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0)); + lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT | + LAPIC_DM_INIT); + printk(BIOS_DEBUG, "Waiting for 10ms after sending INIT.\n"); + mdelay(10); + + /* Send 1st SIPI */ + if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) { + printk(BIOS_DEBUG, "Waiting for ICR not to be busy..."); + if (apic_wait_timeout(1000 /* 1 ms */, 50)) { + printk(BIOS_DEBUG, "timed out. Aborting.\n"); + return -1; + } else + printk(BIOS_DEBUG, "done.\n"); + } + + lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0)); + lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT | + LAPIC_DM_STARTUP | sipi_vector); + printk(BIOS_DEBUG, "Waiting for 1st SIPI to complete..."); + if (apic_wait_timeout(10000 /* 10 ms */, 50 /* us */)) { + printk(BIOS_DEBUG, "timed out.\n"); + return -1; + } else { + printk(BIOS_DEBUG, "done.\n"); + } + + /* Wait for CPUs to check in up to 200 us. */ + wait_for_aps(num_aps, ap_count, 200 /* us */, 15 /* us */); + + /* Send 2nd SIPI */ + if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) { + printk(BIOS_DEBUG, "Waiting for ICR not to be busy..."); + if (apic_wait_timeout(1000 /* 1 ms */, 50)) { + printk(BIOS_DEBUG, "timed out. Aborting.\n"); + return -1; + } else + printk(BIOS_DEBUG, "done.\n"); + } + + lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0)); + lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT | + LAPIC_DM_STARTUP | sipi_vector); + printk(BIOS_DEBUG, "Waiting for 2nd SIPI to complete..."); + if (apic_wait_timeout(10000 /* 10 ms */, 50 /* us */)) { + printk(BIOS_DEBUG, "timed out.\n"); + return -1; + } else { + printk(BIOS_DEBUG, "done.\n"); + } + + /* Wait for CPUs to check in. 
*/ + if (wait_for_aps(num_aps, ap_count, 10000 /* 10 ms */, 50 /* us */)) { + printk(BIOS_DEBUG, "Not all APs checked in: %d/%d.\n", + atomic_read(num_aps), ap_count); + return -1; + } + + return 0; +} + +static int bsp_do_flight_plan(struct mp_params *mp_params) +{ + int i; + int ret = 0; + const int timeout_us = 100000; + const int step_us = 100; + int num_aps = mp_params->num_cpus - 1; + + for (i = 0; i < mp_params->num_records; i++) { + struct mp_flight_record *rec = &mp_params->flight_plan[i]; + + /* Wait for APs if the record is not released. */ + if (atomic_read(&rec->barrier) == 0) { + /* Wait for the APs to check in. */ + if (wait_for_aps(&rec->cpus_entered, num_aps, + timeout_us, step_us)) { + printk(BIOS_ERR, "MP record %d timeout.\n", i); + ret = -1; + } + } + + if (rec->bsp_call != NULL) { + rec->bsp_call(rec->bsp_arg); + } + + release_barrier(&rec->barrier); + } + return ret; +} + +static void init_bsp(struct bus *cpu_bus) +{ + struct device_path cpu_path; + struct cpu_info *info; + char processor_name[49]; + + /* Print processor name */ + fill_processor_name(processor_name); + printk(BIOS_INFO, "CPU: %s.\n", processor_name); + + /* Ensure the local apic is enabled */ + enable_lapic(); + + /* Set the device path of the boot cpu. */ + cpu_path.type = DEVICE_PATH_APIC; + cpu_path.apic.apic_id = lapicid(); + + /* Find the device structure for the boot cpu. */ + info = cpu_info(); + info->cpu = alloc_find_dev(cpu_bus, &cpu_path); + + if (info->index != 0) + printk(BIOS_CRIT, "BSP index(%d) != 0!\n", info->index); + + /* Track BSP in cpu_map structures. */ + cpus[info->index].dev = info->cpu; + cpus[info->index].apic_id = cpu_path.apic.apic_id; +} + +int mp_init(struct bus *cpu_bus, struct mp_params *p) +{ + int num_cpus; + int num_aps; + atomic_t *ap_count; + + init_bsp(cpu_bus); + + if (p == NULL || p->flight_plan == NULL || p->num_records < 1) { + printk(BIOS_CRIT, "Invalid MP parameters\n"); + return -1; + } + + /* Default to currently running CPU. 
*/ + num_cpus = allocate_cpu_devices(cpu_bus, p); + + if (num_cpus < p->num_cpus) { + printk(BIOS_CRIT, + "ERROR: More cpus requested (%d) than supported (%d).\n", + p->num_cpus, num_cpus); + return -1; + } + + /* Copy needed parameters so that APs have a reference to the plan. */ + mp_info.num_records = p->num_records; + mp_info.records = p->flight_plan; + + /* Load the SIPI vector. */ + ap_count = load_sipi_vector(p); + if (ap_count == NULL) + return -1; + + /* Make sure SIPI data hits RAM so the APs that come up will see + * the startup code even if the caches are disabled. */ + wbinvd(); + + /* Start the APs providing number of APs and the cpus_entered field. */ + num_aps = p->num_cpus - 1; + if (start_aps(cpu_bus, num_aps, ap_count) < 0) { + mdelay(1000); + printk(BIOS_DEBUG, "%d/%d eventually checked in?\n", + atomic_read(ap_count), num_aps); + return -1; + } + + /* Walk the flight plan for the BSP. */ + return bsp_do_flight_plan(p); +} + +void mp_initialize_cpu(void *unused) +{ + /* Call back into driver infrastructure for the AP initialization. */ + struct cpu_info *info = cpu_info(); + cpu_initialize(info->index); +} + +int mp_get_apic_id(int cpu_slot) +{ + if (cpu_slot >= CONFIG_MAX_CPUS || cpu_slot < 0) + return -1; + + return cpus[cpu_slot].apic_id; +} diff --git a/src/cpu/x86/sipi_header.c b/src/cpu/x86/sipi_header.c new file mode 100644 index 0000000000..846a82d7c2 --- /dev/null +++ b/src/cpu/x86/sipi_header.c @@ -0,0 +1,6 @@ +#include <rmodule.h> + + +extern void *ap_start; + +DEFINE_RMODULE_HEADER(sipi_vector_header, ap_start, RMODULE_TYPE_SIPI_VECTOR); diff --git a/src/cpu/x86/sipi_vector.S b/src/cpu/x86/sipi_vector.S new file mode 100644 index 0000000000..52b12d0314 --- /dev/null +++ b/src/cpu/x86/sipi_vector.S @@ -0,0 +1,192 @@ +/* + * This file is part of the coreboot project. + * + * Copyright (C) 2013 Google Inc. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of + * the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + */ + +/* The SIPI vector is responsible for initializing the APs in the system. It + * loads microcode, sets up MSRs, and enables caching before calling into + * C code. */ + +/* These segment selectors need to match the gdt entries in c_start.S. */ +#define CODE_SEG 0x10 +#define DATA_SEG 0x18 + +#define IA32_UPDT_TRIG 0x79 +#define IA32_BIOS_SIGN_ID 0x8b + +.section ".module_parameters", "aw", @progbits +ap_start_params: +gdtaddr: +.word 0 /* limit */ +.long 0 /* table */ +.word 0 /* unused */ +idt_ptr: +.long 0 +stack_top: +.long 0 +stack_size: +.long 0 +microcode_lock: +.long 0 +microcode_ptr: +.long 0 +msr_table_ptr: +.long 0 +msr_count: +.long 0 +c_handler: +.long 0 +ap_count: +.long 0 + +.text +.code16 +.global ap_start +ap_start: + cli + xorl %eax, %eax + movl %eax, %cr3 /* Invalidate TLB */ + + /* On hyper threaded cpus, invalidating the cache here is + * very very bad. Don't. + */ + + /* setup the data segment */ + movw %cs, %ax + movw %ax, %ds + + /* The gdtaddr needs to be relative to the data segment in order + * to properly dereference it. The .text section comes first in an + * rmodule so ap_start can be used as a proxy for the load address. 
*/ + movl $(gdtaddr), %ebx + sub $(ap_start), %ebx + + data32 lgdt (%ebx) + + movl %cr0, %eax + andl $0x7FFAFFD1, %eax /* PG,AM,WP,NE,TS,EM,MP = 0 */ + orl $0x60000001, %eax /* CD, NW, PE = 1 */ + movl %eax, %cr0 + + ljmpl $CODE_SEG, $1f +1: + .code32 + movw $DATA_SEG, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + movw %ax, %fs + movw %ax, %gs + + /* Load the Interrupt descriptor table */ + mov idt_ptr, %ebx + lidt (%ebx) + + /* Obtain cpu number. */ + movl ap_count, %eax +1: + movl %eax, %ecx + inc %ecx + lock cmpxchg %ecx, ap_count + jnz 1b + + /* Setup stacks for each CPU. */ + movl stack_size, %eax + mul %ecx + movl stack_top, %edx + subl %eax, %edx + mov %edx, %esp + /* Save cpu number. */ + mov %ecx, %esi + + /* Determine if one should check microcode versions. */ + mov microcode_ptr, %edi + test %edi, %edi + jz microcode_done /* Bypass if no microcode exists. */ + + /* Get the Microcode version. */ + mov $1, %eax + cpuid + mov $IA32_BIOS_SIGN_ID, %ecx + rdmsr + /* If something already loaded skip loading again. */ + test %edx, %edx + jnz microcode_done + + /* Determine if parallel microcode loading is allowed. */ + cmp $0xffffffff, microcode_lock + je load_microcode + + /* Protect microcode loading. */ +lock_microcode: + lock bts $0, microcode_lock + jc lock_microcode + +load_microcode: + /* Load new microcode. */ + mov $IA32_UPDT_TRIG, %ecx + xor %edx, %edx + mov %edi, %eax + /* The microcode pointer is passed in pointing to the header. Adjust + * pointer to reflect the payload (header size is 48 bytes). */ + add $48, %eax + pusha + wrmsr + popa + + /* Unconditionally unlock microcode loading. */ + cmp $0xffffffff, microcode_lock + je microcode_done + + xor %eax, %eax + mov %eax, microcode_lock + +microcode_done: + /* + * Load MSRs. 
Each entry in the table consists of: + * 0: index, + * 4: value[31:0] + * 8: value[63:32] + */ + mov msr_table_ptr, %edi + mov msr_count, %ebx + test %ebx, %ebx + jz 1f +load_msr: + mov (%edi), %ecx + mov 4(%edi), %eax + mov 8(%edi), %edx + wrmsr + add $12, %edi + dec %ebx + jnz load_msr + +1: + /* Enable caching. */ + mov %cr0, %eax + and $0x9fffffff, %eax /* CD, NW = 0 */ + mov %eax, %cr0 + + /* c_handler(cpu_num) */ + push %esi /* cpu_num */ + mov c_handler, %eax + call *%eax +halt_jump: + hlt + jmp halt_jump diff --git a/src/include/cpu/x86/mp.h b/src/include/cpu/x86/mp.h new file mode 100644 index 0000000000..5112848e94 --- /dev/null +++ b/src/include/cpu/x86/mp.h @@ -0,0 +1,120 @@ +/* + * This file is part of the coreboot project. + * + * Copyright (C) 2013 Google Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _X86_MP_H_ +#define _X86_MP_H_ + +#include <arch/smp/atomic.h> + +#define CACHELINE_SIZE 64 + +struct cpu_info; +struct bus; + +static inline void mfence(void) +{ + __asm__ __volatile__("mfence\t\n": : :"memory"); +} + +typedef void (*mp_callback_t)(void *arg); + +/* + * A mp_flight_record details a sequence of calls for the APs to perform + * along with the BSP to coordinate sequencing. 
Each flight record either + * provides a barrier for each AP before calling the callback or the APs + * are allowed to perform the callback without waiting. Regardless, each + * record has the cpus_entered field incremented for each record. When + * the BSP observes that the cpus_entered matches the number of APs + * the bsp_call is called with bsp_arg and upon returning releases the + * barrier allowing the APs to make further progress. + * + * Note that ap_call() and bsp_call() can be NULL. In the NULL case the + * callback will just not be called. + */ +struct mp_flight_record { + atomic_t barrier; + atomic_t cpus_entered; + mp_callback_t ap_call; + void *ap_arg; + mp_callback_t bsp_call; + void *bsp_arg; +} __attribute__((aligned(CACHELINE_SIZE))); + +#define _MP_FLIGHT_RECORD(barrier_, ap_func_, ap_arg_, bsp_func_, bsp_arg_) \ + { \ + .barrier = ATOMIC_INIT(barrier_), \ + .cpus_entered = ATOMIC_INIT(0), \ + .ap_call = ap_func_, \ + .ap_arg = ap_arg_, \ + .bsp_call = bsp_func_, \ + .bsp_arg = bsp_arg_, \ + } + +#define MP_FR_BLOCK_APS(ap_func_, ap_arg_, bsp_func_, bsp_arg_) \ + _MP_FLIGHT_RECORD(0, ap_func_, ap_arg_, bsp_func_, bsp_arg_) + +#define MP_FR_NOBLOCK_APS(ap_func_, ap_arg_, bsp_func_, bsp_arg_) \ + _MP_FLIGHT_RECORD(1, ap_func_, ap_arg_, bsp_func_, bsp_arg_) + +/* The mp_params structure provides the arguments to the mp subsystem + * for bringing up APs. */ +struct mp_params { + int num_cpus; /* Total cpus include BSP */ + int parallel_microcode_load; + const void *microcode_pointer; + /* adjust_apic_id() is called for every potential apic id in the + * system up from 0 to CONFIG_MAX_CPUS. Return adjusted apic_id. */ + int (*adjust_apic_id)(int index, int apic_id); + /* Flight plan for APs and BSP. */ + struct mp_flight_record *flight_plan; + int num_records; +}; + +/* + * mp_init() will set up the SIPI vector and bring up the APs according to + * mp_params. Each flight record will be executed according to the plan. 
Note + * that the MP infrastructure uses SMM default area without saving it. It's + * up to the chipset or mainboard to either e820 reserve this area or save this + * region prior to calling mp_init() and restoring it after mp_init returns. + * + * At the time mp_init() is called the MTRR MSRs are mirrored into APs then + * caching is enabled before running the flight plan. + * + * The MP initialization has the following properties: + * 1. APs are brought up in parallel. + * 2. The ordering of coreboot cpu number and APIC ids is not deterministic. + * Therefore, one cannot rely on this property or the order of devices in + * the device tree unless the chipset or mainboard know the APIC ids + * a priori. + * + * mp_init() returns < 0 on error, 0 on success. + */ +int mp_init(struct bus *cpu_bus, struct mp_params *params); + +/* + * Useful functions to use in flight records when sequencing APs. + */ + +/* Calls cpu_initialize(info->index) which calls the coreboot CPU drivers. */ +void mp_initialize_cpu(void *unused); + +/* Returns apic id for coreboot cpu number or < 0 on failure. */ +int mp_get_apic_id(int cpu_slot); + +#endif /* _X86_MP_H_ */ |