aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/x86
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/x86')
-rw-r--r--src/cpu/x86/16bit/entry16.inc124
-rw-r--r--src/cpu/x86/16bit/entry16.lds2
-rw-r--r--src/cpu/x86/16bit/reset16.inc21
-rw-r--r--src/cpu/x86/16bit/reset16.lds14
-rw-r--r--src/cpu/x86/32bit/entry32.inc61
-rw-r--r--src/cpu/x86/32bit/entry32.lds14
-rw-r--r--src/cpu/x86/32bit/reset32.inc10
-rw-r--r--src/cpu/x86/32bit/reset32.lds14
-rw-r--r--src/cpu/x86/cache/Config.lb1
-rw-r--r--src/cpu/x86/cache/cache.c10
-rw-r--r--src/cpu/x86/fpu/Config.lb0
-rw-r--r--src/cpu/x86/fpu/enable_fpu.inc9
-rw-r--r--src/cpu/x86/lapic/Config.lb3
-rw-r--r--src/cpu/x86/lapic/boot_cpu.c10
-rw-r--r--src/cpu/x86/lapic/lapic.c72
-rw-r--r--src/cpu/x86/lapic/lapic_cpu_init.c316
-rw-r--r--src/cpu/x86/lapic/secondary.S53
-rw-r--r--src/cpu/x86/mmx/Config.lb0
-rw-r--r--src/cpu/x86/mmx/disable_mmx.inc2
-rw-r--r--src/cpu/x86/mmx/enable_mmx.inc6
-rw-r--r--src/cpu/x86/mtrr/Config.lb1
-rw-r--r--src/cpu/x86/mtrr/earlymtrr.c123
-rw-r--r--src/cpu/x86/mtrr/mtrr.c378
-rw-r--r--src/cpu/x86/pae/Config.lb1
-rw-r--r--src/cpu/x86/pae/pgtbl.c94
-rw-r--r--src/cpu/x86/sse/Config.lb0
-rw-r--r--src/cpu/x86/sse/disable_sse.inc18
-rw-r--r--src/cpu/x86/sse/enable_sse.inc14
-rw-r--r--src/cpu/x86/tsc/Config.lb5
-rw-r--r--src/cpu/x86/tsc/delay_tsc.c165
30 files changed, 1541 insertions, 0 deletions
diff --git a/src/cpu/x86/16bit/entry16.inc b/src/cpu/x86/16bit/entry16.inc
new file mode 100644
index 0000000000..61726d8ad9
--- /dev/null
+++ b/src/cpu/x86/16bit/entry16.inc
@@ -0,0 +1,124 @@
+/*
+This software and ancillary information (herein called SOFTWARE )
+called LinuxBIOS is made available under the terms described
+here. The SOFTWARE has been approved for release with associated
+LA-CC Number 00-34 . Unless otherwise indicated, this SOFTWARE has
+been authored by an employee or employees of the University of
+California, operator of the Los Alamos National Laboratory under
+Contract No. W-7405-ENG-36 with the U.S. Department of Energy. The
+U.S. Government has rights to use, reproduce, and distribute this
+SOFTWARE. The public may copy, distribute, prepare derivative works
+and publicly display this SOFTWARE without charge, provided that this
+Notice and any statement of authorship are reproduced on all copies.
+Neither the Government nor the University makes any warranty, express
+or implied, or assumes any liability or responsibility for the use of
+this SOFTWARE. If SOFTWARE is modified to produce derivative works,
+such modified SOFTWARE should be clearly marked, so as not to confuse
+it with the version available from LANL.
+ */
+/* Copyright 2000, Ron Minnich, Advanced Computing Lab, LANL
+ * rminnich@lanl.gov
+ */
+
+
+/** Start code to put an i386 or later processor into 32-bit
+ * protected mode.
+ */
+
+/* .section ".rom.text" */
+#include <arch/rom_segs.h>
+.code16
+.globl _start
+.type _start, @function
+
+_start:
+ cli
+ /* Save the BIST result */
+ movl %eax, %ebp
+
+/* thanks to kmliu@sis.tw.com for this TLB fix ... */
+/**/
+/* IMMEDIATELY invalidate the translation lookaside buffer before executing*/
+/* any further code. Even though paging is disabled we could still get*/
+/*false address translations due to the TLB if we didn't invalidate it.*/
+/**/
+ xorl %eax, %eax
+ movl %eax, %cr3 /* Invalidate TLB*/
+
+
+ /* Invalidating the cache here seems to be a bad idea on
+ * modern processors. Don't.
+ * If we are hyperthreaded or we have multiple cores it is bad,
+ * for SMP startup. On Opterons it causes a 5 second delay.
+ * Invalidating the cache was pure paranoia in any event.
+ * If your cpu needs it you can write a cpu dependent version of
+ * entry16.inc.
+ */
+
+ /* Note: gas handles memory addresses in 16 bit code very poorly.
+ * In particular it doesn't appear to have a directive allowing you
+ * associate a section or even an absolute offset with a segment register.
+ *
+ * This means that anything except cs:ip relative offsets is
+ * a real pain in 16 bit mode, and explains why it is almost
+ * impossible to get gas to do lgdt correctly.
+ *
+ * One way to work around this is to have the linker do the
+ * math instead of the assembler. This solves the very
+ * practical problem of being able to write code that can
+ * be relocated.
+ *
+ * An lgdt call before we have memory enabled cannot be
+ * position independent, as we cannot execute a call
+ * instruction to get our current instruction pointer.
+ * So while this code is relocatable it isn't arbitrarily
+ * relocatable.
+ *
+ * The criteria for relocation have been relaxed to their
+ * utmost, so that we can use the same code for both
+ * our initial entry point and startup of the second cpu.
+ * The code assumes when executing at _start that:
+ * (((cs & 0xfff) == 0) and (ip == _start & 0xffff))
+ * or
+ * ((cs == anything) and (ip == 0)).
+ *
+ * The restrictions in reset16.inc mean that _start initially
+ * must be loaded at or above 0xffff0000 or below 0x100000.
+ *
+ * The linker script computes gdtptr16_offset by simply returning
+ * the low 16 bits. This means that the initial segment used
+ * when _start is called must be 64K aligned. This should not
+ * restrict the address as the ip address can be anything.
+ */
+
+ /* Load the GDT.  %cs << 4 (truncated to 16 bits) is the low 16 bits
+ * of the code segment base; subtracting it from gdtptr16_offset (the
+ * low 16 bits of gdtptr16's address, computed by the linker) leaves
+ * the offset of gdtptr16 relative to %cs in %bx.
+ */
+ movw %cs, %ax
+ shlw $4, %ax
+ movw $gdtptr16_offset, %bx
+ subw %ax, %bx
+ data32 lgdt %cs:(%bx)
+
+ /* Switch to protected mode (PE = 1) with paging off and the
+ * cache disabled (CD, NW = 1).
+ */
+ movl %cr0, %eax
+ andl $0x7FFAFFD1, %eax /* PG,AM,WP,NE,TS,EM,MP = 0 */
+ orl $0x60000001, %eax /* CD, NW, PE = 1 */
+ movl %eax, %cr0
+
+ /* Restore BIST to %eax */
+ movl %ebp, %eax
+
+ /* Now that we are in protected mode jump to a 32 bit code segment. */
+ data32 ljmp $ROM_CODE_SEG, $__protected_start
+
+/** The gdt has a 4 Gb code segment at 0x10, and a 4 GB data segment
+ * at 0x18; these are Linux-compatible.
+ */
+
+.align 4
+.globl gdtptr16
+gdtptr16:
+ .word gdt_end - gdt -1 /* compute the table limit */
+ .long gdt /* we know the offset */
+
+.globl _estart
+_estart:
+ .code32
+
diff --git a/src/cpu/x86/16bit/entry16.lds b/src/cpu/x86/16bit/entry16.lds
new file mode 100644
index 0000000000..db37e66302
--- /dev/null
+++ b/src/cpu/x86/16bit/entry16.lds
@@ -0,0 +1,2 @@
+ /* Low 16 bits of the addresses of gdtptr16 and _start.
+ * entry16.inc uses gdtptr16_offset to address gdtptr16 relative to %cs.
+ */
+ gdtptr16_offset = gdtptr16 & 0xffff;
+ _start_offset = _start & 0xffff;
diff --git a/src/cpu/x86/16bit/reset16.inc b/src/cpu/x86/16bit/reset16.inc
new file mode 100644
index 0000000000..d36c94085e
--- /dev/null
+++ b/src/cpu/x86/16bit/reset16.inc
@@ -0,0 +1,21 @@
+ /* Contents of the 16 byte .reset section: the 16 bit reset vector at
+ * offset 0 and a 32 bit protected mode entry point at offset 8.
+ */
+ .section ".reset"
+ .code16
+.globl reset_vector
+reset_vector:
+#if _ROMBASE >= 0xffff0000
+ /* jmp _start */
+ .byte 0xe9
+ /* Displacement is relative to the end of the 3 byte jmp, which is
+ * 2 bytes past the start of this field.
+ */
+ .int _start - ( . + 2 )
+ /* Note: The above jump is hand coded to work around bugs in binutils.
+ * 5 bytes are used for a 3 byte instruction. This works because x86
+ * is little endian and allows us to use supported 32bit relocations
+ * instead of the weird 16 bit relocations that binutils does not
+ * handle consistently between versions because they are used so rarely.
+ */
+#else
+# error _ROMBASE is an unsupported value
+#endif
+ /* 32 bit entry point, placed at offset 8 of the section */
+ . = 0x8;
+ .code32
+ jmp protected_start
+ .previous
diff --git a/src/cpu/x86/16bit/reset16.lds b/src/cpu/x86/16bit/reset16.lds
new file mode 100644
index 0000000000..80f2fc0c6f
--- /dev/null
+++ b/src/cpu/x86/16bit/reset16.lds
@@ -0,0 +1,14 @@
+/*
+ * _ROMTOP : The top of the rom used where we
+ * need to put the reset vector.
+ */
+
+SECTIONS {
+ /* The reset vector lives 16 bytes below 4GB for a rom based at or
+ * above 0xffff0000, otherwise 16 bytes below 1MB.
+ */
+ _ROMTOP = (_ROMBASE >= 0xffff0000)? 0xfffffff0 : 0xffff0;
+ . = _ROMTOP;
+ .reset . : {
+ *(.reset)
+ /* Pad the section to exactly 16 bytes: move to offset 15
+ * and emit one final zero byte.
+ */
+ . = 15 ;
+ BYTE(0x00);
+ }
+}
diff --git a/src/cpu/x86/32bit/entry32.inc b/src/cpu/x86/32bit/entry32.inc
new file mode 100644
index 0000000000..3d30a3f85f
--- /dev/null
+++ b/src/cpu/x86/32bit/entry32.inc
@@ -0,0 +1,61 @@
+/* For starting linuxBIOS in protected mode */
+
+#include <arch/rom_segs.h>
+
+/* .section ".rom.text" */
+ .code32
+
+ .align 4
+.globl gdtptr
+
+/* The GDT.  The first 8 byte slot (selector 0x00, never usable as a
+ * segment) doubles as the gdt pointer itself: a 16 bit limit, the 32 bit
+ * address of the table and 2 bytes of padding.
+ */
+gdt:
+gdtptr:
+ .word gdt_end - gdt -1 /* compute the table limit */
+ .long gdt /* we know the offset */
+ .word 0
+
+/* flat code segment (second slot, table offset 0x08) */
+ .word 0xffff, 0x0000
+ .byte 0x00, 0x9b, 0xcf, 0x00
+
+/* flat data segment (third slot, table offset 0x10) */
+ .word 0xffff, 0x0000
+ .byte 0x00, 0x93, 0xcf, 0x00
+
+gdt_end:
+
+
+/*
+ * When we come here we are in protected mode. We expand
+ * the stack and copy the data segment from ROM to
+ * memory.
+ *
+ * After that, we call the chipset bootstrap routine that
+ * does what is left of the chipset initialization.
+ *
+ * NOTE aligned to 4 so that we are sure that the prefetch
+ * cache will be reloaded.
+ */
+ .align 4
+/* protected_start: entry for callers that are already executing 32 bit
+ * code (the reset sections jump here from their offset 8 entry).  It loads
+ * our GDT and reloads %cs from it.
+ * __protected_start: common continuation, also reached from entry16.inc.
+ * BIST is expected in %eax on entry and is still in %eax on exit.
+ */
+.globl protected_start
+protected_start:
+
+ lgdt %cs:gdtptr
+ ljmp $ROM_CODE_SEG, $__protected_start
+
+__protected_start:
+ /* Save the BIST value */
+ movl %eax, %ebp
+
+ intel_chip_post_macro(0x10) /* post 10 */
+
+ /* Point every data segment register at the flat data segment */
+ movw $ROM_DATA_SEG, %ax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %ss
+ movw %ax, %fs
+ movw %ax, %gs
+
+ /* Restore the BIST value to %eax */
+ movl %ebp, %eax
+
+
diff --git a/src/cpu/x86/32bit/entry32.lds b/src/cpu/x86/32bit/entry32.lds
new file mode 100644
index 0000000000..37a75ba6ae
--- /dev/null
+++ b/src/cpu/x86/32bit/entry32.lds
@@ -0,0 +1,14 @@
+/*
+ _cache_ram_seg_base = DEFINED(CACHE_RAM_BASE)? CACHE_RAM_BASE - _rodata : 0;
+ _cache_ram_seg_base_low = (_cache_ram_seg_base) & 0xffff;
+ _cache_ram_seg_base_middle = (_cache_ram_seg_base >> 16) & 0xff;
+ _cache_ram_seg_base_high = (_cache_ram_seg_base >> 24) & 0xff;
+
+ _rom_code_seg_base = _ltext - _text;
+ _rom_code_seg_base_low = (_rom_code_seg_base) & 0xffff;
+ _rom_code_seg_base_middle = (_rom_code_seg_base >> 16) & 0xff;
+ _rom_code_seg_base_high = (_rom_code_seg_base >> 24) & 0xff;
+*/
+
+
+
diff --git a/src/cpu/x86/32bit/reset32.inc b/src/cpu/x86/32bit/reset32.inc
new file mode 100644
index 0000000000..42c68cc770
--- /dev/null
+++ b/src/cpu/x86/32bit/reset32.inc
@@ -0,0 +1,10 @@
+ /* .reset section for images entered in 32 bit mode: no 16 bit code is
+ * emitted at reset_vector, only the 32 bit entry point at offset 8.
+ */
+ .section ".reset"
+ .code16
+.globl reset_vector
+reset_vector:
+
+ /* 32 bit entry point, placed at offset 8 of the section */
+ . = 0x8;
+ .code32
+ jmp protected_start
+
+ .previous
diff --git a/src/cpu/x86/32bit/reset32.lds b/src/cpu/x86/32bit/reset32.lds
new file mode 100644
index 0000000000..fa6db86b1a
--- /dev/null
+++ b/src/cpu/x86/32bit/reset32.lds
@@ -0,0 +1,14 @@
+/*
+ * _ROMTOP : The top of the rom used where we
+ * need to put the reset vector.
+ */
+
+SECTIONS {
+ /* Place the reset section in the last 16 bytes of the rom image */
+ _ROMTOP = _ROMBASE + ROM_IMAGE_SIZE - 0x10;
+ . = _ROMTOP;
+ .reset (.): {
+ *(.reset)
+ /* Pad the section to exactly 16 bytes: move to offset 15
+ * and emit one final zero byte.
+ */
+ . = 15 ;
+ BYTE(0x00);
+ }
+}
diff --git a/src/cpu/x86/cache/Config.lb b/src/cpu/x86/cache/Config.lb
new file mode 100644
index 0000000000..e39bb2da32
--- /dev/null
+++ b/src/cpu/x86/cache/Config.lb
@@ -0,0 +1 @@
+object cache.o
diff --git a/src/cpu/x86/cache/cache.c b/src/cpu/x86/cache/cache.c
new file mode 100644
index 0000000000..92e4a69e29
--- /dev/null
+++ b/src/cpu/x86/cache/cache.c
@@ -0,0 +1,10 @@
+#include <console/console.h>
+#include <cpu/x86/cache.h>
+
+/* Turn the processor cache on via enable_cache(), emitting POST code
+ * 0x60 and an informational console message first.
+ */
+void x86_enable_cache(void)
+{
+ post_code(0x60);
+ printk_info("Enabling cache\n");
+ enable_cache();
+}
+
diff --git a/src/cpu/x86/fpu/Config.lb b/src/cpu/x86/fpu/Config.lb
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/src/cpu/x86/fpu/Config.lb
diff --git a/src/cpu/x86/fpu/enable_fpu.inc b/src/cpu/x86/fpu/enable_fpu.inc
new file mode 100644
index 0000000000..e663e365ac
--- /dev/null
+++ b/src/cpu/x86/fpu/enable_fpu.inc
@@ -0,0 +1,9 @@
+ /* %eax holds BIST on entry; park it in %ebp while %eax is in use */
+ movl %eax, %ebp
+
+ /* Disable floating point emulation: clear bit 2 of %cr0 (CR0.EM) */
+ movl %cr0, %eax
+ andl $~(1<<2), %eax
+ movl %eax, %cr0
+
+ /* Put BIST back in %eax */
+ movl %ebp, %eax
diff --git a/src/cpu/x86/lapic/Config.lb b/src/cpu/x86/lapic/Config.lb
new file mode 100644
index 0000000000..8b5eaa376e
--- /dev/null
+++ b/src/cpu/x86/lapic/Config.lb
@@ -0,0 +1,3 @@
+object lapic.o
+object lapic_cpu_init.o
+object secondary.S
diff --git a/src/cpu/x86/lapic/boot_cpu.c b/src/cpu/x86/lapic/boot_cpu.c
new file mode 100644
index 0000000000..d3a8f6e7a7
--- /dev/null
+++ b/src/cpu/x86/lapic/boot_cpu.c
@@ -0,0 +1,10 @@
+#include <cpu/x86/msr.h>
+
+/* Report whether the executing processor is the bootstrap processor.
+ *
+ * Returns 1 when bit 8 of MSR 0x1b (the BSP flag of the APIC base MSR)
+ * is set, 0 otherwise.
+ */
+int boot_cpu(void)
+{
+	msr_t apic_base = rdmsr(0x1b);
+
+	return (apic_base.lo & (1 << 8)) ? 1 : 0;
+}
diff --git a/src/cpu/x86/lapic/lapic.c b/src/cpu/x86/lapic/lapic.c
new file mode 100644
index 0000000000..8282890bf7
--- /dev/null
+++ b/src/cpu/x86/lapic/lapic.c
@@ -0,0 +1,72 @@
+#include <cpu/x86/lapic.h>
+#include <console/console.h>
+#include <cpu/x86/msr.h>
+#include <cpu/x86/mtrr.h>
+
+/* Configure the local APIC of the executing cpu.
+ *
+ * With NEED_LAPIC == 1 the APIC is enabled at LAPIC_DEFAULT_BASE, the
+ * task priority is opened up and LVT0/LVT1 are programmed for ExtINT and
+ * NMI delivery (virtual wire mode).  Otherwise the APIC is disabled
+ * through LAPIC_BASE_MSR.  POST code 0x9b is emitted when done.
+ */
+void setup_lapic(void)
+{
+ /* this is so interrupts work. This is very limited scope --
+ * linux will do better later, we hope ...
+ */
+ /* this is the first way we learned to do it. It fails on real SMP
+ * stuff. So we have to do things differently ...
+ * see the Intel mp1.4 spec, page A-3
+ */
+
+#if NEED_LAPIC == 1
+ /* Only Pentium Pro and later have these MSRs */
+ msr_t msr;
+
+ printk_info("Setting up local apic...");
+
+ /* Enable the local apic */
+ msr = rdmsr(LAPIC_BASE_MSR);
+ msr.lo |= LAPIC_BASE_MSR_ENABLE;
+ msr.lo &= ~LAPIC_BASE_MSR_ADDR_MASK;
+ msr.lo |= LAPIC_DEFAULT_BASE;
+ wrmsr(LAPIC_BASE_MSR, msr);
+
+ /*
+ * Set Task Priority to 'accept all'.
+ */
+ lapic_write_around(LAPIC_TASKPRI,
+ lapic_read_around(LAPIC_TASKPRI) & ~LAPIC_TPRI_MASK);
+
+ /* Put the local apic in virtual wire mode */
+ lapic_write_around(LAPIC_SPIV,
+ (lapic_read_around(LAPIC_SPIV) & ~(LAPIC_VECTOR_MASK))
+ | LAPIC_SPIV_ENABLE);
+ /* LVT0: clear the mode/status bits, then select ExtINT delivery */
+ lapic_write_around(LAPIC_LVT0,
+ (lapic_read_around(LAPIC_LVT0) &
+ ~(LAPIC_LVT_MASKED | LAPIC_LVT_LEVEL_TRIGGER |
+ LAPIC_LVT_REMOTE_IRR | LAPIC_INPUT_POLARITY |
+ LAPIC_SEND_PENDING |LAPIC_LVT_RESERVED_1 |
+ LAPIC_DELIVERY_MODE_MASK))
+ | (LAPIC_LVT_REMOTE_IRR |LAPIC_SEND_PENDING |
+ LAPIC_DELIVERY_MODE_EXTINT)
+ );
+ /* LVT1: same treatment, but select NMI delivery */
+ lapic_write_around(LAPIC_LVT1,
+ (lapic_read_around(LAPIC_LVT1) &
+ ~(LAPIC_LVT_MASKED | LAPIC_LVT_LEVEL_TRIGGER |
+ LAPIC_LVT_REMOTE_IRR | LAPIC_INPUT_POLARITY |
+ LAPIC_SEND_PENDING |LAPIC_LVT_RESERVED_1 |
+ LAPIC_DELIVERY_MODE_MASK))
+ | (LAPIC_LVT_REMOTE_IRR |LAPIC_SEND_PENDING |
+ LAPIC_DELIVERY_MODE_NMI)
+ );
+
+ printk_debug(" apic_id: %d ", lapicid());
+
+#else /* !NEED_LAPIC */
+ /* Only Pentium Pro and later have these MSRs */
+ msr_t msr;
+
+ printk_info("Disabling local apic...");
+
+ msr = rdmsr(LAPIC_BASE_MSR);
+ msr.lo &= ~LAPIC_BASE_MSR_ENABLE;
+ wrmsr(LAPIC_BASE_MSR, msr);
+#endif /* !NEED_LAPIC */
+ printk_info("done.\n");
+ post_code(0x9b);
+}
diff --git a/src/cpu/x86/lapic/lapic_cpu_init.c b/src/cpu/x86/lapic/lapic_cpu_init.c
new file mode 100644
index 0000000000..963b1cf729
--- /dev/null
+++ b/src/cpu/x86/lapic/lapic_cpu_init.c
@@ -0,0 +1,316 @@
+#include <cpu/x86/lapic.h>
+#include <delay.h>
+#include <string.h>
+#include <console/console.h>
+#include <arch/hlt.h>
+#include <device/device.h>
+#include <device/path.h>
+#include <smp/atomic.h>
+#include <smp/spinlock.h>
+#include <cpu/cpu.h>
+
+
+#if CONFIG_SMP == 1
+/* Poll the ICR until the local APIC reports the pending IPI as sent.
+ *
+ * The busy bit is sampled every 100us, at most 1001 times.
+ * Returns 0 once the send has completed, or the still-set
+ * LAPIC_ICR_BUSY bit if it never completed.
+ */
+static unsigned long wait_ipi_send_done(void)
+{
+	unsigned long busy;
+	int polls = 0;
+
+	printk_spew("Waiting for send to finish...\n");
+	do {
+		printk_spew("+");
+		udelay(100);
+		busy = lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY;
+	} while (busy && (polls++ < 1000));
+
+	return busy;
+}
+
+/* This is a lot more paranoid now, since Linux can NOT handle
+ * being told there is a CPU when none exists. So any errors
+ * will return 0, meaning no CPU.
+ *
+ * We actually handle that case by noting which cpus start up
+ * and not telling anyone about the ones that don't.
+ *
+ * Sends the INIT assert / INIT deassert / STARTUP IPI sequence to the
+ * cpu with local apic id `apicid`, pointing it at _secondary_start.
+ *
+ * Returns 1 when every IPI was sent and accepted, 0 otherwise.
+ */
+static int lapic_start_cpu(unsigned long apicid)
+{
+	unsigned long send_status = 0, accept_status = 0, start_eip;
+	int j, num_starts, maxlvt;
+	extern char _secondary_start[];
+
+	/*
+	 * Starting actual IPI sequence...
+	 */
+
+	printk_spew("Asserting INIT.\n");
+
+	/*
+	 * Turn INIT on target chip
+	 */
+	lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(apicid));
+
+	/*
+	 * Send IPI
+	 */
+	lapic_write_around(LAPIC_ICR, LAPIC_INT_LEVELTRIG | LAPIC_INT_ASSERT
+				| LAPIC_DM_INIT);
+
+	/* Decide on the busy bit itself: a poll counter would misreport a
+	 * send that completes on the very last poll as a timeout.
+	 */
+	if (wait_ipi_send_done()) {
+		printk_err("CPU %lu: First apic write timed out. Disabling\n",
+			 apicid);
+		// too bad.
+		printk_err("ESR is 0x%x\n", lapic_read(LAPIC_ESR));
+		if (lapic_read(LAPIC_ESR)) {
+			printk_err("Try to reset ESR\n");
+			lapic_write_around(LAPIC_ESR, 0);
+			printk_err("ESR is 0x%x\n", lapic_read(LAPIC_ESR));
+		}
+		return 0;
+	}
+	mdelay(10);
+
+	printk_spew("Deasserting INIT.\n");
+
+	/* Target chip */
+	lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(apicid));
+
+	/* Send IPI */
+	lapic_write_around(LAPIC_ICR, LAPIC_INT_LEVELTRIG | LAPIC_DM_INIT);
+
+	if (wait_ipi_send_done()) {
+		printk_err("CPU %lu: Second apic write timed out. Disabling\n",
+			 apicid);
+		// too bad.
+		return 0;
+	}
+
+	/* NOTE(review): only start_eip >> 12 is placed in the STARTUP IPI, so
+	 * this presumably relies on _secondary_start being 4K aligned and
+	 * low enough for the result to fit the vector field -- confirm
+	 * against the link layout.
+	 */
+	start_eip = (unsigned long)_secondary_start;
+	printk_spew("start_eip=0x%08lx\n", start_eip);
+
+	num_starts = 2;
+
+	/*
+	 * Run STARTUP IPI loop.
+	 */
+	printk_spew("#startup loops: %d.\n", num_starts);
+
+	maxlvt = 4;
+
+	for (j = 1; j <= num_starts; j++) {
+		printk_spew("Sending STARTUP #%d to %lu.\n", j, apicid);
+		lapic_read_around(LAPIC_SPIV);
+		lapic_write(LAPIC_ESR, 0);
+		lapic_read(LAPIC_ESR);
+		printk_spew("After apic_write.\n");
+
+		/*
+		 * STARTUP IPI
+		 */
+
+		/* Target chip */
+		lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(apicid));
+
+		/* Boot on the stack */
+		/* Kick the second */
+		lapic_write_around(LAPIC_ICR, LAPIC_DM_STARTUP
+					| (start_eip >> 12));
+
+		/*
+		 * Give the other CPU some time to accept the IPI.
+		 */
+		udelay(300);
+
+		printk_spew("Startup point 1.\n");
+
+		send_status = wait_ipi_send_done();
+
+		/*
+		 * Give the other CPU some time to accept the IPI.
+		 */
+		udelay(200);
+		/*
+		 * Due to the Pentium erratum 3AP.
+		 */
+		if (maxlvt > 3) {
+			lapic_read_around(LAPIC_SPIV);
+			lapic_write(LAPIC_ESR, 0);
+		}
+		accept_status = (lapic_read(LAPIC_ESR) & 0xEF);
+		if (send_status || accept_status)
+			break;
+	}
+	printk_spew("After Startup.\n");
+	if (send_status)
+		printk_warning("APIC never delivered???\n");
+	if (accept_status)
+		printk_warning("APIC delivery error (%lx).\n", accept_status);
+	if (send_status || accept_status)
+		return 0;
+	return 1;
+}
+
+/* Number of cpus that are currently running in linuxbios */
+static atomic_t active_cpus = ATOMIC_INIT(1);
+
+/* start_cpu_lock covers last_cpu_index and secondary_stack.
+ * Only starting one cpu at a time lets me remove the logic
+ * for selecting the stack from assembly language.
+ *
+ * In addition communicating by variables to the cpu I
+ * am starting allows me to verify it has started before
+ * start_cpu returns.
+ */
+
+static spinlock_t start_cpu_lock = SPIN_LOCK_UNLOCKED;
+static unsigned last_cpu_index = 0;
+volatile unsigned long secondary_stack;
+
+/* Start one secondary cpu and wait for it to check in.
+ *
+ * A cpu_info record is placed at the top of the next free stack below
+ * _estack, and that address is published through secondary_stack.
+ * The secondary cpu's startup code clears secondary_stack once it has
+ * picked the stack up, which is how we notice that it is running.
+ *
+ * Returns 1 if the cpu checked in within about one second, else 0.
+ */
+int start_cpu(device_t cpu)
+{
+	extern unsigned char _estack[];
+	struct cpu_info *info;
+	unsigned long stack_top;
+	unsigned long apicid;
+	unsigned long slot;
+	unsigned long polls;
+	int started = 0;
+
+	spin_lock(&start_cpu_lock);
+
+	/* Which local apic are we talking to? */
+	apicid = cpu->path.u.apic.apic_id;
+
+	/* Claim the next processor index */
+	slot = ++last_cpu_index;
+
+	/* The cpu_info record sits at the very top of that cpu's stack */
+	stack_top = ((unsigned long)_estack) - (STACK_SIZE*slot) - sizeof(struct cpu_info);
+	info = (struct cpu_info *)stack_top;
+	info->index = slot;
+	info->cpu = cpu;
+
+	/* Publish the stack for the cpu that is about to start */
+	secondary_stack = stack_top;
+
+	/* The cpu counts as absent until it proves otherwise */
+	cpu->enabled = 0;
+	cpu->initialized = 0;
+
+	if (lapic_start_cpu(apicid)) {
+		/* Poll every 10us, for up to 1s, for the cpu to call in */
+		polls = 0;
+		while (polls < 100000) {
+			if (secondary_stack == 0) {
+				started = 1;
+				break;
+			}
+			udelay(10);
+			polls++;
+		}
+	}
+
+	secondary_stack = 0;
+	spin_unlock(&start_cpu_lock);
+	return started;
+}
+
+/* C entry point of secondary cpus.
+ *
+ * active_cpus is raised for the duration of cpu_initialize() and
+ * lowered again before the cpu is stopped.
+ */
+void secondary_cpu_init(void)
+{
+ atomic_inc(&active_cpus);
+ cpu_initialize();
+ atomic_dec(&active_cpus);
+ stop_this_cpu();
+}
+
+/* Start every enabled, not yet initialized APIC cpu found below
+ * root->link[1], wait until all of them have finished initializing,
+ * and report the ones that never did.
+ */
+static void initialize_other_cpus(device_t root)
+{
+	int reported, running;
+	device_t dev;
+
+	/* First pass: get each eligible cpu started */
+	for (dev = root->link[1].children; dev; dev = dev->sibling) {
+		if ((dev->path.type != DEVICE_PATH_APIC) ||
+			!dev->enabled || dev->initialized) {
+			continue;
+		}
+		if (!start_cpu(dev)) {
+			/* Record the error in cpu? */
+			printk_err("CPU %u would not start!\n",
+				dev->path.u.apic.apic_id);
+		}
+	}
+
+	/* Spin until this cpu is the only active one, logging the
+	 * count whenever it changes.
+	 */
+	reported = 1;
+	for (running = atomic_read(&active_cpus); running > 1;
+		running = atomic_read(&active_cpus)) {
+		if (running != reported) {
+			printk_info("Waiting for %d CPUS to stop\n", running);
+			reported = running;
+		}
+		udelay(10);
+	}
+
+	/* Second pass: complain about every cpu that failed to initialize */
+	for (dev = root->link[1].children; dev; dev = dev->sibling) {
+		if ((dev->path.type == DEVICE_PATH_APIC) && !dev->initialized) {
+			printk_err("CPU %u did not initialize!\n",
+				dev->path.u.apic.apic_id);
+#warning "FIXME do I need a mainboard_cpu_fixup function?"
+		}
+	}
+	printk_debug("All AP CPUs stopped\n");
+}
+
+#else /* CONFIG_SMP */
+#define initialize_other_cpus(root) do {} while(0)
+#endif /* CONFIG_SMP */
+
+/* Initialize the boot cpu and then all other cpus.
+ *
+ * The boot cpu's device is looked up (or allocated) below root->link[1],
+ * identified by its local apic id when NEED_LAPIC == 1 and by
+ * DEVICE_PATH_BOOT_CPU otherwise, and recorded in this cpu's cpu_info.
+ */
+void initialize_cpus(device_t root)
+{
+ struct device_path cpu_path;
+ struct cpu_info *info;
+
+ /* Find the info struct for this cpu */
+ info = cpu_info();
+
+#if NEED_LAPIC == 1
+ /* Ensure the local apic is enabled */
+ enable_lapic();
+
+ /* Get the device path of the boot cpu */
+ cpu_path.type = DEVICE_PATH_APIC;
+ cpu_path.u.apic.apic_id = lapicid();
+#else
+ /* Get the device path of the boot cpu */
+ cpu_path.type = DEVICE_PATH_BOOT_CPU;
+#endif
+
+ /* Find the device structure for the boot cpu */
+ info->cpu = alloc_find_dev(&root->link[1], &cpu_path);
+
+ /* Initialize the bootstrap processor */
+ cpu_initialize();
+
+ /* Now initialize the rest of the cpus (a no-op without CONFIG_SMP) */
+ initialize_other_cpus(root);
+}
+
+
diff --git a/src/cpu/x86/lapic/secondary.S b/src/cpu/x86/lapic/secondary.S
new file mode 100644
index 0000000000..786c31e532
--- /dev/null
+++ b/src/cpu/x86/lapic/secondary.S
@@ -0,0 +1,53 @@
+#include <arch/asm.h>
+#include <arch/intel.h>
+#include <cpu/x86/mtrr.h>
+#include <cpu/x86/lapic_def.h>
+ /* Startup code for secondary cpus: switch from 16 bit mode to 32 bit
+ * protected mode, pick up the stack published in secondary_stack and
+ * call secondary_cpu_init.
+ */
+ .text
+ .globl _secondary_start
+ /* Page aligned: lapic_start_cpu passes this address to the target
+ * cpu as (address >> 12) in the STARTUP IPI.
+ */
+ .balign 4096
+_secondary_start:
+ .code16
+ cli
+ xorl %eax, %eax
+ movl %eax, %cr3 /* Invalidate TLB*/
+
+ /* On hyper threaded cpus, invalidating the cache here is
+ * very very bad. Don't.
+ */
+
+ /* setup the data segment */
+ movw %cs, %ax
+ movw %ax, %ds
+
+ /* gdtaddr is addressed as an offset from _secondary_start through
+ * %ds; assumes %cs (and so %ds) is based at _secondary_start.
+ */
+ data32 lgdt gdtaddr - _secondary_start
+
+ movl %cr0, %eax
+ andl $0x7FFAFFD1, %eax /* PG,AM,WP,NE,TS,EM,MP = 0 */
+ orl $0x60000001, %eax /* CD, NW, PE = 1 */
+ movl %eax, %cr0
+
+ /* Reload %cs with selector 0x10 to start executing 32 bit code */
+ ljmpl $0x10, $1f
+1:
+ .code32
+ /* Load every data segment register with selector 0x18 */
+ movw $0x18, %ax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %ss
+ movw %ax, %fs
+ movw %ax, %gs
+
+ /* Set the stack pointer, and flag that we are done */
+ xorl %eax, %eax
+ movl secondary_stack, %esp
+ movl %eax, secondary_stack
+
+ call secondary_cpu_init
+ /* Halt forever should secondary_cpu_init ever return */
+1: hlt
+ jmp 1b
+
+gdtaddr:
+ .word gdt_limit /* the table limit */
+ .long gdt /* we know the offset */
+
+
+.code32
diff --git a/src/cpu/x86/mmx/Config.lb b/src/cpu/x86/mmx/Config.lb
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/src/cpu/x86/mmx/Config.lb
diff --git a/src/cpu/x86/mmx/disable_mmx.inc b/src/cpu/x86/mmx/disable_mmx.inc
new file mode 100644
index 0000000000..97d62d60f5
--- /dev/null
+++ b/src/cpu/x86/mmx/disable_mmx.inc
@@ -0,0 +1,2 @@
+ /* Clear out any mmx state */
+ emms
diff --git a/src/cpu/x86/mmx/enable_mmx.inc b/src/cpu/x86/mmx/enable_mmx.inc
new file mode 100644
index 0000000000..a0578daf65
--- /dev/null
+++ b/src/cpu/x86/mmx/enable_mmx.inc
@@ -0,0 +1,6 @@
+ /* BIST in %eax */
+
+ /*
+ * Enabling mmx registers is a noop
+ */
+
diff --git a/src/cpu/x86/mtrr/Config.lb b/src/cpu/x86/mtrr/Config.lb
new file mode 100644
index 0000000000..df5c16f8bd
--- /dev/null
+++ b/src/cpu/x86/mtrr/Config.lb
@@ -0,0 +1 @@
+object mtrr.o \ No newline at end of file
diff --git a/src/cpu/x86/mtrr/earlymtrr.c b/src/cpu/x86/mtrr/earlymtrr.c
new file mode 100644
index 0000000000..e8fda994af
--- /dev/null
+++ b/src/cpu/x86/mtrr/earlymtrr.c
@@ -0,0 +1,123 @@
+#ifndef EARLYMTRR_C
+#define EARLYMTRR_C
+#include <cpu/x86/cache.h>
+#include <cpu/x86/mtrr.h>
+#include <cpu/x86/msr.h>
+
+/* Validate XIP_ROM_SIZE and XIP_ROM_BASE */
+#if defined(XIP_ROM_SIZE) && !defined(XIP_ROM_BASE)
+#error "XIP_ROM_SIZE without XIP_ROM_BASE"
+#endif
+#if defined(XIP_ROM_BASE) && !defined(XIP_ROM_SIZE)
+#error "XIP_ROM_BASE without XIP_ROM_SIZE"
+#endif
+#if !defined(CONFIG_LB_MEM_TOPK)
+#error "CONFIG_LB_MEM_TOPK not defined"
+#endif
+
+#if defined(XIP_ROM_SIZE) && ((XIP_ROM_SIZE & (XIP_ROM_SIZE -1)) != 0)
+#error "XIP_ROM_SIZE is not a power of 2"
+#endif
+#if defined(XIP_ROM_SIZE) && ((XIP_ROM_BASE % XIP_ROM_SIZE) != 0)
+#error "XIP_ROM_BASE is not a multiple of XIP_ROM_SIZE"
+#endif
+
+#if (CONFIG_LB_MEM_TOPK & (CONFIG_LB_MEM_TOPK -1)) != 0
+# error "CONFIG_LB_MEM_TOPK must be a power of 2"
+#endif
+
+/* Disable variable range MTRR number `reg`.
+ *
+ * The valid bit lives in the mask register, so writing an all-zero
+ * mask is enough to switch the range off.
+ */
+static void disable_var_mtrr(unsigned reg)
+{
+	msr_t cleared;
+
+	cleared.lo = 0;
+	cleared.hi = 0;
+	wrmsr(MTRRphysMask_MSR(reg), cleared);
+}
+
+/* Program variable range MTRR number `reg` to give the `size` bytes
+ * starting at `base` the memory type `type`.
+ *
+ * The mask is formed as ~(size - 1), so `size` is expected to be a
+ * power of 2.
+ */
+static void set_var_mtrr(
+	unsigned reg, unsigned base, unsigned size, unsigned type)
+{
+	msr_t phys_base, phys_mask;
+
+	phys_base.hi = 0;
+	phys_base.lo = base | type;
+	wrmsr(MTRRphysBase_MSR(reg), phys_base);
+
+	/* Bits 32-35 of MTRRphysMask should be set to 1; 0x800 in the low
+	 * word marks the range as valid.
+	 */
+	phys_mask.hi = 0x0f;
+	phys_mask.lo = ~(size - 1) | 0x800;
+	wrmsr(MTRRphysMask_MSR(reg), phys_mask);
+}
+
+/* Give the first CONFIG_LB_MEM_TOPK kilobytes of memory the memory type
+ * `type`, using variable mtrr 0.  The cache is disabled while the mtrr
+ * is rewritten and enabled again afterwards.
+ */
+static void cache_lbmem(int type)
+{
+ /* Set the type for 0 - CONFIG_LB_MEM_TOPK KB using variable mtrr 0 */
+ disable_cache();
+ set_var_mtrr(0, 0x00000000, CONFIG_LB_MEM_TOPK << 10, type);
+ enable_cache();
+}
+
+
+/* The fixed and variable MTRRs power up with random values;
+ * clear them to MTRR_TYPE_UNCACHEABLE for safety.
+ *
+ * mtrr_msrs: zero terminated list of the MSR numbers to clear.
+ *
+ * When XIP_ROM_SIZE is defined, variable mtrr 1 is then set up to cache
+ * the XIP rom window.  Finally the MTRRs are switched on.
+ */
+static void do_early_mtrr_init(const unsigned long *mtrr_msrs)
+{
+	/* Precondition:
+	 *   The cache is not enabled in cr0 nor in MTRRdefType_MSR
+	 *   (entry16.inc sets CD and NW in cr0 when it switches to
+	 *   protected mode)
+	 */
+	msr_t msr;
+	const unsigned long *msr_addr;
+	unsigned long msr_nr;
+
+	print_spew("Clearing mtrr\r\n");
+
+	/* Initialize all of the relevant msrs to 0 */
+	msr.lo = 0;
+	msr.hi = 0;
+	for (msr_addr = mtrr_msrs; (msr_nr = *msr_addr) != 0; msr_addr++) {
+		wrmsr(msr_nr, msr);
+	}
+
+#if defined(XIP_ROM_SIZE)
+	/* enable write back caching so we can do execute in place
+	 * on the flash rom.
+	 */
+	set_var_mtrr(1, XIP_ROM_BASE, XIP_ROM_SIZE, MTRR_TYPE_WRBACK);
+#endif
+
+	/* Set the default memory type to 0 (uncacheable) and enable the
+	 * MTRRs (bit 11).  The fixed range enable (bit 10) is left clear,
+	 * so only the variable MTRRs take effect.
+	 */
+	msr.hi = 0x00000000;
+	msr.lo = 0x00000800;
+	wrmsr(MTRRdefType_MSR, msr);
+
+}
+
+/* Clear the fixed MTRRs and variable MTRRs 0-7 listed below and set up
+ * the early MTRR state via do_early_mtrr_init(), with the cache disabled
+ * for the duration and enabled again afterwards.
+ */
+static void early_mtrr_init(void)
+{
+ /* MSR numbers handed to do_early_mtrr_init(); 0 ends the list */
+ static const unsigned long mtrr_msrs[] = {
+ /* fixed mtrr */
+ 0x250, 0x258, 0x259,
+ 0x268, 0x269, 0x26A,
+ 0x26B, 0x26C, 0x26D,
+ 0x26E, 0x26F,
+ /* var mtrr */
+ 0x200, 0x201, 0x202, 0x203,
+ 0x204, 0x205, 0x206, 0x207,
+ 0x208, 0x209, 0x20A, 0x20B,
+ 0x20C, 0x20D, 0x20E, 0x20F,
+ /* NULL end of table */
+ 0
+ };
+ disable_cache();
+ do_early_mtrr_init(mtrr_msrs);
+ enable_cache();
+}
+
+#endif /* EARLYMTRR_C */
diff --git a/src/cpu/x86/mtrr/mtrr.c b/src/cpu/x86/mtrr/mtrr.c
new file mode 100644
index 0000000000..8e38f23736
--- /dev/null
+++ b/src/cpu/x86/mtrr/mtrr.c
@@ -0,0 +1,378 @@
+/*
+ * intel_mtrr.c: setting MTRR to decent values for cache initialization on P6
+ *
+ * Derived from intel_set_mtrr in intel_subr.c and mtrr.c in linux kernel
+ *
+ * Copyright 2000 Silicon Integrated System Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ * Reference: Intel Architecture Software Developer's Manual, Volume 3: System Programming
+ */
+
+#include <console/console.h>
+#include <device/device.h>
+#include <cpu/x86/msr.h>
+#include <cpu/x86/mtrr.h>
+#include <cpu/x86/cache.h>
+
+/* Number of elements in a true array (not a pointer). */
+#define arraysize(x) (sizeof(x)/sizeof((x)[0]))
+
+/* Physical address width assumed when building MTRR masks:
+ * 40 bits when k8 is defined, 36 bits otherwise. */
+#warning "FIXME I do not properly handle address more than 36 physical address bits"
+#ifdef k8
+# define ADDRESS_BITS 40
+#else
+# define ADDRESS_BITS 36
+#endif
+/* Number of address bits above bit 31, and a mask with exactly those
+ * bits set; the mask goes into the high half of a variable MTRR mask. */
+#define ADDRESS_BITS_HIGH (ADDRESS_BITS - 32)
+#define ADDRESS_MASK_HIGH ((1u << ADDRESS_BITS_HIGH) - 1)
+
+/* Fixed-range MTRR MSRs in ascending address order. set_fixed_mtrrs()
+ * indexes this table with (range index >> 3), i.e. each MSR holds eight
+ * one-byte type fields. */
+static unsigned int mtrr_msr[] = {
+ MTRRfix64K_00000_MSR, MTRRfix16K_80000_MSR, MTRRfix16K_A0000_MSR,
+ MTRRfix4K_C0000_MSR, MTRRfix4K_C8000_MSR, MTRRfix4K_D0000_MSR, MTRRfix4K_D8000_MSR,
+ MTRRfix4K_E0000_MSR, MTRRfix4K_E8000_MSR, MTRRfix4K_F0000_MSR, MTRRfix4K_F8000_MSR,
+};
+
+
+/* Turn on the fixed-range MTRRs.
+ *
+ * Sets bits 10 and 11 (0xc00) in the low word of MTRRdefType_MSR with a
+ * read-modify-write, leaving the other bits untouched. x86_mtrr_check()
+ * reports bit 10 as "Fixed MTRRs" and bit 11 as "Variable MTRRs".
+ */
+static void enable_fixed_mtrr(void)
+{
+ msr_t msr;
+
+ msr = rdmsr(MTRRdefType_MSR);
+ msr.lo |= 0xc00;
+ wrmsr(MTRRdefType_MSR, msr);
+}
+
+/* Turn on the variable-range MTRRs.
+ *
+ * Sets bit 11 (0x800) in the low word of MTRRdefType_MSR with a
+ * read-modify-write, leaving the other bits untouched.
+ */
+static void enable_var_mtrr(void)
+{
+ msr_t msr;
+
+ msr = rdmsr(MTRRdefType_MSR);
+ msr.lo |= 0x800;
+ wrmsr(MTRRdefType_MSR, msr);
+}
+
+/* Program one variable MTRR (derived from the Linux kernel source).
+ *
+ * reg: variable MTRR number; values >= 8 are ignored.
+ * basek: base address of the range, in KB.
+ * sizek: size of the range, in KB; 0 disables the register.
+ * type: memory type, OR-ed into the low bits of the base register.
+ *
+ * The cache is disabled while the MSRs are written and re-enabled
+ * afterwards.
+ */
+static void set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, unsigned char type)
+{
+ msr_t base, mask;
+
+ /* Convert the KB base into a 64-bit byte address split over hi/lo. */
+ base.hi = basek >> 22;
+ base.lo = basek << 10;
+
+ //printk_debug("ADDRESS_MASK_HIGH=%#x\n", ADDRESS_MASK_HIGH);
+
+ /* 4*1024*1024 KB is 4GB: below that the size only affects the low
+ * mask word and every implemented high address bit stays set. */
+ if (sizek < 4*1024*1024) {
+ mask.hi = ADDRESS_MASK_HIGH;
+ mask.lo = ~((sizek << 10) -1);
+ }
+ else {
+ mask.hi = ADDRESS_MASK_HIGH & (~((sizek >> 22) -1));
+ mask.lo = 0;
+ }
+
+ /* Nothing has been written yet, so an out-of-range register number
+ * is silently dropped. */
+ if (reg >= 8)
+ return;
+
+ // it is recommended that we disable and enable cache when we
+ // do this.
+ disable_cache();
+ if (sizek == 0) {
+ msr_t zero;
+ zero.lo = zero.hi = 0;
+ /* The valid bit (0x800) is kept in the mask, so we simply clear the
+ relevant mask register to disable a range. */
+ wrmsr (MTRRphysMask_MSR(reg), zero);
+ } else {
+ /* Add the memory type to the base and the valid bit (0x800) to
+ the mask; the high mask bits were already set above. */
+ base.lo |= type;
+ mask.lo |= 0x800;
+ wrmsr (MTRRphysBase_MSR(reg), base);
+ wrmsr (MTRRphysMask_MSR(reg), mask);
+ }
+ enable_cache();
+}
+
+/* fms: find most significant bit set, stolen from Linux Kernel Source.
+ *
+ * Returns the bit index produced by bsrl for x; when the zero flag is
+ * set after bsrl (x == 0) the result is forced to 0.
+ */
+static inline unsigned int fms(unsigned int x)
+{
+ int r;
+
+ __asm__("bsrl %1,%0\n\t"
+ "jnz 1f\n\t"
+ "movl $0,%0\n"
+ "1:" : "=r" (r) : "g" (x));
+ return r;
+}
+
+/* fls: find least significant bit set.
+ *
+ * Returns the bit index produced by bsfl for x; when the zero flag is
+ * set after bsfl (x == 0) the result is forced to 32.
+ */
+static inline unsigned int fls(unsigned int x)
+{
+ int r;
+
+ __asm__("bsfl %1,%0\n\t"
+ "jnz 1f\n\t"
+ "movl $32,%0\n"
+ "1:" : "=r" (r) : "g" (x));
+ return r;
+}
+
+/* Setting up variable and fixed MTRRs
+ *
+ * From Intel Vol. III Section 9.12.4, the range size and base alignment must meet these requirements:
+ * 1. The range size must be 2^N bytes for N >= 12 (i.e. 4KB minimum).
+ * 2. The base address must be 2^N aligned, where N is the same N as in the previous
+ * requirement. So an 8K range must be 8K aligned, not 4K aligned.
+ *
+ * These requirements are met by decomposing the ramsize into Sum(Cn * 2^n, n = [0..N], Cn = [0, 1]).
+ * For Cm = 1, there is a WB range of 2^m size at base address Sum(Cm * 2^m, m = [N..n]).
+ * A 124MB (128MB - 4MB SMA) example:
+ * ramsize = 124MB == 64MB (at 0MB) + 32MB (at 64MB) + 16MB (at 96MB) + 8MB (at 112MB) + 4MB (at 120MB).
+ * But this wastes a lot of MTRR registers, so we use another, more "aggressive" way with Uncacheable Regions.
+ *
+ * In the Uncacheable Region scheme, we try to cover the whole ramsize with a single WB region if possible.
+ * If (and only if) this cannot be done, we decompose the ramsize; the mathematical formula
+ * would be ramsize = Sum(Cn * 2^n, n = [0..N], Cn = [-1, 0, 1]). For Cn = -1, an Uncacheable Region is used.
+ * The same 124MB example:
+ * ramsize = 124MB == 128MB WB (at 0MB) + 4MB UC (at 124MB)
+ * or a 156MB (128MB + 32MB - 4MB SMA) example:
+ * ramsize = 156MB == 128MB WB (at 0MB) + 32MB WB (at 128MB) + 4MB UC (at 156MB)
+ */
+/* Split of the variable MTRRs between the BIOS and the operating system.
+ * The disabled (#if 0) variant reserves 2 MTRRs for the operating system;
+ * the active variant gives all 8 to the BIOS and reserves none. */
+#if 0
+#define BIOS_MTRRS 6
+#define OS_MTRRS 2
+#else
+#define BIOS_MTRRS 8
+#define OS_MTRRS 0
+#endif
+/* Total number of variable MTRRs handled by this file. */
+#define MTRRS (BIOS_MTRRS + OS_MTRRS)
+
+
+/* Set the memory type of the fixed MTRR ranges [first, last).
+ *
+ * first: index of the first fixed range to change.
+ * last: index one past the final fixed range to change; values above
+ *       NUM_FIXED_RANGES are clamped so mtrr_msr[] is never overrun.
+ * type: memory type stored in each range's one-byte field.
+ *
+ * Eight ranges share one MSR (four type bytes in lo, four in hi). Each MSR
+ * is read once, updated in memory, and written back with the cache
+ * disabled when the loop moves on to the next MSR or finishes.
+ */
+static void set_fixed_mtrrs(unsigned int first, unsigned int last, unsigned char type)
+{
+    unsigned int i;
+    /* NUM_FIXED_RANGES >> 3 is one past the last MSR index and doubles
+     * as the "no MSR loaded yet" marker. */
+    unsigned int fixed_msr = NUM_FIXED_RANGES >> 3;
+    msr_t msr;
+    msr.lo = msr.hi = 0; /* Shut up gcc */
+    if (last > NUM_FIXED_RANGES) {
+        last = NUM_FIXED_RANGES;
+    }
+    for(i = first; i < last; i++) {
+        /* Bit offset of this range's type byte within lo or hi */
+        unsigned int shift = (i & 3) * 8;
+        /* When I switch to a new msr read it in */
+        if (fixed_msr != i >> 3) {
+            /* But first write out the old msr */
+            if (fixed_msr < (NUM_FIXED_RANGES >> 3)) {
+                disable_cache();
+                wrmsr(mtrr_msr[fixed_msr], msr);
+                enable_cache();
+            }
+            fixed_msr = i>>3;
+            msr = rdmsr(mtrr_msr[fixed_msr]);
+        }
+        /* Unsigned operands: shifting a signed 0xff left by 24 would
+         * move a bit into the sign position. */
+        if ((i & 7) < 4) {
+            msr.lo &= ~(0xffu << shift);
+            msr.lo |= (unsigned int)type << shift;
+        } else {
+            msr.hi &= ~(0xffu << shift);
+            msr.hi |= (unsigned int)type << shift;
+        }
+    }
+    /* Write out the final msr */
+    if (fixed_msr < (NUM_FIXED_RANGES >> 3)) {
+        disable_cache();
+        wrmsr(mtrr_msr[fixed_msr], msr);
+        enable_cache();
+    }
+}
+
+/* Map an address in KB to the index of the fixed MTRR range holding it.
+ *
+ * Indices 0-7 are 64K wide, indices 8-23 are 16K wide and start at
+ * 8*64 KB, indices from 24 on are 4K wide and start at 8*64 + 16*16 KB.
+ * The result never exceeds NUM_FIXED_RANGES.
+ */
+static unsigned fixed_mtrr_index(unsigned long addrk)
+{
+    /* Start by assuming the address lies in the 64K-granular area. */
+    unsigned slot = addrk >> 6;
+
+    /* Past the 64K ranges: recount in 16K steps. */
+    if (slot >= 8)
+        slot = ((addrk - 8*64) >> 4) + 8;
+
+    /* Past the 16K ranges as well: recount in 4K steps. */
+    if (slot >= 24)
+        slot = ((addrk - (8*64 + 16*16)) >> 2) + 24;
+
+    return (slot > NUM_FIXED_RANGES) ? NUM_FIXED_RANGES : slot;
+}
+
+/* Cover a memory range with write-back variable MTRRs.
+ *
+ * reg: first variable MTRR to use.
+ * range_startk: start of the range, in KB.
+ * range_sizek: size of the range, in KB.
+ * next_range_startk: start of the following range, in KB (currently unused).
+ *
+ * The range is split into power-of-two chunks, each naturally aligned to
+ * its own size, and one MTRR is programmed per chunk until the range is
+ * covered or BIOS_MTRRS registers have been used.
+ *
+ * Returns the number of the next unused variable MTRR.
+ */
+static unsigned int range_to_mtrr(unsigned int reg,
+    unsigned long range_startk, unsigned long range_sizek,
+    unsigned long next_range_startk)
+{
+    if (!range_sizek || (reg >= BIOS_MTRRS)) {
+        return reg;
+    }
+    while(range_sizek) {
+        unsigned long max_align, align;
+        unsigned long sizek;
+        /* Compute the maximum size I can make a range:
+         * limited by the alignment of the start address (fls) and by
+         * the largest power of two that fits in what is left (fms).
+         */
+        max_align = fls(range_startk);
+        align = fms(range_sizek);
+        if (align > max_align) {
+            align = max_align;
+        }
+        /* align can be 31, so shift an unsigned long rather than a
+         * signed int to keep the result out of the sign bit. */
+        sizek = 1UL << align;
+        printk_debug("Setting variable MTRR %d, base: %4dMB, range: %4dMB, type WB\n",
+            reg, range_startk >>10, sizek >> 10);
+        set_var_mtrr(reg++, range_startk, sizek, MTRR_TYPE_WRBACK);
+        range_startk += sizek;
+        range_sizek -= sizek;
+        if (reg >= BIOS_MTRRS)
+            break;
+    }
+    return reg;
+}
+
+/* Convert a 64-bit byte count or address into KB.
+ *
+ * Values of 2^42 bytes or more are reported as 0xffffffff instead of
+ * being shifted.
+ */
+static unsigned long resk(uint64_t value)
+{
+    /* At or above 2^42 bytes: saturate. */
+    if (value >= (1ULL << 42))
+        return 0xffffffff;
+
+    return value >> 10;
+}
+
+/* Program the fixed and variable MTRRs from the device resource list.
+ *
+ * Pass 1: every fixed MTRR is set to uncacheable, then the fixed ranges
+ * touched by cacheable memory resources are set to write-back.
+ * Pass 2: cacheable memory resources are merged into contiguous ranges
+ * and each range is handed to range_to_mtrr(); unused variable MTRRs are
+ * cleared afterwards.
+ * Finally the fixed and variable MTRRs are enabled.
+ */
+void x86_setup_mtrrs(void)
+{
+    /* Try this the simple way of incrementally adding together
+     * mtrrs. If this doesn't work out we can get smart again
+     * and clear out the mtrrs.
+     */
+    struct device *dev;
+    unsigned long range_startk, range_sizek;
+    unsigned int reg;
+
+    printk_debug("\n");
+    /* Initialize the fixed_mtrrs to uncached */
+    printk_debug("Setting fixed MTRRs(%d-%d) type: UC\n",
+        0, NUM_FIXED_RANGES);
+    set_fixed_mtrrs(0, NUM_FIXED_RANGES, MTRR_TYPE_UNCACHEABLE);
+
+    /* Now see which of the fixed mtrrs cover ram.
+     */
+    for(dev = all_devices; dev; dev = dev->next) {
+        struct resource *res, *last;
+        last = &dev->resource[dev->resources];
+        for(res = &dev->resource[0]; res < last; res++) {
+            unsigned int start_mtrr;
+            unsigned int last_mtrr;
+            if (!(res->flags & IORESOURCE_MEM) ||
+                !(res->flags & IORESOURCE_CACHEABLE))
+            {
+                continue;
+            }
+            start_mtrr = fixed_mtrr_index(resk(res->base));
+            last_mtrr = fixed_mtrr_index(resk((res->base + res->size)));
+            if (start_mtrr >= NUM_FIXED_RANGES) {
+                /* This resource starts above the fixed ranges.
+                 * Skip only this one: a later resource of the
+                 * same device may still lie below them.
+                 */
+                continue;
+            }
+            printk_debug("Setting fixed MTRRs(%d-%d) Type: WB\n",
+                start_mtrr, last_mtrr);
+            set_fixed_mtrrs(start_mtrr, last_mtrr, MTRR_TYPE_WRBACK);
+        }
+    }
+    printk_debug("DONE fixed MTRRs\n");
+    /* Cache as many memory areas as possible */
+    /* FIXME is there an algorithm for computing the optimal set of mtrrs?
+     * In some cases it is definitely possible to do better.
+     */
+    range_startk = 0;
+    range_sizek = 0;
+    reg = 0;
+    for(dev = all_devices; dev; dev = dev->next) {
+        struct resource *res, *last;
+        last = &dev->resource[dev->resources];
+        for(res = &dev->resource[0]; res < last; res++) {
+            unsigned long basek, sizek;
+            if (!(res->flags & IORESOURCE_MEM) ||
+                !(res->flags & IORESOURCE_CACHEABLE)) {
+                continue;
+            }
+            basek = resk(res->base);
+            sizek = resk(res->size);
+            /* See if I can merge with the last range
+             * Either I am below 1M and the fixed mtrrs handle it, or
+             * the ranges touch.
+             */
+            if ((basek <= 1024) || (range_startk + range_sizek == basek)) {
+                unsigned long endk = basek + sizek;
+                range_sizek = endk - range_startk;
+                continue;
+            }
+            /* Write the range mtrrs */
+            if (range_sizek != 0) {
+                reg = range_to_mtrr(reg, range_startk, range_sizek, basek);
+                range_startk = 0;
+                range_sizek = 0;
+                if (reg >= BIOS_MTRRS)
+                    goto last_msr;
+            }
+            /* Allocate an msr */
+            range_startk = basek;
+            range_sizek = sizek;
+        }
+    }
+ last_msr:
+    /* Write the last range */
+    reg = range_to_mtrr(reg, range_startk, range_sizek, 0);
+    printk_debug("DONE variable MTRRs\n");
+    printk_debug("Clear out the extra MTRR's\n");
+    /* Clear out the extra MTRR's */
+    while(reg < MTRRS) {
+        set_var_mtrr(reg++, 0, 0, 0);
+    }
+    /* enable fixed MTRR */
+    printk_spew("call enable_fixed_mtrr()\n");
+    enable_fixed_mtrr();
+    printk_spew("call enable_var_mtrr()\n");
+    enable_var_mtrr();
+    printk_spew("Leave %s\n", __FUNCTION__);
+    post_code(0x6A);
+}
+
+/* Report whether the fixed and variable MTRRs are enabled.
+ *
+ * Reads MTRRdefType_MSR, shifts its low word right by 10 and prints the
+ * state of the two lowest resulting bits (originally bits 10 and 11).
+ *
+ * Returns the shifted low word: bit 0 set means fixed MTRRs enabled,
+ * bit 1 set means variable MTRRs enabled.
+ */
+int x86_mtrr_check(void)
+{
+    /* Only Pentium Pro and later have MTRR */
+    msr_t msr;
+    printk_debug("\nMTRR check\n");
+
+    /* Same register (MSR 0x2ff) that enable_fixed_mtrr() and
+     * enable_var_mtrr() modify. */
+    msr = rdmsr(MTRRdefType_MSR);
+    msr.lo >>= 10;
+
+    printk_debug("Fixed MTRRs : ");
+    if (msr.lo & 0x01)
+        printk_debug("Enabled\n");
+    else
+        printk_debug("Disabled\n");
+
+    printk_debug("Variable MTRRs: ");
+    if (msr.lo & 0x02)
+        printk_debug("Enabled\n");
+    else
+        printk_debug("Disabled\n");
+
+    printk_debug("\n");
+
+    post_code(0x93);
+    return ((int) msr.lo);
+}
diff --git a/src/cpu/x86/pae/Config.lb b/src/cpu/x86/pae/Config.lb
new file mode 100644
index 0000000000..45e7f5754b
--- /dev/null
+++ b/src/cpu/x86/pae/Config.lb
@@ -0,0 +1 @@
+object pgtbl.o \ No newline at end of file
diff --git a/src/cpu/x86/pae/pgtbl.c b/src/cpu/x86/pae/pgtbl.c
new file mode 100644
index 0000000000..756cc53f65
--- /dev/null
+++ b/src/cpu/x86/pae/pgtbl.c
@@ -0,0 +1,94 @@
+#include <console/console.h>
+#include <cpu/cpu.h>
+#include <cpu/x86/pae.h>
+#include <string.h>
+
+/* Turn paging and PAE off.
+ *
+ * Clears bit 31 of CR0 (mask 0x7FFFFFFF), then clears bit 5 of CR4
+ * (mask 0xFFFFFFDF). %eax is used as scratch and declared clobbered.
+ */
+static void paging_off(void)
+{
+    __asm__ __volatile__ (
+        /* Disable paging */
+        "movl %%cr0, %%eax\n\t"
+        "andl $0x7FFFFFFF, %%eax\n\t"
+        "movl %%eax, %%cr0\n\t"
+        /* Disable pae */
+        "movl %%cr4, %%eax\n\t"
+        "andl $0xFFFFFFDF, %%eax\n\t"
+        /* Write the result back; without this store the cleared
+         * bit never reaches CR4 and PAE stays enabled. */
+        "movl %%eax, %%cr4\n\t"
+        :
+        :
+        : "eax"
+    );
+}
+
+/* Turn PAE paging on.
+ *
+ * pdp: value loaded into CR3 (the caller passes its page directory
+ * pointer table).
+ *
+ * Order: load CR3, set bit 5 of CR4, then set bit 31 of CR0.
+ * %eax is used as scratch and declared clobbered.
+ */
+static void paging_on(void *pdp)
+{
+ __asm__ __volatile__(
+ /* Load the page table address */
+ "movl %0, %%cr3\n\t"
+ /* Enable pae */
+ "movl %%cr4, %%eax\n\t"
+ "orl $0x00000020, %%eax\n\t"
+ "movl %%eax, %%cr4\n\t"
+ /* Enable paging */
+ "movl %%cr0, %%eax\n\t"
+ "orl $0x80000000, %%eax\n\t"
+ "movl %%eax, %%cr0\n\t"
+ :
+ : "r" (pdp)
+ : "eax"
+ );
+}
+
+/* Make a 2MB physical page addressable and return a pointer to it.
+ *
+ * page: 2MB page number (physical address >> 21).
+ *
+ * Pages are grouped in windows of 1024 pages (page >> 10). For windows 0
+ * and 1 no page tables are built and paging is left off. For higher
+ * windows a per-CPU PAE page table is built: the first 1024 entries
+ * identity-map the low addresses and the next 1024 entries map the
+ * requested window at virtual address 0x80000000.
+ *
+ * Returns MAPPING_ERROR when cpu_index() is not below CONFIG_MAX_CPUS,
+ * (page << 21) for window 0, otherwise
+ * 0x80000000 | ((page & 0x3ff) << 21).
+ */
+void *map_2M_page(unsigned long page)
+{
+    struct pde {
+        uint32_t addr_lo;
+        uint32_t addr_hi;
+    } __attribute__ ((packed));
+    struct pg_table {
+        struct pde pd[2048];
+        struct pde pdp[512];
+    } __attribute__ ((packed));
+    static struct pg_table pgtbl[CONFIG_MAX_CPUS] __attribute__ ((aligned(4096)));
+    /* Window currently mapped on each CPU; starts out as window 0. */
+    static unsigned long mapped_window[CONFIG_MAX_CPUS];
+    unsigned long index;
+    unsigned long window;
+    void *result;
+    int i;
+    index = cpu_index();
+    /* index is unsigned, so one upper-bound test is enough; a negative
+     * cpu_index() result converts to a huge value and fails it too. */
+    if (index >= CONFIG_MAX_CPUS) {
+        return MAPPING_ERROR;
+    }
+    window = page >> 10;
+    if (window != mapped_window[index]) {
+        paging_off();
+        if (window > 1) {
+            struct pde *pd, *pdp;
+            /* Point the page directory pointers at the page directories */
+            memset(&pgtbl[index].pdp, 0, sizeof(pgtbl[index].pdp));
+            pd = pgtbl[index].pd;
+            pdp = pgtbl[index].pdp;
+            pdp[0].addr_lo = ((uint32_t)&pd[512*0])|1;
+            pdp[1].addr_lo = ((uint32_t)&pd[512*1])|1;
+            pdp[2].addr_lo = ((uint32_t)&pd[512*2])|1;
+            pdp[3].addr_lo = ((uint32_t)&pd[512*3])|1;
+            /* The first half of the page table is identity mapped */
+            for(i = 0; i < 1024; i++) {
+                pd[i].addr_lo = ((i & 0x3ff) << 21)| 0xE3;
+                pd[i].addr_hi = 0;
+            }
+            /* The second half of the page table holds the mapped page:
+             * bit 0 of the window selects address bit 31, the
+             * remaining window bits go into the high word. */
+            for(i = 1024; i < 2048; i++) {
+                pd[i].addr_lo = ((window & 1) << 31) | ((i & 0x3ff) << 21) | 0xE3;
+                pd[i].addr_hi = (window >> 1);
+            }
+            paging_on(pdp);
+        }
+        mapped_window[index] = window;
+    }
+    if (window == 0) {
+        result = (void *)(page << 21);
+    } else {
+        result = (void *)(0x80000000 | ((page & 0x3ff) << 21));
+    }
+    return result;
+}
diff --git a/src/cpu/x86/sse/Config.lb b/src/cpu/x86/sse/Config.lb
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/src/cpu/x86/sse/Config.lb
diff --git a/src/cpu/x86/sse/disable_sse.inc b/src/cpu/x86/sse/disable_sse.inc
new file mode 100644
index 0000000000..a18ea18643
--- /dev/null
+++ b/src/cpu/x86/sse/disable_sse.inc
@@ -0,0 +1,18 @@
+ /*
+ * Put the processor back into a reset state
+ * with respect to the xmm registers.
+ */
+
+ /* Zero xmm0-xmm7 by xor-ing each register with itself */
+ xorps %xmm0, %xmm0
+ xorps %xmm1, %xmm1
+ xorps %xmm2, %xmm2
+ xorps %xmm3, %xmm3
+ xorps %xmm4, %xmm4
+ xorps %xmm5, %xmm5
+ xorps %xmm6, %xmm6
+ xorps %xmm7, %xmm7
+
+ /* Disable sse instructions: clear bits 9 and 10 of %cr4.
+ * %eax is used as scratch and is not restored here. */
+ movl %cr4, %eax
+ andl $~(3<<9), %eax
+ movl %eax, %cr4
diff --git a/src/cpu/x86/sse/enable_sse.inc b/src/cpu/x86/sse/enable_sse.inc
new file mode 100644
index 0000000000..95724b71f7
--- /dev/null
+++ b/src/cpu/x86/sse/enable_sse.inc
@@ -0,0 +1,14 @@
+ /* preserve BIST in %eax: park it in %ebp while %eax is used as
+ * scratch below (the previous %ebp contents are overwritten) */
+ movl %eax, %ebp
+
+ /*
+ * Enable the use of the xmm registers
+ */
+
+ /* Enable sse instructions: set bit 9 of %cr4 */
+ movl %cr4, %eax
+ orl $(1<<9), %eax
+ movl %eax, %cr4
+
+ /* restore the saved %eax value */
+ movl %ebp, %eax
+
diff --git a/src/cpu/x86/tsc/Config.lb b/src/cpu/x86/tsc/Config.lb
new file mode 100644
index 0000000000..07272ad9d7
--- /dev/null
+++ b/src/cpu/x86/tsc/Config.lb
@@ -0,0 +1,5 @@
+uses CONFIG_UDELAY_TSC
+uses CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2
+
+default CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2=0
+if CONFIG_UDELAY_TSC object delay_tsc.o end
diff --git a/src/cpu/x86/tsc/delay_tsc.c b/src/cpu/x86/tsc/delay_tsc.c
new file mode 100644
index 0000000000..c7c431baac
--- /dev/null
+++ b/src/cpu/x86/tsc/delay_tsc.c
@@ -0,0 +1,165 @@
+#include <console/console.h>
+#include <arch/io.h>
+#include <cpu/x86/msr.h>
+#include <cpu/x86/tsc.h>
+#include <smp/spinlock.h>
+#include <delay.h>
+
+/* TSC ticks per microsecond; stays 0 until init_timer() stores a
+ * calibrate_tsc() result in it. */
+static unsigned long clocks_per_usec;
+
+#if (CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2 == 1)
+#define CLOCK_TICK_RATE 1193180U /* Underlying HZ */
+
+/* ------ Calibrate the TSC -------
+ * Too much 64-bit arithmetic here to do this cleanly in C, and for
+ * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
+ * output busy loop as low as possible. We avoid reading the CTC registers
+ * directly because of the awkward 8-bit access mechanism of the 82C54
+ * device.
+ */
+
+#define CALIBRATE_INTERVAL ((20*CLOCK_TICK_RATE)/1000) /* 20ms */
+#define CALIBRATE_DIVISOR (20*1000) /* 20ms / 20000 == 1usec */
+
+/* Calibrate the TSC against CTC channel 2.
+ *
+ * Programs channel 2 with CALIBRATE_INTERVAL, polls port 0x61 until bit
+ * 0x20 is set, and measures the TSC ticks that elapsed meanwhile.
+ *
+ * Returns the elapsed ticks divided by CALIBRATE_DIVISOR, rounded up,
+ * or 0 (after logging "bad_ctc") when the measurement is unusable.
+ */
+static unsigned long long calibrate_tsc(void)
+{
+ /* Set the Gate high, disable speaker */
+ outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+ /*
+ * Now let's take care of CTC channel 2
+ *
+ * Set the Gate high, program CTC channel 2 for mode 0,
+ * (interrupt on terminal count mode), binary count,
+ * load CALIBRATE_INTERVAL, (LSB and MSB) to begin countdown.
+ */
+ outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */
+ outb(CALIBRATE_INTERVAL & 0xff, 0x42); /* LSB of count */
+ outb(CALIBRATE_INTERVAL >> 8, 0x42); /* MSB of count */
+
+ {
+ tsc_t start;
+ tsc_t end;
+ unsigned long count;
+
+ /* Busy-wait until bit 0x20 of port 0x61 is set, counting the
+ * number of polls. */
+ start = rdtsc();
+ count = 0;
+ do {
+ count++;
+ } while ((inb(0x61) & 0x20) == 0);
+ end = rdtsc();
+
+ /* Error: ECTCNEVERSET */
+ if (count <= 1)
+ goto bad_ctc;
+
+ /* 64-bit subtract - gcc just messes up with long longs.
+ * Computes end -= start across the lo/hi halves with
+ * subtract and subtract-with-borrow. */
+ __asm__("subl %2,%0\n\t"
+ "sbbl %3,%1"
+ :"=a" (end.lo), "=d" (end.hi)
+ :"g" (start.lo), "g" (start.hi),
+ "0" (end.lo), "1" (end.hi));
+
+ /* Error: ECPUTOOFAST */
+ if (end.hi)
+ goto bad_ctc;
+
+
+ /* Error: ECPUTOOSLOW */
+ if (end.lo <= CALIBRATE_DIVISOR)
+ goto bad_ctc;
+
+ /* Ticks per microsecond, rounded up */
+ return (end.lo + CALIBRATE_DIVISOR -1)/CALIBRATE_DIVISOR;
+ }
+
+ /*
+ * The CTC wasn't reliable: we got a hit on the very first read,
+ * or the CPU was so fast/slow that the quotient wouldn't fit in
+ * 32 bits..
+ */
+bad_ctc:
+ printk_err("bad_ctc\n");
+ return 0;
+}
+
+#else /* CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2 */
+
+/*
+ * This is the "no timer2" version.
+ * To calibrate the TSC we take a TSC reading, do 1 << 20 (roughly one
+ * million) outbs to port 0x80, read the TSC again, and divide the
+ * difference by 1 << 20 with a right shift, because there is no udivdi3
+ * to do a 64-bit divide.
+ * We have found on a wide range of machines that this gives us a good
+ * microsecond value to +- 10%. On a dual AMD 1.6 Ghz box, it gives us
+ * .97 microseconds, and on a 267 Mhz. p5, it gives us 1.1 microseconds.
+ *
+ * Returns the average number of TSC ticks per outb, which the caller
+ * uses as ticks per microsecond.
+ */
+static unsigned long long calibrate_tsc(void)
+{
+    unsigned long long start, end, delta;
+    unsigned long result;
+    unsigned long count;
+
+    start = rdtscll();
+    // no udivdi3, dammit.
+    // so we count to 1<< 20 and then right shift 20
+    for(count = 0; count < (1<<20); count ++)
+        outb(0x80, 0x80);
+    end = rdtscll();
+
+    delta = end - start;
+    // at this point we have a delta for 1<<20 outbs. Now rescale for one microsecond.
+    delta >>= 20;
+    // save this for microsecond timing.
+    result = delta;
+    // print the 64-bit readings as high:low 32-bit halves
+    printk_spew("end %x:%x, start %x:%x\n",
+        (unsigned int)(end >> 32), (unsigned int)end,
+        (unsigned int)(start >> 32), (unsigned int)start);
+    printk_spew("32-bit delta %d\n", (unsigned long) delta);
+
+    // __FUNCTION__ is not a string literal, so pass it through %s
+    printk_spew("%s 32-bit result is %d\n", __FUNCTION__, result);
+    return delta;
+}
+
+
+#endif /* CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2*/
+
+/* Calibrate the TSC once.
+ *
+ * Runs calibrate_tsc() only while clocks_per_usec is still 0 and logs the
+ * result. A calibration that returns 0 leaves clocks_per_usec at 0, so
+ * the next call calibrates again.
+ */
+void init_timer(void)
+{
+ if (!clocks_per_usec) {
+ clocks_per_usec = calibrate_tsc();
+ printk_info("clocks_per_usec: %u\n", clocks_per_usec);
+ }
+}
+
+/* Busy-wait for the given number of microseconds using the TSC.
+ *
+ * us: delay in microseconds.
+ *
+ * Calls init_timer() first, so the first call may also calibrate. If
+ * clocks_per_usec is 0 the loop exits immediately.
+ */
+void udelay(unsigned us)
+{
+ unsigned long long count;
+ unsigned long long stop;
+ unsigned long long clocks;
+
+ init_timer();
+ /* Widen to 64 bits before multiplying so us * clocks_per_usec is
+ * computed without 32-bit truncation. */
+ clocks = us;
+ clocks *= clocks_per_usec;
+ count = rdtscll();
+ stop = clocks + count;
+ /* Spin until the TSC reaches the stop value */
+ while(stop > count) {
+ cpu_relax();
+ count = rdtscll();
+ }
+}