From fcd5ace00b333ce31b11b02a2243dfbf39307f10 Mon Sep 17 00:00:00 2001 From: Eric Biederman Date: Thu, 14 Oct 2004 19:29:29 +0000 Subject: - Add new cvs code to cvs git-svn-id: svn://svn.coreboot.org/coreboot/trunk@1657 2b7e53f0-3cfb-0310-b3e9-8179ed1497e1 --- src/cpu/x86/16bit/entry16.inc | 124 ++++++++++++ src/cpu/x86/16bit/entry16.lds | 2 + src/cpu/x86/16bit/reset16.inc | 21 +++ src/cpu/x86/16bit/reset16.lds | 14 ++ src/cpu/x86/32bit/entry32.inc | 61 ++++++ src/cpu/x86/32bit/entry32.lds | 14 ++ src/cpu/x86/32bit/reset32.inc | 10 + src/cpu/x86/32bit/reset32.lds | 14 ++ src/cpu/x86/cache/Config.lb | 1 + src/cpu/x86/cache/cache.c | 10 + src/cpu/x86/fpu/Config.lb | 0 src/cpu/x86/fpu/enable_fpu.inc | 9 + src/cpu/x86/lapic/Config.lb | 3 + src/cpu/x86/lapic/boot_cpu.c | 10 + src/cpu/x86/lapic/lapic.c | 72 +++++++ src/cpu/x86/lapic/lapic_cpu_init.c | 316 +++++++++++++++++++++++++++++++ src/cpu/x86/lapic/secondary.S | 53 ++++++ src/cpu/x86/mmx/Config.lb | 0 src/cpu/x86/mmx/disable_mmx.inc | 2 + src/cpu/x86/mmx/enable_mmx.inc | 6 + src/cpu/x86/mtrr/Config.lb | 1 + src/cpu/x86/mtrr/earlymtrr.c | 123 ++++++++++++ src/cpu/x86/mtrr/mtrr.c | 378 +++++++++++++++++++++++++++++++++++++ src/cpu/x86/pae/Config.lb | 1 + src/cpu/x86/pae/pgtbl.c | 94 +++++++++ src/cpu/x86/sse/Config.lb | 0 src/cpu/x86/sse/disable_sse.inc | 18 ++ src/cpu/x86/sse/enable_sse.inc | 14 ++ src/cpu/x86/tsc/Config.lb | 5 + src/cpu/x86/tsc/delay_tsc.c | 165 ++++++++++++++++ 30 files changed, 1541 insertions(+) create mode 100644 src/cpu/x86/16bit/entry16.inc create mode 100644 src/cpu/x86/16bit/entry16.lds create mode 100644 src/cpu/x86/16bit/reset16.inc create mode 100644 src/cpu/x86/16bit/reset16.lds create mode 100644 src/cpu/x86/32bit/entry32.inc create mode 100644 src/cpu/x86/32bit/entry32.lds create mode 100644 src/cpu/x86/32bit/reset32.inc create mode 100644 src/cpu/x86/32bit/reset32.lds create mode 100644 src/cpu/x86/cache/Config.lb create mode 100644 src/cpu/x86/cache/cache.c create mode 100644 src/cpu/x86/fpu/Config.lb create mode 100644 src/cpu/x86/fpu/enable_fpu.inc create mode 100644 src/cpu/x86/lapic/Config.lb create mode 100644 src/cpu/x86/lapic/boot_cpu.c create mode 100644 src/cpu/x86/lapic/lapic.c create mode 100644 src/cpu/x86/lapic/lapic_cpu_init.c create mode 100644 src/cpu/x86/lapic/secondary.S create mode 100644 src/cpu/x86/mmx/Config.lb create mode 100644 src/cpu/x86/mmx/disable_mmx.inc create mode 100644 src/cpu/x86/mmx/enable_mmx.inc create mode 100644 src/cpu/x86/mtrr/Config.lb create mode 100644 src/cpu/x86/mtrr/earlymtrr.c create mode 100644 src/cpu/x86/mtrr/mtrr.c create mode 100644 src/cpu/x86/pae/Config.lb create mode 100644 src/cpu/x86/pae/pgtbl.c create mode 100644 src/cpu/x86/sse/Config.lb create mode 100644 src/cpu/x86/sse/disable_sse.inc create mode 100644 src/cpu/x86/sse/enable_sse.inc create mode 100644 src/cpu/x86/tsc/Config.lb create mode 100644 src/cpu/x86/tsc/delay_tsc.c (limited to 'src/cpu/x86') diff --git a/src/cpu/x86/16bit/entry16.inc b/src/cpu/x86/16bit/entry16.inc new file mode 100644 index 0000000000..61726d8ad9 --- /dev/null +++ b/src/cpu/x86/16bit/entry16.inc @@ -0,0 +1,124 @@ +/* +This software and ancillary information (herein called SOFTWARE ) +called LinuxBIOS is made available under the terms described +here. The SOFTWARE has been approved for release with associated +LA-CC Number 00-34 . Unless otherwise indicated, this SOFTWARE has +been authored by an employee or employees of the University of +California, operator of the Los Alamos National Laboratory under +Contract No. W-7405-ENG-36 with the U.S. Department of Energy. The +U.S. Government has rights to use, reproduce, and distribute this +SOFTWARE. The public may copy, distribute, prepare derivative works +and publicly display this SOFTWARE without charge, provided that this +Notice and any statement of authorship are reproduced on all copies. +Neither the Government nor the University makes any warranty, express +or implied, or assumes any liability or responsibility for the use of +this SOFTWARE. If SOFTWARE is modified to produce derivative works, +such modified SOFTWARE should be clearly marked, so as not to confuse +it with the version available from LANL. + */ +/* Copyright 2000, Ron Minnich, Advanced Computing Lab, LANL + * rminnich@lanl.gov + */ + + +/** Start code to put an i386 or later processor into 32-bit + * protected mode. + */ + +/* .section ".rom.text" */ +#include +.code16 +.globl _start +.type _start, @function + +_start: + cli + /* Save the BIST result */ + movl %eax, %ebp + +/* thanks to kmliu@sis.tw.com for this TBL fix ... */ +/**/ +/* IMMEDIATELY invalidate the translation lookaside buffer before executing*/ +/* any further code. Even though paging is disabled we could still get*/ +/*false address translations due to the TLB if we didn't invalidate it.*/ +/**/ + xorl %eax, %eax + movl %eax, %cr3 /* Invalidate TLB*/ + + + /* Invalidating the cache here seems to be a bad idea on + * modern processors. Don't. + * If we are hyperthreaded or we have multiple cores it is bad, + * for SMP startup. On Opterons it causes a 5 second delay. + * Invalidating the cache was pure paranoia in any event. + * If you cpu needs it you can write a cpu dependent version of + * entry16.inc. + */ + + /* Note: gas handles memory addresses in 16 bit code very poorly. + * In particular it doesn't appear to have a directive allowing you + * associate a section or even an absolute offset with a segment register. + * + * This means that anything except cs:ip relative offsets are + * a real pain in 16 bit mode. And explains why it is almost + * imposible to get gas to do lgdt correctly. + * + * One way to work around this is to have the linker do the + * math instead of the assembler. This solves the very + * pratical problem of being able to write code that can + * be relocated. + * + * An lgdt call before we have memory enabled cannot be + * position independent, as we cannot execute a call + * instruction to get our current instruction pointer. + * So while this code is relocateable it isn't arbitrarily + * relocatable. + * + * The criteria for relocation have been relaxed to their + * utmost, so that we can use the same code for both + * our initial entry point and startup of the second cpu. + * The code assumes when executing at _start that: + * (((cs & 0xfff) == 0) and (ip == _start & 0xffff)) + * or + * ((cs == anything) and (ip == 0)). + * + * The restrictions in reset16.inc mean that _start initially + * must be loaded at or above 0xffff0000 or below 0x100000. + * + * The linker scripts computs gdtptr16_offset by simply returning + * the low 16 bits. This means that the intial segment used + * when start is called must be 64K aligned. This should not + * restrict the address as the ip address can be anything. + */ + + movw %cs, %ax + shlw $4, %ax + movw $gdtptr16_offset, %bx + subw %ax, %bx + data32 lgdt %cs:(%bx) + + movl %cr0, %eax + andl $0x7FFAFFD1, %eax /* PG,AM,WP,NE,TS,EM,MP = 0 */ + orl $0x60000001, %eax /* CD, NW, PE = 1 */ + movl %eax, %cr0 + + /* Restore BIST to %eax */ + movl %ebp, %eax + + /* Now that we are in protected mode jump to a 32 bit code segment. */ + data32 ljmp $ROM_CODE_SEG, $__protected_start + +/** The gdt has a 4 Gb code segment at 0x10, and a 4 GB data segment + * at 0x18; these are Linux-compatible. + */ + +.align 4 +.globl gdtptr16 +gdtptr16: + .word gdt_end - gdt -1 /* compute the table limit */ + .long gdt /* we know the offset */ + +.globl _estart +_estart: + .code32 + diff --git a/src/cpu/x86/16bit/entry16.lds b/src/cpu/x86/16bit/entry16.lds new file mode 100644 index 0000000000..db37e66302 --- /dev/null +++ b/src/cpu/x86/16bit/entry16.lds @@ -0,0 +1,2 @@ + gdtptr16_offset = gdtptr16 & 0xffff; + _start_offset = _start & 0xffff; diff --git a/src/cpu/x86/16bit/reset16.inc b/src/cpu/x86/16bit/reset16.inc new file mode 100644 index 0000000000..d36c94085e --- /dev/null +++ b/src/cpu/x86/16bit/reset16.inc @@ -0,0 +1,21 @@ + .section ".reset" + .code16 +.globl reset_vector +reset_vector: +#if _ROMBASE >= 0xffff0000 + /* jmp _start */ + .byte 0xe9 + .int _start - ( . + 2 ) + /* Note: The above jump is hand coded to work around bugs in binutils. + * 5 byte are used for a 3 byte instruction. This works because x86 + * is little endian and allows us to use supported 32bit relocations + * instead of the weird 16 bit relocations that binutils does not + * handle consistenly between versions because they are used so rarely. + */ +#else +# error _ROMBASE is an unsupported value +#endif + . = 0x8; + .code32 + jmp protected_start + .previous diff --git a/src/cpu/x86/16bit/reset16.lds b/src/cpu/x86/16bit/reset16.lds new file mode 100644 index 0000000000..80f2fc0c6f --- /dev/null +++ b/src/cpu/x86/16bit/reset16.lds @@ -0,0 +1,14 @@ +/* + * _ROMTOP : The top of the rom used where we + * need to put the reset vector. + */ + +SECTIONS { + _ROMTOP = (_ROMBASE >= 0xffff0000)? 0xfffffff0 : 0xffff0; + . = _ROMTOP; + .reset . : { + *(.reset) + . = 15 ; + BYTE(0x00); + } +} diff --git a/src/cpu/x86/32bit/entry32.inc b/src/cpu/x86/32bit/entry32.inc new file mode 100644 index 0000000000..3d30a3f85f --- /dev/null +++ b/src/cpu/x86/32bit/entry32.inc @@ -0,0 +1,61 @@ +/* For starting linuxBIOS in protected mode */ + +#include + +/* .section ".rom.text" */ + .code32 + + .align 4 +.globl gdtptr + +gdt: +gdtptr: + .word gdt_end - gdt -1 /* compute the table limit */ + .long gdt /* we know the offset */ + .word 0 + +/* flat code segment */ + .word 0xffff, 0x0000 + .byte 0x00, 0x9b, 0xcf, 0x00 + +/* flat data segment */ + .word 0xffff, 0x0000 + .byte 0x00, 0x93, 0xcf, 0x00 + +gdt_end: + + +/* + * When we come here we are in protected mode. We expand + * the stack and copies the data segment from ROM to the + * memory. + * + * After that, we call the chipset bootstrap routine that + * does what is left of the chipset initialization. + * + * NOTE aligned to 4 so that we are sure that the prefetch + * cache will be reloaded. + */ + .align 4 +.globl protected_start +protected_start: + + lgdt %cs:gdtptr + ljmp $ROM_CODE_SEG, $__protected_start + +__protected_start: + /* Save the BIST value */ + movl %eax, %ebp + + intel_chip_post_macro(0x10) /* post 10 */ + + movw $ROM_DATA_SEG, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + movw %ax, %fs + movw %ax, %gs + + /* Restore the BIST value to %eax */ + movl %ebp, %eax + diff --git a/src/cpu/x86/32bit/entry32.lds b/src/cpu/x86/32bit/entry32.lds new file mode 100644 index 0000000000..37a75ba6ae --- /dev/null +++ b/src/cpu/x86/32bit/entry32.lds @@ -0,0 +1,14 @@ +/* + _cache_ram_seg_base = DEFINED(CACHE_RAM_BASE)? CACHE_RAM_BASE - _rodata : 0; + _cache_ram_seg_base_low = (_cache_ram_seg_base) & 0xffff; + _cache_ram_seg_base_middle = (_cache_ram_seg_base >> 16) & 0xff; + _cache_ram_seg_base_high = (_cache_ram_seg_base >> 24) & 0xff; + + _rom_code_seg_base = _ltext - _text; + _rom_code_seg_base_low = (_rom_code_seg_base) & 0xffff; + _rom_code_seg_base_middle = (_rom_code_seg_base >> 16) & 0xff; + _rom_code_seg_base_high = (_rom_code_seg_base >> 24) & 0xff; +*/ + + + diff --git a/src/cpu/x86/32bit/reset32.inc b/src/cpu/x86/32bit/reset32.inc new file mode 100644 index 0000000000..42c68cc770 --- /dev/null +++ b/src/cpu/x86/32bit/reset32.inc @@ -0,0 +1,10 @@ + .section ".reset" + .code16 +.globl reset_vector +reset_vector: + + . = 0x8; + .code32 + jmp protected_start + + .previous diff --git a/src/cpu/x86/32bit/reset32.lds b/src/cpu/x86/32bit/reset32.lds new file mode 100644 index 0000000000..fa6db86b1a --- /dev/null +++ b/src/cpu/x86/32bit/reset32.lds @@ -0,0 +1,14 @@ +/* + * _ROMTOP : The top of the rom used where we + * need to put the reset vector. + */ + +SECTIONS { + _ROMTOP = _ROMBASE + ROM_IMAGE_SIZE - 0x10; + . = _ROMTOP; + .reset (.): { + *(.reset) + . = 15 ; + BYTE(0x00); + } +} diff --git a/src/cpu/x86/cache/Config.lb b/src/cpu/x86/cache/Config.lb new file mode 100644 index 0000000000..e39bb2da32 --- /dev/null +++ b/src/cpu/x86/cache/Config.lb @@ -0,0 +1 @@ +object cache.o diff --git a/src/cpu/x86/cache/cache.c b/src/cpu/x86/cache/cache.c new file mode 100644 index 0000000000..92e4a69e29 --- /dev/null +++ b/src/cpu/x86/cache/cache.c @@ -0,0 +1,10 @@ +#include +#include + +void x86_enable_cache(void) +{ + post_code(0x60); + printk_info("Enabling cache\n"); + enable_cache(); +} + diff --git a/src/cpu/x86/fpu/Config.lb b/src/cpu/x86/fpu/Config.lb new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/cpu/x86/fpu/enable_fpu.inc b/src/cpu/x86/fpu/enable_fpu.inc new file mode 100644 index 0000000000..e663e365ac --- /dev/null +++ b/src/cpu/x86/fpu/enable_fpu.inc @@ -0,0 +1,9 @@ + /* preserve BIST in %eax */ + movl %eax, %ebp + + /* Disable floating point emulation */ + movl %cr0, %eax + andl $~(1<<2), %eax + movl %eax, %cr0 + + movl %ebp, %eax diff --git a/src/cpu/x86/lapic/Config.lb b/src/cpu/x86/lapic/Config.lb new file mode 100644 index 0000000000..8b5eaa376e --- /dev/null +++ b/src/cpu/x86/lapic/Config.lb @@ -0,0 +1,3 @@ +object lapic.o +object lapic_cpu_init.o +object secondary.S diff --git a/src/cpu/x86/lapic/boot_cpu.c b/src/cpu/x86/lapic/boot_cpu.c new file mode 100644 index 0000000000..d3a8f6e7a7 --- /dev/null +++ b/src/cpu/x86/lapic/boot_cpu.c @@ -0,0 +1,10 @@ +#include + +int boot_cpu(void) +{ + int bsp; + msr_t msr; + msr = rdmsr(0x1b); + bsp = !!(msr.lo & (1 << 8)); + return bsp; +} diff --git a/src/cpu/x86/lapic/lapic.c b/src/cpu/x86/lapic/lapic.c new file mode 100644 index 0000000000..8282890bf7 --- /dev/null +++ b/src/cpu/x86/lapic/lapic.c @@ -0,0 +1,72 @@ +#include +#include +#include +#include + +void setup_lapic(void) +{ + /* this is so interrupts work. This is very limited scope -- + * linux will do better later, we hope ... + */ + /* this is the first way we learned to do it. It fails on real SMP + * stuff. So we have to do things differently ... + * see the Intel mp1.4 spec, page A-3 + */ + +#if NEED_LAPIC == 1 + /* Only Pentium Pro and later have those MSR stuff */ + msr_t msr; + + printk_info("Setting up local apic..."); + + /* Enable the local apic */ + msr = rdmsr(LAPIC_BASE_MSR); + msr.lo |= LAPIC_BASE_MSR_ENABLE; + msr.lo &= ~LAPIC_BASE_MSR_ADDR_MASK; + msr.lo |= LAPIC_DEFAULT_BASE; + wrmsr(LAPIC_BASE_MSR, msr); + + /* + * Set Task Priority to 'accept all'. + */ + lapic_write_around(LAPIC_TASKPRI, + lapic_read_around(LAPIC_TASKPRI) & ~LAPIC_TPRI_MASK); + + /* Put the local apic in virtual wire mode */ + lapic_write_around(LAPIC_SPIV, + (lapic_read_around(LAPIC_SPIV) & ~(LAPIC_VECTOR_MASK)) + | LAPIC_SPIV_ENABLE); + lapic_write_around(LAPIC_LVT0, + (lapic_read_around(LAPIC_LVT0) & + ~(LAPIC_LVT_MASKED | LAPIC_LVT_LEVEL_TRIGGER | + LAPIC_LVT_REMOTE_IRR | LAPIC_INPUT_POLARITY | + LAPIC_SEND_PENDING |LAPIC_LVT_RESERVED_1 | + LAPIC_DELIVERY_MODE_MASK)) + | (LAPIC_LVT_REMOTE_IRR |LAPIC_SEND_PENDING | + LAPIC_DELIVERY_MODE_EXTINT) + ); + lapic_write_around(LAPIC_LVT1, + (lapic_read_around(LAPIC_LVT1) & + ~(LAPIC_LVT_MASKED | LAPIC_LVT_LEVEL_TRIGGER | + LAPIC_LVT_REMOTE_IRR | LAPIC_INPUT_POLARITY | + LAPIC_SEND_PENDING |LAPIC_LVT_RESERVED_1 | + LAPIC_DELIVERY_MODE_MASK)) + | (LAPIC_LVT_REMOTE_IRR |LAPIC_SEND_PENDING | + LAPIC_DELIVERY_MODE_NMI) + ); + + printk_debug(" apic_id: %d ", lapicid()); + +#else /* !NEED_LLAPIC */ + /* Only Pentium Pro and later have those MSR stuff */ + msr_t msr; + + printk_info("Disabling local apic..."); + + msr = rdmsr(LAPIC_BASE_MSR); + msr.lo &= ~LAPIC_BASE_MSR_ENABLE; + wrmsr(LAPIC_BASE_MSR, msr); +#endif /* !NEED_LAPIC */ + printk_info("done.\n"); + post_code(0x9b); +} diff --git a/src/cpu/x86/lapic/lapic_cpu_init.c b/src/cpu/x86/lapic/lapic_cpu_init.c new file mode 100644 index 0000000000..963b1cf729 --- /dev/null +++ b/src/cpu/x86/lapic/lapic_cpu_init.c @@ -0,0 +1,316 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#if CONFIG_SMP == 1 +/* This is a lot more paranoid now, since Linux can NOT handle + * being told there is a CPU when none exists. So any errors + * will return 0, meaning no CPU. + * + * We actually handling that case by noting which cpus startup + * and not telling anyone about the ones that dont. + */ +static int lapic_start_cpu(unsigned long apicid) +{ + int timeout; + unsigned long send_status, accept_status, start_eip; + int j, num_starts, maxlvt; + extern char _secondary_start[]; + + /* + * Starting actual IPI sequence... + */ + + printk_spew("Asserting INIT.\n"); + + /* + * Turn INIT on target chip + */ + lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(apicid)); + + /* + * Send IPI + */ + + lapic_write_around(LAPIC_ICR, LAPIC_INT_LEVELTRIG | LAPIC_INT_ASSERT + | LAPIC_DM_INIT); + + printk_spew("Waiting for send to finish...\n"); + timeout = 0; + do { + printk_spew("+"); + udelay(100); + send_status = lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + if (timeout >= 1000) { + printk_err("CPU %d: First apic write timed out. Disabling\n", + apicid); + // too bad. + printk_err("ESR is 0x%x\n", lapic_read(LAPIC_ESR)); + if (lapic_read(LAPIC_ESR)) { + printk_err("Try to reset ESR\n"); + lapic_write_around(LAPIC_ESR, 0); + printk_err("ESR is 0x%x\n", lapic_read(LAPIC_ESR)); + } + return 0; + } + mdelay(10); + + printk_spew("Deasserting INIT.\n"); + + /* Target chip */ + lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(apicid)); + + /* Send IPI */ + lapic_write_around(LAPIC_ICR, LAPIC_INT_LEVELTRIG | LAPIC_DM_INIT); + + printk_spew("Waiting for send to finish...\n"); + timeout = 0; + do { + printk_spew("+"); + udelay(100); + send_status = lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + if (timeout >= 1000) { + printk_err("CPU %d: Second apic write timed out. Disabling\n", + apicid); + // too bad. + return 0; + } + + start_eip = (unsigned long)_secondary_start; + printk_spew("start_eip=0x%08lx\n", start_eip); + + num_starts = 2; + + /* + * Run STARTUP IPI loop. + */ + printk_spew("#startup loops: %d.\n", num_starts); + + maxlvt = 4; + + for (j = 1; j <= num_starts; j++) { + printk_spew("Sending STARTUP #%d to %u.\n", j, apicid); + lapic_read_around(LAPIC_SPIV); + lapic_write(LAPIC_ESR, 0); + lapic_read(LAPIC_ESR); + printk_spew("After apic_write.\n"); + + /* + * STARTUP IPI + */ + + /* Target chip */ + lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(apicid)); + + /* Boot on the stack */ + /* Kick the second */ + lapic_write_around(LAPIC_ICR, LAPIC_DM_STARTUP + | (start_eip >> 12)); + + /* + * Give the other CPU some time to accept the IPI. + */ + udelay(300); + + printk_spew("Startup point 1.\n"); + + printk_spew("Waiting for send to finish...\n"); + timeout = 0; + do { + printk_spew("+"); + udelay(100); + send_status = lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + + /* + * Give the other CPU some time to accept the IPI. + */ + udelay(200); + /* + * Due to the Pentium erratum 3AP. + */ + if (maxlvt > 3) { + lapic_read_around(LAPIC_SPIV); + lapic_write(LAPIC_ESR, 0); + } + accept_status = (lapic_read(LAPIC_ESR) & 0xEF); + if (send_status || accept_status) + break; + } + printk_spew("After Startup.\n"); + if (send_status) + printk_warning("APIC never delivered???\n"); + if (accept_status) + printk_warning("APIC delivery error (%lx).\n", accept_status); + if (send_status || accept_status) + return 0; + return 1; +} + +/* Number of cpus that are currently running in linuxbios */ +static atomic_t active_cpus = ATOMIC_INIT(1); + +/* start_cpu_lock covers last_cpu_index and secondary_stack. + * Only starting one cpu at a time let's me remove the logic + * for select the stack from assembly language. + * + * In addition communicating by variables to the cpu I + * am starting allows me to veryify it has started before + * start_cpu returns. + */ + +static spinlock_t start_cpu_lock = SPIN_LOCK_UNLOCKED; +static unsigned last_cpu_index = 0; +volatile unsigned long secondary_stack; + +int start_cpu(device_t cpu) +{ + extern unsigned char _estack[]; + struct cpu_info *info; + unsigned long stack_end; + unsigned long apicid; + unsigned long index; + unsigned long count; + int result; + + spin_lock(&start_cpu_lock); + + /* Get the cpu's apicid */ + apicid = cpu->path.u.apic.apic_id; + + /* Get an index for the new processor */ + index = ++last_cpu_index; + + /* Find end of the new processors stack */ + stack_end = ((unsigned long)_estack) - (STACK_SIZE*index) - sizeof(struct cpu_info); + + /* Record the index and which cpu structure we are using */ + info = (struct cpu_info *)stack_end; + info->index = index; + info->cpu = cpu; + + /* Advertise the new stack to start_cpu */ + secondary_stack = stack_end; + + /* Until the cpu starts up report the cpu is not enabled */ + cpu->enabled = 0; + cpu->initialized = 0; + + /* Start the cpu */ + result = lapic_start_cpu(apicid); + + if (result) { + result = 0; + /* Wait 1s or until the new the new cpu calls in */ + for(count = 0; count < 100000 ; count++) { + if (secondary_stack == 0) { + result = 1; + break; + } + udelay(10); + } + } + secondary_stack = 0; + spin_unlock(&start_cpu_lock); + return result; +} + +/* C entry point of secondary cpus */ +void secondary_cpu_init(void) +{ + atomic_inc(&active_cpus); + cpu_initialize(); + atomic_dec(&active_cpus); + stop_this_cpu(); +} + +static void initialize_other_cpus(device_t root) +{ + int old_active_count, active_count; + device_t cpu; + /* Loop through the cpus once getting them started */ + for(cpu = root->link[1].children; cpu ; cpu = cpu->sibling) { + if (cpu->path.type != DEVICE_PATH_APIC) { + continue; + } + if (!cpu->enabled) { + continue; + } + if (cpu->initialized) { + continue; + } + if (!start_cpu(cpu)) { + /* Record the error in cpu? */ + printk_err("CPU %u would not start!\n", + cpu->path.u.apic.apic_id); + } + } + + /* Now loop until the other cpus have finished initializing */ + old_active_count = 1; + active_count = atomic_read(&active_cpus); + while(active_count > 1) { + if (active_count != old_active_count) { + printk_info("Waiting for %d CPUS to stop\n", active_count); + old_active_count = active_count; + } + udelay(10); + active_count = atomic_read(&active_cpus); + } + for(cpu = root->link[1].children; cpu; cpu = cpu->sibling) { + if (cpu->path.type != DEVICE_PATH_APIC) { + continue; + } + if (!cpu->initialized) { + printk_err("CPU %u did not initialize!\n", + cpu->path.u.apic.apic_id); +#warning "FIXME do I need a mainboard_cpu_fixup function?" + } + } + printk_debug("All AP CPUs stopped\n"); +} + +#else /* CONFIG_SMP */ +#define initialize_other_cpus(root) do {} while(0) +#endif /* CONFIG_SMP */ + +void initialize_cpus(device_t root) +{ + struct device_path cpu_path; + struct cpu_info *info; + + /* Find the info struct for this cpu */ + info = cpu_info(); + +#if NEED_LAPIC == 1 + /* Ensure the local apic is enabled */ + enable_lapic(); + + /* Get the device path of the boot cpu */ + cpu_path.type = DEVICE_PATH_APIC; + cpu_path.u.apic.apic_id = lapicid(); +#else + /* Get the device path of the boot cpu */ + cpu_path.type = DEVICE_PATH_BOOT_CPU; +#endif + + /* Find the device structure for the boot cpu */ + info->cpu = alloc_find_dev(&root->link[1], &cpu_path); + + /* Initialize the bootstrap processor */ + cpu_initialize(); + + /* Now initialize the rest of the cpus */ + initialize_other_cpus(root); +} + diff --git a/src/cpu/x86/lapic/secondary.S b/src/cpu/x86/lapic/secondary.S new file mode 100644 index 0000000000..786c31e532 --- /dev/null +++ b/src/cpu/x86/lapic/secondary.S @@ -0,0 +1,53 @@ +#include +#include +#include +#include + .text + .globl _secondary_start + .balign 4096 +_secondary_start: + .code16 + cli + xorl %eax, %eax + movl %eax, %cr3 /* Invalidate TLB*/ + + /* On hyper threaded cpus, invalidating the cache here is + * very very bad. Don't. + */ + + /* setup the data segment */ + movw %cs, %ax + movw %ax, %ds + + data32 lgdt gdtaddr - _secondary_start + + movl %cr0, %eax + andl $0x7FFAFFD1, %eax /* PG,AM,WP,NE,TS,EM,MP = 0 */ + orl $0x60000001, %eax /* CD, NW, PE = 1 */ + movl %eax, %cr0 + + ljmpl $0x10, $1f +1: + .code32 + movw $0x18, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + movw %ax, %fs + movw %ax, %gs + + /* Set the stack pointer, and flag that we are done */ + xorl %eax, %eax + movl secondary_stack, %esp + movl %eax, secondary_stack + + call secondary_cpu_init +1: hlt + jmp 1b + +gdtaddr: + .word gdt_limit /* the table limit */ + .long gdt /* we know the offset */ + + +.code32 diff --git a/src/cpu/x86/mmx/Config.lb b/src/cpu/x86/mmx/Config.lb new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/cpu/x86/mmx/disable_mmx.inc b/src/cpu/x86/mmx/disable_mmx.inc new file mode 100644 index 0000000000..97d62d60f5 --- /dev/null +++ b/src/cpu/x86/mmx/disable_mmx.inc @@ -0,0 +1,2 @@ + /* Clear out an mmx state */ + emms diff --git a/src/cpu/x86/mmx/enable_mmx.inc b/src/cpu/x86/mmx/enable_mmx.inc new file mode 100644 index 0000000000..a0578daf65 --- /dev/null +++ b/src/cpu/x86/mmx/enable_mmx.inc @@ -0,0 +1,6 @@ + /* BIST in %eax */ + + /* + * Enabling mmx registers is a noop + */ + diff --git a/src/cpu/x86/mtrr/Config.lb b/src/cpu/x86/mtrr/Config.lb new file mode 100644 index 0000000000..df5c16f8bd --- /dev/null +++ b/src/cpu/x86/mtrr/Config.lb @@ -0,0 +1 @@ +object mtrr.o \ No newline at end of file diff --git a/src/cpu/x86/mtrr/earlymtrr.c b/src/cpu/x86/mtrr/earlymtrr.c new file mode 100644 index 0000000000..e8fda994af --- /dev/null +++ b/src/cpu/x86/mtrr/earlymtrr.c @@ -0,0 +1,123 @@ +#ifndef EARLYMTRR_C +#define EARLYMTRR_C +#include +#include +#include + +/* Validate XIP_ROM_SIZE and XIP_ROM_BASE */ +#if defined(XIP_ROM_SIZE) && !defined(XIP_ROM_BASE) +#error "XIP_ROM_SIZE without XIP_ROM_BASE" +#endif +#if defined(XIP_ROM_BASE) && !defined(XIP_ROM_SIZE) +#error "XIP_ROM_BASE without XIP_ROM_SIZE" +#endif +#if !defined(CONFIG_LB_MEM_TOPK) +#error "CONFIG_LB_MEM_TOPK not defined" +#endif + +#if defined(XIP_ROM_SIZE) && ((XIP_ROM_SIZE & (XIP_ROM_SIZE -1)) != 0) +#error "XIP_ROM_SIZE is not a power of 2" +#endif +#if defined(XIP_ROM_SIZE) && ((XIP_ROM_BASE % XIP_ROM_SIZE) != 0) +#error "XIP_ROM_BASE is not a multiple of XIP_ROM_SIZE" +#endif + +#if (CONFIG_LB_MEM_TOPK & (CONFIG_LB_MEM_TOPK -1)) != 0 +# error "CONFIG_LB_MEM_TOPK must be a power of 2" +#endif + +static void disable_var_mtrr(unsigned reg) +{ + /* The invalid bit is kept in the mask so we simply + * clear the relevent mask register to disable a + * range. + */ + msr_t zero; + zero.lo = zero.hi = 0; + wrmsr(MTRRphysMask_MSR(reg), zero); +} + +static void set_var_mtrr( + unsigned reg, unsigned base, unsigned size, unsigned type) + +{ + /* Bit Bit 32-35 of MTRRphysMask should be set to 1 */ + msr_t basem, maskm; + basem.lo = base | type; + basem.hi = 0; + wrmsr(MTRRphysBase_MSR(reg), basem); + maskm.lo = ~(size - 1) | 0x800; + maskm.hi = 0x0f; + wrmsr(MTRRphysMask_MSR(reg), maskm); +} + +static void cache_lbmem(int type) +{ + /* Enable caching for 0 - 1MB using variable mtrr */ + disable_cache(); + set_var_mtrr(0, 0x00000000, CONFIG_LB_MEM_TOPK << 10, type); + enable_cache(); +} + + +/* the fixed and variable MTTRs are power-up with random values, + * clear them to MTRR_TYPE_UNCACHEABLE for safty. + */ +static void do_early_mtrr_init(const unsigned long *mtrr_msrs) +{ + /* Precondition: + * The cache is not enabled in cr0 nor in MTRRdefType_MSR + * entry32.inc ensures the cache is not enabled in cr0 + */ + msr_t msr; + const unsigned long *msr_addr; + unsigned long cr0; + + print_spew("Clearing mtrr\r\n"); + + /* Inialize all of the relevant msrs to 0 */ + msr.lo = 0; + msr.hi = 0; + unsigned long msr_nr; + for(msr_addr = mtrr_msrs; (msr_nr = *msr_addr); msr_addr++) { + wrmsr(msr_nr, msr); + } + +#if defined(XIP_ROM_SIZE) + /* enable write through caching so we can do execute in place + * on the flash rom. + */ + set_var_mtrr(1, XIP_ROM_BASE, XIP_ROM_SIZE, MTRR_TYPE_WRBACK); +#endif + + /* Set the default memory type and enable fixed and variable MTRRs + */ + /* Enable Variable MTRRs */ + msr.hi = 0x00000000; + msr.lo = 0x00000800; + wrmsr(MTRRdefType_MSR, msr); + +} + +static void early_mtrr_init(void) +{ + static const unsigned long mtrr_msrs[] = { + /* fixed mtrr */ + 0x250, 0x258, 0x259, + 0x268, 0x269, 0x26A, + 0x26B, 0x26C, 0x26D, + 0x26E, 0x26F, + /* var mtrr */ + 0x200, 0x201, 0x202, 0x203, + 0x204, 0x205, 0x206, 0x207, + 0x208, 0x209, 0x20A, 0x20B, + 0x20C, 0x20D, 0x20E, 0x20F, + /* NULL end of table */ + 0 + }; + disable_cache(); + do_early_mtrr_init(mtrr_msrs); + enable_cache(); +} + +#endif /* EARLYMTRR_C */ diff --git a/src/cpu/x86/mtrr/mtrr.c b/src/cpu/x86/mtrr/mtrr.c new file mode 100644 index 0000000000..8e38f23736 --- /dev/null +++ b/src/cpu/x86/mtrr/mtrr.c @@ -0,0 +1,378 @@ +/* + * intel_mtrr.c: setting MTRR to decent values for cache initialization on P6 + * + * Derived from intel_set_mtrr in intel_subr.c and mtrr.c in linux kernel + * + * Copyright 2000 Silicon Integrated System Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * Reference: Intel Architecture Software Developer's Manual, Volume 3: System Programming + */ + +#include +#include +#include +#include +#include + +#define arraysize(x) (sizeof(x)/sizeof((x)[0])) + +#warning "FIXME I do not properly handle address more than 36 physical address bits" +#ifdef k8 +# define ADDRESS_BITS 40 +#else +# define ADDRESS_BITS 36 +#endif +#define ADDRESS_BITS_HIGH (ADDRESS_BITS - 32) +#define ADDRESS_MASK_HIGH ((1u << ADDRESS_BITS_HIGH) - 1) + +static unsigned int mtrr_msr[] = { + MTRRfix64K_00000_MSR, MTRRfix16K_80000_MSR, MTRRfix16K_A0000_MSR, + MTRRfix4K_C0000_MSR, MTRRfix4K_C8000_MSR, MTRRfix4K_D0000_MSR, MTRRfix4K_D8000_MSR, + MTRRfix4K_E0000_MSR, MTRRfix4K_E8000_MSR, MTRRfix4K_F0000_MSR, MTRRfix4K_F8000_MSR, +}; + + +static void enable_fixed_mtrr(void) +{ + msr_t msr; + + msr = rdmsr(MTRRdefType_MSR); + msr.lo |= 0xc00; + wrmsr(MTRRdefType_MSR, msr); +} + +static void enable_var_mtrr(void) +{ + msr_t msr; + + msr = rdmsr(MTRRdefType_MSR); + msr.lo |= 0x800; + wrmsr(MTRRdefType_MSR, msr); +} + +/* setting variable mtrr, comes from linux kernel source */ +static void set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, unsigned char type) +{ + msr_t base, mask; + + base.hi = basek >> 22; + base.lo = basek << 10; + + //printk_debug("ADDRESS_MASK_HIGH=%#x\n", ADDRESS_MASK_HIGH); + + if (sizek < 4*1024*1024) { + mask.hi = ADDRESS_MASK_HIGH; + mask.lo = ~((sizek << 10) -1); + } + else { + mask.hi = ADDRESS_MASK_HIGH & (~((sizek >> 22) -1)); + mask.lo = 0; + } + + if (reg >= 8) + return; + + // it is recommended that we disable and enable cache when we + // do this. + disable_cache(); + if (sizek == 0) { + msr_t zero; + zero.lo = zero.hi = 0; + /* The invalid bit is kept in the mask, so we simply clear the + relevant mask register to disable a range. */ + wrmsr (MTRRphysMask_MSR(reg), zero); + } else { + /* Bit 32-35 of MTRRphysMask should be set to 1 */ + base.lo |= type; + mask.lo |= 0x800; + wrmsr (MTRRphysBase_MSR(reg), base); + wrmsr (MTRRphysMask_MSR(reg), mask); + } + enable_cache(); +} + +/* fms: find most sigificant bit set, stolen from Linux Kernel Source. */ +static inline unsigned int fms(unsigned int x) +{ + int r; + + __asm__("bsrl %1,%0\n\t" + "jnz 1f\n\t" + "movl $0,%0\n" + "1:" : "=r" (r) : "g" (x)); + return r; +} + +/* fms: find least sigificant bit set */ +static inline unsigned int fls(unsigned int x) +{ + int r; + + __asm__("bsfl %1,%0\n\t" + "jnz 1f\n\t" + "movl $32,%0\n" + "1:" : "=r" (r) : "g" (x)); + return r; +} + +/* setting up variable and fixed mtrr + * + * From Intel Vol. III Section 9.12.4, the Range Size and Base Alignment has some kind of requirement: + * 1. The range size must be 2^N byte for N >= 12 (i.e 4KB minimum). + * 2. The base address must be 2^N aligned, where the N here is equal to the N in previous + * requirement. So a 8K range must be 8K aligned not 4K aligned. + * + * These requirement is meet by "decompositing" the ramsize into Sum(Cn * 2^n, n = [0..N], Cn = [0, 1]). + * For Cm = 1, there is a WB range of 2^m size at base address Sum(Cm * 2^m, m = [N..n]). + * A 124MB (128MB - 4MB SMA) example: + * ramsize = 124MB == 64MB (at 0MB) + 32MB (at 64MB) + 16MB (at 96MB ) + 8MB (at 112MB) + 4MB (120MB). + * But this wastes a lot of MTRR registers so we use another more "aggresive" way with Uncacheable Regions. + * + * In the Uncacheable Region scheme, we try to cover the whole ramsize by one WB region as possible, + * If (an only if) this can not be done we will try to decomposite the ramesize, the mathematical formula + * whould be ramsize = Sum(Cn * 2^n, n = [0..N], Cn = [-1, 0, 1]). For Cn = -1, a Uncachable Region is used. + * The same 124MB example: + * ramsize = 124MB == 128MB WB (at 0MB) + 4MB UC (at 124MB) + * or a 156MB (128MB + 32MB - 4MB SMA) example: + * ramsize = 156MB == 128MB WB (at 0MB) + 32MB WB (at 128MB) + 4MB UC (at 156MB) + */ +/* 2 MTRRS are reserved for the operating system */ +#if 0 +#define BIOS_MTRRS 6 +#define OS_MTRRS 2 +#else +#define BIOS_MTRRS 8 +#define OS_MTRRS 0 +#endif +#define MTRRS (BIOS_MTRRS + OS_MTRRS) + + +static void set_fixed_mtrrs(unsigned int first, unsigned int last, unsigned char type) +{ + unsigned int i; + unsigned int fixed_msr = NUM_FIXED_RANGES >> 3; + msr_t msr; + msr.lo = msr.hi = 0; /* Shut up gcc */ + for(i = first; i < last; i++) { + /* When I switch to a new msr read it in */ + if (fixed_msr != i >> 3) { + /* But first write out the old msr */ + if (fixed_msr < (NUM_FIXED_RANGES >> 3)) { + disable_cache(); + wrmsr(mtrr_msr[fixed_msr], msr); + enable_cache(); + } + fixed_msr = i>>3; + msr = rdmsr(mtrr_msr[fixed_msr]); + } + if ((i & 7) < 4) { + msr.lo &= ~(0xff << ((i&3)*8)); + msr.lo |= type << ((i&3)*8); + } else { + msr.hi &= ~(0xff << ((i&3)*8)); + msr.hi |= type << ((i&3)*8); + } + } + /* Write out the final msr */ + if (fixed_msr < (NUM_FIXED_RANGES >> 3)) { + disable_cache(); + wrmsr(mtrr_msr[fixed_msr], msr); + enable_cache(); + } +} + +static unsigned fixed_mtrr_index(unsigned long addrk) +{ + unsigned index; + index = (addrk - 0) >> 6; + if (index >= 8) { + index = ((addrk - 8*64) >> 4) + 8; + } + if (index >= 24) { + index = ((addrk - (8*64 + 16*16)) >> 2) + 24; + } + if (index > NUM_FIXED_RANGES) { + index = NUM_FIXED_RANGES; + } + return index; +} + +static unsigned int range_to_mtrr(unsigned int reg, + unsigned long range_startk, unsigned long range_sizek, + unsigned long next_range_startk) +{ + if (!range_sizek || (reg >= BIOS_MTRRS)) { + return reg; + } + while(range_sizek) { + unsigned long max_align, align; + unsigned long sizek; + /* Compute the maximum size I can make a range */ + max_align = fls(range_startk); + align = fms(range_sizek); + if (align > max_align) { + align = max_align; + } + sizek = 1 << align; + printk_debug("Setting variable MTRR %d, base: %4dMB, range: %4dMB, type WB\n", + reg, range_startk >>10, sizek >> 10); + set_var_mtrr(reg++, range_startk, sizek, MTRR_TYPE_WRBACK); + range_startk += sizek; + range_sizek -= sizek; + if (reg >= BIOS_MTRRS) + break; + } + return reg; +} + +static unsigned long resk(uint64_t value) +{ + unsigned long resultk; + if (value < (1ULL << 42)) { + resultk = value >> 10; + } + else { + resultk = 0xffffffff; + } + return resultk; +} + +void x86_setup_mtrrs(void) +{ + /* Try this the simple way of incrementally adding together + * mtrrs. If this doesn't work out we can get smart again + * and clear out the mtrrs. + */ + struct device *dev; + unsigned long range_startk, range_sizek; + unsigned int reg; + + printk_debug("\n"); + /* Initialized the fixed_mtrrs to uncached */ + printk_debug("Setting fixed MTRRs(%d-%d) type: UC\n", + 0, NUM_FIXED_RANGES); + set_fixed_mtrrs(0, NUM_FIXED_RANGES, MTRR_TYPE_UNCACHEABLE); + + /* Now see which of the fixed mtrrs cover ram. + */ + for(dev = all_devices; dev; dev = dev->next) { + struct resource *res, *last; + last = &dev->resource[dev->resources]; + for(res = &dev->resource[0]; res < last; res++) { + unsigned int start_mtrr; + unsigned int last_mtrr; + if (!(res->flags & IORESOURCE_MEM) || + !(res->flags & IORESOURCE_CACHEABLE)) + { + continue; + } + start_mtrr = fixed_mtrr_index(resk(res->base)); + last_mtrr = fixed_mtrr_index(resk((res->base + res->size))); + if (start_mtrr >= NUM_FIXED_RANGES) { + break; + } + printk_debug("Setting fixed MTRRs(%d-%d) Type: WB\n", + start_mtrr, last_mtrr); + set_fixed_mtrrs(start_mtrr, last_mtrr, MTRR_TYPE_WRBACK); + } + } + printk_debug("DONE fixed MTRRs\n"); + /* Cache as many memory areas as possible */ + /* FIXME is there an algorithm for computing the optimal set of mtrrs? + * In some cases it is definitely possible to do better. + */ + range_startk = 0; + range_sizek = 0; + reg = 0; + for(dev = all_devices; dev; dev = dev->next) { + struct resource *res, *last; + last = &dev->resource[dev->resources]; + for(res = &dev->resource[0]; res < last; res++) { + unsigned long basek, sizek; + if (!(res->flags & IORESOURCE_MEM) || + !(res->flags & IORESOURCE_CACHEABLE)) { + continue; + } + basek = resk(res->base); + sizek = resk(res->size); + /* See if I can merge with the last range + * Either I am below 1M and the fixed mtrrs handle it, or + * the ranges touch. + */ + if ((basek <= 1024) || (range_startk + range_sizek == basek)) { + unsigned long endk = basek + sizek; + range_sizek = endk - range_startk; + continue; + } + /* Write the range mtrrs */ + if (range_sizek != 0) { + reg = range_to_mtrr(reg, range_startk, range_sizek, basek); + range_startk = 0; + range_sizek = 0; + if (reg >= BIOS_MTRRS) + goto last_msr; + } + /* Allocate an msr */ + range_startk = basek; + range_sizek = sizek; + } + } + last_msr: + /* Write the last range */ + reg = range_to_mtrr(reg, range_startk, range_sizek, 0); + printk_debug("DONE variable MTRRs\n"); + printk_debug("Clear out the extra MTRR's\n"); + /* Clear out the extra MTRR's */ + while(reg < MTRRS) { + set_var_mtrr(reg++, 0, 0, 0); + } + /* enable fixed MTRR */ + printk_spew("call enable_fixed_mtrr()\n"); + enable_fixed_mtrr(); + printk_spew("call enable_var_mtrr()\n"); + enable_var_mtrr(); + printk_spew("Leave %s\n", __FUNCTION__); + post_code(0x6A); +} + +int x86_mtrr_check(void) +{ + /* Only Pentium Pro and later have MTRR */ + msr_t msr; + printk_debug("\nMTRR check\n"); + + msr = rdmsr(0x2ff); + msr.lo >>= 10; + + printk_debug("Fixed MTRRs : "); + if (msr.lo & 0x01) + printk_debug("Enabled\n"); + else + printk_debug("Disabled\n"); + + printk_debug("Variable MTRRs: "); + if (msr.lo & 0x02) + printk_debug("Enabled\n"); + else + printk_debug("Disabled\n"); + + printk_debug("\n"); + + post_code(0x93); + return ((int) msr.lo); +} diff --git a/src/cpu/x86/pae/Config.lb b/src/cpu/x86/pae/Config.lb new file mode 100644 index 0000000000..45e7f5754b --- /dev/null +++ b/src/cpu/x86/pae/Config.lb @@ -0,0 +1 @@ +object pgtbl.o \ No newline at end of file diff --git a/src/cpu/x86/pae/pgtbl.c b/src/cpu/x86/pae/pgtbl.c new file mode 100644 index 0000000000..756cc53f65 --- /dev/null +++ b/src/cpu/x86/pae/pgtbl.c @@ -0,0 +1,94 @@ +#include +#include +#include +#include + +static void paging_off(void) +{ + __asm__ __volatile__ ( + /* Disable paging */ + "movl %%cr0, %%eax\n\t" + "andl $0x7FFFFFFF, %%eax\n\t" + "movl %%eax, %%cr0\n\t" + /* Disable pae */ + "movl %%cr4, %%eax\n\t" + "andl $0xFFFFFFDF, %%eax\n\t" + : + : + : "eax" + ); +} + +static void paging_on(void *pdp) +{ + __asm__ __volatile__( + /* Load the page table address */ + "movl %0, %%cr3\n\t" + /* Enable pae */ + "movl %%cr4, %%eax\n\t" + "orl $0x00000020, %%eax\n\t" + "movl %%eax, %%cr4\n\t" + /* Enable paging */ + "movl %%cr0, %%eax\n\t" + "orl $0x80000000, %%eax\n\t" + "movl %%eax, %%cr0\n\t" + : + : "r" (pdp) + : "eax" + ); +} + +void *map_2M_page(unsigned long page) +{ + struct pde { + uint32_t addr_lo; + uint32_t addr_hi; + } __attribute__ ((packed)); + struct pg_table { + struct pde pd[2048]; + struct pde pdp[512]; + } __attribute__ ((packed)); + static struct pg_table pgtbl[CONFIG_MAX_CPUS] __attribute__ ((aligned(4096))); + static unsigned long mapped_window[CONFIG_MAX_CPUS]; + unsigned long index; + unsigned long window; + void *result; + int i; + index = cpu_index(); + if ((index < 0) || (index >= CONFIG_MAX_CPUS)) { + return MAPPING_ERROR; + } + window = page >> 10; + if (window != mapped_window[index]) { + paging_off(); + if (window > 1) { + struct pde *pd, *pdp; + /* Point the page directory pointers at the page directories */ + memset(&pgtbl[index].pdp, 0, sizeof(pgtbl[index].pdp)); + pd = pgtbl[index].pd; + pdp = pgtbl[index].pdp; + pdp[0].addr_lo = ((uint32_t)&pd[512*0])|1; + pdp[1].addr_lo = ((uint32_t)&pd[512*1])|1; + pdp[2].addr_lo = ((uint32_t)&pd[512*2])|1; + pdp[3].addr_lo = ((uint32_t)&pd[512*3])|1; + /* The first half of the page table is identity mapped */ + for(i = 0; i < 1024; i++) { + pd[i].addr_lo = ((i & 0x3ff) << 21)| 0xE3; + pd[i].addr_hi = 0; + } + /* The second half of the page table holds the mapped page */ + for(i = 1024; i < 2048; i++) { + pd[i].addr_lo = ((window & 1) << 31) | ((i & 0x3ff) << 21) | 0xE3; + pd[i].addr_hi = (window >> 1); + } + paging_on(pdp); + } + mapped_window[index] = window; + } + if (window == 0) { + result = (void *)(page << 21); + } else { + result = (void *)(0x80000000 | ((page & 0x3ff) << 21)); + } + return result; +} diff --git a/src/cpu/x86/sse/Config.lb b/src/cpu/x86/sse/Config.lb new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/cpu/x86/sse/disable_sse.inc b/src/cpu/x86/sse/disable_sse.inc new file mode 100644 index 0000000000..a18ea18643 --- /dev/null +++ b/src/cpu/x86/sse/disable_sse.inc @@ -0,0 +1,18 @@ + /* + * Put the processor back into a reset state + * with respect to the xmm registers. + */ + + xorps %xmm0, %xmm0 + xorps %xmm1, %xmm1 + xorps %xmm2, %xmm2 + xorps %xmm3, %xmm3 + xorps %xmm4, %xmm4 + xorps %xmm5, %xmm5 + xorps %xmm6, %xmm6 + xorps %xmm7, %xmm7 + + /* Disable sse instructions */ + movl %cr4, %eax + andl $~(3<<9), %eax + movl %eax, %cr4 diff --git a/src/cpu/x86/sse/enable_sse.inc b/src/cpu/x86/sse/enable_sse.inc new file mode 100644 index 0000000000..95724b71f7 --- /dev/null +++ b/src/cpu/x86/sse/enable_sse.inc @@ -0,0 +1,14 @@ + /* preserve BIST in %eax */ + movl %eax, %ebp + + /* + * Enable the use of the xmm registers + */ + + /* Enable sse instructions */ + movl %cr4, %eax + orl $(1<<9), %eax + movl %eax, %cr4 + + movl %ebp, %eax + diff --git a/src/cpu/x86/tsc/Config.lb b/src/cpu/x86/tsc/Config.lb new file mode 100644 index 0000000000..07272ad9d7 --- /dev/null +++ b/src/cpu/x86/tsc/Config.lb @@ -0,0 +1,5 @@ +uses CONFIG_UDELAY_TSC +uses CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2 + +default CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2=0 +if CONFIG_UDELAY_TSC object delay_tsc.o end diff --git a/src/cpu/x86/tsc/delay_tsc.c b/src/cpu/x86/tsc/delay_tsc.c new file mode 100644 index 0000000000..c7c431baac --- /dev/null +++ b/src/cpu/x86/tsc/delay_tsc.c @@ -0,0 +1,165 @@ +#include +#include +#include +#include +#include +#include + +static unsigned long clocks_per_usec; + +#if (CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2 == 1) +#define CLOCK_TICK_RATE 1193180U /* Underlying HZ */ + +/* ------ Calibrate the TSC ------- + * Too much 64-bit arithmetic here to do this cleanly in C, and for + * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2) + * output busy loop as low as possible. We avoid reading the CTC registers + * directly because of the awkward 8-bit access mechanism of the 82C54 + * device. + */ + +#define CALIBRATE_INTERVAL ((20*CLOCK_TICK_RATE)/1000) /* 20ms */ +#define CALIBRATE_DIVISOR (20*1000) /* 20ms / 20000 == 1usec */ + +static unsigned long long calibrate_tsc(void) +{ + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Now let's take care of CTC channel 2 + * + * Set the Gate high, program CTC channel 2 for mode 0, + * (interrupt on terminal count mode), binary count, + * load 5 * LATCH count, (LSB and MSB) to begin countdown. + */ + outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */ + outb(CALIBRATE_INTERVAL & 0xff, 0x42); /* LSB of count */ + outb(CALIBRATE_INTERVAL >> 8, 0x42); /* MSB of count */ + + { + tsc_t start; + tsc_t end; + unsigned long count; + + start = rdtsc(); + count = 0; + do { + count++; + } while ((inb(0x61) & 0x20) == 0); + end = rdtsc(); + + /* Error: ECTCNEVERSET */ + if (count <= 1) + goto bad_ctc; + + /* 64-bit subtract - gcc just messes up with long longs */ + __asm__("subl %2,%0\n\t" + "sbbl %3,%1" + :"=a" (end.lo), "=d" (end.hi) + :"g" (start.lo), "g" (start.hi), + "0" (end.lo), "1" (end.hi)); + + /* Error: ECPUTOOFAST */ + if (end.hi) + goto bad_ctc; + + + /* Error: ECPUTOOSLOW */ + if (end.lo <= CALIBRATE_DIVISOR) + goto bad_ctc; + + return (end.lo + CALIBRATE_DIVISOR -1)/CALIBRATE_DIVISOR; + } + + /* + * The CTC wasn't reliable: we got a hit on the very first read, + * or the CPU was so fast/slow that the quotient wouldn't fit in + * 32 bits.. + */ +bad_ctc: + printk_err("bad_ctc\n"); + return 0; +} + +#else /* CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2 */ + +/* + * this is the "no timer2" version. + * to calibrate tsc, we get a TSC reading, then do 1,000,000 outbs to port 0x80 + * then we read TSC again, and divide the difference by 1,000,000 + * we have found on a wide range of machines that this gives us a a + * good microsecond value + * to +- 10%. On a dual AMD 1.6 Ghz box, it gives us .97 microseconds, and on a + * 267 Mhz. p5, it gives us 1.1 microseconds. + * also, since gcc now supports long long, we use that. + * also no unsigned long long / operator, so we play games. + * about the only thing you can do with long longs, it seems, + *is return them and assign them. + * (and do asm on them, yuck) + * so avoid all ops on long longs. + */ +static unsigned long long calibrate_tsc(void) +{ + unsigned long long start, end, delta; + unsigned long allones = (unsigned long) -1, result; + unsigned long count; + + start = rdtscll(); + // no udivdi3, dammit. + // so we count to 1<< 20 and then right shift 20 + for(count = 0; count < (1<<20); count ++) + outb(0x80, 0x80); + end = rdtscll(); + +#if 0 + // make delta be (endhigh - starthigh) + (endlow - startlow) + // but >> 20 + // do it this way to avoid gcc warnings. + start = tsc_start.hi; + start <<= 32; + start |= start.lo; + end = tsc_end.hi; + end <<= 32; + end |= tsc_end.lo; +#endif + delta = end - start; + // at this point we have a delta for 1,000,000 outbs. Now rescale for one microsecond. + delta >>= 20; + // save this for microsecond timing. + result = delta; + printk_spew("end %x:%x, start %x:%x\n", + endhigh, endlow, starthigh, startlow); + printk_spew("32-bit delta %d\n", (unsigned long) delta); + + printk_spew(__FUNCTION__ " 32-bit result is %d\n", result); + return delta; +} + + +#endif /* CONFIG_TSC_X86RDTSC_CALIBRATE_WITH_TIMER2*/ + +void init_timer(void) +{ + if (!clocks_per_usec) { + clocks_per_usec = calibrate_tsc(); + printk_info("clocks_per_usec: %u\n", clocks_per_usec); + } +} + +void udelay(unsigned us) +{ + unsigned long long count; + unsigned long long stop; + unsigned long long clocks; + + init_timer(); + clocks = us; + clocks *= clocks_per_usec; + count = rdtscll(); + stop = clocks + count; + while(stop > count) { + cpu_relax(); + count = rdtscll(); + } +} -- cgit v1.2.3