/* * intel_mtrr.c: setting MTRR to decent values for cache initialization on P6 * * Derived from intel_set_mtrr in intel_subr.c and mtrr.c in linux kernel * * Copyright 2000 Silicon Integrated System Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * * Reference: Intel Architecture Software Developer's Manual, Volume 3: System Programming */ #include #include #include #include #include #define arraysize(x) (sizeof(x)/sizeof((x)[0])) #ifdef k8 # define ADDRESS_BITS 40 #else # define ADDRESS_BITS 36 #endif #define ADDRESS_BITS_HIGH (ADDRESS_BITS - 32) #define ADDRESS_MASK_HIGH ((1u << ADDRESS_BITS_HIGH) - 1) static unsigned int mtrr_msr[] = { MTRRfix64K_00000_MSR, MTRRfix16K_80000_MSR, MTRRfix16K_A0000_MSR, MTRRfix4K_C0000_MSR, MTRRfix4K_C8000_MSR, MTRRfix4K_D0000_MSR, MTRRfix4K_D8000_MSR, MTRRfix4K_E0000_MSR, MTRRfix4K_E8000_MSR, MTRRfix4K_F0000_MSR, MTRRfix4K_F8000_MSR, }; static void intel_enable_fixed_mtrr(void) { msr_t msr; msr = rdmsr(MTRRdefType_MSR); msr.lo |= 0xc00; wrmsr(MTRRdefType_MSR, msr); } static void intel_enable_var_mtrr(void) { msr_t msr; msr = rdmsr(MTRRdefType_MSR); msr.lo |= 0x800; wrmsr(MTRRdefType_MSR, msr); } static inline void disable_cache(void) { unsigned int tmp; /* Disable cache */ /* Write back the cache and flush TLB */ asm volatile ( "movl %%cr0, %0\n\t" "orl $0x40000000, %0\n\t" "wbinvd\n\t" "movl %0, %%cr0\n\t" "wbinvd\n\t" :"=r" (tmp) ::"memory"); } static inline void enable_cache(void) { unsigned int tmp; // turn cache back on. asm volatile ( "movl %%cr0, %0\n\t" "andl $0x9fffffff, %0\n\t" "movl %0, %%cr0\n\t" :"=r" (tmp) ::"memory"); } /* setting variable mtrr, comes from linux kernel source */ static void intel_set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, unsigned char type) { msr_t base, mask; base.hi = basek >> 22; base.lo = basek << 10; //printk_debug("ADDRESS_MASK_HIGH=%#x\n", ADDRESS_MASK_HIGH); if (sizek < 4*1024*1024) { mask.hi = ADDRESS_MASK_HIGH; mask.lo = ~((sizek << 10) -1); } else { mask.hi = ADDRESS_MASK_HIGH & (~((sizek >> 22) -1)); mask.lo = 0; } if (reg >= 8) return; // it is recommended that we disable and enable cache when we // do this. disable_cache(); if (sizek == 0) { msr_t zero; zero.lo = zero.hi = 0; /* The invalid bit is kept in the mask, so we simply clear the relevant mask register to disable a range. */ wrmsr (MTRRphysMask_MSR(reg), zero); } else { /* Bit 32-35 of MTRRphysMask should be set to 1 */ base.lo |= type; mask.lo |= 0x800; wrmsr (MTRRphysBase_MSR(reg), base); wrmsr (MTRRphysMask_MSR(reg), mask); } enable_cache(); } /* setting variable mtrr, comes from linux kernel source */ void set_var_mtrr(unsigned int reg, unsigned long base, unsigned long size, unsigned char type) { if (reg >= 8) return; // it is recommended that we disable and enable cache when we // do this. disable_cache(); if (size == 0) { /* The invalid bit is kept in the mask, so we simply clear the relevant mask register to disable a range. */ msr_t zero; zero.lo = zero.hi = 0; wrmsr (MTRRphysMask_MSR(reg), zero); } else { /* Bit 32-35 of MTRRphysMask should be set to 1 */ msr_t basem, maskm; basem.lo = base | type; basem.hi = 0; maskm.lo = ~(size - 1) | 0x800; maskm.hi = 0x0F; wrmsr (MTRRphysBase_MSR(reg), basem); wrmsr (MTRRphysMask_MSR(reg), maskm); } // turn cache back on. enable_cache(); } /* fms: find most sigificant bit set, stolen from Linux Kernel Source. */ static inline unsigned int fms(unsigned int x) { int r; __asm__("bsrl %1,%0\n\t" "jnz 1f\n\t" "movl $0,%0\n" "1:" : "=r" (r) : "g" (x)); return r; } /* fms: find least sigificant bit set */ static inline unsigned int fls(unsigned int x) { int r; __asm__("bsfl %1,%0\n\t" "jnz 1f\n\t" "movl $32,%0\n" "1:" : "=r" (r) : "g" (x)); return r; } /* setting up variable and fixed mtrr * * From Intel Vol. III Section 9.12.4, the Range Size and Base Alignment has some kind of requirement: * 1. The range size must be 2^N byte for N >= 12 (i.e 4KB minimum). * 2. The base address must be 2^N aligned, where the N here is equal to the N in previous * requirement. So a 8K range must be 8K aligned not 4K aligned. * * These requirement is meet by "decompositing" the ramsize into Sum(Cn * 2^n, n = [0..N], Cn = [0, 1]). * For Cm = 1, there is a WB range of 2^m size at base address Sum(Cm * 2^m, m = [N..n]). * A 124MB (128MB - 4MB SMA) example: * ramsize = 124MB == 64MB (at 0MB) + 32MB (at 64MB) + 16MB (at 96MB ) + 8MB (at 112MB) + 4MB (120MB). * But this wastes a lot of MTRR registers so we use another more "aggresive" way with Uncacheable Regions. * * In the Uncacheable Region scheme, we try to cover the whole ramsize by one WB region as possible, * If (an only if) this can not be done we will try to decomposite the ramesize, the mathematical formula * whould be ramsize = Sum(Cn * 2^n, n = [0..N], Cn = [-1, 0, 1]). For Cn = -1, a Uncachable Region is used. * The same 124MB example: * ramsize = 124MB == 128MB WB (at 0MB) + 4MB UC (at 124MB) * or a 156MB (128MB + 32MB - 4MB SMA) example: * ramsize = 156MB == 128MB WB (at 0MB) + 32MB WB (at 128MB) + 4MB UC (at 156MB) */ /* 2 MTRRS are reserved for the operating system */ #define BIOS_MTRRS 6 #define OS_MTRRS 2 #define MTRRS (BIOS_MTRRS + OS_MTRRS) static void set_fixed_mtrrs(unsigned int first, unsigned int last, unsigned char type) { unsigned int i; unsigned int fixed_msr = NUM_FIXED_RANGES >> 3; msr_t msr; msr.lo = msr.hi = 0; /* Shut up gcc */ for (i = first; i < last; i++) { /* When I switch to a new msr read it in */ if (fixed_msr != i >> 3) { /* But first write out the old msr */ if (fixed_msr < (NUM_FIXED_RANGES >> 3)) { disable_cache(); wrmsr(mtrr_msr[fixed_msr], msr); enable_cache(); } fixed_msr = i>>3; msr = rdmsr(mtrr_msr[fixed_msr]); } if ((i & 7) < 4) { msr.lo &= ~(0xff << ((i&3)*8)); msr.lo |= type << ((i&3)*8); } else { msr.hi &= ~(0xff << ((i&3)*8)); msr.hi |= type << ((i&3)*8); } } /* Write out the final msr */ if (fixed_msr < (NUM_FIXED_RANGES >> 3)) { disable_cache(); wrmsr(mtrr_msr[fixed_msr], msr); enable_cache(); } } static unsigned fixed_mtrr_index(unsigned long addrk) { unsigned index; index = (addrk - 0) >> 6; if (index >= 8) { index = ((addrk - 8*64) >> 4) + 8; } if (index >= 24) { index = ((addrk - (8*64 + 16*16)) >> 2) + 24; } if (index > NUM_FIXED_RANGES) { index = NUM_FIXED_RANGES; } return index; } static unsigned int range_to_mtrr(unsigned int reg, unsigned long range_startk, unsigned long range_sizek, unsigned long next_range_startk) { if (!range_sizek || (reg >= BIOS_MTRRS)) { return reg; } while(range_sizek) { unsigned long max_align, align; unsigned long sizek; /* Compute the maximum size I can make a range */ max_align = fls(range_startk); align = fms(range_sizek); if (align > max_align) { align = max_align; } sizek = 1 << align; printk_debug("Setting variable MTRR %d, base: %4dMB, range: %4dMB, type WB\n", reg, range_startk >>10, sizek >> 10); intel_set_var_mtrr(reg++, range_startk, sizek, MTRR_TYPE_WRBACK); range_startk += sizek; range_sizek -= sizek; if (reg >= BIOS_MTRRS) break; } return reg; } void setup_mtrrs(struct mem_range *mem) { /* Try this the simple way of incrementally adding together * mtrrs. If this doesn't work out we can get smart again * and clear out the mtrrs. */ struct mem_range *memp; unsigned long range_startk, range_sizek; unsigned int reg; printk_debug("\n"); /* Initialized the fixed_mtrrs to uncached */ printk_debug("Setting fixed MTRRs(%d-%d) type: UC\n", 0, NUM_FIXED_RANGES); set_fixed_mtrrs(0, NUM_FIXED_RANGES, MTRR_TYPE_UNCACHEABLE); /* Now see which of the fixed mtrrs cover ram. */ for (memp = mem; memp->sizek; memp++) { unsigned int start_mtrr; unsigned int last_mtrr; start_mtrr = fixed_mtrr_index(memp->basek); last_mtrr = fixed_mtrr_index(memp->basek + memp->sizek); if (start_mtrr >= NUM_FIXED_RANGES) { break; } #if defined(k7) || defined(k8) #warning "FIXME: dealing with RdMEM/WrMEM for Athlon/Opteron" #endif printk_debug("Setting fixed MTRRs(%d-%d) type: WB\n", start_mtrr, last_mtrr); set_fixed_mtrrs(start_mtrr, last_mtrr, MTRR_TYPE_WRBACK); } printk_debug("DONE fixed MTRRs\n"); /* Cache as many memory areas as possible */ /* FIXME is there an algorithm for computing the optimal set of mtrrs? * In some cases it is definitely possible to do better. */ range_startk = 0; range_sizek = 0; reg = 0; for (memp = mem; memp->sizek; memp++) { /* See if I can merge with the last range * Either I am below 1M and the fixed mtrrs handle it, or * the ranges touch. */ if ((memp->basek <= 1024) || (range_startk + range_sizek == memp->basek)) { unsigned long endk = memp->basek + memp->sizek; range_sizek = endk - range_startk; continue; } /* Write the range mtrrs */ if (range_sizek != 0) { reg = range_to_mtrr(reg, range_startk, range_sizek, memp->basek); range_startk = 0; range_sizek = 0; if (reg >= BIOS_MTRRS) break; } /* Allocate an msr */ range_startk = memp->basek; range_sizek = memp->sizek; } /* Write the last range */ reg = range_to_mtrr(reg, range_startk, range_sizek, 0); printk_debug("DONE variable MTRRs\n"); printk_debug("Clear out the extra MTRR's\n"); /* Clear out the extra MTRR's */ while(reg < MTRRS) { intel_set_var_mtrr(reg++, 0, 0, 0); } /* enable fixed MTRR */ printk_debug("call intel_enable_fixed_mtrr()\n"); intel_enable_fixed_mtrr(); printk_debug("call intel_enable_var_mtrr()\n"); intel_enable_var_mtrr(); printk_debug("Leave %s\n", __FUNCTION__); }