diff options
-rw-r--r-- | payloads/libpayload/arch/arm/Makefile.inc | 2 | ||||
-rw-r--r-- | payloads/libpayload/arch/arm/cache.c | 125 | ||||
-rw-r--r-- | payloads/libpayload/arch/arm/cpu.S | 117 | ||||
-rw-r--r-- | src/arch/arm/armv7/Makefile.inc | 3 | ||||
-rw-r--r-- | src/arch/arm/armv7/cache.c | 125 | ||||
-rw-r--r-- | src/arch/arm/armv7/cpu.S | 72 |
6 files changed, 166 insertions, 278 deletions
diff --git a/payloads/libpayload/arch/arm/Makefile.inc b/payloads/libpayload/arch/arm/Makefile.inc index 53a52df6aa..0078ffe845 100644 --- a/payloads/libpayload/arch/arm/Makefile.inc +++ b/payloads/libpayload/arch/arm/Makefile.inc @@ -35,5 +35,5 @@ libc-y += timer.c coreboot.c util.S libc-y += virtual.c libc-y += memcpy.S memset.S memmove.S libc-y += exception_asm.S exception.c -libc-y += cache.c +libc-y += cache.c cpu.S libcbfs-$(CONFIG_LP_CBFS) += dummy_media.c diff --git a/payloads/libpayload/arch/arm/cache.c b/payloads/libpayload/arch/arm/cache.c index 4c222eab73..defe640654 100644 --- a/payloads/libpayload/arch/arm/cache.c +++ b/payloads/libpayload/arch/arm/cache.c @@ -36,21 +36,6 @@ #include <arch/cache.h> #include <arch/virtual.h> -#define bitmask(high, low) ((1UL << (high)) + \ - ((1UL << (high)) - 1) - ((1UL << (low)) - 1)) - -/* Basic log2() implementation. Note: log2(0) is 0 for our purposes. */ -/* FIXME: src/include/lib.h is difficult to work with due to romcc */ -static unsigned long log2(unsigned long u) -{ - int i = 0; - - while (u >>= 1) - i++; - - return i; -} - void tlb_invalidate_all(void) { /* @@ -85,116 +70,6 @@ enum dcache_op { OP_DCIMVAC, }; -/* - * Do a dcache operation on entire cache by set/way. This is done for - * portability because mapping of memory address to cache location is - * implementation defined (See note on "Requirements for operations by - * set/way" in arch ref. manual). - */ -static void dcache_op_set_way(enum dcache_op op) -{ - uint32_t ccsidr; - unsigned int associativity, num_sets, linesize_bytes; - unsigned int set, way; - unsigned int level; - - level = (read_csselr() >> 1) & 0x7; - - /* - * dcache must be invalidated by set/way for portability since virtual - * memory mapping is system-defined. The number of sets and - * associativity is given by CCSIDR. We'll use DCISW to invalidate the - * dcache. - */ - ccsidr = read_ccsidr(); - - /* FIXME: rounding up required here? 
*/ - num_sets = ((ccsidr & bitmask(27, 13)) >> 13) + 1; - associativity = ((ccsidr & bitmask(12, 3)) >> 3) + 1; - /* FIXME: do we need to use CTR.DminLine here? */ - linesize_bytes = (1 << ((ccsidr & 0x7) + 2)) * 4; - - dsb(); - - /* - * Set/way operations require an interesting bit packing. See section - * B4-35 in the ARMv7 Architecture Reference Manual: - * - * A: Log2(associativity) - * B: L+S - * L: Log2(linesize) - * S: Log2(num_sets) - * - * The bits are packed as follows: - * 31 31-A B B-1 L L-1 4 3 1 0 - * |---|-------------|--------|-------|-----|-| - * |Way| zeros | Set | zeros |level|0| - * |---|-------------|--------|-------|-----|-| - */ - for (way = 0; way < associativity; way++) { - for (set = 0; set < num_sets; set++) { - uint32_t val = 0; - val |= way << (32 - log2(associativity)); - val |= set << log2(linesize_bytes); - val |= level << 1; - switch(op) { - case OP_DCCISW: - dccisw(val); - break; - case OP_DCISW: - dcisw(val); - break; - case OP_DCCSW: - dccsw(val); - break; - default: - break; - } - } - } - isb(); -} - -static void dcache_foreach(enum dcache_op op) -{ - uint32_t clidr; - int level; - - clidr = read_clidr(); - for (level = 0; level < 7; level++) { - unsigned int ctype = (clidr >> (level * 3)) & 0x7; - uint32_t csselr; - - switch(ctype) { - case 0x2: - case 0x3: - case 0x4: - csselr = level << 1; - write_csselr(csselr); - dcache_op_set_way(op); - break; - default: - /* no cache, icache only, or reserved */ - break; - } - } -} - -void dcache_clean_all(void) -{ - dcache_foreach(OP_DCCSW); -} - -void dcache_clean_invalidate_all(void) -{ - dcache_foreach(OP_DCCISW); -} - -void dcache_invalidate_all(void) -{ - dcache_foreach(OP_DCISW); -} - unsigned int dcache_line_bytes(void) { uint32_t ccsidr; diff --git a/payloads/libpayload/arch/arm/cpu.S b/payloads/libpayload/arch/arm/cpu.S new file mode 100644 index 0000000000..29a19e76df --- /dev/null +++ b/payloads/libpayload/arch/arm/cpu.S @@ -0,0 +1,117 @@ +/* + * Optimized assembly for 
low-level CPU operations on ARMv7 processors. + * + * Cache flushing code based off sys/arch/arm/arm/cpufunc_asm_armv7.S in NetBSD + * + * Copyright (c) 2010 Per Odlund <per.odlund@armagedon.se> + * Copyright (c) 2014 Google Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <arch/asm.h> + +/* + * Dcache invalidations by set/way work by passing a [way:sbz:set:sbz:level:0] + * bitfield in a register to the appropriate MCR instruction. 
This algorithm + * works by initializing a bitfield with the highest-numbered set and way, and + * generating a "set decrement" and a "way decrement". The former just contains + * the LSB of the set field, but the latter contains the LSB of the way field + * minus the highest valid set field... such that when you subtract it from a + * [way:0:level] field you end up with a [way - 1:highest_set:level] field + * through the magic of double subtraction. It's quite ingenious, really. + * Takes care to only use r0-r3 and ip so it's perfectly ABI-compatible without + * needing to write to memory. + */ + +.macro dcache_apply_all crm + dsb + mov r3, #-2 @ initialize level so that we start at 0 + +1: @next_level + add r3, r3, #2 @ increment level + + mrc p15, 1, r0, c0, c0, 1 @ read CLIDR + and ip, r0, #0x07000000 @ narrow to LoC + lsr ip, ip, #23 @ left align LoC (low 4 bits) + cmp r3, ip @ compare + bge 3f @done @ else fall through (r0 == CLIDR) + + add r2, r3, r3, lsr #1 @ r2 = (level << 1) * 3 / 2 + mov r1, r0, lsr r2 @ r1 = cache type + bfc r1, #3, #28 + cmp r1, #2 @ is it data or i&d? 
+ blt 1b @next_level @ nope, skip level + + mcr p15, 2, r3, c0, c0, 0 @ select cache level + isb + mrc p15, 1, r0, c0, c0, 0 @ read CCSIDR + + ubfx ip, r0, #0, #3 @ get linesize from CCSIDR + add ip, ip, #4 @ apply bias + ubfx r2, r0, #13, #15 @ get numsets - 1 from CCSIDR + lsl r2, r2, ip @ shift to set position + orr r3, r3, r2 @ merge set into way/set/level + mov r1, #1 + lsl r1, r1, ip @ r1 = set decr + + ubfx ip, r0, #3, #10 @ get numways - 1 from [to be discarded] CCSIDR + clz r2, ip @ number of bits to MSB of way + lsl ip, ip, r2 @ shift by that into way position + mov r0, #1 + lsl r2, r0, r2 @ r2 now contains the way decr + mov r0, r3 @ get sets/level (no way yet) + orr r3, r3, ip @ merge way into way/set/level + bfc r0, #0, #4 @ clear low 4 bits (level) to get numset - 1 + sub r2, r2, r0 @ subtract from way decr + + /* r3 = ways/sets/level, r2 = way decr, r1 = set decr, r0 and ip are free */ +2: mcr p15, 0, r3, c7, \crm, 2 @ writeback and/or invalidate line + cmp r3, #15 @ are we done with this level (way/set == 0) + bls 1b @next_level @ yes, go to next level + lsr r0, r3, #4 @ clear level bits leaving only way/set bits + lsls r0, r0, #14 @ clear way bits leaving only set bits + subne r3, r3, r1 @ non-zero?, decrement set # + subeq r3, r3, r2 @ zero?, decrement way # and restore set count + b 2b + +3: @done + mov r0, #0 @ default back to cache level 0 + mcr p15, 2, r0, c0, c0, 0 @ select cache level + dsb + isb + bx lr +.endm + +ENTRY(dcache_invalidate_all) + dcache_apply_all crm=c6 +ENDPROC(dcache_invalidate_all) + +ENTRY(dcache_clean_all) + dcache_apply_all crm=c10 +ENDPROC(dcache_clean_all) + +ENTRY(dcache_clean_invalidate_all) + dcache_apply_all crm=c14 +ENDPROC(dcache_clean_invalidate_all) diff --git a/src/arch/arm/armv7/Makefile.inc b/src/arch/arm/armv7/Makefile.inc index 033d498d47..c46fb39c5e 100644 --- a/src/arch/arm/armv7/Makefile.inc +++ b/src/arch/arm/armv7/Makefile.inc @@ -34,6 +34,7 @@ bootblock-$(CONFIG_BOOTBLOCK_SIMPLE) += 
bootblock_simple.c endif bootblock-y += cache.c +bootblock-y += cpu.S bootblock-$(CONFIG_BOOTBLOCK_CONSOLE) += exception.c bootblock-$(CONFIG_BOOTBLOCK_CONSOLE) += exception_asm.S bootblock-y += mmu.c @@ -50,6 +51,7 @@ endif # CONFIG_ARCH_BOOTBLOCK_ARMV7 ifeq ($(CONFIG_ARCH_ROMSTAGE_ARMV7),y) romstage-y += cache.c +romstage-y += cpu.S romstage-y += exception.c romstage-y += exception_asm.S romstage-y += mmu.c @@ -66,6 +68,7 @@ endif # CONFIG_ARCH_ROMSTAGE_ARMV7 ifeq ($(CONFIG_ARCH_RAMSTAGE_ARMV7),y) ramstage-y += cache.c +ramstage-y += cpu.S ramstage-y += exception.c ramstage-y += exception_asm.S ramstage-y += mmu.c diff --git a/src/arch/arm/armv7/cache.c b/src/arch/arm/armv7/cache.c index 4ee2687d38..7db86c81f1 100644 --- a/src/arch/arm/armv7/cache.c +++ b/src/arch/arm/armv7/cache.c @@ -35,21 +35,6 @@ #include <arch/cache.h> -#define bitmask(high, low) ((1UL << (high)) + \ - ((1UL << (high)) - 1) - ((1UL << (low)) - 1)) - -/* Basic log2() implementation. Note: log2(0) is 0 for our purposes. */ -/* FIXME: src/include/lib.h is difficult to work with due to romcc */ -static unsigned long log2(unsigned long u) -{ - int i = 0; - - while (u >>= 1) - i++; - - return i; -} - void tlb_invalidate_all(void) { /* @@ -84,116 +69,6 @@ enum dcache_op { OP_DCIMVAC, }; -/* - * Do a dcache operation on entire cache by set/way. This is done for - * portability because mapping of memory address to cache location is - * implementation defined (See note on "Requirements for operations by - * set/way" in arch ref. manual). - */ -static void dcache_op_set_way(enum dcache_op op) -{ - uint32_t ccsidr; - unsigned int associativity, num_sets, linesize_bytes; - unsigned int set, way; - unsigned int level; - - level = (read_csselr() >> 1) & 0x7; - - /* - * dcache must be invalidated by set/way for portability since virtual - * memory mapping is system-defined. The number of sets and - * associativity is given by CCSIDR. We'll use DCISW to invalidate the - * dcache. 
- */ - ccsidr = read_ccsidr(); - - /* FIXME: rounding up required here? */ - num_sets = ((ccsidr & bitmask(27, 13)) >> 13) + 1; - associativity = ((ccsidr & bitmask(12, 3)) >> 3) + 1; - /* FIXME: do we need to use CTR.DminLine here? */ - linesize_bytes = (1 << ((ccsidr & 0x7) + 2)) * 4; - - dsb(); - - /* - * Set/way operations require an interesting bit packing. See section - * B4-35 in the ARMv7 Architecture Reference Manual: - * - * A: Log2(associativity) - * B: L+S - * L: Log2(linesize) - * S: Log2(num_sets) - * - * The bits are packed as follows: - * 31 31-A B B-1 L L-1 4 3 1 0 - * |---|-------------|--------|-------|-----|-| - * |Way| zeros | Set | zeros |level|0| - * |---|-------------|--------|-------|-----|-| - */ - for (way = 0; way < associativity; way++) { - for (set = 0; set < num_sets; set++) { - uint32_t val = 0; - val |= way << (32 - log2(associativity)); - val |= set << log2(linesize_bytes); - val |= level << 1; - switch(op) { - case OP_DCCISW: - dccisw(val); - break; - case OP_DCISW: - dcisw(val); - break; - case OP_DCCSW: - dccsw(val); - break; - default: - break; - } - } - } - isb(); -} - -static void dcache_foreach(enum dcache_op op) -{ - uint32_t clidr; - int level; - - clidr = read_clidr(); - for (level = 0; level < 7; level++) { - unsigned int ctype = (clidr >> (level * 3)) & 0x7; - uint32_t csselr; - - switch(ctype) { - case 0x2: - case 0x3: - case 0x4: - csselr = level << 1; - write_csselr(csselr); - dcache_op_set_way(op); - break; - default: - /* no cache, icache only, or reserved */ - break; - } - } -} - -void dcache_clean_all(void) -{ - dcache_foreach(OP_DCCSW); -} - -void dcache_clean_invalidate_all(void) -{ - dcache_foreach(OP_DCCISW); -} - -void dcache_invalidate_all(void) -{ - dcache_foreach(OP_DCISW); -} - unsigned int dcache_line_bytes(void) { uint32_t ccsidr; diff --git a/src/arch/arm/armv7/cpu.S b/src/arch/arm/armv7/cpu.S index 42e2354d44..29a19e76df 100644 --- a/src/arch/arm/armv7/cpu.S +++ b/src/arch/arm/armv7/cpu.S @@ -30,25 
+30,39 @@ * SUCH DAMAGE. */ +#include <arch/asm.h> + /* - * These work very hard to not push registers onto the stack and to limit themselves - * to use r0-r3 and ip. + * Dcache invalidations by set/way work by passing a [way:sbz:set:sbz:level:0] + * bitfield in a register to the appropriate MCR instruction. This algorithm + * works by initializing a bitfield with the highest-numbered set and way, and + * generating a "set decrement" and a "way decrement". The former just contains + * the LSB of the set field, but the latter contains the LSB of the way field + * minus the highest valid set field... such that when you subtract it from a + * [way:0:level] field you end up with a [way - 1:highest_set:level] field + * through the magic of double subtraction. It's quite ingenious, really. + * Takes care to only use r0-r3 and ip so it's perfectly ABI-compatible without + * needing to write to memory. */ -/* * LINTSTUB: void armv7_dcache_wbinv_all(void); */ -ENTRY_NP(armv7_dcache_wbinv_all) +.macro dcache_apply_all crm + dsb + mov r3, #-2 @ initialize level so that we start at 0 + +1: @next_level + add r3, r3, #2 @ increment level + mrc p15, 1, r0, c0, c0, 1 @ read CLIDR - ands r3, r0, #0x07000000 - beq .Ldone_wbinv - lsr r3, r3, #23 @ left align loc (low 4 bits) + and ip, r0, #0x07000000 @ narrow to LoC + lsr ip, ip, #23 @ left align LoC (low 4 bits) + cmp r3, ip @ compare + bge 3f @done @ else fall through (r0 == CLIDR) - mov r1, #0 -.Lstart_wbinv: - add r2, r3, r3, lsr #1 @ r2 = level * 3 / 2 + add r2, r3, r3, lsr #1 @ r2 = (level << 1) * 3 / 2 mov r1, r0, lsr r2 @ r1 = cache type bfc r1, #3, #28 cmp r1, #2 @ is it data or i&d? 
- blt .Lnext_level_wbinv @ nope, skip level + blt 1b @next_level @ nope, skip level mcr p15, 2, r3, c0, c0, 0 @ select cache level isb @@ -65,7 +79,7 @@ ENTRY_NP(armv7_dcache_wbinv_all) ubfx ip, r0, #3, #10 @ get numways - 1 from [to be discarded] CCSIDR clz r2, ip @ number of bits to MSB of way lsl ip, ip, r2 @ shift by that into way position - mov r0, #1 @ + mov r0, #1 lsl r2, r0, r2 @ r2 now contains the way decr mov r0, r3 @ get sets/level (no way yet) orr r3, r3, ip @ merge way into way/set/level @@ -73,27 +87,31 @@ ENTRY_NP(armv7_dcache_wbinv_all) sub r2, r2, r0 @ subtract from way decr /* r3 = ways/sets/level, r2 = way decr, r1 = set decr, r0 and ip are free */ -1: mcr p15, 0, r3, c7, c14, 2 @ writeback and invalidate line +2: mcr p15, 0, r3, c7, \crm, 2 @ writeback and/or invalidate line cmp r3, #15 @ are we done with this level (way/set == 0) - bls .Lnext_level_wbinv @ yes, go to next level - lsl r0, r3, #10 @ clear way bits leaving only set/level bits - lsr r0, r0, #4 @ clear level bits leaving only set bits + bls 1b @next_level @ yes, go to next level + lsr r0, r3, #4 @ clear level bits leaving only way/set bits + lsls r0, r0, #14 @ clear way bits leaving only set bits subne r3, r3, r1 @ non-zero?, decrement set # subeq r3, r3, r2 @ zero?, decrement way # and restore set count - b 1b + b 2b -.Lnext_level_wbinv: - mrc p15, 1, r0, c0, c0, 1 @ read CLIDR - and ip, r0, #0x07000000 @ narrow to LoC - lsr ip, ip, #23 @ left align LoC (low 4 bits) - add r3, r3, #2 @ go to next level - cmp r3, ip @ compare - blt .Lstart_wbinv @ not done, next level (r0 == CLIDR) - -.Ldone_wbinv: +3: @done mov r0, #0 @ default back to cache level 0 mcr p15, 2, r0, c0, c0, 0 @ select cache level dsb isb bx lr -END(armv7_dcache_wbinv_all) +.endm + +ENTRY(dcache_invalidate_all) + dcache_apply_all crm=c6 +ENDPROC(dcache_invalidate_all) + +ENTRY(dcache_clean_all) + dcache_apply_all crm=c10 +ENDPROC(dcache_clean_all) + +ENTRY(dcache_clean_invalidate_all) + dcache_apply_all crm=c14 
+ENDPROC(dcache_clean_invalidate_all) |