summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--payloads/libpayload/arch/arm/Makefile.inc2
-rw-r--r--payloads/libpayload/arch/arm/cache.c125
-rw-r--r--payloads/libpayload/arch/arm/cpu.S117
-rw-r--r--src/arch/arm/armv7/Makefile.inc3
-rw-r--r--src/arch/arm/armv7/cache.c125
-rw-r--r--src/arch/arm/armv7/cpu.S72
6 files changed, 166 insertions, 278 deletions
diff --git a/payloads/libpayload/arch/arm/Makefile.inc b/payloads/libpayload/arch/arm/Makefile.inc
index 53a52df6aa..0078ffe845 100644
--- a/payloads/libpayload/arch/arm/Makefile.inc
+++ b/payloads/libpayload/arch/arm/Makefile.inc
@@ -35,5 +35,5 @@ libc-y += timer.c coreboot.c util.S
libc-y += virtual.c
libc-y += memcpy.S memset.S memmove.S
libc-y += exception_asm.S exception.c
-libc-y += cache.c
+libc-y += cache.c cpu.S
libcbfs-$(CONFIG_LP_CBFS) += dummy_media.c
diff --git a/payloads/libpayload/arch/arm/cache.c b/payloads/libpayload/arch/arm/cache.c
index 4c222eab73..defe640654 100644
--- a/payloads/libpayload/arch/arm/cache.c
+++ b/payloads/libpayload/arch/arm/cache.c
@@ -36,21 +36,6 @@
#include <arch/cache.h>
#include <arch/virtual.h>
-#define bitmask(high, low) ((1UL << (high)) + \
- ((1UL << (high)) - 1) - ((1UL << (low)) - 1))
-
-/* Basic log2() implementation. Note: log2(0) is 0 for our purposes. */
-/* FIXME: src/include/lib.h is difficult to work with due to romcc */
-static unsigned long log2(unsigned long u)
-{
- int i = 0;
-
- while (u >>= 1)
- i++;
-
- return i;
-}
-
void tlb_invalidate_all(void)
{
/*
@@ -85,116 +70,6 @@ enum dcache_op {
OP_DCIMVAC,
};
-/*
- * Do a dcache operation on entire cache by set/way. This is done for
- * portability because mapping of memory address to cache location is
- * implementation defined (See note on "Requirements for operations by
- * set/way" in arch ref. manual).
- */
-static void dcache_op_set_way(enum dcache_op op)
-{
- uint32_t ccsidr;
- unsigned int associativity, num_sets, linesize_bytes;
- unsigned int set, way;
- unsigned int level;
-
- level = (read_csselr() >> 1) & 0x7;
-
- /*
- * dcache must be invalidated by set/way for portability since virtual
- * memory mapping is system-defined. The number of sets and
- * associativity is given by CCSIDR. We'll use DCISW to invalidate the
- * dcache.
- */
- ccsidr = read_ccsidr();
-
- /* FIXME: rounding up required here? */
- num_sets = ((ccsidr & bitmask(27, 13)) >> 13) + 1;
- associativity = ((ccsidr & bitmask(12, 3)) >> 3) + 1;
- /* FIXME: do we need to use CTR.DminLine here? */
- linesize_bytes = (1 << ((ccsidr & 0x7) + 2)) * 4;
-
- dsb();
-
- /*
- * Set/way operations require an interesting bit packing. See section
- * B4-35 in the ARMv7 Architecture Reference Manual:
- *
- * A: Log2(associativity)
- * B: L+S
- * L: Log2(linesize)
- * S: Log2(num_sets)
- *
- * The bits are packed as follows:
- * 31 31-A B B-1 L L-1 4 3 1 0
- * |---|-------------|--------|-------|-----|-|
- * |Way| zeros | Set | zeros |level|0|
- * |---|-------------|--------|-------|-----|-|
- */
- for (way = 0; way < associativity; way++) {
- for (set = 0; set < num_sets; set++) {
- uint32_t val = 0;
- val |= way << (32 - log2(associativity));
- val |= set << log2(linesize_bytes);
- val |= level << 1;
- switch(op) {
- case OP_DCCISW:
- dccisw(val);
- break;
- case OP_DCISW:
- dcisw(val);
- break;
- case OP_DCCSW:
- dccsw(val);
- break;
- default:
- break;
- }
- }
- }
- isb();
-}
-
-static void dcache_foreach(enum dcache_op op)
-{
- uint32_t clidr;
- int level;
-
- clidr = read_clidr();
- for (level = 0; level < 7; level++) {
- unsigned int ctype = (clidr >> (level * 3)) & 0x7;
- uint32_t csselr;
-
- switch(ctype) {
- case 0x2:
- case 0x3:
- case 0x4:
- csselr = level << 1;
- write_csselr(csselr);
- dcache_op_set_way(op);
- break;
- default:
- /* no cache, icache only, or reserved */
- break;
- }
- }
-}
-
-void dcache_clean_all(void)
-{
- dcache_foreach(OP_DCCSW);
-}
-
-void dcache_clean_invalidate_all(void)
-{
- dcache_foreach(OP_DCCISW);
-}
-
-void dcache_invalidate_all(void)
-{
- dcache_foreach(OP_DCISW);
-}
-
unsigned int dcache_line_bytes(void)
{
uint32_t ccsidr;
diff --git a/payloads/libpayload/arch/arm/cpu.S b/payloads/libpayload/arch/arm/cpu.S
new file mode 100644
index 0000000000..29a19e76df
--- /dev/null
+++ b/payloads/libpayload/arch/arm/cpu.S
@@ -0,0 +1,117 @@
+/*
+ * Optimized assembly for low-level CPU operations on ARMv7 processors.
+ *
+ * Cache flushing code based off sys/arch/arm/arm/cpufunc_asm_armv7.S in NetBSD
+ *
+ * Copyright (c) 2010 Per Odlund <per.odlund@armagedon.se>
+ * Copyright (c) 2014 Google Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <arch/asm.h>
+
+/*
+ * Dcache invalidations by set/way work by passing a [way:sbz:set:sbz:level:0]
+ * bitfield in a register to the appropriate MCR instruction. This algorithm
+ * works by initializing a bitfield with the highest-numbered set and way, and
+ * generating a "set decrement" and a "way decrement". The former just contains
+ * the LSB of the set field, but the latter contains the LSB of the way field
+ * minus the highest valid set field... such that when you subtract it from a
+ * [way:0:level] field you end up with a [way - 1:highest_set:level] field
+ * through the magic of double subtraction. It's quite ingenious, really.
+ * Takes care to only use r0-r3 and ip so it's perfectly ABI-compatible without
+ * needing to write to memory.
+ */
+
+.macro dcache_apply_all crm
+ dsb
+ mov r3, #-2 @ initialize level so that we start at 0
+
+1: @next_level
+ add r3, r3, #2 @ increment level
+
+ mrc p15, 1, r0, c0, c0, 1 @ read CLIDR
+ and ip, r0, #0x07000000 @ narrow to LoC
+ lsr ip, ip, #23 @ ip = LoC << 1 (bits [3:1], same encoding as level in r3)
+ cmp r3, ip @ compare
+ bge 3f @done @ else fall through (r0 == CLIDR)
+
+ add r2, r3, r3, lsr #1 @ r2 = (level << 1) * 3 / 2
+ mov r1, r0, lsr r2 @ r1 = cache type
+ bfc r1, #3, #28
+ cmp r1, #2 @ is it data or i&d?
+ blt 1b @next_level @ nope, skip level
+
+ mcr p15, 2, r3, c0, c0, 0 @ select cache level
+ isb
+ mrc p15, 1, r0, c0, c0, 0 @ read CCSIDR
+
+ ubfx ip, r0, #0, #3 @ get linesize from CCSIDR
+ add ip, ip, #4 @ apply bias
+ ubfx r2, r0, #13, #15 @ get numsets - 1 from CCSIDR
+ lsl r2, r2, ip @ shift to set position
+ orr r3, r3, r2 @ merge set into way/set/level
+ mov r1, #1
+ lsl r1, r1, ip @ r1 = set decr
+
+ ubfx ip, r0, #3, #10 @ get numways - 1 from [to be discarded] CCSIDR
+ clz r2, ip @ number of bits to MSB of way
+ lsl ip, ip, r2 @ shift by that into way position
+ mov r0, #1
+ lsl r2, r0, r2 @ r2 now contains the way decr
+ mov r0, r3 @ get sets/level (no way yet)
+ orr r3, r3, ip @ merge way into way/set/level
+ bfc r0, #0, #4 @ clear low 4 bits (level) to get numset - 1
+ sub r2, r2, r0 @ subtract from way decr
+
+ /* r3 = ways/sets/level, r2 = way decr, r1 = set decr, r0 and ip are free */
+2: mcr p15, 0, r3, c7, \crm, 2 @ writeback and/or invalidate line
+ cmp r3, #15 @ are we done with this level (way/set == 0)
+ bls 1b @next_level @ yes, go to next level
+ lsr r0, r3, #4 @ clear level bits leaving only way/set bits
+ lsls r0, r0, #14 @ clear way bits leaving only set bits
+ subne r3, r3, r1 @ non-zero?, decrement set #
+ subeq r3, r3, r2 @ zero?, decrement way # and restore set count
+ b 2b
+
+3: @done
+ mov r0, #0 @ default back to cache level 0
+ mcr p15, 2, r0, c0, c0, 0 @ select cache level
+ dsb
+ isb
+ bx lr
+.endm
+
+ENTRY(dcache_invalidate_all)
+ dcache_apply_all crm=c6
+ENDPROC(dcache_invalidate_all)
+
+ENTRY(dcache_clean_all)
+ dcache_apply_all crm=c10
+ENDPROC(dcache_clean_all)
+
+ENTRY(dcache_clean_invalidate_all)
+ dcache_apply_all crm=c14
+ENDPROC(dcache_clean_invalidate_all)
diff --git a/src/arch/arm/armv7/Makefile.inc b/src/arch/arm/armv7/Makefile.inc
index 033d498d47..c46fb39c5e 100644
--- a/src/arch/arm/armv7/Makefile.inc
+++ b/src/arch/arm/armv7/Makefile.inc
@@ -34,6 +34,7 @@ bootblock-$(CONFIG_BOOTBLOCK_SIMPLE) += bootblock_simple.c
endif
bootblock-y += cache.c
+bootblock-y += cpu.S
bootblock-$(CONFIG_BOOTBLOCK_CONSOLE) += exception.c
bootblock-$(CONFIG_BOOTBLOCK_CONSOLE) += exception_asm.S
bootblock-y += mmu.c
@@ -50,6 +51,7 @@ endif # CONFIG_ARCH_BOOTBLOCK_ARMV7
ifeq ($(CONFIG_ARCH_ROMSTAGE_ARMV7),y)
romstage-y += cache.c
+romstage-y += cpu.S
romstage-y += exception.c
romstage-y += exception_asm.S
romstage-y += mmu.c
@@ -66,6 +68,7 @@ endif # CONFIG_ARCH_ROMSTAGE_ARMV7
ifeq ($(CONFIG_ARCH_RAMSTAGE_ARMV7),y)
ramstage-y += cache.c
+ramstage-y += cpu.S
ramstage-y += exception.c
ramstage-y += exception_asm.S
ramstage-y += mmu.c
diff --git a/src/arch/arm/armv7/cache.c b/src/arch/arm/armv7/cache.c
index 4ee2687d38..7db86c81f1 100644
--- a/src/arch/arm/armv7/cache.c
+++ b/src/arch/arm/armv7/cache.c
@@ -35,21 +35,6 @@
#include <arch/cache.h>
-#define bitmask(high, low) ((1UL << (high)) + \
- ((1UL << (high)) - 1) - ((1UL << (low)) - 1))
-
-/* Basic log2() implementation. Note: log2(0) is 0 for our purposes. */
-/* FIXME: src/include/lib.h is difficult to work with due to romcc */
-static unsigned long log2(unsigned long u)
-{
- int i = 0;
-
- while (u >>= 1)
- i++;
-
- return i;
-}
-
void tlb_invalidate_all(void)
{
/*
@@ -84,116 +69,6 @@ enum dcache_op {
OP_DCIMVAC,
};
-/*
- * Do a dcache operation on entire cache by set/way. This is done for
- * portability because mapping of memory address to cache location is
- * implementation defined (See note on "Requirements for operations by
- * set/way" in arch ref. manual).
- */
-static void dcache_op_set_way(enum dcache_op op)
-{
- uint32_t ccsidr;
- unsigned int associativity, num_sets, linesize_bytes;
- unsigned int set, way;
- unsigned int level;
-
- level = (read_csselr() >> 1) & 0x7;
-
- /*
- * dcache must be invalidated by set/way for portability since virtual
- * memory mapping is system-defined. The number of sets and
- * associativity is given by CCSIDR. We'll use DCISW to invalidate the
- * dcache.
- */
- ccsidr = read_ccsidr();
-
- /* FIXME: rounding up required here? */
- num_sets = ((ccsidr & bitmask(27, 13)) >> 13) + 1;
- associativity = ((ccsidr & bitmask(12, 3)) >> 3) + 1;
- /* FIXME: do we need to use CTR.DminLine here? */
- linesize_bytes = (1 << ((ccsidr & 0x7) + 2)) * 4;
-
- dsb();
-
- /*
- * Set/way operations require an interesting bit packing. See section
- * B4-35 in the ARMv7 Architecture Reference Manual:
- *
- * A: Log2(associativity)
- * B: L+S
- * L: Log2(linesize)
- * S: Log2(num_sets)
- *
- * The bits are packed as follows:
- * 31 31-A B B-1 L L-1 4 3 1 0
- * |---|-------------|--------|-------|-----|-|
- * |Way| zeros | Set | zeros |level|0|
- * |---|-------------|--------|-------|-----|-|
- */
- for (way = 0; way < associativity; way++) {
- for (set = 0; set < num_sets; set++) {
- uint32_t val = 0;
- val |= way << (32 - log2(associativity));
- val |= set << log2(linesize_bytes);
- val |= level << 1;
- switch(op) {
- case OP_DCCISW:
- dccisw(val);
- break;
- case OP_DCISW:
- dcisw(val);
- break;
- case OP_DCCSW:
- dccsw(val);
- break;
- default:
- break;
- }
- }
- }
- isb();
-}
-
-static void dcache_foreach(enum dcache_op op)
-{
- uint32_t clidr;
- int level;
-
- clidr = read_clidr();
- for (level = 0; level < 7; level++) {
- unsigned int ctype = (clidr >> (level * 3)) & 0x7;
- uint32_t csselr;
-
- switch(ctype) {
- case 0x2:
- case 0x3:
- case 0x4:
- csselr = level << 1;
- write_csselr(csselr);
- dcache_op_set_way(op);
- break;
- default:
- /* no cache, icache only, or reserved */
- break;
- }
- }
-}
-
-void dcache_clean_all(void)
-{
- dcache_foreach(OP_DCCSW);
-}
-
-void dcache_clean_invalidate_all(void)
-{
- dcache_foreach(OP_DCCISW);
-}
-
-void dcache_invalidate_all(void)
-{
- dcache_foreach(OP_DCISW);
-}
-
unsigned int dcache_line_bytes(void)
{
uint32_t ccsidr;
diff --git a/src/arch/arm/armv7/cpu.S b/src/arch/arm/armv7/cpu.S
index 42e2354d44..29a19e76df 100644
--- a/src/arch/arm/armv7/cpu.S
+++ b/src/arch/arm/armv7/cpu.S
@@ -30,25 +30,39 @@
* SUCH DAMAGE.
*/
+#include <arch/asm.h>
+
/*
- * These work very hard to not push registers onto the stack and to limit themselves
- * to use r0-r3 and ip.
+ * Dcache invalidations by set/way work by passing a [way:sbz:set:sbz:level:0]
+ * bitfield in a register to the appropriate MCR instruction. This algorithm
+ * works by initializing a bitfield with the highest-numbered set and way, and
+ * generating a "set decrement" and a "way decrement". The former just contains
+ * the LSB of the set field, but the latter contains the LSB of the way field
+ * minus the highest valid set field... such that when you subtract it from a
+ * [way:0:level] field you end up with a [way - 1:highest_set:level] field
+ * through the magic of double subtraction. It's quite ingenious, really.
+ * Takes care to only use r0-r3 and ip so it's perfectly ABI-compatible without
+ * needing to write to memory.
*/
-/* * LINTSTUB: void armv7_dcache_wbinv_all(void); */
-ENTRY_NP(armv7_dcache_wbinv_all)
+.macro dcache_apply_all crm
+ dsb
+ mov r3, #-2 @ initialize level so that we start at 0
+
+1: @next_level
+ add r3, r3, #2 @ increment level
+
mrc p15, 1, r0, c0, c0, 1 @ read CLIDR
- ands r3, r0, #0x07000000
- beq .Ldone_wbinv
- lsr r3, r3, #23 @ left align loc (low 4 bits)
+ and ip, r0, #0x07000000 @ narrow to LoC
+ lsr ip, ip, #23 @ ip = LoC << 1 (bits [3:1], same encoding as level in r3)
+ cmp r3, ip @ compare
+ bge 3f @done @ else fall through (r0 == CLIDR)
- mov r1, #0
-.Lstart_wbinv:
- add r2, r3, r3, lsr #1 @ r2 = level * 3 / 2
+ add r2, r3, r3, lsr #1 @ r2 = (level << 1) * 3 / 2
mov r1, r0, lsr r2 @ r1 = cache type
bfc r1, #3, #28
cmp r1, #2 @ is it data or i&d?
- blt .Lnext_level_wbinv @ nope, skip level
+ blt 1b @next_level @ nope, skip level
mcr p15, 2, r3, c0, c0, 0 @ select cache level
isb
@@ -65,7 +79,7 @@ ENTRY_NP(armv7_dcache_wbinv_all)
ubfx ip, r0, #3, #10 @ get numways - 1 from [to be discarded] CCSIDR
clz r2, ip @ number of bits to MSB of way
lsl ip, ip, r2 @ shift by that into way position
- mov r0, #1 @
+ mov r0, #1
lsl r2, r0, r2 @ r2 now contains the way decr
mov r0, r3 @ get sets/level (no way yet)
orr r3, r3, ip @ merge way into way/set/level
@@ -73,27 +87,31 @@ ENTRY_NP(armv7_dcache_wbinv_all)
sub r2, r2, r0 @ subtract from way decr
/* r3 = ways/sets/level, r2 = way decr, r1 = set decr, r0 and ip are free */
-1: mcr p15, 0, r3, c7, c14, 2 @ writeback and invalidate line
+2: mcr p15, 0, r3, c7, \crm, 2 @ writeback and/or invalidate line
cmp r3, #15 @ are we done with this level (way/set == 0)
- bls .Lnext_level_wbinv @ yes, go to next level
- lsl r0, r3, #10 @ clear way bits leaving only set/level bits
- lsr r0, r0, #4 @ clear level bits leaving only set bits
+ bls 1b @next_level @ yes, go to next level
+ lsr r0, r3, #4 @ clear level bits leaving only way/set bits
+ lsls r0, r0, #14 @ clear way bits leaving only set bits
subne r3, r3, r1 @ non-zero?, decrement set #
subeq r3, r3, r2 @ zero?, decrement way # and restore set count
- b 1b
+ b 2b
-.Lnext_level_wbinv:
- mrc p15, 1, r0, c0, c0, 1 @ read CLIDR
- and ip, r0, #0x07000000 @ narrow to LoC
- lsr ip, ip, #23 @ left align LoC (low 4 bits)
- add r3, r3, #2 @ go to next level
- cmp r3, ip @ compare
- blt .Lstart_wbinv @ not done, next level (r0 == CLIDR)
-
-.Ldone_wbinv:
+3: @done
mov r0, #0 @ default back to cache level 0
mcr p15, 2, r0, c0, c0, 0 @ select cache level
dsb
isb
bx lr
-END(armv7_dcache_wbinv_all)
+.endm
+
+ENTRY(dcache_invalidate_all)
+ dcache_apply_all crm=c6
+ENDPROC(dcache_invalidate_all)
+
+ENTRY(dcache_clean_all)
+ dcache_apply_all crm=c10
+ENDPROC(dcache_clean_all)
+
+ENTRY(dcache_clean_invalidate_all)
+ dcache_apply_all crm=c14
+ENDPROC(dcache_clean_invalidate_all)