summaryrefslogtreecommitdiff
path: root/src/arch/arm64
diff options
context:
space:
mode:
authorJulius Werner <jwerner@chromium.org>2015-04-21 14:32:36 -0700
committerJulius Werner <jwerner@chromium.org>2017-05-30 22:17:57 +0200
commitbaa3e70084bac00885667b20efde3e69901cda70 (patch)
treeceea5f55a386f4d94ff16fd203aa2191a8680c50 /src/arch/arm64
parent3db7653aabb98b02b9dbea0231fa68eacbbb5991 (diff)
arm64: Align cache maintenance code with libpayload and ARM32
coreboot and libpayload currently use completely different code to perform a full cache flush on ARM64, with even different function names. The libpayload code is closely inspired by the ARM32 version, so for the sake of overall consistency let's sync coreboot to that. Also align a few other cache management details to work the same way as the corresponding ARM32 parts (such as only flushing but not invalidating the data cache after loading a new stage, which may have a small performance benefit). Change-Id: I9e05b425eeeaa27a447b37f98c0928fed3f74340 Signed-off-by: Julius Werner <jwerner@chromium.org> Reviewed-on: https://review.coreboot.org/19785 Tested-by: build bot (Jenkins) <no-reply@coreboot.org> Reviewed-by: Aaron Durbin <adurbin@chromium.org>
Diffstat (limited to 'src/arch/arm64')
-rw-r--r--src/arch/arm64/armv8/Makefile.inc4
-rw-r--r--src/arch/arm64/armv8/cache.c13
-rw-r--r--src/arch/arm64/armv8/cache_helpers.S124
-rw-r--r--src/arch/arm64/armv8/cpu.S81
-rw-r--r--src/arch/arm64/armv8/mmu.c3
-rw-r--r--src/arch/arm64/include/arch/cache_helpers.h47
-rw-r--r--src/arch/arm64/include/armv8/arch/cache.h9
7 files changed, 88 insertions, 193 deletions
diff --git a/src/arch/arm64/armv8/Makefile.inc b/src/arch/arm64/armv8/Makefile.inc
index a7a6b19f9f..21ebf70bfb 100644
--- a/src/arch/arm64/armv8/Makefile.inc
+++ b/src/arch/arm64/armv8/Makefile.inc
@@ -28,7 +28,6 @@ ifneq ($(CONFIG_BOOTBLOCK_CUSTOM),y)
bootblock-y += bootblock.S
endif
bootblock-y += cache.c
-bootblock-y += cache_helpers.S
bootblock-y += cpu.S
bootblock-y += mmu.c
@@ -50,7 +49,6 @@ ifeq ($(CONFIG_ARCH_VERSTAGE_ARMV8_64),y)
verstage-y += cache.c
verstage-y += cpu.S
-verstage-y += cache_helpers.S
verstage-y += exception.c
verstage-generic-ccopts += $(armv8_flags)
@@ -63,7 +61,6 @@ endif
ifeq ($(CONFIG_ARCH_ROMSTAGE_ARMV8_64),y)
romstage-y += cache.c
-romstage-y += cache_helpers.S
romstage-y += cpu.S
romstage-y += exception.c
romstage-y += mmu.c
@@ -80,7 +77,6 @@ endif
ifeq ($(CONFIG_ARCH_RAMSTAGE_ARMV8_64),y)
ramstage-y += cache.c
-ramstage-y += cache_helpers.S
ramstage-y += cpu.S
ramstage-y += exception.c
ramstage-y += mmu.c
diff --git a/src/arch/arm64/armv8/cache.c b/src/arch/arm64/armv8/cache.c
index 4f91de02f2..4b99cd7830 100644
--- a/src/arch/arm64/armv8/cache.c
+++ b/src/arch/arm64/armv8/cache.c
@@ -34,7 +34,6 @@
#include <stdint.h>
#include <arch/cache.h>
-#include <arch/cache_helpers.h>
#include <arch/lib_helpers.h>
#include <program_loading.h>
@@ -121,7 +120,11 @@ void dcache_invalidate_by_mva(void const *addr, size_t len)
void cache_sync_instructions(void)
{
- flush_dcache_all(DCCISW); /* includes trailing DSB (in assembly) */
+ uint32_t sctlr = raw_read_sctlr_current();
+ if (sctlr & SCTLR_C)
+ dcache_clean_all(); /* includes trailing DSB (assembly) */
+ else if (sctlr & SCTLR_I)
+ dcache_clean_invalidate_all();
icache_invalidate_all(); /* includes leading DSB and trailing ISB. */
}
@@ -131,6 +134,10 @@ void cache_sync_instructions(void)
*/
void arch_segment_loaded(uintptr_t start, size_t size, int flags)
{
- dcache_clean_invalidate_by_mva((void *)start, size);
+ uint32_t sctlr = raw_read_sctlr_current();
+ if (sctlr & SCTLR_C)
+ dcache_clean_by_mva((void *)start, size);
+ else if (sctlr & SCTLR_I)
+ dcache_clean_invalidate_by_mva((void *)start, size);
icache_invalidate_all();
}
diff --git a/src/arch/arm64/armv8/cache_helpers.S b/src/arch/arm64/armv8/cache_helpers.S
deleted file mode 100644
index b94bc30781..0000000000
--- a/src/arch/arm64/armv8/cache_helpers.S
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
- * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <arch/asm.h>
-#include <arch/cache_helpers.h>
-
- /* ---------------------------------------------------------------
- * Data cache operations by set/way to the level specified
- *
- * The main function, do_dcsw_op requires:
- * x0: The operation type (0-2), as defined in cache_helpers.h
- * x3: The last cache level to operate on
- * x9: clidr_el1
- * and will carry out the operation on each data cache from level 0
- * to the level in x3 in sequence
- *
- * The dcsw_op macro sets up the x3 and x9 parameters based on
- * clidr_el1 cache information before invoking the main function
- * ---------------------------------------------------------------
- */
-
-.macro dcsw_op shift, fw, ls
- mrs x9, clidr_el1
- ubfx x3, x9, \shift, \fw
- lsl x3, x3, \ls
- b do_dcsw_op
-.endm
-
-ENTRY(do_dcsw_op)
- cbz x3, exit
- mov x10, xzr
- adr x14, dcsw_loop_table // compute inner loop address
- add x14, x14, x0, lsl #5 // inner loop is 8x32-bit instructions
- mov x0, x9
- mov w8, #1
-loop1:
- add x2, x10, x10, lsr #1 // work out 3x current cache level
- lsr x1, x0, x2 // extract cache type bits from clidr
- and x1, x1, #7 // mask the bits for current cache only
- cmp x1, #2 // see what cache we have at this level
- b.lt level_done // nothing to do if no cache or icache
-
- msr csselr_el1, x10 // select current cache level in csselr
- isb // isb to sync the new csselr & ccsidr
- mrs x1, ccsidr_el1 // read the new ccsidr
- and x2, x1, #7 // extract the length of the cache lines
- add x2, x2, #4 // add 4 (line length offset)
- ubfx x4, x1, #3, #10 // maximum way number
- clz w5, w4 // bit position of way size increment
- lsl w9, w4, w5 // w9 = aligned max way number
- lsl w16, w8, w5 // w16 = way number loop decrement
- orr w9, w10, w9 // w9 = combine way and cache number
- ubfx w6, w1, #13, #15 // w6 = max set number
- lsl w17, w8, w2 // w17 = set number loop decrement
- dsb sy // barrier before we start this level
- br x14 // jump to DC operation specific loop
-
-level_done:
- add x10, x10, #2 // increment cache number
- cmp x3, x10
- b.gt loop1
- msr csselr_el1, xzr // select cache level 0 in csselr
- dsb sy // barrier to complete final cache operation
- isb
-exit:
- ret
-ENDPROC(do_dcsw_op)
-
-.macro dcsw_loop _op
-loop2_\_op:
- lsl w7, w6, w2 // w7 = aligned max set number
-
-loop3_\_op:
- orr w11, w9, w7 // combine cache, way and set number
- dc \_op, x11
- subs w7, w7, w17 // decrement set number
- b.ge loop3_\_op
-
- subs x9, x9, x16 // decrement way number
- b.ge loop2_\_op
-
- b level_done
-.endm
-
-dcsw_loop_table:
- dcsw_loop isw
- dcsw_loop cisw
- dcsw_loop csw
-
-ENTRY(flush_dcache_louis)
- dcsw_op #LOUIS_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT
-ENDPROC(flush_dcache_louis)
-
-ENTRY(flush_dcache_all)
- dcsw_op #LOC_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT
-ENDPROC(flush_dcache_all)
diff --git a/src/arch/arm64/armv8/cpu.S b/src/arch/arm64/armv8/cpu.S
index 711c338685..1bb8c838ad 100644
--- a/src/arch/arm64/armv8/cpu.S
+++ b/src/arch/arm64/armv8/cpu.S
@@ -1,8 +1,8 @@
/*
- * Based on arch/arm/include/asm/cacheflush.h
+ * Optimized assembly for low-level CPU operations on ARM64 processors.
*
- * Copyright (C) 1999-2002 Russell King.
- * Copyright (C) 2012 ARM Ltd.
+ * Copyright (c) 2010 Per Odlund <per.odlund@armagedon.se>
+ * Copyright (c) 2014 Google Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -15,11 +15,77 @@
*/
#include <arch/asm.h>
-#include <arch/cache_helpers.h>
+
+.macro dcache_apply_all crm
+ dsb sy
+ mrs x0, clidr_el1 // read CLIDR
+ and w3, w0, #0x07000000 // narrow to LoC
+ lsr w3, w3, #23 // w3 = 2 * LoC (same scale as w10 / CSSELR level field)
+ cbz w3, 5f //done
+
+ mov w10, #0 // w10 = 2 * cache level
+ mov w8, #1 // w8 = constant 0b1
+
+1: //next_level
+ add w2, w10, w10, lsr #1 // calculate 3 * cache level
+ lsr w1, w0, w2 // extract 3-bit cache type for this level
+ and w1, w1, #0x7 // w1 = cache type
+ cmp w1, #2 // is it data or i&d?
+ b.lt 4f //skip
+ msr csselr_el1, x10 // select current cache level
+ isb // sync change of csselr
+ mrs x1, ccsidr_el1 // w1 = read ccsidr
+ and w2, w1, #7 // w2 = log2(linelen_bytes) - 4
+ add w2, w2, #4 // w2 = log2(linelen_bytes)
+ ubfx w4, w1, #3, #10 // w4 = associativity - 1 (also
+ // max way number)
+ clz w5, w4 // w5 = 32 - log2(ways)
+ // (bit position of way in DC)
+ lsl w9, w4, w5 // w9 = max way number
+ // (aligned for DC)
+ lsl w16, w8, w5 // w16 = amount to decrement (way
+ // number per iteration)
+2: //next_way
+ ubfx w7, w1, #13, #15 // w7 = max set #, right aligned
+ lsl w7, w7, w2 // w7 = max set #, DC aligned
+ lsl w17, w8, w2 // w17 = amount to decrement (set
+ // number per iteration)
+
+3: //next_set
+ orr w11, w10, w9 // w11 = combine way # & cache #
+ orr w11, w11, w7 // ... and set #
+ dc \crm, x11 // clean and/or invalidate line
+ subs w7, w7, w17 // decrement set number
+ b.ge 3b //next_set
+ subs x9, x9, x16 // decrement way number
+ b.ge 2b //next_way
+
+4: //skip
+ add w10, w10, #2 // increment 2 *cache level
+ cmp w3, w10 // Went beyond LoC?
+ b.gt 1b //next_level
+
+5: //done
+ dsb sy
+ isb
+ ret
+.endm
+
+ENTRY(dcache_invalidate_all)
+ dcache_apply_all crm=isw
+ENDPROC(dcache_invalidate_all)
+
+ENTRY(dcache_clean_all)
+ dcache_apply_all crm=csw
+ENDPROC(dcache_clean_all)
+
+ENTRY(dcache_clean_invalidate_all)
+ dcache_apply_all crm=cisw
+ENDPROC(dcache_clean_invalidate_all)
/*
* Bring an ARMv8 processor we just gained control of (e.g. from IROM) into a
- * known state regarding caches/SCTLR/PSTATE. Completely cleans and invalidates
+ * known state regarding caches/SCTLR/PSTATE. Completely invalidates
* icache/dcache, disables MMU and dcache (if active), and enables unaligned
* accesses, icache and branch prediction (if inactive). Seeds the stack and
* initializes SP_EL0. Clobbers R22 and R23.
@@ -41,9 +107,8 @@ ENTRY(arm64_init_cpu)
msr sctlr_el3, x22
isb
- /* Flush and invalidate dcache */
- mov x0, #DCCISW
- bl flush_dcache_all
+ /* Invalidate dcache */
+ bl dcache_invalidate_all
/* Deactivate MMU (0), Alignment Check (1) and DCache (2) */
and x22, x22, # ~(1 << 0) & ~(1 << 1) & ~(1 << 2)
diff --git a/src/arch/arm64/armv8/mmu.c b/src/arch/arm64/armv8/mmu.c
index 9280fc2d7c..55bd703d44 100644
--- a/src/arch/arm64/armv8/mmu.c
+++ b/src/arch/arm64/armv8/mmu.c
@@ -37,7 +37,6 @@
#include <arch/mmu.h>
#include <arch/lib_helpers.h>
#include <arch/cache.h>
-#include <arch/cache_helpers.h>
/* This just caches the next free table slot (okay to do since they fill up from
* bottom to top and can never be freed up again). It will reset to its initial
@@ -295,7 +294,7 @@ void mmu_enable(void)
*/
void mmu_disable(void)
{
- flush_dcache_all(DCCISW);
+ dcache_clean_invalidate_all();
uint32_t sctlr = raw_read_sctlr_el3();
sctlr &= ~(SCTLR_C | SCTLR_M);
raw_write_sctlr_el3(sctlr);
diff --git a/src/arch/arm64/include/arch/cache_helpers.h b/src/arch/arm64/include/arch/cache_helpers.h
deleted file mode 100644
index 2919d99492..0000000000
--- a/src/arch/arm64/include/arch/cache_helpers.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
- * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CACHE_HELPERS_H
-
-/* CLIDR definitions */
-#define LOUIS_SHIFT 21
-#define LOC_SHIFT 24
-#define CLIDR_FIELD_WIDTH 3
-
-/* CSSELR definitions */
-#define LEVEL_SHIFT 1
-
-/* D$ set/way op type defines */
-#define DCISW 0x0
-#define DCCISW 0x1
-#define DCCSW 0x2
-
-#endif /* __CACHE_HELPERS_H */
diff --git a/src/arch/arm64/include/armv8/arch/cache.h b/src/arch/arm64/include/armv8/arch/cache.h
index 84f051d533..3647290866 100644
--- a/src/arch/arm64/include/armv8/arch/cache.h
+++ b/src/arch/arm64/include/armv8/arch/cache.h
@@ -67,11 +67,10 @@ void dcache_clean_invalidate_by_mva(void const *addr, size_t len);
/* dcache invalidate by virtual address to PoC */
void dcache_invalidate_by_mva(void const *addr, size_t len);
-/* dcache invalidate all */
-void flush_dcache_all(int op_type);
-
-/* flush the dcache up to the Level of Unification Inner Shareable */
-void flush_dcache_louis(int op_type);
+/* dcache clean and/or invalidate all sets/ways to PoC */
+void dcache_clean_all(void);
+void dcache_invalidate_all(void);
+void dcache_clean_invalidate_all(void);
/* returns number of bytes per cache line */
unsigned int dcache_line_bytes(void);