diff options
author | Julius Werner <jwerner@chromium.org> | 2013-12-13 12:59:57 -0800 |
---|---|---|
committer | Isaac Christensen <isaac.christensen@se-eng.com> | 2014-09-22 18:47:42 +0200 |
commit | d65e214d666269d0bd20d88ba2bc83349810c668 (patch) | |
tree | 4c7db158bc03fddf7bc4b373ee382cd1a2635979 /src/arch/arm | |
parent | 64b9ca9d4eb5eccdea86d967220c67b503a4519b (diff) |
arm: Update mem* functions to newer versions
The memcpy/memset/memmove assembly implementations have been taken from
U-Boot, which originally got them from Linux. I turns out that they are
actually not that bad, but they could use an update. This patch pulls in
the current Linux upstream versions of those files, removing some old
U-Boot cruft such as checking whether the two pointers in a memcpy() are
equal (really now?) or side-stepping the R8 register because it was used
for special purposes. It also returns to the good old Linux
ENTRY/ENDPROC macros since we have them now anyway, and straightens out
the W() macro in preparation for unified thumb support.
Change-Id: I138af269b423bef0a237759ac29f1ee58ca206a0
Signed-off-by: Julius Werner <jwerner@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/182179
Reviewed-by: Vincent Palatin <vpalatin@chromium.org>
(cherry picked from commit 777127997bde5785b21d422d0b6eb04c4328b478)
Signed-off-by: Isaac Christensen <isaac.christensen@se-eng.com>
Reviewed-on: http://review.coreboot.org/6918
Tested-by: build bot (Jenkins)
Reviewed-by: David Hendricks <dhendrix@chromium.org>
Diffstat (limited to 'src/arch/arm')
-rw-r--r-- | src/arch/arm/asmlib.h (renamed from src/arch/arm/include/assembler.h) | 28 | ||||
-rw-r--r-- | src/arch/arm/include/arch/asm.h | 5 | ||||
-rw-r--r-- | src/arch/arm/memcpy.S | 13 | ||||
-rw-r--r-- | src/arch/arm/memmove.S | 10 | ||||
-rw-r--r-- | src/arch/arm/memset.S | 109 |
5 files changed, 87 insertions, 78 deletions
diff --git a/src/arch/arm/include/assembler.h b/src/arch/arm/asmlib.h index 10363c4e4e..cef0e7ea1a 100644 --- a/src/arch/arm/include/assembler.h +++ b/src/arch/arm/asmlib.h @@ -1,5 +1,7 @@ /* - * arch/arm/include/asm/assembler.h + * arch/arm/asmlib.h + * + * Adapted from Linux arch/arm/include/assembler.h * * Copyright (C) 1996-2000 Russell King * @@ -15,6 +17,16 @@ */ /* + * WARNING: This file is *only* meant for memcpy.S and friends which were copied + * from Linux and require some weird macros. It does unspeakable things like + * redefining "push", so do *not* try to turn it into a general assembly macro + * file, and keep it out of global include directories. + */ + +#ifndef __ARM_ASMLIB_H__ +#define __ARM_ASMLIB_H__ + +/* * Endian independent macros for shifting bytes within registers. */ #ifndef __ARMEB__ @@ -44,19 +56,17 @@ /* * Data preload for architectures that support it */ -#if defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \ - defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ - defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) || \ - defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_7A__) || \ - defined(__ARM_ARCH_7R__) +#if __COREBOOT_ARM_ARCH__ >= 5 #define PLD(code...) code #else #define PLD(code...) #endif /* - * Cache aligned + * This can be used to enable code to cacheline align the destination + * pointer when bulk writing to memory. Linux doesn't enable this except + * for the "Feroceon" processor, so we better just leave it out. */ -#define CALGN(code...) code +#define CALGN(code...) -#define W(instr) instr +#endif /* __ARM_ASMLIB_H */ diff --git a/src/arch/arm/include/arch/asm.h b/src/arch/arm/include/arch/asm.h index 1c3f7a21c3..5f3e55f135 100644 --- a/src/arch/arm/include/arch/asm.h +++ b/src/arch/arm/include/arch/asm.h @@ -23,9 +23,14 @@ #if defined __arm__ # define ARM(x...) x # define THUMB(x...) +# define W(instr) instr #elif defined __thumb__ # define ARM(x...) # define THUMB(x...) x +# define W(instr) instr.w +# if __COREBOOT_ARM_ARCH__ < 7 +# error thumb mode has not been tested with ARM < v7! +# endif #else # error Not in ARM or thumb mode! #endif diff --git a/src/arch/arm/memcpy.S b/src/arch/arm/memcpy.S index 921fc2a6bc..b8f857bb56 100644 --- a/src/arch/arm/memcpy.S +++ b/src/arch/arm/memcpy.S @@ -10,9 +10,8 @@ * published by the Free Software Foundation. */ -#include <assembler.h> - -#define W(instr) instr +#include <arch/asm.h> +#include "asmlib.h" #define LDR1W_SHIFT 0 #define STR1W_SHIFT 0 @@ -57,12 +56,7 @@ /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ -.type memcpy, function -.globl memcpy -memcpy: - - cmp r0, r1 - moveq pc, lr +ENTRY(memcpy) enter r4, lr @@ -242,3 +236,4 @@ memcpy: 17: forward_copy_shift pull=16 push=16 18: forward_copy_shift pull=24 push=8 +ENDPROC(memcpy) diff --git a/src/arch/arm/memmove.S b/src/arch/arm/memmove.S index a2f9ea18ae..dc29f7458c 100644 --- a/src/arch/arm/memmove.S +++ b/src/arch/arm/memmove.S @@ -10,7 +10,8 @@ * published by the Free Software Foundation. */ -#include <assembler.h> +#include <arch/asm.h> +#include "asmlib.h" .text @@ -25,9 +26,8 @@ * occurring in the opposite direction. */ -.type memmove, function -.globl memmove -memmove: +ENTRY(memmove) + subs ip, r0, r1 cmphi r2, ip bls memcpy @@ -195,3 +195,5 @@ memmove: 17: backward_copy_shift push=16 pull=16 18: backward_copy_shift push=24 pull=8 + +ENDPROC(memmove) diff --git a/src/arch/arm/memset.S b/src/arch/arm/memset.S index a3cc9477f8..945767c599 100644 --- a/src/arch/arm/memset.S +++ b/src/arch/arm/memset.S @@ -9,33 +9,21 @@ * * ASM optimised string functions */ -#include <assembler.h> + +#include <arch/asm.h> +#include "asmlib.h" .text .align 5 - .word 0 -1: subs r2, r2, #4 @ 1 do we have enough - blt 5f @ 1 bytes to align with? - cmp r3, #2 @ 1 - strltb r1, [r0], #1 @ 1 - strleb r1, [r0], #1 @ 1 - strb r1, [r0], #1 @ 1 - add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) -/* - * The pointer is now aligned and the length is adjusted. Try doing the - * memset again. - */ - -.type memset, function -.globl memset -memset: +ENTRY(memset) ands r3, r0, #3 @ 1 unaligned? - bne 1b @ 1 + mov ip, r0 @ preserve r0 as return value + bne 6f @ 1 /* - * we know that the pointer in r0 is aligned to a word boundary. + * we know that the pointer in ip is aligned to a word boundary. */ - orr r1, r1, r1, lsl #8 +1: orr r1, r1, r1, lsl #8 orr r1, r1, r1, lsl #16 mov r3, r1 cmp r2, #16 @@ -44,29 +32,28 @@ memset: #if ! CALGN(1)+0 /* - * We need an extra register for this loop - save the return address and - * use the LR + * We need 2 extra registers for this loop - use r8 and the LR */ - str lr, [sp, #-4]! - mov ip, r1 + stmfd sp!, {r8, lr} + mov r8, r1 mov lr, r1 2: subs r2, r2, #64 - stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} + stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time. + stmgeia ip!, {r1, r3, r8, lr} + stmgeia ip!, {r1, r3, r8, lr} + stmgeia ip!, {r1, r3, r8, lr} bgt 2b - ldmeqfd sp!, {pc} @ Now <64 bytes to go. + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. /* * No need to correct the count; we're only testing bits from now on */ tst r2, #32 - stmneia r0!, {r1, r3, ip, lr} - stmneia r0!, {r1, r3, ip, lr} + stmneia ip!, {r1, r3, r8, lr} + stmneia ip!, {r1, r3, r8, lr} tst r2, #16 - stmneia r0!, {r1, r3, ip, lr} - ldr lr, [sp], #4 + stmneia ip!, {r1, r3, r8, lr} + ldmfd sp!, {r8, lr} #else @@ -75,53 +62,63 @@ memset: * whole cache lines at once. */ - stmfd sp!, {r4-r7, lr} + stmfd sp!, {r4-r8, lr} mov r4, r1 mov r5, r1 mov r6, r1 mov r7, r1 - mov ip, r1 + mov r8, r1 mov lr, r1 cmp r2, #96 - tstgt r0, #31 + tstgt ip, #31 ble 3f - and ip, r0, #31 - rsb ip, ip, #32 - sub r2, r2, ip - movs ip, ip, lsl #(32 - 4) - stmcsia r0!, {r4, r5, r6, r7} - stmmiia r0!, {r4, r5} - tst ip, #(1 << 30) - mov ip, r1 - strne r1, [r0], #4 + and r8, ip, #31 + rsb r8, r8, #32 + sub r2, r2, r8 + movs r8, r8, lsl #(32 - 4) + stmcsia ip!, {r4, r5, r6, r7} + stmmiia ip!, {r4, r5} + tst r8, #(1 << 30) + mov r8, r1 + strne r1, [ip], #4 3: subs r2, r2, #64 - stmgeia r0!, {r1, r3-r7, ip, lr} - stmgeia r0!, {r1, r3-r7, ip, lr} + stmgeia ip!, {r1, r3-r8, lr} + stmgeia ip!, {r1, r3-r8, lr} bgt 3b - ldmeqfd sp!, {r4-r7, pc} + ldmeqfd sp!, {r4-r8, pc} tst r2, #32 - stmneia r0!, {r1, r3-r7, ip, lr} + stmneia ip!, {r1, r3-r8, lr} tst r2, #16 - stmneia r0!, {r4-r7} - ldmfd sp!, {r4-r7, lr} + stmneia ip!, {r4-r7} + ldmfd sp!, {r4-r8, lr} #endif 4: tst r2, #8 - stmneia r0!, {r1, r3} + stmneia ip!, {r1, r3} tst r2, #4 - strne r1, [r0], #4 + strne r1, [ip], #4 /* * When we get here, we've got less than 4 bytes to zero. We * may have an unaligned pointer as well. */ 5: tst r2, #2 - strneb r1, [r0], #1 - strneb r1, [r0], #1 + strneb r1, [ip], #1 + strneb r1, [ip], #1 tst r2, #1 - strneb r1, [r0], #1 + strneb r1, [ip], #1 mov pc, lr + +6: subs r2, r2, #4 @ 1 do we have enough + blt 5b @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r1, [ip], #1 @ 1 + strleb r1, [ip], #1 @ 1 + strb r1, [ip], #1 @ 1 + add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) + b 1b +ENDPROC(memset) |