From d65e214d666269d0bd20d88ba2bc83349810c668 Mon Sep 17 00:00:00 2001 From: Julius Werner Date: Fri, 13 Dec 2013 12:59:57 -0800 Subject: arm: Update mem* functions to newer versions The memcpy/memset/memmove assembly implementations have been taken from U-Boot, which originally got them from Linux. I turns out that they are actually not that bad, but they could use an update. This patch pulls in the current Linux upstream versions of those files, removing some old U-Boot cruft such as checking whether the two pointers in a memcpy() are equal (really now?) or side-stepping the R8 register because it was used for special purposes. It also returns to the good old Linux ENTRY/ENDPROC macros since we have them now anyway, and straightens out the W() macro in preparation for unified thumb support. Change-Id: I138af269b423bef0a237759ac29f1ee58ca206a0 Signed-off-by: Julius Werner Reviewed-on: https://chromium-review.googlesource.com/182179 Reviewed-by: Vincent Palatin (cherry picked from commit 777127997bde5785b21d422d0b6eb04c4328b478) Signed-off-by: Isaac Christensen Reviewed-on: http://review.coreboot.org/6918 Tested-by: build bot (Jenkins) Reviewed-by: David Hendricks --- src/arch/arm/asmlib.h | 72 ++++++++++++++++++++++++++ src/arch/arm/include/arch/asm.h | 5 ++ src/arch/arm/include/assembler.h | 62 ---------------------- src/arch/arm/memcpy.S | 13 ++--- src/arch/arm/memmove.S | 10 ++-- src/arch/arm/memset.S | 109 +++++++++++++++++++-------------------- 6 files changed, 140 insertions(+), 131 deletions(-) create mode 100644 src/arch/arm/asmlib.h delete mode 100644 src/arch/arm/include/assembler.h (limited to 'src/arch/arm') diff --git a/src/arch/arm/asmlib.h b/src/arch/arm/asmlib.h new file mode 100644 index 0000000000..cef0e7ea1a --- /dev/null +++ b/src/arch/arm/asmlib.h @@ -0,0 +1,72 @@ +/* + * arch/arm/asmlib.h + * + * Adapted from Linux arch/arm/include/assembler.h + * + * Copyright (C) 1996-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This file contains arm architecture specific defines + * for the different processors. + * + * Do not include any C declarations in this file - it is included by + * assembler source. + */ + +/* + * WARNING: This file is *only* meant for memcpy.S and friends which were copied + * from Linux and require some weird macros. It does unspeakable things like + * redefining "push", so do *not* try to turn it into a general assembly macro + * file, and keep it out of global include directories. + */ + +#ifndef __ARM_ASMLIB_H__ +#define __ARM_ASMLIB_H__ + +/* + * Endian independent macros for shifting bytes within registers. + */ +#ifndef __ARMEB__ +#define pull lsr +#define push lsl +#define get_byte_0 lsl #0 +#define get_byte_1 lsr #8 +#define get_byte_2 lsr #16 +#define get_byte_3 lsr #24 +#define put_byte_0 lsl #0 +#define put_byte_1 lsl #8 +#define put_byte_2 lsl #16 +#define put_byte_3 lsl #24 +#else +#define pull lsl +#define push lsr +#define get_byte_0 lsr #24 +#define get_byte_1 lsr #16 +#define get_byte_2 lsr #8 +#define get_byte_3 lsl #0 +#define put_byte_0 lsl #24 +#define put_byte_1 lsl #16 +#define put_byte_2 lsl #8 +#define put_byte_3 lsl #0 +#endif + +/* + * Data preload for architectures that support it + */ +#if __COREBOOT_ARM_ARCH__ >= 5 +#define PLD(code...) code +#else +#define PLD(code...) +#endif + +/* + * This can be used to enable code to cacheline align the destination + * pointer when bulk writing to memory. Linux doesn't enable this except + * for the "Feroceon" processor, so we better just leave it out. + */ +#define CALGN(code...) + +#endif /* __ARM_ASMLIB_H */ diff --git a/src/arch/arm/include/arch/asm.h b/src/arch/arm/include/arch/asm.h index 1c3f7a21c3..5f3e55f135 100644 --- a/src/arch/arm/include/arch/asm.h +++ b/src/arch/arm/include/arch/asm.h @@ -23,9 +23,14 @@ #if defined __arm__ # define ARM(x...) x # define THUMB(x...) +# define W(instr) instr #elif defined __thumb__ # define ARM(x...) # define THUMB(x...) x +# define W(instr) instr.w +# if __COREBOOT_ARM_ARCH__ < 7 +# error thumb mode has not been tested with ARM < v7! +# endif #else # error Not in ARM or thumb mode! #endif diff --git a/src/arch/arm/include/assembler.h b/src/arch/arm/include/assembler.h deleted file mode 100644 index 10363c4e4e..0000000000 --- a/src/arch/arm/include/assembler.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * arch/arm/include/asm/assembler.h - * - * Copyright (C) 1996-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This file contains arm architecture specific defines - * for the different processors. - * - * Do not include any C declarations in this file - it is included by - * assembler source. - */ - -/* - * Endian independent macros for shifting bytes within registers. - */ -#ifndef __ARMEB__ -#define pull lsr -#define push lsl -#define get_byte_0 lsl #0 -#define get_byte_1 lsr #8 -#define get_byte_2 lsr #16 -#define get_byte_3 lsr #24 -#define put_byte_0 lsl #0 -#define put_byte_1 lsl #8 -#define put_byte_2 lsl #16 -#define put_byte_3 lsl #24 -#else -#define pull lsl -#define push lsr -#define get_byte_0 lsr #24 -#define get_byte_1 lsr #16 -#define get_byte_2 lsr #8 -#define get_byte_3 lsl #0 -#define put_byte_0 lsl #24 -#define put_byte_1 lsl #16 -#define put_byte_2 lsl #8 -#define put_byte_3 lsl #0 -#endif - -/* - * Data preload for architectures that support it - */ -#if defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \ - defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ - defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) || \ - defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_7A__) || \ - defined(__ARM_ARCH_7R__) -#define PLD(code...) code -#else -#define PLD(code...) -#endif - -/* - * Cache aligned - */ -#define CALGN(code...) code - -#define W(instr) instr diff --git a/src/arch/arm/memcpy.S b/src/arch/arm/memcpy.S index 921fc2a6bc..b8f857bb56 100644 --- a/src/arch/arm/memcpy.S +++ b/src/arch/arm/memcpy.S @@ -10,9 +10,8 @@ * published by the Free Software Foundation. */ -#include - -#define W(instr) instr +#include +#include "asmlib.h" #define LDR1W_SHIFT 0 #define STR1W_SHIFT 0 @@ -57,12 +56,7 @@ /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ -.type memcpy, function -.globl memcpy -memcpy: - - cmp r0, r1 - moveq pc, lr +ENTRY(memcpy) enter r4, lr @@ -242,3 +236,4 @@ memcpy: 17: forward_copy_shift pull=16 push=16 18: forward_copy_shift pull=24 push=8 +ENDPROC(memcpy) diff --git a/src/arch/arm/memmove.S b/src/arch/arm/memmove.S index a2f9ea18ae..dc29f7458c 100644 --- a/src/arch/arm/memmove.S +++ b/src/arch/arm/memmove.S @@ -10,7 +10,8 @@ * published by the Free Software Foundation. */ -#include +#include +#include "asmlib.h" .text @@ -25,9 +26,8 @@ * occurring in the opposite direction. */ -.type memmove, function -.globl memmove -memmove: +ENTRY(memmove) + subs ip, r0, r1 cmphi r2, ip bls memcpy @@ -195,3 +195,5 @@ memmove: 17: backward_copy_shift push=16 pull=16 18: backward_copy_shift push=24 pull=8 + +ENDPROC(memmove) diff --git a/src/arch/arm/memset.S b/src/arch/arm/memset.S index a3cc9477f8..945767c599 100644 --- a/src/arch/arm/memset.S +++ b/src/arch/arm/memset.S @@ -9,33 +9,21 @@ * * ASM optimised string functions */ -#include + +#include +#include "asmlib.h" .text .align 5 - .word 0 -1: subs r2, r2, #4 @ 1 do we have enough - blt 5f @ 1 bytes to align with? - cmp r3, #2 @ 1 - strltb r1, [r0], #1 @ 1 - strleb r1, [r0], #1 @ 1 - strb r1, [r0], #1 @ 1 - add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) -/* - * The pointer is now aligned and the length is adjusted. Try doing the - * memset again. - */ - -.type memset, function -.globl memset -memset: +ENTRY(memset) ands r3, r0, #3 @ 1 unaligned? - bne 1b @ 1 + mov ip, r0 @ preserve r0 as return value + bne 6f @ 1 /* - * we know that the pointer in r0 is aligned to a word boundary. + * we know that the pointer in ip is aligned to a word boundary. */ - orr r1, r1, r1, lsl #8 +1: orr r1, r1, r1, lsl #8 orr r1, r1, r1, lsl #16 mov r3, r1 cmp r2, #16 @@ -44,29 +32,28 @@ memset: #if ! CALGN(1)+0 /* - * We need an extra register for this loop - save the return address and - * use the LR + * We need 2 extra registers for this loop - use r8 and the LR */ - str lr, [sp, #-4]! - mov ip, r1 + stmfd sp!, {r8, lr} + mov r8, r1 mov lr, r1 2: subs r2, r2, #64 - stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} + stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time. + stmgeia ip!, {r1, r3, r8, lr} + stmgeia ip!, {r1, r3, r8, lr} + stmgeia ip!, {r1, r3, r8, lr} bgt 2b - ldmeqfd sp!, {pc} @ Now <64 bytes to go. + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. /* * No need to correct the count; we're only testing bits from now on */ tst r2, #32 - stmneia r0!, {r1, r3, ip, lr} - stmneia r0!, {r1, r3, ip, lr} + stmneia ip!, {r1, r3, r8, lr} + stmneia ip!, {r1, r3, r8, lr} tst r2, #16 - stmneia r0!, {r1, r3, ip, lr} - ldr lr, [sp], #4 + stmneia ip!, {r1, r3, r8, lr} + ldmfd sp!, {r8, lr} #else @@ -75,53 +62,63 @@ memset: * whole cache lines at once. */ - stmfd sp!, {r4-r7, lr} + stmfd sp!, {r4-r8, lr} mov r4, r1 mov r5, r1 mov r6, r1 mov r7, r1 - mov ip, r1 + mov r8, r1 mov lr, r1 cmp r2, #96 - tstgt r0, #31 + tstgt ip, #31 ble 3f - and ip, r0, #31 - rsb ip, ip, #32 - sub r2, r2, ip - movs ip, ip, lsl #(32 - 4) - stmcsia r0!, {r4, r5, r6, r7} - stmmiia r0!, {r4, r5} - tst ip, #(1 << 30) - mov ip, r1 - strne r1, [r0], #4 + and r8, ip, #31 + rsb r8, r8, #32 + sub r2, r2, r8 + movs r8, r8, lsl #(32 - 4) + stmcsia ip!, {r4, r5, r6, r7} + stmmiia ip!, {r4, r5} + tst r8, #(1 << 30) + mov r8, r1 + strne r1, [ip], #4 3: subs r2, r2, #64 - stmgeia r0!, {r1, r3-r7, ip, lr} - stmgeia r0!, {r1, r3-r7, ip, lr} + stmgeia ip!, {r1, r3-r8, lr} + stmgeia ip!, {r1, r3-r8, lr} bgt 3b - ldmeqfd sp!, {r4-r7, pc} + ldmeqfd sp!, {r4-r8, pc} tst r2, #32 - stmneia r0!, {r1, r3-r7, ip, lr} + stmneia ip!, {r1, r3-r8, lr} tst r2, #16 - stmneia r0!, {r4-r7} - ldmfd sp!, {r4-r7, lr} + stmneia ip!, {r4-r7} + ldmfd sp!, {r4-r8, lr} #endif 4: tst r2, #8 - stmneia r0!, {r1, r3} + stmneia ip!, {r1, r3} tst r2, #4 - strne r1, [r0], #4 + strne r1, [ip], #4 /* * When we get here, we've got less than 4 bytes to zero. We * may have an unaligned pointer as well. */ 5: tst r2, #2 - strneb r1, [r0], #1 - strneb r1, [r0], #1 + strneb r1, [ip], #1 + strneb r1, [ip], #1 tst r2, #1 - strneb r1, [r0], #1 + strneb r1, [ip], #1 mov pc, lr + +6: subs r2, r2, #4 @ 1 do we have enough + blt 5b @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r1, [ip], #1 @ 1 + strleb r1, [ip], #1 @ 1 + strb r1, [ip], #1 @ 1 + add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) + b 1b +ENDPROC(memset) -- cgit v1.2.3