1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
|
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on linux/arch/arm/lib/memcpy.S
*/
#include <arch/asm.h>
#include "asmlib.h"
#define LDR1W_SHIFT 0
#define STR1W_SHIFT 0
.macro ldr1w ptr reg abort
W(ldr) \reg, [\ptr], #4
.endm
.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
.endm
.macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
.endm
.macro ldr1b ptr reg cond=al abort
ldr\cond\()b \reg, [\ptr], #1
.endm
.macro str1w ptr reg abort
W(str) \reg, [\ptr], #4
.endm
.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
.endm
.macro str1b ptr reg cond=al abort
str\cond\()b \reg, [\ptr], #1
.endm
.macro enter reg1 reg2
stmdb sp!, {r0, \reg1, \reg2}
.endm
.macro exit reg1 reg2
ldmfd sp!, {r0, \reg1, \reg2}
.endm
/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
ENTRY(memcpy)
enter r4, lr
subs r2, r2, #4
blt 8f
ands ip, r0, #3
PLD( pld [r1, #0] )
bne 9f
ands ip, r1, #3
bne 10f
1: subs r2, r2, #(28)
stmfd sp!, {r5 - r8}
blt 5f
CALGN( ands ip, r0, #31 )
CALGN( rsb r3, ip, #32 )
CALGN( sbcnes r4, r3, r2 ) @ C is always set here
CALGN( bcs 2f )
CALGN( adr r4, 6f )
CALGN( subs r2, r2, r3 ) @ C gets set
CALGN( add pc, r4, ip )
PLD( pld [r1, #0] )
2: PLD( subs r2, r2, #96 )
PLD( pld [r1, #28] )
PLD( blt 4f )
PLD( pld [r1, #60] )
PLD( pld [r1, #92] )
3: PLD( pld [r1, #124] )
4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
subs r2, r2, #32
str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
bge 3b
PLD( cmn r2, #96 )
PLD( bge 4b )
5: ands ip, r2, #28
rsb ip, ip, #32
#if LDR1W_SHIFT > 0
lsl ip, ip, #LDR1W_SHIFT
#endif
addne pc, pc, ip @ C is always clear here
b 7f
6:
.rept (1 << LDR1W_SHIFT)
W(nop)
.endr
ldr1w r1, r3, abort=20f
ldr1w r1, r4, abort=20f
ldr1w r1, r5, abort=20f
ldr1w r1, r6, abort=20f
ldr1w r1, r7, abort=20f
ldr1w r1, r8, abort=20f
ldr1w r1, lr, abort=20f
#if LDR1W_SHIFT < STR1W_SHIFT
lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT
#elif LDR1W_SHIFT > STR1W_SHIFT
lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT
#endif
add pc, pc, ip
nop
.rept (1 << STR1W_SHIFT)
W(nop)
.endr
str1w r0, r3, abort=20f
str1w r0, r4, abort=20f
str1w r0, r5, abort=20f
str1w r0, r6, abort=20f
str1w r0, r7, abort=20f
str1w r0, r8, abort=20f
str1w r0, lr, abort=20f
CALGN( bcs 2b )
7: ldmfd sp!, {r5 - r8}
8: movs r2, r2, lsl #31
ldr1b r1, r3, ne, abort=21f
ldr1b r1, r4, cs, abort=21f
ldr1b r1, ip, cs, abort=21f
str1b r0, r3, ne, abort=21f
str1b r0, r4, cs, abort=21f
str1b r0, ip, cs, abort=21f
exit r4, pc
9: rsb ip, ip, #4
cmp ip, #2
ldr1b r1, r3, gt, abort=21f
ldr1b r1, r4, ge, abort=21f
ldr1b r1, lr, abort=21f
str1b r0, r3, gt, abort=21f
str1b r0, r4, ge, abort=21f
subs r2, r2, ip
str1b r0, lr, abort=21f
blt 8b
ands ip, r1, #3
beq 1b
10: bic r1, r1, #3
cmp ip, #2
ldr1w r1, lr, abort=21f
beq 17f
bgt 18f
.macro forward_copy_shift pull push
subs r2, r2, #28
blt 14f
CALGN( ands ip, r0, #31 )
CALGN( rsb ip, ip, #32 )
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
CALGN( subcc r2, r2, ip )
CALGN( bcc 15f )
11: stmfd sp!, {r5 - r9}
PLD( pld [r1, #0] )
PLD( subs r2, r2, #96 )
PLD( pld [r1, #28] )
PLD( blt 13f )
PLD( pld [r1, #60] )
PLD( pld [r1, #92] )
12: PLD( pld [r1, #124] )
13: ldr4w r1, r4, r5, r6, r7, abort=19f
mov r3, lr, pull #\pull
subs r2, r2, #32
ldr4w r1, r8, r9, ip, lr, abort=19f
orr r3, r3, r4, push #\push
mov r4, r4, pull #\pull
orr r4, r4, r5, push #\push
mov r5, r5, pull #\pull
orr r5, r5, r6, push #\push
mov r6, r6, pull #\pull
orr r6, r6, r7, push #\push
mov r7, r7, pull #\pull
orr r7, r7, r8, push #\push
mov r8, r8, pull #\pull
orr r8, r8, r9, push #\push
mov r9, r9, pull #\pull
orr r9, r9, ip, push #\push
mov ip, ip, pull #\pull
orr ip, ip, lr, push #\push
str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
bge 12b
PLD( cmn r2, #96 )
PLD( bge 13b )
ldmfd sp!, {r5 - r9}
14: ands ip, r2, #28
beq 16f
15: mov r3, lr, pull #\pull
ldr1w r1, lr, abort=21f
subs ip, ip, #4
orr r3, r3, lr, push #\push
str1w r0, r3, abort=21f
bgt 15b
CALGN( cmp r2, #0 )
CALGN( bge 11b )
16: sub r1, r1, #(\push / 8)
b 8b
.endm
forward_copy_shift pull=8 push=24
17: forward_copy_shift pull=16 push=16
18: forward_copy_shift pull=24 push=8
ENDPROC(memcpy)
|