aboutsummaryrefslogtreecommitdiff
path: root/src/arch/armv7/lib/_uldivmod.S
blob: 65e4fa8441e72dccf41dc6cd4612756350bc8a9f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
/*
 * Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 */

/*
 * A, Q = r0 + (r1 << 32)
 * B, R = r2 + (r3 << 32)
 * A / B = Q ... R
 */

	.text
	.global	__aeabi_uldivmod
	.type	__aeabi_uldivmod, function
	.align	0

A_0	.req	r0
A_1	.req	r1
B_0	.req	r2
B_1	.req	r3
C_0	.req	r4
C_1	.req	r5
D_0	.req	r6
D_1	.req	r7

Q_0	.req	r0
Q_1	.req	r1
R_0	.req	r2
R_1	.req	r3

__aeabi_uldivmod:
	stmfd	sp!, {r4, r5, r6, r7, lr}
	@ Test if B == 0
	orrs	ip, B_0, B_1		@ Z set -> B == 0
	beq	L_div_by_0
	@ Test if B is power of 2: (B & (B - 1)) == 0
	subs	C_0, B_0, #1
	sbc	C_1, B_1, #0
	tst	C_0, B_0
	tsteq	B_1, C_1
	beq	L_pow2
	@ Test if A_1 == B_1 == 0
	orrs	ip, A_1, B_1
	beq	L_div_32_32

L_div_64_64:
	mov	C_0, #1
	mov	C_1, #0
	@ D_0 = clz A
	teq	A_1, #0
	clz	D_0, A_1
	clzeq	ip, A_0
	addeq	D_0, D_0, ip
	@ D_1 = clz B
	teq	B_1, #0
	clz	D_1, B_1
	clzeq	ip, B_0
	addeq	D_1, D_1, ip
	@ if clz B - clz A > 0
	subs	D_0, D_1, D_0
	bls	L_done_shift
	@ B <<= (clz B - clz A)
	subs	D_1, D_0, #32
	rsb	ip, D_0, #32
	movmi	B_1, B_1, lsl D_0
	orrmi	B_1, B_1, B_0, lsr ip
	movpl	B_1, B_0, lsl D_1
	mov	B_0, B_0, lsl D_0
	@ C = 1 << (clz B - clz A)
	movmi	C_1, C_1, lsl D_0
	orrmi	C_1, C_1, C_0, lsr ip
	movpl	C_1, C_0, lsl D_1
	mov	C_0, C_0, lsl D_0
L_done_shift:
	mov	D_0, #0
	mov	D_1, #0
	@ C: current bit; D: result
L_subtract:
	@ if A >= B
	cmp	A_1, B_1
	cmpeq	A_0, B_0
	bcc	L_update
	@ A -= B
	subs	A_0, A_0, B_0
	sbc	A_1, A_1, B_1
	@ D |= C
	orr	D_0, D_0, C_0
	orr	D_1, D_1, C_1
L_update:
	@ if A == 0: break
	orrs	ip, A_1, A_0
	beq	L_exit
	@ C >>= 1
	movs	C_1, C_1, lsr #1
	movs	C_0, C_0, rrx
	@ if C == 0: break
	orrs	ip, C_1, C_0
	beq	L_exit
	@ B >>= 1
	movs	B_1, B_1, lsr #1
	mov	B_0, B_0, rrx
	b	L_subtract
L_exit:
	@ Note: A, B & Q, R are aliases
	mov	R_0, A_0
	mov	R_1, A_1
	mov	Q_0, D_0
	mov	Q_1, D_1
	ldmfd	sp!, {r4, r5, r6, r7, pc}

L_div_32_32:
	@ Note:	A_0 &	r0 are aliases
	@	Q_1	r1
	mov	r1, B_0
	bl	__aeabi_uidivmod
	mov	R_0, r1
	mov	R_1, #0
	mov	Q_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, pc}

L_pow2:
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	clz	D_0, B_0
	add	D_1, D_0, #1
	rsbs	D_0, D_0, #31
	bpl	L_1
	clz	D_0, B_1
	rsb	D_0, D_0, #31
	mov	A_0, A_1, lsr D_0
	add	D_0, D_0, #32
L_1:
	movpl	A_0, A_0, lsr D_0
	orrpl	A_0, A_0, A_1, lsl D_1
	mov	A_1, A_1, lsr D_0
	@ Mov back C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, pc}

L_div_by_0:
	bl	__div0
	@ As wrong as it could be
	mov	Q_0, #0
	mov	Q_1, #0
	mov	R_0, #0
	mov	R_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, pc}