src/arch/arm/armv7/cpu.S


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148

/*
 * Optimized assembly for low-level CPU operations on ARMv7 processors.
 *
 * Cache flushing code based off sys/arch/arm/arm/cpufunc_asm_armv7.S in NetBSD
 *
 * Copyright (c) 2010 Per Odlund <per.odlund@armagedon.se>
 * Copyright (c) 2014 Google Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <arch/asm.h>

/*
 * Dcache invalidations by set/way work by passing a [way:sbz:set:sbz:level:0]
 * bitfield in a register to the appropriate MCR instruction. This algorithm
 * works by initializing a bitfield with the highest-numbered set and way, and
 * generating a "set decrement" and a "way decrement". The former just contains
 * the LSB of the set field, but the latter contains the LSB of the way field
 * minus the highest valid set field... such that when you subtract it from a
 * [way:0:level] field you end up with a [way - 1:highest_set:level] field
 * through the magic of double subtraction. It's quite ingenius, really.
 * Takes care to only use r0-r3 and ip so it's pefectly ABI-compatible without
 * needing to write to memory.
 */

.macro	dcache_apply_all crm
	dsb
	mov	r3, #-2			@ initialize level so that we start at 0

1:	@next_level
	add	r3, r3, #2		@ increment level

	mrc	p15, 1, r0, c0, c0, 1	@ read CLIDR
	and	ip, r0, #0x07000000	@ narrow to LoC
	lsr	ip, ip, #23		@ left align LoC (low 4 bits)
	cmp	r3, ip			@ compare
	bge	3f @done		@ else fall through (r0 == CLIDR)

	add	r2, r3, r3, lsr #1	@ r2 = (level << 1) * 3 / 2
	mov	r1, r0, lsr r2		@ r1 = cache type
	and	r1, r1, #7
	cmp	r1, #2			@ is it data or i&d?
	blt	1b @next_level		@ nope, skip level

	mcr	p15, 2, r3, c0, c0, 0	@ select cache level
	isb
	mrc	p15, 1, r0, c0, c0, 0	@ read CCSIDR

	ubfx	ip, r0, #0, #3		@ get linesize from CCSIDR
	add	ip, ip, #4		@ apply bias
	ubfx	r2, r0, #13, #15	@ get numsets - 1 from CCSIDR
	lsl	r2, r2, ip		@ shift to set position
	orr	r3, r3, r2		@ merge set into way/set/level
	mov	r1, #1
	lsl	r1, r1, ip		@ r1 = set decr

	ubfx	ip, r0, #3, #10		@ get numways - 1 from [to be discarded] CCSIDR
	clz	r2, ip			@ number of bits to MSB of way
	lsl	ip, ip, r2		@ shift by that into way position
	mov	r0, #1
	lsl	r2, r0, r2		@ r2 now contains the way decr
	mov	r0, r3 			@ get sets/level (no way yet)
	orr	r3, r3, ip		@ merge way into way/set/level
	bfc	r0, #0, #4		@ clear low 4 bits (level) to get numset - 1
	sub	r2, r2, r0		@ subtract from way decr

	/* r3 = ways/sets/level, r2 = way decr, r1 = set decr, r0 and ip are free */
2:	mcr	p15, 0, r3, c7, \crm, 2	@ writeback and/or invalidate line
	cmp	r3, #15			@ are we done with this level (way/set == 0)
	bls	1b @next_level		@ yes, go to next level
	lsr	r0, r3, #4		@ clear level bits leaving only way/set bits
	lsls	r0, r0, #14		@ clear way bits leaving only set bits
	subne	r3, r3, r1		@ non-zero?, decrement set #
	subeq	r3, r3, r2		@ zero?, decrement way # and restore set count
	b	2b

3:	@done
	mov	r0, #0			@ default back to cache level 0
	mcr	p15, 2, r0, c0, c0, 0	@ select cache level
	dsb
	isb
	bx	lr
.endm

/*
 * Bring an ARM processor we just gained control of (e.g. from IROM) into a
 * known state regarding caches/SCTLR. Completely cleans and invalidates
 * icache/dcache, disables MMU and dcache (if active), and enables unaligned
 * accesses, icache and branch prediction (if inactive). Clobbers r4 and r5.
 */
ENTRY(arm_init_caches)
	/* r4: SCTLR, return address: r5 (stay valid for the whole function) */
	mov	r5, lr
	mrc	p15, 0, r4, c1, c0, 0

	/* Activate ICache (12) and Branch Prediction (11) already for speed */
	orr	r4, # (1 << 11) | (1 << 12)
	mcr	p15, 0, r4, c1, c0, 0

	/* Flush and invalidate dcache in ascending order */
	bl	dcache_invalidate_all

	/* Deactivate MMU (0), Alignment Check (1) and DCache (2) */
	and	r4, # ~(1 << 0) & ~(1 << 1) & ~(1 << 2)
	mcr	p15, 0, r4, c1, c0, 0

	/* Invalidate icache and TLB for good measure */
	mcr	p15, 0, r0, c7, c5, 0
	mcr	p15, 0, r0, c8, c7, 0
	dsb
	isb

	bx	r5
ENDPROC(arm_init_caches)

ENTRY(dcache_invalidate_all)
	dcache_apply_all crm=c6
ENDPROC(dcache_invalidate_all)

ENTRY(dcache_clean_all)
	dcache_apply_all crm=c10
ENDPROC(dcache_clean_all)

ENTRY(dcache_clean_invalidate_all)
	dcache_apply_all crm=c14
ENDPROC(dcache_clean_invalidate_all)