1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
|
;*****************************************************************************
; AMD Generic Encapsulated Software Architecture
;
; Workfile: cpcarmac.inc $Revision:: 44323 $ $Date:: 2010-12-22 01:24:58 -0700 (Wed, 22 Dec 2010) $
;
; Description: Code to setup and break down cache-as-stack
;
;*****************************************************************************
;
; Copyright (c) 2011, Advanced Micro Devices, Inc.
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
; * Neither the name of Advanced Micro Devices, Inc. nor the names of
; its contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. BE LIABLE FOR ANY
; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;
;*****************************************************************************
.XLIST
INCLUDE cpcar.inc
.LIST
.586P
;======================================================================
; AMD_ENABLE_STACK: Setup a stack
;
; In:
; none
;
; Out:
; SS:ESP - Our new private stack location
; 4000:3FFC - for BSP (16K Stack)
; 4000:7FFC - for core0 of node1 (16K Stack)
; 4000:BFFC - for core0 of node2 (16K Stack)
; 4000:FFFC - for core0 of node3 (16K Stack)
; 5000:3FFC - for core0 of node4 (16K Stack)
; 5000:7FFC - for core0 of node5 (16K Stack)
; 5000:BFFC - for core0 of node6 (16K Stack)
; 5000:FFFC - for core0 of node7 (16K Stack)
;
; 6000:1FFC - for core1 node0 (4k stack)
; 6000:2FFC - for core2 node0 (4k stack)
; ...
; 9000:FFFC - for core7 of node7 (4k stack) ......... max of 64 cores in system
;
; EAX = AGESA_STATUS
;
; ECX = Stack size in bytes
;
; Requirements:
; * This routine presently is limited to a max of 64 processors
;
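; Preserved:
; ebx (parked in esp on entry and restored on exit; used as scratch in between)
; Destroyed:
; eax, ecx, edx, edi, esi, esp, flags; ss, ds, es (reloaded on the real-mode path only)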
;
;======================================================================
AMD_ENABLE_STACK MACRO
    local   SetupStack
    local   SetupDramMap
    local   get_SS_ESP
    local   r16bmode
    local   p32mode
    local   init_stack

    ; Purpose:
    ;   Give the executing core a private stack that lives in cache and
    ;   point SS:ESP (real mode) or ESP (flat protected mode) at it.
    ;
    ; Register roles inside the macro:
    ;   esp        - holds the caller's EBX (return address) until the exit
    ;                sequence, where EBX is restored and ESP gets the new stack
    ;   si / di    - node id / core id; expected to be produced by
    ;                GET_NODE_ID_CORE_ID (defined in cpcar.inc, not visible
    ;                here - confirm the register contract there)
    ;   edi[31:16] - non-zero means "stack was already set up, do not clear it"
    ;   From get_SS_ESP on:
    ;   eax        - 64K-aligned linear base of the stack segment
    ;   edx        - offset of the top dword of the stack inside that segment
    ;   bx         - stack size in dwords
    ;
    ; Note that SS:ESP will be default stack.  Note that this stack
    ; routine will not be used after memory has been initialized.  Because
    ; of its limited lifetime, it will not conflict with typical PCI devices.

    mov     esp, ebx                    ; put return address in a safe place

    ; get node id and core id of current executing core
    mov     si, -1
    GET_NODE_ID_CORE_ID
    movzx   edi, di                     ; clear edi[31:16] = "not yet initialized" flag

    ; determine if stack is already enabled:
    ; caches must be on (CR0.CD = CR0.NW = 0) and at least one of the
    ; MTRRdefType enable bits (mask 0C00h) must be set
    mov     eax, cr0
    test    eax, 60000000h              ; CD (bit 30) or NW (bit 29) set?
    jnz     SetupStack
    mov     ecx, AMD_MTRR_DEFTYPE
    _RDMSR
    test    ah, 0Ch                     ; bits 11:10 of MTRRdefType
    jz      SetupStack
    or      edi, 0FFFF0000h             ; indicate stack has already been initialized
    jmp     get_SS_ESP

SetupStack:
    ; Enable routing tables on BSP (just in case the HT init code has not yet enabled them)
    ; 8000C06Ch = PCI config bus 0, device 18h, function 0, register 6Ch
    mov     eax, 8000C06Ch
    mov     dx, 0CF8h
    out     dx, eax
    add     dx, 4                       ; dx = 0CFCh, config data port
    in      eax, dx
    btr     eax, 0                      ; clear bit 0 of the register
    out     dx, eax

    ; Setup temporary DRAM map for CAS on all nodes
    mov     eax, 8000C060h              ; Get NodeCnt from BSP
    mov     dx, 0CF8h
    out     dx, eax
    add     dx, 4
    in      al, dx                      ; only the low byte is read; ah still holds 0C0h
    shr     ax, 4                       ; low nibble of ah is 0, so al = old al >> 4
    and     al, 7                       ; al = NodeCnt (number of nodes - 1)
    mov     cl, al                      ; loop below runs NodeCnt + 1 times
    mov     ebx, 8000C144h              ; device 18h, function 1, register 44h

SetupDramMap:
    mov     eax, ebx                    ; Set 0000_0000 to 00FF_FFFF as DRAM
    mov     dx, 0CF8h
    out     dx, eax
    add     dx, 4
    xor     eax, eax                    ; register 44h <- 0
    out     dx, eax
    mov     eax, ebx
    sub     eax, 4                      ; register 40h of the same function
    mov     dx, 0CF8h
    out     dx, eax
    add     dx, 4
    mov     eax, 3                      ; register 40h <- 3
    out     dx, eax
    add     bh, 8                       ; +0800h in the config address = next PCI device (next node)
    dec     cl
    jns     SetupDramMap

    ; Disable the self modifying code check buffer and Disable hardware prefetches
    mov     ecx, 0C0011022h
    _RDMSR
    bts     eax, DC_DIS_SPEC_TLB_RLD    ; turn on Disable speculative TLB reloads bit
    bts     eax, DIS_CLR_WBTOL2_SMC_HIT ; turn on Disable the self modifying code check buffer bit
    bts     eax, DIS_HW_PF              ; turn on Disable hardware prefetches bit
    _WRMSR
    dec     cx                          ; MSRC001_1021 Instruction Cache Configuration Register (IC_CFG)
    _RDMSR
    bts     eax, IC_DIS_SPEC_TLB_RLD    ; turn on Disable speculative TLB reloads bit
    _WRMSR

    AMD_ENABLE_STACK_FAMILY_HOOK        ; Disable L3 cache to accept clear lines

    ; Init CPU MSRs for our init routines
    mov     ecx, MTRR_SYS_CFG           ; SYS_CFG
    _RDMSR
    and     eax, 0FFE3FFFFh             ; turn off MTRR enable bits (clears bits 20:18)
    bts     eax, MtrrFixDramModEn       ; turn on modification enable bit
    _WRMSR

    ; clear all variable and Fixed MTRRs (eax = edx = 0 for every write below)
    mov     ecx, 200h
    xor     eax, eax
    xor     edx, edx
    .while (cl != 10h)                  ; MTRRphysBasen and MTRRphysMaskn (200h..20Fh)
        _WRMSR
        inc     cl
    .endw
    mov     cl, 50h                     ; MTRRfix64K_00000
    _WRMSR
    mov     cl, 58h                     ; MTRRfix16K_80000
    _WRMSR
    mov     cl, 59h                     ; MTRRfix16K_A0000
    _WRMSR
    mov     cl, 68h                     ; MTRRfix4K_C0000 to MTRRfix4K_F8000
    .while (cl != 70h)
        _WRMSR
        inc     cl
    .endw

    ; setup MTRR for stacks
    ; ebx carries the memory type in one byte lane; the shifts below move it
    ; to the lane of the fixed-range MTRR that covers this core's stack.
    mov     ebx, WB_DRAM_TYPE
    .if (di == 0) ;core 0
        .if (si > 3) ; node 0 to 3 located at 40000h, node 4 to 7 located at 50000h
            shl     ebx, 8
        .endif
        mov     ecx, AMD_MTRR_FIX64k_00000
        _RDMSR
        or      edx, ebx                ; high dword covers 40000h and up
        _WRMSR
    .else ;core 1 to core 7 start at 60000h
        .if (si < 4) ; node 0 to 3 using the 60000h and 70000h ranges of the 64K fixed MTRR
            shl     ebx, 16
            .if (si > 1)
                shl     ebx, 8
            .endif
            mov     ecx, AMD_MTRR_FIX64k_00000
            _RDMSR
            or      edx, ebx
            _WRMSR
        .else ; node 4 to 7 use the 80000h and 90000h ranges of the 16K fixed MTRR
            mov     ecx, AMD_MTRR_FIX16k_80000
            _RDMSR
            .if (si < 6) ; node 4 and node 5
                .if (si == 4) ; node 4
                    .if (di >= 4)       ; cores 4..7 sit in the second 16K of the node's 32K
                        shl     ebx, 8
                    .endif
                .else ; node 5
                    shl     ebx, 16
                    .if (di >= 4)
                        shl     ebx, 8
                    .endif
                .endif
                or      eax, ebx        ; low dword: 80000h..8FFFFh
                _WRMSR
            .else ; node 6 and node 7
                .if (si == 6) ; node 6
                    .if (di >= 4)
                        shl     ebx, 8
                    .endif
                .else ; node 7
                    shl     ebx, 16
                    .if (di >= 4)
                        shl     ebx, 8
                    .endif
                .endif
                or      edx, ebx        ; high dword: 90000h..9FFFFh
                _WRMSR
            .endif
        .endif
    .endif

    ; Clear IORRs, TOP_MEM and TOM2
    xor     eax, eax
    xor     edx, edx
    mov     ecx, 0C0010016h             ;IORRBase0
    .while (cl != 1Bh)                  ; C0010016h..C001001Ah
        _WRMSR
        inc     cl
    .endw
    mov     cl, 1Dh                     ; C001001Dh
    _WRMSR

    ; Enable MTRRs
    mov     ecx, 02FFh                  ; MTRRdefType
    mov     ah, 0Ch                     ; MtrrDefTypeEn and MtrrDefTypeFixEn (eax = 0C00h, edx = 0)
    _WRMSR

    mov     ecx, MTRR_SYS_CFG           ; SYS_CFG
    _RDMSR
    bts     eax, MtrrFixDramEn          ; MtrrFixDramEn
    btr     eax, MtrrFixDramModEn       ; turn off modification enable bit
    _WRMSR

    ; Enable caching in CR0
    mov     eax, CR0                    ; Enable WT/WB cache
    btr     eax, 31                     ; make sure paging is disabled
    btr     eax, 30                     ; Clear CR0 NW and CD
    btr     eax, 29
    mov     CR0, eax

get_SS_ESP:
    ; allocate space for stacks
    ; Result: eax = 64K-aligned segment base, edx = offset of the top dword,
    ;         bx = stack size in dwords
    xor     cx, cx
    xor     edx, edx
    .if (di == 0) ;core 0
        mov     eax, CORE0_STACK_BASE_ADDR
        .while (cx <= si)               ; eax += (node id + 1) * 16K
            add     eax, 4000h
            inc     cx
        .endw
        mov     edx, eax                ; edx = one past the end of this node's stack
        sub     eax, 4000h
        and     eax, 0F0000h
        sub     edx, 4                  ; top dword of the stack
        and     edx, 0FFFFh             ; keep the offset within the 64K segment
        mov     bx, CORE0_STACK_SIZE / 4
    .else ;core 1 to core 7 start at 60000h
        mov     eax, CORE1_STACK_BASE_ADDR ; core 1 stack starts at 60000h
        .while (cx <= si)
            add     eax, 8000h          ; 32k for each node
            inc     cx
        .endw
        sub     eax, 8000h              ; eax = base + node id * 32K
        mov     dx, ax                  ; offset of the node's block inside its 64K segment
        and     eax, 0F0000h
        xor     cx, cx
        .while (cx <= di)               ; edx += (core id + 1) * 4K
            add     edx, 1000h          ; 4k for APs
            inc     cx
        .endw
        sub     edx, 4                  ; top dword of the stack
        mov     bx, CORE1_STACK_SIZE / 4
    .endif

    ; Allocate stack and set ESP
    mov     ecx, CR0                    ; check for 32-bit protect mode
    test    ecx, 1                      ; PE bit
    jz      r16bmode                    ; PE=0, real mode
    mov     cx, cs                      ; PE=1
    cmp     cx, 0D000h                  ; check for CS
    jb      p32mode                     ; if CS < D000, it is a selector instead of a segment
r16bmode:
    shr     eax, 4                      ; ax = ss, ds, es
    mov     ss, ax
    mov     ds, ax
    mov     es, ax
    jmp     init_stack
p32mode:
    add     edx, eax                    ; linear address of the stack
init_stack:

    .if ( !(edi & 0FFFF0000h))          ; stack not set up before: touch and clear it
        std                             ; walk downward from the top dword
        xor     ecx, ecx
        mov     cx, bx
        mov     esi, edx
        rep     lods DWORD PTR [esi]    ; read every dword of the stack area
        xor     eax, eax
        mov     cx, bx
        mov     edi, edx
        rep     stos DWORD PTR [edi]    ; then zero-fill it
        cld
        xor     eax, eax                ; eax = 0 : no error
    .else
        mov     eax, 40000001h          ; eax = AGESA_WARNING (Stack has already been set up)
    .endif

    ; ecx = size in dwords.  Zero-extend: on the already-initialized path
    ; ECX still holds CR0 from the mode check above, so writing only CX
    ; would leave CR0[31:16] in the returned stack size.
    movzx   ecx, bx
    shl     ecx, 2                      ; ecx = size in bytes
    mov     ebx, esp                    ; restore the caller's EBX (return address)
    mov     esp, edx                    ; switch to the new stack
ENDM
;======================================================================
; AMD_DISABLE_STACK: Destroy the stack inside the cache. This routine
; should only be executed on the BSP
;
; In:
; none
;
; Out:
; EAX = AGESA_STATUS
;
; Preserved:
; ebx
; Destroyed:
; eax, ecx, edx, esp
;======================================================================
AMD_DISABLE_STACK MACRO
    ; Undo the MSR settings made for the cache-resident stack.
    ; EBX (the caller's return address) is parked in ESP for the duration
    ; of the macro and handed back in EBX at the end; EAX returns 0.

    mov     esp, ebx                    ; save return address

    ; Open the fixed-range MTRR attributes for modification
    mov     ecx, MTRR_SYS_CFG
    _RDMSR
    bts     eax, MtrrFixDramModEn       ; set modification enable
    _WRMSR

    ; Load 1Eh into every byte lane of the two fixed-range MTRRs below
    mov     eax, 1E1E1E1Eh
    mov     edx, 1E1E1E1Eh
    mov     ecx, AMD_MTRR_FIX64k_00000
    _WRMSR                              ; 0 - 512K = WB Mem
    mov     cl, 58h                     ; next MSR: low byte of ECX becomes 58h
    _WRMSR                              ; 512K - 640K = WB Mem

    ; Close the fixed-range MTRR attributes again
    mov     ecx, MTRR_SYS_CFG
    _RDMSR
    btr     eax, MtrrFixDramModEn       ; clear modification enable
    _WRMSR

    ; Re-enable the self modifying code check buffer, hardware prefetches
    ; and speculative TLB reloads (clear the three disable bits)
    mov     ecx, 0C0011022h
    _RDMSR
    btr     eax, DC_DIS_SPEC_TLB_RLD    ; clear Disable speculative TLB reloads bit
    btr     eax, DIS_CLR_WBTOL2_SMC_HIT ; clear Disable the self modifying code check buffer bit
    btr     eax, DIS_HW_PF              ; clear Disable hardware prefetches bit
    _WRMSR

    mov     ecx, 0C0011021h             ; MSRC001_1021 Instruction Cache Configuration Register (IC_CFG)
    _RDMSR
    btr     eax, IC_DIS_SPEC_TLB_RLD    ; clear Disable speculative TLB reloads bit
    _WRMSR

    AMD_DISABLE_STACK_FAMILY_HOOK       ; Re-Enable L3 cache to accept clear lines

    xor     eax, eax                    ; eax = 0
    mov     ebx, esp                    ; hand the return address back in EBX
ENDM
|