;*****************************************************************************
; AMD Generic Encapsulated Software Architecture
;
;  Workfile: cpcarmac.inc    $Revision:: 38483   $    $Date:: 2010-09-25 02:13:03 +0800 (Sat, 25 Sep 2010) $
;
; Description: Code to setup and break down cache-as-stack
;
;*****************************************************************************
; 
;  Copyright (c) 2011, Advanced Micro Devices, Inc.
;  All rights reserved.
;  
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions are met:
;      * Redistributions of source code must retain the above copyright
;        notice, this list of conditions and the following disclaimer.
;      * Redistributions in binary form must reproduce the above copyright
;        notice, this list of conditions and the following disclaimer in the
;        documentation and/or other materials provided with the distribution.
;      * Neither the name of Advanced Micro Devices, Inc. nor the names of 
;        its contributors may be used to endorse or promote products derived 
;        from this software without specific prior written permission.
;  
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
;  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
;  DISCLAIMED. IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. BE LIABLE FOR ANY
;  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
;  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
;  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;  
;*****************************************************************************

    .XLIST
    INCLUDE cpcar.inc
    .LIST
    .586P
    .mmx

;======================================================================
; AMD_ENABLE_STACK:  Setup a stack
;
;   In:
;       EBX  = Return address (preserved)
;
;   Out:
;       SS:ESP - Our new private stack location
;
;       EAX = AGESA_STATUS
;
;       ECX = Stack size in bytes
;
;   Requirements:
;       * This routine presently is limited to a max of 64 processor cores
;   Preserved:
;       ebx ebp
;   Destroyed:
;       eax, ecx, edx, edi, esi, ds, es, ss, esp
;       mm0, mm1
;
;   Description:
; Fixed MTRR address allocation to cores:
; The BSP gets 64K of stack, Core0 of each node gets 16K of stack, all other cores get 4K.
; There is a max of 1 BSP, 7 core0s and 56 other cores.
; Although each core has its own cache storage, they share the address space. Each core must
; be assigned a private and unique address space for its stack. To support legacy systems,
; the stack needs to be within the legacy address space (1st 1Meg). Room must also be reserved
; for the other legacy elements (Interrupt vectors, BIOS ROM, video buffer, etc.)
;
; 80000h                                        40000h                                      00000h
;     +----------+----------+----------+----------+----------+----------+----------+----------+
; 64K |          |          |          |          |          |          |          |          |  64K  ea
;  ea +----------+----------+----------+----------+----------+----------+----------+----------+
;     |                             MTRR 0000_0250 MTRRfix64K_00000                           |
;     +----------+----------+----------+----------+----------+----------+----------+----------+
;     |  7 ,  6  |  5 ,  4  |  3 ,  2  |  1 ,  0  |     0    |          |          |          | <-node
;     |7..1,7..1 |7..1,7..1 |7..1,7..1 |7..1,7..1 |     0    |          |          |          | <-core
;     +----------+----------+----------+----------+----------+----------+----------+----------+
;
; C0000h                       B0000h                      A0000h                      90000h                      80000h
;     +------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+
;16K  |      |      |      |      |      |      |      |      |      |      |      |      |      |      |      |      |
; ea  +------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+
;     |              MTRR 0259 MTRRfix16K_A0000               |             MTRR 0258 MTRRfix16K_80000                |
;     +------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+
;     | > Dis|play B|uffer |   <  |      |      |      |      |   7  |  6   |  5   |  4   |  3   |  2   |  1   |      | <-node
;     | >   T| e  m |p o r |a r y |  B u |f f e |r   A |r e a<|   0  |  0   |  0   |  0   |  0   |  0   |  0   |      | <-core
;     +------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+
;
; E0000h                                            D0000h                                         C0000h
;     +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
; 4K  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  4K  ea
;  ea +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
;     |  026B MTRRfix4K_D8000 | 026A MTRRfix4K_D0000  | 0269 MTRRfix4K_C8000  | 0268 MTRRfix4K_C0000  |
;     +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
;     |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  | >| V| I| D| E| O|  |B |I |O |S |  |A |r |e |a<|
;     +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
;
; 100000h                                           F0000h                                          E0000h
;     +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
;     |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  4K  ea
;     +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
;     |  026F MTRRfix4K_F8000 | 026E MTRRfix4K_F0000  | 026D MTRRfix4K_E8000  | 026C MTRRfix4K_E0000  |
;     +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
;     | >|MA|IN| B|IO|S |RA|NG|E |  |  |  |  |  |  |< | >|EX|TE|ND|ED| B|IO|S |ZO|NE|  |  |  |  |  |< |
;     +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
;======================================================================
AMD_ENABLE_STACK MACRO
  local AmdEnableStackExit

;   Sets up a private cache-as-RAM stack for the executing core.
;   Flow: identify node/core -> detect re-entry -> route low memory to DRAM ->
;   (first entry only) clear MTRRs -> pick this core's stack block -> mark the
;   block WB in its fixed MTRR -> enable MTRRs and the cache -> load SS:ESP ->
;   (first entry only) zero the block and push its size and base.
;
;   Note that SS:ESP will be default stack.  Note that this stack
;   routine will not be used after memory has been initialized.  Because
;   of its limited lifetime, it will not conflict with typical PCI devices.
;
;   There is no usable stack yet, so EBX/EBP are parked in MMX registers.
    movd    mm0, ebx                    ; Put return address in a safe place
    movd    mm1, ebp                    ; Save some other user registers

    ; get node id and core id of current executing core
    GET_NODE_ID_CORE_ID                 ; Sets ESI[15,8]= Node#; ESI[7,0]= core# (relative to node)
    ; Note: ESI[31:24] are used for flags:  Unrecognized Family,  Is_Primary core,  Stack already established

    ; If we detected an unknown processor family, return AGESA_FATAL.
    ; Note: ECX (stack size) is not set on this early-exit path.
    .if (esi & (1 SHL FLAG_UNKNOWN_FAMILY))
      mov eax, AGESA_FATAL
      jmp AmdEnableStackExit
    .endif

    ; determine if stack is already enabled. We are using the DefType MSR for this determination.
    ; It is =0 after reset; CAR setup sets it to enable the MTRRs
    mov     eax, cr0                    ; Is cache enabled? (CD or NW bit set)
    CR0_MASK    TEXTEQU %((1 SHL CR0_CD) OR (1 SHL CR0_NW))
    .if (!(eax & CR0_MASK))
        mov     ecx, AMD_MTRR_DEFTYPE   ; MSR:0000_02FF
        _RDMSR                          ; Are either of the default types enabled? (MTRR_DEF_TYPE_EN + MTRR_DEF_TYPE_FIX_EN)
        MSR_MASK    TEXTEQU %((1 SHL MTRR_DEF_TYPE_EN)+(1 SHL MTRR_DEF_TYPE_FIX_EN))
        .if (eax & MSR_MASK)
            bts     esi, FLAG_STACK_REENTRY     ; indicate stack has already been initialized
        .endif
    .endif

    ; Set node to map the first 16MB to node 0; 0000_0000 to 00FF_FFFF as DRAM
    ; The PCI config address is built by adding (Node# * 8) to the device field,
    ; so each node programs its own D18+Node F1 register pair.
    mov     ebx, esi                    ; Get my Node/Core info
    xor     bl, bl
    shl     bh, 3                       ; Isolate my node#, match alignment for PCI Dev#
    mov     eax, 8000C144h              ; D18F1x44:DRAM Base/Limit; N is Base, N+4 is Limit
    add     ah, bh
    mov     ebx, eax                    ; Save PCI address for Base/Limit pair

    mov     dx, 0CF8h
    out     dx, eax
    add     dx, 4
    xor     eax, eax                    ; Least Significant bit is AD24 so 0 sets mask of 00FF_FFFF (16MB)
    out     dx, eax                     ; DRAM Limit = node0, no interleave

    mov     eax, ebx
    sub     eax, 4                      ; Now point to the Base register
    mov     dx, 0CF8h
    out     dx, eax
    add     dx, 4
    mov     eax, 00000003h              ; Set the read and write enable bits
    out     dx, eax                     ; DRAM Base = 0x0000, R/W

    AMD_ENABLE_STACK_FAMILY_HOOK

    ; Init CPU MSRs for our init routines
    mov     ecx, MTRR_SYS_CFG           ; SYS_CFG
    _RDMSR
    bts     eax, MTRR_FIX_DRAM_MOD_EN   ; Turn on modification enable bit
    _WRMSR

    mov     eax, esi
    bt      eax, FLAG_STACK_REENTRY     ; Is this a 2nd entry?
    .if (!carry?)                       ;   On a re-entry, do not clear MTRRs or reset TOM; just reset the stack SS:ESP
        bt      eax, FLAG_IS_PRIMARY    ;   Is this core the primary in a compute unit?
        .if (carry?)                    ;     Families using shared groups do not need to clear the MTRRs since that is done at power-on reset
            ;  Note: Relying on MSRs to be cleared to 0's at reset for families w/shared cores
            ; Clear all variable and Fixed MTRRs for non-shared cores
            ; EDX:EAX stays 0 for every write below; only the MSR index in (E)CX changes.
            mov     ecx, AMD_MTRR_VARIABLE_BASE0
            xor     eax, eax
            xor     edx, edx
            .while (cl != 10h)                  ; Variable MTRRphysBase[n] and MTRRphysMask[n]
                _WRMSR
                inc     cl
            .endw
            mov     cx, AMD_MTRR_FIX64k_00000   ; MSR:0000_0250
            _WRMSR
            mov     cx, AMD_MTRR_FIX16k_80000   ; MSR:0000_0258
            _WRMSR
            mov     cx, AMD_MTRR_FIX16k_A0000   ; MSR:0000_0259
            _WRMSR
            mov     cx, AMD_MTRR_FIX4k_C0000    ; Fixed 4Ks: MTRRfix4K_C0000 to MTRRfix4K_F8000
            .while (cl != 70h)
                _WRMSR
                inc     cl
            .endw

            ; Set TOP_MEM (C001_001A) for non-shared cores to 16M. This will be increased at heap init.
            ;  - not strictly needed since the FixedMTRRs take precedence.
            mov     eax, (16 * 1024 * 1024)     ; EDX is still 0 from the clearing above
            mov     ecx, TOP_MEM                ; MSR:C001_001A
            _WRMSR
        .endif                          ;   End Is_Primary
    .endif                              ; End Stack_ReEntry

    ; Clear IORRs (C001_0016-19) and TOM2(C001_001D) for all cores
    xor     eax, eax
    xor     edx, edx
    mov     ecx, IORR_BASE              ; MSR:C001_0016 - 0019
    .while (cl != 1Ah)
        _WRMSR
        inc     cl
    .endw
    mov     ecx, TOP_MEM2               ; MSR:C001_001D
    _WRMSR

    ; setup MTRRs for stacks
    ;   A speculative read can be generated by a speculative fetch mis-aligned in a code zone
    ;    or due to a data zone being interpreted as code. When a speculative read occurs outside a
    ;    controlled region (intentionally used by software), it could cause an unwanted cache eviction.
    ;   To prevent speculative reads from causing an eviction, the unused cache ranges are set
    ;    to UC type. Only the actively used regions (stack, heap) are reflected in the MTRRs.
    ;    Note: some core stack regions will share an MTRR since the control granularity is much
    ;    larger than the allocated stack zone. The allocation algorithm must account for this 'extra'
    ;    space covered by the MTRR when parceling out cache space for the various uses. In some cases
    ;    this could reduce the amount of EXE cache available to a core. see cpuCacheInit.c
    ;
    ; Outcome of this block is that:   (Note the MTRR map at the top of the file)
    ;   ebp - start address of stack block
    ;   ebx - [31:16] - MTRR MSR address
    ;       - [15:8]  - slot# in MTRR register
    ;       - [7:0]   - block size in #4K blocks
    ; review: ESI[31:24]=Flags; SI[15,8]= Node#; SI[7,0]= core# (relative to node)
    ;

    mov     ax, si                      ; Load node, core
    .if (al == 0)                       ; Is a core 0?
        .if (ah == 0)                   ; Is Node 0? (BSP)
            ; Is BSP, assign a 64K stack
            mov     ebx, ((AMD_MTRR_FIX64k_00000 SHL 16) + (3 SHL 8) + (BSP_STACK_SIZE  / 1000h))
            mov     ebp, BSP_STACK_BASE_ADDR
        .else   ; node 1 to 7, core0
            ; Is a Core0 of secondary node, assign 16K stacks
            mov     bx, AMD_MTRR_FIX16k_80000
            shl     ebx, 16             ;
            mov     bh, ah              ; Node# is used as slot#
            mov     bl, (CORE0_STACK_SIZE / 1000h)
            mov     al, ah              ; Base = (Node# * Size);
            mul     bl                  ;
            movzx   eax, ax             ;
            shl     eax, 12             ; Expand back to full byte count (* 4K)
            add     eax, CORE0_STACK_BASE_ADDR
            mov     ebp, eax
        .endif
    .else    ;core 1 thru core 7
        ; Is core 1-7 of any node, assign 4K stacks
        mov     al, 8                   ; CoreIndex = ( (Node# * 8) ...
        mul     ah                      ;
        mov     bx, si                  ;
        add     al, bl                  ;         ...  + Core#);

        mov     bx, AMD_MTRR_FIX64k_00000
        shl     ebx, 16                 ;
        mov     bh, al                  ; Slot# = (CoreIndex / 16) + 4;
        shr     bh, 4                   ;
        add     bh, 4                   ;
        mov     bl, (CORE1_STACK_SIZE / 1000h)

        mul     bl                      ; Base = ( (CoreIndex * Size) ...
        movzx   eax, ax                 ;
        shl     eax, 12                 ; Expand back to full byte count (* 4K)
        add     eax, CORE1_STACK_BASE_ADDR ;     ...   + Base_Addr);
        mov     ebp, eax
    .endif

    ; Now set the MTRR. Add this to already existing settings (don't clear any MTRR)
    ; Each fixed MTRR holds eight 8-bit type fields: slots 0-3 in EAX, slots 4-7 in EDX.
    mov     edi, WB_DRAM_TYPE           ; Load Cache type in 1st slot
    mov     cl, bh                      ; ShiftCount =  ((slot#   ...
    and     cl, 03h                     ;   ...  % 4)             ...
    shl     cl, 3                       ;   ...  * 8);
    shl     edi, cl                     ; Cache type is now in correct position
    ror     ebx, 16                     ; Get the MTRR address
    movzx   ecx, bx                     ;
    rol     ebx, 16                     ; Put slot# & size back in BX
    _RDMSR                              ; Read-modify-write the MSR
    .if (bh < 4)                        ; Is value in lower or upper half of MSR?
        or      eax, edi                ;
    .else                               ;
        or      edx, edi                ;
    .endif                              ;
    _WRMSR                              ;

    ; Enable MTRR defaults as UC type
    mov     ecx, AMD_MTRR_DEFTYPE       ; MSR:0000_02FF
    _RDMSR                              ; Read-modify-write the MSR
    bts     eax, MTRR_DEF_TYPE_EN       ; MtrrDefTypeEn
    bts     eax, MTRR_DEF_TYPE_FIX_EN   ; MtrrDefTypeFixEn
    _WRMSR

    ; Close the modification window on the Fixed MTRRs
    mov     ecx, MTRR_SYS_CFG           ; MSR:0C001_0010
    _RDMSR
    bts     eax, MTRR_FIX_DRAM_EN       ; MtrrFixDramEn
    bts     eax, MTRR_VAR_DRAM_EN       ; variable MTRR enable bit
    btr     eax, MTRR_FIX_DRAM_MOD_EN   ; Turn off modification enable bit
    _WRMSR

    ; Enable caching in CR0
    mov     eax, CR0                    ; Enable WT/WB cache
    btr     eax, CR0_PG                 ; Make sure paging is disabled
    btr     eax, CR0_CD                 ; Clear CR0 NW and CD
    btr     eax, CR0_NW
    mov     CR0, eax

    ; Use the Stack Base & size to calculate SS and ESP values
    ; review:
    ;       esi[31:24]=Flags; esi[15,8]= Node#; esi[7,0]= core# (relative to node)
    ;       ebp - start address of stack block
    ;       ebx - [31:16] - MTRR MSR address
    ;           - [15:8]  - slot# in MTRR register
    ;           - [7:0]   - block size in #4K blocks
    ;
    mov     esp, ebp                    ; Initialize the stack pointer
    mov     edi, esp                    ; Copy the stack start to edi
    movzx   ebx, bl                     ; Keep only the size; MSR addr and slot# are no longer needed
    shl     ebx, 12                     ; Make size full byte count (* 4K)
    add     esp, ebx                    ; Set the Stack Pointer as full linear address
    sub     esp, 4
    ;
    ; review:
    ;       esi[31:24]=Flags; esi[15,8]= Node#; esi[7,0]= core# (relative to node)
    ;       edi - 32b start address of stack block
    ;       ebx - size of stack block
    ;       esp - 32b linear stack pointer
    ;

    ; Determine mode for SS base;
    mov     ecx, CR0                    ; Check for 32-bit protect mode
    bt      ecx, CR0_PE                 ;
    .if (!carry?)                       ; PE=0 means real mode
        mov     cx, cs                  ;
        .if (cx >= 0D000h)              ; If CS >= D000, it's a real mode segment. PM selector would be 08-> 1000
            ; alter SS:ESP for 16b Real Mode:
            mov     eax, edi            ;
            shr     eax, 4              ;   Create a Real Mode segment for ss, ds, es
            mov     ss, ax              ;
            mov     ds, ax              ;
            mov     es, ax              ;
            shl     eax, 4              ;
            sub     edi, eax            ;   Adjust the clearing pointer for Seg:Offset mode
            mov     esp, ebx            ;   Make SP an offset from SS
            sub     esp, 4              ;
        .endif                          ; endif
    ; else
    ;   Default is to use Protected 32b Mode
    .endif
    ;
    ; Clear The Stack
    ;   Now that we have set the location and the MTRRs, initialize the cache by
    ;   reading then writing to zero all of the stack area.
    ; review:
    ;       ss  - Stack base
    ;       esp - stack pointer
    ;       ebx - size of stack block
    ;       esi[31:24]=Flags; esi[15,8]= Node#; esi[7,0]= core# (relative to node)
    ;       edi -  address of start of stack block
    ;
    shr     ebx, 2                      ; Convert byte size to DWORD count
    ; Load the FULL ECX: the REP string ops below use 32-bit addressing and therefore
    ; count in ECX, whose upper half still holds CR0[31:16] from the mode check above.
    mov     ecx, ebx                    ; set ecx for size count of DWORDS
    ; Check our flags - Don't clear an existing stack
    .if ( !(esi & (1 SHL FLAG_STACK_REENTRY)) )
        cld
        mov     esi, edi
        rep     lods DWORD PTR [esi]    ; Pre-load the range
        xor     eax, eax
        mov     ecx, ebx                ; Reload the full DWORD count for the fill
        mov     esi, edi                ; Preserve base for push on stack
        rep     stos DWORD PTR [edi]    ; Clear the range
        mov     DWORD PTR [esp], 0ABCDDCBAh ; Put marker in top stack dword
        shl     ebx, 2                  ; Put stack size and base
        push    ebx                     ;  in top of stack
        push    esi

        mov     ecx, ebx                ; Return size of stack in bytes
        mov     eax, AGESA_SUCCESS      ; eax = AGESA_SUCCESS : no error return code
    .else
        shl     ecx, 2                  ; ECX holds the full DWORD count; return size of stack, in bytes
        mov     eax, esi
        shr     eax, 24                 ; Keep the flags as part of the error report
        or      eax, 40000000h          ; eax = AGESA_WARNING (Stack has already been set up)
    .endif
AmdEnableStackExit:
    movd        ebx, mm0                ; Restore return address
    movd        ebp, mm1
ENDM

;======================================================================
; AMD_DISABLE_STACK:  Destroy the stack inside the cache. This routine
;                     should only be executed on the BSP
;
;   In:
;       none
;
;   Out:
;       EAX = AGESA_SUCCESS
;
;   Preserved:
;       ebx
;   Destroyed:
;       eax, ecx, edx, esi, esp
;======================================================================
AMD_DISABLE_STACK MACRO

    ; Tears down the cache-as-stack setup: marks the fixed-MTRR ranges covering
    ; 0 - 640K as WB memory, closes the fixed-MTRR modification window, then runs
    ; the family-specific hook. Returns with EAX = 0 and EBX unchanged.
    ;
    ; ESP is used as a scratch register to carry EBX across the macro body.
    mov     esp, ebx                    ; Save return address

    ; get node/core/flags of current executing core
    ; NOTE(review): ESI is not referenced again in this macro; presumably it is
    ; consumed by AMD_DISABLE_STACK_FAMILY_HOOK - confirm against cpcar.inc.
    GET_NODE_ID_CORE_ID                 ; Sets ESI[15,8]= Node#; ESI[7,0]= core# (relative to node)

    ; Turn on modification enable bit
    mov     ecx, MTRR_SYS_CFG           ; MSR:C001_0010
    _RDMSR
    bts     eax, MTRR_FIX_DRAM_MOD_EN   ; Enable modifications
    _WRMSR

    ; Set lower 640K MTRRs for Write-Back memory caching
    ; The same EDX:EAX pattern is written to both MSRs, so every 8-bit type
    ; field in each register receives the value 1Eh.
    ; NOTE(review): the second write relies on _WRMSR leaving EAX/EDX intact - confirm.
    mov     ecx, AMD_MTRR_FIX64k_00000
    mov     eax, 1E1E1E1Eh
    mov     edx, eax
    _WRMSR                              ; 0 - 512K = WB Mem
    mov     ecx, AMD_MTRR_FIX16k_80000
    _WRMSR                              ; 512K - 640K = WB Mem

    ; Turn off modification enable bit
    mov     ecx, MTRR_SYS_CFG           ; MSR:C001_0010
    _RDMSR
    btr     eax, MTRR_FIX_DRAM_MOD_EN   ; Disable modification
    _WRMSR

    AMD_DISABLE_STACK_FAMILY_HOOK       ; Re-Enable 'normal' cache operations

    mov     ebx, esp                    ; restore return address (ebx)
    xor     eax, eax                    ; EAX = 0, the AGESA_SUCCESS result documented in the header

ENDM