diff options
Diffstat (limited to 'src/northbridge/amd/amdmct')
23 files changed, 10943 insertions, 0 deletions
diff --git a/src/northbridge/amd/amdmct/amddefs.h b/src/northbridge/amd/amdmct/amddefs.h new file mode 100644 index 0000000000..90ab102f73 --- /dev/null +++ b/src/northbridge/amd/amdmct/amddefs.h @@ -0,0 +1,69 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* Public Revisions - USE THESE VERSIONS TO MAKE COMPARE WITH CPULOGICALID RETURN VALUE*/ +#define AMD_SAFEMODE 0x80000000 /* Unknown future revision - SAFE MODE */ +#define AMD_NPT_F0 0x00000001 /* F0 stepping */ +#define AMD_NPT_F1 0x00000002 /* F1 stepping */ +#define AMD_NPT_F2C 0x00000004 +#define AMD_NPT_F2D 0x00000008 +#define AMD_NPT_F2E 0x00000010 /* F2 stepping E */ +#define AMD_NPT_F2G 0x00000020 /* F2 stepping G */ +#define AMD_NPT_F2J 0x00000040 +#define AMD_NPT_F2K 0x00000080 +#define AMD_NPT_F3L 0x00000100 /* F3 Stepping */ +#define AMD_NPT_G0A 0x00000200 /* G0 stepping */ +#define AMD_NPT_G1B 0x00000400 /* G1 stepping */ +#define AMD_DR_A0A 0x00010000 /* Barcelona A0 */ +#define AMD_DR_A1B 0x00020000 /* Barcelona A1 */ +#define AMD_DR_A2 0x00040000 /* Barcelona A2 */ +#define AMD_DR_B0 0x00080000 /* Barcelona B0 */ +#define AMD_DR_B1 0x00100000 /* Barcelona B1 */ +#define AMD_DR_B2 0x00200000 /* Barcelona B2 */ +#define AMD_DR_BA 0x00400000 /* Barcelona BA */ + 
+/* + Groups - Create as many as you wish, from the above public values +*/ +#define AMD_NPT_F2 (AMD_NPT_F2C + AMD_NPT_F2D + AMD_NPT_F2E + AMD_NPT_F2G + AMD_NPT_F2J + AMD_NPT_F2K) +#define AMD_NPT_F3 (AMD_NPT_F3L) +#define AMD_NPT_Fx (AMD_NPT_F0 + AMD_NPT_F1 + AMD_NPT_F2 + AMD_NPT_F3) +#define AMD_NPT_Gx (AMD_NPT_G0A + AMD_NPT_G1B) +#define AMD_NPT_ALL (AMD_NPT_Fx + AMD_NPT_Gx) +#define AMD_DR_Ax (AMD_DR_A0A + AMD_DR_A1B + AMD_DR_A2) +#define AMD_FINEDELAY (AMD_NPT_F0 + AMD_NPT_F1 + AMD_NPT_F2) +#define AMD_GT_F0 (AMD_NPT_ALL & ~AMD_NPT_F0) /* every NPT revision bit except F0 */ + + +#define CPUID_EXT_PM 0x80000007 + +#define CPUID_MODEL 1 + + +#define HWCR 0xC0010015 + + +#define FidVidStatus 0xC0010042 + + +#define FS_Base 0xC0000100 + + +#define BU_CFG 0xC0011023 +#define BU_CFG2 0xC001102A diff --git a/src/northbridge/amd/amdmct/mct/mct.h b/src/northbridge/amd/amdmct/mct/mct.h new file mode 100644 index 0000000000..2ddd5da7bc --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mct.h @@ -0,0 +1,552 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef MCT_H +#define MCT_H +/*=========================================================================== + CPU - K8/FAM10 +===========================================================================*/ +#define PT_L1 0 /* CPU Package Type*/ +#define PT_M2 1 +#define PT_S1 2 + +#define J_MIN 0 /* j loop constraint. 0=CL 2.0 T*/ +#define J_MAX 4 /* j loop constraint. 4=CL 6.0 T*/ +#define K_MIN 1 /* k loop constraint. 1=200 Mhz*/ +#define K_MAX 4 /* k loop constraint. 4=400 Mhz*/ +#define CL_DEF 2 /* Default value for failsafe operation. 2=CL 4.0 T*/ +#define T_DEF 1 /* Default value for failsafe operation. 1=5ns (cycle time)*/ + +#define BSCRate 1 /* reg bit field=rate of dram scrubber for ecc*/ + /* memory initialization (ecc and check-bits).*/ + /* 1=40 ns/64 bytes.*/ +#define FirstPass 1 /* First pass through RcvEn training*/ +#define SecondPass 2 /* Second pass through Rcven training*/ + +#define RCVREN_MARGIN 6 /* number of DLL taps to delay beyond first passing position*/ +#define MAXASYNCLATCTL_3 60 /* Max Async Latency Control value (This value will be divided by 20)*/ +#define DQS_FAIL 1 +#define DQS_PASS 0 +#define DQS_WRITEDIR 0 +#define DQS_READDIR 1 +#define MIN_DQS_WNDW 3 +#define secPassOffset 6 + +#define PA_HOST (((24 << 3)+0) << 8) /* Node 0 Host Bus function PCI Address bits [15:0] */ +#define PA_MAP (((24 << 3)+1) << 8) /* Node 0 MAP function PCI Address bits [15:0] */ +#define PA_DCT (((24 << 3)+2) << 8) /* Node 0 DCT function PCI Address bits [15:0] */ +#define PA_DCTADDL (((00 << 3)+2) << 8) /* Node x DCT function, Additional Registers PCI Address bits [15:0] */ +#define PA_NBMISC (((24 << 3)+3) << 8) /* Node 0 Misc PCI Address bits [15:0] */ +#define PA_NBDEVOP (((00 << 3)+3) << 8) /* Node 0 Misc PCI Address bits [15:0] 
*/ + +#define DCC_EN 1 /* X:2:0x94[19]*/ +#define ILD_Lmt 3 /* X:2:0x94[18:16]*/ + +#define EncodedTSPD 0x00191709 /* encodes which SPD byte to get T from*/ + /* versus CL X, CL X-.5, and CL X-1*/ + +#define Bias_TrpT 3 /* bias to convert bus clocks to bit field value*/ +#define Bias_TrrdT 2 +#define Bias_TrcdT 3 +#define Bias_TrasT 3 +#define Bias_TrcT 11 +#define Bias_TrtpT 4 +#define Bias_TwrT 3 +#define Bias_TwtrT 0 + +#define Min_TrpT 3 /* min programmable value in busclocks*/ +#define Max_TrpT 6 /* max programmable value in busclocks*/ +#define Min_TrrdT 2 +#define Max_TrrdT 5 +#define Min_TrcdT 3 +#define Max_TrcdT 6 +#define Min_TrasT 5 +#define Max_TrasT 18 +#define Min_TrcT 11 +#define Max_TrcT 26 +#define Min_TrtpT 4 +#define Max_TrtpT 5 +#define Min_TwrT 3 +#define Max_TwrT 6 +#define Min_TwtrT 1 +#define Max_TwtrT 3 + +/* common register bit names */ +#define DramHoleValid 0 /* func 1, offset F0h, bit 0 */ +#define CSEnable 0 /* func 2, offset 40h-5C, bit 0 */ +#define Spare 1 /* func 2, offset 40h-5C, bit 1 */ +#define TestFail 2 /* func 2, offset 40h-5C, bit 2 */ +#define DqsRcvEnTrain 18 /* func 2, offset 78h, bit 18 */ +#define EnDramInit 31 /* func 2, offset 7Ch, bit 31 */ +#define DisAutoRefresh 18 /* func 2, offset 8Ch, bit 18 */ +#define InitDram 0 /* func 2, offset 90h, bit 0 */ +#define BurstLength32 10 /* func 2, offset 90h, bit 10 */ +#define Width128 11 /* func 2, offset 90h, bit 11 */ +#define X4Dimm 12 /* func 2, offset 90h, bit 12 */ +#define UnBuffDimm 16 /* func 2, offset 90h, bit 16 */ +#define DimmEcEn 19 /* func 2, offset 90h, bit 19 */ +#define MemClkFreqVal 3 /* func 2, offset 94h, bit 3 */ +#define RDqsEn 12 /* func 2, offset 94h, bit 12 */ +#define DisDramInterface 14 /* func 2, offset 94h, bit 14 */ +#define DctAccessWrite 30 /* func 2, offset 98h, bit 30 */ +#define DctAccessDone 31 /* func 2, offset 98h, bit 31 */ +#define PwrSavingsEn 10 /* func 2, offset A0h, bit 10 */ +#define Mod64BitMux 4 /* func 2, offset A0h, bit 4 */ 
+#define DisableJitter 1 /* func 2, offset A0h, bit 1 */ +#define DramEnabled 9 /* func 2, offset A0h, bit 9 */ +#define SyncOnUcEccEn 2 /* func 3, offset 44h, bit 2 */ + +/*============================================================================= + Jedec DDR II +=============================================================================*/ +#define SPD_TYPE 2 /* SPD byte read location*/ + #define JED_DDRSDRAM 0x07 /* Jedec defined bit field*/ + #define JED_DDR2SDRAM 0x08 /* Jedec defined bit field*/ + +#define SPD_DIMMTYPE 20 +#define SPD_ATTRIB 21 + #define JED_DIFCKMSK 0x20 /* Differential Clock Input*/ + #define JED_REGADCMSK 0x11 /* Registered Address/Control*/ + #define JED_PROBEMSK 0x40 /* Analysis Probe installed*/ +#define SPD_DEVATTRIB 22 +#define SPD_EDCTYPE 11 + #define JED_ECC 0x02 + #define JED_ADRCPAR 0x04 +#define SPD_ROWSZ 3 +#define SPD_COLSZ 4 +#define SPD_LBANKS 17 /* number of [logical] banks on each device*/ +#define SPD_DMBANKS 5 /* number of physical banks on dimm*/ + #define SPDPLBit 4 /* Dram package bit*/ +#define SPD_BANKSZ 31 /* capacity of physical bank*/ +#define SPD_DEVWIDTH 13 +#define SPD_CASLAT 18 +#define SPD_TRP 27 +#define SPD_TRRD 28 +#define SPD_TRCD 29 +#define SPD_TRAS 30 +#define SPD_TWR 36 +#define SPD_TWTR 37 +#define SPD_TRTP 38 +#define SPD_TRCRFC 40 +#define SPD_TRC 41 +#define SPD_TRFC 42 + +#define SPD_MANDATEYR 93 /* Module Manufacturing Year (BCD) */ + +#define SPD_MANDATEWK 94 /* Module Manufacturing Week (BCD) */ + +/*-------------------------------------- + Jedec DDR II related equates +--------------------------------------*/ +#define MYEAR06 6 /* Manufacturing Year BCD encoding of 2006 - 06d*/ +#define MWEEK24 0x24 /* Manufacturing Week BCD encoding of June - 24d*/ + +/*============================================================================= + Macros +=============================================================================*/ + +#define _2GB_RJ8 (2<<(30-8)) +#define _4GB_RJ8 (4<<(30-8)) +#define 
_4GB_RJ4 (4<<(30-4)) + +#define BigPagex8_RJ8 (1<<(17+3-8)) /* 128KB * 8 >> 8 */ + +/*============================================================================= + Global MCT Status Structure +=============================================================================*/ +struct MCTStatStruc { + u32 GStatus; /* Global Status bitfield*/ + u32 HoleBase; /* If not zero, BASE[39:8] (system address) + of sub 4GB dram hole for HW remapping.*/ + u32 Sub4GCacheTop; /* If not zero, the 32-bit top of cacheable memory.*/ + u32 SysLimit; /* LIMIT[39:8] (system address)*/ +}; +/*============================================================================= + Global MCT Configuration Status Word (GStatus) +=============================================================================*/ +/*These should begin at bit 0 of GStatus[31:0]*/ +#define GSB_MTRRshort 0 /* Ran out of MTRRs while mapping memory*/ +#define GSB_ECCDIMMs 1 /* All banks of all Nodes are ECC capable*/ +#define GSB_DramECCDis 2 /* Dram ECC requested but not enabled.*/ +#define GSB_SoftHole 3 /* A Node Base gap was created*/ +#define GSB_HWHole 4 /* A HW dram remap was created*/ +#define GSB_NodeIntlv 5 /* Node Memory interleaving was enabled*/ +#define GSB_SpIntRemapHole 16 /* Special condition for Node Interleave and HW remapping*/ + + +/*=============================================================================== + Local DCT Status structure (a structure for each DCT) +===============================================================================*/ + +struct DCTStatStruc { /* A per Node structure*/ + u8 Node_ID; /* Node ID of current controller*/ + u8 ErrCode; /* Current error condition of Node + 0= no error + 1= Variance Error, DCT is running but not in an optimal configuration. 
+ 2= Stop Error, DCT is NOT running + 3= Fatal Error, DCT/MCT initialization has been halted.*/ + u32 ErrStatus; /* Error Status bit Field */ + u32 Status; /* Status bit Field*/ + u8 DIMMAddr[8]; /* SPD address of DIMM controlled by MA0_CS_L[0,1]*/ + /* SPD address of..MB0_CS_L[0,1]*/ + /* SPD address of..MA1_CS_L[0,1]*/ + /* SPD address of..MB1_CS_L[0,1]*/ + /* SPD address of..MA2_CS_L[0,1]*/ + /* SPD address of..MB2_CS_L[0,1]*/ + /* SPD address of..MA3_CS_L[0,1]*/ + /* SPD address of..MB3_CS_L[0,1]*/ + u16 DIMMPresent; /* For each bit n 0..7, 1=DIMM n is present. + DIMM# Select Signal + 0 MA0_CS_L[0,1] + 1 MB0_CS_L[0,1] + 2 MA1_CS_L[0,1] + 3 MB1_CS_L[0,1] + 4 MA2_CS_L[0,1] + 5 MB2_CS_L[0,1] + 6 MA3_CS_L[0,1] + 7 MB3_CS_L[0,1]*/ + u16 DIMMValid; /* For each bit n 0..7, 1=DIMM n is valid and is/will be configured*/ + u16 DIMMSPDCSE; /* For each bit n 0..7, 1=DIMM n SPD checksum error*/ + u16 DimmECCPresent; /* For each bit n 0..7, 1=DIMM n is ECC capable.*/ + u16 DimmPARPresent; /* For each bit n 0..7, 1=DIMM n is ADR/CMD Parity capable.*/ + u16 Dimmx4Present; /* For each bit n 0..7, 1=DIMM n contains x4 data devices.*/ + u16 Dimmx8Present; /* For each bit n 0..7, 1=DIMM n contains x8 data devices.*/ + u16 Dimmx16Present; /* For each bit n 0..7, 1=DIMM n contains x16 data devices.*/ + u16 DIMM1Kpage; /* For each bit n 0..7, 1=DIMM n contains 1K page devices.*/ + u8 MAload[2]; /* Number of devices loading MAA bus*/ + /* Number of devices loading MAB bus*/ + u8 MAdimms[2]; /* Number of DIMMs loading CH A*/ + /* Number of DIMMs loading CH B*/ + u8 DATAload[2]; /* Number of ranks loading CH A DATA*/ + /* Number of ranks loading CH B DATA*/ + u8 DIMMAutoSpeed; /* Max valid Mfg. Speed of DIMMs + 1=200Mhz + 2=266Mhz + 3=333Mhz + 4=400Mhz */ + u8 DIMMCASL; /* Min valid Mfg. 
CL bitfield + 0=2.0 + 1=3.0 + 2=4.0 + 3=5.0 + 4=6.0 */ + u16 DIMMTrcd; /* Minimax Trcd*40 (ns) of DIMMs*/ + u16 DIMMTrp; /* Minimax Trp*40 (ns) of DIMMs*/ + u16 DIMMTrtp; /* Minimax Trtp*40 (ns) of DIMMs*/ + u16 DIMMTras; /* Minimax Tras*40 (ns) of DIMMs*/ + u16 DIMMTrc; /* Minimax Trc*40 (ns) of DIMMs*/ + u16 DIMMTwr; /* Minimax Twr*40 (ns) of DIMMs*/ + u16 DIMMTrrd; /* Minimax Trrd*40 (ns) of DIMMs*/ + u16 DIMMTwtr; /* Minimax Twtr*40 (ns) of DIMMs*/ + u8 Speed; /* Bus Speed (to set Controller) + 1=200Mhz + 2=266Mhz + 3=333Mhz + 4=400Mhz */ + u8 CASL; /* CAS latency DCT setting + 0=2.0 + 1=3.0 + 2=4.0 + 3=5.0 + 4=6.0 */ + u8 Trcd; /* DCT Trcd (busclocks) */ + u8 Trp; /* DCT Trp (busclocks) */ + u8 Trtp; /* DCT Trtp (busclocks) */ + u8 Tras; /* DCT Tras (busclocks) */ + u8 Trc; /* DCT Trc (busclocks) */ + u8 Twr; /* DCT Twr (busclocks) */ + u8 Trrd; /* DCT Trrd (busclocks) */ + u8 Twtr; /* DCT Twtr (busclocks) */ + u8 Trfc[4]; /* DCT Logical DIMM0 Trfc + 0=75ns (for 256Mb devs) + 1=105ns (for 512Mb devs) + 2=127.5ns (for 1Gb devs) + 3=195ns (for 2Gb devs) + 4=327.5ns (for 4Gb devs) */ + /* DCT Logical DIMM1 Trfc (see Trfc0 for format) */ + /* DCT Logical DIMM2 Trfc (see Trfc0 for format) */ + /* DCT Logical DIMM3 Trfc (see Trfc0 for format) */ + u16 CSPresent; /* For each bit n 0..7, 1=Chip-select n is present */ + u16 CSTestFail; /* For each bit n 0..7, 1=Chip-select n is present but disabled */ + u32 DCTSysBase; /* BASE[39:8] (system address) of this Node's DCTs. */ + u32 DCTHoleBase; /* If not zero, BASE[39:8] (system address) of dram hole for HW remapping. Dram hole exists on this Node's DCTs. 
*/ + u32 DCTSysLimit; /* LIMIT[39:8] (system address) of this Node's DCTs */ + u16 PresetmaxFreq; /* Maximum OEM defined DDR frequency + 200=200Mhz (DDR400) + 266=266Mhz (DDR533) + 333=333Mhz (DDR667) + 400=400Mhz (DDR800) */ + u8 _2Tmode; /* 1T or 2T CMD mode (slow access mode) + 1=1T + 2=2T */ + u8 TrwtTO; /* DCT TrwtTO (busclocks)*/ + u8 Twrrd; /* DCT Twrrd (busclocks)*/ + u8 Twrwr; /* DCT Twrwr (busclocks)*/ + u8 Trdrd; /* DCT Trdrd (busclocks)*/ + u32 CH_ODC_CTL[2]; /* Output Driver Strength (see BKDG FN2:Offset 9Ch, index 00h)*/ + u32 CH_ADDR_TMG[2]; /* Address Bus Timing (see BKDG FN2:Offset 9Ch, index 04h)*/ + /* Output Driver Strength (see BKDG FN2:Offset 9Ch, index 20h)*/ + /* Address Bus Timing (see BKDG FN2:Offset 9Ch, index 24h)*/ + u16 CH_EccDQSLike[2]; /* CHA DQS ECC byte like...*/ + u8 CH_EccDQSScale[2]; /* CHA DQS ECC byte scale*/ +// u8 reserved_b_1; /* Reserved*/ + /* CHB DQS ECC byte like...*/ + /* CHB DQS ECC byte scale*/ +// u8 reserved_b_2; /*Reserved*/ + u8 MaxAsyncLat; /* Max Asynchronous Latency (ns)*/ + u8 CH_B_DQS[2][2][9]; /* CHA Byte 0 - 7 and Check Write DQS Delay*/ + /* Reserved*/ + /* CHA Byte 0 - 7 and Check Read DQS Delay*/ + /* Reserved*/ + /* CHB Byte 0 - 7 and Check Write DQS Delay*/ + /* Reserved*/ + /* CHB Byte 0 - 7 and Check Read DQS Delay*/ + /* Reserved*/ + u8 CH_D_RCVRDLY[2][4]; /* CHA DIMM 0 - 3 Receiver Enable Delay*/ + /* CHB DIMM 0 - 3 Receiver Enable Delay*/ + u32 PtrPatternBufA; /* Ptr on stack to aligned DQS testing pattern*/ + u32 PtrPatternBufB; /* Ptr on stack to aligned DQS testing pattern*/ + u8 Channel; /* Current Channel (0= CH A, 1=CH B)*/ + u8 ByteLane; /* Current Byte Lane (0..7)*/ + u8 Direction; /* Current DQS-DQ training direction. NOTE(review): this comment used to say 0=read, 1=write, but DQS_WRITEDIR is 0 and DQS_READDIR is 1 in this header - confirm which encoding is stored here*/ + u8 Pattern; /* Current pattern*/ + u8 DQSDelay; /* Current DQS delay value*/ + u32 TrainErrors; /* Current Training Errors*/ +// u8 reserved_b_3; /* RSVD */ + u32 AMC_TSC_DeltaLo; /* Time Stamp Counter measurement of AMC, Low dword*/ + u32 
AMC_TSC_DeltaHi; /* Time Stamp Counter measurement of AMC, High dword*/ + u8 CH_B_Dly[2][2][2][8]; /* CH A byte lane 0 - 7 minimum filtered window passing DQS delay value*/ + /* CH A byte lane 0 - 7 maximum filtered window passing DQS delay value*/ + /* CH B byte lane 0 - 7 minimum filtered window passing DQS delay value*/ + /* CH B byte lane 0 - 7 maximum filtered window passing DQS delay value*/ + /* CH A byte lane 0 - 7 minimum filtered window passing DQS delay value*/ + /* CH A byte lane 0 - 7 maximum filtered window passing DQS delay value*/ + /* CH B byte lane 0 - 7 minimum filtered window passing DQS delay value*/ + /* CH B byte lane 0 - 7 maximum filtered window passing DQS delay value*/ + u32 LogicalCPUID; /* The logical CPUID of the node*/ + u16 HostBiosSrvc1; /* Word sized general purpose field for use by host BIOS. Scratch space.*/ + u32 HostBiosSrvc2; /* Dword sized general purpose field for use by host BIOS. Scratch space.*/ + u16 DimmQRPresent; /* QuadRank DIMM present?*/ + u16 DimmTrainFail; /* Bitmap showing which dimms failed training*/ + u16 CSTrainFail; /* Bitmap showing which chipselects failed training*/ + u16 DimmYr06; /* Bitmap indicating which Dimms have a manufacturer's year code <= 2006*/ + u16 DimmWk2406; /* Bitmap indicating which Dimms have a manufacturer's week code <= 24 of 2006 (June)*/ + u16 DimmDRPresent; /* Bitmap indicating that Dual Rank Dimms are present*/ + u16 DimmPlPresent; /* Bitmap indicating that Planar (1) or Stacked (0) Dimms are present.*/ + u16 ChannelTrainFail; /* Bitmap showing the channel information about failed Chip Selects*/ + /* 0 in any bit field indicates Channel 0*/ + /* 1 in any bit field indicates Channel 1*/ +}; + +/*=============================================================================== + Local Error Status Codes (DCTStatStruc.ErrCode) +===============================================================================*/ +#define SC_RunningOK 0 +#define SC_VarianceErr 1 /* Running non-optimally*/ 
+#define SC_StopError 2 /* Not Running*/ +#define SC_FatalErr 3 /* Fatal Error, MCTB has exited immediately*/ + +/*=============================================================================== + Local Error Status (DCTStatStruc.ErrStatus[31:0]) + ===============================================================================*/ +#define SB_NoDimms 0 +#define SB_DIMMChkSum 1 +#define SB_DimmMismatchM 2 /* dimm module type(buffer) mismatch*/ +#define SB_DimmMismatchT 3 /* dimm CL/T mismatch*/ +#define SB_DimmMismatchO 4 /* dimm organization mismatch (128-bit)*/ +#define SB_NoTrcTrfc 5 /* SPD missing Trc or Trfc info*/ +#define SB_NoCycTime 6 /* SPD missing byte 23 or 25*/ +#define SB_BkIntDis 7 /* Bank interleave requested but not enabled*/ +#define SB_DramECCDis 8 /* Dram ECC requested but not enabled*/ +#define SB_SpareDis 9 /* Online spare requested but not enabled*/ +#define SB_MinimumMode 10 /* Running in Minimum Mode*/ +#define SB_NORCVREN 11 /* No DQS Receiver Enable pass window found*/ +#define SB_CHA2BRCVREN 12 /* DQS Rcvr En pass window CHA to CH B too large*/ +#define SB_SmallRCVR 13 /* DQS Rcvr En pass window too small (far right of dynamic range)*/ +#define SB_NODQSPOS 14 /* No DQS-DQ passing positions*/ +#define SB_SMALLDQS 15 /* DQS-DQ passing window too small*/ + +/*=============================================================================== + Local Configuration Status (DCTStatStruc.Status[31:0]) +===============================================================================*/ +#define SB_Registered 0 /* All DIMMs are Registered*/ +#define SB_ECCDIMMs 1 /* All banks ECC capable*/ +#define SB_PARDIMMs 2 /* All banks Addr/CMD Parity capable*/ +#define SB_DiagClks 3 /* Jedec ALL slots clock enable diag mode*/ +#define SB_128bitmode 4 /* DCT in 128-bit mode operation*/ +#define SB_64MuxedMode 5 /* DCT in 64-bit mux'ed mode.*/ +#define SB_2TMode 6 /* 2T CMD timing mode is enabled.*/ +#define SB_SWNodeHole 7 /* Remapping of Node Base on this Node to 
create a gap.*/ +#define SB_HWHole 8 /* Memory Hole created on this Node using HW remapping.*/ + + + +/*=============================================================================== + NVRAM/run-time-configurable Items +===============================================================================*/ +/* Platform Configuration */ +#define NV_PACK_TYPE 0 /* CPU Package Type (2-bits) + 0=NPT L1 + 1=NPT M2 + 2=NPT S1*/ +#define NV_MAX_NODES 1 /* Number of Nodes/Sockets (4-bits)*/ +#define NV_MAX_DIMMS 2 /* Number of DIMM slots for the specified Node ID (4-bits)*/ +#define NV_MAX_MEMCLK 3 /* Maximum platform demonstrated Memclock (10-bits) + 200=200Mhz (DDR400) + 266=266Mhz (DDR533) + 333=333Mhz (DDR667) + 400=400Mhz (DDR800)*/ +#define NV_ECC_CAP 4 /* Bus ECC capable (1-bits) + 0=Platform not capable + 1=Platform is capable*/ +#define NV_4RANKType 5 /* Quad Rank DIMM slot type (2-bits) + 0=Normal + 1=R4 (4-Rank Registered DIMMs in AMD server configuration) + 2=S4 (Unbuffered SO-DIMMs)*/ +#define NV_BYPMAX 6 /* Value to set DcqBypassMax field (See Function 2, Offset 94h, [27:24] of BKDG for field definition). + 4=4 times bypass (normal for non-UMA systems) + 7=7 times bypass (normal for UMA systems)*/ +#define NV_RDWRQBYP 7 /* Value to set RdWrQByp field (See Function 2, Offset A0h, [3:2] of BKDG for field definition). 
+ 2=8 times (normal for non-UMA systems) + 3=16 times (normal for UMA systems)*/ + + +/* Dram Timing */ +#define NV_MCTUSRTMGMODE 10 /* User Memclock Mode (2-bits) + 0=Auto, no user limit + 1=Auto, user limit provided in NV_MemCkVal + 2=Manual, user value provided in NV_MemCkVal*/ +#define NV_MemCkVal 11 /* Memory Clock Value (2-bits) + 0=200Mhz + 1=266Mhz + 2=333Mhz + 3=400Mhz*/ + +/* Dram Configuration */ +#define NV_BankIntlv 20 /* Dram Bank (chip-select) Interleaving (1-bits) + 0=disable + 1=enable*/ +#define NV_AllMemClks 21 /* Turn on All DIMM clocks (1-bits) + 0=normal + 1=enable all memclocks*/ +#define NV_SPDCHK_RESTRT 22 /* SPD Check control bitmap (1-bits) + 0=Exit current node init if any DIMM has SPD checksum error + 1=Ignore faulty SPD checksums (Note: DIMM cannot be enabled)*/ +#define NV_DQSTrainCTL 23 /* DQS Signal Timing Training Control + 0=skip DQS training + 1=perform DQS training*/ +#define NV_NodeIntlv 24 /* Node Memory Interleaving (1-bits) + 0=disable + 1=enable*/ +#define NV_BurstLen32 25 /* burstLength32 for 64-bit mode (1-bits) + 0=disable (normal) + 1=enable (4 beat burst when width is 64-bits)*/ + +/* Dram Power */ +#define NV_CKE_PDEN 30 /* CKE based power down mode (1-bits) + 0=disable + 1=enable*/ +#define NV_CKE_CTL 31 /* CKE based power down control (1-bits) + 0=per Channel control + 1=per Chip select control*/ +#define NV_CLKHZAltVidC3 32 /* Memclock tri-stating during C3 and Alt VID (1-bits) + 0=disable + 1=enable*/ + +/* Memory Map/Mgt.*/ +#define NV_BottomIO 40 /* Bottom of 32-bit IO space (8-bits) + NV_BottomIO[7:0]=Addr[31:24]*/ +#define NV_BottomUMA 41 /* Bottom of shared graphics dram (8-bits) + NV_BottomUMA[7:0]=Addr[31:24]*/ +#define NV_MemHole 42 /* Memory Hole Remapping (1-bits) + 0=disable + 1=enable */ + +/* ECC */ +#define NV_ECC 50 /* Dram ECC enable*/ +#define NV_NBECC 52 /* ECC MCE enable*/ +#define NV_ChipKill 53 /* Chip-Kill ECC Mode enable*/ +#define NV_ECCRedir 54 /* Dram ECC Redirection enable*/ +#define 
NV_DramBKScrub 55 /* Dram ECC Background Scrubber CTL*/ +#define NV_L2BKScrub 56 /* L2 ECC Background Scrubber CTL*/ +#define NV_DCBKScrub 57 /* DCache ECC Background Scrubber CTL*/ +#define NV_CS_SpareCTL 58 /* Chip Select Spare Control bit 0: + 0=disable Spare + 1=enable Spare */ + /*Chip Select Spare Control bit 1-4: + Reserved, must be zero*/ +#define NV_Parity 60 /* Parity Enable*/ +#define NV_SyncOnUnEccEn 61 /* SyncOnUnEccEn control + 0=disable + 1=enable*/ + +#ifndef MAX_NODES_SUPPORTED +#define MAX_NODES_SUPPORTED 8 +#endif + +#ifndef MAX_DIMMS_SUPPORTED +#define MAX_DIMMS_SUPPORTED 8 +#endif + +#ifndef MAX_CS_SUPPORTED +#define MAX_CS_SUPPORTED 8 +#endif + + +// global function +u32 NodePresent(u32 Node); +u32 Get_NB32n(struct DCTStatStruc *pDCTstat, u32 addrx); +u32 Get_NB32(u32 addr); /* NOTE: extend addr to 32 bit for bus > 0 */ +u32 mctGetLogicalCPUID(u32 Node); + +void K8FInterleaveBanks(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); + +void mctInitWithWritetoCS(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); + +void mctGet_PS_Cfg(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); +void Get_ChannelPS_Cfg0( unsigned MAAdimms, unsigned Speed, unsigned MAAload, unsigned DATAAload, + unsigned *AddrTmgCTL, unsigned *ODC_CTL); +void Get_ChannelPS_Cfg1( unsigned MAAdimms, unsigned Speed, unsigned MAAload, + unsigned *AddrTmgCTL, unsigned *ODC_CTL, unsigned *val); +void Get_ChannelPS_Cfg2( unsigned MAAdimms, unsigned Speed, unsigned MAAload, + unsigned *AddrTmgCTL, unsigned *ODC_CTL, unsigned *val); + +u8 MCTDefRet(void); + +u32 Get_RcvrSysAddr(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 channel, u8 receiver, u8 *valid); +u32 Get_MCTSysAddr(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 channel, u8 chipsel, u8 *valid); +void K8FTrainReceiverEn(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA, u8 pass); +void K8FTrainDQSPos(struct MCTStatStruc *pMCTstat, struct 
DCTStatStruc *pDCTstatA); +u32 SetUpperFSbase(u32 addr_hi); + + +void K8FECCInit(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); + +unsigned amd_FD_support(void); +void amd_MCTInit(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); + +void K8FCPUMemTyping(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); +void K8FCPUMemTyping_clear(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); + +void K8FWaitMemClrDelay(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); +unsigned K8FCalcFinalDQSRcvValue(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, unsigned LeftRcvEn, unsigned RightRcvEn, unsigned *valid); + +void K8FGetDeltaTSCPart1(struct DCTStatStruc *pDCTstat); +void K8FGetDeltaTSCPart2(struct DCTStatStruc *pDCTstat); +#endif diff --git a/src/northbridge/amd/amdmct/mct/mct_d.c b/src/northbridge/amd/amdmct/mct/mct_d.c new file mode 100644 index 0000000000..b4a5fdcf1f --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mct_d.c @@ -0,0 +1,3862 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* Description: Main memory controller system configuration for DDR 2 */ + + +/* KNOWN ISSUES - ERRATA + * + * Trtp is not calculated correctly when the controller is in 64-bit mode, it + * is 1 busclock off. No fix planned. The controller is not ordinarily in + * 64-bit mode. + * + * 32 Byte burst not supported. No fix planned. The controller is not + * ordinarily in 64-bit mode. + * + * Trc precision does not use extra Jedec defined fractional component. + * Instead, Trc (coarse) is rounded up to nearest 1 ns. + * + * Mini and Micro DIMM not supported. Only RDIMM, UDIMM, SO-DIMM defined types + * supported. + */ + +static u8 ReconfigureDIMMspare_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA); +static void DQSTiming_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA); +static void LoadDQSSigTmgRegs_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA); +static void ResetNBECCstat_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA); +static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA); +static void MCTMemClr_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA); +static void DCTMemClr_Init_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +static void DCTMemClr_Sync_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +static void MCTMemClrSync_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA); +static u8 NodePresent_D(u8 Node); +static void SyncDCTsReady_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA); +static void StartupDCT_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static void ClearDCT_D(struct MCTStatStruc *pMCTstat, + struct 
DCTStatStruc *pDCTstat, u8 dct); +static u8 AutoCycTiming_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static void GetPresetmaxF_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static u8 PlatformSpec_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static void SPDSetBanks_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static void StitchMemory_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static u8 Get_DefTrc_k_D(u8 k); +static u16 Get_40Tk_D(u8 k); +static u16 Get_Fk_D(u8 k); +static u8 Dimm_Supports_D(struct DCTStatStruc *pDCTstat, u8 i, u8 j, u8 k); +static u8 Sys_Capability_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, int j, int k); +static u8 Get_DIMMAddress_D(struct DCTStatStruc *pDCTstat, u8 i); +static void mct_initDCT(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +static void mct_DramInit(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static u8 mct_PlatformSpec(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static void mct_SyncDCTsReady(struct DCTStatStruc *pDCTstat); +static void Get_Trdrd(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static void mct_AfterGetCLT(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static u8 mct_SPDCalcWidth(struct MCTStatStruc *pMCTstat,\ + struct DCTStatStruc *pDCTstat, u8 dct); +static void mct_AfterStitchMemory(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static u8 mct_DIMMPresence(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static void Set_OtherTiming(struct MCTStatStruc *pMCTstat, + struct 
DCTStatStruc *pDCTstat, u8 dct); +static void Get_Twrwr(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static void Get_Twrrd(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static void Get_TrwtTO(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static void Get_TrwtWB(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +static u8 Check_DqsRcvEn_Diff(struct DCTStatStruc *pDCTstat, u8 dct, + u32 dev, u32 index_reg, u32 index); +static u8 Get_DqsRcvEnGross_Diff(struct DCTStatStruc *pDCTstat, + u32 dev, u32 index_reg); +static u8 Get_WrDatGross_Diff(struct DCTStatStruc *pDCTstat, u8 dct, + u32 dev, u32 index_reg); +static u16 Get_DqsRcvEnGross_MaxMin(struct DCTStatStruc *pDCTstat, + u32 dev, u32 index_reg, u32 index); +static void mct_FinalMCT_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +static u16 Get_WrDatGross_MaxMin(struct DCTStatStruc *pDCTstat, u8 dct, + u32 dev, u32 index_reg, u32 index); +static void mct_InitialMCT_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +static void mct_init(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +static void clear_legacy_Mode(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +static void mct_HTMemMapExt(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA); +static void SetCSTriState(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static void SetODTTriState(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static void InitPhyCompensation(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static u32 mct_NodePresent_D(void); +static void WaitRoutine_D(u32 time); +static void mct_OtherTiming(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA); +static void mct_ResetDataStruct_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA); +static void 
mct_EarlyArbEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +void mct_ClrClToNB_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +static u8 CheckNBCOFEarlyArbEn(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +void mct_ClrWbEnhWsbDis_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +static void mct_BeforeDQSTrain_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA); +static void AfterDramInit_D(struct DCTStatStruc *pDCTstat, u8 dct); +static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); + + +/*See mctAutoInitMCT header for index relationships to CL and T*/ +static const u16 Table_F_k[] = {00,200,266,333,400,533 }; +static const u8 Table_T_k[] = {0x00,0x50,0x3D,0x30,0x25, 0x18 }; +static const u8 Table_CL2_j[] = {0x04,0x08,0x10,0x20,0x40, 0x80 }; +static const u8 Tab_defTrc_k[] = {0x0,0x41,0x3C,0x3C,0x3A, 0x3A }; +static const u16 Tab_40T_k[] = {00,200,150,120,100,75 }; +static const u8 Tab_TrefT_k[] = {00,0,1,1,2,2,3,4,5,6,0,0}; +static const u8 Tab_BankAddr[] = {0x0,0x08,0x09,0x10,0x0C,0x0D,0x11,0x0E,0x15,0x16,0x0F,0x17}; +static const u8 Tab_tCL_j[] = {0,2,3,4,5}; +static const u8 Tab_1KTfawT_k[] = {00,8,10,13,14,20}; +static const u8 Tab_2KTfawT_k[] = {00,10,14,17,18,24}; +static const u8 Tab_L1CLKDis[] = {8,8,6,4,2,0,8,8}; +static const u8 Tab_M2CLKDis[] = {2,0,8,8,2,0,2,0}; +static const u8 Tab_S1CLKDis[] = {8,0,8,8,8,0,8,0}; +static const u8 Table_Comp_Rise_Slew_20x[] = {7, 3, 2, 2, 0xFF}; +static const u8 Table_Comp_Rise_Slew_15x[] = {7, 7, 3, 2, 0xFF}; +static const u8 Table_Comp_Fall_Slew_20x[] = {7, 5, 3, 2, 0xFF}; +static const u8 Table_Comp_Fall_Slew_15x[] = {7, 7, 5, 3, 0xFF}; + +void mctAutoInitMCT_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + /* + * Memory may be mapped contiguously all 
the way up to 4GB (depending + * on setup options). It is the responsibility of the PCI subsystem to + * create an uncacheable IO region below 4GB and to adjust TOP_MEM + * downward prior to any IO mapping or accesses. It is likewise the + * responsibility of the CPU sub-system prior to accessing the LAPIC. + * + * Slot Number is an external convention, and is determined by OEM with + * accompanying silk screening. OEM may choose to use Slot number + * convention which is consistent with DIMM number conventions. + * All AMD engineering + * platforms do. + * + * Run-Time Requirements: + * 1. Complete HyperTransport Bus Configuration + * 2. SMBus Controller Initialized + * 3. Checksummed or Valid NVRAM bits + * 4. MCG_CTL=-1, MC4_CTL_EN=0 for all CPUs + * 5. MCi_STS from shutdown/warm reset recorded (if desired) prior to + * entry + * 6. All var MTRRs reset to zero + * 7. State of NB_CFG.DisDatMsk set properly on all CPUs + * 8. All CPUs at 2 GHz Speed (unless DQS training is not installed). + * 9. All cHT links at max Speed/Width (unless DQS training is not + * installed). 
+ * + * + * Global relationship between index values and item values: + * j CL(j) k F(k) + * -------------------------- + * 0 2.0 - - + * 1 3.0 1 200 Mhz + * 2 4.0 2 266 Mhz + * 3 5.0 3 333 Mhz + * 4 6.0 4 400 Mhz + * 5 7.0 5 533 Mhz + */ + u8 Node, NodesWmem; + u32 node_sys_base; + +restartinit: + mctInitMemGPIOs_A_D(); /* Set any required GPIOs*/ + NodesWmem = 0; + node_sys_base = 0; + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + struct DCTStatStruc *pDCTstat; + pDCTstat = pDCTstatA + Node; + pDCTstat->Node_ID = Node; + pDCTstat->dev_host = PA_HOST(Node); + pDCTstat->dev_map = PA_MAP(Node); + pDCTstat->dev_dct = PA_DCT(Node); + pDCTstat->dev_nbmisc = PA_NBMISC(Node); + pDCTstat->NodeSysBase = node_sys_base; + + print_tx("mctAutoInitMCT_D: mct_init Node ", Node); + mct_init(pMCTstat, pDCTstat); + mctNodeIDDebugPort_D(); + pDCTstat->NodePresent = NodePresent_D(Node); + if (pDCTstat->NodePresent) { /* See if Node is there*/ + print_t("mctAutoInitMCT_D: clear_legacy_Mode\n"); + clear_legacy_Mode(pMCTstat, pDCTstat); + pDCTstat->LogicalCPUID = mctGetLogicalCPUID_D(Node); + + print_t("mctAutoInitMCT_D: mct_InitialMCT_D\n"); + mct_InitialMCT_D(pMCTstat, pDCTstat); + + print_t("mctAutoInitMCT_D: mctSMBhub_Init\n"); + mctSMBhub_Init(Node); /* Switch SMBUS crossbar to proper node*/ + + print_t("mctAutoInitMCT_D: mct_initDCT\n"); + mct_initDCT(pMCTstat, pDCTstat); + if (pDCTstat->ErrCode == SC_FatalErr) { + goto fatalexit; /* any fatal errors?*/ + } else if (pDCTstat->ErrCode < SC_StopError) { + NodesWmem++; + } + } /* if Node present */ + node_sys_base = pDCTstat->NodeSysBase; + node_sys_base += (pDCTstat->NodeSysLimit + 2) & ~0x0F; + } + if (NodesWmem == 0) { + print_debug("No Nodes?!\n"); + goto fatalexit; + } + + print_t("mctAutoInitMCT_D: SyncDCTsReady_D\n"); + SyncDCTsReady_D(pMCTstat, pDCTstatA); /* Make sure DCTs are ready for accesses.*/ + + print_t("mctAutoInitMCT_D: HTMemMapInit_D\n"); + HTMemMapInit_D(pMCTstat, pDCTstatA); /* Map local memory into 
system address space.*/ + mctHookAfterHTMap(); + + print_t("mctAutoInitMCT_D: CPUMemTyping_D\n"); + CPUMemTyping_D(pMCTstat, pDCTstatA); /* Map dram into WB/UC CPU cacheability */ + mctHookAfterCPU(); /* Setup external northbridge(s) */ + + print_t("mctAutoInitMCT_D: DQSTiming_D\n"); + DQSTiming_D(pMCTstat, pDCTstatA); /* Get Receiver Enable and DQS signal timing*/ + + print_t("mctAutoInitMCT_D: :OtherTiming\n"); + mct_OtherTiming(pMCTstat, pDCTstatA); + + if (ReconfigureDIMMspare_D(pMCTstat, pDCTstatA)) { /* RESET# if 1st pass of DIMM spare enabled*/ + goto restartinit; + } + + InterleaveNodes_D(pMCTstat, pDCTstatA); + InterleaveChannels_D(pMCTstat, pDCTstatA); + + print_t("mctAutoInitMCT_D: ECCInit_D\n"); + if (ECCInit_D(pMCTstat, pDCTstatA)) { /* Setup ECC control and ECC check-bits*/ + print_t("mctAutoInitMCT_D: MCTMemClr_D\n"); + MCTMemClr_D(pMCTstat,pDCTstatA); + } + + mct_FinalMCT_D(pMCTstat, (pDCTstatA + 0) ); // Node 0 + print_t("All Done\n"); + return; + +fatalexit: + die("mct_d: fatalexit"); +} + + +static u8 ReconfigureDIMMspare_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + u8 ret; + + if (mctGet_NVbits(NV_CS_SpareCTL)) { + if (MCT_DIMM_SPARE_NO_WARM) { + /* Do no warm-reset DIMM spare */ + if (pMCTstat->GStatus & (1 << GSB_EnDIMMSpareNW)) { + LoadDQSSigTmgRegs_D(pMCTstat, pDCTstatA); + ret = 0; + } else { + mct_ResetDataStruct_D(pMCTstat, pDCTstatA); + pMCTstat->GStatus |= 1 << GSB_EnDIMMSpareNW; + ret = 1; + } + } else { + /* Do warm-reset DIMM spare */ + if (mctGet_NVbits(NV_DQSTrainCTL)) + mctWarmReset_D(); + ret = 0; + } + + + } else { + ret = 0; + } + + return ret; +} + + +static void DQSTiming_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + u8 nv_DQSTrainCTL; + + if (pMCTstat->GStatus & (1 << GSB_EnDIMMSpareNW)) { + return; + } + nv_DQSTrainCTL = mctGet_NVbits(NV_DQSTrainCTL); + /* FIXME: BOZO- DQS training every time*/ + nv_DQSTrainCTL = 1; + + if (nv_DQSTrainCTL) { + print_t("DQSTiming_D: 
mct_BeforeDQSTrain_D:\n"); + mct_BeforeDQSTrain_D(pMCTstat, pDCTstatA);; + phyAssistedMemFnceTraining(pMCTstat, pDCTstatA); + mctHookBeforeAnyTraining(); + + print_t("DQSTiming_D: TrainReceiverEn_D FirstPass:\n"); + TrainReceiverEn_D(pMCTstat, pDCTstatA, FirstPass); + + print_t("DQSTiming_D: mct_TrainDQSPos_D\n"); + mct_TrainDQSPos_D(pMCTstat, pDCTstatA); + + // Second Pass never used for Barcelona! + //print_t("DQSTiming_D: TrainReceiverEn_D SecondPass:\n"); + //TrainReceiverEn_D(pMCTstat, pDCTstatA, SecondPass); + + print_t("DQSTiming_D: mctSetEccDQSRcvrEn_D\n"); + mctSetEccDQSRcvrEn_D(pMCTstat, pDCTstatA); + + print_t("DQSTiming_D: TrainMaxReadLatency_D\n"); +//FIXME - currently uses calculated value TrainMaxReadLatency_D(pMCTstat, pDCTstatA); + mctHookAfterAnyTraining(); + mctSaveDQSSigTmg_D(); + + print_t("DQSTiming_D: mct_EndDQSTraining_D\n"); + mct_EndDQSTraining_D(pMCTstat, pDCTstatA); + + print_t("DQSTiming_D: MCTMemClr_D\n"); + MCTMemClr_D(pMCTstat, pDCTstatA); + } else { + mctGetDQSSigTmg_D(); /* get values into data structure */ + LoadDQSSigTmgRegs_D(pMCTstat, pDCTstatA); /* load values into registers.*/ + //mctDoWarmResetMemClr_D(); + MCTMemClr_D(pMCTstat, pDCTstatA); + } +} + + +static void LoadDQSSigTmgRegs_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + u8 Node, Receiver, Channel, Dir, DIMM; + u32 dev; + u32 index_reg; + u32 reg; + u32 index; + u32 val; + + + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + struct DCTStatStruc *pDCTstat; + pDCTstat = pDCTstatA + Node; + + if (pDCTstat->DCTSysLimit) { + dev = pDCTstat->dev_dct; + for (Channel = 0;Channel < 2; Channel++) { + /* there are four receiver pairs, + loosely associated with chipselects.*/ + index_reg = 0x98 + Channel * 0x100; + for (Receiver = 0; Receiver < 8; Receiver += 2) { + /* Set Receiver Enable Values */ + mct_SetRcvrEnDly_D(pDCTstat, + 0, /* RcvrEnDly */ + 1, /* FinalValue, From stack */ + Channel, + Receiver, + dev, index_reg, + (Receiver >> 1) * 3 + 
0x10, /* Addl_Index */ + 2); /* Pass Second Pass ? */ + + } + } + for (Channel = 0; Channel<2; Channel++) { + SetEccDQSRcvrEn_D(pDCTstat, Channel); + } + + for (Channel = 0; Channel < 2; Channel++) { + u8 *p; + index_reg = 0x98 + Channel * 0x100; + + /* NOTE: + * when 400, 533, 667, it will support dimm0/1/2/3, + * and set conf for dimm0, hw will copy to dimm1/2/3 + * set for dimm1, hw will copy to dimm3 + * Rev A/B only support DIMM0/1 when 800Mhz and above + * + 0x100 to next dimm + * Rev C support DIMM0/1/2/3 when 800Mhz and above + * + 0x100 to next dimm + */ + for (DIMM = 0; DIMM < 2; DIMM++) { + if (DIMM==0) { + index = 0; /* CHA Write Data Timing Low */ + } else { + if (pDCTstat->Speed >= 4) { + index = 0x100 * DIMM; + } else { + break; + } + } + for (Dir=0;Dir<2;Dir++) {//RD/WR + p = pDCTstat->CH_D_DIR_B_DQS[Channel][DIMM][Dir]; + val = stream_to_int(p); /* CHA Read Data Timing High */ + Set_NB32_index_wait(dev, index_reg, index+1, val); + val = stream_to_int(p+4); /* CHA Write Data Timing High */ + Set_NB32_index_wait(dev, index_reg, index+2, val); + val = *(p+8); /* CHA Write ECC Timing */ + Set_NB32_index_wait(dev, index_reg, index+3, val); + index += 4; + } + } + } + + for (Channel = 0; Channel<2; Channel++) { + reg = 0x78 + Channel * 0x100; + val = Get_NB32(dev, reg); + val &= ~(0x3ff<<22); + val |= ((u32) pDCTstat->CH_MaxRdLat[Channel] << 22); + val &= ~(1<<DqsRcvEnTrain); + Set_NB32(dev, reg, val); /* program MaxRdLatency to correspond with current delay*/ + } + } + } +} + + +static void ResetNBECCstat_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + /* Clear MC4_STS for all Nodes in the system. This is required in some + * circumstances to clear left over garbage from cold reset, shutdown, + * or normal ECC memory conditioning. + */ + + //FIXME: this function depends on pDCTstat Array ( with Node id ) - Is this really a problem? 
+ + u32 dev; + u8 Node; + + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + struct DCTStatStruc *pDCTstat; + pDCTstat = pDCTstatA + Node; + + if (pDCTstat->NodePresent) { + dev = pDCTstat->dev_nbmisc; + /*MCA NB Status Low (alias to MC4_STS[31:0] */ + Set_NB32(dev, 0x48, 0); + /* MCA NB Status High (alias to MC4_STS[63:32] */ + Set_NB32(dev, 0x4C, 0); + } + } +} + + +static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + u8 Node; + u32 NextBase, BottomIO; + u8 _MemHoleRemap, DramHoleBase, DramHoleOffset; + u32 HoleSize, DramSelBaseAddr; + + u32 val; + u32 base; + u32 limit; + u32 dev; + struct DCTStatStruc *pDCTstat; + + _MemHoleRemap = mctGet_NVbits(NV_MemHole); + + if (pMCTstat->HoleBase == 0) { + DramHoleBase = mctGet_NVbits(NV_BottomIO); + } else { + DramHoleBase = pMCTstat->HoleBase >> (24-8); + } + + BottomIO = DramHoleBase << (24-8); + + NextBase = 0; + pDCTstat = pDCTstatA + 0; + dev = pDCTstat->dev_map; + + + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + DramSelBaseAddr = 0; + pDCTstat = pDCTstatA + Node; + if (!pDCTstat->GangedMode) { + DramSelBaseAddr = pDCTstat->NodeSysLimit - pDCTstat->DCTSysLimit; + /*In unganged mode, we must add DCT0 and DCT1 to DCTSysLimit */ + val = pDCTstat->NodeSysLimit; + if ((val & 0xFF) == 0xFE) { + DramSelBaseAddr++; + val++; + } + pDCTstat->DCTSysLimit = val; + } + + base = pDCTstat->DCTSysBase; + limit = pDCTstat->DCTSysLimit; + if (limit > base) { + base += NextBase; + limit += NextBase; + DramSelBaseAddr += NextBase; + printk_debug(" Node: %02x base: %02x limit: %02x BottomIO: %02x\n", Node, base, limit, BottomIO); + + if (_MemHoleRemap) { + if ((base < BottomIO) && (limit >= BottomIO)) { + /* HW Dram Remap */ + pDCTstat->Status |= 1 << SB_HWHole; + pMCTstat->GStatus |= 1 << GSB_HWHole; + pDCTstat->DCTSysBase = base; + pDCTstat->DCTSysLimit = limit; + pDCTstat->DCTHoleBase = BottomIO; + pMCTstat->HoleBase = BottomIO; + HoleSize = _4GB_RJ8 - BottomIO; /* 
HoleSize[39:8] */ + if ((DramSelBaseAddr > 0) && (DramSelBaseAddr < BottomIO)) + base = DramSelBaseAddr; + val = ((base + HoleSize) >> (24-8)) & 0xFF; + DramHoleOffset = val; + val <<= 8; /* shl 16, rol 24 */ + val |= DramHoleBase << 24; + val |= 1 << DramHoleValid; + Set_NB32(dev, 0xF0, val); /*Dram Hole Address Register*/ + pDCTstat->DCTSysLimit += HoleSize; + base = pDCTstat->DCTSysBase; + limit = pDCTstat->DCTSysLimit; + } else if (base == BottomIO) { + /* SW Node Hoist */ + pMCTstat->GStatus |= 1<<GSB_SpIntRemapHole; + pDCTstat->Status |= 1<<SB_SWNodeHole; + pMCTstat->GStatus |= 1<<GSB_SoftHole; + pMCTstat->HoleBase = base; + limit -= base; + base = _4GB_RJ8; + limit += base; + pDCTstat->DCTSysBase = base; + pDCTstat->DCTSysLimit = limit; + } else { + /* No Remapping. Normal Contiguous mapping */ + pDCTstat->DCTSysBase = base; + pDCTstat->DCTSysLimit = limit; + } + } else { + /*No Remapping. Normal Contiguous mapping*/ + pDCTstat->DCTSysBase = base; + pDCTstat->DCTSysLimit = limit; + } + base |= 3; /* set WE,RE fields*/ + pMCTstat->SysLimit = limit; + } + Set_NB32(dev, 0x40 + (Node << 3), base); /* [Node] + Dram Base 0 */ + val = limit & 0xffff0000; + val |= Node; /* set DstNode*/ + Set_NB32(dev, 0x44 + (Node << 3), val); /* set DstNode */ + + limit = pDCTstat->DCTSysLimit; + if (limit) { + NextBase = (limit & 0xffff0000) + 0x10000; + } + } + + /* Copy dram map from Node 0 to Node 1-7 */ + for (Node = 1; Node < MAX_NODES_SUPPORTED; Node++) { + pDCTstat = pDCTstatA + Node; + u32 reg; + u32 devx = pDCTstat->dev_map; + + if (pDCTstat->NodePresent) { + printk_debug(" Copy dram map from Node 0 to Node %02x \n", Node); + reg = 0x40; /*Dram Base 0*/ + do { + val = Get_NB32(dev, reg); + Set_NB32(devx, reg, val); + reg += 4; + } while ( reg < 0x80); + } else { + break; /* stop at first absent Node */ + } + } + + /*Copy dram map to F1x120/124*/ + mct_HTMemMapExt(pMCTstat, pDCTstatA); +} + + +static void MCTMemClr_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc 
*pDCTstatA) +{ + + /* Initiates a memory clear operation for all node. The mem clr + * is done in paralel. After the memclr is complete, all processors + * status are checked to ensure that memclr has completed. + */ + u8 Node; + struct DCTStatStruc *pDCTstat; + + if (!mctGet_NVbits(NV_DQSTrainCTL)){ + // FIXME: callback to wrapper: mctDoWarmResetMemClr_D + } else { // NV_DQSTrainCTL == 1 + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + pDCTstat = pDCTstatA + Node; + + if (pDCTstat->NodePresent) { + DCTMemClr_Init_D(pMCTstat, pDCTstat); + } + } + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + pDCTstat = pDCTstatA + Node; + + if (pDCTstat->NodePresent) { + DCTMemClr_Sync_D(pMCTstat, pDCTstat); + } + } + } +} + + +static void DCTMemClr_Init_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + u32 val; + u32 dev; + u32 reg; + + /* Initiates a memory clear operation on one node */ + if (pDCTstat->DCTSysLimit) { + dev = pDCTstat->dev_dct; + reg = 0x110; + + do { + val = Get_NB32(dev, reg); + } while (val & (1 << MemClrBusy)); + + val |= (1 << MemClrInit); + Set_NB32(dev, reg, val); + + } +} + + +static void MCTMemClrSync_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + /* Ensures that memory clear has completed on all node.*/ + u8 Node; + struct DCTStatStruc *pDCTstat; + + if (!mctGet_NVbits(NV_DQSTrainCTL)){ + // callback to wrapper: mctDoWarmResetMemClr_D + } else { // NV_DQSTrainCTL == 1 + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + pDCTstat = pDCTstatA + Node; + + if (pDCTstat->NodePresent) { + DCTMemClr_Sync_D(pMCTstat, pDCTstat); + } + } + } +} + + +static void DCTMemClr_Sync_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + u32 val; + u32 dev = pDCTstat->dev_dct; + u32 reg; + + /* Ensure that a memory clear operation has completed on one node */ + if (pDCTstat->DCTSysLimit){ + reg = 0x110; + + do { + val = Get_NB32(dev, reg); + } while (val & (1 << MemClrBusy)); + + do { + 
val = Get_NB32(dev, reg); + } while (!(val & (1 << Dr_MemClrStatus))); + } + + val = 0x0FE40FC0; // BKDG recommended + val |= MCCH_FlushWrOnStpGnt; // Set for S3 + Set_NB32(dev, 0x11C, val); +} + + +static u8 NodePresent_D(u8 Node) +{ + /* + * Determine if a single Hammer Node exists within the network. + */ + + u32 dev; + u32 val; + u32 dword; + u8 ret = 0; + + dev = PA_HOST(Node); /*test device/vendor id at host bridge */ + val = Get_NB32(dev, 0); + dword = mct_NodePresent_D(); /* FIXME: BOZO -11001022h rev for F */ + if (val == dword) { /* AMD Hammer Family CPU HT Configuration */ + if (oemNodePresent_D(Node, &ret)) + goto finish; + /* Node ID register */ + val = Get_NB32(dev, 0x60); + val &= 0x07; + dword = Node; + if (val == dword) /* current nodeID = requested nodeID ? */ + ret = 1; +finish: + ; + } + + return ret; +} + + +static void DCTInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct) +{ + /* + * Initialize DRAM on single Athlon 64/Opteron Node. + */ + + u8 stopDCTflag; + u32 val; + + ClearDCT_D(pMCTstat, pDCTstat, dct); + stopDCTflag = 1; /*preload flag with 'disable' */ + if (mct_DIMMPresence(pMCTstat, pDCTstat, dct) < SC_StopError) { + print_t("\t\tDCTInit_D: mct_DIMMPresence Done\n"); + if (mct_SPDCalcWidth(pMCTstat, pDCTstat, dct) < SC_StopError) { + print_t("\t\tDCTInit_D: mct_SPDCalcWidth Done\n"); + if (AutoCycTiming_D(pMCTstat, pDCTstat, dct) < SC_StopError) { + print_t("\t\tDCTInit_D: AutoCycTiming_D Done\n"); + if (AutoConfig_D(pMCTstat, pDCTstat, dct) < SC_StopError) { + print_t("\t\tDCTInit_D: AutoConfig_D Done\n"); + if (PlatformSpec_D(pMCTstat, pDCTstat, dct) < SC_StopError) { + print_t("\t\tDCTInit_D: PlatformSpec_D Done\n"); + stopDCTflag = 0; + if (!(pMCTstat->GStatus & (1 << GSB_EnDIMMSpareNW))) { + print_t("\t\tDCTInit_D: StartupDCT_D\n"); + StartupDCT_D(pMCTstat, pDCTstat, dct); /*yeaahhh! 
*/ + } + } + } + } + } + } + if (stopDCTflag) { + u32 reg_off = dct * 0x100; + val = 1<<DisDramInterface; + Set_NB32(pDCTstat->dev_dct, reg_off+0x94, val); + /*To maximize power savings when DisDramInterface=1b, + all of the MemClkDis bits should also be set.*/ + val = 0xFF000000; + Set_NB32(pDCTstat->dev_dct, reg_off+0x88, val); + } +} + + +static void SyncDCTsReady_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + /* Wait (and block further access to dram) for all DCTs to be ready, + * by polling all InitDram bits and waiting for possible memory clear + * operations to be complete. Read MemClkFreqVal bit to see if + * the DIMMs are present in this node. + */ + + u8 Node; + + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + struct DCTStatStruc *pDCTstat; + pDCTstat = pDCTstatA + Node; + mct_SyncDCTsReady(pDCTstat); + } +} + + +static void StartupDCT_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + /* Read MemClkFreqVal bit to see if the DIMMs are present in this node. + * If the DIMMs are present then set the DRAM Enable bit for this node. + * + * Setting dram init starts up the DCT state machine, initializes the + * dram devices with MRS commands, and kicks off any + * HW memory clear process that the chip is capable of. The sooner + * that dram init is set for all nodes, the faster the memory system + * initialization can complete. Thus, the init loop is unrolled into + * two loops so as to start the processeses for non BSP nodes sooner. + * This procedure will not wait for the process to finish. + * Synchronization is handled elsewhere. 
+ */ + + u32 val; + u32 dev; + u8 byte; + u32 reg; + u32 reg_off = dct * 0x100; + + dev = pDCTstat->dev_dct; + val = Get_NB32(dev, 0x94 + reg_off); + if (val & (1<<MemClkFreqVal)) { + print_t("\t\t\tStartupDCT_D: MemClkFreqVal\n"); + byte = mctGet_NVbits(NV_DQSTrainCTL); + if (byte == 1) { + /* Enable DQSRcvEn training mode */ + print_t("\t\t\tStartupDCT_D: DqsRcvEnTrain set \n"); + reg = 0x78 + reg_off; + val = Get_NB32(dev, reg); + /* Setting this bit forces a 1T window with hard left + * pass/fail edge and a probabalistic right pass/fail + * edge. LEFT edge is referenced for final + * receiver enable position.*/ + val |= 1 << DqsRcvEnTrain; + Set_NB32(dev, reg, val); + } + mctHookBeforeDramInit(); /* generalized Hook */ + print_t("\t\t\tStartupDCT_D: DramInit \n"); + mct_DramInit(pMCTstat, pDCTstat, dct); + AfterDramInit_D(pDCTstat, dct); + mctHookAfterDramInit(); /* generalized Hook*/ + } +} + + +static void ClearDCT_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u32 reg_end; + u32 dev = pDCTstat->dev_dct; + u32 reg = 0x40 + 0x100 * dct; + u32 val = 0; + + if (pMCTstat->GStatus & (1 << GSB_EnDIMMSpareNW)) { + reg_end = 0x78 + 0x100 * dct; + } else { + reg_end = 0xA4 + 0x100 * dct; + } + + while(reg < reg_end) { + Set_NB32(dev, reg, val); + reg += 4; + } + + val = 0; + dev = pDCTstat->dev_map; + reg = 0xF0; + Set_NB32(dev, reg, val); +} + + +static u8 AutoCycTiming_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + /* Initialize DCT Timing registers as per DIMM SPD. + * For primary timing (T, CL) use best case T value. + * For secondary timing params., use most aggressive settings + * of slowest DIMM. + * + * There are three components to determining "maximum frequency": + * SPD component, Bus load component, and "Preset" max frequency + * component. 
+ * + * The SPD component is a function of the min cycle time specified + * by each DIMM, and the interaction of cycle times from all DIMMs + * in conjunction with CAS latency. The SPD component only applies + * when user timing mode is 'Auto'. + * + * The Bus load component is a limiting factor determined by electrical + * characteristics on the bus as a result of varying number of device + * loads. The Bus load component is specific to each platform but may + * also be a function of other factors. The bus load component only + * applies when user timing mode is 'Auto'. + * + * The Preset component is subdivided into three items and is the + * minimum of the set: Silicon revision, user limit setting when user + * timing mode is 'Auto' and memclock mode is 'Limit', OEM build + * specification of the maximum frequency. The Preset component is only + * applies when user timing mode is 'Auto'. + */ + + u8 i; + u8 Twr, Trtp; + u8 Trp, Trrd, Trcd, Tras, Trc, Trfc[4], Rows; + u32 DramTimingLo, DramTimingHi; + u16 Tk10, Tk40; + u8 Twtr; + u8 LDIMM; + u8 DDR2_1066; + u8 byte; + u32 dword; + u32 dev; + u32 reg; + u32 reg_off; + u32 val; + u16 smbaddr; + + /* Get primary timing (CAS Latency and Cycle Time) */ + if (pDCTstat->Speed == 0) { + mctGet_MaxLoadFreq(pDCTstat); + + /* and Factor in presets (setup options, Si cap, etc.) */ + GetPresetmaxF_D(pMCTstat, pDCTstat); + + /* Go get best T and CL as specified by DIMM mfgs. 
and OEM */ + SPDGetTCL_D(pMCTstat, pDCTstat, dct); + /* skip callback mctForce800to1067_D */ + pDCTstat->Speed = pDCTstat->DIMMAutoSpeed; + pDCTstat->CASL = pDCTstat->DIMMCASL; + + /* if "manual" memclock mode */ + if ( mctGet_NVbits(NV_MCTUSRTMGMODE) == 2) + pDCTstat->Speed = mctGet_NVbits(NV_MemCkVal) + 1; + + mct_AfterGetCLT(pMCTstat, pDCTstat, dct); + } + + /* Gather all DIMM mini-max values for cycle timing data */ + Rows = 0; + Trp = 0; + Trrd = 0; + Trcd = 0; + Trtp = 0; + Tras = 0; + Trc = 0; + Twr = 0; + Twtr = 0; + for (i=0; i < 4; i++) + Trfc[i] = 0; + + for ( i = 0; i< MAX_DIMMS_SUPPORTED; i++) { + LDIMM = i >> 1; + if (pDCTstat->DIMMValid & (1 << i)) { + smbaddr = Get_DIMMAddress_D(pDCTstat, i); + byte = mctRead_SPD(smbaddr, SPD_ROWSZ); + if (Rows < byte) + Rows = byte; /* keep track of largest row sz */ + + byte = mctRead_SPD(smbaddr, SPD_TRP); + if (Trp < byte) + Trp = byte; + + byte = mctRead_SPD(smbaddr, SPD_TRRD); + if (Trrd < byte) + Trrd = byte; + + byte = mctRead_SPD(smbaddr, SPD_TRCD); + if (Trcd < byte) + Trcd = byte; + + byte = mctRead_SPD(smbaddr, SPD_TRTP); + if (Trtp < byte) + Trtp = byte; + + byte = mctRead_SPD(smbaddr, SPD_TWR); + if (Twr < byte) + Twr = byte; + + byte = mctRead_SPD(smbaddr, SPD_TWTR); + if (Twtr < byte) + Twtr = byte; + + val = mctRead_SPD(smbaddr, SPD_TRC); + if ((val == 0) || (val == 0xFF)) { + pDCTstat->ErrStatus |= 1<<SB_NoTrcTrfc; + pDCTstat->ErrCode = SC_VarianceErr; + val = Get_DefTrc_k_D(pDCTstat->DIMMAutoSpeed); + } else { + byte = mctRead_SPD(smbaddr, SPD_TRCRFC); + if (byte & 0xF0) { + val++; /* round up in case fractional extention is non-zero.*/ + } + } + if (Trc < val) + Trc = val; + + /* dev density=rank size/#devs per rank */ + byte = mctRead_SPD(smbaddr, SPD_BANKSZ); + + val = ((byte >> 5) | (byte << 3)) & 0xFF; + val <<= 2; + + byte = mctRead_SPD(smbaddr, SPD_DEVWIDTH) & 0xFE; /* dev density=2^(rows+columns+banks) */ + if (byte == 4) { + val >>= 4; + } else if (byte == 8) { + val >>= 3; + } else if 
(byte == 16) { + val >>= 2; + } + + byte = bsr(val); + + if (Trfc[LDIMM] < byte) + Trfc[LDIMM] = byte; + + byte = mctRead_SPD(smbaddr, SPD_TRAS); + if (Tras < byte) + Tras = byte; + } /* Dimm Present */ + } + + /* Convert DRAM CycleTiming values and store into DCT structure */ + DDR2_1066 = 0; + byte = pDCTstat->DIMMAutoSpeed; + if (byte == 5) + DDR2_1066 = 1; + Tk40 = Get_40Tk_D(byte); + Tk10 = Tk40>>2; + + /* Notes: + 1. All secondary time values given in SPDs are in binary with units of ns. + 2. Some time values are scaled by four, in order to have least count of 0.25 ns + (more accuracy). JEDEC SPD spec. shows which ones are x1 and x4. + 3. Internally to this SW, cycle time, Tk, is scaled by 10 to affect a + least count of 0.1 ns (more accuracy). + 4. SPD values not scaled are multiplied by 10 and then divided by 10T to find + equivalent minimum number of bus clocks (a remainder causes round-up of clocks). + 5. SPD values that are prescaled by 4 are multiplied by 10 and then divided by 40T to find + equivalent minimum number of bus clocks (a remainder causes round-up of clocks).*/ + + /* Tras */ + dword = Tras * 40; + pDCTstat->DIMMTras = (u16)dword; + val = dword / Tk40; + if (dword % Tk40) { /* round up number of busclocks */ + val++; + } + if (DDR2_1066) { + if (val < Min_TrasT_1066) + val = Min_TrasT_1066; + else if (val > Max_TrasT_1066) + val = Max_TrasT_1066; + } else { + if (val < Min_TrasT) + val = Min_TrasT; + else if (val > Max_TrasT) + val = Max_TrasT; + } + pDCTstat->Tras = val; + + /* Trp */ + dword = Trp * 10; + pDCTstat->DIMMTrp = dword; + val = dword / Tk40; + if (dword % Tk40) { /* round up number of busclocks */ + val++; + } + if (DDR2_1066) { + if (val < Min_TrasT_1066) + val = Min_TrpT_1066; + else if (val > Max_TrpT_1066) + val = Max_TrpT_1066; + } else { + if (val < Min_TrpT) + val = Min_TrpT; + else if (val > Max_TrpT) + val = Max_TrpT; + } + pDCTstat->Trp = val; + + /*Trrd*/ + dword = Trrd * 10; + pDCTstat->DIMMTrrd = dword; + val = 
dword / Tk40; + if (dword % Tk40) { /* round up number of busclocks */ + val++; + } + if (DDR2_1066) { + if (val < Min_TrrdT_1066) + val = Min_TrrdT_1066; + else if (val > Max_TrrdT_1066) + val = Max_TrrdT_1066; + } else { + if (val < Min_TrrdT) + val = Min_TrrdT; + else if (val > Max_TrrdT) + val = Max_TrrdT; + } + pDCTstat->Trrd = val; + + /* Trcd */ + dword = Trcd * 10; + pDCTstat->DIMMTrcd = dword; + val = dword / Tk40; + if (dword % Tk40) { /* round up number of busclocks */ + val++; + } + if (DDR2_1066) { + if (val < Min_TrcdT_1066) + val = Min_TrcdT_1066; + else if (val > Max_TrcdT_1066) + val = Max_TrcdT_1066; + } else { + if (val < Min_TrcdT) + val = Min_TrcdT; + else if (val > Max_TrcdT) + val = Max_TrcdT; + } + pDCTstat->Trcd = val; + + /* Trc */ + dword = Trc * 40; + pDCTstat->DIMMTrc = dword; + val = dword / Tk40; + if (dword % Tk40) { /* round up number of busclocks */ + val++; + } + if (DDR2_1066) { + if (val < Min_TrcT_1066) + val = Min_TrcT_1066; + else if (val > Max_TrcT_1066) + val = Max_TrcT_1066; + } else { + if (val < Min_TrcT) + val = Min_TrcT; + else if (val > Max_TrcT) + val = Max_TrcT; + } + pDCTstat->Trc = val; + + /* Trtp */ + dword = Trtp * 10; + pDCTstat->DIMMTrtp = dword; + val = pDCTstat->Speed; + if (val <= 2) { + val = 2; /* Calculate by 7.75ns / Speed in ns to get clock # */ + } else if (val == 4) { /* Note a speed of 3 will be a Trtp of 3 */ + val = 3; + } else if (val == 5){ + val = 2; + } + pDCTstat->Trtp = val; + + /* Twr */ + dword = Twr * 10; + pDCTstat->DIMMTwr = dword; + val = dword / Tk40; + if (dword % Tk40) { /* round up number of busclocks */ + val++; + } + if (DDR2_1066) { + if (val < Min_TwrT_1066) + val = Min_TwrT_1066; + else if (val > Max_TwrT_1066) + val = Max_TwrT_1066; + } else { + if (val < Min_TwrT) + val = Min_TwrT; + else if (val > Max_TwrT) + val = Max_TwrT; + } + pDCTstat->Twr = val; + + /* Twtr */ + dword = Twtr * 10; + pDCTstat->DIMMTwtr = dword; + val = dword / Tk40; + if (dword % Tk40) { /* round up 
number of busclocks */ + val++; + } + if (DDR2_1066) { + if (val < Min_TwrT_1066) + val = Min_TwtrT_1066; + else if (val > Max_TwtrT_1066) + val = Max_TwtrT_1066; + } else { + if (val < Min_TwtrT) + val = Min_TwtrT; + else if (val > Max_TwtrT) + val = Max_TwtrT; + } + pDCTstat->Twtr = val; + + + /* Trfc0-Trfc3 */ + for (i=0; i<4; i++) + pDCTstat->Trfc[i] = Trfc[i]; + + mctAdjustAutoCycTmg_D(); + + /* Program DRAM Timing values */ + DramTimingLo = 0; /* Dram Timing Low init */ + val = pDCTstat->CASL; + val = Tab_tCL_j[val]; + DramTimingLo |= val; + + val = pDCTstat->Trcd; + if (DDR2_1066) + val -= Bias_TrcdT_1066; + else + val -= Bias_TrcdT; + + DramTimingLo |= val<<4; + + val = pDCTstat->Trp; + if (DDR2_1066) + val -= Bias_TrpT_1066; + else { + val -= Bias_TrpT; + val <<= 1; + } + DramTimingLo |= val<<7; + + val = pDCTstat->Trtp; + val -= Bias_TrtpT; + DramTimingLo |= val<<11; + + val = pDCTstat->Tras; + if (DDR2_1066) + val -= Bias_TrasT_1066; + else + val -= Bias_TrasT; + DramTimingLo |= val<<12; + + val = pDCTstat->Trc; + val -= Bias_TrcT; + DramTimingLo |= val<<16; + + if (!DDR2_1066) { + val = pDCTstat->Twr; + val -= Bias_TwrT; + DramTimingLo |= val<<20; + } + + val = pDCTstat->Trrd; + if (DDR2_1066) + val -= Bias_TrrdT_1066; + else + val -= Bias_TrrdT; + DramTimingLo |= val<<22; + + + DramTimingHi = 0; /* Dram Timing Low init */ + val = pDCTstat->Twtr; + if (DDR2_1066) + val -= Bias_TwtrT_1066; + else + val -= Bias_TwtrT; + DramTimingHi |= val<<8; + + val = 2; + DramTimingHi |= val<<16; + + val = 0; + for (i=4;i>0;i--) { + val <<= 3; + val |= Trfc[i-1]; + } + DramTimingHi |= val << 20; + + + dev = pDCTstat->dev_dct; + reg_off = 0x100 * dct; + print_tx("AutoCycTiming: DramTimingLo ", DramTimingLo); + print_tx("AutoCycTiming: DramTimingHi ", DramTimingHi); + + Set_NB32(dev, 0x88 + reg_off, DramTimingLo); /*DCT Timing Low*/ + DramTimingHi |=0x0000FC77; + Set_NB32(dev, 0x8c + reg_off, DramTimingHi); /*DCT Timing Hi*/ + + if (DDR2_1066) { + /* Twr */ + dword = 
pDCTstat->Twr; + dword -= Bias_TwrT_1066; + dword <<= 4; + reg = 0x84 + reg_off; + val = Get_NB32(dev, reg); + val &= 0x8F; + val |= dword; + Set_NB32(dev, reg, val); + } +// dump_pci_device(PCI_DEV(0, 0x18+pDCTstat->Node_ID, 2)); + + print_tx("AutoCycTiming: Status ", pDCTstat->Status); + print_tx("AutoCycTiming: ErrStatus ", pDCTstat->ErrStatus); + print_tx("AutoCycTiming: ErrCode ", pDCTstat->ErrCode); + print_t("AutoCycTiming: Done\n"); + + mctHookAfterAutoCycTmg(); + + return pDCTstat->ErrCode; +} + + +static void GetPresetmaxF_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + /* Get max frequency from OEM platform definition, from any user + * override (limiting) of max frequency, and from any Si Revision + * Specific information. Return the least of these three in + * DCTStatStruc.PresetmaxFreq. + */ + + u16 proposedFreq; + u16 word; + + /* Get CPU Si Revision defined limit (NPT) */ + proposedFreq = 533; /* Rev F0 programmable max memclock is */ + + /*Get User defined limit if "limit" mode */ + if ( mctGet_NVbits(NV_MCTUSRTMGMODE) == 1) { + word = Get_Fk_D(mctGet_NVbits(NV_MemCkVal) + 1); + if (word < proposedFreq) + proposedFreq = word; + + /* Get Platform defined limit */ + word = mctGet_NVbits(NV_MAX_MEMCLK); + if (word < proposedFreq) + proposedFreq = word; + + word = pDCTstat->PresetmaxFreq; + if (word > proposedFreq) + word = proposedFreq; + + pDCTstat->PresetmaxFreq = word; + } +} + + + +static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + /* Find the best T and CL primary timing parameter pair, per Mfg., + * for the given set of DIMMs, and store into DCTStatStruc + * (.DIMMAutoSpeed and .DIMMCASL). See "Global relationship between + * index values and item values" for definition of CAS latency + * index (j) and Frequency index (k). + */ + int i, j, k; + u8 T1min, CL1min; + + /* i={0..7} (std. physical DIMM number) + * j is an integer which enumerates increasing CAS latency. 
+ * k is an integer which enumerates decreasing cycle time. + * CL no. {0,1,2} corresponds to CL X, CL X-.5, or CL X-1 (per individual DIMM) + * Max timing values are per parameter, of all DIMMs, spec'd in ns like the SPD. + */ + + CL1min = 0xFF; + T1min = 0xFF; + for (k=K_MAX; k >= K_MIN; k--) { + for (j = J_MIN; j <= J_MAX; j++) { + if (Sys_Capability_D(pMCTstat, pDCTstat, j, k) ) { + /* 1. check to see if DIMMi is populated. + 2. check if DIMMi supports CLj and Tjk */ + for (i = 0; i < MAX_DIMMS_SUPPORTED; i++) { + if (pDCTstat->DIMMValid & (1 << i)) { + if (Dimm_Supports_D(pDCTstat, i, j, k)) + break; + } + } /* while ++i */ + if (i == MAX_DIMMS_SUPPORTED) { + T1min = k; + CL1min = j; + goto got_TCL; + } + } + } /* while ++j */ + } /* while --k */ + +got_TCL: + if (T1min != 0xFF) { + pDCTstat->DIMMCASL = CL1min; /*mfg. optimized */ + pDCTstat->DIMMAutoSpeed = T1min; + print_tx("SPDGetTCL_D: DIMMCASL ", pDCTstat->DIMMCASL); + print_tx("SPDGetTCL_D: DIMMAutoSpeed ", pDCTstat->DIMMAutoSpeed); + + } else { + pDCTstat->DIMMCASL = CL_DEF; /* failsafe values (running in min. 
mode) */ + pDCTstat->DIMMAutoSpeed = T_DEF; + pDCTstat->ErrStatus |= 1 << SB_DimmMismatchT; + pDCTstat->ErrStatus |= 1 << SB_MinimumMode; + pDCTstat->ErrCode = SC_VarianceErr; + } + print_tx("SPDGetTCL_D: Status ", pDCTstat->Status); + print_tx("SPDGetTCL_D: ErrStatus ", pDCTstat->ErrStatus); + print_tx("SPDGetTCL_D: ErrCode ", pDCTstat->ErrCode); + print_t("SPDGetTCL_D: Done\n"); +} + + 
/*
 * Fetch the platform-specific configuration for a DCT and program it.
 *
 * Steps, in order:
 *   1. mctGet_PS_Cfg_D() for 'dct', and additionally for DCT 1 when
 *      pDCTstat->GangedMode is set.
 *   2. If pDCTstat->_2Tmode == 2, set bit 20 (2T CMD mode) in the
 *      Dram Configuration Hi register at 0x94 + 0x100 * dct.
 *   3. mct_PlatformSpec(), InitPhyCompensation() and the
 *      mctHookAfterPSCfg() hook.
 *
 * Returns pDCTstat->ErrCode as it stands after those calls.
 */
static u8 PlatformSpec_D(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u8 dct)
{
	mctGet_PS_Cfg_D(pMCTstat, pDCTstat, dct);
	if (pDCTstat->GangedMode)
		mctGet_PS_Cfg_D(pMCTstat, pDCTstat, 1);

	if (pDCTstat->_2Tmode == 2) {
		u32 nb_dev = pDCTstat->dev_dct;
		u32 cfg_hi_reg = 0x94 + 0x100 * dct;	/* Dram Configuration Hi */
		u32 cfg_hi = Get_NB32(nb_dev, cfg_hi_reg);

		cfg_hi |= 1 << 20;			/* 2T CMD mode */
		Set_NB32(nb_dev, cfg_hi_reg, cfg_hi);
	}

	mct_PlatformSpec(pMCTstat, pDCTstat, dct);
	InitPhyCompensation(pMCTstat, pDCTstat, dct);
	mctHookAfterPSCfg();

	return pDCTstat->ErrCode;
}
+ + +static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u32 DramControl, DramTimingLo, Status; + u32 DramConfigLo, DramConfigHi, DramConfigMisc, DramConfigMisc2; + u32 val; + u32 reg_off; + u32 dev; + u16 word; + u32 dword; + u8 byte; + + print_tx("AutoConfig_D: DCT: ", dct); + + DramConfigLo = 0; + DramConfigHi = 0; + DramConfigMisc = 0; + DramConfigMisc2 = 0; + + /* set bank addessing and Masks, plus CS pops */ + SPDSetBanks_D(pMCTstat, pDCTstat, dct); + if (pDCTstat->ErrCode == SC_StopError) + goto AutoConfig_exit; + + /* map chip-selects into local address space */ + StitchMemory_D(pMCTstat, pDCTstat, dct); + InterleaveBanks_D(pMCTstat, pDCTstat, dct); + + /* temp image of status (for convenience). RO usage! 
*/ + Status = pDCTstat->Status; + + dev = pDCTstat->dev_dct; + reg_off = 0x100 * dct; + + + /* Build Dram Control Register Value */ + DramConfigMisc2 = Get_NB32 (dev, 0xA8 + reg_off); /* Dram Control*/ + DramControl = Get_NB32 (dev, 0x78 + reg_off); /* Dram Control*/ + + if (mctGet_NVbits(NV_CLKHZAltVidC3)) + DramControl |= 1<<16; + + // FIXME: Add support(skip) for Ax and Cx versions + DramControl |= 5; /* RdPtrInit */ + + + /* Build Dram Config Lo Register Value */ + DramConfigLo |= 1 << 4; /* 75 Ohms ODT */ + if (mctGet_NVbits(NV_MAX_DIMMS) == 8) { + if (pDCTstat->Speed == 3) { + if ((pDCTstat->MAdimms[dct] == 4)) + DramConfigLo |= 1 << 5; /* 50 Ohms ODT */ + } else if (pDCTstat->Speed == 4){ + if ((pDCTstat->MAdimms[dct] != 1)) + DramConfigLo |= 1 << 5; /* 50 Ohms ODT */ + } + } else { + // FIXME: Skip for Ax versions + if ((pDCTstat->MAdimms[dct] == 4)) { + if ( pDCTstat->DimmQRPresent != 0) { + if ((pDCTstat->Speed == 3) || (pDCTstat->Speed == 4)) { + DramConfigLo |= 1 << 5; /* 50 Ohms ODT */ + } + } else if ((pDCTstat->MAdimms[dct] == 4)) { + if (pDCTstat->Speed == 4) { + if ( pDCTstat->DimmQRPresent != 0) { + DramConfigLo |= 1 << 5; /* 50 Ohms ODT */ + } + } + } + } else if ((pDCTstat->MAdimms[dct] == 2)) { + DramConfigLo |= 1 << 5; /* 50 Ohms ODT */ + } + + } + + // FIXME: Skip for Ax versions + /* callback not required - if (!mctParityControl_D()) */ + if (Status & (1 << SB_PARDIMMs)) { + DramConfigLo |= 1 << ParEn; + DramConfigMisc2 |= 1 << ActiveCmdAtRst; + } else { + DramConfigLo &= ~(1 << ParEn); + DramConfigMisc2 &= ~(1 << ActiveCmdAtRst); + } + + if (mctGet_NVbits(NV_BurstLen32)) { + if (!pDCTstat->GangedMode) + DramConfigLo |= 1 << BurstLength32; + } + + if (Status & (1 << SB_128bitmode)) + DramConfigLo |= 1 << Width128; /* 128-bit mode (normal) */ + + word = dct; + dword = X4Dimm; + while (word < 8) { + if (pDCTstat->Dimmx4Present & (1 << word)) + DramConfigLo |= 1 << dword; /* X4Dimm[3:0] */ + word++; + word++; + dword++; + } + + if (!(Status & 
(1 << SB_Registered))) + DramConfigLo |= 1 << UnBuffDimm; /* Unbufferd DIMMs */ + + if (mctGet_NVbits(NV_ECC_CAP)) + if (Status & (1 << SB_ECCDIMMs)) + if ( mctGet_NVbits(NV_ECC)) + DramConfigLo |= 1 << DimmEcEn; + + + + /* Build Dram Config Hi Register Value */ + dword = pDCTstat->Speed; + DramConfigHi |= dword - 1; /* get MemClk encoding */ + DramConfigHi |= 1 << MemClkFreqVal; + + if (Status & (1 << SB_Registered)) + if ((pDCTstat->Dimmx4Present != 0) && (pDCTstat->Dimmx8Present != 0)) + /* set only if x8 Registered DIMMs in System*/ + DramConfigHi |= 1 << RDqsEn; + + if (mctGet_NVbits(NV_CKE_PDEN)) { + DramConfigHi |= 1 << 15; /* PowerDownEn */ + if (mctGet_NVbits(NV_CKE_CTL)) + /*Chip Select control of CKE*/ + DramConfigHi |= 1 << 16; + } + + /* Control Bank Swizzle */ + if (0) /* call back not needed mctBankSwizzleControl_D()) */ + DramConfigHi &= ~(1 << BankSwizzleMode); + else + DramConfigHi |= 1 << BankSwizzleMode; /* recommended setting (default) */ + + /* Check for Quadrank DIMM presence */ + if ( pDCTstat->DimmQRPresent != 0) { + byte = mctGet_NVbits(NV_4RANKType); + if (byte == 2) + DramConfigHi |= 1 << 17; /* S4 (4-Rank SO-DIMMs) */ + else if (byte == 1) + DramConfigHi |= 1 << 18; /* R4 (4-Rank Registered DIMMs) */ + } + + if (0) /* call back not needed mctOverrideDcqBypMax_D ) */ + val = mctGet_NVbits(NV_BYPMAX); + else + val = 0x0f; // recommended setting (default) + DramConfigHi |= val << 24; + + val = pDCTstat->DIMM2Kpage; + if (pDCTstat->GangedMode != 0) { + if (dct != 0) { + val &= 0x55; + } else { + val &= 0xAA; + } + } + if (val) + val = Tab_2KTfawT_k[pDCTstat->Speed]; + else + val = Tab_1KTfawT_k[pDCTstat->Speed]; + + if (pDCTstat->Speed == 5) + val >>= 1; + + val -= Bias_TfawT; + val <<= 28; + DramConfigHi |= val; /* Tfaw for 1K or 2K paged drams */ + + // FIXME: Skip for Ax versions + DramConfigHi |= 1 << DcqArbBypassEn; + + + /* Build MemClkDis Value from Dram Timing Lo and + Dram Config Misc Registers + 1. 
We will assume that MemClkDis field has been preset prior to this + point. + 2. We will only set MemClkDis bits if a DIMM is NOT present AND if: + NV_AllMemClks <>0 AND SB_DiagClks ==0 */ + + + /* Dram Timing Low (owns Clock Enable bits) */ + DramTimingLo = Get_NB32(dev, 0x88 + reg_off); + if (mctGet_NVbits(NV_AllMemClks) == 0) { + /* Special Jedec SPD diagnostic bit - "enable all clocks" */ + if (!(pDCTstat->Status & (1<<SB_DiagClks))) { + const u8 *p; + byte = mctGet_NVbits(NV_PACK_TYPE); + if (byte == PT_L1) + p = Tab_L1CLKDis; + else if (byte == PT_M2) + p = Tab_M2CLKDis; + else + p = Tab_S1CLKDis; + + dword = 0; + while(dword < MAX_DIMMS_SUPPORTED) { + val = p[dword]; + print_tx("DramTimingLo: val=", val); + if (!(pDCTstat->DIMMValid & (1<<val))) + /*disable memclk*/ + DramTimingLo |= 1<<(dword+24); + dword++ ; + } + } + } + + print_tx("AutoConfig_D: DramControl: ", DramControl); + print_tx("AutoConfig_D: DramTimingLo: ", DramTimingLo); + print_tx("AutoConfig_D: DramConfigMisc: ", DramConfigMisc); + print_tx("AutoConfig_D: DramConfigMisc2: ", DramConfigMisc2); + print_tx("AutoConfig_D: DramConfigLo: ", DramConfigLo); + print_tx("AutoConfig_D: DramConfigHi: ", DramConfigHi); + + /* Write Values to the registers */ + Set_NB32(dev, 0x78 + reg_off, DramControl); + Set_NB32(dev, 0x88 + reg_off, DramTimingLo); + Set_NB32(dev, 0xA0 + reg_off, DramConfigMisc); + Set_NB32(dev, 0xA8 + reg_off, DramConfigMisc2); + Set_NB32(dev, 0x90 + reg_off, DramConfigLo); + mct_SetDramConfigHi_D(pDCTstat, dct, DramConfigHi); + mct_ForceAutoPrecharge_D(pDCTstat, dct); + mct_EarlyArbEn_D(pMCTstat, pDCTstat); + mctHookAfterAutoCfg(); + +// dump_pci_device(PCI_DEV(0, 0x18+pDCTstat->Node_ID, 2)); + + print_tx("AutoConfig: Status ", pDCTstat->Status); + print_tx("AutoConfig: ErrStatus ", pDCTstat->ErrStatus); + print_tx("AutoConfig: ErrCode ", pDCTstat->ErrCode); + print_t("AutoConfig: Done\n"); +AutoConfig_exit: + return pDCTstat->ErrCode; +} + + +static void SPDSetBanks_D(struct 
MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + /* Set bank addressing, program Mask values and build a chip-select + * population map. This routine programs PCI 0:24N:2x80 config register + * and PCI 0:24N:2x60,64,68,6C config registers (CS Mask 0-3). + */ + + u8 ChipSel, Rows, Cols, Ranks ,Banks, DevWidth; + u32 BankAddrReg, csMask; + + u32 val; + u32 reg; + u32 dev; + u32 reg_off; + u8 byte; + u16 word; + u32 dword; + u16 smbaddr; + + dev = pDCTstat->dev_dct; + reg_off = 0x100 * dct; + + BankAddrReg = 0; + for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel+=2) { + byte = ChipSel; + if ((pDCTstat->Status & (1 << SB_64MuxedMode)) && ChipSel >=4) + byte -= 3; + + if (pDCTstat->DIMMValid & (1<<byte)) { + smbaddr = Get_DIMMAddress_D(pDCTstat, (ChipSel + dct)); + + byte = mctRead_SPD(smbaddr, SPD_ROWSZ); + Rows = byte & 0x1f; + + byte = mctRead_SPD(smbaddr, SPD_COLSZ); + Cols = byte & 0x1f; + + Banks = mctRead_SPD(smbaddr, SPD_LBANKS); + + byte = mctRead_SPD(smbaddr, SPD_DEVWIDTH); + DevWidth = byte & 0x7f; /* bits 0-6 = bank 0 width */ + + byte = mctRead_SPD(smbaddr, SPD_DMBANKS); + Ranks = (byte & 7) + 1; + + /* Configure Bank encoding + * Use a 6-bit key into a lookup table. + * Key (index) = CCCBRR, where CCC is the number of + * Columns minus 9,RR is the number of Rows minus 13, + * and B is the number of banks minus 2. + * See "6-bit Bank Addressing Table" at the end of + * this file.*/ + byte = Cols - 9; /* 9 Cols is smallest dev size */ + byte <<= 3; /* make room for row and bank bits*/ + if (Banks == 8) + byte |= 4; + + /* 13 Rows is smallest dev size */ + byte |= Rows - 13; /* CCCBRR internal encode */ + + for (dword=0; dword < 12; dword++) { + if (byte == Tab_BankAddr[dword]) + break; + } + + if (dword < 12) { + + /* bit no. 
of CS field in address mapping reg.*/ + dword <<= (ChipSel<<1); + BankAddrReg |= dword; + + /* Mask value=(2pow(rows+cols+banks+3)-1)>>8, + or 2pow(rows+cols+banks-5)-1*/ + csMask = 0; + + byte = Rows + Cols; /* cl=rows+cols*/ + if (Banks == 8) + byte -= 2; /* 3 banks - 5 */ + else + byte -= 3; /* 2 banks - 5 */ + /* mask size (64-bit rank only) */ + + if (pDCTstat->Status & (1 << SB_128bitmode)) + byte++; /* double mask size if in 128-bit mode*/ + + csMask |= 1 << byte; + csMask--; + + /*set ChipSelect population indicator even bits*/ + pDCTstat->CSPresent |= (1<<ChipSel); + if (Ranks >= 2) + /*set ChipSelect population indicator odd bits*/ + pDCTstat->CSPresent |= 1 << (ChipSel + 1); + + reg = 0x60+(ChipSel<<1) + reg_off; /*Dram CS Mask Register */ + val = csMask; + val &= 0x1FF83FE0; /* Mask out reserved bits.*/ + Set_NB32(dev, reg, val); + } + } else { + if (pDCTstat->DIMMSPDCSE & (1<<ChipSel)) + pDCTstat->CSTestFail |= (1<<ChipSel); + } /* if DIMMValid*/ + } /* while ChipSel*/ + + SetCSTriState(pMCTstat, pDCTstat, dct); + /* SetCKETriState */ + SetODTTriState(pMCTstat, pDCTstat, dct); + + if ( pDCTstat->Status & 1<<SB_128bitmode) { + SetCSTriState(pMCTstat, pDCTstat, 1); /* force dct1) */ + SetODTTriState(pMCTstat, pDCTstat, 1); /* force dct1) */ + } + word = pDCTstat->CSPresent; + mctGetCS_ExcludeMap(); /* mask out specified chip-selects */ + word ^= pDCTstat->CSPresent; + pDCTstat->CSTestFail |= word; /* enable ODT to disabled DIMMs */ + if (!pDCTstat->CSPresent) + pDCTstat->ErrCode = SC_StopError; + + reg = 0x80 + reg_off; /* Bank Addressing Register */ + Set_NB32(dev, reg, BankAddrReg); + +// dump_pci_device(PCI_DEV(0, 0x18+pDCTstat->Node_ID, 2)); + + print_tx("SPDSetBanks: Status ", pDCTstat->Status); + print_tx("SPDSetBanks: ErrStatus ", pDCTstat->ErrStatus); + print_tx("SPDSetBanks: ErrCode ", pDCTstat->ErrCode); + print_t("SPDSetBanks: Done\n"); +} + + +static void SPDCalcWidth_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + /* 
Per SPDs, check the symmetry of DIMM pairs (DIMM on Channel A + * matching with DIMM on Channel B), the overall DIMM population, + * and determine the width mode: 64-bit, 64-bit muxed, 128-bit. + */ + + u8 i; + u8 smbaddr, smbaddr1; + u8 byte, byte1; + + /* Check Symmetry of Channel A and Channel B DIMMs + (must be matched for 128-bit mode).*/ + for (i=0; i < MAX_DIMMS_SUPPORTED; i += 2) { + if ((pDCTstat->DIMMValid & (1 << i)) && (pDCTstat->DIMMValid & (1<<(i+1)))) { + smbaddr = Get_DIMMAddress_D(pDCTstat, i); + smbaddr1 = Get_DIMMAddress_D(pDCTstat, i+1); + + byte = mctRead_SPD(smbaddr, SPD_ROWSZ) & 0x1f; + byte1 = mctRead_SPD(smbaddr1, SPD_ROWSZ) & 0x1f; + if (byte != byte1) { + pDCTstat->ErrStatus |= (1<<SB_DimmMismatchO); + break; + } + + byte = mctRead_SPD(smbaddr, SPD_COLSZ) & 0x1f; + byte1 = mctRead_SPD(smbaddr1, SPD_COLSZ) & 0x1f; + if (byte != byte1) { + pDCTstat->ErrStatus |= (1<<SB_DimmMismatchO); + break; + } + + byte = mctRead_SPD(smbaddr, SPD_BANKSZ); + byte1 = mctRead_SPD(smbaddr1, SPD_BANKSZ); + if (byte != byte1) { + pDCTstat->ErrStatus |= (1<<SB_DimmMismatchO); + break; + } + + byte = mctRead_SPD(smbaddr, SPD_DEVWIDTH) & 0x7f; + byte1 = mctRead_SPD(smbaddr1, SPD_DEVWIDTH) & 0x7f; + if (byte != byte1) { + pDCTstat->ErrStatus |= (1<<SB_DimmMismatchO); + break; + } + + byte = mctRead_SPD(smbaddr, SPD_DMBANKS) & 7; /* #ranks-1 */ + byte1 = mctRead_SPD(smbaddr1, SPD_DMBANKS) & 7; /* #ranks-1 */ + if (byte != byte1) { + pDCTstat->ErrStatus |= (1<<SB_DimmMismatchO); + break; + } + + } + } + +} + + +static void StitchMemory_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + /* Requires that Mask values for each bank be programmed first and that + * the chip-select population indicator is correctly set. 
+ */ + + u8 b = 0; + u32 nxtcsBase, curcsBase; + u8 p, q; + u32 Sizeq, BiggestBank; + u8 _DSpareEn; + + u16 word; + u32 dev; + u32 reg; + u32 reg_off; + u32 val; + + + dev = pDCTstat->dev_dct; + reg_off = 0x100 * dct; + + _DSpareEn = 0; + + /* CS Sparing 1=enabled, 0=disabled */ + if (mctGet_NVbits(NV_CS_SpareCTL) & 1) { + if (MCT_DIMM_SPARE_NO_WARM) { + /* Do no warm-reset DIMM spare */ + if (pMCTstat->GStatus & 1 << GSB_EnDIMMSpareNW) { + word = pDCTstat->CSPresent; + val = bsf(word); + word &= ~(1<<val); + if (word) + /* Make sure at least two chip-selects are available */ + _DSpareEn = 1; + else + pDCTstat->ErrStatus |= 1 << SB_SpareDis; + } + } else { + if (!mctGet_NVbits(NV_DQSTrainCTL)) { /*DQS Training 1=enabled, 0=disabled */ + word = pDCTstat->CSPresent; + val = bsf(word); + word &= ~(1 << val); + if (word) + /* Make sure at least two chip-selects are available */ + _DSpareEn = 1; + else + pDCTstat->ErrStatus |= 1 << SB_SpareDis; + } + } + } + + nxtcsBase = 0; /* Next available cs base ADDR[39:8] */ + for (p=0; p < MAX_DIMMS_SUPPORTED; p++) { + BiggestBank = 0; + for (q = 0; q < MAX_CS_SUPPORTED; q++) { /* from DIMMS to CS */ + if (pDCTstat->CSPresent & (1 << q)) { /* bank present? */ + reg = 0x40 + (q << 2) + reg_off; /* Base[q] reg.*/ + val = Get_NB32(dev, reg); + if (!(val & 3)) { /* (CSEnable|Spare==1)bank is enabled already? */ + reg = 0x60 + (q << 1) + reg_off; /*Mask[q] reg.*/ + val = Get_NB32(dev, reg); + val >>= 19; + val++; + val <<= 19; + Sizeq = val; //never used + if (val > BiggestBank) { + /*Bingo! possibly Map this chip-select next! 
*/ + BiggestBank = val; + b = q; + } + } + } /*if bank present */ + } /* while q */ + if (BiggestBank !=0) { + curcsBase = nxtcsBase; /* curcsBase=nxtcsBase*/ + /* DRAM CS Base b Address Register offset */ + reg = 0x40 + (b << 2) + reg_off; + if (_DSpareEn) { + BiggestBank = 0; + val = 1 << Spare; /* Spare Enable*/ + } else { + val = curcsBase; + val |= 1 << CSEnable; /* Bank Enable */ + } + Set_NB32(dev, reg, val); + if (_DSpareEn) + _DSpareEn = 0; + else + /* let nxtcsBase+=Size[b] */ + nxtcsBase += BiggestBank; + } + + /* bank present but disabled?*/ + if ( pDCTstat->CSTestFail & (1 << p)) { + /* DRAM CS Base b Address Register offset */ + reg = (p << 2) + 0x40 + reg_off; + val = 1 << TestFail; + Set_NB32(dev, reg, val); + } + } + + if (nxtcsBase) { + pDCTstat->DCTSysLimit = nxtcsBase - 1; + mct_AfterStitchMemory(pMCTstat, pDCTstat, dct); + } + +// dump_pci_device(PCI_DEV(0, 0x18+pDCTstat->Node_ID, 2)); + + print_tx("StitchMemory: Status ", pDCTstat->Status); + print_tx("StitchMemory: ErrStatus ", pDCTstat->ErrStatus); + print_tx("StitchMemory: ErrCode ", pDCTstat->ErrCode); + print_t("StitchMemory: Done\n"); +} + + +static u8 Get_Tk_D(u8 k) +{ + return Table_T_k[k]; +} + + +static u8 Get_CLj_D(u8 j) +{ + return Table_CL2_j[j]; +} + +static u8 Get_DefTrc_k_D(u8 k) +{ + return Tab_defTrc_k[k]; +} + + +static u16 Get_40Tk_D(u8 k) +{ + return Tab_40T_k[k]; /* FIXME: k or k<<1 ?*/ +} + + +static u16 Get_Fk_D(u8 k) +{ + return Table_F_k[k]; /* FIXME: k or k<<1 ? 
*/ +} + + 
/*
 * Check whether DIMM i can run at CAS latency index j and cycle-time
 * index k, according to its SPD.
 *
 * NOTE: the return value is a "reject" flag, not a "supports" flag:
 *   returns 1 when
 *     - the DIMM's SPD_CASLAT byte shares no bit with Get_CLj_D(j), or
 *     - Get_Tk_D(k) is smaller than the cycle time read from SPD, or
 *     - the SPD cycle-time byte is 0 (SB_NoCycTime is also set in
 *       pDCTstat->ErrStatus);
 *   returns 0 otherwise (DIMM is capable).
 *
 * The SPD byte holding the cycle time is picked out of EncodedTSPD, one
 * byte per CL step below the DIMM's highest CL (bsr() is presumably a
 * bit-scan-reverse returning the index of the highest set bit - confirm).
 */
static u8 Dimm_Supports_D(struct DCTStatStruc *pDCTstat,
				u8 i, u8 j, u8 k)
{
	u32 spd_addr;
	u8 cl_requested, cl_dimm, cl_common;
	u8 t_requested, t_dimm;
	u16 cl_pos, shift, spd_offset;

	spd_addr = Get_DIMMAddress_D(pDCTstat, i);
	cl_requested = Get_CLj_D(j);

	/* Does the DIMM advertise the requested CAS latency at all? */
	cl_dimm = mctRead_SPD(spd_addr, SPD_CASLAT);
	cl_common = cl_dimm & cl_requested;
	if (!cl_common)
		return 1;

	/* Is it CL X, CL X-1 or CL X-2 of this DIMM? */
	cl_pos = bsr(cl_common);	/* bit position of CLj */
	shift = bsr(cl_dimm);		/* bit position of the DIMM's CL X */
	shift -= cl_pos;		/* CL number (0, 1, 2 or 3) */
	shift <<= 3;			/* 8 bits per SPD byte index */

	/* SPD byte 9, 23 or 25 holds the cycle time for that CL */
	spd_offset = (EncodedTSPD >> shift) & 0xFF;

	t_requested = Get_Tk_D(k);
	t_dimm = mctRead_SPD(spd_addr, spd_offset);	/* DIMM speed */

	if (t_requested < t_dimm)
		return 1;

	if (t_dimm == 0) {
		pDCTstat->ErrStatus |= 1<<SB_NoCycTime;
		return 1;
	}

	return 0;	/* DIMM is capable */
}
+ + +static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + /* Check DIMMs present, verify checksum, flag SDRAM type, + * build population indicator bitmaps, and preload bus loading + * of DIMMs into DCTStatStruc. + * MAAload=number of devices on the "A" bus. + * MABload=number of devices on the "B" bus. + * MAAdimms=number of DIMMs on the "A" bus slots. + * MABdimms=number of DIMMs on the "B" bus slots. + * DATAAload=number of ranks on the "A" bus slots. + * DATABload=number of ranks on the "B" bus slots. 
+ */ + + u16 i, j; + u8 smbaddr, Index; + u16 Checksum; + u8 SPDCtrl; + u16 RegDIMMPresent, MaxDimms; + u8 devwidth; + u16 DimmSlots; + u8 byte = 0, bytex; + u16 word; + + /* preload data structure with addrs */ + mctGet_DIMMAddr(pDCTstat, pDCTstat->Node_ID); + + DimmSlots = MaxDimms = mctGet_NVbits(NV_MAX_DIMMS); + + SPDCtrl = mctGet_NVbits(NV_SPDCHK_RESTRT); + + RegDIMMPresent = 0; + pDCTstat->DimmQRPresent = 0; + + for (i = 0; i< MAX_DIMMS_SUPPORTED; i++) { + if (i >= MaxDimms) + break; + + if ((pDCTstat->DimmQRPresent & (1 << i)) || (i < DimmSlots)) { + print_tx("\t DIMMPresence: i=", i); + smbaddr = Get_DIMMAddress_D(pDCTstat, i); + print_tx("\t DIMMPresence: smbaddr=", smbaddr); + if (smbaddr) { + Checksum = 0; + for (Index=0; Index < 64; Index++){ + int status; + status = mctRead_SPD(smbaddr, Index); + if (status < 0) + break; + byte = status & 0xFF; + if (Index < 63) + Checksum += byte; + } + + if (Index == 64) { + pDCTstat->DIMMPresent |= 1 << i; + if ((Checksum & 0xFF) == byte) { + byte = mctRead_SPD(smbaddr, SPD_TYPE); + if (byte == JED_DDR2SDRAM) { + /*Dimm is 'Present'*/ + pDCTstat->DIMMValid |= 1 << i; + } + } else { + pDCTstat->DIMMSPDCSE = 1 << i; + if (SPDCtrl == 0) { + pDCTstat->ErrStatus |= 1 << SB_DIMMChkSum; + pDCTstat->ErrCode = SC_StopError; + } else { + /*if NV_SPDCHK_RESTRT is set to 1, ignore faulty SPD checksum*/ + pDCTstat->ErrStatus |= 1<<SB_DIMMChkSum; + byte = mctRead_SPD(smbaddr, SPD_TYPE); + if (byte == JED_DDR2SDRAM) + pDCTstat->DIMMValid |= 1 << i; + } + } + /* Check module type */ + byte = mctRead_SPD(smbaddr, SPD_DIMMTYPE); + if (byte & JED_REGADCMSK) + RegDIMMPresent |= 1 << i; + /* Check ECC capable */ + byte = mctRead_SPD(smbaddr, SPD_EDCTYPE); + if (byte & JED_ECC) { + /* DIMM is ECC capable */ + pDCTstat->DimmECCPresent |= 1 << i; + } + if (byte & JED_ADRCPAR) { + /* DIMM is ECC capable */ + pDCTstat->DimmPARPresent |= 1 << i; + } + /* Check if x4 device */ + devwidth = mctRead_SPD(smbaddr, SPD_DEVWIDTH) & 0xFE; + if 
(devwidth == 4) { + /* DIMM is made with x4 or x16 drams */ + pDCTstat->Dimmx4Present |= 1 << i; + } else if (devwidth == 8) { + pDCTstat->Dimmx8Present |= 1 << i; + } else if (devwidth == 16) { + pDCTstat->Dimmx16Present |= 1 << i; + } + /* check page size */ + byte = mctRead_SPD(smbaddr, SPD_COLSZ); + byte &= 0x0F; + word = 1 << byte; + word >>= 3; + word *= devwidth; /* (((2^COLBITS) / 8) * ORG) / 2048 */ + word >>= 11; + if (word) + pDCTstat->DIMM2Kpage |= 1 << i; + + /*Check if SPD diag bit 'analysis probe installed' is set */ + byte = mctRead_SPD(smbaddr, SPD_ATTRIB); + if ( byte & JED_PROBEMSK ) + pDCTstat->Status |= 1<<SB_DiagClks; + + byte = mctRead_SPD(smbaddr, SPD_DMBANKS); + if (!(byte & (1<< SPDPLBit))) + pDCTstat->DimmPlPresent |= 1 << i; + byte &= 7; + byte++; /* ranks */ + if (byte > 2) { + /* if any DIMMs are QR, we have to make two passes through DIMMs*/ + if ( pDCTstat->DimmQRPresent == 0) { + MaxDimms <<= 1; + } + if (i < DimmSlots) { + pDCTstat->DimmQRPresent |= (1 << i) | (1 << (i+4)); + } + byte = 2; /* upper two ranks of QR DIMM will be counted on another DIMM number iteration*/ + } else if (byte == 2) { + pDCTstat->DimmDRPresent |= 1 << i; + } + bytex = devwidth; + if (devwidth == 16) + bytex = 4; + else if (devwidth == 4) + bytex=16; + + if (byte == 2) + bytex <<= 1; /*double Addr bus load value for dual rank DIMMs*/ + + j = i & (1<<0); + pDCTstat->DATAload[j] += byte; /*number of ranks on DATA bus*/ + pDCTstat->MAload[j] += bytex; /*number of devices on CMD/ADDR bus*/ + pDCTstat->MAdimms[j]++; /*number of DIMMs on A bus */ + /*check for DRAM package Year <= 06*/ + byte = mctRead_SPD(smbaddr, SPD_MANDATEYR); + if (byte < MYEAR06) { + /*Year < 06 and hence Week < 24 of 06 */ + pDCTstat->DimmYr06 |= 1 << i; + pDCTstat->DimmWk2406 |= 1 << i; + } else if (byte == MYEAR06) { + /*Year = 06, check if Week <= 24 */ + pDCTstat->DimmYr06 |= 1 << i; + byte = mctRead_SPD(smbaddr, SPD_MANDATEWK); + if (byte <= MWEEK24) + pDCTstat->DimmWk2406 |= 1 << i; 
+ } + } + } + } + } + print_tx("\t DIMMPresence: DIMMValid=", pDCTstat->DIMMValid); + print_tx("\t DIMMPresence: DIMMPresent=", pDCTstat->DIMMPresent); + print_tx("\t DIMMPresence: RegDIMMPresent=", RegDIMMPresent); + print_tx("\t DIMMPresence: DimmECCPresent=", pDCTstat->DimmECCPresent); + print_tx("\t DIMMPresence: DimmPARPresent=", pDCTstat->DimmPARPresent); + print_tx("\t DIMMPresence: Dimmx4Present=", pDCTstat->Dimmx4Present); + print_tx("\t DIMMPresence: Dimmx8Present=", pDCTstat->Dimmx8Present); + print_tx("\t DIMMPresence: Dimmx16Present=", pDCTstat->Dimmx16Present); + print_tx("\t DIMMPresence: DimmPlPresent=", pDCTstat->DimmPlPresent); + print_tx("\t DIMMPresence: DimmDRPresent=", pDCTstat->DimmDRPresent); + print_tx("\t DIMMPresence: DimmQRPresent=", pDCTstat->DimmQRPresent); + print_tx("\t DIMMPresence: DATAload[0]=", pDCTstat->DATAload[0]); + print_tx("\t DIMMPresence: MAload[0]=", pDCTstat->MAload[0]); + print_tx("\t DIMMPresence: MAdimms[0]=", pDCTstat->MAdimms[0]); + print_tx("\t DIMMPresence: DATAload[1]=", pDCTstat->DATAload[1]); + print_tx("\t DIMMPresence: MAload[1]=", pDCTstat->MAload[1]); + print_tx("\t DIMMPresence: MAdimms[1]=", pDCTstat->MAdimms[1]); + + if (pDCTstat->DIMMValid != 0) { /* If any DIMMs are present...*/ + if (RegDIMMPresent != 0) { + if ((RegDIMMPresent ^ pDCTstat->DIMMValid) !=0) { + /* module type DIMM mismatch (reg'ed, unbuffered) */ + pDCTstat->ErrStatus |= 1<<SB_DimmMismatchM; + pDCTstat->ErrCode = SC_StopError; + } else{ + /* all DIMMs are registered */ + pDCTstat->Status |= 1<<SB_Registered; + } + } + if (pDCTstat->DimmECCPresent != 0) { + if ((pDCTstat->DimmECCPresent ^ pDCTstat->DIMMValid )== 0) { + /* all DIMMs are ECC capable */ + pDCTstat->Status |= 1<<SB_ECCDIMMs; + } + } + if (pDCTstat->DimmPARPresent != 0) { + if ((pDCTstat->DimmPARPresent ^ pDCTstat->DIMMValid) == 0) { + /*all DIMMs are Parity capable */ + pDCTstat->Status |= 1<<SB_PARDIMMs; + } + } + } else { + /* no DIMMs present or no DIMMs that qualified. 
*/ + pDCTstat->ErrStatus |= 1<<SB_NoDimms; + pDCTstat->ErrCode = SC_StopError; + } + + print_tx("\t DIMMPresence: Status ", pDCTstat->Status); + print_tx("\t DIMMPresence: ErrStatus ", pDCTstat->ErrStatus); + print_tx("\t DIMMPresence: ErrCode ", pDCTstat->ErrCode); + print_t("\t DIMMPresence: Done\n"); + + mctHookAfterDIMMpre(); + + return pDCTstat->ErrCode; +} + + +static u8 Sys_Capability_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, int j, int k) +{ + /* Determine if system is capable of operating at given input + * parameters for CL, and T. There are three components to + * determining "maximum frequency" in AUTO mode: SPD component, + * Bus load component, and "Preset" max frequency component. + * This procedure is used to help find the SPD component and relies + * on pre-determination of the bus load component and the Preset + * components. The generalized algorithm for finding maximum + * frequency is structured this way so as to optimize for CAS + * latency (which might get better as a result of reduced frequency). + * See "Global relationship between index values and item values" + * for definition of CAS latency index (j) and Frequency index (k). + */ + u8 freqOK, ClOK; + u8 ret = 0; + + if (Get_Fk_D(k) > pDCTstat->PresetmaxFreq) + freqOK = 0; + else + freqOK = 1; + + /* compare proposed CAS latency with AMD Si capabilities */ + if ((j < J_MIN) || (j > J_MAX)) + ClOK = 0; + else + ClOK = 1; + + if (freqOK && ClOK) + ret = 1; + + return ret; +} + + +static u8 Get_DIMMAddress_D(struct DCTStatStruc *pDCTstat, u8 i) +{ + u8 *p; + + p = pDCTstat->DIMMAddr; + //mct_BeforeGetDIMMAddress(); + return p[i]; +} + + +static void mct_initDCT(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + u32 val; + u8 err_code; + + /* Config. 
DCT0 for Ganged or unganged mode */ + print_t("\tmct_initDCT: DCTInit_D 0\n"); + DCTInit_D(pMCTstat, pDCTstat, 0); + if (pDCTstat->ErrCode == SC_FatalErr) { + // Do nothing goto exitDCTInit; /* any fatal errors? */ + } else { + /* Configure DCT1 if unganged and enabled*/ + if (!pDCTstat->GangedMode) { + if ( pDCTstat->DIMMValidDCT[1] > 0) { + print_t("\tmct_initDCT: DCTInit_D 1\n"); + err_code = pDCTstat->ErrCode; /* save DCT0 errors */ + pDCTstat->ErrCode = 0; + DCTInit_D(pMCTstat, pDCTstat, 1); + if (pDCTstat->ErrCode == 2) /* DCT1 is not Running */ + pDCTstat->ErrCode = err_code; /* Using DCT0 Error code to update pDCTstat.ErrCode */ + } else { + val = 1 << DisDramInterface; + Set_NB32(pDCTstat->dev_dct, 0x100 + 0x94, val); + } + } + } +// exitDCTInit: +} + + +static void mct_DramInit(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u32 val; + + mct_BeforeDramInit_Prod_D(pMCTstat, pDCTstat); + // FIXME: for rev A: mct_BeforeDramInit_D(pDCTstat, dct); + + /* Disable auto refresh before Dram init when in ganged mode */ + if (pDCTstat->GangedMode) { + val = Get_NB32(pDCTstat->dev_dct, 0x8C + (0x100 * dct)); + val |= 1 << DisAutoRefresh; + Set_NB32(pDCTstat->dev_dct, 0x8C + (0x100 * dct), val); + } + + mct_DramInit_Hw_D(pMCTstat, pDCTstat, dct); + + /* Re-enable auto refresh after Dram init when in ganged mode + * to ensure both DCTs are in sync + */ + + if (pDCTstat->GangedMode) { + do { + val = Get_NB32(pDCTstat->dev_dct, 0x90 + (0x100 * dct)); + } while (!(val & (1 << InitDram))); + + WaitRoutine_D(50); + + val = Get_NB32(pDCTstat->dev_dct, 0x8C + (0x100 * dct)); + val &= ~(1 << DisAutoRefresh); + val |= 1 << DisAutoRefresh; + val &= ~(1 << DisAutoRefresh); + } +} + + +static u8 mct_setMode(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + u8 byte; + u8 bytex; + u32 val; + u32 reg; + + byte = bytex = pDCTstat->DIMMValid; + bytex &= 0x55; /* CHA DIMM pop */ + pDCTstat->DIMMValidDCT[0] = bytex; + + byte &= 0xAA; /* CHB 
DIMM popa */ + byte >>= 1; + pDCTstat->DIMMValidDCT[1] = byte; + + if (byte != bytex) { + pDCTstat->ErrStatus &= ~(1 << SB_DimmMismatchO); + } else { + if ( mctGet_NVbits(NV_Unganged) ) + pDCTstat->ErrStatus |= (1 << SB_DimmMismatchO); + + if (!(pDCTstat->ErrStatus & (1 << SB_DimmMismatchO))) { + pDCTstat->GangedMode = 1; + /* valid 128-bit mode population. */ + pDCTstat->Status |= 1 << SB_128bitmode; + reg = 0x110; + val = Get_NB32(pDCTstat->dev_dct, reg); + val |= 1 << DctGangEn; + Set_NB32(pDCTstat->dev_dct, reg, val); + print_tx("setMode: DRAM Controller Select Low Register = ", val); + } + } + return pDCTstat->ErrCode; +} + + +u32 Get_NB32(u32 dev, u32 reg) +{ + u32 addr; + + addr = (dev>>4) | (reg & 0xFF) | ((reg & 0xf00)<<16); + outl((1<<31) | (addr & ~3), 0xcf8); + + return inl(0xcfc); +} + + +void Set_NB32(u32 dev, u32 reg, u32 val) +{ + u32 addr; + + addr = (dev>>4) | (reg & 0xFF) | ((reg & 0xf00)<<16); + outl((1<<31) | (addr & ~3), 0xcf8); + outl(val, 0xcfc); +} + + +u32 Get_NB32_index(u32 dev, u32 index_reg, u32 index) +{ + u32 dword; + + Set_NB32(dev, index_reg, index); + dword = Get_NB32(dev, index_reg+0x4); + + return dword; +} + +void Set_NB32_index(u32 dev, u32 index_reg, u32 index, u32 data) +{ + Set_NB32(dev, index_reg, index); + Set_NB32(dev, index_reg + 0x4, data); +} + + +u32 Get_NB32_index_wait(u32 dev, u32 index_reg, u32 index) +{ + + u32 dword; + + + index &= ~(1 << DctAccessWrite); + Set_NB32(dev, index_reg, index); + do { + dword = Get_NB32(dev, index_reg); + } while (!(dword & (1 << DctAccessDone))); + dword = Get_NB32(dev, index_reg + 0x4); + + return dword; +} + + +void Set_NB32_index_wait(u32 dev, u32 index_reg, u32 index, u32 data) +{ + u32 dword; + + + Set_NB32(dev, index_reg + 0x4, data); + index |= (1 << DctAccessWrite); + Set_NB32(dev, index_reg, index); + do { + dword = Get_NB32(dev, index_reg); + } while (!(dword & (1 << DctAccessDone))); + +} + + +static u8 mct_PlatformSpec(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc 
*pDCTstat, u8 dct) +{ + /* Get platform specific config/timing values from the interface layer + * and program them into DCT. + */ + + u32 dev = pDCTstat->dev_dct; + u32 index_reg; + u8 i, i_start, i_end; + + if (pDCTstat->GangedMode) { + SyncSetting(pDCTstat); + i_start = 0; + i_end = 2; + } else { + i_start = dct; + i_end = dct + 1; + } + for (i=i_start; i<i_end; i++) { + index_reg = 0x98 + (i * 0x100); + Set_NB32_index_wait(dev, index_reg, 0x00, pDCTstat->CH_ODC_CTL[i]); /* Channel A Output Driver Compensation Control */ + Set_NB32_index_wait(dev, index_reg, 0x04, pDCTstat->CH_ADDR_TMG[i]); /* Channel A Output Driver Compensation Control */ + } + + return pDCTstat->ErrCode; + +} + + +static void mct_SyncDCTsReady(struct DCTStatStruc *pDCTstat) +{ + u32 dev; + u32 val; + + if (pDCTstat->NodePresent) { + print_tx("mct_SyncDCTsReady: Node ", pDCTstat->Node_ID); + dev = pDCTstat->dev_dct; + + if ((pDCTstat->DIMMValidDCT[0] ) || (pDCTstat->DIMMValidDCT[1])) { /* This Node has dram */ + do { + val = Get_NB32(dev, 0x110); + } while (!(val & (1 << DramEnabled))); + print_t("mct_SyncDCTsReady: DramEnabled\n"); + } + } /* Node is present */ +} + + +static void mct_AfterGetCLT(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + if (!pDCTstat->GangedMode) { + if (dct == 0 ) { + pDCTstat->DIMMValid = pDCTstat->DIMMValidDCT[dct]; + if (pDCTstat->DIMMValidDCT[dct] == 0) + pDCTstat->ErrCode = SC_StopError; + } else { + pDCTstat->CSPresent = 0; + pDCTstat->CSTestFail = 0; + pDCTstat->DIMMValid = pDCTstat->DIMMValidDCT[dct]; + if (pDCTstat->DIMMValidDCT[dct] == 0) + pDCTstat->ErrCode = SC_StopError; + } + } +} + +static u8 mct_SPDCalcWidth(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u8 ret; + + if ( dct == 0) { + SPDCalcWidth_D(pMCTstat, pDCTstat); + ret = mct_setMode(pMCTstat, pDCTstat); + } else { + ret = pDCTstat->ErrCode; + } + + print_tx("SPDCalcWidth: Status ", pDCTstat->Status); + print_tx("SPDCalcWidth: ErrStatus 
", pDCTstat->ErrStatus); + print_tx("SPDCalcWidth: ErrCode ", pDCTstat->ErrCode); + print_t("SPDCalcWidth: Done\n"); + + return ret; +} + + +static void mct_AfterStitchMemory(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u32 val; + u32 dword; + u32 dev; + u32 reg; + u8 _MemHoleRemap; + u32 DramHoleBase; + + _MemHoleRemap = mctGet_NVbits(NV_MemHole); + DramHoleBase = mctGet_NVbits(NV_BottomIO); + DramHoleBase <<= 8; + /* Increase hole size so;[31:24]to[31:16] + * it has granularity of 128MB shl eax,8 + * Set 'effective' bottom IOmov DramHoleBase,eax + */ + pMCTstat->HoleBase = (DramHoleBase & 0xFFFFF800) << 8; + + /* In unganged mode, we must add DCT0 and DCT1 to DCTSysLimit */ + if (!pDCTstat->GangedMode) { + dev = pDCTstat->dev_dct; + pDCTstat->NodeSysLimit += pDCTstat->DCTSysLimit; + /* if DCT0 and DCT1 exist both, set DctSelBaseAddr[47:27] */ + if (dct == 0) { + if (pDCTstat->DIMMValidDCT[1] > 0) { + dword = pDCTstat->DCTSysLimit + 1; + dword += pDCTstat->NodeSysBase; + dword >>= 8; /* scale [39:8] to [47:27],and to F2x110[31:11] */ + if ((dword >= DramHoleBase) && _MemHoleRemap) { + pMCTstat->HoleBase = (DramHoleBase & 0xFFFFF800) << 8; + val = pMCTstat->HoleBase; + val >>= 16; + val &= ~(0xFF); + val |= (((~val) & 0xFF) + 1); + val <<= 8; + dword += val; + } + reg = 0x110; + val = Get_NB32(dev, reg); + val &= 0x7F; + val |= dword; + val |= 3; /* Set F2x110[DctSelHiRngEn], F2x110[DctSelHi] */ + Set_NB32(dev, reg, val); + print_tx("AfterStitch DCT0 and DCT1: DRAM Controller Select Low Register = ", val); + + reg = 0x114; + val = dword; + Set_NB32(dev, reg, val); + } + } else { + /* Program the DctSelBaseAddr value to 0 + if DCT 0 is disabled */ + if (pDCTstat->DIMMValidDCT[0] == 0) { + dword = pDCTstat->NodeSysBase; + dword >>= 8; + if (dword >= DramHoleBase) { + pMCTstat->HoleBase = (DramHoleBase & 0xFFFFF800) << 8; + val = pMCTstat->HoleBase; + val >>= 8; + val &= ~(0xFFFF); + val |= (((~val) & 0xFFFF) + 1); + dword += val; + } + 
reg = 0x114; + val = dword; + Set_NB32(dev, reg, val); + + reg = 0x110; + val |= 3; /* Set F2x110[DctSelHiRngEn], F2x110[DctSelHi] */ + Set_NB32(dev, reg, val); + print_tx("AfterStitch DCT1 only: DRAM Controller Select Low Register = ", val); + } + } + } else { + pDCTstat->NodeSysLimit += pDCTstat->DCTSysLimit; + } + print_tx("AfterStitch pDCTstat->NodeSysBase = ", pDCTstat->NodeSysBase); + print_tx("mct_AfterStitchMemory: pDCTstat->NodeSysLimit ", pDCTstat->NodeSysLimit); +} + + +static u8 mct_DIMMPresence(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u8 ret; + + if ( dct == 0) + ret = DIMMPresence_D(pMCTstat, pDCTstat); + else + ret = pDCTstat->ErrCode; + + return ret; +} + + +/* mct_BeforeGetDIMMAddress inline in C */ + + +static void mct_OtherTiming(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + u8 Node; + + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + struct DCTStatStruc *pDCTstat; + pDCTstat = pDCTstatA + Node; + if (pDCTstat->NodePresent) { + if (pDCTstat->DIMMValidDCT[0]) { + pDCTstat->DIMMValid = pDCTstat->DIMMValidDCT[0]; + Set_OtherTiming(pMCTstat, pDCTstat, 0); + } + if (pDCTstat->DIMMValidDCT[1] && !pDCTstat->GangedMode ) { + pDCTstat->DIMMValid = pDCTstat->DIMMValidDCT[1]; + Set_OtherTiming(pMCTstat, pDCTstat, 1); + } + } /* Node is present*/ + } /* while Node */ +} + + +static void Set_OtherTiming(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u32 reg; + u32 reg_off = 0x100 * dct; + u32 val; + u32 dword; + u32 dev = pDCTstat->dev_dct; + + Get_Trdrd(pMCTstat, pDCTstat, dct); + Get_Twrwr(pMCTstat, pDCTstat, dct); + Get_Twrrd(pMCTstat, pDCTstat, dct); + Get_TrwtTO(pMCTstat, pDCTstat, dct); + Get_TrwtWB(pMCTstat, pDCTstat); + + reg = 0x8C + reg_off; /* Dram Timing Hi */ + val = Get_NB32(dev, reg); + val &= 0xffff0300; + dword = pDCTstat->TrwtTO; //0x07 + val |= dword << 4; + dword = pDCTstat->Twrrd; //0x03 + val |= dword << 10; + dword = pDCTstat->Twrwr; //0x03 
+ val |= dword << 12; + dword = pDCTstat->Trdrd; //0x03 + val |= dword << 14; + dword = pDCTstat->TrwtWB; //0x07 + val |= dword; + val = OtherTiming_A_D(pDCTstat, val); + Set_NB32(dev, reg, val); + +} + + +static void Get_Trdrd(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u8 Trdrd; + u8 byte; + u32 dword; + u32 val; + u32 index_reg = 0x98 + 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + + if ((pDCTstat->Dimmx4Present != 0) && (pDCTstat->Dimmx8Present != 0)) { + /* mixed (x4 or x8) DIMM types + the largest DqsRcvEnGrossDelay of any DIMM minus the DqsRcvEnGrossDelay + of any other DIMM is equal to the Critical Gross Delay Difference (CGDD) for Trdrd.*/ + byte = Get_DqsRcvEnGross_Diff(pDCTstat, dev, index_reg); + if (byte == 0) + Trdrd = 1; + else + Trdrd = 2; + + } else { + /* + Trdrd with non-mixed DIMM types + RdDqsTime are the same for all DIMMs and DqsRcvEn difference between + any two DIMMs is less than half of a MEMCLK, BIOS should program Trdrd to 0000b, + else BIOS should program Trdrd to 0001b. 
+ + RdDqsTime are the same for all DIMMs + DDR400~DDR667 only use one set register + DDR800 have two set register for DIMM0 and DIMM1 */ + Trdrd = 1; + if (pDCTstat->Speed > 3) { + /* DIMM0+DIMM1 exist */ //NOTE it should be 5 + val = bsf(pDCTstat->DIMMValid); + dword = bsr(pDCTstat->DIMMValid); + if (dword != val && dword != 0) { + /* DCT Read DQS Timing Control - DIMM0 - Low */ + dword = Get_NB32_index_wait(dev, index_reg, 0x05); + /* DCT Read DQS Timing Control - DIMM1 - Low */ + val = Get_NB32_index_wait(dev, index_reg, 0x105); + if (val != dword) + goto Trdrd_1; + + /* DCT Read DQS Timing Control - DIMM0 - High */ + dword = Get_NB32_index_wait(dev, index_reg, 0x06); + /* DCT Read DQS Timing Control - DIMM1 - High */ + val = Get_NB32_index_wait(dev, index_reg, 0x106); + if (val != dword) + goto Trdrd_1; + } + } + + /* DqsRcvEn difference between any two DIMMs is + less than half of a MEMCLK */ + /* DqsRcvEn byte 1,0*/ + if (Check_DqsRcvEn_Diff(pDCTstat, dct, dev, index_reg, 0x10)) + goto Trdrd_1; + /* DqsRcvEn byte 3,2*/ + if (Check_DqsRcvEn_Diff(pDCTstat, dct, dev, index_reg, 0x11)) + goto Trdrd_1; + /* DqsRcvEn byte 5,4*/ + if (Check_DqsRcvEn_Diff(pDCTstat, dct, dev, index_reg, 0x20)) + goto Trdrd_1; + /* DqsRcvEn byte 7,6*/ + if (Check_DqsRcvEn_Diff(pDCTstat, dct, dev, index_reg, 0x21)) + goto Trdrd_1; + /* DqsRcvEn ECC*/ + if (Check_DqsRcvEn_Diff(pDCTstat, dct, dev, index_reg, 0x12)) + goto Trdrd_1; + Trdrd = 0; + Trdrd_1: + ; + } + pDCTstat->Trdrd = Trdrd; + +} + + +static void Get_Twrwr(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u8 Twrwr = 0; + u32 index_reg = 0x98 + 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + u32 val; + u32 dword; + + /* WrDatGrossDlyByte only use one set register when DDR400~DDR667 + DDR800 have two set register for DIMM0 and DIMM1 */ + if (pDCTstat->Speed > 3) { + val = bsf(pDCTstat->DIMMValid); + dword = bsr(pDCTstat->DIMMValid); + if (dword != val && dword != 0) { + /*the largest WrDatGrossDlyByte 
of any DIMM minus the + WrDatGrossDlyByte of any other DIMM is equal to CGDD */ + val = Get_WrDatGross_Diff(pDCTstat, dct, dev, index_reg); + } + if (val == 0) + Twrwr = 2; + else + Twrwr = 3; + } + pDCTstat->Twrwr = Twrwr; +} + + +static void Get_Twrrd(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u8 byte, bytex; + u32 index_reg = 0x98 + 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + + /* On any given byte lane, the largest WrDatGrossDlyByte delay of + any DIMM minus the DqsRcvEnGrossDelay delay of any other DIMM is + equal to the Critical Gross Delay Difference (CGDD) for Twrrd.*/ + pDCTstat->Twrrd = 0; + Get_DqsRcvEnGross_Diff(pDCTstat, dev, index_reg); + Get_WrDatGross_Diff(pDCTstat, dct, dev, index_reg); + bytex = pDCTstat->DqsRcvEnGrossL; + byte = pDCTstat->WrDatGrossH; + if (byte > bytex) { + byte -= bytex; + if (byte == 1) + bytex = 1; + else + bytex = 2; + } else { + bytex = 0; + } + pDCTstat->Twrrd = bytex; +} + + +static void Get_TrwtTO(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u8 byte, bytex; + u32 index_reg = 0x98 + 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + + /* On any given byte lane, the largest WrDatGrossDlyByte delay of + any DIMM minus the DqsRcvEnGrossDelay delay of any other DIMM is + equal to the Critical Gross Delay Difference (CGDD) for TrwtTO. */ + Get_DqsRcvEnGross_Diff(pDCTstat, dev, index_reg); + Get_WrDatGross_Diff(pDCTstat, dct, dev, index_reg); + bytex = pDCTstat->DqsRcvEnGrossL; + byte = pDCTstat->WrDatGrossH; + if (bytex > byte) { + bytex -= byte; + if ((bytex == 1) || (bytex == 2)) + bytex = 3; + else + bytex = 4; + } else { + byte -= bytex; + if ((byte == 0) || (byte == 1)) + bytex = 2; + else + bytex = 1; + } + + pDCTstat->TrwtTO = bytex; +} + + +static void Get_TrwtWB(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + /* TrwtWB ensures read-to-write data-bus turnaround. 
+ This value should be one more than the programmed TrwtTO.*/ + pDCTstat->TrwtWB = pDCTstat->TrwtTO + 1; +} + + +static u8 Check_DqsRcvEn_Diff(struct DCTStatStruc *pDCTstat, + u8 dct, u32 dev, u32 index_reg, + u32 index) +{ + u8 Smallest_0, Largest_0, Smallest_1, Largest_1; + u8 i; + u32 val; + u8 byte; + + Smallest_0 = 0xFF; + Smallest_1 = 0xFF; + Largest_0 = 0; + Largest_1 = 0; + + for (i=0; i < 8; i+=2) { + if ( pDCTstat->DIMMValid & (1 << i)) { + val = Get_NB32_index_wait(dev, index_reg, index); + byte = val & 0xFF; + if (byte < Smallest_0) + Smallest_0 = byte; + if (byte > Largest_0) + Largest_0 = byte; + byte = (val >> 16) & 0xFF; + if (byte < Smallest_1) + Smallest_1 = byte; + if (byte > Largest_1) + Largest_1 = byte; + } + index += 3; + } /* while ++i */ + + /* check if total DqsRcvEn delay difference between any + two DIMMs is less than half of a MEMCLK */ + if ((Largest_0 - Smallest_0) > 31) + return 1; + if ((Largest_1 - Smallest_1) > 31) + return 1; + return 0; +} + + +static u8 Get_DqsRcvEnGross_Diff(struct DCTStatStruc *pDCTstat, + u32 dev, u32 index_reg) +{ + u8 Smallest, Largest; + u32 val; + u8 byte, bytex; + + /* The largest DqsRcvEnGrossDelay of any DIMM minus the + DqsRcvEnGrossDelay of any other DIMM is equal to the Critical + Gross Delay Difference (CGDD) */ + /* DqsRcvEn byte 1,0 */ + val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, index_reg, 0x10); + Largest = val & 0xFF; + Smallest = (val >> 8) & 0xFF; + + /* DqsRcvEn byte 3,2 */ + val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, index_reg, 0x11); + byte = val & 0xFF; + bytex = (val >> 8) & 0xFF; + if (bytex < Smallest) + Smallest = bytex; + if (byte > Largest) + Largest = byte; + + /* DqsRcvEn byte 5,4 */ + val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, index_reg, 0x20); + byte = val & 0xFF; + bytex = (val >> 8) & 0xFF; + if (bytex < Smallest) + Smallest = bytex; + if (byte > Largest) + Largest = byte; + + /* DqsRcvEn byte 7,6 */ + val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, index_reg, 0x21); 
+ byte = val & 0xFF; + bytex = (val >> 8) & 0xFF; + if (bytex < Smallest) + Smallest = bytex; + if (byte > Largest) + Largest = byte; + + if (pDCTstat->DimmECCPresent> 0) { + /*DqsRcvEn Ecc */ + val = Get_DqsRcvEnGross_MaxMin(pDCTstat, dev, index_reg, 0x12); + byte = val & 0xFF; + bytex = (val >> 8) & 0xFF; + if (bytex < Smallest) + Smallest = bytex; + if (byte > Largest) + Largest = byte; + } + + pDCTstat->DqsRcvEnGrossL = Largest; + return Largest - Smallest; +} + + +static u8 Get_WrDatGross_Diff(struct DCTStatStruc *pDCTstat, + u8 dct, u32 dev, u32 index_reg) +{ + u8 Smallest, Largest; + u32 val; + u8 byte, bytex; + + /* The largest WrDatGrossDlyByte of any DIMM minus the + WrDatGrossDlyByte of any other DIMM is equal to CGDD */ + val = Get_WrDatGross_MaxMin(pDCTstat, dct, dev, index_reg, 0x01); /* WrDatGrossDlyByte byte 0,1,2,3 for DIMM0 */ + Largest = val & 0xFF; + Smallest = (val >> 8) & 0xFF; + val = Get_WrDatGross_MaxMin(pDCTstat, dct, dev, index_reg, 0x101); /* WrDatGrossDlyByte byte 0,1,2,3 for DIMM1 */ + byte = val & 0xFF; + bytex = (val >> 8) & 0xFF; + if (bytex < Smallest) + Smallest = bytex; + if (byte > Largest) + Largest = byte; + + // FIXME: Add Cx support. 
+ + pDCTstat->WrDatGrossH = Largest; + return Largest - Smallest; +} + +static u16 Get_DqsRcvEnGross_MaxMin(struct DCTStatStruc *pDCTstat, + u32 dev, u32 index_reg, + u32 index) +{ + u8 Smallest, Largest; + u8 i; + u8 byte; + u32 val; + u16 word; + + Smallest = 7; + Largest = 0; + + for (i=0; i < 8; i+=2) { + if ( pDCTstat->DIMMValid & (1 << i)) { + val = Get_NB32_index_wait(dev, index_reg, index); + val &= 0x00E000E0; + byte = (val >> 5) & 0xFF; + if (byte < Smallest) + Smallest = byte; + if (byte > Largest) + Largest = byte; + byte = (val >> (16 + 5)) & 0xFF; + if (byte < Smallest) + Smallest = byte; + if (byte > Largest) + Largest = byte; + } + index += 3; + } /* while ++i */ + + word = Smallest; + word <<= 8; + word |= Largest; + + return word; +} + +static u16 Get_WrDatGross_MaxMin(struct DCTStatStruc *pDCTstat, + u8 dct, u32 dev, u32 index_reg, + u32 index) +{ + u8 Smallest, Largest; + u8 i, j; + u32 val; + u8 byte; + u16 word; + + Smallest = 3; + Largest = 0; + for (i=0; i < 2; i++) { + val = Get_NB32_index_wait(dev, index_reg, index); + val &= 0x60606060; + val >>= 5; + for (j=0; j < 4; j++) { + byte = val & 0xFF; + if (byte < Smallest) + Smallest = byte; + if (byte > Largest) + Largest = byte; + val >>= 8; + } /* while ++j */ + index++; + } /*while ++i*/ + + if (pDCTstat->DimmECCPresent > 0) { + index++; + val = Get_NB32_index_wait(dev, index_reg, index); + val &= 0x00000060; + val >>= 5; + byte = val & 0xFF; + if (byte < Smallest) + Smallest = byte; + if (byte > Largest) + Largest = byte; + } + + word = Smallest; + word <<= 8; + word |= Largest; + + return word; +} + + + +static void mct_FinalMCT_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + print_t("\tmct_FinalMCT_D: Clr Cl, Wb\n"); + + + mct_ClrClToNB_D(pMCTstat, pDCTstat); + mct_ClrWbEnhWsbDis_D(pMCTstat, pDCTstat); +} + + +static void mct_InitialMCT_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat) +{ + print_t("\tmct_InitialMCT_D: Set Cl, Wb\n"); + 
mct_SetClToNB_D(pMCTstat, pDCTstat); + mct_SetWbEnhWsbDis_D(pMCTstat, pDCTstat); +} + + +/* Returns the constant 0x12001022 (presumably the PCI device/vendor ID + * dword a present node reports -- TODO confirm against the caller). */ +static u32 mct_NodePresent_D(void) +{ + u32 val; + val = 0x12001022; + return val; +} + + +/* Per-node init: clears GangedMode, sets DRPresent, and makes sure bit 46 + * of MSR 0xC001001F is set. If the bit was already set, SB_ExtConfig is + * recorded in Status instead of rewriting the MSR. */ +static void mct_init(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + u32 lo, hi; + u32 addr; + + pDCTstat->GangedMode = 0; + pDCTstat->DRPresent = 1; + + /* enable extend PCI configuration access */ + addr = 0xC001001F; + _RDMSR(addr, &lo, &hi); + if (hi & (1 << (46-32))) { + pDCTstat->Status |= 1 << SB_ExtConfig; + } else { + hi |= 1 << (46-32); + _WRMSR(addr, lo, hi); + } +} + + +/* Read-modify-write of register 0x94 on dev_dct that clears the + * LegacyBiosMode bit. */ +static void clear_legacy_Mode(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + u32 reg; + u32 val; + u32 dev = pDCTstat->dev_dct; + + /* Clear Legacy BIOS Mode bit */ + reg = 0x94; + val = Get_NB32(dev, reg); + val &= ~(1<<LegacyBiosMode); + Set_NB32(dev, reg, val); +} + + +/* Mirrors each node's DRAM base/limit pair, as read from node 0's map + * device, into registers 0x120/0x124 of that node's own map device. */ +static void mct_HTMemMapExt(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + u8 Node; + u32 Drambase, Dramlimit; + u32 val; + u32 reg; + u32 dev; + u32 devx; + u32 dword; + struct DCTStatStruc *pDCTstat; + + pDCTstat = pDCTstatA + 0; + dev = pDCTstat->dev_map; + + /* Copy dram map from F1x40/44,F1x48/4c, + to F1x120/124(Node0),F1x120/124(Node1),...*/ + for (Node=0; Node < MAX_NODES_SUPPORTED; Node++) { + pDCTstat = pDCTstatA + Node; + devx = pDCTstat->dev_map; + + /* get base/limit from Node0 */ + reg = 0x40 + (Node << 3); /* Node0/Dram Base 0 */ + val = Get_NB32(dev, reg); + Drambase = val >> ( 16 + 3); + + reg = 0x44 + (Node << 3); /* Node0/Dram Limit 0 */ + val = Get_NB32(dev, reg); + Dramlimit = val >> (16 + 3); + + /* set base/limit to F1x120/124 per Node */ + if (pDCTstat->NodePresent) { + reg = 0x120; /* F1x120,DramBase[47:27] */ + val = Get_NB32(devx, reg); + val &= 0xFFE00000; + val |= Drambase; + Set_NB32(devx, reg, val); + + reg = 0x124; /* F1x124, limit counterpart of F1x120 */ + val = Get_NB32(devx, reg); + val &= 0xFFE00000; + val |= Dramlimit; + Set_NB32(devx, reg, val); + + if ( pMCTstat->GStatus & ( 1 << 
GSB_HWHole)) { + reg = 0xF0; + val = Get_NB32(devx, reg); + val |= (1 << DramMemHoistValid); + val &= ~(0xFF << 24); + dword = (pMCTstat->HoleBase >> (24 - 8)) & 0xFF; + dword <<= 24; + val |= dword; + Set_NB32(devx, reg, val); + } + + } + } +} + +static void SetCSTriState(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u32 val; + u32 dev = pDCTstat->dev_dct; + u32 index_reg = 0x98 + 0x100 * dct; + u8 cs; + u32 index; + u16 word; + + /* Tri-state unused chipselects when motherboard + termination is available */ + + // FIXME: skip for Ax + + word = pDCTstat->CSPresent; + if (pDCTstat->Status & (1 << SB_Registered)) { + for (cs = 0; cs < 8; cs++) { + if (word & (1 << cs)) { + if (!(cs & 1)) + word |= 1 << (cs + 1); + } + } + } + word = (~word) & 0xFF; + index = 0x0c; + val = Get_NB32_index_wait(dev, index_reg, index); + val |= word; + Set_NB32_index_wait(dev, index_reg, index, val); +} + + + +static void SetCKETriState(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u32 val; + u32 dev; + u32 index_reg = 0x98 + 0x100 * dct; + u8 cs; + u32 index; + u16 word; + + /* Tri-state unused CKEs when motherboard termination is available */ + + // FIXME: skip for Ax + + dev = pDCTstat->dev_dct; + word = 0x101; + for (cs = 0; cs < 8; cs++) { + if (pDCTstat->CSPresent & (1 << cs)) { + if (!(cs & 1)) + word &= 0xFF00; + else + word &= 0x00FF; + } + } + + index = 0x0c; + val = Get_NB32_index_wait(dev, index_reg, index); + if ((word & 0x00FF) == 1) + val |= 1 << 12; + else + val &= ~(1 << 12); + + if ((word >> 8) == 1) + val |= 1 << 13; + else + val &= ~(1 << 13); + + Set_NB32_index_wait(dev, index_reg, index, val); +} + + +static void SetODTTriState(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u32 val; + u32 dev; + u32 index_reg = 0x98 + 0x100 * dct; + u8 cs; + u32 index; + u16 word; + + /* Tri-state unused ODTs when motherboard termination is available */ + + // FIXME: skip for Ax + + dev = 
pDCTstat->dev_dct; + word = 0; + for (cs = 0; cs < 8; cs += 2) { + if (!(pDCTstat->CSPresent & (1 << cs))) { + if (!(pDCTstat->CSPresent & (1 << (cs + 1)))) + word |= (1 << (cs >> 1)); + } + } + + index = 0x0C; + val = Get_NB32_index_wait(dev, index_reg, index); + val |= (word << 8); + Set_NB32_index_wait(dev, index_reg, index, val); +} + + +static void InitPhyCompensation(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u8 i; + u32 index_reg = 0x98 + 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + u32 val; + u32 valx = 0; + u32 dword; + const u8 *p; + + val = Get_NB32_index_wait(dev, index_reg, 0x00); + dword = 0; + for (i=0; i < 6; i++) { + switch (i) { + case 0: + case 4: + p = Table_Comp_Rise_Slew_15x; + valx = p[(val >> 16) & 3]; + break; + case 1: + case 5: + p = Table_Comp_Fall_Slew_15x; + valx = p[(val >> 16) & 3]; + break; + case 2: + p = Table_Comp_Rise_Slew_20x; + valx = p[(val >> 8) & 3]; + break; + case 3: + p = Table_Comp_Fall_Slew_20x; + valx = p[(val >> 8) & 3]; + break; + + } + dword |= valx << (5 * i); + } + + /* Override/Exception */ + if ((pDCTstat->Speed == 2) && (pDCTstat->MAdimms[dct] == 4)) + dword &= 0xF18FFF18; + + Set_NB32_index_wait(dev, index_reg, 0x0a, dword); +} + + +static void WaitRoutine_D(u32 time) +{ + while(time) { + _EXECFENCE; + time--; + } +} + + +static void mct_EarlyArbEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + u32 reg; + u32 val; + u32 dev = pDCTstat->dev_dct; + + /* GhEnhancement #18429 modified by askar: For low NB CLK : + * Memclk ratio, the DCT may need to arbitrate early to avoid + * unnecessary bubbles. 
+ * bit 19 of F2x[1,0]78 Dram Control Register, set this bit only when + * NB CLK : Memclk ratio is between 3:1 (inclusive) to 4.5:1 (inclusive) + */ + + reg = 0x78; + val = Get_NB32(dev, reg); + + //FIXME: check for Cx + if (CheckNBCOFEarlyArbEn(pMCTstat, pDCTstat)) + val |= (1 << EarlyArbEn); + + Set_NB32(dev, reg, val); + +} + + +/* Returns 1 when the NB COF : MemClk ratio lies within 3:1 .. 4.5:1 + * (both inclusive), i.e. when EarlyArbEn should be set; returns 0 otherwise. + */ +static u8 CheckNBCOFEarlyArbEn(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + u32 reg; + u32 val; + u32 tmp; + u32 rem; + u32 dev = pDCTstat->dev_dct; + u32 hi, lo; + u8 NbDid = 0; + + /* Compute the integer NB COF : MemClk ratio (val) and its remainder + * (rem); the range check follows below. + */ + + /* 3*(Fn2xD4[NBFid]+4)/(2^NbDid)/(3+Fn2x94[MemClkFreq]) */ + /* NOTE(review): the code below adds 3 to NBFid while the formula above says +4 -- confirm against the BKDG. */ + _RDMSR(0xC0010071, &lo, &hi); + if (lo & (1 << 22)) + NbDid |= 1; + + + reg = 0x94; + val = Get_NB32(dev, reg); + if (!(val & (1 << MemClkFreqVal))) + val = Get_NB32(dev, reg + 0x100); /* get the DCT1 value (DCT1 registers sit at +0x100, not *0x100) */ + + val &= 0x07; + val += 3; + if (NbDid) + val <<= 1; + tmp = val; /* divisor: (3 + MemClkFreq) * 2^NbDid */ + + dev = pDCTstat->dev_nbmisc; + reg = 0xD4; + val = Get_NB32(dev, reg); + val &= 0x1F; + val += 3; + val *= 3; + /* take the remainder from the numerator BEFORE val is replaced by the quotient */ + rem = val % tmp; + val = val / tmp; + tmp >>= 1; /* half the divisor: rem > tmp means the fractional part exceeds .5 */ + + // Yes this could be nicer but this was how the asm was.... 
+ if (val < 3) { /* NClk:MemClk < 3:1 */ + return 0; + } else if (val > 4) { /* NClk:MemClk >= 5:1 */ + return 0; + } else if ((val == 4) && (rem > tmp)) { /* NClk:MemClk > 4.5:1 */ + return 0; + } else { + return 1; /* 3:1 <= NClk:MemClk <= 4.5:1*/ + } +} + + +static void mct_ResetDataStruct_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + u8 Node; + u32 i; + struct DCTStatStruc *pDCTstat; + u16 start, stop; + u8 *p; + u16 host_serv1, host_serv2; + + /* Initialize Data structures by clearing all entries to 0 */ + p = (u8 *) pMCTstat; + for (i = 0; i < sizeof(struct MCTStatStruc); i++) { + p[i] = 0; + } + + for (Node = 0; Node < 8; Node++) { + pDCTstat = pDCTstatA + Node; + host_serv1 = pDCTstat->HostBiosSrvc1; + host_serv2 = pDCTstat->HostBiosSrvc2; + + p = (u8 *) pDCTstat; + start = 0; + stop = ((u16) &((struct DCTStatStruc *)0)->CH_MaxRdLat[2]); + for (i = start; i < stop ; i++) { + p[i] = 0; + } + + start = ((u16) &((struct DCTStatStruc *)0)->CH_D_BC_RCVRDLY[2][4]); + stop = sizeof(struct DCTStatStruc); + for (i = start; i < stop; i++) { + p[i] = 0; + } + pDCTstat->HostBiosSrvc1 = host_serv1; + pDCTstat->HostBiosSrvc2 = host_serv2; + } +} + + +static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + u8 i; + u32 reg_off; + u32 dev = pDCTstat->dev_dct; + + // FIXME: skip for Ax + if ((pDCTstat->Speed == 3) || ( pDCTstat->Speed == 2)) { // MemClkFreq = 667MHz or 533Mhz + for (i=0; i < 2; i++) { + reg_off = 0x100 * i; + Set_NB32(dev, 0x98 + reg_off, 0x0D000030); + Set_NB32(dev, 0x9C + reg_off, 0x00000806); + Set_NB32(dev, 0x98 + reg_off, 0x4D040F30); + } + } +} + + +void mct_AdjustDelayRange_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 *dqs_pos) +{ + // FIXME: Skip for Ax + if ((pDCTstat->Speed == 3) || ( pDCTstat->Speed == 2)) { // MemClkFreq = 667MHz or 533Mhz + *dqs_pos = 32; + } +} + + +void mct_SetClToNB_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc 
*pDCTstat) +{ + u32 lo, hi; + u32 msr; + + // FIXME: Maybe check the CPUID? - not for now. + // pDCTstat->LogicalCPUID; + + msr = BU_CFG2; + _RDMSR(msr, &lo, &hi); + lo |= 1 << ClLinesToNbDis; + _WRMSR(msr, lo, hi); +} + + +void mct_ClrClToNB_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + + u32 lo, hi; + u32 msr; + + // FIXME: Maybe check the CPUID? - not for now. + // pDCTstat->LogicalCPUID; + + msr = BU_CFG2; + _RDMSR(msr, &lo, &hi); + if (!pDCTstat->ClToNB_flag) + lo &= ~(1<<ClLinesToNbDis); + _WRMSR(msr, lo, hi); + +} + + +void mct_SetWbEnhWsbDis_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + u32 lo, hi; + u32 msr; + + // FIXME: Maybe check the CPUID? - not for now. + // pDCTstat->LogicalCPUID; + + msr = BU_CFG; + _RDMSR(msr, &lo, &hi); + hi |= (1 << WbEnhWsbDis_D); + _WRMSR(msr, lo, hi); +} + + +void mct_ClrWbEnhWsbDis_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + u32 lo, hi; + u32 msr; + + // FIXME: Maybe check the CPUID? - not for now. + // pDCTstat->LogicalCPUID; + + msr = BU_CFG; + _RDMSR(msr, &lo, &hi); + hi &= ~(1 << WbEnhWsbDis_D); + _WRMSR(msr, lo, hi); +} + + +void mct_SetDramConfigHi_D(struct DCTStatStruc *pDCTstat, u32 dct, + u32 DramConfigHi) +{ + /* Bug#15114: Comp. update interrupted by Freq. change can cause + * subsequent update to be invalid during any MemClk frequency change: + * Solution: From the bug report: + * 1. A software-initiated frequency change should be wrapped into the + * following sequence : + * - a) Disable Compensation (F2[1, 0]9C_x08[30] ) + * b) Reset the Begin Compensation bit (D3CMP->COMP_CONFIG[0]) in all the compensation engines + * c) Do frequency change + * d) Enable Compensation (F2[1, 0]9C_x08[30] ) + * 2. A software-initiated Disable Compensation should always be + * followed by step b) of the above steps. 
+ * Silicon Status: Fixed In Rev B0 + * + * Errata#177: DRAM Phy Automatic Compensation Updates May Be Invalid + * Solution: BIOS should disable the phy automatic compensation prior + * to initiating a memory clock frequency change as follows: + * 1. Disable PhyAutoComp by writing 1'b1 to F2x[1, 0]9C_x08[30] + * 2. Reset the Begin Compensation bits by writing 32'h0 to + * F2x[1, 0]9C_x4D004F00 + * 3. Perform frequency change + * 4. Enable PhyAutoComp by writing 1'b0 to F2x[1, 0]9C_08[30] + * In addition, any time software disables the automatic phy + * compensation it should reset the begin compensation bit per step 2. + * Silicon Status: Fixed in DR-B0 + */ + + u32 dev = pDCTstat->dev_dct; + u32 index_reg = 0x98 + 0x100 * dct; + u32 index; + + u32 val; + + index = 0x08; + val = Get_NB32_index_wait(dev, index_reg, index); + Set_NB32_index_wait(dev, index_reg, index, val | (1 << DisAutoComp)); + + //FIXME: check for Bx Cx CPU + // if Ax mct_SetDramConfigHi_Samp_D + + /* errata#177 */ + index = 0x4D014F00; /* F2x[1, 0]9C_x[D0FFFFF:D000000] DRAM Phy Debug Registers */ + index |= 1 << DctAccessWrite; + val = 0; + Set_NB32_index_wait(dev, index_reg, index, val); + + Set_NB32(dev, 0x94 + 0x100 * dct, DramConfigHi); + + index = 0x08; + val = Get_NB32_index_wait(dev, index_reg, index); + Set_NB32_index_wait(dev, index_reg, index, val & (~(1 << DisAutoComp))); +} + +static void mct_BeforeDQSTrain_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + u8 Node; + struct DCTStatStruc *pDCTstat; + + /* Errata 178 + * + * Bug#15115: Uncertainty In The Sync Chain Leads To Setup Violations + * In TX FIFO + * Solution: BIOS should program DRAM Control Register[RdPtrInit] = + * 5h, (F2x[1, 0]78[3:0] = 5h). + * Silicon Status: Fixed In Rev B0 + * + * Bug#15880: Determine validity of reset settings for DDR PHY timing. + * Solutiuon: At least, set WrDqs fine delay to be 0 for DDR2 training. 
+ */ + + /* Only nodes that are present are prepared for DQS training and get + * the DLL of both DCTs reset; absent nodes are skipped entirely. + */ + for (Node = 0; Node < 8; Node++) { + pDCTstat = pDCTstatA + Node; + + if (pDCTstat->NodePresent) { + mct_BeforeDQSTrain_Samp_D(pMCTstat, pDCTstat); + mct_ResetDLL_D(pMCTstat, pDCTstat, 0); + mct_ResetDLL_D(pMCTstat, pDCTstat, 1); + } + + } +} + +/* For the first enabled receiver of 'dct' that has a valid system address: + * read one test-pattern line, then set and afterwards clear bit 15 of the + * data register 0x9C (selected through index 0x0D00000C written to 0x98), + * waiting after each write. Stops after that first receiver. + */ +static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u8 Receiver; + u32 val; + u32 dev = pDCTstat->dev_dct; + u32 reg_off = 0x100 * dct; + u32 addr; + u8 valid = 0; + + pDCTstat->Channel = dct; + Receiver = mct_InitReceiver_D(pDCTstat, dct); + /* there are four receiver pairs, loosely associated with chipselects.*/ + for (; Receiver < 8; Receiver += 2) { + if (mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, Receiver)) { + addr = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, dct, Receiver, &valid); + if (valid) { + mct_Read1LTestPattern_D(pMCTstat, pDCTstat, addr); /* cache fills */ + Set_NB32(dev, 0x98 + reg_off, 0x0D00000C); + val = Get_NB32(dev, 0x9C + reg_off); + val |= 1 << 15; + Set_NB32(dev, 0x9C + reg_off, val); + Set_NB32(dev, 0x98 + reg_off, 0x4D0F0F0C); + mct_Wait_10ns(60); /* wait >= 300ns */ + + Set_NB32(dev, 0x98 + reg_off, 0x0D00000C); + val = Get_NB32(dev, 0x9C + reg_off); + val &= ~(1 << 15); + Set_NB32(dev, 0x9C + reg_off, val); + Set_NB32(dev, 0x98 + reg_off, 0x4D0F0F0C); + mct_Wait_10ns(400); /* wait >= 2us */ + break; + } + } + } +} + + +/* In unganged mode, set bit 5 (DctDatIntlv) of register 0x110 on dev_dct + * and bit 36 (DisDatMask) of the NB Configuration register, whose high + * dword is register 0x8C on dev_nbmisc. + */ +static void mct_EnableDatIntlv_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + u32 dev = pDCTstat->dev_dct; + u32 val; + + /* Enable F2x110[DctDatIntlv] */ + // Call back not required mctHookBeforeDatIntlv_D() + // FIXME Skip for Ax + if (!pDCTstat->GangedMode) { + val = Get_NB32(dev, 0x110); + val |= 1 << 5; // DctDatIntlv + Set_NB32(dev, 0x110, val); + + // FIXME Skip for Cx + dev = pDCTstat->dev_nbmisc; + val = Get_NB32(dev, 0x8C); // NB Configuration Hi + val |= 1 << (36-32); // DisDatMask: set bit 36, i.e. bit 4 of the high dword + Set_NB32(dev, 0x8C, val); + } +} + + +static void mct_SetupSync_D(struct MCTStatStruc *pMCTstat, + struct 
DCTStatStruc *pDCTstat) +{ + /* set F2x78[ChSetupSync] when F2x[1, 0]9C_x04[AddrCmdSetup, CsOdtSetup, + * CkeSetup] setups for one DCT are all 0s and at least one of the setups, + * F2x[1, 0]9C_x04[AddrCmdSetup, CsOdtSetup, CkeSetup], of the other + * controller is 1 + */ + u32 cha, chb; + u32 dev = pDCTstat->dev_dct; + u32 val; + + /* keep only the three setup bits (bits 5, 13 and 21) of each channel */ + cha = pDCTstat->CH_ADDR_TMG[0] & 0x0202020; + chb = pDCTstat->CH_ADDR_TMG[1] & 0x0202020; + + if ((cha != chb) && ((cha == 0) || (chb == 0))) { + val = Get_NB32(dev, 0x78); + val |= 1 << ChSetupSync; /* ChSetupSync is used as a bit position, like the other register bit names */ + Set_NB32(dev, 0x78, val); + } +} + +/* DR-B2 only: wait 50 us after DRAM init; if DramEnabled is still clear, + * clear Width128 in register 0x90 of the given DCT, perform a dummy indexed + * read of index 0x05, and set Width128 again when running in ganged mode. + */ +static void AfterDramInit_D(struct DCTStatStruc *pDCTstat, u8 dct) { + + u32 val; + u32 reg_off = 0x100 * dct; + u32 dev = pDCTstat->dev_dct; + + if (pDCTstat->LogicalCPUID & AMD_DR_B2) { + mct_Wait_10ns(5000); /* Wait 50 us*/ + val = Get_NB32(dev, 0x110); + if (!(val & (1 << DramEnabled))) { + /* If 50 us expires while DramEnable =0 then do the following */ + val = Get_NB32(dev, 0x90 + reg_off); + val &= ~(1 << Width128); /* Program Width128 = 0 */ + Set_NB32(dev, 0x90 + reg_off, val); + + val = Get_NB32_index_wait(dev, 0x98 + reg_off, 0x05); /* Perform dummy CSR read to F2x09C_x05 */ + + if (pDCTstat->GangedMode) { + val = Get_NB32(dev, 0x90 + reg_off); + val |= 1 << Width128; /* Program Width128 = 1 */ + Set_NB32(dev, 0x90 + reg_off, val); + } + } + } +} + + +/* ========================================================== + * 6-bit Bank Addressing Table + * RR=rows-13 binary + * B=Banks-2 binary + * CCC=Columns-9 binary + * ========================================================== + * DCT CCCBRR Rows Banks Columns 64-bit CS Size + * Encoding + * 0000 000000 13 2 9 128MB + * 0001 001000 13 2 10 256MB + * 0010 001001 14 2 10 512MB + * 0011 010000 13 2 11 512MB + * 0100 001100 13 3 10 512MB + * 0101 001101 14 3 10 1GB + * 0110 010001 14 2 11 1GB + * 0111 001110 15 3 10 2GB + * 1000 010101 14 3 11 2GB + * 1001 010110 15 3 11 4GB + * 1010 001111 16 3 10 4GB + * 1011 010111 16 3 11 8GB + */ diff --git 
a/src/northbridge/amd/amdmct/mct/mct_d.h b/src/northbridge/amd/amdmct/mct/mct_d.h new file mode 100644 index 0000000000..07de9ac564 --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mct_d.h @@ -0,0 +1,737 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * Description: Include file for all generic DDR 2 MCT files. + */ +#ifndef MCT_D_H +#define MCT_D_H + + + +/*=========================================================================== + CPU - K8/FAM10 +===========================================================================*/ +#define PT_L1 0 /* CPU Package Type */ +#define PT_M2 1 +#define PT_S1 2 +#define PT_GR 3 + +#define J_MIN 0 /* j loop constraint. 1=CL 2.0 T*/ +#define J_MAX 5 /* j loop constraint. 5=CL 7.0 T*/ +#define K_MIN 1 /* k loop constraint. 1=200 Mhz*/ +#define K_MAX 5 /* k loop constraint. 5=533 Mhz*/ +#define CL_DEF 2 /* Default value for failsafe operation. 2=CL 4.0 T*/ +#define T_DEF 1 /* Default value for failsafe operation. 
1=5ns (cycle time)*/ + +#define BSCRate 1 /* reg bit field=rate of dram scrubber for ecc*/ + /* memory initialization (ecc and check-bits).*/ + /* 1=40 ns/64 bytes.*/ +#define FirstPass 1 /* First pass through RcvEn training*/ +#define SecondPass 2 /* Second pass through Rcven training*/ + +#define RCVREN_MARGIN 6 /* number of DLL taps to delay beyond first passing position*/ +#define MAXASYNCLATCTL_2 2 /* Max Async Latency Control value*/ +#define MAXASYNCLATCTL_3 3 /* Max Async Latency Control value*/ + +#define DQS_FAIL 1 +#define DQS_PASS 0 +#define DQS_WRITEDIR 1 +#define DQS_READDIR 0 +#define MIN_DQS_WNDW 3 +#define secPassOffset 6 +#define Pass1MemClkDly 0x20 /* Add 1/2 MemClk delay */ +#define MAX_RD_LAT 0x3FF +#define MIN_FENCE 14 +#define MAX_FENCE 20 +#define MIN_DQS_WR_FENCE 14 +#define MAX_DQS_WR_FENCE 20 +#define FenceTrnFinDlySeed 19 +#define EarlyArbEn 19 + +/* PCI address builders: (((0x18 + Node) << 3) + function) << 12, function = 0..3. + * Node is parenthesized so that expression arguments expand correctly. */ +#define PA_HOST(Node) ((((0x18+(Node)) << 3)+0) << 12) /* Node 0 Host Bus function PCI Address bits [15:0]*/ +#define PA_MAP(Node) ((((0x18+(Node)) << 3)+1) << 12) /* Node 0 MAP function PCI Address bits [15:0]*/ +#define PA_DCT(Node) ((((0x18+(Node)) << 3)+2) << 12) /* Node 0 DCT function PCI Address bits [15:0]*/ +//#define PA_EXT_DCT (((00 << 3)+4) << 8) /*Node 0 DCT extended configuration registers*/ +//#define PA_DCTADDL (((00 << 3)+2) << 8) /*Node x DCT function, Additional Registers PCI Address bits [15:0]*/ +//#define PA_EXT_DCTADDL (((00 << 3)+5) << 8) /*Node x DCT function, Additional Registers PCI Address bits [15:0]*/ + +#define PA_NBMISC(Node) ((((0x18+(Node)) << 3)+3) << 12) /*Node 0 Misc PCI Address bits [15:0]*/ +//#define PA_NBDEVOP (((00 << 3)+3) << 8) /*Node 0 Misc PCI Address bits [15:0]*/ + +#define DCC_EN 1 /* X:2:0x94[19]*/ +#define ILD_Lmt 3 /* X:2:0x94[18:16]*/ + +#define EncodedTSPD 0x00191709 /* encodes which SPD byte to get T from*/ + /* versus CL X, CL X-.5, and CL X-1*/ + +#define Bias_TrpT 3 /* bias to convert bus clocks to bit field value*/ +#define Bias_TrrdT 2 
+#define Bias_TrcdT 3 +#define Bias_TrasT 3 +#define Bias_TrcT 11 +#define Bias_TrtpT 2 +#define Bias_TwrT 3 +#define Bias_TwtrT 0 +#define Bias_TfawT 7 + +#define Min_TrpT 3 /* min programmable value in busclocks*/ +#define Max_TrpT 6 /* max programmable value in busclocks*/ +#define Min_TrrdT 2 +#define Max_TrrdT 5 +#define Min_TrcdT 3 +#define Max_TrcdT 6 +#define Min_TrasT 5 +#define Max_TrasT 18 +#define Min_TrcT 11 +#define Max_TrcT 26 +#define Min_TrtpT 2 +#define Max_TrtpT 3 +#define Min_TwrT 3 +#define Max_TwrT 6 +#define Min_TwtrT 1 +#define Max_TwtrT 3 + +/*DDR2-1066 support*/ +#define Bias_TrcdT_1066 5 +#define Bias_TrasT_1066 15 +#define Bias_TrrdT_1066 4 +#define Bias_TwrT_1066 4 +#define Bias_TrpT_1066 5 +#define Bias_TwtrT_1066 4 +#define Bias_TfawT_1066 15 + +#define Min_TrcdT_1066 5 +#define Max_TrcdT_1066 12 +#define Min_TrasT_1066 15 +#define Max_TrasT_1066 30 +#define Min_TrcT_1066 11 +#define Max_TrcT_1066 42 +#define Min_TrrdT_1066 4 +#define Max_TrrdT_1066 7 +#define Min_TwrT_1066 5 +#define Max_TwrT_1066 8 +#define Min_TrpT_1066 5 +#define Max_TrpT_1066 12 +#define Min_TwtrT_1066 4 +#define Max_TwtrT_1066 7 + +/*common register bit names*/ +#define DramHoleValid 0 /* func 1, offset F0h, bit 0*/ +#define DramMemHoistValid 1 /* func 1, offset F0h, bit 1*/ +#define CSEnable 0 /* func 2, offset 40h-5C, bit 0*/ +#define Spare 1 /* func 2, offset 40h-5C, bit 1*/ +#define TestFail 2 /* func 2, offset 40h-5C, bit 2*/ +#define DqsRcvEnTrain 18 /* func 2, offset 78h, bit 18*/ +#define EnDramInit 31 /* func 2, offset 7Ch, bit 31*/ +#define DisAutoRefresh 18 /* func 2, offset 8Ch, bit 18*/ +#define InitDram 0 /* func 2, offset 90h, bit 0*/ +#define BurstLength32 10 /* func 2, offset 90h, bit 10*/ +#define Width128 11 /* func 2, offset 90h, bit 11*/ +#define X4Dimm 12 /* func 2, offset 90h, bit 12*/ +#define UnBuffDimm 16 /* func 2, offset 90h, bit 16*/ +#define DimmEcEn 19 /* func 2, offset 90h, bit 19*/ +#define MemClkFreqVal 3 /* func 2, offset 94h, 
bit 3*/ +#define RDqsEn 12 /* func 2, offset 94h, bit 12*/ +#define DisDramInterface 14 /* func 2, offset 94h, bit 14*/ +#define DctAccessWrite 30 /* func 2, offset 98h, bit 30*/ +#define DctAccessDone 31 /* func 2, offset 98h, bit 31*/ +#define MemClrStatus 0 /* func 2, offset A0h, bit 0*/ +#define PwrSavingsEn 10 /* func 2, offset A0h, bit 10*/ +#define Mod64BitMux 4 /* func 2, offset A0h, bit 4*/ +#define DisableJitter 1 /* func 2, offset A0h, bit 1*/ +#define MemClrDis 1 /* func 3, offset F8h, FNC 4, bit 1*/ +#define SyncOnUcEccEn 2 /* func 3, offset 44h, bit 2*/ +#define Dr_MemClrStatus 10 /* func 3, offset 110h, bit 10*/ +#define MemClrBusy 9 /* func 3, offset 110h, bit 9*/ +#define DctGangEn 4 /* func 3, offset 110h, bit 4*/ +#define MemClrInit 3 /* func 3, offset 110h, bit 3*/ +#define AssertCke 28 /* func 2, offset 7Ch, bit 28*/ +#define DeassertMemRstX 27 /* func 2, offset 7Ch, bit 27*/ +#define SendMrsCmd 26 /* func 2, offset 7Ch, bit 26*/ +#define SendAutoRefresh 25 /* func 2, offset 7Ch, bit 25*/ +#define SendPchgAll 24 /* func 2, offset 7Ch, bit 24*/ +#define DisDqsBar 6 /* func 2, offset 90h, bit 6*/ +#define DramEnabled 8 /* func 2, offset 110h, bit 8*/ +#define LegacyBiosMode 9 /* func 2, offset 94h, bit 9*/ +#define PrefDramTrainMode 28 /* func 2, offset 11Ch, bit 28*/ +#define FlushWr 30 /* func 2, offset 11Ch, bit 30*/ +#define DisAutoComp 30 /* func 2, offset 9Ch, Index 8, bit 30*/ +#define DqsRcvTrEn 13 /* func 2, offset 9Ch, Index 8, bit 13*/ +#define ForceAutoPchg 23 /* func 2, offset 90h, bit 23*/ +#define ClLinesToNbDis 15 /* Bu_CFG2, bit 15*/ +#define WbEnhWsbDis_D (48-32) +#define PhyFenceTrEn 3 /* func 2, offset 9Ch, Index 8, bit 3 */ +#define ParEn 8 /* func 2, offset 90h, bit 8 */ +#define DcqArbBypassEn 19 /* func 2, offset 94h, bit 19 */ +#define ActiveCmdAtRst 1 /* func 2, offset A8H, bit 1 */ +#define FlushWrOnStpGnt 29 /* func 2, offset 11Ch, bit 29 */ +#define BankSwizzleMode 22 /* func 2, offset 94h, bit 22 */ +#define 
ChSetupSync 15 /* func 2, offset 78h, bit 15 */ + + + +/*============================================================================= + SW Initialization +============================================================================*/ +#define DLL_Enable 1 +#define OCD_Default 2 +#define OCD_Exit 3 + + + +/*============================================================================= + Jedec DDR II +=============================================================================*/ +#define SPD_TYPE 2 /*SPD byte read location*/ + #define JED_DDRSDRAM 0x07 /*Jedec defined bit field*/ + #define JED_DDR2SDRAM 0x08 /*Jedec defined bit field*/ + +#define SPD_DIMMTYPE 20 +#define SPD_ATTRIB 21 + #define JED_DIFCKMSK 0x20 /*Differential Clock Input*/ + #define JED_REGADCMSK 0x11 /*Registered Address/Control*/ + #define JED_PROBEMSK 0x40 /*Analysis Probe installed*/ +#define SPD_DEVATTRIB 22 +#define SPD_EDCTYPE 11 + #define JED_ECC 0x02 + #define JED_ADRCPAR 0x04 +#define SPD_ROWSZ 3 +#define SPD_COLSZ 4 +#define SPD_LBANKS 17 /*number of [logical] banks on each device*/ +#define SPD_DMBANKS 5 /*number of physical banks on dimm*/ + #define SPDPLBit 4 /* Dram package bit*/ +#define SPD_BANKSZ 31 /*capacity of physical bank*/ +#define SPD_DEVWIDTH 13 +#define SPD_CASLAT 18 +#define SPD_TRP 27 +#define SPD_TRRD 28 +#define SPD_TRCD 29 +#define SPD_TRAS 30 +#define SPD_TWR 36 +#define SPD_TWTR 37 +#define SPD_TRTP 38 +#define SPD_TRCRFC 40 +#define SPD_TRC 41 +#define SPD_TRFC 42 + +#define SPD_MANDATEYR 93 /*Module Manufacturing Year (BCD)*/ + +#define SPD_MANDATEWK 94 /*Module Manufacturing Week (BCD)*/ + +/*----------------------------- + Jdec DDR II related equates +-----------------------------*/ +#define MYEAR06 6 /* Manufacturing Year BCD encoding of 2006 - 06d*/ +#define MWEEK24 0x24 /* Manufacturing Week BCD encoding of June - 24d*/ + +/*============================================================================= + Macros 
+=============================================================================*/ + +#define _2GB_RJ8 (2<<(30-8)) +#define _4GB_RJ8 (4<<(30-8)) +#define _4GB_RJ4 (4<<(30-4)) + +#define BigPagex8_RJ8 (1<<(17+3-8)) /*128KB * 8 >> 8 */ + +/*============================================================================= + Global MCT Status Structure +=============================================================================*/ +struct MCTStatStruc { + u32 GStatus; /* Global Status bitfield*/ + u32 HoleBase; /* If not zero, BASE[39:8] (system address) + of sub 4GB dram hole for HW remapping.*/ + u32 Sub4GCacheTop; /* If not zero, the 32-bit top of cacheable memory.*/ + u32 SysLimit; /* LIMIT[39:8] (system address)*/ +}; + +/*============================================================================= + Global MCT Configuration Status Word (GStatus) +=============================================================================*/ +/*These should begin at bit 0 of GStatus[31:0]*/ +#define GSB_MTRRshort 0 /* Ran out of MTRRs while mapping memory*/ +#define GSB_ECCDIMMs 1 /* All banks of all Nodes are ECC capable*/ +#define GSB_DramECCDis 2 /* Dram ECC requested but not enabled.*/ +#define GSB_SoftHole 3 /* A Node Base gap was created*/ +#define GSB_HWHole 4 /* A HW dram remap was created*/ +#define GSB_NodeIntlv 5 /* Node Memory interleaving was enabled*/ +#define GSB_SpIntRemapHole 16 /* Special condition for Node Interleave and HW remapping*/ +#define GSB_EnDIMMSpareNW 17 /* Indicates that DIMM Spare can be used without a warm reset */ + /* NOTE: This is a local bit used by memory code */ + + +/*=============================================================================== + Local DCT Status structure (a structure for each DCT) +===============================================================================*/ + +struct DCTStatStruc { /* A per Node structure*/ +/* DCTStatStruct_F - start */ + u8 Node_ID; /* Node ID of current controller*/ + u8 ErrCode; /* Current error 
condition of Node + 0= no error + 1= Variance Error, DCT is running but not in an optimal configuration. + 2= Stop Error, DCT is NOT running + 3= Fatal Error, DCT/MCT initialization has been halted.*/ + u32 ErrStatus; /* Error Status bit Field */ + u32 Status; /* Status bit Field*/ + u8 DIMMAddr[8]; /* SPD address of DIMM controlled by MA0_CS_L[0,1]*/ + /* SPD address of..MB0_CS_L[0,1]*/ + /* SPD address of..MA1_CS_L[0,1]*/ + /* SPD address of..MB1_CS_L[0,1]*/ + /* SPD address of..MA2_CS_L[0,1]*/ + /* SPD address of..MB2_CS_L[0,1]*/ + /* SPD address of..MA3_CS_L[0,1]*/ + /* SPD address of..MB3_CS_L[0,1]*/ + u16 DIMMPresent; /*For each bit n 0..7, 1=DIMM n is present. + DIMM# Select Signal + 0 MA0_CS_L[0,1] + 1 MB0_CS_L[0,1] + 2 MA1_CS_L[0,1] + 3 MB1_CS_L[0,1] + 4 MA2_CS_L[0,1] + 5 MB2_CS_L[0,1] + 6 MA3_CS_L[0,1] + 7 MB3_CS_L[0,1]*/ + u16 DIMMValid; /* For each bit n 0..7, 1=DIMM n is valid and is/will be configured*/ + u16 DIMMMismatch; /* For each bit n 0..7, 1=DIMM n is mismatched, channel B is always considered the mismatch */ + u16 DIMMSPDCSE; /* For each bit n 0..7, 1=DIMM n SPD checksum error*/ + u16 DimmECCPresent; /* For each bit n 0..7, 1=DIMM n is ECC capable.*/ + u16 DimmPARPresent; /* For each bit n 0..7, 1=DIMM n is ADR/CMD Parity capable.*/ + u16 Dimmx4Present; /* For each bit n 0..7, 1=DIMM n contains x4 data devices.*/ + u16 Dimmx8Present; /* For each bit n 0..7, 1=DIMM n contains x8 data devices.*/ + u16 Dimmx16Present; /* For each bit n 0..7, 1=DIMM n contains x16 data devices.*/ + u16 DIMM2Kpage; /* For each bit n 0..7, 1=DIMM n contains 1K page devices.*/ + u8 MAload[2]; /* Number of devices loading MAA bus*/ + /* Number of devices loading MAB bus*/ + u8 MAdimms[2]; /*Number of DIMMs loading CH A*/ + /* Number of DIMMs loading CH B*/ + u8 DATAload[2]; /*Number of ranks loading CH A DATA*/ + /* Number of ranks loading CH B DATA*/ + u8 DIMMAutoSpeed; /*Max valid Mfg. 
Speed of DIMMs + 1=200Mhz + 2=266Mhz + 3=333Mhz + 4=400Mhz + 5=533Mhz*/ + u8 DIMMCASL; /* Min valid Mfg. CL bitfield + 0=2.0 + 1=3.0 + 2=4.0 + 3=5.0 + 4=6.0 */ + u16 DIMMTrcd; /* Minimax Trcd*40 (ns) of DIMMs*/ + u16 DIMMTrp; /* Minimax Trp*40 (ns) of DIMMs*/ + u16 DIMMTrtp; /* Minimax Trtp*40 (ns) of DIMMs*/ + u16 DIMMTras; /* Minimax Tras*40 (ns) of DIMMs*/ + u16 DIMMTrc; /* Minimax Trc*40 (ns) of DIMMs*/ + u16 DIMMTwr; /* Minimax Twr*40 (ns) of DIMMs*/ + u16 DIMMTrrd; /* Minimax Trrd*40 (ns) of DIMMs*/ + u16 DIMMTwtr; /* Minimax Twtr*40 (ns) of DIMMs*/ + u8 Speed; /* Bus Speed (to set Controller) + 1=200Mhz + 2=266Mhz + 3=333Mhz + 4=400Mhz */ + u8 CASL; /* CAS latency DCT setting + 0=2.0 + 1=3.0 + 2=4.0 + 3=5.0 + 4=6.0 */ + u8 Trcd; /* DCT Trcd (busclocks) */ + u8 Trp; /* DCT Trp (busclocks) */ + u8 Trtp; /* DCT Trtp (busclocks) */ + u8 Tras; /* DCT Tras (busclocks) */ + u8 Trc; /* DCT Trc (busclocks) */ + u8 Twr; /* DCT Twr (busclocks) */ + u8 Trrd; /* DCT Trrd (busclocks) */ + u8 Twtr; /* DCT Twtr (busclocks) */ + u8 Trfc[4]; /* DCT Logical DIMM0 Trfc + 0=75ns (for 256Mb devs) + 1=105ns (for 512Mb devs) + 2=127.5ns (for 1Gb devs) + 3=195ns (for 2Gb devs) + 4=327.5ns (for 4Gb devs) */ + /* DCT Logical DIMM1 Trfc (see Trfc0 for format) */ + /* DCT Logical DIMM2 Trfc (see Trfc0 for format) */ + /* DCT Logical DIMM3 Trfc (see Trfc0 for format) */ + u16 CSPresent; /* For each bit n 0..7, 1=Chip-select n is present */ + u16 CSTestFail; /* For each bit n 0..7, 1=Chip-select n is present but disabled */ + u32 DCTSysBase; /* BASE[39:8] (system address) of this Node's DCTs. */ + u32 DCTHoleBase; /* If not zero, BASE[39:8] (system address) of dram hole for HW remapping. Dram hole exists on this Node's DCTs. 
*/ + u32 DCTSysLimit; /* LIMIT[39:8] (system address) of this Node's DCTs */ + u16 PresetmaxFreq; /* Maximum OEM defined DDR frequency + 200=200Mhz (DDR400) + 266=266Mhz (DDR533) + 333=333Mhz (DDR667) + 400=400Mhz (DDR800) */ + u8 _2Tmode; /* 1T or 2T CMD mode (slow access mode) + 1=1T + 2=2T */ + u8 TrwtTO; /* DCT TrwtTO (busclocks)*/ + u8 Twrrd; /* DCT Twrrd (busclocks)*/ + u8 Twrwr; /* DCT Twrwr (busclocks)*/ + u8 Trdrd; /* DCT Trdrd (busclocks)*/ + u32 CH_ODC_CTL[2]; /* Output Driver Strength (see BKDG FN2:Offset 9Ch, index 00h*/ + u32 CH_ADDR_TMG[2]; /* Address Bus Timing (see BKDG FN2:Offset 9Ch, index 04h*/ + /* Output Driver Strength (see BKDG FN2:Offset 9Ch, index 20h*/ + /* Address Bus Timing (see BKDG FN2:Offset 9Ch, index 24h*/ + u16 CH_EccDQSLike[2]; /* CHA DQS ECC byte like...*/ + u8 CH_EccDQSScale[2]; /* CHA DQS ECC byte scale*/ + /* CHA DQS ECC byte like...*/ + /* CHA DQS ECC byte scale*/ + u8 MaxAsyncLat; /* Max Asynchronous Latency (ns)*/ + // NOTE: Not used in Barcelona - u8 CH_D_RCVRDLY[2][4]; + /* CHA DIMM 0 - 4 Receiver Enable Delay*/ + /* CHB DIMM 0 - 4 Receiver Enable Delay */ + // NOTE: Not used in Barcelona - u8 CH_D_B_DQS[2][2][8]; + /* CHA Byte 0-7 Write DQS Delay */ + /* CHA Byte 0-7 Read DQS Delay */ + /* CHB Byte 0-7 Write DQS Delay */ + /* CHB Byte 0-7 Read DQS Delay */ + u32 PtrPatternBufA; /* Ptr on stack to aligned DQS testing pattern*/ + u32 PtrPatternBufB; /* Ptr on stack to aligned DQS testing pattern*/ + u8 Channel; /* Current Channel (0= CH A, 1=CH B)*/ + u8 ByteLane; /* Current Byte Lane (0..7)*/ + u8 Direction; /* Current DQS-DQ training write direction (0=read, 1=write)*/ + u8 Pattern; /* Current pattern*/ + u8 DQSDelay; /* Current DQS delay value*/ + u32 TrainErrors; /* Current Training Errors*/ + + u32 AMC_TSC_DeltaLo; /* Time Stamp Counter measurement of AMC, Low dword*/ + u32 AMC_TSC_DeltaHi; /* Time Stamp Counter measurement of AMC, High dword*/ + // NOTE: Not used in Barcelona - u8 
CH_D_DIR_MaxMin_B_Dly[2][4][2][2][8]; + /* CH A byte lane 0 - 7 minimum filtered window passing DQS delay value*/ + /* CH A byte lane 0 - 7 maximum filtered window passing DQS delay value*/ + /* CH B byte lane 0 - 7 minimum filtered window passing DQS delay value*/ + /* CH B byte lane 0 - 7 maximum filtered window passing DQS delay value*/ + /* CH A byte lane 0 - 7 minimum filtered window passing DQS delay value*/ + /* CH A byte lane 0 - 7 maximum filtered window passing DQS delay value*/ + /* CH B byte lane 0 - 7 minimum filtered window passing DQS delay value*/ + /* CH B byte lane 0 - 7 maximum filtered window passing DQS delay value*/ + u32 LogicalCPUID; /* The logical CPUID of the node*/ + u16 HostBiosSrvc1; /* Word sized general purpose field for use by host BIOS. Scratch space.*/ + u32 HostBiosSrvc2; /* Dword sized general purpose field for use by host BIOS. Scratch space.*/ + u16 DimmQRPresent; /* QuadRank DIMM present?*/ + u16 DimmTrainFail; /* Bitmap showing which dimms failed training*/ + u16 CSTrainFail; /* Bitmap showing which chipselects failed training*/ + u16 DimmYr06; /* Bitmap indicating which Dimms have a manufacturer's year code <= 2006*/ + u16 DimmWk2406; /* Bitmap indicating which Dimms have a manufacturer's week code <= 24 of 2006 (June)*/ + u16 DimmDRPresent; /* Bitmap indicating that Dual Rank Dimms are present*/ + u16 DimmPlPresent; /* Bitmap indicating that Planar (1) or Stacked (0) Dimms are present.*/ + u16 ChannelTrainFai; /* Bitmap showing the channel information about failed Chip Selects + 0 in any bit field indicates Channel 0 + 1 in any bit field indicates Channel 1 */ + u16 CSUsrTestFail; /* Chip selects excluded by user */ +/* DCTStatStruct_F - end */ + + u16 CH_MaxRdLat[2]; /* Max Read Latency (ns) for DCT 0*/ + /* Max Read Latency (ns) for DCT 1*/ + u8 CH_D_DIR_B_DQS[2][4][2][9]; /* [A/B] [DIMM1-4] [R/W] [DQS] */ + /* CHA DIMM0 Byte 0 - 7 and Check Write DQS Delay*/ + /* CHA DIMM0 Byte 0 - 7 and Check Read DQS Delay*/ + /* CHA DIMM1 
Byte 0 - 7 and Check Write DQS Delay*/ + /* CHA DIMM1 Byte 0 - 7 and Check Read DQS Delay*/ + /* CHB DIMM0 Byte 0 - 7 and Check Write DQS Delay*/ + /* CHB DIMM0 Byte 0 - 7 and Check Read DQS Delay*/ + /* CHB DIMM1 Byte 0 - 7 and Check Write DQS Delay*/ + /* CHB DIMM1 Byte 0 - 7 and Check Read DQS Delay*/ + u8 CH_D_B_RCVRDLY[2][4][8]; /* [A/B] [DIMM0-3] [DQS] */ + /* CHA DIMM 0 Receiver Enable Delay*/ + /* CHA DIMM 1 Receiver Enable Delay*/ + /* CHA DIMM 2 Receiver Enable Delay*/ + /* CHA DIMM 3 Receiver Enable Delay*/ + + /* CHB DIMM 0 Receiver Enable Delay*/ + /* CHB DIMM 1 Receiver Enable Delay*/ + /* CHB DIMM 2 Receiver Enable Delay*/ + /* CHB DIMM 3 Receiver Enable Delay*/ + u8 CH_D_BC_RCVRDLY[2][4]; + /* CHA DIMM 0 - 4 Check Byte Receiver Enable Delay*/ + /* CHB DIMM 0 - 4 Check Byte Receiver Enable Delay*/ + u8 DIMMValidDCT[2]; /* DIMM# in DCT0*/ + /* DIMM# in DCT1*/ + u8 MaxDCTs; /* Max number of DCTs in system*/ + // NOTE: removed u8 DCT. Use ->dev_ for pci R/W; /*DCT pointer*/ + u8 GangedMode; /* Ganged mode enabled, 0 = disabled, 1 = enabled*/ + u8 DRPresent; /* Family 10 present flag, 0 = not Fam10, 1 = Fam10*/ + u32 NodeSysLimit; /* BASE[39:8],for DCT0+DCT1 system address*/ + u8 WrDatGrossH; + u8 DqsRcvEnGrossL; + // NOTE: Not used - u8 NodeSpeed /* Bus Speed (to set Controller) + /* 1=200Mhz */ + /* 2=266Mhz */ + /* 3=333Mhz */ + // NOTE: Not used - u8 NodeCASL /* CAS latency DCT setting + /* 0=2.0 */ + /* 1=3.0 */ + /* 2=4.0 */ + /* 3=5.0 */ + /* 4=6.0 */ + u8 TrwtWB; + u8 CurrRcvrCHADelay; /* for keep current RcvrEnDly of chA*/ + u16 T1000; /* get the T1000 figure (cycle time (ns)*1K)*/ + u8 DqsRcvEn_Pass; /* for TrainRcvrEn byte lane pass flag*/ + u8 DqsRcvEn_Saved; /* for TrainRcvrEn byte lane saved flag*/ + u8 SeedPass1Remainder; /* for Phy assisted DQS receiver enable training*/ + + /* for second pass - Second pass should never run for Fam10*/ + // NOTE: Not used for Barcelona - u8 CH_D_B_RCVRDLY_1[2][4][8]; /* CHA DIMM 0 Receiver Enable Delay*/ 
+ /* CHA DIMM 1 Receiver Enable Delay*/ + /* CHA DIMM 2 Receiver Enable Delay*/ + /* CHA DIMM 3 Receiver Enable Delay*/ + + /* CHB DIMM 0 Receiver Enable Delay*/ + /* CHB DIMM 1 Receiver Enable Delay*/ + /* CHB DIMM 2 Receiver Enable Delay*/ + /* CHB DIMM 3 Receiver Enable Delay*/ + + u8 ClToNB_flag; /* is used to restore ClLinesToNbDis bit after memory */ + u32 NodeSysBase; /* for channel interleave usage */ + +/* New for LB Support */ + u8 NodePresent; + u32 dev_host; + u32 dev_map; + u32 dev_dct; + u32 dev_nbmisc; +}; + +/*=============================================================================== + Local Error Status Codes (DCTStatStruc.ErrCode) +===============================================================================*/ +#define SC_RunningOK 0 +#define SC_VarianceErr 1 /* Running non-optimally*/ +#define SC_StopError 2 /* Not Running*/ +#define SC_FatalErr 3 /* Fatal Error, MCTB has exited immediately*/ + +/*=============================================================================== + Local Error Status (DCTStatStruc.ErrStatus[31:0]) +===============================================================================*/ +#define SB_NoDimms 0 +#define SB_DIMMChkSum 1 +#define SB_DimmMismatchM 2 /* dimm module type(buffer) mismatch*/ +#define SB_DimmMismatchT 3 /* dimm CL/T mismatch*/ +#define SB_DimmMismatchO 4 /* dimm organization mismatch (128-bit)*/ +#define SB_NoTrcTrfc 5 /* SPD missing Trc or Trfc info*/ +#define SB_NoCycTime 6 /* SPD missing byte 23 or 25*/ +#define SB_BkIntDis 7 /* Bank interleave requested but not enabled*/ +#define SB_DramECCDis 8 /* Dram ECC requested but not enabled*/ +#define SB_SpareDis 9 /* Online spare requested but not enabled*/ +#define SB_MinimumMode 10 /* Running in Minimum Mode*/ +#define SB_NORCVREN 11 /* No DQS Receiver Enable pass window found*/ +#define SB_CHA2BRCVREN 12 /* DQS Rcvr En pass window CHA to CH B too large*/ +#define SB_SmallRCVR 13 /* DQS Rcvr En pass window too small (far right of dynamic range)*/ 
+#define SB_NODQSPOS 14 /* No DQS-DQ passing positions*/ +#define SB_SMALLDQS 15 /* DQS-DQ passing window too small*/ +#define SB_DCBKScrubDis 16 /* DCache scrub requested but not enabled */ + +/*=============================================================================== + Local Configuration Status (DCTStatStruc.Status[31:0]) +===============================================================================*/ +#define SB_Registered 0 /* All DIMMs are Registered*/ +#define SB_ECCDIMMs 1 /* All banks ECC capable*/ +#define SB_PARDIMMs 2 /* All banks Addr/CMD Parity capable*/ +#define SB_DiagClks 3 /* Jedec ALL slots clock enable diag mode*/ +#define SB_128bitmode 4 /* DCT in 128-bit mode operation*/ +#define SB_64MuxedMode 5 /* DCT in 64-bit mux'ed mode.*/ +#define SB_2TMode 6 /* 2T CMD timing mode is enabled.*/ +#define SB_SWNodeHole 7 /* Remapping of Node Base on this Node to create a gap.*/ +#define SB_HWHole 8 /* Memory Hole created on this Node using HW remapping.*/ +#define SB_Over400MHz 9 /* DCT freq >= 400MHz flag*/ +#define SB_DQSPos_Pass2 10 /* Using for TrainDQSPos DIMM0/1, when freq>=400MHz*/ +#define SB_DQSRcvLimit 11 /* Using for DQSRcvEnTrain to know we have reached to upper bound.*/ +#define SB_ExtConfig 12 /* Indicator the default setting for extend PCI configuration support*/ + + + + +/*=============================================================================== + NVRAM/run-time-configurable Items +===============================================================================*/ +/*Platform Configuration*/ +#define NV_PACK_TYPE 0 /* CPU Package Type (2-bits) + 0=NPT L1 + 1=NPT M2 + 2=NPT S1*/ +#define NV_MAX_NODES 1 /* Number of Nodes/Sockets (4-bits)*/ +#define NV_MAX_DIMMS 2 /* Number of DIMM slots for the specified Node ID (4-bits)*/ +#define NV_MAX_MEMCLK 3 /* Maximum platform demonstrated Memclock (10-bits) + 200=200Mhz (DDR400) + 266=266Mhz (DDR533) + 333=333Mhz (DDR667) + 400=400Mhz (DDR800)*/ +#define NV_ECC_CAP 4 /* Bus ECC capable 
(1-bits) + 0=Platform not capable + 1=Platform is capable*/ +#define NV_4RANKType 5 /* Quad Rank DIMM slot type (2-bits) + 0=Normal + 1=R4 (4-Rank Registered DIMMs in AMD server configuration) + 2=S4 (Unbuffered SO-DIMMs)*/ +#define NV_BYPMAX 6 /* Value to set DcqBypassMax field (See Function 2, Offset 94h, [27:24] of BKDG for field definition). + 4=4 times bypass (normal for non-UMA systems) + 7=7 times bypass (normal for UMA systems)*/ +#define NV_RDWRQBYP 7 /* Value to set RdWrQByp field (See Function 2, Offset A0h, [3:2] of BKDG for field definition). + 2=8 times (normal for non-UMA systems) + 3=16 times (normal for UMA systems)*/ + + +/*Dram Timing*/ +#define NV_MCTUSRTMGMODE 10 /* User Memclock Mode (2-bits) + 0=Auto, no user limit + 1=Auto, user limit provided in NV_MemCkVal + 2=Manual, user value provided in NV_MemCkVal*/ +#define NV_MemCkVal 11 /* Memory Clock Value (2-bits) + 0=200Mhz + 1=266Mhz + 2=333Mhz + 3=400Mhz*/ + +/*Dram Configuration*/ +#define NV_BankIntlv 20 /* Dram Bank (chip-select) Interleaving (1-bits) + 0=disable + 1=enable*/ +#define NV_AllMemClks 21 /* Turn on All DIMM clocks (1-bits) + 0=normal + 1=enable all memclocks*/ +#define NV_SPDCHK_RESTRT 22 /* SPD Check control bitmap (1-bits) + 0=Exit current node init if any DIMM has SPD checksum error + 1=Ignore faulty SPD checksums (Note: DIMM cannot be enabled)*/ +#define NV_DQSTrainCTL 23 /* DQS Signal Timing Training Control + 0=skip DQS training + 1=perform DQS training*/ +#define NV_NodeIntlv 24 /* Node Memory Interleaving (1-bits) + 0=disable + 1=enable*/ +#define NV_BurstLen32 25 /* BurstLength32 for 64-bit mode (1-bits) + 0=disable (normal) + 1=enable (4 beat burst when width is 64-bits)*/ + +/*Dram Power*/ +#define NV_CKE_PDEN 30 /* CKE based power down mode (1-bits) + 0=disable + 1=enable*/ +#define NV_CKE_CTL 31 /* CKE based power down control (1-bits) + 0=per Channel control + 1=per Chip select control*/ +#define NV_CLKHZAltVidC3 32 /* Memclock tri-stating during C3 and Alt VID 
(1-bits) + 0=disable + 1=enable*/ + +/*Memory Map/Mgt.*/ +#define NV_BottomIO 40 /* Bottom of 32-bit IO space (8-bits) + NV_BottomIO[7:0]=Addr[31:24]*/ +#define NV_BottomUMA 41 /* Bottom of shared graphics dram (8-bits) + NV_BottomUMA[7:0]=Addr[31:24]*/ +#define NV_MemHole 42 /* Memory Hole Remapping (1-bits) + 0=disable + 1=enable */ + +/*ECC*/ +#define NV_ECC 50 /* Dram ECC enable*/ +#define NV_NBECC 52 /* ECC MCE enable*/ +#define NV_ChipKill 53 /* Chip-Kill ECC Mode enable*/ +#define NV_ECCRedir 54 /* Dram ECC Redirection enable*/ +#define NV_DramBKScrub 55 /* Dram ECC Background Scrubber CTL*/ +#define NV_L2BKScrub 56 /* L2 ECC Background Scrubber CTL*/ +#define NV_DCBKScrub 57 /* DCache ECC Background Scrubber CTL*/ +#define NV_CS_SpareCTL 58 /* Chip Select Spare Control bit 0: + 0=disable Spare + 1=enable Spare */ + /* Chip Select Spare Control bit 1-4: + Reserved, must be zero*/ +#define NV_SyncOnUnEccEn 61 /* SyncOnUnEccEn control + 0=disable + 1=enable*/ +#define NV_Unganged 62 + +#define NV_ChannelIntlv 63 /* Channel Interleaving (3-bits) + xx0b = disable + yy1b = enable with DctSelIntLvAddr set to yyb */ + + +#ifndef MAX_NODES_SUPPORTED +#define MAX_NODES_SUPPORTED 8 +#endif + +#ifndef MAX_DIMMS_SUPPORTED +#define MAX_DIMMS_SUPPORTED 8 +#endif + +#ifndef MAX_CS_SUPPORTED +#define MAX_CS_SUPPORTED 8 +#endif + +#ifndef MCT_DIMM_SPARE_NO_WARM +#define MCT_DIMM_SPARE_NO_WARM 0 +#endif + + +u32 Get_NB32(u32 dev, u32 reg); +void Set_NB32(u32 dev, u32 reg, u32 val); +u32 Get_NB32_index(u32 dev, u32 index_reg, u32 index); +void Set_NB32_index(u32 dev, u32 index_reg, u32 index, u32 data); +u32 Get_NB32_index_wait(u32 dev, u32 index_reg, u32 index); +void Set_NB32_index_wait(u32 dev, u32 index_reg, u32 index, u32 data); +u32 OtherTiming_A_D(struct DCTStatStruc *pDCTstat, u32 val); +void mct_ForceAutoPrecharge_D(struct DCTStatStruc *pDCTstat, u32 dct); +u32 Modify_D3CMP(struct DCTStatStruc *pDCTstat, u32 dct, u32 value); +u8 mct_checkNumberOfDqsRcvEn_1Pass(u8 
pass); +u32 SetupDqsPattern_1PassA(u8 Pass); +u32 SetupDqsPattern_1PassB(u8 Pass); +u8 mct_Get_Start_RcvrEnDly_1Pass(u8 Pass); +u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 RcvrEnDlyLimit, u8 Channel, u8 Receiver, u8 Pass); +void CPUMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); +u32 mctGetLogicalCPUID(u32 Node); +u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); +void TrainReceiverEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA, u8 Pass); +void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); +void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); +void TrainMaxReadLatency_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); +void mct_EndDQSTraining_D(struct MCTStatStruc *pMCTstat,struct DCTStatStruc *pDCTstatA); +void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 FinalValue, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass); +void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel); +void mctGet_PS_Cfg_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 dct); +void InterleaveBanks_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct); +void mct_SetDramConfigHi_D(struct DCTStatStruc *pDCTstat, u32 dct, u32 DramConfigHi); +void mct_DramInit_Hw_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct); +void SyncSetting(struct DCTStatStruc *pDCTstat); +void mct_SetClToNB_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); +void mct_SetWbEnhWsbDis_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); +void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 Pass); +void mct_EnableDimmEccEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 _DisableDramECC); +u32 procOdtWorkaround(struct DCTStatStruc *pDCTstat, 
u32 dct, u32 val); +void mct_BeforeDramInit_D(struct DCTStatStruc *pDCTstat, u32 dct); +void mctGet_DIMMAddr(struct DCTStatStruc *pDCTstat, u32 node); +void mctSMBhub_Init(u32 node); +int mctRead_SPD(u32 smaddr, u32 reg); +void InterleaveNodes_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); +void InterleaveChannels_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); +void mct_BeforeDQSTrain_Samp_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); +static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 ChipSel); +void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); +u8 mct_SaveRcvEnDly_D_1Pass(struct DCTStatStruc *pDCTstat, u8 pass); +static void mct_AdjustScrub_D(struct DCTStatStruc *pDCTstat, u16 *scrub_request); +static u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct); +static void mct_Wait_10ns (u32 cycles); +u8 mct_RcvrRankEnabled_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 Channel, u8 ChipSel); +u32 mct_GetRcvrSysAddr_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 channel, u8 receiver, u8 *valid); +void mct_Read1LTestPattern_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 addr); +#endif diff --git a/src/northbridge/amd/amdmct/mct/mct_d_gcc.h b/src/northbridge/amd/amdmct/mct/mct_d_gcc.h new file mode 100644 index 0000000000..3f0f2c1351 --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mct_d_gcc.h @@ -0,0 +1,388 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +static inline void _WRMSR(u32 addr, u32 lo, u32 hi) +{ + __asm__ volatile ( + "wrmsr" + : + :"c"(addr),"a"(lo), "d" (hi) + ); +} + + +static inline void _RDMSR(u32 addr, u32 *lo, u32 *hi) +{ + __asm__ volatile ( + "rdmsr" + :"=a"(*lo), "=d" (*hi) + :"c"(addr) + ); +} + + +static inline void _RDTSC(u32 *lo, u32 *hi) +{ + __asm__ volatile ( + "rdtsc" + : "=a" (*lo), "=d"(*hi) + ); +} + + +static inline void _cpu_id(u32 addr, u32 *val) +{ + __asm__ volatile( + "cpuid" + : "=a" (val[0]), + "=b" (val[1]), + "=c" (val[2]), + "=d" (val[3]) + : "0" (addr)); + +} + + +/* Index of the highest set bit of x, scanning bits 31 down to 1. + * Returns 0 when x is 0 or when only bit 0 is set. + * The mask is built from an unsigned 1 so that bit 31 is shifted without + * overflowing a signed int. */ +static inline u32 bsr(u32 x) +{ + u8 i; + u32 ret = 0; + + for(i=31; i>0; i--) { + if(x & ((u32)1<<i)) { + ret = i; + break; + } + } + + return ret; + +} + + +/* Index of the lowest set bit of x, or 32 when x is 0. + * The mask is built from an unsigned 1 so that bit 31 is shifted without + * overflowing a signed int. */ +static inline u32 bsf(u32 x) +{ + u8 i; + u32 ret = 32; + + for(i=0; i<32; i++) { + if(x & ((u32)1<<i)) { + ret = i; + break; + } + } + + return ret; +} + +#define _MFENCE asm volatile ( "mfence") + +#define _SFENCE asm volatile ( "sfence" ) + +/* prevent speculative execution of following instructions */ +#define _EXECFENCE asm volatile ("outb %al, $0xed") + +static inline u32 read_cr4(void) +{ + u32 cr4; + __asm__ volatile ("movl %%cr4, %0" : "=r" (cr4)); + return cr4; +} + + +static inline void write_cr4(u32 cr4) +{ + __asm__ volatile ("movl %0, %%cr4" : : "r" (cr4)); +} + + +u32 SetUpperFSbase(u32 addr_hi); + + +static void proc_CLFLUSH(u32 addr_hi) +{ + SetUpperFSbase(addr_hi); + + __asm__ volatile ( + /* clflush fs:[eax] */ + "outb %%al, $0xed\n\t" /* _EXECFENCE */ + "clflush 
%%fs:(%0)\n\t" + "mfence\n\t" + ::"a" (addr_hi<<8) + ); +} + + +static void WriteLNTestPattern(u32 addr_lo, u8 *buf_a, u32 line_num) +{ + __asm__ volatile ( + /*prevent speculative execution of following instructions*/ + /* FIXME: needed ? */ + "outb %%al, $0xed\n\t" /* _EXECFENCE */ + "1:\n\t" + "movdqa (%3), %%xmm0\n\t" + "movntdq %%xmm0, %%fs:(%0)\n\t" /* xmm0 is 128 bit */ + "addl %1, %0\n\t" + "addl %1, %3\n\t" + "loop 1b\n\t" + "mfence\n\t" + + :: "a" (addr_lo), "d" (16), "c" (line_num * 4), "b"(buf_a) + ); + +} + + +u32 read32_fs(u32 addr_lo) +{ + u32 value; + __asm__ volatile ( + "outb %%al, $0xed\n\t" /* _EXECFENCE */ + "movl %%fs:(%1), %0\n\t" + :"=b"(value): "a" (addr_lo) + ); + return value; +} + + +u8 read8_fs(u32 addr_lo) +{ + u8 byte; + __asm__ volatile ( + "outb %%al, $0xed\n\t" /* _EXECFENCE */ + "movb %%fs:(%1), %b0\n\t" + "mfence\n\t" + :"=b"(byte): "a" (addr_lo) + ); + return byte; +} + + +static void FlushDQSTestPattern_L9(u32 addr_lo) +{ + __asm__ volatile ( + "outb %%al, $0xed\n\t" /* _EXECFENCE */ + "clflush %%fs:-128(%%ecx)\n\t" + "clflush %%fs:-64(%%ecx)\n\t" + "clflush %%fs:(%%ecx)\n\t" + "clflush %%fs:64(%%ecx)\n\t" + + "clflush %%fs:-128(%%eax)\n\t" + "clflush %%fs:-64(%%eax)\n\t" + "clflush %%fs:(%%eax)\n\t" + "clflush %%fs:64(%%eax)\n\t" + + "clflush %%fs:-128(%%ebx)\n\t" + + :: "b" (addr_lo+128+8*64), "c"(addr_lo+128), + "a"(addr_lo+128+4*64) + ); + +} + + +static __attribute__((noinline)) void FlushDQSTestPattern_L18(u32 addr_lo) +{ + __asm__ volatile ( + "outb %%al, $0xed\n\t" /* _EXECFENCE */ + "clflush %%fs:-128(%%eax)\n\t" + "clflush %%fs:-64(%%eax)\n\t" + "clflush %%fs:(%%eax)\n\t" + "clflush %%fs:64(%%eax)\n\t" + + "clflush %%fs:-128(%%edi)\n\t" + "clflush %%fs:-64(%%edi)\n\t" + "clflush %%fs:(%%edi)\n\t" + "clflush %%fs:64(%%edi)\n\t" + + "clflush %%fs:-128(%%ebx)\n\t" + "clflush %%fs:-64(%%ebx)\n\t" + "clflush %%fs:(%%ebx)\n\t" + "clflush %%fs:64(%%ebx)\n\t" + + "clflush %%fs:-128(%%ecx)\n\t" + "clflush %%fs:-64(%%ecx)\n\t" 
+ "clflush %%fs:(%%ecx)\n\t" + "clflush %%fs:64(%%ecx)\n\t" + + "clflush %%fs:-128(%%edx)\n\t" + "clflush %%fs:-64(%%edx)\n\t" + + :: "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), + "d" (addr_lo +128+16*64), "a"(addr_lo+128), + "D"(addr_lo+128+4*64) + ); +} + + +static void ReadL18TestPattern(u32 addr_lo) +{ + // set fs and use fs prefix to access the mem + __asm__ volatile ( + "outb %%al, $0xed\n\t" /* _EXECFENCE */ + "movl %%fs:-128(%%esi), %%eax\n\t" //TestAddr cache line + "movl %%fs:-64(%%esi), %%eax\n\t" //+1 + "movl %%fs:(%%esi), %%eax\n\t" //+2 + "movl %%fs:64(%%esi), %%eax\n\t" //+3 + + "movl %%fs:-128(%%edi), %%eax\n\t" //+4 + "movl %%fs:-64(%%edi), %%eax\n\t" //+5 + "movl %%fs:(%%edi), %%eax\n\t" //+6 + "movl %%fs:64(%%edi), %%eax\n\t" //+7 + + "movl %%fs:-128(%%ebx), %%eax\n\t" //+8 + "movl %%fs:-64(%%ebx), %%eax\n\t" //+9 + "movl %%fs:(%%ebx), %%eax\n\t" //+10 + "movl %%fs:64(%%ebx), %%eax\n\t" //+11 + + "movl %%fs:-128(%%ecx), %%eax\n\t" //+12 + "movl %%fs:-64(%%ecx), %%eax\n\t" //+13 + "movl %%fs:(%%ecx), %%eax\n\t" //+14 + "movl %%fs:64(%%ecx), %%eax\n\t" //+15 + + "movl %%fs:-128(%%edx), %%eax\n\t" //+16 + "movl %%fs:-64(%%edx), %%eax\n\t" //+17 + "mfence\n\t" + + :: "a"(0), "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), + "d" (addr_lo +128+16*64), "S"(addr_lo+128), + "D"(addr_lo+128+4*64) + ); + +} + + +static void ReadL9TestPattern(u32 addr_lo) +{ + + // set fs and use fs prefix to access the mem + __asm__ volatile ( + "outb %%al, $0xed\n\t" /* _EXECFENCE */ + + "movl %%fs:-128(%%ecx), %%eax\n\t" //TestAddr cache line + "movl %%fs:-64(%%ecx), %%eax\n\t" //+1 + "movl %%fs:(%%ecx), %%eax\n\t" //+2 + "movl %%fs:64(%%ecx), %%eax\n\t" //+3 + + "movl %%fs:-128(%%edx), %%eax\n\t" //+4 + "movl %%fs:-64(%%edx), %%eax\n\t" //+5 + "movl %%fs:(%%edx), %%eax\n\t" //+6 + "movl %%fs:64(%%edx), %%eax\n\t" //+7 + + "movl %%fs:-128(%%ebx), %%eax\n\t" //+8 + "mfence\n\t" + + :: "a"(0), "b" (addr_lo+128+8*64), "c"(addr_lo+128), + "d"(addr_lo+128+4*64) + ); + +} 
+ + +static void ReadMaxRdLat1CLTestPattern_D(u32 addr) +{ + SetUpperFSbase(addr); + + __asm__ volatile ( + "outb %%al, $0xed\n\t" /* _EXECFENCE */ + "movl %%fs:-128(%%esi), %%eax\n\t" //TestAddr cache line + "movl %%fs:-64(%%esi), %%eax\n\t" //+1 + "movl %%fs:(%%esi), %%eax\n\t" //+2 + "mfence\n\t" + :: "a"(0), "S"((addr<<8)+128) + ); + +} + + +void WriteMaxRdLat1CLTestPattern_D(u32 buf, u32 addr) +{ + SetUpperFSbase(addr); + + __asm__ volatile ( + "outb %%al, $0xed\n\t" /* _EXECFENCE */ + "1:\n\t" + "movdqa (%3), %%xmm0\n\t" + "movntdq %%xmm0, %%fs:(%0)\n\t" /* xmm0 is 128 bit */ + "addl %1, %0\n\t" + "addl %1, %3\n\t" + "loop 1b\n\t" + "mfence\n\t" + + :: "a" (addr<<8), "d" (16), "c" (3 * 4), "b"(buf) + ); +} + + +static void FlushMaxRdLatTestPattern_D(u32 addr) +{ + /* Flush a pattern of 72 bit times (per DQ) from cache. + * This procedure is used to ensure cache miss on the next read training. + */ + + SetUpperFSbase(addr); + + __asm__ volatile ( + "outb %%al, $0xed\n\t" /* _EXECFENCE */ + "clflush %%fs:-128(%%esi)\n\t" //TestAddr cache line + "clflush %%fs:-64(%%esi)\n\t" //+1 + "clflush %%fs:(%%esi)\n\t" //+2 + "mfence\n\t" + + :: "S"((addr<<8)+128) + ); +} + + +u32 stream_to_int(u8 *p) +{ + int i; + u32 val; + u32 valx; + + val = 0; + + for(i=3; i>=0; i--) { + val <<= 8; + valx = *(p+i); + val |= valx; + } + + return val; +} + + +void oemSet_NB32(u32 addr, u32 val, u8 *valid) +{ +} + + +u32 oemGet_NB32(u32 addr, u8 *valid) +{ + *valid = 0; + return 0xffffffff; +} + + +u8 oemNodePresent_D(u8 Node, u8 *ret) +{ + *ret = 0; + return 0; +} diff --git a/src/northbridge/amd/amdmct/mct/mct_fd.c b/src/northbridge/amd/amdmct/mct/mct_fd.c new file mode 100644 index 0000000000..b13467918e --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mct_fd.c @@ -0,0 +1,25 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + + +u8 amd_FD_support(void) +{ + return 1; +} diff --git a/src/northbridge/amd/amdmct/mct/mctardk3.c b/src/northbridge/amd/amdmct/mct/mctardk3.c new file mode 100644 index 0000000000..7300a99aaf --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mctardk3.c @@ -0,0 +1,206 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +static void Get_ChannelPS_Cfg0_D( u8 MAAdimms, u8 Speed, u8 MAAload, + u8 DATAAload, u32 *AddrTmgCTL, u32 *ODC_CTL); + + +void mctGet_PS_Cfg_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u32 dct) +{ + u16 val, valx; + + print_tx("dct: ", dct); + print_tx("Speed: ", pDCTstat->Speed); + + Get_ChannelPS_Cfg0_D(pDCTstat->MAdimms[dct], pDCTstat->Speed, + pDCTstat->MAload[dct], pDCTstat->DATAload[dct], + &(pDCTstat->CH_ADDR_TMG[dct]), &(pDCTstat->CH_ODC_CTL[dct])); + + + if(pDCTstat->MAdimms[dct] == 1) + pDCTstat->CH_ODC_CTL[dct] |= 0x20000000; /* 75ohms */ + else + pDCTstat->CH_ODC_CTL[dct] |= 0x10000000; /* 150ohms */ + + pDCTstat->_2Tmode = 1; + + /* use byte lane 4 delay for ECC lane */ + pDCTstat->CH_EccDQSLike[0] = 0x0504; + pDCTstat->CH_EccDQSScale[0] = 0; /* 100% byte lane 4 */ + pDCTstat->CH_EccDQSLike[1] = 0x0504; + pDCTstat->CH_EccDQSScale[1] = 0; /* 100% byte lane 4 */ + + + /* + Overrides and/or exceptions + */ + + /* 1) QRx4 needs to adjust CS/ODT setup time */ + // FIXME: Add Ax support? 
+ if (mctGet_NVbits(NV_MAX_DIMMS) == 4) { + if (pDCTstat->DimmQRPresent != 0) { + pDCTstat->CH_ADDR_TMG[dct] &= 0xFF00FFFF; + pDCTstat->CH_ADDR_TMG[dct] |= 0x00000000; /* OR with zero: bits 23:16 stay cleared */ + if (pDCTstat->MAdimms[dct] == 4) { + pDCTstat->CH_ADDR_TMG[dct] &= 0xFF00FFFF; + pDCTstat->CH_ADDR_TMG[dct] |= 0x002F0000; + if (pDCTstat->Speed == 3 || pDCTstat->Speed == 4) { + pDCTstat->CH_ADDR_TMG[dct] &= 0xFF00FFFF; + pDCTstat->CH_ADDR_TMG[dct] |= 0x00002F00; /* NOTE(review): the mask above clears bits 23:16 (dropping the 0x002F0000 just written) while this sets bits 15:8; section 2 pairs 0x00002F00 with mask 0xFFFF00FF - confirm the intended field */ + if (pDCTstat->MAdimms[dct] == 4) + pDCTstat->CH_ODC_CTL[dct] = 0x00331222; + } + } + } + } + + + /* 2) DRx4 (R/C-J) @ DDR667 needs to adjust CS/ODT setup time */ + if (pDCTstat->Speed == 3 || pDCTstat->Speed == 4) { + val = pDCTstat->Dimmx4Present; + if (dct == 0) { + val &= 0x55; /* dct 0: keep the even bit positions */ + } else { + val &= 0xAA; /* dct 1: keep the odd bit positions ... */ + val >>= 1; /* ... shifted down onto the even ones */ + } + val &= pDCTstat->DIMMValid; + if (val) { + //FIXME: skip for Ax + valx = pDCTstat->DimmDRPresent; + if (dct == 0) { + valx &= 0x55; + } else { + valx &= 0xAA; + valx >>= 1; + } + if (mctGet_NVbits(NV_MAX_DIMMS) == 8) { + val &= valx; + if (val != 0) { + pDCTstat->CH_ADDR_TMG[dct] &= 0xFFFF00FF; + pDCTstat->CH_ADDR_TMG[dct] |= 0x00002F00; + } + } else { + val &= valx; + if (val != 0) { + if (pDCTstat->Speed == 3 || pDCTstat->Speed == 3) { /* NOTE(review): both operands test Speed == 3; the second was presumably meant to be another speed grade - confirm */ + pDCTstat->CH_ADDR_TMG[dct] &= 0xFFFF00FF; + pDCTstat->CH_ADDR_TMG[dct] |= 0x00002F00; + } + } + + } + } + } + + + pDCTstat->CH_ODC_CTL[dct] = procOdtWorkaround(pDCTstat, dct, pDCTstat->CH_ODC_CTL[dct]); + + print_tx("CH_ODC_CTL: ", pDCTstat->CH_ODC_CTL[dct]); + print_tx("CH_ADDR_TMG: ", pDCTstat->CH_ADDR_TMG[dct]); + + +} + + +/*=============================================================================== + * Vendor is responsible for correct settings. + * M2/Unbuffered 4 Slot - AMD Design Guideline. + *=============================================================================== + * #1, BYTE, Speed (DCTStatstruc.Speed) (Secondary Key) + * #2, BYTE, number of Address bus loads on the Channel. (Tertiary Key) + * These must be listed in ascending order. 
+ * FFh (0xFE) has special meanying of 'any', and must be listed first for each speed grade. + * #3, DWORD, Address Timing Control Register Value + * #4, DWORD, Output Driver Compensation Control Register Value + * #5, BYTE, Number of DIMMs (Primary Key) + */ +static const u8 Table_ATC_ODC_8D_D[] = { + 0xFE, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x22, 0x12, 0x11, 0x00, 1, + 0xFE, 0xFF, 0x00, 0x00, 0x37, 0x00, 0x22, 0x12, 0x11, 0x00, 2, + 1, 0xFF, 0x00, 0x00, 0x2F, 0x00, 0x22, 0x12, 0x11, 0x00, 3, + 2, 0xFF, 0x00, 0x00, 0x2F, 0x00, 0x22, 0x12, 0x11, 0x00, 3, + 3, 0xFF, 0x2F, 0x00, 0x2F, 0x00, 0x22, 0x12, 0x11, 0x00, 3, + 4, 0xFF, 0x2F, 0x00, 0x2F, 0x00, 0x22, 0x12, 0x33, 0x00, 3, + 1, 0xFF, 0x00, 0x00, 0x2F, 0x00, 0x22, 0x12, 0x11, 0x00, 4, + 2, 0xFF, 0x00, 0x00, 0x2F, 0x00, 0x22, 0x12, 0x11, 0x00, 4, + 3, 0xFF, 0x2F, 0x00, 0x2F, 0x00, 0x22, 0x12, 0x33, 0x00, 4, + 4, 0xFF, 0x2F, 0x00, 0x2F, 0x00, 0x22, 0x12, 0x33, 0x00, 4, + 0xFF +}; + +static const u8 Table_ATC_ODC_4D_D[] = { + 0xFE, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x22, 0x12, 0x11, 0x00, 1, + 0xFE, 0xFF, 0x00, 0x00, 0x37, 0x00, 0x22, 0x12, 0x11, 0x00, 2, + 0xFE, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x22, 0x12, 0x11, 0x00, 3, + 0xFE, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x22, 0x12, 0x11, 0x00, 4, + 0xFF +}; + +static const u8 Table_ATC_ODC_8D_D_Ax[] = { + 1,0xff,0x00,0x00,0x2F,0x0,0x22,0x12,0x11,0x00, 0xFE, + 2,0xff,0x00,0x00,0x2C,0x0,0x22,0x12,0x11,0x00, 0xFE, + 3,0xff,0x00,0x00,0x2C,0x0,0x22,0x12,0x11,0x00, 0xFE, + 4,0xff,0x00,0x33,0x2F,0x0,0x22,0x12,0x11,0x00, 0xFE, + 0xFF +}; + +static const u8 Table_ATC_ODC_4D_D_Ax[] = { + 1,0xff,0x00,0x00,0x2F,0x00,0x22,0x12,0x11,0x00, 0xFE, + 2,0xff,0x00,0x2C,0x2C,0x00,0x22,0x12,0x11,0x00, 0xFE, + 3,0xff,0x00,0x00,0x2C,0x00,0x22,0x12,0x11,0x00, 0xFE, + 4,0xff,0x00,0x33,0x2F,0x00,0x22,0x12,0x11,0x00, 0xFE, + 0xFF +}; + + +static void Get_ChannelPS_Cfg0_D(u8 MAAdimms, u8 Speed, u8 MAAload, + u8 DATAAload, u32 *AddrTmgCTL, u32 *ODC_CTL) +{ + const u8 *p; + + *AddrTmgCTL = 0; + *ODC_CTL = 0; + + 
if(mctGet_NVbits(NV_MAX_DIMMS) == 8) { + /* 8 DIMM Table */ + p = Table_ATC_ODC_8D_D; + //FIXME Add Ax support + } else { + /* 4 DIMM Table*/ + p = Table_ATC_ODC_4D_D; + //FIXME Add Ax support + } + + while (*p != 0xFF) { + if ((MAAdimms == *(p+10)) || (*(p+10 ) == 0xFE)) { + if((*p == Speed) || (*p == 0xFE)) { + if(MAAload <= *(p+1)) { + *AddrTmgCTL = stream_to_int((u8*)(p+2)); + *ODC_CTL = stream_to_int((u8*)(p+6)); + break; + } + } + } + p+=11; + } +} + diff --git a/src/northbridge/amd/amdmct/mct/mctardk4.c b/src/northbridge/amd/amdmct/mct/mctardk4.c new file mode 100644 index 0000000000..c3f1aeab5a --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mctardk4.c @@ -0,0 +1,172 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +static void Get_ChannelPS_Cfg0_D( u8 MAAdimms, u8 Speed, u8 MAAload, + u8 DATAAload, u32 *AddrTmgCTL, u32 *ODC_CTL, + u32 *CMDmode); + + +void mctGet_PS_Cfg_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u32 dct) + + print_tx("dct: ", dct); + print_tx("Speed: ", pDCTstat->Speed); + + Get_ChannelPS_Cfg0_D(pDCTstat->MAdimms[dct], pDCTstat->Speed, + pDCTstat->MAload[dct], pDCTstat->DATAload[dct], + &(pDCTstat->CH_ADDR_TMG[dct]), &(pDCTstat->CH_ODC_CTL[dct]), + &pDCTstat->_2Tmode); + +// print_tx("1 CH_ODC_CTL: ", pDCTstat->CH_ODC_CTL[dct]); +// print_tx("1 CH_ADDR_TMG: ", pDCTstat->CH_ADDR_TMG[dct]); + + if(pDCTstat->MAdimms[dct] == 1) + pDCTstat->CH_ODC_CTL[dct] |= 0x20000000; /* 75ohms */ + else + pDCTstat->CH_ODC_CTL[dct] |= 0x10000000; /* 150ohms */ + + + /* + * Overrides and/or workarounds + */ + pDCTstat->CH_ODC_CTL[dct] = procOdtWorkaround(pDCTstat, dct, pDCTstat->CH_ODC_CTL[dct]); + + print_tx("4 CH_ODC_CTL: ", pDCTstat->CH_ODC_CTL[dct]); + print_tx("4 CH_ADDR_TMG: ", pDCTstat->CH_ADDR_TMG[dct]); +} + +/*============================================================================= + * Vendor is responsible for correct settings. + * M2/Unbuffered 4 Slot - AMD Design Guideline. + *============================================================================= + * #1, BYTE, Speed (DCTStatstruc.Speed) + * #2, BYTE, number of Address bus loads on the Channel. + * These must be listed in ascending order. + * FFh (-1) has special meanying of 'any', and must be listed first for + * each speed grade. 
+ * #3, DWORD, Address Timing Control Register Value + * #4, DWORD, Output Driver Compensation Control Register Value + */ + +static const u8 Table_ATC_ODC_D_Bx[] = { + 1, 0xFF, 0x00, 0x2F, 0x2F, 0x0, 0x22, 0x13, 0x11, 0x0 + 2, 12, 0x00, 0x2F, 0x2F, 0x0, 0x22, 0x13, 0x11, 0x0 + 2, 16, 0x00, 0x2F, 0x00, 0x0, 0x22, 0x13, 0x11, 0x0 + 2, 20, 0x00, 0x2F, 0x38, 0x0, 0x22, 0x13, 0x11, 0x0 + 2, 24, 0x00, 0x2F, 0x37, 0x0, 0x22, 0x13, 0x11, 0x0 + 2, 32, 0x00, 0x2F, 0x34, 0x0, 0x22, 0x13, 0x11, 0x0 + 3, 12, 0x20, 0x22, 0x20, 0x0, 0x22, 0x13, 0x11, 0x0 + 3, 16, 0x20, 0x22, 0x30, 0x0, 0x22, 0x13, 0x11, 0x0 + 3, 20, 0x20, 0x22, 0x2C, 0x0, 0x22, 0x13, 0x11, 0x0 + 3, 24, 0x20, 0x22, 0x2A, 0x0, 0x22, 0x13, 0x11, 0x0 + 3, 32, 0x20, 0x22, 0x2B, 0x0, 0x22, 0x13, 0x11, 0x0 + 4, 0xFF, 0x20, 0x25, 0x20, 0x0, 0x22, 0x33, 0x11, 0x0 + 5, 0xFF, 0x20, 0x20, 0x2F, 0x0, 0x22, 0x32, 0x11, 0x0 + 0FFh + +static const u8 Table_ATC_ODC_D_Ax[] = { + 1, 0xFF, 0x00, 0x2F, 0x2F, 0x0, 0x22, 0x13, 0x11, 0x0 + 2, 12, 0x00, 0x2F, 0x2F, 0x0, 0x22, 0x13, 0x11, 0x0 + 2, 16, 0x00, 0x2F, 0x00, 0x0, 0x22, 0x13, 0x11, 0x0 + 2, 20, 0x00, 0x2F, 0x38, 0x0, 0x22, 0x13, 0x11, 0x0 + 2, 24, 0x00, 0x2F, 0x37, 0x0, 0x22, 0x13, 0x11, 0x0 + 2, 32, 0x00, 0x2F, 0x34, 0x0, 0x22, 0x13, 0x11, 0x0 + 3, 12, 0x20, 0x22, 0x20, 0x0, 0x22, 0x13, 0x11, 0x0 + 3, 16, 0x20, 0x22, 0x30, 0x0, 0x22, 0x13, 0x11, 0x0 + 3, 20, 0x20, 0x22, 0x2C, 0x0, 0x22, 0x13, 0x11, 0x0 + 3, 24, 0x20, 0x22, 0x2A, 0x0, 0x22, 0x13, 0x11, 0x0 + 3, 32, 0x20, 0x22, 0x2B, 0x0, 0x22, 0x13, 0x11, 0x0 + 4, 0xFF, 0x20, 0x25, 0x20, 0x0, 0x22, 0x33, 0x11, 0x0 + 5, 0xFF, 0x20, 0x20, 0x2F, 0x0, 0x22, 0x32, 0x11, 0x0 + 0xFF +}; + + +static void Get_ChannelPS_Cfg0_D( u8 MAAdimms, u8 Speed, u8 MAAload, + u8 DATAAload, u32 *AddrTmgCTL, u32 *ODC_CTL, + u32 *CMDmode); +{ + u8 *p; + + *AddrTmgCTL = 0; + *ODC_CTL = 0; + *CMDmode = 1; + + // FIXME: add Ax support + if(MAAdimms == 0) { + *ODC_CTL = 0x00111222; + if(Speed == 3) + *AddrTmgCTL = 0x00202220; + else if (Speed == 2) + 
*AddrTmgCTL = 0x002F2F00; + else if (Speed == 1) + *AddrTmgCTL = 0x002F2F00; + else if (Speed == 4) + *AddrTmgCTL = 0x00202520; + else if (Speed == 4) + *AddrTmgCTL = 0x002F2020; + else + *AddrTmgCTL = 0x002F2F2F; + } else if(MAAdimms == 1) { + if(Speed == 4) { + *CMDmode = 2; + *AddrTmgCTL = 0x00202520; + *ODC_CTL = 0x00113222; + } else if(Speed == 4) { + *CMDmode = 2; + *AddrTmgCTL = 0x002F2020; + *ODC_CTL = 0x00113222; + } else { + *CMDmode = 1; + *ODC_CTL = 0x00111222; + if(Speed == 3) { + *AddrTmgCTL = 0x00202220; + } else if(Speed == 2) { + if (MAAload == 4) + *AddrTmgCTL = 0x002B2F00; + else if (MAAload == 16) + *AddrTmgCTL = 0x002B2F00; + else if (MAAload == 8) + *AddrTmgCTL = 0x002F2F00; + else + *AddrTmgCTL = 0x002F2F00; + } else if(Speed == 1) { + *AddrTmgCTL = 0x002F2F00; + } else if(Speed == 5) { + *AddrTmgCTL = 0x002F2020; + } else { + *AddrTmgCTL = 0x002F2F2F; + } + } + } else { + *CMDmode = 2; + p = Table_ATC_ODC_D_Bx; + do { + if(Speed == *p) { + if(MAAload <= *(p+1)) { + *AddrTmgCTL = stream_to_int(p+2); + *ODC_CTL = stream_to_int(p+6); + break; + } + } + p+=10; + } while (0xFF == *p); + +} diff --git a/src/northbridge/amd/amdmct/mct/mctchi_d.c b/src/northbridge/amd/amdmct/mct/mctchi_d.c new file mode 100644 index 0000000000..f50fd6d62e --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mctchi_d.c @@ -0,0 +1,130 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + + +void InterleaveChannels_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + + u8 Node; + u32 DramBase, DctSelBase; + u8 DctSelIntLvAddr, DctSelHi; + u8 HoleValid = 0; + u32 HoleSize, HoleBase = 0; + u32 val, tmp; + u32 dct0_size, dct1_size; + u8 enabled; + struct DCTStatStruc *pDCTstat; + + /* HoleValid - indicates whether the current Node contains hole. + * HoleSize - indicates whether there is IO hole in the whole system + * memory. + */ + + /* call back to wrapper not needed ManualChannelInterleave_D(); */ + /* call back - DctSelIntLvAddr = mctGet_NVbits(NV_ChannelIntlv);*/ /* override interleave */ + // FIXME: Check for Cx + DctSelIntLvAddr = 5; /* use default: Enable channel interleave */ + enabled = 1; /* with Hash*: exclusive OR of address bits[20:16, 6]. 
*/ + beforeInterleaveChannels_D(pDCTstatA, &enabled); + + if (enabled) { + DctSelIntLvAddr >>= 1; + HoleSize = 0; + if ((pMCTstat->GStatus & (1 << GSB_SoftHole)) || + (pMCTstat->GStatus & (1 << GSB_HWHole))) { + if (pMCTstat->HoleBase) { + HoleBase = pMCTstat->HoleBase >> 8; + HoleSize = HoleBase & 0xFFFF0000; + HoleSize |= ((~HoleBase) + 1) & 0xFFFF; + } + } + Node = 0; + while (Node < MAX_NODES_SUPPORTED) { + pDCTstat = pDCTstatA + Node; + val = Get_NB32(pDCTstat->dev_map, 0xF0); + if (val & (1 << DramHoleValid)) + HoleValid = 1; + if (!pDCTstat->GangedMode && pDCTstat->DIMMValidDCT[0] && pDCTstat->DIMMValidDCT[1]) { + DramBase = pDCTstat->NodeSysBase >> 8; + dct1_size = ((pDCTstat->NodeSysLimit) + 2) >> 8; + dct0_size = Get_NB32(pDCTstat->dev_dct, 0x114); + if (dct0_size >= 0x10000) { + dct0_size -= HoleSize; + } + + dct0_size -= DramBase; + dct1_size -= dct0_size; + DctSelHi = 0x05; /* DctSelHiRngEn = 1, DctSelHi = 0 */ + if (dct1_size == dct0_size) { + dct1_size = 0; + DctSelHi = 0x04; /* DctSelHiRngEn = 0 */ + } else if (dct1_size > dct0_size ) { + dct1_size = dct0_size; + DctSelHi = 0x07; /* DctSelHiRngEn = 1, DctSelHi = 1 */ + } + dct0_size = dct1_size; + dct0_size += DramBase; + dct0_size += dct1_size; + if (dct0_size >= HoleBase) /* if DctSelBaseAddr > HoleBase */ + dct0_size += HoleBase; + DctSelBase = dct0_size; + + if (dct1_size == 0) + dct0_size = 0; + dct0_size -= dct1_size; /* DctSelBaseOffset = DctSelBaseAddr - Interleaved region */ + Set_NB32(pDCTstat->dev_dct, 0x114, dct0_size); + + if (dct1_size == 0) + dct1_size = DctSelBase; + val = Get_NB32(pDCTstat->dev_dct, 0x110); + val &= 0x7F8; + val |= dct1_size; + val |= DctSelHi; + val |= (DctSelIntLvAddr << 6) & 0xFF; + Set_NB32(pDCTstat->dev_dct, 0x110, val); + print_tx("InterleaveChannels: DRAM Controller Select Low Register = ", val); + + if (HoleValid) { + tmp = DramBase; + val = DctSelBase; + if (val < HoleBase) { /* DctSelBaseAddr < DramHoleBase */ + val -= DramBase; + val >>= 1; + tmp += val; 
+ } + tmp += HoleSize; + val = Get_NB32(pDCTstat->dev_map, 0xF0); /* DramHoleOffset */ + val &= 0x7F; + val |= (tmp & 0xFF); + Set_NB32(pDCTstat->dev_map, 0xF0, val); +print_tx("InterleaveChannels:0xF0 = ", val); + + } + } + print_tx("InterleaveChannels_D: Node ", Node); + print_tx("InterleaveChannels_D: Status ", pDCTstat->Status); + print_tx("InterleaveChannels_D: ErrStatus ", pDCTstat->ErrStatus); + print_tx("InterleaveChannels_D: ErrCode ", pDCTstat->ErrCode); + Node++; + } + } + print_t("InterleaveChannels_D: Done\n"); +} diff --git a/src/northbridge/amd/amdmct/mct/mctcsi_d.c b/src/northbridge/amd/amdmct/mct/mctcsi_d.c new file mode 100644 index 0000000000..f2f5cedada --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mctcsi_d.c @@ -0,0 +1,147 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +/* Low swap bit vs bank size encoding (physical, not logical address bit) + * ;To calculate the number by hand, add the number of Bank address bits + * ;(2 or 3) to the number of column address bits, plus 3 (the logical + * ;page size), and subtract 8. 
+ */ +static const u8 Tab_int_D[] = { 6,7,7,8,8,8,8,8,9,9,8,9 }; + +void InterleaveBanks_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u8 ChipSel, EnChipSels; + u32 AddrLoMask, AddrHiMask; + u32 AddrLoMaskN, AddrHiMaskN, MemSize = 0; + u8 DoIntlv, _CsIntCap; + u32 BitDelta, BankEncd = 0; + + u32 dev; + u32 reg; + u32 reg_off; + u32 val; + u32 val_lo, val_hi; + + DoIntlv = mctGet_NVbits(NV_BankIntlv); + _CsIntCap = 0; + EnChipSels = 0; + + dev = pDCTstat->dev_dct; + reg_off = 0x100 * dct; + + ChipSel = 0; /* Find out if current configuration is capable */ + while (DoIntlv && (ChipSel < MAX_CS_SUPPORTED)) { + reg = 0x40+(ChipSel<<2) + reg_off; /* Dram CS Base 0 */ + val = Get_NB32(dev, reg); + if ( val & (1<<CSEnable)) { + EnChipSels++; + reg = 0x60+((ChipSel>>1)<<2)+reg_off; /*Dram CS Mask 0 */ + val = Get_NB32(dev, reg); + val >>= 19; + val &= 0x3ff; + val++; + if (EnChipSels == 1) + MemSize = val; + else + /*If mask sizes not same then skip */ + if (val != MemSize) + break; + reg = 0x80 + reg_off; /*Dram Bank Addressing */ + val = Get_NB32(dev, reg); + val >>= (ChipSel>>1)<<2; + val &= 0x0f; + if(EnChipSels == 1) + BankEncd = val; + else + /*If number of Rows/Columns not equal, skip */ + if (val != BankEncd) + break; + } + ChipSel++; + } + if (ChipSel == MAX_CS_SUPPORTED) { + if ((EnChipSels == 2) || (EnChipSels == 4) || (EnChipSels == 8)) + _CsIntCap = 1; + } + + if (DoIntlv) { + if(!_CsIntCap) { + pDCTstat->ErrStatus |= 1<<SB_BkIntDis; + DoIntlv = 0; + } + } + + if(DoIntlv) { + val = Tab_int_D[BankEncd]; + if (pDCTstat->Status & (1<<SB_128bitmode)) + val++; + + AddrLoMask = (EnChipSels - 1) << val; + AddrLoMaskN = ~AddrLoMask; + + val = bsf(MemSize) + 19; + AddrHiMask = (EnChipSels -1) << val; + AddrHiMaskN = ~AddrHiMask; + + BitDelta = bsf(AddrHiMask) - bsf(AddrLoMask); + + for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel++) { + reg = 0x40+(ChipSel<<2) + reg_off; /*Dram CS Base 0 */ + val = Get_NB32(dev, reg); + if (val & 
3) { + val_lo = val & AddrLoMask; + val_hi = val & AddrHiMask; + val &= AddrLoMaskN; + val &= AddrHiMaskN; + val_lo <<= BitDelta; + val_hi >>= BitDelta; + val |= val_lo; + val |= val_hi; + Set_NB32(dev, reg, val); + + if(ChipSel & 1) + continue; + + reg = 0x60 + ((ChipSel>>1)<<2) + reg_off; /*Dram CS Mask 0 */ + val = Get_NB32(dev, reg); + val_lo = val & AddrLoMask; + val_hi = val & AddrHiMask; + val &= AddrLoMaskN; + val &= AddrHiMaskN; + val_lo <<= BitDelta; + val_hi >>= BitDelta; + val |= val_lo; + val |= val_hi; + Set_NB32(dev, reg, val); + } + } + print_t("InterleaveBanks_D: Banks Interleaved "); + } /* DoIntlv */ + +// dump_pci_device(PCI_DEV(0, 0x18+pDCTstat->Node_ID, 2)); + + print_tx("InterleaveBanks_D: Status ", pDCTstat->Status); + print_tx("InterleaveBanks_D: ErrStatus ", pDCTstat->ErrStatus); + print_tx("InterleaveBanks_D: ErrCode ", pDCTstat->ErrCode); + print_t("InterleaveBanks_D: Done\n"); +} + + diff --git a/src/northbridge/amd/amdmct/mct/mctdqs_d.c b/src/northbridge/amd/amdmct/mct/mctdqs_d.c new file mode 100644 index 0000000000..ae87966367 --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mctdqs_d.c @@ -0,0 +1,1216 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u16 like, + u8 scale, u8 ChipSel); +static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 ChipSel); +static u8 MiddleDQS_D(u8 min, u8 max); +static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 cs_start); +static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 cs_start); +static void WriteDQSTestPattern_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u32 TestAddr_lo); +static void WriteL18TestPattern_D(struct DCTStatStruc *pDCTstat, + u32 TestAddr_lo); +static void WriteL9TestPattern_D(struct DCTStatStruc *pDCTstat, + u32 TestAddr_lo); +static u8 CompareDQSTestPattern_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u32 addr_lo); +static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat, + u32 addr_lo); +static void SetTargetWTIO_D(u32 TestAddr); +static void ResetTargetWTIO_D(void); +static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u32 TestAddr_lo); +void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index); +u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 ChipSel); +static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 cs_start); +u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Channel, + u8 receiver, u8 *valid); +static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc 
*pDCTstat, + u32 *buffer); + +#define DQS_TRAIN_DEBUG 0 + +static inline void print_debug_dqs(const char *str, u32 val, u8 level) +{ +#if DQS_TRAIN_DEBUG > 0 + if (DQS_TRAIN_DEBUG >= level) { + printk_debug("%s%x\n", str, val); + } +#endif +} + +static inline void print_debug_dqs_pair(const char *str, u32 val, const char *str2, u32 val2, u8 level) +{ +#if DQS_TRAIN_DEBUG > 0 + if (DQS_TRAIN_DEBUG >= level) { + printk_debug("%s%08x%s%08x\n", str, val, str2, val2); + } +#endif +} + +/*Warning: These must be located so they do not cross a logical 16-bit segment boundary!*/ +const static u32 TestPatternJD1a_D[] = { + 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, /* QW0-1, ALL-EVEN */ + 0x00000000,0x00000000,0x00000000,0x00000000, /* QW2-3, ALL-EVEN */ + 0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, /* QW4-5, ALL-EVEN */ + 0x00000000,0x00000000,0x00000000,0x00000000, /* QW6-7, ALL-EVEN */ + 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, /* QW0-1, DQ0-ODD */ + 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, /* QW2-3, DQ0-ODD */ + 0x01010101,0x01010101,0xFeFeFeFe,0xFeFeFeFe, /* QW4-5, DQ0-ODD */ + 0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, /* QW6-7, DQ0-ODD */ + 0x02020202,0x02020202,0x02020202,0x02020202, /* QW0-1, DQ1-ODD */ + 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, /* QW2-3, DQ1-ODD */ + 0xFdFdFdFd,0xFdFdFdFd,0x02020202,0x02020202, /* QW4-5, DQ1-ODD */ + 0x02020202,0x02020202,0x02020202,0x02020202, /* QW6-7, DQ1-ODD */ + 0x04040404,0x04040404,0xfBfBfBfB,0xfBfBfBfB, /* QW0-1, DQ2-ODD */ + 0x04040404,0x04040404,0x04040404,0x04040404, /* QW2-3, DQ2-ODD */ + 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW4-5, DQ2-ODD */ + 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW6-7, DQ2-ODD */ + 0x08080808,0x08080808,0xF7F7F7F7,0xF7F7F7F7, /* QW0-1, DQ3-ODD */ + 0x08080808,0x08080808,0x08080808,0x08080808, /* QW2-3, DQ3-ODD */ + 0xF7F7F7F7,0xF7F7F7F7,0x08080808,0x08080808, /* QW4-5, DQ3-ODD */ + 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW6-7, DQ3-ODD */ + 
0x10101010,0x10101010,0x10101010,0x10101010, /* QW0-1, DQ4-ODD */ + 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, /* QW2-3, DQ4-ODD */ + 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW4-5, DQ4-ODD */ + 0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, /* QW6-7, DQ4-ODD */ + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW0-1, DQ5-ODD */ + 0xdFdFdFdF,0xdFdFdFdF,0x20202020,0x20202020, /* QW2-3, DQ5-ODD */ + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW4-5, DQ5-ODD */ + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW6-7, DQ5-ODD */ + 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW0-1, DQ6-ODD */ + 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, /* QW2-3, DQ6-ODD */ + 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, /* QW4-5, DQ6-ODD */ + 0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, /* QW6-7, DQ6-ODD */ + 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, /* QW0-1, DQ7-ODD */ + 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, /* QW2-3, DQ7-ODD */ + 0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, /* QW4-5, DQ7-ODD */ + 0x80808080,0x80808080,0x80808080,0x80808080 /* QW6-7, DQ7-ODD */ +}; +const static u32 TestPatternJD1b_D[] = { + 0x00000000,0x00000000,0x00000000,0x00000000, /* QW0,CHA-B, ALL-EVEN */ + 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, /* QW1,CHA-B, ALL-EVEN */ + 0x00000000,0x00000000,0x00000000,0x00000000, /* QW2,CHA-B, ALL-EVEN */ + 0x00000000,0x00000000,0x00000000,0x00000000, /* QW3,CHA-B, ALL-EVEN */ + 0x00000000,0x00000000,0x00000000,0x00000000, /* QW4,CHA-B, ALL-EVEN */ + 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, /* QW5,CHA-B, ALL-EVEN */ + 0x00000000,0x00000000,0x00000000,0x00000000, /* QW6,CHA-B, ALL-EVEN */ + 0x00000000,0x00000000,0x00000000,0x00000000, /* QW7,CHA-B, ALL-EVEN */ + 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, /* QW0,CHA-B, DQ0-ODD */ + 0x01010101,0x01010101,0x01010101,0x01010101, /* QW1,CHA-B, DQ0-ODD */ + 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, /* QW2,CHA-B, DQ0-ODD */ + 0x01010101,0x01010101,0x01010101,0x01010101, /* QW3,CHA-B, DQ0-ODD 
*/ + 0x01010101,0x01010101,0x01010101,0x01010101, /* QW4,CHA-B, DQ0-ODD */ + 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, /* QW5,CHA-B, DQ0-ODD */ + 0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, /* QW6,CHA-B, DQ0-ODD */ + 0x01010101,0x01010101,0x01010101,0x01010101, /* QW7,CHA-B, DQ0-ODD */ + 0x02020202,0x02020202,0x02020202,0x02020202, /* QW0,CHA-B, DQ1-ODD */ + 0x02020202,0x02020202,0x02020202,0x02020202, /* QW1,CHA-B, DQ1-ODD */ + 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, /* QW2,CHA-B, DQ1-ODD */ + 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, /* QW3,CHA-B, DQ1-ODD */ + 0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, /* QW4,CHA-B, DQ1-ODD */ + 0x02020202,0x02020202,0x02020202,0x02020202, /* QW5,CHA-B, DQ1-ODD */ + 0x02020202,0x02020202,0x02020202,0x02020202, /* QW6,CHA-B, DQ1-ODD */ + 0x02020202,0x02020202,0x02020202,0x02020202, /* QW7,CHA-B, DQ1-ODD */ + 0x04040404,0x04040404,0x04040404,0x04040404, /* QW0,CHA-B, DQ2-ODD */ + 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW1,CHA-B, DQ2-ODD */ + 0x04040404,0x04040404,0x04040404,0x04040404, /* QW2,CHA-B, DQ2-ODD */ + 0x04040404,0x04040404,0x04040404,0x04040404, /* QW3,CHA-B, DQ2-ODD */ + 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW4,CHA-B, DQ2-ODD */ + 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW5,CHA-B, DQ2-ODD */ + 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW6,CHA-B, DQ2-ODD */ + 0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, /* QW7,CHA-B, DQ2-ODD */ + 0x08080808,0x08080808,0x08080808,0x08080808, /* QW0,CHA-B, DQ3-ODD */ + 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW1,CHA-B, DQ3-ODD */ + 0x08080808,0x08080808,0x08080808,0x08080808, /* QW2,CHA-B, DQ3-ODD */ + 0x08080808,0x08080808,0x08080808,0x08080808, /* QW3,CHA-B, DQ3-ODD */ + 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW4,CHA-B, DQ3-ODD */ + 0x08080808,0x08080808,0x08080808,0x08080808, /* QW5,CHA-B, DQ3-ODD */ + 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* QW6,CHA-B, DQ3-ODD */ + 0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, /* 
QW7,CHA-B, DQ3-ODD */ + 0x10101010,0x10101010,0x10101010,0x10101010, /* QW0,CHA-B, DQ4-ODD */ + 0x10101010,0x10101010,0x10101010,0x10101010, /* QW1,CHA-B, DQ4-ODD */ + 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW2,CHA-B, DQ4-ODD */ + 0x10101010,0x10101010,0x10101010,0x10101010, /* QW3,CHA-B, DQ4-ODD */ + 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW4,CHA-B, DQ4-ODD */ + 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW5,CHA-B, DQ4-ODD */ + 0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, /* QW6,CHA-B, DQ4-ODD */ + 0x10101010,0x10101010,0x10101010,0x10101010, /* QW7,CHA-B, DQ4-ODD */ + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW0,CHA-B, DQ5-ODD */ + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW1,CHA-B, DQ5-ODD */ + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW2,CHA-B, DQ5-ODD */ + 0x20202020,0x20202020,0x20202020,0x20202020, /* QW3,CHA-B, DQ5-ODD */ + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW4,CHA-B, DQ5-ODD */ + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW5,CHA-B, DQ5-ODD */ + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW6,CHA-B, DQ5-ODD */ + 0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, /* QW7,CHA-B, DQ5-ODD */ + 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW0,CHA-B, DQ6-ODD */ + 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW1,CHA-B, DQ6-ODD */ + 0x40404040,0x40404040,0x40404040,0x40404040, /* QW2,CHA-B, DQ6-ODD */ + 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW3,CHA-B, DQ6-ODD */ + 0x40404040,0x40404040,0x40404040,0x40404040, /* QW4,CHA-B, DQ6-ODD */ + 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW5,CHA-B, DQ6-ODD */ + 0x40404040,0x40404040,0x40404040,0x40404040, /* QW6,CHA-B, DQ6-ODD */ + 0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, /* QW7,CHA-B, DQ6-ODD */ + 0x80808080,0x80808080,0x80808080,0x80808080, /* QW0,CHA-B, DQ7-ODD */ + 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, /* QW1,CHA-B, DQ7-ODD */ + 0x80808080,0x80808080,0x80808080,0x80808080, /* QW2,CHA-B, DQ7-ODD */ + 
0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, /* QW3,CHA-B, DQ7-ODD */ + 0x80808080,0x80808080,0x80808080,0x80808080, /* QW4,CHA-B, DQ7-ODD */ + 0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, /* QW5,CHA-B, DQ7-ODD */ + 0x80808080,0x80808080,0x80808080,0x80808080, /* QW6,CHA-B, DQ7-ODD */ + 0x80808080,0x80808080,0x80808080,0x80808080 /* QW7,CHA-B, DQ7-ODD */ +}; + +const u8 Table_DQSRcvEn_Offset[] = {0x00,0x01,0x10,0x11}; + + +void TrainReceiverEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA, u8 Pass) +{ + u8 Node; + struct DCTStatStruc *pDCTstat; + + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + pDCTstat = pDCTstatA + Node; + +/*FIXME: needed? if (!pDCTstat->NodePresent) + break; +*/ + if (pDCTstat->DCTSysLimit) { + mct_TrainRcvrEn_D(pMCTstat, pDCTstat, Pass); + } + } +} + + +static void SetEccDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 ChipSel) +{ + u8 channel; + u8 direction; + + for (channel = 0; channel < 2; channel++){ + for (direction = 0; direction < 2; direction++) { + pDCTstat->Channel = channel; /* Channel A or B */ + pDCTstat->Direction = direction; /* Read or write */ + CalcEccDQSPos_D(pMCTstat, pDCTstat, pDCTstat->CH_EccDQSLike[channel], pDCTstat->CH_EccDQSScale[channel], ChipSel); + print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", channel, direction==DQS_READDIR? 
" R dqs_delay":" W dqs_delay", pDCTstat->DQSDelay, 2); + pDCTstat->ByteLane = 8; + StoreDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel); + mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, ChipSel); + } + } +} + + + +static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u16 like, u8 scale, u8 ChipSel) +{ + u8 DQSDelay0, DQSDelay1; + u16 DQSDelay; + + pDCTstat->ByteLane = like & 0xff; + GetDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel); + DQSDelay0 = pDCTstat->DQSDelay; + + pDCTstat->ByteLane = (like >> 8) & 0xff; + GetDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel); + DQSDelay1 = pDCTstat->DQSDelay; + + if (DQSDelay0>DQSDelay1) { + DQSDelay = DQSDelay0 - DQSDelay1; + } else { + DQSDelay = DQSDelay1 - DQSDelay0; + } + + DQSDelay = DQSDelay * (~scale); + + DQSDelay += 0x80; // round it + + DQSDelay >>= 8; // /256 + + if (DQSDelay0>DQSDelay1) { + DQSDelay = DQSDelay1 - DQSDelay; + } else { + DQSDelay += DQSDelay1; + } + + pDCTstat->DQSDelay = (u8)DQSDelay; +} + + +static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 cs_start) +{ + u32 Errors; + u8 Channel, DQSWrDelay; + u8 _DisableDramECC = 0; + u32 PatternBuffer[292]; + u8 _Wrap32Dis = 0, _SSE2 = 0; + u8 dqsWrDelay_end; + + u32 addr; + u32 cr4; + u32 lo, hi; + + print_debug_dqs("\nTrainDQSRdWrPos: Node_ID ", pDCTstat->Node_ID, 0); + cr4 = read_cr4(); + if (cr4 & (1<<9)) { + _SSE2 = 1; + } + cr4 |= (1<<9); /* OSFXSR enable SSE2 */ + write_cr4(cr4); + + addr = HWCR; + _RDMSR(addr, &lo, &hi); + if (lo & (1<<17)) { + _Wrap32Dis = 1; + } + lo |= (1<<17); /* HWCR.wrap32dis */ + _WRMSR(addr, lo, hi); /* allow 64-bit memory references in real mode */ + + /* Disable ECC correction of reads on the dram bus. 
*/ + _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat); + + SetupDqsPattern_D(pMCTstat, pDCTstat, PatternBuffer); + + /* mct_BeforeTrainDQSRdWrPos_D */ + dqsWrDelay_end = 0x20; + + Errors = 0; + for (Channel = 0; Channel < 2; Channel++) { + print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ",Channel, 1); + pDCTstat->Channel = Channel; + + if (pDCTstat->DIMMValidDCT[Channel] == 0) /* mct_BeforeTrainDQSRdWrPos_D */ + continue; + + for ( DQSWrDelay = 0; DQSWrDelay < dqsWrDelay_end; DQSWrDelay++) { + pDCTstat->DQSDelay = DQSWrDelay; + pDCTstat->Direction = DQS_WRITEDIR; + mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start); + + print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2); + TrainReadDQS_D(pMCTstat, pDCTstat, cs_start); + + print_debug_dqs("\t\tTrainDQSRdWrPos: 22 TrainErrors ",pDCTstat->TrainErrors, 2); + if (pDCTstat->TrainErrors == 0) { + break; + } + Errors |= pDCTstat->TrainErrors; + } + if (DQSWrDelay < dqsWrDelay_end) { + Errors = 0; + + print_debug_dqs("\tTrainDQSRdWrPos: 231 DQSWrDelay ", DQSWrDelay, 1); + TrainWriteDQS_D(pMCTstat, pDCTstat, cs_start); + } + print_debug_dqs("\tTrainDQSRdWrPos: 232 Errors ", Errors, 1); + pDCTstat->ErrStatus |= Errors; + } + +#if DQS_TRAIN_DEBUG > 0 + { + u8 val; + u8 i; + u8 Channel, Receiver, Dir; + u8 *p; + + for (Dir = 0; Dir < 2; Dir++) { + if (Dir == 0) { + print_debug("TrainDQSRdWrPos: CH_D_DIR_B_DQS WR:\n"); + } else { + print_debug("TrainDQSRdWrPos: CH_D_DIR_B_DQS RD:\n"); + } + for (Channel = 0; Channel < 2; Channel++) { + print_debug("Channel:"); print_debug_hex8(Channel); print_debug("\n"); + for (Receiver = cs_start; Receiver < (cs_start + 2); Receiver += 2) { + print_debug("\t\tReceiver:"); print_debug_hex8(Receiver); + p = pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][Dir]; + print_debug(": "); + for (i=0;i<8; i++) { + val = p[i]; + print_debug_hex8(val); + print_debug(" "); + } + print_debug("\n"); + } + } + } + + } +#endif + + if (_DisableDramECC) { + 
mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC); + } + if (!_Wrap32Dis) { + addr = HWCR; + _RDMSR(addr, &lo, &hi); + lo &= ~(1<<17); /* restore HWCR.wrap32dis */ + _WRMSR(addr, lo, hi); + } + if (!_SSE2){ + cr4 = read_cr4(); + cr4 &= ~(1<<9); /* restore cr4.OSFXSR */ + write_cr4(cr4); + } + + print_tx("TrainDQSRdWrPos: Status ", pDCTstat->Status); + print_tx("TrainDQSRdWrPos: TrainErrors ", pDCTstat->TrainErrors); + print_tx("TrainDQSRdWrPos: ErrStatus ", pDCTstat->ErrStatus); + print_tx("TrainDQSRdWrPos: ErrCode ", pDCTstat->ErrCode); + print_t("TrainDQSRdWrPos: Done\n"); +} + + +static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u32 *buffer) +{ + /* 1. Set the Pattern type (0 or 1) in DCTStatstruc.Pattern + * 2. Copy the pattern from ROM to Cache, aligning on 16 byte boundary + * 3. Set the ptr to Cacheable copy in DCTStatstruc.PtrPatternBufA + */ + + u32 *buf; + u16 i; + + buf = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0)); + if (pDCTstat->Status & (1<<SB_128bitmode)) { + pDCTstat->Pattern = 1; /* 18 cache lines, alternating qwords */ + for (i=0; i<16*18; i++) + buf[i] = TestPatternJD1b_D[i]; + } else { + pDCTstat->Pattern = 0; /* 9 cache lines, sequential qwords */ + for (i=0; i<16*9; i++) + buf[i] = TestPatternJD1a_D[i]; + } + pDCTstat->PtrPatternBufA = (u32)buf; +} + + +static void TrainDQSPos_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 cs_start) +{ + u32 Errors; + u8 ChipSel, DQSDelay; + u8 RnkDlySeqPassMin,RnkDlySeqPassMax, RnkDlyFilterMin, RnkDlyFilterMax; + u8 LastTest; + u32 TestAddr; + u8 ByteLane; + u8 MutualCSPassW[64]; + u8 BanksPresent; + u8 dqsDelay_end; + u8 tmp, valid; + +// print_tx("TrainDQSPos: Node_ID", pDCTstat->Node_ID); +// print_tx("TrainDQSPos: Direction", pDCTstat->Direction); + + /* MutualCSPassW: each byte represents a bitmap of pass/fail per + * ByteLane. The indext within MutualCSPassW is the delay value + * given the results. 
+ */ + + + print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3); + + Errors = 0; + BanksPresent = 0; + + if (pDCTstat->Direction == DQS_READDIR) { + dqsDelay_end = 64; + mct_AdjustDelayRange_D(pMCTstat, pDCTstat, &dqsDelay_end); + } else { + dqsDelay_end = 32; + } + + /* Bitmapped status per delay setting, 0xff=All positions + * passing (1= PASS). Set the entire array. + */ + for (DQSDelay=0; DQSDelay<64; DQSDelay++) { + MutualCSPassW[DQSDelay] = 0xFF; + } + + for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) { /* logical register chipselects 0..7 */ + print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4); + + if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) { + print_debug_dqs("\t\t\t\tmct_RcvrRankEnabled_D CS not enabled ", ChipSel, 4); + continue; + } + + BanksPresent = 1; /* flag for atleast one bank is present */ + TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel, &valid); + if (!valid) { + print_debug_dqs("\t\t\t\tAddress not supported on current CS ", TestAddr, 4); + continue; + } + + print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4); + SetUpperFSbase(TestAddr); /* fs:eax=far ptr to target */ + + if (pDCTstat->Direction==DQS_READDIR) { + print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read ", 0, 4); + WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr<<8); + } + + for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) { + print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5); + if (MutualCSPassW[DQSDelay] == 0) + continue; //skip current delay value if other chipselects have failed all 8 bytelanes + pDCTstat->DQSDelay = DQSDelay; + mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start); + print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); + + if (pDCTstat->Direction == DQS_WRITEDIR) { + print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5); + WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr<<8); + } + + 
print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", pDCTstat->Pattern, 5); + ReadDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr<<8); +// print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); + tmp = CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); /* 0=fail, 1=pass */ + + if (mct_checkFenceHoleAdjust_D(pMCTstat, pDCTstat, DQSDelay, ChipSel, &tmp)) { + goto skipLocMiddle; + } + + MutualCSPassW[DQSDelay] &= tmp; + print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 \tMutualCSPassW ", MutualCSPassW[DQSDelay], 5); + + SetTargetWTIO_D(TestAddr); + FlushDQSTestPattern_D(pDCTstat, TestAddr<<8); + ResetTargetWTIO_D(); + } + + } + + if (BanksPresent) { + u8 mask_pass = 0; + for (ByteLane = 0; ByteLane < 8; ByteLane++) { + print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4); + pDCTstat->ByteLane = ByteLane; + LastTest = DQS_FAIL; /* Analyze the results */ + RnkDlySeqPassMin = 0; + RnkDlySeqPassMax = 0; + RnkDlyFilterMax = 0; + RnkDlyFilterMin = 0; + for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) { + if (MutualCSPassW[DQSDelay] & (1 << ByteLane)) { + print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5); + print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); + + RnkDlySeqPassMax = DQSDelay; + if (LastTest == DQS_FAIL) { + RnkDlySeqPassMin = DQSDelay; //start sequential run + } + if ((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){ + RnkDlyFilterMin = RnkDlySeqPassMin; + RnkDlyFilterMax = RnkDlySeqPassMax; + } + LastTest = DQS_PASS; + } else { + LastTest = DQS_FAIL; + } + } + print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4); + if (RnkDlySeqPassMax == 0) { + Errors |= 1<<SB_NODQSPOS; /* no passing window */ + } else { + print_debug_dqs_pair("\t\t\t\tTrainDQSPos: 34 RnkDlyFilter: ", RnkDlyFilterMin, " ", RnkDlyFilterMax, 4); + if (((RnkDlyFilterMax - RnkDlyFilterMin) < MIN_DQS_WNDW)){ + Errors |= 1 << 
SB_SMALLDQS; + } else { + u8 middle_dqs; + /* mctEngDQSwindow_Save_D Not required for arrays */ + middle_dqs = MiddleDQS_D(RnkDlyFilterMin, RnkDlyFilterMax); + pDCTstat->DQSDelay = middle_dqs; + mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, cs_start); /* load the register with the value */ + StoreDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start); /* store the value into the data structure */ + print_debug_dqs("\t\t\t\tTrainDQSPos: 42 middle_dqs : ",middle_dqs, 4); + } + } + } + print_debug_dqs("\t\t\t\tTrainDQSPos: 41 mask_pass ",mask_pass, 3); + } +skipLocMiddle: + pDCTstat->TrainErrors = Errors; + + print_debug_dqs("\t\t\tTrainDQSPos: Errors ", Errors, 3); + +} + + +static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 ChipSel) +{ + /* Store the DQSDelay value, found during a training sweep, into the DCT + * status structure for this node + */ + + + /* When 400, 533, 667, it will support dimm0/1/2/3, + * and set conf for dimm0, hw will copy to dimm1/2/3 + * set for dimm1, hw will copy to dimm3 + * Rev A/B only support DIMM0/1 when 800Mhz and above + 0x100 to next dimm + * Rev C support DIMM0/1/2/3 when 800Mhz and above + 0x100 to next dimm + */ + + /* FindDQSDatDimmVal_D is not required since we use an array */ + u8 dn = 0; + + if (pDCTstat->Status & (1 << SB_Over400MHz)) + dn = ChipSel>>1; /* if odd or even logical DIMM */ + + pDCTstat->CH_D_DIR_B_DQS[pDCTstat->Channel][dn][pDCTstat->Direction][pDCTstat->ByteLane] = + pDCTstat->DQSDelay; +} + + +static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 ChipSel) +{ + u8 dn = 0; + + + /* When 400, 533, 667, it will support dimm0/1/2/3, + * and set conf for dimm0, hw will copy to dimm1/2/3 + * set for dimm1, hw will copy to dimm3 + * Rev A/B only support DIMM0/1 when 800Mhz and above + 0x100 to next dimm + * Rev C support DIMM0/1/2/3 when 800Mhz and above + 0x100 to next dimm + */ + + /* FindDQSDatDimmVal_D is not required since we use 
an array */ + if (pDCTstat->Status & (1<<SB_Over400MHz)) + dn = ChipSel >> 1; /*if odd or even logical DIMM */ + + pDCTstat->DQSDelay = + pDCTstat->CH_D_DIR_B_DQS[pDCTstat->Channel][dn][pDCTstat->Direction][pDCTstat->ByteLane]; +} + + +/* FindDQSDatDimmVal_D is not required since we use an array */ + + +static u8 MiddleDQS_D(u8 min, u8 max) +{ + u8 size; + size = max-min; + if (size % 2) + size++; // round up if the size isn't even. + return ( min + (size >> 1)); +} + + +static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 cs_start) +{ + print_debug_dqs("\t\tTrainReadPos ", 0, 2); + pDCTstat->Direction = DQS_READDIR; + TrainDQSPos_D(pMCTstat, pDCTstat, cs_start); +} + + +static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 cs_start) +{ + pDCTstat->Direction = DQS_WRITEDIR; + print_debug_dqs("\t\tTrainWritePos", 0, 2); + TrainDQSPos_D(pMCTstat, pDCTstat, cs_start); +} + + +static void proc_IOCLFLUSH_D(u32 addr_hi) +{ + SetTargetWTIO_D(addr_hi); + proc_CLFLUSH(addr_hi); + ResetTargetWTIO_D(); +} + + +static u8 ChipSelPresent_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 Channel, u8 ChipSel) +{ + u32 val; + u32 reg; + u32 dev = pDCTstat->dev_dct; + u32 reg_off; + u8 ret = 0; + + if (!pDCTstat->GangedMode) { + reg_off = 0x100 * Channel; + } else { + reg_off = 0; + } + + if (ChipSel < MAX_CS_SUPPORTED){ + reg = 0x40 + (ChipSel << 2) + reg_off; + val = Get_NB32(dev, reg); + if (val & ( 1 << 0)) + ret = 1; + } + + return ret; +} + + +/* proc_CLFLUSH_D located in mct_gcc.h */ + + +static void WriteDQSTestPattern_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u32 TestAddr_lo) +{ + /* Write a pattern of 72 bit times (per DQ), to test dram functionality. + * The pattern is a stress pattern which exercises both ISI and + * crosstalk. The number of cache lines to fill is dependent on DCT + * width mode and burstlength. 
+ * Mode BL Lines Pattern no. + * ----+---+------------------- + * 64 4 9 0 + * 64 8 9 0 + * 64M 4 9 0 + * 64M 8 9 0 + * 128 4 18 1 + * 128 8 N/A - + */ + + if (pDCTstat->Pattern == 0) + WriteL9TestPattern_D(pDCTstat, TestAddr_lo); + else + WriteL18TestPattern_D(pDCTstat, TestAddr_lo); +} + + +static void WriteL18TestPattern_D(struct DCTStatStruc *pDCTstat, + u32 TestAddr_lo) +{ + u8 *buf; + + buf = (u8 *)pDCTstat->PtrPatternBufA; + WriteLNTestPattern(TestAddr_lo, buf, 18); + +} + + +static void WriteL9TestPattern_D(struct DCTStatStruc *pDCTstat, + u32 TestAddr_lo) +{ + u8 *buf; + + buf = (u8 *)pDCTstat->PtrPatternBufA; + WriteLNTestPattern(TestAddr_lo, buf, 9); +} + + + +static u8 CompareDQSTestPattern_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 addr_lo) +{ + /* Compare a pattern of 72 bit times (per DQ), to test dram functionality. + * The pattern is a stress pattern which exercises both ISI and + * crosstalk. The number of cache lines to fill is dependent on DCT + * width mode and burstlength. + * Mode BL Lines Pattern no. 
+ * ----+---+------------------- + * 64 4 9 0 + * 64 8 9 0 + * 64M 4 9 0 + * 64M 8 9 0 + * 128 4 18 1 + * 128 8 N/A - + */ + + u32 *test_buf; + u8 bitmap; + u8 bytelane; + u8 i; + u32 value; + u8 j; + u32 value_test; + u8 pattern, channel; + + pattern = pDCTstat->Pattern; + channel = pDCTstat->Channel; + test_buf = (u32 *)pDCTstat->PtrPatternBufA; + + if (pattern && channel) { + addr_lo += 8; //second channel + test_buf+= 2; + } + + bytelane = 0; + bitmap = 0xFF; + for (i=0; i < (9 * 64 / 4); i++) { /* /4 due to next loop */ + value = read32_fs(addr_lo); + value_test = *test_buf; + + print_debug_dqs_pair("\t\t\t\t\t\ttest_buf = ", (u32)test_buf, " value = ", value_test, 7); + print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ", addr_lo, " value = ", value, 7); + + for (j = 0; j < (4 * 8); j += 8) { + if (((value >> j) & 0xff) != ((value_test >> j) & 0xff)) { + bitmap &= ~(1 << bytelane); + } + + bytelane++; + bytelane &= 0x7; + } + + print_debug_dqs("\t\t\t\t\t\tbitmap = ", bitmap, 7); + + if (!bitmap) + break; + + if (bytelane == 0){ + if (pattern == 1) { //dual channel + addr_lo += 8; //skip over other channel's data + test_buf += 2; + } + } + addr_lo += 4; + test_buf += 1; + } + + return bitmap; +} + + +static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat, + u32 addr_lo) +{ + /* Flush functions in mct_gcc.h */ + if (pDCTstat->Pattern == 0){ + FlushDQSTestPattern_L9(addr_lo); + } else { + FlushDQSTestPattern_L18(addr_lo); + } +} + +static void SetTargetWTIO_D(u32 TestAddr) +{ + u32 lo, hi; + hi = TestAddr >> 24; + lo = TestAddr << 8; + _WRMSR(0xC0010016, lo, hi); /* IORR0 Base */ + hi = 0xFF; + lo = 0xFC000800; /* 64MB Mask */ + _WRMSR(0xC0010017, lo, hi); /* IORR0 Mask */ +} + + +static void ResetTargetWTIO_D(void) +{ + u32 lo, hi; + + hi = 0; + lo = 0; + _WRMSR(0xc0010017, lo, hi); // IORR0 Mask +} + + +static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u32 TestAddr_lo) +{ + /* Read a pattern of 72 bit times 
(per DQ), to test dram functionality. + * The pattern is a stress pattern which exercises both ISI and + * crosstalk. The number of cache lines to fill is dependent on DCT + * width mode and burstlength. + * Mode BL Lines Pattern no. + * ----+---+------------------- + * 64 4 9 0 + * 64 8 9 0 + * 64M 4 9 0 + * 64M 8 9 0 + * 128 4 18 1 + * 128 8 N/A - + */ + if (pDCTstat->Pattern == 0) + ReadL9TestPattern(TestAddr_lo); + else + ReadL18TestPattern(TestAddr_lo); + _MFENCE; +} + + +u32 SetUpperFSbase(u32 addr_hi) +{ + /* Set the upper 32-bits of the Base address, 4GB aligned) for the + * FS selector. + */ + + u32 lo, hi; + u32 addr; + lo = 0; + hi = addr_hi>>24; + addr = FS_Base; + _WRMSR(addr, lo, hi); + return addr_hi<<8; +} + + +void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index) +{ + u32 val; + + val = Get_NB32_index_wait(dev, index_reg, index); + Set_NB32_index_wait(dev, index_reg, index, val); +} + + +/* mctEngDQSwindow_Save_D not required with arrays */ + + +void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + u8 Node; + u8 ChipSel; + struct DCTStatStruc *pDCTstat; + + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + pDCTstat = pDCTstatA + Node; + if (pDCTstat->DCTSysLimit) { + /* when DCT speed >= 400MHz, we only support 2 DIMMs + * and we have two sets registers for DIMM0 and DIMM1 so + * here we must traning DQSRd/WrPos for DIMM0 and DIMM1 + */ + if (pDCTstat->Speed >= 4) { + pDCTstat->Status |= (1 << SB_Over400MHz); + } + for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) { + TrainDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel); + SetEccDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel); + } + } + } +} + + +/* mct_BeforeTrainDQSRdWrPos_D + * Function is inline. + */ + +u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + u8 _DisableDramECC = 0; + u32 val; + u32 reg; + u32 dev; + + /*Disable ECC correction of reads on the dram bus. 
*/ + + dev = pDCTstat->dev_dct; + reg = 0x90; + val = Get_NB32(dev, reg); + if (val & (1<<DimmEcEn)) { + _DisableDramECC |= 0x01; + val &= ~(1<<DimmEcEn); + Set_NB32(dev, reg, val); + } + if (!pDCTstat->GangedMode) { + reg = 0x190; + val = Get_NB32(dev, reg); + if (val & (1<<DimmEcEn)) { + _DisableDramECC |= 0x02; + val &= ~(1<<DimmEcEn); + Set_NB32(dev, reg, val); + } + } + return _DisableDramECC; +} + + + +void mct_EnableDimmEccEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 _DisableDramECC) +{ + + u32 val; + u32 reg; + u32 dev; + + /* Enable ECC correction if it was previously disabled */ + + dev = pDCTstat->dev_dct; + + if ((_DisableDramECC & 0x01) == 0x01) { + reg = 0x90; + val = Get_NB32(dev, reg); + val |= (1<<DimmEcEn); + Set_NB32(dev, reg, val); + } + if ((_DisableDramECC & 0x02) == 0x02) { + reg = 0x190; + val = Get_NB32(dev, reg); + val |= (1<<DimmEcEn); + Set_NB32(dev, reg, val); + } +} + + +static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 ChipSel) +{ + u8 ByteLane; + u32 val; + u32 index_reg = 0x98 + 0x100 * pDCTstat->Channel; + u8 shift; + u32 dqs_delay = (u32)pDCTstat->DQSDelay; + u32 dev = pDCTstat->dev_dct; + u32 index; + + ByteLane = pDCTstat->ByteLane; + + /* Channel is offset */ + if (ByteLane < 4) { + index = 1; + } else if (ByteLane <8) { + index = 2; + } else { + index = 3; + } + + if (pDCTstat->Direction == DQS_READDIR) { + index += 4; + } + + /* get the proper register index */ + shift = ByteLane%4; + shift <<= 3; /* get bit position of bytelane, 8 bit */ + + if (pDCTstat->Status & (1 << SB_Over400MHz)) { + index += (ChipSel >> 1) * 0x100; /* if logical DIMM1/DIMM3 */ + } + + val = Get_NB32_index_wait(dev, index_reg, index); + val &= ~(0x7f << shift); + val |= (dqs_delay << shift); + Set_NB32_index_wait(dev, index_reg, index, val); +} + + +static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 cs_start) +{ + u8 
ByteLane; + u8 ChipSel = cs_start; + + + for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) { + if ( mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) { + for (ByteLane = 0; ByteLane < 8; ByteLane++) { + pDCTstat->ByteLane = ByteLane; + mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, ChipSel); + } + } + } +} + + +u8 mct_RcvrRankEnabled_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 Channel, u8 ChipSel) +{ + u8 ret; + + ret = ChipSelPresent_D(pMCTstat, pDCTstat, Channel, ChipSel); + return ret; +} + + +u32 mct_GetRcvrSysAddr_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 channel, u8 receiver, u8 *valid) +{ + return mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, channel, receiver, valid); +} + + +u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 Channel, u8 receiver, u8 *valid) +{ + u32 val; + u32 reg_off = 0; + u32 reg; + u32 dword; + u32 dev = pDCTstat->dev_dct; + + *valid = 0; + + + if (!pDCTstat->GangedMode) { // FIXME: not used. 
+ reg_off = 0x100 * Channel; + } + + /* get the local base addr of the chipselect */ + reg = 0x40 + (receiver << 2); + val = Get_NB32(dev, reg); + + val &= ~0x0F; + + /* unganged mode DCT0+DCT1, sys addr of DCT1=node + * base+DctSelBaseAddr+local ca base*/ + if ((Channel) && (pDCTstat->GangedMode == 0) && ( pDCTstat->DIMMValidDCT[0] > 0)) { + reg = 0x110; + dword = Get_NB32(dev, reg); + dword &= 0xfffff800; + dword <<= 8; /* scale [47:27] of F2x110[31:11] to [39:8]*/ + val += dword; + + /* if DCTSelBaseAddr < Hole, and eax > HoleBase, then add Hole size to test address */ + if ((val >= pDCTstat->DCTHoleBase) && (pDCTstat->DCTHoleBase > dword)) { + dword = (~(pDCTstat->DCTHoleBase >> (24 - 8)) + 1) & 0xFF; + dword <<= (24 - 8); + val += dword; + } + } else { + /* sys addr=node base+local cs base */ + val += pDCTstat->DCTSysBase; + + /* New stuff */ + if (pDCTstat->DCTHoleBase && (val >= pDCTstat->DCTHoleBase)) { + val -= pDCTstat->DCTSysBase; + dword = Get_NB32(pDCTstat->dev_map, 0xF0); /* get Hole Offset */ + val += (dword & 0x0000ff00) << (24-8-8); + } + } + + /* New stuff */ + val += ((1 << 21) >> 8); /* Add 2MB offset to avoid compat area */ + if (val >= MCT_TRNG_KEEPOUT_START) { + while(val < MCT_TRNG_KEEPOUT_END) + val += (1 << (15-8)); /* add 32K */ + } + + /* HW remap disabled? 
*/ + if (!(pDCTstat->Status & (1 << SB_HWHole))) { + if (!(pDCTstat->Status & (1 << SB_SWNodeHole))) { + /* SW memhole disabled */ + u32 lo, hi; + _RDMSR(TOP_MEM, &lo, &hi); + lo >>= 8; + if ((val >= lo) && (val < _4GB_RJ8)) { + val = 0; + *valid = 0; + goto exitGetAddr; + } else { + *valid = 1; + goto exitGetAddrWNoError; + } + } else { + *valid = 1; + goto exitGetAddrWNoError; + } + } else { + *valid = 1; + goto exitGetAddrWNoError; + } + +exitGetAddrWNoError: + + /* Skip if Address is in UMA region */ + dword = pMCTstat->Sub4GCacheTop; + dword >>= 8; + if (dword != 0) { + if ((val >= dword) && (val < _4GB_RJ8)) { + val = 0; + *valid = 0; + } else { + *valid = 1; + } + } + +exitGetAddr: + return val; +} + + +void mct_Write1LTestPattern_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u32 TestAddr, u8 pattern) +{ + + u8 *buf; + + /* Issue the stream of writes. When F2x11C[MctWrLimit] is reached + * (or when F2x11C[FlushWr] is set again), all the writes are written + * to DRAM. + */ + + SetUpperFSbase(TestAddr); + + if (pattern) + buf = (u8 *)pDCTstat->PtrPatternBufB; + else + buf = (u8 *)pDCTstat->PtrPatternBufA; + + WriteLNTestPattern(TestAddr << 8, buf, 1); +} + + +void mct_Read1LTestPattern_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u32 addr) +{ + u32 value; + + /* BIOS issues the remaining (Ntrain - 2) reads after checking that + * F2x11C[PrefDramTrainMode] is cleared. These reads must be to + * consecutive cache lines (i.e., 64 bytes apart) and must not cross + * a naturally aligned 4KB boundary. These reads hit the prefetches and + * read the data from the prefetch buffer. 
+ */ + + /* get data from DIMM */ + SetUpperFSbase(addr); + + /* 1st move causes read fill (to exclusive or shared)*/ + value = read32_fs(addr<<8); +} diff --git a/src/northbridge/amd/amdmct/mct/mctecc_d.c b/src/northbridge/amd/amdmct/mct/mctecc_d.c new file mode 100644 index 0000000000..b48c1f5417 --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mctecc_d.c @@ -0,0 +1,296 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +#include "mct_d.h" + +static void setSyncOnUnEccEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA); +static u32 GetScrubAddr_D(u32 Node); +static u8 isDramECCEn_D(struct DCTStatStruc *pDCTstat); + + +/* Initialize ECC modes of Integrated Dram+Memory Controllers of a network of + * Hammer processors. Use Dram background scrubber to fast initialize ECC bits + * of all dram. + * + * Notes: + * + * Order that items are set: + * 1. eccen bit in NB + * 2. Scrub Base + * 3. Temp Node Base + * 4. Temp Node Limit + * 5. Redir bit in NB + * 6. Scrub CTL + * + * Conditions for setting background scrubber. + * 1. node is present + * 2. node has dram functioning (WE=RE=1) + * 3. all eccdimms (or bit 17 of offset 90,fn 2) + * 4. 
no chip-select gap exists + * + * The dram background scrubber is used under very controlled circumstances to + * initialize all the ECC bits on the DIMMs of the entire dram address map + * (including hidden or lost dram and dram above 4GB). We will turn the scrub + * rate up to maximum, which should clear 4GB of dram in about 2.7 seconds. + * We will activate the scrubbers of all nodes with ecc dram and let them run in + * parallel, thereby reducing even further the time required to condition dram. + * Finally, we will go through each node and either disable background scrubber, + * or set the scrub rate to the user setup specified rate. + * + * To allow the NB to scrub, we need to wait a time period long enough to + * guarantee that the NB scrubs the entire dram on its node. To do this, we + * simply sample the scrub ADDR once, for an initial value, then we sample and poll until the polled value of scrub ADDR + * has wrapped around at least once: Scrub ADDRi+1 < Scrub ADDRi. Since we let all + * Nodes run in parallel, we need to guarantee that all nodes have wrapped. To do + * this efficiently, we need only to sample one of the nodes: the node with the + * largest amount of dram populated is the one which will take the longest amount + * of time (the scrub rate is set to max, the same rate, on all nodes). So, + * during setup of scrub Base, we determine how much memory and which node has + * the largest memory installed. + * + * Scrubbing should not ordinarily be enabled on a Node with a chip-select gap + * (aka SW memhole, cs hoisting, etc.). To init ECC memory on this node, the + * scrubber is used in two steps. First, the Dram Limit for the node is adjusted + * down to the bottom of the gap, and that ECC dram is initialized. Second, the + * original Limit is restored, the Scrub base is set to 4GB, and scrubber is + * allowed to run until the Scrub Addr wraps around to zero. 
+ */
+/* Returns MemClrECC: 1 when a DRAM clear was started for at least one node
+ * while enabling ECC, 0 otherwise.
+ */
+u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA)
+{
+	u8 Node;
+	u8 AllECC;
+	u16 OB_NBECC;
+	u32 curBase;
+	u16 OB_ECCRedir;
+	u32 LDramECC;
+	u32 OF_ScrubCTL;
+	u16 OB_ChipKill;
+	u8 MemClrECC;
+
+	u32 dev;
+	u32 reg;
+	u32 val;
+
+	mctHookBeforeECC();
+
+	/* Construct these booleans, based on setup options, for easy handling
+	later in this procedure */
+	OB_NBECC = mctGet_NVbits(NV_NBECC);	/* MCA ECC (MCE) enable bit */
+
+	OB_ECCRedir = mctGet_NVbits(NV_ECCRedir);	/* ECC Redirection */
+
+	/* Read here but not used anywhere else in this function. */
+	OB_ChipKill = mctGet_NVbits(NV_ChipKill);	/* ECC Chip-kill mode */
+
+	/* Assemble the scrub control word: Dcache rate at bit 16, L2 rate at
+	 * bit 8, DRAM rate in the low bits. */
+	OF_ScrubCTL = 0;	/* Scrub CTL for Dcache, L2, and dram */
+	val = mctGet_NVbits(NV_DCBKScrub);
+	/* NOTE(review): val is passed by value, so an adjustment made by the
+	 * helper cannot reach OF_ScrubCTL - confirm the helper's signature. */
+	mct_AdjustScrub_D(pDCTstatA, val);
+	OF_ScrubCTL |= val << 16;
+	val = mctGet_NVbits(NV_L2BKScrub);
+	OF_ScrubCTL |= val << 8;
+
+	val = mctGet_NVbits(NV_DramBKScrub);
+	OF_ScrubCTL |= val;
+
+	AllECC = 1;
+	MemClrECC = 0;
+	print_t(" ECCInit 0 \n");
+	/* Pass 1: decide per node whether ECC can be enabled, set EccEn and
+	 * start the DRAM clear. */
+	for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
+		struct DCTStatStruc *pDCTstat;
+		pDCTstat = pDCTstatA + Node;
+		LDramECC = 0;
+		if (NodePresent_D(Node)) {	/*If Node is present */
+			dev = pDCTstat->dev_map;
+			reg = 0x40+(Node << 3);	/* Dram Base Node 0 + index */
+			val = Get_NB32(dev, reg);
+
+			/* WE/RE is checked */
+			if((val & 3)==3) {	/* Node has dram populated */
+				/* Negate 'all nodes/dimms ECC' flag if non ecc
+				   memory populated */
+				if( pDCTstat->Status & (1<<SB_ECCDIMMs)) {
+					LDramECC = isDramECCEn_D(pDCTstat);
+					/* An error on this node cancels ECC for it
+					 * and for the global 'all ECC' flag. */
+					if(pDCTstat->ErrCode != SC_RunningOK) {
+						pDCTstat->Status &= ~(1 << SB_ECCDIMMs);
+						if (OB_NBECC) {
+							pDCTstat->ErrStatus |= (1 << SB_DramECCDis);
+						}
+						AllECC = 0;
+						LDramECC =0;
+					}
+				} else {
+					AllECC = 0;
+				}
+				if(LDramECC) {	/* if ECC is enabled on this dram */
+					if (OB_NBECC) {
+						mct_EnableDatIntlv_D(pMCTstat, pDCTstat);
+						dev = pDCTstat->dev_nbmisc;
+						reg =0x44;	/* MCA NB Configuration */
+						val = Get_NB32(dev, reg);
+						val |= 1 << 22;	/* EccEn */
+						Set_NB32(dev, reg, val);
+						DCTMemClr_Init_D(pMCTstat, pDCTstat);
+						MemClrECC = 1;
+						print_tx(" ECC enabled on node: ", Node);
+					}
+				}	/* this node has ECC enabled dram */
+			} else {
+				LDramECC = 0;
+			}	/* Node has Dram */
+
+			/* Runs inside the node loop: once MemClrECC is set it is
+			 * executed again for every later present node. */
+			if (MemClrECC) {
+				MCTMemClrSync_D(pMCTstat, pDCTstatA);
+			}
+		}	/* if Node present */
+	}
+	print_t(" ECCInit 1 \n");
+
+	/* Publish whether every populated node ended up with ECC DIMMs. */
+	if(AllECC)
+		pMCTstat->GStatus |= 1<<GSB_ECCDIMMs;
+	else
+		pMCTstat->GStatus &= ~(1<<GSB_ECCDIMMs);
+
+	print_t(" ECCInit 2 \n");
+
+	/* Program the Dram BKScrub CTL to the proper (user selected) value.*/
+	/* Reset MC4_STS. */
+	/* NOTE(review): no MC4_STS write is visible in the loop below. */
+	for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
+		struct DCTStatStruc *pDCTstat;
+		pDCTstat = pDCTstatA + Node;
+		LDramECC = 0;
+		if (NodePresent_D(Node)) {	/* If Node is present */
+			reg = 0x40+(Node<<3);	/* Dram Base Node 0 + index */
+			val = Get_NB32(pDCTstat->dev_map, reg);
+			curBase = val & 0xffff0000;
+			/*WE/RE is checked because memory config may have been */
+			if((val & 3)==3) {	/* Node has dram populated */
+				if (isDramECCEn_D(pDCTstat)) {	/* if ECC is enabled on this dram */
+					dev = pDCTstat->dev_nbmisc;
+					/* Scrub address is split into a low and a
+					 * high register; bit 0 of the low one
+					 * enables redirection. */
+					val = curBase << 8;
+					if(OB_ECCRedir) {
+						val |= (1<<0);	/* enable redirection */
+					}
+					Set_NB32(dev, 0x5C, val);	/* Dram Scrub Addr Low */
+					val = curBase>>24;
+					Set_NB32(dev, 0x60, val);	/* Dram Scrub Addr High */
+					Set_NB32(dev, 0x58, OF_ScrubCTL);	/*Scrub Control */ /*set dram background scrubbing to setup value */
+				}	/* this node has ECC enabled dram */
+			}	/*Node has Dram */
+		}	/*if Node present */
+	}
+	print_t(" ECCInit 3 \n");
+
+	if(mctGet_NVbits(NV_SyncOnUnEccEn))
+		setSyncOnUnEccEn_D(pMCTstat, pDCTstatA);
+
+	mctHookAfterECC();
+	return MemClrECC;
+}
+
+
+/* Set SyncOnUcEccEn in the MCA NB Configuration register (offset 0x44) of
+ * every present node that has DRAM populated and ECC enabled.
+ */
+static void setSyncOnUnEccEn_D(struct MCTStatStruc *pMCTstat,
+				struct DCTStatStruc *pDCTstatA)
+{
+	u32 Node;
+	u32 reg;
+	u32 dev;
+	u32 val;
+
+	for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
+		struct DCTStatStruc *pDCTstat;
+		pDCTstat = pDCTstatA + Node;
+		if (NodePresent_D(Node)) {	/* If Node is present*/
+			reg = 0x40+(Node<<3);	/* Dram Base Node 0 + index*/
+			val = Get_NB32(pDCTstat->dev_map, reg);
+			/*WE/RE is checked because memory config may have been*/
+			if((val & 3)==3) {	/* Node has dram populated*/
+				if( isDramECCEn_D(pDCTstat)) {
+					/*if ECC is enabled on this dram*/
+					dev = pDCTstat->dev_nbmisc;
+					reg = 0x44;	/* MCA NB Configuration*/
+					val = Get_NB32(dev, reg);
+					val |= (1<<SyncOnUcEccEn);
+					Set_NB32(dev, reg, val);
+				}
+			}	/* Node has Dram*/
+		}	/* if Node present*/
+	}
+}
+
+
+/* No caller of this helper is visible in this part of the file. */
+static u32 GetScrubAddr_D(u32 Node)
+{
+	/* Get the current 40-bit Scrub ADDR address, scaled to 32-bits,
+	 * of the specified Node.
+	 */
+
+	u32 reg;
+	u32 regx;
+	u32 lo, hi;
+	u32 val;
+	u32 dev = PA_NBMISC(Node);
+
+
+	reg = 0x60;	/* Scrub Addr High */
+	hi = Get_NB32(dev, reg);
+
+	regx = 0x5C;	/* Scrub Addr Low */
+	lo = Get_NB32(dev, regx);
+	/* Scrub Addr High again, detect 32-bit wrap */
+	val = Get_NB32(dev, reg);
+	if(val != hi) {
+		/* High part changed between the two reads: take the new high
+		 * value and re-read the low part so both halves match. */
+		hi = val;	/* Scrub Addr Low again, if wrap occurred */
+		lo = Get_NB32(dev, regx);
+	}
+
+	val = hi << 24;
+	val |= lo >> 8;
+
+	return val;	/* ScrubAddr[39:8] */
+}
+
+
+/* Return 1 if DimmEcEn is set in Dram Config Low (0x90 + 0x100 * channel)
+ * of any DCT that has valid DIMMs, else 0.  Only DCT0 is examined in
+ * ganged mode.
+ */
+static u8 isDramECCEn_D(struct DCTStatStruc *pDCTstat)
+{
+	u32 reg;
+	u32 val;
+	u8 i;
+	u32 dev = pDCTstat->dev_dct;
+	u8 ch_end;
+	u8 isDimmECCEn = 0;
+
+	if(pDCTstat->GangedMode) {
+		ch_end = 1;
+	} else {
+		ch_end = 2;
+	}
+	for(i=0; i<ch_end; i++) {
+		if(pDCTstat->DIMMValidDCT[i] > 0){
+			reg = 0x90 + i * 0x100;	/* Dram Config Low */
+			val = Get_NB32(dev, reg);
+			if(val & (1<<DimmEcEn)) {
+				/* set local flag 'dram ecc capable' */
+				isDimmECCEn = 1;
+				break;
+			}
+		}
+	}
+	return isDimmECCEn;
+}
diff --git a/src/northbridge/amd/amdmct/mct/mctgr.c b/src/northbridge/amd/amdmct/mct/mctgr.c
new file mode 100644
index 0000000000..ecf5847db3
--- /dev/null
+++ b/src/northbridge/amd/amdmct/mct/mctgr.c
@@ -0,0 +1,88 @@
+/*
+ * This file is part of the LinuxBIOS project.
+ *
+ * Copyright (C) 2007 Advanced Micro Devices, Inc.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +static const u8 Tab_GRCLKDis[] = { 8,0,8,8,0,0,8,0 }; + + +u32 mct_AdjustMemClkDis_GR(struct DCTStatStruc *pDCTstat, u32 dct, + u32 DramTimingLo) +{ + /* Greayhound format -> Griffin format */ + u32 NewDramTimingLo; + u32 dev = pDCTstat->dev_dct; + u32 reg; + u32 reg_off = 0x100 * dct; + u32 val; + int i; + + DramTimingLo = val; + /* Dram Timing Low (owns Clock Enable bits) */ + NewDramTimingLo = Get_NB32(dev, 0x88 + reg_off); + if(mctGet_NVbits(NV_AllMemClks)==0) { + /*Special Jedec SPD diagnostic bit - "enable all clocks"*/ + if(!(pDCTstat->Status & (1<<SB_DiagClks))) { + for(i=0; i<MAX_DIMMS_SUPPORTED; i++) { + val = Tab_GRCLKDis[i]; + if(val<8) { + if(!(pDCTstat->DIMMValidDCT[dct] & (1<<val))) { + /* disable memclk */ + NewDramTimingLo |= (1<<(i+1)); + } + } + } + } + } + DramTimingLo &= ~(0xff<<24); + DramTimingLo |= NewDramTimingLo & (0xff<<24); + DramTimingLo &= (0x4d<<24); /* FIXME - enable all MemClks for now */ + + return DramTimingLo; +} + + +u32 mct_AdjustDramConfigLo_GR(struct DCTStatStruc *pDCTstat, u32 dct, u32 val) +{ + /* Greayhound format -> Griffin format */ + /*FIXME - BurstLength32 must be 0 when F3x44[DramEccEn]=1. */ +/* + ; mov cx,PA_NBMISC+44h ;MCA NB Configuration + ; call Get_NB32n_D + ; bt eax,22 ;EccEn + ; .if(CARRY?) 
+ ; btr eax,BurstLength32 + ; .endif +*/ + return val; +} + + +void mct_AdjustMemHoist_GR(struct DCTStatStruc *pDCTstat, u32 base, u32 HoleSize) +{ + u32 val; + if(base >= pDCTstat->DCTHoleBase) { + u32 dev = pDCTstat->dev_dct; + base += HoleSize; + base >>= 27 - 8; + val = Get_NB32(dev, 0x110); + val &= ~(0xfff<<11); + val |= (base & 0xfff)<<11; + Set_NB32(dev, 0x110, val); + } +} diff --git a/src/northbridge/amd/amdmct/mct/mcthdi.c b/src/northbridge/amd/amdmct/mct/mcthdi.c new file mode 100644 index 0000000000..ee347502bf --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mcthdi.c @@ -0,0 +1,33 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+/* Start hardware DRAM initialization on one DCT by setting the InitDram
+ * bit in DRAM Configuration Low (0x90 + 0x100 * dct).
+ */
+void mct_DramInit_Hw_D(struct MCTStatStruc *pMCTstat,
+				struct DCTStatStruc *pDCTstat, u8 dct)
+{
+	u32 val;
+	u32 reg;
+	u32 dev = pDCTstat->dev_dct;
+
+	/*flag for selecting HW/SW DRAM Init HW DRAM Init */
+	reg = 0x90 + 0x100 * dct;	/*DRAM Configuration Low */
+	val = Get_NB32(dev, reg);
+	val |= (1<<InitDram);
+	Set_NB32(dev, reg, val);
+}
diff --git a/src/northbridge/amd/amdmct/mct/mctmtr_d.c b/src/northbridge/amd/amdmct/mct/mctmtr_d.c
new file mode 100644
index 0000000000..d7cb3649e5
--- /dev/null
+++ b/src/northbridge/amd/amdmct/mct/mctmtr_d.c
@@ -0,0 +1,213 @@
+/*
+ * This file is part of the LinuxBIOS project.
+ *
+ * Copyright (C) 2007 Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#include "mct_d.h"
+
+static void SetMTRRrangeWB_D(u32 Base, u32 *pLimit, u32 *pMtrrAddr);
+static void SetMTRRrange_D(u32 Base, u32 *pLimit, u32 *pMtrrAddr, u16 MtrrType);
+
+/* Program the CPU view of memory: fixed MTRRs 0x250/0x258, variable MTRRs
+ * from MSR 0x204, TOP_MEM, TOM2 (only when memory exists above 4GB) and the
+ * two TOM2 bits in SYS_CFG.  All addresses are handled right-justified by
+ * 8 bits ([39:8] in a u32).  Also records Sub4GCacheTop and sets
+ * GSB_MTRRshort in GStatus when the variable MTRRs run out.
+ */
+void CPUMemTyping_D(struct MCTStatStruc *pMCTstat,
+			 struct DCTStatStruc *pDCTstatA)
+{
+	/* BSP only. Set the fixed MTRRs for common legacy ranges.
+	 * Set TOP_MEM and TOM2.
+	 * Set some variable MTRRs with WB Uncacheable type.
+	 */
+
+	u32 Bottom32bIO, Bottom40bIO, Cache32bTOP;
+	u32 val;
+	u32 addr;
+	u32 lo, hi;
+
+	/* Set temporary top of memory from Node structure data.
+	 * Adjust temp top of memory down to accommodate 32-bit IO space.
+	 * Bottom40bIO=top of memory, right justified 8 bits
+	 *	(defines dram versus IO space type)
+	 * Bottom32bIO=sub 4GB top of memory, right justified 8 bits
+	 *	(defines dram versus IO space type)
+	 * Cache32bTOP=sub 4GB top of WB cacheable memory,
+	 *	right justified 8 bits
+	 */
+
+	/* The NV value is in 16MB units (shifted by 24-8 into [39:8] form);
+	 * zero is bumped to 1 so the bottom of IO is never 0. */
+	val = mctGet_NVbits(NV_BottomIO);
+	if(val == 0)
+		val++;
+
+	Bottom32bIO = val << (24-8);
+
+	val = pMCTstat->SysLimit + 1;
+	if(val <= _4GB_RJ8) {
+		/* All memory is below 4GB: no TOM2, clamp the IO bottom to
+		 * the real top of memory. */
+		Bottom40bIO = 0;
+		if(Bottom32bIO >= val)
+			Bottom32bIO = val;
+	} else {
+		Bottom40bIO = val;
+	}
+
+	val = mctGet_NVbits(NV_BottomUMA);
+	if(val == 0)
+		val++;
+
+	val <<= (24-8);
+	if(val > Bottom32bIO)
+		val = Bottom32bIO;
+
+	Cache32bTOP = val;
+
+	/*======================================================================
+	 Set default values for CPU registers
+	======================================================================*/
+
+	/* NOTE : For LinuxBIOS, we don't need to set mtrr enables here because
+	they are still enable from cache_as_ram.inc */
+
+	addr = 0x250;
+	lo = 0x1E1E1E1E;
+	hi = lo;
+	_WRMSR(addr, lo, hi);		/* 0 - 512K = WB Mem */
+	addr = 0x258;
+	_WRMSR(addr, lo, hi);		/* 512K - 640K = WB Mem */
+
+	/*======================================================================
+	  Set variable MTRR values
+	 ======================================================================*/
+	/* NOTE: for LinuxBIOS change from 0x200 to 0x204: LinuxBIOS is using
+		0x200, 0x201 for [1M, CONFIG_TOP_MEM)
+		0x202, 0x203 for ROM Caching
+		 */
+	addr = 0x204;	/* MTRR phys base 2*/
+			/* use TOP_MEM as limit*/
+			/* Limit=TOP_MEM|TOM2*/
+			/* Base=0*/
+	print_tx("\t CPUMemTyping: Cache32bTOP:", Cache32bTOP);
+	SetMTRRrangeWB_D(0, &Cache32bTOP, &addr);
+				/* Base */
+				/* Limit */
+				/* MtrrAddr */
+	/* The helper reports exhaustion by storing -1 in addr and lowers
+	 * Cache32bTOP to what it actually covered. */
+	if(addr == -1)		/* ran out of MTRRs?*/
+		pMCTstat->GStatus |= 1<<GSB_MTRRshort;
+
+	pMCTstat->Sub4GCacheTop = Cache32bTOP<<8;
+
+	/*======================================================================
+	 Set TOP_MEM and TOM2 CPU registers
+	======================================================================*/
+	addr = TOP_MEM;
+	lo = Bottom32bIO<<8;
+	hi = Bottom32bIO>>24;
+	_WRMSR(addr, lo, hi);
+	print_tx("\t CPUMemTyping: Bottom32bIO:", Bottom32bIO);
+	print_tx("\t CPUMemTyping: Bottom40bIO:", Bottom40bIO);
+	if(Bottom40bIO) {
+		hi = Bottom40bIO >> 24;
+		lo = Bottom40bIO << 8;
+		/* assumes the TOM2 MSR is TOP_MEM + 3 - TODO confirm against
+		 * the TOP_MEM definition */
+		addr += 3;		/* TOM2 */
+		_WRMSR(addr, lo, hi);
+	}
+	addr = 0xC0010010;		/* SYS_CFG */
+	_RDMSR(addr, &lo, &hi);
+	if(Bottom40bIO) {
+		lo |= (1<<21);		/* MtrrTom2En=1 */
+		lo |= (1<<22);		/* Tom2ForceMemTypeWB */
+	} else {
+		lo &= ~(1<<21);		/* MtrrTom2En=0 */
+		lo &= ~(1<<22);		/* Tom2ForceMemTypeWB */
+	}
+	_WRMSR(addr, lo, hi);
+}
+
+
+/* Thin wrapper: describe [Base, *pLimit) with memory type 6 (WB). */
+static void SetMTRRrangeWB_D(u32 Base, u32 *pLimit, u32 *pMtrrAddr)
+{
+	/*set WB type*/
+	SetMTRRrange_D(Base, pLimit, pMtrrAddr, 6);
+}
+
+
+/* Base, *pLimit: range in [39:8] form.  *pMtrrAddr: in, first MTRR base MSR
+ * to use; out, next free MSR, or -1 when the range could not be fully
+ * covered (in which case *pLimit is lowered to the covered end).
+ */
+static void SetMTRRrange_D(u32 Base, u32 *pLimit, u32 *pMtrrAddr, u16 MtrrType)
+{
+	/* Program MTRRs to describe given range as given cache type.
+	 * Use MTRR pairs starting with the given MTRRphys Base address,
+	 * and use as many as is required up to (excluding) MSR 020C, which
+	 * is reserved for OS.
+	 *
+	 * "Limit" in the context of this procedure is not the numerically
+	 * correct limit, but rather the Last address+1, for purposes of coding
+	 * efficiency and readability. Size of a region is then Limit-Base.
+	 *
+	 * 1. Size of each range must be a power of two
+	 * 2. Each range must be naturally aligned (Base is same as size)
+	 *
+	 * There are two code paths: the ascending path and descending path
+	 * (analogous to bsf and bsr), where the next limit is a function of the
+	 * next set bit in a forward or backward sequence of bits (as a function
+	 * of the Limit). We start with the ascending path, to ensure that
+	 * regions are naturally aligned, then we switch to the descending path
+	 * to maximize MTRR usage efficiency. Base=0 is a special case where we
+	 * start with the descending path. Correct Mask for region is
+	 * 2comp(Size-1)-1, which is 2comp(Limit-Base-1)-1
+	 */
+
+	u32 curBase, curLimit, curSize;
+	u32 val, valx;
+	u32 addr;
+
+	val = curBase = Base;
+	curLimit = *pLimit;
+	addr = *pMtrrAddr;
+	/* One base/mask MSR pair is consumed per iteration. */
+	while((addr >= 0x200) && (addr < 0x20C) && (val < *pLimit)) {
+		/* start with "ascending" code path */
+		/* alignment (largest block size)*/
+		valx = 1 << bsf(curBase);
+		curSize = valx;
+
+		/* largest legal limit, given current non-zero range Base*/
+		valx += curBase;
+		if((curBase == 0) || (*pLimit < valx)) {
+			/* flop direction to "descending" code path*/
+			valx = 1<<bsr(*pLimit - curBase);
+			curSize = valx;
+			valx += curBase;
+		}
+		curLimit = valx;		/*eax=curBase, edx=curLimit*/
+		/* Split the [39:8] value into the MSR's high and low dwords. */
+		valx = val>>24;
+		val <<= 8;
+
+		/* now program the MTRR */
+		val |= MtrrType;		/* set cache type (UC or WB)*/
+		_WRMSR(addr, val, valx);	/* prog. MTRR with current region Base*/
+		val = ((~(curSize - 1))+1) - 1;	/* Size-1*/ /*Mask=2comp(Size-1)-1*/
+		valx = (val >> 24) | (0xff00);	/* GH have 48 bits addr */
+		val <<= 8;
+		val |= ( 1 << 11);			/* set MTRR valid*/
+		addr++;
+		_WRMSR(addr, val, valx);	/* prog. MTRR with current region Mask*/
+		val = curLimit;
+		curBase = val;			/* next Base = current Limit (loop exit)*/
+		addr++;					/* next MTRR pair addr */
+	}
+	/* Range not fully covered: report how far we got and flag it. */
+	if(val < *pLimit) {
+		*pLimit = val;
+		addr = -1;
+	}
+	*pMtrrAddr = addr;
+}
+
+
diff --git a/src/northbridge/amd/amdmct/mct/mctndi_d.c b/src/northbridge/amd/amdmct/mct/mctndi_d.c
new file mode 100644
index 0000000000..8e96a567e5
--- /dev/null
+++ b/src/northbridge/amd/amdmct/mct/mctndi_d.c
@@ -0,0 +1,237 @@
+/*
+ * This file is part of the LinuxBIOS project.
+ *
+ * Copyright (C) 2007 Advanced Micro Devices, Inc.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + + +void InterleaveNodes_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + + /* Applies Node memory interleaving if enabled and if all criteria are met. */ + u8 Node; + u32 Base; + u32 MemSize, MemSize0 = 0; + u32 Dct0MemSize = 0, DctSelBase, DctSelBaseOffset; + u8 Nodes; + u8 NodesWmem; + u8 DoIntlv; + u8 _NdIntCap; + u8 _SWHole; + u8 HWHoleSz; + u32 DramHoleAddrReg; + u32 HoleBase; + u32 dev0; + u32 reg0; + u32 val; + u8 i; + struct DCTStatStruc *pDCTstat; + + DoIntlv = mctGet_NVbits(NV_NodeIntlv); + + _NdIntCap = 0; + HWHoleSz = 0; /*For HW remapping, NOT Node hoisting. 
*/ + + pDCTstat = pDCTstatA + 0; + dev0 = pDCTstat->dev_host; + Nodes = ((Get_NB32(dev0, 0x60) >> 4) & 0x7) + 1; + + + dev0 = pDCTstat->dev_map; + reg0 = 0x40; + + NodesWmem = 0; + Node = 0; + + while (DoIntlv && (Node < Nodes)) { + pDCTstat = pDCTstatA + Node; + if (pMCTstat->GStatus & (1 << GSB_SpIntRemapHole)) { + pMCTstat->GStatus |= 1 << GSB_HWHole; + _SWHole = 0; + } else if (pDCTstat->Status & (1 << SB_SWNodeHole)) { + _SWHole = 1; + } else { + _SWHole = 0; + } + + if(!_SWHole) { + Base = Get_NB32(dev0, reg0); + if (Base & 1) { + NodesWmem++; + Base &= 0xFFFF0000; /* Base[39:8] */ + + if (pDCTstat->Status & (1 << SB_HWHole )) { + + /* to get true amount of dram, + * subtract out memory hole if HW dram remapping */ + DramHoleAddrReg = Get_NB32(pDCTstat->dev_map, 0xF0); + HWHoleSz = DramHoleAddrReg >> 16; + HWHoleSz = (((~HWHoleSz) + 1) & 0xFF); + HWHoleSz <<= 24-8; + } + /* check to see if the amount of memory on each channel + * are the same on all nodes */ + + DctSelBase = Get_NB32(pDCTstat->dev_dct, 0x114); + if(DctSelBase) { + DctSelBase <<= 8; + if ( pDCTstat->Status & (1 << SB_HWHole)) { + if (DctSelBase >= 0x1000000) { + DctSelBase -= HWHoleSz; + } + } + DctSelBaseOffset -= Base; + if (Node == 0) { + Dct0MemSize = DctSelBase; + } else if (DctSelBase != Dct0MemSize) { + break; + } + } + + MemSize = Get_NB32(dev0, reg0 + 4); + MemSize &= 0xFFFF0000; + MemSize += 0x00010000; + MemSize -= Base; + if ( pDCTstat->Status & (1 << SB_HWHole)) { + MemSize -= HWHoleSz; + } + if (Node == 0) { + MemSize0 = MemSize; + } else if (MemSize0 != MemSize) { + break; + } + } else { + break; + } + } else { + break; + } + Node++; + reg0 += 8; + } + + if (Node == Nodes) { + /* if all nodes have memory and no Node had SW memhole */ + if (Nodes == 2 || Nodes == 4 || Nodes == 8) + _NdIntCap = 1; + } + + if (!_NdIntCap) + DoIntlv = 0; + + + if (pMCTstat->GStatus & 1 << (GSB_SpIntRemapHole)) { + HWHoleSz = pMCTstat->HoleBase; + if (HWHoleSz == 0) { + HWHoleSz = 
mctGet_NVbits(NV_BottomIO) & 0xFF; + HWHoleSz <<= 24-8; + } + HWHoleSz = ((~HWHoleSz) + 1) & 0x00FF0000; + } + + if (DoIntlv) { + MCTMemClr_D(pMCTstat,pDCTstatA); + /* Program Interleaving enabled on Node 0 map only.*/ + MemSize0 <<= bsf(Nodes); /* MemSize=MemSize*2 (or 4, or 8) */ + Dct0MemSize <<= bsf(Nodes); + MemSize0 += HWHoleSz; + Base = ((Nodes - 1) << 8) | 3; + reg0 = 0x40; + Node = 0; + while(Node < Nodes) { + Set_NB32(dev0, reg0, Base); + MemSize = MemSize0; + MemSize--; + MemSize &= 0xFFFF0000; + MemSize |= Node << 8; /* set IntlvSel[2:0] field */ + MemSize |= Node; /* set DstNode[2:0] field */ + Set_NB32(dev0, reg0 + 4, MemSize0); + reg0 += 8; + Node++; + } + + /* set base/limit to F1x120/124 per Node */ + Node = 0; + while(Node < Nodes) { + pDCTstat = pDCTstatA + Node; + pDCTstat->NodeSysBase = 0; + MemSize = MemSize0; + MemSize -= HWHoleSz; + MemSize--; + pDCTstat->NodeSysLimit = MemSize; + Set_NB32(pDCTstat->dev_map, 0x120, Node << 21); + MemSize = MemSize0; + MemSize--; + MemSize >>= 19; + val = Base; + val &= 0x700; + val <<= 13; + val |= MemSize; + Set_NB32(pDCTstat->dev_map, 0x124, val); + + if (pMCTstat->GStatus & (1 << GSB_HWHole)) { + HoleBase = pMCTstat->HoleBase; + if (Dct0MemSize >= HoleBase) { + val = HWHoleSz; + if( Node == 0) { + val += Dct0MemSize; + } + } else { + val = HWHoleSz + Dct0MemSize; + } + + val >>= 8; /* DramHoleOffset */ + HoleBase <<= 8; /* DramHoleBase */ + val |= HoleBase; + val |= 1 << DramMemHoistValid; + val |= 1 << DramHoleValid; + Set_NB32(pDCTstat->dev_map, 0xF0, val); + } + + + Set_NB32(pDCTstat->dev_dct, 0x114, Dct0MemSize >> 8); /* DctSelBaseOffset */ + val = Get_NB32(pDCTstat->dev_dct, 0x110); + val &= 0x7FF; + val |= Dct0MemSize >> 8; + Set_NB32(pDCTstat->dev_dct, 0x110, val); /* DctSelBaseAddr */ + print_tx("InterleaveNodes: DRAM Controller Select Low Register = ", val); + Node++; + } + + + /* Copy Node 0 into other Nodes' CSRs */ + Node = 1; + while (Node < Nodes) { + pDCTstat = pDCTstatA + Node; + + for (i 
= 0x40; i <= 0x80; i++) { + val = Get_NB32(dev0, i); + Set_NB32(pDCTstat->dev_map, i, val); + } + + val = Get_NB32(dev0, 0xF0); + Set_NB32(pDCTstat->dev_map, 0xF0, val); + Node++; + } + pMCTstat->GStatus = (1 << GSB_NodeIntlv); + } + print_tx("InterleaveNodes_D: Status ", pDCTstat->Status); + print_tx("InterleaveNodes_D: ErrStatus ", pDCTstat->ErrStatus); + print_tx("InterleaveNodes_D: ErrCode ", pDCTstat->ErrCode); + print_t("InterleaveNodes_D: Done\n"); +} diff --git a/src/northbridge/amd/amdmct/mct/mctpro_d.c b/src/northbridge/amd/amdmct/mct/mctpro_d.c new file mode 100644 index 0000000000..70d92c6b72 --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mctpro_d.c @@ -0,0 +1,406 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+static u32 CheckNBCOFAutoPrechg(struct DCTStatStruc *pDCTstat, u32 dct);
+static u8 mct_AdjustDQSPosDelay_D(struct DCTStatStruc *pDCTstat, u8 dly);
+
+/* Intentionally empty. */
+void EarlySampleSupport_D(void)
+{
+}
+
+
+/* On DR-A0A/A1B/A2 parts, rewrite the top nibble of val: cleared, then bit 28
+ * set when the channel has more than one MAdimm.  Other revisions get val
+ * back unchanged.
+ */
+u32 procOdtWorkaround(struct DCTStatStruc *pDCTstat, u32 dct, u32 val)
+{
+	u32 tmp;
+	tmp = pDCTstat->LogicalCPUID;
+	if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) {
+		val &= 0x0FFFFFFF;
+		if(pDCTstat->MAdimms[dct] > 1)
+			val |= 0x10000000;
+	}
+
+	return val;
+}
+
+
+/* On DR-A0A/A1B/A2 parts, force the two-bit field at bits 13:12 of val to be
+ * non-zero (set to 1 when it is 0).
+ */
+u32 OtherTiming_A_D(struct DCTStatStruc *pDCTstat, u32 val)
+{
+	/* Bug#10695:One MEMCLK Bubble Writes Don't Do X4 X8 Switching Correctly
+	 * Solution: BIOS should set DRAM Timing High[Twrwr] > 00b
+	 * ( F2x[1, 0]8C[1:0] > 00b). Silicon Status: Fixed in Rev B
+	 * FIXME: check if this is still required.
+	 */
+	u32 tmp;
+	tmp = pDCTstat->LogicalCPUID;
+	if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) {
+		if(!(val & (3<<12) ))
+			val |= 1<<12;
+	}
+	return val;
+}
+
+
+/* On DR-A0A/A1B/A2 parts, when CheckNBCOFAutoPrechg() reports the critical
+ * NB/MemClk ratio, set ForceAutoPchg (plus BurstLength32 when unganged) in
+ * Dram Configuration Low and force Trc to 0Fh in Dram Timing Low.
+ */
+void mct_ForceAutoPrecharge_D(struct DCTStatStruc *pDCTstat, u32 dct)
+{
+	u32 tmp;
+	u32 reg;
+	u32 reg_off;
+	u32 dev;
+	u32 val;
+
+	tmp = pDCTstat->LogicalCPUID;
+	if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) {
+		if(CheckNBCOFAutoPrechg(pDCTstat, dct)) {
+			dev = pDCTstat->dev_dct;
+			reg_off = 0x100 * dct;
+			reg = 0x90 + reg_off;	/* Dram Configuration Lo */
+			val = Get_NB32(dev, reg);
+			val |= 1<<ForceAutoPchg;
+			if(!pDCTstat->GangedMode)
+				val |= 1<<BurstLength32;
+			Set_NB32(dev, reg, val);
+
+			reg = 0x88 + reg_off;	/* cx=Dram Timing Lo */
+			val = Get_NB32(dev, reg);
+			val |= 0x000F0000;	/* Trc = 0Fh */
+			Set_NB32(dev, reg, val);
+		}
+	}
+}
+
+
+/* Clear PrefDramTrainMode in register 0x11c on every present DR-A0A/A1B/A2
+ * node.  The walk stops at the first node that is not present.
+ */
+void mct_EndDQSTraining_D(struct MCTStatStruc *pMCTstat,
+				struct DCTStatStruc *pDCTstatA)
+{
+	/* Bug#13341: Prefetch is getting killed when the limit is reached in
+	 * PrefDramTrainMode
+	 * Solution: Explicitly clear the PrefDramTrainMode bit after training
+	 * sequence in order to ensure resumption of normal HW prefetch
+	 * behavior.
+	 * NOTE -- this has been documented with a note at the end of this
+	 * section in the BKDG (although, admittedly, the note does not really
+	 * stand out).
+	 * Silicon Status: Fixed in Rev B ( confirm)
+	 * FIXME: check this.
+	 */
+
+	u32 tmp;
+	u32 dev;
+	u32 reg;
+	u32 val;
+	u32 Node;
+
+	for(Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
+		struct DCTStatStruc *pDCTstat;
+		pDCTstat = pDCTstatA + Node;
+
+		if(!pDCTstat->NodePresent) break;
+
+		tmp = pDCTstat->LogicalCPUID;
+		if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) {
+			dev = pDCTstat->dev_dct;
+			reg = 0x11c;
+			val = Get_NB32(dev, reg);
+			val &= ~(1<<PrefDramTrainMode);
+			Set_NB32(dev, reg, val);
+		}
+	}
+}
+
+
+
+
+/* Pre-training workarounds for DR-A0A/A1B/A2: set the low 10 bits of phy
+ * index 0x0d004007 (written back through 0x0d0f4f07), program RdPtrInit = 5
+ * in the DRAM Control register (0x78) and zero the indexed registers
+ * 0x30..0x45 on both channels.
+ */
+void mct_BeforeDQSTrain_Samp_D(struct MCTStatStruc *pMCTstat,
+				struct DCTStatStruc *pDCTstat)
+{
+	/* Bug#15115: Uncertainty In The Sync Chain Leads To Setup Violations
+	 * In TX FIFO
+	 * Solution: BIOS should program DRAM Control Register[RdPtrInit] = 5h,
+	 * (F2x[1, 0]78[3:0] = 5h).
+	 * Silicon Status: Fixed In Rev B0
+	 */
+
+	/* Bug#15880: Determine validity of reset settings for DDR PHY timing
+	 * regi..
+	 * Solution: At least, set WrDqs fine delay to be 0 for DDR2 training.
+	 */
+
+	u32 dev;
+	u32 reg_off;
+	u32 index_reg;
+	u32 index;
+	u32 reg;
+	u32 val;
+	u32 tmp;
+	u32 Channel;
+
+
+	tmp = pDCTstat->LogicalCPUID;
+	if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) {
+
+		dev = pDCTstat->dev_dct;
+		index = 0;
+
+		for(Channel = 0; Channel<2; Channel++) {
+			index_reg = 0x98 + 0x100 * Channel;
+			val = Get_NB32_index_wait(dev, index_reg, 0x0d004007);
+			val |= 0x3ff;
+			Set_NB32_index_wait(dev, index_reg, 0x0d0f4f07, val);
+		}
+
+		for(Channel = 0; Channel<2; Channel++) {
+			if(pDCTstat->GangedMode && Channel)
+				break;
+			reg_off = 0x100 * Channel;
+			reg = 0x78 + reg_off;
+			val = Get_NB32(dev, reg);
+			/* RdPtrInit is the 4-bit field [3:0] (see Bug#15115
+			 * above); clear all four bits so the field ends up as
+			 * exactly 5h.  The previous 0x07 mask left bit 3 as
+			 * found. */
+			val &= ~(0x0F);
+			val |= 5;
+			Set_NB32(dev, reg, val);
+		}
+
+		for(Channel = 0; Channel<2; Channel++) {
+			reg_off = 0x100 * Channel;
+			val = 0;
+			index_reg = 0x98 + reg_off;
+			for( index = 0x30; index < (0x45 + 1); index++) {
+				Set_NB32_index_wait(dev, index_reg, index, val);
+			}
+		}
+
+	}
+}
+
+
+/* On DR-A0A/A1B/A2 parts, replace bit 27 of value with bit 10 read from phy
+ * index 0x0D004201 of the given DCT; other revisions get value back
+ * unchanged.
+ */
+u32 Modify_D3CMP(struct DCTStatStruc *pDCTstat, u32 dct, u32 value)
+{
+	/* Errata#189: Reads To Phy Driver Calibration Register and Phy
+	 * Predriver Calibration Register Do Not Return Bit 27.
+	 * Solution: See #41322 for details.
+	 * BIOS can modify bit 27 of the Phy Driver Calibration register
+	 * as follows:
+	 * 1. Read F2x[1, 0]9C_x09
+	 * 2. Read F2x[1, 0]9C_x0D004201
+	 * 3. Set F2x[1, 0]9C_x09[27] = F2x[1, 0]9C_x0D004201[10]
+	 * BIOS can modify bit 27 of the Phy Predriver Calibration register
+	 * as follows:
+	 * 1. Read F2x[1, 0]9C_x0A
+	 * 2. Read F2x[1, 0]9C_x0D004209
+	 * 3. Set F2x[1, 0]9C_x0A[27] = F2x[1, 0]9C_x0D004209[10]
+	 * Silicon Status: Fixed planned for DR-B0
+	 */
+
+	u32 dev;
+	u32 index_reg;
+	u32 index;
+	u32 val;
+	u32 tmp;
+
+	tmp = pDCTstat->LogicalCPUID;
+	if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) {
+		dev = pDCTstat->dev_dct;
+		index_reg = 0x98 + 0x100 * dct;
+		index = 0x0D004201;
+		val = Get_NB32_index_wait(dev, index_reg, index);
+		value &= ~(1<<27);
+		value |= ((val>>10) & 1) << 27;
+	}
+	return value;
+}
+
+
+/* On DR-A0A/A1B/A2 parts, copy channel 0's CH_ODC_CTL and CH_ADDR_TMG
+ * settings to channel 1.
+ */
+void SyncSetting(struct DCTStatStruc *pDCTstat)
+{
+	/* Errata#198: AddrCmdSetup, CsOdtSetup, and CkeSetup Require Identical
+	 * Programming For Both Channels in Ganged Mode
+	 * Solution: The BIOS must program the following DRAM timing parameters
+	 * the same for both channels:
+	 * 1. F2x[1, 0]9C_x04[21] (AddrCmdSetup)
+	 * 2. F2x[1, 0]9C_x04[15] (CsOdtSetup)
+	 * 3. F2x[1, 0]9C_x04[5]) (CkeSetup)
+	 * That is, if the AddrCmdSetup, CsOdtSetup, or CkeSetup is
+	 * set to 1'b1 for one of the controllers, then the corresponding
+	 * AddrCmdSetup, CsOdtSetup, or CkeSetup must be set to 1'b1 for the
+	 * other controller.
+	 * Silicon Status: Fix TBD
+	 */
+
+	u32 tmp;
+	tmp = pDCTstat->LogicalCPUID;
+	if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) {
+		pDCTstat->CH_ODC_CTL[1] = pDCTstat->CH_ODC_CTL[0];
+		pDCTstat->CH_ADDR_TMG[1] = pDCTstat->CH_ADDR_TMG[0];
+	}
+}
+
+
+/* Return 1 when the NB clock / memory clock ratio computed below lies
+ * strictly between 3 and 4, else 0.
+ */
+static u32 CheckNBCOFAutoPrechg(struct DCTStatStruc *pDCTstat, u32 dct)
+{
+	u32 ret = 0;
+	u32 lo, hi;
+	u32 msr;
+	u32 val;
+	u32 valx, valy;
+	u32 NbDid;
+
+	/* 3 * (Fn2xD4[NBFid]+4)/(2^NbDid)/(3+Fn2x94[MemClkFreq]) */
+	msr = 0xC0010071;
+	_RDMSR(msr, &lo, &hi);
+	NbDid = (lo>>22) & 1;
+
+	/* The NbDid divisor is applied by scaling the denominator (valx)
+	 * instead of dividing the numerator. */
+	val = Get_NB32(pDCTstat->dev_dct, 0x94 + 0x100 * dct);
+	valx = ((val & 0x07) + 3)<<NbDid;
+	print_tx("MemClk:", valx >> NbDid);
+
+	val = Get_NB32(pDCTstat->dev_nbmisc, 0xd4);
+	valy = ((val & 0x1f) + 4) * 3;
+	print_tx("NB COF:", valy >> NbDid);
+
+	val = valy/valx;
+	if((val==3) && (valy%valx)) /* 3 < NClk/MemClk < 4 */
+		ret = 1;
+
+	return ret;
+}
+
+
+/* On DR-A0A/A1B/A2 parts running at Speed 2 or 3, set bits 7:4 of phy index
+ * 0x0D00E001 to 8 (written back through index 0x0D01E001) on the given DCT,
+ * or on both channels in ganged mode.
+ */
+void mct_BeforeDramInit_D(struct DCTStatStruc *pDCTstat, u32 dct)
+{
+	u32 tmp;
+	u32 Speed;
+	u32 ch, ch_start, ch_end;
+	u32 index_reg;
+	u32 index;
+	u32 dev;
+	u32 val;
+
+
+	tmp = pDCTstat->LogicalCPUID;
+	if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) {
+		Speed = pDCTstat->Speed;
+		/* MemClkFreq = 333MHz or 533Mhz */
+		if((Speed == 3) || (Speed == 2)) {
+			if(pDCTstat->GangedMode) {
+				ch_start = 0;
+				ch_end = 2;
+			} else {
+				ch_start = dct;
+				ch_end = dct+1;
+			}
+			dev = pDCTstat->dev_dct;
+			index = 0x0D00E001;
+			for(ch=ch_start; ch<ch_end; ch++) {
+				index_reg = 0x98 + 0x100 * ch;
+				val = Get_NB32_index(dev, index_reg, 0x0D00E001);
+				val &= ~(0xf0);
+				val |= 0x80;
+				Set_NB32_index(dev, index_reg, 0x0D01E001, val);
+			}
+		}
+
+	}
+}
+
+
+/* Return 1 when the low 5 bits of dly fall inside [MIN_FENCE, MAX_FENCE],
+ * else 0.
+ */
+static u8 mct_AdjustDelay_D(struct DCTStatStruc *pDCTstat, u8 dly)
+{
+	u8 skip = 0;
+	dly &= 0x1f;
+	if ((dly >= MIN_FENCE) && (dly <= MAX_FENCE))
+		skip = 1;
+
+	return skip;
+}
+
+
+static u8 mct_checkFenceHoleAdjust_D(struct MCTStatStruc *pMCTstat,
+				struct DCTStatStruc *pDCTstat, u8 DQSDelay,
+ u8 ChipSel, u8 *result) +{ + u8 ByteLane; + u32 tmp; + + tmp = pDCTstat->LogicalCPUID; + if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) { + if (pDCTstat->Direction == DQS_WRITEDIR) { + if ((pDCTstat->Speed == 2) || (pDCTstat->Speed == 3)) { + if(DQSDelay == 13) { + if (*result == 0xFF) { + for (ByteLane = 0; ByteLane < 8; ByteLane++) { + pDCTstat->DQSDelay = 13; + pDCTstat->ByteLane = ByteLane; + /* store the value into the data structure */ + StoreDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel); + } + return 1; + } + } + } + if (mct_AdjustDQSPosDelay_D(pDCTstat, DQSDelay)) { + *result = 0; + } + } + } + return 0; +} + + +static u8 mct_AdjustDQSPosDelay_D(struct DCTStatStruc *pDCTstat, u8 dly) +{ + u8 skip = 0; + + dly &= 0x1f; + if ((dly >= MIN_DQS_WR_FENCE) && (dly <= MAX_DQS_WR_FENCE)) + skip = 1; + + return skip; + +} + +static void beforeInterleaveChannels_D(struct DCTStatStruc *pDCTstatA, u8 *enabled) { + + if (pDCTstatA->LogicalCPUID & (AMD_DR_Ax)) + *enabled = 0; +} + + +static u8 mctDoAxRdPtrInit_D(struct DCTStatStruc *pDCTstat, u8 *Rdtr) +{ + u32 tmp; + + tmp = pDCTstat->LogicalCPUID; + if ((tmp == AMD_DR_A0A) || (tmp == AMD_DR_A1B) || (tmp == AMD_DR_A2)) { + *Rdtr = 5; + return 1; + } + return 0; +} + + +static void mct_AdjustScrub_D(struct DCTStatStruc *pDCTstat, u16 *scrub_request) { + + /* Erratum #202: disable DCache scrubber for Ax parts */ + + if (pDCTstat->LogicalCPUID & (AMD_DR_Ax)) { + *scrub_request = 0; + pDCTstat->ErrStatus |= 1 << SB_DCBKScrubDis; + } +} + diff --git a/src/northbridge/amd/amdmct/mct/mctsrc.c b/src/northbridge/amd/amdmct/mct/mctsrc.c new file mode 100644 index 0000000000..c781ffd6b0 --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mctsrc.c @@ -0,0 +1,1121 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/****************************************************************************** + Description: Receiver En and DQS Timing Training feature for DDR 2 MCT +******************************************************************************/ + +static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Pass); +static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat, + u8 rcvrEnDly, u8 Channel, + u8 receiver, u8 Pass); +static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u32 addr, u8 channel, + u8 pattern, u8 Pass); +static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Channel); +static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Channel); +static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, + u8 RcvrEnDly, u8 where, + u8 Channel, u8 Receiver, + u32 dev, u32 index_reg, + u8 Addl_Index, u8 Pass); +static void CalcMaxLatency_D(struct DCTStatStruc *pDCTstat, + u8 DQSRcvrEnDly, u8 Channel); +static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, 
u8 Channel, u8 DQSRcvEnDly); +static void mct_SetDQSRcvEn_D(struct DCTStatStruc *pDCTstat, u32 val); +static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct); +static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat); + + +/* Warning: These must be located so they do not cross a logical 16-bit + segment boundary! */ +const static u32 TestPattern0_D[] = { + 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, +}; +const static u32 TestPattern1_D[] = { + 0x55555555, 0x55555555, 0x55555555, 0x55555555, + 0x55555555, 0x55555555, 0x55555555, 0x55555555, + 0x55555555, 0x55555555, 0x55555555, 0x55555555, + 0x55555555, 0x55555555, 0x55555555, 0x55555555, +}; +const static u32 TestPattern2_D[] = { + 0x12345678, 0x87654321, 0x23456789, 0x98765432, + 0x59385824, 0x30496724, 0x24490795, 0x99938733, + 0x40385642, 0x38465245, 0x29432163, 0x05067894, + 0x12349045, 0x98723467, 0x12387634, 0x34587623, +}; + +static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u32 *buffer, u8 pass) +{ + /* + * 1. Copy the alpha and Beta patterns from ROM to Cache, + * aligning on 16 byte boundary + * 2. Set the ptr to DCTStatstruc.PtrPatternBufA for Alpha + * 3. Set the ptr to DCTStatstruc.PtrPatternBufB for Beta + */ + + u32 *buf_a; + u32 *buf_b; + u32 *p_A; + u32 *p_B; + u8 i; + + buf_a = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0)); + buf_b = buf_a + 32; //?? 
+ p_A = (u32 *)SetupDqsPattern_1PassB(pass); + p_B = (u32 *)SetupDqsPattern_1PassA(pass); + + for(i=0;i<16;i++) { + buf_a[i] = p_A[i]; + buf_b[i] = p_B[i]; + } + + pDCTstat->PtrPatternBufA = (u32)buf_a; + pDCTstat->PtrPatternBufB = (u32)buf_b; +} + + +void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Pass) +{ + if(mct_checkNumberOfDqsRcvEn_1Pass(Pass)) + dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass); +} + + +static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Pass) +{ + u8 Channel, RcvrEnDly, RcvrEnDlyRmin; + u8 Test0, Test1, CurrTest, CurrTestSide0, CurrTestSide1; + u8 CTLRMaxDelay, _2Ranks, PatternA, PatternB; + u8 Addl_Index = 0; + u8 Receiver; + u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0; + u8 RcvrEnDlyLimit, Final_Value, MaxDelay_CH[2]; + u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B; + u32 PatternBuffer[64+4]; /* FIXME: need increase 8? */ + u32 Errors; + + u32 val; + u32 reg; + u32 dev; + u32 index_reg; + u32 ch_start, ch_end, ch; + u32 msr; + u32 cr4; + u32 lo, hi; + + u8 valid; + u32 tmp; + u8 LastTest; + + print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0); + print_debug_dqs("TrainRcvEn: Pass", Pass, 0); + + + dev = pDCTstat->dev_dct; + ch_start = 0; + if(!pDCTstat->GangedMode) { + ch_end = 2; + } else { + ch_end = 1; + } + + for (ch = ch_start; ch < ch_end; ch++) { + reg = 0x78 + (0x100 * ch); + val = Get_NB32(dev, reg); + val &= ~(0x3ff << 22); + val |= (0x0c8 << 22); /* Max Rd Lat */ + Set_NB32(dev, reg, val); + } + + Final_Value = 1; + if (Pass == FirstPass) { + mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat); + } else { + pDCTstat->DimmTrainFail = 0; + pDCTstat->CSTrainFail = ~pDCTstat->CSPresent; + } + print_t("TrainRcvrEn: 1\n"); + + cr4 = read_cr4(); + if(cr4 & ( 1 << 9)) { /* save the old value */ + _SSE2 = 1; + } + cr4 |= (1 << 9); /* OSFXSR enable SSE2 */ + write_cr4(cr4); + print_t("TrainRcvrEn: 2\n"); + + msr = HWCR; + _RDMSR(msr, &lo, &hi); + 
//FIXME: Why use SSEDIS + if(lo & (1 << 17)) { /* save the old value */ + _Wrap32Dis = 1; + } + lo |= (1 << 17); /* HWCR.wrap32dis */ + lo &= ~(1 << 15); /* SSEDIS */ + _WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */ + print_t("TrainRcvrEn: 3\n"); + + _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat); + + + if(pDCTstat->Speed == 1) { + pDCTstat->T1000 = 5000; /* get the T1000 figure (cycle time (ns)*1K */ + } else if(pDCTstat->Speed == 2) { + pDCTstat->T1000 = 3759; + } else if(pDCTstat->Speed == 3) { + pDCTstat->T1000 = 3003; + } else if(pDCTstat->Speed == 4) { + pDCTstat->T1000 = 2500; + } else if(pDCTstat->Speed == 5) { + pDCTstat->T1000 = 1876; + } else { + pDCTstat->T1000 = 0; + } + + SetupRcvrPattern(pMCTstat, pDCTstat, PatternBuffer, Pass); + print_t("TrainRcvrEn: 4\n"); + + Errors = 0; + dev = pDCTstat->dev_dct; + CTLRMaxDelay = 0; + + for (Channel = 0; Channel < 2; Channel++) { + print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1); + print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1); + pDCTstat->Channel = Channel; + + MaxDelay_CH[Channel] = 0; + index_reg = 0x98 + 0x100 * Channel; + + Receiver = mct_InitReceiver_D(pDCTstat, Channel); + /* There are four receiver pairs, loosely associated with chipselects. 
*/ + for (; Receiver < 8; Receiver += 2) { + Addl_Index = (Receiver >> 1) * 3 + 0x10; + LastTest = DQS_FAIL; + + /* mct_ModifyIndex_D */ + RcvrEnDlyRmin = RcvrEnDlyLimit = 0xff; + + print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2); + + if(!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) { + print_t("\t\t\tRank not enabled_D\n"); + continue; + } + + TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid); + if(!valid) { /* Address not supported on current CS */ + print_t("\t\t\tAddress not supported on current CS\n"); + continue; + } + + TestAddr0B = TestAddr0 + (BigPagex8_RJ8 << 3); + + if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) { + TestAddr1 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver+1, &valid); + if(!valid) { /* Address not supported on current CS */ + print_t("\t\t\tAddress not supported on current CS+1\n"); + continue; + } + TestAddr1B = TestAddr1 + (BigPagex8_RJ8 << 3); + _2Ranks = 1; + } else { + _2Ranks = TestAddr1 = TestAddr1B = 0; + } + + print_debug_dqs("\t\tTrainRcvEn53: TestAddr0 ", TestAddr0, 2); + print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2); + print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2); + print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2); + + /* + * Get starting RcvrEnDly value + */ + RcvrEnDly = mct_Get_Start_RcvrEnDly_1Pass(Pass); + + /* mct_GetInitFlag_D*/ + if (Pass == FirstPass) { + pDCTstat->DqsRcvEn_Pass = 0; + } else { + pDCTstat->DqsRcvEn_Pass=0xFF; + } + pDCTstat->DqsRcvEn_Saved = 0; + + + while(RcvrEnDly < RcvrEnDlyLimit) { /* sweep Delay value here */ + print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3); + + /* callback not required + if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly)) + goto skipDly; + */ + + /* Odd steps get another pattern such that even + and odd steps alternate. The pointers to the + patterns will be swaped at the end of the loop + so that they correspond. 
*/ + if(RcvrEnDly & 1) { + PatternA = 1; + PatternB = 0; + } else { + /* Even step */ + PatternA = 0; + PatternB = 1; + } + + mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, PatternA); /* rank 0 of DIMM, testpattern 0 */ + mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, PatternB); /* rank 0 of DIMM, testpattern 1 */ + if(_2Ranks) { + mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, PatternA); /*rank 1 of DIMM, testpattern 0 */ + mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, PatternB); /*rank 1 of DIMM, testpattern 1 */ + } + + mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, 0, Channel, Receiver, dev, index_reg, Addl_Index, Pass); + + CurrTest = DQS_FAIL; + CurrTestSide0 = DQS_FAIL; + CurrTestSide1 = DQS_FAIL; + + mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0); /*cache fills */ + Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0, Channel, PatternA, Pass);/* ROM vs cache compare */ + proc_IOCLFLUSH_D(TestAddr0); + ResetDCTWrPtr_D(dev, index_reg, Addl_Index); + + print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0, 3); + + // != 0x00 mean pass + + if(Test0 == DQS_PASS) { + mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B); /*cache fills */ + /* ROM vs cache compare */ + Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0B, Channel, PatternB, Pass); + proc_IOCLFLUSH_D(TestAddr0B); + ResetDCTWrPtr_D(dev, index_reg, Addl_Index); + + print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1, 3); + + if(Test1 == DQS_PASS) { + CurrTestSide0 = DQS_PASS; + } + } + if(_2Ranks) { + mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1); /*cache fills */ + /* ROM vs cache compare */ + Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1, Channel, PatternA, Pass); + proc_IOCLFLUSH_D(TestAddr1); + ResetDCTWrPtr_D(dev, index_reg, Addl_Index); + + print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0, 3); + + if(Test0 == DQS_PASS) { + mct_Read1LTestPattern_D(pMCTstat, pDCTstat, 
TestAddr1B); /*cache fills */ + /* ROM vs cache compare */ + Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1B, Channel, PatternB, Pass); + proc_IOCLFLUSH_D(TestAddr1B); + ResetDCTWrPtr_D(dev, index_reg, Addl_Index); + + print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1, 3); + if(Test1 == DQS_PASS) { + CurrTestSide1 = DQS_PASS; + } + } + } + + if(_2Ranks) { + if ((CurrTestSide0 == DQS_PASS) && (CurrTestSide1 == DQS_PASS)) { + CurrTest = DQS_PASS; + } + } else if (CurrTestSide0 == DQS_PASS) { + CurrTest = DQS_PASS; + } + + + /* record first pass DqsRcvEn to stack */ + valid = mct_SavePassRcvEnDly_D(pDCTstat, RcvrEnDly, Channel, Receiver, Pass); + + /* Break(1:RevF,2:DR) or not(0) FIXME: This comment deosn't make sense */ + if(valid == 2 || (LastTest == DQS_FAIL && valid == 1)) { + RcvrEnDlyRmin = RcvrEnDly; + break; + } + + LastTest = CurrTest; + + /* swap the rank 0 pointers */ + tmp = TestAddr0; + TestAddr0 = TestAddr0B; + TestAddr0B = tmp; + + /* swap the rank 1 pointers */ + tmp = TestAddr1; + TestAddr1 = TestAddr1B; + TestAddr1B = tmp; + + print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3); + + RcvrEnDly++; + + } /* while RcvrEnDly */ + + print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2); + print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin, 3); + print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit, 3); + if(RcvrEnDlyRmin == RcvrEnDlyLimit) { + /* no passing window */ + pDCTstat->ErrStatus |= 1 << SB_NORCVREN; + Errors |= 1 << SB_NORCVREN; + pDCTstat->ErrCode = SC_FatalErr; + } + + if(RcvrEnDly > (RcvrEnDlyLimit - 1)) { + /* passing window too narrow, too far delayed*/ + pDCTstat->ErrStatus |= 1 << SB_SmallRCVR; + Errors |= 1 << SB_SmallRCVR; + pDCTstat->ErrCode = SC_FatalErr; + RcvrEnDly = RcvrEnDlyLimit - 1; + pDCTstat->CSTrainFail |= 1 << Receiver; + pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel); + } + + // CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass + 
mct_Average_RcvrEnDly_Pass(pDCTstat, RcvrEnDly, RcvrEnDlyLimit, Channel, Receiver, Pass); + + mct_SetFinalRcvrEnDly_D(pDCTstat, RcvrEnDly, Final_Value, Channel, Receiver, dev, index_reg, Addl_Index, Pass); + + if(pDCTstat->ErrStatus & (1 << SB_SmallRCVR)) { + Errors |= 1 << SB_SmallRCVR; + } + + RcvrEnDly += Pass1MemClkDly; + if(RcvrEnDly > CTLRMaxDelay) { + CTLRMaxDelay = RcvrEnDly; + } + + } /* while Receiver */ + + MaxDelay_CH[Channel] = CTLRMaxDelay; + } /* for Channel */ + + CTLRMaxDelay = MaxDelay_CH[0]; + if (MaxDelay_CH[1] > CTLRMaxDelay) + CTLRMaxDelay = MaxDelay_CH[1]; + + for (Channel = 0; Channel < 2; Channel++) { + mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay); /* program Ch A/B MaxAsyncLat to correspond with max delay */ + } + + ResetDCTWrPtr_D(dev, index_reg, Addl_Index); + + if(_DisableDramECC) { + mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC); + } + + if (Pass == FirstPass) { + /*Disable DQSRcvrEn training mode */ + print_t("TrainRcvrEn: mct_DisableDQSRcvEn_D\n"); + mct_DisableDQSRcvEn_D(pDCTstat); + } + + if(!_Wrap32Dis) { + msr = HWCR; + _RDMSR(msr, &lo, &hi); + lo &= ~(1<<17); /* restore HWCR.wrap32dis */ + _WRMSR(msr, lo, hi); + } + if(!_SSE2){ + cr4 = read_cr4(); + cr4 &= ~(1<<9); /* restore cr4.OSFXSR */ + write_cr4(cr4); + } + +#if DQS_TRAIN_DEBUG > 0 + { + u8 Channel; + print_debug("TrainRcvrEn: CH_MaxRdLat:\n"); + for(Channel = 0; Channel<2; Channel++) { + print_debug("Channel:"); print_debug_hex8(Channel); + print_debug(": "); + print_debug_hex8( pDCTstat->CH_MaxRdLat[Channel] ); + print_debug("\n"); + } + } +#endif + +#if DQS_TRAIN_DEBUG > 0 + { + u8 val; + u8 Channel, Receiver; + u8 i; + u8 *p; + + print_debug("TrainRcvrEn: CH_D_B_RCVRDLY:\n"); + for(Channel = 0; Channel < 2; Channel++) { + print_debug("Channel:"); print_debug_hex8(Channel); print_debug("\n"); + for(Receiver = 0; Receiver<8; Receiver+=2) { + print_debug("\t\tReceiver:"); + print_debug_hex8(Receiver); + p = 
pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1]; + print_debug(": "); + for (i=0;i<8; i++) { + val = p[i]; + print_debug_hex8(val); + print_debug(" "); + } + print_debug("\n"); + } + } + } +#endif + + print_tx("TrainRcvrEn: Status ", pDCTstat->Status); + print_tx("TrainRcvrEn: ErrStatus ", pDCTstat->ErrStatus); + print_tx("TrainRcvrEn: ErrCode ", pDCTstat->ErrCode); + print_t("TrainRcvrEn: Done\n"); +} + + +static u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct) +{ + if (pDCTstat->DIMMValidDCT[dct] == 0 ) { + return 8; + } else { + return 0; + } +} + + +static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 where, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass/*, u8 *p*/) +{ + /* + * Program final DqsRcvEnDly to additional index for DQS receiver + * enabled delay + */ + mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, where, Channel, Receiver, dev, index_reg, Addl_Index, Pass); +} + + +static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat) +{ + u8 ch_end, ch; + u32 reg; + u32 dev; + u32 val; + + dev = pDCTstat->dev_dct; + if (pDCTstat->GangedMode) { + ch_end = 1; + } else { + ch_end = 2; + } + + for (ch=0; ch<ch_end; ch++) { + reg = 0x78 + 0x100 * ch; + val = Get_NB32(dev, reg); + val &= ~(1 << DqsRcvEnTrain); + Set_NB32(dev, reg, val); + } +} + + +/* mct_ModifyIndex_D + * Function only used once so it was inlined. + */ + + +/* mct_GetInitFlag_D + * Function only used once so it was inlined. 
+ */ + + +void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, + u8 FinalValue, u8 Channel, u8 Receiver, u32 dev, + u32 index_reg, u8 Addl_Index, u8 Pass) +{ + u32 index; + u8 i; + u8 *p; + u32 val; + + if(RcvrEnDly == 0xFE) { + /*set the boudary flag */ + pDCTstat->Status |= 1 << SB_DQSRcvLimit; + } + + /* DimmOffset not needed for CH_D_B_RCVRDLY array */ + + + for(i=0; i < 8; i++) { + if(FinalValue) { + /*calculate dimm offset */ + p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1]; + RcvrEnDly = p[i]; + } + + /* if flag=0, set DqsRcvEn value to reg. */ + /* get the register index from table */ + index = Table_DQSRcvEn_Offset[i >> 1]; + index += Addl_Index; /* DIMMx DqsRcvEn byte0 */ + val = Get_NB32_index_wait(dev, index_reg, index); + if(i & 1) { + /* odd byte lane */ + val &= ~(0xFF << 16); + val |= (RcvrEnDly << 16); + } else { + /* even byte lane */ + val &= ~0xFF; + val |= RcvrEnDly; + } + Set_NB32_index_wait(dev, index_reg, index, val); + } + +} + +static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly) +{ + u32 dev; + u32 reg; + u16 SubTotal; + u32 index_reg; + u32 reg_off; + u32 val; + u32 valx; + + if(pDCTstat->GangedMode) + Channel = 0; + + dev = pDCTstat->dev_dct; + reg_off = 0x100 * Channel; + index_reg = 0x98 + reg_off; + + /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/ + val = Get_NB32(dev, 0x88 + reg_off); + SubTotal = ((val & 0x0f) + 1) << 1; /* SubTotal is 1/2 Memclk unit */ + + /* If registered DIMMs are being used then + * add 1 MEMCLK to the sub-total. + */ + val = Get_NB32(dev, 0x90 + reg_off); + if(!(val & (1 << UnBuffDimm))) + SubTotal += 2; + + /* If the address prelaunch is setup for 1/2 MEMCLKs then + * add 1, else add 2 to the sub-total. 
+ * if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2; + */ + val = Get_NB32_index_wait(dev, index_reg, 0x04); + if(!(val & 0x00202020)) + SubTotal += 1; + else + SubTotal += 2; + + /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs, + * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */ + val = Get_NB32(dev, 0x78 + reg_off); + SubTotal += 8 - (val & 0x0f); + + /* Convert bits 7-5 (also referred to as the course delay) of + * the current (or worst case) DQS receiver enable delay to + * 1/2 MEMCLKs units, rounding up, and add this to the sub-total. + */ + SubTotal += DQSRcvEnDly >> 5; /*BOZO-no rounding up */ + + /* Add 5.5 to the sub-total. 5.5 represents part of the + * processor specific constant delay value in the DRAM + * clock domain. + */ + SubTotal <<= 1; /*scale 1/2 MemClk to 1/4 MemClk */ + SubTotal += 11; /*add 5.5 1/2MemClk */ + + /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge + * clocks (NCLKs) as follows (assuming DDR400 and assuming + * that no P-state or link speed changes have occurred). + */ + + /* New formula: + * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */ + val = Get_NB32(dev, 0x94 + reg_off); + + /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */ + val &= 7; + if (val == 4) { + val++; /* adjust for DDR2-1066 */ + } + valx = (val + 3) << 2; + + val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4); + SubTotal *= ((val & 0x1f) + 4 ) * 3; + + SubTotal /= valx; + if (SubTotal % valx) { /* round up */ + SubTotal++; + } + + /* Add 5 NCLKs to the sub-total. 5 represents part of the + * processor specific constant value in the northbridge + * clock domain. + */ + SubTotal += 5; + + pDCTstat->CH_MaxRdLat[Channel] = SubTotal; + if(pDCTstat->GangedMode) { + pDCTstat->CH_MaxRdLat[1] = SubTotal; + } + + /* Program the F2x[1, 0]78[MaxRdLatency] register with + * the total delay value (in NCLKs). 
+ */ + + reg = 0x78 + reg_off; + val = Get_NB32(dev, reg); + val &= ~(0x3ff << 22); + val |= (SubTotal & 0x3ff) << 22; + + /* program MaxRdLatency to correspond with current delay */ + Set_NB32(dev, reg, val); +} + + +static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat, + u8 rcvrEnDly, u8 Channel, + u8 receiver, u8 Pass) +{ + u8 i; + u8 mask_Saved, mask_Pass; + u8 *p; + + /* calculate dimm offset + * not needed for CH_D_B_RCVRDLY array + */ + + /* cmp if there has new DqsRcvEnDly to be recorded */ + mask_Pass = pDCTstat->DqsRcvEn_Pass; + + if(Pass == SecondPass) { + mask_Pass = ~mask_Pass; + } + + mask_Saved = pDCTstat->DqsRcvEn_Saved; + if(mask_Pass != mask_Saved) { + + /* find desired stack offset according to channel/dimm/byte */ + if(Pass == SecondPass) { + // FIXME: SecondPass is never used for Barcelona p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1]; + p = 0; // Keep the compiler happy. + } else { + mask_Saved &= mask_Pass; + p = pDCTstat->CH_D_B_RCVRDLY[Channel][receiver>>1]; + } + for(i=0; i < 8; i++) { + /* cmp per byte lane */ + if(mask_Pass & (1 << i)) { + if(!(mask_Saved & (1 << i))) { + /* save RcvEnDly to stack, according to + the related Dimm/byte lane */ + p[i] = (u8)rcvrEnDly; + mask_Saved |= 1 << i; + } + } + } + pDCTstat->DqsRcvEn_Saved = mask_Saved; + } + return mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass); +} + + +static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u32 addr, u8 channel, + u8 pattern, u8 Pass) +{ + /* Compare only the first beat of data. Since target addrs are cache + * line aligned, the Channel parameter is used to determine which + * cache QW to compare. + */ + + u8 *test_buf; + u8 i; + u8 result; + u8 *addr_lo_buf; + + SetUpperFSbase(addr); // needed? 
+ + if(Pass == FirstPass) { + if(pattern==1) { + test_buf = (u8 *)TestPattern1_D; + } else { + test_buf = (u8 *)TestPattern0_D; + } + } else { // Second Pass + test_buf = (u8 *)TestPattern2_D; + } + + addr_lo_buf = (u8 *) (addr << 8); + result = DQS_FAIL; + + if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) { + addr_lo_buf += 8; /* second channel */ + test_buf += 8; + } + + +#if DQS_TRAIN_DEBUG > 4 + print_debug("\t\t\t\t\t\tQW0 : test_buf = "); + print_debug_hex32((unsigned)test_buf); + print_debug(": "); + for (i=0; i<8; i++) { + print_debug_hex8(test_buf[i]); print_debug(" "); + } + print_debug("\n"); + + print_debug("\t\t\t\t\t\tQW0 : addr_lo_buf = "); + print_debug_hex32((unsigned)addr_lo_buf); + print_debug(": "); + for (i=0; i<8; i++) { + print_debug_hex8(addr_lo_buf[i]); print_debug(" "); + } + print_debug("\n"); +#endif + + /* prevent speculative execution of following instructions */ + _EXECFENCE; + + for (i=0; i<8; i++) { + if(addr_lo_buf[i] == test_buf[i]) { + pDCTstat->DqsRcvEn_Pass |= (1<<i); + } else { + pDCTstat->DqsRcvEn_Pass &= ~(1<<i); + } + } + + + if (Pass == FirstPass) { + /* if first pass, at least one byte lane pass + * ,then DQS_PASS=1 and will set to related reg. + */ + if(pDCTstat->DqsRcvEn_Pass != 0) { + result = DQS_PASS; + } else { + result = DQS_FAIL; + } + + } else { + /* if second pass, at least one byte lane fail + * ,then DQS_FAIL=1 and will set to related reg. + */ + if(pDCTstat->DqsRcvEn_Pass != 0xFF) { + result = DQS_FAIL; + } else { + result = DQS_PASS; + } + } + + /* if second pass, we can't find the fail until FFh, + * then let it fail to save the final delay + */ + if((Pass == SecondPass) && (pDCTstat->Status & (1 << SB_DQSRcvLimit))) { + result = DQS_FAIL; + pDCTstat->DqsRcvEn_Pass = 0; + } + + /* second pass needs to be inverted + * FIXME? this could be inverted in the above code to start with... 
+ */ + if(Pass == SecondPass) { + if (result == DQS_PASS) { + result = DQS_FAIL; + } else if (result == DQS_FAIL) { /* FIXME: doesn't need to be else if */ + result = DQS_PASS; + } + } + + + return result; +} + + + +static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + /* Initialize the DQS Positions in preparation for + * Reciever Enable Training. + * Write Position is 1/2 Memclock Delay + * Read Position is 1/2 Memclock Delay + */ + u8 i; + for(i=0;i<2; i++){ + InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, i); + } +} + + +static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Channel) +{ + /* Initialize the DQS Positions in preparation for + * Reciever Enable Training. + * Write Position is no Delay + * Read Position is 1/2 Memclock Delay + */ + + u8 i, j; + u32 dword; + u8 dn = 2; // TODO: Rev C could be 4 + u32 dev = pDCTstat->dev_dct; + u32 index_reg = 0x98 + 0x100 * Channel; + + + // FIXME: add Cx support + dword = 0x00000000; + for(i=1; i<=3; i++) { + for(j=0; j<dn; j++) + /* DIMM0 Write Data Timing Low */ + /* DIMM0 Write ECC Timing */ + Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword); + } + + /* errata #180 */ + dword = 0x2f2f2f2f; + for(i=5; i<=6; i++) { + for(j=0; j<dn; j++) + /* DIMM0 Read DQS Timing Control Low */ + Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword); + } + + dword = 0x0000002f; + for(j=0; j<dn; j++) + /* DIMM0 Read DQS ECC Timing Control */ + Set_NB32_index_wait(dev, index_reg, 7 + 0x100 * j, dword); +} + + +void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel) +{ + u32 dev; + u32 index_reg; + u32 index; + u8 ChipSel; + u8 *p; + u32 val; + + dev = pDCTstat->dev_dct; + index_reg = 0x98 + Channel * 0x100; + index = 0x12; + p = pDCTstat->CH_D_BC_RCVRDLY[Channel]; + print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel, 2); + for(ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) { + val = p[ChipSel>>1]; + 
Set_NB32_index_wait(dev, index_reg, index, val); + print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ", + ChipSel, " rcvr_delay ", val, 2); + index += 3; + } +} + + +static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Channel) +{ + u8 ChipSel; + u16 EccDQSLike; + u8 EccDQSScale; + u32 val, val0, val1; + + EccDQSLike = pDCTstat->CH_EccDQSLike[Channel]; + EccDQSScale = pDCTstat->CH_EccDQSScale[Channel]; + + for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) { + if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) { + u8 *p; + p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1]; + + /* DQS Delay Value of Data Bytelane + * most like ECC byte lane */ + val0 = p[EccDQSLike & 0x07]; + /* DQS Delay Value of Data Bytelane + * 2nd most like ECC byte lane */ + val1 = p[(EccDQSLike>>8) & 0x07]; + + if(val0 > val1) { + val = val0 - val1; + } else { + val = val1 - val0; + } + + val *= ~EccDQSScale; + val >>= 8; // /256 + + if(val0 > val1) { + val -= val1; + } else { + val += val0; + } + + pDCTstat->CH_D_BC_RCVRDLY[Channel][ChipSel>>1] = val; + } + } + SetEccDQSRcvrEn_D(pDCTstat, Channel); +} + +void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + u8 Node; + u8 i; + + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + struct DCTStatStruc *pDCTstat; + pDCTstat = pDCTstatA + Node; + if (!pDCTstat->NodePresent) + break; + if (pDCTstat->DCTSysLimit) { + for(i=0; i<2; i++) + CalcEccDQSRcvrEn_D(pMCTstat, pDCTstat, i); + } + } +} + + +void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + + u8 Node = 0; + struct DCTStatStruc *pDCTstat; + + // FIXME: skip for Ax + while (Node < MAX_NODES_SUPPORTED) { + pDCTstat = pDCTstatA + Node; + + if(pDCTstat->DCTSysLimit) { + fenceDynTraining_D(pMCTstat, pDCTstat, 0); + fenceDynTraining_D(pMCTstat, pDCTstat, 1); + } + Node++; + } +} + + +static void fenceDynTraining_D(struct MCTStatStruc 
*pMCTstat, + struct DCTStatStruc *pDCTstat, u8 dct) +{ + u16 avRecValue; + u32 val; + u32 dev; + u32 index_reg = 0x98 + 0x100 * dct; + u32 index; + + /* BIOS first programs a seed value to the phase recovery engine + * (recommended 19) registers. + * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and + * F2x[1,0]9C_x52.) . + */ + + dev = pDCTstat->dev_dct; + for (index = 0x50; index <= 0x52; index ++) { + val = Get_NB32_index_wait(dev, index_reg, index); + val |= (FenceTrnFinDlySeed & 0x1F); + if (index != 0x52) { + val &= ~(0xFF << 8); + val |= (val & 0xFF) << 8; + val &= 0xFFFF; + val |= val << 16; + } + Set_NB32_index_wait(dev, index_reg, index, val); + } + + + /* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */ + val = Get_NB32_index_wait(dev, index_reg, 0x08); + val |= 1 << PhyFenceTrEn; + Set_NB32_index_wait(dev, index_reg, 0x08, val); + + /* Wait 200 MEMCLKs. */ + mct_Wait_10ns (20000); /* wait 200us */ + + /* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */ + val = Get_NB32_index_wait(dev, index_reg, 0x08); + val &= ~(1 << PhyFenceTrEn); + Set_NB32_index_wait(dev, index_reg, 0x08, val); + + /* BIOS reads the phase recovery engine registers + * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */ + avRecValue = 0; + for (index = 0x50; index <= 0x52; index ++) { + val = Get_NB32_index_wait(dev, index_reg, index); + avRecValue += val & 0x7F; + if (index != 0x52) { + avRecValue += (val >> 8) & 0x7F; + avRecValue += (val >> 16) & 0x7F; + avRecValue += (val >> 24) & 0x7F; + } + } + + val = avRecValue / 9; + if (avRecValue % 9) + val++; + avRecValue = val; + + /* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */ + avRecValue -= 8; + val = Get_NB32_index_wait(dev, index_reg, 0x0C); + val &= ~(0x1F << 16); + val |= (avRecValue & 0x1F) << 16; + Set_NB32_index_wait(dev, index_reg, 0x0C, val); + + /* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register + * delays (both channels). 
*/ + val = Get_NB32_index_wait(dev, index_reg, 0x04); + Set_NB32_index_wait(dev, index_reg, 0x04, val); +} + + +static void mct_Wait_10ns (u32 cycles) +{ + u32 saved, i; + u32 hi, lo, msr; + + /* cycles = number of 10ns cycles(or longer) to delay */ + /* FIXME: Need to calibrate to CPU/NCLK speed? */ + + msr = 0x10; /* TSC */ + for (i = 0; i < cycles; i++) { + _RDMSR(msr, &lo, &hi); + saved = lo; + + do { + _RDMSR(msr, &lo, &hi); + } while (lo - saved < 8); /* 8 x 1.25 ns as NCLK is at 1.25ns */ + } +} diff --git a/src/northbridge/amd/amdmct/mct/mctsrc1p.c b/src/northbridge/amd/amdmct/mct/mctsrc1p.c new file mode 100644 index 0000000000..31d2af8955 --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mctsrc1p.c @@ -0,0 +1,96 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +u8 mct_checkNumberOfDqsRcvEn_1Pass(u8 pass) +{ + u8 ret = 1; + if (pass == SecondPass) + ret = 0; + + return ret; +} + + +u32 SetupDqsPattern_1PassA(u8 pass) +{ + return (u32) TestPattern1_D; +} + + +u32 SetupDqsPattern_1PassB(u8 pass) +{ + return (u32) TestPattern0_D; +} + +u8 mct_Get_Start_RcvrEnDly_1Pass(u8 pass) +{ + return 0; +} + +u8 mct_Average_RcvrEnDly_1Pass(struct DCTStatStruc *pDCTstat, u8 Channel, u8 Receiver, + u8 Pass) +{ + u8 i, MaxValue; + u8 *p; + u8 val; + + MaxValue = 0; + p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1]; + + for(i=0; i < 8; i++) { + /* get left value from DCTStatStruc.CHA_D0_B0_RCVRDLY*/ + val = p[i]; + /* get right value from DCTStatStruc.CHA_D0_B0_RCVRDLY_1*/ + val += Pass1MemClkDly; + /* write back the value to stack */ + if (val > MaxValue) + MaxValue = val; + + p[i] = val; + } +// pDCTstat->DimmTrainFail &= ~(1<<Receiver+Channel); + + return MaxValue; +} + + + +u8 mct_AdjustFinalDQSRcvValue_1Pass(u8 val_1p, u8 val_2p) +{ + return (val_1p & 0xff) + ((val_2p & 0xff)<<8); +} + + +u8 mct_SaveRcvEnDly_D_1Pass(struct DCTStatStruc *pDCTstat, u8 pass) +{ + u8 ret; + ret = 0; + if((pDCTstat->DqsRcvEn_Pass == 0xff) && (pass== FirstPass)) + ret = 2; + return ret; +} + +u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, + u8 RcvrEnDly, u8 RcvrEnDlyLimit, + u8 Channel, u8 Receiver, u8 Pass) + +{ + return mct_Average_RcvrEnDly_1Pass(pDCTstat, Channel, Receiver, Pass); +} diff --git a/src/northbridge/amd/amdmct/mct/mctsrc2p.c b/src/northbridge/amd/amdmct/mct/mctsrc2p.c new file mode 100644 index 0000000000..5912513053 --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mctsrc2p.c @@ -0,0 +1,139 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +u8 mct_checkNumberOfDqsRcvEn_Pass(u8 pass) +{ + return 1; +} + + +u32 SetupDqsPattern_PassA(u8 Pass) +{ + u32 ret; + if(Pass == FirstPass) + ret = (u32) TestPattern1_D; + else + ret = (u32) TestPattern2_D; + + return ret; +} + + +u32 SetupDqsPattern_PassB(u8 Pass) +{ + u32 ret; + if(Pass == FirstPass) + ret = (u32) TestPattern0_D; + else + ret = (u32) TestPattern2_D; + + return ret; +} + + +u8 mct_Get_Start_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, + u8 Channel, u8 Receiver, + u8 Pass) +{ + u8 RcvrEnDly; + + if (Pass == FirstPass) + RcvrEnDly = 0; + else { + u8 max = 0; + u8 val; + u8 i; + u8 *p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1]; + u8 bn; + bn = 8; +// print_tx("mct_Get_Start_RcvrEnDly_Pass: Channel:", Channel); +// print_tx("mct_Get_Start_RcvrEnDly_Pass: Receiver:", Receiver); + for ( i=0;i<bn; i++) { + val = p[i]; +// print_tx("mct_Get_Start_RcvrEnDly_Pass: i:", i); +// print_tx("mct_Get_Start_RcvrEnDly_Pass: val:", val); + if(val > max) { + max = val; + } + } + RcvrEnDly = max; +// while(1) {; } +// RcvrEnDly += secPassOffset; //FIXME Why + } + + return RcvrEnDly; +} + + + +u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, + u8 RcvrEnDly, u8 RcvrEnDlyLimit, + u8 Channel, u8 Receiver, u8 Pass) +{ + u8 i; + u8 
*p; + u8 *p_1; + u8 val; + u8 val_1; + u8 valid = 1; + u8 bn; + + bn = 8; + + p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1]; + + if (Pass == SecondPass) { /* second pass must average values */ + //FIXME: which byte? + p_1 = pDCTstat->B_RCVRDLY_1; +// p_1 = pDCTstat->CH_D_B_RCVRDLY_1[Channel][Receiver>>1]; + for(i=0; i<bn; i++) { + val = p[i]; + /* left edge */ + if (val != (RcvrEnDlyLimit - 1)) { + val -= Pass1MemClkDly; + val_1 = p_1[i]; + val += val_1; + val >>= 1; + p[i] = val; + } else { + valid = 0; + break; + } + } + if (!valid) { + pDCTstat->ErrStatus |= 1<<SB_NORCVREN; + } else { + pDCTstat->DimmTrainFail &= ~(1<<(Receiver + Channel)); + } + } else { + for(i=0; i < bn; i++) { + val = p[i]; + /* Add 1/2 Memlock delay */ + //val += Pass1MemClkDly; + val += 0x5; // NOTE: middle value with DQSRCVEN_SAVED_GOOD_TIMES + //val += 0x02; + p[i] = val; + pDCTstat->DimmTrainFail &= ~(1<<(Receiver + Channel)); + } + } + + return RcvrEnDly; +} diff --git a/src/northbridge/amd/amdmct/mct/mcttmrl.c b/src/northbridge/amd/amdmct/mct/mcttmrl.c new file mode 100644 index 0000000000..f7763d13b8 --- /dev/null +++ b/src/northbridge/amd/amdmct/mct/mcttmrl.c @@ -0,0 +1,413 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +/* + * Description: Max Read Latency Training feature for DDR 2 MCT + */ + +static u8 CompareMaxRdLatTestPattern_D(u32 pattern_buf, u32 addr); +static u32 GetMaxRdLatTestAddr_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Channel, + u8 *MaxRcvrEnDly, u8 *valid); +u8 mct_GetStartMaxRdLat_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Channel, + u8 DQSRcvEnDly, u32 *Margin); +static void maxRdLatencyTrain_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); +static void mct_setMaxRdLatTrnVal_D(struct DCTStatStruc *pDCTstat, u8 Channel, + u16 MaxRdLatVal); + +/*Warning: These must be located so they do not cross a logical 16-bit + segment boundary!*/ +static const u32 TestMaxRdLAtPattern_D[] = { + 0x6E0E3FAC, 0x0C3CFF52, + 0x4A688181, 0x49C5B613, + 0x7C780BA6, 0x5C1650E3, + 0x0C4F9D76, 0x0C6753E6, + 0x205535A5, 0xBABFB6CA, + 0x610E6E5F, 0x0C5F1C87, + 0x488493CE, 0x14C9C383, + 0xF5B9A5CD, 0x9CE8F615, + + 0xAAD714B5, 0xC38F1B4C, + 0x72ED647C, 0x669F7562, + 0x5233F802, 0x4A898B30, + 0x10A40617, 0x3326B465, + 0x55386E04, 0xC807E3D3, + 0xAB49E193, 0x14B4E63A, + 0x67DF2495, 0xEA517C45, + 0x7624CE51, 0xF8140C51, + + 0x4824BD23, 0xB61DD0C9, + 0x072BCFBE, 0xE8F3807D, + 0x919EA373, 0x25E30C47, + 0xFEB12958, 0x4DA80A5A, + 0xE9A0DDF8, 0x792B0076, + 0xE81C73DC, 0xF025B496, + 0x1DB7E627, 0x808594FE, + 0x82668268, 0x655C7783, +}; + + +static u32 SetupMaxRdPattern(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u32 *buffer) +{ + /* 1. Copy the alpha and Beta patterns from ROM to Cache, + * aligning on 16 byte boundary + * 2. Set the ptr to Cacheable copy in DCTStatstruc.PtrPatternBufA + * for Alpha + * 3. 
Set the ptr to Cacheable copy in DCTStatstruc.PtrPatternBufB + * for Beta + */ + + u32 *buf; + u8 i; + + buf = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0)); + + for(i = 0; i < (16 * 3); i++) { + buf[i] = TestMaxRdLAtPattern_D[i]; + } + + return (u32)buf; + +} + + +void TrainMaxReadLatency_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + u8 Node; + + for(Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + struct DCTStatStruc *pDCTstat; + pDCTstat = pDCTstatA + Node; + + if(!pDCTstat->NodePresent) + break; + + if(pDCTstat->DCTSysLimit) + maxRdLatencyTrain_D(pMCTstat, pDCTstat); + } +} + + +static void maxRdLatencyTrain_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + u8 Channel; + u32 TestAddr0; + u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0; + u16 MaxRdLatDly; + u8 RcvrEnDly = 0; + u32 PatternBuffer[60]; // FIXME: why not 48 + 4 + u32 Margin; + u32 addr; + u32 cr4; + u32 lo, hi; + + u8 valid; + u32 pattern_buf; + + cr4 = read_cr4(); + if(cr4 & (1<<9)) { /* save the old value */ + _SSE2 = 1; + } + cr4 |= (1<<9); /* OSFXSR enable SSE2 */ + write_cr4(cr4); + + addr = HWCR; + _RDMSR(addr, &lo, &hi); + if(lo & (1<<17)) { /* save the old value */ + _Wrap32Dis = 1; + } + lo |= (1<<17); /* HWCR.wrap32dis */ + lo &= ~(1<<15); /* SSEDIS */ + /* Setting wrap32dis allows 64-bit memory references in + real mode */ + _WRMSR(addr, lo, hi); + + _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat); + + pattern_buf = SetupMaxRdPattern(pMCTstat, pDCTstat, PatternBuffer); + + for (Channel = 0; Channel < 2; Channel++) { + print_debug_dqs("\tMaxRdLatencyTrain51: Channel ",Channel, 1); + pDCTstat->Channel = Channel; + + if( (pDCTstat->Status & (1 << SB_128bitmode)) && Channel) + break; /*if ganged mode, skip DCT 1 */ + + TestAddr0 = GetMaxRdLatTestAddr_D(pMCTstat, pDCTstat, Channel, &RcvrEnDly, &valid); + if(!valid) /* Address not supported on current CS */ + continue; + /* rank 1 of DIMM, testpattern 0 */ + 
WriteMaxRdLat1CLTestPattern_D(pattern_buf, TestAddr0); + + MaxRdLatDly = mct_GetStartMaxRdLat_D(pMCTstat, pDCTstat, Channel, RcvrEnDly, &Margin); + print_debug_dqs("\tMaxRdLatencyTrain52: MaxRdLatDly start ", MaxRdLatDly, 2); + print_debug_dqs("\tMaxRdLatencyTrain52: MaxRdLatDly Margin ", Margin, 2); + while(MaxRdLatDly < MAX_RD_LAT) { /* sweep Delay value here */ + mct_setMaxRdLatTrnVal_D(pDCTstat, Channel, MaxRdLatDly); + ReadMaxRdLat1CLTestPattern_D(TestAddr0); + if( CompareMaxRdLatTestPattern_D(pattern_buf, TestAddr0) == DQS_PASS) + break; + SetTargetWTIO_D(TestAddr0); + FlushMaxRdLatTestPattern_D(TestAddr0); + ResetTargetWTIO_D(); + MaxRdLatDly++; + } + print_debug_dqs("\tMaxRdLatencyTrain53: MaxRdLatDly end ", MaxRdLatDly, 2); + mct_setMaxRdLatTrnVal_D(pDCTstat, Channel, MaxRdLatDly + Margin); + } + + if(_DisableDramECC) { + mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC); + } + + if(!_Wrap32Dis) { + addr = HWCR; + _RDMSR(addr, &lo, &hi); + lo &= ~(1<<17); /* restore HWCR.wrap32dis */ + _WRMSR(addr, lo, hi); + } + if(!_SSE2){ + cr4 = read_cr4(); + cr4 &= ~(1<<9); /* restore cr4.OSFXSR */ + write_cr4(cr4); + } + +#if DQS_TRAIN_DEBUG > 0 + { + u8 Channel; + print_debug("maxRdLatencyTrain: CH_MaxRdLat:\n"); + for(Channel = 0; Channel<2; Channel++) { + print_debug("Channel:"); print_debug_hex8(Channel); + print_debug(": "); + print_debug_hex8( pDCTstat->CH_MaxRdLat[Channel] ); + print_debug("\n"); + } + } +#endif + +} + +static void mct_setMaxRdLatTrnVal_D(struct DCTStatStruc *pDCTstat, + u8 Channel, u16 MaxRdLatVal) +{ + u8 i; + u32 reg; + u32 dev; + u32 val; + + if (pDCTstat->GangedMode) { + Channel = 0; // for safe + for (i=0; i<2; i++) + pDCTstat->CH_MaxRdLat[i] = MaxRdLatVal; + } else { + pDCTstat->CH_MaxRdLat[Channel] = MaxRdLatVal; + } + + dev = pDCTstat->dev_dct; + reg = 0x78 + Channel * 0x100; + val = Get_NB32(dev, reg); + val &= ~(0x3ff<<22); + val |= MaxRdLatVal<<22; + /* program MaxRdLatency to correspond with current delay */ + 
Set_NB32(dev, reg, val); + +} + + +static u8 CompareMaxRdLatTestPattern_D(u32 pattern_buf, u32 addr) +{ + /* Compare only the first beat of data. Since target addrs are cache + * line aligned, the Channel parameter is used to determine which cache + * QW to compare. + */ + + u32 *test_buf = (u32 *)pattern_buf; + u32 addr_lo; + u32 val, val_test; + int i; + u8 ret = DQS_PASS; + + SetUpperFSbase(addr); + addr_lo = addr<<8; + + _EXECFENCE; + for (i=0; i<(16*3); i++) { + val = read32_fs(addr_lo); + val_test = test_buf[i]; + + print_debug_dqs_pair("\t\t\t\t\t\ttest_buf = ", (u32)test_buf, " value = ", val_test, 5); + print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ", addr_lo, " value = ", val, 5); + if(val != val_test) { + ret = DQS_FAIL; + break; + } + addr_lo += 4; + } + + return ret; +} + +static u32 GetMaxRdLatTestAddr_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 Channel, u8 *MaxRcvrEnDly, + u8 *valid) +{ + u8 Max = 0; + + u8 Channel_Max = 0; + u8 d; + u8 d_Max = 0; + + u8 Byte; + u32 TestAddr0 = 0; + u8 ch, ch_start, ch_end; + u8 bn; + + bn = 8; + + if(pDCTstat->Status & (1 << SB_128bitmode)) { + ch_start = 0; + ch_end = 2; + } else { + ch_start = Channel; + ch_end = Channel + 1; + } + + *valid = 0; + + for(ch = ch_start; ch < ch_end; ch++) { + for(d=0; d<4; d++) { + for(Byte = 0; Byte<bn; Byte++) { + u8 tmp; + tmp = pDCTstat->CH_D_B_RCVRDLY[ch][d][Byte]; + if(tmp>Max) { + Max = tmp; + Channel_Max = Channel; + d_Max = d; + } + } + } + } + + if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel_Max, d_Max << 1)) { + TestAddr0 = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, Channel_Max, d_Max << 1, valid); + } + + if(*valid) + *MaxRcvrEnDly = Max; + + return TestAddr0; + +} + +u8 mct_GetStartMaxRdLat_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, + u8 Channel, u8 DQSRcvEnDly, u32 *Margin) +{ + u32 SubTotal; + u32 val; + u32 valx; + u32 valxx; + u32 index_reg; + u32 reg_off; + u32 dev; + + if(pDCTstat->GangedMode) + Channel = 0; + + 
index_reg = 0x98 + 0x100 * Channel; + + reg_off = 0x100 * Channel; + dev = pDCTstat->dev_dct; + + /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/ + val = Get_NB32(dev, 0x88 + reg_off); + SubTotal = ((val & 0x0f) + 1) << 1; /* SubTotal is 1/2 Memclk unit */ + + /* If registered DIMMs are being used then add 1 MEMCLK to the sub-total*/ + val = Get_NB32(dev, 0x90 + reg_off); + if(!(val & (1 << UnBuffDimm))) + SubTotal += 2; + + /*If the address prelaunch is setup for 1/2 MEMCLKs then add 1, + * else add 2 to the sub-total. if (AddrCmdSetup || CsOdtSetup + * || CkeSetup) then K := K + 2; */ + val = Get_NB32_index_wait(dev, index_reg, 0x04); + if(!(val & 0x00202020)) + SubTotal += 1; + else + SubTotal += 2; + + /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs, + * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */ + val = Get_NB32(dev, 0x78 + reg_off); + SubTotal += 8 - (val & 0x0f); + + /* Convert bits 7-5 (also referred to as the course delay) of the current + * (or worst case) DQS receiver enable delay to 1/2 MEMCLKs units, + * rounding up, and add this to the sub-total. */ + SubTotal += DQSRcvEnDly >> 5; /*BOZO-no rounding up */ + + SubTotal <<= 1; /*scale 1/2 MemClk to 1/4 MemClk */ + + /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge clocks (NCLKs) + * as follows (assuming DDR400 and assuming that no P-state or link speed + * changes have occurred). 
*/ + + /*New formula: + SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */ + val = Get_NB32(dev, 0x94 + reg_off); + /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */ + val &= 7; + if (val == 4) { + val++; /* adjust for DDR2-1066 */ + } + valx = (val + 3) << 2; /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */ + + + val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4); + val = ((val & 0x1f) + 4 ) * 3; + + /* Calculate 1 MemClk + 1 NCLK delay in NCLKs for margin */ + valxx = val << 2; + valxx /= valx; + if (valxx % valx) + valxx++; /* round up */ + valxx++; /* add 1NCLK */ + *Margin = valxx; /* one MemClk delay in NCLKs and one additional NCLK */ + + val *= SubTotal; + + val /= valx; + if (val % valx) + val++; /* round up */ + + + + return val; +} + + diff --git a/src/northbridge/amd/amdmct/wrappers/mcti.h b/src/northbridge/amd/amdmct/wrappers/mcti.h new file mode 100644 index 0000000000..455b7b7c30 --- /dev/null +++ b/src/northbridge/amd/amdmct/wrappers/mcti.h @@ -0,0 +1,59 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +#define SERVER 0 +#define DESKTOP 1 +//#define MOBILE 2 +#define REV_F 0 +#define REV_DR 1 +#define REV_FDR 2 + + +/*---------------------------------------------------------------------------- +COMMENT OUT ALL BUT 1 +----------------------------------------------------------------------------*/ +//#define BUILD_VERSION REV_F /*BIOS supports rev F only*/ +//#define BUILD_VERSION REV_DR /*BIOS supports rev 10 only*/ +//#define BUILD_VERSION REV_FDR /*BIOS supports both rev F and 10*/ + +/*---------------------------------------------------------------------------- +COMMENT OUT ALL BUT 1 +----------------------------------------------------------------------------*/ +#ifndef SYSTEM_TYPE +#define SYSTEM_TYPE SERVER +//#define SYSTEM_TYPE DESKTOP +//#define SYSTEM_TYPE MOBILE +#endif + +/*---------------------------------------------------------------------------- +COMMENT OUT ALL BUT 1 +----------------------------------------------------------------------------*/ +#define UMA_SUPPORT 0 /*Not supported */ +//#define UMA_SUPPORT 1 /*Supported */ + +/*---------------------------------------------------------------------------- +UPDATE AS NEEDED +----------------------------------------------------------------------------*/ +#define MAX_NODES_SUPPORTED 8 +#define MAX_DIMMS_SUPPORTED 8 +#define MAX_CS_SUPPORTED 8 +#define MCT_TRNG_KEEPOUT_START 0x00000C00 +#define MCT_TRNG_KEEPOUT_END 0x00000CFF + diff --git a/src/northbridge/amd/amdmct/wrappers/mcti_d.c b/src/northbridge/amd/amdmct/wrappers/mcti_d.c new file mode 100644 index 0000000000..41afed52ff --- /dev/null +++ b/src/northbridge/amd/amdmct/wrappers/mcti_d.c @@ -0,0 +1,338 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2007 Advanced Micro Devices, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* Call-backs */ + +u16 mctGet_NVbits(u8 index) +{ + u16 val = 0; + + switch (index) { + case NV_PACK_TYPE: +#if SYSTEM_TYPE == SERVER + val = 0; +#elif SYSTEM_TYPE == DESKTOP + val = 1; +//#elif SYSTEM_TYPE == MOBILE +// val = 2; +#endif + break; + case NV_MAX_NODES: + val = MAX_NODES_SUPPORTED; + break; + case NV_MAX_DIMMS: + //val = MAX_DIMMS_SUPPORTED; + val = 8; + break; + case NV_MAX_MEMCLK: + /* Maximum platform supported memclk */ + //val = 200; /* 200MHz(DDR400) */ + //val = 266; /* 266MHz(DDR533) */ + //val = 333; /* 333MHz(DDR667) */ + val = 400; /* 400MHz(DDR800) */ + break; + case NV_ECC_CAP: +#if SYSTEM_TYPE == SERVER + val = 1; /* memory bus ECC capable */ +#else + val = 0; /* memory bus ECC not capable */ +#endif + break; + case NV_4RANKType: + /* Quad Rank DIMM slot type */ + val = 0; /* normal */ + //val = 1; /* R4 (registered DIMMs in AMD server configuration) */ + //val = 2; /* S4 (Unbuffered SO-DIMMS) */ + break; + case NV_BYPMAX: +#if (UMA_SUPPORT == 0) + val = 4; +#elif (UMA_SUPPORT == 1) + val = 7; +#endif + break; + case NV_RDWRQBYP: +#if (UMA_SUPPORT == 0) + val = 2; +#elif (UMA_SUPPORT == 1) + val = 3; +#endif + break; + case NV_MCTUSRTMGMODE: + val = 0; /* Automatic (recommended) */ + //val = 1; /* Limited */ + //val = 2; /* Manual */ + break; + case NV_MemCkVal: + 
//val = 0; /* 200MHz */ + //val = 1; /* 266MHz */ + val = 2; /* 333MHz */ + break; + case NV_BankIntlv: + /* Bank (chip select) interleaving */ + //val = 0; /* disabled */ + val = 1; /* enabled (recommended) */ + break; + case NV_MemHole: + //val = 0; /* Disabled */ + val = 1; /* Enabled (recommended) */ + break; + case NV_AllMemClks: + val = 0; /* Normal (only to slots that have enabled DIMMs) */ + //val = 1; /* Enable all memclocks */ + break; + case NV_SPDCHK_RESTRT: + val = 0; /* Exit current node initialization if any DIMM has SPD checksum error */ + //val = 1; /* Ignore faulty SPD checksum (DIMM will still be disabled), continue current node intialization */ + break; + case NV_DQSTrainCTL: + //val = 0; /*Skip dqs training */ + val = 1; /* Perform dqs training */ + break; + case NV_NodeIntlv: + val = 0; /* Disabled (recommended) */ + //val = 1; /* Enable */ + break; + case NV_BurstLen32: +#if (UMA_SUPPORT == 0) + val = 0; /* 64 byte mode */ +#elif (UMA_SUPPORT == 1) + val = 1; /* 32 byte mode */ +#endif + break; + case NV_CKE_PDEN: + //val = 0; /* Disable */ + val = 1; /* Enable */ + break; + case NV_CKE_CTL: + val = 0; /* per channel control */ + //val = 1; /* per chip select control */ + break; + case NV_CLKHZAltVidC3: + val = 0; /* disable */ + //val = 1; /* enable */ + break; + case NV_BottomIO: + val = 0xC0; /* address bits [31:24] */ + break; + case NV_BottomUMA: +#if (UMA_SUPPORT == 0) + val = 0xC0; /* address bits [31:24] */ +#elif (UMA_SUPPORT == 1) + val = 0xB0; /* address bits [31:24] */ +#endif + break; + case NV_ECC: +#if (SYSTEM_TYPE == SERVER) + val = 1; /* Enable */ +#else + val = 0; /* Disable */ +#endif + break; + case NV_NBECC: +#if (SYSTEM_TYPE == SERVER) + val = 1; /* Enable */ +#else + val = 0; /* Disable */ +#endif + break; + case NV_ChipKill: +#if (SYSTEM_TYPE == SERVER) + val = 1; /* Enable */ +#else + val = 0; /* Disable */ +#endif + break; + case NV_ECCRedir: + val = 0; /* Disable */ + //val = 1; /* Enable */ + break; + case 
NV_DramBKScrub: + val = 0x00; /* Disabled */ + //val = 0x01; /* 40ns */ + //val = 0x02; /* 80ns */ + //val = 0x03; /* 160ns */ + //val = 0x04; /* 320ns */ + //val = 0x05; /* 640ns */ + //val = 0x06; /* 1.28us */ + //val = 0x07; /* 2.56us */ + //val = 0x08; /* 5.12us */ + //val = 0x09; /* 10.2us */ + //val = 0x0a; /* 20.5us */ + //val = 0x0b; /* 41us */ + //val = 0x0c; /* 81.9us */ + //val = 0x0d; /* 163.8us */ + //val = 0x0e; /* 327.7us */ + //val = 0x0f; /* 655.4us */ + //val = 0x10; /* 1.31ms */ + //val = 0x11; /* 2.62ms */ + //val = 0x12; /* 5.24ms */ + //val = 0x13; /* 10.49ms */ + //val = 0x14; /* 20.97sms */ + //val = 0x15; /* 42ms */ + //val = 0x16; /* 84ms */ + break; + case NV_L2BKScrub: + val = 0; /* Disabled - See L2Scrub in BKDG */ + break; + case NV_DCBKScrub: + val = 0; /* Disabled - See DcacheScrub in BKDG */ + break; + case NV_CS_SpareCTL: + val = 0; /* Disabled */ + //val = 1; /* Enabled */ + case NV_SyncOnUnEccEn: + val = 0; /* Disabled */ + //val = 1; /* Enabled */ + case NV_Unganged: + /* channel interleave is better performance than ganged mode at this time */ + val = 1; /* Enabled */ + //val = 0; /* Disabled */ + case NV_ChannelIntlv: + val = 5; /* Disabled */ /* Not currently checked in mctchi_d.c */ + /* Bit 0 = 0 - Disable + * 1 - Enable + * Bits[2:1] = 00b - Address bits 6 + * 01b - Address bits 1 + * 10b - Hash*, XOR of address bits [20:16, 6] + * 11b - Hash*, XOR of address bits [20:16, 9] + */ + + } + + return val; +} + + +void mctHookAfterDIMMpre(void) +{ +} + + +void mctGet_MaxLoadFreq(struct DCTStatStruc *pDCTstat) +{ + pDCTstat->PresetmaxFreq = 400; +} + + +void mctAdjustAutoCycTmg(void) +{ +} + +void mctAdjustAutoCycTmg_D(void) +{ +} + + +void mctHookAfterAutoCycTmg(void) +{ +} + + +void mctGetCS_ExcludeMap(void) +{ +} + + +void mctHookAfterAutoCfg(void) +{ +} + + +void mctHookAfterPSCfg(void) +{ +} + + +void mctHookAfterHTMap(void) +{ +} + + +void mctHookAfterCPU(void) +{ +} + + +void mctSaveDQSSigTmg_D(void) +{ +} + + +void 
mctGetDQSSigTmg_D(void) +{ +} + + +void mctHookBeforeECC(void) +{ +} + + +void mctHookAfterECC(void) +{ +} + + +void mctInitMemGPIOs_A(void) +{ +} + + +void mctInitMemGPIOs_A_D(void) +{ +} + + +void mctNodeIDDebugPort_D(void) +{ +} + + +void mctWarmReset(void) +{ +} + +void mctWarmReset_D(void) +{ +} + + +void mctHookBeforeDramInit(void) +{ +} + + +void mctHookAfterDramInit(void) +{ +} + + +void mctHookBeforeAnyTraining(void) +{ +} + +void mctHookAfterAnyTraining(void) +{ +} + +u32 mctGetLogicalCPUID_D(u8 node) +{ + return mctGetLogicalCPUID(node); +} |