Diffstat (limited to 'src/northbridge/amd/amdmct/mct/mctsrc.c')
-rw-r--r--  src/northbridge/amd/amdmct/mct/mctsrc.c | 1121
1 file changed, 1121 insertions, 0 deletions
diff --git a/src/northbridge/amd/amdmct/mct/mctsrc.c b/src/northbridge/amd/amdmct/mct/mctsrc.c
new file mode 100644
index 0000000000..c781ffd6b0
--- /dev/null
+++ b/src/northbridge/amd/amdmct/mct/mctsrc.c
@@ -0,0 +1,1121 @@
+/*
+ * This file is part of the LinuxBIOS project.
+ *
+ * Copyright (C) 2007 Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/******************************************************************************
+ Description: Receiver Enable and DQS Timing Training feature for DDR2 MCT
+******************************************************************************/
+
+static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 Pass);
+static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
+ u8 rcvrEnDly, u8 Channel,
+ u8 receiver, u8 Pass);
+static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat,
+ u32 addr, u8 channel,
+ u8 pattern, u8 Pass);
+static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat);
+static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 Channel);
+static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 Channel);
+static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat,
+ u8 RcvrEnDly, u8 where,
+ u8 Channel, u8 Receiver,
+ u32 dev, u32 index_reg,
+ u8 Addl_Index, u8 Pass);
+static void CalcMaxLatency_D(struct DCTStatStruc *pDCTstat,
+ u8 DQSRcvrEnDly, u8 Channel);
+static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly);
+static void mct_SetDQSRcvEn_D(struct DCTStatStruc *pDCTstat, u32 val);
+static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct);
+static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
+
+
+/* Warning: These must be located so they do not cross a logical 16-bit
+ segment boundary! */
+const static u32 TestPattern0_D[] = {
+ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
+ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
+ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
+ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
+};
+const static u32 TestPattern1_D[] = {
+ 0x55555555, 0x55555555, 0x55555555, 0x55555555,
+ 0x55555555, 0x55555555, 0x55555555, 0x55555555,
+ 0x55555555, 0x55555555, 0x55555555, 0x55555555,
+ 0x55555555, 0x55555555, 0x55555555, 0x55555555,
+};
+const static u32 TestPattern2_D[] = {
+ 0x12345678, 0x87654321, 0x23456789, 0x98765432,
+ 0x59385824, 0x30496724, 0x24490795, 0x99938733,
+ 0x40385642, 0x38465245, 0x29432163, 0x05067894,
+ 0x12349045, 0x98723467, 0x12387634, 0x34587623,
+};
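+/* Each pattern above is 16 dwords (64 bytes), i.e. one full cache line of
+   data, which presumably matches the cache-line-sized test reads and writes
+   used during receiver-enable training. */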
+
+static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u32 *buffer, u8 pass)
+{
+ /*
+	 * 1. Copy the Alpha and Beta patterns from ROM to cache,
+	 *    aligning on a 16-byte boundary
+	 * 2. Set DCTStatStruc.PtrPatternBufA to point to the Alpha pattern
+	 * 3. Set DCTStatStruc.PtrPatternBufB to point to the Beta pattern
+ */
+
+ u32 *buf_a;
+ u32 *buf_b;
+ u32 *p_A;
+ u32 *p_B;
+ u8 i;
+
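+	/* Advance the caller's buffer by 16 bytes and align down to a 16-byte
+	 * boundary; buf_b is then placed 32 dwords (128 bytes) above buf_a. */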
+ buf_a = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0));
+ buf_b = buf_a + 32; //??
+ p_A = (u32 *)SetupDqsPattern_1PassB(pass);
+ p_B = (u32 *)SetupDqsPattern_1PassA(pass);
+
+ for(i=0;i<16;i++) {
+ buf_a[i] = p_A[i];
+ buf_b[i] = p_B[i];
+ }
+
+ pDCTstat->PtrPatternBufA = (u32)buf_a;
+ pDCTstat->PtrPatternBufB = (u32)buf_b;
+}
+
+
+void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 Pass)
+{
+ if(mct_checkNumberOfDqsRcvEn_1Pass(Pass))
+ dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass);
+}
+
+
+static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 Pass)
+{
+ u8 Channel, RcvrEnDly, RcvrEnDlyRmin;
+ u8 Test0, Test1, CurrTest, CurrTestSide0, CurrTestSide1;
+ u8 CTLRMaxDelay, _2Ranks, PatternA, PatternB;
+ u8 Addl_Index = 0;
+ u8 Receiver;
+ u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
+ u8 RcvrEnDlyLimit, Final_Value, MaxDelay_CH[2];
+ u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B;
+	u32 PatternBuffer[64+4]; /* FIXME: does this need to be increased by 8? */
+ u32 Errors;
+
+ u32 val;
+ u32 reg;
+ u32 dev;
+ u32 index_reg;
+ u32 ch_start, ch_end, ch;
+ u32 msr;
+ u32 cr4;
+ u32 lo, hi;
+
+ u8 valid;
+ u32 tmp;
+ u8 LastTest;
+
+ print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
+ print_debug_dqs("TrainRcvEn: Pass", Pass, 0);
+
+
+ dev = pDCTstat->dev_dct;
+ ch_start = 0;
+ if(!pDCTstat->GangedMode) {
+ ch_end = 2;
+ } else {
+ ch_end = 1;
+ }
+
+ for (ch = ch_start; ch < ch_end; ch++) {
+ reg = 0x78 + (0x100 * ch);
+ val = Get_NB32(dev, reg);
+ val &= ~(0x3ff << 22);
+ val |= (0x0c8 << 22); /* Max Rd Lat */
+ Set_NB32(dev, reg, val);
+ }
+
+ Final_Value = 1;
+ if (Pass == FirstPass) {
+ mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat);
+ } else {
+ pDCTstat->DimmTrainFail = 0;
+ pDCTstat->CSTrainFail = ~pDCTstat->CSPresent;
+ }
+ print_t("TrainRcvrEn: 1\n");
+
+ cr4 = read_cr4();
+ if(cr4 & ( 1 << 9)) { /* save the old value */
+ _SSE2 = 1;
+ }
+ cr4 |= (1 << 9); /* OSFXSR enable SSE2 */
+ write_cr4(cr4);
+ print_t("TrainRcvrEn: 2\n");
+
+ msr = HWCR;
+ _RDMSR(msr, &lo, &hi);
+ //FIXME: Why use SSEDIS
+ if(lo & (1 << 17)) { /* save the old value */
+ _Wrap32Dis = 1;
+ }
+ lo |= (1 << 17); /* HWCR.wrap32dis */
+ lo &= ~(1 << 15); /* SSEDIS */
+ _WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */
+ print_t("TrainRcvrEn: 3\n");
+
+ _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
+
+
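+	/* T1000 is the MEMCLK period in ns * 1000; the Speed codes below appear
+	 * to correspond to 200/266/333/400/533 MHz MEMCLK respectively. */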
+ if(pDCTstat->Speed == 1) {
+		pDCTstat->T1000 = 5000;	/* get the T1000 figure (cycle time in ns * 1K) */
+ } else if(pDCTstat->Speed == 2) {
+ pDCTstat->T1000 = 3759;
+ } else if(pDCTstat->Speed == 3) {
+ pDCTstat->T1000 = 3003;
+ } else if(pDCTstat->Speed == 4) {
+ pDCTstat->T1000 = 2500;
+ } else if(pDCTstat->Speed == 5) {
+ pDCTstat->T1000 = 1876;
+ } else {
+ pDCTstat->T1000 = 0;
+ }
+
+ SetupRcvrPattern(pMCTstat, pDCTstat, PatternBuffer, Pass);
+ print_t("TrainRcvrEn: 4\n");
+
+ Errors = 0;
+ dev = pDCTstat->dev_dct;
+ CTLRMaxDelay = 0;
+
+ for (Channel = 0; Channel < 2; Channel++) {
+ print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
+ print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
+ pDCTstat->Channel = Channel;
+
+ MaxDelay_CH[Channel] = 0;
+ index_reg = 0x98 + 0x100 * Channel;
+
+ Receiver = mct_InitReceiver_D(pDCTstat, Channel);
+ /* There are four receiver pairs, loosely associated with chipselects. */
+ for (; Receiver < 8; Receiver += 2) {
+ Addl_Index = (Receiver >> 1) * 3 + 0x10;
+ LastTest = DQS_FAIL;
+
+ /* mct_ModifyIndex_D */
+ RcvrEnDlyRmin = RcvrEnDlyLimit = 0xff;
+
+ print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);
+
+ if(!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
+ print_t("\t\t\tRank not enabled_D\n");
+ continue;
+ }
+
+ TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
+ if(!valid) { /* Address not supported on current CS */
+ print_t("\t\t\tAddress not supported on current CS\n");
+ continue;
+ }
+
+ TestAddr0B = TestAddr0 + (BigPagex8_RJ8 << 3);
+
+ if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) {
+ TestAddr1 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver+1, &valid);
+ if(!valid) { /* Address not supported on current CS */
+ print_t("\t\t\tAddress not supported on current CS+1\n");
+ continue;
+ }
+ TestAddr1B = TestAddr1 + (BigPagex8_RJ8 << 3);
+ _2Ranks = 1;
+ } else {
+ _2Ranks = TestAddr1 = TestAddr1B = 0;
+ }
+
+ print_debug_dqs("\t\tTrainRcvEn53: TestAddr0 ", TestAddr0, 2);
+ print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
+ print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2);
+ print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2);
+
+ /*
+ * Get starting RcvrEnDly value
+ */
+ RcvrEnDly = mct_Get_Start_RcvrEnDly_1Pass(Pass);
+
+ /* mct_GetInitFlag_D*/
+ if (Pass == FirstPass) {
+ pDCTstat->DqsRcvEn_Pass = 0;
+ } else {
+ pDCTstat->DqsRcvEn_Pass=0xFF;
+ }
+ pDCTstat->DqsRcvEn_Saved = 0;
+
+
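+			/* Sweep the receiver-enable delay: for each candidate
+			 * value, write the two test patterns to the test
+			 * address(es), program the delay, read the data back
+			 * through the cache and compare it against the ROM
+			 * patterns per byte lane. */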
+ while(RcvrEnDly < RcvrEnDlyLimit) { /* sweep Delay value here */
+ print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
+
+ /* callback not required
+ if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly))
+ goto skipDly;
+ */
+
+ /* Odd steps get another pattern such that even
+ and odd steps alternate. The pointers to the
+				 patterns will be swapped at the end of the loop
+ so that they correspond. */
+ if(RcvrEnDly & 1) {
+ PatternA = 1;
+ PatternB = 0;
+ } else {
+ /* Even step */
+ PatternA = 0;
+ PatternB = 1;
+ }
+
+ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, PatternA); /* rank 0 of DIMM, testpattern 0 */
+ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, PatternB); /* rank 0 of DIMM, testpattern 1 */
+ if(_2Ranks) {
+ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, PatternA); /*rank 1 of DIMM, testpattern 0 */
+ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, PatternB); /*rank 1 of DIMM, testpattern 1 */
+ }
+
+ mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, 0, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
+
+ CurrTest = DQS_FAIL;
+ CurrTestSide0 = DQS_FAIL;
+ CurrTestSide1 = DQS_FAIL;
+
+ mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0); /*cache fills */
+ Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0, Channel, PatternA, Pass);/* ROM vs cache compare */
+ proc_IOCLFLUSH_D(TestAddr0);
+ ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
+
+ print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0, 3);
+
+				// != 0x00 means pass
+
+ if(Test0 == DQS_PASS) {
+ mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B); /*cache fills */
+ /* ROM vs cache compare */
+ Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0B, Channel, PatternB, Pass);
+ proc_IOCLFLUSH_D(TestAddr0B);
+ ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
+
+ print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1, 3);
+
+ if(Test1 == DQS_PASS) {
+ CurrTestSide0 = DQS_PASS;
+ }
+ }
+ if(_2Ranks) {
+ mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1); /*cache fills */
+ /* ROM vs cache compare */
+ Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1, Channel, PatternA, Pass);
+ proc_IOCLFLUSH_D(TestAddr1);
+ ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
+
+ print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0, 3);
+
+ if(Test0 == DQS_PASS) {
+ mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B); /*cache fills */
+ /* ROM vs cache compare */
+ Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1B, Channel, PatternB, Pass);
+ proc_IOCLFLUSH_D(TestAddr1B);
+ ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
+
+ print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1, 3);
+ if(Test1 == DQS_PASS) {
+ CurrTestSide1 = DQS_PASS;
+ }
+ }
+ }
+
+ if(_2Ranks) {
+ if ((CurrTestSide0 == DQS_PASS) && (CurrTestSide1 == DQS_PASS)) {
+ CurrTest = DQS_PASS;
+ }
+ } else if (CurrTestSide0 == DQS_PASS) {
+ CurrTest = DQS_PASS;
+ }
+
+
+ /* record first pass DqsRcvEn to stack */
+ valid = mct_SavePassRcvEnDly_D(pDCTstat, RcvrEnDly, Channel, Receiver, Pass);
+
+				/* Break (1:RevF, 2:DR) or not (0). FIXME: This comment doesn't make sense */
+ if(valid == 2 || (LastTest == DQS_FAIL && valid == 1)) {
+ RcvrEnDlyRmin = RcvrEnDly;
+ break;
+ }
+
+ LastTest = CurrTest;
+
+ /* swap the rank 0 pointers */
+ tmp = TestAddr0;
+ TestAddr0 = TestAddr0B;
+ TestAddr0B = tmp;
+
+ /* swap the rank 1 pointers */
+ tmp = TestAddr1;
+ TestAddr1 = TestAddr1B;
+ TestAddr1B = tmp;
+
+ print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
+
+ RcvrEnDly++;
+
+ } /* while RcvrEnDly */
+
+ print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
+ print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin, 3);
+ print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit, 3);
+ if(RcvrEnDlyRmin == RcvrEnDlyLimit) {
+ /* no passing window */
+ pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
+ Errors |= 1 << SB_NORCVREN;
+ pDCTstat->ErrCode = SC_FatalErr;
+ }
+
+ if(RcvrEnDly > (RcvrEnDlyLimit - 1)) {
+				/* passing window too narrow, too far delayed */
+ pDCTstat->ErrStatus |= 1 << SB_SmallRCVR;
+ Errors |= 1 << SB_SmallRCVR;
+ pDCTstat->ErrCode = SC_FatalErr;
+ RcvrEnDly = RcvrEnDlyLimit - 1;
+ pDCTstat->CSTrainFail |= 1 << Receiver;
+ pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
+ }
+
+ // CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass
+ mct_Average_RcvrEnDly_Pass(pDCTstat, RcvrEnDly, RcvrEnDlyLimit, Channel, Receiver, Pass);
+
+ mct_SetFinalRcvrEnDly_D(pDCTstat, RcvrEnDly, Final_Value, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
+
+ if(pDCTstat->ErrStatus & (1 << SB_SmallRCVR)) {
+ Errors |= 1 << SB_SmallRCVR;
+ }
+
+ RcvrEnDly += Pass1MemClkDly;
+ if(RcvrEnDly > CTLRMaxDelay) {
+ CTLRMaxDelay = RcvrEnDly;
+ }
+
+ } /* while Receiver */
+
+ MaxDelay_CH[Channel] = CTLRMaxDelay;
+ } /* for Channel */
+
+ CTLRMaxDelay = MaxDelay_CH[0];
+ if (MaxDelay_CH[1] > CTLRMaxDelay)
+ CTLRMaxDelay = MaxDelay_CH[1];
+
+ for (Channel = 0; Channel < 2; Channel++) {
+ mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay); /* program Ch A/B MaxAsyncLat to correspond with max delay */
+ }
+
+ ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
+
+ if(_DisableDramECC) {
+ mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
+ }
+
+ if (Pass == FirstPass) {
+ /*Disable DQSRcvrEn training mode */
+ print_t("TrainRcvrEn: mct_DisableDQSRcvEn_D\n");
+ mct_DisableDQSRcvEn_D(pDCTstat);
+ }
+
+ if(!_Wrap32Dis) {
+ msr = HWCR;
+ _RDMSR(msr, &lo, &hi);
+ lo &= ~(1<<17); /* restore HWCR.wrap32dis */
+ _WRMSR(msr, lo, hi);
+ }
+ if(!_SSE2){
+ cr4 = read_cr4();
+ cr4 &= ~(1<<9); /* restore cr4.OSFXSR */
+ write_cr4(cr4);
+ }
+
+#if DQS_TRAIN_DEBUG > 0
+ {
+ u8 Channel;
+ print_debug("TrainRcvrEn: CH_MaxRdLat:\n");
+ for(Channel = 0; Channel<2; Channel++) {
+ print_debug("Channel:"); print_debug_hex8(Channel);
+ print_debug(": ");
+ print_debug_hex8( pDCTstat->CH_MaxRdLat[Channel] );
+ print_debug("\n");
+ }
+ }
+#endif
+
+#if DQS_TRAIN_DEBUG > 0
+ {
+ u8 val;
+ u8 Channel, Receiver;
+ u8 i;
+ u8 *p;
+
+ print_debug("TrainRcvrEn: CH_D_B_RCVRDLY:\n");
+ for(Channel = 0; Channel < 2; Channel++) {
+ print_debug("Channel:"); print_debug_hex8(Channel); print_debug("\n");
+ for(Receiver = 0; Receiver<8; Receiver+=2) {
+ print_debug("\t\tReceiver:");
+ print_debug_hex8(Receiver);
+ p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1];
+ print_debug(": ");
+ for (i=0;i<8; i++) {
+ val = p[i];
+ print_debug_hex8(val);
+ print_debug(" ");
+ }
+ print_debug("\n");
+ }
+ }
+ }
+#endif
+
+ print_tx("TrainRcvrEn: Status ", pDCTstat->Status);
+ print_tx("TrainRcvrEn: ErrStatus ", pDCTstat->ErrStatus);
+ print_tx("TrainRcvrEn: ErrCode ", pDCTstat->ErrCode);
+ print_t("TrainRcvrEn: Done\n");
+}
+
+
+static u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct)
+{
+ if (pDCTstat->DIMMValidDCT[dct] == 0 ) {
+ return 8;
+ } else {
+ return 0;
+ }
+}
+
+
+static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 where, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass/*, u8 *p*/)
+{
+ /*
+ * Program final DqsRcvEnDly to additional index for DQS receiver
+	 * enable delay
+ */
+ mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, where, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
+}
+
+
+static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
+{
+ u8 ch_end, ch;
+ u32 reg;
+ u32 dev;
+ u32 val;
+
+ dev = pDCTstat->dev_dct;
+ if (pDCTstat->GangedMode) {
+ ch_end = 1;
+ } else {
+ ch_end = 2;
+ }
+
+ for (ch=0; ch<ch_end; ch++) {
+ reg = 0x78 + 0x100 * ch;
+ val = Get_NB32(dev, reg);
+ val &= ~(1 << DqsRcvEnTrain);
+ Set_NB32(dev, reg, val);
+ }
+}
+
+
+/* mct_ModifyIndex_D
+ * Function only used once so it was inlined.
+ */
+
+
+/* mct_GetInitFlag_D
+ * Function only used once so it was inlined.
+ */
+
+
+void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly,
+ u8 FinalValue, u8 Channel, u8 Receiver, u32 dev,
+ u32 index_reg, u8 Addl_Index, u8 Pass)
+{
+ u32 index;
+ u8 i;
+ u8 *p;
+ u32 val;
+
+ if(RcvrEnDly == 0xFE) {
+		/* set the boundary flag */
+ pDCTstat->Status |= 1 << SB_DQSRcvLimit;
+ }
+
+ /* DimmOffset not needed for CH_D_B_RCVRDLY array */
+
+
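+	/* Each DqsRcvEn index register holds the delays for a pair of byte
+	 * lanes: the even lane in bits 7:0 and the odd lane in bits 23:16. */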
+ for(i=0; i < 8; i++) {
+ if(FinalValue) {
+ /*calculate dimm offset */
+ p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1];
+ RcvrEnDly = p[i];
+ }
+
+		/* If the flag is 0, the passed-in DqsRcvEn value is written to the reg. */
+		/* Get the register index from the table. */
+ index = Table_DQSRcvEn_Offset[i >> 1];
+ index += Addl_Index; /* DIMMx DqsRcvEn byte0 */
+ val = Get_NB32_index_wait(dev, index_reg, index);
+ if(i & 1) {
+ /* odd byte lane */
+ val &= ~(0xFF << 16);
+ val |= (RcvrEnDly << 16);
+ } else {
+ /* even byte lane */
+ val &= ~0xFF;
+ val |= RcvrEnDly;
+ }
+ Set_NB32_index_wait(dev, index_reg, index, val);
+ }
+
+}
+
+static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly)
+{
+ u32 dev;
+ u32 reg;
+ u16 SubTotal;
+ u32 index_reg;
+ u32 reg_off;
+ u32 val;
+ u32 valx;
+
+ if(pDCTstat->GangedMode)
+ Channel = 0;
+
+ dev = pDCTstat->dev_dct;
+ reg_off = 0x100 * Channel;
+ index_reg = 0x98 + reg_off;
+
+	/* Multiply the CAS Latency by two to get a number of 1/2 MEMCLK units. */
+ val = Get_NB32(dev, 0x88 + reg_off);
+ SubTotal = ((val & 0x0f) + 1) << 1; /* SubTotal is 1/2 Memclk unit */
+
+ /* If registered DIMMs are being used then
+ * add 1 MEMCLK to the sub-total.
+ */
+ val = Get_NB32(dev, 0x90 + reg_off);
+ if(!(val & (1 << UnBuffDimm)))
+ SubTotal += 2;
+
+	/* If the address prelaunch is set up for 1/2 MEMCLKs then
+ * add 1, else add 2 to the sub-total.
+ * if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2;
+ */
+ val = Get_NB32_index_wait(dev, index_reg, 0x04);
+ if(!(val & 0x00202020))
+ SubTotal += 1;
+ else
+ SubTotal += 2;
+
+ /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs,
+ * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */
+ val = Get_NB32(dev, 0x78 + reg_off);
+ SubTotal += 8 - (val & 0x0f);
+
+	/* Convert bits 7-5 (also referred to as the coarse delay) of
+ * the current (or worst case) DQS receiver enable delay to
+ * 1/2 MEMCLKs units, rounding up, and add this to the sub-total.
+ */
+ SubTotal += DQSRcvEnDly >> 5; /*BOZO-no rounding up */
+
+ /* Add 5.5 to the sub-total. 5.5 represents part of the
+ * processor specific constant delay value in the DRAM
+ * clock domain.
+ */
+	SubTotal <<= 1;		/* scale 1/2 MemClk units to 1/4 MemClk units */
+	SubTotal += 11;		/* add 5.5 half-MemClks (= 11 quarter-MemClks) */
+
+ /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge
+ * clocks (NCLKs) as follows (assuming DDR400 and assuming
+ * that no P-state or link speed changes have occurred).
+ */
+
+ /* New formula:
+ * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */
+ val = Get_NB32(dev, 0x94 + reg_off);
+
+ /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */
+ val &= 7;
+ if (val == 4) {
+ val++; /* adjust for DDR2-1066 */
+ }
+ valx = (val + 3) << 2;
+
+ val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4);
+ SubTotal *= ((val & 0x1f) + 4 ) * 3;
+
+ SubTotal /= valx;
+ if (SubTotal % valx) { /* round up */
+ SubTotal++;
+ }
+
+ /* Add 5 NCLKs to the sub-total. 5 represents part of the
+ * processor specific constant value in the northbridge
+ * clock domain.
+ */
+ SubTotal += 5;
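+	/* Worked example (illustrative values only): with SubTotal = 47
+	 * quarter-MEMCLKs entering the conversion above, a MemClkFreq field
+	 * of 3 and NBFid = 4, valx = (3+3)<<2 = 24 and
+	 * SubTotal = 47*3*(4+4)/24 = 47; adding 5 gives a MaxRdLatency of
+	 * 52 NCLKs. */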
+
+ pDCTstat->CH_MaxRdLat[Channel] = SubTotal;
+ if(pDCTstat->GangedMode) {
+ pDCTstat->CH_MaxRdLat[1] = SubTotal;
+ }
+
+ /* Program the F2x[1, 0]78[MaxRdLatency] register with
+ * the total delay value (in NCLKs).
+ */
+
+ reg = 0x78 + reg_off;
+ val = Get_NB32(dev, reg);
+ val &= ~(0x3ff << 22);
+ val |= (SubTotal & 0x3ff) << 22;
+
+ /* program MaxRdLatency to correspond with current delay */
+ Set_NB32(dev, reg, val);
+}
+
+
+static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
+ u8 rcvrEnDly, u8 Channel,
+ u8 receiver, u8 Pass)
+{
+ u8 i;
+ u8 mask_Saved, mask_Pass;
+ u8 *p;
+
+ /* calculate dimm offset
+ * not needed for CH_D_B_RCVRDLY array
+ */
+
+	/* check whether there is a new DqsRcvEnDly to be recorded */
+ mask_Pass = pDCTstat->DqsRcvEn_Pass;
+
+ if(Pass == SecondPass) {
+ mask_Pass = ~mask_Pass;
+ }
+
+ mask_Saved = pDCTstat->DqsRcvEn_Saved;
+ if(mask_Pass != mask_Saved) {
+
+ /* find desired stack offset according to channel/dimm/byte */
+ if(Pass == SecondPass) {
+ // FIXME: SecondPass is never used for Barcelona p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1];
+ p = 0; // Keep the compiler happy.
+ } else {
+ mask_Saved &= mask_Pass;
+ p = pDCTstat->CH_D_B_RCVRDLY[Channel][receiver>>1];
+ }
+ for(i=0; i < 8; i++) {
+			/* compare per byte lane */
+ if(mask_Pass & (1 << i)) {
+ if(!(mask_Saved & (1 << i))) {
+ /* save RcvEnDly to stack, according to
+ the related Dimm/byte lane */
+ p[i] = (u8)rcvrEnDly;
+ mask_Saved |= 1 << i;
+ }
+ }
+ }
+ pDCTstat->DqsRcvEn_Saved = mask_Saved;
+ }
+ return mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass);
+}
+
+
+static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat,
+ u32 addr, u8 channel,
+ u8 pattern, u8 Pass)
+{
+ /* Compare only the first beat of data. Since target addrs are cache
+ * line aligned, the Channel parameter is used to determine which
+ * cache QW to compare.
+ */
+
+ u8 *test_buf;
+ u8 i;
+ u8 result;
+ u8 *addr_lo_buf;
+
+ SetUpperFSbase(addr); // needed?
+
+ if(Pass == FirstPass) {
+ if(pattern==1) {
+ test_buf = (u8 *)TestPattern1_D;
+ } else {
+ test_buf = (u8 *)TestPattern0_D;
+ }
+ } else { // Second Pass
+ test_buf = (u8 *)TestPattern2_D;
+ }
+
+ addr_lo_buf = (u8 *) (addr << 8);
+ result = DQS_FAIL;
+
+ if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) {
+ addr_lo_buf += 8; /* second channel */
+ test_buf += 8;
+ }
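+	/* In ganged (128-bit) mode the second channel supplies bytes 8-15 of
+	 * the cache line, hence the +8 offsets above. */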
+
+
+#if DQS_TRAIN_DEBUG > 4
+ print_debug("\t\t\t\t\t\tQW0 : test_buf = ");
+ print_debug_hex32((unsigned)test_buf);
+ print_debug(": ");
+ for (i=0; i<8; i++) {
+ print_debug_hex8(test_buf[i]); print_debug(" ");
+ }
+ print_debug("\n");
+
+ print_debug("\t\t\t\t\t\tQW0 : addr_lo_buf = ");
+ print_debug_hex32((unsigned)addr_lo_buf);
+ print_debug(": ");
+ for (i=0; i<8; i++) {
+ print_debug_hex8(addr_lo_buf[i]); print_debug(" ");
+ }
+ print_debug("\n");
+#endif
+
+ /* prevent speculative execution of following instructions */
+ _EXECFENCE;
+
+ for (i=0; i<8; i++) {
+ if(addr_lo_buf[i] == test_buf[i]) {
+ pDCTstat->DqsRcvEn_Pass |= (1<<i);
+ } else {
+ pDCTstat->DqsRcvEn_Pass &= ~(1<<i);
+ }
+ }
+
+
+ if (Pass == FirstPass) {
+		/* First pass: if at least one byte lane passes,
+		 * the result is DQS_PASS and will be set to the related reg.
+		 */
+ if(pDCTstat->DqsRcvEn_Pass != 0) {
+ result = DQS_PASS;
+ } else {
+ result = DQS_FAIL;
+ }
+
+ } else {
+		/* Second pass: if at least one byte lane fails,
+		 * the result is DQS_FAIL and will be set to the related reg.
+		 */
+ if(pDCTstat->DqsRcvEn_Pass != 0xFF) {
+ result = DQS_FAIL;
+ } else {
+ result = DQS_PASS;
+ }
+ }
+
+	/* Second pass: if no failure is found before the delay limit (FFh)
+	 * is reached, force a failure so that the final delay is saved.
+	 */
+ if((Pass == SecondPass) && (pDCTstat->Status & (1 << SB_DQSRcvLimit))) {
+ result = DQS_FAIL;
+ pDCTstat->DqsRcvEn_Pass = 0;
+ }
+
+ /* second pass needs to be inverted
+ * FIXME? this could be inverted in the above code to start with...
+ */
+ if(Pass == SecondPass) {
+ if (result == DQS_PASS) {
+ result = DQS_FAIL;
+ } else if (result == DQS_FAIL) { /* FIXME: doesn't need to be else if */
+ result = DQS_PASS;
+ }
+ }
+
+
+ return result;
+}
+
+
+
+static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat)
+{
+ /* Initialize the DQS Positions in preparation for
+	 * Receiver Enable Training.
+ * Write Position is 1/2 Memclock Delay
+ * Read Position is 1/2 Memclock Delay
+ */
+ u8 i;
+ for(i=0;i<2; i++){
+ InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, i);
+ }
+}
+
+
+static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 Channel)
+{
+ /* Initialize the DQS Positions in preparation for
+	 * Receiver Enable Training.
+ * Write Position is no Delay
+ * Read Position is 1/2 Memclock Delay
+ */
+
+ u8 i, j;
+ u32 dword;
+ u8 dn = 2; // TODO: Rev C could be 4
+ u32 dev = pDCTstat->dev_dct;
+ u32 index_reg = 0x98 + 0x100 * Channel;
+
+
+ // FIXME: add Cx support
+ dword = 0x00000000;
+ for(i=1; i<=3; i++) {
+ for(j=0; j<dn; j++)
+ /* DIMM0 Write Data Timing Low */
+ /* DIMM0 Write ECC Timing */
+ Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
+ }
+
+ /* errata #180 */
+ dword = 0x2f2f2f2f;
+ for(i=5; i<=6; i++) {
+ for(j=0; j<dn; j++)
+ /* DIMM0 Read DQS Timing Control Low */
+ Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword);
+ }
+
+ dword = 0x0000002f;
+ for(j=0; j<dn; j++)
+ /* DIMM0 Read DQS ECC Timing Control */
+ Set_NB32_index_wait(dev, index_reg, 7 + 0x100 * j, dword);
+}
+
+
+void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
+{
+ u32 dev;
+ u32 index_reg;
+ u32 index;
+ u8 ChipSel;
+ u8 *p;
+ u32 val;
+
+ dev = pDCTstat->dev_dct;
+ index_reg = 0x98 + Channel * 0x100;
+ index = 0x12;
+ p = pDCTstat->CH_D_BC_RCVRDLY[Channel];
+ print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel, 2);
+ for(ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
+ val = p[ChipSel>>1];
+ Set_NB32_index_wait(dev, index_reg, index, val);
+ print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ",
+ ChipSel, " rcvr_delay ", val, 2);
+ index += 3;
+ }
+}
+
+
+static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 Channel)
+{
+ u8 ChipSel;
+ u16 EccDQSLike;
+ u8 EccDQSScale;
+ u32 val, val0, val1;
+
+ EccDQSLike = pDCTstat->CH_EccDQSLike[Channel];
+ EccDQSScale = pDCTstat->CH_EccDQSScale[Channel];
+
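+	/* The ECC byte lane's delay is derived from the two data byte lanes
+	 * that most closely resemble the ECC lane (encoded in EccDQSLike),
+	 * weighted by EccDQSScale. */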
+ for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
+ if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) {
+ u8 *p;
+ p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1];
+
+			/* DQS delay value of the data byte lane
+			 * most similar to the ECC byte lane */
+			val0 = p[EccDQSLike & 0x07];
+			/* DQS delay value of the data byte lane
+			 * second most similar to the ECC byte lane */
+			val1 = p[(EccDQSLike>>8) & 0x07];
+
+ if(val0 > val1) {
+ val = val0 - val1;
+ } else {
+ val = val1 - val0;
+ }
+
+ val *= ~EccDQSScale;
+ val >>= 8; // /256
+
+ if(val0 > val1) {
+ val -= val1;
+ } else {
+ val += val0;
+ }
+
+ pDCTstat->CH_D_BC_RCVRDLY[Channel][ChipSel>>1] = val;
+ }
+ }
+ SetEccDQSRcvrEn_D(pDCTstat, Channel);
+}
+
+void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstatA)
+{
+ u8 Node;
+ u8 i;
+
+ for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
+ struct DCTStatStruc *pDCTstat;
+ pDCTstat = pDCTstatA + Node;
+ if (!pDCTstat->NodePresent)
+ break;
+ if (pDCTstat->DCTSysLimit) {
+ for(i=0; i<2; i++)
+ CalcEccDQSRcvrEn_D(pMCTstat, pDCTstat, i);
+ }
+ }
+}
+
+
+void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstatA)
+{
+
+ u8 Node = 0;
+ struct DCTStatStruc *pDCTstat;
+
+ // FIXME: skip for Ax
+ while (Node < MAX_NODES_SUPPORTED) {
+ pDCTstat = pDCTstatA + Node;
+
+ if(pDCTstat->DCTSysLimit) {
+ fenceDynTraining_D(pMCTstat, pDCTstat, 0);
+ fenceDynTraining_D(pMCTstat, pDCTstat, 1);
+ }
+ Node++;
+ }
+}
+
+
+static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
+ struct DCTStatStruc *pDCTstat, u8 dct)
+{
+ u16 avRecValue;
+ u32 val;
+ u32 dev;
+ u32 index_reg = 0x98 + 0x100 * dct;
+ u32 index;
+
+	/* BIOS first programs a seed value (recommended value: 19) into the
+	 * phase recovery engine registers, i.e. the DRAM Phase Recovery
+	 * Control Registers F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52.
+	 */
+
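+	/* F2x[1,0]9C_x50 and _x51 each hold four byte-wide phase-recovery
+	 * fields while _x52 holds a single one, hence the byte replication
+	 * below for the first two indexes. */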
+ dev = pDCTstat->dev_dct;
+ for (index = 0x50; index <= 0x52; index ++) {
+ val = Get_NB32_index_wait(dev, index_reg, index);
+ val |= (FenceTrnFinDlySeed & 0x1F);
+ if (index != 0x52) {
+ val &= ~(0xFF << 8);
+ val |= (val & 0xFF) << 8;
+ val &= 0xFFFF;
+ val |= val << 16;
+ }
+ Set_NB32_index_wait(dev, index_reg, index, val);
+ }
+
+
+ /* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
+ val = Get_NB32_index_wait(dev, index_reg, 0x08);
+ val |= 1 << PhyFenceTrEn;
+ Set_NB32_index_wait(dev, index_reg, 0x08, val);
+
+	/* Wait at least 200 MEMCLKs (the 200us delay below is much longer). */
+	mct_Wait_10ns(20000);	/* wait 200us */
+
+ /* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
+ val = Get_NB32_index_wait(dev, index_reg, 0x08);
+ val &= ~(1 << PhyFenceTrEn);
+ Set_NB32_index_wait(dev, index_reg, 0x08, val);
+
+ /* BIOS reads the phase recovery engine registers
+ * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */
+ avRecValue = 0;
+ for (index = 0x50; index <= 0x52; index ++) {
+ val = Get_NB32_index_wait(dev, index_reg, index);
+ avRecValue += val & 0x7F;
+ if (index != 0x52) {
+ avRecValue += (val >> 8) & 0x7F;
+ avRecValue += (val >> 16) & 0x7F;
+ avRecValue += (val >> 24) & 0x7F;
+ }
+ }
+
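+	/* Nine phase-recovery values were summed above (four each from x50
+	 * and x51, one from x52), so average by dividing by 9, rounding up. */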
+ val = avRecValue / 9;
+ if (avRecValue % 9)
+ val++;
+ avRecValue = val;
+
+ /* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */
+ avRecValue -= 8;
+ val = Get_NB32_index_wait(dev, index_reg, 0x0C);
+ val &= ~(0x1F << 16);
+ val |= (avRecValue & 0x1F) << 16;
+ Set_NB32_index_wait(dev, index_reg, 0x0C, val);
+
+ /* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register
+ * delays (both channels). */
+ val = Get_NB32_index_wait(dev, index_reg, 0x04);
+ Set_NB32_index_wait(dev, index_reg, 0x04, val);
+}
+
+
+static void mct_Wait_10ns (u32 cycles)
+{
+ u32 saved, i;
+ u32 hi, lo, msr;
+
+	/* cycles = number of 10ns cycles (or longer) to delay */
+ /* FIXME: Need to calibrate to CPU/NCLK speed? */
+
+ msr = 0x10; /* TSC */
+ for (i = 0; i < cycles; i++) {
+ _RDMSR(msr, &lo, &hi);
+ saved = lo;
+
+ do {
+ _RDMSR(msr, &lo, &hi);
+ } while (lo - saved < 8); /* 8 x 1.25 ns as NCLK is at 1.25ns */
+ }
+}