summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/northbridge/amd/amdfam10/raminit_amdmct.c 2
-rw-r--r-- src/northbridge/amd/amdmct/mct/mct_d.c 1
-rw-r--r-- src/northbridge/amd/amdmct/mct_ddr3/mct_d.c 191
-rw-r--r-- src/northbridge/amd/amdmct/mct_ddr3/mct_d.h 8
-rw-r--r-- src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h 87
-rw-r--r-- src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c 6
-rw-r--r-- src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c 806
-rw-r--r-- src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c 6
-rw-r--r-- src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c 14
-rw-r--r-- src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c 3
-rw-r--r-- src/northbridge/amd/amdmct/mct_ddr3/mctproc.c 17
-rw-r--r-- src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c 5
-rw-r--r-- src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c 800
-rw-r--r-- src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c 18
-rw-r--r-- src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c 13
-rw-r--r-- src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c 7
-rw-r--r-- src/northbridge/amd/amdmct/mct_ddr3/mctwl.c 42
-rw-r--r-- src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c 267
-rw-r--r-- src/northbridge/amd/amdmct/wrappers/mcti_d.c 102
19 files changed, 1253 insertions, 1142 deletions
diff --git a/src/northbridge/amd/amdfam10/raminit_amdmct.c b/src/northbridge/amd/amdfam10/raminit_amdmct.c
index a0d47f4afd..25cf93daf7 100644
--- a/src/northbridge/amd/amdfam10/raminit_amdmct.c
+++ b/src/northbridge/amd/amdfam10/raminit_amdmct.c
@@ -26,7 +26,6 @@ static void print_tx(const char *strval, u32 val)
printk(BIOS_DEBUG, "%s%08x\n", strval, val);
#endif
}
-#endif
static void print_t(const char *strval)
{
@@ -34,6 +33,7 @@ static void print_t(const char *strval)
printk(BIOS_DEBUG, "%s", strval);
#endif
}
+#endif
static void print_tf(const char *func, const char *strval)
{
diff --git a/src/northbridge/amd/amdmct/mct/mct_d.c b/src/northbridge/amd/amdmct/mct/mct_d.c
index 3dec934595..88910e2d5f 100644
--- a/src/northbridge/amd/amdmct/mct/mct_d.c
+++ b/src/northbridge/amd/amdmct/mct/mct_d.c
@@ -542,7 +542,6 @@ static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat,
pDCTstat = pDCTstatA + Node;
devx = pDCTstat->dev_map;
DramSelBaseAddr = 0;
- pDCTstat = pDCTstatA + Node;
if (!pDCTstat->GangedMode) {
DramSelBaseAddr = pDCTstat->NodeSysLimit - pDCTstat->DCTSysLimit;
/*In unganged mode, we must add DCT0 and DCT1 to DCTSysLimit */
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
index 71a6be881e..81a75768ab 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
@@ -209,12 +209,24 @@ static const u8 Table_DQSRcvEn_Offset[] = {0x00,0x01,0x10,0x11,0x2};
MEMCLK_MAPPING EQU 00010000b, 00000100b, 00001000b, 00100000b, 00000000b, 00000000b, 00000000b, 00000000b
*/
-/* Note: If you are not sure about the pin mappings at initial stage, we dont have to disable MemClk.
- * Set entries in the tables all 0xFF. */
+/* ==========================================================================================
+ * Set up clock pin to DIMM mappings,
+ * NOTE: If you are not sure about the pin mappings, you can keep all MemClk signals active,
+ * just set all entries in the relevant table(s) to 0xff.
+ * ==========================================================================================
+ */
static const u8 Tab_L1CLKDis[] = {0x20, 0x20, 0x10, 0x10, 0x08, 0x08, 0x04, 0x04};
static const u8 Tab_AM3CLKDis[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00};
static const u8 Tab_S1CLKDis[] = {0xA2, 0xA2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+
+/* C32: Enable CS0 - CS3 clocks (DIMM0 - DIMM1) */
+static const u8 Tab_C32CLKDis[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00};
+
+/* G34: Enable CS0 - CS3 clocks (DIMM0 - DIMM1) */
+static const u8 Tab_G34CLKDis[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00};
+
static const u8 Tab_ManualCLKDis[]= {0x10, 0x04, 0x08, 0x20, 0x00, 0x00, 0x00, 0x00};
+/* ========================================================================================== */
static const u8 Table_Comp_Rise_Slew_20x[] = {7, 3, 2, 2, 0xFF};
static const u8 Table_Comp_Rise_Slew_15x[] = {7, 7, 3, 2, 0xFF};
@@ -277,6 +289,11 @@ restartinit:
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
struct DCTStatStruc *pDCTstat;
pDCTstat = pDCTstatA + Node;
+
+ /* Zero out data structures to avoid false detection of DIMMs */
+ memset(pDCTstat, 0, sizeof(struct DCTStatStruc));
+
+ /* Initialize data structures */
pDCTstat->Node_ID = Node;
pDCTstat->dev_host = PA_HOST(Node);
pDCTstat->dev_map = PA_MAP(Node);
@@ -284,17 +301,22 @@ restartinit:
pDCTstat->dev_nbmisc = PA_NBMISC(Node);
pDCTstat->NodeSysBase = node_sys_base;
+ printk(BIOS_DEBUG, "%s: mct_init Node %d\n", __func__, Node);
mct_init(pMCTstat, pDCTstat);
mctNodeIDDebugPort_D();
pDCTstat->NodePresent = NodePresent_D(Node);
if (pDCTstat->NodePresent) { /* See if Node is there*/
+ printk(BIOS_DEBUG, "%s: clear_legacy_Mode\n", __func__);
clear_legacy_Mode(pMCTstat, pDCTstat);
pDCTstat->LogicalCPUID = mctGetLogicalCPUID_D(Node);
+ printk(BIOS_DEBUG, "%s: mct_InitialMCT_D\n", __func__);
mct_InitialMCT_D(pMCTstat, pDCTstat);
+ printk(BIOS_DEBUG, "%s: mctSMBhub_Init\n", __func__);
mctSMBhub_Init(Node); /* Switch SMBUS crossbar to proper node*/
+ printk(BIOS_DEBUG, "%s: mct_initDCT\n", __func__);
mct_initDCT(pMCTstat, pDCTstat);
if (pDCTstat->ErrCode == SC_FatalErr) {
goto fatalexit; /* any fatal errors?*/
@@ -345,6 +367,7 @@ restartinit:
mct_FinalMCT_D(pMCTstat, pDCTstatA);
printk(BIOS_DEBUG, "mctAutoInitMCT_D Done: Global Status: %x\n", pMCTstat->GStatus);
+
return;
fatalexit:
@@ -560,7 +583,6 @@ static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat,
pDCTstat = pDCTstatA + Node;
devx = pDCTstat->dev_map;
DramSelBaseAddr = 0;
- pDCTstat = pDCTstatA + Node; /* ??? */
if (!pDCTstat->GangedMode) {
DramSelBaseAddr = pDCTstat->NodeSysLimit - pDCTstat->DCTSysLimit;
/*In unganged mode, we must add DCT0 and DCT1 to DCTSysLimit */
@@ -645,6 +667,7 @@ static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat,
devx = pDCTstat->dev_map;
if (pDCTstat->NodePresent) {
+ printk(BIOS_DEBUG, " Copy dram map from Node 0 to Node %02x \n", Node);
reg = 0x40; /*Dram Base 0*/
do {
val = Get_NB32(dev, reg);
@@ -1162,7 +1185,7 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat,
/* Program DRAM Timing values */
DramTimingLo = 0; /* Dram Timing Low init */
- val = pDCTstat->CASL - 2; /* pDCTstat.CASL to reg. definition */
+ val = pDCTstat->CASL - 4; /* pDCTstat.CASL to reg. definition */
DramTimingLo |= val;
val = pDCTstat->Trcd - Bias_TrcdT;
@@ -1406,18 +1429,16 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat,
else if (tCKproposed16x <= 24) {
pDCTstat->TargetFreq = 6;
tCKproposed16x = 24;
- }
- else if (tCKproposed16x <= 30) {
+ } else if (tCKproposed16x <= 30) {
pDCTstat->TargetFreq = 5;
tCKproposed16x = 30;
- }
- else {
+ } else {
pDCTstat->TargetFreq = 4;
tCKproposed16x = 40;
}
/* Running through this loop twice:
- First time find tCL at target frequency
- - Second tim find tCL at 400MHz */
+ - Second time find tCL at 400MHz */
for (;;) {
CLT_Fail = 0;
@@ -1451,7 +1472,7 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat,
CLT_Fail = 1;
/* get CL and T */
if (!CLT_Fail) {
- bytex = CLactual - 2;
+ bytex = CLactual;
if (tCKproposed16x == 20)
byte = 7;
else if (tCKproposed16x == 24)
@@ -1632,7 +1653,7 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
val = 0x0f; /* recommended setting (default) */
DramConfigHi |= val << 24;
- if (pDCTstat->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Bx))
+ if (pDCTstat->LogicalCPUID & (AMD_DR_Dx | AMD_DR_Cx | AMD_DR_Bx))
DramConfigHi |= 1 << DcqArbBypassEn;
/* Build MemClkDis Value from Dram Timing Lo and
@@ -1657,6 +1678,10 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat,
p = Tab_L1CLKDis;
else if (byte == PT_M2 || byte == PT_AS)
p = Tab_AM3CLKDis;
+ else if (byte == PT_C3)
+ p = Tab_C32CLKDis;
+ else if (byte == PT_GR)
+ p = Tab_G34CLKDis;
else
p = Tab_S1CLKDis;
@@ -2102,8 +2127,7 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat,
if (byte == JED_RDIMM || byte == JED_MiniRDIMM) {
RegDIMMPresent |= 1 << i;
pDCTstat->DimmRegistered[i] = 1;
- }
- else {
+ } else {
pDCTstat->DimmRegistered[i] = 0;
}
/* Check ECC capable */
@@ -2977,9 +3001,9 @@ static void mct_FinalMCT_D(struct MCTStatStruc *pMCTstat,
} else { /* For Dx CPU */
val = 0x0CE00F00 | 1 << 29/* FlushWrOnStpGnt */;
if (!(pDCTstat->GangedMode))
- val |= 0x20; /* MctWrLimit = 8 for Unganed mode */
+ val |= 0x20; /* MctWrLimit = 8 for Unganged mode */
else
- val |= 0x40; /* MctWrLimit = 16 for ganed mode */
+ val |= 0x40; /* MctWrLimit = 16 for ganged mode */
Set_NB32(pDCTstat->dev_dct, 0x11C, val);
val = Get_NB32(pDCTstat->dev_dct, 0x1B0);
@@ -3414,6 +3438,138 @@ static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat,
Set_NB32(dev, 0x98 + reg_off, 0x0D000030);
Set_NB32(dev, 0x9C + reg_off, dword);
Set_NB32(dev, 0x98 + reg_off, 0x4D040F30);
+
+ /* FIXME
+ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
+ * For now assume a maximum of 2 DIMMs per channel can be installed
+ */
+ uint8_t MaxDimmsInstallable = 2;
+
+ /* Obtain number of DIMMs on channel */
+ uint8_t dimm_count = pDCTstat->MAdimms[i];
+ uint8_t rank_count_dimm0;
+ uint8_t rank_count_dimm1;
+ uint32_t odt_pattern_0;
+ uint32_t odt_pattern_1;
+ uint32_t odt_pattern_2;
+ uint32_t odt_pattern_3;
+
+ /* Select appropriate ODT pattern for installed DIMMs
+ * Refer to the BKDG Rev. 3.62, page 120 onwards
+ */
+ if (pDCTstat->C_DCTPtr[i]->Status[DCT_STATUS_REGISTERED]) {
+ if (MaxDimmsInstallable == 2) {
+ if (dimm_count == 1) {
+ /* 1 DIMM detected */
+ rank_count_dimm1 = pDCTstat->C_DCTPtr[i]->DimmRanks[1];
+ if (rank_count_dimm1 == 1) {
+ odt_pattern_0 = 0x00000000;
+ odt_pattern_1 = 0x00000000;
+ odt_pattern_2 = 0x00000000;
+ odt_pattern_3 = 0x00020000;
+ } else if (rank_count_dimm1 == 2) {
+ odt_pattern_0 = 0x00000000;
+ odt_pattern_1 = 0x00000000;
+ odt_pattern_2 = 0x00000000;
+ odt_pattern_3 = 0x02080000;
+ } else if (rank_count_dimm1 == 4) {
+ odt_pattern_0 = 0x00000000;
+ odt_pattern_1 = 0x00000000;
+ odt_pattern_2 = 0x020a0000;
+ odt_pattern_3 = 0x080a0000;
+ } else {
+ /* Fallback */
+ odt_pattern_0 = 0x00000000;
+ odt_pattern_1 = 0x00000000;
+ odt_pattern_2 = 0x00000000;
+ odt_pattern_3 = 0x00000000;
+ }
+ } else {
+ /* 2 DIMMs detected */
+ rank_count_dimm0 = pDCTstat->C_DCTPtr[i]->DimmRanks[0];
+ rank_count_dimm1 = pDCTstat->C_DCTPtr[i]->DimmRanks[1];
+ if ((rank_count_dimm0 < 4) && (rank_count_dimm1 < 4)) {
+ odt_pattern_0 = 0x00000000;
+ odt_pattern_1 = 0x01010202;
+ odt_pattern_2 = 0x00000000;
+ odt_pattern_3 = 0x09030603;
+ } else if ((rank_count_dimm0 < 4) && (rank_count_dimm1 == 4)) {
+ odt_pattern_0 = 0x01010000;
+ odt_pattern_1 = 0x01010a0a;
+ odt_pattern_2 = 0x01090000;
+ odt_pattern_3 = 0x01030e0b;
+ } else if ((rank_count_dimm0 == 4) && (rank_count_dimm1 < 4)) {
+ odt_pattern_0 = 0x00000202;
+ odt_pattern_1 = 0x05050202;
+ odt_pattern_2 = 0x00000206;
+ odt_pattern_3 = 0x0d070203;
+ } else if ((rank_count_dimm0 == 4) && (rank_count_dimm1 == 4)) {
+ odt_pattern_0 = 0x05050a0a;
+ odt_pattern_1 = 0x05050a0a;
+ odt_pattern_2 = 0x050d0a0e;
+ odt_pattern_3 = 0x05070a0b;
+ } else {
+ /* Fallback */
+ odt_pattern_0 = 0x00000000;
+ odt_pattern_1 = 0x00000000;
+ odt_pattern_2 = 0x00000000;
+ odt_pattern_3 = 0x00000000;
+ }
+ }
+ } else {
+ /* FIXME
+ * 3 DIMMs per channel UNIMPLEMENTED
+ */
+ odt_pattern_0 = 0x00000000;
+ odt_pattern_1 = 0x00000000;
+ odt_pattern_2 = 0x00000000;
+ odt_pattern_3 = 0x00000000;
+ }
+ } else {
+ if (MaxDimmsInstallable == 2) {
+ if (dimm_count == 1) {
+ /* 1 DIMM detected */
+ rank_count_dimm1 = pDCTstat->C_DCTPtr[i]->DimmRanks[1];
+ if (rank_count_dimm1 == 1) {
+ odt_pattern_0 = 0x00000000;
+ odt_pattern_1 = 0x00000000;
+ odt_pattern_2 = 0x00000000;
+ odt_pattern_3 = 0x00020000;
+ } else if (rank_count_dimm1 == 2) {
+ odt_pattern_0 = 0x00000000;
+ odt_pattern_1 = 0x00000000;
+ odt_pattern_2 = 0x00000000;
+ odt_pattern_3 = 0x02080000;
+ } else {
+ /* Fallback */
+ odt_pattern_0 = 0x00000000;
+ odt_pattern_1 = 0x00000000;
+ odt_pattern_2 = 0x00000000;
+ odt_pattern_3 = 0x00000000;
+ }
+ } else {
+ /* 2 DIMMs detected */
+ odt_pattern_0 = 0x00000000;
+ odt_pattern_1 = 0x01010202;
+ odt_pattern_2 = 0x00000000;
+ odt_pattern_3 = 0x09030603;
+ }
+ } else {
+ /* FIXME
+ * 3 DIMMs per channel UNIMPLEMENTED
+ */
+ odt_pattern_0 = 0x00000000;
+ odt_pattern_1 = 0x00000000;
+ odt_pattern_2 = 0x00000000;
+ odt_pattern_3 = 0x00000000;
+ }
+ }
+
+ /* Program ODT pattern */
+ Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x180, odt_pattern_1);
+ Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x181, odt_pattern_0);
+ Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x182, odt_pattern_3);
+ Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x183, odt_pattern_2);
}
}
}
@@ -3657,6 +3813,7 @@ static void mct_BeforeDQSTrain_D(struct MCTStatStruc *pMCTstat,
}
}
+/* Erratum 350 */
static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 dct)
{
@@ -3692,11 +3849,11 @@ static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat,
mct_Read1LTestPattern_D(pMCTstat, pDCTstat, addr); /* cache fills */
/* Write 0000_8000h to register F2x[1,0]9C_xD080F0C */
- Set_NB32_index_wait(dev, 0x98 + reg_off, 0x4D080F0C, 0x00008000);
+ Set_NB32_index_wait(dev, 0x98 + reg_off, 0xD080F0C, 0x00008000);
mct_Wait(80); /* wait >= 300ns */
/* Write 0000_0000h to register F2x[1,0]9C_xD080F0C */
- Set_NB32_index_wait(dev, 0x98 + reg_off, 0x4D080F0C, 0x00000000);
+ Set_NB32_index_wait(dev, 0x98 + reg_off, 0xD080F0C, 0x00000000);
mct_Wait(800); /* wait >= 2us */
break;
}
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
index d6e5fb4ca9..987c0c8a5c 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
@@ -499,7 +499,7 @@ struct DCTStatStruc { /* A per Node structure*/
/* CHB DIMM0 Byte 0 - 7 TxDqs */
/* CHB DIMM1 Byte 0 - 7 TxDqs */
/* CHB DIMM1 Byte 0 - 7 TxDqs */
- u8 CH_D_B_RCVRDLY[2][4][8]; /* [A/B] [DIMM0-3] [DQS] */
+ u16 CH_D_B_RCVRDLY[2][4][8]; /* [A/B] [DIMM0-3] [DQS] */
/* CHA DIMM 0 Receiver Enable Delay*/
/* CHA DIMM 1 Receiver Enable Delay*/
/* CHA DIMM 2 Receiver Enable Delay*/
@@ -509,7 +509,7 @@ struct DCTStatStruc { /* A per Node structure*/
/* CHB DIMM 1 Receiver Enable Delay*/
/* CHB DIMM 2 Receiver Enable Delay*/
/* CHB DIMM 3 Receiver Enable Delay*/
- u8 CH_D_BC_RCVRDLY[2][4];
+ u16 CH_D_BC_RCVRDLY[2][4];
/* CHA DIMM 0 - 4 Check Byte Receiver Enable Delay*/
/* CHB DIMM 0 - 4 Check Byte Receiver Enable Delay*/
u8 DIMMValidDCT[2]; /* DIMM# in DCT0*/
@@ -769,7 +769,7 @@ u8 mct_checkNumberOfDqsRcvEn_1Pass(u8 pass);
u32 SetupDqsPattern_1PassA(u8 Pass);
u32 SetupDqsPattern_1PassB(u8 Pass);
u8 mct_Get_Start_RcvrEnDly_1Pass(u8 Pass);
-u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 RcvrEnDlyLimit, u8 Channel, u8 Receiver, u8 Pass);
+u16 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, u16 RcvrEnDlyLimit, u8 Channel, u8 Receiver, u8 Pass);
void CPUMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
void UMAMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
uint64_t mctGetLogicalCPUID(u32 Node);
@@ -779,7 +779,7 @@ void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTs
void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
void TrainMaxReadLatency_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA);
void mct_EndDQSTraining_D(struct MCTStatStruc *pMCTstat,struct DCTStatStruc *pDCTstatA);
-void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 FinalValue, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass);
+void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, u8 FinalValue, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass);
void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel);
void mctGet_PS_Cfg_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 dct);
void InterleaveBanks_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct);
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h
index 60f98bc89a..9990304645 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h
@@ -2,6 +2,7 @@
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -103,10 +104,10 @@ static void proc_CLFLUSH(u32 addr_hi)
__asm__ volatile (
/* clflush fs:[eax] */
- "outb %%al, $0xed\n\t" /* _EXECFENCE */
- "clflush %%fs:(%0)\n\t"
+ "outb %%al, $0xed\n\t" /* _EXECFENCE */
+ "clflush %%fs:(%0)\n\t"
"mfence\n\t"
- ::"a" (addr_hi<<8)
+ ::"a" (addr_hi<<8)
);
}
@@ -141,6 +142,24 @@ static u32 read32_fs(u32 addr_lo)
return value;
}
+static uint64_t read64_fs(uint32_t addr_lo)
+{
+ uint64_t value = 0;
+ uint32_t value_lo; /* dword loaded from fs:[addr_lo] */
+ uint32_t value_hi; /* dword loaded from fs:[addr_lo + 4] */
+ /* Read 64 bits from FS-relative offset addr_lo and return them as one value.
+  * The read is performed as two separate 32-bit loads (offset addr_lo, then
+  * addr_lo + 4), so it is not a single 64-bit memory access.
+  * The outb to port 0xed and the mfence are issued before the loads; the
+  * "memory" clobber keeps the compiler from reordering memory accesses
+  * across this statement.
+  * NOTE(review): presumably the caller has already set the FS base to the
+  * region under test - confirm against the call sites.
+  */
+ __asm__ volatile (
+ "outb %%al, $0xed\n\t" /* _EXECFENCE */
+ "mfence\n\t"
+ "movl %%fs:(%2), %0\n\t" /* low dword: fs:[eax] -> ecx */
+ "movl %%fs:(%3), %1\n\t" /* high dword: fs:[ebx] -> edx */
+ :"=c"(value_lo), "=d"(value_hi): "a" (addr_lo), "b" (addr_lo + 4) : "memory"
+ );
+ value |= value_lo; /* bits 31:0 */
+ value |= ((uint64_t)value_hi) << 32; /* bits 63:32 */
+ return value;
+}
+
#ifdef UNUSED_CODE
static u8 read8_fs(u32 addr_lo)
{
@@ -210,68 +229,6 @@ static __attribute__((noinline)) void FlushDQSTestPattern_L18(u32 addr_lo)
);
}
-static void ReadL18TestPattern(u32 addr_lo)
-{
- /* set fs and use fs prefix to access the mem */
- __asm__ volatile (
- "outb %%al, $0xed\n\t" /* _EXECFENCE */
- "movl %%fs:-128(%%esi), %%eax\n\t" /* TestAddr cache line */
- "movl %%fs:-64(%%esi), %%eax\n\t" /* +1 */
- "movl %%fs:(%%esi), %%eax\n\t" /* +2 */
- "movl %%fs:64(%%esi), %%eax\n\t" /* +3 */
-
- "movl %%fs:-128(%%edi), %%eax\n\t" /* +4 */
- "movl %%fs:-64(%%edi), %%eax\n\t" /* +5 */
- "movl %%fs:(%%edi), %%eax\n\t" /* +6 */
- "movl %%fs:64(%%edi), %%eax\n\t" /* +7 */
-
- "movl %%fs:-128(%%ebx), %%eax\n\t" /* +8 */
- "movl %%fs:-64(%%ebx), %%eax\n\t" /* +9 */
- "movl %%fs:(%%ebx), %%eax\n\t" /* +10 */
- "movl %%fs:64(%%ebx), %%eax\n\t" /* +11 */
-
- "movl %%fs:-128(%%ecx), %%eax\n\t" /* +12 */
- "movl %%fs:-64(%%ecx), %%eax\n\t" /* +13 */
- "movl %%fs:(%%ecx), %%eax\n\t" /* +14 */
- "movl %%fs:64(%%ecx), %%eax\n\t" /* +15 */
-
- "movl %%fs:-128(%%edx), %%eax\n\t" /* +16 */
- "movl %%fs:-64(%%edx), %%eax\n\t" /* +17 */
- "mfence\n\t"
-
- :: "a"(0), "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64),
- "d" (addr_lo +128+16*64), "S"(addr_lo+128),
- "D"(addr_lo+128+4*64)
- );
-
-}
-
-static void ReadL9TestPattern(u32 addr_lo)
-{
-
- /* set fs and use fs prefix to access the mem */
- __asm__ volatile (
- "outb %%al, $0xed\n\t" /* _EXECFENCE */
-
- "movl %%fs:-128(%%ecx), %%eax\n\t" /* TestAddr cache line */
- "movl %%fs:-64(%%ecx), %%eax\n\t" /* +1 */
- "movl %%fs:(%%ecx), %%eax\n\t" /* +2 */
- "movl %%fs:64(%%ecx), %%eax\n\t" /* +3 */
-
- "movl %%fs:-128(%%edx), %%eax\n\t" /* +4 */
- "movl %%fs:-64(%%edx), %%eax\n\t" /* +5 */
- "movl %%fs:(%%edx), %%eax\n\t" /* +6 */
- "movl %%fs:64(%%edx), %%eax\n\t" /* +7 */
-
- "movl %%fs:-128(%%ebx), %%eax\n\t" /* +8 */
- "mfence\n\t"
-
- :: "a"(0), "b" (addr_lo+128+8*64), "c"(addr_lo+128),
- "d"(addr_lo+128+4*64)
- );
-
-}
-
static void ReadMaxRdLat1CLTestPattern_D(u32 addr)
{
SetUpperFSbase(addr);
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c b/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c
index ae1654cc21..99a26288f9 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c
@@ -2,6 +2,7 @@
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -17,7 +18,7 @@
* Foundation, Inc.
*/
-/* The socket type F (1207), Fr2, G (1207) are not tested.
+/* The socket types Fr2 and G (1207) are not tested.
*/
static void Get_ChannelPS_Cfg0_D(u8 MAAdimms, u8 Speed, u8 MAAload,
@@ -79,8 +80,7 @@ static void Get_ChannelPS_Cfg0_D( u8 MAAdimms, u8 Speed, u8 MAAload,
else
*AddrTmgCTL = 0x00353935;
}
- }
- else {
+ } else {
if(Speed == 4) {
*AddrTmgCTL = 0x00000000;
if (MAAdimms == 3)
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
index 404727b493..cc2f43a952 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
@@ -2,6 +2,7 @@
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -22,13 +23,6 @@ static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
u8 scale, u8 ChipSel);
static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 ChipSel);
-static u8 MiddleDQS_D(u8 min, u8 max);
-static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u8 cs_start);
-static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u8 cs_start);
static void WriteDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat,
u32 TestAddr_lo);
@@ -43,31 +37,19 @@ static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat,
u32 addr_lo);
static void SetTargetWTIO_D(u32 TestAddr);
static void ResetTargetWTIO_D(void);
-static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u32 TestAddr_lo);
-static void mctEngDQSwindow_Save_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat, u8 ChipSel,
- u8 RnkDlyFilterMin, u8 RnkDlyFilterMax);
void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index);
u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat);
static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat,
u8 ChipSel);
-static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u8 cs_start);
u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 Channel,
u8 receiver, u8 *valid);
static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat,
u32 *buffer);
-
-static void StoreWrRdDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat, u8 ChipSel,
- u8 RnkDlyFilterMin, u8 RnkDlyFilterMax);
+static void proc_IOCLFLUSH_D(u32 addr_hi);
static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 ChipSel);
@@ -286,20 +268,99 @@ static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat,
pDCTstat->DQSDelay = (u8)DQSDelay;
}
+static void write_dqs_write_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
+{
+ uint32_t dword;
+ /* Program the per-lane DQS write data timing delays for one DIMM.
+  * delay:     array indexed by byte lane; entries 0 - 8 are read, so it must
+  *            hold at least 9 elements. Only the low 7 bits of each entry
+  *            are written.
+  * dev, index_reg: passed unchanged to Get_NB32_index_wait() and
+  *            Set_NB32_index_wait() to reach the indexed registers.
+  * dimm:      shifted to bit 8 and up of the register index (dimm << 8).
+  * Each register is updated by read-modify-write, so bits outside the 7-bit
+  * delay fields keep their current values.
+  */
+ /* Lanes 0 - 3: index 0x1, lane 0 in bits 6:0 up to lane 3 in bits 30:24 */
+ dword = Get_NB32_index_wait(dev, index_reg, 0x1 | (dimm << 8));
+ dword &= ~0x7f7f7f7f; /* clear the four 7-bit delay fields */
+ dword |= (delay[3] & 0x7f) << 24;
+ dword |= (delay[2] & 0x7f) << 16;
+ dword |= (delay[1] & 0x7f) << 8;
+ dword |= delay[0] & 0x7f;
+ Set_NB32_index_wait(dev, index_reg, 0x1 | (dimm << 8), dword);
+
+ /* Lanes 4 - 7: index 0x2, same field layout as lanes 0 - 3 */
+ dword = Get_NB32_index_wait(dev, index_reg, 0x2 | (dimm << 8));
+ dword &= ~0x7f7f7f7f; /* clear the four 7-bit delay fields */
+ dword |= (delay[7] & 0x7f) << 24;
+ dword |= (delay[6] & 0x7f) << 16;
+ dword |= (delay[5] & 0x7f) << 8;
+ dword |= delay[4] & 0x7f;
+ Set_NB32_index_wait(dev, index_reg, 0x2 | (dimm << 8), dword);
+
+ /* Lane 8 (ECC): index 0x3, only bits 6:0 are modified */
+ dword = Get_NB32_index_wait(dev, index_reg, 0x3 | (dimm << 8));
+ dword &= ~0x0000007f;
+ dword |= delay[8] & 0x7f;
+ Set_NB32_index_wait(dev, index_reg, 0x3 | (dimm << 8), dword);
+}
+
+static void write_dqs_read_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
+{
+ uint32_t dword;
+ /* Program the per-lane DQS read data timing delays for one DIMM.
+  * delay:     array indexed by byte lane; entries 0 - 8 are read, so it must
+  *            hold at least 9 elements. Only the low 6 bits of each entry
+  *            are written.
+  * dev, index_reg: passed unchanged to Get_NB32_index_wait() and
+  *            Set_NB32_index_wait() to reach the indexed registers.
+  * dimm:      shifted to bit 8 and up of the register index (dimm << 8).
+  * Each register is updated by read-modify-write, so bits outside the 6-bit
+  * delay fields keep their current values.
+  */
+ /* Lanes 0 - 3: index 0x5, lane 0 in bits 5:0 up to lane 3 in bits 29:24 */
+ dword = Get_NB32_index_wait(dev, index_reg, 0x5 | (dimm << 8));
+ dword &= ~0x3f3f3f3f; /* clear the four 6-bit delay fields */
+ dword |= (delay[3] & 0x3f) << 24;
+ dword |= (delay[2] & 0x3f) << 16;
+ dword |= (delay[1] & 0x3f) << 8;
+ dword |= delay[0] & 0x3f;
+ Set_NB32_index_wait(dev, index_reg, 0x5 | (dimm << 8), dword);
+
+ /* Lanes 4 - 7: index 0x6, same field layout as lanes 0 - 3 */
+ dword = Get_NB32_index_wait(dev, index_reg, 0x6 | (dimm << 8));
+ dword &= ~0x3f3f3f3f; /* clear the four 6-bit delay fields */
+ dword |= (delay[7] & 0x3f) << 24;
+ dword |= (delay[6] & 0x3f) << 16;
+ dword |= (delay[5] & 0x3f) << 8;
+ dword |= delay[4] & 0x3f;
+ Set_NB32_index_wait(dev, index_reg, 0x6 | (dimm << 8), dword);
+
+ /* Lane 8 (ECC): index 0x7, only bits 5:0 are modified */
+ dword = Get_NB32_index_wait(dev, index_reg, 0x7 | (dimm << 8));
+ dword &= ~0x0000003f;
+ dword |= delay[8] & 0x3f;
+ Set_NB32_index_wait(dev, index_reg, 0x7 | (dimm << 8), dword);
+}
+
+/* DQS Position Training
+ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.3
+ */
static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u8 cs_start)
+ struct DCTStatStruc *pDCTstat)
{
u32 Errors;
- u8 Channel, DQSWrDelay;
+ u8 Channel;
+ u8 Receiver;
u8 _DisableDramECC = 0;
- u32 PatternBuffer[292];
+ u32 PatternBuffer[304]; /* 288 + 16 */
u8 _Wrap32Dis = 0, _SSE2 = 0;
- u8 dqsWrDelay_end;
+ u32 dev;
u32 addr;
+ u8 valid;
u32 cr4;
u32 lo, hi;
+ u32 index_reg;
+ uint32_t TestAddr;
+
+ uint8_t dual_rank;
+ uint8_t iter;
+ uint8_t lane;
+ uint16_t bytelane_test_results;
+ uint16_t current_write_dqs_delay[MAX_BYTE_LANES];
+ uint16_t current_read_dqs_delay[MAX_BYTE_LANES];
+ uint16_t write_dqs_delay_stepping_done[MAX_BYTE_LANES];
+ uint8_t dqs_read_results_array[2][MAX_BYTE_LANES][64]; /* [rank][lane][step] */
+ uint8_t dqs_write_results_array[2][MAX_BYTE_LANES][128]; /* [rank][lane][step] */
+
+ uint8_t last_pos = 0;
+ uint8_t cur_count = 0;
+ uint8_t best_pos = 0;
+ uint8_t best_count = 0;
print_debug_dqs("\nTrainDQSRdWrPos: Node_ID ", pDCTstat->Node_ID, 0);
cr4 = read_cr4();
@@ -323,50 +384,363 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
SetupDqsPattern_D(pMCTstat, pDCTstat, PatternBuffer);
/* mct_BeforeTrainDQSRdWrPos_D */
- dqsWrDelay_end = 0x20;
+
+ dev = pDCTstat->dev_dct;
+ pDCTstat->Direction = DQS_READDIR;
+
+ /* 2.8.9.9.3 (2)
+ * Loop over each channel, rank, and lane
+ */
+
+ /* NOTE
+ * The BKDG originally stated to iterate over lane, then rank; however, that process is quite slow
+ * compared to an equivalent loop over rank, then lane, as the latter allows multiple lanes to be
+ * tested simultaneously, thus improving performance by around 8x.
+ */
Errors = 0;
for (Channel = 0; Channel < 2; Channel++) {
- print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ",Channel, 1);
+ print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ", Channel, 1);
pDCTstat->Channel = Channel;
if (pDCTstat->DIMMValidDCT[Channel] == 0) /* mct_BeforeTrainDQSRdWrPos_D */
continue;
- pDCTstat->DqsRdWrPos_Saved = 0;
- for ( DQSWrDelay = 0; DQSWrDelay < dqsWrDelay_end; DQSWrDelay++) {
- pDCTstat->DQSDelay = DQSWrDelay;
- pDCTstat->Direction = DQS_WRITEDIR;
- mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start);
-
- print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2);
- TrainReadDQS_D(pMCTstat, pDCTstat, cs_start);
- print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DqsRdWrPos_Saved ", pDCTstat->DqsRdWrPos_Saved, 2);
- if (pDCTstat->DqsRdWrPos_Saved == 0xFF)
- break;
-
- print_debug_dqs("\t\tTrainDQSRdWrPos: 22 TrainErrors ",pDCTstat->TrainErrors, 2);
- if (pDCTstat->TrainErrors == 0) {
+
+ index_reg = 0x98 + 0x100 * Channel;
+
+ dual_rank = 0;
+ Receiver = mct_InitReceiver_D(pDCTstat, Channel);
+ /* There are four receiver pairs, loosely associated with chipselects.
+ * This is essentially looping over each rank of each DIMM.
+ */
+ for (; Receiver < 8; Receiver++) {
+ if ((Receiver & 0x1) == 0) {
+ /* Even rank of DIMM */
+ if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1))
+ dual_rank = 1;
+ else
+ dual_rank = 0;
+ }
+
+ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
+ continue;
+ }
+
+ /* Select the base test address for the current rank */
+ TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
+ if (!valid) { /* Address not supported on current CS */
+ continue;
+ }
+
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 14 TestAddr ", TestAddr, 4);
+ SetUpperFSbase(TestAddr); /* fs:eax=far ptr to target */
+
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 12 Receiver ", Receiver, 2);
+
+ /* 2.8.9.9.3 (DRAM Write Data Timing Loop)
+ * Iterate over all possible DQS delay values (0x0 - 0x7f)
+ */
+ uint8_t test_write_dqs_delay = 0;
+ uint8_t test_read_dqs_delay = 0;
+ uint8_t passing_dqs_delay_found[MAX_BYTE_LANES];
+
+ /* Initialize variables */
+ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+ current_write_dqs_delay[lane] = 0;
+ passing_dqs_delay_found[lane] = 0;
+ write_dqs_delay_stepping_done[lane] = 0;
+ }
+
+ for (test_write_dqs_delay = 0; test_write_dqs_delay < 128; test_write_dqs_delay++) {
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 16 test_write_dqs_delay ", test_write_dqs_delay, 6);
+
+ /* Break out of loop once a passing window has been found for lanes 0 - 7 */
+ if (write_dqs_delay_stepping_done[0] && write_dqs_delay_stepping_done[1]
+ && write_dqs_delay_stepping_done[2] && write_dqs_delay_stepping_done[3]
+ && write_dqs_delay_stepping_done[4] && write_dqs_delay_stepping_done[5]
+ && write_dqs_delay_stepping_done[6] && write_dqs_delay_stepping_done[7])
break;
+
+ /* Commit the current Write Data Timing settings to the hardware registers */
+ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+ /* Write the DRAM training pattern to the base test address */
+ WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
+
+ /* 2.8.9.9.3 (DRAM Read DQS Timing Control Loop)
+ * Iterate over all possible DQS delay values (0x0 - 0x3f)
+ */
+ for (test_read_dqs_delay = 0; test_read_dqs_delay < 64; test_read_dqs_delay++) {
+ print_debug_dqs("\t\t\t\t\tTrainDQSRdWrPos: 161 test_read_dqs_delay ", test_read_dqs_delay, 6);
+
+ /* Initialize Read DQS Timing Control settings for this iteration */
+ for (lane = 0; lane < MAX_BYTE_LANES; lane++)
+ if (!write_dqs_delay_stepping_done[lane])
+ current_read_dqs_delay[lane] = test_read_dqs_delay;
+
+ /* Commit the current Read DQS Timing Control settings to the hardware registers */
+ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+ /* Initialize test result variable */
+ bytelane_test_results = 0xff;
+
+ /* Read the DRAM training pattern from the base test address
+ * NOTE
+ * While the BKDG states to read three times, this is probably excessive!
+ * Decrease training time by only reading the test pattern once per iteration
+ */
+ for (iter = 0; iter < 1; iter++) {
+ /* Flush caches */
+ SetTargetWTIO_D(TestAddr);
+ FlushDQSTestPattern_D(pDCTstat, TestAddr << 8);
+ ResetTargetWTIO_D();
+
+ /* Read and compare pattern */
+ bytelane_test_results &= (CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8) & 0xff); /* [Lane 7 :: Lane 0] 0=fail, 1=pass */
+
+ /* If all lanes have already failed testing, bypass remaining re-read attempt(s) */
+ if (bytelane_test_results == 0x0)
+ break;
+ }
+
+ /* Record the pass (1) / fail (0) result for each of lanes 0 - 7 that is still being stepped */
+ for (lane = 0; lane < 8; lane++)
+ if (!write_dqs_delay_stepping_done[lane])
+ dqs_read_results_array[Receiver & 0x1][lane][test_read_dqs_delay] = (!!(bytelane_test_results & (1 << lane)));
+
+ print_debug_dqs("\t\t\t\t\tTrainDQSRdWrPos: 162 bytelane_test_results ", bytelane_test_results, 6);
+ }
+
+ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+ if (write_dqs_delay_stepping_done[lane])
+ continue;
+
+ /* Determine location and length of longest consecutive string of passing values (step 63 always ends a run)
+ * Output is stored in best_pos (the failing step preceding the window, or 0) and best_count
+ */
+ last_pos = 0;
+ cur_count = 0;
+ best_pos = 0;
+ best_count = 0;
+ for (iter = 0; iter < 64; iter++) {
+ if ((dqs_read_results_array[Receiver & 0x1][lane][iter]) && (iter < 63)) {
+ /* Pass */
+ cur_count++;
+ } else {
+ /* Failure or end of loop */
+ if (cur_count > best_count) {
+ best_count = cur_count;
+ best_pos = last_pos;
+ }
+ cur_count = 0;
+ last_pos = iter;
+ }
+ }
+
+ if (best_count > 2) {
+ /* Exit the DRAM Write Data Timing Loop after programming the Read DQS Timing Control
+ * register with the center of the passing window
+ */
+ current_read_dqs_delay[lane] = (best_pos + (best_count / 2));
+ passing_dqs_delay_found[lane] = 1;
+
+ /* Commit the current Read DQS Timing Control settings to the hardware registers */
+ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+ /* Exit the DRAM Write Data Timing Loop */
+ write_dqs_delay_stepping_done[lane] = 1;
+
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 142 largest passing region ", best_count, 4);
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 143 largest passing region start ", best_pos, 4);
+ }
+
+ /* Increment the DQS Write Delay value if needed for the next DRAM Write Data Timing Loop iteration */
+ if (!write_dqs_delay_stepping_done[lane])
+ current_write_dqs_delay[lane]++;
+ }
}
- Errors |= pDCTstat->TrainErrors;
- }
- pDCTstat->DqsRdWrPos_Saved = 0;
- if (DQSWrDelay < dqsWrDelay_end) {
- Errors = 0;
+ /* Flag failure(s) if present */
+ for (lane = 0; lane < 8; lane++) {
+ if (!passing_dqs_delay_found[lane]) {
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 121 Unable to find passing region for lane ", lane, 2);
+
+ /* Flag absence of passing window */
+ Errors |= 1 << SB_NODQSPOS;
+ }
+ }
+
+ /* Iterate over all possible Write Data Timing values (0x0 - 0x7f)
+ * Note that the Read DQS Timing Control was calibrated / centered in the prior nested loop
+ */
+ for (test_write_dqs_delay = 0; test_write_dqs_delay < 128; test_write_dqs_delay++) {
+ /* Initialize Write Data Timing settings for this iteration */
+ for (lane = 0; lane < MAX_BYTE_LANES; lane++)
+ current_write_dqs_delay[lane] = test_write_dqs_delay;
+
+ /* Commit the current Write Data Timing settings to the hardware registers */
+ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+ /* Write the DRAM training pattern to the base test address */
+ WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
+
+ /* Flush caches */
+ SetTargetWTIO_D(TestAddr);
+ FlushDQSTestPattern_D(pDCTstat, TestAddr << 8);
+ ResetTargetWTIO_D();
+
+ /* Read and compare pattern from the base test address */
+ bytelane_test_results = (CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8) & 0xff); /* [Lane 7 :: Lane 0] 0=fail, 1=pass */
+
+ /* Record the pass (1) / fail (0) result for each of lanes 0 - 7 at this write delay */
+ for (lane = 0; lane < 8; lane++)
+ dqs_write_results_array[Receiver & 0x1][lane][test_write_dqs_delay] = (!!(bytelane_test_results & (1 << lane)));
+ }
+
+ for (lane = 0; lane < 8; lane++) {
+ if ((!dual_rank) || (dual_rank && (Receiver & 0x1))) {
+
+#ifdef PRINT_PASS_FAIL_BITMAPS
+ for (iter = 0; iter < 64; iter++) {
+ if (dqs_read_results_array[0][lane][iter])
+ printk(BIOS_DEBUG, "+");
+ else
+ printk(BIOS_DEBUG, ".");
+ }
+ printk(BIOS_DEBUG, "\n");
+ for (iter = 0; iter < 64; iter++) {
+ if (dqs_read_results_array[1][lane][iter])
+ printk(BIOS_DEBUG, "+");
+ else
+ printk(BIOS_DEBUG, ".");
+ }
+ printk(BIOS_DEBUG, "\n\n");
+ for (iter = 0; iter < 128; iter++) {
+ if (dqs_write_results_array[0][lane][iter])
+ printk(BIOS_DEBUG, "+");
+ else
+ printk(BIOS_DEBUG, ".");
+ }
+ printk(BIOS_DEBUG, "\n");
+ for (iter = 0; iter < 128; iter++) {
+ if (dqs_write_results_array[1][lane][iter])
+ printk(BIOS_DEBUG, "+");
+ else
+ printk(BIOS_DEBUG, ".");
+ }
+ printk(BIOS_DEBUG, "\n\n");
+#endif
+
+ /* Base rank of single-rank DIMM, or odd rank of dual-rank DIMM */
+ if (dual_rank) {
+ /* Intersect the passing windows of both ranks */
+ for (iter = 0; iter < 64; iter++)
+ if (!dqs_read_results_array[1][lane][iter])
+ dqs_read_results_array[0][lane][iter] = 0;
+ for (iter = 0; iter < 128; iter++)
+ if (!dqs_write_results_array[1][lane][iter])
+ dqs_write_results_array[0][lane][iter] = 0;
+ }
+
+ /* Determine location and length of longest consecutive string of passing values for read DQS timing (step 63 always ends a run)
+ * Output is stored in best_pos (the failing step preceding the window, or 0) and best_count
+ */
+ last_pos = 0;
+ cur_count = 0;
+ best_pos = 0;
+ best_count = 0;
+ for (iter = 0; iter < 64; iter++) {
+ if ((dqs_read_results_array[0][lane][iter]) && (iter < 63)) {
+ /* Pass */
+ cur_count++;
+ } else {
+ /* Failure or end of loop */
+ if (cur_count > best_count) {
+ best_count = cur_count;
+ best_pos = last_pos;
+ }
+ cur_count = 0;
+ last_pos = iter;
+ }
+ }
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 144 largest read passing region ", best_count, 4);
+ if (best_count > 0) {
+ if (best_count < MIN_DQS_WNDW) {
+ /* Flag excessively small passing window */
+ Errors |= 1 << SB_SMALLDQS;
+ }
+
+ /* Find the center of the passing window */
+ current_read_dqs_delay[lane] = (best_pos + (best_count / 2));
+
+ /* Commit the current Read DQS Timing Control settings to the hardware registers */
+ write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+ /* Save the final Read DQS Timing Control settings for later use */
+ pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_READDIR][lane] = current_read_dqs_delay[lane];
+ } else {
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 122 Unable to find read passing region for lane ", lane, 2);
+
+ /* Flag absence of passing window */
+ Errors |= 1 << SB_NODQSPOS;
+ }
+
+ /* Determine location and length of longest consecutive string of passing values for write DQS timing (step 127 always ends a run)
+ * Output is stored in best_pos (the failing step preceding the window, or 0) and best_count
+ */
+ last_pos = 0;
+ cur_count = 0;
+ best_pos = 0;
+ best_count = 0;
+ for (iter = 0; iter < 128; iter++) {
+ if ((dqs_write_results_array[0][lane][iter]) && (iter < 127)) {
+ /* Pass */
+ cur_count++;
+ } else {
+ /* Failure or end of loop */
+ if (cur_count > best_count) {
+ best_count = cur_count;
+ best_pos = last_pos;
+ }
+ cur_count = 0;
+ last_pos = iter;
+ }
+ }
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 145 largest write passing region ", best_count, 4);
+ if (best_count > 0) {
+ if (best_count < MIN_DQS_WNDW) {
+ /* Flag excessively small passing window */
+ Errors |= 1 << SB_SMALLDQS;
+ }
+
+ /* Find the center of the passing window */
+ current_write_dqs_delay[lane] = (best_pos + (best_count / 2));
+
+ /* Commit the current Write Data Timing settings to the hardware registers */
+ write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg);
+
+ /* Save the final Write Data Timing settings for later use */
+ pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_WRITEDIR][lane] = current_write_dqs_delay[lane];
+ } else {
+ print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 123 Unable to find write passing region for lane ", lane, 2);
+
+ /* Flag absence of passing window */
+ Errors |= 1 << SB_NODQSPOS;
+ }
+ }
+ }
- print_debug_dqs("\tTrainDQSRdWrPos: 231 DQSWrDelay ", DQSWrDelay, 1);
- TrainWriteDQS_D(pMCTstat, pDCTstat, cs_start);
}
- print_debug_dqs("\tTrainDQSRdWrPos: 232 Errors ", Errors, 1);
- pDCTstat->ErrStatus |= Errors;
}
+ pDCTstat->TrainErrors |= Errors;
+ pDCTstat->ErrStatus |= Errors;
+
#if DQS_TRAIN_DEBUG > 0
{
u8 val;
u8 i;
- u8 Channel, Receiver, Dir;
+ u8 ChannelDTD, ReceiverDTD, Dir;
u8 *p;
for (Dir = 0; Dir < 2; Dir++) {
@@ -375,14 +749,14 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat,
} else {
printk(BIOS_DEBUG, "TrainDQSRdWrPos: CH_D_DIR_B_DQS RD:\n");
}
- for (Channel = 0; Channel < 2; Channel++) {
- printk(BIOS_DEBUG, "Channel: %02x\n", Channel);
- for (Receiver = cs_start; Receiver < (cs_start + 2); Receiver += 2) {
- printk(BIOS_DEBUG, "\t\tReceiver: %02x: ", Receiver);
- p = pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][Dir];
+ for (ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) {
+ printk(BIOS_DEBUG, "Channel: %02x\n", ChannelDTD);
+ for (ReceiverDTD = 0; ReceiverDTD < MAX_CS_SUPPORTED; ReceiverDTD += 2) {
+ printk(BIOS_DEBUG, "\t\tReceiver: %02x:", ReceiverDTD);
+ p = pDCTstat->CH_D_DIR_B_DQS[ChannelDTD][ReceiverDTD >> 1][Dir];
for (i=0;i<8; i++) {
val = p[i];
- printk(BIOS_DEBUG, "%02x ", val);
+ printk(BIOS_DEBUG, " %02x", val);
}
printk(BIOS_DEBUG, "\n");
}
@@ -437,225 +811,6 @@ static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat,
pDCTstat->PtrPatternBufA = (u32)buf;
}
-static void TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u8 cs_start)
-{
- u32 Errors;
- u8 ChipSel, DQSDelay;
- u8 RnkDlySeqPassMin=0, RnkDlySeqPassMax=0xFF, RnkDlyFilterMin=0, RnkDlyFilterMax=0xFF;
- u8 RnkDlySeqPassMinTot=0, RnkDlySeqPassMaxTot=0xFF, RnkDlyFilterMinTot=0, RnkDlyFilterMaxTot=0xFF;
- u8 LastTest ,LastTestTot;
- u32 TestAddr;
- u8 ByteLane;
- u8 MutualCSPassW[128];
- u8 BanksPresent;
- u8 dqsDelay_end;
- u8 tmp, valid, tmp1;
- u16 word;
-
- /* MutualCSPassW: each byte represents a bitmap of pass/fail per
- * ByteLane. The indext within MutualCSPassW is the delay value
- * given the results.
- */
- print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3);
-
- Errors = 0;
- BanksPresent = 0;
-
- dqsDelay_end = 32;
- /* Bitmapped status per delay setting, 0xff=All positions
- * passing (1= PASS). Set the entire array.
- */
- for (DQSDelay=0; DQSDelay<128; DQSDelay++) {
- MutualCSPassW[DQSDelay] = 0xFF;
- }
-
- for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) { /* logical register chipselects 0..7 */
- print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4);
-
- if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) {
- print_debug_dqs("\t\t\t\tmct_RcvrRankEnabled_D CS not enabled ", ChipSel, 4);
- continue;
- }
-
- BanksPresent = 1; /* flag for at least one bank is present */
- TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel, &valid);
- if (!valid) {
- print_debug_dqs("\t\t\t\tAddress not supported on current CS ", TestAddr, 4);
- continue;
- }
-
- print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4);
- SetUpperFSbase(TestAddr); /* fs:eax=far ptr to target */
-
- if (pDCTstat->Direction == DQS_READDIR) {
- print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read ", 0, 4);
- WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
- }
-
- for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5);
-
- tmp = 0xFF;
- tmp1 = DQSDelay;
- if (pDCTstat->Direction == DQS_READDIR) {
- tmp &= MutualCSPassW[DQSDelay];
- tmp1 += dqsDelay_end;
- }
- tmp &= MutualCSPassW[tmp1];
-
- if (tmp == 0) {
- continue;/* skip current delay value if other chipselects have failed all 8 bytelanes */
- }
-
- pDCTstat->DQSDelay = DQSDelay;
- mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start);
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
-
- if (pDCTstat->Direction == DQS_WRITEDIR) {
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5);
- WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
- }
-
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", pDCTstat->Pattern, 5);
- ReadDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8);
- /* print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); */
- word = CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); /* 0=fail, 1=pass */
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 compare 1 ", word, 3);
-
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 DqsRdWrPos_Saved ", pDCTstat->DqsRdWrPos_Saved, 3);
- word &= ~(pDCTstat->DqsRdWrPos_Saved); /* mask out bytelanes that already passed */
- word &= ~(pDCTstat->DqsRdWrPos_Saved << 8);
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 compare 2 ", word, 3);
-
- tmp = DQSDelay;
- if (pDCTstat->Direction == DQS_READDIR) {
- MutualCSPassW[tmp] &= word >> 8;
- tmp += dqsDelay_end;
- }
- MutualCSPassW[tmp] &= word & 0xFF;
-
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 \tMutualCSPassW ", MutualCSPassW[DQSDelay], 5);
-
- SetTargetWTIO_D(TestAddr);
- FlushDQSTestPattern_D(pDCTstat, TestAddr << 8);
- ResetTargetWTIO_D();
- }
-
- }
-
- if (pDCTstat->Direction == DQS_READDIR) {
- dqsDelay_end <<= 1;
- }
-
- if (BanksPresent) {
- #if 0 /* show the bitmap */
- for (ByteLane = 0; ByteLane < 8; ByteLane++) { /* just print ByteLane 0 */
- for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
- if (!(MutualCSPassW[DQSDelay] &(1 << ByteLane))) {
- printk(BIOS_DEBUG, ".");
- } else {
- printk(BIOS_DEBUG, "*");
- }
- }
- printk(BIOS_DEBUG, "\n");
- }
- #endif
- for (ByteLane = 0; ByteLane < 8; ByteLane++) {
- print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4);
- if (!(pDCTstat->DqsRdWrPos_Saved &(1 << ByteLane))) {
- pDCTstat->ByteLane = ByteLane;
- LastTest = DQS_FAIL; /* Analyze the results */
- LastTestTot = DQS_FAIL;
- /* RnkDlySeqPassMin = 0; */
- /* RnkDlySeqPassMax = 0; */
- RnkDlyFilterMax = 0;
- RnkDlyFilterMin = 0;
- RnkDlyFilterMaxTot = 0;
- RnkDlyFilterMinTot = 0;
- for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) {
- if (MutualCSPassW[DQSDelay] & (1 << ByteLane)) {
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5);
- print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5);
- if (pDCTstat->Direction == DQS_READDIR)
- tmp = 0x20;
- else
- tmp = 0;
- if (DQSDelay >= tmp) {
- RnkDlySeqPassMax = DQSDelay;
- if (LastTest == DQS_FAIL) {
- RnkDlySeqPassMin = DQSDelay; /* start sequential run */
- }
- if ((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){
- RnkDlyFilterMin = RnkDlySeqPassMin;
- RnkDlyFilterMax = RnkDlySeqPassMax;
- }
- LastTest = DQS_PASS;
- }
-
- if (pDCTstat->Direction == DQS_READDIR) {
- RnkDlySeqPassMaxTot = DQSDelay;
- if (LastTestTot == DQS_FAIL)
- RnkDlySeqPassMinTot = DQSDelay;
- if ((RnkDlySeqPassMaxTot - RnkDlySeqPassMinTot)>(RnkDlyFilterMaxTot-RnkDlyFilterMinTot)){
- RnkDlyFilterMinTot = RnkDlySeqPassMinTot;
- RnkDlyFilterMaxTot = RnkDlySeqPassMaxTot;
- }
- LastTestTot = DQS_PASS;
- }
- } else {
- LastTest = DQS_FAIL;
- LastTestTot = DQS_FAIL;
- }
- }
- print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4);
- if (RnkDlySeqPassMax == 0) {
- Errors |= 1 << SB_NODQSPOS; /* no passing window */
- } else {
- print_debug_dqs_pair("\t\t\t\tTrainDQSPos: 34 RnkDlyFilter: ", RnkDlyFilterMin, " ", RnkDlyFilterMax, 4);
- if (((RnkDlyFilterMax - RnkDlyFilterMin) < MIN_DQS_WNDW)){
- Errors |= 1 << SB_SMALLDQS;
- } else {
- u8 middle_dqs;
- /* mctEngDQSwindow_Save_D Not required for arrays */
- if (pDCTstat->Direction == DQS_READDIR)
- middle_dqs = MiddleDQS_D(RnkDlyFilterMinTot, RnkDlyFilterMaxTot);
- else
- middle_dqs = MiddleDQS_D(RnkDlyFilterMin, RnkDlyFilterMax);
- pDCTstat->DQSDelay = middle_dqs;
- mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, cs_start); /* load the register with the value */
- if (pDCTstat->Direction == DQS_READDIR)
- StoreWrRdDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start, RnkDlyFilterMinTot, RnkDlyFilterMaxTot); /* store the value into the data structure */
- else
- StoreWrRdDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start, RnkDlyFilterMin, RnkDlyFilterMax); /* store the value into the data structure */
- print_debug_dqs("\t\t\t\tTrainDQSPos: 42 middle_dqs : ",middle_dqs, 4);
- pDCTstat->DqsRdWrPos_Saved |= 1 << ByteLane;
- }
- }
- }
- } /* if (pDCTstat->DqsRdWrPos_Saved &(1 << ByteLane)) */
- }
-/* skipLocMiddle: */
- pDCTstat->TrainErrors = Errors;
-
- print_debug_dqs("\t\t\tTrainDQSPos: Errors ", Errors, 3);
-}
-
-static void mctEngDQSwindow_Save_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat, u8 ChipSel,
- u8 RnkDlyFilterMin, u8 RnkDlyFilterMax)
-{
- pDCTstat->CH_D_DIR_MaxMin_B_Dly[pDCTstat->Channel]
- [pDCTstat->Direction]
- [0]
- [pDCTstat->ByteLane] = RnkDlyFilterMin;
- pDCTstat->CH_D_DIR_MaxMin_B_Dly[pDCTstat->Channel]
- [pDCTstat->Direction]
- [1]
- [pDCTstat->ByteLane] = RnkDlyFilterMax;
-}
-
static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 ChipSel)
{
@@ -679,26 +834,6 @@ static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
pDCTstat->DQSDelay;
}
-static void StoreWrRdDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat, u8 ChipSel,
- u8 RnkDlyFilterMin, u8 RnkDlyFilterMax)
-{
- u8 dn;
-
- if (pDCTstat->Direction == DQS_WRITEDIR) {
- dn = ChipSel >> 1;
- RnkDlyFilterMin += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane];
- RnkDlyFilterMax += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane];
- pDCTstat->DQSDelay += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane];
- } else {
- RnkDlyFilterMin <<= 1;
- RnkDlyFilterMax <<= 1;
- pDCTstat->DQSDelay <<= 1;
- }
- mctEngDQSwindow_Save_D(pMCTstat, pDCTstat, ChipSel, RnkDlyFilterMin, RnkDlyFilterMax);
- StoreDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel);
-}
-
static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 ChipSel)
{
@@ -720,33 +855,6 @@ static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat,
/* FindDQSDatDimmVal_D is not required since we use an array */
-static u8 MiddleDQS_D(u8 min, u8 max)
-{
- u8 size;
- size = max-min;
- if (size % 2)
- size++; /* round up if the size isn't even. */
- return ( min + (size >> 1));
-}
-
-static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u8 cs_start)
-{
- print_debug_dqs("\t\tTrainReadPos ", 0, 2);
- pDCTstat->Direction = DQS_READDIR;
- TrainDQSPos_D(pMCTstat, pDCTstat, cs_start);
-}
-
-static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u8 cs_start)
-{
- pDCTstat->Direction = DQS_WRITEDIR;
- print_debug_dqs("\t\tTrainWritePos", 0, 2);
- TrainDQSPos_D(pMCTstat, pDCTstat, cs_start);
-}
-
static void proc_IOCLFLUSH_D(u32 addr_hi)
{
SetTargetWTIO_D(addr_hi);
@@ -963,30 +1071,6 @@ static void ResetTargetWTIO_D(void)
_WRMSR(0xc0010017, lo, hi); /* IORR0 Mask */
}
-static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u32 TestAddr_lo)
-{
- /* Read a pattern of 72 bit times (per DQ), to test dram functionality.
- * The pattern is a stress pattern which exercises both ISI and
- * crosstalk. The number of cache lines to fill is dependent on DCT
- * width mode and burstlength.
- * Mode BL Lines Pattern no.
- * ----+---+-------------------
- * 64 4 9 0
- * 64 8 9 0
- * 64M 4 9 0
- * 64M 8 9 0
- * 128 4 18 1
- * 128 8 N/A -
- */
- if (pDCTstat->Pattern == 0)
- ReadL9TestPattern(TestAddr_lo);
- else
- ReadL18TestPattern(TestAddr_lo);
- _MFENCE;
-}
-
u32 SetUpperFSbase(u32 addr_hi)
{
/* Set the upper 32-bits of the Base address, 4GB aligned) for the
@@ -1009,8 +1093,6 @@ void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index)
Set_NB32_index_wait(dev, index_reg, index, val);
}
-/* mctEngDQSwindow_Save_D not required with arrays */
-
void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstatA)
{
@@ -1021,8 +1103,8 @@ void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat,
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
pDCTstat = pDCTstatA + Node;
if (pDCTstat->DCTSysLimit) {
+ TrainDQSRdWrPos_D(pMCTstat, pDCTstat);
for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
- TrainDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel);
SetEccDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel);
}
}
@@ -1137,27 +1219,6 @@ static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat,
}
}
-/*
- * mct_SetDQSDelayAllCSR_D:
- * Write the Delay value to all eight byte lanes.
- */
-static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u8 cs_start)
-{
- u8 ByteLane;
- u8 ChipSel = cs_start;
-
- for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) {
- if ( mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) {
- for (ByteLane = 0; ByteLane < 8; ByteLane++) {
- pDCTstat->ByteLane = ByteLane;
- mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, ChipSel);
- }
- }
- }
-}
-
u8 mct_RcvrRankEnabled_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat,
u8 Channel, u8 ChipSel)
@@ -1196,7 +1257,7 @@ u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat,
reg = 0x40 + (receiver << 2) + reg_off;
val = Get_NB32(dev, reg);
- val &= ~0x0F;
+ val &= ~0xe007c01f;
/* unganged mode DCT0+DCT1, sys addr of DCT1=node
* base+DctSelBaseAddr+local ca base*/
@@ -1277,6 +1338,7 @@ exitGetAddrWNoError:
print_debug_dqs("mct_GetMCTSysAddr_D: base_addr ", val, 2);
print_debug_dqs("mct_GetMCTSysAddr_D: valid ", *valid, 2);
print_debug_dqs("mct_GetMCTSysAddr_D: status ", pDCTstat->Status, 2);
+ print_debug_dqs("mct_GetMCTSysAddr_D: SysBase ", pDCTstat->DCTSysBase, 2);
print_debug_dqs("mct_GetMCTSysAddr_D: HoleBase ", pDCTstat->DCTHoleBase, 2);
print_debug_dqs("mct_GetMCTSysAddr_D: Cachetop ", pMCTstat->Sub4GCacheTop, 2);
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
index 528c782394..60bc01d5fa 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
@@ -2,6 +2,7 @@
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -25,7 +26,6 @@ static void EnableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStr
static void DisableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
static void PrepareC_MCT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
static void PrepareC_DCT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct);
-static void MultiplyDelay(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct);
static void Restore_OnDimmMirror(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
static void Clear_OnDimmMirror(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
@@ -154,7 +154,6 @@ static void PhyWLPass2(struct MCTStatStruc *pMCTstat,
Clear_OnDimmMirror(pMCTstat, pDCTstat);
SetDllSpeedUp_D(pMCTstat, pDCTstat, dct);
DisableAutoRefresh_D(pMCTstat, pDCTstat);
- MultiplyDelay(pMCTstat, pDCTstat, dct);
for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) {
if (DIMMValid & (1 << (dimm << 1)))
AgesaHwWlPhase1(pDCTstat->C_MCTPtr, pDCTstat->C_DCTPtr[dct], dimm, SecondPass);
@@ -162,6 +161,9 @@ static void PhyWLPass2(struct MCTStatStruc *pMCTstat,
}
}
+/* Write Levelization Training
+ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.1
+ */
static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat)
{
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c
index 3d625dec78..6dac0aeb15 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c
@@ -2,6 +2,7 @@
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -201,12 +202,13 @@ static void SetMTRRrange_D(u32 Base, u32 *pLimit, u32 *pMtrrAddr, u16 MtrrType)
void UMAMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA)
{
-/* UMA memory size may need splitting the MTRR configuration into two
- Before training use NB_BottomIO or the physical memory size to set the MTRRs.
- After training, add UMAMemTyping function to reconfigure the MTRRs based on
- NV_BottomUMA (for UMA systems only).
- This two-step process allows all memory to be cached for training
-*/
+ /* UMA memory size may need splitting the MTRR configuration into two
+ * Before training use NB_BottomIO or the physical memory size to set the MTRRs.
+ * After training, add UMAMemTyping function to reconfigure the MTRRs based on
+ * NV_BottomUMA (for UMA systems only).
+ * This two-step process allows all memory to be cached for training
+ */
+
u32 Bottom32bIO, Cache32bTOP;
u32 val;
u32 addr;
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c
index 013a1b9e86..6f9706132b 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c
@@ -2,6 +2,7 @@
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -140,7 +141,7 @@ void InterleaveNodes_D(struct MCTStatStruc *pMCTstat,
}
if (DoIntlv) {
- MCTMemClr_D(pMCTstat,pDCTstatA);
+ MCTMemClr_D(pMCTstat, pDCTstatA);
/* Program Interleaving enabled on Node 0 map only.*/
MemSize0 <<= bsf(Nodes); /* MemSize=MemSize*2 (or 4, or 8) */
Dct0MemSize <<= bsf(Nodes);
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c
index da2f3724b9..37e125e972 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c
@@ -2,6 +2,7 @@
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -36,10 +37,10 @@ u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2)
val = Get_NB32(pDCTstat->dev_dct, dct * 0x100 + 0x78);
val &= 7;
- val = ((~val) & 0xFF) + 1;
+ val = ((~val) & 0xff) + 1;
val += 6;
- val &= 0xFF;
- misc2 &= 0xFFF8FFFF;
+ val &= 0x7;
+ misc2 &= 0xfff8ffff;
misc2 |= val << 16; /* DataTxFifoWrDly */
if (pDCTstat->LogicalCPUID & AMD_DR_Dx)
misc2 |= 1 << 7; /* ProgOdtEn */
@@ -52,11 +53,13 @@ void mct_ExtMCTConfig_Cx(struct DCTStatStruc *pDCTstat)
u32 val;
if (pDCTstat->LogicalCPUID & (AMD_DR_Cx)) {
- Set_NB32(pDCTstat->dev_dct, 0x11C, 0x0CE00FC0 | 1 << 29/* FlushWrOnStpGnt */);
+ /* Revision C */
+ Set_NB32(pDCTstat->dev_dct, 0x11c, 0x0ce00fc0 | 1 << 29/* FlushWrOnStpGnt */);
- val = Get_NB32(pDCTstat->dev_dct, 0x1B0);
- val &= 0xFFFFF8C0;
+ val = Get_NB32(pDCTstat->dev_dct, 0x1b0);
+ val &= ~0x73f;
val |= 0x101; /* BKDG recommended settings */
- Set_NB32(pDCTstat->dev_dct, 0x1B0, val);
+
+ Set_NB32(pDCTstat->dev_dct, 0x1b0, val);
}
}
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
index 6de2f4eee1..b21b96a641 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
@@ -2,6 +2,7 @@
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -172,6 +173,7 @@ static u32 mct_MR1(struct MCTStatStruc *pMCTstat,
ret |= 1 << 11;
}
+ /* program MrsAddress[12]=QOFF: based on F2x[1,0]84[Qoff] */
if (dword & (1 << 13))
ret |= 1 << 12;
@@ -199,7 +201,8 @@ static u32 mct_MR0(struct MCTStatStruc *pMCTstat,
/* program MrsAddress[6:4,2]=read CAS latency
(CL):based on F2x[1,0]88[Tcl] */
dword2 = Get_NB32(dev, reg_off + 0x88);
- ret |= (dword2 & 0xF) << 4; /* F2x88[3:0] to MrsAddress[6:4,2]=xxx0b */
+ ret |= (dword2 & 0x7) << 4; /* F2x88[2:0] to MrsAddress[6:4] */
+ ret |= ((dword2 & 0x8) >> 3) << 2; /* F2x88[3] to MrsAddress[2] */
/* program MrsAddress[12]=0 (PPD):slow exit */
if (dword & (1 << 23))
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
index 8e5c268586..91e8f777c3 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c
@@ -2,6 +2,7 @@
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -24,25 +25,13 @@
static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 Pass);
-static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
- u8 rcvrEnDly, u8 Channel,
- u8 receiver, u8 Pass);
-static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u32 addr, u8 channel,
- u8 pattern, u8 Pass);
static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat);
static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 Channel);
static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 Channel);
-static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat,
- u8 RcvrEnDly, u8 where,
- u8 Channel, u8 Receiver,
- u32 dev, u32 index_reg,
- u8 Addl_Index, u8 Pass);
-static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly);
+static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly);
static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 dct);
static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
@@ -50,17 +39,17 @@ static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
/* Warning: These must be located so they do not cross a logical 16-bit
segment boundary! */
static const u32 TestPattern0_D[] = {
- 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
- 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
- 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
- 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
-};
-static const u32 TestPattern1_D[] = {
0x55555555, 0x55555555, 0x55555555, 0x55555555,
0x55555555, 0x55555555, 0x55555555, 0x55555555,
0x55555555, 0x55555555, 0x55555555, 0x55555555,
0x55555555, 0x55555555, 0x55555555, 0x55555555,
};
+static const u32 TestPattern1_D[] = {
+ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
+ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
+ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
+ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
+};
static const u32 TestPattern2_D[] = {
0x12345678, 0x87654321, 0x23456789, 0x98765432,
0x59385824, 0x30496724, 0x24490795, 0x99938733,
@@ -104,16 +93,87 @@ void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat,
dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass);
}
+static void read_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
+{
+ uint8_t lane;
+ uint32_t dword;
+
+ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+ uint32_t wdt_reg;
+ if ((lane == 0) || (lane == 1))
+ wdt_reg = 0x30;
+ if ((lane == 2) || (lane == 3))
+ wdt_reg = 0x31;
+ if ((lane == 4) || (lane == 5))
+ wdt_reg = 0x40;
+ if ((lane == 6) || (lane == 7))
+ wdt_reg = 0x41;
+ if (lane == 8)
+ wdt_reg = 0x32;
+ wdt_reg += dimm * 3;
+ dword = Get_NB32_index_wait(dev, index_reg, wdt_reg);
+ if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1))
+ current_total_delay[lane] = (dword & 0x00ff0000) >> 16;
+ if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0))
+ current_total_delay[lane] = dword & 0x000000ff;
+ }
+}
+
+static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg)
+{
+ uint8_t lane;
+ uint32_t dword;
+
+ for (lane = 0; lane < 8; lane++) {
+ uint32_t ret_reg;
+ if ((lane == 0) || (lane == 1))
+ ret_reg = 0x10;
+ if ((lane == 2) || (lane == 3))
+ ret_reg = 0x11;
+ if ((lane == 4) || (lane == 5))
+ ret_reg = 0x20;
+ if ((lane == 6) || (lane == 7))
+ ret_reg = 0x21;
+ ret_reg += dimm * 3;
+ dword = Get_NB32_index_wait(dev, index_reg, ret_reg);
+ if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) {
+ dword &= ~(0x1ff << 16);
+ dword |= (current_total_delay[lane] & 0x1ff) << 16;
+ }
+ if ((lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) {
+ dword &= ~0x1ff;
+ dword |= current_total_delay[lane] & 0x1ff;
+ }
+ Set_NB32_index_wait(dev, index_reg, ret_reg, dword);
+ }
+}
+
+static uint32_t convert_testaddr_and_channel_to_address(struct DCTStatStruc *pDCTstat, uint32_t testaddr, uint8_t channel)
+{
+ SetUpperFSbase(testaddr);
+ testaddr <<= 8;
+
+ if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) {
+ testaddr += 8; /* second channel */
+ }
+
+ return testaddr;
+}
+
+/* DQS Receiver Enable Training
+ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.2
+ */
static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 Pass)
{
- u8 Channel, RcvrEnDly, RcvrEnDlyRmin;
- u8 Test0, Test1, CurrTest, CurrTestSide0, CurrTestSide1;
- u8 CTLRMaxDelay, _2Ranks, PatternA, PatternB;
+ u8 Channel;
+ u8 _2Ranks;
u8 Addl_Index = 0;
u8 Receiver;
u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
- u8 RcvrEnDlyLimit, Final_Value, MaxDelay_CH[2];
+ u8 Final_Value;
+ u16 CTLRMaxDelay;
+ u16 MaxDelay_CH[2];
u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B;
u32 PatternBuffer[64+4]; /* FIXME: need increase 8? */
u32 Errors;
@@ -127,9 +187,20 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
u32 cr4;
u32 lo, hi;
+ uint32_t dword;
+ uint8_t rank;
+ uint8_t lane;
+ uint16_t current_total_delay[MAX_BYTE_LANES];
+ uint16_t candidate_total_delay[8];
+ uint8_t data_test_pass_sr[2][8]; /* [rank][lane] */
+ uint8_t data_test_pass[8]; /* [lane] */
+ uint8_t data_test_pass_prev[8]; /* [lane] */
+ uint8_t window_det_toggle[8];
+ uint8_t trained[8];
+ uint64_t result_qword1;
+ uint64_t result_qword2;
+
u8 valid;
- u32 tmp;
- u8 LastTest;
print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
print_debug_dqs("TrainRcvEn: Pass", Pass, 0);
@@ -181,33 +252,103 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
Errors = 0;
dev = pDCTstat->dev_dct;
- CTLRMaxDelay = 0;
for (Channel = 0; Channel < 2; Channel++) {
print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
pDCTstat->Channel = Channel;
+ CTLRMaxDelay = 0;
MaxDelay_CH[Channel] = 0;
index_reg = 0x98 + 0x100 * Channel;
Receiver = mct_InitReceiver_D(pDCTstat, Channel);
- /* There are four receiver pairs, loosely associated with chipselects. */
+ /* There are four receiver pairs, loosely associated with chipselects.
+ * This is essentially looping over each DIMM.
+ */
for (; Receiver < 8; Receiver += 2) {
Addl_Index = (Receiver >> 1) * 3 + 0x10;
- LastTest = DQS_FAIL;
-
- /* mct_ModifyIndex_D */
- RcvrEnDlyRmin = RcvrEnDlyLimit = 0xff;
print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);
- if(!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
+ if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
continue;
}
+ /* Clear data structures */
+ for (lane = 0; lane < 8; lane++) {
+ data_test_pass_prev[lane] = 0;
+ trained[lane] = 0;
+ }
+
+ /* 2.8.9.9.2 (1, 6)
+ * Retrieve gross and fine timing fields from write DQS registers
+ */
+ read_dqs_write_timing_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
+
+ /* 2.8.9.9.2 (1)
+ * Program the Write Data Timing and Write ECC Timing register to
+ * the values stored in the DQS Write Timing Control register
+ * for each lane
+ */
+ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+ uint32_t wdt_reg;
+
+ /* Calculate Write Data Timing register location */
+ if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
+ wdt_reg = 0x1;
+ if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
+ wdt_reg = 0x2;
+ if (lane == 8)
+ wdt_reg = 0x3;
+ wdt_reg |= ((Receiver / 2) << 8);
+
+ /* Set Write Data Timing register values */
+ dword = Get_NB32_index_wait(dev, index_reg, wdt_reg);
+ if ((lane == 7) || (lane == 3)) {
+ dword &= ~(0x7f << 24);
+ dword |= (current_total_delay[lane] & 0x7f) << 24;
+ }
+ if ((lane == 6) || (lane == 2)) {
+ dword &= ~(0x7f << 16);
+ dword |= (current_total_delay[lane] & 0x7f) << 16;
+ }
+ if ((lane == 5) || (lane == 1)) {
+ dword &= ~(0x7f << 8);
+ dword |= (current_total_delay[lane] & 0x7f) << 8;
+ }
+ if ((lane == 8) || (lane == 4) || (lane == 0)) {
+ dword &= ~0x7f;
+ dword |= current_total_delay[lane] & 0x7f;
+ }
+ Set_NB32_index_wait(dev, index_reg, wdt_reg, dword);
+ }
+
+ /* 2.8.9.9.2 (2)
+ * Program the Read DQS Timing Control and the Read DQS ECC Timing Control registers
+ * to 1/2 MEMCLK for all lanes
+ */
+ for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+ uint32_t rdt_reg;
+ if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))
+ rdt_reg = 0x5;
+ if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7))
+ rdt_reg = 0x6;
+ if (lane == 8)
+ rdt_reg = 0x7;
+ rdt_reg |= ((Receiver / 2) << 8);
+ if (lane == 8)
+ dword = 0x0000003f;
+ else
+ dword = 0x3f3f3f3f;
+ Set_NB32_index_wait(dev, index_reg, rdt_reg, dword);
+ }
+
+ /* 2.8.9.9.2 (3)
+ * Select two test addresses for each rank present
+ */
TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
- if(!valid) { /* Address not supported on current CS */
+ if (!valid) { /* Address not supported on current CS */
continue;
}
@@ -229,171 +370,214 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2);
print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2);
- /*
- * Get starting RcvrEnDly value
+ /* 2.8.9.9.2 (4, 5)
+ * Write 1 cache line of the appropriate test pattern to each test address
*/
- RcvrEnDly = mct_Get_Start_RcvrEnDly_1Pass(Pass);
+ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, 0); /* rank 0 of DIMM, testpattern 0 */
+ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, 1); /* rank 0 of DIMM, testpattern 1 */
+ if (_2Ranks) {
+ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, 0); /*rank 1 of DIMM, testpattern 0 */
+ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, 1); /*rank 1 of DIMM, testpattern 1 */
+ }
- /* mct_GetInitFlag_D*/
- if (Pass == FirstPass) {
- pDCTstat->DqsRcvEn_Pass = 0;
- } else {
- pDCTstat->DqsRcvEn_Pass=0xFF;
+#if DQS_TRAIN_DEBUG > 0
+ for (lane = 0; lane < 8; lane++) {
+ print_debug_dqs("\t\tTrainRcvEn54: lane: ", lane, 2);
+ print_debug_dqs("\t\tTrainRcvEn54: current_total_delay ", current_total_delay[lane], 2);
}
- pDCTstat->DqsRcvEn_Saved = 0;
+#endif
+ /* 2.8.9.9.2 (6)
+ * Write gross and fine timing fields to the DQS receiver enable control registers
+ */
+ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
+
+ /* 2.8.9.9.2 (7)
+ * Loop over all delay values up to 1 MEMCLK (0x40 delay steps) from the initial delay values
+ *
+ * FIXME
+ * It is not clear if training should be discontinued if any test failures occur in the first
+ * 1 MEMCLK window, or if it should be discontinued if no successes occur in the first 1 MEMCLK
+ * window. Therefore, loop over up to 2 MEMCLK (0x80 delay steps) to be on the safe side.
+ */
+ uint16_t current_delay_step;
- while(RcvrEnDly < RcvrEnDlyLimit) { /* sweep Delay value here */
- print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
+ for (current_delay_step = 0; current_delay_step < 0x80; current_delay_step++) {
+ print_debug_dqs("\t\t\tTrainRcvEn541: current_delay_step ", current_delay_step, 3);
- /* callback not required
- if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly))
- goto skipDly;
+ /* 2.8.9.9.2 (7 D)
+ * Terminate if all lanes are trained
*/
+ uint8_t all_lanes_trained = 1;
+ for (lane = 0; lane < 8; lane++)
+ if (!trained[lane])
+ all_lanes_trained = 0;
- /* Odd steps get another pattern such that even
- and odd steps alternate. The pointers to the
- patterns will be swaped at the end of the loop
- so that they correspond. */
- if(RcvrEnDly & 1) {
- PatternA = 1;
- PatternB = 0;
- } else {
- /* Even step */
- PatternA = 0;
- PatternB = 1;
- }
-
- mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, PatternA); /* rank 0 of DIMM, testpattern 0 */
- mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, PatternB); /* rank 0 of DIMM, testpattern 1 */
- if(_2Ranks) {
- mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, PatternA); /*rank 1 of DIMM, testpattern 0 */
- mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, PatternB); /*rank 1 of DIMM, testpattern 1 */
- }
-
- mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, 0, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
-
- CurrTest = DQS_FAIL;
- CurrTestSide0 = DQS_FAIL;
- CurrTestSide1 = DQS_FAIL;
-
- mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0); /*cache fills */
- Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0, Channel, PatternA, Pass);/* ROM vs cache compare */
- proc_IOCLFLUSH_D(TestAddr0);
- ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
-
- print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0, 3);
-
- /* != 0x00 mean pass */
-
- if(Test0 == DQS_PASS) {
- mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B); /*cache fills */
- /* ROM vs cache compare */
- Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0B, Channel, PatternB, Pass);
- proc_IOCLFLUSH_D(TestAddr0B);
- ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
-
- print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1, 3);
+ if (all_lanes_trained)
+ break;
- if(Test1 == DQS_PASS) {
- CurrTestSide0 = DQS_PASS;
+ /* 2.8.9.9.2 (7 A)
+ * Loop over all ranks
+ */
+ for (rank = 0; rank < (_2Ranks + 1); rank++) {
+ /* 2.8.9.9.2 (7 A a-d)
+ * Read the first test address of the current rank
+ * Store the first data beat for analysis
+ * Reset read pointer in the DRAM controller FIFO
+ * Read the second test address of the current rank
+ * Store the first data beat for analysis
+ * Reset read pointer in the DRAM controller FIFO
+ */
+ if (rank & 1) {
+ /* 2.8.9.9.2 (7 D)
+ * Invert read instructions to alternate data read order on the bus
+ */
+ proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B);
+ result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel));
+ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
+ proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1);
+ result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel));
+ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
+ } else {
+ proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1);
+ result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel));
+ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
+ proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B);
+ result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel));
+ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
}
- }
- if(_2Ranks) {
- mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1); /*cache fills */
- /* ROM vs cache compare */
- Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1, Channel, PatternA, Pass);
- proc_IOCLFLUSH_D(TestAddr1);
- ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
-
- print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0, 3);
-
- if(Test0 == DQS_PASS) {
- mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B); /*cache fills */
- /* ROM vs cache compare */
- Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1B, Channel, PatternB, Pass);
- proc_IOCLFLUSH_D(TestAddr1B);
- ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
-
- print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1, 3);
- if(Test1 == DQS_PASS) {
- CurrTestSide1 = DQS_PASS;
+ /* 2.8.9.9.2 (7 A e)
+ * Compare both read patterns and flag passing ranks/lanes
+ */
+ uint8_t result_lane_byte1;
+ uint8_t result_lane_byte2;
+ for (lane = 0; lane < 8; lane++) {
+ if (trained[lane] == 1) {
+#if DQS_TRAIN_DEBUG > 0
+ print_debug_dqs("\t\t\t\t\t\t\t\t lane already trained: ", lane, 4);
+#endif
+ continue;
}
+
+ result_lane_byte1 = (result_qword1 >> (lane * 8)) & 0xff;
+ result_lane_byte2 = (result_qword2 >> (lane * 8)) & 0xff;
+ if ((result_lane_byte1 == 0x55) && (result_lane_byte2 == 0xaa))
+ data_test_pass_sr[rank][lane] = 1;
+ else
+ data_test_pass_sr[rank][lane] = 0;
+#if DQS_TRAIN_DEBUG > 0
+ print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0x55, " | ", result_lane_byte1, 4);
+ print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0xaa, " | ", result_lane_byte2, 4);
+#endif
}
}
- if(_2Ranks) {
- if ((CurrTestSide0 == DQS_PASS) && (CurrTestSide1 == DQS_PASS)) {
- CurrTest = DQS_PASS;
+ /* 2.8.9.9.2 (7 B)
+ * If DIMM is dual rank, only use delays that pass testing for both ranks
+ */
+ for (lane = 0; lane < 8; lane++) {
+ if (_2Ranks) {
+ if ((data_test_pass_sr[0][lane]) && (data_test_pass_sr[1][lane]))
+ data_test_pass[lane] = 1;
+ else
+ data_test_pass[lane] = 0;
+ } else {
+ data_test_pass[lane] = data_test_pass_sr[0][lane];
}
- } else if (CurrTestSide0 == DQS_PASS) {
- CurrTest = DQS_PASS;
}
- /* record first pass DqsRcvEn to stack */
- valid = mct_SavePassRcvEnDly_D(pDCTstat, RcvrEnDly, Channel, Receiver, Pass);
+ /* 2.8.9.9.2 (7 E)
+ * For each lane, update the DQS receiver delay setting in support of next iteration
+ */
+ for (lane = 0; lane < 8; lane++) {
+ if (trained[lane] == 1)
+ continue;
+
+ /* 2.8.9.9.2 (7 C a)
+ * Save the total delay of the first success after a failure for later use
+ */
+ if ((data_test_pass[lane] == 1) && (data_test_pass_prev[lane] == 0)) {
+ candidate_total_delay[lane] = current_total_delay[lane];
+ window_det_toggle[lane] = 0;
+ }
- /* Break(1:RevF,2:DR) or not(0) FIXME: This comment deosn't make sense */
- if(valid == 2 || (LastTest == DQS_FAIL && valid == 1)) {
- RcvrEnDlyRmin = RcvrEnDly;
- break;
+ /* 2.8.9.9.2 (7 C b)
+ * If the current delay failed testing add 1/8 UI to the current delay
+ */
+ if (data_test_pass[lane] == 0)
+ current_total_delay[lane] += 0x4;
+
+ /* 2.8.9.9.2 (7 C c)
+ * If the current delay passed testing alternately add either 1/32 UI or 1/4 UI to the current delay
+ * If 1.25 UI of delay has been added with no failures, the lane is considered trained
+ */
+ if (data_test_pass[lane] == 1) {
+ /* See if lane is trained */
+ if ((current_total_delay[lane] - candidate_total_delay[lane]) >= 0x28) {
+ trained[lane] = 1;
+
+ /* Calculate and set final lane delay value
+ * The final delay is the candidate delay + 7/8 UI
+ */
+ current_total_delay[lane] = candidate_total_delay[lane] + 0x1c;
+ } else {
+ if (window_det_toggle[lane] == 0) {
+ current_total_delay[lane] += 0x1;
+ window_det_toggle[lane] = 1;
+ } else {
+ current_total_delay[lane] += 0x8;
+ window_det_toggle[lane] = 0;
+ }
+ }
+ }
}
- LastTest = CurrTest;
-
- /* swap the rank 0 pointers */
- tmp = TestAddr0;
- TestAddr0 = TestAddr0B;
- TestAddr0B = tmp;
-
- /* swap the rank 1 pointers */
- tmp = TestAddr1;
- TestAddr1 = TestAddr1B;
- TestAddr1B = tmp;
-
- print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
+ /* Update delays in hardware */
+ write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg);
- RcvrEnDly++;
-
- } /* while RcvrEnDly */
-
- print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
- print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin, 3);
- print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit, 3);
- if(RcvrEnDlyRmin == RcvrEnDlyLimit) {
- /* no passing window */
- pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
- Errors |= 1 << SB_NORCVREN;
- pDCTstat->ErrCode = SC_FatalErr;
+ /* Save previous results for comparison in the next iteration */
+ for (lane = 0; lane < 8; lane++)
+ data_test_pass_prev[lane] = data_test_pass[lane];
}
- if(RcvrEnDly > (RcvrEnDlyLimit - 1)) {
- /* passing window too narrow, too far delayed*/
- pDCTstat->ErrStatus |= 1 << SB_SmallRCVR;
- Errors |= 1 << SB_SmallRCVR;
- pDCTstat->ErrCode = SC_FatalErr;
- RcvrEnDly = RcvrEnDlyLimit - 1;
- pDCTstat->CSTrainFail |= 1 << Receiver;
- pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
- }
-
- /* CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass */
- mct_Average_RcvrEnDly_Pass(pDCTstat, RcvrEnDly, RcvrEnDlyLimit, Channel, Receiver, Pass);
+#if DQS_TRAIN_DEBUG > 0
+ for (lane = 0; lane < 8; lane++)
+ print_debug_dqs_pair("\t\tTrainRcvEn55: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2);
+#endif
- mct_SetFinalRcvrEnDly_D(pDCTstat, RcvrEnDly, Final_Value, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
+ /* Find highest delay value and save for later use */
+ for (lane = 0; lane < 8; lane++)
+ if (current_total_delay[lane] > CTLRMaxDelay)
+ CTLRMaxDelay = current_total_delay[lane];
- if(pDCTstat->ErrStatus & (1 << SB_SmallRCVR)) {
- Errors |= 1 << SB_SmallRCVR;
+ /* See if any lanes failed training, and set error flags appropriately
+ * For all trained lanes, save delay values for later use
+ */
+ for (lane = 0; lane < 8; lane++) {
+ if (trained[lane]) {
+ pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1][lane] = current_total_delay[lane];
+ } else {
+ printk(BIOS_WARNING, "TrainRcvrEn: WARNING: Lane %d of receiver %d on channel %d failed training!\n", lane, Receiver, Channel);
+
+ /* Set error flags */
+ pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
+ Errors |= 1 << SB_NORCVREN;
+ pDCTstat->ErrCode = SC_FatalErr;
+ pDCTstat->CSTrainFail |= 1 << Receiver;
+ pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
+ }
}
- RcvrEnDly += Pass1MemClkDly;
- if(RcvrEnDly > CTLRMaxDelay) {
- CTLRMaxDelay = RcvrEnDly;
- }
+ /* 2.8.9.9.2 (8)
+ * Flush the receiver FIFO
+ * Write one full cache line of non-0x55/0xaa data to one of the test addresses, then read it back to flush the FIFO
+ */
- } /* while Receiver */
+ WriteLNTestPattern(TestAddr0 << 8, (uint8_t *)TestPattern2_D, 1);
+ mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0);
+ }
MaxDelay_CH[Channel] = CTLRMaxDelay;
- } /* for Channel */
+ }
CTLRMaxDelay = MaxDelay_CH[0];
if (MaxDelay_CH[1] > CTLRMaxDelay)
@@ -428,31 +612,31 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
#if DQS_TRAIN_DEBUG > 0
{
- u8 Channel;
+ u8 ChannelDTD;
printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n");
- for(Channel = 0; Channel<2; Channel++) {
+ for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) {
printk(BIOS_DEBUG, "Channel:%x: %x\n",
- Channel, pDCTstat->CH_MaxRdLat[Channel]);
+ ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]);
}
}
#endif
#if DQS_TRAIN_DEBUG > 0
{
- u8 val;
- u8 Channel, Receiver;
+ u16 valDTD;
+ u8 ChannelDTD, ReceiverDTD;
u8 i;
- u8 *p;
+ u16 *p;
printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n");
- for(Channel = 0; Channel < 2; Channel++) {
- printk(BIOS_DEBUG, "Channel:%x\n", Channel);
- for(Receiver = 0; Receiver<8; Receiver+=2) {
- printk(BIOS_DEBUG, "\t\tReceiver:%x:", Receiver);
- p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1];
+ for(ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) {
+ printk(BIOS_DEBUG, "Channel:%x\n", ChannelDTD);
+ for(ReceiverDTD = 0; ReceiverDTD<8; ReceiverDTD+=2) {
+ printk(BIOS_DEBUG, "\t\tReceiver:%x:", ReceiverDTD);
+ p = pDCTstat->CH_D_B_RCVRDLY[ChannelDTD][ReceiverDTD>>1];
for (i=0;i<8; i++) {
- val = p[i];
- printk(BIOS_DEBUG, "%x ", val);
+ valDTD = p[i];
+ printk(BIOS_DEBUG, " %03x", valDTD);
}
printk(BIOS_DEBUG, "\n");
}
@@ -475,15 +659,6 @@ u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct)
}
}
-static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 where, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass/*, u8 *p*/)
-{
- /*
- * Program final DqsRcvEnDly to additional index for DQS receiver
- * enabled delay
- */
- mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, where, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
-}
-
static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
{
u8 ch_end, ch;
@@ -514,17 +689,20 @@ static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
* Function only used once so it was inlined.
*/
-void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly,
+/* Set F2x[1, 0]9C_x[2B:10] DRAM DQS Receiver Enable Timing Control Registers
+ * See BKDG Rev. 3.62 page 268 for more information
+ */
+void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly,
u8 FinalValue, u8 Channel, u8 Receiver, u32 dev,
u32 index_reg, u8 Addl_Index, u8 Pass)
{
u32 index;
u8 i;
- u8 *p;
+ u16 *p;
u32 val;
- if(RcvrEnDly == 0xFE) {
- /*set the boudary flag */
+ if(RcvrEnDly == 0x1fe) {
+ /*set the boundary flag */
pDCTstat->Status |= 1 << SB_DQSRcvLimit;
}
@@ -543,27 +721,57 @@ void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly,
val = Get_NB32_index_wait(dev, index_reg, index);
if(i & 1) {
/* odd byte lane */
- val &= ~(0xFF << 16);
- val |= (RcvrEnDly << 16);
+ val &= ~(0x1ff << 16);
+ val |= ((RcvrEnDly & 0x1ff) << 16);
} else {
/* even byte lane */
- val &= ~0xFF;
- val |= RcvrEnDly;
+ val &= ~0x1ff;
+ val |= (RcvrEnDly & 0x1ff);
}
Set_NB32_index_wait(dev, index_reg, index, val);
}
}
-static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly)
+/* Calculate MaxRdLatency
+ * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.5
+ */
+static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly)
{
u32 dev;
u32 reg;
- u16 SubTotal;
+ u32 SubTotal;
u32 index_reg;
u32 reg_off;
u32 val;
- u32 valx;
+
+ uint8_t cpu_val_n;
+ uint8_t cpu_val_p;
+
+ u16 freq_tab[] = {400, 533, 667, 800};
+
+ /* Set up processor-dependent values */
+ if (pDCTstat->LogicalCPUID & AMD_DR_Dx) {
+ /* Revision D and above */
+ cpu_val_n = 4;
+ cpu_val_p = 29;
+ } else if (pDCTstat->LogicalCPUID & AMD_DR_Cx) {
+ /* Revision C */
+ uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE);
+ if ((package_type == PT_L1) /* Socket F (1207) */
+ || (package_type == PT_M2) /* Socket AM3 */
+ || (package_type == PT_S1)) { /* Socket S1g<x> */
+ cpu_val_n = 10;
+ cpu_val_p = 11;
+ } else {
+ cpu_val_n = 4;
+ cpu_val_p = 29;
+ }
+ } else {
+ /* Revision B and below */
+ cpu_val_n = 10;
+ cpu_val_p = 11;
+ }
if(pDCTstat->GangedMode)
Channel = 0;
@@ -598,49 +806,32 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQ
val = Get_NB32(dev, 0x78 + reg_off);
SubTotal += 8 - (val & 0x0f);
- /* Convert bits 7-5 (also referred to as the course delay) of
+ /* Convert bits 7-5 (also referred to as the coarse delay) of
* the current (or worst case) DQS receiver enable delay to
* 1/2 MEMCLKs units, rounding up, and add this to the sub-total.
*/
- SubTotal += DQSRcvEnDly >> 5; /*BOZO-no rounding up */
+ SubTotal += DQSRcvEnDly >> 5; /* Retrieve gross delay portion of value */
- /* Add 5.5 to the sub-total. 5.5 represents part of the
+ /* Add "P" to the sub-total. "P" represents part of the
* processor specific constant delay value in the DRAM
* clock domain.
*/
SubTotal <<= 1; /*scale 1/2 MemClk to 1/4 MemClk */
- SubTotal += 11; /*add 5.5 1/2MemClk */
+ SubTotal += cpu_val_p; /*add "P" 1/2MemClk */
+ SubTotal >>= 1; /*scale 1/4 MemClk back to 1/2 MemClk */
/* Convert the sub-total (in 1/2 MEMCLKs) to northbridge
- * clocks (NCLKs) as follows (assuming DDR400 and assuming
- * that no P-state or link speed changes have occurred).
+ * clocks (NCLKs)
*/
+ SubTotal *= 200 * ((Get_NB32(pDCTstat->dev_nbmisc, 0xd4) & 0x1f) + 4);
+ SubTotal /= freq_tab[((Get_NB32(pDCTstat->dev_dct, 0x94 + reg_off) & 0x7) - 3)];
+ SubTotal = (SubTotal + (2 - 1)) / 2; /* Round up */
- /* New formula:
- * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */
- val = Get_NB32(dev, 0x94 + reg_off);
-
- /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */
- val &= 7;
- if (val >= 3) {
- val <<= 1;
- } else
- val += 3;
- valx = val << 2;
-
- val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4);
- SubTotal *= ((val & 0x1f) + 4 ) * 3;
-
- SubTotal /= valx;
- if (SubTotal % valx) { /* round up */
- SubTotal++;
- }
-
- /* Add 5 NCLKs to the sub-total. 5 represents part of the
+ /* Add "N" NCLKs to the sub-total. "N" represents part of the
* processor specific constant value in the northbridge
* clock domain.
*/
- SubTotal += 5;
+ SubTotal += (cpu_val_n) / 2;
pDCTstat->CH_MaxRdLat[Channel] = SubTotal;
if(pDCTstat->GangedMode) {
@@ -659,143 +850,6 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQ
Set_NB32(dev, reg, val);
}
-static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
- u8 rcvrEnDly, u8 Channel,
- u8 receiver, u8 Pass)
-{
- u8 i;
- u8 mask_Saved, mask_Pass;
- u8 *p;
-
- /* calculate dimm offset
- * not needed for CH_D_B_RCVRDLY array
- */
-
- /* cmp if there has new DqsRcvEnDly to be recorded */
- mask_Pass = pDCTstat->DqsRcvEn_Pass;
-
- if(Pass == SecondPass) {
- mask_Pass = ~mask_Pass;
- }
-
- mask_Saved = pDCTstat->DqsRcvEn_Saved;
- if(mask_Pass != mask_Saved) {
-
- /* find desired stack offset according to channel/dimm/byte */
- if(Pass == SecondPass) {
- /* FIXME: SecondPass is never used for Barcelona p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1]; */
- p = 0; /* Keep the compiler happy. */
- } else {
- mask_Saved &= mask_Pass;
- p = pDCTstat->CH_D_B_RCVRDLY[Channel][receiver>>1];
- }
- for(i=0; i < 8; i++) {
- /* cmp per byte lane */
- if(mask_Pass & (1 << i)) {
- if(!(mask_Saved & (1 << i))) {
- /* save RcvEnDly to stack, according to
- the related Dimm/byte lane */
- p[i] = (u8)rcvrEnDly;
- mask_Saved |= 1 << i;
- }
- }
- }
- pDCTstat->DqsRcvEn_Saved = mask_Saved;
- }
- return mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass);
-}
-
-static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat,
- u32 addr, u8 channel,
- u8 pattern, u8 Pass)
-{
- /* Compare only the first beat of data. Since target addrs are cache
- * line aligned, the Channel parameter is used to determine which
- * cache QW to compare.
- */
-
- u8 *test_buf;
- u8 i;
- u8 result;
- u8 value;
-
- if(Pass == FirstPass) {
- if(pattern==1) {
- test_buf = (u8 *)TestPattern1_D;
- } else {
- test_buf = (u8 *)TestPattern0_D;
- }
- } else { /* Second Pass */
- test_buf = (u8 *)TestPattern2_D;
- }
-
- SetUpperFSbase(addr);
- addr <<= 8;
-
- if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) {
- addr += 8; /* second channel */
- test_buf += 8;
- }
-
- print_debug_dqs_pair("\t\t\t\t\t\t test_buf = ", (u32)test_buf, " | addr_lo = ", addr, 4);
- for (i=0; i<8; i++, addr ++) {
- value = read32_fs(addr);
- print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", test_buf[i], " | ", value, 4);
-
- if (value == test_buf[i]) {
- pDCTstat->DqsRcvEn_Pass |= (1<<i);
- } else {
- pDCTstat->DqsRcvEn_Pass &= ~(1<<i);
- }
- }
-
- result = DQS_FAIL;
-
- if (Pass == FirstPass) {
- /* if first pass, at least one byte lane pass
- * ,then DQS_PASS=1 and will set to related reg.
- */
- if(pDCTstat->DqsRcvEn_Pass != 0) {
- result = DQS_PASS;
- } else {
- result = DQS_FAIL;
- }
-
- } else {
- /* if second pass, at least one byte lane fail
- * ,then DQS_FAIL=1 and will set to related reg.
- */
- if(pDCTstat->DqsRcvEn_Pass != 0xFF) {
- result = DQS_FAIL;
- } else {
- result = DQS_PASS;
- }
- }
-
- /* if second pass, we can't find the fail until FFh,
- * then let it fail to save the final delay
- */
- if((Pass == SecondPass) && (pDCTstat->Status & (1 << SB_DQSRcvLimit))) {
- result = DQS_FAIL;
- pDCTstat->DqsRcvEn_Pass = 0;
- }
-
- /* second pass needs to be inverted
- * FIXME? this could be inverted in the above code to start with...
- */
- if(Pass == SecondPass) {
- if (result == DQS_PASS) {
- result = DQS_FAIL;
- } else if (result == DQS_FAIL) { /* FIXME: doesn't need to be else if */
- result = DQS_PASS;
- }
- }
-
-
- return result;
-}
-
static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat)
{
@@ -854,7 +908,7 @@ void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
u32 index_reg;
u32 index;
u8 ChipSel;
- u8 *p;
+ u16 *p;
u32 val;
dev = pDCTstat->dev_dct;
@@ -884,7 +938,7 @@ static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) {
- u8 *p;
+ u16 *p;
p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1];
/* DQS Delay Value of Data Bytelane
@@ -920,6 +974,10 @@ static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
SetEccDQSRcvrEn_D(pDCTstat, Channel);
}
+/* 2.8.9.9.4
+ * ECC Byte Lane Training
+ * DQS Receiver Enable Delay
+ */
void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstatA)
{
@@ -1017,7 +1075,9 @@ static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
avRecValue -= 3;
else
*/
- if (pDCTstat->LogicalCPUID & AMD_DR_Cx)
+ if (pDCTstat->LogicalCPUID & AMD_DR_Dx)
+ avRecValue -= 8;
+ else if (pDCTstat->LogicalCPUID & AMD_DR_Cx)
avRecValue -= 8;
else if (pDCTstat->LogicalCPUID & AMD_DR_Bx)
avRecValue -= 8;
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c
index c00975692e..f01e011c32 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c
@@ -2,6 +2,7 @@
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -36,17 +37,12 @@ u32 SetupDqsPattern_1PassB(u8 pass)
return (u32) TestPattern0_D;
}
-u8 mct_Get_Start_RcvrEnDly_1Pass(u8 pass)
-{
- return 0;
-}
-
-static u8 mct_Average_RcvrEnDly_1Pass(struct DCTStatStruc *pDCTstat, u8 Channel, u8 Receiver,
+static u16 mct_Average_RcvrEnDly_1Pass(struct DCTStatStruc *pDCTstat, u8 Channel, u8 Receiver,
u8 Pass)
{
- u8 i, MaxValue;
- u8 *p;
- u8 val;
+ u16 i, MaxValue;
+ u16 *p;
+ u16 val;
MaxValue = 0;
p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1];
@@ -76,8 +72,8 @@ u8 mct_SaveRcvEnDly_D_1Pass(struct DCTStatStruc *pDCTstat, u8 pass)
return ret;
}
-u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat,
- u8 RcvrEnDly, u8 RcvrEnDlyLimit,
+u16 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat,
+ u16 RcvrEnDly, u16 RcvrEnDlyLimit,
u8 Channel, u8 Receiver, u8 Pass)
{
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c
index b01889d9b4..796febcb0b 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c
@@ -2,6 +2,7 @@
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -74,15 +75,15 @@ u8 mct_Get_Start_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat,
return RcvrEnDly;
}
-u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat,
- u8 RcvrEnDly, u8 RcvrEnDlyLimit,
+u16 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat,
+ u16 RcvrEnDly, u16 RcvrEnDlyLimit,
u8 Channel, u8 Receiver, u8 Pass)
{
u8 i;
- u8 *p;
- u8 *p_1;
- u8 val;
- u8 val_1;
+ u16 *p;
+ u16 *p_1;
+ u16 val;
+ u16 val_1;
u8 valid = 1;
u8 bn;
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c
index ea5c8c7bb7..920f514415 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c
@@ -2,6 +2,7 @@
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -191,10 +192,10 @@ static void maxRdLatencyTrain_D(struct MCTStatStruc *pMCTstat,
#if DQS_TRAIN_DEBUG > 0
{
- u8 Channel;
+ u8 ChannelDTD;
printk(BIOS_DEBUG, "maxRdLatencyTrain: CH_MaxRdLat:\n");
- for(Channel = 0; Channel<2; Channel++) {
- printk(BIOS_DEBUG, "Channel: %02x: %02x\n", Channel, pDCTstat->CH_MaxRdLat[Channel]);
+ for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) {
+ printk(BIOS_DEBUG, "Channel: %02x: %02x\n", ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]);
}
}
#endif
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
index cdeae49f3a..1c3e322d54 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c
@@ -2,6 +2,7 @@
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -58,9 +59,9 @@ void PrepareC_DCT(struct MCTStatStruc *pMCTstat,
pDCTstat->C_DCTPtr[dct]->LogicalCPUID = pDCTstat->LogicalCPUID;
for (dimm = 0; dimm < MAX_DIMMS; dimm++) {
- if (DimmValid & (1 << dimm))
+ if (DimmValid & (1 << (dimm << 1)))
pDCTstat->C_DCTPtr[dct]->DimmPresent[dimm] = 1;
- if (Dimmx8Present & (1 << dimm))
+ if (Dimmx8Present & (1 << (dimm << 1)))
pDCTstat->C_DCTPtr[dct]->DimmX8Present[dimm] = 1;
}
@@ -88,9 +89,9 @@ void PrepareC_DCT(struct MCTStatStruc *pMCTstat,
u8 DimmRanks;
if (DimmValid & (1 << (dimm << 1))) {
DimmRanks = 1;
- if (pDCTstat->DimmDRPresent & (1 << (dimm+dct)))
+ if (pDCTstat->DimmDRPresent & (1 << ((dimm << 1) + dct)))
DimmRanks = 2;
- else if (pDCTstat->DimmQRPresent & (1 << (dimm+dct)))
+ else if (pDCTstat->DimmQRPresent & (1 << ((dimm << 1) + dct)))
DimmRanks = 4;
} else
DimmRanks = 0;
@@ -249,35 +250,6 @@ static void ChangeMemClk(struct MCTStatStruc *pMCTstat,
}
}
-/* Multiply the previously saved delay values in Pass 1, step #5 by
- (target frequency)/400 to find the gross and fine delay initialization
- values at the target frequency.
- */
-void MultiplyDelay(struct MCTStatStruc *pMCTstat,
- struct DCTStatStruc *pDCTstat, u8 dct)
-{
- u16 index;
- u8 Multiplier;
- u8 gross, fine;
- u16 total;
-
- Multiplier = pDCTstat->TargetFreq;
-
- for (index=0; index < MAX_BYTE_LANES*MAX_LDIMMS; index ++) {
- gross = pDCTstat->C_DCTPtr[dct]->WLGrossDelay[index];
- fine = pDCTstat->C_DCTPtr[dct]->WLFineDelay[index];
-
- total = gross << 5 | fine;
- total *= Multiplier;
- if (total % 3)
- total = total / 3 + 1;
- else
- total = total / 3;
- pDCTstat->C_DCTPtr[dct]->WLGrossDelay[index] = (total & 0xFF) >> 5;
- pDCTstat->C_DCTPtr[dct]->WLFineDelay[index] = total & 0x1F;
- }
-}
-
/*
* the DRAM controller to bring the DRAMs out of self refresh mode.
*/
@@ -352,9 +324,9 @@ void SetTargetFreq(struct MCTStatStruc *pMCTstat,
if (!DCT1Present)
pDCTstat->CSPresent = pDCTstat->CSPresent_DCT[0];
- else if (pDCTstat->GangedMode) {
+ else if (pDCTstat->GangedMode)
pDCTstat->CSPresent = 0;
- } else
+ else
pDCTstat->CSPresent = pDCTstat->CSPresent_DCT[1];
FreqChgCtrlWrd(pMCTstat, pDCTstat);
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
index 8e7e70c4c7..397fd77275 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
@@ -2,6 +2,7 @@
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
+ * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -235,6 +236,65 @@ u32 swapBankBits(sDCTStruct *pDCTData, u32 MRSValue)
return MRSValue;
}
+static uint16_t unbuffered_dimm_nominal_termination_emrs(uint8_t number_of_dimms, uint8_t frequency_index, uint8_t rank_count, uint8_t rank)
+{
+ uint16_t term;
+
+ /* Select the nominal termination (Rtt_Nom) code for an unbuffered DIMM.
+ *
+ * number_of_dimms: compared against 1 to choose the single-DIMM branch;
+ * any other value takes the multi-DIMM branch. The callers visible in
+ * this patch pass pDCTData->MaxDimmsInstalled.
+ * frequency_index: only consulted in the multi-DIMM branch (< 5 versus
+ * >= 5). The callers visible in this patch pass MemClkFreq, which the
+ * patch documents as 3: 400MHz; 4: 533MHz; 5: 667MHz; 6: 800MHz.
+ * rank_count, rank: only consulted in the single-DIMM branch when
+ * MaxDimmsInstallable >= 3, which is unreachable while the value below
+ * is fixed at 2.
+ *
+ * Every path assigns term before it is returned.
+ *
+ * FIXME
+ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
+ * For now assume a maximum of 2 DIMMs per channel can be installed
+ */
+ uint8_t MaxDimmsInstallable = 2;
+
+ if (number_of_dimms == 1) {
+ if (MaxDimmsInstallable < 3) {
+ term = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */
+ } else {
+ if (rank_count == 1) {
+ term = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */
+ } else {
+ if (rank == 0)
+ term = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */
+ else
+ term = 0x00; /* Rtt_Nom=OFF */
+ }
+ }
+ } else {
+ if (frequency_index < 5)
+ term = 0x0044; /* Rtt_Nom=RZQ/6=40 Ohm */
+ else
+ term = 0x0204; /* Rtt_Nom=RZQ/8=30 Ohm */
+ }
+
+ return term;
+}
+
+static uint16_t unbuffered_dimm_dynamic_termination_emrs(uint8_t number_of_dimms, uint8_t frequency_index, uint8_t rank_count, uint8_t rank)
+{
+ uint16_t term;
+
+ /* Select the dynamic termination (Rtt_WR) code for an unbuffered DIMM.
+ *
+ * number_of_dimms: compared against 1 to choose the single-DIMM branch;
+ * any other value yields 0x400. The callers visible in this patch pass
+ * pDCTData->MaxDimmsInstalled.
+ * frequency_index, rank: accepted but never read in this function.
+ * rank_count: only consulted in the single-DIMM branch when
+ * MaxDimmsInstallable >= 3, which is unreachable while the value below
+ * is fixed at 2; so 0x200 cannot currently be returned.
+ *
+ * Every path assigns term before it is returned. The write-leveling
+ * path of prepareDimms in this patch compares the result against 0x200
+ * and 0x400 to derive a nominal termination code.
+ *
+ * FIXME
+ * Mainboards need to be able to specify the maximum number of DIMMs installable per channel
+ * For now assume a maximum of 2 DIMMs per channel can be installed
+ */
+ uint8_t MaxDimmsInstallable = 2;
+
+ if (number_of_dimms == 1) {
+ if (MaxDimmsInstallable < 3) {
+ term = 0x00; /* Rtt_WR=off */
+ } else {
+ if (rank_count == 1)
+ term = 0x00; /* Rtt_WR=off */
+ else
+ term = 0x200; /* Rtt_WR=RZQ/4=60 Ohm */
+ }
+ } else {
+ term = 0x400; /* Rtt_WR=RZQ/2=120 Ohm */
+ }
+
+ return term;
+}
+
/*-----------------------------------------------------------------------------
* void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *DCTData, u8 Dimm, BOOL WL)
*
@@ -295,48 +355,23 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
tempW1 = RttNomTargetRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank);
} else {
- if (wl)
- {
- if (pDCTData->MaxDimmsInstalled == 1)
- {
- if ((pDCTData->DimmRanks[dimm] == 2) && (rank == 0))
- {
- tempW1 = 0x00; /* Rtt_Nom=OFF */
- }
+ if (wl) {
+ if (rank == 0) {
+ /* Get Rtt_WR for the current DIMM and rank */
+ uint16_t dynamic_term = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank);
+
+ /* Convert dynamic termination code to corresponding nominal termination code */
+ if (dynamic_term == 0x200)
+ tempW1 = 0x04;
+ else if (dynamic_term == 0x400)
+ tempW1 = 0x40;
else
- {
- tempW1 = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */
- }
- }
- else /* 2 Dimms or more per channel */
- {
- if ((pDCTData->DimmRanks[dimm] == 2) && (rank == 1))
- {
- tempW1 = 0x00; /* Rtt_Nom=OFF */
- }
- else
- {
- if (MemClkFreq == 6) {
- tempW1 = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */
- } else {
- tempW1 = 0x40;/* Rtt_Nom=RZQ/2=120 Ohm */
- }
- }
- }
- }
- else { /* 1 or 4 Dimms per channel */
- if ((pDCTData->MaxDimmsInstalled == 1) || (pDCTData->MaxDimmsInstalled == 4))
- {
- tempW1 = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */
- }
- else /* 2 or 3 Dimms per channel */
- {
- if (MemClkFreq < 5) {
- tempW1 = 0x0044; /* Rtt_Nom=RZQ/6=40 Ohm */
- } else {
- tempW1 = 0x0204; /* Rtt_Nom=RZQ/8=30 Ohm */
- }
+ tempW1 = 0x0;
+ } else {
+ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank);
}
+ } else {
+ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank);
}
}
tempW=tempW|tempW1;
@@ -353,20 +388,22 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
else
{
/* Disable the output drivers of all other ranks for
- * the target DIMM. */
+ * the target DIMM.
+ */
tempW = bitTestSet(tempW1, Qoff);
}
}
- /* program MrsAddress[5,1]=output driver impedance control (DIC):
- * based on F2x[1,0]84[DrvImpCtrl] */
+ /* Program MrsAddress[5,1]=output driver impedance control (DIC):
+ * based on F2x[1,0]84[DrvImpCtrl]
+ */
tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
FUN_DCT, DRAM_MRS_REGISTER, DrvImpCtrlStart, DrvImpCtrlEnd);
- if (bitTest(tempW1,1))
- {tempW = bitTestSet(tempW, 5);}
- if (bitTest(tempW1,0))
- {tempW = bitTestSet(tempW, 1);}
+ if (bitTest(tempW1, 1))
+ tempW = bitTestSet(tempW, 5);
+ if (bitTest(tempW1, 0))
+ tempW = bitTestSet(tempW, 1);
- tempW = swapAddrBits_wl(pDCTData,tempW);
+ tempW = swapAddrBits_wl(pDCTData, tempW);
set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
DRAM_INIT, MrsAddressStart, MrsAddressEnd, tempW);
@@ -404,29 +441,10 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED]))
tempW+=0x8;
/* determine Rtt_WR for WL & Normal mode */
- if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
+ if (pDCTData->Status[DCT_STATUS_REGISTERED])
tempW1 = RttWrRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank);
- } else {
- if (wl)
- {
- tempW1 = 0x00; /* Rtt_WR=off */
- }
- else
- {
- if (pDCTData->MaxDimmsInstalled == 1)
- {
- tempW1 = 0x00; /* Rtt_WR=off */
- }
- else
- {
- if (MemClkFreq == 6) {
- tempW1 = 0x200; /* Rtt_WR=RZQ/4=60 Ohm */
- } else {
- tempW1 = 0x400; /* Rtt_WR=RZQ/2 */
- }
- }
- }
- }
+ else
+ tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank);
tempW=tempW|tempW1;
tempW = swapAddrBits_wl(pDCTData,tempW);
set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
@@ -483,38 +501,10 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
}
/* determine Rtt_Nom for WL & Normal mode */
- if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
+ if (pDCTData->Status[DCT_STATUS_REGISTERED])
tempW1 = RttNomNonTargetRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank);
- } else {
- if (wl)
- {
- if ((pDCTData->DimmRanks[currDimm] == 2) && (rank == 1))
- {
- tempW1 = 0x00; /* Rtt_Nom=OFF */
- }
- else
- {
- if (MemClkFreq < 5) {
- tempW1 = 0x0044;/* Rtt_Nom=RZQ/6=40 Ohm */
- } else {
- tempW1 = 0x0204;/* Rtt_Nom=RZQ/8=30 Ohm */
- }
- }
- }
- else { /* 1 or 4 Dimms per channel */
- if (pDCTData->MaxDimmsInstalled == 4)
- {
- tempW1 = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */
- }
- else { /* 2 or 3 Dimms per channel */
- if (MemClkFreq < 5) {
- tempW1 = 0x0044; /* Rtt_Nom=RZQ/6=40 Ohm */
- } else {
- tempW1 = 0x0204; /* Rtt_Nom=RZQ/8=30 Ohm */
- }
- }
- }
- }
+ else
+ tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank);
tempW=tempW|tempW1;
/* program MrsAddress[5,1]=output driver impedance control (DIC):
* based on F2x[1,0]84[DrvImpCtrl] */
@@ -560,22 +550,10 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl)
if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED]))
tempW+=0x8;
/* determine Rtt_WR for WL & Normal mode */
- if (pDCTData->Status[DCT_STATUS_REGISTERED]) {
+ if (pDCTData->Status[DCT_STATUS_REGISTERED])
tempW1 = RttWrRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank);
- } else {
- if (wl)
- {
- tempW1 = 0x00; /* Rtt_WR=off */
- }
- else
- {
- if (MemClkFreq == 6) {
- tempW1 = 0x200; /* Rtt_WR=RZQ/4=60 Ohm */
- } else {
- tempW1 = 0x400; /* Rtt_WR=RZQ/2 */
- }
- }
- }
+ else
+ tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank);
tempW=tempW|tempW1;
tempW = swapAddrBits_wl(pDCTData,tempW);
set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT,
@@ -646,9 +624,14 @@ void programODT(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm)
*/
void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
{
- u8 ByteLane, Seed_Gross, Seed_Fine;
+ u8 ByteLane, Seed_Gross, Seed_Fine, MemClkFreq;
u32 Value, Addr;
u16 Addl_Data_Offset, Addl_Data_Port;
+ u16 freq_tab[] = {400, 533, 667, 800};
+
+ /* MemClkFreq: 3: 400MHz; 4: 533MHz; 5: 667MHz; 6: 800MHz */
+ MemClkFreq = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
+ FUN_DCT, DRAM_CONFIG_HIGH, 0, 2);
/* Program F2x[1, 0]9C_x08[WrLvOdt[3:0]] to the proper ODT settings for the
* current memory subsystem configuration.
@@ -656,12 +639,13 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
programODT(pMCTData, pDCTData, dimm);
/* Program F2x[1,0]9C_x08[WrLvOdtEn]=1 */
- if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx))
+ if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx)) {
set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT,
DRAM_ADD_DCT_PHY_CONTROL_REG, WrLvOdtEn, WrLvOdtEn, (u32)1);
+ }
else
{
- /* Program WrLvOdtEn=1 through set bit 12 of D3CSODT reg offset 0 for Rev.B*/
+ /* Program WrLvOdtEn=1 through set bit 12 of D3CSODT reg offset 0 for Rev.B */
if (pDCTData->DctTrain)
{
Addl_Data_Offset=0x198;
@@ -687,7 +671,6 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
/* Wait 10 MEMCLKs to allow for ODT signal settling. */
pMCTData->AgesaDelay(10);
- ByteLane = 0;
if (pass == 1)
{
if (pDCTData->Status[DCT_STATUS_REGISTERED])
@@ -705,10 +688,17 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
}
else
{
- Seed_Gross = 0x00;
- Seed_Fine = 0x1A;
+ if (MemClkFreq == 6) {
+ /* DDR-800 */
+ Seed_Gross = 0x00;
+ Seed_Fine = 0x1a;
+ } else {
+ /* Use settings for DDR-400 (interpolated from BKDG) */
+ Seed_Gross = 0x00;
+ Seed_Fine = 0x0d;
+ }
}
- while(ByteLane < MAX_BYTE_LANES)
+ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++)
{
/* Program an initialization value to registers F2x[1, 0]9C_x[51:50] and
* F2x[1, 0]9C_x52 to set the gross and fine delay for all the byte lane fields
@@ -720,35 +710,32 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass)
*/
pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
- ByteLane++;
}
- } else if (pDCTData->Status[DCT_STATUS_REGISTERED]) { /* For Pass 2 */
+ } else { /* Pass 2 */
/* From BKDG, Write Leveling Seed Value. */
- /* TODO: The unbuffered DIMMs are unstable on the code below. So temporarily it is
- * only for registered DIMMs. */
u32 RegisterDelay, SeedTotal;
- u8 MemClkFreq;
- u16 freq_tab[] = {400, 533, 667, 800};
- while(ByteLane < MAX_BYTE_LANES)
+ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++)
{
- MemClkFreq = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId,
- FUN_DCT, DRAM_CONFIG_HIGH, 0, 2);
if (pDCTData->Status[DCT_STATUS_REGISTERED])
RegisterDelay = 0x20; /* TODO: ((RCW2 & BIT0) == 0) ? 0x20 : 0x30; */
else
RegisterDelay = 0;
- SeedTotal = (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1F) |
- pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5;
+ SeedTotal = (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) |
+ (pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5);
/* SeedTotalPreScaling = (the total delay value in F2x[1, 0]9C_x[4A:30] from pass 1 of write levelization
training) - RegisterDelay. */
- /* MemClkFreq: 3: 400MHz; 4: 533MHz; 5: 667MHz; 6: 800MHz */
- SeedTotal = (u16) (RegisterDelay + ((((u32) SeedTotal - RegisterDelay) *
- freq_tab[MemClkFreq-3]) / 400));
- Seed_Gross = (SeedTotal & 0x20) != 0 ? 1 : 2;
- Seed_Fine = SeedTotal & 0x1F;
+ SeedTotal = (uint16_t) (RegisterDelay + ((((uint64_t) SeedTotal - RegisterDelay) *
+ freq_tab[MemClkFreq-3] * 100) / (freq_tab[0] * 100)));
+ Seed_Gross = SeedTotal / 32;
+ Seed_Fine = SeedTotal & 0x1f;
+ if (Seed_Gross == 0)
+ Seed_Gross = 0;
+ else if (Seed_Gross & 0x1)
+ Seed_Gross = 1;
+ else
+ Seed_Gross = 2;
pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross;
pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine;
- ByteLane ++;
}
}
diff --git a/src/northbridge/amd/amdmct/wrappers/mcti_d.c b/src/northbridge/amd/amdmct/wrappers/mcti_d.c
index ea328935b2..f9a9921e6e 100644
--- a/src/northbridge/amd/amdmct/wrappers/mcti_d.c
+++ b/src/northbridge/amd/amdmct/wrappers/mcti_d.c
@@ -59,6 +59,10 @@ static u16 mctGet_NVbits(u8 index)
val = 1;
#elif CONFIG_CPU_SOCKET_TYPE == 0x13 /* ASB2 */
val = 4;
+#elif CONFIG_CPU_SOCKET_TYPE == 0x14 /* C32 */
+ val = 5;
+#elif CONFIG_CPU_SOCKET_TYPE == 0x15 /* G34 */
+ val = 3;
//#elif SYSTEM_TYPE == MOBILE
// val = 2;
#endif
@@ -413,101 +417,6 @@ static void mctHookAfterDramInit(void)
}
#if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */
-static void coreDelay(u32 microseconds)
-{
- msr_t now;
- msr_t end;
- u32 cycles;
-
- /* delay ~40us
- This seems like a hack to me...
- It would be nice to have a central delay function. */
-
- cycles = (microseconds * 100) << 3; /* x8 (number of 1.25ns ticks) */
-
- if (!(rdmsr(HWCR).lo & TSC_FREQ_SEL_MASK)) {
- msr_t pstate_msr = rdmsr(CUR_PSTATE_MSR);
- if (!(rdmsr(0xC0010064+pstate_msr.lo).lo & NB_DID_M_ON)) {
- cycles = cycles <<1; // half freq, double cycles
- }
- } // else should we keep p0 freq at the time of setting TSC_FREQ_SEL_MASK somewhere and check it here ?
-
- now = rdmsr(TSC_MSR);
- // avoid overflow when called near 2^32 ticks ~ 5.3 s boundaries
- if (0xffffffff - cycles >= now.lo ) {
- end.hi = now.hi;
- end.lo = now.lo + cycles;
- } else {
- end.hi = now.hi +1; //
- end.lo = cycles - (1+(0xffffffff - now.lo));
- }
- do {
- now = rdmsr(TSC_MSR);
- } while ((now.hi < end.hi) || ((now.hi == end.hi) && (now.lo < end.lo)));
-}
-
-/* Erratum 350 */
-static void vErrata350(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat)
-{
- u8 u8Channel;
- u8 u8Receiver;
- u32 u32Addr;
- u8 u8Valid;
- u32 u32DctDev;
-
- // 1. dummy read for each installed DIMM */
- for (u8Channel = 0; u8Channel < 2; u8Channel++) {
- // This will be 0 for vaild DIMMS, eles 8
- u8Receiver = mct_InitReceiver_D(pDCTstat, u8Channel);
-
- for (; u8Receiver < 8; u8Receiver += 2) {
- u32Addr = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, u8Channel, u8Receiver, &u8Valid);
-
- if(!u8Valid) { /* Address not supported on current CS */
- print_t("vErrata350: Address not supported on current CS\n");
- continue;
- }
- print_t("vErrata350: dummy read \n");
- read32_fs(u32Addr);
- }
- }
-
- print_t("vErrata350: step 2a\n");
-
- /* 2. Write 0000_8000h to register F2x[1, 0]9C_xD080F0C. */
- u32DctDev = pDCTstat->dev_dct;
- Set_NB32_index_wait(u32DctDev, 0x098, 0xD080F0C, 0x00008000);
- /* ^--- value
- ^---F2x[1, 0]9C_x0D080F0C, No description in BKDG.
- ^----F2x[1, 0]98 DRAM Controller Additional Data Offset Register */
-
- if(!pDCTstat->GangedMode) {
- print_t("vErrata350: step 2b\n");
- Set_NB32_index_wait(u32DctDev, 0x198, 0xD080F0C, 0x00008000);
- /* ^--- value
- ^---F2x[1, 0]9C_x0D080F0C, No description in BKDG
- ^----F2x[1, 0]98 DRAM Controller Additional Data Offset Register */
- }
-
- print_t("vErrata350: step 3\n");
- /* 3. Wait at least 300 nanoseconds. */
- coreDelay(1);
-
- print_t("vErrata350: step 4\n");
- /* 4. Write 0000_0000h to register F2x[1, 0]9C_xD080F0C. */
- Set_NB32_index_wait(u32DctDev, 0x098, 0xD080F0C, 0x00000000);
-
- if(!pDCTstat->GangedMode) {
- print_t("vErrata350: step 4b\n");
- Set_NB32_index_wait(u32DctDev, 0x198, 0xD080F0C, 0x00000000);
- }
-
- print_t("vErrata350: step 5\n");
- /* 5. Wait at least 2 microseconds. */
- coreDelay(2);
-
-}
-
static void vErratum372(struct DCTStatStruc *pDCTstat)
{
msr_t msr = rdmsr(NB_CFG_MSR);
@@ -546,8 +455,7 @@ static void mctHookBeforeAnyTraining(struct MCTStatStruc *pMCTstat, struct DCTSt
{
#if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */
/* FIXME : as of 25.6.2010 errata 350 and 372 should apply to ((RB|BL|DA)-C[23])|(HY-D[01])|(PH-E0) but I don't find constants for all of them */
- if (pDCTstatA->LogicalCPUID & AMD_DRBH_Cx) {
- vErrata350(pMCTstat, pDCTstatA);
+ if (pDCTstatA->LogicalCPUID & (AMD_DRBH_Cx | AMD_DR_Dx)) {
vErratum372(pDCTstatA);
vErratum414(pDCTstatA);
}