nb/amd/mct_ddr3: Work around RDIMM training failure

Under certain conditions, not elucidated in the BKDG, an extra memclock of CAS write latency is required. The only reliable way I have found to detect when this is required is to try training without the delay and, if DQS position training fails, to add the delay and retrain.

This is probably related in some form or another to the badly broken DQS Write Early algorithm given in the BKDG.

Change-Id: Idfaca1b3da3f45793d210980e952ccdfc9ba1410
Signed-off-by: Timothy Pearson <tpearson@raptorengineeringinc.com>
Reviewed-on: https://review.coreboot.org/13531
Tested-by: build bot (Jenkins)
Tested-by: Raptor Engineering Automated Test Stand <noreply@raptorengineeringinc.com>
Reviewed-by: Martin Roth <martinroth@google.com>
author: Timothy Pearson <tpearson@raptorengineeringinc.com> 2016-01-30 23:34:51 -0600
committer: Martin Roth <martinroth@google.com> 2016-02-05 22:26:31 +0100
commit: 31682364ba062fb3cbf4ff3b0ad7cbdb7b5daae1 (patch)
tree: 0824c0e09657c7fbbdc71c83e9bba0969826f7a9 /src/northbridge/amd/amdmct
parent: 606b6ec686e07930008c5c1710efaaf3d097f465 (diff)
4 files changed, 113 insertions, 6 deletions
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
index e1c0d4f563..cbe75b60d4 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c
@@ -3332,12 +3332,24 @@ static void exit_training_mode_fam15(struct MCTStatStruc *pMCTstat,
 static void DQSTiming_D(struct MCTStatStruc *pMCTstat,
 				struct DCTStatStruc *pDCTstatA, uint8_t allow_config_restore)
 {
+	uint8_t Node;
 	u8 nv_DQSTrainCTL;
+	uint8_t retry_requested;
 
 	if (pMCTstat->GStatus & (1 << GSB_EnDIMMSpareNW)) {
 		return;
 	}
 
+	/* Set initial TCWL offset to zero */
+	for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
+		uint8_t dct;
+		struct DCTStatStruc *pDCTstat;
+		pDCTstat = pDCTstatA + Node;
+		for (dct = 0; dct < 2; dct++)
+			pDCTstat->tcwl_delay[dct] = 0;
+	}
+
+retry_dqs_training_and_levelization:
 	// nv_DQSTrainCTL = mctGet_NVbits(NV_DQSTrainCTL);
 	nv_DQSTrainCTL = !allow_config_restore;
 
@@ -3345,7 +3357,6 @@ static void DQSTiming_D(struct MCTStatStruc *pMCTstat,
 	phyAssistedMemFnceTraining(pMCTstat, pDCTstatA, -1);
 
 	if (is_fam15h()) {
-		uint8_t Node;
 		struct DCTStatStruc *pDCTstat;
 		for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
 			pDCTstat = pDCTstatA + Node;
@@ -3393,6 +3404,59 @@ static void DQSTiming_D(struct MCTStatStruc *pMCTstat,
 
 		mct_TrainDQSPos_D(pMCTstat, pDCTstatA);
 
+		/* Determine if DQS training requested a retrain attempt */
+		retry_requested = 0;
+		for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
+			struct DCTStatStruc *pDCTstat;
+			pDCTstat = pDCTstatA + Node;
+
+			if (pDCTstat->NodePresent) {
+				if (pDCTstat->TrainErrors & (1 << SB_FatalError)) {
+					die("DIMM training FAILED!  Halting system.");
+				}
+				if (pDCTstat->TrainErrors & (1 << SB_RetryConfigTrain)) {
+					retry_requested = 1;
+
+					/* Clear previous errors */
+					pDCTstat->TrainErrors &= ~(1 << SB_RetryConfigTrain);
+					pDCTstat->TrainErrors &= ~(1 << SB_NODQSPOS);
+					pDCTstat->ErrStatus &= ~(1 << SB_RetryConfigTrain);
+					pDCTstat->ErrStatus &= ~(1 << SB_NODQSPOS);
+				}
+			}
+		}
+
+		/* Retry training and levelization if requested */
+		if (retry_requested) {
+			printk(BIOS_DEBUG, "%s: Restarting training on algorithm request\n", __func__);
+			/* Reset frequency to minimum */
+			for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
+				struct DCTStatStruc *pDCTstat;
+				pDCTstat = pDCTstatA + Node;
+				if (pDCTstat->NodePresent) {
+					uint8_t original_target_freq = pDCTstat->TargetFreq;
+					uint8_t original_auto_speed = pDCTstat->DIMMAutoSpeed;
+					pDCTstat->TargetFreq = mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK));
+					pDCTstat->Speed = pDCTstat->DIMMAutoSpeed = pDCTstat->TargetFreq;
+					SetTargetFreq(pMCTstat, pDCTstatA, Node);
+					pDCTstat->TargetFreq = original_target_freq;
+					pDCTstat->DIMMAutoSpeed = original_auto_speed;
+				}
+			}
+			/* Apply any DIMM timing changes */
+			for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
+				struct DCTStatStruc *pDCTstat;
+				pDCTstat = pDCTstatA + Node;
+				if (pDCTstat->NodePresent) {
+					AutoCycTiming_D(pMCTstat, pDCTstat, 0);
+					if (!pDCTstat->GangedMode)
+						if (pDCTstat->DIMMValidDCT[1] > 0)
+							AutoCycTiming_D(pMCTstat, pDCTstat, 1);
+				}
+			}
+			goto retry_dqs_training_and_levelization;
+		}
+
 		TrainMaxRdLatency_En_D(pMCTstat, pDCTstatA);
 
 		if (is_fam15h())
@@ -3417,7 +3481,6 @@ static void DQSTiming_D(struct MCTStatStruc *pMCTstat,
 	}
 
 	if (is_fam15h()) {
-		uint8_t Node;
 		struct DCTStatStruc *pDCTstat;
 
 		/* Switch DCT control register to DCT 0 per Erratum 505 */
@@ -4268,6 +4331,9 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat,
 			Tcwl = 0x9;
 		else
 			Tcwl = 0x5;	/* Power-on default */
+
+		/* Apply offset */
+		Tcwl += pDCTstat->tcwl_delay[dct];
 	}
 
 	/* Program DRAM Timing values */
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
index f953919052..91843d05be 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h
@@ -597,6 +597,7 @@ struct DCTStatStruc {		/* A per Node structure*/
 	u8 DqsRcvEnGrossMin;
 	u8 WrDatGrossMax;
 	u8 WrDatGrossMin;
+	uint8_t tcwl_delay[2];
 
 	u16 RegMan1Present;	/* DIMM present bitmap of Register manufacture 1 */
 	u16 RegMan2Present;	/* DIMM present bitmap of Register manufacture 2 */
@@ -829,7 +830,9 @@ struct amd_s3_persistent_data {
 #define SB_SmallRCVR		13	/* DQS Rcvr En pass window too small (far right of dynamic range)*/
 #define SB_NODQSPOS		14	/* No DQS-DQ passing positions*/
 #define SB_SMALLDQS		15	/* DQS-DQ passing window too small*/
-#define SB_DCBKScrubDis	16	/* DCache scrub requested but not enabled */
+#define SB_DCBKScrubDis		16	/* DCache scrub requested but not enabled */
+#define SB_RetryConfigTrain	17	/* Retry configuration and training */
+#define SB_FatalError		18	/* Fatal training error detected */
 
 /*===============================================================================
 	Local Configuration Status (DCTStatStruc.Status[31:0])
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
index 22e9836757..19a7acb329 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c
@@ -1664,8 +1664,10 @@ static void TrainDQSReceiverEnCyc_D_Fam15(struct MCTStatStruc *pMCTstat,
 	uint8_t lane;
 	uint32_t dword;
 	uint32_t rx_en_offset;
+	uint8_t dct_training_success;
 	uint16_t initial_phy_phase_delay[MAX_BYTE_LANES];
 	uint16_t current_phy_phase_delay[MAX_BYTE_LANES];
+	uint8_t lane_training_success[MAX_BYTE_LANES];
 	uint8_t dqs_results_array[1024];
 
  	uint16_t ren_step = 0x40;
@@ -1709,6 +1711,8 @@ static void TrainDQSReceiverEnCyc_D_Fam15(struct MCTStatStruc *pMCTstat,
 		/* 2.10.5.8.3 */
 		Receiver = mct_InitReceiver_D(pDCTstat, dct);
 
+		dct_training_success = 1;
+
 		/* There are four receiver pairs, loosely associated with chipselects.
 		 * This is essentially looping over each DIMM.
 		 */
@@ -1719,6 +1723,9 @@ static void TrainDQSReceiverEnCyc_D_Fam15(struct MCTStatStruc *pMCTstat,
 				continue;
 			}
 
+			for (lane = 0; lane < MAX_BYTE_LANES; lane++)
+				lane_training_success[lane] = 0;
+
 			/* 2.10.5.8.3 (2) */
 			read_dqs_receiver_enable_control_registers(initial_phy_phase_delay, dev, dct, dimm, index_reg);
 
@@ -1753,10 +1760,24 @@ static void TrainDQSReceiverEnCyc_D_Fam15(struct MCTStatStruc *pMCTstat,
 					dqs_results_array[current_phy_phase_delay[lane]] = TrainDQSRdWrPos_D_Fam15(pMCTstat, pDCTstat, dct, Receiver, Receiver + 2, lane, lane + 1);
 				}
 
+				uint16_t phase_delay;
+				for (phase_delay = 0; phase_delay < 0x3ff; phase_delay++)
+					if (dqs_results_array[phase_delay])
+						lane_training_success[lane] = 1;
+
+				if (!lane_training_success[lane]) {
+					if (pDCTstat->tcwl_delay[dct] >= 1) {
+						Errors |= 1 << SB_FatalError;
+						printk(BIOS_ERR, "%s: lane %d failed to train!  "
+							"Training for receiver %d on DCT %d aborted\n",
+							__func__, lane, Receiver, dct);
+					}
+					break;
+				}
+
 #ifdef PRINT_PASS_FAIL_BITMAPS
-				uint16_t iter;
-				for (iter = 0; iter < 0x3ff; iter++) {
-					if (dqs_results_array[iter])
+				for (phase_delay = 0; phase_delay < 0x3ff; phase_delay++) {
+					if (dqs_results_array[phase_delay])
 						printk(BIOS_DEBUG, "+");
 					else
 						printk(BIOS_DEBUG, ".");
@@ -1787,6 +1808,13 @@ static void TrainDQSReceiverEnCyc_D_Fam15(struct MCTStatStruc *pMCTstat,
 				Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0030 | (lane << 8), dword);
 			}
 
+			for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
+				if (!lane_training_success[lane]) {
+					dct_training_success = 0;
+					Errors |= 1 << SB_NODQSPOS;
+				}
+			}
+
 #if DQS_TRAIN_DEBUG > 0
 			printk(BIOS_DEBUG, "TrainDQSReceiverEnCyc_D_Fam15 DQS receiver enable timing: ");
 			for (lane = 0; lane < MAX_BYTE_LANES; lane++) {
@@ -1795,6 +1823,15 @@ static void TrainDQSReceiverEnCyc_D_Fam15(struct MCTStatStruc *pMCTstat,
 			printk(BIOS_DEBUG, "\n");
 #endif
 		}
+
+		if (!dct_training_success) {
+			if (pDCTstat->tcwl_delay[dct] < 1) {
+				/* Increase TCWL */
+				pDCTstat->tcwl_delay[dct]++;
+				/* Request retraining */
+				Errors |= 1 << SB_RetryConfigTrain;
+			}
+		}
 	}
 
 	pDCTstat->TrainErrors |= Errors;
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
index bcf603139e..d8da16b4fe 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c
@@ -721,6 +721,7 @@ static u32 mct_MR2(struct MCTStatStruc *pMCTstat,
 
 		/* Obtain Tcwl, adjust, and set CWL with the adjusted value */
 		dword = Get_NB32_DCT(dev, dct, 0x20c) & 0x1f;
+		dword -= pDCTstat->tcwl_delay[dct];
 		ret |= ((dword - 5) << 3);
 
 		/* Obtain and set RttWr */
author	Timothy Pearson <tpearson@raptorengineeringinc.com>	2016-01-30 23:34:51 -0600
committer	Martin Roth <martinroth@google.com>	2016-02-05 22:26:31 +0100
commit	31682364ba062fb3cbf4ff3b0ad7cbdb7b5daae1 (patch)
tree	0824c0e09657c7fbbdc71c83e9bba0969826f7a9 /src/northbridge/amd/amdmct
parent	606b6ec686e07930008c5c1710efaaf3d097f465 (diff)