From 9426e4fcf5f49c46fa1e5cbe9fc38d2575cfdb62 Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Thu, 25 Jun 2015 18:08:53 -0500 Subject: northbridge/amd/amdmct/mct_ddr3: Attempt to recover from phy training errors AMD's automatic phy phase detection hardware is very fragile and often produces incorrect results. Attempt to recover from obvious phase locking errors by retrying phy training on the failing link. Change-Id: Ia2c3022534c9ad44714eef6e118869f054bd9f6b Signed-off-by: Timothy Pearson Reviewed-on: http://review.coreboot.org/12006 Tested-by: build bot (Jenkins) Reviewed-by: Stefan Reinauer Reviewed-by: Alexandru Gagniuc --- src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c | 68 +++++++++++++++++++++------ src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c | 36 +++++++++++--- 2 files changed, 83 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c index 5107fee63d..b3572b1941 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c @@ -14,11 +14,11 @@ * GNU General Public License for more details. */ -static void AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, +static uint8_t AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass); -static void AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, +static uint8_t AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass); -static void AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, +static uint8_t AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass); static void EnableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); static void DisableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); @@ -96,11 +96,12 @@ static void DisableAutoRefresh_D(struct MCTStatStruc *pMCTstat, } -static void PhyWLPass1(struct MCTStatStruc *pMCTstat, +static uint8_t PhyWLPass1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct) { u8 dimm; u16 DIMMValid; + uint8_t status = 0; void *DCTPtr; dct &= 1; @@ -117,19 +118,22 @@ static void PhyWLPass1(struct MCTStatStruc *pMCTstat, PrepareC_DCT(pMCTstat, pDCTstat, dct); for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) { if (DIMMValid & (1 << (dimm << 1))) { - AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, FirstPass); - AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, FirstPass); - AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, FirstPass); + status |= AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, FirstPass); + status |= AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, FirstPass); + status |= AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, FirstPass); } } } + + return status; } -static void PhyWLPass2(struct MCTStatStruc *pMCTstat, +static uint8_t PhyWLPass2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct) { u8 dimm; u16 DIMMValid; + uint8_t status = 0; void *DCTPtr; dct &= 1; @@ -159,12 +163,14 @@ static void PhyWLPass2(struct MCTStatStruc *pMCTstat, DisableAutoRefresh_D(pMCTstat, pDCTstat); for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) { if (DIMMValid & (1 << (dimm << 1))) { - AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, SecondPass); - AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, SecondPass); - AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, SecondPass); + status |= AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, SecondPass); + status |= AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, SecondPass); + status |= AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, SecondPass); } } } + + return status; } static uint16_t fam15h_next_highest_memclk_freq(uint16_t memclk_freq) @@ -179,6 +185,8 @@ static uint16_t fam15h_next_highest_memclk_freq(uint16_t memclk_freq) static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, uint8_t Pass) { + uint8_t status; + uint8_t timeout; uint16_t final_target_freq; pDCTstat->C_MCTPtr = &(pDCTstat->s_C_MCTPtr); @@ -197,8 +205,21 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat, } if (Pass == FirstPass) { - PhyWLPass1(pMCTstat, pDCTstat, 0); - PhyWLPass1(pMCTstat, pDCTstat, 1); + timeout = 0; + do { + status = 0; + timeout++; + status |= PhyWLPass1(pMCTstat, pDCTstat, 0); + status |= PhyWLPass1(pMCTstat, pDCTstat, 1); + if (status) + printk(BIOS_INFO, + "%s: Retrying write levelling due to invalid value(s) detected in first phase\n", + __func__); + } while (status && (timeout < 8)); + if (status) + printk(BIOS_INFO, + "%s: Uncorrectable invalid value(s) detected in first phase of write levelling\n", + __func__); } if (Pass == SecondPass) { @@ -207,6 +228,7 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat, * NOTE: BIOS must program both DCTs to the same frequency. * NOTE: Fam15h steps the frequency, Fam10h slams the frequency. */ + uint8_t global_phy_training_status = 0; final_target_freq = pDCTstat->TargetFreq; while (pDCTstat->Speed != final_target_freq) { @@ -215,12 +237,28 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat, else pDCTstat->TargetFreq = final_target_freq; SetTargetFreq(pMCTstat, pDCTstat); - PhyWLPass2(pMCTstat, pDCTstat, 0); - PhyWLPass2(pMCTstat, pDCTstat, 1); + timeout = 0; + do { + status = 0; + timeout++; + status |= PhyWLPass2(pMCTstat, pDCTstat, 0); + status |= PhyWLPass2(pMCTstat, pDCTstat, 1); + if (status) + printk(BIOS_INFO, + "%s: Retrying write levelling due to invalid value(s) detected in last phase\n", + __func__); + } while (status && (timeout < 8)); + global_phy_training_status |= status; } pDCTstat->TargetFreq = final_target_freq; + if (global_phy_training_status) + printk(BIOS_WARNING, + "%s: Uncorrectable invalid value(s) detected in second phase of write levelling; " + "continuing but system may be unstable!\n", + __func__); + uint8_t dct; for (dct = 0; dct < 2; dct++) { sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c index 48b72caf36..496803e94b 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c @@ -50,7 +50,7 @@ static int32_t abs(int32_t val) { */ /*----------------------------------------------------------------------------- - * void AgesaHwWlPhase1(SPDStruct *SPDData,MCTStruct *MCTData, DCTStruct *DCTData, + * uint8_t AgesaHwWlPhase1(SPDStruct *SPDData,MCTStruct *MCTData, DCTStruct *DCTData, * u8 Dimm, u8 Pass) * * Description: @@ -67,7 +67,7 @@ static int32_t abs(int32_t val) { * OUT *----------------------------------------------------------------------------- */ -void AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, +uint8_t AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass) { u8 ByteLane; @@ -170,12 +170,15 @@ void AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTsta } pDCTData->WLCriticalGrossDelayPrevPass = 0x1f; + + return 0; } -void AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, +uint8_t AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass) { u8 ByteLane; + uint8_t status = 0; sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct]; if (is_fam15h()) { @@ -202,19 +205,38 @@ void AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTsta /* Compensate for occasional noise/instability causing sporadic training failure */ for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { + uint8_t faulty_value_detected = 0; uint16_t total_delay_seed = ((pDCTData->WLSeedGrossDelay[index+ByteLane] & 0x1f) << 5) | (pDCTData->WLSeedFineDelay[index+ByteLane] & 0x1f); uint16_t total_delay_phy = ((pDCTData->WLGrossDelay[index+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[index+ByteLane] & 0x1f); - if (abs(total_delay_phy - total_delay_seed) > 0x20) { - printk(BIOS_DEBUG, "%s: overriding faulty phy value (seed: %04x phy: %04x step: %04x)\n", __func__, + if (pass == FirstPass) { + /* Allow a somewhat higher step threshold on the first pass + * For the most part, as long as the phy isn't stepping + * several clocks at once the values are probably valid. + */ + if (abs(total_delay_phy - total_delay_seed) > 0x30) + faulty_value_detected = 1; + } else { + /* Stepping memory clocks between adjacent allowed frequencies + * should not yield large phy value differences... + */ + + if (abs(total_delay_phy - total_delay_seed) > 0x20) + faulty_value_detected = 1; + } + if (faulty_value_detected) { + printk(BIOS_INFO, "%s: overriding faulty phy value (seed: %04x phy: %04x step: %04x)\n", __func__, total_delay_seed, total_delay_phy, abs(total_delay_phy - total_delay_seed)); pDCTData->WLGrossDelay[index+ByteLane] = pDCTData->WLSeedGrossDelay[index+ByteLane]; pDCTData->WLFineDelay[index+ByteLane] = pDCTData->WLSeedFineDelay[index+ByteLane]; + status = 1; } } } + + return status; } -void AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, +uint8_t AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass) { u8 ByteLane; @@ -281,6 +303,8 @@ void AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTsta * to the normal operating termination: */ prepareDimms(pMCTstat, pDCTstat, dct, dimm, FALSE); + + return 0; } /*---------------------------------------------------------------------------- -- cgit v1.2.3