diff options
Diffstat (limited to 'src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c')
-rw-r--r-- | src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c | 1342 |
1 files changed, 1181 insertions, 161 deletions
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c index d2a82ce995..4397ebaccb 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c @@ -19,7 +19,10 @@ Description: Receiver En and DQS Timing Training feature for DDR 3 MCT ******************************************************************************/ -static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, +static int32_t abs(int32_t val); +static void dqsTrainRcvrEn_SW_Fam10(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Pass); +static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 Pass); static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); @@ -28,7 +31,7 @@ static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 Channel); static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly); -static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat, +static uint32_t fenceDynTraining_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct); static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat); @@ -85,11 +88,154 @@ static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat, void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 Pass) { - if(mct_checkNumberOfDqsRcvEn_1Pass(Pass)) - dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass); + if(mct_checkNumberOfDqsRcvEn_1Pass(Pass)) { + if (is_fam15h()) + dqsTrainRcvrEn_SW_Fam15(pMCTstat, pDCTstat, Pass); + else + dqsTrainRcvrEn_SW_Fam10(pMCTstat, pDCTstat, Pass); + } } -static void read_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg) +static uint16_t fam15_receiver_enable_training_seed(struct 
DCTStatStruc *pDCTstat, uint8_t dct, uint8_t dimm, uint8_t rank, uint8_t package_type) +{ + uint32_t dword; + uint16_t seed = 0; + + /* FIXME + * Mainboards need to be able to specify the maximum number of DIMMs installable per channel + * For now assume a maximum of 2 DIMMs per channel can be installed + */ + uint8_t MaxDimmsInstallable = 2; + + uint8_t channel = dct; + if (package_type == PT_GR) { + /* Get the internal node number */ + dword = Get_NB32(pDCTstat->dev_nbmisc, 0xe8); + dword = (dword >> 30) & 0x3; + if (dword == 1) { + channel += 2; + } + } + + if (pDCTstat->Status & (1 << SB_Registered)) { + if (package_type == PT_GR) { + /* Socket G34: Fam15h BKDG v3.14 Table 99 */ + if (MaxDimmsInstallable == 1) { + if (channel == 0) + seed = 0x43; + else if (channel == 1) + seed = 0x3f; + else if (channel == 2) + seed = 0x3a; + else if (channel == 3) + seed = 0x35; + } else if (MaxDimmsInstallable == 2) { + if (channel == 0) + seed = 0x54; + else if (channel == 1) + seed = 0x4d; + else if (channel == 2) + seed = 0x45; + else if (channel == 3) + seed = 0x40; + } else if (MaxDimmsInstallable == 3) { + if (channel == 0) + seed = 0x6b; + else if (channel == 1) + seed = 0x5e; + else if (channel == 2) + seed = 0x4b; + else if (channel == 3) + seed = 0x3d; + } + } else if (package_type == PT_C3) { + /* Socket C32: Fam15h BKDG v3.14 Table 100 */ + if ((MaxDimmsInstallable == 1) || (MaxDimmsInstallable == 2)) { + if (channel == 0) + seed = 0x3f; + else if (channel == 1) + seed = 0x3e; + } else if (MaxDimmsInstallable == 3) { + if (channel == 0) + seed = 0x47; + else if (channel == 1) + seed = 0x38; + } + } + } else if (pDCTstat->Status & (1 << SB_LoadReduced)) { + if (package_type == PT_GR) { + /* Socket G34: Fam15h BKDG v3.14 Table 99 */ + if (MaxDimmsInstallable == 1) { + if (channel == 0) + seed = 0x123; + else if (channel == 1) + seed = 0x122; + else if (channel == 2) + seed = 0x112; + else if (channel == 3) + seed = 0x102; + } + } else if (package_type == PT_C3) { + 
/* Socket C32: Fam15h BKDG v3.14 Table 100 */ + if (channel == 0) + seed = 0x132; + else if (channel == 1) + seed = 0x122; + } + } else { + if (package_type == PT_GR) { + /* Socket G34: Fam15h BKDG v3.14 Table 99 */ + if (MaxDimmsInstallable == 1) { + if (channel == 0) + seed = 0x3e; + else if (channel == 1) + seed = 0x38; + else if (channel == 2) + seed = 0x37; + else if (channel == 3) + seed = 0x31; + } else if (MaxDimmsInstallable == 2) { + if (channel == 0) + seed = 0x51; + else if (channel == 1) + seed = 0x4a; + else if (channel == 2) + seed = 0x46; + else if (channel == 3) + seed = 0x3f; + } else if (MaxDimmsInstallable == 3) { + if (channel == 0) + seed = 0x5e; + else if (channel == 1) + seed = 0x52; + else if (channel == 2) + seed = 0x48; + else if (channel == 3) + seed = 0x3c; + } + } else if (package_type == PT_C3) { + /* Socket C32: Fam15h BKDG v3.14 Table 100 */ + if ((MaxDimmsInstallable == 1) || (MaxDimmsInstallable == 2)) { + if (channel == 0) + seed = 0x39; + else if (channel == 1) + seed = 0x32; + } else if (MaxDimmsInstallable == 3) { + if (channel == 0) + seed = 0x45; + else if (channel == 1) + seed = 0x37; + } + } else if (package_type == PT_M2) { + /* Socket AM3: Fam15h BKDG v3.14 Table 101 */ + seed = 0x3a; + } + } + + return seed; +} + +static void read_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) { uint8_t lane; uint32_t dword; @@ -107,7 +253,7 @@ static void read_dqs_write_timing_control_registers(uint16_t* current_total_dela if (lane == 8) wdt_reg = 0x32; wdt_reg += dimm * 3; - dword = Get_NB32_index_wait(dev, index_reg, wdt_reg); + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, wdt_reg); if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) current_total_delay[lane] = (dword & 0x00ff0000) >> 16; if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) @@ -115,12 +261,124 @@ static void 
read_dqs_write_timing_control_registers(uint16_t* current_total_dela } } -static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg) +#ifdef UNUSED_CODE +static void write_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) +{ + uint8_t lane; + uint32_t dword; + + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + uint32_t ret_reg; + if ((lane == 0) || (lane == 1)) + ret_reg = 0x30; + if ((lane == 2) || (lane == 3)) + ret_reg = 0x31; + if ((lane == 4) || (lane == 5)) + ret_reg = 0x40; + if ((lane == 6) || (lane == 7)) + ret_reg = 0x41; + if (lane == 8) + ret_reg = 0x32; + ret_reg += dimm * 3; + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg); + if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) { + dword &= ~(0xff << 16); + dword |= (current_total_delay[lane] & 0xff) << 16; + } + if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) { + dword &= ~0xff; + dword |= current_total_delay[lane] & 0xff; + } + Set_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg, dword); + } +} +#endif + +static void write_write_data_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) +{ + uint8_t lane; + uint32_t dword; + + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + uint32_t wdt_reg; + + /* Calculate Write Data Timing register location */ + if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3)) + wdt_reg = 0x1; + if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7)) + wdt_reg = 0x2; + if (lane == 8) + wdt_reg = 0x3; + wdt_reg |= (dimm << 8); + + /* Set Write Data Timing register values */ + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, wdt_reg); + if ((lane == 7) || (lane == 3)) { + dword &= ~(0x7f << 24); + dword |= (current_total_delay[lane] & 0x7f) << 24; + } + if ((lane == 6) || (lane == 2)) { + 
dword &= ~(0x7f << 16); + dword |= (current_total_delay[lane] & 0x7f) << 16; + } + if ((lane == 5) || (lane == 1)) { + dword &= ~(0x7f << 8); + dword |= (current_total_delay[lane] & 0x7f) << 8; + } + if ((lane == 8) || (lane == 4) || (lane == 0)) { + dword &= ~0x7f; + dword |= current_total_delay[lane] & 0x7f; + } + Set_NB32_index_wait_DCT(dev, dct, index_reg, wdt_reg, dword); + } +} + +static void read_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) +{ + uint8_t lane; + uint32_t mask; + uint32_t dword; + + if (is_fam15h()) + mask = 0x3ff; + else + mask = 0x1ff; + + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + uint32_t ret_reg; + if ((lane == 0) || (lane == 1)) + ret_reg = 0x10; + if ((lane == 2) || (lane == 3)) + ret_reg = 0x11; + if ((lane == 4) || (lane == 5)) + ret_reg = 0x20; + if ((lane == 6) || (lane == 7)) + ret_reg = 0x21; + if (lane == 8) + ret_reg = 0x12; + ret_reg += dimm * 3; + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg); + if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) { + current_total_delay[lane] = (dword & (mask << 16)) >> 16; + } + if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) { + current_total_delay[lane] = dword & mask; + } + } +} + +static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) { uint8_t lane; + uint32_t mask; uint32_t dword; - for (lane = 0; lane < 8; lane++) { + if (is_fam15h()) + mask = 0x3ff; + else + mask = 0x1ff; + + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { uint32_t ret_reg; if ((lane == 0) || (lane == 1)) ret_reg = 0x10; @@ -130,17 +388,125 @@ static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_ ret_reg = 0x20; if ((lane == 6) || (lane == 7)) ret_reg = 0x21; + if (lane == 8) + ret_reg = 0x12; ret_reg += dimm * 3; - dword = Get_NB32_index_wait(dev, 
index_reg, ret_reg); + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg); if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) { - dword &= ~(0x1ff << 16); - dword |= (current_total_delay[lane] & 0x1ff) << 16; + dword &= ~(mask << 16); + dword |= (current_total_delay[lane] & mask) << 16; } - if ((lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) { - dword &= ~0x1ff; - dword |= current_total_delay[lane] & 0x1ff; + if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) { + dword &= ~mask; + dword |= current_total_delay[lane] & mask; } - Set_NB32_index_wait(dev, index_reg, ret_reg, dword); + Set_NB32_index_wait_DCT(dev, dct, index_reg, ret_reg, dword); + } +} + +static void read_dram_phase_recovery_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) +{ + uint8_t lane; + uint32_t dword; + + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + uint32_t prc_reg; + + /* Calculate DRAM Phase Recovery Control register location */ + if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3)) + prc_reg = 0x50; + if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7)) + prc_reg = 0x51; + if (lane == 8) + prc_reg = 0x52; + + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, prc_reg); + if ((lane == 7) || (lane == 3)) { + current_total_delay[lane] = (dword >> 24) & 0x7f; + } + if ((lane == 6) || (lane == 2)) { + current_total_delay[lane] = (dword >> 16) & 0x7f; + } + if ((lane == 5) || (lane == 1)) { + current_total_delay[lane] = (dword >> 8) & 0x7f; + } + if ((lane == 8) || (lane == 4) || (lane == 0)) { + current_total_delay[lane] = dword & 0x7f; + } + } +} + +static void write_dram_phase_recovery_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) +{ + uint8_t lane; + uint32_t dword; + + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + uint32_t prc_reg; + + /* Calculate DRAM Phase Recovery Control 
register location */ + if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3)) + prc_reg = 0x50; + if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7)) + prc_reg = 0x51; + if (lane == 8) + prc_reg = 0x52; + + /* Set DRAM Phase Recovery Control register values */ + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, prc_reg); + if ((lane == 7) || (lane == 3)) { + dword &= ~(0x7f << 24); + dword |= (current_total_delay[lane] & 0x7f) << 24; + } + if ((lane == 6) || (lane == 2)) { + dword &= ~(0x7f << 16); + dword |= (current_total_delay[lane] & 0x7f) << 16; + } + if ((lane == 5) || (lane == 1)) { + dword &= ~(0x7f << 8); + dword |= (current_total_delay[lane] & 0x7f) << 8; + } + if ((lane == 8) || (lane == 4) || (lane == 0)) { + dword &= ~0x7f; + dword |= current_total_delay[lane] & 0x7f; + } + Set_NB32_index_wait_DCT(dev, dct, index_reg, prc_reg, dword); + } +} + +static void read_read_dqs_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg) +{ + uint8_t lane; + uint32_t dword; + + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + uint32_t rdt_reg; + + /* Calculate DRAM Read DQS Timing register location */ + if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3)) + rdt_reg = 0x5; + if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7)) + rdt_reg = 0x6; + if (lane == 8) + rdt_reg = 0x7; + rdt_reg |= (dimm << 8); + + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, rdt_reg); + if ((lane == 7) || (lane == 3)) { + current_total_delay[lane] = (dword >> 24) & 0x3f; + } + if ((lane == 6) || (lane == 2)) { + current_total_delay[lane] = (dword >> 16) & 0x3f; + } + if ((lane == 5) || (lane == 1)) { + current_total_delay[lane] = (dword >> 8) & 0x3f; + } + if ((lane == 8) || (lane == 4) || (lane == 0)) { + current_total_delay[lane] = dword & 0x3f; + } + + if (is_fam15h()) + current_total_delay[lane] >>= 1; } } @@ -156,10 +522,11 @@ static uint32_t 
convert_testaddr_and_channel_to_address(struct DCTStatStruc *pDC return testaddr; } -/* DQS Receiver Enable Training - * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.2 +/* DQS Receiver Enable Training (Family 10h) + * Algorithm detailed in: + * The Fam10h BKDG Rev. 3.62 section 2.8.9.9.2 */ -static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, +static void dqsTrainRcvrEn_SW_Fam10(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 Pass) { u8 Channel; @@ -167,7 +534,6 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, u8 Addl_Index = 0; u8 Receiver; u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0; - u8 Final_Value; u16 CTLRMaxDelay; u16 MaxDelay_CH[2]; u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B; @@ -184,6 +550,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, u32 lo, hi; uint32_t dword; + uint8_t dimm; uint8_t rank; uint8_t lane; uint16_t current_total_delay[MAX_BYTE_LANES]; @@ -210,14 +577,13 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, } for (ch = ch_start; ch < ch_end; ch++) { - reg = 0x78 + (0x100 * ch); - val = Get_NB32(dev, reg); + reg = 0x78; + val = Get_NB32_DCT(dev, ch, reg); val &= ~(0x3ff << 22); - val |= (0x0c8 << 22); /* Max Rd Lat */ - Set_NB32(dev, reg, val); + val |= (0x0c8 << 22); /* MaxRdLatency = 0xc8 */ + Set_NB32_DCT(dev, ch, reg, val); } - Final_Value = 1; if (Pass == FirstPass) { mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat); } else { @@ -256,7 +622,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, CTLRMaxDelay = 0; MaxDelay_CH[Channel] = 0; - index_reg = 0x98 + 0x100 * Channel; + index_reg = 0x98; Receiver = mct_InitReceiver_D(pDCTstat, Channel); /* There are four receiver pairs, loosely associated with chipselects. 
@@ -264,6 +630,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, */ for (; Receiver < 8; Receiver += 2) { Addl_Index = (Receiver >> 1) * 3 + 0x10; + dimm = (Receiver >> 1); print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2); @@ -280,45 +647,14 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, /* 2.8.9.9.2 (1, 6) * Retrieve gross and fine timing fields from write DQS registers */ - read_dqs_write_timing_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); + read_dqs_write_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg); /* 2.8.9.9.2 (1) * Program the Write Data Timing and Write ECC Timing register to * the values stored in the DQS Write Timing Control register * for each lane */ - for (lane = 0; lane < MAX_BYTE_LANES; lane++) { - uint32_t wdt_reg; - - /* Calculate Write Data Timing register location */ - if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3)) - wdt_reg = 0x1; - if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7)) - wdt_reg = 0x2; - if (lane == 8) - wdt_reg = 0x3; - wdt_reg |= ((Receiver / 2) << 8); - - /* Set Write Data Timing register values */ - dword = Get_NB32_index_wait(dev, index_reg, wdt_reg); - if ((lane == 7) || (lane == 3)) { - dword &= ~(0x7f << 24); - dword |= (current_total_delay[lane] & 0x7f) << 24; - } - if ((lane == 6) || (lane == 2)) { - dword &= ~(0x7f << 16); - dword |= (current_total_delay[lane] & 0x7f) << 16; - } - if ((lane == 5) || (lane == 1)) { - dword &= ~(0x7f << 8); - dword |= (current_total_delay[lane] & 0x7f) << 8; - } - if ((lane == 8) || (lane == 4) || (lane == 0)) { - dword &= ~0x7f; - dword |= current_total_delay[lane] & 0x7f; - } - Set_NB32_index_wait(dev, index_reg, wdt_reg, dword); - } + write_write_data_timing_control_registers(current_total_delay, dev, Channel, dimm, index_reg); /* 2.8.9.9.2 (2) * Program the Read DQS Timing Control and the Read DQS ECC Timing Control registers @@ -332,12 +668,12 @@ static 
void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, rdt_reg = 0x6; if (lane == 8) rdt_reg = 0x7; - rdt_reg |= ((Receiver / 2) << 8); + rdt_reg |= (dimm << 8); if (lane == 8) dword = 0x0000003f; else dword = 0x3f3f3f3f; - Set_NB32_index_wait(dev, index_reg, rdt_reg, dword); + Set_NB32_index_wait_DCT(dev, Channel, index_reg, rdt_reg, dword); } /* 2.8.9.9.2 (3) @@ -367,7 +703,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2); /* 2.8.9.9.2 (4, 5) - * Write 1 cache line of the appropriate test pattern to each test addresse + * Write 1 cache line of the appropriate test pattern to each test address */ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, 0); /* rank 0 of DIMM, testpattern 0 */ mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, 1); /* rank 0 of DIMM, testpattern 1 */ @@ -386,7 +722,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, /* 2.8.9.9.2 (6) * Write gross and fine timing fields to read DQS registers */ - write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); + write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); /* 2.8.9.9.2 (7) * Loop over all delay values up to 1 MEMCLK (0x40 delay steps) from the initial delay values @@ -413,8 +749,8 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, break; /* 2.8.9.9.2 (7 A) - * Loop over all ranks - */ + * Loop over all ranks + */ for (rank = 0; rank < (_2Ranks + 1); rank++) { /* 2.8.9.9.2 (7 A a-d) * Read the first test address of the current rank @@ -430,17 +766,17 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, */ proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B); result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel)); - write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); + 
write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1); result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel)); - write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); + write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); } else { proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1); result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel)); - write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); + write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B); result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel)); - write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); + write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); } /* 2.8.9.9.2 (7 A e) * Compare both read patterns and flag passing ranks/lanes @@ -529,7 +865,7 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, } /* Update delays in hardware */ - write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); + write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); /* Save previous results for comparison in the next iteration */ for (lane = 0; lane < 8; lane++) @@ -583,7 +919,483 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay); /* program Ch A/B MaxAsyncLat to correspond with max delay */ } - ResetDCTWrPtr_D(dev, index_reg, Addl_Index); + for (Channel = 0; Channel < 2; 
Channel++) { + ResetDCTWrPtr_D(dev, Channel, index_reg, Addl_Index); + } + + if(_DisableDramECC) { + mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC); + } + + if (Pass == FirstPass) { + /*Disable DQSRcvrEn training mode */ + mct_DisableDQSRcvEn_D(pDCTstat); + } + + if(!_Wrap32Dis) { + msr = HWCR; + _RDMSR(msr, &lo, &hi); + lo &= ~(1<<17); /* restore HWCR.wrap32dis */ + _WRMSR(msr, lo, hi); + } + if(!_SSE2){ + cr4 = read_cr4(); + cr4 &= ~(1<<9); /* restore cr4.OSFXSR */ + write_cr4(cr4); + } + +#if DQS_TRAIN_DEBUG > 0 + { + u8 ChannelDTD; + printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n"); + for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) { + printk(BIOS_DEBUG, "Channel:%x: %x\n", + ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]); + } + } +#endif + +#if DQS_TRAIN_DEBUG > 0 + { + u16 valDTD; + u8 ChannelDTD, ReceiverDTD; + u8 i; + u16 *p; + + printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n"); + for(ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) { + printk(BIOS_DEBUG, "Channel:%x\n", ChannelDTD); + for(ReceiverDTD = 0; ReceiverDTD<8; ReceiverDTD+=2) { + printk(BIOS_DEBUG, "\t\tReceiver:%x:", ReceiverDTD); + p = pDCTstat->CH_D_B_RCVRDLY[ChannelDTD][ReceiverDTD>>1]; + for (i=0;i<8; i++) { + valDTD = p[i]; + printk(BIOS_DEBUG, " %03x", valDTD); + } + printk(BIOS_DEBUG, "\n"); + } + } + } +#endif + + printk(BIOS_DEBUG, "TrainRcvrEn: Status %x\n", pDCTstat->Status); + printk(BIOS_DEBUG, "TrainRcvrEn: ErrStatus %x\n", pDCTstat->ErrStatus); + printk(BIOS_DEBUG, "TrainRcvrEn: ErrCode %x\n", pDCTstat->ErrCode); + printk(BIOS_DEBUG, "TrainRcvrEn: Done\n\n"); +} + +/* DQS Receiver Enable Training Pattern Generation (Family 15h) + * Algorithm detailed in: + * The Fam15h BKDG Rev. 
3.14 section 2.10.5.8.2 (4) + */ +static void generate_dram_receiver_enable_training_pattern_fam15(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t Receiver) +{ + uint32_t dword; + uint32_t dev = pDCTstat->dev_dct; + + /* 2.10.5.7.1.1 + * It appears that the DCT only supports 8-beat burst length mode, + * so do nothing here... + */ + + /* Wait for CmdSendInProg == 0 */ + do { + dword = Get_NB32_DCT(dev, dct, 0x250); + } while (dword & (0x1 << 12)); + + /* Set CmdTestEnable = 1 */ + dword = Get_NB32_DCT(dev, dct, 0x250); + dword |= (0x1 << 2); + Set_NB32_DCT(dev, dct, 0x250, dword); + + /* 2.10.5.8.6.1.1 Send Activate Command */ + dword = Get_NB32_DCT(dev, dct, 0x28c); + dword &= ~(0xff << 22); /* CmdChipSelect = Receiver */ + dword |= ((0x1 << Receiver) << 22); + dword &= ~(0x7 << 19); /* CmdBank = 0 */ + dword &= ~(0x3ffff); /* CmdAddress = 0 */ + dword |= (0x1 << 31); /* SendActCmd = 1 */ + Set_NB32_DCT(dev, dct, 0x28c, dword); + + /* Wait for SendActCmd == 0 */ + do { + dword = Get_NB32_DCT(dev, dct, 0x28c); + } while (dword & (0x1 << 31)); + + /* Wait 75 MEMCLKs. 
*/ + precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 75); + + /* 2.10.5.8.6.1.2 */ + Set_NB32_DCT(dev, dct, 0x274, 0x0); /* DQMask = 0 */ + Set_NB32_DCT(dev, dct, 0x278, 0x0); + + dword = Get_NB32_DCT(dev, dct, 0x27c); + dword &= ~(0xff); /* EccMask = 0 */ + if (pDCTstat->DimmECCPresent == 0) + dword |= 0xff; /* EccMask = 0xff */ + Set_NB32_DCT(dev, dct, 0x27c, dword); + + /* 2.10.5.8.6.1.2 */ + dword = Get_NB32_DCT(dev, dct, 0x270); + dword &= ~(0x7ffff); /* DataPrbsSeed = 55555 */ +// dword |= (0x55555); + dword |= (0x44443); /* Use AGESA seed */ + Set_NB32_DCT(dev, dct, 0x270, dword); + + /* 2.10.5.8.2 (4) */ + dword = Get_NB32_DCT(dev, dct, 0x260); + dword &= ~(0x1fffff); /* CmdCount = 192 */ + dword |= 192; + Set_NB32_DCT(dev, dct, 0x260, dword); + +#if 0 + /* TODO: This applies to Fam15h model 10h and above only */ + /* Program Bubble Count and CmdStreamLen */ + dword = Get_NB32_DCT(dev, dct, 0x25c); + dword &= ~(0x3ff << 12); /* BubbleCnt = 0 */ + dword &= ~(0x3ff << 22); /* BubbleCnt2 = 0 */ + dword &= ~(0xff); /* CmdStreamLen = 1 */ + dword |= 0x1; + Set_NB32_DCT(dev, dct, 0x25c, dword); +#endif + + /* Configure Target A */ + dword = Get_NB32_DCT(dev, dct, 0x254); + dword &= ~(0x7 << 24); /* TgtChipSelect = Receiver */ + dword |= (Receiver & 0x7) << 24; + dword &= ~(0x7 << 21); /* TgtBank = 0 */ + dword &= ~(0x3ff); /* TgtAddress = 0 */ + Set_NB32_DCT(dev, dct, 0x254, dword); + + dword = Get_NB32_DCT(dev, dct, 0x250); + dword |= (0x1 << 3); /* ResetAllErr = 1 */ + dword &= ~(0x1 << 4); /* StopOnErr = 0 */ + dword &= ~(0x3 << 8); /* CmdTgt = 0 (Target A) */ + dword &= ~(0x7 << 5); /* CmdType = 0 (Read) */ + dword |= (0x1 << 11); /* SendCmd = 1 */ + Set_NB32_DCT(dev, dct, 0x250, dword); + + /* 2.10.5.8.6.1.2 Wait for TestStatus == 1 and CmdSendInProg == 0 */ + do { + dword = Get_NB32_DCT(dev, dct, 0x250); + } while ((dword & (0x1 << 12)) || (!(dword & (0x1 << 10)))); + + dword = Get_NB32_DCT(dev, dct, 0x250); + dword &= ~(0x1 << 11); /* SendCmd = 0 */ + 
Set_NB32_DCT(dev, dct, 0x250, dword); + + /* 2.10.5.8.6.1.1 Send Precharge Command */ + /* Wait 25 MEMCLKs. */ + precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 25); + + dword = Get_NB32_DCT(dev, dct, 0x28c); + dword &= ~(0xff << 22); /* CmdChipSelect = Receiver */ + dword |= ((0x1 << Receiver) << 22); + dword &= ~(0x7 << 19); /* CmdBank = 0 */ + dword &= ~(0x3ffff); /* CmdAddress = 0x400 */ + dword |= 0x400; + dword |= (0x1 << 30); /* SendPchgCmd = 1 */ + Set_NB32_DCT(dev, dct, 0x28c, dword); + + /* Wait for SendPchgCmd == 0 */ + do { + dword = Get_NB32_DCT(dev, dct, 0x28c); + } while (dword & (0x1 << 30)); + + /* Wait 25 MEMCLKs. */ + precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 25); + + /* Set CmdTestEnable = 0 */ + dword = Get_NB32_DCT(dev, dct, 0x250); + dword &= ~(0x1 << 2); + Set_NB32_DCT(dev, dct, 0x250, dword); +} + +/* DQS Receiver Enable Training (Family 15h) + * Algorithm detailed in: + * The Fam15h BKDG Rev. 3.14 section 2.10.5.8.2 + * This algorithm runs once at the lowest supported MEMCLK, + * then once again at the highest supported MEMCLK. 
+ */ +static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, u8 Pass) +{ + u8 Channel; + u8 _2Ranks; + u8 Addl_Index = 0; + u8 Receiver; + u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0; + u32 Errors; + + u32 val; + u32 dev; + u32 index_reg; + u32 ch_start, ch_end, ch; + u32 msr; + u32 cr4; + u32 lo, hi; + + uint32_t dword; + uint8_t dimm; + uint8_t rank; + uint8_t lane; + uint8_t mem_clk; + uint16_t initial_seed; + uint16_t current_total_delay[MAX_BYTE_LANES]; + uint16_t dqs_ret_pass1_total_delay[MAX_BYTE_LANES]; + uint16_t rank0_current_total_delay[MAX_BYTE_LANES]; + uint16_t phase_recovery_delays[MAX_BYTE_LANES]; + uint16_t seed[MAX_BYTE_LANES]; + uint16_t seed_gross[MAX_BYTE_LANES]; + uint16_t seed_fine[MAX_BYTE_LANES]; + uint16_t seed_pre_gross[MAX_BYTE_LANES]; + + uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE); + uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933}; + + print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0); + print_debug_dqs("TrainRcvEn: Pass", Pass, 0); + + dev = pDCTstat->dev_dct; + index_reg = 0x98; + ch_start = 0; + ch_end = 2; + + for (ch = ch_start; ch < ch_end; ch++) { + uint8_t max_rd_latency = 0x55; + uint8_t p_state; + + /* 2.10.5.6 */ + fam15EnableTrainingMode(pMCTstat, pDCTstat, ch, 1); + + /* 2.10.5.2 */ + for (p_state = 0; p_state < 3; p_state++) { + val = Get_NB32_DCT_NBPstate(dev, ch, p_state, 0x210); + val &= ~(0x3ff << 22); /* MaxRdLatency = max_rd_latency */ + val |= (max_rd_latency & 0x3ff) << 22; + Set_NB32_DCT_NBPstate(dev, ch, p_state, 0x210, val); + } + } + + if (Pass != FirstPass) { + pDCTstat->DimmTrainFail = 0; + pDCTstat->CSTrainFail = ~pDCTstat->CSPresent; + } + + cr4 = read_cr4(); + if(cr4 & ( 1 << 9)) { /* save the old value */ + _SSE2 = 1; + } + cr4 |= (1 << 9); /* OSFXSR enable SSE2 */ + write_cr4(cr4); + + msr = HWCR; + _RDMSR(msr, &lo, &hi); + /* FIXME: Why use SSEDIS */ + if(lo & (1 << 17)) { 
/* save the old value */ + _Wrap32Dis = 1; + } + lo |= (1 << 17); /* HWCR.wrap32dis */ + lo &= ~(1 << 15); /* SSEDIS */ + _WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */ + + _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat); + + Errors = 0; + dev = pDCTstat->dev_dct; + + for (Channel = 0; Channel < 2; Channel++) { + print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1); + print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1); + pDCTstat->Channel = Channel; + + mem_clk = Get_NB32_DCT(dev, Channel, 0x94) & 0x1f; + + Receiver = mct_InitReceiver_D(pDCTstat, Channel); + /* There are four receiver pairs, loosely associated with chipselects. + * This is essentially looping over each DIMM. + */ + for (; Receiver < 8; Receiver += 2) { + Addl_Index = (Receiver >> 1) * 3 + 0x10; + dimm = (Receiver >> 1); + + print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2); + + if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) { + continue; + } + + /* Retrieve the total delay values from pass 1 of DQS receiver enable training */ + if (Pass != FirstPass) { + read_dqs_receiver_enable_control_registers(dqs_ret_pass1_total_delay, dev, Channel, dimm, index_reg); + } + + /* 2.10.5.8.2 + * Loop over all ranks + */ + if (mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) + _2Ranks = 1; + else + _2Ranks = 0; + for (rank = 0; rank < (_2Ranks + 1); rank++) { + /* 2.10.5.8.2 (1) + * Specify the target DIMM to be trained + * Set TrNibbleSel = 0 + * + * TODO: Add support for x4 DIMMs + */ + dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008); + dword &= ~(0x3 << 4); /* TrDimmSel */ + dword |= ((dimm & 0x3) << 4); + dword &= ~(0x1 << 2); /* TrNibbleSel */ + Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword); + + /* 2.10.5.8.2 (2) + * Retrieve gross and fine timing fields from write DQS registers + */ + read_dqs_write_timing_control_registers(current_total_delay, dev, 
Channel, dimm, index_reg); + + /* 2.10.5.8.2.1 + * Generate the DQS Receiver Enable Training Seed Values + */ + if (Pass == FirstPass) { + initial_seed = fam15_receiver_enable_training_seed(pDCTstat, Channel, dimm, rank, package_type); + + /* Adjust seed for the minimum platform supported frequency */ + initial_seed = (uint16_t) (((((uint64_t) initial_seed) * + fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100))); + + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + uint16_t wl_pass1_delay; + wl_pass1_delay = current_total_delay[lane]; + + seed[lane] = initial_seed + wl_pass1_delay; + } + } else { + uint8_t addr_prelaunch = 0; /* TODO: Fetch the correct value from RC2[0] */ + uint16_t register_delay; + int16_t seed_prescaling; + + memcpy(current_total_delay, dqs_ret_pass1_total_delay, sizeof(current_total_delay)); + if ((pDCTstat->Status & (1 << SB_Registered))) { + if (addr_prelaunch) + register_delay = 0x30; + else + register_delay = 0x20; + } else if ((pDCTstat->Status & (1 << SB_LoadReduced))) { + /* TODO + * Load reduced DIMM support unimplemented + */ + register_delay = 0x0; + } else { + register_delay = 0x0; + } + + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + seed_prescaling = current_total_delay[lane] - register_delay - 0x20; + seed[lane] = (uint16_t) (register_delay + ((((uint64_t) seed_prescaling) * fam15h_freq_tab[mem_clk] * 100) / (mctGet_NVbits(NV_MIN_MEMCLK) * 100))); + } + } + + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + seed_gross[lane] = (seed[lane] >> 5) & 0x1f; + seed_fine[lane] = seed[lane] & 0x1f; + + /*if (seed_gross[lane] == 0) + seed_pre_gross[lane] = 0; + else */if (seed_gross[lane] & 0x1) + seed_pre_gross[lane] = 1; + else + seed_pre_gross[lane] = 2; + + /* Calculate phase recovery delays */ + phase_recovery_delays[lane] = ((seed_pre_gross[lane] & 0x1f) << 5) | (seed_fine[lane] & 0x1f); + + /* Set the gross delay. 
+ * NOTE: While the BKDG states to only program DqsRcvEnGrossDelay, this appears + * to have been a misprint as DqsRcvEnFineDelay should be set to zero as well. + */ + current_total_delay[lane] = ((seed_gross[lane] & 0x1f) << 5); + } + + /* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (5 6) + * Program PhRecFineDly and PhRecGrossDly + */ + write_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg); + + /* 2.10.5.8.2 (2) / 2.10.5.8.2.1 (7) + * Program the DQS Receiver Enable delay values for each lane + */ + write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); + + /* 2.10.5.8.2 (3) + * Program DqsRcvTrEn = 1 + */ + dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008); + dword |= (0x1 << 13); + Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword); + + /* 2.10.5.8.2 (4) + * Issue 192 read requests to the target rank + */ + generate_dram_receiver_enable_training_pattern_fam15(pMCTstat, pDCTstat, Channel, Receiver + (rank & 0x1)); + + /* 2.10.5.8.2 (5) + * Program DqsRcvTrEn = 0 + */ + dword = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008); + dword &= ~(0x1 << 13); + Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000008, dword); + + /* 2.10.5.8.2 (6) + * Read PhRecGrossDly, PhRecFineDly + */ + read_dram_phase_recovery_control_registers(phase_recovery_delays, dev, Channel, dimm, index_reg); + + /* 2.10.5.8.2 (7) + * Calculate and program the DQS Receiver Enable delay values + */ + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + current_total_delay[lane] = (phase_recovery_delays[lane] & 0x1f); + current_total_delay[lane] |= ((seed_gross[lane] + ((phase_recovery_delays[lane] >> 5) & 0x1f) - seed_pre_gross[lane] + 1) << 5); + if (lane == 8) + pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] = current_total_delay[lane]; + else + pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] = current_total_delay[lane]; + } + 
write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); + + if (rank == 0) { + /* Back up the Rank 0 delays for later use */ + memcpy(rank0_current_total_delay, current_total_delay, sizeof(current_total_delay)); + } + + if (rank == 1) { + /* 2.10.5.8.2 (8) + * Compute the average delay across both ranks and program the result into + * the DQS Receiver Enable delay registers + */ + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + current_total_delay[lane] = (rank0_current_total_delay[lane] + current_total_delay[lane]) / 2; + if (lane == 8) + pDCTstat->CH_D_BC_RCVRDLY[Channel][dimm] = current_total_delay[lane]; + else + pDCTstat->CH_D_B_RCVRDLY[Channel][dimm][lane] = current_total_delay[lane]; + } + write_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); + } + } + +#if DQS_TRAIN_DEBUG > 0 + for (lane = 0; lane < 8; lane++) + print_debug_dqs_pair("\t\tTrainRcvEn55: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2); +#endif + } + } + + /* Calculate and program MaxRdLatency */ + Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, Channel); if(_DisableDramECC) { mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC); @@ -670,10 +1482,10 @@ static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat) } for (ch=0; ch<ch_end; ch++) { - reg = 0x78 + 0x100 * ch; - val = Get_NB32(dev, reg); + reg = 0x78; + val = Get_NB32_DCT(dev, ch, reg); val &= ~(1 << DqsRcvEnTrain); - Set_NB32(dev, reg, val); + Set_NB32_DCT(dev, ch, reg, val); } } @@ -714,7 +1526,7 @@ void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, /* get the register index from table */ index = Table_DQSRcvEn_Offset[i >> 1]; index += Addl_Index; /* DIMMx DqsRcvEn byte0 */ - val = Get_NB32_index_wait(dev, index_reg, index); + val = Get_NB32_index_wait_DCT(dev, Channel, index_reg, index); if(i & 1) { /* odd byte lane */ val &= ~(0x1ff << 16); @@ -724,7 +1536,7 @@ void mct_SetRcvrEnDly_D(struct 
DCTStatStruc *pDCTstat, u16 RcvrEnDly, val &= ~0x1ff; val |= (RcvrEnDly & 0x1ff); } - Set_NB32_index_wait(dev, index_reg, index, val); + Set_NB32_index_wait_DCT(dev, Channel, index_reg, index, val); } } @@ -738,7 +1550,6 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D u32 reg; u32 SubTotal; u32 index_reg; - u32 reg_off; u32 val; uint8_t cpu_val_n; @@ -773,17 +1584,16 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D Channel = 0; dev = pDCTstat->dev_dct; - reg_off = 0x100 * Channel; - index_reg = 0x98 + reg_off; + index_reg = 0x98; /* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/ - val = Get_NB32(dev, 0x88 + reg_off); + val = Get_NB32_DCT(dev, Channel, 0x88); SubTotal = ((val & 0x0f) + 4) << 1; /* SubTotal is 1/2 Memclk unit */ /* If registered DIMMs are being used then * add 1 MEMCLK to the sub-total. */ - val = Get_NB32(dev, 0x90 + reg_off); + val = Get_NB32_DCT(dev, Channel, 0x90); if(!(val & (1 << UnBuffDimm))) SubTotal += 2; @@ -791,7 +1601,7 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D * add 1, else add 2 to the sub-total. * if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2; */ - val = Get_NB32_index_wait(dev, index_reg, 0x04); + val = Get_NB32_index_wait_DCT(dev, Channel, index_reg, 0x04); if(!(val & 0x00202020)) SubTotal += 1; else @@ -799,7 +1609,7 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D /* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs, * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. 
*/ - val = Get_NB32(dev, 0x78 + reg_off); + val = Get_NB32_DCT(dev, Channel, 0x78); SubTotal += 8 - (val & 0x0f); /* Convert bits 7-5 (also referred to as the coarse delay) of @@ -820,7 +1630,7 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D * clocks (NCLKs) */ SubTotal *= 200 * ((Get_NB32(pDCTstat->dev_nbmisc, 0xd4) & 0x1f) + 4); - SubTotal /= freq_tab[((Get_NB32(pDCTstat->dev_dct, 0x94 + reg_off) & 0x7) - 3)]; + SubTotal /= freq_tab[((Get_NB32_DCT(pDCTstat->dev_dct, Channel, 0x94) & 0x7) - 3)]; SubTotal = (SubTotal + (2 - 1)) / 2; /* Round up */ /* Add "N" NCLKs to the sub-total. "N" represents part of the @@ -837,13 +1647,13 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 D /* Program the F2x[1, 0]78[MaxRdLatency] register with * the total delay value (in NCLKs). */ - reg = 0x78 + reg_off; - val = Get_NB32(dev, reg); + reg = 0x78; + val = Get_NB32_DCT(dev, Channel, reg); val &= ~(0x3ff << 22); val |= (SubTotal & 0x3ff) << 22; /* program MaxRdLatency to correspond with current delay */ - Set_NB32(dev, reg, val); + Set_NB32_DCT(dev, Channel, reg, val); } static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, @@ -873,7 +1683,7 @@ static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, u32 dword; u8 dn = 4; /* TODO: Rev C could be 4 */ u32 dev = pDCTstat->dev_dct; - u32 index_reg = 0x98 + 0x100 * Channel; + u32 index_reg = 0x98; /* FIXME: add Cx support */ dword = 0x00000000; @@ -881,7 +1691,7 @@ static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, for(j=0; j<dn; j++) /* DIMM0 Write Data Timing Low */ /* DIMM0 Write ECC Timing */ - Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword); + Set_NB32_index_wait_DCT(dev, Channel, index_reg, i + 0x100 * j, dword); } /* errata #180 */ @@ -889,13 +1699,13 @@ static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, for(i=5; i<=6; i++) { for(j=0; j<dn; j++) /* DIMM0 Read DQS Timing Control Low */ - 
Set_NB32_index_wait(dev, index_reg, i + 0x100 * j, dword); + Set_NB32_index_wait_DCT(dev, Channel, index_reg, i + 0x100 * j, dword); } dword = 0x0000002f; for(j=0; j<dn; j++) /* DIMM0 Read DQS ECC Timing Control */ - Set_NB32_index_wait(dev, index_reg, 7 + 0x100 * j, dword); + Set_NB32_index_wait_DCT(dev, Channel, index_reg, 7 + 0x100 * j, dword); } void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel) @@ -908,13 +1718,13 @@ void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel) u32 val; dev = pDCTstat->dev_dct; - index_reg = 0x98 + Channel * 0x100; + index_reg = 0x98; index = 0x12; p = pDCTstat->CH_D_BC_RCVRDLY[Channel]; print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel, 2); for(ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) { val = p[ChipSel>>1]; - Set_NB32_index_wait(dev, index_reg, index, val); + Set_NB32_index_wait_DCT(dev, Channel, index_reg, index, val); print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ", ChipSel, " rcvr_delay ", val, 2); index += 3; @@ -998,95 +1808,305 @@ void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat, u8 Node = 0; struct DCTStatStruc *pDCTstat; + printk(BIOS_DEBUG, "%s: Start\n", __func__); + /* FIXME: skip for Ax */ - while (Node < MAX_NODES_SUPPORTED) { + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { pDCTstat = pDCTstatA + Node; + if (!pDCTstat->NodePresent) + continue; + + if (pDCTstat->DCTSysLimit) { + if (is_fam15h()) { + /* Fam15h BKDG v3.14 section 2.10.5.3.3 + * This picks up where InitDDRPhy left off + */ + uint8_t dct; + uint8_t index; + uint32_t dword; + uint32_t datc_backup; + uint32_t training_dword; + uint32_t fence2_config_dword; + uint32_t fence_tx_pad_config_dword; + uint32_t index_reg = 0x98; + uint32_t dev = pDCTstat->dev_dct; + + for (dct = 0; dct < 2; dct++) { + if (!pDCTstat->DIMMValidDCT[dct]) + continue; + + /* Back up D18F2x9C_x0000_0004_dct[1:0] */ + datc_backup = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000004); + + /* FenceTrSel = 0x2 */ + 
dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008); + dword &= ~(0x3 << 6); + dword |= (0x2 << 6); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008, dword); + + /* Set phase recovery seed values */ + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000050, 0x13131313); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000051, 0x13131313); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000052, 0x00000013); + + training_dword = fenceDynTraining_D(pMCTstat, pDCTstat, dct); + + /* Save calculated fence value to the TX DLL */ + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c); + dword &= ~(0x1f << 26); + dword |= ((training_dword & 0x1f) << 26); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c, dword); + + /* D18F2x9C_x0D0F_0[F,8:0]0F_dct[1:0][AlwaysEnDllClks]=0x1 */ + for (index = 0; index < 0x9; index++) { + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (index << 8)); + dword &= ~(0x7 << 12); + dword |= (0x1 << 12); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (index << 8), dword); + } + + /* FenceTrSel = 0x1 */ + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008); + dword &= ~(0x3 << 6); + dword |= (0x1 << 6); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008, dword); + + /* Set phase recovery seed values */ + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000050, 0x13131313); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000051, 0x13131313); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000052, 0x00000013); + + training_dword = fenceDynTraining_D(pMCTstat, pDCTstat, dct); + + /* Save calculated fence value to the RX DLL */ + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c); + dword &= ~(0x1f << 21); + dword |= ((training_dword & 0x1f) << 21); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c, dword); + + /* D18F2x9C_x0D0F_0[F,8:0]0F_dct[1:0][AlwaysEnDllClks]=0x0 */ + for (index = 0; index < 0x9; index++) { + dword = 
Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (index << 8)); + dword &= ~(0x7 << 12); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f000f | (index << 8), dword); + } + + /* FenceTrSel = 0x3 */ + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008); + dword &= ~(0x3 << 6); + dword |= (0x3 << 6); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000008, dword); + + /* Set phase recovery seed values */ + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000050, 0x13131313); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000051, 0x13131313); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000052, 0x00000013); + + fence_tx_pad_config_dword = fenceDynTraining_D(pMCTstat, pDCTstat, dct); + + /* Save calculated fence value to the TX Pad */ + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c); + dword &= ~(0x1f << 16); + dword |= ((fence_tx_pad_config_dword & 0x1f) << 16); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c, dword); + + /* Program D18F2x9C_x0D0F_[C,8,2][2:0]31_dct[1:0] */ + training_dword = fence_tx_pad_config_dword; + if (fence_tx_pad_config_dword < 16) + training_dword |= (0x1 << 4); + else + training_dword = 0; + for (index = 0; index < 0x3; index++) { + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2031 | (index << 8)); + dword &= ~(0x1f); + dword |= (training_dword & 0x1f); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f2031 | (index << 8), dword); + } + for (index = 0; index < 0x3; index++) { + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8031 | (index << 8)); + dword &= ~(0x1f); + dword |= (training_dword & 0x1f); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f8031 | (index << 8), dword); + } + for (index = 0; index < 0x3; index++) { + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc031 | (index << 8)); + dword &= ~(0x1f); + dword |= (training_dword & 0x1f); + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0fc031 | (index << 8), dword); + } + 
+ /* Assemble Fence2 configuration word (Fam15h BKDG v3.14 page 331) */ + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0000000c); + fence2_config_dword = 0; + + /* TxPad */ + training_dword = (dword >> 16) & 0x1f; + if (training_dword < 16) + training_dword |= 0x10; + else + training_dword = 0; + fence2_config_dword |= training_dword; + + /* RxDll */ + training_dword = (dword >> 21) & 0x1f; + if (training_dword < 16) + training_dword |= 0x10; + else + training_dword = 0; + fence2_config_dword |= (training_dword << 10); + + /* TxDll */ + training_dword = (dword >> 26) & 0x1f; + if (training_dword < 16) + training_dword |= 0x10; + else + training_dword = 0; + fence2_config_dword |= (training_dword << 5); + + /* Program D18F2x9C_x0D0F_0[F,8:0]31_dct[1:0] */ + for (index = 0; index < 0x9; index++) { + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0031 | (index << 8)); + dword &= ~(0x7fff); + dword |= fence2_config_dword; + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0031 | (index << 8), dword); + } - if(pDCTstat->DCTSysLimit) { - fenceDynTraining_D(pMCTstat, pDCTstat, 0); - fenceDynTraining_D(pMCTstat, pDCTstat, 1); + /* Restore D18F2x9C_x0000_0004_dct[1:0] */ + Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x00000004, datc_backup); + } + } else { + fenceDynTraining_D(pMCTstat, pDCTstat, 0); + fenceDynTraining_D(pMCTstat, pDCTstat, 1); + } } - Node++; } + + printk(BIOS_DEBUG, "%s: Done\n", __func__); }

/* Read-modify-write F2x[1,0]9C_x08 on the given DCT so that the
 * PhyFenceTrEn bit is set (enable != 0) or cleared (enable == 0).
 * All other bits of the register are written back unchanged.
 */
static void fence_set_train_en(u32 dev, u8 dct, u32 index_reg, u8 enable)
{
	u32 ctl = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x08);

	if (enable)
		ctl |= 1 << PhyFenceTrEn;
	else
		ctl &= ~(1 << PhyFenceTrEn);

	Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x08, ctl);
}

/* Read the phase recovery registers F2x[1,0]9C_x50..x52 and return the
 * average of their delay fields, rounded up.
 *
 * Four byte-wide fields are taken from each of x50 and x51 and a single
 * field (bits 7:0) from x52, nine in total.  Each field is masked with
 * field_mask before being summed.
 */
static u16 fence_avg_phase_recovery(u32 dev, u8 dct, u32 index_reg, u32 field_mask)
{
	u16 total = 0;
	u32 offset;
	u32 shift;
	u32 data;

	for (offset = 0x50; offset <= 0x52; offset++) {
		data = Get_NB32_index_wait_DCT(dev, dct, index_reg, offset);
		for (shift = 0; shift < 32; shift += 8) {
			total += (data >> shift) & field_mask;
			/* Only the lowest byte of x52 takes part */
			if (offset == 0x52)
				break;
		}
	}

	/* Divide by the nine fields, rounding any remainder up */
	return (total / 9) + ((total % 9) ? 1 : 0);
}

/* Run one phy fence training cycle on the given DCT.
 *
 * Fam15h: pulses PhyFenceTrEn, averages the 5-bit fine delay fields of
 * the phase recovery registers, subtracts 6 (clamping at zero) and
 * returns the result.  Nothing is written to F2x[1,0]9C_x0C here, and the
 * phase recovery seeds are not programmed here either; the Fam15h caller
 * in phyAssistedMemFnceTraining() does both around this call.
 *
 * Fam10h: programs the FenceTrnFinDlySeed seed into x50..x52, pulses
 * PhyFenceTrEn, averages the 7-bit fields of the phase recovery
 * registers, subtracts 8 on Bx/Cx/Dx parts, writes the low five bits of
 * the result to F2x[1,0]9C_x0C[20:16], rewrites F2x[1,0]9C_x04 with its
 * current contents and returns the (unmasked) result.
 *
 * pMCTstat is only forwarded to the Fam15h MEMCLK delay helper.
 */
static uint32_t fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
			struct DCTStatStruc *pDCTstat, u8 dct)
{
	const u32 index_reg = 0x98;
	u32 dev = pDCTstat->dev_dct;
	u16 fence;
	u32 offset;
	u32 seed;
	u32 data;

	if (!is_fam15h()) {
		/* BIOS first programs a seed value (recommended 19) into the
		 * Dram Phase Recovery Control Registers F2x[1,0]9C_x[51:50]
		 * and F2x[1,0]9C_x52.  x50/x51 receive the seed replicated
		 * into all four bytes, x52 only in its lowest byte.
		 */
		for (offset = 0x50; offset <= 0x52; offset++) {
			seed = (FenceTrnFinDlySeed & 0x1F);
			if (offset != 0x52)
				seed |= seed << 8 | seed << 16 | seed << 24;
			Set_NB32_index_wait_DCT(dev, dct, index_reg, offset, seed);
		}

		/* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
		fence_set_train_en(dev, dct, index_reg, 1);

		/* Let the training run.
		 * NOTE(review): the previous comments described this delay
		 * both as "200 MEMCLKs" and as "200us"; the real duration
		 * depends on mct_Wait() - TODO confirm.
		 */
		mct_Wait(50000);

		/* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
		fence_set_train_en(dev, dct, index_reg, 0);

		/* Average F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52 (7-bit fields) */
		fence = fence_avg_phase_recovery(dev, dct, index_reg, 0x7F);

		/* Write the (averaged value - 8) to F2x[1,0]9C_x0C[PhyFence]. */
		/* inlined mct_AdjustFenceValue() */
		/* TODO: The RBC0 is not supported (it would subtract 3 instead). */
		/* NOTE(review): fence is an unsigned 16-bit value, so an average
		 * below 8 wraps around here; only bits 4:0 reach the register.
		 */
		if ((pDCTstat->LogicalCPUID & AMD_DR_Dx)
		    || (pDCTstat->LogicalCPUID & AMD_DR_Cx)
		    || (pDCTstat->LogicalCPUID & AMD_DR_Bx))
			fence -= 8;

		data = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0C);
		data &= ~(0x1F << 16);
		data |= (fence & 0x1F) << 16;
		Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x0C, data);

		/* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control
		 * Register delays (both channels).
		 */
		data = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x04);
		Set_NB32_index_wait_DCT(dev, dct, index_reg, 0x04, data);

		return fence;
	}

	/* Fam15h */

	/* Set F2x[1,0]9C_x08[PhyFenceTrEn] */
	fence_set_train_en(dev, dct, index_reg, 1);

	/* Wait 2000 MEMCLKs */
	precise_memclk_delay_fam15(pMCTstat, pDCTstat, dct, 2000);

	/* Clear F2x[1,0]9C_x08[PhyFenceTrEn] */
	fence_set_train_en(dev, dct, index_reg, 0);

	/* BIOS reads the phase recovery engine registers
	 * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52.
	 * Average the fine delay components only.
	 */
	fence = fence_avg_phase_recovery(dev, dct, index_reg, 0x1f);

	/* Subtract 6 from the average, never going below zero */
	fence = (fence < 6) ? 0 : (fence - 6);

	return fence;
}

void mct_Wait(u32 cycles) |