diff options
19 files changed, 1253 insertions, 1142 deletions
diff --git a/src/northbridge/amd/amdfam10/raminit_amdmct.c b/src/northbridge/amd/amdfam10/raminit_amdmct.c index a0d47f4afd..25cf93daf7 100644 --- a/src/northbridge/amd/amdfam10/raminit_amdmct.c +++ b/src/northbridge/amd/amdfam10/raminit_amdmct.c @@ -26,7 +26,6 @@ static void print_tx(const char *strval, u32 val) printk(BIOS_DEBUG, "%s%08x\n", strval, val); #endif } -#endif static void print_t(const char *strval) { @@ -34,6 +33,7 @@ static void print_t(const char *strval) printk(BIOS_DEBUG, "%s", strval); #endif } +#endif static void print_tf(const char *func, const char *strval) { diff --git a/src/northbridge/amd/amdmct/mct/mct_d.c b/src/northbridge/amd/amdmct/mct/mct_d.c index 3dec934595..88910e2d5f 100644 --- a/src/northbridge/amd/amdmct/mct/mct_d.c +++ b/src/northbridge/amd/amdmct/mct/mct_d.c @@ -542,7 +542,6 @@ static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat, pDCTstat = pDCTstatA + Node; devx = pDCTstat->dev_map; DramSelBaseAddr = 0; - pDCTstat = pDCTstatA + Node; if (!pDCTstat->GangedMode) { DramSelBaseAddr = pDCTstat->NodeSysLimit - pDCTstat->DCTSysLimit; /*In unganged mode, we must add DCT0 and DCT1 to DCTSysLimit */ diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c index 71a6be881e..81a75768ab 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c @@ -209,12 +209,24 @@ static const u8 Table_DQSRcvEn_Offset[] = {0x00,0x01,0x10,0x11,0x2}; MEMCLK_MAPPING EQU 00010000b, 00000100b, 00001000b, 00100000b, 00000000b, 00000000b, 00000000b, 00000000b */ -/* Note: If you are not sure about the pin mappings at initial stage, we dont have to disable MemClk. - * Set entries in the tables all 0xFF. */ +/* ========================================================================================== + * Set up clock pin to DIMM mappings, + * NOTE: If you are not sure about the pin mappings, you can keep all MemClk signals active, + * just set all entries in the relevant table(s) to 0xff. + * ========================================================================================== + */ static const u8 Tab_L1CLKDis[] = {0x20, 0x20, 0x10, 0x10, 0x08, 0x08, 0x04, 0x04}; static const u8 Tab_AM3CLKDis[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}; static const u8 Tab_S1CLKDis[] = {0xA2, 0xA2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + +/* C32: Enable CS0 - CS3 clocks (DIMM0 - DIMM1) */ +static const u8 Tab_C32CLKDis[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}; + +/* G34: Enable CS0 - CS3 clocks (DIMM0 - DIMM1) */ +static const u8 Tab_G34CLKDis[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}; + static const u8 Tab_ManualCLKDis[]= {0x10, 0x04, 0x08, 0x20, 0x00, 0x00, 0x00, 0x00}; +/* ========================================================================================== */ static const u8 Table_Comp_Rise_Slew_20x[] = {7, 3, 2, 2, 0xFF}; static const u8 Table_Comp_Rise_Slew_15x[] = {7, 7, 3, 2, 0xFF}; @@ -277,6 +289,11 @@ restartinit: for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { struct DCTStatStruc *pDCTstat; pDCTstat = pDCTstatA + Node; + + /* Zero out data structures to avoid false detection of DIMMs */ + memset(pDCTstat, 0, sizeof(struct DCTStatStruc)); + + /* Initialize data structures */ pDCTstat->Node_ID = Node; pDCTstat->dev_host = PA_HOST(Node); pDCTstat->dev_map = PA_MAP(Node); @@ -284,17 +301,22 @@ restartinit: pDCTstat->dev_nbmisc = PA_NBMISC(Node); pDCTstat->NodeSysBase = node_sys_base; + printk(BIOS_DEBUG, "%s: mct_init Node %d\n", __func__, Node); mct_init(pMCTstat, pDCTstat); mctNodeIDDebugPort_D(); pDCTstat->NodePresent = NodePresent_D(Node); if (pDCTstat->NodePresent) { /* See if Node is there*/ + printk(BIOS_DEBUG, "%s: clear_legacy_Mode\n", __func__); clear_legacy_Mode(pMCTstat, pDCTstat); pDCTstat->LogicalCPUID = mctGetLogicalCPUID_D(Node); + printk(BIOS_DEBUG, "%s: mct_InitialMCT_D\n", __func__); mct_InitialMCT_D(pMCTstat, pDCTstat); + printk(BIOS_DEBUG, "%s: mctSMBhub_Init\n", __func__); mctSMBhub_Init(Node); /* Switch SMBUS crossbar to proper node*/ + printk(BIOS_DEBUG, "%s: mct_initDCT\n", __func__); mct_initDCT(pMCTstat, pDCTstat); if (pDCTstat->ErrCode == SC_FatalErr) { goto fatalexit; /* any fatal errors?*/ @@ -345,6 +367,7 @@ restartinit: mct_FinalMCT_D(pMCTstat, pDCTstatA); printk(BIOS_DEBUG, "mctAutoInitMCT_D Done: Global Status: %x\n", pMCTstat->GStatus); + return; fatalexit: @@ -560,7 +583,6 @@ static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat, pDCTstat = pDCTstatA + Node; devx = pDCTstat->dev_map; DramSelBaseAddr = 0; - pDCTstat = pDCTstatA + Node; /* ??? */ if (!pDCTstat->GangedMode) { DramSelBaseAddr = pDCTstat->NodeSysLimit - pDCTstat->DCTSysLimit; /*In unganged mode, we must add DCT0 and DCT1 to DCTSysLimit */ @@ -645,6 +667,7 @@ static void HTMemMapInit_D(struct MCTStatStruc *pMCTstat, devx = pDCTstat->dev_map; if (pDCTstat->NodePresent) { + printk(BIOS_DEBUG, " Copy dram map from Node 0 to Node %02x \n", Node); reg = 0x40; /*Dram Base 0*/ do { val = Get_NB32(dev, reg); @@ -1162,7 +1185,7 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat, /* Program DRAM Timing values */ DramTimingLo = 0; /* Dram Timing Low init */ - val = pDCTstat->CASL - 2; /* pDCTstat.CASL to reg. definition */ + val = pDCTstat->CASL - 4; /* pDCTstat.CASL to reg. definition */ DramTimingLo |= val; val = pDCTstat->Trcd - Bias_TrcdT; @@ -1406,18 +1429,16 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat, else if (tCKproposed16x <= 24) { pDCTstat->TargetFreq = 6; tCKproposed16x = 24; - } - else if (tCKproposed16x <= 30) { + } else if (tCKproposed16x <= 30) { pDCTstat->TargetFreq = 5; tCKproposed16x = 30; - } - else { + } else { pDCTstat->TargetFreq = 4; tCKproposed16x = 40; } /* Running through this loop twice: - First time find tCL at target frequency - - Second tim find tCL at 400MHz */ + - Second time find tCL at 400MHz */ for (;;) { CLT_Fail = 0; @@ -1451,7 +1472,7 @@ static void SPDGetTCL_D(struct MCTStatStruc *pMCTstat, CLT_Fail = 1; /* get CL and T */ if (!CLT_Fail) { - bytex = CLactual - 2; + bytex = CLactual; if (tCKproposed16x == 20) byte = 7; else if (tCKproposed16x == 24) @@ -1632,7 +1653,7 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, val = 0x0f; /* recommended setting (default) */ DramConfigHi |= val << 24; - if (pDCTstat->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Bx)) + if (pDCTstat->LogicalCPUID & (AMD_DR_Dx | AMD_DR_Cx | AMD_DR_Bx)) DramConfigHi |= 1 << DcqArbBypassEn; /* Build MemClkDis Value from Dram Timing Lo and @@ -1657,6 +1678,10 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, p = Tab_L1CLKDis; else if (byte == PT_M2 || byte == PT_AS) p = Tab_AM3CLKDis; + else if (byte == PT_C3) + p = Tab_C32CLKDis; + else if (byte == PT_GR) + p = Tab_G34CLKDis; else p = Tab_S1CLKDis; @@ -2102,8 +2127,7 @@ static u8 DIMMPresence_D(struct MCTStatStruc *pMCTstat, if (byte == JED_RDIMM || byte == JED_MiniRDIMM) { RegDIMMPresent |= 1 << i; pDCTstat->DimmRegistered[i] = 1; - } - else { + } else { pDCTstat->DimmRegistered[i] = 0; } /* Check ECC capable */ @@ -2977,9 +3001,9 @@ static void mct_FinalMCT_D(struct MCTStatStruc *pMCTstat, } else { /* For Dx CPU */ val = 0x0CE00F00 | 1 << 29/* FlushWrOnStpGnt */; if (!(pDCTstat->GangedMode)) - val |= 0x20; /* MctWrLimit = 8 for Unganed mode */ + val |= 0x20; /* MctWrLimit = 8 for Unganged mode */ else - val |= 0x40; /* MctWrLimit = 16 for ganed mode */ + val |= 0x40; /* MctWrLimit = 16 for ganged mode */ Set_NB32(pDCTstat->dev_dct, 0x11C, val); val = Get_NB32(pDCTstat->dev_dct, 0x1B0); @@ -3414,6 +3438,138 @@ static void mct_BeforeDramInit_Prod_D(struct MCTStatStruc *pMCTstat, Set_NB32(dev, 0x98 + reg_off, 0x0D000030); Set_NB32(dev, 0x9C + reg_off, dword); Set_NB32(dev, 0x98 + reg_off, 0x4D040F30); + + /* FIXME + * Mainboards need to be able to specify the maximum number of DIMMs installable per channel + * For now assume a maximum of 2 DIMMs per channel can be installed + */ + uint8_t MaxDimmsInstallable = 2; + + /* Obtain number of DIMMs on channel */ + uint8_t dimm_count = pDCTstat->MAdimms[i]; + uint8_t rank_count_dimm0; + uint8_t rank_count_dimm1; + uint32_t odt_pattern_0; + uint32_t odt_pattern_1; + uint32_t odt_pattern_2; + uint32_t odt_pattern_3; + + /* Select appropriate ODT pattern for installed DIMMs + * Refer to the BKDG Rev. 3.62, page 120 onwards + */ + if (pDCTstat->C_DCTPtr[i]->Status[DCT_STATUS_REGISTERED]) { + if (MaxDimmsInstallable == 2) { + if (dimm_count == 1) { + /* 1 DIMM detected */ + rank_count_dimm1 = pDCTstat->C_DCTPtr[i]->DimmRanks[1]; + if (rank_count_dimm1 == 1) { + odt_pattern_0 = 0x00000000; + odt_pattern_1 = 0x00000000; + odt_pattern_2 = 0x00000000; + odt_pattern_3 = 0x00020000; + } else if (rank_count_dimm1 == 2) { + odt_pattern_0 = 0x00000000; + odt_pattern_1 = 0x00000000; + odt_pattern_2 = 0x00000000; + odt_pattern_3 = 0x02080000; + } else if (rank_count_dimm1 == 4) { + odt_pattern_0 = 0x00000000; + odt_pattern_1 = 0x00000000; + odt_pattern_2 = 0x020a0000; + odt_pattern_3 = 0x080a0000; + } else { + /* Fallback */ + odt_pattern_0 = 0x00000000; + odt_pattern_1 = 0x00000000; + odt_pattern_2 = 0x00000000; + odt_pattern_3 = 0x00000000; + } + } else { + /* 2 DIMMs detected */ + rank_count_dimm0 = pDCTstat->C_DCTPtr[i]->DimmRanks[0]; + rank_count_dimm1 = pDCTstat->C_DCTPtr[i]->DimmRanks[1]; + if ((rank_count_dimm0 < 4) && (rank_count_dimm1 < 4)) { + odt_pattern_0 = 0x00000000; + odt_pattern_1 = 0x01010202; + odt_pattern_2 = 0x00000000; + odt_pattern_3 = 0x09030603; + } else if ((rank_count_dimm0 < 4) && (rank_count_dimm1 == 4)) { + odt_pattern_0 = 0x01010000; + odt_pattern_1 = 0x01010a0a; + odt_pattern_2 = 0x01090000; + odt_pattern_3 = 0x01030e0b; + } else if ((rank_count_dimm0 == 4) && (rank_count_dimm1 < 4)) { + odt_pattern_0 = 0x00000202; + odt_pattern_1 = 0x05050202; + odt_pattern_2 = 0x00000206; + odt_pattern_3 = 0x0d070203; + } else if ((rank_count_dimm0 == 4) && (rank_count_dimm1 == 4)) { + odt_pattern_0 = 0x05050a0a; + odt_pattern_1 = 0x05050a0a; + odt_pattern_2 = 0x050d0a0e; + odt_pattern_3 = 0x05070a0b; + } else { + /* Fallback */ + odt_pattern_0 = 0x00000000; + odt_pattern_1 = 0x00000000; + odt_pattern_2 = 0x00000000; + odt_pattern_3 = 0x00000000; + } + } + } else { + /* FIXME + * 3 DIMMs per channel UNIMPLEMENTED + */ + odt_pattern_0 = 0x00000000; + odt_pattern_1 = 0x00000000; + odt_pattern_2 = 0x00000000; + odt_pattern_3 = 0x00000000; + } + } else { + if (MaxDimmsInstallable == 2) { + if (dimm_count == 1) { + /* 1 DIMM detected */ + rank_count_dimm1 = pDCTstat->C_DCTPtr[i]->DimmRanks[1]; + if (rank_count_dimm1 == 1) { + odt_pattern_0 = 0x00000000; + odt_pattern_1 = 0x00000000; + odt_pattern_2 = 0x00000000; + odt_pattern_3 = 0x00020000; + } else if (rank_count_dimm1 == 2) { + odt_pattern_0 = 0x00000000; + odt_pattern_1 = 0x00000000; + odt_pattern_2 = 0x00000000; + odt_pattern_3 = 0x02080000; + } else { + /* Fallback */ + odt_pattern_0 = 0x00000000; + odt_pattern_1 = 0x00000000; + odt_pattern_2 = 0x00000000; + odt_pattern_3 = 0x00000000; + } + } else { + /* 2 DIMMs detected */ + odt_pattern_0 = 0x00000000; + odt_pattern_1 = 0x01010202; + odt_pattern_2 = 0x00000000; + odt_pattern_3 = 0x09030603; + } + } else { + /* FIXME + * 3 DIMMs per channel UNIMPLEMENTED + */ + odt_pattern_0 = 0x00000000; + odt_pattern_1 = 0x00000000; + odt_pattern_2 = 0x00000000; + odt_pattern_3 = 0x00000000; + } + } + + /* Program ODT pattern */ + Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x180, odt_pattern_1); + Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x181, odt_pattern_0); + Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x182, odt_pattern_3); + Set_NB32_index_wait(dev, 0xf0 + reg_off, 0x183, odt_pattern_2); } } } @@ -3657,6 +3813,7 @@ static void mct_BeforeDQSTrain_D(struct MCTStatStruc *pMCTstat, } } +/* Erratum 350 */ static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct) { @@ -3692,11 +3849,11 @@ static void mct_ResetDLL_D(struct MCTStatStruc *pMCTstat, mct_Read1LTestPattern_D(pMCTstat, pDCTstat, addr); /* cache fills */ /* Write 0000_8000h to register F2x[1,0]9C_xD080F0C */ - Set_NB32_index_wait(dev, 0x98 + reg_off, 0x4D080F0C, 0x00008000); + Set_NB32_index_wait(dev, 0x98 + reg_off, 0xD080F0C, 0x00008000); mct_Wait(80); /* wait >= 300ns */ /* Write 0000_0000h to register F2x[1,0]9C_xD080F0C */ - Set_NB32_index_wait(dev, 0x98 + reg_off, 0x4D080F0C, 0x00000000); + Set_NB32_index_wait(dev, 0x98 + reg_off, 0xD080F0C, 0x00000000); mct_Wait(800); /* wait >= 2us */ break; } diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h index d6e5fb4ca9..987c0c8a5c 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h +++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h @@ -499,7 +499,7 @@ struct DCTStatStruc { /* A per Node structure*/ /* CHB DIMM0 Byte 0 - 7 TxDqs */ /* CHB DIMM1 Byte 0 - 7 TxDqs */ /* CHB DIMM1 Byte 0 - 7 TxDqs */ - u8 CH_D_B_RCVRDLY[2][4][8]; /* [A/B] [DIMM0-3] [DQS] */ + u16 CH_D_B_RCVRDLY[2][4][8]; /* [A/B] [DIMM0-3] [DQS] */ /* CHA DIMM 0 Receiver Enable Delay*/ /* CHA DIMM 1 Receiver Enable Delay*/ /* CHA DIMM 2 Receiver Enable Delay*/ @@ -509,7 +509,7 @@ struct DCTStatStruc { /* A per Node structure*/ /* CHB DIMM 1 Receiver Enable Delay*/ /* CHB DIMM 2 Receiver Enable Delay*/ /* CHB DIMM 3 Receiver Enable Delay*/ - u8 CH_D_BC_RCVRDLY[2][4]; + u16 CH_D_BC_RCVRDLY[2][4]; /* CHA DIMM 0 - 4 Check Byte Receiver Enable Delay*/ /* CHB DIMM 0 - 4 Check Byte Receiver Enable Delay*/ u8 DIMMValidDCT[2]; /* DIMM# in DCT0*/ @@ -769,7 +769,7 @@ u8 mct_checkNumberOfDqsRcvEn_1Pass(u8 pass); u32 SetupDqsPattern_1PassA(u8 Pass); u32 SetupDqsPattern_1PassB(u8 Pass); u8 mct_Get_Start_RcvrEnDly_1Pass(u8 Pass); -u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 RcvrEnDlyLimit, u8 Channel, u8 Receiver, u8 Pass); +u16 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, u16 RcvrEnDlyLimit, u8 Channel, u8 Receiver, u8 Pass); void CPUMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); void UMAMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); uint64_t mctGetLogicalCPUID(u32 Node); @@ -779,7 +779,7 @@ void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTs void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); void TrainMaxReadLatency_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); void mct_EndDQSTraining_D(struct MCTStatStruc *pMCTstat,struct DCTStatStruc *pDCTstatA); -void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 FinalValue, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass); +void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, u8 FinalValue, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass); void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel); void mctGet_PS_Cfg_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 dct); void InterleaveBanks_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct); diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h index 60f98bc89a..9990304645 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h +++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d_gcc.h @@ -2,6 +2,7 @@ * This file is part of the coreboot project. * * Copyright (C) 2010 Advanced Micro Devices, Inc. + * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -103,10 +104,10 @@ static void proc_CLFLUSH(u32 addr_hi) __asm__ volatile ( /* clflush fs:[eax] */ - "outb %%al, $0xed\n\t" /* _EXECFENCE */ - "clflush %%fs:(%0)\n\t" + "outb %%al, $0xed\n\t" /* _EXECFENCE */ + "clflush %%fs:(%0)\n\t" "mfence\n\t" - ::"a" (addr_hi<<8) + ::"a" (addr_hi<<8) ); } @@ -141,6 +142,24 @@ static u32 read32_fs(u32 addr_lo) return value; } +static uint64_t read64_fs(uint32_t addr_lo) +{ + uint64_t value = 0; + uint32_t value_lo; + uint32_t value_hi; + + __asm__ volatile ( + "outb %%al, $0xed\n\t" /* _EXECFENCE */ + "mfence\n\t" + "movl %%fs:(%2), %0\n\t" + "movl %%fs:(%3), %1\n\t" + :"=c"(value_lo), "=d"(value_hi): "a" (addr_lo), "b" (addr_lo + 4) : "memory" + ); + value |= value_lo; + value |= ((uint64_t)value_hi) << 32; + return value; +} + #ifdef UNUSED_CODE static u8 read8_fs(u32 addr_lo) { @@ -210,68 +229,6 @@ static __attribute__((noinline)) void FlushDQSTestPattern_L18(u32 addr_lo) ); } -static void ReadL18TestPattern(u32 addr_lo) -{ - /* set fs and use fs prefix to access the mem */ - __asm__ volatile ( - "outb %%al, $0xed\n\t" /* _EXECFENCE */ - "movl %%fs:-128(%%esi), %%eax\n\t" /* TestAddr cache line */ - "movl %%fs:-64(%%esi), %%eax\n\t" /* +1 */ - "movl %%fs:(%%esi), %%eax\n\t" /* +2 */ - "movl %%fs:64(%%esi), %%eax\n\t" /* +3 */ - - "movl %%fs:-128(%%edi), %%eax\n\t" /* +4 */ - "movl %%fs:-64(%%edi), %%eax\n\t" /* +5 */ - "movl %%fs:(%%edi), %%eax\n\t" /* +6 */ - "movl %%fs:64(%%edi), %%eax\n\t" /* +7 */ - - "movl %%fs:-128(%%ebx), %%eax\n\t" /* +8 */ - "movl %%fs:-64(%%ebx), %%eax\n\t" /* +9 */ - "movl %%fs:(%%ebx), %%eax\n\t" /* +10 */ - "movl %%fs:64(%%ebx), %%eax\n\t" /* +11 */ - - "movl %%fs:-128(%%ecx), %%eax\n\t" /* +12 */ - "movl %%fs:-64(%%ecx), %%eax\n\t" /* +13 */ - "movl %%fs:(%%ecx), %%eax\n\t" /* +14 */ - "movl %%fs:64(%%ecx), %%eax\n\t" /* +15 */ - - "movl %%fs:-128(%%edx), %%eax\n\t" /* +16 */ - "movl %%fs:-64(%%edx), %%eax\n\t" /* +17 */ - "mfence\n\t" - - :: "a"(0), "b" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), - "d" (addr_lo +128+16*64), "S"(addr_lo+128), - "D"(addr_lo+128+4*64) - ); - -} - -static void ReadL9TestPattern(u32 addr_lo) -{ - - /* set fs and use fs prefix to access the mem */ - __asm__ volatile ( - "outb %%al, $0xed\n\t" /* _EXECFENCE */ - - "movl %%fs:-128(%%ecx), %%eax\n\t" /* TestAddr cache line */ - "movl %%fs:-64(%%ecx), %%eax\n\t" /* +1 */ - "movl %%fs:(%%ecx), %%eax\n\t" /* +2 */ - "movl %%fs:64(%%ecx), %%eax\n\t" /* +3 */ - - "movl %%fs:-128(%%edx), %%eax\n\t" /* +4 */ - "movl %%fs:-64(%%edx), %%eax\n\t" /* +5 */ - "movl %%fs:(%%edx), %%eax\n\t" /* +6 */ - "movl %%fs:64(%%edx), %%eax\n\t" /* +7 */ - - "movl %%fs:-128(%%ebx), %%eax\n\t" /* +8 */ - "mfence\n\t" - - :: "a"(0), "b" (addr_lo+128+8*64), "c"(addr_lo+128), - "d"(addr_lo+128+4*64) - ); - -} - static void ReadMaxRdLat1CLTestPattern_D(u32 addr) { SetUpperFSbase(addr); diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c b/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c index ae1654cc21..99a26288f9 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mctardk6.c @@ -2,6 +2,7 @@ * This file is part of the coreboot project. * * Copyright (C) 2010 Advanced Micro Devices, Inc. + * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,7 +18,7 @@ * Foundation, Inc. */ -/* The socket type F (1207), Fr2, G (1207) are not tested. +/* The socket type Fr2, G (1207) are not tested. */ static void Get_ChannelPS_Cfg0_D(u8 MAAdimms, u8 Speed, u8 MAAload, @@ -79,8 +80,7 @@ static void Get_ChannelPS_Cfg0_D( u8 MAAdimms, u8 Speed, u8 MAAload, else *AddrTmgCTL = 0x00353935; } - } - else { + } else { if(Speed == 4) { *AddrTmgCTL = 0x00000000; if (MAAdimms == 3) diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c index 404727b493..cc2f43a952 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c @@ -2,6 +2,7 @@ * This file is part of the coreboot project. * * Copyright (C) 2010 Advanced Micro Devices, Inc. + * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -22,13 +23,6 @@ static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat, u8 scale, u8 ChipSel); static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 ChipSel); -static u8 MiddleDQS_D(u8 min, u8 max); -static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, - u8 cs_start); -static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, - u8 cs_start); static void WriteDQSTestPattern_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 TestAddr_lo); @@ -43,31 +37,19 @@ static void FlushDQSTestPattern_D(struct DCTStatStruc *pDCTstat, u32 addr_lo); static void SetTargetWTIO_D(u32 TestAddr); static void ResetTargetWTIO_D(void); -static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, - u32 TestAddr_lo); -static void mctEngDQSwindow_Save_D(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, u8 ChipSel, - u8 RnkDlyFilterMin, u8 RnkDlyFilterMax); void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index); u8 mct_DisableDimmEccEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 ChipSel); -static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, - u8 cs_start); u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 Channel, u8 receiver, u8 *valid); static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u32 *buffer); - -static void StoreWrRdDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, u8 ChipSel, - u8 RnkDlyFilterMin, u8 RnkDlyFilterMax); +static void proc_IOCLFLUSH_D(u32 addr_hi); static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 ChipSel); @@ -286,20 +268,99 @@ static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat, pDCTstat->DQSDelay = (u8)DQSDelay; } +static void write_dqs_write_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg) +{ + uint32_t dword; + + /* Lanes 0 - 3 */ + dword = Get_NB32_index_wait(dev, index_reg, 0x1 | (dimm << 8)); + dword &= ~0x7f7f7f7f; + dword |= (delay[3] & 0x7f) << 24; + dword |= (delay[2] & 0x7f) << 16; + dword |= (delay[1] & 0x7f) << 8; + dword |= delay[0] & 0x7f; + Set_NB32_index_wait(dev, index_reg, 0x1 | (dimm << 8), dword); + + /* Lanes 4 - 7 */ + dword = Get_NB32_index_wait(dev, index_reg, 0x2 | (dimm << 8)); + dword &= ~0x7f7f7f7f; + dword |= (delay[7] & 0x7f) << 24; + dword |= (delay[6] & 0x7f) << 16; + dword |= (delay[5] & 0x7f) << 8; + dword |= delay[4] & 0x7f; + Set_NB32_index_wait(dev, index_reg, 0x2 | (dimm << 8), dword); + + /* Lane 8 (ECC) */ + dword = Get_NB32_index_wait(dev, index_reg, 0x3 | (dimm << 8)); + dword &= ~0x0000007f; + dword |= delay[8] & 0x7f; + Set_NB32_index_wait(dev, index_reg, 0x3 | (dimm << 8), dword); +} + +static void write_dqs_read_data_timing_registers(uint16_t* delay, uint32_t dev, uint8_t dimm, uint32_t index_reg) +{ + uint32_t dword; + + /* Lanes 0 - 3 */ + dword = Get_NB32_index_wait(dev, index_reg, 0x5 | (dimm << 8)); + dword &= ~0x3f3f3f3f; + dword |= (delay[3] & 0x3f) << 24; + dword |= (delay[2] & 0x3f) << 16; + dword |= (delay[1] & 0x3f) << 8; + dword |= delay[0] & 0x3f; + Set_NB32_index_wait(dev, index_reg, 0x5 | (dimm << 8), dword); + + /* Lanes 4 - 7 */ + dword = Get_NB32_index_wait(dev, index_reg, 0x6 | (dimm << 8)); + dword &= ~0x3f3f3f3f; + dword |= (delay[7] & 0x3f) << 24; + dword |= (delay[6] & 0x3f) << 16; + dword |= (delay[5] & 0x3f) << 8; + dword |= delay[4] & 0x3f; + Set_NB32_index_wait(dev, index_reg, 0x6 | (dimm << 8), dword); + + /* Lane 8 (ECC) */ + dword = Get_NB32_index_wait(dev, index_reg, 0x7 | (dimm << 8)); + dword &= ~0x0000003f; + dword |= delay[8] & 0x3f; + Set_NB32_index_wait(dev, index_reg, 0x7 | (dimm << 8), dword); +} + +/* DQS Position Training + * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.3 + */ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, - u8 cs_start) + struct DCTStatStruc *pDCTstat) { u32 Errors; - u8 Channel, DQSWrDelay; + u8 Channel; + u8 Receiver; u8 _DisableDramECC = 0; - u32 PatternBuffer[292]; + u32 PatternBuffer[304]; /* 288 + 16 */ u8 _Wrap32Dis = 0, _SSE2 = 0; - u8 dqsWrDelay_end; + u32 dev; u32 addr; + u8 valid; u32 cr4; u32 lo, hi; + u32 index_reg; + uint32_t TestAddr; + + uint8_t dual_rank; + uint8_t iter; + uint8_t lane; + uint16_t bytelane_test_results; + uint16_t current_write_dqs_delay[MAX_BYTE_LANES]; + uint16_t current_read_dqs_delay[MAX_BYTE_LANES]; + uint16_t write_dqs_delay_stepping_done[MAX_BYTE_LANES]; + uint8_t dqs_read_results_array[2][MAX_BYTE_LANES][64]; /* [rank][lane][step] */ + uint8_t dqs_write_results_array[2][MAX_BYTE_LANES][128]; /* [rank][lane][step] */ + + uint8_t last_pos = 0; + uint8_t cur_count = 0; + uint8_t best_pos = 0; + uint8_t best_count = 0; print_debug_dqs("\nTrainDQSRdWrPos: Node_ID ", pDCTstat->Node_ID, 0); cr4 = read_cr4(); @@ -323,50 +384,363 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, SetupDqsPattern_D(pMCTstat, pDCTstat, PatternBuffer); /* mct_BeforeTrainDQSRdWrPos_D */ - dqsWrDelay_end = 0x20; + + dev = pDCTstat->dev_dct; + pDCTstat->Direction = DQS_READDIR; + + /* 2.8.9.9.3 (2) + * Loop over each channel, lane, and rank + */ + + /* NOTE + * The BKDG originally stated to iterate over lane, then rank, however this process is quite slow + * compared to an equivalent loop over rank, then lane as the latter allows multiple lanes to be + * tested simultaneously, thus improving performance by around 8x. + */ Errors = 0; for (Channel = 0; Channel < 2; Channel++) { - print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ",Channel, 1); + print_debug_dqs("\tTrainDQSRdWrPos: 1 Channel ", Channel, 1); pDCTstat->Channel = Channel; if (pDCTstat->DIMMValidDCT[Channel] == 0) /* mct_BeforeTrainDQSRdWrPos_D */ continue; - pDCTstat->DqsRdWrPos_Saved = 0; - for ( DQSWrDelay = 0; DQSWrDelay < dqsWrDelay_end; DQSWrDelay++) { - pDCTstat->DQSDelay = DQSWrDelay; - pDCTstat->Direction = DQS_WRITEDIR; - mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start); - - print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DQSWrDelay ", DQSWrDelay, 2); - TrainReadDQS_D(pMCTstat, pDCTstat, cs_start); - print_debug_dqs("\t\tTrainDQSRdWrPos: 21 DqsRdWrPos_Saved ", pDCTstat->DqsRdWrPos_Saved, 2); - if (pDCTstat->DqsRdWrPos_Saved == 0xFF) - break; - - print_debug_dqs("\t\tTrainDQSRdWrPos: 22 TrainErrors ",pDCTstat->TrainErrors, 2); - if (pDCTstat->TrainErrors == 0) { + + index_reg = 0x98 + 0x100 * Channel; + + dual_rank = 0; + Receiver = mct_InitReceiver_D(pDCTstat, Channel); + /* There are four receiver pairs, loosely associated with chipselects. + * This is essentially looping over each rank of each DIMM. + */ + for (; Receiver < 8; Receiver++) { + if ((Receiver & 0x1) == 0) { + /* Even rank of DIMM */ + if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) + dual_rank = 1; + else + dual_rank = 0; + } + + if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) { + continue; + } + + /* Select the base test address for the current rank */ + TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid); + if (!valid) { /* Address not supported on current CS */ + continue; + } + + print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 14 TestAddr ", TestAddr, 4); + SetUpperFSbase(TestAddr); /* fs:eax=far ptr to target */ + + print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 12 Receiver ", Receiver, 2); + + /* 2.8.9.9.3 (DRAM Write Data Timing Loop) + * Iterate over all possible DQS delay values (0x0 - 0x7f) + */ + uint8_t test_write_dqs_delay = 0; + uint8_t test_read_dqs_delay = 0; + uint8_t passing_dqs_delay_found[MAX_BYTE_LANES]; + + /* Initialize variables */ + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + current_write_dqs_delay[lane] = 0; + passing_dqs_delay_found[lane] = 0; + write_dqs_delay_stepping_done[lane] = 0; + } + + for (test_write_dqs_delay = 0; test_write_dqs_delay < 128; test_write_dqs_delay++) { + print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 16 test_write_dqs_delay ", test_write_dqs_delay, 6); + + /* Break out of loop if passing window already found, */ + if (write_dqs_delay_stepping_done[0] && write_dqs_delay_stepping_done[1] + && write_dqs_delay_stepping_done[2] && write_dqs_delay_stepping_done[3] + && write_dqs_delay_stepping_done[4] && write_dqs_delay_stepping_done[5] + && write_dqs_delay_stepping_done[6] && write_dqs_delay_stepping_done[7]) break; + + /* Commit the current Write Data Timing settings to the hardware registers */ + write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg); + + /* Write the DRAM training pattern to the base test address */ + WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); + + /* 2.8.9.9.3 (DRAM Read DQS Timing Control Loop) + * Iterate over all possible DQS delay values (0x0 - 0x3f) + */ + for (test_read_dqs_delay = 0; test_read_dqs_delay < 64; test_read_dqs_delay++) { + print_debug_dqs("\t\t\t\t\tTrainDQSRdWrPos: 161 test_read_dqs_delay ", test_read_dqs_delay, 6); + + /* Initialize Read DQS Timing Control settings for this iteration */ + for (lane = 0; lane < MAX_BYTE_LANES; lane++) + if (!write_dqs_delay_stepping_done[lane]) + current_read_dqs_delay[lane] = test_read_dqs_delay; + + /* Commit the current Read DQS Timing Control settings to the hardware registers */ + write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg); + + /* Initialize test result variable */ + bytelane_test_results = 0xff; + + /* Read the DRAM training pattern from the base test address three times + * NOTE + * While the BKDG states to read three times this is probably excessive! + * Decrease training time by only reading the test pattern once per iteration + */ + for (iter = 0; iter < 1; iter++) { + /* Flush caches */ + SetTargetWTIO_D(TestAddr); + FlushDQSTestPattern_D(pDCTstat, TestAddr << 8); + ResetTargetWTIO_D(); + + /* Read and compare pattern */ + bytelane_test_results &= (CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8) & 0xff); /* [Lane 7 :: Lane 0] 0=fail, 1=pass */ + + /* If all lanes have already failed testing bypass remaining re-read attempt(s) */ + if (bytelane_test_results == 0x0) + break; + } + + /* Store any lanes that passed testing for later use */ + for (lane = 0; lane < 8; lane++) + if (!write_dqs_delay_stepping_done[lane]) + dqs_read_results_array[Receiver & 0x1][lane][test_read_dqs_delay] = (!!(bytelane_test_results & (1 << lane))); + + print_debug_dqs("\t\t\t\t\tTrainDQSRdWrPos: 162 bytelane_test_results ", bytelane_test_results, 6); + } + + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + if (write_dqs_delay_stepping_done[lane]) + continue; + + /* Determine location and length of longest consecutive string of passing values + * Output is stored in best_pos and best_count + */ + last_pos = 0; + cur_count = 0; + best_pos = 0; + best_count = 0; + for (iter = 0; iter < 64; iter++) { + if ((dqs_read_results_array[Receiver & 0x1][lane][iter]) && (iter < 63)) { + /* Pass */ + cur_count++; + } else { + /* Failure or end of loop */ + if (cur_count > best_count) { + best_count = cur_count; + best_pos = last_pos; + } + cur_count = 0; + last_pos = iter; + } + } + + if (best_count > 2) { + /* Exit the DRAM Write Data Timing Loop after programming the Read DQS Timing Control + * register with the center of the passing window + */ + current_read_dqs_delay[lane] = (best_pos + (best_count / 2)); + passing_dqs_delay_found[lane] = 1; + + /* Commit the current Read DQS Timing Control settings to the hardware registers */ + write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg); + + /* Exit the DRAM Write Data Timing Loop */ + write_dqs_delay_stepping_done[lane] = 1; + + print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 142 largest passing region ", best_count, 4); + print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 143 largest passing region start ", best_pos, 4); + } + + /* Increment the DQS Write Delay value if needed for the next DRAM Write Data Timing Loop iteration */ + if (!write_dqs_delay_stepping_done[lane]) + current_write_dqs_delay[lane]++; + } } - Errors |= pDCTstat->TrainErrors; - } - pDCTstat->DqsRdWrPos_Saved = 0; - if (DQSWrDelay < dqsWrDelay_end) { - Errors = 0; + /* Flag failure(s) if present */ + for (lane = 0; lane < 8; lane++) { + if (!passing_dqs_delay_found[lane]) { + print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 121 Unable to find passing region for lane ", lane, 2); + + /* Flag absence of passing window */ + Errors |= 1 << SB_NODQSPOS; + } + } + + /* Iterate over all possible Write Data Timing values (0x0 - 0x7f) + * Note that the Read DQS Timing Control was calibrated / centered in the prior nested loop + */ + for (test_write_dqs_delay = 0; test_write_dqs_delay < 128; test_write_dqs_delay++) { + /* Initialize Write Data Timing settings for this iteration */ + for (lane = 0; lane < MAX_BYTE_LANES; lane++) + current_write_dqs_delay[lane] = test_write_dqs_delay; + + /* Commit the current Write Data Timing settings to the hardware registers */ + write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg); + + /* Write the DRAM training pattern to the base test address */ + WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); + + /* Flush caches */ + SetTargetWTIO_D(TestAddr); + FlushDQSTestPattern_D(pDCTstat, TestAddr << 8); + ResetTargetWTIO_D(); + + /* Read and compare pattern from the base test address */ + bytelane_test_results = (CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8) & 0xff); /* [Lane 7 :: Lane 0] 0=fail, 1=pass */ + + /* Store any lanes that passed testing for later use */ + for (lane = 0; lane < 8; lane++) + dqs_write_results_array[Receiver & 0x1][lane][test_write_dqs_delay] = (!!(bytelane_test_results & (1 << lane))); + } + + for (lane = 0; lane < 8; lane++) { + if ((!dual_rank) || (dual_rank && (Receiver & 0x1))) { + +#ifdef PRINT_PASS_FAIL_BITMAPS + for (iter = 0; iter < 64; iter++) { + if (dqs_read_results_array[0][lane][iter]) + printk(BIOS_DEBUG, "+"); + else + printk(BIOS_DEBUG, "."); + } + printk(BIOS_DEBUG, "\n"); + for (iter = 0; iter < 64; iter++) { + if (dqs_read_results_array[1][lane][iter]) + printk(BIOS_DEBUG, "+"); + else + printk(BIOS_DEBUG, "."); + } + printk(BIOS_DEBUG, "\n\n"); + for (iter = 0; iter < 128; iter++) { + if (dqs_write_results_array[0][lane][iter]) + printk(BIOS_DEBUG, "+"); + else + printk(BIOS_DEBUG, "."); + } + printk(BIOS_DEBUG, "\n"); + for (iter = 0; iter < 128; iter++) { + if (dqs_write_results_array[1][lane][iter]) + printk(BIOS_DEBUG, "+"); + else + printk(BIOS_DEBUG, "."); + } + printk(BIOS_DEBUG, "\n\n"); +#endif + + /* Base rank of single-rank DIMM, or odd rank of dual-rank DIMM */ + if (dual_rank) { + /* Intersect the passing windows of both ranks */ + for (iter = 0; iter < 64; iter++) + if (!dqs_read_results_array[1][lane][iter]) + dqs_read_results_array[0][lane][iter] = 0; + for (iter = 0; iter < 128; iter++) + if (!dqs_write_results_array[1][lane][iter]) + dqs_write_results_array[0][lane][iter] = 0; + } + + /* Determine location and length of longest consecutive string of passing values for read DQS timing + * Output is stored in best_pos and best_count + */ + last_pos = 0; + cur_count = 0; + best_pos = 0; + best_count = 0; + for (iter = 0; iter < 64; iter++) { + if ((dqs_read_results_array[0][lane][iter]) && (iter < 63)) { + /* Pass */ + cur_count++; + } else { + /* Failure or end of loop */ + if (cur_count > best_count) { + best_count = cur_count; + best_pos = last_pos; + } + cur_count = 0; + last_pos = iter; + } + } + print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 144 largest read passing region ", best_count, 4); + if (best_count > 0) { + if (best_count < MIN_DQS_WNDW) { + /* Flag excessively small passing window */ + Errors |= 1 << SB_SMALLDQS; + } + + /* Find the center of the passing window */ + current_read_dqs_delay[lane] = (best_pos + (best_count / 2)); + + /* Commit the current Read DQS Timing Control settings to the hardware registers */ + write_dqs_read_data_timing_registers(current_read_dqs_delay, dev, (Receiver >> 1), index_reg); + + /* Save the final Read DQS Timing Control settings for later use */ + pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_READDIR][lane] = current_read_dqs_delay[lane]; + } else { + print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 122 Unable to find read passing region for lane ", lane, 2); + + /* Flag absence of passing window */ + Errors |= 1 << SB_NODQSPOS; + } + + /* Determine location and length of longest consecutive string of passing values for write DQS timing + * Output is stored in best_pos and best_count + */ + last_pos = 0; + cur_count = 0; + best_pos = 0; + best_count = 0; + for (iter = 0; iter < 128; iter++) { + if ((dqs_write_results_array[0][lane][iter]) && (iter < 127)) { + /* Pass */ + cur_count++; + } else { + /* Failure or end of loop */ + if (cur_count > best_count) { + best_count = cur_count; + best_pos = last_pos; + } + cur_count = 0; + last_pos = iter; + } + } + print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 145 largest write passing region ", best_count, 4); + if (best_count > 0) { + if (best_count < MIN_DQS_WNDW) { + /* Flag excessively small passing window */ + Errors |= 1 << SB_SMALLDQS; + } + + /* Find the center of the passing window */ + current_write_dqs_delay[lane] = (best_pos + (best_count / 2)); + + /* Commit the current Write Data Timing settings to the hardware registers */ + write_dqs_write_data_timing_registers(current_write_dqs_delay, dev, (Receiver >> 1), index_reg); + + /* Save the final Write Data Timing settings for later use */ + pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][DQS_WRITEDIR][lane] = current_write_dqs_delay[lane]; + } else { + print_debug_dqs("\t\t\t\tTrainDQSRdWrPos: 123 Unable to find write passing region for lane ", lane, 2); + + /* Flag absence of passing window */ + Errors |= 1 << SB_NODQSPOS; + } + } + } - print_debug_dqs("\tTrainDQSRdWrPos: 231 DQSWrDelay ", DQSWrDelay, 1); - TrainWriteDQS_D(pMCTstat, pDCTstat, cs_start); } - print_debug_dqs("\tTrainDQSRdWrPos: 232 Errors ", Errors, 1); - pDCTstat->ErrStatus |= Errors; } + pDCTstat->TrainErrors |= Errors; + pDCTstat->ErrStatus |= Errors; + #if DQS_TRAIN_DEBUG > 0 { u8 val; u8 i; - u8 Channel, Receiver, Dir; + u8 ChannelDTD, ReceiverDTD, Dir; u8 *p; for (Dir = 0; Dir < 2; Dir++) { @@ -375,14 +749,14 @@ static void TrainDQSRdWrPos_D(struct MCTStatStruc *pMCTstat, } else { printk(BIOS_DEBUG, "TrainDQSRdWrPos: CH_D_DIR_B_DQS RD:\n"); } - for (Channel = 0; Channel < 2; Channel++) { - printk(BIOS_DEBUG, "Channel: %02x\n", Channel); - for (Receiver = cs_start; Receiver < (cs_start + 2); Receiver += 2) { - printk(BIOS_DEBUG, "\t\tReceiver: %02x: ", Receiver); - p = pDCTstat->CH_D_DIR_B_DQS[Channel][Receiver >> 1][Dir]; + for (ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) { + printk(BIOS_DEBUG, "Channel: %02x\n", ChannelDTD); + for (ReceiverDTD = 0; ReceiverDTD < MAX_CS_SUPPORTED; ReceiverDTD += 2) { + printk(BIOS_DEBUG, "\t\tReceiver: %02x:", ReceiverDTD); + p = pDCTstat->CH_D_DIR_B_DQS[ChannelDTD][ReceiverDTD >> 1][Dir]; for (i=0;i<8; i++) { val = p[i]; - printk(BIOS_DEBUG, "%02x ", val); + printk(BIOS_DEBUG, " %02x", val); } printk(BIOS_DEBUG, "\n"); } @@ -437,225 +811,6 @@ static void SetupDqsPattern_D(struct MCTStatStruc *pMCTstat, pDCTstat->PtrPatternBufA = (u32)buf; } -static void TrainDQSPos_D(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, - u8 cs_start) -{ - u32 Errors; - u8 ChipSel, DQSDelay; - u8 RnkDlySeqPassMin=0, RnkDlySeqPassMax=0xFF, RnkDlyFilterMin=0, RnkDlyFilterMax=0xFF; - u8 RnkDlySeqPassMinTot=0, RnkDlySeqPassMaxTot=0xFF, RnkDlyFilterMinTot=0, RnkDlyFilterMaxTot=0xFF; - u8 LastTest ,LastTestTot; - u32 TestAddr; - u8 ByteLane; - u8 MutualCSPassW[128]; - u8 BanksPresent; - u8 dqsDelay_end; - u8 tmp, valid, tmp1; - u16 word; - - /* MutualCSPassW: each byte represents a bitmap of pass/fail per - * ByteLane. The indext within MutualCSPassW is the delay value - * given the results. - */ - print_debug_dqs("\t\t\tTrainDQSPos begin ", 0, 3); - - Errors = 0; - BanksPresent = 0; - - dqsDelay_end = 32; - /* Bitmapped status per delay setting, 0xff=All positions - * passing (1= PASS). Set the entire array. - */ - for (DQSDelay=0; DQSDelay<128; DQSDelay++) { - MutualCSPassW[DQSDelay] = 0xFF; - } - - for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) { /* logical register chipselects 0..7 */ - print_debug_dqs("\t\t\t\tTrainDQSPos: 11 ChipSel ", ChipSel, 4); - - if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) { - print_debug_dqs("\t\t\t\tmct_RcvrRankEnabled_D CS not enabled ", ChipSel, 4); - continue; - } - - BanksPresent = 1; /* flag for at least one bank is present */ - TestAddr = mct_GetMCTSysAddr_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel, &valid); - if (!valid) { - print_debug_dqs("\t\t\t\tAddress not supported on current CS ", TestAddr, 4); - continue; - } - - print_debug_dqs("\t\t\t\tTrainDQSPos: 12 TestAddr ", TestAddr, 4); - SetUpperFSbase(TestAddr); /* fs:eax=far ptr to target */ - - if (pDCTstat->Direction == DQS_READDIR) { - print_debug_dqs("\t\t\t\tTrainDQSPos: 13 for read ", 0, 4); - WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); - } - - for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) { - print_debug_dqs("\t\t\t\t\tTrainDQSPos: 141 DQSDelay ", DQSDelay, 5); - - tmp = 0xFF; - tmp1 = DQSDelay; - if (pDCTstat->Direction == DQS_READDIR) { - tmp &= MutualCSPassW[DQSDelay]; - tmp1 += dqsDelay_end; - } - tmp &= MutualCSPassW[tmp1]; - - if (tmp == 0) { - continue;/* skip current delay value if other chipselects have failed all 8 bytelanes */ - } - - pDCTstat->DQSDelay = DQSDelay; - mct_SetDQSDelayAllCSR_D(pMCTstat, pDCTstat, cs_start); - print_debug_dqs("\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); - - if (pDCTstat->Direction == DQS_WRITEDIR) { - print_debug_dqs("\t\t\t\t\tTrainDQSPos: 143 for write", 0, 5); - WriteDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); - } - - print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 Pattern ", pDCTstat->Pattern, 5); - ReadDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); - /* print_debug_dqs("\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); */ - word = CompareDQSTestPattern_D(pMCTstat, pDCTstat, TestAddr << 8); /* 0=fail, 1=pass */ - print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 compare 1 ", word, 3); - - print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 DqsRdWrPos_Saved ", pDCTstat->DqsRdWrPos_Saved, 3); - word &= ~(pDCTstat->DqsRdWrPos_Saved); /* mask out bytelanes that already passed */ - word &= ~(pDCTstat->DqsRdWrPos_Saved << 8); - print_debug_dqs("\t\t\t\t\tTrainDQSPos: 144 compare 2 ", word, 3); - - tmp = DQSDelay; - if (pDCTstat->Direction == DQS_READDIR) { - MutualCSPassW[tmp] &= word >> 8; - tmp += dqsDelay_end; - } - MutualCSPassW[tmp] &= word & 0xFF; - - print_debug_dqs("\t\t\t\t\tTrainDQSPos: 146 \tMutualCSPassW ", MutualCSPassW[DQSDelay], 5); - - SetTargetWTIO_D(TestAddr); - FlushDQSTestPattern_D(pDCTstat, TestAddr << 8); - ResetTargetWTIO_D(); - } - - } - - if (pDCTstat->Direction == DQS_READDIR) { - dqsDelay_end <<= 1; - } - - if (BanksPresent) { - #if 0 /* show the bitmap */ - for (ByteLane = 0; ByteLane < 8; ByteLane++) { /* just print ByteLane 0 */ - for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) { - if (!(MutualCSPassW[DQSDelay] &(1 << ByteLane))) { - printk(BIOS_DEBUG, "."); - } else { - printk(BIOS_DEBUG, "*"); - } - } - printk(BIOS_DEBUG, "\n"); - } - #endif - for (ByteLane = 0; ByteLane < 8; ByteLane++) { - print_debug_dqs("\t\t\t\tTrainDQSPos: 31 ByteLane ",ByteLane, 4); - if (!(pDCTstat->DqsRdWrPos_Saved &(1 << ByteLane))) { - pDCTstat->ByteLane = ByteLane; - LastTest = DQS_FAIL; /* Analyze the results */ - LastTestTot = DQS_FAIL; - /* RnkDlySeqPassMin = 0; */ - /* RnkDlySeqPassMax = 0; */ - RnkDlyFilterMax = 0; - RnkDlyFilterMin = 0; - RnkDlyFilterMaxTot = 0; - RnkDlyFilterMinTot = 0; - for (DQSDelay = 0; DQSDelay < dqsDelay_end; DQSDelay++) { - if (MutualCSPassW[DQSDelay] & (1 << ByteLane)) { - print_debug_dqs("\t\t\t\t\tTrainDQSPos: 321 DQSDelay ", DQSDelay, 5); - print_debug_dqs("\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW ", MutualCSPassW[DQSDelay], 5); - if (pDCTstat->Direction == DQS_READDIR) - tmp = 0x20; - else - tmp = 0; - if (DQSDelay >= tmp) { - RnkDlySeqPassMax = DQSDelay; - if (LastTest == DQS_FAIL) { - RnkDlySeqPassMin = DQSDelay; /* start sequential run */ - } - if ((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)){ - RnkDlyFilterMin = RnkDlySeqPassMin; - RnkDlyFilterMax = RnkDlySeqPassMax; - } - LastTest = DQS_PASS; - } - - if (pDCTstat->Direction == DQS_READDIR) { - RnkDlySeqPassMaxTot = DQSDelay; - if (LastTestTot == DQS_FAIL) - RnkDlySeqPassMinTot = DQSDelay; - if ((RnkDlySeqPassMaxTot - RnkDlySeqPassMinTot)>(RnkDlyFilterMaxTot-RnkDlyFilterMinTot)){ - RnkDlyFilterMinTot = RnkDlySeqPassMinTot; - RnkDlyFilterMaxTot = RnkDlySeqPassMaxTot; - } - LastTestTot = DQS_PASS; - } - } else { - LastTest = DQS_FAIL; - LastTestTot = DQS_FAIL; - } - } - print_debug_dqs("\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax ", RnkDlySeqPassMax, 4); - if (RnkDlySeqPassMax == 0) { - Errors |= 1 << SB_NODQSPOS; /* no passing window */ - } else { - print_debug_dqs_pair("\t\t\t\tTrainDQSPos: 34 RnkDlyFilter: ", RnkDlyFilterMin, " ", RnkDlyFilterMax, 4); - if (((RnkDlyFilterMax - RnkDlyFilterMin) < MIN_DQS_WNDW)){ - Errors |= 1 << SB_SMALLDQS; - } else { - u8 middle_dqs; - /* mctEngDQSwindow_Save_D Not required for arrays */ - if (pDCTstat->Direction == DQS_READDIR) - middle_dqs = MiddleDQS_D(RnkDlyFilterMinTot, RnkDlyFilterMaxTot); - else - middle_dqs = MiddleDQS_D(RnkDlyFilterMin, RnkDlyFilterMax); - pDCTstat->DQSDelay = middle_dqs; - mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, cs_start); /* load the register with the value */ - if (pDCTstat->Direction == DQS_READDIR) - StoreWrRdDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start, RnkDlyFilterMinTot, RnkDlyFilterMaxTot); /* store the value into the data structure */ - else - StoreWrRdDQSDatStrucVal_D(pMCTstat, pDCTstat, cs_start, RnkDlyFilterMin, RnkDlyFilterMax); /* store the value into the data structure */ - print_debug_dqs("\t\t\t\tTrainDQSPos: 42 middle_dqs : ",middle_dqs, 4); - pDCTstat->DqsRdWrPos_Saved |= 1 << ByteLane; - } - } - } - } /* if (pDCTstat->DqsRdWrPos_Saved &(1 << ByteLane)) */ - } -/* skipLocMiddle: */ - pDCTstat->TrainErrors = Errors; - - print_debug_dqs("\t\t\tTrainDQSPos: Errors ", Errors, 3); -} - -static void mctEngDQSwindow_Save_D(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, u8 ChipSel, - u8 RnkDlyFilterMin, u8 RnkDlyFilterMax) -{ - pDCTstat->CH_D_DIR_MaxMin_B_Dly[pDCTstat->Channel] - [pDCTstat->Direction] - [0] - [pDCTstat->ByteLane] = RnkDlyFilterMin; - pDCTstat->CH_D_DIR_MaxMin_B_Dly[pDCTstat->Channel] - [pDCTstat->Direction] - [1] - [pDCTstat->ByteLane] = RnkDlyFilterMax; -} - static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 ChipSel) { @@ -679,26 +834,6 @@ static void StoreDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, pDCTstat->DQSDelay; } -static void StoreWrRdDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, u8 ChipSel, - u8 RnkDlyFilterMin, u8 RnkDlyFilterMax) -{ - u8 dn; - - if (pDCTstat->Direction == DQS_WRITEDIR) { - dn = ChipSel >> 1; - RnkDlyFilterMin += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane]; - RnkDlyFilterMax += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane]; - pDCTstat->DQSDelay += pDCTstat->CH_D_B_TxDqs[pDCTstat->Channel][dn][pDCTstat->ByteLane]; - } else { - RnkDlyFilterMin <<= 1; - RnkDlyFilterMax <<= 1; - pDCTstat->DQSDelay <<= 1; - } - mctEngDQSwindow_Save_D(pMCTstat, pDCTstat, ChipSel, RnkDlyFilterMin, RnkDlyFilterMax); - StoreDQSDatStrucVal_D(pMCTstat, pDCTstat, ChipSel); -} - static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 ChipSel) { @@ -720,33 +855,6 @@ static void GetDQSDatStrucVal_D(struct MCTStatStruc *pMCTstat, /* FindDQSDatDimmVal_D is not required since we use an array */ -static u8 MiddleDQS_D(u8 min, u8 max) -{ - u8 size; - size = max-min; - if (size % 2) - size++; /* round up if the size isn't even. */ - return ( min + (size >> 1)); -} - -static void TrainReadDQS_D(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, - u8 cs_start) -{ - print_debug_dqs("\t\tTrainReadPos ", 0, 2); - pDCTstat->Direction = DQS_READDIR; - TrainDQSPos_D(pMCTstat, pDCTstat, cs_start); -} - -static void TrainWriteDQS_D(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, - u8 cs_start) -{ - pDCTstat->Direction = DQS_WRITEDIR; - print_debug_dqs("\t\tTrainWritePos", 0, 2); - TrainDQSPos_D(pMCTstat, pDCTstat, cs_start); -} - static void proc_IOCLFLUSH_D(u32 addr_hi) { SetTargetWTIO_D(addr_hi); @@ -963,30 +1071,6 @@ static void ResetTargetWTIO_D(void) _WRMSR(0xc0010017, lo, hi); /* IORR0 Mask */ } -static void ReadDQSTestPattern_D(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, - u32 TestAddr_lo) -{ - /* Read a pattern of 72 bit times (per DQ), to test dram functionality. - * The pattern is a stress pattern which exercises both ISI and - * crosstalk. The number of cache lines to fill is dependent on DCT - * width mode and burstlength. - * Mode BL Lines Pattern no. - * ----+---+------------------- - * 64 4 9 0 - * 64 8 9 0 - * 64M 4 9 0 - * 64M 8 9 0 - * 128 4 18 1 - * 128 8 N/A - - */ - if (pDCTstat->Pattern == 0) - ReadL9TestPattern(TestAddr_lo); - else - ReadL18TestPattern(TestAddr_lo); - _MFENCE; -} - u32 SetUpperFSbase(u32 addr_hi) { /* Set the upper 32-bits of the Base address, 4GB aligned) for the @@ -1009,8 +1093,6 @@ void ResetDCTWrPtr_D(u32 dev, u32 index_reg, u32 index) Set_NB32_index_wait(dev, index_reg, index, val); } -/* mctEngDQSwindow_Save_D not required with arrays */ - void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA) { @@ -1021,8 +1103,8 @@ void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { pDCTstat = pDCTstatA + Node; if (pDCTstat->DCTSysLimit) { + TrainDQSRdWrPos_D(pMCTstat, pDCTstat); for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) { - TrainDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel); SetEccDQSRdWrPos_D(pMCTstat, pDCTstat, ChipSel); } } @@ -1137,27 +1219,6 @@ static void mct_SetDQSDelayCSR_D(struct MCTStatStruc *pMCTstat, } } -/* - * mct_SetDQSDelayAllCSR_D: - * Write the Delay value to all eight byte lanes. - */ -static void mct_SetDQSDelayAllCSR_D(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, - u8 cs_start) -{ - u8 ByteLane; - u8 ChipSel = cs_start; - - for (ChipSel = cs_start; ChipSel < (cs_start + 2); ChipSel++) { - if ( mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, pDCTstat->Channel, ChipSel)) { - for (ByteLane = 0; ByteLane < 8; ByteLane++) { - pDCTstat->ByteLane = ByteLane; - mct_SetDQSDelayCSR_D(pMCTstat, pDCTstat, ChipSel); - } - } - } -} - u8 mct_RcvrRankEnabled_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 Channel, u8 ChipSel) @@ -1196,7 +1257,7 @@ u32 mct_GetMCTSysAddr_D(struct MCTStatStruc *pMCTstat, reg = 0x40 + (receiver << 2) + reg_off; val = Get_NB32(dev, reg); - val &= ~0x0F; + val &= ~0xe007c01f; /* unganged mode DCT0+DCT1, sys addr of DCT1=node * base+DctSelBaseAddr+local ca base*/ @@ -1277,6 +1338,7 @@ exitGetAddrWNoError: print_debug_dqs("mct_GetMCTSysAddr_D: base_addr ", val, 2); print_debug_dqs("mct_GetMCTSysAddr_D: valid ", *valid, 2); print_debug_dqs("mct_GetMCTSysAddr_D: status ", pDCTstat->Status, 2); + print_debug_dqs("mct_GetMCTSysAddr_D: SysBase ", pDCTstat->DCTSysBase, 2); print_debug_dqs("mct_GetMCTSysAddr_D: HoleBase ", pDCTstat->DCTHoleBase, 2); print_debug_dqs("mct_GetMCTSysAddr_D: Cachetop ", pMCTstat->Sub4GCacheTop, 2); diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c index 528c782394..60bc01d5fa 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c @@ -2,6 +2,7 @@ * This file is part of the coreboot project. * * Copyright (C) 2010 Advanced Micro Devices, Inc. + * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,7 +26,6 @@ static void EnableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStr static void DisableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); static void PrepareC_MCT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); static void PrepareC_DCT(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct); -static void MultiplyDelay(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct); static void Restore_OnDimmMirror(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); static void Clear_OnDimmMirror(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); @@ -154,7 +154,6 @@ static void PhyWLPass2(struct MCTStatStruc *pMCTstat, Clear_OnDimmMirror(pMCTstat, pDCTstat); SetDllSpeedUp_D(pMCTstat, pDCTstat, dct); DisableAutoRefresh_D(pMCTstat, pDCTstat); - MultiplyDelay(pMCTstat, pDCTstat, dct); for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) { if (DIMMValid & (1 << (dimm << 1))) AgesaHwWlPhase1(pDCTstat->C_MCTPtr, pDCTstat->C_DCTPtr[dct], dimm, SecondPass); @@ -162,6 +161,9 @@ static void PhyWLPass2(struct MCTStatStruc *pMCTstat, } } +/* Write Levelization Training + * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.1 + */ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat) { diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c index 3d625dec78..6dac0aeb15 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mctmtr_d.c @@ -2,6 +2,7 @@ * This file is part of the coreboot project. * * Copyright (C) 2010 Advanced Micro Devices, Inc. + * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -201,12 +202,13 @@ static void SetMTRRrange_D(u32 Base, u32 *pLimit, u32 *pMtrrAddr, u16 MtrrType) void UMAMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA) { -/* UMA memory size may need splitting the MTRR configuration into two - Before training use NB_BottomIO or the physical memory size to set the MTRRs. - After training, add UMAMemTyping function to reconfigure the MTRRs based on - NV_BottomUMA (for UMA systems only). - This two-step process allows all memory to be cached for training -*/ + /* UMA memory size may need splitting the MTRR configuration into two + * Before training use NB_BottomIO or the physical memory size to set the MTRRs. + * After training, add UMAMemTyping function to reconfigure the MTRRs based on + * NV_BottomUMA (for UMA systems only). + * This two-step process allows all memory to be cached for training + */ + u32 Bottom32bIO, Cache32bTOP; u32 val; u32 addr; diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c index 013a1b9e86..6f9706132b 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mctndi_d.c @@ -2,6 +2,7 @@ * This file is part of the coreboot project. * * Copyright (C) 2010 Advanced Micro Devices, Inc. + * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -140,7 +141,7 @@ void InterleaveNodes_D(struct MCTStatStruc *pMCTstat, } if (DoIntlv) { - MCTMemClr_D(pMCTstat,pDCTstatA); + MCTMemClr_D(pMCTstat, pDCTstatA); /* Program Interleaving enabled on Node 0 map only.*/ MemSize0 <<= bsf(Nodes); /* MemSize=MemSize*2 (or 4, or 8) */ Dct0MemSize <<= bsf(Nodes); diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c index da2f3724b9..37e125e972 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c @@ -2,6 +2,7 @@ * This file is part of the coreboot project. * * Copyright (C) 2010 Advanced Micro Devices, Inc. + * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,10 +37,10 @@ u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2) val = Get_NB32(pDCTstat->dev_dct, dct * 0x100 + 0x78); val &= 7; - val = ((~val) & 0xFF) + 1; + val = ((~val) & 0xff) + 1; val += 6; - val &= 0xFF; - misc2 &= 0xFFF8FFFF; + val &= 0x7; + misc2 &= 0xfff8ffff; misc2 |= val << 16; /* DataTxFifoWrDly */ if (pDCTstat->LogicalCPUID & AMD_DR_Dx) misc2 |= 1 << 7; /* ProgOdtEn */ @@ -52,11 +53,13 @@ void mct_ExtMCTConfig_Cx(struct DCTStatStruc *pDCTstat) u32 val; if (pDCTstat->LogicalCPUID & (AMD_DR_Cx)) { - Set_NB32(pDCTstat->dev_dct, 0x11C, 0x0CE00FC0 | 1 << 29/* FlushWrOnStpGnt */); + /* Revision C */ + Set_NB32(pDCTstat->dev_dct, 0x11c, 0x0ce00fc0 | 1 << 29/* FlushWrOnStpGnt */); - val = Get_NB32(pDCTstat->dev_dct, 0x1B0); - val &= 0xFFFFF8C0; + val = Get_NB32(pDCTstat->dev_dct, 0x1b0); + val &= ~0x73f; val |= 0x101; /* BKDG recommended settings */ - Set_NB32(pDCTstat->dev_dct, 0x1B0, val); + + Set_NB32(pDCTstat->dev_dct, 0x1b0, val); } } diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c index 6de2f4eee1..b21b96a641 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsdi.c @@ -2,6 +2,7 @@ * This file is part of the coreboot project. * * Copyright (C) 2010 Advanced Micro Devices, Inc. + * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -172,6 +173,7 @@ static u32 mct_MR1(struct MCTStatStruc *pMCTstat, ret |= 1 << 11; } + /* program MrsAddress[12]=QOFF: based on F2x[1,0]84[Qoff] */ if (dword & (1 << 13)) ret |= 1 << 12; @@ -199,7 +201,8 @@ static u32 mct_MR0(struct MCTStatStruc *pMCTstat, /* program MrsAddress[6:4,2]=read CAS latency (CL):based on F2x[1,0]88[Tcl] */ dword2 = Get_NB32(dev, reg_off + 0x88); - ret |= (dword2 & 0xF) << 4; /* F2x88[3:0] to MrsAddress[6:4,2]=xxx0b */ + ret |= (dword2 & 0x7) << 4; /* F2x88[2:0] to MrsAddress[6:4] */ + ret |= ((dword2 & 0x8) >> 3) << 2; /* F2x88[3] to MrsAddress[2] */ /* program MrsAddress[12]=0 (PPD):slow exit */ if (dword & (1 << 23)) diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c index 8e5c268586..91e8f777c3 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c @@ -2,6 +2,7 @@ * This file is part of the coreboot project. * * Copyright (C) 2010 Advanced Micro Devices, Inc. + * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,25 +25,13 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 Pass); -static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat, - u8 rcvrEnDly, u8 Channel, - u8 receiver, u8 Pass); -static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, - u32 addr, u8 channel, - u8 pattern, u8 Pass); static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat); static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 Channel); static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 Channel); -static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, - u8 RcvrEnDly, u8 where, - u8 Channel, u8 Receiver, - u32 dev, u32 index_reg, - u8 Addl_Index, u8 Pass); -static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly); +static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly); static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct); static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat); @@ -50,17 +39,17 @@ static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat); /* Warning: These must be located so they do not cross a logical 16-bit segment boundary! */ static const u32 TestPattern0_D[] = { - 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, - 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, - 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, - 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, -}; -static const u32 TestPattern1_D[] = { 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, }; +static const u32 TestPattern1_D[] = { + 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, + 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, +}; static const u32 TestPattern2_D[] = { 0x12345678, 0x87654321, 0x23456789, 0x98765432, 0x59385824, 0x30496724, 0x24490795, 0x99938733, @@ -104,16 +93,87 @@ void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat, dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass); } +static void read_dqs_write_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg) +{ + uint8_t lane; + uint32_t dword; + + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + uint32_t wdt_reg; + if ((lane == 0) || (lane == 1)) + wdt_reg = 0x30; + if ((lane == 2) || (lane == 3)) + wdt_reg = 0x31; + if ((lane == 4) || (lane == 5)) + wdt_reg = 0x40; + if ((lane == 6) || (lane == 7)) + wdt_reg = 0x41; + if (lane == 8) + wdt_reg = 0x32; + wdt_reg += dimm * 3; + dword = Get_NB32_index_wait(dev, index_reg, wdt_reg); + if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) + current_total_delay[lane] = (dword & 0x00ff0000) >> 16; + if ((lane == 8) || (lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) + current_total_delay[lane] = dword & 0x000000ff; + } +} + +static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dimm, uint32_t index_reg) +{ + uint8_t lane; + uint32_t dword; + + for (lane = 0; lane < 8; lane++) { + uint32_t ret_reg; + if ((lane == 0) || (lane == 1)) + ret_reg = 0x10; + if ((lane == 2) || (lane == 3)) + ret_reg = 0x11; + if ((lane == 4) || (lane == 5)) + ret_reg = 0x20; + if ((lane == 6) || (lane == 7)) + ret_reg = 0x21; + ret_reg += dimm * 3; + dword = Get_NB32_index_wait(dev, index_reg, ret_reg); + if ((lane == 7) || (lane == 5) || (lane == 3) || (lane == 1)) { + dword &= ~(0x1ff << 16); + dword |= (current_total_delay[lane] & 0x1ff) << 16; + } + if ((lane == 6) || (lane == 4) || (lane == 2) || (lane == 0)) { + dword &= ~0x1ff; + dword |= current_total_delay[lane] & 0x1ff; + } + Set_NB32_index_wait(dev, index_reg, ret_reg, dword); + } +} + +static uint32_t convert_testaddr_and_channel_to_address(struct DCTStatStruc *pDCTstat, uint32_t testaddr, uint8_t channel) +{ + SetUpperFSbase(testaddr); + testaddr <<= 8; + + if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) { + testaddr += 8; /* second channel */ + } + + return testaddr; +} + +/* DQS Receiver Enable Training + * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.2 + */ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 Pass) { - u8 Channel, RcvrEnDly, RcvrEnDlyRmin; - u8 Test0, Test1, CurrTest, CurrTestSide0, CurrTestSide1; - u8 CTLRMaxDelay, _2Ranks, PatternA, PatternB; + u8 Channel; + u8 _2Ranks; u8 Addl_Index = 0; u8 Receiver; u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0; - u8 RcvrEnDlyLimit, Final_Value, MaxDelay_CH[2]; + u8 Final_Value; + u16 CTLRMaxDelay; + u16 MaxDelay_CH[2]; u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B; u32 PatternBuffer[64+4]; /* FIXME: need increase 8? */ u32 Errors; @@ -127,9 +187,20 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, u32 cr4; u32 lo, hi; + uint32_t dword; + uint8_t rank; + uint8_t lane; + uint16_t current_total_delay[MAX_BYTE_LANES]; + uint16_t candidate_total_delay[8]; + uint8_t data_test_pass_sr[2][8]; /* [rank][lane] */ + uint8_t data_test_pass[8]; /* [lane] */ + uint8_t data_test_pass_prev[8]; /* [lane] */ + uint8_t window_det_toggle[8]; + uint8_t trained[8]; + uint64_t result_qword1; + uint64_t result_qword2; + u8 valid; - u32 tmp; - u8 LastTest; print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0); print_debug_dqs("TrainRcvEn: Pass", Pass, 0); @@ -181,33 +252,103 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, Errors = 0; dev = pDCTstat->dev_dct; - CTLRMaxDelay = 0; for (Channel = 0; Channel < 2; Channel++) { print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1); print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1); pDCTstat->Channel = Channel; + CTLRMaxDelay = 0; MaxDelay_CH[Channel] = 0; index_reg = 0x98 + 0x100 * Channel; Receiver = mct_InitReceiver_D(pDCTstat, Channel); - /* There are four receiver pairs, loosely associated with chipselects. */ + /* There are four receiver pairs, loosely associated with chipselects. + * This is essentially looping over each DIMM. + */ for (; Receiver < 8; Receiver += 2) { Addl_Index = (Receiver >> 1) * 3 + 0x10; - LastTest = DQS_FAIL; - - /* mct_ModifyIndex_D */ - RcvrEnDlyRmin = RcvrEnDlyLimit = 0xff; print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2); - if(!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) { + if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) { continue; } + /* Clear data structures */ + for (lane = 0; lane < 8; lane++) { + data_test_pass_prev[lane] = 0; + trained[lane] = 0; + } + + /* 2.8.9.9.2 (1, 6) + * Retrieve gross and fine timing fields from write DQS registers + */ + read_dqs_write_timing_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); + + /* 2.8.9.9.2 (1) + * Program the Write Data Timing and Write ECC Timing register to + * the values stored in the DQS Write Timing Control register + * for each lane + */ + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + uint32_t wdt_reg; + + /* Calculate Write Data Timing register location */ + if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3)) + wdt_reg = 0x1; + if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7)) + wdt_reg = 0x2; + if (lane == 8) + wdt_reg = 0x3; + wdt_reg |= ((Receiver / 2) << 8); + + /* Set Write Data Timing register values */ + dword = Get_NB32_index_wait(dev, index_reg, wdt_reg); + if ((lane == 7) || (lane == 3)) { + dword &= ~(0x7f << 24); + dword |= (current_total_delay[lane] & 0x7f) << 24; + } + if ((lane == 6) || (lane == 2)) { + dword &= ~(0x7f << 16); + dword |= (current_total_delay[lane] & 0x7f) << 16; + } + if ((lane == 5) || (lane == 1)) { + dword &= ~(0x7f << 8); + dword |= (current_total_delay[lane] & 0x7f) << 8; + } + if ((lane == 8) || (lane == 4) || (lane == 0)) { + dword &= ~0x7f; + dword |= current_total_delay[lane] & 0x7f; + } + Set_NB32_index_wait(dev, index_reg, wdt_reg, dword); + } + + /* 2.8.9.9.2 (2) + * Program the Read DQS Timing Control and the Read DQS ECC Timing Control registers + * to 1/2 MEMCLK for all lanes + */ + for (lane = 0; lane < MAX_BYTE_LANES; lane++) { + uint32_t rdt_reg; + if ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3)) + rdt_reg = 0x5; + if ((lane == 4) || (lane == 5) || (lane == 6) || (lane == 7)) + rdt_reg = 0x6; + if (lane == 8) + rdt_reg = 0x7; + rdt_reg |= ((Receiver / 2) << 8); + if (lane == 8) + dword = 0x0000003f; + else + dword = 0x3f3f3f3f; + Set_NB32_index_wait(dev, index_reg, rdt_reg, dword); + } + + /* 2.8.9.9.2 (3) + * Select two test addresses for each rank present + */ TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid); - if(!valid) { /* Address not supported on current CS */ + if (!valid) { /* Address not supported on current CS */ continue; } @@ -229,171 +370,214 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2); print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2); - /* - * Get starting RcvrEnDly value + /* 2.8.9.9.2 (4, 5) + * Write 1 cache line of the appropriate test pattern to each test addresse */ - RcvrEnDly = mct_Get_Start_RcvrEnDly_1Pass(Pass); + mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, 0); /* rank 0 of DIMM, testpattern 0 */ + mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, 1); /* rank 0 of DIMM, testpattern 1 */ + if (_2Ranks) { + mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, 0); /*rank 1 of DIMM, testpattern 0 */ + mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, 1); /*rank 1 of DIMM, testpattern 1 */ + } - /* mct_GetInitFlag_D*/ - if (Pass == FirstPass) { - pDCTstat->DqsRcvEn_Pass = 0; - } else { - pDCTstat->DqsRcvEn_Pass=0xFF; +#if DQS_TRAIN_DEBUG > 0 + for (lane = 0; lane < 8; lane++) { + print_debug_dqs("\t\tTrainRcvEn54: lane: ", lane, 2); + print_debug_dqs("\t\tTrainRcvEn54: current_total_delay ", current_total_delay[lane], 2); } - pDCTstat->DqsRcvEn_Saved = 0; +#endif + /* 2.8.9.9.2 (6) + * Write gross and fine timing fields to read DQS registers + */ + write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); + + /* 2.8.9.9.2 (7) + * Loop over all delay values up to 1 MEMCLK (0x40 delay steps) from the initial delay values + * + * FIXME + * It is not clear if training should be discontinued if any test failures occur in the first + * 1 MEMCLK window, or if it should be discontinued if no successes occur in the first 1 MEMCLK + * window. Therefore, loop over up to 2 MEMCLK (0x80 delay steps) to be on the safe side. + */ + uint16_t current_delay_step; - while(RcvrEnDly < RcvrEnDlyLimit) { /* sweep Delay value here */ - print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3); + for (current_delay_step = 0; current_delay_step < 0x80; current_delay_step++) { + print_debug_dqs("\t\t\tTrainRcvEn541: current_delay_step ", current_delay_step, 3); - /* callback not required - if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly)) - goto skipDly; + /* 2.8.9.9.2 (7 D) + * Terminate if all lanes are trained */ + uint8_t all_lanes_trained = 1; + for (lane = 0; lane < 8; lane++) + if (!trained[lane]) + all_lanes_trained = 0; - /* Odd steps get another pattern such that even - and odd steps alternate. The pointers to the - patterns will be swaped at the end of the loop - so that they correspond. */ - if(RcvrEnDly & 1) { - PatternA = 1; - PatternB = 0; - } else { - /* Even step */ - PatternA = 0; - PatternB = 1; - } - - mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, PatternA); /* rank 0 of DIMM, testpattern 0 */ - mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, PatternB); /* rank 0 of DIMM, testpattern 1 */ - if(_2Ranks) { - mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, PatternA); /*rank 1 of DIMM, testpattern 0 */ - mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, PatternB); /*rank 1 of DIMM, testpattern 1 */ - } - - mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, 0, Channel, Receiver, dev, index_reg, Addl_Index, Pass); - - CurrTest = DQS_FAIL; - CurrTestSide0 = DQS_FAIL; - CurrTestSide1 = DQS_FAIL; - - mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0); /*cache fills */ - Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0, Channel, PatternA, Pass);/* ROM vs cache compare */ - proc_IOCLFLUSH_D(TestAddr0); - ResetDCTWrPtr_D(dev, index_reg, Addl_Index); - - print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0, 3); - - /* != 0x00 mean pass */ - - if(Test0 == DQS_PASS) { - mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B); /*cache fills */ - /* ROM vs cache compare */ - Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0B, Channel, PatternB, Pass); - proc_IOCLFLUSH_D(TestAddr0B); - ResetDCTWrPtr_D(dev, index_reg, Addl_Index); - - print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1, 3); + if (all_lanes_trained) + break; - if(Test1 == DQS_PASS) { - CurrTestSide0 = DQS_PASS; + /* 2.8.9.9.2 (7 A) + * Loop over all ranks + */ + for (rank = 0; rank < (_2Ranks + 1); rank++) { + /* 2.8.9.9.2 (7 A a-d) + * Read the first test address of the current rank + * Store the first data beat for analysis + * Reset read pointer in the DRAM controller FIFO + * Read the second test address of the current rank + * Store the first data beat for analysis + * Reset read pointer in the DRAM controller FIFO + */ + if (rank & 1) { + /* 2.8.9.9.2 (7 D) + * Invert read instructions to alternate data read order on the bus + */ + proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B); + result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel)); + write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); + proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1); + result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel)); + write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); + } else { + proc_IOCLFLUSH_D((rank == 0)?TestAddr0:TestAddr1); + result_qword1 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0:TestAddr1, Channel)); + write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); + proc_IOCLFLUSH_D((rank == 0)?TestAddr0B:TestAddr1B); + result_qword2 = read64_fs(convert_testaddr_and_channel_to_address(pDCTstat, (rank == 0)?TestAddr0B:TestAddr1B, Channel)); + write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); } - } - if(_2Ranks) { - mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1); /*cache fills */ - /* ROM vs cache compare */ - Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1, Channel, PatternA, Pass); - proc_IOCLFLUSH_D(TestAddr1); - ResetDCTWrPtr_D(dev, index_reg, Addl_Index); - - print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0, 3); - - if(Test0 == DQS_PASS) { - mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B); /*cache fills */ - /* ROM vs cache compare */ - Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1B, Channel, PatternB, Pass); - proc_IOCLFLUSH_D(TestAddr1B); - ResetDCTWrPtr_D(dev, index_reg, Addl_Index); - - print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1, 3); - if(Test1 == DQS_PASS) { - CurrTestSide1 = DQS_PASS; + /* 2.8.9.9.2 (7 A e) + * Compare both read patterns and flag passing ranks/lanes + */ + uint8_t result_lane_byte1; + uint8_t result_lane_byte2; + for (lane = 0; lane < 8; lane++) { + if (trained[lane] == 1) { +#if DQS_TRAIN_DEBUG > 0 + print_debug_dqs("\t\t\t\t\t\t\t\t lane already trained: ", lane, 4); +#endif + continue; } + + result_lane_byte1 = (result_qword1 >> (lane * 8)) & 0xff; + result_lane_byte2 = (result_qword2 >> (lane * 8)) & 0xff; + if ((result_lane_byte1 == 0x55) && (result_lane_byte2 == 0xaa)) + data_test_pass_sr[rank][lane] = 1; + else + data_test_pass_sr[rank][lane] = 0; +#if DQS_TRAIN_DEBUG > 0 + print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0x55, " | ", result_lane_byte1, 4); + print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", 0xaa, " | ", result_lane_byte2, 4); +#endif } } - if(_2Ranks) { - if ((CurrTestSide0 == DQS_PASS) && (CurrTestSide1 == DQS_PASS)) { - CurrTest = DQS_PASS; + /* 2.8.9.9.2 (7 B) + * If DIMM is dual rank, only use delays that pass testing for both ranks + */ + for (lane = 0; lane < 8; lane++) { + if (_2Ranks) { + if ((data_test_pass_sr[0][lane]) && (data_test_pass_sr[1][lane])) + data_test_pass[lane] = 1; + else + data_test_pass[lane] = 0; + } else { + data_test_pass[lane] = data_test_pass_sr[0][lane]; } - } else if (CurrTestSide0 == DQS_PASS) { - CurrTest = DQS_PASS; } - /* record first pass DqsRcvEn to stack */ - valid = mct_SavePassRcvEnDly_D(pDCTstat, RcvrEnDly, Channel, Receiver, Pass); + /* 2.8.9.9.2 (7 E) + * For each lane, update the DQS receiver delay setting in support of next iteration + */ + for (lane = 0; lane < 8; lane++) { + if (trained[lane] == 1) + continue; + + /* 2.8.9.9.2 (7 C a) + * Save the total delay of the first success after a failure for later use + */ + if ((data_test_pass[lane] == 1) && (data_test_pass_prev[lane] == 0)) { + candidate_total_delay[lane] = current_total_delay[lane]; + window_det_toggle[lane] = 0; + } - /* Break(1:RevF,2:DR) or not(0) FIXME: This comment deosn't make sense */ - if(valid == 2 || (LastTest == DQS_FAIL && valid == 1)) { - RcvrEnDlyRmin = RcvrEnDly; - break; + /* 2.8.9.9.2 (7 C b) + * If the current delay failed testing add 1/8 UI to the current delay + */ + if (data_test_pass[lane] == 0) + current_total_delay[lane] += 0x4; + + /* 2.8.9.9.2 (7 C c) + * If the current delay passed testing alternately add either 1/32 UI or 1/4 UI to the current delay + * If 1.25 UI of delay have been added with no failures the lane is considered trained + */ + if (data_test_pass[lane] == 1) { + /* See if lane is trained */ + if ((current_total_delay[lane] - candidate_total_delay[lane]) >= 0x28) { + trained[lane] = 1; + + /* Calculate and set final lane delay value + * The final delay is the candidate delay + 7/8 UI + */ + current_total_delay[lane] = candidate_total_delay[lane] + 0x1c; + } else { + if (window_det_toggle[lane] == 0) { + current_total_delay[lane] += 0x1; + window_det_toggle[lane] = 1; + } else { + current_total_delay[lane] += 0x8; + window_det_toggle[lane] = 0; + } + } + } } - LastTest = CurrTest; - - /* swap the rank 0 pointers */ - tmp = TestAddr0; - TestAddr0 = TestAddr0B; - TestAddr0B = tmp; - - /* swap the rank 1 pointers */ - tmp = TestAddr1; - TestAddr1 = TestAddr1B; - TestAddr1B = tmp; - - print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3); + /* Update delays in hardware */ + write_dqs_receiver_enable_control_registers(current_total_delay, dev, (Receiver >> 1), index_reg); - RcvrEnDly++; - - } /* while RcvrEnDly */ - - print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2); - print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin, 3); - print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit, 3); - if(RcvrEnDlyRmin == RcvrEnDlyLimit) { - /* no passing window */ - pDCTstat->ErrStatus |= 1 << SB_NORCVREN; - Errors |= 1 << SB_NORCVREN; - pDCTstat->ErrCode = SC_FatalErr; + /* Save previous results for comparison in the next iteration */ + for (lane = 0; lane < 8; lane++) + data_test_pass_prev[lane] = data_test_pass[lane]; } - if(RcvrEnDly > (RcvrEnDlyLimit - 1)) { - /* passing window too narrow, too far delayed*/ - pDCTstat->ErrStatus |= 1 << SB_SmallRCVR; - Errors |= 1 << SB_SmallRCVR; - pDCTstat->ErrCode = SC_FatalErr; - RcvrEnDly = RcvrEnDlyLimit - 1; - pDCTstat->CSTrainFail |= 1 << Receiver; - pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel); - } - - /* CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass */ - mct_Average_RcvrEnDly_Pass(pDCTstat, RcvrEnDly, RcvrEnDlyLimit, Channel, Receiver, Pass); +#if DQS_TRAIN_DEBUG > 0 + for (lane = 0; lane < 8; lane++) + print_debug_dqs_pair("\t\tTrainRcvEn55: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2); +#endif - mct_SetFinalRcvrEnDly_D(pDCTstat, RcvrEnDly, Final_Value, Channel, Receiver, dev, index_reg, Addl_Index, Pass); + /* Find highest delay value and save for later use */ + for (lane = 0; lane < 8; lane++) + if (current_total_delay[lane] > CTLRMaxDelay) + CTLRMaxDelay = current_total_delay[lane]; - if(pDCTstat->ErrStatus & (1 << SB_SmallRCVR)) { - Errors |= 1 << SB_SmallRCVR; + /* See if any lanes failed training, and set error flags appropriately + * For all trained lanes, save delay values for later use + */ + for (lane = 0; lane < 8; lane++) { + if (trained[lane]) { + pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1][lane] = current_total_delay[lane]; + } else { + printk(BIOS_WARNING, "TrainRcvrEn: WARNING: Lane %d of receiver %d on channel %d failed training!\n", lane, Receiver, Channel); + + /* Set error flags */ + pDCTstat->ErrStatus |= 1 << SB_NORCVREN; + Errors |= 1 << SB_NORCVREN; + pDCTstat->ErrCode = SC_FatalErr; + pDCTstat->CSTrainFail |= 1 << Receiver; + pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel); + } } - RcvrEnDly += Pass1MemClkDly; - if(RcvrEnDly > CTLRMaxDelay) { - CTLRMaxDelay = RcvrEnDly; - } + /* 2.8.9.9.2 (8) + * Flush the receiver FIFO + * Write one full cache line of non-0x55/0xaa data to one of the test addresses, then read it back to flush the FIFO + */ - } /* while Receiver */ + WriteLNTestPattern(TestAddr0 << 8, (uint8_t *)TestPattern2_D, 1); + mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0); + } MaxDelay_CH[Channel] = CTLRMaxDelay; - } /* for Channel */ + } CTLRMaxDelay = MaxDelay_CH[0]; if (MaxDelay_CH[1] > CTLRMaxDelay) @@ -428,31 +612,31 @@ static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat, #if DQS_TRAIN_DEBUG > 0 { - u8 Channel; + u8 ChannelDTD; printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n"); - for(Channel = 0; Channel<2; Channel++) { + for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) { printk(BIOS_DEBUG, "Channel:%x: %x\n", - Channel, pDCTstat->CH_MaxRdLat[Channel]); + ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]); } } #endif #if DQS_TRAIN_DEBUG > 0 { - u8 val; - u8 Channel, Receiver; + u16 valDTD; + u8 ChannelDTD, ReceiverDTD; u8 i; - u8 *p; + u16 *p; printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n"); - for(Channel = 0; Channel < 2; Channel++) { - printk(BIOS_DEBUG, "Channel:%x\n", Channel); - for(Receiver = 0; Receiver<8; Receiver+=2) { - printk(BIOS_DEBUG, "\t\tReceiver:%x:", Receiver); - p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1]; + for(ChannelDTD = 0; ChannelDTD < 2; ChannelDTD++) { + printk(BIOS_DEBUG, "Channel:%x\n", ChannelDTD); + for(ReceiverDTD = 0; ReceiverDTD<8; ReceiverDTD+=2) { + printk(BIOS_DEBUG, "\t\tReceiver:%x:", ReceiverDTD); + p = pDCTstat->CH_D_B_RCVRDLY[ChannelDTD][ReceiverDTD>>1]; for (i=0;i<8; i++) { - val = p[i]; - printk(BIOS_DEBUG, "%x ", val); + valDTD = p[i]; + printk(BIOS_DEBUG, " %03x", valDTD); } printk(BIOS_DEBUG, "\n"); } @@ -475,15 +659,6 @@ u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct) } } -static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, u8 where, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass/*, u8 *p*/) -{ - /* - * Program final DqsRcvEnDly to additional index for DQS receiver - * enabled delay - */ - mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, where, Channel, Receiver, dev, index_reg, Addl_Index, Pass); -} - static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat) { u8 ch_end, ch; @@ -514,17 +689,20 @@ static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat) * Function only used once so it was inlined. */ -void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, +/* Set F2x[1, 0]9C_x[2B:10] DRAM DQS Receiver Enable Timing Control Registers + * See BKDG Rev. 3.62 page 268 for more information + */ +void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u16 RcvrEnDly, u8 FinalValue, u8 Channel, u8 Receiver, u32 dev, u32 index_reg, u8 Addl_Index, u8 Pass) { u32 index; u8 i; - u8 *p; + u16 *p; u32 val; - if(RcvrEnDly == 0xFE) { - /*set the boudary flag */ + if(RcvrEnDly == 0x1fe) { + /*set the boundary flag */ pDCTstat->Status |= 1 << SB_DQSRcvLimit; } @@ -543,27 +721,57 @@ void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly, val = Get_NB32_index_wait(dev, index_reg, index); if(i & 1) { /* odd byte lane */ - val &= ~(0xFF << 16); - val |= (RcvrEnDly << 16); + val &= ~(0x1ff << 16); + val |= ((RcvrEnDly & 0x1ff) << 16); } else { /* even byte lane */ - val &= ~0xFF; - val |= RcvrEnDly; + val &= ~0x1ff; + val |= (RcvrEnDly & 0x1ff); } Set_NB32_index_wait(dev, index_reg, index, val); } } -static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly) +/* Calculate MaxRdLatency + * Algorithm detailed in the Fam10h BKDG Rev. 3.62 section 2.8.9.9.5 + */ +static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u16 DQSRcvEnDly) { u32 dev; u32 reg; - u16 SubTotal; + u32 SubTotal; u32 index_reg; u32 reg_off; u32 val; - u32 valx; + + uint8_t cpu_val_n; + uint8_t cpu_val_p; + + u16 freq_tab[] = {400, 533, 667, 800}; + + /* Set up processor-dependent values */ + if (pDCTstat->LogicalCPUID & AMD_DR_Dx) { + /* Revision D and above */ + cpu_val_n = 4; + cpu_val_p = 29; + } else if (pDCTstat->LogicalCPUID & AMD_DR_Cx) { + /* Revision C */ + uint8_t package_type = mctGet_NVbits(NV_PACK_TYPE); + if ((package_type == PT_L1) /* Socket F (1207) */ + || (package_type == PT_M2) /* Socket AM3 */ + || (package_type == PT_S1)) { /* Socket S1g<x> */ + cpu_val_n = 10; + cpu_val_p = 11; + } else { + cpu_val_n = 4; + cpu_val_p = 29; + } + } else { + /* Revision B and below */ + cpu_val_n = 10; + cpu_val_p = 11; + } if(pDCTstat->GangedMode) Channel = 0; @@ -598,49 +806,32 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQ val = Get_NB32(dev, 0x78 + reg_off); SubTotal += 8 - (val & 0x0f); - /* Convert bits 7-5 (also referred to as the course delay) of + /* Convert bits 7-5 (also referred to as the coarse delay) of * the current (or worst case) DQS receiver enable delay to * 1/2 MEMCLKs units, rounding up, and add this to the sub-total. */ - SubTotal += DQSRcvEnDly >> 5; /*BOZO-no rounding up */ + SubTotal += DQSRcvEnDly >> 5; /* Retrieve gross delay portion of value */ - /* Add 5.5 to the sub-total. 5.5 represents part of the + /* Add "P" to the sub-total. "P" represents part of the * processor specific constant delay value in the DRAM * clock domain. */ SubTotal <<= 1; /*scale 1/2 MemClk to 1/4 MemClk */ - SubTotal += 11; /*add 5.5 1/2MemClk */ + SubTotal += cpu_val_p; /*add "P" 1/2MemClk */ + SubTotal >>= 1; /*scale 1/4 MemClk back to 1/2 MemClk */ /* Convert the sub-total (in 1/2 MEMCLKs) to northbridge - * clocks (NCLKs) as follows (assuming DDR400 and assuming - * that no P-state or link speed changes have occurred). + * clocks (NCLKs) */ + SubTotal *= 200 * ((Get_NB32(pDCTstat->dev_nbmisc, 0xd4) & 0x1f) + 4); + SubTotal /= freq_tab[((Get_NB32(pDCTstat->dev_dct, 0x94 + reg_off) & 0x7) - 3)]; + SubTotal = (SubTotal + (2 - 1)) / 2; /* Round up */ - /* New formula: - * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */ - val = Get_NB32(dev, 0x94 + reg_off); - - /* SubTotal div 4 to scale 1/4 MemClk back to MemClk */ - val &= 7; - if (val >= 3) { - val <<= 1; - } else - val += 3; - valx = val << 2; - - val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4); - SubTotal *= ((val & 0x1f) + 4 ) * 3; - - SubTotal /= valx; - if (SubTotal % valx) { /* round up */ - SubTotal++; - } - - /* Add 5 NCLKs to the sub-total. 5 represents part of the + /* Add "N" NCLKs to the sub-total. "N" represents part of the * processor specific constant value in the northbridge * clock domain. */ - SubTotal += 5; + SubTotal += (cpu_val_n) / 2; pDCTstat->CH_MaxRdLat[Channel] = SubTotal; if(pDCTstat->GangedMode) { @@ -659,143 +850,6 @@ static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQ Set_NB32(dev, reg, val); } -static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat, - u8 rcvrEnDly, u8 Channel, - u8 receiver, u8 Pass) -{ - u8 i; - u8 mask_Saved, mask_Pass; - u8 *p; - - /* calculate dimm offset - * not needed for CH_D_B_RCVRDLY array - */ - - /* cmp if there has new DqsRcvEnDly to be recorded */ - mask_Pass = pDCTstat->DqsRcvEn_Pass; - - if(Pass == SecondPass) { - mask_Pass = ~mask_Pass; - } - - mask_Saved = pDCTstat->DqsRcvEn_Saved; - if(mask_Pass != mask_Saved) { - - /* find desired stack offset according to channel/dimm/byte */ - if(Pass == SecondPass) { - /* FIXME: SecondPass is never used for Barcelona p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1]; */ - p = 0; /* Keep the compiler happy. */ - } else { - mask_Saved &= mask_Pass; - p = pDCTstat->CH_D_B_RCVRDLY[Channel][receiver>>1]; - } - for(i=0; i < 8; i++) { - /* cmp per byte lane */ - if(mask_Pass & (1 << i)) { - if(!(mask_Saved & (1 << i))) { - /* save RcvEnDly to stack, according to - the related Dimm/byte lane */ - p[i] = (u8)rcvrEnDly; - mask_Saved |= 1 << i; - } - } - } - pDCTstat->DqsRcvEn_Saved = mask_Saved; - } - return mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass); -} - -static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, - u32 addr, u8 channel, - u8 pattern, u8 Pass) -{ - /* Compare only the first beat of data. Since target addrs are cache - * line aligned, the Channel parameter is used to determine which - * cache QW to compare. - */ - - u8 *test_buf; - u8 i; - u8 result; - u8 value; - - if(Pass == FirstPass) { - if(pattern==1) { - test_buf = (u8 *)TestPattern1_D; - } else { - test_buf = (u8 *)TestPattern0_D; - } - } else { /* Second Pass */ - test_buf = (u8 *)TestPattern2_D; - } - - SetUpperFSbase(addr); - addr <<= 8; - - if((pDCTstat->Status & (1<<SB_128bitmode)) && channel ) { - addr += 8; /* second channel */ - test_buf += 8; - } - - print_debug_dqs_pair("\t\t\t\t\t\t test_buf = ", (u32)test_buf, " | addr_lo = ", addr, 4); - for (i=0; i<8; i++, addr ++) { - value = read32_fs(addr); - print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", test_buf[i], " | ", value, 4); - - if (value == test_buf[i]) { - pDCTstat->DqsRcvEn_Pass |= (1<<i); - } else { - pDCTstat->DqsRcvEn_Pass &= ~(1<<i); - } - } - - result = DQS_FAIL; - - if (Pass == FirstPass) { - /* if first pass, at least one byte lane pass - * ,then DQS_PASS=1 and will set to related reg. - */ - if(pDCTstat->DqsRcvEn_Pass != 0) { - result = DQS_PASS; - } else { - result = DQS_FAIL; - } - - } else { - /* if second pass, at least one byte lane fail - * ,then DQS_FAIL=1 and will set to related reg. - */ - if(pDCTstat->DqsRcvEn_Pass != 0xFF) { - result = DQS_FAIL; - } else { - result = DQS_PASS; - } - } - - /* if second pass, we can't find the fail until FFh, - * then let it fail to save the final delay - */ - if((Pass == SecondPass) && (pDCTstat->Status & (1 << SB_DQSRcvLimit))) { - result = DQS_FAIL; - pDCTstat->DqsRcvEn_Pass = 0; - } - - /* second pass needs to be inverted - * FIXME? this could be inverted in the above code to start with... - */ - if(Pass == SecondPass) { - if (result == DQS_PASS) { - result = DQS_FAIL; - } else if (result == DQS_FAIL) { /* FIXME: doesn't need to be else if */ - result = DQS_PASS; - } - } - - - return result; -} - static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat) { @@ -854,7 +908,7 @@ void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel) u32 index_reg; u32 index; u8 ChipSel; - u8 *p; + u16 *p; u32 val; dev = pDCTstat->dev_dct; @@ -884,7 +938,7 @@ static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) { if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) { - u8 *p; + u16 *p; p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1]; /* DQS Delay Value of Data Bytelane @@ -920,6 +974,10 @@ static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, SetEccDQSRcvrEn_D(pDCTstat, Channel); } +/* 2.8.9.9.4 + * ECC Byte Lane Training + * DQS Receiver Enable Delay + */ void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA) { @@ -1017,7 +1075,9 @@ static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat, avRecValue -= 3; else */ - if (pDCTstat->LogicalCPUID & AMD_DR_Cx) + if (pDCTstat->LogicalCPUID & AMD_DR_Dx) + avRecValue -= 8; + else if (pDCTstat->LogicalCPUID & AMD_DR_Cx) avRecValue -= 8; else if (pDCTstat->LogicalCPUID & AMD_DR_Bx) avRecValue -= 8; diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c index c00975692e..f01e011c32 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc1p.c @@ -2,6 +2,7 @@ * This file is part of the coreboot project. * * Copyright (C) 2010 Advanced Micro Devices, Inc. + * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,17 +37,12 @@ u32 SetupDqsPattern_1PassB(u8 pass) return (u32) TestPattern0_D; } -u8 mct_Get_Start_RcvrEnDly_1Pass(u8 pass) -{ - return 0; -} - -static u8 mct_Average_RcvrEnDly_1Pass(struct DCTStatStruc *pDCTstat, u8 Channel, u8 Receiver, +static u16 mct_Average_RcvrEnDly_1Pass(struct DCTStatStruc *pDCTstat, u8 Channel, u8 Receiver, u8 Pass) { - u8 i, MaxValue; - u8 *p; - u8 val; + u16 i, MaxValue; + u16 *p; + u16 val; MaxValue = 0; p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1]; @@ -76,8 +72,8 @@ u8 mct_SaveRcvEnDly_D_1Pass(struct DCTStatStruc *pDCTstat, u8 pass) return ret; } -u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, - u8 RcvrEnDly, u8 RcvrEnDlyLimit, +u16 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, + u16 RcvrEnDly, u16 RcvrEnDlyLimit, u8 Channel, u8 Receiver, u8 Pass) { diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c index b01889d9b4..796febcb0b 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc2p.c @@ -2,6 +2,7 @@ * This file is part of the coreboot project. * * Copyright (C) 2010 Advanced Micro Devices, Inc. + * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -74,15 +75,15 @@ u8 mct_Get_Start_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, return RcvrEnDly; } -u8 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, - u8 RcvrEnDly, u8 RcvrEnDlyLimit, +u16 mct_Average_RcvrEnDly_Pass(struct DCTStatStruc *pDCTstat, + u16 RcvrEnDly, u16 RcvrEnDlyLimit, u8 Channel, u8 Receiver, u8 Pass) { u8 i; - u8 *p; - u8 *p_1; - u8 val; - u8 val_1; + u16 *p; + u16 *p_1; + u16 val; + u16 val_1; u8 valid = 1; u8 bn; diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c index ea5c8c7bb7..920f514415 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mcttmrl.c @@ -2,6 +2,7 @@ * This file is part of the coreboot project. * * Copyright (C) 2010 Advanced Micro Devices, Inc. + * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -191,10 +192,10 @@ static void maxRdLatencyTrain_D(struct MCTStatStruc *pMCTstat, #if DQS_TRAIN_DEBUG > 0 { - u8 Channel; + u8 ChannelDTD; printk(BIOS_DEBUG, "maxRdLatencyTrain: CH_MaxRdLat:\n"); - for(Channel = 0; Channel<2; Channel++) { - printk(BIOS_DEBUG, "Channel: %02x: %02x\n", Channel, pDCTstat->CH_MaxRdLat[Channel]); + for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) { + printk(BIOS_DEBUG, "Channel: %02x: %02x\n", ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]); } } #endif diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c index cdeae49f3a..1c3e322d54 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c @@ -2,6 +2,7 @@ * This file is part of the coreboot project. * * Copyright (C) 2010 Advanced Micro Devices, Inc. + * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -58,9 +59,9 @@ void PrepareC_DCT(struct MCTStatStruc *pMCTstat, pDCTstat->C_DCTPtr[dct]->LogicalCPUID = pDCTstat->LogicalCPUID; for (dimm = 0; dimm < MAX_DIMMS; dimm++) { - if (DimmValid & (1 << dimm)) + if (DimmValid & (1 << (dimm << 1))) pDCTstat->C_DCTPtr[dct]->DimmPresent[dimm] = 1; - if (Dimmx8Present & (1 << dimm)) + if (Dimmx8Present & (1 << (dimm << 1))) pDCTstat->C_DCTPtr[dct]->DimmX8Present[dimm] = 1; } @@ -88,9 +89,9 @@ void PrepareC_DCT(struct MCTStatStruc *pMCTstat, u8 DimmRanks; if (DimmValid & (1 << (dimm << 1))) { DimmRanks = 1; - if (pDCTstat->DimmDRPresent & (1 << (dimm+dct))) + if (pDCTstat->DimmDRPresent & (1 << ((dimm << 1) + dct))) DimmRanks = 2; - else if (pDCTstat->DimmQRPresent & (1 << (dimm+dct))) + else if (pDCTstat->DimmQRPresent & (1 << ((dimm << 1) + dct))) DimmRanks = 4; } else DimmRanks = 0; @@ -249,35 +250,6 @@ static void ChangeMemClk(struct MCTStatStruc *pMCTstat, } } -/* Multiply the previously saved delay values in Pass 1, step #5 by - (target frequency)/400 to find the gross and fine delay initialization - values at the target frequency. - */ -void MultiplyDelay(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, u8 dct) -{ - u16 index; - u8 Multiplier; - u8 gross, fine; - u16 total; - - Multiplier = pDCTstat->TargetFreq; - - for (index=0; index < MAX_BYTE_LANES*MAX_LDIMMS; index ++) { - gross = pDCTstat->C_DCTPtr[dct]->WLGrossDelay[index]; - fine = pDCTstat->C_DCTPtr[dct]->WLFineDelay[index]; - - total = gross << 5 | fine; - total *= Multiplier; - if (total % 3) - total = total / 3 + 1; - else - total = total / 3; - pDCTstat->C_DCTPtr[dct]->WLGrossDelay[index] = (total & 0xFF) >> 5; - pDCTstat->C_DCTPtr[dct]->WLFineDelay[index] = total & 0x1F; - } -} - /* * the DRAM controller to bring the DRAMs out of self refresh mode. */ @@ -352,9 +324,9 @@ void SetTargetFreq(struct MCTStatStruc *pMCTstat, if (!DCT1Present) pDCTstat->CSPresent = pDCTstat->CSPresent_DCT[0]; - else if (pDCTstat->GangedMode) { + else if (pDCTstat->GangedMode) pDCTstat->CSPresent = 0; - } else + else pDCTstat->CSPresent = pDCTstat->CSPresent_DCT[1]; FreqChgCtrlWrd(pMCTstat, pDCTstat); diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c index 8e7e70c4c7..397fd77275 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c @@ -2,6 +2,7 @@ * This file is part of the coreboot project. * * Copyright (C) 2010 Advanced Micro Devices, Inc. + * Copyright (C) 2015 Timothy Pearson <tpearson@raptorengineeringinc.com>, Raptor Engineering * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -235,6 +236,65 @@ u32 swapBankBits(sDCTStruct *pDCTData, u32 MRSValue) return MRSValue; } +static uint16_t unbuffered_dimm_nominal_termination_emrs(uint8_t number_of_dimms, uint8_t frequency_index, uint8_t rank_count, uint8_t rank) +{ + uint16_t term; + + /* FIXME + * Mainboards need to be able to specify the maximum number of DIMMs installable per channel + * For now assume a maximum of 2 DIMMs per channel can be installed + */ + uint8_t MaxDimmsInstallable = 2; + + if (number_of_dimms == 1) { + if (MaxDimmsInstallable < 3) { + term = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */ + } else { + if (rank_count == 1) { + term = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */ + } else { + if (rank == 0) + term = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */ + else + term = 0x00; /* Rtt_Nom=OFF */ + } + } + } else { + if (frequency_index < 5) + term = 0x0044; /* Rtt_Nom=RZQ/6=40 Ohm */ + else + term = 0x0204; /* Rtt_Nom=RZQ/8=30 Ohm */ + } + + return term; +} + +static uint16_t unbuffered_dimm_dynamic_termination_emrs(uint8_t number_of_dimms, uint8_t frequency_index, uint8_t rank_count, uint8_t rank) +{ + uint16_t term; + + /* FIXME + * Mainboards need to be able to specify the maximum number of DIMMs installable per channel + * For now assume a maximum of 2 DIMMs per channel can be installed + */ + uint8_t MaxDimmsInstallable = 2; + + if (number_of_dimms == 1) { + if (MaxDimmsInstallable < 3) { + term = 0x00; /* Rtt_WR=off */ + } else { + if (rank_count == 1) + term = 0x00; /* Rtt_WR=off */ + else + term = 0x200; /* Rtt_WR=RZQ/4=60 Ohm */ + } + } else { + term = 0x400; /* Rtt_WR=RZQ/2=120 Ohm */ + } + + return term; +} + /*----------------------------------------------------------------------------- * void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *DCTData, u8 Dimm, BOOL WL) * @@ -295,48 +355,23 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl) if (pDCTData->Status[DCT_STATUS_REGISTERED]) { tempW1 = RttNomTargetRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank); } else { - if (wl) - { - if (pDCTData->MaxDimmsInstalled == 1) - { - if ((pDCTData->DimmRanks[dimm] == 2) && (rank == 0)) - { - tempW1 = 0x00; /* Rtt_Nom=OFF */ - } + if (wl) { + if (rank == 0) { + /* Get Rtt_WR for the current DIMM and rank */ + uint16_t dynamic_term = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank); + + /* Convert dynamic termination code to corresponding nominal termination code */ + if (dynamic_term == 0x200) + tempW1 = 0x04; + else if (dynamic_term == 0x400) + tempW1 = 0x40; else - { - tempW1 = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */ - } - } - else /* 2 Dimms or more per channel */ - { - if ((pDCTData->DimmRanks[dimm] == 2) && (rank == 1)) - { - tempW1 = 0x00; /* Rtt_Nom=OFF */ - } - else - { - if (MemClkFreq == 6) { - tempW1 = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */ - } else { - tempW1 = 0x40;/* Rtt_Nom=RZQ/2=120 Ohm */ - } - } - } - } - else { /* 1 or 4 Dimms per channel */ - if ((pDCTData->MaxDimmsInstalled == 1) || (pDCTData->MaxDimmsInstalled == 4)) - { - tempW1 = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */ - } - else /* 2 or 3 Dimms per channel */ - { - if (MemClkFreq < 5) { - tempW1 = 0x0044; /* Rtt_Nom=RZQ/6=40 Ohm */ - } else { - tempW1 = 0x0204; /* Rtt_Nom=RZQ/8=30 Ohm */ - } + tempW1 = 0x0; + } else { + tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank); } + } else { + tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank); } } tempW=tempW|tempW1; @@ -353,20 +388,22 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl) else { /* Disable the output drivers of all other ranks for - * the target DIMM. */ + * the target DIMM. + */ tempW = bitTestSet(tempW1, Qoff); } } - /* program MrsAddress[5,1]=output driver impedance control (DIC): - * based on F2x[1,0]84[DrvImpCtrl] */ + /* Program MrsAddress[5,1]=output driver impedance control (DIC): + * based on F2x[1,0]84[DrvImpCtrl] + */ tempW1 = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, DRAM_MRS_REGISTER, DrvImpCtrlStart, DrvImpCtrlEnd); - if (bitTest(tempW1,1)) - {tempW = bitTestSet(tempW, 5);} - if (bitTest(tempW1,0)) - {tempW = bitTestSet(tempW, 1);} + if (bitTest(tempW1, 1)) + tempW = bitTestSet(tempW, 5); + if (bitTest(tempW1, 0)) + tempW = bitTestSet(tempW, 1); - tempW = swapAddrBits_wl(pDCTData,tempW); + tempW = swapAddrBits_wl(pDCTData, tempW); set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, DRAM_INIT, MrsAddressStart, MrsAddressEnd, tempW); @@ -404,29 +441,10 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl) if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED])) tempW+=0x8; /* determine Rtt_WR for WL & Normal mode */ - if (pDCTData->Status[DCT_STATUS_REGISTERED]) { + if (pDCTData->Status[DCT_STATUS_REGISTERED]) tempW1 = RttWrRegDimm(pMCTData, pDCTData, dimm, wl, MemClkFreq, rank); - } else { - if (wl) - { - tempW1 = 0x00; /* Rtt_WR=off */ - } - else - { - if (pDCTData->MaxDimmsInstalled == 1) - { - tempW1 = 0x00; /* Rtt_WR=off */ - } - else - { - if (MemClkFreq == 6) { - tempW1 = 0x200; /* Rtt_WR=RZQ/4=60 Ohm */ - } else { - tempW1 = 0x400; /* Rtt_WR=RZQ/2 */ - } - } - } - } + else + tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[dimm], rank); tempW=tempW|tempW1; tempW = swapAddrBits_wl(pDCTData,tempW); set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, @@ -483,38 +501,10 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl) } /* determine Rtt_Nom for WL & Normal mode */ - if (pDCTData->Status[DCT_STATUS_REGISTERED]) { + if (pDCTData->Status[DCT_STATUS_REGISTERED]) tempW1 = RttNomNonTargetRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank); - } else { - if (wl) - { - if ((pDCTData->DimmRanks[currDimm] == 2) && (rank == 1)) - { - tempW1 = 0x00; /* Rtt_Nom=OFF */ - } - else - { - if (MemClkFreq < 5) { - tempW1 = 0x0044;/* Rtt_Nom=RZQ/6=40 Ohm */ - } else { - tempW1 = 0x0204;/* Rtt_Nom=RZQ/8=30 Ohm */ - } - } - } - else { /* 1 or 4 Dimms per channel */ - if (pDCTData->MaxDimmsInstalled == 4) - { - tempW1 = 0x04; /* Rtt_Nom=RZQ/4=60 Ohm */ - } - else { /* 2 or 3 Dimms per channel */ - if (MemClkFreq < 5) { - tempW1 = 0x0044; /* Rtt_Nom=RZQ/6=40 Ohm */ - } else { - tempW1 = 0x0204; /* Rtt_Nom=RZQ/8=30 Ohm */ - } - } - } - } + else + tempW1 = unbuffered_dimm_nominal_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank); tempW=tempW|tempW1; /* program MrsAddress[5,1]=output driver impedance control (DIC): * based on F2x[1,0]84[DrvImpCtrl] */ @@ -560,22 +550,10 @@ void prepareDimms(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm, BOOL wl) if ((pDCTData->LogicalCPUID & AMD_DR_Bx) && (pDCTData->Status[DCT_STATUS_REGISTERED])) tempW+=0x8; /* determine Rtt_WR for WL & Normal mode */ - if (pDCTData->Status[DCT_STATUS_REGISTERED]) { + if (pDCTData->Status[DCT_STATUS_REGISTERED]) tempW1 = RttWrRegDimm(pMCTData, pDCTData, currDimm, wl, MemClkFreq, rank); - } else { - if (wl) - { - tempW1 = 0x00; /* Rtt_WR=off */ - } - else - { - if (MemClkFreq == 6) { - tempW1 = 0x200; /* Rtt_WR=RZQ/4=60 Ohm */ - } else { - tempW1 = 0x400; /* Rtt_WR=RZQ/2 */ - } - } - } + else + tempW1 = unbuffered_dimm_dynamic_termination_emrs(pDCTData->MaxDimmsInstalled, MemClkFreq, pDCTData->DimmRanks[currDimm], rank); tempW=tempW|tempW1; tempW = swapAddrBits_wl(pDCTData,tempW); set_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, FUN_DCT, @@ -646,9 +624,14 @@ void programODT(sMCTStruct *pMCTData, sDCTStruct *pDCTData, u8 dimm) */ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass) { - u8 ByteLane, Seed_Gross, Seed_Fine; + u8 ByteLane, Seed_Gross, Seed_Fine, MemClkFreq; u32 Value, Addr; u16 Addl_Data_Offset, Addl_Data_Port; + u16 freq_tab[] = {400, 533, 667, 800}; + + /* MemClkFreq: 3: 400MHz; 4: 533MHz; 5: 667MHz; 6: 800MHz */ + MemClkFreq = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, + FUN_DCT, DRAM_CONFIG_HIGH, 0, 2); /* Program F2x[1, 0]9C_x08[WrLvOdt[3:0]] to the proper ODT settings for the * current memory subsystem configuration. @@ -656,12 +639,13 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass) programODT(pMCTData, pDCTData, dimm); /* Program F2x[1,0]9C_x08[WrLvOdtEn]=1 */ - if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx)) + if (pDCTData->LogicalCPUID & (AMD_DR_Cx | AMD_DR_Dx)) { set_DCT_ADDR_Bits(pDCTData, pDCTData->DctTrain, pDCTData->NodeId, FUN_DCT, DRAM_ADD_DCT_PHY_CONTROL_REG, WrLvOdtEn, WrLvOdtEn, (u32)1); + } else { - /* Program WrLvOdtEn=1 through set bit 12 of D3CSODT reg offset 0 for Rev.B*/ + /* Program WrLvOdtEn=1 through set bit 12 of D3CSODT reg offset 0 for Rev.B */ if (pDCTData->DctTrain) { Addl_Data_Offset=0x198; @@ -687,7 +671,6 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass) /* Wait 10 MEMCLKs to allow for ODT signal settling. */ pMCTData->AgesaDelay(10); - ByteLane = 0; if (pass == 1) { if (pDCTData->Status[DCT_STATUS_REGISTERED]) @@ -705,10 +688,17 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass) } else { - Seed_Gross = 0x00; - Seed_Fine = 0x1A; + if (MemClkFreq == 6) { + /* DDR-800 */ + Seed_Gross = 0x00; + Seed_Fine = 0x1a; + } else { + /* Use settings for DDR-400 (interpolated from BKDG) */ + Seed_Gross = 0x00; + Seed_Fine = 0x0d; + } } - while(ByteLane < MAX_BYTE_LANES) + for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { /* Program an initialization value to registers F2x[1, 0]9C_x[51:50] and * F2x[1, 0]9C_x52 to set the gross and fine delay for all the byte lane fields @@ -720,35 +710,32 @@ void procConifg(sMCTStruct *pMCTData,sDCTStruct *pDCTData, u8 dimm, u8 pass) */ pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross; pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine; - ByteLane++; } - } else if (pDCTData->Status[DCT_STATUS_REGISTERED]) { /* For Pass 2 */ + } else { /* Pass 2 */ /* From BKDG, Write Leveling Seed Value. */ - /* TODO: The unbuffered DIMMs are unstable on the code below. So temporarily it is - * only for registered DIMMs. */ u32 RegisterDelay, SeedTotal; - u8 MemClkFreq; - u16 freq_tab[] = {400, 533, 667, 800}; - while(ByteLane < MAX_BYTE_LANES) + for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) { - MemClkFreq = get_Bits(pDCTData, pDCTData->CurrDct, pDCTData->NodeId, - FUN_DCT, DRAM_CONFIG_HIGH, 0, 2); if (pDCTData->Status[DCT_STATUS_REGISTERED]) RegisterDelay = 0x20; /* TODO: ((RCW2 & BIT0) == 0) ? 0x20 : 0x30; */ else RegisterDelay = 0; - SeedTotal = (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1F) | - pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5; + SeedTotal = (pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] & 0x1f) | + (pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] << 5); /* SeedTotalPreScaling = (the total delay value in F2x[1, 0]9C_x[4A:30] from pass 1 of write levelization training) - RegisterDelay. */ - /* MemClkFreq: 3: 400MHz; 4: 533MHz; 5: 667MHz; 6: 800MHz */ - SeedTotal = (u16) (RegisterDelay + ((((u32) SeedTotal - RegisterDelay) * - freq_tab[MemClkFreq-3]) / 400)); - Seed_Gross = (SeedTotal & 0x20) != 0 ? 1 : 2; - Seed_Fine = SeedTotal & 0x1F; + SeedTotal = (uint16_t) (RegisterDelay + ((((uint64_t) SeedTotal - RegisterDelay) * + freq_tab[MemClkFreq-3] * 100) / (freq_tab[0] * 100))); + Seed_Gross = SeedTotal / 32; + Seed_Fine = SeedTotal & 0x1f; + if (Seed_Gross == 0) + Seed_Gross = 0; + else if (Seed_Gross & 0x1) + Seed_Gross = 1; + else + Seed_Gross = 2; pDCTData->WLGrossDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Gross; pDCTData->WLFineDelay[MAX_BYTE_LANES*dimm+ByteLane] = Seed_Fine; - ByteLane ++; } } diff --git a/src/northbridge/amd/amdmct/wrappers/mcti_d.c b/src/northbridge/amd/amdmct/wrappers/mcti_d.c index ea328935b2..f9a9921e6e 100644 --- a/src/northbridge/amd/amdmct/wrappers/mcti_d.c +++ b/src/northbridge/amd/amdmct/wrappers/mcti_d.c @@ -59,6 +59,10 @@ static u16 mctGet_NVbits(u8 index) val = 1; #elif CONFIG_CPU_SOCKET_TYPE == 0x13 /* ASB2 */ val = 4; +#elif CONFIG_CPU_SOCKET_TYPE == 0x14 /* C32 */ + val = 5; +#elif CONFIG_CPU_SOCKET_TYPE == 0x15 /* G34 */ + val = 3; //#elif SYSTEM_TYPE == MOBILE // val = 2; #endif @@ -413,101 +417,6 @@ static void mctHookAfterDramInit(void) } #if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */ -static void coreDelay(u32 microseconds) -{ - msr_t now; - msr_t end; - u32 cycles; - - /* delay ~40us - This seems like a hack to me... - It would be nice to have a central delay function. */ - - cycles = (microseconds * 100) << 3; /* x8 (number of 1.25ns ticks) */ - - if (!(rdmsr(HWCR).lo & TSC_FREQ_SEL_MASK)) { - msr_t pstate_msr = rdmsr(CUR_PSTATE_MSR); - if (!(rdmsr(0xC0010064+pstate_msr.lo).lo & NB_DID_M_ON)) { - cycles = cycles <<1; // half freq, double cycles - } - } // else should we keep p0 freq at the time of setting TSC_FREQ_SEL_MASK somewhere and check it here ? - - now = rdmsr(TSC_MSR); - // avoid overflow when called near 2^32 ticks ~ 5.3 s boundaries - if (0xffffffff - cycles >= now.lo ) { - end.hi = now.hi; - end.lo = now.lo + cycles; - } else { - end.hi = now.hi +1; // - end.lo = cycles - (1+(0xffffffff - now.lo)); - } - do { - now = rdmsr(TSC_MSR); - } while ((now.hi < end.hi) || ((now.hi == end.hi) && (now.lo < end.lo))); -} - -/* Erratum 350 */ -static void vErrata350(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat) -{ - u8 u8Channel; - u8 u8Receiver; - u32 u32Addr; - u8 u8Valid; - u32 u32DctDev; - - // 1. dummy read for each installed DIMM */ - for (u8Channel = 0; u8Channel < 2; u8Channel++) { - // This will be 0 for vaild DIMMS, eles 8 - u8Receiver = mct_InitReceiver_D(pDCTstat, u8Channel); - - for (; u8Receiver < 8; u8Receiver += 2) { - u32Addr = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, u8Channel, u8Receiver, &u8Valid); - - if(!u8Valid) { /* Address not supported on current CS */ - print_t("vErrata350: Address not supported on current CS\n"); - continue; - } - print_t("vErrata350: dummy read \n"); - read32_fs(u32Addr); - } - } - - print_t("vErrata350: step 2a\n"); - - /* 2. Write 0000_8000h to register F2x[1, 0]9C_xD080F0C. */ - u32DctDev = pDCTstat->dev_dct; - Set_NB32_index_wait(u32DctDev, 0x098, 0xD080F0C, 0x00008000); - /* ^--- value - ^---F2x[1, 0]9C_x0D080F0C, No description in BKDG. - ^----F2x[1, 0]98 DRAM Controller Additional Data Offset Register */ - - if(!pDCTstat->GangedMode) { - print_t("vErrata350: step 2b\n"); - Set_NB32_index_wait(u32DctDev, 0x198, 0xD080F0C, 0x00008000); - /* ^--- value - ^---F2x[1, 0]9C_x0D080F0C, No description in BKDG - ^----F2x[1, 0]98 DRAM Controller Additional Data Offset Register */ - } - - print_t("vErrata350: step 3\n"); - /* 3. Wait at least 300 nanoseconds. */ - coreDelay(1); - - print_t("vErrata350: step 4\n"); - /* 4. Write 0000_0000h to register F2x[1, 0]9C_xD080F0C. */ - Set_NB32_index_wait(u32DctDev, 0x098, 0xD080F0C, 0x00000000); - - if(!pDCTstat->GangedMode) { - print_t("vErrata350: step 4b\n"); - Set_NB32_index_wait(u32DctDev, 0x198, 0xD080F0C, 0x00000000); - } - - print_t("vErrata350: step 5\n"); - /* 5. Wait at least 2 microseconds. */ - coreDelay(2); - -} - static void vErratum372(struct DCTStatStruc *pDCTstat) { msr_t msr = rdmsr(NB_CFG_MSR); @@ -546,8 +455,7 @@ static void mctHookBeforeAnyTraining(struct MCTStatStruc *pMCTstat, struct DCTSt { #if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */ /* FIXME : as of 25.6.2010 errata 350 and 372 should apply to ((RB|BL|DA)-C[23])|(HY-D[01])|(PH-E0) but I don't find constants for all of them */ - if (pDCTstatA->LogicalCPUID & AMD_DRBH_Cx) { - vErrata350(pMCTstat, pDCTstatA); + if (pDCTstatA->LogicalCPUID & (AMD_DRBH_Cx | AMD_DR_Dx)) { vErratum372(pDCTstatA); vErratum414(pDCTstatA); } |