From fd5fa2ad1fb1f3525deab9213bed2c38e083342d Mon Sep 17 00:00:00 2001 From: Patrick Rudolph Date: Fri, 11 Nov 2016 18:22:33 +0100 Subject: nb/intel/sandybridge/raminit: Split raminit.c Split raminit.c into smaller parts. Move all functions that will be used by chip-specific code into raminit_common.c. The chip-specific changes includes new configuration values for IvyBridge and 100Mhz reference clock support, including new frequencies. No functionality is changed. Tested on Lenovo T420. Change-Id: If7bb5949f4b771430f3dba1b754ad241a7e8426b Signed-off-by: Patrick Rudolph Reviewed-on: https://review.coreboot.org/17604 Tested-by: build bot (Jenkins) Reviewed-by: Martin Roth --- src/northbridge/intel/sandybridge/raminit_common.c | 3395 ++++++++++++++++++++ 1 file changed, 3395 insertions(+) create mode 100644 src/northbridge/intel/sandybridge/raminit_common.c (limited to 'src/northbridge/intel/sandybridge/raminit_common.c') diff --git a/src/northbridge/intel/sandybridge/raminit_common.c b/src/northbridge/intel/sandybridge/raminit_common.c new file mode 100644 index 0000000000..70bb7aff7f --- /dev/null +++ b/src/northbridge/intel/sandybridge/raminit_common.c @@ -0,0 +1,3395 @@ +/* + * This file is part of the coreboot project. + * + * Copyright (C) 2014 Damien Zammit + * Copyright (C) 2014 Vladimir Serbinenko + * Copyright (C) 2016 Patrick Rudolph + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "raminit_native.h" +#include "raminit_common.h" +#include "sandybridge.h" + +/* FIXME: no ECC support. */ +/* FIXME: no support for 3-channel chipsets. */ + +/* + * Register description: + * Intel provides a command queue of depth four. + * Every command is configured by using multiple registers. + * On executing the command queue you have to provide the depth used. + * + * Known registers: + * Channel X = [0, 1] + * Command queue index Y = [0, 1, 2, 3] + * + * DEFAULT_MCHBAR + 0x4220 + 0x400 * X + 4 * Y: command io register + * Controls the DRAM command signals + * Bit 0: !RAS + * Bit 1: !CAS + * Bit 2: !WE + * + * DEFAULT_MCHBAR + 0x4200 + 0x400 * X + 4 * Y: addr bankslot io register + * Controls the address, bank address and slotrank signals + * Bit 0-15 : Address + * Bit 20-22: Bank Address + * Bit 24-25: slotrank + * + * DEFAULT_MCHBAR + 0x4230 + 0x400 * X + 4 * Y: idle register + * Controls the idle time after issuing this DRAM command + * Bit 16-32: number of clock-cylces to idle + * + * DEFAULT_MCHBAR + 0x4284 + 0x400 * channel: execute command queue + * Starts to execute all queued commands + * Bit 0 : start DRAM command execution + * Bit 16-20: (number of queued commands - 1) * 4 + */ + +static void sfence(void) +{ + asm volatile ("sfence"); +} + +static void toggle_io_reset(void) { + /* toggle IO reset bit */ + u32 r32 = read32(DEFAULT_MCHBAR + 0x5030); + write32(DEFAULT_MCHBAR + 0x5030, r32 | 0x20); + udelay(1); + write32(DEFAULT_MCHBAR + 0x5030, r32 & ~0x20); + udelay(1); +} + +static u32 get_XOVER_CLK(u8 rankmap) +{ + return rankmap << 24; +} + +static u32 get_XOVER_CMD(u8 rankmap) +{ + u32 reg; + + // enable xover cmd + reg = 0x4000; + + // enable xover ctl + if (rankmap & 0x3) + reg |= 0x20000; + + if (rankmap & 0xc) + reg |= 0x4000000; + + return reg; +} + +/* CAS write latency. To be programmed in MR2. + * See DDR3 SPEC for MR2 documentation. */ +u8 get_CWL(u32 tCK) +{ + /* Get CWL based on tCK using the following rule: */ + switch (tCK) { + case TCK_1333MHZ: + return 12; + case TCK_1200MHZ: + case TCK_1100MHZ: + return 11; + case TCK_1066MHZ: + case TCK_1000MHZ: + return 10; + case TCK_933MHZ: + case TCK_900MHZ: + return 9; + case TCK_800MHZ: + case TCK_700MHZ: + return 8; + case TCK_666MHZ: + return 7; + case TCK_533MHZ: + return 6; + default: + return 5; + } +} + +void dram_find_common_params(ramctr_timing *ctrl) +{ + size_t valid_dimms; + int channel, slot; + dimm_info *dimms = &ctrl->info; + + ctrl->cas_supported = (1 << (MAX_CAS - MIN_CAS + 1)) - 1; + valid_dimms = 0; + FOR_ALL_CHANNELS for (slot = 0; slot < 2; slot++) { + const dimm_attr *dimm = &dimms->dimm[channel][slot]; + if (dimm->dram_type != SPD_MEMORY_TYPE_SDRAM_DDR3) + continue; + valid_dimms++; + + /* Find all possible CAS combinations */ + ctrl->cas_supported &= dimm->cas_supported; + + /* Find the smallest common latencies supported by all DIMMs */ + ctrl->tCK = MAX(ctrl->tCK, dimm->tCK); + ctrl->tAA = MAX(ctrl->tAA, dimm->tAA); + ctrl->tWR = MAX(ctrl->tWR, dimm->tWR); + ctrl->tRCD = MAX(ctrl->tRCD, dimm->tRCD); + ctrl->tRRD = MAX(ctrl->tRRD, dimm->tRRD); + ctrl->tRP = MAX(ctrl->tRP, dimm->tRP); + ctrl->tRAS = MAX(ctrl->tRAS, dimm->tRAS); + ctrl->tRFC = MAX(ctrl->tRFC, dimm->tRFC); + ctrl->tWTR = MAX(ctrl->tWTR, dimm->tWTR); + ctrl->tRTP = MAX(ctrl->tRTP, dimm->tRTP); + ctrl->tFAW = MAX(ctrl->tFAW, dimm->tFAW); + } + + if (!ctrl->cas_supported) + die("Unsupported DIMM combination. " + "DIMMS do not support common CAS latency"); + if (!valid_dimms) + die("No valid DIMMs found"); +} + +void dram_xover(ramctr_timing * ctrl) +{ + u32 reg; + int channel; + + FOR_ALL_CHANNELS { + // enable xover clk + reg = get_XOVER_CLK(ctrl->rankmap[channel]); + printram("XOVER CLK [%x] = %x\n", channel * 0x100 + 0xc14, + reg); + MCHBAR32(channel * 0x100 + 0xc14) = reg; + + // enable xover ctl & xover cmd + reg = get_XOVER_CMD(ctrl->rankmap[channel]); + printram("XOVER CMD [%x] = %x\n", 0x100 * channel + 0x320c, + reg); + MCHBAR32(0x100 * channel + 0x320c) = reg; + } +} + +void dram_timing_regs(ramctr_timing * ctrl) +{ + u32 reg, addr, val32, cpu, stretch; + struct cpuid_result cpures; + int channel; + + FOR_ALL_CHANNELS { + // DBP + reg = 0; + reg |= ctrl->tRCD; + reg |= (ctrl->tRP << 4); + reg |= (ctrl->CAS << 8); + reg |= (ctrl->CWL << 12); + reg |= (ctrl->tRAS << 16); + printram("DBP [%x] = %x\n", 0x400 * channel + 0x4000, reg); + MCHBAR32(0x400 * channel + 0x4000) = reg; + + // RAP + reg = 0; + reg |= ctrl->tRRD; + reg |= (ctrl->tRTP << 4); + reg |= (ctrl->tCKE << 8); + reg |= (ctrl->tWTR << 12); + reg |= (ctrl->tFAW << 16); + reg |= (ctrl->tWR << 24); + reg |= (3 << 30); + printram("RAP [%x] = %x\n", 0x400 * channel + 0x4004, reg); + MCHBAR32(0x400 * channel + 0x4004) = reg; + + // OTHP + addr = 0x400 * channel + 0x400c; + reg = 0; + reg |= ctrl->tXPDLL; + reg |= (ctrl->tXP << 5); + reg |= (ctrl->tAONPD << 8); + reg |= 0xa0000; + printram("OTHP [%x] = %x\n", addr, reg); + MCHBAR32(addr) = reg; + + MCHBAR32(0x400 * channel + 0x4014) = 0; + + MCHBAR32(addr) |= 0x00020000; + + // ODT stretch + reg = 0; + + cpures = cpuid(1); + cpu = cpures.eax; + if (IS_IVY_CPU(cpu) + || (IS_SANDY_CPU(cpu) && IS_SANDY_CPU_D2(cpu))) { + stretch = 2; + addr = 0x400 * channel + 0x400c; + printram("ODT stretch [%x] = %x\n", + 0x400 * channel + 0x400c, reg); + reg = MCHBAR32(addr); + + if (((ctrl->rankmap[channel] & 3) == 0) + || (ctrl->rankmap[channel] & 0xc) == 0) { + + // Rank 0 - operate on rank 2 + reg = (reg & ~0xc0000) | (stretch << 18); + + // Rank 2 - operate on rank 0 + reg = (reg & ~0x30000) | (stretch << 16); + + printram("ODT stretch [%x] = %x\n", addr, reg); + MCHBAR32(addr) = reg; + } + + } else if (IS_SANDY_CPU(cpu) && IS_SANDY_CPU_C(cpu)) { + stretch = 3; + addr = 0x400 * channel + 0x401c; + reg = MCHBAR32(addr); + + if (((ctrl->rankmap[channel] & 3) == 0) + || (ctrl->rankmap[channel] & 0xc) == 0) { + + // Rank 0 - operate on rank 2 + reg = (reg & ~0x3000) | (stretch << 12); + + // Rank 2 - operate on rank 0 + reg = (reg & ~0xc00) | (stretch << 10); + + printram("ODT stretch [%x] = %x\n", addr, reg); + MCHBAR32(addr) = reg; + } + } else { + stretch = 0; + } + + // REFI + reg = 0; + val32 = ctrl->tREFI; + reg = (reg & ~0xffff) | val32; + val32 = ctrl->tRFC; + reg = (reg & ~0x1ff0000) | (val32 << 16); + val32 = (u32) (ctrl->tREFI * 9) / 1024; + reg = (reg & ~0xfe000000) | (val32 << 25); + printram("REFI [%x] = %x\n", 0x400 * channel + 0x4298, + reg); + MCHBAR32(0x400 * channel + 0x4298) = reg; + + MCHBAR32(0x400 * channel + 0x4294) |= 0xff; + + // SRFTP + reg = 0; + val32 = tDLLK; + reg = (reg & ~0xfff) | val32; + val32 = ctrl->tXSOffset; + reg = (reg & ~0xf000) | (val32 << 12); + val32 = tDLLK - ctrl->tXSOffset; + reg = (reg & ~0x3ff0000) | (val32 << 16); + val32 = ctrl->tMOD - 8; + reg = (reg & ~0xf0000000) | (val32 << 28); + printram("SRFTP [%x] = %x\n", 0x400 * channel + 0x42a4, + reg); + MCHBAR32(0x400 * channel + 0x42a4) = reg; + } +} + +void dram_dimm_mapping(ramctr_timing *ctrl) +{ + u32 reg, val32; + int channel; + dimm_info *info = &ctrl->info; + + FOR_ALL_CHANNELS { + dimm_attr *dimmA = 0; + dimm_attr *dimmB = 0; + reg = 0; + val32 = 0; + if (info->dimm[channel][0].size_mb >= + info->dimm[channel][1].size_mb) { + // dimm 0 is bigger, set it to dimmA + dimmA = &info->dimm[channel][0]; + dimmB = &info->dimm[channel][1]; + reg |= (0 << 16); + } else { + // dimm 1 is bigger, set it to dimmA + dimmA = &info->dimm[channel][1]; + dimmB = &info->dimm[channel][0]; + reg |= (1 << 16); + } + // dimmA + if (dimmA && (dimmA->ranks > 0)) { + val32 = dimmA->size_mb / 256; + reg = (reg & ~0xff) | val32; + val32 = dimmA->ranks - 1; + reg = (reg & ~0x20000) | (val32 << 17); + val32 = (dimmA->width / 8) - 1; + reg = (reg & ~0x80000) | (val32 << 19); + } + // dimmB + if (dimmB && (dimmB->ranks > 0)) { + val32 = dimmB->size_mb / 256; + reg = (reg & ~0xff00) | (val32 << 8); + val32 = dimmB->ranks - 1; + reg = (reg & ~0x40000) | (val32 << 18); + val32 = (dimmB->width / 8) - 1; + reg = (reg & ~0x100000) | (val32 << 20); + } + reg = (reg & ~0x200000) | (1 << 21); // rank interleave + reg = (reg & ~0x400000) | (1 << 22); // enhanced interleave + + // Save MAD-DIMM register + if ((dimmA && (dimmA->ranks > 0)) + || (dimmB && (dimmB->ranks > 0))) { + ctrl->mad_dimm[channel] = reg; + } else { + ctrl->mad_dimm[channel] = 0; + } + } +} + +void dram_dimm_set_mapping(ramctr_timing * ctrl) +{ + int channel; + FOR_ALL_CHANNELS { + MCHBAR32(0x5004 + channel * 4) = ctrl->mad_dimm[channel]; + } +} + +void dram_zones(ramctr_timing * ctrl, int training) +{ + u32 reg, ch0size, ch1size; + u8 val; + reg = 0; + val = 0; + if (training) { + ch0size = ctrl->channel_size_mb[0] ? 256 : 0; + ch1size = ctrl->channel_size_mb[1] ? 256 : 0; + } else { + ch0size = ctrl->channel_size_mb[0]; + ch1size = ctrl->channel_size_mb[1]; + } + + if (ch0size >= ch1size) { + reg = MCHBAR32(0x5014); + val = ch1size / 256; + reg = (reg & ~0xff000000) | val << 24; + reg = (reg & ~0xff0000) | (2 * val) << 16; + MCHBAR32(0x5014) = reg; + MCHBAR32(0x5000) = 0x24; + } else { + reg = MCHBAR32(0x5014); + val = ch0size / 256; + reg = (reg & ~0xff000000) | val << 24; + reg = (reg & ~0xff0000) | (2 * val) << 16; + MCHBAR32(0x5014) = reg; + MCHBAR32(0x5000) = 0x21; + } +} + +#define HOST_BRIDGE PCI_DEVFN(0, 0) +#define DEFAULT_TCK TCK_800MHZ + +unsigned int get_mem_min_tck(void) +{ + u32 reg32; + u8 rev; + const struct device *dev; + const struct northbridge_intel_sandybridge_config *cfg = NULL; + + dev = dev_find_slot(0, HOST_BRIDGE); + if (dev) + cfg = dev->chip_info; + + /* If this is zero, it just means devicetree.cb didn't set it */ + if (!cfg || cfg->max_mem_clock_mhz == 0) { + rev = pci_read_config8(PCI_DEV(0, 0, 0), PCI_DEVICE_ID); + + if ((rev & BASE_REV_MASK) == BASE_REV_SNB) { + /* read Capabilities A Register DMFC bits */ + reg32 = pci_read_config32(PCI_DEV(0, 0, 0), CAPID0_A); + reg32 &= 0x7; + + switch (reg32) { + case 7: return TCK_533MHZ; + case 6: return TCK_666MHZ; + case 5: return TCK_800MHZ; + /* reserved: */ + default: + break; + } + } else { + /* read Capabilities B Register DMFC bits */ + reg32 = pci_read_config32(PCI_DEV(0, 0, 0), CAPID0_B); + reg32 = (reg32 >> 4) & 0x7; + + switch (reg32) { + case 7: return TCK_533MHZ; + case 6: return TCK_666MHZ; + case 5: return TCK_800MHZ; + case 4: return TCK_933MHZ; + case 3: return TCK_1066MHZ; + case 2: return TCK_1200MHZ; + case 1: return TCK_1333MHZ; + /* reserved: */ + default: + break; + } + } + return DEFAULT_TCK; + } else { + if (cfg->max_mem_clock_mhz >= 1066) + return TCK_1066MHZ; + else if (cfg->max_mem_clock_mhz >= 933) + return TCK_933MHZ; + else if (cfg->max_mem_clock_mhz >= 800) + return TCK_800MHZ; + else if (cfg->max_mem_clock_mhz >= 666) + return TCK_666MHZ; + else if (cfg->max_mem_clock_mhz >= 533) + return TCK_533MHZ; + else + return TCK_400MHZ; + } +} + +#define DEFAULT_PCI_MMIO_SIZE 2048 + +static unsigned int get_mmio_size(void) +{ + const struct device *dev; + const struct northbridge_intel_sandybridge_config *cfg = NULL; + + dev = dev_find_slot(0, HOST_BRIDGE); + if (dev) + cfg = dev->chip_info; + + /* If this is zero, it just means devicetree.cb didn't set it */ + if (!cfg || cfg->pci_mmio_size == 0) + return DEFAULT_PCI_MMIO_SIZE; + else + return cfg->pci_mmio_size; +} + +void dram_memorymap(ramctr_timing * ctrl, int me_uma_size) +{ + u32 reg, val, reclaim; + u32 tom, gfxstolen, gttsize; + size_t tsegsize, mmiosize, toludbase, touudbase, gfxstolenbase, gttbase, + tsegbase, mestolenbase; + size_t tsegbasedelta, remapbase, remaplimit; + uint16_t ggc; + + mmiosize = get_mmio_size(); + + ggc = pci_read_config16(NORTHBRIDGE, GGC); + if (!(ggc & 2)) { + gfxstolen = ((ggc >> 3) & 0x1f) * 32; + gttsize = ((ggc >> 8) & 0x3); + } else { + gfxstolen = 0; + gttsize = 0; + } + + tsegsize = CONFIG_SMM_TSEG_SIZE >> 20; + + tom = ctrl->channel_size_mb[0] + ctrl->channel_size_mb[1]; + + mestolenbase = tom - me_uma_size; + + toludbase = MIN(4096 - mmiosize + gfxstolen + gttsize + tsegsize, + tom - me_uma_size); + gfxstolenbase = toludbase - gfxstolen; + gttbase = gfxstolenbase - gttsize; + + tsegbase = gttbase - tsegsize; + + // Round tsegbase down to nearest address aligned to tsegsize + tsegbasedelta = tsegbase & (tsegsize - 1); + tsegbase &= ~(tsegsize - 1); + + gttbase -= tsegbasedelta; + gfxstolenbase -= tsegbasedelta; + toludbase -= tsegbasedelta; + + // Test if it is possible to reclaim a hole in the RAM addressing + if (tom - me_uma_size > toludbase) { + // Reclaim is possible + reclaim = 1; + remapbase = MAX(4096, tom - me_uma_size); + remaplimit = + remapbase + MIN(4096, tom - me_uma_size) - toludbase - 1; + touudbase = remaplimit + 1; + } else { + // Reclaim not possible + reclaim = 0; + touudbase = tom - me_uma_size; + } + + // Update memory map in pci-e configuration space + printk(BIOS_DEBUG, "Update PCI-E configuration space:\n"); + + // TOM (top of memory) + reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xa0); + val = tom & 0xfff; + reg = (reg & ~0xfff00000) | (val << 20); + printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0xa0, reg); + pcie_write_config32(PCI_DEV(0, 0, 0), 0xa0, reg); + + reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xa4); + val = tom & 0xfffff000; + reg = (reg & ~0x000fffff) | (val >> 12); + printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0xa4, reg); + pcie_write_config32(PCI_DEV(0, 0, 0), 0xa4, reg); + + // TOLUD (top of low used dram) + reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xbc); + val = toludbase & 0xfff; + reg = (reg & ~0xfff00000) | (val << 20); + printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0xbc, reg); + pcie_write_config32(PCI_DEV(0, 0, 0), 0xbc, reg); + + // TOUUD LSB (top of upper usable dram) + reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xa8); + val = touudbase & 0xfff; + reg = (reg & ~0xfff00000) | (val << 20); + printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0xa8, reg); + pcie_write_config32(PCI_DEV(0, 0, 0), 0xa8, reg); + + // TOUUD MSB + reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xac); + val = touudbase & 0xfffff000; + reg = (reg & ~0x000fffff) | (val >> 12); + printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0xac, reg); + pcie_write_config32(PCI_DEV(0, 0, 0), 0xac, reg); + + if (reclaim) { + // REMAP BASE + pcie_write_config32(PCI_DEV(0, 0, 0), 0x90, remapbase << 20); + pcie_write_config32(PCI_DEV(0, 0, 0), 0x94, remapbase >> 12); + + // REMAP LIMIT + pcie_write_config32(PCI_DEV(0, 0, 0), 0x98, remaplimit << 20); + pcie_write_config32(PCI_DEV(0, 0, 0), 0x9c, remaplimit >> 12); + } + // TSEG + reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xb8); + val = tsegbase & 0xfff; + reg = (reg & ~0xfff00000) | (val << 20); + printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0xb8, reg); + pcie_write_config32(PCI_DEV(0, 0, 0), 0xb8, reg); + + // GFX stolen memory + reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xb0); + val = gfxstolenbase & 0xfff; + reg = (reg & ~0xfff00000) | (val << 20); + printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0xb0, reg); + pcie_write_config32(PCI_DEV(0, 0, 0), 0xb0, reg); + + // GTT stolen memory + reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0xb4); + val = gttbase & 0xfff; + reg = (reg & ~0xfff00000) | (val << 20); + printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0xb4, reg); + pcie_write_config32(PCI_DEV(0, 0, 0), 0xb4, reg); + + if (me_uma_size) { + reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0x7c); + val = (0x80000 - me_uma_size) & 0xfffff000; + reg = (reg & ~0x000fffff) | (val >> 12); + printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0x7c, reg); + pcie_write_config32(PCI_DEV(0, 0, 0), 0x7c, reg); + + // ME base + reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0x70); + val = mestolenbase & 0xfff; + reg = (reg & ~0xfff00000) | (val << 20); + printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0x70, reg); + pcie_write_config32(PCI_DEV(0, 0, 0), 0x70, reg); + + reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0x74); + val = mestolenbase & 0xfffff000; + reg = (reg & ~0x000fffff) | (val >> 12); + printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0x74, reg); + pcie_write_config32(PCI_DEV(0, 0, 0), 0x74, reg); + + // ME mask + reg = pcie_read_config32(PCI_DEV(0, 0, 0), 0x78); + val = (0x80000 - me_uma_size) & 0xfff; + reg = (reg & ~0xfff00000) | (val << 20); + reg = (reg & ~0x400) | (1 << 10); // set lockbit on ME mem + + reg = (reg & ~0x800) | (1 << 11); // set ME memory enable + printk(BIOS_DEBUG, "PCI(0, 0, 0)[%x] = %x\n", 0x78, reg); + pcie_write_config32(PCI_DEV(0, 0, 0), 0x78, reg); + } +} + +static void wait_428c(int channel) +{ + while (1) { + if (read32(DEFAULT_MCHBAR + 0x428c + (channel << 10)) & 0x50) + return; + } +} + +static void write_reset(ramctr_timing * ctrl) +{ + int channel, slotrank; + + /* choose a populated channel. */ + channel = (ctrl->rankmap[0]) ? 0 : 1; + + wait_428c(channel); + + /* choose a populated rank. */ + slotrank = (ctrl->rankmap[channel] & 1) ? 0 : 2; + + /* DRAM command ZQCS */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x0f003); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x80c01); + + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x60000); + + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0); + + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0x400001); + wait_428c(channel); +} + +void dram_jedecreset(ramctr_timing * ctrl) +{ + u32 reg, addr; + int channel; + + while (!(MCHBAR32(0x5084) & 0x10000)); + do { + reg = MCHBAR32(0x428c); + } while ((reg & 0x14) == 0); + + // Set state of memory controller + reg = 0x112; + MCHBAR32(0x5030) = reg; + MCHBAR32(0x4ea0) = 0; + reg |= 2; //ddr reset + MCHBAR32(0x5030) = reg; + + // Assert dimm reset signal + reg = MCHBAR32(0x5030); + reg &= ~0x2; + MCHBAR32(0x5030) = reg; + + // Wait 200us + udelay(200); + + // Deassert dimm reset signal + MCHBAR32(0x5030) |= 2; + + // Wait 500us + udelay(500); + + // Enable DCLK + MCHBAR32(0x5030) |= 4; + + // XXX Wait 20ns + udelay(1); + + FOR_ALL_CHANNELS { + // Set valid rank CKE + reg = 0; + reg = (reg & ~0xf) | ctrl->rankmap[channel]; + addr = 0x400 * channel + 0x42a0; + MCHBAR32(addr) = reg; + + // Wait 10ns for ranks to settle + //udelay(0.01); + + reg = (reg & ~0xf0) | (ctrl->rankmap[channel] << 4); + MCHBAR32(addr) = reg; + + // Write reset using a NOP + write_reset(ctrl); + } +} + +static odtmap get_ODT(ramctr_timing *ctrl, u8 rank, int channel) +{ + /* Get ODT based on rankmap: */ + int dimms_per_ch = (ctrl->rankmap[channel] & 1) + + ((ctrl->rankmap[channel] >> 2) & 1); + + if (dimms_per_ch == 1) { + return (const odtmap){60, 60}; + } else { + return (const odtmap){120, 30}; + } +} + +static void write_mrreg(ramctr_timing *ctrl, int channel, int slotrank, + int reg, u32 val) +{ + wait_428c(channel); + + if (ctrl->rank_mirror[channel][slotrank]) { + /* DDR3 Rank1 Address mirror + * swap the following pins: + * A3<->A4, A5<->A6, A7<->A8, BA0<->BA1 */ + reg = ((reg >> 1) & 1) | ((reg << 1) & 2); + val = (val & ~0x1f8) | ((val >> 1) & 0xa8) + | ((val & 0xa8) << 1); + } + + /* DRAM command MRS */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x0f000); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x41001); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | (reg << 20) | val | 0x60000); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0); + + /* DRAM command MRS */ + write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f000); + write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x41001); + write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, + (slotrank << 24) | (reg << 20) | val | 0x60000); + write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0); + + /* DRAM command MRS */ + write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x0f000); + write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, + 0x1001 | (ctrl->tMOD << 16)); + write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, + (slotrank << 24) | (reg << 20) | val | 0x60000); + write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0); + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0x80001); +} + +static u32 make_mr0(ramctr_timing * ctrl, u8 rank) +{ + u16 mr0reg, mch_cas, mch_wr; + static const u8 mch_wr_t[12] = { 1, 2, 3, 4, 0, 5, 0, 6, 0, 7, 0, 0 }; + + /* DLL Reset - self clearing - set after CLK frequency has been changed */ + mr0reg = 0x100; + + // Convert CAS to MCH register friendly + if (ctrl->CAS < 12) { + mch_cas = (u16) ((ctrl->CAS - 4) << 1); + } else { + mch_cas = (u16) (ctrl->CAS - 12); + mch_cas = ((mch_cas << 1) | 0x1); + } + + // Convert tWR to MCH register friendly + mch_wr = mch_wr_t[ctrl->tWR - 5]; + + mr0reg = (mr0reg & ~0x4) | ((mch_cas & 0x1) << 2); + mr0reg = (mr0reg & ~0x70) | ((mch_cas & 0xe) << 3); + mr0reg = (mr0reg & ~0xe00) | (mch_wr << 9); + + // Precharge PD - Fast (desktop) 0x1 or slow (mobile) 0x0 - mostly power-saving feature + mr0reg = (mr0reg & ~0x1000) | (!ctrl->mobile << 12); + return mr0reg; +} + +static void dram_mr0(ramctr_timing *ctrl, u8 rank, int channel) +{ + write_mrreg(ctrl, channel, rank, 0, + make_mr0(ctrl, rank)); +} + +static u32 encode_odt(u32 odt) +{ + switch (odt) { + case 30: + return (1 << 9) | (1 << 2); // RZQ/8, RZQ/4 + case 60: + return (1 << 2); // RZQ/4 + case 120: + return (1 << 6); // RZQ/2 + default: + case 0: + return 0; + } +} + +static u32 make_mr1(ramctr_timing *ctrl, u8 rank, int channel) +{ + odtmap odt; + u32 mr1reg; + + odt = get_ODT(ctrl, rank, channel); + mr1reg = 0x2; + + mr1reg |= encode_odt(odt.rttnom); + + return mr1reg; +} + +static void dram_mr1(ramctr_timing *ctrl, u8 rank, int channel) +{ + u16 mr1reg; + + mr1reg = make_mr1(ctrl, rank, channel); + + write_mrreg(ctrl, channel, rank, 1, mr1reg); +} + +static void dram_mr2(ramctr_timing *ctrl, u8 rank, int channel) +{ + u16 pasr, cwl, mr2reg; + odtmap odt; + int srt; + + pasr = 0; + cwl = ctrl->CWL - 5; + odt = get_ODT(ctrl, rank, channel); + + srt = ctrl->extended_temperature_range && !ctrl->auto_self_refresh; + + mr2reg = 0; + mr2reg = (mr2reg & ~0x7) | pasr; + mr2reg = (mr2reg & ~0x38) | (cwl << 3); + mr2reg = (mr2reg & ~0x40) | (ctrl->auto_self_refresh << 6); + mr2reg = (mr2reg & ~0x80) | (srt << 7); + mr2reg |= (odt.rttwr / 60) << 9; + + write_mrreg(ctrl, channel, rank, 2, mr2reg); +} + +static void dram_mr3(ramctr_timing *ctrl, u8 rank, int channel) +{ + write_mrreg(ctrl, channel, rank, 3, 0); +} + +void dram_mrscommands(ramctr_timing * ctrl) +{ + u8 slotrank; + u32 reg, addr; + int channel; + + FOR_ALL_POPULATED_CHANNELS { + FOR_ALL_POPULATED_RANKS { + // MR2 + dram_mr2(ctrl, slotrank, channel); + + // MR3 + dram_mr3(ctrl, slotrank, channel); + + // MR1 + dram_mr1(ctrl, slotrank, channel); + + // MR0 + dram_mr0(ctrl, slotrank, channel); + } + } + + /* DRAM command NOP */ + write32(DEFAULT_MCHBAR + 0x4e20, 0x7); + write32(DEFAULT_MCHBAR + 0x4e30, 0xf1001); + write32(DEFAULT_MCHBAR + 0x4e00, 0x60002); + write32(DEFAULT_MCHBAR + 0x4e10, 0); + + /* DRAM command ZQCL */ + write32(DEFAULT_MCHBAR + 0x4e24, 0x1f003); + write32(DEFAULT_MCHBAR + 0x4e34, 0x1901001); + write32(DEFAULT_MCHBAR + 0x4e04, 0x60400); + write32(DEFAULT_MCHBAR + 0x4e14, 0x288); + + /* execute command queue on all channels ? */ + write32(DEFAULT_MCHBAR + 0x4e84, 0x40004); + + // Drain + FOR_ALL_CHANNELS { + // Wait for ref drained + wait_428c(channel); + } + + // Refresh enable + MCHBAR32(0x5030) |= 8; + + FOR_ALL_POPULATED_CHANNELS { + addr = 0x400 * channel + 0x4020; + reg = MCHBAR32(addr); + reg &= ~0x200000; + MCHBAR32(addr) = reg; + + wait_428c(channel); + + slotrank = (ctrl->rankmap[channel] & 1) ? 0 : 2; + + // Drain + wait_428c(channel); + + /* DRAM command ZQCS */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x0f003); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x659001); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x60000); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x3e0); + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0x1); + + // Drain + wait_428c(channel); + } +} + +static const u32 lane_registers[] = { + 0x0000, 0x0200, 0x0400, 0x0600, + 0x1000, 0x1200, 0x1400, 0x1600, + 0x0800 +}; + +void program_timings(ramctr_timing * ctrl, int channel) +{ + u32 reg32, reg_4024, reg_c14, reg_c18, reg_4028; + int lane; + int slotrank, slot; + int full_shift = 0; + u16 slot320c[NUM_SLOTS]; + + FOR_ALL_POPULATED_RANKS { + if (full_shift < -ctrl->timings[channel][slotrank].val_320c) + full_shift = -ctrl->timings[channel][slotrank].val_320c; + } + + for (slot = 0; slot < NUM_SLOTS; slot++) + switch ((ctrl->rankmap[channel] >> (2 * slot)) & 3) { + case 0: + default: + slot320c[slot] = 0x7f; + break; + case 1: + slot320c[slot] = + ctrl->timings[channel][2 * slot + 0].val_320c + + full_shift; + break; + case 2: + slot320c[slot] = + ctrl->timings[channel][2 * slot + 1].val_320c + + full_shift; + break; + case 3: + slot320c[slot] = + (ctrl->timings[channel][2 * slot].val_320c + + ctrl->timings[channel][2 * slot + + 1].val_320c) / 2 + + full_shift; + break; + } + + /* enable CMD XOVER */ + reg32 = get_XOVER_CMD(ctrl->rankmap[channel]); + reg32 |= ((slot320c[0] & 0x3f) << 6) | ((slot320c[0] & 0x40) << 9); + reg32 |= (slot320c[1] & 0x7f) << 18; + reg32 |= (full_shift & 0x3f) | ((full_shift & 0x40) << 6); + + MCHBAR32(0x320c + 0x100 * channel) = reg32; + + /* enable CLK XOVER */ + reg_c14 = get_XOVER_CLK(ctrl->rankmap[channel]); + reg_c18 = 0; + + FOR_ALL_POPULATED_RANKS { + int shift = + ctrl->timings[channel][slotrank].val_320c + full_shift; + int offset_val_c14; + if (shift < 0) + shift = 0; + offset_val_c14 = ctrl->reg_c14_offset + shift; + /* set CLK phase shift */ + reg_c14 |= (offset_val_c14 & 0x3f) << (6 * slotrank); + reg_c18 |= ((offset_val_c14 >> 6) & 1) << slotrank; + } + + MCHBAR32(0xc14 + channel * 0x100) = reg_c14; + MCHBAR32(0xc18 + channel * 0x100) = reg_c18; + + reg_4028 = MCHBAR32(0x4028 + 0x400 * channel); + reg_4028 &= 0xffff0000; + + reg_4024 = 0; + + FOR_ALL_POPULATED_RANKS { + int post_timA_min_high = 7, post_timA_max_high = 0; + int pre_timA_min_high = 7, pre_timA_max_high = 0; + int shift_402x = 0; + int shift = + ctrl->timings[channel][slotrank].val_320c + full_shift; + + if (shift < 0) + shift = 0; + + FOR_ALL_LANES { + if (post_timA_min_high > + ((ctrl->timings[channel][slotrank].lanes[lane]. + timA + shift) >> 6)) + post_timA_min_high = + ((ctrl->timings[channel][slotrank]. + lanes[lane].timA + shift) >> 6); + if (pre_timA_min_high > + (ctrl->timings[channel][slotrank].lanes[lane]. + timA >> 6)) + pre_timA_min_high = + (ctrl->timings[channel][slotrank]. + lanes[lane].timA >> 6); + if (post_timA_max_high < + ((ctrl->timings[channel][slotrank].lanes[lane]. + timA + shift) >> 6)) + post_timA_max_high = + ((ctrl->timings[channel][slotrank]. + lanes[lane].timA + shift) >> 6); + if (pre_timA_max_high < + (ctrl->timings[channel][slotrank].lanes[lane]. + timA >> 6)) + pre_timA_max_high = + (ctrl->timings[channel][slotrank]. + lanes[lane].timA >> 6); + } + + if (pre_timA_max_high - pre_timA_min_high < + post_timA_max_high - post_timA_min_high) + shift_402x = +1; + else if (pre_timA_max_high - pre_timA_min_high > + post_timA_max_high - post_timA_min_high) + shift_402x = -1; + + reg_4028 |= + (ctrl->timings[channel][slotrank].val_4028 + shift_402x - + post_timA_min_high) << (4 * slotrank); + reg_4024 |= + (ctrl->timings[channel][slotrank].val_4024 + + shift_402x) << (8 * slotrank); + + FOR_ALL_LANES { + MCHBAR32(lane_registers[lane] + 0x10 + 0x100 * channel + + 4 * slotrank) + = + (((ctrl->timings[channel][slotrank].lanes[lane]. + timA + shift) & 0x3f) + | + ((ctrl->timings[channel][slotrank].lanes[lane]. + rising + shift) << 8) + | + (((ctrl->timings[channel][slotrank].lanes[lane]. + timA + shift - + (post_timA_min_high << 6)) & 0x1c0) << 10) + | ((ctrl->timings[channel][slotrank].lanes[lane]. + falling + shift) << 20)); + + MCHBAR32(lane_registers[lane] + 0x20 + 0x100 * channel + + 4 * slotrank) + = + (((ctrl->timings[channel][slotrank].lanes[lane]. + timC + shift) & 0x3f) + | + (((ctrl->timings[channel][slotrank].lanes[lane]. + timB + shift) & 0x3f) << 8) + | + (((ctrl->timings[channel][slotrank].lanes[lane]. + timB + shift) & 0x1c0) << 9) + | + (((ctrl->timings[channel][slotrank].lanes[lane]. + timC + shift) & 0x40) << 13)); + } + } + MCHBAR32(0x4024 + 0x400 * channel) = reg_4024; + MCHBAR32(0x4028 + 0x400 * channel) = reg_4028; +} + +static void test_timA(ramctr_timing * ctrl, int channel, int slotrank) +{ + wait_428c(channel); + + /* DRAM command MRS + * write MR3 MPR enable + * in this mode only RD and RDA are allowed + * all reads return a predefined pattern */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f000); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, + (0xc01 | (ctrl->tMOD << 16))); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x360004); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f105); + write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x4040c01); + write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, (slotrank << 24)); + write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x1f105); + write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, + 0x100f | ((ctrl->CAS + 36) << 16)); + write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, + (slotrank << 24) | 0x60000); + write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0); + + /* DRAM command MRS + * write MR3 MPR disable */ + write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, 0x1f000); + write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel, + (0xc01 | (ctrl->tMOD << 16))); + write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel, + (slotrank << 24) | 0x360000); + write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0); + + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001); + + wait_428c(channel); +} + +static int does_lane_work(ramctr_timing * ctrl, int channel, int slotrank, + int lane) +{ + u32 timA = ctrl->timings[channel][slotrank].lanes[lane].timA; + return ((read32 + (DEFAULT_MCHBAR + lane_registers[lane] + channel * 0x100 + 4 + + ((timA / 32) & 1) * 4) + >> (timA % 32)) & 1); +} + +struct run { + int middle; + int end; + int start; + int all; + int length; +}; + +static struct run get_longest_zero_run(int *seq, int sz) +{ + int i, ls; + int bl = 0, bs = 0; + struct run ret; + + ls = 0; + for (i = 0; i < 2 * sz; i++) + if (seq[i % sz]) { + if (i - ls > bl) { + bl = i - ls; + bs = ls; + } + ls = i + 1; + } + if (bl == 0) { + ret.middle = sz / 2; + ret.start = 0; + ret.end = sz; + ret.all = 1; + return ret; + } + + ret.start = bs % sz; + ret.end = (bs + bl - 1) % sz; + ret.middle = (bs + (bl - 1) / 2) % sz; + ret.length = bl; + ret.all = 0; + + return ret; +} + +static void discover_timA_coarse(ramctr_timing * ctrl, int channel, + int slotrank, int *upperA) +{ + int timA; + int statistics[NUM_LANES][128]; + int lane; + + for (timA = 0; timA < 128; timA++) { + FOR_ALL_LANES { + ctrl->timings[channel][slotrank].lanes[lane].timA = timA; + } + program_timings(ctrl, channel); + + test_timA(ctrl, channel, slotrank); + + FOR_ALL_LANES { + statistics[lane][timA] = + !does_lane_work(ctrl, channel, slotrank, lane); + printram("Astat: %d, %d, %d: %x, %x\n", + channel, slotrank, lane, timA, + statistics[lane][timA]); + } + } + FOR_ALL_LANES { + struct run rn = get_longest_zero_run(statistics[lane], 128); + ctrl->timings[channel][slotrank].lanes[lane].timA = rn.middle; + upperA[lane] = rn.end; + if (upperA[lane] < rn.middle) + upperA[lane] += 128; + printram("Aval: %d, %d, %d: %x\n", channel, slotrank, + lane, ctrl->timings[channel][slotrank].lanes[lane].timA); + printram("Aend: %d, %d, %d: %x\n", channel, slotrank, + lane, upperA[lane]); + } +} + +static void discover_timA_fine(ramctr_timing * ctrl, int channel, int slotrank, + int *upperA) +{ + int timA_delta; + int statistics[NUM_LANES][51]; + int lane, i; + + memset(statistics, 0, sizeof(statistics)); + + for (timA_delta = -25; timA_delta <= 25; timA_delta++) { + FOR_ALL_LANES ctrl->timings[channel][slotrank].lanes[lane]. + timA = upperA[lane] + timA_delta + 0x40; + program_timings(ctrl, channel); + + for (i = 0; i < 100; i++) { + test_timA(ctrl, channel, slotrank); + FOR_ALL_LANES { + statistics[lane][timA_delta + 25] += + does_lane_work(ctrl, channel, slotrank, + lane); + } + } + } + FOR_ALL_LANES { + int last_zero, first_all; + + for (last_zero = -25; last_zero <= 25; last_zero++) + if (statistics[lane][last_zero + 25]) + break; + last_zero--; + for (first_all = -25; first_all <= 25; first_all++) + if (statistics[lane][first_all + 25] == 100) + break; + + printram("lane %d: %d, %d\n", lane, last_zero, + first_all); + + ctrl->timings[channel][slotrank].lanes[lane].timA = + (last_zero + first_all) / 2 + upperA[lane]; + printram("Aval: %d, %d, %d: %x\n", channel, slotrank, + lane, ctrl->timings[channel][slotrank].lanes[lane].timA); + } +} + +static int discover_402x(ramctr_timing *ctrl, int channel, int slotrank, + int *upperA) +{ + int works[NUM_LANES]; + int lane; + while (1) { + int all_works = 1, some_works = 0; + program_timings(ctrl, channel); + test_timA(ctrl, channel, slotrank); + FOR_ALL_LANES { + works[lane] = + !does_lane_work(ctrl, channel, slotrank, lane); + if (works[lane]) + some_works = 1; + else + all_works = 0; + } + if (all_works) + return 0; + if (!some_works) { + if (ctrl->timings[channel][slotrank].val_4024 < 2) { + printk(BIOS_EMERG, "402x discovery failed (1): %d, %d\n", + channel, slotrank); + return MAKE_ERR; + } + ctrl->timings[channel][slotrank].val_4024 -= 2; + printram("4024 -= 2;\n"); + continue; + } + ctrl->timings[channel][slotrank].val_4028 += 2; + printram("4028 += 2;\n"); + if (ctrl->timings[channel][slotrank].val_4028 >= 0x10) { + printk(BIOS_EMERG, "402x discovery failed (2): %d, %d\n", + channel, slotrank); + return MAKE_ERR; + } + FOR_ALL_LANES if (works[lane]) { + ctrl->timings[channel][slotrank].lanes[lane].timA += + 128; + upperA[lane] += 128; + printram("increment %d, %d, %d\n", channel, + slotrank, lane); + } + } + return 0; +} + +struct timA_minmax { + int timA_min_high, timA_max_high; +}; + +static void pre_timA_change(ramctr_timing * ctrl, int channel, int slotrank, + struct timA_minmax *mnmx) +{ + int lane; + mnmx->timA_min_high = 7; + mnmx->timA_max_high = 0; + + FOR_ALL_LANES { + if (mnmx->timA_min_high > + (ctrl->timings[channel][slotrank].lanes[lane].timA >> 6)) + mnmx->timA_min_high = + (ctrl->timings[channel][slotrank].lanes[lane]. + timA >> 6); + if (mnmx->timA_max_high < + (ctrl->timings[channel][slotrank].lanes[lane].timA >> 6)) + mnmx->timA_max_high = + (ctrl->timings[channel][slotrank].lanes[lane]. + timA >> 6); + } +} + +static void post_timA_change(ramctr_timing * ctrl, int channel, int slotrank, + struct timA_minmax *mnmx) +{ + struct timA_minmax post; + int shift_402x = 0; + + /* Get changed maxima. */ + pre_timA_change(ctrl, channel, slotrank, &post); + + if (mnmx->timA_max_high - mnmx->timA_min_high < + post.timA_max_high - post.timA_min_high) + shift_402x = +1; + else if (mnmx->timA_max_high - mnmx->timA_min_high > + post.timA_max_high - post.timA_min_high) + shift_402x = -1; + else + shift_402x = 0; + + ctrl->timings[channel][slotrank].val_4028 += shift_402x; + ctrl->timings[channel][slotrank].val_4024 += shift_402x; + printram("4024 += %d;\n", shift_402x); + printram("4028 += %d;\n", shift_402x); +} + +/* Compensate the skew between DQS and DQs. + * To ease PCB design a small skew between Data Strobe signals and + * Data Signals is allowed. + * The controller has to measure and compensate this skew for every byte-lane. + * By delaying either all DQs signals or DQS signal, a full phase + * shift can be introduced. + * It is assumed that one byte-lane's DQs signals have the same routing delay. + * + * To measure the actual skew, the DRAM is placed in "read leveling" mode. + * In read leveling mode the DRAM-chip outputs an alternating periodic pattern. + * The memory controller iterates over all possible values to do a full phase shift + * and issues read commands. + * With DQS and DQs in phase the data read is expected to alternate on every byte: + * 0xFF 0x00 0xFF ... + * Once the controller has detected this pattern a bit in the result register is + * set for the current phase shift. + */ +int read_training(ramctr_timing * ctrl) +{ + int channel, slotrank, lane; + int err; + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS { + int all_high, some_high; + int upperA[NUM_LANES]; + struct timA_minmax mnmx; + + wait_428c(channel); + + /* DRAM command PREA */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f002); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, + 0xc01 | (ctrl->tRP << 16)); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x60400); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0); + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 1); + + write32(DEFAULT_MCHBAR + 0x3400, (slotrank << 2) | 0x8001); + + ctrl->timings[channel][slotrank].val_4028 = 4; + ctrl->timings[channel][slotrank].val_4024 = 55; + program_timings(ctrl, channel); + + discover_timA_coarse(ctrl, channel, slotrank, upperA); + + all_high = 1; + some_high = 0; + FOR_ALL_LANES { + if (ctrl->timings[channel][slotrank].lanes[lane]. + timA >= 0x40) + some_high = 1; + else + all_high = 0; + } + + if (all_high) { + ctrl->timings[channel][slotrank].val_4028--; + printram("4028--;\n"); + FOR_ALL_LANES { + ctrl->timings[channel][slotrank].lanes[lane]. + timA -= 0x40; + upperA[lane] -= 0x40; + + } + } else if (some_high) { + ctrl->timings[channel][slotrank].val_4024++; + ctrl->timings[channel][slotrank].val_4028++; + printram("4024++;\n"); + printram("4028++;\n"); + } + + program_timings(ctrl, channel); + + pre_timA_change(ctrl, channel, slotrank, &mnmx); + + err = discover_402x(ctrl, channel, slotrank, upperA); + if (err) + return err; + + post_timA_change(ctrl, channel, slotrank, &mnmx); + pre_timA_change(ctrl, channel, slotrank, &mnmx); + + discover_timA_fine(ctrl, channel, slotrank, upperA); + + post_timA_change(ctrl, channel, slotrank, &mnmx); + pre_timA_change(ctrl, channel, slotrank, &mnmx); + + FOR_ALL_LANES { + ctrl->timings[channel][slotrank].lanes[lane].timA -= mnmx.timA_min_high * 0x40; + } + ctrl->timings[channel][slotrank].val_4028 -= mnmx.timA_min_high; + printram("4028 -= %d;\n", mnmx.timA_min_high); + + post_timA_change(ctrl, channel, slotrank, &mnmx); + + printram("4/8: %d, %d, %x, %x\n", channel, slotrank, + ctrl->timings[channel][slotrank].val_4024, + ctrl->timings[channel][slotrank].val_4028); + + printram("final results:\n"); + FOR_ALL_LANES + printram("Aval: %d, %d, %d: %x\n", channel, slotrank, + lane, + ctrl->timings[channel][slotrank].lanes[lane].timA); + + write32(DEFAULT_MCHBAR + 0x3400, 0); + + toggle_io_reset(); + } + + FOR_ALL_POPULATED_CHANNELS { + program_timings(ctrl, channel); + } + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES { + write32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + + 4 * lane, 0); + } + return 0; +} + +static void test_timC(ramctr_timing * ctrl, int channel, int slotrank) +{ + int lane; + + FOR_ALL_LANES { + write32(DEFAULT_MCHBAR + 0x4340 + 0x400 * channel + 4 * lane, 0); + read32(DEFAULT_MCHBAR + 0x4140 + 0x400 * channel + 4 * lane); + } + + wait_428c(channel); + + /* DRAM command ACT */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f006); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, + (max((ctrl->tFAW >> 2) + 1, ctrl->tRRD) << 10) + | 4 | (ctrl->tRCD << 16)); + + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | (6 << 16)); + + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x244); + + /* DRAM command NOP */ + write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f207); + write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x8041001); + write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, + (slotrank << 24) | 8); + write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0x3e0); + + /* DRAM command WR */ + write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x1f201); + write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, 0x80411f4); + write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, (slotrank << 24)); + write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0x242); + + /* DRAM command NOP */ + write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, 0x1f207); + write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel, + 0x8000c01 | ((ctrl->CWL + ctrl->tWTR + 5) << 16)); + write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel, + (slotrank << 24) | 8); + write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0x3e0); + + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001); + + wait_428c(channel); + + /* DRAM command PREA */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f002); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, + 0xc01 | (ctrl->tRP << 16)); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x60400); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x240); + + /* DRAM command ACT */ + write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f006); + write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, + (max(ctrl->tRRD, (ctrl->tFAW >> 2) + 1) << 10) + | 8 | (ctrl->CAS << 16)); + + write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, + (slotrank << 24) | 0x60000); + + write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0x244); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x1f105); + write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, + 0x40011f4 | (max(ctrl->tRTP, 8) << 16)); + write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, (slotrank << 24)); + write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0x242); + + /* DRAM command PREA */ + write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, 0x1f002); + write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel, + 0xc01 | (ctrl->tRP << 16)); + write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel, + (slotrank << 24) | 0x60400); + write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0x240); + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001); + wait_428c(channel); +} + +static int discover_timC(ramctr_timing *ctrl, int channel, int slotrank) +{ + int timC; + int statistics[NUM_LANES][MAX_TIMC + 1]; + int lane; + + wait_428c(channel); + + /* DRAM command PREA */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f002); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, + 0xc01 | (ctrl->tRP << 16)); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x60400); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x240); + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 1); + + for (timC = 0; timC <= MAX_TIMC; timC++) { + FOR_ALL_LANES ctrl->timings[channel][slotrank].lanes[lane]. + timC = timC; + program_timings(ctrl, channel); + + test_timC(ctrl, channel, slotrank); + + FOR_ALL_LANES { + statistics[lane][timC] = + read32(DEFAULT_MCHBAR + 0x4340 + 4 * lane + + 0x400 * channel); + printram("Cstat: %d, %d, %d, %x, %x\n", + channel, slotrank, lane, timC, + statistics[lane][timC]); + } + } + FOR_ALL_LANES { + struct run rn = + get_longest_zero_run(statistics[lane], MAX_TIMC + 1); + ctrl->timings[channel][slotrank].lanes[lane].timC = rn.middle; + if (rn.all) { + printk(BIOS_EMERG, "timC discovery failed: %d, %d, %d\n", + channel, slotrank, lane); + return MAKE_ERR; + } + printram("Cval: %d, %d, %d: %x\n", channel, slotrank, + lane, ctrl->timings[channel][slotrank].lanes[lane].timC); + } + return 0; +} + +static int get_precedening_channels(ramctr_timing * ctrl, int target_channel) +{ + int channel, ret = 0; + FOR_ALL_POPULATED_CHANNELS if (channel < target_channel) + ret++; + return ret; +} + +static void fill_pattern0(ramctr_timing * ctrl, int channel, u32 a, u32 b) +{ + unsigned j; + unsigned channel_offset = + get_precedening_channels(ctrl, channel) * 0x40; + for (j = 0; j < 16; j++) + write32((void *)(0x04000000 + channel_offset + 4 * j), j & 2 ? b : a); + sfence(); +} + +static int num_of_channels(const ramctr_timing * ctrl) +{ + int ret = 0; + int channel; + FOR_ALL_POPULATED_CHANNELS ret++; + return ret; +} + +static void fill_pattern1(ramctr_timing * ctrl, int channel) +{ + unsigned j; + unsigned channel_offset = + get_precedening_channels(ctrl, channel) * 0x40; + unsigned channel_step = 0x40 * num_of_channels(ctrl); + for (j = 0; j < 16; j++) + write32((void *)(0x04000000 + channel_offset + j * 4), 0xffffffff); + for (j = 0; j < 16; j++) + write32((void *)(0x04000000 + channel_offset + channel_step + j * 4), 0); + sfence(); +} + +static void precharge(ramctr_timing * ctrl) +{ + int channel, slotrank, lane; + + FOR_ALL_POPULATED_CHANNELS { + FOR_ALL_POPULATED_RANKS FOR_ALL_LANES { + ctrl->timings[channel][slotrank].lanes[lane].falling = + 16; + ctrl->timings[channel][slotrank].lanes[lane].rising = + 16; + } + + program_timings(ctrl, channel); + + FOR_ALL_POPULATED_RANKS { + wait_428c(channel); + + /* DRAM command MRS + * write MR3 MPR enable + * in this mode only RD and RDA are allowed + * all reads return a predefined pattern */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, + 0x1f000); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, + 0xc01 | (ctrl->tMOD << 16)); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x360004); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, + 0x1f105); + write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, + 0x4041003); + write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, + (slotrank << 24) | 0); + write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, + 0x1f105); + write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, + 0x1001 | ((ctrl->CAS + 8) << 16)); + write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, + (slotrank << 24) | 0x60000); + write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0); + + /* DRAM command MRS + * write MR3 MPR disable */ + write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, + 0x1f000); + write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel, + 0xc01 | (ctrl->tMOD << 16)); + write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel, + (slotrank << 24) | 0x360000); + write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0); + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, + 0xc0001); + + wait_428c(channel); + } + + FOR_ALL_POPULATED_RANKS FOR_ALL_LANES { + ctrl->timings[channel][slotrank].lanes[lane].falling = + 48; + ctrl->timings[channel][slotrank].lanes[lane].rising = + 48; + } + + program_timings(ctrl, channel); + + FOR_ALL_POPULATED_RANKS { + wait_428c(channel); + /* DRAM command MRS + * write MR3 MPR enable + * in this mode only RD and RDA are allowed + * all reads return a predefined pattern */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, + 0x1f000); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, + 0xc01 | (ctrl->tMOD << 16)); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x360004); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, + 0x1f105); + write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, + 0x4041003); + write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, + (slotrank << 24) | 0); + write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, + 0x1f105); + write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, + 0x1001 | ((ctrl->CAS + 8) << 16)); + write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, + (slotrank << 24) | 0x60000); + write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0); + + /* DRAM command MRS + * write MR3 MPR disable */ + write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, + 0x1f000); + write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel, + 0xc01 | (ctrl->tMOD << 16)); + + write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel, + (slotrank << 24) | 0x360000); + write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0); + + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, + 0xc0001); + wait_428c(channel); + } + } +} + +static void test_timB(ramctr_timing * ctrl, int channel, int slotrank) +{ + /* enable DQs on this slotrank */ + write_mrreg(ctrl, channel, slotrank, 1, + 0x80 | make_mr1(ctrl, slotrank, channel)); + + wait_428c(channel); + /* DRAM command NOP */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f207); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, + 0x8000c01 | ((ctrl->CWL + ctrl->tWLO) << 16)); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + 8 | (slotrank << 24)); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0); + + /* DRAM command NOP */ + write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f107); + write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, + 0x4000c01 | ((ctrl->CAS + 38) << 16)); + write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, + (slotrank << 24) | 4); + write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0); + + write32(DEFAULT_MCHBAR + 0x400 * channel + 0x4284, 0x40001); + wait_428c(channel); + + /* disable DQs on this slotrank */ + write_mrreg(ctrl, channel, slotrank, 1, + 0x1080 | make_mr1(ctrl, slotrank, channel)); +} + +static int discover_timB(ramctr_timing *ctrl, int channel, int slotrank) +{ + int timB; + int statistics[NUM_LANES][128]; + int lane; + + write32(DEFAULT_MCHBAR + 0x3400, 0x108052 | (slotrank << 2)); + + for (timB = 0; timB < 128; timB++) { + FOR_ALL_LANES { + ctrl->timings[channel][slotrank].lanes[lane].timB = timB; + } + program_timings(ctrl, channel); + + test_timB(ctrl, channel, slotrank); + + FOR_ALL_LANES { + statistics[lane][timB] = + !((read32 + (DEFAULT_MCHBAR + lane_registers[lane] + + channel * 0x100 + 4 + ((timB / 32) & 1) * 4) + >> (timB % 32)) & 1); + printram("Bstat: %d, %d, %d: %x, %x\n", + channel, slotrank, lane, timB, + statistics[lane][timB]); + } + } + FOR_ALL_LANES { + struct run rn = get_longest_zero_run(statistics[lane], 128); + /* timC is a direct function of timB's 6 LSBs. + * Some tests increments the value of timB by a small value, + * which might cause the 6bit value to overflow, if it's close + * to 0x3F. Increment the value by a small offset if it's likely + * to overflow, to make sure it won't overflow while running + * tests and bricks the system due to a non matching timC. + * + * TODO: find out why some tests (edge write discovery) + * increment timB. */ + if ((rn.start & 0x3F) == 0x3E) + rn.start += 2; + else if ((rn.start & 0x3F) == 0x3F) + rn.start += 1; + ctrl->timings[channel][slotrank].lanes[lane].timB = rn.start; + if (rn.all) { + printk(BIOS_EMERG, "timB discovery failed: %d, %d, %d\n", + channel, slotrank, lane); + return MAKE_ERR; + } + printram("Bval: %d, %d, %d: %x\n", channel, slotrank, + lane, ctrl->timings[channel][slotrank].lanes[lane].timB); + } + return 0; +} + +static int get_timB_high_adjust(u64 val) +{ + int i; + + /* good */ + if (val == 0xffffffffffffffffLL) + return 0; + + if (val >= 0xf000000000000000LL) { + /* needs negative adjustment */ + for (i = 0; i < 8; i++) + if (val << (8 * (7 - i) + 4)) + return -i; + } else { + /* needs positive adjustment */ + for (i = 0; i < 8; i++) + if (val >> (8 * (7 - i) + 4)) + return i; + } + return 8; +} + +static void adjust_high_timB(ramctr_timing * ctrl) +{ + int channel, slotrank, lane, old; + write32(DEFAULT_MCHBAR + 0x3400, 0x200); + FOR_ALL_POPULATED_CHANNELS { + fill_pattern1(ctrl, channel); + write32(DEFAULT_MCHBAR + 0x4288 + (channel << 10), 1); + } + FOR_ALL_POPULATED_CHANNELS FOR_ALL_POPULATED_RANKS { + + write32(DEFAULT_MCHBAR + 0x4288 + 0x400 * channel, 0x10001); + + wait_428c(channel); + + /* DRAM command ACT */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f006); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, + 0xc01 | (ctrl->tRCD << 16)); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x60000); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0); + + /* DRAM command NOP */ + write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f207); + write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x8040c01); + write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, + (slotrank << 24) | 0x8); + write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0x3e0); + + /* DRAM command WR */ + write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x1f201); + write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, 0x8041003); + write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, + (slotrank << 24)); + write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0x3e2); + + /* DRAM command NOP */ + write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, 0x1f207); + write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel, + 0x8000c01 | ((ctrl->CWL + ctrl->tWTR + 5) << 16)); + write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel, + (slotrank << 24) | 0x8); + write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0x3e0); + + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001); + + wait_428c(channel); + + /* DRAM command PREA */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f002); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, + 0xc01 | ((ctrl->tRP) << 16)); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x60400); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x240); + + /* DRAM command ACT */ + write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f006); + write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, + 0xc01 | ((ctrl->tRCD) << 16)); + write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, + (slotrank << 24) | 0x60000); + write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x3f105); + write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, + 0x4000c01 | + ((ctrl->tRP + + ctrl->timings[channel][slotrank].val_4024 + + ctrl->timings[channel][slotrank].val_4028) << 16)); + write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, + (slotrank << 24) | 0x60008); + write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0); + + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0x80001); + wait_428c(channel); + FOR_ALL_LANES { + u64 res = + read32(DEFAULT_MCHBAR + lane_registers[lane] + + 0x100 * channel + 4); + res |= + ((u64) read32(DEFAULT_MCHBAR + lane_registers[lane] + + 0x100 * channel + 8)) << 32; + old = ctrl->timings[channel][slotrank].lanes[lane].timB; + ctrl->timings[channel][slotrank].lanes[lane].timB += + get_timB_high_adjust(res) * 64; + + printram("High adjust %d:%016llx\n", lane, res); + printram("Bval+: %d, %d, %d, %x -> %x\n", channel, + slotrank, lane, old, + ctrl->timings[channel][slotrank].lanes[lane]. + timB); + } + } + write32(DEFAULT_MCHBAR + 0x3400, 0); +} + +static void write_op(ramctr_timing * ctrl, int channel) +{ + int slotrank; + + wait_428c(channel); + + /* choose an existing rank. */ + slotrank = !(ctrl->rankmap[channel] & 1) ? 2 : 0; + + /* DRAM command ACT */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x0f003); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x41001); + + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x60000); + + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x3e0); + + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 1); + wait_428c(channel); +} + +/* Compensate the skew between CMD/ADDR/CLK and DQ/DQS lanes. + * DDR3 adopted the fly-by topology. The data and strobes signals reach + * the chips at different times with respect to command, address and + * clock signals. + * By delaying either all DQ/DQs or all CMD/ADDR/CLK signals, a full phase + * shift can be introduced. + * It is assumed that the CLK/ADDR/CMD signals have the same routing delay. + * + * To find the required phase shift the DRAM is placed in "write leveling" mode. + * In this mode the DRAM-chip samples the CLK on every DQS edge and feeds back the + * sampled value on the data lanes (DQs). + */ +int write_training(ramctr_timing * ctrl) +{ + int channel, slotrank, lane; + int err; + + FOR_ALL_POPULATED_CHANNELS + write32(DEFAULT_MCHBAR + 0x4008 + 0x400 * channel, + read32(DEFAULT_MCHBAR + 0x4008 + + 0x400 * channel) | 0x8000000); + + FOR_ALL_POPULATED_CHANNELS { + write_op(ctrl, channel); + write32(DEFAULT_MCHBAR + 0x4020 + 0x400 * channel, + read32(DEFAULT_MCHBAR + 0x4020 + + 0x400 * channel) | 0x200000); + } + + /* refresh disable */ + write32(DEFAULT_MCHBAR + 0x5030, read32(DEFAULT_MCHBAR + 0x5030) & ~8); + FOR_ALL_POPULATED_CHANNELS { + write_op(ctrl, channel); + } + + /* enable write leveling on all ranks + * disable all DQ outputs + * only NOP is allowed in this mode */ + FOR_ALL_CHANNELS + FOR_ALL_POPULATED_RANKS + write_mrreg(ctrl, channel, slotrank, 1, + make_mr1(ctrl, slotrank, channel) | 0x1080); + + write32(DEFAULT_MCHBAR + 0x3400, 0x108052); + + toggle_io_reset(); + + /* set any valid value for timB, it gets corrected later */ + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS { + err = discover_timB(ctrl, channel, slotrank); + if (err) + return err; + } + + /* disable write leveling on all ranks */ + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS + write_mrreg(ctrl, channel, + slotrank, 1, make_mr1(ctrl, slotrank, channel)); + + write32(DEFAULT_MCHBAR + 0x3400, 0); + + FOR_ALL_POPULATED_CHANNELS + wait_428c(channel); + + /* refresh enable */ + write32(DEFAULT_MCHBAR + 0x5030, read32(DEFAULT_MCHBAR + 0x5030) | 8); + + FOR_ALL_POPULATED_CHANNELS { + write32(DEFAULT_MCHBAR + 0x4020 + 0x400 * channel, + ~0x00200000 & read32(DEFAULT_MCHBAR + 0x4020 + + 0x400 * channel)); + read32(DEFAULT_MCHBAR + 0x428c + 0x400 * channel); + wait_428c(channel); + + /* DRAM command ZQCS */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x0f003); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x659001); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, 0x60000); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x3e0); + + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 1); + wait_428c(channel); + } + + toggle_io_reset(); + + printram("CPE\n"); + precharge(ctrl); + printram("CPF\n"); + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES { + read32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + 4 * lane); + write32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + 4 * lane, + 0); + } + + FOR_ALL_POPULATED_CHANNELS { + fill_pattern0(ctrl, channel, 0xaaaaaaaa, 0x55555555); + write32(DEFAULT_MCHBAR + 0x4288 + (channel << 10), 0); + } + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS { + err = discover_timC(ctrl, channel, slotrank); + if (err) + return err; + } + + FOR_ALL_POPULATED_CHANNELS + program_timings(ctrl, channel); + + /* measure and adjust timB timings */ + adjust_high_timB(ctrl); + + FOR_ALL_POPULATED_CHANNELS + program_timings(ctrl, channel); + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES { + read32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + 4 * lane); + write32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + 4 * lane, + 0); + } + return 0; +} + +static int test_320c(ramctr_timing * ctrl, int channel, int slotrank) +{ + struct ram_rank_timings saved_rt = ctrl->timings[channel][slotrank]; + int timC_delta; + int lanes_ok = 0; + int ctr = 0; + int lane; + + for (timC_delta = -5; timC_delta <= 5; timC_delta++) { + FOR_ALL_LANES { + ctrl->timings[channel][slotrank].lanes[lane].timC = + saved_rt.lanes[lane].timC + timC_delta; + } + program_timings(ctrl, channel); + FOR_ALL_LANES { + write32(DEFAULT_MCHBAR + 4 * lane + 0x4f40, 0); + } + + write32(DEFAULT_MCHBAR + 0x4288 + 0x400 * channel, 0x1f); + + wait_428c(channel); + /* DRAM command ACT */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f006); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, + ((max(ctrl->tRRD, (ctrl->tFAW >> 2) + 1)) << 10) + | 8 | (ctrl->tRCD << 16)); + + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | ctr | 0x60000); + + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x244); + /* DRAM command WR */ + write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f201); + write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, + 0x8001020 | ((ctrl->CWL + ctrl->tWTR + 8) << 16)); + write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, + (slotrank << 24)); + write32(DEFAULT_MCHBAR + 0x4244 + 0x400 * channel, 0x389abcd); + write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0x20e42); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x1f105); + write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, + 0x4001020 | (max(ctrl->tRTP, 8) << 16)); + write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, + (slotrank << 24)); + write32(DEFAULT_MCHBAR + 0x4248 + 0x400 * channel, 0x389abcd); + write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0x20e42); + + /* DRAM command PRE */ + write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, 0x1f002); + write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel, 0xf1001); + write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel, + (slotrank << 24) | 0x60400); + write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0x240); + + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001); + wait_428c(channel); + FOR_ALL_LANES { + u32 r32 = + read32(DEFAULT_MCHBAR + 0x4340 + 4 * lane + + 0x400 * channel); + + if (r32 == 0) + lanes_ok |= 1 << lane; + } + ctr++; + if (lanes_ok == ((1 << NUM_LANES) - 1)) + break; + } + + ctrl->timings[channel][slotrank] = saved_rt; + + printram("3lanes: %x\n", lanes_ok); + return lanes_ok != ((1 << NUM_LANES) - 1); +} + +#include "raminit_patterns.h" + +static void fill_pattern5(ramctr_timing * ctrl, int channel, int patno) +{ + unsigned i, j; + unsigned channel_offset = + get_precedening_channels(ctrl, channel) * 0x40; + unsigned channel_step = 0x40 * num_of_channels(ctrl); + + if (patno) { + u8 base8 = 0x80 >> ((patno - 1) % 8); + u32 base = base8 | (base8 << 8) | (base8 << 16) | (base8 << 24); + for (i = 0; i < 32; i++) { + for (j = 0; j < 16; j++) { + u32 val = use_base[patno - 1][i] & (1 << (j / 2)) ? base : 0; + if (invert[patno - 1][i] & (1 << (j / 2))) + val = ~val; + write32((void *)(0x04000000 + channel_offset + i * channel_step + + j * 4), val); + } + } + + } else { + for (i = 0; i < sizeof(pattern) / sizeof(pattern[0]); i++) { + for (j = 0; j < 16; j++) + write32((void *)(0x04000000 + channel_offset + i * channel_step + + j * 4), pattern[i][j]); + } + sfence(); + } +} + +static void reprogram_320c(ramctr_timing * ctrl) +{ + int channel, slotrank; + + FOR_ALL_POPULATED_CHANNELS { + wait_428c(channel); + + /* choose an existing rank. */ + slotrank = !(ctrl->rankmap[channel] & 1) ? 2 : 0; + + /* DRAM command ZQCS */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x0f003); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x41001); + + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x60000); + + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x3e0); + + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 1); + wait_428c(channel); + write32(DEFAULT_MCHBAR + 0x4020 + 0x400 * channel, + read32(DEFAULT_MCHBAR + 0x4020 + + 0x400 * channel) | 0x200000); + } + + /* refresh disable */ + write32(DEFAULT_MCHBAR + 0x5030, read32(DEFAULT_MCHBAR + 0x5030) & ~8); + FOR_ALL_POPULATED_CHANNELS { + wait_428c(channel); + + /* choose an existing rank. */ + slotrank = !(ctrl->rankmap[channel] & 1) ? 2 : 0; + + /* DRAM command ZQCS */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x0f003); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, 0x41001); + + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x60000); + + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x3e0); + + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 1); + wait_428c(channel); + } + + /* jedec reset */ + dram_jedecreset(ctrl); + /* mrs commands. */ + dram_mrscommands(ctrl); + + toggle_io_reset(); +} + +#define MIN_C320C_LEN 13 + +static int try_cmd_stretch(ramctr_timing *ctrl, int channel, int cmd_stretch) +{ + struct ram_rank_timings saved_timings[NUM_CHANNELS][NUM_SLOTRANKS]; + int slotrank; + int c320c; + int stat[NUM_SLOTRANKS][256]; + int delta = 0; + + printram("Trying cmd_stretch %d on channel %d\n", cmd_stretch, channel); + + FOR_ALL_POPULATED_RANKS { + saved_timings[channel][slotrank] = + ctrl->timings[channel][slotrank]; + } + + ctrl->cmd_stretch[channel] = cmd_stretch; + + MCHBAR32(0x4004 + 0x400 * channel) = + ctrl->tRRD + | (ctrl->tRTP << 4) + | (ctrl->tCKE << 8) + | (ctrl->tWTR << 12) + | (ctrl->tFAW << 16) + | (ctrl->tWR << 24) + | (ctrl->cmd_stretch[channel] << 30); + + if (ctrl->cmd_stretch[channel] == 2) + delta = 2; + else if (ctrl->cmd_stretch[channel] == 0) + delta = 4; + + FOR_ALL_POPULATED_RANKS { + ctrl->timings[channel][slotrank].val_4024 -= delta; + } + + for (c320c = -127; c320c <= 127; c320c++) { + FOR_ALL_POPULATED_RANKS { + ctrl->timings[channel][slotrank].val_320c = c320c; + } + program_timings(ctrl, channel); + reprogram_320c(ctrl); + FOR_ALL_POPULATED_RANKS { + stat[slotrank][c320c + 127] = + test_320c(ctrl, channel, slotrank); + printram("3stat: %d, %d, %d: %x\n", + channel, slotrank, c320c, + stat[slotrank][c320c + 127]); + } + } + FOR_ALL_POPULATED_RANKS { + struct run rn = + get_longest_zero_run(stat[slotrank], 255); + ctrl->timings[channel][slotrank].val_320c = + rn.middle - 127; + printram("3val %d, %d: %d\n", channel, + slotrank, + ctrl->timings[channel][slotrank].val_320c); + if (rn.all || rn.length < MIN_C320C_LEN) { + FOR_ALL_POPULATED_RANKS { + ctrl->timings[channel][slotrank] = + saved_timings[channel][slotrank]; + } + return MAKE_ERR; + } + } + + return 0; +} + +/* Adjust CMD phase shift and try multiple command rates. + * A command rate of 2T doubles the time needed for address and + * command decode. */ +int command_training(ramctr_timing *ctrl) +{ + int channel; + int err; + + FOR_ALL_POPULATED_CHANNELS { + fill_pattern5(ctrl, channel, 0); + write32(DEFAULT_MCHBAR + 0x4288 + 0x400 * channel, 0x1f); + } + + FOR_ALL_POPULATED_CHANNELS { + /* try command rate 1T and 2T */ + err = try_cmd_stretch(ctrl, channel, 0); + if (err) { + err = try_cmd_stretch(ctrl, channel, 2); + if (err) { + printk(BIOS_EMERG, "c320c discovery failed\n"); + return err; + } + printram("Using CMD rate 2T on channel %u\n", channel); + } else + printram("Using CMD rate 1T on channel %u\n", channel); + } + + FOR_ALL_POPULATED_CHANNELS + program_timings(ctrl, channel); + + reprogram_320c(ctrl); + return 0; +} + + +static int discover_edges_real(ramctr_timing *ctrl, int channel, int slotrank, + int *edges) +{ + int edge; + int statistics[NUM_LANES][MAX_EDGE_TIMING + 1]; + int lane; + + for (edge = 0; edge <= MAX_EDGE_TIMING; edge++) { + FOR_ALL_LANES { + ctrl->timings[channel][slotrank].lanes[lane].rising = + edge; + ctrl->timings[channel][slotrank].lanes[lane].falling = + edge; + } + program_timings(ctrl, channel); + + FOR_ALL_LANES { + write32(DEFAULT_MCHBAR + 0x4340 + 0x400 * channel + + 4 * lane, 0); + read32(DEFAULT_MCHBAR + 0x400 * channel + 4 * lane + + 0x4140); + } + + wait_428c(channel); + /* DRAM command MRS + * write MR3 MPR enable + * in this mode only RD and RDA are allowed + * all reads return a predefined pattern */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f000); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, + (0xc01 | (ctrl->tMOD << 16))); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x360004); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f105); + write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, 0x40411f4); + write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, + (slotrank << 24)); + write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, 0x1f105); + write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, + 0x1001 | ((ctrl->CAS + 8) << 16)); + write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, + (slotrank << 24) | 0x60000); + write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0); + + /* DRAM command MRS + * MR3 disable MPR */ + write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, 0x1f000); + write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel, + (0xc01 | (ctrl->tMOD << 16))); + write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel, + (slotrank << 24) | 0x360000); + write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0); + + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, 0xc0001); + + wait_428c(channel); + + FOR_ALL_LANES { + statistics[lane][edge] = + read32(DEFAULT_MCHBAR + 0x4340 + 0x400 * channel + + lane * 4); + } + } + FOR_ALL_LANES { + struct run rn = + get_longest_zero_run(statistics[lane], MAX_EDGE_TIMING + 1); + edges[lane] = rn.middle; + if (rn.all) { + printk(BIOS_EMERG, "edge discovery failed: %d, %d, %d\n", + channel, slotrank, lane); + return MAKE_ERR; + } + printram("eval %d, %d, %d: %02x\n", channel, slotrank, + lane, edges[lane]); + } + return 0; +} + +int discover_edges(ramctr_timing *ctrl) +{ + int falling_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES]; + int rising_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES]; + int channel, slotrank, lane; + int err; + + write32(DEFAULT_MCHBAR + 0x3400, 0); + + toggle_io_reset(); + + FOR_ALL_POPULATED_CHANNELS FOR_ALL_LANES { + write32(DEFAULT_MCHBAR + 4 * lane + + 0x400 * channel + 0x4080, 0); + } + + FOR_ALL_POPULATED_CHANNELS { + fill_pattern0(ctrl, channel, 0, 0); + write32(DEFAULT_MCHBAR + 0x4288 + (channel << 10), 0); + FOR_ALL_LANES { + read32(DEFAULT_MCHBAR + 0x400 * channel + + lane * 4 + 0x4140); + } + + FOR_ALL_POPULATED_RANKS FOR_ALL_LANES { + ctrl->timings[channel][slotrank].lanes[lane].falling = + 16; + ctrl->timings[channel][slotrank].lanes[lane].rising = + 16; + } + + program_timings(ctrl, channel); + + FOR_ALL_POPULATED_RANKS { + wait_428c(channel); + + /* DRAM command MRS + * MR3 enable MPR + * write MR3 MPR enable + * in this mode only RD and RDA are allowed + * all reads return a predefined pattern */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, + 0x1f000); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, + 0xc01 | (ctrl->tMOD << 16)); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x360004); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, + 0x1f105); + write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, + 0x4041003); + write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, + (slotrank << 24) | 0); + write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, + 0x1f105); + write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, + 0x1001 | ((ctrl->CAS + 8) << 16)); + write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, + (slotrank << 24) | 0x60000); + write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0); + + /* DRAM command MRS + * MR3 disable MPR */ + write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, + 0x1f000); + write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel, + 0xc01 | (ctrl->tMOD << 16)); + write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel, + (slotrank << 24) | 0x360000); + write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0); + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, + 0xc0001); + + wait_428c(channel); + } + + /* XXX: check any measured value ? */ + + FOR_ALL_POPULATED_RANKS FOR_ALL_LANES { + ctrl->timings[channel][slotrank].lanes[lane].falling = + 48; + ctrl->timings[channel][slotrank].lanes[lane].rising = + 48; + } + + program_timings(ctrl, channel); + + FOR_ALL_POPULATED_RANKS { + wait_428c(channel); + + /* DRAM command MRS + * MR3 enable MPR + * write MR3 MPR enable + * in this mode only RD and RDA are allowed + * all reads return a predefined pattern */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, + 0x1f000); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, + 0xc01 | (ctrl->tMOD << 16)); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x360004); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, + 0x1f105); + write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, + 0x4041003); + write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, + (slotrank << 24) | 0); + write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, 0); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, + 0x1f105); + write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, + 0x1001 | ((ctrl->CAS + 8) << 16)); + write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, + (slotrank << 24) | 0x60000); + write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, 0); + + /* DRAM command MRS + * MR3 disable MPR */ + write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, + 0x1f000); + write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel, + 0xc01 | (ctrl->tMOD << 16)); + write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel, + (slotrank << 24) | 0x360000); + write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0); + + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, + 0xc0001); + wait_428c(channel); + } + + /* XXX: check any measured value ? */ + + FOR_ALL_LANES { + write32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + + lane * 4, + ~read32(DEFAULT_MCHBAR + 0x4040 + + 0x400 * channel + lane * 4) & 0xff); + } + + fill_pattern0(ctrl, channel, 0, 0xffffffff); + write32(DEFAULT_MCHBAR + 0x4288 + (channel << 10), 0); + } + + /* FIXME: under some conditions (older chipsets?) vendor BIOS sets both edges to the same value. */ + write32(DEFAULT_MCHBAR + 0x4eb0, 0x300); + printram("discover falling edges:\n[%x] = %x\n", 0x4eb0, 0x300); + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS { + err = discover_edges_real(ctrl, channel, slotrank, + falling_edges[channel][slotrank]); + if (err) + return err; + } + + write32(DEFAULT_MCHBAR + 0x4eb0, 0x200); + printram("discover rising edges:\n[%x] = %x\n", 0x4eb0, 0x200); + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS { + err = discover_edges_real(ctrl, channel, slotrank, + rising_edges[channel][slotrank]); + if (err) + return err; + } + + write32(DEFAULT_MCHBAR + 0x4eb0, 0); + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES { + ctrl->timings[channel][slotrank].lanes[lane].falling = + falling_edges[channel][slotrank][lane]; + ctrl->timings[channel][slotrank].lanes[lane].rising = + rising_edges[channel][slotrank][lane]; + } + + FOR_ALL_POPULATED_CHANNELS { + program_timings(ctrl, channel); + } + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES { + write32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + 4 * lane, + 0); + } + return 0; +} + +static int discover_edges_write_real(ramctr_timing *ctrl, int channel, + int slotrank, int *edges) +{ + int edge; + u32 raw_statistics[MAX_EDGE_TIMING + 1]; + int statistics[MAX_EDGE_TIMING + 1]; + const int reg3000b24[] = { 0, 0xc, 0x2c }; + int lane, i; + int lower[NUM_LANES]; + int upper[NUM_LANES]; + int pat; + + FOR_ALL_LANES { + lower[lane] = 0; + upper[lane] = MAX_EDGE_TIMING; + } + + for (i = 0; i < 3; i++) { + write32(DEFAULT_MCHBAR + 0x3000 + 0x100 * channel, + reg3000b24[i] << 24); + printram("[%x] = 0x%08x\n", + 0x3000 + 0x100 * channel, reg3000b24[i] << 24); + for (pat = 0; pat < NUM_PATTERNS; pat++) { + fill_pattern5(ctrl, channel, pat); + write32(DEFAULT_MCHBAR + 0x4288 + 0x400 * channel, 0x1f); + printram("using pattern %d\n", pat); + for (edge = 0; edge <= MAX_EDGE_TIMING; edge++) { + FOR_ALL_LANES { + ctrl->timings[channel][slotrank].lanes[lane]. + rising = edge; + ctrl->timings[channel][slotrank].lanes[lane]. + falling = edge; + } + program_timings(ctrl, channel); + + FOR_ALL_LANES { + write32(DEFAULT_MCHBAR + 0x4340 + + 0x400 * channel + 4 * lane, 0); + read32(DEFAULT_MCHBAR + 0x400 * channel + + 4 * lane + 0x4140); + } + wait_428c(channel); + + /* DRAM command ACT */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, + 0x1f006); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, + 0x4 | (ctrl->tRCD << 16) + | (max(ctrl->tRRD, (ctrl->tFAW >> 2) + 1) << + 10)); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x60000); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, + 0x240); + + /* DRAM command WR */ + write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, + 0x1f201); + write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, + 0x8005020 | ((ctrl->tWTR + ctrl->CWL + 8) << + 16)); + write32(DEFAULT_MCHBAR + 0x4204 + 0x400 * channel, + (slotrank << 24)); + write32(DEFAULT_MCHBAR + 0x4214 + 0x400 * channel, + 0x242); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4228 + 0x400 * channel, + 0x1f105); + write32(DEFAULT_MCHBAR + 0x4238 + 0x400 * channel, + 0x4005020 | (max(ctrl->tRTP, 8) << 16)); + write32(DEFAULT_MCHBAR + 0x4208 + 0x400 * channel, + (slotrank << 24)); + write32(DEFAULT_MCHBAR + 0x4218 + 0x400 * channel, + 0x242); + + /* DRAM command PRE */ + write32(DEFAULT_MCHBAR + 0x422c + 0x400 * channel, + 0x1f002); + write32(DEFAULT_MCHBAR + 0x423c + 0x400 * channel, + 0xc01 | (ctrl->tRP << 16)); + write32(DEFAULT_MCHBAR + 0x420c + 0x400 * channel, + (slotrank << 24) | 0x60400); + write32(DEFAULT_MCHBAR + 0x421c + 0x400 * channel, 0); + + write32(DEFAULT_MCHBAR + 0x4284 + 0x400 * channel, + 0xc0001); + wait_428c(channel); + FOR_ALL_LANES { + read32(DEFAULT_MCHBAR + 0x4340 + + 0x400 * channel + lane * 4); + } + + raw_statistics[edge] = + MCHBAR32(0x436c + 0x400 * channel); + } + FOR_ALL_LANES { + struct run rn; + for (edge = 0; edge <= MAX_EDGE_TIMING; edge++) + statistics[edge] = + ! !(raw_statistics[edge] & (1 << lane)); + rn = get_longest_zero_run(statistics, + MAX_EDGE_TIMING + 1); + printram("edges: %d, %d, %d: 0x%02x-0x%02x-0x%02x, 0x%02x-0x%02x\n", + channel, slotrank, i, rn.start, rn.middle, + rn.end, rn.start + ctrl->edge_offset[i], + rn.end - ctrl->edge_offset[i]); + lower[lane] = + max(rn.start + ctrl->edge_offset[i], lower[lane]); + upper[lane] = + min(rn.end - ctrl->edge_offset[i], upper[lane]); + edges[lane] = (lower[lane] + upper[lane]) / 2; + if (rn.all || (lower[lane] > upper[lane])) { + printk(BIOS_EMERG, "edge write discovery failed: %d, %d, %d\n", + channel, slotrank, lane); + return MAKE_ERR; + } + } + } + } + + write32(DEFAULT_MCHBAR + 0x3000, 0); + printram("CPA\n"); + return 0; +} + +int discover_edges_write(ramctr_timing *ctrl) +{ + int falling_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES]; + int rising_edges[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES]; + int channel, slotrank, lane; + int err; + + /* FIXME: under some conditions (older chipsets?) vendor BIOS sets both edges to the same value. */ + write32(DEFAULT_MCHBAR + 0x4eb0, 0x300); + printram("discover falling edges write:\n[%x] = %x\n", 0x4eb0, 0x300); + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS { + err = discover_edges_write_real(ctrl, channel, slotrank, + falling_edges[channel][slotrank]); + if (err) + return err; + } + + write32(DEFAULT_MCHBAR + 0x4eb0, 0x200); + printram("discover rising edges write:\n[%x] = %x\n", 0x4eb0, 0x200); + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS { + err = discover_edges_write_real(ctrl, channel, slotrank, + rising_edges[channel][slotrank]); + if (err) + return err; + } + + write32(DEFAULT_MCHBAR + 0x4eb0, 0); + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES { + ctrl->timings[channel][slotrank].lanes[lane].falling = + falling_edges[channel][slotrank][lane]; + ctrl->timings[channel][slotrank].lanes[lane].rising = + rising_edges[channel][slotrank][lane]; + } + + FOR_ALL_POPULATED_CHANNELS + program_timings(ctrl, channel); + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES { + write32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + 4 * lane, + 0); + } + return 0; +} + +static void test_timC_write(ramctr_timing *ctrl, int channel, int slotrank) +{ + wait_428c(channel); + /* DRAM command ACT */ + write32(DEFAULT_MCHBAR + 0x4220 + 0x400 * channel, 0x1f006); + write32(DEFAULT_MCHBAR + 0x4230 + 0x400 * channel, + (max((ctrl->tFAW >> 2) + 1, ctrl->tRRD) + << 10) | (ctrl->tRCD << 16) | 4); + write32(DEFAULT_MCHBAR + 0x4200 + 0x400 * channel, + (slotrank << 24) | 0x60000); + write32(DEFAULT_MCHBAR + 0x4210 + 0x400 * channel, 0x244); + + /* DRAM command WR */ + write32(DEFAULT_MCHBAR + 0x4224 + 0x400 * channel, 0x1f201); + write32(DEFAULT_MCHBAR + 0x4234 + 0x400 * channel, + 0x80011e0 | + ((ctrl->tWTR + ctrl->CWL + 8) << 16)); + write32(DEFAULT_MCHBAR + 0x4204 + + 0x400 * channel, (slotrank << 24)); + write32(DEFAULT_MCHBAR + 0x4214 + + 0x400 * channel, 0x242); + + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4228 + + 0x400 * channel, 0x1f105); + write32(DEFAULT_MCHBAR + 0x4238 + + 0x400 * channel, + 0x40011e0 | (max(ctrl->tRTP, 8) << 16)); + write32(DEFAULT_MCHBAR + 0x4208 + + 0x400 * channel, (slotrank << 24)); + write32(DEFAULT_MCHBAR + 0x4218 + + 0x400 * channel, 0x242); + + /* DRAM command PRE */ + write32(DEFAULT_MCHBAR + 0x422c + + 0x400 * channel, 0x1f002); + write32(DEFAULT_MCHBAR + 0x423c + + 0x400 * channel, + 0x1001 | (ctrl->tRP << 16)); + write32(DEFAULT_MCHBAR + 0x420c + + 0x400 * channel, + (slotrank << 24) | 0x60400); + write32(DEFAULT_MCHBAR + 0x421c + + 0x400 * channel, 0); + + write32(DEFAULT_MCHBAR + 0x4284 + + 0x400 * channel, 0xc0001); + wait_428c(channel); +} + +int discover_timC_write(ramctr_timing *ctrl) +{ + const u8 rege3c_b24[3] = { 0, 0xf, 0x2f }; + int i, pat; + + int lower[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES]; + int upper[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES]; + int channel, slotrank, lane; + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES { + lower[channel][slotrank][lane] = 0; + upper[channel][slotrank][lane] = MAX_TIMC; + } + + write32(DEFAULT_MCHBAR + 0x4ea8, 1); + printram("discover timC write:\n"); + + for (i = 0; i < 3; i++) + FOR_ALL_POPULATED_CHANNELS { + write32(DEFAULT_MCHBAR + 0xe3c + (channel * 0x100), + (rege3c_b24[i] << 24) + | (read32(DEFAULT_MCHBAR + 0xe3c + (channel * 0x100)) + & ~0x3f000000)); + udelay(2); + for (pat = 0; pat < NUM_PATTERNS; pat++) { + FOR_ALL_POPULATED_RANKS { + int timC; + u32 raw_statistics[MAX_TIMC + 1]; + int statistics[MAX_TIMC + 1]; + + /* Make sure rn.start < rn.end */ + statistics[MAX_TIMC] = 1; + + fill_pattern5(ctrl, channel, pat); + write32(DEFAULT_MCHBAR + 0x4288 + 0x400 * channel, 0x1f); + for (timC = 0; timC < MAX_TIMC; timC++) { + FOR_ALL_LANES + ctrl->timings[channel][slotrank].lanes[lane].timC = timC; + program_timings(ctrl, channel); + + test_timC_write (ctrl, channel, slotrank); + + raw_statistics[timC] = + MCHBAR32(0x436c + 0x400 * channel); + } + FOR_ALL_LANES { + struct run rn; + for (timC = 0; timC < MAX_TIMC; timC++) + statistics[timC] = + !!(raw_statistics[timC] & + (1 << lane)); + + rn = get_longest_zero_run(statistics, + MAX_TIMC + 1); + if (rn.all) { + printk(BIOS_EMERG, "timC write discovery failed: %d, %d, %d\n", + channel, slotrank, lane); + return MAKE_ERR; + } + printram("timC: %d, %d, %d: 0x%02x-0x%02x-0x%02x, 0x%02x-0x%02x\n", + channel, slotrank, i, rn.start, + rn.middle, rn.end, + rn.start + ctrl->timC_offset[i], + rn.end - ctrl->timC_offset[i]); + lower[channel][slotrank][lane] = + max(rn.start + ctrl->timC_offset[i], + lower[channel][slotrank][lane]); + upper[channel][slotrank][lane] = + min(rn.end - ctrl->timC_offset[i], + upper[channel][slotrank][lane]); + + } + } + } + } + + FOR_ALL_CHANNELS { + write32(DEFAULT_MCHBAR + (channel * 0x100) + 0xe3c, + 0 | (read32(DEFAULT_MCHBAR + (channel * 0x100) + 0xe3c) & + ~0x3f000000)); + udelay(2); + } + + write32(DEFAULT_MCHBAR + 0x4ea8, 0); + + printram("CPB\n"); + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES { + printram("timC %d, %d, %d: %x\n", channel, + slotrank, lane, + (lower[channel][slotrank][lane] + + upper[channel][slotrank][lane]) / 2); + ctrl->timings[channel][slotrank].lanes[lane].timC = + (lower[channel][slotrank][lane] + + upper[channel][slotrank][lane]) / 2; + } + FOR_ALL_POPULATED_CHANNELS { + program_timings(ctrl, channel); + } + return 0; +} + +void normalize_training(ramctr_timing * ctrl) +{ + int channel, slotrank, lane; + int mat = 0; + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS { + int delta; + FOR_ALL_LANES mat = + max(ctrl->timings[channel][slotrank].lanes[lane].timA, mat); + delta = (mat >> 6) - ctrl->timings[channel][slotrank].val_4028; + ctrl->timings[channel][slotrank].val_4024 += delta; + ctrl->timings[channel][slotrank].val_4028 += delta; + } + + FOR_ALL_POPULATED_CHANNELS { + program_timings(ctrl, channel); + } +} + +void write_controller_mr(ramctr_timing * ctrl) +{ + int channel, slotrank; + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS { + write32(DEFAULT_MCHBAR + 0x0004 + (channel << 8) + + lane_registers[slotrank], make_mr0(ctrl, slotrank)); + write32(DEFAULT_MCHBAR + 0x0008 + (channel << 8) + + lane_registers[slotrank], + make_mr1(ctrl, slotrank, channel)); + } +} + +int channel_test(ramctr_timing *ctrl) +{ + int channel, slotrank, lane; + + slotrank = 0; + FOR_ALL_POPULATED_CHANNELS + if (read32(DEFAULT_MCHBAR + 0x42a0 + (channel << 10)) & 0xa000) { + printk(BIOS_EMERG, "Mini channel test failed (1): %d\n", + channel); + return MAKE_ERR; + } + FOR_ALL_POPULATED_CHANNELS { + fill_pattern0(ctrl, channel, 0x12345678, 0x98765432); + + write32(DEFAULT_MCHBAR + 0x4288 + (channel << 10), 0); + } + + for (slotrank = 0; slotrank < 4; slotrank++) + FOR_ALL_CHANNELS + if (ctrl->rankmap[channel] & (1 << slotrank)) { + FOR_ALL_LANES { + write32(DEFAULT_MCHBAR + (0x4f40 + 4 * lane), 0); + write32(DEFAULT_MCHBAR + (0x4d40 + 4 * lane), 0); + } + wait_428c(channel); + /* DRAM command ACT */ + write32(DEFAULT_MCHBAR + 0x4220 + (channel << 10), 0x0001f006); + write32(DEFAULT_MCHBAR + 0x4230 + (channel << 10), 0x0028a004); + write32(DEFAULT_MCHBAR + 0x4200 + (channel << 10), + 0x00060000 | (slotrank << 24)); + write32(DEFAULT_MCHBAR + 0x4210 + (channel << 10), 0x00000244); + /* DRAM command WR */ + write32(DEFAULT_MCHBAR + 0x4224 + (channel << 10), 0x0001f201); + write32(DEFAULT_MCHBAR + 0x4234 + (channel << 10), 0x08281064); + write32(DEFAULT_MCHBAR + 0x4204 + (channel << 10), + 0x00000000 | (slotrank << 24)); + write32(DEFAULT_MCHBAR + 0x4214 + (channel << 10), 0x00000242); + /* DRAM command RD */ + write32(DEFAULT_MCHBAR + 0x4228 + (channel << 10), 0x0001f105); + write32(DEFAULT_MCHBAR + 0x4238 + (channel << 10), 0x04281064); + write32(DEFAULT_MCHBAR + 0x4208 + (channel << 10), + 0x00000000 | (slotrank << 24)); + write32(DEFAULT_MCHBAR + 0x4218 + (channel << 10), 0x00000242); + /* DRAM command PRE */ + write32(DEFAULT_MCHBAR + 0x422c + (channel << 10), 0x0001f002); + write32(DEFAULT_MCHBAR + 0x423c + (channel << 10), 0x00280c01); + write32(DEFAULT_MCHBAR + 0x420c + (channel << 10), + 0x00060400 | (slotrank << 24)); + write32(DEFAULT_MCHBAR + 0x421c + (channel << 10), 0x00000240); + write32(DEFAULT_MCHBAR + 0x4284 + (channel << 10), 0x000c0001); + wait_428c(channel); + FOR_ALL_LANES + if (read32(DEFAULT_MCHBAR + 0x4340 + (channel << 10) + 4 * lane)) { + printk(BIOS_EMERG, "Mini channel test failed (2): %d, %d, %d\n", + channel, slotrank, lane); + return MAKE_ERR; + } + } + return 0; +} + +void set_scrambling_seed(ramctr_timing * ctrl) +{ + int channel; + + /* FIXME: we hardcode seeds. Do we need to use some PRNG for them? + I don't think so. */ + static u32 seeds[NUM_CHANNELS][3] = { + {0x00009a36, 0xbafcfdcf, 0x46d1ab68}, + {0x00028bfa, 0x53fe4b49, 0x19ed5483} + }; + FOR_ALL_POPULATED_CHANNELS { + MCHBAR32(0x4020 + 0x400 * channel) &= ~0x10000000; + write32(DEFAULT_MCHBAR + 0x4034, seeds[channel][0]); + write32(DEFAULT_MCHBAR + 0x403c, seeds[channel][1]); + write32(DEFAULT_MCHBAR + 0x4038, seeds[channel][2]); + } +} + +void set_4f8c(void) +{ + struct cpuid_result cpures; + u32 cpu; + + cpures = cpuid(1); + cpu = (cpures.eax); + if (IS_SANDY_CPU(cpu) && (IS_SANDY_CPU_D0(cpu) || IS_SANDY_CPU_D1(cpu))) { + MCHBAR32(0x4f8c) = 0x141D1519; + } else { + MCHBAR32(0x4f8c) = 0x551D1519; + } +} + +void prepare_training(ramctr_timing * ctrl) +{ + int channel; + + FOR_ALL_POPULATED_CHANNELS { + // Always drive command bus + MCHBAR32(0x4004 + 0x400 * channel) |= 0x20000000; + } + + udelay(1); + + FOR_ALL_POPULATED_CHANNELS { + wait_428c(channel); + } +} + +void set_4008c(ramctr_timing * ctrl) +{ + int channel, slotrank; + u32 reg; + FOR_ALL_POPULATED_CHANNELS { + u32 b20, b4_8_12; + int min_320c = 10000; + int max_320c = -10000; + + FOR_ALL_POPULATED_RANKS { + max_320c = max(ctrl->timings[channel][slotrank].val_320c, max_320c); + min_320c = min(ctrl->timings[channel][slotrank].val_320c, min_320c); + } + + if (max_320c - min_320c > 51) + b20 = 0; + else + b20 = ctrl->ref_card_offset[channel]; + + if (ctrl->reg_320c_range_threshold < max_320c - min_320c) + b4_8_12 = 0x3330; + else + b4_8_12 = 0x2220; + + reg = read32(DEFAULT_MCHBAR + 0x400c + (channel << 10)); + write32(DEFAULT_MCHBAR + 0x400c + (channel << 10), + (reg & 0xFFF0FFFF) + | (ctrl->ref_card_offset[channel] << 16) + | (ctrl->ref_card_offset[channel] << 18)); + write32(DEFAULT_MCHBAR + 0x4008 + (channel << 10), + 0x0a000000 + | (b20 << 20) + | ((ctrl->ref_card_offset[channel] + 2) << 16) + | b4_8_12); + } +} + +void set_42a0(ramctr_timing * ctrl) +{ + int channel; + FOR_ALL_POPULATED_CHANNELS { + write32(DEFAULT_MCHBAR + (0x42a0 + 0x400 * channel), + 0x00001000 | ctrl->rankmap[channel]); + MCHBAR32(0x4004 + 0x400 * channel) &= ~0x20000000; // OK + } +} + +static int encode_5d10(int ns) +{ + return (ns + 499) / 500; +} + +/* FIXME: values in this function should be hardware revision-dependent. */ +void final_registers(ramctr_timing * ctrl) +{ + int channel; + int t1_cycles = 0, t1_ns = 0, t2_ns; + int t3_ns; + u32 r32; + + write32(DEFAULT_MCHBAR + 0x4cd4, 0x00000046); + + write32(DEFAULT_MCHBAR + 0x400c, (read32(DEFAULT_MCHBAR + 0x400c) & 0xFFFFCFFF) | 0x1000); // OK + write32(DEFAULT_MCHBAR + 0x440c, (read32(DEFAULT_MCHBAR + 0x440c) & 0xFFFFCFFF) | 0x1000); // OK + write32(DEFAULT_MCHBAR + 0x4cb0, 0x00000740); + write32(DEFAULT_MCHBAR + 0x4380, 0x00000aaa); // OK + write32(DEFAULT_MCHBAR + 0x4780, 0x00000aaa); // OK + write32(DEFAULT_MCHBAR + 0x4f88, 0x5f7003ff); // OK + write32(DEFAULT_MCHBAR + 0x5064, 0x00073000 | ctrl->reg_5064b0); // OK + + FOR_ALL_CHANNELS { + switch (ctrl->rankmap[channel]) { + /* Unpopulated channel. */ + case 0: + write32(DEFAULT_MCHBAR + 0x4384 + channel * 0x400, 0); + break; + /* Only single-ranked dimms. */ + case 1: + case 4: + case 5: + write32(DEFAULT_MCHBAR + 0x4384 + channel * 0x400, 0x373131); + break; + /* Dual-ranked dimms present. */ + default: + write32(DEFAULT_MCHBAR + 0x4384 + channel * 0x400, 0x9b6ea1); + break; + } + } + + write32 (DEFAULT_MCHBAR + 0x5880, 0xca9171e5); + write32 (DEFAULT_MCHBAR + 0x5888, + (read32 (DEFAULT_MCHBAR + 0x5888) & ~0xffffff) | 0xe4d5d0); + write32 (DEFAULT_MCHBAR + 0x58a8, read32 (DEFAULT_MCHBAR + 0x58a8) & ~0x1f); + write32 (DEFAULT_MCHBAR + 0x4294, + (read32 (DEFAULT_MCHBAR + 0x4294) & ~0x30000) + | (1 << 16)); + write32 (DEFAULT_MCHBAR + 0x4694, + (read32 (DEFAULT_MCHBAR + 0x4694) & ~0x30000) + | (1 << 16)); + + MCHBAR32(0x5030) |= 1; // OK + MCHBAR32(0x5030) |= 0x80; // OK + MCHBAR32(0x5f18) = 0xfa; // OK + + /* Find a populated channel. */ + FOR_ALL_POPULATED_CHANNELS + break; + + t1_cycles = ((read32(DEFAULT_MCHBAR + 0x4290 + channel * 0x400) >> 8) & 0xff); + r32 = read32(DEFAULT_MCHBAR + 0x5064); + if (r32 & 0x20000) + t1_cycles += (r32 & 0xfff); + t1_cycles += (read32(DEFAULT_MCHBAR + channel * 0x400 + 0x42a4) & 0xfff); + t1_ns = t1_cycles * ctrl->tCK / 256 + 544; + if (!(r32 & 0x20000)) + t1_ns += 500; + + t2_ns = 10 * ((read32(DEFAULT_MCHBAR + 0x5f10) >> 8) & 0xfff); + if ( read32(DEFAULT_MCHBAR + 0x5f00) & 8 ) + { + t3_ns = 10 * ((read32(DEFAULT_MCHBAR + 0x5f20) >> 8) & 0xfff); + t3_ns += 10 * (read32(DEFAULT_MCHBAR + 0x5f18) & 0xff); + } + else + { + t3_ns = 500; + } + printk(BIOS_DEBUG, "t123: %d, %d, %d\n", + t1_ns, t2_ns, t3_ns); + write32 (DEFAULT_MCHBAR + 0x5d10, + ((encode_5d10(t1_ns) + encode_5d10(t2_ns)) << 16) + | (encode_5d10(t1_ns) << 8) + | ((encode_5d10(t3_ns) + encode_5d10(t2_ns) + encode_5d10(t1_ns)) << 24) + | (read32(DEFAULT_MCHBAR + 0x5d10) & 0xC0C0C0C0) + | 0xc); +} + +void restore_timings(ramctr_timing * ctrl) +{ + int channel, slotrank, lane; + + FOR_ALL_POPULATED_CHANNELS + MCHBAR32(0x4004 + 0x400 * channel) = + ctrl->tRRD + | (ctrl->tRTP << 4) + | (ctrl->tCKE << 8) + | (ctrl->tWTR << 12) + | (ctrl->tFAW << 16) + | (ctrl->tWR << 24) + | (ctrl->cmd_stretch[channel] << 30); + + udelay(1); + + FOR_ALL_POPULATED_CHANNELS { + wait_428c(channel); + } + + FOR_ALL_CHANNELS FOR_ALL_POPULATED_RANKS FOR_ALL_LANES { + write32(DEFAULT_MCHBAR + 0x4080 + 0x400 * channel + + 4 * lane, 0); + } + + FOR_ALL_POPULATED_CHANNELS + write32(DEFAULT_MCHBAR + 0x4008 + 0x400 * channel, + read32(DEFAULT_MCHBAR + 0x4008 + + 0x400 * channel) | 0x8000000); + + FOR_ALL_POPULATED_CHANNELS { + udelay (1); + write32(DEFAULT_MCHBAR + 0x4020 + 0x400 * channel, + read32(DEFAULT_MCHBAR + 0x4020 + + 0x400 * channel) | 0x200000); + } + + printram("CPE\n"); + + write32(DEFAULT_MCHBAR + 0x3400, 0); + write32(DEFAULT_MCHBAR + 0x4eb0, 0); + + printram("CP5b\n"); + + FOR_ALL_POPULATED_CHANNELS { + program_timings(ctrl, channel); + } + + u32 reg, addr; + + while (!(MCHBAR32(0x5084) & 0x10000)); + do { + reg = MCHBAR32(0x428c); + } while ((reg & 0x14) == 0); + + // Set state of memory controller + MCHBAR32(0x5030) = 0x116; + MCHBAR32(0x4ea0) = 0; + + // Wait 500us + udelay(500); + + FOR_ALL_CHANNELS { + // Set valid rank CKE + reg = 0; + reg = (reg & ~0xf) | ctrl->rankmap[channel]; + addr = 0x400 * channel + 0x42a0; + MCHBAR32(addr) = reg; + + // Wait 10ns for ranks to settle + //udelay(0.01); + + reg = (reg & ~0xf0) | (ctrl->rankmap[channel] << 4); + MCHBAR32(addr) = reg; + + // Write reset using a NOP + write_reset(ctrl); + } + + /* mrs commands. */ + dram_mrscommands(ctrl); + + printram("CP5c\n"); + + write32(DEFAULT_MCHBAR + 0x3000, 0); + + FOR_ALL_CHANNELS { + write32(DEFAULT_MCHBAR + (channel * 0x100) + 0xe3c, + 0 | (read32(DEFAULT_MCHBAR + (channel * 0x100) + 0xe3c) & + ~0x3f000000)); + udelay(2); + } + + write32(DEFAULT_MCHBAR + 0x4ea8, 0); +} -- cgit v1.2.3