Diffstat (limited to 'src/vendorcode/cavium/bdk')
41 files changed, 24089 insertions, 0 deletions
diff --git a/src/vendorcode/cavium/bdk/libbdk-arch/bdk-csr.c b/src/vendorcode/cavium/bdk/libbdk-arch/bdk-csr.c new file mode 100644 index 0000000000..981ad231dc --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-arch/bdk-csr.c @@ -0,0 +1,376 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ +#include <bdk.h> +#include <stdio.h> +#include "libbdk-arch/bdk-csrs-pccpf.h" +#include "libbdk-arch/bdk-csrs-pem.h" + +#ifndef BDK_BUILD_HOST + +/** + * Read a slow CSR, not RSL or NCB. 
+ * + * @param type Bus type the CSR is on + * @param busnum Bus number the CSR is on + * @param size Width of the CSR in bytes + * @param address The address of the CSR + * + * @return The value of the CSR + */ +uint64_t __bdk_csr_read_slow(bdk_node_t node, bdk_csr_type_t type, int busnum, int size, uint64_t address) +{ + switch (type) + { + case BDK_CSR_TYPE_DAB: + case BDK_CSR_TYPE_DAB32b: + case BDK_CSR_TYPE_NCB: + case BDK_CSR_TYPE_NCB32b: + case BDK_CSR_TYPE_PEXP_NCB: + case BDK_CSR_TYPE_RSL: + case BDK_CSR_TYPE_RSL32b: + case BDK_CSR_TYPE_RVU_PF_BAR0: + case BDK_CSR_TYPE_RVU_PF_BAR2: + case BDK_CSR_TYPE_RVU_PFVF_BAR2: + case BDK_CSR_TYPE_RVU_VF_BAR2: + /* Handled by inline code, we should never get here */ + bdk_error("%s: Passed type that should be handled inline\n", __FUNCTION__); + break; + + case BDK_CSR_TYPE_PCCBR: + case BDK_CSR_TYPE_PCCPF: + case BDK_CSR_TYPE_PCCVF: + case BDK_CSR_TYPE_PEXP: + case BDK_CSR_TYPE_MDSB: + case BDK_CSR_TYPE_PCICONFIGEP_SHADOW: + case BDK_CSR_TYPE_PCICONFIGEPVF: + bdk_error("%s: Register not supported\n", __FUNCTION__); + break; + + case BDK_CSR_TYPE_SYSREG: + return bdk_sysreg_read(node, bdk_get_core_num(), address); + + case BDK_CSR_TYPE_PCICONFIGRC: + { + /* Don't allow PCIe register access if PCIe wasn't linked in */ + if (!bdk_pcie_config_read32) + bdk_fatal("PCIe CSR access not supported when PCIe not linked in\n"); + union bdk_pcc_dev_con_s dev_con; + switch (busnum) + { + case 0: + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC0_CN88XX; + else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC0_CN83XX; + else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC0_CN81XX; + else + bdk_fatal("Update PCICONFIG in %s\n", __FUNCTION__); + break; + case 1: + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC1_CN88XX; + else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC1_CN83XX; + else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC1_CN81XX; + else + bdk_fatal("Update PCICONFIG in %s\n", __FUNCTION__); + break; + case 2: + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC2_CN88XX; + else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC2_CN83XX; + else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC2_CN81XX; + else + bdk_fatal("Update PCICONFIG in %s\n", __FUNCTION__); + break; + case 3: + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC3_CN88XX; + else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC3_CN83XX; + else + bdk_fatal("Update PCICONFIG in %s\n", __FUNCTION__); + break; + case 4: + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC4; + break; + case 5: + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC5; + break; + default: + bdk_error("%s: Illegal PCIe bus number\n", __FUNCTION__); + return -1; + } + return bdk_pcie_config_read32(node, 100 + dev_con.cn8.ecam, dev_con.s.bus, dev_con.s.func >> 3, dev_con.s.func & 7, address); + } + case BDK_CSR_TYPE_PCICONFIGEP: + { + BDK_CSR_DEFINE(cfg_rd, BDK_PEMX_CFG_RD(busnum)); + cfg_rd.u = 0; + cfg_rd.s.addr = address; + BDK_CSR_WRITE(node, BDK_PEMX_CFG_RD(busnum), cfg_rd.u); + cfg_rd.u = BDK_CSR_READ(node, BDK_PEMX_CFG_RD(busnum)); + return cfg_rd.s.data; + } + } + return -1; /* Return -1 as this looks invalid in register dumps. Zero is too common as a good value */ +} + + +/** + * Write a value to a slow CSR, not RSL or NCB. 
+ * + * @param type Bus type the CSR is on + * @param busnum Bus number the CSR is on + * @param size Width of the CSR in bytes + * @param address The address of the CSR + * @param value Value to write to the CSR + */ +void __bdk_csr_write_slow(bdk_node_t node, bdk_csr_type_t type, int busnum, int size, uint64_t address, uint64_t value) +{ + switch (type) + { + case BDK_CSR_TYPE_DAB: + case BDK_CSR_TYPE_DAB32b: + case BDK_CSR_TYPE_NCB: + case BDK_CSR_TYPE_NCB32b: + case BDK_CSR_TYPE_PEXP_NCB: + case BDK_CSR_TYPE_RSL: + case BDK_CSR_TYPE_RSL32b: + case BDK_CSR_TYPE_RVU_PF_BAR0: + case BDK_CSR_TYPE_RVU_PF_BAR2: + case BDK_CSR_TYPE_RVU_PFVF_BAR2: + case BDK_CSR_TYPE_RVU_VF_BAR2: + /* Handled by inline code, we should never get here */ + bdk_error("%s: Passed type that should be handled inline\n", __FUNCTION__); + break; + + case BDK_CSR_TYPE_PCCBR: + case BDK_CSR_TYPE_PCCPF: + case BDK_CSR_TYPE_PCCVF: + case BDK_CSR_TYPE_PEXP: + case BDK_CSR_TYPE_MDSB: + case BDK_CSR_TYPE_PCICONFIGEP_SHADOW: + case BDK_CSR_TYPE_PCICONFIGEPVF: + bdk_error("%s: Register not supported\n", __FUNCTION__); + break; + + case BDK_CSR_TYPE_SYSREG: + bdk_sysreg_write(node, bdk_get_core_num(), address, value); + break; + + case BDK_CSR_TYPE_PCICONFIGRC: + { + /* Don't allow PCIe register access if PCIe wasn't linked in */ + if (!bdk_pcie_config_write32) + bdk_fatal("PCIe CSR access not supported when PCIe not linked in\n"); + union bdk_pcc_dev_con_s dev_con; + switch (busnum) + { + case 0: + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC0_CN88XX; + else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC0_CN83XX; + else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC0_CN81XX; + else + bdk_fatal("Update PCICONFIG in %s\n", __FUNCTION__); + break; + case 1: + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC1_CN88XX; + else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC1_CN83XX; + else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC1_CN81XX; + else + bdk_fatal("Update PCICONFIG in %s\n", __FUNCTION__); + break; + case 2: + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC2_CN88XX; + else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC2_CN83XX; + else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC2_CN81XX; + else + bdk_fatal("Update PCICONFIG in %s\n", __FUNCTION__); + break; + case 3: + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC3_CN88XX; + else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX)) + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC3_CN83XX; + else + bdk_fatal("Update PCICONFIG in %s\n", __FUNCTION__); + break; + case 4: + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC4; + break; + case 5: + dev_con.u = BDK_PCC_DEV_CON_E_PCIERC5; + break; + default: + bdk_error("%s: Illegal PCIe bus number\n", __FUNCTION__); + return; + } + bdk_pcie_config_write32(node, 100 + dev_con.cn8.ecam, dev_con.s.bus, dev_con.s.func >> 3, dev_con.s.func & 7, address, value); + break; + } + case BDK_CSR_TYPE_PCICONFIGEP: + { + BDK_CSR_DEFINE(cfg_wr, BDK_PEMX_CFG_WR(busnum)); + cfg_wr.u = 0; + cfg_wr.s.addr = address; + cfg_wr.s.data = value; + BDK_CSR_WRITE(node, BDK_PEMX_CFG_WR(busnum), cfg_wr.u); + break; + } + } +} + +#endif + +void __bdk_csr_fatal(const char *name, int num_args, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4) +{ + switch (num_args) + { + case 0: + 
bdk_fatal("%s is invalid on this chip\n", name);
+        case 1:
+            bdk_fatal("%s(%lu) is invalid on this chip\n", name, arg1);
+        case 2:
+            bdk_fatal("%s(%lu,%lu) is invalid on this chip\n", name, arg1, arg2);
+        case 3:
+            bdk_fatal("%s(%lu,%lu,%lu) is invalid on this chip\n", name, arg1, arg2, arg3);
+        default:
+            bdk_fatal("%s(%lu,%lu,%lu,%lu) is invalid on this chip\n", name, arg1, arg2, arg3, arg4);
+    }
+}
+
+/**
+ * Read a core system register from a different node or core
+ *
+ * @param node   Node to read from
+ * @param core   Core to read
+ * @param regnum Register to read in MRS encoding
+ *
+ * @return Register value
+ */
+uint64_t bdk_sysreg_read(int node, int core, uint64_t regnum)
+{
+    BDK_CSR_INIT(pp_reset, node, BDK_RST_PP_RESET);
+    if (pp_reset.u & (1ull<<core))
+    {
+        bdk_error("Attempt to read system register for core in reset\n");
+        return -1;
+    }
+
+    /* Addresses indicate selects as follows:
+        select 3,4,14,2,3
+        == 0x03040e020300
+             | | | | |^--- 1 if this is an E2H duplicated register
+             | | | |^^-- fifth select
+             | | |^^-- fourth select
+             | |^^-- third select
+             |^^-- second select
+             ^^-- first select */
+    uint64_t first = (regnum >> 40) & 0xff;
+    uint64_t second = (regnum >> 32) & 0xff;
+    uint64_t third = (regnum >> 24) & 0xff;
+    uint64_t fourth = (regnum >> 16) & 0xff;
+    uint64_t fifth = (regnum >> 8) & 0xff;
+    uint64_t regid = ((first & 3) << 14) | (second << 11) | (third << 7) | (fourth << 3) | fifth;
+
+    /* Note this requires DAP_IMP_DAR[caben] = 1 */
+    uint64_t address = 1ull<<47;
+    address |= 0x7Bull << 36;
+    address |= core << 19;
+    address |= regid << 3;
+    address = bdk_numa_get_address(node, address);
+    return bdk_read64_uint64(address);
+}
+
+/**
+ * Write a system register for a different node or core
+ *
+ * @param node   Node to write to
+ * @param core   Core to write
+ * @param regnum Register to write in MSR encoding
+ * @param value  Value to write
+ */
+void bdk_sysreg_write(int node, int core, uint64_t regnum, uint64_t value)
+{
+    BDK_CSR_INIT(pp_reset, node, BDK_RST_PP_RESET);
+    if (pp_reset.u & (1ull<<core))
+    {
+        bdk_error("Attempt to write system register for core in reset\n");
+        return;
+    }
+
+    /* Addresses indicate selects as follows:
+        select 3,4,14,2,3
+        == 0x03040e020300
+             | | | | |^--- 1 if this is an E2H duplicated register
+             | | | |^^-- fifth select
+             | | |^^-- fourth select
+             | |^^-- third select
+             |^^-- second select
+             ^^-- first select */
+    uint64_t first = (regnum >> 40) & 0xff;
+    uint64_t second = (regnum >> 32) & 0xff;
+    uint64_t third = (regnum >> 24) & 0xff;
+    uint64_t fourth = (regnum >> 16) & 0xff;
+    uint64_t fifth = (regnum >> 8) & 0xff;
+    uint64_t regid = ((first & 3) << 14) | (second << 11) | (third << 7) | (fourth << 3) | fifth;
+
+    /* Note this requires DAP_IMP_DAR[caben] = 1 */
+    uint64_t address = 1ull<<47;
+    address |= 0x7Bull << 36;
+    address |= core << 19;
+    address |= regid << 3;
+    address = bdk_numa_get_address(node, address);
+    bdk_write64_uint64(address, value);
+}
+
diff --git a/src/vendorcode/cavium/bdk/libbdk-arch/bdk-model.c b/src/vendorcode/cavium/bdk/libbdk-arch/bdk-model.c
new file mode 100644
index 0000000000..f2b4a0c803
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-arch/bdk-model.c
@@ -0,0 +1,927 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ +#include <bdk.h> +#include "libbdk-arch/bdk-csrs-ap.h" +#include "libbdk-arch/bdk-csrs-mio_fus.h" +#include "libbdk-arch/bdk-csrs-fus.h" +#include "libbdk-arch/bdk-csrs-fusf.h" + +/* + Format of a SKU + CN8890-2000BG2601-AAP-G + CN8890-2000BG2601-AAP-PR-Y-G + CN XX XX X - XXX BG XXX - XX (- XX) (- X) - G + | | | | | | | | | | ^ RoHS Option, G=RoHS 6/6 + | | | | | | | | | ^ Product Revision, blank for pass 1, Y=pass 2, W=pass 3, V=pass 4 + | | | | | | | | ^ Product Phase, blank=production, PR=Prototype, ES=Engineering Sample + | | | | | | | ^ Marketing Segment Option (SC, SNT, etc) + | | | | | | ^ Number of balls on the package + | | | | | ^ Ball Grid Array + | | | | ^ Frequency in Mhz, 3 or 4 digits (300 - 2000) + | | | ^ Optional Customer Code, blank or A-Z + | | ^ Number of cores, see table below + | ^ Processor family, plus or minus for L2 sizes and such (88, 86, 83, 81, 80) + ^ Cavium Prefix, sometimes changed for customer specific parts + + Table of Core to Model encoding + >= 48 shows xx90 + >= 44 shows xx88 + >= 42 shows xx85 + >= 32 shows xx80 + >= 24 shows xx70 + >= 20 shows xx65 + >= 16 shows xx60 + = 15 shows xx58 + = 14 shows xx55 + = 13 shows xx52 + = 12 shows xx50 + = 11 shows xx48 + = 10 shows xx45 + = 9 shows xx42 + = 8 shows xx40 + = 7 shows xx38 + = 6 shows xx34 + = 5 shows xx32 + = 4 shows xx30 + = 3 shows xx25 + = 2 shows xx20 + = 1 shows xx10 +*/ + +/* Definition of each SKU table entry for the different dies */ +typedef struct +{ + uint8_t fuse_index; /* Index programmed into PNAME fuses to match this entry. Must never change once fused parts ship */ + const char prefix[4]; /* Prefix before model number, usually "CN". 
Third letter is customer code shown after the model */ + uint8_t model_base; /* First two digits of the model number */ + uint16_t num_balls; /* Number of balls on package, included in SKU */ + const char segment[4]; /* Market segment SKU is for, 2-3 character string */ + uint16_t fuses[12]; /* List of fuses required for operation of this SKU */ +} model_sku_info_t; + +/* In the model_sku_info_t.fuses[] array, we use a special value + FUSES_CHECK_FUSF to represent that we need to check FUSF_CTL bit + 6, checking for trusted boot */ +#define FUSES_CHECK_FUSF 0xffff + +/***************************************************/ +/* SKU table for t88 */ +/* From "Thunder Part Number fuse overview Rev 16.xlsx" */ +/***************************************************/ +static const model_sku_info_t t88_sku_info[] = +{ + /* Index zero reserved for no fuses programmed */ + { 0x01, "CN", 88, 2601, "AAP", /* 48, 32 cores */ + { /* List of fuses for this SKU */ + 0 /* End of fuse list marker */ + } + }, + { 0x02, "CN", 88, 2601, "AAS", /* 24 cores */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_OCX_DIS, /* Disable CCPI */ + 0 /* End of fuse list marker */ + } + }, + { 0x03, "CN", 88, 2601, "ST", /* 48, 32 cores */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_TNS_CRIPPLE, /* Disable TNS */ + BDK_MIO_FUS_FUSE_NUM_E_PEM_DISX(0), /* Disable PEM0-1 */ + BDK_MIO_FUS_FUSE_NUM_E_PEM_DISX(2), /* Disable PEM4-5 */ + 0 /* End of fuse list marker */ + } + }, + { 0x04, "CN", 88, 2601, "STT", /* 48 cores */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_PEM_DISX(0), /* Disable PEM0-1 */ + BDK_MIO_FUS_FUSE_NUM_E_PEM_DISX(2), /* Disable PEM4-5 */ + 0 /* End of fuse list marker */ + } + }, + { 0x05, "CN", 88, 2601, "STS", /* 24 cores */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_LMC_DIS, /* Disable LMC2-3 */ + BDK_MIO_FUS_FUSE_NUM_E_OCX_DIS, /* Disable CCPI */ + BDK_MIO_FUS_FUSE_NUM_E_TNS_CRIPPLE, /* Disable TNS */ + BDK_MIO_FUS_FUSE_NUM_E_PEM_DISX(0), /* Disable PEM0-1 */ + BDK_MIO_FUS_FUSE_NUM_E_PEM_DISX(2), /* Disable PEM4-5 */ + BDK_MIO_FUS_FUSE_NUM_E_BGX_DISX(1), /* Disable BGX1 */ + 0 /* End of fuse list marker */ + } + }, + { 0x06, "CN", 88, 2601, "STP", /* 48, 32 cores */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_TNS_CRIPPLE, /* Disable TNS */ + 0 /* End of fuse list marker */ + } + }, + { 0x07, "CN", 88, 2601, "NT", /* 48, 32 cores */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(0),/* Disable SATA0-3 */ + BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(2),/* Disable SATA8-11 */ + BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(3),/* Disable SATA12-15 */ + 0 /* End of fuse list marker */ + } + }, + { 0x08, "CN", 88, 2601, "NTS", /* 24 cores */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_LMC_DIS, /* Disable LMC2-3 */ + BDK_MIO_FUS_FUSE_NUM_E_OCX_DIS, /* Disable CCPI */ + BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(0),/* Disable SATA0-3 */ + BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(2),/* Disable SATA8-11 */ + BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(3),/* Disable SATA12-15 */ + BDK_MIO_FUS_FUSE_NUM_E_BGX_DISX(1), /* Disable BGX1 */ + 0 /* End of fuse list marker */ + } + }, + { 0x09, "CN", 88, 2601, "NTP", /* 48, 32 cores */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(0),/* Disable SATA0-3 */ + BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(1),/* Disable SATA4-7 */ + BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(2),/* Disable SATA8-11 */ + BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(3),/* Disable SATA12-15 */ + 0 /* End of fuse list marker */ + } + }, 
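+    /* Worked example (illustrative note, inferred from the SKU format
+       comment at the top of this file, not from the fuse spreadsheet):
+       index 0x01 above ("CN", 88, 2601 balls, "AAP") on a 48-core part
+       with a 2000 MHz RCLK limit decodes to the sample SKU
+       CN8890-2000BG2601-AAP-G, since 48 cores maps to "90" and a
+       production pass 1.0 part adds no phase/revision suffix. */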
+ { 0x0a, "CN", 88, 2601, "CP", /* 48,32 cores */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_NODFA_CP2, /* Disable HFA */ + BDK_MIO_FUS_FUSE_NUM_E_RSVD134X(0), /* Disable HNA */ + BDK_MIO_FUS_FUSE_NUM_E_NOZIP, /* Disable Compression */ + BDK_MIO_FUS_FUSE_NUM_E_TNS_CRIPPLE, /* Disable TNS */ + BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(0),/* Disable SATA0-3 */ + BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(2),/* Disable SATA8-11 */ + BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(3),/* Disable SATA12-15 */ + 0 /* End of fuse list marker */ + } + }, + { 0x0b, "CN", 88, 2601, "CPS", /* 24 cores */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_NODFA_CP2, /* Disable HFA */ + BDK_MIO_FUS_FUSE_NUM_E_RSVD134X(0), /* Disable HNA */ + BDK_MIO_FUS_FUSE_NUM_E_NOZIP, /* Disable Compression */ + BDK_MIO_FUS_FUSE_NUM_E_LMC_DIS, /* Disable LMC2-3 */ + BDK_MIO_FUS_FUSE_NUM_E_OCX_DIS, /* Disable CCPI */ + BDK_MIO_FUS_FUSE_NUM_E_TNS_CRIPPLE, /* Disable TNS */ + BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(0),/* Disable SATA0-3 */ + BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(2),/* Disable SATA8-11 */ + BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(3),/* Disable SATA12-15 */ + BDK_MIO_FUS_FUSE_NUM_E_BGX_DISX(1), /* Disable BGX1 */ + 0 /* End of fuse list marker */ + } + }, + { 0x0c, "CN", 88, 2601, "SNT", /* 48,32 cores, Nitrox connects to PEM2x8, QLM4-5 */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_RSVD231X(0), /* Nitrox 3 is present */ + 0 /* End of fuse list marker */ + } + }, + { 0x0d, "CN", 88, 2601, "SC", /* 48,32 cores, Nitrox connects to PEM2x8, QLM4-5 */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_RSVD231X(0), /* Nitrox 3 is present */ + BDK_MIO_FUS_FUSE_NUM_E_NODFA_CP2, /* Disable HFA */ + BDK_MIO_FUS_FUSE_NUM_E_RSVD134X(0), /* Disable HNA */ + BDK_MIO_FUS_FUSE_NUM_E_NOZIP, /* Disable Compression */ + BDK_MIO_FUS_FUSE_NUM_E_TNS_CRIPPLE, /* Disable TNS */ + 0 /* End of fuse list marker */ + } + }, + /* Index gap for adding more CN88 variants */ + { 0x20, "CN", 86, 1676, "AAP", /* No part, match unfused CN86XX */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_CHIP_IDX(6), /* Alternate package fuse */ + 0 /* End of fuse list marker */ + } + }, + { 0x21, "CN", 86, 1676, "SCP", /* 8 cores */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_CHIP_IDX(6), /* Alternate package fuse */ + BDK_MIO_FUS_FUSE_NUM_E_L2C_CRIPX(1),/* L2C is half size */ + BDK_MIO_FUS_FUSE_NUM_E_NODFA_CP2, /* Disable HFA */ + BDK_MIO_FUS_FUSE_NUM_E_RSVD134X(0), /* Disable HNA */ + BDK_MIO_FUS_FUSE_NUM_E_NOZIP, /* Disable Compression */ + BDK_MIO_FUS_FUSE_NUM_E_LMC_DIS, /* Disable LMC2-3 */ + BDK_MIO_FUS_FUSE_NUM_E_OCX_DIS, /* Disable CCPI */ + BDK_MIO_FUS_FUSE_NUM_E_TNS_CRIPPLE, /* Disable TNS */ + 0 /* End of fuse list marker */ + } + }, + {} /* End of SKU list marker */ +}; + +/***************************************************/ +/* SKU table for t83 */ +/* From "Thunder Part Number fuse overview Rev 16.xlsx" */ +/***************************************************/ +static const model_sku_info_t t83_sku_info[] = +{ + /* Index zero reserved for no fuses programmed */ + { 0x01, "CN", 83, 1676, "SCP", /* 24, 20, 16, 12, 8 cores */ + { /* List of fuses for this SKU */ + 0 /* End of fuse list marker */ + } + }, + { 0x02, "CN", 83, 1676, "CP", /* 24, 20, 16, 12, 8 cores */ + { /* List of fuses for this SKU */ + /* Disable all Nitrox cores, CPT0 and CPT1 */ + BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(0), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(1), /* Nitrox */ + 
//BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(2), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(3), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(4), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(5), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(6), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(7), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(8), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(9), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(10), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(11), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(12), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(13), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(14), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(15), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(16), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(17), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(18), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(19), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(20), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(21), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(22), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(23), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(24), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(25), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(26), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(27), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(28), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(29), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(30), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(31), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(32), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(33), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(34), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(35), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(36), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(37), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(38), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(39), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(40), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(41), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(42), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(43), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(44), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(45), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(46), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(47), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(0), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(1), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(2), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(3), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(4), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(5), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(6), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(7), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(8), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(9), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(10), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(11), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(12), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(13), /* Nitrox */ + 
//BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(14), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(15), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(16), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(17), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(18), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(19), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(20), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(21), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(22), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(23), /* Nitrox */ + 0 /* End of fuse list marker */ + } + }, + { 0x03, "CN", 83, 1676, "AUS", /* 24, 20, 16, 12, 8 cores */ + { /* List of fuses for this SKU */ + FUSES_CHECK_FUSF, /* Trusted boot */ + 0 /* End of fuse list marker */ + } + }, + { 0x04, "CN", 82, 1676, "SCP", /* 12, 8 cores */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_L2C_CRIPX(1),/* L2C is half size */ + BDK_MIO_FUS_FUSE_NUM_E_LMC_DIS, /* Disable upper LMC */ + /* Disable Nitrox cores CPT0[24-47] and CPT1[12-23] */ + BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(24), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(25), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(26), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(27), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(28), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(29), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(30), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(31), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(32), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(33), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(34), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(35), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(36), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(37), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(38), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(39), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(40), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(41), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(42), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(43), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(44), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(45), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(46), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(47), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(12), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(13), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(14), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(15), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(16), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(17), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(18), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(19), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(20), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(21), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(22), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(23), /* Nitrox */ + 0 /* End of fuse list marker */ + } + }, + { 0x05, "CN", 82, 1676, "CP", /* 12, 8 cores */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_L2C_CRIPX(1),/* L2C is half size */ + BDK_MIO_FUS_FUSE_NUM_E_LMC_DIS, /* Disable upper LMC */ + /* Disable all Nitrox cores, CPT0 and CPT1 */ + BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(0), /* Nitrox */ + 
//BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(1), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(2), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(3), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(4), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(5), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(6), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(7), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(8), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(9), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(10), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(11), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(12), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(13), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(14), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(15), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(16), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(17), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(18), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(19), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(20), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(21), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(22), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(23), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(24), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(25), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(26), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(27), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(28), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(29), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(30), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(31), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(32), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(33), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(34), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(35), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(36), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(37), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(38), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(39), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(40), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(41), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(42), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(43), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(44), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(45), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(46), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(47), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(0), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(1), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(2), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(3), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(4), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(5), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(6), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(7), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(8), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(9), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(10), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(11), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(12), /* Nitrox */ + 
//BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(13), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(14), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(15), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(16), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(17), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(18), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(19), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(20), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(21), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(22), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(23), /* Nitrox */ + 0 /* End of fuse list marker */ + } + }, + {} /* End of SKU list marker */ +}; + +/***************************************************/ +/* SKU table for t81 */ +/* From "Thunder Part Number fuse overview Rev 16.xlsx" */ +/***************************************************/ +static const model_sku_info_t t81_sku_info[] = +{ + /* Index zero reserved for no fuses programmed */ + { 0x01, "CN", 81, 676, "SCP", /* 4, 2 cores */ + { /* List of fuses for this SKU */ + /* No fuses */ + 0 /* End of fuse list marker */ + } + }, + { 0x02, "CN", 81, 676, "CP", /* 4, 2 cores */ + { /* List of fuses for this SKU */ + BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(1), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(2), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(3), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(4), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(5), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(6), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(7), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(8), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(9), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(10), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(11), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(12), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(13), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(14), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(15), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(16), /* Nitrox */ + 0 /* End of fuse list marker */ + } + }, + { 0x07, "CN", 81, 676, "AUS", /* 4, 2 cores */ + { /* List of fuses for this SKU */ + FUSES_CHECK_FUSF, /* Trusted boot */ + 0 /* End of fuse list marker */ + } + }, + { 0x08, "CN", 81, 676, "AUC", /* 4, 2 cores */ + { /* List of fuses for this SKU */ + FUSES_CHECK_FUSF, /* Trusted boot */ + BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(1), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(2), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(3), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(4), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(5), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(6), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(7), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(8), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(9), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(10), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(11), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(12), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(13), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(14), /* Nitrox */ + //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(15), /* Nitrox */ + BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(16), /* Nitrox */ + 0 /* End of fuse list marker */ + } + }, + { 0x03, "CN", 80, 676, "SCP", /* 4, 2 cores */ + { /* List of fuses for this SKU */ + /* 
Note that CHIP_ID(7) is supposed to be blown, but a few chips
+               have incorrect fuses. We allow CN80XX SKUs with or without
+               CHIP_ID(7) */
+            //BDK_MIO_FUS_FUSE_NUM_E_CHIP_IDX(7), /* Alternate package fuse 2? */
+            BDK_MIO_FUS_FUSE_NUM_E_L2C_CRIPX(1), /* L2C is half size */
+            BDK_MIO_FUS_FUSE_NUM_E_LMC_HALF, /* LMC is half width */
+            0 /* End of fuse list marker */
+        }
+    },
+    { 0x04, "CN", 80, 676, "CP", /* 4, 2 cores */
+        { /* List of fuses for this SKU */
+            /* Note that CHIP_ID(7) is supposed to be blown, but a few chips
+               have incorrect fuses. We allow CN80XX SKUs with or without
+               CHIP_ID(7) */
+            //BDK_MIO_FUS_FUSE_NUM_E_CHIP_IDX(7), /* Alternate package fuse 2? */
+            BDK_MIO_FUS_FUSE_NUM_E_L2C_CRIPX(1), /* L2C is half size */
+            BDK_MIO_FUS_FUSE_NUM_E_LMC_HALF, /* LMC is half width */
+            BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(1), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(2), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(3), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(4), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(5), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(6), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(7), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(8), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(9), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(10), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(11), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(12), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(13), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(14), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(15), /* Nitrox */
+            BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(16), /* Nitrox */
+            0 /* End of fuse list marker */
+        }
+    },
+    { 0x05, "CN", 80, 555, "SCP", /* 4, 2 cores */
+        { /* List of fuses for this SKU */
+            BDK_MIO_FUS_FUSE_NUM_E_CHIP_IDX(6), /* Alternate package fuse */
+            BDK_MIO_FUS_FUSE_NUM_E_L2C_CRIPX(1), /* L2C is half size */
+            BDK_MIO_FUS_FUSE_NUM_E_LMC_HALF, /* LMC is half width */
+            0 /* End of fuse list marker */
+        }
+    },
+    { 0x06, "CN", 80, 555, "CP", /* 4, 2 cores */
+        { /* List of fuses for this SKU */
+            BDK_MIO_FUS_FUSE_NUM_E_CHIP_IDX(6), /* Alternate package fuse */
+            BDK_MIO_FUS_FUSE_NUM_E_L2C_CRIPX(1), /* L2C is half size */
+            BDK_MIO_FUS_FUSE_NUM_E_LMC_HALF, /* LMC is half width */
+            BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(1), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(2), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(3), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(4), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(5), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(6), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(7), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(8), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(9), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(10), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(11), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(12), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(13), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(14), /* Nitrox */
+            //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(15), /* Nitrox */
+            BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(16), /* Nitrox */
+            0 /* End of fuse list marker */
+        }
+    },
+    {} /* End of SKU list marker */
+};
+
+/***************************************************/
+/* SKU table for t93                               */
+/***************************************************/
+static const model_sku_info_t t93_sku_info[] =
+{
+    /* Index zero reserved for no fuses programmed */
+    { 0x01, "CN", 93, 1676, "SCP", /* 24, 20, 16, 12, 8 cores */
+        { /* List of fuses for this SKU */
+            /* No fuses */
+            0 /* End of fuse list marker */
+        }
+    },
+    {} /* End of SKU list marker */
+};
+
+/**
+ * Given a core count, return the last two digits of a model number
+ *
+ * @param cores Number of cores
+ *
+ * @return Two digit model number
+ */
+static int model_digits_for_cores(int cores)
+{
+    /* If the number of cores is between two model levels, use the lower
+       level. This assumes that a model guarantees a minimum number of
+       cores. This should never happen, but you never know */
+    switch (cores)
+    {
+        case 1: return 10;          /* CNxx10 = 1 core */
+        case 2: return 20;          /* CNxx20 = 2 cores */
+        case 3: return 25;          /* CNxx25 = 3 cores */
+        case 4: return 30;          /* CNxx30 = 4 cores */
+        case 5: return 32;          /* CNxx32 = 5 cores */
+        case 6: return 34;          /* CNxx34 = 6 cores */
+        case 7: return 38;          /* CNxx38 = 7 cores */
+        case 8: return 40;          /* CNxx40 = 8 cores */
+        case 9: return 42;          /* CNxx42 = 9 cores */
+        case 10: return 45;         /* CNxx45 = 10 cores */
+        case 11: return 48;         /* CNxx48 = 11 cores */
+        case 12: return 50;         /* CNxx50 = 12 cores */
+        case 13: return 52;         /* CNxx52 = 13 cores */
+        case 14: return 55;         /* CNxx55 = 14 cores */
+        case 15: return 58;         /* CNxx58 = 15 cores */
+        case 16 ... 19: return 60;  /* CNxx60 = 16 cores */
+        case 20 ... 23: return 65;  /* CNxx65 = 20 cores */
+        case 24 ... 31: return 70;  /* CNxx70 = 24 cores */
+        case 32 ... 39: return 80;  /* CNxx80 = 32 cores */
+        case 40 ... 43: return 85;  /* CNxx85 = 40 cores */
+        case 44 ... 47: return 88;  /* CNxx88 = 44 cores */
+        default: return 90;         /* CNxx90 = 48 cores */
+    }
+}
+
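+/* Usage sketch (illustrative note, not from the original source): a fully
+   enabled 24-core die gives model_digits_for_cores(24) == 70, so with a
+   model_base of 88 the SKU string shows CN8870, matching the core count
+   table at the top of this file. */
+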
+/**
+ * Return non-zero if the die is in an alternate package. The
+ * normal is_model() checks will treat alternate package parts
+ * as all the same, where this function can be used to detect
+ * them. The return value is the upper two bits of
+ * MIO_FUS_DAT2[chip_id]. Most alternate packages use bit 6,
+ * which will return 1 here. Parts with a second alternative
+ * will use bit 7, which will return 2.
+ *
+ * @param arg_model One of the CAVIUM_* constants for chip models and passes
+ *
+ * @return Non-zero if an alternate package
+ *         0 = Normal package
+ *         1 = Alternate package 1 (CN86XX, CN80XX with 555 balls)
+ *         2 = Alternate package 2 (CN80XX with 676 balls)
+ *         3 = Alternate package 3 (Currently unused)
+ */
+int cavium_is_altpkg(uint32_t arg_model)
+{
+    if (CAVIUM_IS_MODEL(arg_model))
+    {
+        BDK_CSR_INIT(mio_fus_dat2, bdk_numa_local(), BDK_MIO_FUS_DAT2);
+        /* Bits 7:6 are used for alternate packages. Return the exact
+           number so multiple alternate packages can be detected
+           (CN80XX is an example) */
+        int altpkg = mio_fus_dat2.s.chip_id >> 6;
+        if (altpkg)
+            return altpkg;
+        /* Due to a documentation mixup, some CN80XX parts do not have chip_id
+           bit 7 set. As a backup, use lmc_mode32 to find these parts. Both
+           bits are supposed to be fused, but some parts only have lmc_mode32 */
+        if (CAVIUM_IS_MODEL(CAVIUM_CN81XX) && mio_fus_dat2.s.lmc_mode32)
+            return 2;
+        return 0;
+    }
+    else
+        return 0;
+}
+
+/**
+ * Return the SKU string for a chip
+ *
+ * @param node Node to get SKU for
+ *
+ * @return Chip's SKU
+ */
+const char* bdk_model_get_sku(int node)
+{
+    /* Storage for SKU is per node. Static variable stores the value
+       so we don't decode on every call */
+    static char chip_sku[BDK_NUMA_MAX_NODES][32] = { { 0, }, };
+
+    /* Return the cached string if we've already filled it in */
+    if (chip_sku[node][0])
+        return chip_sku[node];
+
+    /* Figure out which SKU list to use */
+    const model_sku_info_t *sku_info;
+    uint64_t result;
+    asm ("mrs %[rd],MIDR_EL1" : [rd] "=r" (result));
+    result = bdk_extract(result, 4, 12);
+    switch (result)
+    {
+        case 0xa1:
+            sku_info = t88_sku_info;
+            break;
+        case 0xa2:
+            sku_info = t81_sku_info;
+            break;
+        case 0xa3:
+            sku_info = t83_sku_info;
+            break;
+        case 0xb2:
+            sku_info = t93_sku_info;
+            break;
+        default:
+            bdk_fatal("SKU detect: Unknown die\n");
+    }
+
+    /* Read the SKU index from the PNAME fuses */
+    int match_index = -1;
+    // FIXME: Implement PNAME reads
+
+    /* Search the SKU list for the best match, where all the fuses match.
+       Only needed if the PNAME fuses don't specify the index */
+    if (match_index == -1)
+    {
+        match_index = 0;
+        int match_score = -1;
+        int index = 0;
+        while (sku_info[index].fuse_index)
+        {
+            int score = 0;
+            int fuse_index = 0;
+            /* Count the number of fuses that match. A mismatch forces the worst
+               score (-1) */
+            while (sku_info[index].fuses[fuse_index])
+            {
+                int fuse;
+                /* FUSES_CHECK_FUSF is special for trusted parts */
+                if (sku_info[index].fuses[fuse_index] == FUSES_CHECK_FUSF)
+                {
+                    BDK_CSR_INIT(fusf_ctl, node, BDK_FUSF_CTL);
+                    fuse = (fusf_ctl.u >> 6) & 1;
+                }
+                else
+                {
+                    fuse = bdk_fuse_read(node, sku_info[index].fuses[fuse_index]);
+                }
+                if (fuse)
+                {
+                    /* Match, improve the score */
+                    score++;
+                }
+                else
+                {
+                    /* Mismatch, force score bad */
+                    score = -1;
+                    break;
+                }
+                fuse_index++;
+            }
+            /* If this score is better than the last match, use this index as the
+               match */
+            if (score > match_score)
+            {
+                match_score = score;
+                match_index = index;
+            }
+            index++;
+        }
+    }
+
+    /* Use the SKU table to determine the defaults for the SKU parts */
+    const char *prefix = sku_info[match_index].prefix;
+    int model = 100 * sku_info[match_index].model_base;
+    int cores = bdk_get_num_cores(node);
+    const char *customer_code = "";
+    int rclk_limit = bdk_clock_get_rate(node, BDK_CLOCK_RCLK) / 1000000;
+    const char *bg_str = "BG"; /* Default Ball Grid array */
+    int balls = sku_info[match_index].num_balls; /* Num package balls */
+    const char *segment = sku_info[match_index].segment; /* Market segment */
+    char prod_phase[4]; /* Blank = production, PR = Prototype, ES = Engineering sample */
+    char prod_rev[5]; /* Product revision */
+    const char *rohs_option = "G"; /* RoHS is always G for current parts */
+
+    /* Update the model number with the number of cores */
+    model = (model / 100) * 100 + model_digits_for_cores(cores);
+
+    /* Update the RCLK setting based on MIO_FUS_DAT3[core_pll_mul] */
+    uint64_t core_pll_mul;
+    if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX))
+    {
+        BDK_CSR_INIT(mio_fus_dat3, node, BDK_MIO_FUS_DAT3);
+        core_pll_mul = mio_fus_dat3.s.core_pll_mul;
+    }
+    else
+        core_pll_mul = bdk_fuse_read_range(bdk_numa_local(), BDK_FUS_FUSE_NUM_E_CORE_MAX_MULX(0), 7);
+
+    if (core_pll_mul)
+    {
+        /* CORE_PLL_MUL covers bits 5:1, so we need to multiply by 2. The
+           documentation doesn't mention this clearly: there is a 300 MHz
+           addition to the base multiplier */
+        rclk_limit = core_pll_mul * 2 * 50 + 300;
+    }
+
+    /* FIXME: Hardcode production as there is no way to tell */
+    prod_phase[0] = 0;
+
+    /* Read the Pass information from fuses.
Note that pass info in + MIO_FUS_DAT2[CHIP_ID] is encoded as + bit[7] = Unused, zero + bit[6] = Alternate package + bit[5..3] = Major pass + bit[2..0] = Minor pass */ + int major_pass; + int minor_pass; + if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX)) + { + BDK_CSR_INIT(mio_fus_dat2, node, BDK_MIO_FUS_DAT2); + major_pass = ((mio_fus_dat2.s.chip_id >> 3) & 7) + 1; + minor_pass = mio_fus_dat2.s.chip_id & 7; + } + else + { + /* FIXME: We don't support getting the pass for other node on CN9XXX */ + bdk_ap_midr_el1_t midr_el1; + BDK_MRS(MIDR_EL1, midr_el1.u); + major_pass = (midr_el1.s.variant & 7) + 1; + minor_pass = midr_el1.s.revision; + } + + if (major_pass == 1) + { + /* Pass 1.x is special in that we don't show the implied 'X' */ + if (minor_pass == 0) + { + /* Completely blank for 1.0 */ + prod_rev[0] = 0; + } + else + { + /* If we are production and not pass 1.0, the product phase + changes from blank to "-P". The product revision then + follows the product phase without a '-' */ + if (prod_phase[0] == 0) + { + /* Change product phase to "-P" */ + prod_phase[0] = '-'; + prod_phase[1] = 'P'; + prod_phase[2] = 0; + } + /* No separator between phase and revision */ + prod_rev[0] = '1'; + prod_rev[1] = '0' + minor_pass; + prod_rev[2] = 0; + } + } + else + { + /* Pass 2.0 and above 12345678 */ + const char pass_letter[8] = "XYWVUTSR"; + prod_rev[0] = '-'; + prod_rev[1] = pass_letter[major_pass-1]; + if (minor_pass == 0) + { + /* Nothing after the letter code */ + prod_rev[2] = 0; + } + else + { + /* Add major and minor after the letter code */ + prod_rev[2] = '0' + major_pass; + prod_rev[3] = '0' + minor_pass; + prod_rev[4] = 0; + } + } + + /* Special check for CN88XX pass 2.0 and 2.1. Documentation mistakenly + specified 2.0 as -PR and 2.1 as -Y. Rather than fix the docs, OPs has + decided to special case this SKU */ + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX) && (major_pass == 2)) + { + if (minor_pass == 0) + { + prod_phase[0] = '-'; /* SKU ends with -PR-Y-G */ + prod_phase[1] = 'P'; + prod_phase[2] = 'R'; + prod_phase[3] = 0; + } + else if (minor_pass == 1) + { + prod_rev[0] = '-'; /* SKU ends with -Y-G */ + prod_rev[1] = 'Y'; + prod_rev[2] = 0; + } + } + + /* Read PNAME fuses, looking for SKU overrides */ + // FIXME: Implement PNAME reads + + /* Build the SKU string */ + snprintf(chip_sku[node], sizeof(chip_sku[node]), "%s%d%s-%d%s%d-%s%s%s-%s", + prefix, model, customer_code, rclk_limit, bg_str, balls, segment, + prod_phase, prod_rev, rohs_option); + + return chip_sku[node]; +} diff --git a/src/vendorcode/cavium/bdk/libbdk-arch/bdk-numa.c b/src/vendorcode/cavium/bdk/libbdk-arch/bdk-numa.c new file mode 100644 index 0000000000..33d34ba669 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-arch/bdk-numa.c @@ -0,0 +1,91 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. 
nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ +#include <bdk.h> +#include <stdio.h> + +int __bdk_numa_master_node = -1; /* Which node is the master */ +static int __bdk_numa_exists_mask = 0; /* Bitmask of nodes that exist */ +static bdk_spinlock_t __bdk_numa_lock; + +/** + * Get a bitmask of the nodes that exist + * + * @return bitmask + */ +uint64_t bdk_numa_get_exists_mask(void) +{ + return __bdk_numa_exists_mask; +} + +/** + * Add a node to the exists mask + * + * @param node Node to add + */ +void bdk_numa_set_exists(bdk_node_t node) +{ + bdk_spinlock_lock(&__bdk_numa_lock); + __bdk_numa_exists_mask |= 1 << node; + if (__bdk_numa_master_node == -1) + __bdk_numa_master_node = node; + bdk_spinlock_unlock(&__bdk_numa_lock); +} + +/** + * Return true if a node exists + * + * @param node Node to check + * + * @return Non zero if the node exists + */ +int bdk_numa_exists(bdk_node_t node) +{ + return __bdk_numa_exists_mask & (1 << node); +} + +/** + * Return true if there is only one node + * + * @return + */ +extern int bdk_numa_is_only_one() +{ + return __bdk_numa_exists_mask == 1; +} + diff --git a/src/vendorcode/cavium/bdk/libbdk-arch/bdk-platform.c b/src/vendorcode/cavium/bdk/libbdk-arch/bdk-platform.c new file mode 100644 index 0000000000..8cac04a214 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-arch/bdk-platform.c @@ -0,0 +1,59 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. 
Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ +#include <bdk.h> +#include "libbdk-arch/bdk-csrs-ocla.h" + +bdk_platform_t __bdk_platform; + +void __bdk_platform_init() +{ + BDK_CSR_INIT(c, bdk_numa_master(), BDK_OCLAX_CONST(0)); + if (c.u == 0) + { + __bdk_platform = BDK_PLATFORM_ASIM; + } + else + { + int plat2 = bdk_fuse_read(bdk_numa_master(), 197); + int plat1 = bdk_fuse_read(bdk_numa_master(), 196); + int plat0 = bdk_fuse_read(bdk_numa_master(), 195); + __bdk_platform = (plat2 << 2) | (plat1 << 1) | plat0; + } +} + diff --git a/src/vendorcode/cavium/bdk/libbdk-boot/bdk-boot-status.c b/src/vendorcode/cavium/bdk/libbdk-boot/bdk-boot-status.c new file mode 100644 index 0000000000..83ab14cbc7 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-boot/bdk-boot-status.c @@ -0,0 +1,81 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. 
+***********************license end**************************************/ +#include <bdk.h> +#include "libbdk-arch/bdk-csrs-mio_tws.h" + +/** + * Report boot status to the BMC or whomever might care. This function + * will return quickly except for a status of "power cycle". In the power cycle + * case it is assumed the board is in a bad state and should not continue until + * a power cycle restarts us. + * + * @param status Status to report. Enumerated in bdk_boot_status_t + */ +void bdk_boot_status(bdk_boot_status_t status) +{ + bdk_node_t node = bdk_numa_master(); + int twsi = bdk_config_get_int(BDK_CONFIG_BMC_TWSI); + + /* Update status */ + if (twsi != -1) + { + BDK_CSR_DEFINE(sw_twsi, BDK_MIO_TWSX_SW_TWSI(twsi)); + sw_twsi.u = 0; + sw_twsi.s.v = 1; /* Valid data */ + sw_twsi.s.slonly = 1; /* Slave only */ + sw_twsi.s.data = status; + BDK_CSR_WRITE(node, BDK_MIO_TWSX_SW_TWSI(twsi), sw_twsi.u); + } + + /* As a special case, power cycle will display a message and try a + soft reset if we can't power cycle in 5 seconds */ + if (status == BDK_BOOT_STATUS_REQUEST_POWER_CYCLE) + { + if (twsi != -1) + { + printf("Requested power cycle\n"); + bdk_wait_usec(5000000); /* 5 sec */ + printf("Power cycle failed, trying soft reset\n"); + } + else + printf("Performing soft reset\n"); + bdk_reset_chip(node); + } +} + diff --git a/src/vendorcode/cavium/bdk/libbdk-boot/bdk-watchdog.c b/src/vendorcode/cavium/bdk/libbdk-boot/bdk-watchdog.c new file mode 100644 index 0000000000..48f955a7ef --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-boot/bdk-watchdog.c @@ -0,0 +1,108 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. 
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-gti.h"
+
+/**
+ * Set up the watchdog to expire in timeout_ms milliseconds. When the watchdog
+ * expires, three things happen:
+ * 1) Expire 1: interrupt that is ignored by the BDK
+ * 2) Expire 2: DEL3T interrupt, which is disabled and ignored
+ * 3) Expire 3: Soft reset of the chip
+ *
+ * Since we want a soft reset, we actually program the watchdog to expire at
+ * the timeout / 3.
+ *
+ * @param timeout_ms Timeout in milliseconds. If this is zero, the timeout is taken from the
+ *                   global configuration option BDK_CONFIG_WATCHDOG_TIMEOUT
+ */
+void bdk_watchdog_set(unsigned int timeout_ms)
+{
+    if (timeout_ms == 0)
+        timeout_ms = bdk_config_get_int(BDK_CONFIG_WATCHDOG_TIMEOUT);
+
+    if (timeout_ms > 0)
+    {
+        uint64_t sclk = bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_SCLK);
+        uint64_t timeout_sclk = sclk * timeout_ms / 1000;
+        /* Per the comment above, we want the watchdog to expire at 3x the rate specified */
+        timeout_sclk /= 3;
+        /* Watchdog counts in 1024 cycle steps */
+        uint64_t timeout_wdog = timeout_sclk >> 10;
+        /* We can only specify the upper 16 bits of a 24 bit value. Round up */
+        timeout_wdog = (timeout_wdog + 0xff) >> 8;
+        /* If the timeout overflows the hardware limit, set max */
+        if (timeout_wdog >= 0x10000)
+            timeout_wdog = 0xffff;
+
+        BDK_TRACE(INIT, "Watchdog: Set to expire %lu SCLK cycles\n", timeout_wdog << 18);
+        BDK_CSR_MODIFY(c, bdk_numa_local(), BDK_GTI_CWD_WDOGX(bdk_get_core_num()),
+            c.s.len = timeout_wdog;
+            c.s.mode = 3);
+    }
+}
+
+/**
+ * Signal the watchdog that we are still running
+ */
+void bdk_watchdog_poke(void)
+{
+    BDK_CSR_WRITE(bdk_numa_local(), BDK_GTI_CWD_POKEX(bdk_get_core_num()), 0);
+}
+
+/**
+ * Disable the hardware watchdog
+ */
+void bdk_watchdog_disable(void)
+{
+    BDK_CSR_WRITE(bdk_numa_local(), BDK_GTI_CWD_WDOGX(bdk_get_core_num()), 0);
+    BDK_TRACE(INIT, "Watchdog: Disabled\n");
+}
+
+/**
+ * Return true if the watchdog is configured and running
+ *
+ * @return Non-zero if watchdog is running
+ */
+int bdk_watchdog_is_running(void)
+{
+    BDK_CSR_INIT(wdog, bdk_numa_local(), BDK_GTI_CWD_WDOGX(bdk_get_core_num()));
+    return wdog.s.mode != 0;
+}
+
diff --git a/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-address.c b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-address.c
new file mode 100644
index 0000000000..94d7d76752
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-address.c
@@ -0,0 +1,183 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+*   * Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*
+*   * Redistributions in binary form must reproduce the above
+*     copyright notice, this list of conditions and the following
+*     disclaimer in the documentation and/or other materials provided
+*     with the distribution.
+*
+*   * Neither the name of Cavium Inc. nor the names of
+*     its contributors may be used to endorse or promote products
+*     derived from this software without specific prior written
+*     permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S.
Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ +#include <bdk.h> +#include "libbdk-arch/bdk-csrs-l2c.h" + +#define EXTRACT(v, lsb, width) (((v) >> (lsb)) & ((1ull << (width)) - 1)) +#define INSERT(a, v, lsb, width) a|=(((v) & ((1ull << (width)) - 1)) << (lsb)) + +/** + * Given a physical DRAM address, extract information about the node, LMC, DIMM, + * prank, lrank, bank, row, and column that was accessed. + * + * @param address Physical address to decode + * @param node Node the address was for + * @param lmc LMC controller the address was for + * @param dimm DIMM the address was for + * @param prank Physical RANK on the DIMM + * @param lrank Logical RANK on the DIMM + * @param bank BANK on the DIMM + * @param row Row on the DIMM + * @param col Column on the DIMM + */ +void +bdk_dram_address_extract_info(uint64_t address, int *node, int *lmc, int *dimm, + int *prank, int *lrank, int *bank, int *row, int *col) +{ + int bitno = CAVIUM_IS_MODEL(CAVIUM_CN83XX) ? 19 : 20; + *node = EXTRACT(address, 40, 2); /* Address bits [41:40] */ + /* Determine the LMC controller */ + BDK_CSR_INIT(l2c_ctl, *node, BDK_L2C_CTL); + int bank_lsb, xbits; + + /* xbits depends on number of LMCs */ + xbits = __bdk_dram_get_num_lmc(*node) >> 1; // 4->2; 2->1; 1->0 + bank_lsb = 7 + xbits; + + /* LMC number is probably aliased */ + if (l2c_ctl.s.disidxalias) + *lmc = EXTRACT(address, 7, xbits); + else + *lmc = EXTRACT(address, 7, xbits) ^ EXTRACT(address, bitno, xbits) ^ EXTRACT(address, 12, xbits); + + /* Figure out the bank field width */ + BDK_CSR_INIT(lmcx_config, *node, BDK_LMCX_CONFIG(*lmc)); + int bank_width = __bdk_dram_get_num_bank_bits(*node, *lmc); + + /* Extract additional info from the LMC_CONFIG CSR */ + BDK_CSR_INIT(ext_config, *node, BDK_LMCX_EXT_CONFIG(*lmc)); + int dimm_lsb = 28 + lmcx_config.s.pbank_lsb + xbits; + int dimm_width = 40 - dimm_lsb; + int prank_lsb = dimm_lsb - lmcx_config.s.rank_ena; + int prank_width = dimm_lsb - prank_lsb; + int lrank_lsb = prank_lsb - ext_config.s.dimm0_cid; + int lrank_width = prank_lsb - lrank_lsb; + int row_lsb = 14 + lmcx_config.s.row_lsb + xbits; + int row_width = lrank_lsb - row_lsb; + int col_hi_lsb = bank_lsb + bank_width; + int col_hi_width= row_lsb - col_hi_lsb; + + /* Extract the parts of the address */ + *dimm = EXTRACT(address, dimm_lsb, dimm_width); + *prank = EXTRACT(address, prank_lsb, prank_width); + *lrank = EXTRACT(address, lrank_lsb, lrank_width); + *row = EXTRACT(address, row_lsb, row_width); + + /* bank calculation may be aliased... 
*/ + BDK_CSR_INIT(lmcx_control, *node, BDK_LMCX_CONTROL(*lmc)); + if (lmcx_control.s.xor_bank) + *bank = EXTRACT(address, bank_lsb, bank_width) ^ EXTRACT(address, 12 + xbits, bank_width); + else + *bank = EXTRACT(address, bank_lsb, bank_width); + + /* LMC number already extracted */ + int col_hi = EXTRACT(address, col_hi_lsb, col_hi_width); + *col = EXTRACT(address, 3, 4) | (col_hi << 4); + /* Bus byte is address bits [2:0]. Unused here */ +} + +/** + * Construct a physical address given the node, LMC, DIMM, prank, lrank, bank, row, and column. + * + * @param node Node the address was for + * @param lmc LMC controller the address was for + * @param dimm DIMM the address was for + * @param prank Physical RANK on the DIMM + * @param lrank Logical RANK on the DIMM + * @param bank BANK on the DIMM + * @param row Row on the DIMM + * @param col Column on the DIMM + */ +uint64_t +bdk_dram_address_construct_info(bdk_node_t node, int lmc, int dimm, + int prank, int lrank, int bank, int row, int col) + +{ + uint64_t address = 0; + int bitno = CAVIUM_IS_MODEL(CAVIUM_CN83XX) ? 19 : 20; + + // insert node bits + INSERT(address, node, 40, 2); /* Address bits [41:40] */ + + /* xbits depends on number of LMCs */ + int xbits = __bdk_dram_get_num_lmc(node) >> 1; // 4->2; 2->1; 1->0 + int bank_lsb = 7 + xbits; + + /* Figure out the bank field width */ + int bank_width = __bdk_dram_get_num_bank_bits(node, lmc); + + /* Extract additional info from the LMC_CONFIG CSR */ + BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(lmc)); + BDK_CSR_INIT(ext_config, node, BDK_LMCX_EXT_CONFIG(lmc)); + int dimm_lsb = 28 + lmcx_config.s.pbank_lsb + xbits; + int dimm_width = 40 - dimm_lsb; + int prank_lsb = dimm_lsb - lmcx_config.s.rank_ena; + int prank_width = dimm_lsb - prank_lsb; + int lrank_lsb = prank_lsb - ext_config.s.dimm0_cid; + int lrank_width = prank_lsb - lrank_lsb; + int row_lsb = 14 + lmcx_config.s.row_lsb + xbits; + int row_width = lrank_lsb - row_lsb; + int col_hi_lsb = bank_lsb + bank_width; + int col_hi_width = row_lsb - col_hi_lsb; + + /* Insert some other parts of the address */ + INSERT(address, dimm, dimm_lsb, dimm_width); + INSERT(address, prank, prank_lsb, prank_width); + INSERT(address, lrank, lrank_lsb, lrank_width); + INSERT(address, row, row_lsb, row_width); + INSERT(address, col >> 4, col_hi_lsb, col_hi_width); + INSERT(address, col, 3, 4); + + /* bank calculation may be aliased... */ + BDK_CSR_INIT(lmcx_control, node, BDK_LMCX_CONTROL(lmc)); + int new_bank = bank; + if (lmcx_control.s.xor_bank) + new_bank ^= EXTRACT(address, 12 + xbits, bank_width); + INSERT(address, new_bank, bank_lsb, bank_width); + + /* Determine the actual C bits from the input LMC controller arg */ + /* The input LMC number was probably aliased with other fields */ + BDK_CSR_INIT(l2c_ctl, node, BDK_L2C_CTL); + int new_lmc = lmc; + if (!l2c_ctl.s.disidxalias) + new_lmc ^= EXTRACT(address, bitno, xbits) ^ EXTRACT(address, 12, xbits); + INSERT(address, new_lmc, 7, xbits); + + return address; +} diff --git a/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-config.c b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-config.c new file mode 100644 index 0000000000..3465c5d98b --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-config.c @@ -0,0 +1,163 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. 
+* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ +#include <bdk.h> +#include <unistd.h> + +BDK_REQUIRE_DEFINE(DRAM_CONFIG); + +/** + * Lookup a DRAM configuration by name and initialize DRAM using it + * + * @param node Node to configure + * @param ddr_clock_override + * If non zero, override the DRAM frequency specified + * in the config with this value + * + * @return Amount of DRAM in MB, or negative on failure + */ +int bdk_dram_config(int node, int ddr_clock_override) +{ + const dram_config_t *config = libdram_config_load(node); + if (!config) + { + printf("N%d: No DRAM config specified, skipping DRAM init\n", node); + return 0; + } + + BDK_TRACE(DRAM, "N%d: Starting DRAM init (config=%p, ddr_clock_override=%d)\n", node, config, ddr_clock_override); + int mbytes = libdram_config(node, config, ddr_clock_override); + BDK_TRACE(DRAM, "N%d: DRAM init returned %d\n", node, mbytes); + if (mbytes <= 0) + { + printf("ERROR: DDR initialization failed\n"); + return -1; + } + + return mbytes; +} + +/** + * Do DRAM configuration tuning + * + * @param node Node to tune + * + * @return Success or Fail + */ +int bdk_dram_tune(int node) +{ + int ret; + BDK_TRACE(DRAM, "N%d: Starting DRAM tuning\n", node); + ret = libdram_tune(node); + BDK_TRACE(DRAM, "N%d: DRAM tuning returned %d\n", node, ret); + return ret; +} + +/** + * Do all the DRAM Margin tests + * + * @param node Node to test + * + * @return Success or Fail + */ +void bdk_dram_margin(int node) +{ + BDK_TRACE(DRAM, "N%d: Starting DRAM margining\n", node); + libdram_margin(node); + BDK_TRACE(DRAM, "N%d: Finished DRAM margining.\n", node); + return; +} + +/** + * Return the string of the DRAM configuration info at the specified node. + * If the node is not configured, NULL is returned. 
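+ * Example output, with hypothetical values filled into the snprintf()
+ * format below: " 16384 MB, 2133 MT/s, DDR4 RDIMM"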
+ * + * @param node node to retrieve + * + * @return string or NULL + */ +const char* bdk_dram_get_info_string(int node) +{ + #define INFO_STRING_LEN 40 + static char info_string[INFO_STRING_LEN]; + static const char *info_ptr = info_string; + + snprintf(info_string, INFO_STRING_LEN, + " %ld MB, %ld MT/s, %s %s", + bdk_dram_get_size_mbytes(node), + bdk_config_get_int(BDK_CONFIG_DDR_SPEED, node), + (__bdk_dram_is_ddr4(node, 0)) ? "DDR4" : "DDR3", + (__bdk_dram_is_rdimm(node, 0)) ? "RDIMM" : "UDIMM"); + + return info_ptr; +} + + +/** + * Return the highest address currently used by the BDK. This address will + * be about 4MB above the top of the BDK to make sure small growths between the + * call and its use don't cause corruption. Any call to memory allocation can + * change this value. + * + * @return Size of the BDK in bytes + */ +uint64_t bdk_dram_get_top_of_bdk(void) +{ + /* Make sure the start address is higher that the BDK's active range. + * + * As sbrk() returns a node address, mask off the node portion of + * the address to make it a physical offset. Doing this simplifies the + * address checks and calculations which only work with physical offsets. + */ + uint64_t top_of_bdk = (bdk_ptr_to_phys(sbrk(0)) & bdk_build_mask(40)); + uint64_t l2_size = bdk_l2c_get_cache_size_bytes(bdk_numa_master()); + if (top_of_bdk <= l2_size) + { + /* Early BDK code takes care of the first L2 sized area of memory */ + top_of_bdk = l2_size; + } + else + { + /* Give 4MB of extra so the BDK has room to grow */ + top_of_bdk += 4 << 20; + /* Align it on a 64KB boundary */ + top_of_bdk >>= 16; + top_of_bdk <<= 16; + } + return top_of_bdk; +} diff --git a/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-size.c b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-size.c new file mode 100644 index 0000000000..122afb2a18 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-size.c @@ -0,0 +1,213 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. 
MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+
+/**
+ * Return the number of LMC controllers in use
+ *
+ * @param node Node to probe
+ *
+ * @return 1 to 4 depending on the chip model and mode
+ */
+int __bdk_dram_get_num_lmc(bdk_node_t node)
+{
+    if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+    {
+        BDK_CSR_INIT(lmcx_dll_ctl2, node, BDK_LMCX_DLL_CTL2(2)); // sample LMC2
+        return (lmcx_dll_ctl2.s.intf_en) ? 4 : 2;
+    }
+    else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+    {
+        BDK_CSR_INIT(lmcx_dll_ctl1, node, BDK_LMCX_DLL_CTL2(1)); // sample LMC1
+        return (lmcx_dll_ctl1.s.intf_en) ? 2 : 1;
+    }
+    else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+    {
+        return 1;
+    }
+    else if (CAVIUM_IS_MODEL(CAVIUM_CN93XX))
+    {
+        BDK_CSR_INIT(lmcx_dll_ctl1, node, BDK_LMCX_DLL_CTL2(2));
+        if (lmcx_dll_ctl1.s.intf_en)
+            return 3;
+        lmcx_dll_ctl1.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(1));
+        return (lmcx_dll_ctl1.s.intf_en) ? 2 : 1;
+    }
+    bdk_error("__bdk_dram_get_num_lmc() needs update for this chip\n");
+    return 1;
+}
+
+/**
+ * Return whether the node/LMC is in DRESET
+ *
+ * @param node Node to probe
+ * @param lmc  LMC to probe
+ *
+ * @return 1 or 0
+ */
+static int __bdk_dram_is_lmc_in_dreset(bdk_node_t node, int lmc)
+{
+    BDK_CSR_INIT(lmcx_dll_ctl2, node, BDK_LMCX_DLL_CTL2(lmc)); // can always read this
+    return (lmcx_dll_ctl2.s.dreset != 0) ? 1 : 0;
+}
+
+/**
+ * Return a mask of the number of row bits in use
+ *
+ * @param node Node to probe
+ * @param lmc  LMC to probe
+ *
+ */
+uint32_t __bdk_dram_get_row_mask(bdk_node_t node, int lmc)
+{
+    // PROTECT!!!
+    if (__bdk_dram_is_lmc_in_dreset(node, lmc)) // check LMCn
+        return 0;
+    BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(lmc)); // sample LMCn
+    int numbits = 14 + lmcx_config.s.pbank_lsb - lmcx_config.s.row_lsb - lmcx_config.s.rank_ena;
+    return ((1ul << numbits) - 1);
+}
+
+/**
+ * Return a mask of the number of column bits in use
+ *
+ * @param node Node to probe
+ * @param lmc  LMC to probe
+ *
+ */
+uint32_t __bdk_dram_get_col_mask(bdk_node_t node, int lmc)
+{
+    // PROTECT!!!
+    if (__bdk_dram_is_lmc_in_dreset(node, lmc)) // check LMCn
+        return 0;
+    BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(lmc)); // sample LMCn
+    int numbits = 11 + lmcx_config.s.row_lsb - __bdk_dram_get_num_bank_bits(node, lmc);
+    return ((1ul << numbits) - 1);
+}
+
+/**
+ * Return the number of bank bits in use
+ *
+ * @param node Node to probe
+ * @param lmc  LMC to probe
+ *
+ */
+// all DDR3, and DDR4 x16 today, use only 3 bank bits; DDR4 x4 and x8 always have 4 bank bits
+// NOTE: this will change in the future, when DDR4 x16 devices can come with 16 banks!! FIXME!!
+int __bdk_dram_get_num_bank_bits(bdk_node_t node, int lmc)
+{
+    // PROTECT!!!
+    if (__bdk_dram_is_lmc_in_dreset(node, lmc)) // check LMCn
+        return 0;
+    BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(lmc)); // sample LMCn
+    int bank_width = (__bdk_dram_is_ddr4(node, lmc) && (lmcx_config.s.bg2_enable)) ?
4 : 3; + return bank_width; +} + +/** + * Return whether the node has DDR3 or DDR4 DRAM + * + * @param node Node to probe + * + * @return 0 (DDR3) or 1 (DDR4) + */ +int __bdk_dram_is_ddr4(bdk_node_t node, int lmc) +{ + // PROTECT!!! + if (__bdk_dram_is_lmc_in_dreset(node, lmc)) // check LMCn + return 0; + if (CAVIUM_IS_MODEL(CAVIUM_CN9XXX)) + return 1; + BDK_CSR_INIT(lmcx_ddr_pll_ctl, node, BDK_LMCX_DDR_PLL_CTL(lmc)); // sample LMCn + return (lmcx_ddr_pll_ctl.cn83xx.ddr4_mode != 0); +} + +/** + * Return whether the node has Registered DIMMs or Unbuffered DIMMs + * + * @param node Node to probe + * + * @return 0 (Unbuffered) or 1 (Registered) + */ +int __bdk_dram_is_rdimm(bdk_node_t node, int lmc) +{ + // PROTECT!!! + if (__bdk_dram_is_lmc_in_dreset(node, lmc)) // check LMCn + return 0; + BDK_CSR_INIT(lmcx_control, node, BDK_LMCX_CONTROL(lmc)); // sample LMCn + return (lmcx_control.s.rdimm_ena != 0); +} + +/** + * Get the amount of DRAM configured for a node. This is read from the LMC + * controller after DRAM is setup. + * + * @param node Node to query + * + * @return Size in megabytes + */ +uint64_t bdk_dram_get_size_mbytes(int node) +{ + if (bdk_is_platform(BDK_PLATFORM_EMULATOR)) + return 2 << 10; /* 2GB is available on t88 and t81 + ** some t83 models have 8gb, but it is too long to init */ + /* Return zero if dram isn't enabled */ + if (!__bdk_is_dram_enabled(node)) + return 0; + + uint64_t memsize = 0; + const int num_dram_controllers = __bdk_dram_get_num_lmc(node); + for (int lmc = 0; lmc < num_dram_controllers; lmc++) + { + if (bdk_is_platform(BDK_PLATFORM_ASIM)) + { + /* Asim doesn't simulate the rank detection, fake 4GB per controller */ + memsize += 4ull << 30; + } + else + { + // PROTECT!!! + if (__bdk_dram_is_lmc_in_dreset(node, lmc)) // check LMCn + return 0; + BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(lmc)); + int num_ranks = bdk_pop(lmcx_config.s.init_status); + uint64_t rank_size = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena); + memsize += rank_size * num_ranks; + } + } + return memsize >> 20; +} + diff --git a/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-addrbus.c b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-addrbus.c new file mode 100644 index 0000000000..9fe8570454 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-addrbus.c @@ -0,0 +1,115 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. 
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include "bdk.h"
+
+/* Used for all memory reads/writes related to the test */
+#define READ64(address) __bdk_dram_read64(address)
+#define WRITE64(address, data) __bdk_dram_write64(address, data)
+
+/**
+ * Address bus test. This test writes a single value to each power of two in the
+ * area, looking for false aliases that would be created by address lines being
+ * shorted or tied together.
+ *
+ * @param area        Start of the physical memory area
+ * @param max_address End of the physical memory area (exclusive)
+ * @param bursts      Number of bursts (currently unused)
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_address_bus(uint64_t area, uint64_t max_address, int bursts)
+{
+    int failures = 0;
+
+    /* Clear our work area. Checking for aliases later could get false
+       positives if it matched stale data */
+    void *ptr = (area) ? bdk_phys_to_ptr(area) : NULL;
+    bdk_zero_memory(ptr, max_address - area);
+    __bdk_dram_flush_to_mem_range(area, max_address);
+
+    /* Each time we write, we'll write this pattern xor'd with the address it
+       is written to */
+    uint64_t pattern = 0x0fedcba987654321;
+
+    /* Walk through the region incrementing our offset by a power of two. The
+       first few writes will be to the same cache line (offsets 0x8, 0x10, 0x20,
+       and 0x40). Offset 0x80 and beyond will be to different cache lines */
+    uint64_t offset = 0x8;
+    while (area + offset < max_address)
+    {
+        uint64_t address = area + offset;
+        /* Write one location with pattern xor address */
+        uint64_t p = pattern ^ address;
+        WRITE64(address, p);
+        __bdk_dram_flush_to_mem(address);
+        offset <<= 1;
+    }
+
+    /* Read all of the area to make sure no other locations were written */
+    uint64_t a = area;
+    offset = 0x8;
+    uint64_t next_write = area + offset;
+    while (a < max_address)
+    {
+        if (a + 256 < max_address)
+            BDK_PREFETCH(a + 256, 0);
+        for (int i = 0; i < 16; i++)
+        {
+            uint64_t data = READ64(a);
+            uint64_t correct;
+            if (a == next_write)
+            {
+                correct = pattern ^ next_write;
+                offset <<= 1;
+                next_write = area + offset;
+            }
+            else
+                correct = 0;
+            if (bdk_unlikely(data != correct))
+            {
+                failures++;
+                __bdk_dram_report_error(a, data, correct, 0, -1);
+            }
+            a += 8;
+        }
+    }
+
+    return failures;
+}
+
diff --git a/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-databus.c b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-databus.c
new file mode 100644
index 0000000000..c3fa1ffd8d
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-databus.c
@@ -0,0 +1,252 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+*   * Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*
+*   * Redistributions in binary form must reproduce the above
+*     copyright notice, this list of conditions and the following
+*     disclaimer in the documentation and/or other materials provided
+*     with the distribution.
+*
+*   * Neither the name of Cavium Inc. nor the names of
+*     its contributors may be used to endorse or promote products
+*     derived from this software without specific prior written
+*     permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include "bdk.h"
+
+/* Used for all memory reads/writes related to the test */
+#define READ64(address) __bdk_dram_read64(address)
+#define WRITE64(address, data) __bdk_dram_write64(address, data)
+
+/* Build a 64bit mask out of a single hex digit */
+#define REPEAT2(v) ((((uint64_t)v) << 4) | ((uint64_t)v))
+#define REPEAT4(v) ((REPEAT2(v) << 8) | REPEAT2(v))
+#define REPEAT8(v) ((REPEAT4(v) << 16) | REPEAT4(v))
+#define REPEAT16(v) ((REPEAT8(v) << 32) | REPEAT8(v))
+
+/**
+ * Read memory and check that the data bus pattern is present. The pattern is a
+ * sequence of 16 dwords created from the 16 hex digits repeated in each word.
+ *
+ * @param address Physical address to read. This must be cache line aligned.
+ * @param bursts  Number of times to repeat the read test to verify stability
+ *
+ * @return Number of errors, zero means success
+ */
+static int read_data_bus_burst(uint64_t address, int bursts)
+{
+    int failures = 0;
+
+    /* Loop over the bursts so people using a scope have time to capture
+       traces */
+    for (int burst = 0; burst < bursts; burst++)
+    {
+        /* Invalidate all caches so we must read from DRAM */
+        __bdk_dram_flush_to_mem(address);
+        BDK_DCACHE_INVALIDATE;
+
+        for (uint64_t digit = 0; digit < 16; digit++)
+        {
+            uint64_t a = address + digit * 8;
+            uint64_t data = READ64(a);
+            uint64_t correct = REPEAT16(digit);
+            if (data != correct)
+            {
+                failures++;
+                __bdk_dram_report_error(a, data, correct, burst, -1);
+            }
+        }
+    }
+    return failures;
+}
+
+/**
+ * Write memory with a data bus pattern and check that it can be read correctly.
+ * The pattern is a sequence of 16 dwords created from the 16 hex digits repeated
+ * in each word.
+ *
+ * @param address Physical address to write. This must be cache line aligned.
+ *                128 bytes will be written starting at this address.
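+ *                For example, DWORD N of the line holds hex digit N
+ *                replicated through the word (per the REPEAT16() macro
+ *                above), so DWORD 0x5 should read 0x5555555555555555 and
+ *                DWORD 0xf should read 0xffffffffffffffff.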
+ * @param bursts  Number of times to repeat the write+read test to verify stability
+ *
+ * @return Number of errors, zero means success
+ */
+static int write_data_bus_burst(uint64_t address, int bursts)
+{
+    BDK_TRACE(DRAM_TEST, "[0x%016lx:0x%016lx] Writing incrementing digits\n",
+        address, address + 127);
+    /* Loop over the bursts so people using a scope have time to capture
+       traces */
+    for (int burst = 0; burst < bursts; burst++)
+    {
+        /* Fill a cache line with an incrementing pattern. Each nibble
+           in the 64bit word increments from 0 to 0xf */
+        for (uint64_t digit = 0; digit < 16; digit++)
+            WRITE64(address + digit * 8, REPEAT16(digit));
+        /* Force the cache line out to memory */
+        __bdk_dram_flush_to_mem(address);
+    }
+    return read_data_bus_burst(address, bursts);
+}
+
+/**
+ * Read back the pattern written by write_data_bus_walk() and
+ * make sure it was stored properly.
+ *
+ * @param address Physical address to read. This must be cache line aligned.
+ * @param burst   Current burst number, used for error reporting
+ * @param pattern Pattern basis for the writes. See write_data_bus_walk()
+ *
+ * @return Number of errors, zero means success
+ */
+static int read_data_bus_walk(uint64_t address, int burst, uint64_t pattern)
+{
+    int failures = 0;
+
+    /* Invalidate all caches so we must read from DRAM */
+    __bdk_dram_flush_to_mem(address);
+    BDK_DCACHE_INVALIDATE;
+
+    uint64_t correct = pattern;
+    for (uint64_t word = 0; word < 16; word++)
+    {
+        uint64_t a = address + word * 8;
+        uint64_t data = READ64(a);
+        if (data != correct)
+        {
+            failures++;
+            __bdk_dram_report_error(a, data, correct, burst, -1);
+        }
+        uint64_t tmp = correct >> 63; /* Save top bit */
+        correct <<= 1;                /* Shift left one bit */
+        correct |= tmp;               /* Restore the top bit as bit 0 */
+    }
+
+    return failures;
+}
+
+/**
+ * Write a pattern to a cache line, rotating it one bit for each DWORD. The
+ * input pattern is rotated left by one bit for each DWORD written. Use
+ * read_data_bus_walk() to verify the result.
+ *
+ * @param address Physical address to write. This must be cache line aligned. 128 bytes will be
+ *                written starting at this address.
+ * @param burst   Current burst number (unused)
+ * @param pattern Pattern basis
+ */
+static void write_data_bus_walk(uint64_t address, int burst, uint64_t pattern)
+{
+    BDK_TRACE(DRAM_TEST, "[0x%016lx:0x%016lx] Writing walking pattern 0x%016lx\n",
+        address, address + 127, pattern);
+
+    uint64_t a = address;
+    uint64_t d = pattern;
+
+    /* Fill a cache line with the pattern. Each 64bit word will have the
+       pattern rotated left one bit */
+    for (uint64_t word = 0; word < 16; word++)
+    {
+        WRITE64(a, d);
+        a += 8;
+        uint64_t tmp = d >> 63; /* Save top bit */
+        d <<= 1;                /* Shift left one bit */
+        d |= tmp;               /* Restore the top bit as bit 0 */
+    }
+    /* Force the cache line out to memory */
+    __bdk_dram_flush_to_mem(address);
+}
+
+/**
+ * The goal of these tests is to toggle every DDR data pin, one at a time or in
+ * related groups, to isolate any short circuits between the data pins or open
+ * circuits where the pin is not connected to the DDR memory. A board which fails
+ * one of these tests has severe problems and will not be able to run any of the
+ * later test patterns.
+ *
+ * @param start_address
+ *               Physical address of a cache line to use for the test.
+ *               Only this cache line is written.
+ * @param end_address
+ *               Top end of the address range. Currently unused
+ * @param bursts Number of times to repeat writes+reads to ensure stability
+ *
+ * @return Number of errors, zero means success
+ */
+int __bdk_dram_test_mem_data_bus(uint64_t start_address, uint64_t end_address, int bursts)
+{
+    int failures = 0;
+
+    /* Incrementing pattern: 0x0 - 0xf in each nibble */
+    failures += write_data_bus_burst(start_address, bursts);
+
+    /* Walking ones. Run with 1, 2, and 3 bits walking */
+    for (int bits = 1; bits <= 3; bits++)
+    {
+        for (int burst = 0; burst < bursts; burst++)
+        {
+            /* Each write_data_bus_walk() call writes 16 dwords, so step by 16 */
+            for (int i = 0; i < 64; i += 16)
+            {
+                uint64_t pattern = bdk_build_mask(bits) << i;
+                write_data_bus_walk(start_address + i*8, burst, pattern);
+            }
+            /* Each read_data_bus_walk() call reads 16 dwords, so step by 16 */
+            for (int i = 0; i < 64; i += 16)
+            {
+                uint64_t pattern = bdk_build_mask(bits) << i;
+                failures += read_data_bus_walk(start_address + i*8, burst, pattern);
+            }
+        }
+    }
+
+    /* Walking zeros. Run with 1, 2, and 3 bits walking */
+    for (int bits = 1; bits <= 3; bits++)
+    {
+        for (int burst = 0; burst < bursts; burst++)
+        {
+            /* Each write_data_bus_walk() call writes 16 dwords, so step by 16 */
+            for (int i = 0; i < 64; i += 16)
+            {
+                uint64_t pattern = ~(bdk_build_mask(bits) << i);
+                write_data_bus_walk(start_address + i*8, burst, pattern);
+            }
+            /* Each read_data_bus_walk() call reads 16 dwords, so step by 16 */
+            for (int i = 0; i < 64; i += 16)
+            {
+                uint64_t pattern = ~(bdk_build_mask(bits) << i);
+                failures += read_data_bus_walk(start_address + i*8, burst, pattern);
+            }
+        }
+    }
+    return failures;
+}
+
diff --git a/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-fastscan.c b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-fastscan.c
new file mode 100644
index 0000000000..46e205dd80
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-fastscan.c
@@ -0,0 +1,103 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+*   * Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*
+*   * Redistributions in binary form must reproduce the above
+*     copyright notice, this list of conditions and the following
+*     disclaimer in the documentation and/or other materials provided
+*     with the distribution.
+*
+*   * Neither the name of Cavium Inc. nor the names of
+*     its contributors may be used to endorse or promote products
+*     derived from this software without specific prior written
+*     permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include "bdk.h"
+
+/* Used for all memory reads/writes related to the test */
+#define READ64(address) __bdk_dram_read64(address)
+#define WRITE64(address, data) __bdk_dram_write64(address, data)
+
+/**
+ * Fast scan test. This test is meant to find gross errors caused by read/write
+ * leveling failing on a single rank or DIMM. The idea is to scan through all of
+ * memory in large steps. The large steps hit each rank multiple times, but not
+ * every byte. If the whole rank has errors, this should find them quickly. This
+ * test is suitable as an alive test during early boot.
+ *
+ * @param area        Starting physical address
+ * @param max_address Ending physical address, exclusive
+ * @param bursts      Number of bursts to run (currently unused)
+ *
+ * @return Number of errors
+ */
+int __bdk_dram_test_fast_scan(uint64_t area, uint64_t max_address, int bursts)
+{
+    int failures = 0;
+    const uint64_t step = 0x10008; /* The 8 is so we walk through cache lines too */
+    const uint64_t pattern1 = 0xaaaaaaaaaaaaaaaa;
+    const uint64_t pattern2 = 0x5555555555555555;
+
+    /* Walk through the region incrementing our offset by STEP */
+    uint64_t a = area;
+    while (a + 16 <= max_address)
+    {
+        WRITE64(a, pattern1);
+        WRITE64(a+8, pattern2);
+        __bdk_dram_flush_to_mem_range(a, a + 16);
+        a += step;
+    }
+
+    /* Read back, checking the writes */
+    a = area;
+    while (a + 16 <= max_address)
+    {
+        /* Prefetch two steps ahead for better performance */
+        uint64_t pre = a + step * 2;
+        if (pre + 16 < max_address)
+            BDK_PREFETCH(pre, 0);
+        /* Check pattern 1 */
+        uint64_t data1 = READ64(a);
+        if (bdk_unlikely(data1 != pattern1))
+        {
+            failures++;
+            __bdk_dram_report_error(a, data1, pattern1, 0, -1);
+        }
+        /* Check pattern 2 */
+        uint64_t data2 = READ64(a+8);
+        if (bdk_unlikely(data2 != pattern2))
+        {
+            failures++;
+            __bdk_dram_report_error(a+8, data2, pattern2, 0, -1);
+        }
+        a += step;
+    }
+
+    return failures;
+}
+
diff --git a/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-patfil.c b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-patfil.c
new file mode 100644
index 0000000000..e6c4b57721
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-patfil.c
@@ -0,0 +1,829 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+*   * Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*
+*   * Redistributions in binary form must reproduce the above
+*     copyright notice, this list of conditions and the following
+*     disclaimer in the documentation and/or other materials provided
+*     with the distribution.
+*
+*   * Neither the name of Cavium Inc. nor the names of
+*     its contributors may be used to endorse or promote products
+*     derived from this software without specific prior written
+*     permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include "bdk.h"
+
+// choose prediction-based algorithms for mem_xor and mem_rows tests
+#define USE_PREDICTION_CODE_VERSIONS 1 // change to 0 to go back to the original versions
+
+/* Used for all memory reads/writes related to the test */
+#define READ64(address) __bdk_dram_read64(address)
+#define WRITE64(address, data) __bdk_dram_write64(address, data)
+
+/**
+ * Fill a memory area with the address of each 64-bit word in the area.
+ * Reread to confirm the pattern.
+ *
+ * @param area        Start of the physical memory area
+ * @param max_address End of the physical memory area (exclusive)
+ * @param bursts      Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_self_addr(uint64_t area, uint64_t max_address, int bursts)
+{
+    int failures = 0;
+
+    for (int burst = 0; burst < bursts; burst++)
+    {
+        /* Write the pattern to memory. Each location receives the address
+         * of the location.
+         */
+        for (uint64_t address = area; address < max_address; address+=8)
+            WRITE64(address, address);
+        __bdk_dram_flush_to_mem_range(area, max_address);
+        BDK_DCACHE_INVALIDATE;
+
+        /* Read by ascending address the written memory and confirm that it
+         * has the expected data pattern.
+         */
+        for (uint64_t address = area; address < max_address; )
+        {
+            if (address + 256 < max_address)
+                BDK_PREFETCH(address + 256, 0);
+            for (int i=0; i<16; i++)
+            {
+                uint64_t data = READ64(address);
+                if (bdk_unlikely(data != address))
+                    failures += __bdk_dram_retry_failure(burst, address, data, address);
+                address += 8;
+            }
+        }
+        __bdk_dram_flush_to_mem_range(area, max_address);
+        BDK_DCACHE_INVALIDATE;
+
+        /* Read by descending address the written memory and confirm that it
+         * has the expected data pattern.
+         */
+        uint64_t end = max_address - sizeof(uint64_t);
+        for (uint64_t address = end; address >= area; )
+        {
+            if (address - 256 >= area)
+                BDK_PREFETCH(address - 256, 0);
+            for (int i=0; i<16; i++)
+            {
+                uint64_t data = READ64(address);
+                if (bdk_unlikely(data != address))
+                    failures += __bdk_dram_retry_failure(burst, address, data, address);
+                address -= 8;
+            }
+        }
+        __bdk_dram_flush_to_mem_range(area, max_address);
+        BDK_DCACHE_INVALIDATE;
+
+        /* Read from random addresses within the memory area.
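+         * Roughly one random probe is made per 128 bytes of area (one
+         * DWORD out of every 16), with each probe address rounded down
+         * to an 8-byte boundary.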
+         */
+        uint64_t probes = (max_address - area) / 128;
+        uint64_t address_ahead1 = area;
+        uint64_t address_ahead2 = area;
+        for (uint64_t i = 0; i < probes; i++)
+        {
+            /* Create a pipeline of prefetches:
+               address = address read this loop
+               address_ahead1 = prefetch started last loop
+               address_ahead2 = prefetch started this loop */
+            uint64_t address = address_ahead1;
+            address_ahead1 = address_ahead2;
+            address_ahead2 = bdk_rng_get_random64() % (max_address - area);
+            address_ahead2 += area;
+            address_ahead2 &= -8;
+            BDK_PREFETCH(address_ahead2, 0);
+
+            uint64_t data = READ64(address);
+            if (bdk_unlikely(data != address))
+                failures += __bdk_dram_retry_failure(burst, address, data, address);
+        }
+    }
+    return failures;
+}
+
+/**
+ * Write "pattern" and its complement to memory and verify each was written
+ * properly. On even passes the entire area is filled with pattern; on odd
+ * passes it is filled with ~pattern.
+ *
+ * @param area        Start physical address of memory
+ * @param max_address End of physical memory region
+ * @param pattern     Pattern to write
+ * @param passes      Number of times to repeat the test
+ *
+ * @return Number of errors, zero on success
+ */
+static uint32_t test_mem_pattern(uint64_t area, uint64_t max_address, uint64_t pattern,
+                                 int passes)
+{
+    int failures = 0;
+
+    for (int pass = 0; pass < passes; pass++)
+    {
+        if (pass & 0x1)
+            pattern = ~pattern;
+
+        for (uint64_t address = area; address < max_address; address += 8)
+            WRITE64(address, pattern);
+        __bdk_dram_flush_to_mem_range(area, max_address);
+        BDK_DCACHE_INVALIDATE;
+
+        /* Read the written memory and confirm that it has the expected
+         * data pattern.
+         */
+        uint64_t address = area;
+        while (address < max_address)
+        {
+            if (address + 256 < max_address)
+                BDK_PREFETCH(address + 256, 0);
+            for (int i=0; i<16; i++)
+            {
+                uint64_t data = READ64(address);
+                if (bdk_unlikely(data != pattern))
+                    failures += __bdk_dram_retry_failure(pass, address, data, pattern);
+                address += 8;
+            }
+        }
+    }
+    return failures;
+}
+
+/**
+ * Walking zero written to memory, left shift
+ *
+ * @param area        Start of the physical memory area
+ * @param max_address End of the physical memory area
+ * @param bursts      Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_leftwalk0(uint64_t area, uint64_t max_address, int bursts)
+{
+    int failures = 0;
+    for (int burst = 0; burst < bursts; burst++)
+    {
+        for (uint64_t pattern = 1; pattern != 0; pattern = pattern << 1)
+            failures += test_mem_pattern(area, max_address, ~pattern, 1);
+    }
+    return failures;
+}
+
+/**
+ * Walking one written to memory, left shift
+ *
+ * @param area        Start of the physical memory area
+ * @param max_address End of the physical memory area
+ * @param bursts      Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_leftwalk1(uint64_t area, uint64_t max_address, int bursts)
+{
+    int failures = 0;
+    for (int burst = 0; burst < bursts; burst++)
+    {
+        for (uint64_t pattern = 1; pattern != 0; pattern = pattern << 1)
+            failures += test_mem_pattern(area, max_address, pattern, 1);
+    }
+    return failures;
+}
+
+/**
+ * Walking zero written to memory, right shift
+ *
+ * @param area        Start of the physical memory area
+ * @param max_address End of the physical memory area
+ * @param bursts      Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int
__bdk_dram_test_mem_rightwalk0(uint64_t area, uint64_t max_address, int bursts) +{ + int failures = 0; + for (int burst = 0; burst < bursts; burst++) + { + for (uint64_t pattern = 1ull << 63; pattern != 0; pattern = pattern >> 1) + failures += test_mem_pattern(area, max_address, ~pattern, 1); + } + return failures; +} + +/** + * Walking one written to memory, right shift + * + * @param area Start of the physical memory area + * @param max_address + * End of the physical memory area + * @param bursts Number of time to repeat the test over the entire area + * + * @return Number of errors, zero on success + */ +int __bdk_dram_test_mem_rightwalk1(uint64_t area, uint64_t max_address, int bursts) +{ + int failures = 0; + for (int burst = 0; burst < bursts; burst++) + { + for (uint64_t pattern = 1ull<<63; pattern != 0; pattern = pattern >> 1) + failures += test_mem_pattern(area, max_address, pattern, 1); + } + return failures; +} + +/** + * Apply the March C- testing algorithm to the given memory area. + * 1) Write "pattern" to memory. + * 2) Verify "pattern" and write "~pattern". + * 3) Verify "~pattern" and write "pattern". + * 4) Verify "pattern" and write "~pattern". + * 5) Verify "~pattern" and write "pattern". + * 6) Verify "pattern". + * + * @param area Start of the physical memory area + * @param max_address + * End of the physical memory area + * @param pattern + * + * @return Number of errors, zero on success + */ +static int test_mem_march_c(uint64_t area, uint64_t max_address, uint64_t pattern) +{ + int failures = 0; + + /* Pass 1 ascending addresses, fill memory with pattern. */ + BDK_TRACE(DRAM_TEST, " [0x%016lx:0x%016lx] Phase1, address incrementing, pattern 0x%016lx\n", area, max_address-1, pattern); + for (uint64_t address = area; address < max_address; address += 8) + WRITE64(address, pattern); + + __bdk_dram_flush_to_mem_range(area, max_address); + BDK_DCACHE_INVALIDATE; + + /* Pass 2: ascending addresses, read pattern and write ~pattern */ + BDK_TRACE(DRAM_TEST, " [0x%016lx:0x%016lx] Phase2, address incrementing, pattern 0x%016lx\n", area, max_address-1, ~pattern); + for (uint64_t address = area; address < max_address; address += 8) + { + uint64_t data = READ64(address); + if (bdk_unlikely(data != pattern)) + failures += __bdk_dram_retry_failure(1, address, data, pattern); + WRITE64(address, ~pattern); + } + + __bdk_dram_flush_to_mem_range(area, max_address); + BDK_DCACHE_INVALIDATE; + + /* Pass 3: ascending addresses, read ~pattern and write pattern. */ + BDK_TRACE(DRAM_TEST, " [0x%016lx:0x%016lx] Phase3, address incrementing, pattern 0x%016lx\n", area, max_address-1, pattern); + for (uint64_t address = area; address < max_address; address += 8) + { + uint64_t data = READ64(address); + if (bdk_unlikely(data != ~pattern)) + failures += __bdk_dram_retry_failure(1, address, data, ~pattern); + WRITE64(address, pattern); + } + + __bdk_dram_flush_to_mem_range(area, max_address); + BDK_DCACHE_INVALIDATE; + + /* Pass 4: descending addresses, read pattern and write ~pattern. 
*/
+    BDK_TRACE(DRAM_TEST, " [0x%016lx:0x%016lx] Phase4, address decrementing, pattern 0x%016lx\n", area, max_address-1, ~pattern);
+    uint64_t end = max_address - sizeof(uint64_t);
+    for (uint64_t address = end; address >= area; address -= 8)
+    {
+        uint64_t data = READ64(address);
+        if (bdk_unlikely(data != pattern))
+            failures += __bdk_dram_retry_failure(1, address, data, pattern);
+        WRITE64(address, ~pattern);
+    }
+
+    __bdk_dram_flush_to_mem_range(area, max_address);
+    BDK_DCACHE_INVALIDATE;
+
+    /* Pass 5: descending addresses, read ~pattern and write pattern. */
+    BDK_TRACE(DRAM_TEST, " [0x%016lx:0x%016lx] Phase5, address decrementing, pattern 0x%016lx\n", area, max_address-1, pattern);
+    for (uint64_t address = end; address >= area; address -= 8)
+    {
+        uint64_t data = READ64(address);
+        if (bdk_unlikely(data != ~pattern))
+            failures += __bdk_dram_retry_failure(1, address, data, ~pattern);
+        WRITE64(address, pattern);
+    }
+
+    __bdk_dram_flush_to_mem_range(area, max_address);
+    BDK_DCACHE_INVALIDATE;
+
+    /* Pass 6: ascending addresses, read pattern. */
+    BDK_TRACE(DRAM_TEST, " [0x%016lx:0x%016lx] Phase6, address incrementing\n", area, max_address-1);
+    for (uint64_t address = area; address < max_address; address += 8)
+    {
+        uint64_t data = READ64(address);
+        if (bdk_unlikely(data != pattern))
+            failures += __bdk_dram_retry_failure(1, address, data, pattern);
+    }
+
+    return failures;
+}
+
+/**
+ * Use test_mem_march_c() with an all-ones pattern
+ *
+ * @param area        Start of the physical memory area
+ * @param max_address End of the physical memory area
+ * @param bursts      Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_solid(uint64_t area, uint64_t max_address, int bursts)
+{
+    int failures = 0;
+    for (int burst = 0; burst < bursts; burst++)
+        failures += test_mem_march_c(area, max_address, -1);
+    return failures;
+}
+
+/**
+ * Use test_mem_march_c() with a 0x5555555555555555 checkerboard pattern
+ *
+ * @param area        Start of the physical memory area
+ * @param max_address End of the physical memory area
+ * @param bursts      Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_checkerboard(uint64_t area, uint64_t max_address, int bursts)
+{
+    int failures = 0;
+    for (int burst = 0; burst < bursts; burst++)
+        failures += test_mem_march_c(area, max_address, 0x5555555555555555L);
+    return failures;
+}
+
+/**
+ * Write a pseudo random pattern to memory and verify it
+ *
+ * @param area        Start of the physical memory area
+ * @param max_address End of the physical memory area
+ * @param bursts      Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_random(uint64_t area, uint64_t max_address, int bursts)
+{
+    /* This constant is used to increment the pattern after every DWORD. This
+       makes only the first DWORD truly random, but saves us processing
+       power generating the random values */
+    const uint64_t INC = 0x1010101010101010ULL;
+
+    int failures = 0;
+    for (int burst = 0; burst < bursts; burst++)
+    {
+        const uint64_t init_pattern = bdk_rng_get_random64();
+        uint64_t pattern = init_pattern;
+
+        /* Write the pattern to memory, incrementing it by INC after each
+         * DWORD written.
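+         * For example, assuming a hypothetical seed of init_pattern =
+         * 0x0123456789abcdef, the first two DWORDs written would be
+         * 0x0123456789abcdef and 0x1133557799bbddff.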
+         */
+        for (uint64_t address = area; address < max_address; address += 8)
+        {
+            WRITE64(address, pattern);
+            pattern += INC;
+        }
+
+        __bdk_dram_flush_to_mem_range(area, max_address);
+        BDK_DCACHE_INVALIDATE;
+
+        /* Read the written memory and confirm that it has the expected
+         * data pattern.
+         */
+        pattern = init_pattern;
+        for (uint64_t address = area; address < max_address; address += 8)
+        {
+            uint64_t data = READ64(address);
+            if (bdk_unlikely(data != pattern))
+                failures += __bdk_dram_retry_failure(burst, address, data, pattern);
+            pattern += INC;
+        }
+    }
+    return failures;
+}
+
+#if !USE_PREDICTION_CODE_VERSIONS
+/**
+ * test_mem_xor
+ *
+ * @param area   Start of the physical memory area
+ * @param max_address
+ *               End of the physical memory area
+ * @param bursts Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_xor(uint64_t area, uint64_t max_address, int bursts)
+{
+    int failures = 0;
+
+    uint64_t extent = max_address - area;
+    uint64_t count = (extent / sizeof(uint64_t)) / 2;
+
+    /* Fill both halves of the memory area with identical randomized data.
+     */
+    uint64_t address1 = area;
+    uint64_t address2 = area + count * sizeof(uint64_t);
+
+    uint64_t pattern = bdk_rng_get_random64();
+
+    for (uint64_t j = 0; j < count; j++)
+    {
+        uint64_t p = pattern * address1;
+        WRITE64(address1, p);
+        WRITE64(address2, p);
+        address1 += 8;
+        address2 += 8;
+    }
+    __bdk_dram_flush_to_mem_range(area, max_address);
+    BDK_DCACHE_INVALIDATE;
+
+    /* Make a series of passes over the memory areas. */
+    for (int burst = 0; burst < bursts; burst++)
+    {
+        /* XOR the data with a random value, applying the change to both
+         * memory areas.
+         */
+        address1 = area;
+        address2 = area + count * sizeof(uint64_t);
+
+        pattern = bdk_rng_get_random64();
+
+        for (uint64_t j = 0; j < count; j++)
+        {
+            if ((address1 & BDK_CACHE_LINE_MASK) == 0)
+                BDK_PREFETCH(address1, BDK_CACHE_LINE_SIZE);
+            if ((address2 & BDK_CACHE_LINE_MASK) == 0)
+                BDK_PREFETCH(address2, BDK_CACHE_LINE_SIZE);
+            WRITE64(address1, READ64(address1) ^ pattern);
+            WRITE64(address2, READ64(address2) ^ pattern);
+            address1 += 8;
+            address2 += 8;
+        }
+
+        __bdk_dram_flush_to_mem_range(area, max_address);
+        BDK_DCACHE_INVALIDATE;
+
+        /* Look for differences in the areas. If there is a mismatch, reset
+         * both memory locations with the same pattern. Failing to do so
+         * means that on all subsequent passes the pair of locations remain
+         * out of sync, giving spurious errors.
+         */
+        address1 = area;
+        address2 = area + count * sizeof(uint64_t);
+        for (uint64_t j = 0; j < count; j++)
+        {
+            if ((address1 & BDK_CACHE_LINE_MASK) == 0)
+                BDK_PREFETCH(address1, BDK_CACHE_LINE_SIZE);
+            if ((address2 & BDK_CACHE_LINE_MASK) == 0)
+                BDK_PREFETCH(address2, BDK_CACHE_LINE_SIZE);
+            uint64_t d1 = READ64(address1);
+            uint64_t d2 = READ64(address2);
+            if (bdk_unlikely(d1 != d2))
+            {
+                failures += __bdk_dram_retry_failure2(burst, address1, d1, address2, d2);
+
+                // Synchronize the two areas, adjusting for the error.
+                WRITE64(address1, d2);
+                WRITE64(address2, d2);
+            }
+            address1 += 8;
+            address2 += 8;
+        }
+    }
+    return failures;
+}
+
+/**
+ * test_mem_rows
+ *
+ * Write a pattern of alternating 64-bit words of all-one bits and then
+ * all-zero bits. This pattern generates the maximum amount of simultaneous
+ * switching activity on the memory channels. Each pass flips the pattern
+ * with words going from all ones to all zeros and vice versa.
+ * + * @param area Start of the physical memory area + * @param max_address + * End of the physical memory area + * @param bursts Number of times to repeat the test over the entire area + * + * @return Number of errors, zero on success + */ +int __bdk_dram_test_mem_rows(uint64_t area, uint64_t max_address, int bursts) +{ + int failures = 0; + uint64_t pattern = 0x0; + uint64_t extent = (max_address - area); + uint64_t count = (extent / 2) / sizeof(uint64_t); // in terms of 64bit words + + /* Fill both halves of the memory area with identical data pattern. Odd + * address 64-bit words get the pattern, while even address words get the + * inverted pattern. + */ + uint64_t address1 = area; + uint64_t address2 = area + count * sizeof(uint64_t); + + for (uint64_t j = 0; j < (count / 2); j++) + { + WRITE64(address1, pattern); + WRITE64(address2, pattern); + address1 += 8; + address2 += 8; + WRITE64(address1, ~pattern); + WRITE64(address2, ~pattern); + address1 += 8; + address2 += 8; + } + __bdk_dram_flush_to_mem_range(area, max_address); + BDK_DCACHE_INVALIDATE; + + /* Make a series of passes over the memory areas. */ + for (int burst = 0; burst < bursts; burst++) + { + /* Invert the data, applying the change to both memory areas. Thus on + * alternate passes, the data flips from 0 to 1 and vice versa. + */ + address1 = area; + address2 = area + count * sizeof(uint64_t); + for (uint64_t j = 0; j < count; j++) + { + WRITE64(address1, ~READ64(address1)); + WRITE64(address2, ~READ64(address2)); + address1 += 8; + address2 += 8; + } + __bdk_dram_flush_to_mem_range(area, max_address); + BDK_DCACHE_INVALIDATE; + + /* Look for differences in the areas. If there is a mismatch, reset + * both memory locations with the same pattern. Failing to do so + * means that on all subsequent passes the pair of locations remain + * out of sync giving spurious errors. + */ + address1 = area; + address2 = area + count * sizeof(uint64_t); + for (uint64_t j = 0; j < count; j++) + { + uint64_t d1 = READ64(address1); + uint64_t d2 = READ64(address2); + if (bdk_unlikely(d1 != d2)) + { + failures += __bdk_dram_retry_failure2(burst, address1, d1, address2, d2); + + // Synchronize the two areas, adjusting for the error. + WRITE64(address1, d2); + WRITE64(address2, d2); + } + address1 += 8; + address2 += 8; + } + } + return failures; +} +#endif /* !USE_PREDICTION_CODE_VERSIONS */ + +#if USE_PREDICTION_CODE_VERSIONS +//////////////////////////// this is the new code... + +int __bdk_dram_test_mem_xor(uint64_t area, uint64_t max_address, int bursts) +{ + int failures = 0; + int burst; + + uint64_t extent = max_address - area; + uint64_t count = (extent / sizeof(uint64_t)) / 2; + uint64_t offset = count * sizeof(uint64_t); + uint64_t area2 = area + offset; + + /* Fill both halves of the memory area with identical randomized data. + */ + uint64_t address1 = area; + + uint64_t pattern1 = bdk_rng_get_random64(); + uint64_t pattern2 = 0; + uint64_t this_pattern; + + uint64_t p; + uint64_t d1, d2; + + // move the multiplies outside the loop + uint64_t pbase = address1 * pattern1; + uint64_t pincr = 8 * pattern1; + uint64_t ppred; + + p = pbase; + while (address1 < area2) + { + WRITE64(address1 , p); + WRITE64(address1 + offset, p); + address1 += 8; + p += pincr; + } + __bdk_dram_flush_to_mem_range(area, max_address); + BDK_DCACHE_INVALIDATE; + + /* Make a series of passes over the memory areas. */ + for (burst = 0; burst < bursts; burst++) + { + /* XOR the data with a random value, applying the change to both + * memory areas. 
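+         *
+         * Prediction note (editorial): after this pass each word should
+         * equal (original_address * pattern1) ^ pattern2, where pattern2
+         * accumulates the XOR of every per-burst value. The verify loop
+         * below recomputes that expected value as ppred ^ pattern2, so a
+         * mismatch can be pinned to a single failing address instead of
+         * just a disagreement between the two copies.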
+ */ + address1 = area; + + this_pattern = bdk_rng_get_random64(); + pattern2 ^= this_pattern; + + while (address1 < area2) + { +#if 1 + if ((address1 & BDK_CACHE_LINE_MASK) == 0) + BDK_PREFETCH(address1, BDK_CACHE_LINE_SIZE); + if (((address1 + offset) & BDK_CACHE_LINE_MASK) == 0) + BDK_PREFETCH(address1 + offset, BDK_CACHE_LINE_SIZE); +#endif + WRITE64(address1 , READ64(address1 ) ^ this_pattern); + WRITE64(address1 + offset, READ64(address1 + offset) ^ this_pattern); + address1 += 8; + } + + __bdk_dram_flush_to_mem_range(area, max_address); + BDK_DCACHE_INVALIDATE; + + /* Look for differences from the expected pattern in both areas. + * If there is a mismatch, reset the appropriate memory location + * with the correct pattern. Failing to do so + * means that on all subsequent passes the erroring locations + * will be out of sync, giving spurious errors. + */ + address1 = area; + ppred = pbase; + + while (address1 < area2) + { +#if 1 + if ((address1 & BDK_CACHE_LINE_MASK) == 0) + BDK_PREFETCH(address1, BDK_CACHE_LINE_SIZE); + if (((address1 + offset) & BDK_CACHE_LINE_MASK) == 0) + BDK_PREFETCH(address1 + offset, BDK_CACHE_LINE_SIZE); +#endif + d1 = READ64(address1 ); + d2 = READ64(address1 + offset); + + p = ppred ^ pattern2; + + if (bdk_unlikely(d1 != p)) { + failures += __bdk_dram_retry_failure(burst, address1, d1, p); + // Synchronize the area, adjusting for the error. + //WRITE64(address1, p); // retries should do this + } + if (bdk_unlikely(d2 != p)) { + failures += __bdk_dram_retry_failure(burst, address1 + offset, d2, p); + // Synchronize the area, adjusting for the error. + //WRITE64(address1 + offset, p); // retries should do this + } + + address1 += 8; + ppred += pincr; + + } /* while (address1 < area2) */ + } /* for (int burst = 0; burst < bursts; burst++) */ + return failures; +} + +//////////////// this is the new code... + +int __bdk_dram_test_mem_rows(uint64_t area, uint64_t max_address, int bursts) +{ + int failures = 0; + + uint64_t pattern1 = 0x0; + uint64_t extent = (max_address - area); + uint64_t count = (extent / 2) / sizeof(uint64_t); // in terms of 64bit words + uint64_t offset = count * sizeof(uint64_t); + uint64_t area2 = area + offset; + uint64_t pattern2; + uint64_t d1, d2; + int burst; + + /* Fill both halves of the memory area with identical data pattern. Odd + * address 64-bit words get the pattern, while even address words get the + * inverted pattern. + */ + uint64_t address1 = area; + + pattern2 = pattern1; // start with original pattern + + while (address1 < area2) + { + WRITE64(address1 , pattern2); + WRITE64(address1 + offset, pattern2); + address1 += 8; + pattern2 = ~pattern2; // flip for next slots + } + + __bdk_dram_flush_to_mem_range(area, max_address); + BDK_DCACHE_INVALIDATE; + + /* Make a series of passes over the memory areas. */ + for (burst = 0; burst < bursts; burst++) + { + /* Invert the data, applying the change to both memory areas. Thus on + * alternate passes, the data flips from 0 to 1 and vice versa. + */ + address1 = area; + + while (address1 < area2) + { + if ((address1 & BDK_CACHE_LINE_MASK) == 0) + BDK_PREFETCH(address1 , BDK_CACHE_LINE_SIZE); + if (((address1 + offset) & BDK_CACHE_LINE_MASK) == 0) + BDK_PREFETCH(address1 + offset, BDK_CACHE_LINE_SIZE); + + WRITE64(address1 , ~READ64(address1 )); + WRITE64(address1 + offset, ~READ64(address1 + offset)); + address1 += 8; + } + + __bdk_dram_flush_to_mem_range(area, max_address); + BDK_DCACHE_INVALIDATE; + + /* Look for differences in the areas. 
If there is a mismatch, reset + * both memory locations with the same pattern. Failing to do so + * means that on all subsequent passes the pair of locations remain + * out of sync giving spurious errors. + */ + address1 = area; + pattern1 = ~pattern1; // flip the starting pattern to match above loop + pattern2 = pattern1; // slots have been flipped by the above loop + + while (address1 < area2) + { + if ((address1 & BDK_CACHE_LINE_MASK) == 0) + BDK_PREFETCH(address1 , BDK_CACHE_LINE_SIZE); + if (((address1 + offset) & BDK_CACHE_LINE_MASK) == 0) + BDK_PREFETCH(address1 + offset, BDK_CACHE_LINE_SIZE); + + d1 = READ64(address1 ); + d2 = READ64(address1 + offset); + + if (bdk_unlikely(d1 != pattern2)) { + failures += __bdk_dram_retry_failure(burst, address1, d1, pattern2); + // Synchronize the area, adjusting for the error. + //WRITE64(address1, pattern2); // retries should do this + } + if (bdk_unlikely(d2 != pattern2)) { + failures += __bdk_dram_retry_failure(burst, address1 + offset, d2, pattern2); + // Synchronize the two areas, adjusting for the error. + //WRITE64(address1 + offset, pattern2); // retries should do this + } + + address1 += 8; + pattern2 = ~pattern2; // flip for next pair of slots + } + } + return failures; +} +#endif /* USE_PREDICTION_CODE_VERSIONS */ diff --git a/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test.c b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test.c new file mode 100644 index 0000000000..53137502fc --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test.c @@ -0,0 +1,860 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. 
+***********************license end**************************************/ +#include <bdk.h> +#include "libbdk-arch/bdk-csrs-gti.h" +#include "libbdk-arch/bdk-csrs-ocx.h" + +/* This code is an optional part of the BDK. It is only linked in + if BDK_REQUIRE() needs it */ +BDK_REQUIRE_DEFINE(DRAM_TEST); + +#define MAX_ERRORS_TO_REPORT 50 +#define RETRY_LIMIT 1000 + +typedef struct +{ + const char * name; /* Friendly name for the test */ + __bdk_dram_test_t test_func; /* Function to call */ + int bursts; /* Bursts parameter to pass to the test */ + int max_cores; /* Maximum number of cores the test should be run on in parallel. Zero means all */ +} dram_test_info_t; + +static const dram_test_info_t TEST_INFO[] = { + /* Name, Test function, Bursts, Max Cores */ + { "Data Bus", __bdk_dram_test_mem_data_bus, 8, 1}, + { "Address Bus", __bdk_dram_test_mem_address_bus, 0, 1}, + { "Marching Rows", __bdk_dram_test_mem_rows, 16, 0}, + { "Random Data", __bdk_dram_test_mem_random, 32, 0}, + { "Random XOR (32 Burst)", __bdk_dram_test_mem_xor, 32, 0}, + { "Self Address", __bdk_dram_test_mem_self_addr, 1, 0}, + { "March C- Solid Bits", __bdk_dram_test_mem_solid, 1, 0}, + { "March C- Checkerboard", __bdk_dram_test_mem_checkerboard, 1, 0}, + { "Walking Ones Left", __bdk_dram_test_mem_leftwalk1, 1, 0}, + { "Walking Ones Right", __bdk_dram_test_mem_rightwalk1, 1, 0}, + { "Walking Zeros Left", __bdk_dram_test_mem_leftwalk0, 1, 0}, + { "Walking Zeros Right", __bdk_dram_test_mem_rightwalk0, 1, 0}, + { "Random XOR (224 Burst)", __bdk_dram_test_mem_xor, 224, 0}, + { "Fast Scan", __bdk_dram_test_fast_scan, 0, 0}, + { NULL, NULL, 0, 0} +}; + +/* These variables count the number of ECC errors. They should only be accessed atomically */ +int64_t __bdk_dram_ecc_single_bit_errors[BDK_MAX_MEM_CHANS]; +int64_t __bdk_dram_ecc_double_bit_errors[BDK_MAX_MEM_CHANS]; + +static int64_t dram_test_thread_done; +static int64_t dram_test_thread_errors; +static uint64_t dram_test_thread_start; +static uint64_t dram_test_thread_end; +static uint64_t dram_test_thread_size; + +/** + * Force the memory at the pointer location to be written to memory and evicted + * from L2. L1 will be unaffected. + * + * @param address Physical memory location + */ +void __bdk_dram_flush_to_mem(uint64_t address) +{ + BDK_MB; + /* The DRAM code doesn't use the normal bdk_phys_to_ptr() because of the + NULL check in it. This greatly slows down the memory tests */ + char *ptr = (void*)address; + BDK_CACHE_WBI_L2(ptr); +} + +/** + * Force a memory region to be written to DRAM and evicted from L2 + * + * @param area Start of the region + * @param max_address + * End of the region (exclusive) + */ +void __bdk_dram_flush_to_mem_range(uint64_t area, uint64_t max_address) +{ + /* The DRAM code doesn't use the normal bdk_phys_to_ptr() because of the + NULL check in it. This greatly slows down the memory tests */ + char *ptr = (void*)area; + char *end = (void*)max_address; + BDK_MB; + while (ptr < end) + { + BDK_CACHE_WBI_L2(ptr); + ptr += 128; + } +} + +/** + * Convert a test enumeration into a string + * + * @param test Test to convert + * + * @return String for display + */ +const char *bdk_dram_get_test_name(int test) +{ + if (test < (int)(sizeof(TEST_INFO) / sizeof(TEST_INFO[0]))) + return TEST_INFO[test].name; + else + return NULL; +} + +static bdk_dram_test_flags_t dram_test_flags; // FIXME: Don't use global +/** + * This function is run as a thread to perform memory tests over multiple cores. 
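+ * (Editorial note: region index "arg" maps to the physical offset range
+ * [dram_test_thread_start + dram_test_thread_size * arg,
+ * dram_test_thread_start + dram_test_thread_size * (arg + 1)), clamped to
+ * dram_test_thread_end, as computed at the top of the function.)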
+ * Each thread gets a section of memory to work on, which is controlled by global
+ * variables at the beginning of this file.
+ *
+ * @param arg    Number of the region we should check
+ * @param arg1   Pointer to the test_info structure
+ */
+static void dram_test_thread(int arg, void *arg1)
+{
+    const dram_test_info_t *test_info = arg1;
+    const int bursts = test_info->bursts;
+    const int range_number = arg;
+
+    /* Figure out our work memory range.
+     *
+     * Note start_address and end_address just provide the physical offset
+     * portion of the address and do not have the node bits set. This is
+     * to simplify address checks and calculations. Later, when about to run
+     * the memory test, the routine adds in the node bits to form the final
+     * addresses.
+     */
+    uint64_t start_address = dram_test_thread_start + dram_test_thread_size * range_number;
+    uint64_t end_address = start_address + dram_test_thread_size;
+    if (end_address > dram_test_thread_end)
+        end_address = dram_test_thread_end;
+
+    bdk_node_t test_node = bdk_numa_local();
+    if (dram_test_flags & BDK_DRAM_TEST_USE_CCPI)
+        test_node ^= 1;
+    /* Insert the node part of the address */
+    start_address = bdk_numa_get_address(test_node, start_address);
+    end_address = bdk_numa_get_address(test_node, end_address);
+    /* Test the region */
+    BDK_TRACE(DRAM_TEST, "  Node %d, core %d, Testing [0x%011lx:0x%011lx]\n",
+        bdk_numa_local(), bdk_get_core_num() & 127, start_address, end_address - 1);
+    test_info->test_func(start_address, end_address, bursts);
+
+    /* Report that we're done */
+    BDK_TRACE(DRAM_TEST, "Thread %d on node %d done with memory test\n", range_number, bdk_numa_local());
+    bdk_atomic_add64_nosync(&dram_test_thread_done, 1);
+}
+
+/**
+ * Run the memory test.
+ *
+ * @param test_info
+ *               Test to run, with its burst and core-count parameters
+ * @param start_address
+ *               Physical address to start at
+ * @param length Length of memory block
+ * @param flags  Flags to control memory test options. Zero defaults to testing all
+ *               nodes with statistics and progress output.
+ *
+ * @return Number of errors found. Zero is success. Negative means the test
+ *         did not run due to some other failure.
+ */
+static int __bdk_dram_run_test(const dram_test_info_t *test_info, uint64_t start_address,
+                               uint64_t length, bdk_dram_test_flags_t flags)
+{
+    /* Figure out the address of the byte one off the top of memory */
+    uint64_t max_address = bdk_dram_get_size_mbytes(bdk_numa_local());
+    BDK_TRACE(DRAM_TEST, "DRAM available per node: %lu MB\n", max_address);
+    max_address <<= 20;
+
+    /* Make sure we have enough */
+    if (max_address < (16<<20))
+    {
+        bdk_error("DRAM size is too small\n");
+        return -1;
+    }
+
+    /* Make sure the amount is sane */
+    if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX))
+    {
+        if (max_address > (1ull << 40)) /* 40 bits in CN8XXX */
+            max_address = 1ull << 40;
+    }
+    else
+    {
+        if (max_address > (1ull << 43)) /* 43 bits in CN9XXX */
+            max_address = 1ull << 43;
+    }
+    BDK_TRACE(DRAM_TEST, "DRAM max address: 0x%011lx\n", max_address-1);
+
+    /* Make sure the start address is lower than the top of memory */
+    if (start_address >= max_address)
+    {
+        bdk_error("Start address is larger than the amount of memory: 0x%011lx versus 0x%011lx\n",
+                  start_address, max_address);
+        return -1;
+    }
+    if (length == (uint64_t)-1)
+        length = max_address - start_address;
+
+    /* Final range checks */
+    uint64_t end_address = start_address + length;
+    if (end_address > max_address)
+    {
+        end_address = max_address;
+        length = end_address - start_address;
+    }
+    if (length == 0)
+        return 0;
+
+    /* Ready to run the test. 
Figure out how many cores we need */ + int max_cores = test_info->max_cores; + int total_cores_all_nodes = max_cores; + + /* Figure out the number of cores available in the system */ + if (max_cores == 0) + { + max_cores += bdk_get_num_running_cores(bdk_numa_local()); + /* Calculate the total number of cores being used. The per node number + is confusing to people */ + for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++) + if (flags & (1 << node)) + { + if (flags & BDK_DRAM_TEST_USE_CCPI) + total_cores_all_nodes += bdk_get_num_running_cores(node ^ 1); + else + total_cores_all_nodes += bdk_get_num_running_cores(node); + } + } + if (!(flags & BDK_DRAM_TEST_NO_BANNERS)) + printf("Starting Test \"%s\" for [0x%011lx:0x%011lx] using %d core(s)\n", + test_info->name, start_address, end_address - 1, total_cores_all_nodes); + + /* Remember the LMC perf counters for stats after the test */ + uint64_t start_dram_dclk[BDK_NUMA_MAX_NODES][4]; + uint64_t start_dram_ops[BDK_NUMA_MAX_NODES][4]; + uint64_t stop_dram_dclk[BDK_NUMA_MAX_NODES][4]; + uint64_t stop_dram_ops[BDK_NUMA_MAX_NODES][4]; + for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++) + { + if (flags & (1 << node)) + { + const int num_dram_controllers = __bdk_dram_get_num_lmc(node); + for (int i = 0; i < num_dram_controllers; i++) + { + start_dram_dclk[node][i] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(i)); + start_dram_ops[node][i] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(i)); + } + } + } + /* Remember the CCPI link counters for stats after the test */ + uint64_t start_ccpi_data[BDK_NUMA_MAX_NODES][3]; + uint64_t start_ccpi_idle[BDK_NUMA_MAX_NODES][3]; + uint64_t start_ccpi_err[BDK_NUMA_MAX_NODES][3]; + uint64_t stop_ccpi_data[BDK_NUMA_MAX_NODES][3]; + uint64_t stop_ccpi_idle[BDK_NUMA_MAX_NODES][3]; + uint64_t stop_ccpi_err[BDK_NUMA_MAX_NODES][3]; + if (!bdk_numa_is_only_one()) + { + for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++) + { + if (flags & (1 << node)) + { + for (int link = 0; link < 3; link++) + { + start_ccpi_data[node][link] = BDK_CSR_READ(node, BDK_OCX_TLKX_STAT_DATA_CNT(link)); + start_ccpi_idle[node][link] = BDK_CSR_READ(node, BDK_OCX_TLKX_STAT_IDLE_CNT(link)); + start_ccpi_err[node][link] = BDK_CSR_READ(node, BDK_OCX_TLKX_STAT_ERR_CNT(link)); + } + } + } + } + + /* WARNING: This code assumes the same memory range is being tested on + all nodes. The same number of cores are used on each node to test + its local memory */ + uint64_t work_address = start_address; + dram_test_flags = flags; + bdk_atomic_set64(&dram_test_thread_errors, 0); + while ((work_address < end_address) && ((dram_test_thread_errors == 0) || (flags & BDK_DRAM_TEST_NO_STOP_ERROR))) + { + /* Check at most MAX_CHUNK_SIZE across each iteration. We only report + progress between chunks, so keep them reasonably small */ + const uint64_t MAX_CHUNK_SIZE = 1ull << 28; /* 256MB */ + uint64_t size = end_address - work_address; + if (size > MAX_CHUNK_SIZE) + size = MAX_CHUNK_SIZE; + + /* Divide memory evenly between the cores. Round the size up so that + all memory is covered. 
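+           For example (illustrative numbers, not from the source): splitting
+           a 256MB chunk across 48 cores gives thread_size =
+           ((0x10000000 + 47) / 48 + 127) & -128 = 0x555580 bytes per core.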
The last core may have slightly less memory to + test */ + uint64_t thread_size = (size + (max_cores - 1)) / max_cores; + thread_size += 127; + thread_size &= -128; + dram_test_thread_start = work_address; + dram_test_thread_end = work_address + size; + dram_test_thread_size = thread_size; + BDK_WMB; + + /* Poke the watchdog */ + BDK_CSR_WRITE(bdk_numa_local(), BDK_GTI_CWD_POKEX(0), 0); + + /* disable progress output when batch mode is ON */ + if (!(flags & BDK_DRAM_TEST_NO_PROGRESS)) { + + /* Report progress percentage */ + int percent_x10 = (work_address - start_address) * 1000 / (end_address - start_address); + printf(" %3d.%d%% complete, testing [0x%011lx:0x%011lx]\r", + percent_x10 / 10, percent_x10 % 10, work_address, work_address + size - 1); + fflush(stdout); + } + + work_address += size; + + /* Start threads for all the cores */ + int total_count = 0; + bdk_atomic_set64(&dram_test_thread_done, 0); + for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++) + { + if (flags & (1 << node)) + { + const int num_cores = bdk_get_num_cores(node); + int per_node = 0; + for (int core = 0; core < num_cores; core++) + { + if (per_node >= max_cores) + break; + int run_node = (flags & BDK_DRAM_TEST_USE_CCPI) ? node ^ 1 : node; + BDK_TRACE(DRAM_TEST, "Starting thread %d on node %d for memory test\n", per_node, node); + if (bdk_thread_create(run_node, 0, dram_test_thread, per_node, (void *)test_info, 0)) + { + bdk_error("Failed to create thread %d for memory test on node %d\n", per_node, node); + } + else + { + per_node++; + total_count++; + } + } + } + } + +#if 0 + /* Wait for threads to finish */ + while (bdk_atomic_get64(&dram_test_thread_done) < total_count) + bdk_thread_yield(); +#else +#define TIMEOUT_SECS 30 // FIXME: long enough so multicore RXOR 224 should not print out + /* Wait for threads to finish, with progress */ + int cur_count; + uint64_t cur_time; + uint64_t period = bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_TIME) * TIMEOUT_SECS; // FIXME? 
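+        /* Editorial note: this is a progress watchdog rather than a hard
+           timeout; each time "period" elapses, the loop below only traces
+           how many cores are still running and then re-arms. */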
+ uint64_t timeout = bdk_clock_get_count(BDK_CLOCK_TIME) + period; + do { + bdk_thread_yield(); + cur_count = bdk_atomic_get64(&dram_test_thread_done); + cur_time = bdk_clock_get_count(BDK_CLOCK_TIME); + if (cur_time >= timeout) { + BDK_TRACE(DRAM_TEST, "N%d: Waiting for %d cores\n", + bdk_numa_local(), total_count - cur_count); + timeout = cur_time + period; + } + } while (cur_count < total_count); +#endif + } + + /* Get the DRAM perf counters */ + for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++) + { + if (flags & (1 << node)) + { + const int num_dram_controllers = __bdk_dram_get_num_lmc(node); + for (int i = 0; i < num_dram_controllers; i++) + { + stop_dram_dclk[node][i] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(i)); + stop_dram_ops[node][i] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(i)); + } + } + } + /* Get the CCPI link counters */ + if (!bdk_numa_is_only_one()) + { + for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++) + { + if (flags & (1 << node)) + { + for (int link = 0; link < 3; link++) + { + stop_ccpi_data[node][link] = BDK_CSR_READ(node, BDK_OCX_TLKX_STAT_DATA_CNT(link)); + stop_ccpi_idle[node][link] = BDK_CSR_READ(node, BDK_OCX_TLKX_STAT_IDLE_CNT(link)); + stop_ccpi_err[node][link] = BDK_CSR_READ(node, BDK_OCX_TLKX_STAT_ERR_CNT(link)); + } + } + } + } + + /* disable progress output when batch mode is ON */ + if (!(flags & BDK_DRAM_TEST_NO_PROGRESS)) { + + /* Report progress percentage as complete */ + printf(" %3d.%d%% complete, testing [0x%011lx:0x%011lx]\n", + 100, 0, start_address, end_address - 1); + fflush(stdout); + } + + if (!(flags & BDK_DRAM_TEST_NO_STATS)) + { + /* Display LMC load */ + for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++) + { + if (flags & (1 << node)) + { + const int num_dram_controllers = __bdk_dram_get_num_lmc(node); + for (int i = 0; i < num_dram_controllers; i++) + { + uint64_t ops = stop_dram_ops[node][i] - start_dram_ops[node][i]; + uint64_t dclk = stop_dram_dclk[node][i] - start_dram_dclk[node][i]; + if (dclk == 0) + dclk = 1; + uint64_t percent_x10 = ops * 1000 / dclk; + printf(" Node %d, LMC%d: ops %lu, cycles %lu, used %lu.%lu%%\n", + node, i, ops, dclk, percent_x10 / 10, percent_x10 % 10); + } + } + } + if (flags & BDK_DRAM_TEST_USE_CCPI) + { + /* Display CCPI load */ + for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++) + { + if (flags & (1 << node)) + { + for (int link = 0; link < 3; link++) + { + uint64_t busy = stop_ccpi_data[node][link] - start_ccpi_data[node][link]; + busy += stop_ccpi_err[node][link] - start_ccpi_err[node][link]; + uint64_t total = stop_ccpi_idle[node][link] - start_ccpi_idle[node][link]; + total += busy; + if (total == 0) + continue; + uint64_t percent_x10 = busy * 1000 / total; + printf(" Node %d, CCPI%d: busy %lu, total %lu, used %lu.%lu%%\n", + node, link, busy, total, percent_x10 / 10, percent_x10 % 10); + } + } + } + } + } + return dram_test_thread_errors; +} + +/** + * Perform a memory test. + * + * @param test Test type to run + * @param start_address + * Physical address to start at + * @param length Length of memory block + * @param flags Flags to control memory test options. Zero defaults to testing all + * node with statistics and progress output. + * + * @return Number of errors found. Zero is success. Negative means the test + * did not run due to some other failure. + */ +int bdk_dram_test(int test, uint64_t start_address, uint64_t length, bdk_dram_test_flags_t flags) +{ + /* These limits are arbitrary. 
They just make sure we aren't doing something
+       silly, like testing a non-cache-line-aligned memory region */
+    if (start_address & 0xffff)
+    {
+        bdk_error("DRAM test start address must be aligned on a 64KB boundary\n");
+        return -1;
+    }
+    if (length & 0xffff)
+    {
+        bdk_error("DRAM test length must be a multiple of 64KB\n");
+        return -1;
+    }
+
+    const char *name = bdk_dram_get_test_name(test);
+    if (name == NULL)
+    {
+        bdk_error("Invalid DRAM test number %d\n", test);
+        return -1;
+    }
+
+    /* If no nodes are selected assume the user meant all nodes */
+    if ((flags & (BDK_DRAM_TEST_NODE0 | BDK_DRAM_TEST_NODE1 | BDK_DRAM_TEST_NODE2 | BDK_DRAM_TEST_NODE3)) == 0)
+        flags |= BDK_DRAM_TEST_NODE0 | BDK_DRAM_TEST_NODE1 | BDK_DRAM_TEST_NODE2 | BDK_DRAM_TEST_NODE3;
+
+    /* Remove nodes from the flags that don't exist */
+    for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++)
+    {
+        if (flags & BDK_DRAM_TEST_USE_CCPI)
+        {
+            if (!bdk_numa_exists(node ^ 1))
+                flags &= ~(1 << node);
+        }
+        else
+        {
+            if (!bdk_numa_exists(node))
+                flags &= ~(1 << node);
+        }
+    }
+
+    /* Make sure the start address is higher than the BDK's active range */
+    uint64_t top_of_bdk = bdk_dram_get_top_of_bdk();
+    if (start_address < top_of_bdk)
+        start_address = top_of_bdk;
+
+    /* Clear ECC error counters before starting the test */
+    for (int chan = 0; chan < BDK_MAX_MEM_CHANS; chan++) {
+        bdk_atomic_set64(&__bdk_dram_ecc_single_bit_errors[chan], 0);
+        bdk_atomic_set64(&__bdk_dram_ecc_double_bit_errors[chan], 0);
+    }
+
+    /* Make sure at least one core from each node is running */
+    for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++)
+    {
+        if (flags & (1<<node))
+        {
+            int use_node = (flags & BDK_DRAM_TEST_USE_CCPI) ? node ^ 1 : node;
+            if (bdk_get_running_coremask(use_node) == 0)
+                bdk_init_cores(use_node, 1);
+        }
+    }
+
+    /* This returns any data compare errors found */
+    int errors = __bdk_dram_run_test(&TEST_INFO[test], start_address, length, flags);
+
+    /* Poll for any errors right now to make sure any ECC errors are reported */
+    for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++)
+    {
+        if (bdk_numa_exists(node) && bdk_error_check)
+            bdk_error_check(node);
+    }
+
+    /* Check ECC error counters after the test */
+    int64_t ecc_single = 0;
+    int64_t ecc_double = 0;
+    int64_t ecc_single_errs[BDK_MAX_MEM_CHANS];
+    int64_t ecc_double_errs[BDK_MAX_MEM_CHANS];
+
+    for (int chan = 0; chan < BDK_MAX_MEM_CHANS; chan++) {
+        ecc_single += (ecc_single_errs[chan] = bdk_atomic_get64(&__bdk_dram_ecc_single_bit_errors[chan]));
+        ecc_double += (ecc_double_errs[chan] = bdk_atomic_get64(&__bdk_dram_ecc_double_bit_errors[chan]));
+    }
+
+    /* Always print any ECC errors */
+    if (ecc_single || ecc_double)
+    {
+        printf("Test \"%s\": ECC errors, %ld/%ld/%ld/%ld corrected, %ld/%ld/%ld/%ld uncorrected\n",
+               name,
+               ecc_single_errs[0], ecc_single_errs[1], ecc_single_errs[2], ecc_single_errs[3],
+               ecc_double_errs[0], ecc_double_errs[1], ecc_double_errs[2], ecc_double_errs[3]);
+    }
+    if (errors || ecc_double || ecc_single) {
+        printf("Test \"%s\": FAIL: %ld single, %ld double, %d compare errors\n",
+               name, ecc_single, ecc_double, errors);
+    }
+    else
+        BDK_TRACE(DRAM_TEST, "Test \"%s\": PASS\n", name);
+
+    return (errors + ecc_double + ecc_single);
+}
+
+/**
+ * Report a DRAM address in decoded format.
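+ * The decoded string looks roughly like this (illustrative values, built
+ * from the snprintf format below):
+ *   [0x00012345678] (N0,LMC1,DIMM0,Rank0/0,Bank03,Row 0x01234,Col 0x0056)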
+ *
+ * @param address Physical address the error occurred at
+ * @param buffer  Buffer to receive the decoded string
+ * @param len     Length of the buffer
+ */
+static void __bdk_dram_report_address_decode(uint64_t address, char *buffer, int len)
+{
+    int node, lmc, dimm, prank, lrank, bank, row, col;
+
+    bdk_dram_address_extract_info(address, &node, &lmc, &dimm, &prank, &lrank, &bank, &row, &col);
+
+    snprintf(buffer, len, "[0x%011lx] (N%d,LMC%d,DIMM%d,Rank%d/%d,Bank%02d,Row 0x%05x,Col 0x%04x)",
+             address, node, lmc, dimm, prank, lrank, bank, row, col);
+}
+
+/**
+ * Report a DRAM address in a new decoded format.
+ *
+ * @param address Physical address the error occurred at
+ * @param orig_xor
+ *                XOR of the data read vs the expected data
+ * @param buffer  Buffer to receive the decoded string
+ * @param len     Length of the buffer
+ */
+static void __bdk_dram_report_address_decode_new(uint64_t address, uint64_t orig_xor, char *buffer, int len)
+{
+    int node, lmc, dimm, prank, lrank, bank, row, col;
+
+    int byte = 8; // means no byte-lanes in error, should not happen
+    uint64_t bits, print_bits = 0;
+    uint64_t xor = orig_xor;
+
+    // find the byte-lane(s) with errors
+    for (int i = 0; i < 8; i++) {
+        bits = xor & 0xffULL;
+        xor >>= 8;
+        if (bits) {
+            if (byte != 8) {
+                byte = 9; // means more than 1 byte-lane was present
+                print_bits = orig_xor; // print the full original
+                break; // quit now
+            } else {
+                byte = i; // keep checking
+                print_bits = bits;
+            }
+        }
+    }
+
+    bdk_dram_address_extract_info(address, &node, &lmc, &dimm, &prank, &lrank, &bank, &row, &col);
+
+    snprintf(buffer, len, "N%d.LMC%d: CMP byte %d xor 0x%02lx (DIMM%d,Rank%d/%d,Bank%02d,Row 0x%05x,Col 0x%04x)[0x%011lx]",
+             node, lmc, byte, print_bits, dimm, prank, lrank, bank, row, col, address);
+}
+
+/**
+ * Report a DRAM error. Errors are not shown after MAX_ERRORS_TO_REPORT is
+ * exceeded. Used when a single address is involved in the failure.
+ *
+ * @param address Physical address the error occurred at
+ * @param data    Data read from memory
+ * @param correct Correct data
+ * @param burst   Which burst this is from, informational only
+ * @param fails   -1 for no retries done, >= 0 number of failures during retries
+ */
+void __bdk_dram_report_error(uint64_t address, uint64_t data, uint64_t correct, int burst, int fails)
+{
+    char buffer[128];
+    char failbuf[32];
+    int64_t errors = bdk_atomic_fetch_and_add64(&dram_test_thread_errors, 1);
+    uint64_t xor = data ^ correct;
+
+    if (errors < MAX_ERRORS_TO_REPORT)
+    {
+        if (fails < 0) {
+            snprintf(failbuf, sizeof(failbuf), " ");
+        } else {
+            int percent_x10 = fails * 1000 / RETRY_LIMIT;
+            snprintf(failbuf, sizeof(failbuf), ", retries failed %3d.%d%%",
+                     percent_x10 / 10, percent_x10 % 10);
+        }
+
+        __bdk_dram_report_address_decode_new(address, xor, buffer, sizeof(buffer));
+        bdk_error("%s%s\n", buffer, failbuf);
+
+        if (errors == MAX_ERRORS_TO_REPORT-1)
+            bdk_error("No further DRAM errors will be reported\n");
+    }
+    return;
+}
+
+/**
+ * Report a DRAM error. Errors are not shown after MAX_ERRORS_TO_REPORT is
+ * exceeded. Used when two addresses might be involved in the failure.
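+ * (For example, the XOR and rows tests compare two mirrored copies of the
+ * data, so a mismatch cannot immediately be blamed on one address.)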
+ *
+ * @param address1 First address involved in the failure
+ * @param data1    Data from the first address
+ * @param address2 Second address involved in the failure
+ * @param data2    Data from the second address
+ * @param burst    Which burst this is from, informational only
+ * @param fails    -1 for no retries done, >= 0 number of failures during retries
+ */
+void __bdk_dram_report_error2(uint64_t address1, uint64_t data1, uint64_t address2, uint64_t data2,
+                              int burst, int fails)
+{
+    int64_t errors = bdk_atomic_fetch_and_add64(&dram_test_thread_errors, 1);
+    if (errors < MAX_ERRORS_TO_REPORT)
+    {
+        char buffer1[80], buffer2[80];
+        char failbuf[32];
+
+        if (fails < 0) {
+            snprintf(failbuf, sizeof(failbuf), " ");
+        } else {
+            snprintf(failbuf, sizeof(failbuf), ", retried %d failed %d", RETRY_LIMIT, fails);
+        }
+        __bdk_dram_report_address_decode(address1, buffer1, sizeof(buffer1));
+        __bdk_dram_report_address_decode(address2, buffer2, sizeof(buffer2));
+
+        bdk_error("compare: data1: 0x%016lx, xor: 0x%016lx%s\n"
+                  "       %s\n       %s\n",
+                  data1, data1 ^ data2, failbuf,
+                  buffer1, buffer2);
+
+        if (errors == MAX_ERRORS_TO_REPORT-1)
+            bdk_error("No further DRAM errors will be reported\n");
+    }
+    return;
+}
+
+/* Report the circumstances of a failure and try re-reading the memory
+ * location to see if the error is transient or permanent.
+ *
+ * Note: re-reading requires evicting the address from the caches, so each
+ * retry flushes and invalidates before reading.
+ */
+int __bdk_dram_retry_failure(int burst, uint64_t address, uint64_t data, uint64_t expected)
+{
+    int refail = 0;
+
+    // bypass the retries if we are already over the limit...
+    if (bdk_atomic_get64(&dram_test_thread_errors) < MAX_ERRORS_TO_REPORT) {
+
+        /* Try re-reading the memory location. A transient error may fail
+         * on one read and work on another. Keep on retrying even when a
+         * read succeeds.
+         */
+        for (int i = 0; i < RETRY_LIMIT; i++) {
+
+            __bdk_dram_flush_to_mem(address);
+            BDK_DCACHE_INVALIDATE;
+
+            uint64_t new = __bdk_dram_read64(address);
+
+            if (new != expected) {
+                refail++;
+            }
+        }
+    } else
+        refail = -1;
+
+    // this will increment the errors always, but maybe not print...
+    __bdk_dram_report_error(address, data, expected, burst, refail);
+
+    return 1;
+}
+
+/**
+ * retry_failure2
+ *
+ * Like __bdk_dram_retry_failure(), but for failures detected by comparing
+ * two addresses that should hold identical data.
+ *
+ * @param burst    Which burst this is from, informational only
+ * @param address1 First address involved in the failure
+ * @param data1    Data originally read from the first address
+ * @param address2 Second address involved in the failure
+ * @param data2    Data originally read from the second address
+ */
+int __bdk_dram_retry_failure2(int burst, uint64_t address1, uint64_t data1, uint64_t address2, uint64_t data2)
+{
+    int refail = 0;
+
+    // bypass the retries if we are already over the limit...
+    if (bdk_atomic_get64(&dram_test_thread_errors) < MAX_ERRORS_TO_REPORT) {
+
+        for (int i = 0; i < RETRY_LIMIT; i++) {
+            __bdk_dram_flush_to_mem(address1);
+            __bdk_dram_flush_to_mem(address2);
+            BDK_DCACHE_INVALIDATE;
+
+            uint64_t d1 = __bdk_dram_read64(address1);
+            uint64_t d2 = __bdk_dram_read64(address2);
+
+            if (d1 != d2) {
+                refail++;
+            }
+        }
+    } else
+        refail = -1;
+
+    // this will increment the errors always, but maybe not print...
+    __bdk_dram_report_error2(address1, data1, address2, data2, burst, refail);
+
+    return 1;
+}
+
+/**
+ * Inject a DRAM error at a specific address in memory. The injection can either
+ * be a single bit inside the byte, or a double bit error in the ECC byte. Double
+ * bit errors may corrupt memory, causing software to crash. The corruption is
+ * written to memory and will continue to exist until the cache line is written
+ * again. After a call to this function, the BDK should report an ECC error. 
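+ * Typical usage (illustrative, not from the source):
+ *   bdk_dram_test_inject_error(addr, 3);  // flip data bit 3 of the byte at addr
+ *   bdk_dram_test_inject_error(addr, -1); // force an uncorrectable ECC error
+ *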
Double + * bit errors corrupt bits 0-1. + * + * @param address Physical address to corrupt. Any byte alignment is supported + * @param bit Bit to corrupt in the byte (0-7), or -1 to create a double bit fault in the ECC + * byte. + */ +void bdk_dram_test_inject_error(uint64_t address, int bit) +{ + uint64_t aligned_address = address & -16; + int corrupt_bit = -1; + if (bit >= 0) + corrupt_bit = (address & 0xf) * 8 + bit; + + /* Extract the DRAM controller information */ + int node, lmc, dimm, prank, lrank, bank, row, col; + bdk_dram_address_extract_info(address, &node, &lmc, &dimm, &prank, &lrank, &bank, &row, &col); + + /* Read the current data */ + uint64_t data = __bdk_dram_read64(aligned_address); + + /* Program LMC to inject the error */ + if ((corrupt_bit >= 0) && (corrupt_bit < 64)) + BDK_CSR_WRITE(node, BDK_LMCX_CHAR_MASK0(lmc), 1ull << corrupt_bit); + else if (bit == -1) + BDK_CSR_WRITE(node, BDK_LMCX_CHAR_MASK0(lmc), 3); + else + BDK_CSR_WRITE(node, BDK_LMCX_CHAR_MASK0(lmc), 0); + if (corrupt_bit >= 64) + BDK_CSR_WRITE(node, BDK_LMCX_CHAR_MASK2(lmc), 1ull << (corrupt_bit - 64)); + else + BDK_CSR_WRITE(node, BDK_LMCX_CHAR_MASK2(lmc), 0); + BDK_CSR_MODIFY(c, node, BDK_LMCX_ECC_PARITY_TEST(lmc), + c.s.ecc_corrupt_idx = (address & 0x7f) >> 4; + c.s.ecc_corrupt_ena = 1); + BDK_CSR_READ(node, BDK_LMCX_ECC_PARITY_TEST(lmc)); + + /* Perform a write and push it to DRAM. This creates the error */ + __bdk_dram_write64(aligned_address, data); + __bdk_dram_flush_to_mem(aligned_address); + + /* Disable error injection */ + BDK_CSR_MODIFY(c, node, BDK_LMCX_ECC_PARITY_TEST(lmc), + c.s.ecc_corrupt_ena = 0); + BDK_CSR_READ(node, BDK_LMCX_ECC_PARITY_TEST(lmc)); + BDK_CSR_WRITE(node, BDK_LMCX_CHAR_MASK0(lmc), 0); + BDK_CSR_WRITE(node, BDK_LMCX_CHAR_MASK2(lmc), 0); + + /* Read back the data, which should now cause an error */ + printf("Loading the injected error address 0x%lx, node=%d, lmc=%d, dimm=%d, rank=%d/%d, bank=%d, row=%d, col=%d\n", + address, node, lmc, dimm, prank, lrank, bank, row, col); + __bdk_dram_read64(aligned_address); +} diff --git a/src/vendorcode/cavium/bdk/libbdk-driver/bdk-driver-rnm.c b/src/vendorcode/cavium/bdk/libbdk-driver/bdk-driver-rnm.c new file mode 100644 index 0000000000..8394ad8c5e --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-driver/bdk-driver-rnm.c @@ -0,0 +1,124 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. 
+* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ +#include <bdk.h> +#include "libbdk-arch/bdk-csrs-pccpf.h" +#include "libbdk-arch/bdk-csrs-rnm.h" + +BDK_REQUIRE_DEFINE(RNM); + +/** + * Reads 8 bits of random data from Random number generator + * + * @return random data + */ +uint8_t bdk_rng_get_random8(void) +{ + + return bdk_read64_uint8(bdk_numa_get_address(bdk_numa_local(), BDK_RNM_RANDOM)); +} + +/** + * Reads 16 bits of random data from Random number generator + * + * @return random data + */ +uint16_t bdk_rng_get_random16(void) +{ + return bdk_read64_uint16(bdk_numa_get_address(bdk_numa_local(), BDK_RNM_RANDOM)); +} + +/** + * Reads 32 bits of random data from Random number generator + * + * @return random data + */ +uint32_t bdk_rng_get_random32(void) +{ + return bdk_read64_uint32(bdk_numa_get_address(bdk_numa_local(), BDK_RNM_RANDOM)); +} + +/** + * Reads 64 bits of random data from Random number generator + * + * @return random data + */ +uint64_t bdk_rng_get_random64(void) +{ + return bdk_read64_uint64(bdk_numa_get_address(bdk_numa_local(), BDK_RNM_RANDOM)); +} + +/** + * The RNM probe function + * + * @param device RNM to probe + * + * @return Zero on success, negative on failure + */ +static int probe(bdk_device_t *device) +{ + bdk_device_rename(device, "N%d.RNM%d", device->node, device->instance); + return 0; +} + +/** + * RNM init() function + * + * @param device RNM to initialize + * + * @return Zero on success, negative on failure + */ +static int init(bdk_device_t *device) +{ + BDK_BAR_MODIFY(c, device, BDK_RNM_CTL_STATUS, + c.s.ent_en = 1; + c.s.rng_en = 1); + /* Read back after enable so we know it is done. Needed on t88 pass 2.0 emulator */ + BDK_BAR_READ(device, BDK_RNM_CTL_STATUS); + /* Errata (RNM-22528) First consecutive reads to RNM_RANDOM return same + value. Before using the random entropy, read RNM_RANDOM at least once + and discard the data */ + bdk_rng_get_random64(); + return 0; +} + +bdk_driver_t __bdk_driver_rnm = { + .id = (BDK_PCC_PROD_E_GEN << 24) | BDK_PCC_VENDOR_E_CAVIUM | (BDK_PCC_DEV_IDL_E_RNM << 16), + .probe = probe, + .init = init, +}; diff --git a/src/vendorcode/cavium/bdk/libbdk-hal/bdk-clock.c b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-clock.c new file mode 100644 index 0000000000..f81285dffd --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-clock.c @@ -0,0 +1,221 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. 
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+*   * Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*
+*   * Redistributions in binary form must reproduce the above
+*     copyright notice, this list of conditions and the following
+*     disclaimer in the documentation and/or other materials provided
+*     with the distribution.
+*
+*   * Neither the name of Cavium Inc. nor the names of
+*     its contributors may be used to endorse or promote products
+*     derived from this software without specific prior written
+*     permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-gti.h"
+#include "libbdk-arch/bdk-csrs-ocx.h"
+
+/**
+ * Called in __bdk_init to set up the global timer
+ */
+void bdk_clock_setup(bdk_node_t node)
+{
+    const bdk_node_t local_node = bdk_numa_local();
+
+    /* Check if the counter was already set up */
+    BDK_CSR_INIT(cntcr, node, BDK_GTI_CC_CNTCR);
+    if (cntcr.s.en)
+        return;
+
+    /* Configure GTI to tick at BDK_GTI_RATE */
+    uint64_t sclk = bdk_clock_get_rate(node, BDK_CLOCK_SCLK);
+    uint64_t inc = (BDK_GTI_RATE << 32) / sclk;
+    BDK_CSR_WRITE(node, BDK_GTI_CC_CNTRATE, inc);
+    BDK_CSR_WRITE(node, BDK_GTI_CTL_CNTFRQ, BDK_GTI_RATE);
+    cntcr.s.en = 1;
+    if (node != local_node)
+    {
+        /* Synchronize with the local node. This is a simple direct set of
+           the counter, so it will be off a little */
+        BDK_CSR_WRITE(node, BDK_GTI_CC_CNTCV, bdk_clock_get_count(BDK_CLOCK_TIME));
+    }
+    /* Enable the counter */
+    BDK_CSR_WRITE(node, BDK_GTI_CC_CNTCR, cntcr.u);
+    BDK_CSR_READ(node, BDK_GTI_CC_CNTCR);
+
+    if (node != local_node)
+    {
+        if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X))
+        {
+            /* Assume the delay in each direction is the same, sync the counters */
+            int64_t local1 = bdk_clock_get_count(BDK_CLOCK_TIME);
+            int64_t remote = BDK_CSR_READ(node, BDK_GTI_CC_CNTCV);
+            int64_t local2 = bdk_clock_get_count(BDK_CLOCK_TIME);
+            int64_t expected = (local1 + local2) / 2;
+            BDK_CSR_WRITE(node, BDK_GTI_CC_CNTADD, expected - remote);
+            BDK_TRACE(INIT, "N%d.GTI: Clock synchronization with master\n"
+                            "    expected: %ld, remote %ld\n"
+                            "    Counter correction: %ld\n",
+                      node, expected, remote, expected - remote);
+        }
+        else
+        {
+            /* Due to errata TBD, we need to use OCX_PP_CMD to write
+               GTI_CC_CNTMB in order for timestamps to update. 
These constants + are the addresses we need for both local and remote GTI_CC_CNTMB */ + const uint64_t LOCAL_GTI_CC_CNTMB = bdk_numa_get_address(local_node, BDK_GTI_CC_CNTMB); + const uint64_t REMOTE_GTI_CC_CNTMB = bdk_numa_get_address(node, BDK_GTI_CC_CNTMB); + /* Build partial OCX_PP_CMD command used for writes. Address will + be filled later */ + BDK_CSR_DEFINE(pp_cmd, BDK_OCX_PP_CMD); + pp_cmd.u = 0; + pp_cmd.s.wr_mask = 0xff; + + const int NUM_AVERAGE = 16; /* Choose a power of two to avoid division */ + int64_t local_to_remote_sum = 0; + int64_t local_to_remote_min = 1000000; + int64_t local_to_remote_max = -1000000; + int64_t remote_to_local_sum = 0; + int64_t remote_to_local_min = 1000000; + int64_t remote_to_local_max = -1000000; + for (int loop = 0; loop < NUM_AVERAGE; loop++) + { + /* Perform a write to the remote GTI_CC_CNTMB to cause timestamp + update. We don't care about the value actually written */ + pp_cmd.s.addr = REMOTE_GTI_CC_CNTMB; + BDK_CSR_WRITE(local_node, BDK_OCX_PP_CMD, pp_cmd.u); + BDK_CSR_READ(local_node, BDK_OCX_PP_CMD); + + int64_t remote = BDK_CSR_READ(node, BDK_GTI_CC_CNTMBTS); + int64_t local = BDK_CSR_READ(local_node, BDK_GTI_CC_CNTMBTS); + int64_t delta = remote - local; + + local_to_remote_sum += delta; + if (delta < local_to_remote_min) + local_to_remote_min = delta; + if (delta > local_to_remote_max) + local_to_remote_max = delta; + + /* Perform a write to the local GTI_CC_CNTMB to cause timestamp + update. We don't care about the value actually written */ + pp_cmd.s.addr = LOCAL_GTI_CC_CNTMB; + BDK_CSR_WRITE(node, BDK_OCX_PP_CMD, pp_cmd.u); + BDK_CSR_READ(node, BDK_OCX_PP_CMD); + + remote = BDK_CSR_READ(node, BDK_GTI_CC_CNTMBTS); + local = BDK_CSR_READ(local_node, BDK_GTI_CC_CNTMBTS); + delta = local - remote; + + remote_to_local_sum += delta; + if (delta < remote_to_local_min) + remote_to_local_min = delta; + if (delta > remote_to_local_max) + remote_to_local_max = delta; + } + /* Calculate average, rounding to nearest */ + int64_t local_to_remote = (local_to_remote_sum + NUM_AVERAGE/2) / NUM_AVERAGE; + int64_t remote_to_local = (remote_to_local_sum + NUM_AVERAGE/2) / NUM_AVERAGE; + /* Calculate remote node offset */ + int64_t remote_offset = (remote_to_local - local_to_remote) / 2; + BDK_CSR_WRITE(node, BDK_GTI_CC_CNTADD, remote_offset); + BDK_TRACE(INIT, "N%d.GTI: Clock synchronization with master\n" + " local -> remote: min %ld, avg %ld, max %ld\n" + " remote -> local: min %ld, avg %ld, max %ld\n" + " Counter correction: %ld\n", + node, + local_to_remote_min, local_to_remote, local_to_remote_max, + remote_to_local_min, remote_to_local, remote_to_local_max, + remote_offset); + } + } +} + +/** + * Get cycle count based on the clock type. + * + * @param clock - Enumeration of the clock type. + * @return - Get the number of cycles executed so far. + */ +uint64_t __bdk_clock_get_count_slow(bdk_clock_t clock) +{ + bdk_node_t node = bdk_numa_local(); + BDK_CSR_INIT(rst_boot, node, BDK_RST_BOOT); + if (bdk_is_platform(BDK_PLATFORM_EMULATOR)) + { + /* Force RCLK and SCLK to be 1GHz on emulator */ + rst_boot.s.c_mul = 20; + rst_boot.s.pnr_mul = 20; + } + uint64_t ref_cntr = BDK_CSR_READ(node, BDK_RST_REF_CNTR); + switch(clock) + { + case BDK_CLOCK_TIME: + return 0; /* Handled in fast path */ + case BDK_CLOCK_MAIN_REF: + return ref_cntr; + case BDK_CLOCK_RCLK: + return ref_cntr * rst_boot.s.c_mul; + case BDK_CLOCK_SCLK: + return ref_cntr * rst_boot.s.pnr_mul; + } + return 0; +} + +/** + * Get clock rate based on the clock type. 
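+ * Worked example (illustrative multiplier, not from the source): with the
+ * 50 MHz reference used below and pnr_mul = 16, BDK_CLOCK_SCLK comes out
+ * to 50000000 * 16 = 800000000 (800 MHz).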
+ *
+ * @param node    Node to use in a Numa setup. Can be an exact ID or a special value.
+ * @param clock   - Enumeration of the clock type.
+ * @return        - The clock rate, in Hz.
+ */
+uint64_t __bdk_clock_get_rate_slow(bdk_node_t node, bdk_clock_t clock)
+{
+    /* This is currently defined to be 50 MHz */
+    const uint64_t REF_CLOCK = 50000000;
+
+    BDK_CSR_INIT(mio_rst_boot, node, BDK_RST_BOOT);
+    if (bdk_is_platform(BDK_PLATFORM_EMULATOR))
+    {
+        /* Force RCLK and SCLK to be 1GHz on emulator */
+        mio_rst_boot.s.c_mul = 20;
+        mio_rst_boot.s.pnr_mul = 20;
+    }
+    switch (clock)
+    {
+        case BDK_CLOCK_TIME:
+            return BDK_GTI_RATE; /* Programmed as part of setup */
+        case BDK_CLOCK_MAIN_REF:
+            return REF_CLOCK;
+        case BDK_CLOCK_RCLK:
+            return REF_CLOCK * mio_rst_boot.s.c_mul;
+        case BDK_CLOCK_SCLK:
+            return REF_CLOCK * mio_rst_boot.s.pnr_mul;
+    }
+    return 0;
+}
+
diff --git a/src/vendorcode/cavium/bdk/libbdk-hal/bdk-config.c b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-config.c
new file mode 100644
index 0000000000..d4b412d439
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-config.c
@@ -0,0 +1,1946 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+*   * Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*
+*   * Redistributions in binary form must reproduce the above
+*     copyright notice, this list of conditions and the following
+*     disclaimer in the documentation and/or other materials provided
+*     with the distribution.
+*
+*   * Neither the name of Cavium Inc. nor the names of
+*     its contributors may be used to endorse or promote products
+*     derived from this software without specific prior written
+*     permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include <stdarg.h>
+#include <libfdt.h>
+#include <unistd.h>
+#include "libbdk-arch/bdk-csrs-mio_fus.h"
+#include "libbdk-arch/bdk-csrs-fus.h"
+
+/* Set this define to override the trace the BDK uses. This is most
+   useful with trusted boot when the setup menus are not able to
+   configure the trace level. 
A possible example: */
+//#define BDK_TRACE_OVERRIDE (1ull << BDK_TRACE_ENABLE_INIT)
+#define BDK_TRACE_OVERRIDE 0
+
+typedef enum
+{
+    BDK_CONFIG_TYPE_INT,
+    BDK_CONFIG_TYPE_STR,
+    BDK_CONFIG_TYPE_STR_LIST,
+    BDK_CONFIG_TYPE_BINARY,
+} bdk_config_type_t;
+
+typedef struct
+{
+    const char *format;     /* Printf style format string to create the item name */
+    const bdk_config_type_t ctype;/* Type of this item */
+    int64_t default_value;  /* Default value when not present. String defaults are cast to pointers from this */
+    const int64_t min_value;/* Minimum valid value for INT parameters. Unused for Strings */
+    const int64_t max_value;/* Maximum valid value for INT parameters. Unused for Strings */
+} bdk_config_info_t;
+
+static void config_set_defaults(void);
+
+/* Tracing defaults to the level specified here before config files are loaded */
+uint64_t bdk_trace_enables = BDK_TRACE_OVERRIDE;
+
+/* Global variables that contain the config inside a FDT */
+static void *config_fdt;
+static int config_node;
+
+static bdk_config_info_t config_info[__BDK_CONFIG_END] = {
+    /* Board manufacturing data */
+    [BDK_CONFIG_BOARD_MODEL] = {
+        .format = "BOARD-MODEL", /* String, No parameters */
+        .ctype = BDK_CONFIG_TYPE_STR,
+        .default_value = (long)"unknown",
+    },
+    [BDK_CONFIG_BOARD_REVISION] = {
+        .format = "BOARD-REVISION", /* String, No parameters */
+        .ctype = BDK_CONFIG_TYPE_STR,
+        .default_value = (long)"unknown",
+    },
+    [BDK_CONFIG_BOARD_SERIAL] = {
+        .format = "BOARD-SERIAL", /* String, No parameters */
+        .ctype = BDK_CONFIG_TYPE_STR,
+        .default_value = (long)"unknown",
+    },
+    [BDK_CONFIG_MAC_ADDRESS] = {
+        .format = "BOARD-MAC-ADDRESS", /* Int64, No parameters */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0, /* Default updated at boot based on fuses */
+        .min_value = 0,
+        .max_value = 0xffffffffffffll,
+    },
+    [BDK_CONFIG_MAC_ADDRESS_NUM] = {
+        .format = "BOARD-MAC-ADDRESS-NUM", /* Int, No parameters */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0,
+        .min_value = 0,
+        .max_value = 256,
+    },
+    [BDK_CONFIG_MAC_ADDRESS_NUM_OVERRIDE] = {
+        .format = "BOARD-MAC-ADDRESS-NUM-OVERRIDE", /* Int, No parameters */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = -1,
+        .min_value = -1,
+        .max_value = 256,
+    },
+
+    /* Board generic */
+    [BDK_CONFIG_BMC_TWSI] = {
+        .format = "BMC-TWSI", /* No parameters */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = -1, /* TWSI bus number, -1 = disabled */
+        .min_value = -1,
+        .max_value = 5,
+    },
+    [BDK_CONFIG_WATCHDOG_TIMEOUT] = {
+        .format = "WATCHDOG-TIMEOUT", /* No parameters */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0, /* 0 = disabled */
+        .min_value = 0,
+        .max_value = 10000,
+    },
+    [BDK_CONFIG_TWSI_WRITE] = {
+        .format = "TWSI-WRITE", /* No parameters */
+        .ctype = BDK_CONFIG_TYPE_STR_LIST,
+    },
+    [BDK_CONFIG_MDIO_WRITE] = {
+        .format = "MDIO-WRITE", /* No parameters */
+        .ctype = BDK_CONFIG_TYPE_STR_LIST,
+    },
+
+    /* Board wiring of network ports and PHYs */
+    [BDK_CONFIG_PHY_ADDRESS] = {
+        .format = "PHY-ADDRESS.N%d.BGX%d.P%d", /* Parameters: Node, Interface, Port */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = -1, /* Default to no PHY */
+        .min_value = -1,
+        .max_value = 0xffffffffll,
+    },
+    [BDK_CONFIG_BGX_ENABLE] = {
+        .format = "BGX-ENABLE.N%d.BGX%d.P%d", /* Parameters: Node, BGX, Port */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 1, /* 0 = disable, 1 = enable */
+        .min_value = 0,
+        .max_value = 1,
+    },
+    /* Non-EBB specific SFF8104 board and alike */
+    [BDK_CONFIG_AQUANTIA_PHY] = {
+        .format = 
"AQUANTIA-PHY.N%d.BGX%d.P%d", /*Parameters: Node, BGX, Port */ + .default_value = 0, + .min_value = 0, + .max_value = 0xffffll, + }, + + + /* BDK Configuration params */ + [BDK_CONFIG_VERSION] = { + .format = "BDK-VERSION", + .ctype = BDK_CONFIG_TYPE_STR, + }, + [BDK_CONFIG_NUM_PACKET_BUFFERS] = { + .format = "BDK-NUM-PACKET-BUFFERS", + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, /* Default updated at boot */ + .min_value = 0, + .max_value = 1000000, + }, + [BDK_CONFIG_PACKET_BUFFER_SIZE] = { + .format = "BDK-PACKET-BUFFER-SIZE", + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 1024, /* bytes */ + .min_value = 128, + .max_value = 32768, + }, + [BDK_CONFIG_SHOW_LINK_STATUS] = { + .format = "BDK-SHOW-LINK-STATUS", + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 1, /* 0 = off, 1 = on */ + .min_value = 0, + .max_value = 1, + }, + [BDK_CONFIG_COREMASK] = { + .format = "BDK-COREMASK", + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, /* Zero means all cores */ + .min_value = 0, + .max_value = 0xffffffffffffll, + }, + [BDK_CONFIG_BOOT_MENU_TIMEOUT] = { + .format = "BDK-BOOT-MENU-TIMEOUT", + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 10, /* seconds */ + .min_value = 0, + .max_value = 300, + }, + [BDK_CONFIG_BOOT_PATH_OPTION] = { + .format = "BDK-BOOT-PATH-OPTION", + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, /* 0 = normal, 1 = diagnostics */ + .min_value = 0, + .max_value = 1, + }, + [BDK_CONFIG_BOOT_NEXT_STAGE] = { + .format = "BDK-CONFIG-BOOT-NEXT-STAGE-%s", + .ctype = BDK_CONFIG_TYPE_STR, + }, + [BDK_CONFIG_TRACE] = { + .format = "BDK-CONFIG-TRACE", + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, /* bitmask */ + .min_value = 0, + .max_value = 0x7fffffffffffffffull, + }, + + /* Chip feature items */ + [BDK_CONFIG_MULTI_NODE] = { + .format = "MULTI-NODE", /* No parameters */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 2, /* 2 = Auto */ + .min_value = 0, + .max_value = 2, + }, + [BDK_CONFIG_PCIE_EA] = { + .format = "PCIE-ENHANCED-ALLOCATION", /* No parameters */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 1, /* 1 = EA supported, 0 = EA not supported */ + .min_value = 0, + .max_value = 1, + }, + [BDK_CONFIG_PCIE_ORDERING] = { + .format = "PCIE-ORDERING", /* No parameters */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, /* 1 = Wait for commit, 0 = Don't wait for commit */ + .min_value = 0, + .max_value = 1, + }, + [BDK_CONFIG_PCIE_PRESET_REQUEST_VECTOR] = { + .format = "PCIE-PRESET-REQUEST-VECTOR.N%d.PORT%d", /* Parameters: Node, Port */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0x593, /* Value for PCIERCX_CFG554[PRV] */ + .min_value = 0, + .max_value = 0xffff, + }, + [BDK_CONFIG_PCIE_WIDTH] = { + .format = "PCIE-WIDTH.N%d.PORT%d", /* Parameters: Node, Port */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = -1, /* Width override for PCIe links */ + .min_value = -1, + .max_value = 16, + }, + [BDK_CONFIG_PCIE_PHYSICAL_SLOT] = { + .format = "PCIE-PHYSICAL-SLOT.N%d.PORT%d", /* Parameters: Node, Port */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = -1, /* Define which physical slot we connect to on the board */ + .min_value = -1, + .max_value = 8191, + }, + [BDK_CONFIG_PCIE_FLASH] = { + .format = "PCIE-FLASH.N%d.PORT%d", /* Parameters: Node, Port */ + .ctype = BDK_CONFIG_TYPE_STR_LIST, + }, + [BDK_CONFIG_CCPI_LANE_REVERSE] = { + .format = "CCPI-LANE-REVERSE", /* No parameters */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, /* 0 = No forced lane reversal, 1 = forced lane reversal */ + .min_value = 0, + .max_value = 1, + 
+    },
+    [BDK_CONFIG_CHIP_SKU] = {
+        .format = "CHIP-SKU.NODE%d", /* Parameter: Node */
+        .ctype = BDK_CONFIG_TYPE_STR,
+        .default_value = (long)"TBD",
+    },
+    [BDK_CONFIG_CHIP_SERIAL] = {
+        .format = "CHIP-SERIAL.NODE%d", /* Parameter: Node */
+        .ctype = BDK_CONFIG_TYPE_STR,
+        .default_value = (long)"TBD",
+    },
+    [BDK_CONFIG_CHIP_UNIQUE_ID] = {
+        .format = "CHIP-UNIQUE-ID.NODE%d", /* Parameter: Node */
+        .ctype = BDK_CONFIG_TYPE_STR,
+        .default_value = (long)"TBD",
+    },
+
+    /* QLM related config */
+    [BDK_CONFIG_QLM_AUTO_CONFIG] = {
+        .format = "QLM-AUTO-CONFIG", /* No parameters */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0, /* 0 = off, 1 = on */
+        .min_value = 0,
+        .max_value = 1,
+    },
+    /* SFF8104 related QLM config */
+    [BDK_CONFIG_QLM_DIP_AUTO_CONFIG] = {
+        .format = "QLM-DIP-AUTO-CONFIG", /* No parameters */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0, /* 0 = off, 1 = on */
+        .min_value = 0,
+        .max_value = 1,
+    },
+
+    [BDK_CONFIG_QLM_MODE] = {
+        .format = "QLM-MODE.N%d.QLM%d", /* Parameters: Node, QLM */
+        .ctype = BDK_CONFIG_TYPE_STR,
+    },
+    [BDK_CONFIG_QLM_FREQ] = {
+        .format = "QLM-FREQ.N%d.QLM%d", /* Parameters: Node, QLM */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0, /* MHz */
+        .min_value = 0,
+        .max_value = 10312,
+    },
+    [BDK_CONFIG_QLM_CLK] = {
+        .format = "QLM-CLK.N%d.QLM%d", /* Parameters: Node, QLM */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 2, /* 2 = External */
+        .min_value = 0,
+        .max_value = 2,
+    },
+    [BDK_CONFIG_QLM_TUNING_TX_SWING] = {
+        .format = "QLM-TUNING-TX-SWING.N%d.QLM%d.LANE%d", /* Parameters: Node, QLM, Lane */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = -1, /* Default of no tuning */
+        .min_value = -1,
+        .max_value = 31,
+    },
+    [BDK_CONFIG_QLM_TUNING_TX_PREMPTAP] = {
+        .format = "QLM-TUNING-TX-PREMPTAP.N%d.QLM%d.LANE%d", /* Parameters: Node, QLM, Lane */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = -1, /* Default of no tuning */
+        .min_value = -1,
+        .max_value = 511,
+    },
+    [BDK_CONFIG_QLM_TUNING_TX_GAIN] = {
+        .format = "QLM-TUNING-TX-GAIN.N%d.QLM%d.LANE%d", /* Parameters: Node, QLM, Lane */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = -1, /* Default of no tuning */
+        .min_value = -1,
+        .max_value = 7,
+    },
+    [BDK_CONFIG_QLM_TUNING_TX_VBOOST] = {
+        .format = "QLM-TUNING-TX-VBOOST.N%d.QLM%d.LANE%d", /* Parameters: Node, QLM, Lane */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = -1, /* Default of no tuning */
+        .min_value = -1,
+        .max_value = 1,
+    },
+    [BDK_CONFIG_QLM_CHANNEL_LOSS] = {
+        .format = "QLM-CHANNEL-LOSS.N%d.QLM%d", /* Parameters: Node, QLM */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = -1, /* Default will use Cavium defaults */
+        .min_value = -1,
+        .max_value = 40,
+    },
+
+    /* DRAM configuration options */
+    [BDK_CONFIG_DDR_SPEED] = {
+        .format = "DDR-SPEED.N%d", /* Parameters: Node */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0, /* In MT/s */
+        .min_value = 0,
+        .max_value = 2400,
+    },
+    [BDK_CONFIG_DDR_ALT_REFCLK] = {
+        .format = "DDR-ALT-REFCLK.N%d", /* Parameters: Node */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0, /* MHz */
+        .min_value = 0,
+        .max_value = 100,
+    },
+    [BDK_CONFIG_DDR_SPD_ADDR] = {
+        .format = "DDR-CONFIG-SPD-ADDR.DIMM%d.LMC%d.N%d", /* Parameters: DIMM, LMC, Node */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0,
+        .min_value = 0,
+        .max_value = 0xffff,
+    },
+    [BDK_CONFIG_DDR_SPD_DATA] = {
+        .format = "DDR-CONFIG-SPD-DATA.DIMM%d.LMC%d.N%d", /* Parameters: DIMM, LMC, Node */
+        .ctype = BDK_CONFIG_TYPE_BINARY,
+    },
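+    /* Note: parameterized items such as the two SPD entries above expand to
+       concrete property names, e.g. DDR-CONFIG-SPD-ADDR.DIMM0.LMC0.N0 for
+       DIMM 0 on LMC 0 of node 0. Because get_value() below strips trailing
+       ".X" components until a match is found, a property set with only a
+       prefix of the name (e.g. a bare DDR-CONFIG-SPD-ADDR) acts as a
+       default for all DIMMs, LMCs and nodes. */
+    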
[BDK_CONFIG_DDR_RANKS_DQX_CTL] = { + .format = "DDR-CONFIG-DQX-CTL.RANKS%d.DIMMS%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 0xf, + }, + [BDK_CONFIG_DDR_RANKS_WODT_MASK] = { + .format = "DDR-CONFIG-WODT-MASK.RANKS%d.DIMMS%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 0xfffffff, + }, + [BDK_CONFIG_DDR_RANKS_MODE1_PASR] = { + .format = "DDR-CONFIG-MODE1-PASR.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 0x7, + }, + [BDK_CONFIG_DDR_RANKS_MODE1_ASR] = { + .format = "DDR-CONFIG-MODE1-ASR.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 1, + }, + [BDK_CONFIG_DDR_RANKS_MODE1_SRT] = { + .format = "DDR-CONFIG-MODE1-SRT.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 1, + }, + [BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR] = { + .format = "DDR-CONFIG-MODE1-RTT-WR.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, // Split for extension bit + .default_value = 0, + .min_value = 0, + .max_value = 0x7, + }, + [BDK_CONFIG_DDR_RANKS_MODE1_DIC] = { + .format = "DDR-CONFIG-MODE1-DIC.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 0x3, + }, + [BDK_CONFIG_DDR_RANKS_MODE1_RTT_NOM] = { + .format = "DDR-CONFIG-MODE1-RTT-NOM.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 0x7, + }, + [BDK_CONFIG_DDR_RANKS_MODE1_DB_OUTPUT_IMPEDANCE] = { + .format = "DDR-CONFIG-MODE1-DB-OUTPUT-IMPEDANCE.RANKS%d.DIMMS%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, // Not per RANK, only one + .default_value = 0, + .min_value = 0, + .max_value = 0x7, + }, + [BDK_CONFIG_DDR_RANKS_MODE2_RTT_PARK] = { + .format = "DDR-CONFIG-MODE2-RTT-PARK.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 0x7, + }, + [BDK_CONFIG_DDR_RANKS_MODE2_VREF_VALUE] = { + .format = "DDR-CONFIG-MODE2-VREF-VALUE.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 0x3f, + }, + [BDK_CONFIG_DDR_RANKS_MODE2_VREF_RANGE] = { + .format = "DDR-CONFIG-MODE2-VREF-RANGE.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 1, + }, + [BDK_CONFIG_DDR_RANKS_MODE2_VREFDQ_TRAIN_EN] = { + .format = "DDR-CONFIG-MODE2-VREFDQ-TRAIN-EN.RANKS%d.DIMMS%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, // Not per RANK, only one + .default_value = 0, + .min_value = 0, + .max_value = 1, + }, + + 
[BDK_CONFIG_DDR_RANKS_RODT_CTL] = { + .format = "DDR-CONFIG-RODT-CTL.RANKS%d.DIMMS%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 0xf, + }, + [BDK_CONFIG_DDR_RANKS_RODT_MASK] = { + .format = "DDR-CONFIG-RODT-MASK.RANKS%d.DIMMS%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 0xfffffff, + }, + [BDK_CONFIG_DDR_CUSTOM_MIN_RTT_NOM_IDX] = { + .format = "DDR-CONFIG-CUSTOM-MIN-RTT-NOM-IDX.LMC%d.N%d", /* Parameters: LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 1, + .min_value = 0, + .max_value = 7, + }, + [BDK_CONFIG_DDR_CUSTOM_MAX_RTT_NOM_IDX] = { + .format = "DDR-CONFIG-CUSTOM-MAX-RTT-NOM-IDX.LMC%d.N%d", /* Parameters: LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 5, + .min_value = 0, + .max_value = 7, + }, + [BDK_CONFIG_DDR_CUSTOM_MIN_RODT_CTL] = { + .format = "DDR-CONFIG-CUSTOM-MIN-RODT-CTL.LMC%d.N%d", /* Parameters: LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 1, + .min_value = 0, + .max_value = 7, + }, + [BDK_CONFIG_DDR_CUSTOM_MAX_RODT_CTL] = { + .format = "DDR-CONFIG-CUSTOM-MAX-RODT-CTL.LMC%d.N%d", /* Parameters: LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 5, + .min_value = 0, + .max_value = 7, + }, + [BDK_CONFIG_DDR_CUSTOM_CK_CTL] = { + .format = "DDR-CONFIG-CUSTOM-CK-CTL.LMC%d.N%d", /* Parameters: LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 0xffff, + }, + [BDK_CONFIG_DDR_CUSTOM_CMD_CTL] = { + .format = "DDR-CONFIG-CUSTOM-CMD-CTL.LMC%d.N%d", /* Parameters: LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 0xffff, + }, + [BDK_CONFIG_DDR_CUSTOM_CTL_CTL] = { + .format = "DDR-CONFIG-CUSTOM-CTL-CTL.LMC%d.N%d", /* Parameters: LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 0xf, + }, + [BDK_CONFIG_DDR_CUSTOM_MIN_CAS_LATENCY] = { + .format = "DDR-CONFIG-CUSTOM-MIN-CAS-LATENCY.LMC%d.N%d", /* Parameters: LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 0xffff, + }, + [BDK_CONFIG_DDR_CUSTOM_OFFSET_EN] = { + .format = "DDR-CONFIG-CUSTOM-OFFSET-EN.LMC%d.N%d", /* Parameters: LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 1, + .min_value = 0, + .max_value = 1, + }, + [BDK_CONFIG_DDR_CUSTOM_OFFSET] = { + .format = "DDR-CONFIG-CUSTOM-OFFSET.%s.LMC%d.N%d", /* Parameters: Type(UDIMM,RDIMM), LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, // UDIMM or RDIMM + .default_value = 0, + .min_value = 0, + .max_value = 0xf, + }, + [BDK_CONFIG_DDR_CUSTOM_RLEVEL_COMPUTE] = { + .format = "DDR-CONFIG-CUSTOM-RLEVEL-COMPUTE.LMC%d.N%d", /* Parameters: LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 1, + }, + [BDK_CONFIG_DDR_CUSTOM_RLEVEL_COMP_OFFSET] = { + .format = "DDR-CONFIG-CUSTOM-RLEVEL-COMP-OFFSET.%s.LMC%d.N%d", /* Parameters: Type(UDIMM,RDIMM), LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, // UDIMM or RDIMM + .default_value = 2, + .min_value = 0, + .max_value = 0xffff, + }, + [BDK_CONFIG_DDR_CUSTOM_DDR2T] = { + .format = "DDR-CONFIG-CUSTOM-DDR2T.%s.LMC%d.N%d", /* Parameters: Type(UDIMM,RDIMM), LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, // UDIMM or RDIMM + .default_value = 0, + .min_value = 0, + .max_value = 1, + }, + [BDK_CONFIG_DDR_CUSTOM_DISABLE_SEQUENTIAL_DELAY_CHECK] 
= { + .format = "DDR-CONFIG-CUSTOM-DISABLE-SEQUENTIAL-DELAY-CHECK.LMC%d.N%d", /* Parameters: LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 1, + }, + [BDK_CONFIG_DDR_CUSTOM_MAXIMUM_ADJACENT_RLEVEL_DELAY_INCREMENT] = { + .format = "DDR-CONFIG-CUSTOM-MAXIMUM-ADJACENT-RLEVEL-DELAY-INCREMENT.LMC%d.N%d", /* Parameters: LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 0xffff, + }, + [BDK_CONFIG_DDR_CUSTOM_PARITY] = { + .format = "DDR-CONFIG-CUSTOM-PARITY.LMC%d.N%d", /* Parameters: LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 1, + }, + [BDK_CONFIG_DDR_CUSTOM_FPRCH2] = { + .format = "DDR-CONFIG-CUSTOM-FPRCH2.LMC%d.N%d", /* Parameters: LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 0xf, + }, + [BDK_CONFIG_DDR_CUSTOM_MODE32B] = { + .format = "DDR-CONFIG-CUSTOM-MODE32B.LMC%d.N%d", /* Parameters: LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 1, + }, + [BDK_CONFIG_DDR_CUSTOM_MEASURED_VREF] = { + .format = "DDR-CONFIG-CUSTOM-MEASURED-VREF.LMC%d.N%d", /* Parameters: LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = 0, + .max_value = 1, + }, + [BDK_CONFIG_DDR_CUSTOM_DLL_WRITE_OFFSET] = { + .format = "DDR-CONFIG-CUSTOM-DLL-WRITE-OFFSET.BYTE%d.LMC%d.N%d", /* Parameters: Byte, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = -63, + .max_value = 63, + }, + [BDK_CONFIG_DDR_CUSTOM_DLL_READ_OFFSET] = { + .format = "DDR-CONFIG-CUSTOM-DLL-READ-OFFSET.BYTE%d.LMC%d.N%d", /* Parameters: Byte, LMC, Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, + .min_value = -63, + .max_value = 63, + }, + + /* High level DRAM options */ + [BDK_CONFIG_DRAM_VERBOSE] = { + .format = "DDR-VERBOSE", /* Parameters: Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, /* 0 = off */ + .min_value = 0, + .max_value = 255, + }, + [BDK_CONFIG_DRAM_BOOT_TEST] = { + .format = "DDR-TEST-BOOT", /* No parameters */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, /* 0 = off, 1 = on */ + .min_value = 0, + .max_value = 1, + }, + [BDK_CONFIG_DRAM_CONFIG_GPIO] = { + .format = "DDR-CONFIG-GPIO", /* No parameters */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = -1, /* -1 = disabled, otherwise GPIO number */ + .min_value = -1, + .max_value = 63, + }, + [BDK_CONFIG_DRAM_SCRAMBLE] = { + .format = "DDR-CONFIG-SCRAMBLE", /* No parameters */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 2, /* 0=off, 1=on, 2=trust on, non-trust off */ + .min_value = 0, + .max_value = 2, + }, + + /* USB */ + [BDK_CONFIG_USB_PWR_GPIO] = { + .format = "USB-PWR-GPIO.N%d.PORT%d", /* Parameters: Node, Port */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = -1, /* GPIO number, or -1 for none */ + .min_value = -1, + .max_value = 49, + }, + [BDK_CONFIG_USB_PWR_GPIO_POLARITY] = { + .format = "USB-PWR-GPIO-POLARITY.N%d.PORT%d", /* Parameters: Node, Port */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 1, /* GPIO polarity: 1=high, 0=low */ + .min_value = 0, + .max_value = 1, + }, + [BDK_CONFIG_USB_REFCLK_SRC] = { + .format = "USB-REFCLK-SRC.N%d.PORT%d", /* Parameters: Node, Port */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 0, /* Clock Source (SS:HS) + ** 0 - SS(USB_REF_CLK) HS(USB_REF_CLK) + ** 1 - SS(DLMC_REF_CLK0) HS(DLMC_REF_CLK0) + ** 2 - SS(DLMC_REF_CLK1) HS(DLMC_REF_CLK1) + ** 3 - SS(USB_REF_CLK) 
HS(PLL_REF_CLK) + ** 4 - SS(DLMC_REF_CLK0) HS(PLL_REF_CLK) + ** 5 - SS(DLMC_REF_CLK1) HS(PLL_REF_CLK) + */ + .min_value = 0, + .max_value = 5, + }, + + /* Nitrox reset - For CN88XX SC and SNT part. High drives Nitrox DC_OK high */ + [BDK_CONFIG_NITROX_GPIO] = { + .format = "NITROX-GPIO.N%d", /* Parameters: Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = -1, /* GPIO number, or -1 for none */ + .min_value = -1, + .max_value = 49, + }, + + /* How EYE diagrams are captured from a QLM */ + [BDK_CONFIG_EYE_ZEROS] = { + .format = "QLM-EYE-NUM-ZEROS", /* No parameters */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 2, + .min_value = 1, + .max_value = 63, + }, + [BDK_CONFIG_EYE_SAMPLE_TIME] = { + .format = "QLM-EYE-SAMPLE-TIME", /* No parameters */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 400, /* us */ + .min_value = 20, /* us */ + .max_value = 10000000, /* us */ + }, + [BDK_CONFIG_EYE_SETTLE_TIME] = { + .format = "QLM-EYE-SETTLE-TIME", /* No parameters */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 50, /* us */ + .min_value = 20, /* us */ + .max_value = 100000, /* us */ + }, + + /* SGPIO */ + [BDK_CONFIG_SGPIO_SCLOCK_FREQ] = { + .format = "SGPIO-SCLOCK-FREQ.N%d", /* Parameters: Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 10000, /* Hz */ + .min_value = 128, /* Hz */ + .max_value = 100000, /* Hz */ + }, + [BDK_CONFIG_SGPIO_PIN_POWER] = { + .format = "SGPIO-PIN-POWER.N%d", /* Parameters: Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = -1, /* GPIO number, or -1 for none */ + .min_value = -1, + .max_value = 50, + }, + [BDK_CONFIG_SGPIO_PIN_SCLOCK] = { + .format = "SGPIO-PIN-SCLOCK.N%d", /* Parameters: Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = -1, /* GPIO number, or -1 for none */ + .min_value = -1, + .max_value = 50, + }, + [BDK_CONFIG_SGPIO_PIN_SLOAD] = { + .format = "SGPIO-PIN-SLOAD.N%d", /* Parameters: Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = -1, /* GPIO number, or -1 for none */ + .min_value = -1, + .max_value = 50, + }, + [BDK_CONFIG_SGPIO_PIN_SDATAOUT] = { + .format = "SGPIO-PIN-SDATAOUT.N%d.D%d", /* Parameters: Node, Dataline */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = -1, /* GPIO number, or -1 for none */ + .min_value = -1, + .max_value = 50, + }, + + /* VRM temperature throttling */ + [BDK_CONFIG_VRM_TEMP_TRIP] = { + .format = "VRM-TEMP-TRIP.N%d", /* Parameters: Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 110, /* Degrees C */ + .min_value = 0, /* Degrees C */ + .max_value = 110, /* Degrees C. Max die temp plus 5 for uncertainty of measurement */ + }, + [BDK_CONFIG_VRM_TEMP_HIGH] = { + .format = "VRM-TEMP-HIGH.N%d", /* Parameters: Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 110, /* Degrees C */ + .min_value = 0, /* Degrees C */ + .max_value = 110, /* Degrees C. Max die temp plus 5 for uncertainty of measurement */ + }, + [BDK_CONFIG_VRM_TEMP_LOW] = { + .format = "VRM-TEMP-LOW.N%d", /* Parameters: Node */ + .ctype = BDK_CONFIG_TYPE_INT, + .default_value = 100, /* Degrees C */ + .min_value = 0, /* Degrees C */ + .max_value = 110, /* Degrees C. 
Max die temp plus 5 for uncertainty of measurement */
+    },
+    [BDK_CONFIG_VRM_THROTTLE_NORMAL] = {
+        .format = "VRM-THROTTLE-NORMAL.N%d", /* Parameters: Node */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 65, /* Percentage */
+        .min_value = 1, /* Percentage */
+        .max_value = 100, /* Percentage */
+    },
+    [BDK_CONFIG_VRM_THROTTLE_THERM] = {
+        .format = "VRM-THROTTLE-THERM.N%d", /* Parameters: Node */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 15, /* Percentage */
+        .min_value = 1, /* Percentage */
+        .max_value = 100, /* Percentage */
+    },
+
+    /* Generic GPIO, unrelated to a specific block */
+    [BDK_CONFIG_GPIO_PIN_SELECT] = {
+        .format = "GPIO-PIN-SELECT-GPIO%d.N%d", /* Parameters: GPIO, Node */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = -1, /* Hardware default, normal GPIO pin */
+        .min_value = 0, /* GPIO_PIN_SEL_E enumeration */
+        .max_value = 65535, /* GPIO_PIN_SEL_E enumeration */
+    },
+    [BDK_CONFIG_GPIO_POLARITY] = {
+        .format = "GPIO-POLARITY-GPIO%d.N%d", /* Parameters: GPIO, Node */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0, /* Hardware default, not inverted */
+        .min_value = 0, /* Not inverted */
+        .max_value = 1, /* Inverted */
+    },
+
+    /* PBUS */
+    [BDK_CONFIG_PBUS_CFG] = {
+        .format = "PBUS-CFG.REGION%d.N%d", /* Parameters: Region, Node */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0, /* Hardware default */
+        .min_value = 0, /* No change */
+        .max_value = 0x0000ffffffffffffll, /* PBUS_REGX_CFG value */
+    },
+    [BDK_CONFIG_PBUS_TIM] = {
+        .format = "PBUS-TIM.REGION%d.N%d", /* Parameters: Region, Node */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0, /* Hardware default */
+        .min_value = 0x8000000000000000ll, /* PBUS_REGX_TIM value, zero is no change */
+        .max_value = 0x7fffffffffffffffll, /* PBUS_REGX_TIM value */
+    },
+
+    /* Trusted boot information */
+    [BDK_CONFIG_TRUST_CSIB] = {
+        .format = "TRUST-CSIB", /* No parameters */
+        .ctype = BDK_CONFIG_TYPE_BINARY,
+        .default_value = 0, /* Hardware default */
+    },
+    [BDK_CONFIG_TRUST_ROT_ADDR] = {
+        .format = "TRUST-ROT-ADDR", /* No parameters */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0, /* Non-trusted */
+        .min_value = 0, /* No key */
+        .max_value = 0x0000ffffffffffffll, /* Address in key memory */
+    },
+    [BDK_CONFIG_TRUST_BSSK_ADDR] = {
+        .format = "TRUST-BSSK-ADDR", /* No parameters */
+        .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0, /* No HUK, so no BSSK */
+        .min_value = 0, /* No HUK, so no BSSK */
+        .max_value = 0x0000ffffffffffffll, /* Address in key memory */
+    },
+};
+
+/**
+ * Look up a configuration item in the environment.
+ *
+ * @param name      Name of the property to find. Trailing ".X" components are
+ *                  stripped off one at a time until a match is found.
+ * @param blob_size If non-NULL, updated with the size of the property's value
+ *
+ * @return The property value, or NULL if the item is not set
+ */
+static const char *get_value(const char *name, int *blob_size)
+{
+    if (!config_fdt)
+    {
+        bdk_error("bdk-config asked for %s before configuration loaded\n", name);
+        return NULL;
+    }
+
+    char n[64];
+    strncpy(n, name, sizeof(n));
+    n[sizeof(n)-1] = '\0';
+
+    while (*n)
+    {
+        const char *val = fdt_getprop(config_fdt, config_node, n, blob_size);
+        if (val)
+            return val;
+
+        char *p = strrchr(n, '.');
+        if (p)
+            *p = '\0';
+        else
+            break;
+    }
+    return NULL;
+}
+
+/**
+ * Get an integer configuration item
+ *
+ * @param cfg_item Config item to get. If the item takes parameters (see bdk_config_t), then the
+ *                 parameters are listed following cfg_item.
+ *
+ * @return The value of the configuration item, or the default value if the item is not set
+ */
+int64_t bdk_config_get_int(bdk_config_t cfg_item, ...)
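+/* Usage sketch (illustrative): items that take parameters receive them as
+   varargs. For example, reading the BGX-ENABLE.N0.BGX1.P2 property:
+       int64_t enable = bdk_config_get_int(BDK_CONFIG_BGX_ENABLE, 0, 1, 2);
+*/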
+{
+    /* Make sure the correct access function was called */
+    if (config_info[cfg_item].ctype != BDK_CONFIG_TYPE_INT)
+        bdk_fatal("bdk_config_get_int() called for %s, not an int\n",
+            config_info[cfg_item].format);
+
+    char name[64];
+    va_list args;
+    va_start(args, cfg_item);
+    vsnprintf(name, sizeof(name)-1, config_info[cfg_item].format, args);
+    va_end(args);
+
+    const char *val = get_value(name, NULL);
+    if (val)
+    {
+        int count;
+        int64_t tmp;
+        if ((val[0] == '0') && (val[1] == 'x'))
+            count = sscanf(val + 2, "%lx", &tmp);
+        else
+            count = sscanf(val, "%li", &tmp);
+        if (count == 1)
+        {
+            if ((tmp < config_info[cfg_item].min_value) || (tmp > config_info[cfg_item].max_value))
+            {
+                bdk_warn("Out of range for %s = \"%s\", using default\n", name, val);
+                return config_info[cfg_item].default_value;
+            }
+            return tmp;
+        }
+        else
+        {
+            bdk_warn("Failed to parse %s = \"%s\", using default\n", name, val);
+            return config_info[cfg_item].default_value;
+        }
+    }
+    else
+        return config_info[cfg_item].default_value;
+}
+
+/**
+ * Get a string configuration item
+ *
+ * @param cfg_item Config item to get. If the item takes parameters (see bdk_config_t), then the
+ *                 parameters are listed following cfg_item.
+ *
+ * @return The value of the configuration item, or the default value if the item is not set
+ */
+const char *bdk_config_get_str(bdk_config_t cfg_item, ...)
+{
+    /* Make sure the correct access function was called */
+    if (config_info[cfg_item].ctype != BDK_CONFIG_TYPE_STR)
+        bdk_fatal("bdk_config_get_str() called for %s, not a str\n",
+            config_info[cfg_item].format);
+
+    char name[64];
+    va_list args;
+    va_start(args, cfg_item);
+    vsnprintf(name, sizeof(name)-1, config_info[cfg_item].format, args);
+    va_end(args);
+
+    if (BDK_CONFIG_QLM_MODE == cfg_item)
+    {
+        /* Restart the va_list; it must not be reused after vsnprintf() */
+        char name2[64];
+        va_start(args, cfg_item);
+        vsnprintf(name2, sizeof(name2)-1, "QLM-MODE.N%d.DLM%d", args);
+        va_end(args);
+        const char *val = get_value(name2, NULL);
+        if (val)
+            bdk_warn("%s: QLM-MODE.N%%d.DLM%%d format deprecated. Please use QLM-MODE.N%%d.QLM%%d instead\n", name2);
+    }
+
+    const char *val = get_value(name, NULL);
+    if (val)
+        return val;
+    else
+        return (const char *)config_info[cfg_item].default_value;
+}
+
+/**
+ * Get a binary blob
+ *
+ * @param blob_size Integer to receive the size of the blob
+ * @param cfg_item  Config item to get. If the item takes parameters (see bdk_config_t), then the
+ *                  parameters are listed following cfg_item.
+ *
+ * @return The value of the configuration item, or the default value if the item is not set
+ */
+const void* bdk_config_get_blob(int *blob_size, bdk_config_t cfg_item, ...)
+{
+    char name[64];
+    va_list args;
+    va_start(args, cfg_item);
+    vsnprintf(name, sizeof(name)-1, config_info[cfg_item].format, args);
+    va_end(args);
+
+    const void *val = get_value(name, blob_size);
+    if (val)
+        return val;
+    else
+        return (const void *)config_info[cfg_item].default_value;
+}
+
+/**
+ * Set an integer configuration item. Note this only sets the item in memory,
+ * persistent storage is not updated. The optional parameters for the setting are
+ * not supplied, meaning this function only changes the global default.
+ *
+ * @param value    Configuration item value
+ * @param cfg_item Config item to set. As no optional parameters are supplied,
+ *                 only the parameterless (global) form of the item is changed.
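+ *
+ * An illustrative call, forcing the global PCIe enhanced allocation
+ * default off:
+ *     bdk_config_set_int_no_param(0, BDK_CONFIG_PCIE_EA);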
+ */
+void bdk_config_set_int_no_param(int64_t value, bdk_config_t cfg_item)
+{
+    /* Make sure the correct access function was called */
+    if (config_info[cfg_item].ctype != BDK_CONFIG_TYPE_INT)
+        bdk_fatal("bdk_config_set_int_no_param() called for %s, not an int\n",
+            config_info[cfg_item].format);
+
+    char name[64];
+    char valstr[20];
+    /* Create a name without the optional parameters */
+    strncpy(name, config_info[cfg_item].format, sizeof(name) - 1);
+    name[sizeof(name) - 1] = 0;
+    char *ptr = strchr(name, '.');
+    if (ptr)
+        *ptr = 0;
+
+    if (!config_fdt)
+    {
+        bdk_error("bdk-config set %s before configuration loaded\n", name);
+        return;
+    }
+    if ((value < config_info[cfg_item].min_value) || (value > config_info[cfg_item].max_value))
+    {
+        bdk_error("Set out of range for %s = \"0x%lx\", ignoring\n", name, value);
+        return;
+    }
+
+    if (value < 10)
+        snprintf(valstr, sizeof(valstr), "%ld", value);
+    else
+        snprintf(valstr, sizeof(valstr), "0x%lx", value);
+
+    int status = fdt_setprop_string(config_fdt, config_node, name, valstr);
+    if (status < 0)
+        bdk_fatal("Failed to set %s=%s in FDT\n", name, valstr);
+}
+
+/**
+ * Set an integer configuration item. Note this only sets the item in memory,
+ * persistent storage is not updated.
+ *
+ * @param value    Configuration item value
+ * @param cfg_item Config item to set. If the item takes parameters (see bdk_config_t), then the
+ *                 parameters are listed following cfg_item.
+ */
+void bdk_config_set_int(int64_t value, bdk_config_t cfg_item, ...)
+{
+    /* Make sure the correct access function was called */
+    if (config_info[cfg_item].ctype != BDK_CONFIG_TYPE_INT)
+        bdk_fatal("bdk_config_set_int() called for %s, not an int\n",
+            config_info[cfg_item].format);
+
+    char name[64];
+    char valstr[20];
+    va_list args;
+    va_start(args, cfg_item);
+    vsnprintf(name, sizeof(name)-1, config_info[cfg_item].format, args);
+    va_end(args);
+
+    if (!config_fdt)
+    {
+        bdk_error("bdk-config set %s before configuration loaded\n", name);
+        return;
+    }
+    if ((value < config_info[cfg_item].min_value) || (value > config_info[cfg_item].max_value))
+    {
+        bdk_error("Set out of range for %s = \"0x%lx\", ignoring\n", name, value);
+        return;
+    }
+
+    if (value < 10)
+        snprintf(valstr, sizeof(valstr), "%ld", value);
+    else
+        snprintf(valstr, sizeof(valstr), "0x%lx", value);
+
+    int status = fdt_setprop_string(config_fdt, config_node, name, valstr);
+    if (status < 0)
+        bdk_fatal("Failed to set %s=%s in FDT\n", name, valstr);
+}
+
+/**
+ * Set a string configuration item. Note this only sets the item in memory,
+ * persistent storage is not updated.
+ *
+ * @param value    Configuration item value
+ * @param cfg_item Config item to set. If the item takes parameters (see bdk_config_t), then the
+ *                 parameters are listed following cfg_item.
+ */
+void bdk_config_set_str(const char *value, bdk_config_t cfg_item, ...)
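+/* Usage sketch (illustrative); the BDK itself stores the board model this
+   way later in this file:
+       bdk_config_set_str("ASIM-CN88XX", BDK_CONFIG_BOARD_MODEL);
+*/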
+{
+    /* Make sure the correct access function was called */
+    if (config_info[cfg_item].ctype != BDK_CONFIG_TYPE_STR)
+        bdk_fatal("bdk_config_set_str() called for %s, not a str\n",
+            config_info[cfg_item].format);
+
+    char name[64];
+    va_list args;
+
+    va_start(args, cfg_item);
+    vsnprintf(name, sizeof(name)-1, config_info[cfg_item].format, args);
+    va_end(args);
+
+    if (!config_fdt)
+    {
+        bdk_error("bdk-config set %s before configuration loaded\n", name);
+        return;
+    }
+
+    int status;
+    if (value)
+        status = fdt_setprop_string(config_fdt, config_node, name, value);
+    else
+        status = fdt_delprop(config_fdt, config_node, name);
+
+    if ((status < 0) && (status != -FDT_ERR_NOTFOUND))
+        bdk_fatal("Failed to set %s=%s in FDT\n", name, value);
+}
+
+/**
+ * Set a blob configuration item. Note this only sets the
+ * item in memory, persistent storage is not updated. The optional
+ * parameters for the setting are not supplied, meaning this function
+ * only changes the global default.
+ *
+ * @param size     Size of the item in bytes. A size of zero removes the device tree field
+ * @param value    Configuration item value
+ * @param cfg_item Config item to set. If the item takes parameters (see bdk_config_t), then the
+ *                 parameters are listed following cfg_item.
+ */
+void bdk_config_set_blob_no_param(int size, const void *value, bdk_config_t cfg_item)
+{
+    /* Make sure the correct access function was called */
+    if ((config_info[cfg_item].ctype != BDK_CONFIG_TYPE_BINARY) &&
+        (config_info[cfg_item].ctype != BDK_CONFIG_TYPE_STR_LIST))
+        bdk_fatal("bdk_config_set_blob_no_param() called for %s, not binary\n",
+            config_info[cfg_item].format);
+
+    char name[64];
+    /* Create a name without the optional parameters */
+    strncpy(name, config_info[cfg_item].format, sizeof(name) - 1);
+    name[sizeof(name) - 1] = 0;
+    char *ptr = strchr(name, '.');
+    if (ptr)
+        *ptr = 0;
+
+    if (!config_fdt)
+    {
+        bdk_error("bdk-config set %s before configuration loaded\n", name);
+        return;
+    }
+
+    int status;
+    if (size)
+        status = fdt_setprop(config_fdt, config_node, name, value, size);
+    else
+        status = fdt_delprop(config_fdt, config_node, name);
+
+    if ((status < 0) && (status != -FDT_ERR_NOTFOUND))
+        bdk_fatal("Failed to set %s in FDT\n", name);
+}
+
+/**
+ * Set a blob configuration item. Note this only sets the
+ * item in memory, persistent storage is not updated.
+ *
+ * @param size     Size of the item in bytes. A size of zero removes the device tree field
+ * @param value    Configuration item value
+ * @param cfg_item Config item to set. If the item takes parameters (see bdk_config_t), then the
+ *                 parameters are listed following cfg_item.
+ */
+void bdk_config_set_blob(int size, const void *value, bdk_config_t cfg_item, ...)
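+/* Usage sketch (illustrative; 'csib' is a hypothetical buffer): storing a
+   binary item that takes no name parameters, such as the trusted boot CSIB:
+       bdk_config_set_blob(sizeof(csib), &csib, BDK_CONFIG_TRUST_CSIB);
+   A size of zero instead deletes the property from the device tree. */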
+{
+    /* Make sure the correct access function was called */
+    if ((config_info[cfg_item].ctype != BDK_CONFIG_TYPE_BINARY) &&
+        (config_info[cfg_item].ctype != BDK_CONFIG_TYPE_STR_LIST))
+        bdk_fatal("bdk_config_set_blob() called for %s, not binary\n",
+            config_info[cfg_item].format);
+
+    char name[64];
+    va_list args;
+
+    va_start(args, cfg_item);
+    vsnprintf(name, sizeof(name)-1, config_info[cfg_item].format, args);
+    va_end(args);
+
+    if (!config_fdt)
+    {
+        bdk_error("bdk-config set %s before configuration loaded\n", name);
+        return;
+    }
+
+    int status;
+    if (size)
+        status = fdt_setprop(config_fdt, config_node, name, value, size);
+    else
+        status = fdt_delprop(config_fdt, config_node, name);
+
+    if ((status < 0) && (status != -FDT_ERR_NOTFOUND))
+        bdk_fatal("Failed to set %s in FDT\n", name);
+}
+
+/**
+ * Multiple functions need to display the config item help string in a format
+ * suitable for inclusion in a device tree. This function displays the help
+ * message properly indented and such.
+ *
+ * @param cfg Config item to display help for
+ */
+static void display_help(bdk_config_t cfg)
+{
+    /* Print the help text as a comment before the entry */
+    /* Indent with tabs like Linux requires */
+    printf("\n");
+    printf("\t/* ");
+    const char *ptr = bdk_config_get_help(cfg);
+    while (*ptr)
+    {
+        putchar(*ptr);
+        if (*ptr == '\n')
+            putchar('\t');
+        ptr++;
+    }
+    printf(" */\n");
+    /* Print the parameter and its default value as a comment. This will be
+       a reference that is easy for the user to change */
+    printf("\t//%s = ", config_info[cfg].format);
+    switch (config_info[cfg].ctype)
+    {
+        case BDK_CONFIG_TYPE_INT:
+            if (config_info[cfg].default_value < 10)
+                printf("\"%ld\"", config_info[cfg].default_value);
+            else
+                printf("\"0x%lx\"", config_info[cfg].default_value);
+            break;
+        case BDK_CONFIG_TYPE_STR:
+        case BDK_CONFIG_TYPE_STR_LIST:
+            if (config_info[cfg].default_value)
+                printf("\"%s\"", (const char *)config_info[cfg].default_value);
+            else
+                printf("\"\"");
+            break;
+        case BDK_CONFIG_TYPE_BINARY:
+            printf("[]");
+            break;
+    }
+    printf(";\n");
+}
+
+/**
+ * Display the active configuration as a valid device tree
+ */
+void bdk_config_show(void)
+{
+    /* Output the standard DTS headers */
+    printf("/dts-v1/;\n");
+    printf("\n");
+    printf("/ {\n");
+    printf("cavium,bdk {\n");
+    for (bdk_config_t cfg = 0; cfg < __BDK_CONFIG_END; cfg++)
+    {
+        /* Show the help message */
+        display_help(cfg);
+
+        /* Figure out how much of the config item is fixed versus
+           the optional parameters */
+        const char *format = config_info[cfg].format;
+        const char *format_param = strchr(format, '.');
+        int format_length = 0;
+        if (format_param)
+            format_length = format_param - format;
+
+        /* Loop through all device tree entries displaying the ones that
+           match this format */
+        int offset = fdt_first_property_offset(config_fdt, config_node);
+        while (offset >= 0)
+        {
+            /* Get the device tree item */
+            const char *name = NULL;
+            int data_size = 0;
+            const char *data = fdt_getprop_by_offset(config_fdt, offset, &name, &data_size);
+            const char *data_end = data + data_size;
+            /* Find the first param */
+            const char *name_param = strchr(name, '.');
+            int name_length = 0;
+            if (name_param)
+            {
+                /* We want to compare up to the first param */
+                name_length = name_param - name;
+                /* If the lengths are different not including the parameters,
+                   then we force a full match, which will always fail */
+                if (name_length != format_length)
+                    name_length = 0;
+            }
+            else /* No params, match base of format */
+                name_length = format_length;
+
+            /* Check if it matches the current config format */
+            int match;
+            if (name_length)
+            {
+                /* Check the prefix */
+                match = strncmp(name, format, name_length);
+                if (match == 0)
+                {
+                    /* Prefix matched. We only really match if the next
+                       character is the end of the string or a '.' */
+                    if ((name[name_length] != 0) && (name[name_length] != '.'))
+                        match = 1;
+                }
+            }
+            else
+                match = strcmp(name, format);
+            /* Print matching entries */
+            if (match == 0)
+            {
+                if (config_info[cfg].ctype == BDK_CONFIG_TYPE_BINARY)
+                {
+                    printf("\t%s = [", name);
+                    const char *ptr = data;
+                    while (ptr < data_end)
+                    {
+                        /* Print as unsigned to avoid sign extending bytes >= 0x80 */
+                        printf(" %02x", (unsigned char)*ptr);
+                        ptr++;
+                    }
+                    printf(" ]");
+                }
+                else
+                {
+                    printf("\t%s = \"%s\"", name, data);
+                    data += strlen(data) + 1;
+                    while (data < data_end)
+                    {
+                        printf(",\n\t\t\"%s\"", data);
+                        data += strlen(data) + 1;
+                    }
+                }
+                printf(";\n");
+            }
+            offset = fdt_next_property_offset(config_fdt, offset);
+        }
+    }
+    /* Output the standard DTS footers */
+    printf("}; /* cavium,bdk */\n");
+    printf("}; /* / */\n");
+}
+
+/**
+ * Display a list of all possible config items with help text
+ */
+void bdk_config_help(void)
+{
+    /* Write out formatted as part of a device tree source (dts) file */
+    printf("/dts-v1/;\n");
+    printf("\n");
+    printf("/ {\n");
+    printf("cavium,bdk {\n");
+    for (bdk_config_t cfg = 0; cfg < __BDK_CONFIG_END; cfg++)
+        display_help(cfg);
+    printf("}; /* cavium,bdk */\n");
+    printf("}; /* / */\n");
+}
+
+
+/**
+ * Save the current configuration to flash
+ *
+ * @return Zero on success, negative on failure
+ */
+int bdk_config_save(void)
+{
+    /* Pack the FDT so it uses less space */
+    int status = fdt_pack(config_fdt);
+    if (status < 0)
+    {
+        bdk_error("FDT error %d: %s\n", status, fdt_strerror(status));
+        return -1;
+    }
+
+    /* Calculate a CRC32 of the FDT */
+    int fdt_size = fdt_totalsize(config_fdt);
+    uint32_t crc32 = bdk_crc32(config_fdt, fdt_size, 0);
+
+    /* Open the output file */
+    FILE *outf = fopen("/fatfs/default.dtb", "wb");
+    if (!outf)
+    {
+        bdk_error("Failed to open flash\n");
+        return -1;
+    }
+
+    /* Write the FDT */
+    if (fwrite(config_fdt, fdt_size, 1, outf) != 1)
+    {
+        bdk_error("Failed to write FDT\n");
+        fclose(outf);
+        return -1;
+    }
+
+    /* Save the CRC32 in the same endianness as the FDT */
+    crc32 = cpu_to_fdt32(crc32);
+    if (fwrite(&crc32, sizeof(crc32), 1, outf) != 1)
+    {
+        bdk_error("Failed to write FDT CRC32\n");
+        fclose(outf);
+        return -1;
+    }
+
+    fclose(outf);
+    return 0;
+}
+
+/**
+ * Takes the current live device tree and exports it to a memory address suitable
+ * for passing to the next binary in register X1.
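+ * The exported layout is what __bdk_config_init() expects to find via X1:
+ * the FDT immediately followed by a 4 byte CRC32 of the FDT, stored in the
+ * same endianness as the FDT itself.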
+ *
+ * @return Physical address of the device tree, or 0 on failure
+ */
+uint64_t __bdk_config_export_to_mem(void)
+{
+    void *end_ptr = sbrk(0);
+    bdk_node_t node = bdk_numa_master();
+    int fdt_size = fdt_totalsize(config_fdt);
+
+    /* Round size up to 4KB boundary, be sure to add 4 bytes for CRC32 */
+    int fdt_space = (fdt_size + 4 + 0xfff) & -4096;
+    /* First try 4MB - FDT size as this keeps the FDT in the 4MB secure space
+       setup by ATF */
+    void *fdt_ptr = bdk_phys_to_ptr(0x400000 - fdt_space);
+    if (!__bdk_is_dram_enabled(node))
+    {
+        /* Address must be in L2 */
+        int l2_size = bdk_l2c_get_cache_size_bytes(node);
+        void *l2_ptr = bdk_phys_to_ptr(l2_size - fdt_space);
+        if (l2_ptr < fdt_ptr)
+            fdt_ptr = l2_ptr;
+        if (fdt_ptr < end_ptr)
+        {
+            bdk_error("No room for FDT to pass to next binary\n");
+            return 0;
+        }
+    }
+    else
+    {
+        /* We have DRAM, make sure we're past the end of this image */
+        if (fdt_ptr < end_ptr)
+            fdt_ptr = end_ptr;
+    }
+    uint32_t crc32 = bdk_crc32(config_fdt, fdt_size, 0);
+    fdt_move(config_fdt, fdt_ptr, fdt_size);
+    /* CRC32 is stored in same endianness as FDT at the end */
+    *(uint32_t *)((const char *)fdt_ptr + fdt_size) = cpu_to_fdt32(crc32);
+    BDK_TRACE(FDT_OS, "Exported device tree to memory %p, size 0x%x, CRC32 %08x\n",
+        fdt_ptr, fdt_size, crc32);
+    return bdk_ptr_to_phys(fdt_ptr);
+}
+
+/**
+ * Return a pointer to the device tree used for configuration
+ *
+ * @return FDT or NULL on failure
+ */
+void* bdk_config_get_fdt(void)
+{
+    return config_fdt;
+}
+
+/**
+ * Set the device tree used for configuration
+ *
+ * @param fdt Device tree to use. Memory is assumed to be from malloc() and bdk_config takes
+ *            over ownership on success
+ *
+ * @return Zero on success, negative on failure
+ */
+int bdk_config_set_fdt(void *fdt)
+{
+    int offset = fdt_path_offset(fdt, "/cavium,bdk"); /* Find our node */
+    if (offset < 0)
+        return -1;
+    free(config_fdt);
+    config_fdt = fdt;
+    config_node = offset;
+    return 0;
+}
+
+/**
+ * Write all default values to a FDT. Missing config items get defaults in the
+ * BDK config; this function adds those defaults to the FDT. This way other code
+ * gets the default value without needing special code.
+ *
+ * @param fdt FDT structure to fill defaults into
+ *
+ * @return Zero on success, negative on failure
+ */
+int bdk_config_expand_defaults(void *fdt)
+{
+    const struct fdt_property *prop;
+
+    /* The best defaults may have changed while this image was running if DRAM
+       is setup.
       Update the defaults before expanding them */
+    config_set_defaults();
+
+    int fdt_node = fdt_path_offset(fdt, "/cavium,bdk"); /* Find our node */
+    if (fdt_node < 0)
+    {
+        bdk_error("Failed to find top node, FDT error %d: %s\n",
+            fdt_node, fdt_strerror(fdt_node));
+        return -1;
+    }
+
+    /* Loop through all configuration items */
+    for (bdk_config_t cfg = 0; cfg < __BDK_CONFIG_END; cfg++)
+    {
+        /* Figure out the base name without any dot parameters */
+        const char *name = config_info[cfg].format;
+        const char *name_end = strchr(name, '.');
+        int name_len;
+        if (name_end)
+            name_len = name_end - name;
+        else
+            name_len = strlen(name);
+        /* Try and find the base name in the FDT */
+        prop = fdt_get_property_namelen(fdt, fdt_node, name, name_len, NULL);
+        /* If it wasn't found, then we need to add the default */
+        if (prop == NULL)
+        {
+            /* Create a copy of the name for use in FDT calls */
+            char temp_name[name_len + 1];
+            memcpy(temp_name, name, name_len);
+            temp_name[name_len] = 0;
+            /* Call the correct FDT call based on the type */
+            int status = 0;
+            switch (config_info[cfg].ctype)
+            {
+                case BDK_CONFIG_TYPE_INT:
+                {
+                    char temp_value[20];
+                    if (config_info[cfg].default_value < 10)
+                        snprintf(temp_value, sizeof(temp_value), "%ld", config_info[cfg].default_value);
+                    else
+                        snprintf(temp_value, sizeof(temp_value), "0x%lx", config_info[cfg].default_value);
+                    /* Store the default int value */
+                    status = fdt_setprop_string(fdt, fdt_node, temp_name, temp_value);
+                    break;
+                }
+                case BDK_CONFIG_TYPE_STR:
+                    /* Store the default string value, if present */
+                    if (config_info[cfg].default_value)
+                    {
+                        status = fdt_setprop_string(fdt, fdt_node, temp_name,
+                            (const char *)config_info[cfg].default_value);
+                    }
+                    break;
+                case BDK_CONFIG_TYPE_STR_LIST:
+                    /* Do nothing, string lists default to empty */
+                    break;
+                case BDK_CONFIG_TYPE_BINARY:
+                    /* Do nothing, binary defaults to empty */
+                    break;
+            }
+            if (status < 0)
+            {
+                bdk_error("Failed to set default for %s, FDT error %d: %s\n",
+                    temp_name, status, fdt_strerror(status));
+                return -1;
+            }
+        }
+    }
+    return 0;
+}
+
+/**
+ * Some of the default config values can vary based on runtime parameters. This
+ * function sets those default parameters. It must be run before anyone calls
+ * bdk_config_get_*().
+ */
+static void config_set_defaults(void)
+{
+    bool isEmulation = bdk_is_platform(BDK_PLATFORM_EMULATOR);
+    /* This is Cavium's OUI with the local admin bit. We will use this as a
+       default as it won't collide with official addresses, but is sort of
+       part of the Cavium range. The lower three bytes will be updated with
+       the wafer info */
+    uint64_t mac_address = 0x020fb7000000ull;
+    /* Set the lower MAC address bits based on the chip manufacturing
+       information.
       This should give reasonable MAC address defaults
+       for production parts */
+    if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX))
+    {
+        BDK_CSR_INIT(fus_dat0, bdk_numa_local(), BDK_MIO_FUS_DAT0);
+        mac_address |= fus_dat0.u & 0xffffff;
+    }
+    else
+    {
+        mac_address |= bdk_fuse_read_range(bdk_numa_local(), BDK_FUS_FUSE_NUM_E_MFG_INFOX(0), 24);
+    }
+    config_info[BDK_CONFIG_MAC_ADDRESS].default_value = mac_address;
+
+    /* Set the number of packet buffers */
+    int num_packet_buffers = 4096;
+    /* If DRAM is setup, allocate 8K buffers for 8 ports plus some slop */
+    if (__bdk_is_dram_enabled(bdk_numa_master()))
+        num_packet_buffers = 8192 * 16 + 1024;
+    else if (isEmulation) {
+        if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+            num_packet_buffers = 4096 * 4;
+    }
+    config_info[BDK_CONFIG_NUM_PACKET_BUFFERS].default_value = num_packet_buffers;
+    config_info[BDK_CONFIG_PACKET_BUFFER_SIZE].default_value = 1024;
+
+    /* Asim doesn't scale to 48 cores well. Limit to 4 */
+    if (bdk_is_platform(BDK_PLATFORM_ASIM))
+        config_info[BDK_CONFIG_COREMASK].default_value = 0xf;
+    /* CN88XX pass 1.x doesn't support EA */
+    if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X))
+        config_info[BDK_CONFIG_PCIE_EA].default_value = 0;
+    /* Emulator only supports 4 cores */
+    if (isEmulation)
+        config_info[BDK_CONFIG_COREMASK].default_value = 0xf;
+}
+
+/**
+ * BDK configuration items are stored in a device tree so they can be passed to
+ * other software later. This function creates the initial empty device tree
+ * used for BDK configuration items. The values will be populated as configuration
+ * files are read from flash.
+ */
+static void config_setup_fdt(void)
+{
+    const int FDT_SIZE = 0x10000;
+    config_fdt = calloc(1, FDT_SIZE);
+    if (!config_fdt)
+        bdk_fatal("Unable to allocate memory for config FDT\n");
+    if (fdt_create_empty_tree(config_fdt, FDT_SIZE) < 0)
+        bdk_fatal("Unable to create FDT for config\n");
+    config_node = fdt_add_subnode(config_fdt, 0, "cavium,bdk");
+    if (config_node < 0)
+        bdk_fatal("Unable to create cavium,bdk node in FDT\n");
+}
+
+/**
+ * Parse a FDT and copy its properties to our configuration FDT
+ *
+ * @param fdt       FDT to parse
+ * @param base_path Path of the node to copy properties from
+ */
+static int config_parse_fdt(const void *fdt, const char *base_path)
+{
+    /* Check the FDT header */
+    int result = fdt_check_header(fdt);
+    if (result)
+        goto fail;
+
+    /* Find our node */
+    result = fdt_path_offset(fdt, base_path);
+    if (result < 0)
+        goto fail;
+
+    /* Copy all parameters to our in memory FDT */
+    int offset = fdt_first_property_offset(fdt, result);
+    while (offset >= 0)
+    {
+        const char *name = NULL;
+        int blob_size = 0;
+        const char *data = fdt_getprop_by_offset(fdt, offset, &name, &blob_size);
+        result = fdt_setprop(config_fdt, config_node, name, data, blob_size);
+        offset = fdt_next_property_offset(fdt, offset);
+    }
+    return 0;
+fail:
+    bdk_error("FDT error %d: %s\n", result, fdt_strerror(result));
+    return -1;
+}
+
+/**
+ * Load a FDT from a file and pull in its configuration properties
+ *
+ * @param filename File to read from
+ * @param offset   Offset into the file to read from
+ *
+ * @return Zero on success, negative on failure
+ */
+static int config_load_file(const char *filename, uint64_t offset)
+{
+    uint64_t fdt_size = 0;
+    bdk_signed_flags_t sign_flags = BDK_SIGNED_FLAG_NONE;
+    if (offset)
+        sign_flags = BDK_SIGNED_FLAG_ALLOW_UNSIGNED | BDK_SIGNED_FLAG_NOT_ENCRYPTED;
+    void *fdt = bdk_signed_load(filename, offset, BDK_SIGNED_DTS, sign_flags, &fdt_size);
+    if (!fdt)
+        return -1;
+
+    /* Make sure the read succeeded */
+    if (fdt_size < (int)sizeof(struct fdt_header))
+    {
+ bdk_error("Invalid device tee %s\n", filename); + free(fdt); + return -1; + } + + if (fdt_check_header(fdt)) + { + bdk_error("Invalid FDT header read from %s\n", filename); + free(fdt); + return -1; + } + + /* Make sure we read enough data to contain the FDT */ + int correct_size = fdt_totalsize(fdt); + if ((int)ftd_size < correct_size) + { + bdk_error("Unable to read FDT from %s\n", filename); + free(fdt); + return -1; + } + + /* Check if a CRC32 was added on the end of the FDT */ + if ((int)ftd_size >= correct_size + 4) + { + uint32_t crc32 = bdk_crc32(fdt, correct_size, 0); + uint32_t correct_crc32 = *(uint32_t *)((const char *)fdt + correct_size); + /* CRC32 is stored in same endianness as FDT */ + correct_crc32 = fdt32_to_cpu(correct_crc32); + if (crc32 != correct_crc32) + { + bdk_error("FDT failed CRC32 verification (%s)\n", filename); + free(fdt); + return -1; + } + //printf("PASS: FDT CRC32 verification (%s)\n", filename); + } + + /* Parse the device tree, adding its configuration to ours */ + if (config_parse_fdt(fdt, "/cavium,bdk")) + { + free(fdt); + return -1; + } + + free(fdt); + return 0; +} + +/** + * Internal BDK function to initialize the config system. Must be called before + * any configuration functions are called + */ +void __bdk_config_init(void) +{ + bool done_trust_init = false; + /* Set default that can vary dynamically at runtime */ + config_set_defaults(); + + /* Regsiter X1 is expected to be a device tree when we boot. Check that + the physical address seems correct, then load the device tree */ + if ((__bdk_init_reg_x1 > 0) && /* Not zero */ + (__bdk_init_reg_x1 < 0x1000000) && /* In the lower 16MB */ + ((__bdk_init_reg_x1 & 0xfff) == 0)) /* Aligned on a 4KB boundary */ + { + const void *fdt = (const void *)__bdk_init_reg_x1; + /* Check the FDT header */ + int result = fdt_check_header(fdt); + if (result) + result = -1; /* Invalid tree */ + else + { + int fdt_size = fdt_totalsize(fdt); + uint32_t crc32 = bdk_crc32(fdt, fdt_size, 0); + uint32_t correct_crc32 = *(uint32_t *)((const char *)fdt + fdt_size); + /* CRC32 is stored in same endianness as FDT */ + correct_crc32 = fdt32_to_cpu(correct_crc32); + if (crc32 == correct_crc32) + { + //printf("Previous image FDT passed CRC32 verification(%p, size 0x%x, CRC32 %08x)\n", fdt, fdt_size, crc32); + result = fdt_path_offset(fdt, "/cavium,bdk"); /* Find our node */ + } + else + { + bdk_error("Previous image FDT failed CRC32 verification(%p, size 0x%x)\n", fdt, fdt_size); + result = -1; /* Invalid tree */ + } + } + /* If tree is valid so far, attempt to move it into our memory space */ + if (result > 0) + { + /* 4KB extra room for growth */ + const int fdt_size = fdt_totalsize(fdt) + 4096; + config_fdt = calloc(1, fdt_size); + if (config_fdt) + { + int result = fdt_move(fdt, config_fdt, fdt_size); + if (result == 0) + { + /* Find our node */ + config_node = fdt_path_offset(config_fdt, "/cavium,bdk"); + if (config_node > 0) + { + printf("Using configuration from previous image\n"); + goto done; + } + else + { + bdk_error("Unable to find BDK node after move\n"); + free(config_fdt); + config_node = 0; + config_fdt = NULL; + } + } + else + { + bdk_error("Unable to move passed device tree\n"); + free(config_fdt); + config_fdt = NULL; + } + } + else + bdk_error("Failed to allocate memory for passed device tree (%d bytes)\n", fdt_size); + } + } + + /* Create the global device tree used to store config items */ + config_setup_fdt(); + /* Setup trust level so reading device trees works */ + __bdk_trust_init(); + 
+    done_trust_init = true;
+
+    if (bdk_is_platform(BDK_PLATFORM_ASIM))
+    {
+        if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+            bdk_config_set_str("ASIM-CN88XX", BDK_CONFIG_BOARD_MODEL);
+        else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+            bdk_config_set_str("ASIM-CN83XX", BDK_CONFIG_BOARD_MODEL);
+        else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+            bdk_config_set_str("ASIM-CN81XX", BDK_CONFIG_BOARD_MODEL);
+        else if (CAVIUM_IS_MODEL(CAVIUM_CN93XX))
+            bdk_config_set_str("ASIM-CN93XX", BDK_CONFIG_BOARD_MODEL);
+    }
+    else if (bdk_is_platform(BDK_PLATFORM_EMULATOR))
+    {
+        if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+            bdk_config_set_str("EMUL-CN88XX", BDK_CONFIG_BOARD_MODEL);
+        else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+            bdk_config_set_str("EMUL-CN83XX", BDK_CONFIG_BOARD_MODEL);
+        else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+            bdk_config_set_str("EMUL-CN81XX", BDK_CONFIG_BOARD_MODEL);
+        else if (CAVIUM_IS_MODEL(CAVIUM_CN93XX))
+            bdk_config_set_str("EMUL-CN93XX", BDK_CONFIG_BOARD_MODEL);
+    }
+    else if (config_load_file("/rom/boardcfg.dtb", 0) == 0)
+    {
+        printf("Board manufacturing information loaded from ROM-FS\n");
+    }
+    /* Load manufacturing data from the top 64KB of flash */
+    else if (config_load_file("/boot", BDK_CONFIG_MANUFACTURING_ADDRESS) != 0)
+    {
+        printf("\33[1m"); /* Bold */
+        bdk_warn("\n");
+        bdk_warn("********************************************************\n");
+        bdk_warn("* Board manufacturing information not found. Program\n");
+        bdk_warn("* the board manufacturing information in the Setup menu.\n");
+        bdk_warn("********************************************************\n");
+        bdk_warn("\n");
+        printf("\33[0m"); /* Normal */
+        goto done;
+    }
+
+    const char *model = bdk_config_get_str(BDK_CONFIG_BOARD_MODEL);
+    const char *revision = bdk_config_get_str(BDK_CONFIG_BOARD_REVISION);
+
+    /* Load MODEL-REVISION.dtb if it is on ROM-FS */
+    if (model && revision)
+    {
+        char filename[64];
+        snprintf(filename, sizeof(filename), "/rom/%s-%s.dtb", model, revision);
+        if (config_load_file(filename, 0) == 0)
+            goto done;
+    }
+
+    /* Load MODEL.dtb if it is on ROM-FS */
+    if (model)
+    {
+        char filename[64];
+        snprintf(filename, sizeof(filename), "/rom/%s.dtb", model);
+        if (config_load_file(filename, 0) == 0)
+            goto done;
+    }
+
+    /* Load default.dtb if it is there */
+    if (config_load_file("/fatfs/default.dtb", 0) == 0)
+        goto done;
+
+    /* Load MODEL-REVISION.dtb from the FAT filesystem if it is there */
+    if (model && revision)
+    {
+        char filename[64];
+        snprintf(filename, sizeof(filename), "/fatfs/%s-%s.dtb", model, revision);
+        if (config_load_file(filename, 0) == 0)
+            goto done;
+    }
+
+    /* Load MODEL.dtb from the FAT filesystem if it is there */
+    if (model)
+    {
+        char filename[64];
+        snprintf(filename, sizeof(filename), "/fatfs/%s.dtb", model);
+        if (config_load_file(filename, 0) == 0)
+            goto done;
+    }
+
+    /* No board specific configuration was found. Warn the user */
+    printf("\33[1m"); /* Bold */
+    bdk_warn("\n");
+    bdk_warn("********************************************************\n");
+    bdk_warn("* Board configuration file not found. Either the board\n");
+    bdk_warn("* model is incorrect, or factory settings are not\n");
+    bdk_warn("* available. 
DTB file not found for board \"%s\".\n", model); + bdk_warn("********************************************************\n"); + bdk_warn("\n"); + printf("\33[0m"); /* Normal */ + +done: + bdk_config_set_str(bdk_version_string(), BDK_CONFIG_VERSION); + /* Load the tracing level */ + bdk_trace_enables = bdk_config_get_int(BDK_CONFIG_TRACE); + if (BDK_TRACE_OVERRIDE) + bdk_trace_enables = BDK_TRACE_OVERRIDE; + if (!done_trust_init) + __bdk_trust_init(); +} diff --git a/src/vendorcode/cavium/bdk/libbdk-hal/bdk-gpio.c b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-gpio.c new file mode 100644 index 0000000000..55f0dbf3f2 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-gpio.c @@ -0,0 +1,197 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ +#include <bdk.h> +#include "libbdk-arch/bdk-csrs-gpio.h" + +/* This code is an optional part of the BDK. It is only linked in + if BDK_REQUIRE() needs it */ +BDK_REQUIRE_DEFINE(GPIO); + +/** + * Initialize a single GPIO as either an input or output. If it is + * an output, also set its output value. + * + * @param gpio GPIO to initialize + * @param is_output Non zero if this GPIO should be an output + * @param output_value + * Value of the GPIO if it should be an output. Not used if the + * GPIO isn't an output. 
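+ *
+ * An illustrative call, configuring GPIO 4 as an output driven low:
+ *     bdk_gpio_initialize(node, 4, 1, 0);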
+ *
+ * @return Zero on success, negative on failure
+ */
+int bdk_gpio_initialize(bdk_node_t node, int gpio, int is_output, int output_value)
+{
+    if ((gpio >= 0) && (gpio < bdk_gpio_get_num()))
+    {
+        int gpio_group = gpio >> 6;
+        int gpio_index = gpio & 63;
+        if (output_value)
+            bdk_gpio_set(node, gpio_group, 1ull << gpio_index);
+        else
+            bdk_gpio_clear(node, gpio_group, 1ull << gpio_index);
+
+        BDK_CSR_DEFINE(cfg, BDK_GPIO_BIT_CFGX(gpio));
+        cfg.u = 0;
+        cfg.s.tx_oe = !!is_output;
+        BDK_CSR_WRITE(node, BDK_GPIO_BIT_CFGX(gpio), cfg.u);
+    }
+    else
+    {
+        bdk_error("bdk_gpio_initialize: Illegal GPIO\n");
+        return -1;
+    }
+    return 0;
+}
+
+
+/**
+ * GPIO Read Data
+ *
+ * @param node       Node GPIO block is on
+ * @param gpio_block GPIO block to access. Each block contains up to 64 GPIOs
+ *
+ * @return Status of the GPIO pins for the given block
+ */
+uint64_t bdk_gpio_read(bdk_node_t node, int gpio_block)
+{
+    bdk_gpio_rx_dat_t gpio_rx_dat;
+    switch (gpio_block)
+    {
+        case 0:
+            gpio_rx_dat.u = BDK_CSR_READ(node, BDK_GPIO_RX_DAT);
+            break;
+        case 1:
+            gpio_rx_dat.u = BDK_CSR_READ(node, BDK_GPIO_RX1_DAT);
+            break;
+        default:
+            bdk_error("GPIO block %d not supported\n", gpio_block);
+            gpio_rx_dat.u = 0;
+            break;
+    }
+    return gpio_rx_dat.s.dat;
+}
+
+
+/**
+ * GPIO Clear pin
+ *
+ * @param node       Node GPIO block is on
+ * @param gpio_block GPIO block to access. Each block contains up to 64 GPIOs
+ * @param clear_mask Bit mask to indicate which bits to drive to '0'.
+ */
+void bdk_gpio_clear(bdk_node_t node, int gpio_block, uint64_t clear_mask)
+{
+    switch (gpio_block)
+    {
+        case 0:
+            BDK_CSR_WRITE(node, BDK_GPIO_TX_CLR, clear_mask);
+            break;
+        case 1:
+            BDK_CSR_WRITE(node, BDK_GPIO_TX1_CLR, clear_mask);
+            break;
+        default:
+            bdk_error("GPIO block %d not supported\n", gpio_block);
+            break;
+    }
+}
+
+
+/**
+ * GPIO Set pin
+ *
+ * @param node       Node GPIO block is on
+ * @param gpio_block GPIO block to access. Each block contains up to 64 GPIOs
+ * @param set_mask   Bit mask to indicate which bits to drive to '1'.
+ */
+void bdk_gpio_set(bdk_node_t node, int gpio_block, uint64_t set_mask)
+{
+    switch (gpio_block)
+    {
+        case 0:
+            BDK_CSR_WRITE(node, BDK_GPIO_TX_SET, set_mask);
+            break;
+        case 1:
+            BDK_CSR_WRITE(node, BDK_GPIO_TX1_SET, set_mask);
+            break;
+        default:
+            bdk_error("GPIO block %d not supported\n", gpio_block);
+            break;
+    }
+}
+
+
+/** GPIO Select pin
+ *
+ * @param node  CPU node
+ * @param gpio  GPIO number
+ * @param pin   Pin number
+ */
+void bdk_gpio_select_pin(bdk_node_t node, int gpio, int pin)
+{
+    if ((gpio < 0) || (gpio >= bdk_gpio_get_num()))
+    {
+        bdk_warn("bdk_gpio_select_pin: Illegal GPIO %d\n", gpio);
+        return;
+    }
+
+    BDK_CSR_MODIFY(c, node, BDK_GPIO_BIT_CFGX(gpio), c.s.pin_sel = pin);
+}
+
+
+/**
+ * Return the number of GPIO pins on this chip
+ *
+ * @return Number of GPIO pins
+ */
+int bdk_gpio_get_num(void)
+{
+    if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+        return 51;
+    else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+        return 48;
+    else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+        return 80;
+    else if (CAVIUM_IS_MODEL(CAVIUM_CN93XX))
+        return 96;
+    else
+    {
+        bdk_error("bdk_gpio_get_num(): Unsupported chip");
+        return 0;
+    }
+}
diff --git a/src/vendorcode/cavium/bdk/libbdk-hal/bdk-l2c.c b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-l2c.c new file mode 100644 index 0000000000..b1e2a88ce1 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-l2c.c @@ -0,0 +1,270 @@ +/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). 
All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ +#include <bdk.h> +#include "libbdk-arch/bdk-csrs-ap.h" +#include "libbdk-arch/bdk-csrs-l2c.h" +#include "libbdk-arch/bdk-csrs-l2c_cbc.h" +#include "libbdk-arch/bdk-csrs-mio_fus.h" + +typedef struct +{ + int sets; + int ways; + bool is_locked; +} l2_node_state_t; + +static l2_node_state_t l2_node_state[BDK_NUMA_MAX_NODES]; + +/** + * Perform one time initialization of L2 for improved + * performance. This can be called after L2 is in use. + * + * @return Zero on success, negative on failure. + */ +int bdk_l2c_initialize(bdk_node_t node) +{ + if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX)) + { + /* Tell L2 to give the IOB statically higher priority compared to the + cores. 
This avoids conditions where IO blocks might be starved under + very high L2 loads */ + BDK_CSR_MODIFY(c, node, BDK_L2C_CTL, + c.s.rsp_arb_mode = 1; + c.s.xmc_arb_mode = 0); + } + + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && !bdk_is_platform(BDK_PLATFORM_ASIM)) + { + /* Errata: (L2C-22279) RCAS/RSTC which hits S/S can use wrong compare data */ + BDK_CSR_MODIFY(c, node, BDK_L2C_CTL, + c.s.dissblkdty = 1); + /* Errata: (L2C-22249) Broadcast invals can cause starvation on the INV bus */ + for (int i = 0; i < 4; i++) + BDK_CSR_MODIFY(c, node, BDK_L2C_CBCX_SCRATCH(i), + c.s.invdly = 1); + } + + // FIXME: Disable partial writes on pass 2 until it is debugged + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS2_X) && !bdk_is_platform(BDK_PLATFORM_ASIM)) + { + BDK_CSR_MODIFY(c, node, BDK_L2C_CTL, + c.s.dissblkdty = 1); + } + + if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX) && bdk_is_platform(BDK_PLATFORM_EMULATOR)) + { + /* The emulator requires L2C_CTL[DISSBLKDTY] to be set */ + BDK_CSR_MODIFY(c, node, BDK_L2C_CTL, + c.s.dissblkdty = 1); + } + return 0; +} + +int bdk_l2c_get_core_way_partition(bdk_node_t node, int core) +{ + return (BDK_CSR_READ(node, BDK_L2C_WPAR_PPX(core)) & 0xffff); +} + +int bdk_l2c_set_core_way_partition(bdk_node_t node, int core, uint32_t mask) +{ + uint32_t valid_mask = (1 << bdk_l2c_get_num_assoc(node)) - 1; + mask &= valid_mask; + + BDK_CSR_WRITE(node, BDK_L2C_WPAR_PPX(core), mask); + return 0; +} + + +int bdk_l2c_set_hw_way_partition(bdk_node_t node, uint32_t mask) +{ + uint32_t valid_mask = (1 << bdk_l2c_get_num_assoc(node)) - 1; + mask &= valid_mask; + + BDK_CSR_WRITE(node, BDK_L2C_WPAR_IOBX(0), mask); + return 0; +} + + +int bdk_l2c_get_hw_way_partition(bdk_node_t node) +{ + return (BDK_CSR_READ(node, BDK_L2C_WPAR_IOBX(0)) & 0xffff); +} + + +int bdk_l2c_lock_mem_region(bdk_node_t node, uint64_t start, uint64_t len) +{ + /* Round start/end to cache line boundaries */ + len += start & BDK_CACHE_LINE_MASK; + start &= ~BDK_CACHE_LINE_MASK; + len = (len + BDK_CACHE_LINE_MASK) & ~BDK_CACHE_LINE_MASK; + void *ptr = (start) ? bdk_phys_to_ptr(start) : NULL; + + while (len) + { + BDK_CACHE_LCK_L2(ptr); + ptr += BDK_CACHE_LINE_SIZE; + len -= BDK_CACHE_LINE_SIZE; + } + l2_node_state[node].is_locked = true; + return 0; +} + +void bdk_l2c_flush(bdk_node_t node) +{ + /* The number of ways can be reduced with fuses, but the equations below + assume the max number of ways */ + const int MAX_WAYS = 16; + int num_sets = bdk_l2c_get_num_sets(node); + int num_ways = bdk_l2c_get_num_assoc(node); + + int is_rtg = 1; /* Clear remote tags */ + for (int l2_way = 0; l2_way < num_ways; l2_way++) + { + for (int l2_set = 0; l2_set < num_sets; l2_set++) + { + uint64_t encoded = 128 * (l2_set + num_sets * (l2_way + (is_rtg * MAX_WAYS))); + BDK_CACHE_WBI_L2_INDEXED(encoded); + } + } + + is_rtg = 0; /* Clear local tags */ + for (int l2_way = 0; l2_way < num_ways; l2_way++) + { + for (int l2_set = 0; l2_set < num_sets; l2_set++) + { + uint64_t encoded = 128 * (l2_set + num_sets * (l2_way + (is_rtg * MAX_WAYS))); + BDK_CACHE_WBI_L2_INDEXED(encoded); + } + } + l2_node_state[node].is_locked = false; +} + +int bdk_l2c_unlock_mem_region(bdk_node_t node, uint64_t start, uint64_t len) +{ + /* Round start/end to cache line boundaries */ + len += start & BDK_CACHE_LINE_MASK; + start &= ~BDK_CACHE_LINE_MASK; + len = (len + BDK_CACHE_LINE_MASK) & ~BDK_CACHE_LINE_MASK; + void *ptr = (start) ? 
bdk_phys_to_ptr(start) : NULL; + + while (len > 0) + { + /* Must use invalidate version to release lock */ + BDK_CACHE_WBI_L2(ptr); + ptr += BDK_CACHE_LINE_SIZE; + len -= BDK_CACHE_LINE_SIZE; + } + + l2_node_state[node].is_locked = false; + return 0; +} + + +int bdk_l2c_get_cache_size_bytes(bdk_node_t node) +{ + return bdk_l2c_get_num_sets(node) * bdk_l2c_get_num_assoc(node) * BDK_CACHE_LINE_SIZE; +} + +/* Return the number of sets in the L2 Cache */ +int bdk_l2c_get_num_sets(bdk_node_t node) +{ + if (bdk_unlikely(l2_node_state[node].sets == 0)) + { + /* Select the L2 cache */ + bdk_ap_csselr_el1_t csselr_el1; + csselr_el1.u = 0; + csselr_el1.s.ind = 0; + csselr_el1.s.level = CAVIUM_IS_MODEL(CAVIUM_CN8XXX) ? 1 : 2; + BDK_MSR(CSSELR_EL1, csselr_el1.u); + /* Read its size */ + bdk_ap_ccsidr_el1_t ccsidr_el1; + BDK_MRS(CCSIDR_EL1, ccsidr_el1.u); + /* Store it for use later */ + l2_node_state[node].sets = ccsidr_el1.s.numsets + 1; + l2_node_state[node].ways = ccsidr_el1.s.associativity + 1; + + /* Early chips didn't update the number of ways based on fusing */ + if ((l2_node_state[node].ways == 16) && CAVIUM_IS_MODEL(CAVIUM_CN8XXX)) + { + /* The l2 can be reduced in 25% increments */ + BDK_CSR_INIT(mio_fus_dat3, node, BDK_MIO_FUS_DAT3); + switch (mio_fus_dat3.s.l2c_crip) + { + case 3: /* 1/4 size */ + l2_node_state[node].ways *= 1; + break; + case 2: /* 1/2 size */ + l2_node_state[node].ways *= 2; + break; + case 1: /* 3/4 size */ + l2_node_state[node].ways *= 3; + break; + default: /* Full size */ + l2_node_state[node].ways *= 4; + break; + } + l2_node_state[node].ways /= 4; + } + } + return l2_node_state[node].sets; +} + +/* Return the number of associations in the L2 Cache */ +int bdk_l2c_get_num_assoc(bdk_node_t node) +{ + /* Get the number of sets if the global sets/ways is not setup */ + if (bdk_unlikely(l2_node_state[node].ways == 0)) + bdk_l2c_get_num_sets(node); + return l2_node_state[node].ways; +} + +/** + * Return true if the BDK has locked itself in L2 + * + * @return + */ +int bdk_l2c_is_locked(bdk_node_t node) +{ + /* Determining the lock state of L2 requires reading exact tags from L2 + which varies per chip. Rather than deal with that complexity, we just + keep a flag around saying if the L2 lock functions have been called. + This works for the BDK as its use of locking is very simple */ + return l2_node_state[node].is_locked; +} + diff --git a/src/vendorcode/cavium/bdk/libbdk-hal/bdk-twsi.c b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-twsi.c new file mode 100644 index 0000000000..4fbb78a876 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-twsi.c @@ -0,0 +1,318 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. 
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-mio_tws.h"
+
+#define RECOVERY_UDELAY 5
+#define RECOVERY_CLK_CNT 9
+#define ARBLOST_UDELAY 5000 /* 5ms */
+
+/* This code is an optional part of the BDK. It is only linked in
+   if BDK_REQUIRE() needs it */
+BDK_REQUIRE_DEFINE(TWSI);
+
+/**
+ * Initialize the TWSI blocks. This just sets the clock rate.
+ * Many times stuff will work without calling this, but some
+ * TWSI devices will fail. This is normally called automatically
+ * in bdk-init-main.c.
+ *
+ * @return Zero on success, negative on failure
+ */
+int bdk_twsix_initialize(bdk_node_t node)
+{
+    const int TWSI_BUS_FREQ = 100000;   /* 100 KHz */
+    const int TWSI_THP = 24;            /* TCLK half period (default 24) */
+    const int io_clock_hz = bdk_clock_get_rate(node, BDK_CLOCK_SCLK);
+    int N_divider;
+    int M_divider;
+
+    /* Set the TWSI clock to a conservative TWSI_BUS_FREQ. Compute the
+       clock's M divider based on the SCLK.
+       TWSI freq = (core freq) / (20 x (M+1) x (thp+1) x 2^N)
+       M = ((core freq) / (20 x (TWSI freq) x (thp+1) x 2^N)) - 1 */
+    for (N_divider = 0; N_divider < 8; N_divider++)
+    {
+        M_divider = (io_clock_hz / (20 * TWSI_BUS_FREQ * (TWSI_THP + 1) * (1 << N_divider))) - 1;
+        if (M_divider < 16)
+            break;
+    }
+
+    /* Build the CLKCTL write once and reuse it for every bus. A plain
+       struct is used here; BDK_CSR_DEFINE(sw_twsi, BDK_MIO_TWSX_SW_TWSI(bus))
+       would reference the loop variable "bus" before it is in scope */
+    bdk_mio_twsx_sw_twsi_t sw_twsi;
+    sw_twsi.u = 0;
+    sw_twsi.s.v = 1;        /* Clear valid bit */
+    sw_twsi.s.op = 0x6;     /* See EOP field */
+    sw_twsi.s.r = 0;        /* Select CLKCTL when R = 0 */
+    sw_twsi.s.eop_ia = 3;   /* R=0 selects CLKCTL, R=1 selects STAT */
+    sw_twsi.s.data = ((M_divider & 0xf) << 3) | ((N_divider & 0x7) << 0);
+
+    int num_busses = 2;
+    if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+        num_busses = 6;
+
+    for (int bus = 0; bus < num_busses; bus++)
+    {
+        /* Only init non-slave ports */
+        BDK_CSR_INIT(state, node, BDK_MIO_TWSX_SW_TWSI(bus));
+        if (!state.s.slonly)
+            BDK_CSR_WRITE(node, BDK_MIO_TWSX_SW_TWSI(bus), sw_twsi.u);
+    }
+    return 0;
+}
+
+/**
+ * Do a twsi bus recovery in the case when the last transaction
+ * on the bus has been left unfinished. 
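+ * This follows the standard I2C unstick sequence: toggle SCL (up to
+ * RECOVERY_CLK_CNT pulses) until the stuck slave releases SDA, then
+ * drive a STOP condition. A sketch of the idea, not the exact code:
+ *
+ *   while SDA is low and pulses < RECOVERY_CLK_CNT: toggle SCL, wait RECOVERY_UDELAY
+ *   generate STOP (SDA rises while SCL is high)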
+ * + * @param twsi_id which TWSI bus to use + */ +static void bdk_twsix_recover_bus(bdk_node_t node, int twsi_id) +{ + /* read TWSX_INT */ + BDK_CSR_INIT(twsx_int, node, BDK_MIO_TWSX_INT(twsi_id)); + + for (int i = 0; i < RECOVERY_CLK_CNT * 2; i++) + { + if (!twsx_int.s.scl_ovr) + { + /* SCL shouldn't be low here */ + if (!twsx_int.s.scl) + { + bdk_error("N%d.TWSI%d: SCL is stuck low\n", node, twsi_id); + return; + } + + /* Break if SDA is high */ + if (twsx_int.s.sda) + break; + } + + twsx_int.s.scl_ovr = !twsx_int.s.scl_ovr; + BDK_CSR_WRITE(node, BDK_MIO_TWSX_INT(twsi_id), twsx_int.u); + bdk_wait_usec(RECOVERY_UDELAY); + } + + /* + * Generate STOP condition using the register overrides + * in order to move the higher level controller out of + * the bad state. This is a workaround for the TWSI hardware. + */ + twsx_int.s.scl_ovr = 1; + twsx_int.s.sda_ovr = 1; + BDK_CSR_WRITE(node, BDK_MIO_TWSX_INT(twsi_id), twsx_int.u); + bdk_wait_usec(RECOVERY_UDELAY); + twsx_int.s.scl_ovr = 0; + BDK_CSR_WRITE(node, BDK_MIO_TWSX_INT(twsi_id), twsx_int.u); + bdk_wait_usec(RECOVERY_UDELAY); + twsx_int.s.sda_ovr = 0; + BDK_CSR_WRITE(node, BDK_MIO_TWSX_INT(twsi_id), twsx_int.u); +} + +/** + * Do a twsi read from a 7 bit device address using an (optional) + * internal address. Up to 4 bytes can be read at a time. + * + * @param twsi_id which TWSI bus to use + * @param dev_addr Device address (7 bit) + * @param internal_addr + * Internal address. Can be 0, 1 or 2 bytes in width + * @param num_bytes Number of data bytes to read (1-4) + * @param ia_width_bytes + * Internal address size in bytes (0, 1, or 2) + * + * @return Read data, or -1 on failure + */ +int64_t bdk_twsix_read_ia(bdk_node_t node, int twsi_id, uint8_t dev_addr, uint16_t internal_addr, int num_bytes, int ia_width_bytes) +{ + bdk_mio_twsx_sw_twsi_t sw_twsi_val; + bdk_mio_twsx_sw_twsi_ext_t twsi_ext; + int retry_limit = 5; + + if (num_bytes < 1 || num_bytes > 4 || ia_width_bytes < 0 || ia_width_bytes > 2) + return -1; +retry: + twsi_ext.u = 0; + sw_twsi_val.u = 0; + sw_twsi_val.s.v = 1; + sw_twsi_val.s.r = 1; + sw_twsi_val.s.sovr = 1; + sw_twsi_val.s.size = num_bytes - 1; + sw_twsi_val.s.addr = dev_addr; + + if (ia_width_bytes > 0) + { + sw_twsi_val.s.op = 1; + sw_twsi_val.s.ia = (internal_addr >> 3) & 0x1f; + sw_twsi_val.s.eop_ia = internal_addr & 0x7; + if (ia_width_bytes == 2) + { + sw_twsi_val.s.eia = 1; + twsi_ext.s.ia = internal_addr >> 8; + BDK_CSR_WRITE(node, BDK_MIO_TWSX_SW_TWSI_EXT(twsi_id), twsi_ext.u); + } + } + + BDK_CSR_WRITE(node, BDK_MIO_TWSX_SW_TWSI(twsi_id), sw_twsi_val.u); + if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_MIO_TWSX_SW_TWSI(twsi_id), v, ==, 0, 10000)) + { + bdk_warn("N%d.TWSI%d: Timeout waiting for read to complete...start recovering process\n", + node, twsi_id); + /* perform bus recovery */ + bdk_twsix_recover_bus(node, twsi_id); + if (retry_limit-- > 0) + goto retry; + + bdk_error("N%d.TWSI%d: Timeout waiting for operation to complete\n", node, twsi_id); + return -1; + } + sw_twsi_val.u = BDK_CSR_READ(node, BDK_MIO_TWSX_SW_TWSI(twsi_id)); + if (!sw_twsi_val.s.r) + { + /* Check the reason for the failure. We may need to retry to handle multi-master + ** configurations. 
+ ** Lost arbitration : 0x38, 0x68, 0xB0, 0x78 + ** Core busy as slave: 0x80, 0x88, 0xA0, 0xA8, 0xB8, 0xC0, 0xC8 + */ + if (sw_twsi_val.s.data == 0x38 + || sw_twsi_val.s.data == 0x68 + || sw_twsi_val.s.data == 0xB0 + || sw_twsi_val.s.data == 0x78 + || sw_twsi_val.s.data == 0x80 + || sw_twsi_val.s.data == 0x88 + || sw_twsi_val.s.data == 0xA0 + || sw_twsi_val.s.data == 0xA8 + || sw_twsi_val.s.data == 0xB8 + || sw_twsi_val.s.data == 0xC8) + { + /* + * One of the arbitration lost conditions is recognized. + * The TWSI hardware has switched to the slave mode and + * expects the STOP condition on the bus. + * Make a delay before next retry. + */ + bdk_wait_usec(ARBLOST_UDELAY); + if (retry_limit-- > 0) + goto retry; + } + /* For all other errors, return an error code */ + return -1; + } + + return (sw_twsi_val.s.data & (0xFFFFFFFF >> (32 - num_bytes*8))); +} + + +/** + * Write 1-8 bytes to a TWSI device using an internal address. + * + * @param twsi_id which TWSI interface to use + * @param dev_addr TWSI device address (7 bit only) + * @param internal_addr + * TWSI internal address (0, 8, or 16 bits) + * @param num_bytes Number of bytes to write (1-8) + * @param ia_width_bytes + * internal address width, in bytes (0, 1, 2) + * @param data Data to write. Data is written MSB first on the twsi bus, and + * only the lower num_bytes bytes of the argument are valid. (If + * a 2 byte write is done, only the low 2 bytes of the argument is + * used. + * + * @return Zero on success, -1 on error + */ +int bdk_twsix_write_ia(bdk_node_t node, int twsi_id, uint8_t dev_addr, uint16_t internal_addr, int num_bytes, int ia_width_bytes, uint64_t data) +{ + bdk_mio_twsx_sw_twsi_t sw_twsi_val; + bdk_mio_twsx_sw_twsi_ext_t twsi_ext; + int retry_limit = 5; + int to; + + if (num_bytes < 1 || num_bytes > 8 || ia_width_bytes < 0 || ia_width_bytes > 2) + return -1; + +retry: + twsi_ext.u = 0; + sw_twsi_val.u = 0; + sw_twsi_val.s.v = 1; + sw_twsi_val.s.sovr = 1; + sw_twsi_val.s.size = num_bytes - 1; + sw_twsi_val.s.addr = dev_addr; + sw_twsi_val.s.data = 0xFFFFFFFF & data; + + if (ia_width_bytes > 0) + { + sw_twsi_val.s.op = 1; + sw_twsi_val.s.ia = (internal_addr >> 3) & 0x1f; + sw_twsi_val.s.eop_ia = internal_addr & 0x7; + } + if (ia_width_bytes == 2) + { + sw_twsi_val.s.eia = 1; + twsi_ext.s.ia = internal_addr >> 8; + } + if (num_bytes > 4) + twsi_ext.s.data = data >> 32; + + BDK_CSR_WRITE(node, BDK_MIO_TWSX_SW_TWSI_EXT(twsi_id), twsi_ext.u); + BDK_CSR_WRITE(node, BDK_MIO_TWSX_SW_TWSI(twsi_id), sw_twsi_val.u); + if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_MIO_TWSX_SW_TWSI(twsi_id), v, ==, 0, 10000)) + { + bdk_warn("N%d.TWSI%d: Timeout waiting for write to complete...start recovering process\n", + node, twsi_id); + /* perform bus recovery */ + bdk_twsix_recover_bus(node, twsi_id); + if (retry_limit-- > 0) + goto retry; + + // After retry but still not success, report error and return + bdk_error("N%d.TWSI%d: Timeout waiting for operation to complete\n", node, twsi_id); + return -1; + } + + /* Poll until reads succeed, or polling times out */ + to = 100; + while (to-- > 0) + { + if (bdk_twsix_read_ia(node, twsi_id, dev_addr, 0, 1, 0) >= 0) + break; + } + if (to <= 0) + return -1; + + return 0; +} diff --git a/src/vendorcode/cavium/bdk/libbdk-os/bdk-init.c b/src/vendorcode/cavium/bdk/libbdk-os/bdk-init.c new file mode 100644 index 0000000000..25d6b9eed3 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-os/bdk-init.c @@ -0,0 +1,561 @@ +/***********************license start*********************************** +* Copyright (c) 
2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ +#include <bdk.h> +#include <stdio.h> +#include <unistd.h> +#include "libbdk-arch/bdk-csrs-ap.h" +#include "libbdk-arch/bdk-csrs-l2c.h" +#include "libbdk-arch/bdk-csrs-l2c_tad.h" +#include "libbdk-arch/bdk-csrs-mio_boot.h" +#include "libbdk-arch/bdk-csrs-rom.h" +#include "libbdk-arch/bdk-csrs-uaa.h" + +uint64_t __bdk_init_reg_x0; /* The contents of X0 when this image started */ +uint64_t __bdk_init_reg_x1; /* The contents of X1 when this image started */ +uint64_t __bdk_init_reg_pc; /* The contents of PC when this image started */ +static int64_t __bdk_alive_coremask[BDK_NUMA_MAX_NODES]; + +/** + * Set the baud rate on a UART + * + * @param uart uart to set + * @param baudrate Baud rate (9600, 19200, 115200, etc) + * @param use_flow_control + * Non zero if hardware flow control should be enabled + */ +void bdk_set_baudrate(bdk_node_t node, int uart, int baudrate, int use_flow_control) +{ + /* 1.2.1 Initialization Sequence (Power-On/Hard/Cold Reset) */ + /* 1. Wait for IOI reset (srst_n) to deassert. */ + /* 2. Assert all resets: + a. UAA reset: UCTL_CTL[UAA_RST] = 1 + b. UCTL reset: UCTL_CTL[UCTL_RST] = 1 */ + BDK_CSR_MODIFY(c, node, BDK_UAAX_UCTL_CTL(uart), + c.s.uaa_rst = 1; + c.s.uctl_rst = 1); + + /* 3. Configure the HCLK: + a. Reset the clock dividers: UCTL_CTL[H_CLKDIV_RST] = 1. + b. Select the HCLK frequency + i. UCTL_CTL[H_CLKDIV] = desired value, + ii. UCTL_CTL[H_CLKDIV_EN] = 1 to enable the HCLK. + iii. Readback UCTL_CTL to ensure the values take effect. + c. Deassert the HCLK clock divider reset: UCTL_CTL[H_CLKDIV_RST] = 0. 
*/ + BDK_CSR_MODIFY(c, node, BDK_UAAX_UCTL_CTL(uart), + c.s.h_clkdiv_sel = 3; /* Run at SCLK / 6, matches emulator */ + c.s.h_clk_byp_sel = 0; + c.s.h_clk_en = 1); + BDK_CSR_MODIFY(c, node, BDK_UAAX_UCTL_CTL(uart), + c.s.h_clkdiv_rst = 0); + + /* 4. Wait 20 HCLK cycles from step 3 for HCLK to start and async fifo + to properly reset. */ + bdk_wait(200); /* Overkill */ + + /* 5. Deassert UCTL and UAHC resets: + a. UCTL_CTL[UCTL_RST] = 0 + b. Wait 10 HCLK cycles. + c. UCTL_CTL[UAHC_RST] = 0 + d. You will have to wait 10 HCLK cycles before accessing any + HCLK-only registers. */ + BDK_CSR_MODIFY(c, node, BDK_UAAX_UCTL_CTL(uart), c.s.uctl_rst = 0); + bdk_wait(100); /* Overkill */ + BDK_CSR_MODIFY(c, node, BDK_UAAX_UCTL_CTL(uart), c.s.uaa_rst = 0); + bdk_wait(100); /* Overkill */ + + /* 6. Enable conditional SCLK of UCTL by writing UCTL_CTL[CSCLK_EN] = 1. */ + BDK_CSR_MODIFY(c, node, BDK_UAAX_UCTL_CTL(uart), c.s.csclk_en = 1); + + /* 7. Initialize the integer and fractional baud rate divider registers + UARTIBRD and UARTFBRD as follows: + a. Baud Rate Divisor = UARTCLK/(16xBaud Rate) = BRDI + BRDF + b. The fractional register BRDF, m is calculated as integer(BRDF x 64 + 0.5) + Example calculation: + If the required baud rate is 230400 and hclk = 4MHz then: + Baud Rate Divisor = (4x10^6)/(16x230400) = 1.085 + This means BRDI = 1 and BRDF = 0.085. + Therefore, fractional part, BRDF = integer((0.085x64)+0.5) = 5 + Generated baud rate divider = 1+5/64 = 1.078 */ + uint64_t divisor_x_64 = bdk_clock_get_rate(node, BDK_CLOCK_SCLK) / (baudrate * 16 * 6 / 64); + if (bdk_is_platform(BDK_PLATFORM_EMULATOR)) + { + /* The hardware emulator currently fixes the uart at a fixed rate */ + divisor_x_64 = 64; + } + BDK_CSR_MODIFY(c, node, BDK_UAAX_IBRD(uart), + c.s.baud_divint = divisor_x_64 >> 6); + BDK_CSR_MODIFY(c, node, BDK_UAAX_FBRD(uart), + c.s.baud_divfrac = divisor_x_64 & 0x3f); + + /* 8. Program the line control register UAA(0..1)_LCR_H and the control + register UAA(0..1)_CR */ + BDK_CSR_MODIFY(c, node, BDK_UAAX_LCR_H(uart), + c.s.sps = 0; /* No parity */ + c.s.wlen = 3; /* 8 bits */ + c.s.fen = 1; /* FIFOs enabled */ + c.s.stp2 = 0; /* Use one stop bit, not two */ + c.s.eps = 0; /* No parity */ + c.s.pen = 0; /* No parity */ + c.s.brk = 0); /* Don't send a break */ + BDK_CSR_MODIFY(c, node, BDK_UAAX_CR(uart), + c.s.ctsen = use_flow_control; + c.s.rtsen = use_flow_control; + c.s.out1 = 1; /* Drive data carrier detect */ + c.s.rts = 0; /* Don't override RTS */ + c.s.dtr = 0; /* Don't override DTR */ + c.s.rxe = 1; /* Enable receive */ + c.s.txe = 1; /* Enable transmit */ + c.s.lbe = 0; /* Disable loopback */ + c.s.uarten = 1); /* Enable uart */ +} + +/** + * First C code run when a BDK application starts. It is called by bdk-start.S. + * + * @param image_crc A CRC32 of the entire image before any variables might have been updated by C. + * This should match the CRC32 in the image header. + * @param reg_x0 The contents of the X0 register when the image started. In images loaded after + * the boot stub, this contains a "environment" string containing "BOARD=xxx". The + * use of this is deprecated as it has been replaced with a expandable device tree + * in X1. + * @param reg_x1 The contents of the X1 register when the image started. For all images after the + * boot stub, this contains a physical address of a device tree in memory. This + * should be used by all images to identify and configure the board we are running + * on. + * @param reg_pc This is the PC the code started at before relocation. 
This is useful for + * the first stage to determine if it from trusted or non-trusted code. + */ +void __bdk_init(uint32_t image_crc, uint64_t reg_x0, uint64_t reg_x1, uint64_t reg_pc) __attribute((noreturn)); +void __bdk_init(uint32_t image_crc, uint64_t reg_x0, uint64_t reg_x1, uint64_t reg_pc) +{ + extern void __bdk_exception_current_el_sync_sp0(); + BDK_MSR(VBAR_EL3, __bdk_exception_current_el_sync_sp0); + BDK_MSR(VBAR_EL2, __bdk_exception_current_el_sync_sp0); + BDK_MSR(VBAR_EL1, __bdk_exception_current_el_sync_sp0); + + /* Use Cavium specific function to change memory to normal instead of + device attributes. DCVA47=1 makes unmapped addresses behave as + non-shared memory (not inner or outer shared in ARM speak) */ + bdk_ap_cvmmemctl0_el1_t cvmmemctl0_el1; + BDK_MRS(s3_0_c11_c0_4, cvmmemctl0_el1.u); + cvmmemctl0_el1.s.dcva47 = 1; + BDK_MSR(s3_0_c11_c0_4, cvmmemctl0_el1.u); + + + /* Setup running with no mmu */ + bdk_ap_sctlr_el3_t sctlr_el3; + BDK_MRS(SCTLR_EL3, sctlr_el3.u); + sctlr_el3.s.wxn = 0; /* No write perm changes */ + sctlr_el3.s.i = 1; /* Enable Icache */ + sctlr_el3.s.sa = 1; /* Enable stack alignment checking */ + sctlr_el3.s.cc = 1; /* Enable Dcache */ + sctlr_el3.s.aa = 0; /* Allow unaligned accesses */ + sctlr_el3.s.m = 0; /* Disable MMU */ + BDK_MSR(SCTLR_EL3, sctlr_el3.u); + + bdk_node_t node = bdk_numa_local(); + bdk_numa_set_exists(node); + + /* Default color, Reset scroll region and goto bottom */ + static const char BANNER_1[] = "\33[0m\33[1;r\33[100;1H" + "\n\n\nCavium SOC\n"; + static const char BANNER_2[] = "Locking L2 cache\n"; + static const char BANNER_CRC_RIGHT[] = "PASS: CRC32 verification\n"; + static const char BANNER_CRC_WRONG[] = "FAIL: CRC32 verification\n"; + static const char BANNER_3[] = "Transferring to thread scheduler\n"; + + BDK_MSR(TPIDR_EL3, 0); + + if (bdk_is_boot_core()) + { + /* Initialize the platform */ + __bdk_platform_init(); + if (!bdk_is_platform(BDK_PLATFORM_EMULATOR) && CAVIUM_IS_MODEL(CAVIUM_CN88XX)) + { + BDK_CSR_INIT(l2c_oci_ctl, node, BDK_L2C_OCI_CTL); + if (l2c_oci_ctl.s.iofrcl) + { + /* CCPI isn't being used, so don't reset if the links change */ + BDK_CSR_WRITE(node, BDK_RST_OCX, 0); + BDK_CSR_READ(node, BDK_RST_OCX); + /* Force CCPI links down so they aren't trying to run while + we're configuring the QLMs */ + __bdk_init_ccpi_early(1); + } + } + + /* AP-23192: The DAP in pass 1.0 has an issue where its state isn't cleared for + cores in reset. Put the DAPs in reset as their associated cores are + also in reset */ + if (!bdk_is_platform(BDK_PLATFORM_EMULATOR) && CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_0)) + BDK_CSR_WRITE(node, BDK_RST_DBG_RESET, BDK_CSR_READ(node, BDK_RST_PP_RESET)); + + /* Enable the timer */ + BDK_MSR(CNTFRQ_EL0, BDK_GTI_RATE); /* Needed for Asim */ + bdk_clock_setup(node); + + /* Only setup the uarts if they haven't been already setup */ + BDK_CSR_INIT(uctl_ctl0, node, BDK_UAAX_UCTL_CTL(0)); + if (!uctl_ctl0.s.h_clk_en) + bdk_set_baudrate(node, 0, BDK_UART_BAUDRATE, 0); + BDK_CSR_INIT(uctl_ctl1, node, BDK_UAAX_UCTL_CTL(1)); + if (!uctl_ctl1.s.h_clk_en) + bdk_set_baudrate(node, 1, BDK_UART_BAUDRATE, 0); + + __bdk_fs_init_early(); + if (BDK_SHOW_BOOT_BANNERS) + write(1, BANNER_1, sizeof(BANNER_1)-1); + + /* Only lock L2 if DDR3 isn't initialized */ + if (bdk_is_platform(BDK_PLATFORM_HW) && !__bdk_is_dram_enabled(node)) + { + if (BDK_TRACE_ENABLE_INIT) + write(1, BANNER_2, sizeof(BANNER_2)-1); + /* Lock the entire cache for chips with less than 4MB of + L2/LLC. 
Larger chips lock only 3/4 of the
+               cache, leaving 1/4 available to speed up DRAM init and testing */
+            int lock_size = bdk_l2c_get_cache_size_bytes(node);
+            if (lock_size >= (4 << 20))
+                lock_size = lock_size * 3 / 4;
+            bdk_l2c_lock_mem_region(node, bdk_numa_get_address(node, 0), lock_size);
+            /* The locked region isn't considered dirty by L2. Do a
+               read/write of each cache line to force each to be dirty. This
+               is needed across the whole line to make sure the L2 dirty bits
+               are all up to date */
+            volatile uint64_t *ptr = bdk_phys_to_ptr(bdk_numa_get_address(node, 8));
+            /* The above pointer got address 8 to avoid NULL pointer checking
+               in bdk_phys_to_ptr(). Correct it here */
+            ptr--;
+            uint64_t *end = bdk_phys_to_ptr(bdk_numa_get_address(node, bdk_l2c_get_cache_size_bytes(node)));
+            while (ptr < end)
+            {
+                *ptr = *ptr;
+                ptr++;
+            }
+            /* The above locking will cause L2 to load zeros without DRAM setup.
+               This will cause L2C_TADX_INT[rddislmc], which we suppress below */
+            BDK_CSR_DEFINE(l2c_tadx_int, BDK_L2C_TADX_INT_W1C(0));
+            l2c_tadx_int.u = 0;
+            l2c_tadx_int.s.wrdislmc = 1;
+            l2c_tadx_int.s.rddislmc = 1;
+            l2c_tadx_int.s.rdnxm = 1;
+
+            BDK_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(0), l2c_tadx_int.u);
+            if (CAVIUM_IS_MODEL(CAVIUM_CN88XX) || CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+            {
+                BDK_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(1), l2c_tadx_int.u);
+                BDK_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(2), l2c_tadx_int.u);
+                BDK_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(3), l2c_tadx_int.u);
+            }
+            if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+            {
+                BDK_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(4), l2c_tadx_int.u);
+                BDK_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(5), l2c_tadx_int.u);
+                BDK_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(6), l2c_tadx_int.u);
+                BDK_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(7), l2c_tadx_int.u);
+            }
+        }
+
+        /* Validate the image CRC */
+        extern void _start();
+        uint32_t *ptr_crc32 = (uint32_t *)(_start + 16);
+        uint32_t correct_crc = bdk_le32_to_cpu(*ptr_crc32);
+        if (correct_crc == image_crc)
+            write(1, BANNER_CRC_RIGHT, sizeof(BANNER_CRC_RIGHT) - 1);
+        else
+            write(1, BANNER_CRC_WRONG, sizeof(BANNER_CRC_WRONG) - 1);
+
+        if (BDK_TRACE_ENABLE_INIT)
+            write(1, BANNER_3, sizeof(BANNER_3)-1);
+        bdk_thread_initialize();
+    }
+
+    /* Enable the core timer */
+    BDK_MSR(CNTFRQ_EL0, BDK_GTI_RATE); /* Needed for Asim */
+    bdk_ap_cntps_ctl_el1_t cntps_ctl_el1;
+    cntps_ctl_el1.u = 0;
+    cntps_ctl_el1.s.imask = 1;
+    cntps_ctl_el1.s.enable = 1;
+    BDK_MSR(CNTPS_CTL_EL1, cntps_ctl_el1.u);
+
+    /* Setup an exception stack in case we crash */
+    int EX_STACK_SIZE = 16384;
+    void *exception_stack = malloc(EX_STACK_SIZE);
+    extern void __bdk_init_exception_stack(void *ptr);
+    __bdk_init_exception_stack(exception_stack + EX_STACK_SIZE);
+
+    bdk_atomic_add64(&__bdk_alive_coremask[node], bdk_core_to_mask());
+
+    /* Record our input registers for use later */
+    __bdk_init_reg_x0 = reg_x0;
+    __bdk_init_reg_x1 = reg_x1;
+    __bdk_init_reg_pc = reg_pc;
+    bdk_thread_first(__bdk_init_main, 0, NULL, 0);
+}
+
+/**
+ * Call this function to take secondary cores out of reset and have
+ * them start running threads
+ *
+ * @param node     Node to use in a Numa setup. Can be an exact ID or a special
+ *                 value.
+ * @param coremask Cores to start. Zero is a shortcut for all.
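+ *
+ * A minimal usage sketch (release all remaining cores on the local
+ * node; coremask 0 means "all"):
+ *
+ *   bdk_init_cores(bdk_numa_local(), 0);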
+ *
+ * @return Zero on success, negative on failure.
+ */
+int bdk_init_cores(bdk_node_t node, uint64_t coremask)
+{
+    extern void __bdk_start_cores();
+    if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX))
+    {
+        /* Write the address of the main entry point */
+        BDK_TRACE(INIT, "N%d: Setting address for boot jump\n", node);
+        BDK_CSR_WRITE(node, BDK_MIO_BOOT_AP_JUMP, (uint64_t)__bdk_start_cores);
+    }
+    else
+    {
+        BDK_TRACE(INIT, "N%d: Setting ROM boot code\n", node);
+        /* Assembly for ROM memory:
+               d508711f     ic    ialluis
+               d503201f     nop
+               58000040     ldr   x0, 328 <branch_addr>
+               d61f0000     br    x0
+           branch_addr:
+           Memory is little endian, so 64 bit constants have the first
+           instruction in the low word */
+        BDK_CSR_WRITE(node, BDK_ROM_MEMX(0), 0xd503201fd508711f);
+        BDK_CSR_WRITE(node, BDK_ROM_MEMX(1), 0xd61f000058000040);
+        BDK_CSR_WRITE(node, BDK_ROM_MEMX(2), (uint64_t)__bdk_start_cores);
+    }
+
+    /* Choose all cores by default */
+    if (coremask == 0)
+        coremask = -1;
+
+    /* Limit to the cores that aren't already running */
+    coremask &= ~__bdk_alive_coremask[node];
+
+    /* Limit to the cores that are specified in configuration menu */
+    uint64_t config_coremask = bdk_config_get_int(BDK_CONFIG_COREMASK);
+    if (config_coremask)
+        coremask &= config_coremask;
+
+    /* Limit to the cores that exist */
+    coremask &= (1ull<<bdk_get_num_cores(node)) - 1;
+
+    uint64_t reset = BDK_CSR_READ(node, BDK_RST_PP_RESET);
+    BDK_TRACE(INIT, "N%d: Cores currently in reset: 0x%lx\n", node, reset);
+    uint64_t need_reset_off = reset & coremask;
+    if (need_reset_off)
+    {
+        BDK_TRACE(INIT, "N%d: Taking cores out of reset (0x%lx)\n", node, need_reset_off);
+        BDK_CSR_WRITE(node, BDK_RST_PP_RESET, reset & ~need_reset_off);
+        /* Wait for cores to finish coming out of reset */
+        bdk_wait_usec(1);
+        if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_RST_PP_PENDING, pend, ==, 0, 100000))
+            bdk_error("Timeout waiting for reset pending to clear");
+        /* AP-23192: The DAP in pass 1.0 has an issue where its state isn't cleared for
+           cores in reset. Put the DAPs in reset as their associated cores are
+           also in reset */
+        if (!bdk_is_platform(BDK_PLATFORM_EMULATOR) && CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_0))
+            BDK_CSR_WRITE(node, BDK_RST_DBG_RESET, reset & ~need_reset_off);
+    }
+
+    BDK_TRACE(INIT, "N%d: Wait up to 1s for the cores to boot\n", node);
+    uint64_t timeout = bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_TIME) + bdk_clock_get_count(BDK_CLOCK_TIME);
+    while ((bdk_clock_get_count(BDK_CLOCK_TIME) < timeout) && ((bdk_atomic_get64(&__bdk_alive_coremask[node]) & coremask) != coremask))
+    {
+        /* Tight spin, no thread schedules */
+    }
+
+    if ((bdk_atomic_get64(&__bdk_alive_coremask[node]) & coremask) != coremask)
+    {
+        bdk_error("Node %d: Some cores failed to start. Alive mask 0x%lx, requested 0x%lx\n",
+            node, __bdk_alive_coremask[node], coremask);
+        return -1;
+    }
+    BDK_TRACE(INIT, "N%d: All cores booted\n", node);
+    return 0;
+}
+
+/**
+ * Put cores back in reset and power them down
+ *
+ * @param node     Node to update
+ * @param coremask Each bit will be a core put in reset. 
Cores already in reset are unaffected + * + * @return Zero on success, negative on failure + */ +int bdk_reset_cores(bdk_node_t node, uint64_t coremask) +{ + extern void __bdk_reset_thread(int arg1, void *arg2); + + /* Limit to the cores that exist */ + coremask &= (1ull<<bdk_get_num_cores(node)) - 1; + + /* Update which cores are in reset */ + uint64_t reset = BDK_CSR_READ(node, BDK_RST_PP_RESET); + BDK_TRACE(INIT, "N%d: Cores currently in reset: 0x%lx\n", node, reset); + coremask &= ~reset; + BDK_TRACE(INIT, "N%d: Cores to put into reset: 0x%lx\n", node, coremask); + + /* Check if everything is already done */ + if (coremask == 0) + return 0; + + int num_cores = bdk_get_num_cores(node); + for (int core = 0; core < num_cores; core++) + { + uint64_t my_mask = 1ull << core; + /* Skip cores not in mask */ + if ((coremask & my_mask) == 0) + continue; + BDK_TRACE(INIT, "N%d: Telling core %d to go into reset\n", node, core); + if (bdk_thread_create(node, my_mask, __bdk_reset_thread, 0, NULL, 0)) + { + bdk_error("Failed to create thread for putting core in reset"); + continue; + } + /* Clear the core in the alive mask */ + bdk_atomic_fetch_and_bclr64_nosync((uint64_t*)&__bdk_alive_coremask[node], my_mask); + } + + BDK_TRACE(INIT, "N%d: Waiting for all reset bits to be set\n", node); + uint64_t timeout = bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_TIME) + bdk_clock_get_count(BDK_CLOCK_TIME); + while (bdk_clock_get_count(BDK_CLOCK_TIME) < timeout) + { + reset = BDK_CSR_READ(node, BDK_RST_PP_RESET); + if ((reset & coremask) == coremask) + break; + bdk_thread_yield(); + } + /* AP-23192: The DAP in pass 1.0 has an issue where its state isn't cleared for + cores in reset. Put the DAPs in reset as their associated cores are + also in reset */ + if (!bdk_is_platform(BDK_PLATFORM_EMULATOR) && CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_0)) + BDK_CSR_WRITE(node, BDK_RST_DBG_RESET, BDK_CSR_READ(node, BDK_RST_PP_RESET)); + + BDK_TRACE(INIT, "N%d: Cores now in reset: 0x%lx\n", node, reset); + + return ((reset & coremask) == coremask) ? 0 : -1; +} + +/** + * Call this function to take secondary nodes and cores out of + * reset and have them start running threads + * + * @param skip_cores If non-zero, cores are not started. Only the nodes are setup + * @param ccpi_sw_gbaud + * If CCPI is in software mode, this is the speed the CCPI QLMs will be configured + * for + * + * @return Zero on success, negative on failure. + */ +int bdk_init_nodes(int skip_cores, int ccpi_sw_gbaud) +{ + int result = 0; + int do_oci_init = (__bdk_init_ccpi_links != NULL); + + /* Only init OCI/CCPI on chips that support it */ + do_oci_init &= CAVIUM_IS_MODEL(CAVIUM_CN88XX); + + /* Check that the BDK config says multi-node is enabled */ + if (bdk_config_get_int(BDK_CONFIG_MULTI_NODE) == 0) + do_oci_init = 0; + + /* Simulation under Asim is a special case. 
Multi-node is simulated, but
+       not the details of the low level link */
+    if (do_oci_init && bdk_is_platform(BDK_PLATFORM_ASIM))
+    {
+        bdk_numa_set_exists(0);
+        bdk_numa_set_exists(1);
+        /* Skip the rest in simulation */
+        do_oci_init = 0;
+    }
+
+    if (do_oci_init)
+    {
+        if (__bdk_init_ccpi_links(ccpi_sw_gbaud) == 0)
+        {
+            /* Don't run node init if L2C_OCI_CTL shows that it has already
+               been done */
+            BDK_CSR_INIT(l2c_oci_ctl, bdk_numa_local(), BDK_L2C_OCI_CTL);
+            if (l2c_oci_ctl.s.enaoci == 0)
+                result |= __bdk_init_ccpi_multinode();
+        }
+    }
+
+    /* Start cores on all nodes unless it was disabled */
+    if (!skip_cores)
+    {
+        for (bdk_node_t node=0; node<BDK_NUMA_MAX_NODES; node++)
+        {
+            if (bdk_numa_exists(node))
+                result |= bdk_init_cores(node, 0);
+        }
+    }
+    return result;
+}
+
+/**
+ * Get the coremask of the cores actively running the BDK. Doesn't count cores
+ * that aren't booted.
+ *
+ * @param node Node to coremask the count for
+ *
+ * @return 64bit bitmask
+ */
+uint64_t bdk_get_running_coremask(bdk_node_t node)
+{
+    return __bdk_alive_coremask[node];
+}
+
+/**
+ * Return the number of cores actively running in the BDK for the given node.
+ * Not an inline so it can be called from LUA.
+ *
+ * @param node Node to get the core count for
+ *
+ * @return Number of cores running. Doesn't count cores that aren't booted
+ */
+int bdk_get_num_running_cores(bdk_node_t node)
+{
+    return __builtin_popcountl(bdk_get_running_coremask(node));
+}
+
diff --git a/src/vendorcode/cavium/bdk/libbdk-os/bdk-thread.c b/src/vendorcode/cavium/bdk/libbdk-os/bdk-thread.c new file mode 100644 index 0000000000..df1d02864b --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-os/bdk-thread.c @@ -0,0 +1,384 @@ +/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. 
THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ +#include <bdk.h> +#include <stdio.h> +#include <malloc.h> + +#define STACK_CANARY 0x0BADBADBADBADBADull + +typedef struct bdk_thread +{ + struct bdk_thread *next; + uint64_t coremask; + uint64_t gpr[32]; /* Reg 31 is SP */ + struct _reent lib_state; + uint64_t stack_canary; + uint64_t stack[0]; +} bdk_thread_t; + +typedef struct +{ + bdk_thread_t* head; + bdk_thread_t* tail; + bdk_spinlock_t lock; + int64_t __padding1[16-3]; /* Stats in different cache line for speed */ + int64_t stat_num_threads; + int64_t stat_no_schedulable_threads; + int64_t stat_next_calls; + int64_t stat_next_walks; + int64_t __padding2[16-4]; +} bdk_thread_node_t; + +static bdk_thread_node_t bdk_thread_node[BDK_NUMA_MAX_NODES]; + +extern void __bdk_thread_switch(bdk_thread_t* next_context, int delete_old); + +/** + * Main thread body for all threads + * + * @param func User function to call + * @param arg0 First argument to the user function + * @param arg1 Second argument to the user function + */ +static void __bdk_thread_body(bdk_thread_func_t func, int arg0, void *arg1) +{ + func(arg0, arg1); + bdk_thread_destroy(); +} + + +/** + * Initialize the BDK thread library + * + * @return Zero on success, negative on failure + */ +int bdk_thread_initialize(void) +{ + bdk_zero_memory(bdk_thread_node, sizeof(bdk_thread_node)); + _REENT_INIT_PTR(&__bdk_thread_global_reent); + return 0; +} + +static bdk_thread_t *__bdk_thread_next(void) +{ + bdk_thread_node_t *t_node = &bdk_thread_node[bdk_numa_local()]; + uint64_t coremask = bdk_core_to_mask(); + + bdk_atomic_add64_nosync(&t_node->stat_next_calls, 1); + bdk_thread_t *prev = NULL; + bdk_thread_t *next = t_node->head; + int walks = 0; + while (next && !(next->coremask & coremask)) + { + prev = next; + next = next->next; + walks++; + } + if (walks) + bdk_atomic_add64_nosync(&t_node->stat_next_walks, walks); + + if (next) + { + if (t_node->tail == next) + t_node->tail = prev; + if (prev) + prev->next = next->next; + else + t_node->head = next->next; + next->next = NULL; + } + else + bdk_atomic_add64_nosync(&t_node->stat_no_schedulable_threads, 1); + + return next; +} + +/** + * Yield the current thread and run a new one + */ +void bdk_thread_yield(void) +{ + if (BDK_DBG_MAGIC_ENABLE && (bdk_numa_local() == bdk_numa_master())) + bdk_dbg_check_magic(); + bdk_thread_node_t *t_node = &bdk_thread_node[bdk_numa_local()]; + bdk_thread_t *current; + BDK_MRS_NV(TPIDR_EL3, current); + + /* Yield can be called without a thread context during core init. The + cores call bdk_wait_usec(), which yields. In this case yielding + does nothing */ + if (bdk_unlikely(!current)) + return; + + if (bdk_unlikely(current->stack_canary != STACK_CANARY)) + bdk_fatal("bdk_thread_yield() detected a stack overflow\n"); + + if (CAVIUM_IS_MODEL(CAVIUM_CN83XX)) + bdk_sso_process_work(); + + if (t_node->head == NULL) + return; + + bdk_spinlock_lock(&t_node->lock); + + /* Find the first thread that can run on this core */ + bdk_thread_t *next = __bdk_thread_next(); + + /* If next is NULL then there are no other threads ready to run and we + will continue without doing anything */ + if (next) + { + __bdk_thread_switch(next, 0); + /* Unlock performed in __bdk_thread_switch_complete */ + return; + } + bdk_spinlock_unlock(&t_node->lock); +} + + +/** + * Create a new thread and return it. The thread will not be scheduled + * as it isn't put in the thread list. 
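+ *
+ * The register seeding below is what makes the first context switch
+ * work: x0-x2 carry __bdk_thread_body's arguments and x30 (the link
+ * register) points at __bdk_thread_body, so when __bdk_thread_switch
+ * restores this context and "returns", the new thread starts there.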
+ * + * @param coremask Mask of cores the thread can run on. Each set bit is an allowed + * core. Zero and -1 are both shortcuts for all cores. + * @param func Function to run as a thread + * @param arg0 First argument to the function + * @param arg1 Second argument to the function + * @param stack_size Stack size for the new thread. Set to zero for the system default. + * + * @return Thread or NULL on failure + */ +static void *__bdk_thread_create(uint64_t coremask, bdk_thread_func_t func, int arg0, void *arg1, int stack_size) +{ + bdk_thread_t *thread; + if (!stack_size) + stack_size = BDK_THREAD_DEFAULT_STACK_SIZE; + + thread = memalign(16, sizeof(bdk_thread_t) + stack_size); + if (thread == NULL) + { + bdk_error("Unable to allocate memory for new thread\n"); + return NULL; + } + memset(thread, 0, sizeof(bdk_thread_t) + stack_size); + if (coremask == 0) + coremask = -1; + thread->coremask = coremask; + thread->gpr[0] = (uint64_t)func; /* x0 = Argument 0 to __bdk_thread_body */ + thread->gpr[1] = arg0; /* x1 = Argument 1 to __bdk_thread_body */ + thread->gpr[2] = (uint64_t)arg1; /* x2 = Argument 2 to __bdk_thread_body */ + thread->gpr[29] = 0; /* x29 = Frame pointer */ + thread->gpr[30] = (uint64_t)__bdk_thread_body; /* x30 = Link register */ + thread->gpr[31] = (uint64_t)thread->stack + stack_size; /* x31 = Stack pointer */ + if (thread->gpr[31] & 0xf) + bdk_fatal("Stack not aligned 0x%lx\n", thread->gpr[31]); + _REENT_INIT_PTR(&thread->lib_state); + extern void __sinit(struct _reent *); + __sinit(&thread->lib_state); + thread->stack_canary = STACK_CANARY; + thread->next = NULL; + return thread; +} + + +/** + * Create a new thread. The thread may be scheduled to any of the + * cores supplied in the coremask. Note that a single thread is + * created and may only run on one core at a time. The thread may + * not start executing until the next yield call if all cores in + * the coremask are currently busy. + * + * @param node Node to use in a Numa setup. Can be an exact ID or a + * special value. + * @param coremask Mask of cores the thread can run on. Each set bit is an allowed + * core. Zero and -1 are both shortcuts for all cores. + * @param func Function to run as a thread + * @param arg0 First argument to the function + * @param arg1 Second argument to the function + * @param stack_size Stack size for the new thread. Set to zero for the system default. + * + * @return Zero on success, negative on failure + */ +int bdk_thread_create(bdk_node_t node, uint64_t coremask, bdk_thread_func_t func, int arg0, void *arg1, int stack_size) +{ + bdk_thread_node_t *t_node = &bdk_thread_node[node]; + bdk_thread_t *thread = __bdk_thread_create(coremask, func, arg0, arg1, stack_size); + if (thread == NULL) + return -1; + + bdk_atomic_add64_nosync(&t_node->stat_num_threads, 1); + bdk_spinlock_lock(&t_node->lock); + if (t_node->tail) + t_node->tail->next = thread; + else + t_node->head = thread; + t_node->tail = thread; + bdk_spinlock_unlock(&t_node->lock); + BDK_SEV; + return 0; +} + + +/** + * Destroy the currently running thread. This never returns. 
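+ *
+ * Threads normally exit by returning from their entry function;
+ * __bdk_thread_body() then calls this on their behalf. Calling it
+ * directly from thread context is equivalent, as in this sketch
+ * (worker() and do_work() are hypothetical):
+ *
+ *   void worker(int arg0, void *arg1)
+ *   {
+ *       do_work(arg0, arg1);
+ *       bdk_thread_destroy();
+ *   }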
+ */
+void bdk_thread_destroy(void)
+{
+    bdk_thread_node_t *t_node = &bdk_thread_node[bdk_numa_local()];
+    bdk_thread_t *current;
+    BDK_MRS_NV(TPIDR_EL3, current);
+    if (bdk_unlikely(!current))
+        bdk_fatal("bdk_thread_destroy() called without thread context\n");
+    if (bdk_unlikely(current->stack_canary != STACK_CANARY))
+        bdk_fatal("bdk_thread_destroy() detected a stack overflow\n");
+
+    fflush(NULL);
+    bdk_atomic_add64_nosync(&t_node->stat_num_threads, -1);
+
+    while (1)
+    {
+        if (BDK_DBG_MAGIC_ENABLE && (bdk_numa_local() == bdk_numa_master()))
+            bdk_dbg_check_magic();
+        if (t_node->head)
+        {
+            bdk_spinlock_lock(&t_node->lock);
+            /* Find the first thread that can run on this core */
+            bdk_thread_t *next = __bdk_thread_next();
+
+            /* If next is NULL then there are no other threads ready to run and we
+               will continue without doing anything */
+            if (next)
+            {
+                __bdk_thread_switch(next, 1);
+                bdk_fatal("bdk_thread_destroy() should never get here\n");
+            }
+            bdk_spinlock_unlock(&t_node->lock);
+        }
+        if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+            bdk_sso_process_work();
+        BDK_WFE;
+    }
+}
+
+struct _reent __bdk_thread_global_reent;
+struct _reent *__bdk_thread_getreent(void)
+{
+    bdk_thread_t *current;
+    BDK_MRS_NV(TPIDR_EL3, current);
+    if (current)
+        return &current->lib_state;
+    else
+        return &__bdk_thread_global_reent;
+}
+
+void __bdk_thread_switch_complete(bdk_thread_t* old_context, int delete_old)
+{
+    bdk_thread_node_t *t_node = &bdk_thread_node[bdk_numa_local()];
+    if (bdk_unlikely(delete_old))
+    {
+        bdk_spinlock_unlock(&t_node->lock);
+        free(old_context);
+    }
+    else
+    {
+        if (bdk_likely(old_context))
+        {
+            if (t_node->tail)
+                t_node->tail->next = old_context;
+            else
+                t_node->head = old_context;
+            t_node->tail = old_context;
+        }
+        bdk_spinlock_unlock(&t_node->lock);
+        if (bdk_likely(old_context))
+            BDK_SEV;
+    }
+}
+
+
+/**
+ * Called to create the initial thread for a CPU. Must be called
+ * once for each CPU.
+ *
+ * @param func  Function to run as new thread. It is guaranteed that this will
+ *              be the next thread run by the core. 
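+ *
+ * For example, the boot flow in __bdk_init() hands the boot core to
+ * the scheduler with:
+ *
+ *   bdk_thread_first(__bdk_init_main, 0, NULL, 0);
+ *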
+ * @param arg0 First thread argument + * @param arg1 Second thread argument + * @param stack_size Initial stack size, or zero for the default + */ +void bdk_thread_first(bdk_thread_func_t func, int arg0, void *arg1, int stack_size) +{ + bdk_thread_node_t *t_node = &bdk_thread_node[bdk_numa_local()]; + void *thread = __bdk_thread_create(bdk_core_to_mask(), func, arg0, arg1, stack_size); + if (thread) + { + bdk_atomic_add64_nosync(&t_node->stat_num_threads, 1); + bdk_spinlock_lock(&t_node->lock); + __bdk_thread_switch(thread, 0); + } + bdk_fatal("Create of __bdk_init_main thread failed\n"); +} + +/** + * Display statistics about the number of threads and scheduling + */ +void bdk_thread_show_stats() +{ + for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++) + { + if (!bdk_numa_exists(node)) + continue; + bdk_thread_node_t *t_node = &bdk_thread_node[node]; + printf("Node %d\n", node); + printf(" Active threads: %ld\n", t_node->stat_num_threads); + printf(" Schedule checks: %ld\n", t_node->stat_next_calls); + int64_t div = t_node->stat_next_calls; + if (!div) + div = 1; + printf(" Average walk depth: %ld\n", + t_node->stat_next_walks / div); + printf(" Not switching: %ld (%ld%%)\n", + t_node->stat_no_schedulable_threads, + t_node->stat_no_schedulable_threads * 100 / div); + bdk_atomic_set64(&t_node->stat_next_calls, 0); + bdk_atomic_set64(&t_node->stat_next_walks, 0); + bdk_atomic_set64(&t_node->stat_no_schedulable_threads, 0); + } +} diff --git a/src/vendorcode/cavium/bdk/libbdk-trust/bdk-trust.c b/src/vendorcode/cavium/bdk/libbdk-trust/bdk-trust.c new file mode 100644 index 0000000000..27c3294479 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libbdk-trust/bdk-trust.c @@ -0,0 +1,286 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. 
THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ +#include <bdk.h> +#include "libbdk-arch/bdk-csrs-fusf.h" +#include "libbdk-arch/bdk-csrs-rom.h" + +/* The define BDK_TRUST_HARD_BLOW_NV controls whether the BDK will + hard blow the secure NV counter on boot. This is needed for a + production system, but can be dangerous in a development + environment. The default value of 0 is to prevent bricking of + chips due to CSIB[NVCOUNT] mistakes. BDK_TRUST_HARD_BLOW_NV must + be changed to a 1 for production. The code below will display a + warning if BDK_TRUST_HARD_BLOW_NV=0 in a trusted boot to remind + you */ +#define BDK_TRUST_HARD_BLOW_NV 0 + +/* The CSIB used to boot will be stored here by bsk-start.S */ +union bdk_rom_csib_s __bdk_trust_csib __attribute__((section("init"))); +static bdk_trust_level_t __bdk_trust_level = BDK_TRUST_LEVEL_BROKEN; + +/** + * Update the fused secure NV counter to reflect the CSIB[NVCOUNT] value. In + * production systems, be sure to set BDK_TRUST_HARD_BLOW_NV=1. + */ +static void __bdk_program_nv_counter(void) +{ + int hw_nv = bdk_trust_get_nv_counter(); + int csib_nv = __bdk_trust_csib.s.nvcnt; + + if (!BDK_TRUST_HARD_BLOW_NV) + { + printf("\33[1m"); /* Bold */ + bdk_warn("\n"); + bdk_warn("********************************************************\n"); + bdk_warn("* Configured for soft blow of secure NV counter. This\n"); + bdk_warn("* build is not suitable for production trusted boot.\n"); + bdk_warn("********************************************************\n"); + bdk_warn("\n"); + printf("\33[0m"); /* Normal */ + } + + /* Check if the CSIB NV counter is less than the HW fused values. + This means the image is an old rollback. Refuse to run */ + if (csib_nv < hw_nv) + bdk_fatal("CSIB[NVCOUNT] is less than FUSF_CTL[ROM_T_CNT]. Image rollback not allowed\n"); + /* If the CSIB NV counter matches the HW fuses, everything is + good */ + if (csib_nv == hw_nv) + return; + /* CSIB NV counter is larger than the HW fuses. 
We must blow + fuses to move the hardware counter forward, protecting from + image rollback */ + if (BDK_TRUST_HARD_BLOW_NV) + { + BDK_TRACE(INIT, "Trust: Hard blow secure NV counter to %d\n", csib_nv); + uint64_t v = 1ull << BDK_FUSF_FUSE_NUM_E_ROM_T_CNTX(csib_nv - 1); + bdk_fuse_field_hard_blow(bdk_numa_master(), BDK_FUSF_FUSE_NUM_E_FUSF_LCK, v, 0); + } + else + { + BDK_TRACE(INIT, "Trust: Soft blow secure NV counter to %d\n", csib_nv); + bdk_fuse_field_soft_blow(bdk_numa_master(), BDK_FUSF_FUSE_NUM_E_ROM_T_CNTX(csib_nv - 1)); + } +} + +/** + * Called by boot stub (TBL1FW) to initialize the state of trust + */ +void __bdk_trust_init(void) +{ + extern uint64_t __bdk_init_reg_pc; /* The contents of PC when this image started */ + const bdk_node_t node = bdk_numa_local(); + volatile uint64_t *huk = bdk_phys_to_ptr(bdk_numa_get_address(node, BDK_FUSF_HUKX(0))); + + /* Non-trusted boot address */ + if (__bdk_init_reg_pc == 0x120000) + { + __bdk_trust_level = BDK_TRUST_LEVEL_NONE; + if (huk[0] | huk[1]) + { + BDK_TRACE(INIT, "Trust: Initial image, Non-trusted boot with HUK\n"); + goto fail_trust; + } + else + { + BDK_TRACE(INIT, "Trust: Initial image, Non-trusted boot without HUK\n"); + goto skip_trust; + } + } + + if (__bdk_init_reg_pc != 0x150000) + { + /* Not the first image */ + BDK_CSR_INIT(rst_boot, node, BDK_RST_BOOT); + if (!rst_boot.s.trusted_mode) + { + __bdk_trust_level = BDK_TRUST_LEVEL_NONE; + BDK_TRACE(INIT, "Trust: Secondary image, non-trusted boot\n"); + goto skip_trust; + } + int csibsize = 0; + const union bdk_rom_csib_s *csib = bdk_config_get_blob(&csibsize, BDK_CONFIG_TRUST_CSIB); + if (!csib) + { + __bdk_trust_level = BDK_TRUST_LEVEL_NONE; + BDK_TRACE(INIT, "Trust: Secondary image, non-trusted boot\n"); + goto skip_trust; + } + if (csibsize != sizeof(__bdk_trust_csib)) + { + BDK_TRACE(INIT, "Trust: Secondary image, Trusted boot with corrupt CSIB, trust broken\n"); + goto fail_trust; + } + /* Record our trust level */ + switch (csib->s.crypt) + { + case 0: + __bdk_trust_level = BDK_TRUST_LEVEL_SIGNED; + BDK_TRACE(INIT, "Trust: Secondary image, Trusted boot, no encryption\n"); + goto success_trust; + case 1: + __bdk_trust_level = BDK_TRUST_LEVEL_SIGNED_SSK; + BDK_TRACE(INIT, "Trust: Secondary image, Trusted boot, SSK encryption\n"); + goto success_trust; + case 2: + __bdk_trust_level = BDK_TRUST_LEVEL_SIGNED_BSSK; + BDK_TRACE(INIT, "Trust: Secondary image, Trusted boot, BSSK encryption\n"); + goto success_trust; + default: + __bdk_trust_level = BDK_TRUST_LEVEL_BROKEN; + BDK_TRACE(INIT, "Trust: Secondary image, Trusted boot, Corrupt CSIB[crypt], trust broken\n"); + goto fail_trust; + } + } + + /* Copy the Root of Trust public key out of the CSIB */ + volatile uint64_t *rot_pub_key = bdk_key_alloc(node, 64); + if (!rot_pub_key) + { + __bdk_trust_level = BDK_TRUST_LEVEL_BROKEN; + BDK_TRACE(INIT, "Trust: Failed to allocate ROT memory, trust broken\n"); + goto fail_trust; + } + rot_pub_key[0] = bdk_le64_to_cpu(__bdk_trust_csib.s.rotpk0); + rot_pub_key[1] = bdk_le64_to_cpu(__bdk_trust_csib.s.rotpk1); + rot_pub_key[2] = bdk_le64_to_cpu(__bdk_trust_csib.s.rotpk2); + rot_pub_key[3] = bdk_le64_to_cpu(__bdk_trust_csib.s.rotpk3); + rot_pub_key[4] = bdk_le64_to_cpu(__bdk_trust_csib.s.rotpk4); + rot_pub_key[5] = bdk_le64_to_cpu(__bdk_trust_csib.s.rotpk5); + rot_pub_key[6] = bdk_le64_to_cpu(__bdk_trust_csib.s.rotpk6); + rot_pub_key[7] = bdk_le64_to_cpu(__bdk_trust_csib.s.rotpk7); + bdk_config_set_int(bdk_ptr_to_phys((void*)rot_pub_key), BDK_CONFIG_TRUST_ROT_ADDR); + BDK_TRACE(INIT, "Trust: 
ROT %016lx %016lx %016lx %016lx %016lx %016lx %016lx %016lx\n", + bdk_cpu_to_be64(rot_pub_key[0]), bdk_cpu_to_be64(rot_pub_key[1]), + bdk_cpu_to_be64(rot_pub_key[2]), bdk_cpu_to_be64(rot_pub_key[3]), + bdk_cpu_to_be64(rot_pub_key[4]), bdk_cpu_to_be64(rot_pub_key[5]), + bdk_cpu_to_be64(rot_pub_key[6]), bdk_cpu_to_be64(rot_pub_key[7])); + + /* Update the secure NV counter with the value in the CSIB */ + __bdk_program_nv_counter(); + + /* Create the BSSK */ + if (huk[0] | huk[1]) + { + uint64_t iv[2] = {0, 0}; + volatile uint64_t *bssk = bdk_key_alloc(node, 16); + if (!bssk) + { + __bdk_trust_level = BDK_TRUST_LEVEL_BROKEN; + BDK_TRACE(INIT, "Trust: Failed to allocate BSSK memory, trust broken\n"); + goto fail_trust; + } + BDK_TRACE(INIT, "Trust: Calculating BSSK\n"); + uint64_t tmp_bssk[2]; + tmp_bssk[0] = __bdk_trust_csib.s.fs0; + tmp_bssk[1] = __bdk_trust_csib.s.fs1; + bdk_aes128cbc_decrypt((void*)huk, (void*)tmp_bssk, 16, iv); + bssk[0] = tmp_bssk[0]; + bssk[1] = tmp_bssk[1]; + tmp_bssk[0] = 0; + tmp_bssk[1] = 0; + bdk_config_set_int(bdk_ptr_to_phys((void*)bssk), BDK_CONFIG_TRUST_BSSK_ADDR); + //BDK_TRACE(INIT, "Trust: BSSK %016lx %016lx\n", bdk_cpu_to_be64(bssk[0]), bdk_cpu_to_be64(bssk[1])); + } + + /* Record our trust level */ + switch (__bdk_trust_csib.s.crypt) + { + case 0: + __bdk_trust_level = BDK_TRUST_LEVEL_SIGNED; + BDK_TRACE(INIT, "Trust: Trusted boot, no encryption\n"); + break; + case 1: + __bdk_trust_level = BDK_TRUST_LEVEL_SIGNED_SSK; + BDK_TRACE(INIT, "Trust: Trusted boot, SSK encryption\n"); + break; + case 2: + __bdk_trust_level = BDK_TRUST_LEVEL_SIGNED_BSSK; + BDK_TRACE(INIT, "Trust: Trusted boot, BSSK encryption\n"); + break; + default: + __bdk_trust_level = BDK_TRUST_LEVEL_BROKEN; + goto fail_trust; + } + + /* We started at the trusted boot address, CSIB should be + valid */ + bdk_config_set_blob(sizeof(__bdk_trust_csib), &__bdk_trust_csib, BDK_CONFIG_TRUST_CSIB); +success_trust: + bdk_signed_load_public(); + return; + +fail_trust: + /* Hide secrets */ + BDK_CSR_MODIFY(c, node, BDK_RST_BOOT, + c.s.dis_huk = 1); + BDK_TRACE(INIT, "Trust: Secrets Hidden\n"); +skip_trust: + /* Erase CSIB as it is invalid */ + memset(&__bdk_trust_csib, 0, sizeof(__bdk_trust_csib)); + bdk_config_set_blob(0, NULL, BDK_CONFIG_TRUST_CSIB); +} + +/** + * Returns the current level of trust. Must be called after + * __bdk_trust_init() + * + * @return Enumerated trust level, see bdk_trust_level_t + */ +bdk_trust_level_t bdk_trust_get_level(void) +{ + return __bdk_trust_level; +} + +/** + * Return the current secure NV counter stored in the fuses + * + * @return NV counter (0-31) + */ +int bdk_trust_get_nv_counter(void) +{ + /* Count leading zeros in FUSF_CTL[ROM_T_CNT] to determine the + hardware NV value */ + BDK_CSR_INIT(fusf_ctl, bdk_numa_master(), BDK_FUSF_CTL); + int hw_nv = 0; + if (fusf_ctl.s.rom_t_cnt) + hw_nv = 32 - __builtin_clz(fusf_ctl.s.rom_t_cnt); + return hw_nv; +} + diff --git a/src/vendorcode/cavium/bdk/libdram/dram-csr.h b/src/vendorcode/cavium/bdk/libdram/dram-csr.h new file mode 100644 index 0000000000..ffe1472a0b --- /dev/null +++ b/src/vendorcode/cavium/bdk/libdram/dram-csr.h @@ -0,0 +1,86 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. 
+* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ + +/** + * Functions and macros for libdram access to CSRs. These build + * on the normal BDK functions to allow logging of CSRs based on + * the libdram verbosity level. Internal use only. + */ + +/** + * Write a CSR, possibly logging it based on the verbosity + * level. You should use DRAM_CSR_WRITE() as a convenient + * wrapper. + * + * @param node + * @param csr_name + * @param type + * @param busnum + * @param size + * @param address + * @param value + */ +#ifdef DRAM_CSR_WRITE_INLINE +static inline void dram_csr_write(bdk_node_t node, const char *csr_name, bdk_csr_type_t type, int busnum, int size, uint64_t address, uint64_t value) __attribute__((always_inline)); +static inline void dram_csr_write(bdk_node_t node, const char *csr_name, bdk_csr_type_t type, int busnum, int size, uint64_t address, uint64_t value) +{ + VB_PRT(VBL_CSRS, "N%d: DDR Config %s[%016lx] => %016lx\n", node, csr_name, address, value); + bdk_csr_write(node, type, busnum, size, address, value); +} +#else +extern void dram_csr_write(bdk_node_t node, const char *csr_name, bdk_csr_type_t type, int busnum, int size, uint64_t address, uint64_t value); +#endif + +/** + * Macro to write a CSR, logging if necessary + */ +#define DRAM_CSR_WRITE(node, csr, value) \ + dram_csr_write(node, basename_##csr, bustype_##csr, busnum_##csr, sizeof(typedef_##csr), csr, value) + +/** + * Macro to make a read, modify, and write sequence easy. The "code_block" + * should be replaced with a C code block or a comma separated list of + * "name.s.field = value", without the quotes. An illustrative use is shown below.
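+ * + * For example (illustrative use, with names chosen for this sketch; the + * same pattern appears with BDK_LMCX_EXT_CONFIG later in this file set): + * + * DRAM_CSR_MODIFY(c, node, BDK_LMCX_CONFIG(ddr_interface_num), + * c.s.ecc_ena = 1); + * + * This reads LMC(ddr_interface_num)_CONFIG into the local variable "c", + * runs the assignment, then writes back (and optionally logs) the result.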
+ */ +#define DRAM_CSR_MODIFY(name, node, csr, code_block) do { \ + typedef_##csr name = {.u = bdk_csr_read(node, bustype_##csr, busnum_##csr, sizeof(typedef_##csr), csr)}; \ + code_block; \ + dram_csr_write(node, basename_##csr, bustype_##csr, busnum_##csr, sizeof(typedef_##csr), csr, name.u); \ + } while (0) + diff --git a/src/vendorcode/cavium/bdk/libdram/dram-env.c b/src/vendorcode/cavium/bdk/libdram/dram-env.c new file mode 100644 index 0000000000..f25e6bdb26 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libdram/dram-env.c @@ -0,0 +1,83 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ +#include <bdk.h> +#include "dram-internal.h" + +const char* lookup_env_parameter(const char *format, ...) +{ + const char *s; + unsigned long value; + va_list args; + char buffer[64]; + + va_start(args, format); + vsnprintf(buffer, sizeof(buffer)-1, format, args); + buffer[sizeof(buffer)-1] = '\0'; + va_end(args); + + if ((s = getenv(buffer)) != NULL) + { + value = strtoul(s, NULL, 0); + error_print("Parameter found in environment: %s = \"%s\" 0x%lx (%ld)\n", + buffer, s, value, value); + } + return s; +} + +const char* lookup_env_parameter_ull(const char *format, ...) 
+{ + const char *s; + unsigned long long value; + va_list args; + char buffer[64]; + + va_start(args, format); + vsnprintf(buffer, sizeof(buffer)-1, format, args); + buffer[sizeof(buffer)-1] = '\0'; + va_end(args); + + if ((s = getenv(buffer)) != NULL) + { + value = strtoull(s, NULL, 0); + error_print("Parameter found in environment: %s = 0x%016llx\n", + buffer, value); + } + return s; +} + diff --git a/src/vendorcode/cavium/bdk/libdram/dram-env.h b/src/vendorcode/cavium/bdk/libdram/dram-env.h new file mode 100644 index 0000000000..0f100e1b25 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libdram/dram-env.h @@ -0,0 +1,48 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ + +/** + * Functions for accessing the environment for DRAM tweaking. + * Internal use only. + */ + + +extern const char *lookup_env_parameter(const char *format, ...) __attribute__ ((format(printf, 1, 2))); +extern const char *lookup_env_parameter_ull(const char *format, ...) __attribute__ ((format(printf, 1, 2))); + diff --git a/src/vendorcode/cavium/bdk/libdram/dram-gpio.h b/src/vendorcode/cavium/bdk/libdram/dram-gpio.h new file mode 100644 index 0000000000..62c9a5c190 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libdram/dram-gpio.h @@ -0,0 +1,46 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. 
+* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ + +/** + * Functions for reporting DRAM init status through GPIOs. + * Useful for triggering scopes and such. Internal use only. + */ + +extern void pulse_gpio_pin(bdk_node_t node, int pin, int usecs); + diff --git a/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.c b/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.c new file mode 100644 index 0000000000..edb42312f1 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.c @@ -0,0 +1,8535 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. 
MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ +#include <bdk.h> +#include "libbdk-arch/bdk-csrs-l2c_tad.h" +#include "libbdk-arch/bdk-csrs-mio_fus.h" +#include "dram-internal.h" + +#define WODT_MASK_2R_1S 1 // FIXME: did not seem to make much difference with #152 1-slot? + +#define DESKEW_RODT_CTL 1 + +// Set to 1 to use the feature whenever possible automatically. +// When 0, however, the feature is still available, and it can +// be enabled via envvar override "ddr_enable_write_deskew=1". +#define ENABLE_WRITE_DESKEW_DEFAULT 0 + +#define ENABLE_COMPUTED_VREF_ADJUSTMENT 1 + +#define RLEXTRAS_PATCH 1 // write to unused RL rank entries +#define WLEXTRAS_PATCH 1 // write to unused WL rank entries +#define ADD_48_OHM_SKIP 1 +#define NOSKIP_40_48_OHM 1 +#define NOSKIP_48_STACKED 1 +#define NOSKIP_FOR_MINI 1 +#define NOSKIP_FOR_2S_1R 1 +#define MAJORITY_OVER_AVG 1 +#define RANK_MAJORITY MAJORITY_OVER_AVG && 1 +#define SW_WL_CHECK_PATCH 1 // check validity after SW adjust +#define HW_WL_MAJORITY 1 +#define SWL_TRY_HWL_ALT HW_WL_MAJORITY && 1 // try HW WL base alternate if available when SW WL fails +#define DISABLE_SW_WL_PASS_2 1 + +#define HWL_BY_BYTE 0 // FIXME? set to 1 to do HWL a byte at a time (seemed to work better earlier?) + +#define USE_ORIG_TEST_DRAM_BYTE 1 + +// collect and print LMC utilization using SWL software algorithm +#define ENABLE_SW_WLEVEL_UTILIZATION 0 + +#define COUNT_RL_CANDIDATES 1 + +#define LOOK_FOR_STUCK_BYTE 0 +#define ENABLE_STUCK_BYTE_RESET 0 + +#define FAILSAFE_CHECK 1 + +#define PERFECT_BITMASK_COUNTING 1 + +#define DAC_OVERRIDE_EARLY 1 + +#define SWL_WITH_HW_ALTS_CHOOSE_SW 0 // FIXME: allow override? + +#define DEBUG_VALIDATE_BITMASK 0 +#if DEBUG_VALIDATE_BITMASK +#define debug_bitmask_print ddr_print +#else +#define debug_bitmask_print(...) +#endif + +#define ENABLE_SLOT_CTL_ACCESS 0 +#undef ENABLE_CUSTOM_RLEVEL_TABLE + +#define ENABLE_DISPLAY_MPR_PAGE 0 +#if ENABLE_DISPLAY_MPR_PAGE +static void Display_MPR_Page_Location(bdk_node_t node, int rank, + int ddr_interface_num, int dimm_count, + int page, int location, uint64_t *mpr_data); +#endif + +#define USE_L2_WAYS_LIMIT 1 + +/* Read out Deskew Settings for DDR */ + +typedef struct { + uint16_t bits[8]; +} deskew_bytes_t; +typedef struct { + deskew_bytes_t bytes[9]; +} deskew_data_t; + +static void +Get_Deskew_Settings(bdk_node_t node, int ddr_interface_num, deskew_data_t *dskdat) +{ + bdk_lmcx_phy_ctl_t phy_ctl; + bdk_lmcx_config_t lmc_config; + int bit_num, bit_index; + int byte_lane, byte_limit; + // NOTE: these are for pass 2.x + int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx + int bit_end = (is_t88p2) ? 9 : 8; + + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num)); + byte_limit = ((lmc_config.s.mode32b) ? 
4 : 8) + lmc_config.s.ecc_ena; + + memset(dskdat, 0, sizeof(*dskdat)); + + BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num), + phy_ctl.s.dsk_dbg_clk_scaler = 3); + + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { + bit_index = 0; + for (bit_num = 0; bit_num <= bit_end; ++bit_num) { // NOTE: this is for pass 2.x + + if (bit_num == 4) continue; + if ((bit_num == 5) && is_t88p2) continue; // NOTE: this is for pass 2.x + + // set byte lane and bit to read + BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num), + (phy_ctl.s.dsk_dbg_bit_sel = bit_num, + phy_ctl.s.dsk_dbg_byte_sel = byte_lane)); + + // start read sequence + BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num), + phy_ctl.s.dsk_dbg_rd_start = 1); + + // poll for read sequence to complete + do { + phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num)); + } while (phy_ctl.s.dsk_dbg_rd_complete != 1); + + // record the data + dskdat->bytes[byte_lane].bits[bit_index] = phy_ctl.s.dsk_dbg_rd_data & 0x3ff; + bit_index++; + + } /* for (bit_num = 0; bit_num <= bit_end; ++bit_num) */ + } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */ + + return; +} + +static void +Display_Deskew_Data(bdk_node_t node, int ddr_interface_num, + deskew_data_t *dskdat, int print_enable) +{ + int byte_lane; + int bit_num; + uint16_t flags, deskew; + bdk_lmcx_config_t lmc_config; + int byte_limit; + const char *fc = " ?-=+*#&"; + + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num)); + byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena; + + if (print_enable) { + VB_PRT(print_enable, "N%d.LMC%d: Deskew Data: Bit => :", + node, ddr_interface_num); + for (bit_num = 7; bit_num >= 0; --bit_num) + VB_PRT(print_enable, " %3d ", bit_num); + VB_PRT(print_enable, "\n"); + } + + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { + if (print_enable) + VB_PRT(print_enable, "N%d.LMC%d: Bit Deskew Byte %d %s :", + node, ddr_interface_num, byte_lane, + (print_enable >= VBL_TME) ? 
"FINAL" : " "); + + for (bit_num = 7; bit_num >= 0; --bit_num) { + + flags = dskdat->bytes[byte_lane].bits[bit_num] & 7; + deskew = dskdat->bytes[byte_lane].bits[bit_num] >> 3; + + if (print_enable) + VB_PRT(print_enable, " %3d %c", deskew, fc[flags^1]); + + } /* for (bit_num = 7; bit_num >= 0; --bit_num) */ + + if (print_enable) + VB_PRT(print_enable, "\n"); + + } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */ + + return; +} + +static int +change_wr_deskew_ena(bdk_node_t node, int ddr_interface_num, int new_state) +{ + bdk_lmcx_dll_ctl3_t ddr_dll_ctl3; + int saved_wr_deskew_ena; + + // return original WR_DESKEW_ENA setting + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); + saved_wr_deskew_ena = !!GET_DDR_DLL_CTL3(wr_deskew_ena); + if (saved_wr_deskew_ena != !!new_state) { // write it only when changing it + SET_DDR_DLL_CTL3(wr_deskew_ena, !!new_state); + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u); + } + return saved_wr_deskew_ena; +} + +typedef struct { + int saturated; // number saturated + int unlocked; // number unlocked + int nibrng_errs; // nibble range errors + int nibunl_errs; // nibble unlocked errors + //int nibsat_errs; // nibble saturation errors + int bitval_errs; // bit value errors +#if LOOK_FOR_STUCK_BYTE + int bytes_stuck; // byte(s) stuck +#endif +} deskew_counts_t; + +#define MIN_BITVAL 17 +#define MAX_BITVAL 110 + +static deskew_counts_t deskew_training_results; +static int deskew_validation_delay = 10000; // FIXME: make this a var for overriding + +static void +Validate_Read_Deskew_Training(bdk_node_t node, int rank_mask, int ddr_interface_num, + deskew_counts_t *counts, int print_enable) +{ + int byte_lane, bit_num, nib_num; + int nibrng_errs, nibunl_errs, bitval_errs; + //int nibsat_errs; + bdk_lmcx_config_t lmc_config; + int16_t nib_min[2], nib_max[2], nib_unl[2]/*, nib_sat[2]*/; + // NOTE: these are for pass 2.x + int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx + int bit_start = (is_t88p2) ? 9 : 8; + int byte_limit; +#if LOOK_FOR_STUCK_BYTE + uint64_t bl_mask[2]; // enough for 128 values + int bit_values; +#endif + deskew_data_t dskdat; + int bit_index; + int16_t flags, deskew; + const char *fc = " ?-=+*#&"; + int saved_wr_deskew_ena; + int bit_last; + + // save original WR_DESKEW_ENA setting, and disable it for read deskew + saved_wr_deskew_ena = change_wr_deskew_ena(node, ddr_interface_num, 0); + + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num)); + byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena; + + memset(counts, 0, sizeof(deskew_counts_t)); + + Get_Deskew_Settings(node, ddr_interface_num, &dskdat); + + if (print_enable) { + VB_PRT(print_enable, "N%d.LMC%d: Deskew Settings: Bit => :", + node, ddr_interface_num); + for (bit_num = 7; bit_num >= 0; --bit_num) + VB_PRT(print_enable, " %3d ", bit_num); + VB_PRT(print_enable, "\n"); + } + + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { + if (print_enable) + VB_PRT(print_enable, "N%d.LMC%d: Bit Deskew Byte %d %s :", + node, ddr_interface_num, byte_lane, + (print_enable >= VBL_TME) ? 
"FINAL" : " "); + + nib_min[0] = 127; nib_min[1] = 127; + nib_max[0] = 0; nib_max[1] = 0; + nib_unl[0] = 0; nib_unl[1] = 0; + //nib_sat[0] = 0; nib_sat[1] = 0; + +#if LOOK_FOR_STUCK_BYTE + bl_mask[0] = bl_mask[1] = 0; +#endif + + if ((lmc_config.s.mode32b == 1) && (byte_lane == 4)) { + bit_index = 3; + bit_last = 3; + if (print_enable) + VB_PRT(print_enable, " "); + } else { + bit_index = 7; + bit_last = bit_start; + } + + for (bit_num = bit_last; bit_num >= 0; --bit_num) { // NOTE: this is for pass 2.x + if (bit_num == 4) continue; + if ((bit_num == 5) && is_t88p2) continue; // NOTE: this is for pass 2.x + + nib_num = (bit_num > 4) ? 1 : 0; + + flags = dskdat.bytes[byte_lane].bits[bit_index] & 7; + deskew = dskdat.bytes[byte_lane].bits[bit_index] >> 3; + bit_index--; + + counts->saturated += !!(flags & 6); + counts->unlocked += !(flags & 1); + + nib_unl[nib_num] += !(flags & 1); + //nib_sat[nib_num] += !!(flags & 6); + + if (flags & 1) { // FIXME? only do range when locked + nib_min[nib_num] = min(nib_min[nib_num], deskew); + nib_max[nib_num] = max(nib_max[nib_num], deskew); + } + +#if LOOK_FOR_STUCK_BYTE + bl_mask[(deskew >> 6) & 1] |= 1UL << (deskew & 0x3f); +#endif + + if (print_enable) + VB_PRT(print_enable, " %3d %c", deskew, fc[flags^1]); + + } /* for (bit_num = bit_last; bit_num >= 0; --bit_num) */ + + /* + Now look for nibble errors: + + For bit 55, it looks like a bit deskew problem. When the upper nibble of byte 6 + needs to go to saturation, bit 7 of byte 6 locks prematurely at 64. + For DIMMs with raw card A and B, can we reset the deskew training when we encounter this case? + The reset criteria should be looking at one nibble at a time for raw card A and B; + if the bit-deskew setting within a nibble is different by > 33, we'll issue a reset + to the bit deskew training. + + LMC0 Bit Deskew Byte(6): 64 0 - 0 - 0 - 26 61 35 64 + */ + // upper nibble range, then lower nibble range + nibrng_errs = ((nib_max[1] - nib_min[1]) > 33) ? 1 : 0; + nibrng_errs |= ((nib_max[0] - nib_min[0]) > 33) ? 1 : 0; + + // check for nibble all unlocked + nibunl_errs = ((nib_unl[0] == 4) || (nib_unl[1] == 4)) ? 1 : 0; + + // check for nibble all saturated + //nibsat_errs = ((nib_sat[0] == 4) || (nib_sat[1] == 4)) ? 1 : 0; + + // check for bit value errors, ie < 17 or > 110 + // FIXME? assume max always > MIN_BITVAL and min < MAX_BITVAL + bitval_errs = ((nib_max[1] > MAX_BITVAL) || (nib_max[0] > MAX_BITVAL)) ? 1 : 0; + bitval_errs |= ((nib_min[1] < MIN_BITVAL) || (nib_min[0] < MIN_BITVAL)) ? 
1 : 0; + + if (((nibrng_errs != 0) || (nibunl_errs != 0) /*|| (nibsat_errs != 0)*/ || (bitval_errs != 0)) + && print_enable) + { + VB_PRT(print_enable, " %c%c%c%c", + (nibrng_errs)?'R':' ', + (nibunl_errs)?'U':' ', + (bitval_errs)?'V':' ', + /*(nibsat_errs)?'S':*/' '); + } + +#if LOOK_FOR_STUCK_BYTE + bit_values = __builtin_popcountl(bl_mask[0]) + __builtin_popcountl(bl_mask[1]); + if (bit_values < 3) { + counts->bytes_stuck |= (1 << byte_lane); + if (print_enable) + VB_PRT(print_enable, "X"); + } +#endif + if (print_enable) + VB_PRT(print_enable, "\n"); + + counts->nibrng_errs |= (nibrng_errs << byte_lane); + counts->nibunl_errs |= (nibunl_errs << byte_lane); + //counts->nibsat_errs |= (nibsat_errs << byte_lane); + counts->bitval_errs |= (bitval_errs << byte_lane); + +#if LOOK_FOR_STUCK_BYTE + // just for completeness, allow print of the stuck values bitmask after the bytelane print + if ((bit_values < 3) && print_enable) { + VB_PRT(VBL_DEV, "N%d.LMC%d: Deskew byte %d STUCK on value 0x%016lx.%016lx\n", + node, ddr_interface_num, byte_lane, + bl_mask[1], bl_mask[0]); + } +#endif + + } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */ + + // restore original WR_DESKEW_ENA setting + change_wr_deskew_ena(node, ddr_interface_num, saved_wr_deskew_ena); + + return; +} + +unsigned short load_dac_override(int node, int ddr_interface_num, + int dac_value, int byte) +{ + bdk_lmcx_dll_ctl3_t ddr_dll_ctl3; + int bytex = (byte == 0x0A) ? byte : byte + 1; // single bytelanes incr by 1; A is for ALL + + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); + + SET_DDR_DLL_CTL3(byte_sel, bytex); + SET_DDR_DLL_CTL3(offset, dac_value >> 1); // only 7-bit field, use MS bits + + ddr_dll_ctl3.s.bit_select = 0x9; /* No-op */ + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u); + + ddr_dll_ctl3.s.bit_select = 0xC; /* Vref bypass setting load */ + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u); + + ddr_dll_ctl3.s.bit_select = 0xD; /* Vref bypass on. */ + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u); + + ddr_dll_ctl3.s.bit_select = 0x9; /* No-op */ + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u); + + return ((unsigned short) GET_DDR_DLL_CTL3(offset)); +} + +// arg dac_or_dbi is 1 for DAC, 0 for DBI +// returns 9 entries (bytelanes 0 through 8) in settings[] +// returns 0 if OK, -1 if a problem +int read_DAC_DBI_settings(int node, int ddr_interface_num, + int dac_or_dbi, int *settings) +{ + bdk_lmcx_phy_ctl_t phy_ctl; + int byte_lane, bit_num; + int deskew; + int dac_value; + int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx + + phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num)); + phy_ctl.s.dsk_dbg_clk_scaler = 3; + DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), phy_ctl.u); + + bit_num = (dac_or_dbi) ? 
4 : 5; + if ((bit_num == 5) && !is_t88p2) { // NOTE: this is for pass 1.x + return -1; + } + + for (byte_lane = 8; byte_lane >= 0 ; --byte_lane) { // FIXME: always assume ECC is available + + //set byte lane and bit to read + phy_ctl.s.dsk_dbg_bit_sel = bit_num; + phy_ctl.s.dsk_dbg_byte_sel = byte_lane; + DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), phy_ctl.u); + + //start read sequence + phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num)); + phy_ctl.s.dsk_dbg_rd_start = 1; + DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), phy_ctl.u); + + //poll for read sequence to complete + do { + phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num)); + } while (phy_ctl.s.dsk_dbg_rd_complete != 1); + + deskew = phy_ctl.s.dsk_dbg_rd_data /*>> 3*/; // leave the flag bits for DBI + dac_value = phy_ctl.s.dsk_dbg_rd_data & 0xff; + + settings[byte_lane] = (dac_or_dbi) ? dac_value : deskew; + + } /* for (byte_lane = 8; byte_lane >= 0 ; --byte_lane) { */ + + return 0; +} + +// print out the DBI settings array +// arg dac_or_dbi is 1 for DAC, 0 for DBI +void +display_DAC_DBI_settings(int node, int lmc, int dac_or_dbi, + int ecc_ena, int *settings, char *title) +{ + int byte; + int flags; + int deskew; + const char *fc = " ?-=+*#&"; + + ddr_print("N%d.LMC%d: %s %s Deskew Settings %d:0 :", + node, lmc, title, (dac_or_dbi)?"DAC":"DBI", 7+ecc_ena); + for (byte = (7+ecc_ena); byte >= 0; --byte) { // FIXME: what about 32-bit mode? + if (dac_or_dbi) { // DAC + flags = 1; // say its locked to get blank + deskew = settings[byte] & 0xff; + } else { // DBI + flags = settings[byte] & 7; + deskew = (settings[byte] >> 3) & 0x7f; + } + ddr_print(" %3d %c", deskew, fc[flags^1]); + } + ddr_print("\n"); +} + +// Evaluate the DAC settings array +static int +evaluate_DAC_settings(int ddr_interface_64b, int ecc_ena, int *settings) +{ + int byte, dac; + int last = (ddr_interface_64b) ? 7 : 3; + + // this looks only for DAC values that are EVEN + for (byte = (last+ecc_ena); byte >= 0; --byte) { + dac = settings[byte] & 0xff; + if ((dac & 1) == 0) + return 1; + } + return 0; +} + +static void +Perform_Offset_Training(bdk_node_t node, int rank_mask, int ddr_interface_num) +{ + bdk_lmcx_phy_ctl_t lmc_phy_ctl; + uint64_t orig_phy_ctl; + const char *s; + + /* + * 6.9.8 LMC Offset Training + * + * LMC requires input-receiver offset training. + * + * 1. Write LMC(0)_PHY_CTL[DAC_ON] = 1 + */ + lmc_phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num)); + orig_phy_ctl = lmc_phy_ctl.u; + lmc_phy_ctl.s.dac_on = 1; + + // allow full CSR override + if ((s = lookup_env_parameter_ull("ddr_phy_ctl")) != NULL) { + lmc_phy_ctl.u = strtoull(s, NULL, 0); + } + + // do not print or write if CSR does not change... + if (lmc_phy_ctl.u != orig_phy_ctl) { + ddr_print("PHY_CTL : 0x%016lx\n", lmc_phy_ctl.u); + DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), lmc_phy_ctl.u); + } + +#if 0 + // FIXME? do we really need to show RODT here? + bdk_lmcx_comp_ctl2_t lmc_comp_ctl2; + lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); + ddr_print("Read ODT_CTL : 0x%x (%d ohms)\n", + lmc_comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[lmc_comp_ctl2.s.rodt_ctl]); +#endif + + /* + * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0B and + * LMC(0)_SEQ_CTL[INIT_START] = 1. + * + * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1. 
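 * + * (Steps 2 and 3 are carried out by the perform_octeon3_ddr3_sequence() + * call just below; its 0x0B argument selects the offset-training + * sequence.)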
+ */ + perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0B); /* Offset training sequence */ + +} + +static void +Perform_Internal_VREF_Training(bdk_node_t node, int rank_mask, int ddr_interface_num) +{ + bdk_lmcx_ext_config_t ext_config; + + /* + * 6.9.9 LMC Internal Vref Training + * + * LMC requires input-reference-voltage training. + * + * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 0. + */ + ext_config.u = BDK_CSR_READ(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num)); + ext_config.s.vrefint_seq_deskew = 0; + + VB_PRT(VBL_SEQ, "N%d.LMC%d: Performing LMC sequence: vrefint_seq_deskew = %d\n", + node, ddr_interface_num, ext_config.s.vrefint_seq_deskew); + + DRAM_CSR_WRITE(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num), ext_config.u); + + /* + * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0a and + * LMC(0)_SEQ_CTL[INIT_START] = 1. + * + * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1. + */ + perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0A); /* LMC Internal Vref Training */ +} + +#define dbg_avg(format, ...) VB_PRT(VBL_DEV, format, ##__VA_ARGS__) +static int +process_samples_average(int16_t *bytes, int num_samples, int lmc, int lane_no) +{ + int i, savg, sadj, sum = 0, rng, ret, asum, trunc; + int16_t smin = 32767, smax = -32768; + + dbg_avg("DBG_AVG%d.%d: ", lmc, lane_no); + + for (i = 0; i < num_samples; i++) { + sum += bytes[i]; + if (bytes[i] < smin) smin = bytes[i]; + if (bytes[i] > smax) smax = bytes[i]; + dbg_avg(" %3d", bytes[i]); + } + rng = smax - smin + 1; + + dbg_avg(" (%3d, %3d, %2d)", smin, smax, rng); + + asum = sum - smin - smax; + + savg = divide_nint(sum * 10, num_samples); + + sadj = divide_nint(asum * 10, (num_samples - 2)); + + trunc = asum / (num_samples - 2); + + dbg_avg(" [%3d.%d, %3d.%d, %3d]", savg/10, savg%10, sadj/10, sadj%10, trunc); + + sadj = divide_nint(sadj, 10); + if (trunc & 1) + ret = trunc; + else if (sadj & 1) + ret = sadj; + else + ret = trunc + 1; + + dbg_avg(" -> %3d\n", ret); + + return ret; +} + + +#define DEFAULT_SAT_RETRY_LIMIT 11 // 1 + 10 retries +static int default_lock_retry_limit = 20; // 20 retries // FIXME: make a var for overriding + +static int +Perform_Read_Deskew_Training(bdk_node_t node, int rank_mask, int ddr_interface_num, + int spd_rawcard_AorB, int print_flags, int ddr_interface_64b) +{ + int unsaturated, locked; + //int nibble_sat; + int sat_retries, lock_retries, lock_retries_total, lock_retries_limit; + int print_first; + int print_them_all; + deskew_counts_t dsk_counts; + uint64_t saved_wr_deskew_ena; +#if DESKEW_RODT_CTL + bdk_lmcx_comp_ctl2_t comp_ctl2; + int save_deskew_rodt_ctl = -1; +#endif + int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx + + VB_PRT(VBL_FAE, "N%d.LMC%d: Performing Read Deskew Training.\n", node, ddr_interface_num); + + // save original WR_DESKEW_ENA setting, and disable it for read deskew + saved_wr_deskew_ena = change_wr_deskew_ena(node, ddr_interface_num, 0); + + sat_retries = 0; + lock_retries_total = 0; + unsaturated = 0; + print_first = VBL_FAE; // print the first one, FAE and above + print_them_all = dram_is_verbose(VBL_DEV4); // set to true for printing all normal deskew attempts + + int loops, normal_loops = 1; // default to 1 NORMAL deskew training op... 
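 + // The NORMAL loop count above can be overridden from the environment + // for experiments, e.g. ddr_deskew_normal_loops=2 runs the NORMAL + // sequence twice per evaluation (see the getenv() just below).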
+ const char *s; + if ((s = getenv("ddr_deskew_normal_loops")) != NULL) { + normal_loops = strtoul(s, NULL, 0); + } + +#if LOOK_FOR_STUCK_BYTE + // provide override for STUCK BYTE RESETS + int do_stuck_reset = ENABLE_STUCK_BYTE_RESET; + if ((s = getenv("ddr_enable_stuck_byte_reset")) != NULL) { + do_stuck_reset = !!strtoul(s, NULL, 0); + } +#endif + +#if DESKEW_RODT_CTL + if ((s = getenv("ddr_deskew_rodt_ctl")) != NULL) { + int deskew_rodt_ctl = strtoul(s, NULL, 0); + comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); + save_deskew_rodt_ctl = comp_ctl2.s.rodt_ctl; + comp_ctl2.s.rodt_ctl = deskew_rodt_ctl; + DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), comp_ctl2.u); + } +#endif + + lock_retries_limit = default_lock_retry_limit; + if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) // added 81xx and 83xx + lock_retries_limit *= 2; // give pass 2.0 twice as many + + do { /* while (sat_retries < sat_retry_limit) */ + + /* + * 6.9.10 LMC Deskew Training + * + * LMC requires input-read-data deskew training. + * + * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 1. + */ + VB_PRT(VBL_SEQ, "N%d.LMC%d: Performing LMC sequence: Set vrefint_seq_deskew = 1\n", + node, ddr_interface_num); + DRAM_CSR_MODIFY(ext_config, node, BDK_LMCX_EXT_CONFIG(ddr_interface_num), + ext_config.s.vrefint_seq_deskew = 1); /* Set Deskew sequence */ + + /* + * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0A and + * LMC(0)_SEQ_CTL[INIT_START] = 1. + * + * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1. + */ + DRAM_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num), + phy_ctl.s.phy_dsk_reset = 1); /* RESET Deskew sequence */ + perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0A); /* LMC Deskew Training */ + + lock_retries = 0; + + perform_read_deskew_training: + // maybe perform the NORMAL deskew training sequence multiple times before looking at lock status + for (loops = 0; loops < normal_loops; loops++) { + DRAM_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num), + phy_ctl.s.phy_dsk_reset = 0); /* Normal Deskew sequence */ + perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0A); /* LMC Deskew Training */ + } + // Moved this from Validate_Read_Deskew_Training + /* Allow deskew results to stabilize before evaluating them. */ + bdk_wait_usec(deskew_validation_delay); + + // Now go look at lock and saturation status... + Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, print_first); + if (print_first && !print_them_all) // after printing the first and not doing them all, no more + print_first = 0; + + unsaturated = (dsk_counts.saturated == 0); + locked = (dsk_counts.unlocked == 0); + //nibble_sat = (dsk_counts.nibsat_errs != 0); + + // only do locking retries if unsaturated or rawcard A or B, otherwise full SAT retry + if (unsaturated || (spd_rawcard_AorB && !is_t88p2 /*&& !nibble_sat*/)) { + if (!locked) { // and not locked + lock_retries++; + lock_retries_total++; + if (lock_retries <= lock_retries_limit) { + goto perform_read_deskew_training; + } else { + VB_PRT(VBL_TME, "N%d.LMC%d: LOCK RETRIES failed after %d retries\n", + node, ddr_interface_num, lock_retries_limit); + } + } else { + if (lock_retries_total > 0) // only print if we did try + VB_PRT(VBL_TME, "N%d.LMC%d: LOCK RETRIES successful after %d retries\n", + node, ddr_interface_num, lock_retries); + } + } /* if (unsaturated || spd_rawcard_AorB) */ + + ++sat_retries; + +#if LOOK_FOR_STUCK_BYTE + // FIXME: this is a bit of a hack at the moment... 
+ // We want to force a Deskew RESET hopefully to unstick the bytes values + // and then resume normal deskew training as usual. + // For now, do only if it is all locked... + if (locked && (dsk_counts.bytes_stuck != 0)) { + BDK_CSR_INIT(lmc_config, node, BDK_LMCX_CONFIG(ddr_interface_num)); + if (do_stuck_reset && lmc_config.s.mode_x4dev) { // FIXME: only when x4!! + unsaturated = 0; // to always make sure the while continues + VB_PRT(VBL_TME, "N%d.LMC%d: STUCK BYTE (0x%x), forcing deskew RESET\n", + node, ddr_interface_num, dsk_counts.bytes_stuck); + continue; // bypass the rest to get back to the RESET + } else { + VB_PRT(VBL_TME, "N%d.LMC%d: STUCK BYTE (0x%x), ignoring deskew RESET\n", + node, ddr_interface_num, dsk_counts.bytes_stuck); + } + } +#endif + /* + * At this point, check for a DDR4 RDIMM that will not benefit from SAT retries; if so, no retries + */ + if (spd_rawcard_AorB && !is_t88p2 /*&& !nibble_sat*/) { + VB_PRT(VBL_TME, "N%d.LMC%d: Read Deskew Training Loop: Exiting for RAWCARD == A or B.\n", + node, ddr_interface_num); + break; // no sat or lock retries + } + + } while (!unsaturated && (sat_retries < DEFAULT_SAT_RETRY_LIMIT)); + +#if DESKEW_RODT_CTL + if (save_deskew_rodt_ctl != -1) { + comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); + comp_ctl2.s.rodt_ctl = save_deskew_rodt_ctl; + DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), comp_ctl2.u); + } +#endif + + VB_PRT(VBL_FAE, "N%d.LMC%d: Read Deskew Training %s. %d sat-retries, %d lock-retries\n", + node, ddr_interface_num, + (sat_retries >= DEFAULT_SAT_RETRY_LIMIT) ? "Timed Out" : "Completed", + sat_retries-1, lock_retries_total); + + // restore original WR_DESKEW_ENA setting + change_wr_deskew_ena(node, ddr_interface_num, saved_wr_deskew_ena); + + if ((dsk_counts.nibrng_errs != 0) || (dsk_counts.nibunl_errs != 0)) { + debug_print("N%d.LMC%d: NIBBLE ERROR(S) found, returning FAULT\n", + node, ddr_interface_num); + return -1; // we did retry locally, they did not help + } + + // NOTE: we (currently) always print one last training validation before starting Read Leveling... 
+ + return 0; +} + +static void +do_write_deskew_op(bdk_node_t node, int ddr_interface_num, + int bit_sel, int byte_sel, int ena) +{ + bdk_lmcx_dll_ctl3_t ddr_dll_ctl3; + + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); + SET_DDR_DLL_CTL3(bit_select, bit_sel); + SET_DDR_DLL_CTL3(byte_sel, byte_sel); + SET_DDR_DLL_CTL3(wr_deskew_ena, ena); + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u); + + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); +} + +static void +set_write_deskew_offset(bdk_node_t node, int ddr_interface_num, + int bit_sel, int byte_sel, int offset) +{ + bdk_lmcx_dll_ctl3_t ddr_dll_ctl3; + + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); + SET_DDR_DLL_CTL3(bit_select, bit_sel); + SET_DDR_DLL_CTL3(byte_sel, byte_sel); + SET_DDR_DLL_CTL3(offset, offset); + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u); + + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); + SET_DDR_DLL_CTL3(wr_deskew_ld, 1); + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u); + + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); +} + +static void +Update_Write_Deskew_Settings(bdk_node_t node, int ddr_interface_num, deskew_data_t *dskdat) +{ + bdk_lmcx_config_t lmc_config; + int bit_num; + int byte_lane, byte_limit; + + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num)); + byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena; + + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { + for (bit_num = 0; bit_num <= 7; ++bit_num) { + + set_write_deskew_offset(node, ddr_interface_num, bit_num, byte_lane + 1, + dskdat->bytes[byte_lane].bits[bit_num]); + + } /* for (bit_num = 0; bit_num <= 7; ++bit_num) */ + } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */ + + return; +} + +#define ALL_BYTES 0x0A +#define BS_NOOP 0x09 +#define BS_RESET 0x0F +#define BS_REUSE 0x0A + +// set all entries to the same value (used during training) +static void +Set_Write_Deskew_Settings(bdk_node_t node, int ddr_interface_num, int value) +{ + bdk_lmcx_dll_ctl3_t ddr_dll_ctl3; + int bit_num; + + VB_PRT(VBL_DEV2, "N%d.LMC%d: SetWriteDeskew: WRITE %d\n", node, ddr_interface_num, value); + + for (bit_num = 0; bit_num <= 7; ++bit_num) { + + // write a bit-deskew value to all bit-lanes of all bytes + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); + SET_DDR_DLL_CTL3(bit_select, bit_num); + SET_DDR_DLL_CTL3(byte_sel, ALL_BYTES); // FIXME? will this work in 32-bit mode? 
+ SET_DDR_DLL_CTL3(offset, value); + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u); + + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); + SET_DDR_DLL_CTL3(wr_deskew_ld, 1); + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u); + + } /* for (bit_num = 0; bit_num <= 7; ++bit_num) */ + +#if 0 + // FIXME: for debug use only + Get_Deskew_Settings(node, ddr_interface_num, &dskdat); + Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM); +#endif + + return; +} + +typedef struct { + uint8_t count[8]; + uint8_t start[8]; + uint8_t best_count[8]; + uint8_t best_start[8]; +} deskew_bytelane_t; +typedef struct { + deskew_bytelane_t bytes[9]; +} deskew_rank_t; + +deskew_rank_t deskew_history[4]; + +#define DSKVAL_INCR 4 + +static void +Neutral_Write_Deskew_Setup(bdk_node_t node, int ddr_interface_num) +{ + // first: NO-OP, Select all bytes, Disable write bit-deskew + ddr_print("N%d.LMC%d: NEUTRAL Write Deskew Setup: first: NOOP\n", node, ddr_interface_num); + do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 0); + //Get_Deskew_Settings(node, ddr_interface_num, &dskdat); + //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM); + + // enable write bit-deskew and RESET the settings + ddr_print("N%d.LMC%d: NEUTRAL Write Deskew Setup: wr_ena: RESET\n", node, ddr_interface_num); + do_write_deskew_op(node, ddr_interface_num, BS_RESET, ALL_BYTES, 1); + //Get_Deskew_Settings(node, ddr_interface_num, &dskdat); + //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM); +} + +static void +Perform_Write_Deskew_Training(bdk_node_t node, int ddr_interface_num) +{ + deskew_data_t dskdat; + int byte, bit_num; + int dskval, rankx, rank_mask, active_ranks, errors, bit_errs; + uint64_t hw_rank_offset; + uint64_t bad_bits[2]; + uint64_t phys_addr; + deskew_rank_t *dhp; + int num_lmcs = __bdk_dram_get_num_lmc(node); + + BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(ddr_interface_num)); + rank_mask = lmcx_config.s.init_status; // FIXME: is this right when we run? 
+ + // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs + hw_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2)); + + VB_PRT(VBL_FAE, "N%d.LMC%d: Performing Write Deskew Training.\n", node, ddr_interface_num); + + // first: NO-OP, Select all bytes, Disable write bit-deskew + ddr_print("N%d.LMC%d: WriteDeskewConfig: first: NOOP\n", node, ddr_interface_num); + do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 0); + //Get_Deskew_Settings(node, ddr_interface_num, &dskdat); + //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM); + + // enable write bit-deskew and RESET the settings + ddr_print("N%d.LMC%d: WriteDeskewConfig: wr_ena: RESET\n", node, ddr_interface_num); + do_write_deskew_op(node, ddr_interface_num, BS_RESET, ALL_BYTES, 1); + //Get_Deskew_Settings(node, ddr_interface_num, &dskdat); + //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM); + +#if 0 + // enable write bit-deskew and REUSE read bit-deskew settings + ddr_print("N%d.LMC%d: WriteDeskewConfig: wr_ena: REUSE\n", node, ddr_interface_num); + do_write_deskew_op(node, ddr_interface_num, BS_REUSE, ALL_BYTES, 1); + Get_Deskew_Settings(node, ddr_interface_num, &dskdat); + Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM); +#endif + +#if 1 + memset(deskew_history, 0, sizeof(deskew_history)); + + for (dskval = 0; dskval < 128; dskval += DSKVAL_INCR) { + + Set_Write_Deskew_Settings(node, ddr_interface_num, dskval); + + active_ranks = 0; + for (rankx = 0; rankx < 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + dhp = &deskew_history[rankx]; + phys_addr = hw_rank_offset * active_ranks; + active_ranks++; + + errors = test_dram_byte_hw(node, ddr_interface_num, phys_addr, 0, bad_bits); + + for (byte = 0; byte <= 8; byte++) { // do bytelane(s) + + // check errors + if (errors & (1 << byte)) { // yes, error(s) in the byte lane in this rank + bit_errs = ((byte == 8) ? bad_bits[1] : bad_bits[0] >> (8 * byte)) & 0xFFULL; + + VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Value %d: Address 0x%012lx errors 0x%x/0x%x\n", + node, ddr_interface_num, rankx, byte, + dskval, phys_addr, errors, bit_errs); + + for (bit_num = 0; bit_num <= 7; bit_num++) { + if (!(bit_errs & (1 << bit_num))) + continue; + if (dhp->bytes[byte].count[bit_num] > 0) { // had started run + VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Bit %d Value %d: stopping a run here\n", + node, ddr_interface_num, rankx, byte, bit_num, dskval); + dhp->bytes[byte].count[bit_num] = 0; // stop now + } + } /* for (bit_num = 0; bit_num <= 7; bit_num++) */ + + // FIXME: else had not started run - nothing else to do? + } else { // no error in the byte lane + for (bit_num = 0; bit_num <= 7; bit_num++) { + if (dhp->bytes[byte].count[bit_num] == 0) { // first success, set run start + VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Bit %d Value %d: starting a run here\n", + node, ddr_interface_num, rankx, byte, bit_num, dskval); + dhp->bytes[byte].start[bit_num] = dskval; + } + dhp->bytes[byte].count[bit_num] += DSKVAL_INCR; // bump run length + + // is this now the biggest window? 
+ if (dhp->bytes[byte].count[bit_num] > dhp->bytes[byte].best_count[bit_num]) { + dhp->bytes[byte].best_count[bit_num] = dhp->bytes[byte].count[bit_num]; + dhp->bytes[byte].best_start[bit_num] = dhp->bytes[byte].start[bit_num]; + VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Bit %d Value %d: updating best to %d/%d\n", + node, ddr_interface_num, rankx, byte, bit_num, dskval, + dhp->bytes[byte].best_start[bit_num], + dhp->bytes[byte].best_count[bit_num]); + } + } /* for (bit_num = 0; bit_num <= 7; bit_num++) */ + } /* error in the byte lane */ + } /* for (byte = 0; byte <= 8; byte++) */ + } /* for (rankx = 0; rankx < 4; rankx++) */ + } /* for (dskval = 0; dskval < 128; dskval++) */ + + + for (byte = 0; byte <= 8; byte++) { // do bytelane(s) + + for (bit_num = 0; bit_num <= 7; bit_num++) { // do bits + int bit_beg, bit_end; + + bit_beg = 0; + bit_end = 128; + + for (rankx = 0; rankx < 4; rankx++) { // merge ranks + int rank_beg, rank_end, rank_count; + if (!(rank_mask & (1 << rankx))) + continue; + + dhp = &deskew_history[rankx]; + rank_beg = dhp->bytes[byte].best_start[bit_num]; + rank_count = dhp->bytes[byte].best_count[bit_num]; + + if (!rank_count) { + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: Byte %d Bit %d: EMPTY\n", + node, ddr_interface_num, rankx, byte, bit_num); + continue; + } + + bit_beg = max(bit_beg, rank_beg); + rank_end = rank_beg + rank_count - DSKVAL_INCR; + bit_end = min(bit_end, rank_end); + + } /* for (rankx = 0; rankx < 4; rankx++) */ + + dskdat.bytes[byte].bits[bit_num] = (bit_end + bit_beg) / 2; + + } /* for (bit_num = 0; bit_num <= 7; bit_num++) */ + } /* for (byte = 0; byte <= 8; byte++) */ + +#endif + + // update the write bit-deskew settings with final settings + ddr_print("N%d.LMC%d: WriteDeskewConfig: wr_ena: UPDATE\n", node, ddr_interface_num); + Update_Write_Deskew_Settings(node, ddr_interface_num, &dskdat); + Get_Deskew_Settings(node, ddr_interface_num, &dskdat); + Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM); + + // last: NO-OP, Select all bytes, MUST leave write bit-deskew enabled + ddr_print("N%d.LMC%d: WriteDeskewConfig: last: wr_ena: NOOP\n", node, ddr_interface_num); + do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 1); + //Get_Deskew_Settings(node, ddr_interface_num, &dskdat); + //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM); + +#if 0 + // FIXME: disable/delete this when write bit-deskew works... + // final: NO-OP, Select all bytes, do NOT leave write bit-deskew enabled + ddr_print("N%d.LMC%d: WriteDeskewConfig: final: read: NOOP\n", node, ddr_interface_num); + do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 0); + Get_Deskew_Settings(node, ddr_interface_num, &dskdat); + Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM); +#endif +} + +#define SCALING_FACTOR (1000) +#define Dprintf debug_print // make this "ddr_print" for extra debug output below +static int compute_Vref_1slot_2rank(int rtt_wr, int rtt_park, int dqx_ctl, int rank_count) +{ + uint64_t Reff_s; + uint64_t Rser_s = 15; + uint64_t Vdd = 1200; + uint64_t Vref; + //uint64_t Vl; + uint64_t rtt_wr_s = (((rtt_wr == 0) || (rtt_wr == 99)) ? 1*1024*1024 : rtt_wr); // 99 == HiZ + uint64_t rtt_park_s = (((rtt_park == 0) || ((rank_count == 1) && (rtt_wr != 0))) ? 1*1024*1024 : rtt_park); + uint64_t dqx_ctl_s = (dqx_ctl == 0 ? 
1*1024*1024 : dqx_ctl);
+ int Vref_value;
+ uint64_t Rangepc = 6000; // range1 base is 60%
+ uint64_t Vrefpc;
+ int Vref_range = 0;
+
+ Dprintf("rtt_wr = %d, rtt_park = %d, dqx_ctl = %d\n", rtt_wr, rtt_park, dqx_ctl);
+ Dprintf("rtt_wr_s = %ld, rtt_park_s = %ld, dqx_ctl_s = %ld\n", rtt_wr_s, rtt_park_s, dqx_ctl_s);
+
+ Reff_s = divide_nint((rtt_wr_s * rtt_park_s), (rtt_wr_s + rtt_park_s));
+ Dprintf("Reff_s = %ld\n", Reff_s);
+
+ //Vl = (((Rser_s + dqx_ctl_s) * SCALING_FACTOR) / (Rser_s + dqx_ctl_s + Reff_s)) * Vdd / SCALING_FACTOR;
+ //printf("Vl = %ld\n", Vl);
+
+ Vref = (((Rser_s + dqx_ctl_s) * SCALING_FACTOR) / (Rser_s + dqx_ctl_s + Reff_s)) + SCALING_FACTOR;
+ Dprintf("Vref = %ld\n", Vref);
+
+ Vref = (Vref * Vdd) / 2 / SCALING_FACTOR;
+ Dprintf("Vref = %ld\n", Vref);
+
+ Vrefpc = (Vref * 100 * 100) / Vdd;
+ Dprintf("Vrefpc = %ld\n", Vrefpc);
+
+ if (Vrefpc < Rangepc) { // < range1 base, use range2
+ Vref_range = 1 << 6; // set bit A6 for range2
+ Rangepc = 4500; // range2 base is 45%
+ }
+
+ Vref_value = divide_nint(Vrefpc - Rangepc, 65);
+ if (Vref_value < 0)
+ Vref_value = Vref_range; // set to base of range as lowest value
+ else
+ Vref_value |= Vref_range;
+ Dprintf("Vref_value = %d (0x%02x)\n", Vref_value, Vref_value);
+
+ debug_print("rtt_wr:%d, rtt_park:%d, dqx_ctl:%d, Vref_value:%d (0x%x)\n",
+ rtt_wr, rtt_park, dqx_ctl, Vref_value, Vref_value);
+
+ return Vref_value;
+}
+
+static int compute_Vref_2slot_2rank(int rtt_wr, int rtt_park_00, int rtt_park_01, int dqx_ctl, int rtt_nom)
+{
+ //uint64_t Rser = 15;
+ uint64_t Vdd = 1200;
+ //uint64_t Vref;
+ uint64_t Vl, Vlp, Vcm;
+ uint64_t Rd0, Rd1, Rpullup;
+ uint64_t rtt_wr_s = (((rtt_wr == 0) || (rtt_wr == 99)) ? 1*1024*1024 : rtt_wr); // 99 == HiZ
+ uint64_t rtt_park_00_s = (rtt_park_00 == 0 ? 1*1024*1024 : rtt_park_00);
+ uint64_t rtt_park_01_s = (rtt_park_01 == 0 ? 1*1024*1024 : rtt_park_01);
+ uint64_t dqx_ctl_s = (dqx_ctl == 0 ? 1*1024*1024 : dqx_ctl);
+ uint64_t rtt_nom_s = (rtt_nom == 0 ? 
1*1024*1024 : rtt_nom); + int Vref_value; + uint64_t Rangepc = 6000; // range1 base is 60% + uint64_t Vrefpc; + int Vref_range = 0; + + // Rd0 = (RTT_NOM /*parallel*/ RTT_WR) + 15 = ((RTT_NOM * RTT_WR) / (RTT_NOM + RTT_WR)) + 15 + Rd0 = divide_nint((rtt_nom_s * rtt_wr_s), (rtt_nom_s + rtt_wr_s)) + 15; + //printf("Rd0 = %ld\n", Rd0); + + // Rd1 = (RTT_PARK_00 /*parallel*/ RTT_PARK_01) + 15 = ((RTT_PARK_00 * RTT_PARK_01) / (RTT_PARK_00 + RTT_PARK_01)) + 15 + Rd1 = divide_nint((rtt_park_00_s * rtt_park_01_s), (rtt_park_00_s + rtt_park_01_s)) + 15; + //printf("Rd1 = %ld\n", Rd1); + + // Rpullup = Rd0 /*parallel*/ Rd1 = (Rd0 * Rd1) / (Rd0 + Rd1) + Rpullup = divide_nint((Rd0 * Rd1), (Rd0 + Rd1)); + //printf("Rpullup = %ld\n", Rpullup); + + // Vl = (DQX_CTL / (DQX_CTL + Rpullup)) * 1.2 + Vl = divide_nint((dqx_ctl_s * Vdd), (dqx_ctl_s + Rpullup)); + //printf("Vl = %ld\n", Vl); + + // Vlp = ((15 / Rd0) * (1.2 - Vl)) + Vl + Vlp = divide_nint((15 * (Vdd - Vl)), Rd0) + Vl; + //printf("Vlp = %ld\n", Vlp); + + // Vcm = (Vlp + 1.2) / 2 + Vcm = divide_nint((Vlp + Vdd), 2); + //printf("Vcm = %ld\n", Vcm); + + // Vrefpc = (Vcm / 1.2) * 100 + Vrefpc = divide_nint((Vcm * 100 * 100), Vdd); + //printf("Vrefpc = %ld\n", Vrefpc); + + if (Vrefpc < Rangepc) { // < range1 base, use range2 + Vref_range = 1 << 6; // set bit A6 for range2 + Rangepc = 4500; // range2 base is 45% + } + + Vref_value = divide_nint(Vrefpc - Rangepc, 65); + if (Vref_value < 0) + Vref_value = Vref_range; // set to base of range as lowest value + else + Vref_value |= Vref_range; + //printf("Vref_value = %d (0x%02x)\n", Vref_value, Vref_value); + + debug_print("rtt_wr:%d, rtt_park_00:%d, rtt_park_01:%d, dqx_ctl:%d, rtt_nom:%d, Vref_value:%d (0x%x)\n", + rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom, Vref_value, Vref_value); + + return Vref_value; +} + +// NOTE: only call this for DIMMs with 1 or 2 ranks, not 4. 
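The note above belongs to compute_vref_value(), which follows. Both helpers it dispatches to finish with the same encoding step: the computed Vref percentage is mapped onto DDR4 MR6 VrefDQ training range 1 (base 60.00%) or range 2 (base 45.00%, selected by bit A6) in 0.65% steps. Below is a minimal standalone sketch of just that step; the helper name encode_vrefdq is invented for illustration and is not part of the source.

    /* Illustrative sketch only: encode a Vref level given in hundredths
     * of a percent (e.g. 7250 == 72.50%) as a DDR4 MR6 VrefDQ training
     * value, mirroring the range/step constants used above. */
    static int encode_vrefdq(int vrefpc)
    {
        int range_bit = 0;
        int base = 6000;                 /* range 1 base, 60.00% */

        if (vrefpc < base) {             /* below range 1: use range 2 */
            range_bit = 1 << 6;          /* bit A6 selects range 2 */
            base = 4500;                 /* range 2 base, 45.00% */
        }

        int value = (vrefpc - base + 32) / 65;  /* nearest 0.65% step */
        if (value < 0)
            value = 0;                   /* clamp at the range base */

        return value | range_bit;        /* 6-bit step count plus A6 */
    }

For vrefpc = 7250 (72.50%), this returns (7250 - 6000 + 32) / 65 = 19 with A6 clear, i.e. a range 1 encoding, matching the divide_nint() result in the helpers above for the same percentage.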
+int
+compute_vref_value(bdk_node_t node, int ddr_interface_num,
+ int rankx, int dimm_count, int rank_count,
+ impedence_values_t *imp_values, int is_stacked_die)
+{
+ int computed_final_vref_value = 0;
+
+ /* Calculate an override of the measured Vref value
+ but only for configurations we know how to...*/
+ // we have code for 2-rank DIMMs in both 1-slot and 2-slot configs,
+ // and can use the 2-rank 1-slot code for 1-rank DIMMs in 1-slot configs,
+ // and can use the 2-rank 2-slot code for 1-rank DIMMs in 2-slot configs
+
+ int rtt_wr, dqx_ctl, rtt_nom, index;
+ bdk_lmcx_modereg_params1_t lmc_modereg_params1;
+ bdk_lmcx_modereg_params2_t lmc_modereg_params2;
+ bdk_lmcx_comp_ctl2_t comp_ctl2;
+
+ lmc_modereg_params1.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num));
+ lmc_modereg_params2.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num));
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ dqx_ctl = imp_values->dqx_strength[comp_ctl2.s.dqx_ctl];
+
+ // WR always comes from the current rank
+ index = (lmc_modereg_params1.u >> (rankx * 12 + 5)) & 0x03;
+ if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
+ index |= (lmc_modereg_params1.u >> (51+rankx-2)) & 0x04;
+ }
+ rtt_wr = imp_values->rtt_wr_ohms [index];
+
+ // separate calculations for 1 vs 2 DIMMs per LMC
+ if (dimm_count == 1) {
+ // PARK comes from this rank if 1-rank, otherwise other rank
+ index = (lmc_modereg_params2.u >> ((rankx ^ (rank_count - 1)) * 10 + 0)) & 0x07;
+ int rtt_park = imp_values->rtt_nom_ohms[index];
+ computed_final_vref_value = compute_Vref_1slot_2rank(rtt_wr, rtt_park, dqx_ctl, rank_count);
+ } else {
+ // get both PARK values from the other DIMM
+ index = (lmc_modereg_params2.u >> ((rankx ^ 0x02) * 10 + 0)) & 0x07;
+ int rtt_park_00 = imp_values->rtt_nom_ohms[index];
+ index = (lmc_modereg_params2.u >> ((rankx ^ 0x03) * 10 + 0)) & 0x07;
+ int rtt_park_01 = imp_values->rtt_nom_ohms[index];
+ // NOM comes from this rank if 1-rank, otherwise other rank
+ index = (lmc_modereg_params1.u >> ((rankx ^ (rank_count - 1)) * 12 + 9)) & 0x07;
+ rtt_nom = imp_values->rtt_nom_ohms[index];
+ computed_final_vref_value = compute_Vref_2slot_2rank(rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom);
+ }
+
+#if ENABLE_COMPUTED_VREF_ADJUSTMENT
+ {
+ int saved_final_vref_value = computed_final_vref_value;
+ BDK_CSR_INIT(lmc_config, node, BDK_LMCX_CONFIG(ddr_interface_num));
+ /*
+ New computed Vref = existing computed Vref - X
+
+ The value of X depends on the configuration. Both #122 and #139 are 2Rx4 RDIMMs,
+ while #124 is stacked-die 2Rx4, so I condense the results into two cases:
+
+ 1. Stacked Die: 2Rx4
+ 1-slot: offset = 7, i.e. New computed Vref = existing computed Vref - 7
+ 2-slot: offset = 6
+
+ 2. Regular: 2Rx4
+ 1-slot: offset = 3
+ 2-slot: offset = 2
+ */
+ // we know we never get called unless DDR4, so test just the other conditions
+ if ((!!__bdk_dram_is_rdimm(node, 0)) &&
+ (rank_count == 2) &&
+ (lmc_config.s.mode_x4dev))
+ { // it must first be RDIMM and 2-rank and x4
+ if (is_stacked_die) { // now do according to stacked die or not...
+ computed_final_vref_value -= (dimm_count == 1) ? 7 : 6;
+ } else {
+ computed_final_vref_value -= (dimm_count == 1) ? 
3 : 2; + } + // we have adjusted it, so print it out if verbosity is right + VB_PRT(VBL_TME, "N%d.LMC%d.R%d: adjusting computed vref from %2d (0x%02x) to %2d (0x%02x)\n", + node, ddr_interface_num, rankx, + saved_final_vref_value, saved_final_vref_value, + computed_final_vref_value, computed_final_vref_value); + } + } +#endif + return computed_final_vref_value; +} + +static unsigned int EXTR_WR(uint64_t u, int x) +{ + return (unsigned int)(((u >> (x*12+5)) & 0x3UL) | ((u >> (51+x-2)) & 0x4UL)); +} +static void INSRT_WR(uint64_t *up, int x, int v) +{ + uint64_t u = *up; + u &= ~(((0x3UL) << (x*12+5)) | ((0x1UL) << (51+x))); + *up = (u | ((v & 0x3UL) << (x*12+5)) | ((v & 0x4UL) << (51+x-2))); + return; +} + +static int encode_row_lsb_ddr3(int row_lsb, int ddr_interface_wide) +{ + int encoded_row_lsb; + int row_lsb_start = 14; + + /* Decoding for row_lsb */ + /* 000: row_lsb = mem_adr[14] */ + /* 001: row_lsb = mem_adr[15] */ + /* 010: row_lsb = mem_adr[16] */ + /* 011: row_lsb = mem_adr[17] */ + /* 100: row_lsb = mem_adr[18] */ + /* 101: row_lsb = mem_adr[19] */ + /* 110: row_lsb = mem_adr[20] */ + /* 111: RESERVED */ + + row_lsb_start = 14; + + encoded_row_lsb = row_lsb - row_lsb_start ; + + return encoded_row_lsb; +} + +static int encode_pbank_lsb_ddr3(int pbank_lsb, int ddr_interface_wide) +{ + int encoded_pbank_lsb; + + /* Decoding for pbank_lsb */ + /* 0000:DIMM = mem_adr[28] / rank = mem_adr[27] (if RANK_ENA) */ + /* 0001:DIMM = mem_adr[29] / rank = mem_adr[28] " */ + /* 0010:DIMM = mem_adr[30] / rank = mem_adr[29] " */ + /* 0011:DIMM = mem_adr[31] / rank = mem_adr[30] " */ + /* 0100:DIMM = mem_adr[32] / rank = mem_adr[31] " */ + /* 0101:DIMM = mem_adr[33] / rank = mem_adr[32] " */ + /* 0110:DIMM = mem_adr[34] / rank = mem_adr[33] " */ + /* 0111:DIMM = 0 / rank = mem_adr[34] " */ + /* 1000-1111: RESERVED */ + + int pbank_lsb_start = 0; + + pbank_lsb_start = 28; + + encoded_pbank_lsb = pbank_lsb - pbank_lsb_start; + + return encoded_pbank_lsb; +} + +static uint64_t octeon_read_lmcx_ddr3_rlevel_dbg(bdk_node_t node, int ddr_interface_num, int idx) +{ + DRAM_CSR_MODIFY(c, node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num), + c.s.byte = idx); + BDK_CSR_READ(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num)); + BDK_CSR_INIT(rlevel_dbg, node, BDK_LMCX_RLEVEL_DBG(ddr_interface_num)); + return rlevel_dbg.s.bitmask; +} + +static uint64_t octeon_read_lmcx_ddr3_wlevel_dbg(bdk_node_t node, int ddr_interface_num, int idx) +{ + bdk_lmcx_wlevel_dbg_t wlevel_dbg; + + wlevel_dbg.u = 0; + wlevel_dbg.s.byte = idx; + + DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_DBG(ddr_interface_num), wlevel_dbg.u); + BDK_CSR_READ(node, BDK_LMCX_WLEVEL_DBG(ddr_interface_num)); + + wlevel_dbg.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_DBG(ddr_interface_num)); + return wlevel_dbg.s.bitmask; +} + + +/* + * Apply a filter to the BITMASK results returned from Octeon + * read-leveling to determine the most likely delay result. This + * computed delay may be used to qualify the delay result returned by + * Octeon. Accumulate an error penalty for invalid characteristics of + * the bitmask so that they can be used to select the most reliable + * results. + * + * The algorithm searches for the largest contiguous MASK within a + * maximum RANGE of bits beginning with the MSB. + * + * 1. a MASK with a WIDTH less than 4 will be penalized + * 2. Bubbles in the bitmask that occur before or after the MASK + * will be penalized + * 3. If there are no trailing bubbles then extra bits that occur + * beyond the maximum RANGE will be penalized. 
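+ *
+ * For example, a bitmask of 0x1E (binary 11110) contains a clean
+ * 4-bit MASK and scores no penalty, while 0x1B (binary 11011)
+ * yields only a 2-bit MASK with a leading bubble, so it collects
+ * both the NARROW and BUBBLE penalties.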
+ * + * +++++++++++++++++++++++++++++++++++++++++++++++++++ + * + + + * + e.g. bitmask = 27B00 + + * + + + * + 63 +--- mstart 0 + + * + | | | + + * + | +---------+ +--- fb | + + * + | | range | | | + + * + V V V V V + + * + + + * + 0 0 ... 1 0 0 1 1 1 1 0 1 1 0 0 0 0 0 0 0 0 + + * + + + * + ^ ^ ^ + + * + | | mask| + + * + lb ---+ +-----+ + + * + width + + * + + + * +++++++++++++++++++++++++++++++++++++++++++++++++++ + */ +#define RLEVEL_BITMASK_TRAILING_BITS_ERROR 5 +#define RLEVEL_BITMASK_BUBBLE_BITS_ERROR 11 // FIXME? now less than TOOLONG +#define RLEVEL_BITMASK_NARROW_ERROR 6 +#define RLEVEL_BITMASK_BLANK_ERROR 100 +#define RLEVEL_BITMASK_TOOLONG_ERROR 12 + +#define MASKRANGE_BITS 6 +#define MASKRANGE ((1 << MASKRANGE_BITS) - 1) + +static int +validate_ddr3_rlevel_bitmask(rlevel_bitmask_t *rlevel_bitmask_p, int ddr_type) +{ + int i; + int errors = 0; + uint64_t mask = 0; /* Used in 64-bit comparisons */ + int8_t mstart = 0; + uint8_t width = 0; + uint8_t firstbit = 0; + uint8_t lastbit = 0; + uint8_t bubble = 0; + uint8_t tbubble = 0; + uint8_t blank = 0; + uint8_t narrow = 0; + uint8_t trailing = 0; + uint64_t bitmask = rlevel_bitmask_p->bm; + uint8_t extras = 0; + uint8_t toolong = 0; + uint64_t temp; + + if (bitmask == 0) { + blank += RLEVEL_BITMASK_BLANK_ERROR; + } else { + + /* Look for fb, the first bit */ + temp = bitmask; + while (!(temp & 1)) { + firstbit++; + temp >>= 1; + } + + /* Look for lb, the last bit */ + lastbit = firstbit; + while ((temp >>= 1)) + lastbit++; + + /* Start with the max range to try to find the largest mask within the bitmask data */ + width = MASKRANGE_BITS; + for (mask = MASKRANGE; mask > 0; mask >>= 1, --width) { + for (mstart = lastbit - width + 1; mstart >= firstbit; --mstart) { + temp = mask << mstart; + if ((bitmask & temp) == temp) + goto done_now; + } + } + done_now: + /* look for any more contiguous 1's to the right of mstart */ + if (width == MASKRANGE_BITS) { // only when maximum mask + while ((bitmask >> (mstart - 1)) & 1) { // slide right over more 1's + --mstart; + if (ddr_type == DDR4_DRAM) // only for DDR4 + extras++; // count the number of extra bits + } + } + + /* Penalize any extra 1's beyond the maximum desired mask */ + if (extras > 0) + toolong = RLEVEL_BITMASK_TOOLONG_ERROR * ((1 << extras) - 1); + + /* Detect if bitmask is too narrow. */ + if (width < 4) + narrow = (4 - width) * RLEVEL_BITMASK_NARROW_ERROR; + + /* detect leading bubble bits, that is, any 0's between first and mstart */ + temp = bitmask >> (firstbit + 1); + i = mstart - firstbit - 1; + while (--i >= 0) { + if ((temp & 1) == 0) + bubble += RLEVEL_BITMASK_BUBBLE_BITS_ERROR; + temp >>= 1; + } + + temp = bitmask >> (mstart + width + extras); + i = lastbit - (mstart + width + extras - 1); + while (--i >= 0) { + if (temp & 1) { /* Detect 1 bits after the trailing end of the mask, including last. */ + trailing += RLEVEL_BITMASK_TRAILING_BITS_ERROR; + } else { /* Detect trailing bubble bits, that is, any 0's between end-of-mask and last */ + tbubble += RLEVEL_BITMASK_BUBBLE_BITS_ERROR; + } + temp >>= 1; + } + } + + errors = bubble + tbubble + blank + narrow + trailing + toolong; + + /* Pass out useful statistics */ + rlevel_bitmask_p->mstart = mstart; + rlevel_bitmask_p->width = width; + + VB_PRT(VBL_DEV2, "bm:%08lx mask:%02lx, width:%2u, mstart:%2d, fb:%2u, lb:%2u" + " (bu:%2d, tb:%2d, bl:%2d, n:%2d, t:%2d, x:%2d) errors:%3d %s\n", + (unsigned long) bitmask, mask, width, mstart, + firstbit, lastbit, bubble, tbubble, blank, narrow, + trailing, toolong, errors, (errors) ? 
"=> invalid" : ""); + + return errors; +} + +static int compute_ddr3_rlevel_delay(uint8_t mstart, uint8_t width, bdk_lmcx_rlevel_ctl_t rlevel_ctl) +{ + int delay; + + debug_bitmask_print(" offset_en:%d", rlevel_ctl.cn8.offset_en); + + if (rlevel_ctl.s.offset_en) { + delay = max(mstart, mstart + width - 1 - rlevel_ctl.s.offset); + } else { + /* if (rlevel_ctl.s.offset) { */ /* Experimental */ + if (0) { + delay = max(mstart + rlevel_ctl.s.offset, mstart + 1); + /* Insure that the offset delay falls within the bitmask */ + delay = min(delay, mstart + width-1); + } else { + delay = (width - 1) / 2 + mstart; /* Round down */ + /* delay = (width/2) + mstart; */ /* Round up */ + } + } + + return delay; +} + +#define WLEVEL_BYTE_BITS 5 +#define WLEVEL_BYTE_MSK ((1UL << 5) - 1) + +static void update_wlevel_rank_struct(bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank, + int byte, int delay) +{ + bdk_lmcx_wlevel_rankx_t temp_wlevel_rank; + if (byte >= 0 && byte <= 8) { + temp_wlevel_rank.u = lmc_wlevel_rank->u; + temp_wlevel_rank.u &= ~(WLEVEL_BYTE_MSK << (WLEVEL_BYTE_BITS * byte)); + temp_wlevel_rank.u |= ((delay & WLEVEL_BYTE_MSK) << (WLEVEL_BYTE_BITS * byte)); + lmc_wlevel_rank->u = temp_wlevel_rank.u; + } +} + +static int get_wlevel_rank_struct(bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank, + int byte) +{ + int delay = 0; + if (byte >= 0 && byte <= 8) { + delay = ((lmc_wlevel_rank->u) >> (WLEVEL_BYTE_BITS * byte)) & WLEVEL_BYTE_MSK; + } + return delay; +} + +#if 0 +// entry = 1 is valid, entry = 0 is invalid +static int +validity_matrix[4][4] = {[0] {1,1,1,0}, // valid pairs when cv == 0: 0,0 + 0,1 + 0,2 == "7" + [1] {0,1,1,1}, // valid pairs when cv == 1: 1,1 + 1,2 + 1,3 == "E" + [2] {1,0,1,1}, // valid pairs when cv == 2: 2,2 + 2,3 + 2,0 == "D" + [3] {1,1,0,1}}; // valid pairs when cv == 3: 3,3 + 3,0 + 3,1 == "B" +#endif +static int +validate_seq(int *wl, int *seq) +{ + int seqx; // sequence index, step through the sequence array + int bitnum; + seqx = 0; + while (seq[seqx+1] >= 0) { // stop on next seq entry == -1 + // but now, check current versus next +#if 0 + if ( !validity_matrix [wl[seq[seqx]]] [wl[seq[seqx+1]]] ) + return 1; +#else + bitnum = (wl[seq[seqx]] << 2) | wl[seq[seqx+1]]; + if (!((1 << bitnum) & 0xBDE7)) // magic validity number (see matrix above) + return 1; +#endif + seqx++; + } + return 0; +} + +static int +Validate_HW_WL_Settings(bdk_node_t node, int ddr_interface_num, + bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank, + int ecc_ena) +{ + int wl[9], byte, errors; + + // arrange the sequences so + int useq[] = { 0,1,2,3,8,4,5,6,7,-1 }; // index 0 has byte 0, etc, ECC in middle + int rseq1[] = { 8,3,2,1,0,-1 }; // index 0 is ECC, then go down + int rseq2[] = { 4,5,6,7,-1 }; // index 0 has byte 4, then go up + int useqno[] = { 0,1,2,3,4,5,6,7,-1 }; // index 0 has byte 0, etc, no ECC + int rseq1no[] = { 3,2,1,0,-1 }; // index 0 is byte 3, then go down, no ECC + + // in the CSR, bytes 0-7 are always data, byte 8 is ECC + for (byte = 0; byte < 8+ecc_ena; byte++) { + wl[byte] = (get_wlevel_rank_struct(lmc_wlevel_rank, byte) >> 1) & 3; // preprocess :-) + } + + errors = 0; + if (__bdk_dram_is_rdimm(node, 0) != 0) { // RDIMM order + errors = validate_seq(wl, (ecc_ena) ? rseq1 : rseq1no); + errors += validate_seq(wl, rseq2); + } else { // UDIMM order + errors = validate_seq(wl, (ecc_ena) ? 
useq : useqno); + } + + return errors; +} + +#define RLEVEL_BYTE_BITS 6 +#define RLEVEL_BYTE_MSK ((1UL << 6) - 1) + +static void update_rlevel_rank_struct(bdk_lmcx_rlevel_rankx_t *lmc_rlevel_rank, + int byte, int delay) +{ + bdk_lmcx_rlevel_rankx_t temp_rlevel_rank; + if (byte >= 0 && byte <= 8) { + temp_rlevel_rank.u = lmc_rlevel_rank->u & ~(RLEVEL_BYTE_MSK << (RLEVEL_BYTE_BITS * byte)); + temp_rlevel_rank.u |= ((delay & RLEVEL_BYTE_MSK) << (RLEVEL_BYTE_BITS * byte)); + lmc_rlevel_rank->u = temp_rlevel_rank.u; + } +} + +#if RLEXTRAS_PATCH || !DISABLE_SW_WL_PASS_2 +static int get_rlevel_rank_struct(bdk_lmcx_rlevel_rankx_t *lmc_rlevel_rank, + int byte) +{ + int delay = 0; + if (byte >= 0 && byte <= 8) { + delay = ((lmc_rlevel_rank->u) >> (RLEVEL_BYTE_BITS * byte)) & RLEVEL_BYTE_MSK; + } + return delay; +} +#endif + +static void unpack_rlevel_settings(int ddr_interface_bytemask, int ecc_ena, + rlevel_byte_data_t *rlevel_byte, + bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank) +{ + if ((ddr_interface_bytemask & 0xff) == 0xff) { + if (ecc_ena) { + rlevel_byte[8].delay = lmc_rlevel_rank.cn83xx.byte7; + rlevel_byte[7].delay = lmc_rlevel_rank.cn83xx.byte6; + rlevel_byte[6].delay = lmc_rlevel_rank.cn83xx.byte5; + rlevel_byte[5].delay = lmc_rlevel_rank.cn83xx.byte4; + rlevel_byte[4].delay = lmc_rlevel_rank.cn83xx.byte8; /* ECC */ + } else { + rlevel_byte[7].delay = lmc_rlevel_rank.cn83xx.byte7; + rlevel_byte[6].delay = lmc_rlevel_rank.cn83xx.byte6; + rlevel_byte[5].delay = lmc_rlevel_rank.cn83xx.byte5; + rlevel_byte[4].delay = lmc_rlevel_rank.cn83xx.byte4; + } + } else { + rlevel_byte[8].delay = lmc_rlevel_rank.cn83xx.byte8; /* unused */ + rlevel_byte[7].delay = lmc_rlevel_rank.cn83xx.byte7; /* unused */ + rlevel_byte[6].delay = lmc_rlevel_rank.cn83xx.byte6; /* unused */ + rlevel_byte[5].delay = lmc_rlevel_rank.cn83xx.byte5; /* unused */ + rlevel_byte[4].delay = lmc_rlevel_rank.cn83xx.byte4; /* ECC */ + } + rlevel_byte[3].delay = lmc_rlevel_rank.cn83xx.byte3; + rlevel_byte[2].delay = lmc_rlevel_rank.cn83xx.byte2; + rlevel_byte[1].delay = lmc_rlevel_rank.cn83xx.byte1; + rlevel_byte[0].delay = lmc_rlevel_rank.cn83xx.byte0; +} + +static void pack_rlevel_settings(int ddr_interface_bytemask, int ecc_ena, + rlevel_byte_data_t *rlevel_byte, + bdk_lmcx_rlevel_rankx_t *final_rlevel_rank) +{ + bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank = *final_rlevel_rank; + + if ((ddr_interface_bytemask & 0xff) == 0xff) { + if (ecc_ena) { + lmc_rlevel_rank.cn83xx.byte7 = rlevel_byte[8].delay; + lmc_rlevel_rank.cn83xx.byte6 = rlevel_byte[7].delay; + lmc_rlevel_rank.cn83xx.byte5 = rlevel_byte[6].delay; + lmc_rlevel_rank.cn83xx.byte4 = rlevel_byte[5].delay; + lmc_rlevel_rank.cn83xx.byte8 = rlevel_byte[4].delay; /* ECC */ + } else { + lmc_rlevel_rank.cn83xx.byte7 = rlevel_byte[7].delay; + lmc_rlevel_rank.cn83xx.byte6 = rlevel_byte[6].delay; + lmc_rlevel_rank.cn83xx.byte5 = rlevel_byte[5].delay; + lmc_rlevel_rank.cn83xx.byte4 = rlevel_byte[4].delay; + } + } else { + lmc_rlevel_rank.cn83xx.byte8 = rlevel_byte[8].delay; + lmc_rlevel_rank.cn83xx.byte7 = rlevel_byte[7].delay; + lmc_rlevel_rank.cn83xx.byte6 = rlevel_byte[6].delay; + lmc_rlevel_rank.cn83xx.byte5 = rlevel_byte[5].delay; + lmc_rlevel_rank.cn83xx.byte4 = rlevel_byte[4].delay; + } + lmc_rlevel_rank.cn83xx.byte3 = rlevel_byte[3].delay; + lmc_rlevel_rank.cn83xx.byte2 = rlevel_byte[2].delay; + lmc_rlevel_rank.cn83xx.byte1 = rlevel_byte[1].delay; + lmc_rlevel_rank.cn83xx.byte0 = rlevel_byte[0].delay; + + *final_rlevel_rank = lmc_rlevel_rank; +} + +#if !DISABLE_SW_WL_PASS_2 +static void 
rlevel_to_wlevel(bdk_lmcx_rlevel_rankx_t *lmc_rlevel_rank, + bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank, int byte) +{ + int byte_delay = get_rlevel_rank_struct(lmc_rlevel_rank, byte); + + debug_print("Estimating Wlevel delay byte %d: ", byte); + debug_print("Rlevel=%d => ", byte_delay); + byte_delay = divide_roundup(byte_delay,2) & 0x1e; + debug_print("Wlevel=%d\n", byte_delay); + update_wlevel_rank_struct(lmc_wlevel_rank, byte, byte_delay); +} +#endif /* !DISABLE_SW_WL_PASS_2 */ + +/* Delay trend: constant=0, decreasing=-1, increasing=1 */ +static int calc_delay_trend(int v) +{ + if (v == 0) + return (0); + if (v < 0) + return (-1); + return 1; +} + +/* Evaluate delay sequence across the whole range of byte delays while +** keeping track of the overall delay trend, increasing or decreasing. +** If the trend changes charge an error amount to the score. +*/ + +// NOTE: "max_adj_delay_inc" argument is, by default, 1 for DDR3 and 2 for DDR4 + +static int nonsequential_delays(rlevel_byte_data_t *rlevel_byte, + int start, int end, int max_adj_delay_inc) +{ + int error = 0; + int delay_trend, prev_trend = 0; + int byte_idx; + int delay_inc; + int delay_diff; + int byte_err; + + for (byte_idx = start; byte_idx < end; ++byte_idx) { + byte_err = 0; + + delay_diff = rlevel_byte[byte_idx+1].delay - rlevel_byte[byte_idx].delay; + delay_trend = calc_delay_trend(delay_diff); + + debug_bitmask_print("Byte %d: %2d, Byte %d: %2d, delay_trend: %2d, prev_trend: %2d", + byte_idx+0, rlevel_byte[byte_idx+0].delay, + byte_idx+1, rlevel_byte[byte_idx+1].delay, + delay_trend, prev_trend); + + /* Increment error each time the trend changes to the opposite direction. + */ + if ((prev_trend != 0) && (delay_trend != 0) && (prev_trend != delay_trend)) { + byte_err += RLEVEL_NONSEQUENTIAL_DELAY_ERROR; + prev_trend = delay_trend; + debug_bitmask_print(" => Nonsequential byte delay"); + } + + delay_inc = _abs(delay_diff); // how big was the delay change, if any + + /* Even if the trend did not change to the opposite direction, check for + the magnitude of the change, and scale the penalty by the amount that + the size is larger than the provided limit. + */ + if ((max_adj_delay_inc != 0) && (delay_inc > max_adj_delay_inc)) { + byte_err += (delay_inc - max_adj_delay_inc) * RLEVEL_ADJACENT_DELAY_ERROR; + debug_bitmask_print(" => Adjacent delay error"); + } + + debug_bitmask_print("\n"); + if (delay_trend != 0) + prev_trend = delay_trend; + + rlevel_byte[byte_idx+1].sqerrs = byte_err; + error += byte_err; + } + return error; +} + +static int roundup_ddr3_wlevel_bitmask(int bitmask) +{ + int shifted_bitmask; + int leader; + int delay; + + for (leader=0; leader<8; ++leader) { + shifted_bitmask = (bitmask>>leader); + if ((shifted_bitmask&1) == 0) + break; + } + + for (/*leader=leader*/; leader<16; ++leader) { + shifted_bitmask = (bitmask>>(leader%8)); + if (shifted_bitmask&1) + break; + } + + delay = (leader & 1) ? 
leader + 1 : leader; + delay = delay % 8; + + return delay; +} + +/* Check to see if any custom offset values are provided */ +static int is_dll_offset_provided(const int8_t *dll_offset_table) +{ + int i; + if (dll_offset_table != NULL) { + for (i=0; i<9; ++i) { + if (dll_offset_table[i] != 0) + return (1); + } + } + return (0); +} + +/////////////////// These are the RLEVEL settings display routines + +// flags +#define WITH_NOTHING 0 +#define WITH_SCORE 1 +#define WITH_AVERAGE 2 +#define WITH_FINAL 4 +#define WITH_COMPUTE 8 +static void do_display_RL(bdk_node_t node, int ddr_interface_num, + bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, + int rank, int flags, int score) +{ + char score_buf[16]; + if (flags & WITH_SCORE) + snprintf(score_buf, sizeof(score_buf), "(%d)", score); + else { + score_buf[0] = ' '; score_buf[1] = 0; + } + + char *msg_buf; + char hex_buf[20]; + if (flags & WITH_AVERAGE) { + msg_buf = " DELAY AVERAGES "; + } else if (flags & WITH_FINAL) { + msg_buf = " FINAL SETTINGS "; + } else if (flags & WITH_COMPUTE) { + msg_buf = " COMPUTED DELAYS "; + } else { + snprintf(hex_buf, sizeof(hex_buf), "0x%016lX", lmc_rlevel_rank.u); + msg_buf = hex_buf; + } + + ddr_print("N%d.LMC%d.R%d: Rlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d %s\n", + node, ddr_interface_num, rank, + lmc_rlevel_rank.s.status, + msg_buf, + lmc_rlevel_rank.cn83xx.byte8, + lmc_rlevel_rank.cn83xx.byte7, + lmc_rlevel_rank.cn83xx.byte6, + lmc_rlevel_rank.cn83xx.byte5, + lmc_rlevel_rank.cn83xx.byte4, + lmc_rlevel_rank.cn83xx.byte3, + lmc_rlevel_rank.cn83xx.byte2, + lmc_rlevel_rank.cn83xx.byte1, + lmc_rlevel_rank.cn83xx.byte0, + score_buf + ); +} + +static inline void +display_RL(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank) +{ + do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 0, 0); +} + +static inline void +display_RL_with_score(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score) +{ + do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 1, score); +} + +#if !PICK_BEST_RANK_SCORE_NOT_AVG +static inline void +display_RL_with_average(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score) +{ + do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 3, score); +} +#endif + +static inline void +display_RL_with_final(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank) +{ + do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 4, 0); +} + +static inline void +display_RL_with_computed(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score) +{ + do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 9, score); +} + +// flag values +#define WITH_RODT_BLANK 0 +#define WITH_RODT_SKIPPING 1 +#define WITH_RODT_BESTROW 2 +#define WITH_RODT_BESTSCORE 3 +// control +#define SKIP_SKIPPING 1 + +static const char *with_rodt_canned_msgs[4] = { " ", "SKIPPING ", "BEST ROW ", "BEST SCORE" }; + +static void display_RL_with_RODT(bdk_node_t node, int ddr_interface_num, + bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score, + int nom_ohms, int rodt_ohms, int flag) +{ + const char *msg_buf; + char set_buf[20]; +#if SKIP_SKIPPING + if (flag == WITH_RODT_SKIPPING) return; +#endif + msg_buf = with_rodt_canned_msgs[flag]; + if (nom_ohms < 0) { + snprintf(set_buf, sizeof(set_buf), " RODT %3d ", rodt_ohms); + } else { + snprintf(set_buf, sizeof(set_buf), "NOM %3d 
RODT %3d", nom_ohms, rodt_ohms); + } + + VB_PRT(VBL_TME, "N%d.LMC%d.R%d: Rlevel %s %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d (%d)\n", + node, ddr_interface_num, rank, + set_buf, msg_buf, + lmc_rlevel_rank.cn83xx.byte8, + lmc_rlevel_rank.cn83xx.byte7, + lmc_rlevel_rank.cn83xx.byte6, + lmc_rlevel_rank.cn83xx.byte5, + lmc_rlevel_rank.cn83xx.byte4, + lmc_rlevel_rank.cn83xx.byte3, + lmc_rlevel_rank.cn83xx.byte2, + lmc_rlevel_rank.cn83xx.byte1, + lmc_rlevel_rank.cn83xx.byte0, + score + ); + + // FIXME: does this help make the output a little easier to focus? + if (flag == WITH_RODT_BESTSCORE) { + VB_PRT(VBL_DEV, "-----------\n"); + } +} + +static void +do_display_WL(bdk_node_t node, int ddr_interface_num, bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank, int rank, int flags) +{ + char *msg_buf; + char hex_buf[20]; + int vbl; + if (flags & WITH_FINAL) { + msg_buf = " FINAL SETTINGS "; + vbl = VBL_NORM; + } else { + snprintf(hex_buf, sizeof(hex_buf), "0x%016lX", lmc_wlevel_rank.u); + msg_buf = hex_buf; + vbl = VBL_FAE; + } + + VB_PRT(vbl, "N%d.LMC%d.R%d: Wlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n", + node, ddr_interface_num, rank, + lmc_wlevel_rank.s.status, + msg_buf, + lmc_wlevel_rank.s.byte8, + lmc_wlevel_rank.s.byte7, + lmc_wlevel_rank.s.byte6, + lmc_wlevel_rank.s.byte5, + lmc_wlevel_rank.s.byte4, + lmc_wlevel_rank.s.byte3, + lmc_wlevel_rank.s.byte2, + lmc_wlevel_rank.s.byte1, + lmc_wlevel_rank.s.byte0 + ); +} + +static inline void +display_WL(bdk_node_t node, int ddr_interface_num, bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank, int rank) +{ + do_display_WL(node, ddr_interface_num, lmc_wlevel_rank, rank, WITH_NOTHING); +} + +static inline void +display_WL_with_final(bdk_node_t node, int ddr_interface_num, bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank, int rank) +{ + do_display_WL(node, ddr_interface_num, lmc_wlevel_rank, rank, WITH_FINAL); +} + +// pretty-print bitmask adjuster +static uint64_t +PPBM(uint64_t bm) +{ + if (bm != 0ul) { + while ((bm & 0x0fful) == 0ul) + bm >>= 4; + } + return bm; +} + +// xlate PACKED index to UNPACKED index to use with rlevel_byte +#define XPU(i,e) (((i) < 4)?(i):((i)<8)?(i)+(e):4) +// xlate UNPACKED index to PACKED index to use with rlevel_bitmask +#define XUP(i,e) (((i) < 4)?(i):((i)>4)?(i)-(e):8) + +// flag values +#define WITH_WL_BITMASKS 0 +#define WITH_RL_BITMASKS 1 +#define WITH_RL_MASK_SCORES 2 +#define WITH_RL_SEQ_SCORES 3 +static void +do_display_BM(bdk_node_t node, int ddr_interface_num, int rank, void *bm, int flags, int ecc_ena) +{ + int ecc = !!ecc_ena; + if (flags == WITH_WL_BITMASKS) { // wlevel_bitmask array in PACKED index order, so just print them + int *bitmasks = (int *)bm; + + ddr_print("N%d.LMC%d.R%d: Wlevel Debug Results : %05x %05x %05x %05x %05x %05x %05x %05x %05x\n", + node, ddr_interface_num, rank, + bitmasks[8], + bitmasks[7], + bitmasks[6], + bitmasks[5], + bitmasks[4], + bitmasks[3], + bitmasks[2], + bitmasks[1], + bitmasks[0] + ); + } else + if (flags == WITH_RL_BITMASKS) { // rlevel_bitmask array in PACKED index order, so just print them + rlevel_bitmask_t *rlevel_bitmask = (rlevel_bitmask_t *)bm; + ddr_print("N%d.LMC%d.R%d: Rlevel Debug Bitmasks 8:0 : %05lx %05lx %05lx %05lx %05lx %05lx %05lx %05lx %05lx\n", + node, ddr_interface_num, rank, + PPBM(rlevel_bitmask[8].bm), + PPBM(rlevel_bitmask[7].bm), + PPBM(rlevel_bitmask[6].bm), + PPBM(rlevel_bitmask[5].bm), + PPBM(rlevel_bitmask[4].bm), + PPBM(rlevel_bitmask[3].bm), + PPBM(rlevel_bitmask[2].bm), + PPBM(rlevel_bitmask[1].bm), + PPBM(rlevel_bitmask[0].bm) + ); + } else + if (flags 
== WITH_RL_MASK_SCORES) { // rlevel_bitmask array in PACKED index order, so just print them + rlevel_bitmask_t *rlevel_bitmask = (rlevel_bitmask_t *)bm; + ddr_print("N%d.LMC%d.R%d: Rlevel Debug Bitmask Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n", + node, ddr_interface_num, rank, + rlevel_bitmask[8].errs, + rlevel_bitmask[7].errs, + rlevel_bitmask[6].errs, + rlevel_bitmask[5].errs, + rlevel_bitmask[4].errs, + rlevel_bitmask[3].errs, + rlevel_bitmask[2].errs, + rlevel_bitmask[1].errs, + rlevel_bitmask[0].errs + ); + } else + if (flags == WITH_RL_SEQ_SCORES) { // rlevel_byte array in UNPACKED index order, so xlate and print them + rlevel_byte_data_t *rlevel_byte = (rlevel_byte_data_t *)bm; + ddr_print("N%d.LMC%d.R%d: Rlevel Debug Non-seq Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n", + node, ddr_interface_num, rank, + rlevel_byte[XPU(8,ecc)].sqerrs, + rlevel_byte[XPU(7,ecc)].sqerrs, + rlevel_byte[XPU(6,ecc)].sqerrs, + rlevel_byte[XPU(5,ecc)].sqerrs, + rlevel_byte[XPU(4,ecc)].sqerrs, + rlevel_byte[XPU(3,ecc)].sqerrs, + rlevel_byte[XPU(2,ecc)].sqerrs, + rlevel_byte[XPU(1,ecc)].sqerrs, + rlevel_byte[XPU(0,ecc)].sqerrs + ); + } +} + +static inline void +display_WL_BM(bdk_node_t node, int ddr_interface_num, int rank, int *bitmasks) +{ + do_display_BM(node, ddr_interface_num, rank, (void *)bitmasks, WITH_WL_BITMASKS, 0); +} + +static inline void +display_RL_BM(bdk_node_t node, int ddr_interface_num, int rank, rlevel_bitmask_t *bitmasks, int ecc_ena) +{ + do_display_BM(node, ddr_interface_num, rank, (void *)bitmasks, WITH_RL_BITMASKS, ecc_ena); +} + +static inline void +display_RL_BM_scores(bdk_node_t node, int ddr_interface_num, int rank, rlevel_bitmask_t *bitmasks, int ecc_ena) +{ + do_display_BM(node, ddr_interface_num, rank, (void *)bitmasks, WITH_RL_MASK_SCORES, ecc_ena); +} + +static inline void +display_RL_SEQ_scores(bdk_node_t node, int ddr_interface_num, int rank, rlevel_byte_data_t *bytes, int ecc_ena) +{ + do_display_BM(node, ddr_interface_num, rank, (void *)bytes, WITH_RL_SEQ_SCORES, ecc_ena); +} + +unsigned short load_dll_offset(bdk_node_t node, int ddr_interface_num, + int dll_offset_mode, int byte_offset, int byte) +{ + bdk_lmcx_dll_ctl3_t ddr_dll_ctl3; + /* byte_sel: + 0x1 = byte 0, ..., 0x9 = byte 8 + 0xA = all bytes */ + int byte_sel = (byte == 10) ? byte : byte + 1; + + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); + SET_DDR_DLL_CTL3(load_offset, 0); + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u); + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); + + SET_DDR_DLL_CTL3(mode_sel, dll_offset_mode); + SET_DDR_DLL_CTL3(offset, (_abs(byte_offset)&0x3f) | (_sign(byte_offset) << 6)); /* Always 6-bit field? 
*/ + SET_DDR_DLL_CTL3(byte_sel, byte_sel); + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u); + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); + + SET_DDR_DLL_CTL3(load_offset, 1); + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u); + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); + + return ((unsigned short) GET_DDR_DLL_CTL3(offset)); +} + +void change_dll_offset_enable(bdk_node_t node, int ddr_interface_num, int change) +{ + bdk_lmcx_dll_ctl3_t ddr_dll_ctl3; + + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); + SET_DDR_DLL_CTL3(offset_ena, !!change); + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u); + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); +} + +static void process_custom_dll_offsets(bdk_node_t node, int ddr_interface_num, const char *enable_str, + const int8_t *offsets, const char *byte_str, int mode) +{ + const char *s; + int enabled; + int provided; + + if ((s = lookup_env_parameter("%s", enable_str)) != NULL) { + enabled = !!strtol(s, NULL, 0); + } else + enabled = -1; + + // enabled == -1: no override, do only configured offsets if provided + // enabled == 0: override OFF, do NOT do it even if configured offsets provided + // enabled == 1: override ON, do it for overrides plus configured offsets + + if (enabled == 0) + return; + + provided = is_dll_offset_provided(offsets); + + if (enabled < 0 && !provided) + return; + + int byte_offset; + unsigned short offset[9] = {0}; + int byte; + + // offsets need to be disabled while loading + change_dll_offset_enable(node, ddr_interface_num, 0); + + for (byte = 0; byte < 9; ++byte) { + + // always take the provided, if available + byte_offset = (provided) ? offsets[byte] : 0; + + // then, if enabled, use any overrides present + if (enabled > 0) { + if ((s = lookup_env_parameter(byte_str, ddr_interface_num, byte)) != NULL) { + byte_offset = strtol(s, NULL, 0); + } + } + + offset[byte] = load_dll_offset(node, ddr_interface_num, mode, byte_offset, byte); + } + + // re-enable offsets after loading + change_dll_offset_enable(node, ddr_interface_num, 1); + + ddr_print("N%d.LMC%d: DLL %s Offset 8:0 :" + " 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x\n", + node, ddr_interface_num, (mode == 2) ? "Read " : "Write", + offset[8], offset[7], offset[6], offset[5], offset[4], + offset[3], offset[2], offset[1], offset[0]); +} + +void perform_octeon3_ddr3_sequence(bdk_node_t node, int rank_mask, int ddr_interface_num, int sequence) +{ + /* + * 3. Without changing any other fields in LMC(0)_CONFIG, write + * LMC(0)_CONFIG[RANKMASK] then write both + * LMC(0)_SEQ_CTL[SEQ_SEL,INIT_START] = 1 with a single CSR write + * operation. LMC(0)_CONFIG[RANKMASK] bits should be set to indicate + * the ranks that will participate in the sequence. + * + * The LMC(0)_SEQ_CTL[SEQ_SEL] value should select power-up/init or + * selfrefresh exit, depending on whether the DRAM parts are in + * self-refresh and whether their contents should be preserved. While + * LMC performs these sequences, it will not perform any other DDR3 + * transactions. When the sequence is complete, hardware sets the + * LMC(0)_CONFIG[INIT_STATUS] bits for the ranks that have been + * initialized. 
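+ *
+ * (The code below follows these steps directly: it rewrites
+ * LMC(0)_CONFIG[RANKMASK], sets SEQ_SEL and INIT_START with one
+ * CSR write, waits the minimum 100us, then polls SEQ_COMPLETE
+ * with a one-second timeout.)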
+ * + * If power-up/init is selected immediately following a DRESET + * assertion, LMC executes the sequence described in the "Reset and + * Initialization Procedure" section of the JEDEC DDR3 + * specification. This includes activating CKE, writing all four DDR3 + * mode registers on all selected ranks, and issuing the required ZQCL + * command. The LMC(0)_CONFIG[RANKMASK] value should select all ranks + * with attached DRAM in this case. If LMC(0)_CONTROL[RDIMM_ENA] = 1, + * LMC writes the JEDEC standard SSTE32882 control words selected by + * LMC(0)_DIMM_CTL[DIMM*_WMASK] between DDR_CKE* signal assertion and + * the first DDR3 mode register write operation. + * LMC(0)_DIMM_CTL[DIMM*_WMASK] should be cleared to 0 if the + * corresponding DIMM is not present. + * + * If self-refresh exit is selected, LMC executes the required SRX + * command followed by a refresh and ZQ calibration. Section 4.5 + * describes behavior of a REF + ZQCS. LMC does not write the DDR3 + * mode registers as part of this sequence, and the mode register + * parameters must match at self-refresh entry and exit times. + * + * 4. Read LMC(0)_SEQ_CTL and wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be + * set. + * + * 5. Read LMC(0)_CONFIG[INIT_STATUS] and confirm that all ranks have + * been initialized. + */ + + const char *s; + static const char *sequence_str[] = { + "Power-up/init", + "Read-leveling", + "Self-refresh entry", + "Self-refresh exit", + "Illegal", + "Illegal", + "Write-leveling", + "Init Register Control Words", + "Mode Register Write", + "MPR Register Access", + "LMC Deskew/Internal Vref training", + "Offset Training" + }; + + bdk_lmcx_seq_ctl_t seq_ctl; + bdk_lmcx_config_t lmc_config; + + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num)); + lmc_config.s.rankmask = rank_mask; + DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u); + + seq_ctl.u = 0; + + seq_ctl.s.init_start = 1; + seq_ctl.s.seq_sel = sequence; + + VB_PRT(VBL_SEQ, "N%d.LMC%d: Performing LMC sequence=%x: rank_mask=0x%02x, %s\n", + node, ddr_interface_num, sequence, rank_mask, sequence_str[sequence]); + + if ((s = lookup_env_parameter("ddr_trigger_sequence%d", sequence)) != NULL) { + int trigger = strtoul(s, NULL, 0); + if (trigger) + pulse_gpio_pin(node, 1, 2); + } + + DRAM_CSR_WRITE(node, BDK_LMCX_SEQ_CTL(ddr_interface_num), seq_ctl.u); + BDK_CSR_READ(node, BDK_LMCX_SEQ_CTL(ddr_interface_num)); + + /* Wait 100us minimum before checking for sequence complete */ + bdk_wait_usec(100); + if (!bdk_is_platform(BDK_PLATFORM_ASIM) && + BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_SEQ_CTL(ddr_interface_num), seq_complete, ==, 1, 1000000)) + { + error_print("N%d.LMC%d: Timeout waiting for LMC sequence=%x, rank_mask=0x%02x, ignoring...\n", + node, ddr_interface_num, sequence, rank_mask); + } + else { + VB_PRT(VBL_SEQ, "N%d.LMC%d: LMC sequence=%x: Completed.\n", node, ddr_interface_num, sequence); + } +} + +void ddr4_mrw(bdk_node_t node, int ddr_interface_num, int rank, + int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1) +{ + bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl; + + lmc_mr_mpr_ctl.u = 0; + lmc_mr_mpr_ctl.s.mr_wr_addr = (mr_wr_addr == -1) ? 
0 : mr_wr_addr; + lmc_mr_mpr_ctl.s.mr_wr_sel = mr_wr_sel; + lmc_mr_mpr_ctl.s.mr_wr_rank = rank; + //lmc_mr_mpr_ctl.s.mr_wr_pda_mask = + //lmc_mr_mpr_ctl.s.mr_wr_pda_enable = + //lmc_mr_mpr_ctl.s.mpr_loc = + //lmc_mr_mpr_ctl.s.mpr_wr = + //lmc_mr_mpr_ctl.s.mpr_bit_select = + //lmc_mr_mpr_ctl.s.mpr_byte_select = + //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable = + lmc_mr_mpr_ctl.s.mr_wr_use_default_value = (mr_wr_addr == -1) ? 1 : 0; + lmc_mr_mpr_ctl.s.mr_wr_bg1 = mr_wr_bg1; + DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u); + + /* Mode Register Write */ + perform_octeon3_ddr3_sequence(node, 1 << rank, ddr_interface_num, 0x8); +} + +#define InvA0_17(x) (x ^ 0x22bf8) +static void set_mpr_mode (bdk_node_t node, int rank_mask, + int ddr_interface_num, int dimm_count, int mpr, int bg1) +{ + int rankx; + + ddr_print("All Ranks: Set mpr mode = %x %c-side\n", + mpr, (bg1==0) ? 'A' : 'B'); + + for (rankx = 0; rankx < dimm_count*4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + if (bg1 == 0) + ddr4_mrw(node, ddr_interface_num, rankx, mpr<<2, 3, bg1); /* MR3 A-side */ + else + ddr4_mrw(node, ddr_interface_num, rankx, InvA0_17(mpr<<2), ~3, bg1); /* MR3 B-side */ + } +} + +#if ENABLE_DISPLAY_MPR_PAGE +static void do_ddr4_mpr_read(bdk_node_t node, int ddr_interface_num, int rank, + int page, int location) +{ + bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl; + + lmc_mr_mpr_ctl.u = BDK_CSR_READ(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num)); + + lmc_mr_mpr_ctl.s.mr_wr_addr = 0; + lmc_mr_mpr_ctl.s.mr_wr_sel = page; /* Page */ + lmc_mr_mpr_ctl.s.mr_wr_rank = rank; + //lmc_mr_mpr_ctl.s.mr_wr_pda_mask = + //lmc_mr_mpr_ctl.s.mr_wr_pda_enable = + lmc_mr_mpr_ctl.s.mpr_loc = location; + lmc_mr_mpr_ctl.s.mpr_wr = 0; /* Read=0, Write=1 */ + //lmc_mr_mpr_ctl.s.mpr_bit_select = + //lmc_mr_mpr_ctl.s.mpr_byte_select = + //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable = + //lmc_mr_mpr_ctl.s.mr_wr_use_default_value = + //lmc_mr_mpr_ctl.s.mr_wr_bg1 = + + DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u); + + /* MPR register access sequence */ + perform_octeon3_ddr3_sequence(node, 1 << rank, ddr_interface_num, 0x9); + + debug_print("LMC_MR_MPR_CTL : 0x%016lx\n", lmc_mr_mpr_ctl.u); + debug_print("lmc_mr_mpr_ctl.s.mr_wr_addr: 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_addr); + debug_print("lmc_mr_mpr_ctl.s.mr_wr_sel : 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_sel); + debug_print("lmc_mr_mpr_ctl.s.mpr_loc : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_loc); + debug_print("lmc_mr_mpr_ctl.s.mpr_wr : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_wr); + +} +#endif + +int set_rdimm_mode(bdk_node_t node, int ddr_interface_num, int enable) +{ + bdk_lmcx_control_t lmc_control; + int save_rdimm_mode; + + lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num)); + save_rdimm_mode = lmc_control.s.rdimm_ena; + lmc_control.s.rdimm_ena = enable; + VB_PRT(VBL_FAE, "Setting RDIMM_ENA = %x\n", enable); + DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u); + + return (save_rdimm_mode); +} + +#if ENABLE_DISPLAY_MPR_PAGE +static void ddr4_mpr_read(bdk_node_t node, int ddr_interface_num, int rank, + int page, int location, uint64_t *mpr_data) +{ + do_ddr4_mpr_read(node, ddr_interface_num, rank, page, location); + + mpr_data[0] = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA0(ddr_interface_num)); + mpr_data[1] = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA1(ddr_interface_num)); + mpr_data[2] = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA2(ddr_interface_num)); + + debug_print("MPR Read %016lx.%016lx.%016lx\n", mpr_data[2], 
mpr_data[1], mpr_data[0]); +} + +/* Display MPR values for Page Location */ +static void Display_MPR_Page_Location(bdk_node_t node, int rank, + int ddr_interface_num, int dimm_count, + int page, int location, uint64_t *mpr_data) +{ + ddr4_mpr_read(node, ddr_interface_num, rank, page, location, mpr_data); + ddr_print("MPR Page %d, Loc %d %016lx.%016lx.%016lx\n", + page, location, mpr_data[2], mpr_data[1], mpr_data[0]); +} + +/* Display MPR values for Page */ +static void Display_MPR_Page(bdk_node_t node, int rank_mask, + int ddr_interface_num, int dimm_count, int page) +{ + int rankx; + uint64_t mpr_data[3]; + + for (rankx = 0; rankx < dimm_count * 4;rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + ddr_print("Rank %d: MPR values for Page %d\n", rankx, page); + for (int location = 0; location < 4; location++) { + Display_MPR_Page_Location(node, rankx, ddr_interface_num, dimm_count, + page, location, &mpr_data[0]); + } + + } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */ +} +#endif + +void ddr4_mpr_write(bdk_node_t node, int ddr_interface_num, int rank, + int page, int location, uint8_t mpr_data) +{ + bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl; + + lmc_mr_mpr_ctl.u = 0; + lmc_mr_mpr_ctl.s.mr_wr_addr = mpr_data; + lmc_mr_mpr_ctl.s.mr_wr_sel = page; /* Page */ + lmc_mr_mpr_ctl.s.mr_wr_rank = rank; + //lmc_mr_mpr_ctl.s.mr_wr_pda_mask = + //lmc_mr_mpr_ctl.s.mr_wr_pda_enable = + lmc_mr_mpr_ctl.s.mpr_loc = location; + lmc_mr_mpr_ctl.s.mpr_wr = 1; /* Read=0, Write=1 */ + //lmc_mr_mpr_ctl.s.mpr_bit_select = + //lmc_mr_mpr_ctl.s.mpr_byte_select = + //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable = + //lmc_mr_mpr_ctl.s.mr_wr_use_default_value = + //lmc_mr_mpr_ctl.s.mr_wr_bg1 = + DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u); + + /* MPR register access sequence */ + perform_octeon3_ddr3_sequence(node, (1 << rank), ddr_interface_num, 0x9); + + debug_print("LMC_MR_MPR_CTL : 0x%016lx\n", lmc_mr_mpr_ctl.u); + debug_print("lmc_mr_mpr_ctl.s.mr_wr_addr: 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_addr); + debug_print("lmc_mr_mpr_ctl.s.mr_wr_sel : 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_sel); + debug_print("lmc_mr_mpr_ctl.s.mpr_loc : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_loc); + debug_print("lmc_mr_mpr_ctl.s.mpr_wr : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_wr); +} + +void set_vref(bdk_node_t node, int ddr_interface_num, int rank, + int range, int value) +{ + bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl; + bdk_lmcx_modereg_params3_t lmc_modereg_params3; + int mr_wr_addr = 0; + + lmc_mr_mpr_ctl.u = 0; + lmc_modereg_params3.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS3(ddr_interface_num)); + + mr_wr_addr |= lmc_modereg_params3.s.tccd_l<<10; /* A12:A10 tCCD_L */ + mr_wr_addr |= 1<<7; /* A7 1 = Enable(Training Mode) */ + mr_wr_addr |= range<<6; /* A6 VrefDQ Training Range */ + mr_wr_addr |= value<<0; /* A5:A0 VrefDQ Training Value */ + + lmc_mr_mpr_ctl.s.mr_wr_addr = mr_wr_addr; + lmc_mr_mpr_ctl.s.mr_wr_sel = 6; /* Write MR6 */ + lmc_mr_mpr_ctl.s.mr_wr_rank = rank; + //lmc_mr_mpr_ctl.s.mr_wr_pda_mask = + //lmc_mr_mpr_ctl.s.mr_wr_pda_enable = + //lmc_mr_mpr_ctl.s.mpr_loc = location; + //lmc_mr_mpr_ctl.s.mpr_wr = 0; /* Read=0, Write=1 */ + //lmc_mr_mpr_ctl.s.mpr_bit_select = + //lmc_mr_mpr_ctl.s.mpr_byte_select = + //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable = + //lmc_mr_mpr_ctl.s.mr_wr_use_default_value = + //lmc_mr_mpr_ctl.s.mr_wr_bg1 = + DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u); + + /* 0x8 = Mode Register Write */ + perform_octeon3_ddr3_sequence(node, 1<<rank, 
ddr_interface_num, 0x8); + + /* It is vendor specific whether Vref_value is captured with A7=1. + A subsequent MRS might be necessary. */ + perform_octeon3_ddr3_sequence(node, 1<<rank, ddr_interface_num, 0x8); + + mr_wr_addr &= ~(1<<7); /* A7 0 = Disable(Training Mode) */ + lmc_mr_mpr_ctl.s.mr_wr_addr = mr_wr_addr; + DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u); +} + +static void set_DRAM_output_inversion (bdk_node_t node, + int ddr_interface_num, + int dimm_count, + int rank_mask, + int inversion) +{ + bdk_lmcx_ddr4_dimm_ctl_t lmc_ddr4_dimm_ctl; + bdk_lmcx_dimmx_params_t lmc_dimmx_params; + bdk_lmcx_dimm_ctl_t lmc_dimm_ctl; + int dimm_no; + + lmc_ddr4_dimm_ctl.u = 0; /* Don't touch extended register control words */ + DRAM_CSR_WRITE(node, BDK_LMCX_DDR4_DIMM_CTL(ddr_interface_num), lmc_ddr4_dimm_ctl.u); + + ddr_print("All DIMMs: Register Control Word RC0 : %x\n", (inversion & 1)); + + for (dimm_no = 0; dimm_no < dimm_count; ++dimm_no) { + lmc_dimmx_params.u = BDK_CSR_READ(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm_no)); + lmc_dimmx_params.s.rc0 = (lmc_dimmx_params.s.rc0 & ~1) | (inversion & 1); + DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm_no), lmc_dimmx_params.u); + } + + /* LMC0_DIMM_CTL */ + lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num)); + lmc_dimm_ctl.s.dimm0_wmask = 0x1; + lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0x0001 : 0x0000; + + ddr_print("LMC DIMM_CTL : 0x%016lx\n", + lmc_dimm_ctl.u); + DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u); + + perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x7 ); /* Init RCW */ +} + +static void write_mpr_page0_pattern (bdk_node_t node, int rank_mask, + int ddr_interface_num, int dimm_count, int pattern, int location_mask) +{ + int rankx; + int location; + + for (rankx = 0; rankx < dimm_count*4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + for (location = 0; location < 4; ++location) { + if (!(location_mask & (1 << location))) + continue; + + ddr4_mpr_write(node, ddr_interface_num, rankx, + /* page */ 0, /* location */ location, pattern); + } + } +} + +static void change_rdimm_mpr_pattern (bdk_node_t node, int rank_mask, + int ddr_interface_num, int dimm_count) +{ + int save_ref_zqcs_int; + bdk_lmcx_config_t lmc_config; + + /* + Okay, here is the latest sequence. This should work for all + chips and passes (78,88,73,etc). This sequence should be run + immediately after DRAM INIT. The basic idea is to write the + same pattern into each of the 4 MPR locations in the DRAM, so + that the same value is returned when doing MPR reads regardless + of the inversion state. My advice is to put this into a + function, change_rdimm_mpr_pattern or something like that, so + that it can be called multiple times, as I think David wants a + clock-like pattern for OFFSET training, but does not want a + clock pattern for Bit-Deskew. You should then be able to call + this at any point in the init sequence (after DRAM init) to + change the pattern to a new value. + Mike + + A correction: PHY doesn't need any pattern during offset + training, but needs clock like pattern for internal vref and + bit-dskew training. So for that reason, these steps below have + to be conducted before those trainings to pre-condition + the pattern. David + + Note: Step 3, 4, 8 and 9 have to be done through RDIMM + sequence. 
If you issue MRW sequence to do RCW write (in o78 pass + 1 at least), LMC will still do two commands because + CONTROL[RDIMM_ENA] is still set high. We don't want it to have + any unintentional mode register write so it's best to do what + Mike is doing here. + Andrew + */ + + + /* 1) Disable refresh (REF_ZQCS_INT = 0) */ + + debug_print("1) Disable refresh (REF_ZQCS_INT = 0)\n"); + + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num)); + save_ref_zqcs_int = lmc_config.s.ref_zqcs_int; + lmc_config.s.ref_zqcs_int = 0; + DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u); + + + /* 2) Put all devices in MPR mode (Run MRW sequence (sequence=8) + with MODEREG_PARAMS0[MPRLOC]=0, + MODEREG_PARAMS0[MPR]=1, MR_MPR_CTL[MR_WR_SEL]=3, and + MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1) */ + + debug_print("2) Put all devices in MPR mode (Run MRW sequence (sequence=8)\n"); + + set_mpr_mode(node, rank_mask, ddr_interface_num, dimm_count, /* mpr */ 1, /* bg1 */ 0); /* A-side */ + set_mpr_mode(node, rank_mask, ddr_interface_num, dimm_count, /* mpr */ 1, /* bg1 */ 1); /* B-side */ + + /* a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and set + the value you would like directly into + MR_MPR_CTL[MR_WR_ADDR] */ + + /* 3) Disable RCD Parity (if previously enabled) - parity does not + work if inversion disabled */ + + debug_print("3) Disable RCD Parity\n"); + + /* 4) Disable Inversion in the RCD. */ + /* a. I did (3&4) via the RDIMM sequence (seq_sel=7), but it + may be easier to use the MRW sequence (seq_sel=8). Just set + MR_MPR_CTL[MR_WR_SEL]=7, MR_MPR_CTL[MR_WR_ADDR][3:0]=data, + MR_MPR_CTL[MR_WR_ADDR][7:4]=RCD reg */ + + debug_print("4) Disable Inversion in the RCD.\n"); + + set_DRAM_output_inversion(node, ddr_interface_num, dimm_count, rank_mask, + 1 /* 1=disable output inversion*/); + + /* 5) Disable CONTROL[RDIMM_ENA] so that MR sequence goes out + non-inverted. */ + + debug_print("5) Disable CONTROL[RDIMM_ENA]\n"); + + set_rdimm_mode(node, ddr_interface_num, 0); + + /* 6) Write all 4 MPR registers with the desired pattern (have to + do this for all enabled ranks) */ + /* a. MR_MPR_CTL.MPR_WR=1, MR_MPR_CTL.MPR_LOC=0..3, + MR_MPR_CTL.MR_WR_SEL=0, MR_MPR_CTL.MR_WR_ADDR[7:0]=pattern */ + + debug_print("6) Write all 4 MPR page 0 Training Patterns\n"); + + write_mpr_page0_pattern(node, rank_mask, + ddr_interface_num, dimm_count, 0x55, 0x8); + + /* 7) Re-enable RDIMM_ENA */ + + debug_print("7) Re-enable RDIMM_ENA\n"); + + set_rdimm_mode(node, ddr_interface_num, 1); + + /* 8) Re-enable RDIMM inversion */ + + debug_print("8) Re-enable RDIMM inversion\n"); + + set_DRAM_output_inversion(node, ddr_interface_num, dimm_count, rank_mask, + 0 /* 0=re-enable output inversion*/); + + /* 9) Re-enable RDIMM parity (if desired) */ + + debug_print("9) Re-enable RDIMM parity (if desired)\n"); + + /* 10)Take B-side devices out of MPR mode (Run MRW sequence + (sequence=8) with MODEREG_PARAMS0[MPRLOC]=0, + MODEREG_PARAMS0[MPR]=0, MR_MPR_CTL[MR_WR_SEL]=3, and + MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1) */ + + debug_print("10)Take B-side devices out of MPR mode\n"); + + set_mpr_mode(node, rank_mask, ddr_interface_num, dimm_count, /* mpr */ 0, /* bg1 */ 1); + + /* a. 
Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and + set the value you would like directly into + MR_MPR_CTL[MR_WR_ADDR] */ + + /* 11)Re-enable refresh (REF_ZQCS_INT=previous value) */ + + debug_print("11)Re-enable refresh (REF_ZQCS_INT=previous value)\n"); + + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num)); + lmc_config.s.ref_zqcs_int = save_ref_zqcs_int; + DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u); + +} + +static unsigned char ddr4_rodt_ohms [RODT_OHMS_COUNT ] = { 0, 40, 60, 80, 120, 240, 34, 48 }; +static unsigned char ddr4_rtt_nom_ohms [RTT_NOM_OHMS_COUNT ] = { 0, 60, 120, 40, 240, 48, 80, 34 }; +static unsigned char ddr4_rtt_nom_table [RTT_NOM_TABLE_COUNT ] = { 0, 4, 2, 6, 1, 5, 3, 7 }; +static unsigned char ddr4_rtt_wr_ohms [RTT_WR_OHMS_COUNT ] = { 0, 120, 240, 99, 80 }; // setting HiZ ohms to 99 for computed vref +static unsigned char ddr4_dic_ohms [DIC_OHMS_COUNT ] = { 34, 48 }; +static short ddr4_drive_strength[DRIVE_STRENGTH_COUNT] = { 0, 0, 26, 30, 34, 40, 48, 68, 0,0,0,0,0,0,0 }; +static short ddr4_dqx_strength [DRIVE_STRENGTH_COUNT] = { 0, 24, 27, 30, 34, 40, 48, 60, 0,0,0,0,0,0,0 }; + +impedence_values_t ddr4_impedence_values = { + .rodt_ohms = ddr4_rodt_ohms , + .rtt_nom_ohms = ddr4_rtt_nom_ohms , + .rtt_nom_table = ddr4_rtt_nom_table , + .rtt_wr_ohms = ddr4_rtt_wr_ohms , + .dic_ohms = ddr4_dic_ohms , + .drive_strength = ddr4_drive_strength, + .dqx_strength = ddr4_dqx_strength , +}; + +static unsigned char ddr3_rodt_ohms [RODT_OHMS_COUNT ] = { 0, 20, 30, 40, 60, 120, 0, 0 }; +static unsigned char ddr3_rtt_nom_ohms [RTT_NOM_OHMS_COUNT ] = { 0, 60, 120, 40, 20, 30, 0, 0 }; +static unsigned char ddr3_rtt_nom_table [RTT_NOM_TABLE_COUNT ] = { 0, 2, 1, 3, 5, 4, 0, 0 }; +static unsigned char ddr3_rtt_wr_ohms [RTT_WR_OHMS_COUNT ] = { 0, 60, 120 }; +static unsigned char ddr3_dic_ohms [DIC_OHMS_COUNT ] = { 40, 34 }; +static short ddr3_drive_strength[DRIVE_STRENGTH_COUNT] = { 0, 24, 27, 30, 34, 40, 48, 60, 0,0,0,0,0,0,0 }; +static impedence_values_t ddr3_impedence_values = { + .rodt_ohms = ddr3_rodt_ohms , + .rtt_nom_ohms = ddr3_rtt_nom_ohms , + .rtt_nom_table = ddr3_rtt_nom_table , + .rtt_wr_ohms = ddr3_rtt_wr_ohms , + .dic_ohms = ddr3_dic_ohms , + .drive_strength = ddr3_drive_strength, + .dqx_strength = ddr3_drive_strength, +}; + + +uint64_t +hertz_to_psecs(uint64_t hertz) +{ + return divide_nint((uint64_t) 1000*1000*1000*1000, hertz); /* Clock in psecs */ +} + +#define DIVIDEND_SCALE 1000 /* Scale to avoid rounding error. */ +uint64_t +psecs_to_mts(uint64_t psecs) +{ + //ddr_print("psecs %ld, divisor %ld\n", psecs, divide_nint((uint64_t)(2 * 1000000 * DIVIDEND_SCALE), psecs)); + return divide_nint(divide_nint((uint64_t)(2 * 1000000 * DIVIDEND_SCALE), psecs), DIVIDEND_SCALE); +} + +#define WITHIN(v,b,m) (((v)>=((b)-(m)))&&((v)<=((b)+(m)))) + +// pretty-print version, only works with what comes from the SPD: tCKmin or tCKAVGmin +unsigned long +pretty_psecs_to_mts(uint64_t psecs) +{ + uint64_t ret = 0; // default to error + if (WITHIN(psecs, 1250, 1)) + ret = 1600; + else if (WITHIN(psecs, 1071, 1)) + ret = 1866; + else if (WITHIN(psecs, 937, 1)) + ret = 2133; + else if (WITHIN(psecs, 833, 1)) + ret = 2400; + else if (WITHIN(psecs, 750, 1)) + ret = 2666; + return ret; +} + +uint64_t +mts_to_hertz(uint64_t mts) +{ + return ((mts * 1000 * 1000) / 2); +} + +#define DEBUG_RC3X_COMPUTE 0 +#define rc3x_print(...) 
\
+ do { if (DEBUG_RC3X_COMPUTE) printf(__VA_ARGS__); } while (0)
+
+static int compute_rc3x (int64_t tclk_psecs)
+{
+ long speed;
+ long tclk_psecs_min, tclk_psecs_max;
+ long data_rate_mhz, data_rate_mhz_min, data_rate_mhz_max;
+ int rc3x;
+
+#define ENCODING_BASE 1240
+
+ data_rate_mhz = psecs_to_mts(tclk_psecs);
+
+ /* 2400 MT/s is a special case. Using integer arithmetic it rounds
+ from 833 psecs to 2401 MT/s. Force it to 2400 to pick the
+ proper setting from the table. */
+ if (tclk_psecs == 833)
+ data_rate_mhz = 2400;
+
+ for (speed = ENCODING_BASE; speed < 3200; speed += 20) {
+ int error = 0;
+
+ tclk_psecs_min = hertz_to_psecs(mts_to_hertz(speed + 00)); /* Clock in psecs */
+ tclk_psecs_max = hertz_to_psecs(mts_to_hertz(speed + 18)); /* Clock in psecs */
+
+ data_rate_mhz_min = psecs_to_mts(tclk_psecs_min);
+ data_rate_mhz_max = psecs_to_mts(tclk_psecs_max);
+
+ /* Force alignment to a multiple of 20 to avoid rounding errors. */
+ data_rate_mhz_min = ((data_rate_mhz_min + 18) / 20) * 20;
+ data_rate_mhz_max = ((data_rate_mhz_max + 18) / 20) * 20;
+
+ error += (speed + 00 != data_rate_mhz_min);
+ error += (speed + 20 != data_rate_mhz_max);
+
+ rc3x = (speed - ENCODING_BASE) / 20;
+
+ rc3x_print("rc3x: %02x speed: %4ld MT/s < f <= %4ld MT/s, psec: %3ld:%3ld %4ld:%4ld %s\n",
+ rc3x,
+ speed, speed + 20,
+ tclk_psecs_min, tclk_psecs_max,
+ data_rate_mhz_min, data_rate_mhz_max,
+ error ? "****" : "");
+
+ if (data_rate_mhz <= (speed + 20)) {
+ rc3x_print("rc3x: %4ld MT/s <= %4ld MT/s\n", data_rate_mhz, speed + 20);
+ break;
+ }
+ }
+ return rc3x;
+}
+
+static const int rlevel_separate_ab = 1;
+
+int init_octeon3_ddr3_interface(bdk_node_t node,
+ const ddr_configuration_t *ddr_configuration,
+ uint32_t ddr_hertz,
+ uint32_t cpu_hertz,
+ uint32_t ddr_ref_hertz,
+ int board_type,
+ int board_rev_maj,
+ int board_rev_min,
+ int ddr_interface_num,
+ uint32_t ddr_interface_mask
+ )
+{
+ const char *s;
+
+ const dimm_odt_config_t *odt_1rank_config = ddr_configuration->odt_1rank_config;
+ const dimm_odt_config_t *odt_2rank_config = ddr_configuration->odt_2rank_config;
+ const dimm_odt_config_t *odt_4rank_config = ddr_configuration->odt_4rank_config;
+ const dimm_config_t *dimm_config_table = ddr_configuration->dimm_config_table;
+ const dimm_odt_config_t *odt_config;
+ const ddr3_custom_config_t *custom_lmc_config = &ddr_configuration->custom_lmc_config;
+ int odt_idx;
+
+ /*
+ ** Compute clock rates to the nearest picosecond. 
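+ ** hertz_to_psecs() rounds 10^12/f to the nearest integer, so an
+ ** 800 MHz DDR clock becomes 1250 ps; pretty_psecs_to_mts() maps
+ ** that value back to 1600 MT/s.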
+ */ + uint64_t tclk_psecs = hertz_to_psecs(ddr_hertz); /* Clock in psecs */ + uint64_t eclk_psecs = hertz_to_psecs(cpu_hertz); /* Clock in psecs */ + + int row_bits, col_bits, num_banks, num_ranks, dram_width; + int dimm_count = 0; + int fatal_error = 0; /* Accumulate and report all the errors before giving up */ + + int safe_ddr_flag = 0; /* Flag that indicates safe DDR settings should be used */ + int ddr_interface_64b = 1; /* THUNDER Default: 64bit interface width */ + int ddr_interface_bytemask; + uint32_t mem_size_mbytes = 0; + unsigned int didx; + int bank_bits = 0; + int bunk_enable; + int rank_mask; + int column_bits_start = 1; + int row_lsb; + int pbank_lsb; + int use_ecc = 1; + int mtb_psec = 0; /* quiet */ + short ftb_Dividend; + short ftb_Divisor; + int tAAmin; + int tCKmin; + int CL, min_cas_latency = 0, max_cas_latency = 0, override_cas_latency = 0; + int ddr_rtt_nom_auto, ddr_rodt_ctl_auto; + int i; + + int spd_addr; + int spd_org; + int spd_banks; + int spd_rdimm; + int spd_dimm_type; + int spd_ecc; + uint32_t spd_cas_latency; + int spd_mtb_dividend; + int spd_mtb_divisor; + int spd_tck_min; + int spd_taa_min; + int spd_twr; + int spd_trcd; + int spd_trrd; + int spd_trp; + int spd_tras; + int spd_trc; + int spd_trfc; + int spd_twtr; + int spd_trtp; + int spd_tfaw; + int spd_addr_mirror; + int spd_package = 0; + int spd_rawcard = 0; + int spd_rawcard_AorB = 0; + int is_stacked_die = 0; + int disable_stacked_die = 0; + int is_3ds_dimm = 0; // 3DS + int lranks_per_prank = 1; // 3DS: logical ranks per package rank + int lranks_bits = 0; // 3DS: logical ranks bits + int die_capacity = 0; // in Mbits; only used for 3DS + + /* FTB values are two's complement ranging from +127 to -128. */ + typedef signed char SC_t; + + int twr; + int trcd; + int trrd; + int trp; + int tras; + int trc; + int trfc; + int twtr; + int trtp = 0; /* quiet */ + int tfaw; + + int wlevel_bitmask_errors = 0; + int wlevel_loops; + int default_rtt_nom[4]; + int dyn_rtt_nom_mask = 0; + + ddr_type_t ddr_type; + int ddr4_tCKAVGmin = 0; /* quiet */ + int ddr4_tCKAVGmax = 0; /* quiet */ + int ddr4_tRCDmin = 0; /* quiet */ + int ddr4_tRPmin = 0; /* quiet */ + int ddr4_tRASmin = 0; /* quiet */ + int ddr4_tRCmin = 0; /* quiet */ + int ddr4_tRFC1min = 0; /* quiet */ + int ddr4_tRFC2min = 0; /* quiet */ + int ddr4_tRFC4min = 0; /* quiet */ + int ddr4_tFAWmin = 0; /* quiet */ + int ddr4_tRRD_Smin = 0; /* quiet */ + int ddr4_tRRD_Lmin; + int ddr4_tCCD_Lmin; + impedence_values_t *imp_values; + int default_rodt_ctl; + // default to disabled (ie, LMC restart, not chip reset) + int ddr_disable_chip_reset = 1; + int disable_deskew_training = 0; + const char *dimm_type_name; + + /* Allow the Write bit-deskew feature to be enabled when desired. */ + // NOTE: THUNDER pass 2.x only, 81xx, 83xx + int enable_write_deskew = ENABLE_WRITE_DESKEW_DEFAULT; + +#if SWL_TRY_HWL_ALT + typedef struct { + uint16_t hwl_alt_mask; // mask of bytelanes with alternate + uint16_t hwl_alt_delay[9]; // bytelane alternate avail if mask=1 + } hwl_alt_by_rank_t; + hwl_alt_by_rank_t hwl_alts[4]; + memset(hwl_alts, 0, sizeof(hwl_alts)); +#endif /* SWL_TRY_HWL_ALT */ + + bdk_lmcx_config_t lmc_config; + + /* Initialize these to shut up the compiler. 
They are configured + and used only for DDR4 */ + ddr4_tRRD_Lmin = 6000; + ddr4_tCCD_Lmin = 6000; + + ddr_print("\nInitializing node %d DDR interface %d, DDR Clock %d, DDR Reference Clock %d\n", + node, ddr_interface_num, ddr_hertz, ddr_ref_hertz); + + if (dimm_config_table[0].spd_addr == 0 && !dimm_config_table[0].spd_ptr) { + error_print("ERROR: No dimms specified in the dimm_config_table.\n"); + return (-1); + } + + // allow some overrides to be done + + // this one controls whether chip RESET is done, or LMC init restarted from step 6.9.6 + if ((s = lookup_env_parameter("ddr_disable_chip_reset")) != NULL) { + ddr_disable_chip_reset = !!strtoul(s, NULL, 0); + } + // this one controls whether Deskew Training is performed + if ((s = lookup_env_parameter("ddr_disable_deskew_training")) != NULL) { + disable_deskew_training = !!strtoul(s, NULL, 0); + } + // this one is in Validate_Read_Deskew_Training and controls a preliminary delay + if ((s = lookup_env_parameter("ddr_deskew_validation_delay")) != NULL) { + deskew_validation_delay = strtoul(s, NULL, 0); + } + // this one is in Perform_Read_Deskew_Training and controls lock retries + if ((s = lookup_env_parameter("ddr_lock_retries")) != NULL) { + default_lock_retry_limit = strtoul(s, NULL, 0); + } + // this one controls whether stacked die status can affect processing + // disabling it will affect computed vref adjustment, and rodt_row_skip_mask + if ((s = lookup_env_parameter("ddr_disable_stacked_die")) != NULL) { + disable_stacked_die = !!strtoul(s, NULL, 0); + } + + // setup/override for write bit-deskew feature + if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx + // FIXME: allow override + if ((s = lookup_env_parameter("ddr_enable_write_deskew")) != NULL) { + enable_write_deskew = !!strtoul(s, NULL, 0); + } // else take default setting + } else { // not pass 2.x + enable_write_deskew = 0; // force disabled + } + +#if 0 // FIXME: do we really need this anymore? + if (dram_is_verbose(VBL_NORM)) { + printf("DDR SPD Table:"); + for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) { + if (dimm_config_table[didx].spd_addr == 0) break; + printf(" --ddr%dspd=0x%02x", ddr_interface_num, dimm_config_table[didx].spd_addr); + } + printf("\n"); + } +#endif + + /* + ** Walk the DRAM Socket Configuration Table to see what is installed. 
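+ ** validate_dimm() reports 1 when the SPD identifies a DIMM in the
+ ** socket; the walk stops at the first empty slot, so dimm_count is
+ ** the number of contiguously populated sockets.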
+ */ + for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) + { + /* Check for lower DIMM socket populated */ + if (validate_dimm(node, &dimm_config_table[didx]) == 1) { + // NOTE: DIMM info printing is now done later when more details are available + ++dimm_count; + } else { break; } /* Finished when there is no lower DIMM */ + } + + + initialize_ddr_clock(node, + ddr_configuration, + cpu_hertz, + ddr_hertz, + ddr_ref_hertz, + ddr_interface_num, + ddr_interface_mask); + + if (!odt_1rank_config) + odt_1rank_config = disable_odt_config; + if (!odt_2rank_config) + odt_2rank_config = disable_odt_config; + if (!odt_4rank_config) + odt_4rank_config = disable_odt_config; + + if ((s = lookup_env_parameter("ddr_safe")) != NULL) { + safe_ddr_flag = !!strtoul(s, NULL, 0); + } + + + if (dimm_count == 0) { + error_print("ERROR: DIMM 0 not detected.\n"); + return(-1); + } + + // look for 32-bit mode specified in the config + if (custom_lmc_config->mode32b) { + ddr_interface_64b = 0; + } + + if (ddr_interface_64b == 0) { // check if 32-bit mode is bad + if (!CAVIUM_IS_MODEL(CAVIUM_CN81XX)) { + error_print("32-bit interface width is NOT supported for this Thunder model\n"); + ddr_interface_64b = 1; // force to 64-bit + } + } else { // check if 64-bit mode is bad + if (CAVIUM_IS_MODEL(CAVIUM_CN81XX)) { // check the fuses on 81XX for forced 32-bit mode + BDK_CSR_INIT(mio_fus_dat2, node, BDK_MIO_FUS_DAT2); + if (mio_fus_dat2.s.lmc_mode32) { + error_print("32-bit interface width is ONLY supported for this Thunder model\n"); + ddr_interface_64b = 0; // force to 32-bit + } + } + } + + // finally, say we are in 32-bit mode when it has been validated + if (ddr_interface_64b == 0) { + ddr_print("N%d.LMC%d: Setting 32-bit data width\n", + node, ddr_interface_num); + } + + /* ddr_type only indicates DDR4 or DDR3 */ + ddr_type = get_ddr_type(node, &dimm_config_table[0]); + debug_print("DRAM Device Type: DDR%d\n", ddr_type); + + spd_dimm_type = get_dimm_module_type(node, &dimm_config_table[0], ddr_type); + + if (ddr_type == DDR4_DRAM) { + int spd_module_type; + int asymmetric; + const char *signal_load[4] = {"", "MLS", "3DS", "RSV"}; + + imp_values = &ddr4_impedence_values; + dimm_type_name = ddr4_dimm_types[spd_dimm_type]; + + spd_addr = read_spd(node, &dimm_config_table[0], DDR4_SPD_ADDRESSING_ROW_COL_BITS); + spd_org = read_spd(node, &dimm_config_table[0], DDR4_SPD_MODULE_ORGANIZATION); + spd_banks = 0xFF & read_spd(node, &dimm_config_table[0], DDR4_SPD_DENSITY_BANKS); + + bank_bits = (2 + ((spd_banks >> 4) & 0x3)) + ((spd_banks >> 6) & 0x3); + bank_bits = min((int)bank_bits, 4); /* Controller can only address 4 bits. */ + + spd_package = 0XFF & read_spd(node, &dimm_config_table[0], DDR4_SPD_PACKAGE_TYPE); + if (spd_package & 0x80) { // non-monolithic device + is_stacked_die = (!disable_stacked_die) ? ((spd_package & 0x73) == 0x11) : 0; + ddr_print("DDR4: Package Type 0x%x (%s), %d die\n", spd_package, + signal_load[(spd_package & 3)], ((spd_package >> 4) & 7) + 1); + is_3ds_dimm = ((spd_package & 3) == 2); // is it 3DS? + if (is_3ds_dimm) { // is it 3DS? + lranks_per_prank = ((spd_package >> 4) & 7) + 1; + // FIXME: should make sure it is only 2H or 4H or 8H? 
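+ // A 3DS stack is 2H, 4H or 8H, so lranks_per_prank is 2, 4 or 8;
+ // the shift below plus the ==4 fixup yields log2(): 1, 2 or 3 bits.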
+ lranks_bits = lranks_per_prank >> 1; + if (lranks_bits == 4) lranks_bits = 3; + } + } else if (spd_package != 0) { + // FIXME: print non-zero monolithic device definition + ddr_print("DDR4: Package Type MONOLITHIC: %d die, signal load %d\n", + ((spd_package >> 4) & 7) + 1, (spd_package & 3)); + } + + asymmetric = (spd_org >> 6) & 1; + if (asymmetric) { + int spd_secondary_pkg = read_spd(node, &dimm_config_table[0], + DDR4_SPD_SECONDARY_PACKAGE_TYPE); + ddr_print("DDR4: Module Organization: ASYMMETRICAL: Secondary Package Type 0x%x\n", + spd_secondary_pkg); + } else { + uint64_t bus_width = 8 << (0x07 & read_spd(node, &dimm_config_table[0], + DDR4_SPD_MODULE_MEMORY_BUS_WIDTH)); + uint64_t ddr_width = 4 << ((spd_org >> 0) & 0x7); + uint64_t module_cap; + int shift = (spd_banks & 0x0F); + die_capacity = (shift < 8) ? (256UL << shift) : ((12UL << (shift & 1)) << 10); + ddr_print("DDR4: Module Organization: SYMMETRICAL: capacity per die %d %cbit\n", + (die_capacity > 512) ? (die_capacity >> 10) : die_capacity, + (die_capacity > 512) ? 'G' : 'M'); + module_cap = ((uint64_t)die_capacity << 20) / 8UL * bus_width / ddr_width * + /* no. pkg ranks*/(1UL + ((spd_org >> 3) & 0x7)); + if (is_3ds_dimm) // is it 3DS? + module_cap *= /* die_count */(uint64_t)(((spd_package >> 4) & 7) + 1); + ddr_print("DDR4: Module Organization: SYMMETRICAL: capacity per module %ld GB\n", + module_cap >> 30); + } + + spd_rawcard = 0xFF & read_spd(node, &dimm_config_table[0], DDR4_SPD_REFERENCE_RAW_CARD); + ddr_print("DDR4: Reference Raw Card 0x%x \n", spd_rawcard); + + spd_module_type = read_spd(node, &dimm_config_table[0], DDR4_SPD_KEY_BYTE_MODULE_TYPE); + if (spd_module_type & 0x80) { // HYBRID module + ddr_print("DDR4: HYBRID module, type %s\n", + ((spd_module_type & 0x70) == 0x10) ? "NVDIMM" : "UNKNOWN"); + } + + spd_dimm_type = spd_module_type & 0x0F; + spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) || (spd_dimm_type == 8); + if (spd_rdimm) { + int spd_mfgr_id = read_spd(node, &dimm_config_table[0], DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB) | + (read_spd(node, &dimm_config_table[0], DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB) << 8); + int spd_register_rev = read_spd(node, &dimm_config_table[0], DDR4_SPD_REGISTER_REVISION_NUMBER); + ddr_print("DDR4: RDIMM Register Manufacturer ID 0x%x Revision 0x%x\n", + spd_mfgr_id, spd_register_rev); + + // RAWCARD A or B must be bit 7=0 and bits 4-0 either 00000(A) or 00001(B) + spd_rawcard_AorB = ((spd_rawcard & 0x9fUL) <= 1); + } + } else { + imp_values = &ddr3_impedence_values; + dimm_type_name = ddr3_dimm_types[spd_dimm_type]; + + spd_addr = read_spd(node, &dimm_config_table[0], DDR3_SPD_ADDRESSING_ROW_COL_BITS); + spd_org = read_spd(node, &dimm_config_table[0], DDR3_SPD_MODULE_ORGANIZATION); + spd_banks = read_spd(node, &dimm_config_table[0], DDR3_SPD_DENSITY_BANKS) & 0xff; + + bank_bits = 3 + ((spd_banks >> 4) & 0x7); + bank_bits = min((int)bank_bits, 3); /* Controller can only address 3 bits. */ + + spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) || (spd_dimm_type == 9); + } + +#if 0 // FIXME: why should this be possible OR needed? 
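+ // (Dead code: this would let the ddr_rdimm_ena environment variable
+ // override the SPD-derived spd_rdimm flag.)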
+ if ((s = lookup_env_parameter("ddr_rdimm_ena")) != NULL) {
+ spd_rdimm = !!strtoul(s, NULL, 0);
+ }
+#endif
+
+ debug_print("spd_addr : %#06x\n", spd_addr );
+ debug_print("spd_org : %#06x\n", spd_org );
+ debug_print("spd_banks : %#06x\n", spd_banks );
+
+ row_bits = 12 + ((spd_addr >> 3) & 0x7);
+ col_bits = 9 + ((spd_addr >> 0) & 0x7);
+
+ num_ranks = 1 + ((spd_org >> 3) & 0x7);
+ dram_width = 4 << ((spd_org >> 0) & 0x7);
+ num_banks = 1 << bank_bits;
+
+ if ((s = lookup_env_parameter("ddr_num_ranks")) != NULL) {
+ num_ranks = strtoul(s, NULL, 0);
+ }
+
+ /* FIX
+ ** Check that values are within some theoretical limits.
+ ** col_bits(min) = row_lsb(min) - bank_bits(max) - bus_bits(max) = 14 - 3 - 4 = 7
+ ** col_bits(max) = row_lsb(max) - bank_bits(min) - bus_bits(min) = 18 - 2 - 3 = 13
+ */
+ if ((col_bits > 13) || (col_bits < 7)) {
+ error_print("Unsupported number of Col Bits: %d\n", col_bits);
+ ++fatal_error;
+ }
+
+ /* FIX
+ ** Check that values are within some theoretical limits.
+ ** row_bits(min) = pbank_lsb(min) - row_lsb(max) - rank_bits = 26 - 18 - 1 = 7
+ ** row_bits(max) = pbank_lsb(max) - row_lsb(min) - rank_bits = 33 - 14 - 1 = 18
+ */
+ if ((row_bits > 18) || (row_bits < 7)) {
+ error_print("Unsupported number of Row Bits: %d\n", row_bits);
+ ++fatal_error;
+ }
+
+ if (bdk_is_platform(BDK_PLATFORM_ASIM))
+ wlevel_loops = 0;
+ else {
+ wlevel_loops = WLEVEL_LOOPS_DEFAULT;
+ // accept generic or interface-specific override but not for ASIM...
+ if ((s = lookup_env_parameter("ddr_wlevel_loops")) == NULL)
+ s = lookup_env_parameter("ddr%d_wlevel_loops", ddr_interface_num);
+ if (s != NULL) {
+ wlevel_loops = strtoul(s, NULL, 0);
+ }
+ }
+
+ bunk_enable = (num_ranks > 1);
+
+ column_bits_start = 3;
+
+ row_lsb = column_bits_start + col_bits + bank_bits - (! ddr_interface_64b);
+ debug_print("row_lsb = column_bits_start + col_bits + bank_bits = %d\n", row_lsb);
+
+ pbank_lsb = row_lsb + row_bits + bunk_enable;
+ debug_print("pbank_lsb = row_lsb + row_bits + bunk_enable = %d\n", pbank_lsb);
+
+ if (lranks_per_prank > 1) {
+ pbank_lsb = row_lsb + row_bits + lranks_bits + bunk_enable;
+ ddr_print("DDR4: 3DS: pbank_lsb = (%d row_lsb) + (%d row_bits) + (%d lranks_bits) + (%d bunk_enable) = %d\n",
+ row_lsb, row_bits, lranks_bits, bunk_enable, pbank_lsb);
+ }
+
+ mem_size_mbytes = dimm_count * ((1ull << pbank_lsb) >> 20);
+ if (num_ranks == 4) {
+ /* Quad rank dimm capacity is equivalent to two dual-rank dimms. */
+ mem_size_mbytes *= 2;
+ }
+
+ /* Mask with 1 bits set for each active rank, allowing 2 bits per dimm.
+ ** This makes later calculations simpler, as a variety of CSRs use this layout.
+ ** This init needs to be updated for dual configs (i.e., non-identical DIMMs).
+ ** Bit 0 = dimm0, rank 0
+ ** Bit 1 = dimm0, rank 1
+ ** Bit 2 = dimm1, rank 0
+ ** Bit 3 = dimm1, rank 1
+ ** ...
+ */
+ rank_mask = 0x1;
+ if (num_ranks > 1)
+ rank_mask = 0x3;
+ if (num_ranks > 2)
+ rank_mask = 0xf;
+
+ for (i = 1; i < dimm_count; i++)
+ rank_mask |= ((rank_mask & 0x3) << (2*i));
+
+
+#ifdef CAVIUM_ONLY
+ /* Special request: mismatched DIMM support. Slot 0: 2-Rank, Slot 1: 1-Rank */
+ if (0)
+ {
+ /*
+ ** Calculate the total memory size in terms of the total
+ ** number of ranks instead of the number of dimms. The usual
+ ** requirement is for both dimms to be identical. This check
+ ** works around that requirement to allow one exception. The
+ ** dimm in the second slot may now have fewer ranks than the
+ ** first slot. 
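+ ** For example, 2-rank in slot 0 plus 1-rank in slot 1 would give
+ ** rank_count = 3 and rank_mask = 0x7, sizing memory as three
+ ** single-rank quanta instead of assuming two identical dimms.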
+ */ + int spd_org_dimm1; + int num_ranks_dimm1; + int rank_count; + int rank_mask_dimm1; + + if (dimm_count > 1) { + spd_org_dimm1 = read_spd(node, &dimm_config_table[1] /* dimm 1*/, + DDR3_SPD_MODULE_ORGANIZATION); + num_ranks_dimm1 = 1 + ((spd_org_dimm1 >> 3) & 0x7); + rank_count = num_ranks/* dimm 0 */ + num_ranks_dimm1 /* dimm 1 */; + + if (num_ranks != num_ranks_dimm1) { + mem_size_mbytes = rank_count * ((1ull << (pbank_lsb-bunk_enable)) >> 20); + rank_mask = 1 | ((num_ranks > 1) << 1); + rank_mask_dimm1 = 1 | ((num_ranks_dimm1 > 1) << 1); + rank_mask |= ((rank_mask_dimm1 & 0x3) << 2); + ddr_print("DIMM 1 - ranks: %d, size: %d MB\n", + num_ranks_dimm1, num_ranks_dimm1 * ((1ull << (pbank_lsb-bunk_enable)) >> 20)); + } + } + } +#endif /* CAVIUM_ONLY */ + + spd_ecc = get_dimm_ecc(node, &dimm_config_table[0], ddr_type); + + VB_PRT(VBL_DEV, "Summary: - %d %s%s %dRx%d %s, row bits=%d, col bits=%d, bank bits=%d\n", + dimm_count, dimm_type_name, (dimm_count > 1) ? "s" : "", + num_ranks, dram_width, (spd_ecc) ? "ECC" : "non-ECC", + row_bits, col_bits, bank_bits); + + // always print out the useful DIMM information... + for (i = 0; i < DDR_CFG_T_MAX_DIMMS; i++) { + if (i < dimm_count) + report_dimm(node, &dimm_config_table[i], i, ddr_interface_num, + num_ranks, dram_width, mem_size_mbytes / dimm_count); + else + if (validate_dimm(node, &dimm_config_table[i]) == 0) // only if there is a slot + printf("N%d.LMC%d.DIMM%d: Not Present\n", node, ddr_interface_num, i); + } + + if (ddr_type == DDR4_DRAM) { + spd_cas_latency = ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE0)) << 0); + spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE1)) << 8); + spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE2)) << 16); + spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE3)) << 24); + } else { + spd_cas_latency = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_CAS_LATENCIES_LSB); + spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_CAS_LATENCIES_MSB)) << 8); + } + debug_print("spd_cas_latency : %#06x\n", spd_cas_latency ); + + if (ddr_type == DDR4_DRAM) { + + /* No other values for DDR4 MTB and FTB are specified at the + * current time so don't bother reading them. Can't speculate how + * new values will be represented. 
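+ * (The DDR4 SPD definition fixes the MTB at 125 ps and the FTB at
+ * 1 ps, which is where the two constants just below come from.)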
+ */ + int spdMTB = 125; + int spdFTB = 1; + + tAAmin + = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_LATENCY_TAAMIN) + + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN); + + ddr4_tCKAVGmin + = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN) + + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN); + + ddr4_tCKAVGmax + = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX) + + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX); + + ddr4_tRCDmin + = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN) + + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN); + + ddr4_tRPmin + = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN) + + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN); + + ddr4_tRASmin + = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8) + + ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN) & 0xff)); + + ddr4_tRCmin + = spdMTB * ((((read_spd(node, &dimm_config_table[0], DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) >> 4) & 0xf) << 8) + + ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN) & 0xff)) + + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN); + + ddr4_tRFC1min + = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN) & 0xff) << 8) + + ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN) & 0xff)); + + ddr4_tRFC2min + = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN) & 0xff) << 8) + + ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN) & 0xff)); + + ddr4_tRFC4min + = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN) & 0xff) << 8) + + ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN) & 0xff)); + + ddr4_tFAWmin + = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN) & 0xf) << 8) + + ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN) & 0xff)); + + ddr4_tRRD_Smin + = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN) + + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN); + + ddr4_tRRD_Lmin + = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN) + + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN); + + ddr4_tCCD_Lmin + = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN) + + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN); + + ddr_print("%-45s : %6d ps\n", "Medium Timebase (MTB)", spdMTB); + ddr_print("%-45s : %6d ps\n", "Fine Timebase (FTB)", spdFTB); + + #define DDR4_TWR 15000 + #define DDR4_TWTR_S 2500 + + + tCKmin = ddr4_tCKAVGmin; + twr = DDR4_TWR; + trcd = ddr4_tRCDmin; + trrd = ddr4_tRRD_Smin; + trp = ddr4_tRPmin; + tras = ddr4_tRASmin; + trc = ddr4_tRCmin; + trfc = 
ddr4_tRFC1min; + twtr = DDR4_TWTR_S; + tfaw = ddr4_tFAWmin; + + if (spd_rdimm) { + spd_addr_mirror = read_spd(node, &dimm_config_table[0], DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM) & 0x1; + } else { + spd_addr_mirror = read_spd(node, &dimm_config_table[0], DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE) & 0x1; + } + debug_print("spd_addr_mirror : %#06x\n", spd_addr_mirror ); + + } else { /* if (ddr_type == DDR4_DRAM) */ + spd_mtb_dividend = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND); + spd_mtb_divisor = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR); + spd_tck_min = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN); + spd_taa_min = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_CAS_LATENCY_TAAMIN); + + spd_twr = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN); + spd_trcd = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN); + spd_trrd = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN); + spd_trp = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN); + spd_tras = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN); + spd_tras |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_UPPER_NIBBLES_TRAS_TRC)&0xf) << 8); + spd_trc = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN); + spd_trc |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_UPPER_NIBBLES_TRAS_TRC)&0xf0) << 4); + spd_trfc = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN); + spd_trfc |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN)) << 8); + spd_twtr = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN); + spd_trtp = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN); + spd_tfaw = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN); + spd_tfaw |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_UPPER_NIBBLE_TFAW)&0xf) << 8); + spd_addr_mirror = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_ADDRESS_MAPPING) & 0x1; + spd_addr_mirror = spd_addr_mirror && !spd_rdimm; /* Only address mirror unbuffered dimms. */ + ftb_Dividend = read_spd(node, &dimm_config_table[0], DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4; + ftb_Divisor = read_spd(node, &dimm_config_table[0], DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf; + ftb_Divisor = (ftb_Divisor == 0) ? 
1 : ftb_Divisor; /* Make sure that it is not 0 */ + + debug_print("spd_twr : %#06x\n", spd_twr ); + debug_print("spd_trcd : %#06x\n", spd_trcd); + debug_print("spd_trrd : %#06x\n", spd_trrd); + debug_print("spd_trp : %#06x\n", spd_trp ); + debug_print("spd_tras : %#06x\n", spd_tras); + debug_print("spd_trc : %#06x\n", spd_trc ); + debug_print("spd_trfc : %#06x\n", spd_trfc); + debug_print("spd_twtr : %#06x\n", spd_twtr); + debug_print("spd_trtp : %#06x\n", spd_trtp); + debug_print("spd_tfaw : %#06x\n", spd_tfaw); + debug_print("spd_addr_mirror : %#06x\n", spd_addr_mirror); + + mtb_psec = spd_mtb_dividend * 1000 / spd_mtb_divisor; + tAAmin = mtb_psec * spd_taa_min; + tAAmin += ftb_Dividend * (SC_t) read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN) / ftb_Divisor; + tCKmin = mtb_psec * spd_tck_min; + tCKmin += ftb_Dividend * (SC_t) read_spd(node, &dimm_config_table[0], DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN) / ftb_Divisor; + + twr = spd_twr * mtb_psec; + trcd = spd_trcd * mtb_psec; + trrd = spd_trrd * mtb_psec; + trp = spd_trp * mtb_psec; + tras = spd_tras * mtb_psec; + trc = spd_trc * mtb_psec; + trfc = spd_trfc * mtb_psec; + twtr = spd_twtr * mtb_psec; + trtp = spd_trtp * mtb_psec; + tfaw = spd_tfaw * mtb_psec; + + } /* if (ddr_type == DDR4_DRAM) */ + + if (ddr_type == DDR4_DRAM) { + ddr_print("%-45s : %6d ps (%ld MT/s)\n", "SDRAM Minimum Cycle Time (tCKAVGmin)",ddr4_tCKAVGmin, + pretty_psecs_to_mts(ddr4_tCKAVGmin)); + ddr_print("%-45s : %6d ps\n", "SDRAM Maximum Cycle Time (tCKAVGmax)", ddr4_tCKAVGmax); + ddr_print("%-45s : %6d ps\n", "Minimum CAS Latency Time (tAAmin)", tAAmin); + ddr_print("%-45s : %6d ps\n", "Minimum RAS to CAS Delay Time (tRCDmin)", ddr4_tRCDmin); + ddr_print("%-45s : %6d ps\n", "Minimum Row Precharge Delay Time (tRPmin)", ddr4_tRPmin); + ddr_print("%-45s : %6d ps\n", "Minimum Active to Precharge Delay (tRASmin)", ddr4_tRASmin); + ddr_print("%-45s : %6d ps\n", "Minimum Active to Active/Refr. Delay (tRCmin)", ddr4_tRCmin); + ddr_print("%-45s : %6d ps\n", "Minimum Refresh Recovery Delay (tRFC1min)", ddr4_tRFC1min); + ddr_print("%-45s : %6d ps\n", "Minimum Refresh Recovery Delay (tRFC2min)", ddr4_tRFC2min); + ddr_print("%-45s : %6d ps\n", "Minimum Refresh Recovery Delay (tRFC4min)", ddr4_tRFC4min); + ddr_print("%-45s : %6d ps\n", "Minimum Four Activate Window Time (tFAWmin)", ddr4_tFAWmin); + ddr_print("%-45s : %6d ps\n", "Minimum Act. to Act. Delay (tRRD_Smin)", ddr4_tRRD_Smin); + ddr_print("%-45s : %6d ps\n", "Minimum Act. to Act. 
Delay (tRRD_Lmin)", ddr4_tRRD_Lmin); + ddr_print("%-45s : %6d ps\n", "Minimum CAS to CAS Delay Time (tCCD_Lmin)", ddr4_tCCD_Lmin); + } else { + ddr_print("Medium Timebase (MTB) : %6d ps\n", mtb_psec); + ddr_print("Minimum Cycle Time (tCKmin) : %6d ps (%ld MT/s)\n", tCKmin, + pretty_psecs_to_mts(tCKmin)); + ddr_print("Minimum CAS Latency Time (tAAmin) : %6d ps\n", tAAmin); + ddr_print("Write Recovery Time (tWR) : %6d ps\n", twr); + ddr_print("Minimum RAS to CAS delay (tRCD) : %6d ps\n", trcd); + ddr_print("Minimum Row Active to Row Active delay (tRRD) : %6d ps\n", trrd); + ddr_print("Minimum Row Precharge Delay (tRP) : %6d ps\n", trp); + ddr_print("Minimum Active to Precharge (tRAS) : %6d ps\n", tras); + ddr_print("Minimum Active to Active/Refresh Delay (tRC) : %6d ps\n", trc); + ddr_print("Minimum Refresh Recovery Delay (tRFC) : %6d ps\n", trfc); + ddr_print("Internal write to read command delay (tWTR) : %6d ps\n", twtr); + ddr_print("Min Internal Rd to Precharge Cmd Delay (tRTP) : %6d ps\n", trtp); + ddr_print("Minimum Four Activate Window Delay (tFAW) : %6d ps\n", tfaw); + } + + + /* When the cycle time is within 1 psec of the minimum accept it + as a slight rounding error and adjust it to exactly the minimum + cycle time. This avoids an unnecessary warning. */ + if (_abs(tclk_psecs - tCKmin) < 2) + tclk_psecs = tCKmin; + + if (tclk_psecs < (uint64_t)tCKmin) { + ddr_print("WARNING!!!!: DDR Clock Rate (tCLK: %ld) exceeds DIMM specifications (tCKmin: %ld)!!!!\n", + tclk_psecs, (uint64_t)tCKmin); + } + + + ddr_print("DDR Clock Rate (tCLK) : %6lu ps\n", tclk_psecs); + ddr_print("Core Clock Rate (eCLK) : %6lu ps\n", eclk_psecs); + + if ((s = lookup_env_parameter("ddr_use_ecc")) != NULL) { + use_ecc = !!strtoul(s, NULL, 0); + } + use_ecc = use_ecc && spd_ecc; + + ddr_interface_bytemask = ddr_interface_64b + ? (use_ecc ? 0x1ff : 0xff) + : (use_ecc ? 0x01f : 0x0f); // FIXME? 81xx does diff from 70xx + + ddr_print("DRAM Interface width: %d bits %s bytemask 0x%x\n", + ddr_interface_64b ? 64 : 32, use_ecc ? 
"+ECC" : "", + ddr_interface_bytemask); + + ddr_print("\n------ Board Custom Configuration Settings ------\n"); + ddr_print("%-45s : %d\n", "MIN_RTT_NOM_IDX ", custom_lmc_config->min_rtt_nom_idx); + ddr_print("%-45s : %d\n", "MAX_RTT_NOM_IDX ", custom_lmc_config->max_rtt_nom_idx); + ddr_print("%-45s : %d\n", "MIN_RODT_CTL ", custom_lmc_config->min_rodt_ctl); + ddr_print("%-45s : %d\n", "MAX_RODT_CTL ", custom_lmc_config->max_rodt_ctl); + ddr_print("%-45s : %d\n", "MIN_CAS_LATENCY ", custom_lmc_config->min_cas_latency); + ddr_print("%-45s : %d\n", "OFFSET_EN ", custom_lmc_config->offset_en); + ddr_print("%-45s : %d\n", "OFFSET_UDIMM ", custom_lmc_config->offset_udimm); + ddr_print("%-45s : %d\n", "OFFSET_RDIMM ", custom_lmc_config->offset_rdimm); + ddr_print("%-45s : %d\n", "DDR_RTT_NOM_AUTO ", custom_lmc_config->ddr_rtt_nom_auto); + ddr_print("%-45s : %d\n", "DDR_RODT_CTL_AUTO ", custom_lmc_config->ddr_rodt_ctl_auto); + if (spd_rdimm) + ddr_print("%-45s : %d\n", "RLEVEL_COMP_OFFSET", custom_lmc_config->rlevel_comp_offset_rdimm); + else + ddr_print("%-45s : %d\n", "RLEVEL_COMP_OFFSET", custom_lmc_config->rlevel_comp_offset_udimm); + ddr_print("%-45s : %d\n", "RLEVEL_COMPUTE ", custom_lmc_config->rlevel_compute); + ddr_print("%-45s : %d\n", "DDR2T_UDIMM ", custom_lmc_config->ddr2t_udimm); + ddr_print("%-45s : %d\n", "DDR2T_RDIMM ", custom_lmc_config->ddr2t_rdimm); + ddr_print("%-45s : %d\n", "FPRCH2 ", custom_lmc_config->fprch2); + ddr_print("-------------------------------------------------\n"); + + + CL = divide_roundup(tAAmin, tclk_psecs); + + ddr_print("Desired CAS Latency : %6d\n", CL); + + min_cas_latency = custom_lmc_config->min_cas_latency; + + + if ((s = lookup_env_parameter("ddr_min_cas_latency")) != NULL) { + min_cas_latency = strtoul(s, NULL, 0); + } + + { + int base_CL; + ddr_print("CAS Latencies supported in DIMM :"); + base_CL = (ddr_type == DDR4_DRAM) ? 7 : 4; + for (i=0; i<32; ++i) { + if ((spd_cas_latency >> i) & 1) { + ddr_print(" %d", i+base_CL); + max_cas_latency = i+base_CL; + if (min_cas_latency == 0) + min_cas_latency = i+base_CL; + } + } + ddr_print("\n"); + + /* Use relaxed timing when running slower than the minimum + supported speed. Adjust timing to match the smallest supported + CAS Latency. */ + if (CL < min_cas_latency) { + uint64_t adjusted_tclk = tAAmin / min_cas_latency; + CL = min_cas_latency; + ddr_print("Slow clock speed. Adjusting timing: tClk = %lu, Adjusted tClk = %ld\n", + tclk_psecs, adjusted_tclk); + tclk_psecs = adjusted_tclk; + } + + if ((s = lookup_env_parameter("ddr_cas_latency")) != NULL) { + override_cas_latency = strtoul(s, NULL, 0); + } + + /* Make sure that the selected cas latency is legal */ + for (i=(CL-base_CL); i<32; ++i) { + if ((spd_cas_latency >> i) & 1) { + CL = i+base_CL; + break; + } + } + } + + if (CL > max_cas_latency) + CL = max_cas_latency; + + if (override_cas_latency != 0) { + CL = override_cas_latency; + } + + ddr_print("CAS Latency : %6d\n", CL); + + if ((CL * tCKmin) > 20000) + { + ddr_print("(CLactual * tCKmin) = %d exceeds 20 ns\n", (CL * tCKmin)); + } + + if ((num_banks != 4) && (num_banks != 8) && (num_banks != 16)) + { + error_print("Unsupported number of banks %d. Must be 4 or 8 or 16.\n", num_banks); + ++fatal_error; + } + + if ((num_ranks != 1) && (num_ranks != 2) && (num_ranks != 4)) + { + error_print("Unsupported number of ranks: %d\n", num_ranks); + ++fatal_error; + } + + if (! 
CAVIUM_IS_MODEL(CAVIUM_CN81XX)) { // 88XX or 83XX, but not 81XX + if ((dram_width != 8) && (dram_width != 16) && (dram_width != 4)) { + error_print("Unsupported SDRAM Width, x%d. Must be x4, x8 or x16.\n", dram_width); + ++fatal_error; + } + } else if ((dram_width != 8) && (dram_width != 16)) { // 81XX can only do x8 or x16 + error_print("Unsupported SDRAM Width, x%d. Must be x8 or x16.\n", dram_width); + ++fatal_error; + } + + + /* + ** Bail out here if things are not copasetic. + */ + if (fatal_error) + return(-1); + + /* + * 6.9.6 LMC RESET Initialization + * + * The purpose of this step is to assert/deassert the RESET# pin at the + * DDR3/DDR4 parts. + * + * This LMC RESET step is done for all enabled LMCs. + */ + perform_lmc_reset(node, ddr_interface_num); + + // Make sure scrambling is disabled during init... + { + bdk_lmcx_control_t lmc_control; + + lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num)); + lmc_control.s.scramble_ena = 0; + DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u); + + DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG0(ddr_interface_num), 0); + DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG1(ddr_interface_num), 0); + DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG2(ddr_interface_num), 0); + } + + + odt_idx = dimm_count - 1; + + switch (num_ranks) { + case 1: + odt_config = odt_1rank_config; + break; + case 2: + odt_config = odt_2rank_config; + break; + case 4: + odt_config = odt_4rank_config; + break; + default: + odt_config = disable_odt_config; + error_print("Unsupported number of ranks: %d\n", num_ranks); + ++fatal_error; + } + + + /* Parameters from DDR3 Specifications */ +#define DDR3_tREFI 7800000 /* 7.8 us */ +#define DDR3_ZQCS 80000ull /* 80 ns */ +#define DDR3_ZQCS_Interval 1280000000 /* 128ms/100 */ +#define DDR3_tCKE 5000 /* 5 ns */ +#define DDR3_tMRD 4 /* 4 nCK */ +#define DDR3_tDLLK 512 /* 512 nCK */ +#define DDR3_tMPRR 1 /* 1 nCK */ +#define DDR3_tWLMRD 40 /* 40 nCK */ +#define DDR3_tWLDQSEN 25 /* 25 nCK */ + + /* Parameters from DDR4 Specifications */ +#define DDR4_tMRD 8 /* 8 nCK */ +#define DDR4_tDLLK 768 /* 768 nCK */ + + /* + * 6.9.7 Early LMC Initialization + * + * All of DDR PLL, LMC CK, and LMC DRESET initializations must be + * completed prior to starting this LMC initialization sequence. + * + * Perform the following five substeps for early LMC initialization: + * + * 1. Software must ensure there are no pending DRAM transactions. + * + * 2. Write LMC(0)_CONFIG, LMC(0)_CONTROL, LMC(0)_TIMING_PARAMS0, + * LMC(0)_TIMING_PARAMS1, LMC(0)_MODEREG_PARAMS0, + * LMC(0)_MODEREG_PARAMS1, LMC(0)_DUAL_MEMCFG, LMC(0)_NXM, + * LMC(0)_WODT_MASK, LMC(0)_RODT_MASK, LMC(0)_COMP_CTL2, + * LMC(0)_PHY_CTL, LMC(0)_DIMM0/1_PARAMS, and LMC(0)_DIMM_CTL with + * appropriate values. All sections in this chapter can be used to + * derive proper register settings. 
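+ * For example, a single dual-rank DIMM lands in substep 2 with
+ * LMC(0)_CONFIG[RANK_ENA]=1 and LMC(0)_CONFIG[RANKMASK]=0x3; the
+ * blocks that follow program each of these registers in turn.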
+ */ + + /* LMC(0)_CONFIG */ + { + lmc_config.u = 0; + + lmc_config.s.ecc_ena = use_ecc; + lmc_config.s.row_lsb = encode_row_lsb_ddr3(row_lsb, ddr_interface_64b); + lmc_config.s.pbank_lsb = encode_pbank_lsb_ddr3(pbank_lsb, ddr_interface_64b); + + lmc_config.s.idlepower = 0; /* Disabled */ + + if ((s = lookup_env_parameter("ddr_idlepower")) != NULL) { + lmc_config.s.idlepower = strtoul(s, NULL, 0); + } + + lmc_config.s.forcewrite = 0; /* Disabled */ + lmc_config.s.ecc_adr = 1; /* Include memory reference address in the ECC */ + + if ((s = lookup_env_parameter("ddr_ecc_adr")) != NULL) { + lmc_config.s.ecc_adr = strtoul(s, NULL, 0); + } + + lmc_config.s.reset = 0; + + /* + * Program LMC0_CONFIG[24:18], ref_zqcs_int(6:0) to + * RND-DN(tREFI/clkPeriod/512) Program LMC0_CONFIG[36:25], + * ref_zqcs_int(18:7) to + * RND-DN(ZQCS_Interval/clkPeriod/(512*128)). Note that this + * value should always be greater than 32, to account for + * resistor calibration delays. + */ + + lmc_config.s.ref_zqcs_int = ((DDR3_tREFI/tclk_psecs/512) & 0x7f); + lmc_config.s.ref_zqcs_int |= ((max(33ull, (DDR3_ZQCS_Interval/(tclk_psecs/100)/(512*128))) & 0xfff) << 7); + + + lmc_config.s.early_dqx = 1; /* Default to enabled */ + + if ((s = lookup_env_parameter("ddr_early_dqx")) == NULL) + s = lookup_env_parameter("ddr%d_early_dqx", ddr_interface_num); + if (s != NULL) { + lmc_config.s.early_dqx = strtoul(s, NULL, 0); + } + + lmc_config.s.sref_with_dll = 0; + + lmc_config.s.rank_ena = bunk_enable; + lmc_config.s.rankmask = rank_mask; /* Set later */ + lmc_config.s.mirrmask = (spd_addr_mirror << 1 | spd_addr_mirror << 3) & rank_mask; + lmc_config.s.init_status = rank_mask; /* Set once and don't change it. */ + lmc_config.s.early_unload_d0_r0 = 0; + lmc_config.s.early_unload_d0_r1 = 0; + lmc_config.s.early_unload_d1_r0 = 0; + lmc_config.s.early_unload_d1_r1 = 0; + lmc_config.s.scrz = 0; + // set 32-bit mode for real only when selected AND 81xx... + if (!ddr_interface_64b && CAVIUM_IS_MODEL(CAVIUM_CN81XX)) { + lmc_config.s.mode32b = 1; + } + VB_PRT(VBL_DEV, "%-45s : %d\n", "MODE32B (init)", lmc_config.s.mode32b); + lmc_config.s.mode_x4dev = (dram_width == 4) ? 1 : 0; + lmc_config.s.bg2_enable = ((ddr_type == DDR4_DRAM) && (dram_width == 16)) ? 0 : 1; + + if ((s = lookup_env_parameter_ull("ddr_config")) != NULL) { + lmc_config.u = strtoull(s, NULL, 0); + } + ddr_print("LMC_CONFIG : 0x%016lx\n", lmc_config.u); + DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u); + } + + /* LMC(0)_CONTROL */ + { + bdk_lmcx_control_t lmc_control; + lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num)); + lmc_control.s.rdimm_ena = spd_rdimm; + lmc_control.s.bwcnt = 0; /* Clear counter later */ + if (spd_rdimm) + lmc_control.s.ddr2t = (safe_ddr_flag ? 1 : custom_lmc_config->ddr2t_rdimm ); + else + lmc_control.s.ddr2t = (safe_ddr_flag ? 1 : custom_lmc_config->ddr2t_udimm ); + lmc_control.s.pocas = 0; + lmc_control.s.fprch2 = (safe_ddr_flag ? 2 : custom_lmc_config->fprch2 ); + lmc_control.s.throttle_rd = safe_ddr_flag ? 1 : 0; + lmc_control.s.throttle_wr = safe_ddr_flag ? 1 : 0; + lmc_control.s.inorder_rd = safe_ddr_flag ? 1 : 0; + lmc_control.s.inorder_wr = safe_ddr_flag ? 1 : 0; + lmc_control.cn81xx.elev_prio_dis = safe_ddr_flag ? 
1 : 0; + lmc_control.s.nxm_write_en = 0; /* discards writes to + addresses that don't exist + in the DRAM */ + lmc_control.s.max_write_batch = 8; + lmc_control.s.xor_bank = 1; + lmc_control.s.auto_dclkdis = 1; + lmc_control.s.int_zqcs_dis = 0; + lmc_control.s.ext_zqcs_dis = 0; + lmc_control.s.bprch = 1; + lmc_control.s.wodt_bprch = 1; + lmc_control.s.rodt_bprch = 1; + + if ((s = lookup_env_parameter("ddr_xor_bank")) != NULL) { + lmc_control.s.xor_bank = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_2t")) != NULL) { + lmc_control.s.ddr2t = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_fprch2")) != NULL) { + lmc_control.s.fprch2 = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_bprch")) != NULL) { + lmc_control.s.bprch = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_wodt_bprch")) != NULL) { + lmc_control.s.wodt_bprch = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_rodt_bprch")) != NULL) { + lmc_control.s.rodt_bprch = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_int_zqcs_dis")) != NULL) { + lmc_control.s.int_zqcs_dis = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_ext_zqcs_dis")) != NULL) { + lmc_control.s.ext_zqcs_dis = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter_ull("ddr_control")) != NULL) { + lmc_control.u = strtoull(s, NULL, 0); + } + ddr_print("LMC_CONTROL : 0x%016lx\n", lmc_control.u); + DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u); + } + + /* LMC(0)_TIMING_PARAMS0 */ + { + unsigned trp_value; + bdk_lmcx_timing_params0_t lmc_timing_params0; + lmc_timing_params0.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS0(ddr_interface_num)); + + trp_value = divide_roundup(trp, tclk_psecs) - 1; + ddr_print("TIMING_PARAMS0[TRP]: NEW 0x%x, OLD 0x%x\n", trp_value, + trp_value + (unsigned)(divide_roundup(max(4*tclk_psecs, 7500ull), tclk_psecs)) - 4); +#if 1 + if ((s = lookup_env_parameter_ull("ddr_use_old_trp")) != NULL) { + if (!!strtoull(s, NULL, 0)) { + trp_value += divide_roundup(max(4*tclk_psecs, 7500ull), tclk_psecs) - 4; + ddr_print("TIMING_PARAMS0[trp]: USING OLD 0x%x\n", trp_value); + } + } +#endif + + lmc_timing_params0.s.txpr = divide_roundup(max(5*tclk_psecs, trfc+10000ull), 16*tclk_psecs); + lmc_timing_params0.s.tzqinit = divide_roundup(max(512*tclk_psecs, 640000ull), (256*tclk_psecs)); + lmc_timing_params0.s.trp = trp_value & 0x1f; + lmc_timing_params0.s.tcksre = divide_roundup(max(5*tclk_psecs, 10000ull), tclk_psecs) - 1; + + if (ddr_type == DDR4_DRAM) { + lmc_timing_params0.s.tzqcs = divide_roundup(128*tclk_psecs, (16*tclk_psecs)); /* Always 8. 
*/ + lmc_timing_params0.s.tcke = divide_roundup(max(3*tclk_psecs, (uint64_t) DDR3_tCKE), tclk_psecs) - 1; + lmc_timing_params0.s.tmrd = divide_roundup((DDR4_tMRD*tclk_psecs), tclk_psecs) - 1; + //lmc_timing_params0.s.tmod = divide_roundup(max(24*tclk_psecs, 15000ull), tclk_psecs) - 1; + lmc_timing_params0.s.tmod = 25; /* 25 is the max allowed */ + lmc_timing_params0.s.tdllk = divide_roundup(DDR4_tDLLK, 256); + } else { + lmc_timing_params0.s.tzqcs = divide_roundup(max(64*tclk_psecs, DDR3_ZQCS), (16*tclk_psecs)); + lmc_timing_params0.s.tcke = divide_roundup(DDR3_tCKE, tclk_psecs) - 1; + lmc_timing_params0.s.tmrd = divide_roundup((DDR3_tMRD*tclk_psecs), tclk_psecs) - 1; + lmc_timing_params0.s.tmod = divide_roundup(max(12*tclk_psecs, 15000ull), tclk_psecs) - 1; + lmc_timing_params0.s.tdllk = divide_roundup(DDR3_tDLLK, 256); + } + + if ((s = lookup_env_parameter_ull("ddr_timing_params0")) != NULL) { + lmc_timing_params0.u = strtoull(s, NULL, 0); + } + ddr_print("TIMING_PARAMS0 : 0x%016lx\n", lmc_timing_params0.u); + DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS0(ddr_interface_num), lmc_timing_params0.u); + } + + /* LMC(0)_TIMING_PARAMS1 */ + { + int txp, temp_trcd, trfc_dlr; + bdk_lmcx_timing_params1_t lmc_timing_params1; + lmc_timing_params1.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num)); + + lmc_timing_params1.s.tmprr = divide_roundup(DDR3_tMPRR*tclk_psecs, tclk_psecs) - 1; + + lmc_timing_params1.s.tras = divide_roundup(tras, tclk_psecs) - 1; + + // NOTE: this is reworked for pass 2.x + temp_trcd = divide_roundup(trcd, tclk_psecs); +#if 1 + if (temp_trcd > 15) + ddr_print("TIMING_PARAMS1[trcd]: need extension bit for 0x%x\n", temp_trcd); +#endif + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (temp_trcd > 15)) { + /* Let .trcd=0 serve as a flag that the field has + overflowed. Must use Additive Latency mode as a + workaround. */ + temp_trcd = 0; + } + lmc_timing_params1.s.trcd = temp_trcd & 0x0f; + lmc_timing_params1.s.trcd_ext = (temp_trcd >> 4) & 1; + + lmc_timing_params1.s.twtr = divide_roundup(twtr, tclk_psecs) - 1; + lmc_timing_params1.s.trfc = divide_roundup(trfc, 8*tclk_psecs); + + // workaround needed for all THUNDER chips thru T88 Pass 2.0, + // but not 81xx and 83xx... + if ((ddr_type == DDR4_DRAM) && CAVIUM_IS_MODEL(CAVIUM_CN88XX)) { + /* Workaround bug 24006. Use Trrd_l. */ + lmc_timing_params1.s.trrd = divide_roundup(ddr4_tRRD_Lmin, tclk_psecs) - 2; + } else + lmc_timing_params1.s.trrd = divide_roundup(trrd, tclk_psecs) - 2; + + /* + ** tXP = max( 3nCK, 7.5 ns) DDR3-800 tCLK = 2500 psec + ** tXP = max( 3nCK, 7.5 ns) DDR3-1066 tCLK = 1875 psec + ** tXP = max( 3nCK, 6.0 ns) DDR3-1333 tCLK = 1500 psec + ** tXP = max( 3nCK, 6.0 ns) DDR3-1600 tCLK = 1250 psec + ** tXP = max( 3nCK, 6.0 ns) DDR3-1866 tCLK = 1071 psec + ** tXP = max( 3nCK, 6.0 ns) DDR3-2133 tCLK = 937 psec + */ + txp = (tclk_psecs < 1875) ? 
6000 : 7500; + // NOTE: this is reworked for pass 2.x + int temp_txp = divide_roundup(max(3*tclk_psecs, (unsigned)txp), tclk_psecs) - 1; +#if 1 + if (temp_txp > 7) + ddr_print("TIMING_PARAMS1[txp]: need extension bit for 0x%x\n", temp_txp); +#endif + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (temp_txp > 7)) { + temp_txp = 7; // max it out + } + lmc_timing_params1.s.txp = temp_txp & 7; + lmc_timing_params1.s.txp_ext = (temp_txp >> 3) & 1; + + lmc_timing_params1.s.twlmrd = divide_roundup(DDR3_tWLMRD*tclk_psecs, 4*tclk_psecs); + lmc_timing_params1.s.twldqsen = divide_roundup(DDR3_tWLDQSEN*tclk_psecs, 4*tclk_psecs); + lmc_timing_params1.s.tfaw = divide_roundup(tfaw, 4*tclk_psecs); + lmc_timing_params1.s.txpdll = divide_roundup(max(10*tclk_psecs, 24000ull), tclk_psecs) - 1; + + if ((ddr_type == DDR4_DRAM) && is_3ds_dimm) { + /* + 4 Gb: tRFC_DLR = 90 ns + 8 Gb: tRFC_DLR = 120 ns + 16 Gb: tRFC_DLR = 190 ns FIXME? + */ + // RNDUP[tRFC_DLR(ns) / (8 * TCYC(ns))] + if (die_capacity == 0x1000) // 4 Gbit + trfc_dlr = 90; + else if (die_capacity == 0x2000) // 8 Gbit + trfc_dlr = 120; + else if (die_capacity == 0x4000) // 16 Gbit + trfc_dlr = 190; + else + trfc_dlr = 0; + + if (trfc_dlr == 0) { + ddr_print("N%d.LMC%d: ERROR: tRFC_DLR: die_capacity %u Mbit is illegal\n", + node, ddr_interface_num, die_capacity); + } else { + lmc_timing_params1.s.trfc_dlr = divide_roundup(trfc_dlr * 1000UL, 8*tclk_psecs); + ddr_print("N%d.LMC%d: TIMING_PARAMS1[trfc_dlr] set to %u\n", + node, ddr_interface_num, lmc_timing_params1.s.trfc_dlr); + } + } + + if ((s = lookup_env_parameter_ull("ddr_timing_params1")) != NULL) { + lmc_timing_params1.u = strtoull(s, NULL, 0); + } + ddr_print("TIMING_PARAMS1 : 0x%016lx\n", lmc_timing_params1.u); + DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num), lmc_timing_params1.u); + } + + /* LMC(0)_TIMING_PARAMS2 */ + if (ddr_type == DDR4_DRAM) { + bdk_lmcx_timing_params1_t lmc_timing_params1; + bdk_lmcx_timing_params2_t lmc_timing_params2; + lmc_timing_params1.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num)); + lmc_timing_params2.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS2(ddr_interface_num)); + ddr_print("TIMING_PARAMS2 : 0x%016lx\n", lmc_timing_params2.u); + + //lmc_timing_params2.s.trrd_l = divide_roundup(ddr4_tRRD_Lmin, tclk_psecs) - 1; + // NOTE: this is reworked for pass 2.x + int temp_trrd_l = divide_roundup(ddr4_tRRD_Lmin, tclk_psecs) - 2; +#if 1 + if (temp_trrd_l > 7) + ddr_print("TIMING_PARAMS2[trrd_l]: need extension bit for 0x%x\n", temp_trrd_l); +#endif + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (temp_trrd_l > 7)) { + temp_trrd_l = 7; // max it out + } + lmc_timing_params2.s.trrd_l = temp_trrd_l & 7; + lmc_timing_params2.s.trrd_l_ext = (temp_trrd_l >> 3) & 1; + + lmc_timing_params2.s.twtr_l = divide_nint(max(4*tclk_psecs, 7500ull), tclk_psecs) - 1; // correct for 1600-2400 + lmc_timing_params2.s.t_rw_op_max = 7; + lmc_timing_params2.s.trtp = divide_roundup(max(4*tclk_psecs, 7500ull), tclk_psecs) - 1; + + ddr_print("TIMING_PARAMS2 : 0x%016lx\n", lmc_timing_params2.u); + DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS2(ddr_interface_num), lmc_timing_params2.u); + + /* Workaround Errata 25823 - LMC: Possible DDR4 tWTR_L not met + for Write-to-Read operations to the same Bank Group */ + if (lmc_timing_params1.s.twtr < (lmc_timing_params2.s.twtr_l - 4)) { + lmc_timing_params1.s.twtr = lmc_timing_params2.s.twtr_l - 4; + ddr_print("ERRATA 25823: NEW: TWTR: %d, TWTR_L: %d\n", lmc_timing_params1.s.twtr, lmc_timing_params2.s.twtr_l); + 
ddr_print("TIMING_PARAMS1 : 0x%016lx\n", lmc_timing_params1.u); + DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num), lmc_timing_params1.u); + } + } + + /* LMC(0)_MODEREG_PARAMS0 */ + { + bdk_lmcx_modereg_params0_t lmc_modereg_params0; + int param; + + lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num)); + + if (ddr_type == DDR4_DRAM) { + lmc_modereg_params0.s.cwl = 0; /* 1600 (1250ps) */ + if (tclk_psecs < 1250) + lmc_modereg_params0.s.cwl = 1; /* 1866 (1072ps) */ + if (tclk_psecs < 1072) + lmc_modereg_params0.s.cwl = 2; /* 2133 (938ps) */ + if (tclk_psecs < 938) + lmc_modereg_params0.s.cwl = 3; /* 2400 (833ps) */ + if (tclk_psecs < 833) + lmc_modereg_params0.s.cwl = 4; /* 2666 (750ps) */ + if (tclk_psecs < 750) + lmc_modereg_params0.s.cwl = 5; /* 3200 (625ps) */ + } else { + /* + ** CSR CWL CAS write Latency + ** === === ================================= + ** 0 5 ( tCK(avg) >= 2.5 ns) + ** 1 6 (2.5 ns > tCK(avg) >= 1.875 ns) + ** 2 7 (1.875 ns > tCK(avg) >= 1.5 ns) + ** 3 8 (1.5 ns > tCK(avg) >= 1.25 ns) + ** 4 9 (1.25 ns > tCK(avg) >= 1.07 ns) + ** 5 10 (1.07 ns > tCK(avg) >= 0.935 ns) + ** 6 11 (0.935 ns > tCK(avg) >= 0.833 ns) + ** 7 12 (0.833 ns > tCK(avg) >= 0.75 ns) + */ + + lmc_modereg_params0.s.cwl = 0; + if (tclk_psecs < 2500) + lmc_modereg_params0.s.cwl = 1; + if (tclk_psecs < 1875) + lmc_modereg_params0.s.cwl = 2; + if (tclk_psecs < 1500) + lmc_modereg_params0.s.cwl = 3; + if (tclk_psecs < 1250) + lmc_modereg_params0.s.cwl = 4; + if (tclk_psecs < 1070) + lmc_modereg_params0.s.cwl = 5; + if (tclk_psecs < 935) + lmc_modereg_params0.s.cwl = 6; + if (tclk_psecs < 833) + lmc_modereg_params0.s.cwl = 7; + } + + if ((s = lookup_env_parameter("ddr_cwl")) != NULL) { + lmc_modereg_params0.s.cwl = strtoul(s, NULL, 0) - 5; + } + + if (ddr_type == DDR4_DRAM) { + ddr_print("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]", + lmc_modereg_params0.s.cwl + 9 + + ((lmc_modereg_params0.s.cwl>2) ? 
(lmc_modereg_params0.s.cwl-3) * 2 : 0), + lmc_modereg_params0.s.cwl); + } else { + ddr_print("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]", + lmc_modereg_params0.s.cwl + 5, + lmc_modereg_params0.s.cwl); + } + + lmc_modereg_params0.s.mprloc = 0; + lmc_modereg_params0.s.mpr = 0; + lmc_modereg_params0.s.dll = (ddr_type == DDR4_DRAM)?1:0; /* disable(0) for DDR3 and enable(1) for DDR4 */ + lmc_modereg_params0.s.al = 0; + lmc_modereg_params0.s.wlev = 0; /* Read Only */ + lmc_modereg_params0.s.tdqs = ((ddr_type == DDR4_DRAM) || (dram_width != 8))?0:1; /* disable(0) for DDR4 and x4/x16 DDR3 */ + lmc_modereg_params0.s.qoff = 0; + //lmc_modereg_params0.s.bl = 0; /* Don't touch block dirty logic */ + + if ((s = lookup_env_parameter("ddr_cl")) != NULL) { + CL = strtoul(s, NULL, 0); + ddr_print("CAS Latency : %6d\n", CL); + } + + if (ddr_type == DDR4_DRAM) { + lmc_modereg_params0.s.cl = 0x0; + if (CL > 9) + lmc_modereg_params0.s.cl = 0x1; + if (CL > 10) + lmc_modereg_params0.s.cl = 0x2; + if (CL > 11) + lmc_modereg_params0.s.cl = 0x3; + if (CL > 12) + lmc_modereg_params0.s.cl = 0x4; + if (CL > 13) + lmc_modereg_params0.s.cl = 0x5; + if (CL > 14) + lmc_modereg_params0.s.cl = 0x6; + if (CL > 15) + lmc_modereg_params0.s.cl = 0x7; + if (CL > 16) + lmc_modereg_params0.s.cl = 0x8; + if (CL > 18) + lmc_modereg_params0.s.cl = 0x9; + if (CL > 20) + lmc_modereg_params0.s.cl = 0xA; + if (CL > 24) + lmc_modereg_params0.s.cl = 0xB; + } else { + lmc_modereg_params0.s.cl = 0x2; + if (CL > 5) + lmc_modereg_params0.s.cl = 0x4; + if (CL > 6) + lmc_modereg_params0.s.cl = 0x6; + if (CL > 7) + lmc_modereg_params0.s.cl = 0x8; + if (CL > 8) + lmc_modereg_params0.s.cl = 0xA; + if (CL > 9) + lmc_modereg_params0.s.cl = 0xC; + if (CL > 10) + lmc_modereg_params0.s.cl = 0xE; + if (CL > 11) + lmc_modereg_params0.s.cl = 0x1; + if (CL > 12) + lmc_modereg_params0.s.cl = 0x3; + if (CL > 13) + lmc_modereg_params0.s.cl = 0x5; + if (CL > 14) + lmc_modereg_params0.s.cl = 0x7; + if (CL > 15) + lmc_modereg_params0.s.cl = 0x9; + } + + lmc_modereg_params0.s.rbt = 0; /* Read Only. 
*/ + lmc_modereg_params0.s.tm = 0; + lmc_modereg_params0.s.dllr = 0; + + param = divide_roundup(twr, tclk_psecs); + + if (ddr_type == DDR4_DRAM) { /* DDR4 */ + lmc_modereg_params0.s.wrp = 1; + if (param > 12) + lmc_modereg_params0.s.wrp = 2; + if (param > 14) + lmc_modereg_params0.s.wrp = 3; + if (param > 16) + lmc_modereg_params0.s.wrp = 4; + if (param > 18) + lmc_modereg_params0.s.wrp = 5; + if (param > 20) + lmc_modereg_params0.s.wrp = 6; + if (param > 24) /* RESERVED in DDR4 spec */ + lmc_modereg_params0.s.wrp = 7; + } else { /* DDR3 */ + lmc_modereg_params0.s.wrp = 1; + if (param > 5) + lmc_modereg_params0.s.wrp = 2; + if (param > 6) + lmc_modereg_params0.s.wrp = 3; + if (param > 7) + lmc_modereg_params0.s.wrp = 4; + if (param > 8) + lmc_modereg_params0.s.wrp = 5; + if (param > 10) + lmc_modereg_params0.s.wrp = 6; + if (param > 12) + lmc_modereg_params0.s.wrp = 7; + } + + lmc_modereg_params0.s.ppd = 0; + + if ((s = lookup_env_parameter("ddr_wrp")) != NULL) { + lmc_modereg_params0.s.wrp = strtoul(s, NULL, 0); + } + + ddr_print("%-45s : %d, [0x%x]\n", "Write recovery for auto precharge WRP, [CSR]", + param, lmc_modereg_params0.s.wrp); + + if ((s = lookup_env_parameter_ull("ddr_modereg_params0")) != NULL) { + lmc_modereg_params0.u = strtoull(s, NULL, 0); + } + ddr_print("MODEREG_PARAMS0 : 0x%016lx\n", lmc_modereg_params0.u); + DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u); + } + + /* LMC(0)_MODEREG_PARAMS1 */ + { + bdk_lmcx_modereg_params1_t lmc_modereg_params1; + + lmc_modereg_params1.u = odt_config[odt_idx].odt_mask1.u; + +#ifdef CAVIUM_ONLY + /* Special request: mismatched DIMM support. Slot 0: 2-Rank, Slot 1: 1-Rank */ + if (rank_mask == 0x7) { /* 2-Rank, 1-Rank */ + lmc_modereg_params1.s.rtt_nom_00 = 0; + lmc_modereg_params1.s.rtt_nom_01 = 3; /* rttnom_40ohm */ + lmc_modereg_params1.s.rtt_nom_10 = 3; /* rttnom_40ohm */ + lmc_modereg_params1.s.rtt_nom_11 = 0; + dyn_rtt_nom_mask = 0x6; + } +#endif /* CAVIUM_ONLY */ + + if ((s = lookup_env_parameter("ddr_rtt_nom_mask")) != NULL) { + dyn_rtt_nom_mask = strtoul(s, NULL, 0); + } + + + /* Save the original rtt_nom settings before sweeping through settings. 
*/ + default_rtt_nom[0] = lmc_modereg_params1.s.rtt_nom_00; + default_rtt_nom[1] = lmc_modereg_params1.s.rtt_nom_01; + default_rtt_nom[2] = lmc_modereg_params1.s.rtt_nom_10; + default_rtt_nom[3] = lmc_modereg_params1.s.rtt_nom_11; + + ddr_rtt_nom_auto = custom_lmc_config->ddr_rtt_nom_auto; + + for (i=0; i<4; ++i) { + uint64_t value; + if ((s = lookup_env_parameter("ddr_rtt_nom_%1d%1d", !!(i&2), !!(i&1))) == NULL) + s = lookup_env_parameter("ddr%d_rtt_nom_%1d%1d", ddr_interface_num, !!(i&2), !!(i&1)); + if (s != NULL) { + value = strtoul(s, NULL, 0); + lmc_modereg_params1.u &= ~((uint64_t)0x7 << (i*12+9)); + lmc_modereg_params1.u |= ( (value & 0x7) << (i*12+9)); + default_rtt_nom[i] = value; + ddr_rtt_nom_auto = 0; + } + } + + if ((s = lookup_env_parameter("ddr_rtt_nom")) == NULL) + s = lookup_env_parameter("ddr%d_rtt_nom", ddr_interface_num); + if (s != NULL) { + uint64_t value; + value = strtoul(s, NULL, 0); + + if (dyn_rtt_nom_mask & 1) + default_rtt_nom[0] = lmc_modereg_params1.s.rtt_nom_00 = value; + if (dyn_rtt_nom_mask & 2) + default_rtt_nom[1] = lmc_modereg_params1.s.rtt_nom_01 = value; + if (dyn_rtt_nom_mask & 4) + default_rtt_nom[2] = lmc_modereg_params1.s.rtt_nom_10 = value; + if (dyn_rtt_nom_mask & 8) + default_rtt_nom[3] = lmc_modereg_params1.s.rtt_nom_11 = value; + + ddr_rtt_nom_auto = 0; + } + + if ((s = lookup_env_parameter("ddr_rtt_wr")) != NULL) { + uint64_t value = strtoul(s, NULL, 0); + for (i=0; i<4; ++i) { + INSRT_WR(&lmc_modereg_params1.u, i, value); + } + } + + for (i = 0; i < 4; ++i) { + uint64_t value; + if ((s = lookup_env_parameter("ddr_rtt_wr_%1d%1d", !!(i&2), !!(i&1))) == NULL) + s = lookup_env_parameter("ddr%d_rtt_wr_%1d%1d", ddr_interface_num, !!(i&2), !!(i&1)); + if (s != NULL) { + value = strtoul(s, NULL, 0); + INSRT_WR(&lmc_modereg_params1.u, i, value); + } + } + + // Make sure pass 1 has valid RTT_WR settings, because + // configuration files may be set-up for pass 2, and + // pass 1 supports no RTT_WR extension bits + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { + for (i = 0; i < 4; ++i) { + if (EXTR_WR(lmc_modereg_params1.u, i) > 3) { // if 80 or undefined + INSRT_WR(&lmc_modereg_params1.u, i, 1); // FIXME? 
always insert 120 + ddr_print("RTT_WR_%d%d set to 120 for CN88XX pass 1\n", !!(i&2), i&1); + } + } + } + if ((s = lookup_env_parameter("ddr_dic")) != NULL) { + uint64_t value = strtoul(s, NULL, 0); + for (i=0; i<4; ++i) { + lmc_modereg_params1.u &= ~((uint64_t)0x3 << (i*12+7)); + lmc_modereg_params1.u |= ( (value & 0x3) << (i*12+7)); + } + } + + for (i=0; i<4; ++i) { + uint64_t value; + if ((s = lookup_env_parameter("ddr_dic_%1d%1d", !!(i&2), !!(i&1))) != NULL) { + value = strtoul(s, NULL, 0); + lmc_modereg_params1.u &= ~((uint64_t)0x3 << (i*12+7)); + lmc_modereg_params1.u |= ( (value & 0x3) << (i*12+7)); + } + } + + if ((s = lookup_env_parameter_ull("ddr_modereg_params1")) != NULL) { + lmc_modereg_params1.u = strtoull(s, NULL, 0); + } + + ddr_print("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_11], + imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_10], + imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_01], + imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00], + lmc_modereg_params1.s.rtt_nom_11, + lmc_modereg_params1.s.rtt_nom_10, + lmc_modereg_params1.s.rtt_nom_01, + lmc_modereg_params1.s.rtt_nom_00); + + ddr_print("RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 3)], + imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 2)], + imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 1)], + imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 0)], + EXTR_WR(lmc_modereg_params1.u, 3), + EXTR_WR(lmc_modereg_params1.u, 2), + EXTR_WR(lmc_modereg_params1.u, 1), + EXTR_WR(lmc_modereg_params1.u, 0)); + + ddr_print("DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_values->dic_ohms[lmc_modereg_params1.s.dic_11], + imp_values->dic_ohms[lmc_modereg_params1.s.dic_10], + imp_values->dic_ohms[lmc_modereg_params1.s.dic_01], + imp_values->dic_ohms[lmc_modereg_params1.s.dic_00], + lmc_modereg_params1.s.dic_11, + lmc_modereg_params1.s.dic_10, + lmc_modereg_params1.s.dic_01, + lmc_modereg_params1.s.dic_00); + + ddr_print("MODEREG_PARAMS1 : 0x%016lx\n", lmc_modereg_params1.u); + DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num), lmc_modereg_params1.u); + + } /* LMC(0)_MODEREG_PARAMS1 */ + + /* LMC(0)_MODEREG_PARAMS2 */ + if (ddr_type == DDR4_DRAM) { + bdk_lmcx_modereg_params2_t lmc_modereg_params2; + lmc_modereg_params2.u = odt_config[odt_idx].odt_mask2.u; + + for (i=0; i<4; ++i) { + uint64_t value; + if ((s = lookup_env_parameter("ddr_rtt_park_%1d%1d", !!(i&2), !!(i&1))) != NULL) { + value = strtoul(s, NULL, 0); + lmc_modereg_params2.u &= ~((uint64_t)0x7 << (i*10+0)); + lmc_modereg_params2.u |= ( (value & 0x7) << (i*10+0)); + } + } + + if ((s = lookup_env_parameter("ddr_rtt_park")) != NULL) { + uint64_t value = strtoul(s, NULL, 0); + for (i=0; i<4; ++i) { + lmc_modereg_params2.u &= ~((uint64_t)0x7 << (i*10+0)); + lmc_modereg_params2.u |= ( (value & 0x7) << (i*10+0)); + } + } + + if ((s = lookup_env_parameter_ull("ddr_modereg_params2")) != NULL) { + lmc_modereg_params2.u = strtoull(s, NULL, 0); + } + + ddr_print("RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_11], + imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_10], + imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_01], + imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_00], + lmc_modereg_params2.s.rtt_park_11, + lmc_modereg_params2.s.rtt_park_10, + lmc_modereg_params2.s.rtt_park_01, + 
lmc_modereg_params2.s.rtt_park_00);
+
+        ddr_print("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE",
+                  lmc_modereg_params2.s.vref_range_11,
+                  lmc_modereg_params2.s.vref_range_10,
+                  lmc_modereg_params2.s.vref_range_01,
+                  lmc_modereg_params2.s.vref_range_00);
+
+        ddr_print("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE",
+                  lmc_modereg_params2.s.vref_value_11,
+                  lmc_modereg_params2.s.vref_value_10,
+                  lmc_modereg_params2.s.vref_value_01,
+                  lmc_modereg_params2.s.vref_value_00);
+
+        ddr_print("MODEREG_PARAMS2 : 0x%016lx\n", lmc_modereg_params2.u);
+        DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num), lmc_modereg_params2.u);
+
+    } /* LMC(0)_MODEREG_PARAMS2 */
+
+    /* LMC(0)_MODEREG_PARAMS3 */
+    if (ddr_type == DDR4_DRAM) {
+        bdk_lmcx_modereg_params3_t lmc_modereg_params3;
+
+        lmc_modereg_params3.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS3(ddr_interface_num));
+
+        //lmc_modereg_params3.s.max_pd =
+        //lmc_modereg_params3.s.tc_ref =
+        //lmc_modereg_params3.s.vref_mon =
+        //lmc_modereg_params3.s.cal =
+        //lmc_modereg_params3.s.sre_abort =
+        //lmc_modereg_params3.s.rd_preamble =
+        //lmc_modereg_params3.s.wr_preamble =
+        //lmc_modereg_params3.s.par_lat_mode =
+        //lmc_modereg_params3.s.odt_pd =
+        //lmc_modereg_params3.s.ca_par_pers =
+        //lmc_modereg_params3.s.dm =
+        //lmc_modereg_params3.s.wr_dbi =
+        //lmc_modereg_params3.s.rd_dbi =
+        /* tCCD_L: ceil(tCCD_Lmin / tCK), clamped to at least 5 nCK, CSR-encoded as nCK - 4 */
+        lmc_modereg_params3.s.tccd_l = max(divide_roundup(ddr4_tCCD_Lmin, tclk_psecs), 5ull) - 4;
+        //lmc_modereg_params3.s.lpasr =
+        //lmc_modereg_params3.s.crc =
+        //lmc_modereg_params3.s.gd =
+        //lmc_modereg_params3.s.pda =
+        //lmc_modereg_params3.s.temp_sense =
+        //lmc_modereg_params3.s.fgrm =
+        //lmc_modereg_params3.s.wr_cmd_lat =
+        //lmc_modereg_params3.s.mpr_fmt =
+
+        if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
+            int delay = 0;
+            if ((lranks_per_prank == 4) && (ddr_hertz >= 1000000000))
+                delay = 1;
+            lmc_modereg_params3.s.xrank_add_tccd_l = delay;
+            lmc_modereg_params3.s.xrank_add_tccd_s = delay;
+        }
+
+        ddr_print("MODEREG_PARAMS3 : 0x%016lx\n", lmc_modereg_params3.u);
+        DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS3(ddr_interface_num), lmc_modereg_params3.u);
+    } /* LMC(0)_MODEREG_PARAMS3 */
+
+    /* LMC(0)_NXM */
+    {
+        bdk_lmcx_nxm_t lmc_nxm;
+        int num_bits = row_lsb + row_bits + lranks_bits - 26;
+        lmc_nxm.u = BDK_CSR_READ(node, BDK_LMCX_NXM(ddr_interface_num));
+
+        if (rank_mask & 0x1)
+            lmc_nxm.s.mem_msb_d0_r0 = num_bits;
+        if (rank_mask & 0x2)
+            lmc_nxm.s.mem_msb_d0_r1 = num_bits;
+        if (rank_mask & 0x4)
+            lmc_nxm.s.mem_msb_d1_r0 = num_bits;
+        if (rank_mask & 0x8)
+            lmc_nxm.s.mem_msb_d1_r1 = num_bits;
+
+        lmc_nxm.s.cs_mask = ~rank_mask & 0xff; /* Set the mask for non-existent ranks.
*/ + + if ((s = lookup_env_parameter_ull("ddr_nxm")) != NULL) { + lmc_nxm.u = strtoull(s, NULL, 0); + } + ddr_print("LMC_NXM : 0x%016lx\n", lmc_nxm.u); + DRAM_CSR_WRITE(node, BDK_LMCX_NXM(ddr_interface_num), lmc_nxm.u); + } + + /* LMC(0)_WODT_MASK */ + { + bdk_lmcx_wodt_mask_t lmc_wodt_mask; + lmc_wodt_mask.u = odt_config[odt_idx].odt_mask; + + if ((s = lookup_env_parameter_ull("ddr_wodt_mask")) != NULL) { + lmc_wodt_mask.u = strtoull(s, NULL, 0); + } + + ddr_print("WODT_MASK : 0x%016lx\n", lmc_wodt_mask.u); + DRAM_CSR_WRITE(node, BDK_LMCX_WODT_MASK(ddr_interface_num), lmc_wodt_mask.u); + } + + /* LMC(0)_RODT_MASK */ + { + int rankx; + bdk_lmcx_rodt_mask_t lmc_rodt_mask; + lmc_rodt_mask.u = odt_config[odt_idx].rodt_ctl; + + if ((s = lookup_env_parameter_ull("ddr_rodt_mask")) != NULL) { + lmc_rodt_mask.u = strtoull(s, NULL, 0); + } + + ddr_print("%-45s : 0x%016lx\n", "RODT_MASK", lmc_rodt_mask.u); + DRAM_CSR_WRITE(node, BDK_LMCX_RODT_MASK(ddr_interface_num), lmc_rodt_mask.u); + + dyn_rtt_nom_mask = 0; + for (rankx = 0; rankx < dimm_count * 4;rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + dyn_rtt_nom_mask |= ((lmc_rodt_mask.u >> (8*rankx)) & 0xff); + } + if (num_ranks == 4) { + /* Normally ODT1 is wired to rank 1. For quad-ranked DIMMs + ODT1 is wired to the third rank (rank 2). The mask, + dyn_rtt_nom_mask, is used to indicate for which ranks + to sweep RTT_NOM during read-leveling. Shift the bit + from the ODT1 position over to the "ODT2" position so + that the read-leveling analysis comes out right. */ + int odt1_bit = dyn_rtt_nom_mask & 2; + dyn_rtt_nom_mask &= ~2; + dyn_rtt_nom_mask |= odt1_bit<<1; + } + ddr_print("%-45s : 0x%02x\n", "DYN_RTT_NOM_MASK", dyn_rtt_nom_mask); + } + + /* LMC(0)_COMP_CTL2 */ + { + bdk_lmcx_comp_ctl2_t comp_ctl2; + + comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); + + comp_ctl2.s.dqx_ctl = odt_config[odt_idx].odt_ena; + comp_ctl2.s.ck_ctl = (custom_lmc_config->ck_ctl == 0) ? 4 : custom_lmc_config->ck_ctl; /* Default 4=34.3 ohm */ + comp_ctl2.s.cmd_ctl = (custom_lmc_config->cmd_ctl == 0) ? 4 : custom_lmc_config->cmd_ctl; /* Default 4=34.3 ohm */ + comp_ctl2.s.control_ctl = (custom_lmc_config->ctl_ctl == 0) ? 4 : custom_lmc_config->ctl_ctl; /* Default 4=34.3 ohm */ + + // NOTE: these are now done earlier, in Step 6.9.3 + // comp_ctl2.s.ntune_offset = 0; + // comp_ctl2.s.ptune_offset = 0; + + ddr_rodt_ctl_auto = custom_lmc_config->ddr_rodt_ctl_auto; + if ((s = lookup_env_parameter("ddr_rodt_ctl_auto")) != NULL) { + ddr_rodt_ctl_auto = !!strtoul(s, NULL, 0); + } + + default_rodt_ctl = odt_config[odt_idx].qs_dic; + if ((s = lookup_env_parameter("ddr_rodt_ctl")) == NULL) + s = lookup_env_parameter("ddr%d_rodt_ctl", ddr_interface_num); + if (s != NULL) { + default_rodt_ctl = strtoul(s, NULL, 0); + ddr_rodt_ctl_auto = 0; + } + + comp_ctl2.s.rodt_ctl = default_rodt_ctl; + + // if DDR4, force CK_CTL to 26 ohms if it is currently 34 ohms, and DCLK speed is 1 GHz or more... 
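+        // Worked example with numbers from the DDR4 rc10 table below (an
+        // illustration, not a measured case): at DDR4-2400, tclk_psecs = 833,
+        // so ddr_hertz is ~1.2 GHz >= 1 GHz, and a ck_ctl left at the
+        // default of 4 (34.3 ohm) is lowered to 26 ohm by the override below.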
+ if ((ddr_type == DDR4_DRAM) && (comp_ctl2.s.ck_ctl == ddr4_driver_34_ohm) && (ddr_hertz >= 1000000000)) { + comp_ctl2.s.ck_ctl = ddr4_driver_26_ohm; // lowest for DDR4 is 26 ohms + ddr_print("Forcing DDR4 COMP_CTL2[CK_CTL] to %d, %d ohms\n", comp_ctl2.s.ck_ctl, + imp_values->drive_strength[comp_ctl2.s.ck_ctl]); + } + + if ((s = lookup_env_parameter("ddr_ck_ctl")) != NULL) { + comp_ctl2.s.ck_ctl = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_cmd_ctl")) != NULL) { + comp_ctl2.s.cmd_ctl = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_control_ctl")) != NULL) { + comp_ctl2.s.control_ctl = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_dqx_ctl")) != NULL) { + comp_ctl2.s.dqx_ctl = strtoul(s, NULL, 0); + } + + ddr_print("%-45s : %d, %d ohms\n", "DQX_CTL ", comp_ctl2.s.dqx_ctl, + imp_values->dqx_strength [comp_ctl2.s.dqx_ctl ]); + ddr_print("%-45s : %d, %d ohms\n", "CK_CTL ", comp_ctl2.s.ck_ctl, + imp_values->drive_strength[comp_ctl2.s.ck_ctl ]); + ddr_print("%-45s : %d, %d ohms\n", "CMD_CTL ", comp_ctl2.s.cmd_ctl, + imp_values->drive_strength[comp_ctl2.s.cmd_ctl ]); + ddr_print("%-45s : %d, %d ohms\n", "CONTROL_CTL ", comp_ctl2.s.control_ctl, + imp_values->drive_strength[comp_ctl2.s.control_ctl]); + ddr_print("Read ODT_CTL : 0x%x (%d ohms)\n", + comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[comp_ctl2.s.rodt_ctl]); + + DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), comp_ctl2.u); + } + + /* LMC(0)_PHY_CTL */ + { + bdk_lmcx_phy_ctl_t lmc_phy_ctl; + lmc_phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num)); + lmc_phy_ctl.s.ts_stagger = 0; + + if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (lranks_per_prank > 1)) { + lmc_phy_ctl.cn81xx.c0_sel = lmc_phy_ctl.cn81xx.c1_sel = 2; // C0 is TEN, C1 is A17 + ddr_print("N%d.LMC%d: 3DS: setting PHY_CTL[cx_csel] = %d\n", + node, ddr_interface_num, lmc_phy_ctl.cn81xx.c1_sel); + } + + ddr_print("PHY_CTL : 0x%016lx\n", lmc_phy_ctl.u); + DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), lmc_phy_ctl.u); + } + + /* LMC(0)_DIMM0/1_PARAMS */ + if (spd_rdimm) { + bdk_lmcx_dimm_ctl_t lmc_dimm_ctl; + + for (didx = 0; didx < (unsigned)dimm_count; ++didx) { + bdk_lmcx_dimmx_params_t lmc_dimmx_params; + int dimm = didx; + int rc; + + lmc_dimmx_params.u = BDK_CSR_READ(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm)); + + + if (ddr_type == DDR4_DRAM) { + + bdk_lmcx_dimmx_ddr4_params0_t lmc_dimmx_ddr4_params0; + bdk_lmcx_dimmx_ddr4_params1_t lmc_dimmx_ddr4_params1; + bdk_lmcx_ddr4_dimm_ctl_t lmc_ddr4_dimm_ctl; + + lmc_dimmx_params.s.rc0 = 0; + lmc_dimmx_params.s.rc1 = 0; + lmc_dimmx_params.s.rc2 = 0; + + rc = read_spd(node, &dimm_config_table[didx], DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL); + lmc_dimmx_params.s.rc3 = (rc >> 4) & 0xf; + lmc_dimmx_params.s.rc4 = ((rc >> 0) & 0x3) << 2; + lmc_dimmx_params.s.rc4 |= ((rc >> 2) & 0x3) << 0; + + rc = read_spd(node, &dimm_config_table[didx], DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK); + lmc_dimmx_params.s.rc5 = ((rc >> 0) & 0x3) << 2; + lmc_dimmx_params.s.rc5 |= ((rc >> 2) & 0x3) << 0; + + lmc_dimmx_params.s.rc6 = 0; + lmc_dimmx_params.s.rc7 = 0; + lmc_dimmx_params.s.rc8 = 0; + lmc_dimmx_params.s.rc9 = 0; + + /* + ** rc10 DDR4 RDIMM Operating Speed + ** ==== ========================================================= + ** 0 tclk_psecs >= 1250 psec DDR4-1600 (1250 ps) + ** 1 1250 psec > tclk_psecs >= 1071 psec DDR4-1866 (1071 ps) + ** 2 1071 psec > tclk_psecs >= 938 psec DDR4-2133 ( 938 ps) + ** 3 938 psec > tclk_psecs >= 833 psec DDR4-2400 ( 
833 ps) + ** 4 833 psec > tclk_psecs >= 750 psec DDR4-2666 ( 750 ps) + ** 5 750 psec > tclk_psecs >= 625 psec DDR4-3200 ( 625 ps) + */ + lmc_dimmx_params.s.rc10 = 0; + if (1250 > tclk_psecs) + lmc_dimmx_params.s.rc10 = 1; + if (1071 > tclk_psecs) + lmc_dimmx_params.s.rc10 = 2; + if (938 > tclk_psecs) + lmc_dimmx_params.s.rc10 = 3; + if (833 > tclk_psecs) + lmc_dimmx_params.s.rc10 = 4; + if (750 > tclk_psecs) + lmc_dimmx_params.s.rc10 = 5; + + lmc_dimmx_params.s.rc11 = 0; + lmc_dimmx_params.s.rc12 = 0; + lmc_dimmx_params.s.rc13 = (spd_dimm_type == 4) ? 0 : 4; /* 0=LRDIMM, 1=RDIMM */ + lmc_dimmx_params.s.rc13 |= (ddr_type == DDR4_DRAM) ? (spd_addr_mirror << 3) : 0; + lmc_dimmx_params.s.rc14 = 0; + //lmc_dimmx_params.s.rc15 = 4; /* 0 nCK latency adder */ + lmc_dimmx_params.s.rc15 = 0; /* 1 nCK latency adder */ + + lmc_dimmx_ddr4_params0.u = 0; + + lmc_dimmx_ddr4_params0.s.rc8x = 0; + lmc_dimmx_ddr4_params0.s.rc7x = 0; + lmc_dimmx_ddr4_params0.s.rc6x = 0; + lmc_dimmx_ddr4_params0.s.rc5x = 0; + lmc_dimmx_ddr4_params0.s.rc4x = 0; + + lmc_dimmx_ddr4_params0.s.rc3x = compute_rc3x(tclk_psecs); + + lmc_dimmx_ddr4_params0.s.rc2x = 0; + lmc_dimmx_ddr4_params0.s.rc1x = 0; + + lmc_dimmx_ddr4_params1.u = 0; + + lmc_dimmx_ddr4_params1.s.rcbx = 0; + lmc_dimmx_ddr4_params1.s.rcax = 0; + lmc_dimmx_ddr4_params1.s.rc9x = 0; + + lmc_ddr4_dimm_ctl.u = 0; + lmc_ddr4_dimm_ctl.s.ddr4_dimm0_wmask = 0x004; + lmc_ddr4_dimm_ctl.s.ddr4_dimm1_wmask = (dimm_count > 1) ? 0x004 : 0x0000; + + /* + * Handle any overrides from envvars here... + */ + if ((s = lookup_env_parameter("ddr_ddr4_params0")) != NULL) { + lmc_dimmx_ddr4_params0.u = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_ddr4_params1")) != NULL) { + lmc_dimmx_ddr4_params1.u = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_ddr4_dimm_ctl")) != NULL) { + lmc_ddr4_dimm_ctl.u = strtoul(s, NULL, 0); + } + + for (i=0; i<11; ++i) { + uint64_t value; + if ((s = lookup_env_parameter("ddr_ddr4_rc%1xx", i+1)) != NULL) { + value = strtoul(s, NULL, 0); + if (i < 8) { + lmc_dimmx_ddr4_params0.u &= ~((uint64_t)0xff << (i*8)); + lmc_dimmx_ddr4_params0.u |= (value << (i*8)); + } else { + lmc_dimmx_ddr4_params1.u &= ~((uint64_t)0xff << ((i-8)*8)); + lmc_dimmx_ddr4_params1.u |= (value << ((i-8)*8)); + } + } + } + + /* + * write the final CSR values + */ + DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_DDR4_PARAMS0(ddr_interface_num, dimm), lmc_dimmx_ddr4_params0.u); + + DRAM_CSR_WRITE(node, BDK_LMCX_DDR4_DIMM_CTL(ddr_interface_num), lmc_ddr4_dimm_ctl.u); + + DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_DDR4_PARAMS1(ddr_interface_num, dimm), lmc_dimmx_ddr4_params1.u); + + ddr_print("DIMM%d Register Control Words RCBx:RC1x : %x %x %x %x %x %x %x %x %x %x %x\n", + dimm, + lmc_dimmx_ddr4_params1.s.rcbx, + lmc_dimmx_ddr4_params1.s.rcax, + lmc_dimmx_ddr4_params1.s.rc9x, + lmc_dimmx_ddr4_params0.s.rc8x, + lmc_dimmx_ddr4_params0.s.rc7x, + lmc_dimmx_ddr4_params0.s.rc6x, + lmc_dimmx_ddr4_params0.s.rc5x, + lmc_dimmx_ddr4_params0.s.rc4x, + lmc_dimmx_ddr4_params0.s.rc3x, + lmc_dimmx_ddr4_params0.s.rc2x, + lmc_dimmx_ddr4_params0.s.rc1x ); + + } else { /* if (ddr_type == DDR4_DRAM) */ + rc = read_spd(node, &dimm_config_table[didx], 69); + lmc_dimmx_params.s.rc0 = (rc >> 0) & 0xf; + lmc_dimmx_params.s.rc1 = (rc >> 4) & 0xf; + + rc = read_spd(node, &dimm_config_table[didx], 70); + lmc_dimmx_params.s.rc2 = (rc >> 0) & 0xf; + lmc_dimmx_params.s.rc3 = (rc >> 4) & 0xf; + + rc = read_spd(node, &dimm_config_table[didx], 71); + lmc_dimmx_params.s.rc4 = (rc >> 0) & 0xf; + 
lmc_dimmx_params.s.rc5 = (rc >> 4) & 0xf; + + rc = read_spd(node, &dimm_config_table[didx], 72); + lmc_dimmx_params.s.rc6 = (rc >> 0) & 0xf; + lmc_dimmx_params.s.rc7 = (rc >> 4) & 0xf; + + rc = read_spd(node, &dimm_config_table[didx], 73); + lmc_dimmx_params.s.rc8 = (rc >> 0) & 0xf; + lmc_dimmx_params.s.rc9 = (rc >> 4) & 0xf; + + rc = read_spd(node, &dimm_config_table[didx], 74); + lmc_dimmx_params.s.rc10 = (rc >> 0) & 0xf; + lmc_dimmx_params.s.rc11 = (rc >> 4) & 0xf; + + rc = read_spd(node, &dimm_config_table[didx], 75); + lmc_dimmx_params.s.rc12 = (rc >> 0) & 0xf; + lmc_dimmx_params.s.rc13 = (rc >> 4) & 0xf; + + rc = read_spd(node, &dimm_config_table[didx], 76); + lmc_dimmx_params.s.rc14 = (rc >> 0) & 0xf; + lmc_dimmx_params.s.rc15 = (rc >> 4) & 0xf; + + + if ((s = lookup_env_parameter("ddr_clk_drive")) != NULL) { + if (strcmp(s,"light") == 0) { + lmc_dimmx_params.s.rc5 = 0x0; /* Light Drive */ + } + if (strcmp(s,"moderate") == 0) { + lmc_dimmx_params.s.rc5 = 0x5; /* Moderate Drive */ + } + if (strcmp(s,"strong") == 0) { + lmc_dimmx_params.s.rc5 = 0xA; /* Strong Drive */ + } + } + + if ((s = lookup_env_parameter("ddr_cmd_drive")) != NULL) { + if (strcmp(s,"light") == 0) { + lmc_dimmx_params.s.rc3 = 0x0; /* Light Drive */ + } + if (strcmp(s,"moderate") == 0) { + lmc_dimmx_params.s.rc3 = 0x5; /* Moderate Drive */ + } + if (strcmp(s,"strong") == 0) { + lmc_dimmx_params.s.rc3 = 0xA; /* Strong Drive */ + } + } + + if ((s = lookup_env_parameter("ddr_ctl_drive")) != NULL) { + if (strcmp(s,"light") == 0) { + lmc_dimmx_params.s.rc4 = 0x0; /* Light Drive */ + } + if (strcmp(s,"moderate") == 0) { + lmc_dimmx_params.s.rc4 = 0x5; /* Moderate Drive */ + } + } + + + /* + ** rc10 DDR3 RDIMM Operating Speed + ** ==== ========================================================= + ** 0 tclk_psecs >= 2500 psec DDR3/DDR3L-800 (default) + ** 1 2500 psec > tclk_psecs >= 1875 psec DDR3/DDR3L-1066 + ** 2 1875 psec > tclk_psecs >= 1500 psec DDR3/DDR3L-1333 + ** 3 1500 psec > tclk_psecs >= 1250 psec DDR3/DDR3L-1600 + ** 4 1250 psec > tclk_psecs >= 1071 psec DDR3-1866 + */ + lmc_dimmx_params.s.rc10 = 0; + if (2500 > tclk_psecs) + lmc_dimmx_params.s.rc10 = 1; + if (1875 > tclk_psecs) + lmc_dimmx_params.s.rc10 = 2; + if (1500 > tclk_psecs) + lmc_dimmx_params.s.rc10 = 3; + if (1250 > tclk_psecs) + lmc_dimmx_params.s.rc10 = 4; + + } /* if (ddr_type == DDR4_DRAM) */ + + if ((s = lookup_env_parameter("ddr_dimmx_params")) != NULL) { + lmc_dimmx_params.u = strtoul(s, NULL, 0); + } + + for (i=0; i<16; ++i) { + uint64_t value; + if ((s = lookup_env_parameter("ddr_rc%d", i)) != NULL) { + value = strtoul(s, NULL, 0); + lmc_dimmx_params.u &= ~((uint64_t)0xf << (i*4)); + lmc_dimmx_params.u |= ( value << (i*4)); + } + } + + DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm), lmc_dimmx_params.u); + + ddr_print("DIMM%d Register Control Words RC15:RC0 : %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n", + dimm, + lmc_dimmx_params.s.rc15, + lmc_dimmx_params.s.rc14, + lmc_dimmx_params.s.rc13, + lmc_dimmx_params.s.rc12, + lmc_dimmx_params.s.rc11, + lmc_dimmx_params.s.rc10, + lmc_dimmx_params.s.rc9 , + lmc_dimmx_params.s.rc8 , + lmc_dimmx_params.s.rc7 , + lmc_dimmx_params.s.rc6 , + lmc_dimmx_params.s.rc5 , + lmc_dimmx_params.s.rc4 , + lmc_dimmx_params.s.rc3 , + lmc_dimmx_params.s.rc2 , + lmc_dimmx_params.s.rc1 , + lmc_dimmx_params.s.rc0 ); + } /* for didx */ + + if (ddr_type == DDR4_DRAM) { + + /* LMC0_DIMM_CTL */ + lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num)); + lmc_dimm_ctl.s.dimm0_wmask = 
0xdf3f; + lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0xdf3f : 0x0000; + lmc_dimm_ctl.s.tcws = 0x4e0; + lmc_dimm_ctl.cn88xx.parity = custom_lmc_config->parity; + + if ((s = lookup_env_parameter("ddr_dimm0_wmask")) != NULL) { + lmc_dimm_ctl.s.dimm0_wmask = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_dimm1_wmask")) != NULL) { + lmc_dimm_ctl.s.dimm1_wmask = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_dimm_ctl_parity")) != NULL) { + lmc_dimm_ctl.cn88xx.parity = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_dimm_ctl_tcws")) != NULL) { + lmc_dimm_ctl.s.tcws = strtoul(s, NULL, 0); + } + + ddr_print("LMC DIMM_CTL : 0x%016lx\n", lmc_dimm_ctl.u); + DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u); + + perform_octeon3_ddr3_sequence(node, rank_mask, + ddr_interface_num, 0x7 ); /* Init RCW */ + + /* Write RC0D last */ + lmc_dimm_ctl.s.dimm0_wmask = 0x2000; + lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0x2000 : 0x0000; + ddr_print("LMC DIMM_CTL : 0x%016lx\n", lmc_dimm_ctl.u); + DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u); + + /* Don't write any extended registers the second time */ + DRAM_CSR_WRITE(node, BDK_LMCX_DDR4_DIMM_CTL(ddr_interface_num), 0); + + perform_octeon3_ddr3_sequence(node, rank_mask, + ddr_interface_num, 0x7 ); /* Init RCW */ + } else { + + /* LMC0_DIMM_CTL */ + lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num)); + lmc_dimm_ctl.s.dimm0_wmask = 0xffff; + lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0xffff : 0x0000; + lmc_dimm_ctl.s.tcws = 0x4e0; + lmc_dimm_ctl.cn88xx.parity = custom_lmc_config->parity; + + if ((s = lookup_env_parameter("ddr_dimm0_wmask")) != NULL) { + lmc_dimm_ctl.s.dimm0_wmask = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_dimm1_wmask")) != NULL) { + lmc_dimm_ctl.s.dimm1_wmask = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_dimm_ctl_parity")) != NULL) { + lmc_dimm_ctl.cn88xx.parity = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_dimm_ctl_tcws")) != NULL) { + lmc_dimm_ctl.s.tcws = strtoul(s, NULL, 0); + } + + ddr_print("LMC DIMM_CTL : 0x%016lx\n", lmc_dimm_ctl.u); + DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u); + + perform_octeon3_ddr3_sequence(node, rank_mask, + ddr_interface_num, 0x7 ); /* Init RCW */ + } + } else { /* if (spd_rdimm) */ + /* Disable register control writes for unbuffered */ + bdk_lmcx_dimm_ctl_t lmc_dimm_ctl; + lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num)); + lmc_dimm_ctl.s.dimm0_wmask = 0; + lmc_dimm_ctl.s.dimm1_wmask = 0; + DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u); + } /* if (spd_rdimm) */ + + /* + * Comments (steps 3 through 5) continue in perform_octeon3_ddr3_sequence() + */ + { + bdk_lmcx_modereg_params0_t lmc_modereg_params0; + + if (ddr_memory_preserved(node)) { + /* Contents are being preserved. Take DRAM out of + self-refresh first. 
Then init steps can proceed
+           normally */
+        perform_octeon3_ddr3_sequence(node, rank_mask,
+                                      ddr_interface_num, 3); /* self-refresh exit */
+    }
+
+    lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
+
+    lmc_modereg_params0.s.dllr = 1; /* Set during first init sequence */
+    DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
+
+    perform_ddr_init_sequence(node, rank_mask, ddr_interface_num);
+
+    lmc_modereg_params0.s.dllr = 0; /* Clear for normal operation */
+    DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
+    }
+
+    // NOTE: this must be done for pass 2.x and pass 1.x
+    if ((spd_rdimm) && (ddr_type == DDR4_DRAM)) {
+        VB_PRT(VBL_FAE, "Running init sequence 1\n");
+        change_rdimm_mpr_pattern(node, rank_mask, ddr_interface_num, dimm_count);
+    }
+
+#define DEFAULT_INTERNAL_VREF_TRAINING_LIMIT 5
+    int internal_retries = 0;
+    int deskew_training_errors;
+    int dac_eval_retries;
+    int dac_settings[9];
+    int num_samples;
+    int sample, lane;
+    int last_lane = ((ddr_interface_64b) ? 8 : 4) + use_ecc;
+
+#define DEFAULT_DAC_SAMPLES 7 // originally was 5
+#define DAC_RETRIES_LIMIT 2
+
+    typedef struct {
+        int16_t bytes[DEFAULT_DAC_SAMPLES];
+    } bytelane_sample_t;
+    bytelane_sample_t lanes[9];
+
+    memset(lanes, 0, sizeof(lanes));
+
+    if ((ddr_type == DDR4_DRAM) && !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
+        num_samples = DEFAULT_DAC_SAMPLES;
+    } else {
+        num_samples = 1; // if DDR3 or no ability to write DAC values
+    }
+
+ perform_internal_vref_training:
+
+    for (sample = 0; sample < num_samples; sample++) {
+
+        dac_eval_retries = 0;
+
+        do { // make offset and internal vref training repeatable
+
+            /* 6.9.8 LMC Offset Training
+               LMC requires input-receiver offset training. */
+            Perform_Offset_Training(node, rank_mask, ddr_interface_num);
+
+            /* 6.9.9 LMC Internal Vref Training
+               LMC requires input-reference-voltage training. */
+            Perform_Internal_VREF_Training(node, rank_mask, ddr_interface_num);
+
+            // read and maybe display the DAC values for a sample
+            read_DAC_DBI_settings(node, ddr_interface_num, /*DAC*/1, dac_settings);
+            if ((num_samples == 1) || dram_is_verbose(VBL_DEV)) {
+                display_DAC_DBI_settings(node, ddr_interface_num, /*DAC*/1, use_ecc,
+                                         dac_settings, "Internal VREF");
+            }
+
+            // for DDR4, evaluate the DAC settings and retry if any issues
+            if (ddr_type == DDR4_DRAM) {
+                if (evaluate_DAC_settings(ddr_interface_64b, use_ecc, dac_settings)) {
+                    if (++dac_eval_retries > DAC_RETRIES_LIMIT) {
+                        ddr_print("N%d.LMC%d: DDR4 internal VREF DAC settings: retries exhausted; continuing...\n",
+                                  node, ddr_interface_num);
+                    } else {
+                        ddr_print("N%d.LMC%d: DDR4 internal VREF DAC settings inconsistent; retrying....\n",
+                                  node, ddr_interface_num); // FIXME? verbosity!!!
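+                        // This retry path re-runs offset and internal VREF
+                        // training without recording a sample; only a clean
+                        // (or retry-exhausted) DAC read falls through to the
+                        // sample-recording code below.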
+ continue; + } + } + if (num_samples > 1) { // taking multiple samples, otherwise do nothing + // good sample or exhausted retries, record it + for (lane = 0; lane < last_lane; lane++) { + lanes[lane].bytes[sample] = dac_settings[lane]; + } + } + } + break; // done if DDR3, or good sample, or exhausted retries + + } while (1); + + } /* for (sample = 0; sample < num_samples; sample++) */ + + if (num_samples > 1) { + debug_print("N%d.LMC%d: DDR4 internal VREF DAC settings: processing multiple samples...\n", + node, ddr_interface_num); + + for (lane = 0; lane < last_lane; lane++) { + dac_settings[lane] = process_samples_average(&lanes[lane].bytes[0], num_samples, + ddr_interface_num, lane); + } + display_DAC_DBI_settings(node, ddr_interface_num, /*DAC*/1, use_ecc, dac_settings, "Averaged VREF"); + + // finally, write the final DAC values + for (lane = 0; lane < last_lane; lane++) { + load_dac_override(node, ddr_interface_num, dac_settings[lane], lane); + } + } + +#if DAC_OVERRIDE_EARLY + // as a second step, after internal VREF training, before starting deskew training: + // for DDR3 and THUNDER pass 2.x, override the DAC setting to 127 + if ((ddr_type == DDR3_DRAM) && !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx + load_dac_override(node, ddr_interface_num, 127, /* all */0x0A); + ddr_print("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127 (early).\n", + node, ddr_interface_num); + } +#endif + + /* + * 6.9.10 LMC Read Deskew Training + * LMC requires input-read-data deskew training. + */ + if (! disable_deskew_training) { + + deskew_training_errors = Perform_Read_Deskew_Training(node, rank_mask, ddr_interface_num, + spd_rawcard_AorB, 0, ddr_interface_64b); + + // All the Deskew lock and saturation retries (may) have been done, + // but we ended up with nibble errors; so, as a last ditch effort, + // enable retries of the Internal Vref Training... + if (deskew_training_errors) { + if (internal_retries < DEFAULT_INTERNAL_VREF_TRAINING_LIMIT) { + internal_retries++; + VB_PRT(VBL_FAE, "N%d.LMC%d: Deskew training results still unsettled - retrying internal Vref training (%d)\n", + node, ddr_interface_num, internal_retries); + goto perform_internal_vref_training; + } else { + VB_PRT(VBL_FAE, "N%d.LMC%d: Deskew training incomplete - %d retries exhausted, but continuing...\n", + node, ddr_interface_num, internal_retries); + } + } + + // FIXME: treat this as the final DSK print from now on, and print if VBL_NORM or above + // also, save the results of the original training + Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &deskew_training_results, VBL_NORM); + + // setup write bit-deskew if enabled... + if (enable_write_deskew) { + ddr_print("N%d.LMC%d: WRITE BIT-DESKEW feature enabled- going NEUTRAL.\n", + node, ddr_interface_num); + Neutral_Write_Deskew_Setup(node, ddr_interface_num); + } /* if (enable_write_deskew) */ + + } /* if (! 
disable_deskew_training) */
+
+#if !DAC_OVERRIDE_EARLY
+    // as a final step in internal VREF training, after deskew training but before HW WL:
+    // for DDR3 and THUNDER pass 2.x, override the DAC setting to 127
+    if ((ddr_type == DDR3_DRAM) && !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+        load_dac_override(node, ddr_interface_num, 127, /* all */0x0A);
+        ddr_print("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127 (late).\n",
+                  node, ddr_interface_num);
+    }
+#endif
+
+
+    /* LMC(0)_EXT_CONFIG */
+    {
+        bdk_lmcx_ext_config_t ext_config;
+        ext_config.u = BDK_CSR_READ(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num));
+        ext_config.s.vrefint_seq_deskew = 0;
+        ext_config.s.read_ena_bprch = 1;
+        ext_config.s.read_ena_fprch = 1;
+        ext_config.s.drive_ena_fprch = 1;
+        ext_config.s.drive_ena_bprch = 1;
+        ext_config.s.invert_data = 0; // make sure this is OFF for all current chips
+
+        if ((s = lookup_env_parameter("ddr_read_fprch")) != NULL) {
+            ext_config.s.read_ena_fprch = strtoul(s, NULL, 0);
+        }
+        if ((s = lookup_env_parameter("ddr_read_bprch")) != NULL) {
+            ext_config.s.read_ena_bprch = strtoul(s, NULL, 0);
+        }
+        if ((s = lookup_env_parameter("ddr_drive_fprch")) != NULL) {
+            ext_config.s.drive_ena_fprch = strtoul(s, NULL, 0);
+        }
+        if ((s = lookup_env_parameter("ddr_drive_bprch")) != NULL) {
+            ext_config.s.drive_ena_bprch = strtoul(s, NULL, 0);
+        }
+
+        if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (lranks_per_prank > 1)) {
+            ext_config.s.dimm0_cid = ext_config.s.dimm1_cid = lranks_bits;
+            ddr_print("N%d.LMC%d: 3DS: setting EXT_CONFIG[dimmx_cid] = %d\n",
+                      node, ddr_interface_num, ext_config.s.dimm0_cid);
+        }
+
+        DRAM_CSR_WRITE(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num), ext_config.u);
+        ddr_print("%-45s : 0x%016lx\n", "EXT_CONFIG", ext_config.u);
+    }
+
+
+    {
+        int save_ref_zqcs_int;
+        uint64_t temp_delay_usecs;
+
+        lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+
+        /* Temporarily select the minimum ZQCS interval and wait
+           long enough for a few ZQCS calibrations to occur. This
+           should ensure that the calibration circuitry is
+           stabilized before read/write leveling occurs. */
+        save_ref_zqcs_int = lmc_config.s.ref_zqcs_int;
+        lmc_config.s.ref_zqcs_int = 1 | (32<<7); /* set smallest interval */
+
+        DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+        BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+
+        /* Compute an appropriate delay based on the current ZQCS
+           interval. The delay should be long enough for the
+           current ZQCS delay counter to expire plus ten of the
+           minimum intervals to ensure that some calibrations
+           occur. */
+        temp_delay_usecs = (((uint64_t)save_ref_zqcs_int >> 7)
+                            * tclk_psecs * 100 * 512 * 128) / (10000*10000)
+            + 10 * ((uint64_t)32 * tclk_psecs * 100 * 512 * 128) / (10000*10000);
+
+        VB_PRT(VBL_FAE, "N%d.LMC%d: Waiting %ld usecs for ZQCS calibrations to start\n",
+               node, ddr_interface_num, temp_delay_usecs);
+        bdk_wait_usec(temp_delay_usecs);
+
+        lmc_config.s.ref_zqcs_int = save_ref_zqcs_int; /* Restore computed interval */
+
+        DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+        BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+    }
+
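+    /* A sketch of the delay arithmetic above, assuming the >>7 split
+       means the upper bits of ref_zqcs_int count ZQCS intervals in units
+       of 512*128 tCK: field * 512 * 128 * tclk_psecs is the interval in
+       picoseconds, and the *100/(10000*10000) factor is just a
+       picoseconds-to-microseconds conversion. For example, with
+       tclk_psecs = 833 the fixed second term above is
+       10 * (32 * 512 * 128 * 833 ps), roughly 17,470 usecs. */
+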
+    /*
+     * 6.9.11 LMC Write Leveling
+     *
+     * LMC supports an automatic write leveling like that described in the
+     * JEDEC DDR3 specifications separately per byte-lane.
+     *
+     * All of DDR PLL, LMC CK, LMC DRESET, and early LMC initializations must
+     * be completed prior to starting this LMC write-leveling sequence.
+     *
+     * There are many possible procedures that will write-level all the
+     * attached DDR3 DRAM parts. One possibility is for software to simply
+     * write the desired values into LMC(0)_WLEVEL_RANK(0..3). This section
+     * describes one possible sequence that uses LMC's auto write-leveling
+     * capabilities.
+     *
+     * 1. If the DQS/DQ delays on the board may be more than the ADD/CMD
+     *    delays, then ensure that LMC(0)_CONFIG[EARLY_DQX] is set at this
+     *    point.
+     *
+     * Do the remaining steps 2-7 separately for each rank i with attached
+     * DRAM.
+     *
+     * 2. Write LMC(0)_WLEVEL_RANKi = 0.
+     *
+     * 3. For ×8 parts:
+     *
+     *    Without changing any other fields in LMC(0)_WLEVEL_CTL, write
+     *    LMC(0)_WLEVEL_CTL[LANEMASK] to select all byte lanes with attached
+     *    DRAM.
+     *
+     *    For ×16 parts:
+     *
+     *    Without changing any other fields in LMC(0)_WLEVEL_CTL, write
+     *    LMC(0)_WLEVEL_CTL[LANEMASK] to select all even byte lanes with
+     *    attached DRAM.
+     *
+     * 4. Without changing any other fields in LMC(0)_CONFIG,
+     *
+     *    o write LMC(0)_SEQ_CTL[SEQ_SEL] to select write-leveling
+     *
+     *    o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
+     *
+     *    o write LMC(0)_SEQ_CTL[INIT_START] = 1
+     *
+     *    LMC will initiate write-leveling at this point. Assuming
+     *    LMC(0)_WLEVEL_CTL [SSET] = 0, LMC first enables write-leveling on
+     *    the selected DRAM rank via a DDR3 MR1 write, then sequences through
+     *    and accumulates write-leveling results for eight different delay
+     *    settings twice, starting at a delay of zero in this case since
+     *    LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] = 0, increasing by 1/8 CK each
+     *    setting, covering a total distance of one CK, then disables the
+     *    write-leveling via another DDR3 MR1 write.
+     *
+     *    After the sequence through 16 delay settings is complete:
+     *
+     *    o LMC sets LMC(0)_WLEVEL_RANKi[STATUS] = 3
+     *
+     *    o LMC sets LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] (for all ranks selected
+     *      by LMC(0)_WLEVEL_CTL[LANEMASK]) to indicate the first write
+     *      leveling result of 1 that followed result of 0 during the
+     *      sequence, except that the LMC always writes
+     *      LMC(0)_WLEVEL_RANKi[BYTE*<0>]=0.
+     *
+     *    o Software can read the eight write-leveling results from the first
+     *      pass through the delay settings by reading
+     *      LMC(0)_WLEVEL_DBG[BITMASK] (after writing
+     *      LMC(0)_WLEVEL_DBG[BYTE]). (LMC does not retain the write-leveling
+     *      results from the second pass through the eight delay
+     *      settings. They should often be identical to the
+     *      LMC(0)_WLEVEL_DBG[BITMASK] results, though.)
+     *
+     * 5. Wait until LMC(0)_WLEVEL_RANKi[STATUS] != 2.
+     *
+     *    LMC will have updated LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] for all byte
+     *    lanes selected by LMC(0)_WLEVEL_CTL[LANEMASK] at this point.
+     *    LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] will still be the value that
+     *    software wrote in substep 2 above, which is 0.
+     *
+     * 6. For ×16 parts:
+     *
+     *    Without changing any other fields in LMC(0)_WLEVEL_CTL, write
+     *    LMC(0)_WLEVEL_CTL[LANEMASK] to select all odd byte lanes with
+     *    attached DRAM.
+     *
+     *    Repeat substeps 4 and 5 with this new LMC(0)_WLEVEL_CTL[LANEMASK]
+     *    setting. Skip to substep 7 if this has already been done.
+     *
+     *    For ×8 parts:
+     *
+     *    Skip this substep. Go to substep 7.
+     *
+     * 7. Calculate LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings for all byte
+     *    lanes on all ranks with attached DRAM.
+     *
+     *    At this point, all byte lanes on rank i with attached DRAM should
+     *    have been write-leveled, and LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] has
+     *    the result for each byte lane.
+ * + * But note that the DDR3 write-leveling sequence will only determine + * the delay modulo the CK cycle time, and cannot determine how many + * additional CK cycles of delay are present. Software must calculate + * the number of CK cycles, or equivalently, the + * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings. + * + * This BYTE*<4:3> calculation is system/board specific. + * + * Many techniques can be used to calculate write-leveling BYTE*<4:3> values, + * including: + * + * o Known values for some byte lanes. + * + * o Relative values for some byte lanes relative to others. + * + * For example, suppose lane X is likely to require a larger + * write-leveling delay than lane Y. A BYTEX<2:0> value that is much + * smaller than the BYTEY<2:0> value may then indicate that the + * required lane X delay wrapped into the next CK, so BYTEX<4:3> + * should be set to BYTEY<4:3>+1. + * + * When ECC DRAM is not present (i.e. when DRAM is not attached to the + * DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and + * DDR_DQ<35:32> chip signals), write LMC(0)_WLEVEL_RANK*[BYTE8] = + * LMC(0)_WLEVEL_RANK*[BYTE0], using the final calculated BYTE0 value. + * Write LMC(0)_WLEVEL_RANK*[BYTE4] = LMC(0)_WLEVEL_RANK*[BYTE0], + * using the final calculated BYTE0 value. + * + * 8. Initialize LMC(0)_WLEVEL_RANK* values for all unused ranks. + * + * Let rank i be a rank with attached DRAM. + * + * For all ranks j that do not have attached DRAM, set + * LMC(0)_WLEVEL_RANKj = LMC(0)_WLEVEL_RANKi. + */ + { // Start HW write-leveling block +#pragma pack(push,1) + bdk_lmcx_wlevel_ctl_t wlevel_ctl; + bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank; + int rankx = 0; + int wlevel_bitmask[9]; + int byte_idx; + int ecc_ena; + int ddr_wlevel_roundup = 0; + int ddr_wlevel_printall = (dram_is_verbose(VBL_FAE)); // or default to 1 to print all HW WL samples + int disable_hwl_validity = 0; + int default_wlevel_rtt_nom; +#if WODT_MASK_2R_1S + uint64_t saved_wodt_mask = 0; +#endif +#pragma pack(pop) + + if (wlevel_loops) + ddr_print("N%d.LMC%d: Performing Hardware Write-Leveling\n", node, ddr_interface_num); + else { + wlevel_bitmask_errors = 1; /* Force software write-leveling to run */ + ddr_print("N%d.LMC%d: Forcing software Write-Leveling\n", node, ddr_interface_num); + } + + default_wlevel_rtt_nom = (ddr_type == DDR3_DRAM) ? rttnom_20ohm : ddr4_rttnom_40ohm ; /* FIXME? 
*/ + +#if WODT_MASK_2R_1S + if ((ddr_type == DDR4_DRAM) && (num_ranks == 2) && (dimm_count == 1)) { + /* LMC(0)_WODT_MASK */ + bdk_lmcx_wodt_mask_t lmc_wodt_mask; + // always save original so we can always restore later + saved_wodt_mask = BDK_CSR_READ(node, BDK_LMCX_WODT_MASK(ddr_interface_num)); + if ((s = lookup_env_parameter_ull("ddr_hwl_wodt_mask")) != NULL) { + lmc_wodt_mask.u = strtoull(s, NULL, 0); + if (lmc_wodt_mask.u != saved_wodt_mask) { // print/store only when diff + ddr_print("WODT_MASK : 0x%016lx\n", lmc_wodt_mask.u); + DRAM_CSR_WRITE(node, BDK_LMCX_WODT_MASK(ddr_interface_num), lmc_wodt_mask.u); + } + } + } +#endif /* WODT_MASK_2R_1S */ + + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num)); + ecc_ena = lmc_config.s.ecc_ena; + + if ((s = lookup_env_parameter("ddr_wlevel_roundup")) != NULL) { + ddr_wlevel_roundup = strtoul(s, NULL, 0); + } + if ((s = lookup_env_parameter("ddr_wlevel_printall")) != NULL) { + ddr_wlevel_printall = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_disable_hwl_validity")) != NULL) { + disable_hwl_validity = !!strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_wlevel_rtt_nom")) != NULL) { + default_wlevel_rtt_nom = strtoul(s, NULL, 0); + } + + // For DDR3, we leave the WLEVEL_CTL fields at default settings + // For DDR4, we touch WLEVEL_CTL fields OR_DIS or BITMASK here + if (ddr_type == DDR4_DRAM) { + int default_or_dis = 1; + int default_bitmask = 0xFF; + + // when x4, use only the lower nibble bits + if (dram_width == 4) { + default_bitmask = 0x0F; + VB_PRT(VBL_DEV, "N%d.LMC%d: WLEVEL_CTL: default bitmask is 0x%2x for DDR4 x4\n", + node, ddr_interface_num, default_bitmask); + } + + wlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num)); + wlevel_ctl.s.or_dis = default_or_dis; + wlevel_ctl.s.bitmask = default_bitmask; + + // allow overrides + if ((s = lookup_env_parameter("ddr_wlevel_ctl_or_dis")) != NULL) { + wlevel_ctl.s.or_dis = !!strtoul(s, NULL, 0); + } + if ((s = lookup_env_parameter("ddr_wlevel_ctl_bitmask")) != NULL) { + wlevel_ctl.s.bitmask = strtoul(s, NULL, 0); + } + + // print only if not defaults + if ((wlevel_ctl.s.or_dis != default_or_dis) || (wlevel_ctl.s.bitmask != default_bitmask)) { + ddr_print("N%d.LMC%d: WLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n", + node, ddr_interface_num, wlevel_ctl.s.or_dis, wlevel_ctl.s.bitmask); + } + // always write + DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u); + } + + // Start the hardware write-leveling loop per rank + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + + if (!(rank_mask & (1 << rankx))) + continue; + +#if HW_WL_MAJORITY + // array to collect counts of byte-lane values + // assume low-order 3 bits and even, so really only 2 bit values + int wlevel_bytes[9][4]; + memset(wlevel_bytes, 0, sizeof(wlevel_bytes)); +#endif + + // restructure the looping so we can keep trying until we get the samples we want + //for (int wloop = 0; wloop < wlevel_loops; wloop++) { + int wloop = 0; + int wloop_retries = 0; // retries per sample for HW-related issues with bitmasks or values + int wloop_retries_total = 0; + int wloop_retries_exhausted = 0; +#define WLOOP_RETRIES_DEFAULT 5 + int wlevel_validity_errors; + int wlevel_bitmask_errors_rank = 0; + int wlevel_validity_errors_rank = 0; + + while (wloop < wlevel_loops) { + + wlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num)); + + wlevel_ctl.s.rtt_nom = (default_wlevel_rtt_nom > 0) ? 
(default_wlevel_rtt_nom - 1) : 7;
+
+
+            DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), 0); /* Clear write-level delays */
+
+            wlevel_bitmask_errors = 0; /* Reset error counters */
+            wlevel_validity_errors = 0;
+
+            for (byte_idx=0; byte_idx<9; ++byte_idx) {
+                wlevel_bitmask[byte_idx] = 0; /* Reset bitmasks */
+            }
+
+#if HWL_BY_BYTE // FIXME???
+            /* Make a separate pass for each byte to reduce power. */
+            for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
+
+                if (!(ddr_interface_bytemask&(1<<byte_idx)))
+                    continue;
+
+                wlevel_ctl.s.lanemask = (1<<byte_idx);
+
+                DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
+
+                /* Read and write values back in order to update the
+                   status field. This ensures that we read the updated
+                   values after write-leveling has completed. */
+                DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
+                               BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx)));
+
+                perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 6); /* write-leveling */
+
+                if (!bdk_is_platform(BDK_PLATFORM_ASIM) &&
+                    BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
+                                           status, ==, 3, 1000000))
+                {
+                    error_print("ERROR: Timeout waiting for WLEVEL\n");
+                }
+                lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+
+                wlevel_bitmask[byte_idx] = octeon_read_lmcx_ddr3_wlevel_dbg(node, ddr_interface_num, byte_idx);
+                if (wlevel_bitmask[byte_idx] == 0)
+                    ++wlevel_bitmask_errors;
+            } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
+
+            wlevel_ctl.s.lanemask = /*0x1ff*/ddr_interface_bytemask; // restore for RL
+            DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
+#else
+            // do all the byte-lanes at the same time
+            wlevel_ctl.s.lanemask = /*0x1ff*/ddr_interface_bytemask; // FIXME?
+
+            DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
+
+            /* Read and write values back in order to update the
+               status field. This ensures that we read the updated
+               values after write-leveling has completed.
*/ + DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), + BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx))); + + perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 6); /* write-leveling */ + + if (!bdk_is_platform(BDK_PLATFORM_ASIM) && + BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), + status, ==, 3, 1000000)) + { + error_print("ERROR: Timeout waiting for WLEVEL\n"); + } + + lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx)); + + for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) { + if (!(ddr_interface_bytemask&(1<<byte_idx))) + continue; + wlevel_bitmask[byte_idx] = octeon_read_lmcx_ddr3_wlevel_dbg(node, ddr_interface_num, byte_idx); + if (wlevel_bitmask[byte_idx] == 0) + ++wlevel_bitmask_errors; + } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */ +#endif + + // check validity only if no bitmask errors + if (wlevel_bitmask_errors == 0) { + if ((spd_dimm_type != 5) && + (spd_dimm_type != 6) && + (spd_dimm_type != 8) && + (spd_dimm_type != 9) && + (dram_width != 16) && + (ddr_interface_64b) && + !(disable_hwl_validity)) + { // bypass if mini-[RU]DIMM or x16 or 32-bit or SO-[RU]DIMM + wlevel_validity_errors = + Validate_HW_WL_Settings(node, ddr_interface_num, + &lmc_wlevel_rank, ecc_ena); + wlevel_validity_errors_rank += (wlevel_validity_errors != 0); + } + } else + wlevel_bitmask_errors_rank++; + + // before we print, if we had bitmask or validity errors, do a retry... + if ((wlevel_bitmask_errors != 0) || (wlevel_validity_errors != 0)) { + // VBL must be high to show the bad bitmaps or delays here also + if (dram_is_verbose(VBL_DEV2)) { + display_WL_BM(node, ddr_interface_num, rankx, wlevel_bitmask); + display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx); + } + if (wloop_retries < WLOOP_RETRIES_DEFAULT) { + wloop_retries++; + wloop_retries_total++; + // this printout is per-retry: only when VBL is high enough (DEV2?) + VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: H/W Write-Leveling had %s errors - retrying...\n", + node, ddr_interface_num, rankx, + (wlevel_bitmask_errors) ? "Bitmask" : "Validity"); + continue; // this takes us back to the top without counting a sample + } else { // ran out of retries for this sample + // retries exhausted, do not print at normal VBL + VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: H/W Write-Leveling issues: %s errors\n", + node, ddr_interface_num, rankx, + (wlevel_bitmask_errors) ? 
"Bitmask" : "Validity"); + wloop_retries_exhausted++; + } + } + // no errors or exhausted retries, use this sample + wloop_retries = 0; //reset for next sample + + // when only 1 sample or forced, print the bitmasks first and current HW WL + if ((wlevel_loops == 1) || ddr_wlevel_printall) { + display_WL_BM(node, ddr_interface_num, rankx, wlevel_bitmask); + display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx); + } + + if (ddr_wlevel_roundup) { /* Round up odd bitmask delays */ + for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) { + if (!(ddr_interface_bytemask&(1<<byte_idx))) + continue; + update_wlevel_rank_struct(&lmc_wlevel_rank, + byte_idx, + roundup_ddr3_wlevel_bitmask(wlevel_bitmask[byte_idx])); + } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */ + DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u); + display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx); + } + +#if HW_WL_MAJORITY + // OK, we have a decent sample, no bitmask or validity errors + for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) { + if (!(ddr_interface_bytemask&(1<<byte_idx))) + continue; + // increment count of byte-lane value + int ix = (get_wlevel_rank_struct(&lmc_wlevel_rank, byte_idx) >> 1) & 3; // only 4 values + wlevel_bytes[byte_idx][ix]++; + } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */ +#endif + + wloop++; // if we get here, we have taken a decent sample + + } /* while (wloop < wlevel_loops) */ + +#if HW_WL_MAJORITY + // if we did sample more than once, try to pick a majority vote + if (wlevel_loops > 1) { + // look for the majority in each byte-lane + for (byte_idx = 0; byte_idx < (8+ecc_ena); ++byte_idx) { + int mx = -1, mc = 0, xc = 0, cc = 0; + int ix, ic; + if (!(ddr_interface_bytemask&(1<<byte_idx))) + continue; + for (ix = 0; ix < 4; ix++) { + ic = wlevel_bytes[byte_idx][ix]; + // make a bitmask of the ones with a count + if (ic > 0) { + mc |= (1 << ix); + cc++; // count how many had non-zero counts + } + // find the majority + if (ic > xc) { // new max? + xc = ic; // yes + mx = ix; // set its index + } + } +#if SWL_TRY_HWL_ALT + // see if there was an alternate + int alts = (mc & ~(1 << mx)); // take out the majority choice + if (alts != 0) { + for (ix = 0; ix < 4; ix++) { + if (alts & (1 << ix)) { // FIXME: could be done multiple times? bad if so + hwl_alts[rankx].hwl_alt_mask |= (1 << byte_idx); // set the mask + hwl_alts[rankx].hwl_alt_delay[byte_idx] = ix << 1; // record the value + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d (%d) alt %d (%d).\n", + node, ddr_interface_num, rankx, byte_idx, mx << 1, xc, + ix << 1, wlevel_bytes[byte_idx][ix]); + } + } + } else { + debug_print("N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d alt NONE.\n", + node, ddr_interface_num, rankx, byte_idx, mx << 1); + } +#endif /* SWL_TRY_HWL_ALT */ + if (cc > 2) { // unlikely, but... + // assume: counts for 3 indices are all 1 + // possiblities are: 0/2/4, 2/4/6, 0/4/6, 0/2/6 + // and the desired?: 2 , 4 , 6, 0 + // we choose the middle, assuming one of the outliers is bad + // NOTE: this is an ugly hack at the moment; there must be a better way + switch (mc) { + case 0x7: mx = 1; break; // was 0/2/4, choose 2 + case 0xb: mx = 0; break; // was 0/2/6, choose 0 + case 0xd: mx = 3; break; // was 0/4/6, choose 6 + case 0xe: mx = 2; break; // was 2/4/6, choose 4 + default: + case 0xf: mx = 1; break; // was 0/2/4/6, choose 2? 
+                        }
+                        error_print("N%d.LMC%d.R%d: HW WL MAJORITY: bad byte-lane %d (0x%x), using %d.\n",
+                                    node, ddr_interface_num, rankx, byte_idx, mc, mx << 1);
+                    }
+                    update_wlevel_rank_struct(&lmc_wlevel_rank, byte_idx, mx << 1);
+                } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
+
+                DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
+                display_WL_with_final(node, ddr_interface_num, lmc_wlevel_rank, rankx);
+            } /* if (wlevel_loops > 1) */
+#endif /* HW_WL_MAJORITY */
+            // maybe print an error summary for the rank
+            if ((wlevel_bitmask_errors_rank != 0) || (wlevel_validity_errors_rank != 0)) {
+                VB_PRT(VBL_FAE, "N%d.LMC%d.R%d: H/W Write-Leveling errors - %d bitmask, %d validity, %d retries, %d exhausted\n",
+                       node, ddr_interface_num, rankx,
+                       wlevel_bitmask_errors_rank, wlevel_validity_errors_rank,
+                       wloop_retries_total, wloop_retries_exhausted);
+            }
+
+        } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
+
+#if WODT_MASK_2R_1S
+        if ((ddr_type == DDR4_DRAM) && (num_ranks == 2) && (dimm_count == 1)) {
+            /* LMC(0)_WODT_MASK */
+            bdk_lmcx_wodt_mask_t lmc_wodt_mask;
+            // always read current so we can see if it's different from saved
+            lmc_wodt_mask.u = BDK_CSR_READ(node, BDK_LMCX_WODT_MASK(ddr_interface_num));
+            if (lmc_wodt_mask.u != saved_wodt_mask) { // always restore what was saved if diff
+                lmc_wodt_mask.u = saved_wodt_mask;
+                ddr_print("WODT_MASK : 0x%016lx\n", lmc_wodt_mask.u);
+                DRAM_CSR_WRITE(node, BDK_LMCX_WODT_MASK(ddr_interface_num), lmc_wodt_mask.u);
+            }
+        }
+#endif /* WODT_MASK_2R_1S */
+
+    } // End HW write-leveling block
+
+    // At the end of HW Write Leveling, check on some things...
+    if (! disable_deskew_training) {
+
+        deskew_counts_t dsk_counts;
+        int retry_count = 0;
+
+        VB_PRT(VBL_FAE, "N%d.LMC%d: Check Deskew Settings before Read-Leveling.\n", node, ddr_interface_num);
+
+        do {
+            Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, VBL_FAE);
+
+            // retrain if there is a nibble error on any rawcard, or if there
+            // is saturation on a rawcard other than A or B (only RAWCARD A or
+            // B will not benefit from retraining on saturation alone)
+            if ((!spd_rawcard_AorB && dsk_counts.saturated > 0) ||
+                ((dsk_counts.nibrng_errs != 0) || (dsk_counts.nibunl_errs != 0)))
+            {
+                retry_count++;
+                VB_PRT(VBL_FAE, "N%d.LMC%d: Deskew Status indicates saturation or nibble errors - retry %d Training.\n",
+                       node, ddr_interface_num, retry_count);
+                Perform_Read_Deskew_Training(node, rank_mask, ddr_interface_num,
+                                             spd_rawcard_AorB, 0, ddr_interface_64b);
+            } else
+                break;
+        } while (retry_count < 5);
+
+        // print the last setting only if we had to do retries here
+        if (retry_count > 0)
+            Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, VBL_NORM);
+    }
+
+    /*
+     * 6.9.12 LMC Read Leveling
+     *
+     * LMC supports an automatic read-leveling separately per byte-lane using
+     * the DDR3 multipurpose register predefined pattern for system
+     * calibration defined in the JEDEC DDR3 specifications.
+     *
+     * All of DDR PLL, LMC CK, LMC DRESET, and early LMC initializations
+     * must be completed prior to starting this LMC read-leveling sequence.
+     *
+     * Software could simply write the desired read-leveling values into
+     * LMC(0)_RLEVEL_RANK(0..3). This section describes a sequence that uses
+     * LMC's auto read-leveling capabilities.
+ * + * When LMC does the read-leveling sequence for a rank, it first enables + * the DDR3 multipurpose register predefined pattern for system + * calibration on the selected DRAM rank via a DDR3 MR3 write, then + * executes 64 RD operations at different internal delay settings, then + * disables the predefined pattern via another DDR3 MR3 write + * operation. LMC determines the pass or fail of each of the 64 settings + * independently for each byte lane, then writes appropriate + * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] values for the rank. + * + * After read-leveling for a rank, software can read the 64 pass/fail + * indications for one byte lane via LMC(0)_RLEVEL_DBG[BITMASK]. Software + * can observe all pass/fail results for all byte lanes in a rank via + * separate read-leveling sequences on the rank with different + * LMC(0)_RLEVEL_CTL[BYTE] values. + * + * The 64 pass/fail results will typically have failures for the low + * delays, followed by a run of some passing settings, followed by more + * failures in the remaining high delays. LMC sets + * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] to one of the passing settings. + * First, LMC selects the longest run of successes in the 64 results. (In + * the unlikely event that there is more than one longest run, LMC + * selects the first one.) Then if LMC(0)_RLEVEL_CTL[OFFSET_EN] = 1 and + * the selected run has more than LMC(0)_RLEVEL_CTL[OFFSET] successes, + * LMC selects the last passing setting in the run minus + * LMC(0)_RLEVEL_CTL[OFFSET]. Otherwise LMC selects the middle setting in + * the run (rounding earlier when necessary). We expect the read-leveling + * sequence to produce good results with the reset values + * LMC(0)_RLEVEL_CTL [OFFSET_EN]=1, LMC(0)_RLEVEL_CTL[OFFSET] = 2. + * + * The read-leveling sequence has the following steps: + * + * 1. Select desired LMC(0)_RLEVEL_CTL[OFFSET_EN,OFFSET,BYTE] settings. + * Do the remaining substeps 2-4 separately for each rank i with + * attached DRAM. + * + * 2. Without changing any other fields in LMC(0)_CONFIG, + * + * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select read-leveling + * + * o write LMC(0)_CONFIG[RANKMASK] = (1 << i) + * + * o write LMC(0)_SEQ_CTL[INIT_START] = 1 + * + * This initiates the previously-described read-leveling. + * + * 3. Wait until LMC(0)_RLEVEL_RANKi[STATUS] != 2 + * + * LMC will have updated LMC(0)_RLEVEL_RANKi[BYTE*] for all byte lanes + * at this point. + * + * If ECC DRAM is not present (i.e. when DRAM is not attached to the + * DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and + * DDR_DQ<35:32> chip signals), write LMC(0)_RLEVEL_RANK*[BYTE8] = + * LMC(0)_RLEVEL_RANK*[BYTE0]. Write LMC(0)_RLEVEL_RANK*[BYTE4] = + * LMC(0)_RLEVEL_RANK*[BYTE0]. + * + * 4. If desired, consult LMC(0)_RLEVEL_DBG[BITMASK] and compare to + * LMC(0)_RLEVEL_RANKi[BYTE*] for the lane selected by + * LMC(0)_RLEVEL_CTL[BYTE]. If desired, modify LMC(0)_RLEVEL_CTL[BYTE] + * to a new value and repeat so that all BITMASKs can be observed. + * + * 5. Initialize LMC(0)_RLEVEL_RANK* values for all unused ranks. + * + * Let rank i be a rank with attached DRAM. + * + * For all ranks j that do not have attached DRAM, set + * LMC(0)_RLEVEL_RANKj = LMC(0)_RLEVEL_RANKi. + * + * This read-leveling sequence can help select the proper CN70XX ODT + * resistance value (LMC(0)_COMP_CTL2[RODT_CTL]). 
A hardware-generated
+     * LMC(0)_RLEVEL_RANKi[BYTEj] value (for a used byte lane j) that is
+     * drastically different from a neighboring LMC(0)_RLEVEL_RANKi[BYTEk]
+     * (for a used byte lane k) can indicate that the CN70XX ODT value is
+     * bad. It is possible to simultaneously optimize both
+     * LMC(0)_COMP_CTL2[RODT_CTL] and LMC(0)_RLEVEL_RANKn[BYTE*] values by
+     * performing this read-leveling sequence for several
+     * LMC(0)_COMP_CTL2[RODT_CTL] values and selecting the one with the best
+     * LMC(0)_RLEVEL_RANKn[BYTE*] profile for the ranks.
+     */
+
+    {
+#pragma pack(push,4)
+        bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank;
+        bdk_lmcx_comp_ctl2_t lmc_comp_ctl2;
+        bdk_lmcx_rlevel_ctl_t rlevel_ctl;
+        bdk_lmcx_control_t lmc_control;
+        bdk_lmcx_modereg_params1_t lmc_modereg_params1;
+        unsigned char rodt_ctl;
+        unsigned char rankx = 0;
+        int rlevel_rodt_errors = 0;
+        unsigned char ecc_ena;
+        unsigned char rtt_nom;
+        unsigned char rtt_idx;
+        int min_rtt_nom_idx;
+        int max_rtt_nom_idx;
+        int min_rodt_ctl;
+        int max_rodt_ctl;
+        int rlevel_debug_loops = 1;
+        unsigned char save_ddr2t;
+        int rlevel_avg_loops;
+        int ddr_rlevel_compute;
+        int saved_ddr__ptune, saved_ddr__ntune, rlevel_comp_offset;
+        int saved_int_zqcs_dis = 0;
+        int disable_sequential_delay_check = 0;
+        int maximum_adjacent_rlevel_delay_increment = 0;
+        struct {
+            uint64_t setting;
+            int score;
+        } rlevel_scoreboard[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4];
+        int print_nom_ohms;
+#if PERFECT_BITMASK_COUNTING
+        typedef struct {
+            uint8_t count[9][32]; // 8+ECC by 32 values
+            uint8_t total[9];     // 8+ECC
+        } rank_perfect_t;
+        rank_perfect_t rank_perfect_counts[4];
+#endif
+
+#pragma pack(pop)
+
+#if PERFECT_BITMASK_COUNTING
+        memset(rank_perfect_counts, 0, sizeof(rank_perfect_counts));
+#endif /* PERFECT_BITMASK_COUNTING */
+
+        lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+        save_ddr2t = lmc_control.s.ddr2t;
+
+        lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+        ecc_ena = lmc_config.s.ecc_ena;
+
+#if 0
+        {
+            int save_ref_zqcs_int;
+            uint64_t temp_delay_usecs;
+
+            /* Temporarily select the minimum ZQCS interval and wait
+               long enough for a few ZQCS calibrations to occur. This
+               should ensure that the calibration circuitry is
+               stabilized before read-leveling occurs. */
+            save_ref_zqcs_int = lmc_config.s.ref_zqcs_int;
+            lmc_config.s.ref_zqcs_int = 1 | (32<<7); /* set smallest interval */
+            DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+            BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+
+            /* Compute an appropriate delay based on the current ZQCS
+               interval. The delay should be long enough for the
+               current ZQCS delay counter to expire plus ten of the
+               minimum intervals to ensure that some calibrations
+               occur.
*/ + temp_delay_usecs = (((uint64_t)save_ref_zqcs_int >> 7) + * tclk_psecs * 100 * 512 * 128) / (10000*10000) + + 10 * ((uint64_t)32 * tclk_psecs * 100 * 512 * 128) / (10000*10000); + + ddr_print ("Waiting %lu usecs for ZQCS calibrations to start\n", + temp_delay_usecs); + bdk_wait_usec(temp_delay_usecs); + + lmc_config.s.ref_zqcs_int = save_ref_zqcs_int; /* Restore computed interval */ + DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u); + BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num)); + } +#endif + + if ((s = lookup_env_parameter("ddr_rlevel_2t")) != NULL) { + lmc_control.s.ddr2t = strtoul(s, NULL, 0); + } + + DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u); + + ddr_print("N%d.LMC%d: Performing Read-Leveling\n", node, ddr_interface_num); + + rlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num)); + + rlevel_avg_loops = custom_lmc_config->rlevel_average_loops; + if (rlevel_avg_loops == 0) { + rlevel_avg_loops = RLEVEL_AVG_LOOPS_DEFAULT; + if ((dimm_count == 1) || (num_ranks == 1)) // up the samples for these cases + rlevel_avg_loops = rlevel_avg_loops * 2 + 1; + } + + ddr_rlevel_compute = custom_lmc_config->rlevel_compute; + rlevel_ctl.s.offset_en = custom_lmc_config->offset_en; + rlevel_ctl.s.offset = spd_rdimm + ? custom_lmc_config->offset_rdimm + : custom_lmc_config->offset_udimm; + + rlevel_ctl.s.delay_unload_0 = 1; /* should normally be set */ + rlevel_ctl.s.delay_unload_1 = 1; /* should normally be set */ + rlevel_ctl.s.delay_unload_2 = 1; /* should normally be set */ + rlevel_ctl.s.delay_unload_3 = 1; /* should normally be set */ + + rlevel_ctl.s.or_dis = 1; // default to get best bitmasks + if ((s = lookup_env_parameter("ddr_rlevel_or_dis")) != NULL) { + rlevel_ctl.s.or_dis = !!strtoul(s, NULL, 0); + } + rlevel_ctl.s.bitmask = 0xff; // should work in 32b mode also + if ((s = lookup_env_parameter("ddr_rlevel_ctl_bitmask")) != NULL) { + rlevel_ctl.s.bitmask = strtoul(s, NULL, 0); + } + debug_print("N%d.LMC%d: RLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n", + node, ddr_interface_num, + rlevel_ctl.s.or_dis, rlevel_ctl.s.bitmask); + + rlevel_comp_offset = spd_rdimm + ? 
custom_lmc_config->rlevel_comp_offset_rdimm + : custom_lmc_config->rlevel_comp_offset_udimm; + + if ((s = lookup_env_parameter("ddr_rlevel_offset")) != NULL) { + rlevel_ctl.s.offset = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_rlevel_offset_en")) != NULL) { + rlevel_ctl.s.offset_en = strtoul(s, NULL, 0); + } + if ((s = lookup_env_parameter("ddr_rlevel_ctl")) != NULL) { + rlevel_ctl.u = strtoul(s, NULL, 0); + } + + DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num), rlevel_ctl.u); + + if (bdk_is_platform(BDK_PLATFORM_ASIM)) + rlevel_debug_loops = 0; + + if ((s = lookup_env_parameter("ddr%d_rlevel_debug_loops", ddr_interface_num)) != NULL) { + rlevel_debug_loops = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_rtt_nom_auto")) != NULL) { + ddr_rtt_nom_auto = !!strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_rlevel_average")) != NULL) { + rlevel_avg_loops = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_rlevel_compute")) != NULL) { + ddr_rlevel_compute = strtoul(s, NULL, 0); + } + + ddr_print("RLEVEL_CTL : 0x%016lx\n", rlevel_ctl.u); + ddr_print("RLEVEL_OFFSET : %6d\n", rlevel_ctl.s.offset); + ddr_print("RLEVEL_OFFSET_EN : %6d\n", rlevel_ctl.s.offset_en); + + /* The purpose for the indexed table is to sort the settings + ** by the ohm value to simplify the testing when incrementing + ** through the settings. (index => ohms) 1=120, 2=60, 3=40, + ** 4=30, 5=20 */ + min_rtt_nom_idx = (custom_lmc_config->min_rtt_nom_idx == 0) ? 1 : custom_lmc_config->min_rtt_nom_idx; + max_rtt_nom_idx = (custom_lmc_config->max_rtt_nom_idx == 0) ? 5 : custom_lmc_config->max_rtt_nom_idx; + + min_rodt_ctl = (custom_lmc_config->min_rodt_ctl == 0) ? 1 : custom_lmc_config->min_rodt_ctl; + max_rodt_ctl = (custom_lmc_config->max_rodt_ctl == 0) ? 
5 : custom_lmc_config->max_rodt_ctl; + + if ((s = lookup_env_parameter("ddr_min_rodt_ctl")) != NULL) { + min_rodt_ctl = strtoul(s, NULL, 0); + } + if ((s = lookup_env_parameter("ddr_max_rodt_ctl")) != NULL) { + max_rodt_ctl = strtoul(s, NULL, 0); + } + if ((s = lookup_env_parameter("ddr_min_rtt_nom_idx")) != NULL) { + min_rtt_nom_idx = strtoul(s, NULL, 0); + } + if ((s = lookup_env_parameter("ddr_max_rtt_nom_idx")) != NULL) { + max_rtt_nom_idx = strtoul(s, NULL, 0); + } + +#ifdef ENABLE_CUSTOM_RLEVEL_TABLE + if (custom_lmc_config->rlevel_table != NULL) { + char part_number[21]; + /* Check for hard-coded read-leveling settings */ + get_dimm_part_number(part_number, node, &dimm_config_table[0], 0, ddr_type); + for (rankx = 0; rankx < dimm_count * 4;rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx)); + + i = 0; + while (custom_lmc_config->rlevel_table[i].part != NULL) { + debug_print("DIMM part number:\"%s\", SPD: \"%s\"\n", custom_lmc_config->rlevel_table[i].part, part_number); + if ((strcmp(part_number, custom_lmc_config->rlevel_table[i].part) == 0) + && (_abs(custom_lmc_config->rlevel_table[i].speed - 2*ddr_hertz/(1000*1000)) < 10 )) + { + ddr_print("Using hard-coded read leveling for DIMM part number: \"%s\"\n", part_number); + lmc_rlevel_rank.u = custom_lmc_config->rlevel_table[i].rlevel_rank[ddr_interface_num][rankx]; + DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u); + lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx)); + display_RL(node, ddr_interface_num, lmc_rlevel_rank, rankx); + rlevel_debug_loops = 0; /* Disable h/w read-leveling */ + break; + } + ++i; + } + } + } +#endif /* ENABLE_CUSTOM_RLEVEL_TABLE */ + + while(rlevel_debug_loops--) { + /* Initialize the error scoreboard */ + memset(rlevel_scoreboard, 0, sizeof(rlevel_scoreboard)); + + if ((s = lookup_env_parameter("ddr_rlevel_comp_offset")) != NULL) { + rlevel_comp_offset = strtoul(s, NULL, 0); + } + + disable_sequential_delay_check = custom_lmc_config->disable_sequential_delay_check; + + if ((s = lookup_env_parameter("ddr_disable_sequential_delay_check")) != NULL) { + disable_sequential_delay_check = strtoul(s, NULL, 0); + } + + maximum_adjacent_rlevel_delay_increment = custom_lmc_config->maximum_adjacent_rlevel_delay_increment; + + if ((s = lookup_env_parameter("ddr_maximum_adjacent_rlevel_delay_increment")) != NULL) { + maximum_adjacent_rlevel_delay_increment = strtoul(s, NULL, 0); + } + + lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); + saved_ddr__ptune = lmc_comp_ctl2.s.ddr__ptune; + saved_ddr__ntune = lmc_comp_ctl2.s.ddr__ntune; + + /* Disable dynamic compensation settings */ + if (rlevel_comp_offset != 0) { + lmc_comp_ctl2.s.ptune = saved_ddr__ptune; + lmc_comp_ctl2.s.ntune = saved_ddr__ntune; + + /* Round up the ptune calculation to bias the odd cases toward ptune */ + lmc_comp_ctl2.s.ptune += divide_roundup(rlevel_comp_offset, 2); + lmc_comp_ctl2.s.ntune -= rlevel_comp_offset/2; + + lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num)); + saved_int_zqcs_dis = lmc_control.s.int_zqcs_dis; + lmc_control.s.int_zqcs_dis = 1; /* Disable ZQCS while in bypass. 
*/
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ lmc_comp_ctl2.s.byp = 1; /* Enable bypass mode */
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
+ BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read again */
+ ddr_print("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
+ lmc_comp_ctl2.s.ddr__ptune, lmc_comp_ctl2.s.ddr__ntune);
+ }
+
+ lmc_modereg_params1.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num));
+
+ for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
+ rtt_nom = imp_values->rtt_nom_table[rtt_idx];
+
+ /* When the read ODT mask is zero, the dyn_rtt_nom_mask is
+ zero and RTT_NOM will not be changing during
+ read-leveling. Since the value is fixed we only need
+ to test it once. */
+ if (dyn_rtt_nom_mask == 0) {
+ print_nom_ohms = -1; // flag not to print NOM ohms
+ if (rtt_idx != min_rtt_nom_idx)
+ continue;
+ } else {
+ if (dyn_rtt_nom_mask & 1) lmc_modereg_params1.s.rtt_nom_00 = rtt_nom;
+ if (dyn_rtt_nom_mask & 2) lmc_modereg_params1.s.rtt_nom_01 = rtt_nom;
+ if (dyn_rtt_nom_mask & 4) lmc_modereg_params1.s.rtt_nom_10 = rtt_nom;
+ if (dyn_rtt_nom_mask & 8) lmc_modereg_params1.s.rtt_nom_11 = rtt_nom;
+ // FIXME? rank 0 ohms always for the printout?
+ print_nom_ohms = imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00];
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num), lmc_modereg_params1.u);
+ VB_PRT(VBL_TME, "\n");
+ VB_PRT(VBL_TME, "RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_11],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_10],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_01],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00],
+ lmc_modereg_params1.s.rtt_nom_11,
+ lmc_modereg_params1.s.rtt_nom_10,
+ lmc_modereg_params1.s.rtt_nom_01,
+ lmc_modereg_params1.s.rtt_nom_00);
+
+ perform_ddr_init_sequence(node, rank_mask, ddr_interface_num);
+
+ // Try RANK outside RODT to rearrange the output...
+ for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+ int byte_idx;
+ rlevel_byte_data_t rlevel_byte[9];
+ int average_loops;
+ int rlevel_rank_errors, rlevel_bitmask_errors, rlevel_nonseq_errors;
+ rlevel_bitmask_t rlevel_bitmask[9];
+#if PICK_BEST_RANK_SCORE_NOT_AVG
+ int rlevel_best_rank_score;
+#endif
+
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
+#if PICK_BEST_RANK_SCORE_NOT_AVG
+ rlevel_best_rank_score = DEFAULT_BEST_RANK_SCORE;
+#endif
+ rlevel_rodt_errors = 0;
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ lmc_comp_ctl2.s.rodt_ctl = rodt_ctl;
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ bdk_wait_usec(1); /* Give it a little time to take effect */
+ VB_PRT(VBL_DEV, "Read ODT_CTL : 0x%x (%d ohms)\n",
+ lmc_comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[lmc_comp_ctl2.s.rodt_ctl]);
+
+ memset(rlevel_byte, 0, sizeof(rlevel_byte));
+
+ for (average_loops = 0; average_loops < rlevel_avg_loops; average_loops++) {
+ rlevel_bitmask_errors = 0;
+
+ if (! 
(rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM))) { + /* Clear read-level delays */ + DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), 0); + + /* read-leveling */ + perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 1); + + if (!bdk_is_platform(BDK_PLATFORM_ASIM) && + BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), + status, ==, 3, 1000000)) + { + error_print("ERROR: Timeout waiting for RLEVEL\n"); + } + } + + lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx)); + + { // start bitmask interpretation block + int redoing_nonseq_errs = 0; + + memset(rlevel_bitmask, 0, sizeof(rlevel_bitmask)); + + if (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM)) { + bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank_aside; + bdk_lmcx_modereg_params0_t lmc_modereg_params0; + + /* A-side */ + lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num)); + lmc_modereg_params0.s.mprloc = 0; /* MPR Page 0 Location 0 */ + DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u); + + /* Clear read-level delays */ + DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), 0); + + perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 1); /* read-leveling */ + + if (!bdk_is_platform(BDK_PLATFORM_ASIM) && + BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), + status, ==, 3, 1000000)) + { + error_print("ERROR: Timeout waiting for RLEVEL\n"); + + } + lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx)); + + lmc_rlevel_rank_aside.u = lmc_rlevel_rank.u; + + rlevel_bitmask[0].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 0); + rlevel_bitmask[1].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 1); + rlevel_bitmask[2].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 2); + rlevel_bitmask[3].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 3); + rlevel_bitmask[8].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 8); + /* A-side complete */ + + + /* B-side */ + lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num)); + lmc_modereg_params0.s.mprloc = 3; /* MPR Page 0 Location 3 */ + DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u); + + /* Clear read-level delays */ + DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), 0); + + perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 1); /* read-leveling */ + + if (!bdk_is_platform(BDK_PLATFORM_ASIM) && + BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), + status, ==, 3, 1000000)) + { + error_print("ERROR: Timeout waiting for RLEVEL\n"); + } + lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx)); + + rlevel_bitmask[4].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 4); + rlevel_bitmask[5].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 5); + rlevel_bitmask[6].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 6); + rlevel_bitmask[7].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 7); + /* B-side complete */ + + + update_rlevel_rank_struct(&lmc_rlevel_rank, 0, lmc_rlevel_rank_aside.cn83xx.byte0); + update_rlevel_rank_struct(&lmc_rlevel_rank, 1, lmc_rlevel_rank_aside.cn83xx.byte1); + update_rlevel_rank_struct(&lmc_rlevel_rank, 2, 
lmc_rlevel_rank_aside.cn83xx.byte2); + update_rlevel_rank_struct(&lmc_rlevel_rank, 3, lmc_rlevel_rank_aside.cn83xx.byte3); + update_rlevel_rank_struct(&lmc_rlevel_rank, 8, lmc_rlevel_rank_aside.cn83xx.byte8); /* ECC A-side */ + + lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num)); + lmc_modereg_params0.s.mprloc = 0; /* MPR Page 0 Location 0 */ + DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u); + + } /* if (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM)) */ + + /* + * Evaluate the quality of the read-leveling delays from the bitmasks. + * Also save off a software computed read-leveling mask that may be + * used later to qualify the delay results from Octeon. + */ + for (byte_idx = 0; byte_idx < (8+ecc_ena); ++byte_idx) { + int bmerr; + if (!(ddr_interface_bytemask&(1<<byte_idx))) + continue; + if (! (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM))) { + rlevel_bitmask[byte_idx].bm = + octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, byte_idx); + } + bmerr = validate_ddr3_rlevel_bitmask(&rlevel_bitmask[byte_idx], ddr_type); + rlevel_bitmask[byte_idx].errs = bmerr; + rlevel_bitmask_errors += bmerr; +#if PERFECT_BITMASK_COUNTING + if ((ddr_type == DDR4_DRAM) && !bmerr) { // count only the "perfect" bitmasks + // FIXME: could optimize this a bit? + int delay = get_rlevel_rank_struct(&lmc_rlevel_rank, byte_idx); + rank_perfect_counts[rankx].count[byte_idx][delay] += 1; + rank_perfect_counts[rankx].total[byte_idx] += 1; + } +#endif /* PERFECT_BITMASK_COUNTING */ + } + + /* Set delays for unused bytes to match byte 0. */ + for (byte_idx = 0; byte_idx < 9; ++byte_idx) { + if (ddr_interface_bytemask & (1 << byte_idx)) + continue; + update_rlevel_rank_struct(&lmc_rlevel_rank, byte_idx, lmc_rlevel_rank.cn83xx.byte0); + } + + /* Save a copy of the byte delays in physical + order for sequential evaluation. */ + unpack_rlevel_settings(ddr_interface_bytemask, ecc_ena, rlevel_byte, lmc_rlevel_rank); + redo_nonseq_errs: + + rlevel_nonseq_errors = 0; + + if (! disable_sequential_delay_check) { + if ((ddr_interface_bytemask & 0xff) == 0xff) { + /* Evaluate delay sequence across the whole range of bytes for standard dimms. */ + if ((spd_dimm_type == 1) || (spd_dimm_type == 5)) { /* 1=RDIMM, 5=Mini-RDIMM */ + int register_adjacent_delay = _abs(rlevel_byte[4].delay - rlevel_byte[5].delay); + /* Registered dimm topology routes from the center. */ + rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 0, 3+ecc_ena, + maximum_adjacent_rlevel_delay_increment); + rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 5, 7+ecc_ena, + maximum_adjacent_rlevel_delay_increment); + // byte 5 sqerrs never gets cleared for RDIMMs + rlevel_byte[5].sqerrs = 0; + if (register_adjacent_delay > 1) { + /* Assess proximity of bytes on opposite sides of register */ + rlevel_nonseq_errors += (register_adjacent_delay-1) * RLEVEL_ADJACENT_DELAY_ERROR; + // update byte 5 error + rlevel_byte[5].sqerrs += (register_adjacent_delay-1) * RLEVEL_ADJACENT_DELAY_ERROR; + } + } + if ((spd_dimm_type == 2) || (spd_dimm_type == 6)) { /* 2=UDIMM, 6=Mini-UDIMM */ + /* Unbuffered dimm topology routes from end to end. */ + rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 0, 7+ecc_ena, + maximum_adjacent_rlevel_delay_increment); + } + } else { + rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 0, 3+ecc_ena, + maximum_adjacent_rlevel_delay_increment); + } + } /* if (! 
disable_sequential_delay_check) */ + +#if 0 + // FIXME FIXME: disabled for now, it was too much... + + // Calculate total errors for the rank: + // we do NOT add nonsequential errors if mini-[RU]DIMM or x16; + // mini-DIMMs and x16 devices have unusual sequence geometries. + // Make the final scores for them depend only on the bitmasks... + rlevel_rank_errors = rlevel_bitmask_errors; + if ((spd_dimm_type != 5) && + (spd_dimm_type != 6) && + (dram_width != 16)) + { + rlevel_rank_errors += rlevel_nonseq_errors; + } +#else + rlevel_rank_errors = rlevel_bitmask_errors + rlevel_nonseq_errors; +#endif + + // print original sample here only if we are not really averaging or picking best + // also do not print if we were redoing the NONSEQ score for using COMPUTED + if (!redoing_nonseq_errs && ((rlevel_avg_loops < 2) || dram_is_verbose(VBL_DEV2))) { + display_RL_BM(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena); + display_RL_BM_scores(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena); + display_RL_SEQ_scores(node, ddr_interface_num, rankx, rlevel_byte, ecc_ena); + display_RL_with_score(node, ddr_interface_num, lmc_rlevel_rank, rankx, rlevel_rank_errors); + } + + if (ddr_rlevel_compute) { + if (!redoing_nonseq_errs) { + /* Recompute the delays based on the bitmask */ + for (byte_idx = 0; byte_idx < (8+ecc_ena); ++byte_idx) { + if (!(ddr_interface_bytemask & (1 << byte_idx))) + continue; + update_rlevel_rank_struct(&lmc_rlevel_rank, byte_idx, + compute_ddr3_rlevel_delay(rlevel_bitmask[byte_idx].mstart, + rlevel_bitmask[byte_idx].width, + rlevel_ctl)); + } + + /* Override the copy of byte delays with the computed results. */ + unpack_rlevel_settings(ddr_interface_bytemask, ecc_ena, rlevel_byte, lmc_rlevel_rank); + + redoing_nonseq_errs = 1; + goto redo_nonseq_errs; + + } else { + /* now print this if already printed the original sample */ + if ((rlevel_avg_loops < 2) || dram_is_verbose(VBL_DEV2)) { + display_RL_with_computed(node, ddr_interface_num, + lmc_rlevel_rank, rankx, + rlevel_rank_errors); + } + } + } /* if (ddr_rlevel_compute) */ + + } // end bitmask interpretation block + +#if PICK_BEST_RANK_SCORE_NOT_AVG + + // if it is a better (lower) score, then keep it + if (rlevel_rank_errors < rlevel_best_rank_score) { + rlevel_best_rank_score = rlevel_rank_errors; + + // save the new best delays and best errors + for (byte_idx = 0; byte_idx < 9; ++byte_idx) { + rlevel_byte[byte_idx].best = rlevel_byte[byte_idx].delay; + rlevel_byte[byte_idx].bestsq = rlevel_byte[byte_idx].sqerrs; + // save bitmasks and their scores as well + // xlate UNPACKED index to PACKED index to get from rlevel_bitmask + rlevel_byte[byte_idx].bm = rlevel_bitmask[XUP(byte_idx, !!ecc_ena)].bm; + rlevel_byte[byte_idx].bmerrs = rlevel_bitmask[XUP(byte_idx, !!ecc_ena)].errs; + } + } +#else /* PICK_BEST_RANK_SCORE_NOT_AVG */ + + /* Accumulate the total score across averaging loops for this setting */ + debug_print("rlevel_scoreboard[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score: %d [%d]\n", + rtt_nom, rodt_ctl, rankx, rlevel_rank_errors, average_loops); + rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score += rlevel_rank_errors; + + /* Accumulate the delay totals and loop counts + necessary to compute average delay results */ + for (byte_idx = 0; byte_idx < 9; ++byte_idx) { + if (rlevel_byte[byte_idx].delay != 0) { /* Don't include delay=0 in the average */ + ++rlevel_byte[byte_idx].loop_count; + rlevel_byte[byte_idx].loop_total += rlevel_byte[byte_idx].delay; + } + } /* for (byte_idx = 0; byte_idx < 9; ++byte_idx) */ 
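+ // Editorial note: delay==0 is excluded from the averages above because
+ // 0 is the "no result" encoding, and including it would bias the
+ // average toward an illegal setting.
+ /*
+ * Editorial illustration (not part of the original BDK sources): the
+ * header comment at the top of this section describes how the hardware
+ * picks a delay from the 64 pass/fail samples -- take the first longest
+ * run of passes, then either back off OFFSET from the end of the run
+ * (when OFFSET_EN=1 and the run has more than OFFSET passes) or take
+ * the middle of the run, rounding earlier. A minimal sketch of that
+ * selection rule, assuming bit i of the bitmask means setting i passed:
+ */
+#if 0 /* illustrative sketch only, never compiled */
+ static int rlevel_pick_setting_sketch(uint64_t bitmask, int offset_en, int offset)
+ {
+ int best_start = 0, best_len = 0, run_start = -1;
+ for (int i = 0; i < 64; i++) {
+ if (bitmask & (1ull << i)) {
+ if (run_start < 0)
+ run_start = i; /* a new run of passing settings begins */
+ if ((i - run_start + 1) > best_len) { /* strictly longer, so the FIRST longest run wins */
+ best_start = run_start;
+ best_len = i - run_start + 1;
+ }
+ } else {
+ run_start = -1; /* the run is broken by a failing setting */
+ }
+ }
+ if (best_len == 0)
+ return -1; /* no passing settings at all */
+ if (offset_en && (best_len > offset))
+ return best_start + best_len - 1 - offset; /* last pass in the run minus OFFSET */
+ return best_start + (best_len - 1) / 2; /* middle of the run, rounding earlier */
+ }
+#endif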
+#endif /* PICK_BEST_RANK_SCORE_NOT_AVG */ + + rlevel_rodt_errors += rlevel_rank_errors; + + } /* for (average_loops = 0; average_loops < rlevel_avg_loops; average_loops++) */ + +#if PICK_BEST_RANK_SCORE_NOT_AVG + + /* We recorded the best score across the averaging loops */ + rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score = rlevel_best_rank_score; + + /* Restore the delays from the best fields that go with the best score */ + for (byte_idx = 0; byte_idx < 9; ++byte_idx) { + rlevel_byte[byte_idx].delay = rlevel_byte[byte_idx].best; + rlevel_byte[byte_idx].sqerrs = rlevel_byte[byte_idx].bestsq; + } +#else /* PICK_BEST_RANK_SCORE_NOT_AVG */ + + /* Compute the average score across averaging loops */ + rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score = + divide_nint(rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score, rlevel_avg_loops); + + /* Compute the average delay results */ + for (byte_idx=0; byte_idx < 9; ++byte_idx) { + if (rlevel_byte[byte_idx].loop_count == 0) + rlevel_byte[byte_idx].loop_count = 1; + rlevel_byte[byte_idx].delay = divide_nint(rlevel_byte[byte_idx].loop_total, + rlevel_byte[byte_idx].loop_count); + } +#endif /* PICK_BEST_RANK_SCORE_NOT_AVG */ + + lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx)); + + pack_rlevel_settings(ddr_interface_bytemask, ecc_ena, rlevel_byte, &lmc_rlevel_rank); + + if (rlevel_avg_loops > 1) { +#if PICK_BEST_RANK_SCORE_NOT_AVG + // restore the "best" bitmasks and their scores for printing + for (byte_idx = 0; byte_idx < 9; ++byte_idx) { + if ((ddr_interface_bytemask & (1 << byte_idx)) == 0) + continue; + // xlate PACKED index to UNPACKED index to get from rlevel_byte + rlevel_bitmask[byte_idx].bm = rlevel_byte[XPU(byte_idx, !!ecc_ena)].bm; + rlevel_bitmask[byte_idx].errs = rlevel_byte[XPU(byte_idx, !!ecc_ena)].bmerrs; + } + // print bitmasks/scores here only for DEV // FIXME? lower VBL? + if (dram_is_verbose(VBL_DEV)) { + display_RL_BM(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena); + display_RL_BM_scores(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena); + display_RL_SEQ_scores(node, ddr_interface_num, rankx, rlevel_byte, ecc_ena); + } + + display_RL_with_RODT(node, ddr_interface_num, lmc_rlevel_rank, rankx, + rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score, + print_nom_ohms, imp_values->rodt_ohms[rodt_ctl], + WITH_RODT_BESTSCORE); + +#else /* PICK_BEST_RANK_SCORE_NOT_AVG */ + display_RL_with_average(node, ddr_interface_num, lmc_rlevel_rank, rankx, + rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score); +#endif /* PICK_BEST_RANK_SCORE_NOT_AVG */ + + } /* if (rlevel_avg_loops > 1) */ + + rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].setting = lmc_rlevel_rank.u; + + } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */ + } /* for (rankx = 0; rankx < dimm_count*4; rankx++) */ + } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<max_rtt_nom_idx; ++rtt_idx) */ + + + /* Re-enable dynamic compensation settings. 
*/
+ if (rlevel_comp_offset != 0) {
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+
+ lmc_comp_ctl2.s.ptune = 0;
+ lmc_comp_ctl2.s.ntune = 0;
+ lmc_comp_ctl2.s.byp = 0; /* Disable bypass mode */
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
+ BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read once */
+
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read again */
+ ddr_print("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
+ lmc_comp_ctl2.s.ddr__ptune, lmc_comp_ctl2.s.ddr__ntune);
+
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ lmc_control.s.int_zqcs_dis = saved_int_zqcs_dis; /* Restore original setting */
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ }
+
+
+ {
+ int override_compensation = 0;
+ if ((s = lookup_env_parameter("ddr__ptune")) != NULL) {
+ saved_ddr__ptune = strtoul(s, NULL, 0);
+ override_compensation = 1;
+ }
+ if ((s = lookup_env_parameter("ddr__ntune")) != NULL) {
+ saved_ddr__ntune = strtoul(s, NULL, 0);
+ override_compensation = 1;
+ }
+ if (override_compensation) {
+ lmc_comp_ctl2.s.ptune = saved_ddr__ptune;
+ lmc_comp_ctl2.s.ntune = saved_ddr__ntune;
+
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ saved_int_zqcs_dis = lmc_control.s.int_zqcs_dis;
+ lmc_control.s.int_zqcs_dis = 1; /* Disable ZQCS while in bypass. */
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ lmc_comp_ctl2.s.byp = 1; /* Enable bypass mode */
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read again */
+
+ ddr_print("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
+ lmc_comp_ctl2.s.ptune, lmc_comp_ctl2.s.ntune);
+ }
+ }
+ { /* Evaluation block */
+ int best_rodt_score = DEFAULT_BEST_RANK_SCORE; /* Start with an arbitrarily high score */
+ int auto_rodt_ctl = 0;
+ int auto_rtt_nom = 0;
+ int rodt_score;
+ int rodt_row_skip_mask = 0;
+
+ // just add specific RODT rows to the skip mask for DDR4 at this time...
+ if (ddr_type == DDR4_DRAM) {
+ rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_34_ohm); // skip RODT row 34 ohms for all DDR4 types
+ rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_40_ohm); // skip RODT row 40 ohms for all DDR4 types
+#if ADD_48_OHM_SKIP
+ rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_48_ohm); // skip RODT row 48 ohms for all DDR4 types
+#endif /* ADD_48_OHM_SKIP */
+#if NOSKIP_40_48_OHM
+ // For now, do not skip RODT row 40 or 48 ohm when ddr_hertz is above 1075 MHz
+ if (ddr_hertz > 1075000000) {
+ rodt_row_skip_mask &= ~(1 << ddr4_rodt_ctl_40_ohm); // noskip RODT row 40 ohms
+ rodt_row_skip_mask &= ~(1 << ddr4_rodt_ctl_48_ohm); // noskip RODT row 48 ohms
+ }
+#endif /* NOSKIP_40_48_OHM */
+#if NOSKIP_48_STACKED
+ // For now, do not skip RODT row 48 ohm for 2Rx4 stacked die DIMMs
+ if ((is_stacked_die) && (num_ranks == 2) && (dram_width == 4)) {
+ rodt_row_skip_mask &= ~(1 << ddr4_rodt_ctl_48_ohm); // noskip RODT row 48 ohms
+ }
+#endif /* NOSKIP_48_STACKED */
+#if NOSKIP_FOR_MINI
+ // for now, leave all rows eligible when we have mini-DIMMs... 
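+ // (editorial note: SPD module type 5 is mini-RDIMM and 6 is mini-UDIMM,
+ // the same dimm-type codes used by the sequential-delay checks earlier)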
+ if ((spd_dimm_type == 5) || (spd_dimm_type == 6)) {
+ rodt_row_skip_mask = 0;
+ }
+#endif /* NOSKIP_FOR_MINI */
+#if NOSKIP_FOR_2S_1R
+ // for now, leave all rows eligible when we have a 2-slot 1-rank config
+ if ((dimm_count == 2) && (num_ranks == 1)) {
+ rodt_row_skip_mask = 0;
+ }
+#endif /* NOSKIP_FOR_2S_1R */
+ }
+
+ VB_PRT(VBL_DEV, "Evaluating Read-Leveling Scoreboard for AUTO settings.\n");
+ for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) {
+ rtt_nom = imp_values->rtt_nom_table[rtt_idx];
+
+ /* When the read ODT mask is zero, the dyn_rtt_nom_mask is
+ zero and RTT_NOM will not be changing during
+ read-leveling. Since the value is fixed we only need
+ to test it once. */
+ if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
+ continue;
+
+ for (rodt_ctl=max_rodt_ctl; rodt_ctl>=min_rodt_ctl; --rodt_ctl) {
+ rodt_score = 0;
+ for (rankx = 0; rankx < dimm_count * 4;rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+ debug_print("rlevel_scoreboard[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score:%d\n",
+ rtt_nom, rodt_ctl, rankx, rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score);
+ rodt_score += rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score;
+ }
+ // FIXME: do we need to skip RODT rows here, like we do below in the by-RANK settings?
+
+ /* When using automatic ODT settings use the ODT
+ settings associated with the best score for
+ all of the tested ODT combinations. */
+
+ if ((rodt_score < best_rodt_score) || // always take lower score, OR
+ ((rodt_score == best_rodt_score) && // take same score if RODT ohms are higher
+ (imp_values->rodt_ohms[rodt_ctl] > imp_values->rodt_ohms[auto_rodt_ctl])))
+ {
+ debug_print("AUTO: new best score for rodt:%d (%3d), new score:%d, previous score:%d\n",
+ rodt_ctl, imp_values->rodt_ohms[rodt_ctl], rodt_score, best_rodt_score);
+ best_rodt_score = rodt_score;
+ auto_rodt_ctl = rodt_ctl;
+ auto_rtt_nom = rtt_nom;
+ }
+ } /* for (rodt_ctl=max_rodt_ctl; rodt_ctl>=min_rodt_ctl; --rodt_ctl) */
+ } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
+
+ lmc_modereg_params1.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num));
+
+ if (ddr_rtt_nom_auto) {
+ /* Store the automatically set RTT_NOM value */
+ if (dyn_rtt_nom_mask & 1) lmc_modereg_params1.s.rtt_nom_00 = auto_rtt_nom;
+ if (dyn_rtt_nom_mask & 2) lmc_modereg_params1.s.rtt_nom_01 = auto_rtt_nom;
+ if (dyn_rtt_nom_mask & 4) lmc_modereg_params1.s.rtt_nom_10 = auto_rtt_nom;
+ if (dyn_rtt_nom_mask & 8) lmc_modereg_params1.s.rtt_nom_11 = auto_rtt_nom;
+ } else {
+ /* restore the manual settings to the register */
+ lmc_modereg_params1.s.rtt_nom_00 = default_rtt_nom[0];
+ lmc_modereg_params1.s.rtt_nom_01 = default_rtt_nom[1];
+ lmc_modereg_params1.s.rtt_nom_10 = default_rtt_nom[2];
+ lmc_modereg_params1.s.rtt_nom_11 = default_rtt_nom[3];
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num), lmc_modereg_params1.u);
+ VB_PRT(VBL_DEV, "RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_11],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_10],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_01],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00],
+ lmc_modereg_params1.s.rtt_nom_11,
+ lmc_modereg_params1.s.rtt_nom_10,
+ lmc_modereg_params1.s.rtt_nom_01,
+ lmc_modereg_params1.s.rtt_nom_00);
+
+ VB_PRT(VBL_DEV, "RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 3)],
+ 
imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 2)], + imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 1)], + imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 0)], + EXTR_WR(lmc_modereg_params1.u, 3), + EXTR_WR(lmc_modereg_params1.u, 2), + EXTR_WR(lmc_modereg_params1.u, 1), + EXTR_WR(lmc_modereg_params1.u, 0)); + + VB_PRT(VBL_DEV, "DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_values->dic_ohms[lmc_modereg_params1.s.dic_11], + imp_values->dic_ohms[lmc_modereg_params1.s.dic_10], + imp_values->dic_ohms[lmc_modereg_params1.s.dic_01], + imp_values->dic_ohms[lmc_modereg_params1.s.dic_00], + lmc_modereg_params1.s.dic_11, + lmc_modereg_params1.s.dic_10, + lmc_modereg_params1.s.dic_01, + lmc_modereg_params1.s.dic_00); + + if (ddr_type == DDR4_DRAM) { + bdk_lmcx_modereg_params2_t lmc_modereg_params2; + /* + * We must read the CSR, and not depend on odt_config[odt_idx].odt_mask2, + * since we could have overridden values with envvars. + * NOTE: this corrects the printout, since the CSR is not written with the old values... + */ + lmc_modereg_params2.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num)); + + VB_PRT(VBL_DEV, "RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_11], + imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_10], + imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_01], + imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_00], + lmc_modereg_params2.s.rtt_park_11, + lmc_modereg_params2.s.rtt_park_10, + lmc_modereg_params2.s.rtt_park_01, + lmc_modereg_params2.s.rtt_park_00); + + VB_PRT(VBL_DEV, "%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE", + lmc_modereg_params2.s.vref_range_11, + lmc_modereg_params2.s.vref_range_10, + lmc_modereg_params2.s.vref_range_01, + lmc_modereg_params2.s.vref_range_00); + + VB_PRT(VBL_DEV, "%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE", + lmc_modereg_params2.s.vref_value_11, + lmc_modereg_params2.s.vref_value_10, + lmc_modereg_params2.s.vref_value_01, + lmc_modereg_params2.s.vref_value_00); + } + + lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); + if (ddr_rodt_ctl_auto) + lmc_comp_ctl2.s.rodt_ctl = auto_rodt_ctl; + else + lmc_comp_ctl2.s.rodt_ctl = default_rodt_ctl; // back to the original setting + DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u); + lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); + VB_PRT(VBL_DEV, "Read ODT_CTL : 0x%x (%d ohms)\n", + lmc_comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[lmc_comp_ctl2.s.rodt_ctl]); + + ////////////////// this is the start of the RANK MAJOR LOOP + + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + int best_rank_score = DEFAULT_BEST_RANK_SCORE; /* Start with an arbitrarily high score */ + int best_rank_rtt_nom = 0; + //int best_rank_nom_ohms = 0; + int best_rank_ctl = 0; + int best_rank_ohms = 0; + int best_rankx = 0; + + if (!(rank_mask & (1 << rankx))) + continue; + + /* Use the delays associated with the best score for each individual rank */ + VB_PRT(VBL_TME, "Evaluating Read-Leveling Scoreboard for Rank %d settings.\n", rankx); + + // some of the rank-related loops below need to operate only on the ranks of a single DIMM, + // so create a mask for their use here + int dimm_rank_mask; + if (num_ranks == 4) + dimm_rank_mask = rank_mask; // should be 1111 + else { + dimm_rank_mask = rank_mask & 3; // should be 01 or 11 + if (rankx >= 2) + dimm_rank_mask <<= 2; // doing a rank on the second DIMM, should be 0100 or 
1100
+ }
+ debug_print("DIMM rank mask: 0x%x, rank mask: 0x%x, rankx: %d\n", dimm_rank_mask, rank_mask, rankx);
+
+ ////////////////// this is the start of the BEST ROW SCORE LOOP
+
+ for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
+ //int rtt_nom_ohms;
+ rtt_nom = imp_values->rtt_nom_table[rtt_idx];
+ //rtt_nom_ohms = imp_values->rtt_nom_ohms[rtt_nom];
+
+ /* When the read ODT mask is zero, the dyn_rtt_nom_mask is
+ zero and RTT_NOM will not be changing during
+ read-leveling. Since the value is fixed we only need
+ to test it once. */
+ if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
+ continue;
+
+ debug_print("N%d.LMC%d.R%d: starting RTT_NOM %d (%d)\n",
+ node, ddr_interface_num, rankx, rtt_nom, imp_values->rtt_nom_ohms[rtt_nom]);
+
+ for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
+ int next_ohms = imp_values->rodt_ohms[rodt_ctl];
+
+ // skip RODT rows in mask, but *NOT* rows with too high a score;
+ // we will not use the skipped ones for printing or evaluating, but
+ // we need to allow all the non-skipped ones to be candidates for "best"
+ if (((1 << rodt_ctl) & rodt_row_skip_mask) != 0) {
+ debug_print("N%d.LMC%d.R%d: SKIPPING rodt:%d (%d)\n",
+ node, ddr_interface_num, rankx, rodt_ctl, next_ohms);
+ continue;
+ }
+ for (int orankx = 0; orankx < dimm_count * 4; orankx++) { // this is ROFFIX-0528
+ if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
+ continue;
+
+ int next_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
+
+ if (next_score > best_rank_score) // always skip a higher score
+ continue;
+ if (next_score == best_rank_score) { // if scores are equal
+ if (next_ohms < best_rank_ohms) // always skip lower ohms
+ continue;
+ if (next_ohms == best_rank_ohms) { // if same ohms
+ if (orankx != rankx) // always skip the other rank(s)
+ continue;
+ }
+ // else next_ohms are greater, always choose it
+ }
+ // else next_score is less than current best, so always choose it
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: new best score: rank %d, rodt %d(%3d), new best %d, previous best %d(%d)\n",
+ node, ddr_interface_num, rankx, orankx, rodt_ctl, next_ohms, next_score,
+ best_rank_score, best_rank_ohms);
+ best_rank_score = next_score;
+ best_rank_rtt_nom = rtt_nom;
+ //best_rank_nom_ohms = rtt_nom_ohms;
+ best_rank_ctl = rodt_ctl;
+ best_rank_ohms = next_ohms;
+ best_rankx = orankx;
+ lmc_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
+
+ } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) */
+ } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
+ } /* for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) */
+
+ ////////////////// this is the end of the BEST ROW SCORE LOOP
+
+ // DANGER, Will Robinson!! Abort now if we did not find a best score at all...
+ if (best_rank_score == DEFAULT_BEST_RANK_SCORE) {
+ error_print("WARNING: no best rank score found for N%d.LMC%d.R%d - resetting node...\n",
+ node, ddr_interface_num, rankx);
+ bdk_wait_usec(500000);
+ bdk_reset_chip(node);
+ }
+
+ // FIXME: relative now, but still arbitrary...
+ // halve the range if 2 DIMMs unless they are single rank...
+ int MAX_RANK_SCORE = best_rank_score;
+ MAX_RANK_SCORE += (MAX_RANK_SCORE_LIMIT / ((num_ranks > 1) ? 
dimm_count : 1)); + + if (!ecc_ena){ + lmc_rlevel_rank.cn83xx.byte8 = lmc_rlevel_rank.cn83xx.byte0; /* ECC is not used */ + } + + // at the end, write the best row settings to the current rank + DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u); + lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx)); + + bdk_lmcx_rlevel_rankx_t saved_rlevel_rank; + saved_rlevel_rank.u = lmc_rlevel_rank.u; + + ////////////////// this is the start of the PRINT LOOP + + // for pass==0, print current rank, pass==1 print other rank(s) + // this is done because we want to show each ranks RODT values together, not interlaced +#if COUNT_RL_CANDIDATES + // keep separates for ranks - pass=0 target rank, pass=1 other rank on DIMM + int mask_skipped[2] = {0,0}; + int score_skipped[2] = {0,0}; + int selected_rows[2] = {0,0}; + int zero_scores[2] = {0,0}; +#endif /* COUNT_RL_CANDIDATES */ + for (int pass = 0; pass < 2; pass++ ) { + for (int orankx = 0; orankx < dimm_count * 4; orankx++) { + if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM + continue; + + if (((pass == 0) && (orankx != rankx)) || ((pass != 0) && (orankx == rankx))) + continue; + + for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) { + rtt_nom = imp_values->rtt_nom_table[rtt_idx]; + if (dyn_rtt_nom_mask == 0) { + print_nom_ohms = -1; + if (rtt_idx != min_rtt_nom_idx) + continue; + } else { + print_nom_ohms = imp_values->rtt_nom_ohms[rtt_nom]; + } + + // cycle through all the RODT values... + for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) { + bdk_lmcx_rlevel_rankx_t temp_rlevel_rank; + int temp_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score; + temp_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting; + + // skip RODT rows in mask, or rows with too high a score; + // we will not use them for printing or evaluating... +#if COUNT_RL_CANDIDATES + int skip_row; + if ((1 << rodt_ctl) & rodt_row_skip_mask) { + skip_row = WITH_RODT_SKIPPING; + ++mask_skipped[pass]; + } else if (temp_score > MAX_RANK_SCORE) { + skip_row = WITH_RODT_SKIPPING; + ++score_skipped[pass]; + } else { + skip_row = WITH_RODT_BLANK; + ++selected_rows[pass]; + if (temp_score == 0) + ++zero_scores[pass]; + } + +#else /* COUNT_RL_CANDIDATES */ + int skip_row = (((1 << rodt_ctl) & rodt_row_skip_mask) || (temp_score > MAX_RANK_SCORE)) + ? 
WITH_RODT_SKIPPING: WITH_RODT_BLANK; +#endif /* COUNT_RL_CANDIDATES */ + + // identify and print the BEST ROW when it comes up + if ((skip_row == WITH_RODT_BLANK) && + (best_rankx == orankx) && + (best_rank_rtt_nom == rtt_nom) && + (best_rank_ctl == rodt_ctl)) + { + skip_row = WITH_RODT_BESTROW; + } + + display_RL_with_RODT(node, ddr_interface_num, + temp_rlevel_rank, orankx, temp_score, + print_nom_ohms, + imp_values->rodt_ohms[rodt_ctl], + skip_row); + + } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */ + } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */ + } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) { */ + } /* for (int pass = 0; pass < 2; pass++ ) */ +#if COUNT_RL_CANDIDATES + VB_PRT(VBL_TME, "N%d.LMC%d.R%d: RLROWS: selected %d+%d, zero_scores %d+%d, mask_skipped %d+%d, score_skipped %d+%d\n", + node, ddr_interface_num, rankx, + selected_rows[0], selected_rows[1], + zero_scores[0], zero_scores[1], + mask_skipped[0], mask_skipped[1], + score_skipped[0], score_skipped[1]); +#endif /* COUNT_RL_CANDIDATES */ + + ////////////////// this is the end of the PRINT LOOP + + // now evaluate which bytes need adjusting + uint64_t byte_msk = 0x3f; // 6-bit fields + uint64_t best_byte, new_byte, temp_byte, orig_best_byte; + + uint64_t rank_best_bytes[9]; // collect the new byte values; first init with current best for neighbor use + for (int byte_idx = 0, byte_sh = 0; byte_idx < 8+ecc_ena; byte_idx++, byte_sh += 6) { + rank_best_bytes[byte_idx] = (lmc_rlevel_rank.u >> byte_sh) & byte_msk; + } + + ////////////////// this is the start of the BEST BYTE LOOP + + for (int byte_idx = 0, byte_sh = 0; byte_idx < 8+ecc_ena; byte_idx++, byte_sh += 6) { + best_byte = orig_best_byte = rank_best_bytes[byte_idx]; + + ////////////////// this is the start of the BEST BYTE AVERAGING LOOP + + // validate the initial "best" byte by looking at the average of the unskipped byte-column entries + // we want to do this before we go further, so we can try to start with a better initial value + // this is the so-called "BESTBUY" patch set + int sum = 0, count = 0; + + for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) { + rtt_nom = imp_values->rtt_nom_table[rtt_idx]; + if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx)) + continue; + + for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) { + bdk_lmcx_rlevel_rankx_t temp_rlevel_rank; + int temp_score; + for (int orankx = 0; orankx < dimm_count * 4; orankx++) { // average over all the ranks + if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM + continue; + temp_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score; + // skip RODT rows in mask, or rows with too high a score; + // we will not use them for printing or evaluating... + + if (!((1 << rodt_ctl) & rodt_row_skip_mask) && + (temp_score <= MAX_RANK_SCORE)) + { + temp_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting; + temp_byte = (temp_rlevel_rank.u >> byte_sh) & byte_msk; + sum += temp_byte; + count++; + } + } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) */ + } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */ + } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */ + + ////////////////// this is the end of the BEST BYTE AVERAGING LOOP + + + uint64_t avg_byte = divide_nint(sum, count); // FIXME: validate count and sum?? 
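+ // Editorial note on the FIXME above: in practice count is at least 1 here,
+ // because the BEST ROW SCORE LOOP already found an unskipped row whose
+ // score is best_rank_score, and MAX_RANK_SCORE is best_rank_score plus a
+ // positive margin, so that row always survives the same filter and
+ // contributes one sample; the nearest-integer division above therefore
+ // cannot divide by zero.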
+ int avg_diff = (int)best_byte - (int)avg_byte; + new_byte = best_byte; + if (avg_diff != 0) { + // bump best up/dn by 1, not necessarily all the way to avg + new_byte = best_byte + ((avg_diff > 0) ? -1: 1); + } + + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: START: Byte %d: best %d is different by %d from average %d, using %d.\n", + node, ddr_interface_num, rankx, + byte_idx, (int)best_byte, avg_diff, (int)avg_byte, (int)new_byte); + best_byte = new_byte; + + // At this point best_byte is either: + // 1. the original byte-column value from the best scoring RODT row, OR + // 2. that value bumped toward the average of all the byte-column values + // + // best_byte will not change from here on... + + ////////////////// this is the start of the BEST BYTE COUNTING LOOP + + // NOTE: we do this next loop separately from above, because we count relative to "best_byte" + // which may have been modified by the above averaging operation... + // + // Also, the above only moves toward the average by +- 1, so that we will always have a count + // of at least 1 for the original best byte, even if all the others are further away and not counted; + // this ensures we will go back to the original if no others are counted... + // FIXME: this could cause issue if the range of values for a byte-lane are too disparate... + int count_less = 0, count_same = 0, count_more = 0; +#if FAILSAFE_CHECK + uint64_t count_byte = new_byte; // save the value we will count around +#endif /* FAILSAFE_CHECK */ +#if RANK_MAJORITY + int rank_less = 0, rank_same = 0, rank_more = 0; +#endif /* RANK_MAJORITY */ + + for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) { + rtt_nom = imp_values->rtt_nom_table[rtt_idx]; + if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx)) + continue; + + for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) { + bdk_lmcx_rlevel_rankx_t temp_rlevel_rank; + int temp_score; + for (int orankx = 0; orankx < dimm_count * 4; orankx++) { // count over all the ranks + if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM + continue; + temp_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score; + // skip RODT rows in mask, or rows with too high a score; + // we will not use them for printing or evaluating... + if (((1 << rodt_ctl) & rodt_row_skip_mask) || + (temp_score > MAX_RANK_SCORE)) + { + continue; + } + temp_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting; + temp_byte = (temp_rlevel_rank.u >> byte_sh) & byte_msk; + + if (temp_byte == 0) // do not count it if illegal + continue; + else if (temp_byte == best_byte) + count_same++; + else if (temp_byte == best_byte - 1) + count_less++; + else if (temp_byte == best_byte + 1) + count_more++; + // else do not count anything more than 1 away from the best +#if RANK_MAJORITY + // FIXME? count is relative to best_byte; should it be rank-based? 
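+ // (the rank_less/rank_same/rank_more tallies below repeat the count_*
+ // logic above, restricted to samples from the target rank only)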
+ if (orankx != rankx) // rank counts only on main rank + continue; + else if (temp_byte == best_byte) + rank_same++; + else if (temp_byte == best_byte - 1) + rank_less++; + else if (temp_byte == best_byte + 1) + rank_more++; +#endif /* RANK_MAJORITY */ + } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) */ + } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */ + } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */ + +#if RANK_MAJORITY + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: COUNT: Byte %d: orig %d now %d, more %d same %d less %d (%d/%d/%d)\n", + node, ddr_interface_num, rankx, + byte_idx, (int)orig_best_byte, (int)best_byte, + count_more, count_same, count_less, + rank_more, rank_same, rank_less); +#else /* RANK_MAJORITY */ + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: COUNT: Byte %d: orig %d now %d, more %d same %d less %d\n", + node, ddr_interface_num, rankx, + byte_idx, (int)orig_best_byte, (int)best_byte, + count_more, count_same, count_less); +#endif /* RANK_MAJORITY */ + ////////////////// this is the end of the BEST BYTE COUNTING LOOP + + // choose the new byte value + // we need to check that there is no gap greater than 2 between adjacent bytes + // (adjacency depends on DIMM type) + // use the neighbor value to help decide + // initially, the rank_best_bytes[] will contain values from the chosen lowest score rank + new_byte = 0; + + // neighbor is index-1 unless we are index 0 or index 8 (ECC) + int neighbor = (byte_idx == 8) ? 3 : ((byte_idx == 0) ? 1 : byte_idx - 1); + uint64_t neigh_byte = rank_best_bytes[neighbor]; + + + // can go up or down or stay the same, so look at a numeric average to help + new_byte = divide_nint(((count_more * (best_byte + 1)) + + (count_same * (best_byte + 0)) + + (count_less * (best_byte - 1))), + max(1, (count_more + count_same + count_less))); + + // use neighbor to help choose with average + if ((byte_idx > 0) && (_abs(neigh_byte - new_byte) > 2)) // but not for byte 0 + { + uint64_t avg_pick = new_byte; + if ((new_byte - best_byte) != 0) + new_byte = best_byte; // back to best, average did not get better + else // avg was the same, still too far, now move it towards the neighbor + new_byte += (neigh_byte > new_byte) ? 1 : -1; + + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: AVERAGE: Byte %d: neighbor %d too different %d from average %d, picking %d.\n", + node, ddr_interface_num, rankx, + byte_idx, neighbor, (int)neigh_byte, (int)avg_pick, (int)new_byte); + } +#if MAJORITY_OVER_AVG + // NOTE: + // For now, we let the neighbor processing above trump the new simple majority processing here. + // This is mostly because we have seen no smoking gun for a neighbor bad choice (yet?). + // Also note that we will ALWAYS be using byte 0 majority, because of the if clause above. 
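+ // Worked example (editorial, assuming divide_nint() rounds halves up):
+ // with count_more=3, count_same=1, count_less=0 the majority branch below
+ // moves maj_byte to best_byte+1 and the weighted average above also gives
+ // best_byte+1; the two mainly diverge when the counts are nearly balanced,
+ // e.g. more=2, same=2, less=0 averages to best_byte+1 (rounding up from
+ // +0.5) while the majority stays at best_byte.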
+ else { + // majority is dependent on the counts, which are relative to best_byte, so start there + uint64_t maj_byte = best_byte; + if ((count_more > count_same) && (count_more > count_less)) { + maj_byte++; + } else if ((count_less > count_same) && (count_less > count_more)) { + maj_byte--; + } + if (maj_byte != new_byte) { + // print only when majority choice is different from average + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: MAJORTY: Byte %d: picking majority of %d over average %d.\n", + node, ddr_interface_num, rankx, + byte_idx, (int)maj_byte, (int)new_byte); + new_byte = maj_byte; + } else { + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n", + node, ddr_interface_num, rankx, + byte_idx, (int)new_byte); + } +#if RANK_MAJORITY + // rank majority is dependent on the rank counts, which are relative to best_byte, + // so start there, and adjust according to the rank counts majority + uint64_t rank_maj = best_byte; + if ((rank_more > rank_same) && (rank_more > rank_less)) { + rank_maj++; + } else if ((rank_less > rank_same) && (rank_less > rank_more)) { + rank_maj--; + } + int rank_sum = rank_more + rank_same + rank_less; + + // now, let rank majority possibly rule over the current new_byte however we got it + if (rank_maj != new_byte) { // only if different + // Here is where we decide whether to completely apply RANK_MAJORITY or not + // FIXME: For the moment, we do it ONLY when running 2-slot configs + // FIXME? or when rank_sum is big enough? + if ((dimm_count > 1) || (rank_sum > 2)) { + // print only when rank majority choice is selected + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: RANKMAJ: Byte %d: picking %d over %d.\n", + node, ddr_interface_num, rankx, + byte_idx, (int)rank_maj, (int)new_byte); + new_byte = rank_maj; + } else { // FIXME: print some info when we could have chosen RANKMAJ but did not + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: RANKMAJ: Byte %d: NOT using %d over %d (best=%d,sum=%d).\n", + node, ddr_interface_num, rankx, + byte_idx, (int)rank_maj, (int)new_byte, + (int)best_byte, rank_sum); + } + } +#endif /* RANK_MAJORITY */ + } +#else + else { + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n", + node, ddr_interface_num, rankx, + byte_idx, (int)new_byte); + } +#endif +#if FAILSAFE_CHECK + // one last check: + // if new_byte is still count_byte, BUT there was no count for that value, DO SOMETHING!!! + // FIXME: go back to original best byte from the best row + if ((new_byte == count_byte) && (count_same == 0)) { + new_byte = orig_best_byte; + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: FAILSAF: Byte %d: going back to original %d.\n", + node, ddr_interface_num, rankx, + byte_idx, (int)new_byte); + } +#endif /* FAILSAFE_CHECK */ +#if PERFECT_BITMASK_COUNTING + // Look at counts for "perfect" bitmasks if we had any for this byte-lane. + // Remember, we only counted for DDR4, so zero means none or DDR3, and we bypass this... + if (rank_perfect_counts[rankx].total[byte_idx] > 0) { + // FIXME: should be more error checking, look for ties, etc... 
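+ // The scan below finds the delay value with the highest "perfect"
+ // bitmask count; ties are tracked in a bitmask of delay values
+ // (bit i set means delay i tied for the maximum count).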
+ int i, delay_count, delay_value, delay_max; + uint32_t ties; + delay_value = -1; + delay_max = 0; + ties = 0; + + for (i = 0; i < 32; i++) { + delay_count = rank_perfect_counts[rankx].count[byte_idx][i]; + if (delay_count > 0) { // only look closer if there are any,,, + if (delay_count > delay_max) { + delay_max = delay_count; + delay_value = i; + ties = 0; // reset ties to none + } else if (delay_count == delay_max) { + if (ties == 0) + ties = 1UL << delay_value; // put in original value + ties |= 1UL << i; // add new value + } + } + } /* for (i = 0; i < 32; i++) */ + + if (delay_value >= 0) { + if (ties != 0) { + if (ties & (1UL << (int)new_byte)) { + // leave choice as new_byte if any tied one is the same... + + + delay_value = (int)new_byte; + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: TIES (0x%x) INCLUDED %d (%d)\n", + node, ddr_interface_num, rankx, byte_idx, ties, (int)new_byte, delay_max); + } else { + // FIXME: should choose a perfect one!!! + // FIXME: for now, leave the choice as new_byte + delay_value = (int)new_byte; + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: TIES (0x%x) OMITTED %d (%d)\n", + node, ddr_interface_num, rankx, byte_idx, ties, (int)new_byte, delay_max); + } + } /* if (ties != 0) */ + + if (delay_value != (int)new_byte) { + delay_count = rank_perfect_counts[rankx].count[byte_idx][(int)new_byte]; + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: DIFF from %d (%d), USING %d (%d)\n", + node, ddr_interface_num, rankx, byte_idx, (int)new_byte, + delay_count, delay_value, delay_max); + new_byte = (uint64_t)delay_value; // FIXME: make this optional via envvar? + } else { + debug_print("N%d.LMC%d.R%d: PERFECT: Byte %d: SAME as %d (%d)\n", + node, ddr_interface_num, rankx, byte_idx, new_byte, delay_max); + } + } + } /* if (rank_perfect_counts[rankx].total[byte_idx] > 0) */ + else { + if (ddr_type == DDR4_DRAM) { // only report when DDR4 + // FIXME: remove or increase VBL for this output... 
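+ // (a zero total here simply means no error-free bitmask was ever
+ // captured for this byte lane, so the new_byte chosen above stands)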
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: ZERO perfect bitmasks\n", + node, ddr_interface_num, rankx, byte_idx); + } + } /* if (rank_perfect_counts[rankx].total[byte_idx] > 0) */ +#endif /* PERFECT_BITMASK_COUNTING */ + + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: SUMMARY: Byte %d: %s: orig %d now %d, more %d same %d less %d, using %d\n", + node, ddr_interface_num, rankx, + byte_idx, "AVG", (int)orig_best_byte, + (int)best_byte, count_more, count_same, count_less, (int)new_byte); + + // update the byte with the new value (NOTE: orig value in the CSR may not be current "best") + lmc_rlevel_rank.u &= ~(byte_msk << byte_sh); + lmc_rlevel_rank.u |= (new_byte << byte_sh); + + rank_best_bytes[byte_idx] = new_byte; // save new best for neighbor use + + } /* for (byte_idx = 0; byte_idx < 8+ecc_ena; byte_idx++) */ + + ////////////////// this is the end of the BEST BYTE LOOP + + if (saved_rlevel_rank.u != lmc_rlevel_rank.u) { + DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u); + lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx)); + debug_print("Adjusting Read-Leveling per-RANK settings.\n"); + } else { + debug_print("Not Adjusting Read-Leveling per-RANK settings.\n"); + } + display_RL_with_final(node, ddr_interface_num, lmc_rlevel_rank, rankx); + +#if RLEXTRAS_PATCH +#define RLEVEL_RANKX_EXTRAS_INCR 4 + if ((rank_mask & 0x0F) != 0x0F) { // if there are unused entries to be filled + bdk_lmcx_rlevel_rankx_t temp_rlevel_rank = lmc_rlevel_rank; // copy the current rank + int byte, delay; + if (rankx < 3) { + debug_print("N%d.LMC%d.R%d: checking for RLEVEL_RANK unused entries.\n", + node, ddr_interface_num, rankx); + for (byte = 0; byte < 9; byte++) { // modify the copy in prep for writing to empty slot(s) + delay = get_rlevel_rank_struct(&temp_rlevel_rank, byte) + RLEVEL_RANKX_EXTRAS_INCR; + if (delay > (int)RLEVEL_BYTE_MSK) delay = RLEVEL_BYTE_MSK; + update_rlevel_rank_struct(&temp_rlevel_rank, byte, delay); + } + if (rankx == 0) { // if rank 0, write rank 1 and rank 2 here if empty + if (!(rank_mask & (1<<1))) { // check that rank 1 is empty + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n", + node, ddr_interface_num, rankx, 1); + DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, 1), temp_rlevel_rank.u); + } + if (!(rank_mask & (1<<2))) { // check that rank 2 is empty + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n", + node, ddr_interface_num, rankx, 2); + DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, 2), temp_rlevel_rank.u); + } + } + // if ranks 0, 1 or 2, write rank 3 here if empty + if (!(rank_mask & (1<<3))) { // check that rank 3 is empty + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n", + node, ddr_interface_num, rankx, 3); + DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, 3), temp_rlevel_rank.u); + } + } + } +#endif /* RLEXTRAS_PATCH */ + } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */ + + ////////////////// this is the end of the RANK MAJOR LOOP + + } /* Evaluation block */ + } /* while(rlevel_debug_loops--) */ + + lmc_control.s.ddr2t = save_ddr2t; + DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u); + lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num)); + ddr_print("%-45s : %6d\n", "DDR2T", lmc_control.s.ddr2t); /* Display final 2T value */ + + + perform_ddr_init_sequence(node, rank_mask, ddr_interface_num); + + for (rankx = 0; rankx < dimm_count * 
4;rankx++) { + uint64_t value; + int parameter_set = 0; + if (!(rank_mask & (1 << rankx))) + continue; + + lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx)); + + for (i=0; i<9; ++i) { + if ((s = lookup_env_parameter("ddr%d_rlevel_rank%d_byte%d", ddr_interface_num, rankx, i)) != NULL) { + parameter_set |= 1; + value = strtoul(s, NULL, 0); + + update_rlevel_rank_struct(&lmc_rlevel_rank, i, value); + } + } + + if ((s = lookup_env_parameter_ull("ddr%d_rlevel_rank%d", ddr_interface_num, rankx)) != NULL) { + parameter_set |= 1; + value = strtoull(s, NULL, 0); + lmc_rlevel_rank.u = value; + } + + + if (bdk_is_platform(BDK_PLATFORM_ASIM)) { + parameter_set |= 1; + + lmc_rlevel_rank.cn83xx.byte8 = 3; + lmc_rlevel_rank.cn83xx.byte7 = 3; + lmc_rlevel_rank.cn83xx.byte6 = 3; + lmc_rlevel_rank.cn83xx.byte5 = 3; + lmc_rlevel_rank.cn83xx.byte4 = 3; + lmc_rlevel_rank.cn83xx.byte3 = 3; + lmc_rlevel_rank.cn83xx.byte2 = 3; + lmc_rlevel_rank.cn83xx.byte1 = 3; + lmc_rlevel_rank.cn83xx.byte0 = 3; + } + + if (parameter_set) { + DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u); + display_RL(node, ddr_interface_num, lmc_rlevel_rank, rankx); + } + } + } + + /* Workaround Trcd overflow by using Additive latency. */ + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) + { + bdk_lmcx_modereg_params0_t lmc_modereg_params0; + bdk_lmcx_timing_params1_t lmc_timing_params1; + bdk_lmcx_control_t lmc_control; + int rankx; + + lmc_timing_params1.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num)); + lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num)); + lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num)); + + if (lmc_timing_params1.s.trcd == 0) { + ddr_print("Workaround Trcd overflow by using Additive latency.\n"); + lmc_timing_params1.s.trcd = 12; /* Hard code this to 12 and enable additive latency */ + lmc_modereg_params0.s.al = 2; /* CL-2 */ + lmc_control.s.pocas = 1; + + ddr_print("MODEREG_PARAMS0 : 0x%016lx\n", lmc_modereg_params0.u); + DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u); + ddr_print("TIMING_PARAMS1 : 0x%016lx\n", lmc_timing_params1.u); + DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num), lmc_timing_params1.u); + + ddr_print("LMC_CONTROL : 0x%016lx\n", lmc_control.u); + DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u); + + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + ddr4_mrw(node, ddr_interface_num, rankx, -1, 1, 0); /* MR1 */ + } + } + } + + // this is here just for output, to allow check of the Deskew settings one last time... + if (! disable_deskew_training) { + deskew_counts_t dsk_counts; + VB_PRT(VBL_TME, "N%d.LMC%d: Check Deskew Settings before software Write-Leveling.\n", + node, ddr_interface_num); + Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, VBL_TME); // TME for FINAL + } + + + /* Workaround Errata 26304 (T88@2.0) + + When the CSRs LMCX_DLL_CTL3[WR_DESKEW_ENA] = 1 AND + LMCX_PHY_CTL2[DQS[0..8]_DSK_ADJ] > 4, set + LMCX_EXT_CONFIG[DRIVE_ENA_BPRCH] = 1. 
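+
+       Illustration (the bit layout here is an assumption, matching the
+       check coded below): PHY_CTL2 is read as nine consecutive 3-bit
+       DQSn_DSK_ADJ fields starting at bit 0, so a raw value of 0x28
+       decodes to DQS0_DSK_ADJ = 0 and DQS1_DSK_ADJ = 5; byte 1 then
+       exceeds 4, since ((0x28 >> (1 * 3)) & 0x7) == 5, and the
+       workaround applies.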
+ */ + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS2_X)) { // only for 88XX pass 2, not 81xx or 83xx + bdk_lmcx_dll_ctl3_t dll_ctl3; + bdk_lmcx_phy_ctl2_t phy_ctl2; + bdk_lmcx_ext_config_t ext_config; + int increased_dsk_adj = 0; + int byte; + + phy_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL2(ddr_interface_num)); + ext_config.u = BDK_CSR_READ(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num)); + dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); + + for (byte = 0; byte < 8; ++byte) { + if (!(ddr_interface_bytemask&(1<<byte))) + continue; + increased_dsk_adj |= (((phy_ctl2.u >> (byte*3)) & 0x7) > 4); + } + + if ((dll_ctl3.s.wr_deskew_ena == 1) && increased_dsk_adj) { + ext_config.s.drive_ena_bprch = 1; + DRAM_CSR_WRITE(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num), + ext_config.u); + } + } + + /* + * 6.9.13 DRAM Vref Training for DDR4 + * + * This includes software write-leveling + */ + + { // Software Write-Leveling block + + /* Try to determine/optimize write-level delays experimentally. */ +#pragma pack(push,1) + bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank; + bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank_hw_results; + int byte; + int delay; + int rankx = 0; + int active_rank; +#if !DISABLE_SW_WL_PASS_2 + bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank; + int sw_wlevel_offset = 1; +#endif + int sw_wlevel_enable = 1; /* FIX... Should be customizable. */ + int interfaces; + int measured_vref_flag; + typedef enum { + WL_ESTIMATED = 0, /* HW/SW wleveling failed. Results + estimated. */ + WL_HARDWARE = 1, /* H/W wleveling succeeded */ + WL_SOFTWARE = 2, /* S/W wleveling passed 2 contiguous + settings. */ + WL_SOFTWARE1 = 3, /* S/W wleveling passed 1 marginal + setting. */ + } sw_wl_status_t; + + static const char *wl_status_strings[] = { + "(e)", + " ", + " ", + "(1)" + }; + int sw_wlevel_hw_default = 1; // FIXME: make H/W assist the default now +#pragma pack(pop) + + if ((s = lookup_env_parameter("ddr_sw_wlevel_hw")) != NULL) { + sw_wlevel_hw_default = !!strtoul(s, NULL, 0); + } + + // cannot use hw-assist when doing 32-bit + if (! ddr_interface_64b) { + sw_wlevel_hw_default = 0; + } + + if ((s = lookup_env_parameter("ddr_software_wlevel")) != NULL) { + sw_wlevel_enable = strtoul(s, NULL, 0); + } + +#if SWL_WITH_HW_ALTS_CHOOSE_SW + // Choose the SW algo for SWL if any HWL alternates were found + // NOTE: we have to do this here, and for all, since HW-assist including ECC requires ECC enable + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + if (!sw_wlevel_enable) + break; + if (!(rank_mask & (1 << rankx))) + continue; + + // if we are doing HW-assist, and there are alternates, switch to SW-algorithm for all + if (sw_wlevel_hw && hwl_alts[rankx].hwl_alt_mask) { + ddr_print("N%d.LMC%d.R%d: Using SW algorithm for write-leveling this rank\n", + node, ddr_interface_num, rankx); + sw_wlevel_hw_default = 0; + break; + } + } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */ +#endif + + /* Get the measured_vref setting from the config, check for an override... */ + /* NOTE: measured_vref=1 (ON) means force use of MEASURED Vref... 
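+              NOTE: measured_vref=0 (OFF) means use the COMPUTED Vref from
+              compute_vref_value() and skip the measured sweep: the code
+              below then starts the Vref loop at VREF_FINAL, so only the
+              final settling pass runs.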
*/ + // NOTE: measured VREF can only be done for DDR4 + if (ddr_type == DDR4_DRAM) { + measured_vref_flag = custom_lmc_config->measured_vref; + if ((s = lookup_env_parameter("ddr_measured_vref")) != NULL) { + measured_vref_flag = !!strtoul(s, NULL, 0); + } + } else { + measured_vref_flag = 0; // OFF for DDR3 + } + + /* Ensure disabled ECC for DRAM tests using the SW algo, else leave it untouched */ + if (!sw_wlevel_hw_default) { + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num)); + lmc_config.s.ecc_ena = 0; + DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u); + } + +#if USE_L2_WAYS_LIMIT + limit_l2_ways(node, 0, 0); /* Disable l2 sets for DRAM testing */ +#endif + + /* We need to track absolute rank number, as well as how many + ** active ranks we have. Two single rank DIMMs show up as + ** ranks 0 and 2, but only 2 ranks are active. */ + active_rank = 0; + + interfaces = __builtin_popcount(ddr_interface_mask); + +#define VREF_RANGE1_LIMIT 0x33 // range1 is valid for 0x00 - 0x32 +#define VREF_RANGE2_LIMIT 0x18 // range2 is valid for 0x00 - 0x17 +// full window is valid for 0x00 to 0x4A +// let 0x00 - 0x17 be range2, 0x18 - 0x4a be range 1 +#define VREF_LIMIT (VREF_RANGE1_LIMIT + VREF_RANGE2_LIMIT) +#define VREF_FINAL (VREF_LIMIT - 1) + + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + uint64_t rank_addr; + int vref_value, final_vref_value, final_vref_range = 0; + int start_vref_value = 0, computed_final_vref_value = -1; + char best_vref_values_count, vref_values_count; + char best_vref_values_start, vref_values_start; + + int bytes_failed; + sw_wl_status_t byte_test_status[9]; + sw_wl_status_t sw_wl_rank_status = WL_HARDWARE; + int sw_wl_failed = 0; + int sw_wlevel_hw = sw_wlevel_hw_default; + + if (!sw_wlevel_enable) + break; + + if (!(rank_mask & (1 << rankx))) + continue; + + ddr_print("N%d.LMC%d.R%d: Performing Software Write-Leveling %s\n", + node, ddr_interface_num, rankx, + (sw_wlevel_hw) ? "with H/W assist" : "with S/W algorithm"); + + if ((ddr_type == DDR4_DRAM) && (num_ranks != 4)) { + // always compute when we can... + computed_final_vref_value = compute_vref_value(node, ddr_interface_num, rankx, + dimm_count, num_ranks, imp_values, + is_stacked_die); + if (!measured_vref_flag) // but only use it if allowed + start_vref_value = VREF_FINAL; // skip all the measured Vref processing, just the final setting + } + + /* Save off the h/w wl results */ + lmc_wlevel_rank_hw_results.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx)); + + vref_values_count = 0; + vref_values_start = 0; + best_vref_values_count = 0; + best_vref_values_start = 0; + + /* Loop one extra time using the Final Vref value. */ + for (vref_value = start_vref_value; vref_value < VREF_LIMIT; ++vref_value) { + if (ddr_type == DDR4_DRAM) { + if (vref_value < VREF_FINAL) { + int vrange, vvalue; + if (vref_value < VREF_RANGE2_LIMIT) { + vrange = 1; vvalue = vref_value; + } else { + vrange = 0; vvalue = vref_value - VREF_RANGE2_LIMIT; + } + set_vref(node, ddr_interface_num, rankx, + vrange, vvalue); + } else { /* if (vref_value < VREF_FINAL) */ + /* Print the final Vref value first. 
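+                       This is the VREF_FINAL iteration: use the computed
+                       value when measured_vref is off, otherwise take the
+                       midpoint of the widest passing window found by the
+                       sweep, program it via set_vref(), and run one last
+                       pass at that setting.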
*/ + + /* Always print the computed first if its valid */ + if (computed_final_vref_value >= 0) { + ddr_print("N%d.LMC%d.R%d: Vref Computed Summary :" + " %2d (0x%02x)\n", + node, ddr_interface_num, + rankx, computed_final_vref_value, + computed_final_vref_value); + } + if (!measured_vref_flag) { // setup to use the computed + best_vref_values_count = 1; + final_vref_value = computed_final_vref_value; + } else { // setup to use the measured + if (best_vref_values_count > 0) { + best_vref_values_count = max(best_vref_values_count, 2); +#if 0 + // NOTE: this already adjusts VREF via calculating 40% rather than 50% + final_vref_value = best_vref_values_start + divide_roundup((best_vref_values_count-1)*4,10); + ddr_print("N%d.LMC%d.R%d: Vref Training Summary :" + " %2d <----- %2d (0x%02x) -----> %2d range: %2d\n", + node, ddr_interface_num, rankx, best_vref_values_start, + final_vref_value, final_vref_value, + best_vref_values_start+best_vref_values_count-1, + best_vref_values_count-1); +#else + final_vref_value = best_vref_values_start + divide_nint(best_vref_values_count - 1, 2); + if (final_vref_value < VREF_RANGE2_LIMIT) { + final_vref_range = 1; + } else { + final_vref_range = 0; final_vref_value -= VREF_RANGE2_LIMIT; + } + { + int vvlo = best_vref_values_start; + int vrlo; + if (vvlo < VREF_RANGE2_LIMIT) { + vrlo = 2; + } else { + vrlo = 1; vvlo -= VREF_RANGE2_LIMIT; + } + + int vvhi = best_vref_values_start + best_vref_values_count - 1; + int vrhi; + if (vvhi < VREF_RANGE2_LIMIT) { + vrhi = 2; + } else { + vrhi = 1; vvhi -= VREF_RANGE2_LIMIT; + } + ddr_print("N%d.LMC%d.R%d: Vref Training Summary :" + " 0x%02x/%1d <----- 0x%02x/%1d -----> 0x%02x/%1d, range: %2d\n", + node, ddr_interface_num, rankx, + vvlo, vrlo, + final_vref_value, final_vref_range + 1, + vvhi, vrhi, + best_vref_values_count-1); + } +#endif + + } else { + /* If nothing passed use the default Vref value for this rank */ + bdk_lmcx_modereg_params2_t lmc_modereg_params2; + lmc_modereg_params2.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num)); + final_vref_value = (lmc_modereg_params2.u >> (rankx * 10 + 3)) & 0x3f; + final_vref_range = (lmc_modereg_params2.u >> (rankx * 10 + 9)) & 0x01; + + ddr_print("N%d.LMC%d.R%d: Vref Using Default :" + " %2d <----- %2d (0x%02x) -----> %2d, range%1d\n", + node, ddr_interface_num, rankx, + final_vref_value, final_vref_value, + final_vref_value, final_vref_value, final_vref_range+1); + } + } + + // allow override + if ((s = lookup_env_parameter("ddr%d_vref_value_%1d%1d", + ddr_interface_num, !!(rankx&2), !!(rankx&1))) != NULL) { + final_vref_value = strtoul(s, NULL, 0); + } + + set_vref(node, ddr_interface_num, rankx, final_vref_range, final_vref_value); + + } /* if (vref_value < VREF_FINAL) */ + } /* if (ddr_type == DDR4_DRAM) */ + + lmc_wlevel_rank.u = lmc_wlevel_rank_hw_results.u; /* Restore the saved value */ + + for (byte = 0; byte < 9; ++byte) + byte_test_status[byte] = WL_ESTIMATED; + + if (wlevel_bitmask_errors == 0) { + + /* Determine address of DRAM to test for pass 1 of software write leveling. 
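+               The address selects the rank under test:
+               rank_addr = active_rank << (pbank_lsb - bunk_enable + interfaces/2).
+               Hypothetical example (values for illustration only): with
+               pbank_lsb = 33, bunk_enable = 1 and two active interfaces,
+               rank 1 lands at 1ull << 33, i.e. 8 GiB.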
*/ + rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable + (interfaces/2))); + // FIXME: these now put in by test_dram_byte() + //rank_addr |= (ddr_interface_num<<7); /* Map address into proper interface */ + //rank_addr = bdk_numa_get_address(node, rank_addr); + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: Active Rank %d Address: 0x%lx\n", + node, ddr_interface_num, rankx, active_rank, rank_addr); + + { // start parallel write-leveling block for delay high-order bits + int errors = 0; + int byte_delay[9]; + uint64_t bytemask; + int bytes_todo; + + if (ddr_interface_64b) { + bytes_todo = (sw_wlevel_hw) ? ddr_interface_bytemask : 0xFF; + bytemask = ~0ULL; + } else { // 32-bit, must be using SW algo, only data bytes + bytes_todo = 0x0f; + bytemask = 0x00000000ffffffffULL; + } + + for (byte = 0; byte < 9; ++byte) { + if (!(bytes_todo & (1 << byte))) { + byte_delay[byte] = 0; + } else { + byte_delay[byte] = get_wlevel_rank_struct(&lmc_wlevel_rank, byte); + } + } /* for (byte = 0; byte < 9; ++byte) */ + +#define WL_MIN_NO_ERRORS_COUNT 3 // FIXME? three passes without errors + int no_errors_count = 0; + + // Change verbosity if using measured vs computed VREF or DDR3 + // measured goes many times through SWL, computed and DDR3 only once + // so we want the EXHAUSTED messages at NORM for computed and DDR3, + // and at DEV2 for measured, just for completeness + int vbl_local = (measured_vref_flag) ? VBL_DEV2 : VBL_NORM; + uint64_t bad_bits[2]; +#if ENABLE_SW_WLEVEL_UTILIZATION + uint64_t sum_dram_dclk = 0, sum_dram_ops = 0; + uint64_t start_dram_dclk, stop_dram_dclk; + uint64_t start_dram_ops, stop_dram_ops; +#endif + do { + // write the current set of WL delays + DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u); + lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx)); + + bdk_watchdog_poke(); + + // do the test + if (sw_wlevel_hw) { + errors = run_best_hw_patterns(node, ddr_interface_num, rank_addr, + DBTRAIN_TEST, bad_bits); + errors &= bytes_todo; // keep only the ones we are still doing + } else { +#if ENABLE_SW_WLEVEL_UTILIZATION + start_dram_dclk = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num)); + start_dram_ops = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(ddr_interface_num)); +#endif +#if USE_ORIG_TEST_DRAM_BYTE + errors = test_dram_byte(node, ddr_interface_num, rank_addr, bytemask, bad_bits); +#else + errors = dram_tuning_mem_xor(node, ddr_interface_num, rank_addr, bytemask, bad_bits); +#endif +#if ENABLE_SW_WLEVEL_UTILIZATION + stop_dram_dclk = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num)); + stop_dram_ops = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(ddr_interface_num)); + sum_dram_dclk += stop_dram_dclk - start_dram_dclk; + sum_dram_ops += stop_dram_ops - start_dram_ops; +#endif + } + + VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: S/W write-leveling TEST: returned 0x%x\n", + node, ddr_interface_num, rankx, errors); + + // remember, errors will not be returned for byte-lanes that have maxxed out... 
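+                // Search sketch (mirrors the code below): a failing byte steps its
+                // delay by 8 (high-order bits only), so a byte leveled at 6 is
+                // retried at 14, 22 and 30 before being declared exhausted; a
+                // passing byte keeps its delay, and the loop ends only after
+                // WL_MIN_NO_ERRORS_COUNT consecutive clean passes.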
+ if (errors == 0) { + no_errors_count++; // bump + if (no_errors_count > 1) // bypass check/update completely + continue; // to end of do-while + } else + no_errors_count = 0; // reset + + // check errors by byte + for (byte = 0; byte < 9; ++byte) { + if (!(bytes_todo & (1 << byte))) + continue; + + delay = byte_delay[byte]; + if (errors & (1 << byte)) { // yes, an error in this byte lane + debug_print(" byte %d delay %2d Errors\n", byte, delay); + // since this byte had an error, we move to the next delay value, unless maxxed out + delay += 8; // incr by 8 to do only delay high-order bits + if (delay < 32) { + update_wlevel_rank_struct(&lmc_wlevel_rank, byte, delay); + debug_print(" byte %d delay %2d New\n", byte, delay); + byte_delay[byte] = delay; + } else { // reached max delay, maybe really done with this byte +#if SWL_TRY_HWL_ALT + if (!measured_vref_flag && // consider an alt only for computed VREF and + (hwl_alts[rankx].hwl_alt_mask & (1 << byte))) // if an alt exists... + { + int bad_delay = delay & 0x6; // just orig low-3 bits + delay = hwl_alts[rankx].hwl_alt_delay[byte]; // yes, use it + hwl_alts[rankx].hwl_alt_mask &= ~(1 << byte); // clear that flag + update_wlevel_rank_struct(&lmc_wlevel_rank, byte, delay); + byte_delay[byte] = delay; + debug_print(" byte %d delay %2d ALTERNATE\n", byte, delay); + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: SWL: Byte %d: %d FAIL, trying ALTERNATE %d\n", + node, ddr_interface_num, rankx, byte, bad_delay, delay); + + } else +#endif /* SWL_TRY_HWL_ALT */ + { + unsigned bits_bad; + if (byte < 8) { + bytemask &= ~(0xffULL << (8*byte)); // test no longer, remove from byte mask + bits_bad = (unsigned)((bad_bits[0] >> (8 * byte)) & 0xffUL); + } else { + bits_bad = (unsigned)(bad_bits[1] & 0xffUL); + } + bytes_todo &= ~(1 << byte); // remove from bytes to do + byte_test_status[byte] = WL_ESTIMATED; // make sure this is set for this case + debug_print(" byte %d delay %2d Exhausted\n", byte, delay); + VB_PRT(vbl_local, "N%d.LMC%d.R%d: SWL: Byte %d (0x%02x): delay %d EXHAUSTED \n", + node, ddr_interface_num, rankx, byte, bits_bad, delay); + } + } + } else { // no error, stay with current delay, but keep testing it... + debug_print(" byte %d delay %2d Passed\n", byte, delay); + byte_test_status[byte] = WL_HARDWARE; // change status + } + + } /* for (byte = 0; byte < 9; ++byte) */ + + } while (no_errors_count < WL_MIN_NO_ERRORS_COUNT); + +#if ENABLE_SW_WLEVEL_UTILIZATION + if (! sw_wlevel_hw) { + uint64_t percent_x10; + if (sum_dram_dclk == 0) + sum_dram_dclk = 1; + percent_x10 = sum_dram_ops * 1000 / sum_dram_dclk; + ddr_print("N%d.LMC%d.R%d: ops %lu, cycles %lu, used %lu.%lu%%\n", + node, ddr_interface_num, rankx, sum_dram_ops, sum_dram_dclk, + percent_x10 / 10, percent_x10 % 10); + } +#endif + if (errors) { + debug_print("End WLEV_64 while loop: vref_value %d(0x%x), errors 0x%02x\n", + vref_value, vref_value, errors); + } + } // end parallel write-leveling block for delay high-order bits + + if (sw_wlevel_hw) { // if we used HW-assist, we did the ECC byte when approp. 
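+            // H/W assist already exercised the ECC lane with its test patterns,
+            // so the S/W ECC-delay estimation below is skipped (no_ecc_estimate).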
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: HW-assist SWL - no ECC estimate!!!\n", + node, ddr_interface_num, rankx); + goto no_ecc_estimate; + } + + if ((ddr_interface_bytemask & 0xff) == 0xff) { + if (use_ecc) { + int save_byte8 = lmc_wlevel_rank.s.byte8; // save original HW delay + byte_test_status[8] = WL_HARDWARE; /* say it is H/W delay value */ + + if ((save_byte8 != lmc_wlevel_rank.s.byte3) && + (save_byte8 != lmc_wlevel_rank.s.byte4)) + { + // try to calculate a workable delay using SW bytes 3 and 4 and HW byte 8 + int test_byte8 = save_byte8; + int test_byte8_error; + int byte8_error = 0x1f; + int adder; + int avg_bytes = divide_nint(lmc_wlevel_rank.s.byte3+lmc_wlevel_rank.s.byte4, 2); + for (adder = 0; adder<= 32; adder+=8) { + test_byte8_error = _abs((adder+save_byte8) - avg_bytes); + if (test_byte8_error < byte8_error) { + byte8_error = test_byte8_error; + test_byte8 = save_byte8 + adder; + } + } + +#if SW_WL_CHECK_PATCH + // only do the check if we are not using measured VREF + if (!measured_vref_flag) { + test_byte8 &= ~1; /* Use only even settings, rounding down... */ + + // do validity check on the calculated ECC delay value + // this depends on the DIMM type + if (spd_rdimm) { // RDIMM + if (spd_dimm_type != 5) { // but not mini-RDIMM + // it can be > byte4, but should never be > byte3 + if (test_byte8 > lmc_wlevel_rank.s.byte3) { + byte_test_status[8] = WL_ESTIMATED; /* say it is still estimated */ + } + } + } else { // UDIMM + if ((test_byte8 < lmc_wlevel_rank.s.byte3) || + (test_byte8 > lmc_wlevel_rank.s.byte4)) + { // should never be outside the byte 3-4 range + byte_test_status[8] = WL_ESTIMATED; /* say it is still estimated */ + } + } + /* + * Report whenever the calculation appears bad. + * This happens if some of the original values were off, or unexpected geometry + * from DIMM type, or custom circuitry (NIC225E, I am looking at you!). + * We will trust the calculated value, and depend on later testing to catch + * any instances when that value is truly bad. + */ + if (byte_test_status[8] == WL_ESTIMATED) { // ESTIMATED means there may be an issue + ddr_print("N%d.LMC%d.R%d: SWL: (%cDIMM): calculated ECC delay unexpected (%d/%d/%d)\n", + node, ddr_interface_num, rankx, (spd_rdimm?'R':'U'), + lmc_wlevel_rank.s.byte4, test_byte8, lmc_wlevel_rank.s.byte3); + byte_test_status[8] = WL_HARDWARE; + } + } +#endif /* SW_WL_CHECK_PATCH */ + lmc_wlevel_rank.s.byte8 = test_byte8 & ~1; /* Use only even settings */ + } + + if (lmc_wlevel_rank.s.byte8 != save_byte8) { + /* Change the status if s/w adjusted the delay */ + byte_test_status[8] = WL_SOFTWARE; /* Estimated delay */ + } + } else { + byte_test_status[8] = WL_HARDWARE; /* H/W delay value */ + lmc_wlevel_rank.s.byte8 = lmc_wlevel_rank.s.byte0; /* ECC is not used */ + } + } else { /* if ((ddr_interface_bytemask & 0xff) == 0xff) */ + if (use_ecc) { + /* Estimate the ECC byte delay */ + lmc_wlevel_rank.s.byte4 |= (lmc_wlevel_rank.s.byte3 & 0x38); // add hi-order to b4 + if ((lmc_wlevel_rank.s.byte4 & 0x06) < (lmc_wlevel_rank.s.byte3 & 0x06)) // orig b4 < orig b3 + lmc_wlevel_rank.s.byte4 += 8; // must be next clock + } else { + lmc_wlevel_rank.s.byte4 = lmc_wlevel_rank.s.byte0; /* ECC is not used */ + } + /* Change the status if s/w adjusted the delay */ + byte_test_status[4] = WL_SOFTWARE; /* Estimated delay */ + } /* if ((ddr_interface_bytemask & 0xff) == 0xff) */ + } /* if (wlevel_bitmask_errors == 0) */ + + no_ecc_estimate: + + bytes_failed = 0; + for (byte = 0; byte < 9; ++byte) { + /* Don't accumulate errors for untested bytes. 
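+               Only lanes present in ddr_interface_bytemask count here; a
+               lane still marked WL_ESTIMATED never found a working delay,
+               and any such lane triggers software write-leveling pass 2
+               below.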
*/ + if (!(ddr_interface_bytemask & (1 << byte))) + continue; + bytes_failed += (byte_test_status[byte] == WL_ESTIMATED); + } + + /* Vref training loop is only used for DDR4 */ + if (ddr_type != DDR4_DRAM) + break; + + if (bytes_failed == 0) { + if (vref_values_count == 0) { + vref_values_start = vref_value; + } + ++vref_values_count; + if (vref_values_count > best_vref_values_count) { + best_vref_values_count = vref_values_count; + best_vref_values_start = vref_values_start; + debug_print("N%d.LMC%d.R%d: Vref Training (%2d) : 0x%02x <----- ???? -----> 0x%02x\n", + node, ddr_interface_num, + rankx, vref_value, best_vref_values_start, + best_vref_values_start+best_vref_values_count-1); + } + } else { + vref_values_count = 0; + debug_print("N%d.LMC%d.R%d: Vref Training (%2d) : failed\n", + node, ddr_interface_num, + rankx, vref_value); + } + } /* for (vref_value=0; vref_value<VREF_LIMIT; ++vref_value) */ + + /* Determine address of DRAM to test for pass 2 and final test of software write leveling. */ + rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable + (interfaces/2))); + rank_addr |= (ddr_interface_num<<7); /* Map address into proper interface */ + rank_addr = bdk_numa_get_address(node, rank_addr); + debug_print("N%d.LMC%d.R%d: Active Rank %d Address: 0x%lx\n", + node, ddr_interface_num, rankx, active_rank, rank_addr); + + int errors; + + if (bytes_failed) { + +#if !DISABLE_SW_WL_PASS_2 + + ddr_print("N%d.LMC%d.R%d: Starting SW Write-leveling pass 2\n", + node, ddr_interface_num, rankx); + sw_wl_rank_status = WL_SOFTWARE; + + /* If previous s/w fixups failed then retry using s/w write-leveling. */ + if (wlevel_bitmask_errors == 0) { + /* h/w succeeded but previous s/w fixups failed. So retry s/w. */ + debug_print("N%d.LMC%d.R%d: Retrying software Write-Leveling.\n", + node, ddr_interface_num, rankx); + } + + { // start parallel write-leveling block for delay low-order bits + int byte_delay[8]; + int byte_passed[8]; + uint64_t bytemask; + uint64_t bitmask; + int wl_offset; + int bytes_todo; + + for (byte = 0; byte < 8; ++byte) { + byte_passed[byte] = 0; + } + + bytes_todo = ddr_interface_bytemask; + + for (wl_offset = sw_wlevel_offset; wl_offset >= 0; --wl_offset) { + debug_print("Starting wl_offset for-loop: %d\n", wl_offset); + + bytemask = 0; + + for (byte = 0; byte < 8; ++byte) { + byte_delay[byte] = 0; + if (!(bytes_todo & (1 << byte))) // this does not contain fully passed bytes + continue; + + byte_passed[byte] = 0; // reset across passes if not fully passed + update_wlevel_rank_struct(&lmc_wlevel_rank, byte, 0); // all delays start at 0 + bitmask = ((!ddr_interface_64b) && (byte == 4)) ? 
0x0f: 0xff; + bytemask |= bitmask << (8*byte); // set the bytes bits in the bytemask + } /* for (byte = 0; byte < 8; ++byte) */ + + while (bytemask != 0) { // start a pass if there is any byte lane to test + + debug_print("Starting bytemask while-loop: 0x%lx\n", bytemask); + + // write this set of WL delays + DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u); + lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx)); + + bdk_watchdog_poke(); + + // do the test + if (sw_wlevel_hw) + errors = run_best_hw_patterns(node, ddr_interface_num, rank_addr, + DBTRAIN_TEST, NULL); + else + errors = test_dram_byte(node, ddr_interface_num, rank_addr, bytemask, NULL); + + debug_print("SWL pass 2: test_dram_byte returned 0x%x\n", errors); + + // check errors by byte + for (byte = 0; byte < 8; ++byte) { + if (!(bytes_todo & (1 << byte))) + continue; + + delay = byte_delay[byte]; + if (errors & (1 << byte)) { // yes, an error + debug_print(" byte %d delay %2d Errors\n", byte, delay); + byte_passed[byte] = 0; + } else { // no error + byte_passed[byte] += 1; + if (byte_passed[byte] == (1 + wl_offset)) { /* Look for consecutive working settings */ + debug_print(" byte %d delay %2d FULLY Passed\n", byte, delay); + if (wl_offset == 1) { + byte_test_status[byte] = WL_SOFTWARE; + } else if (wl_offset == 0) { + byte_test_status[byte] = WL_SOFTWARE1; + } + bytemask &= ~(0xffULL << (8*byte)); // test no longer, remove from byte mask this pass + bytes_todo &= ~(1 << byte); // remove completely from concern + continue; // on to the next byte, bypass delay updating!! + } else { + debug_print(" byte %d delay %2d Passed\n", byte, delay); + } + } + // error or no, here we move to the next delay value for this byte, unless done all delays + // only a byte that has "fully passed" will bypass around this, + delay += 2; + if (delay < 32) { + update_wlevel_rank_struct(&lmc_wlevel_rank, byte, delay); + debug_print(" byte %d delay %2d New\n", byte, delay); + byte_delay[byte] = delay; + } else { + // reached max delay, done with this byte + debug_print(" byte %d delay %2d Exhausted\n", byte, delay); + bytemask &= ~(0xffULL << (8*byte)); // test no longer, remove from byte mask this pass + } + } /* for (byte = 0; byte < 8; ++byte) */ + debug_print("End of for-loop: bytemask 0x%lx\n", bytemask); + + } /* while (bytemask != 0) */ + } /* for (wl_offset = sw_wlevel_offset; wl_offset >= 0; --wl_offset) */ + + for (byte = 0; byte < 8; ++byte) { + // any bytes left in bytes_todo did not pass + if (bytes_todo & (1 << byte)) { + /* Last resort. Use Rlevel settings to estimate + Wlevel if software write-leveling fails */ + debug_print("Using RLEVEL as WLEVEL estimate for byte %d\n", byte); + lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx)); + rlevel_to_wlevel(&lmc_rlevel_rank, &lmc_wlevel_rank, byte); + } + } /* for (byte = 0; byte < 8; ++byte) */ + + sw_wl_failed = (bytes_todo != 0); + + } // end parallel write-leveling block for delay low-order bits + + if (use_ecc) { + /* ECC byte has to be estimated. Take the average of the two surrounding bytes. 
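+               Worked example with hypothetical delays byte3 = 10 and
+               byte4 = 12: divide_nint(10 + 12 + 2, 2) = 12, and masking
+               with ~1 keeps the even setting 12 for byte8.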
*/ + int test_byte8 = divide_nint(lmc_wlevel_rank.s.byte3 + + lmc_wlevel_rank.s.byte4 + + 2 /* round-up*/ , 2); + lmc_wlevel_rank.s.byte8 = test_byte8 & ~1; /* Use only even settings */ + byte_test_status[8] = WL_ESTIMATED; /* Estimated delay */ + } else { + byte_test_status[8] = WL_HARDWARE; /* H/W delay value */ + lmc_wlevel_rank.s.byte8 = lmc_wlevel_rank.s.byte0; /* ECC is not used */ + } + + /* Set delays for unused bytes to match byte 0. */ + for (byte=0; byte<8; ++byte) { + if ((ddr_interface_bytemask & (1 << byte))) + continue; + update_wlevel_rank_struct(&lmc_wlevel_rank, byte, + lmc_wlevel_rank.s.byte0); + byte_test_status[byte] = WL_SOFTWARE; + } +#else /* !DISABLE_SW_WL_PASS_2 */ + // FIXME? the big hammer, did not even try SW WL pass2, assume only chip reset will help + ddr_print("N%d.LMC%d.R%d: S/W write-leveling pass 1 failed\n", + node, ddr_interface_num, rankx); + sw_wl_failed = 1; +#endif /* !DISABLE_SW_WL_PASS_2 */ + + } else { /* if (bytes_failed) */ + + // SW WL pass 1 was OK, write the settings + DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u); + lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx)); + +#if SW_WL_CHECK_PATCH + // do validity check on the delay values by running the test 1 more time... + // FIXME: we really need to check the ECC byte setting here as well, + // so we need to enable ECC for this test!!! + // if there are any errors, claim SW WL failure + { + uint64_t datamask = (ddr_interface_64b) ? 0xffffffffffffffffULL : 0x00000000ffffffffULL; + + // do the test + if (sw_wlevel_hw) { + errors = run_best_hw_patterns(node, ddr_interface_num, rank_addr, + DBTRAIN_TEST, NULL) & 0x0ff; + } else { +#if USE_ORIG_TEST_DRAM_BYTE + errors = test_dram_byte(node, ddr_interface_num, rank_addr, datamask, NULL); +#else + errors = dram_tuning_mem_xor(node, ddr_interface_num, rank_addr, datamask, NULL); +#endif + } + + if (errors) { + ddr_print("N%d.LMC%d.R%d: Wlevel Rank Final Test errors 0x%x\n", + node, ddr_interface_num, rankx, errors); + sw_wl_failed = 1; + } + } +#endif /* SW_WL_CHECK_PATCH */ + + } /* if (bytes_failed) */ + + // FIXME? dump the WL settings, so we get more of a clue as to what happened where + ddr_print("N%d.LMC%d.R%d: Wlevel Rank %#4x, 0x%016lX : %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %s\n", + node, ddr_interface_num, rankx, + lmc_wlevel_rank.s.status, + lmc_wlevel_rank.u, + lmc_wlevel_rank.s.byte8, wl_status_strings[byte_test_status[8]], + lmc_wlevel_rank.s.byte7, wl_status_strings[byte_test_status[7]], + lmc_wlevel_rank.s.byte6, wl_status_strings[byte_test_status[6]], + lmc_wlevel_rank.s.byte5, wl_status_strings[byte_test_status[5]], + lmc_wlevel_rank.s.byte4, wl_status_strings[byte_test_status[4]], + lmc_wlevel_rank.s.byte3, wl_status_strings[byte_test_status[3]], + lmc_wlevel_rank.s.byte2, wl_status_strings[byte_test_status[2]], + lmc_wlevel_rank.s.byte1, wl_status_strings[byte_test_status[1]], + lmc_wlevel_rank.s.byte0, wl_status_strings[byte_test_status[0]], + (sw_wl_rank_status == WL_HARDWARE) ? "" : "(s)" + ); + + // finally, check for fatal conditions: either chip reset right here, or return error flag + if (((ddr_type == DDR4_DRAM) && (best_vref_values_count == 0)) || sw_wl_failed) { + if (!ddr_disable_chip_reset) { // do chip RESET + error_print("INFO: Short memory test indicates a retry is needed on N%d.LMC%d.R%d. 
Resetting node...\n", + node, ddr_interface_num, rankx); + bdk_wait_usec(500000); + bdk_reset_chip(node); + } else { // return error flag so LMC init can be retried... + ddr_print("INFO: Short memory test indicates a retry is needed on N%d.LMC%d.R%d. Restarting LMC init...\n", + node, ddr_interface_num, rankx); + return 0; // 0 indicates restart possible... + } + } + + active_rank++; + } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */ + + // Finalize the write-leveling settings + for (rankx = 0; rankx < dimm_count * 4;rankx++) { + uint64_t value; + int parameter_set = 0; + if (!(rank_mask & (1 << rankx))) + continue; + + lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx)); + + if (bdk_is_platform(BDK_PLATFORM_ASIM)) { + parameter_set |= 1; + + lmc_wlevel_rank.s.byte8 = 0; + lmc_wlevel_rank.s.byte7 = 0; + lmc_wlevel_rank.s.byte6 = 0; + lmc_wlevel_rank.s.byte5 = 0; + lmc_wlevel_rank.s.byte4 = 0; + lmc_wlevel_rank.s.byte3 = 0; + lmc_wlevel_rank.s.byte2 = 0; + lmc_wlevel_rank.s.byte1 = 0; + lmc_wlevel_rank.s.byte0 = 0; + } + + for (i=0; i<9; ++i) { + if ((s = lookup_env_parameter("ddr%d_wlevel_rank%d_byte%d", ddr_interface_num, rankx, i)) != NULL) { + parameter_set |= 1; + value = strtoul(s, NULL, 0); + + update_wlevel_rank_struct(&lmc_wlevel_rank, i, value); + } + } + + if ((s = lookup_env_parameter_ull("ddr%d_wlevel_rank%d", ddr_interface_num, rankx)) != NULL) { + parameter_set |= 1; + value = strtoull(s, NULL, 0); + lmc_wlevel_rank.u = value; + } + + if (parameter_set) { + DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u); + lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx)); + display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx); + } +#if WLEXTRAS_PATCH + if ((rank_mask & 0x0F) != 0x0F) { // if there are unused entries to be filled + if (rankx < 3) { + debug_print("N%d.LMC%d.R%d: checking for WLEVEL_RANK unused entries.\n", + node, ddr_interface_num, rankx); + if (rankx == 0) { // if rank 0, write ranks 1 and 2 here if empty + if (!(rank_mask & (1<<1))) { // check that rank 1 is empty + DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, 1), lmc_wlevel_rank.u); + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n", + node, ddr_interface_num, rankx, 1); + } + if (!(rank_mask & (1<<2))) { // check that rank 2 is empty + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n", + node, ddr_interface_num, rankx, 2); + DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, 2), lmc_wlevel_rank.u); + } + } + // if rank 0, 1 or 2, write rank 3 here if empty + if (!(rank_mask & (1<<3))) { // check that rank 3 is empty + VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n", + node, ddr_interface_num, rankx, 3); + DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, 3), lmc_wlevel_rank.u); + } + } + } +#endif /* WLEXTRAS_PATCH */ + + } /* for (rankx = 0; rankx < dimm_count * 4;rankx++) */ + + /* Restore the ECC configuration */ + if (!sw_wlevel_hw_default) { + lmc_config.s.ecc_ena = use_ecc; + DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u); + } + +#if USE_L2_WAYS_LIMIT + /* Restore the l2 set configuration */ + if ((s = lookup_env_parameter("limit_l2_ways")) != NULL) { + int ways = strtoul(s, NULL, 10); + limit_l2_ways(node, ways, 1); + } else { + limit_l2_ways(node, bdk_l2c_get_num_assoc(node), 0); + } +#endif + + } // End Software Write-Leveling block + +#if 
ENABLE_DISPLAY_MPR_PAGE + if (ddr_type == DDR4_DRAM) { + Display_MPR_Page(node, rank_mask, ddr_interface_num, dimm_count, 2); + Display_MPR_Page(node, rank_mask, ddr_interface_num, dimm_count, 0); + } +#endif + +#if 1 // was #ifdef CAVIUM_ONLY + { + int i; + int setting[9]; + bdk_lmcx_dll_ctl3_t ddr_dll_ctl3; + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); + + for (i=0; i<9; ++i) { + SET_DDR_DLL_CTL3(dll90_byte_sel, ENCODE_DLL90_BYTE_SEL(i)); + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u); + BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); + ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num)); + setting[i] = GET_DDR_DLL_CTL3(dll90_setting); + debug_print("%d. LMC%d_DLL_CTL3[%d] = %016lx %d\n", i, ddr_interface_num, + GET_DDR_DLL_CTL3(dll90_byte_sel), ddr_dll_ctl3.u, setting[i]); + } + + VB_PRT(VBL_DEV, "N%d.LMC%d: %-36s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n", + node, ddr_interface_num, "DLL90 Setting 8:0", + setting[8], setting[7], setting[6], setting[5], setting[4], + setting[3], setting[2], setting[1], setting[0]); + + //BDK_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), save_ddr_dll_ctl3.u); + } +#endif /* CAVIUM_ONLY */ + + // any custom DLL read or write offsets, install them + // FIXME: no need to do these if we are going to auto-tune... ??? + + process_custom_dll_offsets(node, ddr_interface_num, "ddr_dll_write_offset", + custom_lmc_config->dll_write_offset, "ddr%d_dll_write_offset_byte%d", 1); + process_custom_dll_offsets(node, ddr_interface_num, "ddr_dll_read_offset", + custom_lmc_config->dll_read_offset, "ddr%d_dll_read_offset_byte%d", 2); + + // we want to train write bit-deskew here... + if (! disable_deskew_training) { + if (enable_write_deskew) { + ddr_print("N%d.LMC%d: WRITE BIT-DESKEW feature training begins.\n", + node, ddr_interface_num); + Perform_Write_Deskew_Training(node, ddr_interface_num); + } /* if (enable_write_deskew) */ + } /* if (! disable_deskew_training) */ + + /* + * 6.9.14 Final LMC Initialization + * + * Early LMC initialization, LMC write-leveling, and LMC read-leveling + * must be completed prior to starting this final LMC initialization. + * + * LMC hardware updates the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, + * LMC(0)_SLOT_CTL2 CSRs with minimum values based on the selected + * readleveling and write-leveling settings. Software should not write + * the final LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and LMC(0)_SLOT_CTL2 + * values until after the final read-leveling and write-leveling settings + * are written. + * + * Software must ensure the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and + * LMC(0)_SLOT_CTL2 CSR values are appropriate for this step. These CSRs + * select the minimum gaps between read operations and write operations + * of various types. + * + * Software must not reduce the values in these CSR fields below the + * values previously selected by the LMC hardware (during write-leveling + * and read-leveling steps above). + * + * All sections in this chapter may be used to derive proper settings for + * these registers. + * + * For minimal read latency, L2C_CTL[EF_ENA,EF_CNT] should be programmed + * properly. This should be done prior to the first read. 
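+ *
+ * Illustration (hypothetical field value, for this note only): if the
+ * leveling steps left LMC(0)_SLOT_CTL1[W2R_XRANK_INIT] at 10, software
+ * may program 10 or more into that field, but never less.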
+ */ + +#if ENABLE_SLOT_CTL_ACCESS + { + bdk_lmcx_slot_ctl0_t lmc_slot_ctl0; + bdk_lmcx_slot_ctl1_t lmc_slot_ctl1; + bdk_lmcx_slot_ctl2_t lmc_slot_ctl2; + bdk_lmcx_slot_ctl3_t lmc_slot_ctl3; + + lmc_slot_ctl0.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL0(ddr_interface_num)); + lmc_slot_ctl1.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL1(ddr_interface_num)); + lmc_slot_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL2(ddr_interface_num)); + lmc_slot_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL3(ddr_interface_num)); + + ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL0", lmc_slot_ctl0.u); + ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL1", lmc_slot_ctl1.u); + ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL2", lmc_slot_ctl2.u); + ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL3", lmc_slot_ctl3.u); + + // for now, look only for SLOT_CTL1 envvar for override of contents + if ((s = lookup_env_parameter("ddr%d_slot_ctl1", ddr_interface_num)) != NULL) { + int slot_ctl1_incr = strtoul(s, NULL, 0); + // validate the value + if ((slot_ctl1_incr < 0) || (slot_ctl1_incr > 3)) { // allow 0 for printing only + error_print("ddr%d_slot_ctl1 illegal value (%d); must be 0-3\n", + ddr_interface_num, slot_ctl1_incr); + } else { + +#define INCR(csr, chip, field, incr) \ + csr.chip.field = (csr.chip.field < (64 - incr)) ? (csr.chip.field + incr) : 63 + + // only print original when we are changing it! + if (slot_ctl1_incr) + ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL1", lmc_slot_ctl1.u); + + // modify all the SLOT_CTL1 fields by the increment, for now... + // but make sure the value will not overflow!!! + INCR(lmc_slot_ctl1, s, r2r_xrank_init, slot_ctl1_incr); + INCR(lmc_slot_ctl1, s, r2w_xrank_init, slot_ctl1_incr); + INCR(lmc_slot_ctl1, s, w2r_xrank_init, slot_ctl1_incr); + INCR(lmc_slot_ctl1, s, w2w_xrank_init, slot_ctl1_incr); + DRAM_CSR_WRITE(node, BDK_LMCX_SLOT_CTL1(ddr_interface_num), lmc_slot_ctl1.u); + lmc_slot_ctl1.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL1(ddr_interface_num)); + + // always print when we are changing it! + printf("%-45s : 0x%016lx\n", "LMC_SLOT_CTL1", lmc_slot_ctl1.u); + } + } + } +#endif /* ENABLE_SLOT_CTL_ACCESS */ + { + /* Clear any residual ECC errors */ + int num_tads = 1; + int tad; + + DRAM_CSR_WRITE(node, BDK_LMCX_INT(ddr_interface_num), -1ULL); + BDK_CSR_READ(node, BDK_LMCX_INT(ddr_interface_num)); + + for (tad=0; tad<num_tads; tad++) + DRAM_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(tad), BDK_CSR_READ(node, BDK_L2C_TADX_INT_W1C(tad))); + + ddr_print("%-45s : 0x%08lx\n", "LMC_INT", + BDK_CSR_READ(node, BDK_LMCX_INT(ddr_interface_num))); + +#if 0 + // NOTE: this must be done for pass 2.x + // must enable ECC interrupts to get ECC error info in LMCX_INT + if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx + DRAM_CSR_WRITE(node, BDK_LMCX_INT_ENA_W1S(ddr_interface_num), -1ULL); + BDK_CSR_INIT(lmc_int_ena_w1s, node, BDK_LMCX_INT_ENA_W1S(ddr_interface_num)); + ddr_print("%-45s : 0x%08lx\n", "LMC_INT_ENA_W1S", lmc_int_ena_w1s.u); + } +#endif + } + + // Now we can enable scrambling if desired... 
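+    // Policy sketch (matches the code below): BDK_CONFIG_DRAM_SCRAMBLE is
+    // 0 (off), 1 (on) or 2 (auto); auto enables scrambling only when the
+    // trust level is at least BDK_TRUST_LEVEL_SIGNED. When enabled, the
+    // three scramble keys come from bdk_rng_get_random64() and
+    // CONTROL[SCRAMBLE_ENA] is set.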
+ { + bdk_lmcx_control_t lmc_control; + bdk_lmcx_scramble_cfg0_t lmc_scramble_cfg0; + bdk_lmcx_scramble_cfg1_t lmc_scramble_cfg1; + bdk_lmcx_scramble_cfg2_t lmc_scramble_cfg2; + bdk_lmcx_ns_ctl_t lmc_ns_ctl; + + lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num)); + lmc_scramble_cfg0.u = BDK_CSR_READ(node, BDK_LMCX_SCRAMBLE_CFG0(ddr_interface_num)); + lmc_scramble_cfg1.u = BDK_CSR_READ(node, BDK_LMCX_SCRAMBLE_CFG1(ddr_interface_num)); + lmc_scramble_cfg2.u = BDK_CSR_READ(node, BDK_LMCX_SCRAMBLE_CFG2(ddr_interface_num)); + lmc_ns_ctl.u = BDK_CSR_READ(node, BDK_LMCX_NS_CTL(ddr_interface_num)); + + /* Read the scramble setting from the config and see if we + need scrambling */ + int use_scramble = bdk_config_get_int(BDK_CONFIG_DRAM_SCRAMBLE); + if (use_scramble == 2) + { + if (bdk_trust_get_level() >= BDK_TRUST_LEVEL_SIGNED) + use_scramble = 1; + else + use_scramble = 0; + } + + /* Generate random values if scrambling is needed */ + if (use_scramble) + { + lmc_scramble_cfg0.u = bdk_rng_get_random64(); + lmc_scramble_cfg1.u = bdk_rng_get_random64(); + lmc_scramble_cfg2.u = bdk_rng_get_random64(); + lmc_ns_ctl.s.ns_scramble_dis = 0; + lmc_ns_ctl.s.adr_offset = 0; + lmc_control.s.scramble_ena = 1; + } + + if ((s = lookup_env_parameter_ull("ddr_scramble_cfg0")) != NULL) { + lmc_scramble_cfg0.u = strtoull(s, NULL, 0); + lmc_control.s.scramble_ena = 1; + } + ddr_print("%-45s : 0x%016lx\n", "LMC_SCRAMBLE_CFG0", lmc_scramble_cfg0.u); + + DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG0(ddr_interface_num), lmc_scramble_cfg0.u); + + if ((s = lookup_env_parameter_ull("ddr_scramble_cfg1")) != NULL) { + lmc_scramble_cfg1.u = strtoull(s, NULL, 0); + lmc_control.s.scramble_ena = 1; + } + ddr_print("%-45s : 0x%016lx\n", "LMC_SCRAMBLE_CFG1", lmc_scramble_cfg1.u); + DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG1(ddr_interface_num), lmc_scramble_cfg1.u); + + if ((s = lookup_env_parameter_ull("ddr_scramble_cfg2")) != NULL) { + lmc_scramble_cfg2.u = strtoull(s, NULL, 0); + lmc_control.s.scramble_ena = 1; + } + ddr_print("%-45s : 0x%016lx\n", "LMC_SCRAMBLE_CFG2", lmc_scramble_cfg2.u); + DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG2(ddr_interface_num), lmc_scramble_cfg2.u); + + if ((s = lookup_env_parameter_ull("ddr_ns_ctl")) != NULL) { + lmc_ns_ctl.u = strtoull(s, NULL, 0); + } + ddr_print("%-45s : 0x%016lx\n", "LMC_NS_CTL", lmc_ns_ctl.u); + DRAM_CSR_WRITE(node, BDK_LMCX_NS_CTL(ddr_interface_num), lmc_ns_ctl.u); + + DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u); + + } + + return(mem_size_mbytes); +} diff --git a/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.h b/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.h new file mode 100644 index 0000000000..ba1060e5e0 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.h @@ -0,0 +1,97 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. 
nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ + +/** + * Function for DDR3 init. Internal use only. + */ + +extern void perform_octeon3_ddr3_sequence(bdk_node_t node, int rank_mask, + int ddr_interface_num, int sequence); +extern void perform_ddr_init_sequence(bdk_node_t node, int rank_mask, + int ddr_interface_num); +extern int ddr_memory_preserved(bdk_node_t node); + +extern int init_octeon3_ddr3_interface(bdk_node_t node, + const ddr_configuration_t *ddr_configuration, uint32_t ddr_hertz, + uint32_t cpu_hertz, uint32_t ddr_ref_hertz, int board_type, + int board_rev_maj, int board_rev_min, int ddr_interface_num, + uint32_t ddr_interface_mask); + +extern void +set_vref(bdk_node_t node, int ddr_interface_num, int rank, + int range, int value); + +typedef struct { + unsigned char *rodt_ohms; + unsigned char *rtt_nom_ohms; + unsigned char *rtt_nom_table; + unsigned char *rtt_wr_ohms; + unsigned char *dic_ohms; + short *drive_strength; + short *dqx_strength; +} impedence_values_t; + +extern impedence_values_t ddr4_impedence_values; + +extern int +compute_vref_value(bdk_node_t node, int ddr_interface_num, + int rankx, int dimm_count, int rank_count, + impedence_values_t *imp_values, int is_stacked_die); + +extern unsigned short +load_dac_override(int node, int ddr_interface_num, + int dac_value, int byte); +extern int +read_DAC_DBI_settings(int node, int ddr_interface_num, + int dac_or_dbi, int *settings); +extern void +display_DAC_DBI_settings(int node, int ddr_interface_num, int dac_or_dbi, + int ecc_ena, int *settings, char *title); + +#define RODT_OHMS_COUNT 8 +#define RTT_NOM_OHMS_COUNT 8 +#define RTT_NOM_TABLE_COUNT 8 +#define RTT_WR_OHMS_COUNT 8 +#define DIC_OHMS_COUNT 3 +#define DRIVE_STRENGTH_COUNT 15 + +extern uint64_t hertz_to_psecs(uint64_t hertz); +extern uint64_t psecs_to_mts(uint64_t psecs); +extern uint64_t mts_to_hertz(uint64_t mts); +extern uint64_t pretty_psecs_to_mts(uint64_t psecs); diff --git a/src/vendorcode/cavium/bdk/libdram/dram-internal.h b/src/vendorcode/cavium/bdk/libdram/dram-internal.h new file mode 100644 index 0000000000..07fdbcbf54 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libdram/dram-internal.h @@ -0,0 +1,201 @@ +#ifndef __DRAM_INTERNAL_H__ +#define __DRAM_INTERNAL_H__ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). 
All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ + +/** + * This header defines all internal API for libdram. None + * of these functions should be called by users of the library. + * This is the only header that DRAM files should include + * from the libdram directory + */ + +#include "libdram.h" +#include "lib_octeon_shared.h" +#include "dram-print.h" +#include "dram-util.h" +#include "dram-csr.h" +#include "dram-env.h" +#include "dram-gpio.h" +#include "dram-spd.h" +#include "dram-l2c.h" +#include "dram-init-ddr3.h" + +#undef DRAM_CSR_WRITE_INLINE + +// define how many HW WL samples to take for majority voting +// MUST BE odd!! +// assume there should only be 2 possible values that will show up, +// so treat ties as a problem!!! +#define WLEVEL_LOOPS_DEFAULT 5 // NOTE: do not change this without checking the code!!! + +// define how many HW RL samples per rank to take +// multiple samples will allow either: +// 1. looking for the best sample score +// 2. 
averaging the samples into a composite score +// symbol PICK_BEST_RANK_SCORE_NOT_AVG is used to choose +// (see dram-init-ddr3.c: +#define RLEVEL_AVG_LOOPS_DEFAULT 3 +#define PICK_BEST_RANK_SCORE_NOT_AVG 1 + +typedef struct { + int delay; + int loop_total; + int loop_count; + int best; + uint64_t bm; + int bmerrs; + int sqerrs; + int bestsq; +} rlevel_byte_data_t; + +typedef struct { + uint64_t bm; + uint8_t mstart; + uint8_t width; + int errs; +} rlevel_bitmask_t; + +#define SET_DDR_DLL_CTL3(field, expr) \ + do { \ + ddr_dll_ctl3.cn81xx.field = (expr); \ + } while (0) + +#define ENCODE_DLL90_BYTE_SEL(byte_sel) ((byte_sel)+1) + +#define GET_DDR_DLL_CTL3(field) \ + (ddr_dll_ctl3.cn81xx.field) + + +#define RLEVEL_NONSEQUENTIAL_DELAY_ERROR 50 +#define RLEVEL_ADJACENT_DELAY_ERROR 30 + +#define TWO_LMC_MASK 0x03 +#define FOUR_LMC_MASK 0x0f +#define ONE_DIMM_MASK 0x01 +#define TWO_DIMM_MASK 0x03 + +extern int initialize_ddr_clock(bdk_node_t node, + const ddr_configuration_t *ddr_configuration, uint32_t cpu_hertz, + uint32_t ddr_hertz, uint32_t ddr_ref_hertz, int ddr_interface_num, + uint32_t ddr_interface_mask); + +extern int test_dram_byte(bdk_node_t node, int ddr_interface_num, uint64_t p, + uint64_t bitmask, uint64_t *xor_data); +extern int dram_tuning_mem_xor(bdk_node_t node, int ddr_interface_num, uint64_t p, + uint64_t bitmask, uint64_t *xor_data); + +// "mode" arg +#define DBTRAIN_TEST 0 +#define DBTRAIN_DBI 1 +#define DBTRAIN_LFSR 2 +extern int test_dram_byte_hw(bdk_node_t node, int ddr_interface_num, + uint64_t p, int mode, uint64_t *xor_data); +extern int run_best_hw_patterns(bdk_node_t node, int ddr_interface_num, + uint64_t p, int mode, uint64_t *xor_data); + +extern int get_dimm_part_number(char *buffer, bdk_node_t node, + const dimm_config_t *dimm_config, + int ddr_type); +extern uint32_t get_dimm_serial_number(bdk_node_t node, + const dimm_config_t *dimm_config, + int ddr_type); + +extern int octeon_ddr_initialize(bdk_node_t node, uint32_t cpu_hertz, + uint32_t ddr_hertz, uint32_t ddr_ref_hertz, uint32_t ddr_interface_mask, + const ddr_configuration_t *ddr_configuration, uint32_t *measured_ddr_hertz, + int board_type, int board_rev_maj, int board_rev_min); + +extern uint64_t divide_nint(uint64_t dividend, uint64_t divisor); + +typedef enum { + DDR3_DRAM = 3, + DDR4_DRAM = 4, +} ddr_type_t; + +static inline int get_ddr_type(bdk_node_t node, const dimm_config_t *dimm_config) +{ + int spd_ddr_type; + +#define DEVICE_TYPE DDR4_SPD_KEY_BYTE_DEVICE_TYPE // same for DDR3 and DDR4 + spd_ddr_type = read_spd(node, dimm_config, DEVICE_TYPE); + + debug_print("%s:%d spd_ddr_type=0x%02x\n", __FUNCTION__, __LINE__, spd_ddr_type); + + /* we return only DDR4 or DDR3 */ + return (spd_ddr_type == 0x0C) ? DDR4_DRAM : DDR3_DRAM; +} + +static inline int get_dimm_ecc(bdk_node_t node, const dimm_config_t *dimm_config, int ddr_type) +{ +#define BUS_WIDTH(t) (((t) == DDR4_DRAM) ? DDR4_SPD_MODULE_MEMORY_BUS_WIDTH : DDR3_SPD_MEMORY_BUS_WIDTH) + + return !!(read_spd(node, dimm_config, BUS_WIDTH(ddr_type)) & 8); +} + +static inline int get_dimm_module_type(bdk_node_t node, const dimm_config_t *dimm_config, int ddr_type) +{ +#define MODULE_TYPE DDR4_SPD_KEY_BYTE_MODULE_TYPE // same for DDR3 and DDR4 + + return (read_spd(node, dimm_config, MODULE_TYPE) & 0x0F); +} + +extern int common_ddr4_fixups(dram_config_t *cfg, uint32_t default_udimm_speed); + +#define DEFAULT_BEST_RANK_SCORE 9999999 +#define MAX_RANK_SCORE_LIMIT 99 // is this OK? 
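+
+// Usage sketch for the SPD helpers above (values quoted for illustration;
+// they follow the JEDEC SPD encodings this code assumes): DEVICE_TYPE is
+// 0x0C for DDR4 and 0x0B for DDR3, so get_ddr_type() maps anything other
+// than 0x0C to DDR3_DRAM; get_dimm_ecc() tests bit 3 of the bus-width
+// byte, so a 64-bit-plus-ECC module (0x0B) yields 1.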
+ +unsigned short load_dll_offset(bdk_node_t node, int ddr_interface_num, + int dll_offset_mode, int byte_offset, int byte); +void change_dll_offset_enable(bdk_node_t node, int ddr_interface_num, int change); + +extern int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune); +extern int perform_HW_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int bytelane); + +extern int perform_margin_write_voltage(bdk_node_t node); +extern int perform_margin_read_voltage(bdk_node_t node); + +#define LMC_DDR3_RESET_ASSERT 0 +#define LMC_DDR3_RESET_DEASSERT 1 +extern void cn88xx_lmc_ddr3_reset(bdk_node_t node, int ddr_interface_num, int reset); +extern void perform_lmc_reset(bdk_node_t node, int ddr_interface_num); +extern void ddr4_mrw(bdk_node_t node, int ddr_interface_num, int rank, + int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1); +#endif /* __DRAM_INTERNAL_H__ */ + diff --git a/src/vendorcode/cavium/bdk/libdram/dram-l2c.c b/src/vendorcode/cavium/bdk/libdram/dram-l2c.c new file mode 100644 index 0000000000..11112955b2 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libdram/dram-l2c.c @@ -0,0 +1,69 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. 
***********************license end**************************************/
+#include <bdk.h>
+#include "dram-internal.h"
+
+int limit_l2_ways(bdk_node_t node, int ways, int verbose)
+{
+    int ways_max = bdk_l2c_get_num_assoc(node);
+    int ways_min = 0;
+    int errors = 0;
+
+    if (ways >= ways_min && ways <= ways_max)
+    {
+        uint32_t valid_mask = (0x1 << ways_max) - 1;
+        /* mask presumably selects the ways to take away: e.g. ways_max = 16,
+           ways = 8 -> valid_mask = 0xffff, mask = 0xff00 (upper 8 ways
+           masked off, 8 remain usable) */
+        uint32_t mask = (valid_mask << ways) & valid_mask;
+        if (verbose)
+            printf("Limiting L2 to %d ways\n", ways);
+        for (int i = 0; i < (int)bdk_get_num_cores(node); i++)
+            errors += bdk_l2c_set_core_way_partition(node, i, mask);
+        errors += bdk_l2c_set_hw_way_partition(node, mask);
+    }
+    else
+    {
+        errors++;
+        printf("ERROR: invalid limit_l2_ways %d, must be between %d and %d\n",
+               ways, ways_min, ways_max);
+    }
+    if (errors)
+        puts("ERROR limiting L2 cache ways"); /* puts() supplies the newline */
+
+    return errors;
+}
+
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-l2c.h b/src/vendorcode/cavium/bdk/libdram/dram-l2c.h
new file mode 100644
index 0000000000..5d2840884b
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-l2c.h
@@ -0,0 +1,45 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+*   * Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*
+*   * Redistributions in binary form must reproduce the above
+*     copyright notice, this list of conditions and the following
+*     disclaimer in the documentation and/or other materials provided
+*     with the distribution.
+*
+*   * Neither the name of Cavium Inc. nor the names of
+*     its contributors may be used to endorse or promote products
+*     derived from this software without specific prior written
+*     permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Functions for controlling L2C. Internal use only.
+ */
+
+extern int limit_l2_ways(bdk_node_t node, int ways, int verbose);
+
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-print.h b/src/vendorcode/cavium/bdk/libdram/dram-print.h
new file mode 100644
index 0000000000..94cdf92fbf
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-print.h
@@ -0,0 +1,86 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+*   * Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*
+*   * Redistributions in binary form must reproduce the above
+*     copyright notice, this list of conditions and the following
+*     disclaimer in the documentation and/or other materials provided
+*     with the distribution.
+*
+*   * Neither the name of Cavium Inc. nor the names of
+*     its contributors may be used to endorse or promote products
+*     derived from this software without specific prior written
+*     permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Functions for displaying output in libdram. Internal use only.
+ */
+
+typedef enum {
+    // low 4 bits are verbosity level
+    VBL_OFF     = 0,  // use this only to init dram_verbosity
+    VBL_ALL     = 0,  // use this only in VB_PRT() to get printf equiv
+    VBL_NORM    = 1,
+    VBL_FAE     = 2,
+    VBL_TME     = 3,
+    VBL_DEV     = 4,
+    VBL_DEV2    = 5,
+    VBL_DEV3    = 6,
+    VBL_DEV4    = 7,
+    VBL_NONE    = 15, // use this only in VB_PRT() to get no printing
+    // upper 4 bits are special verbosities
+    VBL_SEQ     = 16,
+    VBL_CSRS    = 32,
+    VBL_SPECIAL = 48,
+    // force at least 8 bits for enum
+    VBL_LAST    = 255
+} dram_verbosity_t;
+
+extern dram_verbosity_t dram_verbosity;
+
+// "level" should be 1-7, or only one of the special bits
+// let the compiler optimize the test for verbosity
+#define is_verbosity_level(level) ((int)(dram_verbosity & 0x0f) >= (level))
+#define is_verbosity_special(level) (((int)(dram_verbosity & 0xf0) & (level)) != 0)
+#define dram_is_verbose(level) (((level) & VBL_SPECIAL) ? is_verbosity_special(level) : is_verbosity_level(level))
+
+#define VB_PRT(level, format, ...)          \
+    do {                                    \
+        if (dram_is_verbose(level))         \
+            printf(format, ##__VA_ARGS__);  \
+    } while (0)
+
+#define ddr_print(format, ...) VB_PRT(VBL_NORM, format, ##__VA_ARGS__)
+
+#define error_print(format, ...) printf(format, ##__VA_ARGS__)
+
+#ifdef DEBUG_DEBUG_PRINT
+    #define debug_print(format, ...) printf(format, ##__VA_ARGS__)
+#else
+    #define debug_print(format, ...) do {} while (0)
+#endif
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-spd.c b/src/vendorcode/cavium/bdk/libdram/dram-spd.c
new file mode 100644
index 0000000000..3717ca1109
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-spd.c
@@ -0,0 +1,583 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+*   * Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*
+*   * Redistributions in binary form must reproduce the above
+*     copyright notice, this list of conditions and the following
+*     disclaimer in the documentation and/or other materials provided
+*     with the distribution.
+*
+*   * Neither the name of Cavium Inc. nor the names of
+*     its contributors may be used to endorse or promote products
+*     derived from this software without specific prior written
+*     permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include <ctype.h>
+#include "dram-internal.h"
+
+/**
+ * Read the entire contents of a DIMM SPD and store it in the device tree. The
+ * current DRAM config is also updated, so future SPD accesses use the cached
+ * copy.
+ *
+ * @param node Node the DRAM config is for
+ * @param cfg  Current DRAM config. Updated with SPD data
+ * @param lmc  LMC to read DIMM for
+ * @param dimm DIMM slot for SPD to read
+ *
+ * @return Zero on success, negative on failure
+ */
+int read_entire_spd(bdk_node_t node, dram_config_t *cfg, int lmc, int dimm)
+{
+    /* If pointer to data is provided, use it, otherwise read from SPD over twsi */
+    if (cfg->config[lmc].dimm_config_table[dimm].spd_ptr)
+        return 0;
+    if (!cfg->config[lmc].dimm_config_table[dimm].spd_addr)
+        return -1;
+
+    /* Figure out how to access the SPD */
+    int spd_addr = cfg->config[lmc].dimm_config_table[dimm].spd_addr;
+    int bus = spd_addr >> 12;
+    int address = spd_addr & 0x7f;
+
+    /* Figure out the size we will read */
+    int64_t dev_type = bdk_twsix_read_ia(node, bus, address, DDR4_SPD_KEY_BYTE_DEVICE_TYPE, 1, 1);
+    if (dev_type < 0)
+        return -1; /* No DIMM */
+    int spd_size = (dev_type == 0x0c) ? 512 : 256;
+
+    /* Allocate storage */
+    uint32_t *spd_buf = malloc(spd_size);
+    if (!spd_buf)
+        return -1;
+    uint32_t *ptr = spd_buf;
+
+    for (int bank = 0; bank < (spd_size >> 8); bank++)
+    {
+        /* this should only happen for DDR4, which has a second bank of 256 bytes */
+        if (bank)
+            bdk_twsix_write_ia(node, bus, 0x36 | bank, 0, 2, 1, 0);
+        int bank_size = 256;
+        for (int i = 0; i < bank_size; i += 4)
+        {
+            int64_t data = bdk_twsix_read_ia(node, bus, address, i, 4, 1);
+            if (data < 0)
+            {
+                free(spd_buf);
+                bdk_error("Failed to read SPD data at 0x%x\n", i + (bank << 8));
+                /* Restore the bank to zero */
+                if (bank)
+                    bdk_twsix_write_ia(node, bus, 0x36 | 0, 0, 2, 1, 0);
+                return -1;
+            }
+            else
+                *ptr++ = bdk_be32_to_cpu(data);
+        }
+        /* Restore the bank to zero */
+        if (bank)
+            bdk_twsix_write_ia(node, bus, 0x36 | 0, 0, 2, 1, 0);
+    }
+
+    /* Store the SPD in the device tree */
+    bdk_config_set_blob(spd_size, spd_buf, BDK_CONFIG_DDR_SPD_DATA, dimm, lmc, node);
+    cfg->config[lmc].dimm_config_table[dimm].spd_ptr = (void*)spd_buf;
+
+    return 0;
+}
+
+/* Read a DIMM SPD value, either using TWSI to read it from the DIMM, or
+ * from a provided array.
+ */
+int read_spd(bdk_node_t node, const dimm_config_t *dimm_config, int spd_field)
+{
+    /* If pointer to data is provided, use it, otherwise read from SPD over twsi */
+    if (dimm_config->spd_ptr)
+        return dimm_config->spd_ptr[spd_field];
+    else if (dimm_config->spd_addr)
+    {
+        int data;
+        int bus = dimm_config->spd_addr >> 12;
+        int address = dimm_config->spd_addr & 0x7f;
+
+        /* this should only happen for DDR4, which has a second bank of 256 bytes */
+        int bank = (spd_field >> 8) & 1;
+        if (bank) {
+            bdk_twsix_write_ia(node, bus, 0x36 | bank, 0, 2, 1, 0);
+            spd_field %= 256;
+        }
+
+        data = bdk_twsix_read_ia(node, bus, address, spd_field, 1, 1);
+
+        /* Restore the bank to zero */
+        if (bank) {
+            bdk_twsix_write_ia(node, bus, 0x36 | 0, 0, 2, 1, 0);
+        }
+
+        return data;
+    }
+    else
+        return -1;
+}
+
+static uint16_t ddr3_crc16(uint8_t *ptr, int count)
+{
+    /* From DDR3 spd specification */
+    int crc, i;
+    crc = 0;
+    while (--count >= 0)
+    {
+        crc = crc ^ (int)*ptr++ << 8;
+        for (i = 0; i < 8; ++i)
+            if (crc & 0x8000)
+                crc = crc << 1 ^ 0x1021;
+            else
+                crc = crc << 1;
+    }
+    return crc & 0xFFFF;
+}
+
+static int validate_spd_checksum_ddr3(bdk_node_t node, int twsi_addr, int silent)
+{
+    uint8_t spd_data[128];
+    int crc_bytes = 126;
+    uint16_t crc_comp;
+    int i;
+    int rv;
+    int ret = 1;
+    for (i = 0; i < 128; i++)
+    {
+        rv = bdk_twsix_read_ia(node, twsi_addr >> 12, twsi_addr & 0x7f, i, 1, 1);
+        if (rv < 0)
+            return 0; /* TWSI read error */
+        spd_data[i] = (uint8_t)rv;
+    }
+    /* Check byte 0 to see how many bytes checksum is over */
+    if (spd_data[0] & 0x80)
+        crc_bytes = 117;
+
+    crc_comp = ddr3_crc16(spd_data, crc_bytes);
+
+    if (spd_data[DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE] != (crc_comp & 0xff) ||
+        spd_data[DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE] != (crc_comp >> 8))
+    {
+        if (!silent) {
+            printf("DDR3 SPD CRC error, spd addr: 0x%x, calculated crc: 0x%04x, read crc: 0x%02x%02x\n",
+                   twsi_addr, crc_comp,
+                   spd_data[DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE],
+                   spd_data[DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE]);
+        }
+        ret = 0;
+    }
+    return ret;
+}
+
+static int validate_spd_checksum(bdk_node_t node, int twsi_addr, int silent)
+{
+    int rv;
+
+    debug_print("Validating DIMM at address 0x%x\n", twsi_addr);
+
+    if (!twsi_addr) return 1; /* return OK if we are not doing real DIMMs */
+
+    /* Look up module type to determine if DDR3 or DDR4 */
+    rv = bdk_twsix_read_ia(node, twsi_addr >> 12, twsi_addr & 0x7f, 2, 1, 1);
+
+    if (rv >= 0xB && rv <= 0xC) /* this is DDR3 or DDR4, do same */
+        return validate_spd_checksum_ddr3(node, twsi_addr, silent);
+
+    if (!silent)
+        printf("Unrecognized DIMM type: 0x%x at spd address: 0x%x\n",
+               rv, twsi_addr);
+
+    return 0;
+}
+
+
+int validate_dimm(bdk_node_t node, const dimm_config_t *dimm_config)
+{
+    int spd_addr;
+
+    spd_addr = dimm_config->spd_addr;
+
+    debug_print("Validating dimm spd addr: 0x%02x spd ptr: %p\n",
+                spd_addr, dimm_config->spd_ptr);
+
+    // if the slot is not possible
+    if (!spd_addr && !dimm_config->spd_ptr)
+        return -1;
+
+    {
+        int val0, val1;
+        int ddr_type = get_ddr_type(node, dimm_config);
+
+        switch (ddr_type)
+        {
+        case DDR3_DRAM: /* DDR3 */
+        case DDR4_DRAM: /* DDR4 */
+
+            debug_print("Validating DDR%d DIMM\n", ddr_type);
+
+#define DENSITY_BANKS DDR4_SPD_DENSITY_BANKS           // same for DDR3 and DDR4
+#define ROW_COL_BITS  DDR4_SPD_ADDRESSING_ROW_COL_BITS // same for DDR3 and DDR4
+
+            val0 = read_spd(node, dimm_config, DENSITY_BANKS);
+            val1 = read_spd(node, dimm_config, ROW_COL_BITS);
+            if (val0 < 0 && val1 < 0) {
+                debug_print("Error reading SPD for DIMM\n");
+                return 0; /* Failed to read dimm */
+            }
+            if (val0 == 0xff && val1 == 0xff) {
+                ddr_print("Blank or unreadable SPD for DIMM\n");
+                return 0; /* Blank SPD or otherwise unreadable device */
+            }
+
+            /* Don't treat bad checksums as fatal. */
+            validate_spd_checksum(node, spd_addr, 0);
+            break;
+
+        case 0x00: /* Terminator detected. Fail silently. */
+            return 0;
+
+        default:
+            debug_print("Unknown DIMM type 0x%x for DIMM @ 0x%x\n",
+                        ddr_type, dimm_config->spd_addr);
+            return 0; /* Failed to read dimm */
+        }
+    }
+
+    return 1;
+}
+
+int get_dimm_part_number(char *buffer, bdk_node_t node,
+                         const dimm_config_t *dimm_config,
+                         int ddr_type)
+{
+    int i;
+    int c;
+    int skipping = 1;
+    int strlen = 0;
+
+#define PART_LIMIT(t)  (((t) == DDR4_DRAM) ? 19 : 18)
+#define PART_NUMBER(t) (((t) == DDR4_DRAM) ? DDR4_SPD_MODULE_PART_NUMBER : DDR3_SPD_MODULE_PART_NUMBER)
+
+    int limit  = PART_LIMIT(ddr_type);
+    int offset = PART_NUMBER(ddr_type);
+
+    for (i = 0; i < limit; ++i) {
+
+        c = (read_spd(node, dimm_config, offset+i) & 0xff);
+        if (c == 0) // any null, we are done
+            break;
+
+        /* Skip leading spaces. */
+        if (skipping) {
+            if (isspace(c))
+                continue;
+            else
+                skipping = 0;
+        }
+
+        /* Put non-null non-leading-space-skipped char into buffer */
+        buffer[strlen] = c;
+        ++strlen;
+    }
+
+    if (strlen > 0) {
+        i = strlen - 1; // last char put into buf
+        while (i >= 0 && isspace((int)buffer[i])) { // still in buf and a space
+            --i;
+            --strlen;
+        }
+    }
+    buffer[strlen] = 0; /* Ensure that the string is terminated */
+
+    return strlen;
+}
+
+uint32_t get_dimm_serial_number(bdk_node_t node, const dimm_config_t *dimm_config, int ddr_type)
+{
+    uint32_t serial_number = 0;
+    int offset;
+
+#define SERIAL_NUMBER(t) (((t) == DDR4_DRAM) ? DDR4_SPD_MODULE_SERIAL_NUMBER : DDR3_SPD_MODULE_SERIAL_NUMBER)
+
+    offset = SERIAL_NUMBER(ddr_type);
+
+    for (int i = 0, j = 24; i < 4; ++i, j -= 8) {
+        serial_number |= ((read_spd(node, dimm_config, offset + i) & 0xff) << j);
+    }
+
+    return serial_number;
+}
+
+static uint32_t get_dimm_checksum(bdk_node_t node, const dimm_config_t *dimm_config, int ddr_type)
+{
+    uint32_t spd_chksum;
+
+#define LOWER_NIBBLE(t) (((t) == DDR4_DRAM) ? DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE : DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE)
+#define UPPER_NIBBLE(t) (((t) == DDR4_DRAM) ? DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE : DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE)
+
+    spd_chksum  =   0xff & read_spd(node, dimm_config, LOWER_NIBBLE(ddr_type));
+    spd_chksum |= ((0xff & read_spd(node, dimm_config, UPPER_NIBBLE(ddr_type))) << 8);
+
+    return spd_chksum;
+}
+
+static
+void report_common_dimm(bdk_node_t node, const dimm_config_t *dimm_config, int dimm,
+                        const char **dimm_types, int ddr_type, char *volt_str,
+                        int ddr_interface_num, int num_ranks, int dram_width, int dimm_size_mb)
+{
+    int spd_ecc;
+    unsigned spd_module_type;
+    uint32_t serial_number;
+    char part_number[21]; /* 20 bytes plus string terminator is big enough for either */
+    char *sn_str;
+
+    spd_module_type = get_dimm_module_type(node, dimm_config, ddr_type);
+    spd_ecc = get_dimm_ecc(node, dimm_config, ddr_type);
+
+    (void) get_dimm_part_number(part_number, node, dimm_config, ddr_type);
+
+    serial_number = get_dimm_serial_number(node, dimm_config, ddr_type);
+    if ((serial_number != 0) && (serial_number != 0xffffffff)) {
+        sn_str = "s/n";
+    } else {
+        serial_number = get_dimm_checksum(node, dimm_config, ddr_type);
+        sn_str = "chksum";
+    }
+
+    printf("N%d.LMC%d.DIMM%d: %d MB, DDR%d %s %dRx%d %s, p/n: %s, %s: %u, %s\n",
+           node, ddr_interface_num, dimm, dimm_size_mb, ddr_type,
+           dimm_types[spd_module_type], num_ranks, dram_width,
+           (spd_ecc ? "ECC" : "non-ECC"), part_number,
+           sn_str, serial_number, volt_str);
+}
+
+const char *ddr3_dimm_types[16] = {
+    /* 0000 */ "Undefined",
+    /* 0001 */ "RDIMM",
+    /* 0010 */ "UDIMM",
+    /* 0011 */ "SO-DIMM",
+    /* 0100 */ "Micro-DIMM",
+    /* 0101 */ "Mini-RDIMM",
+    /* 0110 */ "Mini-UDIMM",
+    /* 0111 */ "Mini-CDIMM",
+    /* 1000 */ "72b-SO-UDIMM",
+    /* 1001 */ "72b-SO-RDIMM",
+    /* 1010 */ "72b-SO-CDIMM",
+    /* 1011 */ "LRDIMM",
+    /* 1100 */ "16b-SO-DIMM",
+    /* 1101 */ "32b-SO-DIMM",
+    /* 1110 */ "Reserved",
+    /* 1111 */ "Reserved"
+};
+
+static
+void report_ddr3_dimm(bdk_node_t node, const dimm_config_t *dimm_config,
+                      int dimm, int ddr_interface_num, int num_ranks,
+                      int dram_width, int dimm_size_mb)
+{
+    int spd_voltage;
+    char *volt_str = "unknown"; /* default in case no voltage bit matches */
+
+    spd_voltage = read_spd(node, dimm_config, DDR3_SPD_NOMINAL_VOLTAGE);
+    if ((spd_voltage == 0) || (spd_voltage & 3))
+        volt_str = "1.5V";
+    if (spd_voltage & 2)
+        volt_str = "1.35V";
+    if (spd_voltage & 4)
+        volt_str = "1.2xV";
+
+    report_common_dimm(node, dimm_config, dimm, ddr3_dimm_types,
+                       DDR3_DRAM, volt_str, ddr_interface_num,
+                       num_ranks, dram_width, dimm_size_mb);
+}
+
+const char *ddr4_dimm_types[16] = {
+    /* 0000 */ "Extended",
+    /* 0001 */ "RDIMM",
+    /* 0010 */ "UDIMM",
+    /* 0011 */ "SO-DIMM",
+    /* 0100 */ "LRDIMM",
+    /* 0101 */ "Mini-RDIMM",
+    /* 0110 */ "Mini-UDIMM",
+    /* 0111 */ "Reserved",
+    /* 1000 */ "72b-SO-RDIMM",
+    /* 1001 */ "72b-SO-UDIMM",
+    /* 1010 */ "Reserved",
+    /* 1011 */ "Reserved",
+    /* 1100 */ "16b-SO-DIMM",
+    /* 1101 */ "32b-SO-DIMM",
+    /* 1110 */ "Reserved",
+    /* 1111 */ "Reserved"
+};
+
+static
+void report_ddr4_dimm(bdk_node_t node, const dimm_config_t *dimm_config,
+                      int dimm, int ddr_interface_num, int num_ranks,
+                      int dram_width, int dimm_size_mb)
+{
+    int spd_voltage;
+    char *volt_str = "unknown"; /* default in case no voltage bit matches */
+
+    spd_voltage = read_spd(node, dimm_config, DDR4_SPD_MODULE_NOMINAL_VOLTAGE);
+    if ((spd_voltage == 0x01) || (spd_voltage & 0x02))
+        volt_str = "1.2V";
+    if ((spd_voltage == 0x04) || (spd_voltage & 0x08))
+        volt_str = "TBD1 V";
+    if ((spd_voltage == 0x10) || (spd_voltage & 0x20))
+        volt_str = "TBD2 V";
+
+    report_common_dimm(node, dimm_config, dimm, 
ddr4_dimm_types, + DDR4_DRAM, volt_str, ddr_interface_num, + num_ranks, dram_width, dimm_size_mb); +} + +void report_dimm(bdk_node_t node, const dimm_config_t *dimm_config, + int dimm, int ddr_interface_num, int num_ranks, + int dram_width, int dimm_size_mb) +{ + int ddr_type; + + /* ddr_type only indicates DDR4 or DDR3 */ + ddr_type = get_ddr_type(node, dimm_config); + + if (ddr_type == DDR4_DRAM) + report_ddr4_dimm(node, dimm_config, dimm, ddr_interface_num, + num_ranks, dram_width, dimm_size_mb); + else + report_ddr3_dimm(node, dimm_config, dimm, ddr_interface_num, + num_ranks, dram_width, dimm_size_mb); +} + +static int +get_ddr4_spd_speed(bdk_node_t node, const dimm_config_t *dimm_config) +{ + int spdMTB = 125; + int spdFTB = 1; + + int tCKAVGmin + = spdMTB * read_spd(node, dimm_config, DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN) + + spdFTB * (signed char) read_spd(node, dimm_config, DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN); + + return pretty_psecs_to_mts(tCKAVGmin); +} + +static int +get_ddr3_spd_speed(bdk_node_t node, const dimm_config_t *dimm_config) +{ + int spd_mtb_dividend = 0xff & read_spd(node, dimm_config, DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND); + int spd_mtb_divisor = 0xff & read_spd(node, dimm_config, DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR); + int spd_tck_min = 0xff & read_spd(node, dimm_config, DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN); + + short ftb_Dividend = read_spd(node, dimm_config, DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4; + short ftb_Divisor = read_spd(node, dimm_config, DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf; + + ftb_Divisor = (ftb_Divisor == 0) ? 1 : ftb_Divisor; /* Make sure that it is not 0 */ + + int mtb_psec = spd_mtb_dividend * 1000 / spd_mtb_divisor; + int tCKmin = mtb_psec * spd_tck_min; + tCKmin += ftb_Dividend * + (signed char) read_spd(node, dimm_config, DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN) + / ftb_Divisor; + + return pretty_psecs_to_mts(tCKmin); +} + +static int +speed_bin_down(int speed) +{ + if (speed == 2133) + return 1866; + else if (speed == 1866) + return 1600; + else + return speed; +} + +int +dram_get_default_spd_speed(bdk_node_t node, const ddr_configuration_t *ddr_config) +{ + int lmc, dimm; + int speed, ret_speed = 0; + int ddr_type = get_ddr_type(node, &ddr_config[0].dimm_config_table[0]); + int dimm_speed[8], dimm_count = 0; + int dimms_per_lmc = 0; + + for (lmc = 0; lmc < 4; lmc++) { + for (dimm = 0; dimm < DDR_CFG_T_MAX_DIMMS; dimm++) { + const dimm_config_t *dimm_config = &ddr_config[lmc].dimm_config_table[dimm]; + if (/*dimm_config->spd_addr ||*/ dimm_config->spd_ptr) + { + speed = (ddr_type == DDR4_DRAM) + ? get_ddr4_spd_speed(node, dimm_config) + : get_ddr3_spd_speed(node, dimm_config); + //printf("N%d.LMC%d.DIMM%d: SPD speed %d\n", node, lmc, dimm, speed); + dimm_speed[dimm_count] = speed; + dimm_count++; + if (lmc == 0) + dimms_per_lmc++; + } + } + } + + // all DIMMs must be same speed + speed = dimm_speed[0]; + for (dimm = 1; dimm < dimm_count; dimm++) { + if (dimm_speed[dimm] != speed) { + ret_speed = -1; + goto finish_up; + } + } + + // if 2400 or greater, use 2133 + if (speed >= 2400) + speed = 2133; + + // use next speed down if 2DPC... + if (dimms_per_lmc > 1) + speed = speed_bin_down(speed); + + // Update the in memory config to match the automatically calculated speed + bdk_config_set_int(speed, BDK_CONFIG_DDR_SPEED, node); + + // do filtering for our jittery PLL + if (speed == 2133) + speed = 2100; + else if (speed == 1866) + speed = 1880; + + // OK, return what we have... 
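+    // (assuming mts_to_hertz() inverts the usual MT/s math, MT/s = Hz / 1000000 * 2,
+    //  e.g. 2100 MT/s -> 1050000000 Hz of DDR clock)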
+    ret_speed = mts_to_hertz(speed);
+
+ finish_up:
+    //printf("N%d: Returning default SPD speed %d\n", node, ret_speed);
+    return ret_speed;
+}
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-spd.h b/src/vendorcode/cavium/bdk/libdram/dram-spd.h
new file mode 100644
index 0000000000..df229f4959
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-spd.h
@@ -0,0 +1,166 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+*   * Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*
+*   * Redistributions in binary form must reproduce the above
+*     copyright notice, this list of conditions and the following
+*     disclaimer in the documentation and/or other materials provided
+*     with the distribution.
+*
+*   * Neither the name of Cavium Inc. nor the names of
+*     its contributors may be used to endorse or promote products
+*     derived from this software without specific prior written
+*     permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Functions, enumerations, and structures related to DIMM SPDs.
+ * Everything in this file is internal to libdram.
+ */ + +/* data field addresses in the DDR3 SPD eeprom */ +typedef enum ddr3_spd_addrs { + DDR3_SPD_BYTES_PROGRAMMED = 0, + DDR3_SPD_REVISION = 1, + DDR3_SPD_KEY_BYTE_DEVICE_TYPE = 2, + DDR3_SPD_KEY_BYTE_MODULE_TYPE = 3, + DDR3_SPD_DENSITY_BANKS = 4, + DDR3_SPD_ADDRESSING_ROW_COL_BITS = 5, + DDR3_SPD_NOMINAL_VOLTAGE = 6, + DDR3_SPD_MODULE_ORGANIZATION = 7, + DDR3_SPD_MEMORY_BUS_WIDTH = 8, + DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR = 9, + DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND = 10, + DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR = 11, + DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN = 12, + DDR3_SPD_CAS_LATENCIES_LSB = 14, + DDR3_SPD_CAS_LATENCIES_MSB = 15, + DDR3_SPD_MIN_CAS_LATENCY_TAAMIN = 16, + DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN = 17, + DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN = 18, + DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN = 19, + DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN = 20, + DDR3_SPD_UPPER_NIBBLES_TRAS_TRC = 21, + DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN = 22, + DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN = 23, + DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN = 24, + DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN = 25, + DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN = 26, + DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN = 27, + DDR3_SPD_UPPER_NIBBLE_TFAW = 28, + DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN = 29, + DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN = 34, + DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN = 35, + DDR3_SPD_MIN_RAS_CAS_DELAY_FINE_TRCDMIN = 36, + DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN = 37, + DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_FINE_TRCMIN = 38, + DDR3_SPD_ADDRESS_MAPPING = 63, + DDR3_SPD_MODULE_SERIAL_NUMBER = 122, + DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE = 126, + DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE = 127, + DDR3_SPD_MODULE_PART_NUMBER = 128 +} ddr3_spd_addr_t; + +/* data field addresses in the DDR4 SPD eeprom */ +typedef enum ddr4_spd_addrs { + DDR4_SPD_BYTES_PROGRAMMED = 0, + DDR4_SPD_REVISION = 1, + DDR4_SPD_KEY_BYTE_DEVICE_TYPE = 2, + DDR4_SPD_KEY_BYTE_MODULE_TYPE = 3, + DDR4_SPD_DENSITY_BANKS = 4, + DDR4_SPD_ADDRESSING_ROW_COL_BITS = 5, + DDR4_SPD_PACKAGE_TYPE = 6, + DDR4_SPD_OPTIONAL_FEATURES = 7, + DDR4_SPD_THERMAL_REFRESH_OPTIONS = 8, + DDR4_SPD_OTHER_OPTIONAL_FEATURES = 9, + DDR4_SPD_SECONDARY_PACKAGE_TYPE = 10, + DDR4_SPD_MODULE_NOMINAL_VOLTAGE = 11, + DDR4_SPD_MODULE_ORGANIZATION = 12, + DDR4_SPD_MODULE_MEMORY_BUS_WIDTH = 13, + DDR4_SPD_MODULE_THERMAL_SENSOR = 14, + DDR4_SPD_RESERVED_BYTE15 = 15, + DDR4_SPD_RESERVED_BYTE16 = 16, + DDR4_SPD_TIMEBASES = 17, + DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN = 18, + DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX = 19, + DDR4_SPD_CAS_LATENCIES_BYTE0 = 20, + DDR4_SPD_CAS_LATENCIES_BYTE1 = 21, + DDR4_SPD_CAS_LATENCIES_BYTE2 = 22, + DDR4_SPD_CAS_LATENCIES_BYTE3 = 23, + DDR4_SPD_MIN_CAS_LATENCY_TAAMIN = 24, + DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN = 25, + DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN = 26, + DDR4_SPD_UPPER_NIBBLES_TRAS_TRC = 27, + DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN = 28, + DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN = 29, + DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN = 30, + DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN = 31, + DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN = 32, + DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN = 33, + DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN = 34, + DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN = 35, + DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN = 36, + DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN = 37, + DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN = 38, + DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN = 39, + 
DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN = 40, + DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN = 117, + DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN = 118, + DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN = 119, + DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN = 120, + DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN = 121, + DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN = 122, + DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN = 123, + DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX = 124, + DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN = 125, + DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE = 126, + DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE = 127, + DDR4_SPD_REFERENCE_RAW_CARD = 130, + DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE = 131, + DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB = 133, + DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB = 134, + DDR4_SPD_REGISTER_REVISION_NUMBER = 135, + DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM = 136, + DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL = 137, + DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK = 138, + DDR4_SPD_MODULE_SERIAL_NUMBER = 325, + DDR4_SPD_MODULE_PART_NUMBER = 329 +} ddr4_spd_addr_t; + +extern int read_entire_spd(bdk_node_t node, dram_config_t *cfg, int lmc, int dimm); +extern int read_spd(bdk_node_t node, const dimm_config_t *dimm_config, int spd_field); + +extern int validate_dimm(bdk_node_t node, const dimm_config_t *dimm_config); + +extern void report_dimm(bdk_node_t node, const dimm_config_t *dimm_config, + int dimm, int ddr_interface_num, int num_ranks, + int dram_width, int dimm_size_mb); + +extern int dram_get_default_spd_speed(bdk_node_t node, const ddr_configuration_t *ddr_config); + +extern const char *ddr3_dimm_types[]; +extern const char *ddr4_dimm_types[]; diff --git a/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c b/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c new file mode 100644 index 0000000000..e0e9d4442c --- /dev/null +++ b/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c @@ -0,0 +1,2012 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. 
MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "dram-internal.h"
+
+// if enhanced verbosity levels are defined, use them
+#if defined(VB_PRT)
+#define ddr_print2(format, ...) VB_PRT(VBL_FAE, format, ##__VA_ARGS__)
+#define ddr_print3(format, ...) VB_PRT(VBL_TME, format, ##__VA_ARGS__)
+#define ddr_print4(format, ...) VB_PRT(VBL_DEV, format, ##__VA_ARGS__)
+#define ddr_print5(format, ...) VB_PRT(VBL_DEV3, format, ##__VA_ARGS__)
+#else
+#define ddr_print2 ddr_print
+#define ddr_print3 ddr_print
+#define ddr_print4 ddr_print
+#define ddr_print5 ddr_print
+#endif
+
+static int64_t test_dram_byte_threads_done;
+static uint64_t test_dram_byte_threads_errs;
+static uint64_t test_dram_byte_lmc_errs[4];
+
+#if 0
+/*
+ * Suggested testing patterns.
+ */
+static const uint64_t test_pattern_2[] = {
+    0xFFFFFFFFFFFFFFFFULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0xFFFFFFFFFFFFFFFFULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0xFFFFFFFFFFFFFFFFULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0xFFFFFFFFFFFFFFFFULL,
+    0x5555555555555555ULL,
+    0xFFFFFFFFFFFFFFFFULL,
+    0x5555555555555555ULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+    0xFFFFFFFFFFFFFFFFULL,
+    0x5555555555555555ULL,
+};
+    /*
+     * or possibly
+     */
+static const uint64_t test_pattern_3[] = {
+    0xFDFDFDFDFDFDFDFDULL,
+    0x8787878787878787ULL,
+    0xFEFEFEFEFEFEFEFEULL,
+    0xC3C3C3C3C3C3C3C3ULL,
+    0x7F7F7F7F7F7F7F7FULL,
+    0xE1E1E1E1E1E1E1E1ULL,
+    0xBFBFBFBFBFBFBFBFULL,
+    0xF0F0F0F0F0F0F0F0ULL,
+    0xDFDFDFDFDFDFDFDFULL,
+    0x7878787878787878ULL,
+    0xEFEFEFEFEFEFEFEFULL,
+    0x3C3C3C3C3C3C3C3CULL,
+    0xF7F7F7F7F7F7F7F7ULL,
+    0x1E1E1E1E1E1E1E1EULL,
+    0xFBFBFBFBFBFBFBFBULL,
+    0x0F0F0F0F0F0F0F0FULL,
+};
+
+static const uint64_t test_pattern_1[] = {
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+#if 0 // only need a cacheline size
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+    0xAAAAAAAAAAAAAAAAULL,
+    0x5555555555555555ULL,
+#endif
+};
+
+// setup default for test pattern array
+static const uint64_t *dram_tune_test_pattern = test_pattern_1;
+#endif
+
+// set this to 1 to shorten the testing to exit when all byte lanes have errors
+// having this at 0 forces the testing to take place over the entire range every iteration,
+// hopefully ensuring an even load on the 
memory subsystem +#define EXIT_WHEN_ALL_LANES_HAVE_ERRORS 0 + +#define DEFAULT_TEST_BURSTS 5 // FIXME: this is what works so far...// FIXME: was 7 +int dram_tune_use_bursts = DEFAULT_TEST_BURSTS; + +// dram_tune_rank_offset is used to offset the second area used in test_dram_mem_xor. +// +// If only a single-rank DIMM, the offset will be 256MB from the start of the first area, +// which is more than enough for the restricted looping/address range actually tested... +// +// If a 2-rank DIMM, the offset will be the size of a rank's address space, so the effect +// will be to have the first and second areas in different ranks on the same DIMM. +// +// So, we default this to single-rank, and it will be overridden when 2-ranks are detected. +// + +// FIXME: ASSUME that we have DIMMS no less than 4GB in size + +// offset to first area that avoids any boot stuff in low range (below 256MB) +#define AREA_BASE_OFFSET (1ULL << 28) // bit 28 always ON + +// offset to duplicate area; may coincide with rank 1 base address for 2-rank 4GB DIMM +#define AREA_DUPE_OFFSET (1ULL << 31) // bit 31 always ON + +// defaults to DUPE, but will be set elsewhere to offset to next RANK if multi-rank DIMM +static uint64_t dram_tune_rank_offset = AREA_DUPE_OFFSET; // default + +// defaults to 0, but will be set elsewhere to the address offset to next DIMM if multi-slot +static uint64_t dram_tune_dimm_offset = 0; // default + + +static int speed_bin_offset[3] = {25, 20, 15}; +static int speed_bin_winlen[3] = {70, 60, 60}; + +static int +get_speed_bin(bdk_node_t node, int lmc) +{ + uint32_t mts_speed = (libdram_get_freq_from_pll(node, lmc) / 1000000) * 2; + int ret = 0; + + // FIXME: is this reasonable speed "binning"? + if (mts_speed >= 1700) { + if (mts_speed >= 2000) + ret = 2; + else + ret = 1; + } + + debug_print("N%d.LMC%d: %s: returning bin %d for MTS %d\n", + node, lmc, __FUNCTION__, ret, mts_speed); + + return ret; +} + +static int is_low_risk_offset(int speed_bin, int offset) +{ + return (_abs(offset) <= speed_bin_offset[speed_bin]); +} +static int is_low_risk_winlen(int speed_bin, int winlen) +{ + return (winlen >= speed_bin_winlen[speed_bin]); +} + +#define ENABLE_PREFETCH 0 +#define ENABLE_WBIL2 1 +#define ENABLE_SBLKDTY 0 + +#define BDK_SYS_CVMCACHE_INV_L2 "#0,c11,c1,#1" // L2 Cache Invalidate +#define BDK_CACHE_INV_L2(address) { asm volatile ("sys " BDK_SYS_CVMCACHE_INV_L2 ", %0" : : "r" (address)); } + +int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, uint64_t *xor_data) +{ + uint64_t p1, p2, d1, d2; + uint64_t v, v1; + uint64_t p2offset = 0x10000000/* was: dram_tune_rank_offset; */; // FIXME? + uint64_t datamask; + uint64_t xor; + uint64_t i, j, k; + uint64_t ii; + int errors = 0; + //uint64_t index; + uint64_t pattern1 = bdk_rng_get_random64(); + uint64_t pattern2 = 0; + uint64_t bad_bits[2] = {0,0}; + +#if ENABLE_SBLKDTY + BDK_CSR_MODIFY(c, node, BDK_L2C_CTL, c.s.dissblkdty = 0); +#endif + + // Byte lanes may be clear in the mask to indicate no testing on that lane. + datamask = bitmask; + + // final address must include LMC and node + p |= (lmc<<7); /* Map address into proper interface */ + p = bdk_numa_get_address(node, p); /* Map to node */ + + /* Add offset to both test regions to not clobber boot stuff + * when running from L2 for NAND boot. 
+ */ + p += AREA_BASE_OFFSET; // make sure base is out of the way of boot + +#define II_INC (1ULL << 29) +#define II_MAX (1ULL << 31) +#define K_INC (1ULL << 14) +#define K_MAX (1ULL << 20) +#define J_INC (1ULL << 9) +#define J_MAX (1ULL << 12) +#define I_INC (1ULL << 3) +#define I_MAX (1ULL << 7) + + debug_print("N%d.LMC%d: dram_tuning_mem_xor: phys_addr=0x%lx\n", + node, lmc, p); + +#if 0 + int ix; + // add this loop to fill memory with the test pattern first + // loops are ordered so that only entire cachelines are written + for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!! + for (k = 0; k < K_MAX; k += K_INC) { + for (j = 0; j < J_MAX; j += J_INC) { + p1 = p + ii + k + j; + p2 = p1 + p2offset; + for (i = 0, ix = 0; i < I_MAX; i += I_INC, ix++) { + + v = dram_tune_test_pattern[ix]; + v1 = v; // write the same thing to both areas + + __bdk_dram_write64(p1 + i, v); + __bdk_dram_write64(p2 + i, v1); + + } +#if ENABLE_WBIL2 + BDK_CACHE_WBI_L2(p1); + BDK_CACHE_WBI_L2(p2); +#endif + } + } + } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */ +#endif + +#if ENABLE_PREFETCH + BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE); + BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE); +#endif + + // loops are ordered so that only a single 64-bit slot is written to each cacheline at one time, + // then the cachelines are forced out; this should maximize read/write traffic + for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!! + for (k = 0; k < K_MAX; k += K_INC) { + for (i = 0; i < I_MAX; i += I_INC) { + for (j = 0; j < J_MAX; j += J_INC) { + + p1 = p + ii + k + j; + p2 = p1 + p2offset; + +#if ENABLE_PREFETCH + if (j < (J_MAX - J_INC)) { + BDK_PREFETCH(p1 + J_INC, BDK_CACHE_LINE_SIZE); + BDK_PREFETCH(p2 + J_INC, BDK_CACHE_LINE_SIZE); + } +#endif + + v = pattern1 * (p1 + i); + v1 = v; // write the same thing to both areas + + __bdk_dram_write64(p1 + i, v); + __bdk_dram_write64(p2 + i, v1); + +#if ENABLE_WBIL2 + BDK_CACHE_WBI_L2(p1); + BDK_CACHE_WBI_L2(p2); +#endif + } + } + } + } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */ + + BDK_DCACHE_INVALIDATE; + + debug_print("N%d.LMC%d: dram_tuning_mem_xor: done INIT loop\n", + node, lmc); + + /* Make a series of passes over the memory areas. */ + + for (int burst = 0; burst < 1/* was: dram_tune_use_bursts*/; burst++) + { + uint64_t this_pattern = bdk_rng_get_random64(); + pattern2 ^= this_pattern; + + /* XOR the data with a random value, applying the change to both + * memory areas. + */ +#if ENABLE_PREFETCH + BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE); + BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE); +#endif + + for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!! + for (k = 0; k < K_MAX; k += K_INC) { + for (i = 0; i < I_MAX; i += I_INC) { // FIXME: rearranged, did not make much difference? 
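+                // (as noted above for the init loop: i, the 64-bit slot within a
+                // cacheline, varies outside j, so each pass touches one slot per
+                // cacheline across the whole j range before moving to the next slot)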
+ for (j = 0; j < J_MAX; j += J_INC) { + + p1 = p + ii + k + j; + p2 = p1 + p2offset; + +#if ENABLE_PREFETCH + if (j < (J_MAX - J_INC)) { + BDK_PREFETCH(p1 + J_INC, BDK_CACHE_LINE_SIZE); + BDK_PREFETCH(p2 + J_INC, BDK_CACHE_LINE_SIZE); + } +#endif + + v = __bdk_dram_read64(p1 + i) ^ this_pattern; + v1 = __bdk_dram_read64(p2 + i) ^ this_pattern; + +#if ENABLE_WBIL2 + BDK_CACHE_INV_L2(p1); + BDK_CACHE_INV_L2(p2); +#endif + + __bdk_dram_write64(p1 + i, v); + __bdk_dram_write64(p2 + i, v1); + +#if ENABLE_WBIL2 + BDK_CACHE_WBI_L2(p1); + BDK_CACHE_WBI_L2(p2); +#endif + } + } + } + } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */ + + BDK_DCACHE_INVALIDATE; + + debug_print("N%d.LMC%d: dram_tuning_mem_xor: done MODIFY loop\n", + node, lmc); + +#if ENABLE_PREFETCH + BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE); + BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE); +#endif + + /* Look for differences in the areas. If there is a mismatch, reset + * both memory locations with the same pattern. Failing to do so + * means that on all subsequent passes the pair of locations remain + * out of sync giving spurious errors. + */ + // FIXME: change the loop order so that an entire cache line is compared at one time + // FIXME: this is so that a read error that occurs *anywhere* on the cacheline will be caught, + // FIXME: rather than comparing only 1 cacheline slot at a time, where an error on a different + // FIXME: slot will be missed that time around + // Does the above make sense? + + for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!! + for (k = 0; k < K_MAX; k += K_INC) { + for (j = 0; j < J_MAX; j += J_INC) { + + p1 = p + ii + k + j; + p2 = p1 + p2offset; + +#if ENABLE_PREFETCH + if (j < (J_MAX - J_INC)) { + BDK_PREFETCH(p1 + J_INC, BDK_CACHE_LINE_SIZE); + BDK_PREFETCH(p2 + J_INC, BDK_CACHE_LINE_SIZE); + } +#endif + + // process entire cachelines in the innermost loop + for (i = 0; i < I_MAX; i += I_INC) { + + v = ((p1 + i) * pattern1) ^ pattern2; // FIXME: this should predict what we find...??? + d1 = __bdk_dram_read64(p1 + i); + d2 = __bdk_dram_read64(p2 + i); + + xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes + + if (!xor) + continue; + + // accumulate bad bits + bad_bits[0] |= xor; + //bad_bits[1] |= ~mpr_data1 & 0xffUL; // cannot do ECC here + + int bybit = 1; + uint64_t bymsk = 0xffULL; // start in byte lane 0 + while (xor != 0) { + debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n", + burst, p1, p2, v, d1, d2); + if (xor & bymsk) { // error(s) in this lane + errors |= bybit; // set the byte error bit + xor &= ~bymsk; // clear byte lane in error bits + datamask &= ~bymsk; // clear the byte lane in the mask +#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS + if (datamask == 0) { // nothing left to do + return errors; // completely done when errors found in all byte lanes in datamask + } +#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */ + } + bymsk <<= 8; // move mask into next byte lane + bybit <<= 1; // move bit into next byte position + } + } +#if ENABLE_WBIL2 + BDK_CACHE_WBI_L2(p1); + BDK_CACHE_WBI_L2(p2); +#endif + } + } + } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */ + + debug_print("N%d.LMC%d: dram_tuning_mem_xor: done TEST loop\n", + node, lmc); + + } /* for (int burst = 0; burst < dram_tune_use_bursts; burst++) */ + + if (xor_data != NULL) { // send the bad bits back... 
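+        // bad_bits[0] is the union of every failing data bit seen across all bursts;
+        // bad_bits[1] stays zero here since this path cannot capture ECC errors (see above)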
+ xor_data[0] = bad_bits[0]; + xor_data[1] = bad_bits[1]; // let it be zeroed + } + +#if ENABLE_SBLKDTY + BDK_CSR_MODIFY(c, node, BDK_L2C_CTL, c.s.dissblkdty = 1); +#endif + + return errors; +} + +#undef II_INC +#undef II_MAX + +#define EXTRACT(v, lsb, width) (((v) >> (lsb)) & ((1ull << (width)) - 1)) +#define LMCNO(address, xbits) (EXTRACT(address, 7, xbits) ^ EXTRACT(address, 20, xbits) ^ EXTRACT(address, 12, xbits)) + +static int dram_tuning_mem_xor2(uint64_t p, uint64_t bitmask, int xbits) +{ + uint64_t p1, p2, d1, d2; + uint64_t v, vpred; + uint64_t p2offset = dram_tune_rank_offset; // FIXME? + uint64_t datamask; + uint64_t xor; + uint64_t ii; + uint64_t pattern1 = bdk_rng_get_random64(); + uint64_t pattern2 = 0; + int errors = 0; + int errs_by_lmc[4] = { 0,0,0,0 }; + int lmc; + uint64_t vbase, vincr; + + // Byte lanes may be clear in the mask to indicate no testing on that lane. + datamask = bitmask; + + /* Add offset to both test regions to not clobber boot stuff + * when running from L2 for NAND boot. + */ + p += AREA_BASE_OFFSET; // make sure base is out of the way of boot + + // move the multiplies outside the loop + vbase = p * pattern1; + vincr = 8 * pattern1; + +#define II_INC (1ULL << 3) +#define II_MAX (1ULL << 22) // stop where the core ID bits start + + // walk the memory areas by 8-byte words + v = vbase; + for (ii = 0; ii < II_MAX; ii += II_INC) { + + p1 = p + ii; + p2 = p1 + p2offset; + + __bdk_dram_write64(p1, v); + __bdk_dram_write64(p2, v); + + v += vincr; + } + + __bdk_dram_flush_to_mem_range(p , p + II_MAX); + __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + II_MAX); + BDK_DCACHE_INVALIDATE; + + /* Make a series of passes over the memory areas. */ + + for (int burst = 0; burst < dram_tune_use_bursts; burst++) + { + uint64_t this_pattern = bdk_rng_get_random64(); + pattern2 ^= this_pattern; + + /* XOR the data with a random value, applying the change to both + * memory areas. + */ +#if 0 + BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE); + BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE); +#endif + for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!! + + p1 = p + ii; + p2 = p1 + p2offset; + + d1 = __bdk_dram_read64(p1) ^ this_pattern; + d2 = __bdk_dram_read64(p2) ^ this_pattern; + + __bdk_dram_write64(p1, d1); + __bdk_dram_write64(p2, d2); + + } + __bdk_dram_flush_to_mem_range(p , p + II_MAX); + __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + II_MAX); + BDK_DCACHE_INVALIDATE; + + /* Look for differences in the areas. If there is a mismatch, reset + * both memory locations with the same pattern. Failing to do so + * means that on all subsequent passes the pair of locations remain + * out of sync giving spurious errors. + */ +#if 0 + BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE); + BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE); +#endif + vpred = vbase; + for (ii = 0; ii < II_MAX; ii += II_INC) { + + p1 = p + ii; + p2 = p1 + p2offset; + + v = vpred ^ pattern2; // this should predict what we find... + d1 = __bdk_dram_read64(p1); + d2 = __bdk_dram_read64(p2); + vpred += vincr; + + xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes + if (!xor) // no errors + continue; + + lmc = LMCNO(p1, xbits); // FIXME: LMC should be SAME for p1 and p2!!! 
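+            // (for reference: with xbits == 2, i.e. four LMCs, LMCNO() XORs address
+            // bits [8:7], [13:12] and [21:20]; e.g. address 0x0080 hashes to LMC 1,
+            // while 0x1080 hashes back to LMC 0)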
+            if (lmc != (int)LMCNO(p2, xbits)) {
+                printf("ERROR: LMCs for addresses [0x%016lX] (%ld) and [0x%016lX] (%ld) differ!!!\n",
+                       p1, LMCNO(p1, xbits), p2, LMCNO(p2, xbits));
+            }
+            int bybit = 1;
+            uint64_t bymsk = 0xffULL; // start in byte lane 0
+            while (xor != 0) {
+                debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n",
+                            burst, p1, p2, v, d1, d2);
+                if (xor & bymsk) { // error(s) in this lane
+                    errs_by_lmc[lmc] |= bybit; // set the byte error bit in the LMCs errors
+                    errors |= bybit; // set the byte error bit
+                    xor &= ~bymsk; // clear byte lane in error bits
+                    //datamask &= ~bymsk; // clear the byte lane in the mask
+                }
+                bymsk <<= 8; // move mask into next byte lane
+                bybit <<= 1; // move bit into next byte position
+            } /* while (xor != 0) */
+        } /* for (ii = 0; ii < II_MAX; ii += II_INC) */
+    } /* for (int burst = 0; burst < dram_tune_use_bursts; burst++) */
+
+    // update the global LMC error states
+    for (lmc = 0; lmc < 4; lmc++) {
+        if (errs_by_lmc[lmc]) {
+            bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_lmc_errs[lmc], errs_by_lmc[lmc]);
+        }
+    }
+
+    return errors;
+}
+
+#if 0
+static int dram_tuning_mem_rows(uint64_t p, uint64_t bitmask)
+{
+    uint64_t p1, p2, d1, d2;
+    uint64_t v, v1;
+    uint64_t p2offset = dram_tune_rank_offset; // FIXME?
+    uint64_t datamask;
+    uint64_t xor;
+    int i, j, k, ii;
+    int errors = 0;
+    int index;
+    uint64_t pattern1 = 0; // FIXME: maybe this could be from a table?
+    uint64_t pattern2;
+
+    // Byte lanes may be clear in the mask to indicate no testing on that lane.
+    datamask = bitmask;
+
+    /* Add offset to both test regions to not clobber boot stuff
+     * when running from L2 for NAND boot.
+     */
+    p += 0x10000000; // FIXME? was: 0x4000000; // make sure base is out of the way of cores for tuning
+
+    pattern2 = pattern1;
+    for (k = 0; k < (1 << 20); k += (1 << 14)) {
+        for (j = 0; j < (1 << 12); j += (1 << 9)) {
+            for (i = 0; i < (1 << 7); i += 8) {
+                index = i + j + k;
+                p1 = p + index;
+                p2 = p1 + p2offset;
+
+                v = pattern2;
+                v1 = v; // write the same thing to same slot in both cachelines
+                pattern2 = ~pattern2; // flip bits for next slots
+
+                __bdk_dram_write64(p1, v);
+                __bdk_dram_write64(p2, v1);
+            }
+#if 1
+            BDK_CACHE_WBI_L2(p1);
+            BDK_CACHE_WBI_L2(p2);
+#endif
+        }
+    }
+
+#if 0
+    __bdk_dram_flush_to_mem_range(p, p + (1ULL << 20)); // max_addr is start + where k stops...
+    __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + (1ULL << 20)); // max_addr is start + where k stops...
+#endif
+    BDK_DCACHE_INVALIDATE;
+
+    /* Make a series of passes over the memory areas. */
+
+    for (int burst = 0; burst < dram_tune_use_bursts; burst++)
+    {
+        /* just read and flip the bits applying the change to both
+         * memory areas.
+         */
+        for (k = 0; k < (1 << 20); k += (1 << 14)) {
+            for (j = 0; j < (1 << 12); j += (1 << 9)) {
+                for (i = 0; i < (1 << 7); i += 8) {
+                    index = i + j + k;
+                    p1 = p + index;
+                    p2 = p1 + p2offset;
+
+                    v = ~__bdk_dram_read64(p1);
+                    v1 = ~__bdk_dram_read64(p2);
+
+                    __bdk_dram_write64(p1, v);
+                    __bdk_dram_write64(p2, v1);
+                }
+#if 1
+                BDK_CACHE_WBI_L2(p1);
+                BDK_CACHE_WBI_L2(p2);
+#endif
+            }
+        }
+
+#if 0
+        __bdk_dram_flush_to_mem_range(p, p + (1ULL << 20)); // max_addr is start + where k stops...
+        __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + (1ULL << 20)); // max_addr is start + where k stops...
+#endif
+        BDK_DCACHE_INVALIDATE;
+
+        /* Look for differences in the areas. If there is a mismatch, reset
+         * both memory locations with the same pattern. 
Failing to do so + * means that on all subsequent passes the pair of locations remain + * out of sync giving spurious errors. + */ + + // FIXME: change the loop order so that an entire cache line is compared at one time + // FIXME: this is so that a read error that occurs *anywhere* on the cacheline will be caught, + // FIXME: rather than comparing only 1 cacheline slot at a time, where an error on a different + // FIXME: slot will be missed that time around + // Does the above make sense? + + pattern2 = ~pattern1; // slots have been flipped by the above loop + + for (k = 0; k < (1 << 20); k += (1 << 14)) { + for (j = 0; j < (1 << 12); j += (1 << 9)) { + for (i = 0; i < (1 << 7); i += 8) { + index = i + j + k; + p1 = p + index; + p2 = p1 + p2offset; + + v = pattern2; // FIXME: this should predict what we find...??? + d1 = __bdk_dram_read64(p1); + d2 = __bdk_dram_read64(p2); + pattern2 = ~pattern2; // flip for next slot + + xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes + + int bybit = 1; + uint64_t bymsk = 0xffULL; // start in byte lane 0 + while (xor != 0) { + debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n", + burst, p1, p2, v, d1, d2); + if (xor & bymsk) { // error(s) in this lane + errors |= bybit; // set the byte error bit + xor &= ~bymsk; // clear byte lane in error bits + datamask &= ~bymsk; // clear the byte lane in the mask +#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS + if (datamask == 0) { // nothing left to do + return errors; // completely done when errors found in all byte lanes in datamask + } +#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */ + } + bymsk <<= 8; // move mask into next byte lane + bybit <<= 1; // move bit into next byte position + } + } + } + } + pattern1 = ~pattern1; // flip the starting pattern for the next burst + + } /* for (int burst = 0; burst < dram_tune_use_bursts; burst++) */ + return errors; +} +#endif + +// cores to use +#define DEFAULT_USE_CORES 44 // FIXME: was (1 << CORE_BITS) +int dram_tune_use_cores = DEFAULT_USE_CORES; // max cores to use, override available +int dram_tune_max_cores; // max cores available on a node +#define CORE_SHIFT 22 // FIXME: offset into rank_address passed to test_dram_byte + +typedef void (*__dram_tuning_thread_t)(int arg, void *arg1); + +typedef struct +{ + bdk_node_t node; + int64_t num_lmcs; + uint64_t byte_mask; +} test_dram_byte_info_t; + +static void dram_tuning_thread(int arg, void *arg1) +{ + test_dram_byte_info_t *test_info = arg1; + int core = arg; + uint64_t errs; + bdk_node_t node = test_info->node; + int num_lmcs, lmc; +#if 0 + num_lmcs = test_info->num_lmcs; + // map core numbers into hopefully equal groups per LMC + lmc = core % num_lmcs; +#else + // FIXME: this code should allow running all the cores on a single LMC... + // if incoming num_lmcs > 0, then use as normal; if < 0 remap to a single LMC + if (test_info->num_lmcs >= 0) { + num_lmcs = test_info->num_lmcs; + // map core numbers into hopefully equal groups per LMC + lmc = core % num_lmcs; + } else { + num_lmcs = 1; + // incoming num_lmcs is (desired LMC - 10) + lmc = 10 + test_info->num_lmcs; + } +#endif + uint64_t base_address = 0/* was: (lmc << 7); now done by callee */; + uint64_t bytemask = test_info->byte_mask; + + /* Figure out our work memory range. + * + * Note: base_address above just provides the physical offset which determines + * specific LMC portions of the address space and does not have the node bits set. 
+ */ + //was: base_address = bdk_numa_get_address(node, base_address); // map to node // now done by callee + base_address |= (core << CORE_SHIFT); // FIXME: also put full core into address + if (dram_tune_dimm_offset) { // if multi-slot in some way, choose a DIMM for the core + base_address |= (core & (1 << (num_lmcs >> 1))) ? dram_tune_dimm_offset : 0; + } + + debug_print("Node %d, core %d, Testing area 1 at 0x%011lx, area 2 at 0x%011lx\n", + node, core, base_address + AREA_BASE_OFFSET, + base_address + AREA_BASE_OFFSET + dram_tune_rank_offset); + + errs = dram_tuning_mem_xor(node, lmc, base_address, bytemask, NULL); + //errs = dram_tuning_mem_rows(base_address, bytemask); + + /* Report that we're done */ + debug_print("Core %d on LMC %d node %d done with test_dram_byte with 0x%lx errs\n", + core, lmc, node, errs); + + if (errs) { + bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_threads_errs, errs); + bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_lmc_errs[lmc], errs); + } + + bdk_atomic_add64_nosync(&test_dram_byte_threads_done, 1); + + return; +} + +static void dram_tuning_thread2(int arg, void *arg1) +{ + test_dram_byte_info_t *test_info = arg1; + int core = arg; + uint64_t errs; + bdk_node_t node = test_info->node; + int num_lmcs = test_info->num_lmcs; + + uint64_t base_address = 0; // + uint64_t bytemask = test_info->byte_mask; + + /* Figure out our work memory range. + * + * Note: base_address above just provides the physical offset which determines + * specific portions of the address space and does not have the node bits set. + */ + base_address = bdk_numa_get_address(node, base_address); // map to node + base_address |= (core << CORE_SHIFT); // FIXME: also put full core into address + if (dram_tune_dimm_offset) { // if multi-slot in some way, choose a DIMM for the core + base_address |= (core & 1) ? dram_tune_dimm_offset : 0; + } + + debug_print("Node %d, core %d, Testing area 1 at 0x%011lx, area 2 at 0x%011lx\n", + node, core, base_address + AREA_BASE_OFFSET, + base_address + AREA_BASE_OFFSET + dram_tune_rank_offset); + + errs = dram_tuning_mem_xor2(base_address, bytemask, (num_lmcs >> 1)); // 4->2, 2->1, 1->0 + //errs = dram_tuning_mem_rows(base_address, bytemask); + + /* Report that we're done */ + debug_print("Core %d on LMC %d node %d done with test_dram_byte with 0x%lx errs\n", + core, lmc, node, errs); + + if (errs) { + bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_threads_errs, errs); + // FIXME: this will have been done already in the called test routine + //bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_lmc_errs[lmc], errs); + } + + bdk_atomic_add64_nosync(&test_dram_byte_threads_done, 1); + + return; +} + +static int dram_tune_use_xor2 = 1; // FIXME: do NOT default to original mem_xor (LMC-based) code + +static int +run_dram_tuning_threads(bdk_node_t node, int num_lmcs, uint64_t bytemask) +{ + test_dram_byte_info_t test_dram_byte_info; + test_dram_byte_info_t *test_info = &test_dram_byte_info; + int total_count = 0; + __dram_tuning_thread_t thread_p = (dram_tune_use_xor2) ? 
dram_tuning_thread2 : dram_tuning_thread; + + test_info->node = node; + test_info->num_lmcs = num_lmcs; + test_info->byte_mask = bytemask; + + // init some global data + bdk_atomic_set64(&test_dram_byte_threads_done, 0); + bdk_atomic_set64((int64_t *)&test_dram_byte_threads_errs, 0); + bdk_atomic_set64((int64_t *)&test_dram_byte_lmc_errs[0], 0); + bdk_atomic_set64((int64_t *)&test_dram_byte_lmc_errs[1], 0); + bdk_atomic_set64((int64_t *)&test_dram_byte_lmc_errs[2], 0); + bdk_atomic_set64((int64_t *)&test_dram_byte_lmc_errs[3], 0); + + /* Start threads for cores on the node */ + if (bdk_numa_exists(node)) { + debug_print("Starting %d threads for test_dram_byte\n", dram_tune_use_cores); + for (int core = 0; core < dram_tune_use_cores; core++) { + if (bdk_thread_create(node, 0, thread_p, core, (void *)test_info, 0)) { + bdk_error("Failed to create thread %d for test_dram_byte\n", core); + } else { + total_count++; + } + } + } + +#if 0 + /* Wait for threads to finish */ + while (bdk_atomic_get64(&test_dram_byte_threads_done) < total_count) + bdk_thread_yield(); +#else +#define TIMEOUT_SECS 5 // FIXME: long enough so a pass for a given setting will not print + /* Wait for threads to finish, with progress */ + int cur_count; + uint64_t cur_time; + uint64_t period = bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_TIME) * TIMEOUT_SECS; // FIXME? + uint64_t timeout = bdk_clock_get_count(BDK_CLOCK_TIME) + period; + do { + bdk_thread_yield(); + cur_count = bdk_atomic_get64(&test_dram_byte_threads_done); + cur_time = bdk_clock_get_count(BDK_CLOCK_TIME); + if (cur_time >= timeout) { + printf("Waiting for %d cores\n", total_count - cur_count); + timeout = cur_time + period; + } + } while (cur_count < total_count); +#endif + + // NOTE: this is the summary of errors across all LMCs + return (int)bdk_atomic_get64((int64_t *)&test_dram_byte_threads_errs); +} + +/* These variables count the number of ECC errors. They should only be accessed atomically */ +extern int64_t __bdk_dram_ecc_single_bit_errors[]; +extern int64_t __bdk_dram_ecc_double_bit_errors[]; + +#if 0 +// make the tuning test callable as a standalone +int +bdk_run_dram_tuning_test(int node) +{ + int num_lmcs = __bdk_dram_get_num_lmc(node); + const char *s; + int lmc, byte; + int errors; + uint64_t start_dram_dclk[4], start_dram_ops[4]; + int save_use_bursts; + + // check for the cores on this node, abort if not more than 1 // FIXME? + dram_tune_max_cores = bdk_get_num_running_cores(node); + if (dram_tune_max_cores < 2) { + //bdk_init_cores(node, 0); + printf("N%d: ERROR: not enough cores to run the DRAM tuning test.\n", node); + return 0; + } + + // but use only a certain number of cores, at most what is available + if ((s = getenv("ddr_tune_use_cores")) != NULL) { + dram_tune_use_cores = strtoul(s, NULL, 0); + if (dram_tune_use_cores <= 0) // allow 0 or negative to mean all + dram_tune_use_cores = dram_tune_max_cores; + } + if (dram_tune_use_cores > dram_tune_max_cores) + dram_tune_use_cores = dram_tune_max_cores; + + // save the original bursts, so we can replace it with a better number for just testing + save_use_bursts = dram_tune_use_bursts; + dram_tune_use_bursts = 1500; // FIXME: hard code bursts for the test here... 
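+    /* NB: the burst count hard-coded above is still subject to the
+     * ddr_tune_use_bursts environment override just below. */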
+ + // allow override of the test repeats (bursts) per thread create + if ((s = getenv("ddr_tune_use_bursts")) != NULL) { + dram_tune_use_bursts = strtoul(s, NULL, 10); + } + + // allow override of the test mem_xor algorithm + if ((s = getenv("ddr_tune_use_xor2")) != NULL) { + dram_tune_use_xor2 = !!strtoul(s, NULL, 10); + } + + // FIXME? consult LMC0 only + BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(0)); + if (lmcx_config.s.rank_ena) { // replace the default offset when there is more than 1 rank... + dram_tune_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2)); + ddr_print("N%d: run_dram_tuning_test: changing rank offset to 0x%lx\n", node, dram_tune_rank_offset); + } + if (lmcx_config.s.init_status & 0x0c) { // bit 2 or 3 set indicates 2 DIMMs + dram_tune_dimm_offset = 1ull << (28 + lmcx_config.s.pbank_lsb + (num_lmcs/2)); + ddr_print("N%d: run_dram_tuning_test: changing dimm offset to 0x%lx\n", node, dram_tune_dimm_offset); + } + int ddr_interface_64b = !lmcx_config.s.mode32b; + + // construct the bytemask + int bytes_todo = (ddr_interface_64b) ? 0xff : 0x0f; // FIXME: hack? + uint64_t bytemask = 0; + for (byte = 0; byte < 8; ++byte) { + uint64_t bitmask; + if (bytes_todo & (1 << byte)) { + bitmask = ((!ddr_interface_64b) && (byte == 4)) ? 0x0f: 0xff; + bytemask |= bitmask << (8*byte); // set the bytes bits in the bytemask + } + } /* for (byte = 0; byte < 8; ++byte) */ + + // print current working values + ddr_print("N%d: run_dram_tuning_test: max %d cores, use %d cores, use %d bursts.\n", + node, dram_tune_max_cores, dram_tune_use_cores, dram_tune_use_bursts); + + // do the setup on active LMCs + for (lmc = 0; lmc < num_lmcs; lmc++) { + // record start cycle CSRs here for utilization measure + start_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc)); + start_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc)); +#if 0 + bdk_atomic_set64(&__bdk_dram_ecc_single_bit_errors[lmc], 0); + bdk_atomic_set64(&__bdk_dram_ecc_double_bit_errors[lmc], 0); +#else + __bdk_dram_ecc_single_bit_errors[lmc] = 0; + __bdk_dram_ecc_double_bit_errors[lmc] = 0; +#endif + } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ + + bdk_watchdog_poke(); + + // run the test(s) + // only 1 call should be enough, let the bursts, etc, control the load... 
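+    /* The return value below is the OR of every worker thread's per-byte-lane
+     * error mask: bit N set means at least one miscompare was seen in byte
+     * lane N on some LMC. A minimal decode sketch (report_lane_errors() is a
+     * hypothetical helper, not a BDK function):
+     */
+#if 0
+    static void report_lane_errors(int errors)
+    {
+        for (int byte = 0; byte < 8; byte++)
+            if (errors & (1 << byte))
+                printf("byte lane %d saw miscompares\n", byte);
+    }
+#endif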
+ errors = run_dram_tuning_threads(node, num_lmcs, bytemask); + + /* Check ECC error counters after the test */ + int64_t ecc_single = 0; + int64_t ecc_double = 0; + int64_t ecc_single_errs[4]; + int64_t ecc_double_errs[4]; + + // finally, print the utilizations all together, and sum the ECC errors + for (lmc = 0; lmc < num_lmcs; lmc++) { + uint64_t dclk_diff = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc)) - start_dram_dclk[lmc]; + uint64_t ops_diff = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc)) - start_dram_ops[lmc]; + uint64_t percent_x10 = ops_diff * 1000 / dclk_diff; + printf("N%d.LMC%d: ops %lu, cycles %lu, used %lu.%lu%%\n", + node, lmc, ops_diff, dclk_diff, percent_x10 / 10, percent_x10 % 10); + + ecc_single += (ecc_single_errs[lmc] = bdk_atomic_get64(&__bdk_dram_ecc_single_bit_errors[lmc])); + ecc_double += (ecc_double_errs[lmc] = bdk_atomic_get64(&__bdk_dram_ecc_double_bit_errors[lmc])); + } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ + + /* Always print any ECC errors */ + if (ecc_single || ecc_double) { + printf("Test \"%s\": ECC errors, %ld/%ld/%ld/%ld corrected, %ld/%ld/%ld/%ld uncorrected\n", + "DRAM Tuning Test", + ecc_single_errs[0], ecc_single_errs[1], ecc_single_errs[2], ecc_single_errs[3], + ecc_double_errs[0], ecc_double_errs[1], ecc_double_errs[2], ecc_double_errs[3]); + } + if (errors || ecc_double || ecc_single) { + printf("Test \"%s\": FAIL: %ld single, %ld double, %d compare errors\n", + "DRAM Tuning Test", ecc_single, ecc_double, errors); + } + + // restore bursts + dram_tune_use_bursts = save_use_bursts; + + return (errors + ecc_double + ecc_single); +} +#endif /* 0 */ + +#define DEFAULT_SAMPLE_GRAN 3 // sample for errors every N offset values +#define MIN_BYTE_OFFSET -63 +#define MAX_BYTE_OFFSET +63 +int dram_tune_use_gran = DEFAULT_SAMPLE_GRAN; + +static int +auto_set_dll_offset(bdk_node_t node, int dll_offset_mode, + int num_lmcs, int ddr_interface_64b, + int do_tune) +{ + int byte_offset; + //unsigned short result[9]; + int byte; + int byte_delay_start[4][9]; + int byte_delay_count[4][9]; + uint64_t byte_delay_windows [4][9]; + int byte_delay_best_start[4][9]; + int byte_delay_best_count[4][9]; + //int this_rodt; + uint64_t ops_sum[4], dclk_sum[4]; + uint64_t start_dram_dclk[4], stop_dram_dclk[4]; + uint64_t start_dram_ops[4], stop_dram_ops[4]; + int errors, tot_errors; + int lmc; + char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write"; + int mode_is_read = (dll_offset_mode == 2); + char *mode_blk = (dll_offset_mode == 2) ? " " : ""; + int start_offset, end_offset, incr_offset; + + int speed_bin = get_speed_bin(node, 0); // FIXME: just get from LMC0? + int low_risk_count = 0, needs_review_count = 0; + + if (dram_tune_use_gran != DEFAULT_SAMPLE_GRAN) { + ddr_print2("N%d: Changing sample granularity from %d to %d\n", + node, DEFAULT_SAMPLE_GRAN, dram_tune_use_gran); + } + // ensure sample is taken at 0 + start_offset = MIN_BYTE_OFFSET - (MIN_BYTE_OFFSET % dram_tune_use_gran); + end_offset = MAX_BYTE_OFFSET - (MAX_BYTE_OFFSET % dram_tune_use_gran); + incr_offset = dram_tune_use_gran; + + memset(ops_sum, 0, sizeof(ops_sum)); + memset(dclk_sum, 0, sizeof(dclk_sum)); + memset(byte_delay_start, 0, sizeof(byte_delay_start)); + memset(byte_delay_count, 0, sizeof(byte_delay_count)); + memset(byte_delay_windows, 0, sizeof(byte_delay_windows)); + memset(byte_delay_best_start, 0, sizeof(byte_delay_best_start)); + memset(byte_delay_best_count, 0, sizeof(byte_delay_best_count)); + + // FIXME? 
consult LMC0 only + BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(0)); + if (lmcx_config.s.rank_ena) { // replace the default offset when there is more than 1 rank... + dram_tune_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2)); + ddr_print2("N%d: Tuning multiple ranks per DIMM (rank offset 0x%lx).\n", node, dram_tune_rank_offset); + } + if (lmcx_config.s.init_status & 0x0c) { // bit 2 or 3 set indicates 2 DIMMs + dram_tune_dimm_offset = 1ull << (28 + lmcx_config.s.pbank_lsb + (num_lmcs/2)); + ddr_print2("N%d: Tuning multiple DIMMs per channel (DIMM offset 0x%lx)\n", node, dram_tune_dimm_offset); + } + + // FIXME? do this for LMC0 only + //BDK_CSR_INIT(comp_ctl2, node, BDK_LMCX_COMP_CTL2(0)); + //this_rodt = comp_ctl2.s.rodt_ctl; + + // construct the bytemask + int bytes_todo = (ddr_interface_64b) ? 0xff : 0x0f; + uint64_t bytemask = 0; + for (byte = 0; byte < 8; ++byte) { + if (bytes_todo & (1 << byte)) { + bytemask |= 0xfful << (8*byte); // set the bytes bits in the bytemask + } + } /* for (byte = 0; byte < 8; ++byte) */ + + // now loop through selected legal values for the DLL byte offset... + + for (byte_offset = start_offset; byte_offset <= end_offset; byte_offset += incr_offset) { + + // do the setup on active LMCs + for (lmc = 0; lmc < num_lmcs; lmc++) { + change_dll_offset_enable(node, lmc, 0); + + // set all byte lanes at once + load_dll_offset(node, lmc, dll_offset_mode, byte_offset, 10 /* All bytes at once */); + // but then clear the ECC byte lane so it should be neutral for the test... + load_dll_offset(node, lmc, dll_offset_mode, 0, 8); + + change_dll_offset_enable(node, lmc, 1); + + // record start cycle CSRs here for utilization measure + start_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc)); + start_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc)); + } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ + + bdk_watchdog_poke(); + + // run the test(s) + // only 1 call should be enough, let the bursts, etc, control the load... + tot_errors = run_dram_tuning_threads(node, num_lmcs, bytemask); + + for (lmc = 0; lmc < num_lmcs; lmc++) { + // record stop cycle CSRs here for utilization measure + stop_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc)); + stop_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc)); + + // accumulate... + ops_sum[lmc] += stop_dram_ops[lmc] - start_dram_ops[lmc]; + dclk_sum[lmc] += stop_dram_dclk[lmc] - start_dram_dclk[lmc]; + + errors = test_dram_byte_lmc_errs[lmc]; + + // check errors by byte, but not ECC + for (byte = 0; byte < 8; ++byte) { + if (!(bytes_todo & (1 << byte))) // is this byte lane to be done + continue; // no + + byte_delay_windows[lmc][byte] <<= 1; // always put in a zero + if (errors & (1 << byte)) { // yes, an error in this byte lane + byte_delay_count[lmc][byte] = 0; // stop now always + } else { // no error in this byte lane + if (byte_delay_count[lmc][byte] == 0) { // first success, set run start + byte_delay_start[lmc][byte] = byte_offset; + } + byte_delay_count[lmc][byte] += incr_offset; // bump run length + + if (byte_delay_count[lmc][byte] > byte_delay_best_count[lmc][byte]) { + byte_delay_best_count[lmc][byte] = byte_delay_count[lmc][byte]; + byte_delay_best_start[lmc][byte] = byte_delay_start[lmc][byte]; + } + byte_delay_windows[lmc][byte] |= 1ULL; // for pass, put in a 1 + } + } /* for (byte = 0; byte < 8; ++byte) */ + + // only print when there are errors and verbose... 
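+        /* (The start/count/best bookkeeping above is a run-length scan: each
+         * passing offset extends the current window, any failure ends it, and
+         * the longest window seen so far is kept per LMC and byte lane. A
+         * standalone sketch of the same scan, with hypothetical names:)
+         */
+#if 0
+        static void window_scan_step(int pass, int offset, int incr,
+                                     int *start, int *count,
+                                     int *best_start, int *best_count)
+        {
+            if (!pass) {
+                *count = 0;              /* a failure ends any current run */
+                return;
+            }
+            if (*count == 0)
+                *start = offset;         /* first pass begins a new run */
+            *count += incr;
+            if (*count > *best_count) {  /* keep the longest run so far */
+                *best_count = *count;
+                *best_start = *start;
+            }
+        }
+#endif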
+ if (errors) { + debug_print("DLL %s Offset Test %3d: errors 0x%x\n", + mode_str, byte_offset, errors); + } + } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ + + } /* for (byte_offset=-63; byte_offset<63; byte_offset += incr_offset) */ + + // done with testing, load up and/or print out the offsets we found... + + // only when margining... + if (!do_tune) { + printf(" \n"); + printf("-------------------------------------\n"); +#if 0 + uint32_t mts_speed = (libdram_get_freq_from_pll(node, 0) * 2) / 1000000; // FIXME: sample LMC0 + printf("N%d: Starting %s Timing Margining for %d MT/s.\n", node, mode_str, mts_speed); +#else + printf("N%d: Starting %s Timing Margining.\n", node, mode_str); +#endif + printf(" \n"); + } /* if (!do_tune) */ + + for (lmc = 0; lmc < num_lmcs; lmc++) { +#if 1 + // FIXME FIXME + // FIXME: this just makes ECC always show 0 + byte_delay_best_start[lmc][8] = start_offset; + byte_delay_best_count[lmc][8] = end_offset - start_offset + incr_offset; +#endif + + // disable offsets while we load... + change_dll_offset_enable(node, lmc, 0); + + // only when margining... + if (!do_tune) { + // print the heading + printf(" \n"); + printf("N%d.LMC%d: %s Timing Margin %s : ", node, lmc, mode_str, mode_blk); + printf(" ECC/8 "); + for (byte = 7; byte >= 0; byte--) { + printf(" Byte %d ", byte); + } + printf("\n"); + } /* if (!do_tune) */ + + // print and load the offset values + // print the windows bit arrays + // only when margining... + if (!do_tune) { + printf("N%d.LMC%d: DLL %s Offset Amount %s : ", node, lmc, mode_str, mode_blk); + } else { + ddr_print("N%d.LMC%d: SW DLL %s Offset Amount %s : ", node, lmc, mode_str, mode_blk); + } + for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order + + int count = byte_delay_best_count[lmc][byte]; + if (count == 0) + count = incr_offset; // should make non-tested ECC byte come out 0 + + byte_offset = byte_delay_best_start[lmc][byte] + + ((count - incr_offset) / 2); // adj by incr + + if (!do_tune) { // do counting and special flag if margining + int will_need_review = !is_low_risk_winlen(speed_bin, (count - incr_offset)) && + !is_low_risk_offset(speed_bin, byte_offset); + + printf("%10d%c", byte_offset, (will_need_review) ? '<' :' '); + + if (will_need_review) + needs_review_count++; + else + low_risk_count++; + } else { // if just tuning, make the printout less lengthy + ddr_print("%5d ", byte_offset); + } + + // FIXME? should we be able to override this? + if (mode_is_read) // for READ offsets, always store what we found + load_dll_offset(node, lmc, dll_offset_mode, byte_offset, byte); + else // for WRITE offsets, always store 0 + load_dll_offset(node, lmc, dll_offset_mode, 0, byte); + + } + if (!do_tune) { + printf("\n"); + } else { + ddr_print("\n"); + } + + + // re-enable the offsets now that we are done loading + change_dll_offset_enable(node, lmc, 1); + + // only when margining... + if (!do_tune) { + // print the window sizes + printf("N%d.LMC%d: DLL %s Window Length %s : ", node, lmc, mode_str, mode_blk); + for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order + int count = byte_delay_best_count[lmc][byte]; + if (count == 0) + count = incr_offset; // should make non-tested ECC byte come out 0 + + // do this again since the "needs review" test is an AND... 
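+            /* ("Needs review" is flagged only when the window length AND its
+             * centered offset are both outside the low-risk bounds for this
+             * speed bin; either one alone keeps the lane low risk.) */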
+ byte_offset = byte_delay_best_start[lmc][byte] + + ((count - incr_offset) / 2); // adj by incr + + int will_need_review = !is_low_risk_winlen(speed_bin, (count - incr_offset)) && + !is_low_risk_offset(speed_bin, byte_offset); + + printf("%10d%c", count - incr_offset, (will_need_review) ? '<' :' '); + } + printf("\n"); + + // print the window extents + printf("N%d.LMC%d: DLL %s Window Bounds %s : ", node, lmc, mode_str, mode_blk); + for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order + int start = byte_delay_best_start[lmc][byte]; + int count = byte_delay_best_count[lmc][byte]; + if (count == 0) + count = incr_offset; // should make non-tested ECC byte come out 0 + printf(" %3d to%3d ", start, + start + count - incr_offset); + } + printf("\n"); +#if 0 + // FIXME: should have a way to force these out... + // print the windows bit arrays + printf("N%d.LMC%d: DLL %s Window Bitmap%s : ", node, lmc, mode_str, mode_blk); + for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order + printf("%010lx ", byte_delay_windows[lmc][byte]); + } + printf("\n"); +#endif + } /* if (!do_tune) */ + } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ + + // only when margining... + if (!do_tune) { + // print the Summary line(s) here + printf(" \n"); + printf("N%d: %s Timing Margining Summary : %s ", node, mode_str, + (needs_review_count > 0) ? "Needs Review" : "Low Risk"); + if (needs_review_count > 0) + printf("(%d)", needs_review_count); + printf("\n"); + + // FIXME??? want to print here: "N0: %s Offsets have been applied already" + + printf("-------------------------------------\n"); + printf(" \n"); + } /* if (!do_tune) */ + + // FIXME: we probably want this only when doing verbose... + // finally, print the utilizations all together + for (lmc = 0; lmc < num_lmcs; lmc++) { + uint64_t percent_x10 = ops_sum[lmc] * 1000 / dclk_sum[lmc]; + ddr_print2("N%d.LMC%d: ops %lu, cycles %lu, used %lu.%lu%%\n", + node, lmc, ops_sum[lmc], dclk_sum[lmc], percent_x10 / 10, percent_x10 % 10); + } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ + + // FIXME: only when verbose, or only when there are errors? + // run the test one last time + // print whether there are errors or not, but only when verbose... + bdk_watchdog_poke(); + debug_print("N%d: %s: Start running test one last time\n", node, __FUNCTION__); + tot_errors = run_dram_tuning_threads(node, num_lmcs, bytemask); + debug_print("N%d: %s: Finished running test one last time\n", node, __FUNCTION__); + if (tot_errors) + ddr_print2("%s Timing Final Test: errors 0x%x\n", mode_str, tot_errors); + + return (do_tune) ? 
tot_errors : !!(needs_review_count > 0); +} + +#define USE_L2_WAYS_LIMIT 0 // non-zero to enable L2 ways limiting + +/* + * Automatically adjust the DLL offset for the data bytes + */ +int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune) +{ + int ddr_interface_64b; + int save_ecc_ena[4]; + bdk_lmcx_config_t lmc_config; + int lmc, num_lmcs = __bdk_dram_get_num_lmc(node); + const char *s; +#if USE_L2_WAYS_LIMIT + int ways, ways_print = 0; +#endif +#if 0 + int dram_tune_use_rodt = -1, save_rodt[4]; + bdk_lmcx_comp_ctl2_t comp_ctl2; +#endif + int loops = 1, loop; + uint64_t orig_coremask; + int errs = 0; + + // enable any non-running cores on this node + orig_coremask = bdk_get_running_coremask(node); + ddr_print4("N%d: %s: Starting cores (mask was 0x%lx)\n", + node, __FUNCTION__, orig_coremask); + bdk_init_cores(node, ~0ULL & ~orig_coremask); + dram_tune_max_cores = bdk_get_num_running_cores(node); + + // but use only a certain number of cores, at most what is available + if ((s = getenv("ddr_tune_use_cores")) != NULL) { + dram_tune_use_cores = strtoul(s, NULL, 0); + if (dram_tune_use_cores <= 0) // allow 0 or negative to mean all + dram_tune_use_cores = dram_tune_max_cores; + } + if (dram_tune_use_cores > dram_tune_max_cores) + dram_tune_use_cores = dram_tune_max_cores; + + // see if we want to do the tuning more than once per LMC... + if ((s = getenv("ddr_tune_use_loops"))) { + loops = strtoul(s, NULL, 0); + } + + // see if we want to change the granularity of the byte_offset sampling + if ((s = getenv("ddr_tune_use_gran"))) { + dram_tune_use_gran = strtoul(s, NULL, 0); + } + + // allow override of the test repeats (bursts) per thread create + if ((s = getenv("ddr_tune_use_bursts")) != NULL) { + dram_tune_use_bursts = strtoul(s, NULL, 10); + } + +#if 0 + // allow override of Read ODT setting just during the tuning run(s) + if ((s = getenv("ddr_tune_use_rodt")) != NULL) { + int temp = strtoul(s, NULL, 10); + // validity check + if (temp >= 0 && temp <= 7) + dram_tune_use_rodt = temp; + } +#endif + +#if 0 + // allow override of the test pattern + // FIXME: a bit simplistic... + if ((s = getenv("ddr_tune_use_pattern")) != NULL) { + int patno = strtoul(s, NULL, 10); + if (patno == 2) + dram_tune_test_pattern = test_pattern_2; + else if (patno == 3) + dram_tune_test_pattern = test_pattern_3; + else // all other values use default + dram_tune_test_pattern = test_pattern_1; + } +#endif + + // allow override of the test mem_xor algorithm + if ((s = getenv("ddr_tune_use_xor2")) != NULL) { + dram_tune_use_xor2 = !!strtoul(s, NULL, 10); + } + + // print current working values + ddr_print2("N%d: Tuning will use %d cores of max %d cores, and use %d repeats.\n", + node, dram_tune_use_cores, dram_tune_max_cores, + dram_tune_use_bursts); + +#if USE_L2_WAYS_LIMIT + // see if L2 ways are limited + if ((s = lookup_env_parameter("limit_l2_ways")) != NULL) { + ways = strtoul(s, NULL, 10); + ways_print = 1; + } else { + ways = bdk_l2c_get_num_assoc(node); + } +#endif + +#if 0 + // if RODT is to be overridden during tuning, note change + if (dram_tune_use_rodt >= 0) { + ddr_print("N%d: using RODT %d for tuning.\n", + node, dram_tune_use_rodt); + } +#endif + + // FIXME? 
get flag from LMC0 only + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(0)); + ddr_interface_64b = !lmc_config.s.mode32b; + + // do setup for each active LMC + debug_print("N%d: %s: starting LMCs setup.\n", node, __FUNCTION__); + for (lmc = 0; lmc < num_lmcs; lmc++) { + +#if 0 + // if RODT change, save old and set new here... + if (dram_tune_use_rodt >= 0) { + comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc)); + save_rodt[lmc] = comp_ctl2.s.rodt_ctl; + comp_ctl2.s.rodt_ctl = dram_tune_use_rodt; + DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(lmc), comp_ctl2.u); + BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc)); + } +#endif + /* Disable ECC for DRAM tests */ + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); + save_ecc_ena[lmc] = lmc_config.s.ecc_ena; + lmc_config.s.ecc_ena = 0; + DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u); + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); + + } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ + +#if USE_L2_WAYS_LIMIT + /* Disable l2 sets for DRAM testing */ + limit_l2_ways(node, 0, ways_print); +#endif + + // testing is done on all LMCs simultaneously + // FIXME: for now, loop here to show what happens multiple times + for (loop = 0; loop < loops; loop++) { + /* Perform DLL offset tuning */ + errs = auto_set_dll_offset(node, dll_offset_mode, num_lmcs, ddr_interface_64b, do_tune); + } + +#if USE_L2_WAYS_LIMIT + /* Restore the l2 set configuration */ + limit_l2_ways(node, ways, ways_print); +#endif + + // perform cleanup on all active LMCs + debug_print("N%d: %s: starting LMCs cleanup.\n", node, __FUNCTION__); + for (lmc = 0; lmc < num_lmcs; lmc++) { + + /* Restore ECC for DRAM tests */ + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); + lmc_config.s.ecc_ena = save_ecc_ena[lmc]; + DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u); + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); +#if 0 + // if RODT change, restore old here... + if (dram_tune_use_rodt >= 0) { + comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc)); + comp_ctl2.s.rodt_ctl = save_rodt[lmc]; + DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(lmc), comp_ctl2.u); + BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc)); + } +#endif + // finally, see if there are any read offset overrides after tuning + // FIXME: provide a way to do write offsets also?? + if (dll_offset_mode == 2) { + for (int by = 0; by < 9; by++) { + if ((s = lookup_env_parameter("ddr%d_tune_byte%d", lmc, by)) != NULL) { + int dllro = strtoul(s, NULL, 10); + change_dll_offset_enable(node, lmc, 0); + load_dll_offset(node, lmc, /* read */2, dllro, by); + change_dll_offset_enable(node, lmc, 1); + } + } + } + } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ + + // finish up... + +#if 0 + // if RODT was overridden during tuning, note restore + if (dram_tune_use_rodt >= 0) { + ddr_print("N%d: restoring RODT %d after tuning.\n", + node, save_rodt[0]); // FIXME? 
use LMC0 + } +#endif + + // put any cores on this node, that were not running at the start, back into reset + uint64_t reset_coremask = bdk_get_running_coremask(node) & ~orig_coremask; + if (reset_coremask) { + ddr_print4("N%d: %s: Stopping cores 0x%lx\n", node, __FUNCTION__, + reset_coremask); + bdk_reset_cores(node, reset_coremask); + } else { + ddr_print4("N%d: %s: leaving cores set to 0x%lx\n", node, __FUNCTION__, + orig_coremask); + } + + return errs; + +} /* perform_dll_offset_tuning */ + +///////////////////////////////////////////////////////////////////////////////////////////// + +///// HW-assist byte DLL offset tuning ////// + +#if 1 +// setup defaults for byte test pattern array +// take these first two from the HRM section 6.9.13 +static const uint64_t byte_pattern_0[] = { + 0xFFAAFFFFFF55FFFFULL, // GP0 + 0x55555555AAAAAAAAULL, // GP1 + 0xAA55AAAAULL, // GP2 +}; +static const uint64_t byte_pattern_1[] = { + 0xFBF7EFDFBF7FFEFDULL, // GP0 + 0x0F1E3C78F0E1C387ULL, // GP1 + 0xF0E1BF7FULL, // GP2 +}; +// this is from Andrew via LFSR with PRBS=0xFFFFAAAA +static const uint64_t byte_pattern_2[] = { + 0xEE55AADDEE55AADDULL, // GP0 + 0x55AADDEE55AADDEEULL, // GP1 + 0x55EEULL, // GP2 +}; +// this is from Mike via LFSR with PRBS=0x4A519909 +static const uint64_t byte_pattern_3[] = { + 0x0088CCEE0088CCEEULL, // GP0 + 0xBB552211BB552211ULL, // GP1 + 0xBB00ULL, // GP2 +}; + +static const uint64_t *byte_patterns[] = { + byte_pattern_0, byte_pattern_1, byte_pattern_2, byte_pattern_3 // FIXME: use all we have +}; +#define NUM_BYTE_PATTERNS ((int)(sizeof(byte_patterns)/sizeof(uint64_t *))) + +#define DEFAULT_BYTE_BURSTS 32 // FIXME: this is what what the longest test usually has +int dram_tune_byte_bursts = DEFAULT_BYTE_BURSTS; +#endif + +static void +setup_hw_pattern(bdk_node_t node, int lmc, const uint64_t *pattern_p) +{ + /* + 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern of choice. + a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower (rising edge) 64 bits of data. + b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper (falling edge) 64 bits of data. + c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower (rising edge <7:0>) and upper + (falling edge <15:8>) ECC data. + */ + DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE0(lmc), pattern_p[0]); + DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE1(lmc), pattern_p[1]); + DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE2(lmc), pattern_p[2]); +} + +#define DEFAULT_PRBS 0xFFFFAAAAUL /* FIXME: maybe try 0x4A519909UL */ + +static void +setup_lfsr_pattern(bdk_node_t node, int lmc, uint64_t data) +{ + uint32_t prbs; + const char *s; + + if ((s = getenv("ddr_lfsr_prbs"))) { + prbs = strtoul(s, NULL, 0); + } else + prbs = DEFAULT_PRBS; // FIXME: from data arg? + + /* + 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1 + here data comes from the LFSR generating a PRBS pattern + CHAR_CTL.EN = 0 + CHAR_CTL.SEL = 0; // for PRBS + CHAR_CTL.DR = 1; + CHAR_CTL.PRBS = setup for whatever type of PRBS to send + CHAR_CTL.SKEW_ON = 1; + */ + BDK_CSR_INIT(char_ctl, node, BDK_LMCX_CHAR_CTL(lmc)); + char_ctl.s.en = 0; + char_ctl.s.sel = 0; + char_ctl.s.dr = 1; + char_ctl.s.prbs = prbs; + char_ctl.s.skew_on = 1; + DRAM_CSR_WRITE(node, BDK_LMCX_CHAR_CTL(lmc), char_ctl.u); +} + +int +choose_best_hw_patterns(bdk_node_t node, int lmc, int mode) +{ + int new_mode = mode; + const char *s; + + switch (mode) { + case DBTRAIN_TEST: // always choose LFSR if chip supports it + if (! 
CAVIUM_IS_MODEL(CAVIUM_CN88XX)) { + int lfsr_enable = 1; + if ((s = getenv("ddr_allow_lfsr"))) { // override? + lfsr_enable = !!strtoul(s, NULL, 0); + } + if (lfsr_enable) + new_mode = DBTRAIN_LFSR; + } + break; + case DBTRAIN_DBI: // possibly can allow LFSR use? + break; + case DBTRAIN_LFSR: // forced already + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) { + ddr_print("ERROR: illegal HW assist mode %d\n", mode); + new_mode = DBTRAIN_TEST; + } + break; + default: + ddr_print("ERROR: unknown HW assist mode %d\n", mode); + } + + if (new_mode != mode) + VB_PRT(VBL_DEV2, "choose_best_hw_patterns: changing mode %d to %d\n", mode, new_mode); + + return new_mode; +} + +int +run_best_hw_patterns(bdk_node_t node, int lmc, uint64_t phys_addr, + int mode, uint64_t *xor_data) +{ + int pattern; + const uint64_t *pattern_p; + int errs, errors = 0; + + // FIXME? always choose LFSR if chip supports it??? + mode = choose_best_hw_patterns(node, lmc, mode); + + if (mode == DBTRAIN_LFSR) { + setup_lfsr_pattern(node, lmc, 0); + errors = test_dram_byte_hw(node, lmc, phys_addr, mode, xor_data); + VB_PRT(VBL_DEV2, "%s: LFSR at A:0x%012lx errors 0x%x\n", + __FUNCTION__, phys_addr, errors); + } else { + for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) { + pattern_p = byte_patterns[pattern]; + setup_hw_pattern(node, lmc, pattern_p); + + errs = test_dram_byte_hw(node, lmc, phys_addr, mode, xor_data); + + VB_PRT(VBL_DEV2, "%s: PATTERN %d at A:0x%012lx errors 0x%x\n", + __FUNCTION__, pattern, phys_addr, errs); + + errors |= errs; + } /* for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) */ + } + return errors; +} + +static void +hw_assist_test_dll_offset(bdk_node_t node, int dll_offset_mode, + int lmc, int bytelane) +{ + int byte_offset, new_best_offset[9]; + int rank_delay_start[4][9]; + int rank_delay_count[4][9]; + int rank_delay_best_start[4][9]; + int rank_delay_best_count[4][9]; + int errors[4], off_errors, tot_errors; + int num_lmcs = __bdk_dram_get_num_lmc(node); + int rank_mask, rankx, active_ranks; + int pattern; + const uint64_t *pattern_p; + int byte; + char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write"; + int pat_best_offset[9]; + uint64_t phys_addr; + int pat_beg, pat_end; + int rank_beg, rank_end; + int byte_lo, byte_hi; + uint64_t hw_rank_offset; + // FIXME? always choose LFSR if chip supports it??? + int mode = choose_best_hw_patterns(node, lmc, DBTRAIN_TEST); + + if (bytelane == 0x0A) { // all bytelanes + byte_lo = 0; + byte_hi = 8; + } else { // just 1 + byte_lo = byte_hi = bytelane; + } + + BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(lmc)); + rank_mask = lmcx_config.s.init_status; + // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs + hw_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2)); + + debug_print("N%d: %s: starting LMC%d with rank offset 0x%lx\n", + node, __FUNCTION__, lmc, hw_rank_offset); + + // start of pattern loop + // we do the set of tests for each pattern supplied... + + memset(new_best_offset, 0, sizeof(new_best_offset)); + for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) { + + memset(pat_best_offset, 0, sizeof(pat_best_offset)); + + if (mode == DBTRAIN_TEST) { + pattern_p = byte_patterns[pattern]; + setup_hw_pattern(node, lmc, pattern_p); + } else { + setup_lfsr_pattern(node, lmc, 0); + } + + // now loop through all legal values for the DLL byte offset... + +#define BYTE_OFFSET_INCR 3 // FIXME: make this tunable? 
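+/* (With a sampling granularity of BYTE_OFFSET_INCR taps, a passing run
+ * recorded as `count` covers offsets [start, start + count - BYTE_OFFSET_INCR];
+ * that last-passing-offset arithmetic is what the rank-window intersection
+ * further below relies on. A sketch, using a hypothetical helper:)
+ */
+#if 0
+static int window_end(int start, int count)
+{
+    return start + count - BYTE_OFFSET_INCR; /* last passing offset of the run */
+}
+#endif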
+ + tot_errors = 0; + + memset(rank_delay_count, 0, sizeof(rank_delay_count)); + memset(rank_delay_start, 0, sizeof(rank_delay_start)); + memset(rank_delay_best_count, 0, sizeof(rank_delay_best_count)); + memset(rank_delay_best_start, 0, sizeof(rank_delay_best_start)); + + for (byte_offset = -63; byte_offset < 64; byte_offset += BYTE_OFFSET_INCR) { + + // do the setup on the active LMC + // set the bytelanes DLL offsets + change_dll_offset_enable(node, lmc, 0); + load_dll_offset(node, lmc, dll_offset_mode, byte_offset, bytelane); // FIXME? bytelane? + change_dll_offset_enable(node, lmc, 1); + + bdk_watchdog_poke(); + + // run the test on each rank + // only 1 call per rank should be enough, let the bursts, loops, etc, control the load... + + off_errors = 0; // errors for this byte_offset, all ranks + + active_ranks = 0; + + for (rankx = 0; rankx < 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + phys_addr = hw_rank_offset * active_ranks; + // FIXME: now done by test_dram_byte_hw() + //phys_addr |= (lmc << 7); + //phys_addr = bdk_numa_get_address(node, phys_addr); // map to node + + active_ranks++; + + // NOTE: return is a now a bitmask of the erroring bytelanes.. + errors[rankx] = test_dram_byte_hw(node, lmc, phys_addr, mode, NULL); + + for (byte = byte_lo; byte <= byte_hi; byte++) { // do bytelane(s) + + // check errors + if (errors[rankx] & (1 << byte)) { // yes, an error in the byte lane in this rank + off_errors |= (1 << byte); + + ddr_print5("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: Address 0x%012lx errors 0x%x\n", + node, lmc, rankx, bytelane, mode_str, + byte_offset, phys_addr, errors[rankx]); + + if (rank_delay_count[rankx][byte] > 0) { // had started run + ddr_print5("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: stopping a run here\n", + node, lmc, rankx, bytelane, mode_str, byte_offset); + rank_delay_count[rankx][byte] = 0; // stop now + } + // FIXME: else had not started run - nothing else to do? + } else { // no error in the byte lane + if (rank_delay_count[rankx][byte] == 0) { // first success, set run start + ddr_print5("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: starting a run here\n", + node, lmc, rankx, bytelane, mode_str, byte_offset); + rank_delay_start[rankx][byte] = byte_offset; + } + rank_delay_count[rankx][byte] += BYTE_OFFSET_INCR; // bump run length + + // is this now the biggest window? 
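+                        /* (same longest-run update as in auto_set_dll_offset) */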
+ if (rank_delay_count[rankx][byte] > rank_delay_best_count[rankx][byte]) { + rank_delay_best_count[rankx][byte] = rank_delay_count[rankx][byte]; + rank_delay_best_start[rankx][byte] = rank_delay_start[rankx][byte]; + debug_print("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: updating best to %d/%d\n", + node, lmc, rankx, bytelane, mode_str, byte_offset, + rank_delay_best_start[rankx][byte], rank_delay_best_count[rankx][byte]); + } + } + } /* for (byte = byte_lo; byte <= byte_hi; byte++) */ + } /* for (rankx = 0; rankx < 4; rankx++) */ + + tot_errors |= off_errors; + + } /* for (byte_offset = -63; byte_offset < 64; byte_offset += BYTE_OFFSET_INCR) */ + + // now choose the best byte_offsets for this pattern according to the best windows of the tested ranks + // calculate offset by constructing an average window from the rank windows + for (byte = byte_lo; byte <= byte_hi; byte++) { + + pat_beg = -999; + pat_end = 999; + + for (rankx = 0; rankx < 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + rank_beg = rank_delay_best_start[rankx][byte]; + pat_beg = max(pat_beg, rank_beg); + rank_end = rank_beg + rank_delay_best_count[rankx][byte] - BYTE_OFFSET_INCR; + pat_end = min(pat_end, rank_end); + + ddr_print5("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test: Rank Window %3d:%3d\n", + node, lmc, rankx, bytelane, mode_str, rank_beg, rank_end); + + } /* for (rankx = 0; rankx < 4; rankx++) */ + + pat_best_offset[byte] = (pat_end + pat_beg) / 2; + ddr_print4("N%d.LMC%d: Bytelane %d DLL %s Offset Test: Pattern %d Average %3d\n", + node, lmc, byte, mode_str, pattern, pat_best_offset[byte]); + +#if 0 + // FIXME: next print the window counts + sprintf(sbuffer, "N%d.LMC%d Pattern %d: DLL %s Offset Count ", + node, lmc, pattern, mode_str); + printf("%-45s : ", sbuffer); + printf(" %3d", byte_delay_best_count); + printf("\n"); +#endif + + new_best_offset[byte] += pat_best_offset[byte]; // sum the pattern averages + } /* for (byte = byte_lo; byte <= byte_hi; byte++) */ + } /* for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) */ + // end of pattern loop + + ddr_print("N%d.LMC%d: HW DLL %s Offset Amount : ", + node, lmc, mode_str); + + for (byte = byte_hi; byte >= byte_lo; --byte) { // print in decending byte index order + new_best_offset[byte] = divide_nint(new_best_offset[byte], NUM_BYTE_PATTERNS); // create the new average NINT + + // print the best offsets from all patterns + + if (bytelane == 0x0A) // print just the offset of all the bytes + ddr_print("%5d ", new_best_offset[byte]); + else + ddr_print("(byte %d) %5d ", byte, new_best_offset[byte]); + + +#if 1 + // done with testing, load up the best offsets we found... + change_dll_offset_enable(node, lmc, 0); // disable offsets while we load... + load_dll_offset(node, lmc, dll_offset_mode, new_best_offset[byte], byte); + change_dll_offset_enable(node, lmc, 1); // re-enable the offsets now that we are done loading +#endif + } /* for (byte = byte_hi; byte >= byte_lo; --byte) */ + + ddr_print("\n"); + +#if 0 + // run the test one last time + // print whether there are errors or not, but only when verbose... 
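+    /* (NB: run_test_dram_byte_threads() and `bytemask` are stale names in
+     * this disabled block; the live equivalent is run_dram_tuning_threads()
+     * with an explicit byte mask.) */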
+ tot_errors = run_test_dram_byte_threads(node, num_lmcs, bytemask); + printf("N%d.LMC%d: Bytelane %d DLL %s Offset Final Test: errors 0x%x\n", + node, lmc, bytelane, mode_str, tot_errors); +#endif +} + +/* + * Automatically adjust the DLL offset for the selected bytelane using hardware-assist + */ +int perform_HW_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int bytelane) +{ + int save_ecc_ena[4]; + bdk_lmcx_config_t lmc_config; + int lmc, num_lmcs = __bdk_dram_get_num_lmc(node); + const char *s; + //bdk_lmcx_comp_ctl2_t comp_ctl2; + int loops = 1, loop; + + // see if we want to do the tuning more than once per LMC... + if ((s = getenv("ddr_tune_ecc_loops"))) { + loops = strtoul(s, NULL, 0); + } + + // allow override of the test repeats (bursts) + if ((s = getenv("ddr_tune_byte_bursts")) != NULL) { + dram_tune_byte_bursts = strtoul(s, NULL, 10); + } + + // print current working values + ddr_print2("N%d: H/W Tuning for bytelane %d will use %d loops, %d bursts, and %d patterns.\n", + node, bytelane, loops, dram_tune_byte_bursts, + NUM_BYTE_PATTERNS); + + // FIXME? get flag from LMC0 only + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(0)); + + // do once for each active LMC + + for (lmc = 0; lmc < num_lmcs; lmc++) { + + ddr_print4("N%d: H/W Tuning: starting LMC%d bytelane %d tune.\n", node, lmc, bytelane); + + /* Enable ECC for the HW tests */ + // NOTE: we do enable ECC, but the HW tests used will not generate "visible" errors + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); + save_ecc_ena[lmc] = lmc_config.s.ecc_ena; + lmc_config.s.ecc_ena = 1; + DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u); + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); + + // testing is done on a single LMC at a time + // FIXME: for now, loop here to show what happens multiple times + for (loop = 0; loop < loops; loop++) { + /* Perform DLL offset tuning */ + //auto_set_dll_offset(node, 1 /* 1=write */, lmc, bytelane); + hw_assist_test_dll_offset(node, 2 /* 2=read */, lmc, bytelane); + } + + // perform cleanup on active LMC + ddr_print4("N%d: H/W Tuning: finishing LMC%d bytelane %d tune.\n", node, lmc, bytelane); + + /* Restore ECC for DRAM tests */ + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); + lmc_config.s.ecc_ena = save_ecc_ena[lmc]; + DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u); + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); + + // finally, see if there are any read offset overrides after tuning + for (int by = 0; by < 9; by++) { + if ((s = lookup_env_parameter("ddr%d_tune_byte%d", lmc, by)) != NULL) { + int dllro = strtoul(s, NULL, 10); + change_dll_offset_enable(node, lmc, 0); + load_dll_offset(node, lmc, 2 /* 2=read */, dllro, by); + change_dll_offset_enable(node, lmc, 1); + } + } + + } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ + + // finish up... + + return 0; + +} /* perform_HW_dll_offset_tuning */ diff --git a/src/vendorcode/cavium/bdk/libdram/dram-util.h b/src/vendorcode/cavium/bdk/libdram/dram-util.h new file mode 100644 index 0000000000..f8ab6c1552 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libdram/dram-util.h @@ -0,0 +1,96 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. 
+* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ + +/** + * Small utility functions for use by libdram internally. These + * are not meant for users's of the libdram API. + */ + +/** + * Standard min(a,b) macro + */ +#define min(X, Y) \ + ({ typeof (X) __x = (X); \ + typeof (Y) __y = (Y); \ + (__x < __y) ? __x : __y; }) + +/** + * Standard max(a,b) macro + */ +#define max(X, Y) \ + ({ typeof (X) __x = (X); typeof(Y) __y = (Y); \ + (__x > __y) ? __x : __y; }) + +/** + * Absolute value of an integer + * + * @param v + * + * @return + */ +static inline int64_t _abs(int64_t v) +{ + return (v < 0) ? -v : v; +} + +/** + * Sign of an integer + * + * @param v + * + * @return + */ +static inline int64_t _sign(int64_t v) +{ + return v < 0; +} + +/** + * Divide and round results up to the next higher integer. + * + * @param dividend + * @param divisor + * + * @return + */ +static inline uint64_t divide_roundup(uint64_t dividend, uint64_t divisor) +{ + return (dividend + divisor - 1) / divisor; +} + diff --git a/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.c b/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.c new file mode 100644 index 0000000000..cdc799744f --- /dev/null +++ b/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.c @@ -0,0 +1,2165 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. 
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/* $Revision: 102369 $ */
+
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-l2c.h"
+#include "dram-internal.h"
+
+/* Define DDR_DEBUG to debug the DDR interface. This also enables the
+** output necessary for review by Cavium Inc. */
+/* #define DDR_DEBUG */
+
+
+static int global_ddr_clock_initialized = 0;
+static int global_ddr_memory_preserved = 0;
+
+#if 1
+uint64_t max_p1 = 0UL;
+#endif
+
+/*
+ * SDRAM Physical Address (figure 6-2 from the HRM)
+ *
+ * +---------+------+----------------------+---------------+--------+---+------+-----+
+ * |  Dimm   | Rank |         Row          |      Col      |  Bank  | C | Col  | Bus |
+ * +---------+------+----------------------+---------------+--------+---+------+-----+
+ *
+ * Field widths and positions: Dimm is 0 or 1 bit, starting at
+ * LMC_CONFIG[PBANK_LSB]+X; Rank is 0 or 1 bit; Row is 12-18 bits, starting
+ * at LMC_CONFIG[ROW_LSB]+X; Col (upper) is 6-8 bits; Bank is 3 or 4 bits;
+ * Bus is 1 or 2 bits (X=1 or 2, respectively).
+ *
+ * Bus  = Selects the byte on the 72-bit DDR3 bus
+ * Col  = Column Address for the memory part (10-12 bits)
+ * C    = Selects the LMC that services the reference
+ *        (2 bits for 4 LMC mode, 1 bit for 2 LMC mode; X=width)
+ * Bank = Bank Address for the memory part (DDR3=3 bits, DDR4=3 or 4 bits)
+ * Row  = Row Address for the memory part (12-18 bits)
+ * Rank = Optional Rank Address for dual-rank DIMMs
+ *        (present when LMC_CONFIG[RANK_ENA] is set)
+ * Dimm = Optional DIMM address (present with more than 1 DIMM)
+ */
+
+
+/**
+ * Divide and round results to the nearest integer.
+ *
+ * @param dividend Value to divide
+ * @param divisor  Value to divide by
+ *
+ * @return Quotient, rounded to the nearest integer
+ */
+uint64_t divide_nint(uint64_t dividend, uint64_t divisor)
+{
+    uint64_t quotient, remainder;
+    quotient = dividend / divisor;
+    remainder = dividend % divisor;
+    return quotient + ((remainder * 2) >= divisor);
+}
+
+/* Sometimes the pass/fail results for all possible delay settings
+ * determined by the read-leveling sequence are too forgiving. This
+ * usually occurs for DCLK speeds below 300 MHz. As a result the
+ * passing range is exaggerated.
This function accepts the bitmask + * results from the sequence and truncates the passing range to a + * reasonable range and recomputes the proper deskew setting. + */ + +/* Default ODT config must disable ODT */ +/* Must be const (read only) so that the structure is in flash */ +const dimm_odt_config_t disable_odt_config[] = { + /* DDR4 needs an additional field in the struct (odt_mask2) */ + /* DIMMS ODT_ENA ODT_MASK ODT_MASK1 ODT_MASK2 QS_DIC RODT_CTL */ + /* ===== ======= ======== ========= ========= ====== ======== */ + /* 1 */ { 0, 0x0000, {.u = 0x0000}, {.u = 0x0000}, 0, 0x0000 }, + /* 2 */ { 0, 0x0000, {.u = 0x0000}, {.u = 0x0000}, 0, 0x0000 }, + /* 3 */ { 0, 0x0000, {.u = 0x0000}, {.u = 0x0000}, 0, 0x0000 }, + /* 4 */ { 0, 0x0000, {.u = 0x0000}, {.u = 0x0000}, 0, 0x0000 }, +}; +/* Memory controller setup function */ +static int init_octeon_dram_interface(bdk_node_t node, + const ddr_configuration_t *ddr_configuration, + uint32_t ddr_hertz, + uint32_t cpu_hertz, + uint32_t ddr_ref_hertz, + int board_type, + int board_rev_maj, + int board_rev_min, + int ddr_interface_num, + uint32_t ddr_interface_mask) +{ + uint32_t mem_size_mbytes = 0; + int lmc_restart_retries = 0; + + const char *s; + if ((s = lookup_env_parameter("ddr_timing_hertz")) != NULL) + ddr_hertz = strtoul(s, NULL, 0); + + restart_lmc_init: + + /* Poke the watchdog timer so it doesn't expire during DRAM init */ + bdk_watchdog_poke(); + + mem_size_mbytes = init_octeon3_ddr3_interface(node, + ddr_configuration, + ddr_hertz, + cpu_hertz, + ddr_ref_hertz, + board_type, + board_rev_maj, + board_rev_min, + ddr_interface_num, + ddr_interface_mask); +#define DEFAULT_RESTART_RETRIES 3 + if (mem_size_mbytes == 0) { // means restart is possible + if (lmc_restart_retries < DEFAULT_RESTART_RETRIES) { + lmc_restart_retries++; + ddr_print("N%d.LMC%d Configuration problem: attempting LMC reset and init restart %d\n", + node, ddr_interface_num, lmc_restart_retries); + // re-assert RESET first, as that is the assumption of the init code + if (!ddr_memory_preserved(node)) + cn88xx_lmc_ddr3_reset(node, ddr_interface_num, LMC_DDR3_RESET_ASSERT); + goto restart_lmc_init; + } else { + error_print("INFO: N%d.LMC%d Configuration: fatal problem remains after %d LMC init retries - Resetting node...\n", + node, ddr_interface_num, lmc_restart_retries); + bdk_wait_usec(500000); + bdk_reset_chip(node); + } + } + + error_print("N%d.LMC%d Configuration Completed: %d MB\n", + node, ddr_interface_num, mem_size_mbytes); + return mem_size_mbytes; +} + +#define DO_LIKE_RANDOM_XOR 1 + +#if !DO_LIKE_RANDOM_XOR +/* + * Suggested testing patterns. 
+ * + * 0xFFFF_FFFF_FFFF_FFFF + * 0xAAAA_AAAA_AAAA_AAAA + * 0xFFFF_FFFF_FFFF_FFFF + * 0xAAAA_AAAA_AAAA_AAAA + * 0x5555_5555_5555_5555 + * 0xAAAA_AAAA_AAAA_AAAA + * 0xFFFF_FFFF_FFFF_FFFF + * 0xAAAA_AAAA_AAAA_AAAA + * 0xFFFF_FFFF_FFFF_FFFF + * 0x5555_5555_5555_5555 + * 0xFFFF_FFFF_FFFF_FFFF + * 0x5555_5555_5555_5555 + * 0xAAAA_AAAA_AAAA_AAAA + * 0x5555_5555_5555_5555 + * 0xFFFF_FFFF_FFFF_FFFF + * 0x5555_5555_5555_5555 + * + * or possibly + * + * 0xFDFD_FDFD_FDFD_FDFD + * 0x8787_8787_8787_8787 + * 0xFEFE_FEFE_FEFE_FEFE + * 0xC3C3_C3C3_C3C3_C3C3 + * 0x7F7F_7F7F_7F7F_7F7F + * 0xE1E1_E1E1_E1E1_E1E1 + * 0xBFBF_BFBF_BFBF_BFBF + * 0xF0F0_F0F0_F0F0_F0F0 + * 0xDFDF_DFDF_DFDF_DFDF + * 0x7878_7878_7878_7878 + * 0xEFEF_EFEF_EFEF_EFEF + * 0x3C3C_3C3C_3C3C_3C3C + * 0xF7F7_F7F7_F7F7_F7F7 + * 0x1E1E_1E1E_1E1E_1E1E + * 0xFBFB_FBFB_FBFB_FBFB + * 0x0F0F_0F0F_0F0F_0F0F + */ + +static const uint64_t test_pattern[] = { + 0xAAAAAAAAAAAAAAAAULL, + 0x5555555555555555ULL, + 0xAAAAAAAAAAAAAAAAULL, + 0x5555555555555555ULL, + 0xAAAAAAAAAAAAAAAAULL, + 0x5555555555555555ULL, + 0xAAAAAAAAAAAAAAAAULL, + 0x5555555555555555ULL, + 0xAAAAAAAAAAAAAAAAULL, + 0x5555555555555555ULL, + 0xAAAAAAAAAAAAAAAAULL, + 0x5555555555555555ULL, + 0xAAAAAAAAAAAAAAAAULL, + 0x5555555555555555ULL, + 0xAAAAAAAAAAAAAAAAULL, + 0x5555555555555555ULL, + 0xAAAAAAAAAAAAAAAAULL, + 0x5555555555555555ULL, + 0xAAAAAAAAAAAAAAAAULL, + 0x5555555555555555ULL, + 0xAAAAAAAAAAAAAAAAULL, + 0x5555555555555555ULL, + 0xAAAAAAAAAAAAAAAAULL, + 0x5555555555555555ULL, + 0xAAAAAAAAAAAAAAAAULL, + 0x5555555555555555ULL, + 0xAAAAAAAAAAAAAAAAULL, + 0x5555555555555555ULL, + 0xAAAAAAAAAAAAAAAAULL, + 0x5555555555555555ULL, + 0xAAAAAAAAAAAAAAAAULL, + 0x5555555555555555ULL, +}; +#endif /* !DO_LIKE_RANDOM_XOR */ + +int test_dram_byte(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, uint64_t *xor_data) +{ + uint64_t p1, p2, d1, d2; + uint64_t v, v1; + uint64_t p2offset = 0x10000000; + uint64_t datamask; + uint64_t xor; + int i, j, k; + int errors = 0; + int index; +#if DO_LIKE_RANDOM_XOR + uint64_t pattern1 = bdk_rng_get_random64(); + uint64_t this_pattern; +#endif + uint64_t bad_bits[2] = {0,0}; + + // When doing in parallel, the caller must provide full 8-byte bitmask. + // Byte lanes may be clear in the mask to indicate no testing on that lane. + datamask = bitmask; + + // final address must include LMC and node + p |= (lmc<<7); /* Map address into proper interface */ + p = bdk_numa_get_address(node, p); /* Map to node */ + + // Not on THUNDER: p |= 1ull<<63; + + /* Add offset to both test regions to not clobber boot stuff + * when running from L2. + */ + p += 0x10000000; // FIXME? was: 0x4000000; // make sure base is out of the way of boot + + /* The loop ranges and increments walk through a range of addresses avoiding bits that alias + * to different memory interfaces (LMCs) on the CN88XX; ie we want to limit activity to a + * single memory channel. 
+ */ + + /* Store something into each location first */ + // NOTE: the ordering of loops is purposeful: fill full cachelines and flush + for (k = 0; k < (1 << 20); k += (1 << 14)) { + for (j = 0; j < (1 << 12); j += (1 << 9)) { + for (i = 0; i < (1 << 7); i += 8) { + index = i + j + k; + p1 = p + index; + p2 = p1 + p2offset; + +#if DO_LIKE_RANDOM_XOR + v = pattern1 * p1; + v1 = v; // write the same thing to both areas +#else + v = 0ULL; + v1 = v; +#endif + __bdk_dram_write64(p1, v); + __bdk_dram_write64(p2, v1); + + /* Write back and invalidate the cache lines + * + * For OCX we cannot limit the number of L2 ways + * so instead we just write back and invalidate + * the L2 cache lines. This is not possible + * when booting remotely, however so this is + * only enabled for U-Boot right now. + * Potentially the BDK can also take advantage + * of this. + */ + BDK_CACHE_WBI_L2(p1); + BDK_CACHE_WBI_L2(p2); + } + } + } + + BDK_DCACHE_INVALIDATE; + +#if DO_LIKE_RANDOM_XOR + this_pattern = bdk_rng_get_random64(); +#endif + + // modify the contents of each location in some way + // NOTE: the ordering of loops is purposeful: modify full cachelines and flush + for (k = 0; k < (1 << 20); k += (1 << 14)) { + for (j = 0; j < (1 << 12); j += (1 << 9)) { + for (i = 0; i < (1 << 7); i += 8) { + index = i + j + k; + p1 = p + index; + p2 = p1 + p2offset; +#if DO_LIKE_RANDOM_XOR + v = __bdk_dram_read64(p1) ^ this_pattern; + v1 = __bdk_dram_read64(p2) ^ this_pattern; +#else + v = test_pattern[index%(sizeof(test_pattern)/sizeof(uint64_t))]; + v &= datamask; + v1 = ~v; +#endif + + debug_print("[0x%016llX]: 0x%016llX, [0x%016llX]: 0x%016llX\n", + p1, v, p2, v1); + + __bdk_dram_write64(p1, v); + __bdk_dram_write64(p2, v1); + + /* Write back and invalidate the cache lines + * + * For OCX we cannot limit the number of L2 ways + * so instead we just write back and invalidate + * the L2 cache lines. This is not possible + * when booting remotely, however so this is + * only enabled for U-Boot right now. + * Potentially the BDK can also take advantage + * of this. + */ + BDK_CACHE_WBI_L2(p1); + BDK_CACHE_WBI_L2(p2); + } + } + } + + BDK_DCACHE_INVALIDATE; + + // test the contents of each location by predicting what should be there + // NOTE: the ordering of loops is purposeful: test full cachelines to detect + // an error occuring in any slot thereof + for (k = 0; k < (1 << 20); k += (1 << 14)) { + for (j = 0; j < (1 << 12); j += (1 << 9)) { + for (i = 0; i < (1 << 7); i += 8) { + index = i + j + k; + p1 = p + index; + p2 = p1 + p2offset; +#if DO_LIKE_RANDOM_XOR + v = (p1 * pattern1) ^ this_pattern; // FIXME: this should predict what we find...??? 
+ d1 = __bdk_dram_read64(p1);
+ d2 = __bdk_dram_read64(p2);
+#else
+ v = test_pattern[index%(sizeof(test_pattern)/sizeof(uint64_t))];
+ d1 = __bdk_dram_read64(p1);
+ d2 = ~__bdk_dram_read64(p2);
+#endif
+ debug_print("[0x%016llX]: 0x%016llX, [0x%016llX]: 0x%016llX\n",
+ p1, d1, p2, d2);
+
+ xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes
+
+ if (!xor)
+ continue;
+
+ // accumulate bad bits
+ bad_bits[0] |= xor;
+ //bad_bits[1] |= ~mpr_data1 & 0xffUL; // cannot do ECC here
+
+ int bybit = 1;
+ uint64_t bymsk = 0xffULL; // start in byte lane 0
+ while (xor != 0) {
+ debug_print("ERROR: [0x%016llX] [0x%016llX] expected 0x%016llX xor %016llX\n",
+ p1, p2, v, xor);
+ if (xor & bymsk) { // error(s) in this lane
+ errors |= bybit; // set the byte error bit
+ xor &= ~bymsk; // clear byte lane in error bits
+ datamask &= ~bymsk; // clear the byte lane in the mask
+ if (datamask == 0) { // nothing left to do
+ goto done_now; // completely done when errors found in all byte lanes in datamask
+ }
+ }
+ bymsk <<= 8; // move mask into next byte lane
+ bybit <<= 1; // move bit into next byte position
+ }
+ }
+ }
+ }
+
+ done_now:
+ if (xor_data != NULL) { // send the bad bits back...
+ xor_data[0] = bad_bits[0];
+ xor_data[1] = bad_bits[1]; // let it be zeroed
+ }
+ return errors;
+}
+
+// NOTE: "mode" argument:
+// DBTRAIN_TEST: for testing using GP patterns, includes ECC
+// DBTRAIN_DBI: for DBI deskew training behavior (uses GP patterns)
+// DBTRAIN_LFSR: for testing using LFSR patterns, includes ECC
+// NOTE: trust the caller to specify the correct/supported mode
+//
+int test_dram_byte_hw(bdk_node_t node, int ddr_interface_num,
+ uint64_t p, int mode, uint64_t *xor_data)
+{
+ uint64_t p1;
+ uint64_t k;
+ int errors = 0;
+
+ uint64_t mpr_data0, mpr_data1;
+ uint64_t bad_bits[2] = {0,0};
+
+ int node_address, lmc, dimm;
+ int prank, lrank;
+ int bank, row, col;
+ int save_or_dis;
+ int byte;
+ int ba_loop, ba_bits;
+
+ bdk_lmcx_rlevel_ctl_t rlevel_ctl;
+ bdk_lmcx_dbtrain_ctl_t dbtrain_ctl;
+
+ int bank_errs;
+
+ // FIXME: K iterations set to 4 for now.
+ // FIXME: decrement to increase iterations.
+ // FIXME: must be no less than 22 to stay above an LMC hash field.
+ int kshift = 26;
+ const char *s;
+
+ // allow override of the default setting for kshift
+ if ((s = getenv("ddr_tune_set_kshift")) != NULL) {
+ int temp = strtoul(s, NULL, 0);
+ if ((temp < 22) || (temp > 27)) {
+ ddr_print("N%d.LMC%d: ILLEGAL override of kshift to %d, using default %d\n",
+ node, ddr_interface_num, temp, kshift);
+ } else {
+ VB_PRT(VBL_DEV2, "N%d.LMC%d: overriding kshift (%d) to %d\n",
+ node, ddr_interface_num, kshift, temp);
+ kshift = temp;
+ }
+ }
+
+ /*
+ 1) Make sure that RLEVEL_CTL[OR_DIS] = 0.
+ */
+ rlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num));
+ save_or_dis = rlevel_ctl.s.or_dis;
+ rlevel_ctl.s.or_dis = 0; /* or_dis must be disabled for this sequence */
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num), rlevel_ctl.u);
+
+ /*
+ NOTE: this step done in the calling routine(s)
+ 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern of choice.
+ a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower (rising edge) 64 bits of data.
+ b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper (falling edge) 64 bits of data.
+ c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower (rising edge <7:0>) and upper
+ (falling edge <15:8>) ECC data.
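+ As a concrete example (taken from the DBI switchover code later in
+ this file, not from the HRM text): GENERAL_PURPOSE0/1 = 0x00ff00ff00ff00ff
+ and GENERAL_PURPOSE2 = 0x0000 produce a clk/2 data pattern on the bus.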
+ */
+
+ // final address must include LMC and node
+ p |= (ddr_interface_num << 7); /* Map address into proper interface */
+ p = bdk_numa_get_address(node, p); /* Map to node */
+
+ /*
+ * Add base offset to both test regions to not clobber u-boot stuff
+ * when running from L2 for NAND boot.
+ */
+ p += 0x10000000; // offset to 256MB
+
+ errors = 0;
+
+ bdk_dram_address_extract_info(p, &node_address, &lmc, &dimm, &prank, &lrank, &bank, &row, &col);
+ VB_PRT(VBL_DEV2, "test_dram_byte_hw: START at A:0x%012lx, N%d L%d D%d R%d/%d B%1x Row:%05x Col:%05x\n",
+ p, node_address, lmc, dimm, prank, lrank, bank, row, col);
+
+ // only check once per call, and ignore if no match...
+ if ((int)node != node_address) {
+ error_print("ERROR: Node address mismatch; ignoring...\n");
+ return 0;
+ }
+ if (lmc != ddr_interface_num) {
+ error_print("ERROR: LMC address mismatch\n");
+ return 0;
+ }
+
+ /*
+ 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as it’s a one-shot operation).
+ This is to get into the habit of resetting PHY’s SILO to the original 0 location.
+ */
+ BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
+ phy_ctl.s.phy_reset = 1);
+
+ /* Walk through a range of addresses avoiding bits that alias
+ * interfaces on the CN88XX.
+ */
+
+ // FIXME: want to try to keep the K increment from affecting the LMC via hash,
+ // FIXME: so keep it above bit 21
+ // NOTE: we also want to keep k less than the base offset of bit 28 (256MB)
+
+ for (k = 0; k < (1UL << 28); k += (1UL << kshift)) {
+
+ // FIXME: the sequence will iterate over 1/2 cacheline
+ // FIXME: for each unit specified in "read_cmd_count",
+ // FIXME: so, we set up each sequence to do the max cachelines it can
+
+ p1 = p + k;
+
+ bdk_dram_address_extract_info(p1, &node_address, &lmc, &dimm, &prank, &lrank, &bank, &row, &col);
+ VB_PRT(VBL_DEV3, "test_dram_byte_hw: NEXT iteration at A:0x%012lx, N%d L%d D%d R%d/%d B%1x Row:%05x Col:%05x\n",
+ p1, node_address, lmc, dimm, prank, lrank, bank, row, col);
+
+ /*
+ 2) Setup the fields of the CSR DBTRAIN_CTL as follows:
+ a. COL, ROW, BA, BG, PRANK point to the starting point of the address.
+ You can just set them to all 0.
+ b. RW_TRAIN – set this to 1.
+ c. TCCD_L – set this to 0.
+ d. READ_CMD_COUNT – instructs the sequence how many writes/reads to perform.
+ It is a 5-bit field, so set it to 31 for the maximum # of r/w.
+ */
+ dbtrain_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DBTRAIN_CTL(ddr_interface_num));
+ dbtrain_ctl.s.column_a = col;
+ dbtrain_ctl.s.row_a = row;
+ dbtrain_ctl.s.bg = (bank >> 2) & 3;
+ dbtrain_ctl.s.prank = (dimm * 2) + prank; // FIXME?
+ dbtrain_ctl.s.lrank = lrank; // FIXME?
+ dbtrain_ctl.s.activate = (mode == DBTRAIN_DBI);
+ dbtrain_ctl.s.write_ena = 1;
+ dbtrain_ctl.s.read_cmd_count = 31; // max count pass 1.x
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) // added 81xx and 83xx
+ dbtrain_ctl.s.cmd_count_ext = 3; // max count pass 2.x
+ else
+ dbtrain_ctl.s.cmd_count_ext = 0; // max count pass 1.x
+ dbtrain_ctl.s.rw_train = 1;
+ dbtrain_ctl.s.tccd_sel = (mode == DBTRAIN_DBI);
+
+ // LFSR should only be on when chip supports it...
+ dbtrain_ctl.s.lfsr_pattern_sel = (mode == DBTRAIN_LFSR) ?
1 : 0; + + bank_errs = 0; + + // for each address, iterate over the 4 "banks" in the BA + for (ba_loop = 0, ba_bits = bank & 3; + ba_loop < 4; + ba_loop++, ba_bits = (ba_bits + 1) & 3) + { + dbtrain_ctl.s.ba = ba_bits; + DRAM_CSR_WRITE(node, BDK_LMCX_DBTRAIN_CTL(ddr_interface_num), dbtrain_ctl.u); + + VB_PRT(VBL_DEV3, "test_dram_byte_hw: DBTRAIN: Pr:%d Lr:%d Bg:%d Ba:%d Row:%05x Col:%05x\n", + dbtrain_ctl.s.prank, dbtrain_ctl.s.lrank, + dbtrain_ctl.s.bg, dbtrain_ctl.s.ba, row, col); + /* + 4) Kick off the sequence (SEQ_CTL[SEQ_SEL] = 14, SEQ_CTL[INIT_START] = 1). + 5) Poll on SEQ_CTL[SEQ_COMPLETE] for completion. + */ + perform_octeon3_ddr3_sequence(node, prank, ddr_interface_num, 14); + + /* + 6) Read MPR_DATA0 and MPR_DATA1 for results: + a. MPR_DATA0[MPR_DATA<63:0>] – comparison results for DQ63:DQ0. + (1 means MATCH, 0 means FAIL). + b. MPR_DATA1[MPR_DATA<7:0>] – comparison results for ECC bit7:0. + */ + mpr_data0 = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA0(ddr_interface_num)); + mpr_data1 = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA1(ddr_interface_num)); + + /* + 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as it’s a one-shot operation). + This is to get into the habit of resetting PHY’s SILO to the original 0 location. + */ + BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num), + phy_ctl.s.phy_reset = 1); + + if (mode == DBTRAIN_DBI) + continue; // bypass any error checking or updating when DBI mode + + // data bytes + if (~mpr_data0) { + for (byte = 0; byte < 8; byte++) { + if ((~mpr_data0 >> (8 * byte)) & 0xffUL) + bank_errs |= (1 << byte); + } + // accumulate bad bits + bad_bits[0] |= ~mpr_data0; + } + + // include ECC byte errors + if (~mpr_data1 & 0xffUL) { + bank_errs |= (1 << 8); + bad_bits[1] |= ~mpr_data1 & 0xffUL; + } + + } /* for (int ba_loop = 0; ba_loop < 4; ba_loop++) */ + + errors |= bank_errs; + + } /* end for (k=...) */ + + rlevel_ctl.s.or_dis = save_or_dis; + DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num), rlevel_ctl.u); + + if ((mode != DBTRAIN_DBI) && (xor_data != NULL)) { // send the bad bits back... + xor_data[0] = bad_bits[0]; + xor_data[1] = bad_bits[1]; + } + + return errors; +} + +static void set_ddr_memory_preserved(bdk_node_t node) +{ + global_ddr_memory_preserved |= 0x1 << node; + +} +int ddr_memory_preserved(bdk_node_t node) +{ + return (global_ddr_memory_preserved & (0x1 << node)) != 0; +} + +void perform_ddr_init_sequence(bdk_node_t node, int rank_mask, + int ddr_interface_num) +{ + const char *s; + int ddr_init_loops = 1; + int rankx; + + if ((s = lookup_env_parameter("ddr%d_init_loops", ddr_interface_num)) != NULL) + ddr_init_loops = strtoul(s, NULL, 0); + + while (ddr_init_loops--) { + for (rankx = 0; rankx < 8; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + perform_octeon3_ddr3_sequence(node, (1 << rankx), + ddr_interface_num, 0); /* power-up/init */ + + bdk_wait_usec(1000); /* Wait a while. 
*/ + + if ((s = lookup_env_parameter("ddr_sequence1")) != NULL) { + int sequence1; + sequence1 = strtoul(s, NULL, 0); + perform_octeon3_ddr3_sequence(node, (1 << rankx), + ddr_interface_num, sequence1); + } + + if ((s = lookup_env_parameter("ddr_sequence2")) != NULL) { + int sequence2; + sequence2 = strtoul(s, NULL, 0); + perform_octeon3_ddr3_sequence(node, (1 << rankx), + ddr_interface_num, sequence2); + } + } + } +} + +static void set_ddr_clock_initialized(bdk_node_t node, int ddr_interface, int inited_flag) +{ + int bit = node * 8 + ddr_interface; + if (inited_flag) + global_ddr_clock_initialized |= (0x1 << bit); + else + global_ddr_clock_initialized &= ~(0x1 << bit); +} +static int ddr_clock_initialized(bdk_node_t node, int ddr_interface) +{ + int bit = node * 8 + ddr_interface; + return (!!(global_ddr_clock_initialized & (0x1 << bit))); +} + + +static void cn78xx_lmc_dreset_init (bdk_node_t node, int ddr_interface_num) +{ + /* + * This is the embodiment of the 6.9.4 LMC DRESET Initialization section below. + * + * The remainder of this section describes the sequence for LMCn. + * + * 1. If not done already, write LMC(0..3)_DLL_CTL2 to its reset value + * (except without changing the LMC(0..3)_DLL_CTL2[INTF_EN] value from + * that set in the prior Step 3), including LMC(0..3)_DLL_CTL2[DRESET] = 1. + * + * 2. Without changing any other LMC(0..3)_DLL_CTL2 fields, write + * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] = 1. + */ + + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(ddr_interface_num), + c.s.dll_bringup = 1); + + /* + * 3. Read LMC(0..3)_DLL_CTL2 and wait for the result. + */ + + BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(ddr_interface_num)); + + /* + * 4. Wait for a minimum of 10 LMC CK cycles. + */ + + bdk_wait_usec(1); + + /* + * 5. Without changing any other fields in LMC(0..3)_DLL_CTL2, write + * LMC(0..3)_DLL_CTL2[QUAD_DLL_ENA] = 1. + * LMC(0..3)_DLL_CTL2[QUAD_DLL_ENA] must not change after this point + * without restarting the LMCn DRESET initialization sequence. + */ + + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(ddr_interface_num), + c.s.quad_dll_ena = 1); + + /* + * 6. Read LMC(0..3)_DLL_CTL2 and wait for the result. + */ + + BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(ddr_interface_num)); + + /* + * 7. Wait a minimum of 10 us. + */ + + bdk_wait_usec(10); + + /* + * 8. Without changing any other fields in LMC(0..3)_DLL_CTL2, write + * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] = 0. + * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] must not change after this point + * without restarting the LMCn DRESET initialization sequence. + */ + + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(ddr_interface_num), + c.s.dll_bringup = 0); + + /* + * 9. Read LMC(0..3)_DLL_CTL2 and wait for the result. + */ + + BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(ddr_interface_num)); + + /* + * 10. Without changing any other fields in LMC(0..3)_DLL_CTL2, write + * LMC(0..3)_DLL_CTL2[DRESET] = 0. + * LMC(0..3)_DLL_CTL2[DRESET] must not change after this point without + * restarting the LMCn DRESET initialization sequence. + * + * After completing LMCn DRESET initialization, all LMC CSRs may be + * accessed. Prior to completing LMC DRESET initialization, only + * LMC(0..3)_DDR_PLL_CTL, LMC(0..3)_DLL_CTL2, LMC(0..3)_RESET_CTL, and + * LMC(0..3)_COMP_CTL2 LMC CSRs can be accessed. + */ + + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(ddr_interface_num), + c.s.dreset = 0); + + /* + * NEW STEP - necessary for O73, O78 P2.0, O75, and T88 P2.0 + * McBuggin: #24821 + * + * 11. Wait for a minimum of 10 LMC CK cycles. 
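+ * (For scale: even at a typical DCLK this is tens of nanoseconds --
+ * e.g. 10 cycles at an illustrative 800 MHz is 12.5 ns -- so the
+ * 1 us wait below gives a comfortable margin.)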
+ */ + + bdk_wait_usec(1); +} + +/*static*/ void cn88xx_lmc_ddr3_reset(bdk_node_t node, int ddr_interface_num, int reset) +{ + /* + * 4. Deassert DDRn_RESET_L pin by writing LMC(0..3)_RESET_CTL[DDR3RST] = 1 + * without modifying any other LMC(0..3)_RESET_CTL fields. + * 5. Read LMC(0..3)_RESET_CTL and wait for the result. + * 6. Wait a minimum of 500us. This guarantees the necessary T = 500us + * delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE* assertion. + */ + ddr_print("LMC%d %s DDR_RESET_L\n", ddr_interface_num, + (reset == LMC_DDR3_RESET_DEASSERT) ? "De-asserting" : "Asserting"); + DRAM_CSR_MODIFY(c, node, BDK_LMCX_RESET_CTL(ddr_interface_num), + c.cn8.ddr3rst = reset); + BDK_CSR_READ(node, BDK_LMCX_RESET_CTL(ddr_interface_num)); + bdk_wait_usec(500); +} + +int initialize_ddr_clock(bdk_node_t node, + const ddr_configuration_t *ddr_configuration, + uint32_t cpu_hertz, + uint32_t ddr_hertz, + uint32_t ddr_ref_hertz, + int ddr_interface_num, + uint32_t ddr_interface_mask + ) +{ + const char *s; + + if (ddr_clock_initialized(node, ddr_interface_num)) + return 0; + + if (!ddr_clock_initialized(node, 0)) { /* Do this once */ + int i; + bdk_lmcx_reset_ctl_t reset_ctl; + /* Check to see if memory is to be preserved and set global flag */ + for (i=3; i>=0; --i) { + if ((ddr_interface_mask & (1 << i)) == 0) + continue; + reset_ctl.u = BDK_CSR_READ(node, BDK_LMCX_RESET_CTL(i)); + if (reset_ctl.s.ddr3psv == 1) { + ddr_print("LMC%d Preserving memory\n", i); + set_ddr_memory_preserved(node); + + /* Re-initialize flags */ + reset_ctl.cn8.ddr3pwarm = 0; + reset_ctl.cn8.ddr3psoft = 0; + reset_ctl.s.ddr3psv = 0; + DRAM_CSR_WRITE(node, BDK_LMCX_RESET_CTL(i), reset_ctl.u); + } + } + } + + if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX)) { + + bdk_lmcx_ddr_pll_ctl_t ddr_pll_ctl; + const dimm_config_t *dimm_config_table = ddr_configuration->dimm_config_table; + + /* ddr_type only indicates DDR4 or DDR3 */ + int ddr_type = get_ddr_type(node, &dimm_config_table[0]); + + /* + * 6.9 LMC Initialization Sequence + * + * There are 14 parts to the LMC initialization procedure: + * + * 1. LMC interface enable initialization + * + * 2. DDR PLL initialization + * + * 3. LMC CK initialization + * + * 4. LMC DRESET initialization + * + * 5. LMC CK local initialization + * + * 6. LMC RESET initialization + * + * 7. Early LMC initialization + * + * 8. LMC offset training + * + * 9. LMC internal Vref training + * + * 10. LMC deskew training + * + * 11. LMC write leveling + * + * 12. LMC read leveling + * + * 13. DRAM Vref Training for DDR4 + * + * 14. Final LMC initialization + * + * CN88XX supports two modes: + * + *  two-LMC mode: both LMCs 2/3 must not be enabled + * (LMC2/3_DLL_CTL2[DRESET] must be set to 1 and LMC2/3_DLL_CTL2[INTF_EN] + * must be set to 0) and both LMCs 0/1 must be enabled). + * + *  four-LMC mode: all four LMCs 0..3 must be enabled. + * + * Steps 4 and 6..14 should each be performed for each enabled LMC (either + * twice or four times). Steps 1..3 and 5 are more global in nature and + * each must be executed exactly once (not once per LMC) each time the + * DDR PLL changes or is first brought up. Steps 1..3 and 5 need not be + * performed if the DDR PLL is stable. + * + * Generally, the steps are performed in order. The exception is that the + * CK local initialization (step 5) must be performed after some DRESET + * initializations (step 4) and before other DRESET initializations when + * the DDR PLL is brought up or changed. 
(The CK local initialization + * uses information from some LMCs to bring up the other local CKs.) The + * following text describes these ordering requirements in more detail. + * + * Following any chip reset, the DDR PLL must be brought up, and all 14 + * steps should be executed. Subsequently, it is possible to execute only + * steps 4 and 6..14, or to execute only steps 8..14. + * + * The remainder of this section covers these initialization steps in + * sequence. + */ + + if (ddr_interface_num == 0) { /* Do this once */ + bdk_lmcx_dll_ctl2_t dll_ctl2; + int loop_interface_num; + + /* + * 6.9.1 LMC Interface-Enable Initialization + * + * LMC interface-enable initialization (Step 1) must be performed only + * once, not once per LMC in four-LMC mode. This step is not required + * in two-LMC mode. + * + * Perform the following three substeps for the LMC interface-enable + * initialization: + * + * 1. Without changing any other LMC2_DLL_CTL2 fields (LMC(0..3)_DLL_CTL2 + * should be at their reset values after Step 1), write + * LMC2_DLL_CTL2[INTF_EN] = 1 if four-LMC mode is desired. + * + * 2. Without changing any other LMC3_DLL_CTL2 fields, write + * LMC3_DLL_CTL2[INTF_EN] = 1 if four-LMC mode is desired. + * + * 3. Read LMC2_DLL_CTL2 and wait for the result. + * + * The LMC2_DLL_CTL2[INTF_EN] and LMC3_DLL_CTL2[INTF_EN] values should + * not be changed by software from this point. + * + */ + + /* Put all LMCs into DRESET here; these are the reset values... */ + for (loop_interface_num = 0; loop_interface_num < 4; ++loop_interface_num) { + if ((ddr_interface_mask & (1 << loop_interface_num)) == 0) + continue; + + dll_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(loop_interface_num)); + + dll_ctl2.s.byp_setting = 0; + dll_ctl2.s.byp_sel = 0; + dll_ctl2.s.quad_dll_ena = 0; + dll_ctl2.s.dreset = 1; + dll_ctl2.s.dll_bringup = 0; + dll_ctl2.s.intf_en = 0; + + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL2(loop_interface_num), dll_ctl2.u); + } + + /* Now set INTF_EN for *ONLY* LMC2/3 if they are to be active on 88XX. */ + /* Do *NOT* touch LMC0/1 INTF_EN=0 setting on 88XX. */ + /* But we do have to set LMC1 INTF_EN=1 on 83XX if we want it active... */ + /* Note that 81xx has only LMC0 so the mask should reflect that. */ + for (loop_interface_num = (CAVIUM_IS_MODEL(CAVIUM_CN83XX)) ? 1 : 2; + loop_interface_num < 4; ++loop_interface_num) { + if ((ddr_interface_mask & (1 << loop_interface_num)) == 0) + continue; + + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(loop_interface_num), + c.s.intf_en = 1); + BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(loop_interface_num)); + } + + /* + * 6.9.2 DDR PLL Initialization + * + * DDR PLL initialization (Step 2) must be performed for each chip reset + * and whenever the DDR clock speed changes. This step needs to be + * performed only once, not once per LMC. + * + * Perform the following eight substeps to initialize the DDR PLL: + * + * 1. If not done already, write all fields in LMC(0..1)_DDR_PLL_CTL and + * LMC(0..1)_DLL_CTL2 to their reset values, including: + * + * .. LMC0_DDR_PLL_CTL[DDR_DIV_RESET] = 1 + * .. LMC0_DLL_CTL2[DRESET] = 1 + * + * This substep is not necessary after a chip reset. 
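+ * (Implementation note, our inference: the code below performs this
+ * substep unconditionally instead of detecting whether a chip reset
+ * just occurred, so the same path also covers a later DDR PLL
+ * reprogramming.)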
+ *
+ */
+
+ ddr_pll_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(0));
+
+ ddr_pll_ctl.cn83xx.reset_n = 0;
+ ddr_pll_ctl.cn83xx.ddr_div_reset = 1;
+ ddr_pll_ctl.cn83xx.phy_dcok = 0;
+ ddr_pll_ctl.cn83xx.dclk_invert = 0;
+
+ // allow override of LMC0 desired setting for DCLK_INVERT
+ if ((s = lookup_env_parameter("ddr0_set_dclk_invert")) != NULL) {
+ ddr_pll_ctl.cn83xx.dclk_invert = !!strtoul(s, NULL, 0);
+ ddr_print("LMC0: override DDR_PLL_CTL[dclk_invert] to %d\n",
+ ddr_pll_ctl.cn83xx.dclk_invert);
+ }
+
+ // always write LMC0 CSR, it must be active
+ DRAM_CSR_WRITE(node, BDK_LMCX_DDR_PLL_CTL(0), ddr_pll_ctl.u);
+ ddr_print("%-45s : 0x%016lx\n", "LMC0: DDR_PLL_CTL", ddr_pll_ctl.u);
+
+ // only when LMC1 is active
+ // NOTE: 81xx has only 1 LMC, and 83xx can operate in 1-LMC mode
+ if (ddr_interface_mask & 0x2) {
+
+ ddr_pll_ctl.cn83xx.dclk_invert ^= 1; /* DEFAULT: Toggle dclk_invert from LMC0 */
+
+ // allow override of LMC1 desired setting for DCLK_INVERT
+ if ((s = lookup_env_parameter("ddr1_set_dclk_invert")) != NULL) {
+ ddr_pll_ctl.cn83xx.dclk_invert = !!strtoul(s, NULL, 0);
+ ddr_print("LMC1: override DDR_PLL_CTL[dclk_invert] to %d\n",
+ ddr_pll_ctl.cn83xx.dclk_invert);
+ }
+
+ // always write LMC1 CSR when it is active
+ DRAM_CSR_WRITE(node, BDK_LMCX_DDR_PLL_CTL(1), ddr_pll_ctl.u);
+ ddr_print("%-45s : 0x%016lx\n", "LMC1: DDR_PLL_CTL", ddr_pll_ctl.u);
+ }
+
+ /*
+ * 2. If the current DRAM contents are not preserved (see
+ * LMC(0..3)_RESET_CTL[DDR3PSV]), this is also an appropriate time to
+ * assert the RESET# pin of the DDR3/DDR4 DRAM parts. If desired, write
+ * LMC0_RESET_CTL[DDR3RST] = 0 without modifying any other
+ * LMC0_RESET_CTL fields to assert the DDR_RESET_L pin. No action is
+ * required here to assert DDR_RESET_L following a chip reset. Refer to
+ * Section 6.9.6. Do this for all enabled LMCs.
+ */
+
+ for (loop_interface_num = 0;
+ ( !ddr_memory_preserved(node)) && loop_interface_num < 4;
+ ++loop_interface_num)
+ {
+
+ if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
+ continue;
+
+ cn88xx_lmc_ddr3_reset(node, loop_interface_num, LMC_DDR3_RESET_ASSERT);
+ }
+
+ /*
+ * 3. Without changing any other LMC0_DDR_PLL_CTL values, write
+ * LMC0_DDR_PLL_CTL[CLKF] with a value that gives a desired DDR PLL speed. The
+ * LMC0_DDR_PLL_CTL[CLKF] value should be selected in conjunction with
+ * the post-scalar divider values for LMC (LMC0_DDR_PLL_CTL[DDR_PS_EN])
+ * so that the desired LMC CK speed is produced (all enabled LMCs
+ * must run at the same speed). Section 5.14 describes
+ * LMC0_DDR_PLL_CTL[CLKF] and LMC0_DDR_PLL_CTL[DDR_PS_EN] programmings
+ * that produce the desired LMC CK speed. Section 6.9.3 describes LMC CK
+ * initialization, which can be done separately from the DDR PLL
+ * initialization described in this section.
+ *
+ * The LMC0_DDR_PLL_CTL[CLKF] value must not change after this point
+ * without restarting this SDRAM PLL initialization sequence.
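+ *
+ * Worked example with illustrative numbers (not from the HRM): for
+ * DREF = 50 MHz, CLKR = 0 and EN = 2, a request for DCLK = 800 MHz
+ * gives CLKF = (800e6 * 1 * 2)/50e6 - 1 = 31, i.e. a 50 * 32 = 1600 MHz
+ * PLL post-scaled by 2 back down to 800 MHz -- matching the clkf,
+ * pll_MHz and calculated_ddr_hertz expressions in the search loop
+ * below.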
+ */ + + { + /* CLKF = (DCLK * (CLKR+1) * EN(1, 2, 3, 4, 5, 6, 7, 8, 10, 12))/DREF - 1 */ + int en_idx, save_en_idx, best_en_idx=0; + uint64_t clkf, clkr, max_clkf = 127; + uint64_t best_clkf=0, best_clkr=0; + uint64_t best_pll_MHz = 0; + uint64_t pll_MHz; + uint64_t min_pll_MHz = 800; + uint64_t max_pll_MHz = 5000; + uint64_t error; + uint64_t best_error; + uint64_t best_calculated_ddr_hertz = 0; + uint64_t calculated_ddr_hertz = 0; + uint64_t orig_ddr_hertz = ddr_hertz; + static const int _en[] = {1, 2, 3, 4, 5, 6, 7, 8, 10, 12}; + int override_pll_settings; + int new_bwadj; + + error = best_error = ddr_hertz; /* Init to max error */ + + ddr_print("DDR Reference Hertz = %d\n", ddr_ref_hertz); + + while (best_error == ddr_hertz) { + + for (clkr = 0; clkr < 4; ++clkr) { + for (en_idx=sizeof(_en)/sizeof(int)-1; en_idx>=0; --en_idx) { + save_en_idx = en_idx; + clkf = ((ddr_hertz) * (clkr+1) * (_en[save_en_idx])); + clkf = divide_nint(clkf, ddr_ref_hertz) - 1; + pll_MHz = ddr_ref_hertz * (clkf+1) / (clkr+1) / 1000000; + calculated_ddr_hertz = ddr_ref_hertz * (clkf + 1) / ((clkr + 1) * (_en[save_en_idx])); + error = ddr_hertz - calculated_ddr_hertz; + + if ((pll_MHz < min_pll_MHz) || (pll_MHz > max_pll_MHz)) continue; + if (clkf > max_clkf) continue; /* PLL requires clkf to be limited */ + if (_abs(error) > _abs(best_error)) continue; + + VB_PRT(VBL_TME, "clkr: %2lu, en[%d]: %2d, clkf: %4lu, pll_MHz: %4lu, ddr_hertz: %8lu, error: %8ld\n", + clkr, save_en_idx, _en[save_en_idx], clkf, pll_MHz, calculated_ddr_hertz, error); + + /* Favor the highest PLL frequency. */ + if ((_abs(error) < _abs(best_error)) || (pll_MHz > best_pll_MHz)) { + best_pll_MHz = pll_MHz; + best_calculated_ddr_hertz = calculated_ddr_hertz; + best_error = error; + best_clkr = clkr; + best_clkf = clkf; + best_en_idx = save_en_idx; + } + } + } + + override_pll_settings = 0; + + if ((s = lookup_env_parameter("ddr_pll_clkr")) != NULL) { + best_clkr = strtoul(s, NULL, 0); + override_pll_settings = 1; + } + if ((s = lookup_env_parameter("ddr_pll_clkf")) != NULL) { + best_clkf = strtoul(s, NULL, 0); + override_pll_settings = 1; + } + if ((s = lookup_env_parameter("ddr_pll_en_idx")) != NULL) { + best_en_idx = strtoul(s, NULL, 0); + override_pll_settings = 1; + } + + if (override_pll_settings) { + best_pll_MHz = ddr_ref_hertz * (best_clkf+1) / (best_clkr+1) / 1000000; + best_calculated_ddr_hertz = ddr_ref_hertz * (best_clkf + 1) / ((best_clkr + 1) * (_en[best_en_idx])); + best_error = ddr_hertz - best_calculated_ddr_hertz; + } + + ddr_print("clkr: %2lu, en[%d]: %2d, clkf: %4lu, pll_MHz: %4lu, ddr_hertz: %8lu, error: %8ld <==\n", + best_clkr, best_en_idx, _en[best_en_idx], best_clkf, best_pll_MHz, + best_calculated_ddr_hertz, best_error); + + /* Try lowering the frequency if we can't get a working configuration */ + if (best_error == ddr_hertz) { + if (ddr_hertz < orig_ddr_hertz - 10000000) + break; + ddr_hertz -= 1000000; + best_error = ddr_hertz; + } + + } /* while (best_error == ddr_hertz) */ + + + if (best_error == ddr_hertz) { + error_print("ERROR: Can not compute a legal DDR clock speed configuration.\n"); + return(-1); + } + + new_bwadj = (best_clkf + 1) / 10; + VB_PRT(VBL_TME, "bwadj: %2d\n", new_bwadj); + + if ((s = lookup_env_parameter("ddr_pll_bwadj")) != NULL) { + new_bwadj = strtoul(s, NULL, 0); + VB_PRT(VBL_TME, "bwadj: %2d\n", new_bwadj); + } + + for (loop_interface_num = 0; loop_interface_num<2; ++loop_interface_num) { + if ((ddr_interface_mask & (1 << loop_interface_num)) == 0) + continue; + + // make sure we preserve 
any settings already there + ddr_pll_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num)); + ddr_print("LMC%d: DDR_PLL_CTL : 0x%016lx\n", + loop_interface_num, ddr_pll_ctl.u); + + ddr_pll_ctl.cn83xx.ddr_ps_en = best_en_idx; + ddr_pll_ctl.cn83xx.clkf = best_clkf; + ddr_pll_ctl.cn83xx.clkr = best_clkr; + ddr_pll_ctl.cn83xx.reset_n = 0; + ddr_pll_ctl.cn83xx.bwadj = new_bwadj; + + DRAM_CSR_WRITE(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num), ddr_pll_ctl.u); + ddr_print("LMC%d: DDR_PLL_CTL : 0x%016lx\n", + loop_interface_num, ddr_pll_ctl.u); + } + } + + + for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) { + if ((ddr_interface_mask & (1 << loop_interface_num)) == 0) + continue; + + /* + * 4. Read LMC0_DDR_PLL_CTL and wait for the result. + */ + + BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num)); + + /* + * 5. Wait a minimum of 3 us. + */ + + bdk_wait_usec(3); /* Wait 3 us */ + + /* + * 6. Write LMC0_DDR_PLL_CTL[RESET_N] = 1 without changing any other + * LMC0_DDR_PLL_CTL values. + */ + + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num), + c.cn83xx.reset_n = 1); + + /* + * 7. Read LMC0_DDR_PLL_CTL and wait for the result. + */ + + BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num)); + + /* + * 8. Wait a minimum of 25 us. + */ + + bdk_wait_usec(25); /* Wait 25 us */ + + } /* for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) */ + + for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) { + if ((ddr_interface_mask & (1 << loop_interface_num)) == 0) + continue; + /* + * 6.9.3 LMC CK Initialization + * + * DDR PLL initialization must be completed prior to starting LMC CK + * initialization. + * + * Perform the following substeps to initialize the LMC CK. Perform + * substeps 1..3 for both LMC0 and LMC1. + * + * 1. Without changing any other LMC(0..3)_DDR_PLL_CTL values, write + * LMC(0..3)_DDR_PLL_CTL[DDR_DIV_RESET] = 1 and + * LMC(0..3)_DDR_PLL_CTL[DDR_PS_EN] with the appropriate value to get the + * desired LMC CK speed. Section 5.14 discusses CLKF and DDR_PS_EN + * programmings. The LMC(0..3)_DDR_PLL_CTL[DDR_PS_EN] must not change + * after this point without restarting this LMC CK initialization + * sequence. + */ + + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num), + c.cn83xx.ddr_div_reset = 1); + + /* + * 2. Without changing any other fields in LMC(0..3)_DDR_PLL_CTL, write + * LMC(0..3)_DDR_PLL_CTL[DDR4_MODE] = 0. + */ + + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num), + c.cn83xx.ddr4_mode = (ddr_type == DDR4_DRAM) ? 1 : 0); + + /* + * 3. Read LMC(0..3)_DDR_PLL_CTL and wait for the result. + */ + + BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num)); + + /* + * 4. Wait a minimum of 1 us. + */ + + bdk_wait_usec(1); /* Wait 1 us */ + + /* + * 5. Without changing any other fields in LMC(0..3)_DDR_PLL_CTL, write + * LMC(0..3)_DDR_PLL_CTL[PHY_DCOK] = 1. + */ + + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num), + c.cn83xx.phy_dcok = 1); + + /* + * 6. Read LMC(0..3)_DDR_PLL_CTL and wait for the result. + */ + + BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num)); + + /* + * 7. Wait a minimum of 20 us. + */ + + bdk_wait_usec(20); /* Wait 20 us */ + + /* + * 8. Without changing any other LMC(0..3)_COMP_CTL2 values, write + * LMC(0..3)_COMP_CTL2[CK_CTL,CONTROL_CTL,CMD_CTL] to the desired + * DDR*_CK_*_P control and command signals drive strength. 
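+ * (In the code below the default encoding for CK_CTL, CMD_CTL and
+ * DQX_CTL is 4, i.e. 34.3 ohm, unless the custom LMC config or an
+ * environment override supplies something else.)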
+ */ + + { + bdk_lmcx_comp_ctl2_t comp_ctl2; + const ddr3_custom_config_t *custom_lmc_config = &ddr_configuration->custom_lmc_config; + + comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(loop_interface_num)); + + comp_ctl2.s.dqx_ctl = 4; /* Default 4=34.3 ohm */ + comp_ctl2.s.ck_ctl = + (custom_lmc_config->ck_ctl == 0) ? 4 : custom_lmc_config->ck_ctl; /* Default 4=34.3 ohm */ + comp_ctl2.s.cmd_ctl = + (custom_lmc_config->cmd_ctl == 0) ? 4 : custom_lmc_config->cmd_ctl; /* Default 4=34.3 ohm */ + + comp_ctl2.s.rodt_ctl = 0x4; /* 60 ohm */ + + // These need to be done here, not later in Step 6.9.7. + // NOTE: these are/will be specific to a chip; for now, set to 0 + // should we provide overrides for these? + comp_ctl2.s.ntune_offset = 0; + comp_ctl2.s.ptune_offset = 0; + + // now do any overrides... + if ((s = lookup_env_parameter("ddr_ck_ctl")) != NULL) { + comp_ctl2.s.ck_ctl = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_cmd_ctl")) != NULL) { + comp_ctl2.s.cmd_ctl = strtoul(s, NULL, 0); + } + + if ((s = lookup_env_parameter("ddr_dqx_ctl")) != NULL) { + comp_ctl2.s.dqx_ctl = strtoul(s, NULL, 0); + } + + DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(loop_interface_num), comp_ctl2.u); + } + + /* + * 9. Read LMC(0..3)_DDR_PLL_CTL and wait for the result. + */ + + BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num)); + + /* + * 10. Wait a minimum of 200 ns. + */ + + bdk_wait_usec(1); /* Wait 1 us */ + + /* + * 11. Without changing any other LMC(0..3)_DDR_PLL_CTL values, write + * LMC(0..3)_DDR_PLL_CTL[DDR_DIV_RESET] = 0. + */ + + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num), + c.cn83xx.ddr_div_reset = 0); + + /* + * 12. Read LMC(0..3)_DDR_PLL_CTL and wait for the result. + */ + + BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num)); + + /* + * 13. Wait a minimum of 200 ns. + */ + bdk_wait_usec(1); /* Wait 1 us */ + + } /* for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) */ + + } /* if (ddr_interface_num == 0) */ /* Do this once */ + + if (ddr_interface_num == 0) { /* Do this once */ + bdk_lmcx_dll_ctl3_t ddr_dll_ctl3; + + /* + * 6.9.4 LMC DRESET Initialization + * + * All of the DDR PLL, LMC global CK, and LMC interface enable + * initializations must be completed prior to starting this LMC DRESET + * initialization (Step 4). + * + * This LMC DRESET step is done for all enabled LMCs. + * + * There are special constraints on the ordering of DRESET initialization + * (Steps 4) and CK local initialization (Step 5) whenever CK local + * initialization must be executed. CK local initialization must be + * executed whenever the DDR PLL is being brought up (for each chip reset + * and whenever the DDR clock speed changes). + * + * When Step 5 must be executed in the two-LMC mode case: + *  LMC0 DRESET initialization must occur before Step 5. + *  LMC1 DRESET initialization must occur after Step 5. + * + * When Step 5 must be executed in the four-LMC mode case: + *  LMC2 and LMC3 DRESET initialization must occur before Step 5. + *  LMC0 and LMC1 DRESET initialization must occur after Step 5. 
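+ * (Concretely, in the code below: four-LMC mode calls
+ * cn78xx_lmc_dreset_init() for LMC2 and LMC3, performs the Step 5 CK
+ * local initialization, then calls it for LMC0 and LMC1; two-LMC mode
+ * handles LMC0 before Step 5 and LMC1 after it.)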
+ */ + + if ((ddr_interface_mask == 0x1) || (ddr_interface_mask == 0x3)) { + /* ONE-LMC MODE FOR 81XX AND 83XX BEFORE STEP 5 */ + /* TWO-LMC MODE BEFORE STEP 5 */ + cn78xx_lmc_dreset_init(node, 0); + + } else if (ddr_interface_mask == 0xf) { + /* FOUR-LMC MODE BEFORE STEP 5 */ + cn78xx_lmc_dreset_init(node, 2); + cn78xx_lmc_dreset_init(node, 3); + } + + /* + * 6.9.5 LMC CK Local Initialization + * + * All of DDR PLL, LMC global CK, and LMC interface-enable + * initializations must be completed prior to starting this LMC CK local + * initialization (Step 5). + * + * LMC CK Local initialization must be performed for each chip reset and + * whenever the DDR clock speed changes. This step needs to be performed + * only once, not once per LMC. + * + * There are special constraints on the ordering of DRESET initialization + * (Steps 4) and CK local initialization (Step 5) whenever CK local + * initialization must be executed. CK local initialization must be + * executed whenever the DDR PLL is being brought up (for each chip reset + * and whenever the DDR clock speed changes). + * + * When Step 5 must be executed in the two-LMC mode case: + *  LMC0 DRESET initialization must occur before Step 5. + *  LMC1 DRESET initialization must occur after Step 5. + * + * When Step 5 must be executed in the four-LMC mode case: + *  LMC2 and LMC3 DRESET initialization must occur before Step 5. + *  LMC0 and LMC1 DRESET initialization must occur after Step 5. + * + * LMC CK local initialization is different depending on whether two-LMC + * or four-LMC modes are desired. + */ + + if (ddr_interface_mask == 0x3) { + /* + * 6.9.5.1 LMC CK Local Initialization for Two-LMC Mode + * + * 1. Write LMC0_DLL_CTL3 to its reset value. (Note that + * LMC0_DLL_CTL3[DLL_90_BYTE_SEL] = 0x2 .. 0x8 should also work.) + */ + + ddr_dll_ctl3.u = 0; + ddr_dll_ctl3.s.dclk90_recal_dis = 1; + ddr_dll_ctl3.s.dll90_byte_sel = 1; + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(0), ddr_dll_ctl3.u); + + /* + * 2. Read LMC0_DLL_CTL3 and wait for the result. + */ + + BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(0)); + + /* + * 3. Without changing any other fields in LMC0_DLL_CTL3, write + * LMC0_DLL_CTL3[DCLK90_FWD] = 1. Writing LMC0_DLL_CTL3[DCLK90_FWD] = 1 + * causes clock-delay information to be forwarded from LMC0 to LMC1. + */ + + ddr_dll_ctl3.s.dclk90_fwd = 1; + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(0), ddr_dll_ctl3.u); + + /* + * 4. Read LMC0_DLL_CTL3 and wait for the result. + */ + + BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(0)); + } /* if (ddr_interface_mask == 0x3) */ + + if (ddr_interface_mask == 0xf) { + /* + * 6.9.5.2 LMC CK Local Initialization for Four-LMC Mode + * + * 1. Write LMC2_DLL_CTL3 to its reset value except + * LMC2_DLL_CTL3[DLL90_BYTE_SEL] = 0x7. + */ + + ddr_dll_ctl3.u = 0; + ddr_dll_ctl3.s.dclk90_recal_dis = 1; + ddr_dll_ctl3.s.dll90_byte_sel = 7; + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(2), ddr_dll_ctl3.u); + + /* + * 2. Write LMC3_DLL_CTL3 to its reset value except + * LMC3_DLL_CTL3[DLL90_BYTE_SEL] = 0x0. + */ + + ddr_dll_ctl3.u = 0; + ddr_dll_ctl3.s.dclk90_recal_dis = 1; + ddr_dll_ctl3.s.dll90_byte_sel = 0; /* HRM wants 0, not 2 */ + DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(3), ddr_dll_ctl3.u); /* HRM wants LMC3 */ + + /* + * 3. Read LMC3_DLL_CTL3 and wait for the result. + */ + + BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(3)); + + /* + * 4. Without changing any other fields in LMC2_DLL_CTL3, write + * LMC2_DLL_CTL3[DCLK90_FWD] = 1 and LMC2_DLL_CTL3[DCLK90_RECAL_DIS] = 1. 
+ * Writing LMC2_DLL_CTL3[DCLK90_FWD] = 1 causes LMC 2 to forward + * clock-delay information to LMC0. Setting + * LMC2_DLL_CTL3[DCLK90_RECAL_DIS] to 1 prevents LMC2 from periodically + * recalibrating this delay information. + */ + + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(2), + c.s.dclk90_fwd = 1; + c.s.dclk90_recal_dis = 1); + + /* + * 5. Without changing any other fields in LMC3_DLL_CTL3, write + * LMC3_DLL_CTL3[DCLK90_FWD] = 1 and LMC3_DLL_CTL3[DCLK90_RECAL_DIS] = 1. + * Writing LMC3_DLL_CTL3[DCLK90_FWD] = 1 causes LMC3 to forward + * clock-delay information to LMC1. Setting + * LMC3_DLL_CTL3[DCLK90_RECAL_DIS] to 1 prevents LMC3 from periodically + * recalibrating this delay information. + */ + + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(3), + c.s.dclk90_fwd = 1; + c.s.dclk90_recal_dis = 1); + + /* + * 6. Read LMC3_DLL_CTL3 and wait for the result. + */ + + BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(3)); + } /* if (ddr_interface_mask == 0xf) */ + + + /* ONE-LMC MODE AFTER STEP 5 - NOTHING */ + + /* TWO-LMC MODE AFTER STEP 5 */ + if (ddr_interface_mask == 0x3) { + cn78xx_lmc_dreset_init(node, 1); + } + + /* FOUR-LMC MODE AFTER STEP 5 */ + if (ddr_interface_mask == 0xf) { + cn78xx_lmc_dreset_init(node, 0); + cn78xx_lmc_dreset_init(node, 1); + + /* Enable periodic recalibration of DDR90 delay line in. */ + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(0), + c.s.dclk90_recal_dis = 0); + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(1), + c.s.dclk90_recal_dis = 0); + } + + + /* Enable fine tune mode for all LMCs */ + for (int lmc = 0; lmc<4; ++lmc) { + if ((ddr_interface_mask & (1 << lmc)) == 0) + continue; + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(lmc), + c.s.fine_tune_mode = 1); + } + + /* Enable the trim circuit on the appropriate channels to + adjust the DDR clock duty cycle for chips that support + it. */ + if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx + bdk_lmcx_phy_ctl_t lmc_phy_ctl; + int loop_interface_num; + + for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) { + if ((ddr_interface_mask & (1 << loop_interface_num)) == 0) + continue; + + lmc_phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(loop_interface_num)); + lmc_phy_ctl.cn83xx.lv_mode = (~loop_interface_num) & 1; /* Odd LMCs = 0, Even LMCs = 1 */ + + ddr_print("LMC%d: PHY_CTL : 0x%016lx\n", + loop_interface_num, lmc_phy_ctl.u); + DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(loop_interface_num), lmc_phy_ctl.u); + } + } + + } /* Do this once */ + + } /* if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX)) */ + + set_ddr_clock_initialized(node, ddr_interface_num, 1); + return(0); +} +void +perform_lmc_reset(bdk_node_t node, int ddr_interface_num) +{ + /* + * 6.9.6 LMC RESET Initialization + * + * The purpose of this step is to assert/deassert the RESET# pin at the + * DDR3/DDR4 parts. + * + * This LMC RESET step is done for all enabled LMCs. + * + * It may be appropriate to skip this step if the DDR3/DDR4 DRAM parts + * are in self refresh and are currently preserving their + * contents. (Software can determine this via + * LMC(0..3)_RESET_CTL[DDR3PSV] in some circumstances.) The remainder of + * this section assumes that the DRAM contents need not be preserved. + * + * The remainder of this section assumes that the CN78XX DDRn_RESET_L pin + * is attached to the RESET# pin of the attached DDR3/DDR4 parts, as will + * be appropriate in many systems. 
+ *
+ * (In other systems, such as ones that can preserve DDR3/DDR4 part
+ * contents while CN78XX is powered down, it will not be appropriate to
+ * directly attach the CN78XX DDRn_RESET_L pin to DRESET# of the
+ * DDR3/DDR4 parts, and this section may not apply.)
+ *
+ * The remainder of this section describes the sequence for LMCn.
+ *
+ * Perform the following six substeps for LMC reset initialization:
+ *
+ * 1. If not done already, assert DDRn_RESET_L pin by writing
+ * LMC(0..3)_RESET_CTL[DDR3RST] = 0 without modifying any other
+ * LMC(0..3)_RESET_CTL fields.
+ */
+
+ if ( !ddr_memory_preserved(node)) {
+ /*
+ * 2. Read LMC(0..3)_RESET_CTL and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_RESET_CTL(ddr_interface_num));
+
+ /*
+ * 3. Wait until RESET# assertion-time requirement from JEDEC DDR3/DDR4
+ * specification is satisfied (200 us during a power-on ramp, 100ns when
+ * power is already stable).
+ */
+
+ bdk_wait_usec(200);
+
+ /*
+ * 4. Deassert DDRn_RESET_L pin by writing LMC(0..3)_RESET_CTL[DDR3RST] = 1
+ * without modifying any other LMC(0..3)_RESET_CTL fields.
+ * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
+ * 6. Wait a minimum of 500us. This guarantees the necessary T = 500us
+ * delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE* assertion.
+ */
+ cn88xx_lmc_ddr3_reset(node, ddr_interface_num, LMC_DDR3_RESET_DEASSERT);
+
+ /* Toggle Reset Again */
+ /* That is, assert, then de-assert, one more time */
+ cn88xx_lmc_ddr3_reset(node, ddr_interface_num, LMC_DDR3_RESET_ASSERT);
+ cn88xx_lmc_ddr3_reset(node, ddr_interface_num, LMC_DDR3_RESET_DEASSERT);
+
+ } /* if ( !ddr_memory_preserved(node)) */
+}
+
+///////////////////////////////////////////////////////////
+// start of DBI switchover
+
+/* first pattern example:
+ GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
+ GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
+ GENERAL_PURPOSE2.DATA == 16'h0000;
+*/
+const uint64_t dbi_pattern[3] = { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000ULL };
+
+// Perform switchover to DBI
+static void dbi_switchover_interface(int node, int lmc)
+{
+ bdk_lmcx_modereg_params0_t modereg_params0;
+ bdk_lmcx_modereg_params3_t modereg_params3;
+ bdk_lmcx_phy_ctl_t phy_ctl;
+ bdk_lmcx_config_t lmcx_config;
+ bdk_lmcx_ddr_pll_ctl_t ddr_pll_ctl;
+ int rank_mask, rankx, active_ranks;
+ uint64_t phys_addr, rank_offset;
+ int num_lmcs, errors;
+ int dbi_settings[9], byte, unlocked, retries;
+ int ecc_ena;
+ int rank_max = 1; // FIXME: make this 4 to try all the ranks
+
+ ddr_pll_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(0));
+
+ lmcx_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ rank_mask = lmcx_config.s.init_status;
+ ecc_ena = lmcx_config.s.ecc_ena;
+
+ // FIXME: must filter out any non-supported configs
+ // ie, no DDR3, no x4 devices, no 81XX
+ if ((ddr_pll_ctl.cn83xx.ddr4_mode == 0) ||
+ (lmcx_config.s.mode_x4dev == 1) ||
+ CAVIUM_IS_MODEL(CAVIUM_CN81XX) )
+ {
+ ddr_print("N%d.LMC%d: DBI switchover: inappropriate device; EXITING...\n",
+ node, lmc);
+ return;
+ }
+
+ // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
+ num_lmcs = __bdk_dram_get_num_lmc(node);
+ rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
+
+ ddr_print("N%d.LMC%d: DBI switchover: rank mask 0x%x, rank size 0x%016llx.\n",
+ node, lmc, rank_mask, (unsigned long long)rank_offset);
+
+ /* 1. conduct the current init sequence as usual all the way
+ through software write leveling.
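+ (Our reading, not HRM text: all of the deskew/write-leveling/
+ read-leveling training has already run with DBI off; this routine
+ only enables DBI and then re-runs deskew training for the DBI bits.)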
+ */
+
+ read_DAC_DBI_settings(node, lmc, /*DBI*/0, dbi_settings);
+
+ display_DAC_DBI_settings(node, lmc, /* DBI */0, ecc_ena, dbi_settings, " INIT");
+
+ /* 2. set DBI related CSRs as below and issue MR write.
+ MODEREG_PARAMS3.WR_DBI=1
+ MODEREG_PARAMS3.RD_DBI=1
+ PHY_CTL.DBI_MODE_ENA=1
+ */
+ modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(lmc));
+
+ modereg_params3.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS3(lmc));
+ modereg_params3.s.wr_dbi = 1;
+ modereg_params3.s.rd_dbi = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS3(lmc), modereg_params3.u);
+
+ phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(lmc));
+ phy_ctl.s.dbi_mode_ena = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(lmc), phy_ctl.u);
+
+ /*
+ There are two options for data to send. Let's start with (1) and could move to (2) in the future:
+
+ 1) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 0 (or for older chips where this does not exist)
+ set data directly in these registers. This will yield a clk/2 pattern:
+ GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
+ GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
+ GENERAL_PURPOSE2.DATA == 16'h0000;
+ 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
+ here data comes from the LFSR generating a PRBS pattern
+ CHAR_CTL.EN = 0
+ CHAR_CTL.SEL = 0; // for PRBS
+ CHAR_CTL.DR = 1;
+ CHAR_CTL.PRBS = setup for whatever type of PRBS to send
+ CHAR_CTL.SKEW_ON = 1;
+ */
+ DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE0(lmc), dbi_pattern[0]);
+ DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE1(lmc), dbi_pattern[1]);
+ DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE2(lmc), dbi_pattern[2]);
+
+ /*
+ 3. adjust cas_latency (only necessary if RD_DBI is set).
+ here is my code for doing this:
+
+ if (csr_model.MODEREG_PARAMS3.RD_DBI.value == 1) begin
+ case (csr_model.MODEREG_PARAMS0.CL.value)
+ 0,1,2,3,4: csr_model.MODEREG_PARAMS0.CL.value += 2; // CL 9-13 -> 11-15
+ 5: begin
+ // CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3
+ if((csr_model.MODEREG_PARAMS0.CWL.value==1 || csr_model.MODEREG_PARAMS0.CWL.value==3))
+ csr_model.MODEREG_PARAMS0.CL.value = 7; // 14->16
+ else
+ csr_model.MODEREG_PARAMS0.CL.value = 13; // 14->17
+ end
+ 6: csr_model.MODEREG_PARAMS0.CL.value = 8; // 15->18
+ 7: csr_model.MODEREG_PARAMS0.CL.value = 14; // 16->19
+ 8: csr_model.MODEREG_PARAMS0.CL.value = 15; // 18->21
+ default:
+ `cn_fatal(("Error mem_cfg (%s) CL (%d) with RD_DBI=1, I am not sure what to do.",
+ mem_cfg, csr_model.MODEREG_PARAMS3.RD_DBI.value))
+ endcase
+ end
+ */
+ if (modereg_params3.s.rd_dbi == 1) {
+ int old_cl, new_cl, old_cwl;
+
+ old_cl = modereg_params0.s.cl;
+ old_cwl = modereg_params0.s.cwl;
+
+ switch (old_cl) {
+ case 0: case 1: case 2: case 3: case 4: new_cl = old_cl + 2; break; // 9-13->11-15
+ // CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3
+ case 5: new_cl = ((old_cwl == 1) || (old_cwl == 3)) ? 7 : 13; break;
+ case 6: new_cl = 8; break; // 15->18
+ case 7: new_cl = 14; break; // 16->19
+ case 8: new_cl = 15; break; // 18->21
+ default:
+ error_print("ERROR: Bad CL value (%d) for DBI switchover.\n", old_cl);
+ // FIXME: need to error exit here...
+ old_cl = -1;
+ new_cl = -1;
+ break;
+ }
+ ddr_print("N%d.LMC%d: DBI switchover: CL ADJ: old_cl 0x%x, old_cwl 0x%x, new_cl 0x%x.\n",
+ node, lmc, old_cl, old_cwl, new_cl);
+ modereg_params0.s.cl = new_cl;
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(lmc), modereg_params0.u);
+ }
+
+ /*
+ 4. issue MRW to MR0 (CL) and MR5 (DBI), using LMC sequence SEQ_CTL[SEQ_SEL] = MRW.
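+ (For reference: in DDR4, MR0 holds the CAS latency adjusted above and
+ MR5 holds the read/write DBI enable bits, which is why exactly these
+ two mode registers are rewritten here.)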
+ */ + // Use the default values, from the CSRs fields + // also, do B-sides for RDIMMs... + + for (rankx = 0; rankx < 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + // for RDIMMs, B-side writes should get done automatically when the A-side is written + ddr4_mrw(node, lmc, rankx, -1/* use_default*/, 0/*MRreg*/, 0 /*A-side*/); /* MR0 */ + ddr4_mrw(node, lmc, rankx, -1/* use_default*/, 5/*MRreg*/, 0 /*A-side*/); /* MR5 */ + + } /* for (rankx = 0; rankx < 4; rankx++) */ + + /* + 5. conduct DBI bit deskew training via the General Purpose R/W sequence (dbtrain). + may need to run this over and over to get a lock (I need up to 5 in simulation): + SEQ_CTL[SEQ_SEL] = RW_TRAINING (15) + DBTRAIN_CTL.CMD_COUNT_EXT = all 1's + DBTRAIN_CTL.READ_CMD_COUNT = all 1's + DBTRAIN_CTL.TCCD_SEL = set according to MODEREG_PARAMS3[TCCD_L] + DBTRAIN_CTL.RW_TRAIN = 1 + DBTRAIN_CTL.READ_DQ_COUNT = dont care + DBTRAIN_CTL.WRITE_ENA = 1; + DBTRAIN_CTL.ACTIVATE = 1; + DBTRAIN_CTL LRANK, PRANK, ROW_A, BG, BA, COLUMN_A = set to a valid address + */ + + // NOW - do the training + ddr_print("N%d.LMC%d: DBI switchover: TRAINING begins...\n", + node, lmc); + + active_ranks = 0; + for (rankx = 0; rankx < rank_max; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + phys_addr = rank_offset * active_ranks; + // FIXME: now done by test_dram_byte_hw() + //phys_addr |= (lmc << 7); + //phys_addr = bdk_numa_get_address(node, phys_addr); // map to node + + active_ranks++; + + retries = 0; + +#if 0 + phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(lmc)); + phy_ctl.s.phy_reset = 1; // FIXME: this may reset too much? + DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(lmc), phy_ctl.u); +#endif + +restart_training: + + // NOTE: return is a bitmask of the erroring bytelanes - we only print it + errors = test_dram_byte_hw(node, lmc, phys_addr, DBTRAIN_DBI, NULL); + + ddr_print("N%d.LMC%d: DBI switchover: TEST: rank %d, phys_addr 0x%lx, errors 0x%x.\n", + node, lmc, rankx, phys_addr, errors); + + // NEXT - check for locking + unlocked = 0; + read_DAC_DBI_settings(node, lmc, /*DBI*/0, dbi_settings); + + for (byte = 0; byte < (8+ecc_ena); byte++) { + unlocked += (dbi_settings[byte] & 1) ^ 1; + } + + // FIXME: print out the DBI settings array after each rank? 
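+ // (Reading of the loop above, for clarity: bit 0 of each
+ // dbi_settings entry acts as that byte lane's deskew lock
+ // indicator, so "unlocked" counts lanes whose lock bit is clear.)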
+ if (rank_max > 1) // only when doing more than 1 rank + display_DAC_DBI_settings(node, lmc, /* DBI */0, ecc_ena, dbi_settings, " RANK"); + + if (unlocked > 0) { + ddr_print("N%d.LMC%d: DBI switchover: LOCK: %d still unlocked.\n", + node, lmc, unlocked); + + retries++; + if (retries < 10) { + goto restart_training; + } else { + ddr_print("N%d.LMC%d: DBI switchover: LOCK: %d retries exhausted.\n", + node, lmc, retries); + } + } + } /* for (rankx = 0; rankx < rank_max; rankx++) */ + + // print out the final DBI settings array + display_DAC_DBI_settings(node, lmc, /* DBI */0, ecc_ena, dbi_settings, "FINAL"); +} +// end of DBI switchover +/////////////////////////////////////////////////////////// + +uint32_t measure_octeon_ddr_clock(bdk_node_t node, + const ddr_configuration_t *ddr_configuration, + uint32_t cpu_hertz, + uint32_t ddr_hertz, + uint32_t ddr_ref_hertz, + int ddr_interface_num, + uint32_t ddr_interface_mask) +{ + uint64_t core_clocks; + uint64_t ddr_clocks; + uint64_t calc_ddr_hertz; + + if (ddr_configuration) { + if (initialize_ddr_clock(node, + ddr_configuration, + cpu_hertz, + ddr_hertz, + ddr_ref_hertz, + ddr_interface_num, + ddr_interface_mask) != 0) + return 0; + } + + /* Dynamically determine the DDR clock speed */ + core_clocks = bdk_clock_get_count(BDK_CLOCK_TIME); + ddr_clocks = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num)); + bdk_wait_usec(100000); /* 100ms */ + ddr_clocks = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num)) - ddr_clocks; + core_clocks = bdk_clock_get_count(BDK_CLOCK_TIME) - core_clocks; + calc_ddr_hertz = ddr_clocks * bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_TIME) / core_clocks; + + /* Asim doesn't have a DDR clock, force the measurement to be correct */ + if (bdk_is_platform(BDK_PLATFORM_ASIM)) + calc_ddr_hertz = ddr_hertz; + + ddr_print("LMC%d: Measured DDR clock: %lu, cpu clock: %u, ddr clocks: %lu\n", + ddr_interface_num, calc_ddr_hertz, cpu_hertz, ddr_clocks); + + /* Check for unreasonable settings. */ + if (calc_ddr_hertz == 0) { + error_print("DDR clock misconfigured. Exiting.\n"); + exit(1); + } + return calc_ddr_hertz; +} + +int octeon_ddr_initialize(bdk_node_t node, + uint32_t cpu_hertz, + uint32_t ddr_hertz, + uint32_t ddr_ref_hertz, + uint32_t ddr_interface_mask, + const ddr_configuration_t *ddr_configuration, + uint32_t *measured_ddr_hertz, + int board_type, + int board_rev_maj, + int board_rev_min) +{ + uint32_t ddr_config_valid_mask = 0; + int memsize_mbytes = 0; + const char *s; + int retval; + int interface_index; + uint32_t ddr_max_speed = 1210000000; /* needs to be this high for DDR4 */ + uint32_t calc_ddr_hertz = -1; + +#ifndef OCTEON_SDK_VERSION_STRING +# define OCTEON_SDK_VERSION_STRING "Development Build" +#endif + + ddr_print(OCTEON_SDK_VERSION_STRING": $Revision: 102369 $\n"); + +#ifdef CAVIUM_ONLY + /* Override speed restrictions to support internal testing. */ + ddr_max_speed = 1210000000; +#endif /* CAVIUM_ONLY */ + + if (ddr_hertz > ddr_max_speed) { + error_print("DDR clock speed %u exceeds maximum speed supported by " + "processor, reducing to %uHz\n", + ddr_hertz, ddr_max_speed); + ddr_hertz = ddr_max_speed; + } + + // Do this earlier so we can return without doing unnecessary things... 
+ /* Check for DIMM 0 socket populated for each LMC present */ + for (interface_index = 0; interface_index < 4; ++interface_index) { + if ((ddr_interface_mask & (1 << interface_index)) && + (validate_dimm(node, &ddr_configuration[interface_index].dimm_config_table[0])) == 1) + { + ddr_config_valid_mask |= (1 << interface_index); + } + } + + if (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) { + int four_lmc_mode = 1; + + // Validate that it can only be 2-LMC mode or 4-LMC mode + if ((ddr_config_valid_mask != 0x03) && (ddr_config_valid_mask != 0x0f)) { + puts("ERROR: Invalid LMC configuration detected.\n"); + return -1; + } + + if ((s = lookup_env_parameter("ddr_four_lmc")) != NULL) + four_lmc_mode = !!strtoul(s, NULL, 0); + + if (!four_lmc_mode) { + puts("Forcing two-LMC Mode.\n"); + ddr_config_valid_mask &= ~(3<<2); /* Invalidate LMC[2:3] */ + } + } + + if (!ddr_config_valid_mask) { + puts("ERROR: No valid DIMMs detected on any DDR interface.\n"); + return -1; + } + + { + /* + + rdf_cnt: Defines the sample point of the LMC response data in + the DDR-clock/core-clock crossing. For optimal + performance set to 10 * (DDR-clock period/core-clock + period) - 1. To disable set to 0. All other values + are reserved. + */ + + uint64_t rdf_cnt; + BDK_CSR_INIT(l2c_ctl, node, BDK_L2C_CTL); + /* It is more convenient to compute the ratio using clock + frequencies rather than clock periods. */ + rdf_cnt = (((uint64_t) 10 * cpu_hertz) / ddr_hertz) - 1; + rdf_cnt = rdf_cnt<256 ? rdf_cnt : 255; + l2c_ctl.s.rdf_cnt = rdf_cnt; + + if ((s = lookup_env_parameter("early_fill_count")) != NULL) + l2c_ctl.s.rdf_cnt = strtoul(s, NULL, 0); + + ddr_print("%-45s : %d, cpu_hertz:%u, ddr_hertz:%u\n", "EARLY FILL COUNT ", + l2c_ctl.s.rdf_cnt, cpu_hertz, ddr_hertz); + DRAM_CSR_WRITE(node, BDK_L2C_CTL, l2c_ctl.u); + } + + /* Check to see if we should limit the number of L2 ways. */ + if ((s = lookup_env_parameter("limit_l2_ways")) != NULL) { + int ways = strtoul(s, NULL, 10); + limit_l2_ways(node, ways, 1); + } + + /* We measure the DDR frequency by counting DDR clocks. We can + * confirm or adjust the expected frequency as necessary. We use + * the measured frequency to make accurate timing calculations + * used to configure the controller. + */ + for (interface_index = 0; interface_index < 4; ++interface_index) { + uint32_t tmp_hertz; + + if (! (ddr_config_valid_mask & (1 << interface_index))) + continue; + + try_again: + // if we are LMC0 + if (interface_index == 0) { + // if we are asking for 100 MHz refclk, we can only get it via alternate, so switch to it + if (ddr_ref_hertz == 100000000) { + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(0), c.s.dclk_alt_refclk_sel = 1); + bdk_wait_usec(1000); // wait 1 msec + } else { + // if we are NOT asking for 100MHz, then reset to (assumed) 50MHz and go on + DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(0), c.s.dclk_alt_refclk_sel = 0); + bdk_wait_usec(1000); // wait 1 msec + } + } + + tmp_hertz = measure_octeon_ddr_clock(node, + &ddr_configuration[interface_index], + cpu_hertz, + ddr_hertz, + ddr_ref_hertz, + interface_index, + ddr_config_valid_mask); + + // if we are LMC0 and we are asked for 100 MHz refclk, + // we must be sure it is available + // If not, we print an error message, set to 50MHz, and go on... + if ((interface_index == 0) && (ddr_ref_hertz == 100000000)) { + // validate that the clock returned is close enough to the clock desired + // FIXME: is 5% close enough? 
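+ // (Illustrative numbers, not a stated default: with ddr_hertz =
+ // 800 MHz the test below accepts a measured clock within 40 MHz
+ // of the request.)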
+ int hertz_diff = _abs((int)tmp_hertz - (int)ddr_hertz);
+ if (hertz_diff > ((int)ddr_hertz * 5 / 100)) { // nope, diff is greater than 5%
+ ddr_print("N%d: DRAM init: requested 100 MHz refclk NOT FOUND\n", node);
+ ddr_ref_hertz = bdk_clock_get_rate(node, BDK_CLOCK_MAIN_REF);
+ set_ddr_clock_initialized(node, 0, 0); // clear the flag before trying again!!
+ goto try_again;
+ } else {
+ ddr_print("N%d: DRAM init: requested 100 MHz refclk FOUND and SELECTED.\n", node);
+ }
+ }
+
+ if (tmp_hertz > 0)
+ calc_ddr_hertz = tmp_hertz;
+
+ } /* for (interface_index = 0; interface_index < 4; ++interface_index) */
+
+ if (measured_ddr_hertz)
+ *measured_ddr_hertz = calc_ddr_hertz;
+
+ memsize_mbytes = 0;
+ for (interface_index = 0; interface_index < 4; ++interface_index) {
+ if (! (ddr_config_valid_mask & (1 << interface_index))) { // if LMC has no DIMMs found
+ if (ddr_interface_mask & (1 << interface_index)) { // but the LMC is present
+ for (int i = 0; i < DDR_CFG_T_MAX_DIMMS; i++) {
+ // check for slot presence
+ if (validate_dimm(node, &ddr_configuration[interface_index].dimm_config_table[i]) == 0)
+ printf("N%d.LMC%d.DIMM%d: Not Present\n", node, interface_index, i);
+ }
+ error_print("N%d.LMC%d Configuration Completed: 0 MB\n", node, interface_index);
+ }
+ continue;
+ }
+
+ retval = init_octeon_dram_interface(node,
+ &ddr_configuration[interface_index],
+ calc_ddr_hertz, /* Configure using measured value */
+ cpu_hertz,
+ ddr_ref_hertz,
+ board_type,
+ board_rev_maj,
+ board_rev_min,
+ interface_index,
+ ddr_config_valid_mask);
+ if (retval > 0)
+ memsize_mbytes += retval;
+ }
+
+ if (memsize_mbytes == 0)
+ /* All interfaces failed to initialize, so return error */
+ return -1;
+
+ // switch over to DBI mode, but only for chips that support it and only when enabled by envvar
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ int do_dbi = 0;
+ if ((s = lookup_env_parameter("ddr_dbi_switchover")) != NULL) {
+ do_dbi = !!strtoul(s, NULL, 10);
+ }
+ if (do_dbi) {
+ ddr_print("DBI Switchover starting...\n");
+ for (interface_index = 0; interface_index < 4; ++interface_index) {
+ if (! (ddr_config_valid_mask & (1 << interface_index)))
+ continue;
+ dbi_switchover_interface(node, interface_index);
+ }
+ printf("DBI Switchover finished.\n");
+ }
+ }
+
+ // limit memory size if desired...
+ if ((s = lookup_env_parameter("limit_dram_mbytes")) != NULL) {
+ unsigned int mbytes = strtoul(s, NULL, 10);
+ if (mbytes > 0) {
+ memsize_mbytes = mbytes;
+ printf("Limiting DRAM size to %d MBytes based on limit_dram_mbytes env. variable\n",
+ mbytes);
+ }
+ }
+
+ return memsize_mbytes;
+}
+
diff --git a/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.h b/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.h
new file mode 100644
index 0000000000..b691e5286b
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.h
@@ -0,0 +1,124 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ + +extern const dimm_odt_config_t disable_odt_config[]; + +#define rttnom_none 0 /* Rtt_Nom disabled */ +#define rttnom_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */ +#define rttnom_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */ +#define rttnom_40ohm 3 /* RZQ/6 = 240/6 = 40 ohms */ +#define rttnom_20ohm 4 /* RZQ/12 = 240/12 = 20 ohms */ +#define rttnom_30ohm 5 /* RZQ/8 = 240/8 = 30 ohms */ +#define rttnom_rsrv1 6 /* Reserved */ +#define rttnom_rsrv2 7 /* Reserved */ + +#define rttwr_none 0 /* Dynamic ODT off */ +#define rttwr_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */ +#define rttwr_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */ +#define rttwr_rsrv1 3 /* Reserved */ + +#define dic_40ohm 0 /* RZQ/6 = 240/6 = 40 ohms */ +#define dic_34ohm 1 /* RZQ/7 = 240/7 = 34 ohms */ + +#define driver_24_ohm 1 +#define driver_27_ohm 2 +#define driver_30_ohm 3 +#define driver_34_ohm 4 +#define driver_40_ohm 5 +#define driver_48_ohm 6 +#define driver_60_ohm 7 + +#define rodt_ctl_none 0 +#define rodt_ctl_20_ohm 1 +#define rodt_ctl_30_ohm 2 +#define rodt_ctl_40_ohm 3 +#define rodt_ctl_60_ohm 4 +#define rodt_ctl_120_ohm 5 + +#define ddr4_rttnom_none 0 /* Rtt_Nom disabled */ +#define ddr4_rttnom_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */ +#define ddr4_rttnom_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */ +#define ddr4_rttnom_40ohm 3 /* RZQ/6 = 240/6 = 40 ohms */ +#define ddr4_rttnom_240ohm 4 /* RZQ/1 = 240/1 = 240 ohms */ +#define ddr4_rttnom_48ohm 5 /* RZQ/5 = 240/5 = 48 ohms */ +#define ddr4_rttnom_80ohm 6 /* RZQ/3 = 240/3 = 80 ohms */ +#define ddr4_rttnom_34ohm 7 /* RZQ/7 = 240/7 = 34 ohms */ + +#define ddr4_rttwr_none 0 /* Dynamic ODT off */ +#define ddr4_rttwr_120ohm 1 /* RZQ/2 = 240/2 = 120 ohms */ +#define ddr4_rttwr_240ohm 2 /* RZQ/1 = 240/1 = 240 ohms */ +#define ddr4_rttwr_HiZ 3 /* HiZ */ +/* This setting will be available for cn78xx cn88xx pass 2 and cn73xx + pass 1. It is disabled for now. 
*/ +//#define ddr4_rttwr_80ohm 4 /* RZQ/3 = 240/3 = 80 ohms */ + +#define ddr4_dic_34ohm 0 /* RZQ/7 = 240/7 = 34 ohms */ +#define ddr4_dic_48ohm 1 /* RZQ/5 = 240/5 = 48 ohms */ + +#define ddr4_rttpark_none 0 /* Rtt_Park disabled */ +#define ddr4_rttpark_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */ +#define ddr4_rttpark_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */ +#define ddr4_rttpark_40ohm 3 /* RZQ/6 = 240/6 = 40 ohms */ +#define ddr4_rttpark_240ohm 4 /* RZQ/1 = 240/1 = 240 ohms */ +#define ddr4_rttpark_48ohm 5 /* RZQ/5 = 240/5 = 48 ohms */ +#define ddr4_rttpark_80ohm 6 /* RZQ/3 = 240/3 = 80 ohms */ +#define ddr4_rttpark_34ohm 7 /* RZQ/7 = 240/7 = 34 ohms */ + +#define ddr4_driver_26_ohm 2 +#define ddr4_driver_30_ohm 3 +#define ddr4_driver_34_ohm 4 +#define ddr4_driver_40_ohm 5 +#define ddr4_driver_48_ohm 6 + +#define ddr4_dqx_driver_24_ohm 1 +#define ddr4_dqx_driver_27_ohm 2 +#define ddr4_dqx_driver_30_ohm 3 +#define ddr4_dqx_driver_34_ohm 4 +#define ddr4_dqx_driver_40_ohm 5 +#define ddr4_dqx_driver_48_ohm 6 +#define ddr4_dqx_driver_60_ohm 7 + +#define ddr4_rodt_ctl_none 0 +#define ddr4_rodt_ctl_40_ohm 1 +#define ddr4_rodt_ctl_60_ohm 2 +#define ddr4_rodt_ctl_80_ohm 3 +#define ddr4_rodt_ctl_120_ohm 4 +#define ddr4_rodt_ctl_240_ohm 5 +#define ddr4_rodt_ctl_34_ohm 6 +#define ddr4_rodt_ctl_48_ohm 7 diff --git a/src/vendorcode/cavium/bdk/libdram/libdram-config-load.c b/src/vendorcode/cavium/bdk/libdram/libdram-config-load.c new file mode 100644 index 0000000000..5173290187 --- /dev/null +++ b/src/vendorcode/cavium/bdk/libdram/libdram-config-load.c @@ -0,0 +1,262 @@ +/***********************license start*********************************** +* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights +* reserved. +* +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials provided +* with the distribution. +* +* * Neither the name of Cavium Inc. nor the names of +* its contributors may be used to endorse or promote products +* derived from this software without specific prior written +* permission. +* +* This Software, including technical data, may be subject to U.S. export +* control laws, including the U.S. Export Administration Act and its +* associated regulations, and may be subject to export or import +* regulations in other countries. +* +* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR +* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT +* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY +* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT +* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES +* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR +* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, +* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. 
+***********************license end**************************************/
+#include <bdk.h>
+
+/**
+ * Load an "odt_*rank_config" structure
+ *
+ * @param cfg Config to fill
+ * @param ranks Number of ranks we're loading (1,2,4)
+ * @param num_dimms Number of DIMMs populated (1 based)
+ * @param lmc Which LMC this is for
+ * @param node Node we're loading for
+ */
+static void load_rank_data(dram_config_t *cfg, int ranks, int num_dimms, int lmc, bdk_node_t node)
+{
+ /* Get a pointer to the structure we are filling */
+ dimm_odt_config_t *c;
+ switch (ranks)
+ {
+ case 1:
+ c = &cfg->config[lmc].odt_1rank_config[num_dimms - 1];
+ break;
+ case 2:
+ c = &cfg->config[lmc].odt_2rank_config[num_dimms - 1];
+ break;
+ case 4:
+ c = &cfg->config[lmc].odt_4rank_config[num_dimms - 1];
+ break;
+ default:
+ bdk_fatal("Unexpected number of ranks\n");
+ break;
+ }
+
+ /* Fill the global items */
+ c->odt_ena = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_DQX_CTL, ranks, num_dimms, lmc, node);
+ c->odt_mask = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_WODT_MASK, ranks, num_dimms, lmc, node);
+
+ /* Fill the per rank items */
+ int rank = 0;
+ c->odt_mask1.s.pasr_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_PASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.asr_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_ASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.srt_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_SRT, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_00_ext = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node) >> 2;
+ c->odt_mask1.s.dic_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_DIC, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_nom_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_NOM, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.db_output_impedance = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_DB_OUTPUT_IMPEDANCE, ranks, num_dimms, lmc, node);
+ rank = 1;
+ c->odt_mask1.s.pasr_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_PASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.asr_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_ASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.srt_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_SRT, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_01_ext = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node) >> 2;
+ c->odt_mask1.s.dic_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_DIC, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_nom_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_NOM, ranks, num_dimms, rank, lmc, node);
+ rank = 2;
+ c->odt_mask1.s.pasr_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_PASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.asr_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_ASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.srt_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_SRT, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_10_ext = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node) >> 2;
+ c->odt_mask1.s.dic_10 =
bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_DIC, ranks, num_dimms, rank, lmc, node); + c->odt_mask1.s.rtt_nom_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_NOM, ranks, num_dimms, rank, lmc, node); + rank = 3; + c->odt_mask1.s.pasr_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_PASR, ranks, num_dimms, rank, lmc, node); + c->odt_mask1.s.asr_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_ASR, ranks, num_dimms, rank, lmc, node); + c->odt_mask1.s.srt_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_SRT, ranks, num_dimms, rank, lmc, node); + c->odt_mask1.s.rtt_wr_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node); + c->odt_mask1.s.rtt_wr_11_ext = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node) >> 2; + c->odt_mask1.s.dic_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_DIC, ranks, num_dimms, rank, lmc, node); + c->odt_mask1.s.rtt_nom_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_NOM, ranks, num_dimms, rank, lmc, node); + rank = 0; + c->odt_mask2.s.rtt_park_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_RTT_PARK, ranks, num_dimms, rank, lmc, node); + c->odt_mask2.s.vref_value_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_VALUE, ranks, num_dimms, rank, lmc, node); + c->odt_mask2.s.vref_range_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_RANGE, ranks, num_dimms, rank, lmc, node); + c->odt_mask2.s.vrefdq_train_en = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREFDQ_TRAIN_EN, ranks, num_dimms, lmc, node); + rank = 1; + c->odt_mask2.s.rtt_park_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_RTT_PARK, ranks, num_dimms, rank, lmc, node); + c->odt_mask2.s.vref_value_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_VALUE, ranks, num_dimms, rank, lmc, node); + c->odt_mask2.s.vref_range_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_RANGE, ranks, num_dimms, rank, lmc, node); + rank = 2; + c->odt_mask2.s.rtt_park_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_RTT_PARK, ranks, num_dimms, rank, lmc, node); + c->odt_mask2.s.vref_value_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_VALUE, ranks, num_dimms, rank, lmc, node); + c->odt_mask2.s.vref_range_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_RANGE, ranks, num_dimms, rank, lmc, node); + rank = 3; + c->odt_mask2.s.rtt_park_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_RTT_PARK, ranks, num_dimms, rank, lmc, node); + c->odt_mask2.s.vref_value_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_VALUE, ranks, num_dimms, rank, lmc, node); + c->odt_mask2.s.vref_range_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_RANGE, ranks, num_dimms, rank, lmc, node); + + /* Fill more global items */ + c->qs_dic = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_RODT_CTL, ranks, num_dimms, lmc, node); + c->rodt_ctl = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_RODT_MASK, ranks, num_dimms, lmc, node); +} + +/** + * Load a DRAM configuration based on the current bdk-config settings + * + * @param node Node the DRAM config is for + * + * @return Pointer to __libdram_global_cfg, a global structure. Returns NULL if bdk-config + * lacks information about DRAM. 
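+ *
+ * A minimal usage sketch (hypothetical caller, not from the source;
+ * error handling elided):
+ *
+ * const dram_config_t *cfg = libdram_config_load(node);
+ * if (cfg == NULL)
+ * bdk_fatal("No DRAM config found in bdk-config\n");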
+ */
+const dram_config_t *libdram_config_load(bdk_node_t node)
+{
+ dram_config_t *cfg = &__libdram_global_cfg;
+ const int MAX_LMCS = sizeof(cfg->config) / sizeof(cfg->config[0]);
+
+ /* Make all fields for the node default to zero */
+ memset(cfg, 0, sizeof(*cfg));
+
+ /* Fill the SPD data first as some parameters need to know the DRAM type
+ to look up the correct values */
+ for (int lmc = 0; lmc < MAX_LMCS; lmc++)
+ {
+ for (int dimm = 0; dimm < DDR_CFG_T_MAX_DIMMS; dimm++)
+ {
+ int spd_addr = bdk_config_get_int(BDK_CONFIG_DDR_SPD_ADDR, dimm, lmc, node);
+ if (spd_addr)
+ {
+ cfg->config[lmc].dimm_config_table[dimm].spd_addr = spd_addr;
+ }
+ else
+ {
+ int spd_size;
+ const void *spd_data = bdk_config_get_blob(&spd_size, BDK_CONFIG_DDR_SPD_DATA, dimm, lmc, node);
+ if (spd_data && spd_size)
+ cfg->config[lmc].dimm_config_table[dimm].spd_ptr = spd_data;
+ }
+ }
+ }
+
+ /* Check that we know how to get DIMM information. If not, return failure */
+ if (!cfg->config[0].dimm_config_table[0].spd_addr && !cfg->config[0].dimm_config_table[0].spd_ptr)
+ return NULL;
+
+ cfg->name = "Loaded from bdk-config";
+ for (int lmc = 0; lmc < MAX_LMCS; lmc++)
+ {
+ for (int num_dimms = 1; num_dimms <= DDR_CFG_T_MAX_DIMMS; num_dimms++)
+ {
+ load_rank_data(cfg, 1, num_dimms, lmc, node);
+ load_rank_data(cfg, 2, num_dimms, lmc, node);
+ load_rank_data(cfg, 4, num_dimms, lmc, node);
+ }
+
+ ddr_configuration_t *c = &cfg->config[lmc];
+ ddr3_custom_config_t *custom = &c->custom_lmc_config;
+ custom->min_rtt_nom_idx = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MIN_RTT_NOM_IDX, lmc, node);
+ custom->max_rtt_nom_idx = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MAX_RTT_NOM_IDX, lmc, node);
+ custom->min_rodt_ctl = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MIN_RODT_CTL, lmc, node);
+ custom->max_rodt_ctl = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MAX_RODT_CTL, lmc, node);
+ custom->ck_ctl = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_CK_CTL, lmc, node);
+ custom->cmd_ctl = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_CMD_CTL, lmc, node);
+ custom->ctl_ctl = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_CTL_CTL, lmc, node);
+ custom->min_cas_latency = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MIN_CAS_LATENCY, lmc, node);
+ custom->offset_en = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_OFFSET_EN, lmc, node);
+ custom->offset_udimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_OFFSET, "UDIMM", lmc, node);
+ custom->offset_rdimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_OFFSET, "RDIMM", lmc, node);
+ custom->rlevel_compute = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_RLEVEL_COMPUTE, lmc, node);
+ custom->rlevel_comp_offset_udimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_RLEVEL_COMP_OFFSET, "UDIMM", lmc, node);
+ custom->rlevel_comp_offset_rdimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_RLEVEL_COMP_OFFSET, "RDIMM", lmc, node);
+ custom->ddr2t_udimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_DDR2T, "UDIMM", lmc, node);
+ custom->ddr2t_rdimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_DDR2T, "RDIMM", lmc, node);
+ custom->disable_sequential_delay_check = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_DISABLE_SEQUENTIAL_DELAY_CHECK, lmc, node);
+ custom->maximum_adjacent_rlevel_delay_increment
+ = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MAXIMUM_ADJACENT_RLEVEL_DELAY_INCREMENT, lmc, node);
+ custom->parity = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_PARITY, lmc, node);
+ custom->fprch2 = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_FPRCH2, lmc, node);
+ custom->mode32b = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MODE32B, lmc, node);
+
custom->measured_vref = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MEASURED_VREF, lmc, node);
+
+ /* CN80XX only supports 32bit mode */
+ if (cavium_is_altpkg(CAVIUM_CN81XX))
+ custom->mode32b = 1;
+
+ /* Loop through 8 bytes, plus ecc byte */
+ #define NUM_BYTES 9 /* Max bytes on LMC (8 plus ECC) */
+ static int8_t dll_write_offset[NUM_BYTES];
+ static int8_t dll_read_offset[NUM_BYTES];
+ for (int b = 0; b < NUM_BYTES; b++)
+ {
+ dll_write_offset[b] = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_DLL_WRITE_OFFSET, b, lmc, node);
+ dll_read_offset[b] = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_DLL_READ_OFFSET, b, lmc, node);
+ }
+ custom->dll_write_offset = dll_write_offset;
+ custom->dll_read_offset = dll_read_offset;
+ }
+
+ int is_ddr4 = (cfg->config[0].odt_1rank_config[0].odt_mask2.u != 0);
+ int speed = bdk_config_get_int(BDK_CONFIG_DDR_SPEED, node);
+ switch (speed)
+ {
+ case 0: // AUTO
+ cfg->ddr_clock_hertz = 0;
+ break;
+ case 800:
+ case 1600:
+ case 2400:
+ cfg->ddr_clock_hertz = (uint64_t)speed * 1000000 / 2;
+ break;
+ case 666:
+ cfg->ddr_clock_hertz = 333333333;
+ break;
+ case 1066:
+ cfg->ddr_clock_hertz = 533333333;
+ break;
+ case 1333:
+ cfg->ddr_clock_hertz = 666666666;
+ break;
+ case 1866:
+ if (is_ddr4)
+ cfg->ddr_clock_hertz = 940000000;
+ else
+ cfg->ddr_clock_hertz = 933333333;
+ break;
+ case 2133:
+ cfg->ddr_clock_hertz = 1050000000;
+ break;
+ default:
+ bdk_warn("Unsupported DRAM speed of %d MT/s\n", speed);
+ cfg->ddr_clock_hertz = speed * 1000000 / 2;
+ break;
+ }
+
+ return cfg;
+}
diff --git a/src/vendorcode/cavium/bdk/libdram/libdram.c b/src/vendorcode/cavium/bdk/libdram/libdram.c
new file mode 100644
index 0000000000..b19486694c
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/libdram.c
@@ -0,0 +1,718 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION.
THE ENTIRE RISK +* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. +***********************license end**************************************/ +#include <bdk.h> +#include "libbdk-arch/bdk-csrs-mio_fus.h" +#include "dram-internal.h" + +/* This global variable is accessed through dram_is_verbose() to determine + the verbosity level. Use that function instead of setting it directly */ +dram_verbosity_t dram_verbosity = VBL_OFF; /* init this here so we could set a non-zero default */ + +static uint32_t measured_ddr_hertz[BDK_NUMA_MAX_NODES]; + +/* The various DRAM configs in the libdram/configs directory need space + to store the DRAM config. Since only one config is ever in active use + at a time, store the configs in __libdram_global_cfg. In a multi-node + setup, independent calls to get the DRAM config will load first node 0's + config, then node 1's */ +dram_config_t __libdram_global_cfg; + +static void bdk_dram_clear_mem(bdk_node_t node) +{ + if (!bdk_is_platform(BDK_PLATFORM_ASIM)) { + uint64_t mbytes = bdk_dram_get_size_mbytes(node); + uint64_t skip = (node == bdk_numa_master()) ? bdk_dram_get_top_of_bdk() : 0; + uint64_t len = (mbytes << 20) - skip; + + BDK_TRACE(DRAM, "N%d: Clearing DRAM\n", node); + if (skip) + { + /* All memory below skip may contain valid data, so we can't clear + it. We still need to make sure all cache lines in this area are + fully dirty so that ECC bits will be updated on store. A single + write to the cache line isn't good enough because partial LMC + writes may be enabled */ + ddr_print("N%d: Rewriting DRAM: start 0 length 0x%lx\n", node, skip); + volatile uint64_t *ptr = bdk_phys_to_ptr(bdk_numa_get_address(node, 8)); + /* The above pointer got address 8 to avoid NULL pointer checking + in bdk_phys_to_ptr(). Correct it here */ + ptr--; + uint64_t *end = bdk_phys_to_ptr(bdk_numa_get_address(node, skip)); + while (ptr < end) + { + *ptr = *ptr; + ptr++; + } + } + ddr_print("N%d: Clearing DRAM: start 0x%lx length 0x%lx\n", node, skip, len); + bdk_zero_memory(bdk_phys_to_ptr(bdk_numa_get_address(node, skip)), len); + BDK_TRACE(DRAM, "N%d: DRAM clear complete\n", node); + } +} + +static void bdk_dram_clear_ecc(bdk_node_t node) +{ + /* Clear any DRAM errors set during init */ + BDK_TRACE(DRAM, "N%d: Clearing LMC ECC errors\n", node); + int num_lmc = __bdk_dram_get_num_lmc(node); + for (int lmc = 0; lmc < num_lmc; lmc++) { + DRAM_CSR_WRITE(node, BDK_LMCX_INT(lmc), BDK_CSR_READ(node, BDK_LMCX_INT(lmc))); + } +} + +static void bdk_dram_enable_ecc_reporting(bdk_node_t node) +{ + /* Enable LMC ECC error HW reporting */ + int num_lmc = __bdk_dram_get_num_lmc(node); + + BDK_TRACE(DRAM, "N%d: Enable LMC ECC error reporting\n", node); + + for (int lmc = 0; lmc < num_lmc; lmc++) { + + // NOTE: this must be done for pass 2.x + // enable ECC interrupts to allow ECC error info in LMCX_INT + if (! 
CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ DRAM_CSR_WRITE(node, BDK_LMCX_INT_ENA_W1S(lmc), -1ULL);
+ BDK_CSR_INIT(lmc_int_ena_w1s, node, BDK_LMCX_INT_ENA_W1S(lmc));
+ ddr_print("N%d.LMC%d: %-36s : 0x%08lx\n",
+ node, lmc, "LMC_INT_ENA_W1S", lmc_int_ena_w1s.u);
+ }
+ }
+}
+
+static void bdk_dram_disable_ecc_reporting(bdk_node_t node)
+{
+ /* Disable LMC ECC error HW reporting */
+ int num_lmc = __bdk_dram_get_num_lmc(node);
+
+ BDK_TRACE(DRAM, "N%d: Disable LMC ECC error reporting\n", node);
+
+ for (int lmc = 0; lmc < num_lmc; lmc++) {
+
+ // NOTE: this must be done for pass 2.x
+ // disable ECC interrupts to prevent ECC error info in LMCX_INT
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ DRAM_CSR_WRITE(node, BDK_LMCX_INT_ENA_W1C(lmc), -1ULL);
+ BDK_CSR_INIT(lmc_int_ena_w1c, node, BDK_LMCX_INT_ENA_W1C(lmc));
+ ddr_print("N%d.LMC%d: %-36s : 0x%08lx\n",
+ node, lmc, "LMC_INT_ENA_W1C", lmc_int_ena_w1c.u);
+ }
+ }
+}
+
+// this routine simply makes the calls to the tuning routines and returns any errors
+static int bdk_libdram_tune_node(int node)
+{
+ int errs, tot_errs;
+ int do_dllro_hw = 0; // default to NO
+ int do_dllwo = 0; // default to NO
+ int do_eccdll = 0; // default to NO
+ const char *str;
+ BDK_CSR_INIT(lmc_config, node, BDK_LMCX_CONFIG(0)); // FIXME: probe LMC0
+ do_eccdll = (lmc_config.s.ecc_ena != 0); // change to ON if ECC enabled
+
+ // FIXME!!! make 81xx always use HW-assist tuning
+ if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+ do_dllro_hw = 1;
+
+ // Automatically tune the data byte DLL read offsets
+ // always done by default, but allow use of HW-assist
+ // NOTE: HW-assist will also tune the ECC byte
+ str = getenv("ddr_tune_hw_offsets");
+ if (str)
+ do_dllro_hw = !!strtoul(str, NULL, 0);
+ BDK_TRACE(DRAM, "N%d: Starting DLL Read Offset Tuning for LMCs\n", node);
+ if (!do_dllro_hw || (lmc_config.s.mode32b != 0)) {
+ errs = perform_dll_offset_tuning(node, 2, /* tune */1);
+ } else {
+ errs = perform_HW_dll_offset_tuning(node, /* read */2, 0x0A/* all bytelanes */);
+ }
+ BDK_TRACE(DRAM, "N%d: Finished DLL Read Offset Tuning for LMCs, %d errors\n",
+ node, errs);
+ tot_errs = errs;
+
+ // disabled by default for now, does not seem to be needed?
+ // Automatically tune the data byte DLL write offsets
+ // allow override of default setting
+ str = getenv("ddr_tune_write_offsets");
+ if (str)
+ do_dllwo = !!strtoul(str, NULL, 0);
+ if (do_dllwo) {
+ BDK_TRACE(DRAM, "N%d: Starting DLL Write Offset Tuning for LMCs\n", node);
+ errs = perform_dll_offset_tuning(node, /* write */1, /* tune */1);
+ BDK_TRACE(DRAM, "N%d: Finished DLL Write Offset Tuning for LMCs, %d errors\n",
+ node, errs);
+ tot_errs += errs;
+ }
+
+ // disabled by default for now, does not seem to be needed much?
+ // Automatically tune the ECC byte DLL read offsets
+ // FIXME? allow override of the filtering
+ // FIXME? allow programmatic override, not via envvar?
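+ // Illustrative note (not in the original source): setting
+ // "ddr_tune_ecc_enable=1" in the environment requests the ECC-byte
+ // tuning below even when lmc_config.s.ecc_ena was 0, and "0" skips it;
+ // the call is still gated on !do_dllro_hw and 64-bit mode.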
+ str = getenv("ddr_tune_ecc_enable");
+ if (str)
+ do_eccdll = !!strtoul(str, NULL, 10);
+ if (do_eccdll && !do_dllro_hw && (lmc_config.s.mode32b == 0)) { // do not do HW-assist twice for ECC
+ BDK_TRACE(DRAM, "N%d: Starting ECC DLL Read Offset Tuning for LMCs\n", node);
+ errs = perform_HW_dll_offset_tuning(node, 2, 8/* ECC bytelane */);
+ BDK_TRACE(DRAM, "N%d: Finished ECC DLL Read Offset Tuning for LMCs, %d errors\n",
+ node, errs);
+ tot_errs += errs;
+ }
+
+ return tot_errs;
+}
+
+// this routine makes the calls to the tuning routines when criteria are met
+// intended to be called for automated tuning, to apply filtering...
+
+#define IS_DDR4 1
+#define IS_DDR3 0
+#define IS_RDIMM 1
+#define IS_UDIMM 0
+#define IS_1SLOT 1
+#define IS_2SLOT 0
+
+// FIXME: DDR3 is not tuned
+static const uint32_t ddr_speed_filter[2][2][2] = {
+ [IS_DDR4] = {
+ [IS_RDIMM] = {
+ [IS_1SLOT] = 940,
+ [IS_2SLOT] = 800
+ },
+ [IS_UDIMM] = {
+ [IS_1SLOT] = 1050,
+ [IS_2SLOT] = 940
+ },
+ },
+ [IS_DDR3] = {
+ [IS_RDIMM] = {
+ [IS_1SLOT] = 0, // disabled
+ [IS_2SLOT] = 0 // disabled
+ },
+ [IS_UDIMM] = {
+ [IS_1SLOT] = 0, // disabled
+ [IS_2SLOT] = 0 // disabled
+ }
+ }
+};
+
+static int bdk_libdram_maybe_tune_node(int node)
+{
+ const char *str;
+
+ // FIXME: allow an override here so that all configs can be tuned or none
+ // If the envvar is defined, always either force it or avoid it accordingly
+ if ((str = getenv("ddr_tune_all_configs")) != NULL) {
+ int tune_it = !!strtoul(str, NULL, 0);
+ printf("N%d: DRAM auto-tuning %s.\n", node, (tune_it) ? "forced" : "avoided");
+ return (tune_it) ? bdk_libdram_tune_node(node) : 0;
+ }
+
+ // filter the tuning calls here...
+ // determine if we should/can run automatically for this configuration
+ //
+ // FIXME: tune only when the configuration indicates it will help:
+ // DDR type, RDIMM or UDIMM, 1-slot or 2-slot, and speed
+ //
+ uint32_t ddr_speed = divide_nint(libdram_get_freq_from_pll(node, 0), 1000000); // sample LMC0
+ BDK_CSR_INIT(lmc_config, node, BDK_LMCX_CONFIG(0)); // sample LMC0
+
+ int is_ddr4 = !!__bdk_dram_is_ddr4(node, 0);
+ int is_rdimm = !!__bdk_dram_is_rdimm(node, 0);
+ int is_1slot = !!(lmc_config.s.init_status < 4); // HACK, should do better
+ int do_tune = 0;
+
+ uint32_t ddr_min_speed = ddr_speed_filter[is_ddr4][is_rdimm][is_1slot];
+ do_tune = (ddr_min_speed && (ddr_speed > ddr_min_speed));
+
+ ddr_print("N%d: DDR%d %cDIMM %d-slot at %d MHz %s eligible for auto-tuning.\n",
+ node, (is_ddr4)?4:3, (is_rdimm)?'R':'U', (is_1slot)?1:2,
+ ddr_speed, (do_tune)?"is":"is not");
+
+ // call the tuning routines, done filtering...
+ return ((do_tune) ? bdk_libdram_tune_node(node) : 0);
+}
+
+/**
+ * This is the main DRAM init function. Users of libdram should call this function,
+ * avoiding the other internal functions. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to initialize. This may not be the same node as the one running the code
+ * @param dram_config
+ * DRAM configuration to use
+ * @param ddr_clock_override
+ * If non-zero, this overrides the DRAM clock speed in the config structure. This
+ * allows quick testing of different DRAM speeds without modifying the basic
+ * config. If zero, the DRAM speed in the config is used.
+ *
+ * @return Amount of memory in MB. Zero or negative is a failure.
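+ *
+ * A short usage sketch (values illustrative, not from the source):
+ *
+ * mbytes = libdram_config(node, cfg, 0); // use speed from cfg
+ * mbytes = libdram_config(node, cfg, 800000000); // force 800 MHz, i.e. 1600 MT/s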
+ */
+int libdram_config(int node, const dram_config_t *dram_config, int ddr_clock_override)
+{
+ if (bdk_is_platform(BDK_PLATFORM_ASIM))
+ return bdk_dram_get_size_mbytes(node);
+
+ /* Boards may need to mux the TWSI connection between THUNDERX and the BMC.
+ This allows the BMC to monitor DIMM temperatures and health */
+ int gpio_select = bdk_config_get_int(BDK_CONFIG_DRAM_CONFIG_GPIO);
+ if (gpio_select != -1)
+ bdk_gpio_initialize(bdk_numa_master(), gpio_select, 1, 1);
+
+ /* Read all the SPDs and store them in the device tree. They are needed by
+ later software to populate SMBIOS information */
+ for (int lmc = 0; lmc < 4; lmc++)
+ for (int dimm = 0; dimm < DDR_CFG_T_MAX_DIMMS; dimm++)
+ read_entire_spd(node, (dram_config_t *)dram_config, lmc, dimm);
+
+ const ddr_configuration_t *ddr_config = dram_config->config;
+ int ddr_clock_hertz = (ddr_clock_override) ? ddr_clock_override : dram_config->ddr_clock_hertz;
+ if (ddr_clock_hertz == 0) // 0 == AUTO
+ {
+ ddr_clock_hertz = dram_get_default_spd_speed(node, ddr_config);
+ if (ddr_clock_hertz < 0) {
+ printf("N%d: DRAM init: AUTO clock ILLEGAL configuration\n", node);
+ return -1;
+ }
+ }
+ int errs;
+
+ // At this point, we only know the desired clock rate (ddr_clock_hertz).
+ // We do not know whether we are configuring RDIMMs.
+ // We also do not yet know if 100MHz alternate refclk is actually available.
+ // So, if we are being asked for 2133MT/s or better, we still need to do:
+ // 1. probe for RDIMMs (if not, 50MHz refclk is good enough)
+ // 2. determine if 100MHz refclk is there, and switch to it before starting any configuration
+ //
+ // NOTES:
+ // 1. dclk_alt_refclk_sel need only be set on LMC0 (see above disabled code)
+ // 2. I think we need to first probe to see if we need it, and configure it then if so dictated
+ // 3. then go on to configure at the selected refclk
+ int ddr_refclk_hertz = bdk_clock_get_rate(node, BDK_CLOCK_MAIN_REF);
+ int alt_refclk = bdk_config_get_int(BDK_CONFIG_DDR_ALT_REFCLK, node);
+
+ char *str = getenv("ddr_100mhz_refclk");
+ if (str) { // if the envvar was found, force it to that setting
+ int do_100mhz = !!strtoul(str, NULL, 0);
+ alt_refclk = (do_100mhz) ? 100 : 50;
+ }
+
+ dram_verbosity = bdk_config_get_int(BDK_CONFIG_DRAM_VERBOSE);
+
+ // Here we check for fuses that limit the number of LMCs we can configure,
+ // but only on 83XX and 88XX...
+ int lmc_limit = 4;
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX) || CAVIUM_IS_MODEL(CAVIUM_CN83XX)) {
+ BDK_CSR_INIT(mio_fus_dat2, node, BDK_MIO_FUS_DAT2);
+ if (mio_fus_dat2.s.lmc_half) {
+ lmc_limit = (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) ? 2 : 1; // limit LMCs to half present
+ error_print("Only %d LMC(s) supported for this Thunder model\n", lmc_limit);
+ }
+ }
+
+ /* We need to calculate the interface mask based on the provided SPD
+ addresses/contents */
+ uint32_t interface_mask = 0;
+ for (int i = 0; i < lmc_limit; i++)
+ {
+ // We need to check only DIMM 0 of each LMC for possible presence of the LMC.
+ // This trusts that the board database is correctly configured.
+ // Empty DIMM slots in present LMCs will be detected later.
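+ // For example (illustrative, not from a real board database): with DIMM 0
+ // described for LMC0 and LMC1 only, interface_mask becomes 0x3 and
+ // LMC2/LMC3 are left out of initialization.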
+ if (ddr_config[i].dimm_config_table[0].spd_addr ||
+ ddr_config[i].dimm_config_table[0].spd_ptr)
+ interface_mask |= 1 << i;
+
+ // we know whether alternate refclk is always wanted
+ // we also know already if we want 2133 MT/s
+ // if alt refclk not always wanted, then probe DDR and DIMM type
+ // if DDR4 and RDIMMs, then set desired refclk to 100MHz, otherwise to default (50MHz)
+ // depend on ddr_initialize() to do the refclk selection and validation
+ if (i == 0) { // only check for LMC0
+ if (alt_refclk) { // if alternate refclk was specified, let it override everything
+ ddr_refclk_hertz = alt_refclk * 1000000;
+ ddr_print("N%d: DRAM init: %d MHz refclk is REQUESTED ALWAYS\n", node, alt_refclk);
+ } else if (ddr_clock_hertz > 1000000000) { // if more than 2000 MT/s
+ int ddr_type = get_ddr_type(node, &ddr_config[0].dimm_config_table[0]);
+ int spd_dimm_type = get_dimm_module_type(node, &ddr_config[0].dimm_config_table[0], ddr_type);
+ // is DDR4 and RDIMM just to be sure
+ if ((ddr_type == DDR4_DRAM) &&
+ ((spd_dimm_type == 1) || (spd_dimm_type == 5) || (spd_dimm_type == 8))) {
+ ddr_refclk_hertz = 100000000; // yes, we require 100MHz refclk, so set it
+ ddr_print("N%d: DRAM init: 100 MHz refclk is REQUIRED\n", node);
+ }
+ } // if (ddr_clock_hertz > 1000000000)
+ } // if (i == 0)
+ }
+
+ BDK_TRACE(DRAM, "N%d: DRAM init started (hertz=%d, refclk=%d, config=%p)\n",
+ node, ddr_clock_hertz, ddr_refclk_hertz, dram_config);
+ debug_print("N%d: DRAM init started (hertz=%d, refclk=%d, config=%p)\n",
+ node, ddr_clock_hertz, ddr_refclk_hertz, dram_config);
+
+ BDK_TRACE(DRAM, "N%d: Calling DRAM init\n", node);
+ measured_ddr_hertz[node] = 0;
+ int mbytes = octeon_ddr_initialize(node,
+ bdk_clock_get_rate(node, BDK_CLOCK_RCLK),
+ ddr_clock_hertz,
+ ddr_refclk_hertz,
+ interface_mask,
+ ddr_config,
+ &measured_ddr_hertz[node],
+ 0,
+ 0,
+ 0);
+ BDK_TRACE(DRAM, "N%d: DRAM init returned %d, measured %u Hz\n",
+ node, mbytes, measured_ddr_hertz[node]);
+
+ // do not tune or mess with memory if there was an init problem...
+ if (mbytes > 0) {
+
+ bdk_dram_disable_ecc_reporting(node);
+
+ // call the tuning routines, with filtering...
+ BDK_TRACE(DRAM, "N%d: Calling DRAM tuning\n", node);
+ errs = bdk_libdram_maybe_tune_node(node);
+ BDK_TRACE(DRAM, "N%d: DRAM tuning returned %d errors\n",
+ node, errs);
+
+ // finally, clear memory and any left-over ECC errors
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ bdk_dram_enable_ecc_reporting(node);
+ }
+
+ /* Boards may need to mux the TWSI connection between THUNDERX and the BMC.
+ This allows the BMC to monitor DIMM temperatures and health */
+ if (gpio_select != -1)
+ bdk_gpio_initialize(bdk_numa_master(), gpio_select, 1, 0);
+
+ return mbytes;
+}
+
+/**
+ * This is the main DRAM tuning function. Users of libdram should call this function,
+ * avoiding the other internal functions. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to tune. This may not be the same node as the one running the code
+ *
+ * @return Success or Fail
+ */
+int libdram_tune(int node)
+{
+ int tot_errs;
+ int l2c_is_locked = bdk_l2c_is_locked(node);
+
+ dram_verbosity = bdk_config_get_int(BDK_CONFIG_DRAM_VERBOSE);
+
+ // the only way this entry point should be called is from a MENU item,
+ // so, enable any non-running cores on this node, and leave them
+ // running at the end...
+ ddr_print("N%d: %s: Starting cores (mask was 0x%lx)\n",
+ node, __FUNCTION__, bdk_get_running_coremask(node));
+ bdk_init_cores(node, ~0ULL);
+
+ // must test for L2C locked here, cannot go on with it unlocked
+ // FIXME: but we only need to worry about Node 0???
+ if (node == 0) {
+ if (!l2c_is_locked) { // is unlocked, must lock it now
+ ddr_print("N%d: %s: L2C was unlocked - locking it now\n", node, __FUNCTION__);
+ // FIXME: this should be common-ized; it currently matches bdk_init()...
+ bdk_l2c_lock_mem_region(node, 0, bdk_l2c_get_cache_size_bytes(node) * 3 / 4);
+ } else {
+ ddr_print("N%d: %s: L2C was already locked - continuing\n", node, __FUNCTION__);
+ }
+ } else {
+ ddr_print("N%d: %s: non-zero node, not worrying about L2C lock status\n", node, __FUNCTION__);
+ }
+
+ // call the tuning routines, no filtering...
+ tot_errs = bdk_libdram_tune_node(node);
+
+ // FIXME: only for node 0, unlock L2C if it was unlocked before...
+ if (node == 0) {
+ if (!l2c_is_locked) { // it was Node 0 and unlocked, must re-unlock it now
+ ddr_print("N%d: Node 0 L2C was unlocked before - unlocking it now\n", node);
+ // FIXME: this should be common-ized; it currently matches bdk_init()...
+ bdk_l2c_unlock_mem_region(node, 0, bdk_l2c_get_cache_size_bytes(node) * 3 / 4);
+ } else {
+ ddr_print("N%d: %s: L2C was already locked - leaving it locked\n", node, __FUNCTION__);
+ }
+ } else {
+ ddr_print("N%d: %s: non-zero node, not worrying about L2C lock status\n", node, __FUNCTION__);
+ }
+
+ // make sure to clear memory and any ECC errs when done...
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ return tot_errs;
+}
+
+/**
+ * This is the main function for DRAM margining of Write Voltage.
+ * Users of libdram should call this function,
+ * avoiding the other internal functions. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to test. This may not be the same node as the one running the code
+ *
+ * @return Success or Fail
+ */
+static
+int libdram_margin_write_voltage(int node)
+{
+ int tot_errs;
+
+ // call the margining routine
+ tot_errs = perform_margin_write_voltage(node);
+
+ // make sure to clear memory and any ECC errs when done...
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ return tot_errs;
+}
+
+/**
+ * This is the main function for DRAM margining of Read Voltage.
+ * Users of libdram should call this function,
+ * avoiding the other internal functions. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to test. This may not be the same node as the one running the code
+ *
+ * @return Success or Fail
+ */
+static
+int libdram_margin_read_voltage(int node)
+{
+ int tot_errs;
+
+ // call the margining routine
+ tot_errs = perform_margin_read_voltage(node);
+
+ // make sure to clear memory and any ECC errs when done...
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ return tot_errs;
+}
+
+/**
+ * This is the main function for DRAM margining of Write Timing.
+ * Users of libdram should call this function,
+ * avoiding the other internal functions. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to test. This may not be the same node as the one running the code
+ *
+ * @return Success or Fail
+ */
+static
+int libdram_margin_write_timing(int node)
+{
+ int tot_errs;
+
+ // call the tuning routine, tell it we are margining not tuning...
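+ // (argument note, mirroring the calls in bdk_libdram_tune_node() above:
+ // the second argument selects the offsets, 1 = write / 2 = read, and the
+ // last argument is 1 to tune or 0 to only margin)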
+ tot_errs = perform_dll_offset_tuning(node, /* write offsets */1, /* margin */0);
+
+ // make sure to clear memory and any ECC errs when done...
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ return tot_errs;
+}
+
+/**
+ * This is the main function for DRAM margining of Read Timing.
+ * Users of libdram should call this function,
+ * avoiding the other internal functions. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to test. This may not be the same node as the one running the code
+ *
+ * @return Success or Fail
+ */
+static
+int libdram_margin_read_timing(int node)
+{
+ int tot_errs;
+
+ // call the tuning routine, tell it we are margining not tuning...
+ tot_errs = perform_dll_offset_tuning(node, /* read offsets */2, /* margin */0);
+
+ // make sure to clear memory and any ECC errs when done...
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ return tot_errs;
+}
+
+/**
+ * This is the main function for all DRAM margining.
+ * Users of libdram should call this function,
+ * avoiding the other internal functions. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to test. This may not be the same node as the one running the code
+ *
+ * @return Success or Fail
+ */
+int libdram_margin(int node)
+{
+ int ret_rt, ret_wt, ret_rv, ret_wv;
+ char *risk[2] = { "Low Risk", "Needs Review" };
+ int l2c_is_locked = bdk_l2c_is_locked(node);
+
+ // for now, no margining on 81xx, until we can reduce the dynamic runtime size...
+ if (CAVIUM_IS_MODEL(CAVIUM_CN81XX)) {
+ printf("Sorry, margining is not available on 81xx yet...\n");
+ return 0;
+ }
+
+ dram_verbosity = bdk_config_get_int(BDK_CONFIG_DRAM_VERBOSE);
+
+ // the only way this entry point should be called is from a MENU item,
+ // so, enable any non-running cores on this node, and leave them
+ // running at the end...
+ ddr_print("N%d: %s: Starting cores (mask was 0x%lx)\n",
+ node, __FUNCTION__, bdk_get_running_coremask(node));
+ bdk_init_cores(node, ~0ULL);
+
+ // must test for L2C locked here, cannot go on with it unlocked
+ // FIXME: but we only need to worry about Node 0???
+ if (node == 0) {
+ if (!l2c_is_locked) { // is unlocked, must lock it now
+ ddr_print("N%d: %s: L2C was unlocked - locking it now\n", node, __FUNCTION__);
+ // FIXME: this should be common-ized; it currently matches bdk_init()...
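+ // (illustrative numbers, not from the source: on a part with a 16 MB L2,
+ // this locks the low 12 MB, i.e. 3/4 of the cache, leaving 4 MB for
+ // normal allocation)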
+ bdk_l2c_lock_mem_region(node, 0, bdk_l2c_get_cache_size_bytes(node) * 3 / 4);
+ } else {
+ ddr_print("N%d: %s: L2C was already locked - continuing\n", node, __FUNCTION__);
+ }
+ } else {
+ ddr_print("N%d: %s: non-zero node, not worrying about L2C lock status\n", node, __FUNCTION__);
+ }
+
+ debug_print("N%d: Starting DRAM Margin ALL\n", node);
+ ret_rt = libdram_margin_read_timing(node);
+ ret_wt = libdram_margin_write_timing(node);
+ ret_rv = libdram_margin_read_voltage(node);
+ ret_wv = libdram_margin_write_voltage(node);
+ debug_print("N%d: DRAM Margin ALL finished\n", node);
+
+ /*
+ >>> Summary from DDR Margining tool:
+ >>> N0: Read Timing Margin : Low Risk
+ >>> N0: Write Timing Margin : Low Risk
+ >>> N0: Read Voltage Margin : Low Risk
+ >>> N0: Write Voltage Margin : Low Risk
+ */
+ printf(" \n");
+ printf("-------------------------------------\n");
+ printf(" \n");
+ printf("Summary from DDR Margining tool\n");
+ printf("N%d: Read Timing Margin : %s\n", node, risk[!!ret_rt]);
+ printf("N%d: Write Timing Margin : %s\n", node, risk[!!ret_wt]);
+
+ // these may not have been done due to DDR3 and/or THUNDER pass 1.x
+ // FIXME? would it be better to print an appropriate message here?
+ if (ret_rv != -1) printf("N%d: Read Voltage Margin : %s\n", node, risk[!!ret_rv]);
+ if (ret_wv != -1) printf("N%d: Write Voltage Margin : %s\n", node, risk[!!ret_wv]);
+
+ printf(" \n");
+ printf("-------------------------------------\n");
+ printf(" \n");
+
+ // FIXME: only for node 0, unlock L2C if it was unlocked before...
+ if (node == 0) {
+ if (!l2c_is_locked) { // it was Node 0 and unlocked, must re-unlock it now
+ ddr_print("N%d: Node 0 L2C was unlocked before - unlocking it now\n", node);
+ // FIXME: this should be common-ized; it currently matches bdk_init()...
+ bdk_l2c_unlock_mem_region(node, 0, bdk_l2c_get_cache_size_bytes(node) * 3 / 4);
+ } else {
+ ddr_print("N%d: %s: L2C was already locked - leaving it locked\n", node, __FUNCTION__);
+ }
+ } else {
+ ddr_print("N%d: %s: non-zero node, not worrying about L2C lock status\n", node, __FUNCTION__);
+ }
+
+ return 0;
+}
+
+/**
+ * Get the measured DRAM frequency after a call to libdram_config
+ *
+ * @param node Node to get frequency for
+ *
+ * @return Frequency in Hz
+ */
+uint32_t libdram_get_freq(int node)
+{
+ return measured_ddr_hertz[node];
+}
+
+/**
+ * Get the DRAM frequency as computed from the DDR_PLL_CTL CSR
+ *
+ * @param node Node to get frequency for
+ * @param lmc LMC to sample (currently only LMC0's PLL settings are read)
+ *
+ * @return Frequency in Hz
+ */
+uint32_t libdram_get_freq_from_pll(int node, int lmc)
+{
+ static const uint8_t _en[] = {1, 2, 3, 4, 5, 6, 7, 8, 10, 12};
+ BDK_CSR_INIT(c, node, BDK_LMCX_DDR_PLL_CTL(0));
+ // we check the alternate refclk select bit in LMC0 to indicate 100MHz use
+ // assumption: the alternate refclk is setup for 100MHz
+ uint64_t ddr_ref_hertz = (c.s.dclk_alt_refclk_sel) ? 100000000 : bdk_clock_get_rate(node, BDK_CLOCK_MAIN_REF);
+ uint64_t en = _en[c.cn83xx.ddr_ps_en];
+ uint64_t calculated_ddr_hertz = ddr_ref_hertz * (c.cn83xx.clkf + 1) / ((c.cn83xx.clkr + 1) * en);
+ return calculated_ddr_hertz;
+}
+
+#ifndef DRAM_CSR_WRITE_INLINE
+void dram_csr_write(bdk_node_t node, const char *csr_name, bdk_csr_type_t type, int busnum, int size, uint64_t address, uint64_t value)
+{
+ VB_PRT(VBL_CSRS, "N%d: DDR Config %s[%016lx] => %016lx\n", node, csr_name, address, value);
+ bdk_csr_write(node, type, busnum, size, address, value);
+}
+#endif
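+/* Worked example for the PLL formula in libdram_get_freq_from_pll() above
+ (values illustrative, not read from hardware): the computation reduces to
+ hertz = ref * (clkf + 1) / ((clkr + 1) * _en[ddr_ps_en]).
+ With ref = 50000000 (50 MHz), clkf = 31, clkr = 0 and ddr_ps_en = 1
+ (postscale divide-by-2 per the _en table):
+ 50000000 * 32 / (1 * 2) = 800000000 Hz, i.e. a 1600 MT/s DDR interface. */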