author    David Hendricks <dhendricks@fb.com>      2018-03-09 13:58:27 -0800
committer Patrick Georgi <pgeorgi@google.com>      2018-04-06 06:48:11 +0000
commit    2004b93aed993aa02bbc588b8d82c22418ac52ec (patch)
tree      cdd5e95a154e2e0139474288262835a7f5847665 /src/vendorcode/cavium/bdk
parent    71cbd71eb5c0e8e13b25b5d5dd2f495e7d2967eb (diff)
soc/cavium: import raw BDK sources

This imports common BDK sources that will be used in subsequent patches.
The BDK is licensed under BSD and will be reduced in size and optimized
to compile under coreboot.

Change-Id: Icb32ee670d9fa9e5c10f9abb298cebf616fa67ad
Signed-off-by: David Hendricks <dhendricks@fb.com>
Reviewed-on: https://review.coreboot.org/25524
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: David Hendricks <david.hendricks@gmail.com>
Diffstat (limited to 'src/vendorcode/cavium/bdk')
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-arch/bdk-csr.c | 376
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-arch/bdk-model.c | 927
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-arch/bdk-numa.c | 91
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-arch/bdk-platform.c | 59
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-boot/bdk-boot-status.c | 81
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-boot/bdk-watchdog.c | 108
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-address.c | 183
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-config.c | 163
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-size.c | 213
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-addrbus.c | 115
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-databus.c | 252
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-fastscan.c | 103
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-patfil.c | 829
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test.c | 860
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-driver/bdk-driver-rnm.c | 124
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-hal/bdk-clock.c | 221
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-hal/bdk-config.c | 1946
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-hal/bdk-gpio.c | 197
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-hal/bdk-l2c.c | 270
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-hal/bdk-twsi.c | 318
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-os/bdk-init.c | 561
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-os/bdk-thread.c | 384
-rw-r--r--  src/vendorcode/cavium/bdk/libbdk-trust/bdk-trust.c | 286
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-csr.h | 86
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-env.c | 83
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-env.h | 48
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-gpio.h | 46
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.c | 8535
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.h | 97
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-internal.h | 201
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-l2c.c | 69
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-l2c.h | 45
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-print.h | 86
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-spd.c | 583
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-spd.h | 166
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c | 2012
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-util.h | 96
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.c | 2165
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.h | 124
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/libdram-config-load.c | 262
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/libdram.c | 718
41 files changed, 24089 insertions, 0 deletions
diff --git a/src/vendorcode/cavium/bdk/libbdk-arch/bdk-csr.c b/src/vendorcode/cavium/bdk/libbdk-arch/bdk-csr.c
new file mode 100644
index 0000000000..981ad231dc
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-arch/bdk-csr.c
@@ -0,0 +1,376 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include <stdio.h>
+#include "libbdk-arch/bdk-csrs-pccpf.h"
+#include "libbdk-arch/bdk-csrs-pem.h"
+
+#ifndef BDK_BUILD_HOST
+
+/**
+ * Read a slow CSR, not RSL or NCB.
+ *
+ * @param type Bus type the CSR is on
+ * @param busnum Bus number the CSR is on
+ * @param size Width of the CSR in bytes
+ * @param address The address of the CSR
+ *
+ * @return The value of the CSR
+ */
+uint64_t __bdk_csr_read_slow(bdk_node_t node, bdk_csr_type_t type, int busnum, int size, uint64_t address)
+{
+ switch (type)
+ {
+ case BDK_CSR_TYPE_DAB:
+ case BDK_CSR_TYPE_DAB32b:
+ case BDK_CSR_TYPE_NCB:
+ case BDK_CSR_TYPE_NCB32b:
+ case BDK_CSR_TYPE_PEXP_NCB:
+ case BDK_CSR_TYPE_RSL:
+ case BDK_CSR_TYPE_RSL32b:
+ case BDK_CSR_TYPE_RVU_PF_BAR0:
+ case BDK_CSR_TYPE_RVU_PF_BAR2:
+ case BDK_CSR_TYPE_RVU_PFVF_BAR2:
+ case BDK_CSR_TYPE_RVU_VF_BAR2:
+ /* Handled by inline code, we should never get here */
+ bdk_error("%s: Passed type that should be handled inline\n", __FUNCTION__);
+ break;
+
+ case BDK_CSR_TYPE_PCCBR:
+ case BDK_CSR_TYPE_PCCPF:
+ case BDK_CSR_TYPE_PCCVF:
+ case BDK_CSR_TYPE_PEXP:
+ case BDK_CSR_TYPE_MDSB:
+ case BDK_CSR_TYPE_PCICONFIGEP_SHADOW:
+ case BDK_CSR_TYPE_PCICONFIGEPVF:
+ bdk_error("%s: Register not supported\n", __FUNCTION__);
+ break;
+
+ case BDK_CSR_TYPE_SYSREG:
+ return bdk_sysreg_read(node, bdk_get_core_num(), address);
+
+ case BDK_CSR_TYPE_PCICONFIGRC:
+ {
+ /* Don't allow PCIe register access if PCIe wasn't linked in */
+ if (!bdk_pcie_config_read32)
+ bdk_fatal("PCIe CSR access not supported when PCIe not linked in\n");
+ union bdk_pcc_dev_con_s dev_con;
+ switch (busnum)
+ {
+ case 0:
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC0_CN88XX;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC0_CN83XX;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC0_CN81XX;
+ else
+ bdk_fatal("Update PCICONFIG in %s\n", __FUNCTION__);
+ break;
+ case 1:
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC1_CN88XX;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC1_CN83XX;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC1_CN81XX;
+ else
+ bdk_fatal("Update PCICONFIG in %s\n", __FUNCTION__);
+ break;
+ case 2:
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC2_CN88XX;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC2_CN83XX;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC2_CN81XX;
+ else
+ bdk_fatal("Update PCICONFIG in %s\n", __FUNCTION__);
+ break;
+ case 3:
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC3_CN88XX;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC3_CN83XX;
+ else
+ bdk_fatal("Update PCICONFIG in %s\n", __FUNCTION__);
+ break;
+ case 4:
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC4;
+ break;
+ case 5:
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC5;
+ break;
+ default:
+ bdk_error("%s: Illegal PCIe bus number\n", __FUNCTION__);
+ return -1;
+ }
+ return bdk_pcie_config_read32(node, 100 + dev_con.cn8.ecam, dev_con.s.bus, dev_con.s.func >> 3, dev_con.s.func & 7, address);
+ }
+ case BDK_CSR_TYPE_PCICONFIGEP:
+ {
+ BDK_CSR_DEFINE(cfg_rd, BDK_PEMX_CFG_RD(busnum));
+ cfg_rd.u = 0;
+ cfg_rd.s.addr = address;
+ BDK_CSR_WRITE(node, BDK_PEMX_CFG_RD(busnum), cfg_rd.u);
+ cfg_rd.u = BDK_CSR_READ(node, BDK_PEMX_CFG_RD(busnum));
+ return cfg_rd.s.data;
+ }
+ }
+ return -1; /* Return -1 as this looks invalid in register dumps. Zero is too common as a good value */
+}
+
+
+/**
+ * Write a value to a slow CSR, not RSL or NCB.
+ *
+ * @param type Bus type the CSR is on
+ * @param busnum Bus number the CSR is on
+ * @param size Width of the CSR in bytes
+ * @param address The address of the CSR
+ * @param value Value to write to the CSR
+ */
+void __bdk_csr_write_slow(bdk_node_t node, bdk_csr_type_t type, int busnum, int size, uint64_t address, uint64_t value)
+{
+ switch (type)
+ {
+ case BDK_CSR_TYPE_DAB:
+ case BDK_CSR_TYPE_DAB32b:
+ case BDK_CSR_TYPE_NCB:
+ case BDK_CSR_TYPE_NCB32b:
+ case BDK_CSR_TYPE_PEXP_NCB:
+ case BDK_CSR_TYPE_RSL:
+ case BDK_CSR_TYPE_RSL32b:
+ case BDK_CSR_TYPE_RVU_PF_BAR0:
+ case BDK_CSR_TYPE_RVU_PF_BAR2:
+ case BDK_CSR_TYPE_RVU_PFVF_BAR2:
+ case BDK_CSR_TYPE_RVU_VF_BAR2:
+ /* Handled by inline code, we should never get here */
+ bdk_error("%s: Passed type that should be handled inline\n", __FUNCTION__);
+ break;
+
+ case BDK_CSR_TYPE_PCCBR:
+ case BDK_CSR_TYPE_PCCPF:
+ case BDK_CSR_TYPE_PCCVF:
+ case BDK_CSR_TYPE_PEXP:
+ case BDK_CSR_TYPE_MDSB:
+ case BDK_CSR_TYPE_PCICONFIGEP_SHADOW:
+ case BDK_CSR_TYPE_PCICONFIGEPVF:
+ bdk_error("%s: Register not supported\n", __FUNCTION__);
+ break;
+
+ case BDK_CSR_TYPE_SYSREG:
+ bdk_sysreg_write(node, bdk_get_core_num(), address, value);
+ break;
+
+ case BDK_CSR_TYPE_PCICONFIGRC:
+ {
+ /* Don't allow PCIe register access if PCIe wasn't linked in */
+ if (!bdk_pcie_config_write32)
+ bdk_fatal("PCIe CSR access not supported when PCIe not linked in\n");
+ union bdk_pcc_dev_con_s dev_con;
+ switch (busnum)
+ {
+ case 0:
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC0_CN88XX;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC0_CN83XX;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC0_CN81XX;
+ else
+ bdk_fatal("Update PCICONFIG in %s\n", __FUNCTION__);
+ break;
+ case 1:
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC1_CN88XX;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC1_CN83XX;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC1_CN81XX;
+ else
+ bdk_fatal("Update PCICONFIG in %s\n", __FUNCTION__);
+ break;
+ case 2:
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC2_CN88XX;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC2_CN83XX;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC2_CN81XX;
+ else
+ bdk_fatal("Update PCICONFIG in %s\n", __FUNCTION__);
+ break;
+ case 3:
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC3_CN88XX;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC3_CN83XX;
+ else
+ bdk_fatal("Update PCICONFIG in %s\n", __FUNCTION__);
+ break;
+ case 4:
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC4;
+ break;
+ case 5:
+ dev_con.u = BDK_PCC_DEV_CON_E_PCIERC5;
+ break;
+ default:
+ bdk_error("%s: Illegal PCIe bus number\n", __FUNCTION__);
+ return;
+ }
+ bdk_pcie_config_write32(node, 100 + dev_con.cn8.ecam, dev_con.s.bus, dev_con.s.func >> 3, dev_con.s.func & 7, address, value);
+ break;
+ }
+ case BDK_CSR_TYPE_PCICONFIGEP:
+ {
+ BDK_CSR_DEFINE(cfg_wr, BDK_PEMX_CFG_WR(busnum));
+ cfg_wr.u = 0;
+ cfg_wr.s.addr = address;
+ cfg_wr.s.data = value;
+ BDK_CSR_WRITE(node, BDK_PEMX_CFG_WR(busnum), cfg_wr.u);
+ break;
+ }
+ }
+}
+
+#endif
+
+void __bdk_csr_fatal(const char *name, int num_args, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4)
+{
+ switch (num_args)
+ {
+ case 0:
+ bdk_fatal("%s is invalid on this chip\n", name);
+ case 1:
+ bdk_fatal("%s(%lu) is invalid on this chip\n", name, arg1);
+ case 2:
+ bdk_fatal("%s(%lu,%lu) is invalid on this chip\n", name, arg1, arg2);
+ case 3:
+ bdk_fatal("%s(%lu,%lu,%lu) is invalid on this chip\n", name, arg1, arg2, arg3);
+ default:
+ bdk_fatal("%s(%lu,%lu,%lu,%lu) is invalid on this chip\n", name, arg1, arg2, arg3, arg4);
+ }
+}
+
+/**
+ * Read a core system register from a different node or core
+ *
+ * @param node Node to read from
+ * @param core Core to read
+ * @param regnum Register to read in MRS encoding
+ *
+ * @return Register value
+ */
+uint64_t bdk_sysreg_read(int node, int core, uint64_t regnum)
+{
+ BDK_CSR_INIT(pp_reset, node, BDK_RST_PP_RESET);
+ if (pp_reset.u & (1ull<<core))
+ {
+ bdk_error("Attempt to read system register for core in reset\n");
+ return -1;
+ }
+
+ /* Addresses indicate selects as follows:
+ select 3,4,14,2,3
+ == 0x03040e020300
+ | | | | |^--- 1 if is E2H duplicated register
+ | | | |^^-- fifth select
+ | | |^^-- fourth select
+ | |^^-- third select
+ |^^-- second select
+ ^^-- first select */
+ uint64_t first = (regnum >> 40) & 0xff;
+ uint64_t second = (regnum >> 32) & 0xff;
+ uint64_t third = (regnum >> 24) & 0xff;
+ uint64_t fourth = (regnum >> 16) & 0xff;
+ uint64_t fifth = (regnum >> 8) & 0xff;
+ uint64_t regid = ((first & 3) << 14) | (second << 11) | (third << 7) | (fourth << 3) | fifth;
+
+ /* Note this requires DAP_IMP_DAR[caben] = 1 */
+ uint64_t address = 1ull<<47;
+ address |= 0x7Bull << 36;
+ address |= core << 19;
+ address |= regid << 3;
+ address = bdk_numa_get_address(node, address);
+ return bdk_read64_uint64(address);
+}
+
+/**
+ * Write a system register for a different node or core
+ *
+ * @param node Node to write to
+ * @param core Core to write
+ * @param regnum Register to write in MSR encoding
+ * @param value Value to write
+ */
+void bdk_sysreg_write(int node, int core, uint64_t regnum, uint64_t value)
+{
+ BDK_CSR_INIT(pp_reset, node, BDK_RST_PP_RESET);
+ if (pp_reset.u & (1ull<<core))
+ {
+ bdk_error("Attempt to write system register for core in reset\n");
+ return;
+ }
+
+ /* Addresses indicate selects as follows:
+ select 3,4,14,2,3
+ == 0x03040e020300
+ | | | | |^--- 1 if is E2H duplicated register
+ | | | |^^-- fifth select
+ | | |^^-- fourth select
+ | |^^-- third select
+ |^^-- second select
+ ^^-- first select */
+ uint64_t first = (regnum >> 40) & 0xff;
+ uint64_t second = (regnum >> 32) & 0xff;
+ uint64_t third = (regnum >> 24) & 0xff;
+ uint64_t fourth = (regnum >> 16) & 0xff;
+ uint64_t fifth = (regnum >> 8) & 0xff;
+ uint64_t regid = ((first & 3) << 14) | (second << 11) | (third << 7) | (fourth << 3) | fifth;
+
+ /* Note this requires DAP_IMP_DAR[caben] = 1 */
+ uint64_t address = 1ull<<47;
+ address |= 0x7Bull << 36;
+ address |= core << 19;
+ address |= regid << 3;
+ address = bdk_numa_get_address(node, address);
+ bdk_write64_uint64(address, value);
+}
+
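The select-to-register-ID packing used by bdk_sysreg_read() and bdk_sysreg_write() above is easiest to follow with a worked example. The standalone sketch below is not part of the imported sources (the function name decode_regid is illustrative); it decodes the sample encoding 0x03040e020300, i.e. select 3,4,14,2,3, exactly as those functions do:

    #include <inttypes.h>
    #include <stdio.h>

    /* Mirrors the decode in bdk_sysreg_read(): five 8-bit select fields
       are packed into a 16-bit register ID used to form the DAP address. */
    static uint64_t decode_regid(uint64_t regnum)
    {
        uint64_t first  = (regnum >> 40) & 0xff;
        uint64_t second = (regnum >> 32) & 0xff;
        uint64_t third  = (regnum >> 24) & 0xff;
        uint64_t fourth = (regnum >> 16) & 0xff;
        uint64_t fifth  = (regnum >> 8) & 0xff;
        return ((first & 3) << 14) | (second << 11) | (third << 7) |
               (fourth << 3) | fifth;
    }

    int main(void)
    {
        /* select 3,4,14,2,3 == 0x03040e020300, per the comment in bdk-csr.c */
        uint64_t regid = decode_regid(0x03040e020300ull);
        printf("regid = 0x%" PRIx64 "\n", regid); /* prints regid = 0xe713 */
        return 0;
    }

bdk_sysreg_read() then ORs this regid, shifted left by 3, into the DAP address along with the core number at bit 19.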
diff --git a/src/vendorcode/cavium/bdk/libbdk-arch/bdk-model.c b/src/vendorcode/cavium/bdk/libbdk-arch/bdk-model.c
new file mode 100644
index 0000000000..f2b4a0c803
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-arch/bdk-model.c
@@ -0,0 +1,927 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-ap.h"
+#include "libbdk-arch/bdk-csrs-mio_fus.h"
+#include "libbdk-arch/bdk-csrs-fus.h"
+#include "libbdk-arch/bdk-csrs-fusf.h"
+
+/*
+ Format of a SKU
+ CN8890-2000BG2601-AAP-G
+ CN8890-2000BG2601-AAP-PR-Y-G
+ CN XX XX X - XXX BG XXX - XX (- XX) (- X) - G
+ | | | | | | | | | | ^ RoHS Option, G=RoHS 6/6
+ | | | | | | | | | ^ Product Revision, blank for pass 1, Y=pass 2, W=pass 3, V=pass 4
+ | | | | | | | | ^ Product Phase, blank=production, PR=Prototype, ES=Engineering Sample
+ | | | | | | | ^ Marketing Segment Option (SC, SNT, etc)
+ | | | | | | ^ Number of balls on the package
+ | | | | | ^ Ball Grid Array
+ | | | | ^ Frequency in MHz, 3 or 4 digits (300 - 2000)
+ | | | ^ Optional Customer Code, blank or A-Z
+ | | ^ Number of cores, see table below
+ | ^ Processor family, plus or minus for L2 sizes and such (88, 86, 83, 81, 80)
+ ^ Cavium Prefix, sometimes changed for customer specific parts
+
+ Table of Core to Model encoding
+ >= 48 shows xx90
+ >= 44 shows xx88
+ >= 42 shows xx85
+ >= 32 shows xx80
+ >= 24 shows xx70
+ >= 20 shows xx65
+ >= 16 shows xx60
+ = 15 shows xx58
+ = 14 shows xx55
+ = 13 shows xx52
+ = 12 shows xx50
+ = 11 shows xx48
+ = 10 shows xx45
+ = 9 shows xx42
+ = 8 shows xx40
+ = 7 shows xx38
+ = 6 shows xx34
+ = 5 shows xx32
+ = 4 shows xx30
+ = 3 shows xx25
+ = 2 shows xx20
+ = 1 shows xx10
+*/
+
+/* Definition of each SKU table entry for the different dies */
+typedef struct
+{
+ uint8_t fuse_index; /* Index programmed into PNAME fuses to match this entry. Must never change once fused parts ship */
+ const char prefix[4]; /* Prefix before model number, usually "CN". Third letter is customer code shown after the model */
+ uint8_t model_base; /* First two digits of the model number */
+ uint16_t num_balls; /* Number of balls on package, included in SKU */
+ const char segment[4]; /* Market segment SKU is for, 2-3 character string */
+ uint16_t fuses[12]; /* List of fuses required for operation of this SKU */
+} model_sku_info_t;
+
+/* In the model_sku_info_t.fuses[] array, we use a special value
+ FUSES_CHECK_FUSF to represent that we need to check FUSF_CTL bit
+ 6, checking for trusted boot */
+#define FUSES_CHECK_FUSF 0xffff
+
+/***************************************************/
+/* SKU table for t88 */
+/* From "Thunder Part Number fuse overview Rev 16.xlsx" */
+/***************************************************/
+static const model_sku_info_t t88_sku_info[] =
+{
+ /* Index zero reserved for no fuses programmed */
+ { 0x01, "CN", 88, 2601, "AAP", /* 48, 32 cores */
+ { /* List of fuses for this SKU */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x02, "CN", 88, 2601, "AAS", /* 24 cores */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_OCX_DIS, /* Disable CCPI */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x03, "CN", 88, 2601, "ST", /* 48, 32 cores */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_TNS_CRIPPLE, /* Disable TNS */
+ BDK_MIO_FUS_FUSE_NUM_E_PEM_DISX(0), /* Disable PEM0-1 */
+ BDK_MIO_FUS_FUSE_NUM_E_PEM_DISX(2), /* Disable PEM4-5 */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x04, "CN", 88, 2601, "STT", /* 48 cores */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_PEM_DISX(0), /* Disable PEM0-1 */
+ BDK_MIO_FUS_FUSE_NUM_E_PEM_DISX(2), /* Disable PEM4-5 */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x05, "CN", 88, 2601, "STS", /* 24 cores */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_LMC_DIS, /* Disable LMC2-3 */
+ BDK_MIO_FUS_FUSE_NUM_E_OCX_DIS, /* Disable CCPI */
+ BDK_MIO_FUS_FUSE_NUM_E_TNS_CRIPPLE, /* Disable TNS */
+ BDK_MIO_FUS_FUSE_NUM_E_PEM_DISX(0), /* Disable PEM0-1 */
+ BDK_MIO_FUS_FUSE_NUM_E_PEM_DISX(2), /* Disable PEM4-5 */
+ BDK_MIO_FUS_FUSE_NUM_E_BGX_DISX(1), /* Disable BGX1 */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x06, "CN", 88, 2601, "STP", /* 48, 32 cores */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_TNS_CRIPPLE, /* Disable TNS */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x07, "CN", 88, 2601, "NT", /* 48, 32 cores */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(0),/* Disable SATA0-3 */
+ BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(2),/* Disable SATA8-11 */
+ BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(3),/* Disable SATA12-15 */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x08, "CN", 88, 2601, "NTS", /* 24 cores */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_LMC_DIS, /* Disable LMC2-3 */
+ BDK_MIO_FUS_FUSE_NUM_E_OCX_DIS, /* Disable CCPI */
+ BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(0),/* Disable SATA0-3 */
+ BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(2),/* Disable SATA8-11 */
+ BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(3),/* Disable SATA12-15 */
+ BDK_MIO_FUS_FUSE_NUM_E_BGX_DISX(1), /* Disable BGX1 */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x09, "CN", 88, 2601, "NTP", /* 48, 32 cores */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(0),/* Disable SATA0-3 */
+ BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(1),/* Disable SATA4-7 */
+ BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(2),/* Disable SATA8-11 */
+ BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(3),/* Disable SATA12-15 */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x0a, "CN", 88, 2601, "CP", /* 48,32 cores */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_NODFA_CP2, /* Disable HFA */
+ BDK_MIO_FUS_FUSE_NUM_E_RSVD134X(0), /* Disable HNA */
+ BDK_MIO_FUS_FUSE_NUM_E_NOZIP, /* Disable Compression */
+ BDK_MIO_FUS_FUSE_NUM_E_TNS_CRIPPLE, /* Disable TNS */
+ BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(0),/* Disable SATA0-3 */
+ BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(2),/* Disable SATA8-11 */
+ BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(3),/* Disable SATA12-15 */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x0b, "CN", 88, 2601, "CPS", /* 24 cores */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_NODFA_CP2, /* Disable HFA */
+ BDK_MIO_FUS_FUSE_NUM_E_RSVD134X(0), /* Disable HNA */
+ BDK_MIO_FUS_FUSE_NUM_E_NOZIP, /* Disable Compression */
+ BDK_MIO_FUS_FUSE_NUM_E_LMC_DIS, /* Disable LMC2-3 */
+ BDK_MIO_FUS_FUSE_NUM_E_OCX_DIS, /* Disable CCPI */
+ BDK_MIO_FUS_FUSE_NUM_E_TNS_CRIPPLE, /* Disable TNS */
+ BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(0),/* Disable SATA0-3 */
+ BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(2),/* Disable SATA8-11 */
+ BDK_MIO_FUS_FUSE_NUM_E_SATA_DISX(3),/* Disable SATA12-15 */
+ BDK_MIO_FUS_FUSE_NUM_E_BGX_DISX(1), /* Disable BGX1 */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x0c, "CN", 88, 2601, "SNT", /* 48,32 cores, Nitrox connects to PEM2x8, QLM4-5 */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_RSVD231X(0), /* Nitrox 3 is present */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x0d, "CN", 88, 2601, "SC", /* 48,32 cores, Nitrox connects to PEM2x8, QLM4-5 */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_RSVD231X(0), /* Nitrox 3 is present */
+ BDK_MIO_FUS_FUSE_NUM_E_NODFA_CP2, /* Disable HFA */
+ BDK_MIO_FUS_FUSE_NUM_E_RSVD134X(0), /* Disable HNA */
+ BDK_MIO_FUS_FUSE_NUM_E_NOZIP, /* Disable Compression */
+ BDK_MIO_FUS_FUSE_NUM_E_TNS_CRIPPLE, /* Disable TNS */
+ 0 /* End of fuse list marker */
+ }
+ },
+ /* Index gap for adding more CN88 variants */
+ { 0x20, "CN", 86, 1676, "AAP", /* No part, match unfused CN86XX */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_CHIP_IDX(6), /* Alternate package fuse */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x21, "CN", 86, 1676, "SCP", /* 8 cores */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_CHIP_IDX(6), /* Alternate package fuse */
+ BDK_MIO_FUS_FUSE_NUM_E_L2C_CRIPX(1),/* L2C is half size */
+ BDK_MIO_FUS_FUSE_NUM_E_NODFA_CP2, /* Disable HFA */
+ BDK_MIO_FUS_FUSE_NUM_E_RSVD134X(0), /* Disable HNA */
+ BDK_MIO_FUS_FUSE_NUM_E_NOZIP, /* Disable Compression */
+ BDK_MIO_FUS_FUSE_NUM_E_LMC_DIS, /* Disable LMC2-3 */
+ BDK_MIO_FUS_FUSE_NUM_E_OCX_DIS, /* Disable CCPI */
+ BDK_MIO_FUS_FUSE_NUM_E_TNS_CRIPPLE, /* Disable TNS */
+ 0 /* End of fuse list marker */
+ }
+ },
+ {} /* End of SKU list marker */
+};
+
+/***************************************************/
+/* SKU table for t83 */
+/* From "Thunder Part Number fuse overview Rev 16.xlsx" */
+/***************************************************/
+static const model_sku_info_t t83_sku_info[] =
+{
+ /* Index zero reserved for no fuses programmed */
+ { 0x01, "CN", 83, 1676, "SCP", /* 24, 20, 16, 12, 8 cores */
+ { /* List of fuses for this SKU */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x02, "CN", 83, 1676, "CP", /* 24, 20, 16, 12, 8 cores */
+ { /* List of fuses for this SKU */
+ /* Disable all Nitrox cores, CPT0 and CPT1 */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(0), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(1), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(2), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(3), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(4), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(5), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(6), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(7), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(8), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(9), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(10), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(11), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(12), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(13), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(14), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(15), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(16), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(17), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(18), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(19), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(20), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(21), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(22), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(23), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(24), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(25), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(26), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(27), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(28), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(29), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(30), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(31), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(32), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(33), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(34), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(35), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(36), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(37), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(38), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(39), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(40), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(41), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(42), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(43), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(44), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(45), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(46), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(47), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(0), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(1), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(2), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(3), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(4), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(5), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(6), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(7), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(8), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(9), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(10), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(11), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(12), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(13), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(14), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(15), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(16), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(17), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(18), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(19), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(20), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(21), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(22), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(23), /* Nitrox */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x03, "CN", 83, 1676, "AUS", /* 24, 20, 16, 12, 8 cores */
+ { /* List of fuses for this SKU */
+ FUSES_CHECK_FUSF, /* Trusted boot */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x04, "CN", 82, 1676, "SCP", /* 12, 8 cores */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_L2C_CRIPX(1),/* L2C is half size */
+ BDK_MIO_FUS_FUSE_NUM_E_LMC_DIS, /* Disable upper LMC */
+ /* Disable Nitrox cores CPT0[24-47] and CPT1[12-23] */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(24), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(25), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(26), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(27), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(28), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(29), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(30), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(31), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(32), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(33), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(34), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(35), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(36), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(37), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(38), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(39), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(40), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(41), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(42), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(43), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(44), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(45), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(46), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(47), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(12), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(13), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(14), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(15), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(16), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(17), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(18), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(19), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(20), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(21), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(22), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(23), /* Nitrox */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x05, "CN", 82, 1676, "CP", /* 12, 8 cores */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_L2C_CRIPX(1),/* L2C is half size */
+ BDK_MIO_FUS_FUSE_NUM_E_LMC_DIS, /* Disable upper LMC */
+ /* Disable all Nitrox cores, CPT0 and CPT1 */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(0), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(1), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(2), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(3), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(4), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(5), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(6), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(7), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(8), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(9), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(10), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(11), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(12), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(13), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(14), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(15), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(16), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(17), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(18), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(19), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(20), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(21), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(22), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(23), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(24), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(25), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(26), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(27), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(28), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(29), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(30), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(31), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(32), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(33), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(34), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(35), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(36), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(37), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(38), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(39), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(40), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(41), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(42), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(43), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(44), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(45), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(46), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT0_ENG_DISX(47), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(0), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(1), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(2), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(3), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(4), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(5), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(6), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(7), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(8), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(9), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(10), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(11), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(12), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(13), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(14), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(15), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(16), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(17), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(18), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(19), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(20), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(21), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(22), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT1_ENG_DISX(23), /* Nitrox */
+ 0 /* End of fuse list marker */
+ }
+ },
+ {} /* End of SKU list marker */
+};
+
+/***************************************************/
+/* SKU table for t81 */
+/* From "Thunder Part Number fuse overview Rev 16.xlsx" */
+/***************************************************/
+static const model_sku_info_t t81_sku_info[] =
+{
+ /* Index zero reserved for no fuses programmed */
+ { 0x01, "CN", 81, 676, "SCP", /* 4, 2 cores */
+ { /* List of fuses for this SKU */
+ /* No fuses */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x02, "CN", 81, 676, "CP", /* 4, 2 cores */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(1), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(2), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(3), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(4), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(5), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(6), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(7), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(8), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(9), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(10), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(11), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(12), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(13), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(14), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(15), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(16), /* Nitrox */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x07, "CN", 81, 676, "AUS", /* 4, 2 cores */
+ { /* List of fuses for this SKU */
+ FUSES_CHECK_FUSF, /* Trusted boot */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x08, "CN", 81, 676, "AUC", /* 4, 2 cores */
+ { /* List of fuses for this SKU */
+ FUSES_CHECK_FUSF, /* Trusted boot */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(1), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(2), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(3), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(4), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(5), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(6), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(7), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(8), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(9), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(10), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(11), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(12), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(13), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(14), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(15), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(16), /* Nitrox */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x03, "CN", 80, 676, "SCP", /* 4, 2 cores */
+ { /* List of fuses for this SKU */
+ /* Note that CHIP_ID(7) is supposed to be blown, but a few chips
+ have incorrect fuses. We allow CN80XX SKUs with or without
+ CHIP_ID(7) */
+ //BDK_MIO_FUS_FUSE_NUM_E_CHIP_IDX(7), /* Alternate package fuse 2? */
+ BDK_MIO_FUS_FUSE_NUM_E_L2C_CRIPX(1), /* L2C is half size */
+ BDK_MIO_FUS_FUSE_NUM_E_LMC_HALF, /* LMC is half width */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x04, "CN", 80, 676, "CP", /* 4, 2 cores */
+ { /* List of fuses for this SKU */
+ /* Note that CHIP_ID(7) is supposed to be blown, but a few chips
+ have incorrect fuses. We allow CN80XX SKUs with or without
+ CHIP_ID(7) */
+ //BDK_MIO_FUS_FUSE_NUM_E_CHIP_IDX(7), /* Alternate package fuse 2? */
+ BDK_MIO_FUS_FUSE_NUM_E_L2C_CRIPX(1), /* L2C is half size */
+ BDK_MIO_FUS_FUSE_NUM_E_LMC_HALF, /* LMC is half width */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(1), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(2), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(3), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(4), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(5), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(6), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(7), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(8), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(9), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(10), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(11), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(12), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(13), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(14), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(15), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(16), /* Nitrox */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x05, "CN", 80, 555, "SCP", /* 4, 2 cores */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_CHIP_IDX(6), /* Alternate package fuse */
+ BDK_MIO_FUS_FUSE_NUM_E_L2C_CRIPX(1), /* L2C is half size */
+ BDK_MIO_FUS_FUSE_NUM_E_LMC_HALF, /* LMC is half width */
+ 0 /* End of fuse list marker */
+ }
+ },
+ { 0x06, "CN", 80, 555, "CP", /* 4, 2 cores */
+ { /* List of fuses for this SKU */
+ BDK_MIO_FUS_FUSE_NUM_E_CHIP_IDX(6), /* Alternate package fuse */
+ BDK_MIO_FUS_FUSE_NUM_E_L2C_CRIPX(1), /* L2C is half size */
+ BDK_MIO_FUS_FUSE_NUM_E_LMC_HALF, /* LMC is half width */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(1), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(2), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(3), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(4), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(5), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(6), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(7), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(8), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(9), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(10), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(11), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(12), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(13), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(14), /* Nitrox */
+ //BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(15), /* Nitrox */
+ BDK_MIO_FUS_FUSE_NUM_E_CPT_ENG_DISX(16), /* Nitrox */
+ 0 /* End of fuse list marker */
+ }
+ },
+ {} /* End of SKU list marker */
+};
+
+/***************************************************/
+/* SKU table for t93 */
+/***************************************************/
+static const model_sku_info_t t93_sku_info[] =
+{
+ /* Index zero reserved for no fuses programmed */
+ { 0x01, "CN", 93, 1676, "SCP", /* 24, 20, 16, 12, 8 cores */
+ { /* List of fuses for this SKU */
+ /* No fuses */
+ 0 /* End of fuse list marker */
+ }
+ },
+ {} /* End of SKU list marker */
+};
+
+/**
+ * Given a core count, return the last two digits of a model number
+ *
+ * @param cores Number of cores
+ *
+ * @return Two digit model number
+ */
+static int model_digits_for_cores(int cores)
+{
+ /* If the number of cores is between two model levels, use the lower
+ level. This assumes that a model guarantees a minimum number of
+ cores. This should never happen, but you never know */
+ switch (cores)
+ {
+ case 1: return 10; /* CNxx10 = 1 core */
+ case 2: return 20; /* CNxx20 = 2 cores */
+ case 3: return 25; /* CNxx25 = 3 cores */
+ case 4: return 30; /* CNxx30 = 4 cores */
+ case 5: return 32; /* CNxx32 = 5 cores */
+ case 6: return 34; /* CNxx34 = 6 cores */
+ case 7: return 38; /* CNxx38 = 7 cores */
+ case 8: return 40; /* CNxx40 = 8 cores */
+ case 9: return 42; /* CNxx42 = 9 cores */
+ case 10: return 45; /* CNxx45 = 10 cores */
+ case 11: return 48; /* CNxx48 = 11 cores */
+ case 12: return 50; /* CNxx50 = 12 cores */
+ case 13: return 52; /* CNxx52 = 13 cores */
+ case 14: return 55; /* CNxx55 = 14 cores */
+ case 15: return 58; /* CNxx58 = 15 cores */
+ case 16 ... 19: return 60; /* CNxx60 = 16 cores */
+ case 20 ... 23: return 65; /* CNxx65 = 20 cores */
+ case 24 ... 31: return 70; /* CNxx70 = 24 cores */
+ case 32 ... 39: return 80; /* CNxx80 = 32 cores */
+ case 40 ... 43: return 85; /* CNxx85 = 40 cores */
+ case 44 ... 47: return 88; /* CNxx88 = 44 cores */
+ default: return 90; /* CNxx90 = 48 cores */
+ }
+}
+
+/**
+ * Return non-zero if the die is in an alternate package. The
+ * normal is_model() checks will treat alternate package parts
+ * as all the same, where this function can be used to detect
+ * them. The return value is the upper two bits of
+ * MIO_FUS_DAT2[chip_id]. Most alternate packages use bit 6,
+ * which will return 1 here. Parts with a second alternative
+ * will use bit 7, which will return 2.
+ *
+ * @param arg_model One of the CAVIUM_* constants for chip models and passes
+ *
+ * @return Non-zero if an alternate package
+ * 0 = Normal package
+ * 1 = Alternate package 1 (CN86XX, CN80XX with 555 balls)
+ * 2 = Alternate package 2 (CN80XX with 676 balls)
+ * 3 = Alternate package 3 (Currently unused)
+ */
+int cavium_is_altpkg(uint32_t arg_model)
+{
+ if (CAVIUM_IS_MODEL(arg_model))
+ {
+ BDK_CSR_INIT(mio_fus_dat2, bdk_numa_local(), BDK_MIO_FUS_DAT2);
+ /* Bits 7:6 are used for alternate packages. Return the exact
+ number so multiple alternate packages can be detected
+ (CN80XX is an example) */
+ int altpkg = mio_fus_dat2.s.chip_id >> 6;
+ if (altpkg)
+ return altpkg;
+ /* Due to a documentation mixup, some CN80XX parts do not have chip_id
+ bit 7 set. As a backup, use lmc_mode32 to find these parts. Both
+ bits are supposed to be fused, but some parts only have lmc_mode32 */
+ if (CAVIUM_IS_MODEL(CAVIUM_CN81XX) && mio_fus_dat2.s.lmc_mode32)
+ return 2;
+ return 0;
+ }
+ else
+ return 0;
+}
+
+/**
+ * Return the SKU string for a chip
+ *
+ * @param node Node to get SKU for
+ *
+ * @return Chip's SKU
+ */
+const char* bdk_model_get_sku(int node)
+{
+ /* Storage for SKU is per node. Static variable stores the value
+ so we don't decode on every call */
+ static char chip_sku[BDK_NUMA_MAX_NODES][32] = { { 0, }, };
+
+ /* Return the cached string if we've already filled it in */
+ if (chip_sku[node][0])
+ return chip_sku[node];
+
+ /* Figure out which SKU list to use */
+ const model_sku_info_t *sku_info;
+ uint64_t result;
+ asm ("mrs %[rd],MIDR_EL1" : [rd] "=r" (result));
+ result = bdk_extract(result, 4, 12);
+ switch (result)
+ {
+ case 0xa1:
+ sku_info = t88_sku_info;
+ break;
+ case 0xa2:
+ sku_info = t81_sku_info;
+ break;
+ case 0xa3:
+ sku_info = t83_sku_info;
+ break;
+ case 0xb2:
+ sku_info = t93_sku_info;
+ break;
+ default:
+ bdk_fatal("SKU detect: Unknown die\n");
+ }
+
+ /* Read the SKU index from the PNAME fuses */
+ int match_index = -1;
+ // FIXME: Implement PNAME reads
+
+ /* Search the SKU list for the best match, where all the fuses match.
+ Only needed if the PNAME fuses don't specify the index */
+ if (match_index == -1)
+ {
+ match_index = 0;
+ int match_score = -1;
+ int index = 0;
+ while (sku_info[index].fuse_index)
+ {
+ int score = 0;
+ int fuse_index = 0;
+ /* Count the number of fuses that match. A mismatch forces the worst
+ score (-1) */
+ while (sku_info[index].fuses[fuse_index])
+ {
+ int fuse;
+ /* FUSES_CHECK_FUSF is special for trusted parts */
+ if (sku_info[index].fuses[fuse_index] == FUSES_CHECK_FUSF)
+ {
+ BDK_CSR_INIT(fusf_ctl, node, BDK_FUSF_CTL);
+ fuse = (fusf_ctl.u >> 6) & 1;
+ }
+ else
+ {
+ fuse = bdk_fuse_read(node, sku_info[index].fuses[fuse_index]);
+ }
+ if (fuse)
+ {
+ /* Match, improve the score */
+ score++;
+ }
+ else
+ {
+ /* Mismatch, force score bad */
+ score = -1;
+ break;
+ }
+ fuse_index++;
+ }
+ /* If this score is better than the last match, use this index as the
+ match */
+ if (score > match_score)
+ {
+ match_score = score;
+ match_index = index;
+ }
+ index++;
+ }
+ }
+
+ /* Use the SKU table to determine the defaults for the SKU parts */
+ const char *prefix = sku_info[match_index].prefix;
+ int model = 100 * sku_info[match_index].model_base;
+ int cores = bdk_get_num_cores(node);
+ const char *customer_code = "";
+ int rclk_limit = bdk_clock_get_rate(node, BDK_CLOCK_RCLK) / 1000000;
+ const char *bg_str = "BG"; /* Default Ball Grid array */
+ int balls = sku_info[match_index].num_balls; /* Num package balls */
+ const char *segment = sku_info[match_index].segment; /* Market segment */
+ char prod_phase[4]; /* Blank = production, PR = Prototype, ES = Engineering sample */
+ char prod_rev[5]; /* Product revision */
+ const char *rohs_option = "G"; /* RoHS is always G for current parts */
+
+ /* Update the model number with the number of cores */
+ model = (model / 100) * 100 + model_digits_for_cores(cores);
+
+ /* Update the RCLK setting based on MIO_FUS_DAT3[core_pll_mul] */
+ uint64_t core_pll_mul;
+ if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX))
+ {
+ BDK_CSR_INIT(mio_fus_dat3, node, BDK_MIO_FUS_DAT3);
+ core_pll_mul = mio_fus_dat3.s.core_pll_mul;
+ }
+ else
+ core_pll_mul = bdk_fuse_read_range(bdk_numa_local(), BDK_FUS_FUSE_NUM_E_CORE_MAX_MULX(0), 7);
+
+ if (core_pll_mul)
+ {
+ /* CORE_PLL_MUL covers bits 5:1, so we need to multiply by 2. The
+ documentation doesn't mention this clearly: there is a 300MHz
+ addition to the base multiplier */
+ rclk_limit = core_pll_mul * 2 * 50 + 300;
+ }
+
+ /* FIXME: Hardcode production as there is no way to tell */
+ prod_phase[0] = 0;
+
+ /* Read the Pass information from fuses. Note that pass info in
+ MIO_FUS_DAT2[CHIP_ID] is encoded as
+ bit[7] = Unused, zero
+ bit[6] = Alternate package
+ bit[5..3] = Major pass
+ bit[2..0] = Minor pass */
+ int major_pass;
+ int minor_pass;
+ if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX))
+ {
+ BDK_CSR_INIT(mio_fus_dat2, node, BDK_MIO_FUS_DAT2);
+ major_pass = ((mio_fus_dat2.s.chip_id >> 3) & 7) + 1;
+ minor_pass = mio_fus_dat2.s.chip_id & 7;
+ }
+ else
+ {
+ /* FIXME: We don't support getting the pass for other node on CN9XXX */
+ bdk_ap_midr_el1_t midr_el1;
+ BDK_MRS(MIDR_EL1, midr_el1.u);
+ major_pass = (midr_el1.s.variant & 7) + 1;
+ minor_pass = midr_el1.s.revision;
+ }
+
+ if (major_pass == 1)
+ {
+ /* Pass 1.x is special in that we don't show the implied 'X' */
+ if (minor_pass == 0)
+ {
+ /* Completely blank for 1.0 */
+ prod_rev[0] = 0;
+ }
+ else
+ {
+ /* If we are production and not pass 1.0, the product phase
+ changes from blank to "-P". The product revision then
+ follows the product phase without a '-' */
+ if (prod_phase[0] == 0)
+ {
+ /* Change product phase to "-P" */
+ prod_phase[0] = '-';
+ prod_phase[1] = 'P';
+ prod_phase[2] = 0;
+ }
+ /* No separator between phase and revision */
+ prod_rev[0] = '1';
+ prod_rev[1] = '0' + minor_pass;
+ prod_rev[2] = 0;
+ }
+ }
+ else
+ {
+ /* Pass 2.0 and above; pass_letter[] is indexed by major pass (1-8) */
+ const char pass_letter[8] = "XYWVUTSR";
+ prod_rev[0] = '-';
+ prod_rev[1] = pass_letter[major_pass-1];
+ if (minor_pass == 0)
+ {
+ /* Nothing after the letter code */
+ prod_rev[2] = 0;
+ }
+ else
+ {
+ /* Add major and minor after the letter code */
+ prod_rev[2] = '0' + major_pass;
+ prod_rev[3] = '0' + minor_pass;
+ prod_rev[4] = 0;
+ }
+ }
+
+ /* Special check for CN88XX pass 2.0 and 2.1. Documentation mistakenly
+ specified 2.0 as -PR and 2.1 as -Y. Rather than fix the docs, OPs has
+ decided to special case this SKU */
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX) && (major_pass == 2))
+ {
+ if (minor_pass == 0)
+ {
+ prod_phase[0] = '-'; /* SKU ends with -PR-Y-G */
+ prod_phase[1] = 'P';
+ prod_phase[2] = 'R';
+ prod_phase[3] = 0;
+ }
+ else if (minor_pass == 1)
+ {
+ prod_rev[0] = '-'; /* SKU ends with -Y-G */
+ prod_rev[1] = 'Y';
+ prod_rev[2] = 0;
+ }
+ }
+
+ /* Read PNAME fuses, looking for SKU overrides */
+ // FIXME: Implement PNAME reads
+
+ /* Build the SKU string */
+ snprintf(chip_sku[node], sizeof(chip_sku[node]), "%s%d%s-%d%s%d-%s%s%s-%s",
+ prefix, model, customer_code, rclk_limit, bg_str, balls, segment,
+ prod_phase, prod_rev, rohs_option);
+
+ return chip_sku[node];
+}
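As a cross-check of the final snprintf() in bdk_model_get_sku(), the sketch below reproduces the CN8890-2000BG2601-AAP-G example from the SKU format comment at the top of bdk-model.c. The values are hard-coded for illustration; a real run derives each field from fuses and clock settings:

    #include <stdio.h>

    int main(void)
    {
        /* Illustrative values matching the documented example SKU */
        const char *prefix = "CN", *customer_code = "", *bg_str = "BG";
        const char *segment = "AAP", *prod_phase = "", *prod_rev = "";
        const char *rohs_option = "G";
        int model = 8890, rclk_limit = 2000, balls = 2601;
        char sku[32];

        /* Same format string as the end of bdk_model_get_sku() */
        snprintf(sku, sizeof(sku), "%s%d%s-%d%s%d-%s%s%s-%s",
                 prefix, model, customer_code, rclk_limit, bg_str, balls,
                 segment, prod_phase, prod_rev, rohs_option);
        printf("%s\n", sku); /* prints CN8890-2000BG2601-AAP-G */
        return 0;
    }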
diff --git a/src/vendorcode/cavium/bdk/libbdk-arch/bdk-numa.c b/src/vendorcode/cavium/bdk/libbdk-arch/bdk-numa.c
new file mode 100644
index 0000000000..33d34ba669
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-arch/bdk-numa.c
@@ -0,0 +1,91 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include <stdio.h>
+
+int __bdk_numa_master_node = -1; /* Which node is the master */
+static int __bdk_numa_exists_mask = 0; /* Bitmask of nodes that exist */
+static bdk_spinlock_t __bdk_numa_lock;
+
+/**
+ * Get a bitmask of the nodes that exist
+ *
+ * @return bitmask
+ */
+uint64_t bdk_numa_get_exists_mask(void)
+{
+ return __bdk_numa_exists_mask;
+}
+
+/**
+ * Add a node to the exists mask
+ *
+ * @param node Node to add
+ */
+void bdk_numa_set_exists(bdk_node_t node)
+{
+ bdk_spinlock_lock(&__bdk_numa_lock);
+ __bdk_numa_exists_mask |= 1 << node;
+ if (__bdk_numa_master_node == -1)
+ __bdk_numa_master_node = node;
+ bdk_spinlock_unlock(&__bdk_numa_lock);
+}
+
+/**
+ * Return true if a node exists
+ *
+ * @param node Node to check
+ *
+ * @return Non zero if the node exists
+ */
+int bdk_numa_exists(bdk_node_t node)
+{
+ return __bdk_numa_exists_mask & (1 << node);
+}
+
+/**
+ * Return true if there is only one node
+ *
+ * @return Non-zero if there is only one node
+ */
+int bdk_numa_is_only_one(void)
+{
+ return __bdk_numa_exists_mask == 1;
+}
+
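+/* Illustrative usage sketch (not part of the original BDK sources): walk the
+ node IDs to visit each discovered node. Node IDs are 2 bits wide in the
+ physical address map, so at most 4 nodes are checked. */
+#if 0
+static void example_list_nodes(void)
+{
+ for (bdk_node_t node = 0; node < 4; node++)
+ {
+ if (bdk_numa_exists(node))
+ printf("Node %d exists\n", node);
+ }
+}
+#endif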
diff --git a/src/vendorcode/cavium/bdk/libbdk-arch/bdk-platform.c b/src/vendorcode/cavium/bdk/libbdk-arch/bdk-platform.c
new file mode 100644
index 0000000000..8cac04a214
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-arch/bdk-platform.c
@@ -0,0 +1,59 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-ocla.h"
+
+bdk_platform_t __bdk_platform;
+
+void __bdk_platform_init()
+{
+ BDK_CSR_INIT(c, bdk_numa_master(), BDK_OCLAX_CONST(0));
+ if (c.u == 0)
+ {
+ __bdk_platform = BDK_PLATFORM_ASIM;
+ }
+ else
+ {
+ int plat2 = bdk_fuse_read(bdk_numa_master(), 197);
+ int plat1 = bdk_fuse_read(bdk_numa_master(), 196);
+ int plat0 = bdk_fuse_read(bdk_numa_master(), 195);
+ __bdk_platform = (plat2 << 2) | (plat1 << 1) | plat0;
+ }
+}
+
diff --git a/src/vendorcode/cavium/bdk/libbdk-boot/bdk-boot-status.c b/src/vendorcode/cavium/bdk/libbdk-boot/bdk-boot-status.c
new file mode 100644
index 0000000000..83ab14cbc7
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-boot/bdk-boot-status.c
@@ -0,0 +1,81 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-mio_tws.h"
+
+/**
+ * Report boot status to the BMC or whoever might care. This function
+ * will return quickly except for a status of "power cycle". In the power cycle
+ * case it is assumed the board is in a bad state and should not continue until
+ * a power cycle restarts us.
+ *
+ * @param status Status to report. Enumerated in bdk_boot_status_t
+ */
+void bdk_boot_status(bdk_boot_status_t status)
+{
+ bdk_node_t node = bdk_numa_master();
+ int twsi = bdk_config_get_int(BDK_CONFIG_BMC_TWSI);
+
+ /* Update status */
+ if (twsi != -1)
+ {
+ BDK_CSR_DEFINE(sw_twsi, BDK_MIO_TWSX_SW_TWSI(twsi));
+ sw_twsi.u = 0;
+ sw_twsi.s.v = 1; /* Valid data */
+ sw_twsi.s.slonly = 1; /* Slave only */
+ sw_twsi.s.data = status;
+ BDK_CSR_WRITE(node, BDK_MIO_TWSX_SW_TWSI(twsi), sw_twsi.u);
+ }
+
+ /* As a special case, power cycle will display a message and try a
+ soft reset if we can't power cycle in 5 seconds */
+ if (status == BDK_BOOT_STATUS_REQUEST_POWER_CYCLE)
+ {
+ if (twsi != -1)
+ {
+ printf("Requested power cycle\n");
+ bdk_wait_usec(5000000); /* 5 sec */
+ printf("Power cycle failed, trying soft reset\n");
+ }
+ else
+ printf("Performing soft reset\n");
+ bdk_reset_chip(node);
+ }
+}
+
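+/* Illustrative usage sketch (not part of the original BDK sources): report a
+ fatal condition. BDK_BOOT_STATUS_REQUEST_POWER_CYCLE is the only
+ bdk_boot_status_t value confirmed above; per the code, the call falls back
+ to a soft reset of the chip if no BMC TWSI is configured. */
+#if 0
+static void example_fatal_boot_error(void)
+{
+ printf("Unrecoverable boot error\n");
+ bdk_boot_status(BDK_BOOT_STATUS_REQUEST_POWER_CYCLE);
+ /* Not expected to return: the chip is reset if the power cycle fails */
+}
+#endif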
diff --git a/src/vendorcode/cavium/bdk/libbdk-boot/bdk-watchdog.c b/src/vendorcode/cavium/bdk/libbdk-boot/bdk-watchdog.c
new file mode 100644
index 0000000000..48f955a7ef
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-boot/bdk-watchdog.c
@@ -0,0 +1,108 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-gti.h"
+
+/**
+ * Setup the watchdog to expire in timeout_ms milliseconds. When the watchdog
+ * expires, three things happen:
+ * 1) Expire 1: interrupt that is ignored by the BDK
+ * 2) Expire 2: DEL3T interrupt, which is disabled and ignored
+ * 3) Expire 3: Soft reset of the chip
+ *
+ * Since we want a soft reset, we actually program the watchdog to expire at
+ * the timeout / 3.
+ *
+ * @param timeout_ms Timeout in milliseconds. If this is zero, the timeout is taken from the
+ * global configuration option BDK_BRD_CFG_WATCHDOG_TIMEOUT
+ */
+void bdk_watchdog_set(unsigned int timeout_ms)
+{
+ if (timeout_ms == 0)
+ timeout_ms = bdk_config_get_int(BDK_CONFIG_WATCHDOG_TIMEOUT);
+
+ if (timeout_ms > 0)
+ {
+ uint64_t sclk = bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_SCLK);
+ uint64_t timeout_sclk = sclk * timeout_ms / 1000;
+ /* Per comment above, we want the watchdog to expire at 3x the rate specified */
+ timeout_sclk /= 3;
+ /* Watchdog counts in 1024 cycle steps */
+ uint64_t timeout_wdog = timeout_sclk >> 10;
+ /* We can only specify the upper 16 bits of a 24 bit value. Round up */
+ timeout_wdog = (timeout_wdog + 0xff) >> 8;
+ /* If the timeout overflows the hardware limit, set max */
+ if (timeout_wdog >= 0x10000)
+ timeout_wdog = 0xffff;
+
+ BDK_TRACE(INIT, "Watchdog: Set to expire %lu SCLK cycles\n", timeout_wdog << 18);
+ BDK_CSR_MODIFY(c, bdk_numa_local(), BDK_GTI_CWD_WDOGX(bdk_get_core_num()),
+ c.s.len = timeout_wdog;
+ c.s.mode = 3);
+ }
+}
+
+/**
+ * Signal the watchdog that we are still running
+ */
+void bdk_watchdog_poke(void)
+{
+ BDK_CSR_WRITE(bdk_numa_local(), BDK_GTI_CWD_POKEX(bdk_get_core_num()), 0);
+}
+
+/**
+ * Disable the hardware watchdog
+ */
+void bdk_watchdog_disable(void)
+{
+ BDK_CSR_WRITE(bdk_numa_local(), BDK_GTI_CWD_WDOGX(bdk_get_core_num()), 0);
+ BDK_TRACE(INIT, "Watchdog: Disabled\n");
+}
+
+/**
+ * Return true if the watchdog is configured and running
+ *
+ * @return Non-zero if watchdog is running
+ */
+int bdk_watchdog_is_running(void)
+{
+ BDK_CSR_INIT(wdog, bdk_numa_local(), BDK_GTI_CWD_WDOGX(bdk_get_core_num()));
+ return wdog.s.mode != 0;
+}
+
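+/* Illustrative usage sketch (not part of the original BDK sources): arm the
+ watchdog, poke it periodically while working, then disable it. Only the
+ functions defined above (plus bdk_wait_usec) are used. */
+#if 0
+static void example_watchdog_usage(void)
+{
+ bdk_watchdog_set(10000); /* 10s timeout; 0 would use the config default */
+ for (int i = 0; i < 100; i++)
+ {
+ bdk_wait_usec(50000); /* stand-in for a unit of real work */
+ bdk_watchdog_poke(); /* restart the three-stage expiry sequence */
+ }
+ if (bdk_watchdog_is_running())
+ bdk_watchdog_disable();
+}
+#endif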
diff --git a/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-address.c b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-address.c
new file mode 100644
index 0000000000..94d7d76752
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-address.c
@@ -0,0 +1,183 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-l2c.h"
+
+#define EXTRACT(v, lsb, width) (((v) >> (lsb)) & ((1ull << (width)) - 1))
+#define INSERT(a, v, lsb, width) a|=(((v) & ((1ull << (width)) - 1)) << (lsb))
+
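+/* For example (illustrative): EXTRACT(0xab54, 4, 8) yields 0xb5, and
+ INSERT(a, 0xb5, 4, 8) ORs 0xb5 back into bits [11:4] of 'a'. */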
+/**
+ * Given a physical DRAM address, extract information about the node, LMC, DIMM,
+ * prank, lrank, bank, row, and column that was accessed.
+ *
+ * @param address Physical address to decode
+ * @param node Node the address was for
+ * @param lmc LMC controller the address was for
+ * @param dimm DIMM the address was for
+ * @param prank Physical RANK on the DIMM
+ * @param lrank Logical RANK on the DIMM
+ * @param bank BANK on the DIMM
+ * @param row Row on the DIMM
+ * @param col Column on the DIMM
+ */
+void
+bdk_dram_address_extract_info(uint64_t address, int *node, int *lmc, int *dimm,
+ int *prank, int *lrank, int *bank, int *row, int *col)
+{
+ int bitno = CAVIUM_IS_MODEL(CAVIUM_CN83XX) ? 19 : 20;
+ *node = EXTRACT(address, 40, 2); /* Address bits [41:40] */
+ /* Determine the LMC controller */
+ BDK_CSR_INIT(l2c_ctl, *node, BDK_L2C_CTL);
+ int bank_lsb, xbits;
+
+ /* xbits depends on number of LMCs */
+ xbits = __bdk_dram_get_num_lmc(*node) >> 1; // 4->2; 2->1; 1->0
+ bank_lsb = 7 + xbits;
+
+ /* LMC number is probably aliased */
+ if (l2c_ctl.s.disidxalias)
+ *lmc = EXTRACT(address, 7, xbits);
+ else
+ *lmc = EXTRACT(address, 7, xbits) ^ EXTRACT(address, bitno, xbits) ^ EXTRACT(address, 12, xbits);
+
+ /* Figure out the bank field width */
+ BDK_CSR_INIT(lmcx_config, *node, BDK_LMCX_CONFIG(*lmc));
+ int bank_width = __bdk_dram_get_num_bank_bits(*node, *lmc);
+
+ /* Extract additional info from the LMC_CONFIG CSR */
+ BDK_CSR_INIT(ext_config, *node, BDK_LMCX_EXT_CONFIG(*lmc));
+ int dimm_lsb = 28 + lmcx_config.s.pbank_lsb + xbits;
+ int dimm_width = 40 - dimm_lsb;
+ int prank_lsb = dimm_lsb - lmcx_config.s.rank_ena;
+ int prank_width = dimm_lsb - prank_lsb;
+ int lrank_lsb = prank_lsb - ext_config.s.dimm0_cid;
+ int lrank_width = prank_lsb - lrank_lsb;
+ int row_lsb = 14 + lmcx_config.s.row_lsb + xbits;
+ int row_width = lrank_lsb - row_lsb;
+ int col_hi_lsb = bank_lsb + bank_width;
+ int col_hi_width= row_lsb - col_hi_lsb;
+
+ /* Extract the parts of the address */
+ *dimm = EXTRACT(address, dimm_lsb, dimm_width);
+ *prank = EXTRACT(address, prank_lsb, prank_width);
+ *lrank = EXTRACT(address, lrank_lsb, lrank_width);
+ *row = EXTRACT(address, row_lsb, row_width);
+
+ /* bank calculation may be aliased... */
+ BDK_CSR_INIT(lmcx_control, *node, BDK_LMCX_CONTROL(*lmc));
+ if (lmcx_control.s.xor_bank)
+ *bank = EXTRACT(address, bank_lsb, bank_width) ^ EXTRACT(address, 12 + xbits, bank_width);
+ else
+ *bank = EXTRACT(address, bank_lsb, bank_width);
+
+ /* LMC number already extracted */
+ int col_hi = EXTRACT(address, col_hi_lsb, col_hi_width);
+ *col = EXTRACT(address, 3, 4) | (col_hi << 4);
+ /* Bus byte is address bits [2:0]. Unused here */
+}
+
+/**
+ * Construct a physical address given the node, LMC, DIMM, prank, lrank, bank, row, and column.
+ *
+ * @param node Node the address was for
+ * @param lmc LMC controller the address was for
+ * @param dimm DIMM the address was for
+ * @param prank Physical RANK on the DIMM
+ * @param lrank Logical RANK on the DIMM
+ * @param bank BANK on the DIMM
+ * @param row Row on the DIMM
+ * @param col Column on the DIMM
+ */
+uint64_t
+bdk_dram_address_construct_info(bdk_node_t node, int lmc, int dimm,
+ int prank, int lrank, int bank, int row, int col)
+
+{
+ uint64_t address = 0;
+ int bitno = CAVIUM_IS_MODEL(CAVIUM_CN83XX) ? 19 : 20;
+
+ // insert node bits
+ INSERT(address, node, 40, 2); /* Address bits [41:40] */
+
+ /* xbits depends on number of LMCs */
+ int xbits = __bdk_dram_get_num_lmc(node) >> 1; // 4->2; 2->1; 1->0
+ int bank_lsb = 7 + xbits;
+
+ /* Figure out the bank field width */
+ int bank_width = __bdk_dram_get_num_bank_bits(node, lmc);
+
+ /* Extract additional info from the LMC_CONFIG CSR */
+ BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(lmc));
+ BDK_CSR_INIT(ext_config, node, BDK_LMCX_EXT_CONFIG(lmc));
+ int dimm_lsb = 28 + lmcx_config.s.pbank_lsb + xbits;
+ int dimm_width = 40 - dimm_lsb;
+ int prank_lsb = dimm_lsb - lmcx_config.s.rank_ena;
+ int prank_width = dimm_lsb - prank_lsb;
+ int lrank_lsb = prank_lsb - ext_config.s.dimm0_cid;
+ int lrank_width = prank_lsb - lrank_lsb;
+ int row_lsb = 14 + lmcx_config.s.row_lsb + xbits;
+ int row_width = lrank_lsb - row_lsb;
+ int col_hi_lsb = bank_lsb + bank_width;
+ int col_hi_width = row_lsb - col_hi_lsb;
+
+ /* Insert some other parts of the address */
+ INSERT(address, dimm, dimm_lsb, dimm_width);
+ INSERT(address, prank, prank_lsb, prank_width);
+ INSERT(address, lrank, lrank_lsb, lrank_width);
+ INSERT(address, row, row_lsb, row_width);
+ INSERT(address, col >> 4, col_hi_lsb, col_hi_width);
+ INSERT(address, col, 3, 4);
+
+ /* bank calculation may be aliased... */
+ BDK_CSR_INIT(lmcx_control, node, BDK_LMCX_CONTROL(lmc));
+ int new_bank = bank;
+ if (lmcx_control.s.xor_bank)
+ new_bank ^= EXTRACT(address, 12 + xbits, bank_width);
+ INSERT(address, new_bank, bank_lsb, bank_width);
+
+ /* Determine the actual C bits from the input LMC controller arg */
+ /* The input LMC number was probably aliased with other fields */
+ BDK_CSR_INIT(l2c_ctl, node, BDK_L2C_CTL);
+ int new_lmc = lmc;
+ if (!l2c_ctl.s.disidxalias)
+ new_lmc ^= EXTRACT(address, bitno, xbits) ^ EXTRACT(address, 12, xbits);
+ INSERT(address, new_lmc, 7, xbits);
+
+ return address;
+}
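+
+/* Illustrative sketch (not part of the original BDK sources): the two
+ functions above are inverses, so a decode/re-encode round trip should
+ reproduce the original physical address. Handy as a sanity check. */
+#if 0
+static int example_address_roundtrip(uint64_t address)
+{
+ int node, lmc, dimm, prank, lrank, bank, row, col;
+ bdk_dram_address_extract_info(address, &node, &lmc, &dimm, &prank,
+ &lrank, &bank, &row, &col);
+ uint64_t rebuilt = bdk_dram_address_construct_info(node, lmc, dimm, prank,
+ lrank, bank, row, col);
+ return (rebuilt == address) ? 0 : -1; /* 0 = round trip matched */
+}
+#endif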
diff --git a/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-config.c b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-config.c
new file mode 100644
index 0000000000..3465c5d98b
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-config.c
@@ -0,0 +1,163 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include <unistd.h>
+
+BDK_REQUIRE_DEFINE(DRAM_CONFIG);
+
+/**
+ * Lookup a DRAM configuration by name and initialize DRAM using it
+ *
+ * @param node Node to configure
+ * @param ddr_clock_override
+ * If non zero, override the DRAM frequency specified
+ * in the config with this value
+ *
+ * @return Amount of DRAM in MB, or negative on failure
+ */
+int bdk_dram_config(int node, int ddr_clock_override)
+{
+ const dram_config_t *config = libdram_config_load(node);
+ if (!config)
+ {
+ printf("N%d: No DRAM config specified, skipping DRAM init\n", node);
+ return 0;
+ }
+
+ BDK_TRACE(DRAM, "N%d: Starting DRAM init (config=%p, ddr_clock_override=%d)\n", node, config, ddr_clock_override);
+ int mbytes = libdram_config(node, config, ddr_clock_override);
+ BDK_TRACE(DRAM, "N%d: DRAM init returned %d\n", node, mbytes);
+ if (mbytes <= 0)
+ {
+ printf("ERROR: DDR initialization failed\n");
+ return -1;
+ }
+
+ return mbytes;
+}
+
+/**
+ * Do DRAM configuration tuning
+ *
+ * @param node Node to tune
+ *
+ * @return Success or Fail
+ */
+int bdk_dram_tune(int node)
+{
+ int ret;
+ BDK_TRACE(DRAM, "N%d: Starting DRAM tuning\n", node);
+ ret = libdram_tune(node);
+ BDK_TRACE(DRAM, "N%d: DRAM tuning returned %d\n", node, ret);
+ return ret;
+}
+
+/**
+ * Do all the DRAM Margin tests
+ *
+ * @param node Node to test
+ */
+void bdk_dram_margin(int node)
+{
+ BDK_TRACE(DRAM, "N%d: Starting DRAM margining\n", node);
+ libdram_margin(node);
+ BDK_TRACE(DRAM, "N%d: Finished DRAM margining.\n", node);
+ return;
+}
+
+/**
+ * Return a string describing the DRAM configuration at the specified node.
+ *
+ * @param node node to retrieve
+ *
+ * @return Pointer to a static info string
+ */
+const char* bdk_dram_get_info_string(int node)
+{
+ #define INFO_STRING_LEN 40
+ static char info_string[INFO_STRING_LEN];
+ static const char *info_ptr = info_string;
+
+ snprintf(info_string, INFO_STRING_LEN,
+ " %ld MB, %ld MT/s, %s %s",
+ bdk_dram_get_size_mbytes(node),
+ bdk_config_get_int(BDK_CONFIG_DDR_SPEED, node),
+ (__bdk_dram_is_ddr4(node, 0)) ? "DDR4" : "DDR3",
+ (__bdk_dram_is_rdimm(node, 0)) ? "RDIMM" : "UDIMM");
+
+ return info_ptr;
+}
+
+
+/**
+ * Return the highest address currently used by the BDK. This address will
+ * be about 4MB above the top of the BDK to make sure small growths between the
+ * call and its use don't cause corruption. Any call to memory allocation can
+ * change this value.
+ *
+ * @return Size of the BDK in bytes
+ */
+uint64_t bdk_dram_get_top_of_bdk(void)
+{
+ /* Make sure the start address is higher than the BDK's active range.
+ *
+ * As sbrk() returns a node address, mask off the node portion of
+ * the address to make it a physical offset. Doing this simplifies the
+ * address checks and calculations which only work with physical offsets.
+ */
+ uint64_t top_of_bdk = (bdk_ptr_to_phys(sbrk(0)) & bdk_build_mask(40));
+ uint64_t l2_size = bdk_l2c_get_cache_size_bytes(bdk_numa_master());
+ if (top_of_bdk <= l2_size)
+ {
+ /* Early BDK code takes care of the first L2 sized area of memory */
+ top_of_bdk = l2_size;
+ }
+ else
+ {
+ /* Give 4MB of extra so the BDK has room to grow */
+ top_of_bdk += 4 << 20;
+ /* Align it on a 64KB boundary */
+ top_of_bdk >>= 16;
+ top_of_bdk <<= 16;
+ }
+ return top_of_bdk;
+}
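+
+/* Worked example (illustrative numbers): with a 16 MB L2 (0x1000000), an
+ sbrk() top of 0x2001234 exceeds the L2 size, so 4 MB of headroom is added
+ (0x2401234) and the result is rounded down to a 64 KB boundary: 0x2400000. */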
diff --git a/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-size.c b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-size.c
new file mode 100644
index 0000000000..122afb2a18
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-size.c
@@ -0,0 +1,213 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+
+/**
+ * Return the number of LMC controllers in use
+ *
+ * @param node Node to probe
+ *
+ * @return Number of LMC controllers (1 to 4, depending on the chip and mode)
+ */
+int __bdk_dram_get_num_lmc(bdk_node_t node)
+{
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+ {
+ BDK_CSR_INIT(lmcx_dll_ctl2, node, BDK_LMCX_DLL_CTL2(2)); // sample LMC2
+ return (lmcx_dll_ctl2.s.intf_en) ? 4 : 2;
+ }
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+ {
+ BDK_CSR_INIT(lmcx_dll_ctl1, node, BDK_LMCX_DLL_CTL2(1)); // sample LMC1
+ return (lmcx_dll_ctl1.s.intf_en) ? 2 : 1;
+ }
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+ {
+ return 1;
+ }
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN93XX))
+ {
+ BDK_CSR_INIT(lmcx_dll_ctl1, node, BDK_LMCX_DLL_CTL2(2));
+ if (lmcx_dll_ctl1.s.intf_en)
+ return 3;
+ lmcx_dll_ctl1.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(1));
+ return (lmcx_dll_ctl1.s.intf_en) ? 2 : 1;
+ }
+ bdk_error("__bdk_dram_get_num_lmc() needs update for this chip\n");
+ return 1;
+}
+
+/**
+ * Return whether the node/LMC is in DRESET
+ *
+ * @param node Node to probe
+ * @param lmc  LMC to probe
+ *
+ * @return 1 or 0
+ */
+static int __bdk_dram_is_lmc_in_dreset(bdk_node_t node, int lmc)
+{
+ BDK_CSR_INIT(lmcx_dll_ctl2, node, BDK_LMCX_DLL_CTL2(lmc)); // can always read this
+ return (lmcx_dll_ctl2.s.dreset != 0) ? 1 : 0;
+}
+
+/**
+ * Return a mask of the number of row bits in use
+ *
+ * @param node Node to probe
+ * @param lmc  LMC to probe
+ *
+ */
+uint32_t __bdk_dram_get_row_mask(bdk_node_t node, int lmc)
+{
+ // PROTECT!!!
+ if (__bdk_dram_is_lmc_in_dreset(node, lmc)) // check LMCn
+ return 0;
+ BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(lmc)); // sample LMCn
+ int numbits = 14 + lmcx_config.s.pbank_lsb - lmcx_config.s.row_lsb - lmcx_config.s.rank_ena;
+ return ((1ul << numbits) - 1);
+}
+
+/**
+ * Return a mask of the number of column bits in use
+ *
+ * @param node Node to probe
+ * @param lmc  LMC to probe
+ *
+ */
+uint32_t __bdk_dram_get_col_mask(bdk_node_t node, int lmc)
+{
+ // PROTECT!!!
+ if (__bdk_dram_is_lmc_in_dreset(node, lmc)) // check LMCn
+ return 0;
+ BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(lmc)); // sample LMCn
+ int numbits = 11 + lmcx_config.s.row_lsb - __bdk_dram_get_num_bank_bits(node, lmc);
+ return ((1ul << numbits) - 1);
+}
+
+/**
+ * Return the number of bank bits in use
+ *
+ * @param node Node to probe
+ * @param lmc  LMC to probe
+ *
+ */
+// all DDR3, and DDR4 x16 today, use only 3 bank bits; DDR4 x4 and x8 always have 4 bank bits
+// NOTE: this will change in the future, when DDR4 x16 devices can come with 16 banks!! FIXME!!
+int __bdk_dram_get_num_bank_bits(bdk_node_t node, int lmc)
+{
+ // PROTECT!!!
+ if (__bdk_dram_is_lmc_in_dreset(node, lmc)) // check LMCn
+ return 0;
+ BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(lmc)); // sample LMCn
+ int bank_width = (__bdk_dram_is_ddr4(node, lmc) && (lmcx_config.s.bg2_enable)) ? 4 : 3;
+ return bank_width;
+}
+
+/**
+ * Return whether the node has DDR3 or DDR4 DRAM
+ *
+ * @param node Node to probe
+ * @param lmc  LMC to probe
+ *
+ * @return 0 (DDR3) or 1 (DDR4)
+ */
+int __bdk_dram_is_ddr4(bdk_node_t node, int lmc)
+{
+ // PROTECT!!!
+ if (__bdk_dram_is_lmc_in_dreset(node, lmc)) // check LMCn
+ return 0;
+ if (CAVIUM_IS_MODEL(CAVIUM_CN9XXX))
+ return 1;
+ BDK_CSR_INIT(lmcx_ddr_pll_ctl, node, BDK_LMCX_DDR_PLL_CTL(lmc)); // sample LMCn
+ return (lmcx_ddr_pll_ctl.cn83xx.ddr4_mode != 0);
+}
+
+/**
+ * Return whether the node has Registered DIMMs or Unbuffered DIMMs
+ *
+ * @param node Node to probe
+ * @param lmc  LMC to probe
+ *
+ * @return 0 (Unbuffered) or 1 (Registered)
+ */
+int __bdk_dram_is_rdimm(bdk_node_t node, int lmc)
+{
+ // PROTECT!!!
+ if (__bdk_dram_is_lmc_in_dreset(node, lmc)) // check LMCn
+ return 0;
+ BDK_CSR_INIT(lmcx_control, node, BDK_LMCX_CONTROL(lmc)); // sample LMCn
+ return (lmcx_control.s.rdimm_ena != 0);
+}
+
+/**
+ * Get the amount of DRAM configured for a node. This is read from the LMC
+ * controller after DRAM is setup.
+ *
+ * @param node Node to query
+ *
+ * @return Size in megabytes
+ */
+uint64_t bdk_dram_get_size_mbytes(int node)
+{
+ if (bdk_is_platform(BDK_PLATFORM_EMULATOR))
+ return 2 << 10; /* 2GB is available on t88 and t81;
+ some t83 models have 8GB, but it takes too long to init */
+ /* Return zero if dram isn't enabled */
+ if (!__bdk_is_dram_enabled(node))
+ return 0;
+
+ uint64_t memsize = 0;
+ const int num_dram_controllers = __bdk_dram_get_num_lmc(node);
+ for (int lmc = 0; lmc < num_dram_controllers; lmc++)
+ {
+ if (bdk_is_platform(BDK_PLATFORM_ASIM))
+ {
+ /* Asim doesn't simulate the rank detection, fake 4GB per controller */
+ memsize += 4ull << 30;
+ }
+ else
+ {
+ // PROTECT!!!
+ if (__bdk_dram_is_lmc_in_dreset(node, lmc)) // check LMCn
+ return 0;
+ BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(lmc));
+ int num_ranks = bdk_pop(lmcx_config.s.init_status);
+ uint64_t rank_size = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena);
+ memsize += rank_size * num_ranks;
+ }
+ }
+ return memsize >> 20;
+}
+
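+/* Worked example (illustrative values): with pbank_lsb = 5 and rank_ena = 1,
+ each rank is 1ull << (28 + 5 - 1) bytes = 4 GB; an init_status of 0x3 (two
+ ranks trained) gives 8 GB on that LMC, and the total is returned in MB. */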
diff --git a/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-addrbus.c b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-addrbus.c
new file mode 100644
index 0000000000..9fe8570454
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-addrbus.c
@@ -0,0 +1,115 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include "bdk.h"
+
+/* Used for all memory reads/writes related to the test */
+#define READ64(address) __bdk_dram_read64(address)
+#define WRITE64(address, data) __bdk_dram_write64(address, data)
+
+/**
+ * Address bus test. This test writes a single value to each power of two in the
+ * area, looking for false aliases that would be created by address lines being
+ * shorted or tied together.
+ *
+ * @param area        Start of the physical memory area
+ * @param max_address End of the physical memory area (exclusive)
+ * @param bursts      Number of bursts (currently unused by this test)
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_address_bus(uint64_t area, uint64_t max_address, int bursts)
+{
+ int failures = 0;
+
+ /* Clear our work area. Checking for aliases later could get false
+ positives if it matched stale data */
+ void *ptr = (area) ? bdk_phys_to_ptr(area) : NULL;
+ bdk_zero_memory(ptr, max_address - area);
+ __bdk_dram_flush_to_mem_range(area, max_address);
+
+ /* Each time we write, we'll write this pattern xored with the address it
+ is written to */
+ uint64_t pattern = 0x0fedcba987654321;
+
+ /* Walk through the region incrementing our offset by a power of two. The
+ first few writes will be to the same cache line (offsets 0x8, 0x10, 0x20,
+ and 0x40). Offset 0x80 and beyond will be to different cache lines */
+ uint64_t offset = 0x8;
+ while (area + offset < max_address)
+ {
+ uint64_t address = area + offset;
+ /* Write one location with pattern xor address */
+ uint64_t p = pattern ^ address;
+ WRITE64(address, p);
+ __bdk_dram_flush_to_mem(address);
+ offset <<= 1;
+ }
+
+ /* Read all of the area to make sure no other locations were written */
+ uint64_t a = area;
+ offset = 0x8;
+ uint64_t next_write = area + offset;
+ while (a < max_address)
+ {
+ if (a + 256 < max_address)
+ BDK_PREFETCH(a + 256, 0);
+ for (int i=0; i<16; i++)
+ {
+ uint64_t data = READ64(a);
+ uint64_t correct;
+ if (a == next_write)
+ {
+ correct = pattern ^ next_write;
+ offset <<= 1;
+ next_write = area + offset;
+ }
+ else
+ correct = 0;
+ if (bdk_unlikely(data != correct))
+ {
+ failures++;
+ __bdk_dram_report_error(a, data, correct, 0, -1);
+ }
+ a += 8;
+ }
+ }
+
+ return failures;
+}
+
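+/* Worked example (illustrative): if address lines A3 and A4 were shorted,
+ the writes to offsets 0x8 and 0x10 would land on the same location. The
+ later write wins, so the read-back pass would see pattern^(area+0x10)
+ where it expected pattern^(area+0x8) and report a failure. */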
diff --git a/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-databus.c b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-databus.c
new file mode 100644
index 0000000000..c3fa1ffd8d
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-databus.c
@@ -0,0 +1,252 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include "bdk.h"
+
+/* Used for all memory reads/writes related to the test */
+#define READ64(address) __bdk_dram_read64(address)
+#define WRITE64(address, data) __bdk_dram_write64(address, data)
+
+/* Build a 64bit mask out of a single hex digit */
+#define REPEAT2(v) ((((uint64_t)v) << 4) | ((uint64_t)v))
+#define REPEAT4(v) ((REPEAT2(v) << 8) | REPEAT2(v))
+#define REPEAT8(v) ((REPEAT4(v) << 16) | REPEAT4(v))
+#define REPEAT16(v) ((REPEAT8(v) << 32) | REPEAT8(v))
+
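+/* For example (illustrative): REPEAT2(0x3) is 0x33, REPEAT4(0x3) is 0x3333,
+ and REPEAT16(0x3) is 0x3333333333333333 -- digit 0x3 repeated in all 16
+ nibbles of a dword. */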
+/**
+ * Read memory and check that the data bus pattern is present. The pattern is a
+ * sequence of 16 dwords created from the 16 hex digits repeated in each word.
+ *
+ * @param address Physical address to read. This must be cache line aligned.
+ * @param bursts  Number of times to repeat the read test to verify stability
+ *
+ * @return Number of errors, zero means success
+ */
+static int read_data_bus_burst(uint64_t address, int bursts)
+{
+ int failures = 0;
+
+ /* Loop over the burst so people using a scope have time to capture
+ traces */
+ for (int burst = 0; burst < bursts; burst++)
+ {
+ /* Invalidate all caches so we must read from DRAM */
+ __bdk_dram_flush_to_mem(address);
+ BDK_DCACHE_INVALIDATE;
+
+ for (uint64_t digit = 0; digit < 16; digit++)
+ {
+ uint64_t a = address + digit * 8;
+ uint64_t data = READ64(a);
+ uint64_t correct = REPEAT16(digit);
+ if (data != correct)
+ {
+ failures++;
+ __bdk_dram_report_error(a, data, correct, burst, -1);
+ }
+ }
+ }
+ return failures;
+}
+
+/**
+ * Write memory with a data bus pattern and check that it can be read correctly.
+ * The pattern is a sequence of 16 dwords created from the 16 hex digits repeated
+ * in each word.
+ *
+ * @param address Physical address to write. This must be cache line aligned. 128 bytes will be
+ * written starting at this address.
+ * @param bursts  Number of times to repeat the write+read test to verify stability
+ *
+ * @return Number of errors, zero means success
+ */
+static int write_data_bus_burst(uint64_t address, int bursts)
+{
+ BDK_TRACE(DRAM_TEST, "[0x%016lx:0x%016lx] Writing incrementing digits\n",
+ address, address + 127);
+ /* Loop over the burst so people using a scope have time to capture
+ traces */
+ for (int burst = 0; burst < bursts; burst++)
+ {
+ /* Fill a cache line with an incrementing pattern. Each nibble
+ in the 64bit word increments from 0 to 0xf */
+ for (uint64_t digit = 0; digit < 16; digit++)
+ WRITE64(address + digit * 8, REPEAT16(digit));
+ /* Force the cache line out to memory */
+ __bdk_dram_flush_to_mem(address);
+ }
+ return read_data_bus_burst(address, bursts);
+}
+
+/**
+ * Read back the pattern written by write_data_bus_walk() and
+ * make sure it was stored properly.
+ *
+ * @param address Physical address to read. This must be cache line aligned.
+ * @param burst   Burst index, used only for error reporting
+ * @param pattern Pattern basis for writes. See
+ * write_data_bus_walk()
+ *
+ * @return Number of errors, zero means success
+ */
+static int read_data_bus_walk(uint64_t address, int burst, uint64_t pattern)
+{
+ int failures = 0;
+
+ /* Invalidate all caches so we must read from DRAM */
+ __bdk_dram_flush_to_mem(address);
+ BDK_DCACHE_INVALIDATE;
+
+ uint64_t correct = pattern;
+ for (uint64_t word = 0; word < 16; word++)
+ {
+ uint64_t a = address + word * 8;
+ uint64_t data = READ64(a);
+ if (data != correct)
+ {
+ failures++;
+ __bdk_dram_report_error(a, data, correct, burst, -1);
+ }
+ uint64_t tmp = correct >> 63; /* Save top bit */
+ correct <<= 1; /* Shift left one bit */
+ correct |= tmp; /* Restore the top bit as bit 0 */
+ }
+
+ return failures;
+}
+
+/**
+ * Write a pattern to a cache line, rotating it left one bit for each DWORD
+ * written. Use read_data_bus_walk() to verify the data was stored properly.
+ *
+ * @param address Physical address to write. This must be cache line aligned. 128 bytes will be
+ * written starting at this address.
+ * @param burst   Burst index, used only for error reporting
+ * @param pattern Pattern basis
+ */
+static void write_data_bus_walk(uint64_t address, int burst, uint64_t pattern)
+{
+ BDK_TRACE(DRAM_TEST, "[0x%016lx:0x%016lx] Writing walking pattern 0x%016lx\n",
+ address, address + 127, pattern);
+
+ uint64_t a = address;
+ uint64_t d = pattern;
+
+ /* Fill a cache line with pattern. Each 64bit work will have the
+ pattern rotated left one bit */
+ for (uint64_t word = 0; word < 16; word++)
+ {
+ WRITE64(a, d);
+ a += 8;
+ uint64_t tmp = d >> 63; /* Save top bit */
+ d <<= 1; /* Shift left one bit */
+ d |= tmp; /* Restore the top bit as bit 0 */
+ }
+ /* Force the cache line out to memory */
+ __bdk_dram_flush_to_mem(address);
+}
+
+/**
+ * The goal of these tests is to toggle every DDR data pin, one at a time or in
+ * related groups, to isolate any short circuits between the data pins or open
+ * circuits where the pin is not connected to the DDR memory. A board which fails
+ * one of these tests has severe problems and will not be able to run any of the
+ * later test patterns.
+ *
+ * @param start_address
+ * Physical address of the region used for the
+ * test. Only a few cache lines starting here
+ * are written.
+ * @param end_address
+ * Top end of the address range. Currently unused
+ * @param bursts Number of times to repeat writes+reads to ensure stability
+ *
+ * @return Number of errors, zero means success
+ */
+int __bdk_dram_test_mem_data_bus(uint64_t start_address, uint64_t end_address, int bursts)
+{
+ int failures = 0;
+
+ /* Incrementing pattern: 0x0 - 0xf in each nibble */
+ failures += write_data_bus_burst(start_address, bursts);
+
+ /* Walking ones. Run with 1, 2, and 3 bits walking */
+ for (int bits = 1; bits <= 3; bits++)
+ {
+ for (int burst = 0; burst < bursts; burst++)
+ {
+ /* Each write_data_bus_walk() call writes 16 dwords, so step by 16 */
+ for (int i = 0; i < 64; i += 16)
+ {
+ uint64_t pattern = bdk_build_mask(bits) << i;
+ write_data_bus_walk(start_address + i*8, burst, pattern);
+ }
+ /* Each read_data_bus_walk() call reads 16 dwords, so step by 16 */
+ for (int i = 0; i < 64; i += 16)
+ {
+ uint64_t pattern = bdk_build_mask(bits) << i;
+ failures += read_data_bus_walk(start_address + i*8, burst, pattern);
+ }
+ }
+ }
+
+ /* Walking zeros. Run with 1, 2, and 3 bits walking */
+ for (int bits = 1; bits <= 3; bits++)
+ {
+ for (int burst = 0; burst < bursts; burst++)
+ {
+ /* Each write_data_bus_walk() call writes 16 dwords, so step by 16 */
+ for (int i = 0; i < 64; i += 16)
+ {
+ uint64_t pattern = ~(bdk_build_mask(bits) << i);
+ write_data_bus_walk(start_address + i*8, burst, pattern);
+ }
+ /* Each read_data_bus_walk() call reads 16 dwords, so step by 16 */
+ for (int i = 0; i < 64; i += 16)
+ {
+ uint64_t pattern = ~(bdk_build_mask(bits) << i);
+ failures += read_data_bus_walk(start_address + i*8, burst, pattern);
+ }
+ }
+ }
+ return failures;
+}
+
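+/* Worked example (illustrative): with bits = 2 and i = 4, the walking-ones
+ pattern is bdk_build_mask(2) << 4 = 0x30. write_data_bus_walk() rotates it
+ left one bit per dword, so successive dwords carry 0x30, 0x60, 0xc0, ...,
+ toggling every data pin within a single cache line. */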
diff --git a/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-fastscan.c b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-fastscan.c
new file mode 100644
index 0000000000..46e205dd80
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-fastscan.c
@@ -0,0 +1,103 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include "bdk.h"
+
+/* Used for all memory reads/writes related to the test */
+#define READ64(address) __bdk_dram_read64(address)
+#define WRITE64(address, data) __bdk_dram_write64(address, data)
+
+/**
+ * Fast scan test. This test is meant to find gross errors caused by read/write
+ * leveling failures on a single rank or DIMM. The idea is to scan through all of
+ * memory in large steps. The large steps hit each rank multiple times, but not
+ * every byte. If a whole rank has errors, this should find them quickly. This test
+ * is suitable as an alive test during early boot.
+ *
+ * @param area Starting physical address
+ * @param max_address
+ * Ending physical address, exclusive
+ * @param bursts  Number of bursts (currently unused by this test)
+ *
+ * @return Number of errors
+ */
+int __bdk_dram_test_fast_scan(uint64_t area, uint64_t max_address, int bursts)
+{
+ int failures = 0;
+ const uint64_t step = 0x10008; /* The 8 is so we walk through cache lines too */
+ const uint64_t pattern1 = 0xaaaaaaaaaaaaaaaa;
+ const uint64_t pattern2 = 0x5555555555555555;
+
+ /* Walk through the region incrementing our offset by STEP */
+ uint64_t a = area;
+ while (a + 16 <= max_address)
+ {
+ WRITE64(a, pattern1);
+ WRITE64(a+8, pattern2);
+ __bdk_dram_flush_to_mem_range(a, a + 16);
+ a += step;
+ }
+
+ /* Read back, checking the writes */
+ a = area;
+ while (a + 16 <= max_address)
+ {
+ /* Prefetch two steps ahead for better performance */
+ uint64_t pre = a + step * 2;
+ if (pre + 16 < max_address)
+ BDK_PREFETCH(pre, 0);
+ /* Check pattern 1 */
+ uint64_t data1 = READ64(a);
+ if (bdk_unlikely(data1 != pattern1))
+ {
+ failures++;
+ __bdk_dram_report_error(a, data1, pattern1, 0, -1);
+ }
+ /* Check pattern 2 */
+ uint64_t data2 = READ64(a+8);
+ if (bdk_unlikely(data2 != pattern2))
+ {
+ failures++;
+ __bdk_dram_report_error(a+8, data2, pattern2, 0, -1);
+ }
+ a += step;
+ }
+
+ return failures;
+}
+
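+/* Worked example (illustrative): the step of 0x10008 advances 64 KB plus 8
+ bytes per probe, so probes land at area, area+0x10008, area+0x20010, ...,
+ striding across ranks while also rotating through cache-line offsets. */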
diff --git a/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-patfil.c b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-patfil.c
new file mode 100644
index 0000000000..e6c4b57721
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test-patfil.c
@@ -0,0 +1,829 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include "bdk.h"
+
+// choose prediction-based algorithms for mem_xor and mem_rows tests
+#define USE_PREDICTION_CODE_VERSIONS 1 // change to 0 to go back to the original versions
+
+/* Used for all memory reads/writes related to the test */
+#define READ64(address) __bdk_dram_read64(address)
+#define WRITE64(address, data) __bdk_dram_write64(address, data)
+
+/**
+ * Fill a memory area with the address of each 64-bit word in the area.
+ * Reread to confirm the pattern.
+ *
+ * @param area Start of the physical memory area
+ * @param max_address
+ * End of the physical memory area (exclusive)
+ * @param bursts Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_self_addr(uint64_t area, uint64_t max_address, int bursts)
+{
+ int failures = 0;
+
+ for (int burst = 0; burst < bursts; burst++)
+ {
+ /* Write the pattern to memory. Each location receives the address
+ * of the location.
+ */
+ for (uint64_t address = area; address < max_address; address+=8)
+ WRITE64(address, address);
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Read by ascending address the written memory and confirm that it
+ * has the expected data pattern.
+ */
+ for (uint64_t address = area; address < max_address; )
+ {
+ if (address + 256 < max_address)
+ BDK_PREFETCH(address + 256, 0);
+ for (int i=0; i<16; i++)
+ {
+ uint64_t data = READ64(address);
+ if (bdk_unlikely(data != address))
+ failures += __bdk_dram_retry_failure(burst, address, data, address);
+ address += 8;
+ }
+ }
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Read by descending address the written memory and confirm that it
+ * has the expected data pattern.
+ */
+ uint64_t end = max_address - sizeof(uint64_t);
+ for (uint64_t address = end; address >= area; )
+ {
+ if (address - 256 >= area)
+ BDK_PREFETCH(address - 256, 0);
+ for (int i=0; i<16; i++)
+ {
+ uint64_t data = READ64(address);
+ if (bdk_unlikely(data != address))
+ failures += __bdk_dram_retry_failure(burst, address, data, address);
+ address -= 8;
+ }
+ }
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Read from random addresses within the memory area.
+ */
+ uint64_t probes = (max_address - area) / 128;
+ uint64_t address_ahead1 = area;
+ uint64_t address_ahead2 = area;
+ for (uint64_t i = 0; i < probes; i++)
+ {
+ /* Create a pipeline of prefetches:
+ address = address read this loop
+ address_ahead1 = prefetch started last loop
+ address_ahead2 = prefetch started this loop */
+ uint64_t address = address_ahead1;
+ address_ahead1 = address_ahead2;
+ address_ahead2 = bdk_rng_get_random64() % (max_address - area);
+ address_ahead2 += area;
+ address_ahead2 &= -8;
+ BDK_PREFETCH(address_ahead2, 0);
+
+ uint64_t data = READ64(address);
+ if (bdk_unlikely(data != address))
+ failures += __bdk_dram_retry_failure(burst, address, data, address);
+ }
+ }
+ return failures;
+}
+
+/**
+ * Write "pattern" and its compliment to memory and verify it was written
+ * properly. Memory will be filled with DWORDs pattern, ~pattern, pattern,
+ * ~pattern, ...
+ *
+ * @param area Start physical address of memory
+ * @param max_address
+ * End of physical memory region
+ * @param pattern Pattern to write
+ * @param passes  Number of times to repeat the test
+ *
+ * @return Number of errors, zero on success
+ */
+static uint32_t test_mem_pattern(uint64_t area, uint64_t max_address, uint64_t pattern,
+ int passes)
+{
+ int failures = 0;
+
+ for (int pass = 0; pass < passes; pass++)
+ {
+ if (pass & 0x1)
+ pattern = ~pattern;
+
+ for (uint64_t address = area; address < max_address; address += 8)
+ WRITE64(address, pattern);
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Read the written memory and confirm that it has the expected
+ * data pattern.
+ */
+ uint64_t address = area;
+ while (address < max_address)
+ {
+ if (address + 256 < max_address)
+ BDK_PREFETCH(address + 256, 0);
+ for (int i=0; i<16; i++)
+ {
+ uint64_t data = READ64(address);
+ if (bdk_unlikely(data != pattern))
+ failures += __bdk_dram_retry_failure(pass, address, data, pattern);
+ address += 8;
+ }
+ }
+ }
+ return failures;
+}
+
+/**
+ * Walking zero written to memory, left shift
+ *
+ * @param area Start of the physical memory area
+ * @param max_address
+ * End of the physical memory area
+ * @param bursts Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_leftwalk0(uint64_t area, uint64_t max_address, int bursts)
+{
+ int failures = 0;
+ for (int burst = 0; burst < bursts; burst++)
+ {
+ for (uint64_t pattern = 1; pattern != 0; pattern = pattern << 1)
+ failures += test_mem_pattern(area, max_address, ~pattern, 1);
+ }
+ return failures;
+}
+
+/**
+ * Walking one written to memory, left shift
+ *
+ * @param area Start of the physical memory area
+ * @param max_address
+ * End of the physical memory area
+ * @param bursts Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_leftwalk1(uint64_t area, uint64_t max_address, int bursts)
+{
+ int failures = 0;
+ for (int burst = 0; burst < bursts; burst++)
+ {
+ for (uint64_t pattern = 1; pattern != 0; pattern = pattern << 1)
+ failures += test_mem_pattern(area, max_address, pattern, 1);
+ }
+ return failures;
+}
+
+/**
+ * Walking zero written to memory, right shift
+ *
+ * @param area Start of the physical memory area
+ * @param max_address
+ * End of the physical memory area
+ * @param bursts Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_rightwalk0(uint64_t area, uint64_t max_address, int bursts)
+{
+ int failures = 0;
+ for (int burst = 0; burst < bursts; burst++)
+ {
+ for (uint64_t pattern = 1ull << 63; pattern != 0; pattern = pattern >> 1)
+ failures += test_mem_pattern(area, max_address, ~pattern, 1);
+ }
+ return failures;
+}
+
+/**
+ * Walking one written to memory, right shift
+ *
+ * @param area Start of the physical memory area
+ * @param max_address
+ * End of the physical memory area
+ * @param bursts Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_rightwalk1(uint64_t area, uint64_t max_address, int bursts)
+{
+ int failures = 0;
+ for (int burst = 0; burst < bursts; burst++)
+ {
+ for (uint64_t pattern = 1ull<<63; pattern != 0; pattern = pattern >> 1)
+ failures += test_mem_pattern(area, max_address, pattern, 1);
+ }
+ return failures;
+}
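+
+/* The four walking tests above differ only in shift direction and in whether
+ * the moving bit is a one or a zero; each feeds one pattern per shift
+ * position into test_mem_pattern(). A standalone sketch that prints the
+ * sequences for two of the variants, for illustration only. */
+#if 0 /* illustrative sketch; not part of the BDK build */
+#include <stdio.h>
+#include <stdint.h>
+int main(void)
+{
+    /* Walking one, left shift: 0x1, 0x2, 0x4, ..., 1ull << 63 */
+    for (uint64_t p = 1; p != 0; p <<= 1)
+        printf("leftwalk1  0x%016llx\n", (unsigned long long)p);
+    /* Walking zero, right shift: ~(1ull << 63), ~(1ull << 62), ..., ~1 */
+    for (uint64_t p = 1ull << 63; p != 0; p >>= 1)
+        printf("rightwalk0 0x%016llx\n", (unsigned long long)~p);
+    return 0;
+}
+#endif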
+
+/**
+ * Apply the March C- testing algorithm to the given memory area.
+ * 1) Write "pattern" to memory.
+ * 2) Verify "pattern" and write "~pattern".
+ * 3) Verify "~pattern" and write "pattern".
+ * 4) Verify "pattern" and write "~pattern".
+ * 5) Verify "~pattern" and write "pattern".
+ * 6) Verify "pattern".
+ *
+ * @param area Start of the physical memory area
+ * @param max_address
+ * End of the physical memory area
+ * @param pattern
+ *
+ * @return Number of errors, zero on success
+ */
+static int test_mem_march_c(uint64_t area, uint64_t max_address, uint64_t pattern)
+{
+ int failures = 0;
+
+    /* Pass 1: ascending addresses, fill memory with pattern. */
+ BDK_TRACE(DRAM_TEST, " [0x%016lx:0x%016lx] Phase1, address incrementing, pattern 0x%016lx\n", area, max_address-1, pattern);
+ for (uint64_t address = area; address < max_address; address += 8)
+ WRITE64(address, pattern);
+
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Pass 2: ascending addresses, read pattern and write ~pattern */
+ BDK_TRACE(DRAM_TEST, " [0x%016lx:0x%016lx] Phase2, address incrementing, pattern 0x%016lx\n", area, max_address-1, ~pattern);
+ for (uint64_t address = area; address < max_address; address += 8)
+ {
+ uint64_t data = READ64(address);
+ if (bdk_unlikely(data != pattern))
+ failures += __bdk_dram_retry_failure(1, address, data, pattern);
+ WRITE64(address, ~pattern);
+ }
+
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Pass 3: ascending addresses, read ~pattern and write pattern. */
+ BDK_TRACE(DRAM_TEST, " [0x%016lx:0x%016lx] Phase3, address incrementing, pattern 0x%016lx\n", area, max_address-1, pattern);
+ for (uint64_t address = area; address < max_address; address += 8)
+ {
+ uint64_t data = READ64(address);
+ if (bdk_unlikely(data != ~pattern))
+ failures += __bdk_dram_retry_failure(1, address, data, ~pattern);
+ WRITE64(address, pattern);
+ }
+
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Pass 4: descending addresses, read pattern and write ~pattern. */
+ BDK_TRACE(DRAM_TEST, " [0x%016lx:0x%016lx] Phase4, address decrementing, pattern 0x%016lx\n", area, max_address-1, ~pattern);
+ uint64_t end = max_address - sizeof(uint64_t);
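+    /* Note: the descending loops below rely on area being non-zero; with
+     * area == 0 the unsigned test "address >= area" would always hold and
+     * the subtraction would wrap. In practice the range starts above the
+     * BDK's reserved memory, so area is never zero. */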
+ for (uint64_t address = end; address >= area; address -= 8)
+ {
+ uint64_t data = READ64(address);
+ if (bdk_unlikely(data != pattern))
+ failures += __bdk_dram_retry_failure(1, address, data, pattern);
+ WRITE64(address, ~pattern);
+ }
+
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Pass 5: descending addresses, read ~pattern and write pattern. */
+ BDK_TRACE(DRAM_TEST, " [0x%016lx:0x%016lx] Phase5, address decrementing, pattern 0x%016lx\n", area, max_address-1, pattern);
+ for (uint64_t address = end; address >= area; address -= 8)
+ {
+ uint64_t data = READ64(address);
+ if (bdk_unlikely(data != ~pattern))
+ failures += __bdk_dram_retry_failure(1, address, data, ~pattern);
+ WRITE64(address, pattern);
+ }
+
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Pass 6: ascending addresses, read pattern. */
+ BDK_TRACE(DRAM_TEST, " [0x%016lx:0x%016lx] Phase6, address incrementing\n", area, max_address-1);
+ for (uint64_t address = area; address < max_address; address += 8)
+ {
+ uint64_t data = READ64(address);
+ if (bdk_unlikely(data != pattern))
+ failures += __bdk_dram_retry_failure(1, address, data, pattern);
+ }
+
+ return failures;
+}
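+
+/* test_mem_march_c() above is the classic March C- element sequence,
+ * {U(w0); U(r0,w1); U(r1,w0); D(r0,w1); D(r1,w0); U(r0)}, with 0 = pattern
+ * and 1 = ~pattern. A compact host-side sketch over a plain array; note the
+ * "i-- > 0" idiom for the descending passes, which avoids the unsigned
+ * wrap-at-zero hazard of in-place address arithmetic. */
+#if 0 /* illustrative sketch; not part of the BDK build */
+#include <stddef.h>
+#include <stdint.h>
+static int march_c_minus(uint64_t *mem, size_t n, uint64_t pat)
+{
+    int errs = 0;
+    for (size_t i = 0; i < n; i++)                          /* U(w0) */
+        mem[i] = pat;
+    for (size_t i = 0; i < n; i++)                          /* U(r0,w1) */
+        { if (mem[i] != pat) errs++; mem[i] = ~pat; }
+    for (size_t i = 0; i < n; i++)                          /* U(r1,w0) */
+        { if (mem[i] != ~pat) errs++; mem[i] = pat; }
+    for (size_t i = n; i-- > 0; )                           /* D(r0,w1) */
+        { if (mem[i] != pat) errs++; mem[i] = ~pat; }
+    for (size_t i = n; i-- > 0; )                           /* D(r1,w0) */
+        { if (mem[i] != ~pat) errs++; mem[i] = pat; }
+    for (size_t i = 0; i < n; i++)                          /* U(r0) */
+        if (mem[i] != pat) errs++;
+    return errs;
+}
+#endif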
+
+/**
+ * Use test_mem_march_c() with an all-ones pattern
+ *
+ * @param area Start of the physical memory area
+ * @param max_address
+ * End of the physical memory area
+ * @param bursts Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_solid(uint64_t area, uint64_t max_address, int bursts)
+{
+ int failures = 0;
+ for (int burst = 0; burst < bursts; burst++)
+ failures += test_mem_march_c(area, max_address, -1);
+ return failures;
+}
+
+/**
+ * Use test_mem_march_c() with a 0x5555555555555555 checkerboard pattern
+ *
+ * @param area Start of the physical memory area
+ * @param max_address
+ * End of the physical memory area
+ * @param bursts Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_checkerboard(uint64_t area, uint64_t max_address, int bursts)
+{
+ int failures = 0;
+ for (int burst = 0; burst < bursts; burst++)
+ failures += test_mem_march_c(area, max_address, 0x5555555555555555L);
+ return failures;
+}
+
+/**
+ * Write a pseudo random pattern to memory and verify it
+ *
+ * @param area Start of the physical memory area
+ * @param max_address
+ * End of the physical memory area
+ * @param bursts Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_random(uint64_t area, uint64_t max_address, int bursts)
+{
+    /* This constant is used to increment the pattern after every DWORD. This
+       makes only the first DWORD truly random, but saves the processing
+       power of generating a random value for every DWORD */
+ const uint64_t INC = 0x1010101010101010ULL;
+
+ int failures = 0;
+ for (int burst = 0; burst < bursts; burst++)
+ {
+ const uint64_t init_pattern = bdk_rng_get_random64();
+ uint64_t pattern = init_pattern;
+
+        /* Write the incrementing pattern to memory. The flush below forces
+         * all of the cached data out to the DDR.
+         */
+ for (uint64_t address = area; address < max_address; address += 8)
+ {
+ WRITE64(address, pattern);
+ pattern += INC;
+ }
+
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Read the written memory and confirm that it has the expected
+ * data pattern.
+ */
+ pattern = init_pattern;
+ for (uint64_t address = area; address < max_address; address += 8)
+ {
+ uint64_t data = READ64(address);
+ if (bdk_unlikely(data != pattern))
+ failures += __bdk_dram_retry_failure(burst, address, data, pattern);
+ pattern += INC;
+ }
+ }
+ return failures;
+}
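+
+/* Because the verify loop regenerates the sequence from init_pattern, no
+ * copy of the written data needs to be kept. The value of the k-th DWORD
+ * can also be predicted in closed form; a one-line sketch of that
+ * relationship: */
+#if 0 /* illustrative sketch; not part of the BDK build */
+static uint64_t predicted_value(uint64_t init_pattern, uint64_t k)
+{
+    const uint64_t INC = 0x1010101010101010ULL;
+    return init_pattern + k * INC;  /* value written at area + 8*k */
+}
+#endif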
+
+#if !USE_PREDICTION_CODE_VERSIONS
+/**
+ * test_mem_xor
+ *
+ * @param area Start of the physical memory area
+ * @param max_address
+ * End of the physical memory area
+ * @param bursts Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_xor(uint64_t area, uint64_t max_address, int bursts)
+{
+ int failures = 0;
+
+ uint64_t extent = max_address - area;
+ uint64_t count = (extent / sizeof(uint64_t)) / 2;
+
+ /* Fill both halves of the memory area with identical randomized data.
+ */
+ uint64_t address1 = area;
+ uint64_t address2 = area + count * sizeof(uint64_t);
+
+ uint64_t pattern = bdk_rng_get_random64();
+
+ for (uint64_t j = 0; j < count; j++)
+ {
+ uint64_t p = pattern * address1;
+ WRITE64(address1, p);
+ WRITE64(address2, p);
+ address1 += 8;
+ address2 += 8;
+ }
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Make a series of passes over the memory areas. */
+ for (int burst = 0; burst < bursts; burst++)
+ {
+ /* XOR the data with a random value, applying the change to both
+ * memory areas.
+ */
+ address1 = area;
+ address2 = area + count * sizeof(uint64_t);
+
+ pattern = bdk_rng_get_random64();
+
+ for (uint64_t j = 0; j < count; j++)
+ {
+ if ((address1 & BDK_CACHE_LINE_MASK) == 0)
+ BDK_PREFETCH(address1, BDK_CACHE_LINE_SIZE);
+ if ((address2 & BDK_CACHE_LINE_MASK) == 0)
+ BDK_PREFETCH(address2, BDK_CACHE_LINE_SIZE);
+ WRITE64(address1, READ64(address1) ^ pattern);
+ WRITE64(address2, READ64(address2) ^ pattern);
+ address1 += 8;
+ address2 += 8;
+ }
+
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Look for differences in the areas. If there is a mismatch, reset
+ * both memory locations with the same pattern. Failing to do so
+ * means that on all subsequent passes the pair of locations remain
+ * out of sync giving spurious errors.
+ */
+ address1 = area;
+ address2 = area + count * sizeof(uint64_t);
+ for (uint64_t j = 0; j < count; j++)
+ {
+ if ((address1 & BDK_CACHE_LINE_MASK) == 0)
+ BDK_PREFETCH(address1, BDK_CACHE_LINE_SIZE);
+ if ((address2 & BDK_CACHE_LINE_MASK) == 0)
+ BDK_PREFETCH(address2, BDK_CACHE_LINE_SIZE);
+ uint64_t d1 = READ64(address1);
+ uint64_t d2 = READ64(address2);
+ if (bdk_unlikely(d1 != d2))
+ {
+ failures += __bdk_dram_retry_failure2(burst, address1, d1, address2, d2);
+
+ // Synchronize the two areas, adjusting for the error.
+ WRITE64(address1, d2);
+ WRITE64(address2, d2);
+ }
+ address1 += 8;
+ address2 += 8;
+ }
+ }
+ return failures;
+}
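+
+/* The test above never stores an expected value: it relies on the invariant
+ * that two equal halves stay equal under the same sequence of XORs, so any
+ * difference must be a storage error, and d1 ^ d2 marks exactly the failing
+ * data bits. A one-line sketch of the check: */
+#if 0 /* illustrative sketch; not part of the BDK build */
+static uint64_t failing_bits(uint64_t d1, uint64_t d2)
+{
+    return d1 ^ d2;  /* non-zero bits identify the erroring lanes */
+}
+#endif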
+
+/**
+ * test_mem_rows
+ *
+ * Write a pattern of alternating 64-bit words of all-one bits and all-zero
+ * bits. This pattern generates the maximum amount of simultaneous switching
+ * activity on the memory channels. Each pass flips the pattern with words
+ * going from all ones to all zeros and vice versa.
+ *
+ * @param area Start of the physical memory area
+ * @param max_address
+ * End of the physical memory area
+ * @param bursts Number of times to repeat the test over the entire area
+ *
+ * @return Number of errors, zero on success
+ */
+int __bdk_dram_test_mem_rows(uint64_t area, uint64_t max_address, int bursts)
+{
+ int failures = 0;
+ uint64_t pattern = 0x0;
+ uint64_t extent = (max_address - area);
+ uint64_t count = (extent / 2) / sizeof(uint64_t); // in terms of 64bit words
+
+    /* Fill both halves of the memory area with identical data pattern. Even-
+     * indexed 64-bit words get the pattern, while odd-indexed words get the
+     * inverted pattern.
+ */
+ uint64_t address1 = area;
+ uint64_t address2 = area + count * sizeof(uint64_t);
+
+ for (uint64_t j = 0; j < (count / 2); j++)
+ {
+ WRITE64(address1, pattern);
+ WRITE64(address2, pattern);
+ address1 += 8;
+ address2 += 8;
+ WRITE64(address1, ~pattern);
+ WRITE64(address2, ~pattern);
+ address1 += 8;
+ address2 += 8;
+ }
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Make a series of passes over the memory areas. */
+ for (int burst = 0; burst < bursts; burst++)
+ {
+ /* Invert the data, applying the change to both memory areas. Thus on
+ * alternate passes, the data flips from 0 to 1 and vice versa.
+ */
+ address1 = area;
+ address2 = area + count * sizeof(uint64_t);
+ for (uint64_t j = 0; j < count; j++)
+ {
+ WRITE64(address1, ~READ64(address1));
+ WRITE64(address2, ~READ64(address2));
+ address1 += 8;
+ address2 += 8;
+ }
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Look for differences in the areas. If there is a mismatch, reset
+ * both memory locations with the same pattern. Failing to do so
+ * means that on all subsequent passes the pair of locations remain
+ * out of sync giving spurious errors.
+ */
+ address1 = area;
+ address2 = area + count * sizeof(uint64_t);
+ for (uint64_t j = 0; j < count; j++)
+ {
+ uint64_t d1 = READ64(address1);
+ uint64_t d2 = READ64(address2);
+ if (bdk_unlikely(d1 != d2))
+ {
+ failures += __bdk_dram_retry_failure2(burst, address1, d1, address2, d2);
+
+ // Synchronize the two areas, adjusting for the error.
+ WRITE64(address1, d2);
+ WRITE64(address2, d2);
+ }
+ address1 += 8;
+ address2 += 8;
+ }
+ }
+ return failures;
+}
+#endif /* !USE_PREDICTION_CODE_VERSIONS */
+
+#if USE_PREDICTION_CODE_VERSIONS
+//////////////////////////// this is the new code...
+
+int __bdk_dram_test_mem_xor(uint64_t area, uint64_t max_address, int bursts)
+{
+ int failures = 0;
+ int burst;
+
+ uint64_t extent = max_address - area;
+ uint64_t count = (extent / sizeof(uint64_t)) / 2;
+ uint64_t offset = count * sizeof(uint64_t);
+ uint64_t area2 = area + offset;
+
+ /* Fill both halves of the memory area with identical randomized data.
+ */
+ uint64_t address1 = area;
+
+ uint64_t pattern1 = bdk_rng_get_random64();
+ uint64_t pattern2 = 0;
+ uint64_t this_pattern;
+
+ uint64_t p;
+ uint64_t d1, d2;
+
+ // move the multiplies outside the loop
+ uint64_t pbase = address1 * pattern1;
+ uint64_t pincr = 8 * pattern1;
+ uint64_t ppred;
+
+ p = pbase;
+ while (address1 < area2)
+ {
+ WRITE64(address1 , p);
+ WRITE64(address1 + offset, p);
+ address1 += 8;
+ p += pincr;
+ }
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Make a series of passes over the memory areas. */
+ for (burst = 0; burst < bursts; burst++)
+ {
+ /* XOR the data with a random value, applying the change to both
+ * memory areas.
+ */
+ address1 = area;
+
+ this_pattern = bdk_rng_get_random64();
+ pattern2 ^= this_pattern;
+
+ while (address1 < area2)
+ {
+#if 1
+ if ((address1 & BDK_CACHE_LINE_MASK) == 0)
+ BDK_PREFETCH(address1, BDK_CACHE_LINE_SIZE);
+ if (((address1 + offset) & BDK_CACHE_LINE_MASK) == 0)
+ BDK_PREFETCH(address1 + offset, BDK_CACHE_LINE_SIZE);
+#endif
+ WRITE64(address1 , READ64(address1 ) ^ this_pattern);
+ WRITE64(address1 + offset, READ64(address1 + offset) ^ this_pattern);
+ address1 += 8;
+ }
+
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Look for differences from the expected pattern in both areas.
+ * If there is a mismatch, reset the appropriate memory location
+ * with the correct pattern. Failing to do so
+ * means that on all subsequent passes the erroring locations
+ * will be out of sync, giving spurious errors.
+ */
+ address1 = area;
+ ppred = pbase;
+
+ while (address1 < area2)
+ {
+#if 1
+ if ((address1 & BDK_CACHE_LINE_MASK) == 0)
+ BDK_PREFETCH(address1, BDK_CACHE_LINE_SIZE);
+ if (((address1 + offset) & BDK_CACHE_LINE_MASK) == 0)
+ BDK_PREFETCH(address1 + offset, BDK_CACHE_LINE_SIZE);
+#endif
+ d1 = READ64(address1 );
+ d2 = READ64(address1 + offset);
+
+ p = ppred ^ pattern2;
+
+ if (bdk_unlikely(d1 != p)) {
+ failures += __bdk_dram_retry_failure(burst, address1, d1, p);
+ // Synchronize the area, adjusting for the error.
+ //WRITE64(address1, p); // retries should do this
+ }
+ if (bdk_unlikely(d2 != p)) {
+ failures += __bdk_dram_retry_failure(burst, address1 + offset, d2, p);
+ // Synchronize the area, adjusting for the error.
+ //WRITE64(address1 + offset, p); // retries should do this
+ }
+
+ address1 += 8;
+ ppred += pincr;
+
+ } /* while (address1 < area2) */
+ } /* for (int burst = 0; burst < bursts; burst++) */
+ return failures;
+}
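+
+/* This "prediction" variant avoids the resynchronizing writes of the
+ * non-prediction version: the fill gives word k the value pbase + k * pincr,
+ * and XOR is associative, so after any number of bursts the expected value
+ * depends only on pattern2, the cumulative XOR of the per-burst patterns.
+ * A sketch of the predictor: */
+#if 0 /* illustrative sketch; not part of the BDK build */
+static uint64_t expected_word(uint64_t pbase, uint64_t pincr,
+                              uint64_t k, uint64_t pattern2)
+{
+    /* ((pbase + k*pincr) ^ x1 ^ ... ^ xn) == (pbase + k*pincr) ^ pattern2 */
+    return (pbase + k * pincr) ^ pattern2;
+}
+#endif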
+
+//////////////// this is the new code...
+
+int __bdk_dram_test_mem_rows(uint64_t area, uint64_t max_address, int bursts)
+{
+ int failures = 0;
+
+ uint64_t pattern1 = 0x0;
+ uint64_t extent = (max_address - area);
+ uint64_t count = (extent / 2) / sizeof(uint64_t); // in terms of 64bit words
+ uint64_t offset = count * sizeof(uint64_t);
+ uint64_t area2 = area + offset;
+ uint64_t pattern2;
+ uint64_t d1, d2;
+ int burst;
+
+    /* Fill both halves of the memory area with identical data pattern. Even-
+     * indexed 64-bit words get the pattern, while odd-indexed words get the
+     * inverted pattern.
+ */
+ uint64_t address1 = area;
+
+ pattern2 = pattern1; // start with original pattern
+
+ while (address1 < area2)
+ {
+ WRITE64(address1 , pattern2);
+ WRITE64(address1 + offset, pattern2);
+ address1 += 8;
+ pattern2 = ~pattern2; // flip for next slots
+ }
+
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Make a series of passes over the memory areas. */
+ for (burst = 0; burst < bursts; burst++)
+ {
+ /* Invert the data, applying the change to both memory areas. Thus on
+ * alternate passes, the data flips from 0 to 1 and vice versa.
+ */
+ address1 = area;
+
+ while (address1 < area2)
+ {
+ if ((address1 & BDK_CACHE_LINE_MASK) == 0)
+ BDK_PREFETCH(address1 , BDK_CACHE_LINE_SIZE);
+ if (((address1 + offset) & BDK_CACHE_LINE_MASK) == 0)
+ BDK_PREFETCH(address1 + offset, BDK_CACHE_LINE_SIZE);
+
+ WRITE64(address1 , ~READ64(address1 ));
+ WRITE64(address1 + offset, ~READ64(address1 + offset));
+ address1 += 8;
+ }
+
+ __bdk_dram_flush_to_mem_range(area, max_address);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Look for differences in the areas. If there is a mismatch, reset
+ * both memory locations with the same pattern. Failing to do so
+ * means that on all subsequent passes the pair of locations remain
+ * out of sync giving spurious errors.
+ */
+ address1 = area;
+ pattern1 = ~pattern1; // flip the starting pattern to match above loop
+ pattern2 = pattern1; // slots have been flipped by the above loop
+
+ while (address1 < area2)
+ {
+ if ((address1 & BDK_CACHE_LINE_MASK) == 0)
+ BDK_PREFETCH(address1 , BDK_CACHE_LINE_SIZE);
+ if (((address1 + offset) & BDK_CACHE_LINE_MASK) == 0)
+ BDK_PREFETCH(address1 + offset, BDK_CACHE_LINE_SIZE);
+
+ d1 = READ64(address1 );
+ d2 = READ64(address1 + offset);
+
+ if (bdk_unlikely(d1 != pattern2)) {
+ failures += __bdk_dram_retry_failure(burst, address1, d1, pattern2);
+ // Synchronize the area, adjusting for the error.
+ //WRITE64(address1, pattern2); // retries should do this
+ }
+ if (bdk_unlikely(d2 != pattern2)) {
+ failures += __bdk_dram_retry_failure(burst, address1 + offset, d2, pattern2);
+ // Synchronize the two areas, adjusting for the error.
+ //WRITE64(address1 + offset, pattern2); // retries should do this
+ }
+
+ address1 += 8;
+ pattern2 = ~pattern2; // flip for next pair of slots
+ }
+ }
+ return failures;
+}
+#endif /* USE_PREDICTION_CODE_VERSIONS */
diff --git a/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test.c b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test.c
new file mode 100644
index 0000000000..53137502fc
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-dram/bdk-dram-test.c
@@ -0,0 +1,860 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-gti.h"
+#include "libbdk-arch/bdk-csrs-ocx.h"
+
+/* This code is an optional part of the BDK. It is only linked in
+ if BDK_REQUIRE() needs it */
+BDK_REQUIRE_DEFINE(DRAM_TEST);
+
+#define MAX_ERRORS_TO_REPORT 50
+#define RETRY_LIMIT 1000
+
+typedef struct
+{
+ const char * name; /* Friendly name for the test */
+ __bdk_dram_test_t test_func; /* Function to call */
+ int bursts; /* Bursts parameter to pass to the test */
+ int max_cores; /* Maximum number of cores the test should be run on in parallel. Zero means all */
+} dram_test_info_t;
+
+static const dram_test_info_t TEST_INFO[] = {
+ /* Name, Test function, Bursts, Max Cores */
+ { "Data Bus", __bdk_dram_test_mem_data_bus, 8, 1},
+ { "Address Bus", __bdk_dram_test_mem_address_bus, 0, 1},
+ { "Marching Rows", __bdk_dram_test_mem_rows, 16, 0},
+ { "Random Data", __bdk_dram_test_mem_random, 32, 0},
+ { "Random XOR (32 Burst)", __bdk_dram_test_mem_xor, 32, 0},
+ { "Self Address", __bdk_dram_test_mem_self_addr, 1, 0},
+ { "March C- Solid Bits", __bdk_dram_test_mem_solid, 1, 0},
+ { "March C- Checkerboard", __bdk_dram_test_mem_checkerboard, 1, 0},
+ { "Walking Ones Left", __bdk_dram_test_mem_leftwalk1, 1, 0},
+ { "Walking Ones Right", __bdk_dram_test_mem_rightwalk1, 1, 0},
+ { "Walking Zeros Left", __bdk_dram_test_mem_leftwalk0, 1, 0},
+ { "Walking Zeros Right", __bdk_dram_test_mem_rightwalk0, 1, 0},
+ { "Random XOR (224 Burst)", __bdk_dram_test_mem_xor, 224, 0},
+ { "Fast Scan", __bdk_dram_test_fast_scan, 0, 0},
+ { NULL, NULL, 0, 0}
+};
+
+/* These variables count the number of ECC errors. They should only be accessed atomically */
+int64_t __bdk_dram_ecc_single_bit_errors[BDK_MAX_MEM_CHANS];
+int64_t __bdk_dram_ecc_double_bit_errors[BDK_MAX_MEM_CHANS];
+
+static int64_t dram_test_thread_done;
+static int64_t dram_test_thread_errors;
+static uint64_t dram_test_thread_start;
+static uint64_t dram_test_thread_end;
+static uint64_t dram_test_thread_size;
+
+/**
+ * Force the memory at the pointer location to be written to memory and evicted
+ * from L2. L1 will be unaffected.
+ *
+ * @param address Physical memory location
+ */
+void __bdk_dram_flush_to_mem(uint64_t address)
+{
+ BDK_MB;
+    /* The DRAM code doesn't use the normal bdk_phys_to_ptr() because the
+       NULL check in it greatly slows down the memory tests */
+ char *ptr = (void*)address;
+ BDK_CACHE_WBI_L2(ptr);
+}
+
+/**
+ * Force a memory region to be written to DRAM and evicted from L2
+ *
+ * @param area Start of the region
+ * @param max_address
+ * End of the region (exclusive)
+ */
+void __bdk_dram_flush_to_mem_range(uint64_t area, uint64_t max_address)
+{
+    /* The DRAM code doesn't use the normal bdk_phys_to_ptr() because the
+       NULL check in it greatly slows down the memory tests */
+ char *ptr = (void*)area;
+ char *end = (void*)max_address;
+ BDK_MB;
+ while (ptr < end)
+ {
+ BDK_CACHE_WBI_L2(ptr);
+ ptr += 128;
+ }
+}
+
+/**
+ * Convert a test enumeration into a string
+ *
+ * @param test Test to convert
+ *
+ * @return String for display
+ */
+const char *bdk_dram_get_test_name(int test)
+{
+    if ((test >= 0) && (test < (int)(sizeof(TEST_INFO) / sizeof(TEST_INFO[0]))))
+ return TEST_INFO[test].name;
+ else
+ return NULL;
+}
+
+static bdk_dram_test_flags_t dram_test_flags; // FIXME: Don't use global
+/**
+ * This function is run as a thread to perform memory tests over multiple cores.
+ * Each thread gets a section of memory to work on, which is controlled by global
+ * variables at the beginning of this file.
+ *
+ * @param arg Number of the region we should check
+ * @param arg1 Pointer to the test_info structure
+ */
+static void dram_test_thread(int arg, void *arg1)
+{
+ const dram_test_info_t *test_info = arg1;
+ const int bursts = test_info->bursts;
+ const int range_number = arg;
+
+ /* Figure out our work memory range.
+ *
+ * Note start_address and end_address just provide the physical offset
+ * portion of the address and do not have the node bits set. This is
+ * to simplify address checks and calculations. Later, when about to run
+ * the memory test, the routine adds in the node bits to form the final
+ * addresses.
+ */
+ uint64_t start_address = dram_test_thread_start + dram_test_thread_size * range_number;
+ uint64_t end_address = start_address + dram_test_thread_size;
+ if (end_address > dram_test_thread_end)
+ end_address = dram_test_thread_end;
+
+ bdk_node_t test_node = bdk_numa_local();
+ if (dram_test_flags & BDK_DRAM_TEST_USE_CCPI)
+ test_node ^= 1;
+ /* Insert the node part of the address */
+ start_address = bdk_numa_get_address(test_node, start_address);
+ end_address = bdk_numa_get_address(test_node, end_address);
+ /* Test the region */
+ BDK_TRACE(DRAM_TEST, " Node %d, core %d, Testing [0x%011lx:0x%011lx]\n",
+ bdk_numa_local(), bdk_get_core_num() & 127, start_address, end_address - 1);
+ test_info->test_func(start_address, end_address, bursts);
+
+ /* Report that we're done */
+ BDK_TRACE(DRAM_TEST, "Thread %d on node %d done with memory test\n", range_number, bdk_numa_local());
+ bdk_atomic_add64_nosync(&dram_test_thread_done, 1);
+}
+
+/**
+ * Run the memory test.
+ *
+ * @param test_info
+ * @param start_address
+ * Physical address to start at
+ * @param length Length of memory block
+ * @param flags Flags to control memory test options. Zero defaults to testing all
+ *              nodes with statistics and progress output.
+ *
+ * @return Number of errors found. Zero is success. Negative means the test
+ * did not run due to some other failure.
+ */
+static int __bdk_dram_run_test(const dram_test_info_t *test_info, uint64_t start_address,
+ uint64_t length, bdk_dram_test_flags_t flags)
+{
+    /* Figure out the address of the byte one beyond the top of memory */
+ uint64_t max_address = bdk_dram_get_size_mbytes(bdk_numa_local());
+ BDK_TRACE(DRAM_TEST, "DRAM available per node: %lu MB\n", max_address);
+ max_address <<= 20;
+
+ /* Make sure we have enough */
+ if (max_address < (16<<20))
+ {
+ bdk_error("DRAM size is too small\n");
+ return -1;
+ }
+
+ /* Make sure the amount is sane */
+ if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX))
+ {
+ if (max_address > (1ull << 40)) /* 40 bits in CN8XXX */
+ max_address = 1ull << 40;
+ }
+ else
+ {
+ if (max_address > (1ull << 43)) /* 43 bits in CN9XXX */
+ max_address = 1ull << 43;
+ }
+ BDK_TRACE(DRAM_TEST, "DRAM max address: 0x%011lx\n", max_address-1);
+
+ /* Make sure the start address is lower than the top of memory */
+ if (start_address >= max_address)
+ {
+ bdk_error("Start address is larger than the amount of memory: 0x%011lx versus 0x%011lx\n",
+ start_address, max_address);
+ return -1;
+ }
+ if (length == (uint64_t)-1)
+ length = max_address - start_address;
+
+ /* Final range checks */
+ uint64_t end_address = start_address + length;
+ if (end_address > max_address)
+ {
+ end_address = max_address;
+ length = end_address - start_address;
+ }
+ if (length == 0)
+ return 0;
+
+ /* Ready to run the test. Figure out how many cores we need */
+ int max_cores = test_info->max_cores;
+ int total_cores_all_nodes = max_cores;
+
+ /* Figure out the number of cores available in the system */
+ if (max_cores == 0)
+ {
+ max_cores += bdk_get_num_running_cores(bdk_numa_local());
+ /* Calculate the total number of cores being used. The per node number
+ is confusing to people */
+ for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++)
+ if (flags & (1 << node))
+ {
+ if (flags & BDK_DRAM_TEST_USE_CCPI)
+ total_cores_all_nodes += bdk_get_num_running_cores(node ^ 1);
+ else
+ total_cores_all_nodes += bdk_get_num_running_cores(node);
+ }
+ }
+ if (!(flags & BDK_DRAM_TEST_NO_BANNERS))
+ printf("Starting Test \"%s\" for [0x%011lx:0x%011lx] using %d core(s)\n",
+ test_info->name, start_address, end_address - 1, total_cores_all_nodes);
+
+ /* Remember the LMC perf counters for stats after the test */
+ uint64_t start_dram_dclk[BDK_NUMA_MAX_NODES][4];
+ uint64_t start_dram_ops[BDK_NUMA_MAX_NODES][4];
+ uint64_t stop_dram_dclk[BDK_NUMA_MAX_NODES][4];
+ uint64_t stop_dram_ops[BDK_NUMA_MAX_NODES][4];
+ for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++)
+ {
+ if (flags & (1 << node))
+ {
+ const int num_dram_controllers = __bdk_dram_get_num_lmc(node);
+ for (int i = 0; i < num_dram_controllers; i++)
+ {
+ start_dram_dclk[node][i] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(i));
+ start_dram_ops[node][i] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(i));
+ }
+ }
+ }
+ /* Remember the CCPI link counters for stats after the test */
+ uint64_t start_ccpi_data[BDK_NUMA_MAX_NODES][3];
+ uint64_t start_ccpi_idle[BDK_NUMA_MAX_NODES][3];
+ uint64_t start_ccpi_err[BDK_NUMA_MAX_NODES][3];
+ uint64_t stop_ccpi_data[BDK_NUMA_MAX_NODES][3];
+ uint64_t stop_ccpi_idle[BDK_NUMA_MAX_NODES][3];
+ uint64_t stop_ccpi_err[BDK_NUMA_MAX_NODES][3];
+ if (!bdk_numa_is_only_one())
+ {
+ for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++)
+ {
+ if (flags & (1 << node))
+ {
+ for (int link = 0; link < 3; link++)
+ {
+ start_ccpi_data[node][link] = BDK_CSR_READ(node, BDK_OCX_TLKX_STAT_DATA_CNT(link));
+ start_ccpi_idle[node][link] = BDK_CSR_READ(node, BDK_OCX_TLKX_STAT_IDLE_CNT(link));
+ start_ccpi_err[node][link] = BDK_CSR_READ(node, BDK_OCX_TLKX_STAT_ERR_CNT(link));
+ }
+ }
+ }
+ }
+
+ /* WARNING: This code assumes the same memory range is being tested on
+ all nodes. The same number of cores are used on each node to test
+ its local memory */
+ uint64_t work_address = start_address;
+ dram_test_flags = flags;
+ bdk_atomic_set64(&dram_test_thread_errors, 0);
+ while ((work_address < end_address) && ((dram_test_thread_errors == 0) || (flags & BDK_DRAM_TEST_NO_STOP_ERROR)))
+ {
+        /* Test at most MAX_CHUNK_SIZE in each iteration. We only report
+           progress between chunks, so keep them reasonably small */
+ const uint64_t MAX_CHUNK_SIZE = 1ull << 28; /* 256MB */
+ uint64_t size = end_address - work_address;
+ if (size > MAX_CHUNK_SIZE)
+ size = MAX_CHUNK_SIZE;
+
+ /* Divide memory evenly between the cores. Round the size up so that
+ all memory is covered. The last core may have slightly less memory to
+ test */
+ uint64_t thread_size = (size + (max_cores - 1)) / max_cores;
+ thread_size += 127;
+ thread_size &= -128;
+ dram_test_thread_start = work_address;
+ dram_test_thread_end = work_address + size;
+ dram_test_thread_size = thread_size;
+ BDK_WMB;
+
+ /* Poke the watchdog */
+ BDK_CSR_WRITE(bdk_numa_local(), BDK_GTI_CWD_POKEX(0), 0);
+
+ /* disable progress output when batch mode is ON */
+ if (!(flags & BDK_DRAM_TEST_NO_PROGRESS)) {
+
+ /* Report progress percentage */
+ int percent_x10 = (work_address - start_address) * 1000 / (end_address - start_address);
+ printf(" %3d.%d%% complete, testing [0x%011lx:0x%011lx]\r",
+ percent_x10 / 10, percent_x10 % 10, work_address, work_address + size - 1);
+ fflush(stdout);
+ }
+
+ work_address += size;
+
+ /* Start threads for all the cores */
+ int total_count = 0;
+ bdk_atomic_set64(&dram_test_thread_done, 0);
+ for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++)
+ {
+ if (flags & (1 << node))
+ {
+ const int num_cores = bdk_get_num_cores(node);
+ int per_node = 0;
+ for (int core = 0; core < num_cores; core++)
+ {
+ if (per_node >= max_cores)
+ break;
+ int run_node = (flags & BDK_DRAM_TEST_USE_CCPI) ? node ^ 1 : node;
+ BDK_TRACE(DRAM_TEST, "Starting thread %d on node %d for memory test\n", per_node, node);
+ if (bdk_thread_create(run_node, 0, dram_test_thread, per_node, (void *)test_info, 0))
+ {
+ bdk_error("Failed to create thread %d for memory test on node %d\n", per_node, node);
+ }
+ else
+ {
+ per_node++;
+ total_count++;
+ }
+ }
+ }
+ }
+
+#if 0
+ /* Wait for threads to finish */
+ while (bdk_atomic_get64(&dram_test_thread_done) < total_count)
+ bdk_thread_yield();
+#else
+#define TIMEOUT_SECS 30 // FIXME: long enough so multicore RXOR 224 should not print out
+ /* Wait for threads to finish, with progress */
+ int cur_count;
+ uint64_t cur_time;
+ uint64_t period = bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_TIME) * TIMEOUT_SECS; // FIXME?
+ uint64_t timeout = bdk_clock_get_count(BDK_CLOCK_TIME) + period;
+ do {
+ bdk_thread_yield();
+ cur_count = bdk_atomic_get64(&dram_test_thread_done);
+ cur_time = bdk_clock_get_count(BDK_CLOCK_TIME);
+ if (cur_time >= timeout) {
+ BDK_TRACE(DRAM_TEST, "N%d: Waiting for %d cores\n",
+ bdk_numa_local(), total_count - cur_count);
+ timeout = cur_time + period;
+ }
+ } while (cur_count < total_count);
+#endif
+ }
+
+ /* Get the DRAM perf counters */
+ for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++)
+ {
+ if (flags & (1 << node))
+ {
+ const int num_dram_controllers = __bdk_dram_get_num_lmc(node);
+ for (int i = 0; i < num_dram_controllers; i++)
+ {
+ stop_dram_dclk[node][i] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(i));
+ stop_dram_ops[node][i] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(i));
+ }
+ }
+ }
+ /* Get the CCPI link counters */
+ if (!bdk_numa_is_only_one())
+ {
+ for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++)
+ {
+ if (flags & (1 << node))
+ {
+ for (int link = 0; link < 3; link++)
+ {
+ stop_ccpi_data[node][link] = BDK_CSR_READ(node, BDK_OCX_TLKX_STAT_DATA_CNT(link));
+ stop_ccpi_idle[node][link] = BDK_CSR_READ(node, BDK_OCX_TLKX_STAT_IDLE_CNT(link));
+ stop_ccpi_err[node][link] = BDK_CSR_READ(node, BDK_OCX_TLKX_STAT_ERR_CNT(link));
+ }
+ }
+ }
+ }
+
+ /* disable progress output when batch mode is ON */
+ if (!(flags & BDK_DRAM_TEST_NO_PROGRESS)) {
+
+ /* Report progress percentage as complete */
+ printf(" %3d.%d%% complete, testing [0x%011lx:0x%011lx]\n",
+ 100, 0, start_address, end_address - 1);
+ fflush(stdout);
+ }
+
+ if (!(flags & BDK_DRAM_TEST_NO_STATS))
+ {
+ /* Display LMC load */
+ for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++)
+ {
+ if (flags & (1 << node))
+ {
+ const int num_dram_controllers = __bdk_dram_get_num_lmc(node);
+ for (int i = 0; i < num_dram_controllers; i++)
+ {
+ uint64_t ops = stop_dram_ops[node][i] - start_dram_ops[node][i];
+ uint64_t dclk = stop_dram_dclk[node][i] - start_dram_dclk[node][i];
+ if (dclk == 0)
+ dclk = 1;
+ uint64_t percent_x10 = ops * 1000 / dclk;
+ printf(" Node %d, LMC%d: ops %lu, cycles %lu, used %lu.%lu%%\n",
+ node, i, ops, dclk, percent_x10 / 10, percent_x10 % 10);
+ }
+ }
+ }
+ if (flags & BDK_DRAM_TEST_USE_CCPI)
+ {
+ /* Display CCPI load */
+ for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++)
+ {
+ if (flags & (1 << node))
+ {
+ for (int link = 0; link < 3; link++)
+ {
+ uint64_t busy = stop_ccpi_data[node][link] - start_ccpi_data[node][link];
+ busy += stop_ccpi_err[node][link] - start_ccpi_err[node][link];
+ uint64_t total = stop_ccpi_idle[node][link] - start_ccpi_idle[node][link];
+ total += busy;
+ if (total == 0)
+ continue;
+ uint64_t percent_x10 = busy * 1000 / total;
+ printf(" Node %d, CCPI%d: busy %lu, total %lu, used %lu.%lu%%\n",
+ node, link, busy, total, percent_x10 / 10, percent_x10 % 10);
+ }
+ }
+ }
+ }
+ }
+ return dram_test_thread_errors;
+}
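+
+/* The chunk loop above divides each chunk evenly among the cores, rounding
+ * the per-thread size up to a multiple of 128 bytes so no two threads share
+ * a cache line. A sketch of the same split for core i of "cores": */
+#if 0 /* illustrative sketch; not part of the BDK build */
+static void split_range(uint64_t start, uint64_t end, int cores, int i,
+                        uint64_t *range_start, uint64_t *range_end)
+{
+    uint64_t size = end - start;
+    uint64_t thread_size = (size + cores - 1) / cores; /* round up */
+    thread_size = (thread_size + 127) & ~127ull;       /* cache-line align */
+    *range_start = start + thread_size * i;
+    *range_end = *range_start + thread_size;
+    if (*range_end > end)
+        *range_end = end;  /* the last core may get a short (or empty) range */
+}
+#endif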
+
+/**
+ * Perform a memory test.
+ *
+ * @param test Test type to run
+ * @param start_address
+ * Physical address to start at
+ * @param length Length of memory block
+ * @param flags Flags to control memory test options. Zero defaults to testing all
+ *              nodes with statistics and progress output.
+ *
+ * @return Number of errors found. Zero is success. Negative means the test
+ * did not run due to some other failure.
+ */
+int bdk_dram_test(int test, uint64_t start_address, uint64_t length, bdk_dram_test_flags_t flags)
+{
+    /* These limits are arbitrary. They just make sure we aren't doing something
+       silly, like testing a non-cache-line-aligned memory region */
+ if (start_address & 0xffff)
+ {
+ bdk_error("DRAM test start address must be aligned on a 64KB boundary\n");
+ return -1;
+ }
+ if (length & 0xffff)
+ {
+ bdk_error("DRAM test length must be a multiple of 64KB\n");
+ return -1;
+ }
+
+ const char *name = bdk_dram_get_test_name(test);
+ if (name == NULL)
+ {
+ bdk_error("Invalid DRAM test number %d\n", test);
+ return -1;
+ }
+
+ /* If no nodes are selected assume the user meant all nodes */
+ if ((flags & (BDK_DRAM_TEST_NODE0 | BDK_DRAM_TEST_NODE1 | BDK_DRAM_TEST_NODE2 | BDK_DRAM_TEST_NODE3)) == 0)
+ flags |= BDK_DRAM_TEST_NODE0 | BDK_DRAM_TEST_NODE1 | BDK_DRAM_TEST_NODE2 | BDK_DRAM_TEST_NODE3;
+
+ /* Remove nodes from the flags that don't exist */
+ for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++)
+ {
+ if (flags & BDK_DRAM_TEST_USE_CCPI)
+ {
+ if (!bdk_numa_exists(node ^ 1))
+ flags &= ~(1 << node);
+ }
+ else
+ {
+ if (!bdk_numa_exists(node))
+ flags &= ~(1 << node);
+ }
+ }
+
+
+    /* Make sure the start address is higher than the BDK's active range */
+ uint64_t top_of_bdk = bdk_dram_get_top_of_bdk();
+ if (start_address < top_of_bdk)
+ start_address = top_of_bdk;
+
+ /* Clear ECC error counters before starting the test */
+ for (int chan = 0; chan < BDK_MAX_MEM_CHANS; chan++) {
+ bdk_atomic_set64(&__bdk_dram_ecc_single_bit_errors[chan], 0);
+ bdk_atomic_set64(&__bdk_dram_ecc_double_bit_errors[chan], 0);
+ }
+
+ /* Make sure at least one core from each node is running */
+ for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++)
+ {
+ if (flags & (1<<node))
+ {
+ int use_node = (flags & BDK_DRAM_TEST_USE_CCPI) ? node ^ 1 : node;
+ if (bdk_get_running_coremask(use_node) == 0)
+ bdk_init_cores(use_node, 1);
+ }
+ }
+
+ /* This returns any data compare errors found */
+ int errors = __bdk_dram_run_test(&TEST_INFO[test], start_address, length, flags);
+
+ /* Poll for any errors right now to make sure any ECC errors are reported */
+ for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++)
+ {
+ if (bdk_numa_exists(node) && bdk_error_check)
+ bdk_error_check(node);
+ }
+
+ /* Check ECC error counters after the test */
+ int64_t ecc_single = 0;
+ int64_t ecc_double = 0;
+ int64_t ecc_single_errs[BDK_MAX_MEM_CHANS];
+ int64_t ecc_double_errs[BDK_MAX_MEM_CHANS];
+
+ for (int chan = 0; chan < BDK_MAX_MEM_CHANS; chan++) {
+ ecc_single += (ecc_single_errs[chan] = bdk_atomic_get64(&__bdk_dram_ecc_single_bit_errors[chan]));
+ ecc_double += (ecc_double_errs[chan] = bdk_atomic_get64(&__bdk_dram_ecc_double_bit_errors[chan]));
+ }
+
+ /* Always print any ECC errors */
+ if (ecc_single || ecc_double)
+ {
+ printf("Test \"%s\": ECC errors, %ld/%ld/%ld/%ld corrected, %ld/%ld/%ld/%ld uncorrected\n",
+ name,
+ ecc_single_errs[0], ecc_single_errs[1], ecc_single_errs[2], ecc_single_errs[3],
+ ecc_double_errs[0], ecc_double_errs[1], ecc_double_errs[2], ecc_double_errs[3]);
+ }
+ if (errors || ecc_double || ecc_single) {
+ printf("Test \"%s\": FAIL: %ld single, %ld double, %d compare errors\n",
+ name, ecc_single, ecc_double, errors);
+ }
+ else
+ BDK_TRACE(DRAM_TEST, "Test \"%s\": PASS\n", name);
+
+ return (errors + ecc_double + ecc_single);
+}
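+
+/* A hypothetical caller can walk the test table by index until
+ * bdk_dram_get_test_name() returns NULL. A sketch that runs every test over
+ * the first 1GB of node 0 memory (the start is clamped to the top of the
+ * BDK internally), stopping at the first test that reports a problem: */
+#if 0 /* illustrative sketch; not part of the BDK build */
+static int run_all_tests(void)
+{
+    for (int test = 0; bdk_dram_get_test_name(test) != NULL; test++)
+    {
+        int errors = bdk_dram_test(test, 0, 1ull << 30, BDK_DRAM_TEST_NODE0);
+        if (errors != 0)
+            return errors;
+    }
+    return 0;
+}
+#endif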
+
+/**
+ * Report a DRAM address in decoded format.
+ *
+ * @param address Physical address the error occurred at
+ * @param buffer  Buffer to receive the decoded description
+ * @param len     Length of the buffer
+ */
+static void __bdk_dram_report_address_decode(uint64_t address, char *buffer, int len)
+{
+ int node, lmc, dimm, prank, lrank, bank, row, col;
+
+ bdk_dram_address_extract_info(address, &node, &lmc, &dimm, &prank, &lrank, &bank, &row, &col);
+
+ snprintf(buffer, len, "[0x%011lx] (N%d,LMC%d,DIMM%d,Rank%d/%d,Bank%02d,Row 0x%05x,Col 0x%04x)",
+ address, node, lmc, dimm, prank, lrank, bank, row, col);
+}
+
+/**
+ * Report a DRAM address in a new decoded format.
+ *
+ * @param address  Physical address the error occurred at
+ * @param orig_xor XOR of the data read versus the expected data
+ * @param buffer   Buffer to receive the decoded description
+ * @param len      Length of the buffer
+ */
+static void __bdk_dram_report_address_decode_new(uint64_t address, uint64_t orig_xor, char *buffer, int len)
+{
+ int node, lmc, dimm, prank, lrank, bank, row, col;
+
+ int byte = 8; // means no byte-lanes in error, should not happen
+ uint64_t bits, print_bits = 0;
+ uint64_t xor = orig_xor;
+
+ // find the byte-lane(s) with errors
+ for (int i = 0; i < 8; i++) {
+ bits = xor & 0xffULL;
+ xor >>= 8;
+ if (bits) {
+ if (byte != 8) {
+ byte = 9; // means more than 1 byte-lane was present
+ print_bits = orig_xor; // print the full original
+ break; // quit now
+ } else {
+ byte = i; // keep checking
+ print_bits = bits;
+ }
+ }
+ }
+
+ bdk_dram_address_extract_info(address, &node, &lmc, &dimm, &prank, &lrank, &bank, &row, &col);
+
+ snprintf(buffer, len, "N%d.LMC%d: CMP byte %d xor 0x%02lx (DIMM%d,Rank%d/%d,Bank%02d,Row 0x%05x,Col 0x%04x)[0x%011lx]",
+ node, lmc, byte, print_bits, dimm, prank, lrank, bank, row, col, address);
+}
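+
+/* The lane scan above reduces the XOR to a single classification: 0-7 for
+ * exactly one failing byte lane, 8 for none, 9 for more than one. The same
+ * logic as a standalone sketch: */
+#if 0 /* illustrative sketch; not part of the BDK build */
+static int classify_xor(uint64_t xor)
+{
+    int byte = 8;                       /* 8 = no byte lane in error */
+    for (int i = 0; i < 8; i++)
+    {
+        if ((xor >> (8 * i)) & 0xffULL)
+        {
+            if (byte != 8)
+                return 9;               /* more than one lane in error */
+            byte = i;
+        }
+    }
+    return byte;
+}
+#endif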
+
+/**
+ * Report a DRAM error. Errors are not shown after MAX_ERRORS_TO_REPORT is
+ * exceeded. Used when a single address is involved in the failure.
+ *
+ * @param address Physical address the error occurred at
+ * @param data Data read from memory
+ * @param correct Correct data
+ * @param burst Which burst this is from, informational only
+ * @param fails -1 for no retries done, >= 0 number of failures during retries
+ */
+void __bdk_dram_report_error(uint64_t address, uint64_t data, uint64_t correct, int burst, int fails)
+{
+ char buffer[128];
+ char failbuf[32];
+ int64_t errors = bdk_atomic_fetch_and_add64(&dram_test_thread_errors, 1);
+ uint64_t xor = data ^ correct;
+
+ if (errors < MAX_ERRORS_TO_REPORT)
+ {
+ if (fails < 0) {
+ snprintf(failbuf, sizeof(failbuf), " ");
+ } else {
+ int percent_x10 = fails * 1000 / RETRY_LIMIT;
+ snprintf(failbuf, sizeof(failbuf), ", retries failed %3d.%d%%",
+ percent_x10 / 10, percent_x10 % 10);
+ }
+
+ __bdk_dram_report_address_decode_new(address, xor, buffer, sizeof(buffer));
+ bdk_error("%s%s\n", buffer, failbuf);
+
+ if (errors == MAX_ERRORS_TO_REPORT-1)
+ bdk_error("No further DRAM errors will be reported\n");
+ }
+ return;
+}
+
+/**
+ * Report a DRAM error. Errors are not shown after MAX_ERRORS_TO_REPORT is
+ * exceeded. Used when two addresses might be involved in the failure.
+ *
+ * @param address1 First address involved in the failure
+ * @param data1 Data from the first address
+ * @param address2 Second address involved in the failure
+ * @param data2 Data from second address
+ * @param burst Which burst this is from, informational only
+ * @param fails -1 for no retries done, >= 0 number of failures during retries
+ */
+void __bdk_dram_report_error2(uint64_t address1, uint64_t data1, uint64_t address2, uint64_t data2,
+ int burst, int fails)
+{
+ int64_t errors = bdk_atomic_fetch_and_add64(&dram_test_thread_errors, 1);
+ if (errors < MAX_ERRORS_TO_REPORT)
+ {
+ char buffer1[80], buffer2[80];
+ char failbuf[32];
+
+ if (fails < 0) {
+ snprintf(failbuf, sizeof(failbuf), " ");
+ } else {
+ snprintf(failbuf, sizeof(failbuf), ", retried %d failed %d", RETRY_LIMIT, fails);
+ }
+ __bdk_dram_report_address_decode(address1, buffer1, sizeof(buffer1));
+ __bdk_dram_report_address_decode(address2, buffer2, sizeof(buffer2));
+
+ bdk_error("compare: data1: 0x%016lx, xor: 0x%016lx%s\n"
+ " %s\n %s\n",
+ data1, data1 ^ data2, failbuf,
+ buffer1, buffer2);
+
+ if (errors == MAX_ERRORS_TO_REPORT-1)
+ bdk_error("No further DRAM errors will be reported\n");
+ }
+ return;
+}
+
+/* Report the circumstances of a failure and try re-reading the memory
+ * location to see if the error is transient or permanent.
+ *
+ * Note: re-reading requires evicting the location from the caches between reads
+ */
+int __bdk_dram_retry_failure(int burst, uint64_t address, uint64_t data, uint64_t expected)
+{
+ int refail = 0;
+
+ // bypass the retries if we are already over the limit...
+ if (bdk_atomic_get64(&dram_test_thread_errors) < MAX_ERRORS_TO_REPORT) {
+
+ /* Try re-reading the memory location. A transient error may fail
+ * on one read and work on another. Keep on retrying even when a
+ * read succeeds.
+ */
+ for (int i = 0; i < RETRY_LIMIT; i++) {
+
+ __bdk_dram_flush_to_mem(address);
+ BDK_DCACHE_INVALIDATE;
+
+ uint64_t new = __bdk_dram_read64(address);
+
+ if (new != expected) {
+ refail++;
+ }
+ }
+ } else
+ refail = -1;
+
+ // this will increment the errors always, but maybe not print...
+ __bdk_dram_report_error(address, data, expected, burst, refail);
+
+ return 1;
+}
+
+/**
+ * retry_failure2
+ *
+ * @param burst    Which burst this is from, informational only
+ * @param address1 First address involved in the failure
+ * @param data1    Data read from the first address
+ * @param address2 Second address involved in the failure
+ * @param data2    Data read from the second address
+ */
+int __bdk_dram_retry_failure2(int burst, uint64_t address1, uint64_t data1, uint64_t address2, uint64_t data2)
+{
+ int refail = 0;
+
+ // bypass the retries if we are already over the limit...
+ if (bdk_atomic_get64(&dram_test_thread_errors) < MAX_ERRORS_TO_REPORT) {
+
+ for (int i = 0; i < RETRY_LIMIT; i++) {
+ __bdk_dram_flush_to_mem(address1);
+ __bdk_dram_flush_to_mem(address2);
+ BDK_DCACHE_INVALIDATE;
+
+ uint64_t d1 = __bdk_dram_read64(address1);
+ uint64_t d2 = __bdk_dram_read64(address2);
+
+ if (d1 != d2) {
+ refail++;
+ }
+ }
+ } else
+ refail = -1;
+
+ // this will increment the errors always, but maybe not print...
+ __bdk_dram_report_error2(address1, data1, address2, data2, burst, refail);
+
+ return 1;
+}
+
+/**
+ * Inject a DRAM error at a specific address in memory. The injection can either
+ * be a single bit inside the byte, or a double bit error in the ECC byte. Double
+ * bit errors may corrupt memory, causing software to crash. The corruption is
+ * written to memory and will continue to exist until the cache line is written
+ * again. After a call to this function, the BDK should report an ECC error. Double
+ * bit errors corrupt bits 0-1.
+ *
+ * @param address Physical address to corrupt. Any byte alignment is supported
+ * @param bit Bit to corrupt in the byte (0-7), or -1 to create a double bit fault in the ECC
+ * byte.
+ */
+void bdk_dram_test_inject_error(uint64_t address, int bit)
+{
+ uint64_t aligned_address = address & -16;
+ int corrupt_bit = -1;
+ if (bit >= 0)
+ corrupt_bit = (address & 0xf) * 8 + bit;
+
+ /* Extract the DRAM controller information */
+ int node, lmc, dimm, prank, lrank, bank, row, col;
+ bdk_dram_address_extract_info(address, &node, &lmc, &dimm, &prank, &lrank, &bank, &row, &col);
+
+ /* Read the current data */
+ uint64_t data = __bdk_dram_read64(aligned_address);
+
+ /* Program LMC to inject the error */
+ if ((corrupt_bit >= 0) && (corrupt_bit < 64))
+ BDK_CSR_WRITE(node, BDK_LMCX_CHAR_MASK0(lmc), 1ull << corrupt_bit);
+ else if (bit == -1)
+ BDK_CSR_WRITE(node, BDK_LMCX_CHAR_MASK0(lmc), 3);
+ else
+ BDK_CSR_WRITE(node, BDK_LMCX_CHAR_MASK0(lmc), 0);
+ if (corrupt_bit >= 64)
+ BDK_CSR_WRITE(node, BDK_LMCX_CHAR_MASK2(lmc), 1ull << (corrupt_bit - 64));
+ else
+ BDK_CSR_WRITE(node, BDK_LMCX_CHAR_MASK2(lmc), 0);
+ BDK_CSR_MODIFY(c, node, BDK_LMCX_ECC_PARITY_TEST(lmc),
+ c.s.ecc_corrupt_idx = (address & 0x7f) >> 4;
+ c.s.ecc_corrupt_ena = 1);
+ BDK_CSR_READ(node, BDK_LMCX_ECC_PARITY_TEST(lmc));
+
+ /* Perform a write and push it to DRAM. This creates the error */
+ __bdk_dram_write64(aligned_address, data);
+ __bdk_dram_flush_to_mem(aligned_address);
+
+ /* Disable error injection */
+ BDK_CSR_MODIFY(c, node, BDK_LMCX_ECC_PARITY_TEST(lmc),
+ c.s.ecc_corrupt_ena = 0);
+ BDK_CSR_READ(node, BDK_LMCX_ECC_PARITY_TEST(lmc));
+ BDK_CSR_WRITE(node, BDK_LMCX_CHAR_MASK0(lmc), 0);
+ BDK_CSR_WRITE(node, BDK_LMCX_CHAR_MASK2(lmc), 0);
+
+ /* Read back the data, which should now cause an error */
+ printf("Loading the injected error address 0x%lx, node=%d, lmc=%d, dimm=%d, rank=%d/%d, bank=%d, row=%d, col=%d\n",
+ address, node, lmc, dimm, prank, lrank, bank, row, col);
+ __bdk_dram_read64(aligned_address);
+}
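+
+/* A hypothetical use of the injector to exercise the ECC reporting path:
+ * flip one data bit for a correctable error, then request a double-bit
+ * fault in the ECC byte for an uncorrectable one. */
+#if 0 /* illustrative sketch; not part of the BDK build */
+static void exercise_ecc_path(uint64_t address)
+{
+    bdk_dram_test_inject_error(address, 3);   /* single-bit: flip bit 3 */
+    bdk_dram_test_inject_error(address, -1);  /* double-bit fault in ECC */
+}
+#endif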
diff --git a/src/vendorcode/cavium/bdk/libbdk-driver/bdk-driver-rnm.c b/src/vendorcode/cavium/bdk/libbdk-driver/bdk-driver-rnm.c
new file mode 100644
index 0000000000..8394ad8c5e
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-driver/bdk-driver-rnm.c
@@ -0,0 +1,124 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-pccpf.h"
+#include "libbdk-arch/bdk-csrs-rnm.h"
+
+BDK_REQUIRE_DEFINE(RNM);
+
+/**
+ * Reads 8 bits of random data from the random number generator
+ *
+ * @return random data
+ */
+uint8_t bdk_rng_get_random8(void)
+{
+    return bdk_read64_uint8(bdk_numa_get_address(bdk_numa_local(), BDK_RNM_RANDOM));
+}
+
+/**
+ * Reads 16 bits of random data from the random number generator
+ *
+ * @return random data
+ */
+uint16_t bdk_rng_get_random16(void)
+{
+ return bdk_read64_uint16(bdk_numa_get_address(bdk_numa_local(), BDK_RNM_RANDOM));
+}
+
+/**
+ * Reads 32 bits of random data from the random number generator
+ *
+ * @return random data
+ */
+uint32_t bdk_rng_get_random32(void)
+{
+ return bdk_read64_uint32(bdk_numa_get_address(bdk_numa_local(), BDK_RNM_RANDOM));
+}
+
+/**
+ * Reads 64 bits of random data from the random number generator
+ *
+ * @return random data
+ */
+uint64_t bdk_rng_get_random64(void)
+{
+ return bdk_read64_uint64(bdk_numa_get_address(bdk_numa_local(), BDK_RNM_RANDOM));
+}
+
+/**
+ * The RNM probe function
+ *
+ * @param device RNM to probe
+ *
+ * @return Zero on success, negative on failure
+ */
+static int probe(bdk_device_t *device)
+{
+ bdk_device_rename(device, "N%d.RNM%d", device->node, device->instance);
+ return 0;
+}
+
+/**
+ * RNM init() function
+ *
+ * @param device RNM to initialize
+ *
+ * @return Zero on success, negative on failure
+ */
+static int init(bdk_device_t *device)
+{
+ BDK_BAR_MODIFY(c, device, BDK_RNM_CTL_STATUS,
+ c.s.ent_en = 1;
+ c.s.rng_en = 1);
+ /* Read back after enable so we know it is done. Needed on t88 pass 2.0 emulator */
+ BDK_BAR_READ(device, BDK_RNM_CTL_STATUS);
+    /* Errata (RNM-22528): the first consecutive reads to RNM_RANDOM return the
+       same value. Before using the random entropy, read RNM_RANDOM at least
+       once and discard the data */
+ bdk_rng_get_random64();
+ return 0;
+}
+
+bdk_driver_t __bdk_driver_rnm = {
+ .id = (BDK_PCC_PROD_E_GEN << 24) | BDK_PCC_VENDOR_E_CAVIUM | (BDK_PCC_DEV_IDL_E_RNM << 16),
+ .probe = probe,
+ .init = init,
+};
diff --git a/src/vendorcode/cavium/bdk/libbdk-hal/bdk-clock.c b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-clock.c
new file mode 100644
index 0000000000..f81285dffd
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-clock.c
@@ -0,0 +1,221 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-gti.h"
+#include "libbdk-arch/bdk-csrs-ocx.h"
+
+/**
+ * Called in __bdk_init to set up the global timer
+ */
+void bdk_clock_setup(bdk_node_t node)
+{
+ const bdk_node_t local_node = bdk_numa_local();
+
+ /* Check if the counter was already setup */
+ BDK_CSR_INIT(cntcr, node, BDK_GTI_CC_CNTCR);
+ if (cntcr.s.en)
+ return;
+
+ /* Configure GTI to tick at BDK_GTI_RATE */
+ uint64_t sclk = bdk_clock_get_rate(node, BDK_CLOCK_SCLK);
+ uint64_t inc = (BDK_GTI_RATE << 32) / sclk;
+ BDK_CSR_WRITE(node, BDK_GTI_CC_CNTRATE, inc);
+ BDK_CSR_WRITE(node, BDK_GTI_CTL_CNTFRQ, BDK_GTI_RATE);
+ cntcr.s.en = 1;
+ if (node != local_node)
+ {
+        /* Synchronize with the local node. This is a very simple set of the
+           counter, so it will be off a little */
+ BDK_CSR_WRITE(node, BDK_GTI_CC_CNTCV, bdk_clock_get_count(BDK_CLOCK_TIME));
+ }
+ /* Enable the counter */
+ BDK_CSR_WRITE(node, BDK_GTI_CC_CNTCR, cntcr.u);
+ BDK_CSR_READ(node, BDK_GTI_CC_CNTCR);
+
+ if (node != local_node)
+ {
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X))
+ {
+ /* Assume the delay in each direction is the same, sync the counters */
+ int64_t local1 = bdk_clock_get_count(BDK_CLOCK_TIME);
+ int64_t remote = BDK_CSR_READ(node, BDK_GTI_CC_CNTCV);
+ int64_t local2 = bdk_clock_get_count(BDK_CLOCK_TIME);
+ int64_t expected = (local1 + local2) / 2;
+ BDK_CSR_WRITE(node, BDK_GTI_CC_CNTADD, expected - remote);
+ BDK_TRACE(INIT, "N%d.GTI: Clock synchronization with master\n"
+ " expected: %ld, remote %ld\n"
+ " Counter correction: %ld\n",
+ node, expected, remote, expected - remote);
+ }
+ else
+ {
+ /* Due to errata TBD, we need to use OCX_PP_CMD to write
+ GTI_CC_CNTMB in order for timestamps to update. These constants
+ are the addresses we need for both local and remote GTI_CC_CNTMB */
+ const uint64_t LOCAL_GTI_CC_CNTMB = bdk_numa_get_address(local_node, BDK_GTI_CC_CNTMB);
+ const uint64_t REMOTE_GTI_CC_CNTMB = bdk_numa_get_address(node, BDK_GTI_CC_CNTMB);
+ /* Build partial OCX_PP_CMD command used for writes. Address will
+ be filled later */
+ BDK_CSR_DEFINE(pp_cmd, BDK_OCX_PP_CMD);
+ pp_cmd.u = 0;
+ pp_cmd.s.wr_mask = 0xff;
+
+ const int NUM_AVERAGE = 16; /* Choose a power of two to avoid division */
+ int64_t local_to_remote_sum = 0;
+ int64_t local_to_remote_min = 1000000;
+ int64_t local_to_remote_max = -1000000;
+ int64_t remote_to_local_sum = 0;
+ int64_t remote_to_local_min = 1000000;
+ int64_t remote_to_local_max = -1000000;
+ for (int loop = 0; loop < NUM_AVERAGE; loop++)
+ {
+ /* Perform a write to the remote GTI_CC_CNTMB to cause timestamp
+ update. We don't care about the value actually written */
+ pp_cmd.s.addr = REMOTE_GTI_CC_CNTMB;
+ BDK_CSR_WRITE(local_node, BDK_OCX_PP_CMD, pp_cmd.u);
+ BDK_CSR_READ(local_node, BDK_OCX_PP_CMD);
+
+ int64_t remote = BDK_CSR_READ(node, BDK_GTI_CC_CNTMBTS);
+ int64_t local = BDK_CSR_READ(local_node, BDK_GTI_CC_CNTMBTS);
+ int64_t delta = remote - local;
+
+ local_to_remote_sum += delta;
+ if (delta < local_to_remote_min)
+ local_to_remote_min = delta;
+ if (delta > local_to_remote_max)
+ local_to_remote_max = delta;
+
+ /* Perform a write to the local GTI_CC_CNTMB to cause timestamp
+ update. We don't care about the value actually written */
+ pp_cmd.s.addr = LOCAL_GTI_CC_CNTMB;
+ BDK_CSR_WRITE(node, BDK_OCX_PP_CMD, pp_cmd.u);
+ BDK_CSR_READ(node, BDK_OCX_PP_CMD);
+
+ remote = BDK_CSR_READ(node, BDK_GTI_CC_CNTMBTS);
+ local = BDK_CSR_READ(local_node, BDK_GTI_CC_CNTMBTS);
+ delta = local - remote;
+
+ remote_to_local_sum += delta;
+ if (delta < remote_to_local_min)
+ remote_to_local_min = delta;
+ if (delta > remote_to_local_max)
+ remote_to_local_max = delta;
+ }
+ /* Calculate average, rounding to nearest */
+ int64_t local_to_remote = (local_to_remote_sum + NUM_AVERAGE/2) / NUM_AVERAGE;
+ int64_t remote_to_local = (remote_to_local_sum + NUM_AVERAGE/2) / NUM_AVERAGE;
+ /* Calculate remote node offset */
+ int64_t remote_offset = (remote_to_local - local_to_remote) / 2;
+ BDK_CSR_WRITE(node, BDK_GTI_CC_CNTADD, remote_offset);
+ BDK_TRACE(INIT, "N%d.GTI: Clock synchronization with master\n"
+ " local -> remote: min %ld, avg %ld, max %ld\n"
+ " remote -> local: min %ld, avg %ld, max %ld\n"
+ " Counter correction: %ld\n",
+ node,
+ local_to_remote_min, local_to_remote, local_to_remote_max,
+ remote_to_local_min, remote_to_local, remote_to_local_max,
+ remote_offset);
+ }
+ }
+}
+
+/**
+ * Get the cycle count based on the clock type.
+ *
+ * @param clock Enumeration of the clock type
+ * @return Number of cycles executed so far
+ */
+uint64_t __bdk_clock_get_count_slow(bdk_clock_t clock)
+{
+ bdk_node_t node = bdk_numa_local();
+ BDK_CSR_INIT(rst_boot, node, BDK_RST_BOOT);
+ if (bdk_is_platform(BDK_PLATFORM_EMULATOR))
+ {
+ /* Force RCLK and SCLK to be 1GHz on emulator */
+ rst_boot.s.c_mul = 20;
+ rst_boot.s.pnr_mul = 20;
+ }
+ uint64_t ref_cntr = BDK_CSR_READ(node, BDK_RST_REF_CNTR);
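+    /* RST_REF_CNTR counts reference clock ticks; RCLK and SCLK run at the
+       reference clock times the PLL multipliers (c_mul / pnr_mul), so
+       scaling converts reference ticks into core or system clock cycles */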
+    switch (clock)
+ {
+ case BDK_CLOCK_TIME:
+ return 0; /* Handled in fast path */
+ case BDK_CLOCK_MAIN_REF:
+ return ref_cntr;
+ case BDK_CLOCK_RCLK:
+ return ref_cntr * rst_boot.s.c_mul;
+ case BDK_CLOCK_SCLK:
+ return ref_cntr * rst_boot.s.pnr_mul;
+ }
+ return 0;
+}
+
+/**
+ * Get the clock rate based on the clock type.
+ *
+ * @param node  Node to use in a NUMA setup. Can be an exact ID or a special value.
+ * @param clock Enumeration of the clock type
+ * @return The clock rate in Hz
+ */
+uint64_t __bdk_clock_get_rate_slow(bdk_node_t node, bdk_clock_t clock)
+{
+    /* This is currently defined to be 50 MHz */
+ const uint64_t REF_CLOCK = 50000000;
+
+ BDK_CSR_INIT(mio_rst_boot, node, BDK_RST_BOOT);
+ if (bdk_is_platform(BDK_PLATFORM_EMULATOR))
+ {
+ /* Force RCLK and SCLK to be 1GHz on emulator */
+ mio_rst_boot.s.c_mul = 20;
+ mio_rst_boot.s.pnr_mul = 20;
+ }
+ switch (clock)
+ {
+ case BDK_CLOCK_TIME:
+            return BDK_GTI_RATE; /* Programmed as part of setup */
+ case BDK_CLOCK_MAIN_REF:
+ return REF_CLOCK;
+ case BDK_CLOCK_RCLK:
+ return REF_CLOCK * mio_rst_boot.s.c_mul;
+ case BDK_CLOCK_SCLK:
+ return REF_CLOCK * mio_rst_boot.s.pnr_mul;
+ }
+ return 0;
+}
+
diff --git a/src/vendorcode/cavium/bdk/libbdk-hal/bdk-config.c b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-config.c
new file mode 100644
index 0000000000..d4b412d439
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-config.c
@@ -0,0 +1,1946 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include <stdarg.h>
+#include <libfdt.h>
+#include <unistd.h>
+#include "libbdk-arch/bdk-csrs-mio_fus.h"
+#include "libbdk-arch/bdk-csrs-fus.h"
+
+/* Set this define to override the trace level the BDK uses. This is most
+   useful with trusted boot, when the setup menus are not able to
+   configure the trace level. A possible example: */
+//#define BDK_TRACE_OVERRIDE (1ull << BDK_TRACE_ENABLE_INIT)
+#define BDK_TRACE_OVERRIDE 0
+
+typedef enum
+{
+ BDK_CONFIG_TYPE_INT,
+ BDK_CONFIG_TYPE_STR,
+ BDK_CONFIG_TYPE_STR_LIST,
+ BDK_CONFIG_TYPE_BINARY,
+} bdk_config_type_t;
+
+typedef struct
+{
+    const char *format;           /* Printf style format string to create the item name */
+    const bdk_config_type_t ctype; /* Type of this item */
+    int64_t default_value;        /* Default value when not present. String defaults are cast to pointers from this */
+    const int64_t min_value;      /* Minimum valid value for INT parameters. Unused for strings */
+    const int64_t max_value;      /* Maximum valid value for INT parameters. Unused for strings */
+} bdk_config_info_t;
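+
+/* Illustrative example (values hypothetical): the format string
+   "QLM-FREQ.N%d.QLM%d" with node 0 and QLM 2 produces the device tree
+   property name "QLM-FREQ.N0.QLM2" */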
+
+static void config_set_defaults(void);
+
+/* Tracing defaults to the level specified here before config files are loaded */
+uint64_t bdk_trace_enables = BDK_TRACE_OVERRIDE;
+
+/* Global variables that contain the config inside a FDT */
+static void *config_fdt;
+static int config_node;
+
+static bdk_config_info_t config_info[__BDK_CONFIG_END] = {
+ /* Board manufacturing data */
+ [BDK_CONFIG_BOARD_MODEL] = {
+ .format = "BOARD-MODEL", /* String, No parameters */
+ .ctype = BDK_CONFIG_TYPE_STR,
+ .default_value = (long)"unknown",
+ },
+ [BDK_CONFIG_BOARD_REVISION] = {
+ .format = "BOARD-REVISION", /* String, No parameters */
+ .ctype = BDK_CONFIG_TYPE_STR,
+ .default_value = (long)"unknown",
+ },
+ [BDK_CONFIG_BOARD_SERIAL] = {
+ .format = "BOARD-SERIAL", /* String, No parameters */
+ .ctype = BDK_CONFIG_TYPE_STR,
+ .default_value = (long)"unknown",
+ },
+ [BDK_CONFIG_MAC_ADDRESS] = {
+ .format = "BOARD-MAC-ADDRESS", /* Int64, No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* Default updated at boot based on fuses */
+ .min_value = 0,
+ .max_value = 0xffffffffffffll,
+ },
+ [BDK_CONFIG_MAC_ADDRESS_NUM] = {
+ .format = "BOARD-MAC-ADDRESS-NUM", /* Int, No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 256,
+ },
+ [BDK_CONFIG_MAC_ADDRESS_NUM_OVERRIDE] = {
+ .format = "BOARD-MAC-ADDRESS-NUM-OVERRIDE", /* Int, No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1,
+ .min_value = -1,
+ .max_value = 256,
+ },
+
+ /* Board generic */
+ [BDK_CONFIG_BMC_TWSI] = {
+ .format = "BMC-TWSI", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* TWSI bus number, -1 = disabled */
+ .min_value = -1,
+ .max_value = 5,
+ },
+ [BDK_CONFIG_WATCHDOG_TIMEOUT] = {
+ .format = "WATCHDOG-TIMEOUT", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* 0 = disabled */
+ .min_value = 0,
+ .max_value = 10000,
+ },
+ [BDK_CONFIG_TWSI_WRITE] = {
+ .format = "TWSI-WRITE", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_STR_LIST,
+ },
+ [BDK_CONFIG_MDIO_WRITE] = {
+ .format = "MDIO-WRITE", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_STR_LIST,
+ },
+
+ /* Board wiring of network ports and PHYs */
+ [BDK_CONFIG_PHY_ADDRESS] = {
+ .format = "PHY-ADDRESS.N%d.BGX%d.P%d", /* Parameters: Node, Interface, Port */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* Default to no PHY */
+ .min_value = -1,
+ .max_value = 0xffffffffll,
+ },
+ [BDK_CONFIG_BGX_ENABLE] = {
+ .format = "BGX-ENABLE.N%d.BGX%d.P%d", /* Parameters: Node, BGX, Port */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 1, /* 0 = disable, 1 = enable */
+ .min_value = 0,
+ .max_value = 1,
+ },
+    /* Non-EBB specific SFF8104 and similar boards */
+    [BDK_CONFIG_AQUANTIA_PHY] = {
+        .format = "AQUANTIA-PHY.N%d.BGX%d.P%d", /* Parameters: Node, BGX, Port */
+        .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0xffffll,
+ },
+
+
+ /* BDK Configuration params */
+ [BDK_CONFIG_VERSION] = {
+ .format = "BDK-VERSION",
+ .ctype = BDK_CONFIG_TYPE_STR,
+ },
+ [BDK_CONFIG_NUM_PACKET_BUFFERS] = {
+ .format = "BDK-NUM-PACKET-BUFFERS",
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* Default updated at boot */
+ .min_value = 0,
+ .max_value = 1000000,
+ },
+ [BDK_CONFIG_PACKET_BUFFER_SIZE] = {
+ .format = "BDK-PACKET-BUFFER-SIZE",
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 1024, /* bytes */
+ .min_value = 128,
+ .max_value = 32768,
+ },
+ [BDK_CONFIG_SHOW_LINK_STATUS] = {
+ .format = "BDK-SHOW-LINK-STATUS",
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 1, /* 0 = off, 1 = on */
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_COREMASK] = {
+ .format = "BDK-COREMASK",
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* Zero means all cores */
+ .min_value = 0,
+ .max_value = 0xffffffffffffll,
+ },
+ [BDK_CONFIG_BOOT_MENU_TIMEOUT] = {
+ .format = "BDK-BOOT-MENU-TIMEOUT",
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 10, /* seconds */
+ .min_value = 0,
+ .max_value = 300,
+ },
+ [BDK_CONFIG_BOOT_PATH_OPTION] = {
+ .format = "BDK-BOOT-PATH-OPTION",
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* 0 = normal, 1 = diagnostics */
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_BOOT_NEXT_STAGE] = {
+ .format = "BDK-CONFIG-BOOT-NEXT-STAGE-%s",
+ .ctype = BDK_CONFIG_TYPE_STR,
+ },
+ [BDK_CONFIG_TRACE] = {
+ .format = "BDK-CONFIG-TRACE",
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* bitmask */
+ .min_value = 0,
+ .max_value = 0x7fffffffffffffffull,
+ },
+
+ /* Chip feature items */
+ [BDK_CONFIG_MULTI_NODE] = {
+ .format = "MULTI-NODE", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 2, /* 2 = Auto */
+ .min_value = 0,
+ .max_value = 2,
+ },
+ [BDK_CONFIG_PCIE_EA] = {
+ .format = "PCIE-ENHANCED-ALLOCATION", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 1, /* 1 = EA supported, 0 = EA not supported */
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_PCIE_ORDERING] = {
+ .format = "PCIE-ORDERING", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* 1 = Wait for commit, 0 = Don't wait for commit */
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_PCIE_PRESET_REQUEST_VECTOR] = {
+ .format = "PCIE-PRESET-REQUEST-VECTOR.N%d.PORT%d", /* Parameters: Node, Port */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0x593, /* Value for PCIERCX_CFG554[PRV] */
+ .min_value = 0,
+ .max_value = 0xffff,
+ },
+ [BDK_CONFIG_PCIE_WIDTH] = {
+ .format = "PCIE-WIDTH.N%d.PORT%d", /* Parameters: Node, Port */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* Width override for PCIe links */
+ .min_value = -1,
+ .max_value = 16,
+ },
+ [BDK_CONFIG_PCIE_PHYSICAL_SLOT] = {
+ .format = "PCIE-PHYSICAL-SLOT.N%d.PORT%d", /* Parameters: Node, Port */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* Define which physical slot we connect to on the board */
+ .min_value = -1,
+ .max_value = 8191,
+ },
+ [BDK_CONFIG_PCIE_FLASH] = {
+ .format = "PCIE-FLASH.N%d.PORT%d", /* Parameters: Node, Port */
+ .ctype = BDK_CONFIG_TYPE_STR_LIST,
+ },
+ [BDK_CONFIG_CCPI_LANE_REVERSE] = {
+ .format = "CCPI-LANE-REVERSE", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* 0 = No forced lane reversal, 1 = forced lane reversal */
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_CHIP_SKU] = {
+ .format = "CHIP-SKU.NODE%d", /* Parameter: Node */
+ .ctype = BDK_CONFIG_TYPE_STR,
+ .default_value = (long)"TBD",
+ },
+ [BDK_CONFIG_CHIP_SERIAL] = {
+ .format = "CHIP-SERIAL.NODE%d", /* Parameter: Node */
+ .ctype = BDK_CONFIG_TYPE_STR,
+ .default_value = (long)"TBD",
+ },
+ [BDK_CONFIG_CHIP_UNIQUE_ID] = {
+ .format = "CHIP-UNIQUE-ID.NODE%d", /* Parameter: Node */
+ .ctype = BDK_CONFIG_TYPE_STR,
+ .default_value = (long)"TBD",
+ },
+
+ /* QLM related config */
+ [BDK_CONFIG_QLM_AUTO_CONFIG] = {
+ .format = "QLM-AUTO-CONFIG", /* Parameters: Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* 0 = off, 1 = on */
+ .min_value = 0,
+ .max_value = 1,
+ },
+ /* SFF8104 related QLM config */
+ [BDK_CONFIG_QLM_DIP_AUTO_CONFIG] = {
+ .format = "QLM-DIP-AUTO-CONFIG", /* Parameters: Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* 0 = off, 1 = on */
+ .min_value = 0,
+ .max_value = 1,
+ },
+
+ [BDK_CONFIG_QLM_MODE] = {
+ .format = "QLM-MODE.N%d.QLM%d", /* Parameters: Node, QLM */
+ .ctype = BDK_CONFIG_TYPE_STR,
+ },
+ [BDK_CONFIG_QLM_FREQ] = {
+ .format = "QLM-FREQ.N%d.QLM%d", /* Parameters: Node, QLM */
+ .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0, /* MHz */
+ .min_value = 0,
+ .max_value = 10312,
+ },
+ [BDK_CONFIG_QLM_CLK] = {
+ .format = "QLM-CLK.N%d.QLM%d", /* Parameters: Node, QLM */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 2, /* 2 = External */
+ .min_value = 0,
+ .max_value = 2,
+ },
+ [BDK_CONFIG_QLM_TUNING_TX_SWING] = {
+ .format = "QLM-TUNING-TX-SWING.N%d.QLM%d.LANE%d", /* Parameters: Node, QLM, Lane */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* Default of no tuning */
+ .min_value = -1,
+ .max_value = 31,
+ },
+ [BDK_CONFIG_QLM_TUNING_TX_PREMPTAP] = {
+ .format = "QLM-TUNING-TX-PREMPTAP.N%d.QLM%d.LANE%d", /* Parameters: Node, QLM, Lane */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* Default of no tuning */
+ .min_value = -1,
+ .max_value = 511,
+ },
+ [BDK_CONFIG_QLM_TUNING_TX_GAIN] = {
+ .format = "QLM-TUNING-TX-GAIN.N%d.QLM%d.LANE%d", /* Parameters: Node, QLM, Lane */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* Default of no tuning */
+ .min_value = -1,
+ .max_value = 7,
+ },
+ [BDK_CONFIG_QLM_TUNING_TX_VBOOST] = {
+ .format = "QLM-TUNING-TX-VBOOST.N%d.QLM%d.LANE%d", /* Parameters: Node, QLM, Lane */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* Default of no tuning */
+ .min_value = -1,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_QLM_CHANNEL_LOSS] = {
+ .format = "QLM-CHANNEL-LOSS.N%d.QLM%d", /* Parameters: Node, QLM */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* Default will use Cavium defaults */
+ .min_value = -1,
+ .max_value = 40,
+ },
+
+ /* DRAM configuration options */
+ [BDK_CONFIG_DDR_SPEED] = {
+ .format = "DDR-SPEED.N%d", /* Parameters: Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* In MT/s */
+ .min_value = 0,
+ .max_value = 2400,
+ },
+ [BDK_CONFIG_DDR_ALT_REFCLK] = {
+ .format = "DDR-ALT-REFCLK.N%d", /* Parameters: Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0, /* MHz */
+ .min_value = 0,
+ .max_value = 100,
+ },
+ [BDK_CONFIG_DDR_SPD_ADDR] = {
+ .format = "DDR-CONFIG-SPD-ADDR.DIMM%d.LMC%d.N%d", /* Parameters: DIMM, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0xffff,
+ },
+ [BDK_CONFIG_DDR_SPD_DATA] = {
+ .format = "DDR-CONFIG-SPD-DATA.DIMM%d.LMC%d.N%d", /* Parameters: DIMM, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_BINARY,
+ },
+ [BDK_CONFIG_DDR_RANKS_DQX_CTL] = {
+ .format = "DDR-CONFIG-DQX-CTL.RANKS%d.DIMMS%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0xf,
+ },
+ [BDK_CONFIG_DDR_RANKS_WODT_MASK] = {
+ .format = "DDR-CONFIG-WODT-MASK.RANKS%d.DIMMS%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0xfffffff,
+ },
+ [BDK_CONFIG_DDR_RANKS_MODE1_PASR] = {
+ .format = "DDR-CONFIG-MODE1-PASR.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0x7,
+ },
+ [BDK_CONFIG_DDR_RANKS_MODE1_ASR] = {
+ .format = "DDR-CONFIG-MODE1-ASR.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_DDR_RANKS_MODE1_SRT] = {
+ .format = "DDR-CONFIG-MODE1-SRT.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR] = {
+ .format = "DDR-CONFIG-MODE1-RTT-WR.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT, // Split for extension bit
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0x7,
+ },
+ [BDK_CONFIG_DDR_RANKS_MODE1_DIC] = {
+ .format = "DDR-CONFIG-MODE1-DIC.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0x3,
+ },
+ [BDK_CONFIG_DDR_RANKS_MODE1_RTT_NOM] = {
+ .format = "DDR-CONFIG-MODE1-RTT-NOM.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0x7,
+ },
+ [BDK_CONFIG_DDR_RANKS_MODE1_DB_OUTPUT_IMPEDANCE] = {
+ .format = "DDR-CONFIG-MODE1-DB-OUTPUT-IMPEDANCE.RANKS%d.DIMMS%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT, // Not per RANK, only one
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0x7,
+ },
+ [BDK_CONFIG_DDR_RANKS_MODE2_RTT_PARK] = {
+ .format = "DDR-CONFIG-MODE2-RTT-PARK.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0x7,
+ },
+ [BDK_CONFIG_DDR_RANKS_MODE2_VREF_VALUE] = {
+ .format = "DDR-CONFIG-MODE2-VREF-VALUE.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0x3f,
+ },
+ [BDK_CONFIG_DDR_RANKS_MODE2_VREF_RANGE] = {
+ .format = "DDR-CONFIG-MODE2-VREF-RANGE.RANKS%d.DIMMS%d.RANK%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, Rank, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_DDR_RANKS_MODE2_VREFDQ_TRAIN_EN] = {
+ .format = "DDR-CONFIG-MODE2-VREFDQ-TRAIN-EN.RANKS%d.DIMMS%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT, // Not per RANK, only one
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 1,
+ },
+
+ [BDK_CONFIG_DDR_RANKS_RODT_CTL] = {
+ .format = "DDR-CONFIG-RODT-CTL.RANKS%d.DIMMS%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0xf,
+ },
+ [BDK_CONFIG_DDR_RANKS_RODT_MASK] = {
+ .format = "DDR-CONFIG-RODT-MASK.RANKS%d.DIMMS%d.LMC%d.N%d", /* Parameters: Num Ranks, Num DIMMs, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0xfffffff,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_MIN_RTT_NOM_IDX] = {
+ .format = "DDR-CONFIG-CUSTOM-MIN-RTT-NOM-IDX.LMC%d.N%d", /* Parameters: LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 1,
+ .min_value = 0,
+ .max_value = 7,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_MAX_RTT_NOM_IDX] = {
+ .format = "DDR-CONFIG-CUSTOM-MAX-RTT-NOM-IDX.LMC%d.N%d", /* Parameters: LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 5,
+ .min_value = 0,
+ .max_value = 7,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_MIN_RODT_CTL] = {
+ .format = "DDR-CONFIG-CUSTOM-MIN-RODT-CTL.LMC%d.N%d", /* Parameters: LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 1,
+ .min_value = 0,
+ .max_value = 7,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_MAX_RODT_CTL] = {
+ .format = "DDR-CONFIG-CUSTOM-MAX-RODT-CTL.LMC%d.N%d", /* Parameters: LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 5,
+ .min_value = 0,
+ .max_value = 7,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_CK_CTL] = {
+ .format = "DDR-CONFIG-CUSTOM-CK-CTL.LMC%d.N%d", /* Parameters: LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0xffff,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_CMD_CTL] = {
+ .format = "DDR-CONFIG-CUSTOM-CMD-CTL.LMC%d.N%d", /* Parameters: LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0xffff,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_CTL_CTL] = {
+ .format = "DDR-CONFIG-CUSTOM-CTL-CTL.LMC%d.N%d", /* Parameters: LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0xf,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_MIN_CAS_LATENCY] = {
+ .format = "DDR-CONFIG-CUSTOM-MIN-CAS-LATENCY.LMC%d.N%d", /* Parameters: LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0xffff,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_OFFSET_EN] = {
+ .format = "DDR-CONFIG-CUSTOM-OFFSET-EN.LMC%d.N%d", /* Parameters: LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 1,
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_OFFSET] = {
+ .format = "DDR-CONFIG-CUSTOM-OFFSET.%s.LMC%d.N%d", /* Parameters: Type(UDIMM,RDIMM), LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT, // UDIMM or RDIMM
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0xf,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_RLEVEL_COMPUTE] = {
+ .format = "DDR-CONFIG-CUSTOM-RLEVEL-COMPUTE.LMC%d.N%d", /* Parameters: LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_RLEVEL_COMP_OFFSET] = {
+ .format = "DDR-CONFIG-CUSTOM-RLEVEL-COMP-OFFSET.%s.LMC%d.N%d", /* Parameters: Type(UDIMM,RDIMM), LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT, // UDIMM or RDIMM
+ .default_value = 2,
+ .min_value = 0,
+ .max_value = 0xffff,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_DDR2T] = {
+ .format = "DDR-CONFIG-CUSTOM-DDR2T.%s.LMC%d.N%d", /* Parameters: Type(UDIMM,RDIMM), LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT, // UDIMM or RDIMM
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_DISABLE_SEQUENTIAL_DELAY_CHECK] = {
+ .format = "DDR-CONFIG-CUSTOM-DISABLE-SEQUENTIAL-DELAY-CHECK.LMC%d.N%d", /* Parameters: LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_MAXIMUM_ADJACENT_RLEVEL_DELAY_INCREMENT] = {
+ .format = "DDR-CONFIG-CUSTOM-MAXIMUM-ADJACENT-RLEVEL-DELAY-INCREMENT.LMC%d.N%d", /* Parameters: LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0xffff,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_PARITY] = {
+ .format = "DDR-CONFIG-CUSTOM-PARITY.LMC%d.N%d", /* Parameters: LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_FPRCH2] = {
+ .format = "DDR-CONFIG-CUSTOM-FPRCH2.LMC%d.N%d", /* Parameters: LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 0xf,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_MODE32B] = {
+ .format = "DDR-CONFIG-CUSTOM-MODE32B.LMC%d.N%d", /* Parameters: LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_MEASURED_VREF] = {
+ .format = "DDR-CONFIG-CUSTOM-MEASURED-VREF.LMC%d.N%d", /* Parameters: LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_DLL_WRITE_OFFSET] = {
+ .format = "DDR-CONFIG-CUSTOM-DLL-WRITE-OFFSET.BYTE%d.LMC%d.N%d", /* Parameters: Byte, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = -63,
+ .max_value = 63,
+ },
+ [BDK_CONFIG_DDR_CUSTOM_DLL_READ_OFFSET] = {
+ .format = "DDR-CONFIG-CUSTOM-DLL-READ-OFFSET.BYTE%d.LMC%d.N%d", /* Parameters: Byte, LMC, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0,
+ .min_value = -63,
+ .max_value = 63,
+ },
+
+ /* High level DRAM options */
+ [BDK_CONFIG_DRAM_VERBOSE] = {
+ .format = "DDR-VERBOSE", /* Parameters: Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* 0 = off */
+ .min_value = 0,
+ .max_value = 255,
+ },
+ [BDK_CONFIG_DRAM_BOOT_TEST] = {
+ .format = "DDR-TEST-BOOT", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* 0 = off, 1 = on */
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_DRAM_CONFIG_GPIO] = {
+ .format = "DDR-CONFIG-GPIO", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* -1 = disabled, otherwise GPIO number */
+ .min_value = -1,
+ .max_value = 63,
+ },
+ [BDK_CONFIG_DRAM_SCRAMBLE] = {
+ .format = "DDR-CONFIG-SCRAMBLE", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 2, /* 0=off, 1=on, 2=trust on, non-trust off */
+ .min_value = 0,
+ .max_value = 2,
+ },
+
+ /* USB */
+ [BDK_CONFIG_USB_PWR_GPIO] = {
+ .format = "USB-PWR-GPIO.N%d.PORT%d", /* Parameters: Node, Port */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* GPIO number, or -1 for none */
+ .min_value = -1,
+ .max_value = 49,
+ },
+ [BDK_CONFIG_USB_PWR_GPIO_POLARITY] = {
+ .format = "USB-PWR-GPIO-POLARITY.N%d.PORT%d", /* Parameters: Node, Port */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 1, /* GPIO polarity: 1=high, 0=low */
+ .min_value = 0,
+ .max_value = 1,
+ },
+ [BDK_CONFIG_USB_REFCLK_SRC] = {
+ .format = "USB-REFCLK-SRC.N%d.PORT%d", /* Parameters: Node, Port */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* Clock Source (SS:HS)
+ ** 0 - SS(USB_REF_CLK) HS(USB_REF_CLK)
+ ** 1 - SS(DLMC_REF_CLK0) HS(DLMC_REF_CLK0)
+ ** 2 - SS(DLMC_REF_CLK1) HS(DLMC_REF_CLK1)
+ ** 3 - SS(USB_REF_CLK) HS(PLL_REF_CLK)
+ ** 4 - SS(DLMC_REF_CLK0) HS(PLL_REF_CLK)
+ ** 5 - SS(DLMC_REF_CLK1) HS(PLL_REF_CLK)
+ */
+ .min_value = 0,
+ .max_value = 5,
+ },
+
+    /* Nitrox reset - For CN88XX SC and SNT parts. High drives Nitrox DC_OK high */
+ [BDK_CONFIG_NITROX_GPIO] = {
+ .format = "NITROX-GPIO.N%d", /* Parameters: Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* GPIO number, or -1 for none */
+ .min_value = -1,
+ .max_value = 49,
+ },
+
+ /* How EYE diagrams are captured from a QLM */
+ [BDK_CONFIG_EYE_ZEROS] = {
+ .format = "QLM-EYE-NUM-ZEROS", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 2,
+ .min_value = 1,
+ .max_value = 63,
+ },
+ [BDK_CONFIG_EYE_SAMPLE_TIME] = {
+ .format = "QLM-EYE-SAMPLE-TIME", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 400, /* us */
+ .min_value = 20, /* us */
+ .max_value = 10000000, /* us */
+ },
+ [BDK_CONFIG_EYE_SETTLE_TIME] = {
+ .format = "QLM-EYE-SETTLE-TIME", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 50, /* us */
+ .min_value = 20, /* us */
+ .max_value = 100000, /* us */
+ },
+
+ /* SGPIO */
+ [BDK_CONFIG_SGPIO_SCLOCK_FREQ] = {
+ .format = "SGPIO-SCLOCK-FREQ.N%d", /* Parameters: Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 10000, /* Hz */
+ .min_value = 128, /* Hz */
+ .max_value = 100000, /* Hz */
+ },
+ [BDK_CONFIG_SGPIO_PIN_POWER] = {
+ .format = "SGPIO-PIN-POWER.N%d", /* Parameters: Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* GPIO number, or -1 for none */
+ .min_value = -1,
+ .max_value = 50,
+ },
+ [BDK_CONFIG_SGPIO_PIN_SCLOCK] = {
+ .format = "SGPIO-PIN-SCLOCK.N%d", /* Parameters: Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* GPIO number, or -1 for none */
+ .min_value = -1,
+ .max_value = 50,
+ },
+ [BDK_CONFIG_SGPIO_PIN_SLOAD] = {
+ .format = "SGPIO-PIN-SLOAD.N%d", /* Parameters: Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* GPIO number, or -1 for none */
+ .min_value = -1,
+ .max_value = 50,
+ },
+ [BDK_CONFIG_SGPIO_PIN_SDATAOUT] = {
+ .format = "SGPIO-PIN-SDATAOUT.N%d.D%d", /* Parameters: Node, Dataline */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* GPIO number, or -1 for none */
+ .min_value = -1,
+ .max_value = 50,
+ },
+
+ /* VRM temperature throttling */
+ [BDK_CONFIG_VRM_TEMP_TRIP] = {
+ .format = "VRM-TEMP-TRIP.N%d", /* Parameters: Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 110, /* Degrees C */
+ .min_value = 0, /* Degrees C */
+ .max_value = 110, /* Degrees C. Max die temp plus 5 for uncertainty of measurement */
+ },
+ [BDK_CONFIG_VRM_TEMP_HIGH] = {
+ .format = "VRM-TEMP-HIGH.N%d", /* Parameters: Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 110, /* Degrees C */
+ .min_value = 0, /* Degrees C */
+ .max_value = 110, /* Degrees C. Max die temp plus 5 for uncertainty of measurement */
+ },
+ [BDK_CONFIG_VRM_TEMP_LOW] = {
+ .format = "VRM-TEMP-LOW.N%d", /* Parameters: Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 100, /* Degrees C */
+ .min_value = 0, /* Degrees C */
+ .max_value = 110, /* Degrees C. Max die temp plus 5 for uncertainty of measurement */
+ },
+ [BDK_CONFIG_VRM_THROTTLE_NORMAL] = {
+ .format = "VRM-THROTTLE-NORMAL.N%d", /* Parameters: Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 65, /* Percentage */
+ .min_value = 1, /* Percentage */
+ .max_value = 100, /* Percentage */
+ },
+ [BDK_CONFIG_VRM_THROTTLE_THERM] = {
+ .format = "VRM-THROTTLE-THERM.N%d", /* Parameters: Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 15, /* Percentage */
+ .min_value = 1, /* Percentage */
+ .max_value = 100, /* Percentage */
+ },
+
+ /* Generic GPIO, unrelated to a specific block */
+ [BDK_CONFIG_GPIO_PIN_SELECT] = {
+ .format = "GPIO-PIN-SELECT-GPIO%d.N%d", /* Parameters: GPIO, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = -1, /* Hardware default, normal GPIO pin */
+ .min_value = 0, /* GPIO_PIN_SEL_E enumeration */
+ .max_value = 65535, /* GPIO_PIN_SEL_E enumeration */
+ },
+ [BDK_CONFIG_GPIO_POLARITY] = {
+ .format = "GPIO-POLARITY-GPIO%d.N%d", /* Parameters: GPIO, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* Hardware default, not inverted */
+ .min_value = 0, /* Not inverted */
+ .max_value = 1, /* Inverted */
+ },
+
+ /* PBUS */
+ [BDK_CONFIG_PBUS_CFG] = {
+ .format = "PBUS-CFG.REGION%d.N%d", /* Parameters: Region, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* Hardware default */
+ .min_value = 0, /* No change */
+ .max_value = 0x0000ffffffffffffll, /* PBUS_REGX_CFG value */
+ },
+ [BDK_CONFIG_PBUS_TIM] = {
+ .format = "PBUS-TIM.REGION%d.N%d", /* Parameters: Region, Node */
+ .ctype = BDK_CONFIG_TYPE_INT,
+        .default_value = 0, /* Hardware default */
+ .min_value = 0x8000000000000000ll, /* PBUS_REGX_TIM value, zero is no change */
+ .max_value = 0x7fffffffffffffffll, /* PBUS_REGX_TIM value */
+ },
+
+ /* Trusted boot information */
+ [BDK_CONFIG_TRUST_CSIB] = {
+ .format = "TRUST-CSIB", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_BINARY,
+ .default_value = 0, /* Hardware default */
+ },
+ [BDK_CONFIG_TRUST_ROT_ADDR] = {
+ .format = "TRUST-ROT-ADDR", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* Non-trusted */
+ .min_value = 0, /* No key */
+ .max_value = 0x0000ffffffffffffll, /* Address in key memory */
+ },
+ [BDK_CONFIG_TRUST_BSSK_ADDR] = {
+ .format = "TRUST-BSSK-ADDR", /* No parameters */
+ .ctype = BDK_CONFIG_TYPE_INT,
+ .default_value = 0, /* No HUK, so no BSSK */
+ .min_value = 0, /* No HUK, so no BSSK */
+ .max_value = 0x0000ffffffffffffll, /* Address in key memory */
+ },
+};
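+
+/* In the device tree these items become string properties under the
+   /cavium,bdk node. An illustrative fragment (values are the defaults
+   above, chosen for the example):
+
+       cavium,bdk {
+           BOARD-MODEL = "unknown";
+           BDK-BOOT-MENU-TIMEOUT = "10";
+       };
+*/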
+
+/**
+ * Look up a configuration item in the environment (device tree).
+ *
+ * If the full name isn't found, trailing "." components are stripped off
+ * one at a time and the lookup retried, so a more generic setting applies
+ * when a specific one is missing. For example, "QLM-FREQ.N0.QLM2" falls
+ * back to "QLM-FREQ.N0", then "QLM-FREQ".
+ *
+ * @param name      Name of the configuration item to look up
+ * @param blob_size Optionally updated with the size of the returned data
+ *
+ * @return Value of the property, or NULL if not found
+ */
+static const char *get_value(const char *name, int *blob_size)
+{
+ if (!config_fdt)
+ {
+ bdk_error("bdk-config asked for %s before configuration loaded\n", name);
+ return NULL;
+ }
+
+ char n[64];
+ strncpy(n, name, sizeof(n));
+ n[sizeof(n)-1] = '\0';
+
+ while (*n)
+ {
+ const char *val = fdt_getprop(config_fdt, config_node, n, blob_size);
+ if (val)
+ return val;
+
+ char *p = strrchr(n, '.');
+ if (p)
+ *p = '\0';
+ else
+ break;
+ }
+ return NULL;
+}
+
+/**
+ * Get an integer configuration item
+ *
+ * @param cfg_item Config item to get. If the item takes parameters (see bdk_config_t), then the
+ * parameters are listed following cfg_item.
+ *
+ * @return The value of the configuration item, or its default value if the item is not set
+ */
+int64_t bdk_config_get_int(bdk_config_t cfg_item, ...)
+{
+ /* Make sure the correct access function was called */
+ if (config_info[cfg_item].ctype != BDK_CONFIG_TYPE_INT)
+ bdk_fatal("bdk_config_get_int() called for %s, not an int\n",
+ config_info[cfg_item].format);
+
+ char name[64];
+ va_list args;
+ va_start(args, cfg_item);
+ vsnprintf(name, sizeof(name)-1, config_info[cfg_item].format, args);
+ va_end(args);
+
+ const char *val = get_value(name, NULL);
+ if (val)
+ {
+ int count;
+ int64_t tmp;
+ if ((val[0] == '0') && (val[1] == 'x'))
+ count = sscanf(val + 2, "%lx", &tmp);
+ else
+ count = sscanf(val, "%li", &tmp);
+ if (count == 1)
+ {
+ if ((tmp < config_info[cfg_item].min_value) || (tmp > config_info[cfg_item].max_value))
+ {
+ bdk_warn("Out of range for %s = \"%s\", using default\n", name, val);
+ return config_info[cfg_item].default_value;
+ }
+ return tmp;
+ }
+ else
+ {
+ bdk_warn("Failed to parse %s = \"%s\", using default\n", name, val);
+ return config_info[cfg_item].default_value;
+ }
+ }
+ else
+ return config_info[cfg_item].default_value;
+}
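+
+/* Illustrative usage (hypothetical arguments): look up the PHY address for
+   node 0, BGX 1, port 2 via the "PHY-ADDRESS.N%d.BGX%d.P%d" format:
+
+       int64_t phy = bdk_config_get_int(BDK_CONFIG_PHY_ADDRESS, 0, 1, 2);
+
+   A result of -1 (the default) means no PHY is attached */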
+
+/**
+ * Get a string configuration item
+ *
+ * @param cfg_item Config item to get. If the item takes parameters (see bdk_config_t), then the
+ * parameters are listed following cfg_item.
+ *
+ * @return The value of the configuration item, or its default value if the item is not set
+ */
+const char *bdk_config_get_str(bdk_config_t cfg_item, ...)
+{
+ /* Make sure the correct access function was called */
+ if (config_info[cfg_item].ctype != BDK_CONFIG_TYPE_STR)
+ bdk_fatal("bdk_config_get_str() called for %s, not a str\n",
+ config_info[cfg_item].format);
+
+    char name[64];
+    va_list args;
+    va_start(args, cfg_item);
+
+    if (BDK_CONFIG_QLM_MODE == cfg_item)
+    {
+        /* Check for the deprecated DLM naming first. Use a copy of the
+           va_list, as vsnprintf() leaves the list indeterminate and it is
+           needed again below */
+        va_list args_copy;
+        va_copy(args_copy, args);
+        char name2[64];
+        vsnprintf(name2, sizeof(name2) - 1, "QLM-MODE.N%d.DLM%d", args_copy);
+        va_end(args_copy);
+        const char *val = get_value(name2, NULL);
+        if (val)
+            bdk_warn("%s: QLM-MODE.N%%d.DLM%%d format deprecated. Please use QLM-MODE.N%%d.QLM%%d instead\n", name2);
+    }
+
+    vsnprintf(name, sizeof(name) - 1, config_info[cfg_item].format, args);
+    va_end(args);
+
+ const char *val = get_value(name, NULL);
+ if (val)
+ return val;
+ else
+ return (const char *)config_info[cfg_item].default_value;
+}
+
+/**
+ * Get a binary blob
+ *
+ * @param blob_size Integer to receive the size of the blob
+ * @param cfg_item Config item to get. If the item takes parameters (see bdk_config_t), then the
+ * parameters are listed following cfg_item.
+ *
+ * @return The value of the configuration item, or its default value if the item is not set
+ */
+const void* bdk_config_get_blob(int *blob_size, bdk_config_t cfg_item, ...)
+{
+ char name[64];
+ va_list args;
+ va_start(args, cfg_item);
+ vsnprintf(name, sizeof(name)-1, config_info[cfg_item].format, args);
+ va_end(args);
+
+ const void *val = get_value(name, blob_size);
+ if (val)
+ return val;
+ else
+ return (const void *)config_info[cfg_item].default_value;
+}
+
+/**
+ * Set an integer configuration item. Note this only sets the item in memory,
+ * persistent storage is not updated. The optional parameters for the setting are
+ * not supplied, meaning this function only changes the global default.
+ *
+ * @param value Configuration item value
+ * @param cfg_item Config item to set. If the item takes parameters (see bdk_config_t), then the
+ * parameters are listed following cfg_item.
+ */
+void bdk_config_set_int_no_param(int64_t value, bdk_config_t cfg_item)
+{
+ /* Make sure the correct access function was called */
+ if (config_info[cfg_item].ctype != BDK_CONFIG_TYPE_INT)
+ bdk_fatal("bdk_config_set_int_no_param() called for %s, not an int\n",
+ config_info[cfg_item].format);
+
+ char name[64];
+ char valstr[20];
+ /* Create a name without the optional parameters */
+ strncpy(name, config_info[cfg_item].format, sizeof(name) - 1);
+ name[sizeof(name) - 1] = 0;
+ char *ptr = strchr(name, '.');
+ if (ptr)
+ *ptr = 0;
+
+ if (!config_fdt)
+ {
+ bdk_error("bdk-config set %s before configuration loaded\n", name);
+ return;
+ }
+ if ((value < config_info[cfg_item].min_value) || (value > config_info[cfg_item].max_value))
+ {
+ bdk_error("Set out of range for %s = \"0x%lx\", ignoring\n", name, value);
+ return;
+ }
+
+ if (value < 10)
+ snprintf(valstr, sizeof(valstr), "%ld", value);
+ else
+ snprintf(valstr, sizeof(valstr), "0x%lx", value);
+
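+    /* Values are stored as strings so the resulting .dtb stays human
+       editable; bdk_config_get_int() parses both decimal and 0x hex */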
+ int status = fdt_setprop_string(config_fdt, config_node, name, valstr);
+ if (status < 0)
+ bdk_fatal("Failed to set %s=%s in FDT\n", name, valstr);
+}
+
+/**
+ * Set an integer configuration item. Note this only sets the item in memory,
+ * persistent storage is not updated.
+ *
+ * @param value Configuration item value
+ * @param cfg_item Config item to set. If the item takes parameters (see bdk_config_t), then the
+ * parameters are listed following cfg_item.
+ */
+void bdk_config_set_int(int64_t value, bdk_config_t cfg_item, ...)
+{
+ /* Make sure the correct access function was called */
+ if (config_info[cfg_item].ctype != BDK_CONFIG_TYPE_INT)
+ bdk_fatal("bdk_config_set_int() called for %s, not an int\n",
+ config_info[cfg_item].format);
+
+ char name[64];
+ char valstr[20];
+ va_list args;
+ va_start(args, cfg_item);
+ vsnprintf(name, sizeof(name)-1, config_info[cfg_item].format, args);
+ va_end(args);
+
+ if (!config_fdt)
+ {
+ bdk_error("bdk-config set %s before configuration loaded\n", name);
+ return;
+ }
+ if ((value < config_info[cfg_item].min_value) || (value > config_info[cfg_item].max_value))
+ {
+ bdk_error("Set out of range for %s = \"0x%lx\", ignoring\n", name, value);
+ return;
+ }
+
+ if (value < 10)
+ snprintf(valstr, sizeof(valstr), "%ld", value);
+ else
+ snprintf(valstr, sizeof(valstr), "0x%lx", value);
+
+ int status = fdt_setprop_string(config_fdt, config_node, name, valstr);
+ if (status < 0)
+ bdk_fatal("Failed to set %s=%s in FDT\n", name, valstr);
+}
+
+/**
+ * Set an integer configuration item. Note this only sets the item in memory,
+ * persistent storage is not updated.
+ *
+ * @param value Configuration item value
+ * @param cfg_item Config item to set. If the item takes parameters (see bdk_config_t), then the
+ * parameters are listed following cfg_item.
+ */
+void bdk_config_set_str(const char *value, bdk_config_t cfg_item, ...)
+{
+ /* Make sure the correct access function was called */
+ if (config_info[cfg_item].ctype != BDK_CONFIG_TYPE_STR)
+ bdk_fatal("bdk_config_set_str() called for %s, not a str\n",
+ config_info[cfg_item].format);
+
+ char name[64];
+ va_list args;
+
+ va_start(args, cfg_item);
+ vsnprintf(name, sizeof(name)-1, config_info[cfg_item].format, args);
+ va_end(args);
+
+ if (!config_fdt)
+ {
+ bdk_error("bdk-config set %s before configuration loaded\n", name);
+ return;
+ }
+
+ int status;
+ if (value)
+ status = fdt_setprop_string(config_fdt, config_node, name, value);
+ else
+ status = fdt_delprop(config_fdt, config_node, name);
+
+ if ((status < 0) && (status != -FDT_ERR_NOTFOUND))
+ bdk_fatal("Failed to set %s=%s in FDT\n", name, value);
+}
+
+/**
+ * Set a blob configuration item. Note this only sets the
+ * item in memory, persistent storage is not updated. The optional
+ * parameters for the setting are not supplied, meaning this function
+ * only changes the global default.
+ *
+ * @param size Size of the item in bytes. A size of zero removes the device tree field
+ * @param value Configuration item value
+ * @param cfg_item Config item to set. If the item takes parameters (see bdk_config_t), then the
+ * parameters are listed following cfg_item.
+ */
+void bdk_config_set_blob_no_param(int size, const void *value, bdk_config_t cfg_item)
+{
+ /* Make sure the correct access function was called */
+ if ((config_info[cfg_item].ctype != BDK_CONFIG_TYPE_BINARY) &&
+ (config_info[cfg_item].ctype != BDK_CONFIG_TYPE_STR_LIST))
+ bdk_fatal("bdk_config_set_blob() called for %s, not binary\n",
+ config_info[cfg_item].format);
+
+ char name[64];
+ /* Create a name without the optional parameters */
+ strncpy(name, config_info[cfg_item].format, sizeof(name) - 1);
+ name[sizeof(name) - 1] = 0;
+ char *ptr = strchr(name, '.');
+ if (ptr)
+ *ptr = 0;
+
+ if (!config_fdt)
+ {
+ bdk_error("bdk-config set %s before configuration loaded\n", name);
+ return;
+ }
+
+ int status;
+ if (size)
+ status = fdt_setprop(config_fdt, config_node, name, value, size);
+ else
+ status = fdt_delprop(config_fdt, config_node, name);
+
+ if ((status < 0) && (status != -FDT_ERR_NOTFOUND))
+ bdk_fatal("Failed to set %s in FDT\n", name);
+}
+
+/**
+ * Set a blob configuration item. Note this only sets the
+ * item in memory, persistent storage is not updated.
+ *
+ * @param size Size of the item in bytes. A size of zero removes the device tree field
+ * @param value Configuration item value
+ * @param cfg_item Config item to set. If the item takes parameters (see bdk_config_t), then the
+ * parameters are listed following cfg_item.
+ */
+void bdk_config_set_blob(int size, const void *value, bdk_config_t cfg_item, ...)
+{
+ /* Make sure the correct access function was called */
+ if ((config_info[cfg_item].ctype != BDK_CONFIG_TYPE_BINARY) &&
+ (config_info[cfg_item].ctype != BDK_CONFIG_TYPE_STR_LIST))
+ bdk_fatal("bdk_config_set_blob() called for %s, not binary\n",
+ config_info[cfg_item].format);
+
+ char name[64];
+ va_list args;
+
+ va_start(args, cfg_item);
+ vsnprintf(name, sizeof(name)-1, config_info[cfg_item].format, args);
+ va_end(args);
+
+ if (!config_fdt)
+ {
+ bdk_error("bdk-config set %s before configuration loaded\n", name);
+ return;
+ }
+
+ int status;
+ if (size)
+ status = fdt_setprop(config_fdt, config_node, name, value, size);
+ else
+ status = fdt_delprop(config_fdt, config_node, name);
+
+ if ((status < 0) && (status != -FDT_ERR_NOTFOUND))
+ bdk_fatal("Failed to set %s in FDT\n", name);
+}
+
+/**
+ * Multiple functions need to display the config item help string in a format
+ * suitable for inclusion in a device tree. This function displays the help
+ * message as a properly indented comment, followed by the item name and
+ * its default value.
+ *
+ * @param cfg Config item to display help for
+ */
+static void display_help(bdk_config_t cfg)
+{
+ /* Print the help text as a comment before the entry */
+ /* Indent with tabs like Linux requires */
+ printf("\n");
+ printf("\t/* ");
+ const char *ptr = bdk_config_get_help(cfg);
+ while (*ptr)
+ {
+ putchar(*ptr);
+ if (*ptr == '\n')
+ putchar('\t');
+ ptr++;
+ }
+ printf(" */\n");
+    /* Print the parameter and its default value as a comment. This will be
+       a reference that is easy for the user to change */
+ printf("\t//%s = ", config_info[cfg].format);
+ switch (config_info[cfg].ctype)
+ {
+ case BDK_CONFIG_TYPE_INT:
+ if (config_info[cfg].default_value < 10)
+ printf("\"%ld\"", config_info[cfg].default_value);
+ else
+ printf("\"0x%lx\"", config_info[cfg].default_value);
+ break;
+ case BDK_CONFIG_TYPE_STR:
+ case BDK_CONFIG_TYPE_STR_LIST:
+ if (config_info[cfg].default_value)
+ printf("\"%s\"", (const char *)config_info[cfg].default_value);
+ else
+ printf("\"\"");
+ break;
+ case BDK_CONFIG_TYPE_BINARY:
+ printf("[]");
+ break;
+ }
+ printf(";\n");
+}
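+
+/* Illustrative display_help() output for an integer item: the help text is
+   printed first as a tab-indented C comment, then the name and its default,
+   e.g. (for the watchdog item above):
+
+       //WATCHDOG-TIMEOUT = "0";
+*/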
+
+/**
+ * Display the active configuration as a valid device tree
+ */
+void bdk_config_show(void)
+{
+ /* Output the standard DTS headers */
+ printf("/dts-v1/;\n");
+ printf("\n");
+ printf("/ {\n");
+ printf("cavium,bdk {\n");
+ for (bdk_config_t cfg = 0; cfg < __BDK_CONFIG_END; cfg++)
+ {
+ /* Show the help message */
+ display_help(cfg);
+
+ /* Figure out how much of the config item is fixed versus
+ the optional parameters */
+ const char *format = config_info[cfg].format;
+ const char *format_param = strchr(format, '.');
+ int format_length = 0;
+ if (format_param)
+ format_length = format_param - format;
+
+ /* Loop through all device tree entries displaying the ones that
+ match this format */
+ int offset = fdt_first_property_offset(config_fdt, config_node);
+ while (offset >= 0)
+ {
+ /* Get the device tree item */
+ const char *name = NULL;
+ int data_size = 0;
+ const char *data = fdt_getprop_by_offset(config_fdt, offset, &name, &data_size);
+ const char *data_end = data + data_size;
+ /* Find the first param */
+ const char *name_param = strchr(name, '.');
+ int name_length = 0;
+ if (name_param)
+ {
+ /* We want to compare up to the first param */
+ name_length = name_param - name;
+                /* If the lengths are different not including the parameters,
+                   then we force a full match, which will always fail */
+ if (name_length != format_length)
+ name_length = 0;
+ }
+ else /* No params, match base of format */
+ name_length = format_length;
+
+ /* Check if it matches the current config format */
+ int match;
+ if (name_length)
+ {
+ /* Check the prefix */
+ match = strncmp(name, format, name_length);
+ if (match == 0)
+ {
+ /* Prefix matched. We only really match if the next
+ character is the end of the string or a '.' */
+ if ((name[name_length] != 0) && (name[name_length] != '.'))
+ match = 1;
+ }
+ }
+ else
+ match = strcmp(name, format);
+ /* Print matching entries */
+ if (match == 0)
+ {
+ if (config_info[cfg].ctype == BDK_CONFIG_TYPE_BINARY)
+ {
+ printf("\t%s = [", name);
+ const char *ptr = data;
+ while (ptr < data_end)
+ {
+ printf(" %02x", (int)*ptr);
+ ptr++;
+ }
+ printf(" ]");
+ }
+ else
+ {
+ printf("\t%s = \"%s\"", name, data);
+ data += strlen(data) + 1;
+ while (data < data_end)
+ {
+ printf(",\n\t\t\"%s\"", data);
+ data += strlen(data) + 1;
+ }
+ }
+ printf(";\n");
+ }
+ offset = fdt_next_property_offset(config_fdt, offset);
+ }
+ }
+ /* Output the standard DTS footers */
+ printf("}; /* cavium,bdk */\n");
+ printf("}; /* / */\n");
+}
+
+/**
+ * Display a list of all possible config items with help text
+ */
+void bdk_config_help(void)
+{
+ /* Write out formatted as part of a device tree source (dts) file */
+ printf("/dts-v1/;\n");
+ printf("\n");
+ printf("/ {\n");
+ printf("cavium,bdk {\n");
+ for (bdk_config_t cfg = 0; cfg < __BDK_CONFIG_END; cfg++)
+ display_help(cfg);
+ printf("}; /* cavium,bdk */\n");
+ printf("}; /* / */\n");
+}
+
+
+/**
+ * Save the current configuration to flash
+ *
+ * @return Zero on success, negative on failure
+ */
+int bdk_config_save(void)
+{
+ /* Pack the FDT so it uses less space */
+ int status = fdt_pack(config_fdt);
+ if (status < 0)
+ {
+ bdk_error("FDT error %d: %s\n", status, fdt_strerror(status));
+ return -1;
+ }
+
+ /* Calculate a CRC32 of the FDT */
+ int fdt_size = fdt_totalsize(config_fdt);
+ uint32_t crc32 = bdk_crc32(config_fdt, fdt_size, 0);
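+    /* Layout of the saved file: the packed FDT immediately followed by this
+       CRC32 in the FDT's (big endian) byte order. config_load_file() below
+       verifies the same trailing CRC32 when reading the file back */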
+
+ /* Open the output file */
+ FILE *outf = fopen("/fatfs/default.dtb", "wb");
+ if (!outf)
+ {
+ bdk_error("Failed to open flash");
+ return -1;
+ }
+
+ /* Write the FDT */
+ if (fwrite(config_fdt, fdt_size, 1, outf) != 1)
+ {
+ bdk_error("Failed to write FDT");
+ fclose(outf);
+ return -1;
+ }
+
+ /* Save the CRC32 in the same endianness as the FDT */
+ crc32 = cpu_to_fdt32(crc32);
+ if (fwrite(&crc32, sizeof(crc32), 1, outf) != 1)
+ {
+ bdk_error("Failed to write FDT CRC32");
+ fclose(outf);
+ return -1;
+ }
+
+ fclose(outf);
+ return 0;
+}
+
+/**
+ * Takes the current live device tree and exports it to a memory address suitable
+ * for passing to the next binary in register X1.
+ *
+ * @return Physical address of the device tree, or 0 on failure
+ */
+uint64_t __bdk_config_export_to_mem(void)
+{
+ void *end_ptr = sbrk(0);
+ bdk_node_t node = bdk_numa_master();
+ int fdt_size = fdt_totalsize(config_fdt);
+
+ /* Round size up to 4KB boundary, be sure to add 4 bytes for CRC32 */
+ int fdt_space = (fdt_size + 4 + 0xfff) & -4096;
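+    /* (x + 0xfff) & -4096 rounds x up to the next 4KB multiple */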
+ /* First try 4MB - FDT size as this keeps the FDT in the 4MB secure space
+ setup by ATF */
+ void *fdt_ptr = bdk_phys_to_ptr(0x400000 - fdt_space);
+ if (!__bdk_is_dram_enabled(node))
+ {
+ /* Address must be in L2 */
+ int l2_size = bdk_l2c_get_cache_size_bytes(node);
+ void *l2_ptr = bdk_phys_to_ptr(l2_size - fdt_space);
+ if (l2_ptr < fdt_ptr)
+ fdt_ptr = l2_ptr;
+ if (fdt_ptr < end_ptr)
+ {
+ bdk_error("No room for FDT to pass to next binary\n");
+ return 0;
+ }
+ }
+ else
+ {
+ /* We have DRAM, make sure we're past the end of this image */
+ if (fdt_ptr < end_ptr)
+ fdt_ptr = end_ptr;
+ }
+ uint32_t crc32 = bdk_crc32(config_fdt, fdt_size, 0);
+ fdt_move(config_fdt, fdt_ptr, fdt_size);
+ /* CRC32 is stored in same endianness as FDT at the end */
+    *(uint32_t *)((char *)fdt_ptr + fdt_size) = cpu_to_fdt32(crc32);
+ BDK_TRACE(FDT_OS, "Exported device tree to memory %p, size 0x%x, CRC32 %08x\n",
+ fdt_ptr, fdt_size, crc32);
+ return bdk_ptr_to_phys(fdt_ptr);
+}
+
+/**
+ * Return a pointer to the device tree used for configuration
+ *
+ * @return FDT or NULL on failure
+ */
+void* bdk_config_get_fdt(void)
+{
+ return config_fdt;
+}
+
+/**
+ * Set the device tree used for configuration
+ *
+ * @param fdt Device tree to use. Memory is assumed to be from malloc() and bdk_config takes
+ * over ownership on success
+ *
+ * @return Zero on success, negative on failure
+ */
+int bdk_config_set_fdt(void *fdt)
+{
+ int offset = fdt_path_offset(fdt, "/cavium,bdk"); /* Find our node */
+ if (offset < 0)
+ return -1;
+ free(config_fdt);
+ config_fdt = fdt;
+ config_node = offset;
+ return 0;
+}
+
+/**
+ * Write all default values to a FDT. Missing config items get defaults in the
+ * BDK config, this function adds those defaults to the FDT. This way other code
+ * gets the default value without needing special code.
+ *
+ * @param fdt FDT structure to fill defaults into
+ *
+ * @return Zero on success, negative on failure
+ */
+int bdk_config_expand_defaults(void *fdt)
+{
+ const struct fdt_property *prop;
+
+ /* The best defaults may have changed while this image was running if DRAM
+ is setup. Update the defaults before expanding them */
+ config_set_defaults();
+
+ int fdt_node = fdt_path_offset(fdt, "/cavium,bdk"); /* Find our node */
+ if (fdt_node < 0)
+ {
+ bdk_error("Failed to find top node, FDT error %d: %s\n",
+ fdt_node, fdt_strerror(fdt_node));
+ return -1;
+ }
+
+ /* Loop through all configuration items */
+ for (bdk_config_t cfg = 0; cfg < __BDK_CONFIG_END; cfg++)
+ {
+        /* Figure out the base name without any dot parameters */
+ const char *name = config_info[cfg].format;
+ const char *name_end = strchr(name, '.');
+ int name_len;
+ if (name_end)
+ name_len = name_end - name;
+ else
+ name_len = strlen(name);
+ /* Try and find the base name in the FDT */
+ prop = fdt_get_property_namelen(fdt, fdt_node, name, name_len, NULL);
+ /* If it wasn't found, then we need to add the default */
+ if (prop == NULL)
+ {
+ /* Create a copy of the name for use in FDT calls */
+ char temp_name[name_len + 1];
+ memcpy(temp_name, name, name_len);
+ temp_name[name_len] = 0;
+ /* Call the correct FDT call based on the type */
+ int status = 0;
+ switch (config_info[cfg].ctype)
+ {
+ case BDK_CONFIG_TYPE_INT:
+ {
+ char temp_value[20];
+ if (config_info[cfg].default_value < 10)
+ snprintf(temp_value, sizeof(temp_value), "%ld", config_info[cfg].default_value);
+ else
+ snprintf(temp_value, sizeof(temp_value), "0x%lx", config_info[cfg].default_value);
+ /* Store the default int value */
+ status = fdt_setprop_string(fdt, fdt_node, temp_name, temp_value);
+ break;
+ }
+ case BDK_CONFIG_TYPE_STR:
+ /* Store the default string value, if present */
+ if (config_info[cfg].default_value)
+ {
+ status = fdt_setprop_string(fdt, fdt_node, temp_name,
+ (const char *)config_info[cfg].default_value);
+ }
+ break;
+ case BDK_CONFIG_TYPE_STR_LIST:
+                /* Do nothing, string lists default to empty */
+ break;
+ case BDK_CONFIG_TYPE_BINARY:
+ /* Do nothing, binary defaults to empty */
+ break;
+ }
+ if (status < 0)
+ {
+ bdk_error("Failed to set default for %s, FDT error %d: %s\n",
+ temp_name, status, fdt_strerror(status));
+ return -1;
+ }
+ }
+ }
+ return 0;
+}
+
+/**
+ * Some of the default config values can vary based on runtime parameters. This
+ * function sets those default parameters. It must be run before anyone calls
+ * bdk_config_get_*().
+ */
+static void config_set_defaults(void)
+{
+ bool isEmulation = bdk_is_platform(BDK_PLATFORM_EMULATOR);
+ /* This is Cavium's OUI with the local admin bit. We will use this as a
+ default as it won't collide with official addresses, but is sort of
+ part of the Cavium range. The lower three bytes will be updated with
+ the wafer info */
+ uint64_t mac_address = 0x020fb7000000ull;
+ /* Set the lower MAC address bits based on the chip manufacturing
+ information. This should give reasonable MAC address defaults
+ for production parts */
+ if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX))
+ {
+ BDK_CSR_INIT(fus_dat0, bdk_numa_local(), BDK_MIO_FUS_DAT0);
+ mac_address |= fus_dat0.u & 0xffffff;
+ }
+ else
+ {
+ mac_address |= bdk_fuse_read_range(bdk_numa_local(), BDK_FUS_FUSE_NUM_E_MFG_INFOX(0), 24);
+ }
+ config_info[BDK_CONFIG_MAC_ADDRESS].default_value = mac_address;
+
+ /* Set the number of packet buffers */
+ int num_packet_buffers = 4096;
+    /* If DRAM is setup, allocate 8K buffers for 16 ports plus some slop */
+ if (__bdk_is_dram_enabled(bdk_numa_master()))
+ num_packet_buffers = 8192 * 16 + 1024;
+ else if (isEmulation) {
+ if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+ num_packet_buffers = 4096 * 4;
+ }
+ config_info[BDK_CONFIG_NUM_PACKET_BUFFERS].default_value = num_packet_buffers;
+ config_info[BDK_CONFIG_PACKET_BUFFER_SIZE].default_value = 1024;
+
+ /* Asim doesn't scale to 48 cores well. Limit to 4 */
+ if (bdk_is_platform(BDK_PLATFORM_ASIM))
+ config_info[BDK_CONFIG_COREMASK].default_value = 0xf;
+ /* CN88XX pass 1.x doesn't support EA */
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X))
+ config_info[BDK_CONFIG_PCIE_EA].default_value = 0;
+ /* Emulator only supports 4 cores */
+ if (isEmulation)
+ config_info[BDK_CONFIG_COREMASK].default_value = 0xf;
+}
+
+/**
+ * BDK configuration items are stored in a device tree so they can be passed to
+ * other software later. This function creates the initial empty device tree
+ * used for BDK configuration items. The values will be populated as configuration
+ * files are read from flash.
+ */
+static void config_setup_fdt(void)
+{
+ const int FDT_SIZE = 0x10000;
+ config_fdt = calloc(1, FDT_SIZE);
+ if (!config_fdt)
+ bdk_fatal("Unable to allocate memory for config FDT\n");
+ if (fdt_create_empty_tree(config_fdt, FDT_SIZE) < 0)
+ bdk_fatal("Unable to create FDT for config\n");
+ config_node = fdt_add_subnode(config_fdt, 0, "cavium,bdk");
+ if (config_node < 0)
+ bdk_fatal("Unable to create cavium,bdk node in FDT\n");
+}
+
+/**
+ * Parse a FDT and copy its properties to our configuration FDT
+ *
+ * @param fdt FDT to parse
+ */
+static int config_parse_fdt(const void *fdt, const char *base_path)
+{
+ /* Check the FDT header */
+ int result = fdt_check_header(fdt);
+ if (result)
+ goto fail;
+
+ /* Find our node */
+ result = fdt_path_offset(fdt, base_path);
+ if (result < 0)
+ goto fail;
+
+ /* Copy all parameters to our in memory FDT */
+ int offset = fdt_first_property_offset(fdt, result);
+ while (offset >= 0)
+ {
+ const char *name = NULL;
+ int blob_size = 0;
+ const char *data = fdt_getprop_by_offset(fdt, offset, &name, &blob_size);
+ result = fdt_setprop(config_fdt, config_node, name, data, blob_size);
+ offset = fdt_next_property_offset(fdt, offset);
+ }
+ return 0;
+fail:
+ bdk_error("FDT error %d: %s\n", result, fdt_strerror(result));
+ return -1;
+}
+
+/**
+ * Load a FDT from a file and pull in its configuration properties
+ *
+ * @param filename File to read from
+ * @param offset Offset into the file to read from
+ *
+ * @return Zero on success, negative on failure
+ */
+static int config_load_file(const char *filename, uint64_t offset)
+{
+ uint64_t fdt_size = 0;
+ bdk_signed_flags_t sign_flags = BDK_SIGNED_FLAG_NONE;
+ if (offset)
+ sign_flags = BDK_SIGNED_FLAG_ALLOW_UNSIGNED | BDK_SIGNED_FLAG_NOT_ENCRYPTED;
+ void *fdt = bdk_signed_load(filename, offset, BDK_SIGNED_DTS, sign_flags, &fdt_size);
+ if (!fdt)
+ return -1;
+
+ /* Make sure the read succeeded */
+ if (fdt_size < (int)sizeof(struct fdt_header))
+ {
+ bdk_error("Invalid device tree %s\n", filename);
+ free(fdt);
+ return -1;
+ }
+
+ if (fdt_check_header(fdt))
+ {
+ bdk_error("Invalid FDT header read from %s\n", filename);
+ free(fdt);
+ return -1;
+ }
+
+ /* Make sure we read enough data to contain the FDT */
+ int correct_size = fdt_totalsize(fdt);
+ if ((int)fdt_size < correct_size)
+ {
+ bdk_error("Unable to read FDT from %s\n", filename);
+ free(fdt);
+ return -1;
+ }
+
+ /* Check if a CRC32 was added on the end of the FDT */
+ if ((int)fdt_size >= correct_size + 4)
+ {
+ uint32_t crc32 = bdk_crc32(fdt, correct_size, 0);
+ uint32_t correct_crc32 = *(uint32_t *)((const char *)fdt + correct_size);
+ /* CRC32 is stored in same endianness as FDT */
+ correct_crc32 = fdt32_to_cpu(correct_crc32);
+ if (crc32 != correct_crc32)
+ {
+ bdk_error("FDT failed CRC32 verification (%s)\n", filename);
+ free(fdt);
+ return -1;
+ }
+ //printf("PASS: FDT CRC32 verification (%s)\n", filename);
+ }
+
+ /* Parse the device tree, adding its configuration to ours */
+ if (config_parse_fdt(fdt, "/cavium,bdk"))
+ {
+ free(fdt);
+ return -1;
+ }
+
+ free(fdt);
+ return 0;
+}
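+
+/* Layout assumed by the CRC check above (a sketch of the on-flash image,
+   not a structure defined in this file):
+       [ FDT blob, fdt_totalsize() bytes ][ optional 32-bit CRC32 ]
+   The trailing CRC32 covers the whole blob and is stored in the same
+   endianness as the FDT itself, hence the fdt32_to_cpu() conversion. */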
+
+/**
+ * Internal BDK function to initialize the config system. Must be called before
+ * any configuration functions are called
+ */
+void __bdk_config_init(void)
+{
+ bool done_trust_init = false;
+ /* Set default that can vary dynamically at runtime */
+ config_set_defaults();
+
+ /* Register X1 is expected to be a device tree when we boot. Check that
+ the physical address seems correct, then load the device tree */
+ if ((__bdk_init_reg_x1 > 0) && /* Not zero */
+ (__bdk_init_reg_x1 < 0x1000000) && /* In the lower 16MB */
+ ((__bdk_init_reg_x1 & 0xfff) == 0)) /* Aligned on a 4KB boundary */
+ {
+ const void *fdt = (const void *)__bdk_init_reg_x1;
+ /* Check the FDT header */
+ int result = fdt_check_header(fdt);
+ if (result)
+ result = -1; /* Invalid tree */
+ else
+ {
+ int fdt_size = fdt_totalsize(fdt);
+ uint32_t crc32 = bdk_crc32(fdt, fdt_size, 0);
+ uint32_t correct_crc32 = *(uint32_t *)((const char *)fdt + fdt_size);
+ /* CRC32 is stored in same endianness as FDT */
+ correct_crc32 = fdt32_to_cpu(correct_crc32);
+ if (crc32 == correct_crc32)
+ {
+ //printf("Previous image FDT passed CRC32 verification(%p, size 0x%x, CRC32 %08x)\n", fdt, fdt_size, crc32);
+ result = fdt_path_offset(fdt, "/cavium,bdk"); /* Find our node */
+ }
+ else
+ {
+ bdk_error("Previous image FDT failed CRC32 verification(%p, size 0x%x)\n", fdt, fdt_size);
+ result = -1; /* Invalid tree */
+ }
+ }
+ /* If tree is valid so far, attempt to move it into our memory space */
+ if (result > 0)
+ {
+ /* 4KB extra room for growth */
+ const int fdt_size = fdt_totalsize(fdt) + 4096;
+ config_fdt = calloc(1, fdt_size);
+ if (config_fdt)
+ {
+ int result = fdt_move(fdt, config_fdt, fdt_size);
+ if (result == 0)
+ {
+ /* Find our node */
+ config_node = fdt_path_offset(config_fdt, "/cavium,bdk");
+ if (config_node > 0)
+ {
+ printf("Using configuration from previous image\n");
+ goto done;
+ }
+ else
+ {
+ bdk_error("Unable to find BDK node after move\n");
+ free(config_fdt);
+ config_node = 0;
+ config_fdt = NULL;
+ }
+ }
+ else
+ {
+ bdk_error("Unable to move passed device tree\n");
+ free(config_fdt);
+ config_fdt = NULL;
+ }
+ }
+ else
+ bdk_error("Failed to allocate memory for passed device tree (%d bytes)\n", fdt_size);
+ }
+ }
+
+ /* Create the global device tree used to store config items */
+ config_setup_fdt();
+ /* Setup trust level so reading device trees works */
+ __bdk_trust_init();
+ done_trust_init = true;
+
+ if (bdk_is_platform(BDK_PLATFORM_ASIM))
+ {
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+ bdk_config_set_str("ASIM-CN88XX", BDK_CONFIG_BOARD_MODEL);
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+ bdk_config_set_str("ASIM-CN83XX", BDK_CONFIG_BOARD_MODEL);
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+ bdk_config_set_str("ASIM-CN81XX", BDK_CONFIG_BOARD_MODEL);
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN93XX))
+ bdk_config_set_str("ASIM-CN93XX", BDK_CONFIG_BOARD_MODEL);
+ }
+ else if (bdk_is_platform(BDK_PLATFORM_EMULATOR))
+ {
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+ bdk_config_set_str("EMUL-CN88XX", BDK_CONFIG_BOARD_MODEL);
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+ bdk_config_set_str("EMUL-CN83XX", BDK_CONFIG_BOARD_MODEL);
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+ bdk_config_set_str("EMUL-CN81XX", BDK_CONFIG_BOARD_MODEL);
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN93XX))
+ bdk_config_set_str("EMUL-CN93XX", BDK_CONFIG_BOARD_MODEL);
+ }
+ else if (config_load_file("/rom/boardcfg.dtb", 0) == 0)
+ {
+ printf("Board manufacturing information loaded from ROM-FS\n");
+ }
+ /* Load manufacturing data from the top 64KB of flash */
+ else if (config_load_file("/boot", BDK_CONFIG_MANUFACTURING_ADDRESS) != 0)
+ {
+ printf("\33[1m"); /* Bold */
+ bdk_warn("\n");
+ bdk_warn("********************************************************\n");
+ bdk_warn("* Board manufacturing information not found. Program\n");
+ bdk_warn("* the board manufacturing information in the Setup menu.\n");
+ bdk_warn("********************************************************\n");
+ bdk_warn("\n");
+ printf("\33[0m"); /* Normal */
+ goto done;
+ }
+
+ const char *model = bdk_config_get_str(BDK_CONFIG_BOARD_MODEL);
+ const char *revision = bdk_config_get_str(BDK_CONFIG_BOARD_REVISION);
+
+ /* Load BOARD-REVISION.cfg if it is on ROM-FS */
+ if (model && revision)
+ {
+ char filename[64];
+ snprintf(filename, sizeof(filename), "/rom/%s-%s.dtb", model, revision);
+ if (config_load_file(filename, 0) == 0)
+ goto done;
+ }
+
+ /* Load BOARD.cfg if it is on ROM-FS */
+ if (model)
+ {
+ char filename[64];
+ snprintf(filename, sizeof(filename), "/rom/%s.dtb", model);
+ if (config_load_file(filename, 0) == 0)
+ goto done;
+ }
+
+ /* Load default.dtb if it is there */
+ if (config_load_file("/fatfs/default.dtb", 0) == 0)
+ goto done;
+
+ /* Load BOARD-REVISION.cfg if it is there */
+ if (model && revision)
+ {
+ char filename[64];
+ snprintf(filename, sizeof(filename), "/fatfs/%s-%s.dtb", model, revision);
+ if (config_load_file(filename, 0) == 0)
+ goto done;
+ }
+
+ /* Load BOARD.cfg if it is there */
+ if (model)
+ {
+ char filename[64];
+ snprintf(filename, sizeof(filename), "/fatfs/%s.dtb", model);
+ if (config_load_file(filename, 0) == 0)
+ goto done;
+ }
+
+ /* No board specific configuration was found. Warn the user */
+ printf("\33[1m"); /* Bold */
+ bdk_warn("\n");
+ bdk_warn("********************************************************\n");
+ bdk_warn("* Board configuration file not found. Either the board\n");
+ bdk_warn("* model is incorrect, or factory settings are not\n");
+ bdk_warn("* available. DTB file not found for board \"%s\".\n", model);
+ bdk_warn("********************************************************\n");
+ bdk_warn("\n");
+ printf("\33[0m"); /* Normal */
+
+done:
+ bdk_config_set_str(bdk_version_string(), BDK_CONFIG_VERSION);
+ /* Load the tracing level */
+ bdk_trace_enables = bdk_config_get_int(BDK_CONFIG_TRACE);
+ if (BDK_TRACE_OVERRIDE)
+ bdk_trace_enables = BDK_TRACE_OVERRIDE;
+ if (!done_trust_init)
+ __bdk_trust_init();
+}
diff --git a/src/vendorcode/cavium/bdk/libbdk-hal/bdk-gpio.c b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-gpio.c
new file mode 100644
index 0000000000..55f0dbf3f2
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-gpio.c
@@ -0,0 +1,197 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-gpio.h"
+
+/* This code is an optional part of the BDK. It is only linked in
+ if BDK_REQUIRE() needs it */
+BDK_REQUIRE_DEFINE(GPIO);
+
+/**
+ * Initialize a single GPIO as either an input or output. If it is
+ * an output, also set its output value.
+ *
+ * @param node Node the GPIO is on
+ * @param gpio GPIO to initialize
+ * @param is_output Non zero if this GPIO should be an output
+ * @param output_value
+ * Value of the GPIO if it should be an output. Not used if the
+ * GPIO isn't an output.
+ *
+ * @return Zero on success, negative on failure
+ */
+int bdk_gpio_initialize(bdk_node_t node, int gpio, int is_output, int output_value)
+{
+ if ((gpio >= 0) && (gpio < bdk_gpio_get_num()))
+ {
+ int gpio_group = gpio >> 6;
+ int gpio_index = gpio & 63;
+ if (output_value)
+ bdk_gpio_set(node, gpio_group, 1ull << gpio_index);
+ else
+ bdk_gpio_clear(node, gpio_group, 1ull << gpio_index);
+
+ BDK_CSR_DEFINE(cfg, BDK_GPIO_BIT_CFGX(gpio));
+ cfg.u = 0;
+ cfg.s.tx_oe = !!is_output;
+ BDK_CSR_WRITE(node, BDK_GPIO_BIT_CFGX(gpio), cfg.u);
+ }
+ else
+ {
+ bdk_error("bdk_gpio_initialize: Illegal GPIO\n");
+ return -1;
+ }
+ return 0;
+}
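+
+/* Example use (hypothetical pin number, shown for illustration only):
+       bdk_gpio_initialize(node, 5, 1, 0);
+   configures GPIO 5 as an output driven low. The output level is latched
+   via bdk_gpio_set()/bdk_gpio_clear() before the pin is switched to
+   output mode, so it never glitches to the wrong level. */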
+
+
+/**
+ * GPIO Read Data
+ *
+ * @param node Node GPIO block is on
+ * @param gpio_block GPIO block to access. Each block contains up to 64 GPIOs
+ *
+ * @return Status of the GPIO pins for the given block
+ */
+uint64_t bdk_gpio_read(bdk_node_t node, int gpio_block)
+{
+ bdk_gpio_rx_dat_t gpio_rx_dat;
+ switch (gpio_block)
+ {
+ case 0:
+ gpio_rx_dat.u = BDK_CSR_READ(node, BDK_GPIO_RX_DAT);
+ break;
+ case 1:
+ gpio_rx_dat.u = BDK_CSR_READ(node, BDK_GPIO_RX1_DAT);
+ break;
+ default:
+ bdk_error("GPIO block %d not supported\n", gpio_block);
+ gpio_rx_dat.u = 0;
+ break;
+ }
+ return gpio_rx_dat.s.dat;
+}
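+
+/* Worked example (pin number is illustrative): to sample GPIO 70 on a
+   chip that has it (e.g. CN83XX with 80 pins), use block 70 >> 6 = 1 and
+   test bit 70 & 63 = 6:
+       int level = (bdk_gpio_read(node, 1) >> 6) & 1; */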
+
+
+/**
+ * GPIO Clear pin
+ *
+ * @param node Node GPIO block is on
+ * @param gpio_block GPIO block to access. Each block contains up to 64 GPIOs
+ * @param clear_mask Bit mask to indicate which bits to drive to '0'.
+ */
+void bdk_gpio_clear(bdk_node_t node, int gpio_block, uint64_t clear_mask)
+{
+ switch (gpio_block)
+ {
+ case 0:
+ BDK_CSR_WRITE(node, BDK_GPIO_TX_CLR, clear_mask);
+ break;
+ case 1:
+ BDK_CSR_WRITE(node, BDK_GPIO_TX1_CLR, clear_mask);
+ break;
+ default:
+ bdk_error("GPIO block %d not supported\n", gpio_block);
+ break;
+ }
+}
+
+
+/**
+ * GPIO Set pin
+ *
+ * @param node Node GPIO block is on
+ * @param gpio_block GPIO block to access. Each block contains up to 64 GPIOs
+ * @param set_mask Bit mask to indicate which bits to drive to '1'.
+ */
+void bdk_gpio_set(bdk_node_t node, int gpio_block, uint64_t set_mask)
+{
+ switch (gpio_block)
+ {
+ case 0:
+ BDK_CSR_WRITE(node, BDK_GPIO_TX_SET, set_mask);
+ break;
+ case 1:
+ BDK_CSR_WRITE(node, BDK_GPIO_TX1_SET, set_mask);
+ break;
+ default:
+ bdk_error("GPIO block %d not supported\n", gpio_block);
+ break;
+ }
+}
+
+
+/** GPIO Select pin
+ *
+ * @param node CPU node
+ * @param gpio GPIO number
+ * @param pin Pin number
+ */
+void bdk_gpio_select_pin(bdk_node_t node, int gpio, int pin)
+{
+ if ((gpio < 0) || (gpio >= bdk_gpio_get_num()))
+ {
+ bdk_warn("bdk_gpio_select_pin: Illegal GPIO %d\n", gpio);
+ return;
+ }
+
+ BDK_CSR_MODIFY(c, node, BDK_GPIO_BIT_CFGX(gpio), c.s.pin_sel = pin);
+}
+
+
+/**
+ * Return the number of GPIO pins on this chip
+ *
+ * @return Number of GPIO pins
+ */
+int bdk_gpio_get_num(void)
+{
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+ return 51;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+ return 48;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+ return 80;
+ else if (CAVIUM_IS_MODEL(CAVIUM_CN93XX))
+ return 96;
+ else
+ {
+ bdk_error("bdk_gpio_get_num(): Unsupported chip");
+ return 0;
+ }
+}
diff --git a/src/vendorcode/cavium/bdk/libbdk-hal/bdk-l2c.c b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-l2c.c
new file mode 100644
index 0000000000..b1e2a88ce1
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-l2c.c
@@ -0,0 +1,270 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-ap.h"
+#include "libbdk-arch/bdk-csrs-l2c.h"
+#include "libbdk-arch/bdk-csrs-l2c_cbc.h"
+#include "libbdk-arch/bdk-csrs-mio_fus.h"
+
+typedef struct
+{
+ int sets;
+ int ways;
+ bool is_locked;
+} l2_node_state_t;
+
+static l2_node_state_t l2_node_state[BDK_NUMA_MAX_NODES];
+
+/**
+ * Perform one time initialization of L2 for improved
+ * performance. This can be called after L2 is in use.
+ *
+ * @return Zero on success, negative on failure.
+ */
+int bdk_l2c_initialize(bdk_node_t node)
+{
+ if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX))
+ {
+ /* Tell L2 to give the IOB statically higher priority compared to the
+ cores. This avoids conditions where IO blocks might be starved under
+ very high L2 loads */
+ BDK_CSR_MODIFY(c, node, BDK_L2C_CTL,
+ c.s.rsp_arb_mode = 1;
+ c.s.xmc_arb_mode = 0);
+ }
+
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && !bdk_is_platform(BDK_PLATFORM_ASIM))
+ {
+ /* Errata: (L2C-22279) RCAS/RSTC which hits S/S can use wrong compare data */
+ BDK_CSR_MODIFY(c, node, BDK_L2C_CTL,
+ c.s.dissblkdty = 1);
+ /* Errata: (L2C-22249) Broadcast invals can cause starvation on the INV bus */
+ for (int i = 0; i < 4; i++)
+ BDK_CSR_MODIFY(c, node, BDK_L2C_CBCX_SCRATCH(i),
+ c.s.invdly = 1);
+ }
+
+ // FIXME: Disable partial writes on pass 2 until it is debugged
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS2_X) && !bdk_is_platform(BDK_PLATFORM_ASIM))
+ {
+ BDK_CSR_MODIFY(c, node, BDK_L2C_CTL,
+ c.s.dissblkdty = 1);
+ }
+
+ if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX) && bdk_is_platform(BDK_PLATFORM_EMULATOR))
+ {
+ /* The emulator requires L2C_CTL[DISSBLKDTY] to be set */
+ BDK_CSR_MODIFY(c, node, BDK_L2C_CTL,
+ c.s.dissblkdty = 1);
+ }
+ return 0;
+}
+
+int bdk_l2c_get_core_way_partition(bdk_node_t node, int core)
+{
+ return (BDK_CSR_READ(node, BDK_L2C_WPAR_PPX(core)) & 0xffff);
+}
+
+int bdk_l2c_set_core_way_partition(bdk_node_t node, int core, uint32_t mask)
+{
+ uint32_t valid_mask = (1 << bdk_l2c_get_num_assoc(node)) - 1;
+ mask &= valid_mask;
+
+ BDK_CSR_WRITE(node, BDK_L2C_WPAR_PPX(core), mask);
+ return 0;
+}
+
+
+int bdk_l2c_set_hw_way_partition(bdk_node_t node, uint32_t mask)
+{
+ uint32_t valid_mask = (1 << bdk_l2c_get_num_assoc(node)) - 1;
+ mask &= valid_mask;
+
+ BDK_CSR_WRITE(node, BDK_L2C_WPAR_IOBX(0), mask);
+ return 0;
+}
+
+
+int bdk_l2c_get_hw_way_partition(bdk_node_t node)
+{
+ return (BDK_CSR_READ(node, BDK_L2C_WPAR_IOBX(0)) & 0xffff);
+}
+
+
+int bdk_l2c_lock_mem_region(bdk_node_t node, uint64_t start, uint64_t len)
+{
+ /* Round start/end to cache line boundaries */
+ len += start & BDK_CACHE_LINE_MASK;
+ start &= ~BDK_CACHE_LINE_MASK;
+ len = (len + BDK_CACHE_LINE_MASK) & ~BDK_CACHE_LINE_MASK;
+ void *ptr = (start) ? bdk_phys_to_ptr(start) : NULL;
+
+ while (len)
+ {
+ BDK_CACHE_LCK_L2(ptr);
+ ptr += BDK_CACHE_LINE_SIZE;
+ len -= BDK_CACHE_LINE_SIZE;
+ }
+ l2_node_state[node].is_locked = true;
+ return 0;
+}
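+
+/* Worked example of the rounding above, assuming 128-byte cache lines:
+   start = 0x1050, len = 0x100 becomes len = 0x150 (start's offset into
+   its line is added), start = 0x1000, then len rounds up to 0x180, so
+   three full lines at 0x1000, 0x1080 and 0x1100 get locked. */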
+
+void bdk_l2c_flush(bdk_node_t node)
+{
+ /* The number of ways can be reduced with fuses, but the equations below
+ assume the max number of ways */
+ const int MAX_WAYS = 16;
+ int num_sets = bdk_l2c_get_num_sets(node);
+ int num_ways = bdk_l2c_get_num_assoc(node);
+
+ int is_rtg = 1; /* Clear remote tags */
+ for (int l2_way = 0; l2_way < num_ways; l2_way++)
+ {
+ for (int l2_set = 0; l2_set < num_sets; l2_set++)
+ {
+ uint64_t encoded = 128 * (l2_set + num_sets * (l2_way + (is_rtg * MAX_WAYS)));
+ BDK_CACHE_WBI_L2_INDEXED(encoded);
+ }
+ }
+
+ is_rtg = 0; /* Clear local tags */
+ for (int l2_way = 0; l2_way < num_ways; l2_way++)
+ {
+ for (int l2_set = 0; l2_set < num_sets; l2_set++)
+ {
+ uint64_t encoded = 128 * (l2_set + num_sets * (l2_way + (is_rtg * MAX_WAYS)));
+ BDK_CACHE_WBI_L2_INDEXED(encoded);
+ }
+ }
+ l2_node_state[node].is_locked = false;
+}
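+
+/* Worked example of the index encoding above (set/way values are
+   illustrative): with num_sets = 2048, way 3, set 5 and is_rtg = 0,
+       encoded = 128 * (5 + 2048 * (3 + 0 * 16)) = 0xC0280,
+   i.e. the set/way pair packed above bit 7, the 128-byte line offset. */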
+
+int bdk_l2c_unlock_mem_region(bdk_node_t node, uint64_t start, uint64_t len)
+{
+ /* Round start/end to cache line boundaries */
+ len += start & BDK_CACHE_LINE_MASK;
+ start &= ~BDK_CACHE_LINE_MASK;
+ len = (len + BDK_CACHE_LINE_MASK) & ~BDK_CACHE_LINE_MASK;
+ void *ptr = (start) ? bdk_phys_to_ptr(start) : NULL;
+
+ while (len > 0)
+ {
+ /* Must use invalidate version to release lock */
+ BDK_CACHE_WBI_L2(ptr);
+ ptr += BDK_CACHE_LINE_SIZE;
+ len -= BDK_CACHE_LINE_SIZE;
+ }
+
+ l2_node_state[node].is_locked = false;
+ return 0;
+}
+
+
+int bdk_l2c_get_cache_size_bytes(bdk_node_t node)
+{
+ return bdk_l2c_get_num_sets(node) * bdk_l2c_get_num_assoc(node) * BDK_CACHE_LINE_SIZE;
+}
+
+/* Return the number of sets in the L2 Cache */
+int bdk_l2c_get_num_sets(bdk_node_t node)
+{
+ if (bdk_unlikely(l2_node_state[node].sets == 0))
+ {
+ /* Select the L2 cache */
+ bdk_ap_csselr_el1_t csselr_el1;
+ csselr_el1.u = 0;
+ csselr_el1.s.ind = 0;
+ csselr_el1.s.level = CAVIUM_IS_MODEL(CAVIUM_CN8XXX) ? 1 : 2;
+ BDK_MSR(CSSELR_EL1, csselr_el1.u);
+ /* Read its size */
+ bdk_ap_ccsidr_el1_t ccsidr_el1;
+ BDK_MRS(CCSIDR_EL1, ccsidr_el1.u);
+ /* Store it for use later */
+ l2_node_state[node].sets = ccsidr_el1.s.numsets + 1;
+ l2_node_state[node].ways = ccsidr_el1.s.associativity + 1;
+
+ /* Early chips didn't update the number of ways based on fusing */
+ if ((l2_node_state[node].ways == 16) && CAVIUM_IS_MODEL(CAVIUM_CN8XXX))
+ {
+ /* The l2 can be reduced in 25% increments */
+ BDK_CSR_INIT(mio_fus_dat3, node, BDK_MIO_FUS_DAT3);
+ switch (mio_fus_dat3.s.l2c_crip)
+ {
+ case 3: /* 1/4 size */
+ l2_node_state[node].ways *= 1;
+ break;
+ case 2: /* 1/2 size */
+ l2_node_state[node].ways *= 2;
+ break;
+ case 1: /* 3/4 size */
+ l2_node_state[node].ways *= 3;
+ break;
+ default: /* Full size */
+ l2_node_state[node].ways *= 4;
+ break;
+ }
+ l2_node_state[node].ways /= 4;
+ }
+ }
+ return l2_node_state[node].sets;
+}
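+
+/* Worked example of the fuse adjustment above: a part fused to half size
+   (l2c_crip = 2) that reports 16 ways ends up with 16 * 2 / 4 = 8 ways,
+   so bdk_l2c_get_cache_size_bytes() shrinks accordingly. */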
+
+/* Return the number of associations in the L2 Cache */
+int bdk_l2c_get_num_assoc(bdk_node_t node)
+{
+ /* Get the number of sets if the global sets/ways is not setup */
+ if (bdk_unlikely(l2_node_state[node].ways == 0))
+ bdk_l2c_get_num_sets(node);
+ return l2_node_state[node].ways;
+}
+
+/**
+ * Return true if the BDK has locked itself in L2
+ *
+ * @return
+ */
+int bdk_l2c_is_locked(bdk_node_t node)
+{
+ /* Determining the lock state of L2 requires reading exact tags from L2
+ which varies per chip. Rather than deal with that complexity, we just
+ keep a flag around saying if the L2 lock functions have been called.
+ This works for the BDK as its use of locking is very simple */
+ return l2_node_state[node].is_locked;
+}
+
diff --git a/src/vendorcode/cavium/bdk/libbdk-hal/bdk-twsi.c b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-twsi.c
new file mode 100644
index 0000000000..4fbb78a876
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-hal/bdk-twsi.c
@@ -0,0 +1,318 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-mio_tws.h"
+
+#define RECOVERY_UDELAY 5
+#define RECOVERY_CLK_CNT 9
+#define ARBLOST_UDELAY 5000 /* 5ms */
+
+/* This code is an optional part of the BDK. It is only linked in
+ if BDK_REQUIRE() needs it */
+BDK_REQUIRE_DEFINE(TWSI);
+
+/**
+ * Initialize the TWSI blocks. This just sets the clock rate.
+ * Many devices will work without calling this, but some
+ * TWSI devices will fail. This is normally called automatically
+ * in bdk-init-main.c.
+ *
+ * @return Zero on success, negative on failure
+ */
+int bdk_twsix_initialize(bdk_node_t node)
+{
+ const int TWSI_BUS_FREQ = 100000; /* 100 KHz */
+ const int TWSI_THP = 24; /* TCLK half period (default 24) */
+ const int io_clock_hz = bdk_clock_get_rate(node, BDK_CLOCK_SCLK);
+ int N_divider;
+ int M_divider;
+
+ /* Set the TWSI clock to a conservative TWSI_BUS_FREQ. Compute the
+ clocks M divider based on the SCLK.
+ TWSI freq = (core freq) / (20 x (M+1) x (thp+1) x 2^N)
+ M = ((core freq) / (20 x (TWSI freq) x (thp+1) x 2^N)) - 1 */
+ for (N_divider = 0; N_divider < 8; N_divider++)
+ {
+ M_divider = (io_clock_hz / (20 * TWSI_BUS_FREQ * (TWSI_THP + 1) * (1 << N_divider))) - 1;
+ if (M_divider < 16)
+ break;
+ }
+
+ BDK_CSR_DEFINE(sw_twsi, BDK_MIO_TWSX_SW_TWSI(bus));
+ sw_twsi.u = 0;
+ sw_twsi.s.v = 1; /* Clear valid bit */
+ sw_twsi.s.op = 0x6; /* See EOP field */
+ sw_twsi.s.r = 0; /* Select CLKCTL when R = 0 */
+ sw_twsi.s.eop_ia = 3; /* R=0 selects CLKCTL, R=1 selects STAT */
+ sw_twsi.s.data = ((M_divider & 0xf) << 3) | ((N_divider & 0x7) << 0);
+
+ int num_busses = 2;
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+ num_busses = 6;
+
+ for (int bus = 0; bus < num_busses; bus++)
+ {
+ /* Only init non-slave ports */
+ BDK_CSR_INIT(state, node, BDK_MIO_TWSX_SW_TWSI(bus));
+ if (!state.s.slonly)
+ BDK_CSR_WRITE(node, BDK_MIO_TWSX_SW_TWSI(bus), sw_twsi.u);
+ }
+ return 0;
+}
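+
+/* Worked example of the divider search, assuming SCLK = 800 MHz (the
+   clock rate is board specific): with N = 0,
+       M = 800000000 / (20 * 100000 * 25 * 1) - 1 = 15,
+   which is < 16, so the loop stops at N = 0, M = 15, giving the target
+   100 KHz TWSI clock. */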
+
+/**
+ * Do a twsi bus recovery in the case when the last transaction
+ * on the bus has been left unfinished.
+ *
+ * @param twsi_id which TWSI bus to use
+ */
+static void bdk_twsix_recover_bus(bdk_node_t node, int twsi_id)
+{
+ /* read TWSX_INT */
+ BDK_CSR_INIT(twsx_int, node, BDK_MIO_TWSX_INT(twsi_id));
+
+ for (int i = 0; i < RECOVERY_CLK_CNT * 2; i++)
+ {
+ /* Re-sample the pin state; the SCL/SDA checks below would
+ otherwise act on the stale value read before the loop */
+ twsx_int.u = BDK_CSR_READ(node, BDK_MIO_TWSX_INT(twsi_id));
+ if (!twsx_int.s.scl_ovr)
+ {
+ /* SCL shouldn't be low here */
+ if (!twsx_int.s.scl)
+ {
+ bdk_error("N%d.TWSI%d: SCL is stuck low\n", node, twsi_id);
+ return;
+ }
+
+ /* Break if SDA is high */
+ if (twsx_int.s.sda)
+ break;
+ }
+
+ twsx_int.s.scl_ovr = !twsx_int.s.scl_ovr;
+ BDK_CSR_WRITE(node, BDK_MIO_TWSX_INT(twsi_id), twsx_int.u);
+ bdk_wait_usec(RECOVERY_UDELAY);
+ }
+
+ /*
+ * Generate STOP condition using the register overrides
+ * in order to move the higher level controller out of
+ * the bad state. This is a workaround for the TWSI hardware.
+ */
+ twsx_int.s.scl_ovr = 1;
+ twsx_int.s.sda_ovr = 1;
+ BDK_CSR_WRITE(node, BDK_MIO_TWSX_INT(twsi_id), twsx_int.u);
+ bdk_wait_usec(RECOVERY_UDELAY);
+ twsx_int.s.scl_ovr = 0;
+ BDK_CSR_WRITE(node, BDK_MIO_TWSX_INT(twsi_id), twsx_int.u);
+ bdk_wait_usec(RECOVERY_UDELAY);
+ twsx_int.s.sda_ovr = 0;
+ BDK_CSR_WRITE(node, BDK_MIO_TWSX_INT(twsi_id), twsx_int.u);
+}
+
+/**
+ * Do a twsi read from a 7 bit device address using an (optional)
+ * internal address. Up to 4 bytes can be read at a time.
+ *
+ * @param twsi_id which TWSI bus to use
+ * @param dev_addr Device address (7 bit)
+ * @param internal_addr
+ * Internal address. Can be 0, 1 or 2 bytes in width
+ * @param num_bytes Number of data bytes to read (1-4)
+ * @param ia_width_bytes
+ * Internal address size in bytes (0, 1, or 2)
+ *
+ * @return Read data, or -1 on failure
+ */
+int64_t bdk_twsix_read_ia(bdk_node_t node, int twsi_id, uint8_t dev_addr, uint16_t internal_addr, int num_bytes, int ia_width_bytes)
+{
+ bdk_mio_twsx_sw_twsi_t sw_twsi_val;
+ bdk_mio_twsx_sw_twsi_ext_t twsi_ext;
+ int retry_limit = 5;
+
+ if (num_bytes < 1 || num_bytes > 4 || ia_width_bytes < 0 || ia_width_bytes > 2)
+ return -1;
+retry:
+ twsi_ext.u = 0;
+ sw_twsi_val.u = 0;
+ sw_twsi_val.s.v = 1;
+ sw_twsi_val.s.r = 1;
+ sw_twsi_val.s.sovr = 1;
+ sw_twsi_val.s.size = num_bytes - 1;
+ sw_twsi_val.s.addr = dev_addr;
+
+ if (ia_width_bytes > 0)
+ {
+ sw_twsi_val.s.op = 1;
+ sw_twsi_val.s.ia = (internal_addr >> 3) & 0x1f;
+ sw_twsi_val.s.eop_ia = internal_addr & 0x7;
+ if (ia_width_bytes == 2)
+ {
+ sw_twsi_val.s.eia = 1;
+ twsi_ext.s.ia = internal_addr >> 8;
+ BDK_CSR_WRITE(node, BDK_MIO_TWSX_SW_TWSI_EXT(twsi_id), twsi_ext.u);
+ }
+ }
+
+ BDK_CSR_WRITE(node, BDK_MIO_TWSX_SW_TWSI(twsi_id), sw_twsi_val.u);
+ if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_MIO_TWSX_SW_TWSI(twsi_id), v, ==, 0, 10000))
+ {
+ bdk_warn("N%d.TWSI%d: Timeout waiting for read to complete...start recovering process\n",
+ node, twsi_id);
+ /* perform bus recovery */
+ bdk_twsix_recover_bus(node, twsi_id);
+ if (retry_limit-- > 0)
+ goto retry;
+
+ bdk_error("N%d.TWSI%d: Timeout waiting for operation to complete\n", node, twsi_id);
+ return -1;
+ }
+ sw_twsi_val.u = BDK_CSR_READ(node, BDK_MIO_TWSX_SW_TWSI(twsi_id));
+ if (!sw_twsi_val.s.r)
+ {
+ /* Check the reason for the failure. We may need to retry to handle multi-master
+ ** configurations.
+ ** Lost arbitration : 0x38, 0x68, 0xB0, 0x78
+ ** Core busy as slave: 0x80, 0x88, 0xA0, 0xA8, 0xB8, 0xC0, 0xC8
+ */
+ if (sw_twsi_val.s.data == 0x38
+ || sw_twsi_val.s.data == 0x68
+ || sw_twsi_val.s.data == 0xB0
+ || sw_twsi_val.s.data == 0x78
+ || sw_twsi_val.s.data == 0x80
+ || sw_twsi_val.s.data == 0x88
+ || sw_twsi_val.s.data == 0xA0
+ || sw_twsi_val.s.data == 0xA8
+ || sw_twsi_val.s.data == 0xB8
+ || sw_twsi_val.s.data == 0xC8)
+ {
+ /*
+ * One of the arbitration lost conditions is recognized.
+ * The TWSI hardware has switched to the slave mode and
+ * expects the STOP condition on the bus.
+ * Make a delay before next retry.
+ */
+ bdk_wait_usec(ARBLOST_UDELAY);
+ if (retry_limit-- > 0)
+ goto retry;
+ }
+ /* For all other errors, return an error code */
+ return -1;
+ }
+
+ return (sw_twsi_val.s.data & (0xFFFFFFFF >> (32 - num_bytes*8)));
+}
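+
+/* Example use (device and internal addresses are illustrative): read one
+   byte from internal address 0x10 of an EEPROM at 7-bit address 0x50 on
+   bus 0:
+       int64_t val = bdk_twsix_read_ia(node, 0, 0x50, 0x10, 1, 1);
+   A negative return means the transaction failed even after the bus
+   recovery retries above. */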
+
+
+/**
+ * Write 1-8 bytes to a TWSI device using an internal address.
+ *
+ * @param twsi_id which TWSI interface to use
+ * @param dev_addr TWSI device address (7 bit only)
+ * @param internal_addr
+ * TWSI internal address (0, 8, or 16 bits)
+ * @param num_bytes Number of bytes to write (1-8)
+ * @param ia_width_bytes
+ * internal address width, in bytes (0, 1, 2)
+ * @param data Data to write. Data is written MSB first on the twsi bus, and
+ * only the lower num_bytes bytes of the argument are valid. (If
+ * a 2 byte write is done, only the low 2 bytes of the argument are
+ * used.)
+ *
+ * @return Zero on success, -1 on error
+ */
+int bdk_twsix_write_ia(bdk_node_t node, int twsi_id, uint8_t dev_addr, uint16_t internal_addr, int num_bytes, int ia_width_bytes, uint64_t data)
+{
+ bdk_mio_twsx_sw_twsi_t sw_twsi_val;
+ bdk_mio_twsx_sw_twsi_ext_t twsi_ext;
+ int retry_limit = 5;
+ int to;
+
+ if (num_bytes < 1 || num_bytes > 8 || ia_width_bytes < 0 || ia_width_bytes > 2)
+ return -1;
+
+retry:
+ twsi_ext.u = 0;
+ sw_twsi_val.u = 0;
+ sw_twsi_val.s.v = 1;
+ sw_twsi_val.s.sovr = 1;
+ sw_twsi_val.s.size = num_bytes - 1;
+ sw_twsi_val.s.addr = dev_addr;
+ sw_twsi_val.s.data = 0xFFFFFFFF & data;
+
+ if (ia_width_bytes > 0)
+ {
+ sw_twsi_val.s.op = 1;
+ sw_twsi_val.s.ia = (internal_addr >> 3) & 0x1f;
+ sw_twsi_val.s.eop_ia = internal_addr & 0x7;
+ }
+ if (ia_width_bytes == 2)
+ {
+ sw_twsi_val.s.eia = 1;
+ twsi_ext.s.ia = internal_addr >> 8;
+ }
+ if (num_bytes > 4)
+ twsi_ext.s.data = data >> 32;
+
+ BDK_CSR_WRITE(node, BDK_MIO_TWSX_SW_TWSI_EXT(twsi_id), twsi_ext.u);
+ BDK_CSR_WRITE(node, BDK_MIO_TWSX_SW_TWSI(twsi_id), sw_twsi_val.u);
+ if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_MIO_TWSX_SW_TWSI(twsi_id), v, ==, 0, 10000))
+ {
+ bdk_warn("N%d.TWSI%d: Timeout waiting for write to complete...start recovering process\n",
+ node, twsi_id);
+ /* perform bus recovery */
+ bdk_twsix_recover_bus(node, twsi_id);
+ if (retry_limit-- > 0)
+ goto retry;
+
+ // After retry but still not success, report error and return
+ bdk_error("N%d.TWSI%d: Timeout waiting for operation to complete\n", node, twsi_id);
+ return -1;
+ }
+
+ /* Poll until reads succeed, or polling times out */
+ to = 100;
+ while (to-- > 0)
+ {
+ if (bdk_twsix_read_ia(node, twsi_id, dev_addr, 0, 1, 0) >= 0)
+ break;
+ }
+ if (to <= 0)
+ return -1;
+
+ return 0;
+}
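+
+/* Example use (addresses are illustrative): write the byte 0xAB to
+   internal address 0x10 of a device at 7-bit address 0x50 on bus 0:
+       bdk_twsix_write_ia(node, 0, 0x50, 0x10, 1, 1, 0xAB);
+   The read-back poll above doubles as a wait for EEPROM-style devices
+   that go busy after a write. */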
diff --git a/src/vendorcode/cavium/bdk/libbdk-os/bdk-init.c b/src/vendorcode/cavium/bdk/libbdk-os/bdk-init.c
new file mode 100644
index 0000000000..25d6b9eed3
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-os/bdk-init.c
@@ -0,0 +1,561 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "libbdk-arch/bdk-csrs-ap.h"
+#include "libbdk-arch/bdk-csrs-l2c.h"
+#include "libbdk-arch/bdk-csrs-l2c_tad.h"
+#include "libbdk-arch/bdk-csrs-mio_boot.h"
+#include "libbdk-arch/bdk-csrs-rom.h"
+#include "libbdk-arch/bdk-csrs-uaa.h"
+
+uint64_t __bdk_init_reg_x0; /* The contents of X0 when this image started */
+uint64_t __bdk_init_reg_x1; /* The contents of X1 when this image started */
+uint64_t __bdk_init_reg_pc; /* The contents of PC when this image started */
+static int64_t __bdk_alive_coremask[BDK_NUMA_MAX_NODES];
+
+/**
+ * Set the baud rate on a UART
+ *
+ * @param uart uart to set
+ * @param baudrate Baud rate (9600, 19200, 115200, etc)
+ * @param use_flow_control
+ * Non zero if hardware flow control should be enabled
+ */
+void bdk_set_baudrate(bdk_node_t node, int uart, int baudrate, int use_flow_control)
+{
+ /* 1.2.1 Initialization Sequence (Power-On/Hard/Cold Reset) */
+ /* 1. Wait for IOI reset (srst_n) to deassert. */
+ /* 2. Assert all resets:
+ a. UAA reset: UCTL_CTL[UAA_RST] = 1
+ b. UCTL reset: UCTL_CTL[UCTL_RST] = 1 */
+ BDK_CSR_MODIFY(c, node, BDK_UAAX_UCTL_CTL(uart),
+ c.s.uaa_rst = 1;
+ c.s.uctl_rst = 1);
+
+ /* 3. Configure the HCLK:
+ a. Reset the clock dividers: UCTL_CTL[H_CLKDIV_RST] = 1.
+ b. Select the HCLK frequency
+ i. UCTL_CTL[H_CLKDIV] = desired value,
+ ii. UCTL_CTL[H_CLKDIV_EN] = 1 to enable the HCLK.
+ iii. Readback UCTL_CTL to ensure the values take effect.
+ c. Deassert the HCLK clock divider reset: UCTL_CTL[H_CLKDIV_RST] = 0. */
+ BDK_CSR_MODIFY(c, node, BDK_UAAX_UCTL_CTL(uart),
+ c.s.h_clkdiv_sel = 3; /* Run at SCLK / 6, matches emulator */
+ c.s.h_clk_byp_sel = 0;
+ c.s.h_clk_en = 1);
+ BDK_CSR_MODIFY(c, node, BDK_UAAX_UCTL_CTL(uart),
+ c.s.h_clkdiv_rst = 0);
+
+ /* 4. Wait 20 HCLK cycles from step 3 for HCLK to start and async fifo
+ to properly reset. */
+ bdk_wait(200); /* Overkill */
+
+ /* 5. Deassert UCTL and UAHC resets:
+ a. UCTL_CTL[UCTL_RST] = 0
+ b. Wait 10 HCLK cycles.
+ c. UCTL_CTL[UAHC_RST] = 0
+ d. You will have to wait 10 HCLK cycles before accessing any
+ HCLK-only registers. */
+ BDK_CSR_MODIFY(c, node, BDK_UAAX_UCTL_CTL(uart), c.s.uctl_rst = 0);
+ bdk_wait(100); /* Overkill */
+ BDK_CSR_MODIFY(c, node, BDK_UAAX_UCTL_CTL(uart), c.s.uaa_rst = 0);
+ bdk_wait(100); /* Overkill */
+
+ /* 6. Enable conditional SCLK of UCTL by writing UCTL_CTL[CSCLK_EN] = 1. */
+ BDK_CSR_MODIFY(c, node, BDK_UAAX_UCTL_CTL(uart), c.s.csclk_en = 1);
+
+ /* 7. Initialize the integer and fractional baud rate divider registers
+ UARTIBRD and UARTFBRD as follows:
+ a. Baud Rate Divisor = UARTCLK/(16xBaud Rate) = BRDI + BRDF
+ b. The fractional register BRDF, m is calculated as integer(BRDF x 64 + 0.5)
+ Example calculation:
+ If the required baud rate is 230400 and hclk = 4MHz then:
+ Baud Rate Divisor = (4x10^6)/(16x230400) = 1.085
+ This means BRDI = 1 and BRDF = 0.085.
+ Therefore, fractional part, BRDF = integer((0.085x64)+0.5) = 5
+ Generated baud rate divider = 1+5/64 = 1.078 */
+ uint64_t divisor_x_64 = bdk_clock_get_rate(node, BDK_CLOCK_SCLK) / (baudrate * 16 * 6 / 64);
+ if (bdk_is_platform(BDK_PLATFORM_EMULATOR))
+ {
+ /* The hardware emulator currently fixes the uart at a fixed rate */
+ divisor_x_64 = 64;
+ }
+ BDK_CSR_MODIFY(c, node, BDK_UAAX_IBRD(uart),
+ c.s.baud_divint = divisor_x_64 >> 6);
+ BDK_CSR_MODIFY(c, node, BDK_UAAX_FBRD(uart),
+ c.s.baud_divfrac = divisor_x_64 & 0x3f);
+
+ /* 8. Program the line control register UAA(0..1)_LCR_H and the control
+ register UAA(0..1)_CR */
+ BDK_CSR_MODIFY(c, node, BDK_UAAX_LCR_H(uart),
+ c.s.sps = 0; /* No parity */
+ c.s.wlen = 3; /* 8 bits */
+ c.s.fen = 1; /* FIFOs enabled */
+ c.s.stp2 = 0; /* Use one stop bit, not two */
+ c.s.eps = 0; /* No parity */
+ c.s.pen = 0; /* No parity */
+ c.s.brk = 0); /* Don't send a break */
+ BDK_CSR_MODIFY(c, node, BDK_UAAX_CR(uart),
+ c.s.ctsen = use_flow_control;
+ c.s.rtsen = use_flow_control;
+ c.s.out1 = 1; /* Drive data carrier detect */
+ c.s.rts = 0; /* Don't override RTS */
+ c.s.dtr = 0; /* Don't override DTR */
+ c.s.rxe = 1; /* Enable receive */
+ c.s.txe = 1; /* Enable transmit */
+ c.s.lbe = 0; /* Disable loopback */
+ c.s.uarten = 1); /* Enable uart */
+}
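+
+/* Worked example of the divisor math above, assuming SCLK = 800 MHz and
+   115200 baud (both values are illustrative): the UART clock is SCLK/6,
+   so divisor_x_64 = 800000000 / (115200 * 16 * 6 / 64) = 4629, giving
+   BAUD_DIVINT = 4629 >> 6 = 72 and BAUD_DIVFRAC = 4629 & 0x3f = 21,
+   i.e. a divisor of 72 + 21/64 = 72.33 = (800e6/6) / (16 * 115200). */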
+
+/**
+ * First C code run when a BDK application starts. It is called by bdk-start.S.
+ *
+ * @param image_crc A CRC32 of the entire image before any variables might have been updated by C.
+ * This should match the CRC32 in the image header.
+ * @param reg_x0 The contents of the X0 register when the image started. In images loaded after
+ * the boot stub, this contains an "environment" string containing "BOARD=xxx". The
+ * use of this is deprecated as it has been replaced with an expandable device tree
+ * in X1.
+ * @param reg_x1 The contents of the X1 register when the image started. For all images after the
+ * boot stub, this contains a physical address of a device tree in memory. This
+ * should be used by all images to identify and configure the board we are running
+ * on.
+ * @param reg_pc This is the PC the code started at before relocation. This is useful for
+ * the first stage to determine if it ran from trusted or non-trusted code.
+ */
+void __bdk_init(uint32_t image_crc, uint64_t reg_x0, uint64_t reg_x1, uint64_t reg_pc) __attribute((noreturn));
+void __bdk_init(uint32_t image_crc, uint64_t reg_x0, uint64_t reg_x1, uint64_t reg_pc)
+{
+ extern void __bdk_exception_current_el_sync_sp0();
+ BDK_MSR(VBAR_EL3, __bdk_exception_current_el_sync_sp0);
+ BDK_MSR(VBAR_EL2, __bdk_exception_current_el_sync_sp0);
+ BDK_MSR(VBAR_EL1, __bdk_exception_current_el_sync_sp0);
+
+ /* Use Cavium specific function to change memory to normal instead of
+ device attributes. DCVA47=1 makes unmapped addresses behave as
+ non-shared memory (not inner or outer shared in ARM speak) */
+ bdk_ap_cvmmemctl0_el1_t cvmmemctl0_el1;
+ BDK_MRS(s3_0_c11_c0_4, cvmmemctl0_el1.u);
+ cvmmemctl0_el1.s.dcva47 = 1;
+ BDK_MSR(s3_0_c11_c0_4, cvmmemctl0_el1.u);
+
+
+ /* Setup running with no mmu */
+ bdk_ap_sctlr_el3_t sctlr_el3;
+ BDK_MRS(SCTLR_EL3, sctlr_el3.u);
+ sctlr_el3.s.wxn = 0; /* No write perm changes */
+ sctlr_el3.s.i = 1; /* Enable Icache */
+ sctlr_el3.s.sa = 1; /* Enable stack alignment checking */
+ sctlr_el3.s.cc = 1; /* Enable Dcache */
+ sctlr_el3.s.aa = 0; /* Allow unaligned accesses */
+ sctlr_el3.s.m = 0; /* Disable MMU */
+ BDK_MSR(SCTLR_EL3, sctlr_el3.u);
+
+ bdk_node_t node = bdk_numa_local();
+ bdk_numa_set_exists(node);
+
+ /* Default color, Reset scroll region and goto bottom */
+ static const char BANNER_1[] = "\33[0m\33[1;r\33[100;1H"
+ "\n\n\nCavium SOC\n";
+ static const char BANNER_2[] = "Locking L2 cache\n";
+ static const char BANNER_CRC_RIGHT[] = "PASS: CRC32 verification\n";
+ static const char BANNER_CRC_WRONG[] = "FAIL: CRC32 verification\n";
+ static const char BANNER_3[] = "Transferring to thread scheduler\n";
+
+ BDK_MSR(TPIDR_EL3, 0);
+
+ if (bdk_is_boot_core())
+ {
+ /* Initialize the platform */
+ __bdk_platform_init();
+ if (!bdk_is_platform(BDK_PLATFORM_EMULATOR) && CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+ {
+ BDK_CSR_INIT(l2c_oci_ctl, node, BDK_L2C_OCI_CTL);
+ if (l2c_oci_ctl.s.iofrcl)
+ {
+ /* CCPI isn't being used, so don't reset if the links change */
+ BDK_CSR_WRITE(node, BDK_RST_OCX, 0);
+ BDK_CSR_READ(node, BDK_RST_OCX);
+ /* Force CCPI links down so they aren't trying to run while
+ we're configuring the QLMs */
+ __bdk_init_ccpi_early(1);
+ }
+ }
+
+ /* AP-23192: The DAP in pass 1.0 has an issue where its state isn't cleared for
+ cores in reset. Put the DAPs in reset as their associated cores are
+ also in reset */
+ if (!bdk_is_platform(BDK_PLATFORM_EMULATOR) && CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_0))
+ BDK_CSR_WRITE(node, BDK_RST_DBG_RESET, BDK_CSR_READ(node, BDK_RST_PP_RESET));
+
+ /* Enable the timer */
+ BDK_MSR(CNTFRQ_EL0, BDK_GTI_RATE); /* Needed for Asim */
+ bdk_clock_setup(node);
+
+ /* Only setup the uarts if they haven't been already setup */
+ BDK_CSR_INIT(uctl_ctl0, node, BDK_UAAX_UCTL_CTL(0));
+ if (!uctl_ctl0.s.h_clk_en)
+ bdk_set_baudrate(node, 0, BDK_UART_BAUDRATE, 0);
+ BDK_CSR_INIT(uctl_ctl1, node, BDK_UAAX_UCTL_CTL(1));
+ if (!uctl_ctl1.s.h_clk_en)
+ bdk_set_baudrate(node, 1, BDK_UART_BAUDRATE, 0);
+
+ __bdk_fs_init_early();
+ if (BDK_SHOW_BOOT_BANNERS)
+ write(1, BANNER_1, sizeof(BANNER_1)-1);
+
+ /* Only lock L2 if DDR3 isn't initialized */
+ if (bdk_is_platform(BDK_PLATFORM_HW) && !__bdk_is_dram_enabled(node))
+ {
+ if (BDK_TRACE_ENABLE_INIT)
+ write(1, BANNER_2, sizeof(BANNER_2)-1);
+ /* Lock the entire cache for chips with less than 4MB of
+ L2/LLC. Larger chips only lock 3/4, leaving 1/4 of the
+ cache free to speed up DRAM init and testing */
+ int lock_size = bdk_l2c_get_cache_size_bytes(node);
+ if (lock_size >= (4 << 20))
+ lock_size = lock_size * 3 / 4;
+ bdk_l2c_lock_mem_region(node, bdk_numa_get_address(node, 0), lock_size);
+ /* The locked region isn't considered dirty by L2. Do a
+ read/write of each cache line to force each to be dirty. This
+ is needed across the whole region to make sure the L2 dirty bits
+ are all up to date */
+ volatile uint64_t *ptr = bdk_phys_to_ptr(bdk_numa_get_address(node, 8));
+ /* The above pointer got address 8 to avoid NULL pointer checking
+ in bdk_phys_to_ptr(). Correct it here */
+ ptr--;
+ uint64_t *end = bdk_phys_to_ptr(bdk_numa_get_address(node, bdk_l2c_get_cache_size_bytes(node)));
+ while (ptr < end)
+ {
+ *ptr = *ptr;
+ ptr++;
+ }
+ /* The above locking will cause L2 to load zeros without DRAM setup.
+ This will cause L2C_TADX_INT[rddislmc], which we suppress below */
+ BDK_CSR_DEFINE(l2c_tadx_int, BDK_L2C_TADX_INT_W1C(0));
+ l2c_tadx_int.u = 0;
+ l2c_tadx_int.s.wrdislmc = 1;
+ l2c_tadx_int.s.rddislmc = 1;
+ l2c_tadx_int.s.rdnxm = 1;
+
+ BDK_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(0), l2c_tadx_int.u);
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX) || CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+ {
+ BDK_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(1), l2c_tadx_int.u);
+ BDK_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(2), l2c_tadx_int.u);
+ BDK_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(3), l2c_tadx_int.u);
+ }
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX))
+ {
+ BDK_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(4), l2c_tadx_int.u);
+ BDK_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(5), l2c_tadx_int.u);
+ BDK_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(6), l2c_tadx_int.u);
+ BDK_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(7), l2c_tadx_int.u);
+ }
+ }
+
+ /* Validate the image CRC */
+ extern void _start();
+ uint32_t *ptr_crc32 = (uint32_t *)(_start + 16);
+ uint32_t correct_crc = bdk_le32_to_cpu(*ptr_crc32);
+ if (correct_crc == image_crc)
+ write(1, BANNER_CRC_RIGHT, sizeof(BANNER_CRC_RIGHT) - 1);
+ else
+ write(1, BANNER_CRC_WRONG, sizeof(BANNER_CRC_WRONG) - 1);
+
+ if (BDK_TRACE_ENABLE_INIT)
+ write(1, BANNER_3, sizeof(BANNER_3)-1);
+ bdk_thread_initialize();
+ }
+
+ /* Enable the core timer */
+ BDK_MSR(CNTFRQ_EL0, BDK_GTI_RATE); /* Needed for Asim */
+ bdk_ap_cntps_ctl_el1_t cntps_ctl_el1;
+ cntps_ctl_el1.u = 0;
+ cntps_ctl_el1.s.imask = 1;
+ cntps_ctl_el1.s.enable = 1;
+ BDK_MSR(CNTPS_CTL_EL1, cntps_ctl_el1.u);
+
+ /* Setup an exception stack in case we crash */
+ int EX_STACK_SIZE = 16384;
+ void *exception_stack = malloc(EX_STACK_SIZE);
+ extern void __bdk_init_exception_stack(void *ptr);
+ __bdk_init_exception_stack(exception_stack + EX_STACK_SIZE);
+
+ bdk_atomic_add64(&__bdk_alive_coremask[node], bdk_core_to_mask());
+
+ /* Record our input registers for use later */
+ __bdk_init_reg_x0 = reg_x0;
+ __bdk_init_reg_x1 = reg_x1;
+ __bdk_init_reg_pc = reg_pc;
+ bdk_thread_first(__bdk_init_main, 0, NULL, 0);
+}
+
+/**
+ * Call this function to take secondary cores out of reset and have
+ * them start running threads
+ *
+ * @param node Node to use in a Numa setup. Can be an exact ID or a special
+ * value.
+ * @param coremask Cores to start. Zero is a shortcut for all.
+ *
+ * @return Zero on success, negative on failure.
+ */
+int bdk_init_cores(bdk_node_t node, uint64_t coremask)
+{
+ extern void __bdk_start_cores();
+ if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX))
+ {
+ /* Write the address of the main entry point */
+ BDK_TRACE(INIT, "N%d: Setting address for boot jump\n", node);
+ BDK_CSR_WRITE(node, BDK_MIO_BOOT_AP_JUMP, (uint64_t)__bdk_start_cores);
+ }
+ else
+ {
+ BDK_TRACE(INIT, "N%d: Setting ROM boot code\n", node);
+ /* Assembly for ROM memory:
+ d508711f ic ialluis
+ d503201f nop
+ 58000040 ldr x0, 328 <branch_addr>
+ d61f0000 br x0
+ branch_addr:
+ Memory is little endian, so 64 bit constants have the first
+ instruction in the low word */
+ BDK_CSR_WRITE(node, BDK_ROM_MEMX(0), 0xd503201fd508711f);
+ BDK_CSR_WRITE(node, BDK_ROM_MEMX(1), 0xd61f000058000040);
+ BDK_CSR_WRITE(node, BDK_ROM_MEMX(2), (uint64_t)__bdk_start_cores);
+ }
+
+ /* Choose all cores by default */
+ if (coremask == 0)
+ coremask = -1;
+
+ /* Limit to the cores that aren't already running */
+ coremask &= ~__bdk_alive_coremask[node];
+
+ /* Limit to the cores that are specified in configuration menu */
+ uint64_t config_coremask = bdk_config_get_int(BDK_CONFIG_COREMASK);
+ if (config_coremask)
+ coremask &= config_coremask;
+
+ /* Limit to the cores that exist */
+ coremask &= (1ull<<bdk_get_num_cores(node)) - 1;
+
+ uint64_t reset = BDK_CSR_READ(node, BDK_RST_PP_RESET);
+ BDK_TRACE(INIT, "N%d: Cores currently in reset: 0x%lx\n", node, reset);
+ uint64_t need_reset_off = reset & coremask;
+ if (need_reset_off)
+ {
+ BDK_TRACE(INIT, "N%d: Taking cores out of reset (0x%lx)\n", node, need_reset_off);
+ BDK_CSR_WRITE(node, BDK_RST_PP_RESET, reset & ~need_reset_off);
+ /* Wait for cores to finish coming out of reset */
+ bdk_wait_usec(1);
+ if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_RST_PP_PENDING, pend, ==, 0, 100000))
+ bdk_error("Timeout wating for reset pending to clear");
+ /* AP-23192: The DAP in pass 1.0 has an issue where its state isn't cleared for
+ cores in reset. Put the DAPs in reset as their associated cores are
+ also in reset */
+ if (!bdk_is_platform(BDK_PLATFORM_EMULATOR) && CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_0))
+ BDK_CSR_WRITE(node, BDK_RST_DBG_RESET, reset & ~need_reset_off);
+ }
+
+ BDK_TRACE(INIT, "N%d: Wait up to 1s for the cores to boot\n", node);
+ uint64_t timeout = bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_TIME) + bdk_clock_get_count(BDK_CLOCK_TIME);
+ while ((bdk_clock_get_count(BDK_CLOCK_TIME) < timeout) && ((bdk_atomic_get64(&__bdk_alive_coremask[node]) & coremask) != coremask))
+ {
+ /* Tight spin, no thread schedules */
+ }
+
+ if ((bdk_atomic_get64(&__bdk_alive_coremask[node]) & coremask) != coremask)
+ {
+ bdk_error("Node %d: Some cores failed to start. Alive mask 0x%lx, requested 0x%lx\n",
+ node, __bdk_alive_coremask[node], coremask);
+ return -1;
+ }
+ BDK_TRACE(INIT, "N%d: All cores booted\n", node);
+ return 0;
+}
+
+/**
+ * Put cores back in reset and power them down
+ *
+ * @param node Node to update
+ * @param coremask Each bit will be a core put in reset. Cores already in reset are unaffected
+ *
+ * @return Zero on success, negative on failure
+ */
+int bdk_reset_cores(bdk_node_t node, uint64_t coremask)
+{
+ extern void __bdk_reset_thread(int arg1, void *arg2);
+
+ /* Limit to the cores that exist */
+ coremask &= (1ull<<bdk_get_num_cores(node)) - 1;
+
+ /* Update which cores are in reset */
+ uint64_t reset = BDK_CSR_READ(node, BDK_RST_PP_RESET);
+ BDK_TRACE(INIT, "N%d: Cores currently in reset: 0x%lx\n", node, reset);
+ coremask &= ~reset;
+ BDK_TRACE(INIT, "N%d: Cores to put into reset: 0x%lx\n", node, coremask);
+
+ /* Check if everything is already done */
+ if (coremask == 0)
+ return 0;
+
+ int num_cores = bdk_get_num_cores(node);
+ for (int core = 0; core < num_cores; core++)
+ {
+ uint64_t my_mask = 1ull << core;
+ /* Skip cores not in mask */
+ if ((coremask & my_mask) == 0)
+ continue;
+ BDK_TRACE(INIT, "N%d: Telling core %d to go into reset\n", node, core);
+ if (bdk_thread_create(node, my_mask, __bdk_reset_thread, 0, NULL, 0))
+ {
+ bdk_error("Failed to create thread for putting core in reset");
+ continue;
+ }
+ /* Clear the core in the alive mask */
+ bdk_atomic_fetch_and_bclr64_nosync((uint64_t*)&__bdk_alive_coremask[node], my_mask);
+ }
+
+ BDK_TRACE(INIT, "N%d: Waiting for all reset bits to be set\n", node);
+ uint64_t timeout = bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_TIME) + bdk_clock_get_count(BDK_CLOCK_TIME);
+ while (bdk_clock_get_count(BDK_CLOCK_TIME) < timeout)
+ {
+ reset = BDK_CSR_READ(node, BDK_RST_PP_RESET);
+ if ((reset & coremask) == coremask)
+ break;
+ bdk_thread_yield();
+ }
+ /* AP-23192: The DAP in pass 1.0 has an issue where its state isn't cleared for
+ cores in reset. Put the DAPs in reset as their associated cores are
+ also in reset */
+ if (!bdk_is_platform(BDK_PLATFORM_EMULATOR) && CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_0))
+ BDK_CSR_WRITE(node, BDK_RST_DBG_RESET, BDK_CSR_READ(node, BDK_RST_PP_RESET));
+
+ BDK_TRACE(INIT, "N%d: Cores now in reset: 0x%lx\n", node, reset);
+
+ return ((reset & coremask) == coremask) ? 0 : -1;
+}
+
+/**
+ * Call this function to take secondary nodes and cores out of
+ * reset and have them start running threads
+ *
+ * @param skip_cores If non-zero, cores are not started. Only the nodes are set up
+ * @param ccpi_sw_gbaud
+ * If CCPI is in software mode, this is the speed the CCPI QLMs will be configured
+ * for
+ *
+ * @return Zero on success, negative on failure.
+ */
+int bdk_init_nodes(int skip_cores, int ccpi_sw_gbaud)
+{
+ int result = 0;
+ int do_oci_init = (__bdk_init_ccpi_links != NULL);
+
+ /* Only init OCI/CCPI on chips that support it */
+ do_oci_init &= CAVIUM_IS_MODEL(CAVIUM_CN88XX);
+
+ /* Check that the BDK config says multi-node is enabled */
+ if (bdk_config_get_int(BDK_CONFIG_MULTI_NODE) == 0)
+ do_oci_init = 0;
+
+ /* Simulation under Asim is a special case. Multi-node is simulated, but
+ not the details of the low-level link */
+ if (do_oci_init && bdk_is_platform(BDK_PLATFORM_ASIM))
+ {
+ bdk_numa_set_exists(0);
+ bdk_numa_set_exists(1);
+ /* Skip the rest in simulation */
+ do_oci_init = 0;
+ }
+
+ if (do_oci_init)
+ {
+ if (__bdk_init_ccpi_links(ccpi_sw_gbaud) == 0)
+ {
+ /* Don't run node init if L2C_OCI_CTL shows that it has already
+ been done */
+ BDK_CSR_INIT(l2c_oci_ctl, bdk_numa_local(), BDK_L2C_OCI_CTL);
+ if (l2c_oci_ctl.s.enaoci == 0)
+ result |= __bdk_init_ccpi_multinode();
+ }
+ }
+
+ /* Start cores on all nodes unless it was disabled */
+ if (!skip_cores)
+ {
+ for (bdk_node_t node=0; node<BDK_NUMA_MAX_NODES; node++)
+ {
+ if (bdk_numa_exists(node))
+ result |= bdk_init_cores(node, 0);
+ }
+ }
+ return result;
+}
+
+/**
+ * Get the coremask of the cores actively running the BDK. Doesn't count cores
+ * that aren't booted.
+ *
+ * @param node Node to get the coremask for
+ *
+ * @return 64bit bitmask
+ */
+uint64_t bdk_get_running_coremask(bdk_node_t node)
+{
+ return __bdk_alive_coremask[node];
+}
+
+/**
+ * Return the number of cores actively running in the BDK for the given node.
+ * Not an inline so it can be called from LUA.
+ *
+ * @param node Node to get the core count for
+ *
+ * @return Number of cores running. Doesn't count cores that aren't booted
+ */
+int bdk_get_num_running_cores(bdk_node_t node)
+{
+ return __builtin_popcountl(bdk_get_running_coremask(node));
+}
+
diff --git a/src/vendorcode/cavium/bdk/libbdk-os/bdk-thread.c b/src/vendorcode/cavium/bdk/libbdk-os/bdk-thread.c
new file mode 100644
index 0000000000..df1d02864b
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-os/bdk-thread.c
@@ -0,0 +1,384 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include <stdio.h>
+#include <malloc.h>
+
+#define STACK_CANARY 0x0BADBADBADBADBADull
+
+typedef struct bdk_thread
+{
+ struct bdk_thread *next;
+ uint64_t coremask;
+ uint64_t gpr[32]; /* Reg 31 is SP */
+ struct _reent lib_state;
+ uint64_t stack_canary;
+ uint64_t stack[0];
+} bdk_thread_t;
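+
+/* Note on the layout above: the thread's stack grows down from the top of
+   stack[] toward stack_canary, so an overflow overwrites the canary;
+   bdk_thread_yield() checks it and calls bdk_fatal() on corruption. */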
+
+typedef struct
+{
+ bdk_thread_t* head;
+ bdk_thread_t* tail;
+ bdk_spinlock_t lock;
+ int64_t __padding1[16-3]; /* Stats in different cache line for speed */
+ int64_t stat_num_threads;
+ int64_t stat_no_schedulable_threads;
+ int64_t stat_next_calls;
+ int64_t stat_next_walks;
+ int64_t __padding2[16-4];
+} bdk_thread_node_t;
+
+static bdk_thread_node_t bdk_thread_node[BDK_NUMA_MAX_NODES];
+
+extern void __bdk_thread_switch(bdk_thread_t* next_context, int delete_old);
+
+/**
+ * Main thread body for all threads
+ *
+ * @param func User function to call
+ * @param arg0 First argument to the user function
+ * @param arg1 Second argument to the user function
+ */
+static void __bdk_thread_body(bdk_thread_func_t func, int arg0, void *arg1)
+{
+ func(arg0, arg1);
+ bdk_thread_destroy();
+}
+
+
+/**
+ * Initialize the BDK thread library
+ *
+ * @return Zero on success, negative on failure
+ */
+int bdk_thread_initialize(void)
+{
+ bdk_zero_memory(bdk_thread_node, sizeof(bdk_thread_node));
+ _REENT_INIT_PTR(&__bdk_thread_global_reent);
+ return 0;
+}
+
+static bdk_thread_t *__bdk_thread_next(void)
+{
+ bdk_thread_node_t *t_node = &bdk_thread_node[bdk_numa_local()];
+ uint64_t coremask = bdk_core_to_mask();
+
+ bdk_atomic_add64_nosync(&t_node->stat_next_calls, 1);
+ bdk_thread_t *prev = NULL;
+ bdk_thread_t *next = t_node->head;
+ int walks = 0;
+ while (next && !(next->coremask & coremask))
+ {
+ prev = next;
+ next = next->next;
+ walks++;
+ }
+ if (walks)
+ bdk_atomic_add64_nosync(&t_node->stat_next_walks, walks);
+
+ if (next)
+ {
+ if (t_node->tail == next)
+ t_node->tail = prev;
+ if (prev)
+ prev->next = next->next;
+ else
+ t_node->head = next->next;
+ next->next = NULL;
+ }
+ else
+ bdk_atomic_add64_nosync(&t_node->stat_no_schedulable_threads, 1);
+
+ return next;
+}
+
+/**
+ * Yield the current thread and run a new one
+ */
+void bdk_thread_yield(void)
+{
+ if (BDK_DBG_MAGIC_ENABLE && (bdk_numa_local() == bdk_numa_master()))
+ bdk_dbg_check_magic();
+ bdk_thread_node_t *t_node = &bdk_thread_node[bdk_numa_local()];
+ bdk_thread_t *current;
+ BDK_MRS_NV(TPIDR_EL3, current);
+
+ /* Yield can be called without a thread context during core init. The
+ cores call bdk_wait_usec(), which yields. In this case yielding
+ does nothing */
+ if (bdk_unlikely(!current))
+ return;
+
+ if (bdk_unlikely(current->stack_canary != STACK_CANARY))
+ bdk_fatal("bdk_thread_yield() detected a stack overflow\n");
+
+ if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+ bdk_sso_process_work();
+
+ if (t_node->head == NULL)
+ return;
+
+ bdk_spinlock_lock(&t_node->lock);
+
+ /* Find the first thread that can run on this core */
+ bdk_thread_t *next = __bdk_thread_next();
+
+ /* If next is NULL then there are no other threads ready to run and we
+ will continue without doing anything */
+ if (next)
+ {
+ __bdk_thread_switch(next, 0);
+ /* Unlock performed in __bdk_thread_switch_complete */
+ return;
+ }
+ bdk_spinlock_unlock(&t_node->lock);
+}
+
+
+/**
+ * Create a new thread and return it. The thread will not be scheduled
+ * as it isn't put in the thread list.
+ *
+ * @param coremask Mask of cores the thread can run on. Each set bit is an allowed
+ * core. Zero and -1 are both shortcuts for all cores.
+ * @param func Function to run as a thread
+ * @param arg0 First argument to the function
+ * @param arg1 Second argument to the function
+ * @param stack_size Stack size for the new thread. Set to zero for the system default.
+ *
+ * @return Thread or NULL on failure
+ */
+static void *__bdk_thread_create(uint64_t coremask, bdk_thread_func_t func, int arg0, void *arg1, int stack_size)
+{
+ bdk_thread_t *thread;
+ if (!stack_size)
+ stack_size = BDK_THREAD_DEFAULT_STACK_SIZE;
+
+ thread = memalign(16, sizeof(bdk_thread_t) + stack_size);
+ if (thread == NULL)
+ {
+ bdk_error("Unable to allocate memory for new thread\n");
+ return NULL;
+ }
+ memset(thread, 0, sizeof(bdk_thread_t) + stack_size);
+ if (coremask == 0)
+ coremask = -1;
+ thread->coremask = coremask;
+ thread->gpr[0] = (uint64_t)func; /* x0 = Argument 0 to __bdk_thread_body */
+ thread->gpr[1] = arg0; /* x1 = Argument 1 to __bdk_thread_body */
+ thread->gpr[2] = (uint64_t)arg1; /* x2 = Argument 2 to __bdk_thread_body */
+ thread->gpr[29] = 0; /* x29 = Frame pointer */
+ thread->gpr[30] = (uint64_t)__bdk_thread_body; /* x30 = Link register */
+ thread->gpr[31] = (uint64_t)thread->stack + stack_size; /* x31 = Stack pointer */
+ if (thread->gpr[31] & 0xf)
+ bdk_fatal("Stack not aligned 0x%lx\n", thread->gpr[31]);
+ _REENT_INIT_PTR(&thread->lib_state);
+ extern void __sinit(struct _reent *);
+ __sinit(&thread->lib_state);
+ thread->stack_canary = STACK_CANARY;
+ thread->next = NULL;
+ return thread;
+}
+
+
+/**
+ * Create a new thread. The thread may be scheduled to any of the
+ * cores supplied in the coremask. Note that a single thread is
+ * created and may only run on one core at a time. The thread may
+ * not start executing until the next yield call if all cores in
+ * the coremask are currently busy.
+ *
+ * @param node Node to use in a Numa setup. Can be an exact ID or a
+ * special value.
+ * @param coremask Mask of cores the thread can run on. Each set bit is an allowed
+ * core. Zero and -1 are both shortcuts for all cores.
+ * @param func Function to run as a thread
+ * @param arg0 First argument to the function
+ * @param arg1 Second argument to the function
+ * @param stack_size Stack size for the new thread. Set to zero for the system default.
+ *
+ * @return Zero on success, negative on failure
+ */
+int bdk_thread_create(bdk_node_t node, uint64_t coremask, bdk_thread_func_t func, int arg0, void *arg1, int stack_size)
+{
+ bdk_thread_node_t *t_node = &bdk_thread_node[node];
+ bdk_thread_t *thread = __bdk_thread_create(coremask, func, arg0, arg1, stack_size);
+ if (thread == NULL)
+ return -1;
+
+ bdk_atomic_add64_nosync(&t_node->stat_num_threads, 1);
+ bdk_spinlock_lock(&t_node->lock);
+ if (t_node->tail)
+ t_node->tail->next = thread;
+ else
+ t_node->head = thread;
+ t_node->tail = thread;
+ bdk_spinlock_unlock(&t_node->lock);
+ BDK_SEV;
+ return 0;
+}
+
+
+/**
+ * Destroy the currently running thread. This never returns.
+ */
+void bdk_thread_destroy(void)
+{
+ bdk_thread_node_t *t_node = &bdk_thread_node[bdk_numa_local()];
+ bdk_thread_t *current;
+ BDK_MRS_NV(TPIDR_EL3, current);
+ if (bdk_unlikely(!current))
+ bdk_fatal("bdk_thread_destroy() called without thread context\n");
+ if (bdk_unlikely(current->stack_canary != STACK_CANARY))
+ bdk_fatal("bdk_thread_destroy() detected a stack overflow\n");
+
+ fflush(NULL);
+ bdk_atomic_add64_nosync(&t_node->stat_num_threads, -1);
+
+ while (1)
+ {
+ if (BDK_DBG_MAGIC_ENABLE && (bdk_numa_local() == bdk_numa_master()))
+ bdk_dbg_check_magic();
+ if (t_node->head)
+ {
+ bdk_spinlock_lock(&t_node->lock);
+ /* Find the first thread that can run on this core */
+ bdk_thread_t *next = __bdk_thread_next();
+
+ /* If next is NULL then there are no other threads ready to run and we
+ will continue without doing anything */
+ if (next)
+ {
+ __bdk_thread_switch(next, 1);
+ bdk_fatal("bdk_thread_destroy() should never get here\n");
+ }
+ bdk_spinlock_unlock(&t_node->lock);
+ }
+ if (CAVIUM_IS_MODEL(CAVIUM_CN83XX))
+ bdk_sso_process_work();
+ BDK_WFE;
+ }
+}
+
+struct _reent __bdk_thread_global_reent;
+struct _reent *__bdk_thread_getreent(void)
+{
+ bdk_thread_t *current;
+ BDK_MRS_NV(TPIDR_EL3, current);
+ if (current)
+ return &current->lib_state;
+ else
+ return &__bdk_thread_global_reent;
+}
+
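+/* Completes a context switch on the new thread's stack: the old thread is
+ either freed (when it destroyed itself) or put back on the run list, and
+ the scheduler lock taken before __bdk_thread_switch() is released. */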
+void __bdk_thread_switch_complete(bdk_thread_t* old_context, int delete_old)
+{
+ bdk_thread_node_t *t_node = &bdk_thread_node[bdk_numa_local()];
+ if (bdk_unlikely(delete_old))
+ {
+ bdk_spinlock_unlock(&t_node->lock);
+ free(old_context);
+ }
+ else
+ {
+ if (bdk_likely(old_context))
+ {
+ if (t_node->tail)
+ t_node->tail->next = old_context;
+ else
+ t_node->head = old_context;
+ t_node->tail = old_context;
+ }
+ bdk_spinlock_unlock(&t_node->lock);
+ if (bdk_likely(old_context))
+ BDK_SEV;
+ }
+}
+
+
+/**
+ * Called to create the initial thread for a CPU. Must be called
+ * once for each CPU.
+ *
+ * @param func Function to run as new thread. It is guaranteed that this will
+ * be the next thread run by the core.
+ * @param arg0 First thread argument
+ * @param arg1 Second thread argument
+ * @param stack_size Initial stack size, or zero for the default
+ */
+void bdk_thread_first(bdk_thread_func_t func, int arg0, void *arg1, int stack_size)
+{
+ bdk_thread_node_t *t_node = &bdk_thread_node[bdk_numa_local()];
+ void *thread = __bdk_thread_create(bdk_core_to_mask(), func, arg0, arg1, stack_size);
+ if (thread)
+ {
+ bdk_atomic_add64_nosync(&t_node->stat_num_threads, 1);
+ bdk_spinlock_lock(&t_node->lock);
+ __bdk_thread_switch(thread, 0);
+ }
+ bdk_fatal("Create of __bdk_init_main thread failed\n");
+}
+
+/**
+ * Display statistics about the number of threads and scheduling
+ */
+void bdk_thread_show_stats(void)
+{
+ for (bdk_node_t node = BDK_NODE_0; node < BDK_NUMA_MAX_NODES; node++)
+ {
+ if (!bdk_numa_exists(node))
+ continue;
+ bdk_thread_node_t *t_node = &bdk_thread_node[node];
+ printf("Node %d\n", node);
+ printf(" Active threads: %ld\n", t_node->stat_num_threads);
+ printf(" Schedule checks: %ld\n", t_node->stat_next_calls);
+ int64_t div = t_node->stat_next_calls;
+ if (!div)
+ div = 1;
+ printf(" Average walk depth: %ld\n",
+ t_node->stat_next_walks / div);
+ printf(" Not switching: %ld (%ld%%)\n",
+ t_node->stat_no_schedulable_threads,
+ t_node->stat_no_schedulable_threads * 100 / div);
+ bdk_atomic_set64(&t_node->stat_next_calls, 0);
+ bdk_atomic_set64(&t_node->stat_next_walks, 0);
+ bdk_atomic_set64(&t_node->stat_no_schedulable_threads, 0);
+ }
+}
diff --git a/src/vendorcode/cavium/bdk/libbdk-trust/bdk-trust.c b/src/vendorcode/cavium/bdk/libbdk-trust/bdk-trust.c
new file mode 100644
index 0000000000..27c3294479
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libbdk-trust/bdk-trust.c
@@ -0,0 +1,286 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-fusf.h"
+#include "libbdk-arch/bdk-csrs-rom.h"
+
+/* The define BDK_TRUST_HARD_BLOW_NV controls whether the BDK will
+ hard blow the secure NV counter on boot. This is needed for a
+ production system, but can be dangerous in a development
+ environment. The default value of 0 is to prevent bricking of
+ chips due to CSIB[NVCOUNT] mistakes. BDK_TRUST_HARD_BLOW_NV must
+ be changed to a 1 for production. The code below will display a
+ warning if BDK_TRUST_HARD_BLOW_NV=0 in a trusted boot to remind
+ you */
+#define BDK_TRUST_HARD_BLOW_NV 0
+
+/* The CSIB used to boot will be stored here by bdk-start.S */
+union bdk_rom_csib_s __bdk_trust_csib __attribute__((section("init")));
+static bdk_trust_level_t __bdk_trust_level = BDK_TRUST_LEVEL_BROKEN;
+
+/**
+ * Update the fused secure NV counter to reflect the CSIB[NVCOUNT] value. In
+ * production systems, be sure to set BDK_TRUST_HARD_BLOW_NV=1.
+ */
+static void __bdk_program_nv_counter(void)
+{
+ int hw_nv = bdk_trust_get_nv_counter();
+ int csib_nv = __bdk_trust_csib.s.nvcnt;
+
+ if (!BDK_TRUST_HARD_BLOW_NV)
+ {
+ printf("\33[1m"); /* Bold */
+ bdk_warn("\n");
+ bdk_warn("********************************************************\n");
+ bdk_warn("* Configured for soft blow of secure NV counter. This\n");
+ bdk_warn("* build is not suitable for production trusted boot.\n");
+ bdk_warn("********************************************************\n");
+ bdk_warn("\n");
+ printf("\33[0m"); /* Normal */
+ }
+
+ /* Check if the CSIB NV counter is less than the HW fused values.
+ This means the image is an old rollback. Refuse to run */
+ if (csib_nv < hw_nv)
+ bdk_fatal("CSIB[NVCOUNT] is less than FUSF_CTL[ROM_T_CNT]. Image rollback not allowed\n");
+ /* If the CSIB NV counter matches the HW fuses, everything is
+ good */
+ if (csib_nv == hw_nv)
+ return;
+ /* CSIB NV counter is larger than the HW fuses. We must blow
+ fuses to move the hardware counter forward, protecting from
+ image rollback */
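+ /* For example, with hw_nv = 2 and CSIB[NVCOUNT] = 3 this blows fuse
+ index 2 (ROM_T_CNT bit 2), so bdk_trust_get_nv_counter() reports 3
+ on the next read */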
+ if (BDK_TRUST_HARD_BLOW_NV)
+ {
+ BDK_TRACE(INIT, "Trust: Hard blow secure NV counter to %d\n", csib_nv);
+ uint64_t v = 1ull << BDK_FUSF_FUSE_NUM_E_ROM_T_CNTX(csib_nv - 1);
+ bdk_fuse_field_hard_blow(bdk_numa_master(), BDK_FUSF_FUSE_NUM_E_FUSF_LCK, v, 0);
+ }
+ else
+ {
+ BDK_TRACE(INIT, "Trust: Soft blow secure NV counter to %d\n", csib_nv);
+ bdk_fuse_field_soft_blow(bdk_numa_master(), BDK_FUSF_FUSE_NUM_E_ROM_T_CNTX(csib_nv - 1));
+ }
+}
+
+/**
+ * Called by boot stub (TBL1FW) to initialize the state of trust
+ */
+void __bdk_trust_init(void)
+{
+ extern uint64_t __bdk_init_reg_pc; /* The contents of PC when this image started */
+ const bdk_node_t node = bdk_numa_local();
+ volatile uint64_t *huk = bdk_phys_to_ptr(bdk_numa_get_address(node, BDK_FUSF_HUKX(0)));
+
+ /* Non-trusted boot address */
+ if (__bdk_init_reg_pc == 0x120000)
+ {
+ __bdk_trust_level = BDK_TRUST_LEVEL_NONE;
+ if (huk[0] | huk[1])
+ {
+ BDK_TRACE(INIT, "Trust: Initial image, Non-trusted boot with HUK\n");
+ goto fail_trust;
+ }
+ else
+ {
+ BDK_TRACE(INIT, "Trust: Initial image, Non-trusted boot without HUK\n");
+ goto skip_trust;
+ }
+ }
+
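+ /* 0x150000 is the trusted boot entry address; starting anywhere else
+ means this is not the first image in the boot chain */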
+ if (__bdk_init_reg_pc != 0x150000)
+ {
+ /* Not the first image */
+ BDK_CSR_INIT(rst_boot, node, BDK_RST_BOOT);
+ if (!rst_boot.s.trusted_mode)
+ {
+ __bdk_trust_level = BDK_TRUST_LEVEL_NONE;
+ BDK_TRACE(INIT, "Trust: Secondary image, non-trusted boot\n");
+ goto skip_trust;
+ }
+ int csibsize = 0;
+ const union bdk_rom_csib_s *csib = bdk_config_get_blob(&csibsize, BDK_CONFIG_TRUST_CSIB);
+ if (!csib)
+ {
+ __bdk_trust_level = BDK_TRUST_LEVEL_NONE;
+ BDK_TRACE(INIT, "Trust: Secondary image, non-trusted boot\n");
+ goto skip_trust;
+ }
+ if (csibsize != sizeof(__bdk_trust_csib))
+ {
+ BDK_TRACE(INIT, "Trust: Secondary image, Trusted boot with corrupt CSIB, trust broken\n");
+ goto fail_trust;
+ }
+ /* Record our trust level */
+ switch (csib->s.crypt)
+ {
+ case 0:
+ __bdk_trust_level = BDK_TRUST_LEVEL_SIGNED;
+ BDK_TRACE(INIT, "Trust: Secondary image, Trused boot, no encryption\n");
+ goto success_trust;
+ case 1:
+ __bdk_trust_level = BDK_TRUST_LEVEL_SIGNED_SSK;
+ BDK_TRACE(INIT, "Trust: Secondary image, Trused boot, SSK encryption\n");
+ goto success_trust;
+ case 2:
+ __bdk_trust_level = BDK_TRUST_LEVEL_SIGNED_BSSK;
+ BDK_TRACE(INIT, "Trust: Secondary image, Trused boot, BSSK encryption\n");
+ goto success_trust;
+ default:
+ __bdk_trust_level = BDK_TRUST_LEVEL_BROKEN;
+ BDK_TRACE(INIT, "Trust: Secondary image, Trusted boot, Corrupt CSIB[crypt], trust broken\n");
+ goto fail_trust;
+ }
+ }
+
+ /* Copy the Root of Trust public key out of the CSIB */
+ volatile uint64_t *rot_pub_key = bdk_key_alloc(node, 64);
+ if (!rot_pub_key)
+ {
+ __bdk_trust_level = BDK_TRUST_LEVEL_BROKEN;
+ BDK_TRACE(INIT, "Trust: Failed to allocate ROT memory, trust broken\n");
+ goto fail_trust;
+ }
+ rot_pub_key[0] = bdk_le64_to_cpu(__bdk_trust_csib.s.rotpk0);
+ rot_pub_key[1] = bdk_le64_to_cpu(__bdk_trust_csib.s.rotpk1);
+ rot_pub_key[2] = bdk_le64_to_cpu(__bdk_trust_csib.s.rotpk2);
+ rot_pub_key[3] = bdk_le64_to_cpu(__bdk_trust_csib.s.rotpk3);
+ rot_pub_key[4] = bdk_le64_to_cpu(__bdk_trust_csib.s.rotpk4);
+ rot_pub_key[5] = bdk_le64_to_cpu(__bdk_trust_csib.s.rotpk5);
+ rot_pub_key[6] = bdk_le64_to_cpu(__bdk_trust_csib.s.rotpk6);
+ rot_pub_key[7] = bdk_le64_to_cpu(__bdk_trust_csib.s.rotpk7);
+ bdk_config_set_int(bdk_ptr_to_phys((void*)rot_pub_key), BDK_CONFIG_TRUST_ROT_ADDR);
+ BDK_TRACE(INIT, "Trust: ROT %016lx %016lx %016lx %016lx %016lx %016lx %016lx %016lx\n",
+ bdk_cpu_to_be64(rot_pub_key[0]), bdk_cpu_to_be64(rot_pub_key[1]),
+ bdk_cpu_to_be64(rot_pub_key[2]), bdk_cpu_to_be64(rot_pub_key[3]),
+ bdk_cpu_to_be64(rot_pub_key[4]), bdk_cpu_to_be64(rot_pub_key[5]),
+ bdk_cpu_to_be64(rot_pub_key[6]), bdk_cpu_to_be64(rot_pub_key[7]));
+
+ /* Update the secure NV counter with the value in the CSIB */
+ __bdk_program_nv_counter();
+
+ /* Create the BSSK */
+ if (huk[0] | huk[1])
+ {
+ uint64_t iv[2] = {0, 0};
+ volatile uint64_t *bssk = bdk_key_alloc(node, 16);
+ if (!bssk)
+ {
+ __bdk_trust_level = BDK_TRUST_LEVEL_BROKEN;
+ BDK_TRACE(INIT, "Trust: Failed to allocate BSSK memory, trust broken\n");
+ goto fail_trust;
+ }
+ BDK_TRACE(INIT, "Trust: Calculating BSSK\n");
+ uint64_t tmp_bssk[2];
+ tmp_bssk[0] = __bdk_trust_csib.s.fs0;
+ tmp_bssk[1] = __bdk_trust_csib.s.fs1;
+ bdk_aes128cbc_decrypt((void*)huk, (void*)tmp_bssk, 16, iv);
+ bssk[0] = tmp_bssk[0];
+ bssk[1] = tmp_bssk[1];
+ tmp_bssk[0] = 0;
+ tmp_bssk[1] = 0;
+ bdk_config_set_int(bdk_ptr_to_phys((void*)bssk), BDK_CONFIG_TRUST_BSSK_ADDR);
+ //BDK_TRACE(INIT, "Trust: BSSK %016lx %016lx\n", bdk_cpu_to_be64(bssk[0]), bdk_cpu_to_be64(bssk[1]));
+ }
+
+ /* Record our trust level */
+ switch (__bdk_trust_csib.s.crypt)
+ {
+ case 0:
+ __bdk_trust_level = BDK_TRUST_LEVEL_SIGNED;
+ BDK_TRACE(INIT, "Trust: Trused boot, no encryption\n");
+ break;
+ case 1:
+ __bdk_trust_level = BDK_TRUST_LEVEL_SIGNED_SSK;
+ BDK_TRACE(INIT, "Trust: Trused boot, SSK encryption\n");
+ break;
+ case 2:
+ __bdk_trust_level = BDK_TRUST_LEVEL_SIGNED_BSSK;
+ BDK_TRACE(INIT, "Trust: Trused boot, BSSK encryption\n");
+ break;
+ default:
+ __bdk_trust_level = BDK_TRUST_LEVEL_BROKEN;
+ goto fail_trust;
+ }
+
+ /* We started at the trusted boot address, CSIB should be
+ valid */
+ bdk_config_set_blob(sizeof(__bdk_trust_csib), &__bdk_trust_csib, BDK_CONFIG_TRUST_CSIB);
+success_trust:
+ bdk_signed_load_public();
+ return;
+
+fail_trust:
+ /* Hide secrets */
+ BDK_CSR_MODIFY(c, node, BDK_RST_BOOT,
+ c.s.dis_huk = 1);
+ BDK_TRACE(INIT, "Trust: Secrets Hidden\n");
+skip_trust:
+ /* Erase CSIB as it is invalid */
+ memset(&__bdk_trust_csib, 0, sizeof(__bdk_trust_csib));
+ bdk_config_set_blob(0, NULL, BDK_CONFIG_TRUST_CSIB);
+}
+
+/**
+ * Returns the current level of trust. Must be called after
+ * __bdk_trust_init()
+ *
+ * @return Enumerated trust level, see bdk_trust_level_t
+ */
+bdk_trust_level_t bdk_trust_get_level(void)
+{
+ return __bdk_trust_level;
+}
+
+/**
+ * Return the current secure NV counter stored in the fuses
+ *
+ * @return NV counter (0-31)
+ */
+int bdk_trust_get_nv_counter(void)
+{
+ /* Count leading zeros in FUSF_CTL[ROM_T_CNT] to determine the
+ hardware NV value */
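+ /* e.g. ROM_T_CNT = 0x7 gives hw_nv = 32 - 29 = 3; the counter is the
+ index of the highest blown fuse plus one */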
+ BDK_CSR_INIT(fusf_ctl, bdk_numa_master(), BDK_FUSF_CTL);
+ int hw_nv = 0;
+ if (fusf_ctl.s.rom_t_cnt)
+ hw_nv = 32 - __builtin_clz(fusf_ctl.s.rom_t_cnt);
+ return hw_nv;
+}
+
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-csr.h b/src/vendorcode/cavium/bdk/libdram/dram-csr.h
new file mode 100644
index 0000000000..ffe1472a0b
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-csr.h
@@ -0,0 +1,86 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Functions and macros for libdram access to CSRs. These build
+ * on the normal BDK functions to allow logging of CSRs based on
+ * the libdram verbosity level. Internal use only.
+ */
+
+/**
+ * Write a CSR, possibly logging it based on the verbosity
+ * level. You should use DRAM_CSR_WRITE() as a convenient
+ * wrapper.
+ *
+ * @param node
+ * @param csr_name
+ * @param type
+ * @param busnum
+ * @param size
+ * @param address
+ * @param value
+ */
+#ifdef DRAM_CSR_WRITE_INLINE
+static inline void dram_csr_write(bdk_node_t node, const char *csr_name, bdk_csr_type_t type, int busnum, int size, uint64_t address, uint64_t value) __attribute__((always_inline));
+static inline void dram_csr_write(bdk_node_t node, const char *csr_name, bdk_csr_type_t type, int busnum, int size, uint64_t address, uint64_t value)
+{
+ VB_PRT(VBL_CSRS, "N%d: DDR Config %s[%016lx] => %016lx\n", node, csr_name, address, value);
+ bdk_csr_write(node, type, busnum, size, address, value);
+}
+#else
+extern void dram_csr_write(bdk_node_t node, const char *csr_name, bdk_csr_type_t type, int busnum, int size, uint64_t address, uint64_t value);
+#endif
+
+/**
+ * Macro to write a CSR, logging if necessary
+ */
+#define DRAM_CSR_WRITE(node, csr, value) \
+ dram_csr_write(node, basename_##csr, bustype_##csr, busnum_##csr, sizeof(typedef_##csr), csr, value)
+
+/**
+ * Macro to make a read, modify, and write sequence easy. The "code_block"
+ * should be replaced with a C code block or a comma-separated list of
+ * "name.s.field = value", without the quotes.
+ */
+#define DRAM_CSR_MODIFY(name, node, csr, code_block) do { \
+ typedef_##csr name = {.u = bdk_csr_read(node, bustype_##csr, busnum_##csr, sizeof(typedef_##csr), csr)}; \
+ code_block; \
+ dram_csr_write(node, basename_##csr, bustype_##csr, busnum_##csr, sizeof(typedef_##csr), csr, name.u); \
+ } while (0)
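+
+/* Illustrative use of the two macros above (the register and field shown
+ here are examples drawn from libdram, not a required pattern):
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(lmc), ddr_dll_ctl3.u);
+
+ DRAM_CSR_MODIFY(ext_config, node, BDK_LMCX_EXT_CONFIG(lmc),
+ ext_config.s.vrefint_seq_deskew = 1);
+
+ Both expand to dram_csr_write(), so every store is logged when the
+ verbosity level includes VBL_CSRS. */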
+
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-env.c b/src/vendorcode/cavium/bdk/libdram/dram-env.c
new file mode 100644
index 0000000000..f25e6bdb26
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-env.c
@@ -0,0 +1,83 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "dram-internal.h"
+
+const char* lookup_env_parameter(const char *format, ...)
+{
+ const char *s;
+ unsigned long value;
+ va_list args;
+ char buffer[64];
+
+ va_start(args, format);
+ vsnprintf(buffer, sizeof(buffer)-1, format, args);
+ buffer[sizeof(buffer)-1] = '\0';
+ va_end(args);
+
+ if ((s = getenv(buffer)) != NULL)
+ {
+ value = strtoul(s, NULL, 0);
+ error_print("Parameter found in environment: %s = \"%s\" 0x%lx (%ld)\n",
+ buffer, s, value, value);
+ }
+ return s;
+}
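+
+/* Typical use (the parameter name here is just an example): a caller can do
+ s = lookup_env_parameter("ddr_deskew_normal_loops");
+ if (s) value = strtoul(s, NULL, 0);
+ The raw string is returned and the numeric value is logged by the call. */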
+
+const char* lookup_env_parameter_ull(const char *format, ...)
+{
+ const char *s;
+ unsigned long long value;
+ va_list args;
+ char buffer[64];
+
+ va_start(args, format);
+ vsnprintf(buffer, sizeof(buffer)-1, format, args);
+ buffer[sizeof(buffer)-1] = '\0';
+ va_end(args);
+
+ if ((s = getenv(buffer)) != NULL)
+ {
+ value = strtoull(s, NULL, 0);
+ error_print("Parameter found in environment: %s = 0x%016llx\n",
+ buffer, value);
+ }
+ return s;
+}
+
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-env.h b/src/vendorcode/cavium/bdk/libdram/dram-env.h
new file mode 100644
index 0000000000..0f100e1b25
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-env.h
@@ -0,0 +1,48 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Functions for accessing the environment for DRAM tweaking.
+ * Internal use only.
+ */
+
+
+extern const char *lookup_env_parameter(const char *format, ...) __attribute__ ((format(printf, 1, 2)));
+extern const char *lookup_env_parameter_ull(const char *format, ...) __attribute__ ((format(printf, 1, 2)));
+
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-gpio.h b/src/vendorcode/cavium/bdk/libdram/dram-gpio.h
new file mode 100644
index 0000000000..62c9a5c190
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-gpio.h
@@ -0,0 +1,46 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Functions for reporting DRAM init status through GPIOs.
+ * Useful for triggering scopes and such. Internal use only.
+ */
+
+extern void pulse_gpio_pin(bdk_node_t node, int pin, int usecs);
+
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.c b/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.c
new file mode 100644
index 0000000000..edb42312f1
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.c
@@ -0,0 +1,8535 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-l2c_tad.h"
+#include "libbdk-arch/bdk-csrs-mio_fus.h"
+#include "dram-internal.h"
+
+#define WODT_MASK_2R_1S 1 // FIXME: did not seem to make much difference with #152 1-slot?
+
+#define DESKEW_RODT_CTL 1
+
+// Set to 1 to use the feature whenever possible automatically.
+// When 0, however, the feature is still available, and it can
+// be enabled via envvar override "ddr_enable_write_deskew=1".
+#define ENABLE_WRITE_DESKEW_DEFAULT 0
+
+#define ENABLE_COMPUTED_VREF_ADJUSTMENT 1
+
+#define RLEXTRAS_PATCH 1 // write to unused RL rank entries
+#define WLEXTRAS_PATCH 1 // write to unused WL rank entries
+#define ADD_48_OHM_SKIP 1
+#define NOSKIP_40_48_OHM 1
+#define NOSKIP_48_STACKED 1
+#define NOSKIP_FOR_MINI 1
+#define NOSKIP_FOR_2S_1R 1
+#define MAJORITY_OVER_AVG 1
+#define RANK_MAJORITY MAJORITY_OVER_AVG && 1
+#define SW_WL_CHECK_PATCH 1 // check validity after SW adjust
+#define HW_WL_MAJORITY 1
+#define SWL_TRY_HWL_ALT HW_WL_MAJORITY && 1 // try HW WL base alternate if available when SW WL fails
+#define DISABLE_SW_WL_PASS_2 1
+
+#define HWL_BY_BYTE 0 // FIXME? set to 1 to do HWL a byte at a time (seemed to work better earlier?)
+
+#define USE_ORIG_TEST_DRAM_BYTE 1
+
+// collect and print LMC utilization using SWL software algorithm
+#define ENABLE_SW_WLEVEL_UTILIZATION 0
+
+#define COUNT_RL_CANDIDATES 1
+
+#define LOOK_FOR_STUCK_BYTE 0
+#define ENABLE_STUCK_BYTE_RESET 0
+
+#define FAILSAFE_CHECK 1
+
+#define PERFECT_BITMASK_COUNTING 1
+
+#define DAC_OVERRIDE_EARLY 1
+
+#define SWL_WITH_HW_ALTS_CHOOSE_SW 0 // FIXME: allow override?
+
+#define DEBUG_VALIDATE_BITMASK 0
+#if DEBUG_VALIDATE_BITMASK
+#define debug_bitmask_print ddr_print
+#else
+#define debug_bitmask_print(...)
+#endif
+
+#define ENABLE_SLOT_CTL_ACCESS 0
+#undef ENABLE_CUSTOM_RLEVEL_TABLE
+
+#define ENABLE_DISPLAY_MPR_PAGE 0
+#if ENABLE_DISPLAY_MPR_PAGE
+static void Display_MPR_Page_Location(bdk_node_t node, int rank,
+ int ddr_interface_num, int dimm_count,
+ int page, int location, uint64_t *mpr_data);
+#endif
+
+#define USE_L2_WAYS_LIMIT 1
+
+/* Read out Deskew Settings for DDR */
+
+typedef struct {
+ uint16_t bits[8];
+} deskew_bytes_t;
+typedef struct {
+ deskew_bytes_t bytes[9];
+} deskew_data_t;
+
+static void
+Get_Deskew_Settings(bdk_node_t node, int ddr_interface_num, deskew_data_t *dskdat)
+{
+ bdk_lmcx_phy_ctl_t phy_ctl;
+ bdk_lmcx_config_t lmc_config;
+ int bit_num, bit_index;
+ int byte_lane, byte_limit;
+ // NOTE: these are for pass 2.x
+ int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx
+ int bit_end = (is_t88p2) ? 9 : 8;
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
+
+ memset(dskdat, 0, sizeof(*dskdat));
+
+ BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
+ phy_ctl.s.dsk_dbg_clk_scaler = 3);
+
+ for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
+ bit_index = 0;
+ for (bit_num = 0; bit_num <= bit_end; ++bit_num) { // NOTE: this is for pass 2.x
+
+ if (bit_num == 4) continue;
+ if ((bit_num == 5) && is_t88p2) continue; // NOTE: this is for pass 2.x
+
+ // set byte lane and bit to read
+ BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
+ (phy_ctl.s.dsk_dbg_bit_sel = bit_num,
+ phy_ctl.s.dsk_dbg_byte_sel = byte_lane));
+
+ // start read sequence
+ BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
+ phy_ctl.s.dsk_dbg_rd_start = 1);
+
+ // poll for read sequence to complete
+ do {
+ phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
+ } while (phy_ctl.s.dsk_dbg_rd_complete != 1);
+
+ // record the data
+ dskdat->bytes[byte_lane].bits[bit_index] = phy_ctl.s.dsk_dbg_rd_data & 0x3ff;
+ bit_index++;
+
+ } /* for (bit_num = 0; bit_num <= bit_end; ++bit_num) */
+ } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */
+
+ return;
+}
+
+static void
+Display_Deskew_Data(bdk_node_t node, int ddr_interface_num,
+ deskew_data_t *dskdat, int print_enable)
+{
+ int byte_lane;
+ int bit_num;
+ uint16_t flags, deskew;
+ bdk_lmcx_config_t lmc_config;
+ int byte_limit;
+ const char *fc = " ?-=+*#&";
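+ // flag legend, indexed by the 3 flag bits with the lock bit inverted:
+ // a locked, unsaturated bit prints as a blank, an unlocked one as '?',
+ // and the remaining characters mark saturation states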
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
+
+ if (print_enable) {
+ VB_PRT(print_enable, "N%d.LMC%d: Deskew Data: Bit => :",
+ node, ddr_interface_num);
+ for (bit_num = 7; bit_num >= 0; --bit_num)
+ VB_PRT(print_enable, " %3d ", bit_num);
+ VB_PRT(print_enable, "\n");
+ }
+
+ for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
+ if (print_enable)
+ VB_PRT(print_enable, "N%d.LMC%d: Bit Deskew Byte %d %s :",
+ node, ddr_interface_num, byte_lane,
+ (print_enable >= VBL_TME) ? "FINAL" : " ");
+
+ for (bit_num = 7; bit_num >= 0; --bit_num) {
+
+ flags = dskdat->bytes[byte_lane].bits[bit_num] & 7;
+ deskew = dskdat->bytes[byte_lane].bits[bit_num] >> 3;
+
+ if (print_enable)
+ VB_PRT(print_enable, " %3d %c", deskew, fc[flags^1]);
+
+ } /* for (bit_num = 7; bit_num >= 0; --bit_num) */
+
+ if (print_enable)
+ VB_PRT(print_enable, "\n");
+
+ } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */
+
+ return;
+}
+
+static int
+change_wr_deskew_ena(bdk_node_t node, int ddr_interface_num, int new_state)
+{
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+ int saved_wr_deskew_ena;
+
+ // return original WR_DESKEW_ENA setting
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ saved_wr_deskew_ena = !!GET_DDR_DLL_CTL3(wr_deskew_ena);
+ if (saved_wr_deskew_ena != !!new_state) { // write it only when changing it
+ SET_DDR_DLL_CTL3(wr_deskew_ena, !!new_state);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+ }
+ return saved_wr_deskew_ena;
+}
+
+typedef struct {
+ int saturated; // number saturated
+ int unlocked; // number unlocked
+ int nibrng_errs; // nibble range errors
+ int nibunl_errs; // nibble unlocked errors
+ //int nibsat_errs; // nibble saturation errors
+ int bitval_errs; // bit value errors
+#if LOOK_FOR_STUCK_BYTE
+ int bytes_stuck; // byte(s) stuck
+#endif
+} deskew_counts_t;
+
+#define MIN_BITVAL 17
+#define MAX_BITVAL 110
+
+static deskew_counts_t deskew_training_results;
+static int deskew_validation_delay = 10000; // FIXME: make this a var for overriding
+
+static void
+Validate_Read_Deskew_Training(bdk_node_t node, int rank_mask, int ddr_interface_num,
+ deskew_counts_t *counts, int print_enable)
+{
+ int byte_lane, bit_num, nib_num;
+ int nibrng_errs, nibunl_errs, bitval_errs;
+ //int nibsat_errs;
+ bdk_lmcx_config_t lmc_config;
+ int16_t nib_min[2], nib_max[2], nib_unl[2]/*, nib_sat[2]*/;
+ // NOTE: these are for pass 2.x
+ int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx
+ int bit_start = (is_t88p2) ? 9 : 8;
+ int byte_limit;
+#if LOOK_FOR_STUCK_BYTE
+ uint64_t bl_mask[2]; // enough for 128 values
+ int bit_values;
+#endif
+ deskew_data_t dskdat;
+ int bit_index;
+ int16_t flags, deskew;
+ const char *fc = " ?-=+*#&";
+ int saved_wr_deskew_ena;
+ int bit_last;
+
+ // save original WR_DESKEW_ENA setting, and disable it for read deskew
+ saved_wr_deskew_ena = change_wr_deskew_ena(node, ddr_interface_num, 0);
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena;
+
+ memset(counts, 0, sizeof(deskew_counts_t));
+
+ Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+
+ if (print_enable) {
+ VB_PRT(print_enable, "N%d.LMC%d: Deskew Settings: Bit => :",
+ node, ddr_interface_num);
+ for (bit_num = 7; bit_num >= 0; --bit_num)
+ VB_PRT(print_enable, " %3d ", bit_num);
+ VB_PRT(print_enable, "\n");
+ }
+
+ for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
+ if (print_enable)
+ VB_PRT(print_enable, "N%d.LMC%d: Bit Deskew Byte %d %s :",
+ node, ddr_interface_num, byte_lane,
+ (print_enable >= VBL_TME) ? "FINAL" : " ");
+
+ nib_min[0] = 127; nib_min[1] = 127;
+ nib_max[0] = 0; nib_max[1] = 0;
+ nib_unl[0] = 0; nib_unl[1] = 0;
+ //nib_sat[0] = 0; nib_sat[1] = 0;
+
+#if LOOK_FOR_STUCK_BYTE
+ bl_mask[0] = bl_mask[1] = 0;
+#endif
+
+ if ((lmc_config.s.mode32b == 1) && (byte_lane == 4)) {
+ bit_index = 3;
+ bit_last = 3;
+ if (print_enable)
+ VB_PRT(print_enable, " ");
+ } else {
+ bit_index = 7;
+ bit_last = bit_start;
+ }
+
+ for (bit_num = bit_last; bit_num >= 0; --bit_num) { // NOTE: this is for pass 2.x
+ if (bit_num == 4) continue;
+ if ((bit_num == 5) && is_t88p2) continue; // NOTE: this is for pass 2.x
+
+ nib_num = (bit_num > 4) ? 1 : 0;
+
+ flags = dskdat.bytes[byte_lane].bits[bit_index] & 7;
+ deskew = dskdat.bytes[byte_lane].bits[bit_index] >> 3;
+ bit_index--;
+
+ counts->saturated += !!(flags & 6);
+ counts->unlocked += !(flags & 1);
+
+ nib_unl[nib_num] += !(flags & 1);
+ //nib_sat[nib_num] += !!(flags & 6);
+
+ if (flags & 1) { // FIXME? only do range when locked
+ nib_min[nib_num] = min(nib_min[nib_num], deskew);
+ nib_max[nib_num] = max(nib_max[nib_num], deskew);
+ }
+
+#if LOOK_FOR_STUCK_BYTE
+ bl_mask[(deskew >> 6) & 1] |= 1UL << (deskew & 0x3f);
+#endif
+
+ if (print_enable)
+ VB_PRT(print_enable, " %3d %c", deskew, fc[flags^1]);
+
+ } /* for (bit_num = bit_last; bit_num >= 0; --bit_num) */
+
+ /*
+ Now look for nibble errors:
+
+ For bit 55, it looks like a bit deskew problem. When the upper nibble of byte 6
+ needs to go to saturation, bit 7 of byte 6 locks prematurely at 64.
+ For DIMMs with raw card A and B, can we reset the deskew training when we encounter this case?
+ The reset criteria should be looking at one nibble at a time for raw card A and B;
+ if the bit-deskew setting within a nibble is different by > 33, we'll issue a reset
+ to the bit deskew training.
+
+ LMC0 Bit Deskew Byte(6): 64 0 - 0 - 0 - 26 61 35 64
+ */
+ // upper nibble range, then lower nibble range
+ nibrng_errs = ((nib_max[1] - nib_min[1]) > 33) ? 1 : 0;
+ nibrng_errs |= ((nib_max[0] - nib_min[0]) > 33) ? 1 : 0;
+
+ // check for nibble all unlocked
+ nibunl_errs = ((nib_unl[0] == 4) || (nib_unl[1] == 4)) ? 1 : 0;
+
+ // check for nibble all saturated
+ //nibsat_errs = ((nib_sat[0] == 4) || (nib_sat[1] == 4)) ? 1 : 0;
+
+ // check for bit value errors, ie < 17 or > 110
+ // FIXME? assume max always > MIN_BITVAL and min < MAX_BITVAL
+ bitval_errs = ((nib_max[1] > MAX_BITVAL) || (nib_max[0] > MAX_BITVAL)) ? 1 : 0;
+ bitval_errs |= ((nib_min[1] < MIN_BITVAL) || (nib_min[0] < MIN_BITVAL)) ? 1 : 0;
+
+ if (((nibrng_errs != 0) || (nibunl_errs != 0) /*|| (nibsat_errs != 0)*/ || (bitval_errs != 0))
+ && print_enable)
+ {
+ VB_PRT(print_enable, " %c%c%c%c",
+ (nibrng_errs)?'R':' ',
+ (nibunl_errs)?'U':' ',
+ (bitval_errs)?'V':' ',
+ /*(nibsat_errs)?'S':*/' ');
+ }
+
+#if LOOK_FOR_STUCK_BYTE
+ bit_values = __builtin_popcountl(bl_mask[0]) + __builtin_popcountl(bl_mask[1]);
+ if (bit_values < 3) {
+ counts->bytes_stuck |= (1 << byte_lane);
+ if (print_enable)
+ VB_PRT(print_enable, "X");
+ }
+#endif
+ if (print_enable)
+ VB_PRT(print_enable, "\n");
+
+ counts->nibrng_errs |= (nibrng_errs << byte_lane);
+ counts->nibunl_errs |= (nibunl_errs << byte_lane);
+ //counts->nibsat_errs |= (nibsat_errs << byte_lane);
+ counts->bitval_errs |= (bitval_errs << byte_lane);
+
+#if LOOK_FOR_STUCK_BYTE
+ // just for completeness, allow print of the stuck values bitmask after the bytelane print
+ if ((bit_values < 3) && print_enable) {
+ VB_PRT(VBL_DEV, "N%d.LMC%d: Deskew byte %d STUCK on value 0x%016lx.%016lx\n",
+ node, ddr_interface_num, byte_lane,
+ bl_mask[1], bl_mask[0]);
+ }
+#endif
+
+ } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */
+
+ // restore original WR_DESKEW_ENA setting
+ change_wr_deskew_ena(node, ddr_interface_num, saved_wr_deskew_ena);
+
+ return;
+}
+
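+/* Load a DAC (Vref bypass) override for one byte lane, or for all lanes when
+ byte is 0x0A, by stepping DLL_CTL3[BIT_SELECT] through the no-op/load/
+ bypass-on sequence below. Returns the 7-bit offset value written. */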
+unsigned short load_dac_override(int node, int ddr_interface_num,
+ int dac_value, int byte)
+{
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+ int bytex = (byte == 0x0A) ? byte : byte + 1; // single bytelanes incr by 1; A is for ALL
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+
+ SET_DDR_DLL_CTL3(byte_sel, bytex);
+ SET_DDR_DLL_CTL3(offset, dac_value >> 1); // only 7-bit field, use MS bits
+
+ ddr_dll_ctl3.s.bit_select = 0x9; /* No-op */
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ ddr_dll_ctl3.s.bit_select = 0xC; /* Vref bypass setting load */
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ ddr_dll_ctl3.s.bit_select = 0xD; /* Vref bypass on. */
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ ddr_dll_ctl3.s.bit_select = 0x9; /* No-op */
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ return ((unsigned short) GET_DDR_DLL_CTL3(offset));
+}
+
+// arg dac_or_dbi is 1 for DAC, 0 for DBI
+// returns 9 entries (bytelanes 0 through 8) in settings[]
+// returns 0 if OK, -1 if a problem
+int read_DAC_DBI_settings(int node, int ddr_interface_num,
+ int dac_or_dbi, int *settings)
+{
+ bdk_lmcx_phy_ctl_t phy_ctl;
+ int byte_lane, bit_num;
+ int deskew;
+ int dac_value;
+ int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx
+
+ phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
+ phy_ctl.s.dsk_dbg_clk_scaler = 3;
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), phy_ctl.u);
+
+ bit_num = (dac_or_dbi) ? 4 : 5;
+ if ((bit_num == 5) && !is_t88p2) { // NOTE: this is for pass 1.x
+ return -1;
+ }
+
+ for (byte_lane = 8; byte_lane >= 0 ; --byte_lane) { // FIXME: always assume ECC is available
+
+ //set byte lane and bit to read
+ phy_ctl.s.dsk_dbg_bit_sel = bit_num;
+ phy_ctl.s.dsk_dbg_byte_sel = byte_lane;
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), phy_ctl.u);
+
+ //start read sequence
+ phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
+ phy_ctl.s.dsk_dbg_rd_start = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), phy_ctl.u);
+
+ //poll for read sequence to complete
+ do {
+ phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
+ } while (phy_ctl.s.dsk_dbg_rd_complete != 1);
+
+ deskew = phy_ctl.s.dsk_dbg_rd_data /*>> 3*/; // leave the flag bits for DBI
+ dac_value = phy_ctl.s.dsk_dbg_rd_data & 0xff;
+
+ settings[byte_lane] = (dac_or_dbi) ? dac_value : deskew;
+
+ } /* for (byte_lane = 8; byte_lane >= 0 ; --byte_lane) { */
+
+ return 0;
+}
+
+// print out the DBI settings array
+// arg dac_or_dbi is 1 for DAC, 0 for DBI
+void
+display_DAC_DBI_settings(int node, int lmc, int dac_or_dbi,
+ int ecc_ena, int *settings, char *title)
+{
+ int byte;
+ int flags;
+ int deskew;
+ const char *fc = " ?-=+*#&";
+
+ ddr_print("N%d.LMC%d: %s %s Deskew Settings %d:0 :",
+ node, lmc, title, (dac_or_dbi)?"DAC":"DBI", 7+ecc_ena);
+ for (byte = (7+ecc_ena); byte >= 0; --byte) { // FIXME: what about 32-bit mode?
+ if (dac_or_dbi) { // DAC
+ flags = 1; // say its locked to get blank
+ deskew = settings[byte] & 0xff;
+ } else { // DBI
+ flags = settings[byte] & 7;
+ deskew = (settings[byte] >> 3) & 0x7f;
+ }
+ ddr_print(" %3d %c", deskew, fc[flags^1]);
+ }
+ ddr_print("\n");
+}
+
+// Evaluate the DAC settings array
+static int
+evaluate_DAC_settings(int ddr_interface_64b, int ecc_ena, int *settings)
+{
+ int byte, dac;
+ int last = (ddr_interface_64b) ? 7 : 3;
+
+ // this looks only for DAC values that are EVEN
+ for (byte = (last+ecc_ena); byte >= 0; --byte) {
+ dac = settings[byte] & 0xff;
+ if ((dac & 1) == 0)
+ return 1;
+ }
+ return 0;
+}
+
+static void
+Perform_Offset_Training(bdk_node_t node, int rank_mask, int ddr_interface_num)
+{
+ bdk_lmcx_phy_ctl_t lmc_phy_ctl;
+ uint64_t orig_phy_ctl;
+ const char *s;
+
+ /*
+ * 6.9.8 LMC Offset Training
+ *
+ * LMC requires input-receiver offset training.
+ *
+ * 1. Write LMC(0)_PHY_CTL[DAC_ON] = 1
+ */
+ lmc_phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
+ orig_phy_ctl = lmc_phy_ctl.u;
+ lmc_phy_ctl.s.dac_on = 1;
+
+ // allow full CSR override
+ if ((s = lookup_env_parameter_ull("ddr_phy_ctl")) != NULL) {
+ lmc_phy_ctl.u = strtoull(s, NULL, 0);
+ }
+
+ // do not print or write if CSR does not change...
+ if (lmc_phy_ctl.u != orig_phy_ctl) {
+ ddr_print("PHY_CTL : 0x%016lx\n", lmc_phy_ctl.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), lmc_phy_ctl.u);
+ }
+
+#if 0
+ // FIXME? do we really need to show RODT here?
+ bdk_lmcx_comp_ctl2_t lmc_comp_ctl2;
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ ddr_print("Read ODT_CTL : 0x%x (%d ohms)\n",
+ lmc_comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[lmc_comp_ctl2.s.rodt_ctl]);
+#endif
+
+ /*
+ * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0B and
+ * LMC(0)_SEQ_CTL[INIT_START] = 1.
+ *
+ * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
+ */
+ perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0B); /* Offset training sequence */
+
+}
+
+static void
+Perform_Internal_VREF_Training(bdk_node_t node, int rank_mask, int ddr_interface_num)
+{
+ bdk_lmcx_ext_config_t ext_config;
+
+ /*
+ * 6.9.9 LMC Internal Vref Training
+ *
+ * LMC requires input-reference-voltage training.
+ *
+ * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 0.
+ */
+ ext_config.u = BDK_CSR_READ(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num));
+ ext_config.s.vrefint_seq_deskew = 0;
+
+ VB_PRT(VBL_SEQ, "N%d.LMC%d: Performing LMC sequence: vrefint_seq_deskew = %d\n",
+ node, ddr_interface_num, ext_config.s.vrefint_seq_deskew);
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num), ext_config.u);
+
+ /*
+ * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0a and
+ * LMC(0)_SEQ_CTL[INIT_START] = 1.
+ *
+ * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
+ */
+ perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0A); /* LMC Internal Vref Training */
+}
+
+#define dbg_avg(format, ...) VB_PRT(VBL_DEV, format, ##__VA_ARGS__)
+static int
+process_samples_average(int16_t *bytes, int num_samples, int lmc, int lane_no)
+{
+ int i, savg, sadj, sum = 0, rng, ret, asum, trunc;
+ int16_t smin = 32767, smax = -32768;
+
+ dbg_avg("DBG_AVG%d.%d: ", lmc, lane_no);
+
+ for (i = 0; i < num_samples; i++) {
+ sum += bytes[i];
+ if (bytes[i] < smin) smin = bytes[i];
+ if (bytes[i] > smax) smax = bytes[i];
+ dbg_avg(" %3d", bytes[i]);
+ }
+ rng = smax - smin + 1;
+
+ dbg_avg(" (%3d, %3d, %2d)", smin, smax, rng);
+
+ asum = sum - smin - smax;
+
+ savg = divide_nint(sum * 10, num_samples);
+
+ sadj = divide_nint(asum * 10, (num_samples - 2));
+
+ trunc = asum / (num_samples - 2);
+
+ dbg_avg(" [%3d.%d, %3d.%d, %3d]", savg/10, savg%10, sadj/10, sadj%10, trunc);
+
+ sadj = divide_nint(sadj, 10);
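+ // Prefer an odd final value: take the truncated average if it is odd,
+ // else the rounded average if that is odd, else truncated + 1.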
+ if (trunc & 1)
+ ret = trunc;
+ else if (sadj & 1)
+ ret = sadj;
+ else
+ ret = trunc + 1;
+
+ dbg_avg(" -> %3d\n", ret);
+
+ return ret;
+}
+
+
+#define DEFAULT_SAT_RETRY_LIMIT 11 // 1 + 10 retries
+static int default_lock_retry_limit = 20; // 20 retries // FIXME: make a var for overriding
+
+static int
+Perform_Read_Deskew_Training(bdk_node_t node, int rank_mask, int ddr_interface_num,
+ int spd_rawcard_AorB, int print_flags, int ddr_interface_64b)
+{
+ int unsaturated, locked;
+ //int nibble_sat;
+ int sat_retries, lock_retries, lock_retries_total, lock_retries_limit;
+ int print_first;
+ int print_them_all;
+ deskew_counts_t dsk_counts;
+ uint64_t saved_wr_deskew_ena;
+#if DESKEW_RODT_CTL
+ bdk_lmcx_comp_ctl2_t comp_ctl2;
+ int save_deskew_rodt_ctl = -1;
+#endif
+ int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx
+
+ VB_PRT(VBL_FAE, "N%d.LMC%d: Performing Read Deskew Training.\n", node, ddr_interface_num);
+
+ // save original WR_DESKEW_ENA setting, and disable it for read deskew
+ saved_wr_deskew_ena = change_wr_deskew_ena(node, ddr_interface_num, 0);
+
+ sat_retries = 0;
+ lock_retries_total = 0;
+ unsaturated = 0;
+ print_first = VBL_FAE; // print the first one, FAE and above
+ print_them_all = dram_is_verbose(VBL_DEV4); // set to true for printing all normal deskew attempts
+
+ int loops, normal_loops = 1; // default to 1 NORMAL deskew training op...
+ const char *s;
+ if ((s = getenv("ddr_deskew_normal_loops")) != NULL) {
+ normal_loops = strtoul(s, NULL, 0);
+ }
+
+#if LOOK_FOR_STUCK_BYTE
+ // provide override for STUCK BYTE RESETS
+ int do_stuck_reset = ENABLE_STUCK_BYTE_RESET;
+ if ((s = getenv("ddr_enable_stuck_byte_reset")) != NULL) {
+ do_stuck_reset = !!strtoul(s, NULL, 0);
+ }
+#endif
+
+#if DESKEW_RODT_CTL
+ if ((s = getenv("ddr_deskew_rodt_ctl")) != NULL) {
+ int deskew_rodt_ctl = strtoul(s, NULL, 0);
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ save_deskew_rodt_ctl = comp_ctl2.s.rodt_ctl;
+ comp_ctl2.s.rodt_ctl = deskew_rodt_ctl;
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), comp_ctl2.u);
+ }
+#endif
+
+ lock_retries_limit = default_lock_retry_limit;
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) // added 81xx and 83xx
+ lock_retries_limit *= 2; // give pass 2.0 twice as many
+
+ do { /* while (sat_retries < sat_retry_limit) */
+
+ /*
+ * 6.9.10 LMC Deskew Training
+ *
+ * LMC requires input-read-data deskew training.
+ *
+ * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 1.
+ */
+ VB_PRT(VBL_SEQ, "N%d.LMC%d: Performing LMC sequence: Set vrefint_seq_deskew = 1\n",
+ node, ddr_interface_num);
+ DRAM_CSR_MODIFY(ext_config, node, BDK_LMCX_EXT_CONFIG(ddr_interface_num),
+ ext_config.s.vrefint_seq_deskew = 1); /* Set Deskew sequence */
+
+ /*
+ * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0A and
+ * LMC(0)_SEQ_CTL[INIT_START] = 1.
+ *
+ * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
+ */
+ DRAM_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
+ phy_ctl.s.phy_dsk_reset = 1); /* RESET Deskew sequence */
+ perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0A); /* LMC Deskew Training */
+
+ lock_retries = 0;
+
+ perform_read_deskew_training:
+ // maybe perform the NORMAL deskew training sequence multiple times before looking at lock status
+ for (loops = 0; loops < normal_loops; loops++) {
+ DRAM_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
+ phy_ctl.s.phy_dsk_reset = 0); /* Normal Deskew sequence */
+ perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0A); /* LMC Deskew Training */
+ }
+ // Moved this from Validate_Read_Deskew_Training
+ /* Allow deskew results to stabilize before evaluating them. */
+ bdk_wait_usec(deskew_validation_delay);
+
+ // Now go look at lock and saturation status...
+ Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, print_first);
+ if (print_first && !print_them_all) // after printing the first and not doing them all, no more
+ print_first = 0;
+
+ unsaturated = (dsk_counts.saturated == 0);
+ locked = (dsk_counts.unlocked == 0);
+ //nibble_sat = (dsk_counts.nibsat_errs != 0);
+
+ // only do locking retries if unsaturated or rawcard A or B, otherwise full SAT retry
+ if (unsaturated || (spd_rawcard_AorB && !is_t88p2 /*&& !nibble_sat*/)) {
+ if (!locked) { // and not locked
+ lock_retries++;
+ lock_retries_total++;
+ if (lock_retries <= lock_retries_limit) {
+ goto perform_read_deskew_training;
+ } else {
+ VB_PRT(VBL_TME, "N%d.LMC%d: LOCK RETRIES failed after %d retries\n",
+ node, ddr_interface_num, lock_retries_limit);
+ }
+ } else {
+ if (lock_retries_total > 0) // only print if we did try
+ VB_PRT(VBL_TME, "N%d.LMC%d: LOCK RETRIES successful after %d retries\n",
+ node, ddr_interface_num, lock_retries);
+ }
+ } /* if (unsaturated || spd_rawcard_AorB) */
+
+ ++sat_retries;
+
+#if LOOK_FOR_STUCK_BYTE
+ // FIXME: this is a bit of a hack at the moment...
+ // We want to force a Deskew RESET hopefully to unstick the byte values
+ // and then resume normal deskew training as usual.
+ // For now, do only if it is all locked...
+ if (locked && (dsk_counts.bytes_stuck != 0)) {
+ BDK_CSR_INIT(lmc_config, node, BDK_LMCX_CONFIG(ddr_interface_num));
+ if (do_stuck_reset && lmc_config.s.mode_x4dev) { // FIXME: only when x4!!
+ unsaturated = 0; // to always make sure the while continues
+ VB_PRT(VBL_TME, "N%d.LMC%d: STUCK BYTE (0x%x), forcing deskew RESET\n",
+ node, ddr_interface_num, dsk_counts.bytes_stuck);
+ continue; // bypass the rest to get back to the RESET
+ } else {
+ VB_PRT(VBL_TME, "N%d.LMC%d: STUCK BYTE (0x%x), ignoring deskew RESET\n",
+ node, ddr_interface_num, dsk_counts.bytes_stuck);
+ }
+ }
+#endif
+ /*
+ * At this point, check for a DDR4 RDIMM that will not benefit from SAT retries; if so, no retries
+ */
+ if (spd_rawcard_AorB && !is_t88p2 /*&& !nibble_sat*/) {
+ VB_PRT(VBL_TME, "N%d.LMC%d: Read Deskew Training Loop: Exiting for RAWCARD == A or B.\n",
+ node, ddr_interface_num);
+ break; // no sat or lock retries
+ }
+
+ } while (!unsaturated && (sat_retries < DEFAULT_SAT_RETRY_LIMIT));
+
+#if DESKEW_RODT_CTL
+ if (save_deskew_rodt_ctl != -1) {
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ comp_ctl2.s.rodt_ctl = save_deskew_rodt_ctl;
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), comp_ctl2.u);
+ }
+#endif
+
+ VB_PRT(VBL_FAE, "N%d.LMC%d: Read Deskew Training %s. %d sat-retries, %d lock-retries\n",
+ node, ddr_interface_num,
+ (sat_retries >= DEFAULT_SAT_RETRY_LIMIT) ? "Timed Out" : "Completed",
+ sat_retries-1, lock_retries_total);
+
+ // restore original WR_DESKEW_ENA setting
+ change_wr_deskew_ena(node, ddr_interface_num, saved_wr_deskew_ena);
+
+ if ((dsk_counts.nibrng_errs != 0) || (dsk_counts.nibunl_errs != 0)) {
+ debug_print("N%d.LMC%d: NIBBLE ERROR(S) found, returning FAULT\n",
+ node, ddr_interface_num);
+ return -1; // we did retry locally, they did not help
+ }
+
+ // NOTE: we (currently) always print one last training validation before starting Read Leveling...
+
+ return 0;
+}
+
+static void
+do_write_deskew_op(bdk_node_t node, int ddr_interface_num,
+ int bit_sel, int byte_sel, int ena)
+{
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ SET_DDR_DLL_CTL3(bit_select, bit_sel);
+ SET_DDR_DLL_CTL3(byte_sel, byte_sel);
+ SET_DDR_DLL_CTL3(wr_deskew_ena, ena);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+}
+
+static void
+set_write_deskew_offset(bdk_node_t node, int ddr_interface_num,
+ int bit_sel, int byte_sel, int offset)
+{
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ SET_DDR_DLL_CTL3(bit_select, bit_sel);
+ SET_DDR_DLL_CTL3(byte_sel, byte_sel);
+ SET_DDR_DLL_CTL3(offset, offset);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ SET_DDR_DLL_CTL3(wr_deskew_ld, 1);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+}
+
+static void
+Update_Write_Deskew_Settings(bdk_node_t node, int ddr_interface_num, deskew_data_t *dskdat)
+{
+ bdk_lmcx_config_t lmc_config;
+ int bit_num;
+ int byte_lane, byte_limit;
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
+
+ for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
+ for (bit_num = 0; bit_num <= 7; ++bit_num) {
+
+ set_write_deskew_offset(node, ddr_interface_num, bit_num, byte_lane + 1,
+ dskdat->bytes[byte_lane].bits[bit_num]);
+
+ } /* for (bit_num = 0; bit_num <= 7; ++bit_num) */
+ } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */
+
+ return;
+}
+
+#define ALL_BYTES 0x0A
+#define BS_NOOP 0x09
+#define BS_RESET 0x0F
+#define BS_REUSE 0x0A
+
+// set all entries to the same value (used during training)
+static void
+Set_Write_Deskew_Settings(bdk_node_t node, int ddr_interface_num, int value)
+{
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+ int bit_num;
+
+ VB_PRT(VBL_DEV2, "N%d.LMC%d: SetWriteDeskew: WRITE %d\n", node, ddr_interface_num, value);
+
+ for (bit_num = 0; bit_num <= 7; ++bit_num) {
+
+ // write a bit-deskew value to all bit-lanes of all bytes
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ SET_DDR_DLL_CTL3(bit_select, bit_num);
+ SET_DDR_DLL_CTL3(byte_sel, ALL_BYTES); // FIXME? will this work in 32-bit mode?
+ SET_DDR_DLL_CTL3(offset, value);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ SET_DDR_DLL_CTL3(wr_deskew_ld, 1);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ } /* for (bit_num = 0; bit_num <= 7; ++bit_num) */
+
+#if 0
+ // FIXME: for debug use only
+ Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+#endif
+
+ return;
+}
+
+typedef struct {
+ uint8_t count[8];
+ uint8_t start[8];
+ uint8_t best_count[8];
+ uint8_t best_start[8];
+} deskew_bytelane_t;
+typedef struct {
+ deskew_bytelane_t bytes[9];
+} deskew_rank_t;
+
+deskew_rank_t deskew_history[4];
+
+#define DSKVAL_INCR 4
+
+static void
+Neutral_Write_Deskew_Setup(bdk_node_t node, int ddr_interface_num)
+{
+ // first: NO-OP, Select all bytes, Disable write bit-deskew
+ ddr_print("N%d.LMC%d: NEUTRAL Write Deskew Setup: first: NOOP\n", node, ddr_interface_num);
+ do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 0);
+ //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+
+ // enable write bit-deskew and RESET the settings
+ ddr_print("N%d.LMC%d: NEUTRAL Write Deskew Setup: wr_ena: RESET\n", node, ddr_interface_num);
+ do_write_deskew_op(node, ddr_interface_num, BS_RESET, ALL_BYTES, 1);
+ //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+}
+
+static void
+Perform_Write_Deskew_Training(bdk_node_t node, int ddr_interface_num)
+{
+ deskew_data_t dskdat;
+ int byte, bit_num;
+ int dskval, rankx, rank_mask, active_ranks, errors, bit_errs;
+ uint64_t hw_rank_offset;
+ uint64_t bad_bits[2];
+ uint64_t phys_addr;
+ deskew_rank_t *dhp;
+ int num_lmcs = __bdk_dram_get_num_lmc(node);
+
+ BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(ddr_interface_num));
+ rank_mask = lmcx_config.s.init_status; // FIXME: is this right when we run?
+
+ // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
+ hw_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
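+ // Worked example of the offset math (hypothetical field values, for
+ // illustration only): with pbank_lsb = 2, rank_ena = 1 and num_lmcs = 4,
+ // the shift is 28 + 2 - 1 + 2 = 31, i.e. hw_rank_offset = 1ull << 31 = 2GB.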
+
+ VB_PRT(VBL_FAE, "N%d.LMC%d: Performing Write Deskew Training.\n", node, ddr_interface_num);
+
+ // first: NO-OP, Select all bytes, Disable write bit-deskew
+ ddr_print("N%d.LMC%d: WriteDeskewConfig: first: NOOP\n", node, ddr_interface_num);
+ do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 0);
+ //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+
+ // enable write bit-deskew and RESET the settings
+ ddr_print("N%d.LMC%d: WriteDeskewConfig: wr_ena: RESET\n", node, ddr_interface_num);
+ do_write_deskew_op(node, ddr_interface_num, BS_RESET, ALL_BYTES, 1);
+ //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+
+#if 0
+ // enable write bit-deskew and REUSE read bit-deskew settings
+ ddr_print("N%d.LMC%d: WriteDeskewConfig: wr_ena: REUSE\n", node, ddr_interface_num);
+ do_write_deskew_op(node, ddr_interface_num, BS_REUSE, ALL_BYTES, 1);
+ Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+#endif
+
+#if 1
+ memset(deskew_history, 0, sizeof(deskew_history));
+
+ for (dskval = 0; dskval < 128; dskval += DSKVAL_INCR) {
+
+ Set_Write_Deskew_Settings(node, ddr_interface_num, dskval);
+
+ active_ranks = 0;
+ for (rankx = 0; rankx < 4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+ dhp = &deskew_history[rankx];
+ phys_addr = hw_rank_offset * active_ranks;
+ active_ranks++;
+
+ errors = test_dram_byte_hw(node, ddr_interface_num, phys_addr, 0, bad_bits);
+
+ for (byte = 0; byte <= 8; byte++) { // do bytelane(s)
+
+ // check errors
+ if (errors & (1 << byte)) { // yes, error(s) in the byte lane in this rank
+ bit_errs = ((byte == 8) ? bad_bits[1] : bad_bits[0] >> (8 * byte)) & 0xFFULL;
+
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Value %d: Address 0x%012lx errors 0x%x/0x%x\n",
+ node, ddr_interface_num, rankx, byte,
+ dskval, phys_addr, errors, bit_errs);
+
+ for (bit_num = 0; bit_num <= 7; bit_num++) {
+ if (!(bit_errs & (1 << bit_num)))
+ continue;
+ if (dhp->bytes[byte].count[bit_num] > 0) { // had started run
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Bit %d Value %d: stopping a run here\n",
+ node, ddr_interface_num, rankx, byte, bit_num, dskval);
+ dhp->bytes[byte].count[bit_num] = 0; // stop now
+ }
+ } /* for (bit_num = 0; bit_num <= 7; bit_num++) */
+
+ // FIXME: else had not started run - nothing else to do?
+ } else { // no error in the byte lane
+ for (bit_num = 0; bit_num <= 7; bit_num++) {
+ if (dhp->bytes[byte].count[bit_num] == 0) { // first success, set run start
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Bit %d Value %d: starting a run here\n",
+ node, ddr_interface_num, rankx, byte, bit_num, dskval);
+ dhp->bytes[byte].start[bit_num] = dskval;
+ }
+ dhp->bytes[byte].count[bit_num] += DSKVAL_INCR; // bump run length
+
+ // is this now the biggest window?
+ if (dhp->bytes[byte].count[bit_num] > dhp->bytes[byte].best_count[bit_num]) {
+ dhp->bytes[byte].best_count[bit_num] = dhp->bytes[byte].count[bit_num];
+ dhp->bytes[byte].best_start[bit_num] = dhp->bytes[byte].start[bit_num];
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Bit %d Value %d: updating best to %d/%d\n",
+ node, ddr_interface_num, rankx, byte, bit_num, dskval,
+ dhp->bytes[byte].best_start[bit_num],
+ dhp->bytes[byte].best_count[bit_num]);
+ }
+ } /* for (bit_num = 0; bit_num <= 7; bit_num++) */
+ } /* error in the byte lane */
+ } /* for (byte = 0; byte <= 8; byte++) */
+ } /* for (rankx = 0; rankx < 4; rankx++) */
+ } /* for (dskval = 0; dskval < 128; dskval += DSKVAL_INCR) */
+
+
+ for (byte = 0; byte <= 8; byte++) { // do bytelane(s)
+
+ for (bit_num = 0; bit_num <= 7; bit_num++) { // do bits
+ int bit_beg, bit_end;
+
+ bit_beg = 0;
+ bit_end = 128;
+
+ for (rankx = 0; rankx < 4; rankx++) { // merge ranks
+ int rank_beg, rank_end, rank_count;
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ dhp = &deskew_history[rankx];
+ rank_beg = dhp->bytes[byte].best_start[bit_num];
+ rank_count = dhp->bytes[byte].best_count[bit_num];
+
+ if (!rank_count) {
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: Byte %d Bit %d: EMPTY\n",
+ node, ddr_interface_num, rankx, byte, bit_num);
+ continue;
+ }
+
+ bit_beg = max(bit_beg, rank_beg);
+ rank_end = rank_beg + rank_count - DSKVAL_INCR;
+ bit_end = min(bit_end, rank_end);
+
+ } /* for (rankx = 0; rankx < 4; rankx++) */
+
+ dskdat.bytes[byte].bits[bit_num] = (bit_end + bit_beg) / 2;
+
+ } /* for (bit_num = 0; bit_num <= 7; bit_num++) */
+ } /* for (byte = 0; byte <= 8; byte++) */
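+ // Worked example of the merge above (hypothetical windows, for illustration
+ // only): rank 0 best run start 40, count 44 -> rank_end = 40 + 44 - 4 = 80;
+ // rank 1 best run start 52, count 44 -> rank_end = 92. The intersection is
+ // bit_beg = max(40,52) = 52 and bit_end = min(80,92) = 80, so that bit's
+ // final deskew setting is (80 + 52) / 2 = 66.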
+
+#endif
+
+ // update the write bit-deskew settings with final settings
+ ddr_print("N%d.LMC%d: WriteDeskewConfig: wr_ena: UPDATE\n", node, ddr_interface_num);
+ Update_Write_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+
+ // last: NO-OP, Select all bytes, MUST leave write bit-deskew enabled
+ ddr_print("N%d.LMC%d: WriteDeskewConfig: last: wr_ena: NOOP\n", node, ddr_interface_num);
+ do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 1);
+ //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+
+#if 0
+ // FIXME: disable/delete this when write bit-deskew works...
+ // final: NO-OP, Select all bytes, do NOT leave write bit-deskew enabled
+ ddr_print("N%d.LMC%d: WriteDeskewConfig: final: read: NOOP\n", node, ddr_interface_num);
+ do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 0);
+ Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+#endif
+}
+
+#define SCALING_FACTOR (1000)
+#define Dprintf debug_print // make this "ddr_print" for extra debug output below
+static int compute_Vref_1slot_2rank(int rtt_wr, int rtt_park, int dqx_ctl, int rank_count)
+{
+ uint64_t Reff_s;
+ uint64_t Rser_s = 15;
+ uint64_t Vdd = 1200;
+ uint64_t Vref;
+ //uint64_t Vl;
+ uint64_t rtt_wr_s = (((rtt_wr == 0) || (rtt_wr == 99)) ? 1*1024*1024 : rtt_wr); // 99 == HiZ
+ uint64_t rtt_park_s = (((rtt_park == 0) || ((rank_count == 1) && (rtt_wr != 0))) ? 1*1024*1024 : rtt_park);
+ uint64_t dqx_ctl_s = (dqx_ctl == 0 ? 1*1024*1024 : dqx_ctl);
+ int Vref_value;
+ uint64_t Rangepc = 6000; // range1 base is 60%
+ uint64_t Vrefpc;
+ int Vref_range = 0;
+
+ Dprintf("rtt_wr = %d, rtt_park = %d, dqx_ctl = %d\n", rtt_wr, rtt_park, dqx_ctl);
+ Dprintf("rtt_wr_s = %d, rtt_park_s = %d, dqx_ctl_s = %d\n", rtt_wr_s, rtt_park_s, dqx_ctl_s);
+
+ Reff_s = divide_nint((rtt_wr_s * rtt_park_s) , (rtt_wr_s + rtt_park_s));
+ Dprintf("Reff_s = %d\n", Reff_s);
+
+ //Vl = (((Rser_s + dqx_ctl_s) * SCALING_FACTOR) / (Rser_s + dqx_ctl_s + Reff_s)) * Vdd / SCALING_FACTOR;
+ //printf("Vl = %d\n", Vl);
+
+ Vref = (((Rser_s + dqx_ctl_s) * SCALING_FACTOR) / (Rser_s + dqx_ctl_s + Reff_s)) + SCALING_FACTOR;
+ Dprintf("Vref = %d\n", Vref);
+
+ Vref = (Vref * Vdd) / 2 / SCALING_FACTOR;
+ Dprintf("Vref = %d\n", Vref);
+
+ Vrefpc = (Vref * 100 * 100) / Vdd;
+ Dprintf("Vrefpc = %d\n", Vrefpc);
+
+ if (Vrefpc < Rangepc) { // < range1 base, use range2
+ Vref_range = 1 << 6; // set bit A6 for range2
+ Rangepc = 4500; // range2 base is 45%
+ }
+
+ Vref_value = divide_nint(Vrefpc - Rangepc, 65);
+ if (Vref_value < 0)
+ Vref_value = Vref_range; // set to base of range as lowest value
+ else
+ Vref_value |= Vref_range;
+ Dprintf("Vref_value = %d (0x%02x)\n", Vref_value, Vref_value);
+
+ debug_print("rtt_wr:%d, rtt_park:%d, dqx_ctl:%d, Vref_value:%d (0x%x)\n",
+ rtt_wr, rtt_park, dqx_ctl, Vref_value, Vref_value);
+
+ return Vref_value;
+}
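+
+// Hand-worked trace of the computation above (hypothetical inputs, for
+// illustration only): rtt_wr = 240, rtt_park = 120, dqx_ctl = 34, 2 ranks:
+// Reff_s = nint(240*120 / 360) = 80
+// Vref = (15+34)*1000 / (15+34+80) + 1000 = 1379 (truncating divide)
+// Vref = 1379*1200 / 2 / 1000 = 827 (millivolts)
+// Vrefpc = 827*100*100 / 1200 = 6891, which is >= 6000, so stay in range 1
+// Vref_value = nint((6891-6000) / 65) = 14 (0x0E)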
+static int compute_Vref_2slot_2rank(int rtt_wr, int rtt_park_00, int rtt_park_01, int dqx_ctl, int rtt_nom)
+{
+ //uint64_t Rser = 15;
+ uint64_t Vdd = 1200;
+ //uint64_t Vref;
+ uint64_t Vl, Vlp, Vcm;
+ uint64_t Rd0, Rd1, Rpullup;
+ uint64_t rtt_wr_s = (((rtt_wr == 0) || (rtt_wr == 99)) ? 1*1024*1024 : rtt_wr); // 99 == HiZ
+ uint64_t rtt_park_00_s = (rtt_park_00 == 0 ? 1*1024*1024 : rtt_park_00);
+ uint64_t rtt_park_01_s = (rtt_park_01 == 0 ? 1*1024*1024 : rtt_park_01);
+ uint64_t dqx_ctl_s = (dqx_ctl == 0 ? 1*1024*1024 : dqx_ctl);
+ uint64_t rtt_nom_s = (rtt_nom == 0 ? 1*1024*1024 : rtt_nom);
+ int Vref_value;
+ uint64_t Rangepc = 6000; // range1 base is 60%
+ uint64_t Vrefpc;
+ int Vref_range = 0;
+
+ // Rd0 = (RTT_NOM /*parallel*/ RTT_WR) + 15 = ((RTT_NOM * RTT_WR) / (RTT_NOM + RTT_WR)) + 15
+ Rd0 = divide_nint((rtt_nom_s * rtt_wr_s), (rtt_nom_s + rtt_wr_s)) + 15;
+ //printf("Rd0 = %ld\n", Rd0);
+
+ // Rd1 = (RTT_PARK_00 /*parallel*/ RTT_PARK_01) + 15 = ((RTT_PARK_00 * RTT_PARK_01) / (RTT_PARK_00 + RTT_PARK_01)) + 15
+ Rd1 = divide_nint((rtt_park_00_s * rtt_park_01_s), (rtt_park_00_s + rtt_park_01_s)) + 15;
+ //printf("Rd1 = %ld\n", Rd1);
+
+ // Rpullup = Rd0 /*parallel*/ Rd1 = (Rd0 * Rd1) / (Rd0 + Rd1)
+ Rpullup = divide_nint((Rd0 * Rd1), (Rd0 + Rd1));
+ //printf("Rpullup = %ld\n", Rpullup);
+
+ // Vl = (DQX_CTL / (DQX_CTL + Rpullup)) * 1.2
+ Vl = divide_nint((dqx_ctl_s * Vdd), (dqx_ctl_s + Rpullup));
+ //printf("Vl = %ld\n", Vl);
+
+ // Vlp = ((15 / Rd0) * (1.2 - Vl)) + Vl
+ Vlp = divide_nint((15 * (Vdd - Vl)), Rd0) + Vl;
+ //printf("Vlp = %ld\n", Vlp);
+
+ // Vcm = (Vlp + 1.2) / 2
+ Vcm = divide_nint((Vlp + Vdd), 2);
+ //printf("Vcm = %ld\n", Vcm);
+
+ // Vrefpc = (Vcm / 1.2) * 100
+ Vrefpc = divide_nint((Vcm * 100 * 100), Vdd);
+ //printf("Vrefpc = %ld\n", Vrefpc);
+
+ if (Vrefpc < Rangepc) { // < range1 base, use range2
+ Vref_range = 1 << 6; // set bit A6 for range2
+ Rangepc = 4500; // range2 base is 45%
+ }
+
+ Vref_value = divide_nint(Vrefpc - Rangepc, 65);
+ if (Vref_value < 0)
+ Vref_value = Vref_range; // set to base of range as lowest value
+ else
+ Vref_value |= Vref_range;
+ //printf("Vref_value = %d (0x%02x)\n", Vref_value, Vref_value);
+
+ debug_print("rtt_wr:%d, rtt_park_00:%d, rtt_park_01:%d, dqx_ctl:%d, rtt_nom:%d, Vref_value:%d (0x%x)\n",
+ rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom, Vref_value, Vref_value);
+
+ return Vref_value;
+}
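+
+// Hand-worked trace of the computation above (hypothetical inputs, for
+// illustration only): rtt_wr = 240, rtt_park_00 = rtt_park_01 = 120,
+// dqx_ctl = 34, rtt_nom = 120:
+// Rd0 = nint(120*240 / 360) + 15 = 95; Rd1 = nint(120*120 / 240) + 15 = 75
+// Rpullup = nint(95*75 / 170) = 42; Vl = nint(34*1200 / 76) = 537
+// Vlp = nint(15*663 / 95) + 537 = 642; Vcm = nint((642+1200) / 2) = 921
+// Vrefpc = nint(921*100*100 / 1200) = 7675, so range 1, and
+// Vref_value = nint((7675-6000) / 65) = 26 (0x1A)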
+
+// NOTE: only call this for DIMMs with 1 or 2 ranks, not 4.
+int
+compute_vref_value(bdk_node_t node, int ddr_interface_num,
+ int rankx, int dimm_count, int rank_count,
+ impedence_values_t *imp_values, int is_stacked_die)
+{
+ int computed_final_vref_value = 0;
+
+ /* Calculate an override of the measured Vref value,
+ but only for configurations we know how to handle... */
+ // we have code for 2-rank DIMMs in both 1-slot or 2-slot configs,
+ // and can use the 2-rank 1-slot code for 1-rank DIMMs in 1-slot configs
+ // and can use the 2-rank 2-slot code for 1-rank DIMMs in 2-slot configs
+
+ int rtt_wr, dqx_ctl, rtt_nom, index;
+ bdk_lmcx_modereg_params1_t lmc_modereg_params1;
+ bdk_lmcx_modereg_params2_t lmc_modereg_params2;
+ bdk_lmcx_comp_ctl2_t comp_ctl2;
+
+ lmc_modereg_params1.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num));
+ lmc_modereg_params2.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num));
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ dqx_ctl = imp_values->dqx_strength[comp_ctl2.s.dqx_ctl];
+
+ // WR always comes from the current rank
+ index = (lmc_modereg_params1.u >> (rankx * 12 + 5)) & 0x03;
+ if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
+ index |= lmc_modereg_params1.u >> (51+rankx-2) & 0x04;
+ }
+ rtt_wr = imp_values->rtt_wr_ohms [index];
+
+ // separate calculations for 1 vs 2 DIMMs per LMC
+ if (dimm_count == 1) {
+ // PARK comes from this rank if 1-rank, otherwise other rank
+ index = (lmc_modereg_params2.u >> ((rankx ^ (rank_count - 1)) * 10 + 0)) & 0x07;
+ int rtt_park = imp_values->rtt_nom_ohms[index];
+ computed_final_vref_value = compute_Vref_1slot_2rank(rtt_wr, rtt_park, dqx_ctl, rank_count);
+ } else {
+ // get both PARK values from the other DIMM
+ index = (lmc_modereg_params2.u >> ((rankx ^ 0x02) * 10 + 0)) & 0x07;
+ int rtt_park_00 = imp_values->rtt_nom_ohms[index];
+ index = (lmc_modereg_params2.u >> ((rankx ^ 0x03) * 10 + 0)) & 0x07;
+ int rtt_park_01 = imp_values->rtt_nom_ohms[index];
+ // NOM comes from this rank if 1-rank, otherwise other rank
+ index = (lmc_modereg_params1.u >> ((rankx ^ (rank_count - 1)) * 12 + 9)) & 0x07;
+ rtt_nom = imp_values->rtt_nom_ohms[index];
+ computed_final_vref_value = compute_Vref_2slot_2rank(rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom);
+ }
+
+#if ENABLE_COMPUTED_VREF_ADJUSTMENT
+ {
+ int saved_final_vref_value = computed_final_vref_value;
+ BDK_CSR_INIT(lmc_config, node, BDK_LMCX_CONFIG(ddr_interface_num));
+ /*
+ New computed Vref = existing computed Vref - X
+
+ The value of X depends on the configuration. Both #122 and #139 are 2Rx4 RDIMMs,
+ while #124 is a stacked-die 2Rx4, so the results fall into two cases:
+
+ 1. Stacked Die: 2Rx4
+ 1-slot: offset = 7, i.e., New computed Vref = existing computed Vref - 7
+ 2-slot: offset = 6
+
+ 2. Regular: 2Rx4
+ 1-slot: offset = 3
+ 2-slot: offset = 2
+ */
+ // we know we never get called unless DDR4, so test just the other conditions
+ if((!!__bdk_dram_is_rdimm(node, 0)) &&
+ (rank_count == 2) &&
+ (lmc_config.s.mode_x4dev))
+ { // it must first be RDIMM and 2-rank and x4
+ if (is_stacked_die) { // now do according to stacked die or not...
+ computed_final_vref_value -= (dimm_count == 1) ? 7 : 6;
+ } else {
+ computed_final_vref_value -= (dimm_count == 1) ? 3 : 2;
+ }
+ // we have adjusted it, so print it out if verbosity is right
+ VB_PRT(VBL_TME, "N%d.LMC%d.R%d: adjusting computed vref from %2d (0x%02x) to %2d (0x%02x)\n",
+ node, ddr_interface_num, rankx,
+ saved_final_vref_value, saved_final_vref_value,
+ computed_final_vref_value, computed_final_vref_value);
+ }
+ }
+#endif
+ return computed_final_vref_value;
+}
+
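+// These helpers pack/unpack the 3-bit DDR4 RTT_WR index for rank x inside
+// MODEREG_PARAMS1: the low two bits sit at [x*12+6 : x*12+5], the third bit
+// at bit 51+x. Example (for illustration): for x = 0, v = 5 (0b101),
+// INSRT_WR() sets bit 5 (from v & 0x3) and bit 51 (from v & 0x4 shifted by 49).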
+static unsigned int EXTR_WR(uint64_t u, int x)
+{
+ return (unsigned int)(((u >> (x*12+5)) & 0x3UL) | ((u >> (51+x-2)) & 0x4UL));
+}
+static void INSRT_WR(uint64_t *up, int x, int v)
+{
+ uint64_t u = *up;
+ u &= ~(((0x3UL) << (x*12+5)) | ((0x1UL) << (51+x)));
+ *up = (u | ((v & 0x3UL) << (x*12+5)) | ((v & 0x4UL) << (51+x-2)));
+ return;
+}
+
+static int encode_row_lsb_ddr3(int row_lsb, int ddr_interface_wide)
+{
+ int encoded_row_lsb;
+ int row_lsb_start = 14;
+
+ /* Decoding for row_lsb */
+ /* 000: row_lsb = mem_adr[14] */
+ /* 001: row_lsb = mem_adr[15] */
+ /* 010: row_lsb = mem_adr[16] */
+ /* 011: row_lsb = mem_adr[17] */
+ /* 100: row_lsb = mem_adr[18] */
+ /* 101: row_lsb = mem_adr[19] */
+ /* 110: row_lsb = mem_adr[20] */
+ /* 111: RESERVED */
+
+ encoded_row_lsb = row_lsb - row_lsb_start;
+
+ return encoded_row_lsb;
+}
+
+static int encode_pbank_lsb_ddr3(int pbank_lsb, int ddr_interface_wide)
+{
+ int encoded_pbank_lsb;
+
+ /* Decoding for pbank_lsb */
+ /* 0000:DIMM = mem_adr[28] / rank = mem_adr[27] (if RANK_ENA) */
+ /* 0001:DIMM = mem_adr[29] / rank = mem_adr[28] " */
+ /* 0010:DIMM = mem_adr[30] / rank = mem_adr[29] " */
+ /* 0011:DIMM = mem_adr[31] / rank = mem_adr[30] " */
+ /* 0100:DIMM = mem_adr[32] / rank = mem_adr[31] " */
+ /* 0101:DIMM = mem_adr[33] / rank = mem_adr[32] " */
+ /* 0110:DIMM = mem_adr[34] / rank = mem_adr[33] " */
+ /* 0111:DIMM = 0 / rank = mem_adr[34] " */
+ /* 1000-1111: RESERVED */
+
+ int pbank_lsb_start = 28;
+
+ encoded_pbank_lsb = pbank_lsb - pbank_lsb_start;
+
+ return encoded_pbank_lsb;
+}
+
+static uint64_t octeon_read_lmcx_ddr3_rlevel_dbg(bdk_node_t node, int ddr_interface_num, int idx)
+{
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num),
+ c.s.byte = idx);
+ BDK_CSR_READ(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num));
+ BDK_CSR_INIT(rlevel_dbg, node, BDK_LMCX_RLEVEL_DBG(ddr_interface_num));
+ return rlevel_dbg.s.bitmask;
+}
+
+static uint64_t octeon_read_lmcx_ddr3_wlevel_dbg(bdk_node_t node, int ddr_interface_num, int idx)
+{
+ bdk_lmcx_wlevel_dbg_t wlevel_dbg;
+
+ wlevel_dbg.u = 0;
+ wlevel_dbg.s.byte = idx;
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_DBG(ddr_interface_num), wlevel_dbg.u);
+ BDK_CSR_READ(node, BDK_LMCX_WLEVEL_DBG(ddr_interface_num));
+
+ wlevel_dbg.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_DBG(ddr_interface_num));
+ return wlevel_dbg.s.bitmask;
+}
+
+
+/*
+ * Apply a filter to the BITMASK results returned from Octeon
+ * read-leveling to determine the most likely delay result. This
+ * computed delay may be used to qualify the delay result returned by
+ * Octeon. Accumulate an error penalty for invalid characteristics of
+ * the bitmask so that they can be used to select the most reliable
+ * results.
+ *
+ * The algorithm searches for the largest contiguous MASK within a
+ * maximum RANGE of bits beginning with the MSB.
+ *
+ * 1. a MASK with a WIDTH less than 4 will be penalized
+ * 2. Bubbles in the bitmask that occur before or after the MASK
+ * will be penalized
+ * 3. If there are no trailing bubbles then extra bits that occur
+ * beyond the maximum RANGE will be penalized.
+ *
+ * +++++++++++++++++++++++++++++++++++++++++++++++++++
+ * + +
+ * + e.g. bitmask = 27B00 +
+ * + +
+ * + 63 +--- mstart 0 +
+ * + | | | +
+ * + | +---------+ +--- fb | +
+ * + | | range | | | +
+ * + V V V V V +
+ * + +
+ * + 0 0 ... 1 0 0 1 1 1 1 0 1 1 0 0 0 0 0 0 0 0 +
+ * + +
+ * + ^ ^ ^ +
+ * + | | mask| +
+ * + lb ---+ +-----+ +
+ * + width +
+ * + +
+ * +++++++++++++++++++++++++++++++++++++++++++++++++++
+ */
+#define RLEVEL_BITMASK_TRAILING_BITS_ERROR 5
+#define RLEVEL_BITMASK_BUBBLE_BITS_ERROR 11 // FIXME? now less than TOOLONG
+#define RLEVEL_BITMASK_NARROW_ERROR 6
+#define RLEVEL_BITMASK_BLANK_ERROR 100
+#define RLEVEL_BITMASK_TOOLONG_ERROR 12
+
+#define MASKRANGE_BITS 6
+#define MASKRANGE ((1 << MASKRANGE_BITS) - 1)
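+
+// Hand-worked trace of the example above (bitmask 0x27B00 has bits 8, 9,
+// 11-14 and 17 set): fb = 8, lb = 17; the widest mask found is width 4 at
+// mstart = 11 (bits 11-14). Scoring: one leading bubble (bit 10) costs 11,
+// two trailing bubbles (bits 15-16) cost 22, and one trailing 1 (bit 17)
+// costs 5, for a total of errors = 38.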
+
+static int
+validate_ddr3_rlevel_bitmask(rlevel_bitmask_t *rlevel_bitmask_p, int ddr_type)
+{
+ int i;
+ int errors = 0;
+ uint64_t mask = 0; /* Used in 64-bit comparisons */
+ int8_t mstart = 0;
+ uint8_t width = 0;
+ uint8_t firstbit = 0;
+ uint8_t lastbit = 0;
+ uint8_t bubble = 0;
+ uint8_t tbubble = 0;
+ uint8_t blank = 0;
+ uint8_t narrow = 0;
+ uint8_t trailing = 0;
+ uint64_t bitmask = rlevel_bitmask_p->bm;
+ uint8_t extras = 0;
+ uint8_t toolong = 0;
+ uint64_t temp;
+
+ if (bitmask == 0) {
+ blank += RLEVEL_BITMASK_BLANK_ERROR;
+ } else {
+
+ /* Look for fb, the first bit */
+ temp = bitmask;
+ while (!(temp & 1)) {
+ firstbit++;
+ temp >>= 1;
+ }
+
+ /* Look for lb, the last bit */
+ lastbit = firstbit;
+ while ((temp >>= 1))
+ lastbit++;
+
+ /* Start with the max range to try to find the largest mask within the bitmask data */
+ width = MASKRANGE_BITS;
+ for (mask = MASKRANGE; mask > 0; mask >>= 1, --width) {
+ for (mstart = lastbit - width + 1; mstart >= firstbit; --mstart) {
+ temp = mask << mstart;
+ if ((bitmask & temp) == temp)
+ goto done_now;
+ }
+ }
+ done_now:
+ /* look for any more contiguous 1's to the right of mstart */
+ if (width == MASKRANGE_BITS) { // only when maximum mask
+ while ((bitmask >> (mstart - 1)) & 1) { // slide right over more 1's
+ --mstart;
+ if (ddr_type == DDR4_DRAM) // only for DDR4
+ extras++; // count the number of extra bits
+ }
+ }
+
+ /* Penalize any extra 1's beyond the maximum desired mask */
+ if (extras > 0)
+ toolong = RLEVEL_BITMASK_TOOLONG_ERROR * ((1 << extras) - 1);
+
+ /* Detect if bitmask is too narrow. */
+ if (width < 4)
+ narrow = (4 - width) * RLEVEL_BITMASK_NARROW_ERROR;
+
+ /* detect leading bubble bits, that is, any 0's between first and mstart */
+ temp = bitmask >> (firstbit + 1);
+ i = mstart - firstbit - 1;
+ while (--i >= 0) {
+ if ((temp & 1) == 0)
+ bubble += RLEVEL_BITMASK_BUBBLE_BITS_ERROR;
+ temp >>= 1;
+ }
+
+ temp = bitmask >> (mstart + width + extras);
+ i = lastbit - (mstart + width + extras - 1);
+ while (--i >= 0) {
+ if (temp & 1) { /* Detect 1 bits after the trailing end of the mask, including last. */
+ trailing += RLEVEL_BITMASK_TRAILING_BITS_ERROR;
+ } else { /* Detect trailing bubble bits, that is, any 0's between end-of-mask and last */
+ tbubble += RLEVEL_BITMASK_BUBBLE_BITS_ERROR;
+ }
+ temp >>= 1;
+ }
+ }
+
+ errors = bubble + tbubble + blank + narrow + trailing + toolong;
+
+ /* Pass out useful statistics */
+ rlevel_bitmask_p->mstart = mstart;
+ rlevel_bitmask_p->width = width;
+
+ VB_PRT(VBL_DEV2, "bm:%08lx mask:%02lx, width:%2u, mstart:%2d, fb:%2u, lb:%2u"
+ " (bu:%2d, tb:%2d, bl:%2d, n:%2d, t:%2d, x:%2d) errors:%3d %s\n",
+ (unsigned long) bitmask, mask, width, mstart,
+ firstbit, lastbit, bubble, tbubble, blank, narrow,
+ trailing, toolong, errors, (errors) ? "=> invalid" : "");
+
+ return errors;
+}
+
+static int compute_ddr3_rlevel_delay(uint8_t mstart, uint8_t width, bdk_lmcx_rlevel_ctl_t rlevel_ctl)
+{
+ int delay;
+
+ debug_bitmask_print(" offset_en:%d", rlevel_ctl.cn8.offset_en);
+
+ if (rlevel_ctl.s.offset_en) {
+ delay = max(mstart, mstart + width - 1 - rlevel_ctl.s.offset);
+ } else {
+ /* if (rlevel_ctl.s.offset) { */ /* Experimental */
+ if (0) {
+ delay = max(mstart + rlevel_ctl.s.offset, mstart + 1);
+ /* Insure that the offset delay falls within the bitmask */
+ delay = min(delay, mstart + width-1);
+ } else {
+ delay = (width - 1) / 2 + mstart; /* Round down */
+ /* delay = (width/2) + mstart; */ /* Round up */
+ }
+ }
+
+ return delay;
+}
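+
+// Continuing the 0x27B00 example: with OFFSET_EN clear, mstart = 11 and
+// width = 4 select delay = (4-1)/2 + 11 = 12, the rounded-down middle of
+// the mask.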
+
+#define WLEVEL_BYTE_BITS 5
+#define WLEVEL_BYTE_MSK ((1UL << WLEVEL_BYTE_BITS) - 1)
+
+static void update_wlevel_rank_struct(bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank,
+ int byte, int delay)
+{
+ bdk_lmcx_wlevel_rankx_t temp_wlevel_rank;
+ if (byte >= 0 && byte <= 8) {
+ temp_wlevel_rank.u = lmc_wlevel_rank->u;
+ temp_wlevel_rank.u &= ~(WLEVEL_BYTE_MSK << (WLEVEL_BYTE_BITS * byte));
+ temp_wlevel_rank.u |= ((delay & WLEVEL_BYTE_MSK) << (WLEVEL_BYTE_BITS * byte));
+ lmc_wlevel_rank->u = temp_wlevel_rank.u;
+ }
+}
+
+static int get_wlevel_rank_struct(bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank,
+ int byte)
+{
+ int delay = 0;
+ if (byte >= 0 && byte <= 8) {
+ delay = ((lmc_wlevel_rank->u) >> (WLEVEL_BYTE_BITS * byte)) & WLEVEL_BYTE_MSK;
+ }
+ return delay;
+}
+
+#if 0
+// entry = 1 is valid, entry = 0 is invalid
+static int
+validity_matrix[4][4] = {[0] {1,1,1,0}, // valid pairs when cv == 0: 0,0 + 0,1 + 0,2 == "7"
+ [1] {0,1,1,1}, // valid pairs when cv == 1: 1,1 + 1,2 + 1,3 == "E"
+ [2] {1,0,1,1}, // valid pairs when cv == 2: 2,2 + 2,3 + 2,0 == "D"
+ [3] {1,1,0,1}}; // valid pairs when cv == 3: 3,3 + 3,0 + 3,1 == "B"
+#endif
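+// The magic number below encodes the matrix above one bit per (current,next)
+// pair, with bitnum = (cv << 2) | nv: rows 0-3 contribute nibbles 0x7, 0xE,
+// 0xD and 0xB, giving 0xBDE7. Example (for illustration): cv = 0, nv = 3
+// gives bitnum = 3; bit 3 of 0xBDE7 is 0, so the pair is invalid and the
+// check returns 1.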
+static int
+validate_seq(int *wl, int *seq)
+{
+ int seqx; // sequence index, step through the sequence array
+ int bitnum;
+ seqx = 0;
+ while (seq[seqx+1] >= 0) { // stop on next seq entry == -1
+ // but now, check current versus next
+#if 0
+ if ( !validity_matrix [wl[seq[seqx]]] [wl[seq[seqx+1]]] )
+ return 1;
+#else
+ bitnum = (wl[seq[seqx]] << 2) | wl[seq[seqx+1]];
+ if (!((1 << bitnum) & 0xBDE7)) // magic validity number (see matrix above)
+ return 1;
+#endif
+ seqx++;
+ }
+ return 0;
+}
+
+static int
+Validate_HW_WL_Settings(bdk_node_t node, int ddr_interface_num,
+ bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank,
+ int ecc_ena)
+{
+ int wl[9], byte, errors;
+
+ // arrange the sequences so adjacent entries reflect the expected byte ordering; -1 terminates each
+ int useq[] = { 0,1,2,3,8,4,5,6,7,-1 }; // index 0 has byte 0, etc, ECC in middle
+ int rseq1[] = { 8,3,2,1,0,-1 }; // index 0 is ECC, then go down
+ int rseq2[] = { 4,5,6,7,-1 }; // index 0 has byte 4, then go up
+ int useqno[] = { 0,1,2,3,4,5,6,7,-1 }; // index 0 has byte 0, etc, no ECC
+ int rseq1no[] = { 3,2,1,0,-1 }; // index 0 is byte 3, then go down, no ECC
+
+ // in the CSR, bytes 0-7 are always data, byte 8 is ECC
+ for (byte = 0; byte < 8+ecc_ena; byte++) {
+ wl[byte] = (get_wlevel_rank_struct(lmc_wlevel_rank, byte) >> 1) & 3; // preprocess :-)
+ }
+
+ errors = 0;
+ if (__bdk_dram_is_rdimm(node, 0) != 0) { // RDIMM order
+ errors = validate_seq(wl, (ecc_ena) ? rseq1 : rseq1no);
+ errors += validate_seq(wl, rseq2);
+ } else { // UDIMM order
+ errors = validate_seq(wl, (ecc_ena) ? useq : useqno);
+ }
+
+ return errors;
+}
+
+#define RLEVEL_BYTE_BITS 6
+#define RLEVEL_BYTE_MSK ((1UL << RLEVEL_BYTE_BITS) - 1)
+
+static void update_rlevel_rank_struct(bdk_lmcx_rlevel_rankx_t *lmc_rlevel_rank,
+ int byte, int delay)
+{
+ bdk_lmcx_rlevel_rankx_t temp_rlevel_rank;
+ if (byte >= 0 && byte <= 8) {
+ temp_rlevel_rank.u = lmc_rlevel_rank->u & ~(RLEVEL_BYTE_MSK << (RLEVEL_BYTE_BITS * byte));
+ temp_rlevel_rank.u |= ((delay & RLEVEL_BYTE_MSK) << (RLEVEL_BYTE_BITS * byte));
+ lmc_rlevel_rank->u = temp_rlevel_rank.u;
+ }
+}
+
+#if RLEXTRAS_PATCH || !DISABLE_SW_WL_PASS_2
+static int get_rlevel_rank_struct(bdk_lmcx_rlevel_rankx_t *lmc_rlevel_rank,
+ int byte)
+{
+ int delay = 0;
+ if (byte >= 0 && byte <= 8) {
+ delay = ((lmc_rlevel_rank->u) >> (RLEVEL_BYTE_BITS * byte)) & RLEVEL_BYTE_MSK;
+ }
+ return delay;
+}
+#endif
+
+static void unpack_rlevel_settings(int ddr_interface_bytemask, int ecc_ena,
+ rlevel_byte_data_t *rlevel_byte,
+ bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank)
+{
+ if ((ddr_interface_bytemask & 0xff) == 0xff) {
+ if (ecc_ena) {
+ rlevel_byte[8].delay = lmc_rlevel_rank.cn83xx.byte7;
+ rlevel_byte[7].delay = lmc_rlevel_rank.cn83xx.byte6;
+ rlevel_byte[6].delay = lmc_rlevel_rank.cn83xx.byte5;
+ rlevel_byte[5].delay = lmc_rlevel_rank.cn83xx.byte4;
+ rlevel_byte[4].delay = lmc_rlevel_rank.cn83xx.byte8; /* ECC */
+ } else {
+ rlevel_byte[7].delay = lmc_rlevel_rank.cn83xx.byte7;
+ rlevel_byte[6].delay = lmc_rlevel_rank.cn83xx.byte6;
+ rlevel_byte[5].delay = lmc_rlevel_rank.cn83xx.byte5;
+ rlevel_byte[4].delay = lmc_rlevel_rank.cn83xx.byte4;
+ }
+ } else {
+ rlevel_byte[8].delay = lmc_rlevel_rank.cn83xx.byte8; /* unused */
+ rlevel_byte[7].delay = lmc_rlevel_rank.cn83xx.byte7; /* unused */
+ rlevel_byte[6].delay = lmc_rlevel_rank.cn83xx.byte6; /* unused */
+ rlevel_byte[5].delay = lmc_rlevel_rank.cn83xx.byte5; /* unused */
+ rlevel_byte[4].delay = lmc_rlevel_rank.cn83xx.byte4; /* ECC */
+ }
+ rlevel_byte[3].delay = lmc_rlevel_rank.cn83xx.byte3;
+ rlevel_byte[2].delay = lmc_rlevel_rank.cn83xx.byte2;
+ rlevel_byte[1].delay = lmc_rlevel_rank.cn83xx.byte1;
+ rlevel_byte[0].delay = lmc_rlevel_rank.cn83xx.byte0;
+}
+
+static void pack_rlevel_settings(int ddr_interface_bytemask, int ecc_ena,
+ rlevel_byte_data_t *rlevel_byte,
+ bdk_lmcx_rlevel_rankx_t *final_rlevel_rank)
+{
+ bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank = *final_rlevel_rank;
+
+ if ((ddr_interface_bytemask & 0xff) == 0xff) {
+ if (ecc_ena) {
+ lmc_rlevel_rank.cn83xx.byte7 = rlevel_byte[8].delay;
+ lmc_rlevel_rank.cn83xx.byte6 = rlevel_byte[7].delay;
+ lmc_rlevel_rank.cn83xx.byte5 = rlevel_byte[6].delay;
+ lmc_rlevel_rank.cn83xx.byte4 = rlevel_byte[5].delay;
+ lmc_rlevel_rank.cn83xx.byte8 = rlevel_byte[4].delay; /* ECC */
+ } else {
+ lmc_rlevel_rank.cn83xx.byte7 = rlevel_byte[7].delay;
+ lmc_rlevel_rank.cn83xx.byte6 = rlevel_byte[6].delay;
+ lmc_rlevel_rank.cn83xx.byte5 = rlevel_byte[5].delay;
+ lmc_rlevel_rank.cn83xx.byte4 = rlevel_byte[4].delay;
+ }
+ } else {
+ lmc_rlevel_rank.cn83xx.byte8 = rlevel_byte[8].delay;
+ lmc_rlevel_rank.cn83xx.byte7 = rlevel_byte[7].delay;
+ lmc_rlevel_rank.cn83xx.byte6 = rlevel_byte[6].delay;
+ lmc_rlevel_rank.cn83xx.byte5 = rlevel_byte[5].delay;
+ lmc_rlevel_rank.cn83xx.byte4 = rlevel_byte[4].delay;
+ }
+ lmc_rlevel_rank.cn83xx.byte3 = rlevel_byte[3].delay;
+ lmc_rlevel_rank.cn83xx.byte2 = rlevel_byte[2].delay;
+ lmc_rlevel_rank.cn83xx.byte1 = rlevel_byte[1].delay;
+ lmc_rlevel_rank.cn83xx.byte0 = rlevel_byte[0].delay;
+
+ *final_rlevel_rank = lmc_rlevel_rank;
+}
+
+#if !DISABLE_SW_WL_PASS_2
+static void rlevel_to_wlevel(bdk_lmcx_rlevel_rankx_t *lmc_rlevel_rank,
+ bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank, int byte)
+{
+ int byte_delay = get_rlevel_rank_struct(lmc_rlevel_rank, byte);
+
+ debug_print("Estimating Wlevel delay byte %d: ", byte);
+ debug_print("Rlevel=%d => ", byte_delay);
+ byte_delay = divide_roundup(byte_delay,2) & 0x1e;
+ debug_print("Wlevel=%d\n", byte_delay);
+ update_wlevel_rank_struct(lmc_wlevel_rank, byte, byte_delay);
+}
+#endif /* !DISABLE_SW_WL_PASS_2 */
+
+/* Delay trend: constant=0, decreasing=-1, increasing=1 */
+static int calc_delay_trend(int v)
+{
+ if (v == 0)
+ return (0);
+ if (v < 0)
+ return (-1);
+ return 1;
+}
+
+/* Evaluate delay sequence across the whole range of byte delays while
+** keeping track of the overall delay trend, increasing or decreasing.
+** If the trend changes charge an error amount to the score.
+*/
+
+// NOTE: "max_adj_delay_inc" argument is, by default, 1 for DDR3 and 2 for DDR4
+
+static int nonsequential_delays(rlevel_byte_data_t *rlevel_byte,
+ int start, int end, int max_adj_delay_inc)
+{
+ int error = 0;
+ int delay_trend, prev_trend = 0;
+ int byte_idx;
+ int delay_inc;
+ int delay_diff;
+ int byte_err;
+
+ for (byte_idx = start; byte_idx < end; ++byte_idx) {
+ byte_err = 0;
+
+ delay_diff = rlevel_byte[byte_idx+1].delay - rlevel_byte[byte_idx].delay;
+ delay_trend = calc_delay_trend(delay_diff);
+
+ debug_bitmask_print("Byte %d: %2d, Byte %d: %2d, delay_trend: %2d, prev_trend: %2d",
+ byte_idx+0, rlevel_byte[byte_idx+0].delay,
+ byte_idx+1, rlevel_byte[byte_idx+1].delay,
+ delay_trend, prev_trend);
+
+ /* Increment error each time the trend changes to the opposite direction.
+ */
+ if ((prev_trend != 0) && (delay_trend != 0) && (prev_trend != delay_trend)) {
+ byte_err += RLEVEL_NONSEQUENTIAL_DELAY_ERROR;
+ prev_trend = delay_trend;
+ debug_bitmask_print(" => Nonsequential byte delay");
+ }
+
+ delay_inc = _abs(delay_diff); // how big was the delay change, if any
+
+ /* Even if the trend did not change to the opposite direction, check for
+ the magnitude of the change, and scale the penalty by the amount that
+ the size is larger than the provided limit.
+ */
+ if ((max_adj_delay_inc != 0) && (delay_inc > max_adj_delay_inc)) {
+ byte_err += (delay_inc - max_adj_delay_inc) * RLEVEL_ADJACENT_DELAY_ERROR;
+ debug_bitmask_print(" => Adjacent delay error");
+ }
+
+ debug_bitmask_print("\n");
+ if (delay_trend != 0)
+ prev_trend = delay_trend;
+
+ rlevel_byte[byte_idx+1].sqerrs = byte_err;
+ error += byte_err;
+ }
+ return error;
+}
+
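+// Example (for illustration): for bitmask 0x38 (bits 3-5 set), the first
+// loop stops at the first 0 (leader = 0), the second loop finds the next 1
+// at bit 3; 3 is odd, so the returned delay rounds up to 4.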
+static int roundup_ddr3_wlevel_bitmask(int bitmask)
+{
+ int shifted_bitmask;
+ int leader;
+ int delay;
+
+ for (leader=0; leader<8; ++leader) {
+ shifted_bitmask = (bitmask>>leader);
+ if ((shifted_bitmask&1) == 0)
+ break;
+ }
+
+ for (/*leader=leader*/; leader<16; ++leader) {
+ shifted_bitmask = (bitmask>>(leader%8));
+ if (shifted_bitmask&1)
+ break;
+ }
+
+ delay = (leader & 1) ? leader + 1 : leader;
+ delay = delay % 8;
+
+ return delay;
+}
+
+/* Check to see if any custom offset values are provided */
+static int is_dll_offset_provided(const int8_t *dll_offset_table)
+{
+ int i;
+ if (dll_offset_table != NULL) {
+ for (i=0; i<9; ++i) {
+ if (dll_offset_table[i] != 0)
+ return (1);
+ }
+ }
+ return (0);
+}
+
+/////////////////// These are the RLEVEL settings display routines
+
+// flags
+#define WITH_NOTHING 0
+#define WITH_SCORE 1
+#define WITH_AVERAGE 2
+#define WITH_FINAL 4
+#define WITH_COMPUTE 8
+static void do_display_RL(bdk_node_t node, int ddr_interface_num,
+ bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank,
+ int rank, int flags, int score)
+{
+ char score_buf[16];
+ if (flags & WITH_SCORE)
+ snprintf(score_buf, sizeof(score_buf), "(%d)", score);
+ else {
+ score_buf[0] = ' '; score_buf[1] = 0;
+ }
+
+ const char *msg_buf;
+ char hex_buf[20];
+ if (flags & WITH_AVERAGE) {
+ msg_buf = " DELAY AVERAGES ";
+ } else if (flags & WITH_FINAL) {
+ msg_buf = " FINAL SETTINGS ";
+ } else if (flags & WITH_COMPUTE) {
+ msg_buf = " COMPUTED DELAYS ";
+ } else {
+ snprintf(hex_buf, sizeof(hex_buf), "0x%016lX", lmc_rlevel_rank.u);
+ msg_buf = hex_buf;
+ }
+
+ ddr_print("N%d.LMC%d.R%d: Rlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d %s\n",
+ node, ddr_interface_num, rank,
+ lmc_rlevel_rank.s.status,
+ msg_buf,
+ lmc_rlevel_rank.cn83xx.byte8,
+ lmc_rlevel_rank.cn83xx.byte7,
+ lmc_rlevel_rank.cn83xx.byte6,
+ lmc_rlevel_rank.cn83xx.byte5,
+ lmc_rlevel_rank.cn83xx.byte4,
+ lmc_rlevel_rank.cn83xx.byte3,
+ lmc_rlevel_rank.cn83xx.byte2,
+ lmc_rlevel_rank.cn83xx.byte1,
+ lmc_rlevel_rank.cn83xx.byte0,
+ score_buf
+ );
+}
+
+static inline void
+display_RL(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank)
+{
+ do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 0, 0);
+}
+
+static inline void
+display_RL_with_score(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score)
+{
+ do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 1, score);
+}
+
+#if !PICK_BEST_RANK_SCORE_NOT_AVG
+static inline void
+display_RL_with_average(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score)
+{
+ do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 3, score);
+}
+#endif
+
+static inline void
+display_RL_with_final(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank)
+{
+ do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 4, 0);
+}
+
+static inline void
+display_RL_with_computed(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score)
+{
+ do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 9, score);
+}
+
+// flag values
+#define WITH_RODT_BLANK 0
+#define WITH_RODT_SKIPPING 1
+#define WITH_RODT_BESTROW 2
+#define WITH_RODT_BESTSCORE 3
+// control
+#define SKIP_SKIPPING 1
+
+static const char *with_rodt_canned_msgs[4] = { " ", "SKIPPING ", "BEST ROW ", "BEST SCORE" };
+
+static void display_RL_with_RODT(bdk_node_t node, int ddr_interface_num,
+ bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score,
+ int nom_ohms, int rodt_ohms, int flag)
+{
+ const char *msg_buf;
+ char set_buf[20];
+#if SKIP_SKIPPING
+ if (flag == WITH_RODT_SKIPPING) return;
+#endif
+ msg_buf = with_rodt_canned_msgs[flag];
+ if (nom_ohms < 0) {
+ snprintf(set_buf, sizeof(set_buf), " RODT %3d ", rodt_ohms);
+ } else {
+ snprintf(set_buf, sizeof(set_buf), "NOM %3d RODT %3d", nom_ohms, rodt_ohms);
+ }
+
+ VB_PRT(VBL_TME, "N%d.LMC%d.R%d: Rlevel %s %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d (%d)\n",
+ node, ddr_interface_num, rank,
+ set_buf, msg_buf,
+ lmc_rlevel_rank.cn83xx.byte8,
+ lmc_rlevel_rank.cn83xx.byte7,
+ lmc_rlevel_rank.cn83xx.byte6,
+ lmc_rlevel_rank.cn83xx.byte5,
+ lmc_rlevel_rank.cn83xx.byte4,
+ lmc_rlevel_rank.cn83xx.byte3,
+ lmc_rlevel_rank.cn83xx.byte2,
+ lmc_rlevel_rank.cn83xx.byte1,
+ lmc_rlevel_rank.cn83xx.byte0,
+ score
+ );
+
+ // FIXME: does this help make the output a little easier to focus?
+ if (flag == WITH_RODT_BESTSCORE) {
+ VB_PRT(VBL_DEV, "-----------\n");
+ }
+}
+
+static void
+do_display_WL(bdk_node_t node, int ddr_interface_num, bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank, int rank, int flags)
+{
+ const char *msg_buf;
+ char hex_buf[20];
+ int vbl;
+ if (flags & WITH_FINAL) {
+ msg_buf = " FINAL SETTINGS ";
+ vbl = VBL_NORM;
+ } else {
+ snprintf(hex_buf, sizeof(hex_buf), "0x%016lX", lmc_wlevel_rank.u);
+ msg_buf = hex_buf;
+ vbl = VBL_FAE;
+ }
+
+ VB_PRT(vbl, "N%d.LMC%d.R%d: Wlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
+ node, ddr_interface_num, rank,
+ lmc_wlevel_rank.s.status,
+ msg_buf,
+ lmc_wlevel_rank.s.byte8,
+ lmc_wlevel_rank.s.byte7,
+ lmc_wlevel_rank.s.byte6,
+ lmc_wlevel_rank.s.byte5,
+ lmc_wlevel_rank.s.byte4,
+ lmc_wlevel_rank.s.byte3,
+ lmc_wlevel_rank.s.byte2,
+ lmc_wlevel_rank.s.byte1,
+ lmc_wlevel_rank.s.byte0
+ );
+}
+
+static inline void
+display_WL(bdk_node_t node, int ddr_interface_num, bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank, int rank)
+{
+ do_display_WL(node, ddr_interface_num, lmc_wlevel_rank, rank, WITH_NOTHING);
+}
+
+static inline void
+display_WL_with_final(bdk_node_t node, int ddr_interface_num, bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank, int rank)
+{
+ do_display_WL(node, ddr_interface_num, lmc_wlevel_rank, rank, WITH_FINAL);
+}
+
+// pretty-print bitmask adjuster
+static uint64_t
+PPBM(uint64_t bm)
+{
+ if (bm != 0ul) {
+ while ((bm & 0x0fful) == 0ul)
+ bm >>= 4;
+ }
+ return bm;
+}
+
+// xlate PACKED index to UNPACKED index to use with rlevel_byte
+#define XPU(i,e) (((i) < 4)?(i):((i)<8)?(i)+(e):4)
+// xlate UNPACKED index to PACKED index to use with rlevel_bitmask
+#define XUP(i,e) (((i) < 4)?(i):((i)>4)?(i)-(e):8)
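+
+// Example (for illustration), with ecc_ena = 1: XPU maps PACKED data bytes
+// 0-3 to UNPACKED 0-3, data bytes 4-7 up to 5-8, and the ECC byte (8) into
+// the middle slot 4; XUP performs the inverse mapping.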
+
+// flag values
+#define WITH_WL_BITMASKS 0
+#define WITH_RL_BITMASKS 1
+#define WITH_RL_MASK_SCORES 2
+#define WITH_RL_SEQ_SCORES 3
+static void
+do_display_BM(bdk_node_t node, int ddr_interface_num, int rank, void *bm, int flags, int ecc_ena)
+{
+ int ecc = !!ecc_ena;
+ if (flags == WITH_WL_BITMASKS) { // wlevel_bitmask array in PACKED index order, so just print them
+ int *bitmasks = (int *)bm;
+
+ ddr_print("N%d.LMC%d.R%d: Wlevel Debug Results : %05x %05x %05x %05x %05x %05x %05x %05x %05x\n",
+ node, ddr_interface_num, rank,
+ bitmasks[8],
+ bitmasks[7],
+ bitmasks[6],
+ bitmasks[5],
+ bitmasks[4],
+ bitmasks[3],
+ bitmasks[2],
+ bitmasks[1],
+ bitmasks[0]
+ );
+ } else
+ if (flags == WITH_RL_BITMASKS) { // rlevel_bitmask array in PACKED index order, so just print them
+ rlevel_bitmask_t *rlevel_bitmask = (rlevel_bitmask_t *)bm;
+ ddr_print("N%d.LMC%d.R%d: Rlevel Debug Bitmasks 8:0 : %05lx %05lx %05lx %05lx %05lx %05lx %05lx %05lx %05lx\n",
+ node, ddr_interface_num, rank,
+ PPBM(rlevel_bitmask[8].bm),
+ PPBM(rlevel_bitmask[7].bm),
+ PPBM(rlevel_bitmask[6].bm),
+ PPBM(rlevel_bitmask[5].bm),
+ PPBM(rlevel_bitmask[4].bm),
+ PPBM(rlevel_bitmask[3].bm),
+ PPBM(rlevel_bitmask[2].bm),
+ PPBM(rlevel_bitmask[1].bm),
+ PPBM(rlevel_bitmask[0].bm)
+ );
+ } else
+ if (flags == WITH_RL_MASK_SCORES) { // rlevel_bitmask array in PACKED index order, so just print them
+ rlevel_bitmask_t *rlevel_bitmask = (rlevel_bitmask_t *)bm;
+ ddr_print("N%d.LMC%d.R%d: Rlevel Debug Bitmask Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
+ node, ddr_interface_num, rank,
+ rlevel_bitmask[8].errs,
+ rlevel_bitmask[7].errs,
+ rlevel_bitmask[6].errs,
+ rlevel_bitmask[5].errs,
+ rlevel_bitmask[4].errs,
+ rlevel_bitmask[3].errs,
+ rlevel_bitmask[2].errs,
+ rlevel_bitmask[1].errs,
+ rlevel_bitmask[0].errs
+ );
+ } else
+ if (flags == WITH_RL_SEQ_SCORES) { // rlevel_byte array in UNPACKED index order, so xlate and print them
+ rlevel_byte_data_t *rlevel_byte = (rlevel_byte_data_t *)bm;
+ ddr_print("N%d.LMC%d.R%d: Rlevel Debug Non-seq Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
+ node, ddr_interface_num, rank,
+ rlevel_byte[XPU(8,ecc)].sqerrs,
+ rlevel_byte[XPU(7,ecc)].sqerrs,
+ rlevel_byte[XPU(6,ecc)].sqerrs,
+ rlevel_byte[XPU(5,ecc)].sqerrs,
+ rlevel_byte[XPU(4,ecc)].sqerrs,
+ rlevel_byte[XPU(3,ecc)].sqerrs,
+ rlevel_byte[XPU(2,ecc)].sqerrs,
+ rlevel_byte[XPU(1,ecc)].sqerrs,
+ rlevel_byte[XPU(0,ecc)].sqerrs
+ );
+ }
+}
+
+static inline void
+display_WL_BM(bdk_node_t node, int ddr_interface_num, int rank, int *bitmasks)
+{
+ do_display_BM(node, ddr_interface_num, rank, (void *)bitmasks, WITH_WL_BITMASKS, 0);
+}
+
+static inline void
+display_RL_BM(bdk_node_t node, int ddr_interface_num, int rank, rlevel_bitmask_t *bitmasks, int ecc_ena)
+{
+ do_display_BM(node, ddr_interface_num, rank, (void *)bitmasks, WITH_RL_BITMASKS, ecc_ena);
+}
+
+static inline void
+display_RL_BM_scores(bdk_node_t node, int ddr_interface_num, int rank, rlevel_bitmask_t *bitmasks, int ecc_ena)
+{
+ do_display_BM(node, ddr_interface_num, rank, (void *)bitmasks, WITH_RL_MASK_SCORES, ecc_ena);
+}
+
+static inline void
+display_RL_SEQ_scores(bdk_node_t node, int ddr_interface_num, int rank, rlevel_byte_data_t *bytes, int ecc_ena)
+{
+ do_display_BM(node, ddr_interface_num, rank, (void *)bytes, WITH_RL_SEQ_SCORES, ecc_ena);
+}
+
+unsigned short load_dll_offset(bdk_node_t node, int ddr_interface_num,
+ int dll_offset_mode, int byte_offset, int byte)
+{
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+ /* byte_sel:
+ 0x1 = byte 0, ..., 0x9 = byte 8
+ 0xA = all bytes */
+ int byte_sel = (byte == 10) ? byte : byte + 1;
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ SET_DDR_DLL_CTL3(load_offset, 0);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+
+ SET_DDR_DLL_CTL3(mode_sel, dll_offset_mode);
+ SET_DDR_DLL_CTL3(offset, (_abs(byte_offset)&0x3f) | (_sign(byte_offset) << 6)); /* Always 6-bit field? */
+ SET_DDR_DLL_CTL3(byte_sel, byte_sel);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+
+ SET_DDR_DLL_CTL3(load_offset, 1);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+
+ return ((unsigned short) GET_DDR_DLL_CTL3(offset));
+}
+
+void change_dll_offset_enable(bdk_node_t node, int ddr_interface_num, int change)
+{
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ SET_DDR_DLL_CTL3(offset_ena, !!change);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+}
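+
+// Sketch of the load pattern used by process_custom_dll_offsets() below
+// (illustrative values only): offsets must be disabled while loading, e.g.
+// change_dll_offset_enable(node, lmc, 0);
+// load_dll_offset(node, lmc, 2 /* read */, -5, 10 /* all bytes */);
+// change_dll_offset_enable(node, lmc, 1);
+// Assuming _sign() returns 1 for negative values, an offset of -5 is
+// written as sign-magnitude 0x45 (bit 6 = sign, bits 5:0 = magnitude).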
+
+static void process_custom_dll_offsets(bdk_node_t node, int ddr_interface_num, const char *enable_str,
+ const int8_t *offsets, const char *byte_str, int mode)
+{
+ const char *s;
+ int enabled;
+ int provided;
+
+ if ((s = lookup_env_parameter("%s", enable_str)) != NULL) {
+ enabled = !!strtol(s, NULL, 0);
+ } else
+ enabled = -1;
+
+ // enabled == -1: no override, do only configured offsets if provided
+ // enabled == 0: override OFF, do NOT do it even if configured offsets provided
+ // enabled == 1: override ON, do it for overrides plus configured offsets
+
+ if (enabled == 0)
+ return;
+
+ provided = is_dll_offset_provided(offsets);
+
+ if (enabled < 0 && !provided)
+ return;
+
+ int byte_offset;
+ unsigned short offset[9] = {0};
+ int byte;
+
+ // offsets need to be disabled while loading
+ change_dll_offset_enable(node, ddr_interface_num, 0);
+
+ for (byte = 0; byte < 9; ++byte) {
+
+ // always take the provided, if available
+ byte_offset = (provided) ? offsets[byte] : 0;
+
+ // then, if enabled, use any overrides present
+ if (enabled > 0) {
+ if ((s = lookup_env_parameter(byte_str, ddr_interface_num, byte)) != NULL) {
+ byte_offset = strtol(s, NULL, 0);
+ }
+ }
+
+ offset[byte] = load_dll_offset(node, ddr_interface_num, mode, byte_offset, byte);
+ }
+
+ // re-enable offsets after loading
+ change_dll_offset_enable(node, ddr_interface_num, 1);
+
+ ddr_print("N%d.LMC%d: DLL %s Offset 8:0 :"
+ " 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x\n",
+ node, ddr_interface_num, (mode == 2) ? "Read " : "Write",
+ offset[8], offset[7], offset[6], offset[5], offset[4],
+ offset[3], offset[2], offset[1], offset[0]);
+}
+
+void perform_octeon3_ddr3_sequence(bdk_node_t node, int rank_mask, int ddr_interface_num, int sequence)
+{
+ /*
+ * 3. Without changing any other fields in LMC(0)_CONFIG, write
+ * LMC(0)_CONFIG[RANKMASK] then write both
+ * LMC(0)_SEQ_CTL[SEQ_SEL,INIT_START] = 1 with a single CSR write
+ * operation. LMC(0)_CONFIG[RANKMASK] bits should be set to indicate
+ * the ranks that will participate in the sequence.
+ *
+ * The LMC(0)_SEQ_CTL[SEQ_SEL] value should select power-up/init or
+ * selfrefresh exit, depending on whether the DRAM parts are in
+ * self-refresh and whether their contents should be preserved. While
+ * LMC performs these sequences, it will not perform any other DDR3
+ * transactions. When the sequence is complete, hardware sets the
+ * LMC(0)_CONFIG[INIT_STATUS] bits for the ranks that have been
+ * initialized.
+ *
+ * If power-up/init is selected immediately following a DRESET
+ * assertion, LMC executes the sequence described in the "Reset and
+ * Initialization Procedure" section of the JEDEC DDR3
+ * specification. This includes activating CKE, writing all four DDR3
+ * mode registers on all selected ranks, and issuing the required ZQCL
+ * command. The LMC(0)_CONFIG[RANKMASK] value should select all ranks
+ * with attached DRAM in this case. If LMC(0)_CONTROL[RDIMM_ENA] = 1,
+ * LMC writes the JEDEC standard SSTE32882 control words selected by
+ * LMC(0)_DIMM_CTL[DIMM*_WMASK] between DDR_CKE* signal assertion and
+ * the first DDR3 mode register write operation.
+ * LMC(0)_DIMM_CTL[DIMM*_WMASK] should be cleared to 0 if the
+ * corresponding DIMM is not present.
+ *
+ * If self-refresh exit is selected, LMC executes the required SRX
+ * command followed by a refresh and ZQ calibration. Section 4.5
+ * describes behavior of a REF + ZQCS. LMC does not write the DDR3
+ * mode registers as part of this sequence, and the mode register
+ * parameters must match at self-refresh entry and exit times.
+ *
+ * 4. Read LMC(0)_SEQ_CTL and wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be
+ * set.
+ *
+ * 5. Read LMC(0)_CONFIG[INIT_STATUS] and confirm that all ranks have
+ * been initialized.
+ */
+
+ const char *s;
+ static const char *sequence_str[] = {
+ "Power-up/init",
+ "Read-leveling",
+ "Self-refresh entry",
+ "Self-refresh exit",
+ "Illegal",
+ "Illegal",
+ "Write-leveling",
+ "Init Register Control Words",
+ "Mode Register Write",
+ "MPR Register Access",
+ "LMC Deskew/Internal Vref training",
+ "Offset Training"
+ };
+
+ bdk_lmcx_seq_ctl_t seq_ctl;
+ bdk_lmcx_config_t lmc_config;
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ lmc_config.s.rankmask = rank_mask;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+
+ seq_ctl.u = 0;
+
+ seq_ctl.s.init_start = 1;
+ seq_ctl.s.seq_sel = sequence;
+
+ VB_PRT(VBL_SEQ, "N%d.LMC%d: Performing LMC sequence=%x: rank_mask=0x%02x, %s\n",
+ node, ddr_interface_num, sequence, rank_mask, sequence_str[sequence]);
+
+ if ((s = lookup_env_parameter("ddr_trigger_sequence%d", sequence)) != NULL) {
+ int trigger = strtoul(s, NULL, 0);
+ if (trigger)
+ pulse_gpio_pin(node, 1, 2);
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_SEQ_CTL(ddr_interface_num), seq_ctl.u);
+ BDK_CSR_READ(node, BDK_LMCX_SEQ_CTL(ddr_interface_num));
+
+ /* Wait 100us minimum before checking for sequence complete */
+ bdk_wait_usec(100);
+ if (!bdk_is_platform(BDK_PLATFORM_ASIM) &&
+ BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_SEQ_CTL(ddr_interface_num), seq_complete, ==, 1, 1000000))
+ {
+ error_print("N%d.LMC%d: Timeout waiting for LMC sequence=%x, rank_mask=0x%02x, ignoring...\n",
+ node, ddr_interface_num, sequence, rank_mask);
+ }
+ else {
+ VB_PRT(VBL_SEQ, "N%d.LMC%d: LMC sequence=%x: Completed.\n", node, ddr_interface_num, sequence);
+ }
+}
+
+void ddr4_mrw(bdk_node_t node, int ddr_interface_num, int rank,
+ int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1)
+{
+ bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl;
+
+ lmc_mr_mpr_ctl.u = 0;
+ lmc_mr_mpr_ctl.s.mr_wr_addr = (mr_wr_addr == -1) ? 0 : mr_wr_addr;
+ lmc_mr_mpr_ctl.s.mr_wr_sel = mr_wr_sel;
+ lmc_mr_mpr_ctl.s.mr_wr_rank = rank;
+ //lmc_mr_mpr_ctl.s.mr_wr_pda_mask =
+ //lmc_mr_mpr_ctl.s.mr_wr_pda_enable =
+ //lmc_mr_mpr_ctl.s.mpr_loc =
+ //lmc_mr_mpr_ctl.s.mpr_wr =
+ //lmc_mr_mpr_ctl.s.mpr_bit_select =
+ //lmc_mr_mpr_ctl.s.mpr_byte_select =
+ //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable =
+ lmc_mr_mpr_ctl.s.mr_wr_use_default_value = (mr_wr_addr == -1) ? 1 : 0;
+ lmc_mr_mpr_ctl.s.mr_wr_bg1 = mr_wr_bg1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
+
+ /* Mode Register Write */
+ perform_octeon3_ddr3_sequence(node, 1 << rank, ddr_interface_num, 0x8);
+}
+
+#define InvA0_17(x) (x ^ 0x22bf8)
+static void set_mpr_mode (bdk_node_t node, int rank_mask,
+ int ddr_interface_num, int dimm_count, int mpr, int bg1)
+{
+ int rankx;
+
+ ddr_print("All Ranks: Set mpr mode = %x %c-side\n",
+ mpr, (bg1==0) ? 'A' : 'B');
+
+ for (rankx = 0; rankx < dimm_count*4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+ if (bg1 == 0)
+ ddr4_mrw(node, ddr_interface_num, rankx, mpr<<2, 3, bg1); /* MR3 A-side */
+ else
+ ddr4_mrw(node, ddr_interface_num, rankx, InvA0_17(mpr<<2), ~3, bg1); /* MR3 B-side */
+ }
+}
+
+#if ENABLE_DISPLAY_MPR_PAGE
+static void do_ddr4_mpr_read(bdk_node_t node, int ddr_interface_num, int rank,
+ int page, int location)
+{
+ bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl;
+
+ lmc_mr_mpr_ctl.u = BDK_CSR_READ(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num));
+
+ lmc_mr_mpr_ctl.s.mr_wr_addr = 0;
+ lmc_mr_mpr_ctl.s.mr_wr_sel = page; /* Page */
+ lmc_mr_mpr_ctl.s.mr_wr_rank = rank;
+ //lmc_mr_mpr_ctl.s.mr_wr_pda_mask =
+ //lmc_mr_mpr_ctl.s.mr_wr_pda_enable =
+ lmc_mr_mpr_ctl.s.mpr_loc = location;
+ lmc_mr_mpr_ctl.s.mpr_wr = 0; /* Read=0, Write=1 */
+ //lmc_mr_mpr_ctl.s.mpr_bit_select =
+ //lmc_mr_mpr_ctl.s.mpr_byte_select =
+ //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable =
+ //lmc_mr_mpr_ctl.s.mr_wr_use_default_value =
+ //lmc_mr_mpr_ctl.s.mr_wr_bg1 =
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
+
+ /* MPR register access sequence */
+ perform_octeon3_ddr3_sequence(node, 1 << rank, ddr_interface_num, 0x9);
+
+ debug_print("LMC_MR_MPR_CTL : 0x%016lx\n", lmc_mr_mpr_ctl.u);
+ debug_print("lmc_mr_mpr_ctl.s.mr_wr_addr: 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_addr);
+ debug_print("lmc_mr_mpr_ctl.s.mr_wr_sel : 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_sel);
+ debug_print("lmc_mr_mpr_ctl.s.mpr_loc : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_loc);
+ debug_print("lmc_mr_mpr_ctl.s.mpr_wr : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_wr);
+
+}
+#endif
+
+int set_rdimm_mode(bdk_node_t node, int ddr_interface_num, int enable)
+{
+ bdk_lmcx_control_t lmc_control;
+ int save_rdimm_mode;
+
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ save_rdimm_mode = lmc_control.s.rdimm_ena;
+ lmc_control.s.rdimm_ena = enable;
+ VB_PRT(VBL_FAE, "Setting RDIMM_ENA = %x\n", enable);
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ return (save_rdimm_mode);
+}
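+
+/* Usage sketch (illustrative): the return value lets a caller restore the
+   previous state around operations that must run with RDIMM mode disabled. */
+#if 0
+    int save = set_rdimm_mode(node, ddr_interface_num, 0); /* disable */
+    /* ... issue MR writes that must go out non-inverted ... */
+    set_rdimm_mode(node, ddr_interface_num, save);         /* restore */
+#endif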
+
+#if ENABLE_DISPLAY_MPR_PAGE
+static void ddr4_mpr_read(bdk_node_t node, int ddr_interface_num, int rank,
+ int page, int location, uint64_t *mpr_data)
+{
+ do_ddr4_mpr_read(node, ddr_interface_num, rank, page, location);
+
+ mpr_data[0] = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA0(ddr_interface_num));
+ mpr_data[1] = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA1(ddr_interface_num));
+ mpr_data[2] = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA2(ddr_interface_num));
+
+ debug_print("MPR Read %016lx.%016lx.%016lx\n", mpr_data[2], mpr_data[1], mpr_data[0]);
+}
+
+/* Display MPR values for Page Location */
+static void Display_MPR_Page_Location(bdk_node_t node, int rank,
+ int ddr_interface_num, int dimm_count,
+ int page, int location, uint64_t *mpr_data)
+{
+ ddr4_mpr_read(node, ddr_interface_num, rank, page, location, mpr_data);
+ ddr_print("MPR Page %d, Loc %d %016lx.%016lx.%016lx\n",
+ page, location, mpr_data[2], mpr_data[1], mpr_data[0]);
+}
+
+/* Display MPR values for Page */
+static void Display_MPR_Page(bdk_node_t node, int rank_mask,
+ int ddr_interface_num, int dimm_count, int page)
+{
+ int rankx;
+ uint64_t mpr_data[3];
+
+ for (rankx = 0; rankx < dimm_count * 4;rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ ddr_print("Rank %d: MPR values for Page %d\n", rankx, page);
+ for (int location = 0; location < 4; location++) {
+ Display_MPR_Page_Location(node, rankx, ddr_interface_num, dimm_count,
+ page, location, &mpr_data[0]);
+ }
+
+ } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
+}
+#endif
+
+void ddr4_mpr_write(bdk_node_t node, int ddr_interface_num, int rank,
+ int page, int location, uint8_t mpr_data)
+{
+ bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl;
+
+ lmc_mr_mpr_ctl.u = 0;
+ lmc_mr_mpr_ctl.s.mr_wr_addr = mpr_data;
+ lmc_mr_mpr_ctl.s.mr_wr_sel = page; /* Page */
+ lmc_mr_mpr_ctl.s.mr_wr_rank = rank;
+ //lmc_mr_mpr_ctl.s.mr_wr_pda_mask =
+ //lmc_mr_mpr_ctl.s.mr_wr_pda_enable =
+ lmc_mr_mpr_ctl.s.mpr_loc = location;
+ lmc_mr_mpr_ctl.s.mpr_wr = 1; /* Read=0, Write=1 */
+ //lmc_mr_mpr_ctl.s.mpr_bit_select =
+ //lmc_mr_mpr_ctl.s.mpr_byte_select =
+ //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable =
+ //lmc_mr_mpr_ctl.s.mr_wr_use_default_value =
+ //lmc_mr_mpr_ctl.s.mr_wr_bg1 =
+ DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
+
+ /* MPR register access sequence */
+ perform_octeon3_ddr3_sequence(node, (1 << rank), ddr_interface_num, 0x9);
+
+ debug_print("LMC_MR_MPR_CTL : 0x%016lx\n", lmc_mr_mpr_ctl.u);
+ debug_print("lmc_mr_mpr_ctl.s.mr_wr_addr: 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_addr);
+ debug_print("lmc_mr_mpr_ctl.s.mr_wr_sel : 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_sel);
+ debug_print("lmc_mr_mpr_ctl.s.mpr_loc : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_loc);
+ debug_print("lmc_mr_mpr_ctl.s.mpr_wr : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_wr);
+}
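+
+/* Usage sketch (illustrative): write the pattern 0x55 into MPR page 0,
+   location 3 of rank 0, the same single-location write that
+   write_mpr_page0_pattern() below performs when given location_mask 0x8. */
+#if 0
+    ddr4_mpr_write(node, ddr_interface_num, 0 /*rank*/, 0 /*page*/,
+                   3 /*location*/, 0x55);
+#endif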
+
+void set_vref(bdk_node_t node, int ddr_interface_num, int rank,
+ int range, int value)
+{
+ bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl;
+ bdk_lmcx_modereg_params3_t lmc_modereg_params3;
+ int mr_wr_addr = 0;
+
+ lmc_mr_mpr_ctl.u = 0;
+ lmc_modereg_params3.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS3(ddr_interface_num));
+
+ mr_wr_addr |= lmc_modereg_params3.s.tccd_l<<10; /* A12:A10 tCCD_L */
+ mr_wr_addr |= 1<<7; /* A7 1 = Enable(Training Mode) */
+ mr_wr_addr |= range<<6; /* A6 VrefDQ Training Range */
+ mr_wr_addr |= value<<0; /* A5:A0 VrefDQ Training Value */
+
+ lmc_mr_mpr_ctl.s.mr_wr_addr = mr_wr_addr;
+ lmc_mr_mpr_ctl.s.mr_wr_sel = 6; /* Write MR6 */
+ lmc_mr_mpr_ctl.s.mr_wr_rank = rank;
+ //lmc_mr_mpr_ctl.s.mr_wr_pda_mask =
+ //lmc_mr_mpr_ctl.s.mr_wr_pda_enable =
+ //lmc_mr_mpr_ctl.s.mpr_loc = location;
+ //lmc_mr_mpr_ctl.s.mpr_wr = 0; /* Read=0, Write=1 */
+ //lmc_mr_mpr_ctl.s.mpr_bit_select =
+ //lmc_mr_mpr_ctl.s.mpr_byte_select =
+ //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable =
+ //lmc_mr_mpr_ctl.s.mr_wr_use_default_value =
+ //lmc_mr_mpr_ctl.s.mr_wr_bg1 =
+ DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
+
+ /* 0x8 = Mode Register Write */
+ perform_octeon3_ddr3_sequence(node, 1<<rank, ddr_interface_num, 0x8);
+
+ /* It is vendor specific whether Vref_value is captured with A7=1.
+ A subsequent MRS might be necessary. */
+ perform_octeon3_ddr3_sequence(node, 1<<rank, ddr_interface_num, 0x8);
+
+ mr_wr_addr &= ~(1<<7); /* A7 0 = Disable(Training Mode) */
+ lmc_mr_mpr_ctl.s.mr_wr_addr = mr_wr_addr;
+ DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
+}
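+
+/* Reference note (JEDEC DDR4 MR6 VrefDQ encoding, summarized here): Range 1
+   spans 60.0%..92.5% of VDDQ and Range 2 spans 45.0%..77.5%, both in 0.65%
+   steps. E.g. a target of ~73.65% VDDQ would be range=0 (Range 1) with
+   value = (73.65 - 60.0) / 0.65 = 21 (0x15). Values here are illustrative. */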
+
+static void set_DRAM_output_inversion (bdk_node_t node,
+ int ddr_interface_num,
+ int dimm_count,
+ int rank_mask,
+ int inversion)
+{
+ bdk_lmcx_ddr4_dimm_ctl_t lmc_ddr4_dimm_ctl;
+ bdk_lmcx_dimmx_params_t lmc_dimmx_params;
+ bdk_lmcx_dimm_ctl_t lmc_dimm_ctl;
+ int dimm_no;
+
+ lmc_ddr4_dimm_ctl.u = 0; /* Don't touch extended register control words */
+ DRAM_CSR_WRITE(node, BDK_LMCX_DDR4_DIMM_CTL(ddr_interface_num), lmc_ddr4_dimm_ctl.u);
+
+ ddr_print("All DIMMs: Register Control Word RC0 : %x\n", (inversion & 1));
+
+ for (dimm_no = 0; dimm_no < dimm_count; ++dimm_no) {
+ lmc_dimmx_params.u = BDK_CSR_READ(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm_no));
+ lmc_dimmx_params.s.rc0 = (lmc_dimmx_params.s.rc0 & ~1) | (inversion & 1);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm_no), lmc_dimmx_params.u);
+ }
+
+ /* LMC0_DIMM_CTL */
+ lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num));
+ lmc_dimm_ctl.s.dimm0_wmask = 0x1;
+ lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0x0001 : 0x0000;
+
+ ddr_print("LMC DIMM_CTL : 0x%016lx\n",
+ lmc_dimm_ctl.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
+
+ perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x7 ); /* Init RCW */
+}
+
+static void write_mpr_page0_pattern (bdk_node_t node, int rank_mask,
+ int ddr_interface_num, int dimm_count, int pattern, int location_mask)
+{
+ int rankx;
+ int location;
+
+ for (rankx = 0; rankx < dimm_count*4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+ for (location = 0; location < 4; ++location) {
+ if (!(location_mask & (1 << location)))
+ continue;
+
+ ddr4_mpr_write(node, ddr_interface_num, rankx,
+ /* page */ 0, /* location */ location, pattern);
+ }
+ }
+}
+
+static void change_rdimm_mpr_pattern (bdk_node_t node, int rank_mask,
+ int ddr_interface_num, int dimm_count)
+{
+ int save_ref_zqcs_int;
+ bdk_lmcx_config_t lmc_config;
+
+ /*
+ Okay, here is the latest sequence. This should work for all
+ chips and passes (78,88,73,etc). This sequence should be run
+ immediately after DRAM INIT. The basic idea is to write the
+ same pattern into each of the 4 MPR locations in the DRAM, so
+ that the same value is returned when doing MPR reads regardless
+ of the inversion state. My advice is to put this into a
+ function, change_rdimm_mpr_pattern or something like that, so
+ that it can be called multiple times, as I think David wants a
+ clock-like pattern for OFFSET training, but does not want a
+ clock pattern for Bit-Deskew. You should then be able to call
+ this at any point in the init sequence (after DRAM init) to
+ change the pattern to a new value.
+ Mike
+
+ A correction: PHY doesn't need any pattern during offset
+ training, but needs clock like pattern for internal vref and
+ bit-dskew training. So for that reason, these steps below have
+ to be conducted before those trainings to pre-condition
+ the pattern. David
+
+ Note: Step 3, 4, 8 and 9 have to be done through RDIMM
+ sequence. If you issue MRW sequence to do RCW write (in o78 pass
+ 1 at least), LMC will still do two commands because
+ CONTROL[RDIMM_ENA] is still set high. We don't want it to have
+ any unintentional mode register write so it's best to do what
+ Mike is doing here.
+ Andrew
+ */
+
+
+ /* 1) Disable refresh (REF_ZQCS_INT = 0) */
+
+ debug_print("1) Disable refresh (REF_ZQCS_INT = 0)\n");
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ save_ref_zqcs_int = lmc_config.s.ref_zqcs_int;
+ lmc_config.s.ref_zqcs_int = 0;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+
+
+ /* 2) Put all devices in MPR mode (Run MRW sequence (sequence=8)
+ with MODEREG_PARAMS0[MPRLOC]=0,
+ MODEREG_PARAMS0[MPR]=1, MR_MPR_CTL[MR_WR_SEL]=3, and
+ MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1) */
+
+ debug_print("2) Put all devices in MPR mode (Run MRW sequence (sequence=8)\n");
+
+ set_mpr_mode(node, rank_mask, ddr_interface_num, dimm_count, /* mpr */ 1, /* bg1 */ 0); /* A-side */
+ set_mpr_mode(node, rank_mask, ddr_interface_num, dimm_count, /* mpr */ 1, /* bg1 */ 1); /* B-side */
+
+ /* a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and set
+ the value you would like directly into
+ MR_MPR_CTL[MR_WR_ADDR] */
+
+ /* 3) Disable RCD Parity (if previously enabled) - parity does not
+ work if inversion disabled */
+
+ debug_print("3) Disable RCD Parity\n");
+
+ /* 4) Disable Inversion in the RCD. */
+ /* a. I did (3&4) via the RDIMM sequence (seq_sel=7), but it
+ may be easier to use the MRW sequence (seq_sel=8). Just set
+ MR_MPR_CTL[MR_WR_SEL]=7, MR_MPR_CTL[MR_WR_ADDR][3:0]=data,
+ MR_MPR_CTL[MR_WR_ADDR][7:4]=RCD reg */
+
+ debug_print("4) Disable Inversion in the RCD.\n");
+
+ set_DRAM_output_inversion(node, ddr_interface_num, dimm_count, rank_mask,
+ 1 /* 1=disable output inversion*/);
+
+ /* 5) Disable CONTROL[RDIMM_ENA] so that MR sequence goes out
+ non-inverted. */
+
+ debug_print("5) Disable CONTROL[RDIMM_ENA]\n");
+
+ set_rdimm_mode(node, ddr_interface_num, 0);
+
+ /* 6) Write all 4 MPR registers with the desired pattern (have to
+ do this for all enabled ranks) */
+ /* a. MR_MPR_CTL.MPR_WR=1, MR_MPR_CTL.MPR_LOC=0..3,
+ MR_MPR_CTL.MR_WR_SEL=0, MR_MPR_CTL.MR_WR_ADDR[7:0]=pattern */
+
+ debug_print("6) Write all 4 MPR page 0 Training Patterns\n");
+
+ write_mpr_page0_pattern(node, rank_mask,
+ ddr_interface_num, dimm_count, 0x55, 0x8);
+
+ /* 7) Re-enable RDIMM_ENA */
+
+ debug_print("7) Re-enable RDIMM_ENA\n");
+
+ set_rdimm_mode(node, ddr_interface_num, 1);
+
+ /* 8) Re-enable RDIMM inversion */
+
+ debug_print("8) Re-enable RDIMM inversion\n");
+
+ set_DRAM_output_inversion(node, ddr_interface_num, dimm_count, rank_mask,
+ 0 /* 0=re-enable output inversion*/);
+
+ /* 9) Re-enable RDIMM parity (if desired) */
+
+ debug_print("9) Re-enable RDIMM parity (if desired)\n");
+
+ /* 10)Take B-side devices out of MPR mode (Run MRW sequence
+ (sequence=8) with MODEREG_PARAMS0[MPRLOC]=0,
+ MODEREG_PARAMS0[MPR]=0, MR_MPR_CTL[MR_WR_SEL]=3, and
+ MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1) */
+
+ debug_print("10)Take B-side devices out of MPR mode\n");
+
+ set_mpr_mode(node, rank_mask, ddr_interface_num, dimm_count, /* mpr */ 0, /* bg1 */ 1);
+
+ /* a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and
+ set the value you would like directly into
+ MR_MPR_CTL[MR_WR_ADDR] */
+
+ /* 11)Re-enable refresh (REF_ZQCS_INT=previous value) */
+
+ debug_print("11)Re-enable refresh (REF_ZQCS_INT=previous value)\n");
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ lmc_config.s.ref_zqcs_int = save_ref_zqcs_int;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+
+}
+
+static unsigned char ddr4_rodt_ohms [RODT_OHMS_COUNT ] = { 0, 40, 60, 80, 120, 240, 34, 48 };
+static unsigned char ddr4_rtt_nom_ohms [RTT_NOM_OHMS_COUNT ] = { 0, 60, 120, 40, 240, 48, 80, 34 };
+static unsigned char ddr4_rtt_nom_table [RTT_NOM_TABLE_COUNT ] = { 0, 4, 2, 6, 1, 5, 3, 7 };
+static unsigned char ddr4_rtt_wr_ohms [RTT_WR_OHMS_COUNT ] = { 0, 120, 240, 99, 80 }; // setting HiZ ohms to 99 for computed vref
+static unsigned char ddr4_dic_ohms [DIC_OHMS_COUNT ] = { 34, 48 };
+static short ddr4_drive_strength[DRIVE_STRENGTH_COUNT] = { 0, 0, 26, 30, 34, 40, 48, 68, 0,0,0,0,0,0,0 };
+static short ddr4_dqx_strength [DRIVE_STRENGTH_COUNT] = { 0, 24, 27, 30, 34, 40, 48, 60, 0,0,0,0,0,0,0 };
+
+impedence_values_t ddr4_impedence_values = {
+ .rodt_ohms = ddr4_rodt_ohms ,
+ .rtt_nom_ohms = ddr4_rtt_nom_ohms ,
+ .rtt_nom_table = ddr4_rtt_nom_table ,
+ .rtt_wr_ohms = ddr4_rtt_wr_ohms ,
+ .dic_ohms = ddr4_dic_ohms ,
+ .drive_strength = ddr4_drive_strength,
+ .dqx_strength = ddr4_dqx_strength ,
+};
+
+static unsigned char ddr3_rodt_ohms [RODT_OHMS_COUNT ] = { 0, 20, 30, 40, 60, 120, 0, 0 };
+static unsigned char ddr3_rtt_nom_ohms [RTT_NOM_OHMS_COUNT ] = { 0, 60, 120, 40, 20, 30, 0, 0 };
+static unsigned char ddr3_rtt_nom_table [RTT_NOM_TABLE_COUNT ] = { 0, 2, 1, 3, 5, 4, 0, 0 };
+static unsigned char ddr3_rtt_wr_ohms [RTT_WR_OHMS_COUNT ] = { 0, 60, 120 };
+static unsigned char ddr3_dic_ohms [DIC_OHMS_COUNT ] = { 40, 34 };
+static short ddr3_drive_strength[DRIVE_STRENGTH_COUNT] = { 0, 24, 27, 30, 34, 40, 48, 60, 0,0,0,0,0,0,0 };
+static impedence_values_t ddr3_impedence_values = {
+ .rodt_ohms = ddr3_rodt_ohms ,
+ .rtt_nom_ohms = ddr3_rtt_nom_ohms ,
+ .rtt_nom_table = ddr3_rtt_nom_table ,
+ .rtt_wr_ohms = ddr3_rtt_wr_ohms ,
+ .dic_ohms = ddr3_dic_ohms ,
+ .drive_strength = ddr3_drive_strength,
+ .dqx_strength = ddr3_drive_strength,
+};
+
+
+uint64_t
+hertz_to_psecs(uint64_t hertz)
+{
+ return divide_nint((uint64_t) 1000*1000*1000*1000, hertz); /* Clock in psecs */
+}
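+
+/* Worked example (illustrative): a 1200 MHz DDR clock gives
+   hertz_to_psecs(1200000000) = round(1e12 / 1.2e9) = 833 ps. */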
+
+#define DIVIDEND_SCALE 1000 /* Scale to avoid rounding error. */
+uint64_t
+psecs_to_mts(uint64_t psecs)
+{
+ //ddr_print("psecs %ld, divisor %ld\n", psecs, divide_nint((uint64_t)(2 * 1000000 * DIVIDEND_SCALE), psecs));
+ return divide_nint(divide_nint((uint64_t)(2 * 1000000 * DIVIDEND_SCALE), psecs), DIVIDEND_SCALE);
+}
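+
+/* Worked example (illustrative): psecs_to_mts(833) returns 2401 rather than
+   2400 because of integer rounding; compute_rc3x() below special-cases
+   833 ps for exactly this reason. */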
+
+#define WITHIN(v,b,m) (((v)>=((b)-(m)))&&((v)<=((b)+(m))))
+
+// pretty-print version, only works with what comes from the SPD: tCKmin or tCKAVGmin
+unsigned long
+pretty_psecs_to_mts(uint64_t psecs)
+{
+ uint64_t ret = 0; // default to error
+ if (WITHIN(psecs, 1250, 1))
+ ret = 1600;
+ else if (WITHIN(psecs, 1071, 1))
+ ret = 1866;
+ else if (WITHIN(psecs, 937, 1))
+ ret = 2133;
+ else if (WITHIN(psecs, 833, 1))
+ ret = 2400;
+ else if (WITHIN(psecs, 750, 1))
+ ret = 2666;
+ return ret;
+}
+
+uint64_t
+mts_to_hertz(uint64_t mts)
+{
+ return ((mts * 1000 * 1000) / 2);
+}
+
+#define DEBUG_RC3X_COMPUTE 0
+#define rc3x_print(...) \
+ do { if (DEBUG_RC3X_COMPUTE) printf(__VA_ARGS__); } while (0)
+
+static int compute_rc3x (int64_t tclk_psecs)
+{
+ long speed;
+ long tclk_psecs_min, tclk_psecs_max;
+ long data_rate_mhz, data_rate_mhz_min, data_rate_mhz_max;
+ int rc3x;
+
+#define ENCODING_BASE 1240
+
+ data_rate_mhz = psecs_to_mts(tclk_psecs);
+
+ /* 2400 MT/s is a special case. Using integer arithmetic it rounds
+ from 833 psecs to 2401 MT/s. Force it to 2400 to pick the
+ proper setting from the table. */
+ if (tclk_psecs == 833)
+ data_rate_mhz = 2400;
+
+ for (speed = ENCODING_BASE; speed < 3200; speed += 20) {
+ int error = 0;
+
+ tclk_psecs_min = hertz_to_psecs(mts_to_hertz(speed + 00)); /* Clock in psecs */
+ tclk_psecs_max = hertz_to_psecs(mts_to_hertz(speed + 18)); /* Clock in psecs */
+
+ data_rate_mhz_min = psecs_to_mts(tclk_psecs_min);
+ data_rate_mhz_max = psecs_to_mts(tclk_psecs_max);
+
+        /* Force alignment to a multiple of 20 to avoid rounding errors. */
+ data_rate_mhz_min = ((data_rate_mhz_min + 18) / 20) * 20;
+ data_rate_mhz_max = ((data_rate_mhz_max + 18) / 20) * 20;
+
+ error += (speed + 00 != data_rate_mhz_min);
+ error += (speed + 20 != data_rate_mhz_max);
+
+ rc3x = (speed - ENCODING_BASE) / 20;
+
+ rc3x_print("rc3x: %02x speed: %4ld MT/s < f <= %4ld MT/s, psec: %3ld:%3ld %4ld:%4ld %s\n",
+ rc3x,
+ speed, speed + 20,
+ tclk_psecs_min, tclk_psecs_max,
+ data_rate_mhz_min, data_rate_mhz_max,
+ error ? "****" : "");
+
+ if (data_rate_mhz <= (speed + 20)) {
+ rc3x_print("rc3x: %4ld MT/s <= %4ld MT/s\n", data_rate_mhz, speed + 20);
+ break;
+ }
+ }
+ return rc3x;
+}
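+
+/* Illustrative trace: for a 2400 MT/s part (tclk_psecs = 833), the loop
+   exits at speed = 2380, giving rc3x = (2380 - 1240) / 20 = 57 = 0x39,
+   i.e. the encoding for the bucket 2380 MT/s < f <= 2400 MT/s. */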
+
+static const int rlevel_separate_ab = 1;
+
+int init_octeon3_ddr3_interface(bdk_node_t node,
+ const ddr_configuration_t *ddr_configuration,
+ uint32_t ddr_hertz,
+ uint32_t cpu_hertz,
+ uint32_t ddr_ref_hertz,
+ int board_type,
+ int board_rev_maj,
+ int board_rev_min,
+ int ddr_interface_num,
+ uint32_t ddr_interface_mask
+ )
+{
+ const char *s;
+
+ const dimm_odt_config_t *odt_1rank_config = ddr_configuration->odt_1rank_config;
+ const dimm_odt_config_t *odt_2rank_config = ddr_configuration->odt_2rank_config;
+ const dimm_odt_config_t *odt_4rank_config = ddr_configuration->odt_4rank_config;
+ const dimm_config_t *dimm_config_table = ddr_configuration->dimm_config_table;
+ const dimm_odt_config_t *odt_config;
+ const ddr3_custom_config_t *custom_lmc_config = &ddr_configuration->custom_lmc_config;
+ int odt_idx;
+
+ /*
+ ** Compute clock rates to the nearest picosecond.
+ */
+ uint64_t tclk_psecs = hertz_to_psecs(ddr_hertz); /* Clock in psecs */
+ uint64_t eclk_psecs = hertz_to_psecs(cpu_hertz); /* Clock in psecs */
+
+ int row_bits, col_bits, num_banks, num_ranks, dram_width;
+ int dimm_count = 0;
+ int fatal_error = 0; /* Accumulate and report all the errors before giving up */
+
+ int safe_ddr_flag = 0; /* Flag that indicates safe DDR settings should be used */
+ int ddr_interface_64b = 1; /* THUNDER Default: 64bit interface width */
+ int ddr_interface_bytemask;
+ uint32_t mem_size_mbytes = 0;
+ unsigned int didx;
+ int bank_bits = 0;
+ int bunk_enable;
+ int rank_mask;
+ int column_bits_start = 1;
+ int row_lsb;
+ int pbank_lsb;
+ int use_ecc = 1;
+ int mtb_psec = 0; /* quiet */
+ short ftb_Dividend;
+ short ftb_Divisor;
+ int tAAmin;
+ int tCKmin;
+ int CL, min_cas_latency = 0, max_cas_latency = 0, override_cas_latency = 0;
+ int ddr_rtt_nom_auto, ddr_rodt_ctl_auto;
+ int i;
+
+ int spd_addr;
+ int spd_org;
+ int spd_banks;
+ int spd_rdimm;
+ int spd_dimm_type;
+ int spd_ecc;
+ uint32_t spd_cas_latency;
+ int spd_mtb_dividend;
+ int spd_mtb_divisor;
+ int spd_tck_min;
+ int spd_taa_min;
+ int spd_twr;
+ int spd_trcd;
+ int spd_trrd;
+ int spd_trp;
+ int spd_tras;
+ int spd_trc;
+ int spd_trfc;
+ int spd_twtr;
+ int spd_trtp;
+ int spd_tfaw;
+ int spd_addr_mirror;
+ int spd_package = 0;
+ int spd_rawcard = 0;
+ int spd_rawcard_AorB = 0;
+ int is_stacked_die = 0;
+ int disable_stacked_die = 0;
+ int is_3ds_dimm = 0; // 3DS
+ int lranks_per_prank = 1; // 3DS: logical ranks per package rank
+ int lranks_bits = 0; // 3DS: logical ranks bits
+ int die_capacity = 0; // in Mbits; only used for 3DS
+
+ /* FTB values are two's complement ranging from +127 to -128. */
+ typedef signed char SC_t;
+
+ int twr;
+ int trcd;
+ int trrd;
+ int trp;
+ int tras;
+ int trc;
+ int trfc;
+ int twtr;
+ int trtp = 0; /* quiet */
+ int tfaw;
+
+ int wlevel_bitmask_errors = 0;
+ int wlevel_loops;
+ int default_rtt_nom[4];
+ int dyn_rtt_nom_mask = 0;
+
+ ddr_type_t ddr_type;
+ int ddr4_tCKAVGmin = 0; /* quiet */
+ int ddr4_tCKAVGmax = 0; /* quiet */
+ int ddr4_tRCDmin = 0; /* quiet */
+ int ddr4_tRPmin = 0; /* quiet */
+ int ddr4_tRASmin = 0; /* quiet */
+ int ddr4_tRCmin = 0; /* quiet */
+ int ddr4_tRFC1min = 0; /* quiet */
+ int ddr4_tRFC2min = 0; /* quiet */
+ int ddr4_tRFC4min = 0; /* quiet */
+ int ddr4_tFAWmin = 0; /* quiet */
+ int ddr4_tRRD_Smin = 0; /* quiet */
+ int ddr4_tRRD_Lmin;
+ int ddr4_tCCD_Lmin;
+ impedence_values_t *imp_values;
+ int default_rodt_ctl;
+ // default to disabled (ie, LMC restart, not chip reset)
+ int ddr_disable_chip_reset = 1;
+ int disable_deskew_training = 0;
+ const char *dimm_type_name;
+
+ /* Allow the Write bit-deskew feature to be enabled when desired. */
+ // NOTE: THUNDER pass 2.x only, 81xx, 83xx
+ int enable_write_deskew = ENABLE_WRITE_DESKEW_DEFAULT;
+
+#if SWL_TRY_HWL_ALT
+ typedef struct {
+ uint16_t hwl_alt_mask; // mask of bytelanes with alternate
+ uint16_t hwl_alt_delay[9]; // bytelane alternate avail if mask=1
+ } hwl_alt_by_rank_t;
+ hwl_alt_by_rank_t hwl_alts[4];
+ memset(hwl_alts, 0, sizeof(hwl_alts));
+#endif /* SWL_TRY_HWL_ALT */
+
+ bdk_lmcx_config_t lmc_config;
+
+ /* Initialize these to shut up the compiler. They are configured
+ and used only for DDR4 */
+ ddr4_tRRD_Lmin = 6000;
+ ddr4_tCCD_Lmin = 6000;
+
+ ddr_print("\nInitializing node %d DDR interface %d, DDR Clock %d, DDR Reference Clock %d\n",
+ node, ddr_interface_num, ddr_hertz, ddr_ref_hertz);
+
+ if (dimm_config_table[0].spd_addr == 0 && !dimm_config_table[0].spd_ptr) {
+ error_print("ERROR: No dimms specified in the dimm_config_table.\n");
+ return (-1);
+ }
+
+ // allow some overrides to be done
+
+ // this one controls whether chip RESET is done, or LMC init restarted from step 6.9.6
+ if ((s = lookup_env_parameter("ddr_disable_chip_reset")) != NULL) {
+ ddr_disable_chip_reset = !!strtoul(s, NULL, 0);
+ }
+ // this one controls whether Deskew Training is performed
+ if ((s = lookup_env_parameter("ddr_disable_deskew_training")) != NULL) {
+ disable_deskew_training = !!strtoul(s, NULL, 0);
+ }
+ // this one is in Validate_Read_Deskew_Training and controls a preliminary delay
+ if ((s = lookup_env_parameter("ddr_deskew_validation_delay")) != NULL) {
+ deskew_validation_delay = strtoul(s, NULL, 0);
+ }
+ // this one is in Perform_Read_Deskew_Training and controls lock retries
+ if ((s = lookup_env_parameter("ddr_lock_retries")) != NULL) {
+ default_lock_retry_limit = strtoul(s, NULL, 0);
+ }
+ // this one controls whether stacked die status can affect processing
+ // disabling it will affect computed vref adjustment, and rodt_row_skip_mask
+ if ((s = lookup_env_parameter("ddr_disable_stacked_die")) != NULL) {
+ disable_stacked_die = !!strtoul(s, NULL, 0);
+ }
+
+ // setup/override for write bit-deskew feature
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ // FIXME: allow override
+ if ((s = lookup_env_parameter("ddr_enable_write_deskew")) != NULL) {
+ enable_write_deskew = !!strtoul(s, NULL, 0);
+ } // else take default setting
+ } else { // not pass 2.x
+ enable_write_deskew = 0; // force disabled
+ }
+
+#if 0 // FIXME: do we really need this anymore?
+ if (dram_is_verbose(VBL_NORM)) {
+ printf("DDR SPD Table:");
+ for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) {
+ if (dimm_config_table[didx].spd_addr == 0) break;
+ printf(" --ddr%dspd=0x%02x", ddr_interface_num, dimm_config_table[didx].spd_addr);
+ }
+ printf("\n");
+ }
+#endif
+
+ /*
+ ** Walk the DRAM Socket Configuration Table to see what is installed.
+ */
+ for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx)
+ {
+ /* Check for lower DIMM socket populated */
+ if (validate_dimm(node, &dimm_config_table[didx]) == 1) {
+ // NOTE: DIMM info printing is now done later when more details are available
+ ++dimm_count;
+ } else { break; } /* Finished when there is no lower DIMM */
+ }
+
+
+ initialize_ddr_clock(node,
+ ddr_configuration,
+ cpu_hertz,
+ ddr_hertz,
+ ddr_ref_hertz,
+ ddr_interface_num,
+ ddr_interface_mask);
+
+ if (!odt_1rank_config)
+ odt_1rank_config = disable_odt_config;
+ if (!odt_2rank_config)
+ odt_2rank_config = disable_odt_config;
+ if (!odt_4rank_config)
+ odt_4rank_config = disable_odt_config;
+
+ if ((s = lookup_env_parameter("ddr_safe")) != NULL) {
+ safe_ddr_flag = !!strtoul(s, NULL, 0);
+ }
+
+
+ if (dimm_count == 0) {
+ error_print("ERROR: DIMM 0 not detected.\n");
+ return(-1);
+ }
+
+ // look for 32-bit mode specified in the config
+ if (custom_lmc_config->mode32b) {
+ ddr_interface_64b = 0;
+ }
+
+ if (ddr_interface_64b == 0) { // check if 32-bit mode is bad
+ if (!CAVIUM_IS_MODEL(CAVIUM_CN81XX)) {
+ error_print("32-bit interface width is NOT supported for this Thunder model\n");
+ ddr_interface_64b = 1; // force to 64-bit
+ }
+ } else { // check if 64-bit mode is bad
+ if (CAVIUM_IS_MODEL(CAVIUM_CN81XX)) { // check the fuses on 81XX for forced 32-bit mode
+ BDK_CSR_INIT(mio_fus_dat2, node, BDK_MIO_FUS_DAT2);
+ if (mio_fus_dat2.s.lmc_mode32) {
+ error_print("32-bit interface width is ONLY supported for this Thunder model\n");
+ ddr_interface_64b = 0; // force to 32-bit
+ }
+ }
+ }
+
+ // finally, say we are in 32-bit mode when it has been validated
+ if (ddr_interface_64b == 0) {
+ ddr_print("N%d.LMC%d: Setting 32-bit data width\n",
+ node, ddr_interface_num);
+ }
+
+ /* ddr_type only indicates DDR4 or DDR3 */
+ ddr_type = get_ddr_type(node, &dimm_config_table[0]);
+ debug_print("DRAM Device Type: DDR%d\n", ddr_type);
+
+ spd_dimm_type = get_dimm_module_type(node, &dimm_config_table[0], ddr_type);
+
+ if (ddr_type == DDR4_DRAM) {
+ int spd_module_type;
+ int asymmetric;
+ const char *signal_load[4] = {"", "MLS", "3DS", "RSV"};
+
+ imp_values = &ddr4_impedence_values;
+ dimm_type_name = ddr4_dimm_types[spd_dimm_type];
+
+ spd_addr = read_spd(node, &dimm_config_table[0], DDR4_SPD_ADDRESSING_ROW_COL_BITS);
+ spd_org = read_spd(node, &dimm_config_table[0], DDR4_SPD_MODULE_ORGANIZATION);
+ spd_banks = 0xFF & read_spd(node, &dimm_config_table[0], DDR4_SPD_DENSITY_BANKS);
+
+ bank_bits = (2 + ((spd_banks >> 4) & 0x3)) + ((spd_banks >> 6) & 0x3);
+ bank_bits = min((int)bank_bits, 4); /* Controller can only address 4 bits. */
+
+        spd_package = 0xFF & read_spd(node, &dimm_config_table[0], DDR4_SPD_PACKAGE_TYPE);
+ if (spd_package & 0x80) { // non-monolithic device
+ is_stacked_die = (!disable_stacked_die) ? ((spd_package & 0x73) == 0x11) : 0;
+ ddr_print("DDR4: Package Type 0x%x (%s), %d die\n", spd_package,
+ signal_load[(spd_package & 3)], ((spd_package >> 4) & 7) + 1);
+ is_3ds_dimm = ((spd_package & 3) == 2); // is it 3DS?
+ if (is_3ds_dimm) { // is it 3DS?
+ lranks_per_prank = ((spd_package >> 4) & 7) + 1;
+ // FIXME: should make sure it is only 2H or 4H or 8H?
+ lranks_bits = lranks_per_prank >> 1;
+ if (lranks_bits == 4) lranks_bits = 3;
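+            // 2H -> 1 logical-rank bit, 4H -> 2 bits, 8H -> 3 bits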
+ }
+ } else if (spd_package != 0) {
+ // FIXME: print non-zero monolithic device definition
+ ddr_print("DDR4: Package Type MONOLITHIC: %d die, signal load %d\n",
+ ((spd_package >> 4) & 7) + 1, (spd_package & 3));
+ }
+
+ asymmetric = (spd_org >> 6) & 1;
+ if (asymmetric) {
+ int spd_secondary_pkg = read_spd(node, &dimm_config_table[0],
+ DDR4_SPD_SECONDARY_PACKAGE_TYPE);
+ ddr_print("DDR4: Module Organization: ASYMMETRICAL: Secondary Package Type 0x%x\n",
+ spd_secondary_pkg);
+ } else {
+ uint64_t bus_width = 8 << (0x07 & read_spd(node, &dimm_config_table[0],
+ DDR4_SPD_MODULE_MEMORY_BUS_WIDTH));
+ uint64_t ddr_width = 4 << ((spd_org >> 0) & 0x7);
+ uint64_t module_cap;
+ int shift = (spd_banks & 0x0F);
+ die_capacity = (shift < 8) ? (256UL << shift) : ((12UL << (shift & 1)) << 10);
+ ddr_print("DDR4: Module Organization: SYMMETRICAL: capacity per die %d %cbit\n",
+ (die_capacity > 512) ? (die_capacity >> 10) : die_capacity,
+ (die_capacity > 512) ? 'G' : 'M');
+ module_cap = ((uint64_t)die_capacity << 20) / 8UL * bus_width / ddr_width *
+ /* no. pkg ranks*/(1UL + ((spd_org >> 3) & 0x7));
+ if (is_3ds_dimm) // is it 3DS?
+ module_cap *= /* die_count */(uint64_t)(((spd_package >> 4) & 7) + 1);
+ ddr_print("DDR4: Module Organization: SYMMETRICAL: capacity per module %ld GB\n",
+ module_cap >> 30);
+ }
+
+ spd_rawcard = 0xFF & read_spd(node, &dimm_config_table[0], DDR4_SPD_REFERENCE_RAW_CARD);
+ ddr_print("DDR4: Reference Raw Card 0x%x \n", spd_rawcard);
+
+ spd_module_type = read_spd(node, &dimm_config_table[0], DDR4_SPD_KEY_BYTE_MODULE_TYPE);
+ if (spd_module_type & 0x80) { // HYBRID module
+ ddr_print("DDR4: HYBRID module, type %s\n",
+ ((spd_module_type & 0x70) == 0x10) ? "NVDIMM" : "UNKNOWN");
+ }
+
+ spd_dimm_type = spd_module_type & 0x0F;
+ spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) || (spd_dimm_type == 8);
+ if (spd_rdimm) {
+ int spd_mfgr_id = read_spd(node, &dimm_config_table[0], DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB) |
+ (read_spd(node, &dimm_config_table[0], DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB) << 8);
+ int spd_register_rev = read_spd(node, &dimm_config_table[0], DDR4_SPD_REGISTER_REVISION_NUMBER);
+ ddr_print("DDR4: RDIMM Register Manufacturer ID 0x%x Revision 0x%x\n",
+ spd_mfgr_id, spd_register_rev);
+
+ // RAWCARD A or B must be bit 7=0 and bits 4-0 either 00000(A) or 00001(B)
+ spd_rawcard_AorB = ((spd_rawcard & 0x9fUL) <= 1);
+ }
+ } else {
+ imp_values = &ddr3_impedence_values;
+ dimm_type_name = ddr3_dimm_types[spd_dimm_type];
+
+ spd_addr = read_spd(node, &dimm_config_table[0], DDR3_SPD_ADDRESSING_ROW_COL_BITS);
+ spd_org = read_spd(node, &dimm_config_table[0], DDR3_SPD_MODULE_ORGANIZATION);
+ spd_banks = read_spd(node, &dimm_config_table[0], DDR3_SPD_DENSITY_BANKS) & 0xff;
+
+ bank_bits = 3 + ((spd_banks >> 4) & 0x7);
+ bank_bits = min((int)bank_bits, 3); /* Controller can only address 3 bits. */
+
+ spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) || (spd_dimm_type == 9);
+ }
+
+#if 0 // FIXME: why should this be possible OR needed?
+ if ((s = lookup_env_parameter("ddr_rdimm_ena")) != NULL) {
+ spd_rdimm = !!strtoul(s, NULL, 0);
+ }
+#endif
+
+ debug_print("spd_addr : %#06x\n", spd_addr );
+ debug_print("spd_org : %#06x\n", spd_org );
+ debug_print("spd_banks : %#06x\n", spd_banks );
+
+ row_bits = 12 + ((spd_addr >> 3) & 0x7);
+ col_bits = 9 + ((spd_addr >> 0) & 0x7);
+
+ num_ranks = 1 + ((spd_org >> 3) & 0x7);
+ dram_width = 4 << ((spd_org >> 0) & 0x7);
+ num_banks = 1 << bank_bits;
+
+ if ((s = lookup_env_parameter("ddr_num_ranks")) != NULL) {
+ num_ranks = strtoul(s, NULL, 0);
+ }
+
+ /* FIX
+ ** Check that values are within some theoretical limits.
+ ** col_bits(min) = row_lsb(min) - bank_bits(max) - bus_bits(max) = 14 - 3 - 4 = 7
+ ** col_bits(max) = row_lsb(max) - bank_bits(min) - bus_bits(min) = 18 - 2 - 3 = 13
+ */
+ if ((col_bits > 13) || (col_bits < 7)) {
+ error_print("Unsupported number of Col Bits: %d\n", col_bits);
+ ++fatal_error;
+ }
+
+ /* FIX
+ ** Check that values are within some theoretical limits.
+ ** row_bits(min) = pbank_lsb(min) - row_lsb(max) - rank_bits = 26 - 18 - 1 = 7
+ ** row_bits(max) = pbank_lsb(max) - row_lsb(min) - rank_bits = 33 - 14 - 1 = 18
+ */
+ if ((row_bits > 18) || (row_bits < 7)) {
+ error_print("Unsupported number of Row Bits: %d\n", row_bits);
+ ++fatal_error;
+ }
+
+ if (bdk_is_platform(BDK_PLATFORM_ASIM))
+ wlevel_loops = 0;
+ else {
+ wlevel_loops = WLEVEL_LOOPS_DEFAULT;
+ // accept generic or interface-specific override but not for ASIM...
+ if ((s = lookup_env_parameter("ddr_wlevel_loops")) == NULL)
+ s = lookup_env_parameter("ddr%d_wlevel_loops", ddr_interface_num);
+ if (s != NULL) {
+ wlevel_loops = strtoul(s, NULL, 0);
+ }
+ }
+
+ bunk_enable = (num_ranks > 1);
+
+ column_bits_start = 3;
+
+ row_lsb = column_bits_start + col_bits + bank_bits - (! ddr_interface_64b);
+ debug_print("row_lsb = column_bits_start + col_bits + bank_bits = %d\n", row_lsb);
+
+ pbank_lsb = row_lsb + row_bits + bunk_enable;
+ debug_print("pbank_lsb = row_lsb + row_bits + bunk_enable = %d\n", pbank_lsb);
+
+ if (lranks_per_prank > 1) {
+ pbank_lsb = row_lsb + row_bits + lranks_bits + bunk_enable;
+ ddr_print("DDR4: 3DS: pbank_lsb = (%d row_lsb) + (%d row_bits) + (%d lranks_bits) + (%d bunk_enable) = %d\n",
+ row_lsb, row_bits, lranks_bits, bunk_enable, pbank_lsb);
+ }
+
+ mem_size_mbytes = dimm_count * ((1ull << pbank_lsb) >> 20);
+ if (num_ranks == 4) {
+ /* Quad rank dimm capacity is equivalent to two dual-rank dimms. */
+ mem_size_mbytes *= 2;
+ }
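+    /* Worked example (illustrative, hypothetical values): pbank_lsb = 33
+       (8 GB per DIMM) with two dual-rank DIMMs gives
+       mem_size_mbytes = 2 * ((1ull << 33) >> 20) = 16384 MB. */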
+
+    /* Mask with 1 bits set for each active rank, allowing 2 bits per dimm.
+ ** This makes later calculations simpler, as a variety of CSRs use this layout.
+ ** This init needs to be updated for dual configs (ie non-identical DIMMs).
+ ** Bit 0 = dimm0, rank 0
+ ** Bit 1 = dimm0, rank 1
+ ** Bit 2 = dimm1, rank 0
+ ** Bit 3 = dimm1, rank 1
+ ** ...
+ */
+ rank_mask = 0x1;
+ if (num_ranks > 1)
+ rank_mask = 0x3;
+ if (num_ranks > 2)
+ rank_mask = 0xf;
+
+ for (i = 1; i < dimm_count; i++)
+ rank_mask |= ((rank_mask & 0x3) << (2*i));
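+    /* Worked example (illustrative): two dual-rank DIMMs give num_ranks = 2,
+       so rank_mask starts at 0x3 and the loop adds DIMM 1:
+       0x3 | (0x3 << 2) = 0xf. */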
+
+
+#ifdef CAVIUM_ONLY
+ /* Special request: mismatched DIMM support. Slot 0: 2-Rank, Slot 1: 1-Rank */
+ if (0)
+ {
+ /*
+ ** Calculate the total memory size in terms of the total
+ ** number of ranks instead of the number of dimms. The usual
+ ** requirement is for both dimms to be identical. This check
+ ** works around that requirement to allow one exception. The
+ ** dimm in the second slot may now have fewer ranks than the
+ ** first slot.
+ */
+ int spd_org_dimm1;
+ int num_ranks_dimm1;
+ int rank_count;
+ int rank_mask_dimm1;
+
+ if (dimm_count > 1) {
+ spd_org_dimm1 = read_spd(node, &dimm_config_table[1] /* dimm 1*/,
+ DDR3_SPD_MODULE_ORGANIZATION);
+ num_ranks_dimm1 = 1 + ((spd_org_dimm1 >> 3) & 0x7);
+ rank_count = num_ranks/* dimm 0 */ + num_ranks_dimm1 /* dimm 1 */;
+
+ if (num_ranks != num_ranks_dimm1) {
+ mem_size_mbytes = rank_count * ((1ull << (pbank_lsb-bunk_enable)) >> 20);
+ rank_mask = 1 | ((num_ranks > 1) << 1);
+ rank_mask_dimm1 = 1 | ((num_ranks_dimm1 > 1) << 1);
+ rank_mask |= ((rank_mask_dimm1 & 0x3) << 2);
+ ddr_print("DIMM 1 - ranks: %d, size: %d MB\n",
+ num_ranks_dimm1, num_ranks_dimm1 * ((1ull << (pbank_lsb-bunk_enable)) >> 20));
+ }
+ }
+ }
+#endif /* CAVIUM_ONLY */
+
+ spd_ecc = get_dimm_ecc(node, &dimm_config_table[0], ddr_type);
+
+ VB_PRT(VBL_DEV, "Summary: - %d %s%s %dRx%d %s, row bits=%d, col bits=%d, bank bits=%d\n",
+ dimm_count, dimm_type_name, (dimm_count > 1) ? "s" : "",
+ num_ranks, dram_width, (spd_ecc) ? "ECC" : "non-ECC",
+ row_bits, col_bits, bank_bits);
+
+ // always print out the useful DIMM information...
+ for (i = 0; i < DDR_CFG_T_MAX_DIMMS; i++) {
+ if (i < dimm_count)
+ report_dimm(node, &dimm_config_table[i], i, ddr_interface_num,
+ num_ranks, dram_width, mem_size_mbytes / dimm_count);
+ else
+ if (validate_dimm(node, &dimm_config_table[i]) == 0) // only if there is a slot
+ printf("N%d.LMC%d.DIMM%d: Not Present\n", node, ddr_interface_num, i);
+ }
+
+ if (ddr_type == DDR4_DRAM) {
+ spd_cas_latency = ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE0)) << 0);
+ spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE1)) << 8);
+ spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE2)) << 16);
+ spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE3)) << 24);
+ } else {
+ spd_cas_latency = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_CAS_LATENCIES_LSB);
+ spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_CAS_LATENCIES_MSB)) << 8);
+ }
+ debug_print("spd_cas_latency : %#06x\n", spd_cas_latency );
+
+ if (ddr_type == DDR4_DRAM) {
+
+ /* No other values for DDR4 MTB and FTB are specified at the
+ * current time so don't bother reading them. Can't speculate how
+ * new values will be represented.
+ */
+ int spdMTB = 125;
+ int spdFTB = 1;
+
+ tAAmin
+ = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_LATENCY_TAAMIN)
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN);
+
+ ddr4_tCKAVGmin
+ = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN)
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN);
+
+ ddr4_tCKAVGmax
+ = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX)
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX);
+
+ ddr4_tRCDmin
+ = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN)
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN);
+
+ ddr4_tRPmin
+ = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN)
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN);
+
+ ddr4_tRASmin
+ = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8) +
+ ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN) & 0xff));
+
+ ddr4_tRCmin
+ = spdMTB * ((((read_spd(node, &dimm_config_table[0], DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) >> 4) & 0xf) << 8) +
+ ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN) & 0xff))
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN);
+
+ ddr4_tRFC1min
+ = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN) & 0xff) << 8) +
+ ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN) & 0xff));
+
+ ddr4_tRFC2min
+ = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN) & 0xff) << 8) +
+ ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN) & 0xff));
+
+ ddr4_tRFC4min
+ = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN) & 0xff) << 8) +
+ ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN) & 0xff));
+
+ ddr4_tFAWmin
+ = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN) & 0xf) << 8) +
+ ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN) & 0xff));
+
+ ddr4_tRRD_Smin
+ = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN)
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN);
+
+ ddr4_tRRD_Lmin
+ = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN)
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN);
+
+ ddr4_tCCD_Lmin
+ = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN)
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN);
+
+ ddr_print("%-45s : %6d ps\n", "Medium Timebase (MTB)", spdMTB);
+ ddr_print("%-45s : %6d ps\n", "Fine Timebase (FTB)", spdFTB);
+
+ #define DDR4_TWR 15000
+ #define DDR4_TWTR_S 2500
+
+
+ tCKmin = ddr4_tCKAVGmin;
+ twr = DDR4_TWR;
+ trcd = ddr4_tRCDmin;
+ trrd = ddr4_tRRD_Smin;
+ trp = ddr4_tRPmin;
+ tras = ddr4_tRASmin;
+ trc = ddr4_tRCmin;
+ trfc = ddr4_tRFC1min;
+ twtr = DDR4_TWTR_S;
+ tfaw = ddr4_tFAWmin;
+
+ if (spd_rdimm) {
+ spd_addr_mirror = read_spd(node, &dimm_config_table[0], DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM) & 0x1;
+ } else {
+ spd_addr_mirror = read_spd(node, &dimm_config_table[0], DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE) & 0x1;
+ }
+ debug_print("spd_addr_mirror : %#06x\n", spd_addr_mirror );
+
+ } else { /* if (ddr_type == DDR4_DRAM) */
+ spd_mtb_dividend = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND);
+ spd_mtb_divisor = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR);
+ spd_tck_min = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN);
+ spd_taa_min = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_CAS_LATENCY_TAAMIN);
+
+ spd_twr = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN);
+ spd_trcd = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN);
+ spd_trrd = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN);
+ spd_trp = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN);
+ spd_tras = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN);
+ spd_tras |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_UPPER_NIBBLES_TRAS_TRC)&0xf) << 8);
+ spd_trc = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN);
+ spd_trc |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_UPPER_NIBBLES_TRAS_TRC)&0xf0) << 4);
+ spd_trfc = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN);
+ spd_trfc |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN)) << 8);
+ spd_twtr = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN);
+ spd_trtp = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN);
+ spd_tfaw = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN);
+ spd_tfaw |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_UPPER_NIBBLE_TFAW)&0xf) << 8);
+ spd_addr_mirror = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_ADDRESS_MAPPING) & 0x1;
+ spd_addr_mirror = spd_addr_mirror && !spd_rdimm; /* Only address mirror unbuffered dimms. */
+ ftb_Dividend = read_spd(node, &dimm_config_table[0], DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4;
+ ftb_Divisor = read_spd(node, &dimm_config_table[0], DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf;
+ ftb_Divisor = (ftb_Divisor == 0) ? 1 : ftb_Divisor; /* Make sure that it is not 0 */
+
+ debug_print("spd_twr : %#06x\n", spd_twr );
+ debug_print("spd_trcd : %#06x\n", spd_trcd);
+ debug_print("spd_trrd : %#06x\n", spd_trrd);
+ debug_print("spd_trp : %#06x\n", spd_trp );
+ debug_print("spd_tras : %#06x\n", spd_tras);
+ debug_print("spd_trc : %#06x\n", spd_trc );
+ debug_print("spd_trfc : %#06x\n", spd_trfc);
+ debug_print("spd_twtr : %#06x\n", spd_twtr);
+ debug_print("spd_trtp : %#06x\n", spd_trtp);
+ debug_print("spd_tfaw : %#06x\n", spd_tfaw);
+ debug_print("spd_addr_mirror : %#06x\n", spd_addr_mirror);
+
+ mtb_psec = spd_mtb_dividend * 1000 / spd_mtb_divisor;
+ tAAmin = mtb_psec * spd_taa_min;
+ tAAmin += ftb_Dividend * (SC_t) read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN) / ftb_Divisor;
+ tCKmin = mtb_psec * spd_tck_min;
+ tCKmin += ftb_Dividend * (SC_t) read_spd(node, &dimm_config_table[0], DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN) / ftb_Divisor;
+
+ twr = spd_twr * mtb_psec;
+ trcd = spd_trcd * mtb_psec;
+ trrd = spd_trrd * mtb_psec;
+ trp = spd_trp * mtb_psec;
+ tras = spd_tras * mtb_psec;
+ trc = spd_trc * mtb_psec;
+ trfc = spd_trfc * mtb_psec;
+ twtr = spd_twtr * mtb_psec;
+ trtp = spd_trtp * mtb_psec;
+ tfaw = spd_tfaw * mtb_psec;
+
+ } /* if (ddr_type == DDR4_DRAM) */
+
+ if (ddr_type == DDR4_DRAM) {
+ ddr_print("%-45s : %6d ps (%ld MT/s)\n", "SDRAM Minimum Cycle Time (tCKAVGmin)",ddr4_tCKAVGmin,
+ pretty_psecs_to_mts(ddr4_tCKAVGmin));
+ ddr_print("%-45s : %6d ps\n", "SDRAM Maximum Cycle Time (tCKAVGmax)", ddr4_tCKAVGmax);
+ ddr_print("%-45s : %6d ps\n", "Minimum CAS Latency Time (tAAmin)", tAAmin);
+ ddr_print("%-45s : %6d ps\n", "Minimum RAS to CAS Delay Time (tRCDmin)", ddr4_tRCDmin);
+ ddr_print("%-45s : %6d ps\n", "Minimum Row Precharge Delay Time (tRPmin)", ddr4_tRPmin);
+ ddr_print("%-45s : %6d ps\n", "Minimum Active to Precharge Delay (tRASmin)", ddr4_tRASmin);
+ ddr_print("%-45s : %6d ps\n", "Minimum Active to Active/Refr. Delay (tRCmin)", ddr4_tRCmin);
+ ddr_print("%-45s : %6d ps\n", "Minimum Refresh Recovery Delay (tRFC1min)", ddr4_tRFC1min);
+ ddr_print("%-45s : %6d ps\n", "Minimum Refresh Recovery Delay (tRFC2min)", ddr4_tRFC2min);
+ ddr_print("%-45s : %6d ps\n", "Minimum Refresh Recovery Delay (tRFC4min)", ddr4_tRFC4min);
+ ddr_print("%-45s : %6d ps\n", "Minimum Four Activate Window Time (tFAWmin)", ddr4_tFAWmin);
+ ddr_print("%-45s : %6d ps\n", "Minimum Act. to Act. Delay (tRRD_Smin)", ddr4_tRRD_Smin);
+ ddr_print("%-45s : %6d ps\n", "Minimum Act. to Act. Delay (tRRD_Lmin)", ddr4_tRRD_Lmin);
+ ddr_print("%-45s : %6d ps\n", "Minimum CAS to CAS Delay Time (tCCD_Lmin)", ddr4_tCCD_Lmin);
+ } else {
+ ddr_print("Medium Timebase (MTB) : %6d ps\n", mtb_psec);
+ ddr_print("Minimum Cycle Time (tCKmin) : %6d ps (%ld MT/s)\n", tCKmin,
+ pretty_psecs_to_mts(tCKmin));
+ ddr_print("Minimum CAS Latency Time (tAAmin) : %6d ps\n", tAAmin);
+ ddr_print("Write Recovery Time (tWR) : %6d ps\n", twr);
+ ddr_print("Minimum RAS to CAS delay (tRCD) : %6d ps\n", trcd);
+ ddr_print("Minimum Row Active to Row Active delay (tRRD) : %6d ps\n", trrd);
+ ddr_print("Minimum Row Precharge Delay (tRP) : %6d ps\n", trp);
+ ddr_print("Minimum Active to Precharge (tRAS) : %6d ps\n", tras);
+ ddr_print("Minimum Active to Active/Refresh Delay (tRC) : %6d ps\n", trc);
+ ddr_print("Minimum Refresh Recovery Delay (tRFC) : %6d ps\n", trfc);
+ ddr_print("Internal write to read command delay (tWTR) : %6d ps\n", twtr);
+ ddr_print("Min Internal Rd to Precharge Cmd Delay (tRTP) : %6d ps\n", trtp);
+ ddr_print("Minimum Four Activate Window Delay (tFAW) : %6d ps\n", tfaw);
+ }
+
+
+ /* When the cycle time is within 1 psec of the minimum accept it
+ as a slight rounding error and adjust it to exactly the minimum
+ cycle time. This avoids an unnecessary warning. */
+ if (_abs(tclk_psecs - tCKmin) < 2)
+ tclk_psecs = tCKmin;
+
+ if (tclk_psecs < (uint64_t)tCKmin) {
+ ddr_print("WARNING!!!!: DDR Clock Rate (tCLK: %ld) exceeds DIMM specifications (tCKmin: %ld)!!!!\n",
+ tclk_psecs, (uint64_t)tCKmin);
+ }
+
+
+ ddr_print("DDR Clock Rate (tCLK) : %6lu ps\n", tclk_psecs);
+ ddr_print("Core Clock Rate (eCLK) : %6lu ps\n", eclk_psecs);
+
+ if ((s = lookup_env_parameter("ddr_use_ecc")) != NULL) {
+ use_ecc = !!strtoul(s, NULL, 0);
+ }
+ use_ecc = use_ecc && spd_ecc;
+
+ ddr_interface_bytemask = ddr_interface_64b
+ ? (use_ecc ? 0x1ff : 0xff)
+ : (use_ecc ? 0x01f : 0x0f); // FIXME? 81xx does diff from 70xx
+
+ ddr_print("DRAM Interface width: %d bits %s bytemask 0x%x\n",
+ ddr_interface_64b ? 64 : 32, use_ecc ? "+ECC" : "",
+ ddr_interface_bytemask);
+
+ ddr_print("\n------ Board Custom Configuration Settings ------\n");
+ ddr_print("%-45s : %d\n", "MIN_RTT_NOM_IDX ", custom_lmc_config->min_rtt_nom_idx);
+ ddr_print("%-45s : %d\n", "MAX_RTT_NOM_IDX ", custom_lmc_config->max_rtt_nom_idx);
+ ddr_print("%-45s : %d\n", "MIN_RODT_CTL ", custom_lmc_config->min_rodt_ctl);
+ ddr_print("%-45s : %d\n", "MAX_RODT_CTL ", custom_lmc_config->max_rodt_ctl);
+ ddr_print("%-45s : %d\n", "MIN_CAS_LATENCY ", custom_lmc_config->min_cas_latency);
+ ddr_print("%-45s : %d\n", "OFFSET_EN ", custom_lmc_config->offset_en);
+ ddr_print("%-45s : %d\n", "OFFSET_UDIMM ", custom_lmc_config->offset_udimm);
+ ddr_print("%-45s : %d\n", "OFFSET_RDIMM ", custom_lmc_config->offset_rdimm);
+ ddr_print("%-45s : %d\n", "DDR_RTT_NOM_AUTO ", custom_lmc_config->ddr_rtt_nom_auto);
+ ddr_print("%-45s : %d\n", "DDR_RODT_CTL_AUTO ", custom_lmc_config->ddr_rodt_ctl_auto);
+ if (spd_rdimm)
+ ddr_print("%-45s : %d\n", "RLEVEL_COMP_OFFSET", custom_lmc_config->rlevel_comp_offset_rdimm);
+ else
+ ddr_print("%-45s : %d\n", "RLEVEL_COMP_OFFSET", custom_lmc_config->rlevel_comp_offset_udimm);
+ ddr_print("%-45s : %d\n", "RLEVEL_COMPUTE ", custom_lmc_config->rlevel_compute);
+ ddr_print("%-45s : %d\n", "DDR2T_UDIMM ", custom_lmc_config->ddr2t_udimm);
+ ddr_print("%-45s : %d\n", "DDR2T_RDIMM ", custom_lmc_config->ddr2t_rdimm);
+ ddr_print("%-45s : %d\n", "FPRCH2 ", custom_lmc_config->fprch2);
+ ddr_print("-------------------------------------------------\n");
+
+
+ CL = divide_roundup(tAAmin, tclk_psecs);
+
+ ddr_print("Desired CAS Latency : %6d\n", CL);
+
+ min_cas_latency = custom_lmc_config->min_cas_latency;
+
+
+ if ((s = lookup_env_parameter("ddr_min_cas_latency")) != NULL) {
+ min_cas_latency = strtoul(s, NULL, 0);
+ }
+
+ {
+ int base_CL;
+ ddr_print("CAS Latencies supported in DIMM :");
+ base_CL = (ddr_type == DDR4_DRAM) ? 7 : 4;
+ for (i=0; i<32; ++i) {
+ if ((spd_cas_latency >> i) & 1) {
+ ddr_print(" %d", i+base_CL);
+ max_cas_latency = i+base_CL;
+ if (min_cas_latency == 0)
+ min_cas_latency = i+base_CL;
+ }
+ }
+ ddr_print("\n");
+
+ /* Use relaxed timing when running slower than the minimum
+ supported speed. Adjust timing to match the smallest supported
+ CAS Latency. */
+ if (CL < min_cas_latency) {
+ uint64_t adjusted_tclk = tAAmin / min_cas_latency;
+ CL = min_cas_latency;
+ ddr_print("Slow clock speed. Adjusting timing: tClk = %lu, Adjusted tClk = %ld\n",
+ tclk_psecs, adjusted_tclk);
+ tclk_psecs = adjusted_tclk;
+ }
+
+ if ((s = lookup_env_parameter("ddr_cas_latency")) != NULL) {
+ override_cas_latency = strtoul(s, NULL, 0);
+ }
+
+ /* Make sure that the selected cas latency is legal */
+ for (i=(CL-base_CL); i<32; ++i) {
+ if ((spd_cas_latency >> i) & 1) {
+ CL = i+base_CL;
+ break;
+ }
+ }
+ }
+
+ if (CL > max_cas_latency)
+ CL = max_cas_latency;
+
+ if (override_cas_latency != 0) {
+ CL = override_cas_latency;
+ }
+
+ ddr_print("CAS Latency : %6d\n", CL);
+
+ if ((CL * tCKmin) > 20000)
+ {
+ ddr_print("(CLactual * tCKmin) = %d exceeds 20 ns\n", (CL * tCKmin));
+ }
+
+ if ((num_banks != 4) && (num_banks != 8) && (num_banks != 16))
+ {
+ error_print("Unsupported number of banks %d. Must be 4 or 8 or 16.\n", num_banks);
+ ++fatal_error;
+ }
+
+ if ((num_ranks != 1) && (num_ranks != 2) && (num_ranks != 4))
+ {
+ error_print("Unsupported number of ranks: %d\n", num_ranks);
+ ++fatal_error;
+ }
+
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN81XX)) { // 88XX or 83XX, but not 81XX
+ if ((dram_width != 8) && (dram_width != 16) && (dram_width != 4)) {
+ error_print("Unsupported SDRAM Width, x%d. Must be x4, x8 or x16.\n", dram_width);
+ ++fatal_error;
+ }
+ } else if ((dram_width != 8) && (dram_width != 16)) { // 81XX can only do x8 or x16
+ error_print("Unsupported SDRAM Width, x%d. Must be x8 or x16.\n", dram_width);
+ ++fatal_error;
+ }
+
+
+ /*
+ ** Bail out here if things are not copasetic.
+ */
+ if (fatal_error)
+ return(-1);
+
+ /*
+ * 6.9.6 LMC RESET Initialization
+ *
+ * The purpose of this step is to assert/deassert the RESET# pin at the
+ * DDR3/DDR4 parts.
+ *
+ * This LMC RESET step is done for all enabled LMCs.
+ */
+ perform_lmc_reset(node, ddr_interface_num);
+
+ // Make sure scrambling is disabled during init...
+ {
+ bdk_lmcx_control_t lmc_control;
+
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ lmc_control.s.scramble_ena = 0;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG0(ddr_interface_num), 0);
+ DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG1(ddr_interface_num), 0);
+ DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG2(ddr_interface_num), 0);
+ }
+
+
+ odt_idx = dimm_count - 1;
+
+ switch (num_ranks) {
+ case 1:
+ odt_config = odt_1rank_config;
+ break;
+ case 2:
+ odt_config = odt_2rank_config;
+ break;
+ case 4:
+ odt_config = odt_4rank_config;
+ break;
+ default:
+ odt_config = disable_odt_config;
+ error_print("Unsupported number of ranks: %d\n", num_ranks);
+ ++fatal_error;
+ }
+
+
+ /* Parameters from DDR3 Specifications */
+#define DDR3_tREFI 7800000 /* 7.8 us */
+#define DDR3_ZQCS 80000ull /* 80 ns */
+#define DDR3_ZQCS_Interval 1280000000 /* 128ms/100 */
+#define DDR3_tCKE 5000 /* 5 ns */
+#define DDR3_tMRD 4 /* 4 nCK */
+#define DDR3_tDLLK 512 /* 512 nCK */
+#define DDR3_tMPRR 1 /* 1 nCK */
+#define DDR3_tWLMRD 40 /* 40 nCK */
+#define DDR3_tWLDQSEN 25 /* 25 nCK */
+
+ /* Parameters from DDR4 Specifications */
+#define DDR4_tMRD 8 /* 8 nCK */
+#define DDR4_tDLLK 768 /* 768 nCK */
+
+ /*
+ * 6.9.7 Early LMC Initialization
+ *
+ * All of DDR PLL, LMC CK, and LMC DRESET initializations must be
+ * completed prior to starting this LMC initialization sequence.
+ *
+ * Perform the following five substeps for early LMC initialization:
+ *
+ * 1. Software must ensure there are no pending DRAM transactions.
+ *
+ * 2. Write LMC(0)_CONFIG, LMC(0)_CONTROL, LMC(0)_TIMING_PARAMS0,
+ * LMC(0)_TIMING_PARAMS1, LMC(0)_MODEREG_PARAMS0,
+ * LMC(0)_MODEREG_PARAMS1, LMC(0)_DUAL_MEMCFG, LMC(0)_NXM,
+ * LMC(0)_WODT_MASK, LMC(0)_RODT_MASK, LMC(0)_COMP_CTL2,
+ * LMC(0)_PHY_CTL, LMC(0)_DIMM0/1_PARAMS, and LMC(0)_DIMM_CTL with
+ * appropriate values. All sections in this chapter can be used to
+ * derive proper register settings.
+ */
+
+ /* LMC(0)_CONFIG */
+ {
+ lmc_config.u = 0;
+
+ lmc_config.s.ecc_ena = use_ecc;
+ lmc_config.s.row_lsb = encode_row_lsb_ddr3(row_lsb, ddr_interface_64b);
+ lmc_config.s.pbank_lsb = encode_pbank_lsb_ddr3(pbank_lsb, ddr_interface_64b);
+
+ lmc_config.s.idlepower = 0; /* Disabled */
+
+ if ((s = lookup_env_parameter("ddr_idlepower")) != NULL) {
+ lmc_config.s.idlepower = strtoul(s, NULL, 0);
+ }
+
+ lmc_config.s.forcewrite = 0; /* Disabled */
+ lmc_config.s.ecc_adr = 1; /* Include memory reference address in the ECC */
+
+ if ((s = lookup_env_parameter("ddr_ecc_adr")) != NULL) {
+ lmc_config.s.ecc_adr = strtoul(s, NULL, 0);
+ }
+
+ lmc_config.s.reset = 0;
+
+ /*
+ * Program LMC0_CONFIG[24:18], ref_zqcs_int(6:0) to
+ * RND-DN(tREFI/clkPeriod/512) Program LMC0_CONFIG[36:25],
+ * ref_zqcs_int(18:7) to
+ * RND-DN(ZQCS_Interval/clkPeriod/(512*128)). Note that this
+ * value should always be greater than 32, to account for
+ * resistor calibration delays.
+ */
+
+ lmc_config.s.ref_zqcs_int = ((DDR3_tREFI/tclk_psecs/512) & 0x7f);
+ lmc_config.s.ref_zqcs_int |= ((max(33ull, (DDR3_ZQCS_Interval/(tclk_psecs/100)/(512*128))) & 0xfff) << 7);
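+        /* Worked example (illustrative): at tclk_psecs = 833 (2400 MT/s) the
+           refresh field is 7800000/833/512 = 18 and the ZQCS field is
+           1280000000/(833/100)/65536 = 2441, so REF_ZQCS_INT = (2441 << 7) | 18. */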
+
+
+ lmc_config.s.early_dqx = 1; /* Default to enabled */
+
+ if ((s = lookup_env_parameter("ddr_early_dqx")) == NULL)
+ s = lookup_env_parameter("ddr%d_early_dqx", ddr_interface_num);
+ if (s != NULL) {
+ lmc_config.s.early_dqx = strtoul(s, NULL, 0);
+ }
+
+ lmc_config.s.sref_with_dll = 0;
+
+ lmc_config.s.rank_ena = bunk_enable;
+ lmc_config.s.rankmask = rank_mask; /* Set later */
+ lmc_config.s.mirrmask = (spd_addr_mirror << 1 | spd_addr_mirror << 3) & rank_mask;
+ lmc_config.s.init_status = rank_mask; /* Set once and don't change it. */
+ lmc_config.s.early_unload_d0_r0 = 0;
+ lmc_config.s.early_unload_d0_r1 = 0;
+ lmc_config.s.early_unload_d1_r0 = 0;
+ lmc_config.s.early_unload_d1_r1 = 0;
+ lmc_config.s.scrz = 0;
+ // set 32-bit mode for real only when selected AND 81xx...
+ if (!ddr_interface_64b && CAVIUM_IS_MODEL(CAVIUM_CN81XX)) {
+ lmc_config.s.mode32b = 1;
+ }
+ VB_PRT(VBL_DEV, "%-45s : %d\n", "MODE32B (init)", lmc_config.s.mode32b);
+ lmc_config.s.mode_x4dev = (dram_width == 4) ? 1 : 0;
+ lmc_config.s.bg2_enable = ((ddr_type == DDR4_DRAM) && (dram_width == 16)) ? 0 : 1;
+
+ if ((s = lookup_env_parameter_ull("ddr_config")) != NULL) {
+ lmc_config.u = strtoull(s, NULL, 0);
+ }
+ ddr_print("LMC_CONFIG : 0x%016lx\n", lmc_config.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+ }
+
+ /* LMC(0)_CONTROL */
+ {
+ bdk_lmcx_control_t lmc_control;
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ lmc_control.s.rdimm_ena = spd_rdimm;
+ lmc_control.s.bwcnt = 0; /* Clear counter later */
+ if (spd_rdimm)
+ lmc_control.s.ddr2t = (safe_ddr_flag ? 1 : custom_lmc_config->ddr2t_rdimm );
+ else
+ lmc_control.s.ddr2t = (safe_ddr_flag ? 1 : custom_lmc_config->ddr2t_udimm );
+ lmc_control.s.pocas = 0;
+ lmc_control.s.fprch2 = (safe_ddr_flag ? 2 : custom_lmc_config->fprch2 );
+ lmc_control.s.throttle_rd = safe_ddr_flag ? 1 : 0;
+ lmc_control.s.throttle_wr = safe_ddr_flag ? 1 : 0;
+ lmc_control.s.inorder_rd = safe_ddr_flag ? 1 : 0;
+ lmc_control.s.inorder_wr = safe_ddr_flag ? 1 : 0;
+ lmc_control.cn81xx.elev_prio_dis = safe_ddr_flag ? 1 : 0;
+ lmc_control.s.nxm_write_en = 0; /* discards writes to
+ addresses that don't exist
+ in the DRAM */
+ lmc_control.s.max_write_batch = 8;
+ lmc_control.s.xor_bank = 1;
+ lmc_control.s.auto_dclkdis = 1;
+ lmc_control.s.int_zqcs_dis = 0;
+ lmc_control.s.ext_zqcs_dis = 0;
+ lmc_control.s.bprch = 1;
+ lmc_control.s.wodt_bprch = 1;
+ lmc_control.s.rodt_bprch = 1;
+
+ if ((s = lookup_env_parameter("ddr_xor_bank")) != NULL) {
+ lmc_control.s.xor_bank = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_2t")) != NULL) {
+ lmc_control.s.ddr2t = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_fprch2")) != NULL) {
+ lmc_control.s.fprch2 = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_bprch")) != NULL) {
+ lmc_control.s.bprch = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_wodt_bprch")) != NULL) {
+ lmc_control.s.wodt_bprch = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_rodt_bprch")) != NULL) {
+ lmc_control.s.rodt_bprch = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_int_zqcs_dis")) != NULL) {
+ lmc_control.s.int_zqcs_dis = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_ext_zqcs_dis")) != NULL) {
+ lmc_control.s.ext_zqcs_dis = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter_ull("ddr_control")) != NULL) {
+ lmc_control.u = strtoull(s, NULL, 0);
+ }
+ ddr_print("LMC_CONTROL : 0x%016lx\n", lmc_control.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+ }
+
+ /* LMC(0)_TIMING_PARAMS0 */
+ {
+ unsigned trp_value;
+ bdk_lmcx_timing_params0_t lmc_timing_params0;
+ lmc_timing_params0.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS0(ddr_interface_num));
+
+ trp_value = divide_roundup(trp, tclk_psecs) - 1;
+ ddr_print("TIMING_PARAMS0[TRP]: NEW 0x%x, OLD 0x%x\n", trp_value,
+ trp_value + (unsigned)(divide_roundup(max(4*tclk_psecs, 7500ull), tclk_psecs)) - 4);
+#if 1
+ if ((s = lookup_env_parameter_ull("ddr_use_old_trp")) != NULL) {
+ if (!!strtoull(s, NULL, 0)) {
+ trp_value += divide_roundup(max(4*tclk_psecs, 7500ull), tclk_psecs) - 4;
+ ddr_print("TIMING_PARAMS0[trp]: USING OLD 0x%x\n", trp_value);
+ }
+ }
+#endif
+
+ lmc_timing_params0.s.txpr = divide_roundup(max(5*tclk_psecs, trfc+10000ull), 16*tclk_psecs);
+ lmc_timing_params0.s.tzqinit = divide_roundup(max(512*tclk_psecs, 640000ull), (256*tclk_psecs));
+ lmc_timing_params0.s.trp = trp_value & 0x1f;
+ lmc_timing_params0.s.tcksre = divide_roundup(max(5*tclk_psecs, 10000ull), tclk_psecs) - 1;
+
+ if (ddr_type == DDR4_DRAM) {
+ lmc_timing_params0.s.tzqcs = divide_roundup(128*tclk_psecs, (16*tclk_psecs)); /* Always 8. */
+ lmc_timing_params0.s.tcke = divide_roundup(max(3*tclk_psecs, (uint64_t) DDR3_tCKE), tclk_psecs) - 1;
+ lmc_timing_params0.s.tmrd = divide_roundup((DDR4_tMRD*tclk_psecs), tclk_psecs) - 1;
+ //lmc_timing_params0.s.tmod = divide_roundup(max(24*tclk_psecs, 15000ull), tclk_psecs) - 1;
+ lmc_timing_params0.s.tmod = 25; /* 25 is the max allowed */
+ lmc_timing_params0.s.tdllk = divide_roundup(DDR4_tDLLK, 256);
+ } else {
+ lmc_timing_params0.s.tzqcs = divide_roundup(max(64*tclk_psecs, DDR3_ZQCS), (16*tclk_psecs));
+ lmc_timing_params0.s.tcke = divide_roundup(DDR3_tCKE, tclk_psecs) - 1;
+ lmc_timing_params0.s.tmrd = divide_roundup((DDR3_tMRD*tclk_psecs), tclk_psecs) - 1;
+ lmc_timing_params0.s.tmod = divide_roundup(max(12*tclk_psecs, 15000ull), tclk_psecs) - 1;
+ lmc_timing_params0.s.tdllk = divide_roundup(DDR3_tDLLK, 256);
+ }
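+ /* Worked example of the cycle math above (inputs assumed, not fixed): at
+    tclk_psecs = 1250 with trfc = 260000 psec (260 ns, e.g. a 4 Gbit DDR4 die),
+    txpr = divide_roundup(max(5*1250, 260000+10000), 16*1250)
+         = divide_roundup(270000, 20000) = 14, in units of 16 CK. */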
+
+ if ((s = lookup_env_parameter_ull("ddr_timing_params0")) != NULL) {
+ lmc_timing_params0.u = strtoull(s, NULL, 0);
+ }
+ ddr_print("TIMING_PARAMS0 : 0x%016lx\n", lmc_timing_params0.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS0(ddr_interface_num), lmc_timing_params0.u);
+ }
+
+ /* LMC(0)_TIMING_PARAMS1 */
+ {
+ int txp, temp_trcd, trfc_dlr;
+ bdk_lmcx_timing_params1_t lmc_timing_params1;
+ lmc_timing_params1.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num));
+
+ lmc_timing_params1.s.tmprr = divide_roundup(DDR3_tMPRR*tclk_psecs, tclk_psecs) - 1;
+
+ lmc_timing_params1.s.tras = divide_roundup(tras, tclk_psecs) - 1;
+
+ // NOTE: this is reworked for pass 2.x
+ temp_trcd = divide_roundup(trcd, tclk_psecs);
+#if 1
+ if (temp_trcd > 15)
+ ddr_print("TIMING_PARAMS1[trcd]: need extension bit for 0x%x\n", temp_trcd);
+#endif
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (temp_trcd > 15)) {
+ /* Let .trcd=0 serve as a flag that the field has
+ overflowed. Must use Additive Latency mode as a
+ workaround. */
+ temp_trcd = 0;
+ }
+ lmc_timing_params1.s.trcd = temp_trcd & 0x0f;
+ lmc_timing_params1.s.trcd_ext = (temp_trcd >> 4) & 1;
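+ /* Worked example: trcd = 13750 psec (13.75 ns, assumed) at tclk_psecs = 1250
+    gives temp_trcd = divide_roundup(13750, 1250) = 11, so trcd = 11 and
+    trcd_ext = 0; only values above 15 need the extension bit and trigger the
+    pass-1 Additive Latency workaround above. */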
+
+ lmc_timing_params1.s.twtr = divide_roundup(twtr, tclk_psecs) - 1;
+ lmc_timing_params1.s.trfc = divide_roundup(trfc, 8*tclk_psecs);
+
+ // workaround needed for all THUNDER chips thru T88 Pass 2.0,
+ // but not 81xx and 83xx...
+ if ((ddr_type == DDR4_DRAM) && CAVIUM_IS_MODEL(CAVIUM_CN88XX)) {
+ /* Workaround bug 24006. Use Trrd_l. */
+ lmc_timing_params1.s.trrd = divide_roundup(ddr4_tRRD_Lmin, tclk_psecs) - 2;
+ } else
+ lmc_timing_params1.s.trrd = divide_roundup(trrd, tclk_psecs) - 2;
+
+ /*
+ ** tXP = max( 3nCK, 7.5 ns) DDR3-800 tCLK = 2500 psec
+ ** tXP = max( 3nCK, 7.5 ns) DDR3-1066 tCLK = 1875 psec
+ ** tXP = max( 3nCK, 6.0 ns) DDR3-1333 tCLK = 1500 psec
+ ** tXP = max( 3nCK, 6.0 ns) DDR3-1600 tCLK = 1250 psec
+ ** tXP = max( 3nCK, 6.0 ns) DDR3-1866 tCLK = 1071 psec
+ ** tXP = max( 3nCK, 6.0 ns) DDR3-2133 tCLK = 937 psec
+ */
+ txp = (tclk_psecs < 1875) ? 6000 : 7500;
+ // NOTE: this is reworked for pass 2.x
+ int temp_txp = divide_roundup(max(3*tclk_psecs, (unsigned)txp), tclk_psecs) - 1;
+#if 1
+ if (temp_txp > 7)
+ ddr_print("TIMING_PARAMS1[txp]: need extension bit for 0x%x\n", temp_txp);
+#endif
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (temp_txp > 7)) {
+ temp_txp = 7; // max it out
+ }
+ lmc_timing_params1.s.txp = temp_txp & 7;
+ lmc_timing_params1.s.txp_ext = (temp_txp >> 3) & 1;
+
+ lmc_timing_params1.s.twlmrd = divide_roundup(DDR3_tWLMRD*tclk_psecs, 4*tclk_psecs);
+ lmc_timing_params1.s.twldqsen = divide_roundup(DDR3_tWLDQSEN*tclk_psecs, 4*tclk_psecs);
+ lmc_timing_params1.s.tfaw = divide_roundup(tfaw, 4*tclk_psecs);
+ lmc_timing_params1.s.txpdll = divide_roundup(max(10*tclk_psecs, 24000ull), tclk_psecs) - 1;
+
+ if ((ddr_type == DDR4_DRAM) && is_3ds_dimm) {
+ /*
+ 4 Gb: tRFC_DLR = 90 ns
+ 8 Gb: tRFC_DLR = 120 ns
+ 16 Gb: tRFC_DLR = 190 ns FIXME?
+ */
+ // RNDUP[tRFC_DLR(ns) / (8 * TCYC(ns))]
+ if (die_capacity == 0x1000) // 4 Gbit
+ trfc_dlr = 90;
+ else if (die_capacity == 0x2000) // 8 Gbit
+ trfc_dlr = 120;
+ else if (die_capacity == 0x4000) // 16 Gbit
+ trfc_dlr = 190;
+ else
+ trfc_dlr = 0;
+
+ if (trfc_dlr == 0) {
+ ddr_print("N%d.LMC%d: ERROR: tRFC_DLR: die_capacity %u Mbit is illegal\n",
+ node, ddr_interface_num, die_capacity);
+ } else {
+ lmc_timing_params1.s.trfc_dlr = divide_roundup(trfc_dlr * 1000UL, 8*tclk_psecs);
+ ddr_print("N%d.LMC%d: TIMING_PARAMS1[trfc_dlr] set to %u\n",
+ node, ddr_interface_num, lmc_timing_params1.s.trfc_dlr);
+ }
+ }
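+ /* Worked example of the RNDUP formula: an 8 Gbit die at tclk_psecs = 1250
+    gives trfc_dlr = divide_roundup(120 * 1000, 8 * 1250)
+                   = divide_roundup(120000, 10000) = 12, in units of 8 CK. */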
+
+ if ((s = lookup_env_parameter_ull("ddr_timing_params1")) != NULL) {
+ lmc_timing_params1.u = strtoull(s, NULL, 0);
+ }
+ ddr_print("TIMING_PARAMS1 : 0x%016lx\n", lmc_timing_params1.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num), lmc_timing_params1.u);
+ }
+
+ /* LMC(0)_TIMING_PARAMS2 */
+ if (ddr_type == DDR4_DRAM) {
+ bdk_lmcx_timing_params1_t lmc_timing_params1;
+ bdk_lmcx_timing_params2_t lmc_timing_params2;
+ lmc_timing_params1.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num));
+ lmc_timing_params2.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS2(ddr_interface_num));
+ ddr_print("TIMING_PARAMS2 : 0x%016lx\n", lmc_timing_params2.u);
+
+ //lmc_timing_params2.s.trrd_l = divide_roundup(ddr4_tRRD_Lmin, tclk_psecs) - 1;
+ // NOTE: this is reworked for pass 2.x
+ int temp_trrd_l = divide_roundup(ddr4_tRRD_Lmin, tclk_psecs) - 2;
+#if 1
+ if (temp_trrd_l > 7)
+ ddr_print("TIMING_PARAMS2[trrd_l]: need extension bit for 0x%x\n", temp_trrd_l);
+#endif
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (temp_trrd_l > 7)) {
+ temp_trrd_l = 7; // max it out
+ }
+ lmc_timing_params2.s.trrd_l = temp_trrd_l & 7;
+ lmc_timing_params2.s.trrd_l_ext = (temp_trrd_l >> 3) & 1;
+
+ lmc_timing_params2.s.twtr_l = divide_nint(max(4*tclk_psecs, 7500ull), tclk_psecs) - 1; // correct for 1600-2400
+ lmc_timing_params2.s.t_rw_op_max = 7;
+ lmc_timing_params2.s.trtp = divide_roundup(max(4*tclk_psecs, 7500ull), tclk_psecs) - 1;
+
+ ddr_print("TIMING_PARAMS2 : 0x%016lx\n", lmc_timing_params2.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS2(ddr_interface_num), lmc_timing_params2.u);
+
+ /* Workaround Errata 25823 - LMC: Possible DDR4 tWTR_L not met
+ for Write-to-Read operations to the same Bank Group */
+ if (lmc_timing_params1.s.twtr < (lmc_timing_params2.s.twtr_l - 4)) {
+ lmc_timing_params1.s.twtr = lmc_timing_params2.s.twtr_l - 4;
+ ddr_print("ERRATA 25823: NEW: TWTR: %d, TWTR_L: %d\n", lmc_timing_params1.s.twtr, lmc_timing_params2.s.twtr_l);
+ ddr_print("TIMING_PARAMS1 : 0x%016lx\n", lmc_timing_params1.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num), lmc_timing_params1.u);
+ }
+ }
+
+ /* LMC(0)_MODEREG_PARAMS0 */
+ {
+ bdk_lmcx_modereg_params0_t lmc_modereg_params0;
+ int param;
+
+ lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
+
+ if (ddr_type == DDR4_DRAM) {
+ lmc_modereg_params0.s.cwl = 0; /* 1600 (1250ps) */
+ if (tclk_psecs < 1250)
+ lmc_modereg_params0.s.cwl = 1; /* 1866 (1072ps) */
+ if (tclk_psecs < 1072)
+ lmc_modereg_params0.s.cwl = 2; /* 2133 (938ps) */
+ if (tclk_psecs < 938)
+ lmc_modereg_params0.s.cwl = 3; /* 2400 (833ps) */
+ if (tclk_psecs < 833)
+ lmc_modereg_params0.s.cwl = 4; /* 2666 (750ps) */
+ if (tclk_psecs < 750)
+ lmc_modereg_params0.s.cwl = 5; /* 3200 (625ps) */
+ } else {
+ /*
+ ** CSR CWL CAS write Latency
+ ** === === =================================
+ ** 0 5 ( tCK(avg) >= 2.5 ns)
+ ** 1 6 (2.5 ns > tCK(avg) >= 1.875 ns)
+ ** 2 7 (1.875 ns > tCK(avg) >= 1.5 ns)
+ ** 3 8 (1.5 ns > tCK(avg) >= 1.25 ns)
+ ** 4 9 (1.25 ns > tCK(avg) >= 1.07 ns)
+ ** 5 10 (1.07 ns > tCK(avg) >= 0.935 ns)
+ ** 6 11 (0.935 ns > tCK(avg) >= 0.833 ns)
+ ** 7 12 (0.833 ns > tCK(avg) >= 0.75 ns)
+ */
+
+ lmc_modereg_params0.s.cwl = 0;
+ if (tclk_psecs < 2500)
+ lmc_modereg_params0.s.cwl = 1;
+ if (tclk_psecs < 1875)
+ lmc_modereg_params0.s.cwl = 2;
+ if (tclk_psecs < 1500)
+ lmc_modereg_params0.s.cwl = 3;
+ if (tclk_psecs < 1250)
+ lmc_modereg_params0.s.cwl = 4;
+ if (tclk_psecs < 1070)
+ lmc_modereg_params0.s.cwl = 5;
+ if (tclk_psecs < 935)
+ lmc_modereg_params0.s.cwl = 6;
+ if (tclk_psecs < 833)
+ lmc_modereg_params0.s.cwl = 7;
+ }
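+ /* Worked example: tclk_psecs = 1250 (DDR3-1600) stops at cwl = 3, i.e. CAS
+    write latency 8 per the table above; the same clock on DDR4 leaves cwl = 0,
+    which the printout below reports as CWL 9. */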
+
+ if ((s = lookup_env_parameter("ddr_cwl")) != NULL) {
+ lmc_modereg_params0.s.cwl = strtoul(s, NULL, 0) - 5;
+ }
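+ /* Note: the -5 offset converts a human-readable CWL into the DDR3 CSR
+    encoding tabled above; the DDR4 encoding starts at CWL 9 for CSR 0, so
+    this override appears to expect DDR3-style input. */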
+
+ if (ddr_type == DDR4_DRAM) {
+ ddr_print("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
+ lmc_modereg_params0.s.cwl + 9
+ + ((lmc_modereg_params0.s.cwl>2) ? (lmc_modereg_params0.s.cwl-3) * 2 : 0),
+ lmc_modereg_params0.s.cwl);
+ } else {
+ ddr_print("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
+ lmc_modereg_params0.s.cwl + 5,
+ lmc_modereg_params0.s.cwl);
+ }
+
+ lmc_modereg_params0.s.mprloc = 0;
+ lmc_modereg_params0.s.mpr = 0;
+ lmc_modereg_params0.s.dll = (ddr_type == DDR4_DRAM)?1:0; /* disable(0) for DDR3 and enable(1) for DDR4 */
+ lmc_modereg_params0.s.al = 0;
+ lmc_modereg_params0.s.wlev = 0; /* Read Only */
+ lmc_modereg_params0.s.tdqs = ((ddr_type == DDR4_DRAM) || (dram_width != 8))?0:1; /* disable(0) for DDR4 and x4/x16 DDR3 */
+ lmc_modereg_params0.s.qoff = 0;
+ //lmc_modereg_params0.s.bl = 0; /* Don't touch block dirty logic */
+
+ if ((s = lookup_env_parameter("ddr_cl")) != NULL) {
+ CL = strtoul(s, NULL, 0);
+ ddr_print("CAS Latency : %6d\n", CL);
+ }
+
+ if (ddr_type == DDR4_DRAM) {
+ lmc_modereg_params0.s.cl = 0x0;
+ if (CL > 9)
+ lmc_modereg_params0.s.cl = 0x1;
+ if (CL > 10)
+ lmc_modereg_params0.s.cl = 0x2;
+ if (CL > 11)
+ lmc_modereg_params0.s.cl = 0x3;
+ if (CL > 12)
+ lmc_modereg_params0.s.cl = 0x4;
+ if (CL > 13)
+ lmc_modereg_params0.s.cl = 0x5;
+ if (CL > 14)
+ lmc_modereg_params0.s.cl = 0x6;
+ if (CL > 15)
+ lmc_modereg_params0.s.cl = 0x7;
+ if (CL > 16)
+ lmc_modereg_params0.s.cl = 0x8;
+ if (CL > 18)
+ lmc_modereg_params0.s.cl = 0x9;
+ if (CL > 20)
+ lmc_modereg_params0.s.cl = 0xA;
+ if (CL > 24)
+ lmc_modereg_params0.s.cl = 0xB;
+ } else {
+ lmc_modereg_params0.s.cl = 0x2;
+ if (CL > 5)
+ lmc_modereg_params0.s.cl = 0x4;
+ if (CL > 6)
+ lmc_modereg_params0.s.cl = 0x6;
+ if (CL > 7)
+ lmc_modereg_params0.s.cl = 0x8;
+ if (CL > 8)
+ lmc_modereg_params0.s.cl = 0xA;
+ if (CL > 9)
+ lmc_modereg_params0.s.cl = 0xC;
+ if (CL > 10)
+ lmc_modereg_params0.s.cl = 0xE;
+ if (CL > 11)
+ lmc_modereg_params0.s.cl = 0x1;
+ if (CL > 12)
+ lmc_modereg_params0.s.cl = 0x3;
+ if (CL > 13)
+ lmc_modereg_params0.s.cl = 0x5;
+ if (CL > 14)
+ lmc_modereg_params0.s.cl = 0x7;
+ if (CL > 15)
+ lmc_modereg_params0.s.cl = 0x9;
+ }
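+ /* Worked example: CL = 16 on DDR4 satisfies "CL > 15" but not "CL > 16", so
+    the cascade above leaves cl = 0x7; on DDR3 the same CL would end at 0x9. */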
+
+ lmc_modereg_params0.s.rbt = 0; /* Read Only. */
+ lmc_modereg_params0.s.tm = 0;
+ lmc_modereg_params0.s.dllr = 0;
+
+ param = divide_roundup(twr, tclk_psecs);
+
+ if (ddr_type == DDR4_DRAM) { /* DDR4 */
+ lmc_modereg_params0.s.wrp = 1;
+ if (param > 12)
+ lmc_modereg_params0.s.wrp = 2;
+ if (param > 14)
+ lmc_modereg_params0.s.wrp = 3;
+ if (param > 16)
+ lmc_modereg_params0.s.wrp = 4;
+ if (param > 18)
+ lmc_modereg_params0.s.wrp = 5;
+ if (param > 20)
+ lmc_modereg_params0.s.wrp = 6;
+ if (param > 24) /* RESERVED in DDR4 spec */
+ lmc_modereg_params0.s.wrp = 7;
+ } else { /* DDR3 */
+ lmc_modereg_params0.s.wrp = 1;
+ if (param > 5)
+ lmc_modereg_params0.s.wrp = 2;
+ if (param > 6)
+ lmc_modereg_params0.s.wrp = 3;
+ if (param > 7)
+ lmc_modereg_params0.s.wrp = 4;
+ if (param > 8)
+ lmc_modereg_params0.s.wrp = 5;
+ if (param > 10)
+ lmc_modereg_params0.s.wrp = 6;
+ if (param > 12)
+ lmc_modereg_params0.s.wrp = 7;
+ }
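+ /* Worked example: twr = 15000 psec (15 ns) at tclk_psecs = 1250 gives
+    param = divide_roundup(15000, 1250) = 12, which is not > 12, so the DDR4
+    branch above leaves wrp = 1, i.e. a write recovery of 12 CK. */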
+
+ lmc_modereg_params0.s.ppd = 0;
+
+ if ((s = lookup_env_parameter("ddr_wrp")) != NULL) {
+ lmc_modereg_params0.s.wrp = strtoul(s, NULL, 0);
+ }
+
+ ddr_print("%-45s : %d, [0x%x]\n", "Write recovery for auto precharge WRP, [CSR]",
+ param, lmc_modereg_params0.s.wrp);
+
+ if ((s = lookup_env_parameter_ull("ddr_modereg_params0")) != NULL) {
+ lmc_modereg_params0.u = strtoull(s, NULL, 0);
+ }
+ ddr_print("MODEREG_PARAMS0 : 0x%016lx\n", lmc_modereg_params0.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
+ }
+
+ /* LMC(0)_MODEREG_PARAMS1 */
+ {
+ bdk_lmcx_modereg_params1_t lmc_modereg_params1;
+
+ lmc_modereg_params1.u = odt_config[odt_idx].odt_mask1.u;
+
+#ifdef CAVIUM_ONLY
+ /* Special request: mismatched DIMM support. Slot 0: 2-Rank, Slot 1: 1-Rank */
+ if (rank_mask == 0x7) { /* 2-Rank, 1-Rank */
+ lmc_modereg_params1.s.rtt_nom_00 = 0;
+ lmc_modereg_params1.s.rtt_nom_01 = 3; /* rttnom_40ohm */
+ lmc_modereg_params1.s.rtt_nom_10 = 3; /* rttnom_40ohm */
+ lmc_modereg_params1.s.rtt_nom_11 = 0;
+ dyn_rtt_nom_mask = 0x6;
+ }
+#endif /* CAVIUM_ONLY */
+
+ if ((s = lookup_env_parameter("ddr_rtt_nom_mask")) != NULL) {
+ dyn_rtt_nom_mask = strtoul(s, NULL, 0);
+ }
+
+
+ /* Save the original rtt_nom settings before sweeping through settings. */
+ default_rtt_nom[0] = lmc_modereg_params1.s.rtt_nom_00;
+ default_rtt_nom[1] = lmc_modereg_params1.s.rtt_nom_01;
+ default_rtt_nom[2] = lmc_modereg_params1.s.rtt_nom_10;
+ default_rtt_nom[3] = lmc_modereg_params1.s.rtt_nom_11;
+
+ ddr_rtt_nom_auto = custom_lmc_config->ddr_rtt_nom_auto;
+
+ for (i=0; i<4; ++i) {
+ uint64_t value;
+ if ((s = lookup_env_parameter("ddr_rtt_nom_%1d%1d", !!(i&2), !!(i&1))) == NULL)
+ s = lookup_env_parameter("ddr%d_rtt_nom_%1d%1d", ddr_interface_num, !!(i&2), !!(i&1));
+ if (s != NULL) {
+ value = strtoul(s, NULL, 0);
+ lmc_modereg_params1.u &= ~((uint64_t)0x7 << (i*12+9));
+ lmc_modereg_params1.u |= ( (value & 0x7) << (i*12+9));
+ default_rtt_nom[i] = value;
+ ddr_rtt_nom_auto = 0;
+ }
+ }
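+ /* The shifts above rely on each rank's MODEREG_PARAMS1 group being 12 bits
+    wide with RTT_NOM in bits <11:9> of the group (bit offset i*12+9); the
+    ddr_dic overrides below use bits <8:7> (offset i*12+7) the same way. */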
+
+ if ((s = lookup_env_parameter("ddr_rtt_nom")) == NULL)
+ s = lookup_env_parameter("ddr%d_rtt_nom", ddr_interface_num);
+ if (s != NULL) {
+ uint64_t value;
+ value = strtoul(s, NULL, 0);
+
+ if (dyn_rtt_nom_mask & 1)
+ default_rtt_nom[0] = lmc_modereg_params1.s.rtt_nom_00 = value;
+ if (dyn_rtt_nom_mask & 2)
+ default_rtt_nom[1] = lmc_modereg_params1.s.rtt_nom_01 = value;
+ if (dyn_rtt_nom_mask & 4)
+ default_rtt_nom[2] = lmc_modereg_params1.s.rtt_nom_10 = value;
+ if (dyn_rtt_nom_mask & 8)
+ default_rtt_nom[3] = lmc_modereg_params1.s.rtt_nom_11 = value;
+
+ ddr_rtt_nom_auto = 0;
+ }
+
+ if ((s = lookup_env_parameter("ddr_rtt_wr")) != NULL) {
+ uint64_t value = strtoul(s, NULL, 0);
+ for (i=0; i<4; ++i) {
+ INSRT_WR(&lmc_modereg_params1.u, i, value);
+ }
+ }
+
+ for (i = 0; i < 4; ++i) {
+ uint64_t value;
+ if ((s = lookup_env_parameter("ddr_rtt_wr_%1d%1d", !!(i&2), !!(i&1))) == NULL)
+ s = lookup_env_parameter("ddr%d_rtt_wr_%1d%1d", ddr_interface_num, !!(i&2), !!(i&1));
+ if (s != NULL) {
+ value = strtoul(s, NULL, 0);
+ INSRT_WR(&lmc_modereg_params1.u, i, value);
+ }
+ }
+
+ // Make sure pass 1 has valid RTT_WR settings, because
+ // configuration files may be set up for pass 2, and
+ // pass 1 supports no RTT_WR extension bits
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
+ for (i = 0; i < 4; ++i) {
+ if (EXTR_WR(lmc_modereg_params1.u, i) > 3) { // if 80 or undefined
+ INSRT_WR(&lmc_modereg_params1.u, i, 1); // FIXME? always insert 120
+ ddr_print("RTT_WR_%d%d set to 120 for CN88XX pass 1\n", !!(i&2), i&1);
+ }
+ }
+ }
+ if ((s = lookup_env_parameter("ddr_dic")) != NULL) {
+ uint64_t value = strtoul(s, NULL, 0);
+ for (i=0; i<4; ++i) {
+ lmc_modereg_params1.u &= ~((uint64_t)0x3 << (i*12+7));
+ lmc_modereg_params1.u |= ( (value & 0x3) << (i*12+7));
+ }
+ }
+
+ for (i=0; i<4; ++i) {
+ uint64_t value;
+ if ((s = lookup_env_parameter("ddr_dic_%1d%1d", !!(i&2), !!(i&1))) != NULL) {
+ value = strtoul(s, NULL, 0);
+ lmc_modereg_params1.u &= ~((uint64_t)0x3 << (i*12+7));
+ lmc_modereg_params1.u |= ( (value & 0x3) << (i*12+7));
+ }
+ }
+
+ if ((s = lookup_env_parameter_ull("ddr_modereg_params1")) != NULL) {
+ lmc_modereg_params1.u = strtoull(s, NULL, 0);
+ }
+
+ ddr_print("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_11],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_10],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_01],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00],
+ lmc_modereg_params1.s.rtt_nom_11,
+ lmc_modereg_params1.s.rtt_nom_10,
+ lmc_modereg_params1.s.rtt_nom_01,
+ lmc_modereg_params1.s.rtt_nom_00);
+
+ ddr_print("RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 3)],
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 2)],
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 1)],
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 0)],
+ EXTR_WR(lmc_modereg_params1.u, 3),
+ EXTR_WR(lmc_modereg_params1.u, 2),
+ EXTR_WR(lmc_modereg_params1.u, 1),
+ EXTR_WR(lmc_modereg_params1.u, 0));
+
+ ddr_print("DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->dic_ohms[lmc_modereg_params1.s.dic_11],
+ imp_values->dic_ohms[lmc_modereg_params1.s.dic_10],
+ imp_values->dic_ohms[lmc_modereg_params1.s.dic_01],
+ imp_values->dic_ohms[lmc_modereg_params1.s.dic_00],
+ lmc_modereg_params1.s.dic_11,
+ lmc_modereg_params1.s.dic_10,
+ lmc_modereg_params1.s.dic_01,
+ lmc_modereg_params1.s.dic_00);
+
+ ddr_print("MODEREG_PARAMS1 : 0x%016lx\n", lmc_modereg_params1.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num), lmc_modereg_params1.u);
+
+ } /* LMC(0)_MODEREG_PARAMS1 */
+
+ /* LMC(0)_MODEREG_PARAMS2 */
+ if (ddr_type == DDR4_DRAM) {
+ bdk_lmcx_modereg_params2_t lmc_modereg_params2;
+ lmc_modereg_params2.u = odt_config[odt_idx].odt_mask2.u;
+
+ for (i=0; i<4; ++i) {
+ uint64_t value;
+ if ((s = lookup_env_parameter("ddr_rtt_park_%1d%1d", !!(i&2), !!(i&1))) != NULL) {
+ value = strtoul(s, NULL, 0);
+ lmc_modereg_params2.u &= ~((uint64_t)0x7 << (i*10+0));
+ lmc_modereg_params2.u |= ( (value & 0x7) << (i*10+0));
+ }
+ }
+
+ if ((s = lookup_env_parameter("ddr_rtt_park")) != NULL) {
+ uint64_t value = strtoul(s, NULL, 0);
+ for (i=0; i<4; ++i) {
+ lmc_modereg_params2.u &= ~((uint64_t)0x7 << (i*10+0));
+ lmc_modereg_params2.u |= ( (value & 0x7) << (i*10+0));
+ }
+ }
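+ /* Per the shifts used in both loops above, each rank's RTT_PARK field
+    occupies the low three bits of a 10-bit group in MODEREG_PARAMS2
+    (bit offset i*10). */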
+
+ if ((s = lookup_env_parameter_ull("ddr_modereg_params2")) != NULL) {
+ lmc_modereg_params2.u = strtoull(s, NULL, 0);
+ }
+
+ ddr_print("RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_11],
+ imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_10],
+ imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_01],
+ imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_00],
+ lmc_modereg_params2.s.rtt_park_11,
+ lmc_modereg_params2.s.rtt_park_10,
+ lmc_modereg_params2.s.rtt_park_01,
+ lmc_modereg_params2.s.rtt_park_00);
+
+ ddr_print("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE",
+ lmc_modereg_params2.s.vref_range_11,
+ lmc_modereg_params2.s.vref_range_10,
+ lmc_modereg_params2.s.vref_range_01,
+ lmc_modereg_params2.s.vref_range_00);
+
+ ddr_print("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE",
+ lmc_modereg_params2.s.vref_value_11,
+ lmc_modereg_params2.s.vref_value_10,
+ lmc_modereg_params2.s.vref_value_01,
+ lmc_modereg_params2.s.vref_value_00);
+
+ ddr_print("MODEREG_PARAMS2 : 0x%016lx\n", lmc_modereg_params2.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num), lmc_modereg_params2.u);
+
+ } /* LMC(0)_MODEREG_PARAMS2 */
+
+ /* LMC(0)_MODEREG_PARAMS3 */
+ if (ddr_type == DDR4_DRAM) {
+ bdk_lmcx_modereg_params3_t lmc_modereg_params3;
+
+ lmc_modereg_params3.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS3(ddr_interface_num));
+
+ //lmc_modereg_params3.s.max_pd =
+ //lmc_modereg_params3.s.tc_ref =
+ //lmc_modereg_params3.s.vref_mon =
+ //lmc_modereg_params3.s.cal =
+ //lmc_modereg_params3.s.sre_abort =
+ //lmc_modereg_params3.s.rd_preamble =
+ //lmc_modereg_params3.s.wr_preamble =
+ //lmc_modereg_params3.s.par_lat_mode =
+ //lmc_modereg_params3.s.odt_pd =
+ //lmc_modereg_params3.s.ca_par_pers =
+ //lmc_modereg_params3.s.dm =
+ //lmc_modereg_params3.s.wr_dbi =
+ //lmc_modereg_params3.s.rd_dbi =
+ lmc_modereg_params3.s.tccd_l = max(divide_roundup(ddr4_tCCD_Lmin, tclk_psecs), 5ull) - 4;
+ //lmc_modereg_params3.s.lpasr =
+ //lmc_modereg_params3.s.crc =
+ //lmc_modereg_params3.s.gd =
+ //lmc_modereg_params3.s.pda =
+ //lmc_modereg_params3.s.temp_sense =
+ //lmc_modereg_params3.s.fgrm =
+ //lmc_modereg_params3.s.wr_cmd_lat =
+ //lmc_modereg_params3.s.mpr_fmt =
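+ /* Worked example for TCCD_L above (input assumed): with ddr4_tCCD_Lmin =
+    6250 psec at tclk_psecs = 1250, divide_roundup gives 5 CK; max(5,5) - 4
+    stores 1, i.e. the CSR holds tCCD_L minus 4 clocks. */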
+
+ if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
+ int delay = 0;
+ if ((lranks_per_prank == 4) && (ddr_hertz >= 1000000000))
+ delay = 1;
+ lmc_modereg_params3.s.xrank_add_tccd_l = delay;
+ lmc_modereg_params3.s.xrank_add_tccd_s = delay;
+ }
+
+ ddr_print("MODEREG_PARAMS3 : 0x%016lx\n", lmc_modereg_params3.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS3(ddr_interface_num), lmc_modereg_params3.u);
+ } /* LMC(0)_MODEREG_PARAMS3 */
+
+ /* LMC(0)_NXM */
+ {
+ bdk_lmcx_nxm_t lmc_nxm;
+ int num_bits = row_lsb + row_bits + lranks_bits - 26;
+ lmc_nxm.u = BDK_CSR_READ(node, BDK_LMCX_NXM(ddr_interface_num));
+
+ if (rank_mask & 0x1)
+ lmc_nxm.s.mem_msb_d0_r0 = num_bits;
+ if (rank_mask & 0x2)
+ lmc_nxm.s.mem_msb_d0_r1 = num_bits;
+ if (rank_mask & 0x4)
+ lmc_nxm.s.mem_msb_d1_r0 = num_bits;
+ if (rank_mask & 0x8)
+ lmc_nxm.s.mem_msb_d1_r1 = num_bits;
+
+ lmc_nxm.s.cs_mask = ~rank_mask & 0xff; /* Set the mask for non-existent ranks. */
+
+ if ((s = lookup_env_parameter_ull("ddr_nxm")) != NULL) {
+ lmc_nxm.u = strtoull(s, NULL, 0);
+ }
+ ddr_print("LMC_NXM : 0x%016lx\n", lmc_nxm.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_NXM(ddr_interface_num), lmc_nxm.u);
+ }
+
+ /* LMC(0)_WODT_MASK */
+ {
+ bdk_lmcx_wodt_mask_t lmc_wodt_mask;
+ lmc_wodt_mask.u = odt_config[odt_idx].odt_mask;
+
+ if ((s = lookup_env_parameter_ull("ddr_wodt_mask")) != NULL) {
+ lmc_wodt_mask.u = strtoull(s, NULL, 0);
+ }
+
+ ddr_print("WODT_MASK : 0x%016lx\n", lmc_wodt_mask.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_WODT_MASK(ddr_interface_num), lmc_wodt_mask.u);
+ }
+
+ /* LMC(0)_RODT_MASK */
+ {
+ int rankx;
+ bdk_lmcx_rodt_mask_t lmc_rodt_mask;
+ lmc_rodt_mask.u = odt_config[odt_idx].rodt_ctl;
+
+ if ((s = lookup_env_parameter_ull("ddr_rodt_mask")) != NULL) {
+ lmc_rodt_mask.u = strtoull(s, NULL, 0);
+ }
+
+ ddr_print("%-45s : 0x%016lx\n", "RODT_MASK", lmc_rodt_mask.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_RODT_MASK(ddr_interface_num), lmc_rodt_mask.u);
+
+ dyn_rtt_nom_mask = 0;
+ for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+ dyn_rtt_nom_mask |= ((lmc_rodt_mask.u >> (8*rankx)) & 0xff);
+ }
+ if (num_ranks == 4) {
+ /* Normally ODT1 is wired to rank 1. For quad-ranked DIMMs
+ ODT1 is wired to the third rank (rank 2). The mask,
+ dyn_rtt_nom_mask, is used to indicate for which ranks
+ to sweep RTT_NOM during read-leveling. Shift the bit
+ from the ODT1 position over to the "ODT2" position so
+ that the read-leveling analysis comes out right. */
+ int odt1_bit = dyn_rtt_nom_mask & 2;
+ dyn_rtt_nom_mask &= ~2;
+ dyn_rtt_nom_mask |= odt1_bit<<1;
+ }
+ ddr_print("%-45s : 0x%02x\n", "DYN_RTT_NOM_MASK", dyn_rtt_nom_mask);
+ }
+
+ /* LMC(0)_COMP_CTL2 */
+ {
+ bdk_lmcx_comp_ctl2_t comp_ctl2;
+
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+
+ comp_ctl2.s.dqx_ctl = odt_config[odt_idx].odt_ena;
+ comp_ctl2.s.ck_ctl = (custom_lmc_config->ck_ctl == 0) ? 4 : custom_lmc_config->ck_ctl; /* Default 4=34.3 ohm */
+ comp_ctl2.s.cmd_ctl = (custom_lmc_config->cmd_ctl == 0) ? 4 : custom_lmc_config->cmd_ctl; /* Default 4=34.3 ohm */
+ comp_ctl2.s.control_ctl = (custom_lmc_config->ctl_ctl == 0) ? 4 : custom_lmc_config->ctl_ctl; /* Default 4=34.3 ohm */
+
+ // NOTE: these are now done earlier, in Step 6.9.3
+ // comp_ctl2.s.ntune_offset = 0;
+ // comp_ctl2.s.ptune_offset = 0;
+
+ ddr_rodt_ctl_auto = custom_lmc_config->ddr_rodt_ctl_auto;
+ if ((s = lookup_env_parameter("ddr_rodt_ctl_auto")) != NULL) {
+ ddr_rodt_ctl_auto = !!strtoul(s, NULL, 0);
+ }
+
+ default_rodt_ctl = odt_config[odt_idx].qs_dic;
+ if ((s = lookup_env_parameter("ddr_rodt_ctl")) == NULL)
+ s = lookup_env_parameter("ddr%d_rodt_ctl", ddr_interface_num);
+ if (s != NULL) {
+ default_rodt_ctl = strtoul(s, NULL, 0);
+ ddr_rodt_ctl_auto = 0;
+ }
+
+ comp_ctl2.s.rodt_ctl = default_rodt_ctl;
+
+ // if DDR4, force CK_CTL to 26 ohms if it is currently 34 ohms, and DCLK speed is 1 GHz or more...
+ if ((ddr_type == DDR4_DRAM) && (comp_ctl2.s.ck_ctl == ddr4_driver_34_ohm) && (ddr_hertz >= 1000000000)) {
+ comp_ctl2.s.ck_ctl = ddr4_driver_26_ohm; // lowest for DDR4 is 26 ohms
+ ddr_print("Forcing DDR4 COMP_CTL2[CK_CTL] to %d, %d ohms\n", comp_ctl2.s.ck_ctl,
+ imp_values->drive_strength[comp_ctl2.s.ck_ctl]);
+ }
+
+ if ((s = lookup_env_parameter("ddr_ck_ctl")) != NULL) {
+ comp_ctl2.s.ck_ctl = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_cmd_ctl")) != NULL) {
+ comp_ctl2.s.cmd_ctl = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_control_ctl")) != NULL) {
+ comp_ctl2.s.control_ctl = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_dqx_ctl")) != NULL) {
+ comp_ctl2.s.dqx_ctl = strtoul(s, NULL, 0);
+ }
+
+ ddr_print("%-45s : %d, %d ohms\n", "DQX_CTL ", comp_ctl2.s.dqx_ctl,
+ imp_values->dqx_strength [comp_ctl2.s.dqx_ctl ]);
+ ddr_print("%-45s : %d, %d ohms\n", "CK_CTL ", comp_ctl2.s.ck_ctl,
+ imp_values->drive_strength[comp_ctl2.s.ck_ctl ]);
+ ddr_print("%-45s : %d, %d ohms\n", "CMD_CTL ", comp_ctl2.s.cmd_ctl,
+ imp_values->drive_strength[comp_ctl2.s.cmd_ctl ]);
+ ddr_print("%-45s : %d, %d ohms\n", "CONTROL_CTL ", comp_ctl2.s.control_ctl,
+ imp_values->drive_strength[comp_ctl2.s.control_ctl]);
+ ddr_print("Read ODT_CTL : 0x%x (%d ohms)\n",
+ comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[comp_ctl2.s.rodt_ctl]);
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), comp_ctl2.u);
+ }
+
+ /* LMC(0)_PHY_CTL */
+ {
+ bdk_lmcx_phy_ctl_t lmc_phy_ctl;
+ lmc_phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
+ lmc_phy_ctl.s.ts_stagger = 0;
+
+ if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (lranks_per_prank > 1)) {
+ lmc_phy_ctl.cn81xx.c0_sel = lmc_phy_ctl.cn81xx.c1_sel = 2; // C0 is TEN, C1 is A17
+ ddr_print("N%d.LMC%d: 3DS: setting PHY_CTL[cx_csel] = %d\n",
+ node, ddr_interface_num, lmc_phy_ctl.cn81xx.c1_sel);
+ }
+
+ ddr_print("PHY_CTL : 0x%016lx\n", lmc_phy_ctl.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), lmc_phy_ctl.u);
+ }
+
+ /* LMC(0)_DIMM0/1_PARAMS */
+ if (spd_rdimm) {
+ bdk_lmcx_dimm_ctl_t lmc_dimm_ctl;
+
+ for (didx = 0; didx < (unsigned)dimm_count; ++didx) {
+ bdk_lmcx_dimmx_params_t lmc_dimmx_params;
+ int dimm = didx;
+ int rc;
+
+ lmc_dimmx_params.u = BDK_CSR_READ(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm));
+
+
+ if (ddr_type == DDR4_DRAM) {
+
+ bdk_lmcx_dimmx_ddr4_params0_t lmc_dimmx_ddr4_params0;
+ bdk_lmcx_dimmx_ddr4_params1_t lmc_dimmx_ddr4_params1;
+ bdk_lmcx_ddr4_dimm_ctl_t lmc_ddr4_dimm_ctl;
+
+ lmc_dimmx_params.s.rc0 = 0;
+ lmc_dimmx_params.s.rc1 = 0;
+ lmc_dimmx_params.s.rc2 = 0;
+
+ rc = read_spd(node, &dimm_config_table[didx], DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL);
+ lmc_dimmx_params.s.rc3 = (rc >> 4) & 0xf;
+ lmc_dimmx_params.s.rc4 = ((rc >> 0) & 0x3) << 2;
+ lmc_dimmx_params.s.rc4 |= ((rc >> 2) & 0x3) << 0;
+
+ rc = read_spd(node, &dimm_config_table[didx], DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK);
+ lmc_dimmx_params.s.rc5 = ((rc >> 0) & 0x3) << 2;
+ lmc_dimmx_params.s.rc5 |= ((rc >> 2) & 0x3) << 0;
+
+ lmc_dimmx_params.s.rc6 = 0;
+ lmc_dimmx_params.s.rc7 = 0;
+ lmc_dimmx_params.s.rc8 = 0;
+ lmc_dimmx_params.s.rc9 = 0;
+
+ /*
+ ** rc10 DDR4 RDIMM Operating Speed
+ ** ==== =========================================================
+ ** 0 tclk_psecs >= 1250 psec DDR4-1600 (1250 ps)
+ ** 1 1250 psec > tclk_psecs >= 1071 psec DDR4-1866 (1071 ps)
+ ** 2 1071 psec > tclk_psecs >= 938 psec DDR4-2133 ( 938 ps)
+ ** 3 938 psec > tclk_psecs >= 833 psec DDR4-2400 ( 833 ps)
+ ** 4 833 psec > tclk_psecs >= 750 psec DDR4-2666 ( 750 ps)
+ ** 5 750 psec > tclk_psecs >= 625 psec DDR4-3200 ( 625 ps)
+ */
+ lmc_dimmx_params.s.rc10 = 0;
+ if (1250 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 1;
+ if (1071 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 2;
+ if (938 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 3;
+ if (833 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 4;
+ if (750 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 5;
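+ /* Worked example: tclk_psecs = 938 passes the 1250 and 1071 tests but not
+    the 938 test, so rc10 = 2 (DDR4-2133 per the table above). */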
+
+ lmc_dimmx_params.s.rc11 = 0;
+ lmc_dimmx_params.s.rc12 = 0;
+ lmc_dimmx_params.s.rc13 = (spd_dimm_type == 4) ? 0 : 4; /* bit 2: 0=LRDIMM, 1=RDIMM */
+ lmc_dimmx_params.s.rc13 |= (ddr_type == DDR4_DRAM) ? (spd_addr_mirror << 3) : 0;
+ lmc_dimmx_params.s.rc14 = 0;
+ //lmc_dimmx_params.s.rc15 = 4; /* 0 nCK latency adder */
+ lmc_dimmx_params.s.rc15 = 0; /* 1 nCK latency adder */
+
+ lmc_dimmx_ddr4_params0.u = 0;
+
+ lmc_dimmx_ddr4_params0.s.rc8x = 0;
+ lmc_dimmx_ddr4_params0.s.rc7x = 0;
+ lmc_dimmx_ddr4_params0.s.rc6x = 0;
+ lmc_dimmx_ddr4_params0.s.rc5x = 0;
+ lmc_dimmx_ddr4_params0.s.rc4x = 0;
+
+ lmc_dimmx_ddr4_params0.s.rc3x = compute_rc3x(tclk_psecs);
+
+ lmc_dimmx_ddr4_params0.s.rc2x = 0;
+ lmc_dimmx_ddr4_params0.s.rc1x = 0;
+
+ lmc_dimmx_ddr4_params1.u = 0;
+
+ lmc_dimmx_ddr4_params1.s.rcbx = 0;
+ lmc_dimmx_ddr4_params1.s.rcax = 0;
+ lmc_dimmx_ddr4_params1.s.rc9x = 0;
+
+ lmc_ddr4_dimm_ctl.u = 0;
+ lmc_ddr4_dimm_ctl.s.ddr4_dimm0_wmask = 0x004;
+ lmc_ddr4_dimm_ctl.s.ddr4_dimm1_wmask = (dimm_count > 1) ? 0x004 : 0x0000;
+
+ /*
+ * Handle any overrides from envvars here...
+ */
+ if ((s = lookup_env_parameter("ddr_ddr4_params0")) != NULL) {
+ lmc_dimmx_ddr4_params0.u = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_ddr4_params1")) != NULL) {
+ lmc_dimmx_ddr4_params1.u = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_ddr4_dimm_ctl")) != NULL) {
+ lmc_ddr4_dimm_ctl.u = strtoul(s, NULL, 0);
+ }
+
+ for (i=0; i<11; ++i) {
+ uint64_t value;
+ if ((s = lookup_env_parameter("ddr_ddr4_rc%1xx", i+1)) != NULL) {
+ value = strtoul(s, NULL, 0);
+ if (i < 8) {
+ lmc_dimmx_ddr4_params0.u &= ~((uint64_t)0xff << (i*8));
+ lmc_dimmx_ddr4_params0.u |= (value << (i*8));
+ } else {
+ lmc_dimmx_ddr4_params1.u &= ~((uint64_t)0xff << ((i-8)*8));
+ lmc_dimmx_ddr4_params1.u |= (value << ((i-8)*8));
+ }
+ }
+ }
+
+ /*
+ * write the final CSR values
+ */
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_DDR4_PARAMS0(ddr_interface_num, dimm), lmc_dimmx_ddr4_params0.u);
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_DDR4_DIMM_CTL(ddr_interface_num), lmc_ddr4_dimm_ctl.u);
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_DDR4_PARAMS1(ddr_interface_num, dimm), lmc_dimmx_ddr4_params1.u);
+
+ ddr_print("DIMM%d Register Control Words RCBx:RC1x : %x %x %x %x %x %x %x %x %x %x %x\n",
+ dimm,
+ lmc_dimmx_ddr4_params1.s.rcbx,
+ lmc_dimmx_ddr4_params1.s.rcax,
+ lmc_dimmx_ddr4_params1.s.rc9x,
+ lmc_dimmx_ddr4_params0.s.rc8x,
+ lmc_dimmx_ddr4_params0.s.rc7x,
+ lmc_dimmx_ddr4_params0.s.rc6x,
+ lmc_dimmx_ddr4_params0.s.rc5x,
+ lmc_dimmx_ddr4_params0.s.rc4x,
+ lmc_dimmx_ddr4_params0.s.rc3x,
+ lmc_dimmx_ddr4_params0.s.rc2x,
+ lmc_dimmx_ddr4_params0.s.rc1x );
+
+ } else { /* if (ddr_type == DDR4_DRAM) */
+ rc = read_spd(node, &dimm_config_table[didx], 69);
+ lmc_dimmx_params.s.rc0 = (rc >> 0) & 0xf;
+ lmc_dimmx_params.s.rc1 = (rc >> 4) & 0xf;
+
+ rc = read_spd(node, &dimm_config_table[didx], 70);
+ lmc_dimmx_params.s.rc2 = (rc >> 0) & 0xf;
+ lmc_dimmx_params.s.rc3 = (rc >> 4) & 0xf;
+
+ rc = read_spd(node, &dimm_config_table[didx], 71);
+ lmc_dimmx_params.s.rc4 = (rc >> 0) & 0xf;
+ lmc_dimmx_params.s.rc5 = (rc >> 4) & 0xf;
+
+ rc = read_spd(node, &dimm_config_table[didx], 72);
+ lmc_dimmx_params.s.rc6 = (rc >> 0) & 0xf;
+ lmc_dimmx_params.s.rc7 = (rc >> 4) & 0xf;
+
+ rc = read_spd(node, &dimm_config_table[didx], 73);
+ lmc_dimmx_params.s.rc8 = (rc >> 0) & 0xf;
+ lmc_dimmx_params.s.rc9 = (rc >> 4) & 0xf;
+
+ rc = read_spd(node, &dimm_config_table[didx], 74);
+ lmc_dimmx_params.s.rc10 = (rc >> 0) & 0xf;
+ lmc_dimmx_params.s.rc11 = (rc >> 4) & 0xf;
+
+ rc = read_spd(node, &dimm_config_table[didx], 75);
+ lmc_dimmx_params.s.rc12 = (rc >> 0) & 0xf;
+ lmc_dimmx_params.s.rc13 = (rc >> 4) & 0xf;
+
+ rc = read_spd(node, &dimm_config_table[didx], 76);
+ lmc_dimmx_params.s.rc14 = (rc >> 0) & 0xf;
+ lmc_dimmx_params.s.rc15 = (rc >> 4) & 0xf;
+
+
+ if ((s = lookup_env_parameter("ddr_clk_drive")) != NULL) {
+ if (strcmp(s,"light") == 0) {
+ lmc_dimmx_params.s.rc5 = 0x0; /* Light Drive */
+ }
+ if (strcmp(s,"moderate") == 0) {
+ lmc_dimmx_params.s.rc5 = 0x5; /* Moderate Drive */
+ }
+ if (strcmp(s,"strong") == 0) {
+ lmc_dimmx_params.s.rc5 = 0xA; /* Strong Drive */
+ }
+ }
+
+ if ((s = lookup_env_parameter("ddr_cmd_drive")) != NULL) {
+ if (strcmp(s,"light") == 0) {
+ lmc_dimmx_params.s.rc3 = 0x0; /* Light Drive */
+ }
+ if (strcmp(s,"moderate") == 0) {
+ lmc_dimmx_params.s.rc3 = 0x5; /* Moderate Drive */
+ }
+ if (strcmp(s,"strong") == 0) {
+ lmc_dimmx_params.s.rc3 = 0xA; /* Strong Drive */
+ }
+ }
+
+ if ((s = lookup_env_parameter("ddr_ctl_drive")) != NULL) {
+ if (strcmp(s,"light") == 0) {
+ lmc_dimmx_params.s.rc4 = 0x0; /* Light Drive */
+ }
+ if (strcmp(s,"moderate") == 0) {
+ lmc_dimmx_params.s.rc4 = 0x5; /* Moderate Drive */
+ }
+ }
+
+
+ /*
+ ** rc10 DDR3 RDIMM Operating Speed
+ ** ==== =========================================================
+ ** 0 tclk_psecs >= 2500 psec DDR3/DDR3L-800 (default)
+ ** 1 2500 psec > tclk_psecs >= 1875 psec DDR3/DDR3L-1066
+ ** 2 1875 psec > tclk_psecs >= 1500 psec DDR3/DDR3L-1333
+ ** 3 1500 psec > tclk_psecs >= 1250 psec DDR3/DDR3L-1600
+ ** 4 1250 psec > tclk_psecs >= 1071 psec DDR3-1866
+ */
+ lmc_dimmx_params.s.rc10 = 0;
+ if (2500 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 1;
+ if (1875 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 2;
+ if (1500 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 3;
+ if (1250 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 4;
+
+ } /* if (ddr_type == DDR4_DRAM) */
+
+ if ((s = lookup_env_parameter("ddr_dimmx_params")) != NULL) {
+ lmc_dimmx_params.u = strtoul(s, NULL, 0);
+ }
+
+ for (i=0; i<16; ++i) {
+ uint64_t value;
+ if ((s = lookup_env_parameter("ddr_rc%d", i)) != NULL) {
+ value = strtoul(s, NULL, 0);
+ lmc_dimmx_params.u &= ~((uint64_t)0xf << (i*4));
+ lmc_dimmx_params.u |= ( value << (i*4));
+ }
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm), lmc_dimmx_params.u);
+
+ ddr_print("DIMM%d Register Control Words RC15:RC0 : %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",
+ dimm,
+ lmc_dimmx_params.s.rc15,
+ lmc_dimmx_params.s.rc14,
+ lmc_dimmx_params.s.rc13,
+ lmc_dimmx_params.s.rc12,
+ lmc_dimmx_params.s.rc11,
+ lmc_dimmx_params.s.rc10,
+ lmc_dimmx_params.s.rc9 ,
+ lmc_dimmx_params.s.rc8 ,
+ lmc_dimmx_params.s.rc7 ,
+ lmc_dimmx_params.s.rc6 ,
+ lmc_dimmx_params.s.rc5 ,
+ lmc_dimmx_params.s.rc4 ,
+ lmc_dimmx_params.s.rc3 ,
+ lmc_dimmx_params.s.rc2 ,
+ lmc_dimmx_params.s.rc1 ,
+ lmc_dimmx_params.s.rc0 );
+ } /* for didx */
+
+ if (ddr_type == DDR4_DRAM) {
+
+ /* LMC0_DIMM_CTL */
+ lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num));
+ lmc_dimm_ctl.s.dimm0_wmask = 0xdf3f;
+ lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0xdf3f : 0x0000;
+ lmc_dimm_ctl.s.tcws = 0x4e0;
+ lmc_dimm_ctl.cn88xx.parity = custom_lmc_config->parity;
+
+ if ((s = lookup_env_parameter("ddr_dimm0_wmask")) != NULL) {
+ lmc_dimm_ctl.s.dimm0_wmask = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_dimm1_wmask")) != NULL) {
+ lmc_dimm_ctl.s.dimm1_wmask = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_dimm_ctl_parity")) != NULL) {
+ lmc_dimm_ctl.cn88xx.parity = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_dimm_ctl_tcws")) != NULL) {
+ lmc_dimm_ctl.s.tcws = strtoul(s, NULL, 0);
+ }
+
+ ddr_print("LMC DIMM_CTL : 0x%016lx\n", lmc_dimm_ctl.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
+
+ perform_octeon3_ddr3_sequence(node, rank_mask,
+ ddr_interface_num, 0x7 ); /* Init RCW */
+
+ /* Write RC0D last */
+ lmc_dimm_ctl.s.dimm0_wmask = 0x2000;
+ lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0x2000 : 0x0000;
+ ddr_print("LMC DIMM_CTL : 0x%016lx\n", lmc_dimm_ctl.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
+
+ /* Don't write any extended registers the second time */
+ DRAM_CSR_WRITE(node, BDK_LMCX_DDR4_DIMM_CTL(ddr_interface_num), 0);
+
+ perform_octeon3_ddr3_sequence(node, rank_mask,
+ ddr_interface_num, 0x7 ); /* Init RCW */
+ } else {
+
+ /* LMC0_DIMM_CTL */
+ lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num));
+ lmc_dimm_ctl.s.dimm0_wmask = 0xffff;
+ lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0xffff : 0x0000;
+ lmc_dimm_ctl.s.tcws = 0x4e0;
+ lmc_dimm_ctl.cn88xx.parity = custom_lmc_config->parity;
+
+ if ((s = lookup_env_parameter("ddr_dimm0_wmask")) != NULL) {
+ lmc_dimm_ctl.s.dimm0_wmask = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_dimm1_wmask")) != NULL) {
+ lmc_dimm_ctl.s.dimm1_wmask = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_dimm_ctl_parity")) != NULL) {
+ lmc_dimm_ctl.cn88xx.parity = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_dimm_ctl_tcws")) != NULL) {
+ lmc_dimm_ctl.s.tcws = strtoul(s, NULL, 0);
+ }
+
+ ddr_print("LMC DIMM_CTL : 0x%016lx\n", lmc_dimm_ctl.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
+
+ perform_octeon3_ddr3_sequence(node, rank_mask,
+ ddr_interface_num, 0x7 ); /* Init RCW */
+ }
+ } else { /* if (spd_rdimm) */
+ /* Disable register control writes for unbuffered */
+ bdk_lmcx_dimm_ctl_t lmc_dimm_ctl;
+ lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num));
+ lmc_dimm_ctl.s.dimm0_wmask = 0;
+ lmc_dimm_ctl.s.dimm1_wmask = 0;
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
+ } /* if (spd_rdimm) */
+
+ /*
+ * Comments (steps 3 through 5) continue in perform_octeon3_ddr3_sequence()
+ */
+ {
+ bdk_lmcx_modereg_params0_t lmc_modereg_params0;
+
+ if (ddr_memory_preserved(node)) {
+ /* Contents are being preserved. Take DRAM out of
+ self-refresh first. Then init steps can proceed
+ normally */
+ perform_octeon3_ddr3_sequence(node, rank_mask,
+ ddr_interface_num, 3); /* self-refresh exit */
+ }
+
+ lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
+
+ lmc_modereg_params0.s.dllr = 1; /* Set during first init sequence */
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
+
+ perform_ddr_init_sequence(node, rank_mask, ddr_interface_num);
+
+ lmc_modereg_params0.s.dllr = 0; /* Clear for normal operation */
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
+ }
+
+ // NOTE: this must be done for pass 2.x and pass 1.x
+ if ((spd_rdimm) && (ddr_type == DDR4_DRAM)) {
+ VB_PRT(VBL_FAE, "Running init sequence 1\n");
+ change_rdimm_mpr_pattern(node, rank_mask, ddr_interface_num, dimm_count);
+ }
+
+#define DEFAULT_INTERNAL_VREF_TRAINING_LIMIT 5
+ int internal_retries = 0;
+ int deskew_training_errors;
+ int dac_eval_retries;
+ int dac_settings[9];
+ int num_samples;
+ int sample, lane;
+ int last_lane = ((ddr_interface_64b) ? 8 : 4) + use_ecc;
+
+#define DEFAULT_DAC_SAMPLES 7 // originally was 5
+#define DAC_RETRIES_LIMIT 2
+
+ typedef struct {
+ int16_t bytes[DEFAULT_DAC_SAMPLES];
+ } bytelane_sample_t;
+ bytelane_sample_t lanes[9];
+
+ memset(lanes, 0, sizeof(lanes));
+
+ if ((ddr_type == DDR4_DRAM) && !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
+ num_samples = DEFAULT_DAC_SAMPLES;
+ } else {
+ num_samples = 1; // if DDR3 or no ability to write DAC values
+ }
+
+ perform_internal_vref_training:
+
+ for (sample = 0; sample < num_samples; sample++) {
+
+ dac_eval_retries = 0;
+
+ do { // make offset and internal vref training repeatable
+
+ /* 6.9.8 LMC Offset Training
+ LMC requires input-receiver offset training. */
+ Perform_Offset_Training(node, rank_mask, ddr_interface_num);
+
+ /* 6.9.9 LMC Internal Vref Training
+ LMC requires input-reference-voltage training. */
+ Perform_Internal_VREF_Training(node, rank_mask, ddr_interface_num);
+
+ // read and maybe display the DAC values for a sample
+ read_DAC_DBI_settings(node, ddr_interface_num, /*DAC*/1, dac_settings);
+ if ((num_samples == 1) || dram_is_verbose(VBL_DEV)) {
+ display_DAC_DBI_settings(node, ddr_interface_num, /*DAC*/1, use_ecc,
+ dac_settings, "Internal VREF");
+ }
+
+ // for DDR4, evaluate the DAC settings and retry if any issues
+ if (ddr_type == DDR4_DRAM) {
+ if (evaluate_DAC_settings(ddr_interface_64b, use_ecc, dac_settings)) {
+ if (++dac_eval_retries > DAC_RETRIES_LIMIT) {
+ ddr_print("N%d.LMC%d: DDR4 internal VREF DAC settings: retries exhausted; continuing...\n",
+ node, ddr_interface_num);
+ } else {
+ ddr_print("N%d.LMC%d: DDR4 internal VREF DAC settings inconsistent; retrying....\n",
+ node, ddr_interface_num); // FIXME? verbosity!!!
+ continue;
+ }
+ }
+ if (num_samples > 1) { // taking multiple samples, otherwise do nothing
+ // good sample or exhausted retries, record it
+ for (lane = 0; lane < last_lane; lane++) {
+ lanes[lane].bytes[sample] = dac_settings[lane];
+ }
+ }
+ }
+ break; // done if DDR3, or good sample, or exhausted retries
+
+ } while (1);
+
+ } /* for (sample = 0; sample < num_samples; sample++) */
+
+ if (num_samples > 1) {
+ debug_print("N%d.LMC%d: DDR4 internal VREF DAC settings: processing multiple samples...\n",
+ node, ddr_interface_num);
+
+ for (lane = 0; lane < last_lane; lane++) {
+ dac_settings[lane] = process_samples_average(&lanes[lane].bytes[0], num_samples,
+ ddr_interface_num, lane);
+ }
+ display_DAC_DBI_settings(node, ddr_interface_num, /*DAC*/1, use_ecc, dac_settings, "Averaged VREF");
+
+ // finally, write the final DAC values
+ for (lane = 0; lane < last_lane; lane++) {
+ load_dac_override(node, ddr_interface_num, dac_settings[lane], lane);
+ }
+ }
+
+#if DAC_OVERRIDE_EARLY
+ // as a second step, after internal VREF training, before starting deskew training:
+ // for DDR3 and THUNDER pass 2.x, override the DAC setting to 127
+ if ((ddr_type == DDR3_DRAM) && !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ load_dac_override(node, ddr_interface_num, 127, /* all */0x0A);
+ ddr_print("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127 (early).\n",
+ node, ddr_interface_num);
+ }
+#endif
+
+ /*
+ * 6.9.10 LMC Read Deskew Training
+ * LMC requires input-read-data deskew training.
+ */
+ if (! disable_deskew_training) {
+
+ deskew_training_errors = Perform_Read_Deskew_Training(node, rank_mask, ddr_interface_num,
+ spd_rawcard_AorB, 0, ddr_interface_64b);
+
+ // All the Deskew lock and saturation retries (may) have been done,
+ // but we ended up with nibble errors; so, as a last-ditch effort,
+ // enable retries of the Internal Vref Training...
+ if (deskew_training_errors) {
+ if (internal_retries < DEFAULT_INTERNAL_VREF_TRAINING_LIMIT) {
+ internal_retries++;
+ VB_PRT(VBL_FAE, "N%d.LMC%d: Deskew training results still unsettled - retrying internal Vref training (%d)\n",
+ node, ddr_interface_num, internal_retries);
+ goto perform_internal_vref_training;
+ } else {
+ VB_PRT(VBL_FAE, "N%d.LMC%d: Deskew training incomplete - %d retries exhausted, but continuing...\n",
+ node, ddr_interface_num, internal_retries);
+ }
+ }
+
+ // FIXME: treat this as the final DSK print from now on, and print if VBL_NORM or above
+ // also, save the results of the original training
+ Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &deskew_training_results, VBL_NORM);
+
+ // setup write bit-deskew if enabled...
+ if (enable_write_deskew) {
+ ddr_print("N%d.LMC%d: WRITE BIT-DESKEW feature enabled- going NEUTRAL.\n",
+ node, ddr_interface_num);
+ Neutral_Write_Deskew_Setup(node, ddr_interface_num);
+ } /* if (enable_write_deskew) */
+
+ } /* if (! disable_deskew_training) */
+
+#if !DAC_OVERRIDE_EARLY
+ // as a final step in internal VREF training, after deskew training but before HW WL:
+ // for DDR3 and THUNDER pass 2.x, override the DAC setting to 127
+ if ((ddr_type == DDR3_DRAM) && !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ load_dac_override(node, ddr_interface_num, 127, /* all */0x0A);
+ ddr_print("N%d.LMC%d, Overriding DDR3 internal VREF DAC settings to 127 (late).\n",
+ node, ddr_interface_num);
+ }
+#endif
+
+
+ /* LMC(0)_EXT_CONFIG */
+ {
+ bdk_lmcx_ext_config_t ext_config;
+ ext_config.u = BDK_CSR_READ(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num));
+ ext_config.s.vrefint_seq_deskew = 0;
+ ext_config.s.read_ena_bprch = 1;
+ ext_config.s.read_ena_fprch = 1;
+ ext_config.s.drive_ena_fprch = 1;
+ ext_config.s.drive_ena_bprch = 1;
+ ext_config.s.invert_data = 0; // make sure this is OFF for all current chips
+
+ if ((s = lookup_env_parameter("ddr_read_fprch")) != NULL) {
+ ext_config.s.read_ena_fprch = strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_read_bprch")) != NULL) {
+ ext_config.s.read_ena_bprch = strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_drive_fprch")) != NULL) {
+ ext_config.s.drive_ena_fprch = strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_drive_bprch")) != NULL) {
+ ext_config.s.drive_ena_bprch = strtoul(s, NULL, 0);
+ }
+
+ if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (lranks_per_prank > 1)) {
+ ext_config.s.dimm0_cid = ext_config.s.dimm1_cid = lranks_bits;
+ ddr_print("N%d.LMC%d: 3DS: setting EXT_CONFIG[dimmx_cid] = %d\n",
+ node, ddr_interface_num, ext_config.s.dimm0_cid);
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num), ext_config.u);
+ ddr_print("%-45s : 0x%016lx\n", "EXT_CONFIG", ext_config.u);
+ }
+
+
+ {
+ int save_ref_zqcs_int;
+ uint64_t temp_delay_usecs;
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+
+ /* Temporarily select the minimum ZQCS interval and wait
+ long enough for a few ZQCS calibrations to occur. This
+ should ensure that the calibration circuitry is
+ stabilized before read/write leveling occurs. */
+ save_ref_zqcs_int = lmc_config.s.ref_zqcs_int;
+ lmc_config.s.ref_zqcs_int = 1 | (32<<7); /* set smallest interval */
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+ BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+
+ /* Compute an appropriate delay based on the current ZQCS
+ interval. The delay should be long enough for the
+ current ZQCS delay counter to expire plus ten of the
+ minimum intervals to ensure that some calibrations
+ occur. */
+ temp_delay_usecs = (((uint64_t)save_ref_zqcs_int >> 7)
+ * tclk_psecs * 100 * 512 * 128) / (10000*10000)
+ + 10 * ((uint64_t)32 * tclk_psecs * 100 * 512 * 128) / (10000*10000);
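+ /* The 100/(10000*10000) factors net out to 1e-6, i.e. a psec-to-usec
+    conversion done in integer math. Worked example: if the saved ZQCS field
+    (bits <18:7>) held 128, the first term is 128 * 512 * 128 CK at 1250
+    psec/CK, about 10486 usecs. */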
+
+ VB_PRT(VBL_FAE, "N%d.LMC%d: Waiting %ld usecs for ZQCS calibrations to start\n",
+ node, ddr_interface_num, temp_delay_usecs);
+ bdk_wait_usec(temp_delay_usecs);
+
+ lmc_config.s.ref_zqcs_int = save_ref_zqcs_int; /* Restore computed interval */
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+ BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ }
+
+ /*
+ * 6.9.11 LMC Write Leveling
+ *
+ * LMC supports an automatic write leveling like that described in the
+ * JEDEC DDR3 specifications separately per byte-lane.
+ *
+ * All of DDR PLL, LMC CK, LMC DRESET, and early LMC initializations must
+ * be completed prior to starting this LMC write-leveling sequence.
+ *
+ * There are many possible procedures that will write-level all the
+ * attached DDR3 DRAM parts. One possibility is for software to simply
+ * write the desired values into LMC(0)_WLEVEL_RANK(0..3). This section
+ * describes one possible sequence that uses LMC's auto write-leveling
+ * capabilities.
+ *
+ * 1. If the DQS/DQ delays on the board may be more than the ADD/CMD
+ * delays, then ensure that LMC(0)_CONFIG[EARLY_DQX] is set at this
+ * point.
+ *
+ * Do the remaining steps 2-7 separately for each rank i with attached
+ * DRAM.
+ *
+ * 2. Write LMC(0)_WLEVEL_RANKi = 0.
+ *
+ * 3. For ×8 parts:
+ *
+ * Without changing any other fields in LMC(0)_WLEVEL_CTL, write
+ * LMC(0)_WLEVEL_CTL[LANEMASK] to select all byte lanes with attached
+ * DRAM.
+ *
+ * For ×16 parts:
+ *
+ * Without changing any other fields in LMC(0)_WLEVEL_CTL, write
+ * LMC(0)_WLEVEL_CTL[LANEMASK] to select all even byte lanes with
+ * attached DRAM.
+ *
+ * 4. Without changing any other fields in LMC(0)_CONFIG,
+ *
+ * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select write-leveling
+ *
+ * o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
+ *
+ * o write LMC(0)_SEQ_CTL[INIT_START] = 1
+ *
+ * LMC will initiate write-leveling at this point. Assuming
+ * LMC(0)_WLEVEL_CTL [SSET] = 0, LMC first enables write-leveling on
+ * the selected DRAM rank via a DDR3 MR1 write, then sequences through
+ * and accumulates write-leveling results for eight different delay
+ * settings twice, starting at a delay of zero in this case since
+ * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] = 0, increasing by 1/8 CK each
+ * setting, covering a total distance of one CK, then disables the
+ * write-leveling via another DDR3 MR1 write.
+ *
+ * After the sequence through 16 delay settings is complete:
+ *
+ * o LMC sets LMC(0)_WLEVEL_RANKi[STATUS] = 3
+ *
+ * o LMC sets LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] (for all ranks selected
+ * by LMC(0)_WLEVEL_CTL[LANEMASK]) to indicate the first write
+ * leveling result of 1 that followed result of 0 during the
+ * sequence, except that the LMC always writes
+ * LMC(0)_WLEVEL_RANKi[BYTE*<0>]=0.
+ *
+ * o Software can read the eight write-leveling results from the first
+ * pass through the delay settings by reading
+ * LMC(0)_WLEVEL_DBG[BITMASK] (after writing
+ * LMC(0)_WLEVEL_DBG[BYTE]). (LMC does not retain the write-leveling
+ * results from the second pass through the eight delay
+ * settings. They should often be identical to the
+ * LMC(0)_WLEVEL_DBG[BITMASK] results, though.)
+ *
+ * 5. Wait until LMC(0)_WLEVEL_RANKi[STATUS] != 2.
+ *
+ * LMC will have updated LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] for all byte
+ * lanes selected by LMC(0)_WLEVEL_CTL[LANEMASK] at this point.
+ * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] will still be the value that
+ * software wrote in substep 2 above, which is 0.
+ *
+ * 6. For ×16 parts:
+ *
+ * Without changing any other fields in LMC(0)_WLEVEL_CTL, write
+ * LMC(0)_WLEVEL_CTL[LANEMASK] to select all odd byte lanes with
+ * attached DRAM.
+ *
+ * Repeat substeps 4 and 5 with this new LMC(0)_WLEVEL_CTL[LANEMASK]
+ * setting. Skip to substep 7 if this has already been done.
+ *
+ * For ×8 parts:
+ *
+ * Skip this substep. Go to substep 7.
+ *
+ * 7. Calculate LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings for all byte
+ * lanes on all ranks with attached DRAM.
+ *
+ * At this point, all byte lanes on rank i with attached DRAM should
+ * have been write-leveled, and LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] has
+ * the result for each byte lane.
+ *
+ * But note that the DDR3 write-leveling sequence will only determine
+ * the delay modulo the CK cycle time, and cannot determine how many
+ * additional CK cycles of delay are present. Software must calculate
+ * the number of CK cycles, or equivalently, the
+ * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings.
+ *
+ * This BYTE*<4:3> calculation is system/board specific.
+ *
+ * Many techniques can be used to calculate write-leveling BYTE*<4:3> values,
+ * including:
+ *
+ * o Known values for some byte lanes.
+ *
+ * o Relative values for some byte lanes relative to others.
+ *
+ * For example, suppose lane X is likely to require a larger
+ * write-leveling delay than lane Y. A BYTEX<2:0> value that is much
+ * smaller than the BYTEY<2:0> value may then indicate that the
+ * required lane X delay wrapped into the next CK, so BYTEX<4:3>
+ * should be set to BYTEY<4:3>+1.
+ *
+ * When ECC DRAM is not present (i.e. when DRAM is not attached to the
+ * DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and
+ * DDR_DQ<35:32> chip signals), write LMC(0)_WLEVEL_RANK*[BYTE8] =
+ * LMC(0)_WLEVEL_RANK*[BYTE0], using the final calculated BYTE0 value.
+ * Write LMC(0)_WLEVEL_RANK*[BYTE4] = LMC(0)_WLEVEL_RANK*[BYTE0],
+ * using the final calculated BYTE0 value.
+ *
+ * 8. Initialize LMC(0)_WLEVEL_RANK* values for all unused ranks.
+ *
+ * Let rank i be a rank with attached DRAM.
+ *
+ * For all ranks j that do not have attached DRAM, set
+ * LMC(0)_WLEVEL_RANKj = LMC(0)_WLEVEL_RANKi.
+ */
+ { // Start HW write-leveling block
+#pragma pack(push,1)
+ bdk_lmcx_wlevel_ctl_t wlevel_ctl;
+ bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank;
+ int rankx = 0;
+ int wlevel_bitmask[9];
+ int byte_idx;
+ int ecc_ena;
+ int ddr_wlevel_roundup = 0;
+ int ddr_wlevel_printall = (dram_is_verbose(VBL_FAE)); // or default to 1 to print all HW WL samples
+ int disable_hwl_validity = 0;
+ int default_wlevel_rtt_nom;
+#if WODT_MASK_2R_1S
+ uint64_t saved_wodt_mask = 0;
+#endif
+#pragma pack(pop)
+
+ if (wlevel_loops)
+ ddr_print("N%d.LMC%d: Performing Hardware Write-Leveling\n", node, ddr_interface_num);
+ else {
+ wlevel_bitmask_errors = 1; /* Force software write-leveling to run */
+ ddr_print("N%d.LMC%d: Forcing software Write-Leveling\n", node, ddr_interface_num);
+ }
+
+ default_wlevel_rtt_nom = (ddr_type == DDR3_DRAM) ? rttnom_20ohm : ddr4_rttnom_40ohm ; /* FIXME? */
+
+#if WODT_MASK_2R_1S
+ if ((ddr_type == DDR4_DRAM) && (num_ranks == 2) && (dimm_count == 1)) {
+ /* LMC(0)_WODT_MASK */
+ bdk_lmcx_wodt_mask_t lmc_wodt_mask;
+ // always save original so we can always restore later
+ saved_wodt_mask = BDK_CSR_READ(node, BDK_LMCX_WODT_MASK(ddr_interface_num));
+ if ((s = lookup_env_parameter_ull("ddr_hwl_wodt_mask")) != NULL) {
+ lmc_wodt_mask.u = strtoull(s, NULL, 0);
+ if (lmc_wodt_mask.u != saved_wodt_mask) { // print/store only when diff
+ ddr_print("WODT_MASK : 0x%016lx\n", lmc_wodt_mask.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_WODT_MASK(ddr_interface_num), lmc_wodt_mask.u);
+ }
+ }
+ }
+#endif /* WODT_MASK_2R_1S */
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ ecc_ena = lmc_config.s.ecc_ena;
+
+ if ((s = lookup_env_parameter("ddr_wlevel_roundup")) != NULL) {
+ ddr_wlevel_roundup = strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_wlevel_printall")) != NULL) {
+ ddr_wlevel_printall = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_disable_hwl_validity")) != NULL) {
+ disable_hwl_validity = !!strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_wlevel_rtt_nom")) != NULL) {
+ default_wlevel_rtt_nom = strtoul(s, NULL, 0);
+ }
+
+ // For DDR3, we leave the WLEVEL_CTL fields at default settings
+ // For DDR4, we touch WLEVEL_CTL fields OR_DIS or BITMASK here
+ if (ddr_type == DDR4_DRAM) {
+ int default_or_dis = 1;
+ int default_bitmask = 0xFF;
+
+ // when x4, use only the lower nibble bits
+ if (dram_width == 4) {
+ default_bitmask = 0x0F;
+ VB_PRT(VBL_DEV, "N%d.LMC%d: WLEVEL_CTL: default bitmask is 0x%2x for DDR4 x4\n",
+ node, ddr_interface_num, default_bitmask);
+ }
+
+ wlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num));
+ wlevel_ctl.s.or_dis = default_or_dis;
+ wlevel_ctl.s.bitmask = default_bitmask;
+
+ // allow overrides
+ if ((s = lookup_env_parameter("ddr_wlevel_ctl_or_dis")) != NULL) {
+ wlevel_ctl.s.or_dis = !!strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_wlevel_ctl_bitmask")) != NULL) {
+ wlevel_ctl.s.bitmask = strtoul(s, NULL, 0);
+ }
+
+ // print only if not defaults
+ if ((wlevel_ctl.s.or_dis != default_or_dis) || (wlevel_ctl.s.bitmask != default_bitmask)) {
+ ddr_print("N%d.LMC%d: WLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n",
+ node, ddr_interface_num, wlevel_ctl.s.or_dis, wlevel_ctl.s.bitmask);
+ }
+ // always write
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
+ }
+
+ // Start the hardware write-leveling loop per rank
+ for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+#if HW_WL_MAJORITY
+ // array to collect counts of byte-lane values
+ // assume delays use only the low-order 3 bits and are even, so really only 2-bit values
+ int wlevel_bytes[9][4];
+ memset(wlevel_bytes, 0, sizeof(wlevel_bytes));
+#endif
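+ // Worked example (illustrative assumption, not taken from a real run): with
+ // even delays 0/2/4/6 mapped by (delay >> 1) & 3 to counter indices 0..3,
+ // five samples of delay 4 and one of delay 6 give wlevel_bytes[lane][2] == 5
+ // and wlevel_bytes[lane][3] == 1; the majority pass below picks index 2,
+ // i.e. delay 4, and the leftover count makes delay 6 an alternate candidate.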
+
+ // restructure the looping so we can keep trying until we get the samples we want
+ //for (int wloop = 0; wloop < wlevel_loops; wloop++) {
+ int wloop = 0;
+ int wloop_retries = 0; // retries per sample for HW-related issues with bitmasks or values
+ int wloop_retries_total = 0;
+ int wloop_retries_exhausted = 0;
+#define WLOOP_RETRIES_DEFAULT 5
+ int wlevel_validity_errors;
+ int wlevel_bitmask_errors_rank = 0;
+ int wlevel_validity_errors_rank = 0;
+
+ while (wloop < wlevel_loops) {
+
+ wlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num));
+
+ wlevel_ctl.s.rtt_nom = (default_wlevel_rtt_nom > 0) ? (default_wlevel_rtt_nom - 1) : 7;
+
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), 0); /* Clear write-level delays */
+
+ wlevel_bitmask_errors = 0; /* Reset error counters */
+ wlevel_validity_errors = 0;
+
+ for (byte_idx=0; byte_idx<9; ++byte_idx) {
+ wlevel_bitmask[byte_idx] = 0; /* Reset bitmasks */
+ }
+
+#if HWL_BY_BYTE // FIXME???
+ /* Make a separate pass for each byte to reduce power. */
+ for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
+
+ if (!(ddr_interface_bytemask&(1<<byte_idx)))
+ continue;
+
+ wlevel_ctl.s.lanemask = (1<<byte_idx);
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
+
+ /* Read and write values back in order to update the
+ status field. This ensures that we read the updated
+ values after write-leveling has completed. */
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
+ BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx)));
+
+ perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 6); /* write-leveling */
+
+ if (!bdk_is_platform(BDK_PLATFORM_ASIM) &&
+ BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
+ status, ==, 3, 1000000))
+ {
+ error_print("ERROR: Timeout waiting for WLEVEL\n");
+ }
+ lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+
+ wlevel_bitmask[byte_idx] = octeon_read_lmcx_ddr3_wlevel_dbg(node, ddr_interface_num, byte_idx);
+ if (wlevel_bitmask[byte_idx] == 0)
+ ++wlevel_bitmask_errors;
+ } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
+
+ wlevel_ctl.s.lanemask = /*0x1ff*/ddr_interface_bytemask; // restore for RL
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
+#else
+ // do all the byte-lanes at the same time
+ wlevel_ctl.s.lanemask = /*0x1ff*/ddr_interface_bytemask; // FIXME?
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
+
+ /* Read and write values back in order to update the
+ status field. This ensures that we read the updated
+ values after write-leveling has completed. */
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
+ BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx)));
+
+ perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 6); /* write-leveling */
+
+ if (!bdk_is_platform(BDK_PLATFORM_ASIM) &&
+ BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
+ status, ==, 3, 1000000))
+ {
+ error_print("ERROR: Timeout waiting for WLEVEL\n");
+ }
+
+ lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+
+ for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
+ if (!(ddr_interface_bytemask&(1<<byte_idx)))
+ continue;
+ wlevel_bitmask[byte_idx] = octeon_read_lmcx_ddr3_wlevel_dbg(node, ddr_interface_num, byte_idx);
+ if (wlevel_bitmask[byte_idx] == 0)
+ ++wlevel_bitmask_errors;
+ } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
+#endif
+
+ // check validity only if no bitmask errors
+ if (wlevel_bitmask_errors == 0) {
+ if ((spd_dimm_type != 5) &&
+ (spd_dimm_type != 6) &&
+ (spd_dimm_type != 8) &&
+ (spd_dimm_type != 9) &&
+ (dram_width != 16) &&
+ (ddr_interface_64b) &&
+ !(disable_hwl_validity))
+ { // bypass if mini-[RU]DIMM or x16 or 32-bit or SO-[RU]DIMM
+ wlevel_validity_errors =
+ Validate_HW_WL_Settings(node, ddr_interface_num,
+ &lmc_wlevel_rank, ecc_ena);
+ wlevel_validity_errors_rank += (wlevel_validity_errors != 0);
+ }
+ } else
+ wlevel_bitmask_errors_rank++;
+
+ // before we print, if we had bitmask or validity errors, do a retry...
+ if ((wlevel_bitmask_errors != 0) || (wlevel_validity_errors != 0)) {
+ // VBL must be high to show the bad bitmasks or delays here also
+ if (dram_is_verbose(VBL_DEV2)) {
+ display_WL_BM(node, ddr_interface_num, rankx, wlevel_bitmask);
+ display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx);
+ }
+ if (wloop_retries < WLOOP_RETRIES_DEFAULT) {
+ wloop_retries++;
+ wloop_retries_total++;
+ // this printout is per-retry: only when VBL is high enough (DEV2?)
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: H/W Write-Leveling had %s errors - retrying...\n",
+ node, ddr_interface_num, rankx,
+ (wlevel_bitmask_errors) ? "Bitmask" : "Validity");
+ continue; // this takes us back to the top without counting a sample
+ } else { // ran out of retries for this sample
+ // retries exhausted, do not print at normal VBL
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: H/W Write-Leveling issues: %s errors\n",
+ node, ddr_interface_num, rankx,
+ (wlevel_bitmask_errors) ? "Bitmask" : "Validity");
+ wloop_retries_exhausted++;
+ }
+ }
+ // no errors or exhausted retries, use this sample
+ wloop_retries = 0; // reset for next sample
+
+ // when only 1 sample or forced, print the bitmasks first and current HW WL
+ if ((wlevel_loops == 1) || ddr_wlevel_printall) {
+ display_WL_BM(node, ddr_interface_num, rankx, wlevel_bitmask);
+ display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx);
+ }
+
+ if (ddr_wlevel_roundup) { /* Round up odd bitmask delays */
+ for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
+ if (!(ddr_interface_bytemask&(1<<byte_idx)))
+ continue;
+ update_wlevel_rank_struct(&lmc_wlevel_rank,
+ byte_idx,
+ roundup_ddr3_wlevel_bitmask(wlevel_bitmask[byte_idx]));
+ } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
+ display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx);
+ }
+
+#if HW_WL_MAJORITY
+ // OK, we have a decent sample, no bitmask or validity errors
+ for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
+ if (!(ddr_interface_bytemask&(1<<byte_idx)))
+ continue;
+ // increment count of byte-lane value
+ int ix = (get_wlevel_rank_struct(&lmc_wlevel_rank, byte_idx) >> 1) & 3; // only 4 values
+ wlevel_bytes[byte_idx][ix]++;
+ } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
+#endif
+
+ wloop++; // if we get here, we have taken a decent sample
+
+ } /* while (wloop < wlevel_loops) */
+
+#if HW_WL_MAJORITY
+ // if we did sample more than once, try to pick a majority vote
+ if (wlevel_loops > 1) {
+ // look for the majority in each byte-lane
+ for (byte_idx = 0; byte_idx < (8+ecc_ena); ++byte_idx) {
+ int mx = -1, mc = 0, xc = 0, cc = 0;
+ int ix, ic;
+ if (!(ddr_interface_bytemask&(1<<byte_idx)))
+ continue;
+ for (ix = 0; ix < 4; ix++) {
+ ic = wlevel_bytes[byte_idx][ix];
+ // make a bitmask of the ones with a count
+ if (ic > 0) {
+ mc |= (1 << ix);
+ cc++; // count how many had non-zero counts
+ }
+ // find the majority
+ if (ic > xc) { // new max?
+ xc = ic; // yes
+ mx = ix; // set its index
+ }
+ }
+#if SWL_TRY_HWL_ALT
+ // see if there was an alternate
+ int alts = (mc & ~(1 << mx)); // take out the majority choice
+ if (alts != 0) {
+ for (ix = 0; ix < 4; ix++) {
+ if (alts & (1 << ix)) { // FIXME: could be done multiple times? bad if so
+ hwl_alts[rankx].hwl_alt_mask |= (1 << byte_idx); // set the mask
+ hwl_alts[rankx].hwl_alt_delay[byte_idx] = ix << 1; // record the value
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d (%d) alt %d (%d).\n",
+ node, ddr_interface_num, rankx, byte_idx, mx << 1, xc,
+ ix << 1, wlevel_bytes[byte_idx][ix]);
+ }
+ }
+ } else {
+ debug_print("N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d alt NONE.\n",
+ node, ddr_interface_num, rankx, byte_idx, mx << 1);
+ }
+#endif /* SWL_TRY_HWL_ALT */
+ if (cc > 2) { // unlikely, but...
+ // assume: counts for 3 indices are all 1
+ // possibilities are: 0/2/4, 2/4/6, 0/4/6, 0/2/6
+ // and the desired?: 2 , 4 , 6, 0
+ // we choose the middle, assuming one of the outliers is bad
+ // NOTE: this is an ugly hack at the moment; there must be a better way
+ switch (mc) {
+ case 0x7: mx = 1; break; // was 0/2/4, choose 2
+ case 0xb: mx = 0; break; // was 0/2/6, choose 0
+ case 0xd: mx = 3; break; // was 0/4/6, choose 6
+ case 0xe: mx = 2; break; // was 2/4/6, choose 4
+ default:
+ case 0xf: mx = 1; break; // was 0/2/4/6, choose 2?
+ }
+ error_print("N%d.LMC%d.R%d: HW WL MAJORITY: bad byte-lane %d (0x%x), using %d.\n",
+ node, ddr_interface_num, rankx, byte_idx, mc, mx << 1);
+ }
+ update_wlevel_rank_struct(&lmc_wlevel_rank, byte_idx, mx << 1);
+ } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
+ display_WL_with_final(node, ddr_interface_num, lmc_wlevel_rank, rankx);
+ } /* if (wlevel_loops > 1) */
+#endif /* HW_WL_MAJORITY */
+ // maybe print an error summary for the rank
+ if ((wlevel_bitmask_errors_rank != 0) || (wlevel_validity_errors_rank != 0)) {
+ VB_PRT(VBL_FAE, "N%d.LMC%d.R%d: H/W Write-Leveling errors - %d bitmask, %d validity, %d retries, %d exhausted\n",
+ node, ddr_interface_num, rankx,
+ wlevel_bitmask_errors_rank, wlevel_validity_errors_rank,
+ wloop_retries_total, wloop_retries_exhausted);
+ }
+
+ } /* for (rankx = 0; rankx < dimm_count * 4;rankx++) */
+
+#if WODT_MASK_2R_1S
+ if ((ddr_type == DDR4_DRAM) && (num_ranks == 2) && (dimm_count == 1)) {
+ /* LMC(0)_WODT_MASK */
+ bdk_lmcx_wodt_mask_t lmc_wodt_mask;
+ // always read current so we can see if it's different from saved
+ lmc_wodt_mask.u = BDK_CSR_READ(node, BDK_LMCX_WODT_MASK(ddr_interface_num));
+ if (lmc_wodt_mask.u != saved_wodt_mask) { // always restore what was saved if diff
+ lmc_wodt_mask.u = saved_wodt_mask;
+ ddr_print("WODT_MASK : 0x%016lx\n", lmc_wodt_mask.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_WODT_MASK(ddr_interface_num), lmc_wodt_mask.u);
+ }
+ }
+#endif /* WODT_MASK_2R_1S */
+
+ } // End HW write-leveling block
+
+ // At the end of HW Write Leveling, check on some things...
+ if (! disable_deskew_training) {
+
+ deskew_counts_t dsk_counts;
+ int retry_count = 0;
+
+ VB_PRT(VBL_FAE, "N%d.LMC%d: Check Deskew Settings before Read-Leveling.\n", node, ddr_interface_num);
+
+ do {
+ Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, VBL_FAE);
+
+ // only RAWCARD A or B will not benefit from retraining if there's only saturation
+ // or any rawcard if there is a nibble error
+ if ((!spd_rawcard_AorB && dsk_counts.saturated > 0) ||
+ ((dsk_counts.nibrng_errs != 0) || (dsk_counts.nibunl_errs != 0)))
+ {
+ retry_count++;
+ VB_PRT(VBL_FAE, "N%d.LMC%d: Deskew Status indicates saturation or nibble errors - retry %d Training.\n",
+ node, ddr_interface_num, retry_count);
+ Perform_Read_Deskew_Training(node, rank_mask, ddr_interface_num,
+ spd_rawcard_AorB, 0, ddr_interface_64b);
+ } else
+ break;
+ } while (retry_count < 5);
+
+ // print the last setting only if we had to do retries here
+ if (retry_count > 0)
+ Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, VBL_NORM);
+ }
+
+ /*
+ * 6.9.12 LMC Read Leveling
+ *
+ * LMC supports an automatic read-leveling separately per byte-lane using
+ * the DDR3 multipurpose register predefined pattern for system
+ * calibration defined in the JEDEC DDR3 specifications.
+ *
+ * All of DDR PLL, LMC CK, and LMC DRESET, and early LMC initializations
+ * must be completed prior to starting this LMC read-leveling sequence.
+ *
+ * Software could simply write the desired read-leveling values into
+ * LMC(0)_RLEVEL_RANK(0..3). This section describes a sequence that uses
+ * LMC's auto read-leveling capabilities.
+ *
+ * When LMC does the read-leveling sequence for a rank, it first enables
+ * the DDR3 multipurpose register predefined pattern for system
+ * calibration on the selected DRAM rank via a DDR3 MR3 write, then
+ * executes 64 RD operations at different internal delay settings, then
+ * disables the predefined pattern via another DDR3 MR3 write
+ * operation. LMC determines the pass or fail of each of the 64 settings
+ * independently for each byte lane, then writes appropriate
+ * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] values for the rank.
+ *
+ * After read-leveling for a rank, software can read the 64 pass/fail
+ * indications for one byte lane via LMC(0)_RLEVEL_DBG[BITMASK]. Software
+ * can observe all pass/fail results for all byte lanes in a rank via
+ * separate read-leveling sequences on the rank with different
+ * LMC(0)_RLEVEL_CTL[BYTE] values.
+ *
+ * The 64 pass/fail results will typically have failures for the low
+ * delays, followed by a run of some passing settings, followed by more
+ * failures in the remaining high delays. LMC sets
+ * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] to one of the passing settings.
+ * First, LMC selects the longest run of successes in the 64 results. (In
+ * the unlikely event that there is more than one longest run, LMC
+ * selects the first one.) Then if LMC(0)_RLEVEL_CTL[OFFSET_EN] = 1 and
+ * the selected run has more than LMC(0)_RLEVEL_CTL[OFFSET] successes,
+ * LMC selects the last passing setting in the run minus
+ * LMC(0)_RLEVEL_CTL[OFFSET]. Otherwise LMC selects the middle setting in
+ * the run (rounding earlier when necessary). We expect the read-leveling
+ * sequence to produce good results with the reset values
+ * LMC(0)_RLEVEL_CTL [OFFSET_EN]=1, LMC(0)_RLEVEL_CTL[OFFSET] = 2.
+ *
+ * The read-leveling sequence has the following steps:
+ *
+ * 1. Select desired LMC(0)_RLEVEL_CTL[OFFSET_EN,OFFSET,BYTE] settings.
+ * Do the remaining substeps 2-4 separately for each rank i with
+ * attached DRAM.
+ *
+ * 2. Without changing any other fields in LMC(0)_CONFIG,
+ *
+ * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select read-leveling
+ *
+ * o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
+ *
+ * o write LMC(0)_SEQ_CTL[INIT_START] = 1
+ *
+ * This initiates the previously-described read-leveling.
+ *
+ * 3. Wait until LMC(0)_RLEVEL_RANKi[STATUS] != 2
+ *
+ * LMC will have updated LMC(0)_RLEVEL_RANKi[BYTE*] for all byte lanes
+ * at this point.
+ *
+ * If ECC DRAM is not present (i.e. when DRAM is not attached to the
+ * DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and
+ * DDR_DQ<35:32> chip signals), write LMC(0)_RLEVEL_RANK*[BYTE8] =
+ * LMC(0)_RLEVEL_RANK*[BYTE0]. Write LMC(0)_RLEVEL_RANK*[BYTE4] =
+ * LMC(0)_RLEVEL_RANK*[BYTE0].
+ *
+ * 4. If desired, consult LMC(0)_RLEVEL_DBG[BITMASK] and compare to
+ * LMC(0)_RLEVEL_RANKi[BYTE*] for the lane selected by
+ * LMC(0)_RLEVEL_CTL[BYTE]. If desired, modify LMC(0)_RLEVEL_CTL[BYTE]
+ * to a new value and repeat so that all BITMASKs can be observed.
+ *
+ * 5. Initialize LMC(0)_RLEVEL_RANK* values for all unused ranks.
+ *
+ * Let rank i be a rank with attached DRAM.
+ *
+ * For all ranks j that do not have attached DRAM, set
+ * LMC(0)_RLEVEL_RANKj = LMC(0)_RLEVEL_RANKi.
+ *
+ * This read-leveling sequence can help select the proper CN70XX ODT
+ * resistance value (LMC(0)_COMP_CTL2[RODT_CTL]). A hardware-generated
+ * LMC(0)_RLEVEL_RANKi[BYTEj] value (for a used byte lane j) that is
+ * drastically different from a neighboring LMC(0)_RLEVEL_RANKi[BYTEk]
+ * (for a used byte lane k) can indicate that the CN70XX ODT value is
+ * bad. It is possible to simultaneously optimize both
+ * LMC(0)_COMP_CTL2[RODT_CTL] and LMC(0)_RLEVEL_RANKn[BYTE*] values by
+ * performing this read-leveling sequence for several
+ * LMC(0)_COMP_CTL2[RODT_CTL] values and selecting the one with the best
+ * LMC(0)_RLEVEL_RANKn[BYTE*] profile for the ranks.
+ */
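+
+ /*
+ * Illustrative sketch only: per rank, the hardware portion of substeps
+ * 2-3 above reduces to roughly the calls below (sequence 1 selects
+ * read-leveling), which is also how the implementation that follows
+ * drives it; the real loops additionally clear delays, retry, and
+ * score the results.
+ *
+ * DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), 0);
+ * perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 1);
+ * if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx),
+ * status, ==, 3, 1000000))
+ * error_print("ERROR: Timeout waiting for RLEVEL\n");
+ * lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+ */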
+
+ {
+#pragma pack(push,4)
+ bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank;
+ bdk_lmcx_comp_ctl2_t lmc_comp_ctl2;
+ bdk_lmcx_rlevel_ctl_t rlevel_ctl;
+ bdk_lmcx_control_t lmc_control;
+ bdk_lmcx_modereg_params1_t lmc_modereg_params1;
+ unsigned char rodt_ctl;
+ unsigned char rankx = 0;
+ int rlevel_rodt_errors = 0;
+ unsigned char ecc_ena;
+ unsigned char rtt_nom;
+ unsigned char rtt_idx;
+ int min_rtt_nom_idx;
+ int max_rtt_nom_idx;
+ int min_rodt_ctl;
+ int max_rodt_ctl;
+ int rlevel_debug_loops = 1;
+ unsigned char save_ddr2t;
+ int rlevel_avg_loops;
+ int ddr_rlevel_compute;
+ int saved_ddr__ptune, saved_ddr__ntune, rlevel_comp_offset;
+ int saved_int_zqcs_dis = 0;
+ int disable_sequential_delay_check = 0;
+ int maximum_adjacent_rlevel_delay_increment = 0;
+ struct {
+ uint64_t setting;
+ int score;
+ } rlevel_scoreboard[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4];
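+ // scoreboard indexed as [rtt_nom][rodt_ctl][rank]: each entry records the
+ // RLEVEL_RANK CSR setting measured under that RTT_NOM/RODT combination
+ // and its accumulated error score (lower is better)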
+ int print_nom_ohms;
+#if PERFECT_BITMASK_COUNTING
+ typedef struct {
+ uint8_t count[9][32]; // 8+ECC by 32 values
+ uint8_t total[9]; // 8+ECC
+ } rank_perfect_t;
+ rank_perfect_t rank_perfect_counts[4];
+#endif
+
+#pragma pack(pop)
+
+#if PERFECT_BITMASK_COUNTING
+ memset(rank_perfect_counts, 0, sizeof(rank_perfect_counts));
+#endif /* PERFECT_BITMASK_COUNTING */
+
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ save_ddr2t = lmc_control.s.ddr2t;
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ ecc_ena = lmc_config.s.ecc_ena;
+
+#if 0
+ {
+ int save_ref_zqcs_int;
+ uint64_t temp_delay_usecs;
+
+ /* Temporarily select the minimum ZQCS interval and wait
+ long enough for a few ZQCS calibrations to occur. This
+ should ensure that the calibration circuitry is
+ stabilized before read-leveling occurs. */
+ save_ref_zqcs_int = lmc_config.s.ref_zqcs_int;
+ lmc_config.s.ref_zqcs_int = 1 | (32<<7); /* set smallest interval */
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+ BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+
+ /* Compute an appropriate delay based on the current ZQCS
+ interval. The delay should be long enough for the
+ current ZQCS delay counter to expire plus ten of the
+ minimum intervals to ensure that some calibrations
+ occur. */
+ temp_delay_usecs = (((uint64_t)save_ref_zqcs_int >> 7)
+ * tclk_psecs * 100 * 512 * 128) / (10000*10000)
+ + 10 * ((uint64_t)32 * tclk_psecs * 100 * 512 * 128) / (10000*10000);
+
+ ddr_print ("Waiting %lu usecs for ZQCS calibrations to start\n",
+ temp_delay_usecs);
+ bdk_wait_usec(temp_delay_usecs);
+
+ lmc_config.s.ref_zqcs_int = save_ref_zqcs_int; /* Restore computed interval */
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+ BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ }
+#endif
+
+ if ((s = lookup_env_parameter("ddr_rlevel_2t")) != NULL) {
+ lmc_control.s.ddr2t = strtoul(s, NULL, 0);
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ ddr_print("N%d.LMC%d: Performing Read-Leveling\n", node, ddr_interface_num);
+
+ rlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num));
+
+ rlevel_avg_loops = custom_lmc_config->rlevel_average_loops;
+ if (rlevel_avg_loops == 0) {
+ rlevel_avg_loops = RLEVEL_AVG_LOOPS_DEFAULT;
+ if ((dimm_count == 1) || (num_ranks == 1)) // up the samples for these cases
+ rlevel_avg_loops = rlevel_avg_loops * 2 + 1;
+ }
+
+ ddr_rlevel_compute = custom_lmc_config->rlevel_compute;
+ rlevel_ctl.s.offset_en = custom_lmc_config->offset_en;
+ rlevel_ctl.s.offset = spd_rdimm
+ ? custom_lmc_config->offset_rdimm
+ : custom_lmc_config->offset_udimm;
+
+ rlevel_ctl.s.delay_unload_0 = 1; /* should normally be set */
+ rlevel_ctl.s.delay_unload_1 = 1; /* should normally be set */
+ rlevel_ctl.s.delay_unload_2 = 1; /* should normally be set */
+ rlevel_ctl.s.delay_unload_3 = 1; /* should normally be set */
+
+ rlevel_ctl.s.or_dis = 1; // default to get best bitmasks
+ if ((s = lookup_env_parameter("ddr_rlevel_or_dis")) != NULL) {
+ rlevel_ctl.s.or_dis = !!strtoul(s, NULL, 0);
+ }
+ rlevel_ctl.s.bitmask = 0xff; // should work in 32b mode also
+ if ((s = lookup_env_parameter("ddr_rlevel_ctl_bitmask")) != NULL) {
+ rlevel_ctl.s.bitmask = strtoul(s, NULL, 0);
+ }
+ debug_print("N%d.LMC%d: RLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n",
+ node, ddr_interface_num,
+ rlevel_ctl.s.or_dis, rlevel_ctl.s.bitmask);
+
+ rlevel_comp_offset = spd_rdimm
+ ? custom_lmc_config->rlevel_comp_offset_rdimm
+ : custom_lmc_config->rlevel_comp_offset_udimm;
+
+ if ((s = lookup_env_parameter("ddr_rlevel_offset")) != NULL) {
+ rlevel_ctl.s.offset = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_rlevel_offset_en")) != NULL) {
+ rlevel_ctl.s.offset_en = strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_rlevel_ctl")) != NULL) {
+ rlevel_ctl.u = strtoul(s, NULL, 0);
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num), rlevel_ctl.u);
+
+ if (bdk_is_platform(BDK_PLATFORM_ASIM))
+ rlevel_debug_loops = 0;
+
+ if ((s = lookup_env_parameter("ddr%d_rlevel_debug_loops", ddr_interface_num)) != NULL) {
+ rlevel_debug_loops = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_rtt_nom_auto")) != NULL) {
+ ddr_rtt_nom_auto = !!strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_rlevel_average")) != NULL) {
+ rlevel_avg_loops = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_rlevel_compute")) != NULL) {
+ ddr_rlevel_compute = strtoul(s, NULL, 0);
+ }
+
+ ddr_print("RLEVEL_CTL : 0x%016lx\n", rlevel_ctl.u);
+ ddr_print("RLEVEL_OFFSET : %6d\n", rlevel_ctl.s.offset);
+ ddr_print("RLEVEL_OFFSET_EN : %6d\n", rlevel_ctl.s.offset_en);
+
+ /* The purpose for the indexed table is to sort the settings
+ ** by the ohm value to simplify the testing when incrementing
+ ** through the settings. (index => ohms) 1=120, 2=60, 3=40,
+ ** 4=30, 5=20 */
+ min_rtt_nom_idx = (custom_lmc_config->min_rtt_nom_idx == 0) ? 1 : custom_lmc_config->min_rtt_nom_idx;
+ max_rtt_nom_idx = (custom_lmc_config->max_rtt_nom_idx == 0) ? 5 : custom_lmc_config->max_rtt_nom_idx;
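+ // e.g. with the defaults above (1..5), the RTT_NOM sweep covers 120, 60, 40, 30, and 20 ohms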
+
+ min_rodt_ctl = (custom_lmc_config->min_rodt_ctl == 0) ? 1 : custom_lmc_config->min_rodt_ctl;
+ max_rodt_ctl = (custom_lmc_config->max_rodt_ctl == 0) ? 5 : custom_lmc_config->max_rodt_ctl;
+
+ if ((s = lookup_env_parameter("ddr_min_rodt_ctl")) != NULL) {
+ min_rodt_ctl = strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_max_rodt_ctl")) != NULL) {
+ max_rodt_ctl = strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_min_rtt_nom_idx")) != NULL) {
+ min_rtt_nom_idx = strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_max_rtt_nom_idx")) != NULL) {
+ max_rtt_nom_idx = strtoul(s, NULL, 0);
+ }
+
+#ifdef ENABLE_CUSTOM_RLEVEL_TABLE
+ if (custom_lmc_config->rlevel_table != NULL) {
+ char part_number[21];
+ /* Check for hard-coded read-leveling settings */
+ get_dimm_part_number(part_number, node, &dimm_config_table[0], 0, ddr_type);
+ for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+
+ i = 0;
+ while (custom_lmc_config->rlevel_table[i].part != NULL) {
+ debug_print("DIMM part number:\"%s\", SPD: \"%s\"\n", custom_lmc_config->rlevel_table[i].part, part_number);
+ if ((strcmp(part_number, custom_lmc_config->rlevel_table[i].part) == 0)
+ && (_abs(custom_lmc_config->rlevel_table[i].speed - 2*ddr_hertz/(1000*1000)) < 10 ))
+ {
+ ddr_print("Using hard-coded read leveling for DIMM part number: \"%s\"\n", part_number);
+ lmc_rlevel_rank.u = custom_lmc_config->rlevel_table[i].rlevel_rank[ddr_interface_num][rankx];
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u);
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+ display_RL(node, ddr_interface_num, lmc_rlevel_rank, rankx);
+ rlevel_debug_loops = 0; /* Disable h/w read-leveling */
+ break;
+ }
+ ++i;
+ }
+ }
+ }
+#endif /* ENABLE_CUSTOM_RLEVEL_TABLE */
+
+ while(rlevel_debug_loops--) {
+ /* Initialize the error scoreboard */
+ memset(rlevel_scoreboard, 0, sizeof(rlevel_scoreboard));
+
+ if ((s = lookup_env_parameter("ddr_rlevel_comp_offset")) != NULL) {
+ rlevel_comp_offset = strtoul(s, NULL, 0);
+ }
+
+ disable_sequential_delay_check = custom_lmc_config->disable_sequential_delay_check;
+
+ if ((s = lookup_env_parameter("ddr_disable_sequential_delay_check")) != NULL) {
+ disable_sequential_delay_check = strtoul(s, NULL, 0);
+ }
+
+ maximum_adjacent_rlevel_delay_increment = custom_lmc_config->maximum_adjacent_rlevel_delay_increment;
+
+ if ((s = lookup_env_parameter("ddr_maximum_adjacent_rlevel_delay_increment")) != NULL) {
+ maximum_adjacent_rlevel_delay_increment = strtoul(s, NULL, 0);
+ }
+
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ saved_ddr__ptune = lmc_comp_ctl2.s.ddr__ptune;
+ saved_ddr__ntune = lmc_comp_ctl2.s.ddr__ntune;
+
+ /* Disable dynamic compensation settings */
+ if (rlevel_comp_offset != 0) {
+ lmc_comp_ctl2.s.ptune = saved_ddr__ptune;
+ lmc_comp_ctl2.s.ntune = saved_ddr__ntune;
+
+ /* Round up the ptune calculation to bias the odd cases toward ptune */
+ lmc_comp_ctl2.s.ptune += divide_roundup(rlevel_comp_offset, 2);
+ lmc_comp_ctl2.s.ntune -= rlevel_comp_offset/2;
+
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ saved_int_zqcs_dis = lmc_control.s.int_zqcs_dis;
+ lmc_control.s.int_zqcs_dis = 1; /* Disable ZQCS while in bypass. */
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ lmc_comp_ctl2.s.byp = 1; /* Enable bypass mode */
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
+ BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read again */
+ ddr_print("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
+ lmc_comp_ctl2.s.ddr__ptune, lmc_comp_ctl2.s.ddr__ntune);
+ }
+
+ lmc_modereg_params1.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num));
+
+ for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
+ rtt_nom = imp_values->rtt_nom_table[rtt_idx];
+
+ /* When the read ODT mask is zero, the dyn_rtt_nom_mask is
+ zero and RTT_NOM will not be changing during
+ read-leveling. Since the value is fixed we only need
+ to test it once. */
+ if (dyn_rtt_nom_mask == 0) {
+ print_nom_ohms = -1; // flag not to print NOM ohms
+ if (rtt_idx != min_rtt_nom_idx)
+ continue;
+ } else {
+ if (dyn_rtt_nom_mask & 1) lmc_modereg_params1.s.rtt_nom_00 = rtt_nom;
+ if (dyn_rtt_nom_mask & 2) lmc_modereg_params1.s.rtt_nom_01 = rtt_nom;
+ if (dyn_rtt_nom_mask & 4) lmc_modereg_params1.s.rtt_nom_10 = rtt_nom;
+ if (dyn_rtt_nom_mask & 8) lmc_modereg_params1.s.rtt_nom_11 = rtt_nom;
+ // FIXME? rank 0 ohms always for the printout?
+ print_nom_ohms = imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00];
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num), lmc_modereg_params1.u);
+ VB_PRT(VBL_TME, "\n");
+ VB_PRT(VBL_TME, "RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_11],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_10],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_01],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00],
+ lmc_modereg_params1.s.rtt_nom_11,
+ lmc_modereg_params1.s.rtt_nom_10,
+ lmc_modereg_params1.s.rtt_nom_01,
+ lmc_modereg_params1.s.rtt_nom_00);
+
+ perform_ddr_init_sequence(node, rank_mask, ddr_interface_num);
+
+ // Try RANK outside RODT to rearrange the output...
+ for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+ int byte_idx;
+ rlevel_byte_data_t rlevel_byte[9];
+ int average_loops;
+ int rlevel_rank_errors, rlevel_bitmask_errors, rlevel_nonseq_errors;
+ rlevel_bitmask_t rlevel_bitmask[9];
+#if PICK_BEST_RANK_SCORE_NOT_AVG
+ int rlevel_best_rank_score;
+#endif
+
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
+#if PICK_BEST_RANK_SCORE_NOT_AVG
+ rlevel_best_rank_score = DEFAULT_BEST_RANK_SCORE;
+#endif
+ rlevel_rodt_errors = 0;
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ lmc_comp_ctl2.s.rodt_ctl = rodt_ctl;
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ bdk_wait_usec(1); /* Give it a little time to take effect */
+ VB_PRT(VBL_DEV, "Read ODT_CTL : 0x%x (%d ohms)\n",
+ lmc_comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[lmc_comp_ctl2.s.rodt_ctl]);
+
+ memset(rlevel_byte, 0, sizeof(rlevel_byte));
+
+ for (average_loops = 0; average_loops < rlevel_avg_loops; average_loops++) {
+ rlevel_bitmask_errors = 0;
+
+ if (! (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM))) {
+ /* Clear read-level delays */
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), 0);
+
+ /* read-leveling */
+ perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 1);
+
+ if (!bdk_is_platform(BDK_PLATFORM_ASIM) &&
+ BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx),
+ status, ==, 3, 1000000))
+ {
+ error_print("ERROR: Timeout waiting for RLEVEL\n");
+ }
+ }
+
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+
+ { // start bitmask interpretation block
+ int redoing_nonseq_errs = 0;
+
+ memset(rlevel_bitmask, 0, sizeof(rlevel_bitmask));
+
+ if (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM)) {
+ bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank_aside;
+ bdk_lmcx_modereg_params0_t lmc_modereg_params0;
+
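+ // DDR4 RDIMMs leveled with separate A/B sides get two sequence runs:
+ // MPR Page 0 Location 0 levels the A-side (byte lanes 0-3 plus ECC),
+ // then MPR Page 0 Location 3 levels the B-side (byte lanes 4-7); the
+ // saved A-side lane results are merged back into the rank value below.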
+ /* A-side */
+ lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
+ lmc_modereg_params0.s.mprloc = 0; /* MPR Page 0 Location 0 */
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
+
+ /* Clear read-level delays */
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), 0);
+
+ perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 1); /* read-leveling */
+
+ if (!bdk_is_platform(BDK_PLATFORM_ASIM) &&
+ BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx),
+ status, ==, 3, 1000000))
+ {
+ error_print("ERROR: Timeout waiting for RLEVEL\n");
+
+ }
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+
+ lmc_rlevel_rank_aside.u = lmc_rlevel_rank.u;
+
+ rlevel_bitmask[0].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 0);
+ rlevel_bitmask[1].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 1);
+ rlevel_bitmask[2].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 2);
+ rlevel_bitmask[3].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 3);
+ rlevel_bitmask[8].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 8);
+ /* A-side complete */
+
+
+ /* B-side */
+ lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
+ lmc_modereg_params0.s.mprloc = 3; /* MPR Page 0 Location 3 */
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
+
+ /* Clear read-level delays */
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), 0);
+
+ perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 1); /* read-leveling */
+
+ if (!bdk_is_platform(BDK_PLATFORM_ASIM) &&
+ BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx),
+ status, ==, 3, 1000000))
+ {
+ error_print("ERROR: Timeout waiting for RLEVEL\n");
+ }
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+
+ rlevel_bitmask[4].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 4);
+ rlevel_bitmask[5].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 5);
+ rlevel_bitmask[6].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 6);
+ rlevel_bitmask[7].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 7);
+ /* B-side complete */
+
+
+ update_rlevel_rank_struct(&lmc_rlevel_rank, 0, lmc_rlevel_rank_aside.cn83xx.byte0);
+ update_rlevel_rank_struct(&lmc_rlevel_rank, 1, lmc_rlevel_rank_aside.cn83xx.byte1);
+ update_rlevel_rank_struct(&lmc_rlevel_rank, 2, lmc_rlevel_rank_aside.cn83xx.byte2);
+ update_rlevel_rank_struct(&lmc_rlevel_rank, 3, lmc_rlevel_rank_aside.cn83xx.byte3);
+ update_rlevel_rank_struct(&lmc_rlevel_rank, 8, lmc_rlevel_rank_aside.cn83xx.byte8); /* ECC A-side */
+
+ lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
+ lmc_modereg_params0.s.mprloc = 0; /* MPR Page 0 Location 0 */
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
+
+ } /* if (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM)) */
+
+ /*
+ * Evaluate the quality of the read-leveling delays from the bitmasks.
+ * Also save off a software computed read-leveling mask that may be
+ * used later to qualify the delay results from Octeon.
+ */
+ for (byte_idx = 0; byte_idx < (8+ecc_ena); ++byte_idx) {
+ int bmerr;
+ if (!(ddr_interface_bytemask&(1<<byte_idx)))
+ continue;
+ if (! (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM))) {
+ rlevel_bitmask[byte_idx].bm =
+ octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, byte_idx);
+ }
+ bmerr = validate_ddr3_rlevel_bitmask(&rlevel_bitmask[byte_idx], ddr_type);
+ rlevel_bitmask[byte_idx].errs = bmerr;
+ rlevel_bitmask_errors += bmerr;
+#if PERFECT_BITMASK_COUNTING
+ if ((ddr_type == DDR4_DRAM) && !bmerr) { // count only the "perfect" bitmasks
+ // FIXME: could optimize this a bit?
+ int delay = get_rlevel_rank_struct(&lmc_rlevel_rank, byte_idx);
+ rank_perfect_counts[rankx].count[byte_idx][delay] += 1;
+ rank_perfect_counts[rankx].total[byte_idx] += 1;
+ }
+#endif /* PERFECT_BITMASK_COUNTING */
+ }
+
+ /* Set delays for unused bytes to match byte 0. */
+ for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
+ if (ddr_interface_bytemask & (1 << byte_idx))
+ continue;
+ update_rlevel_rank_struct(&lmc_rlevel_rank, byte_idx, lmc_rlevel_rank.cn83xx.byte0);
+ }
+
+ /* Save a copy of the byte delays in physical
+ order for sequential evaluation. */
+ unpack_rlevel_settings(ddr_interface_bytemask, ecc_ena, rlevel_byte, lmc_rlevel_rank);
+ redo_nonseq_errs:
+
+ rlevel_nonseq_errors = 0;
+
+ if (! disable_sequential_delay_check) {
+ if ((ddr_interface_bytemask & 0xff) == 0xff) {
+ /* Evaluate delay sequence across the whole range of bytes for standard dimms. */
+ if ((spd_dimm_type == 1) || (spd_dimm_type == 5)) { /* 1=RDIMM, 5=Mini-RDIMM */
+ int register_adjacent_delay = _abs(rlevel_byte[4].delay - rlevel_byte[5].delay);
+ /* Registered dimm topology routes from the center. */
+ rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 0, 3+ecc_ena,
+ maximum_adjacent_rlevel_delay_increment);
+ rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 5, 7+ecc_ena,
+ maximum_adjacent_rlevel_delay_increment);
+ // byte 5 sqerrs never gets cleared for RDIMMs
+ rlevel_byte[5].sqerrs = 0;
+ if (register_adjacent_delay > 1) {
+ /* Assess proximity of bytes on opposite sides of register */
+ rlevel_nonseq_errors += (register_adjacent_delay-1) * RLEVEL_ADJACENT_DELAY_ERROR;
+ // update byte 5 error
+ rlevel_byte[5].sqerrs += (register_adjacent_delay-1) * RLEVEL_ADJACENT_DELAY_ERROR;
+ }
+ }
+ if ((spd_dimm_type == 2) || (spd_dimm_type == 6)) { /* 2=UDIMM, 6=Mini-UDIMM */
+ /* Unbuffered dimm topology routes from end to end. */
+ rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 0, 7+ecc_ena,
+ maximum_adjacent_rlevel_delay_increment);
+ }
+ } else {
+ rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 0, 3+ecc_ena,
+ maximum_adjacent_rlevel_delay_increment);
+ }
+ } /* if (! disable_sequential_delay_check) */
+
+#if 0
+ // FIXME FIXME: disabled for now, it was too much...
+
+ // Calculate total errors for the rank:
+ // we do NOT add nonsequential errors if mini-[RU]DIMM or x16;
+ // mini-DIMMs and x16 devices have unusual sequence geometries.
+ // Make the final scores for them depend only on the bitmasks...
+ rlevel_rank_errors = rlevel_bitmask_errors;
+ if ((spd_dimm_type != 5) &&
+ (spd_dimm_type != 6) &&
+ (dram_width != 16))
+ {
+ rlevel_rank_errors += rlevel_nonseq_errors;
+ }
+#else
+ rlevel_rank_errors = rlevel_bitmask_errors + rlevel_nonseq_errors;
+#endif
+
+ // print original sample here only if we are not really averaging or picking best
+ // also do not print if we were redoing the NONSEQ score for using COMPUTED
+ if (!redoing_nonseq_errs && ((rlevel_avg_loops < 2) || dram_is_verbose(VBL_DEV2))) {
+ display_RL_BM(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena);
+ display_RL_BM_scores(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena);
+ display_RL_SEQ_scores(node, ddr_interface_num, rankx, rlevel_byte, ecc_ena);
+ display_RL_with_score(node, ddr_interface_num, lmc_rlevel_rank, rankx, rlevel_rank_errors);
+ }
+
+ if (ddr_rlevel_compute) {
+ if (!redoing_nonseq_errs) {
+ /* Recompute the delays based on the bitmask */
+ for (byte_idx = 0; byte_idx < (8+ecc_ena); ++byte_idx) {
+ if (!(ddr_interface_bytemask & (1 << byte_idx)))
+ continue;
+ update_rlevel_rank_struct(&lmc_rlevel_rank, byte_idx,
+ compute_ddr3_rlevel_delay(rlevel_bitmask[byte_idx].mstart,
+ rlevel_bitmask[byte_idx].width,
+ rlevel_ctl));
+ }
+
+ /* Override the copy of byte delays with the computed results. */
+ unpack_rlevel_settings(ddr_interface_bytemask, ecc_ena, rlevel_byte, lmc_rlevel_rank);
+
+ redoing_nonseq_errs = 1;
+ goto redo_nonseq_errs;
+
+ } else {
+ /* now print this if already printed the original sample */
+ if ((rlevel_avg_loops < 2) || dram_is_verbose(VBL_DEV2)) {
+ display_RL_with_computed(node, ddr_interface_num,
+ lmc_rlevel_rank, rankx,
+ rlevel_rank_errors);
+ }
+ }
+ } /* if (ddr_rlevel_compute) */
+
+ } // end bitmask interpretation block
+
+#if PICK_BEST_RANK_SCORE_NOT_AVG
+
+ // if it is a better (lower) score, then keep it
+ if (rlevel_rank_errors < rlevel_best_rank_score) {
+ rlevel_best_rank_score = rlevel_rank_errors;
+
+ // save the new best delays and best errors
+ for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
+ rlevel_byte[byte_idx].best = rlevel_byte[byte_idx].delay;
+ rlevel_byte[byte_idx].bestsq = rlevel_byte[byte_idx].sqerrs;
+ // save bitmasks and their scores as well
+ // xlate UNPACKED index to PACKED index to get from rlevel_bitmask
+ rlevel_byte[byte_idx].bm = rlevel_bitmask[XUP(byte_idx, !!ecc_ena)].bm;
+ rlevel_byte[byte_idx].bmerrs = rlevel_bitmask[XUP(byte_idx, !!ecc_ena)].errs;
+ }
+ }
+#else /* PICK_BEST_RANK_SCORE_NOT_AVG */
+
+ /* Accumulate the total score across averaging loops for this setting */
+ debug_print("rlevel_scoreboard[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score: %d [%d]\n",
+ rtt_nom, rodt_ctl, rankx, rlevel_rank_errors, average_loops);
+ rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score += rlevel_rank_errors;
+
+ /* Accumulate the delay totals and loop counts
+ necessary to compute average delay results */
+ for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
+ if (rlevel_byte[byte_idx].delay != 0) { /* Don't include delay=0 in the average */
+ ++rlevel_byte[byte_idx].loop_count;
+ rlevel_byte[byte_idx].loop_total += rlevel_byte[byte_idx].delay;
+ }
+ } /* for (byte_idx = 0; byte_idx < 9; ++byte_idx) */
+#endif /* PICK_BEST_RANK_SCORE_NOT_AVG */
+
+ rlevel_rodt_errors += rlevel_rank_errors;
+
+ } /* for (average_loops = 0; average_loops < rlevel_avg_loops; average_loops++) */
+
+#if PICK_BEST_RANK_SCORE_NOT_AVG
+
+ /* We recorded the best score across the averaging loops */
+ rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score = rlevel_best_rank_score;
+
+ /* Restore the delays from the best fields that go with the best score */
+ for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
+ rlevel_byte[byte_idx].delay = rlevel_byte[byte_idx].best;
+ rlevel_byte[byte_idx].sqerrs = rlevel_byte[byte_idx].bestsq;
+ }
+#else /* PICK_BEST_RANK_SCORE_NOT_AVG */
+
+ /* Compute the average score across averaging loops */
+ rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score =
+ divide_nint(rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score, rlevel_avg_loops);
+
+ /* Compute the average delay results */
+ for (byte_idx=0; byte_idx < 9; ++byte_idx) {
+ if (rlevel_byte[byte_idx].loop_count == 0)
+ rlevel_byte[byte_idx].loop_count = 1;
+ rlevel_byte[byte_idx].delay = divide_nint(rlevel_byte[byte_idx].loop_total,
+ rlevel_byte[byte_idx].loop_count);
+ }
+#endif /* PICK_BEST_RANK_SCORE_NOT_AVG */
+
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+
+ pack_rlevel_settings(ddr_interface_bytemask, ecc_ena, rlevel_byte, &lmc_rlevel_rank);
+
+ if (rlevel_avg_loops > 1) {
+#if PICK_BEST_RANK_SCORE_NOT_AVG
+ // restore the "best" bitmasks and their scores for printing
+ for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
+ if ((ddr_interface_bytemask & (1 << byte_idx)) == 0)
+ continue;
+ // xlate PACKED index to UNPACKED index to get from rlevel_byte
+ rlevel_bitmask[byte_idx].bm = rlevel_byte[XPU(byte_idx, !!ecc_ena)].bm;
+ rlevel_bitmask[byte_idx].errs = rlevel_byte[XPU(byte_idx, !!ecc_ena)].bmerrs;
+ }
+ // print bitmasks/scores here only for DEV // FIXME? lower VBL?
+ if (dram_is_verbose(VBL_DEV)) {
+ display_RL_BM(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena);
+ display_RL_BM_scores(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena);
+ display_RL_SEQ_scores(node, ddr_interface_num, rankx, rlevel_byte, ecc_ena);
+ }
+
+ display_RL_with_RODT(node, ddr_interface_num, lmc_rlevel_rank, rankx,
+ rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score,
+ print_nom_ohms, imp_values->rodt_ohms[rodt_ctl],
+ WITH_RODT_BESTSCORE);
+
+#else /* PICK_BEST_RANK_SCORE_NOT_AVG */
+ display_RL_with_average(node, ddr_interface_num, lmc_rlevel_rank, rankx,
+ rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score);
+#endif /* PICK_BEST_RANK_SCORE_NOT_AVG */
+
+ } /* if (rlevel_avg_loops > 1) */
+
+ rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].setting = lmc_rlevel_rank.u;
+
+ } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
+ } /* for (rankx = 0; rankx < dimm_count*4; rankx++) */
+ } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<max_rtt_nom_idx; ++rtt_idx) */
+
+
+ /* Re-enable dynamic compensation settings. */
+ if (rlevel_comp_offset != 0) {
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+
+ lmc_comp_ctl2.s.ptune = 0;
+ lmc_comp_ctl2.s.ntune = 0;
+ lmc_comp_ctl2.s.byp = 0; /* Disable bypass mode */
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
+ BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read once */
+
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read again */
+ ddr_print("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
+ lmc_comp_ctl2.s.ddr__ptune, lmc_comp_ctl2.s.ddr__ntune);
+
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ lmc_control.s.int_zqcs_dis = saved_int_zqcs_dis; /* Restore original setting */
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ }
+
+
+ {
+ int override_compensation = 0;
+ if ((s = lookup_env_parameter("ddr__ptune")) != NULL) {
+ saved_ddr__ptune = strtoul(s, NULL, 0);
+ override_compensation = 1;
+ }
+ if ((s = lookup_env_parameter("ddr__ntune")) != NULL) {
+ saved_ddr__ntune = strtoul(s, NULL, 0);
+ override_compensation = 1;
+ }
+ if (override_compensation) {
+ lmc_comp_ctl2.s.ptune = saved_ddr__ptune;
+ lmc_comp_ctl2.s.ntune = saved_ddr__ntune;
+
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ saved_int_zqcs_dis = lmc_control.s.int_zqcs_dis;
+ lmc_control.s.int_zqcs_dis = 1; /* Disable ZQCS while in bypass. */
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ lmc_comp_ctl2.s.byp = 1; /* Enable bypass mode */
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read again */
+
+ ddr_print("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
+ lmc_comp_ctl2.s.ptune, lmc_comp_ctl2.s.ntune);
+ }
+ }
+ { /* Evaluation block */
+ int best_rodt_score = DEFAULT_BEST_RANK_SCORE; /* Start with an arbitrarily high score */
+ int auto_rodt_ctl = 0;
+ int auto_rtt_nom = 0;
+ int rodt_score;
+ int rodt_row_skip_mask = 0;
+
+ // just add specific RODT rows to the skip mask for DDR4 at this time...
+ if (ddr_type == DDR4_DRAM) {
+ rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_34_ohm); // skip RODT row 34 ohms for all DDR4 types
+ rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_40_ohm); // skip RODT row 40 ohms for all DDR4 types
+#if ADD_48_OHM_SKIP
+ rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_48_ohm); // skip RODT row 48 ohms for all DDR4 types
+#endif /* ADD_48_OHM_SKIP */
+#if NOSKIP_40_48_OHM
+ // For now, do not skip RODT row 40 or 48 ohm when ddr_hertz is above 1075 MHz
+ if (ddr_hertz > 1075000000) {
+ rodt_row_skip_mask &= ~(1 << ddr4_rodt_ctl_40_ohm); // noskip RODT row 40 ohms
+ rodt_row_skip_mask &= ~(1 << ddr4_rodt_ctl_48_ohm); // noskip RODT row 48 ohms
+ }
+#endif /* NOSKIP_40_48_OHM */
+#if NOSKIP_48_STACKED
+ // For now, do not skip RODT row 48 ohm for 2Rx4 stacked die DIMMs
+ if ((is_stacked_die) && (num_ranks == 2) && (dram_width == 4)) {
+ rodt_row_skip_mask &= ~(1 << ddr4_rodt_ctl_48_ohm); // noskip RODT row 48 ohms
+ }
+#endif /* NOSKIP_48_STACKED */
+#if NOSKIP_FOR_MINI
+ // for now, leave all rows eligible when we have mini-DIMMs...
+ if ((spd_dimm_type == 5) || (spd_dimm_type == 6)) {
+ rodt_row_skip_mask = 0;
+ }
+#endif /* NOSKIP_FOR_MINI */
+#if NOSKIP_FOR_2S_1R
+ // for now, leave all rows eligible when we have a 2-slot 1-rank config
+ if ((dimm_count == 2) && (num_ranks == 1)) {
+ rodt_row_skip_mask = 0;
+ }
+#endif /* NOSKIP_FOR_2S_1R */
+ }
+
+ VB_PRT(VBL_DEV, "Evaluating Read-Leveling Scoreboard for AUTO settings.\n");
+ for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) {
+ rtt_nom = imp_values->rtt_nom_table[rtt_idx];
+
+ /* When the read ODT mask is zero, the dyn_rtt_nom_mask is
+ zero and RTT_NOM will not be changing during
+ read-leveling. Since the value is fixed we only need
+ to test it once. */
+ if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
+ continue;
+
+ for (rodt_ctl=max_rodt_ctl; rodt_ctl>=min_rodt_ctl; --rodt_ctl) {
+ rodt_score = 0;
+ for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+ debug_print("rlevel_scoreboard[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score:%d\n",
+ rtt_nom, rodt_ctl, rankx, rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score);
+ rodt_score += rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score;
+ }
+ // FIXME: do we need to skip RODT rows here, like we do below in the by-RANK settings?
+
+ /* When using automatic ODT settings use the ODT
+ settings associated with the best score for
+ all of the tested ODT combinations. */
+
+ if ((rodt_score < best_rodt_score) || // always take lower score, OR
+ ((rodt_score == best_rodt_score) && // take same score if RODT ohms are higher
+ (imp_values->rodt_ohms[rodt_ctl] > imp_values->rodt_ohms[auto_rodt_ctl])))
+ {
+ debug_print("AUTO: new best score for rodt:%d (%3d), new score:%d, previous score:%d\n",
+ rodt_ctl, imp_values->rodt_ohms[rodt_ctl], rodt_score, best_rodt_score);
+ best_rodt_score = rodt_score;
+ auto_rodt_ctl = rodt_ctl;
+ auto_rtt_nom = rtt_nom;
+ }
+ } /* for (rodt_ctl=max_rodt_ctl; rodt_ctl>=min_rodt_ctl; --rodt_ctl) */
+ } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
+
+ lmc_modereg_params1.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num));
+
+ if (ddr_rtt_nom_auto) {
+ /* Store the automatically set RTT_NOM value */
+ if (dyn_rtt_nom_mask & 1) lmc_modereg_params1.s.rtt_nom_00 = auto_rtt_nom;
+ if (dyn_rtt_nom_mask & 2) lmc_modereg_params1.s.rtt_nom_01 = auto_rtt_nom;
+ if (dyn_rtt_nom_mask & 4) lmc_modereg_params1.s.rtt_nom_10 = auto_rtt_nom;
+ if (dyn_rtt_nom_mask & 8) lmc_modereg_params1.s.rtt_nom_11 = auto_rtt_nom;
+ } else {
+ /* restore the manual settings to the register */
+ lmc_modereg_params1.s.rtt_nom_00 = default_rtt_nom[0];
+ lmc_modereg_params1.s.rtt_nom_01 = default_rtt_nom[1];
+ lmc_modereg_params1.s.rtt_nom_10 = default_rtt_nom[2];
+ lmc_modereg_params1.s.rtt_nom_11 = default_rtt_nom[3];
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num), lmc_modereg_params1.u);
+ VB_PRT(VBL_DEV, "RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_11],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_10],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_01],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00],
+ lmc_modereg_params1.s.rtt_nom_11,
+ lmc_modereg_params1.s.rtt_nom_10,
+ lmc_modereg_params1.s.rtt_nom_01,
+ lmc_modereg_params1.s.rtt_nom_00);
+
+ VB_PRT(VBL_DEV, "RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 3)],
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 2)],
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 1)],
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 0)],
+ EXTR_WR(lmc_modereg_params1.u, 3),
+ EXTR_WR(lmc_modereg_params1.u, 2),
+ EXTR_WR(lmc_modereg_params1.u, 1),
+ EXTR_WR(lmc_modereg_params1.u, 0));
+
+ VB_PRT(VBL_DEV, "DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->dic_ohms[lmc_modereg_params1.s.dic_11],
+ imp_values->dic_ohms[lmc_modereg_params1.s.dic_10],
+ imp_values->dic_ohms[lmc_modereg_params1.s.dic_01],
+ imp_values->dic_ohms[lmc_modereg_params1.s.dic_00],
+ lmc_modereg_params1.s.dic_11,
+ lmc_modereg_params1.s.dic_10,
+ lmc_modereg_params1.s.dic_01,
+ lmc_modereg_params1.s.dic_00);
+
+ if (ddr_type == DDR4_DRAM) {
+ bdk_lmcx_modereg_params2_t lmc_modereg_params2;
+ /*
+ * We must read the CSR, and not depend on odt_config[odt_idx].odt_mask2,
+ * since we could have overridden values with envvars.
+ * NOTE: this corrects the printout, since the CSR is not written with the old values...
+ */
+ lmc_modereg_params2.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num));
+
+ VB_PRT(VBL_DEV, "RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_11],
+ imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_10],
+ imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_01],
+ imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_00],
+ lmc_modereg_params2.s.rtt_park_11,
+ lmc_modereg_params2.s.rtt_park_10,
+ lmc_modereg_params2.s.rtt_park_01,
+ lmc_modereg_params2.s.rtt_park_00);
+
+ VB_PRT(VBL_DEV, "%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE",
+ lmc_modereg_params2.s.vref_range_11,
+ lmc_modereg_params2.s.vref_range_10,
+ lmc_modereg_params2.s.vref_range_01,
+ lmc_modereg_params2.s.vref_range_00);
+
+ VB_PRT(VBL_DEV, "%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE",
+ lmc_modereg_params2.s.vref_value_11,
+ lmc_modereg_params2.s.vref_value_10,
+ lmc_modereg_params2.s.vref_value_01,
+ lmc_modereg_params2.s.vref_value_00);
+ }
+
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ if (ddr_rodt_ctl_auto)
+ lmc_comp_ctl2.s.rodt_ctl = auto_rodt_ctl;
+ else
+ lmc_comp_ctl2.s.rodt_ctl = default_rodt_ctl; // back to the original setting
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ VB_PRT(VBL_DEV, "Read ODT_CTL : 0x%x (%d ohms)\n",
+ lmc_comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[lmc_comp_ctl2.s.rodt_ctl]);
+
+ ////////////////// this is the start of the RANK MAJOR LOOP
+
+ for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+ int best_rank_score = DEFAULT_BEST_RANK_SCORE; /* Start with an arbitrarily high score */
+ int best_rank_rtt_nom = 0;
+ //int best_rank_nom_ohms = 0;
+ int best_rank_ctl = 0;
+ int best_rank_ohms = 0;
+ int best_rankx = 0;
+
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ /* Use the delays associated with the best score for each individual rank */
+ VB_PRT(VBL_TME, "Evaluating Read-Leveling Scoreboard for Rank %d settings.\n", rankx);
+
+ // some of the rank-related loops below need to operate only on the ranks of a single DIMM,
+ // so create a mask for their use here
+ int dimm_rank_mask;
+ if (num_ranks == 4)
+ dimm_rank_mask = rank_mask; // should be 1111
+ else {
+ dimm_rank_mask = rank_mask & 3; // should be 01 or 11
+ if (rankx >= 2)
+ dimm_rank_mask <<= 2; // doing a rank on the second DIMM, should be 0100 or 1100
+ }
+ debug_print("DIMM rank mask: 0x%x, rank mask: 0x%x, rankx: %d\n", dimm_rank_mask, rank_mask, rankx);
+
+ ////////////////// this is the start of the BEST ROW SCORE LOOP
+
+ for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
+ //int rtt_nom_ohms;
+ rtt_nom = imp_values->rtt_nom_table[rtt_idx];
+ //rtt_nom_ohms = imp_values->rtt_nom_ohms[rtt_nom];
+
+ /* When the read ODT mask is zero, the dyn_rtt_nom_mask is
+ zero and RTT_NOM will not be changing during
+ read-leveling. Since the value is fixed we only need
+ to test it once. */
+ if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
+ continue;
+
+ debug_print("N%d.LMC%d.R%d: starting RTT_NOM %d (%d)\n",
+ node, ddr_interface_num, rankx, rtt_nom,
+ imp_values->rtt_nom_ohms[rtt_nom]);
+
+ for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
+ int next_ohms = imp_values->rodt_ohms[rodt_ctl];
+
+ // skip RODT rows in mask, but *NOT* rows with too high a score;
+ // we will not use the skipped ones for printing or evaluating, but
+ // we need to allow all the non-skipped ones to be candidates for "best"
+ if (((1 << rodt_ctl) & rodt_row_skip_mask) != 0) {
+ debug_print("N%d.LMC%d.R%d: SKIPPING rodt:%d (%d)\n",
+ node, ddr_interface_num, rankx, rodt_ctl, next_ohms);
+ continue;
+ }
+ for (int orankx = 0; orankx < dimm_count * 4; orankx++) { // this is ROFFIX-0528
+ if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
+ continue;
+
+ int next_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
+
+ if (next_score > best_rank_score) // always skip a higher score
+ continue;
+ if (next_score == best_rank_score) { // if scores are equal
+ if (next_ohms < best_rank_ohms) // always skip lower ohms
+ continue;
+ if (next_ohms == best_rank_ohms) { // if same ohms
+ if (orankx != rankx) // always skip the other rank(s)
+ continue;
+ }
+ // else next_ohms are greater, always choose it
+ }
+ // else next_score is less than current best, so always choose it
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: new best score: rank %d, rodt %d(%3d), new best %d, previous best %d(%d)\n",
+ node, ddr_interface_num, rankx, orankx, rodt_ctl, next_ohms, next_score,
+ best_rank_score, best_rank_ohms);
+ best_rank_score = next_score;
+ best_rank_rtt_nom = rtt_nom;
+ //best_rank_nom_ohms = rtt_nom_ohms;
+ best_rank_ctl = rodt_ctl;
+ best_rank_ohms = next_ohms;
+ best_rankx = orankx;
+ lmc_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
+
+ } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) */
+ } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
+ } /* for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) */
+
+ ////////////////// this is the end of the BEST ROW SCORE LOOP
+
+ // DANGER, Will Robinson!! Abort now if we did not find a best score at all...
+ if (best_rank_score == DEFAULT_BEST_RANK_SCORE) {
+ error_print("WARNING: no best rank score found for N%d.LMC%d.R%d - resetting node...\n",
+ node, ddr_interface_num, rankx);
+ bdk_wait_usec(500000);
+ bdk_reset_chip(node);
+ }
+
+ // FIXME: relative now, but still arbitrary...
+ // halve the range if 2 DIMMs unless they are single rank...
+ int MAX_RANK_SCORE = best_rank_score;
+ MAX_RANK_SCORE += (MAX_RANK_SCORE_LIMIT / ((num_ranks > 1) ? dimm_count : 1));
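+ // e.g. with two DIMMs of more than one rank each, rows scoring more than
+ // best_rank_score + MAX_RANK_SCORE_LIMIT/2 are skipped as candidates below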
+
+ if (!ecc_ena){
+ lmc_rlevel_rank.cn83xx.byte8 = lmc_rlevel_rank.cn83xx.byte0; /* ECC is not used */
+ }
+
+ // at the end, write the best row settings to the current rank
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u);
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+
+ bdk_lmcx_rlevel_rankx_t saved_rlevel_rank;
+ saved_rlevel_rank.u = lmc_rlevel_rank.u;
+
+ ////////////////// this is the start of the PRINT LOOP
+
+ // for pass==0, print current rank, pass==1 print other rank(s)
+ // this is done because we want to show each rank's RODT values together, not interlaced
+#if COUNT_RL_CANDIDATES
+ // keep separate counts per pass - pass=0 is the target rank, pass=1 the other rank(s) on the DIMM
+ int mask_skipped[2] = {0,0};
+ int score_skipped[2] = {0,0};
+ int selected_rows[2] = {0,0};
+ int zero_scores[2] = {0,0};
+#endif /* COUNT_RL_CANDIDATES */
+ for (int pass = 0; pass < 2; pass++ ) {
+ for (int orankx = 0; orankx < dimm_count * 4; orankx++) {
+ if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
+ continue;
+
+ if (((pass == 0) && (orankx != rankx)) || ((pass != 0) && (orankx == rankx)))
+ continue;
+
+ for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
+ rtt_nom = imp_values->rtt_nom_table[rtt_idx];
+ if (dyn_rtt_nom_mask == 0) {
+ print_nom_ohms = -1;
+ if (rtt_idx != min_rtt_nom_idx)
+ continue;
+ } else {
+ print_nom_ohms = imp_values->rtt_nom_ohms[rtt_nom];
+ }
+
+ // cycle through all the RODT values...
+ for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
+ bdk_lmcx_rlevel_rankx_t temp_rlevel_rank;
+ int temp_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
+ temp_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
+
+ // skip RODT rows in mask, or rows with too high a score;
+ // we will not use them for printing or evaluating...
+#if COUNT_RL_CANDIDATES
+ int skip_row;
+ if ((1 << rodt_ctl) & rodt_row_skip_mask) {
+ skip_row = WITH_RODT_SKIPPING;
+ ++mask_skipped[pass];
+ } else if (temp_score > MAX_RANK_SCORE) {
+ skip_row = WITH_RODT_SKIPPING;
+ ++score_skipped[pass];
+ } else {
+ skip_row = WITH_RODT_BLANK;
+ ++selected_rows[pass];
+ if (temp_score == 0)
+ ++zero_scores[pass];
+ }
+
+#else /* COUNT_RL_CANDIDATES */
+ int skip_row = (((1 << rodt_ctl) & rodt_row_skip_mask) || (temp_score > MAX_RANK_SCORE))
+ ? WITH_RODT_SKIPPING: WITH_RODT_BLANK;
+#endif /* COUNT_RL_CANDIDATES */
+
+ // identify and print the BEST ROW when it comes up
+ if ((skip_row == WITH_RODT_BLANK) &&
+ (best_rankx == orankx) &&
+ (best_rank_rtt_nom == rtt_nom) &&
+ (best_rank_ctl == rodt_ctl))
+ {
+ skip_row = WITH_RODT_BESTROW;
+ }
+
+ display_RL_with_RODT(node, ddr_interface_num,
+ temp_rlevel_rank, orankx, temp_score,
+ print_nom_ohms,
+ imp_values->rodt_ohms[rodt_ctl],
+ skip_row);
+
+ } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
+ } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
+ } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) { */
+ } /* for (int pass = 0; pass < 2; pass++ ) */
+#if COUNT_RL_CANDIDATES
+ VB_PRT(VBL_TME, "N%d.LMC%d.R%d: RLROWS: selected %d+%d, zero_scores %d+%d, mask_skipped %d+%d, score_skipped %d+%d\n",
+ node, ddr_interface_num, rankx,
+ selected_rows[0], selected_rows[1],
+ zero_scores[0], zero_scores[1],
+ mask_skipped[0], mask_skipped[1],
+ score_skipped[0], score_skipped[1]);
+#endif /* COUNT_RL_CANDIDATES */
+
+ ////////////////// this is the end of the PRINT LOOP
+
+ // now evaluate which bytes need adjusting
+ uint64_t byte_msk = 0x3f; // 6-bit fields
+ uint64_t best_byte, new_byte, temp_byte, orig_best_byte;
+
+ uint64_t rank_best_bytes[9]; // collect the new byte values; first init with current best for neighbor use
+ for (int byte_idx = 0, byte_sh = 0; byte_idx < 8+ecc_ena; byte_idx++, byte_sh += 6) {
+ rank_best_bytes[byte_idx] = (lmc_rlevel_rank.u >> byte_sh) & byte_msk;
+ }
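+ // note: RLEVEL_RANK packs the nine byte-lane delays as consecutive 6-bit fields,
+ // so byte N lives at bits [6N+5:6N] (e.g. byte 2 occupies bits 17:12)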
+
+ ////////////////// this is the start of the BEST BYTE LOOP
+
+ for (int byte_idx = 0, byte_sh = 0; byte_idx < 8+ecc_ena; byte_idx++, byte_sh += 6) {
+ best_byte = orig_best_byte = rank_best_bytes[byte_idx];
+
+ ////////////////// this is the start of the BEST BYTE AVERAGING LOOP
+
+ // validate the initial "best" byte by looking at the average of the unskipped byte-column entries
+ // we want to do this before we go further, so we can try to start with a better initial value
+ // this is the so-called "BESTBUY" patch set
+ int sum = 0, count = 0;
+
+ for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
+ rtt_nom = imp_values->rtt_nom_table[rtt_idx];
+ if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
+ continue;
+
+ for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
+ bdk_lmcx_rlevel_rankx_t temp_rlevel_rank;
+ int temp_score;
+ for (int orankx = 0; orankx < dimm_count * 4; orankx++) { // average over all the ranks
+ if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
+ continue;
+ temp_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
+ // skip RODT rows in mask, or rows with too high a score;
+ // we will not use them for printing or evaluating...
+
+ if (!((1 << rodt_ctl) & rodt_row_skip_mask) &&
+ (temp_score <= MAX_RANK_SCORE))
+ {
+ temp_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
+ temp_byte = (temp_rlevel_rank.u >> byte_sh) & byte_msk;
+ sum += temp_byte;
+ count++;
+ }
+ } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) */
+ } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
+ } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
+
+ ////////////////// this is the end of the BEST BYTE AVERAGING LOOP
+
+
+ uint64_t avg_byte = divide_nint(sum, count); // FIXME: validate count and sum??
+ int avg_diff = (int)best_byte - (int)avg_byte;
+ new_byte = best_byte;
+ if (avg_diff != 0) {
+ // bump best up/dn by 1, not necessarily all the way to avg
+ new_byte = best_byte + ((avg_diff > 0) ? -1: 1);
+ }
+
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: START: Byte %d: best %d is different by %d from average %d, using %d.\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)best_byte, avg_diff, (int)avg_byte, (int)new_byte);
+ best_byte = new_byte;
+
+ // At this point best_byte is either:
+ // 1. the original byte-column value from the best scoring RODT row, OR
+ // 2. that value bumped toward the average of all the byte-column values
+ //
+ // best_byte will not change from here on...
+
+ ////////////////// this is the start of the BEST BYTE COUNTING LOOP
+
+ // NOTE: we do this next loop separately from above, because we count relative to "best_byte"
+ // which may have been modified by the above averaging operation...
+ //
+ // Also, the above only moves toward the average by +- 1, so that we will always have a count
+ // of at least 1 for the original best byte, even if all the others are further away and not counted;
+ // this ensures we will go back to the original if no others are counted...
+ // FIXME: this could cause issues if the range of values for a byte-lane is too disparate...
+ int count_less = 0, count_same = 0, count_more = 0;
+#if FAILSAFE_CHECK
+ uint64_t count_byte = new_byte; // save the value we will count around
+#endif /* FAILSAFE_CHECK */
+#if RANK_MAJORITY
+ int rank_less = 0, rank_same = 0, rank_more = 0;
+#endif /* RANK_MAJORITY */
+
+ for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
+ rtt_nom = imp_values->rtt_nom_table[rtt_idx];
+ if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
+ continue;
+
+ for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
+ bdk_lmcx_rlevel_rankx_t temp_rlevel_rank;
+ int temp_score;
+ for (int orankx = 0; orankx < dimm_count * 4; orankx++) { // count over all the ranks
+ if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
+ continue;
+ temp_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
+ // skip RODT rows in mask, or rows with too high a score;
+ // we will not use them for printing or evaluating...
+ if (((1 << rodt_ctl) & rodt_row_skip_mask) ||
+ (temp_score > MAX_RANK_SCORE))
+ {
+ continue;
+ }
+ temp_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
+ temp_byte = (temp_rlevel_rank.u >> byte_sh) & byte_msk;
+
+ if (temp_byte == 0) // do not count it if illegal
+ continue;
+ else if (temp_byte == best_byte)
+ count_same++;
+ else if (temp_byte == best_byte - 1)
+ count_less++;
+ else if (temp_byte == best_byte + 1)
+ count_more++;
+ // else do not count anything more than 1 away from the best
+#if RANK_MAJORITY
+ // FIXME? count is relative to best_byte; should it be rank-based?
+ if (orankx != rankx) // rank counts only on main rank
+ continue;
+ else if (temp_byte == best_byte)
+ rank_same++;
+ else if (temp_byte == best_byte - 1)
+ rank_less++;
+ else if (temp_byte == best_byte + 1)
+ rank_more++;
+#endif /* RANK_MAJORITY */
+ } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) */
+ } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
+ } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
+
+#if RANK_MAJORITY
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: COUNT: Byte %d: orig %d now %d, more %d same %d less %d (%d/%d/%d)\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)orig_best_byte, (int)best_byte,
+ count_more, count_same, count_less,
+ rank_more, rank_same, rank_less);
+#else /* RANK_MAJORITY */
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: COUNT: Byte %d: orig %d now %d, more %d same %d less %d\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)orig_best_byte, (int)best_byte,
+ count_more, count_same, count_less);
+#endif /* RANK_MAJORITY */
+ ////////////////// this is the end of the BEST BYTE COUNTING LOOP
+
+ // choose the new byte value;
+ // we need to check that there is no gap greater than 2 between adjacent bytes
+ // (adjacency depends on DIMM type),
+ // and use the neighbor value to help decide;
+ // initially, rank_best_bytes[] will contain values from the chosen lowest-scoring rank
+ new_byte = 0;
+
+ // neighbor is index-1 unless we are index 0 or index 8 (ECC)
+ int neighbor = (byte_idx == 8) ? 3 : ((byte_idx == 0) ? 1 : byte_idx - 1);
+ uint64_t neigh_byte = rank_best_bytes[neighbor];
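+ // (byte 8 is the ECC lane, which is estimated from bytes 3/4 elsewhere,
+ // so byte 3 is presumably its closest physical neighbor)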
+
+
+ // can go up or down or stay the same, so look at a numeric average to help
+ new_byte = divide_nint(((count_more * (best_byte + 1)) +
+ (count_same * (best_byte + 0)) +
+ (count_less * (best_byte - 1))),
+ max(1, (count_more + count_same + count_less)));
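+ // e.g. counts (more,same,less) = (1,3,0) around best 10 gives
+ // divide_nint(1*11 + 3*10 + 0*9, 4) = divide_nint(41,4) = 10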
+
+ // use neighbor to help choose with average
+ if ((byte_idx > 0) && (_abs(neigh_byte - new_byte) > 2)) // but not for byte 0
+ {
+ uint64_t avg_pick = new_byte;
+ if ((new_byte - best_byte) != 0)
+ new_byte = best_byte; // back to best, average did not get better
+ else // avg was the same, still too far, now move it towards the neighbor
+ new_byte += (neigh_byte > new_byte) ? 1 : -1;
+
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: AVERAGE: Byte %d: neighbor %d too different %d from average %d, picking %d.\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, neighbor, (int)neigh_byte, (int)avg_pick, (int)new_byte);
+ }
+#if MAJORITY_OVER_AVG
+ // NOTE:
+ // For now, we let the neighbor processing above trump the new simple majority processing here.
+ // This is mostly because we have seen no smoking gun for a neighbor bad choice (yet?).
+ // Also note that we will ALWAYS be using byte 0 majority, because of the if clause above.
+ else {
+ // majority is dependent on the counts, which are relative to best_byte, so start there
+ uint64_t maj_byte = best_byte;
+ if ((count_more > count_same) && (count_more > count_less)) {
+ maj_byte++;
+ } else if ((count_less > count_same) && (count_less > count_more)) {
+ maj_byte--;
+ }
+ if (maj_byte != new_byte) {
+ // print only when majority choice is different from average
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: MAJORTY: Byte %d: picking majority of %d over average %d.\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)maj_byte, (int)new_byte);
+ new_byte = maj_byte;
+ } else {
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)new_byte);
+ }
+#if RANK_MAJORITY
+ // rank majority is dependent on the rank counts, which are relative to best_byte,
+ // so start there, and adjust according to the rank counts majority
+ uint64_t rank_maj = best_byte;
+ if ((rank_more > rank_same) && (rank_more > rank_less)) {
+ rank_maj++;
+ } else if ((rank_less > rank_same) && (rank_less > rank_more)) {
+ rank_maj--;
+ }
+ int rank_sum = rank_more + rank_same + rank_less;
+
+ // now, let rank majority possibly rule over the current new_byte however we got it
+ if (rank_maj != new_byte) { // only if different
+ // Here is where we decide whether to completely apply RANK_MAJORITY or not
+ // FIXME: For the moment, we do it ONLY when running 2-slot configs
+ // FIXME? or when rank_sum is big enough?
+ if ((dimm_count > 1) || (rank_sum > 2)) {
+ // print only when rank majority choice is selected
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: RANKMAJ: Byte %d: picking %d over %d.\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)rank_maj, (int)new_byte);
+ new_byte = rank_maj;
+ } else { // FIXME: print some info when we could have chosen RANKMAJ but did not
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: RANKMAJ: Byte %d: NOT using %d over %d (best=%d,sum=%d).\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)rank_maj, (int)new_byte,
+ (int)best_byte, rank_sum);
+ }
+ }
+#endif /* RANK_MAJORITY */
+ }
+#else
+ else {
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)new_byte);
+ }
+#endif
+#if FAILSAFE_CHECK
+ // one last check:
+ // if new_byte is still count_byte, BUT there was no count for that value, DO SOMETHING!!!
+ // FIXME: go back to original best byte from the best row
+ if ((new_byte == count_byte) && (count_same == 0)) {
+ new_byte = orig_best_byte;
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: FAILSAF: Byte %d: going back to original %d.\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)new_byte);
+ }
+#endif /* FAILSAFE_CHECK */
+#if PERFECT_BITMASK_COUNTING
+ // Look at counts for "perfect" bitmasks if we had any for this byte-lane.
+ // Remember, we only counted for DDR4, so zero means none or DDR3, and we bypass this...
+ if (rank_perfect_counts[rankx].total[byte_idx] > 0) {
+ // FIXME: should be more error checking, look for ties, etc...
+ int i, delay_count, delay_value, delay_max;
+ uint32_t ties;
+ delay_value = -1;
+ delay_max = 0;
+ ties = 0;
+
+ for (i = 0; i < 32; i++) {
+ delay_count = rank_perfect_counts[rankx].count[byte_idx][i];
+ if (delay_count > 0) { // only look closer if there are any...
+ if (delay_count > delay_max) {
+ delay_max = delay_count;
+ delay_value = i;
+ ties = 0; // reset ties to none
+ } else if (delay_count == delay_max) {
+ if (ties == 0)
+ ties = 1UL << delay_value; // put in original value
+ ties |= 1UL << i; // add new value
+ }
+ }
+ } /* for (i = 0; i < 32; i++) */
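+ // the scan keeps the delay with the highest "perfect" count in delay_value/delay_max;
+ // equal counts are remembered as a bitmask of the tied delay values in 'ties'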
+
+ if (delay_value >= 0) {
+ if (ties != 0) {
+ if (ties & (1UL << (int)new_byte)) {
+ // leave choice as new_byte if any tied one is the same...
+ delay_value = (int)new_byte;
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: TIES (0x%x) INCLUDED %d (%d)\n",
+ node, ddr_interface_num, rankx, byte_idx, ties, (int)new_byte, delay_max);
+ } else {
+ // FIXME: should choose a perfect one!!!
+ // FIXME: for now, leave the choice as new_byte
+ delay_value = (int)new_byte;
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: TIES (0x%x) OMITTED %d (%d)\n",
+ node, ddr_interface_num, rankx, byte_idx, ties, (int)new_byte, delay_max);
+ }
+ } /* if (ties != 0) */
+
+ if (delay_value != (int)new_byte) {
+ delay_count = rank_perfect_counts[rankx].count[byte_idx][(int)new_byte];
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: DIFF from %d (%d), USING %d (%d)\n",
+ node, ddr_interface_num, rankx, byte_idx, (int)new_byte,
+ delay_count, delay_value, delay_max);
+ new_byte = (uint64_t)delay_value; // FIXME: make this optional via envvar?
+ } else {
+ debug_print("N%d.LMC%d.R%d: PERFECT: Byte %d: SAME as %d (%d)\n",
+ node, ddr_interface_num, rankx, byte_idx, new_byte, delay_max);
+ }
+ }
+ } /* if (rank_perfect_counts[rankx].total[byte_idx] > 0) */
+ else {
+ if (ddr_type == DDR4_DRAM) { // only report when DDR4
+ // FIXME: remove or increase VBL for this output...
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: ZERO perfect bitmasks\n",
+ node, ddr_interface_num, rankx, byte_idx);
+ }
+ } /* if (rank_perfect_counts[rankx].total[byte_idx] > 0) */
+#endif /* PERFECT_BITMASK_COUNTING */
+
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: SUMMARY: Byte %d: %s: orig %d now %d, more %d same %d less %d, using %d\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, "AVG", (int)orig_best_byte,
+ (int)best_byte, count_more, count_same, count_less, (int)new_byte);
+
+ // update the byte with the new value (NOTE: orig value in the CSR may not be current "best")
+ lmc_rlevel_rank.u &= ~(byte_msk << byte_sh);
+ lmc_rlevel_rank.u |= (new_byte << byte_sh);
+
+ rank_best_bytes[byte_idx] = new_byte; // save new best for neighbor use
+
+ } /* for (byte_idx = 0; byte_idx < 8+ecc_ena; byte_idx++) */
+
+ ////////////////// this is the end of the BEST BYTE LOOP
+
+ if (saved_rlevel_rank.u != lmc_rlevel_rank.u) {
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u);
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+ debug_print("Adjusting Read-Leveling per-RANK settings.\n");
+ } else {
+ debug_print("Not Adjusting Read-Leveling per-RANK settings.\n");
+ }
+ display_RL_with_final(node, ddr_interface_num, lmc_rlevel_rank, rankx);
+
+#if RLEXTRAS_PATCH
+#define RLEVEL_RANKX_EXTRAS_INCR 4
+ if ((rank_mask & 0x0F) != 0x0F) { // if there are unused entries to be filled
+ bdk_lmcx_rlevel_rankx_t temp_rlevel_rank = lmc_rlevel_rank; // copy the current rank
+ int byte, delay;
+ if (rankx < 3) {
+ debug_print("N%d.LMC%d.R%d: checking for RLEVEL_RANK unused entries.\n",
+ node, ddr_interface_num, rankx);
+ for (byte = 0; byte < 9; byte++) { // modify the copy in prep for writing to empty slot(s)
+ delay = get_rlevel_rank_struct(&temp_rlevel_rank, byte) + RLEVEL_RANKX_EXTRAS_INCR;
+ if (delay > (int)RLEVEL_BYTE_MSK) delay = RLEVEL_BYTE_MSK;
+ update_rlevel_rank_struct(&temp_rlevel_rank, byte, delay);
+ }
+ if (rankx == 0) { // if rank 0, write rank 1 and rank 2 here if empty
+ if (!(rank_mask & (1<<1))) { // check that rank 1 is empty
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
+ node, ddr_interface_num, rankx, 1);
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, 1), temp_rlevel_rank.u);
+ }
+ if (!(rank_mask & (1<<2))) { // check that rank 2 is empty
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
+ node, ddr_interface_num, rankx, 2);
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, 2), temp_rlevel_rank.u);
+ }
+ }
+ // if ranks 0, 1 or 2, write rank 3 here if empty
+ if (!(rank_mask & (1<<3))) { // check that rank 3 is empty
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
+ node, ddr_interface_num, rankx, 3);
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, 3), temp_rlevel_rank.u);
+ }
+ }
+ }
+#endif /* RLEXTRAS_PATCH */
+ } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
+
+ ////////////////// this is the end of the RANK MAJOR LOOP
+
+ } /* Evaluation block */
+ } /* while(rlevel_debug_loops--) */
+
+ lmc_control.s.ddr2t = save_ddr2t;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ ddr_print("%-45s : %6d\n", "DDR2T", lmc_control.s.ddr2t); /* Display final 2T value */
+
+
+ perform_ddr_init_sequence(node, rank_mask, ddr_interface_num);
+
+ for (rankx = 0; rankx < dimm_count * 4;rankx++) {
+ uint64_t value;
+ int parameter_set = 0;
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+
+ for (i=0; i<9; ++i) {
+ if ((s = lookup_env_parameter("ddr%d_rlevel_rank%d_byte%d", ddr_interface_num, rankx, i)) != NULL) {
+ parameter_set |= 1;
+ value = strtoul(s, NULL, 0);
+
+ update_rlevel_rank_struct(&lmc_rlevel_rank, i, value);
+ }
+ }
+
+ if ((s = lookup_env_parameter_ull("ddr%d_rlevel_rank%d", ddr_interface_num, rankx)) != NULL) {
+ parameter_set |= 1;
+ value = strtoull(s, NULL, 0);
+ lmc_rlevel_rank.u = value;
+ }
+
+
+ if (bdk_is_platform(BDK_PLATFORM_ASIM)) {
+ parameter_set |= 1;
+
+ lmc_rlevel_rank.cn83xx.byte8 = 3;
+ lmc_rlevel_rank.cn83xx.byte7 = 3;
+ lmc_rlevel_rank.cn83xx.byte6 = 3;
+ lmc_rlevel_rank.cn83xx.byte5 = 3;
+ lmc_rlevel_rank.cn83xx.byte4 = 3;
+ lmc_rlevel_rank.cn83xx.byte3 = 3;
+ lmc_rlevel_rank.cn83xx.byte2 = 3;
+ lmc_rlevel_rank.cn83xx.byte1 = 3;
+ lmc_rlevel_rank.cn83xx.byte0 = 3;
+ }
+
+ if (parameter_set) {
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u);
+ display_RL(node, ddr_interface_num, lmc_rlevel_rank, rankx);
+ }
+ }
+ }
+
+ /* Workaround Trcd overflow by using Additive latency. */
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X))
+ {
+ bdk_lmcx_modereg_params0_t lmc_modereg_params0;
+ bdk_lmcx_timing_params1_t lmc_timing_params1;
+ bdk_lmcx_control_t lmc_control;
+ int rankx;
+
+ lmc_timing_params1.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num));
+ lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+
+ if (lmc_timing_params1.s.trcd == 0) {
+ ddr_print("Workaround Trcd overflow by using Additive latency.\n");
+ lmc_timing_params1.s.trcd = 12; /* Hard code this to 12 and enable additive latency */
+ lmc_modereg_params0.s.al = 2; /* CL-2 */
+ lmc_control.s.pocas = 1;
+
+ ddr_print("MODEREG_PARAMS0 : 0x%016lx\n", lmc_modereg_params0.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
+ ddr_print("TIMING_PARAMS1 : 0x%016lx\n", lmc_timing_params1.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num), lmc_timing_params1.u);
+
+ ddr_print("LMC_CONTROL : 0x%016lx\n", lmc_control.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ ddr4_mrw(node, ddr_interface_num, rankx, -1, 1, 0); /* MR1 */
+ }
+ }
+ }
+
+ // this is here just for output, to allow a check of the Deskew settings one last time...
+ if (! disable_deskew_training) {
+ deskew_counts_t dsk_counts;
+ VB_PRT(VBL_TME, "N%d.LMC%d: Check Deskew Settings before software Write-Leveling.\n",
+ node, ddr_interface_num);
+ Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, VBL_TME); // TME for FINAL
+ }
+
+
+ /* Workaround Errata 26304 (T88@2.0)
+
+ When the CSRs LMCX_DLL_CTL3[WR_DESKEW_ENA] = 1 AND
+ LMCX_PHY_CTL2[DQS[0..8]_DSK_ADJ] > 4, set
+ LMCX_EXT_CONFIG[DRIVE_ENA_BPRCH] = 1.
+ */
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS2_X)) { // only for 88XX pass 2, not 81xx or 83xx
+ bdk_lmcx_dll_ctl3_t dll_ctl3;
+ bdk_lmcx_phy_ctl2_t phy_ctl2;
+ bdk_lmcx_ext_config_t ext_config;
+ int increased_dsk_adj = 0;
+ int byte;
+
+ phy_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL2(ddr_interface_num));
+ ext_config.u = BDK_CSR_READ(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num));
+ dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+
+ for (byte = 0; byte < 8; ++byte) {
+ if (!(ddr_interface_bytemask&(1<<byte)))
+ continue;
+ increased_dsk_adj |= (((phy_ctl2.u >> (byte*3)) & 0x7) > 4);
+ }
+
+ if ((dll_ctl3.s.wr_deskew_ena == 1) && increased_dsk_adj) {
+ ext_config.s.drive_ena_bprch = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num),
+ ext_config.u);
+ }
+ }
+
+ /*
+ * 6.9.13 DRAM Vref Training for DDR4
+ *
+ * This includes software write-leveling
+ */
+
+ { // Software Write-Leveling block
+
+ /* Try to determine/optimize write-level delays experimentally. */
+#pragma pack(push,1)
+ bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank;
+ bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank_hw_results;
+ int byte;
+ int delay;
+ int rankx = 0;
+ int active_rank;
+#if !DISABLE_SW_WL_PASS_2
+ bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank;
+ int sw_wlevel_offset = 1;
+#endif
+ int sw_wlevel_enable = 1; /* FIX... Should be customizable. */
+ int interfaces;
+ int measured_vref_flag;
+ typedef enum {
+ WL_ESTIMATED = 0, /* HW/SW wleveling failed. Results
+ estimated. */
+ WL_HARDWARE = 1, /* H/W wleveling succeeded */
+ WL_SOFTWARE = 2, /* S/W wleveling passed 2 contiguous
+ settings. */
+ WL_SOFTWARE1 = 3, /* S/W wleveling passed 1 marginal
+ setting. */
+ } sw_wl_status_t;
+
+ static const char *wl_status_strings[] = {
+ "(e)",
+ " ",
+ " ",
+ "(1)"
+ };
+ int sw_wlevel_hw_default = 1; // FIXME: make H/W assist the default now
+#pragma pack(pop)
+
+ if ((s = lookup_env_parameter("ddr_sw_wlevel_hw")) != NULL) {
+ sw_wlevel_hw_default = !!strtoul(s, NULL, 0);
+ }
+
+ // cannot use hw-assist when doing 32-bit
+ if (! ddr_interface_64b) {
+ sw_wlevel_hw_default = 0;
+ }
+
+ if ((s = lookup_env_parameter("ddr_software_wlevel")) != NULL) {
+ sw_wlevel_enable = strtoul(s, NULL, 0);
+ }
+
+#if SWL_WITH_HW_ALTS_CHOOSE_SW
+ // Choose the SW algo for SWL if any HWL alternates were found
+ // NOTE: we have to do this here, and for all ranks, since HW-assist with ECC requires ECC to be enabled
+ for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+ if (!sw_wlevel_enable)
+ break;
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ // if we are doing HW-assist, and there are alternates, switch to SW-algorithm for all
+ if (sw_wlevel_hw && hwl_alts[rankx].hwl_alt_mask) {
+ ddr_print("N%d.LMC%d.R%d: Using SW algorithm for write-leveling this rank\n",
+ node, ddr_interface_num, rankx);
+ sw_wlevel_hw_default = 0;
+ break;
+ }
+ } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
+#endif
+
+ /* Get the measured_vref setting from the config, check for an override... */
+ /* NOTE: measured_vref=1 (ON) means force use of MEASURED Vref... */
+ // NOTE: measured VREF can only be done for DDR4
+ if (ddr_type == DDR4_DRAM) {
+ measured_vref_flag = custom_lmc_config->measured_vref;
+ if ((s = lookup_env_parameter("ddr_measured_vref")) != NULL) {
+ measured_vref_flag = !!strtoul(s, NULL, 0);
+ }
+ } else {
+ measured_vref_flag = 0; // OFF for DDR3
+ }
+
+ /* Ensure ECC is disabled for DRAM tests when using the SW algorithm; otherwise leave it untouched */
+ if (!sw_wlevel_hw_default) {
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ lmc_config.s.ecc_ena = 0;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+ }
+
+#if USE_L2_WAYS_LIMIT
+ limit_l2_ways(node, 0, 0); /* Disable l2 sets for DRAM testing */
+#endif
+
+ /* We need to track absolute rank number, as well as how many
+ ** active ranks we have. Two single rank DIMMs show up as
+ ** ranks 0 and 2, but only 2 ranks are active. */
+ active_rank = 0;
+
+ interfaces = __builtin_popcount(ddr_interface_mask);
+
+#define VREF_RANGE1_LIMIT 0x33 // range1 is valid for 0x00 - 0x32
+#define VREF_RANGE2_LIMIT 0x18 // range2 is valid for 0x00 - 0x17
+// full window is valid for 0x00 to 0x4A
+// let 0x00 - 0x17 be range2, 0x18 - 0x4a be range 1
+#define VREF_LIMIT (VREF_RANGE1_LIMIT + VREF_RANGE2_LIMIT)
+#define VREF_FINAL (VREF_LIMIT - 1)
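+// The loop below walks a single composite index over both DDR4 VrefDQ ranges:
+// composite 0x00-0x17 maps to range 2 (vrange=1) with the value used as-is, and
+// composite 0x18-0x4A maps to range 1 (vrange=0) after subtracting 0x18;
+// e.g. composite 0x20 becomes range 1, value 0x08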
+
+ for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+ uint64_t rank_addr;
+ int vref_value, final_vref_value, final_vref_range = 0;
+ int start_vref_value = 0, computed_final_vref_value = -1;
+ char best_vref_values_count, vref_values_count;
+ char best_vref_values_start, vref_values_start;
+
+ int bytes_failed;
+ sw_wl_status_t byte_test_status[9];
+ sw_wl_status_t sw_wl_rank_status = WL_HARDWARE;
+ int sw_wl_failed = 0;
+ int sw_wlevel_hw = sw_wlevel_hw_default;
+
+ if (!sw_wlevel_enable)
+ break;
+
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ ddr_print("N%d.LMC%d.R%d: Performing Software Write-Leveling %s\n",
+ node, ddr_interface_num, rankx,
+ (sw_wlevel_hw) ? "with H/W assist" : "with S/W algorithm");
+
+ if ((ddr_type == DDR4_DRAM) && (num_ranks != 4)) {
+ // always compute when we can...
+ computed_final_vref_value = compute_vref_value(node, ddr_interface_num, rankx,
+ dimm_count, num_ranks, imp_values,
+ is_stacked_die);
+ if (!measured_vref_flag) // but only use it if allowed
+ start_vref_value = VREF_FINAL; // skip all the measured Vref processing, just the final setting
+ }
+
+ /* Save off the h/w wl results */
+ lmc_wlevel_rank_hw_results.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+
+ vref_values_count = 0;
+ vref_values_start = 0;
+ best_vref_values_count = 0;
+ best_vref_values_start = 0;
+
+ /* Loop one extra time using the Final Vref value. */
+ for (vref_value = start_vref_value; vref_value < VREF_LIMIT; ++vref_value) {
+ if (ddr_type == DDR4_DRAM) {
+ if (vref_value < VREF_FINAL) {
+ int vrange, vvalue;
+ if (vref_value < VREF_RANGE2_LIMIT) {
+ vrange = 1; vvalue = vref_value;
+ } else {
+ vrange = 0; vvalue = vref_value - VREF_RANGE2_LIMIT;
+ }
+ set_vref(node, ddr_interface_num, rankx,
+ vrange, vvalue);
+ } else { /* if (vref_value < VREF_FINAL) */
+ /* Print the final Vref value first. */
+
+ /* Always print the computed first if its valid */
+ if (computed_final_vref_value >= 0) {
+ ddr_print("N%d.LMC%d.R%d: Vref Computed Summary :"
+ " %2d (0x%02x)\n",
+ node, ddr_interface_num,
+ rankx, computed_final_vref_value,
+ computed_final_vref_value);
+ }
+ if (!measured_vref_flag) { // setup to use the computed
+ best_vref_values_count = 1;
+ final_vref_value = computed_final_vref_value;
+ } else { // setup to use the measured
+ if (best_vref_values_count > 0) {
+ best_vref_values_count = max(best_vref_values_count, 2);
+#if 0
+ // NOTE: this already adjusts VREF via calculating 40% rather than 50%
+ final_vref_value = best_vref_values_start + divide_roundup((best_vref_values_count-1)*4,10);
+ ddr_print("N%d.LMC%d.R%d: Vref Training Summary :"
+ " %2d <----- %2d (0x%02x) -----> %2d range: %2d\n",
+ node, ddr_interface_num, rankx, best_vref_values_start,
+ final_vref_value, final_vref_value,
+ best_vref_values_start+best_vref_values_count-1,
+ best_vref_values_count-1);
+#else
+ final_vref_value = best_vref_values_start + divide_nint(best_vref_values_count - 1, 2);
+ if (final_vref_value < VREF_RANGE2_LIMIT) {
+ final_vref_range = 1;
+ } else {
+ final_vref_range = 0; final_vref_value -= VREF_RANGE2_LIMIT;
+ }
+ {
+ int vvlo = best_vref_values_start;
+ int vrlo;
+ if (vvlo < VREF_RANGE2_LIMIT) {
+ vrlo = 2;
+ } else {
+ vrlo = 1; vvlo -= VREF_RANGE2_LIMIT;
+ }
+
+ int vvhi = best_vref_values_start + best_vref_values_count - 1;
+ int vrhi;
+ if (vvhi < VREF_RANGE2_LIMIT) {
+ vrhi = 2;
+ } else {
+ vrhi = 1; vvhi -= VREF_RANGE2_LIMIT;
+ }
+ ddr_print("N%d.LMC%d.R%d: Vref Training Summary :"
+ " 0x%02x/%1d <----- 0x%02x/%1d -----> 0x%02x/%1d, range: %2d\n",
+ node, ddr_interface_num, rankx,
+ vvlo, vrlo,
+ final_vref_value, final_vref_range + 1,
+ vvhi, vrhi,
+ best_vref_values_count-1);
+ }
+#endif
+
+ } else {
+ /* If nothing passed use the default Vref value for this rank */
+ bdk_lmcx_modereg_params2_t lmc_modereg_params2;
+ lmc_modereg_params2.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num));
+ final_vref_value = (lmc_modereg_params2.u >> (rankx * 10 + 3)) & 0x3f;
+ final_vref_range = (lmc_modereg_params2.u >> (rankx * 10 + 9)) & 0x01;
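+ // (each rank owns a 10-bit slice of MODEREG_PARAMS2: the 6-bit VREF value
+ // starts at bit rankx*10+3 and the range bit is at rankx*10+9)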
+
+ ddr_print("N%d.LMC%d.R%d: Vref Using Default :"
+ " %2d <----- %2d (0x%02x) -----> %2d, range%1d\n",
+ node, ddr_interface_num, rankx,
+ final_vref_value, final_vref_value,
+ final_vref_value, final_vref_value, final_vref_range+1);
+ }
+ }
+
+ // allow override
+ if ((s = lookup_env_parameter("ddr%d_vref_value_%1d%1d",
+ ddr_interface_num, !!(rankx&2), !!(rankx&1))) != NULL) {
+ final_vref_value = strtoul(s, NULL, 0);
+ }
+
+ set_vref(node, ddr_interface_num, rankx, final_vref_range, final_vref_value);
+
+ } /* if (vref_value < VREF_FINAL) */
+ } /* if (ddr_type == DDR4_DRAM) */
+
+ lmc_wlevel_rank.u = lmc_wlevel_rank_hw_results.u; /* Restore the saved value */
+
+ for (byte = 0; byte < 9; ++byte)
+ byte_test_status[byte] = WL_ESTIMATED;
+
+ if (wlevel_bitmask_errors == 0) {
+
+ /* Determine address of DRAM to test for pass 1 of software write leveling. */
+ rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable + (interfaces/2)));
+ // FIXME: these now put in by test_dram_byte()
+ //rank_addr |= (ddr_interface_num<<7); /* Map address into proper interface */
+ //rank_addr = bdk_numa_get_address(node, rank_addr);
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: Active Rank %d Address: 0x%lx\n",
+ node, ddr_interface_num, rankx, active_rank, rank_addr);
+
+ { // start parallel write-leveling block for delay high-order bits
+ int errors = 0;
+ int byte_delay[9];
+ uint64_t bytemask;
+ int bytes_todo;
+
+ if (ddr_interface_64b) {
+ bytes_todo = (sw_wlevel_hw) ? ddr_interface_bytemask : 0xFF;
+ bytemask = ~0ULL;
+ } else { // 32-bit, must be using SW algo, only data bytes
+ bytes_todo = 0x0f;
+ bytemask = 0x00000000ffffffffULL;
+ }
+
+ for (byte = 0; byte < 9; ++byte) {
+ if (!(bytes_todo & (1 << byte))) {
+ byte_delay[byte] = 0;
+ } else {
+ byte_delay[byte] = get_wlevel_rank_struct(&lmc_wlevel_rank, byte);
+ }
+ } /* for (byte = 0; byte < 9; ++byte) */
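+ // Coarse search: start from the HW write-leveling result and, on errors, advance
+ // each failing byte's delay in steps of 8, i.e. only the clock-granularity
+ // (high-order) portion of the delay is explored here; the low-order bits
+ // found by HW leveling are preserved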
+
+#define WL_MIN_NO_ERRORS_COUNT 3 // FIXME? three passes without errors
+ int no_errors_count = 0;
+
+ // Change verbosity depending on measured vs computed VREF, or DDR3:
+ // measured goes through SWL many times, computed and DDR3 only once,
+ // so we want the EXHAUSTED messages at NORM for computed and DDR3,
+ // and at DEV2 for measured, just for completeness
+ int vbl_local = (measured_vref_flag) ? VBL_DEV2 : VBL_NORM;
+ uint64_t bad_bits[2];
+#if ENABLE_SW_WLEVEL_UTILIZATION
+ uint64_t sum_dram_dclk = 0, sum_dram_ops = 0;
+ uint64_t start_dram_dclk, stop_dram_dclk;
+ uint64_t start_dram_ops, stop_dram_ops;
+#endif
+ do {
+ // write the current set of WL delays
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
+ lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+
+ bdk_watchdog_poke();
+
+ // do the test
+ if (sw_wlevel_hw) {
+ errors = run_best_hw_patterns(node, ddr_interface_num, rank_addr,
+ DBTRAIN_TEST, bad_bits);
+ errors &= bytes_todo; // keep only the ones we are still doing
+ } else {
+#if ENABLE_SW_WLEVEL_UTILIZATION
+ start_dram_dclk = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num));
+ start_dram_ops = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(ddr_interface_num));
+#endif
+#if USE_ORIG_TEST_DRAM_BYTE
+ errors = test_dram_byte(node, ddr_interface_num, rank_addr, bytemask, bad_bits);
+#else
+ errors = dram_tuning_mem_xor(node, ddr_interface_num, rank_addr, bytemask, bad_bits);
+#endif
+#if ENABLE_SW_WLEVEL_UTILIZATION
+ stop_dram_dclk = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num));
+ stop_dram_ops = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(ddr_interface_num));
+ sum_dram_dclk += stop_dram_dclk - start_dram_dclk;
+ sum_dram_ops += stop_dram_ops - start_dram_ops;
+#endif
+ }
+
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: S/W write-leveling TEST: returned 0x%x\n",
+ node, ddr_interface_num, rankx, errors);
+
+ // remember, errors will not be returned for byte-lanes that have maxed out...
+ if (errors == 0) {
+ no_errors_count++; // bump
+ if (no_errors_count > 1) // bypass check/update completely
+ continue; // to end of do-while
+ } else
+ no_errors_count = 0; // reset
+
+ // check errors by byte
+ for (byte = 0; byte < 9; ++byte) {
+ if (!(bytes_todo & (1 << byte)))
+ continue;
+
+ delay = byte_delay[byte];
+ if (errors & (1 << byte)) { // yes, an error in this byte lane
+ debug_print(" byte %d delay %2d Errors\n", byte, delay);
+ // since this byte had an error, we move to the next delay value, unless maxed out
+ delay += 8; // incr by 8 to do only delay high-order bits
+ if (delay < 32) {
+ update_wlevel_rank_struct(&lmc_wlevel_rank, byte, delay);
+ debug_print(" byte %d delay %2d New\n", byte, delay);
+ byte_delay[byte] = delay;
+ } else { // reached max delay, maybe really done with this byte
+#if SWL_TRY_HWL_ALT
+ if (!measured_vref_flag && // consider an alt only for computed VREF and
+ (hwl_alts[rankx].hwl_alt_mask & (1 << byte))) // if an alt exists...
+ {
+ int bad_delay = delay & 0x6; // just orig low-3 bits
+ delay = hwl_alts[rankx].hwl_alt_delay[byte]; // yes, use it
+ hwl_alts[rankx].hwl_alt_mask &= ~(1 << byte); // clear that flag
+ update_wlevel_rank_struct(&lmc_wlevel_rank, byte, delay);
+ byte_delay[byte] = delay;
+ debug_print(" byte %d delay %2d ALTERNATE\n", byte, delay);
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: SWL: Byte %d: %d FAIL, trying ALTERNATE %d\n",
+ node, ddr_interface_num, rankx, byte, bad_delay, delay);
+
+ } else
+#endif /* SWL_TRY_HWL_ALT */
+ {
+ unsigned bits_bad;
+ if (byte < 8) {
+ bytemask &= ~(0xffULL << (8*byte)); // test no longer, remove from byte mask
+ bits_bad = (unsigned)((bad_bits[0] >> (8 * byte)) & 0xffUL);
+ } else {
+ bits_bad = (unsigned)(bad_bits[1] & 0xffUL);
+ }
+ bytes_todo &= ~(1 << byte); // remove from bytes to do
+ byte_test_status[byte] = WL_ESTIMATED; // make sure this is set for this case
+ debug_print(" byte %d delay %2d Exhausted\n", byte, delay);
+ VB_PRT(vbl_local, "N%d.LMC%d.R%d: SWL: Byte %d (0x%02x): delay %d EXHAUSTED \n",
+ node, ddr_interface_num, rankx, byte, bits_bad, delay);
+ }
+ }
+ } else { // no error, stay with current delay, but keep testing it...
+ debug_print(" byte %d delay %2d Passed\n", byte, delay);
+ byte_test_status[byte] = WL_HARDWARE; // change status
+ }
+
+ } /* for (byte = 0; byte < 9; ++byte) */
+
+ } while (no_errors_count < WL_MIN_NO_ERRORS_COUNT);
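+ // (the loop exits only after WL_MIN_NO_ERRORS_COUNT consecutive clean passes,
+ // since a single error-free run may be a fluke)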
+
+#if ENABLE_SW_WLEVEL_UTILIZATION
+ if (! sw_wlevel_hw) {
+ uint64_t percent_x10;
+ if (sum_dram_dclk == 0)
+ sum_dram_dclk = 1;
+ percent_x10 = sum_dram_ops * 1000 / sum_dram_dclk;
+ ddr_print("N%d.LMC%d.R%d: ops %lu, cycles %lu, used %lu.%lu%%\n",
+ node, ddr_interface_num, rankx, sum_dram_ops, sum_dram_dclk,
+ percent_x10 / 10, percent_x10 % 10);
+ }
+#endif
+ if (errors) {
+ debug_print("End WLEV_64 while loop: vref_value %d(0x%x), errors 0x%02x\n",
+ vref_value, vref_value, errors);
+ }
+ } // end parallel write-leveling block for delay high-order bits
+
+ if (sw_wlevel_hw) { // if we used HW-assist, we did the ECC byte when approp.
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: HW-assist SWL - no ECC estimate!!!\n",
+ node, ddr_interface_num, rankx);
+ goto no_ecc_estimate;
+ }
+
+ if ((ddr_interface_bytemask & 0xff) == 0xff) {
+ if (use_ecc) {
+ int save_byte8 = lmc_wlevel_rank.s.byte8; // save original HW delay
+ byte_test_status[8] = WL_HARDWARE; /* say it is H/W delay value */
+
+ if ((save_byte8 != lmc_wlevel_rank.s.byte3) &&
+ (save_byte8 != lmc_wlevel_rank.s.byte4))
+ {
+ // try to calculate a workable delay using SW bytes 3 and 4 and HW byte 8
+ int test_byte8 = save_byte8;
+ int test_byte8_error;
+ int byte8_error = 0x1f;
+ int adder;
+ int avg_bytes = divide_nint(lmc_wlevel_rank.s.byte3+lmc_wlevel_rank.s.byte4, 2);
+ for (adder = 0; adder <= 32; adder += 8) {
+ test_byte8_error = _abs((adder+save_byte8) - avg_bytes);
+ if (test_byte8_error < byte8_error) {
+ byte8_error = test_byte8_error;
+ test_byte8 = save_byte8 + adder;
+ }
+ }
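+ // e.g. save_byte8=6 with byte3=20/byte4=18 (avg 19): candidates 6,14,22,30,38
+ // give errors 13,5,3,11,19, so test_byte8 becomes 22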
+
+#if SW_WL_CHECK_PATCH
+ // only do the check if we are not using measured VREF
+ if (!measured_vref_flag) {
+ test_byte8 &= ~1; /* Use only even settings, rounding down... */
+
+ // do validity check on the calculated ECC delay value
+ // this depends on the DIMM type
+ if (spd_rdimm) { // RDIMM
+ if (spd_dimm_type != 5) { // but not mini-RDIMM
+ // it can be > byte4, but should never be > byte3
+ if (test_byte8 > lmc_wlevel_rank.s.byte3) {
+ byte_test_status[8] = WL_ESTIMATED; /* say it is still estimated */
+ }
+ }
+ } else { // UDIMM
+ if ((test_byte8 < lmc_wlevel_rank.s.byte3) ||
+ (test_byte8 > lmc_wlevel_rank.s.byte4))
+ { // should never be outside the byte 3-4 range
+ byte_test_status[8] = WL_ESTIMATED; /* say it is still estimated */
+ }
+ }
+ /*
+ * Report whenever the calculation appears bad.
+ * This happens if some of the original values were off, or unexpected geometry
+ * from DIMM type, or custom circuitry (NIC225E, I am looking at you!).
+ * We will trust the calculated value, and depend on later testing to catch
+ * any instances when that value is truly bad.
+ */
+ if (byte_test_status[8] == WL_ESTIMATED) { // ESTIMATED means there may be an issue
+ ddr_print("N%d.LMC%d.R%d: SWL: (%cDIMM): calculated ECC delay unexpected (%d/%d/%d)\n",
+ node, ddr_interface_num, rankx, (spd_rdimm?'R':'U'),
+ lmc_wlevel_rank.s.byte4, test_byte8, lmc_wlevel_rank.s.byte3);
+ byte_test_status[8] = WL_HARDWARE;
+ }
+ }
+#endif /* SW_WL_CHECK_PATCH */
+ lmc_wlevel_rank.s.byte8 = test_byte8 & ~1; /* Use only even settings */
+ }
+
+ if (lmc_wlevel_rank.s.byte8 != save_byte8) {
+ /* Change the status if s/w adjusted the delay */
+ byte_test_status[8] = WL_SOFTWARE; /* Estimated delay */
+ }
+ } else {
+ byte_test_status[8] = WL_HARDWARE; /* H/W delay value */
+ lmc_wlevel_rank.s.byte8 = lmc_wlevel_rank.s.byte0; /* ECC is not used */
+ }
+ } else { /* if ((ddr_interface_bytemask & 0xff) == 0xff) */
+ if (use_ecc) {
+ /* Estimate the ECC byte delay */
+ lmc_wlevel_rank.s.byte4 |= (lmc_wlevel_rank.s.byte3 & 0x38); // add hi-order to b4
+ if ((lmc_wlevel_rank.s.byte4 & 0x06) < (lmc_wlevel_rank.s.byte3 & 0x06)) // orig b4 < orig b3
+ lmc_wlevel_rank.s.byte4 += 8; // must be next clock
+ } else {
+ lmc_wlevel_rank.s.byte4 = lmc_wlevel_rank.s.byte0; /* ECC is not used */
+ }
+ /* Change the status if s/w adjusted the delay */
+ byte_test_status[4] = WL_SOFTWARE; /* Estimated delay */
+ } /* if ((ddr_interface_bytemask & 0xff) == 0xff) */
+ } /* if (wlevel_bitmask_errors == 0) */
+
+ no_ecc_estimate:
+
+ bytes_failed = 0;
+ for (byte = 0; byte < 9; ++byte) {
+ /* Don't accumulate errors for untested bytes. */
+ if (!(ddr_interface_bytemask & (1 << byte)))
+ continue;
+ bytes_failed += (byte_test_status[byte] == WL_ESTIMATED);
+ }
+
+ /* Vref training loop is only used for DDR4 */
+ if (ddr_type != DDR4_DRAM)
+ break;
+
+ if (bytes_failed == 0) {
+ if (vref_values_count == 0) {
+ vref_values_start = vref_value;
+ }
+ ++vref_values_count;
+ if (vref_values_count > best_vref_values_count) {
+ best_vref_values_count = vref_values_count;
+ best_vref_values_start = vref_values_start;
+ debug_print("N%d.LMC%d.R%d: Vref Training (%2d) : 0x%02x <----- ???? -----> 0x%02x\n",
+ node, ddr_interface_num,
+ rankx, vref_value, best_vref_values_start,
+ best_vref_values_start+best_vref_values_count-1);
+ }
+ } else {
+ vref_values_count = 0;
+ debug_print("N%d.LMC%d.R%d: Vref Training (%2d) : failed\n",
+ node, ddr_interface_num,
+ rankx, vref_value);
+ }
+ } /* for (vref_value=0; vref_value<VREF_LIMIT; ++vref_value) */
+
+ /* Determine address of DRAM to test for pass 2 and final test of software write leveling. */
+ rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable + (interfaces/2)));
+ rank_addr |= (ddr_interface_num<<7); /* Map address into proper interface */
+ rank_addr = bdk_numa_get_address(node, rank_addr);
+ debug_print("N%d.LMC%d.R%d: Active Rank %d Address: 0x%lx\n",
+ node, ddr_interface_num, rankx, active_rank, rank_addr);
+
+ int errors;
+
+ if (bytes_failed) {
+
+#if !DISABLE_SW_WL_PASS_2
+
+ ddr_print("N%d.LMC%d.R%d: Starting SW Write-leveling pass 2\n",
+ node, ddr_interface_num, rankx);
+ sw_wl_rank_status = WL_SOFTWARE;
+
+ /* If previous s/w fixups failed then retry using s/w write-leveling. */
+ if (wlevel_bitmask_errors == 0) {
+ /* h/w succeeded but previous s/w fixups failed. So retry s/w. */
+ debug_print("N%d.LMC%d.R%d: Retrying software Write-Leveling.\n",
+ node, ddr_interface_num, rankx);
+ }
+
+ { // start parallel write-leveling block for delay low-order bits
+ int byte_delay[8];
+ int byte_passed[8];
+ uint64_t bytemask;
+ uint64_t bitmask;
+ int wl_offset;
+ int bytes_todo;
+
+ for (byte = 0; byte < 8; ++byte) {
+ byte_passed[byte] = 0;
+ }
+
+ bytes_todo = ddr_interface_bytemask;
+
+ for (wl_offset = sw_wlevel_offset; wl_offset >= 0; --wl_offset) {
+ debug_print("Starting wl_offset for-loop: %d\n", wl_offset);
+
+ bytemask = 0;
+
+ for (byte = 0; byte < 8; ++byte) {
+ byte_delay[byte] = 0;
+ if (!(bytes_todo & (1 << byte))) // this does not contain fully passed bytes
+ continue;
+
+ byte_passed[byte] = 0; // reset across passes if not fully passed
+ update_wlevel_rank_struct(&lmc_wlevel_rank, byte, 0); // all delays start at 0
+ bitmask = ((!ddr_interface_64b) && (byte == 4)) ? 0x0f: 0xff;
+ bytemask |= bitmask << (8*byte); // set the bytes bits in the bytemask
+ } /* for (byte = 0; byte < 8; ++byte) */
+
+ while (bytemask != 0) { // start a pass if there is any byte lane to test
+
+ debug_print("Starting bytemask while-loop: 0x%lx\n", bytemask);
+
+ // write this set of WL delays
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
+ lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+
+ bdk_watchdog_poke();
+
+ // do the test
+ if (sw_wlevel_hw)
+ errors = run_best_hw_patterns(node, ddr_interface_num, rank_addr,
+ DBTRAIN_TEST, NULL);
+ else
+ errors = test_dram_byte(node, ddr_interface_num, rank_addr, bytemask, NULL);
+
+ debug_print("SWL pass 2: test_dram_byte returned 0x%x\n", errors);
+
+ // check errors by byte
+ for (byte = 0; byte < 8; ++byte) {
+ if (!(bytes_todo & (1 << byte)))
+ continue;
+
+ delay = byte_delay[byte];
+ if (errors & (1 << byte)) { // yes, an error
+ debug_print(" byte %d delay %2d Errors\n", byte, delay);
+ byte_passed[byte] = 0;
+ } else { // no error
+ byte_passed[byte] += 1;
+ if (byte_passed[byte] == (1 + wl_offset)) { /* Look for consecutive working settings */
+ debug_print(" byte %d delay %2d FULLY Passed\n", byte, delay);
+ if (wl_offset == 1) {
+ byte_test_status[byte] = WL_SOFTWARE;
+ } else if (wl_offset == 0) {
+ byte_test_status[byte] = WL_SOFTWARE1;
+ }
+ bytemask &= ~(0xffULL << (8*byte)); // test no longer, remove from byte mask this pass
+ bytes_todo &= ~(1 << byte); // remove completely from concern
+ continue; // on to the next byte, bypass delay updating!!
+ } else {
+ debug_print(" byte %d delay %2d Passed\n", byte, delay);
+ }
+ }
+ // error or not, we now move to the next delay value for this byte, unless all delays are done;
+ // only a byte that has "fully passed" bypasses this (via the continue above)
+ delay += 2;
+ if (delay < 32) {
+ update_wlevel_rank_struct(&lmc_wlevel_rank, byte, delay);
+ debug_print(" byte %d delay %2d New\n", byte, delay);
+ byte_delay[byte] = delay;
+ } else {
+ // reached max delay, done with this byte
+ debug_print(" byte %d delay %2d Exhausted\n", byte, delay);
+ bytemask &= ~(0xffULL << (8*byte)); // test no longer, remove from byte mask this pass
+ }
+ } /* for (byte = 0; byte < 8; ++byte) */
+ debug_print("End of for-loop: bytemask 0x%lx\n", bytemask);
+
+ } /* while (bytemask != 0) */
+ } /* for (wl_offset = sw_wlevel_offset; wl_offset >= 0; --wl_offset) */
+
+ for (byte = 0; byte < 8; ++byte) {
+ // any bytes left in bytes_todo did not pass
+ if (bytes_todo & (1 << byte)) {
+ /* Last resort. Use Rlevel settings to estimate
+ Wlevel if software write-leveling fails */
+ debug_print("Using RLEVEL as WLEVEL estimate for byte %d\n", byte);
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+ rlevel_to_wlevel(&lmc_rlevel_rank, &lmc_wlevel_rank, byte);
+ }
+ } /* for (byte = 0; byte < 8; ++byte) */
+
+ sw_wl_failed = (bytes_todo != 0);
+
+ } // end parallel write-leveling block for delay low-order bits
+
+ if (use_ecc) {
+ /* ECC byte has to be estimated. Take the average of the two surrounding bytes. */
+ int test_byte8 = divide_nint(lmc_wlevel_rank.s.byte3
+ + lmc_wlevel_rank.s.byte4
+ + 2 /* round-up*/ , 2);
+ lmc_wlevel_rank.s.byte8 = test_byte8 & ~1; /* Use only even settings */
+ byte_test_status[8] = WL_ESTIMATED; /* Estimated delay */
+ } else {
+ byte_test_status[8] = WL_HARDWARE; /* H/W delay value */
+ lmc_wlevel_rank.s.byte8 = lmc_wlevel_rank.s.byte0; /* ECC is not used */
+ }
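+ // (e.g. byte3=9, byte4=12: divide_nint(9+12+2, 2) = 12, already even, so byte8=12)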
+
+ /* Set delays for unused bytes to match byte 0. */
+ for (byte=0; byte<8; ++byte) {
+ if ((ddr_interface_bytemask & (1 << byte)))
+ continue;
+ update_wlevel_rank_struct(&lmc_wlevel_rank, byte,
+ lmc_wlevel_rank.s.byte0);
+ byte_test_status[byte] = WL_SOFTWARE;
+ }
+#else /* !DISABLE_SW_WL_PASS_2 */
+ // FIXME? the big hammer, did not even try SW WL pass2, assume only chip reset will help
+ ddr_print("N%d.LMC%d.R%d: S/W write-leveling pass 1 failed\n",
+ node, ddr_interface_num, rankx);
+ sw_wl_failed = 1;
+#endif /* !DISABLE_SW_WL_PASS_2 */
+
+ } else { /* if (bytes_failed) */
+
+ // SW WL pass 1 was OK, write the settings
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
+ lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+
+#if SW_WL_CHECK_PATCH
+ // do validity check on the delay values by running the test 1 more time...
+ // FIXME: we really need to check the ECC byte setting here as well,
+ // so we need to enable ECC for this test!!!
+ // if there are any errors, claim SW WL failure
+ {
+ uint64_t datamask = (ddr_interface_64b) ? 0xffffffffffffffffULL : 0x00000000ffffffffULL;
+
+ // do the test
+ if (sw_wlevel_hw) {
+ errors = run_best_hw_patterns(node, ddr_interface_num, rank_addr,
+ DBTRAIN_TEST, NULL) & 0x0ff;
+ } else {
+#if USE_ORIG_TEST_DRAM_BYTE
+ errors = test_dram_byte(node, ddr_interface_num, rank_addr, datamask, NULL);
+#else
+ errors = dram_tuning_mem_xor(node, ddr_interface_num, rank_addr, datamask, NULL);
+#endif
+ }
+
+ if (errors) {
+ ddr_print("N%d.LMC%d.R%d: Wlevel Rank Final Test errors 0x%x\n",
+ node, ddr_interface_num, rankx, errors);
+ sw_wl_failed = 1;
+ }
+ }
+#endif /* SW_WL_CHECK_PATCH */
+
+ } /* if (bytes_failed) */
+
+ // FIXME? dump the WL settings, so we get more of a clue as to what happened where
+ ddr_print("N%d.LMC%d.R%d: Wlevel Rank %#4x, 0x%016lX : %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %s\n",
+ node, ddr_interface_num, rankx,
+ lmc_wlevel_rank.s.status,
+ lmc_wlevel_rank.u,
+ lmc_wlevel_rank.s.byte8, wl_status_strings[byte_test_status[8]],
+ lmc_wlevel_rank.s.byte7, wl_status_strings[byte_test_status[7]],
+ lmc_wlevel_rank.s.byte6, wl_status_strings[byte_test_status[6]],
+ lmc_wlevel_rank.s.byte5, wl_status_strings[byte_test_status[5]],
+ lmc_wlevel_rank.s.byte4, wl_status_strings[byte_test_status[4]],
+ lmc_wlevel_rank.s.byte3, wl_status_strings[byte_test_status[3]],
+ lmc_wlevel_rank.s.byte2, wl_status_strings[byte_test_status[2]],
+ lmc_wlevel_rank.s.byte1, wl_status_strings[byte_test_status[1]],
+ lmc_wlevel_rank.s.byte0, wl_status_strings[byte_test_status[0]],
+ (sw_wl_rank_status == WL_HARDWARE) ? "" : "(s)"
+ );
+
+ // finally, check for fatal conditions: either chip reset right here, or return error flag
+ if (((ddr_type == DDR4_DRAM) && (best_vref_values_count == 0)) || sw_wl_failed) {
+ if (!ddr_disable_chip_reset) { // do chip RESET
+ error_print("INFO: Short memory test indicates a retry is needed on N%d.LMC%d.R%d. Resetting node...\n",
+ node, ddr_interface_num, rankx);
+ bdk_wait_usec(500000);
+ bdk_reset_chip(node);
+ } else { // return error flag so LMC init can be retried...
+ ddr_print("INFO: Short memory test indicates a retry is needed on N%d.LMC%d.R%d. Restarting LMC init...\n",
+ node, ddr_interface_num, rankx);
+ return 0; // 0 indicates restart possible...
+ }
+ }
+
+ active_rank++;
+ } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
+
+ // Finalize the write-leveling settings
+ for (rankx = 0; rankx < dimm_count * 4;rankx++) {
+ uint64_t value;
+ int parameter_set = 0;
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+
+ if (bdk_is_platform(BDK_PLATFORM_ASIM)) {
+ parameter_set |= 1;
+
+ lmc_wlevel_rank.s.byte8 = 0;
+ lmc_wlevel_rank.s.byte7 = 0;
+ lmc_wlevel_rank.s.byte6 = 0;
+ lmc_wlevel_rank.s.byte5 = 0;
+ lmc_wlevel_rank.s.byte4 = 0;
+ lmc_wlevel_rank.s.byte3 = 0;
+ lmc_wlevel_rank.s.byte2 = 0;
+ lmc_wlevel_rank.s.byte1 = 0;
+ lmc_wlevel_rank.s.byte0 = 0;
+ }
+
+ for (i=0; i<9; ++i) {
+ if ((s = lookup_env_parameter("ddr%d_wlevel_rank%d_byte%d", ddr_interface_num, rankx, i)) != NULL) {
+ parameter_set |= 1;
+ value = strtoul(s, NULL, 0);
+
+ update_wlevel_rank_struct(&lmc_wlevel_rank, i, value);
+ }
+ }
+
+ if ((s = lookup_env_parameter_ull("ddr%d_wlevel_rank%d", ddr_interface_num, rankx)) != NULL) {
+ parameter_set |= 1;
+ value = strtoull(s, NULL, 0);
+ lmc_wlevel_rank.u = value;
+ }
+
+ if (parameter_set) {
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
+ lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+ display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx);
+ }
+#if WLEXTRAS_PATCH
+ if ((rank_mask & 0x0F) != 0x0F) { // if there are unused entries to be filled
+ if (rankx < 3) {
+ debug_print("N%d.LMC%d.R%d: checking for WLEVEL_RANK unused entries.\n",
+ node, ddr_interface_num, rankx);
+ if (rankx == 0) { // if rank 0, write ranks 1 and 2 here if empty
+ if (!(rank_mask & (1<<1))) { // check that rank 1 is empty
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, 1), lmc_wlevel_rank.u);
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
+ node, ddr_interface_num, rankx, 1);
+ }
+ if (!(rank_mask & (1<<2))) { // check that rank 2 is empty
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
+ node, ddr_interface_num, rankx, 2);
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, 2), lmc_wlevel_rank.u);
+ }
+ }
+ // if rank 0, 1 or 2, write rank 3 here if empty
+ if (!(rank_mask & (1<<3))) { // check that rank 3 is empty
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
+ node, ddr_interface_num, rankx, 3);
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, 3), lmc_wlevel_rank.u);
+ }
+ }
+ }
+#endif /* WLEXTRAS_PATCH */
+
+ } /* for (rankx = 0; rankx < dimm_count * 4;rankx++) */
+
+ /* Restore the ECC configuration */
+ if (!sw_wlevel_hw_default) {
+ lmc_config.s.ecc_ena = use_ecc;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+ }
+
+#if USE_L2_WAYS_LIMIT
+ /* Restore the l2 set configuration */
+ if ((s = lookup_env_parameter("limit_l2_ways")) != NULL) {
+ int ways = strtoul(s, NULL, 10);
+ limit_l2_ways(node, ways, 1);
+ } else {
+ limit_l2_ways(node, bdk_l2c_get_num_assoc(node), 0);
+ }
+#endif
+
+ } // End Software Write-Leveling block
+
+#if ENABLE_DISPLAY_MPR_PAGE
+ if (ddr_type == DDR4_DRAM) {
+ Display_MPR_Page(node, rank_mask, ddr_interface_num, dimm_count, 2);
+ Display_MPR_Page(node, rank_mask, ddr_interface_num, dimm_count, 0);
+ }
+#endif
+
+#if 1 // was #ifdef CAVIUM_ONLY
+ {
+ int i;
+ int setting[9];
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+
+ for (i=0; i<9; ++i) {
+ SET_DDR_DLL_CTL3(dll90_byte_sel, ENCODE_DLL90_BYTE_SEL(i));
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ setting[i] = GET_DDR_DLL_CTL3(dll90_setting);
+ debug_print("%d. LMC%d_DLL_CTL3[%d] = %016lx %d\n", i, ddr_interface_num,
+ GET_DDR_DLL_CTL3(dll90_byte_sel), ddr_dll_ctl3.u, setting[i]);
+ }
+
+ VB_PRT(VBL_DEV, "N%d.LMC%d: %-36s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
+ node, ddr_interface_num, "DLL90 Setting 8:0",
+ setting[8], setting[7], setting[6], setting[5], setting[4],
+ setting[3], setting[2], setting[1], setting[0]);
+
+ //BDK_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), save_ddr_dll_ctl3.u);
+ }
+#endif /* CAVIUM_ONLY */
+
+ // install any custom DLL read or write offsets
+ // FIXME: no need to do these if we are going to auto-tune... ???
+
+ process_custom_dll_offsets(node, ddr_interface_num, "ddr_dll_write_offset",
+ custom_lmc_config->dll_write_offset, "ddr%d_dll_write_offset_byte%d", 1);
+ process_custom_dll_offsets(node, ddr_interface_num, "ddr_dll_read_offset",
+ custom_lmc_config->dll_read_offset, "ddr%d_dll_read_offset_byte%d", 2);
+
+ // we want to train write bit-deskew here...
+ if (! disable_deskew_training) {
+ if (enable_write_deskew) {
+ ddr_print("N%d.LMC%d: WRITE BIT-DESKEW feature training begins.\n",
+ node, ddr_interface_num);
+ Perform_Write_Deskew_Training(node, ddr_interface_num);
+ } /* if (enable_write_deskew) */
+ } /* if (! disable_deskew_training) */
+
+ /*
+ * 6.9.14 Final LMC Initialization
+ *
+ * Early LMC initialization, LMC write-leveling, and LMC read-leveling
+ * must be completed prior to starting this final LMC initialization.
+ *
+ * LMC hardware updates the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1,
+ * LMC(0)_SLOT_CTL2 CSRs with minimum values based on the selected
+ * readleveling and write-leveling settings. Software should not write
+ * the final LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and LMC(0)_SLOT_CTL2
+ * values until after the final read-leveling and write-leveling settings
+ * are written.
+ *
+ * Software must ensure the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and
+ * LMC(0)_SLOT_CTL2 CSR values are appropriate for this step. These CSRs
+ * select the minimum gaps between read operations and write operations
+ * of various types.
+ *
+ * Software must not reduce the values in these CSR fields below the
+ * values previously selected by the LMC hardware (during write-leveling
+ * and read-leveling steps above).
+ *
+ * All sections in this chapter may be used to derive proper settings for
+ * these registers.
+ *
+ * For minimal read latency, L2C_CTL[EF_ENA,EF_CNT] should be programmed
+ * properly. This should be done prior to the first read.
+ */
+
+#if ENABLE_SLOT_CTL_ACCESS
+ {
+ bdk_lmcx_slot_ctl0_t lmc_slot_ctl0;
+ bdk_lmcx_slot_ctl1_t lmc_slot_ctl1;
+ bdk_lmcx_slot_ctl2_t lmc_slot_ctl2;
+ bdk_lmcx_slot_ctl3_t lmc_slot_ctl3;
+
+ lmc_slot_ctl0.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL0(ddr_interface_num));
+ lmc_slot_ctl1.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL1(ddr_interface_num));
+ lmc_slot_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL2(ddr_interface_num));
+ lmc_slot_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL3(ddr_interface_num));
+
+ ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL0", lmc_slot_ctl0.u);
+ ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL1", lmc_slot_ctl1.u);
+ ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL2", lmc_slot_ctl2.u);
+ ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL3", lmc_slot_ctl3.u);
+
+ // for now, look only for SLOT_CTL1 envvar for override of contents
+ if ((s = lookup_env_parameter("ddr%d_slot_ctl1", ddr_interface_num)) != NULL) {
+ int slot_ctl1_incr = strtoul(s, NULL, 0);
+ // validate the value
+ if ((slot_ctl1_incr < 0) || (slot_ctl1_incr > 3)) { // allow 0 for printing only
+ error_print("ddr%d_slot_ctl1 illegal value (%d); must be 0-3\n",
+ ddr_interface_num, slot_ctl1_incr);
+ } else {
+
+#define INCR(csr, chip, field, incr) \
+ csr.chip.field = (csr.chip.field < (64 - incr)) ? (csr.chip.field + incr) : 63
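+// e.g. INCR(lmc_slot_ctl1, s, r2r_xrank_init, 2) adds 2 to the field,
+// saturating at the 6-bit maximum of 63 (illustrative)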
+
+ // only print original when we are changing it!
+ if (slot_ctl1_incr)
+ ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL1", lmc_slot_ctl1.u);
+
+ // modify all the SLOT_CTL1 fields by the increment, for now...
+ // but make sure the value will not overflow!!!
+ INCR(lmc_slot_ctl1, s, r2r_xrank_init, slot_ctl1_incr);
+ INCR(lmc_slot_ctl1, s, r2w_xrank_init, slot_ctl1_incr);
+ INCR(lmc_slot_ctl1, s, w2r_xrank_init, slot_ctl1_incr);
+ INCR(lmc_slot_ctl1, s, w2w_xrank_init, slot_ctl1_incr);
+ DRAM_CSR_WRITE(node, BDK_LMCX_SLOT_CTL1(ddr_interface_num), lmc_slot_ctl1.u);
+ lmc_slot_ctl1.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL1(ddr_interface_num));
+
+ // always print when we are changing it!
+ printf("%-45s : 0x%016lx\n", "LMC_SLOT_CTL1", lmc_slot_ctl1.u);
+ }
+ }
+ }
+#endif /* ENABLE_SLOT_CTL_ACCESS */
+ {
+ /* Clear any residual ECC errors */
+ int num_tads = 1;
+ int tad;
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_INT(ddr_interface_num), -1ULL);
+ BDK_CSR_READ(node, BDK_LMCX_INT(ddr_interface_num));
+
+ for (tad=0; tad<num_tads; tad++)
+ DRAM_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(tad), BDK_CSR_READ(node, BDK_L2C_TADX_INT_W1C(tad)));
+
+ ddr_print("%-45s : 0x%08lx\n", "LMC_INT",
+ BDK_CSR_READ(node, BDK_LMCX_INT(ddr_interface_num)));
+
+#if 0
+ // NOTE: this must be done for pass 2.x
+ // must enable ECC interrupts to get ECC error info in LMCX_INT
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ DRAM_CSR_WRITE(node, BDK_LMCX_INT_ENA_W1S(ddr_interface_num), -1ULL);
+ BDK_CSR_INIT(lmc_int_ena_w1s, node, BDK_LMCX_INT_ENA_W1S(ddr_interface_num));
+ ddr_print("%-45s : 0x%08lx\n", "LMC_INT_ENA_W1S", lmc_int_ena_w1s.u);
+ }
+#endif
+ }
+
+ // Now we can enable scrambling if desired...
+ {
+ bdk_lmcx_control_t lmc_control;
+ bdk_lmcx_scramble_cfg0_t lmc_scramble_cfg0;
+ bdk_lmcx_scramble_cfg1_t lmc_scramble_cfg1;
+ bdk_lmcx_scramble_cfg2_t lmc_scramble_cfg2;
+ bdk_lmcx_ns_ctl_t lmc_ns_ctl;
+
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ lmc_scramble_cfg0.u = BDK_CSR_READ(node, BDK_LMCX_SCRAMBLE_CFG0(ddr_interface_num));
+ lmc_scramble_cfg1.u = BDK_CSR_READ(node, BDK_LMCX_SCRAMBLE_CFG1(ddr_interface_num));
+ lmc_scramble_cfg2.u = BDK_CSR_READ(node, BDK_LMCX_SCRAMBLE_CFG2(ddr_interface_num));
+ lmc_ns_ctl.u = BDK_CSR_READ(node, BDK_LMCX_NS_CTL(ddr_interface_num));
+
+ /* Read the scramble setting from the config and see if we
+ need scrambling */
+ int use_scramble = bdk_config_get_int(BDK_CONFIG_DRAM_SCRAMBLE);
+ if (use_scramble == 2)
+ {
+ if (bdk_trust_get_level() >= BDK_TRUST_LEVEL_SIGNED)
+ use_scramble = 1;
+ else
+ use_scramble = 0;
+ }
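+        // net effect of the code above: BDK_CONFIG_DRAM_SCRAMBLE is
+        // 0 = off, 1 = on, 2 = auto (enable scrambling only when the
+        // boot image is signed/trusted)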
+
+ /* Generate random values if scrambling is needed */
+ if (use_scramble)
+ {
+ lmc_scramble_cfg0.u = bdk_rng_get_random64();
+ lmc_scramble_cfg1.u = bdk_rng_get_random64();
+ lmc_scramble_cfg2.u = bdk_rng_get_random64();
+ lmc_ns_ctl.s.ns_scramble_dis = 0;
+ lmc_ns_ctl.s.adr_offset = 0;
+ lmc_control.s.scramble_ena = 1;
+ }
+
+ if ((s = lookup_env_parameter_ull("ddr_scramble_cfg0")) != NULL) {
+ lmc_scramble_cfg0.u = strtoull(s, NULL, 0);
+ lmc_control.s.scramble_ena = 1;
+ }
+ ddr_print("%-45s : 0x%016lx\n", "LMC_SCRAMBLE_CFG0", lmc_scramble_cfg0.u);
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG0(ddr_interface_num), lmc_scramble_cfg0.u);
+
+ if ((s = lookup_env_parameter_ull("ddr_scramble_cfg1")) != NULL) {
+ lmc_scramble_cfg1.u = strtoull(s, NULL, 0);
+ lmc_control.s.scramble_ena = 1;
+ }
+ ddr_print("%-45s : 0x%016lx\n", "LMC_SCRAMBLE_CFG1", lmc_scramble_cfg1.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG1(ddr_interface_num), lmc_scramble_cfg1.u);
+
+ if ((s = lookup_env_parameter_ull("ddr_scramble_cfg2")) != NULL) {
+ lmc_scramble_cfg2.u = strtoull(s, NULL, 0);
+ lmc_control.s.scramble_ena = 1;
+ }
+ ddr_print("%-45s : 0x%016lx\n", "LMC_SCRAMBLE_CFG2", lmc_scramble_cfg2.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG2(ddr_interface_num), lmc_scramble_cfg2.u);
+
+ if ((s = lookup_env_parameter_ull("ddr_ns_ctl")) != NULL) {
+ lmc_ns_ctl.u = strtoull(s, NULL, 0);
+ }
+ ddr_print("%-45s : 0x%016lx\n", "LMC_NS_CTL", lmc_ns_ctl.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_NS_CTL(ddr_interface_num), lmc_ns_ctl.u);
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ }
+
+ return(mem_size_mbytes);
+}
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.h b/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.h
new file mode 100644
index 0000000000..ba1060e5e0
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.h
@@ -0,0 +1,97 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Functions for DDR3 init. Internal use only.
+ */
+
+extern void perform_octeon3_ddr3_sequence(bdk_node_t node, int rank_mask,
+ int ddr_interface_num, int sequence);
+extern void perform_ddr_init_sequence(bdk_node_t node, int rank_mask,
+ int ddr_interface_num);
+extern int ddr_memory_preserved(bdk_node_t node);
+
+extern int init_octeon3_ddr3_interface(bdk_node_t node,
+ const ddr_configuration_t *ddr_configuration, uint32_t ddr_hertz,
+ uint32_t cpu_hertz, uint32_t ddr_ref_hertz, int board_type,
+ int board_rev_maj, int board_rev_min, int ddr_interface_num,
+ uint32_t ddr_interface_mask);
+
+extern void
+set_vref(bdk_node_t node, int ddr_interface_num, int rank,
+ int range, int value);
+
+typedef struct {
+ unsigned char *rodt_ohms;
+ unsigned char *rtt_nom_ohms;
+ unsigned char *rtt_nom_table;
+ unsigned char *rtt_wr_ohms;
+ unsigned char *dic_ohms;
+ short *drive_strength;
+ short *dqx_strength;
+} impedence_values_t;
+
+extern impedence_values_t ddr4_impedence_values;
+
+extern int
+compute_vref_value(bdk_node_t node, int ddr_interface_num,
+ int rankx, int dimm_count, int rank_count,
+ impedence_values_t *imp_values, int is_stacked_die);
+
+extern unsigned short
+load_dac_override(int node, int ddr_interface_num,
+ int dac_value, int byte);
+extern int
+read_DAC_DBI_settings(int node, int ddr_interface_num,
+ int dac_or_dbi, int *settings);
+extern void
+display_DAC_DBI_settings(int node, int ddr_interface_num, int dac_or_dbi,
+ int ecc_ena, int *settings, char *title);
+
+#define RODT_OHMS_COUNT 8
+#define RTT_NOM_OHMS_COUNT 8
+#define RTT_NOM_TABLE_COUNT 8
+#define RTT_WR_OHMS_COUNT 8
+#define DIC_OHMS_COUNT 3
+#define DRIVE_STRENGTH_COUNT 15
+
+extern uint64_t hertz_to_psecs(uint64_t hertz);
+extern uint64_t psecs_to_mts(uint64_t psecs);
+extern uint64_t mts_to_hertz(uint64_t mts);
+extern uint64_t pretty_psecs_to_mts(uint64_t psecs);
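+
+// Conversion sketch (illustrative): a 1 GHz DDR clock has a 1000 ps
+// period and transfers data on both edges, so hertz_to_psecs(1 GHz)
+// yields 1000 and psecs_to_mts(1000) yields 2000 MT/s.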
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-internal.h b/src/vendorcode/cavium/bdk/libdram/dram-internal.h
new file mode 100644
index 0000000000..07fdbcbf54
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-internal.h
@@ -0,0 +1,201 @@
+#ifndef __DRAM_INTERNAL_H__
+#define __DRAM_INTERNAL_H__
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * This header defines all internal API for libdram. None
+ * of these functions should be called by users of the library.
+ * This is the only header that DRAM files should include
+ * from the libdram directory
+ */
+
+#include "libdram.h"
+#include "lib_octeon_shared.h"
+#include "dram-print.h"
+#include "dram-util.h"
+#include "dram-csr.h"
+#include "dram-env.h"
+#include "dram-gpio.h"
+#include "dram-spd.h"
+#include "dram-l2c.h"
+#include "dram-init-ddr3.h"
+
+#undef DRAM_CSR_WRITE_INLINE
+
+// define how many HW WL samples to take for majority voting
+// MUST BE odd!!
+// assume there should only be 2 possible values that will show up,
+// so treat ties as a problem!!!
+#define WLEVEL_LOOPS_DEFAULT 5 // NOTE: do not change this without checking the code!!!
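+// e.g. with 5 samples {6, 6, 8, 6, 8} the majority value 6 is kept;
+// with an odd sample count two candidate values cannot tie, so a tie
+// signals more than two distinct values showed up (illustrative)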
+
+// define how many HW RL samples per rank to take
+// multiple samples will allow either:
+// 1. looking for the best sample score
+// 2. averaging the samples into a composite score
+// symbol PICK_BEST_RANK_SCORE_NOT_AVG is used to choose
+// (see dram-init-ddr3.c)
+#define RLEVEL_AVG_LOOPS_DEFAULT 3
+#define PICK_BEST_RANK_SCORE_NOT_AVG 1
+
+typedef struct {
+ int delay;
+ int loop_total;
+ int loop_count;
+ int best;
+ uint64_t bm;
+ int bmerrs;
+ int sqerrs;
+ int bestsq;
+} rlevel_byte_data_t;
+
+typedef struct {
+ uint64_t bm;
+ uint8_t mstart;
+ uint8_t width;
+ int errs;
+} rlevel_bitmask_t;
+
+#define SET_DDR_DLL_CTL3(field, expr) \
+ do { \
+ ddr_dll_ctl3.cn81xx.field = (expr); \
+ } while (0)
+
+#define ENCODE_DLL90_BYTE_SEL(byte_sel) ((byte_sel)+1)
+
+#define GET_DDR_DLL_CTL3(field) \
+ (ddr_dll_ctl3.cn81xx.field)
+
+
+#define RLEVEL_NONSEQUENTIAL_DELAY_ERROR 50
+#define RLEVEL_ADJACENT_DELAY_ERROR 30
+
+#define TWO_LMC_MASK 0x03
+#define FOUR_LMC_MASK 0x0f
+#define ONE_DIMM_MASK 0x01
+#define TWO_DIMM_MASK 0x03
+
+extern int initialize_ddr_clock(bdk_node_t node,
+ const ddr_configuration_t *ddr_configuration, uint32_t cpu_hertz,
+ uint32_t ddr_hertz, uint32_t ddr_ref_hertz, int ddr_interface_num,
+ uint32_t ddr_interface_mask);
+
+extern int test_dram_byte(bdk_node_t node, int ddr_interface_num, uint64_t p,
+ uint64_t bitmask, uint64_t *xor_data);
+extern int dram_tuning_mem_xor(bdk_node_t node, int ddr_interface_num, uint64_t p,
+ uint64_t bitmask, uint64_t *xor_data);
+
+// "mode" arg
+#define DBTRAIN_TEST 0
+#define DBTRAIN_DBI 1
+#define DBTRAIN_LFSR 2
+extern int test_dram_byte_hw(bdk_node_t node, int ddr_interface_num,
+ uint64_t p, int mode, uint64_t *xor_data);
+extern int run_best_hw_patterns(bdk_node_t node, int ddr_interface_num,
+ uint64_t p, int mode, uint64_t *xor_data);
+
+extern int get_dimm_part_number(char *buffer, bdk_node_t node,
+ const dimm_config_t *dimm_config,
+ int ddr_type);
+extern uint32_t get_dimm_serial_number(bdk_node_t node,
+ const dimm_config_t *dimm_config,
+ int ddr_type);
+
+extern int octeon_ddr_initialize(bdk_node_t node, uint32_t cpu_hertz,
+ uint32_t ddr_hertz, uint32_t ddr_ref_hertz, uint32_t ddr_interface_mask,
+ const ddr_configuration_t *ddr_configuration, uint32_t *measured_ddr_hertz,
+ int board_type, int board_rev_maj, int board_rev_min);
+
+extern uint64_t divide_nint(uint64_t dividend, uint64_t divisor);
+
+typedef enum {
+ DDR3_DRAM = 3,
+ DDR4_DRAM = 4,
+} ddr_type_t;
+
+static inline int get_ddr_type(bdk_node_t node, const dimm_config_t *dimm_config)
+{
+ int spd_ddr_type;
+
+#define DEVICE_TYPE DDR4_SPD_KEY_BYTE_DEVICE_TYPE // same for DDR3 and DDR4
+ spd_ddr_type = read_spd(node, dimm_config, DEVICE_TYPE);
+
+ debug_print("%s:%d spd_ddr_type=0x%02x\n", __FUNCTION__, __LINE__, spd_ddr_type);
+
+ /* we return only DDR4 or DDR3 */
+ return (spd_ddr_type == 0x0C) ? DDR4_DRAM : DDR3_DRAM;
+}
+
+static inline int get_dimm_ecc(bdk_node_t node, const dimm_config_t *dimm_config, int ddr_type)
+{
+#define BUS_WIDTH(t) (((t) == DDR4_DRAM) ? DDR4_SPD_MODULE_MEMORY_BUS_WIDTH : DDR3_SPD_MEMORY_BUS_WIDTH)
+
+ return !!(read_spd(node, dimm_config, BUS_WIDTH(ddr_type)) & 8);
+}
+
+static inline int get_dimm_module_type(bdk_node_t node, const dimm_config_t *dimm_config, int ddr_type)
+{
+#define MODULE_TYPE DDR4_SPD_KEY_BYTE_MODULE_TYPE // same for DDR3 and DDR4
+
+ return (read_spd(node, dimm_config, MODULE_TYPE) & 0x0F);
+}
+
+extern int common_ddr4_fixups(dram_config_t *cfg, uint32_t default_udimm_speed);
+
+#define DEFAULT_BEST_RANK_SCORE 9999999
+#define MAX_RANK_SCORE_LIMIT 99 // is this OK?
+
+unsigned short load_dll_offset(bdk_node_t node, int ddr_interface_num,
+ int dll_offset_mode, int byte_offset, int byte);
+void change_dll_offset_enable(bdk_node_t node, int ddr_interface_num, int change);
+
+extern int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune);
+extern int perform_HW_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int bytelane);
+
+extern int perform_margin_write_voltage(bdk_node_t node);
+extern int perform_margin_read_voltage(bdk_node_t node);
+
+#define LMC_DDR3_RESET_ASSERT 0
+#define LMC_DDR3_RESET_DEASSERT 1
+extern void cn88xx_lmc_ddr3_reset(bdk_node_t node, int ddr_interface_num, int reset);
+extern void perform_lmc_reset(bdk_node_t node, int ddr_interface_num);
+extern void ddr4_mrw(bdk_node_t node, int ddr_interface_num, int rank,
+ int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1);
+#endif /* __DRAM_INTERNAL_H__ */
+
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-l2c.c b/src/vendorcode/cavium/bdk/libdram/dram-l2c.c
new file mode 100644
index 0000000000..11112955b2
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-l2c.c
@@ -0,0 +1,69 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "dram-internal.h"
+
+int limit_l2_ways(bdk_node_t node, int ways, int verbose)
+{
+ int ways_max = bdk_l2c_get_num_assoc(node);
+ int ways_min = 0;
+ int errors = 0;
+
+ if (ways >= ways_min && ways <= ways_max)
+ {
+ uint32_t valid_mask = (0x1 << ways_max) - 1;
+ uint32_t mask = (valid_mask << ways) & valid_mask;
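+        // e.g. with 16 ways and ways = 4: valid_mask = 0xffff and
+        // mask = 0xfff0, so ways 4-15 are blocked and 4 ways remain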
+ if (verbose)
+ printf("Limiting L2 to %d ways\n", ways);
+ for (int i = 0; i < (int)bdk_get_num_cores(node); i++)
+ errors += bdk_l2c_set_core_way_partition(node, i, mask);
+ errors += bdk_l2c_set_hw_way_partition(node, mask);
+ }
+ else
+ {
+ errors++;
+ printf("ERROR: invalid limit_l2_ways %d, must be between %d and %d\n",
+ ways, ways_min, ways_max);
+ }
+ if (errors)
+        puts("ERROR limiting L2 cache ways");
+
+ return errors;
+}
+
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-l2c.h b/src/vendorcode/cavium/bdk/libdram/dram-l2c.h
new file mode 100644
index 0000000000..5d2840884b
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-l2c.h
@@ -0,0 +1,45 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Functions for controlling L2C. Internal use only.
+ */
+
+extern int limit_l2_ways(bdk_node_t node, int ways, int verbose);
+
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-print.h b/src/vendorcode/cavium/bdk/libdram/dram-print.h
new file mode 100644
index 0000000000..94cdf92fbf
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-print.h
@@ -0,0 +1,86 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Functions for displaying output in libdram. Internal use only.
+ */
+
+typedef enum {
+ // low 4 bits are verbosity level
+ VBL_OFF = 0, // use this only to init dram_verbosity
+    VBL_ALL = 0, // use this only in VB_PRT() to get printf equiv
+ VBL_NORM = 1,
+ VBL_FAE = 2,
+ VBL_TME = 3,
+ VBL_DEV = 4,
+ VBL_DEV2 = 5,
+ VBL_DEV3 = 6,
+ VBL_DEV4 = 7,
+    VBL_NONE = 15, // use this only in VB_PRT() to get no printing
+ // upper 4 bits are special verbosities
+ VBL_SEQ = 16,
+ VBL_CSRS = 32,
+ VBL_SPECIAL = 48,
+ // force at least 8 bits for enum
+ VBL_LAST = 255
+} dram_verbosity_t;
+
+extern dram_verbosity_t dram_verbosity;
+
+// "level" should be 1-7, or only one of the special bits
+// let the compiler optimize the test for verbosity
+#define is_verbosity_level(level) ((int)(dram_verbosity & 0x0f) >= (level))
+#define is_verbosity_special(level) (((int)(dram_verbosity & 0xf0) & (level)) != 0)
+#define dram_is_verbose(level) (((level) & VBL_SPECIAL) ? is_verbosity_special(level) : is_verbosity_level(level))
+
+#define VB_PRT(level, format, ...) \
+ do { \
+ if (dram_is_verbose(level)) \
+ printf(format, ##__VA_ARGS__); \
+ } while (0)
+
+#define ddr_print(format, ...) VB_PRT(VBL_NORM, format, ##__VA_ARGS__)
+
+#define error_print(format, ...) printf(format, ##__VA_ARGS__)
+
+#ifdef DEBUG_DEBUG_PRINT
+ #define debug_print(format, ...) printf(format, ##__VA_ARGS__)
+#else
+ #define debug_print(format, ...) do {} while (0)
+#endif
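+
+/* Usage sketch (illustrative): one message printed at FAE verbosity and
+ * above, and one gated on a special verbosity bit:
+ *
+ *   VB_PRT(VBL_FAE, "N%d.LMC%d: training done\n", node, lmc);
+ *   VB_PRT(VBL_CSRS, "CSR dump follows\n");
+ *
+ * The first prints when (dram_verbosity & 0x0f) >= VBL_FAE; the second
+ * only when the VBL_CSRS bit is set in dram_verbosity.
+ */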
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-spd.c b/src/vendorcode/cavium/bdk/libdram/dram-spd.c
new file mode 100644
index 0000000000..3717ca1109
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-spd.c
@@ -0,0 +1,583 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include <ctype.h>
+#include "dram-internal.h"
+
+/**
+ * Read the entire contents of a DIMM SPD and store it in the device tree. The
+ * current DRAM config is also updated, so future SPD accesses use the cached
+ * copy.
+ *
+ * @param node Node the DRAM config is for
+ * @param cfg Current DRAM config. Updated with SPD data
+ * @param lmc LMC to read DIMM for
+ * @param dimm DIMM slot for SPD to read
+ *
+ * @return Zero on success, negative on failure
+ */
+int read_entire_spd(bdk_node_t node, dram_config_t *cfg, int lmc, int dimm)
+{
+ /* If pointer to data is provided, use it, otherwise read from SPD over twsi */
+ if (cfg->config[lmc].dimm_config_table[dimm].spd_ptr)
+ return 0;
+ if (!cfg->config[lmc].dimm_config_table[dimm].spd_addr)
+ return -1;
+
+ /* Figure out how to access the SPD */
+ int spd_addr = cfg->config[lmc].dimm_config_table[dimm].spd_addr;
+ int bus = spd_addr >> 12;
+ int address = spd_addr & 0x7f;
+
+ /* Figure out the size we will read */
+ int64_t dev_type = bdk_twsix_read_ia(node, bus, address, DDR4_SPD_KEY_BYTE_DEVICE_TYPE, 1, 1);
+ if (dev_type < 0)
+ return -1; /* No DIMM */
+ int spd_size = (dev_type == 0x0c) ? 512 : 256;
+
+ /* Allocate storage */
+ uint32_t *spd_buf = malloc(spd_size);
+ if (!spd_buf)
+ return -1;
+ uint32_t *ptr = spd_buf;
+
+ for (int bank = 0; bank < (spd_size >> 8); bank++)
+ {
+ /* this should only happen for DDR4, which has a second bank of 256 bytes */
+ if (bank)
+ bdk_twsix_write_ia(node, bus, 0x36 | bank, 0, 2, 1, 0);
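+        // the dummy write above, to 7-bit address 0x36|bank (SPA0/SPA1),
+        // selects SPD page 0 or 1 on DDR4 EEPROMs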
+ int bank_size = 256;
+ for (int i = 0; i < bank_size; i += 4)
+ {
+ int64_t data = bdk_twsix_read_ia(node, bus, address, i, 4, 1);
+ if (data < 0)
+ {
+ free(spd_buf);
+ bdk_error("Failed to read SPD data at 0x%x\n", i + (bank << 8));
+ /* Restore the bank to zero */
+ if (bank)
+ bdk_twsix_write_ia(node, bus, 0x36 | 0, 0, 2, 1, 0);
+ return -1;
+ }
+ else
+ *ptr++ = bdk_be32_to_cpu(data);
+ }
+ /* Restore the bank to zero */
+ if (bank)
+ bdk_twsix_write_ia(node, bus, 0x36 | 0, 0, 2, 1, 0);
+ }
+
+ /* Store the SPD in the device tree */
+ bdk_config_set_blob(spd_size, spd_buf, BDK_CONFIG_DDR_SPD_DATA, dimm, lmc, node);
+ cfg->config[lmc].dimm_config_table[dimm].spd_ptr = (void*)spd_buf;
+
+ return 0;
+}
+
+/* Read a DIMM SPD value, either using TWSI to read it from the DIMM, or
+ * from a provided array.
+ */
+int read_spd(bdk_node_t node, const dimm_config_t *dimm_config, int spd_field)
+{
+ /* If pointer to data is provided, use it, otherwise read from SPD over twsi */
+ if (dimm_config->spd_ptr)
+ return dimm_config->spd_ptr[spd_field];
+ else if (dimm_config->spd_addr)
+ {
+ int data;
+ int bus = dimm_config->spd_addr >> 12;
+ int address = dimm_config->spd_addr & 0x7f;
+
+ /* this should only happen for DDR4, which has a second bank of 256 bytes */
+ int bank = (spd_field >> 8) & 1;
+ if (bank) {
+ bdk_twsix_write_ia(node, bus, 0x36 | bank, 0, 2, 1, 0);
+ spd_field %= 256;
+ }
+
+ data = bdk_twsix_read_ia(node, bus, address, spd_field, 1, 1);
+
+ /* Restore the bank to zero */
+ if (bank) {
+ bdk_twsix_write_ia(node, bus, 0x36 | 0, 0, 2, 1, 0);
+ }
+
+ return data;
+ }
+ else
+ return -1;
+}
+
+static uint16_t ddr3_crc16(uint8_t *ptr, int count)
+{
+ /* From DDR3 spd specification */
+ int crc, i;
+ crc = 0;
+ while (--count >= 0)
+ {
+ crc = crc ^ (int)*ptr++ << 8;
+ for (i = 0; i < 8; ++i)
+ if (crc & 0x8000)
+ crc = crc << 1 ^ 0x1021;
+ else
+ crc = crc << 1;
+ }
+ return crc & 0xFFFF;
+}
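+/* Note: this is the JEDEC SPD CRC-16 (polynomial 0x1021, initial value
+ * 0, no reflection). Byte 0 bit 7 selects whether it covers bytes 0-116
+ * or 0-125, and the result is stored in SPD bytes 126/127.
+ */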
+
+static int validate_spd_checksum_ddr3(bdk_node_t node, int twsi_addr, int silent)
+{
+ uint8_t spd_data[128];
+ int crc_bytes = 126;
+ uint16_t crc_comp;
+ int i;
+ int rv;
+ int ret = 1;
+ for (i = 0; i < 128; i++)
+ {
+ rv = bdk_twsix_read_ia(node, twsi_addr >> 12, twsi_addr & 0x7f, i, 1, 1);
+ if (rv < 0)
+ return 0; /* TWSI read error */
+ spd_data[i] = (uint8_t)rv;
+ }
+ /* Check byte 0 to see how many bytes checksum is over */
+ if (spd_data[0] & 0x80)
+ crc_bytes = 117;
+
+ crc_comp = ddr3_crc16(spd_data, crc_bytes);
+
+ if (spd_data[DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE] != (crc_comp & 0xff) ||
+ spd_data[DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE] != (crc_comp >> 8))
+ {
+ if (!silent) {
+ printf("DDR3 SPD CRC error, spd addr: 0x%x, calculated crc: 0x%04x, read crc: 0x%02x%02x\n",
+ twsi_addr, crc_comp,
+ spd_data[DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE],
+ spd_data[DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE]);
+ }
+ ret = 0;
+ }
+ return ret;
+}
+
+static int validate_spd_checksum(bdk_node_t node, int twsi_addr, int silent)
+{
+ int rv;
+
+ debug_print("Validating DIMM at address 0x%x\n", twsi_addr);
+
+ if (!twsi_addr) return 1; /* return OK if we are not doing real DIMMs */
+
+ /* Look up module type to determine if DDR3 or DDR4 */
+ rv = bdk_twsix_read_ia(node, twsi_addr >> 12, twsi_addr & 0x7f, 2, 1, 1);
+
+    if (rv >= 0xB && rv <= 0xC) /* DDR3 (0x0B) or DDR4 (0x0C): both use the same CRC scheme */
+ return validate_spd_checksum_ddr3(node, twsi_addr, silent);
+
+ if (!silent)
+ printf("Unrecognized DIMM type: 0x%x at spd address: 0x%x\n",
+ rv, twsi_addr);
+
+ return 0;
+}
+
+
+int validate_dimm(bdk_node_t node, const dimm_config_t *dimm_config)
+{
+ int spd_addr;
+
+ spd_addr = dimm_config->spd_addr;
+
+ debug_print("Validating dimm spd addr: 0x%02x spd ptr: %x\n",
+ spd_addr, dimm_config->spd_ptr);
+
+    // if the slot cannot be populated (no SPD address and no cached SPD data)
+ if (!spd_addr && !dimm_config->spd_ptr)
+ return -1;
+
+ {
+ int val0, val1;
+ int ddr_type = get_ddr_type(node, dimm_config);
+
+ switch (ddr_type)
+ {
+ case DDR3_DRAM: /* DDR3 */
+ case DDR4_DRAM: /* DDR4 */
+
+            debug_print("Validating DDR%d DIMM\n", ddr_type);
+
+#define DENSITY_BANKS DDR4_SPD_DENSITY_BANKS // same for DDR3 and DDR4
+#define ROW_COL_BITS DDR4_SPD_ADDRESSING_ROW_COL_BITS // same for DDR3 and DDR4
+
+ val0 = read_spd(node, dimm_config, DENSITY_BANKS);
+ val1 = read_spd(node, dimm_config, ROW_COL_BITS);
+ if (val0 < 0 && val1 < 0) {
+ debug_print("Error reading SPD for DIMM\n");
+ return 0; /* Failed to read dimm */
+ }
+ if (val0 == 0xff && val1 == 0xff) {
+ ddr_print("Blank or unreadable SPD for DIMM\n");
+ return 0; /* Blank SPD or otherwise unreadable device */
+ }
+
+ /* Don't treat bad checksums as fatal. */
+ validate_spd_checksum(node, spd_addr, 0);
+ break;
+
+ case 0x00: /* Terminator detected. Fail silently. */
+ return 0;
+
+ default:
+            debug_print("Unknown DIMM type 0x%x for DIMM @ 0x%x\n",
+                        ddr_type, dimm_config->spd_addr);
+ return 0; /* Failed to read dimm */
+ }
+ }
+
+ return 1;
+}
+
+int get_dimm_part_number(char *buffer, bdk_node_t node,
+ const dimm_config_t *dimm_config,
+ int ddr_type)
+{
+ int i;
+ int c;
+ int skipping = 1;
+ int strlen = 0;
+
+#define PART_LIMIT(t) (((t) == DDR4_DRAM) ? 19 : 18)
+#define PART_NUMBER(t) (((t) == DDR4_DRAM) ? DDR4_SPD_MODULE_PART_NUMBER : DDR3_SPD_MODULE_PART_NUMBER)
+
+ int limit = PART_LIMIT(ddr_type);
+ int offset = PART_NUMBER(ddr_type);
+
+ for (i = 0; i < limit; ++i) {
+
+ c = (read_spd(node, dimm_config, offset+i) & 0xff);
+ if (c == 0) // any null, we are done
+ break;
+
+ /* Skip leading spaces. */
+ if (skipping) {
+ if (isspace(c))
+ continue;
+ else
+ skipping = 0;
+ }
+
+ /* Put non-null non-leading-space-skipped char into buffer */
+ buffer[strlen] = c;
+ ++strlen;
+ }
+
+ if (strlen > 0) {
+ i = strlen - 1; // last char put into buf
+ while (i >= 0 && isspace((int)buffer[i])) { // still in buf and a space
+ --i;
+ --strlen;
+ }
+ }
+    buffer[strlen] = 0; /* Ensure that the string is terminated */
+
+ return strlen;
+}
+
+uint32_t get_dimm_serial_number(bdk_node_t node, const dimm_config_t *dimm_config, int ddr_type)
+{
+ uint32_t serial_number = 0;
+ int offset;
+
+#define SERIAL_NUMBER(t) (((t) == DDR4_DRAM) ? DDR4_SPD_MODULE_SERIAL_NUMBER : DDR3_SPD_MODULE_SERIAL_NUMBER)
+
+ offset = SERIAL_NUMBER(ddr_type);
+
+ for (int i = 0, j = 24; i < 4; ++i, j -= 8) {
+ serial_number |= ((read_spd(node, dimm_config, offset + i) & 0xff) << j);
+ }
+
+ return serial_number;
+}
+
+static uint32_t get_dimm_checksum(bdk_node_t node, const dimm_config_t *dimm_config, int ddr_type)
+{
+ uint32_t spd_chksum;
+
+#define LOWER_NIBBLE(t) (((t) == DDR4_DRAM) ? DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE : DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE)
+#define UPPER_NIBBLE(t) (((t) == DDR4_DRAM) ? DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE : DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE)
+
+ spd_chksum = 0xff & read_spd(node, dimm_config, LOWER_NIBBLE(ddr_type));
+ spd_chksum |= ((0xff & read_spd(node, dimm_config, UPPER_NIBBLE(ddr_type))) << 8);
+
+ return spd_chksum;
+}
+
+static
+void report_common_dimm(bdk_node_t node, const dimm_config_t *dimm_config, int dimm,
+ const char **dimm_types, int ddr_type, char *volt_str,
+ int ddr_interface_num, int num_ranks, int dram_width, int dimm_size_mb)
+{
+ int spd_ecc;
+ unsigned spd_module_type;
+ uint32_t serial_number;
+ char part_number[21]; /* 20 bytes plus string terminator is big enough for either */
+ char *sn_str;
+
+ spd_module_type = get_dimm_module_type(node, dimm_config, ddr_type);
+ spd_ecc = get_dimm_ecc(node, dimm_config, ddr_type);
+
+ (void) get_dimm_part_number(part_number, node, dimm_config, ddr_type);
+
+ serial_number = get_dimm_serial_number(node, dimm_config, ddr_type);
+ if ((serial_number != 0) && (serial_number != 0xffffffff)) {
+ sn_str = "s/n";
+ } else {
+ serial_number = get_dimm_checksum(node, dimm_config, ddr_type);
+ sn_str = "chksum";
+ }
+
+    // note: DIMM rank/width is included in the output below, as in: 2Rx4, 1Rx8, etc
+ printf("N%d.LMC%d.DIMM%d: %d MB, DDR%d %s %dRx%d %s, p/n: %s, %s: %u, %s\n",
+ node, ddr_interface_num, dimm, dimm_size_mb, ddr_type,
+ dimm_types[spd_module_type], num_ranks, dram_width,
+ (spd_ecc ? "ECC" : "non-ECC"), part_number,
+ sn_str, serial_number, volt_str);
+}
+
+const char *ddr3_dimm_types[16] = {
+ /* 0000 */ "Undefined",
+ /* 0001 */ "RDIMM",
+ /* 0010 */ "UDIMM",
+ /* 0011 */ "SO-DIMM",
+ /* 0100 */ "Micro-DIMM",
+ /* 0101 */ "Mini-RDIMM",
+ /* 0110 */ "Mini-UDIMM",
+ /* 0111 */ "Mini-CDIMM",
+ /* 1000 */ "72b-SO-UDIMM",
+ /* 1001 */ "72b-SO-RDIMM",
+    /* 1010 */ "72b-SO-CDIMM",
+ /* 1011 */ "LRDIMM",
+ /* 1100 */ "16b-SO-DIMM",
+ /* 1101 */ "32b-SO-DIMM",
+ /* 1110 */ "Reserved",
+ /* 1111 */ "Reserved"
+};
+
+static
+void report_ddr3_dimm(bdk_node_t node, const dimm_config_t *dimm_config,
+ int dimm, int ddr_interface_num, int num_ranks,
+ int dram_width, int dimm_size_mb)
+{
+ int spd_voltage;
+    char *volt_str = "unknown"; /* fallback: avoids an uninitialized pointer if no voltage bit matches */
+
+ spd_voltage = read_spd(node, dimm_config, DDR3_SPD_NOMINAL_VOLTAGE);
+ if ((spd_voltage == 0) || (spd_voltage & 3))
+ volt_str = "1.5V";
+ if (spd_voltage & 2)
+ volt_str = "1.35V";
+ if (spd_voltage & 4)
+ volt_str = "1.2xV";
+
+ report_common_dimm(node, dimm_config, dimm, ddr3_dimm_types,
+ DDR3_DRAM, volt_str, ddr_interface_num,
+ num_ranks, dram_width, dimm_size_mb);
+}
+
+const char *ddr4_dimm_types[16] = {
+ /* 0000 */ "Extended",
+ /* 0001 */ "RDIMM",
+ /* 0010 */ "UDIMM",
+ /* 0011 */ "SO-DIMM",
+ /* 0100 */ "LRDIMM",
+ /* 0101 */ "Mini-RDIMM",
+ /* 0110 */ "Mini-UDIMM",
+ /* 0111 */ "Reserved",
+ /* 1000 */ "72b-SO-RDIMM",
+ /* 1001 */ "72b-SO-UDIMM",
+ /* 1010 */ "Reserved",
+ /* 1011 */ "Reserved",
+ /* 1100 */ "16b-SO-DIMM",
+ /* 1101 */ "32b-SO-DIMM",
+ /* 1110 */ "Reserved",
+ /* 1111 */ "Reserved"
+};
+
+static
+void report_ddr4_dimm(bdk_node_t node, const dimm_config_t *dimm_config,
+ int dimm, int ddr_interface_num, int num_ranks,
+ int dram_width, int dimm_size_mb)
+{
+ int spd_voltage;
+    char *volt_str = "unknown"; /* fallback: avoids an uninitialized pointer if no voltage bit matches */
+
+ spd_voltage = read_spd(node, dimm_config, DDR4_SPD_MODULE_NOMINAL_VOLTAGE);
+ if ((spd_voltage == 0x01) || (spd_voltage & 0x02))
+ volt_str = "1.2V";
+ if ((spd_voltage == 0x04) || (spd_voltage & 0x08))
+ volt_str = "TBD1 V";
+ if ((spd_voltage == 0x10) || (spd_voltage & 0x20))
+ volt_str = "TBD2 V";
+
+ report_common_dimm(node, dimm_config, dimm, ddr4_dimm_types,
+ DDR4_DRAM, volt_str, ddr_interface_num,
+ num_ranks, dram_width, dimm_size_mb);
+}
+
+void report_dimm(bdk_node_t node, const dimm_config_t *dimm_config,
+ int dimm, int ddr_interface_num, int num_ranks,
+ int dram_width, int dimm_size_mb)
+{
+ int ddr_type;
+
+ /* ddr_type only indicates DDR4 or DDR3 */
+ ddr_type = get_ddr_type(node, dimm_config);
+
+ if (ddr_type == DDR4_DRAM)
+ report_ddr4_dimm(node, dimm_config, dimm, ddr_interface_num,
+ num_ranks, dram_width, dimm_size_mb);
+ else
+ report_ddr3_dimm(node, dimm_config, dimm, ddr_interface_num,
+ num_ranks, dram_width, dimm_size_mb);
+}
+
+static int
+get_ddr4_spd_speed(bdk_node_t node, const dimm_config_t *dimm_config)
+{
+ int spdMTB = 125;
+ int spdFTB = 1;
+
+ int tCKAVGmin
+ = spdMTB * read_spd(node, dimm_config, DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN)
+ + spdFTB * (signed char) read_spd(node, dimm_config, DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN);
+
+ return pretty_psecs_to_mts(tCKAVGmin);
+}
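+/* Worked example (illustrative): an SPD tCKAVGmin byte of 10 gives
+ * 10 * 125 ps = 1250 ps; with a fine correction of 0 that is an
+ * 800 MHz clock, which pretty_psecs_to_mts() reports as 1600 MT/s.
+ */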
+
+static int
+get_ddr3_spd_speed(bdk_node_t node, const dimm_config_t *dimm_config)
+{
+ int spd_mtb_dividend = 0xff & read_spd(node, dimm_config, DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND);
+ int spd_mtb_divisor = 0xff & read_spd(node, dimm_config, DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR);
+ int spd_tck_min = 0xff & read_spd(node, dimm_config, DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN);
+
+ short ftb_Dividend = read_spd(node, dimm_config, DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4;
+ short ftb_Divisor = read_spd(node, dimm_config, DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf;
+
+ ftb_Divisor = (ftb_Divisor == 0) ? 1 : ftb_Divisor; /* Make sure that it is not 0 */
+
+ int mtb_psec = spd_mtb_dividend * 1000 / spd_mtb_divisor;
+ int tCKmin = mtb_psec * spd_tck_min;
+ tCKmin += ftb_Dividend *
+ (signed char) read_spd(node, dimm_config, DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN)
+ / ftb_Divisor;
+
+ return pretty_psecs_to_mts(tCKmin);
+}
+
+static int
+speed_bin_down(int speed)
+{
+ if (speed == 2133)
+ return 1866;
+ else if (speed == 1866)
+ return 1600;
+ else
+ return speed;
+}
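+/* Selection sketch for dram_get_default_spd_speed() below (illustrative):
+ * two DDR4-2400 DIMMs on one LMC are clamped to 2133 MT/s, binned down
+ * to 1866 MT/s for 2DPC, then nudged to 1880 MT/s for the jittery PLL,
+ * so the function returns mts_to_hertz(1880).
+ */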
+
+int
+dram_get_default_spd_speed(bdk_node_t node, const ddr_configuration_t *ddr_config)
+{
+ int lmc, dimm;
+ int speed, ret_speed = 0;
+ int ddr_type = get_ddr_type(node, &ddr_config[0].dimm_config_table[0]);
+ int dimm_speed[8], dimm_count = 0;
+ int dimms_per_lmc = 0;
+
+ for (lmc = 0; lmc < 4; lmc++) {
+ for (dimm = 0; dimm < DDR_CFG_T_MAX_DIMMS; dimm++) {
+ const dimm_config_t *dimm_config = &ddr_config[lmc].dimm_config_table[dimm];
+ if (/*dimm_config->spd_addr ||*/ dimm_config->spd_ptr)
+ {
+ speed = (ddr_type == DDR4_DRAM)
+ ? get_ddr4_spd_speed(node, dimm_config)
+ : get_ddr3_spd_speed(node, dimm_config);
+ //printf("N%d.LMC%d.DIMM%d: SPD speed %d\n", node, lmc, dimm, speed);
+ dimm_speed[dimm_count] = speed;
+ dimm_count++;
+ if (lmc == 0)
+ dimms_per_lmc++;
+ }
+ }
+ }
+
+    // all DIMMs must be the same speed
+ speed = dimm_speed[0];
+ for (dimm = 1; dimm < dimm_count; dimm++) {
+ if (dimm_speed[dimm] != speed) {
+ ret_speed = -1;
+ goto finish_up;
+ }
+ }
+
+ // if 2400 or greater, use 2133
+ if (speed >= 2400)
+ speed = 2133;
+
+ // use next speed down if 2DPC...
+ if (dimms_per_lmc > 1)
+ speed = speed_bin_down(speed);
+
+ // Update the in memory config to match the automatically calculated speed
+ bdk_config_set_int(speed, BDK_CONFIG_DDR_SPEED, node);
+
+ // do filtering for our jittery PLL
+ if (speed == 2133)
+ speed = 2100;
+ else if (speed == 1866)
+ speed = 1880;
+
+ // OK, return what we have...
+ ret_speed = mts_to_hertz(speed);
+
+ finish_up:
+ //printf("N%d: Returning default SPD speed %d\n", node, ret_speed);
+ return ret_speed;
+}
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-spd.h b/src/vendorcode/cavium/bdk/libdram/dram-spd.h
new file mode 100644
index 0000000000..df229f4959
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-spd.h
@@ -0,0 +1,166 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Functions, enumerations, and structures related to DIMM SPDs.
+ * Everything in this file is internal to libdram.
+ */
+
+/* data field addresses in the DDR3 SPD eeprom */
+typedef enum ddr3_spd_addrs {
+ DDR3_SPD_BYTES_PROGRAMMED = 0,
+ DDR3_SPD_REVISION = 1,
+ DDR3_SPD_KEY_BYTE_DEVICE_TYPE = 2,
+ DDR3_SPD_KEY_BYTE_MODULE_TYPE = 3,
+ DDR3_SPD_DENSITY_BANKS = 4,
+ DDR3_SPD_ADDRESSING_ROW_COL_BITS = 5,
+ DDR3_SPD_NOMINAL_VOLTAGE = 6,
+ DDR3_SPD_MODULE_ORGANIZATION = 7,
+ DDR3_SPD_MEMORY_BUS_WIDTH = 8,
+ DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR = 9,
+ DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND = 10,
+ DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR = 11,
+ DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN = 12,
+ DDR3_SPD_CAS_LATENCIES_LSB = 14,
+ DDR3_SPD_CAS_LATENCIES_MSB = 15,
+ DDR3_SPD_MIN_CAS_LATENCY_TAAMIN = 16,
+ DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN = 17,
+ DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN = 18,
+ DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN = 19,
+ DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN = 20,
+ DDR3_SPD_UPPER_NIBBLES_TRAS_TRC = 21,
+ DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN = 22,
+ DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN = 23,
+ DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN = 24,
+ DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN = 25,
+ DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN = 26,
+ DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN = 27,
+ DDR3_SPD_UPPER_NIBBLE_TFAW = 28,
+ DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN = 29,
+ DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN = 34,
+ DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN = 35,
+ DDR3_SPD_MIN_RAS_CAS_DELAY_FINE_TRCDMIN = 36,
+ DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN = 37,
+ DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_FINE_TRCMIN = 38,
+ DDR3_SPD_ADDRESS_MAPPING = 63,
+ DDR3_SPD_MODULE_SERIAL_NUMBER = 122,
+ DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE = 126,
+ DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE = 127,
+ DDR3_SPD_MODULE_PART_NUMBER = 128
+} ddr3_spd_addr_t;
+
+/* data field addresses in the DDR4 SPD eeprom */
+typedef enum ddr4_spd_addrs {
+ DDR4_SPD_BYTES_PROGRAMMED = 0,
+ DDR4_SPD_REVISION = 1,
+ DDR4_SPD_KEY_BYTE_DEVICE_TYPE = 2,
+ DDR4_SPD_KEY_BYTE_MODULE_TYPE = 3,
+ DDR4_SPD_DENSITY_BANKS = 4,
+ DDR4_SPD_ADDRESSING_ROW_COL_BITS = 5,
+ DDR4_SPD_PACKAGE_TYPE = 6,
+ DDR4_SPD_OPTIONAL_FEATURES = 7,
+ DDR4_SPD_THERMAL_REFRESH_OPTIONS = 8,
+ DDR4_SPD_OTHER_OPTIONAL_FEATURES = 9,
+ DDR4_SPD_SECONDARY_PACKAGE_TYPE = 10,
+ DDR4_SPD_MODULE_NOMINAL_VOLTAGE = 11,
+ DDR4_SPD_MODULE_ORGANIZATION = 12,
+ DDR4_SPD_MODULE_MEMORY_BUS_WIDTH = 13,
+ DDR4_SPD_MODULE_THERMAL_SENSOR = 14,
+ DDR4_SPD_RESERVED_BYTE15 = 15,
+ DDR4_SPD_RESERVED_BYTE16 = 16,
+ DDR4_SPD_TIMEBASES = 17,
+ DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN = 18,
+ DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX = 19,
+ DDR4_SPD_CAS_LATENCIES_BYTE0 = 20,
+ DDR4_SPD_CAS_LATENCIES_BYTE1 = 21,
+ DDR4_SPD_CAS_LATENCIES_BYTE2 = 22,
+ DDR4_SPD_CAS_LATENCIES_BYTE3 = 23,
+ DDR4_SPD_MIN_CAS_LATENCY_TAAMIN = 24,
+ DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN = 25,
+ DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN = 26,
+ DDR4_SPD_UPPER_NIBBLES_TRAS_TRC = 27,
+ DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN = 28,
+ DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN = 29,
+ DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN = 30,
+ DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN = 31,
+ DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN = 32,
+ DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN = 33,
+ DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN = 34,
+ DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN = 35,
+ DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN = 36,
+ DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN = 37,
+ DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN = 38,
+ DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN = 39,
+ DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN = 40,
+ DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN = 117,
+ DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN = 118,
+ DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN = 119,
+ DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN = 120,
+ DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN = 121,
+ DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN = 122,
+ DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN = 123,
+ DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX = 124,
+ DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN = 125,
+ DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE = 126,
+ DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE = 127,
+ DDR4_SPD_REFERENCE_RAW_CARD = 130,
+ DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE = 131,
+ DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB = 133,
+ DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB = 134,
+ DDR4_SPD_REGISTER_REVISION_NUMBER = 135,
+ DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM = 136,
+ DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL = 137,
+ DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK = 138,
+ DDR4_SPD_MODULE_SERIAL_NUMBER = 325,
+ DDR4_SPD_MODULE_PART_NUMBER = 329
+} ddr4_spd_addr_t;
+
+extern int read_entire_spd(bdk_node_t node, dram_config_t *cfg, int lmc, int dimm);
+extern int read_spd(bdk_node_t node, const dimm_config_t *dimm_config, int spd_field);
+
+extern int validate_dimm(bdk_node_t node, const dimm_config_t *dimm_config);
+
+extern void report_dimm(bdk_node_t node, const dimm_config_t *dimm_config,
+ int dimm, int ddr_interface_num, int num_ranks,
+ int dram_width, int dimm_size_mb);
+
+extern int dram_get_default_spd_speed(bdk_node_t node, const ddr_configuration_t *ddr_config);
+
+extern const char *ddr3_dimm_types[];
+extern const char *ddr4_dimm_types[];
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c b/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c
new file mode 100644
index 0000000000..e0e9d4442c
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c
@@ -0,0 +1,2012 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "dram-internal.h"
+
+// if enhanced verbosity levels are defined, use them
+#if defined(VB_PRT)
+#define ddr_print2(format, ...) VB_PRT(VBL_FAE, format, ##__VA_ARGS__)
+#define ddr_print3(format, ...) VB_PRT(VBL_TME, format, ##__VA_ARGS__)
+#define ddr_print4(format, ...) VB_PRT(VBL_DEV, format, ##__VA_ARGS__)
+#define ddr_print5(format, ...) VB_PRT(VBL_DEV3, format, ##__VA_ARGS__)
+#else
+#define ddr_print2 ddr_print
+#define ddr_print3 ddr_print
+#define ddr_print4 ddr_print
+#define ddr_print5 ddr_print
+#endif
+
+static int64_t test_dram_byte_threads_done;
+static uint64_t test_dram_byte_threads_errs;
+static uint64_t test_dram_byte_lmc_errs[4];
+
+#if 0
+/*
+ * Suggested testing patterns.
+ */
+static const uint64_t test_pattern_2[] = {
+ 0xFFFFFFFFFFFFFFFFULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0xFFFFFFFFFFFFFFFFULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0xFFFFFFFFFFFFFFFFULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0xFFFFFFFFFFFFFFFFULL,
+ 0x5555555555555555ULL,
+ 0xFFFFFFFFFFFFFFFFULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xFFFFFFFFFFFFFFFFULL,
+ 0x5555555555555555ULL,
+};
+ /*
+ * or possibly
+ */
+static const uint64_t test_pattern_3[] = {
+ 0xFDFDFDFDFDFDFDFDULL,
+ 0x8787878787878787ULL,
+ 0xFEFEFEFEFEFEFEFEULL,
+ 0xC3C3C3C3C3C3C3C3ULL,
+ 0x7F7F7F7F7F7F7F7FULL,
+ 0xE1E1E1E1E1E1E1E1ULL,
+ 0xBFBFBFBFBFBFBFBFULL,
+ 0xF0F0F0F0F0F0F0F0ULL,
+ 0xDFDFDFDFDFDFDFDFULL,
+ 0x7878787878787878ULL,
+ 0xEFEFEFEFEFEFEFEFULL,
+ 0x3C3C3C3C3C3C3C3CULL,
+ 0xF7F7F7F7F7F7F7F7ULL,
+ 0x1E1E1E1E1E1E1E1EULL,
+ 0xFBFBFBFBFBFBFBFBULL,
+ 0x0F0F0F0F0F0F0F0FULL,
+};
+
+static const uint64_t test_pattern_1[] = {
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+#if 0 // only need a cacheline size
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+#endif
+};
+
+// setup default for test pattern array
+static const uint64_t *dram_tune_test_pattern = test_pattern_1;
+#endif
+
+// set this to 1 to shorten the testing to exit when all byte lanes have errors
+// having this at 0 forces the testing to take place over the entire range every iteration,
+// hopefully ensuring an even load on the memory subsystem
+#define EXIT_WHEN_ALL_LANES_HAVE_ERRORS 0
+
+#define DEFAULT_TEST_BURSTS 5 // FIXME: this is what works so far... (was 7)
+int dram_tune_use_bursts = DEFAULT_TEST_BURSTS;
+
+// dram_tune_rank_offset is used to offset the second area used in test_dram_mem_xor.
+//
+// If only a single-rank DIMM, the offset will be 2GB (AREA_DUPE_OFFSET below) from the
+// start of the first area, which is more than enough for the restricted looping/address
+// range actually tested...
+//
+// If a 2-rank DIMM, the offset will be the size of a rank's address space, so the effect
+// will be to have the first and second areas in different ranks on the same DIMM.
+//
+// So, we default this to single-rank, and it will be overridden when 2 ranks are detected.
+//
+
+// FIXME: ASSUME that we have DIMMs no less than 4GB in size
+
+// offset to first area that avoids any boot stuff in low range (below 256MB)
+#define AREA_BASE_OFFSET (1ULL << 28) // bit 28 always ON
+
+// offset to duplicate area; may coincide with rank 1 base address for 2-rank 4GB DIMM
+#define AREA_DUPE_OFFSET (1ULL << 31) // bit 31 always ON
+
+// defaults to DUPE, but will be set elsewhere to offset to next RANK if multi-rank DIMM
+static uint64_t dram_tune_rank_offset = AREA_DUPE_OFFSET; // default
+
+// defaults to 0, but will be set elsewhere to the address offset to next DIMM if multi-slot
+static uint64_t dram_tune_dimm_offset = 0; // default
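+
+#if 0
+// Illustrative sketch only, never called: a hypothetical helper showing how the
+// two defaults above are expected to be overridden once LMC0's configuration is
+// read. The shift arithmetic mirrors the expressions used later in
+// auto_set_dll_offset().
+static void example_override_tune_offsets(bdk_node_t node, int num_lmcs)
+{
+    BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(0));
+    if (lmcx_config.s.rank_ena) // more than 1 rank: offset reaches the next rank
+        dram_tune_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb
+                                         - lmcx_config.s.rank_ena + (num_lmcs/2));
+    if (lmcx_config.s.init_status & 0x0c) // bit 2 or 3 set: 2 DIMMs present
+        dram_tune_dimm_offset = 1ull << (28 + lmcx_config.s.pbank_lsb + (num_lmcs/2));
+}
+#endif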
+
+
+static int speed_bin_offset[3] = {25, 20, 15};
+static int speed_bin_winlen[3] = {70, 60, 60};
+
+static int
+get_speed_bin(bdk_node_t node, int lmc)
+{
+ uint32_t mts_speed = (libdram_get_freq_from_pll(node, lmc) / 1000000) * 2;
+ int ret = 0;
+
+ // FIXME: is this reasonable speed "binning"?
+ if (mts_speed >= 1700) {
+ if (mts_speed >= 2000)
+ ret = 2;
+ else
+ ret = 1;
+ }
+
+ debug_print("N%d.LMC%d: %s: returning bin %d for MTS %d\n",
+ node, lmc, __FUNCTION__, ret, mts_speed);
+
+ return ret;
+}
+
+static int is_low_risk_offset(int speed_bin, int offset)
+{
+ return (_abs(offset) <= speed_bin_offset[speed_bin]);
+}
+static int is_low_risk_winlen(int speed_bin, int winlen)
+{
+ return (winlen >= speed_bin_winlen[speed_bin]);
+}
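+
+// In summary, given the tables above:
+//   bin 0 (<  1700 MT/s): low-risk offset is |offset| <= 25, low-risk window is >= 70
+//   bin 1 (>= 1700 MT/s): low-risk offset is |offset| <= 20, low-risk window is >= 60
+//   bin 2 (>= 2000 MT/s): low-risk offset is |offset| <= 15, low-risk window is >= 60
+// A byte lane is later flagged "needs review" only when it fails BOTH checks.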
+
+#define ENABLE_PREFETCH 0
+#define ENABLE_WBIL2 1
+#define ENABLE_SBLKDTY 0
+
+#define BDK_SYS_CVMCACHE_INV_L2 "#0,c11,c1,#1" // L2 Cache Invalidate
+#define BDK_CACHE_INV_L2(address) { asm volatile ("sys " BDK_SYS_CVMCACHE_INV_L2 ", %0" : : "r" (address)); }
+
+int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, uint64_t *xor_data)
+{
+ uint64_t p1, p2, d1, d2;
+ uint64_t v, v1;
+    uint64_t p2offset = 0x10000000; /* was: dram_tune_rank_offset */ // FIXME?
+ uint64_t datamask;
+ uint64_t xor;
+ uint64_t i, j, k;
+ uint64_t ii;
+ int errors = 0;
+ //uint64_t index;
+ uint64_t pattern1 = bdk_rng_get_random64();
+ uint64_t pattern2 = 0;
+ uint64_t bad_bits[2] = {0,0};
+
+#if ENABLE_SBLKDTY
+ BDK_CSR_MODIFY(c, node, BDK_L2C_CTL, c.s.dissblkdty = 0);
+#endif
+
+ // Byte lanes may be clear in the mask to indicate no testing on that lane.
+ datamask = bitmask;
+
+ // final address must include LMC and node
+ p |= (lmc<<7); /* Map address into proper interface */
+ p = bdk_numa_get_address(node, p); /* Map to node */
+
+ /* Add offset to both test regions to not clobber boot stuff
+ * when running from L2 for NAND boot.
+ */
+ p += AREA_BASE_OFFSET; // make sure base is out of the way of boot
+
+#define II_INC (1ULL << 29)
+#define II_MAX (1ULL << 31)
+#define K_INC (1ULL << 14)
+#define K_MAX (1ULL << 20)
+#define J_INC (1ULL << 9)
+#define J_MAX (1ULL << 12)
+#define I_INC (1ULL << 3)
+#define I_MAX (1ULL << 7)
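+
+// Coverage of the strides above: i walks the sixteen 64-bit words of one
+// 128-byte cacheline; j samples 8 such cachelines per 4KB; k samples every
+// 16KB up to 1MB; ii samples every 512MB up to 2GB. The result is a sparse
+// but widely distributed walk over the tested region.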
+
+ debug_print("N%d.LMC%d: dram_tuning_mem_xor: phys_addr=0x%lx\n",
+ node, lmc, p);
+
+#if 0
+ int ix;
+ // add this loop to fill memory with the test pattern first
+ // loops are ordered so that only entire cachelines are written
+ for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
+ for (k = 0; k < K_MAX; k += K_INC) {
+ for (j = 0; j < J_MAX; j += J_INC) {
+ p1 = p + ii + k + j;
+ p2 = p1 + p2offset;
+ for (i = 0, ix = 0; i < I_MAX; i += I_INC, ix++) {
+
+ v = dram_tune_test_pattern[ix];
+ v1 = v; // write the same thing to both areas
+
+ __bdk_dram_write64(p1 + i, v);
+ __bdk_dram_write64(p2 + i, v1);
+
+ }
+#if ENABLE_WBIL2
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
+#endif
+ }
+ }
+ } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */
+#endif
+
+#if ENABLE_PREFETCH
+ BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE);
+#endif
+
+ // loops are ordered so that only a single 64-bit slot is written to each cacheline at one time,
+ // then the cachelines are forced out; this should maximize read/write traffic
+ for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
+ for (k = 0; k < K_MAX; k += K_INC) {
+ for (i = 0; i < I_MAX; i += I_INC) {
+ for (j = 0; j < J_MAX; j += J_INC) {
+
+ p1 = p + ii + k + j;
+ p2 = p1 + p2offset;
+
+#if ENABLE_PREFETCH
+ if (j < (J_MAX - J_INC)) {
+ BDK_PREFETCH(p1 + J_INC, BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p2 + J_INC, BDK_CACHE_LINE_SIZE);
+ }
+#endif
+
+ v = pattern1 * (p1 + i);
+ v1 = v; // write the same thing to both areas
+
+ __bdk_dram_write64(p1 + i, v);
+ __bdk_dram_write64(p2 + i, v1);
+
+#if ENABLE_WBIL2
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
+#endif
+ }
+ }
+ }
+ } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */
+
+ BDK_DCACHE_INVALIDATE;
+
+ debug_print("N%d.LMC%d: dram_tuning_mem_xor: done INIT loop\n",
+ node, lmc);
+
+ /* Make a series of passes over the memory areas. */
+
+ for (int burst = 0; burst < 1/* was: dram_tune_use_bursts*/; burst++)
+ {
+ uint64_t this_pattern = bdk_rng_get_random64();
+ pattern2 ^= this_pattern;
+
+ /* XOR the data with a random value, applying the change to both
+ * memory areas.
+ */
+#if ENABLE_PREFETCH
+ BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE);
+#endif
+
+ for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
+ for (k = 0; k < K_MAX; k += K_INC) {
+ for (i = 0; i < I_MAX; i += I_INC) { // FIXME: rearranged, did not make much difference?
+ for (j = 0; j < J_MAX; j += J_INC) {
+
+ p1 = p + ii + k + j;
+ p2 = p1 + p2offset;
+
+#if ENABLE_PREFETCH
+ if (j < (J_MAX - J_INC)) {
+ BDK_PREFETCH(p1 + J_INC, BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p2 + J_INC, BDK_CACHE_LINE_SIZE);
+ }
+#endif
+
+ v = __bdk_dram_read64(p1 + i) ^ this_pattern;
+ v1 = __bdk_dram_read64(p2 + i) ^ this_pattern;
+
+#if ENABLE_WBIL2
+ BDK_CACHE_INV_L2(p1);
+ BDK_CACHE_INV_L2(p2);
+#endif
+
+ __bdk_dram_write64(p1 + i, v);
+ __bdk_dram_write64(p2 + i, v1);
+
+#if ENABLE_WBIL2
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
+#endif
+ }
+ }
+ }
+ } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */
+
+ BDK_DCACHE_INVALIDATE;
+
+ debug_print("N%d.LMC%d: dram_tuning_mem_xor: done MODIFY loop\n",
+ node, lmc);
+
+#if ENABLE_PREFETCH
+ BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE);
+#endif
+
+ /* Look for differences in the areas. If there is a mismatch, reset
+ * both memory locations with the same pattern. Failing to do so
+ * means that on all subsequent passes the pair of locations remain
+ * out of sync giving spurious errors.
+ */
+ // FIXME: change the loop order so that an entire cache line is compared at one time
+ // FIXME: this is so that a read error that occurs *anywhere* on the cacheline will be caught,
+ // FIXME: rather than comparing only 1 cacheline slot at a time, where an error on a different
+ // FIXME: slot will be missed that time around
+ // Does the above make sense?
+
+ for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
+ for (k = 0; k < K_MAX; k += K_INC) {
+ for (j = 0; j < J_MAX; j += J_INC) {
+
+ p1 = p + ii + k + j;
+ p2 = p1 + p2offset;
+
+#if ENABLE_PREFETCH
+ if (j < (J_MAX - J_INC)) {
+ BDK_PREFETCH(p1 + J_INC, BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p2 + J_INC, BDK_CACHE_LINE_SIZE);
+ }
+#endif
+
+ // process entire cachelines in the innermost loop
+ for (i = 0; i < I_MAX; i += I_INC) {
+
+ v = ((p1 + i) * pattern1) ^ pattern2; // FIXME: this should predict what we find...???
+ d1 = __bdk_dram_read64(p1 + i);
+ d2 = __bdk_dram_read64(p2 + i);
+
+ xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes
+
+ if (!xor)
+ continue;
+
+ // accumulate bad bits
+ bad_bits[0] |= xor;
+ //bad_bits[1] |= ~mpr_data1 & 0xffUL; // cannot do ECC here
+
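+                    // Walk the eight byte lanes of this 64-bit word: bymsk isolates one
+                    // lane's bits in xor, and bybit is that lane's flag in the returned
+                    // error mask, so callers get errors as a per-bytelane bitmap.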
+ int bybit = 1;
+ uint64_t bymsk = 0xffULL; // start in byte lane 0
+ while (xor != 0) {
+ debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n",
+ burst, p1, p2, v, d1, d2);
+ if (xor & bymsk) { // error(s) in this lane
+ errors |= bybit; // set the byte error bit
+ xor &= ~bymsk; // clear byte lane in error bits
+ datamask &= ~bymsk; // clear the byte lane in the mask
+#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS
+ if (datamask == 0) { // nothing left to do
+ return errors; // completely done when errors found in all byte lanes in datamask
+ }
+#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */
+ }
+ bymsk <<= 8; // move mask into next byte lane
+ bybit <<= 1; // move bit into next byte position
+ }
+ }
+#if ENABLE_WBIL2
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
+#endif
+ }
+ }
+ } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */
+
+ debug_print("N%d.LMC%d: dram_tuning_mem_xor: done TEST loop\n",
+ node, lmc);
+
+ } /* for (int burst = 0; burst < dram_tune_use_bursts; burst++) */
+
+ if (xor_data != NULL) { // send the bad bits back...
+ xor_data[0] = bad_bits[0];
+ xor_data[1] = bad_bits[1]; // let it be zeroed
+ }
+
+#if ENABLE_SBLKDTY
+ BDK_CSR_MODIFY(c, node, BDK_L2C_CTL, c.s.dissblkdty = 1);
+#endif
+
+ return errors;
+}
+
+#undef II_INC
+#undef II_MAX
+
+#define EXTRACT(v, lsb, width) (((v) >> (lsb)) & ((1ull << (width)) - 1))
+#define LMCNO(address, xbits) (EXTRACT(address, 7, xbits) ^ EXTRACT(address, 20, xbits) ^ EXTRACT(address, 12, xbits))
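+// Example (assuming a 4-LMC configuration, so xbits = 2): the owning LMC for
+// physical address A is A<8:7> ^ A<21:20> ^ A<13:12>, so A = 0x80 hashes to
+// LMC 1, while A = 0x100080 hashes back to LMC 0 (1 ^ 1 ^ 0).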
+
+static int dram_tuning_mem_xor2(uint64_t p, uint64_t bitmask, int xbits)
+{
+ uint64_t p1, p2, d1, d2;
+ uint64_t v, vpred;
+ uint64_t p2offset = dram_tune_rank_offset; // FIXME?
+ uint64_t datamask;
+ uint64_t xor;
+ uint64_t ii;
+ uint64_t pattern1 = bdk_rng_get_random64();
+ uint64_t pattern2 = 0;
+ int errors = 0;
+ int errs_by_lmc[4] = { 0,0,0,0 };
+ int lmc;
+ uint64_t vbase, vincr;
+
+ // Byte lanes may be clear in the mask to indicate no testing on that lane.
+ datamask = bitmask;
+
+ /* Add offset to both test regions to not clobber boot stuff
+ * when running from L2 for NAND boot.
+ */
+ p += AREA_BASE_OFFSET; // make sure base is out of the way of boot
+
+ // move the multiplies outside the loop
+ vbase = p * pattern1;
+ vincr = 8 * pattern1;
+
+#define II_INC (1ULL << 3)
+#define II_MAX (1ULL << 22) // stop where the core ID bits start
+
+ // walk the memory areas by 8-byte words
+ v = vbase;
+ for (ii = 0; ii < II_MAX; ii += II_INC) {
+
+ p1 = p + ii;
+ p2 = p1 + p2offset;
+
+ __bdk_dram_write64(p1, v);
+ __bdk_dram_write64(p2, v);
+
+ v += vincr;
+ }
+
+ __bdk_dram_flush_to_mem_range(p , p + II_MAX);
+ __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + II_MAX);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Make a series of passes over the memory areas. */
+
+ for (int burst = 0; burst < dram_tune_use_bursts; burst++)
+ {
+ uint64_t this_pattern = bdk_rng_get_random64();
+ pattern2 ^= this_pattern;
+
+ /* XOR the data with a random value, applying the change to both
+ * memory areas.
+ */
+#if 0
+ BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE);
+#endif
+ for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
+
+ p1 = p + ii;
+ p2 = p1 + p2offset;
+
+ d1 = __bdk_dram_read64(p1) ^ this_pattern;
+ d2 = __bdk_dram_read64(p2) ^ this_pattern;
+
+ __bdk_dram_write64(p1, d1);
+ __bdk_dram_write64(p2, d2);
+
+ }
+ __bdk_dram_flush_to_mem_range(p , p + II_MAX);
+ __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + II_MAX);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Look for differences in the areas. If there is a mismatch, reset
+ * both memory locations with the same pattern. Failing to do so
+ * means that on all subsequent passes the pair of locations remain
+ * out of sync giving spurious errors.
+ */
+#if 0
+ BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE);
+#endif
+ vpred = vbase;
+ for (ii = 0; ii < II_MAX; ii += II_INC) {
+
+ p1 = p + ii;
+ p2 = p1 + p2offset;
+
+ v = vpred ^ pattern2; // this should predict what we find...
+ d1 = __bdk_dram_read64(p1);
+ d2 = __bdk_dram_read64(p2);
+ vpred += vincr;
+
+ xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes
+ if (!xor) // no errors
+ continue;
+
+ lmc = LMCNO(p1, xbits); // FIXME: LMC should be SAME for p1 and p2!!!
+ if (lmc != (int)LMCNO(p2, xbits)) {
+ printf("ERROR: LMCs for addresses [0x%016lX] (%lld) and [0x%016lX] (%lld) differ!!!\n",
+ p1, LMCNO(p1, xbits), p2, LMCNO(p2, xbits));
+ }
+ int bybit = 1;
+ uint64_t bymsk = 0xffULL; // start in byte lane 0
+ while (xor != 0) {
+ debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n",
+ burst, p1, p2, v, d1, d2);
+ if (xor & bymsk) { // error(s) in this lane
+ errs_by_lmc[lmc] |= bybit; // set the byte error bit in the LMCs errors
+ errors |= bybit; // set the byte error bit
+ xor &= ~bymsk; // clear byte lane in error bits
+ //datamask &= ~bymsk; // clear the byte lane in the mask
+ }
+ bymsk <<= 8; // move mask into next byte lane
+ bybit <<= 1; // move bit into next byte position
+ } /* while (xor != 0) */
+ } /* for (ii = 0; ii < II_MAX; ii += II_INC) */
+ } /* for (int burst = 0; burst < dram_tune_use_bursts; burst++) */
+
+ // update the global LMC error states
+ for (lmc = 0; lmc < 4; lmc++) {
+ if (errs_by_lmc[lmc]) {
+ bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_lmc_errs[lmc], errs_by_lmc[lmc]);
+ }
+ }
+
+ return errors;
+}
+
+#if 0
+static int dram_tuning_mem_rows(uint64_t p, uint64_t bitmask)
+{
+ uint64_t p1, p2, d1, d2;
+ uint64_t v, v1;
+ uint64_t p2offset = dram_tune_rank_offset; // FIXME?
+ uint64_t datamask;
+ uint64_t xor;
+ int i, j, k, ii;
+ int errors = 0;
+ int index;
+ uint64_t pattern1 = 0; // FIXME: maybe this could be from a table?
+ uint64_t pattern2;
+
+ // Byte lanes may be clear in the mask to indicate no testing on that lane.
+ datamask = bitmask;
+
+ /* Add offset to both test regions to not clobber boot stuff
+ * when running from L2 for NAND boot.
+ */
+ p += 0x10000000; // FIXME? was: 0x4000000; // make sure base is out of the way of cores for tuning
+
+ pattern2 = pattern1;
+ for (k = 0; k < (1 << 20); k += (1 << 14)) {
+ for (j = 0; j < (1 << 12); j += (1 << 9)) {
+ for (i = 0; i < (1 << 7); i += 8) {
+ index = i + j + k;
+ p1 = p + index;
+ p2 = p1 + p2offset;
+
+ v = pattern2;
+ v1 = v; // write the same thing to same slot in both cachelines
+ pattern2 = ~pattern2; // flip bits for next slots
+
+ __bdk_dram_write64(p1, v);
+ __bdk_dram_write64(p2, v1);
+ }
+#if 1
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
+#endif
+ }
+ }
+
+#if 0
+ __bdk_dram_flush_to_mem_range(p, p + (1ULL << 20)); // max_addr is start + where k stops...
+ __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + (1ULL << 20)); // max_addr is start + where k stops...
+#endif
+ BDK_DCACHE_INVALIDATE;
+
+ /* Make a series of passes over the memory areas. */
+
+ for (int burst = 0; burst < dram_tune_use_bursts; burst++)
+ {
+ /* just read and flip the bits applying the change to both
+ * memory areas.
+ */
+ for (k = 0; k < (1 << 20); k += (1 << 14)) {
+ for (j = 0; j < (1 << 12); j += (1 << 9)) {
+ for (i = 0; i < (1 << 7); i += 8) {
+ index = i + j + k;
+ p1 = p + index;
+ p2 = p1 + p2offset;
+
+ v = ~__bdk_dram_read64(p1);
+ v1 = ~__bdk_dram_read64(p2);
+
+ __bdk_dram_write64(p1, v);
+ __bdk_dram_write64(p2, v1);
+ }
+#if 1
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
+#endif
+ }
+ }
+
+#if 0
+ __bdk_dram_flush_to_mem_range(p, p + (1ULL << 20)); // max_addr is start + where k stops...
+ __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + (1ULL << 20)); // max_addr is start + where k stops...
+#endif
+ BDK_DCACHE_INVALIDATE;
+
+ /* Look for differences in the areas. If there is a mismatch, reset
+ * both memory locations with the same pattern. Failing to do so
+ * means that on all subsequent passes the pair of locations remain
+ * out of sync giving spurious errors.
+ */
+
+ // FIXME: change the loop order so that an entire cache line is compared at one time
+ // FIXME: this is so that a read error that occurs *anywhere* on the cacheline will be caught,
+ // FIXME: rather than comparing only 1 cacheline slot at a time, where an error on a different
+ // FIXME: slot will be missed that time around
+ // Does the above make sense?
+
+ pattern2 = ~pattern1; // slots have been flipped by the above loop
+
+ for (k = 0; k < (1 << 20); k += (1 << 14)) {
+ for (j = 0; j < (1 << 12); j += (1 << 9)) {
+ for (i = 0; i < (1 << 7); i += 8) {
+ index = i + j + k;
+ p1 = p + index;
+ p2 = p1 + p2offset;
+
+ v = pattern2; // FIXME: this should predict what we find...???
+ d1 = __bdk_dram_read64(p1);
+ d2 = __bdk_dram_read64(p2);
+ pattern2 = ~pattern2; // flip for next slot
+
+ xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes
+
+ int bybit = 1;
+ uint64_t bymsk = 0xffULL; // start in byte lane 0
+ while (xor != 0) {
+ debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n",
+ burst, p1, p2, v, d1, d2);
+ if (xor & bymsk) { // error(s) in this lane
+ errors |= bybit; // set the byte error bit
+ xor &= ~bymsk; // clear byte lane in error bits
+ datamask &= ~bymsk; // clear the byte lane in the mask
+#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS
+ if (datamask == 0) { // nothing left to do
+ return errors; // completely done when errors found in all byte lanes in datamask
+ }
+#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */
+ }
+ bymsk <<= 8; // move mask into next byte lane
+ bybit <<= 1; // move bit into next byte position
+ }
+ }
+ }
+ }
+ pattern1 = ~pattern1; // flip the starting pattern for the next burst
+
+ } /* for (int burst = 0; burst < dram_tune_use_bursts; burst++) */
+ return errors;
+}
+#endif
+
+// cores to use
+#define DEFAULT_USE_CORES 44 // FIXME: was (1 << CORE_BITS)
+int dram_tune_use_cores = DEFAULT_USE_CORES; // max cores to use, override available
+int dram_tune_max_cores; // max cores available on a node
+#define CORE_SHIFT 22 // FIXME: offset into rank_address passed to test_dram_byte
+
+typedef void (*__dram_tuning_thread_t)(int arg, void *arg1);
+
+typedef struct
+{
+ bdk_node_t node;
+ int64_t num_lmcs;
+ uint64_t byte_mask;
+} test_dram_byte_info_t;
+
+static void dram_tuning_thread(int arg, void *arg1)
+{
+ test_dram_byte_info_t *test_info = arg1;
+ int core = arg;
+ uint64_t errs;
+ bdk_node_t node = test_info->node;
+ int num_lmcs, lmc;
+#if 0
+ num_lmcs = test_info->num_lmcs;
+ // map core numbers into hopefully equal groups per LMC
+ lmc = core % num_lmcs;
+#else
+ // FIXME: this code should allow running all the cores on a single LMC...
+    // if incoming num_lmcs >= 0, then use as normal; if < 0, remap to a single LMC
+ if (test_info->num_lmcs >= 0) {
+ num_lmcs = test_info->num_lmcs;
+ // map core numbers into hopefully equal groups per LMC
+ lmc = core % num_lmcs;
+ } else {
+ num_lmcs = 1;
+ // incoming num_lmcs is (desired LMC - 10)
+ lmc = 10 + test_info->num_lmcs;
+ }
+#endif
+ uint64_t base_address = 0/* was: (lmc << 7); now done by callee */;
+ uint64_t bytemask = test_info->byte_mask;
+
+ /* Figure out our work memory range.
+ *
+ * Note: base_address above just provides the physical offset which determines
+ * specific LMC portions of the address space and does not have the node bits set.
+ */
+ //was: base_address = bdk_numa_get_address(node, base_address); // map to node // now done by callee
+ base_address |= (core << CORE_SHIFT); // FIXME: also put full core into address
+ if (dram_tune_dimm_offset) { // if multi-slot in some way, choose a DIMM for the core
+ base_address |= (core & (1 << (num_lmcs >> 1))) ? dram_tune_dimm_offset : 0;
+ }
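+    // With CORE_SHIFT == 22 and at most 44 cores, the core index occupies address
+    // bits <27:22>, below the bit-28 area offset and the rank/DIMM offset bits.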
+
+ debug_print("Node %d, core %d, Testing area 1 at 0x%011lx, area 2 at 0x%011lx\n",
+ node, core, base_address + AREA_BASE_OFFSET,
+ base_address + AREA_BASE_OFFSET + dram_tune_rank_offset);
+
+ errs = dram_tuning_mem_xor(node, lmc, base_address, bytemask, NULL);
+ //errs = dram_tuning_mem_rows(base_address, bytemask);
+
+ /* Report that we're done */
+ debug_print("Core %d on LMC %d node %d done with test_dram_byte with 0x%lx errs\n",
+ core, lmc, node, errs);
+
+ if (errs) {
+ bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_threads_errs, errs);
+ bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_lmc_errs[lmc], errs);
+ }
+
+ bdk_atomic_add64_nosync(&test_dram_byte_threads_done, 1);
+
+ return;
+}
+
+static void dram_tuning_thread2(int arg, void *arg1)
+{
+ test_dram_byte_info_t *test_info = arg1;
+ int core = arg;
+ uint64_t errs;
+ bdk_node_t node = test_info->node;
+ int num_lmcs = test_info->num_lmcs;
+
+ uint64_t base_address = 0; //
+ uint64_t bytemask = test_info->byte_mask;
+
+ /* Figure out our work memory range.
+ *
+ * Note: base_address above just provides the physical offset which determines
+ * specific portions of the address space and does not have the node bits set.
+ */
+ base_address = bdk_numa_get_address(node, base_address); // map to node
+ base_address |= (core << CORE_SHIFT); // FIXME: also put full core into address
+ if (dram_tune_dimm_offset) { // if multi-slot in some way, choose a DIMM for the core
+ base_address |= (core & 1) ? dram_tune_dimm_offset : 0;
+ }
+
+ debug_print("Node %d, core %d, Testing area 1 at 0x%011lx, area 2 at 0x%011lx\n",
+ node, core, base_address + AREA_BASE_OFFSET,
+ base_address + AREA_BASE_OFFSET + dram_tune_rank_offset);
+
+ errs = dram_tuning_mem_xor2(base_address, bytemask, (num_lmcs >> 1)); // 4->2, 2->1, 1->0
+ //errs = dram_tuning_mem_rows(base_address, bytemask);
+
+ /* Report that we're done */
+ debug_print("Core %d on LMC %d node %d done with test_dram_byte with 0x%lx errs\n",
+ core, lmc, node, errs);
+
+ if (errs) {
+ bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_threads_errs, errs);
+ // FIXME: this will have been done already in the called test routine
+ //bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_lmc_errs[lmc], errs);
+ }
+
+ bdk_atomic_add64_nosync(&test_dram_byte_threads_done, 1);
+
+ return;
+}
+
+static int dram_tune_use_xor2 = 1; // FIXME: do NOT default to original mem_xor (LMC-based) code
+
+static int
+run_dram_tuning_threads(bdk_node_t node, int num_lmcs, uint64_t bytemask)
+{
+ test_dram_byte_info_t test_dram_byte_info;
+ test_dram_byte_info_t *test_info = &test_dram_byte_info;
+ int total_count = 0;
+ __dram_tuning_thread_t thread_p = (dram_tune_use_xor2) ? dram_tuning_thread2 : dram_tuning_thread;
+
+ test_info->node = node;
+ test_info->num_lmcs = num_lmcs;
+ test_info->byte_mask = bytemask;
+
+ // init some global data
+ bdk_atomic_set64(&test_dram_byte_threads_done, 0);
+ bdk_atomic_set64((int64_t *)&test_dram_byte_threads_errs, 0);
+ bdk_atomic_set64((int64_t *)&test_dram_byte_lmc_errs[0], 0);
+ bdk_atomic_set64((int64_t *)&test_dram_byte_lmc_errs[1], 0);
+ bdk_atomic_set64((int64_t *)&test_dram_byte_lmc_errs[2], 0);
+ bdk_atomic_set64((int64_t *)&test_dram_byte_lmc_errs[3], 0);
+
+ /* Start threads for cores on the node */
+ if (bdk_numa_exists(node)) {
+ debug_print("Starting %d threads for test_dram_byte\n", dram_tune_use_cores);
+ for (int core = 0; core < dram_tune_use_cores; core++) {
+ if (bdk_thread_create(node, 0, thread_p, core, (void *)test_info, 0)) {
+ bdk_error("Failed to create thread %d for test_dram_byte\n", core);
+ } else {
+ total_count++;
+ }
+ }
+ }
+
+#if 0
+ /* Wait for threads to finish */
+ while (bdk_atomic_get64(&test_dram_byte_threads_done) < total_count)
+ bdk_thread_yield();
+#else
+#define TIMEOUT_SECS 5 // FIXME: long enough so a pass for a given setting will not print
+ /* Wait for threads to finish, with progress */
+ int cur_count;
+ uint64_t cur_time;
+ uint64_t period = bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_TIME) * TIMEOUT_SECS; // FIXME?
+ uint64_t timeout = bdk_clock_get_count(BDK_CLOCK_TIME) + period;
+ do {
+ bdk_thread_yield();
+ cur_count = bdk_atomic_get64(&test_dram_byte_threads_done);
+ cur_time = bdk_clock_get_count(BDK_CLOCK_TIME);
+ if (cur_time >= timeout) {
+ printf("Waiting for %d cores\n", total_count - cur_count);
+ timeout = cur_time + period;
+ }
+ } while (cur_count < total_count);
+#endif
+
+ // NOTE: this is the summary of errors across all LMCs
+ return (int)bdk_atomic_get64((int64_t *)&test_dram_byte_threads_errs);
+}
+
+/* These variables count the number of ECC errors. They should only be accessed atomically */
+extern int64_t __bdk_dram_ecc_single_bit_errors[];
+extern int64_t __bdk_dram_ecc_double_bit_errors[];
+
+#if 0
+// make the tuning test callable as a standalone
+int
+bdk_run_dram_tuning_test(int node)
+{
+ int num_lmcs = __bdk_dram_get_num_lmc(node);
+ const char *s;
+ int lmc, byte;
+ int errors;
+ uint64_t start_dram_dclk[4], start_dram_ops[4];
+ int save_use_bursts;
+
+ // check for the cores on this node, abort if not more than 1 // FIXME?
+ dram_tune_max_cores = bdk_get_num_running_cores(node);
+ if (dram_tune_max_cores < 2) {
+ //bdk_init_cores(node, 0);
+ printf("N%d: ERROR: not enough cores to run the DRAM tuning test.\n", node);
+ return 0;
+ }
+
+ // but use only a certain number of cores, at most what is available
+ if ((s = getenv("ddr_tune_use_cores")) != NULL) {
+ dram_tune_use_cores = strtoul(s, NULL, 0);
+ if (dram_tune_use_cores <= 0) // allow 0 or negative to mean all
+ dram_tune_use_cores = dram_tune_max_cores;
+ }
+ if (dram_tune_use_cores > dram_tune_max_cores)
+ dram_tune_use_cores = dram_tune_max_cores;
+
+ // save the original bursts, so we can replace it with a better number for just testing
+ save_use_bursts = dram_tune_use_bursts;
+ dram_tune_use_bursts = 1500; // FIXME: hard code bursts for the test here...
+
+ // allow override of the test repeats (bursts) per thread create
+ if ((s = getenv("ddr_tune_use_bursts")) != NULL) {
+ dram_tune_use_bursts = strtoul(s, NULL, 10);
+ }
+
+ // allow override of the test mem_xor algorithm
+ if ((s = getenv("ddr_tune_use_xor2")) != NULL) {
+ dram_tune_use_xor2 = !!strtoul(s, NULL, 10);
+ }
+
+ // FIXME? consult LMC0 only
+ BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(0));
+ if (lmcx_config.s.rank_ena) { // replace the default offset when there is more than 1 rank...
+ dram_tune_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
+ ddr_print("N%d: run_dram_tuning_test: changing rank offset to 0x%lx\n", node, dram_tune_rank_offset);
+ }
+ if (lmcx_config.s.init_status & 0x0c) { // bit 2 or 3 set indicates 2 DIMMs
+ dram_tune_dimm_offset = 1ull << (28 + lmcx_config.s.pbank_lsb + (num_lmcs/2));
+ ddr_print("N%d: run_dram_tuning_test: changing dimm offset to 0x%lx\n", node, dram_tune_dimm_offset);
+ }
+ int ddr_interface_64b = !lmcx_config.s.mode32b;
+
+ // construct the bytemask
+ int bytes_todo = (ddr_interface_64b) ? 0xff : 0x0f; // FIXME: hack?
+ uint64_t bytemask = 0;
+ for (byte = 0; byte < 8; ++byte) {
+ uint64_t bitmask;
+ if (bytes_todo & (1 << byte)) {
+ bitmask = ((!ddr_interface_64b) && (byte == 4)) ? 0x0f: 0xff;
+ bytemask |= bitmask << (8*byte); // set the bytes bits in the bytemask
+ }
+ } /* for (byte = 0; byte < 8; ++byte) */
+
+ // print current working values
+ ddr_print("N%d: run_dram_tuning_test: max %d cores, use %d cores, use %d bursts.\n",
+ node, dram_tune_max_cores, dram_tune_use_cores, dram_tune_use_bursts);
+
+ // do the setup on active LMCs
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+ // record start cycle CSRs here for utilization measure
+ start_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc));
+ start_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc));
+#if 0
+ bdk_atomic_set64(&__bdk_dram_ecc_single_bit_errors[lmc], 0);
+ bdk_atomic_set64(&__bdk_dram_ecc_double_bit_errors[lmc], 0);
+#else
+ __bdk_dram_ecc_single_bit_errors[lmc] = 0;
+ __bdk_dram_ecc_double_bit_errors[lmc] = 0;
+#endif
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ bdk_watchdog_poke();
+
+ // run the test(s)
+ // only 1 call should be enough, let the bursts, etc, control the load...
+ errors = run_dram_tuning_threads(node, num_lmcs, bytemask);
+
+ /* Check ECC error counters after the test */
+ int64_t ecc_single = 0;
+ int64_t ecc_double = 0;
+ int64_t ecc_single_errs[4];
+ int64_t ecc_double_errs[4];
+
+ // finally, print the utilizations all together, and sum the ECC errors
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+ uint64_t dclk_diff = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc)) - start_dram_dclk[lmc];
+ uint64_t ops_diff = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc)) - start_dram_ops[lmc];
+ uint64_t percent_x10 = ops_diff * 1000 / dclk_diff;
+ printf("N%d.LMC%d: ops %lu, cycles %lu, used %lu.%lu%%\n",
+ node, lmc, ops_diff, dclk_diff, percent_x10 / 10, percent_x10 % 10);
+
+ ecc_single += (ecc_single_errs[lmc] = bdk_atomic_get64(&__bdk_dram_ecc_single_bit_errors[lmc]));
+ ecc_double += (ecc_double_errs[lmc] = bdk_atomic_get64(&__bdk_dram_ecc_double_bit_errors[lmc]));
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ /* Always print any ECC errors */
+ if (ecc_single || ecc_double) {
+ printf("Test \"%s\": ECC errors, %ld/%ld/%ld/%ld corrected, %ld/%ld/%ld/%ld uncorrected\n",
+ "DRAM Tuning Test",
+ ecc_single_errs[0], ecc_single_errs[1], ecc_single_errs[2], ecc_single_errs[3],
+ ecc_double_errs[0], ecc_double_errs[1], ecc_double_errs[2], ecc_double_errs[3]);
+ }
+ if (errors || ecc_double || ecc_single) {
+ printf("Test \"%s\": FAIL: %ld single, %ld double, %d compare errors\n",
+ "DRAM Tuning Test", ecc_single, ecc_double, errors);
+ }
+
+ // restore bursts
+ dram_tune_use_bursts = save_use_bursts;
+
+ return (errors + ecc_double + ecc_single);
+}
+#endif /* 0 */
+
+#define DEFAULT_SAMPLE_GRAN 3 // sample for errors every N offset values
+#define MIN_BYTE_OFFSET -63
+#define MAX_BYTE_OFFSET +63
+int dram_tune_use_gran = DEFAULT_SAMPLE_GRAN;
+
+static int
+auto_set_dll_offset(bdk_node_t node, int dll_offset_mode,
+ int num_lmcs, int ddr_interface_64b,
+ int do_tune)
+{
+ int byte_offset;
+ //unsigned short result[9];
+ int byte;
+ int byte_delay_start[4][9];
+ int byte_delay_count[4][9];
+ uint64_t byte_delay_windows [4][9];
+ int byte_delay_best_start[4][9];
+ int byte_delay_best_count[4][9];
+ //int this_rodt;
+ uint64_t ops_sum[4], dclk_sum[4];
+ uint64_t start_dram_dclk[4], stop_dram_dclk[4];
+ uint64_t start_dram_ops[4], stop_dram_ops[4];
+ int errors, tot_errors;
+ int lmc;
+ char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write";
+ int mode_is_read = (dll_offset_mode == 2);
+ char *mode_blk = (dll_offset_mode == 2) ? " " : "";
+ int start_offset, end_offset, incr_offset;
+
+ int speed_bin = get_speed_bin(node, 0); // FIXME: just get from LMC0?
+ int low_risk_count = 0, needs_review_count = 0;
+
+ if (dram_tune_use_gran != DEFAULT_SAMPLE_GRAN) {
+ ddr_print2("N%d: Changing sample granularity from %d to %d\n",
+ node, DEFAULT_SAMPLE_GRAN, dram_tune_use_gran);
+ }
+ // ensure sample is taken at 0
+ start_offset = MIN_BYTE_OFFSET - (MIN_BYTE_OFFSET % dram_tune_use_gran);
+ end_offset = MAX_BYTE_OFFSET - (MAX_BYTE_OFFSET % dram_tune_use_gran);
+ incr_offset = dram_tune_use_gran;
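+    // e.g. with the default granularity of 3: start_offset = -63 and end_offset = 63,
+    // so the sweep visits -63, -60, ..., 0, ..., 60, 63 and offset 0 is always sampled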
+
+ memset(ops_sum, 0, sizeof(ops_sum));
+ memset(dclk_sum, 0, sizeof(dclk_sum));
+ memset(byte_delay_start, 0, sizeof(byte_delay_start));
+ memset(byte_delay_count, 0, sizeof(byte_delay_count));
+ memset(byte_delay_windows, 0, sizeof(byte_delay_windows));
+ memset(byte_delay_best_start, 0, sizeof(byte_delay_best_start));
+ memset(byte_delay_best_count, 0, sizeof(byte_delay_best_count));
+
+ // FIXME? consult LMC0 only
+ BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(0));
+ if (lmcx_config.s.rank_ena) { // replace the default offset when there is more than 1 rank...
+ dram_tune_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
+ ddr_print2("N%d: Tuning multiple ranks per DIMM (rank offset 0x%lx).\n", node, dram_tune_rank_offset);
+ }
+ if (lmcx_config.s.init_status & 0x0c) { // bit 2 or 3 set indicates 2 DIMMs
+ dram_tune_dimm_offset = 1ull << (28 + lmcx_config.s.pbank_lsb + (num_lmcs/2));
+ ddr_print2("N%d: Tuning multiple DIMMs per channel (DIMM offset 0x%lx)\n", node, dram_tune_dimm_offset);
+ }
+
+ // FIXME? do this for LMC0 only
+ //BDK_CSR_INIT(comp_ctl2, node, BDK_LMCX_COMP_CTL2(0));
+ //this_rodt = comp_ctl2.s.rodt_ctl;
+
+ // construct the bytemask
+ int bytes_todo = (ddr_interface_64b) ? 0xff : 0x0f;
+ uint64_t bytemask = 0;
+ for (byte = 0; byte < 8; ++byte) {
+ if (bytes_todo & (1 << byte)) {
+ bytemask |= 0xfful << (8*byte); // set the bytes bits in the bytemask
+ }
+ } /* for (byte = 0; byte < 8; ++byte) */
+
+ // now loop through selected legal values for the DLL byte offset...
+
+ for (byte_offset = start_offset; byte_offset <= end_offset; byte_offset += incr_offset) {
+
+ // do the setup on active LMCs
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+ change_dll_offset_enable(node, lmc, 0);
+
+ // set all byte lanes at once
+ load_dll_offset(node, lmc, dll_offset_mode, byte_offset, 10 /* All bytes at once */);
+ // but then clear the ECC byte lane so it should be neutral for the test...
+ load_dll_offset(node, lmc, dll_offset_mode, 0, 8);
+
+ change_dll_offset_enable(node, lmc, 1);
+
+ // record start cycle CSRs here for utilization measure
+ start_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc));
+ start_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc));
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ bdk_watchdog_poke();
+
+ // run the test(s)
+ // only 1 call should be enough, let the bursts, etc, control the load...
+ tot_errors = run_dram_tuning_threads(node, num_lmcs, bytemask);
+
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+ // record stop cycle CSRs here for utilization measure
+ stop_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc));
+ stop_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc));
+
+ // accumulate...
+ ops_sum[lmc] += stop_dram_ops[lmc] - start_dram_ops[lmc];
+ dclk_sum[lmc] += stop_dram_dclk[lmc] - start_dram_dclk[lmc];
+
+ errors = test_dram_byte_lmc_errs[lmc];
+
+ // check errors by byte, but not ECC
+ for (byte = 0; byte < 8; ++byte) {
+ if (!(bytes_todo & (1 << byte))) // is this byte lane to be done
+ continue; // no
+
+ byte_delay_windows[lmc][byte] <<= 1; // always put in a zero
+ if (errors & (1 << byte)) { // yes, an error in this byte lane
+ byte_delay_count[lmc][byte] = 0; // stop now always
+ } else { // no error in this byte lane
+ if (byte_delay_count[lmc][byte] == 0) { // first success, set run start
+ byte_delay_start[lmc][byte] = byte_offset;
+ }
+ byte_delay_count[lmc][byte] += incr_offset; // bump run length
+
+ if (byte_delay_count[lmc][byte] > byte_delay_best_count[lmc][byte]) {
+ byte_delay_best_count[lmc][byte] = byte_delay_count[lmc][byte];
+ byte_delay_best_start[lmc][byte] = byte_delay_start[lmc][byte];
+ }
+ byte_delay_windows[lmc][byte] |= 1ULL; // for pass, put in a 1
+ }
+ } /* for (byte = 0; byte < 8; ++byte) */
+
+ // only print when there are errors and verbose...
+ if (errors) {
+ debug_print("DLL %s Offset Test %3d: errors 0x%x\n",
+ mode_str, byte_offset, errors);
+ }
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ } /* for (byte_offset=-63; byte_offset<63; byte_offset += incr_offset) */
+
+ // done with testing, load up and/or print out the offsets we found...
+
+ // only when margining...
+ if (!do_tune) {
+ printf(" \n");
+ printf("-------------------------------------\n");
+#if 0
+ uint32_t mts_speed = (libdram_get_freq_from_pll(node, 0) * 2) / 1000000; // FIXME: sample LMC0
+ printf("N%d: Starting %s Timing Margining for %d MT/s.\n", node, mode_str, mts_speed);
+#else
+ printf("N%d: Starting %s Timing Margining.\n", node, mode_str);
+#endif
+ printf(" \n");
+ } /* if (!do_tune) */
+
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+#if 1
+ // FIXME FIXME
+ // FIXME: this just makes ECC always show 0
+ byte_delay_best_start[lmc][8] = start_offset;
+ byte_delay_best_count[lmc][8] = end_offset - start_offset + incr_offset;
+#endif
+
+ // disable offsets while we load...
+ change_dll_offset_enable(node, lmc, 0);
+
+ // only when margining...
+ if (!do_tune) {
+ // print the heading
+ printf(" \n");
+ printf("N%d.LMC%d: %s Timing Margin %s : ", node, lmc, mode_str, mode_blk);
+ printf(" ECC/8 ");
+ for (byte = 7; byte >= 0; byte--) {
+ printf(" Byte %d ", byte);
+ }
+ printf("\n");
+ } /* if (!do_tune) */
+
+ // print and load the offset values
+ // print the windows bit arrays
+ // only when margining...
+ if (!do_tune) {
+ printf("N%d.LMC%d: DLL %s Offset Amount %s : ", node, lmc, mode_str, mode_blk);
+ } else {
+ ddr_print("N%d.LMC%d: SW DLL %s Offset Amount %s : ", node, lmc, mode_str, mode_blk);
+ }
+ for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order
+
+ int count = byte_delay_best_count[lmc][byte];
+ if (count == 0)
+ count = incr_offset; // should make non-tested ECC byte come out 0
+
+ byte_offset = byte_delay_best_start[lmc][byte] +
+ ((count - incr_offset) / 2); // adj by incr
+
+ if (!do_tune) { // do counting and special flag if margining
+ int will_need_review = !is_low_risk_winlen(speed_bin, (count - incr_offset)) &&
+ !is_low_risk_offset(speed_bin, byte_offset);
+
+ printf("%10d%c", byte_offset, (will_need_review) ? '<' :' ');
+
+ if (will_need_review)
+ needs_review_count++;
+ else
+ low_risk_count++;
+ } else { // if just tuning, make the printout less lengthy
+ ddr_print("%5d ", byte_offset);
+ }
+
+ // FIXME? should we be able to override this?
+ if (mode_is_read) // for READ offsets, always store what we found
+ load_dll_offset(node, lmc, dll_offset_mode, byte_offset, byte);
+ else // for WRITE offsets, always store 0
+ load_dll_offset(node, lmc, dll_offset_mode, 0, byte);
+
+ }
+ if (!do_tune) {
+ printf("\n");
+ } else {
+ ddr_print("\n");
+ }
+
+
+ // re-enable the offsets now that we are done loading
+ change_dll_offset_enable(node, lmc, 1);
+
+ // only when margining...
+ if (!do_tune) {
+ // print the window sizes
+ printf("N%d.LMC%d: DLL %s Window Length %s : ", node, lmc, mode_str, mode_blk);
+ for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order
+ int count = byte_delay_best_count[lmc][byte];
+ if (count == 0)
+ count = incr_offset; // should make non-tested ECC byte come out 0
+
+ // do this again since the "needs review" test is an AND...
+ byte_offset = byte_delay_best_start[lmc][byte] +
+ ((count - incr_offset) / 2); // adj by incr
+
+ int will_need_review = !is_low_risk_winlen(speed_bin, (count - incr_offset)) &&
+ !is_low_risk_offset(speed_bin, byte_offset);
+
+ printf("%10d%c", count - incr_offset, (will_need_review) ? '<' :' ');
+ }
+ printf("\n");
+
+ // print the window extents
+ printf("N%d.LMC%d: DLL %s Window Bounds %s : ", node, lmc, mode_str, mode_blk);
+ for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order
+ int start = byte_delay_best_start[lmc][byte];
+ int count = byte_delay_best_count[lmc][byte];
+ if (count == 0)
+ count = incr_offset; // should make non-tested ECC byte come out 0
+ printf(" %3d to%3d ", start,
+ start + count - incr_offset);
+ }
+ printf("\n");
+#if 0
+ // FIXME: should have a way to force these out...
+ // print the windows bit arrays
+ printf("N%d.LMC%d: DLL %s Window Bitmap%s : ", node, lmc, mode_str, mode_blk);
+ for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order
+ printf("%010lx ", byte_delay_windows[lmc][byte]);
+ }
+ printf("\n");
+#endif
+ } /* if (!do_tune) */
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ // only when margining...
+ if (!do_tune) {
+ // print the Summary line(s) here
+ printf(" \n");
+ printf("N%d: %s Timing Margining Summary : %s ", node, mode_str,
+ (needs_review_count > 0) ? "Needs Review" : "Low Risk");
+ if (needs_review_count > 0)
+ printf("(%d)", needs_review_count);
+ printf("\n");
+
+ // FIXME??? want to print here: "N0: %s Offsets have been applied already"
+
+ printf("-------------------------------------\n");
+ printf(" \n");
+ } /* if (!do_tune) */
+
+ // FIXME: we probably want this only when doing verbose...
+ // finally, print the utilizations all together
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+ uint64_t percent_x10 = ops_sum[lmc] * 1000 / dclk_sum[lmc];
+ ddr_print2("N%d.LMC%d: ops %lu, cycles %lu, used %lu.%lu%%\n",
+ node, lmc, ops_sum[lmc], dclk_sum[lmc], percent_x10 / 10, percent_x10 % 10);
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ // FIXME: only when verbose, or only when there are errors?
+ // run the test one last time
+ // print whether there are errors or not, but only when verbose...
+ bdk_watchdog_poke();
+ debug_print("N%d: %s: Start running test one last time\n", node, __FUNCTION__);
+ tot_errors = run_dram_tuning_threads(node, num_lmcs, bytemask);
+ debug_print("N%d: %s: Finished running test one last time\n", node, __FUNCTION__);
+ if (tot_errors)
+ ddr_print2("%s Timing Final Test: errors 0x%x\n", mode_str, tot_errors);
+
+ return (do_tune) ? tot_errors : !!(needs_review_count > 0);
+}
+
+#define USE_L2_WAYS_LIMIT 0 // non-zero to enable L2 ways limiting
+
+/*
+ * Automatically adjust the DLL offset for the data bytes
+ */
+int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune)
+{
+ int ddr_interface_64b;
+ int save_ecc_ena[4];
+ bdk_lmcx_config_t lmc_config;
+ int lmc, num_lmcs = __bdk_dram_get_num_lmc(node);
+ const char *s;
+#if USE_L2_WAYS_LIMIT
+ int ways, ways_print = 0;
+#endif
+#if 0
+ int dram_tune_use_rodt = -1, save_rodt[4];
+ bdk_lmcx_comp_ctl2_t comp_ctl2;
+#endif
+ int loops = 1, loop;
+ uint64_t orig_coremask;
+ int errs = 0;
+
+ // enable any non-running cores on this node
+ orig_coremask = bdk_get_running_coremask(node);
+ ddr_print4("N%d: %s: Starting cores (mask was 0x%lx)\n",
+ node, __FUNCTION__, orig_coremask);
+ bdk_init_cores(node, ~0ULL & ~orig_coremask);
+ dram_tune_max_cores = bdk_get_num_running_cores(node);
+
+ // but use only a certain number of cores, at most what is available
+ if ((s = getenv("ddr_tune_use_cores")) != NULL) {
+ dram_tune_use_cores = strtoul(s, NULL, 0);
+ if (dram_tune_use_cores <= 0) // allow 0 or negative to mean all
+ dram_tune_use_cores = dram_tune_max_cores;
+ }
+ if (dram_tune_use_cores > dram_tune_max_cores)
+ dram_tune_use_cores = dram_tune_max_cores;
+
+ // see if we want to do the tuning more than once per LMC...
+ if ((s = getenv("ddr_tune_use_loops"))) {
+ loops = strtoul(s, NULL, 0);
+ }
+
+ // see if we want to change the granularity of the byte_offset sampling
+ if ((s = getenv("ddr_tune_use_gran"))) {
+ dram_tune_use_gran = strtoul(s, NULL, 0);
+ }
+
+ // allow override of the test repeats (bursts) per thread create
+ if ((s = getenv("ddr_tune_use_bursts")) != NULL) {
+ dram_tune_use_bursts = strtoul(s, NULL, 10);
+ }
+
+#if 0
+ // allow override of Read ODT setting just during the tuning run(s)
+ if ((s = getenv("ddr_tune_use_rodt")) != NULL) {
+ int temp = strtoul(s, NULL, 10);
+ // validity check
+ if (temp >= 0 && temp <= 7)
+ dram_tune_use_rodt = temp;
+ }
+#endif
+
+#if 0
+ // allow override of the test pattern
+ // FIXME: a bit simplistic...
+ if ((s = getenv("ddr_tune_use_pattern")) != NULL) {
+ int patno = strtoul(s, NULL, 10);
+ if (patno == 2)
+ dram_tune_test_pattern = test_pattern_2;
+ else if (patno == 3)
+ dram_tune_test_pattern = test_pattern_3;
+ else // all other values use default
+ dram_tune_test_pattern = test_pattern_1;
+ }
+#endif
+
+ // allow override of the test mem_xor algorithm
+ if ((s = getenv("ddr_tune_use_xor2")) != NULL) {
+ dram_tune_use_xor2 = !!strtoul(s, NULL, 10);
+ }
+
+ // print current working values
+ ddr_print2("N%d: Tuning will use %d cores of max %d cores, and use %d repeats.\n",
+ node, dram_tune_use_cores, dram_tune_max_cores,
+ dram_tune_use_bursts);
+
+#if USE_L2_WAYS_LIMIT
+ // see if L2 ways are limited
+ if ((s = lookup_env_parameter("limit_l2_ways")) != NULL) {
+ ways = strtoul(s, NULL, 10);
+ ways_print = 1;
+ } else {
+ ways = bdk_l2c_get_num_assoc(node);
+ }
+#endif
+
+#if 0
+ // if RODT is to be overridden during tuning, note change
+ if (dram_tune_use_rodt >= 0) {
+ ddr_print("N%d: using RODT %d for tuning.\n",
+ node, dram_tune_use_rodt);
+ }
+#endif
+
+ // FIXME? get flag from LMC0 only
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(0));
+ ddr_interface_64b = !lmc_config.s.mode32b;
+
+ // do setup for each active LMC
+ debug_print("N%d: %s: starting LMCs setup.\n", node, __FUNCTION__);
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+
+#if 0
+ // if RODT change, save old and set new here...
+ if (dram_tune_use_rodt >= 0) {
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc));
+ save_rodt[lmc] = comp_ctl2.s.rodt_ctl;
+ comp_ctl2.s.rodt_ctl = dram_tune_use_rodt;
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(lmc), comp_ctl2.u);
+ BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc));
+ }
+#endif
+ /* Disable ECC for DRAM tests */
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ save_ecc_ena[lmc] = lmc_config.s.ecc_ena;
+ lmc_config.s.ecc_ena = 0;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u);
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+#if USE_L2_WAYS_LIMIT
+ /* Disable l2 sets for DRAM testing */
+ limit_l2_ways(node, 0, ways_print);
+#endif
+
+ // testing is done on all LMCs simultaneously
+ // FIXME: for now, loop here to show what happens multiple times
+ for (loop = 0; loop < loops; loop++) {
+ /* Perform DLL offset tuning */
+ errs = auto_set_dll_offset(node, dll_offset_mode, num_lmcs, ddr_interface_64b, do_tune);
+ }
+
+#if USE_L2_WAYS_LIMIT
+ /* Restore the l2 set configuration */
+ limit_l2_ways(node, ways, ways_print);
+#endif
+
+ // perform cleanup on all active LMCs
+ debug_print("N%d: %s: starting LMCs cleanup.\n", node, __FUNCTION__);
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+
+ /* Restore ECC for DRAM tests */
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ lmc_config.s.ecc_ena = save_ecc_ena[lmc];
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u);
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+#if 0
+ // if RODT change, restore old here...
+ if (dram_tune_use_rodt >= 0) {
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc));
+ comp_ctl2.s.rodt_ctl = save_rodt[lmc];
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(lmc), comp_ctl2.u);
+ BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc));
+ }
+#endif
+ // finally, see if there are any read offset overrides after tuning
+ // FIXME: provide a way to do write offsets also??
+ if (dll_offset_mode == 2) {
+ for (int by = 0; by < 9; by++) {
+ if ((s = lookup_env_parameter("ddr%d_tune_byte%d", lmc, by)) != NULL) {
+ int dllro = strtoul(s, NULL, 10);
+ change_dll_offset_enable(node, lmc, 0);
+ load_dll_offset(node, lmc, /* read */2, dllro, by);
+ change_dll_offset_enable(node, lmc, 1);
+ }
+ }
+ }
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ // finish up...
+
+#if 0
+ // if RODT was overridden during tuning, note restore
+ if (dram_tune_use_rodt >= 0) {
+ ddr_print("N%d: restoring RODT %d after tuning.\n",
+ node, save_rodt[0]); // FIXME? use LMC0
+ }
+#endif
+
+ // put any cores on this node, that were not running at the start, back into reset
+ uint64_t reset_coremask = bdk_get_running_coremask(node) & ~orig_coremask;
+ if (reset_coremask) {
+ ddr_print4("N%d: %s: Stopping cores 0x%lx\n", node, __FUNCTION__,
+ reset_coremask);
+ bdk_reset_cores(node, reset_coremask);
+ } else {
+ ddr_print4("N%d: %s: leaving cores set to 0x%lx\n", node, __FUNCTION__,
+ orig_coremask);
+ }
+
+ return errs;
+
+} /* perform_dll_offset_tuning */
+
+/////////////////////////////////////////////////////////////////////////////////////////////
+
+///// HW-assist byte DLL offset tuning //////
+
+#if 1
+// setup defaults for byte test pattern array
+// take these first two from the HRM section 6.9.13
+static const uint64_t byte_pattern_0[] = {
+ 0xFFAAFFFFFF55FFFFULL, // GP0
+ 0x55555555AAAAAAAAULL, // GP1
+ 0xAA55AAAAULL, // GP2
+};
+static const uint64_t byte_pattern_1[] = {
+ 0xFBF7EFDFBF7FFEFDULL, // GP0
+ 0x0F1E3C78F0E1C387ULL, // GP1
+ 0xF0E1BF7FULL, // GP2
+};
+// this is from Andrew via LFSR with PRBS=0xFFFFAAAA
+static const uint64_t byte_pattern_2[] = {
+ 0xEE55AADDEE55AADDULL, // GP0
+ 0x55AADDEE55AADDEEULL, // GP1
+ 0x55EEULL, // GP2
+};
+// this is from Mike via LFSR with PRBS=0x4A519909
+static const uint64_t byte_pattern_3[] = {
+ 0x0088CCEE0088CCEEULL, // GP0
+ 0xBB552211BB552211ULL, // GP1
+ 0xBB00ULL, // GP2
+};
+
+static const uint64_t *byte_patterns[] = {
+ byte_pattern_0, byte_pattern_1, byte_pattern_2, byte_pattern_3 // FIXME: use all we have
+};
+#define NUM_BYTE_PATTERNS ((int)(sizeof(byte_patterns)/sizeof(uint64_t *)))
+
+#define DEFAULT_BYTE_BURSTS 32 // FIXME: this is what the longest test usually has
+int dram_tune_byte_bursts = DEFAULT_BYTE_BURSTS;
+#endif
+
+static void
+setup_hw_pattern(bdk_node_t node, int lmc, const uint64_t *pattern_p)
+{
+ /*
+ 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern of choice.
+ a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower (rising edge) 64 bits of data.
+ b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper (falling edge) 64 bits of data.
+ c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower (rising edge <7:0>) and upper
+ (falling edge <15:8>) ECC data.
+ */
+ DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE0(lmc), pattern_p[0]);
+ DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE1(lmc), pattern_p[1]);
+ DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE2(lmc), pattern_p[2]);
+}
+
+#define DEFAULT_PRBS 0xFFFFAAAAUL /* FIXME: maybe try 0x4A519909UL */
+
+static void
+setup_lfsr_pattern(bdk_node_t node, int lmc, uint64_t data)
+{
+ uint32_t prbs;
+ const char *s;
+
+ if ((s = getenv("ddr_lfsr_prbs"))) {
+ prbs = strtoul(s, NULL, 0);
+ } else
+ prbs = DEFAULT_PRBS; // FIXME: from data arg?
+
+ /*
+ 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
+ here data comes from the LFSR generating a PRBS pattern
+ CHAR_CTL.EN = 0
+ CHAR_CTL.SEL = 0; // for PRBS
+ CHAR_CTL.DR = 1;
+ CHAR_CTL.PRBS = setup for whatever type of PRBS to send
+ CHAR_CTL.SKEW_ON = 1;
+ */
+ BDK_CSR_INIT(char_ctl, node, BDK_LMCX_CHAR_CTL(lmc));
+ char_ctl.s.en = 0;
+ char_ctl.s.sel = 0;
+ char_ctl.s.dr = 1;
+ char_ctl.s.prbs = prbs;
+ char_ctl.s.skew_on = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CHAR_CTL(lmc), char_ctl.u);
+}
+
+int
+choose_best_hw_patterns(bdk_node_t node, int lmc, int mode)
+{
+ int new_mode = mode;
+ const char *s;
+
+ switch (mode) {
+ case DBTRAIN_TEST: // always choose LFSR if chip supports it
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX)) {
+ int lfsr_enable = 1;
+ if ((s = getenv("ddr_allow_lfsr"))) { // override?
+ lfsr_enable = !!strtoul(s, NULL, 0);
+ }
+ if (lfsr_enable)
+ new_mode = DBTRAIN_LFSR;
+ }
+ break;
+ case DBTRAIN_DBI: // possibly can allow LFSR use?
+ break;
+ case DBTRAIN_LFSR: // forced already
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) {
+ ddr_print("ERROR: illegal HW assist mode %d\n", mode);
+ new_mode = DBTRAIN_TEST;
+ }
+ break;
+ default:
+ ddr_print("ERROR: unknown HW assist mode %d\n", mode);
+ }
+
+ if (new_mode != mode)
+ VB_PRT(VBL_DEV2, "choose_best_hw_patterns: changing mode %d to %d\n", mode, new_mode);
+
+ return new_mode;
+}
+
+int
+run_best_hw_patterns(bdk_node_t node, int lmc, uint64_t phys_addr,
+ int mode, uint64_t *xor_data)
+{
+ int pattern;
+ const uint64_t *pattern_p;
+ int errs, errors = 0;
+
+ // FIXME? always choose LFSR if chip supports it???
+ mode = choose_best_hw_patterns(node, lmc, mode);
+
+ if (mode == DBTRAIN_LFSR) {
+ setup_lfsr_pattern(node, lmc, 0);
+ errors = test_dram_byte_hw(node, lmc, phys_addr, mode, xor_data);
+ VB_PRT(VBL_DEV2, "%s: LFSR at A:0x%012lx errors 0x%x\n",
+ __FUNCTION__, phys_addr, errors);
+ } else {
+ for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
+ pattern_p = byte_patterns[pattern];
+ setup_hw_pattern(node, lmc, pattern_p);
+
+ errs = test_dram_byte_hw(node, lmc, phys_addr, mode, xor_data);
+
+ VB_PRT(VBL_DEV2, "%s: PATTERN %d at A:0x%012lx errors 0x%x\n",
+ __FUNCTION__, pattern, phys_addr, errs);
+
+ errors |= errs;
+ } /* for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) */
+ }
+ return errors;
+}
+
+static void
+hw_assist_test_dll_offset(bdk_node_t node, int dll_offset_mode,
+ int lmc, int bytelane)
+{
+ int byte_offset, new_best_offset[9];
+ int rank_delay_start[4][9];
+ int rank_delay_count[4][9];
+ int rank_delay_best_start[4][9];
+ int rank_delay_best_count[4][9];
+ int errors[4], off_errors, tot_errors;
+ int num_lmcs = __bdk_dram_get_num_lmc(node);
+ int rank_mask, rankx, active_ranks;
+ int pattern;
+ const uint64_t *pattern_p;
+ int byte;
+ char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write";
+ int pat_best_offset[9];
+ uint64_t phys_addr;
+ int pat_beg, pat_end;
+ int rank_beg, rank_end;
+ int byte_lo, byte_hi;
+ uint64_t hw_rank_offset;
+ // FIXME? always choose LFSR if chip supports it???
+ int mode = choose_best_hw_patterns(node, lmc, DBTRAIN_TEST);
+
+ if (bytelane == 0x0A) { // all bytelanes
+ byte_lo = 0;
+ byte_hi = 8;
+ } else { // just 1
+ byte_lo = byte_hi = bytelane;
+ }
+
+ BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(lmc));
+ rank_mask = lmcx_config.s.init_status;
+ // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
+ hw_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
+
+ debug_print("N%d: %s: starting LMC%d with rank offset 0x%lx\n",
+ node, __FUNCTION__, lmc, hw_rank_offset);
+
+ // start of pattern loop
+ // we do the set of tests for each pattern supplied...
+
+ memset(new_best_offset, 0, sizeof(new_best_offset));
+ for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
+
+ memset(pat_best_offset, 0, sizeof(pat_best_offset));
+
+ if (mode == DBTRAIN_TEST) {
+ pattern_p = byte_patterns[pattern];
+ setup_hw_pattern(node, lmc, pattern_p);
+ } else {
+ setup_lfsr_pattern(node, lmc, 0);
+ }
+
+ // now loop through all legal values for the DLL byte offset...
+
+#define BYTE_OFFSET_INCR 3 // FIXME: make this tunable?
+
+ tot_errors = 0;
+
+ memset(rank_delay_count, 0, sizeof(rank_delay_count));
+ memset(rank_delay_start, 0, sizeof(rank_delay_start));
+ memset(rank_delay_best_count, 0, sizeof(rank_delay_best_count));
+ memset(rank_delay_best_start, 0, sizeof(rank_delay_best_start));
+
+ for (byte_offset = -63; byte_offset < 64; byte_offset += BYTE_OFFSET_INCR) {
+
+ // do the setup on the active LMC
+ // set the bytelanes DLL offsets
+ change_dll_offset_enable(node, lmc, 0);
+ load_dll_offset(node, lmc, dll_offset_mode, byte_offset, bytelane); // FIXME? bytelane?
+ change_dll_offset_enable(node, lmc, 1);
+
+ bdk_watchdog_poke();
+
+ // run the test on each rank
+ // only 1 call per rank should be enough, let the bursts, loops, etc, control the load...
+
+ off_errors = 0; // errors for this byte_offset, all ranks
+
+ active_ranks = 0;
+
+ for (rankx = 0; rankx < 4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ phys_addr = hw_rank_offset * active_ranks;
+ // FIXME: now done by test_dram_byte_hw()
+ //phys_addr |= (lmc << 7);
+ //phys_addr = bdk_numa_get_address(node, phys_addr); // map to node
+
+ active_ranks++;
+
+ // NOTE: return is a now a bitmask of the erroring bytelanes..
+ errors[rankx] = test_dram_byte_hw(node, lmc, phys_addr, mode, NULL);
+
+ for (byte = byte_lo; byte <= byte_hi; byte++) { // do bytelane(s)
+
+ // check errors
+ if (errors[rankx] & (1 << byte)) { // yes, an error in the byte lane in this rank
+ off_errors |= (1 << byte);
+
+ ddr_print5("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: Address 0x%012lx errors 0x%x\n",
+ node, lmc, rankx, bytelane, mode_str,
+ byte_offset, phys_addr, errors[rankx]);
+
+ if (rank_delay_count[rankx][byte] > 0) { // had started run
+ ddr_print5("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: stopping a run here\n",
+ node, lmc, rankx, bytelane, mode_str, byte_offset);
+ rank_delay_count[rankx][byte] = 0; // stop now
+ }
+ // FIXME: else had not started run - nothing else to do?
+ } else { // no error in the byte lane
+ if (rank_delay_count[rankx][byte] == 0) { // first success, set run start
+ ddr_print5("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: starting a run here\n",
+ node, lmc, rankx, bytelane, mode_str, byte_offset);
+ rank_delay_start[rankx][byte] = byte_offset;
+ }
+ rank_delay_count[rankx][byte] += BYTE_OFFSET_INCR; // bump run length
+
+ // is this now the biggest window?
+ if (rank_delay_count[rankx][byte] > rank_delay_best_count[rankx][byte]) {
+ rank_delay_best_count[rankx][byte] = rank_delay_count[rankx][byte];
+ rank_delay_best_start[rankx][byte] = rank_delay_start[rankx][byte];
+ debug_print("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: updating best to %d/%d\n",
+ node, lmc, rankx, bytelane, mode_str, byte_offset,
+ rank_delay_best_start[rankx][byte], rank_delay_best_count[rankx][byte]);
+ }
+ }
+ } /* for (byte = byte_lo; byte <= byte_hi; byte++) */
+ } /* for (rankx = 0; rankx < 4; rankx++) */
+
+ tot_errors |= off_errors;
+
+ } /* for (byte_offset = -63; byte_offset < 64; byte_offset += BYTE_OFFSET_INCR) */
+
+ // now choose the best byte_offsets for this pattern according to the best windows of the tested ranks
+ // calculate the offset from the midpoint of the intersection of the rank windows
+ for (byte = byte_lo; byte <= byte_hi; byte++) {
+
+ pat_beg = -999;
+ pat_end = 999;
+
+ for (rankx = 0; rankx < 4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ rank_beg = rank_delay_best_start[rankx][byte];
+ pat_beg = max(pat_beg, rank_beg);
+ rank_end = rank_beg + rank_delay_best_count[rankx][byte] - BYTE_OFFSET_INCR;
+ pat_end = min(pat_end, rank_end);
+
+ ddr_print5("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test: Rank Window %3d:%3d\n",
+ node, lmc, rankx, bytelane, mode_str, rank_beg, rank_end);
+
+ } /* for (rankx = 0; rankx < 4; rankx++) */
+
+ pat_best_offset[byte] = (pat_end + pat_beg) / 2;
+ ddr_print4("N%d.LMC%d: Bytelane %d DLL %s Offset Test: Pattern %d Average %3d\n",
+ node, lmc, byte, mode_str, pattern, pat_best_offset[byte]);
+
+#if 0
+ // FIXME: next print the window counts
+ sprintf(sbuffer, "N%d.LMC%d Pattern %d: DLL %s Offset Count ",
+ node, lmc, pattern, mode_str);
+ printf("%-45s : ", sbuffer);
+ printf(" %3d", byte_delay_best_count);
+ printf("\n");
+#endif
+
+ new_best_offset[byte] += pat_best_offset[byte]; // sum the pattern averages
+ } /* for (byte = byte_lo; byte <= byte_hi; byte++) */
+ } /* for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) */
+ // end of pattern loop
+
+ ddr_print("N%d.LMC%d: HW DLL %s Offset Amount : ",
+ node, lmc, mode_str);
+
+ for (byte = byte_hi; byte >= byte_lo; --byte) { // print in descending byte index order
+ new_best_offset[byte] = divide_nint(new_best_offset[byte], NUM_BYTE_PATTERNS); // average across patterns, rounded to nearest
+
+ // print the best offsets from all patterns
+
+ if (bytelane == 0x0A) // print just the offset of all the bytes
+ ddr_print("%5d ", new_best_offset[byte]);
+ else
+ ddr_print("(byte %d) %5d ", byte, new_best_offset[byte]);
+
+
+#if 1
+ // done with testing, load up the best offsets we found...
+ change_dll_offset_enable(node, lmc, 0); // disable offsets while we load...
+ load_dll_offset(node, lmc, dll_offset_mode, new_best_offset[byte], byte);
+ change_dll_offset_enable(node, lmc, 1); // re-enable the offsets now that we are done loading
+#endif
+ } /* for (byte = byte_hi; byte >= byte_lo; --byte) */
+
+ ddr_print("\n");
+
+#if 0
+ // run the test one last time
+ // print whether there are errors or not, but only when verbose...
+ tot_errors = run_test_dram_byte_threads(node, num_lmcs, bytemask);
+ printf("N%d.LMC%d: Bytelane %d DLL %s Offset Final Test: errors 0x%x\n",
+ node, lmc, bytelane, mode_str, tot_errors);
+#endif
+}
+
+/*
+ * Automatically adjust the DLL offset for the selected bytelane using hardware-assist
+ */
+int perform_HW_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int bytelane)
+{
+ int save_ecc_ena[4];
+ bdk_lmcx_config_t lmc_config;
+ int lmc, num_lmcs = __bdk_dram_get_num_lmc(node);
+ const char *s;
+ //bdk_lmcx_comp_ctl2_t comp_ctl2;
+ int loops = 1, loop;
+
+ // see if we want to do the tuning more than once per LMC...
+ if ((s = getenv("ddr_tune_ecc_loops"))) {
+ loops = strtoul(s, NULL, 0);
+ }
+
+ // allow override of the test repeats (bursts)
+ if ((s = getenv("ddr_tune_byte_bursts")) != NULL) {
+ dram_tune_byte_bursts = strtoul(s, NULL, 10);
+ }
+
+ // print current working values
+ ddr_print2("N%d: H/W Tuning for bytelane %d will use %d loops, %d bursts, and %d patterns.\n",
+ node, bytelane, loops, dram_tune_byte_bursts,
+ NUM_BYTE_PATTERNS);
+
+ // FIXME? get flag from LMC0 only
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(0));
+
+ // do once for each active LMC
+
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+
+ ddr_print4("N%d: H/W Tuning: starting LMC%d bytelane %d tune.\n", node, lmc, bytelane);
+
+ /* Enable ECC for the HW tests */
+ // NOTE: we do enable ECC, but the HW tests used will not generate "visible" errors
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ save_ecc_ena[lmc] = lmc_config.s.ecc_ena;
+ lmc_config.s.ecc_ena = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u);
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+
+ // testing is done on a single LMC at a time
+ // FIXME: for now, loop here to show what happens multiple times
+ for (loop = 0; loop < loops; loop++) {
+ /* Perform DLL offset tuning */
+ //auto_set_dll_offset(node, 1 /* 1=write */, lmc, bytelane);
+ hw_assist_test_dll_offset(node, 2 /* 2=read */, lmc, bytelane);
+ }
+
+ // perform cleanup on active LMC
+ ddr_print4("N%d: H/W Tuning: finishing LMC%d bytelane %d tune.\n", node, lmc, bytelane);
+
+ /* Restore ECC for DRAM tests */
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ lmc_config.s.ecc_ena = save_ecc_ena[lmc];
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u);
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+
+ // finally, see if there are any read offset overrides after tuning
+ for (int by = 0; by < 9; by++) {
+ if ((s = lookup_env_parameter("ddr%d_tune_byte%d", lmc, by)) != NULL) {
+ int dllro = strtoul(s, NULL, 10);
+ change_dll_offset_enable(node, lmc, 0);
+ load_dll_offset(node, lmc, 2 /* 2=read */, dllro, by);
+ change_dll_offset_enable(node, lmc, 1);
+ }
+ }
+
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ // finish up...
+
+ return 0;
+
+} /* perform_HW_dll_offset_tuning */
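+
+/*
+ * Usage sketch: per the conventions above, dll_offset_mode 2 selects the
+ * read-side DLL and bytelane 0x0A selects all bytelanes, so tuning every
+ * lane of every active LMC on a node is simply:
+ *
+ *   perform_HW_dll_offset_tuning(node, 2, 0x0A);
+ */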
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-util.h b/src/vendorcode/cavium/bdk/libdram/dram-util.h
new file mode 100644
index 0000000000..f8ab6c1552
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-util.h
@@ -0,0 +1,96 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Small utility functions for use by libdram internally. These
+ * are not meant for users of the libdram API.
+ */
+
+/**
+ * Standard min(a,b) macro
+ */
+#define min(X, Y) \
+ ({ typeof (X) __x = (X); \
+ typeof (Y) __y = (Y); \
+ (__x < __y) ? __x : __y; })
+
+/**
+ * Standard max(a,b) macro
+ */
+#define max(X, Y) \
+ ({ typeof (X) __x = (X); typeof(Y) __y = (Y); \
+ (__x > __y) ? __x : __y; })
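+
+/*
+ * Note: both macros use GCC statement expressions so each argument is
+ * evaluated exactly once; e.g. min(i++, limit) increments i a single
+ * time, which a naive ((X) < (Y) ? (X) : (Y)) macro would not guarantee.
+ */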
+
+/**
+ * Absolute value of an integer
+ *
+ * @param v signed value
+ *
+ * @return the absolute value of v
+ */
+static inline int64_t _abs(int64_t v)
+{
+ return (v < 0) ? -v : v;
+}
+
+/**
+ * Sign of an integer
+ *
+ * @param v signed value
+ *
+ * @return 1 if v is negative, 0 otherwise (note: 0/1, not -1/+1)
+ */
+static inline int64_t _sign(int64_t v)
+{
+ return v < 0;
+}
+
+/**
+ * Divide and round results up to the next higher integer.
+ *
+ * @param dividend
+ * @param divisor
+ *
+ * @return
+ */
+static inline uint64_t divide_roundup(uint64_t dividend, uint64_t divisor)
+{
+ return (dividend + divisor - 1) / divisor;
+}
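+
+/*
+ * Worked example: divide_roundup(10, 4) == 3 and divide_roundup(8, 4) == 2;
+ * the added (divisor - 1) only bumps the quotient when a remainder exists.
+ */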
+
diff --git a/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.c b/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.c
new file mode 100644
index 0000000000..cdc799744f
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.c
@@ -0,0 +1,2165 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/* $Revision: 102369 $ */
+
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-l2c.h"
+#include "dram-internal.h"
+
+/* Define DDR_DEBUG to debug the DDR interface. This also enables the
+** output necessary for review by Cavium Inc. */
+/* #define DDR_DEBUG */
+
+
+static int global_ddr_clock_initialized = 0;
+static int global_ddr_memory_preserved = 0;
+
+#if 1
+uint64_t max_p1 = 0UL;
+#endif
+
+/*
+ * SDRAM Physical Address (figure 6-2 from the HRM)
+ * 7 6 3 2 0
+ * +---------+----+----------------------+---------------+--------+---+------+-----+
+ * | Dimm |Rank| Row | Col | Bank | C | Col | Bus |
+ * +---------+----+----------------------+---------------+--------+---+------+-----+
+ * | ^ | | ^ | | |
+ * 0 or 1 | | 12-18 bits | 6-8 bits | 1 or 2 bits
+ * bit | 0 or 1 bit LMC_CONFIG[ROW_LSB]+X | (X=1 or 2, resp)
+ * | |
+ * LMC_CONFIG[PBANK_LSB]+X 3 or 4 bits
+ *
+ * Bus = Selects the byte on the 72-bit DDR3 bus
+ * Col = Column Address for the memory part (10-12 bits)
+ * C = Selects the LMC that services the reference
+ * (2 bits for 4 LMC mode, 1 bit for 2 LMC mode; X=width)
+ * Bank = Bank Address for the memory part (DDR3=3 bits, DDR4=3 or 4 bits)
+ * Row = Row Address for the memory part (12-18 bits)
+ * Rank = Optional Rank Address for dual-rank DIMMs
+ * (present when LMC_CONFIG[RANK_ENA] is set)
+ * Dimm = Optional DIMM address (present with more than 1 DIMM)
+ */
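+
+/*
+ * Minimal decode sketch for the layout above (illustrative only: the real
+ * field positions depend on LMC_CONFIG[ROW_LSB]/[PBANK_LSB] and the LMC
+ * mode, so the column width used here is a hypothetical example):
+ *
+ *   int bus = addr & 0x7;          // byte on the 72-bit DDR3 bus
+ *   int col = (addr >> 3) & 0xf;   // low column bits (width varies)
+ *   int c   = (addr >> 7) & 0x3;   // LMC select in 4-LMC mode
+ *
+ * bdk_dram_address_extract_info() performs the full decode used below.
+ */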
+
+
+/**
+ * Divide and round results to the nearest integer.
+ *
+ * @param dividend
+ * @param divisor
+ *
+ * @return
+ */
+uint64_t divide_nint(uint64_t dividend, uint64_t divisor)
+{
+ uint64_t quotient, remainder;
+ quotient = dividend / divisor;
+ remainder = dividend % divisor;
+ return quotient + ((remainder * 2) >= divisor);
+}
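+
+/*
+ * Worked example: divide_nint(7, 2) -> quotient 3, remainder 1, and
+ * remainder*2 == 2 >= divisor, so the result rounds up to 4; whereas
+ * divide_nint(7, 3) stays at 2 because remainder*2 == 2 < 3.
+ */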
+
+/* Sometimes the pass/fail results for all possible delay settings
+ * determined by the read-leveling sequence are too forgiving. This
+ * usually occurs for DCLK speeds below 300 MHz. As a result the
+ * passing range is exaggerated. This function accepts the bitmask
+ * results from the sequence and truncates the passing range to a
+ * reasonable range and recomputes the proper deskew setting.
+ */
+
+/* Default ODT config must disable ODT */
+/* Must be const (read only) so that the structure is in flash */
+const dimm_odt_config_t disable_odt_config[] = {
+ /* DDR4 needs an additional field in the struct (odt_mask2) */
+ /* DIMMS ODT_ENA ODT_MASK ODT_MASK1 ODT_MASK2 QS_DIC RODT_CTL */
+ /* ===== ======= ======== ========= ========= ====== ======== */
+ /* 1 */ { 0, 0x0000, {.u = 0x0000}, {.u = 0x0000}, 0, 0x0000 },
+ /* 2 */ { 0, 0x0000, {.u = 0x0000}, {.u = 0x0000}, 0, 0x0000 },
+ /* 3 */ { 0, 0x0000, {.u = 0x0000}, {.u = 0x0000}, 0, 0x0000 },
+ /* 4 */ { 0, 0x0000, {.u = 0x0000}, {.u = 0x0000}, 0, 0x0000 },
+};
+/* Memory controller setup function */
+static int init_octeon_dram_interface(bdk_node_t node,
+ const ddr_configuration_t *ddr_configuration,
+ uint32_t ddr_hertz,
+ uint32_t cpu_hertz,
+ uint32_t ddr_ref_hertz,
+ int board_type,
+ int board_rev_maj,
+ int board_rev_min,
+ int ddr_interface_num,
+ uint32_t ddr_interface_mask)
+{
+ uint32_t mem_size_mbytes = 0;
+ int lmc_restart_retries = 0;
+
+ const char *s;
+ if ((s = lookup_env_parameter("ddr_timing_hertz")) != NULL)
+ ddr_hertz = strtoul(s, NULL, 0);
+
+ restart_lmc_init:
+
+ /* Poke the watchdog timer so it doesn't expire during DRAM init */
+ bdk_watchdog_poke();
+
+ mem_size_mbytes = init_octeon3_ddr3_interface(node,
+ ddr_configuration,
+ ddr_hertz,
+ cpu_hertz,
+ ddr_ref_hertz,
+ board_type,
+ board_rev_maj,
+ board_rev_min,
+ ddr_interface_num,
+ ddr_interface_mask);
+#define DEFAULT_RESTART_RETRIES 3
+ if (mem_size_mbytes == 0) { // means restart is possible
+ if (lmc_restart_retries < DEFAULT_RESTART_RETRIES) {
+ lmc_restart_retries++;
+ ddr_print("N%d.LMC%d Configuration problem: attempting LMC reset and init restart %d\n",
+ node, ddr_interface_num, lmc_restart_retries);
+ // re-assert RESET first, as that is the assumption of the init code
+ if (!ddr_memory_preserved(node))
+ cn88xx_lmc_ddr3_reset(node, ddr_interface_num, LMC_DDR3_RESET_ASSERT);
+ goto restart_lmc_init;
+ } else {
+ error_print("INFO: N%d.LMC%d Configuration: fatal problem remains after %d LMC init retries - Resetting node...\n",
+ node, ddr_interface_num, lmc_restart_retries);
+ bdk_wait_usec(500000);
+ bdk_reset_chip(node);
+ }
+ }
+
+ error_print("N%d.LMC%d Configuration Completed: %d MB\n",
+ node, ddr_interface_num, mem_size_mbytes);
+ return mem_size_mbytes;
+}
+
+#define DO_LIKE_RANDOM_XOR 1
+
+#if !DO_LIKE_RANDOM_XOR
+/*
+ * Suggested testing patterns.
+ *
+ * 0xFFFF_FFFF_FFFF_FFFF
+ * 0xAAAA_AAAA_AAAA_AAAA
+ * 0xFFFF_FFFF_FFFF_FFFF
+ * 0xAAAA_AAAA_AAAA_AAAA
+ * 0x5555_5555_5555_5555
+ * 0xAAAA_AAAA_AAAA_AAAA
+ * 0xFFFF_FFFF_FFFF_FFFF
+ * 0xAAAA_AAAA_AAAA_AAAA
+ * 0xFFFF_FFFF_FFFF_FFFF
+ * 0x5555_5555_5555_5555
+ * 0xFFFF_FFFF_FFFF_FFFF
+ * 0x5555_5555_5555_5555
+ * 0xAAAA_AAAA_AAAA_AAAA
+ * 0x5555_5555_5555_5555
+ * 0xFFFF_FFFF_FFFF_FFFF
+ * 0x5555_5555_5555_5555
+ *
+ * or possibly
+ *
+ * 0xFDFD_FDFD_FDFD_FDFD
+ * 0x8787_8787_8787_8787
+ * 0xFEFE_FEFE_FEFE_FEFE
+ * 0xC3C3_C3C3_C3C3_C3C3
+ * 0x7F7F_7F7F_7F7F_7F7F
+ * 0xE1E1_E1E1_E1E1_E1E1
+ * 0xBFBF_BFBF_BFBF_BFBF
+ * 0xF0F0_F0F0_F0F0_F0F0
+ * 0xDFDF_DFDF_DFDF_DFDF
+ * 0x7878_7878_7878_7878
+ * 0xEFEF_EFEF_EFEF_EFEF
+ * 0x3C3C_3C3C_3C3C_3C3C
+ * 0xF7F7_F7F7_F7F7_F7F7
+ * 0x1E1E_1E1E_1E1E_1E1E
+ * 0xFBFB_FBFB_FBFB_FBFB
+ * 0x0F0F_0F0F_0F0F_0F0F
+ */
+
+static const uint64_t test_pattern[] = {
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+};
+#endif /* !DO_LIKE_RANDOM_XOR */
+
+int test_dram_byte(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, uint64_t *xor_data)
+{
+ uint64_t p1, p2, d1, d2;
+ uint64_t v, v1;
+ uint64_t p2offset = 0x10000000;
+ uint64_t datamask;
+ uint64_t xor;
+ int i, j, k;
+ int errors = 0;
+ int index;
+#if DO_LIKE_RANDOM_XOR
+ uint64_t pattern1 = bdk_rng_get_random64();
+ uint64_t this_pattern;
+#endif
+ uint64_t bad_bits[2] = {0,0};
+
+ // When doing in parallel, the caller must provide full 8-byte bitmask.
+ // Byte lanes may be clear in the mask to indicate no testing on that lane.
+ datamask = bitmask;
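+ // Example: bitmask = 0x00000000FFFFFFFFULL tests only byte lanes 0..3;
+ // lanes whose mask byte is clear are still written but never checked.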
+
+ // final address must include LMC and node
+ p |= (lmc<<7); /* Map address into proper interface */
+ p = bdk_numa_get_address(node, p); /* Map to node */
+
+ // Not on THUNDER: p |= 1ull<<63;
+
+ /* Add offset to both test regions to not clobber boot stuff
+ * when running from L2.
+ */
+ p += 0x10000000; // FIXME? was: 0x4000000; // make sure base is out of the way of boot
+
+ /* The loop ranges and increments walk through a range of addresses avoiding bits that alias
+ * to different memory interfaces (LMCs) on the CN88XX; i.e., we want to limit activity to a
+ * single memory channel.
+ */
+
+ /* Store something into each location first */
+ // NOTE: the ordering of loops is purposeful: fill full cachelines and flush
+ for (k = 0; k < (1 << 20); k += (1 << 14)) {
+ for (j = 0; j < (1 << 12); j += (1 << 9)) {
+ for (i = 0; i < (1 << 7); i += 8) {
+ index = i + j + k;
+ p1 = p + index;
+ p2 = p1 + p2offset;
+
+#if DO_LIKE_RANDOM_XOR
+ v = pattern1 * p1;
+ v1 = v; // write the same thing to both areas
+#else
+ v = 0ULL;
+ v1 = v;
+#endif
+ __bdk_dram_write64(p1, v);
+ __bdk_dram_write64(p2, v1);
+
+ /* Write back and invalidate the cache lines
+ *
+ * For OCX we cannot limit the number of L2 ways
+ * so instead we just write back and invalidate
+ * the L2 cache lines. This is not possible
+ * when booting remotely, however, so this is
+ * only enabled for U-Boot right now.
+ * Potentially the BDK can also take advantage
+ * of this.
+ */
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
+ }
+ }
+ }
+
+ BDK_DCACHE_INVALIDATE;
+
+#if DO_LIKE_RANDOM_XOR
+ this_pattern = bdk_rng_get_random64();
+#endif
+
+ // modify the contents of each location in some way
+ // NOTE: the ordering of loops is purposeful: modify full cachelines and flush
+ for (k = 0; k < (1 << 20); k += (1 << 14)) {
+ for (j = 0; j < (1 << 12); j += (1 << 9)) {
+ for (i = 0; i < (1 << 7); i += 8) {
+ index = i + j + k;
+ p1 = p + index;
+ p2 = p1 + p2offset;
+#if DO_LIKE_RANDOM_XOR
+ v = __bdk_dram_read64(p1) ^ this_pattern;
+ v1 = __bdk_dram_read64(p2) ^ this_pattern;
+#else
+ v = test_pattern[index%(sizeof(test_pattern)/sizeof(uint64_t))];
+ v &= datamask;
+ v1 = ~v;
+#endif
+
+ debug_print("[0x%016llX]: 0x%016llX, [0x%016llX]: 0x%016llX\n",
+ p1, v, p2, v1);
+
+ __bdk_dram_write64(p1, v);
+ __bdk_dram_write64(p2, v1);
+
+ /* Write back and invalidate the cache lines
+ *
+ * For OCX we cannot limit the number of L2 ways
+ * so instead we just write back and invalidate
+ * the L2 cache lines. This is not possible
+ * when booting remotely, however, so this is
+ * only enabled for U-Boot right now.
+ * Potentially the BDK can also take advantage
+ * of this.
+ */
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
+ }
+ }
+ }
+
+ BDK_DCACHE_INVALIDATE;
+
+ // test the contents of each location by predicting what should be there
+ // NOTE: the ordering of loops is purposeful: test full cachelines to detect
+ // an error occurring in any slot thereof
+ for (k = 0; k < (1 << 20); k += (1 << 14)) {
+ for (j = 0; j < (1 << 12); j += (1 << 9)) {
+ for (i = 0; i < (1 << 7); i += 8) {
+ index = i + j + k;
+ p1 = p + index;
+ p2 = p1 + p2offset;
+#if DO_LIKE_RANDOM_XOR
+ v = (p1 * pattern1) ^ this_pattern; // FIXME: this should predict what we find...???
+ d1 = __bdk_dram_read64(p1);
+ d2 = __bdk_dram_read64(p2);
+#else
+ v = test_pattern[index%(sizeof(test_pattern)/sizeof(uint64_t))];
+ d1 = __bdk_dram_read64(p1);
+ d2 = ~__bdk_dram_read64(p2);
+#endif
+ debug_print("[0x%016llX]: 0x%016llX, [0x%016llX]: 0x%016llX\n",
+ p1, d1, p2, d2);
+
+ xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes
+
+ if (!xor)
+ continue;
+
+ // accumulate bad bits
+ bad_bits[0] |= xor;
+ //bad_bits[1] |= ~mpr_data1 & 0xffUL; // cannot do ECC here
+
+ int bybit = 1;
+ uint64_t bymsk = 0xffULL; // start in byte lane 0
+ while (xor != 0) {
+ debug_print("ERROR: [0x%016llX] [0x%016llX] expected 0x%016llX xor %016llX\n",
+ p1, p2, v, xor);
+ if (xor & bymsk) { // error(s) in this lane
+ errors |= bybit; // set the byte error bit
+ xor &= ~bymsk; // clear byte lane in error bits
+ datamask &= ~bymsk; // clear the byte lane in the mask
+ if (datamask == 0) { // nothing left to do
+ goto done_now; // completely done when errors found in all byte lanes in datamask
+ }
+ }
+ bymsk <<= 8; // move mask into next byte lane
+ bybit <<= 1; // move bit into next byte position
+ }
+ }
+ }
+ }
+
+ done_now:
+ if (xor_data != NULL) { // send the bad bits back...
+ xor_data[0] = bad_bits[0];
+ xor_data[1] = bad_bits[1]; // let it be zeroed
+ }
+ return errors;
+}
+
+// NOTE: "mode" argument:
+// DBTRAIN_TEST: for testing using GP patterns, includes ECC
+// DBTRAIN_DBI: for DBI deskew training behavior (uses GP patterns)
+// DBTRAIN_LFSR: for testing using LFSR patterns, includes ECC
+// NOTE: trust the caller to specify the correct/supported mode
+//
+int test_dram_byte_hw(bdk_node_t node, int ddr_interface_num,
+ uint64_t p, int mode, uint64_t *xor_data)
+{
+ uint64_t p1;
+ uint64_t k;
+ int errors = 0;
+
+ uint64_t mpr_data0, mpr_data1;
+ uint64_t bad_bits[2] = {0,0};
+
+ int node_address, lmc, dimm;
+ int prank, lrank;
+ int bank, row, col;
+ int save_or_dis;
+ int byte;
+ int ba_loop, ba_bits;
+
+ bdk_lmcx_rlevel_ctl_t rlevel_ctl;
+ bdk_lmcx_dbtrain_ctl_t dbtrain_ctl;
+
+ int bank_errs;
+
+ // FIXME: K iterations set to 4 for now.
+ // FIXME: decrement to increase iterations.
+ // FIXME: must be no less than 22 to stay above an LMC hash field.
+ int kshift = 26;
+ const char *s;
+
+ // allow override default setting for kshift
+ if ((s = getenv("ddr_tune_set_kshift")) != NULL) {
+ int temp = strtoul(s, NULL, 0);
+ if ((temp < 22) || (temp > 27)) {
+ ddr_print("N%d.LMC%d: ILLEGAL override of kshift to %d, using default %d\n",
+ node, ddr_interface_num, temp, kshift);
+ } else {
+ VB_PRT(VBL_DEV2, "N%d.LMC%d: overriding kshift (%d) to %d\n",
+ node, ddr_interface_num, kshift, temp);
+ kshift = temp;
+ }
+ }
+
+ /*
+ 1) Make sure that RLEVEL_CTL[OR_DIS] = 0.
+ */
+ rlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num));
+ save_or_dis = rlevel_ctl.s.or_dis;
+ rlevel_ctl.s.or_dis = 0; /* or_dis must be disabled for this sequence */
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num), rlevel_ctl.u);
+
+ /*
+ NOTE: this step done in the calling routine(s)
+ 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern of choice.
+ a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower (rising edge) 64 bits of data.
+ b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper (falling edge) 64 bits of data.
+ c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower (rising edge <7:0>) and upper
+ (falling edge <15:8>) ECC data.
+ */
+
+ // final address must include LMC and node
+ p |= (ddr_interface_num << 7); /* Map address into proper interface */
+ p = bdk_numa_get_address(node, p); /* Map to node */
+
+ /*
+ * Add base offset to both test regions to not clobber u-boot stuff
+ * when running from L2 for NAND boot.
+ */
+ p += 0x10000000; // offset to 256MB
+
+ errors = 0;
+
+ bdk_dram_address_extract_info(p, &node_address, &lmc, &dimm, &prank, &lrank, &bank, &row, &col);
+ VB_PRT(VBL_DEV2, "test_dram_byte_hw: START at A:0x%012lx, N%d L%d D%d R%d/%d B%1x Row:%05x Col:%05x\n",
+ p, node_address, lmc, dimm, prank, lrank, bank, row, col);
+
+ // only check once per call, and ignore if no match...
+ if ((int)node != node_address) {
+ error_print("ERROR: Node address mismatch; ignoring...\n");
+ return 0;
+ }
+ if (lmc != ddr_interface_num) {
+ error_print("ERROR: LMC address mismatch\n");
+ return 0;
+ }
+
+ /*
+ 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as it’s a one-shot operation).
+ This is to get into the habit of resetting PHY’s SILO to the original 0 location.
+ */
+ BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
+ phy_ctl.s.phy_reset = 1);
+
+ /* Walk through a range of addresses avoiding bits that alias
+ * interfaces on the CN88XX.
+ */
+
+ // FIXME: want to try to keep the K increment from affecting the LMC via hash,
+ // FIXME: so keep it above bit 21
+ // NOTE: we also want to keep k less than the base offset of bit 28 (256MB)
+
+ for (k = 0; k < (1UL << 28); k += (1UL << kshift)) {
+
+ // FIXME: the sequence will iterate over 1/2 cacheline
+ // FIXME: for each unit specified in "read_cmd_count",
+ // FIXME: so, we setup each sequence to do the max cachelines it can
+
+ p1 = p + k;
+
+ bdk_dram_address_extract_info(p1, &node_address, &lmc, &dimm, &prank, &lrank, &bank, &row, &col);
+ VB_PRT(VBL_DEV3, "test_dram_byte_hw: NEXT interation at A:0x%012lx, N%d L%d D%d R%d/%d B%1x Row:%05x Col:%05x\n",
+ p1, node_address, lmc, dimm, prank, lrank, bank, row, col);
+
+ /*
+ 2) Setup the fields of the CSR DBTRAIN_CTL as follows:
+ a. COL, ROW, BA, BG, PRANK point to the starting point of the address.
+ You can just set them to all 0.
+ b. RW_TRAIN – set this to 1.
+ c. TCCD_L – set this to 0.
+ d. READ_CMD_COUNT – instructs the sequence how many writes/reads to issue.
+ It is a 5-bit field, so set it to 31 for the maximum number of r/w operations.
+ */
+ dbtrain_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DBTRAIN_CTL(ddr_interface_num));
+ dbtrain_ctl.s.column_a = col;
+ dbtrain_ctl.s.row_a = row;
+ dbtrain_ctl.s.bg = (bank >> 2) & 3;
+ dbtrain_ctl.s.prank = (dimm * 2) + prank; // FIXME?
+ dbtrain_ctl.s.lrank = lrank; // FIXME?
+ dbtrain_ctl.s.activate = (mode == DBTRAIN_DBI);
+ dbtrain_ctl.s.write_ena = 1;
+ dbtrain_ctl.s.read_cmd_count = 31; // max count pass 1.x
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) // added 81xx and 83xx
+ dbtrain_ctl.s.cmd_count_ext = 3; // max count pass 2.x
+ else
+ dbtrain_ctl.s.cmd_count_ext = 0; // max count pass 1.x
+ dbtrain_ctl.s.rw_train = 1;
+ dbtrain_ctl.s.tccd_sel = (mode == DBTRAIN_DBI);
+
+ // LFSR should only be on when chip supports it...
+ dbtrain_ctl.s.lfsr_pattern_sel = (mode == DBTRAIN_LFSR) ? 1 : 0;
+
+ bank_errs = 0;
+
+ // for each address, iterate over the 4 "banks" in the BA
+ for (ba_loop = 0, ba_bits = bank & 3;
+ ba_loop < 4;
+ ba_loop++, ba_bits = (ba_bits + 1) & 3)
+ {
+ dbtrain_ctl.s.ba = ba_bits;
+ DRAM_CSR_WRITE(node, BDK_LMCX_DBTRAIN_CTL(ddr_interface_num), dbtrain_ctl.u);
+
+ VB_PRT(VBL_DEV3, "test_dram_byte_hw: DBTRAIN: Pr:%d Lr:%d Bg:%d Ba:%d Row:%05x Col:%05x\n",
+ dbtrain_ctl.s.prank, dbtrain_ctl.s.lrank,
+ dbtrain_ctl.s.bg, dbtrain_ctl.s.ba, row, col);
+ /*
+ 4) Kick off the sequence (SEQ_CTL[SEQ_SEL] = 14, SEQ_CTL[INIT_START] = 1).
+ 5) Poll on SEQ_CTL[SEQ_COMPLETE] for completion.
+ */
+ perform_octeon3_ddr3_sequence(node, prank, ddr_interface_num, 14);
+
+ /*
+ 6) Read MPR_DATA0 and MPR_DATA1 for results:
+ a. MPR_DATA0[MPR_DATA<63:0>] – comparison results for DQ63:DQ0.
+ (1 means MATCH, 0 means FAIL).
+ b. MPR_DATA1[MPR_DATA<7:0>] – comparison results for ECC bit7:0.
+ */
+ mpr_data0 = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA0(ddr_interface_num));
+ mpr_data1 = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA1(ddr_interface_num));
+
+ /*
+ 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as it’s a one-shot operation).
+ This is to get into the habit of resetting PHY’s SILO to the original 0 location.
+ */
+ BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
+ phy_ctl.s.phy_reset = 1);
+
+ if (mode == DBTRAIN_DBI)
+ continue; // bypass any error checking or updating when DBI mode
+
+ // data bytes
+ if (~mpr_data0) {
+ for (byte = 0; byte < 8; byte++) {
+ if ((~mpr_data0 >> (8 * byte)) & 0xffUL)
+ bank_errs |= (1 << byte);
+ }
+ // accumulate bad bits
+ bad_bits[0] |= ~mpr_data0;
+ }
+
+ // include ECC byte errors
+ if (~mpr_data1 & 0xffUL) {
+ bank_errs |= (1 << 8);
+ bad_bits[1] |= ~mpr_data1 & 0xffUL;
+ }
+
+ } /* for (int ba_loop = 0; ba_loop < 4; ba_loop++) */
+
+ errors |= bank_errs;
+
+ } /* end for (k=...) */
+
+ rlevel_ctl.s.or_dis = save_or_dis;
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num), rlevel_ctl.u);
+
+ if ((mode != DBTRAIN_DBI) && (xor_data != NULL)) { // send the bad bits back...
+ xor_data[0] = bad_bits[0];
+ xor_data[1] = bad_bits[1];
+ }
+
+ return errors;
+}
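+
+/*
+ * Usage sketch (illustrative, with a hypothetical base address of 0): run
+ * the hardware LFSR pattern over LMC0 and collect per-bit failure masks.
+ *
+ *   uint64_t xor_data[2];
+ *   int lane_errs = test_dram_byte_hw(node, 0, 0ull, DBTRAIN_LFSR, xor_data);
+ *   // bit N of lane_errs: byte lane N failed; bit 8: the ECC lane failed
+ */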
+
+static void set_ddr_memory_preserved(bdk_node_t node)
+{
+ global_ddr_memory_preserved |= 0x1 << node;
+}
+
+int ddr_memory_preserved(bdk_node_t node)
+{
+ return (global_ddr_memory_preserved & (0x1 << node)) != 0;
+}
+
+void perform_ddr_init_sequence(bdk_node_t node, int rank_mask,
+ int ddr_interface_num)
+{
+ const char *s;
+ int ddr_init_loops = 1;
+ int rankx;
+
+ if ((s = lookup_env_parameter("ddr%d_init_loops", ddr_interface_num)) != NULL)
+ ddr_init_loops = strtoul(s, NULL, 0);
+
+ while (ddr_init_loops--) {
+ for (rankx = 0; rankx < 8; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ perform_octeon3_ddr3_sequence(node, (1 << rankx),
+ ddr_interface_num, 0); /* power-up/init */
+
+ bdk_wait_usec(1000); /* Wait a while. */
+
+ if ((s = lookup_env_parameter("ddr_sequence1")) != NULL) {
+ int sequence1;
+ sequence1 = strtoul(s, NULL, 0);
+ perform_octeon3_ddr3_sequence(node, (1 << rankx),
+ ddr_interface_num, sequence1);
+ }
+
+ if ((s = lookup_env_parameter("ddr_sequence2")) != NULL) {
+ int sequence2;
+ sequence2 = strtoul(s, NULL, 0);
+ perform_octeon3_ddr3_sequence(node, (1 << rankx),
+ ddr_interface_num, sequence2);
+ }
+ }
+ }
+}
+
+static void set_ddr_clock_initialized(bdk_node_t node, int ddr_interface, int inited_flag)
+{
+ int bit = node * 8 + ddr_interface;
+ if (inited_flag)
+ global_ddr_clock_initialized |= (0x1 << bit);
+ else
+ global_ddr_clock_initialized &= ~(0x1 << bit);
+}
+static int ddr_clock_initialized(bdk_node_t node, int ddr_interface)
+{
+ int bit = node * 8 + ddr_interface;
+ return (!!(global_ddr_clock_initialized & (0x1 << bit)));
+}
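+// Example: node 1, LMC 2 is tracked as bit 1*8 + 2 = 10 above, giving each
+// node an 8-bit slice of the global_ddr_clock_initialized flag word.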
+
+
+static void cn78xx_lmc_dreset_init (bdk_node_t node, int ddr_interface_num)
+{
+ /*
+ * This is the embodiment of the 6.9.4 LMC DRESET Initialization section below.
+ *
+ * The remainder of this section describes the sequence for LMCn.
+ *
+ * 1. If not done already, write LMC(0..3)_DLL_CTL2 to its reset value
+ * (except without changing the LMC(0..3)_DLL_CTL2[INTF_EN] value from
+ * that set in the prior Step 3), including LMC(0..3)_DLL_CTL2[DRESET] = 1.
+ *
+ * 2. Without changing any other LMC(0..3)_DLL_CTL2 fields, write
+ * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] = 1.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(ddr_interface_num),
+ c.s.dll_bringup = 1);
+
+ /*
+ * 3. Read LMC(0..3)_DLL_CTL2 and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(ddr_interface_num));
+
+ /*
+ * 4. Wait for a minimum of 10 LMC CK cycles.
+ */
+
+ bdk_wait_usec(1);
+
+ /*
+ * 5. Without changing any other fields in LMC(0..3)_DLL_CTL2, write
+ * LMC(0..3)_DLL_CTL2[QUAD_DLL_ENA] = 1.
+ * LMC(0..3)_DLL_CTL2[QUAD_DLL_ENA] must not change after this point
+ * without restarting the LMCn DRESET initialization sequence.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(ddr_interface_num),
+ c.s.quad_dll_ena = 1);
+
+ /*
+ * 6. Read LMC(0..3)_DLL_CTL2 and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(ddr_interface_num));
+
+ /*
+ * 7. Wait a minimum of 10 us.
+ */
+
+ bdk_wait_usec(10);
+
+ /*
+ * 8. Without changing any other fields in LMC(0..3)_DLL_CTL2, write
+ * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] = 0.
+ * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] must not change after this point
+ * without restarting the LMCn DRESET initialization sequence.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(ddr_interface_num),
+ c.s.dll_bringup = 0);
+
+ /*
+ * 9. Read LMC(0..3)_DLL_CTL2 and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(ddr_interface_num));
+
+ /*
+ * 10. Without changing any other fields in LMC(0..3)_DLL_CTL2, write
+ * LMC(0..3)_DLL_CTL2[DRESET] = 0.
+ * LMC(0..3)_DLL_CTL2[DRESET] must not change after this point without
+ * restarting the LMCn DRESET initialization sequence.
+ *
+ * After completing LMCn DRESET initialization, all LMC CSRs may be
+ * accessed. Prior to completing LMC DRESET initialization, only
+ * LMC(0..3)_DDR_PLL_CTL, LMC(0..3)_DLL_CTL2, LMC(0..3)_RESET_CTL, and
+ * LMC(0..3)_COMP_CTL2 LMC CSRs can be accessed.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(ddr_interface_num),
+ c.s.dreset = 0);
+
+ /*
+ * NEW STEP - necessary for O73, O78 P2.0, O75, and T88 P2.0
+ * McBuggin: #24821
+ *
+ * 11. Wait for a minimum of 10 LMC CK cycles.
+ */
+
+ bdk_wait_usec(1);
+}
+
+/*static*/ void cn88xx_lmc_ddr3_reset(bdk_node_t node, int ddr_interface_num, int reset)
+{
+ /*
+ * 4. Deassert DDRn_RESET_L pin by writing LMC(0..3)_RESET_CTL[DDR3RST] = 1
+ * without modifying any other LMC(0..3)_RESET_CTL fields.
+ * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
+ * 6. Wait a minimum of 500us. This guarantees the necessary T = 500us
+ * delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE* assertion.
+ */
+ ddr_print("LMC%d %s DDR_RESET_L\n", ddr_interface_num,
+ (reset == LMC_DDR3_RESET_DEASSERT) ? "De-asserting" : "Asserting");
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_RESET_CTL(ddr_interface_num),
+ c.cn8.ddr3rst = reset);
+ BDK_CSR_READ(node, BDK_LMCX_RESET_CTL(ddr_interface_num));
+ bdk_wait_usec(500);
+}
+
+int initialize_ddr_clock(bdk_node_t node,
+ const ddr_configuration_t *ddr_configuration,
+ uint32_t cpu_hertz,
+ uint32_t ddr_hertz,
+ uint32_t ddr_ref_hertz,
+ int ddr_interface_num,
+ uint32_t ddr_interface_mask
+ )
+{
+ const char *s;
+
+ if (ddr_clock_initialized(node, ddr_interface_num))
+ return 0;
+
+ if (!ddr_clock_initialized(node, 0)) { /* Do this once */
+ int i;
+ bdk_lmcx_reset_ctl_t reset_ctl;
+ /* Check to see if memory is to be preserved and set global flag */
+ for (i=3; i>=0; --i) {
+ if ((ddr_interface_mask & (1 << i)) == 0)
+ continue;
+ reset_ctl.u = BDK_CSR_READ(node, BDK_LMCX_RESET_CTL(i));
+ if (reset_ctl.s.ddr3psv == 1) {
+ ddr_print("LMC%d Preserving memory\n", i);
+ set_ddr_memory_preserved(node);
+
+ /* Re-initialize flags */
+ reset_ctl.cn8.ddr3pwarm = 0;
+ reset_ctl.cn8.ddr3psoft = 0;
+ reset_ctl.s.ddr3psv = 0;
+ DRAM_CSR_WRITE(node, BDK_LMCX_RESET_CTL(i), reset_ctl.u);
+ }
+ }
+ }
+
+ if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX)) {
+
+ bdk_lmcx_ddr_pll_ctl_t ddr_pll_ctl;
+ const dimm_config_t *dimm_config_table = ddr_configuration->dimm_config_table;
+
+ /* ddr_type only indicates DDR4 or DDR3 */
+ int ddr_type = get_ddr_type(node, &dimm_config_table[0]);
+
+ /*
+ * 6.9 LMC Initialization Sequence
+ *
+ * There are 14 parts to the LMC initialization procedure:
+ *
+ * 1. LMC interface enable initialization
+ *
+ * 2. DDR PLL initialization
+ *
+ * 3. LMC CK initialization
+ *
+ * 4. LMC DRESET initialization
+ *
+ * 5. LMC CK local initialization
+ *
+ * 6. LMC RESET initialization
+ *
+ * 7. Early LMC initialization
+ *
+ * 8. LMC offset training
+ *
+ * 9. LMC internal Vref training
+ *
+ * 10. LMC deskew training
+ *
+ * 11. LMC write leveling
+ *
+ * 12. LMC read leveling
+ *
+ * 13. DRAM Vref Training for DDR4
+ *
+ * 14. Final LMC initialization
+ *
+ * CN88XX supports two modes:
+ *
+ * - two-LMC mode: both LMCs 2/3 must not be enabled
+ * (LMC2/3_DLL_CTL2[DRESET] must be set to 1 and LMC2/3_DLL_CTL2[INTF_EN]
+ * must be set to 0) and both LMCs 0/1 must be enabled.
+ *
+ * - four-LMC mode: all four LMCs 0..3 must be enabled.
+ *
+ * Steps 4 and 6..14 should each be performed for each enabled LMC (either
+ * twice or four times). Steps 1..3 and 5 are more global in nature and
+ * each must be executed exactly once (not once per LMC) each time the
+ * DDR PLL changes or is first brought up. Steps 1..3 and 5 need not be
+ * performed if the DDR PLL is stable.
+ *
+ * Generally, the steps are performed in order. The exception is that the
+ * CK local initialization (step 5) must be performed after some DRESET
+ * initializations (step 4) and before other DRESET initializations when
+ * the DDR PLL is brought up or changed. (The CK local initialization
+ * uses information from some LMCs to bring up the other local CKs.) The
+ * following text describes these ordering requirements in more detail.
+ *
+ * Following any chip reset, the DDR PLL must be brought up, and all 14
+ * steps should be executed. Subsequently, it is possible to execute only
+ * steps 4 and 6..14, or to execute only steps 8..14.
+ *
+ * The remainder of this section covers these initialization steps in
+ * sequence.
+ */
+
+ if (ddr_interface_num == 0) { /* Do this once */
+ bdk_lmcx_dll_ctl2_t dll_ctl2;
+ int loop_interface_num;
+
+ /*
+ * 6.9.1 LMC Interface-Enable Initialization
+ *
+ * LMC interface-enable initialization (Step 1) must be performed only
+ * once, not once per LMC in four-LMC mode. This step is not required
+ * in two-LMC mode.
+ *
+ * Perform the following three substeps for the LMC interface-enable
+ * initialization:
+ *
+ * 1. Without changing any other LMC2_DLL_CTL2 fields (LMC(0..3)_DLL_CTL2
+ * should be at their reset values after Step 1), write
+ * LMC2_DLL_CTL2[INTF_EN] = 1 if four-LMC mode is desired.
+ *
+ * 2. Without changing any other LMC3_DLL_CTL2 fields, write
+ * LMC3_DLL_CTL2[INTF_EN] = 1 if four-LMC mode is desired.
+ *
+ * 3. Read LMC2_DLL_CTL2 and wait for the result.
+ *
+ * The LMC2_DLL_CTL2[INTF_EN] and LMC3_DLL_CTL2[INTF_EN] values should
+ * not be changed by software from this point.
+ *
+ */
+
+ /* Put all LMCs into DRESET here; these are the reset values... */
+ for (loop_interface_num = 0; loop_interface_num < 4; ++loop_interface_num) {
+ if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
+ continue;
+
+ dll_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(loop_interface_num));
+
+ dll_ctl2.s.byp_setting = 0;
+ dll_ctl2.s.byp_sel = 0;
+ dll_ctl2.s.quad_dll_ena = 0;
+ dll_ctl2.s.dreset = 1;
+ dll_ctl2.s.dll_bringup = 0;
+ dll_ctl2.s.intf_en = 0;
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL2(loop_interface_num), dll_ctl2.u);
+ }
+
+ /* Now set INTF_EN for *ONLY* LMC2/3 if they are to be active on 88XX. */
+ /* Do *NOT* touch LMC0/1 INTF_EN=0 setting on 88XX. */
+ /* But we do have to set LMC1 INTF_EN=1 on 83XX if we want it active... */
+ /* Note that 81xx has only LMC0 so the mask should reflect that. */
+ for (loop_interface_num = (CAVIUM_IS_MODEL(CAVIUM_CN83XX)) ? 1 : 2;
+ loop_interface_num < 4; ++loop_interface_num) {
+ if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
+ continue;
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(loop_interface_num),
+ c.s.intf_en = 1);
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(loop_interface_num));
+ }
+
+ /*
+ * 6.9.2 DDR PLL Initialization
+ *
+ * DDR PLL initialization (Step 2) must be performed for each chip reset
+ * and whenever the DDR clock speed changes. This step needs to be
+ * performed only once, not once per LMC.
+ *
+ * Perform the following eight substeps to initialize the DDR PLL:
+ *
+ * 1. If not done already, write all fields in LMC(0..1)_DDR_PLL_CTL and
+ * LMC(0..1)_DLL_CTL2 to their reset values, including:
+ *
+ * .. LMC0_DDR_PLL_CTL[DDR_DIV_RESET] = 1
+ * .. LMC0_DLL_CTL2[DRESET] = 1
+ *
+ * This substep is not necessary after a chip reset.
+ *
+ */
+
+ ddr_pll_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(0));
+
+ ddr_pll_ctl.cn83xx.reset_n = 0;
+ ddr_pll_ctl.cn83xx.ddr_div_reset = 1;
+ ddr_pll_ctl.cn83xx.phy_dcok = 0;
+ ddr_pll_ctl.cn83xx.dclk_invert = 0;
+
+ // allow override of LMC0 desired setting for DCLK_INVERT
+ if ((s = lookup_env_parameter("ddr0_set_dclk_invert")) != NULL) {
+ ddr_pll_ctl.cn83xx.dclk_invert = !!strtoul(s, NULL, 0);
+ ddr_print("LMC0: override DDR_PLL_CTL[dclk_invert] to %d\n",
+ ddr_pll_ctl.cn83xx.dclk_invert);
+ }
+
+ // always write LMC0 CSR, it must be active
+ DRAM_CSR_WRITE(node, BDK_LMCX_DDR_PLL_CTL(0), ddr_pll_ctl.u);
+ ddr_print("%-45s : 0x%016lx\n", "LMC0: DDR_PLL_CTL", ddr_pll_ctl.u);
+
+ // only when LMC1 is active
+ // NOTE: 81xx has only 1 LMC, and 83xx can operate in 1-LMC mode
+ if (ddr_interface_mask & 0x2) {
+
+ ddr_pll_ctl.cn83xx.dclk_invert ^= 1; /* DEFAULT: Toggle dclk_invert from LMC0 */
+
+ // allow override of LMC1 desired setting for DCLK_INVERT
+ if ((s = lookup_env_parameter("ddr1_set_dclk_invert")) != NULL) {
+ ddr_pll_ctl.cn83xx.dclk_invert = !!strtoul(s, NULL, 0);
+ ddr_print("LMC1: override DDR_PLL_CTL[dclk_invert] to %d\n",
+ ddr_pll_ctl.cn83xx.dclk_invert);
+ }
+
+ // always write LMC1 CSR when it is active
+ DRAM_CSR_WRITE(node, BDK_LMCX_DDR_PLL_CTL(1), ddr_pll_ctl.u);
+ ddr_print("%-45s : 0x%016lx\n", "LMC1: DDR_PLL_CTL", ddr_pll_ctl.u);
+ }
+
+ /*
+ * 2. If the current DRAM contents are not preserved (see
+ * LMC(0..3)_RESET_CTL[DDR3PSV]), this is also an appropriate time to
+ * assert the RESET# pin of the DDR3/DDR4 DRAM parts. If desired, write
+ * LMC0_RESET_CTL[DDR3RST] = 0 without modifying any other
+ * LMC0_RESET_CTL fields to assert the DDR_RESET_L pin. No action is
+ * required here to assert DDR_RESET_L following a chip reset. Refer to
+ * Section 6.9.6. Do this for all enabled LMCs.
+ */
+
+ for (loop_interface_num = 0;
+ ( !ddr_memory_preserved(node)) && loop_interface_num < 4;
+ ++loop_interface_num)
+ {
+
+ if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
+ continue;
+
+ cn88xx_lmc_ddr3_reset(node, loop_interface_num, LMC_DDR3_RESET_ASSERT);
+ }
+
+ /*
+ * 3. Without changing any other LMC0_DDR_PLL_CTL values, write
+ * LMC0_DDR_PLL_CTL[CLKF] with a value that gives a desired DDR PLL speed. The
+ * LMC0_DDR_PLL_CTL[CLKF] value should be selected in conjunction with
+ * the post-scalar divider values for LMC (LMC0_DDR_PLL_CTL[DDR_PS_EN])
+ * so that the desired LMC CK speeds are produced (all enabled LMCs
+ * must run the same speed). Section 5.14 describes
+ * LMC0_DDR_PLL_CTL[CLKF] and LMC0_DDR_PLL_CTL[DDR_PS_EN] programmings
+ * that produce the desired LMC CK speed. Section 6.9.3 describes LMC CK
+ * initialization, which can be done separately from the DDR PLL
+ * initialization described in this section.
+ *
+ * The LMC0_DDR_PLL_CTL[CLKF] value must not change after this point
+ * without restarting this SDRAM PLL initialization sequence.
+ */
+
+ {
+ /* CLKF = (DCLK * (CLKR+1) * EN(1, 2, 3, 4, 5, 6, 7, 8, 10, 12))/DREF - 1 */
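+ /*
+ * Worked example (hypothetical inputs): DREF = 50 MHz, target DCLK =
+ * 800 MHz, CLKR = 0, EN = 4 gives CLKF = (800 * 1 * 4)/50 - 1 = 63,
+ * so the PLL runs at 50 * (63+1)/(0+1) = 3200 MHz (within the
+ * 800..5000 MHz limits) and the post-scaled DCLK is 3200/4 = 800 MHz.
+ */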
+ int en_idx, save_en_idx, best_en_idx=0;
+ uint64_t clkf, clkr, max_clkf = 127;
+ uint64_t best_clkf=0, best_clkr=0;
+ uint64_t best_pll_MHz = 0;
+ uint64_t pll_MHz;
+ uint64_t min_pll_MHz = 800;
+ uint64_t max_pll_MHz = 5000;
+ uint64_t error;
+ uint64_t best_error;
+ uint64_t best_calculated_ddr_hertz = 0;
+ uint64_t calculated_ddr_hertz = 0;
+ uint64_t orig_ddr_hertz = ddr_hertz;
+ static const int _en[] = {1, 2, 3, 4, 5, 6, 7, 8, 10, 12};
+ int override_pll_settings;
+ int new_bwadj;
+
+ error = best_error = ddr_hertz; /* Init to max error */
+
+ ddr_print("DDR Reference Hertz = %d\n", ddr_ref_hertz);
+
+ while (best_error == ddr_hertz) {
+
+ for (clkr = 0; clkr < 4; ++clkr) {
+ for (en_idx=sizeof(_en)/sizeof(int)-1; en_idx>=0; --en_idx) {
+ save_en_idx = en_idx;
+ clkf = ((ddr_hertz) * (clkr+1) * (_en[save_en_idx]));
+ clkf = divide_nint(clkf, ddr_ref_hertz) - 1;
+ pll_MHz = ddr_ref_hertz * (clkf+1) / (clkr+1) / 1000000;
+ calculated_ddr_hertz = ddr_ref_hertz * (clkf + 1) / ((clkr + 1) * (_en[save_en_idx]));
+ error = ddr_hertz - calculated_ddr_hertz;
+
+ if ((pll_MHz < min_pll_MHz) || (pll_MHz > max_pll_MHz)) continue;
+ if (clkf > max_clkf) continue; /* PLL requires clkf to be limited */
+ if (_abs(error) > _abs(best_error)) continue;
+
+ VB_PRT(VBL_TME, "clkr: %2lu, en[%d]: %2d, clkf: %4lu, pll_MHz: %4lu, ddr_hertz: %8lu, error: %8ld\n",
+ clkr, save_en_idx, _en[save_en_idx], clkf, pll_MHz, calculated_ddr_hertz, error);
+
+ /* Favor the highest PLL frequency. */
+ if ((_abs(error) < _abs(best_error)) || (pll_MHz > best_pll_MHz)) {
+ best_pll_MHz = pll_MHz;
+ best_calculated_ddr_hertz = calculated_ddr_hertz;
+ best_error = error;
+ best_clkr = clkr;
+ best_clkf = clkf;
+ best_en_idx = save_en_idx;
+ }
+ }
+ }
+
+ override_pll_settings = 0;
+
+ if ((s = lookup_env_parameter("ddr_pll_clkr")) != NULL) {
+ best_clkr = strtoul(s, NULL, 0);
+ override_pll_settings = 1;
+ }
+ if ((s = lookup_env_parameter("ddr_pll_clkf")) != NULL) {
+ best_clkf = strtoul(s, NULL, 0);
+ override_pll_settings = 1;
+ }
+ if ((s = lookup_env_parameter("ddr_pll_en_idx")) != NULL) {
+ best_en_idx = strtoul(s, NULL, 0);
+ override_pll_settings = 1;
+ }
+
+ if (override_pll_settings) {
+ best_pll_MHz = ddr_ref_hertz * (best_clkf+1) / (best_clkr+1) / 1000000;
+ best_calculated_ddr_hertz = ddr_ref_hertz * (best_clkf + 1) / ((best_clkr + 1) * (_en[best_en_idx]));
+ best_error = ddr_hertz - best_calculated_ddr_hertz;
+ }
+
+ ddr_print("clkr: %2lu, en[%d]: %2d, clkf: %4lu, pll_MHz: %4lu, ddr_hertz: %8lu, error: %8ld <==\n",
+ best_clkr, best_en_idx, _en[best_en_idx], best_clkf, best_pll_MHz,
+ best_calculated_ddr_hertz, best_error);
+
+ /* Try lowering the frequency if we can't get a working configuration */
+ if (best_error == ddr_hertz) {
+ if (ddr_hertz < orig_ddr_hertz - 10000000)
+ break;
+ ddr_hertz -= 1000000;
+ best_error = ddr_hertz;
+ }
+
+ } /* while (best_error == ddr_hertz) */
+
+
+ if (best_error == ddr_hertz) {
+ error_print("ERROR: Can not compute a legal DDR clock speed configuration.\n");
+ return(-1);
+ }
+
+ new_bwadj = (best_clkf + 1) / 10;
+ VB_PRT(VBL_TME, "bwadj: %2d\n", new_bwadj);
+
+ if ((s = lookup_env_parameter("ddr_pll_bwadj")) != NULL) {
+ new_bwadj = strtoul(s, NULL, 0);
+ VB_PRT(VBL_TME, "bwadj: %2d\n", new_bwadj);
+ }
+
+ for (loop_interface_num = 0; loop_interface_num<2; ++loop_interface_num) {
+ if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
+ continue;
+
+ // make sure we preserve any settings already there
+ ddr_pll_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
+ ddr_print("LMC%d: DDR_PLL_CTL : 0x%016lx\n",
+ loop_interface_num, ddr_pll_ctl.u);
+
+ ddr_pll_ctl.cn83xx.ddr_ps_en = best_en_idx;
+ ddr_pll_ctl.cn83xx.clkf = best_clkf;
+ ddr_pll_ctl.cn83xx.clkr = best_clkr;
+ ddr_pll_ctl.cn83xx.reset_n = 0;
+ ddr_pll_ctl.cn83xx.bwadj = new_bwadj;
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num), ddr_pll_ctl.u);
+ ddr_print("LMC%d: DDR_PLL_CTL : 0x%016lx\n",
+ loop_interface_num, ddr_pll_ctl.u);
+ }
+ }
+
+
+ for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) {
+ if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
+ continue;
+
+ /*
+ * 4. Read LMC0_DDR_PLL_CTL and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
+
+ /*
+ * 5. Wait a minimum of 3 us.
+ */
+
+ bdk_wait_usec(3); /* Wait 3 us */
+
+ /*
+ * 6. Write LMC0_DDR_PLL_CTL[RESET_N] = 1 without changing any other
+ * LMC0_DDR_PLL_CTL values.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num),
+ c.cn83xx.reset_n = 1);
+
+ /*
+ * 7. Read LMC0_DDR_PLL_CTL and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
+
+ /*
+ * 8. Wait a minimum of 25 us.
+ */
+
+ bdk_wait_usec(25); /* Wait 25 us */
+
+ } /* for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) */
+
+ for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) {
+ if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
+ continue;
+ /*
+ * 6.9.3 LMC CK Initialization
+ *
+ * DDR PLL initialization must be completed prior to starting LMC CK
+ * initialization.
+ *
+ * Perform the following substeps to initialize the LMC CK. Perform
+ * substeps 1..3 for both LMC0 and LMC1.
+ *
+ * 1. Without changing any other LMC(0..3)_DDR_PLL_CTL values, write
+ * LMC(0..3)_DDR_PLL_CTL[DDR_DIV_RESET] = 1 and
+ * LMC(0..3)_DDR_PLL_CTL[DDR_PS_EN] with the appropriate value to get the
+ * desired LMC CK speed. Section 5.14 discusses CLKF and DDR_PS_EN
+ * programmings. The LMC(0..3)_DDR_PLL_CTL[DDR_PS_EN] must not change
+ * after this point without restarting this LMC CK initialization
+ * sequence.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num),
+ c.cn83xx.ddr_div_reset = 1);
+
+ /*
+ * 2. Without changing any other fields in LMC(0..3)_DDR_PLL_CTL, write
+ * LMC(0..3)_DDR_PLL_CTL[DDR4_MODE] = 0.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num),
+ c.cn83xx.ddr4_mode = (ddr_type == DDR4_DRAM) ? 1 : 0);
+
+ /*
+ * 3. Read LMC(0..3)_DDR_PLL_CTL and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
+
+ /*
+ * 4. Wait a minimum of 1 us.
+ */
+
+ bdk_wait_usec(1); /* Wait 1 us */
+
+ /*
+ * 5. Without changing any other fields in LMC(0..3)_DDR_PLL_CTL, write
+ * LMC(0..3)_DDR_PLL_CTL[PHY_DCOK] = 1.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num),
+ c.cn83xx.phy_dcok = 1);
+
+ /*
+ * 6. Read LMC(0..3)_DDR_PLL_CTL and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
+
+ /*
+ * 7. Wait a minimum of 20 us.
+ */
+
+ bdk_wait_usec(20); /* Wait 20 us */
+
+ /*
+ * 8. Without changing any other LMC(0..3)_COMP_CTL2 values, write
+ * LMC(0..3)_COMP_CTL2[CK_CTL,CONTROL_CTL,CMD_CTL] to the desired
+ * DDR*_CK_*_P control and command signals drive strength.
+ */
+
+ {
+ bdk_lmcx_comp_ctl2_t comp_ctl2;
+ const ddr3_custom_config_t *custom_lmc_config = &ddr_configuration->custom_lmc_config;
+
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(loop_interface_num));
+
+ comp_ctl2.s.dqx_ctl = 4; /* Default 4=34.3 ohm */
+ comp_ctl2.s.ck_ctl =
+ (custom_lmc_config->ck_ctl == 0) ? 4 : custom_lmc_config->ck_ctl; /* Default 4=34.3 ohm */
+ comp_ctl2.s.cmd_ctl =
+ (custom_lmc_config->cmd_ctl == 0) ? 4 : custom_lmc_config->cmd_ctl; /* Default 4=34.3 ohm */
+
+ comp_ctl2.s.rodt_ctl = 0x4; /* 60 ohm */
+
+ // These need to be done here, not later in Step 6.9.7.
+ // NOTE: these are/will be specific to a chip; for now, set to 0
+ // should we provide overrides for these?
+ comp_ctl2.s.ntune_offset = 0;
+ comp_ctl2.s.ptune_offset = 0;
+
+ // now do any overrides...
+ if ((s = lookup_env_parameter("ddr_ck_ctl")) != NULL) {
+ comp_ctl2.s.ck_ctl = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_cmd_ctl")) != NULL) {
+ comp_ctl2.s.cmd_ctl = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_dqx_ctl")) != NULL) {
+ comp_ctl2.s.dqx_ctl = strtoul(s, NULL, 0);
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(loop_interface_num), comp_ctl2.u);
+ }
+
+ /*
+ * 9. Read LMC(0..3)_DDR_PLL_CTL and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
+
+ /*
+ * 10. Wait a minimum of 200 ns.
+ */
+
+ bdk_wait_usec(1); /* Wait 1 us */
+
+ /*
+ * 11. Without changing any other LMC(0..3)_DDR_PLL_CTL values, write
+ * LMC(0..3)_DDR_PLL_CTL[DDR_DIV_RESET] = 0.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num),
+ c.cn83xx.ddr_div_reset = 0);
+
+ /*
+ * 12. Read LMC(0..3)_DDR_PLL_CTL and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
+
+ /*
+ * 13. Wait a minimum of 200 ns.
+ */
+ bdk_wait_usec(1); /* Wait 1 us */
+
+ } /* for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) */
+
+ } /* if (ddr_interface_num == 0) */ /* Do this once */
+
+ if (ddr_interface_num == 0) { /* Do this once */
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+
+ /*
+ * 6.9.4 LMC DRESET Initialization
+ *
+ * All of the DDR PLL, LMC global CK, and LMC interface enable
+ * initializations must be completed prior to starting this LMC DRESET
+ * initialization (Step 4).
+ *
+ * This LMC DRESET step is done for all enabled LMCs.
+ *
+ * There are special constraints on the ordering of DRESET initialization
+ * (Steps 4) and CK local initialization (Step 5) whenever CK local
+ * initialization must be executed. CK local initialization must be
+ * executed whenever the DDR PLL is being brought up (for each chip reset
+ * and whenever the DDR clock speed changes).
+ *
+ * When Step 5 must be executed in the two-LMC mode case:
+ * - LMC0 DRESET initialization must occur before Step 5.
+ * - LMC1 DRESET initialization must occur after Step 5.
+ *
+ * When Step 5 must be executed in the four-LMC mode case:
+ * - LMC2 and LMC3 DRESET initialization must occur before Step 5.
+ * - LMC0 and LMC1 DRESET initialization must occur after Step 5.
+ */
+
+ if ((ddr_interface_mask == 0x1) || (ddr_interface_mask == 0x3)) {
+ /* ONE-LMC MODE FOR 81XX AND 83XX BEFORE STEP 5 */
+ /* TWO-LMC MODE BEFORE STEP 5 */
+ cn78xx_lmc_dreset_init(node, 0);
+
+ } else if (ddr_interface_mask == 0xf) {
+ /* FOUR-LMC MODE BEFORE STEP 5 */
+ cn78xx_lmc_dreset_init(node, 2);
+ cn78xx_lmc_dreset_init(node, 3);
+ }
+
+ /*
+ * 6.9.5 LMC CK Local Initialization
+ *
+ * All of DDR PLL, LMC global CK, and LMC interface-enable
+ * initializations must be completed prior to starting this LMC CK local
+ * initialization (Step 5).
+ *
+ * LMC CK Local initialization must be performed for each chip reset and
+ * whenever the DDR clock speed changes. This step needs to be performed
+ * only once, not once per LMC.
+ *
+ * There are special constraints on the ordering of DRESET initialization
+ * (Steps 4) and CK local initialization (Step 5) whenever CK local
+ * initialization must be executed. CK local initialization must be
+ * executed whenever the DDR PLL is being brought up (for each chip reset
+ * and whenever the DDR clock speed changes).
+ *
+ * When Step 5 must be executed in the two-LMC mode case:
+ * - LMC0 DRESET initialization must occur before Step 5.
+ * - LMC1 DRESET initialization must occur after Step 5.
+ *
+ * When Step 5 must be executed in the four-LMC mode case:
+ * - LMC2 and LMC3 DRESET initialization must occur before Step 5.
+ * - LMC0 and LMC1 DRESET initialization must occur after Step 5.
+ *
+ * LMC CK local initialization is different depending on whether two-LMC
+ * or four-LMC modes are desired.
+ */
+
+ if (ddr_interface_mask == 0x3) {
+ /*
+ * 6.9.5.1 LMC CK Local Initialization for Two-LMC Mode
+ *
+ * 1. Write LMC0_DLL_CTL3 to its reset value. (Note that
+ * LMC0_DLL_CTL3[DLL_90_BYTE_SEL] = 0x2 .. 0x8 should also work.)
+ */
+
+ ddr_dll_ctl3.u = 0;
+ ddr_dll_ctl3.s.dclk90_recal_dis = 1;
+ ddr_dll_ctl3.s.dll90_byte_sel = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(0), ddr_dll_ctl3.u);
+
+ /*
+ * 2. Read LMC0_DLL_CTL3 and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(0));
+
+ /*
+ * 3. Without changing any other fields in LMC0_DLL_CTL3, write
+ * LMC0_DLL_CTL3[DCLK90_FWD] = 1. Writing LMC0_DLL_CTL3[DCLK90_FWD] = 1
+ * causes clock-delay information to be forwarded from LMC0 to LMC1.
+ */
+
+ ddr_dll_ctl3.s.dclk90_fwd = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(0), ddr_dll_ctl3.u);
+
+ /*
+ * 4. Read LMC0_DLL_CTL3 and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(0));
+ } /* if (ddr_interface_mask == 0x3) */
+
+ if (ddr_interface_mask == 0xf) {
+ /*
+ * 6.9.5.2 LMC CK Local Initialization for Four-LMC Mode
+ *
+ * 1. Write LMC2_DLL_CTL3 to its reset value except
+ * LMC2_DLL_CTL3[DLL90_BYTE_SEL] = 0x7.
+ */
+
+ ddr_dll_ctl3.u = 0;
+ ddr_dll_ctl3.s.dclk90_recal_dis = 1;
+ ddr_dll_ctl3.s.dll90_byte_sel = 7;
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(2), ddr_dll_ctl3.u);
+
+ /*
+ * 2. Write LMC3_DLL_CTL3 to its reset value except
+ * LMC3_DLL_CTL3[DLL90_BYTE_SEL] = 0x0.
+ */
+
+ ddr_dll_ctl3.u = 0;
+ ddr_dll_ctl3.s.dclk90_recal_dis = 1;
+ ddr_dll_ctl3.s.dll90_byte_sel = 0; /* HRM wants 0, not 2 */
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(3), ddr_dll_ctl3.u); /* HRM wants LMC3 */
+
+ /*
+ * 3. Read LMC3_DLL_CTL3 and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(3));
+
+ /*
+ * 4. Without changing any other fields in LMC2_DLL_CTL3, write
+ * LMC2_DLL_CTL3[DCLK90_FWD] = 1 and LMC2_DLL_CTL3[DCLK90_RECAL_DIS] = 1.
+ * Writing LMC2_DLL_CTL3[DCLK90_FWD] = 1 causes LMC 2 to forward
+ * clock-delay information to LMC0. Setting
+ * LMC2_DLL_CTL3[DCLK90_RECAL_DIS] to 1 prevents LMC2 from periodically
+ * recalibrating this delay information.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(2),
+ c.s.dclk90_fwd = 1;
+ c.s.dclk90_recal_dis = 1);
+
+ /*
+ * 5. Without changing any other fields in LMC3_DLL_CTL3, write
+ * LMC3_DLL_CTL3[DCLK90_FWD] = 1 and LMC3_DLL_CTL3[DCLK90_RECAL_DIS] = 1.
+ * Writing LMC3_DLL_CTL3[DCLK90_FWD] = 1 causes LMC3 to forward
+ * clock-delay information to LMC1. Setting
+ * LMC3_DLL_CTL3[DCLK90_RECAL_DIS] to 1 prevents LMC3 from periodically
+ * recalibrating this delay information.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(3),
+ c.s.dclk90_fwd = 1;
+ c.s.dclk90_recal_dis = 1);
+
+ /*
+ * 6. Read LMC3_DLL_CTL3 and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(3));
+ } /* if (ddr_interface_mask == 0xf) */
+
+
+ /* ONE-LMC MODE AFTER STEP 5 - NOTHING */
+
+ /* TWO-LMC MODE AFTER STEP 5 */
+ if (ddr_interface_mask == 0x3) {
+ cn78xx_lmc_dreset_init(node, 1);
+ }
+
+ /* FOUR-LMC MODE AFTER STEP 5 */
+ if (ddr_interface_mask == 0xf) {
+ cn78xx_lmc_dreset_init(node, 0);
+ cn78xx_lmc_dreset_init(node, 1);
+
+ /* Enable periodic recalibration of the DDR90 delay line in LMC0 and LMC1. */
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(0),
+ c.s.dclk90_recal_dis = 0);
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(1),
+ c.s.dclk90_recal_dis = 0);
+ }
+
+
+ /* Enable fine tune mode for all LMCs */
+ for (int lmc = 0; lmc<4; ++lmc) {
+ if ((ddr_interface_mask & (1 << lmc)) == 0)
+ continue;
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(lmc),
+ c.s.fine_tune_mode = 1);
+ }
+
+ /* Enable the trim circuit on the appropriate channels to
+ adjust the DDR clock duty cycle for chips that support
+ it. */
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ bdk_lmcx_phy_ctl_t lmc_phy_ctl;
+ int loop_interface_num;
+
+ for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) {
+ if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
+ continue;
+
+ lmc_phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(loop_interface_num));
+ lmc_phy_ctl.cn83xx.lv_mode = (~loop_interface_num) & 1; /* Odd LMCs = 0, Even LMCs = 1 */
+
+ ddr_print("LMC%d: PHY_CTL : 0x%016lx\n",
+ loop_interface_num, lmc_phy_ctl.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(loop_interface_num), lmc_phy_ctl.u);
+ }
+ }
+
+ } /* Do this once */
+
+ } /* if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX)) */
+
+ set_ddr_clock_initialized(node, ddr_interface_num, 1);
+ return(0);
+}
+
+void
+perform_lmc_reset(bdk_node_t node, int ddr_interface_num)
+{
+ /*
+ * 6.9.6 LMC RESET Initialization
+ *
+ * The purpose of this step is to assert/deassert the RESET# pin at the
+ * DDR3/DDR4 parts.
+ *
+ * This LMC RESET step is done for all enabled LMCs.
+ *
+ * It may be appropriate to skip this step if the DDR3/DDR4 DRAM parts
+ * are in self refresh and are currently preserving their
+ * contents. (Software can determine this via
+ * LMC(0..3)_RESET_CTL[DDR3PSV] in some circumstances.) The remainder of
+ * this section assumes that the DRAM contents need not be preserved.
+ *
+ * The remainder of this section assumes that the CN78XX DDRn_RESET_L pin
+ * is attached to the RESET# pin of the attached DDR3/DDR4 parts, as will
+ * be appropriate in many systems.
+ *
+ * (In other systems, such as ones that can preserve DDR3/DDR4 part
+ * contents while CN78XX is powered down, it will not be appropriate to
+ * directly attach the CN78XX DDRn_RESET_L pin to RESET# of the
+ * DDR3/DDR4 parts, and this section may not apply.)
+ *
+ * The remainder of this section describes the sequence for LMCn.
+ *
+ * Perform the following six substeps for LMC reset initialization:
+ *
+ * 1. If not done already, assert DDRn_RESET_L pin by writing
+ * LMC(0..3)_RESET_CTL[DDR3RST] = 0 without modifying any other
+ * LMC(0..3)_RESET_CTL fields.
+ */
+
+ if (!ddr_memory_preserved(node)) {
+ /*
+ * 2. Read LMC(0..3)_RESET_CTL and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_RESET_CTL(ddr_interface_num));
+
+ /*
+ * 3. Wait until RESET# assertion-time requirement from JEDEC DDR3/DDR4
+ * specification is satisfied (200 us during a power-on ramp, 100ns when
+ * power is already stable).
+ */
+
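+ /* Use the worst-case 200 us wait here; the shorter 100 ns
+ stable-power requirement is covered by it as well. */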
+ bdk_wait_usec(200);
+
+ /*
+ * 4. Deassert DDRn_RESET_L pin by writing LMC(0..3)_RESET_CTL[DDR3RST] = 1
+ * without modifying any other LMC(0..3)_RESET_CTL fields.
+ * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
+ * 6. Wait a minimum of 500us. This guarantees the necessary T = 500us
+ * delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE* assertion.
+ */
+ cn88xx_lmc_ddr3_reset(node, ddr_interface_num, LMC_DDR3_RESET_DEASSERT);
+
+ /* Toggle Reset Again */
+ /* That is, assert, then de-assert, one more time */
+ cn88xx_lmc_ddr3_reset(node, ddr_interface_num, LMC_DDR3_RESET_ASSERT);
+ cn88xx_lmc_ddr3_reset(node, ddr_interface_num, LMC_DDR3_RESET_DEASSERT);
+
+ } /* if (!ddr_memory_preserved(node)) */
+}
+
+///////////////////////////////////////////////////////////
+// start of DBI switchover
+
+/* first pattern example:
+ GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
+ GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
+ GENERAL_PURPOSE2.DATA == 16'h0000;
+*/
+const uint64_t dbi_pattern[3] = { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000ULL };
+
+// Perform switchover to DBI
+static void dbi_switchover_interface(int node, int lmc)
+{
+ bdk_lmcx_modereg_params0_t modereg_params0;
+ bdk_lmcx_modereg_params3_t modereg_params3;
+ bdk_lmcx_phy_ctl_t phy_ctl;
+ bdk_lmcx_config_t lmcx_config;
+ bdk_lmcx_ddr_pll_ctl_t ddr_pll_ctl;
+ int rank_mask, rankx, active_ranks;
+ uint64_t phys_addr, rank_offset;
+ int num_lmcs, errors;
+ int dbi_settings[9], byte, unlocked, retries;
+ int ecc_ena;
+ int rank_max = 1; // FIXME: make this 4 to try all the ranks
+
+ ddr_pll_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(0));
+
+ lmcx_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ rank_mask = lmcx_config.s.init_status;
+ ecc_ena = lmcx_config.s.ecc_ena;
+
+ // FIXME: must filter out any non-supported configs
+ // ie, no DDR3, no x4 devices, no 81XX
+ if ((ddr_pll_ctl.cn83xx.ddr4_mode == 0) ||
+ (lmcx_config.s.mode_x4dev == 1) ||
+ CAVIUM_IS_MODEL(CAVIUM_CN81XX) )
+ {
+ ddr_print("N%d.LMC%d: DBI switchover: inappropriate device; EXITING...\n",
+ node, lmc);
+ return;
+ }
+
+ // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
+ num_lmcs = __bdk_dram_get_num_lmc(node);
+ rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
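+ // Worked example (illustrative values only): with pbank_lsb = 5,
+ // rank_ena = 1 and num_lmcs = 2, the shift is 28 + 5 - 1 + 1 = 33,
+ // so rank_offset = 1ull << 33, i.e. 8 GiB per rank.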
+
+ ddr_print("N%d.LMC%d: DBI switchover: rank mask 0x%x, rank size 0x%016llx.\n",
+ node, lmc, rank_mask, (unsigned long long)rank_offset);
+
+ /* 1. conduct the current init sequence as usual all the way
+ after software write leveling.
+ */
+
+ read_DAC_DBI_settings(node, lmc, /*DBI*/0, dbi_settings);
+
+ display_DAC_DBI_settings(node, lmc, /* DBI */0, ecc_ena, dbi_settings, " INIT");
+
+ /* 2. set DBI related CSRs as below and issue MR write.
+ MODEREG_PARAMS3.WR_DBI=1
+ MODEREG_PARAMS3.RD_DBI=1
+ PHY_CTL.DBI_MODE_ENA=1
+ */
+ modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(lmc));
+
+ modereg_params3.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS3(lmc));
+ modereg_params3.s.wr_dbi = 1;
+ modereg_params3.s.rd_dbi = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS3(lmc), modereg_params3.u);
+
+ phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(lmc));
+ phy_ctl.s.dbi_mode_ena = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(lmc), phy_ctl.u);
+
+ /*
+ There are two options for the data to send. Let's start with (1); we could move to (2) in the future:
+
+ 1) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 0 (or for older chips where this does not exist)
+ set the data directly in these registers. This will yield a clk/2 pattern:
+ GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
+ GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
+ GENERAL_PURPOSE2.DATA == 16'h0000;
+ 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
+ here data comes from the LFSR generating a PRBS pattern
+ CHAR_CTL.EN = 0
+ CHAR_CTL.SEL = 0; // for PRBS
+ CHAR_CTL.DR = 1;
+ CHAR_CTL.PRBS = setup for whatever type of PRBS to send
+ CHAR_CTL.SKEW_ON = 1;
+ */
+ DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE0(lmc), dbi_pattern[0]);
+ DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE1(lmc), dbi_pattern[1]);
+ DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE2(lmc), dbi_pattern[2]);
+
+ /*
+ 3. adjust cas_latency (only necessary if RD_DBI is set).
+ here is my code for doing this:
+
+ if (csr_model.MODEREG_PARAMS3.RD_DBI.value == 1) begin
+ case (csr_model.MODEREG_PARAMS0.CL.value)
+ 0,1,2,3,4: csr_model.MODEREG_PARAMS0.CL.value += 2; // CL 9-13 -> 11-15
+ 5: begin
+ // CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3
+ if((csr_model.MODEREG_PARAMS0.CWL.value==1 || csr_model.MODEREG_PARAMS0.CWL.value==3))
+ csr_model.MODEREG_PARAMS0.CL.value = 7; // 14->16
+ else
+ csr_model.MODEREG_PARAMS0.CL.value = 13; // 14->17
+ end
+ 6: csr_model.MODEREG_PARAMS0.CL.value = 8; // 15->18
+ 7: csr_model.MODEREG_PARAMS0.CL.value = 14; // 16->19
+ 8: csr_model.MODEREG_PARAMS0.CL.value = 15; // 18->21
+ default:
+ `cn_fatal(("Error mem_cfg (%s) CL (%d) with RD_DBI=1, I am not sure what to do.",
+ mem_cfg, csr_model.MODEREG_PARAMS3.RD_DBI.value))
+ endcase
+ end
+ */
+ if (modereg_params3.s.rd_dbi == 1) {
+ int old_cl, new_cl, old_cwl;
+
+ old_cl = modereg_params0.s.cl;
+ old_cwl = modereg_params0.s.cwl;
+
+ switch (old_cl) {
+ case 0: case 1: case 2: case 3: case 4: new_cl = old_cl + 2; break; // 9-13->11-15
+ // CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3
+ case 5: new_cl = ((old_cwl == 1) || (old_cwl == 3)) ? 7 : 13; break;
+ case 6: new_cl = 8; break; // 15->18
+ case 7: new_cl = 14; break; // 16->19
+ case 8: new_cl = 15; break; // 18->21
+ default:
+ error_print("ERROR: Bad CL value (%d) for DBI switchover.\n", old_cl);
+ // FIXME: need to error exit here...
+ old_cl = -1;
+ new_cl = -1;
+ break;
+ }
+ ddr_print("N%d.LMC%d: DBI switchover: CL ADJ: old_cl 0x%x, old_cwl 0x%x, new_cl 0x%x.\n",
+ node, lmc, old_cl, old_cwl, new_cl);
+ modereg_params0.s.cl = new_cl;
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(lmc), modereg_params0.u);
+ }
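+
+ /* For example (values per the encoding comments above): an encoded CL
+ of 3 (CL=12) becomes new_cl = 5 (CL=14), i.e. the +2 CAS-latency
+ adjustment required when read DBI is enabled. */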
+
+ /*
+ 4. issue MRW to MR0 (CL) and MR5 (DBI), using LMC sequence SEQ_CTL[SEQ_SEL] = MRW.
+ */
+ // Use the default values, from the CSRs fields
+ // also, do B-sides for RDIMMs...
+
+ for (rankx = 0; rankx < 4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ // for RDIMMs, B-side writes should get done automatically when the A-side is written
+ ddr4_mrw(node, lmc, rankx, -1/* use_default*/, 0/*MRreg*/, 0 /*A-side*/); /* MR0 */
+ ddr4_mrw(node, lmc, rankx, -1/* use_default*/, 5/*MRreg*/, 0 /*A-side*/); /* MR5 */
+
+ } /* for (rankx = 0; rankx < 4; rankx++) */
+
+ /*
+ 5. conduct DBI bit deskew training via the General Purpose R/W sequence (dbtrain).
+ may need to run this over and over to get a lock (I need up to 5 in simulation):
+ SEQ_CTL[SEQ_SEL] = RW_TRAINING (15)
+ DBTRAIN_CTL.CMD_COUNT_EXT = all 1's
+ DBTRAIN_CTL.READ_CMD_COUNT = all 1's
+ DBTRAIN_CTL.TCCD_SEL = set according to MODEREG_PARAMS3[TCCD_L]
+ DBTRAIN_CTL.RW_TRAIN = 1
+ DBTRAIN_CTL.READ_DQ_COUNT = don't care
+ DBTRAIN_CTL.WRITE_ENA = 1;
+ DBTRAIN_CTL.ACTIVATE = 1;
+ DBTRAIN_CTL LRANK, PRANK, ROW_A, BG, BA, COLUMN_A = set to a valid address
+ */
+
+ // NOW - do the training
+ ddr_print("N%d.LMC%d: DBI switchover: TRAINING begins...\n",
+ node, lmc);
+
+ active_ranks = 0;
+ for (rankx = 0; rankx < rank_max; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ phys_addr = rank_offset * active_ranks;
+ // FIXME: now done by test_dram_byte_hw()
+ //phys_addr |= (lmc << 7);
+ //phys_addr = bdk_numa_get_address(node, phys_addr); // map to node
+
+ active_ranks++;
+
+ retries = 0;
+
+#if 0
+ phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(lmc));
+ phy_ctl.s.phy_reset = 1; // FIXME: this may reset too much?
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(lmc), phy_ctl.u);
+#endif
+
+restart_training:
+
+ // NOTE: return is a bitmask of the erroring bytelanes - we only print it
+ errors = test_dram_byte_hw(node, lmc, phys_addr, DBTRAIN_DBI, NULL);
+
+ ddr_print("N%d.LMC%d: DBI switchover: TEST: rank %d, phys_addr 0x%lx, errors 0x%x.\n",
+ node, lmc, rankx, phys_addr, errors);
+
+ // NEXT - check for locking
+ unlocked = 0;
+ read_DAC_DBI_settings(node, lmc, /*DBI*/0, dbi_settings);
+
+ for (byte = 0; byte < (8+ecc_ena); byte++) {
+ unlocked += (dbi_settings[byte] & 1) ^ 1;
+ }
+
+ // FIXME: print out the DBI settings array after each rank?
+ if (rank_max > 1) // only when doing more than 1 rank
+ display_DAC_DBI_settings(node, lmc, /* DBI */0, ecc_ena, dbi_settings, " RANK");
+
+ if (unlocked > 0) {
+ ddr_print("N%d.LMC%d: DBI switchover: LOCK: %d still unlocked.\n",
+ node, lmc, unlocked);
+
+ retries++;
+ if (retries < 10) {
+ goto restart_training;
+ } else {
+ ddr_print("N%d.LMC%d: DBI switchover: LOCK: %d retries exhausted.\n",
+ node, lmc, retries);
+ }
+ }
+ } /* for (rankx = 0; rankx < rank_max; rankx++) */
+
+ // print out the final DBI settings array
+ display_DAC_DBI_settings(node, lmc, /* DBI */0, ecc_ena, dbi_settings, "FINAL");
+}
+// end of DBI switchover
+///////////////////////////////////////////////////////////
+
+uint32_t measure_octeon_ddr_clock(bdk_node_t node,
+ const ddr_configuration_t *ddr_configuration,
+ uint32_t cpu_hertz,
+ uint32_t ddr_hertz,
+ uint32_t ddr_ref_hertz,
+ int ddr_interface_num,
+ uint32_t ddr_interface_mask)
+{
+ uint64_t core_clocks;
+ uint64_t ddr_clocks;
+ uint64_t calc_ddr_hertz;
+
+ if (ddr_configuration) {
+ if (initialize_ddr_clock(node,
+ ddr_configuration,
+ cpu_hertz,
+ ddr_hertz,
+ ddr_ref_hertz,
+ ddr_interface_num,
+ ddr_interface_mask) != 0)
+ return 0;
+ }
+
+ /* Dynamically determine the DDR clock speed */
+ core_clocks = bdk_clock_get_count(BDK_CLOCK_TIME);
+ ddr_clocks = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num));
+ bdk_wait_usec(100000); /* 100ms */
+ ddr_clocks = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num)) - ddr_clocks;
+ core_clocks = bdk_clock_get_count(BDK_CLOCK_TIME) - core_clocks;
+ calc_ddr_hertz = ddr_clocks * bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_TIME) / core_clocks;
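+
+ /* Worked example (illustrative numbers, assuming a 100 MHz
+ BDK_CLOCK_TIME): over the ~100 ms window core_clocks is ~10,000,000;
+ if 80,000,000 DCLKs were counted in that window, calc_ddr_hertz =
+ 80000000 * 100000000 / 10000000 = 800 MHz. */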
+
+ /* Asim doesn't have a DDR clock, force the measurement to be correct */
+ if (bdk_is_platform(BDK_PLATFORM_ASIM))
+ calc_ddr_hertz = ddr_hertz;
+
+ ddr_print("LMC%d: Measured DDR clock: %lu, cpu clock: %u, ddr clocks: %lu\n",
+ ddr_interface_num, calc_ddr_hertz, cpu_hertz, ddr_clocks);
+
+ /* Check for unreasonable settings. */
+ if (calc_ddr_hertz == 0) {
+ error_print("DDR clock misconfigured. Exiting.\n");
+ exit(1);
+ }
+ return calc_ddr_hertz;
+}
+
+int octeon_ddr_initialize(bdk_node_t node,
+ uint32_t cpu_hertz,
+ uint32_t ddr_hertz,
+ uint32_t ddr_ref_hertz,
+ uint32_t ddr_interface_mask,
+ const ddr_configuration_t *ddr_configuration,
+ uint32_t *measured_ddr_hertz,
+ int board_type,
+ int board_rev_maj,
+ int board_rev_min)
+{
+ uint32_t ddr_config_valid_mask = 0;
+ int memsize_mbytes = 0;
+ const char *s;
+ int retval;
+ int interface_index;
+ uint32_t ddr_max_speed = 1210000000; /* needs to be this high for DDR4 */
+ uint32_t calc_ddr_hertz = -1;
+
+#ifndef OCTEON_SDK_VERSION_STRING
+# define OCTEON_SDK_VERSION_STRING "Development Build"
+#endif
+
+ ddr_print(OCTEON_SDK_VERSION_STRING": $Revision: 102369 $\n");
+
+#ifdef CAVIUM_ONLY
+ /* Override speed restrictions to support internal testing. */
+ ddr_max_speed = 1210000000;
+#endif /* CAVIUM_ONLY */
+
+ if (ddr_hertz > ddr_max_speed) {
+ error_print("DDR clock speed %u exceeds maximum speed supported by "
+ "processor, reducing to %uHz\n",
+ ddr_hertz, ddr_max_speed);
+ ddr_hertz = ddr_max_speed;
+ }
+
+ // Do this earlier so we can return without doing unnecessary things...
+ /* Check for DIMM 0 socket populated for each LMC present */
+ for (interface_index = 0; interface_index < 4; ++interface_index) {
+ if ((ddr_interface_mask & (1 << interface_index)) &&
+ (validate_dimm(node, &ddr_configuration[interface_index].dimm_config_table[0])) == 1)
+ {
+ ddr_config_valid_mask |= (1 << interface_index);
+ }
+ }
+
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) {
+ int four_lmc_mode = 1;
+
+ // Validate that it can only be 2-LMC mode or 4-LMC mode
+ if ((ddr_config_valid_mask != 0x03) && (ddr_config_valid_mask != 0x0f)) {
+ puts("ERROR: Invalid LMC configuration detected.\n");
+ return -1;
+ }
+
+ if ((s = lookup_env_parameter("ddr_four_lmc")) != NULL)
+ four_lmc_mode = !!strtoul(s, NULL, 0);
+
+ if (!four_lmc_mode) {
+ puts("Forcing two-LMC Mode.\n");
+ ddr_config_valid_mask &= ~(3<<2); /* Invalidate LMC[2:3] */
+ }
+ }
+
+ if (!ddr_config_valid_mask) {
+ puts("ERROR: No valid DIMMs detected on any DDR interface.\n");
+ return -1;
+ }
+
+ {
+ /*
+
+ rdf_cnt: Defines the sample point of the LMC response data in
+ the DDR-clock/core-clock crossing. For optimal
+ performance set to 10 * (DDR-clock period/core-clock
+ period) - 1. To disable set to 0. All other values
+ are reserved.
+ */
+
+ uint64_t rdf_cnt;
+ BDK_CSR_INIT(l2c_ctl, node, BDK_L2C_CTL);
+ /* It is more convenient to compute the ratio using clock
+ frequencies rather than clock periods. */
+ rdf_cnt = (((uint64_t) 10 * cpu_hertz) / ddr_hertz) - 1;
+ rdf_cnt = rdf_cnt<256 ? rdf_cnt : 255;
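+ /* Worked example (illustrative): with cpu_hertz = 1.8 GHz and
+ ddr_hertz = 800 MHz, rdf_cnt = (10 * 1800000000 / 800000000) - 1
+ = 22 - 1 = 21, well under the clamp of 255. */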
+ l2c_ctl.s.rdf_cnt = rdf_cnt;
+
+ if ((s = lookup_env_parameter("early_fill_count")) != NULL)
+ l2c_ctl.s.rdf_cnt = strtoul(s, NULL, 0);
+
+ ddr_print("%-45s : %d, cpu_hertz:%u, ddr_hertz:%u\n", "EARLY FILL COUNT ",
+ l2c_ctl.s.rdf_cnt, cpu_hertz, ddr_hertz);
+ DRAM_CSR_WRITE(node, BDK_L2C_CTL, l2c_ctl.u);
+ }
+
+ /* Check to see if we should limit the number of L2 ways. */
+ if ((s = lookup_env_parameter("limit_l2_ways")) != NULL) {
+ int ways = strtoul(s, NULL, 10);
+ limit_l2_ways(node, ways, 1);
+ }
+
+ /* We measure the DDR frequency by counting DDR clocks. We can
+ * confirm or adjust the expected frequency as necessary. We use
+ * the measured frequency to make accurate timing calculations
+ * used to configure the controller.
+ */
+ for (interface_index = 0; interface_index < 4; ++interface_index) {
+ uint32_t tmp_hertz;
+
+ if (! (ddr_config_valid_mask & (1 << interface_index)))
+ continue;
+
+ try_again:
+ // if we are LMC0
+ if (interface_index == 0) {
+ // if we are asking for 100 MHz refclk, we can only get it via alternate, so switch to it
+ if (ddr_ref_hertz == 100000000) {
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(0), c.s.dclk_alt_refclk_sel = 1);
+ bdk_wait_usec(1000); // wait 1 msec
+ } else {
+ // if we are NOT asking for 100MHz, then reset to (assumed) 50MHz and go on
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(0), c.s.dclk_alt_refclk_sel = 0);
+ bdk_wait_usec(1000); // wait 1 msec
+ }
+ }
+
+ tmp_hertz = measure_octeon_ddr_clock(node,
+ &ddr_configuration[interface_index],
+ cpu_hertz,
+ ddr_hertz,
+ ddr_ref_hertz,
+ interface_index,
+ ddr_config_valid_mask);
+
+ // if we are LMC0 and we are asked for 100 MHz refclk,
+ // we must be sure it is available
+ // If not, we print an error message, set to 50MHz, and go on...
+ if ((interface_index == 0) && (ddr_ref_hertz == 100000000)) {
+ // validate that the clock returned is close enough to the clock desired
+ // FIXME: is 5% close enough?
+ int hertz_diff = _abs((int)tmp_hertz - (int)ddr_hertz);
+ if (hertz_diff > ((int)ddr_hertz * 5 / 100)) { // nope, diff is greater than 5%
+ ddr_print("N%d: DRAM init: requested 100 MHz refclk NOT FOUND\n", node);
+ ddr_ref_hertz = bdk_clock_get_rate(node, BDK_CLOCK_MAIN_REF);
+ set_ddr_clock_initialized(node, 0, 0); // clear the flag before trying again!!
+ goto try_again;
+ } else {
+ ddr_print("N%d: DRAM Init: requested 100 MHz refclk FOUND and SELECTED.\n", node);
+ }
+ }
+
+ if (tmp_hertz > 0)
+ calc_ddr_hertz = tmp_hertz;
+
+ } /* for (interface_index = 0; interface_index < 4; ++interface_index) */
+
+ if (measured_ddr_hertz)
+ *measured_ddr_hertz = calc_ddr_hertz;
+
+ memsize_mbytes = 0;
+ for (interface_index = 0; interface_index < 4; ++interface_index) {
+ if (! (ddr_config_valid_mask & (1 << interface_index))) { // if LMC has no DIMMs found
+ if (ddr_interface_mask & (1 << interface_index)) { // but the LMC is present
+ for (int i = 0; i < DDR_CFG_T_MAX_DIMMS; i++) {
+ // check for slot presence
+ if (validate_dimm(node, &ddr_configuration[interface_index].dimm_config_table[i]) == 0)
+ printf("N%d.LMC%d.DIMM%d: Not Present\n", node, interface_index, i);
+ }
+ error_print("N%d.LMC%d Configuration Completed: 0 MB\n", node, interface_index);
+ }
+ continue;
+ }
+
+ retval = init_octeon_dram_interface(node,
+ &ddr_configuration[interface_index],
+ calc_ddr_hertz, /* Configure using measured value */
+ cpu_hertz,
+ ddr_ref_hertz,
+ board_type,
+ board_rev_maj,
+ board_rev_min,
+ interface_index,
+ ddr_config_valid_mask);
+ if (retval > 0)
+ memsize_mbytes += retval;
+ }
+
+ if (memsize_mbytes == 0)
+ /* All interfaces failed to initialize, so return error */
+ return -1;
+
+ // switch over to DBI mode only for chips that support it, and enabled by envvar
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ int do_dbi = 0;
+ if ((s = lookup_env_parameter("ddr_dbi_switchover")) != NULL) {
+ do_dbi = !!strtoul(s, NULL, 10);
+ }
+ if (do_dbi) {
+ ddr_print("DBI Switchover starting...\n");
+ for (interface_index = 0; interface_index < 4; ++interface_index) {
+ if (! (ddr_config_valid_mask & (1 << interface_index)))
+ continue;
+ dbi_switchover_interface(node, interface_index);
+ }
+ printf("DBI Switchover finished.\n");
+ }
+ }
+
+ // limit memory size if desired...
+ if ((s = lookup_env_parameter("limit_dram_mbytes")) != NULL) {
+ unsigned int mbytes = strtoul(s, NULL, 10);
+ if (mbytes > 0) {
+ memsize_mbytes = mbytes;
+ printf("Limiting DRAM size to %d MBytes based on limit_dram_mbytes env. variable\n",
+ mbytes);
+ }
+ }
+
+ return memsize_mbytes;
+}
+
diff --git a/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.h b/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.h
new file mode 100644
index 0000000000..b691e5286b
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.h
@@ -0,0 +1,124 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+extern const dimm_odt_config_t disable_odt_config[];
+
+#define rttnom_none 0 /* Rtt_Nom disabled */
+#define rttnom_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */
+#define rttnom_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */
+#define rttnom_40ohm 3 /* RZQ/6 = 240/6 = 40 ohms */
+#define rttnom_20ohm 4 /* RZQ/12 = 240/12 = 20 ohms */
+#define rttnom_30ohm 5 /* RZQ/8 = 240/8 = 30 ohms */
+#define rttnom_rsrv1 6 /* Reserved */
+#define rttnom_rsrv2 7 /* Reserved */
+
+#define rttwr_none 0 /* Dynamic ODT off */
+#define rttwr_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */
+#define rttwr_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */
+#define rttwr_rsrv1 3 /* Reserved */
+
+#define dic_40ohm 0 /* RZQ/6 = 240/6 = 40 ohms */
+#define dic_34ohm 1 /* RZQ/7 = 240/7 = 34 ohms */
+
+#define driver_24_ohm 1
+#define driver_27_ohm 2
+#define driver_30_ohm 3
+#define driver_34_ohm 4
+#define driver_40_ohm 5
+#define driver_48_ohm 6
+#define driver_60_ohm 7
+
+#define rodt_ctl_none 0
+#define rodt_ctl_20_ohm 1
+#define rodt_ctl_30_ohm 2
+#define rodt_ctl_40_ohm 3
+#define rodt_ctl_60_ohm 4
+#define rodt_ctl_120_ohm 5
+
+#define ddr4_rttnom_none 0 /* Rtt_Nom disabled */
+#define ddr4_rttnom_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */
+#define ddr4_rttnom_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */
+#define ddr4_rttnom_40ohm 3 /* RZQ/6 = 240/6 = 40 ohms */
+#define ddr4_rttnom_240ohm 4 /* RZQ/1 = 240/1 = 240 ohms */
+#define ddr4_rttnom_48ohm 5 /* RZQ/5 = 240/5 = 48 ohms */
+#define ddr4_rttnom_80ohm 6 /* RZQ/3 = 240/3 = 80 ohms */
+#define ddr4_rttnom_34ohm 7 /* RZQ/7 = 240/7 = 34 ohms */
+
+#define ddr4_rttwr_none 0 /* Dynamic ODT off */
+#define ddr4_rttwr_120ohm 1 /* RZQ/2 = 240/2 = 120 ohms */
+#define ddr4_rttwr_240ohm 2 /* RZQ/1 = 240/1 = 240 ohms */
+#define ddr4_rttwr_HiZ 3 /* HiZ */
+/* This setting will be available for cn78xx cn88xx pass 2 and cn73xx
+ pass 1. It is disabled for now. */
+//#define ddr4_rttwr_80ohm 4 /* RZQ/3 = 240/3 = 80 ohms */
+
+#define ddr4_dic_34ohm 0 /* RZQ/7 = 240/7 = 34 ohms */
+#define ddr4_dic_48ohm 1 /* RZQ/5 = 240/5 = 48 ohms */
+
+#define ddr4_rttpark_none 0 /* Rtt_Park disabled */
+#define ddr4_rttpark_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */
+#define ddr4_rttpark_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */
+#define ddr4_rttpark_40ohm 3 /* RZQ/6 = 240/6 = 40 ohms */
+#define ddr4_rttpark_240ohm 4 /* RZQ/1 = 240/1 = 240 ohms */
+#define ddr4_rttpark_48ohm 5 /* RZQ/5 = 240/5 = 48 ohms */
+#define ddr4_rttpark_80ohm 6 /* RZQ/3 = 240/3 = 80 ohms */
+#define ddr4_rttpark_34ohm 7 /* RZQ/7 = 240/7 = 34 ohms */
+
+#define ddr4_driver_26_ohm 2
+#define ddr4_driver_30_ohm 3
+#define ddr4_driver_34_ohm 4
+#define ddr4_driver_40_ohm 5
+#define ddr4_driver_48_ohm 6
+
+#define ddr4_dqx_driver_24_ohm 1
+#define ddr4_dqx_driver_27_ohm 2
+#define ddr4_dqx_driver_30_ohm 3
+#define ddr4_dqx_driver_34_ohm 4
+#define ddr4_dqx_driver_40_ohm 5
+#define ddr4_dqx_driver_48_ohm 6
+#define ddr4_dqx_driver_60_ohm 7
+
+#define ddr4_rodt_ctl_none 0
+#define ddr4_rodt_ctl_40_ohm 1
+#define ddr4_rodt_ctl_60_ohm 2
+#define ddr4_rodt_ctl_80_ohm 3
+#define ddr4_rodt_ctl_120_ohm 4
+#define ddr4_rodt_ctl_240_ohm 5
+#define ddr4_rodt_ctl_34_ohm 6
+#define ddr4_rodt_ctl_48_ohm 7
diff --git a/src/vendorcode/cavium/bdk/libdram/libdram-config-load.c b/src/vendorcode/cavium/bdk/libdram/libdram-config-load.c
new file mode 100644
index 0000000000..5173290187
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/libdram-config-load.c
@@ -0,0 +1,262 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+
+/**
+ * Load a "odt_*rank_config" structure
+ *
+ * @param cfg Config to fill
+ * @param ranks Number of ranks we're loading (1,2,4)
+ * @param node Node we're loading for
+ * @param dimm Which DIMM this is for
+ * @param lmc Which LMC this is for
+ */
+static void load_rank_data(dram_config_t *cfg, int ranks, int num_dimms, int lmc, bdk_node_t node)
+{
+ /* Get a pointer to the structure we are filling */
+ dimm_odt_config_t *c;
+ switch (ranks)
+ {
+ case 1:
+ c = &cfg->config[lmc].odt_1rank_config[num_dimms - 1];
+ break;
+ case 2:
+ c = &cfg->config[lmc].odt_2rank_config[num_dimms - 1];
+ break;
+ case 4:
+ c = &cfg->config[lmc].odt_4rank_config[num_dimms - 1];
+ break;
+ default:
+ bdk_fatal("Unexpected number of ranks\n");
+ break;
+ }
+
+ /* Fill the global items */
+ c->odt_ena = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_DQX_CTL, ranks, num_dimms, lmc, node);
+ c->odt_mask = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_WODT_MASK, ranks, num_dimms, lmc, node);
+
+ /* Fill the per rank items */
+ int rank = 0;
+ c->odt_mask1.s.pasr_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_PASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.asr_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_ASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.srt_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_SRT, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_00_ext = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node) >> 2;
+ c->odt_mask1.s.dic_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_DIC, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_nom_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_NOM, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.db_output_impedance = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_DB_OUTPUT_IMPEDANCE, ranks, num_dimms, lmc, node);
+ rank = 1;
+ c->odt_mask1.s.pasr_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_PASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.asr_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_ASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.srt_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_SRT, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_01_ext = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node) >> 2;
+ c->odt_mask1.s.dic_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_DIC, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_nom_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_NOM, ranks, num_dimms, rank, lmc, node);
+ rank = 2;
+ c->odt_mask1.s.pasr_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_PASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.asr_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_ASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.srt_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_SRT, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_10_ext = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node) >> 2;
+ c->odt_mask1.s.dic_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_DIC, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_nom_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_NOM, ranks, num_dimms, rank, lmc, node);
+ rank = 3;
+ c->odt_mask1.s.pasr_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_PASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.asr_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_ASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.srt_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_SRT, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_11_ext = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node) >> 2;
+ c->odt_mask1.s.dic_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_DIC, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_nom_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_NOM, ranks, num_dimms, rank, lmc, node);
+ rank = 0;
+ c->odt_mask2.s.rtt_park_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_RTT_PARK, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vref_value_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_VALUE, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vref_range_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_RANGE, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vrefdq_train_en = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREFDQ_TRAIN_EN, ranks, num_dimms, lmc, node);
+ rank = 1;
+ c->odt_mask2.s.rtt_park_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_RTT_PARK, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vref_value_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_VALUE, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vref_range_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_RANGE, ranks, num_dimms, rank, lmc, node);
+ rank = 2;
+ c->odt_mask2.s.rtt_park_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_RTT_PARK, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vref_value_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_VALUE, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vref_range_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_RANGE, ranks, num_dimms, rank, lmc, node);
+ rank = 3;
+ c->odt_mask2.s.rtt_park_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_RTT_PARK, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vref_value_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_VALUE, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vref_range_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_RANGE, ranks, num_dimms, rank, lmc, node);
+
+ /* Fill more global items */
+ c->qs_dic = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_RODT_CTL, ranks, num_dimms, lmc, node);
+ c->rodt_ctl = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_RODT_MASK, ranks, num_dimms, lmc, node);
+}
+
+/**
+ * Load a DRAM configuration based on the current bdk-config settings
+ *
+ * @param node Node the DRAM config is for
+ *
+ * @return Pointer to __libdram_global_cfg, a global structure. Returns NULL if bdk-config
+ * lacks information about DRAM.
+ */
+const dram_config_t *libdram_config_load(bdk_node_t node)
+{
+ dram_config_t *cfg = &__libdram_global_cfg;
+ const int MAX_LMCS = sizeof(cfg->config) / sizeof(cfg->config[0]);
+
+ /* Make all fields for the node default to zero */
+ memset(cfg, 0, sizeof(*cfg));
+
+ /* Fill the SPD data first as some parameters need to know the DRAM type
+ to look up the correct values */
+ for (int lmc = 0; lmc < MAX_LMCS; lmc++)
+ {
+ for (int dimm = 0; dimm < DDR_CFG_T_MAX_DIMMS; dimm++)
+ {
+ int spd_addr = bdk_config_get_int(BDK_CONFIG_DDR_SPD_ADDR, dimm, lmc, node);
+ if (spd_addr)
+ {
+ cfg->config[lmc].dimm_config_table[dimm].spd_addr = spd_addr;
+ }
+ else
+ {
+ int spd_size;
+ const void *spd_data = bdk_config_get_blob(&spd_size, BDK_CONFIG_DDR_SPD_DATA, dimm, lmc, node);
+ if (spd_data && spd_size)
+ cfg->config[lmc].dimm_config_table[dimm].spd_ptr = spd_data;
+ }
+ }
+ }
+
+ /* Check that we know how to get DIMM information. If not, return failure */
+ if (!cfg->config[0].dimm_config_table[0].spd_addr && !cfg->config[0].dimm_config_table[0].spd_ptr)
+ return NULL;
+
+ cfg->name = "Loaded from bdk-config";
+ for (int lmc = 0; lmc < MAX_LMCS; lmc++)
+ {
+ for (int num_dimms = 1; num_dimms <= DDR_CFG_T_MAX_DIMMS; num_dimms++)
+ {
+ load_rank_data(cfg, 1, num_dimms, lmc, node);
+ load_rank_data(cfg, 2, num_dimms, lmc, node);
+ load_rank_data(cfg, 4, num_dimms, lmc, node);
+ }
+
+ ddr_configuration_t *c = &cfg->config[lmc];
+ ddr3_custom_config_t *custom = &c->custom_lmc_config;
+ custom->min_rtt_nom_idx = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MIN_RTT_NOM_IDX, lmc, node);
+ custom->max_rtt_nom_idx = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MAX_RTT_NOM_IDX, lmc, node);
+ custom->min_rodt_ctl = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MIN_RODT_CTL, lmc, node);
+ custom->max_rodt_ctl = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MAX_RODT_CTL, lmc, node);
+ custom->ck_ctl = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_CK_CTL, lmc, node);
+ custom->cmd_ctl = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_CMD_CTL, lmc, node);
+ custom->ctl_ctl = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_CTL_CTL, lmc, node);
+ custom->min_cas_latency = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MIN_CAS_LATENCY, lmc, node);
+ custom->offset_en = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_OFFSET_EN, lmc, node);
+ custom->offset_udimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_OFFSET, "UDIMM", lmc, node);
+ custom->offset_rdimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_OFFSET, "RDIMM", lmc, node);
+ custom->rlevel_compute = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_RLEVEL_COMPUTE, lmc, node);
+ custom->rlevel_comp_offset_udimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_RLEVEL_COMP_OFFSET, "UDIMM", lmc, node);
+ custom->rlevel_comp_offset_rdimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_RLEVEL_COMP_OFFSET, "RDIMM", lmc, node);
+ custom->ddr2t_udimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_DDR2T, "UDIMM", lmc, node);
+ custom->ddr2t_rdimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_DDR2T, "RDIMM", lmc, node);
+ custom->disable_sequential_delay_check = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_DISABLE_SEQUENTIAL_DELAY_CHECK, lmc, node);
+ custom->maximum_adjacent_rlevel_delay_increment
+ = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MAXIMUM_ADJACENT_RLEVEL_DELAY_INCREMENT, lmc, node);
+ custom->parity = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_PARITY, lmc, node);
+ custom->fprch2 = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_FPRCH2, lmc, node);
+ custom->mode32b = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MODE32B, lmc, node);
+ custom->measured_vref = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MEASURED_VREF, lmc, node);
+
+ /* CN80XX only supports 32bit mode */
+ if (cavium_is_altpkg(CAVIUM_CN81XX))
+ custom->mode32b = 1;
+
+ /* Loop through 8 bytes, plus ecc byte */
+ #define NUM_BYTES 9 /* Max bytes on LMC (8 plus ECC) */
+ static int8_t dll_write_offset[NUM_BYTES];
+ static int8_t dll_read_offset[NUM_BYTES];
+ for (int b = 0; b < NUM_BYTES; b++)
+ {
+ dll_write_offset[b] = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_DLL_WRITE_OFFSET, b, lmc, node);
+ dll_read_offset[b] = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_DLL_READ_OFFSET, b, lmc, node);
+ }
+ custom->dll_write_offset = dll_write_offset;
+ custom->dll_read_offset = dll_read_offset;
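+ /* Note: these arrays are static and shared across loop iterations,
+ so every LMC's custom config ends up pointing at the offsets
+ loaded for the last LMC processed. */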
+ }
+
+ int is_ddr4 = (cfg->config[0].odt_1rank_config[0].odt_mask2.u != 0);
+ int speed = bdk_config_get_int(BDK_CONFIG_DDR_SPEED, node);
+ switch (speed)
+ {
+ case 0: // AUTO
+ cfg->ddr_clock_hertz = 0;
+ break;
+ case 800:
+ case 1600:
+ case 2400:
+ cfg->ddr_clock_hertz = (uint64_t)speed * 1000000 / 2;
+ break;
+ case 666:
+ cfg->ddr_clock_hertz = 333333333;
+ break;
+ case 1066:
+ cfg->ddr_clock_hertz = 533333333;
+ break;
+ case 1333:
+ cfg->ddr_clock_hertz = 666666666;
+ break;
+ case 1866:
+ if (is_ddr4)
+ cfg->ddr_clock_hertz = 940000000;
+ else
+ cfg->ddr_clock_hertz = 933333333;
+ break;
+ case 2133:
+ cfg->ddr_clock_hertz = 1050000000;
+ break;
+ default:
+ bdk_warn("Unsupported DRAM speed of %d MT/s\n", speed);
+ cfg->ddr_clock_hertz = speed * 1000000 / 2;
+ break;
+ }
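+
+ /* Note the mapping is MT/s -> DCLK Hz (speed / 2, in MHz): e.g. 2400
+ MT/s yields 1.2 GHz, while 2133 MT/s is mapped to 1050 MHz rather
+ than the exact 1066.5 MHz. */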
+
+ return cfg;
+}
diff --git a/src/vendorcode/cavium/bdk/libdram/libdram.c b/src/vendorcode/cavium/bdk/libdram/libdram.c
new file mode 100644
index 0000000000..b19486694c
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/libdram.c
@@ -0,0 +1,718 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-mio_fus.h"
+#include "dram-internal.h"
+
+/* This global variable is accessed through dram_is_verbose() to determine
+ the verbosity level. Use that function instead of setting it directly */
+dram_verbosity_t dram_verbosity = VBL_OFF; /* init this here so we can set a non-zero default */
+
+static uint32_t measured_ddr_hertz[BDK_NUMA_MAX_NODES];
+
+/* The various DRAM configs in the libdram/configs directory need space
+ to store the DRAM config. Since only one config is ever in active use
+ at a time, store the configs in __libdram_global_cfg. In a multi-node
+ setup, independent calls to get the DRAM config will load first node 0's
+ config, then node 1's */
+dram_config_t __libdram_global_cfg;
+
+static void bdk_dram_clear_mem(bdk_node_t node)
+{
+ if (!bdk_is_platform(BDK_PLATFORM_ASIM)) {
+ uint64_t mbytes = bdk_dram_get_size_mbytes(node);
+ uint64_t skip = (node == bdk_numa_master()) ? bdk_dram_get_top_of_bdk() : 0;
+ uint64_t len = (mbytes << 20) - skip;
+
+ BDK_TRACE(DRAM, "N%d: Clearing DRAM\n", node);
+ if (skip)
+ {
+ /* All memory below skip may contain valid data, so we can't clear
+ it. We still need to make sure all cache lines in this area are
+ fully dirty so that ECC bits will be updated on store. A single
+ write to the cache line isn't good enough because partial LMC
+ writes may be enabled */
+ ddr_print("N%d: Rewriting DRAM: start 0 length 0x%lx\n", node, skip);
+ volatile uint64_t *ptr = bdk_phys_to_ptr(bdk_numa_get_address(node, 8));
+ /* The above pointer got address 8 to avoid NULL pointer checking
+ in bdk_phys_to_ptr(). Correct it here */
+ ptr--;
+ uint64_t *end = bdk_phys_to_ptr(bdk_numa_get_address(node, skip));
+ while (ptr < end)
+ {
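+ /* Read and write back each 64-bit word: this dirties every word
+ of each cache line so ECC is regenerated on writeback even
+ when partial LMC writes are enabled. */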
+ *ptr = *ptr;
+ ptr++;
+ }
+ }
+ ddr_print("N%d: Clearing DRAM: start 0x%lx length 0x%lx\n", node, skip, len);
+ bdk_zero_memory(bdk_phys_to_ptr(bdk_numa_get_address(node, skip)), len);
+ BDK_TRACE(DRAM, "N%d: DRAM clear complete\n", node);
+ }
+}
+
+static void bdk_dram_clear_ecc(bdk_node_t node)
+{
+ /* Clear any DRAM errors set during init */
+ BDK_TRACE(DRAM, "N%d: Clearing LMC ECC errors\n", node);
+ int num_lmc = __bdk_dram_get_num_lmc(node);
+ for (int lmc = 0; lmc < num_lmc; lmc++) {
+ DRAM_CSR_WRITE(node, BDK_LMCX_INT(lmc), BDK_CSR_READ(node, BDK_LMCX_INT(lmc)));
+ }
+}
+
+static void bdk_dram_enable_ecc_reporting(bdk_node_t node)
+{
+ /* Enable LMC ECC error HW reporting */
+ int num_lmc = __bdk_dram_get_num_lmc(node);
+
+ BDK_TRACE(DRAM, "N%d: Enable LMC ECC error reporting\n", node);
+
+ for (int lmc = 0; lmc < num_lmc; lmc++) {
+
+ // NOTE: this must be done for pass 2.x
+ // enable ECC interrupts to allow ECC error info in LMCX_INT
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ DRAM_CSR_WRITE(node, BDK_LMCX_INT_ENA_W1S(lmc), -1ULL);
+ BDK_CSR_INIT(lmc_int_ena_w1s, node, BDK_LMCX_INT_ENA_W1S(lmc));
+ ddr_print("N%d.LMC%d: %-36s : 0x%08lx\n",
+ node, lmc, "LMC_INT_ENA_W1S", lmc_int_ena_w1s.u);
+ }
+ }
+}
+
+static void bdk_dram_disable_ecc_reporting(bdk_node_t node)
+{
+ /* Disable LMC ECC error HW reporting */
+ int num_lmc = __bdk_dram_get_num_lmc(node);
+
+ BDK_TRACE(DRAM, "N%d: Disable LMC ECC error reporting\n", node);
+
+ for (int lmc = 0; lmc < num_lmc; lmc++) {
+
+ // NOTE: this must be done for pass 2.x
+ // disable ECC interrupts to prevent ECC error info in LMCX_INT
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ DRAM_CSR_WRITE(node, BDK_LMCX_INT_ENA_W1C(lmc), -1ULL);
+ BDK_CSR_INIT(lmc_int_ena_w1c, node, BDK_LMCX_INT_ENA_W1C(lmc));
+ ddr_print("N%d.LMC%d: %-36s : 0x%08lx\n",
+ node, lmc, "LMC_INT_ENA_W1C", lmc_int_ena_w1c.u);
+ }
+ }
+}
+
+// this routine simply makes the calls to the tuning routines and returns any errors
+static int bdk_libdram_tune_node(int node)
+{
+ int errs, tot_errs;
+ int do_dllro_hw = 0; // default to NO
+ int do_dllwo = 0; // default to NO
+ int do_eccdll = 0; // default to NO
+ const char *str;
+ BDK_CSR_INIT(lmc_config, node, BDK_LMCX_CONFIG(0)); // FIXME: probe LMC0
+ do_eccdll = (lmc_config.s.ecc_ena != 0); // change to ON if ECC enabled
+
+ // FIXME!!! make 81xx always use HW-assist tuning
+ if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+ do_dllro_hw = 1;
+
+ // Automatically tune the data byte DLL read offsets
+ // always done by default, but allow use of HW-assist
+ // NOTE: HW-assist will also tune the ECC byte
+ str = getenv("ddr_tune_hw_offsets");
+ if (str)
+ do_dllro_hw = !!strtoul(str, NULL, 0);
+ BDK_TRACE(DRAM, "N%d: Starting DLL Read Offset Tuning for LMCs\n", node);
+ if (!do_dllro_hw || (lmc_config.s.mode32b != 0)) {
+ errs = perform_dll_offset_tuning(node, 2, /* tune */1);
+ } else {
+ errs = perform_HW_dll_offset_tuning(node, /* read */2, 0x0A/* all bytelanes */);
+ }
+ BDK_TRACE(DRAM, "N%d: Finished DLL Read Offset Tuning for LMCs, %d errors)\n",
+ node, errs);
+ tot_errs = errs;
+
+ // disabled by default for now, does not seem to be needed?
+ // Automatically tune the data byte DLL write offsets
+ // allow override of default setting
+ str = getenv("ddr_tune_write_offsets");
+ if (str)
+ do_dllwo = !!strtoul(str, NULL, 0);
+ if (do_dllwo) {
+ BDK_TRACE(DRAM, "N%d: Starting DLL Write Offset Tuning for LMCs\n", node);
+ errs = perform_dll_offset_tuning(node, /* write */1, /* tune */1);
+ BDK_TRACE(DRAM, "N%d: Finished DLL Write Offset Tuning for LMCs, %d errors)\n",
+ node, errs);
+ tot_errs += errs;
+ }
+
+ // disabled by default for now, does not seem to be needed much?
+ // Automatically tune the ECC byte DLL read offsets
+ // FIXME? allow override of the filtering
+ // FIXME? allow programmatic override, not via envvar?
+ str = getenv("ddr_tune_ecc_enable");
+ if (str)
+ do_eccdll = !!strtoul(str, NULL, 10);
+ if (do_eccdll && !do_dllro_hw && (lmc_config.s.mode32b == 0)) { // do not do HW-assist twice for ECC
+ BDK_TRACE(DRAM, "N%d: Starting ECC DLL Read Offset Tuning for LMCs\n", node);
+ errs = perform_HW_dll_offset_tuning(node, 2, 8/* ECC bytelane */);
+ BDK_TRACE(DRAM, "N%d: Finished ECC DLL Read Offset Tuning for LMCs, %d errors\n",
+ node, errs);
+ tot_errs += errs;
+ }
+
+ return tot_errs;
+}
+
+// this routine makes the calls to the tuning routines when criteria are met
+// intended to be called for automated tuning, to apply filtering...
+
+#define IS_DDR4 1
+#define IS_DDR3 0
+#define IS_RDIMM 1
+#define IS_UDIMM 0
+#define IS_1SLOT 1
+#define IS_2SLOT 0
+
+// FIXME: DDR3 is not tuned
+static const uint32_t ddr_speed_filter[2][2][2] = {
+ [IS_DDR4] = {
+ [IS_RDIMM] = {
+ [IS_1SLOT] = 940,
+ [IS_2SLOT] = 800
+ },
+ [IS_UDIMM] = {
+ [IS_1SLOT] = 1050,
+ [IS_2SLOT] = 940
+ },
+ },
+ [IS_DDR3] = {
+ [IS_RDIMM] = {
+ [IS_1SLOT] = 0, // disabled
+ [IS_2SLOT] = 0 // disabled
+ },
+ [IS_UDIMM] = {
+ [IS_1SLOT] = 0, // disabled
+ [IS_2SLOT] = 0 // disabled
+ }
+ }
+};
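+
+// Example: a DDR4 RDIMM single-slot config must exceed 940 MHz DCLK
+// (i.e. 1880 MT/s) to be auto-tuned; all DDR3 entries are 0, so DDR3
+// is never auto-tuned.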
+
+static int bdk_libdram_maybe_tune_node(int node)
+{
+ const char *str;
+
+ // FIXME: allow an override here so that all configs can be tuned or none
+ // If the envvar is defined, always either force it or avoid it accordingly
+ if ((str = getenv("ddr_tune_all_configs")) != NULL) {
+ int tune_it = !!strtoul(str, NULL, 0);
+ printf("N%d: DRAM auto-tuning %s.\n", node, (tune_it) ? "forced" : "avoided");
+ return (tune_it) ? bdk_libdram_tune_node(node) : 0;
+ }
+
+ // filter the tuning calls here...
+ // determine if we should/can run automatically for this configuration
+ //
+ // FIXME: tune only when the configuration indicates it will help:
+ // DDR type, RDIMM or UDIMM, 1-slot or 2-slot, and speed
+ //
+ uint32_t ddr_speed = divide_nint(libdram_get_freq_from_pll(node, 0), 1000000); // sample LMC0
+ BDK_CSR_INIT(lmc_config, node, BDK_LMCX_CONFIG(0)); // sample LMC0
+
+ int is_ddr4 = !!__bdk_dram_is_ddr4(node, 0);
+ int is_rdimm = !!__bdk_dram_is_rdimm(node, 0);
+ int is_1slot = !!(lmc_config.s.init_status < 4); // HACK, should do better
+ int do_tune = 0;
+
+ uint32_t ddr_min_speed = ddr_speed_filter[is_ddr4][is_rdimm][is_1slot];
+ do_tune = (ddr_min_speed && (ddr_speed > ddr_min_speed));
+
+ ddr_print("N%d: DDR%d %cDIMM %d-slot at %d MHz %s eligible for auto-tuning.\n",
+ node, (is_ddr4)?4:3, (is_rdimm)?'R':'U', (is_1slot)?1:2,
+ ddr_speed, (do_tune)?"is":"is not");
+
+ // call the tuning routines, done filtering...
+ return ((do_tune) ? bdk_libdram_tune_node(node) : 0);
+}
+
+/**
+ * This is the main DRAM init function. Users of libdram should call this function,
+ * avoiding the other internal functions. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to initialize. This may not be the same node as the one running the code
+ * @param dram_config
+ * DRAM configuration to use
+ * @param ddr_clock_override
+ * If non-zero, this overrides the DRAM clock speed in the config structure. This
+ * allows quick testing of different DRAM speeds without modifying the basic
+ * config. If zero, the DRAM speed in the config is used.
+ *
+ * @return Amount of memory in MB. Zero or negative is a failure.
+ */
+int libdram_config(int node, const dram_config_t *dram_config, int ddr_clock_override)
+{
+ if (bdk_is_platform(BDK_PLATFORM_ASIM))
+ return bdk_dram_get_size_mbytes(node);
+
+ /* Boards may need to mux the TWSI connection between THUNDERX and the BMC.
+ This allows the BMC to monitor DIMM temperatures and health */
+ int gpio_select = bdk_config_get_int(BDK_CONFIG_DRAM_CONFIG_GPIO);
+ if (gpio_select != -1)
+ bdk_gpio_initialize(bdk_numa_master(), gpio_select, 1, 1);
+
+ /* Read all the SPDs and store them in the device tree. They are needed by
+ later software to populate SMBIOS information */
+ for (int lmc = 0; lmc < 4; lmc++)
+ for (int dimm = 0; dimm < DDR_CFG_T_MAX_DIMMS; dimm++)
+ read_entire_spd(node, (dram_config_t *)dram_config, lmc, dimm);
+
+ const ddr_configuration_t *ddr_config = dram_config->config;
+ int ddr_clock_hertz = (ddr_clock_override) ? ddr_clock_override : dram_config->ddr_clock_hertz;
+ if (ddr_clock_hertz == 0) // 0 == AUTO
+ {
+ ddr_clock_hertz = dram_get_default_spd_speed(node, ddr_config);
+ if (ddr_clock_hertz < 0) {
+ printf("N%d: DRAM init: AUTO clock ILLEGAL configuration\n", node);
+ return -1;
+ }
+ }
+ int errs;
+
+ // At this point, we only know the desired clock rate (ddr_clock_hertz).
+ // We do not know whether we are configuring RDIMMs.
+ // We also do not yet know if 100MHz alternate refclk is actually available.
+ // So, if we are being asked for 2133 MT/s or better, we still need to do:
+ // 1. probe for RDIMMs (if not, 50MHz refclk is good enough)
+ // 2. determine if 100MHz refclk is there, and switch to it before starting any configuration
+ //
+ // NOTES:
+ // 1. dclk_alt_refclk_sel need only be set on LMC0 (see above disabled code)
+ // 2. I think we need to first probe to see if we need it, and then configure it if its use is dictated
+ // 3. then go on to configure at the selected refclk
+ int ddr_refclk_hertz = bdk_clock_get_rate(node, BDK_CLOCK_MAIN_REF);
+ int alt_refclk = bdk_config_get_int(BDK_CONFIG_DDR_ALT_REFCLK, node);
+
+ char *str = getenv("ddr_100mhz_refclk");
+ if (str) { // if the envvar was found, force it to that setting
+ int do_100mhz = !!strtoul(str, NULL, 0);
+ alt_refclk = (do_100mhz) ? 100 : 50;
+ }
+
+ dram_verbosity = bdk_config_get_int(BDK_CONFIG_DRAM_VERBOSE);
+
+ // Here we check for fuses that limit the number of LMCs we can configure,
+ // but only on 83XX and 88XX...
+ int lmc_limit = 4;
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX) || CAVIUM_IS_MODEL(CAVIUM_CN83XX)) {
+ BDK_CSR_INIT(mio_fus_dat2, node, BDK_MIO_FUS_DAT2);
+ if (mio_fus_dat2.s.lmc_half) {
+ lmc_limit = (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) ? 2 : 1; // limit LMCs to half present
+ error_print("Only %d LMC(s)s supported for this Thunder model\n", lmc_limit);
+ }
+ }
+
+ /* We need to calculate the interface mask based on the provided SPD
+ addresses/contents */
+ uint32_t interface_mask = 0;
+ for (int i = 0; i < lmc_limit; i++)
+ {
+ // We need to check only DIMM 0 of each LMC for possible presence of the LMC.
+ // This trusts that the board database is correctly configured.
+ // Empty DIMM slots in present LMCs will be detected later.
+ if (ddr_config[i].dimm_config_table[0].spd_addr ||
+ ddr_config[i].dimm_config_table[0].spd_ptr)
+ interface_mask |= 1 << i;
+
+ // we know whether alternate refclk is always wanted
+ // we also know already if we want 2133 MT/s
+ // if alt refclk not always wanted, then probe DDR and DIMM type
+ // if DDR4 and RDIMMs, then set desired refclk to 100MHz, otherwise to default (50MHz)
+ // depend on ddr_initialize() to do the refclk selection and validation
+ if (i == 0) { // only check for LMC0
+ if (alt_refclk) { // if alternate refclk was specified, let it override everything
+ ddr_refclk_hertz = alt_refclk * 1000000;
+ ddr_print("N%d: DRAM init: %d MHz refclk is REQUESTED ALWAYS\n", node, alt_refclk);
+ } else if (ddr_clock_hertz > 1000000000) { // if more than 2000 MT/s
+ int ddr_type = get_ddr_type(node, &ddr_config[0].dimm_config_table[0]);
+ int spd_dimm_type = get_dimm_module_type(node, &ddr_config[0].dimm_config_table[0], ddr_type);
+ // confirm DDR4 with a registered module type (1=RDIMM, 5=Mini-RDIMM, 8=72b-SO-RDIMM)
+ if ((ddr_type == DDR4_DRAM) &&
+ ((spd_dimm_type == 1) || (spd_dimm_type == 5) || (spd_dimm_type == 8))) {
+ ddr_refclk_hertz = 100000000; // yes, we require 100MHz refclk, so set it
+ ddr_print("N%d: DRAM init: 100 MHz refclk is REQUIRED\n", node);
+ }
+ } // if (ddr_clock_hertz > 1000000000)
+ } // if (i == 0)
+ }
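+ /* interface_mask now holds one bit per candidate LMC; e.g. a mask of 0x5
+ means LMC0 and LMC2 each had a DIMM 0 SPD configured in the board database. */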
+
+ BDK_TRACE(DRAM, "N%d: DRAM init started (hertz=%d, refclk=%d, config=%p)\n",
+ node, ddr_clock_hertz, ddr_refclk_hertz, dram_config);
+ debug_print("N%d: DRAM init started (hertz=%d, refclk=%d, config=%p)\n",
+ node, ddr_clock_hertz, ddr_refclk_hertz, dram_config);
+
+ BDK_TRACE(DRAM, "N%d: Calling DRAM init\n", node);
+ measured_ddr_hertz[node] = 0;
+ int mbytes = octeon_ddr_initialize(node,
+ bdk_clock_get_rate(node, BDK_CLOCK_RCLK),
+ ddr_clock_hertz,
+ ddr_refclk_hertz,
+ interface_mask,
+ ddr_config,
+ &measured_ddr_hertz[node],
+ 0,
+ 0,
+ 0);
+ BDK_TRACE(DRAM, "N%d: DRAM init returned %d, measured %u Hz\n",
+ node, mbytes, measured_ddr_hertz[node]);
+
+ // do not tune or mess with memory if there was an init problem...
+ if (mbytes > 0) {
+
+ bdk_dram_disable_ecc_reporting(node);
+
+ // call the tuning routines, with filtering...
+ BDK_TRACE(DRAM, "N%d: Calling DRAM tuning\n", node);
+ errs = bdk_libdram_maybe_tune_node(node);
+ BDK_TRACE(DRAM, "N%d: DRAM tuning returned %d errors\n",
+ node, errs);
+
+ // finally, clear memory and any left-over ECC errors
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ bdk_dram_enable_ecc_reporting(node);
+ }
+
+ /* Boards may need to mux the TWSI connection between THUNDERX and the BMC.
+ This allows the BMC to monitor DIMM temperatures and health */
+ if (gpio_select != -1)
+ bdk_gpio_initialize(bdk_numa_master(), gpio_select, 1, 0);
+
+ return mbytes;
+}
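+/* A minimal usage sketch for libdram_config() (illustrative only; assumes
+ "my_board_config" is a hypothetical dram_config_t already populated from
+ the board database, which is not shown here):
+
+ int mbytes = libdram_config(node, &my_board_config, 0); // 0 = clock from config/SPD
+ if (mbytes <= 0)
+ printf("N%d: DRAM init failed (%d)\n", node, mbytes);
+ else
+ printf("N%d: %d MB DRAM at %u Hz\n", node, mbytes, libdram_get_freq(node));
+*/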
+
+/**
+ * This is the main DRAM tuning function. Users of libdram should call this function,
+ * avoiding the other internal functions. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to tune. This need not be the same node as the one running the code
+ *
+ * @return Total number of tuning errors found (0 on success)
+ */
+int libdram_tune(int node)
+{
+ int tot_errs;
+ int l2c_is_locked = bdk_l2c_is_locked(node);
+
+ dram_verbosity = bdk_config_get_int(BDK_CONFIG_DRAM_VERBOSE);
+
+ // the only way this entry point should be called is from a MENU item,
+ // so, enable any non-running cores on this node, and leave them
+ // running at the end...
+ ddr_print("N%d: %s: Starting cores (mask was 0x%lx)\n",
+ node, __FUNCTION__, bdk_get_running_coremask(node));
+ bdk_init_cores(node, ~0ULL);
+
+ // must test for L2C locked here, cannot go on with it unlocked
+ // FIXME: but we only need to worry about Node 0???
+ if (node == 0) {
+ if (!l2c_is_locked) { // is unlocked, must lock it now
+ ddr_print("N%d: %s: L2C was unlocked - locking it now\n", node, __FUNCTION__);
+ // FIXME: this should be common-ized; it currently matches bdk_init()...
+ bdk_l2c_lock_mem_region(node, 0, bdk_l2c_get_cache_size_bytes(node) * 3 / 4);
+ } else {
+ ddr_print("N%d: %s: L2C was already locked - continuing\n", node, __FUNCTION__);
+ }
+ } else {
+ ddr_print("N%d: %s: non-zero node, not worrying about L2C lock status\n", node, __FUNCTION__);
+ }
+
+ // call the tuning routines, no filtering...
+ tot_errs = bdk_libdram_tune_node(node);
+
+ // FIXME: only for node 0, unlock L2C if it was unlocked before...
+ if (node == 0) {
+ if (!l2c_is_locked) { // L2C was unlocked on entry; restore the unlocked state now
+ ddr_print("N%d: Node 0 L2C was unlocked before - unlocking it now\n", node);
+ // FIXME: this should be common-ized; it currently matches bdk_init()...
+ bdk_l2c_unlock_mem_region(node, 0, bdk_l2c_get_cache_size_bytes(node) * 3 / 4);
+ } else {
+ ddr_print("N%d: %s: L2C was already locked - leaving it locked\n", node, __FUNCTION__);
+ }
+ } else {
+ ddr_print("N%d: %s: non-zero node, not worrying about L2C lock status\n", node, __FUNCTION__);
+ }
+
+ // make sure to clear memory and any ECC errs when done...
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ return tot_errs;
+}
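+/* Usage sketch (illustrative): libdram_tune() is intended to be invoked from
+ a menu item after libdram_config() has succeeded; the return value is the
+ total error count from the tuning passes:
+
+ int errs = libdram_tune(node);
+ if (errs)
+ printf("N%d: tuning reported %d errors\n", node, errs);
+*/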
+
+/**
+ * Internal helper for DRAM margining of Write Voltage.
+ * This function is static, so users of libdram should call libdram_margin()
+ * instead. As a rule, only functions starting with "libdram_*" are part of
+ * the external API and should be used.
+ *
+ * @param node Node to test. This need not be the same node as the one running the code
+ *
+ * @return Number of margining errors (0 = pass), or -1 if the test was skipped
+ */
+static
+int libdram_margin_write_voltage(int node)
+{
+ int tot_errs;
+
+ // call the margining routine
+ tot_errs = perform_margin_write_voltage(node);
+
+ // make sure to clear memory and any ECC errs when done...
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ return tot_errs;
+}
+
+/**
+ * Internal helper for DRAM margining of Read Voltage.
+ * This function is static, so users of libdram should call libdram_margin()
+ * instead. As a rule, only functions starting with "libdram_*" are part of
+ * the external API and should be used.
+ *
+ * @param node Node to test. This need not be the same node as the one running the code
+ *
+ * @return Number of margining errors (0 = pass), or -1 if the test was skipped
+ */
+static
+int libdram_margin_read_voltage(int node)
+{
+ int tot_errs;
+
+ // call the margining routine
+ tot_errs = perform_margin_read_voltage(node);
+
+ // make sure to clear memory and any ECC errs when done...
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ return tot_errs;
+}
+
+/**
+ * Internal helper for DRAM margining of Write Timing.
+ * This function is static, so users of libdram should call libdram_margin()
+ * instead. As a rule, only functions starting with "libdram_*" are part of
+ * the external API and should be used.
+ *
+ * @param node Node to test. This need not be the same node as the one running the code
+ *
+ * @return Number of margining errors (0 = pass)
+ */
+static
+int libdram_margin_write_timing(int node)
+{
+ int tot_errs;
+
+ // call the tuning routine in margining mode: the third argument (do_tune)
+ // is 0 to select margining rather than tuning...
+ tot_errs = perform_dll_offset_tuning(node, /* write offsets */1, /* do_tune */0);
+
+ // make sure to clear memory and any ECC errs when done...
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ return tot_errs;
+}
+
+/**
+ * Internal helper for DRAM margining of Read Timing.
+ * This function is static, so users of libdram should call libdram_margin()
+ * instead. As a rule, only functions starting with "libdram_*" are part of
+ * the external API and should be used.
+ *
+ * @param node Node to test. This need not be the same node as the one running the code
+ *
+ * @return Number of margining errors (0 = pass)
+ */
+static
+int libdram_margin_read_timing(int node)
+{
+ int tot_errs;
+
+ // call the tuning routine in margining mode: the third argument (do_tune)
+ // is 0 to select margining rather than tuning...
+ tot_errs = perform_dll_offset_tuning(node, /* read offsets */2, /* do_tune */0);
+
+ // make sure to clear memory and any ECC errs when done...
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ return tot_errs;
+}
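+/* All four margining helpers above share the same pattern: run one margining
+ pass (perform_dll_offset_tuning() with mode 1 for write timing or mode 2 for
+ read timing, or perform_margin_*_voltage() for voltage), then clear memory
+ and any ECC errors, and return the error count. */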
+
+/**
+ * This is the main function for all DRAM margining. Users of libdram should call this function,
+ * avoiding the other internal functions. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to test. This need not be the same node as the one running the code
+ *
+ * @return Always 0; per-test results are printed in the summary
+ */
+int libdram_margin(int node)
+{
+ int ret_rt, ret_wt, ret_rv, ret_wv;
+ const char *risk[2] = { "Low Risk", "Needs Review" };
+ int l2c_is_locked = bdk_l2c_is_locked(node);
+
+ // for now, no margining on 81xx, until we can reduce the dynamic runtime size...
+ if (CAVIUM_IS_MODEL(CAVIUM_CN81XX)) {
+ printf("Sorry, margining is not available on 81xx yet...\n");
+ return 0;
+ }
+
+ dram_verbosity = bdk_config_get_int(BDK_CONFIG_DRAM_VERBOSE);
+
+ // the only way this entry point should be called is from a MENU item,
+ // so, enable any non-running cores on this node, and leave them
+ // running at the end...
+ ddr_print("N%d: %s: Starting cores (mask was 0x%lx)\n",
+ node, __FUNCTION__, bdk_get_running_coremask(node));
+ bdk_init_cores(node, ~0ULL);
+
+ // must test for L2C locked here, cannot go on with it unlocked
+ // FIXME: but we only need to worry about Node 0???
+ if (node == 0) {
+ if (!l2c_is_locked) { // is unlocked, must lock it now
+ ddr_print("N%d: %s: L2C was unlocked - locking it now\n", node, __FUNCTION__);
+ // FIXME: this should be common-ized; it currently matches bdk_init()...
+ bdk_l2c_lock_mem_region(node, 0, bdk_l2c_get_cache_size_bytes(node) * 3 / 4);
+ } else {
+ ddr_print("N%d: %s: L2C was already locked - continuing\n", node, __FUNCTION__);
+ }
+ } else {
+ ddr_print("N%d: %s: non-zero node, not worrying about L2C lock status\n", node, __FUNCTION__);
+ }
+
+ debug_print("N%d: Starting DRAM Margin ALL\n", node);
+ ret_rt = libdram_margin_read_timing(node);
+ ret_wt = libdram_margin_write_timing(node);
+ ret_rv = libdram_margin_read_voltage(node);
+ ret_wv = libdram_margin_write_voltage(node);
+ debug_print("N%d: DRAM Margin ALL finished\n", node);
+
+ /*
+ >>> Summary from DDR Margining tool:
+ >>> N0: Read Timing Margin : Low Risk
+ >>> N0: Write Timing Margin : Low Risk
+ >>> N0: Read Voltage Margin : Low Risk
+ >>> N0: Write Voltage Margin : Low Risk
+ */
+ printf(" \n");
+ printf("-------------------------------------\n");
+ printf(" \n");
+ printf("Summary from DDR Margining tool\n");
+ printf("N%d: Read Timing Margin : %s\n", node, risk[!!ret_rt]);
+ printf("N%d: Write Timing Margin : %s\n", node, risk[!!ret_wt]);
+
+ // voltage margining may have been skipped (return of -1) on DDR3 and/or THUNDER pass 1.x
+ // FIXME? would it be better to print an appropriate message here?
+ if (ret_rv != -1) printf("N%d: Read Voltage Margin : %s\n", node, risk[!!ret_rv]);
+ if (ret_wv != -1) printf("N%d: Write Voltage Margin : %s\n", node, risk[!!ret_wv]);
+
+ printf(" \n");
+ printf("-------------------------------------\n");
+ printf(" \n");
+
+ // FIXME: only for node 0, unlock L2C if it was unlocked before...
+ if (node == 0) {
+ if (!l2c_is_locked) { // L2C was unlocked on entry; restore the unlocked state now
+ ddr_print("N%d: Node 0 L2C was unlocked before - unlocking it now\n", node);
+ // FIXME: this should be common-ized; it currently matches bdk_init()...
+ bdk_l2c_unlock_mem_region(node, 0, bdk_l2c_get_cache_size_bytes(node) * 3 / 4);
+ } else {
+ ddr_print("N%d: %s: L2C was already locked - leaving it locked\n", node, __FUNCTION__);
+ }
+ } else {
+ ddr_print("N%d: %s: non-zero node, not worrying about L2C lock status\n", node, __FUNCTION__);
+ }
+
+ return 0;
+}
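+/* Reading the summary above: a nonzero error count from any margining pass
+ indexes risk[1] and prints "Needs Review"; a return of -1 from the voltage
+ passes means that test was skipped and its line is omitted entirely. */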
+
+/**
+ * Get the measured DRAM frequency after a call to libdram_config
+ *
+ * @param node Node to get frequency for
+ *
+ * @return Frequency in Hz
+ */
+uint32_t libdram_get_freq(int node)
+{
+ return measured_ddr_hertz[node];
+}
+
+/**
+ * Calculate the DRAM frequency from the DDR_PLL_CTL CSR settings
+ *
+ * @param node Node to get frequency for
+ * @param lmc  LMC to query (currently unused; LMC0's PLL settings are always read)
+ *
+ * @return Frequency in Hz
+ */
+uint32_t libdram_get_freq_from_pll(int node, int lmc)
+{
+ static const uint8_t _en[] = {1, 2, 3, 4, 5, 6, 7, 8, 10, 12};
+ BDK_CSR_INIT(c, node, BDK_LMCX_DDR_PLL_CTL(0));
+ // we check the alternate refclk select bit in LMC0 to indicate 100MHz use
+ // assumption: the alternate refclk is setup for 100MHz
+ uint64_t ddr_ref_hertz = (c.s.dclk_alt_refclk_sel) ? 100000000 : bdk_clock_get_rate(node, BDK_CLOCK_MAIN_REF);
+ uint64_t en = _en[c.cn83xx.ddr_ps_en];
+ uint64_t calculated_ddr_hertz = ddr_ref_hertz * (c.cn83xx.clkf + 1) / ((c.cn83xx.clkr + 1) * en);
+ return calculated_ddr_hertz;
+}
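+/* Worked example (illustrative register values): with the 50MHz main refclk
+ (dclk_alt_refclk_sel clear), clkf = 79, clkr = 1 and ddr_ps_en = 2 (so
+ en = _en[2] = 3):
+
+ 50000000 * (79 + 1) / ((1 + 1) * 3) = 666666666 Hz (~667 MHz DCLK)
+*/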
+
+#ifndef DRAM_CSR_WRITE_INLINE
+void dram_csr_write(bdk_node_t node, const char *csr_name, bdk_csr_type_t type, int busnum, int size, uint64_t address, uint64_t value)
+{
+ VB_PRT(VBL_CSRS, "N%d: DDR Config %s[%016lx] => %016lx\n", node, csr_name, address, value);
+ bdk_csr_write(node, type, busnum, size, address, value);
+}
+#endif