path: root/src/vendorcode/cavium/bdk/libdram
Diffstat (limited to 'src/vendorcode/cavium/bdk/libdram')
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-csr.h              86
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-env.c              83
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-env.h              48
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-gpio.h             46
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.c      8535
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.h        97
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-internal.h        201
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-l2c.c              69
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-l2c.h              45
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-print.h            86
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-spd.c             583
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-spd.h             166
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c      2012
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/dram-util.h             96
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.c   2165
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.h    124
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/libdram-config-load.c  262
-rw-r--r--  src/vendorcode/cavium/bdk/libdram/libdram.c              718
18 files changed, 15422 insertions, 0 deletions
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-csr.h b/src/vendorcode/cavium/bdk/libdram/dram-csr.h
new file mode 100644
index 0000000000..ffe1472a0b
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-csr.h
@@ -0,0 +1,86 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Functions and macros for libdram access to CSRs. These build
+ * on the normal BDK functions to allow logging of CSR writes based on
+ * the libdram verbosity level. Internal use only.
+ */
+
+/**
+ * Write a CSR, possibly logging it based on the verbosity
+ * level. You should use DRAM_CSR_WRITE() as a convenient
+ * wrapper.
+ *
+ * @param node     Node the CSR belongs to
+ * @param csr_name Name of the CSR, used when logging the write
+ * @param type     Bus type used to access the CSR
+ * @param busnum   Bus number the CSR lives on
+ * @param size     Width of the CSR access in bytes
+ * @param address  Address of the CSR
+ * @param value    Value to write
+ */
+#ifdef DRAM_CSR_WRITE_INLINE
+static inline void dram_csr_write(bdk_node_t node, const char *csr_name, bdk_csr_type_t type, int busnum, int size, uint64_t address, uint64_t value) __attribute__((always_inline));
+static inline void dram_csr_write(bdk_node_t node, const char *csr_name, bdk_csr_type_t type, int busnum, int size, uint64_t address, uint64_t value)
+{
+ VB_PRT(VBL_CSRS, "N%d: DDR Config %s[%016lx] => %016lx\n", node, csr_name, address, value);
+ bdk_csr_write(node, type, busnum, size, address, value);
+}
+#else
+extern void dram_csr_write(bdk_node_t node, const char *csr_name, bdk_csr_type_t type, int busnum, int size, uint64_t address, uint64_t value);
+#endif
+
+/**
+ * Macro to write a CSR, logging if necessary
+ */
+#define DRAM_CSR_WRITE(node, csr, value) \
+ dram_csr_write(node, basename_##csr, bustype_##csr, busnum_##csr, sizeof(typedef_##csr), csr, value)
+
+/**
+ * Macro to make a read, modify, and write sequence easy. The "code_block"
+ * should be replaced with a C code block or a comma separated list of
+ * "name.s.field = value", without the quotes.
+ */
+#define DRAM_CSR_MODIFY(name, node, csr, code_block) do { \
+ typedef_##csr name = {.u = bdk_csr_read(node, bustype_##csr, busnum_##csr, sizeof(typedef_##csr), csr)}; \
+ code_block; \
+ dram_csr_write(node, basename_##csr, bustype_##csr, busnum_##csr, sizeof(typedef_##csr), csr, name.u); \
+ } while (0)
+
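A minimal usage sketch of the two macros above (illustrative only: the BDK_LMCX_* accessors come from the BDK CSR headers, the node/ddr_interface_num variables are assumed to be in scope, and the calls mirror uses in dram-init-ddr3.c later in this diff):

    /* Logged write of an already-composed CSR value. */
    DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);

    /* Read-modify-write; the last argument is the field-update code block. */
    DRAM_CSR_MODIFY(c, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
                    c.s.dsk_dbg_clk_scaler = 3);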
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-env.c b/src/vendorcode/cavium/bdk/libdram/dram-env.c
new file mode 100644
index 0000000000..f25e6bdb26
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-env.c
@@ -0,0 +1,83 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "dram-internal.h"
+
+const char* lookup_env_parameter(const char *format, ...)
+{
+ const char *s;
+ unsigned long value;
+ va_list args;
+ char buffer[64];
+
+ va_start(args, format);
+ vsnprintf(buffer, sizeof(buffer)-1, format, args);
+ buffer[sizeof(buffer)-1] = '\0';
+ va_end(args);
+
+ if ((s = getenv(buffer)) != NULL)
+ {
+ value = strtoul(s, NULL, 0);
+ error_print("Parameter found in environment: %s = \"%s\" 0x%lx (%ld)\n",
+ buffer, s, value, value);
+ }
+ return s;
+}
+
+const char* lookup_env_parameter_ull(const char *format, ...)
+{
+ const char *s;
+ unsigned long long value;
+ va_list args;
+ char buffer[64];
+
+ va_start(args, format);
+ vsnprintf(buffer, sizeof(buffer)-1, format, args);
+ buffer[sizeof(buffer)-1] = '\0';
+ va_end(args);
+
+ if ((s = getenv(buffer)) != NULL)
+ {
+ value = strtoull(s, NULL, 0);
+ error_print("Parameter found in environment: %s = 0x%016llx\n",
+ buffer, value);
+ }
+ return s;
+}
+
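A brief usage sketch of these helpers (illustrative; it mirrors how Perform_Offset_Training() in dram-init-ddr3.c consumes the "ddr_phy_ctl" override, with lmc_phy_ctl assumed to be a bdk_lmcx_phy_ctl_t already read from the CSR). The helpers log the discovered value and return the raw string, so the caller still parses it with the radix and width it needs:

    const char *s;
    if ((s = lookup_env_parameter_ull("ddr_phy_ctl")) != NULL)
        lmc_phy_ctl.u = strtoull(s, NULL, 0);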
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-env.h b/src/vendorcode/cavium/bdk/libdram/dram-env.h
new file mode 100644
index 0000000000..0f100e1b25
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-env.h
@@ -0,0 +1,48 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Functions for accessing the environment for DRAM tweaking.
+ * Internal use only.
+ */
+
+
+extern const char *lookup_env_parameter(const char *format, ...) __attribute__ ((format(printf, 1, 2)));
+extern const char *lookup_env_parameter_ull(const char *format, ...) __attribute__ ((format(printf, 1, 2)));
+
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-gpio.h b/src/vendorcode/cavium/bdk/libdram/dram-gpio.h
new file mode 100644
index 0000000000..62c9a5c190
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-gpio.h
@@ -0,0 +1,46 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Functions for reporting DRAM init status through GPIOs.
+ * Useful for triggering scopes and such. Internal use only.
+ */
+
+extern void pulse_gpio_pin(bdk_node_t node, int pin, int usecs);
+
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.c b/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.c
new file mode 100644
index 0000000000..edb42312f1
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.c
@@ -0,0 +1,8535 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-l2c_tad.h"
+#include "libbdk-arch/bdk-csrs-mio_fus.h"
+#include "dram-internal.h"
+
+#define WODT_MASK_2R_1S 1 // FIXME: did not seem to make much difference with #152 1-slot?
+
+#define DESKEW_RODT_CTL 1
+
+// Set to 1 to use the feature whenever possible automatically.
+// When 0, however, the feature is still available, and it can
+// be enabled via envvar override "ddr_enable_write_deskew=1".
+#define ENABLE_WRITE_DESKEW_DEFAULT 0
+
+#define ENABLE_COMPUTED_VREF_ADJUSTMENT 1
+
+#define RLEXTRAS_PATCH 1 // write to unused RL rank entries
+#define WLEXTRAS_PATCH 1 // write to unused WL rank entries
+#define ADD_48_OHM_SKIP 1
+#define NOSKIP_40_48_OHM 1
+#define NOSKIP_48_STACKED 1
+#define NOSKIP_FOR_MINI 1
+#define NOSKIP_FOR_2S_1R 1
+#define MAJORITY_OVER_AVG 1
+#define RANK_MAJORITY MAJORITY_OVER_AVG && 1
+#define SW_WL_CHECK_PATCH 1 // check validity after SW adjust
+#define HW_WL_MAJORITY 1
+#define SWL_TRY_HWL_ALT HW_WL_MAJORITY && 1 // try HW WL base alternate if available when SW WL fails
+#define DISABLE_SW_WL_PASS_2 1
+
+#define HWL_BY_BYTE 0 // FIXME? set to 1 to do HWL a byte at a time (seemed to work better earlier?)
+
+#define USE_ORIG_TEST_DRAM_BYTE 1
+
+// collect and print LMC utilization using SWL software algorithm
+#define ENABLE_SW_WLEVEL_UTILIZATION 0
+
+#define COUNT_RL_CANDIDATES 1
+
+#define LOOK_FOR_STUCK_BYTE 0
+#define ENABLE_STUCK_BYTE_RESET 0
+
+#define FAILSAFE_CHECK 1
+
+#define PERFECT_BITMASK_COUNTING 1
+
+#define DAC_OVERRIDE_EARLY 1
+
+#define SWL_WITH_HW_ALTS_CHOOSE_SW 0 // FIXME: allow override?
+
+#define DEBUG_VALIDATE_BITMASK 0
+#if DEBUG_VALIDATE_BITMASK
+#define debug_bitmask_print ddr_print
+#else
+#define debug_bitmask_print(...)
+#endif
+
+#define ENABLE_SLOT_CTL_ACCESS 0
+#undef ENABLE_CUSTOM_RLEVEL_TABLE
+
+#define ENABLE_DISPLAY_MPR_PAGE 0
+#if ENABLE_DISPLAY_MPR_PAGE
+static void Display_MPR_Page_Location(bdk_node_t node, int rank,
+ int ddr_interface_num, int dimm_count,
+ int page, int location, uint64_t *mpr_data);
+#endif
+
+#define USE_L2_WAYS_LIMIT 1
+
+/* Read out Deskew Settings for DDR */
+
+typedef struct {
+ uint16_t bits[8];
+} deskew_bytes_t;
+typedef struct {
+ deskew_bytes_t bytes[9];
+} deskew_data_t;
+
+static void
+Get_Deskew_Settings(bdk_node_t node, int ddr_interface_num, deskew_data_t *dskdat)
+{
+ bdk_lmcx_phy_ctl_t phy_ctl;
+ bdk_lmcx_config_t lmc_config;
+ int bit_num, bit_index;
+ int byte_lane, byte_limit;
+ // NOTE: these are for pass 2.x
+ int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx
+ int bit_end = (is_t88p2) ? 9 : 8;
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
+
+ memset(dskdat, 0, sizeof(*dskdat));
+
+ BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
+ phy_ctl.s.dsk_dbg_clk_scaler = 3);
+
+ for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
+ bit_index = 0;
+ for (bit_num = 0; bit_num <= bit_end; ++bit_num) { // NOTE: this is for pass 2.x
+
+ if (bit_num == 4) continue;
+ if ((bit_num == 5) && is_t88p2) continue; // NOTE: this is for pass 2.x
+
+ // set byte lane and bit to read
+ BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
+ (phy_ctl.s.dsk_dbg_bit_sel = bit_num,
+ phy_ctl.s.dsk_dbg_byte_sel = byte_lane));
+
+ // start read sequence
+ BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
+ phy_ctl.s.dsk_dbg_rd_start = 1);
+
+ // poll for read sequence to complete
+ do {
+ phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
+ } while (phy_ctl.s.dsk_dbg_rd_complete != 1);
+
+ // record the data
+ dskdat->bytes[byte_lane].bits[bit_index] = phy_ctl.s.dsk_dbg_rd_data & 0x3ff;
+ bit_index++;
+
+ } /* for (bit_num = 0; bit_num <= bit_end; ++bit_num) */
+ } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */
+
+ return;
+}
+
+static void
+Display_Deskew_Data(bdk_node_t node, int ddr_interface_num,
+ deskew_data_t *dskdat, int print_enable)
+{
+ int byte_lane;
+ int bit_num;
+ uint16_t flags, deskew;
+ bdk_lmcx_config_t lmc_config;
+ int byte_limit;
+ const char *fc = " ?-=+*#&";
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
+
+ if (print_enable) {
+ VB_PRT(print_enable, "N%d.LMC%d: Deskew Data: Bit => :",
+ node, ddr_interface_num);
+ for (bit_num = 7; bit_num >= 0; --bit_num)
+ VB_PRT(print_enable, " %3d ", bit_num);
+ VB_PRT(print_enable, "\n");
+ }
+
+ for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
+ if (print_enable)
+ VB_PRT(print_enable, "N%d.LMC%d: Bit Deskew Byte %d %s :",
+ node, ddr_interface_num, byte_lane,
+ (print_enable >= VBL_TME) ? "FINAL" : " ");
+
+ for (bit_num = 7; bit_num >= 0; --bit_num) {
+
+ flags = dskdat->bytes[byte_lane].bits[bit_num] & 7;
+ deskew = dskdat->bytes[byte_lane].bits[bit_num] >> 3;
+
+ if (print_enable)
+ VB_PRT(print_enable, " %3d %c", deskew, fc[flags^1]);
+
+ } /* for (bit_num = 7; bit_num >= 0; --bit_num) */
+
+ if (print_enable)
+ VB_PRT(print_enable, "\n");
+
+ } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */
+
+ return;
+}
+
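Each value recorded above packs two pieces of information: the low three bits are lock/saturation flag bits and the upper bits are the deskew delay setting, which is exactly how Display_Deskew_Data() splits them for printing. A hypothetical decode helper (not part of the BDK sources) makes the packing explicit:

    static inline void decode_deskew_bits(uint16_t raw, int *flags, int *deskew)
    {
        *flags  = raw & 7;   /* lock/saturation status bits */
        *deskew = raw >> 3;  /* deskew delay setting */
    }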
+static int
+change_wr_deskew_ena(bdk_node_t node, int ddr_interface_num, int new_state)
+{
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+ int saved_wr_deskew_ena;
+
+ // return original WR_DESKEW_ENA setting
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ saved_wr_deskew_ena = !!GET_DDR_DLL_CTL3(wr_deskew_ena);
+ if (saved_wr_deskew_ena != !!new_state) { // write it only when changing it
+ SET_DDR_DLL_CTL3(wr_deskew_ena, !!new_state);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+ }
+ return saved_wr_deskew_ena;
+}
+
+typedef struct {
+ int saturated; // number saturated
+ int unlocked; // number unlocked
+ int nibrng_errs; // nibble range errors
+ int nibunl_errs; // nibble unlocked errors
+ //int nibsat_errs; // nibble saturation errors
+ int bitval_errs; // bit value errors
+#if LOOK_FOR_STUCK_BYTE
+ int bytes_stuck; // byte(s) stuck
+#endif
+} deskew_counts_t;
+
+#define MIN_BITVAL 17
+#define MAX_BITVAL 110
+
+static deskew_counts_t deskew_training_results;
+static int deskew_validation_delay = 10000; // FIXME: make this a var for overriding
+
+static void
+Validate_Read_Deskew_Training(bdk_node_t node, int rank_mask, int ddr_interface_num,
+ deskew_counts_t *counts, int print_enable)
+{
+ int byte_lane, bit_num, nib_num;
+ int nibrng_errs, nibunl_errs, bitval_errs;
+ //int nibsat_errs;
+ bdk_lmcx_config_t lmc_config;
+ int16_t nib_min[2], nib_max[2], nib_unl[2]/*, nib_sat[2]*/;
+ // NOTE: these are for pass 2.x
+ int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx
+ int bit_start = (is_t88p2) ? 9 : 8;
+ int byte_limit;
+#if LOOK_FOR_STUCK_BYTE
+ uint64_t bl_mask[2]; // enough for 128 values
+ int bit_values;
+#endif
+ deskew_data_t dskdat;
+ int bit_index;
+ int16_t flags, deskew;
+ const char *fc = " ?-=+*#&";
+ int saved_wr_deskew_ena;
+ int bit_last;
+
+ // save original WR_DESKEW_ENA setting, and disable it for read deskew
+ saved_wr_deskew_ena = change_wr_deskew_ena(node, ddr_interface_num, 0);
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena;
+
+ memset(counts, 0, sizeof(deskew_counts_t));
+
+ Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+
+ if (print_enable) {
+ VB_PRT(print_enable, "N%d.LMC%d: Deskew Settings: Bit => :",
+ node, ddr_interface_num);
+ for (bit_num = 7; bit_num >= 0; --bit_num)
+ VB_PRT(print_enable, " %3d ", bit_num);
+ VB_PRT(print_enable, "\n");
+ }
+
+ for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
+ if (print_enable)
+ VB_PRT(print_enable, "N%d.LMC%d: Bit Deskew Byte %d %s :",
+ node, ddr_interface_num, byte_lane,
+ (print_enable >= VBL_TME) ? "FINAL" : " ");
+
+ nib_min[0] = 127; nib_min[1] = 127;
+ nib_max[0] = 0; nib_max[1] = 0;
+ nib_unl[0] = 0; nib_unl[1] = 0;
+ //nib_sat[0] = 0; nib_sat[1] = 0;
+
+#if LOOK_FOR_STUCK_BYTE
+ bl_mask[0] = bl_mask[1] = 0;
+#endif
+
+ if ((lmc_config.s.mode32b == 1) && (byte_lane == 4)) {
+ bit_index = 3;
+ bit_last = 3;
+ if (print_enable)
+ VB_PRT(print_enable, " ");
+ } else {
+ bit_index = 7;
+ bit_last = bit_start;
+ }
+
+ for (bit_num = bit_last; bit_num >= 0; --bit_num) { // NOTE: this is for pass 2.x
+ if (bit_num == 4) continue;
+ if ((bit_num == 5) && is_t88p2) continue; // NOTE: this is for pass 2.x
+
+ nib_num = (bit_num > 4) ? 1 : 0;
+
+ flags = dskdat.bytes[byte_lane].bits[bit_index] & 7;
+ deskew = dskdat.bytes[byte_lane].bits[bit_index] >> 3;
+ bit_index--;
+
+ counts->saturated += !!(flags & 6);
+ counts->unlocked += !(flags & 1);
+
+ nib_unl[nib_num] += !(flags & 1);
+ //nib_sat[nib_num] += !!(flags & 6);
+
+ if (flags & 1) { // FIXME? only do range when locked
+ nib_min[nib_num] = min(nib_min[nib_num], deskew);
+ nib_max[nib_num] = max(nib_max[nib_num], deskew);
+ }
+
+#if LOOK_FOR_STUCK_BYTE
+ bl_mask[(deskew >> 6) & 1] |= 1UL << (deskew & 0x3f);
+#endif
+
+ if (print_enable)
+ VB_PRT(print_enable, " %3d %c", deskew, fc[flags^1]);
+
+ } /* for (bit_num = bit_last; bit_num >= 0; --bit_num) */
+
+ /*
+ Now look for nibble errors:
+
+ For bit 55, it looks like a bit deskew problem. When the upper nibble of byte 6
+ needs to go to saturation, bit 7 of byte 6 locks prematurely at 64.
+ For DIMMs with raw card A and B, can we reset the deskew training when we encounter this case?
+ The reset criteria should be looking at one nibble at a time for raw card A and B;
+ if the bit-deskew setting within a nibble is different by > 33, we'll issue a reset
+ to the bit deskew training.
+
+ LMC0 Bit Deskew Byte(6): 64 0 - 0 - 0 - 26 61 35 64
+ */
+ // upper nibble range, then lower nibble range
+ nibrng_errs = ((nib_max[1] - nib_min[1]) > 33) ? 1 : 0;
+ nibrng_errs |= ((nib_max[0] - nib_min[0]) > 33) ? 1 : 0;
+
+ // check for nibble all unlocked
+ nibunl_errs = ((nib_unl[0] == 4) || (nib_unl[1] == 4)) ? 1 : 0;
+
+ // check for nibble all saturated
+ //nibsat_errs = ((nib_sat[0] == 4) || (nib_sat[1] == 4)) ? 1 : 0;
+
+ // check for bit value errors, ie < 17 or > 110
+ // FIXME? assume max always > MIN_BITVAL and min < MAX_BITVAL
+ bitval_errs = ((nib_max[1] > MAX_BITVAL) || (nib_max[0] > MAX_BITVAL)) ? 1 : 0;
+ bitval_errs |= ((nib_min[1] < MIN_BITVAL) || (nib_min[0] < MIN_BITVAL)) ? 1 : 0;
+
+ if (((nibrng_errs != 0) || (nibunl_errs != 0) /*|| (nibsat_errs != 0)*/ || (bitval_errs != 0))
+ && print_enable)
+ {
+ VB_PRT(print_enable, " %c%c%c%c",
+ (nibrng_errs)?'R':' ',
+ (nibunl_errs)?'U':' ',
+ (bitval_errs)?'V':' ',
+ /*(nibsat_errs)?'S':*/' ');
+ }
+
+#if LOOK_FOR_STUCK_BYTE
+ bit_values = __builtin_popcountl(bl_mask[0]) + __builtin_popcountl(bl_mask[1]);
+ if (bit_values < 3) {
+ counts->bytes_stuck |= (1 << byte_lane);
+ if (print_enable)
+ VB_PRT(print_enable, "X");
+ }
+#endif
+ if (print_enable)
+ VB_PRT(print_enable, "\n");
+
+ counts->nibrng_errs |= (nibrng_errs << byte_lane);
+ counts->nibunl_errs |= (nibunl_errs << byte_lane);
+ //counts->nibsat_errs |= (nibsat_errs << byte_lane);
+ counts->bitval_errs |= (bitval_errs << byte_lane);
+
+#if LOOK_FOR_STUCK_BYTE
+ // just for completeness, allow print of the stuck values bitmask after the bytelane print
+ if ((bit_values < 3) && print_enable) {
+ VB_PRT(VBL_DEV, "N%d.LMC%d: Deskew byte %d STUCK on value 0x%016lx.%016lx\n",
+ node, ddr_interface_num, byte_lane,
+ bl_mask[1], bl_mask[0]);
+ }
+#endif
+
+ } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */
+
+ // restore original WR_DESKEW_ENA setting
+ change_wr_deskew_ena(node, ddr_interface_num, saved_wr_deskew_ena);
+
+ return;
+}
+
+unsigned short load_dac_override(int node, int ddr_interface_num,
+ int dac_value, int byte)
+{
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+ int bytex = (byte == 0x0A) ? byte : byte + 1; // single bytelanes incr by 1; A is for ALL
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+
+ SET_DDR_DLL_CTL3(byte_sel, bytex);
+ SET_DDR_DLL_CTL3(offset, dac_value >> 1); // only 7-bit field, use MS bits
+
+ ddr_dll_ctl3.s.bit_select = 0x9; /* No-op */
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ ddr_dll_ctl3.s.bit_select = 0xC; /* Vref bypass setting load */
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ ddr_dll_ctl3.s.bit_select = 0xD; /* Vref bypass on. */
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ ddr_dll_ctl3.s.bit_select = 0x9; /* No-op */
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ return ((unsigned short) GET_DDR_DLL_CTL3(offset));
+}
+
+// arg dac_or_dbi is 1 for DAC, 0 for DBI
+// returns 9 entries (bytelanes 0 through 8) in settings[]
+// returns 0 if OK, -1 if a problem
+int read_DAC_DBI_settings(int node, int ddr_interface_num,
+ int dac_or_dbi, int *settings)
+{
+ bdk_lmcx_phy_ctl_t phy_ctl;
+ int byte_lane, bit_num;
+ int deskew;
+ int dac_value;
+ int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx
+
+ phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
+ phy_ctl.s.dsk_dbg_clk_scaler = 3;
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), phy_ctl.u);
+
+ bit_num = (dac_or_dbi) ? 4 : 5;
+ if ((bit_num == 5) && !is_t88p2) { // NOTE: this is for pass 1.x
+ return -1;
+ }
+
+ for (byte_lane = 8; byte_lane >= 0 ; --byte_lane) { // FIXME: always assume ECC is available
+
+ //set byte lane and bit to read
+ phy_ctl.s.dsk_dbg_bit_sel = bit_num;
+ phy_ctl.s.dsk_dbg_byte_sel = byte_lane;
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), phy_ctl.u);
+
+ //start read sequence
+ phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
+ phy_ctl.s.dsk_dbg_rd_start = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), phy_ctl.u);
+
+ //poll for read sequence to complete
+ do {
+ phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
+ } while (phy_ctl.s.dsk_dbg_rd_complete != 1);
+
+ deskew = phy_ctl.s.dsk_dbg_rd_data /*>> 3*/; // leave the flag bits for DBI
+ dac_value = phy_ctl.s.dsk_dbg_rd_data & 0xff;
+
+ settings[byte_lane] = (dac_or_dbi) ? dac_value : deskew;
+
+ } /* for (byte_lane = 8; byte_lane >= 0 ; --byte_lane) { */
+
+ return 0;
+}
+
+// print out the DBI settings array
+// arg dac_or_dbi is 1 for DAC, 0 for DBI
+void
+display_DAC_DBI_settings(int node, int lmc, int dac_or_dbi,
+ int ecc_ena, int *settings, char *title)
+{
+ int byte;
+ int flags;
+ int deskew;
+ const char *fc = " ?-=+*#&";
+
+ ddr_print("N%d.LMC%d: %s %s Deskew Settings %d:0 :",
+ node, lmc, title, (dac_or_dbi)?"DAC":"DBI", 7+ecc_ena);
+ for (byte = (7+ecc_ena); byte >= 0; --byte) { // FIXME: what about 32-bit mode?
+ if (dac_or_dbi) { // DAC
+ flags = 1; // say its locked to get blank
+ deskew = settings[byte] & 0xff;
+ } else { // DBI
+ flags = settings[byte] & 7;
+ deskew = (settings[byte] >> 3) & 0x7f;
+ }
+ ddr_print(" %3d %c", deskew, fc[flags^1]);
+ }
+ ddr_print("\n");
+}
+
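A short usage sketch tying the two routines above together (illustrative; node, lmc and ecc_ena are assumed to be in scope):

    int dac_settings[9];
    /* dac_or_dbi = 1 selects the DAC values; 0 would select the DBI deskew data. */
    if (read_DAC_DBI_settings(node, lmc, 1, dac_settings) == 0)
        display_DAC_DBI_settings(node, lmc, 1, ecc_ena, dac_settings, "Current");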
+// Evaluate the DAC settings array
+static int
+evaluate_DAC_settings(int ddr_interface_64b, int ecc_ena, int *settings)
+{
+ int byte, dac;
+ int last = (ddr_interface_64b) ? 7 : 3;
+
+ // this looks only for DAC values that are EVEN
+ for (byte = (last+ecc_ena); byte >= 0; --byte) {
+ dac = settings[byte] & 0xff;
+ if ((dac & 1) == 0)
+ return 1;
+ }
+ return 0;
+}
+
+static void
+Perform_Offset_Training(bdk_node_t node, int rank_mask, int ddr_interface_num)
+{
+ bdk_lmcx_phy_ctl_t lmc_phy_ctl;
+ uint64_t orig_phy_ctl;
+ const char *s;
+
+ /*
+ * 6.9.8 LMC Offset Training
+ *
+ * LMC requires input-receiver offset training.
+ *
+ * 1. Write LMC(0)_PHY_CTL[DAC_ON] = 1
+ */
+ lmc_phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
+ orig_phy_ctl = lmc_phy_ctl.u;
+ lmc_phy_ctl.s.dac_on = 1;
+
+ // allow full CSR override
+ if ((s = lookup_env_parameter_ull("ddr_phy_ctl")) != NULL) {
+ lmc_phy_ctl.u = strtoull(s, NULL, 0);
+ }
+
+ // do not print or write if CSR does not change...
+ if (lmc_phy_ctl.u != orig_phy_ctl) {
+ ddr_print("PHY_CTL : 0x%016lx\n", lmc_phy_ctl.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), lmc_phy_ctl.u);
+ }
+
+#if 0
+ // FIXME? do we really need to show RODT here?
+ bdk_lmcx_comp_ctl2_t lmc_comp_ctl2;
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ ddr_print("Read ODT_CTL : 0x%x (%d ohms)\n",
+ lmc_comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[lmc_comp_ctl2.s.rodt_ctl]);
+#endif
+
+ /*
+ * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0B and
+ * LMC(0)_SEQ_CTL[INIT_START] = 1.
+ *
+ * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
+ */
+ perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0B); /* Offset training sequence */
+
+}
+
+static void
+Perform_Internal_VREF_Training(bdk_node_t node, int rank_mask, int ddr_interface_num)
+{
+ bdk_lmcx_ext_config_t ext_config;
+
+ /*
+ * 6.9.9 LMC Internal Vref Training
+ *
+ * LMC requires input-reference-voltage training.
+ *
+ * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 0.
+ */
+ ext_config.u = BDK_CSR_READ(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num));
+ ext_config.s.vrefint_seq_deskew = 0;
+
+ VB_PRT(VBL_SEQ, "N%d.LMC%d: Performing LMC sequence: vrefint_seq_deskew = %d\n",
+ node, ddr_interface_num, ext_config.s.vrefint_seq_deskew);
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num), ext_config.u);
+
+ /*
+ * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0a and
+ * LMC(0)_SEQ_CTL[INIT_START] = 1.
+ *
+ * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
+ */
+ perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0A); /* LMC Internal Vref Training */
+}
+
+#define dbg_avg(format, ...) VB_PRT(VBL_DEV, format, ##__VA_ARGS__)
+static int
+process_samples_average(int16_t *bytes, int num_samples, int lmc, int lane_no)
+{
+ int i, savg, sadj, sum = 0, rng, ret, asum, trunc;
+ int16_t smin = 32767, smax = -32768;
+
+ dbg_avg("DBG_AVG%d.%d: ", lmc, lane_no);
+
+ for (i = 0; i < num_samples; i++) {
+ sum += bytes[i];
+ if (bytes[i] < smin) smin = bytes[i];
+ if (bytes[i] > smax) smax = bytes[i];
+ dbg_avg(" %3d", bytes[i]);
+ }
+ rng = smax - smin + 1;
+
+ dbg_avg(" (%3d, %3d, %2d)", smin, smax, rng);
+
+ asum = sum - smin - smax;
+
+ savg = divide_nint(sum * 10, num_samples);
+
+ sadj = divide_nint(asum * 10, (num_samples - 2));
+
+ trunc = asum / (num_samples - 2);
+
+ dbg_avg(" [%3d.%d, %3d.%d, %3d]", savg/10, savg%10, sadj/10, sadj%10, trunc);
+
+ sadj = divide_nint(sadj, 10);
+ if (trunc & 1)
+ ret = trunc;
+ else if (sadj & 1)
+ ret = sadj;
+ else
+ ret = trunc + 1;
+
+ dbg_avg(" -> %3d\n", ret);
+
+ return ret;
+}
+
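A worked example of the averaging above (assuming divide_nint() rounds to the nearest integer): for samples {60, 62, 64, 70}, sum = 256, min = 60 and max = 70, so the trimmed sum is 126 and trunc = 126 / 2 = 63; since 63 is odd it is returned directly. The routine deliberately returns an odd value near the trimmed average, falling back to sadj and then trunc + 1 when trunc is even.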
+
+#define DEFAULT_SAT_RETRY_LIMIT 11 // 1 + 10 retries
+static int default_lock_retry_limit = 20; // 20 retries // FIXME: make a var for overriding
+
+static int
+Perform_Read_Deskew_Training(bdk_node_t node, int rank_mask, int ddr_interface_num,
+ int spd_rawcard_AorB, int print_flags, int ddr_interface_64b)
+{
+ int unsaturated, locked;
+ //int nibble_sat;
+ int sat_retries, lock_retries, lock_retries_total, lock_retries_limit;
+ int print_first;
+ int print_them_all;
+ deskew_counts_t dsk_counts;
+ uint64_t saved_wr_deskew_ena;
+#if DESKEW_RODT_CTL
+ bdk_lmcx_comp_ctl2_t comp_ctl2;
+ int save_deskew_rodt_ctl = -1;
+#endif
+ int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx
+
+ VB_PRT(VBL_FAE, "N%d.LMC%d: Performing Read Deskew Training.\n", node, ddr_interface_num);
+
+ // save original WR_DESKEW_ENA setting, and disable it for read deskew
+ saved_wr_deskew_ena = change_wr_deskew_ena(node, ddr_interface_num, 0);
+
+ sat_retries = 0;
+ lock_retries_total = 0;
+ unsaturated = 0;
+ print_first = VBL_FAE; // print the first one, FAE and above
+ print_them_all = dram_is_verbose(VBL_DEV4); // set to true for printing all normal deskew attempts
+
+ int loops, normal_loops = 1; // default to 1 NORMAL deskew training op...
+ const char *s;
+ if ((s = getenv("ddr_deskew_normal_loops")) != NULL) {
+ normal_loops = strtoul(s, NULL, 0);
+ }
+
+#if LOOK_FOR_STUCK_BYTE
+ // provide override for STUCK BYTE RESETS
+ int do_stuck_reset = ENABLE_STUCK_BYTE_RESET;
+ if ((s = getenv("ddr_enable_stuck_byte_reset")) != NULL) {
+ do_stuck_reset = !!strtoul(s, NULL, 0);
+ }
+#endif
+
+#if DESKEW_RODT_CTL
+ if ((s = getenv("ddr_deskew_rodt_ctl")) != NULL) {
+ int deskew_rodt_ctl = strtoul(s, NULL, 0);
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ save_deskew_rodt_ctl = comp_ctl2.s.rodt_ctl;
+ comp_ctl2.s.rodt_ctl = deskew_rodt_ctl;
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), comp_ctl2.u);
+ }
+#endif
+
+ lock_retries_limit = default_lock_retry_limit;
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) // added 81xx and 83xx
+ lock_retries_limit *= 2; // give pass 2.0 twice as many
+
+ do { /* while (sat_retries < sat_retry_limit) */
+
+ /*
+ * 6.9.10 LMC Deskew Training
+ *
+ * LMC requires input-read-data deskew training.
+ *
+ * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 1.
+ */
+ VB_PRT(VBL_SEQ, "N%d.LMC%d: Performing LMC sequence: Set vrefint_seq_deskew = 1\n",
+ node, ddr_interface_num);
+ DRAM_CSR_MODIFY(ext_config, node, BDK_LMCX_EXT_CONFIG(ddr_interface_num),
+ ext_config.s.vrefint_seq_deskew = 1); /* Set Deskew sequence */
+
+ /*
+ * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0A and
+ * LMC(0)_SEQ_CTL[INIT_START] = 1.
+ *
+ * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
+ */
+ DRAM_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
+ phy_ctl.s.phy_dsk_reset = 1); /* RESET Deskew sequence */
+ perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0A); /* LMC Deskew Training */
+
+ lock_retries = 0;
+
+ perform_read_deskew_training:
+ // maybe perform the NORMAL deskew training sequence multiple times before looking at lock status
+ for (loops = 0; loops < normal_loops; loops++) {
+ DRAM_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
+ phy_ctl.s.phy_dsk_reset = 0); /* Normal Deskew sequence */
+ perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0A); /* LMC Deskew Training */
+ }
+ // Moved this from Validate_Read_Deskew_Training
+ /* Allow deskew results to stabilize before evaluating them. */
+ bdk_wait_usec(deskew_validation_delay);
+
+ // Now go look at lock and saturation status...
+ Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, print_first);
+ if (print_first && !print_them_all) // after printing the first and not doing them all, no more
+ print_first = 0;
+
+ unsaturated = (dsk_counts.saturated == 0);
+ locked = (dsk_counts.unlocked == 0);
+ //nibble_sat = (dsk_counts.nibsat_errs != 0);
+
+ // only do locking retries if unsaturated or rawcard A or B, otherwise full SAT retry
+ if (unsaturated || (spd_rawcard_AorB && !is_t88p2 /*&& !nibble_sat*/)) {
+ if (!locked) { // and not locked
+ lock_retries++;
+ lock_retries_total++;
+ if (lock_retries <= lock_retries_limit) {
+ goto perform_read_deskew_training;
+ } else {
+ VB_PRT(VBL_TME, "N%d.LMC%d: LOCK RETRIES failed after %d retries\n",
+ node, ddr_interface_num, lock_retries_limit);
+ }
+ } else {
+ if (lock_retries_total > 0) // only print if we did try
+ VB_PRT(VBL_TME, "N%d.LMC%d: LOCK RETRIES successful after %d retries\n",
+ node, ddr_interface_num, lock_retries);
+ }
+ } /* if (unsaturated || spd_rawcard_AorB) */
+
+ ++sat_retries;
+
+#if LOOK_FOR_STUCK_BYTE
+ // FIXME: this is a bit of a hack at the moment...
+ // We want to force a Deskew RESET hopefully to unstick the bytes values
+ // and then resume normal deskew training as usual.
+ // For now, do only if it is all locked...
+ if (locked && (dsk_counts.bytes_stuck != 0)) {
+ BDK_CSR_INIT(lmc_config, node, BDK_LMCX_CONFIG(ddr_interface_num));
+ if (do_stuck_reset && lmc_config.s.mode_x4dev) { // FIXME: only when x4!!
+ unsaturated = 0; // to always make sure the while continues
+ VB_PRT(VBL_TME, "N%d.LMC%d: STUCK BYTE (0x%x), forcing deskew RESET\n",
+ node, ddr_interface_num, dsk_counts.bytes_stuck);
+ continue; // bypass the rest to get back to the RESET
+ } else {
+ VB_PRT(VBL_TME, "N%d.LMC%d: STUCK BYTE (0x%x), ignoring deskew RESET\n",
+ node, ddr_interface_num, dsk_counts.bytes_stuck);
+ }
+ }
+#endif
+ /*
+ * At this point, check for a DDR4 RDIMM that will not benefit from SAT retries; if so, no retries
+ */
+ if (spd_rawcard_AorB && !is_t88p2 /*&& !nibble_sat*/) {
+ VB_PRT(VBL_TME, "N%d.LMC%d: Read Deskew Training Loop: Exiting for RAWCARD == A or B.\n",
+ node, ddr_interface_num);
+ break; // no sat or lock retries
+ }
+
+ } while (!unsaturated && (sat_retries < DEFAULT_SAT_RETRY_LIMIT));
+
+#if DESKEW_RODT_CTL
+ if (save_deskew_rodt_ctl != -1) {
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ comp_ctl2.s.rodt_ctl = save_deskew_rodt_ctl;
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), comp_ctl2.u);
+ }
+#endif
+
+ VB_PRT(VBL_FAE, "N%d.LMC%d: Read Deskew Training %s. %d sat-retries, %d lock-retries\n",
+ node, ddr_interface_num,
+ (sat_retries >= DEFAULT_SAT_RETRY_LIMIT) ? "Timed Out" : "Completed",
+ sat_retries-1, lock_retries_total);
+
+ // restore original WR_DESKEW_ENA setting
+ change_wr_deskew_ena(node, ddr_interface_num, saved_wr_deskew_ena);
+
+ if ((dsk_counts.nibrng_errs != 0) || (dsk_counts.nibunl_errs != 0)) {
+ debug_print("N%d.LMC%d: NIBBLE ERROR(S) found, returning FAULT\n",
+ node, ddr_interface_num);
+ return -1; // we did retry locally, they did not help
+ }
+
+ // NOTE: we (currently) always print one last training validation before starting Read Leveling...
+
+ return 0;
+}
+
+static void
+do_write_deskew_op(bdk_node_t node, int ddr_interface_num,
+ int bit_sel, int byte_sel, int ena)
+{
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ SET_DDR_DLL_CTL3(bit_select, bit_sel);
+ SET_DDR_DLL_CTL3(byte_sel, byte_sel);
+ SET_DDR_DLL_CTL3(wr_deskew_ena, ena);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+}
+
+static void
+set_write_deskew_offset(bdk_node_t node, int ddr_interface_num,
+ int bit_sel, int byte_sel, int offset)
+{
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ SET_DDR_DLL_CTL3(bit_select, bit_sel);
+ SET_DDR_DLL_CTL3(byte_sel, byte_sel);
+ SET_DDR_DLL_CTL3(offset, offset);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ SET_DDR_DLL_CTL3(wr_deskew_ld, 1);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+}
+
+static void
+Update_Write_Deskew_Settings(bdk_node_t node, int ddr_interface_num, deskew_data_t *dskdat)
+{
+ bdk_lmcx_config_t lmc_config;
+ int bit_num;
+ int byte_lane, byte_limit;
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
+
+ for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
+ for (bit_num = 0; bit_num <= 7; ++bit_num) {
+
+ set_write_deskew_offset(node, ddr_interface_num, bit_num, byte_lane + 1,
+ dskdat->bytes[byte_lane].bits[bit_num]);
+
+ } /* for (bit_num = 0; bit_num <= 7; ++bit_num) */
+ } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */
+
+ return;
+}
+
+#define ALL_BYTES 0x0A
+#define BS_NOOP 0x09
+#define BS_RESET 0x0F
+#define BS_REUSE 0x0A
+
+// set all entries to the same value (used during training)
+static void
+Set_Write_Deskew_Settings(bdk_node_t node, int ddr_interface_num, int value)
+{
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+ int bit_num;
+
+ VB_PRT(VBL_DEV2, "N%d.LMC%d: SetWriteDeskew: WRITE %d\n", node, ddr_interface_num, value);
+
+ for (bit_num = 0; bit_num <= 7; ++bit_num) {
+
+ // write a bit-deskew value to all bit-lanes of all bytes
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ SET_DDR_DLL_CTL3(bit_select, bit_num);
+ SET_DDR_DLL_CTL3(byte_sel, ALL_BYTES); // FIXME? will this work in 32-bit mode?
+ SET_DDR_DLL_CTL3(offset, value);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ SET_DDR_DLL_CTL3(wr_deskew_ld, 1);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+
+ } /* for (bit_num = 0; bit_num <= 7; ++bit_num) */
+
+#if 0
+ // FIXME: for debug use only
+ Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+#endif
+
+ return;
+}
+
+typedef struct {
+ uint8_t count[8];
+ uint8_t start[8];
+ uint8_t best_count[8];
+ uint8_t best_start[8];
+} deskew_bytelane_t;
+typedef struct {
+ deskew_bytelane_t bytes[9];
+} deskew_rank_t;
+
+deskew_rank_t deskew_history[4];
+
+#define DSKVAL_INCR 4
+
+static void
+Neutral_Write_Deskew_Setup(bdk_node_t node, int ddr_interface_num)
+{
+ // first: NO-OP, Select all bytes, Disable write bit-deskew
+ ddr_print("N%d.LMC%d: NEUTRAL Write Deskew Setup: first: NOOP\n", node, ddr_interface_num);
+ do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 0);
+ //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+
+ // enable write bit-deskew and RESET the settings
+ ddr_print("N%d.LMC%d: NEUTRAL Write Deskew Setup: wr_ena: RESET\n", node, ddr_interface_num);
+ do_write_deskew_op(node, ddr_interface_num, BS_RESET, ALL_BYTES, 1);
+ //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+}
+
+static void
+Perform_Write_Deskew_Training(bdk_node_t node, int ddr_interface_num)
+{
+ deskew_data_t dskdat;
+ int byte, bit_num;
+ int dskval, rankx, rank_mask, active_ranks, errors, bit_errs;
+ uint64_t hw_rank_offset;
+ uint64_t bad_bits[2];
+ uint64_t phys_addr;
+ deskew_rank_t *dhp;
+ int num_lmcs = __bdk_dram_get_num_lmc(node);
+
+ BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(ddr_interface_num));
+ rank_mask = lmcx_config.s.init_status; // FIXME: is this right when we run?
+
+ // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
+ hw_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
+
+ VB_PRT(VBL_FAE, "N%d.LMC%d: Performing Write Deskew Training.\n", node, ddr_interface_num);
+
+ // first: NO-OP, Select all bytes, Disable write bit-deskew
+ ddr_print("N%d.LMC%d: WriteDeskewConfig: first: NOOP\n", node, ddr_interface_num);
+ do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 0);
+ //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+
+ // enable write bit-deskew and RESET the settings
+ ddr_print("N%d.LMC%d: WriteDeskewConfig: wr_ena: RESET\n", node, ddr_interface_num);
+ do_write_deskew_op(node, ddr_interface_num, BS_RESET, ALL_BYTES, 1);
+ //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+
+#if 0
+ // enable write bit-deskew and REUSE read bit-deskew settings
+ ddr_print("N%d.LMC%d: WriteDeskewConfig: wr_ena: REUSE\n", node, ddr_interface_num);
+ do_write_deskew_op(node, ddr_interface_num, BS_REUSE, ALL_BYTES, 1);
+ Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+#endif
+
+#if 1
+ memset(deskew_history, 0, sizeof(deskew_history));
+
+ for (dskval = 0; dskval < 128; dskval += DSKVAL_INCR) {
+
+ Set_Write_Deskew_Settings(node, ddr_interface_num, dskval);
+
+ active_ranks = 0;
+ for (rankx = 0; rankx < 4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+ dhp = &deskew_history[rankx];
+ phys_addr = hw_rank_offset * active_ranks;
+ active_ranks++;
+
+ errors = test_dram_byte_hw(node, ddr_interface_num, phys_addr, 0, bad_bits);
+
+ for (byte = 0; byte <= 8; byte++) { // do bytelane(s)
+
+ // check errors
+ if (errors & (1 << byte)) { // yes, error(s) in the byte lane in this rank
+ bit_errs = ((byte == 8) ? bad_bits[1] : bad_bits[0] >> (8 * byte)) & 0xFFULL;
+
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Value %d: Address 0x%012lx errors 0x%x/0x%x\n",
+ node, ddr_interface_num, rankx, byte,
+ dskval, phys_addr, errors, bit_errs);
+
+ for (bit_num = 0; bit_num <= 7; bit_num++) {
+ if (!(bit_errs & (1 << bit_num)))
+ continue;
+ if (dhp->bytes[byte].count[bit_num] > 0) { // had started run
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Bit %d Value %d: stopping a run here\n",
+ node, ddr_interface_num, rankx, byte, bit_num, dskval);
+ dhp->bytes[byte].count[bit_num] = 0; // stop now
+ }
+ } /* for (bit_num = 0; bit_num <= 7; bit_num++) */
+
+ // FIXME: else had not started run - nothing else to do?
+ } else { // no error in the byte lane
+ for (bit_num = 0; bit_num <= 7; bit_num++) {
+ if (dhp->bytes[byte].count[bit_num] == 0) { // first success, set run start
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Bit %d Value %d: starting a run here\n",
+ node, ddr_interface_num, rankx, byte, bit_num, dskval);
+ dhp->bytes[byte].start[bit_num] = dskval;
+ }
+ dhp->bytes[byte].count[bit_num] += DSKVAL_INCR; // bump run length
+
+ // is this now the biggest window?
+ if (dhp->bytes[byte].count[bit_num] > dhp->bytes[byte].best_count[bit_num]) {
+ dhp->bytes[byte].best_count[bit_num] = dhp->bytes[byte].count[bit_num];
+ dhp->bytes[byte].best_start[bit_num] = dhp->bytes[byte].start[bit_num];
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Bit %d Value %d: updating best to %d/%d\n",
+ node, ddr_interface_num, rankx, byte, bit_num, dskval,
+ dhp->bytes[byte].best_start[bit_num],
+ dhp->bytes[byte].best_count[bit_num]);
+ }
+ } /* for (bit_num = 0; bit_num <= 7; bit_num++) */
+ } /* error in the byte lane */
+ } /* for (byte = 0; byte <= 8; byte++) */
+ } /* for (rankx = 0; rankx < 4; rankx++) */
+ } /* for (dskval = 0; dskval < 128; dskval++) */
+
+
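+ // Merge the per-rank results: for every bit, intersect the best passing
+ // windows found on each active rank and program the midpoint of the
+ // common window as the final write bit-deskew setting.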
+ for (byte = 0; byte <= 8; byte++) { // do bytelane(s)
+
+ for (bit_num = 0; bit_num <= 7; bit_num++) { // do bits
+ int bit_beg, bit_end;
+
+ bit_beg = 0;
+ bit_end = 128;
+
+ for (rankx = 0; rankx < 4; rankx++) { // merge ranks
+ int rank_beg, rank_end, rank_count;
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ dhp = &deskew_history[rankx];
+ rank_beg = dhp->bytes[byte].best_start[bit_num];
+ rank_count = dhp->bytes[byte].best_count[bit_num];
+
+ if (!rank_count) {
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: Byte %d Bit %d: EMPTY\n",
+ node, ddr_interface_num, rankx, byte, bit_num);
+ continue;
+ }
+
+ bit_beg = max(bit_beg, rank_beg);
+ rank_end = rank_beg + rank_count - DSKVAL_INCR;
+ bit_end = min(bit_end, rank_end);
+
+ } /* for (rankx = 0; rankx < 4; rankx++) */
+
+ dskdat.bytes[byte].bits[bit_num] = (bit_end + bit_beg) / 2;
+
+ } /* for (bit_num = 0; bit_num <= 7; bit_num++) */
+ } /* for (byte = 0; byte <= 8; byte++) */
+
+#endif
+
+ // update the write bit-deskew settings with final settings
+ ddr_print("N%d.LMC%d: WriteDeskewConfig: wr_ena: UPDATE\n", node, ddr_interface_num);
+ Update_Write_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+
+ // last: NO-OP, Select all bytes, MUST leave write bit-deskew enabled
+ ddr_print("N%d.LMC%d: WriteDeskewConfig: last: wr_ena: NOOP\n", node, ddr_interface_num);
+ do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 1);
+ //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+
+#if 0
+ // FIXME: disable/delete this when write bit-deskew works...
+ // final: NO-OP, Select all bytes, do NOT leave write bit-deskew enabled
+ ddr_print("N%d.LMC%d: WriteDeskewConfig: final: read: NOOP\n", node, ddr_interface_num);
+ do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 0);
+ Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
+ Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
+#endif
+}
+
+#define SCALING_FACTOR (1000)
+#define Dprintf debug_print // make this "ddr_print" for extra debug output below
+static int compute_Vref_1slot_2rank(int rtt_wr, int rtt_park, int dqx_ctl, int rank_count)
+{
+ uint64_t Reff_s;
+ uint64_t Rser_s = 15;
+ uint64_t Vdd = 1200;
+ uint64_t Vref;
+ //uint64_t Vl;
+ uint64_t rtt_wr_s = (((rtt_wr == 0) || (rtt_wr == 99)) ? 1*1024*1024 : rtt_wr); // 99 == HiZ
+ uint64_t rtt_park_s = (((rtt_park == 0) || ((rank_count == 1) && (rtt_wr != 0))) ? 1*1024*1024 : rtt_park);
+ uint64_t dqx_ctl_s = (dqx_ctl == 0 ? 1*1024*1024 : dqx_ctl);
+ int Vref_value;
+ uint64_t Rangepc = 6000; // range1 base is 60%
+ uint64_t Vrefpc;
+ int Vref_range = 0;
+
+ Dprintf("rtt_wr = %d, rtt_park = %d, dqx_ctl = %d\n", rtt_wr, rtt_park, dqx_ctl);
+ Dprintf("rtt_wr_s = %d, rtt_park_s = %d, dqx_ctl_s = %d\n", rtt_wr_s, rtt_park_s, dqx_ctl_s);
+
+ Reff_s = divide_nint((rtt_wr_s * rtt_park_s) , (rtt_wr_s + rtt_park_s));
+ Dprintf("Reff_s = %d\n", Reff_s);
+
+ //Vl = (((Rser_s + dqx_ctl_s) * SCALING_FACTOR) / (Rser_s + dqx_ctl_s + Reff_s)) * Vdd / SCALING_FACTOR;
+ //printf("Vl = %d\n", Vl);
+
+ Vref = (((Rser_s + dqx_ctl_s) * SCALING_FACTOR) / (Rser_s + dqx_ctl_s + Reff_s)) + SCALING_FACTOR;
+ Dprintf("Vref = %d\n", Vref);
+
+ Vref = (Vref * Vdd) / 2 / SCALING_FACTOR;
+ Dprintf("Vref = %d\n", Vref);
+
+ Vrefpc = (Vref * 100 * 100) / Vdd;
+ Dprintf("Vrefpc = %d\n", Vrefpc);
+
+ if (Vrefpc < Rangepc) { // < range1 base, use range2
+ Vref_range = 1 << 6; // set bit A6 for range2
+ Rangepc = 4500; // range2 base is 45%
+ }
+
+ Vref_value = divide_nint(Vrefpc - Rangepc, 65);
+ if (Vref_value < 0)
+ Vref_value = Vref_range; // set to base of range as lowest value
+ else
+ Vref_value |= Vref_range;
+ Dprintf("Vref_value = %d (0x%02x)\n", Vref_value, Vref_value);
+
+ debug_print("rtt_wr:%d, rtt_park:%d, dqx_ctl:%d, Vref_value:%d (0x%x)\n",
+ rtt_wr, rtt_park, dqx_ctl, Vref_value, Vref_value);
+
+ return Vref_value;
+}
+static int compute_Vref_2slot_2rank(int rtt_wr, int rtt_park_00, int rtt_park_01, int dqx_ctl, int rtt_nom)
+{
+ //uint64_t Rser = 15;
+ uint64_t Vdd = 1200;
+ //uint64_t Vref;
+ uint64_t Vl, Vlp, Vcm;
+ uint64_t Rd0, Rd1, Rpullup;
+ uint64_t rtt_wr_s = (((rtt_wr == 0) || (rtt_wr == 99)) ? 1*1024*1024 : rtt_wr); // 99 == HiZ
+ uint64_t rtt_park_00_s = (rtt_park_00 == 0 ? 1*1024*1024 : rtt_park_00);
+ uint64_t rtt_park_01_s = (rtt_park_01 == 0 ? 1*1024*1024 : rtt_park_01);
+ uint64_t dqx_ctl_s = (dqx_ctl == 0 ? 1*1024*1024 : dqx_ctl);
+ uint64_t rtt_nom_s = (rtt_nom == 0 ? 1*1024*1024 : rtt_nom);
+ int Vref_value;
+ uint64_t Rangepc = 6000; // range1 base is 60%
+ uint64_t Vrefpc;
+ int Vref_range = 0;
+
+ // Rd0 = (RTT_NOM /*parallel*/ RTT_WR) + 15 = ((RTT_NOM * RTT_WR) / (RTT_NOM + RTT_WR)) + 15
+ Rd0 = divide_nint((rtt_nom_s * rtt_wr_s), (rtt_nom_s + rtt_wr_s)) + 15;
+ //printf("Rd0 = %ld\n", Rd0);
+
+ // Rd1 = (RTT_PARK_00 /*parallel*/ RTT_PARK_01) + 15 = ((RTT_PARK_00 * RTT_PARK_01) / (RTT_PARK_00 + RTT_PARK_01)) + 15
+ Rd1 = divide_nint((rtt_park_00_s * rtt_park_01_s), (rtt_park_00_s + rtt_park_01_s)) + 15;
+ //printf("Rd1 = %ld\n", Rd1);
+
+ // Rpullup = Rd0 /*parallel*/ Rd1 = (Rd0 * Rd1) / (Rd0 + Rd1)
+ Rpullup = divide_nint((Rd0 * Rd1), (Rd0 + Rd1));
+ //printf("Rpullup = %ld\n", Rpullup);
+
+ // Vl = (DQX_CTL / (DQX_CTL + Rpullup)) * 1.2
+ Vl = divide_nint((dqx_ctl_s * Vdd), (dqx_ctl_s + Rpullup));
+ //printf("Vl = %ld\n", Vl);
+
+ // Vlp = ((15 / Rd0) * (1.2 - Vl)) + Vl
+ Vlp = divide_nint((15 * (Vdd - Vl)), Rd0) + Vl;
+ //printf("Vlp = %ld\n", Vlp);
+
+ // Vcm = (Vlp + 1.2) / 2
+ Vcm = divide_nint((Vlp + Vdd), 2);
+ //printf("Vcm = %ld\n", Vcm);
+
+ // Vrefpc = (Vcm / 1.2) * 100
+ Vrefpc = divide_nint((Vcm * 100 * 100), Vdd);
+ //printf("Vrefpc = %ld\n", Vrefpc);
+
+ if (Vrefpc < Rangepc) { // < range1 base, use range2
+ Vref_range = 1 << 6; // set bit A6 for range2
+ Rangepc = 4500; // range2 base is 45%
+ }
+
+ Vref_value = divide_nint(Vrefpc - Rangepc, 65);
+ if (Vref_value < 0)
+ Vref_value = Vref_range; // set to base of range as lowest value
+ else
+ Vref_value |= Vref_range;
+ //printf("Vref_value = %d (0x%02x)\n", Vref_value, Vref_value);
+
+ debug_print("rtt_wr:%d, rtt_park_00:%d, rtt_park_01:%d, dqx_ctl:%d, rtt_nom:%d, Vref_value:%d (0x%x)\n",
+ rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom, Vref_value, Vref_value);
+
+ return Vref_value;
+}
+
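To make the 2-slot computation above concrete, a worked example (assuming divide_nint() rounds to the nearest integer): with rtt_wr = 240, rtt_park_00 = rtt_park_01 = 120, dqx_ctl = 40 and rtt_nom = 60 ohms, Rd0 = 63, Rd1 = 75, Rpullup = 34, Vl = 649 mV, Vlp = 780 mV and Vcm = 990 mV, giving Vrefpc = 8250 (82.5% of Vdd); that is above the 60% range-1 base, so the function returns divide_nint(8250 - 6000, 65) = 35 with the range-1 encoding (bit A6 clear).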
+// NOTE: only call this for DIMMs with 1 or 2 ranks, not 4.
+int
+compute_vref_value(bdk_node_t node, int ddr_interface_num,
+ int rankx, int dimm_count, int rank_count,
+ impedence_values_t *imp_values, int is_stacked_die)
+{
+ int computed_final_vref_value = 0;
+
+ /* Calculate an override of the measured Vref value
+ but only for configurations we know how to...*/
+ // we have code for 2-rank DIMMs in both 1-slot or 2-slot configs,
+ // and can use the 2-rank 1-slot code for 1-rank DIMMs in 1-slot configs
+ // and can use the 2-rank 2-slot code for 1-rank DIMMs in 2-slot configs
+
+ int rtt_wr, dqx_ctl, rtt_nom, index;
+ bdk_lmcx_modereg_params1_t lmc_modereg_params1;
+ bdk_lmcx_modereg_params2_t lmc_modereg_params2;
+ bdk_lmcx_comp_ctl2_t comp_ctl2;
+
+ lmc_modereg_params1.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num));
+ lmc_modereg_params2.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num));
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ dqx_ctl = imp_values->dqx_strength[comp_ctl2.s.dqx_ctl];
+
+ // WR always comes from the current rank
+ index = (lmc_modereg_params1.u >> (rankx * 12 + 5)) & 0x03;
+ if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
+ index |= lmc_modereg_params1.u >> (51+rankx-2) & 0x04;
+ }
+ rtt_wr = imp_values->rtt_wr_ohms [index];
+
+ // separate calculations for 1 vs 2 DIMMs per LMC
+ if (dimm_count == 1) {
+ // PARK comes from this rank if 1-rank, otherwise other rank
+ index = (lmc_modereg_params2.u >> ((rankx ^ (rank_count - 1)) * 10 + 0)) & 0x07;
+ int rtt_park = imp_values->rtt_nom_ohms[index];
+ computed_final_vref_value = compute_Vref_1slot_2rank(rtt_wr, rtt_park, dqx_ctl, rank_count);
+ } else {
+ // get both PARK values from the other DIMM
+ index = (lmc_modereg_params2.u >> ((rankx ^ 0x02) * 10 + 0)) & 0x07;
+ int rtt_park_00 = imp_values->rtt_nom_ohms[index];
+ index = (lmc_modereg_params2.u >> ((rankx ^ 0x03) * 10 + 0)) & 0x07;
+ int rtt_park_01 = imp_values->rtt_nom_ohms[index];
+ // NOM comes from this rank if 1-rank, otherwise other rank
+ index = (lmc_modereg_params1.u >> ((rankx ^ (rank_count - 1)) * 12 + 9)) & 0x07;
+ rtt_nom = imp_values->rtt_nom_ohms[index];
+ computed_final_vref_value = compute_Vref_2slot_2rank(rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom);
+ }
+
+#if ENABLE_COMPUTED_VREF_ADJUSTMENT
+ {
+ int saved_final_vref_value = computed_final_vref_value;
+ BDK_CSR_INIT(lmc_config, node, BDK_LMCX_CONFIG(ddr_interface_num));
+ /*
+          New computed Vref = existing computed Vref - X
+
+          The value of X depends on the configuration. Both #122 and #139 are 2Rx4 RDIMMs,
+          while #124 is a stacked-die 2Rx4, so the results fall into two conditions:
+
+          1. Stacked Die: 2Rx4
+             1-slot: offset = 7, i.e. New computed Vref = existing computed Vref - 7
+             2-slot: offset = 6
+
+          2. Regular: 2Rx4
+             1-slot: offset = 3
+             2-slot: offset = 2
+ */
+ // we know we never get called unless DDR4, so test just the other conditions
+ if((!!__bdk_dram_is_rdimm(node, 0)) &&
+ (rank_count == 2) &&
+ (lmc_config.s.mode_x4dev))
+ { // it must first be RDIMM and 2-rank and x4
+ if (is_stacked_die) { // now do according to stacked die or not...
+ computed_final_vref_value -= (dimm_count == 1) ? 7 : 6;
+ } else {
+ computed_final_vref_value -= (dimm_count == 1) ? 3 : 2;
+ }
+ // we have adjusted it, so print it out if verbosity is right
+ VB_PRT(VBL_TME, "N%d.LMC%d.R%d: adjusting computed vref from %2d (0x%02x) to %2d (0x%02x)\n",
+ node, ddr_interface_num, rankx,
+ saved_final_vref_value, saved_final_vref_value,
+ computed_final_vref_value, computed_final_vref_value);
+ }
+ }
+#endif
+ return computed_final_vref_value;
+}
+
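+/* Extract/insert the 3-bit RTT_WR value for rank x within MODEREG_PARAMS1:
+   the low two bits sit at bit position x*12+5 and the extended third bit at
+   bit 51+x (the same layout read in compute_vref_value() above). */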
+static unsigned int EXTR_WR(uint64_t u, int x)
+{
+ return (unsigned int)(((u >> (x*12+5)) & 0x3UL) | ((u >> (51+x-2)) & 0x4UL));
+}
+static void INSRT_WR(uint64_t *up, int x, int v)
+{
+ uint64_t u = *up;
+ u &= ~(((0x3UL) << (x*12+5)) | ((0x1UL) << (51+x)));
+ *up = (u | ((v & 0x3UL) << (x*12+5)) | ((v & 0x4UL) << (51+x-2)));
+ return;
+}
+
+static int encode_row_lsb_ddr3(int row_lsb, int ddr_interface_wide)
+{
+ int encoded_row_lsb;
+ int row_lsb_start = 14;
+
+ /* Decoding for row_lsb */
+ /* 000: row_lsb = mem_adr[14] */
+ /* 001: row_lsb = mem_adr[15] */
+ /* 010: row_lsb = mem_adr[16] */
+ /* 011: row_lsb = mem_adr[17] */
+ /* 100: row_lsb = mem_adr[18] */
+ /* 101: row_lsb = mem_adr[19] */
+ /* 110: row_lsb = mem_adr[20] */
+ /* 111: RESERVED */
+
+    encoded_row_lsb = row_lsb - row_lsb_start;
+
+ return encoded_row_lsb;
+}
+
+static int encode_pbank_lsb_ddr3(int pbank_lsb, int ddr_interface_wide)
+{
+ int encoded_pbank_lsb;
+
+ /* Decoding for pbank_lsb */
+ /* 0000:DIMM = mem_adr[28] / rank = mem_adr[27] (if RANK_ENA) */
+ /* 0001:DIMM = mem_adr[29] / rank = mem_adr[28] " */
+ /* 0010:DIMM = mem_adr[30] / rank = mem_adr[29] " */
+ /* 0011:DIMM = mem_adr[31] / rank = mem_adr[30] " */
+ /* 0100:DIMM = mem_adr[32] / rank = mem_adr[31] " */
+ /* 0101:DIMM = mem_adr[33] / rank = mem_adr[32] " */
+ /* 0110:DIMM = mem_adr[34] / rank = mem_adr[33] " */
+ /* 0111:DIMM = 0 / rank = mem_adr[34] " */
+ /* 1000-1111: RESERVED */
+
+    int pbank_lsb_start = 28;
+
+ encoded_pbank_lsb = pbank_lsb - pbank_lsb_start;
+
+ return encoded_pbank_lsb;
+}
+
+static uint64_t octeon_read_lmcx_ddr3_rlevel_dbg(bdk_node_t node, int ddr_interface_num, int idx)
+{
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num),
+ c.s.byte = idx);
+ BDK_CSR_READ(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num));
+ BDK_CSR_INIT(rlevel_dbg, node, BDK_LMCX_RLEVEL_DBG(ddr_interface_num));
+ return rlevel_dbg.s.bitmask;
+}
+
+static uint64_t octeon_read_lmcx_ddr3_wlevel_dbg(bdk_node_t node, int ddr_interface_num, int idx)
+{
+ bdk_lmcx_wlevel_dbg_t wlevel_dbg;
+
+ wlevel_dbg.u = 0;
+ wlevel_dbg.s.byte = idx;
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_DBG(ddr_interface_num), wlevel_dbg.u);
+ BDK_CSR_READ(node, BDK_LMCX_WLEVEL_DBG(ddr_interface_num));
+
+ wlevel_dbg.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_DBG(ddr_interface_num));
+ return wlevel_dbg.s.bitmask;
+}
+
+
+/*
+ * Apply a filter to the BITMASK results returned from Octeon
+ * read-leveling to determine the most likely delay result. This
+ * computed delay may be used to qualify the delay result returned by
+ * Octeon. Accumulate an error penalty for invalid characteristics of
+ * the bitmask so that they can be used to select the most reliable
+ * results.
+ *
+ * The algorithm searches for the largest contiguous MASK within a
+ * maximum RANGE of bits beginning with the MSB.
+ *
+ * 1. A MASK with a WIDTH less than 4 will be penalized
+ * 2. Bubbles in the bitmask that occur before or after the MASK
+ * will be penalized
+ * 3. If there are no trailing bubbles then extra bits that occur
+ * beyond the maximum RANGE will be penalized.
+ *
+ * +++++++++++++++++++++++++++++++++++++++++++++++++++
+ * + +
+ * + e.g. bitmask = 27B00 +
+ * + +
+ * + 63 +--- mstart 0 +
+ * + | | | +
+ * + | +---------+ +--- fb | +
+ * + | | range | | | +
+ * + V V V V V +
+ * + +
+ * + 0 0 ... 1 0 0 1 1 1 1 0 1 1 0 0 0 0 0 0 0 0 +
+ * + +
+ * + ^ ^ ^ +
+ * + | | mask| +
+ * + lb ---+ +-----+ +
+ * + width +
+ * + +
+ * +++++++++++++++++++++++++++++++++++++++++++++++++++
+ */
+#define RLEVEL_BITMASK_TRAILING_BITS_ERROR 5
+#define RLEVEL_BITMASK_BUBBLE_BITS_ERROR 11 // FIXME? now less than TOOLONG
+#define RLEVEL_BITMASK_NARROW_ERROR 6
+#define RLEVEL_BITMASK_BLANK_ERROR 100
+#define RLEVEL_BITMASK_TOOLONG_ERROR 12
+
+#define MASKRANGE_BITS 6
+#define MASKRANGE ((1 << MASKRANGE_BITS) - 1)
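+
+/* Hand trace of the 0x27B00 example above (bits 8,9,11,12,13,14,17 set):
+   the widest mask that fits is 4 bits wide at mstart = 11, so the narrow,
+   extras and toolong penalties are all 0.  Bit 10 is a leading bubble (+11),
+   bits 15 and 16 are trailing bubbles (+11 each), and bit 17 is a stray
+   trailing 1 (+5), for a total score (errors) of 38. */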
+
+static int
+validate_ddr3_rlevel_bitmask(rlevel_bitmask_t *rlevel_bitmask_p, int ddr_type)
+{
+ int i;
+ int errors = 0;
+ uint64_t mask = 0; /* Used in 64-bit comparisons */
+ int8_t mstart = 0;
+ uint8_t width = 0;
+ uint8_t firstbit = 0;
+ uint8_t lastbit = 0;
+ uint8_t bubble = 0;
+ uint8_t tbubble = 0;
+ uint8_t blank = 0;
+ uint8_t narrow = 0;
+ uint8_t trailing = 0;
+ uint64_t bitmask = rlevel_bitmask_p->bm;
+ uint8_t extras = 0;
+ uint8_t toolong = 0;
+ uint64_t temp;
+
+ if (bitmask == 0) {
+ blank += RLEVEL_BITMASK_BLANK_ERROR;
+ } else {
+
+ /* Look for fb, the first bit */
+ temp = bitmask;
+ while (!(temp & 1)) {
+ firstbit++;
+ temp >>= 1;
+ }
+
+ /* Look for lb, the last bit */
+ lastbit = firstbit;
+ while ((temp >>= 1))
+ lastbit++;
+
+ /* Start with the max range to try to find the largest mask within the bitmask data */
+ width = MASKRANGE_BITS;
+ for (mask = MASKRANGE; mask > 0; mask >>= 1, --width) {
+ for (mstart = lastbit - width + 1; mstart >= firstbit; --mstart) {
+ temp = mask << mstart;
+ if ((bitmask & temp) == temp)
+ goto done_now;
+ }
+ }
+ done_now:
+ /* look for any more contiguous 1's to the right of mstart */
+ if (width == MASKRANGE_BITS) { // only when maximum mask
+ while ((bitmask >> (mstart - 1)) & 1) { // slide right over more 1's
+ --mstart;
+ if (ddr_type == DDR4_DRAM) // only for DDR4
+ extras++; // count the number of extra bits
+ }
+ }
+
+ /* Penalize any extra 1's beyond the maximum desired mask */
+ if (extras > 0)
+ toolong = RLEVEL_BITMASK_TOOLONG_ERROR * ((1 << extras) - 1);
+
+ /* Detect if bitmask is too narrow. */
+ if (width < 4)
+ narrow = (4 - width) * RLEVEL_BITMASK_NARROW_ERROR;
+
+ /* detect leading bubble bits, that is, any 0's between first and mstart */
+ temp = bitmask >> (firstbit + 1);
+ i = mstart - firstbit - 1;
+ while (--i >= 0) {
+ if ((temp & 1) == 0)
+ bubble += RLEVEL_BITMASK_BUBBLE_BITS_ERROR;
+ temp >>= 1;
+ }
+
+ temp = bitmask >> (mstart + width + extras);
+ i = lastbit - (mstart + width + extras - 1);
+ while (--i >= 0) {
+ if (temp & 1) { /* Detect 1 bits after the trailing end of the mask, including last. */
+ trailing += RLEVEL_BITMASK_TRAILING_BITS_ERROR;
+ } else { /* Detect trailing bubble bits, that is, any 0's between end-of-mask and last */
+ tbubble += RLEVEL_BITMASK_BUBBLE_BITS_ERROR;
+ }
+ temp >>= 1;
+ }
+ }
+
+ errors = bubble + tbubble + blank + narrow + trailing + toolong;
+
+ /* Pass out useful statistics */
+ rlevel_bitmask_p->mstart = mstart;
+ rlevel_bitmask_p->width = width;
+
+ VB_PRT(VBL_DEV2, "bm:%08lx mask:%02lx, width:%2u, mstart:%2d, fb:%2u, lb:%2u"
+ " (bu:%2d, tb:%2d, bl:%2d, n:%2d, t:%2d, x:%2d) errors:%3d %s\n",
+ (unsigned long) bitmask, mask, width, mstart,
+ firstbit, lastbit, bubble, tbubble, blank, narrow,
+ trailing, toolong, errors, (errors) ? "=> invalid" : "");
+
+ return errors;
+}
+
+static int compute_ddr3_rlevel_delay(uint8_t mstart, uint8_t width, bdk_lmcx_rlevel_ctl_t rlevel_ctl)
+{
+ int delay;
+
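+    /* e.g. mstart = 11, width = 4: with OFFSET_EN=0 this picks the rounded-down
+       center of the mask, (4-1)/2 + 11 = 12; with OFFSET_EN=1 and OFFSET=2 it
+       picks max(11, 11+4-1-2) = 12. */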
+ debug_bitmask_print(" offset_en:%d", rlevel_ctl.cn8.offset_en);
+
+ if (rlevel_ctl.s.offset_en) {
+ delay = max(mstart, mstart + width - 1 - rlevel_ctl.s.offset);
+ } else {
+ /* if (rlevel_ctl.s.offset) { */ /* Experimental */
+ if (0) {
+ delay = max(mstart + rlevel_ctl.s.offset, mstart + 1);
+            /* Ensure that the offset delay falls within the bitmask */
+ delay = min(delay, mstart + width-1);
+ } else {
+ delay = (width - 1) / 2 + mstart; /* Round down */
+ /* delay = (width/2) + mstart; */ /* Round up */
+ }
+ }
+
+ return delay;
+}
+
+#define WLEVEL_BYTE_BITS 5
+#define WLEVEL_BYTE_MSK ((1UL << 5) - 1)
+
+static void update_wlevel_rank_struct(bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank,
+ int byte, int delay)
+{
+ bdk_lmcx_wlevel_rankx_t temp_wlevel_rank;
+ if (byte >= 0 && byte <= 8) {
+ temp_wlevel_rank.u = lmc_wlevel_rank->u;
+ temp_wlevel_rank.u &= ~(WLEVEL_BYTE_MSK << (WLEVEL_BYTE_BITS * byte));
+ temp_wlevel_rank.u |= ((delay & WLEVEL_BYTE_MSK) << (WLEVEL_BYTE_BITS * byte));
+ lmc_wlevel_rank->u = temp_wlevel_rank.u;
+ }
+}
+
+static int get_wlevel_rank_struct(bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank,
+ int byte)
+{
+ int delay = 0;
+ if (byte >= 0 && byte <= 8) {
+ delay = ((lmc_wlevel_rank->u) >> (WLEVEL_BYTE_BITS * byte)) & WLEVEL_BYTE_MSK;
+ }
+ return delay;
+}
+
+#if 0
+// entry = 1 is valid, entry = 0 is invalid
+static int
+validity_matrix[4][4] = {[0] = {1,1,1,0}, // valid pairs when cv == 0: 0,0 + 0,1 + 0,2 == "7"
+                         [1] = {0,1,1,1}, // valid pairs when cv == 1: 1,1 + 1,2 + 1,3 == "E"
+                         [2] = {1,0,1,1}, // valid pairs when cv == 2: 2,2 + 2,3 + 2,0 == "D"
+                         [3] = {1,1,0,1}}; // valid pairs when cv == 3: 3,3 + 3,0 + 3,1 == "B"
+#endif
+static int
+validate_seq(int *wl, int *seq)
+{
+ int seqx; // sequence index, step through the sequence array
+ int bitnum;
+ seqx = 0;
+ while (seq[seqx+1] >= 0) { // stop on next seq entry == -1
+ // but now, check current versus next
+#if 0
+ if ( !validity_matrix [wl[seq[seqx]]] [wl[seq[seqx+1]]] )
+ return 1;
+#else
+ bitnum = (wl[seq[seqx]] << 2) | wl[seq[seqx+1]];
+ if (!((1 << bitnum) & 0xBDE7)) // magic validity number (see matrix above)
+ return 1;
+#endif
+ seqx++;
+ }
+ return 0;
+}
+
+static int
+Validate_HW_WL_Settings(bdk_node_t node, int ddr_interface_num,
+ bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank,
+ int ecc_ena)
+{
+ int wl[9], byte, errors;
+
+    // arrange the check sequences to match the byte-lane ordering on each DIMM type
+ int useq[] = { 0,1,2,3,8,4,5,6,7,-1 }; // index 0 has byte 0, etc, ECC in middle
+ int rseq1[] = { 8,3,2,1,0,-1 }; // index 0 is ECC, then go down
+ int rseq2[] = { 4,5,6,7,-1 }; // index 0 has byte 4, then go up
+ int useqno[] = { 0,1,2,3,4,5,6,7,-1 }; // index 0 has byte 0, etc, no ECC
+ int rseq1no[] = { 3,2,1,0,-1 }; // index 0 is byte 3, then go down, no ECC
+
+ // in the CSR, bytes 0-7 are always data, byte 8 is ECC
+ for (byte = 0; byte < 8+ecc_ena; byte++) {
+ wl[byte] = (get_wlevel_rank_struct(lmc_wlevel_rank, byte) >> 1) & 3; // preprocess :-)
+ }
+
+ errors = 0;
+ if (__bdk_dram_is_rdimm(node, 0) != 0) { // RDIMM order
+ errors = validate_seq(wl, (ecc_ena) ? rseq1 : rseq1no);
+ errors += validate_seq(wl, rseq2);
+ } else { // UDIMM order
+ errors = validate_seq(wl, (ecc_ena) ? useq : useqno);
+ }
+
+ return errors;
+}
+
+#define RLEVEL_BYTE_BITS 6
+#define RLEVEL_BYTE_MSK ((1UL << 6) - 1)
+
+static void update_rlevel_rank_struct(bdk_lmcx_rlevel_rankx_t *lmc_rlevel_rank,
+ int byte, int delay)
+{
+ bdk_lmcx_rlevel_rankx_t temp_rlevel_rank;
+ if (byte >= 0 && byte <= 8) {
+ temp_rlevel_rank.u = lmc_rlevel_rank->u & ~(RLEVEL_BYTE_MSK << (RLEVEL_BYTE_BITS * byte));
+ temp_rlevel_rank.u |= ((delay & RLEVEL_BYTE_MSK) << (RLEVEL_BYTE_BITS * byte));
+ lmc_rlevel_rank->u = temp_rlevel_rank.u;
+ }
+}
+
+#if RLEXTRAS_PATCH || !DISABLE_SW_WL_PASS_2
+static int get_rlevel_rank_struct(bdk_lmcx_rlevel_rankx_t *lmc_rlevel_rank,
+ int byte)
+{
+ int delay = 0;
+ if (byte >= 0 && byte <= 8) {
+ delay = ((lmc_rlevel_rank->u) >> (RLEVEL_BYTE_BITS * byte)) & RLEVEL_BYTE_MSK;
+ }
+ return delay;
+}
+#endif
+
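+/* The rlevel_byte[] working array is kept in "unpacked" order: data bytes 0-3,
+   the ECC byte at index 4, then data bytes 4-7 at indices 5-8 (ECC in the
+   middle, matching the UDIMM byte ordering used by Validate_HW_WL_Settings
+   above, presumably so that adjacent entries are physically adjacent byte
+   lanes).  These helpers convert between that order and the CSR layout,
+   which keeps ECC in byte8. */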
+static void unpack_rlevel_settings(int ddr_interface_bytemask, int ecc_ena,
+ rlevel_byte_data_t *rlevel_byte,
+ bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank)
+{
+ if ((ddr_interface_bytemask & 0xff) == 0xff) {
+ if (ecc_ena) {
+ rlevel_byte[8].delay = lmc_rlevel_rank.cn83xx.byte7;
+ rlevel_byte[7].delay = lmc_rlevel_rank.cn83xx.byte6;
+ rlevel_byte[6].delay = lmc_rlevel_rank.cn83xx.byte5;
+ rlevel_byte[5].delay = lmc_rlevel_rank.cn83xx.byte4;
+ rlevel_byte[4].delay = lmc_rlevel_rank.cn83xx.byte8; /* ECC */
+ } else {
+ rlevel_byte[7].delay = lmc_rlevel_rank.cn83xx.byte7;
+ rlevel_byte[6].delay = lmc_rlevel_rank.cn83xx.byte6;
+ rlevel_byte[5].delay = lmc_rlevel_rank.cn83xx.byte5;
+ rlevel_byte[4].delay = lmc_rlevel_rank.cn83xx.byte4;
+ }
+ } else {
+ rlevel_byte[8].delay = lmc_rlevel_rank.cn83xx.byte8; /* unused */
+ rlevel_byte[7].delay = lmc_rlevel_rank.cn83xx.byte7; /* unused */
+ rlevel_byte[6].delay = lmc_rlevel_rank.cn83xx.byte6; /* unused */
+ rlevel_byte[5].delay = lmc_rlevel_rank.cn83xx.byte5; /* unused */
+ rlevel_byte[4].delay = lmc_rlevel_rank.cn83xx.byte4; /* ECC */
+ }
+ rlevel_byte[3].delay = lmc_rlevel_rank.cn83xx.byte3;
+ rlevel_byte[2].delay = lmc_rlevel_rank.cn83xx.byte2;
+ rlevel_byte[1].delay = lmc_rlevel_rank.cn83xx.byte1;
+ rlevel_byte[0].delay = lmc_rlevel_rank.cn83xx.byte0;
+}
+
+static void pack_rlevel_settings(int ddr_interface_bytemask, int ecc_ena,
+ rlevel_byte_data_t *rlevel_byte,
+ bdk_lmcx_rlevel_rankx_t *final_rlevel_rank)
+{
+ bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank = *final_rlevel_rank;
+
+ if ((ddr_interface_bytemask & 0xff) == 0xff) {
+ if (ecc_ena) {
+ lmc_rlevel_rank.cn83xx.byte7 = rlevel_byte[8].delay;
+ lmc_rlevel_rank.cn83xx.byte6 = rlevel_byte[7].delay;
+ lmc_rlevel_rank.cn83xx.byte5 = rlevel_byte[6].delay;
+ lmc_rlevel_rank.cn83xx.byte4 = rlevel_byte[5].delay;
+ lmc_rlevel_rank.cn83xx.byte8 = rlevel_byte[4].delay; /* ECC */
+ } else {
+ lmc_rlevel_rank.cn83xx.byte7 = rlevel_byte[7].delay;
+ lmc_rlevel_rank.cn83xx.byte6 = rlevel_byte[6].delay;
+ lmc_rlevel_rank.cn83xx.byte5 = rlevel_byte[5].delay;
+ lmc_rlevel_rank.cn83xx.byte4 = rlevel_byte[4].delay;
+ }
+ } else {
+ lmc_rlevel_rank.cn83xx.byte8 = rlevel_byte[8].delay;
+ lmc_rlevel_rank.cn83xx.byte7 = rlevel_byte[7].delay;
+ lmc_rlevel_rank.cn83xx.byte6 = rlevel_byte[6].delay;
+ lmc_rlevel_rank.cn83xx.byte5 = rlevel_byte[5].delay;
+ lmc_rlevel_rank.cn83xx.byte4 = rlevel_byte[4].delay;
+ }
+ lmc_rlevel_rank.cn83xx.byte3 = rlevel_byte[3].delay;
+ lmc_rlevel_rank.cn83xx.byte2 = rlevel_byte[2].delay;
+ lmc_rlevel_rank.cn83xx.byte1 = rlevel_byte[1].delay;
+ lmc_rlevel_rank.cn83xx.byte0 = rlevel_byte[0].delay;
+
+ *final_rlevel_rank = lmc_rlevel_rank;
+}
+
+#if !DISABLE_SW_WL_PASS_2
+static void rlevel_to_wlevel(bdk_lmcx_rlevel_rankx_t *lmc_rlevel_rank,
+ bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank, int byte)
+{
+ int byte_delay = get_rlevel_rank_struct(lmc_rlevel_rank, byte);
+
+ debug_print("Estimating Wlevel delay byte %d: ", byte);
+ debug_print("Rlevel=%d => ", byte_delay);
+ byte_delay = divide_roundup(byte_delay,2) & 0x1e;
+ debug_print("Wlevel=%d\n", byte_delay);
+ update_wlevel_rank_struct(lmc_wlevel_rank, byte, byte_delay);
+}
+#endif /* !DISABLE_SW_WL_PASS_2 */
+
+/* Delay trend: constant=0, decreasing=-1, increasing=1 */
+static int calc_delay_trend(int v)
+{
+ if (v == 0)
+ return (0);
+ if (v < 0)
+ return (-1);
+ return 1;
+}
+
+/* Evaluate the delay sequence across the whole range of byte delays while
+** keeping track of the overall delay trend, increasing or decreasing.
+** If the trend changes, charge an error amount to the score.
+*/
+
+// NOTE: "max_adj_delay_inc" argument is, by default, 1 for DDR3 and 2 for DDR4
+
+static int nonsequential_delays(rlevel_byte_data_t *rlevel_byte,
+ int start, int end, int max_adj_delay_inc)
+{
+ int error = 0;
+ int delay_trend, prev_trend = 0;
+ int byte_idx;
+ int delay_inc;
+ int delay_diff;
+ int byte_err;
+
+ for (byte_idx = start; byte_idx < end; ++byte_idx) {
+ byte_err = 0;
+
+ delay_diff = rlevel_byte[byte_idx+1].delay - rlevel_byte[byte_idx].delay;
+ delay_trend = calc_delay_trend(delay_diff);
+
+ debug_bitmask_print("Byte %d: %2d, Byte %d: %2d, delay_trend: %2d, prev_trend: %2d",
+ byte_idx+0, rlevel_byte[byte_idx+0].delay,
+ byte_idx+1, rlevel_byte[byte_idx+1].delay,
+ delay_trend, prev_trend);
+
+ /* Increment error each time the trend changes to the opposite direction.
+ */
+ if ((prev_trend != 0) && (delay_trend != 0) && (prev_trend != delay_trend)) {
+ byte_err += RLEVEL_NONSEQUENTIAL_DELAY_ERROR;
+ prev_trend = delay_trend;
+ debug_bitmask_print(" => Nonsequential byte delay");
+ }
+
+ delay_inc = _abs(delay_diff); // how big was the delay change, if any
+
+        /* Even if the trend did not change to the opposite direction, check
+           the magnitude of the change, and scale the penalty by the amount
+           by which the change exceeds the provided limit.
+         */
+ if ((max_adj_delay_inc != 0) && (delay_inc > max_adj_delay_inc)) {
+ byte_err += (delay_inc - max_adj_delay_inc) * RLEVEL_ADJACENT_DELAY_ERROR;
+ debug_bitmask_print(" => Adjacent delay error");
+ }
+
+ debug_bitmask_print("\n");
+ if (delay_trend != 0)
+ prev_trend = delay_trend;
+
+ rlevel_byte[byte_idx+1].sqerrs = byte_err;
+ error += byte_err;
+ }
+ return error;
+}
+
+static int roundup_ddr3_wlevel_bitmask(int bitmask)
+{
+ int shifted_bitmask;
+ int leader;
+ int delay;
+
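+    /* e.g. a bitmask of 0x38 (first set bit at odd position 3) rounds up to a
+       delay of 4, while 0x3C (first set bit at even position 2) yields 2. */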
+ for (leader=0; leader<8; ++leader) {
+ shifted_bitmask = (bitmask>>leader);
+ if ((shifted_bitmask&1) == 0)
+ break;
+ }
+
+ for (/*leader=leader*/; leader<16; ++leader) {
+ shifted_bitmask = (bitmask>>(leader%8));
+ if (shifted_bitmask&1)
+ break;
+ }
+
+ delay = (leader & 1) ? leader + 1 : leader;
+ delay = delay % 8;
+
+ return delay;
+}
+
+/* Check to see if any custom offset values are provided */
+static int is_dll_offset_provided(const int8_t *dll_offset_table)
+{
+ int i;
+ if (dll_offset_table != NULL) {
+ for (i=0; i<9; ++i) {
+ if (dll_offset_table[i] != 0)
+ return (1);
+ }
+ }
+ return (0);
+}
+
+/////////////////// These are the RLEVEL settings display routines
+
+// flags
+#define WITH_NOTHING 0
+#define WITH_SCORE 1
+#define WITH_AVERAGE 2
+#define WITH_FINAL 4
+#define WITH_COMPUTE 8
+static void do_display_RL(bdk_node_t node, int ddr_interface_num,
+ bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank,
+ int rank, int flags, int score)
+{
+ char score_buf[16];
+ if (flags & WITH_SCORE)
+ snprintf(score_buf, sizeof(score_buf), "(%d)", score);
+ else {
+ score_buf[0] = ' '; score_buf[1] = 0;
+ }
+
+ char *msg_buf;
+ char hex_buf[20];
+ if (flags & WITH_AVERAGE) {
+ msg_buf = " DELAY AVERAGES ";
+ } else if (flags & WITH_FINAL) {
+ msg_buf = " FINAL SETTINGS ";
+ } else if (flags & WITH_COMPUTE) {
+ msg_buf = " COMPUTED DELAYS ";
+ } else {
+ snprintf(hex_buf, sizeof(hex_buf), "0x%016lX", lmc_rlevel_rank.u);
+ msg_buf = hex_buf;
+ }
+
+ ddr_print("N%d.LMC%d.R%d: Rlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d %s\n",
+ node, ddr_interface_num, rank,
+ lmc_rlevel_rank.s.status,
+ msg_buf,
+ lmc_rlevel_rank.cn83xx.byte8,
+ lmc_rlevel_rank.cn83xx.byte7,
+ lmc_rlevel_rank.cn83xx.byte6,
+ lmc_rlevel_rank.cn83xx.byte5,
+ lmc_rlevel_rank.cn83xx.byte4,
+ lmc_rlevel_rank.cn83xx.byte3,
+ lmc_rlevel_rank.cn83xx.byte2,
+ lmc_rlevel_rank.cn83xx.byte1,
+ lmc_rlevel_rank.cn83xx.byte0,
+ score_buf
+ );
+}
+
+static inline void
+display_RL(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank)
+{
+ do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 0, 0);
+}
+
+static inline void
+display_RL_with_score(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score)
+{
+ do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 1, score);
+}
+
+#if !PICK_BEST_RANK_SCORE_NOT_AVG
+static inline void
+display_RL_with_average(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score)
+{
+ do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 3, score);
+}
+#endif
+
+static inline void
+display_RL_with_final(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank)
+{
+ do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 4, 0);
+}
+
+static inline void
+display_RL_with_computed(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score)
+{
+ do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 9, score);
+}
+
+// flag values
+#define WITH_RODT_BLANK 0
+#define WITH_RODT_SKIPPING 1
+#define WITH_RODT_BESTROW 2
+#define WITH_RODT_BESTSCORE 3
+// control
+#define SKIP_SKIPPING 1
+
+static const char *with_rodt_canned_msgs[4] = { " ", "SKIPPING ", "BEST ROW ", "BEST SCORE" };
+
+static void display_RL_with_RODT(bdk_node_t node, int ddr_interface_num,
+ bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score,
+ int nom_ohms, int rodt_ohms, int flag)
+{
+ const char *msg_buf;
+ char set_buf[20];
+#if SKIP_SKIPPING
+ if (flag == WITH_RODT_SKIPPING) return;
+#endif
+ msg_buf = with_rodt_canned_msgs[flag];
+ if (nom_ohms < 0) {
+ snprintf(set_buf, sizeof(set_buf), " RODT %3d ", rodt_ohms);
+ } else {
+ snprintf(set_buf, sizeof(set_buf), "NOM %3d RODT %3d", nom_ohms, rodt_ohms);
+ }
+
+ VB_PRT(VBL_TME, "N%d.LMC%d.R%d: Rlevel %s %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d (%d)\n",
+ node, ddr_interface_num, rank,
+ set_buf, msg_buf,
+ lmc_rlevel_rank.cn83xx.byte8,
+ lmc_rlevel_rank.cn83xx.byte7,
+ lmc_rlevel_rank.cn83xx.byte6,
+ lmc_rlevel_rank.cn83xx.byte5,
+ lmc_rlevel_rank.cn83xx.byte4,
+ lmc_rlevel_rank.cn83xx.byte3,
+ lmc_rlevel_rank.cn83xx.byte2,
+ lmc_rlevel_rank.cn83xx.byte1,
+ lmc_rlevel_rank.cn83xx.byte0,
+ score
+ );
+
+    // FIXME: does this help make the output a little easier to focus on?
+ if (flag == WITH_RODT_BESTSCORE) {
+ VB_PRT(VBL_DEV, "-----------\n");
+ }
+}
+
+static void
+do_display_WL(bdk_node_t node, int ddr_interface_num, bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank, int rank, int flags)
+{
+ char *msg_buf;
+ char hex_buf[20];
+ int vbl;
+ if (flags & WITH_FINAL) {
+ msg_buf = " FINAL SETTINGS ";
+ vbl = VBL_NORM;
+ } else {
+ snprintf(hex_buf, sizeof(hex_buf), "0x%016lX", lmc_wlevel_rank.u);
+ msg_buf = hex_buf;
+ vbl = VBL_FAE;
+ }
+
+ VB_PRT(vbl, "N%d.LMC%d.R%d: Wlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
+ node, ddr_interface_num, rank,
+ lmc_wlevel_rank.s.status,
+ msg_buf,
+ lmc_wlevel_rank.s.byte8,
+ lmc_wlevel_rank.s.byte7,
+ lmc_wlevel_rank.s.byte6,
+ lmc_wlevel_rank.s.byte5,
+ lmc_wlevel_rank.s.byte4,
+ lmc_wlevel_rank.s.byte3,
+ lmc_wlevel_rank.s.byte2,
+ lmc_wlevel_rank.s.byte1,
+ lmc_wlevel_rank.s.byte0
+ );
+}
+
+static inline void
+display_WL(bdk_node_t node, int ddr_interface_num, bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank, int rank)
+{
+ do_display_WL(node, ddr_interface_num, lmc_wlevel_rank, rank, WITH_NOTHING);
+}
+
+static inline void
+display_WL_with_final(bdk_node_t node, int ddr_interface_num, bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank, int rank)
+{
+ do_display_WL(node, ddr_interface_num, lmc_wlevel_rank, rank, WITH_FINAL);
+}
+
+// pretty-print bitmask adjuster
+static uint64_t
+PPBM(uint64_t bm)
+{
+ if (bm != 0ul) {
+ while ((bm & 0x0fful) == 0ul)
+ bm >>= 4;
+ }
+ return bm;
+}
+
+// xlate PACKED index to UNPACKED index to use with rlevel_byte
+#define XPU(i,e) (((i) < 4)?(i):((i)<8)?(i)+(e):4)
+// xlate UNPACKED index to PACKED index to use with rlevel_bitmask
+#define XUP(i,e) (((i) < 4)?(i):((i)>4)?(i)-(e):8)
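+// e.g. with ecc_ena=1: PACKED 0-3 -> UNPACKED 0-3, PACKED 4-7 -> UNPACKED 5-8,
+// and PACKED 8 (ECC) -> UNPACKED 4; XUP performs the inverse mapping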
+
+// flag values
+#define WITH_WL_BITMASKS 0
+#define WITH_RL_BITMASKS 1
+#define WITH_RL_MASK_SCORES 2
+#define WITH_RL_SEQ_SCORES 3
+static void
+do_display_BM(bdk_node_t node, int ddr_interface_num, int rank, void *bm, int flags, int ecc_ena)
+{
+ int ecc = !!ecc_ena;
+ if (flags == WITH_WL_BITMASKS) { // wlevel_bitmask array in PACKED index order, so just print them
+ int *bitmasks = (int *)bm;
+
+ ddr_print("N%d.LMC%d.R%d: Wlevel Debug Results : %05x %05x %05x %05x %05x %05x %05x %05x %05x\n",
+ node, ddr_interface_num, rank,
+ bitmasks[8],
+ bitmasks[7],
+ bitmasks[6],
+ bitmasks[5],
+ bitmasks[4],
+ bitmasks[3],
+ bitmasks[2],
+ bitmasks[1],
+ bitmasks[0]
+ );
+ } else
+ if (flags == WITH_RL_BITMASKS) { // rlevel_bitmask array in PACKED index order, so just print them
+ rlevel_bitmask_t *rlevel_bitmask = (rlevel_bitmask_t *)bm;
+ ddr_print("N%d.LMC%d.R%d: Rlevel Debug Bitmasks 8:0 : %05lx %05lx %05lx %05lx %05lx %05lx %05lx %05lx %05lx\n",
+ node, ddr_interface_num, rank,
+ PPBM(rlevel_bitmask[8].bm),
+ PPBM(rlevel_bitmask[7].bm),
+ PPBM(rlevel_bitmask[6].bm),
+ PPBM(rlevel_bitmask[5].bm),
+ PPBM(rlevel_bitmask[4].bm),
+ PPBM(rlevel_bitmask[3].bm),
+ PPBM(rlevel_bitmask[2].bm),
+ PPBM(rlevel_bitmask[1].bm),
+ PPBM(rlevel_bitmask[0].bm)
+ );
+ } else
+ if (flags == WITH_RL_MASK_SCORES) { // rlevel_bitmask array in PACKED index order, so just print them
+ rlevel_bitmask_t *rlevel_bitmask = (rlevel_bitmask_t *)bm;
+ ddr_print("N%d.LMC%d.R%d: Rlevel Debug Bitmask Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
+ node, ddr_interface_num, rank,
+ rlevel_bitmask[8].errs,
+ rlevel_bitmask[7].errs,
+ rlevel_bitmask[6].errs,
+ rlevel_bitmask[5].errs,
+ rlevel_bitmask[4].errs,
+ rlevel_bitmask[3].errs,
+ rlevel_bitmask[2].errs,
+ rlevel_bitmask[1].errs,
+ rlevel_bitmask[0].errs
+ );
+ } else
+ if (flags == WITH_RL_SEQ_SCORES) { // rlevel_byte array in UNPACKED index order, so xlate and print them
+ rlevel_byte_data_t *rlevel_byte = (rlevel_byte_data_t *)bm;
+ ddr_print("N%d.LMC%d.R%d: Rlevel Debug Non-seq Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
+ node, ddr_interface_num, rank,
+ rlevel_byte[XPU(8,ecc)].sqerrs,
+ rlevel_byte[XPU(7,ecc)].sqerrs,
+ rlevel_byte[XPU(6,ecc)].sqerrs,
+ rlevel_byte[XPU(5,ecc)].sqerrs,
+ rlevel_byte[XPU(4,ecc)].sqerrs,
+ rlevel_byte[XPU(3,ecc)].sqerrs,
+ rlevel_byte[XPU(2,ecc)].sqerrs,
+ rlevel_byte[XPU(1,ecc)].sqerrs,
+ rlevel_byte[XPU(0,ecc)].sqerrs
+ );
+ }
+}
+
+static inline void
+display_WL_BM(bdk_node_t node, int ddr_interface_num, int rank, int *bitmasks)
+{
+ do_display_BM(node, ddr_interface_num, rank, (void *)bitmasks, WITH_WL_BITMASKS, 0);
+}
+
+static inline void
+display_RL_BM(bdk_node_t node, int ddr_interface_num, int rank, rlevel_bitmask_t *bitmasks, int ecc_ena)
+{
+ do_display_BM(node, ddr_interface_num, rank, (void *)bitmasks, WITH_RL_BITMASKS, ecc_ena);
+}
+
+static inline void
+display_RL_BM_scores(bdk_node_t node, int ddr_interface_num, int rank, rlevel_bitmask_t *bitmasks, int ecc_ena)
+{
+ do_display_BM(node, ddr_interface_num, rank, (void *)bitmasks, WITH_RL_MASK_SCORES, ecc_ena);
+}
+
+static inline void
+display_RL_SEQ_scores(bdk_node_t node, int ddr_interface_num, int rank, rlevel_byte_data_t *bytes, int ecc_ena)
+{
+ do_display_BM(node, ddr_interface_num, rank, (void *)bytes, WITH_RL_SEQ_SCORES, ecc_ena);
+}
+
+unsigned short load_dll_offset(bdk_node_t node, int ddr_interface_num,
+ int dll_offset_mode, int byte_offset, int byte)
+{
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+ /* byte_sel:
+ 0x1 = byte 0, ..., 0x9 = byte 8
+ 0xA = all bytes */
+ int byte_sel = (byte == 10) ? byte : byte + 1;
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ SET_DDR_DLL_CTL3(load_offset, 0);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+
+ SET_DDR_DLL_CTL3(mode_sel, dll_offset_mode);
+ SET_DDR_DLL_CTL3(offset, (_abs(byte_offset)&0x3f) | (_sign(byte_offset) << 6)); /* Always 6-bit field? */
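+    /* The offset field is sign-magnitude: bits 5:0 hold the magnitude and bit 6 the sign. */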
+ SET_DDR_DLL_CTL3(byte_sel, byte_sel);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+
+ SET_DDR_DLL_CTL3(load_offset, 1);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+
+ return ((unsigned short) GET_DDR_DLL_CTL3(offset));
+}
+
+void change_dll_offset_enable(bdk_node_t node, int ddr_interface_num, int change)
+{
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ SET_DDR_DLL_CTL3(offset_ena, !!change);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+}
+
+static void process_custom_dll_offsets(bdk_node_t node, int ddr_interface_num, const char *enable_str,
+ const int8_t *offsets, const char *byte_str, int mode)
+{
+ const char *s;
+ int enabled;
+ int provided;
+
+ if ((s = lookup_env_parameter("%s", enable_str)) != NULL) {
+ enabled = !!strtol(s, NULL, 0);
+ } else
+ enabled = -1;
+
+ // enabled == -1: no override, do only configured offsets if provided
+ // enabled == 0: override OFF, do NOT do it even if configured offsets provided
+ // enabled == 1: override ON, do it for overrides plus configured offsets
+
+ if (enabled == 0)
+ return;
+
+ provided = is_dll_offset_provided(offsets);
+
+ if (enabled < 0 && !provided)
+ return;
+
+ int byte_offset;
+ unsigned short offset[9] = {0};
+ int byte;
+
+ // offsets need to be disabled while loading
+ change_dll_offset_enable(node, ddr_interface_num, 0);
+
+ for (byte = 0; byte < 9; ++byte) {
+
+ // always take the provided, if available
+ byte_offset = (provided) ? offsets[byte] : 0;
+
+ // then, if enabled, use any overrides present
+ if (enabled > 0) {
+ if ((s = lookup_env_parameter(byte_str, ddr_interface_num, byte)) != NULL) {
+ byte_offset = strtol(s, NULL, 0);
+ }
+ }
+
+ offset[byte] = load_dll_offset(node, ddr_interface_num, mode, byte_offset, byte);
+ }
+
+ // re-enable offsets after loading
+ change_dll_offset_enable(node, ddr_interface_num, 1);
+
+ ddr_print("N%d.LMC%d: DLL %s Offset 8:0 :"
+ " 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x\n",
+ node, ddr_interface_num, (mode == 2) ? "Read " : "Write",
+ offset[8], offset[7], offset[6], offset[5], offset[4],
+ offset[3], offset[2], offset[1], offset[0]);
+}
+
+void perform_octeon3_ddr3_sequence(bdk_node_t node, int rank_mask, int ddr_interface_num, int sequence)
+{
+ /*
+ * 3. Without changing any other fields in LMC(0)_CONFIG, write
+ * LMC(0)_CONFIG[RANKMASK] then write both
+ * LMC(0)_SEQ_CTL[SEQ_SEL,INIT_START] = 1 with a single CSR write
+ * operation. LMC(0)_CONFIG[RANKMASK] bits should be set to indicate
+ * the ranks that will participate in the sequence.
+ *
+ * The LMC(0)_SEQ_CTL[SEQ_SEL] value should select power-up/init or
+ * selfrefresh exit, depending on whether the DRAM parts are in
+ * self-refresh and whether their contents should be preserved. While
+ * LMC performs these sequences, it will not perform any other DDR3
+ * transactions. When the sequence is complete, hardware sets the
+ * LMC(0)_CONFIG[INIT_STATUS] bits for the ranks that have been
+ * initialized.
+ *
+ * If power-up/init is selected immediately following a DRESET
+ * assertion, LMC executes the sequence described in the "Reset and
+ * Initialization Procedure" section of the JEDEC DDR3
+ * specification. This includes activating CKE, writing all four DDR3
+ * mode registers on all selected ranks, and issuing the required ZQCL
+ * command. The LMC(0)_CONFIG[RANKMASK] value should select all ranks
+ * with attached DRAM in this case. If LMC(0)_CONTROL[RDIMM_ENA] = 1,
+ * LMC writes the JEDEC standard SSTE32882 control words selected by
+ * LMC(0)_DIMM_CTL[DIMM*_WMASK] between DDR_CKE* signal assertion and
+ * the first DDR3 mode register write operation.
+ * LMC(0)_DIMM_CTL[DIMM*_WMASK] should be cleared to 0 if the
+ * corresponding DIMM is not present.
+ *
+ * If self-refresh exit is selected, LMC executes the required SRX
+ * command followed by a refresh and ZQ calibration. Section 4.5
+ * describes behavior of a REF + ZQCS. LMC does not write the DDR3
+ * mode registers as part of this sequence, and the mode register
+ * parameters must match at self-refresh entry and exit times.
+ *
+ * 4. Read LMC(0)_SEQ_CTL and wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be
+ * set.
+ *
+ * 5. Read LMC(0)_CONFIG[INIT_STATUS] and confirm that all ranks have
+ * been initialized.
+ */
+
+ const char *s;
+ static const char *sequence_str[] = {
+ "Power-up/init",
+ "Read-leveling",
+ "Self-refresh entry",
+ "Self-refresh exit",
+ "Illegal",
+ "Illegal",
+ "Write-leveling",
+ "Init Register Control Words",
+ "Mode Register Write",
+ "MPR Register Access",
+ "LMC Deskew/Internal Vref training",
+ "Offset Training"
+ };
+
+ bdk_lmcx_seq_ctl_t seq_ctl;
+ bdk_lmcx_config_t lmc_config;
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ lmc_config.s.rankmask = rank_mask;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+
+ seq_ctl.u = 0;
+
+ seq_ctl.s.init_start = 1;
+ seq_ctl.s.seq_sel = sequence;
+
+ VB_PRT(VBL_SEQ, "N%d.LMC%d: Performing LMC sequence=%x: rank_mask=0x%02x, %s\n",
+ node, ddr_interface_num, sequence, rank_mask, sequence_str[sequence]);
+
+ if ((s = lookup_env_parameter("ddr_trigger_sequence%d", sequence)) != NULL) {
+ int trigger = strtoul(s, NULL, 0);
+ if (trigger)
+ pulse_gpio_pin(node, 1, 2);
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_SEQ_CTL(ddr_interface_num), seq_ctl.u);
+ BDK_CSR_READ(node, BDK_LMCX_SEQ_CTL(ddr_interface_num));
+
+ /* Wait 100us minimum before checking for sequence complete */
+ bdk_wait_usec(100);
+ if (!bdk_is_platform(BDK_PLATFORM_ASIM) &&
+ BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_SEQ_CTL(ddr_interface_num), seq_complete, ==, 1, 1000000))
+ {
+ error_print("N%d.LMC%d: Timeout waiting for LMC sequence=%x, rank_mask=0x%02x, ignoring...\n",
+ node, ddr_interface_num, sequence, rank_mask);
+ }
+ else {
+ VB_PRT(VBL_SEQ, "N%d.LMC%d: LMC sequence=%x: Completed.\n", node, ddr_interface_num, sequence);
+ }
+}
+
+void ddr4_mrw(bdk_node_t node, int ddr_interface_num, int rank,
+ int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1)
+{
+ bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl;
+
+ lmc_mr_mpr_ctl.u = 0;
+ lmc_mr_mpr_ctl.s.mr_wr_addr = (mr_wr_addr == -1) ? 0 : mr_wr_addr;
+ lmc_mr_mpr_ctl.s.mr_wr_sel = mr_wr_sel;
+ lmc_mr_mpr_ctl.s.mr_wr_rank = rank;
+ //lmc_mr_mpr_ctl.s.mr_wr_pda_mask =
+ //lmc_mr_mpr_ctl.s.mr_wr_pda_enable =
+ //lmc_mr_mpr_ctl.s.mpr_loc =
+ //lmc_mr_mpr_ctl.s.mpr_wr =
+ //lmc_mr_mpr_ctl.s.mpr_bit_select =
+ //lmc_mr_mpr_ctl.s.mpr_byte_select =
+ //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable =
+ lmc_mr_mpr_ctl.s.mr_wr_use_default_value = (mr_wr_addr == -1) ? 1 : 0;
+ lmc_mr_mpr_ctl.s.mr_wr_bg1 = mr_wr_bg1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
+
+ /* Mode Register Write */
+ perform_octeon3_ddr3_sequence(node, 1 << rank, ddr_interface_num, 0x8);
+}
+
+#define InvA0_17(x) (x ^ 0x22bf8)
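+/* XOR with 0x22bf8 inverts A3-A9, A11, A13 and A17, i.e. the address bits that
+   a DDR4 registering clock driver (RCD) inverts on its B-side outputs. */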
+static void set_mpr_mode (bdk_node_t node, int rank_mask,
+ int ddr_interface_num, int dimm_count, int mpr, int bg1)
+{
+ int rankx;
+
+ ddr_print("All Ranks: Set mpr mode = %x %c-side\n",
+ mpr, (bg1==0) ? 'A' : 'B');
+
+ for (rankx = 0; rankx < dimm_count*4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+ if (bg1 == 0)
+ ddr4_mrw(node, ddr_interface_num, rankx, mpr<<2, 3, bg1); /* MR3 A-side */
+ else
+ ddr4_mrw(node, ddr_interface_num, rankx, InvA0_17(mpr<<2), ~3, bg1); /* MR3 B-side */
+ }
+}
+
+#if ENABLE_DISPLAY_MPR_PAGE
+static void do_ddr4_mpr_read(bdk_node_t node, int ddr_interface_num, int rank,
+ int page, int location)
+{
+ bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl;
+
+ lmc_mr_mpr_ctl.u = BDK_CSR_READ(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num));
+
+ lmc_mr_mpr_ctl.s.mr_wr_addr = 0;
+ lmc_mr_mpr_ctl.s.mr_wr_sel = page; /* Page */
+ lmc_mr_mpr_ctl.s.mr_wr_rank = rank;
+ //lmc_mr_mpr_ctl.s.mr_wr_pda_mask =
+ //lmc_mr_mpr_ctl.s.mr_wr_pda_enable =
+ lmc_mr_mpr_ctl.s.mpr_loc = location;
+ lmc_mr_mpr_ctl.s.mpr_wr = 0; /* Read=0, Write=1 */
+ //lmc_mr_mpr_ctl.s.mpr_bit_select =
+ //lmc_mr_mpr_ctl.s.mpr_byte_select =
+ //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable =
+ //lmc_mr_mpr_ctl.s.mr_wr_use_default_value =
+ //lmc_mr_mpr_ctl.s.mr_wr_bg1 =
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
+
+ /* MPR register access sequence */
+ perform_octeon3_ddr3_sequence(node, 1 << rank, ddr_interface_num, 0x9);
+
+ debug_print("LMC_MR_MPR_CTL : 0x%016lx\n", lmc_mr_mpr_ctl.u);
+ debug_print("lmc_mr_mpr_ctl.s.mr_wr_addr: 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_addr);
+ debug_print("lmc_mr_mpr_ctl.s.mr_wr_sel : 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_sel);
+ debug_print("lmc_mr_mpr_ctl.s.mpr_loc : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_loc);
+ debug_print("lmc_mr_mpr_ctl.s.mpr_wr : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_wr);
+
+}
+#endif
+
+int set_rdimm_mode(bdk_node_t node, int ddr_interface_num, int enable)
+{
+ bdk_lmcx_control_t lmc_control;
+ int save_rdimm_mode;
+
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ save_rdimm_mode = lmc_control.s.rdimm_ena;
+ lmc_control.s.rdimm_ena = enable;
+ VB_PRT(VBL_FAE, "Setting RDIMM_ENA = %x\n", enable);
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ return (save_rdimm_mode);
+}
+
+#if ENABLE_DISPLAY_MPR_PAGE
+static void ddr4_mpr_read(bdk_node_t node, int ddr_interface_num, int rank,
+ int page, int location, uint64_t *mpr_data)
+{
+ do_ddr4_mpr_read(node, ddr_interface_num, rank, page, location);
+
+ mpr_data[0] = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA0(ddr_interface_num));
+ mpr_data[1] = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA1(ddr_interface_num));
+ mpr_data[2] = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA2(ddr_interface_num));
+
+ debug_print("MPR Read %016lx.%016lx.%016lx\n", mpr_data[2], mpr_data[1], mpr_data[0]);
+}
+
+/* Display MPR values for Page Location */
+static void Display_MPR_Page_Location(bdk_node_t node, int rank,
+ int ddr_interface_num, int dimm_count,
+ int page, int location, uint64_t *mpr_data)
+{
+ ddr4_mpr_read(node, ddr_interface_num, rank, page, location, mpr_data);
+ ddr_print("MPR Page %d, Loc %d %016lx.%016lx.%016lx\n",
+ page, location, mpr_data[2], mpr_data[1], mpr_data[0]);
+}
+
+/* Display MPR values for Page */
+static void Display_MPR_Page(bdk_node_t node, int rank_mask,
+ int ddr_interface_num, int dimm_count, int page)
+{
+ int rankx;
+ uint64_t mpr_data[3];
+
+ for (rankx = 0; rankx < dimm_count * 4;rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ ddr_print("Rank %d: MPR values for Page %d\n", rankx, page);
+ for (int location = 0; location < 4; location++) {
+ Display_MPR_Page_Location(node, rankx, ddr_interface_num, dimm_count,
+ page, location, &mpr_data[0]);
+ }
+
+ } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
+}
+#endif
+
+void ddr4_mpr_write(bdk_node_t node, int ddr_interface_num, int rank,
+ int page, int location, uint8_t mpr_data)
+{
+ bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl;
+
+ lmc_mr_mpr_ctl.u = 0;
+ lmc_mr_mpr_ctl.s.mr_wr_addr = mpr_data;
+ lmc_mr_mpr_ctl.s.mr_wr_sel = page; /* Page */
+ lmc_mr_mpr_ctl.s.mr_wr_rank = rank;
+ //lmc_mr_mpr_ctl.s.mr_wr_pda_mask =
+ //lmc_mr_mpr_ctl.s.mr_wr_pda_enable =
+ lmc_mr_mpr_ctl.s.mpr_loc = location;
+ lmc_mr_mpr_ctl.s.mpr_wr = 1; /* Read=0, Write=1 */
+ //lmc_mr_mpr_ctl.s.mpr_bit_select =
+ //lmc_mr_mpr_ctl.s.mpr_byte_select =
+ //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable =
+ //lmc_mr_mpr_ctl.s.mr_wr_use_default_value =
+ //lmc_mr_mpr_ctl.s.mr_wr_bg1 =
+ DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
+
+ /* MPR register access sequence */
+ perform_octeon3_ddr3_sequence(node, (1 << rank), ddr_interface_num, 0x9);
+
+ debug_print("LMC_MR_MPR_CTL : 0x%016lx\n", lmc_mr_mpr_ctl.u);
+ debug_print("lmc_mr_mpr_ctl.s.mr_wr_addr: 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_addr);
+ debug_print("lmc_mr_mpr_ctl.s.mr_wr_sel : 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_sel);
+ debug_print("lmc_mr_mpr_ctl.s.mpr_loc : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_loc);
+ debug_print("lmc_mr_mpr_ctl.s.mpr_wr : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_wr);
+}
+
+void set_vref(bdk_node_t node, int ddr_interface_num, int rank,
+ int range, int value)
+{
+ bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl;
+ bdk_lmcx_modereg_params3_t lmc_modereg_params3;
+ int mr_wr_addr = 0;
+
+ lmc_mr_mpr_ctl.u = 0;
+ lmc_modereg_params3.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS3(ddr_interface_num));
+
+ mr_wr_addr |= lmc_modereg_params3.s.tccd_l<<10; /* A12:A10 tCCD_L */
+ mr_wr_addr |= 1<<7; /* A7 1 = Enable(Training Mode) */
+ mr_wr_addr |= range<<6; /* A6 VrefDQ Training Range */
+ mr_wr_addr |= value<<0; /* A5:A0 VrefDQ Training Value */
+
+ lmc_mr_mpr_ctl.s.mr_wr_addr = mr_wr_addr;
+ lmc_mr_mpr_ctl.s.mr_wr_sel = 6; /* Write MR6 */
+ lmc_mr_mpr_ctl.s.mr_wr_rank = rank;
+ //lmc_mr_mpr_ctl.s.mr_wr_pda_mask =
+ //lmc_mr_mpr_ctl.s.mr_wr_pda_enable =
+ //lmc_mr_mpr_ctl.s.mpr_loc = location;
+ //lmc_mr_mpr_ctl.s.mpr_wr = 0; /* Read=0, Write=1 */
+ //lmc_mr_mpr_ctl.s.mpr_bit_select =
+ //lmc_mr_mpr_ctl.s.mpr_byte_select =
+ //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable =
+ //lmc_mr_mpr_ctl.s.mr_wr_use_default_value =
+ //lmc_mr_mpr_ctl.s.mr_wr_bg1 =
+ DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
+
+ /* 0x8 = Mode Register Write */
+ perform_octeon3_ddr3_sequence(node, 1<<rank, ddr_interface_num, 0x8);
+
+ /* It is vendor specific whether Vref_value is captured with A7=1.
+ A subsequent MRS might be necessary. */
+ perform_octeon3_ddr3_sequence(node, 1<<rank, ddr_interface_num, 0x8);
+
+ mr_wr_addr &= ~(1<<7); /* A7 0 = Disable(Training Mode) */
+ lmc_mr_mpr_ctl.s.mr_wr_addr = mr_wr_addr;
+ DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
+}
+
+static void set_DRAM_output_inversion (bdk_node_t node,
+ int ddr_interface_num,
+ int dimm_count,
+ int rank_mask,
+ int inversion)
+{
+ bdk_lmcx_ddr4_dimm_ctl_t lmc_ddr4_dimm_ctl;
+ bdk_lmcx_dimmx_params_t lmc_dimmx_params;
+ bdk_lmcx_dimm_ctl_t lmc_dimm_ctl;
+ int dimm_no;
+
+ lmc_ddr4_dimm_ctl.u = 0; /* Don't touch extended register control words */
+ DRAM_CSR_WRITE(node, BDK_LMCX_DDR4_DIMM_CTL(ddr_interface_num), lmc_ddr4_dimm_ctl.u);
+
+ ddr_print("All DIMMs: Register Control Word RC0 : %x\n", (inversion & 1));
+
+ for (dimm_no = 0; dimm_no < dimm_count; ++dimm_no) {
+ lmc_dimmx_params.u = BDK_CSR_READ(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm_no));
+ lmc_dimmx_params.s.rc0 = (lmc_dimmx_params.s.rc0 & ~1) | (inversion & 1);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm_no), lmc_dimmx_params.u);
+ }
+
+ /* LMC0_DIMM_CTL */
+ lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num));
+ lmc_dimm_ctl.s.dimm0_wmask = 0x1;
+ lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0x0001 : 0x0000;
+
+ ddr_print("LMC DIMM_CTL : 0x%016lx\n",
+ lmc_dimm_ctl.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
+
+ perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x7 ); /* Init RCW */
+}
+
+static void write_mpr_page0_pattern (bdk_node_t node, int rank_mask,
+ int ddr_interface_num, int dimm_count, int pattern, int location_mask)
+{
+ int rankx;
+ int location;
+
+ for (rankx = 0; rankx < dimm_count*4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+ for (location = 0; location < 4; ++location) {
+ if (!(location_mask & (1 << location)))
+ continue;
+
+ ddr4_mpr_write(node, ddr_interface_num, rankx,
+ /* page */ 0, /* location */ location, pattern);
+ }
+ }
+}
+
+static void change_rdimm_mpr_pattern (bdk_node_t node, int rank_mask,
+ int ddr_interface_num, int dimm_count)
+{
+ int save_ref_zqcs_int;
+ bdk_lmcx_config_t lmc_config;
+
+ /*
+ Okay, here is the latest sequence. This should work for all
+ chips and passes (78,88,73,etc). This sequence should be run
+ immediately after DRAM INIT. The basic idea is to write the
+ same pattern into each of the 4 MPR locations in the DRAM, so
+ that the same value is returned when doing MPR reads regardless
+ of the inversion state. My advice is to put this into a
+ function, change_rdimm_mpr_pattern or something like that, so
+ that it can be called multiple times, as I think David wants a
+ clock-like pattern for OFFSET training, but does not want a
+ clock pattern for Bit-Deskew. You should then be able to call
+ this at any point in the init sequence (after DRAM init) to
+ change the pattern to a new value.
+ Mike
+
+      A correction: the PHY doesn't need any pattern during offset
+      training, but needs a clock-like pattern for internal vref and
+      bit-deskew training. For that reason, the steps below have
+      to be conducted before those trainings to pre-condition
+      the pattern.   David
+
+      Note: Steps 3, 4, 8 and 9 have to be done through the RDIMM
+      sequence. If you issue the MRW sequence to do the RCW write (in o78
+      pass 1 at least), LMC will still do two commands because
+      CONTROL[RDIMM_ENA] is still set high. We don't want it to do
+      any unintentional mode register writes, so it's best to do what
+      Mike is doing here.
+      Andrew
+ */
+
+
+ /* 1) Disable refresh (REF_ZQCS_INT = 0) */
+
+ debug_print("1) Disable refresh (REF_ZQCS_INT = 0)\n");
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ save_ref_zqcs_int = lmc_config.s.ref_zqcs_int;
+ lmc_config.s.ref_zqcs_int = 0;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+
+
+ /* 2) Put all devices in MPR mode (Run MRW sequence (sequence=8)
+ with MODEREG_PARAMS0[MPRLOC]=0,
+ MODEREG_PARAMS0[MPR]=1, MR_MPR_CTL[MR_WR_SEL]=3, and
+ MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1) */
+
+ debug_print("2) Put all devices in MPR mode (Run MRW sequence (sequence=8)\n");
+
+ set_mpr_mode(node, rank_mask, ddr_interface_num, dimm_count, /* mpr */ 1, /* bg1 */ 0); /* A-side */
+ set_mpr_mode(node, rank_mask, ddr_interface_num, dimm_count, /* mpr */ 1, /* bg1 */ 1); /* B-side */
+
+ /* a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and set
+ the value you would like directly into
+ MR_MPR_CTL[MR_WR_ADDR] */
+
+ /* 3) Disable RCD Parity (if previously enabled) - parity does not
+ work if inversion disabled */
+
+ debug_print("3) Disable RCD Parity\n");
+
+ /* 4) Disable Inversion in the RCD. */
+ /* a. I did (3&4) via the RDIMM sequence (seq_sel=7), but it
+ may be easier to use the MRW sequence (seq_sel=8). Just set
+ MR_MPR_CTL[MR_WR_SEL]=7, MR_MPR_CTL[MR_WR_ADDR][3:0]=data,
+ MR_MPR_CTL[MR_WR_ADDR][7:4]=RCD reg */
+
+ debug_print("4) Disable Inversion in the RCD.\n");
+
+ set_DRAM_output_inversion(node, ddr_interface_num, dimm_count, rank_mask,
+ 1 /* 1=disable output inversion*/);
+
+ /* 5) Disable CONTROL[RDIMM_ENA] so that MR sequence goes out
+ non-inverted. */
+
+ debug_print("5) Disable CONTROL[RDIMM_ENA]\n");
+
+ set_rdimm_mode(node, ddr_interface_num, 0);
+
+ /* 6) Write all 4 MPR registers with the desired pattern (have to
+ do this for all enabled ranks) */
+ /* a. MR_MPR_CTL.MPR_WR=1, MR_MPR_CTL.MPR_LOC=0..3,
+ MR_MPR_CTL.MR_WR_SEL=0, MR_MPR_CTL.MR_WR_ADDR[7:0]=pattern */
+
+ debug_print("6) Write all 4 MPR page 0 Training Patterns\n");
+
+ write_mpr_page0_pattern(node, rank_mask,
+ ddr_interface_num, dimm_count, 0x55, 0x8);
+
+ /* 7) Re-enable RDIMM_ENA */
+
+ debug_print("7) Re-enable RDIMM_ENA\n");
+
+ set_rdimm_mode(node, ddr_interface_num, 1);
+
+ /* 8) Re-enable RDIMM inversion */
+
+ debug_print("8) Re-enable RDIMM inversion\n");
+
+ set_DRAM_output_inversion(node, ddr_interface_num, dimm_count, rank_mask,
+ 0 /* 0=re-enable output inversion*/);
+
+ /* 9) Re-enable RDIMM parity (if desired) */
+
+ debug_print("9) Re-enable RDIMM parity (if desired)\n");
+
+ /* 10)Take B-side devices out of MPR mode (Run MRW sequence
+ (sequence=8) with MODEREG_PARAMS0[MPRLOC]=0,
+ MODEREG_PARAMS0[MPR]=0, MR_MPR_CTL[MR_WR_SEL]=3, and
+ MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1) */
+
+ debug_print("10)Take B-side devices out of MPR mode\n");
+
+ set_mpr_mode(node, rank_mask, ddr_interface_num, dimm_count, /* mpr */ 0, /* bg1 */ 1);
+
+ /* a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and
+ set the value you would like directly into
+ MR_MPR_CTL[MR_WR_ADDR] */
+
+ /* 11)Re-enable refresh (REF_ZQCS_INT=previous value) */
+
+ debug_print("11)Re-enable refresh (REF_ZQCS_INT=previous value)\n");
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ lmc_config.s.ref_zqcs_int = save_ref_zqcs_int;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+
+}
+
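+/* Impedance lookup tables, indexed by the corresponding mode-register or CSR
+   field encodings (e.g. rtt_nom_ohms[] and rtt_wr_ohms[] are indexed by the
+   RTT_NOM/RTT_WR field values extracted in compute_vref_value() above).
+   RTT_WR Hi-Z is represented as 99 ohms so it can be treated numerically. */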
+static unsigned char ddr4_rodt_ohms [RODT_OHMS_COUNT ] = { 0, 40, 60, 80, 120, 240, 34, 48 };
+static unsigned char ddr4_rtt_nom_ohms [RTT_NOM_OHMS_COUNT ] = { 0, 60, 120, 40, 240, 48, 80, 34 };
+static unsigned char ddr4_rtt_nom_table [RTT_NOM_TABLE_COUNT ] = { 0, 4, 2, 6, 1, 5, 3, 7 };
+static unsigned char ddr4_rtt_wr_ohms [RTT_WR_OHMS_COUNT ] = { 0, 120, 240, 99, 80 }; // setting HiZ ohms to 99 for computed vref
+static unsigned char ddr4_dic_ohms [DIC_OHMS_COUNT ] = { 34, 48 };
+static short ddr4_drive_strength[DRIVE_STRENGTH_COUNT] = { 0, 0, 26, 30, 34, 40, 48, 68, 0,0,0,0,0,0,0 };
+static short ddr4_dqx_strength [DRIVE_STRENGTH_COUNT] = { 0, 24, 27, 30, 34, 40, 48, 60, 0,0,0,0,0,0,0 };
+
+impedence_values_t ddr4_impedence_values = {
+ .rodt_ohms = ddr4_rodt_ohms ,
+ .rtt_nom_ohms = ddr4_rtt_nom_ohms ,
+ .rtt_nom_table = ddr4_rtt_nom_table ,
+ .rtt_wr_ohms = ddr4_rtt_wr_ohms ,
+ .dic_ohms = ddr4_dic_ohms ,
+ .drive_strength = ddr4_drive_strength,
+ .dqx_strength = ddr4_dqx_strength ,
+};
+
+static unsigned char ddr3_rodt_ohms [RODT_OHMS_COUNT ] = { 0, 20, 30, 40, 60, 120, 0, 0 };
+static unsigned char ddr3_rtt_nom_ohms [RTT_NOM_OHMS_COUNT ] = { 0, 60, 120, 40, 20, 30, 0, 0 };
+static unsigned char ddr3_rtt_nom_table [RTT_NOM_TABLE_COUNT ] = { 0, 2, 1, 3, 5, 4, 0, 0 };
+static unsigned char ddr3_rtt_wr_ohms [RTT_WR_OHMS_COUNT ] = { 0, 60, 120 };
+static unsigned char ddr3_dic_ohms [DIC_OHMS_COUNT ] = { 40, 34 };
+static short ddr3_drive_strength[DRIVE_STRENGTH_COUNT] = { 0, 24, 27, 30, 34, 40, 48, 60, 0,0,0,0,0,0,0 };
+static impedence_values_t ddr3_impedence_values = {
+ .rodt_ohms = ddr3_rodt_ohms ,
+ .rtt_nom_ohms = ddr3_rtt_nom_ohms ,
+ .rtt_nom_table = ddr3_rtt_nom_table ,
+ .rtt_wr_ohms = ddr3_rtt_wr_ohms ,
+ .dic_ohms = ddr3_dic_ohms ,
+ .drive_strength = ddr3_drive_strength,
+ .dqx_strength = ddr3_drive_strength,
+};
+
+
+uint64_t
+hertz_to_psecs(uint64_t hertz)
+{
+ return divide_nint((uint64_t) 1000*1000*1000*1000, hertz); /* Clock in psecs */
+}
+
+#define DIVIDEND_SCALE 1000 /* Scale to avoid rounding error. */
+uint64_t
+psecs_to_mts(uint64_t psecs)
+{
+ //ddr_print("psecs %ld, divisor %ld\n", psecs, divide_nint((uint64_t)(2 * 1000000 * DIVIDEND_SCALE), psecs));
+ return divide_nint(divide_nint((uint64_t)(2 * 1000000 * DIVIDEND_SCALE), psecs), DIVIDEND_SCALE);
+}
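+
+/* e.g. hertz_to_psecs(800000000) = 1250 psecs and psecs_to_mts(1250) = 1600 MT/s. */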
+
+#define WITHIN(v,b,m) (((v)>=((b)-(m)))&&((v)<=((b)+(m))))
+
+// pretty-print version, only works with what comes from the SPD: tCKmin or tCKAVGmin
+unsigned long
+pretty_psecs_to_mts(uint64_t psecs)
+{
+ uint64_t ret = 0; // default to error
+ if (WITHIN(psecs, 1250, 1))
+ ret = 1600;
+ else if (WITHIN(psecs, 1071, 1))
+ ret = 1866;
+ else if (WITHIN(psecs, 937, 1))
+ ret = 2133;
+ else if (WITHIN(psecs, 833, 1))
+ ret = 2400;
+ else if (WITHIN(psecs, 750, 1))
+ ret = 2666;
+ return ret;
+}
+
+uint64_t
+mts_to_hertz(uint64_t mts)
+{
+ return ((mts * 1000 * 1000) / 2);
+}
+
+#define DEBUG_RC3X_COMPUTE 0
+#define rc3x_print(...) \
+ do { if (DEBUG_RC3X_COMPUTE) printf(__VA_ARGS__); } while (0)
+
+static int compute_rc3x (int64_t tclk_psecs)
+{
+ long speed;
+ long tclk_psecs_min, tclk_psecs_max;
+ long data_rate_mhz, data_rate_mhz_min, data_rate_mhz_max;
+ int rc3x;
+
+#define ENCODING_BASE 1240
+
+ data_rate_mhz = psecs_to_mts(tclk_psecs);
+
+ /* 2400 MT/s is a special case. Using integer arithmetic it rounds
+ from 833 psecs to 2401 MT/s. Force it to 2400 to pick the
+ proper setting from the table. */
+ if (tclk_psecs == 833)
+ data_rate_mhz = 2400;
+
+ for (speed = ENCODING_BASE; speed < 3200; speed += 20) {
+ int error = 0;
+
+ tclk_psecs_min = hertz_to_psecs(mts_to_hertz(speed + 00)); /* Clock in psecs */
+ tclk_psecs_max = hertz_to_psecs(mts_to_hertz(speed + 18)); /* Clock in psecs */
+
+ data_rate_mhz_min = psecs_to_mts(tclk_psecs_min);
+ data_rate_mhz_max = psecs_to_mts(tclk_psecs_max);
+
+        /* Force alignment to a multiple of 20 to avoid rounding errors. */
+ data_rate_mhz_min = ((data_rate_mhz_min + 18) / 20) * 20;
+ data_rate_mhz_max = ((data_rate_mhz_max + 18) / 20) * 20;
+
+ error += (speed + 00 != data_rate_mhz_min);
+ error += (speed + 20 != data_rate_mhz_max);
+
+ rc3x = (speed - ENCODING_BASE) / 20;
+
+ rc3x_print("rc3x: %02x speed: %4ld MT/s < f <= %4ld MT/s, psec: %3ld:%3ld %4ld:%4ld %s\n",
+ rc3x,
+ speed, speed + 20,
+ tclk_psecs_min, tclk_psecs_max,
+ data_rate_mhz_min, data_rate_mhz_max,
+ error ? "****" : "");
+
+ if (data_rate_mhz <= (speed + 20)) {
+ rc3x_print("rc3x: %4ld MT/s <= %4ld MT/s\n", data_rate_mhz, speed + 20);
+ break;
+ }
+ }
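+
+    /* Example: tclk_psecs = 833 is forced to 2400 MT/s above; the scan stops
+       at the 2380 < f <= 2400 MT/s bucket, giving rc3x = (2380-1240)/20 = 57. */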
+ return rc3x;
+}
+
+static const int rlevel_separate_ab = 1;
+
+int init_octeon3_ddr3_interface(bdk_node_t node,
+ const ddr_configuration_t *ddr_configuration,
+ uint32_t ddr_hertz,
+ uint32_t cpu_hertz,
+ uint32_t ddr_ref_hertz,
+ int board_type,
+ int board_rev_maj,
+ int board_rev_min,
+ int ddr_interface_num,
+ uint32_t ddr_interface_mask
+ )
+{
+ const char *s;
+
+ const dimm_odt_config_t *odt_1rank_config = ddr_configuration->odt_1rank_config;
+ const dimm_odt_config_t *odt_2rank_config = ddr_configuration->odt_2rank_config;
+ const dimm_odt_config_t *odt_4rank_config = ddr_configuration->odt_4rank_config;
+ const dimm_config_t *dimm_config_table = ddr_configuration->dimm_config_table;
+ const dimm_odt_config_t *odt_config;
+ const ddr3_custom_config_t *custom_lmc_config = &ddr_configuration->custom_lmc_config;
+ int odt_idx;
+
+ /*
+ ** Compute clock rates to the nearest picosecond.
+ */
+ uint64_t tclk_psecs = hertz_to_psecs(ddr_hertz); /* Clock in psecs */
+ uint64_t eclk_psecs = hertz_to_psecs(cpu_hertz); /* Clock in psecs */
+
+ int row_bits, col_bits, num_banks, num_ranks, dram_width;
+ int dimm_count = 0;
+ int fatal_error = 0; /* Accumulate and report all the errors before giving up */
+
+ int safe_ddr_flag = 0; /* Flag that indicates safe DDR settings should be used */
+ int ddr_interface_64b = 1; /* THUNDER Default: 64bit interface width */
+ int ddr_interface_bytemask;
+ uint32_t mem_size_mbytes = 0;
+ unsigned int didx;
+ int bank_bits = 0;
+ int bunk_enable;
+ int rank_mask;
+ int column_bits_start = 1;
+ int row_lsb;
+ int pbank_lsb;
+ int use_ecc = 1;
+ int mtb_psec = 0; /* quiet */
+ short ftb_Dividend;
+ short ftb_Divisor;
+ int tAAmin;
+ int tCKmin;
+ int CL, min_cas_latency = 0, max_cas_latency = 0, override_cas_latency = 0;
+ int ddr_rtt_nom_auto, ddr_rodt_ctl_auto;
+ int i;
+
+ int spd_addr;
+ int spd_org;
+ int spd_banks;
+ int spd_rdimm;
+ int spd_dimm_type;
+ int spd_ecc;
+ uint32_t spd_cas_latency;
+ int spd_mtb_dividend;
+ int spd_mtb_divisor;
+ int spd_tck_min;
+ int spd_taa_min;
+ int spd_twr;
+ int spd_trcd;
+ int spd_trrd;
+ int spd_trp;
+ int spd_tras;
+ int spd_trc;
+ int spd_trfc;
+ int spd_twtr;
+ int spd_trtp;
+ int spd_tfaw;
+ int spd_addr_mirror;
+ int spd_package = 0;
+ int spd_rawcard = 0;
+ int spd_rawcard_AorB = 0;
+ int is_stacked_die = 0;
+ int disable_stacked_die = 0;
+ int is_3ds_dimm = 0; // 3DS
+ int lranks_per_prank = 1; // 3DS: logical ranks per package rank
+ int lranks_bits = 0; // 3DS: logical ranks bits
+ int die_capacity = 0; // in Mbits; only used for 3DS
+
+ /* FTB values are two's complement ranging from +127 to -128. */
+ typedef signed char SC_t;
+
+ int twr;
+ int trcd;
+ int trrd;
+ int trp;
+ int tras;
+ int trc;
+ int trfc;
+ int twtr;
+ int trtp = 0; /* quiet */
+ int tfaw;
+
+ int wlevel_bitmask_errors = 0;
+ int wlevel_loops;
+ int default_rtt_nom[4];
+ int dyn_rtt_nom_mask = 0;
+
+ ddr_type_t ddr_type;
+ int ddr4_tCKAVGmin = 0; /* quiet */
+ int ddr4_tCKAVGmax = 0; /* quiet */
+ int ddr4_tRCDmin = 0; /* quiet */
+ int ddr4_tRPmin = 0; /* quiet */
+ int ddr4_tRASmin = 0; /* quiet */
+ int ddr4_tRCmin = 0; /* quiet */
+ int ddr4_tRFC1min = 0; /* quiet */
+ int ddr4_tRFC2min = 0; /* quiet */
+ int ddr4_tRFC4min = 0; /* quiet */
+ int ddr4_tFAWmin = 0; /* quiet */
+ int ddr4_tRRD_Smin = 0; /* quiet */
+ int ddr4_tRRD_Lmin;
+ int ddr4_tCCD_Lmin;
+ impedence_values_t *imp_values;
+ int default_rodt_ctl;
+    // default to disabled (i.e., LMC restart, not chip reset)
+ int ddr_disable_chip_reset = 1;
+ int disable_deskew_training = 0;
+ const char *dimm_type_name;
+
+ /* Allow the Write bit-deskew feature to be enabled when desired. */
+ // NOTE: THUNDER pass 2.x only, 81xx, 83xx
+ int enable_write_deskew = ENABLE_WRITE_DESKEW_DEFAULT;
+
+#if SWL_TRY_HWL_ALT
+ typedef struct {
+ uint16_t hwl_alt_mask; // mask of bytelanes with alternate
+ uint16_t hwl_alt_delay[9]; // bytelane alternate avail if mask=1
+ } hwl_alt_by_rank_t;
+ hwl_alt_by_rank_t hwl_alts[4];
+ memset(hwl_alts, 0, sizeof(hwl_alts));
+#endif /* SWL_TRY_HWL_ALT */
+
+ bdk_lmcx_config_t lmc_config;
+
+ /* Initialize these to shut up the compiler. They are configured
+ and used only for DDR4 */
+ ddr4_tRRD_Lmin = 6000;
+ ddr4_tCCD_Lmin = 6000;
+
+ ddr_print("\nInitializing node %d DDR interface %d, DDR Clock %d, DDR Reference Clock %d\n",
+ node, ddr_interface_num, ddr_hertz, ddr_ref_hertz);
+
+ if (dimm_config_table[0].spd_addr == 0 && !dimm_config_table[0].spd_ptr) {
+ error_print("ERROR: No dimms specified in the dimm_config_table.\n");
+ return (-1);
+ }
+
+ // allow some overrides to be done
+
+ // this one controls whether chip RESET is done, or LMC init restarted from step 6.9.6
+ if ((s = lookup_env_parameter("ddr_disable_chip_reset")) != NULL) {
+ ddr_disable_chip_reset = !!strtoul(s, NULL, 0);
+ }
+ // this one controls whether Deskew Training is performed
+ if ((s = lookup_env_parameter("ddr_disable_deskew_training")) != NULL) {
+ disable_deskew_training = !!strtoul(s, NULL, 0);
+ }
+ // this one is in Validate_Read_Deskew_Training and controls a preliminary delay
+ if ((s = lookup_env_parameter("ddr_deskew_validation_delay")) != NULL) {
+ deskew_validation_delay = strtoul(s, NULL, 0);
+ }
+ // this one is in Perform_Read_Deskew_Training and controls lock retries
+ if ((s = lookup_env_parameter("ddr_lock_retries")) != NULL) {
+ default_lock_retry_limit = strtoul(s, NULL, 0);
+ }
+ // this one controls whether stacked die status can affect processing
+ // disabling it will affect computed vref adjustment, and rodt_row_skip_mask
+ if ((s = lookup_env_parameter("ddr_disable_stacked_die")) != NULL) {
+ disable_stacked_die = !!strtoul(s, NULL, 0);
+ }
+
+ // setup/override for write bit-deskew feature
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ // FIXME: allow override
+ if ((s = lookup_env_parameter("ddr_enable_write_deskew")) != NULL) {
+ enable_write_deskew = !!strtoul(s, NULL, 0);
+ } // else take default setting
+ } else { // not pass 2.x
+ enable_write_deskew = 0; // force disabled
+ }
+
+#if 0 // FIXME: do we really need this anymore?
+ if (dram_is_verbose(VBL_NORM)) {
+ printf("DDR SPD Table:");
+ for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) {
+ if (dimm_config_table[didx].spd_addr == 0) break;
+ printf(" --ddr%dspd=0x%02x", ddr_interface_num, dimm_config_table[didx].spd_addr);
+ }
+ printf("\n");
+ }
+#endif
+
+ /*
+ ** Walk the DRAM Socket Configuration Table to see what is installed.
+ */
+ for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx)
+ {
+ /* Check for lower DIMM socket populated */
+ if (validate_dimm(node, &dimm_config_table[didx]) == 1) {
+ // NOTE: DIMM info printing is now done later when more details are available
+ ++dimm_count;
+ } else { break; } /* Finished when there is no lower DIMM */
+ }
+
+
+ initialize_ddr_clock(node,
+ ddr_configuration,
+ cpu_hertz,
+ ddr_hertz,
+ ddr_ref_hertz,
+ ddr_interface_num,
+ ddr_interface_mask);
+
+ if (!odt_1rank_config)
+ odt_1rank_config = disable_odt_config;
+ if (!odt_2rank_config)
+ odt_2rank_config = disable_odt_config;
+ if (!odt_4rank_config)
+ odt_4rank_config = disable_odt_config;
+
+ if ((s = lookup_env_parameter("ddr_safe")) != NULL) {
+ safe_ddr_flag = !!strtoul(s, NULL, 0);
+ }
+
+
+ if (dimm_count == 0) {
+ error_print("ERROR: DIMM 0 not detected.\n");
+ return(-1);
+ }
+
+ // look for 32-bit mode specified in the config
+ if (custom_lmc_config->mode32b) {
+ ddr_interface_64b = 0;
+ }
+
+ if (ddr_interface_64b == 0) { // check if 32-bit mode is bad
+ if (!CAVIUM_IS_MODEL(CAVIUM_CN81XX)) {
+ error_print("32-bit interface width is NOT supported for this Thunder model\n");
+ ddr_interface_64b = 1; // force to 64-bit
+ }
+ } else { // check if 64-bit mode is bad
+ if (CAVIUM_IS_MODEL(CAVIUM_CN81XX)) { // check the fuses on 81XX for forced 32-bit mode
+ BDK_CSR_INIT(mio_fus_dat2, node, BDK_MIO_FUS_DAT2);
+ if (mio_fus_dat2.s.lmc_mode32) {
+ error_print("32-bit interface width is ONLY supported for this Thunder model\n");
+ ddr_interface_64b = 0; // force to 32-bit
+ }
+ }
+ }
+
+ // finally, say we are in 32-bit mode when it has been validated
+ if (ddr_interface_64b == 0) {
+ ddr_print("N%d.LMC%d: Setting 32-bit data width\n",
+ node, ddr_interface_num);
+ }
+
+ /* ddr_type only indicates DDR4 or DDR3 */
+ ddr_type = get_ddr_type(node, &dimm_config_table[0]);
+ debug_print("DRAM Device Type: DDR%d\n", ddr_type);
+
+ spd_dimm_type = get_dimm_module_type(node, &dimm_config_table[0], ddr_type);
+
+ if (ddr_type == DDR4_DRAM) {
+ int spd_module_type;
+ int asymmetric;
+ const char *signal_load[4] = {"", "MLS", "3DS", "RSV"};
+
+ imp_values = &ddr4_impedence_values;
+ dimm_type_name = ddr4_dimm_types[spd_dimm_type];
+
+ spd_addr = read_spd(node, &dimm_config_table[0], DDR4_SPD_ADDRESSING_ROW_COL_BITS);
+ spd_org = read_spd(node, &dimm_config_table[0], DDR4_SPD_MODULE_ORGANIZATION);
+ spd_banks = 0xFF & read_spd(node, &dimm_config_table[0], DDR4_SPD_DENSITY_BANKS);
+
+ bank_bits = (2 + ((spd_banks >> 4) & 0x3)) + ((spd_banks >> 6) & 0x3);
+ bank_bits = min((int)bank_bits, 4); /* Controller can only address 4 bits. */
+
+        spd_package = 0xFF & read_spd(node, &dimm_config_table[0], DDR4_SPD_PACKAGE_TYPE);
+ if (spd_package & 0x80) { // non-monolithic device
+ is_stacked_die = (!disable_stacked_die) ? ((spd_package & 0x73) == 0x11) : 0;
+ ddr_print("DDR4: Package Type 0x%x (%s), %d die\n", spd_package,
+ signal_load[(spd_package & 3)], ((spd_package >> 4) & 7) + 1);
+ is_3ds_dimm = ((spd_package & 3) == 2); // is it 3DS?
+ if (is_3ds_dimm) { // is it 3DS?
+ lranks_per_prank = ((spd_package >> 4) & 7) + 1;
+ // FIXME: should make sure it is only 2H or 4H or 8H?
+ lranks_bits = lranks_per_prank >> 1;
+ if (lranks_bits == 4) lranks_bits = 3;
+ }
+ } else if (spd_package != 0) {
+ // FIXME: print non-zero monolithic device definition
+ ddr_print("DDR4: Package Type MONOLITHIC: %d die, signal load %d\n",
+ ((spd_package >> 4) & 7) + 1, (spd_package & 3));
+ }
+
+ asymmetric = (spd_org >> 6) & 1;
+ if (asymmetric) {
+ int spd_secondary_pkg = read_spd(node, &dimm_config_table[0],
+ DDR4_SPD_SECONDARY_PACKAGE_TYPE);
+ ddr_print("DDR4: Module Organization: ASYMMETRICAL: Secondary Package Type 0x%x\n",
+ spd_secondary_pkg);
+ } else {
+ uint64_t bus_width = 8 << (0x07 & read_spd(node, &dimm_config_table[0],
+ DDR4_SPD_MODULE_MEMORY_BUS_WIDTH));
+ uint64_t ddr_width = 4 << ((spd_org >> 0) & 0x7);
+ uint64_t module_cap;
+ int shift = (spd_banks & 0x0F);
+ die_capacity = (shift < 8) ? (256UL << shift) : ((12UL << (shift & 1)) << 10);
+ ddr_print("DDR4: Module Organization: SYMMETRICAL: capacity per die %d %cbit\n",
+ (die_capacity > 512) ? (die_capacity >> 10) : die_capacity,
+ (die_capacity > 512) ? 'G' : 'M');
+ module_cap = ((uint64_t)die_capacity << 20) / 8UL * bus_width / ddr_width *
+ /* no. pkg ranks*/(1UL + ((spd_org >> 3) & 0x7));
+ if (is_3ds_dimm) // is it 3DS?
+ module_cap *= /* die_count */(uint64_t)(((spd_package >> 4) & 7) + 1);
+ ddr_print("DDR4: Module Organization: SYMMETRICAL: capacity per module %ld GB\n",
+ module_cap >> 30);
+ }
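+        /* Worked example (illustrative assumptions, not read from a real
+           SPD): 8 Gbit x8 die (die_capacity = 8192 Mbit, i.e. 1 GB per die),
+           64-bit primary bus and 2 package ranks give
+           module_cap = 1 GB * (64/8) * 2 = 16 GB. */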
+
+ spd_rawcard = 0xFF & read_spd(node, &dimm_config_table[0], DDR4_SPD_REFERENCE_RAW_CARD);
+ ddr_print("DDR4: Reference Raw Card 0x%x \n", spd_rawcard);
+
+ spd_module_type = read_spd(node, &dimm_config_table[0], DDR4_SPD_KEY_BYTE_MODULE_TYPE);
+ if (spd_module_type & 0x80) { // HYBRID module
+ ddr_print("DDR4: HYBRID module, type %s\n",
+ ((spd_module_type & 0x70) == 0x10) ? "NVDIMM" : "UNKNOWN");
+ }
+
+ spd_dimm_type = spd_module_type & 0x0F;
+ spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) || (spd_dimm_type == 8);
+ if (spd_rdimm) {
+ int spd_mfgr_id = read_spd(node, &dimm_config_table[0], DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB) |
+ (read_spd(node, &dimm_config_table[0], DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB) << 8);
+ int spd_register_rev = read_spd(node, &dimm_config_table[0], DDR4_SPD_REGISTER_REVISION_NUMBER);
+ ddr_print("DDR4: RDIMM Register Manufacturer ID 0x%x Revision 0x%x\n",
+ spd_mfgr_id, spd_register_rev);
+
+ // RAWCARD A or B must be bit 7=0 and bits 4-0 either 00000(A) or 00001(B)
+ spd_rawcard_AorB = ((spd_rawcard & 0x9fUL) <= 1);
+ }
+ } else {
+ imp_values = &ddr3_impedence_values;
+ dimm_type_name = ddr3_dimm_types[spd_dimm_type];
+
+ spd_addr = read_spd(node, &dimm_config_table[0], DDR3_SPD_ADDRESSING_ROW_COL_BITS);
+ spd_org = read_spd(node, &dimm_config_table[0], DDR3_SPD_MODULE_ORGANIZATION);
+ spd_banks = read_spd(node, &dimm_config_table[0], DDR3_SPD_DENSITY_BANKS) & 0xff;
+
+ bank_bits = 3 + ((spd_banks >> 4) & 0x7);
+ bank_bits = min((int)bank_bits, 3); /* Controller can only address 3 bits. */
+
+ spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) || (spd_dimm_type == 9);
+ }
+
+#if 0 // FIXME: why should this be possible OR needed?
+ if ((s = lookup_env_parameter("ddr_rdimm_ena")) != NULL) {
+ spd_rdimm = !!strtoul(s, NULL, 0);
+ }
+#endif
+
+ debug_print("spd_addr : %#06x\n", spd_addr );
+ debug_print("spd_org : %#06x\n", spd_org );
+ debug_print("spd_banks : %#06x\n", spd_banks );
+
+ row_bits = 12 + ((spd_addr >> 3) & 0x7);
+ col_bits = 9 + ((spd_addr >> 0) & 0x7);
+
+ num_ranks = 1 + ((spd_org >> 3) & 0x7);
+ dram_width = 4 << ((spd_org >> 0) & 0x7);
+ num_banks = 1 << bank_bits;
+
+ if ((s = lookup_env_parameter("ddr_num_ranks")) != NULL) {
+ num_ranks = strtoul(s, NULL, 0);
+ }
+
+ /* FIX
+ ** Check that values are within some theoretical limits.
+ ** col_bits(min) = row_lsb(min) - bank_bits(max) - bus_bits(max) = 14 - 3 - 4 = 7
+ ** col_bits(max) = row_lsb(max) - bank_bits(min) - bus_bits(min) = 18 - 2 - 3 = 13
+ */
+ if ((col_bits > 13) || (col_bits < 7)) {
+ error_print("Unsupported number of Col Bits: %d\n", col_bits);
+ ++fatal_error;
+ }
+
+ /* FIX
+ ** Check that values are within some theoretical limits.
+ ** row_bits(min) = pbank_lsb(min) - row_lsb(max) - rank_bits = 26 - 18 - 1 = 7
+ ** row_bits(max) = pbank_lsb(max) - row_lsb(min) - rank_bits = 33 - 14 - 1 = 18
+ */
+ if ((row_bits > 18) || (row_bits < 7)) {
+ error_print("Unsupported number of Row Bits: %d\n", row_bits);
+ ++fatal_error;
+ }
+
+ if (bdk_is_platform(BDK_PLATFORM_ASIM))
+ wlevel_loops = 0;
+ else {
+ wlevel_loops = WLEVEL_LOOPS_DEFAULT;
+ // accept generic or interface-specific override but not for ASIM...
+ if ((s = lookup_env_parameter("ddr_wlevel_loops")) == NULL)
+ s = lookup_env_parameter("ddr%d_wlevel_loops", ddr_interface_num);
+ if (s != NULL) {
+ wlevel_loops = strtoul(s, NULL, 0);
+ }
+ }
+
+ bunk_enable = (num_ranks > 1);
+
+ column_bits_start = 3;
+
+ row_lsb = column_bits_start + col_bits + bank_bits - (! ddr_interface_64b);
+ debug_print("row_lsb = column_bits_start + col_bits + bank_bits = %d\n", row_lsb);
+
+ pbank_lsb = row_lsb + row_bits + bunk_enable;
+ debug_print("pbank_lsb = row_lsb + row_bits + bunk_enable = %d\n", pbank_lsb);
+
+ if (lranks_per_prank > 1) {
+ pbank_lsb = row_lsb + row_bits + lranks_bits + bunk_enable;
+ ddr_print("DDR4: 3DS: pbank_lsb = (%d row_lsb) + (%d row_bits) + (%d lranks_bits) + (%d bunk_enable) = %d\n",
+ row_lsb, row_bits, lranks_bits, bunk_enable, pbank_lsb);
+ }
+
+ mem_size_mbytes = dimm_count * ((1ull << pbank_lsb) >> 20);
+ if (num_ranks == 4) {
+ /* Quad rank dimm capacity is equivalent to two dual-rank dimms. */
+ mem_size_mbytes *= 2;
+ }
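+    /* Illustrative address-map arithmetic (example values only): with
+       col_bits = 10, bank_bits = 4 and a 64-bit interface,
+       row_lsb = 3 + 10 + 4 = 17; adding row_bits = 16 and bunk_enable = 1
+       (dual-rank) gives pbank_lsb = 34, so one such DIMM contributes
+       (1ull << 34) >> 20 = 16384 MB. */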
+
+    /* Mask with 1 bits set for each active rank, allowing 2 bits per dimm.
+ ** This makes later calculations simpler, as a variety of CSRs use this layout.
+    ** This init needs to be updated for dual configs (i.e. non-identical DIMMs).
+ ** Bit 0 = dimm0, rank 0
+ ** Bit 1 = dimm0, rank 1
+ ** Bit 2 = dimm1, rank 0
+ ** Bit 3 = dimm1, rank 1
+ ** ...
+ */
+ rank_mask = 0x1;
+ if (num_ranks > 1)
+ rank_mask = 0x3;
+ if (num_ranks > 2)
+ rank_mask = 0xf;
+
+ for (i = 1; i < dimm_count; i++)
+ rank_mask |= ((rank_mask & 0x3) << (2*i));
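+    /* Example: two dual-rank DIMMs start with rank_mask = 0x3 and the loop
+       above ORs in (0x3 << 2), giving 0xF (all four rank bits set). */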
+
+
+#ifdef CAVIUM_ONLY
+ /* Special request: mismatched DIMM support. Slot 0: 2-Rank, Slot 1: 1-Rank */
+ if (0)
+ {
+ /*
+ ** Calculate the total memory size in terms of the total
+ ** number of ranks instead of the number of dimms. The usual
+ ** requirement is for both dimms to be identical. This check
+ ** works around that requirement to allow one exception. The
+ ** dimm in the second slot may now have fewer ranks than the
+ ** first slot.
+ */
+ int spd_org_dimm1;
+ int num_ranks_dimm1;
+ int rank_count;
+ int rank_mask_dimm1;
+
+ if (dimm_count > 1) {
+ spd_org_dimm1 = read_spd(node, &dimm_config_table[1] /* dimm 1*/,
+ DDR3_SPD_MODULE_ORGANIZATION);
+ num_ranks_dimm1 = 1 + ((spd_org_dimm1 >> 3) & 0x7);
+ rank_count = num_ranks/* dimm 0 */ + num_ranks_dimm1 /* dimm 1 */;
+
+ if (num_ranks != num_ranks_dimm1) {
+ mem_size_mbytes = rank_count * ((1ull << (pbank_lsb-bunk_enable)) >> 20);
+ rank_mask = 1 | ((num_ranks > 1) << 1);
+ rank_mask_dimm1 = 1 | ((num_ranks_dimm1 > 1) << 1);
+ rank_mask |= ((rank_mask_dimm1 & 0x3) << 2);
+ ddr_print("DIMM 1 - ranks: %d, size: %d MB\n",
+ num_ranks_dimm1, num_ranks_dimm1 * ((1ull << (pbank_lsb-bunk_enable)) >> 20));
+ }
+ }
+ }
+#endif /* CAVIUM_ONLY */
+
+ spd_ecc = get_dimm_ecc(node, &dimm_config_table[0], ddr_type);
+
+ VB_PRT(VBL_DEV, "Summary: - %d %s%s %dRx%d %s, row bits=%d, col bits=%d, bank bits=%d\n",
+ dimm_count, dimm_type_name, (dimm_count > 1) ? "s" : "",
+ num_ranks, dram_width, (spd_ecc) ? "ECC" : "non-ECC",
+ row_bits, col_bits, bank_bits);
+
+ // always print out the useful DIMM information...
+ for (i = 0; i < DDR_CFG_T_MAX_DIMMS; i++) {
+ if (i < dimm_count)
+ report_dimm(node, &dimm_config_table[i], i, ddr_interface_num,
+ num_ranks, dram_width, mem_size_mbytes / dimm_count);
+ else
+ if (validate_dimm(node, &dimm_config_table[i]) == 0) // only if there is a slot
+ printf("N%d.LMC%d.DIMM%d: Not Present\n", node, ddr_interface_num, i);
+ }
+
+ if (ddr_type == DDR4_DRAM) {
+ spd_cas_latency = ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE0)) << 0);
+ spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE1)) << 8);
+ spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE2)) << 16);
+ spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE3)) << 24);
+ } else {
+ spd_cas_latency = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_CAS_LATENCIES_LSB);
+ spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_CAS_LATENCIES_MSB)) << 8);
+ }
+ debug_print("spd_cas_latency : %#06x\n", spd_cas_latency );
+
+ if (ddr_type == DDR4_DRAM) {
+
+ /* No other values for DDR4 MTB and FTB are specified at the
+ * current time so don't bother reading them. Can't speculate how
+ * new values will be represented.
+ */
+ int spdMTB = 125;
+ int spdFTB = 1;
+
+ tAAmin
+ = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_LATENCY_TAAMIN)
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN);
+
+ ddr4_tCKAVGmin
+ = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN)
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN);
+
+ ddr4_tCKAVGmax
+ = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX)
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX);
+
+ ddr4_tRCDmin
+ = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN)
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN);
+
+ ddr4_tRPmin
+ = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN)
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN);
+
+ ddr4_tRASmin
+ = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8) +
+ ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN) & 0xff));
+
+ ddr4_tRCmin
+ = spdMTB * ((((read_spd(node, &dimm_config_table[0], DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) >> 4) & 0xf) << 8) +
+ ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN) & 0xff))
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN);
+
+ ddr4_tRFC1min
+ = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN) & 0xff) << 8) +
+ ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN) & 0xff));
+
+ ddr4_tRFC2min
+ = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN) & 0xff) << 8) +
+ ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN) & 0xff));
+
+ ddr4_tRFC4min
+ = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN) & 0xff) << 8) +
+ ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN) & 0xff));
+
+ ddr4_tFAWmin
+ = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN) & 0xf) << 8) +
+ ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN) & 0xff));
+
+ ddr4_tRRD_Smin
+ = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN)
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN);
+
+ ddr4_tRRD_Lmin
+ = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN)
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN);
+
+ ddr4_tCCD_Lmin
+ = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN)
+ + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN);
+
+ ddr_print("%-45s : %6d ps\n", "Medium Timebase (MTB)", spdMTB);
+ ddr_print("%-45s : %6d ps\n", "Fine Timebase (FTB)", spdFTB);
+
+ #define DDR4_TWR 15000
+ #define DDR4_TWTR_S 2500
+
+
+ tCKmin = ddr4_tCKAVGmin;
+ twr = DDR4_TWR;
+ trcd = ddr4_tRCDmin;
+ trrd = ddr4_tRRD_Smin;
+ trp = ddr4_tRPmin;
+ tras = ddr4_tRASmin;
+ trc = ddr4_tRCmin;
+ trfc = ddr4_tRFC1min;
+ twtr = DDR4_TWTR_S;
+ tfaw = ddr4_tFAWmin;
+
+ if (spd_rdimm) {
+ spd_addr_mirror = read_spd(node, &dimm_config_table[0], DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM) & 0x1;
+ } else {
+ spd_addr_mirror = read_spd(node, &dimm_config_table[0], DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE) & 0x1;
+ }
+ debug_print("spd_addr_mirror : %#06x\n", spd_addr_mirror );
+
+ } else { /* if (ddr_type == DDR4_DRAM) */
+ spd_mtb_dividend = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND);
+ spd_mtb_divisor = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR);
+ spd_tck_min = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN);
+ spd_taa_min = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_CAS_LATENCY_TAAMIN);
+
+ spd_twr = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN);
+ spd_trcd = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN);
+ spd_trrd = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN);
+ spd_trp = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN);
+ spd_tras = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN);
+ spd_tras |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_UPPER_NIBBLES_TRAS_TRC)&0xf) << 8);
+ spd_trc = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN);
+ spd_trc |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_UPPER_NIBBLES_TRAS_TRC)&0xf0) << 4);
+ spd_trfc = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN);
+ spd_trfc |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN)) << 8);
+ spd_twtr = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN);
+ spd_trtp = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN);
+ spd_tfaw = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN);
+ spd_tfaw |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_UPPER_NIBBLE_TFAW)&0xf) << 8);
+ spd_addr_mirror = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_ADDRESS_MAPPING) & 0x1;
+ spd_addr_mirror = spd_addr_mirror && !spd_rdimm; /* Only address mirror unbuffered dimms. */
+ ftb_Dividend = read_spd(node, &dimm_config_table[0], DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4;
+ ftb_Divisor = read_spd(node, &dimm_config_table[0], DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf;
+ ftb_Divisor = (ftb_Divisor == 0) ? 1 : ftb_Divisor; /* Make sure that it is not 0 */
+
+ debug_print("spd_twr : %#06x\n", spd_twr );
+ debug_print("spd_trcd : %#06x\n", spd_trcd);
+ debug_print("spd_trrd : %#06x\n", spd_trrd);
+ debug_print("spd_trp : %#06x\n", spd_trp );
+ debug_print("spd_tras : %#06x\n", spd_tras);
+ debug_print("spd_trc : %#06x\n", spd_trc );
+ debug_print("spd_trfc : %#06x\n", spd_trfc);
+ debug_print("spd_twtr : %#06x\n", spd_twtr);
+ debug_print("spd_trtp : %#06x\n", spd_trtp);
+ debug_print("spd_tfaw : %#06x\n", spd_tfaw);
+ debug_print("spd_addr_mirror : %#06x\n", spd_addr_mirror);
+
+ mtb_psec = spd_mtb_dividend * 1000 / spd_mtb_divisor;
+ tAAmin = mtb_psec * spd_taa_min;
+ tAAmin += ftb_Dividend * (SC_t) read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN) / ftb_Divisor;
+ tCKmin = mtb_psec * spd_tck_min;
+ tCKmin += ftb_Dividend * (SC_t) read_spd(node, &dimm_config_table[0], DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN) / ftb_Divisor;
+
+ twr = spd_twr * mtb_psec;
+ trcd = spd_trcd * mtb_psec;
+ trrd = spd_trrd * mtb_psec;
+ trp = spd_trp * mtb_psec;
+ tras = spd_tras * mtb_psec;
+ trc = spd_trc * mtb_psec;
+ trfc = spd_trfc * mtb_psec;
+ twtr = spd_twtr * mtb_psec;
+ trtp = spd_trtp * mtb_psec;
+ tfaw = spd_tfaw * mtb_psec;
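+        /* Illustrative DDR3 example (typical SPD values, not read from a
+           module): an MTB dividend/divisor of 1/8 gives mtb_psec = 125 ps;
+           an SPD tCKmin byte of 10 then yields tCKmin = 1250 ps (DDR3-1600)
+           and a tAAmin byte of 110 yields 13750 ps, before the signed FTB
+           correction is applied. */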
+
+ } /* if (ddr_type == DDR4_DRAM) */
+
+ if (ddr_type == DDR4_DRAM) {
+ ddr_print("%-45s : %6d ps (%ld MT/s)\n", "SDRAM Minimum Cycle Time (tCKAVGmin)",ddr4_tCKAVGmin,
+ pretty_psecs_to_mts(ddr4_tCKAVGmin));
+ ddr_print("%-45s : %6d ps\n", "SDRAM Maximum Cycle Time (tCKAVGmax)", ddr4_tCKAVGmax);
+ ddr_print("%-45s : %6d ps\n", "Minimum CAS Latency Time (tAAmin)", tAAmin);
+ ddr_print("%-45s : %6d ps\n", "Minimum RAS to CAS Delay Time (tRCDmin)", ddr4_tRCDmin);
+ ddr_print("%-45s : %6d ps\n", "Minimum Row Precharge Delay Time (tRPmin)", ddr4_tRPmin);
+ ddr_print("%-45s : %6d ps\n", "Minimum Active to Precharge Delay (tRASmin)", ddr4_tRASmin);
+ ddr_print("%-45s : %6d ps\n", "Minimum Active to Active/Refr. Delay (tRCmin)", ddr4_tRCmin);
+ ddr_print("%-45s : %6d ps\n", "Minimum Refresh Recovery Delay (tRFC1min)", ddr4_tRFC1min);
+ ddr_print("%-45s : %6d ps\n", "Minimum Refresh Recovery Delay (tRFC2min)", ddr4_tRFC2min);
+ ddr_print("%-45s : %6d ps\n", "Minimum Refresh Recovery Delay (tRFC4min)", ddr4_tRFC4min);
+ ddr_print("%-45s : %6d ps\n", "Minimum Four Activate Window Time (tFAWmin)", ddr4_tFAWmin);
+ ddr_print("%-45s : %6d ps\n", "Minimum Act. to Act. Delay (tRRD_Smin)", ddr4_tRRD_Smin);
+ ddr_print("%-45s : %6d ps\n", "Minimum Act. to Act. Delay (tRRD_Lmin)", ddr4_tRRD_Lmin);
+ ddr_print("%-45s : %6d ps\n", "Minimum CAS to CAS Delay Time (tCCD_Lmin)", ddr4_tCCD_Lmin);
+ } else {
+ ddr_print("Medium Timebase (MTB) : %6d ps\n", mtb_psec);
+ ddr_print("Minimum Cycle Time (tCKmin) : %6d ps (%ld MT/s)\n", tCKmin,
+ pretty_psecs_to_mts(tCKmin));
+ ddr_print("Minimum CAS Latency Time (tAAmin) : %6d ps\n", tAAmin);
+ ddr_print("Write Recovery Time (tWR) : %6d ps\n", twr);
+ ddr_print("Minimum RAS to CAS delay (tRCD) : %6d ps\n", trcd);
+ ddr_print("Minimum Row Active to Row Active delay (tRRD) : %6d ps\n", trrd);
+ ddr_print("Minimum Row Precharge Delay (tRP) : %6d ps\n", trp);
+ ddr_print("Minimum Active to Precharge (tRAS) : %6d ps\n", tras);
+ ddr_print("Minimum Active to Active/Refresh Delay (tRC) : %6d ps\n", trc);
+ ddr_print("Minimum Refresh Recovery Delay (tRFC) : %6d ps\n", trfc);
+ ddr_print("Internal write to read command delay (tWTR) : %6d ps\n", twtr);
+ ddr_print("Min Internal Rd to Precharge Cmd Delay (tRTP) : %6d ps\n", trtp);
+ ddr_print("Minimum Four Activate Window Delay (tFAW) : %6d ps\n", tfaw);
+ }
+
+
+ /* When the cycle time is within 1 psec of the minimum accept it
+ as a slight rounding error and adjust it to exactly the minimum
+ cycle time. This avoids an unnecessary warning. */
+ if (_abs(tclk_psecs - tCKmin) < 2)
+ tclk_psecs = tCKmin;
+
+ if (tclk_psecs < (uint64_t)tCKmin) {
+ ddr_print("WARNING!!!!: DDR Clock Rate (tCLK: %ld) exceeds DIMM specifications (tCKmin: %ld)!!!!\n",
+ tclk_psecs, (uint64_t)tCKmin);
+ }
+
+
+ ddr_print("DDR Clock Rate (tCLK) : %6lu ps\n", tclk_psecs);
+ ddr_print("Core Clock Rate (eCLK) : %6lu ps\n", eclk_psecs);
+
+ if ((s = lookup_env_parameter("ddr_use_ecc")) != NULL) {
+ use_ecc = !!strtoul(s, NULL, 0);
+ }
+ use_ecc = use_ecc && spd_ecc;
+
+ ddr_interface_bytemask = ddr_interface_64b
+ ? (use_ecc ? 0x1ff : 0xff)
+ : (use_ecc ? 0x01f : 0x0f); // FIXME? 81xx does diff from 70xx
+
+ ddr_print("DRAM Interface width: %d bits %s bytemask 0x%x\n",
+ ddr_interface_64b ? 64 : 32, use_ecc ? "+ECC" : "",
+ ddr_interface_bytemask);
+
+ ddr_print("\n------ Board Custom Configuration Settings ------\n");
+ ddr_print("%-45s : %d\n", "MIN_RTT_NOM_IDX ", custom_lmc_config->min_rtt_nom_idx);
+ ddr_print("%-45s : %d\n", "MAX_RTT_NOM_IDX ", custom_lmc_config->max_rtt_nom_idx);
+ ddr_print("%-45s : %d\n", "MIN_RODT_CTL ", custom_lmc_config->min_rodt_ctl);
+ ddr_print("%-45s : %d\n", "MAX_RODT_CTL ", custom_lmc_config->max_rodt_ctl);
+ ddr_print("%-45s : %d\n", "MIN_CAS_LATENCY ", custom_lmc_config->min_cas_latency);
+ ddr_print("%-45s : %d\n", "OFFSET_EN ", custom_lmc_config->offset_en);
+ ddr_print("%-45s : %d\n", "OFFSET_UDIMM ", custom_lmc_config->offset_udimm);
+ ddr_print("%-45s : %d\n", "OFFSET_RDIMM ", custom_lmc_config->offset_rdimm);
+ ddr_print("%-45s : %d\n", "DDR_RTT_NOM_AUTO ", custom_lmc_config->ddr_rtt_nom_auto);
+ ddr_print("%-45s : %d\n", "DDR_RODT_CTL_AUTO ", custom_lmc_config->ddr_rodt_ctl_auto);
+ if (spd_rdimm)
+ ddr_print("%-45s : %d\n", "RLEVEL_COMP_OFFSET", custom_lmc_config->rlevel_comp_offset_rdimm);
+ else
+ ddr_print("%-45s : %d\n", "RLEVEL_COMP_OFFSET", custom_lmc_config->rlevel_comp_offset_udimm);
+ ddr_print("%-45s : %d\n", "RLEVEL_COMPUTE ", custom_lmc_config->rlevel_compute);
+ ddr_print("%-45s : %d\n", "DDR2T_UDIMM ", custom_lmc_config->ddr2t_udimm);
+ ddr_print("%-45s : %d\n", "DDR2T_RDIMM ", custom_lmc_config->ddr2t_rdimm);
+ ddr_print("%-45s : %d\n", "FPRCH2 ", custom_lmc_config->fprch2);
+ ddr_print("-------------------------------------------------\n");
+
+
+ CL = divide_roundup(tAAmin, tclk_psecs);
+
+ ddr_print("Desired CAS Latency : %6d\n", CL);
+
+ min_cas_latency = custom_lmc_config->min_cas_latency;
+
+
+ if ((s = lookup_env_parameter("ddr_min_cas_latency")) != NULL) {
+ min_cas_latency = strtoul(s, NULL, 0);
+ }
+
+ {
+ int base_CL;
+ ddr_print("CAS Latencies supported in DIMM :");
+ base_CL = (ddr_type == DDR4_DRAM) ? 7 : 4;
+ for (i=0; i<32; ++i) {
+ if ((spd_cas_latency >> i) & 1) {
+ ddr_print(" %d", i+base_CL);
+ max_cas_latency = i+base_CL;
+ if (min_cas_latency == 0)
+ min_cas_latency = i+base_CL;
+ }
+ }
+ ddr_print("\n");
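+        /* Bit i of spd_cas_latency corresponds to CAS latency (i + base_CL);
+           e.g. (illustrative) a DDR4 bitmap of 0x3F8 marks CL 10..16 as
+           supported. */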
+
+ /* Use relaxed timing when running slower than the minimum
+ supported speed. Adjust timing to match the smallest supported
+ CAS Latency. */
+ if (CL < min_cas_latency) {
+ uint64_t adjusted_tclk = tAAmin / min_cas_latency;
+ CL = min_cas_latency;
+ ddr_print("Slow clock speed. Adjusting timing: tClk = %lu, Adjusted tClk = %ld\n",
+ tclk_psecs, adjusted_tclk);
+ tclk_psecs = adjusted_tclk;
+ }
+
+ if ((s = lookup_env_parameter("ddr_cas_latency")) != NULL) {
+ override_cas_latency = strtoul(s, NULL, 0);
+ }
+
+ /* Make sure that the selected cas latency is legal */
+ for (i=(CL-base_CL); i<32; ++i) {
+ if ((spd_cas_latency >> i) & 1) {
+ CL = i+base_CL;
+ break;
+ }
+ }
+ }
+
+ if (CL > max_cas_latency)
+ CL = max_cas_latency;
+
+ if (override_cas_latency != 0) {
+ CL = override_cas_latency;
+ }
+
+ ddr_print("CAS Latency : %6d\n", CL);
+
+ if ((CL * tCKmin) > 20000)
+ {
+ ddr_print("(CLactual * tCKmin) = %d exceeds 20 ns\n", (CL * tCKmin));
+ }
+
+ if ((num_banks != 4) && (num_banks != 8) && (num_banks != 16))
+ {
+ error_print("Unsupported number of banks %d. Must be 4 or 8 or 16.\n", num_banks);
+ ++fatal_error;
+ }
+
+ if ((num_ranks != 1) && (num_ranks != 2) && (num_ranks != 4))
+ {
+ error_print("Unsupported number of ranks: %d\n", num_ranks);
+ ++fatal_error;
+ }
+
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN81XX)) { // 88XX or 83XX, but not 81XX
+ if ((dram_width != 8) && (dram_width != 16) && (dram_width != 4)) {
+ error_print("Unsupported SDRAM Width, x%d. Must be x4, x8 or x16.\n", dram_width);
+ ++fatal_error;
+ }
+ } else if ((dram_width != 8) && (dram_width != 16)) { // 81XX can only do x8 or x16
+ error_print("Unsupported SDRAM Width, x%d. Must be x8 or x16.\n", dram_width);
+ ++fatal_error;
+ }
+
+
+ /*
+ ** Bail out here if things are not copasetic.
+ */
+ if (fatal_error)
+ return(-1);
+
+ /*
+ * 6.9.6 LMC RESET Initialization
+ *
+ * The purpose of this step is to assert/deassert the RESET# pin at the
+ * DDR3/DDR4 parts.
+ *
+ * This LMC RESET step is done for all enabled LMCs.
+ */
+ perform_lmc_reset(node, ddr_interface_num);
+
+ // Make sure scrambling is disabled during init...
+ {
+ bdk_lmcx_control_t lmc_control;
+
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ lmc_control.s.scramble_ena = 0;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG0(ddr_interface_num), 0);
+ DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG1(ddr_interface_num), 0);
+ DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG2(ddr_interface_num), 0);
+ }
+
+
+ odt_idx = dimm_count - 1;
+
+ switch (num_ranks) {
+ case 1:
+ odt_config = odt_1rank_config;
+ break;
+ case 2:
+ odt_config = odt_2rank_config;
+ break;
+ case 4:
+ odt_config = odt_4rank_config;
+ break;
+ default:
+ odt_config = disable_odt_config;
+ error_print("Unsupported number of ranks: %d\n", num_ranks);
+ ++fatal_error;
+ }
+
+
+ /* Parameters from DDR3 Specifications */
+#define DDR3_tREFI 7800000 /* 7.8 us */
+#define DDR3_ZQCS 80000ull /* 80 ns */
+#define DDR3_ZQCS_Interval 1280000000 /* 128ms/100 */
+#define DDR3_tCKE 5000 /* 5 ns */
+#define DDR3_tMRD 4 /* 4 nCK */
+#define DDR3_tDLLK 512 /* 512 nCK */
+#define DDR3_tMPRR 1 /* 1 nCK */
+#define DDR3_tWLMRD 40 /* 40 nCK */
+#define DDR3_tWLDQSEN 25 /* 25 nCK */
+
+ /* Parameters from DDR4 Specifications */
+#define DDR4_tMRD 8 /* 8 nCK */
+#define DDR4_tDLLK 768 /* 768 nCK */
+
+ /*
+ * 6.9.7 Early LMC Initialization
+ *
+ * All of DDR PLL, LMC CK, and LMC DRESET initializations must be
+ * completed prior to starting this LMC initialization sequence.
+ *
+ * Perform the following five substeps for early LMC initialization:
+ *
+ * 1. Software must ensure there are no pending DRAM transactions.
+ *
+ * 2. Write LMC(0)_CONFIG, LMC(0)_CONTROL, LMC(0)_TIMING_PARAMS0,
+ * LMC(0)_TIMING_PARAMS1, LMC(0)_MODEREG_PARAMS0,
+ * LMC(0)_MODEREG_PARAMS1, LMC(0)_DUAL_MEMCFG, LMC(0)_NXM,
+ * LMC(0)_WODT_MASK, LMC(0)_RODT_MASK, LMC(0)_COMP_CTL2,
+ * LMC(0)_PHY_CTL, LMC(0)_DIMM0/1_PARAMS, and LMC(0)_DIMM_CTL with
+ * appropriate values. All sections in this chapter can be used to
+ * derive proper register settings.
+ */
+
+ /* LMC(0)_CONFIG */
+ {
+ lmc_config.u = 0;
+
+ lmc_config.s.ecc_ena = use_ecc;
+ lmc_config.s.row_lsb = encode_row_lsb_ddr3(row_lsb, ddr_interface_64b);
+ lmc_config.s.pbank_lsb = encode_pbank_lsb_ddr3(pbank_lsb, ddr_interface_64b);
+
+ lmc_config.s.idlepower = 0; /* Disabled */
+
+ if ((s = lookup_env_parameter("ddr_idlepower")) != NULL) {
+ lmc_config.s.idlepower = strtoul(s, NULL, 0);
+ }
+
+ lmc_config.s.forcewrite = 0; /* Disabled */
+ lmc_config.s.ecc_adr = 1; /* Include memory reference address in the ECC */
+
+ if ((s = lookup_env_parameter("ddr_ecc_adr")) != NULL) {
+ lmc_config.s.ecc_adr = strtoul(s, NULL, 0);
+ }
+
+ lmc_config.s.reset = 0;
+
+ /*
+ * Program LMC0_CONFIG[24:18], ref_zqcs_int(6:0) to
+ * RND-DN(tREFI/clkPeriod/512) Program LMC0_CONFIG[36:25],
+ * ref_zqcs_int(18:7) to
+ * RND-DN(ZQCS_Interval/clkPeriod/(512*128)). Note that this
+ * value should always be greater than 32, to account for
+ * resistor calibration delays.
+ */
+
+ lmc_config.s.ref_zqcs_int = ((DDR3_tREFI/tclk_psecs/512) & 0x7f);
+ lmc_config.s.ref_zqcs_int |= ((max(33ull, (DDR3_ZQCS_Interval/(tclk_psecs/100)/(512*128))) & 0xfff) << 7);
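+        /* Illustrative values at tclk_psecs = 833 (integer division
+           throughout): 7800000/833/512 = 18 for the low 7 bits, and
+           1280000000/8/65536 = 2441 for the upper field, so
+           ref_zqcs_int is roughly (2441 << 7) | 18. */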
+
+
+ lmc_config.s.early_dqx = 1; /* Default to enabled */
+
+ if ((s = lookup_env_parameter("ddr_early_dqx")) == NULL)
+ s = lookup_env_parameter("ddr%d_early_dqx", ddr_interface_num);
+ if (s != NULL) {
+ lmc_config.s.early_dqx = strtoul(s, NULL, 0);
+ }
+
+ lmc_config.s.sref_with_dll = 0;
+
+ lmc_config.s.rank_ena = bunk_enable;
+ lmc_config.s.rankmask = rank_mask; /* Set later */
+ lmc_config.s.mirrmask = (spd_addr_mirror << 1 | spd_addr_mirror << 3) & rank_mask;
+ lmc_config.s.init_status = rank_mask; /* Set once and don't change it. */
+ lmc_config.s.early_unload_d0_r0 = 0;
+ lmc_config.s.early_unload_d0_r1 = 0;
+ lmc_config.s.early_unload_d1_r0 = 0;
+ lmc_config.s.early_unload_d1_r1 = 0;
+ lmc_config.s.scrz = 0;
+ // set 32-bit mode for real only when selected AND 81xx...
+ if (!ddr_interface_64b && CAVIUM_IS_MODEL(CAVIUM_CN81XX)) {
+ lmc_config.s.mode32b = 1;
+ }
+ VB_PRT(VBL_DEV, "%-45s : %d\n", "MODE32B (init)", lmc_config.s.mode32b);
+ lmc_config.s.mode_x4dev = (dram_width == 4) ? 1 : 0;
+ lmc_config.s.bg2_enable = ((ddr_type == DDR4_DRAM) && (dram_width == 16)) ? 0 : 1;
+
+ if ((s = lookup_env_parameter_ull("ddr_config")) != NULL) {
+ lmc_config.u = strtoull(s, NULL, 0);
+ }
+ ddr_print("LMC_CONFIG : 0x%016lx\n", lmc_config.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+ }
+
+ /* LMC(0)_CONTROL */
+ {
+ bdk_lmcx_control_t lmc_control;
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ lmc_control.s.rdimm_ena = spd_rdimm;
+ lmc_control.s.bwcnt = 0; /* Clear counter later */
+ if (spd_rdimm)
+ lmc_control.s.ddr2t = (safe_ddr_flag ? 1 : custom_lmc_config->ddr2t_rdimm );
+ else
+ lmc_control.s.ddr2t = (safe_ddr_flag ? 1 : custom_lmc_config->ddr2t_udimm );
+ lmc_control.s.pocas = 0;
+ lmc_control.s.fprch2 = (safe_ddr_flag ? 2 : custom_lmc_config->fprch2 );
+ lmc_control.s.throttle_rd = safe_ddr_flag ? 1 : 0;
+ lmc_control.s.throttle_wr = safe_ddr_flag ? 1 : 0;
+ lmc_control.s.inorder_rd = safe_ddr_flag ? 1 : 0;
+ lmc_control.s.inorder_wr = safe_ddr_flag ? 1 : 0;
+ lmc_control.cn81xx.elev_prio_dis = safe_ddr_flag ? 1 : 0;
+ lmc_control.s.nxm_write_en = 0; /* discards writes to
+ addresses that don't exist
+ in the DRAM */
+ lmc_control.s.max_write_batch = 8;
+ lmc_control.s.xor_bank = 1;
+ lmc_control.s.auto_dclkdis = 1;
+ lmc_control.s.int_zqcs_dis = 0;
+ lmc_control.s.ext_zqcs_dis = 0;
+ lmc_control.s.bprch = 1;
+ lmc_control.s.wodt_bprch = 1;
+ lmc_control.s.rodt_bprch = 1;
+
+ if ((s = lookup_env_parameter("ddr_xor_bank")) != NULL) {
+ lmc_control.s.xor_bank = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_2t")) != NULL) {
+ lmc_control.s.ddr2t = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_fprch2")) != NULL) {
+ lmc_control.s.fprch2 = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_bprch")) != NULL) {
+ lmc_control.s.bprch = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_wodt_bprch")) != NULL) {
+ lmc_control.s.wodt_bprch = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_rodt_bprch")) != NULL) {
+ lmc_control.s.rodt_bprch = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_int_zqcs_dis")) != NULL) {
+ lmc_control.s.int_zqcs_dis = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_ext_zqcs_dis")) != NULL) {
+ lmc_control.s.ext_zqcs_dis = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter_ull("ddr_control")) != NULL) {
+ lmc_control.u = strtoull(s, NULL, 0);
+ }
+ ddr_print("LMC_CONTROL : 0x%016lx\n", lmc_control.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+ }
+
+ /* LMC(0)_TIMING_PARAMS0 */
+ {
+ unsigned trp_value;
+ bdk_lmcx_timing_params0_t lmc_timing_params0;
+ lmc_timing_params0.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS0(ddr_interface_num));
+
+ trp_value = divide_roundup(trp, tclk_psecs) - 1;
+ ddr_print("TIMING_PARAMS0[TRP]: NEW 0x%x, OLD 0x%x\n", trp_value,
+ trp_value + (unsigned)(divide_roundup(max(4*tclk_psecs, 7500ull), tclk_psecs)) - 4);
+#if 1
+ if ((s = lookup_env_parameter_ull("ddr_use_old_trp")) != NULL) {
+ if (!!strtoull(s, NULL, 0)) {
+ trp_value += divide_roundup(max(4*tclk_psecs, 7500ull), tclk_psecs) - 4;
+ ddr_print("TIMING_PARAMS0[trp]: USING OLD 0x%x\n", trp_value);
+ }
+ }
+#endif
+
+ lmc_timing_params0.s.txpr = divide_roundup(max(5*tclk_psecs, trfc+10000ull), 16*tclk_psecs);
+ lmc_timing_params0.s.tzqinit = divide_roundup(max(512*tclk_psecs, 640000ull), (256*tclk_psecs));
+ lmc_timing_params0.s.trp = trp_value & 0x1f;
+ lmc_timing_params0.s.tcksre = divide_roundup(max(5*tclk_psecs, 10000ull), tclk_psecs) - 1;
+
+ if (ddr_type == DDR4_DRAM) {
+ lmc_timing_params0.s.tzqcs = divide_roundup(128*tclk_psecs, (16*tclk_psecs)); /* Always 8. */
+ lmc_timing_params0.s.tcke = divide_roundup(max(3*tclk_psecs, (uint64_t) DDR3_tCKE), tclk_psecs) - 1;
+ lmc_timing_params0.s.tmrd = divide_roundup((DDR4_tMRD*tclk_psecs), tclk_psecs) - 1;
+ //lmc_timing_params0.s.tmod = divide_roundup(max(24*tclk_psecs, 15000ull), tclk_psecs) - 1;
+ lmc_timing_params0.s.tmod = 25; /* 25 is the max allowed */
+ lmc_timing_params0.s.tdllk = divide_roundup(DDR4_tDLLK, 256);
+ } else {
+ lmc_timing_params0.s.tzqcs = divide_roundup(max(64*tclk_psecs, DDR3_ZQCS), (16*tclk_psecs));
+ lmc_timing_params0.s.tcke = divide_roundup(DDR3_tCKE, tclk_psecs) - 1;
+ lmc_timing_params0.s.tmrd = divide_roundup((DDR3_tMRD*tclk_psecs), tclk_psecs) - 1;
+ lmc_timing_params0.s.tmod = divide_roundup(max(12*tclk_psecs, 15000ull), tclk_psecs) - 1;
+ lmc_timing_params0.s.tdllk = divide_roundup(DDR3_tDLLK, 256);
+ }
+
+ if ((s = lookup_env_parameter_ull("ddr_timing_params0")) != NULL) {
+ lmc_timing_params0.u = strtoull(s, NULL, 0);
+ }
+ ddr_print("TIMING_PARAMS0 : 0x%016lx\n", lmc_timing_params0.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS0(ddr_interface_num), lmc_timing_params0.u);
+ }
+
+ /* LMC(0)_TIMING_PARAMS1 */
+ {
+ int txp, temp_trcd, trfc_dlr;
+ bdk_lmcx_timing_params1_t lmc_timing_params1;
+ lmc_timing_params1.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num));
+
+ lmc_timing_params1.s.tmprr = divide_roundup(DDR3_tMPRR*tclk_psecs, tclk_psecs) - 1;
+
+ lmc_timing_params1.s.tras = divide_roundup(tras, tclk_psecs) - 1;
+
+ // NOTE: this is reworked for pass 2.x
+ temp_trcd = divide_roundup(trcd, tclk_psecs);
+#if 1
+ if (temp_trcd > 15)
+ ddr_print("TIMING_PARAMS1[trcd]: need extension bit for 0x%x\n", temp_trcd);
+#endif
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (temp_trcd > 15)) {
+ /* Let .trcd=0 serve as a flag that the field has
+ overflowed. Must use Additive Latency mode as a
+ workaround. */
+ temp_trcd = 0;
+ }
+ lmc_timing_params1.s.trcd = temp_trcd & 0x0f;
+ lmc_timing_params1.s.trcd_ext = (temp_trcd >> 4) & 1;
+
+ lmc_timing_params1.s.twtr = divide_roundup(twtr, tclk_psecs) - 1;
+ lmc_timing_params1.s.trfc = divide_roundup(trfc, 8*tclk_psecs);
+
+ // workaround needed for all THUNDER chips thru T88 Pass 2.0,
+ // but not 81xx and 83xx...
+ if ((ddr_type == DDR4_DRAM) && CAVIUM_IS_MODEL(CAVIUM_CN88XX)) {
+ /* Workaround bug 24006. Use Trrd_l. */
+ lmc_timing_params1.s.trrd = divide_roundup(ddr4_tRRD_Lmin, tclk_psecs) - 2;
+ } else
+ lmc_timing_params1.s.trrd = divide_roundup(trrd, tclk_psecs) - 2;
+
+ /*
+ ** tXP = max( 3nCK, 7.5 ns) DDR3-800 tCLK = 2500 psec
+ ** tXP = max( 3nCK, 7.5 ns) DDR3-1066 tCLK = 1875 psec
+ ** tXP = max( 3nCK, 6.0 ns) DDR3-1333 tCLK = 1500 psec
+ ** tXP = max( 3nCK, 6.0 ns) DDR3-1600 tCLK = 1250 psec
+ ** tXP = max( 3nCK, 6.0 ns) DDR3-1866 tCLK = 1071 psec
+ ** tXP = max( 3nCK, 6.0 ns) DDR3-2133 tCLK = 937 psec
+ */
+ txp = (tclk_psecs < 1875) ? 6000 : 7500;
+ // NOTE: this is reworked for pass 2.x
+ int temp_txp = divide_roundup(max(3*tclk_psecs, (unsigned)txp), tclk_psecs) - 1;
+#if 1
+ if (temp_txp > 7)
+ ddr_print("TIMING_PARAMS1[txp]: need extension bit for 0x%x\n", temp_txp);
+#endif
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (temp_txp > 7)) {
+ temp_txp = 7; // max it out
+ }
+ lmc_timing_params1.s.txp = temp_txp & 7;
+ lmc_timing_params1.s.txp_ext = (temp_txp >> 3) & 1;
+
+ lmc_timing_params1.s.twlmrd = divide_roundup(DDR3_tWLMRD*tclk_psecs, 4*tclk_psecs);
+ lmc_timing_params1.s.twldqsen = divide_roundup(DDR3_tWLDQSEN*tclk_psecs, 4*tclk_psecs);
+ lmc_timing_params1.s.tfaw = divide_roundup(tfaw, 4*tclk_psecs);
+ lmc_timing_params1.s.txpdll = divide_roundup(max(10*tclk_psecs, 24000ull), tclk_psecs) - 1;
+
+ if ((ddr_type == DDR4_DRAM) && is_3ds_dimm) {
+ /*
+ 4 Gb: tRFC_DLR = 90 ns
+ 8 Gb: tRFC_DLR = 120 ns
+ 16 Gb: tRFC_DLR = 190 ns FIXME?
+ */
+ // RNDUP[tRFC_DLR(ns) / (8 * TCYC(ns))]
+ if (die_capacity == 0x1000) // 4 Gbit
+ trfc_dlr = 90;
+ else if (die_capacity == 0x2000) // 8 Gbit
+ trfc_dlr = 120;
+ else if (die_capacity == 0x4000) // 16 Gbit
+ trfc_dlr = 190;
+ else
+ trfc_dlr = 0;
+
+ if (trfc_dlr == 0) {
+ ddr_print("N%d.LMC%d: ERROR: tRFC_DLR: die_capacity %u Mbit is illegal\n",
+ node, ddr_interface_num, die_capacity);
+ } else {
+ lmc_timing_params1.s.trfc_dlr = divide_roundup(trfc_dlr * 1000UL, 8*tclk_psecs);
+ ddr_print("N%d.LMC%d: TIMING_PARAMS1[trfc_dlr] set to %u\n",
+ node, ddr_interface_num, lmc_timing_params1.s.trfc_dlr);
+ }
+ }
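+        /* Example (illustrative): an 8 Gbit 3DS die selects trfc_dlr = 120 ns,
+           so at tclk_psecs = 833 the field above would be
+           divide_roundup(120000, 8 * 833) = 19. */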
+
+ if ((s = lookup_env_parameter_ull("ddr_timing_params1")) != NULL) {
+ lmc_timing_params1.u = strtoull(s, NULL, 0);
+ }
+ ddr_print("TIMING_PARAMS1 : 0x%016lx\n", lmc_timing_params1.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num), lmc_timing_params1.u);
+ }
+
+ /* LMC(0)_TIMING_PARAMS2 */
+ if (ddr_type == DDR4_DRAM) {
+ bdk_lmcx_timing_params1_t lmc_timing_params1;
+ bdk_lmcx_timing_params2_t lmc_timing_params2;
+ lmc_timing_params1.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num));
+ lmc_timing_params2.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS2(ddr_interface_num));
+ ddr_print("TIMING_PARAMS2 : 0x%016lx\n", lmc_timing_params2.u);
+
+ //lmc_timing_params2.s.trrd_l = divide_roundup(ddr4_tRRD_Lmin, tclk_psecs) - 1;
+ // NOTE: this is reworked for pass 2.x
+ int temp_trrd_l = divide_roundup(ddr4_tRRD_Lmin, tclk_psecs) - 2;
+#if 1
+ if (temp_trrd_l > 7)
+ ddr_print("TIMING_PARAMS2[trrd_l]: need extension bit for 0x%x\n", temp_trrd_l);
+#endif
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (temp_trrd_l > 7)) {
+ temp_trrd_l = 7; // max it out
+ }
+ lmc_timing_params2.s.trrd_l = temp_trrd_l & 7;
+ lmc_timing_params2.s.trrd_l_ext = (temp_trrd_l >> 3) & 1;
+
+ lmc_timing_params2.s.twtr_l = divide_nint(max(4*tclk_psecs, 7500ull), tclk_psecs) - 1; // correct for 1600-2400
+ lmc_timing_params2.s.t_rw_op_max = 7;
+ lmc_timing_params2.s.trtp = divide_roundup(max(4*tclk_psecs, 7500ull), tclk_psecs) - 1;
+
+ ddr_print("TIMING_PARAMS2 : 0x%016lx\n", lmc_timing_params2.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS2(ddr_interface_num), lmc_timing_params2.u);
+
+ /* Workaround Errata 25823 - LMC: Possible DDR4 tWTR_L not met
+ for Write-to-Read operations to the same Bank Group */
+ if (lmc_timing_params1.s.twtr < (lmc_timing_params2.s.twtr_l - 4)) {
+ lmc_timing_params1.s.twtr = lmc_timing_params2.s.twtr_l - 4;
+ ddr_print("ERRATA 25823: NEW: TWTR: %d, TWTR_L: %d\n", lmc_timing_params1.s.twtr, lmc_timing_params2.s.twtr_l);
+ ddr_print("TIMING_PARAMS1 : 0x%016lx\n", lmc_timing_params1.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num), lmc_timing_params1.u);
+ }
+ }
+
+ /* LMC(0)_MODEREG_PARAMS0 */
+ {
+ bdk_lmcx_modereg_params0_t lmc_modereg_params0;
+ int param;
+
+ lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
+
+ if (ddr_type == DDR4_DRAM) {
+ lmc_modereg_params0.s.cwl = 0; /* 1600 (1250ps) */
+ if (tclk_psecs < 1250)
+ lmc_modereg_params0.s.cwl = 1; /* 1866 (1072ps) */
+ if (tclk_psecs < 1072)
+ lmc_modereg_params0.s.cwl = 2; /* 2133 (938ps) */
+ if (tclk_psecs < 938)
+ lmc_modereg_params0.s.cwl = 3; /* 2400 (833ps) */
+ if (tclk_psecs < 833)
+ lmc_modereg_params0.s.cwl = 4; /* 2666 (750ps) */
+ if (tclk_psecs < 750)
+ lmc_modereg_params0.s.cwl = 5; /* 3200 (625ps) */
+ } else {
+ /*
+ ** CSR CWL CAS write Latency
+ ** === === =================================
+ ** 0 5 ( tCK(avg) >= 2.5 ns)
+ ** 1 6 (2.5 ns > tCK(avg) >= 1.875 ns)
+ ** 2 7 (1.875 ns > tCK(avg) >= 1.5 ns)
+ ** 3 8 (1.5 ns > tCK(avg) >= 1.25 ns)
+ ** 4 9 (1.25 ns > tCK(avg) >= 1.07 ns)
+ ** 5 10 (1.07 ns > tCK(avg) >= 0.935 ns)
+ ** 6 11 (0.935 ns > tCK(avg) >= 0.833 ns)
+ ** 7 12 (0.833 ns > tCK(avg) >= 0.75 ns)
+ */
+
+ lmc_modereg_params0.s.cwl = 0;
+ if (tclk_psecs < 2500)
+ lmc_modereg_params0.s.cwl = 1;
+ if (tclk_psecs < 1875)
+ lmc_modereg_params0.s.cwl = 2;
+ if (tclk_psecs < 1500)
+ lmc_modereg_params0.s.cwl = 3;
+ if (tclk_psecs < 1250)
+ lmc_modereg_params0.s.cwl = 4;
+ if (tclk_psecs < 1070)
+ lmc_modereg_params0.s.cwl = 5;
+ if (tclk_psecs < 935)
+ lmc_modereg_params0.s.cwl = 6;
+ if (tclk_psecs < 833)
+ lmc_modereg_params0.s.cwl = 7;
+ }
+
+ if ((s = lookup_env_parameter("ddr_cwl")) != NULL) {
+ lmc_modereg_params0.s.cwl = strtoul(s, NULL, 0) - 5;
+ }
+
+ if (ddr_type == DDR4_DRAM) {
+ ddr_print("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
+ lmc_modereg_params0.s.cwl + 9
+ + ((lmc_modereg_params0.s.cwl>2) ? (lmc_modereg_params0.s.cwl-3) * 2 : 0),
+ lmc_modereg_params0.s.cwl);
+ } else {
+ ddr_print("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
+ lmc_modereg_params0.s.cwl + 5,
+ lmc_modereg_params0.s.cwl);
+ }
+
+ lmc_modereg_params0.s.mprloc = 0;
+ lmc_modereg_params0.s.mpr = 0;
+ lmc_modereg_params0.s.dll = (ddr_type == DDR4_DRAM)?1:0; /* disable(0) for DDR3 and enable(1) for DDR4 */
+ lmc_modereg_params0.s.al = 0;
+ lmc_modereg_params0.s.wlev = 0; /* Read Only */
+ lmc_modereg_params0.s.tdqs = ((ddr_type == DDR4_DRAM) || (dram_width != 8))?0:1; /* disable(0) for DDR4 and x4/x16 DDR3 */
+ lmc_modereg_params0.s.qoff = 0;
+ //lmc_modereg_params0.s.bl = 0; /* Don't touch block dirty logic */
+
+ if ((s = lookup_env_parameter("ddr_cl")) != NULL) {
+ CL = strtoul(s, NULL, 0);
+ ddr_print("CAS Latency : %6d\n", CL);
+ }
+
+ if (ddr_type == DDR4_DRAM) {
+ lmc_modereg_params0.s.cl = 0x0;
+ if (CL > 9)
+ lmc_modereg_params0.s.cl = 0x1;
+ if (CL > 10)
+ lmc_modereg_params0.s.cl = 0x2;
+ if (CL > 11)
+ lmc_modereg_params0.s.cl = 0x3;
+ if (CL > 12)
+ lmc_modereg_params0.s.cl = 0x4;
+ if (CL > 13)
+ lmc_modereg_params0.s.cl = 0x5;
+ if (CL > 14)
+ lmc_modereg_params0.s.cl = 0x6;
+ if (CL > 15)
+ lmc_modereg_params0.s.cl = 0x7;
+ if (CL > 16)
+ lmc_modereg_params0.s.cl = 0x8;
+ if (CL > 18)
+ lmc_modereg_params0.s.cl = 0x9;
+ if (CL > 20)
+ lmc_modereg_params0.s.cl = 0xA;
+ if (CL > 24)
+ lmc_modereg_params0.s.cl = 0xB;
+ } else {
+ lmc_modereg_params0.s.cl = 0x2;
+ if (CL > 5)
+ lmc_modereg_params0.s.cl = 0x4;
+ if (CL > 6)
+ lmc_modereg_params0.s.cl = 0x6;
+ if (CL > 7)
+ lmc_modereg_params0.s.cl = 0x8;
+ if (CL > 8)
+ lmc_modereg_params0.s.cl = 0xA;
+ if (CL > 9)
+ lmc_modereg_params0.s.cl = 0xC;
+ if (CL > 10)
+ lmc_modereg_params0.s.cl = 0xE;
+ if (CL > 11)
+ lmc_modereg_params0.s.cl = 0x1;
+ if (CL > 12)
+ lmc_modereg_params0.s.cl = 0x3;
+ if (CL > 13)
+ lmc_modereg_params0.s.cl = 0x5;
+ if (CL > 14)
+ lmc_modereg_params0.s.cl = 0x7;
+ if (CL > 15)
+ lmc_modereg_params0.s.cl = 0x9;
+ }
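+        /* Reading the cascades above (illustrative): a DDR4 CL of 17 lands
+           in the "CL > 16" case and encodes as 0x8; a DDR3 CL of 11 encodes
+           as 0xE. */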
+
+ lmc_modereg_params0.s.rbt = 0; /* Read Only. */
+ lmc_modereg_params0.s.tm = 0;
+ lmc_modereg_params0.s.dllr = 0;
+
+ param = divide_roundup(twr, tclk_psecs);
+
+ if (ddr_type == DDR4_DRAM) { /* DDR4 */
+ lmc_modereg_params0.s.wrp = 1;
+ if (param > 12)
+ lmc_modereg_params0.s.wrp = 2;
+ if (param > 14)
+ lmc_modereg_params0.s.wrp = 3;
+ if (param > 16)
+ lmc_modereg_params0.s.wrp = 4;
+ if (param > 18)
+ lmc_modereg_params0.s.wrp = 5;
+ if (param > 20)
+ lmc_modereg_params0.s.wrp = 6;
+ if (param > 24) /* RESERVED in DDR4 spec */
+ lmc_modereg_params0.s.wrp = 7;
+ } else { /* DDR3 */
+ lmc_modereg_params0.s.wrp = 1;
+ if (param > 5)
+ lmc_modereg_params0.s.wrp = 2;
+ if (param > 6)
+ lmc_modereg_params0.s.wrp = 3;
+ if (param > 7)
+ lmc_modereg_params0.s.wrp = 4;
+ if (param > 8)
+ lmc_modereg_params0.s.wrp = 5;
+ if (param > 10)
+ lmc_modereg_params0.s.wrp = 6;
+ if (param > 12)
+ lmc_modereg_params0.s.wrp = 7;
+ }
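+        /* Example (illustrative): twr = 15000 ps at tclk_psecs = 833 gives
+           param = divide_roundup(15000, 833) = 19, which the DDR4 cascade
+           above encodes as wrp = 5. */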
+
+ lmc_modereg_params0.s.ppd = 0;
+
+ if ((s = lookup_env_parameter("ddr_wrp")) != NULL) {
+ lmc_modereg_params0.s.wrp = strtoul(s, NULL, 0);
+ }
+
+ ddr_print("%-45s : %d, [0x%x]\n", "Write recovery for auto precharge WRP, [CSR]",
+ param, lmc_modereg_params0.s.wrp);
+
+ if ((s = lookup_env_parameter_ull("ddr_modereg_params0")) != NULL) {
+ lmc_modereg_params0.u = strtoull(s, NULL, 0);
+ }
+ ddr_print("MODEREG_PARAMS0 : 0x%016lx\n", lmc_modereg_params0.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
+ }
+
+ /* LMC(0)_MODEREG_PARAMS1 */
+ {
+ bdk_lmcx_modereg_params1_t lmc_modereg_params1;
+
+ lmc_modereg_params1.u = odt_config[odt_idx].odt_mask1.u;
+
+#ifdef CAVIUM_ONLY
+ /* Special request: mismatched DIMM support. Slot 0: 2-Rank, Slot 1: 1-Rank */
+ if (rank_mask == 0x7) { /* 2-Rank, 1-Rank */
+ lmc_modereg_params1.s.rtt_nom_00 = 0;
+ lmc_modereg_params1.s.rtt_nom_01 = 3; /* rttnom_40ohm */
+ lmc_modereg_params1.s.rtt_nom_10 = 3; /* rttnom_40ohm */
+ lmc_modereg_params1.s.rtt_nom_11 = 0;
+ dyn_rtt_nom_mask = 0x6;
+ }
+#endif /* CAVIUM_ONLY */
+
+ if ((s = lookup_env_parameter("ddr_rtt_nom_mask")) != NULL) {
+ dyn_rtt_nom_mask = strtoul(s, NULL, 0);
+ }
+
+
+ /* Save the original rtt_nom settings before sweeping through settings. */
+ default_rtt_nom[0] = lmc_modereg_params1.s.rtt_nom_00;
+ default_rtt_nom[1] = lmc_modereg_params1.s.rtt_nom_01;
+ default_rtt_nom[2] = lmc_modereg_params1.s.rtt_nom_10;
+ default_rtt_nom[3] = lmc_modereg_params1.s.rtt_nom_11;
+
+ ddr_rtt_nom_auto = custom_lmc_config->ddr_rtt_nom_auto;
+
+ for (i=0; i<4; ++i) {
+ uint64_t value;
+ if ((s = lookup_env_parameter("ddr_rtt_nom_%1d%1d", !!(i&2), !!(i&1))) == NULL)
+ s = lookup_env_parameter("ddr%d_rtt_nom_%1d%1d", ddr_interface_num, !!(i&2), !!(i&1));
+ if (s != NULL) {
+ value = strtoul(s, NULL, 0);
+ lmc_modereg_params1.u &= ~((uint64_t)0x7 << (i*12+9));
+ lmc_modereg_params1.u |= ( (value & 0x7) << (i*12+9));
+ default_rtt_nom[i] = value;
+ ddr_rtt_nom_auto = 0;
+ }
+ }
+
+ if ((s = lookup_env_parameter("ddr_rtt_nom")) == NULL)
+ s = lookup_env_parameter("ddr%d_rtt_nom", ddr_interface_num);
+ if (s != NULL) {
+ uint64_t value;
+ value = strtoul(s, NULL, 0);
+
+ if (dyn_rtt_nom_mask & 1)
+ default_rtt_nom[0] = lmc_modereg_params1.s.rtt_nom_00 = value;
+ if (dyn_rtt_nom_mask & 2)
+ default_rtt_nom[1] = lmc_modereg_params1.s.rtt_nom_01 = value;
+ if (dyn_rtt_nom_mask & 4)
+ default_rtt_nom[2] = lmc_modereg_params1.s.rtt_nom_10 = value;
+ if (dyn_rtt_nom_mask & 8)
+ default_rtt_nom[3] = lmc_modereg_params1.s.rtt_nom_11 = value;
+
+ ddr_rtt_nom_auto = 0;
+ }
+
+ if ((s = lookup_env_parameter("ddr_rtt_wr")) != NULL) {
+ uint64_t value = strtoul(s, NULL, 0);
+ for (i=0; i<4; ++i) {
+ INSRT_WR(&lmc_modereg_params1.u, i, value);
+ }
+ }
+
+ for (i = 0; i < 4; ++i) {
+ uint64_t value;
+ if ((s = lookup_env_parameter("ddr_rtt_wr_%1d%1d", !!(i&2), !!(i&1))) == NULL)
+ s = lookup_env_parameter("ddr%d_rtt_wr_%1d%1d", ddr_interface_num, !!(i&2), !!(i&1));
+ if (s != NULL) {
+ value = strtoul(s, NULL, 0);
+ INSRT_WR(&lmc_modereg_params1.u, i, value);
+ }
+ }
+
+ // Make sure pass 1 has valid RTT_WR settings, because
+ // configuration files may be set-up for pass 2, and
+ // pass 1 supports no RTT_WR extension bits
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
+ for (i = 0; i < 4; ++i) {
+ if (EXTR_WR(lmc_modereg_params1.u, i) > 3) { // if 80 or undefined
+ INSRT_WR(&lmc_modereg_params1.u, i, 1); // FIXME? always insert 120
+ ddr_print("RTT_WR_%d%d set to 120 for CN88XX pass 1\n", !!(i&2), i&1);
+ }
+ }
+ }
+ if ((s = lookup_env_parameter("ddr_dic")) != NULL) {
+ uint64_t value = strtoul(s, NULL, 0);
+ for (i=0; i<4; ++i) {
+ lmc_modereg_params1.u &= ~((uint64_t)0x3 << (i*12+7));
+ lmc_modereg_params1.u |= ( (value & 0x3) << (i*12+7));
+ }
+ }
+
+ for (i=0; i<4; ++i) {
+ uint64_t value;
+ if ((s = lookup_env_parameter("ddr_dic_%1d%1d", !!(i&2), !!(i&1))) != NULL) {
+ value = strtoul(s, NULL, 0);
+ lmc_modereg_params1.u &= ~((uint64_t)0x3 << (i*12+7));
+ lmc_modereg_params1.u |= ( (value & 0x3) << (i*12+7));
+ }
+ }
+
+ if ((s = lookup_env_parameter_ull("ddr_modereg_params1")) != NULL) {
+ lmc_modereg_params1.u = strtoull(s, NULL, 0);
+ }
+
+ ddr_print("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_11],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_10],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_01],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00],
+ lmc_modereg_params1.s.rtt_nom_11,
+ lmc_modereg_params1.s.rtt_nom_10,
+ lmc_modereg_params1.s.rtt_nom_01,
+ lmc_modereg_params1.s.rtt_nom_00);
+
+ ddr_print("RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 3)],
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 2)],
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 1)],
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 0)],
+ EXTR_WR(lmc_modereg_params1.u, 3),
+ EXTR_WR(lmc_modereg_params1.u, 2),
+ EXTR_WR(lmc_modereg_params1.u, 1),
+ EXTR_WR(lmc_modereg_params1.u, 0));
+
+ ddr_print("DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->dic_ohms[lmc_modereg_params1.s.dic_11],
+ imp_values->dic_ohms[lmc_modereg_params1.s.dic_10],
+ imp_values->dic_ohms[lmc_modereg_params1.s.dic_01],
+ imp_values->dic_ohms[lmc_modereg_params1.s.dic_00],
+ lmc_modereg_params1.s.dic_11,
+ lmc_modereg_params1.s.dic_10,
+ lmc_modereg_params1.s.dic_01,
+ lmc_modereg_params1.s.dic_00);
+
+ ddr_print("MODEREG_PARAMS1 : 0x%016lx\n", lmc_modereg_params1.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num), lmc_modereg_params1.u);
+
+ } /* LMC(0)_MODEREG_PARAMS1 */
+
+ /* LMC(0)_MODEREG_PARAMS2 */
+ if (ddr_type == DDR4_DRAM) {
+ bdk_lmcx_modereg_params2_t lmc_modereg_params2;
+ lmc_modereg_params2.u = odt_config[odt_idx].odt_mask2.u;
+
+ for (i=0; i<4; ++i) {
+ uint64_t value;
+ if ((s = lookup_env_parameter("ddr_rtt_park_%1d%1d", !!(i&2), !!(i&1))) != NULL) {
+ value = strtoul(s, NULL, 0);
+ lmc_modereg_params2.u &= ~((uint64_t)0x7 << (i*10+0));
+ lmc_modereg_params2.u |= ( (value & 0x7) << (i*10+0));
+ }
+ }
+
+ if ((s = lookup_env_parameter("ddr_rtt_park")) != NULL) {
+ uint64_t value = strtoul(s, NULL, 0);
+ for (i=0; i<4; ++i) {
+ lmc_modereg_params2.u &= ~((uint64_t)0x7 << (i*10+0));
+ lmc_modereg_params2.u |= ( (value & 0x7) << (i*10+0));
+ }
+ }
+
+ if ((s = lookup_env_parameter_ull("ddr_modereg_params2")) != NULL) {
+ lmc_modereg_params2.u = strtoull(s, NULL, 0);
+ }
+
+ ddr_print("RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_11],
+ imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_10],
+ imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_01],
+ imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_00],
+ lmc_modereg_params2.s.rtt_park_11,
+ lmc_modereg_params2.s.rtt_park_10,
+ lmc_modereg_params2.s.rtt_park_01,
+ lmc_modereg_params2.s.rtt_park_00);
+
+ ddr_print("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE",
+ lmc_modereg_params2.s.vref_range_11,
+ lmc_modereg_params2.s.vref_range_10,
+ lmc_modereg_params2.s.vref_range_01,
+ lmc_modereg_params2.s.vref_range_00);
+
+ ddr_print("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE",
+ lmc_modereg_params2.s.vref_value_11,
+ lmc_modereg_params2.s.vref_value_10,
+ lmc_modereg_params2.s.vref_value_01,
+ lmc_modereg_params2.s.vref_value_00);
+
+ ddr_print("MODEREG_PARAMS2 : 0x%016lx\n", lmc_modereg_params2.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num), lmc_modereg_params2.u);
+
+ } /* LMC(0)_MODEREG_PARAMS2 */
+
+ /* LMC(0)_MODEREG_PARAMS3 */
+ if (ddr_type == DDR4_DRAM) {
+ bdk_lmcx_modereg_params3_t lmc_modereg_params3;
+
+ lmc_modereg_params3.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS3(ddr_interface_num));
+
+ //lmc_modereg_params3.s.max_pd =
+ //lmc_modereg_params3.s.tc_ref =
+ //lmc_modereg_params3.s.vref_mon =
+ //lmc_modereg_params3.s.cal =
+ //lmc_modereg_params3.s.sre_abort =
+ //lmc_modereg_params3.s.rd_preamble =
+ //lmc_modereg_params3.s.wr_preamble =
+ //lmc_modereg_params3.s.par_lat_mode =
+ //lmc_modereg_params3.s.odt_pd =
+ //lmc_modereg_params3.s.ca_par_pers =
+ //lmc_modereg_params3.s.dm =
+ //lmc_modereg_params3.s.wr_dbi =
+ //lmc_modereg_params3.s.rd_dbi =
+ lmc_modereg_params3.s.tccd_l = max(divide_roundup(ddr4_tCCD_Lmin, tclk_psecs), 5ull) - 4;
+ //lmc_modereg_params3.s.lpasr =
+ //lmc_modereg_params3.s.crc =
+ //lmc_modereg_params3.s.gd =
+ //lmc_modereg_params3.s.pda =
+ //lmc_modereg_params3.s.temp_sense =
+ //lmc_modereg_params3.s.fgrm =
+ //lmc_modereg_params3.s.wr_cmd_lat =
+ //lmc_modereg_params3.s.mpr_fmt =
+
+ if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
+ int delay = 0;
+ if ((lranks_per_prank == 4) && (ddr_hertz >= 1000000000))
+ delay = 1;
+ lmc_modereg_params3.s.xrank_add_tccd_l = delay;
+ lmc_modereg_params3.s.xrank_add_tccd_s = delay;
+ }
+
+ ddr_print("MODEREG_PARAMS3 : 0x%016lx\n", lmc_modereg_params3.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS3(ddr_interface_num), lmc_modereg_params3.u);
+ } /* LMC(0)_MODEREG_PARAMS3 */
+
+ /* LMC(0)_NXM */
+ {
+ bdk_lmcx_nxm_t lmc_nxm;
+ int num_bits = row_lsb + row_bits + lranks_bits - 26;
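+ /* Purely illustrative arithmetic, with hypothetical geometry values:
+    row_lsb = 16, row_bits = 15 and lranks_bits = 0 give
+    num_bits = 16 + 15 + 0 - 26 = 5, i.e. the per-rank address MSB appears to
+    be expressed relative to a 2^26-byte (64 MB) minimum rank size. */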
+ lmc_nxm.u = BDK_CSR_READ(node, BDK_LMCX_NXM(ddr_interface_num));
+
+ if (rank_mask & 0x1)
+ lmc_nxm.s.mem_msb_d0_r0 = num_bits;
+ if (rank_mask & 0x2)
+ lmc_nxm.s.mem_msb_d0_r1 = num_bits;
+ if (rank_mask & 0x4)
+ lmc_nxm.s.mem_msb_d1_r0 = num_bits;
+ if (rank_mask & 0x8)
+ lmc_nxm.s.mem_msb_d1_r1 = num_bits;
+
+ lmc_nxm.s.cs_mask = ~rank_mask & 0xff; /* Set the mask for non-existent ranks. */
+
+ if ((s = lookup_env_parameter_ull("ddr_nxm")) != NULL) {
+ lmc_nxm.u = strtoull(s, NULL, 0);
+ }
+ ddr_print("LMC_NXM : 0x%016lx\n", lmc_nxm.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_NXM(ddr_interface_num), lmc_nxm.u);
+ }
+
+ /* LMC(0)_WODT_MASK */
+ {
+ bdk_lmcx_wodt_mask_t lmc_wodt_mask;
+ lmc_wodt_mask.u = odt_config[odt_idx].odt_mask;
+
+ if ((s = lookup_env_parameter_ull("ddr_wodt_mask")) != NULL) {
+ lmc_wodt_mask.u = strtoull(s, NULL, 0);
+ }
+
+ ddr_print("WODT_MASK : 0x%016lx\n", lmc_wodt_mask.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_WODT_MASK(ddr_interface_num), lmc_wodt_mask.u);
+ }
+
+ /* LMC(0)_RODT_MASK */
+ {
+ int rankx;
+ bdk_lmcx_rodt_mask_t lmc_rodt_mask;
+ lmc_rodt_mask.u = odt_config[odt_idx].rodt_ctl;
+
+ if ((s = lookup_env_parameter_ull("ddr_rodt_mask")) != NULL) {
+ lmc_rodt_mask.u = strtoull(s, NULL, 0);
+ }
+
+ ddr_print("%-45s : 0x%016lx\n", "RODT_MASK", lmc_rodt_mask.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_RODT_MASK(ddr_interface_num), lmc_rodt_mask.u);
+
+ dyn_rtt_nom_mask = 0;
+ for (rankx = 0; rankx < dimm_count * 4;rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+ dyn_rtt_nom_mask |= ((lmc_rodt_mask.u >> (8*rankx)) & 0xff);
+ }
+ if (num_ranks == 4) {
+ /* Normally ODT1 is wired to rank 1. For quad-ranked DIMMs
+ ODT1 is wired to the third rank (rank 2). The mask,
+ dyn_rtt_nom_mask, is used to indicate for which ranks
+ to sweep RTT_NOM during read-leveling. Shift the bit
+ from the ODT1 position over to the "ODT2" position so
+ that the read-leveling analysis comes out right. */
+ int odt1_bit = dyn_rtt_nom_mask & 2;
+ dyn_rtt_nom_mask &= ~2;
+ dyn_rtt_nom_mask |= odt1_bit<<1;
+ }
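+ /* Example of the ODT1-bit move above: a mask of 0x3 (ranks 0 and 1)
+    becomes 0x5 -- bit 1 is cleared and re-inserted at bit 2 so that the
+    read-leveling sweep treats the third rank (rank 2) as the ODT1 target. */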
+ ddr_print("%-45s : 0x%02x\n", "DYN_RTT_NOM_MASK", dyn_rtt_nom_mask);
+ }
+
+ /* LMC(0)_COMP_CTL2 */
+ {
+ bdk_lmcx_comp_ctl2_t comp_ctl2;
+
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+
+ comp_ctl2.s.dqx_ctl = odt_config[odt_idx].odt_ena;
+ comp_ctl2.s.ck_ctl = (custom_lmc_config->ck_ctl == 0) ? 4 : custom_lmc_config->ck_ctl; /* Default 4=34.3 ohm */
+ comp_ctl2.s.cmd_ctl = (custom_lmc_config->cmd_ctl == 0) ? 4 : custom_lmc_config->cmd_ctl; /* Default 4=34.3 ohm */
+ comp_ctl2.s.control_ctl = (custom_lmc_config->ctl_ctl == 0) ? 4 : custom_lmc_config->ctl_ctl; /* Default 4=34.3 ohm */
+
+ // NOTE: these are now done earlier, in Step 6.9.3
+ // comp_ctl2.s.ntune_offset = 0;
+ // comp_ctl2.s.ptune_offset = 0;
+
+ ddr_rodt_ctl_auto = custom_lmc_config->ddr_rodt_ctl_auto;
+ if ((s = lookup_env_parameter("ddr_rodt_ctl_auto")) != NULL) {
+ ddr_rodt_ctl_auto = !!strtoul(s, NULL, 0);
+ }
+
+ default_rodt_ctl = odt_config[odt_idx].qs_dic;
+ if ((s = lookup_env_parameter("ddr_rodt_ctl")) == NULL)
+ s = lookup_env_parameter("ddr%d_rodt_ctl", ddr_interface_num);
+ if (s != NULL) {
+ default_rodt_ctl = strtoul(s, NULL, 0);
+ ddr_rodt_ctl_auto = 0;
+ }
+
+ comp_ctl2.s.rodt_ctl = default_rodt_ctl;
+
+ // if DDR4, force CK_CTL to 26 ohms if it is currently 34 ohms, and DCLK speed is 1 GHz or more...
+ if ((ddr_type == DDR4_DRAM) && (comp_ctl2.s.ck_ctl == ddr4_driver_34_ohm) && (ddr_hertz >= 1000000000)) {
+ comp_ctl2.s.ck_ctl = ddr4_driver_26_ohm; // lowest for DDR4 is 26 ohms
+ ddr_print("Forcing DDR4 COMP_CTL2[CK_CTL] to %d, %d ohms\n", comp_ctl2.s.ck_ctl,
+ imp_values->drive_strength[comp_ctl2.s.ck_ctl]);
+ }
+
+ if ((s = lookup_env_parameter("ddr_ck_ctl")) != NULL) {
+ comp_ctl2.s.ck_ctl = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_cmd_ctl")) != NULL) {
+ comp_ctl2.s.cmd_ctl = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_control_ctl")) != NULL) {
+ comp_ctl2.s.control_ctl = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_dqx_ctl")) != NULL) {
+ comp_ctl2.s.dqx_ctl = strtoul(s, NULL, 0);
+ }
+
+ ddr_print("%-45s : %d, %d ohms\n", "DQX_CTL ", comp_ctl2.s.dqx_ctl,
+ imp_values->dqx_strength [comp_ctl2.s.dqx_ctl ]);
+ ddr_print("%-45s : %d, %d ohms\n", "CK_CTL ", comp_ctl2.s.ck_ctl,
+ imp_values->drive_strength[comp_ctl2.s.ck_ctl ]);
+ ddr_print("%-45s : %d, %d ohms\n", "CMD_CTL ", comp_ctl2.s.cmd_ctl,
+ imp_values->drive_strength[comp_ctl2.s.cmd_ctl ]);
+ ddr_print("%-45s : %d, %d ohms\n", "CONTROL_CTL ", comp_ctl2.s.control_ctl,
+ imp_values->drive_strength[comp_ctl2.s.control_ctl]);
+ ddr_print("Read ODT_CTL : 0x%x (%d ohms)\n",
+ comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[comp_ctl2.s.rodt_ctl]);
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), comp_ctl2.u);
+ }
+
+ /* LMC(0)_PHY_CTL */
+ {
+ bdk_lmcx_phy_ctl_t lmc_phy_ctl;
+ lmc_phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
+ lmc_phy_ctl.s.ts_stagger = 0;
+
+ if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (lranks_per_prank > 1)) {
+ lmc_phy_ctl.cn81xx.c0_sel = lmc_phy_ctl.cn81xx.c1_sel = 2; // C0 is TEN, C1 is A17
+ ddr_print("N%d.LMC%d: 3DS: setting PHY_CTL[cx_csel] = %d\n",
+ node, ddr_interface_num, lmc_phy_ctl.cn81xx.c1_sel);
+ }
+
+ ddr_print("PHY_CTL : 0x%016lx\n", lmc_phy_ctl.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), lmc_phy_ctl.u);
+ }
+
+ /* LMC(0)_DIMM0/1_PARAMS */
+ if (spd_rdimm) {
+ bdk_lmcx_dimm_ctl_t lmc_dimm_ctl;
+
+ for (didx = 0; didx < (unsigned)dimm_count; ++didx) {
+ bdk_lmcx_dimmx_params_t lmc_dimmx_params;
+ int dimm = didx;
+ int rc;
+
+ lmc_dimmx_params.u = BDK_CSR_READ(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm));
+
+
+ if (ddr_type == DDR4_DRAM) {
+
+ bdk_lmcx_dimmx_ddr4_params0_t lmc_dimmx_ddr4_params0;
+ bdk_lmcx_dimmx_ddr4_params1_t lmc_dimmx_ddr4_params1;
+ bdk_lmcx_ddr4_dimm_ctl_t lmc_ddr4_dimm_ctl;
+
+ lmc_dimmx_params.s.rc0 = 0;
+ lmc_dimmx_params.s.rc1 = 0;
+ lmc_dimmx_params.s.rc2 = 0;
+
+ rc = read_spd(node, &dimm_config_table[didx], DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL);
+ lmc_dimmx_params.s.rc3 = (rc >> 4) & 0xf;
+ lmc_dimmx_params.s.rc4 = ((rc >> 0) & 0x3) << 2;
+ lmc_dimmx_params.s.rc4 |= ((rc >> 2) & 0x3) << 0;
+
+ rc = read_spd(node, &dimm_config_table[didx], DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK);
+ lmc_dimmx_params.s.rc5 = ((rc >> 0) & 0x3) << 2;
+ lmc_dimmx_params.s.rc5 |= ((rc >> 2) & 0x3) << 0;
+
+ lmc_dimmx_params.s.rc6 = 0;
+ lmc_dimmx_params.s.rc7 = 0;
+ lmc_dimmx_params.s.rc8 = 0;
+ lmc_dimmx_params.s.rc9 = 0;
+
+ /*
+ ** rc10 DDR4 RDIMM Operating Speed
+ ** ==== =========================================================
+ ** 0 tclk_psecs >= 1250 psec DDR4-1600 (1250 ps)
+ ** 1 1250 psec > tclk_psecs >= 1071 psec DDR4-1866 (1071 ps)
+ ** 2 1071 psec > tclk_psecs >= 938 psec DDR4-2133 ( 938 ps)
+ ** 3 938 psec > tclk_psecs >= 833 psec DDR4-2400 ( 833 ps)
+ ** 4 833 psec > tclk_psecs >= 750 psec DDR4-2666 ( 750 ps)
+ ** 5 750 psec > tclk_psecs >= 625 psec DDR4-3200 ( 625 ps)
+ */
+ lmc_dimmx_params.s.rc10 = 0;
+ if (1250 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 1;
+ if (1071 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 2;
+ if (938 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 3;
+ if (833 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 4;
+ if (750 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 5;
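+ /* Worked example: at DDR4-2400, tclk_psecs = 833, so the chain above leaves
+    rc10 = 3 (938 > 833 matches, 833 > 833 does not), agreeing with the
+    encoding table in the comment above. */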
+
+ lmc_dimmx_params.s.rc11 = 0;
+ lmc_dimmx_params.s.rc12 = 0;
+ lmc_dimmx_params.s.rc13 = (spd_dimm_type == 4) ? 0 : 4; /* 0=LRDIMM, 1=RDIMM */
+ lmc_dimmx_params.s.rc13 |= (ddr_type == DDR4_DRAM) ? (spd_addr_mirror << 3) : 0;
+ lmc_dimmx_params.s.rc14 = 0;
+ //lmc_dimmx_params.s.rc15 = 4; /* 0 nCK latency adder */
+ lmc_dimmx_params.s.rc15 = 0; /* 1 nCK latency adder */
+
+ lmc_dimmx_ddr4_params0.u = 0;
+
+ lmc_dimmx_ddr4_params0.s.rc8x = 0;
+ lmc_dimmx_ddr4_params0.s.rc7x = 0;
+ lmc_dimmx_ddr4_params0.s.rc6x = 0;
+ lmc_dimmx_ddr4_params0.s.rc5x = 0;
+ lmc_dimmx_ddr4_params0.s.rc4x = 0;
+
+ lmc_dimmx_ddr4_params0.s.rc3x = compute_rc3x(tclk_psecs);
+
+ lmc_dimmx_ddr4_params0.s.rc2x = 0;
+ lmc_dimmx_ddr4_params0.s.rc1x = 0;
+
+ lmc_dimmx_ddr4_params1.u = 0;
+
+ lmc_dimmx_ddr4_params1.s.rcbx = 0;
+ lmc_dimmx_ddr4_params1.s.rcax = 0;
+ lmc_dimmx_ddr4_params1.s.rc9x = 0;
+
+ lmc_ddr4_dimm_ctl.u = 0;
+ lmc_ddr4_dimm_ctl.s.ddr4_dimm0_wmask = 0x004;
+ lmc_ddr4_dimm_ctl.s.ddr4_dimm1_wmask = (dimm_count > 1) ? 0x004 : 0x0000;
+
+ /*
+ * Handle any overrides from envvars here...
+ */
+ if ((s = lookup_env_parameter("ddr_ddr4_params0")) != NULL) {
+ lmc_dimmx_ddr4_params0.u = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_ddr4_params1")) != NULL) {
+ lmc_dimmx_ddr4_params1.u = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_ddr4_dimm_ctl")) != NULL) {
+ lmc_ddr4_dimm_ctl.u = strtoul(s, NULL, 0);
+ }
+
+ for (i=0; i<11; ++i) {
+ uint64_t value;
+ if ((s = lookup_env_parameter("ddr_ddr4_rc%1xx", i+1)) != NULL) {
+ value = strtoul(s, NULL, 0);
+ if (i < 8) {
+ lmc_dimmx_ddr4_params0.u &= ~((uint64_t)0xff << (i*8));
+ lmc_dimmx_ddr4_params0.u |= (value << (i*8));
+ } else {
+ lmc_dimmx_ddr4_params1.u &= ~((uint64_t)0xff << ((i-8)*8));
+ lmc_dimmx_ddr4_params1.u |= (value << ((i-8)*8));
+ }
+ }
+ }
+
+ /*
+ * write the final CSR values
+ */
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_DDR4_PARAMS0(ddr_interface_num, dimm), lmc_dimmx_ddr4_params0.u);
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_DDR4_DIMM_CTL(ddr_interface_num), lmc_ddr4_dimm_ctl.u);
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_DDR4_PARAMS1(ddr_interface_num, dimm), lmc_dimmx_ddr4_params1.u);
+
+ ddr_print("DIMM%d Register Control Words RCBx:RC1x : %x %x %x %x %x %x %x %x %x %x %x\n",
+ dimm,
+ lmc_dimmx_ddr4_params1.s.rcbx,
+ lmc_dimmx_ddr4_params1.s.rcax,
+ lmc_dimmx_ddr4_params1.s.rc9x,
+ lmc_dimmx_ddr4_params0.s.rc8x,
+ lmc_dimmx_ddr4_params0.s.rc7x,
+ lmc_dimmx_ddr4_params0.s.rc6x,
+ lmc_dimmx_ddr4_params0.s.rc5x,
+ lmc_dimmx_ddr4_params0.s.rc4x,
+ lmc_dimmx_ddr4_params0.s.rc3x,
+ lmc_dimmx_ddr4_params0.s.rc2x,
+ lmc_dimmx_ddr4_params0.s.rc1x );
+
+ } else { /* if (ddr_type == DDR4_DRAM) */
+ rc = read_spd(node, &dimm_config_table[didx], 69);
+ lmc_dimmx_params.s.rc0 = (rc >> 0) & 0xf;
+ lmc_dimmx_params.s.rc1 = (rc >> 4) & 0xf;
+
+ rc = read_spd(node, &dimm_config_table[didx], 70);
+ lmc_dimmx_params.s.rc2 = (rc >> 0) & 0xf;
+ lmc_dimmx_params.s.rc3 = (rc >> 4) & 0xf;
+
+ rc = read_spd(node, &dimm_config_table[didx], 71);
+ lmc_dimmx_params.s.rc4 = (rc >> 0) & 0xf;
+ lmc_dimmx_params.s.rc5 = (rc >> 4) & 0xf;
+
+ rc = read_spd(node, &dimm_config_table[didx], 72);
+ lmc_dimmx_params.s.rc6 = (rc >> 0) & 0xf;
+ lmc_dimmx_params.s.rc7 = (rc >> 4) & 0xf;
+
+ rc = read_spd(node, &dimm_config_table[didx], 73);
+ lmc_dimmx_params.s.rc8 = (rc >> 0) & 0xf;
+ lmc_dimmx_params.s.rc9 = (rc >> 4) & 0xf;
+
+ rc = read_spd(node, &dimm_config_table[didx], 74);
+ lmc_dimmx_params.s.rc10 = (rc >> 0) & 0xf;
+ lmc_dimmx_params.s.rc11 = (rc >> 4) & 0xf;
+
+ rc = read_spd(node, &dimm_config_table[didx], 75);
+ lmc_dimmx_params.s.rc12 = (rc >> 0) & 0xf;
+ lmc_dimmx_params.s.rc13 = (rc >> 4) & 0xf;
+
+ rc = read_spd(node, &dimm_config_table[didx], 76);
+ lmc_dimmx_params.s.rc14 = (rc >> 0) & 0xf;
+ lmc_dimmx_params.s.rc15 = (rc >> 4) & 0xf;
+
+
+ if ((s = lookup_env_parameter("ddr_clk_drive")) != NULL) {
+ if (strcmp(s,"light") == 0) {
+ lmc_dimmx_params.s.rc5 = 0x0; /* Light Drive */
+ }
+ if (strcmp(s,"moderate") == 0) {
+ lmc_dimmx_params.s.rc5 = 0x5; /* Moderate Drive */
+ }
+ if (strcmp(s,"strong") == 0) {
+ lmc_dimmx_params.s.rc5 = 0xA; /* Strong Drive */
+ }
+ }
+
+ if ((s = lookup_env_parameter("ddr_cmd_drive")) != NULL) {
+ if (strcmp(s,"light") == 0) {
+ lmc_dimmx_params.s.rc3 = 0x0; /* Light Drive */
+ }
+ if (strcmp(s,"moderate") == 0) {
+ lmc_dimmx_params.s.rc3 = 0x5; /* Moderate Drive */
+ }
+ if (strcmp(s,"strong") == 0) {
+ lmc_dimmx_params.s.rc3 = 0xA; /* Strong Drive */
+ }
+ }
+
+ if ((s = lookup_env_parameter("ddr_ctl_drive")) != NULL) {
+ if (strcmp(s,"light") == 0) {
+ lmc_dimmx_params.s.rc4 = 0x0; /* Light Drive */
+ }
+ if (strcmp(s,"moderate") == 0) {
+ lmc_dimmx_params.s.rc4 = 0x5; /* Moderate Drive */
+ }
+ }
+
+
+ /*
+ ** rc10 DDR3 RDIMM Operating Speed
+ ** ==== =========================================================
+ ** 0 tclk_psecs >= 2500 psec DDR3/DDR3L-800 (default)
+ ** 1 2500 psec > tclk_psecs >= 1875 psec DDR3/DDR3L-1066
+ ** 2 1875 psec > tclk_psecs >= 1500 psec DDR3/DDR3L-1333
+ ** 3 1500 psec > tclk_psecs >= 1250 psec DDR3/DDR3L-1600
+ ** 4 1250 psec > tclk_psecs >= 1071 psec DDR3-1866
+ */
+ lmc_dimmx_params.s.rc10 = 0;
+ if (2500 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 1;
+ if (1875 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 2;
+ if (1500 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 3;
+ if (1250 > tclk_psecs)
+ lmc_dimmx_params.s.rc10 = 4;
+
+ } /* if (ddr_type == DDR4_DRAM) */
+
+ if ((s = lookup_env_parameter("ddr_dimmx_params")) != NULL) {
+ lmc_dimmx_params.u = strtoul(s, NULL, 0);
+ }
+
+ for (i=0; i<16; ++i) {
+ uint64_t value;
+ if ((s = lookup_env_parameter("ddr_rc%d", i)) != NULL) {
+ value = strtoul(s, NULL, 0);
+ lmc_dimmx_params.u &= ~((uint64_t)0xf << (i*4));
+ lmc_dimmx_params.u |= ( value << (i*4));
+ }
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm), lmc_dimmx_params.u);
+
+ ddr_print("DIMM%d Register Control Words RC15:RC0 : %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",
+ dimm,
+ lmc_dimmx_params.s.rc15,
+ lmc_dimmx_params.s.rc14,
+ lmc_dimmx_params.s.rc13,
+ lmc_dimmx_params.s.rc12,
+ lmc_dimmx_params.s.rc11,
+ lmc_dimmx_params.s.rc10,
+ lmc_dimmx_params.s.rc9 ,
+ lmc_dimmx_params.s.rc8 ,
+ lmc_dimmx_params.s.rc7 ,
+ lmc_dimmx_params.s.rc6 ,
+ lmc_dimmx_params.s.rc5 ,
+ lmc_dimmx_params.s.rc4 ,
+ lmc_dimmx_params.s.rc3 ,
+ lmc_dimmx_params.s.rc2 ,
+ lmc_dimmx_params.s.rc1 ,
+ lmc_dimmx_params.s.rc0 );
+ } /* for didx */
+
+ if (ddr_type == DDR4_DRAM) {
+
+ /* LMC0_DIMM_CTL */
+ lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num));
+ lmc_dimm_ctl.s.dimm0_wmask = 0xdf3f;
+ lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0xdf3f : 0x0000;
+ lmc_dimm_ctl.s.tcws = 0x4e0;
+ lmc_dimm_ctl.cn88xx.parity = custom_lmc_config->parity;
+
+ if ((s = lookup_env_parameter("ddr_dimm0_wmask")) != NULL) {
+ lmc_dimm_ctl.s.dimm0_wmask = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_dimm1_wmask")) != NULL) {
+ lmc_dimm_ctl.s.dimm1_wmask = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_dimm_ctl_parity")) != NULL) {
+ lmc_dimm_ctl.cn88xx.parity = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_dimm_ctl_tcws")) != NULL) {
+ lmc_dimm_ctl.s.tcws = strtoul(s, NULL, 0);
+ }
+
+ ddr_print("LMC DIMM_CTL : 0x%016lx\n", lmc_dimm_ctl.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
+
+ perform_octeon3_ddr3_sequence(node, rank_mask,
+ ddr_interface_num, 0x7 ); /* Init RCW */
+
+ /* Write RC0D last */
+ lmc_dimm_ctl.s.dimm0_wmask = 0x2000;
+ lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0x2000 : 0x0000;
+ ddr_print("LMC DIMM_CTL : 0x%016lx\n", lmc_dimm_ctl.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
+
+ /* Don't write any extended registers the second time */
+ DRAM_CSR_WRITE(node, BDK_LMCX_DDR4_DIMM_CTL(ddr_interface_num), 0);
+
+ perform_octeon3_ddr3_sequence(node, rank_mask,
+ ddr_interface_num, 0x7 ); /* Init RCW */
+ } else {
+
+ /* LMC0_DIMM_CTL */
+ lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num));
+ lmc_dimm_ctl.s.dimm0_wmask = 0xffff;
+ lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0xffff : 0x0000;
+ lmc_dimm_ctl.s.tcws = 0x4e0;
+ lmc_dimm_ctl.cn88xx.parity = custom_lmc_config->parity;
+
+ if ((s = lookup_env_parameter("ddr_dimm0_wmask")) != NULL) {
+ lmc_dimm_ctl.s.dimm0_wmask = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_dimm1_wmask")) != NULL) {
+ lmc_dimm_ctl.s.dimm1_wmask = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_dimm_ctl_parity")) != NULL) {
+ lmc_dimm_ctl.cn88xx.parity = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_dimm_ctl_tcws")) != NULL) {
+ lmc_dimm_ctl.s.tcws = strtoul(s, NULL, 0);
+ }
+
+ ddr_print("LMC DIMM_CTL : 0x%016lx\n", lmc_dimm_ctl.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
+
+ perform_octeon3_ddr3_sequence(node, rank_mask,
+ ddr_interface_num, 0x7 ); /* Init RCW */
+ }
+ } else { /* if (spd_rdimm) */
+ /* Disable register control writes for unbuffered */
+ bdk_lmcx_dimm_ctl_t lmc_dimm_ctl;
+ lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num));
+ lmc_dimm_ctl.s.dimm0_wmask = 0;
+ lmc_dimm_ctl.s.dimm1_wmask = 0;
+ DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
+ } /* if (spd_rdimm) */
+
+ /*
+ * Comments (steps 3 through 5) continue in perform_octeon3_ddr3_sequence()
+ */
+ {
+ bdk_lmcx_modereg_params0_t lmc_modereg_params0;
+
+ if (ddr_memory_preserved(node)) {
+ /* Contents are being preserved. Take DRAM out of
+ self-refresh first. Then init steps can proceed
+ normally */
+ perform_octeon3_ddr3_sequence(node, rank_mask,
+ ddr_interface_num, 3); /* self-refresh exit */
+ }
+
+ lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
+
+ lmc_modereg_params0.s.dllr = 1; /* Set during first init sequence */
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
+
+ perform_ddr_init_sequence(node, rank_mask, ddr_interface_num);
+
+ lmc_modereg_params0.s.dllr = 0; /* Clear for normal operation */
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
+ }
+
+ // NOTE: this must be done for pass 2.x and pass 1.x
+ if ((spd_rdimm) && (ddr_type == DDR4_DRAM)) {
+ VB_PRT(VBL_FAE, "Running init sequence 1\n");
+ change_rdimm_mpr_pattern(node, rank_mask, ddr_interface_num, dimm_count);
+ }
+
+#define DEFAULT_INTERNAL_VREF_TRAINING_LIMIT 5
+ int internal_retries = 0;
+ int deskew_training_errors;
+ int dac_eval_retries;
+ int dac_settings[9];
+ int num_samples;
+ int sample, lane;
+ int last_lane = ((ddr_interface_64b) ? 8 : 4) + use_ecc;
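+ // e.g. a 64-bit interface with ECC gives last_lane = 8 + 1 = 9 byte lanes
+ // (8 data + 1 ECC); a 32-bit interface without ECC gives 4.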
+
+#define DEFAULT_DAC_SAMPLES 7 // originally was 5
+#define DAC_RETRIES_LIMIT 2
+
+ typedef struct {
+ int16_t bytes[DEFAULT_DAC_SAMPLES];
+ } bytelane_sample_t;
+ bytelane_sample_t lanes[9];
+
+ memset(lanes, 0, sizeof(lanes));
+
+ if ((ddr_type == DDR4_DRAM) && !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
+ num_samples = DEFAULT_DAC_SAMPLES;
+ } else {
+ num_samples = 1; // if DDR3 or no ability to write DAC values
+ }
+
+ perform_internal_vref_training:
+
+ for (sample = 0; sample < num_samples; sample++) {
+
+ dac_eval_retries = 0;
+
+ do { // make offset and internal vref training repeatable
+
+ /* 6.9.8 LMC Offset Training
+ LMC requires input-receiver offset training. */
+ Perform_Offset_Training(node, rank_mask, ddr_interface_num);
+
+ /* 6.9.9 LMC Internal Vref Training
+ LMC requires input-reference-voltage training. */
+ Perform_Internal_VREF_Training(node, rank_mask, ddr_interface_num);
+
+ // read and maybe display the DAC values for a sample
+ read_DAC_DBI_settings(node, ddr_interface_num, /*DAC*/1, dac_settings);
+ if ((num_samples == 1) || dram_is_verbose(VBL_DEV)) {
+ display_DAC_DBI_settings(node, ddr_interface_num, /*DAC*/1, use_ecc,
+ dac_settings, "Internal VREF");
+ }
+
+ // for DDR4, evaluate the DAC settings and retry if any issues
+ if (ddr_type == DDR4_DRAM) {
+ if (evaluate_DAC_settings(ddr_interface_64b, use_ecc, dac_settings)) {
+ if (++dac_eval_retries > DAC_RETRIES_LIMIT) {
+ ddr_print("N%d.LMC%d: DDR4 internal VREF DAC settings: retries exhausted; continuing...\n",
+ node, ddr_interface_num);
+ } else {
+ ddr_print("N%d.LMC%d: DDR4 internal VREF DAC settings inconsistent; retrying....\n",
+ node, ddr_interface_num); // FIXME? verbosity!!!
+ continue;
+ }
+ }
+ if (num_samples > 1) { // taking multiple samples, otherwise do nothing
+ // good sample or exhausted retries, record it
+ for (lane = 0; lane < last_lane; lane++) {
+ lanes[lane].bytes[sample] = dac_settings[lane];
+ }
+ }
+ }
+ break; // done if DDR3, or good sample, or exhausted retries
+
+ } while (1);
+
+ } /* for (sample = 0; sample < num_samples; sample++) */
+
+ if (num_samples > 1) {
+ debug_print("N%d.LMC%d: DDR4 internal VREF DAC settings: processing multiple samples...\n",
+ node, ddr_interface_num);
+
+ for (lane = 0; lane < last_lane; lane++) {
+ dac_settings[lane] = process_samples_average(&lanes[lane].bytes[0], num_samples,
+ ddr_interface_num, lane);
+ }
+ display_DAC_DBI_settings(node, ddr_interface_num, /*DAC*/1, use_ecc, dac_settings, "Averaged VREF");
+
+ // finally, write the final DAC values
+ for (lane = 0; lane < last_lane; lane++) {
+ load_dac_override(node, ddr_interface_num, dac_settings[lane], lane);
+ }
+ }
+
+#if DAC_OVERRIDE_EARLY
+ // as a second step, after internal VREF training, before starting deskew training:
+ // for DDR3 and THUNDER pass 2.x, override the DAC setting to 127
+ if ((ddr_type == DDR3_DRAM) && !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ load_dac_override(node, ddr_interface_num, 127, /* all */0x0A);
+ ddr_print("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127 (early).\n",
+ node, ddr_interface_num);
+ }
+#endif
+
+ /*
+ * 6.9.10 LMC Read Deskew Training
+ * LMC requires input-read-data deskew training.
+ */
+ if (! disable_deskew_training) {
+
+ deskew_training_errors = Perform_Read_Deskew_Training(node, rank_mask, ddr_interface_num,
+ spd_rawcard_AorB, 0, ddr_interface_64b);
+
+ // All the Deskew lock and saturation retries (may) have been done,
+ // but we ended up with nibble errors; so, as a last-ditch effort,
+ // enable retries of the Internal Vref Training...
+ if (deskew_training_errors) {
+ if (internal_retries < DEFAULT_INTERNAL_VREF_TRAINING_LIMIT) {
+ internal_retries++;
+ VB_PRT(VBL_FAE, "N%d.LMC%d: Deskew training results still unsettled - retrying internal Vref training (%d)\n",
+ node, ddr_interface_num, internal_retries);
+ goto perform_internal_vref_training;
+ } else {
+ VB_PRT(VBL_FAE, "N%d.LMC%d: Deskew training incomplete - %d retries exhausted, but continuing...\n",
+ node, ddr_interface_num, internal_retries);
+ }
+ }
+
+ // FIXME: treat this as the final DSK print from now on, and print if VBL_NORM or above
+ // also, save the results of the original training
+ Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &deskew_training_results, VBL_NORM);
+
+ // setup write bit-deskew if enabled...
+ if (enable_write_deskew) {
+ ddr_print("N%d.LMC%d: WRITE BIT-DESKEW feature enabled- going NEUTRAL.\n",
+ node, ddr_interface_num);
+ Neutral_Write_Deskew_Setup(node, ddr_interface_num);
+ } /* if (enable_write_deskew) */
+
+ } /* if (! disable_deskew_training) */
+
+#if !DAC_OVERRIDE_EARLY
+ // as a final step in internal VREF training, after deskew training but before HW WL:
+ // for DDR3 and THUNDER pass 2.x, override the DAC setting to 127
+ if ((ddr_type == DDR3_DRAM) && !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ load_dac_override(node, ddr_interface_num, 127, /* all */0x0A);
+ ddr_print("N%d.LMC%d, Overriding DDR3 internal VREF DAC settings to 127 (late).\n",
+ node, ddr_interface_num);
+ }
+#endif
+
+
+ /* LMC(0)_EXT_CONFIG */
+ {
+ bdk_lmcx_ext_config_t ext_config;
+ ext_config.u = BDK_CSR_READ(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num));
+ ext_config.s.vrefint_seq_deskew = 0;
+ ext_config.s.read_ena_bprch = 1;
+ ext_config.s.read_ena_fprch = 1;
+ ext_config.s.drive_ena_fprch = 1;
+ ext_config.s.drive_ena_bprch = 1;
+ ext_config.s.invert_data = 0; // make sure this is OFF for all current chips
+
+ if ((s = lookup_env_parameter("ddr_read_fprch")) != NULL) {
+ ext_config.s.read_ena_fprch = strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_read_bprch")) != NULL) {
+ ext_config.s.read_ena_bprch = strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_drive_fprch")) != NULL) {
+ ext_config.s.drive_ena_fprch = strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_drive_bprch")) != NULL) {
+ ext_config.s.drive_ena_bprch = strtoul(s, NULL, 0);
+ }
+
+ if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (lranks_per_prank > 1)) {
+ ext_config.s.dimm0_cid = ext_config.s.dimm1_cid = lranks_bits;
+ ddr_print("N%d.LMC%d: 3DS: setting EXT_CONFIG[dimmx_cid] = %d\n",
+ node, ddr_interface_num, ext_config.s.dimm0_cid);
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num), ext_config.u);
+ ddr_print("%-45s : 0x%016lx\n", "EXT_CONFIG", ext_config.u);
+ }
+
+
+ {
+ int save_ref_zqcs_int;
+ uint64_t temp_delay_usecs;
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+
+ /* Temporarily select the minimum ZQCS interval and wait
+ long enough for a few ZQCS calibrations to occur. This
+ should ensure that the calibration circuitry is
+ stabilized before read/write leveling occurs. */
+ save_ref_zqcs_int = lmc_config.s.ref_zqcs_int;
+ lmc_config.s.ref_zqcs_int = 1 | (32<<7); /* set smallest interval */
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+ BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+
+ /* Compute an appropriate delay based on the current ZQCS
+ interval. The delay should be long enough for the
+ current ZQCS delay counter to expire plus ten of the
+ minimum intervals to ensure that some calibrations
+ occur. */
+ temp_delay_usecs = (((uint64_t)save_ref_zqcs_int >> 7)
+ * tclk_psecs * 100 * 512 * 128) / (10000*10000)
+ + 10 * ((uint64_t)32 * tclk_psecs * 100 * 512 * 128) / (10000*10000);
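+ /* Worked example (numbers are hypothetical): the *100/(10000*10000) factors
+    are just a picoseconds-to-microseconds conversion (1e-6) split up to avoid
+    overflow.  Each minimum ZQCS interval is 32*512*128 = 2,097,152 CK cycles,
+    so at tclk_psecs = 833 (DDR4-2400) ten of them contribute roughly
+    10 * 2097152 * 833 ps ~= 17,470 usec, plus one full length of the
+    previously programmed interval to cover the counter already running. */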
+
+ VB_PRT(VBL_FAE, "N%d.LMC%d: Waiting %ld usecs for ZQCS calibrations to start\n",
+ node, ddr_interface_num, temp_delay_usecs);
+ bdk_wait_usec(temp_delay_usecs);
+
+ lmc_config.s.ref_zqcs_int = save_ref_zqcs_int; /* Restore computed interval */
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+ BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ }
+
+ /*
+ * 6.9.11 LMC Write Leveling
+ *
+ * LMC supports an automatic write leveling like that described in the
+ * JEDEC DDR3 specifications separately per byte-lane.
+ *
+ * All of DDR PLL, LMC CK, LMC DRESET, and early LMC initializations must
+ * be completed prior to starting this LMC write-leveling sequence.
+ *
+ * There are many possible procedures that will write-level all the
+ * attached DDR3 DRAM parts. One possibility is for software to simply
+ * write the desired values into LMC(0)_WLEVEL_RANK(0..3). This section
+ * describes one possible sequence that uses LMC's autowrite-leveling
+ * capabilities.
+ *
+ * 1. If the DQS/DQ delays on the board may be more than the ADD/CMD
+ * delays, then ensure that LMC(0)_CONFIG[EARLY_DQX] is set at this
+ * point.
+ *
+ * Do the remaining steps 2-7 separately for each rank i with attached
+ * DRAM.
+ *
+ * 2. Write LMC(0)_WLEVEL_RANKi = 0.
+ *
+ * 3. For ×8 parts:
+ *
+ * Without changing any other fields in LMC(0)_WLEVEL_CTL, write
+ * LMC(0)_WLEVEL_CTL[LANEMASK] to select all byte lanes with attached
+ * DRAM.
+ *
+ * For ×16 parts:
+ *
+ * Without changing any other fields in LMC(0)_WLEVEL_CTL, write
+ * LMC(0)_WLEVEL_CTL[LANEMASK] to select all even byte lanes with
+ * attached DRAM.
+ *
+ * 4. Without changing any other fields in LMC(0)_CONFIG,
+ *
+ * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select write-leveling
+ *
+ * o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
+ *
+ * o write LMC(0)_SEQ_CTL[INIT_START] = 1
+ *
+ * LMC will initiate write-leveling at this point. Assuming
+ * LMC(0)_WLEVEL_CTL [SSET] = 0, LMC first enables write-leveling on
+ * the selected DRAM rank via a DDR3 MR1 write, then sequences through
+ * and accumulates write-leveling results for eight different delay
+ * settings twice, starting at a delay of zero in this case since
+ * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] = 0, increasing by 1/8 CK each
+ * setting, covering a total distance of one CK, then disables the
+ * write-leveling via another DDR3 MR1 write.
+ *
+ * After the sequence through 16 delay settings is complete:
+ *
+ * o LMC sets LMC(0)_WLEVEL_RANKi[STATUS] = 3
+ *
+ * o LMC sets LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] (for all ranks selected
+ * by LMC(0)_WLEVEL_CTL[LANEMASK]) to indicate the first write
+ * leveling result of 1 that followed result of 0 during the
+ * sequence, except that the LMC always writes
+ * LMC(0)_WLEVEL_RANKi[BYTE*<0>]=0.
+ *
+ * o Software can read the eight write-leveling results from the first
+ * pass through the delay settings by reading
+ * LMC(0)_WLEVEL_DBG[BITMASK] (after writing
+ * LMC(0)_WLEVEL_DBG[BYTE]). (LMC does not retain the write-leveling
+ * results from the second pass through the eight delay
+ * settings. They should often be identical to the
+ * LMC(0)_WLEVEL_DBG[BITMASK] results, though.)
+ *
+ * 5. Wait until LMC(0)_WLEVEL_RANKi[STATUS] != 2.
+ *
+ * LMC will have updated LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] for all byte
+ * lanes selected by LMC(0)_WLEVEL_CTL[LANEMASK] at this point.
+ * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] will still be the value that
+ * software wrote in substep 2 above, which is 0.
+ *
+ * 6. For ×16 parts:
+ *
+ * Without changing any other fields in LMC(0)_WLEVEL_CTL, write
+ * LMC(0)_WLEVEL_CTL[LANEMASK] to select all odd byte lanes with
+ * attached DRAM.
+ *
+ * Repeat substeps 4 and 5 with this new LMC(0)_WLEVEL_CTL[LANEMASK]
+ * setting. Skip to substep 7 if this has already been done.
+ *
+ * For ×8 parts:
+ *
+ * Skip this substep. Go to substep 7.
+ *
+ * 7. Calculate LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings for all byte
+ * lanes on all ranks with attached DRAM.
+ *
+ * At this point, all byte lanes on rank i with attached DRAM should
+ * have been write-leveled, and LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] has
+ * the result for each byte lane.
+ *
+ * But note that the DDR3 write-leveling sequence will only determine
+ * the delay modulo the CK cycle time, and cannot determine how many
+ * additional CK cycles of delay are present. Software must calculate
+ * the number of CK cycles, or equivalently, the
+ * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings.
+ *
+ * This BYTE*<4:3> calculation is system/board specific.
+ *
+ * Many techniques can be used to calculate write-leveling BYTE*<4:3> values,
+ * including:
+ *
+ * o Known values for some byte lanes.
+ *
+ * o Relative values for some byte lanes relative to others.
+ *
+ * For example, suppose lane X is likely to require a larger
+ * write-leveling delay than lane Y. A BYTEX<2:0> value that is much
+ * smaller than the BYTEY<2:0> value may then indicate that the
+ * required lane X delay wrapped into the next CK, so BYTEX<4:3>
+ * should be set to BYTEY<4:3>+1.
+ *
+ * When ECC DRAM is not present (i.e. when DRAM is not attached to the
+ * DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and
+ * DDR_DQ<35:32> chip signals), write LMC(0)_WLEVEL_RANK*[BYTE8] =
+ * LMC(0)_WLEVEL_RANK*[BYTE0], using the final calculated BYTE0 value.
+ * Write LMC(0)_WLEVEL_RANK*[BYTE4] = LMC(0)_WLEVEL_RANK*[BYTE0],
+ * using the final calculated BYTE0 value.
+ *
+ * 8. Initialize LMC(0)_WLEVEL_RANK* values for all unused ranks.
+ *
+ * Let rank i be a rank with attached DRAM.
+ *
+ * For all ranks j that do not have attached DRAM, set
+ * LMC(0)_WLEVEL_RANKj = LMC(0)_WLEVEL_RANKi.
+ */
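+ /* Concrete instance of the BYTE*<4:3> heuristic in step 7 above (numbers are
+    hypothetical): if lane X is expected to need more delay than lane Y but
+    hardware reports BYTEX<2:0> = 1 while BYTEY<2:0> = 6, lane X most likely
+    wrapped into the next CK, so software would program
+    BYTEX<4:3> = BYTEY<4:3> + 1. */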
+ { // Start HW write-leveling block
+#pragma pack(push,1)
+ bdk_lmcx_wlevel_ctl_t wlevel_ctl;
+ bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank;
+ int rankx = 0;
+ int wlevel_bitmask[9];
+ int byte_idx;
+ int ecc_ena;
+ int ddr_wlevel_roundup = 0;
+ int ddr_wlevel_printall = (dram_is_verbose(VBL_FAE)); // or default to 1 to print all HW WL samples
+ int disable_hwl_validity = 0;
+ int default_wlevel_rtt_nom;
+#if WODT_MASK_2R_1S
+ uint64_t saved_wodt_mask = 0;
+#endif
+#pragma pack(pop)
+
+ if (wlevel_loops)
+ ddr_print("N%d.LMC%d: Performing Hardware Write-Leveling\n", node, ddr_interface_num);
+ else {
+ wlevel_bitmask_errors = 1; /* Force software write-leveling to run */
+ ddr_print("N%d.LMC%d: Forcing software Write-Leveling\n", node, ddr_interface_num);
+ }
+
+ default_wlevel_rtt_nom = (ddr_type == DDR3_DRAM) ? rttnom_20ohm : ddr4_rttnom_40ohm ; /* FIXME? */
+
+#if WODT_MASK_2R_1S
+ if ((ddr_type == DDR4_DRAM) && (num_ranks == 2) && (dimm_count == 1)) {
+ /* LMC(0)_WODT_MASK */
+ bdk_lmcx_wodt_mask_t lmc_wodt_mask;
+ // always save original so we can always restore later
+ saved_wodt_mask = BDK_CSR_READ(node, BDK_LMCX_WODT_MASK(ddr_interface_num));
+ if ((s = lookup_env_parameter_ull("ddr_hwl_wodt_mask")) != NULL) {
+ lmc_wodt_mask.u = strtoull(s, NULL, 0);
+ if (lmc_wodt_mask.u != saved_wodt_mask) { // print/store only when diff
+ ddr_print("WODT_MASK : 0x%016lx\n", lmc_wodt_mask.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_WODT_MASK(ddr_interface_num), lmc_wodt_mask.u);
+ }
+ }
+ }
+#endif /* WODT_MASK_2R_1S */
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ ecc_ena = lmc_config.s.ecc_ena;
+
+ if ((s = lookup_env_parameter("ddr_wlevel_roundup")) != NULL) {
+ ddr_wlevel_roundup = strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_wlevel_printall")) != NULL) {
+ ddr_wlevel_printall = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_disable_hwl_validity")) != NULL) {
+ disable_hwl_validity = !!strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_wlevel_rtt_nom")) != NULL) {
+ default_wlevel_rtt_nom = strtoul(s, NULL, 0);
+ }
+
+ // For DDR3, we leave the WLEVEL_CTL fields at default settings
+ // For DDR4, we touch WLEVEL_CTL fields OR_DIS or BITMASK here
+ if (ddr_type == DDR4_DRAM) {
+ int default_or_dis = 1;
+ int default_bitmask = 0xFF;
+
+ // when x4, use only the lower nibble bits
+ if (dram_width == 4) {
+ default_bitmask = 0x0F;
+ VB_PRT(VBL_DEV, "N%d.LMC%d: WLEVEL_CTL: default bitmask is 0x%2x for DDR4 x4\n",
+ node, ddr_interface_num, default_bitmask);
+ }
+
+ wlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num));
+ wlevel_ctl.s.or_dis = default_or_dis;
+ wlevel_ctl.s.bitmask = default_bitmask;
+
+ // allow overrides
+ if ((s = lookup_env_parameter("ddr_wlevel_ctl_or_dis")) != NULL) {
+ wlevel_ctl.s.or_dis = !!strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_wlevel_ctl_bitmask")) != NULL) {
+ wlevel_ctl.s.bitmask = strtoul(s, NULL, 0);
+ }
+
+ // print only if not defaults
+ if ((wlevel_ctl.s.or_dis != default_or_dis) || (wlevel_ctl.s.bitmask != default_bitmask)) {
+ ddr_print("N%d.LMC%d: WLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n",
+ node, ddr_interface_num, wlevel_ctl.s.or_dis, wlevel_ctl.s.bitmask);
+ }
+ // always write
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
+ }
+
+ // Start the hardware write-leveling loop per rank
+ for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+#if HW_WL_MAJORITY
+ // array to collect counts of byte-lane values
+ // assume low-order 3 bits and even, so really only 2-bit values
+ int wlevel_bytes[9][4];
+ memset(wlevel_bytes, 0, sizeof(wlevel_bytes));
+#endif
+
+ // restructure the looping so we can keep trying until we get the samples we want
+ //for (int wloop = 0; wloop < wlevel_loops; wloop++) {
+ int wloop = 0;
+ int wloop_retries = 0; // retries per sample for HW-related issues with bitmasks or values
+ int wloop_retries_total = 0;
+ int wloop_retries_exhausted = 0;
+#define WLOOP_RETRIES_DEFAULT 5
+ int wlevel_validity_errors;
+ int wlevel_bitmask_errors_rank = 0;
+ int wlevel_validity_errors_rank = 0;
+
+ while (wloop < wlevel_loops) {
+
+ wlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num));
+
+ wlevel_ctl.s.rtt_nom = (default_wlevel_rtt_nom > 0) ? (default_wlevel_rtt_nom - 1) : 7;
+
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), 0); /* Clear write-level delays */
+
+ wlevel_bitmask_errors = 0; /* Reset error counters */
+ wlevel_validity_errors = 0;
+
+ for (byte_idx=0; byte_idx<9; ++byte_idx) {
+ wlevel_bitmask[byte_idx] = 0; /* Reset bitmasks */
+ }
+
+#if HWL_BY_BYTE // FIXME???
+ /* Make a separate pass for each byte to reduce power. */
+ for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
+
+ if (!(ddr_interface_bytemask&(1<<byte_idx)))
+ continue;
+
+ wlevel_ctl.s.lanemask = (1<<byte_idx);
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
+
+ /* Read and write values back in order to update the
+ status field. This ensures that we read the updated
+ values after write-leveling has completed. */
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
+ BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx)));
+
+ perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 6); /* write-leveling */
+
+ if (!bdk_is_platform(BDK_PLATFORM_ASIM) &&
+ BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
+ status, ==, 3, 1000000))
+ {
+ error_print("ERROR: Timeout waiting for WLEVEL\n");
+ }
+ lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+
+ wlevel_bitmask[byte_idx] = octeon_read_lmcx_ddr3_wlevel_dbg(node, ddr_interface_num, byte_idx);
+ if (wlevel_bitmask[byte_idx] == 0)
+ ++wlevel_bitmask_errors;
+ } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
+
+ wlevel_ctl.s.lanemask = /*0x1ff*/ddr_interface_bytemask; // restore for RL
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
+#else
+ // do all the byte-lanes at the same time
+ wlevel_ctl.s.lanemask = /*0x1ff*/ddr_interface_bytemask; // FIXME?
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
+
+ /* Read and write values back in order to update the
+ status field. This ensures that we read the updated
+ values after write-leveling has completed. */
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
+ BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx)));
+
+ perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 6); /* write-leveling */
+
+ if (!bdk_is_platform(BDK_PLATFORM_ASIM) &&
+ BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
+ status, ==, 3, 1000000))
+ {
+ error_print("ERROR: Timeout waiting for WLEVEL\n");
+ }
+
+ lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+
+ for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
+ if (!(ddr_interface_bytemask&(1<<byte_idx)))
+ continue;
+ wlevel_bitmask[byte_idx] = octeon_read_lmcx_ddr3_wlevel_dbg(node, ddr_interface_num, byte_idx);
+ if (wlevel_bitmask[byte_idx] == 0)
+ ++wlevel_bitmask_errors;
+ } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
+#endif
+
+ // check validity only if no bitmask errors
+ if (wlevel_bitmask_errors == 0) {
+ if ((spd_dimm_type != 5) &&
+ (spd_dimm_type != 6) &&
+ (spd_dimm_type != 8) &&
+ (spd_dimm_type != 9) &&
+ (dram_width != 16) &&
+ (ddr_interface_64b) &&
+ !(disable_hwl_validity))
+ { // bypass if mini-[RU]DIMM or x16 or 32-bit or SO-[RU]DIMM
+ wlevel_validity_errors =
+ Validate_HW_WL_Settings(node, ddr_interface_num,
+ &lmc_wlevel_rank, ecc_ena);
+ wlevel_validity_errors_rank += (wlevel_validity_errors != 0);
+ }
+ } else
+ wlevel_bitmask_errors_rank++;
+
+ // before we print, if we had bitmask or validity errors, do a retry...
+ if ((wlevel_bitmask_errors != 0) || (wlevel_validity_errors != 0)) {
+ // VBL must be high to show the bad bitmaps or delays here also
+ if (dram_is_verbose(VBL_DEV2)) {
+ display_WL_BM(node, ddr_interface_num, rankx, wlevel_bitmask);
+ display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx);
+ }
+ if (wloop_retries < WLOOP_RETRIES_DEFAULT) {
+ wloop_retries++;
+ wloop_retries_total++;
+ // this printout is per-retry: only when VBL is high enough (DEV2?)
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: H/W Write-Leveling had %s errors - retrying...\n",
+ node, ddr_interface_num, rankx,
+ (wlevel_bitmask_errors) ? "Bitmask" : "Validity");
+ continue; // this takes us back to the top without counting a sample
+ } else { // ran out of retries for this sample
+ // retries exhausted, do not print at normal VBL
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: H/W Write-Leveling issues: %s errors\n",
+ node, ddr_interface_num, rankx,
+ (wlevel_bitmask_errors) ? "Bitmask" : "Validity");
+ wloop_retries_exhausted++;
+ }
+ }
+ // no errors or exhausted retries, use this sample
+ wloop_retries = 0; //reset for next sample
+
+ // when only 1 sample or forced, print the bitmasks first and current HW WL
+ if ((wlevel_loops == 1) || ddr_wlevel_printall) {
+ display_WL_BM(node, ddr_interface_num, rankx, wlevel_bitmask);
+ display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx);
+ }
+
+ if (ddr_wlevel_roundup) { /* Round up odd bitmask delays */
+ for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
+ if (!(ddr_interface_bytemask&(1<<byte_idx)))
+ continue;
+ update_wlevel_rank_struct(&lmc_wlevel_rank,
+ byte_idx,
+ roundup_ddr3_wlevel_bitmask(wlevel_bitmask[byte_idx]));
+ } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
+ display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx);
+ }
+
+#if HW_WL_MAJORITY
+ // OK, we have a decent sample, no bitmask or validity errors
+ for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
+ if (!(ddr_interface_bytemask&(1<<byte_idx)))
+ continue;
+ // increment count of byte-lane value
+ int ix = (get_wlevel_rank_struct(&lmc_wlevel_rank, byte_idx) >> 1) & 3; // only 4 values
+ wlevel_bytes[byte_idx][ix]++;
+ } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
+#endif
+
+ wloop++; // if we get here, we have taken a decent sample
+
+ } /* while (wloop < wlevel_loops) */
+
+#if HW_WL_MAJORITY
+ // if we did sample more than once, try to pick a majority vote
+ if (wlevel_loops > 1) {
+ // look for the majority in each byte-lane
+ for (byte_idx = 0; byte_idx < (8+ecc_ena); ++byte_idx) {
+ int mx = -1, mc = 0, xc = 0, cc = 0;
+ int ix, ic;
+ if (!(ddr_interface_bytemask&(1<<byte_idx)))
+ continue;
+ for (ix = 0; ix < 4; ix++) {
+ ic = wlevel_bytes[byte_idx][ix];
+ // make a bitmask of the ones with a count
+ if (ic > 0) {
+ mc |= (1 << ix);
+ cc++; // count how many had non-zero counts
+ }
+ // find the majority
+ if (ic > xc) { // new max?
+ xc = ic; // yes
+ mx = ix; // set its index
+ }
+ }
+#if SWL_TRY_HWL_ALT
+ // see if there was an alternate
+ int alts = (mc & ~(1 << mx)); // take out the majority choice
+ if (alts != 0) {
+ for (ix = 0; ix < 4; ix++) {
+ if (alts & (1 << ix)) { // FIXME: could be done multiple times? bad if so
+ hwl_alts[rankx].hwl_alt_mask |= (1 << byte_idx); // set the mask
+ hwl_alts[rankx].hwl_alt_delay[byte_idx] = ix << 1; // record the value
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d (%d) alt %d (%d).\n",
+ node, ddr_interface_num, rankx, byte_idx, mx << 1, xc,
+ ix << 1, wlevel_bytes[byte_idx][ix]);
+ }
+ }
+ } else {
+ debug_print("N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d alt NONE.\n",
+ node, ddr_interface_num, rankx, byte_idx, mx << 1);
+ }
+#endif /* SWL_TRY_HWL_ALT */
+ if (cc > 2) { // unlikely, but...
+ // assume: counts for 3 indices are all 1
+ // possibilities are: 0/2/4, 2/4/6, 0/4/6, 0/2/6
+ // and the desired?: 2 , 4 , 6, 0
+ // we choose the middle, assuming one of the outliers is bad
+ // NOTE: this is an ugly hack at the moment; there must be a better way
+ switch (mc) {
+ case 0x7: mx = 1; break; // was 0/2/4, choose 2
+ case 0xb: mx = 0; break; // was 0/2/6, choose 0
+ case 0xd: mx = 3; break; // was 0/4/6, choose 6
+ case 0xe: mx = 2; break; // was 2/4/6, choose 4
+ default:
+ case 0xf: mx = 1; break; // was 0/2/4/6, choose 2?
+ }
+ error_print("N%d.LMC%d.R%d: HW WL MAJORITY: bad byte-lane %d (0x%x), using %d.\n",
+ node, ddr_interface_num, rankx, byte_idx, mc, mx << 1);
+ }
+ update_wlevel_rank_struct(&lmc_wlevel_rank, byte_idx, mx << 1);
+ } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
+ display_WL_with_final(node, ddr_interface_num, lmc_wlevel_rank, rankx);
+ } /* if (wlevel_loops > 1) */
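+ /* Illustration of the vote above (sample values are made up): with
+    wlevel_loops = 5 and per-sample delays of 2,2,4,2,4 for one byte lane,
+    wlevel_bytes[lane] = {0,3,2,0}; the majority index mx = 1 selects delay 2,
+    and under SWL_TRY_HWL_ALT the losing value 4 is remembered as an
+    alternate for software write-leveling to try later. */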
+#endif /* HW_WL_MAJORITY */
+ // maybe print an error summary for the rank
+ if ((wlevel_bitmask_errors_rank != 0) || (wlevel_validity_errors_rank != 0)) {
+ VB_PRT(VBL_FAE, "N%d.LMC%d.R%d: H/W Write-Leveling errors - %d bitmask, %d validity, %d retries, %d exhausted\n",
+ node, ddr_interface_num, rankx,
+ wlevel_bitmask_errors_rank, wlevel_validity_errors_rank,
+ wloop_retries_total, wloop_retries_exhausted);
+ }
+
+ } /* for (rankx = 0; rankx < dimm_count * 4;rankx++) */
+
+#if WODT_MASK_2R_1S
+ if ((ddr_type == DDR4_DRAM) && (num_ranks == 2) && (dimm_count == 1)) {
+ /* LMC(0)_WODT_MASK */
+ bdk_lmcx_wodt_mask_t lmc_wodt_mask;
+ // always read current so we can see if its different from saved
+ lmc_wodt_mask.u = BDK_CSR_READ(node, BDK_LMCX_WODT_MASK(ddr_interface_num));
+ if (lmc_wodt_mask.u != saved_wodt_mask) { // always restore what was saved if diff
+ lmc_wodt_mask.u = saved_wodt_mask;
+ ddr_print("WODT_MASK : 0x%016lx\n", lmc_wodt_mask.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_WODT_MASK(ddr_interface_num), lmc_wodt_mask.u);
+ }
+ }
+#endif /* WODT_MASK_2R_1S */
+
+ } // End HW write-leveling block
+
+ // At the end of HW Write Leveling, check on some things...
+ if (! disable_deskew_training) {
+
+ deskew_counts_t dsk_counts;
+ int retry_count = 0;
+
+ VB_PRT(VBL_FAE, "N%d.LMC%d: Check Deskew Settings before Read-Leveling.\n", node, ddr_interface_num);
+
+ do {
+ Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, VBL_FAE);
+
+ // Retrain when any raw card shows nibble errors, or when a non-A/B
+ // raw card shows saturation (raw card A or B does not benefit from
+ // retraining on saturation alone).
+ if ((!spd_rawcard_AorB && dsk_counts.saturated > 0) ||
+ ((dsk_counts.nibrng_errs != 0) || (dsk_counts.nibunl_errs != 0)))
+ {
+ retry_count++;
+ VB_PRT(VBL_FAE, "N%d.LMC%d: Deskew Status indicates saturation or nibble errors - retry %d Training.\n",
+ node, ddr_interface_num, retry_count);
+ Perform_Read_Deskew_Training(node, rank_mask, ddr_interface_num,
+ spd_rawcard_AorB, 0, ddr_interface_64b);
+ } else
+ break;
+ } while (retry_count < 5);
+
+ // print the last setting only if we had to do retries here
+ if (retry_count > 0)
+ Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, VBL_NORM);
+ }
+
+ /*
+ * 6.9.12 LMC Read Leveling
+ *
+ * LMC supports an automatic read-leveling separately per byte-lane using
+ * the DDR3 multipurpose register predefined pattern for system
+ * calibration defined in the JEDEC DDR3 specifications.
+ *
+ * All of DDR PLL, LMC CK, and LMC DRESET, and early LMC initializations
+ * must be completed prior to starting this LMC read-leveling sequence.
+ *
+ * Software could simply write the desired read-leveling values into
+ * LMC(0)_RLEVEL_RANK(0..3). This section describes a sequence that uses
+ * LMC's auto read-leveling capabilities.
+ *
+ * When LMC does the read-leveling sequence for a rank, it first enables
+ * the DDR3 multipurpose register predefined pattern for system
+ * calibration on the selected DRAM rank via a DDR3 MR3 write, then
+ * executes 64 RD operations at different internal delay settings, then
+ * disables the predefined pattern via another DDR3 MR3 write
+ * operation. LMC determines the pass or fail of each of the 64 settings
+ * independently for each byte lane, then writes appropriate
+ * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] values for the rank.
+ *
+ * After read-leveling for a rank, software can read the 64 pass/fail
+ * indications for one byte lane via LMC(0)_RLEVEL_DBG[BITMASK]. Software
+ * can observe all pass/fail results for all byte lanes in a rank via
+ * separate read-leveling sequences on the rank with different
+ * LMC(0)_RLEVEL_CTL[BYTE] values.
+ *
+ * The 64 pass/fail results will typically have failures for the low
+ * delays, followed by a run of some passing settings, followed by more
+ * failures in the remaining high delays. LMC sets
+ * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] to one of the passing settings.
+ * First, LMC selects the longest run of successes in the 64 results. (In
+ * the unlikely event that there is more than one longest run, LMC
+ * selects the first one.) Then if LMC(0)_RLEVEL_CTL[OFFSET_EN] = 1 and
+ * the selected run has more than LMC(0)_RLEVEL_CTL[OFFSET] successes,
+ * LMC selects the last passing setting in the run minus
+ * LMC(0)_RLEVEL_CTL[OFFSET]. Otherwise LMC selects the middle setting in
+ * the run (rounding earlier when necessary). We expect the read-leveling
+ * sequence to produce good results with the reset values
+ * LMC(0)_RLEVEL_CTL [OFFSET_EN]=1, LMC(0)_RLEVEL_CTL[OFFSET] = 2.
+ *
+ * The read-leveling sequence has the following steps:
+ *
+ * 1. Select desired LMC(0)_RLEVEL_CTL[OFFSET_EN,OFFSET,BYTE] settings.
+ * Do the remaining substeps 2-4 separately for each rank i with
+ * attached DRAM.
+ *
+ * 2. Without changing any other fields in LMC(0)_CONFIG,
+ *
+ * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select read-leveling
+ *
+ * o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
+ *
+ * o write LMC(0)_SEQ_CTL[INIT_START] = 1
+ *
+ * This initiates the previously-described read-leveling.
+ *
+ * 3. Wait until LMC(0)_RLEVEL_RANKi[STATUS] != 2
+ *
+ * LMC will have updated LMC(0)_RLEVEL_RANKi[BYTE*] for all byte lanes
+ * at this point.
+ *
+ * If ECC DRAM is not present (i.e. when DRAM is not attached to the
+ * DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and
+ * DDR_DQ<35:32> chip signals), write LMC(0)_RLEVEL_RANK*[BYTE8] =
+ * LMC(0)_RLEVEL_RANK*[BYTE0]. Write LMC(0)_RLEVEL_RANK*[BYTE4] =
+ * LMC(0)_RLEVEL_RANK*[BYTE0].
+ *
+ * 4. If desired, consult LMC(0)_RLEVEL_DBG[BITMASK] and compare to
+ * LMC(0)_RLEVEL_RANKi[BYTE*] for the lane selected by
+ * LMC(0)_RLEVEL_CTL[BYTE]. If desired, modify LMC(0)_RLEVEL_CTL[BYTE]
+ * to a new value and repeat so that all BITMASKs can be observed.
+ *
+ * 5. Initialize LMC(0)_RLEVEL_RANK* values for all unused ranks.
+ *
+ * Let rank i be a rank with attached DRAM.
+ *
+ * For all ranks j that do not have attached DRAM, set
+ * LMC(0)_RLEVEL_RANKj = LMC(0)_RLEVEL_RANKi.
+ *
+ * This read-leveling sequence can help select the proper CN70XX ODT
+ * resistance value (LMC(0)_COMP_CTL2[RODT_CTL]). A hardware-generated
+ * LMC(0)_RLEVEL_RANKi[BYTEj] value (for a used byte lane j) that is
+ * drastically different from a neighboring LMC(0)_RLEVEL_RANKi[BYTEk]
+ * (for a used byte lane k) can indicate that the CN70XX ODT value is
+ * bad. It is possible to simultaneously optimize both
+ * LMC(0)_COMP_CTL2[RODT_CTL] and LMC(0)_RLEVEL_RANKn[BYTE*] values by
+ * performing this read-leveling sequence for several
+ * LMC(0)_COMP_CTL2[RODT_CTL] values and selecting the one with the best
+ * LMC(0)_RLEVEL_RANKn[BYTE*] profile for the ranks.
+ */
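+ /* Worked example of the selection rule described above (bitmask is
+    hypothetical): if the 64 pass/fail results contain a single passing run
+    covering delay settings 20..35, then with the reset values OFFSET_EN = 1
+    and OFFSET = 2 the LMC picks 35 - 2 = 33; with OFFSET_EN = 0 it would pick
+    the middle of the run, 27 (rounding toward the earlier setting). */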
+
+ {
+#pragma pack(push,4)
+ bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank;
+ bdk_lmcx_comp_ctl2_t lmc_comp_ctl2;
+ bdk_lmcx_rlevel_ctl_t rlevel_ctl;
+ bdk_lmcx_control_t lmc_control;
+ bdk_lmcx_modereg_params1_t lmc_modereg_params1;
+ unsigned char rodt_ctl;
+ unsigned char rankx = 0;
+ int rlevel_rodt_errors = 0;
+ unsigned char ecc_ena;
+ unsigned char rtt_nom;
+ unsigned char rtt_idx;
+ int min_rtt_nom_idx;
+ int max_rtt_nom_idx;
+ int min_rodt_ctl;
+ int max_rodt_ctl;
+ int rlevel_debug_loops = 1;
+ unsigned char save_ddr2t;
+ int rlevel_avg_loops;
+ int ddr_rlevel_compute;
+ int saved_ddr__ptune, saved_ddr__ntune, rlevel_comp_offset;
+ int saved_int_zqcs_dis = 0;
+ int disable_sequential_delay_check = 0;
+ int maximum_adjacent_rlevel_delay_increment = 0;
+ struct {
+ uint64_t setting;
+ int score;
+ } rlevel_scoreboard[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4];
+ int print_nom_ohms;
+#if PERFECT_BITMASK_COUNTING
+ typedef struct {
+ uint8_t count[9][32]; // 8+ECC by 32 values
+ uint8_t total[9]; // 8+ECC
+ } rank_perfect_t;
+ rank_perfect_t rank_perfect_counts[4];
+#endif
+
+#pragma pack(pop)
+
+#if PERFECT_BITMASK_COUNTING
+ memset(rank_perfect_counts, 0, sizeof(rank_perfect_counts));
+#endif /* PERFECT_BITMASK_COUNTING */
+
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ save_ddr2t = lmc_control.s.ddr2t;
+
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ ecc_ena = lmc_config.s.ecc_ena;
+
+#if 0
+ {
+ int save_ref_zqcs_int;
+ uint64_t temp_delay_usecs;
+
+ /* Temporarily select the minimum ZQCS interval and wait
+ long enough for a few ZQCS calibrations to occur. This
+ should ensure that the calibration circuitry is
+ stabilized before read-leveling occurs. */
+ save_ref_zqcs_int = lmc_config.s.ref_zqcs_int;
+ lmc_config.s.ref_zqcs_int = 1 | (32<<7); /* set smallest interval */
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+ BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+
+ /* Compute an appropriate delay based on the current ZQCS
+ interval. The delay should be long enough for the
+ current ZQCS delay counter to expire plus ten of the
+ minimum intervals to ensure that some calibrations
+ occur. */
+ temp_delay_usecs = (((uint64_t)save_ref_zqcs_int >> 7)
+ * tclk_psecs * 100 * 512 * 128) / (10000*10000)
+ + 10 * ((uint64_t)32 * tclk_psecs * 100 * 512 * 128) / (10000*10000);
+
+ ddr_print ("Waiting %lu usecs for ZQCS calibrations to start\n",
+ temp_delay_usecs);
+ bdk_wait_usec(temp_delay_usecs);
+
+ lmc_config.s.ref_zqcs_int = save_ref_zqcs_int; /* Restore computed interval */
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+ BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ }
+#endif
+
+ if ((s = lookup_env_parameter("ddr_rlevel_2t")) != NULL) {
+ lmc_control.s.ddr2t = strtoul(s, NULL, 0);
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ ddr_print("N%d.LMC%d: Performing Read-Leveling\n", node, ddr_interface_num);
+
+ rlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num));
+
+ rlevel_avg_loops = custom_lmc_config->rlevel_average_loops;
+ if (rlevel_avg_loops == 0) {
+ rlevel_avg_loops = RLEVEL_AVG_LOOPS_DEFAULT;
+ if ((dimm_count == 1) || (num_ranks == 1)) // up the samples for these cases
+ rlevel_avg_loops = rlevel_avg_loops * 2 + 1;
+ }
+
+ ddr_rlevel_compute = custom_lmc_config->rlevel_compute;
+ rlevel_ctl.s.offset_en = custom_lmc_config->offset_en;
+ rlevel_ctl.s.offset = spd_rdimm
+ ? custom_lmc_config->offset_rdimm
+ : custom_lmc_config->offset_udimm;
+
+ rlevel_ctl.s.delay_unload_0 = 1; /* should normally be set */
+ rlevel_ctl.s.delay_unload_1 = 1; /* should normally be set */
+ rlevel_ctl.s.delay_unload_2 = 1; /* should normally be set */
+ rlevel_ctl.s.delay_unload_3 = 1; /* should normally be set */
+
+ rlevel_ctl.s.or_dis = 1; // default to get best bitmasks
+ if ((s = lookup_env_parameter("ddr_rlevel_or_dis")) != NULL) {
+ rlevel_ctl.s.or_dis = !!strtoul(s, NULL, 0);
+ }
+ rlevel_ctl.s.bitmask = 0xff; // should work in 32b mode also
+ if ((s = lookup_env_parameter("ddr_rlevel_ctl_bitmask")) != NULL) {
+ rlevel_ctl.s.bitmask = strtoul(s, NULL, 0);
+ }
+ debug_print("N%d.LMC%d: RLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n",
+ node, ddr_interface_num,
+ rlevel_ctl.s.or_dis, rlevel_ctl.s.bitmask);
+
+ rlevel_comp_offset = spd_rdimm
+ ? custom_lmc_config->rlevel_comp_offset_rdimm
+ : custom_lmc_config->rlevel_comp_offset_udimm;
+
+ if ((s = lookup_env_parameter("ddr_rlevel_offset")) != NULL) {
+ rlevel_ctl.s.offset = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_rlevel_offset_en")) != NULL) {
+ rlevel_ctl.s.offset_en = strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_rlevel_ctl")) != NULL) {
+ rlevel_ctl.u = strtoul(s, NULL, 0);
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num), rlevel_ctl.u);
+
+ if (bdk_is_platform(BDK_PLATFORM_ASIM))
+ rlevel_debug_loops = 0;
+
+ if ((s = lookup_env_parameter("ddr%d_rlevel_debug_loops", ddr_interface_num)) != NULL) {
+ rlevel_debug_loops = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_rtt_nom_auto")) != NULL) {
+ ddr_rtt_nom_auto = !!strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_rlevel_average")) != NULL) {
+ rlevel_avg_loops = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_rlevel_compute")) != NULL) {
+ ddr_rlevel_compute = strtoul(s, NULL, 0);
+ }
+
+ ddr_print("RLEVEL_CTL : 0x%016lx\n", rlevel_ctl.u);
+ ddr_print("RLEVEL_OFFSET : %6d\n", rlevel_ctl.s.offset);
+ ddr_print("RLEVEL_OFFSET_EN : %6d\n", rlevel_ctl.s.offset_en);
+
+    /* The purpose of the indexed table is to sort the settings
+ ** by the ohm value to simplify the testing when incrementing
+ ** through the settings. (index => ohms) 1=120, 2=60, 3=40,
+ ** 4=30, 5=20 */
+ min_rtt_nom_idx = (custom_lmc_config->min_rtt_nom_idx == 0) ? 1 : custom_lmc_config->min_rtt_nom_idx;
+ max_rtt_nom_idx = (custom_lmc_config->max_rtt_nom_idx == 0) ? 5 : custom_lmc_config->max_rtt_nom_idx;
+
+ min_rodt_ctl = (custom_lmc_config->min_rodt_ctl == 0) ? 1 : custom_lmc_config->min_rodt_ctl;
+ max_rodt_ctl = (custom_lmc_config->max_rodt_ctl == 0) ? 5 : custom_lmc_config->max_rodt_ctl;
+
+ if ((s = lookup_env_parameter("ddr_min_rodt_ctl")) != NULL) {
+ min_rodt_ctl = strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_max_rodt_ctl")) != NULL) {
+ max_rodt_ctl = strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_min_rtt_nom_idx")) != NULL) {
+ min_rtt_nom_idx = strtoul(s, NULL, 0);
+ }
+ if ((s = lookup_env_parameter("ddr_max_rtt_nom_idx")) != NULL) {
+ max_rtt_nom_idx = strtoul(s, NULL, 0);
+ }
+
+#ifdef ENABLE_CUSTOM_RLEVEL_TABLE
+ if (custom_lmc_config->rlevel_table != NULL) {
+ char part_number[21];
+ /* Check for hard-coded read-leveling settings */
+ get_dimm_part_number(part_number, node, &dimm_config_table[0], 0, ddr_type);
+ for (rankx = 0; rankx < dimm_count * 4;rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+
+ i = 0;
+ while (custom_lmc_config->rlevel_table[i].part != NULL) {
+ debug_print("DIMM part number:\"%s\", SPD: \"%s\"\n", custom_lmc_config->rlevel_table[i].part, part_number);
+ if ((strcmp(part_number, custom_lmc_config->rlevel_table[i].part) == 0)
+ && (_abs(custom_lmc_config->rlevel_table[i].speed - 2*ddr_hertz/(1000*1000)) < 10 ))
+ {
+ ddr_print("Using hard-coded read leveling for DIMM part number: \"%s\"\n", part_number);
+ lmc_rlevel_rank.u = custom_lmc_config->rlevel_table[i].rlevel_rank[ddr_interface_num][rankx];
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u);
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+ display_RL(node, ddr_interface_num, lmc_rlevel_rank, rankx);
+ rlevel_debug_loops = 0; /* Disable h/w read-leveling */
+ break;
+ }
+ ++i;
+ }
+ }
+ }
+#endif /* ENABLE_CUSTOM_RLEVEL_TABLE */
+
+ while(rlevel_debug_loops--) {
+ /* Initialize the error scoreboard */
+ memset(rlevel_scoreboard, 0, sizeof(rlevel_scoreboard));
+
+ if ((s = lookup_env_parameter("ddr_rlevel_comp_offset")) != NULL) {
+ rlevel_comp_offset = strtoul(s, NULL, 0);
+ }
+
+ disable_sequential_delay_check = custom_lmc_config->disable_sequential_delay_check;
+
+ if ((s = lookup_env_parameter("ddr_disable_sequential_delay_check")) != NULL) {
+ disable_sequential_delay_check = strtoul(s, NULL, 0);
+ }
+
+ maximum_adjacent_rlevel_delay_increment = custom_lmc_config->maximum_adjacent_rlevel_delay_increment;
+
+ if ((s = lookup_env_parameter("ddr_maximum_adjacent_rlevel_delay_increment")) != NULL) {
+ maximum_adjacent_rlevel_delay_increment = strtoul(s, NULL, 0);
+ }
+
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ saved_ddr__ptune = lmc_comp_ctl2.s.ddr__ptune;
+ saved_ddr__ntune = lmc_comp_ctl2.s.ddr__ntune;
+
+ /* Disable dynamic compensation settings */
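+        /* (Apply rlevel_comp_offset to the saved ptune/ntune values and run the
+           compensation circuit in bypass mode so the manual values take effect;
+           ZQCS is disabled while bypassed to avoid recalibration.) */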
+ if (rlevel_comp_offset != 0) {
+ lmc_comp_ctl2.s.ptune = saved_ddr__ptune;
+ lmc_comp_ctl2.s.ntune = saved_ddr__ntune;
+
+ /* Round up the ptune calculation to bias the odd cases toward ptune */
+ lmc_comp_ctl2.s.ptune += divide_roundup(rlevel_comp_offset, 2);
+ lmc_comp_ctl2.s.ntune -= rlevel_comp_offset/2;
+
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ saved_int_zqcs_dis = lmc_control.s.int_zqcs_dis;
+ lmc_control.s.int_zqcs_dis = 1; /* Disable ZQCS while in bypass. */
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ lmc_comp_ctl2.s.byp = 1; /* Enable bypass mode */
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
+ BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read again */
+ ddr_print("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
+ lmc_comp_ctl2.s.ddr__ptune, lmc_comp_ctl2.s.ddr__ntune);
+ }
+
+ lmc_modereg_params1.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num));
+
+ for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
+ rtt_nom = imp_values->rtt_nom_table[rtt_idx];
+
+            /* When the read ODT mask is zero, the dyn_rtt_nom_mask is
+               zero and RTT_NOM will not change during read-leveling.
+               Since the value is fixed we only need to test it once. */
+ if (dyn_rtt_nom_mask == 0) {
+ print_nom_ohms = -1; // flag not to print NOM ohms
+ if (rtt_idx != min_rtt_nom_idx)
+ continue;
+ } else {
+ if (dyn_rtt_nom_mask & 1) lmc_modereg_params1.s.rtt_nom_00 = rtt_nom;
+ if (dyn_rtt_nom_mask & 2) lmc_modereg_params1.s.rtt_nom_01 = rtt_nom;
+ if (dyn_rtt_nom_mask & 4) lmc_modereg_params1.s.rtt_nom_10 = rtt_nom;
+ if (dyn_rtt_nom_mask & 8) lmc_modereg_params1.s.rtt_nom_11 = rtt_nom;
+ // FIXME? rank 0 ohms always for the printout?
+ print_nom_ohms = imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00];
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num), lmc_modereg_params1.u);
+ VB_PRT(VBL_TME, "\n");
+ VB_PRT(VBL_TME, "RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_11],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_10],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_01],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00],
+ lmc_modereg_params1.s.rtt_nom_11,
+ lmc_modereg_params1.s.rtt_nom_10,
+ lmc_modereg_params1.s.rtt_nom_01,
+ lmc_modereg_params1.s.rtt_nom_00);
+
+ perform_ddr_init_sequence(node, rank_mask, ddr_interface_num);
+
+ // Try RANK outside RODT to rearrange the output...
+ for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+ int byte_idx;
+ rlevel_byte_data_t rlevel_byte[9];
+ int average_loops;
+ int rlevel_rank_errors, rlevel_bitmask_errors, rlevel_nonseq_errors;
+ rlevel_bitmask_t rlevel_bitmask[9];
+#if PICK_BEST_RANK_SCORE_NOT_AVG
+ int rlevel_best_rank_score;
+#endif
+
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
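+                /* For each candidate RODT value: program COMP_CTL2[RODT_CTL], run hardware
+                   read-leveling rlevel_avg_loops times for this rank, and score the resulting
+                   bitmasks and delay sequences into the scoreboard. */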
+ for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
+#if PICK_BEST_RANK_SCORE_NOT_AVG
+ rlevel_best_rank_score = DEFAULT_BEST_RANK_SCORE;
+#endif
+ rlevel_rodt_errors = 0;
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ lmc_comp_ctl2.s.rodt_ctl = rodt_ctl;
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+                    bdk_wait_usec(1); /* Give it a little time to take effect */
+ VB_PRT(VBL_DEV, "Read ODT_CTL : 0x%x (%d ohms)\n",
+ lmc_comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[lmc_comp_ctl2.s.rodt_ctl]);
+
+ memset(rlevel_byte, 0, sizeof(rlevel_byte));
+
+ for (average_loops = 0; average_loops < rlevel_avg_loops; average_loops++) {
+ rlevel_bitmask_errors = 0;
+
+ if (! (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM))) {
+ /* Clear read-level delays */
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), 0);
+
+ /* read-leveling */
+ perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 1);
+
+ if (!bdk_is_platform(BDK_PLATFORM_ASIM) &&
+ BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx),
+ status, ==, 3, 1000000))
+ {
+ error_print("ERROR: Timeout waiting for RLEVEL\n");
+ }
+ }
+
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+
+ { // start bitmask interpretation block
+ int redoing_nonseq_errs = 0;
+
+ memset(rlevel_bitmask, 0, sizeof(rlevel_bitmask));
+
+ if (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM)) {
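+                            /* DDR4 RDIMMs with separate A/B sides are read-leveled twice through
+                               the MPR: Location 0 for the A side, Location 3 for the B side.
+                               The results are merged below: bytes 0-3 and the ECC byte come from
+                               the A side, bytes 4-7 from the B side. */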
+ bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank_aside;
+ bdk_lmcx_modereg_params0_t lmc_modereg_params0;
+
+ /* A-side */
+ lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
+ lmc_modereg_params0.s.mprloc = 0; /* MPR Page 0 Location 0 */
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
+
+ /* Clear read-level delays */
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), 0);
+
+ perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 1); /* read-leveling */
+
+ if (!bdk_is_platform(BDK_PLATFORM_ASIM) &&
+ BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx),
+ status, ==, 3, 1000000))
+ {
+ error_print("ERROR: Timeout waiting for RLEVEL\n");
+
+ }
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+
+ lmc_rlevel_rank_aside.u = lmc_rlevel_rank.u;
+
+ rlevel_bitmask[0].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 0);
+ rlevel_bitmask[1].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 1);
+ rlevel_bitmask[2].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 2);
+ rlevel_bitmask[3].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 3);
+ rlevel_bitmask[8].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 8);
+ /* A-side complete */
+
+
+ /* B-side */
+ lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
+ lmc_modereg_params0.s.mprloc = 3; /* MPR Page 0 Location 3 */
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
+
+ /* Clear read-level delays */
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), 0);
+
+ perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 1); /* read-leveling */
+
+ if (!bdk_is_platform(BDK_PLATFORM_ASIM) &&
+ BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx),
+ status, ==, 3, 1000000))
+ {
+ error_print("ERROR: Timeout waiting for RLEVEL\n");
+ }
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+
+ rlevel_bitmask[4].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 4);
+ rlevel_bitmask[5].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 5);
+ rlevel_bitmask[6].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 6);
+ rlevel_bitmask[7].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 7);
+ /* B-side complete */
+
+
+ update_rlevel_rank_struct(&lmc_rlevel_rank, 0, lmc_rlevel_rank_aside.cn83xx.byte0);
+ update_rlevel_rank_struct(&lmc_rlevel_rank, 1, lmc_rlevel_rank_aside.cn83xx.byte1);
+ update_rlevel_rank_struct(&lmc_rlevel_rank, 2, lmc_rlevel_rank_aside.cn83xx.byte2);
+ update_rlevel_rank_struct(&lmc_rlevel_rank, 3, lmc_rlevel_rank_aside.cn83xx.byte3);
+ update_rlevel_rank_struct(&lmc_rlevel_rank, 8, lmc_rlevel_rank_aside.cn83xx.byte8); /* ECC A-side */
+
+ lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
+ lmc_modereg_params0.s.mprloc = 0; /* MPR Page 0 Location 0 */
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
+
+ } /* if (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM)) */
+
+ /*
+ * Evaluate the quality of the read-leveling delays from the bitmasks.
+ * Also save off a software computed read-leveling mask that may be
+ * used later to qualify the delay results from Octeon.
+ */
+ for (byte_idx = 0; byte_idx < (8+ecc_ena); ++byte_idx) {
+ int bmerr;
+ if (!(ddr_interface_bytemask&(1<<byte_idx)))
+ continue;
+ if (! (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM))) {
+ rlevel_bitmask[byte_idx].bm =
+ octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, byte_idx);
+ }
+ bmerr = validate_ddr3_rlevel_bitmask(&rlevel_bitmask[byte_idx], ddr_type);
+ rlevel_bitmask[byte_idx].errs = bmerr;
+ rlevel_bitmask_errors += bmerr;
+#if PERFECT_BITMASK_COUNTING
+ if ((ddr_type == DDR4_DRAM) && !bmerr) { // count only the "perfect" bitmasks
+ // FIXME: could optimize this a bit?
+ int delay = get_rlevel_rank_struct(&lmc_rlevel_rank, byte_idx);
+ rank_perfect_counts[rankx].count[byte_idx][delay] += 1;
+ rank_perfect_counts[rankx].total[byte_idx] += 1;
+ }
+#endif /* PERFECT_BITMASK_COUNTING */
+ }
+
+ /* Set delays for unused bytes to match byte 0. */
+ for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
+ if (ddr_interface_bytemask & (1 << byte_idx))
+ continue;
+ update_rlevel_rank_struct(&lmc_rlevel_rank, byte_idx, lmc_rlevel_rank.cn83xx.byte0);
+ }
+
+ /* Save a copy of the byte delays in physical
+ order for sequential evaluation. */
+ unpack_rlevel_settings(ddr_interface_bytemask, ecc_ena, rlevel_byte, lmc_rlevel_rank);
+ redo_nonseq_errs:
+
+ rlevel_nonseq_errors = 0;
+
+ if (! disable_sequential_delay_check) {
+ if ((ddr_interface_bytemask & 0xff) == 0xff) {
+ /* Evaluate delay sequence across the whole range of bytes for standard dimms. */
+ if ((spd_dimm_type == 1) || (spd_dimm_type == 5)) { /* 1=RDIMM, 5=Mini-RDIMM */
+ int register_adjacent_delay = _abs(rlevel_byte[4].delay - rlevel_byte[5].delay);
+ /* Registered dimm topology routes from the center. */
+ rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 0, 3+ecc_ena,
+ maximum_adjacent_rlevel_delay_increment);
+ rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 5, 7+ecc_ena,
+ maximum_adjacent_rlevel_delay_increment);
+ // byte 5 sqerrs never gets cleared for RDIMMs
+ rlevel_byte[5].sqerrs = 0;
+ if (register_adjacent_delay > 1) {
+ /* Assess proximity of bytes on opposite sides of register */
+ rlevel_nonseq_errors += (register_adjacent_delay-1) * RLEVEL_ADJACENT_DELAY_ERROR;
+ // update byte 5 error
+ rlevel_byte[5].sqerrs += (register_adjacent_delay-1) * RLEVEL_ADJACENT_DELAY_ERROR;
+ }
+ }
+ if ((spd_dimm_type == 2) || (spd_dimm_type == 6)) { /* 2=UDIMM, 6=Mini-UDIMM */
+ /* Unbuffered dimm topology routes from end to end. */
+ rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 0, 7+ecc_ena,
+ maximum_adjacent_rlevel_delay_increment);
+ }
+ } else {
+ rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 0, 3+ecc_ena,
+ maximum_adjacent_rlevel_delay_increment);
+ }
+ } /* if (! disable_sequential_delay_check) */
+
+#if 0
+ // FIXME FIXME: disabled for now, it was too much...
+
+ // Calculate total errors for the rank:
+ // we do NOT add nonsequential errors if mini-[RU]DIMM or x16;
+ // mini-DIMMs and x16 devices have unusual sequence geometries.
+ // Make the final scores for them depend only on the bitmasks...
+ rlevel_rank_errors = rlevel_bitmask_errors;
+ if ((spd_dimm_type != 5) &&
+ (spd_dimm_type != 6) &&
+ (dram_width != 16))
+ {
+ rlevel_rank_errors += rlevel_nonseq_errors;
+ }
+#else
+ rlevel_rank_errors = rlevel_bitmask_errors + rlevel_nonseq_errors;
+#endif
+
+ // print original sample here only if we are not really averaging or picking best
+ // also do not print if we were redoing the NONSEQ score for using COMPUTED
+ if (!redoing_nonseq_errs && ((rlevel_avg_loops < 2) || dram_is_verbose(VBL_DEV2))) {
+ display_RL_BM(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena);
+ display_RL_BM_scores(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena);
+ display_RL_SEQ_scores(node, ddr_interface_num, rankx, rlevel_byte, ecc_ena);
+ display_RL_with_score(node, ddr_interface_num, lmc_rlevel_rank, rankx, rlevel_rank_errors);
+ }
+
+ if (ddr_rlevel_compute) {
+ if (!redoing_nonseq_errs) {
+ /* Recompute the delays based on the bitmask */
+ for (byte_idx = 0; byte_idx < (8+ecc_ena); ++byte_idx) {
+ if (!(ddr_interface_bytemask & (1 << byte_idx)))
+ continue;
+ update_rlevel_rank_struct(&lmc_rlevel_rank, byte_idx,
+ compute_ddr3_rlevel_delay(rlevel_bitmask[byte_idx].mstart,
+ rlevel_bitmask[byte_idx].width,
+ rlevel_ctl));
+ }
+
+ /* Override the copy of byte delays with the computed results. */
+ unpack_rlevel_settings(ddr_interface_bytemask, ecc_ena, rlevel_byte, lmc_rlevel_rank);
+
+ redoing_nonseq_errs = 1;
+ goto redo_nonseq_errs;
+
+ } else {
+ /* now print this if already printed the original sample */
+ if ((rlevel_avg_loops < 2) || dram_is_verbose(VBL_DEV2)) {
+ display_RL_with_computed(node, ddr_interface_num,
+ lmc_rlevel_rank, rankx,
+ rlevel_rank_errors);
+ }
+ }
+ } /* if (ddr_rlevel_compute) */
+
+ } // end bitmask interpretation block
+
+#if PICK_BEST_RANK_SCORE_NOT_AVG
+
+ // if it is a better (lower) score, then keep it
+ if (rlevel_rank_errors < rlevel_best_rank_score) {
+ rlevel_best_rank_score = rlevel_rank_errors;
+
+ // save the new best delays and best errors
+ for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
+ rlevel_byte[byte_idx].best = rlevel_byte[byte_idx].delay;
+ rlevel_byte[byte_idx].bestsq = rlevel_byte[byte_idx].sqerrs;
+ // save bitmasks and their scores as well
+ // xlate UNPACKED index to PACKED index to get from rlevel_bitmask
+ rlevel_byte[byte_idx].bm = rlevel_bitmask[XUP(byte_idx, !!ecc_ena)].bm;
+ rlevel_byte[byte_idx].bmerrs = rlevel_bitmask[XUP(byte_idx, !!ecc_ena)].errs;
+ }
+ }
+#else /* PICK_BEST_RANK_SCORE_NOT_AVG */
+
+ /* Accumulate the total score across averaging loops for this setting */
+ debug_print("rlevel_scoreboard[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score: %d [%d]\n",
+ rtt_nom, rodt_ctl, rankx, rlevel_rank_errors, average_loops);
+ rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score += rlevel_rank_errors;
+
+ /* Accumulate the delay totals and loop counts
+ necessary to compute average delay results */
+ for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
+ if (rlevel_byte[byte_idx].delay != 0) { /* Don't include delay=0 in the average */
+ ++rlevel_byte[byte_idx].loop_count;
+ rlevel_byte[byte_idx].loop_total += rlevel_byte[byte_idx].delay;
+ }
+ } /* for (byte_idx = 0; byte_idx < 9; ++byte_idx) */
+#endif /* PICK_BEST_RANK_SCORE_NOT_AVG */
+
+ rlevel_rodt_errors += rlevel_rank_errors;
+
+ } /* for (average_loops = 0; average_loops < rlevel_avg_loops; average_loops++) */
+
+#if PICK_BEST_RANK_SCORE_NOT_AVG
+
+ /* We recorded the best score across the averaging loops */
+ rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score = rlevel_best_rank_score;
+
+ /* Restore the delays from the best fields that go with the best score */
+ for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
+ rlevel_byte[byte_idx].delay = rlevel_byte[byte_idx].best;
+ rlevel_byte[byte_idx].sqerrs = rlevel_byte[byte_idx].bestsq;
+ }
+#else /* PICK_BEST_RANK_SCORE_NOT_AVG */
+
+ /* Compute the average score across averaging loops */
+ rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score =
+ divide_nint(rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score, rlevel_avg_loops);
+
+ /* Compute the average delay results */
+ for (byte_idx=0; byte_idx < 9; ++byte_idx) {
+ if (rlevel_byte[byte_idx].loop_count == 0)
+ rlevel_byte[byte_idx].loop_count = 1;
+ rlevel_byte[byte_idx].delay = divide_nint(rlevel_byte[byte_idx].loop_total,
+ rlevel_byte[byte_idx].loop_count);
+ }
+#endif /* PICK_BEST_RANK_SCORE_NOT_AVG */
+
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+
+ pack_rlevel_settings(ddr_interface_bytemask, ecc_ena, rlevel_byte, &lmc_rlevel_rank);
+
+ if (rlevel_avg_loops > 1) {
+#if PICK_BEST_RANK_SCORE_NOT_AVG
+ // restore the "best" bitmasks and their scores for printing
+ for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
+ if ((ddr_interface_bytemask & (1 << byte_idx)) == 0)
+ continue;
+ // xlate PACKED index to UNPACKED index to get from rlevel_byte
+ rlevel_bitmask[byte_idx].bm = rlevel_byte[XPU(byte_idx, !!ecc_ena)].bm;
+ rlevel_bitmask[byte_idx].errs = rlevel_byte[XPU(byte_idx, !!ecc_ena)].bmerrs;
+ }
+ // print bitmasks/scores here only for DEV // FIXME? lower VBL?
+ if (dram_is_verbose(VBL_DEV)) {
+ display_RL_BM(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena);
+ display_RL_BM_scores(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena);
+ display_RL_SEQ_scores(node, ddr_interface_num, rankx, rlevel_byte, ecc_ena);
+ }
+
+ display_RL_with_RODT(node, ddr_interface_num, lmc_rlevel_rank, rankx,
+ rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score,
+ print_nom_ohms, imp_values->rodt_ohms[rodt_ctl],
+ WITH_RODT_BESTSCORE);
+
+#else /* PICK_BEST_RANK_SCORE_NOT_AVG */
+ display_RL_with_average(node, ddr_interface_num, lmc_rlevel_rank, rankx,
+ rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score);
+#endif /* PICK_BEST_RANK_SCORE_NOT_AVG */
+
+ } /* if (rlevel_avg_loops > 1) */
+
+ rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].setting = lmc_rlevel_rank.u;
+
+ } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
+ } /* for (rankx = 0; rankx < dimm_count*4; rankx++) */
+ } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<max_rtt_nom_idx; ++rtt_idx) */
+
+
+ /* Re-enable dynamic compensation settings. */
+ if (rlevel_comp_offset != 0) {
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+
+ lmc_comp_ctl2.s.ptune = 0;
+ lmc_comp_ctl2.s.ntune = 0;
+ lmc_comp_ctl2.s.byp = 0; /* Disable bypass mode */
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
+ BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read once */
+
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read again */
+ ddr_print("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
+ lmc_comp_ctl2.s.ddr__ptune, lmc_comp_ctl2.s.ddr__ntune);
+
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ lmc_control.s.int_zqcs_dis = saved_int_zqcs_dis; /* Restore original setting */
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ }
+
+
+ {
+ int override_compensation = 0;
+ if ((s = lookup_env_parameter("ddr__ptune")) != NULL) {
+ saved_ddr__ptune = strtoul(s, NULL, 0);
+ override_compensation = 1;
+ }
+ if ((s = lookup_env_parameter("ddr__ntune")) != NULL) {
+ saved_ddr__ntune = strtoul(s, NULL, 0);
+ override_compensation = 1;
+ }
+ if (override_compensation) {
+ lmc_comp_ctl2.s.ptune = saved_ddr__ptune;
+ lmc_comp_ctl2.s.ntune = saved_ddr__ntune;
+
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ saved_int_zqcs_dis = lmc_control.s.int_zqcs_dis;
+ lmc_control.s.int_zqcs_dis = 1; /* Disable ZQCS while in bypass. */
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ lmc_comp_ctl2.s.byp = 1; /* Enable bypass mode */
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read again */
+
+ ddr_print("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
+ lmc_comp_ctl2.s.ptune, lmc_comp_ctl2.s.ntune);
+ }
+ }
+ { /* Evaluation block */
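+            /* Walk the scoreboard: first pick the AUTO RODT_CTL/RTT_NOM combination with the
+               lowest total score across all ranks, then, rank by rank, pick the best individual
+               settings and refine each byte delay. */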
+ int best_rodt_score = DEFAULT_BEST_RANK_SCORE; /* Start with an arbitrarily high score */
+ int auto_rodt_ctl = 0;
+ int auto_rtt_nom = 0;
+ int rodt_score;
+ int rodt_row_skip_mask = 0;
+
+ // just add specific RODT rows to the skip mask for DDR4 at this time...
+ if (ddr_type == DDR4_DRAM) {
+ rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_34_ohm); // skip RODT row 34 ohms for all DDR4 types
+ rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_40_ohm); // skip RODT row 40 ohms for all DDR4 types
+#if ADD_48_OHM_SKIP
+ rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_48_ohm); // skip RODT row 48 ohms for all DDR4 types
+#endif /* ADD_48_OHM_SKIP */
+#if NOSKIP_40_48_OHM
+ // For now, do not skip RODT row 40 or 48 ohm when ddr_hertz is above 1075 MHz
+ if (ddr_hertz > 1075000000) {
+ rodt_row_skip_mask &= ~(1 << ddr4_rodt_ctl_40_ohm); // noskip RODT row 40 ohms
+ rodt_row_skip_mask &= ~(1 << ddr4_rodt_ctl_48_ohm); // noskip RODT row 48 ohms
+ }
+#endif /* NOSKIP_40_48_OHM */
+#if NOSKIP_48_STACKED
+ // For now, do not skip RODT row 48 ohm for 2Rx4 stacked die DIMMs
+ if ((is_stacked_die) && (num_ranks == 2) && (dram_width == 4)) {
+ rodt_row_skip_mask &= ~(1 << ddr4_rodt_ctl_48_ohm); // noskip RODT row 48 ohms
+ }
+#endif /* NOSKIP_48_STACKED */
+#if NOSKIP_FOR_MINI
+ // for now, leave all rows eligible when we have mini-DIMMs...
+ if ((spd_dimm_type == 5) || (spd_dimm_type == 6)) {
+ rodt_row_skip_mask = 0;
+ }
+#endif /* NOSKIP_FOR_MINI */
+#if NOSKIP_FOR_2S_1R
+ // for now, leave all rows eligible when we have a 2-slot 1-rank config
+ if ((dimm_count == 2) && (num_ranks == 1)) {
+ rodt_row_skip_mask = 0;
+ }
+#endif /* NOSKIP_FOR_2S_1R */
+ }
+
+ VB_PRT(VBL_DEV, "Evaluating Read-Leveling Scoreboard for AUTO settings.\n");
+ for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) {
+ rtt_nom = imp_values->rtt_nom_table[rtt_idx];
+
+            /* When the read ODT mask is zero, the dyn_rtt_nom_mask is
+               zero and RTT_NOM will not change during read-leveling.
+               Since the value is fixed we only need to test it once. */
+ if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
+ continue;
+
+ for (rodt_ctl=max_rodt_ctl; rodt_ctl>=min_rodt_ctl; --rodt_ctl) {
+ rodt_score = 0;
+ for (rankx = 0; rankx < dimm_count * 4;rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+ debug_print("rlevel_scoreboard[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score:%d\n",
+ rtt_nom, rodt_ctl, rankx, rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score);
+ rodt_score += rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score;
+ }
+ // FIXME: do we need to skip RODT rows here, like we do below in the by-RANK settings?
+
+ /* When using automatic ODT settings use the ODT
+ settings associated with the best score for
+ all of the tested ODT combinations. */
+
+ if ((rodt_score < best_rodt_score) || // always take lower score, OR
+ ((rodt_score == best_rodt_score) && // take same score if RODT ohms are higher
+ (imp_values->rodt_ohms[rodt_ctl] > imp_values->rodt_ohms[auto_rodt_ctl])))
+ {
+ debug_print("AUTO: new best score for rodt:%d (%3d), new score:%d, previous score:%d\n",
+ rodt_ctl, imp_values->rodt_ohms[rodt_ctl], rodt_score, best_rodt_score);
+ best_rodt_score = rodt_score;
+ auto_rodt_ctl = rodt_ctl;
+ auto_rtt_nom = rtt_nom;
+ }
+ } /* for (rodt_ctl=max_rodt_ctl; rodt_ctl>=min_rodt_ctl; --rodt_ctl) */
+ } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
+
+ lmc_modereg_params1.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num));
+
+ if (ddr_rtt_nom_auto) {
+ /* Store the automatically set RTT_NOM value */
+ if (dyn_rtt_nom_mask & 1) lmc_modereg_params1.s.rtt_nom_00 = auto_rtt_nom;
+ if (dyn_rtt_nom_mask & 2) lmc_modereg_params1.s.rtt_nom_01 = auto_rtt_nom;
+ if (dyn_rtt_nom_mask & 4) lmc_modereg_params1.s.rtt_nom_10 = auto_rtt_nom;
+ if (dyn_rtt_nom_mask & 8) lmc_modereg_params1.s.rtt_nom_11 = auto_rtt_nom;
+ } else {
+ /* restore the manual settings to the register */
+ lmc_modereg_params1.s.rtt_nom_00 = default_rtt_nom[0];
+ lmc_modereg_params1.s.rtt_nom_01 = default_rtt_nom[1];
+ lmc_modereg_params1.s.rtt_nom_10 = default_rtt_nom[2];
+ lmc_modereg_params1.s.rtt_nom_11 = default_rtt_nom[3];
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num), lmc_modereg_params1.u);
+ VB_PRT(VBL_DEV, "RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_11],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_10],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_01],
+ imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00],
+ lmc_modereg_params1.s.rtt_nom_11,
+ lmc_modereg_params1.s.rtt_nom_10,
+ lmc_modereg_params1.s.rtt_nom_01,
+ lmc_modereg_params1.s.rtt_nom_00);
+
+ VB_PRT(VBL_DEV, "RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 3)],
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 2)],
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 1)],
+ imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 0)],
+ EXTR_WR(lmc_modereg_params1.u, 3),
+ EXTR_WR(lmc_modereg_params1.u, 2),
+ EXTR_WR(lmc_modereg_params1.u, 1),
+ EXTR_WR(lmc_modereg_params1.u, 0));
+
+ VB_PRT(VBL_DEV, "DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->dic_ohms[lmc_modereg_params1.s.dic_11],
+ imp_values->dic_ohms[lmc_modereg_params1.s.dic_10],
+ imp_values->dic_ohms[lmc_modereg_params1.s.dic_01],
+ imp_values->dic_ohms[lmc_modereg_params1.s.dic_00],
+ lmc_modereg_params1.s.dic_11,
+ lmc_modereg_params1.s.dic_10,
+ lmc_modereg_params1.s.dic_01,
+ lmc_modereg_params1.s.dic_00);
+
+ if (ddr_type == DDR4_DRAM) {
+ bdk_lmcx_modereg_params2_t lmc_modereg_params2;
+ /*
+ * We must read the CSR, and not depend on odt_config[odt_idx].odt_mask2,
+ * since we could have overridden values with envvars.
+ * NOTE: this corrects the printout, since the CSR is not written with the old values...
+ */
+ lmc_modereg_params2.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num));
+
+ VB_PRT(VBL_DEV, "RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
+ imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_11],
+ imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_10],
+ imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_01],
+ imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_00],
+ lmc_modereg_params2.s.rtt_park_11,
+ lmc_modereg_params2.s.rtt_park_10,
+ lmc_modereg_params2.s.rtt_park_01,
+ lmc_modereg_params2.s.rtt_park_00);
+
+ VB_PRT(VBL_DEV, "%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE",
+ lmc_modereg_params2.s.vref_range_11,
+ lmc_modereg_params2.s.vref_range_10,
+ lmc_modereg_params2.s.vref_range_01,
+ lmc_modereg_params2.s.vref_range_00);
+
+ VB_PRT(VBL_DEV, "%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE",
+ lmc_modereg_params2.s.vref_value_11,
+ lmc_modereg_params2.s.vref_value_10,
+ lmc_modereg_params2.s.vref_value_01,
+ lmc_modereg_params2.s.vref_value_00);
+ }
+
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ if (ddr_rodt_ctl_auto)
+ lmc_comp_ctl2.s.rodt_ctl = auto_rodt_ctl;
+ else
+ lmc_comp_ctl2.s.rodt_ctl = default_rodt_ctl; // back to the original setting
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
+ lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
+ VB_PRT(VBL_DEV, "Read ODT_CTL : 0x%x (%d ohms)\n",
+ lmc_comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[lmc_comp_ctl2.s.rodt_ctl]);
+
+ ////////////////// this is the start of the RANK MAJOR LOOP
+
+ for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+ int best_rank_score = DEFAULT_BEST_RANK_SCORE; /* Start with an arbitrarily high score */
+ int best_rank_rtt_nom = 0;
+ //int best_rank_nom_ohms = 0;
+ int best_rank_ctl = 0;
+ int best_rank_ohms = 0;
+ int best_rankx = 0;
+
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ /* Use the delays associated with the best score for each individual rank */
+ VB_PRT(VBL_TME, "Evaluating Read-Leveling Scoreboard for Rank %d settings.\n", rankx);
+
+ // some of the rank-related loops below need to operate only on the ranks of a single DIMM,
+ // so create a mask for their use here
+ int dimm_rank_mask;
+ if (num_ranks == 4)
+ dimm_rank_mask = rank_mask; // should be 1111
+ else {
+ dimm_rank_mask = rank_mask & 3; // should be 01 or 11
+ if (rankx >= 2)
+ dimm_rank_mask <<= 2; // doing a rank on the second DIMM, should be 0100 or 1100
+ }
+ debug_print("DIMM rank mask: 0x%x, rank mask: 0x%x, rankx: %d\n", dimm_rank_mask, rank_mask, rankx);
+
+ ////////////////// this is the start of the BEST ROW SCORE LOOP
+
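+            /* Scan the scoreboard (restricted to ranks on this DIMM) for the lowest-scoring
+               entry; on a score tie prefer higher RODT ohms, and on a further tie prefer the
+               target rank itself. */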
+ for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
+ //int rtt_nom_ohms;
+ rtt_nom = imp_values->rtt_nom_table[rtt_idx];
+ //rtt_nom_ohms = imp_values->rtt_nom_ohms[rtt_nom];
+
+                    /* When the read ODT mask is zero, the dyn_rtt_nom_mask is
+                       zero and RTT_NOM will not change during read-leveling.
+                       Since the value is fixed we only need to test it once. */
+ if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
+ continue;
+
+ debug_print("N%d.LMC%d.R%d: starting RTT_NOM %d (%d)\n",
+                                node, ddr_interface_num, rankx, rtt_nom, imp_values->rtt_nom_ohms[rtt_nom]);
+
+ for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
+ int next_ohms = imp_values->rodt_ohms[rodt_ctl];
+
+ // skip RODT rows in mask, but *NOT* rows with too high a score;
+ // we will not use the skipped ones for printing or evaluating, but
+ // we need to allow all the non-skipped ones to be candidates for "best"
+ if (((1 << rodt_ctl) & rodt_row_skip_mask) != 0) {
+                        debug_print("N%d.LMC%d.R%d: SKIPPING rodt:%d (%d)\n",
+                                    node, ddr_interface_num, rankx, rodt_ctl, next_ohms);
+ continue;
+ }
+ for (int orankx = 0; orankx < dimm_count * 4; orankx++) { // this is ROFFIX-0528
+ if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
+ continue;
+
+ int next_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
+
+ if (next_score > best_rank_score) // always skip a higher score
+ continue;
+ if (next_score == best_rank_score) { // if scores are equal
+ if (next_ohms < best_rank_ohms) // always skip lower ohms
+ continue;
+ if (next_ohms == best_rank_ohms) { // if same ohms
+ if (orankx != rankx) // always skip the other rank(s)
+ continue;
+ }
+ // else next_ohms are greater, always choose it
+ }
+ // else next_score is less than current best, so always choose it
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: new best score: rank %d, rodt %d(%3d), new best %d, previous best %d(%d)\n",
+ node, ddr_interface_num, rankx, orankx, rodt_ctl, next_ohms, next_score,
+ best_rank_score, best_rank_ohms);
+ best_rank_score = next_score;
+ best_rank_rtt_nom = rtt_nom;
+ //best_rank_nom_ohms = rtt_nom_ohms;
+ best_rank_ctl = rodt_ctl;
+ best_rank_ohms = next_ohms;
+ best_rankx = orankx;
+ lmc_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
+
+ } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) */
+ } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
+ } /* for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) */
+
+ ////////////////// this is the end of the BEST ROW SCORE LOOP
+
+ // DANGER, Will Robinson!! Abort now if we did not find a best score at all...
+ if (best_rank_score == DEFAULT_BEST_RANK_SCORE) {
+ error_print("WARNING: no best rank score found for N%d.LMC%d.R%d - resetting node...\n",
+ node, ddr_interface_num, rankx);
+ bdk_wait_usec(500000);
+ bdk_reset_chip(node);
+ }
+
+ // FIXME: relative now, but still arbitrary...
+ // halve the range if 2 DIMMs unless they are single rank...
+ int MAX_RANK_SCORE = best_rank_score;
+ MAX_RANK_SCORE += (MAX_RANK_SCORE_LIMIT / ((num_ranks > 1) ? dimm_count : 1));
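+            /* Rows scoring within this window of the best are kept as candidates for the
+               per-byte processing below; higher-scoring rows are skipped. */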
+
+ if (!ecc_ena){
+ lmc_rlevel_rank.cn83xx.byte8 = lmc_rlevel_rank.cn83xx.byte0; /* ECC is not used */
+ }
+
+ // at the end, write the best row settings to the current rank
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u);
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+
+ bdk_lmcx_rlevel_rankx_t saved_rlevel_rank;
+ saved_rlevel_rank.u = lmc_rlevel_rank.u;
+
+ ////////////////// this is the start of the PRINT LOOP
+
+ // for pass==0, print current rank, pass==1 print other rank(s)
+            // this is done because we want to show each rank's RODT values together, not interlaced
+#if COUNT_RL_CANDIDATES
+ // keep separates for ranks - pass=0 target rank, pass=1 other rank on DIMM
+ int mask_skipped[2] = {0,0};
+ int score_skipped[2] = {0,0};
+ int selected_rows[2] = {0,0};
+ int zero_scores[2] = {0,0};
+#endif /* COUNT_RL_CANDIDATES */
+ for (int pass = 0; pass < 2; pass++ ) {
+ for (int orankx = 0; orankx < dimm_count * 4; orankx++) {
+ if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
+ continue;
+
+ if (((pass == 0) && (orankx != rankx)) || ((pass != 0) && (orankx == rankx)))
+ continue;
+
+ for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
+ rtt_nom = imp_values->rtt_nom_table[rtt_idx];
+ if (dyn_rtt_nom_mask == 0) {
+ print_nom_ohms = -1;
+ if (rtt_idx != min_rtt_nom_idx)
+ continue;
+ } else {
+ print_nom_ohms = imp_values->rtt_nom_ohms[rtt_nom];
+ }
+
+ // cycle through all the RODT values...
+ for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
+ bdk_lmcx_rlevel_rankx_t temp_rlevel_rank;
+ int temp_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
+ temp_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
+
+ // skip RODT rows in mask, or rows with too high a score;
+ // we will not use them for printing or evaluating...
+#if COUNT_RL_CANDIDATES
+ int skip_row;
+ if ((1 << rodt_ctl) & rodt_row_skip_mask) {
+ skip_row = WITH_RODT_SKIPPING;
+ ++mask_skipped[pass];
+ } else if (temp_score > MAX_RANK_SCORE) {
+ skip_row = WITH_RODT_SKIPPING;
+ ++score_skipped[pass];
+ } else {
+ skip_row = WITH_RODT_BLANK;
+ ++selected_rows[pass];
+ if (temp_score == 0)
+ ++zero_scores[pass];
+ }
+
+#else /* COUNT_RL_CANDIDATES */
+ int skip_row = (((1 << rodt_ctl) & rodt_row_skip_mask) || (temp_score > MAX_RANK_SCORE))
+ ? WITH_RODT_SKIPPING: WITH_RODT_BLANK;
+#endif /* COUNT_RL_CANDIDATES */
+
+ // identify and print the BEST ROW when it comes up
+ if ((skip_row == WITH_RODT_BLANK) &&
+ (best_rankx == orankx) &&
+ (best_rank_rtt_nom == rtt_nom) &&
+ (best_rank_ctl == rodt_ctl))
+ {
+ skip_row = WITH_RODT_BESTROW;
+ }
+
+ display_RL_with_RODT(node, ddr_interface_num,
+ temp_rlevel_rank, orankx, temp_score,
+ print_nom_ohms,
+ imp_values->rodt_ohms[rodt_ctl],
+ skip_row);
+
+ } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
+ } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
+ } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) { */
+ } /* for (int pass = 0; pass < 2; pass++ ) */
+#if COUNT_RL_CANDIDATES
+ VB_PRT(VBL_TME, "N%d.LMC%d.R%d: RLROWS: selected %d+%d, zero_scores %d+%d, mask_skipped %d+%d, score_skipped %d+%d\n",
+ node, ddr_interface_num, rankx,
+ selected_rows[0], selected_rows[1],
+ zero_scores[0], zero_scores[1],
+ mask_skipped[0], mask_skipped[1],
+ score_skipped[0], score_skipped[1]);
+#endif /* COUNT_RL_CANDIDATES */
+
+ ////////////////// this is the end of the PRINT LOOP
+
+ // now evaluate which bytes need adjusting
+ uint64_t byte_msk = 0x3f; // 6-bit fields
+ uint64_t best_byte, new_byte, temp_byte, orig_best_byte;
+
+ uint64_t rank_best_bytes[9]; // collect the new byte values; first init with current best for neighbor use
+ for (int byte_idx = 0, byte_sh = 0; byte_idx < 8+ecc_ena; byte_idx++, byte_sh += 6) {
+ rank_best_bytes[byte_idx] = (lmc_rlevel_rank.u >> byte_sh) & byte_msk;
+ }
+
+ ////////////////// this is the start of the BEST BYTE LOOP
+
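+            /* Per-byte refinement: start from the best row's delay, nudge it toward the
+               byte-column average, count candidate rows sitting at -1/0/+1 of that value,
+               then take a weighted average; the optional neighbor, majority, failsafe and
+               perfect-bitmask checks below may further adjust the result. */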
+ for (int byte_idx = 0, byte_sh = 0; byte_idx < 8+ecc_ena; byte_idx++, byte_sh += 6) {
+ best_byte = orig_best_byte = rank_best_bytes[byte_idx];
+
+ ////////////////// this is the start of the BEST BYTE AVERAGING LOOP
+
+ // validate the initial "best" byte by looking at the average of the unskipped byte-column entries
+ // we want to do this before we go further, so we can try to start with a better initial value
+ // this is the so-called "BESTBUY" patch set
+ int sum = 0, count = 0;
+
+ for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
+ rtt_nom = imp_values->rtt_nom_table[rtt_idx];
+ if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
+ continue;
+
+ for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
+ bdk_lmcx_rlevel_rankx_t temp_rlevel_rank;
+ int temp_score;
+ for (int orankx = 0; orankx < dimm_count * 4; orankx++) { // average over all the ranks
+ if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
+ continue;
+ temp_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
+ // skip RODT rows in mask, or rows with too high a score;
+ // we will not use them for printing or evaluating...
+
+ if (!((1 << rodt_ctl) & rodt_row_skip_mask) &&
+ (temp_score <= MAX_RANK_SCORE))
+ {
+ temp_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
+ temp_byte = (temp_rlevel_rank.u >> byte_sh) & byte_msk;
+ sum += temp_byte;
+ count++;
+ }
+ } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) */
+ } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
+ } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
+
+ ////////////////// this is the end of the BEST BYTE AVERAGING LOOP
+
+
+ uint64_t avg_byte = divide_nint(sum, count); // FIXME: validate count and sum??
+ int avg_diff = (int)best_byte - (int)avg_byte;
+ new_byte = best_byte;
+ if (avg_diff != 0) {
+ // bump best up/dn by 1, not necessarily all the way to avg
+ new_byte = best_byte + ((avg_diff > 0) ? -1: 1);
+ }
+
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: START: Byte %d: best %d is different by %d from average %d, using %d.\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)best_byte, avg_diff, (int)avg_byte, (int)new_byte);
+ best_byte = new_byte;
+
+ // At this point best_byte is either:
+ // 1. the original byte-column value from the best scoring RODT row, OR
+ // 2. that value bumped toward the average of all the byte-column values
+ //
+ // best_byte will not change from here on...
+
+ ////////////////// this is the start of the BEST BYTE COUNTING LOOP
+
+ // NOTE: we do this next loop separately from above, because we count relative to "best_byte"
+ // which may have been modified by the above averaging operation...
+ //
+ // Also, the above only moves toward the average by +- 1, so that we will always have a count
+ // of at least 1 for the original best byte, even if all the others are further away and not counted;
+ // this ensures we will go back to the original if no others are counted...
+            // FIXME: this could cause issues if the range of values for a byte-lane is too disparate...
+ int count_less = 0, count_same = 0, count_more = 0;
+#if FAILSAFE_CHECK
+ uint64_t count_byte = new_byte; // save the value we will count around
+#endif /* FAILSAFE_CHECK */
+#if RANK_MAJORITY
+ int rank_less = 0, rank_same = 0, rank_more = 0;
+#endif /* RANK_MAJORITY */
+
+ for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
+ rtt_nom = imp_values->rtt_nom_table[rtt_idx];
+ if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
+ continue;
+
+ for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
+ bdk_lmcx_rlevel_rankx_t temp_rlevel_rank;
+ int temp_score;
+ for (int orankx = 0; orankx < dimm_count * 4; orankx++) { // count over all the ranks
+ if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
+ continue;
+ temp_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
+ // skip RODT rows in mask, or rows with too high a score;
+ // we will not use them for printing or evaluating...
+ if (((1 << rodt_ctl) & rodt_row_skip_mask) ||
+ (temp_score > MAX_RANK_SCORE))
+ {
+ continue;
+ }
+ temp_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
+ temp_byte = (temp_rlevel_rank.u >> byte_sh) & byte_msk;
+
+ if (temp_byte == 0) // do not count it if illegal
+ continue;
+ else if (temp_byte == best_byte)
+ count_same++;
+ else if (temp_byte == best_byte - 1)
+ count_less++;
+ else if (temp_byte == best_byte + 1)
+ count_more++;
+ // else do not count anything more than 1 away from the best
+#if RANK_MAJORITY
+ // FIXME? count is relative to best_byte; should it be rank-based?
+ if (orankx != rankx) // rank counts only on main rank
+ continue;
+ else if (temp_byte == best_byte)
+ rank_same++;
+ else if (temp_byte == best_byte - 1)
+ rank_less++;
+ else if (temp_byte == best_byte + 1)
+ rank_more++;
+#endif /* RANK_MAJORITY */
+ } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) */
+ } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
+ } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
+
+#if RANK_MAJORITY
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: COUNT: Byte %d: orig %d now %d, more %d same %d less %d (%d/%d/%d)\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)orig_best_byte, (int)best_byte,
+ count_more, count_same, count_less,
+ rank_more, rank_same, rank_less);
+#else /* RANK_MAJORITY */
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: COUNT: Byte %d: orig %d now %d, more %d same %d less %d\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)orig_best_byte, (int)best_byte,
+ count_more, count_same, count_less);
+#endif /* RANK_MAJORITY */
+ ////////////////// this is the end of the BEST BYTE COUNTING LOOP
+
+ // choose the new byte value
+ // we need to check that there is no gap greater than 2 between adjacent bytes
+ // (adjacency depends on DIMM type)
+ // use the neighbor value to help decide
+ // initially, the rank_best_bytes[] will contain values from the chosen lowest score rank
+ new_byte = 0;
+
+ // neighbor is index-1 unless we are index 0 or index 8 (ECC)
+ int neighbor = (byte_idx == 8) ? 3 : ((byte_idx == 0) ? 1 : byte_idx - 1);
+ uint64_t neigh_byte = rank_best_bytes[neighbor];
+
+
+ // can go up or down or stay the same, so look at a numeric average to help
+ new_byte = divide_nint(((count_more * (best_byte + 1)) +
+ (count_same * (best_byte + 0)) +
+ (count_less * (best_byte - 1))),
+ max(1, (count_more + count_same + count_less)));
+
+ // use neighbor to help choose with average
+ if ((byte_idx > 0) && (_abs(neigh_byte - new_byte) > 2)) // but not for byte 0
+ {
+ uint64_t avg_pick = new_byte;
+ if ((new_byte - best_byte) != 0)
+ new_byte = best_byte; // back to best, average did not get better
+ else // avg was the same, still too far, now move it towards the neighbor
+ new_byte += (neigh_byte > new_byte) ? 1 : -1;
+
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: AVERAGE: Byte %d: neighbor %d too different %d from average %d, picking %d.\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, neighbor, (int)neigh_byte, (int)avg_pick, (int)new_byte);
+ }
+#if MAJORITY_OVER_AVG
+ // NOTE:
+ // For now, we let the neighbor processing above trump the new simple majority processing here.
+ // This is mostly because we have seen no smoking gun for a neighbor bad choice (yet?).
+ // Also note that we will ALWAYS be using byte 0 majority, because of the if clause above.
+ else {
+ // majority is dependent on the counts, which are relative to best_byte, so start there
+ uint64_t maj_byte = best_byte;
+ if ((count_more > count_same) && (count_more > count_less)) {
+ maj_byte++;
+ } else if ((count_less > count_same) && (count_less > count_more)) {
+ maj_byte--;
+ }
+ if (maj_byte != new_byte) {
+ // print only when majority choice is different from average
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: MAJORTY: Byte %d: picking majority of %d over average %d.\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)maj_byte, (int)new_byte);
+ new_byte = maj_byte;
+ } else {
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)new_byte);
+ }
+#if RANK_MAJORITY
+ // rank majority is dependent on the rank counts, which are relative to best_byte,
+ // so start there, and adjust according to the rank counts majority
+ uint64_t rank_maj = best_byte;
+ if ((rank_more > rank_same) && (rank_more > rank_less)) {
+ rank_maj++;
+ } else if ((rank_less > rank_same) && (rank_less > rank_more)) {
+ rank_maj--;
+ }
+ int rank_sum = rank_more + rank_same + rank_less;
+
+ // now, let rank majority possibly rule over the current new_byte however we got it
+ if (rank_maj != new_byte) { // only if different
+ // Here is where we decide whether to completely apply RANK_MAJORITY or not
+ // FIXME: For the moment, we do it ONLY when running 2-slot configs
+ // FIXME? or when rank_sum is big enough?
+ if ((dimm_count > 1) || (rank_sum > 2)) {
+ // print only when rank majority choice is selected
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: RANKMAJ: Byte %d: picking %d over %d.\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)rank_maj, (int)new_byte);
+ new_byte = rank_maj;
+ } else { // FIXME: print some info when we could have chosen RANKMAJ but did not
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: RANKMAJ: Byte %d: NOT using %d over %d (best=%d,sum=%d).\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)rank_maj, (int)new_byte,
+ (int)best_byte, rank_sum);
+ }
+ }
+#endif /* RANK_MAJORITY */
+ }
+#else
+ else {
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)new_byte);
+ }
+#endif
+#if FAILSAFE_CHECK
+ // one last check:
+ // if new_byte is still count_byte, BUT there was no count for that value, DO SOMETHING!!!
+ // FIXME: go back to original best byte from the best row
+ if ((new_byte == count_byte) && (count_same == 0)) {
+ new_byte = orig_best_byte;
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: FAILSAF: Byte %d: going back to original %d.\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, (int)new_byte);
+ }
+#endif /* FAILSAFE_CHECK */
+#if PERFECT_BITMASK_COUNTING
+ // Look at counts for "perfect" bitmasks if we had any for this byte-lane.
+ // Remember, we only counted for DDR4, so zero means none or DDR3, and we bypass this...
+ if (rank_perfect_counts[rankx].total[byte_idx] > 0) {
+ // FIXME: should be more error checking, look for ties, etc...
+ int i, delay_count, delay_value, delay_max;
+ uint32_t ties;
+ delay_value = -1;
+ delay_max = 0;
+ ties = 0;
+
+ for (i = 0; i < 32; i++) {
+ delay_count = rank_perfect_counts[rankx].count[byte_idx][i];
+                        if (delay_count > 0) { // only look closer if there are any...
+ if (delay_count > delay_max) {
+ delay_max = delay_count;
+ delay_value = i;
+ ties = 0; // reset ties to none
+ } else if (delay_count == delay_max) {
+ if (ties == 0)
+ ties = 1UL << delay_value; // put in original value
+ ties |= 1UL << i; // add new value
+ }
+ }
+ } /* for (i = 0; i < 32; i++) */
+
+ if (delay_value >= 0) {
+ if (ties != 0) {
+ if (ties & (1UL << (int)new_byte)) {
+ // leave choice as new_byte if any tied one is the same...
+
+
+ delay_value = (int)new_byte;
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: TIES (0x%x) INCLUDED %d (%d)\n",
+ node, ddr_interface_num, rankx, byte_idx, ties, (int)new_byte, delay_max);
+ } else {
+ // FIXME: should choose a perfect one!!!
+ // FIXME: for now, leave the choice as new_byte
+ delay_value = (int)new_byte;
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: TIES (0x%x) OMITTED %d (%d)\n",
+ node, ddr_interface_num, rankx, byte_idx, ties, (int)new_byte, delay_max);
+ }
+ } /* if (ties != 0) */
+
+ if (delay_value != (int)new_byte) {
+ delay_count = rank_perfect_counts[rankx].count[byte_idx][(int)new_byte];
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: DIFF from %d (%d), USING %d (%d)\n",
+ node, ddr_interface_num, rankx, byte_idx, (int)new_byte,
+ delay_count, delay_value, delay_max);
+ new_byte = (uint64_t)delay_value; // FIXME: make this optional via envvar?
+ } else {
+ debug_print("N%d.LMC%d.R%d: PERFECT: Byte %d: SAME as %d (%d)\n",
+                                     node, ddr_interface_num, rankx, byte_idx, (int)new_byte, delay_max);
+ }
+ }
+ } /* if (rank_perfect_counts[rankx].total[byte_idx] > 0) */
+ else {
+ if (ddr_type == DDR4_DRAM) { // only report when DDR4
+ // FIXME: remove or increase VBL for this output...
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: ZERO perfect bitmasks\n",
+ node, ddr_interface_num, rankx, byte_idx);
+ }
+ } /* if (rank_perfect_counts[rankx].total[byte_idx] > 0) */
+#endif /* PERFECT_BITMASK_COUNTING */
+
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: SUMMARY: Byte %d: %s: orig %d now %d, more %d same %d less %d, using %d\n",
+ node, ddr_interface_num, rankx,
+ byte_idx, "AVG", (int)orig_best_byte,
+ (int)best_byte, count_more, count_same, count_less, (int)new_byte);
+
+ // update the byte with the new value (NOTE: orig value in the CSR may not be current "best")
+ lmc_rlevel_rank.u &= ~(byte_msk << byte_sh);
+ lmc_rlevel_rank.u |= (new_byte << byte_sh);
+
+ rank_best_bytes[byte_idx] = new_byte; // save new best for neighbor use
+
+ } /* for (byte_idx = 0; byte_idx < 8+ecc_ena; byte_idx++) */
+
+ ////////////////// this is the end of the BEST BYTE LOOP
+
+ if (saved_rlevel_rank.u != lmc_rlevel_rank.u) {
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u);
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+ debug_print("Adjusting Read-Leveling per-RANK settings.\n");
+ } else {
+ debug_print("Not Adjusting Read-Leveling per-RANK settings.\n");
+ }
+ display_RL_with_final(node, ddr_interface_num, lmc_rlevel_rank, rankx);
+
+#if RLEXTRAS_PATCH
+#define RLEVEL_RANKX_EXTRAS_INCR 4
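+        /* Seed the RLEVEL_RANK CSRs of unpopulated ranks with the current rank's delays
+           plus RLEVEL_RANKX_EXTRAS_INCR, presumably so every rank register holds a
+           plausible value (see the rank-copy guidance at the top of this sequence). */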
+ if ((rank_mask & 0x0F) != 0x0F) { // if there are unused entries to be filled
+ bdk_lmcx_rlevel_rankx_t temp_rlevel_rank = lmc_rlevel_rank; // copy the current rank
+ int byte, delay;
+ if (rankx < 3) {
+ debug_print("N%d.LMC%d.R%d: checking for RLEVEL_RANK unused entries.\n",
+ node, ddr_interface_num, rankx);
+ for (byte = 0; byte < 9; byte++) { // modify the copy in prep for writing to empty slot(s)
+ delay = get_rlevel_rank_struct(&temp_rlevel_rank, byte) + RLEVEL_RANKX_EXTRAS_INCR;
+ if (delay > (int)RLEVEL_BYTE_MSK) delay = RLEVEL_BYTE_MSK;
+ update_rlevel_rank_struct(&temp_rlevel_rank, byte, delay);
+ }
+ if (rankx == 0) { // if rank 0, write rank 1 and rank 2 here if empty
+ if (!(rank_mask & (1<<1))) { // check that rank 1 is empty
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
+ node, ddr_interface_num, rankx, 1);
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, 1), temp_rlevel_rank.u);
+ }
+ if (!(rank_mask & (1<<2))) { // check that rank 2 is empty
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
+ node, ddr_interface_num, rankx, 2);
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, 2), temp_rlevel_rank.u);
+ }
+ }
+ // if ranks 0, 1 or 2, write rank 3 here if empty
+ if (!(rank_mask & (1<<3))) { // check that rank 3 is empty
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
+ node, ddr_interface_num, rankx, 3);
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, 3), temp_rlevel_rank.u);
+ }
+ }
+ }
+#endif /* RLEXTRAS_PATCH */
+ } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
+
+ ////////////////// this is the end of the RANK MAJOR LOOP
+
+ } /* Evaluation block */
+ } /* while(rlevel_debug_loops--) */
+
+ lmc_control.s.ddr2t = save_ddr2t;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ ddr_print("%-45s : %6d\n", "DDR2T", lmc_control.s.ddr2t); /* Display final 2T value */
+
+
+ perform_ddr_init_sequence(node, rank_mask, ddr_interface_num);
+
+ for (rankx = 0; rankx < dimm_count * 4;rankx++) {
+ uint64_t value;
+ int parameter_set = 0;
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+
+ for (i=0; i<9; ++i) {
+ if ((s = lookup_env_parameter("ddr%d_rlevel_rank%d_byte%d", ddr_interface_num, rankx, i)) != NULL) {
+ parameter_set |= 1;
+ value = strtoul(s, NULL, 0);
+
+ update_rlevel_rank_struct(&lmc_rlevel_rank, i, value);
+ }
+ }
+
+ if ((s = lookup_env_parameter_ull("ddr%d_rlevel_rank%d", ddr_interface_num, rankx)) != NULL) {
+ parameter_set |= 1;
+ value = strtoull(s, NULL, 0);
+ lmc_rlevel_rank.u = value;
+ }
+
+
+ if (bdk_is_platform(BDK_PLATFORM_ASIM)) {
+ parameter_set |= 1;
+
+ lmc_rlevel_rank.cn83xx.byte8 = 3;
+ lmc_rlevel_rank.cn83xx.byte7 = 3;
+ lmc_rlevel_rank.cn83xx.byte6 = 3;
+ lmc_rlevel_rank.cn83xx.byte5 = 3;
+ lmc_rlevel_rank.cn83xx.byte4 = 3;
+ lmc_rlevel_rank.cn83xx.byte3 = 3;
+ lmc_rlevel_rank.cn83xx.byte2 = 3;
+ lmc_rlevel_rank.cn83xx.byte1 = 3;
+ lmc_rlevel_rank.cn83xx.byte0 = 3;
+ }
+
+ if (parameter_set) {
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u);
+ display_RL(node, ddr_interface_num, lmc_rlevel_rank, rankx);
+ }
+ }
+ }
+
+ /* Workaround Trcd overflow by using Additive latency. */
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X))
+ {
+ bdk_lmcx_modereg_params0_t lmc_modereg_params0;
+ bdk_lmcx_timing_params1_t lmc_timing_params1;
+ bdk_lmcx_control_t lmc_control;
+ int rankx;
+
+ lmc_timing_params1.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num));
+ lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+
+ if (lmc_timing_params1.s.trcd == 0) {
+ ddr_print("Workaround Trcd overflow by using Additive latency.\n");
+ lmc_timing_params1.s.trcd = 12; /* Hard code this to 12 and enable additive latency */
+ lmc_modereg_params0.s.al = 2; /* CL-2 */
+ lmc_control.s.pocas = 1;
+
+ ddr_print("MODEREG_PARAMS0 : 0x%016lx\n", lmc_modereg_params0.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
+ ddr_print("TIMING_PARAMS1 : 0x%016lx\n", lmc_timing_params1.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num), lmc_timing_params1.u);
+
+ ddr_print("LMC_CONTROL : 0x%016lx\n", lmc_control.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ ddr4_mrw(node, ddr_interface_num, rankx, -1, 1, 0); /* MR1 */
+ }
+ }
+ }
+
+ // this is here just for output, to allow one last check of the Deskew settings...
+ if (! disable_deskew_training) {
+ deskew_counts_t dsk_counts;
+ VB_PRT(VBL_TME, "N%d.LMC%d: Check Deskew Settings before software Write-Leveling.\n",
+ node, ddr_interface_num);
+ Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, VBL_TME); // TME for FINAL
+ }
+
+
+ /* Workaround Errata 26304 (T88@2.0)
+
+ When the CSRs LMCX_DLL_CTL3[WR_DESKEW_ENA] = 1 AND
+ LMCX_PHY_CTL2[DQS[0..8]_DSK_ADJ] > 4, set
+ LMCX_EXT_CONFIG[DRIVE_ENA_BPRCH] = 1.
+ */
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS2_X)) { // only for 88XX pass 2, not 81xx or 83xx
+ bdk_lmcx_dll_ctl3_t dll_ctl3;
+ bdk_lmcx_phy_ctl2_t phy_ctl2;
+ bdk_lmcx_ext_config_t ext_config;
+ int increased_dsk_adj = 0;
+ int byte;
+
+ phy_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL2(ddr_interface_num));
+ ext_config.u = BDK_CSR_READ(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num));
+ dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+
+ for (byte = 0; byte < 8; ++byte) {
+ if (!(ddr_interface_bytemask&(1<<byte)))
+ continue;
+ increased_dsk_adj |= (((phy_ctl2.u >> (byte*3)) & 0x7) > 4);
+ }
+
+ if ((dll_ctl3.s.wr_deskew_ena == 1) && increased_dsk_adj) {
+ ext_config.s.drive_ena_bprch = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num),
+ ext_config.u);
+ }
+ }
+
+ /*
+ * 6.9.13 DRAM Vref Training for DDR4
+ *
+ * This includes software write-leveling
+ */
+
+ { // Software Write-Leveling block
+
+ /* Try to determine/optimize write-level delays experimentally. */
+#pragma pack(push,1)
+ bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank;
+ bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank_hw_results;
+ int byte;
+ int delay;
+ int rankx = 0;
+ int active_rank;
+#if !DISABLE_SW_WL_PASS_2
+ bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank;
+ int sw_wlevel_offset = 1;
+#endif
+ int sw_wlevel_enable = 1; /* FIX... Should be customizable. */
+ int interfaces;
+ int measured_vref_flag;
+ typedef enum {
+ WL_ESTIMATED = 0, /* HW/SW wleveling failed. Results
+ estimated. */
+ WL_HARDWARE = 1, /* H/W wleveling succeeded */
+ WL_SOFTWARE = 2, /* S/W wleveling passed 2 contiguous
+ settings. */
+ WL_SOFTWARE1 = 3, /* S/W wleveling passed 1 marginal
+ setting. */
+ } sw_wl_status_t;
+
+ static const char *wl_status_strings[] = {
+ "(e)",
+ " ",
+ " ",
+ "(1)"
+ };
+ int sw_wlevel_hw_default = 1; // FIXME: make H/W assist the default now
+#pragma pack(pop)
+
+ if ((s = lookup_env_parameter("ddr_sw_wlevel_hw")) != NULL) {
+ sw_wlevel_hw_default = !!strtoul(s, NULL, 0);
+ }
+
+ // cannot use hw-assist when doing 32-bit
+ if (! ddr_interface_64b) {
+ sw_wlevel_hw_default = 0;
+ }
+
+ if ((s = lookup_env_parameter("ddr_software_wlevel")) != NULL) {
+ sw_wlevel_enable = strtoul(s, NULL, 0);
+ }
+
+#if SWL_WITH_HW_ALTS_CHOOSE_SW
+ // Choose the SW algo for SWL if any HWL alternates were found
+ // NOTE: decide this here, for all ranks, because HW-assist (which also covers the ECC byte) requires ECC to remain enabled
+ for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+ if (!sw_wlevel_enable)
+ break;
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ // if we are doing HW-assist, and there are alternates, switch to SW-algorithm for all
+ if (sw_wlevel_hw_default && hwl_alts[rankx].hwl_alt_mask) {
+ ddr_print("N%d.LMC%d.R%d: Using SW algorithm for write-leveling this rank\n",
+ node, ddr_interface_num, rankx);
+ sw_wlevel_hw_default = 0;
+ break;
+ }
+ } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
+#endif
+
+ /* Get the measured_vref setting from the config, check for an override... */
+ /* NOTE: measured_vref=1 (ON) means force use of MEASURED Vref... */
+ // NOTE: measured VREF can only be done for DDR4
+ if (ddr_type == DDR4_DRAM) {
+ measured_vref_flag = custom_lmc_config->measured_vref;
+ if ((s = lookup_env_parameter("ddr_measured_vref")) != NULL) {
+ measured_vref_flag = !!strtoul(s, NULL, 0);
+ }
+ } else {
+ measured_vref_flag = 0; // OFF for DDR3
+ }
+
+ /* Ensure ECC is disabled for DRAM tests when using the SW algorithm; otherwise leave it untouched */
+ if (!sw_wlevel_hw_default) {
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
+ lmc_config.s.ecc_ena = 0;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+ }
+
+#if USE_L2_WAYS_LIMIT
+ limit_l2_ways(node, 0, 0); /* Disable l2 sets for DRAM testing */
+#endif
+
+ /* We need to track absolute rank number, as well as how many
+ ** active ranks we have. Two single rank DIMMs show up as
+ ** ranks 0 and 2, but only 2 ranks are active. */
+ active_rank = 0;
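+ // e.g. two single-rank DIMMs: rank_mask = 0x5, so rankx visits 0 and 2
+ // while active_rank counts 0 then 1.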
+
+ interfaces = __builtin_popcount(ddr_interface_mask);
+
+#define VREF_RANGE1_LIMIT 0x33 // range1 is valid for 0x00 - 0x32
+#define VREF_RANGE2_LIMIT 0x18 // range2 is valid for 0x00 - 0x17
+// full window is valid for 0x00 to 0x4A
+// let 0x00 - 0x17 be range2, 0x18 - 0x4a be range 1
+#define VREF_LIMIT (VREF_RANGE1_LIMIT + VREF_RANGE2_LIMIT)
+#define VREF_FINAL (VREF_LIMIT - 1)
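+// Illustrative decode of the combined Vref index (example values only; the
+// actual decode is in the training loop below): an index of 0x10 is below
+// VREF_RANGE2_LIMIT and maps to set_vref(..., 1, 0x10), while 0x30 maps to
+// set_vref(..., 0, 0x30 - 0x18 = 0x18). The range argument of 1 appears to
+// select DDR4 VrefDQ range 2 (the summary printouts below add 1 when
+// displaying the range).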
+
+ for (rankx = 0; rankx < dimm_count * 4; rankx++) {
+ uint64_t rank_addr;
+ int vref_value, final_vref_value, final_vref_range = 0;
+ int start_vref_value = 0, computed_final_vref_value = -1;
+ char best_vref_values_count, vref_values_count;
+ char best_vref_values_start, vref_values_start;
+
+ int bytes_failed;
+ sw_wl_status_t byte_test_status[9];
+ sw_wl_status_t sw_wl_rank_status = WL_HARDWARE;
+ int sw_wl_failed = 0;
+ int sw_wlevel_hw = sw_wlevel_hw_default;
+
+ if (!sw_wlevel_enable)
+ break;
+
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ ddr_print("N%d.LMC%d.R%d: Performing Software Write-Leveling %s\n",
+ node, ddr_interface_num, rankx,
+ (sw_wlevel_hw) ? "with H/W assist" : "with S/W algorithm");
+
+ if ((ddr_type == DDR4_DRAM) && (num_ranks != 4)) {
+ // always compute when we can...
+ computed_final_vref_value = compute_vref_value(node, ddr_interface_num, rankx,
+ dimm_count, num_ranks, imp_values,
+ is_stacked_die);
+ if (!measured_vref_flag) // but only use it if allowed
+ start_vref_value = VREF_FINAL; // skip all the measured Vref processing, just the final setting
+ }
+
+ /* Save off the h/w wl results */
+ lmc_wlevel_rank_hw_results.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+
+ vref_values_count = 0;
+ vref_values_start = 0;
+ best_vref_values_count = 0;
+ best_vref_values_start = 0;
+
+ /* Loop one extra time using the Final Vref value. */
+ for (vref_value = start_vref_value; vref_value < VREF_LIMIT; ++vref_value) {
+ if (ddr_type == DDR4_DRAM) {
+ if (vref_value < VREF_FINAL) {
+ int vrange, vvalue;
+ if (vref_value < VREF_RANGE2_LIMIT) {
+ vrange = 1; vvalue = vref_value;
+ } else {
+ vrange = 0; vvalue = vref_value - VREF_RANGE2_LIMIT;
+ }
+ set_vref(node, ddr_interface_num, rankx,
+ vrange, vvalue);
+ } else { /* if (vref_value < VREF_FINAL) */
+ /* Print the final Vref value first. */
+
+ /* Always print the computed first if its valid */
+ if (computed_final_vref_value >= 0) {
+ ddr_print("N%d.LMC%d.R%d: Vref Computed Summary :"
+ " %2d (0x%02x)\n",
+ node, ddr_interface_num,
+ rankx, computed_final_vref_value,
+ computed_final_vref_value);
+ }
+ if (!measured_vref_flag) { // setup to use the computed
+ best_vref_values_count = 1;
+ final_vref_value = computed_final_vref_value;
+ } else { // setup to use the measured
+ if (best_vref_values_count > 0) {
+ best_vref_values_count = max(best_vref_values_count, 2);
+#if 0
+ // NOTE: this already adjusts VREF via calculating 40% rather than 50%
+ final_vref_value = best_vref_values_start + divide_roundup((best_vref_values_count-1)*4,10);
+ ddr_print("N%d.LMC%d.R%d: Vref Training Summary :"
+ " %2d <----- %2d (0x%02x) -----> %2d range: %2d\n",
+ node, ddr_interface_num, rankx, best_vref_values_start,
+ final_vref_value, final_vref_value,
+ best_vref_values_start+best_vref_values_count-1,
+ best_vref_values_count-1);
+#else
+ final_vref_value = best_vref_values_start + divide_nint(best_vref_values_count - 1, 2);
+ if (final_vref_value < VREF_RANGE2_LIMIT) {
+ final_vref_range = 1;
+ } else {
+ final_vref_range = 0; final_vref_value -= VREF_RANGE2_LIMIT;
+ }
+ {
+ int vvlo = best_vref_values_start;
+ int vrlo;
+ if (vvlo < VREF_RANGE2_LIMIT) {
+ vrlo = 2;
+ } else {
+ vrlo = 1; vvlo -= VREF_RANGE2_LIMIT;
+ }
+
+ int vvhi = best_vref_values_start + best_vref_values_count - 1;
+ int vrhi;
+ if (vvhi < VREF_RANGE2_LIMIT) {
+ vrhi = 2;
+ } else {
+ vrhi = 1; vvhi -= VREF_RANGE2_LIMIT;
+ }
+ ddr_print("N%d.LMC%d.R%d: Vref Training Summary :"
+ " 0x%02x/%1d <----- 0x%02x/%1d -----> 0x%02x/%1d, range: %2d\n",
+ node, ddr_interface_num, rankx,
+ vvlo, vrlo,
+ final_vref_value, final_vref_range + 1,
+ vvhi, vrhi,
+ best_vref_values_count-1);
+ }
+#endif
+
+ } else {
+ /* If nothing passed use the default Vref value for this rank */
+ bdk_lmcx_modereg_params2_t lmc_modereg_params2;
+ lmc_modereg_params2.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num));
+ final_vref_value = (lmc_modereg_params2.u >> (rankx * 10 + 3)) & 0x3f;
+ final_vref_range = (lmc_modereg_params2.u >> (rankx * 10 + 9)) & 0x01;
+
+ ddr_print("N%d.LMC%d.R%d: Vref Using Default :"
+ " %2d <----- %2d (0x%02x) -----> %2d, range%1d\n",
+ node, ddr_interface_num, rankx,
+ final_vref_value, final_vref_value,
+ final_vref_value, final_vref_value, final_vref_range+1);
+ }
+ }
+
+ // allow override
+ if ((s = lookup_env_parameter("ddr%d_vref_value_%1d%1d",
+ ddr_interface_num, !!(rankx&2), !!(rankx&1))) != NULL) {
+ final_vref_value = strtoul(s, NULL, 0);
+ }
+
+ set_vref(node, ddr_interface_num, rankx, final_vref_range, final_vref_value);
+
+ } /* if (vref_value < VREF_FINAL) */
+ } /* if (ddr_type == DDR4_DRAM) */
+
+ lmc_wlevel_rank.u = lmc_wlevel_rank_hw_results.u; /* Restore the saved value */
+
+ for (byte = 0; byte < 9; ++byte)
+ byte_test_status[byte] = WL_ESTIMATED;
+
+ if (wlevel_bitmask_errors == 0) {
+
+ /* Determine address of DRAM to test for pass 1 of software write leveling. */
+ rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable + (interfaces/2)));
+ // FIXME: these now put in by test_dram_byte()
+ //rank_addr |= (ddr_interface_num<<7); /* Map address into proper interface */
+ //rank_addr = bdk_numa_get_address(node, rank_addr);
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: Active Rank %d Address: 0x%lx\n",
+ node, ddr_interface_num, rankx, active_rank, rank_addr);
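+ // Worked example with assumed values (illustration only): if pbank_lsb = 33,
+ // bunk_enable = 1 and interfaces = 2, each rank spans 1ull << (33 - 1 + 1)
+ // = 2^33 bytes, so active_rank 1 starts testing at offset 0x2_0000_0000
+ // (interface/node address bits are folded in later by test_dram_byte()).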
+
+ { // start parallel write-leveling block for delay high-order bits
+ int errors = 0;
+ int byte_delay[9];
+ uint64_t bytemask;
+ int bytes_todo;
+
+ if (ddr_interface_64b) {
+ bytes_todo = (sw_wlevel_hw) ? ddr_interface_bytemask : 0xFF;
+ bytemask = ~0ULL;
+ } else { // 32-bit, must be using SW algo, only data bytes
+ bytes_todo = 0x0f;
+ bytemask = 0x00000000ffffffffULL;
+ }
+
+ for (byte = 0; byte < 9; ++byte) {
+ if (!(bytes_todo & (1 << byte))) {
+ byte_delay[byte] = 0;
+ } else {
+ byte_delay[byte] = get_wlevel_rank_struct(&lmc_wlevel_rank, byte);
+ }
+ } /* for (byte = 0; byte < 9; ++byte) */
+
+#define WL_MIN_NO_ERRORS_COUNT 3 // FIXME? three passes without errors
+ int no_errors_count = 0;
+
+ // Change verbosity if using measured vs computed VREF or DDR3
+ // measured goes many times through SWL, computed and DDR3 only once
+ // so we want the EXHAUSTED messages at NORM for computed and DDR3,
+ // and at DEV2 for measured, just for completeness
+ int vbl_local = (measured_vref_flag) ? VBL_DEV2 : VBL_NORM;
+ uint64_t bad_bits[2];
+#if ENABLE_SW_WLEVEL_UTILIZATION
+ uint64_t sum_dram_dclk = 0, sum_dram_ops = 0;
+ uint64_t start_dram_dclk, stop_dram_dclk;
+ uint64_t start_dram_ops, stop_dram_ops;
+#endif
+ do {
+ // write the current set of WL delays
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
+ lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+
+ bdk_watchdog_poke();
+
+ // do the test
+ if (sw_wlevel_hw) {
+ errors = run_best_hw_patterns(node, ddr_interface_num, rank_addr,
+ DBTRAIN_TEST, bad_bits);
+ errors &= bytes_todo; // keep only the ones we are still doing
+ } else {
+#if ENABLE_SW_WLEVEL_UTILIZATION
+ start_dram_dclk = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num));
+ start_dram_ops = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(ddr_interface_num));
+#endif
+#if USE_ORIG_TEST_DRAM_BYTE
+ errors = test_dram_byte(node, ddr_interface_num, rank_addr, bytemask, bad_bits);
+#else
+ errors = dram_tuning_mem_xor(node, ddr_interface_num, rank_addr, bytemask, bad_bits);
+#endif
+#if ENABLE_SW_WLEVEL_UTILIZATION
+ stop_dram_dclk = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num));
+ stop_dram_ops = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(ddr_interface_num));
+ sum_dram_dclk += stop_dram_dclk - start_dram_dclk;
+ sum_dram_ops += stop_dram_ops - start_dram_ops;
+#endif
+ }
+
+ VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: S/W write-leveling TEST: returned 0x%x\n",
+ node, ddr_interface_num, rankx, errors);
+
+ // remember, errors will not be returned for byte-lanes that have maxed out...
+ if (errors == 0) {
+ no_errors_count++; // bump
+ if (no_errors_count > 1) // bypass check/update completely
+ continue; // to end of do-while
+ } else
+ no_errors_count = 0; // reset
+
+ // check errors by byte
+ for (byte = 0; byte < 9; ++byte) {
+ if (!(bytes_todo & (1 << byte)))
+ continue;
+
+ delay = byte_delay[byte];
+ if (errors & (1 << byte)) { // yes, an error in this byte lane
+ debug_print(" byte %d delay %2d Errors\n", byte, delay);
+ // since this byte had an error, we move to the next delay value, unless maxed out
+ delay += 8; // incr by 8 to do only delay high-order bits
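+ // (the 5-bit WLEVEL byte delays appear to be in 1/8-CK steps, so +8
+ // advances a full clock while keeping the within-clock position)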
+ if (delay < 32) {
+ update_wlevel_rank_struct(&lmc_wlevel_rank, byte, delay);
+ debug_print(" byte %d delay %2d New\n", byte, delay);
+ byte_delay[byte] = delay;
+ } else { // reached max delay, maybe really done with this byte
+#if SWL_TRY_HWL_ALT
+ if (!measured_vref_flag && // consider an alt only for computed VREF and
+ (hwl_alts[rankx].hwl_alt_mask & (1 << byte))) // if an alt exists...
+ {
+ int bad_delay = delay & 0x6; // orig low-order bits (even settings, so bit 0 is ignored)
+ delay = hwl_alts[rankx].hwl_alt_delay[byte]; // yes, use it
+ hwl_alts[rankx].hwl_alt_mask &= ~(1 << byte); // clear that flag
+ update_wlevel_rank_struct(&lmc_wlevel_rank, byte, delay);
+ byte_delay[byte] = delay;
+ debug_print(" byte %d delay %2d ALTERNATE\n", byte, delay);
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: SWL: Byte %d: %d FAIL, trying ALTERNATE %d\n",
+ node, ddr_interface_num, rankx, byte, bad_delay, delay);
+
+ } else
+#endif /* SWL_TRY_HWL_ALT */
+ {
+ unsigned bits_bad;
+ if (byte < 8) {
+ bytemask &= ~(0xffULL << (8*byte)); // test no longer, remove from byte mask
+ bits_bad = (unsigned)((bad_bits[0] >> (8 * byte)) & 0xffUL);
+ } else {
+ bits_bad = (unsigned)(bad_bits[1] & 0xffUL);
+ }
+ bytes_todo &= ~(1 << byte); // remove from bytes to do
+ byte_test_status[byte] = WL_ESTIMATED; // make sure this is set for this case
+ debug_print(" byte %d delay %2d Exhausted\n", byte, delay);
+ VB_PRT(vbl_local, "N%d.LMC%d.R%d: SWL: Byte %d (0x%02x): delay %d EXHAUSTED \n",
+ node, ddr_interface_num, rankx, byte, bits_bad, delay);
+ }
+ }
+ } else { // no error, stay with current delay, but keep testing it...
+ debug_print(" byte %d delay %2d Passed\n", byte, delay);
+ byte_test_status[byte] = WL_HARDWARE; // change status
+ }
+
+ } /* for (byte = 0; byte < 9; ++byte) */
+
+ } while (no_errors_count < WL_MIN_NO_ERRORS_COUNT);
+
+#if ENABLE_SW_WLEVEL_UTILIZATION
+ if (! sw_wlevel_hw) {
+ uint64_t percent_x10;
+ if (sum_dram_dclk == 0)
+ sum_dram_dclk = 1;
+ percent_x10 = sum_dram_ops * 1000 / sum_dram_dclk;
+ ddr_print("N%d.LMC%d.R%d: ops %lu, cycles %lu, used %lu.%lu%%\n",
+ node, ddr_interface_num, rankx, sum_dram_ops, sum_dram_dclk,
+ percent_x10 / 10, percent_x10 % 10);
+ }
+#endif
+ if (errors) {
+ debug_print("End WLEV_64 while loop: vref_value %d(0x%x), errors 0x%02x\n",
+ vref_value, vref_value, errors);
+ }
+ } // end parallel write-leveling block for delay high-order bits
+
+ if (sw_wlevel_hw) { // if we used HW-assist, we did the ECC byte when approp.
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: HW-assist SWL - no ECC estimate!!!\n",
+ node, ddr_interface_num, rankx);
+ goto no_ecc_estimate;
+ }
+
+ if ((ddr_interface_bytemask & 0xff) == 0xff) {
+ if (use_ecc) {
+ int save_byte8 = lmc_wlevel_rank.s.byte8; // save original HW delay
+ byte_test_status[8] = WL_HARDWARE; /* say it is H/W delay value */
+
+ if ((save_byte8 != lmc_wlevel_rank.s.byte3) &&
+ (save_byte8 != lmc_wlevel_rank.s.byte4))
+ {
+ // try to calculate a workable delay using SW bytes 3 and 4 and HW byte 8
+ int test_byte8 = save_byte8;
+ int test_byte8_error;
+ int byte8_error = 0x1f;
+ int adder;
+ int avg_bytes = divide_nint(lmc_wlevel_rank.s.byte3+lmc_wlevel_rank.s.byte4, 2);
+ for (adder = 0; adder <= 32; adder += 8) {
+ test_byte8_error = _abs((adder+save_byte8) - avg_bytes);
+ if (test_byte8_error < byte8_error) {
+ byte8_error = test_byte8_error;
+ test_byte8 = save_byte8 + adder;
+ }
+ }
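+ // Worked example (hypothetical delays): byte3 = 14 and byte4 = 18 give
+ // avg_bytes = 16; with save_byte8 = 2 the candidates are 2, 10, 18, 26, 34,
+ // and 18 is closest to 16, so test_byte8 becomes 18.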
+
+#if SW_WL_CHECK_PATCH
+ // only do the check if we are not using measured VREF
+ if (!measured_vref_flag) {
+ test_byte8 &= ~1; /* Use only even settings, rounding down... */
+
+ // do validity check on the calculated ECC delay value
+ // this depends on the DIMM type
+ if (spd_rdimm) { // RDIMM
+ if (spd_dimm_type != 5) { // but not mini-RDIMM
+ // it can be > byte4, but should never be > byte3
+ if (test_byte8 > lmc_wlevel_rank.s.byte3) {
+ byte_test_status[8] = WL_ESTIMATED; /* say it is still estimated */
+ }
+ }
+ } else { // UDIMM
+ if ((test_byte8 < lmc_wlevel_rank.s.byte3) ||
+ (test_byte8 > lmc_wlevel_rank.s.byte4))
+ { // should never be outside the byte 3-4 range
+ byte_test_status[8] = WL_ESTIMATED; /* say it is still estimated */
+ }
+ }
+ /*
+ * Report whenever the calculation appears bad.
+ * This happens if some of the original values were off, or unexpected geometry
+ * from DIMM type, or custom circuitry (NIC225E, I am looking at you!).
+ * We will trust the calculated value, and depend on later testing to catch
+ * any instances when that value is truly bad.
+ */
+ if (byte_test_status[8] == WL_ESTIMATED) { // ESTIMATED means there may be an issue
+ ddr_print("N%d.LMC%d.R%d: SWL: (%cDIMM): calculated ECC delay unexpected (%d/%d/%d)\n",
+ node, ddr_interface_num, rankx, (spd_rdimm?'R':'U'),
+ lmc_wlevel_rank.s.byte4, test_byte8, lmc_wlevel_rank.s.byte3);
+ byte_test_status[8] = WL_HARDWARE;
+ }
+ }
+#endif /* SW_WL_CHECK_PATCH */
+ lmc_wlevel_rank.s.byte8 = test_byte8 & ~1; /* Use only even settings */
+ }
+
+ if (lmc_wlevel_rank.s.byte8 != save_byte8) {
+ /* Change the status if s/w adjusted the delay */
+ byte_test_status[8] = WL_SOFTWARE; /* Estimated delay */
+ }
+ } else {
+ byte_test_status[8] = WL_HARDWARE; /* H/W delay value */
+ lmc_wlevel_rank.s.byte8 = lmc_wlevel_rank.s.byte0; /* ECC is not used */
+ }
+ } else { /* if ((ddr_interface_bytemask & 0xff) == 0xff) */
+ if (use_ecc) {
+ /* Estimate the ECC byte delay */
+ lmc_wlevel_rank.s.byte4 |= (lmc_wlevel_rank.s.byte3 & 0x38); // add hi-order to b4
+ if ((lmc_wlevel_rank.s.byte4 & 0x06) < (lmc_wlevel_rank.s.byte3 & 0x06)) // orig b4 < orig b3
+ lmc_wlevel_rank.s.byte4 += 8; // must be next clock
+ } else {
+ lmc_wlevel_rank.s.byte4 = lmc_wlevel_rank.s.byte0; /* ECC is not used */
+ }
+ /* Change the status if s/w adjusted the delay */
+ byte_test_status[4] = WL_SOFTWARE; /* Estimated delay */
+ } /* if ((ddr_interface_bytemask & 0xff) == 0xff) */
+ } /* if (wlevel_bitmask_errors == 0) */
+
+ no_ecc_estimate:
+
+ bytes_failed = 0;
+ for (byte = 0; byte < 9; ++byte) {
+ /* Don't accumulate errors for untested bytes. */
+ if (!(ddr_interface_bytemask & (1 << byte)))
+ continue;
+ bytes_failed += (byte_test_status[byte] == WL_ESTIMATED);
+ }
+
+ /* Vref training loop is only used for DDR4 */
+ if (ddr_type != DDR4_DRAM)
+ break;
+
+ if (bytes_failed == 0) {
+ if (vref_values_count == 0) {
+ vref_values_start = vref_value;
+ }
+ ++vref_values_count;
+ if (vref_values_count > best_vref_values_count) {
+ best_vref_values_count = vref_values_count;
+ best_vref_values_start = vref_values_start;
+ debug_print("N%d.LMC%d.R%d: Vref Training (%2d) : 0x%02x <----- ???? -----> 0x%02x\n",
+ node, ddr_interface_num,
+ rankx, vref_value, best_vref_values_start,
+ best_vref_values_start+best_vref_values_count-1);
+ }
+ } else {
+ vref_values_count = 0;
+ debug_print("N%d.LMC%d.R%d: Vref Training (%2d) : failed\n",
+ node, ddr_interface_num,
+ rankx, vref_value);
+ }
+ } /* for (vref_value=0; vref_value<VREF_LIMIT; ++vref_value) */
+
+ /* Determine address of DRAM to test for pass 2 and final test of software write leveling. */
+ rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable + (interfaces/2)));
+ rank_addr |= (ddr_interface_num<<7); /* Map address into proper interface */
+ rank_addr = bdk_numa_get_address(node, rank_addr);
+ debug_print("N%d.LMC%d.R%d: Active Rank %d Address: 0x%lx\n",
+ node, ddr_interface_num, rankx, active_rank, rank_addr);
+
+ int errors;
+
+ if (bytes_failed) {
+
+#if !DISABLE_SW_WL_PASS_2
+
+ ddr_print("N%d.LMC%d.R%d: Starting SW Write-leveling pass 2\n",
+ node, ddr_interface_num, rankx);
+ sw_wl_rank_status = WL_SOFTWARE;
+
+ /* If previous s/w fixups failed then retry using s/w write-leveling. */
+ if (wlevel_bitmask_errors == 0) {
+ /* h/w succeeded but previous s/w fixups failed. So retry s/w. */
+ debug_print("N%d.LMC%d.R%d: Retrying software Write-Leveling.\n",
+ node, ddr_interface_num, rankx);
+ }
+
+ { // start parallel write-leveling block for delay low-order bits
+ int byte_delay[8];
+ int byte_passed[8];
+ uint64_t bytemask;
+ uint64_t bitmask;
+ int wl_offset;
+ int bytes_todo;
+
+ for (byte = 0; byte < 8; ++byte) {
+ byte_passed[byte] = 0;
+ }
+
+ bytes_todo = ddr_interface_bytemask;
+
+ for (wl_offset = sw_wlevel_offset; wl_offset >= 0; --wl_offset) {
+ debug_print("Starting wl_offset for-loop: %d\n", wl_offset);
+
+ bytemask = 0;
+
+ for (byte = 0; byte < 8; ++byte) {
+ byte_delay[byte] = 0;
+ if (!(bytes_todo & (1 << byte))) // this does not contain fully passed bytes
+ continue;
+
+ byte_passed[byte] = 0; // reset across passes if not fully passed
+ update_wlevel_rank_struct(&lmc_wlevel_rank, byte, 0); // all delays start at 0
+ bitmask = ((!ddr_interface_64b) && (byte == 4)) ? 0x0f: 0xff;
+ bytemask |= bitmask << (8*byte); // set the bytes bits in the bytemask
+ } /* for (byte = 0; byte < 8; ++byte) */
+
+ while (bytemask != 0) { // start a pass if there is any byte lane to test
+
+ debug_print("Starting bytemask while-loop: 0x%lx\n", bytemask);
+
+ // write this set of WL delays
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
+ lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+
+ bdk_watchdog_poke();
+
+ // do the test
+ if (sw_wlevel_hw)
+ errors = run_best_hw_patterns(node, ddr_interface_num, rank_addr,
+ DBTRAIN_TEST, NULL);
+ else
+ errors = test_dram_byte(node, ddr_interface_num, rank_addr, bytemask, NULL);
+
+ debug_print("SWL pass 2: test_dram_byte returned 0x%x\n", errors);
+
+ // check errors by byte
+ for (byte = 0; byte < 8; ++byte) {
+ if (!(bytes_todo & (1 << byte)))
+ continue;
+
+ delay = byte_delay[byte];
+ if (errors & (1 << byte)) { // yes, an error
+ debug_print(" byte %d delay %2d Errors\n", byte, delay);
+ byte_passed[byte] = 0;
+ } else { // no error
+ byte_passed[byte] += 1;
+ if (byte_passed[byte] == (1 + wl_offset)) { /* Look for consecutive working settings */
+ debug_print(" byte %d delay %2d FULLY Passed\n", byte, delay);
+ if (wl_offset == 1) {
+ byte_test_status[byte] = WL_SOFTWARE;
+ } else if (wl_offset == 0) {
+ byte_test_status[byte] = WL_SOFTWARE1;
+ }
+ bytemask &= ~(0xffULL << (8*byte)); // test no longer, remove from byte mask this pass
+ bytes_todo &= ~(1 << byte); // remove completely from concern
+ continue; // on to the next byte, bypass delay updating!!
+ } else {
+ debug_print(" byte %d delay %2d Passed\n", byte, delay);
+ }
+ }
+ // error or not, move to the next delay value for this byte unless all delays are done;
+ // only a byte that has "fully passed" bypasses this via the continue above
+ delay += 2;
+ if (delay < 32) {
+ update_wlevel_rank_struct(&lmc_wlevel_rank, byte, delay);
+ debug_print(" byte %d delay %2d New\n", byte, delay);
+ byte_delay[byte] = delay;
+ } else {
+ // reached max delay, done with this byte
+ debug_print(" byte %d delay %2d Exhausted\n", byte, delay);
+ bytemask &= ~(0xffULL << (8*byte)); // test no longer, remove from byte mask this pass
+ }
+ } /* for (byte = 0; byte < 8; ++byte) */
+ debug_print("End of for-loop: bytemask 0x%lx\n", bytemask);
+
+ } /* while (bytemask != 0) */
+ } /* for (wl_offset = sw_wlevel_offset; wl_offset >= 0; --wl_offset) */
+
+ for (byte = 0; byte < 8; ++byte) {
+ // any bytes left in bytes_todo did not pass
+ if (bytes_todo & (1 << byte)) {
+ /* Last resort. Use Rlevel settings to estimate
+ Wlevel if software write-leveling fails */
+ debug_print("Using RLEVEL as WLEVEL estimate for byte %d\n", byte);
+ lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
+ rlevel_to_wlevel(&lmc_rlevel_rank, &lmc_wlevel_rank, byte);
+ }
+ } /* for (byte = 0; byte < 8; ++byte) */
+
+ sw_wl_failed = (bytes_todo != 0);
+
+ } // end parallel write-leveling block for delay low-order bits
+
+ if (use_ecc) {
+ /* ECC byte has to be estimated. Take the average of the two surrounding bytes. */
+ int test_byte8 = divide_nint(lmc_wlevel_rank.s.byte3
+ + lmc_wlevel_rank.s.byte4
+ + 2 /* round up */, 2);
+ lmc_wlevel_rank.s.byte8 = test_byte8 & ~1; /* Use only even settings */
+ byte_test_status[8] = WL_ESTIMATED; /* Estimated delay */
+ } else {
+ byte_test_status[8] = WL_HARDWARE; /* H/W delay value */
+ lmc_wlevel_rank.s.byte8 = lmc_wlevel_rank.s.byte0; /* ECC is not used */
+ }
+
+ /* Set delays for unused bytes to match byte 0. */
+ for (byte=0; byte<8; ++byte) {
+ if ((ddr_interface_bytemask & (1 << byte)))
+ continue;
+ update_wlevel_rank_struct(&lmc_wlevel_rank, byte,
+ lmc_wlevel_rank.s.byte0);
+ byte_test_status[byte] = WL_SOFTWARE;
+ }
+#else /* !DISABLE_SW_WL_PASS_2 */
+ // FIXME? the big hammer, did not even try SW WL pass2, assume only chip reset will help
+ ddr_print("N%d.LMC%d.R%d: S/W write-leveling pass 1 failed\n",
+ node, ddr_interface_num, rankx);
+ sw_wl_failed = 1;
+#endif /* !DISABLE_SW_WL_PASS_2 */
+
+ } else { /* if (bytes_failed) */
+
+ // SW WL pass 1 was OK, write the settings
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
+ lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+
+#if SW_WL_CHECK_PATCH
+ // do validity check on the delay values by running the test 1 more time...
+ // FIXME: we really need to check the ECC byte setting here as well,
+ // so we need to enable ECC for this test!!!
+ // if there are any errors, claim SW WL failure
+ {
+ uint64_t datamask = (ddr_interface_64b) ? 0xffffffffffffffffULL : 0x00000000ffffffffULL;
+
+ // do the test
+ if (sw_wlevel_hw) {
+ errors = run_best_hw_patterns(node, ddr_interface_num, rank_addr,
+ DBTRAIN_TEST, NULL) & 0x0ff;
+ } else {
+#if USE_ORIG_TEST_DRAM_BYTE
+ errors = test_dram_byte(node, ddr_interface_num, rank_addr, datamask, NULL);
+#else
+ errors = dram_tuning_mem_xor(node, ddr_interface_num, rank_addr, datamask, NULL);
+#endif
+ }
+
+ if (errors) {
+ ddr_print("N%d.LMC%d.R%d: Wlevel Rank Final Test errors 0x%x\n",
+ node, ddr_interface_num, rankx, errors);
+ sw_wl_failed = 1;
+ }
+ }
+#endif /* SW_WL_CHECK_PATCH */
+
+ } /* if (bytes_failed) */
+
+ // FIXME? dump the WL settings, so we get more of a clue as to what happened where
+ ddr_print("N%d.LMC%d.R%d: Wlevel Rank %#4x, 0x%016lX : %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %s\n",
+ node, ddr_interface_num, rankx,
+ lmc_wlevel_rank.s.status,
+ lmc_wlevel_rank.u,
+ lmc_wlevel_rank.s.byte8, wl_status_strings[byte_test_status[8]],
+ lmc_wlevel_rank.s.byte7, wl_status_strings[byte_test_status[7]],
+ lmc_wlevel_rank.s.byte6, wl_status_strings[byte_test_status[6]],
+ lmc_wlevel_rank.s.byte5, wl_status_strings[byte_test_status[5]],
+ lmc_wlevel_rank.s.byte4, wl_status_strings[byte_test_status[4]],
+ lmc_wlevel_rank.s.byte3, wl_status_strings[byte_test_status[3]],
+ lmc_wlevel_rank.s.byte2, wl_status_strings[byte_test_status[2]],
+ lmc_wlevel_rank.s.byte1, wl_status_strings[byte_test_status[1]],
+ lmc_wlevel_rank.s.byte0, wl_status_strings[byte_test_status[0]],
+ (sw_wl_rank_status == WL_HARDWARE) ? "" : "(s)"
+ );
+
+ // finally, check for fatal conditions: either chip reset right here, or return error flag
+ if (((ddr_type == DDR4_DRAM) && (best_vref_values_count == 0)) || sw_wl_failed) {
+ if (!ddr_disable_chip_reset) { // do chip RESET
+ error_print("INFO: Short memory test indicates a retry is needed on N%d.LMC%d.R%d. Resetting node...\n",
+ node, ddr_interface_num, rankx);
+ bdk_wait_usec(500000);
+ bdk_reset_chip(node);
+ } else { // return error flag so LMC init can be retried...
+ ddr_print("INFO: Short memory test indicates a retry is needed on N%d.LMC%d.R%d. Restarting LMC init...\n",
+ node, ddr_interface_num, rankx);
+ return 0; // 0 indicates restart possible...
+ }
+ }
+
+ active_rank++;
+ } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
+
+ // Finalize the write-leveling settings
+ for (rankx = 0; rankx < dimm_count * 4;rankx++) {
+ uint64_t value;
+ int parameter_set = 0;
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+
+ if (bdk_is_platform(BDK_PLATFORM_ASIM)) {
+ parameter_set |= 1;
+
+ lmc_wlevel_rank.s.byte8 = 0;
+ lmc_wlevel_rank.s.byte7 = 0;
+ lmc_wlevel_rank.s.byte6 = 0;
+ lmc_wlevel_rank.s.byte5 = 0;
+ lmc_wlevel_rank.s.byte4 = 0;
+ lmc_wlevel_rank.s.byte3 = 0;
+ lmc_wlevel_rank.s.byte2 = 0;
+ lmc_wlevel_rank.s.byte1 = 0;
+ lmc_wlevel_rank.s.byte0 = 0;
+ }
+
+ for (i=0; i<9; ++i) {
+ if ((s = lookup_env_parameter("ddr%d_wlevel_rank%d_byte%d", ddr_interface_num, rankx, i)) != NULL) {
+ parameter_set |= 1;
+ value = strtoul(s, NULL, 0);
+
+ update_wlevel_rank_struct(&lmc_wlevel_rank, i, value);
+ }
+ }
+
+ if ((s = lookup_env_parameter_ull("ddr%d_wlevel_rank%d", ddr_interface_num, rankx)) != NULL) {
+ parameter_set |= 1;
+ value = strtoull(s, NULL, 0);
+ lmc_wlevel_rank.u = value;
+ }
+
+ if (parameter_set) {
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
+ lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
+ display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx);
+ }
+#if WLEXTRAS_PATCH
+ if ((rank_mask & 0x0F) != 0x0F) { // if there are unused entries to be filled
+ if (rankx < 3) {
+ debug_print("N%d.LMC%d.R%d: checking for WLEVEL_RANK unused entries.\n",
+ node, ddr_interface_num, rankx);
+ if (rankx == 0) { // if rank 0, write ranks 1 and 2 here if empty
+ if (!(rank_mask & (1<<1))) { // check that rank 1 is empty
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, 1), lmc_wlevel_rank.u);
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
+ node, ddr_interface_num, rankx, 1);
+ }
+ if (!(rank_mask & (1<<2))) { // check that rank 2 is empty
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
+ node, ddr_interface_num, rankx, 2);
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, 2), lmc_wlevel_rank.u);
+ }
+ }
+ // if rank 0, 1 or 2, write rank 3 here if empty
+ if (!(rank_mask & (1<<3))) { // check that rank 3 is empty
+ VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
+ node, ddr_interface_num, rankx, 3);
+ DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, 3), lmc_wlevel_rank.u);
+ }
+ }
+ }
+#endif /* WLEXTRAS_PATCH */
+
+ } /* for (rankx = 0; rankx < dimm_count * 4;rankx++) */
+
+ /* Restore the ECC configuration */
+ if (!sw_wlevel_hw_default) {
+ lmc_config.s.ecc_ena = use_ecc;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
+ }
+
+#if USE_L2_WAYS_LIMIT
+ /* Restore the l2 set configuration */
+ if ((s = lookup_env_parameter("limit_l2_ways")) != NULL) {
+ int ways = strtoul(s, NULL, 10);
+ limit_l2_ways(node, ways, 1);
+ } else {
+ limit_l2_ways(node, bdk_l2c_get_num_assoc(node), 0);
+ }
+#endif
+
+ } // End Software Write-Leveling block
+
+#if ENABLE_DISPLAY_MPR_PAGE
+ if (ddr_type == DDR4_DRAM) {
+ Display_MPR_Page(node, rank_mask, ddr_interface_num, dimm_count, 2);
+ Display_MPR_Page(node, rank_mask, ddr_interface_num, dimm_count, 0);
+ }
+#endif
+
+#if 1 // was #ifdef CAVIUM_ONLY
+ {
+ int i;
+ int setting[9];
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+
+ for (i=0; i<9; ++i) {
+ SET_DDR_DLL_CTL3(dll90_byte_sel, ENCODE_DLL90_BYTE_SEL(i));
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
+ setting[i] = GET_DDR_DLL_CTL3(dll90_setting);
+ debug_print("%d. LMC%d_DLL_CTL3[%d] = %016lx %d\n", i, ddr_interface_num,
+ GET_DDR_DLL_CTL3(dll90_byte_sel), ddr_dll_ctl3.u, setting[i]);
+ }
+
+ VB_PRT(VBL_DEV, "N%d.LMC%d: %-36s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
+ node, ddr_interface_num, "DLL90 Setting 8:0",
+ setting[8], setting[7], setting[6], setting[5], setting[4],
+ setting[3], setting[2], setting[1], setting[0]);
+
+ //BDK_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), save_ddr_dll_ctl3.u);
+ }
+#endif /* CAVIUM_ONLY */
+
+ // any custom DLL read or write offsets, install them
+ // FIXME: no need to do these if we are going to auto-tune... ???
+
+ process_custom_dll_offsets(node, ddr_interface_num, "ddr_dll_write_offset",
+ custom_lmc_config->dll_write_offset, "ddr%d_dll_write_offset_byte%d", 1);
+ process_custom_dll_offsets(node, ddr_interface_num, "ddr_dll_read_offset",
+ custom_lmc_config->dll_read_offset, "ddr%d_dll_read_offset_byte%d", 2);
+
+ // we want to train write bit-deskew here...
+ if (! disable_deskew_training) {
+ if (enable_write_deskew) {
+ ddr_print("N%d.LMC%d: WRITE BIT-DESKEW feature training begins.\n",
+ node, ddr_interface_num);
+ Perform_Write_Deskew_Training(node, ddr_interface_num);
+ } /* if (enable_write_deskew) */
+ } /* if (! disable_deskew_training) */
+
+ /*
+ * 6.9.14 Final LMC Initialization
+ *
+ * Early LMC initialization, LMC write-leveling, and LMC read-leveling
+ * must be completed prior to starting this final LMC initialization.
+ *
+ * LMC hardware updates the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1,
+ * LMC(0)_SLOT_CTL2 CSRs with minimum values based on the selected
+ * readleveling and write-leveling settings. Software should not write
+ * the final LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and LMC(0)_SLOT_CTL2
+ * values until after the final read-leveling and write-leveling settings
+ * are written.
+ *
+ * Software must ensure the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and
+ * LMC(0)_SLOT_CTL2 CSR values are appropriate for this step. These CSRs
+ * select the minimum gaps between read operations and write operations
+ * of various types.
+ *
+ * Software must not reduce the values in these CSR fields below the
+ * values previously selected by the LMC hardware (during write-leveling
+ * and read-leveling steps above).
+ *
+ * All sections in this chapter may be used to derive proper settings for
+ * these registers.
+ *
+ * For minimal read latency, L2C_CTL[EF_ENA,EF_CNT] should be programmed
+ * properly. This should be done prior to the first read.
+ */
+
+#if ENABLE_SLOT_CTL_ACCESS
+ {
+ bdk_lmcx_slot_ctl0_t lmc_slot_ctl0;
+ bdk_lmcx_slot_ctl1_t lmc_slot_ctl1;
+ bdk_lmcx_slot_ctl2_t lmc_slot_ctl2;
+ bdk_lmcx_slot_ctl3_t lmc_slot_ctl3;
+
+ lmc_slot_ctl0.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL0(ddr_interface_num));
+ lmc_slot_ctl1.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL1(ddr_interface_num));
+ lmc_slot_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL2(ddr_interface_num));
+ lmc_slot_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL3(ddr_interface_num));
+
+ ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL0", lmc_slot_ctl0.u);
+ ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL1", lmc_slot_ctl1.u);
+ ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL2", lmc_slot_ctl2.u);
+ ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL3", lmc_slot_ctl3.u);
+
+ // for now, look only for SLOT_CTL1 envvar for override of contents
+ if ((s = lookup_env_parameter("ddr%d_slot_ctl1", ddr_interface_num)) != NULL) {
+ int slot_ctl1_incr = strtoul(s, NULL, 0);
+ // validate the value
+ if ((slot_ctl1_incr < 0) || (slot_ctl1_incr > 3)) { // allow 0 for printing only
+ error_print("ddr%d_slot_ctl1 illegal value (%d); must be 0-3\n",
+ ddr_interface_num, slot_ctl1_incr);
+ } else {
+
+#define INCR(csr, chip, field, incr) \
+ csr.chip.field = (csr.chip.field < (64 - incr)) ? (csr.chip.field + incr) : 63
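+ // Clamping illustration (example values): with incr = 3 a field of 10
+ // becomes 13, while a field of 62 is not < (64 - 3) and so saturates at 63.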
+
+ // only print original when we are changing it!
+ if (slot_ctl1_incr)
+ ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL1", lmc_slot_ctl1.u);
+
+ // modify all the SLOT_CTL1 fields by the increment, for now...
+ // but make sure the value will not overflow!!!
+ INCR(lmc_slot_ctl1, s, r2r_xrank_init, slot_ctl1_incr);
+ INCR(lmc_slot_ctl1, s, r2w_xrank_init, slot_ctl1_incr);
+ INCR(lmc_slot_ctl1, s, w2r_xrank_init, slot_ctl1_incr);
+ INCR(lmc_slot_ctl1, s, w2w_xrank_init, slot_ctl1_incr);
+ DRAM_CSR_WRITE(node, BDK_LMCX_SLOT_CTL1(ddr_interface_num), lmc_slot_ctl1.u);
+ lmc_slot_ctl1.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL1(ddr_interface_num));
+
+ // always print when we are changing it!
+ printf("%-45s : 0x%016lx\n", "LMC_SLOT_CTL1", lmc_slot_ctl1.u);
+ }
+ }
+ }
+#endif /* ENABLE_SLOT_CTL_ACCESS */
+ {
+ /* Clear any residual ECC errors */
+ int num_tads = 1;
+ int tad;
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_INT(ddr_interface_num), -1ULL);
+ BDK_CSR_READ(node, BDK_LMCX_INT(ddr_interface_num));
+
+ for (tad=0; tad<num_tads; tad++)
+ DRAM_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(tad), BDK_CSR_READ(node, BDK_L2C_TADX_INT_W1C(tad)));
+
+ ddr_print("%-45s : 0x%08lx\n", "LMC_INT",
+ BDK_CSR_READ(node, BDK_LMCX_INT(ddr_interface_num)));
+
+#if 0
+ // NOTE: this must be done for pass 2.x
+ // must enable ECC interrupts to get ECC error info in LMCX_INT
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ DRAM_CSR_WRITE(node, BDK_LMCX_INT_ENA_W1S(ddr_interface_num), -1ULL);
+ BDK_CSR_INIT(lmc_int_ena_w1s, node, BDK_LMCX_INT_ENA_W1S(ddr_interface_num));
+ ddr_print("%-45s : 0x%08lx\n", "LMC_INT_ENA_W1S", lmc_int_ena_w1s.u);
+ }
+#endif
+ }
+
+ // Now we can enable scrambling if desired...
+ {
+ bdk_lmcx_control_t lmc_control;
+ bdk_lmcx_scramble_cfg0_t lmc_scramble_cfg0;
+ bdk_lmcx_scramble_cfg1_t lmc_scramble_cfg1;
+ bdk_lmcx_scramble_cfg2_t lmc_scramble_cfg2;
+ bdk_lmcx_ns_ctl_t lmc_ns_ctl;
+
+ lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
+ lmc_scramble_cfg0.u = BDK_CSR_READ(node, BDK_LMCX_SCRAMBLE_CFG0(ddr_interface_num));
+ lmc_scramble_cfg1.u = BDK_CSR_READ(node, BDK_LMCX_SCRAMBLE_CFG1(ddr_interface_num));
+ lmc_scramble_cfg2.u = BDK_CSR_READ(node, BDK_LMCX_SCRAMBLE_CFG2(ddr_interface_num));
+ lmc_ns_ctl.u = BDK_CSR_READ(node, BDK_LMCX_NS_CTL(ddr_interface_num));
+
+ /* Read the scramble setting from the config and see if we
+ need scrambling */
+ int use_scramble = bdk_config_get_int(BDK_CONFIG_DRAM_SCRAMBLE);
+ if (use_scramble == 2)
+ {
+ if (bdk_trust_get_level() >= BDK_TRUST_LEVEL_SIGNED)
+ use_scramble = 1;
+ else
+ use_scramble = 0;
+ }
+
+ /* Generate random values if scrambling is needed */
+ if (use_scramble)
+ {
+ lmc_scramble_cfg0.u = bdk_rng_get_random64();
+ lmc_scramble_cfg1.u = bdk_rng_get_random64();
+ lmc_scramble_cfg2.u = bdk_rng_get_random64();
+ lmc_ns_ctl.s.ns_scramble_dis = 0;
+ lmc_ns_ctl.s.adr_offset = 0;
+ lmc_control.s.scramble_ena = 1;
+ }
+
+ if ((s = lookup_env_parameter_ull("ddr_scramble_cfg0")) != NULL) {
+ lmc_scramble_cfg0.u = strtoull(s, NULL, 0);
+ lmc_control.s.scramble_ena = 1;
+ }
+ ddr_print("%-45s : 0x%016lx\n", "LMC_SCRAMBLE_CFG0", lmc_scramble_cfg0.u);
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG0(ddr_interface_num), lmc_scramble_cfg0.u);
+
+ if ((s = lookup_env_parameter_ull("ddr_scramble_cfg1")) != NULL) {
+ lmc_scramble_cfg1.u = strtoull(s, NULL, 0);
+ lmc_control.s.scramble_ena = 1;
+ }
+ ddr_print("%-45s : 0x%016lx\n", "LMC_SCRAMBLE_CFG1", lmc_scramble_cfg1.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG1(ddr_interface_num), lmc_scramble_cfg1.u);
+
+ if ((s = lookup_env_parameter_ull("ddr_scramble_cfg2")) != NULL) {
+ lmc_scramble_cfg2.u = strtoull(s, NULL, 0);
+ lmc_control.s.scramble_ena = 1;
+ }
+ ddr_print("%-45s : 0x%016lx\n", "LMC_SCRAMBLE_CFG2", lmc_scramble_cfg2.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG2(ddr_interface_num), lmc_scramble_cfg2.u);
+
+ if ((s = lookup_env_parameter_ull("ddr_ns_ctl")) != NULL) {
+ lmc_ns_ctl.u = strtoull(s, NULL, 0);
+ }
+ ddr_print("%-45s : 0x%016lx\n", "LMC_NS_CTL", lmc_ns_ctl.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_NS_CTL(ddr_interface_num), lmc_ns_ctl.u);
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
+
+ }
+
+ return(mem_size_mbytes);
+}
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.h b/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.h
new file mode 100644
index 0000000000..ba1060e5e0
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-init-ddr3.h
@@ -0,0 +1,97 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Function for DDR3 init. Internal use only.
+ */
+
+extern void perform_octeon3_ddr3_sequence(bdk_node_t node, int rank_mask,
+ int ddr_interface_num, int sequence);
+extern void perform_ddr_init_sequence(bdk_node_t node, int rank_mask,
+ int ddr_interface_num);
+extern int ddr_memory_preserved(bdk_node_t node);
+
+extern int init_octeon3_ddr3_interface(bdk_node_t node,
+ const ddr_configuration_t *ddr_configuration, uint32_t ddr_hertz,
+ uint32_t cpu_hertz, uint32_t ddr_ref_hertz, int board_type,
+ int board_rev_maj, int board_rev_min, int ddr_interface_num,
+ uint32_t ddr_interface_mask);
+
+extern void
+set_vref(bdk_node_t node, int ddr_interface_num, int rank,
+ int range, int value);
+
+typedef struct {
+ unsigned char *rodt_ohms;
+ unsigned char *rtt_nom_ohms;
+ unsigned char *rtt_nom_table;
+ unsigned char *rtt_wr_ohms;
+ unsigned char *dic_ohms;
+ short *drive_strength;
+ short *dqx_strength;
+} impedence_values_t;
+
+extern impedence_values_t ddr4_impedence_values;
+
+extern int
+compute_vref_value(bdk_node_t node, int ddr_interface_num,
+ int rankx, int dimm_count, int rank_count,
+ impedence_values_t *imp_values, int is_stacked_die);
+
+extern unsigned short
+load_dac_override(int node, int ddr_interface_num,
+ int dac_value, int byte);
+extern int
+read_DAC_DBI_settings(int node, int ddr_interface_num,
+ int dac_or_dbi, int *settings);
+extern void
+display_DAC_DBI_settings(int node, int ddr_interface_num, int dac_or_dbi,
+ int ecc_ena, int *settings, char *title);
+
+#define RODT_OHMS_COUNT 8
+#define RTT_NOM_OHMS_COUNT 8
+#define RTT_NOM_TABLE_COUNT 8
+#define RTT_WR_OHMS_COUNT 8
+#define DIC_OHMS_COUNT 3
+#define DRIVE_STRENGTH_COUNT 15
+
+extern uint64_t hertz_to_psecs(uint64_t hertz);
+extern uint64_t psecs_to_mts(uint64_t psecs);
+extern uint64_t mts_to_hertz(uint64_t mts);
+extern uint64_t pretty_psecs_to_mts(uint64_t psecs);
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-internal.h b/src/vendorcode/cavium/bdk/libdram/dram-internal.h
new file mode 100644
index 0000000000..07fdbcbf54
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-internal.h
@@ -0,0 +1,201 @@
+#ifndef __DRAM_INTERNAL_H__
+#define __DRAM_INTERNAL_H__
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * This header defines all internal API for libdram. None
+ * of these functions should be called by users of the library.
+ * This is the only header that DRAM files should include
+ * from the libdram directory
+ */
+
+#include "libdram.h"
+#include "lib_octeon_shared.h"
+#include "dram-print.h"
+#include "dram-util.h"
+#include "dram-csr.h"
+#include "dram-env.h"
+#include "dram-gpio.h"
+#include "dram-spd.h"
+#include "dram-l2c.h"
+#include "dram-init-ddr3.h"
+
+#undef DRAM_CSR_WRITE_INLINE
+
+// define how many HW WL samples to take for majority voting
+// MUST BE odd!!
+// assume there should only be 2 possible values that will show up,
+// so treat ties as a problem!!!
+#define WLEVEL_LOOPS_DEFAULT 5 // NOTE: do not change this without checking the code!!!
+
+// define how many HW RL samples per rank to take
+// multiple samples will allow either:
+// 1. looking for the best sample score
+// 2. averaging the samples into a composite score
+// symbol PICK_BEST_RANK_SCORE_NOT_AVG is used to choose
+// (see dram-init-ddr3.c)
+#define RLEVEL_AVG_LOOPS_DEFAULT 3
+#define PICK_BEST_RANK_SCORE_NOT_AVG 1
+
+typedef struct {
+ int delay;
+ int loop_total;
+ int loop_count;
+ int best;
+ uint64_t bm;
+ int bmerrs;
+ int sqerrs;
+ int bestsq;
+} rlevel_byte_data_t;
+
+typedef struct {
+ uint64_t bm;
+ uint8_t mstart;
+ uint8_t width;
+ int errs;
+} rlevel_bitmask_t;
+
+#define SET_DDR_DLL_CTL3(field, expr) \
+ do { \
+ ddr_dll_ctl3.cn81xx.field = (expr); \
+ } while (0)
+
+#define ENCODE_DLL90_BYTE_SEL(byte_sel) ((byte_sel)+1)
+
+#define GET_DDR_DLL_CTL3(field) \
+ (ddr_dll_ctl3.cn81xx.field)
+
+
+#define RLEVEL_NONSEQUENTIAL_DELAY_ERROR 50
+#define RLEVEL_ADJACENT_DELAY_ERROR 30
+
+#define TWO_LMC_MASK 0x03
+#define FOUR_LMC_MASK 0x0f
+#define ONE_DIMM_MASK 0x01
+#define TWO_DIMM_MASK 0x03
+
+extern int initialize_ddr_clock(bdk_node_t node,
+ const ddr_configuration_t *ddr_configuration, uint32_t cpu_hertz,
+ uint32_t ddr_hertz, uint32_t ddr_ref_hertz, int ddr_interface_num,
+ uint32_t ddr_interface_mask);
+
+extern int test_dram_byte(bdk_node_t node, int ddr_interface_num, uint64_t p,
+ uint64_t bitmask, uint64_t *xor_data);
+extern int dram_tuning_mem_xor(bdk_node_t node, int ddr_interface_num, uint64_t p,
+ uint64_t bitmask, uint64_t *xor_data);
+
+// "mode" arg
+#define DBTRAIN_TEST 0
+#define DBTRAIN_DBI 1
+#define DBTRAIN_LFSR 2
+extern int test_dram_byte_hw(bdk_node_t node, int ddr_interface_num,
+ uint64_t p, int mode, uint64_t *xor_data);
+extern int run_best_hw_patterns(bdk_node_t node, int ddr_interface_num,
+ uint64_t p, int mode, uint64_t *xor_data);
+
+extern int get_dimm_part_number(char *buffer, bdk_node_t node,
+ const dimm_config_t *dimm_config,
+ int ddr_type);
+extern uint32_t get_dimm_serial_number(bdk_node_t node,
+ const dimm_config_t *dimm_config,
+ int ddr_type);
+
+extern int octeon_ddr_initialize(bdk_node_t node, uint32_t cpu_hertz,
+ uint32_t ddr_hertz, uint32_t ddr_ref_hertz, uint32_t ddr_interface_mask,
+ const ddr_configuration_t *ddr_configuration, uint32_t *measured_ddr_hertz,
+ int board_type, int board_rev_maj, int board_rev_min);
+
+extern uint64_t divide_nint(uint64_t dividend, uint64_t divisor);
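+// A minimal sketch of the rounding divide_nint() is assumed to perform
+// (nearest-integer division; the real implementation lives elsewhere in
+// libdram): roughly (dividend + divisor/2) / divisor, so divide_nint(33, 2)
+// evaluates to 17.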
+
+typedef enum {
+ DDR3_DRAM = 3,
+ DDR4_DRAM = 4,
+} ddr_type_t;
+
+static inline int get_ddr_type(bdk_node_t node, const dimm_config_t *dimm_config)
+{
+ int spd_ddr_type;
+
+#define DEVICE_TYPE DDR4_SPD_KEY_BYTE_DEVICE_TYPE // same for DDR3 and DDR4
+ spd_ddr_type = read_spd(node, dimm_config, DEVICE_TYPE);
+
+ debug_print("%s:%d spd_ddr_type=0x%02x\n", __FUNCTION__, __LINE__, spd_ddr_type);
+
+ /* we return only DDR4 or DDR3 */
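+ /* SPD key byte value 0x0C identifies DDR4 SDRAM (0x0B is DDR3); anything
+ else is treated as DDR3 here. */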
+ return (spd_ddr_type == 0x0C) ? DDR4_DRAM : DDR3_DRAM;
+}
+
+static inline int get_dimm_ecc(bdk_node_t node, const dimm_config_t *dimm_config, int ddr_type)
+{
+#define BUS_WIDTH(t) (((t) == DDR4_DRAM) ? DDR4_SPD_MODULE_MEMORY_BUS_WIDTH : DDR3_SPD_MEMORY_BUS_WIDTH)
+
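+ /* Bit 3 of the SPD bus-width byte is part of the bus-width-extension
+ field; when set it indicates 8 extra check bits (ECC) on the module. */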
+ return !!(read_spd(node, dimm_config, BUS_WIDTH(ddr_type)) & 8);
+}
+
+static inline int get_dimm_module_type(bdk_node_t node, const dimm_config_t *dimm_config, int ddr_type)
+{
+#define MODULE_TYPE DDR4_SPD_KEY_BYTE_MODULE_TYPE // same for DDR3 and DDR4
+
+ return (read_spd(node, dimm_config, MODULE_TYPE) & 0x0F);
+}
+
+extern int common_ddr4_fixups(dram_config_t *cfg, uint32_t default_udimm_speed);
+
+#define DEFAULT_BEST_RANK_SCORE 9999999
+#define MAX_RANK_SCORE_LIMIT 99 // is this OK?
+
+unsigned short load_dll_offset(bdk_node_t node, int ddr_interface_num,
+ int dll_offset_mode, int byte_offset, int byte);
+void change_dll_offset_enable(bdk_node_t node, int ddr_interface_num, int change);
+
+extern int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune);
+extern int perform_HW_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int bytelane);
+
+extern int perform_margin_write_voltage(bdk_node_t node);
+extern int perform_margin_read_voltage(bdk_node_t node);
+
+#define LMC_DDR3_RESET_ASSERT 0
+#define LMC_DDR3_RESET_DEASSERT 1
+extern void cn88xx_lmc_ddr3_reset(bdk_node_t node, int ddr_interface_num, int reset);
+extern void perform_lmc_reset(bdk_node_t node, int ddr_interface_num);
+extern void ddr4_mrw(bdk_node_t node, int ddr_interface_num, int rank,
+ int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1);
+#endif /* __DRAM_INTERNAL_H__ */
+
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-l2c.c b/src/vendorcode/cavium/bdk/libdram/dram-l2c.c
new file mode 100644
index 0000000000..11112955b2
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-l2c.c
@@ -0,0 +1,69 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "dram-internal.h"
+
+int limit_l2_ways(bdk_node_t node, int ways, int verbose)
+{
+ int ways_max = bdk_l2c_get_num_assoc(node);
+ int ways_min = 0;
+ int errors = 0;
+
+ if (ways >= ways_min && ways <= ways_max)
+ {
+ uint32_t valid_mask = (0x1 << ways_max) - 1;
+ uint32_t mask = (valid_mask << ways) & valid_mask;
+ if (verbose)
+ printf("Limiting L2 to %d ways\n", ways);
+ for (int i = 0; i < (int)bdk_get_num_cores(node); i++)
+ errors += bdk_l2c_set_core_way_partition(node, i, mask);
+ errors += bdk_l2c_set_hw_way_partition(node, mask);
+ }
+ else
+ {
+ errors++;
+ printf("ERROR: invalid limit_l2_ways %d, must be between %d and %d\n",
+ ways, ways_min, ways_max);
+ }
+ if (errors)
+ puts("ERROR limiting L2 cache ways\n");
+
+ return errors;
+}
+
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-l2c.h b/src/vendorcode/cavium/bdk/libdram/dram-l2c.h
new file mode 100644
index 0000000000..5d2840884b
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-l2c.h
@@ -0,0 +1,45 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Functions for controlling L2C. Internal use only.
+ */
+
+extern int limit_l2_ways(bdk_node_t node, int ways, int verbose);
+
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-print.h b/src/vendorcode/cavium/bdk/libdram/dram-print.h
new file mode 100644
index 0000000000..94cdf92fbf
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-print.h
@@ -0,0 +1,86 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Functions for displaying output in libdram. Internal use only.
+ */
+
+typedef enum {
+ // low 4 bits are verbosity level
+ VBL_OFF = 0, // use this only to init dram_verbosity
+ VBL_ALL = 0, // use this only in VBL_PR() to get printf equiv
+ VBL_NORM = 1,
+ VBL_FAE = 2,
+ VBL_TME = 3,
+ VBL_DEV = 4,
+ VBL_DEV2 = 5,
+ VBL_DEV3 = 6,
+ VBL_DEV4 = 7,
+ VBL_NONE = 15, // use this only in VBL_PR() to get no printing
+ // upper 4 bits are special verbosities
+ VBL_SEQ = 16,
+ VBL_CSRS = 32,
+ VBL_SPECIAL = 48,
+ // force at least 8 bits for enum
+ VBL_LAST = 255
+} dram_verbosity_t;
+
+extern dram_verbosity_t dram_verbosity;
+
+// "level" should be 1-7, or only one of the special bits
+// let the compiler optimize the test for verbosity
+#define is_verbosity_level(level) ((int)(dram_verbosity & 0x0f) >= (level))
+#define is_verbosity_special(level) (((int)(dram_verbosity & 0xf0) & (level)) != 0)
+#define dram_is_verbose(level) (((level) & VBL_SPECIAL) ? is_verbosity_special(level) : is_verbosity_level(level))
+
+#define VB_PRT(level, format, ...) \
+ do { \
+ if (dram_is_verbose(level)) \
+ printf(format, ##__VA_ARGS__); \
+ } while (0)
+
+#define ddr_print(format, ...) VB_PRT(VBL_NORM, format, ##__VA_ARGS__)
+
+#define error_print(format, ...) printf(format, ##__VA_ARGS__)
+
+#ifdef DEBUG_DEBUG_PRINT
+ #define debug_print(format, ...) printf(format, ##__VA_ARGS__)
+#else
+ #define debug_print(format, ...) do {} while (0)
+#endif
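+
+/*
+ * Illustrative usage (a sketch, not part of the original header): each call
+ * names the verbosity it needs and VB_PRT() compiles down to a simple
+ * compare against dram_verbosity, e.g.
+ *
+ *   VB_PRT(VBL_FAE, "N%d.LMC%d: window length %d\n", node, lmc, len);
+ *   VB_PRT(VBL_CSRS, "CSR %s <- 0x%lx\n", name, value);  // special-bit level
+ *
+ * The variable names above (node, lmc, len, name, value) are hypothetical.
+ */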
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-spd.c b/src/vendorcode/cavium/bdk/libdram/dram-spd.c
new file mode 100644
index 0000000000..3717ca1109
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-spd.c
@@ -0,0 +1,583 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include <ctype.h>
+#include "dram-internal.h"
+
+/**
+ * Read the entire contents of a DIMM SPD and store it in the device tree. The
+ * current DRAM config is also updated, so future SPD accesses use the cached
+ * copy.
+ *
+ * @param node Node the DRAM config is for
+ * @param cfg Current DRAM config. Updated with SPD data
+ * @param lmc LMC to read DIMM for
+ * @param dimm DIMM slot for SPD to read
+ *
+ * @return Zero on success, negative on failure
+ */
+int read_entire_spd(bdk_node_t node, dram_config_t *cfg, int lmc, int dimm)
+{
+ /* If pointer to data is provided, use it, otherwise read from SPD over twsi */
+ if (cfg->config[lmc].dimm_config_table[dimm].spd_ptr)
+ return 0;
+ if (!cfg->config[lmc].dimm_config_table[dimm].spd_addr)
+ return -1;
+
+ /* Figure out how to access the SPD */
+ int spd_addr = cfg->config[lmc].dimm_config_table[dimm].spd_addr;
+ int bus = spd_addr >> 12;
+ int address = spd_addr & 0x7f;
+
+ /* Figure out the size we will read */
+ int64_t dev_type = bdk_twsix_read_ia(node, bus, address, DDR4_SPD_KEY_BYTE_DEVICE_TYPE, 1, 1);
+ if (dev_type < 0)
+ return -1; /* No DIMM */
+ int spd_size = (dev_type == 0x0c) ? 512 : 256;
+
+ /* Allocate storage */
+ uint32_t *spd_buf = malloc(spd_size);
+ if (!spd_buf)
+ return -1;
+ uint32_t *ptr = spd_buf;
+
+ for (int bank = 0; bank < (spd_size >> 8); bank++)
+ {
+ /* this should only happen for DDR4, which has a second bank of 256 bytes */
+ if (bank)
+ bdk_twsix_write_ia(node, bus, 0x36 | bank, 0, 2, 1, 0);
+ int bank_size = 256;
+ for (int i = 0; i < bank_size; i += 4)
+ {
+ int64_t data = bdk_twsix_read_ia(node, bus, address, i, 4, 1);
+ if (data < 0)
+ {
+ free(spd_buf);
+ bdk_error("Failed to read SPD data at 0x%x\n", i + (bank << 8));
+ /* Restore the bank to zero */
+ if (bank)
+ bdk_twsix_write_ia(node, bus, 0x36 | 0, 0, 2, 1, 0);
+ return -1;
+ }
+ else
+ *ptr++ = bdk_be32_to_cpu(data);
+ }
+ /* Restore the bank to zero */
+ if (bank)
+ bdk_twsix_write_ia(node, bus, 0x36 | 0, 0, 2, 1, 0);
+ }
+
+ /* Store the SPD in the device tree */
+ bdk_config_set_blob(spd_size, spd_buf, BDK_CONFIG_DDR_SPD_DATA, dimm, lmc, node);
+ cfg->config[lmc].dimm_config_table[dimm].spd_ptr = (void*)spd_buf;
+
+ return 0;
+}
+
+/* Read a DIMM SPD value, either using TWSI to read it from the DIMM, or
+ * from a provided array.
+ */
+int read_spd(bdk_node_t node, const dimm_config_t *dimm_config, int spd_field)
+{
+ /* If pointer to data is provided, use it, otherwise read from SPD over twsi */
+ if (dimm_config->spd_ptr)
+ return dimm_config->spd_ptr[spd_field];
+ else if (dimm_config->spd_addr)
+ {
+ int data;
+ int bus = dimm_config->spd_addr >> 12;
+ int address = dimm_config->spd_addr & 0x7f;
+
+ /* this should only happen for DDR4, which has a second bank of 256 bytes */
+ int bank = (spd_field >> 8) & 1;
+ if (bank) {
+ bdk_twsix_write_ia(node, bus, 0x36 | bank, 0, 2, 1, 0);
+ spd_field %= 256;
+ }
+
+ data = bdk_twsix_read_ia(node, bus, address, spd_field, 1, 1);
+
+ /* Restore the bank to zero */
+ if (bank) {
+ bdk_twsix_write_ia(node, bus, 0x36 | 0, 0, 2, 1, 0);
+ }
+
+ return data;
+ }
+ else
+ return -1;
+}
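+
+/* Example (hypothetical call): fetching the DDR4 module serial number byte,
+ *   read_spd(node, dimm_config, DDR4_SPD_MODULE_SERIAL_NUMBER)   // field 325
+ * lands in bank 1, so the code above first selects page 1 via the 0x36/0x37
+ * page-select device, reads offset 325 % 256 = 69, then restores page 0.
+ */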
+
+static uint16_t ddr3_crc16(uint8_t *ptr, int count)
+{
+ /* From DDR3 spd specification */
+ int crc, i;
+ crc = 0;
+ while (--count >= 0)
+ {
+ crc = crc ^ (int)*ptr++ << 8;
+ for (i = 0; i < 8; ++i)
+ if (crc & 0x8000)
+ crc = crc << 1 ^ 0x1021;
+ else
+ crc = crc << 1;
+ }
+ return crc & 0xFFFF;
+}
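+
+/* Note: this is the CRC-16 with polynomial 0x1021 and initial value 0 as
+ * given in the DDR3 SPD specification; the caller decides (from byte 0,
+ * bit 7) whether it covers bytes 0..116 or 0..125.
+ */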
+
+static int validate_spd_checksum_ddr3(bdk_node_t node, int twsi_addr, int silent)
+{
+ uint8_t spd_data[128];
+ int crc_bytes = 126;
+ uint16_t crc_comp;
+ int i;
+ int rv;
+ int ret = 1;
+ for (i = 0; i < 128; i++)
+ {
+ rv = bdk_twsix_read_ia(node, twsi_addr >> 12, twsi_addr & 0x7f, i, 1, 1);
+ if (rv < 0)
+ return 0; /* TWSI read error */
+ spd_data[i] = (uint8_t)rv;
+ }
+ /* Check byte 0 to see how many bytes checksum is over */
+ if (spd_data[0] & 0x80)
+ crc_bytes = 117;
+
+ crc_comp = ddr3_crc16(spd_data, crc_bytes);
+
+ if (spd_data[DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE] != (crc_comp & 0xff) ||
+ spd_data[DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE] != (crc_comp >> 8))
+ {
+ if (!silent) {
+ printf("DDR3 SPD CRC error, spd addr: 0x%x, calculated crc: 0x%04x, read crc: 0x%02x%02x\n",
+ twsi_addr, crc_comp,
+ spd_data[DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE],
+ spd_data[DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE]);
+ }
+ ret = 0;
+ }
+ return ret;
+}
+
+static int validate_spd_checksum(bdk_node_t node, int twsi_addr, int silent)
+{
+ int rv;
+
+ debug_print("Validating DIMM at address 0x%x\n", twsi_addr);
+
+ if (!twsi_addr) return 1; /* return OK if we are not doing real DIMMs */
+
+ /* Look up module type to determine if DDR3 or DDR4 */
+ rv = bdk_twsix_read_ia(node, twsi_addr >> 12, twsi_addr & 0x7f, 2, 1, 1);
+
+ if (rv >= 0xB && rv <= 0xC) /* this is DDR3 or DDR4, do same */
+ return validate_spd_checksum_ddr3(node, twsi_addr, silent);
+
+ if (!silent)
+ printf("Unrecognized DIMM type: 0x%x at spd address: 0x%x\n",
+ rv, twsi_addr);
+
+ return 0;
+}
+
+
+int validate_dimm(bdk_node_t node, const dimm_config_t *dimm_config)
+{
+ int spd_addr;
+
+ spd_addr = dimm_config->spd_addr;
+
+ debug_print("Validating dimm spd addr: 0x%02x spd ptr: %x\n",
+ spd_addr, dimm_config->spd_ptr);
+
+ // if the slot is not possible
+ if (!spd_addr && !dimm_config->spd_ptr)
+ return -1;
+
+ {
+ int val0, val1;
+ int ddr_type = get_ddr_type(node, dimm_config);
+
+ switch (ddr_type)
+ {
+ case DDR3_DRAM: /* DDR3 */
+ case DDR4_DRAM: /* DDR4 */
+
+ debug_print("Validating DDR%d DIMM\n", ((dimm_type >> 2) & 3) + 1);
+
+#define DENSITY_BANKS DDR4_SPD_DENSITY_BANKS // same for DDR3 and DDR4
+#define ROW_COL_BITS DDR4_SPD_ADDRESSING_ROW_COL_BITS // same for DDR3 and DDR4
+
+ val0 = read_spd(node, dimm_config, DENSITY_BANKS);
+ val1 = read_spd(node, dimm_config, ROW_COL_BITS);
+ if (val0 < 0 && val1 < 0) {
+ debug_print("Error reading SPD for DIMM\n");
+ return 0; /* Failed to read dimm */
+ }
+ if (val0 == 0xff && val1 == 0xff) {
+ ddr_print("Blank or unreadable SPD for DIMM\n");
+ return 0; /* Blank SPD or otherwise unreadable device */
+ }
+
+ /* Don't treat bad checksums as fatal. */
+ validate_spd_checksum(node, spd_addr, 0);
+ break;
+
+ case 0x00: /* Terminator detected. Fail silently. */
+ return 0;
+
+ default:
+ debug_print("Unknown DIMM type 0x%x for DIMM @ 0x%x\n",
+ dimm_type, dimm_config->spd_addr);
+ return 0; /* Failed to read dimm */
+ }
+ }
+
+ return 1;
+}
+
+int get_dimm_part_number(char *buffer, bdk_node_t node,
+ const dimm_config_t *dimm_config,
+ int ddr_type)
+{
+ int i;
+ int c;
+ int skipping = 1;
+ int strlen = 0;
+
+#define PART_LIMIT(t) (((t) == DDR4_DRAM) ? 19 : 18)
+#define PART_NUMBER(t) (((t) == DDR4_DRAM) ? DDR4_SPD_MODULE_PART_NUMBER : DDR3_SPD_MODULE_PART_NUMBER)
+
+ int limit = PART_LIMIT(ddr_type);
+ int offset = PART_NUMBER(ddr_type);
+
+ for (i = 0; i < limit; ++i) {
+
+ c = (read_spd(node, dimm_config, offset+i) & 0xff);
+ if (c == 0) // any null, we are done
+ break;
+
+ /* Skip leading spaces. */
+ if (skipping) {
+ if (isspace(c))
+ continue;
+ else
+ skipping = 0;
+ }
+
+ /* Put non-null non-leading-space-skipped char into buffer */
+ buffer[strlen] = c;
+ ++strlen;
+ }
+
+ if (strlen > 0) {
+ i = strlen - 1; // last char put into buf
+ while (i >= 0 && isspace((int)buffer[i])) { // still in buf and a space
+ --i;
+ --strlen;
+ }
+ }
+ buffer[strlen] = 0; /* Ensure that the string is terminated */
+
+ return strlen;
+}
+
+uint32_t get_dimm_serial_number(bdk_node_t node, const dimm_config_t *dimm_config, int ddr_type)
+{
+ uint32_t serial_number = 0;
+ int offset;
+
+#define SERIAL_NUMBER(t) (((t) == DDR4_DRAM) ? DDR4_SPD_MODULE_SERIAL_NUMBER : DDR3_SPD_MODULE_SERIAL_NUMBER)
+
+ offset = SERIAL_NUMBER(ddr_type);
+
+ for (int i = 0, j = 24; i < 4; ++i, j -= 8) {
+ serial_number |= ((read_spd(node, dimm_config, offset + i) & 0xff) << j);
+ }
+
+ return serial_number;
+}
+
+static uint32_t get_dimm_checksum(bdk_node_t node, const dimm_config_t *dimm_config, int ddr_type)
+{
+ uint32_t spd_chksum;
+
+#define LOWER_NIBBLE(t) (((t) == DDR4_DRAM) ? DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE : DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE)
+#define UPPER_NIBBLE(t) (((t) == DDR4_DRAM) ? DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE : DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE)
+
+ spd_chksum = 0xff & read_spd(node, dimm_config, LOWER_NIBBLE(ddr_type));
+ spd_chksum |= ((0xff & read_spd(node, dimm_config, UPPER_NIBBLE(ddr_type))) << 8);
+
+ return spd_chksum;
+}
+
+static
+void report_common_dimm(bdk_node_t node, const dimm_config_t *dimm_config, int dimm,
+ const char **dimm_types, int ddr_type, char *volt_str,
+ int ddr_interface_num, int num_ranks, int dram_width, int dimm_size_mb)
+{
+ int spd_ecc;
+ unsigned spd_module_type;
+ uint32_t serial_number;
+ char part_number[21]; /* 20 bytes plus string terminator is big enough for either */
+ char *sn_str;
+
+ spd_module_type = get_dimm_module_type(node, dimm_config, ddr_type);
+ spd_ecc = get_dimm_ecc(node, dimm_config, ddr_type);
+
+ (void) get_dimm_part_number(part_number, node, dimm_config, ddr_type);
+
+ serial_number = get_dimm_serial_number(node, dimm_config, ddr_type);
+ if ((serial_number != 0) && (serial_number != 0xffffffff)) {
+ sn_str = "s/n";
+ } else {
+ serial_number = get_dimm_checksum(node, dimm_config, ddr_type);
+ sn_str = "chksum";
+ }
+
+ // FIXME: add output of DIMM rank/width, as in: 2Rx4, 1Rx8, etc
+ printf("N%d.LMC%d.DIMM%d: %d MB, DDR%d %s %dRx%d %s, p/n: %s, %s: %u, %s\n",
+ node, ddr_interface_num, dimm, dimm_size_mb, ddr_type,
+ dimm_types[spd_module_type], num_ranks, dram_width,
+ (spd_ecc ? "ECC" : "non-ECC"), part_number,
+ sn_str, serial_number, volt_str);
+}
+
+const char *ddr3_dimm_types[16] = {
+ /* 0000 */ "Undefined",
+ /* 0001 */ "RDIMM",
+ /* 0010 */ "UDIMM",
+ /* 0011 */ "SO-DIMM",
+ /* 0100 */ "Micro-DIMM",
+ /* 0101 */ "Mini-RDIMM",
+ /* 0110 */ "Mini-UDIMM",
+ /* 0111 */ "Mini-CDIMM",
+ /* 1000 */ "72b-SO-UDIMM",
+ /* 1001 */ "72b-SO-RDIMM",
+ /* 1010 */ "72b-SO-CDIMM"
+ /* 1011 */ "LRDIMM",
+ /* 1100 */ "16b-SO-DIMM",
+ /* 1101 */ "32b-SO-DIMM",
+ /* 1110 */ "Reserved",
+ /* 1111 */ "Reserved"
+};
+
+static
+void report_ddr3_dimm(bdk_node_t node, const dimm_config_t *dimm_config,
+ int dimm, int ddr_interface_num, int num_ranks,
+ int dram_width, int dimm_size_mb)
+{
+ int spd_voltage;
+ char *volt_str = "1.5V"; /* default, in case no voltage bit below matches */
+
+ spd_voltage = read_spd(node, dimm_config, DDR3_SPD_NOMINAL_VOLTAGE);
+ if ((spd_voltage == 0) || (spd_voltage & 3))
+ volt_str = "1.5V";
+ if (spd_voltage & 2)
+ volt_str = "1.35V";
+ if (spd_voltage & 4)
+ volt_str = "1.2xV";
+
+ report_common_dimm(node, dimm_config, dimm, ddr3_dimm_types,
+ DDR3_DRAM, volt_str, ddr_interface_num,
+ num_ranks, dram_width, dimm_size_mb);
+}
+
+const char *ddr4_dimm_types[16] = {
+ /* 0000 */ "Extended",
+ /* 0001 */ "RDIMM",
+ /* 0010 */ "UDIMM",
+ /* 0011 */ "SO-DIMM",
+ /* 0100 */ "LRDIMM",
+ /* 0101 */ "Mini-RDIMM",
+ /* 0110 */ "Mini-UDIMM",
+ /* 0111 */ "Reserved",
+ /* 1000 */ "72b-SO-RDIMM",
+ /* 1001 */ "72b-SO-UDIMM",
+ /* 1010 */ "Reserved",
+ /* 1011 */ "Reserved",
+ /* 1100 */ "16b-SO-DIMM",
+ /* 1101 */ "32b-SO-DIMM",
+ /* 1110 */ "Reserved",
+ /* 1111 */ "Reserved"
+};
+
+static
+void report_ddr4_dimm(bdk_node_t node, const dimm_config_t *dimm_config,
+ int dimm, int ddr_interface_num, int num_ranks,
+ int dram_width, int dimm_size_mb)
+{
+ int spd_voltage;
+ char *volt_str = "1.2V"; /* default, in case no voltage bit below matches */
+
+ spd_voltage = read_spd(node, dimm_config, DDR4_SPD_MODULE_NOMINAL_VOLTAGE);
+ if ((spd_voltage == 0x01) || (spd_voltage & 0x02))
+ volt_str = "1.2V";
+ if ((spd_voltage == 0x04) || (spd_voltage & 0x08))
+ volt_str = "TBD1 V";
+ if ((spd_voltage == 0x10) || (spd_voltage & 0x20))
+ volt_str = "TBD2 V";
+
+ report_common_dimm(node, dimm_config, dimm, ddr4_dimm_types,
+ DDR4_DRAM, volt_str, ddr_interface_num,
+ num_ranks, dram_width, dimm_size_mb);
+}
+
+void report_dimm(bdk_node_t node, const dimm_config_t *dimm_config,
+ int dimm, int ddr_interface_num, int num_ranks,
+ int dram_width, int dimm_size_mb)
+{
+ int ddr_type;
+
+ /* ddr_type only indicates DDR4 or DDR3 */
+ ddr_type = get_ddr_type(node, dimm_config);
+
+ if (ddr_type == DDR4_DRAM)
+ report_ddr4_dimm(node, dimm_config, dimm, ddr_interface_num,
+ num_ranks, dram_width, dimm_size_mb);
+ else
+ report_ddr3_dimm(node, dimm_config, dimm, ddr_interface_num,
+ num_ranks, dram_width, dimm_size_mb);
+}
+
+static int
+get_ddr4_spd_speed(bdk_node_t node, const dimm_config_t *dimm_config)
+{
+ int spdMTB = 125;
+ int spdFTB = 1;
+
+ int tCKAVGmin
+ = spdMTB * read_spd(node, dimm_config, DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN)
+ + spdFTB * (signed char) read_spd(node, dimm_config, DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN);
+
+ return pretty_psecs_to_mts(tCKAVGmin);
+}
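+
+/* Worked example (hypothetical SPD contents): byte 18 = 7 and byte 125 = -42
+ * (signed) give tCKAVGmin = 125 * 7 - 42 = 833 ps, i.e. DDR4-2400.
+ */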
+
+static int
+get_ddr3_spd_speed(bdk_node_t node, const dimm_config_t *dimm_config)
+{
+ int spd_mtb_dividend = 0xff & read_spd(node, dimm_config, DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND);
+ int spd_mtb_divisor = 0xff & read_spd(node, dimm_config, DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR);
+ int spd_tck_min = 0xff & read_spd(node, dimm_config, DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN);
+
+ short ftb_Dividend = read_spd(node, dimm_config, DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4;
+ short ftb_Divisor = read_spd(node, dimm_config, DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf;
+
+ ftb_Divisor = (ftb_Divisor == 0) ? 1 : ftb_Divisor; /* Make sure that it is not 0 */
+
+ int mtb_psec = spd_mtb_dividend * 1000 / spd_mtb_divisor;
+ int tCKmin = mtb_psec * spd_tck_min;
+ tCKmin += ftb_Dividend *
+ (signed char) read_spd(node, dimm_config, DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN)
+ / ftb_Divisor;
+
+ return pretty_psecs_to_mts(tCKmin);
+}
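+
+/* Worked example (hypothetical SPD contents): MTB dividend/divisor = 1/8
+ * gives mtb_psec = 125, and a tCKmin byte of 10 yields 1250 ps before the
+ * fine correction, i.e. DDR3-1600.
+ */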
+
+static int
+speed_bin_down(int speed)
+{
+ if (speed == 2133)
+ return 1866;
+ else if (speed == 1866)
+ return 1600;
+ else
+ return speed;
+}
+
+int
+dram_get_default_spd_speed(bdk_node_t node, const ddr_configuration_t *ddr_config)
+{
+ int lmc, dimm;
+ int speed, ret_speed = 0;
+ int ddr_type = get_ddr_type(node, &ddr_config[0].dimm_config_table[0]);
+ int dimm_speed[8], dimm_count = 0;
+ int dimms_per_lmc = 0;
+
+ for (lmc = 0; lmc < 4; lmc++) {
+ for (dimm = 0; dimm < DDR_CFG_T_MAX_DIMMS; dimm++) {
+ const dimm_config_t *dimm_config = &ddr_config[lmc].dimm_config_table[dimm];
+ if (/*dimm_config->spd_addr ||*/ dimm_config->spd_ptr)
+ {
+ speed = (ddr_type == DDR4_DRAM)
+ ? get_ddr4_spd_speed(node, dimm_config)
+ : get_ddr3_spd_speed(node, dimm_config);
+ //printf("N%d.LMC%d.DIMM%d: SPD speed %d\n", node, lmc, dimm, speed);
+ dimm_speed[dimm_count] = speed;
+ dimm_count++;
+ if (lmc == 0)
+ dimms_per_lmc++;
+ }
+ }
+ }
+
+ // all DIMMs must be same speed
+ speed = dimm_speed[0];
+ for (dimm = 1; dimm < dimm_count; dimm++) {
+ if (dimm_speed[dimm] != speed) {
+ ret_speed = -1;
+ goto finish_up;
+ }
+ }
+
+ // if 2400 or greater, use 2133
+ if (speed >= 2400)
+ speed = 2133;
+
+ // use next speed down if 2DPC...
+ if (dimms_per_lmc > 1)
+ speed = speed_bin_down(speed);
+
+ // Update the in memory config to match the automatically calculated speed
+ bdk_config_set_int(speed, BDK_CONFIG_DDR_SPEED, node);
+
+ // do filtering for our jittery PLL
+ if (speed == 2133)
+ speed = 2100;
+ else if (speed == 1866)
+ speed = 1880;
+
+ // OK, return what we have...
+ ret_speed = mts_to_hertz(speed);
+
+ finish_up:
+ //printf("N%d: Returning default SPD speed %d\n", node, ret_speed);
+ return ret_speed;
+}
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-spd.h b/src/vendorcode/cavium/bdk/libdram/dram-spd.h
new file mode 100644
index 0000000000..df229f4959
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-spd.h
@@ -0,0 +1,166 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Functions, enumerations, and structures related to DIMM SPDs.
+ * Everything in this file is internal to libdram.
+ */
+
+/* data field addresses in the DDR3 SPD eeprom */
+typedef enum ddr3_spd_addrs {
+ DDR3_SPD_BYTES_PROGRAMMED = 0,
+ DDR3_SPD_REVISION = 1,
+ DDR3_SPD_KEY_BYTE_DEVICE_TYPE = 2,
+ DDR3_SPD_KEY_BYTE_MODULE_TYPE = 3,
+ DDR3_SPD_DENSITY_BANKS = 4,
+ DDR3_SPD_ADDRESSING_ROW_COL_BITS = 5,
+ DDR3_SPD_NOMINAL_VOLTAGE = 6,
+ DDR3_SPD_MODULE_ORGANIZATION = 7,
+ DDR3_SPD_MEMORY_BUS_WIDTH = 8,
+ DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR = 9,
+ DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND = 10,
+ DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR = 11,
+ DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN = 12,
+ DDR3_SPD_CAS_LATENCIES_LSB = 14,
+ DDR3_SPD_CAS_LATENCIES_MSB = 15,
+ DDR3_SPD_MIN_CAS_LATENCY_TAAMIN = 16,
+ DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN = 17,
+ DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN = 18,
+ DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN = 19,
+ DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN = 20,
+ DDR3_SPD_UPPER_NIBBLES_TRAS_TRC = 21,
+ DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN = 22,
+ DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN = 23,
+ DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN = 24,
+ DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN = 25,
+ DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN = 26,
+ DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN = 27,
+ DDR3_SPD_UPPER_NIBBLE_TFAW = 28,
+ DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN = 29,
+ DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN = 34,
+ DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN = 35,
+ DDR3_SPD_MIN_RAS_CAS_DELAY_FINE_TRCDMIN = 36,
+ DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN = 37,
+ DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_FINE_TRCMIN = 38,
+ DDR3_SPD_ADDRESS_MAPPING = 63,
+ DDR3_SPD_MODULE_SERIAL_NUMBER = 122,
+ DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE = 126,
+ DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE = 127,
+ DDR3_SPD_MODULE_PART_NUMBER = 128
+} ddr3_spd_addr_t;
+
+/* data field addresses in the DDR4 SPD eeprom */
+typedef enum ddr4_spd_addrs {
+ DDR4_SPD_BYTES_PROGRAMMED = 0,
+ DDR4_SPD_REVISION = 1,
+ DDR4_SPD_KEY_BYTE_DEVICE_TYPE = 2,
+ DDR4_SPD_KEY_BYTE_MODULE_TYPE = 3,
+ DDR4_SPD_DENSITY_BANKS = 4,
+ DDR4_SPD_ADDRESSING_ROW_COL_BITS = 5,
+ DDR4_SPD_PACKAGE_TYPE = 6,
+ DDR4_SPD_OPTIONAL_FEATURES = 7,
+ DDR4_SPD_THERMAL_REFRESH_OPTIONS = 8,
+ DDR4_SPD_OTHER_OPTIONAL_FEATURES = 9,
+ DDR4_SPD_SECONDARY_PACKAGE_TYPE = 10,
+ DDR4_SPD_MODULE_NOMINAL_VOLTAGE = 11,
+ DDR4_SPD_MODULE_ORGANIZATION = 12,
+ DDR4_SPD_MODULE_MEMORY_BUS_WIDTH = 13,
+ DDR4_SPD_MODULE_THERMAL_SENSOR = 14,
+ DDR4_SPD_RESERVED_BYTE15 = 15,
+ DDR4_SPD_RESERVED_BYTE16 = 16,
+ DDR4_SPD_TIMEBASES = 17,
+ DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN = 18,
+ DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX = 19,
+ DDR4_SPD_CAS_LATENCIES_BYTE0 = 20,
+ DDR4_SPD_CAS_LATENCIES_BYTE1 = 21,
+ DDR4_SPD_CAS_LATENCIES_BYTE2 = 22,
+ DDR4_SPD_CAS_LATENCIES_BYTE3 = 23,
+ DDR4_SPD_MIN_CAS_LATENCY_TAAMIN = 24,
+ DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN = 25,
+ DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN = 26,
+ DDR4_SPD_UPPER_NIBBLES_TRAS_TRC = 27,
+ DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN = 28,
+ DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN = 29,
+ DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN = 30,
+ DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN = 31,
+ DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN = 32,
+ DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN = 33,
+ DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN = 34,
+ DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN = 35,
+ DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN = 36,
+ DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN = 37,
+ DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN = 38,
+ DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN = 39,
+ DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN = 40,
+ DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN = 117,
+ DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN = 118,
+ DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN = 119,
+ DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN = 120,
+ DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN = 121,
+ DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN = 122,
+ DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN = 123,
+ DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX = 124,
+ DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN = 125,
+ DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE = 126,
+ DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE = 127,
+ DDR4_SPD_REFERENCE_RAW_CARD = 130,
+ DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE = 131,
+ DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB = 133,
+ DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB = 134,
+ DDR4_SPD_REGISTER_REVISION_NUMBER = 135,
+ DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM = 136,
+ DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL = 137,
+ DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK = 138,
+ DDR4_SPD_MODULE_SERIAL_NUMBER = 325,
+ DDR4_SPD_MODULE_PART_NUMBER = 329
+} ddr4_spd_addr_t;
+
+extern int read_entire_spd(bdk_node_t node, dram_config_t *cfg, int lmc, int dimm);
+extern int read_spd(bdk_node_t node, const dimm_config_t *dimm_config, int spd_field);
+
+extern int validate_dimm(bdk_node_t node, const dimm_config_t *dimm_config);
+
+extern void report_dimm(bdk_node_t node, const dimm_config_t *dimm_config,
+ int dimm, int ddr_interface_num, int num_ranks,
+ int dram_width, int dimm_size_mb);
+
+extern int dram_get_default_spd_speed(bdk_node_t node, const ddr_configuration_t *ddr_config);
+
+extern const char *ddr3_dimm_types[];
+extern const char *ddr4_dimm_types[];
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c b/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c
new file mode 100644
index 0000000000..e0e9d4442c
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c
@@ -0,0 +1,2012 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "dram-internal.h"
+
+// if enhanced verbosity levels are defined, use them
+#if defined(VB_PRT)
+#define ddr_print2(format, ...) VB_PRT(VBL_FAE, format, ##__VA_ARGS__)
+#define ddr_print3(format, ...) VB_PRT(VBL_TME, format, ##__VA_ARGS__)
+#define ddr_print4(format, ...) VB_PRT(VBL_DEV, format, ##__VA_ARGS__)
+#define ddr_print5(format, ...) VB_PRT(VBL_DEV3, format, ##__VA_ARGS__)
+#else
+#define ddr_print2 ddr_print
+#define ddr_print3 ddr_print
+#define ddr_print4 ddr_print
+#define ddr_print5 ddr_print
+#endif
+
+static int64_t test_dram_byte_threads_done;
+static uint64_t test_dram_byte_threads_errs;
+static uint64_t test_dram_byte_lmc_errs[4];
+
+#if 0
+/*
+ * Suggested testing patterns.
+ */
+static const uint64_t test_pattern_2[] = {
+ 0xFFFFFFFFFFFFFFFFULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0xFFFFFFFFFFFFFFFFULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0xFFFFFFFFFFFFFFFFULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0xFFFFFFFFFFFFFFFFULL,
+ 0x5555555555555555ULL,
+ 0xFFFFFFFFFFFFFFFFULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xFFFFFFFFFFFFFFFFULL,
+ 0x5555555555555555ULL,
+};
+ /*
+ * or possibly
+ */
+static const uint64_t test_pattern_3[] = {
+ 0xFDFDFDFDFDFDFDFDULL,
+ 0x8787878787878787ULL,
+ 0xFEFEFEFEFEFEFEFEULL,
+ 0xC3C3C3C3C3C3C3C3ULL,
+ 0x7F7F7F7F7F7F7F7FULL,
+ 0xE1E1E1E1E1E1E1E1ULL,
+ 0xBFBFBFBFBFBFBFBFULL,
+ 0xF0F0F0F0F0F0F0F0ULL,
+ 0xDFDFDFDFDFDFDFDFULL,
+ 0x7878787878787878ULL,
+ 0xEFEFEFEFEFEFEFEFULL,
+ 0x3C3C3C3C3C3C3C3CULL,
+ 0xF7F7F7F7F7F7F7F7ULL,
+ 0x1E1E1E1E1E1E1E1EULL,
+ 0xFBFBFBFBFBFBFBFBULL,
+ 0x0F0F0F0F0F0F0F0FULL,
+};
+
+static const uint64_t test_pattern_1[] = {
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+#if 0 // only need a cacheline size
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+#endif
+};
+
+// setup default for test pattern array
+static const uint64_t *dram_tune_test_pattern = test_pattern_1;
+#endif
+
+// Set this to 1 to shorten the testing by exiting as soon as all byte lanes have errors.
+// Leaving it at 0 forces the testing to cover the entire range on every iteration,
+// hopefully ensuring an even load on the memory subsystem.
+#define EXIT_WHEN_ALL_LANES_HAVE_ERRORS 0
+
+#define DEFAULT_TEST_BURSTS 5 // FIXME: this is what works so far...// FIXME: was 7
+int dram_tune_use_bursts = DEFAULT_TEST_BURSTS;
+
+// dram_tune_rank_offset is used to offset the second area used in test_dram_mem_xor.
+//
+// If only a single-rank DIMM, the offset will be 256MB from the start of the first area,
+// which is more than enough for the restricted looping/address range actually tested...
+//
+// If a 2-rank DIMM, the offset will be the size of a rank's address space, so the effect
+// will be to have the first and second areas in different ranks on the same DIMM.
+//
+// So, we default this to single-rank, and it will be overridden when 2-ranks are detected.
+//
+
+// FIXME: ASSUME that we have DIMMS no less than 4GB in size
+
+// offset to first area that avoids any boot stuff in low range (below 256MB)
+#define AREA_BASE_OFFSET (1ULL << 28) // bit 28 always ON
+
+// offset to duplicate area; may coincide with rank 1 base address for 2-rank 4GB DIMM
+#define AREA_DUPE_OFFSET (1ULL << 31) // bit 31 always ON
+
+// defaults to DUPE, but will be set elsewhere to offset to next RANK if multi-rank DIMM
+static uint64_t dram_tune_rank_offset = AREA_DUPE_OFFSET; // default
+
+// defaults to 0, but will be set elsewhere to the address offset to next DIMM if multi-slot
+static uint64_t dram_tune_dimm_offset = 0; // default
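+
+// Concretely, with the defaults above a test word at (base + AREA_BASE_OFFSET + x)
+// has its duplicate at (base + AREA_BASE_OFFSET + AREA_DUPE_OFFSET + x), i.e. 2GB
+// higher; with 2 ranks the duplicate instead lands at the same offset in rank 1.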
+
+
+static int speed_bin_offset[3] = {25, 20, 15};
+static int speed_bin_winlen[3] = {70, 60, 60};
+
+static int
+get_speed_bin(bdk_node_t node, int lmc)
+{
+ uint32_t mts_speed = (libdram_get_freq_from_pll(node, lmc) / 1000000) * 2;
+ int ret = 0;
+
+ // FIXME: is this reasonable speed "binning"?
+ if (mts_speed >= 1700) {
+ if (mts_speed >= 2000)
+ ret = 2;
+ else
+ ret = 1;
+ }
+
+ debug_print("N%d.LMC%d: %s: returning bin %d for MTS %d\n",
+ node, lmc, __FUNCTION__, ret, mts_speed);
+
+ return ret;
+}
+
+static int is_low_risk_offset(int speed_bin, int offset)
+{
+ return (_abs(offset) <= speed_bin_offset[speed_bin]);
+}
+static int is_low_risk_winlen(int speed_bin, int winlen)
+{
+ return (winlen >= speed_bin_winlen[speed_bin]);
+}
+
+#define ENABLE_PREFETCH 0
+#define ENABLE_WBIL2 1
+#define ENABLE_SBLKDTY 0
+
+#define BDK_SYS_CVMCACHE_INV_L2 "#0,c11,c1,#1" // L2 Cache Invalidate
+#define BDK_CACHE_INV_L2(address) { asm volatile ("sys " BDK_SYS_CVMCACHE_INV_L2 ", %0" : : "r" (address)); }
+
+int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, uint64_t *xor_data)
+{
+ uint64_t p1, p2, d1, d2;
+ uint64_t v, v1;
+ uint64_t p2offset = 0x10000000/* was: dram_tune_rank_offset; */; // FIXME?
+ uint64_t datamask;
+ uint64_t xor;
+ uint64_t i, j, k;
+ uint64_t ii;
+ int errors = 0;
+ //uint64_t index;
+ uint64_t pattern1 = bdk_rng_get_random64();
+ uint64_t pattern2 = 0;
+ uint64_t bad_bits[2] = {0,0};
+
+#if ENABLE_SBLKDTY
+ BDK_CSR_MODIFY(c, node, BDK_L2C_CTL, c.s.dissblkdty = 0);
+#endif
+
+ // Byte lanes may be clear in the mask to indicate no testing on that lane.
+ datamask = bitmask;
+
+ // final address must include LMC and node
+ p |= (lmc<<7); /* Map address into proper interface */
+ p = bdk_numa_get_address(node, p); /* Map to node */
+
+ /* Add offset to both test regions to not clobber boot stuff
+ * when running from L2 for NAND boot.
+ */
+ p += AREA_BASE_OFFSET; // make sure base is out of the way of boot
+
+#define II_INC (1ULL << 29)
+#define II_MAX (1ULL << 31)
+#define K_INC (1ULL << 14)
+#define K_MAX (1ULL << 20)
+#define J_INC (1ULL << 9)
+#define J_MAX (1ULL << 12)
+#define I_INC (1ULL << 3)
+#define I_MAX (1ULL << 7)
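+
+// Illustrative coverage (a sketch of the strides above): each (ii, k, j)
+// combination selects one 128-byte cacheline-sized window and the i loop
+// walks its sixteen 8-byte slots; e.g. ii = 0, k = 0x4000, j = 0x200 touches
+// p + 0x4200 .. p + 0x427f in both test areas.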
+
+ debug_print("N%d.LMC%d: dram_tuning_mem_xor: phys_addr=0x%lx\n",
+ node, lmc, p);
+
+#if 0
+ int ix;
+ // add this loop to fill memory with the test pattern first
+ // loops are ordered so that only entire cachelines are written
+ for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
+ for (k = 0; k < K_MAX; k += K_INC) {
+ for (j = 0; j < J_MAX; j += J_INC) {
+ p1 = p + ii + k + j;
+ p2 = p1 + p2offset;
+ for (i = 0, ix = 0; i < I_MAX; i += I_INC, ix++) {
+
+ v = dram_tune_test_pattern[ix];
+ v1 = v; // write the same thing to both areas
+
+ __bdk_dram_write64(p1 + i, v);
+ __bdk_dram_write64(p2 + i, v1);
+
+ }
+#if ENABLE_WBIL2
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
+#endif
+ }
+ }
+ } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */
+#endif
+
+#if ENABLE_PREFETCH
+ BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE);
+#endif
+
+ // loops are ordered so that only a single 64-bit slot is written to each cacheline at one time,
+ // then the cachelines are forced out; this should maximize read/write traffic
+ for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
+ for (k = 0; k < K_MAX; k += K_INC) {
+ for (i = 0; i < I_MAX; i += I_INC) {
+ for (j = 0; j < J_MAX; j += J_INC) {
+
+ p1 = p + ii + k + j;
+ p2 = p1 + p2offset;
+
+#if ENABLE_PREFETCH
+ if (j < (J_MAX - J_INC)) {
+ BDK_PREFETCH(p1 + J_INC, BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p2 + J_INC, BDK_CACHE_LINE_SIZE);
+ }
+#endif
+
+ v = pattern1 * (p1 + i);
+ v1 = v; // write the same thing to both areas
+
+ __bdk_dram_write64(p1 + i, v);
+ __bdk_dram_write64(p2 + i, v1);
+
+#if ENABLE_WBIL2
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
+#endif
+ }
+ }
+ }
+ } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */
+
+ BDK_DCACHE_INVALIDATE;
+
+ debug_print("N%d.LMC%d: dram_tuning_mem_xor: done INIT loop\n",
+ node, lmc);
+
+ /* Make a series of passes over the memory areas. */
+
+ for (int burst = 0; burst < 1/* was: dram_tune_use_bursts*/; burst++)
+ {
+ uint64_t this_pattern = bdk_rng_get_random64();
+ pattern2 ^= this_pattern;
+
+ /* XOR the data with a random value, applying the change to both
+ * memory areas.
+ */
+#if ENABLE_PREFETCH
+ BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE);
+#endif
+
+ for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
+ for (k = 0; k < K_MAX; k += K_INC) {
+ for (i = 0; i < I_MAX; i += I_INC) { // FIXME: rearranged, did not make much difference?
+ for (j = 0; j < J_MAX; j += J_INC) {
+
+ p1 = p + ii + k + j;
+ p2 = p1 + p2offset;
+
+#if ENABLE_PREFETCH
+ if (j < (J_MAX - J_INC)) {
+ BDK_PREFETCH(p1 + J_INC, BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p2 + J_INC, BDK_CACHE_LINE_SIZE);
+ }
+#endif
+
+ v = __bdk_dram_read64(p1 + i) ^ this_pattern;
+ v1 = __bdk_dram_read64(p2 + i) ^ this_pattern;
+
+#if ENABLE_WBIL2
+ BDK_CACHE_INV_L2(p1);
+ BDK_CACHE_INV_L2(p2);
+#endif
+
+ __bdk_dram_write64(p1 + i, v);
+ __bdk_dram_write64(p2 + i, v1);
+
+#if ENABLE_WBIL2
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
+#endif
+ }
+ }
+ }
+ } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */
+
+ BDK_DCACHE_INVALIDATE;
+
+ debug_print("N%d.LMC%d: dram_tuning_mem_xor: done MODIFY loop\n",
+ node, lmc);
+
+#if ENABLE_PREFETCH
+ BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE);
+#endif
+
+ /* Look for differences in the areas. If there is a mismatch, reset
+ * both memory locations with the same pattern. Failing to do so
+ * means that on all subsequent passes the pair of locations remain
+ * out of sync giving spurious errors.
+ */
+ // FIXME: change the loop order so that an entire cache line is compared at one time
+ // FIXME: this is so that a read error that occurs *anywhere* on the cacheline will be caught,
+ // FIXME: rather than comparing only 1 cacheline slot at a time, where an error on a different
+ // FIXME: slot will be missed that time around
+ // Does the above make sense?
+
+ for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
+ for (k = 0; k < K_MAX; k += K_INC) {
+ for (j = 0; j < J_MAX; j += J_INC) {
+
+ p1 = p + ii + k + j;
+ p2 = p1 + p2offset;
+
+#if ENABLE_PREFETCH
+ if (j < (J_MAX - J_INC)) {
+ BDK_PREFETCH(p1 + J_INC, BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p2 + J_INC, BDK_CACHE_LINE_SIZE);
+ }
+#endif
+
+ // process entire cachelines in the innermost loop
+ for (i = 0; i < I_MAX; i += I_INC) {
+
+ v = ((p1 + i) * pattern1) ^ pattern2; // FIXME: this should predict what we find...???
+ d1 = __bdk_dram_read64(p1 + i);
+ d2 = __bdk_dram_read64(p2 + i);
+
+ xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes
+
+ if (!xor)
+ continue;
+
+ // accumulate bad bits
+ bad_bits[0] |= xor;
+ //bad_bits[1] |= ~mpr_data1 & 0xffUL; // cannot do ECC here
+
+ int bybit = 1;
+ uint64_t bymsk = 0xffULL; // start in byte lane 0
+ while (xor != 0) {
+ debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n",
+ burst, p1, p2, v, d1, d2);
+ if (xor & bymsk) { // error(s) in this lane
+ errors |= bybit; // set the byte error bit
+ xor &= ~bymsk; // clear byte lane in error bits
+ datamask &= ~bymsk; // clear the byte lane in the mask
+#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS
+ if (datamask == 0) { // nothing left to do
+ return errors; // completely done when errors found in all byte lanes in datamask
+ }
+#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */
+ }
+ bymsk <<= 8; // move mask into next byte lane
+ bybit <<= 1; // move bit into next byte position
+ }
+ }
+#if ENABLE_WBIL2
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
+#endif
+ }
+ }
+ } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */
+
+ debug_print("N%d.LMC%d: dram_tuning_mem_xor: done TEST loop\n",
+ node, lmc);
+
+ } /* for (int burst = 0; burst < dram_tune_use_bursts; burst++) */
+
+ if (xor_data != NULL) { // send the bad bits back...
+ xor_data[0] = bad_bits[0];
+ xor_data[1] = bad_bits[1]; // let it be zeroed
+ }
+
+#if ENABLE_SBLKDTY
+ BDK_CSR_MODIFY(c, node, BDK_L2C_CTL, c.s.dissblkdty = 1);
+#endif
+
+ return errors;
+}
+
+#undef II_INC
+#undef II_MAX
+
+#define EXTRACT(v, lsb, width) (((v) >> (lsb)) & ((1ull << (width)) - 1))
+#define LMCNO(address, xbits) (EXTRACT(address, 7, xbits) ^ EXTRACT(address, 20, xbits) ^ EXTRACT(address, 12, xbits))
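+
+// Worked example (hypothetical address): with xbits = 2 (4 LMCs active),
+// address 0x10000280 has bits [8:7] = 1, [21:20] = 0 and [13:12] = 0, so
+// LMCNO() = 1 ^ 0 ^ 0 = 1 and the error would be charged to LMC1.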
+
+static int dram_tuning_mem_xor2(uint64_t p, uint64_t bitmask, int xbits)
+{
+ uint64_t p1, p2, d1, d2;
+ uint64_t v, vpred;
+ uint64_t p2offset = dram_tune_rank_offset; // FIXME?
+ uint64_t datamask;
+ uint64_t xor;
+ uint64_t ii;
+ uint64_t pattern1 = bdk_rng_get_random64();
+ uint64_t pattern2 = 0;
+ int errors = 0;
+ int errs_by_lmc[4] = { 0,0,0,0 };
+ int lmc;
+ uint64_t vbase, vincr;
+
+ // Byte lanes may be clear in the mask to indicate no testing on that lane.
+ datamask = bitmask;
+
+ /* Add offset to both test regions to not clobber boot stuff
+ * when running from L2 for NAND boot.
+ */
+ p += AREA_BASE_OFFSET; // make sure base is out of the way of boot
+
+ // move the multiplies outside the loop
+ vbase = p * pattern1;
+ vincr = 8 * pattern1;
+
+#define II_INC (1ULL << 3)
+#define II_MAX (1ULL << 22) // stop where the core ID bits start
+
+ // walk the memory areas by 8-byte words
+ v = vbase;
+ for (ii = 0; ii < II_MAX; ii += II_INC) {
+
+ p1 = p + ii;
+ p2 = p1 + p2offset;
+
+ __bdk_dram_write64(p1, v);
+ __bdk_dram_write64(p2, v);
+
+ v += vincr;
+ }
+
+ __bdk_dram_flush_to_mem_range(p , p + II_MAX);
+ __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + II_MAX);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Make a series of passes over the memory areas. */
+
+ for (int burst = 0; burst < dram_tune_use_bursts; burst++)
+ {
+ uint64_t this_pattern = bdk_rng_get_random64();
+ pattern2 ^= this_pattern;
+
+ /* XOR the data with a random value, applying the change to both
+ * memory areas.
+ */
+#if 0
+ BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE);
+#endif
+ for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
+
+ p1 = p + ii;
+ p2 = p1 + p2offset;
+
+ d1 = __bdk_dram_read64(p1) ^ this_pattern;
+ d2 = __bdk_dram_read64(p2) ^ this_pattern;
+
+ __bdk_dram_write64(p1, d1);
+ __bdk_dram_write64(p2, d2);
+
+ }
+ __bdk_dram_flush_to_mem_range(p , p + II_MAX);
+ __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + II_MAX);
+ BDK_DCACHE_INVALIDATE;
+
+ /* Look for differences in the areas. If there is a mismatch, reset
+ * both memory locations with the same pattern. Failing to do so
+ * means that on all subsequent passes the pair of locations remain
+ * out of sync giving spurious errors.
+ */
+#if 0
+ BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE);
+#endif
+ vpred = vbase;
+ for (ii = 0; ii < II_MAX; ii += II_INC) {
+
+ p1 = p + ii;
+ p2 = p1 + p2offset;
+
+ v = vpred ^ pattern2; // this should predict what we find...
+ d1 = __bdk_dram_read64(p1);
+ d2 = __bdk_dram_read64(p2);
+ vpred += vincr;
+
+ xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes
+ if (!xor) // no errors
+ continue;
+
+ lmc = LMCNO(p1, xbits); // FIXME: LMC should be SAME for p1 and p2!!!
+ if (lmc != (int)LMCNO(p2, xbits)) {
+ printf("ERROR: LMCs for addresses [0x%016lX] (%lld) and [0x%016lX] (%lld) differ!!!\n",
+ p1, LMCNO(p1, xbits), p2, LMCNO(p2, xbits));
+ }
+ int bybit = 1;
+ uint64_t bymsk = 0xffULL; // start in byte lane 0
+ while (xor != 0) {
+ debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n",
+ burst, p1, p2, v, d1, d2);
+ if (xor & bymsk) { // error(s) in this lane
+ errs_by_lmc[lmc] |= bybit; // set the byte error bit in the LMCs errors
+ errors |= bybit; // set the byte error bit
+ xor &= ~bymsk; // clear byte lane in error bits
+ //datamask &= ~bymsk; // clear the byte lane in the mask
+ }
+ bymsk <<= 8; // move mask into next byte lane
+ bybit <<= 1; // move bit into next byte position
+ } /* while (xor != 0) */
+ } /* for (ii = 0; ii < II_MAX; ii += II_INC) */
+ } /* for (int burst = 0; burst < dram_tune_use_bursts; burst++) */
+
+ // update the global LMC error states
+ for (lmc = 0; lmc < 4; lmc++) {
+ if (errs_by_lmc[lmc]) {
+ bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_lmc_errs[lmc], errs_by_lmc[lmc]);
+ }
+ }
+
+ return errors;
+}
+
+#if 0
+static int dram_tuning_mem_rows(uint64_t p, uint64_t bitmask)
+{
+ uint64_t p1, p2, d1, d2;
+ uint64_t v, v1;
+ uint64_t p2offset = dram_tune_rank_offset; // FIXME?
+ uint64_t datamask;
+ uint64_t xor;
+ int i, j, k, ii;
+ int errors = 0;
+ int index;
+ uint64_t pattern1 = 0; // FIXME: maybe this could be from a table?
+ uint64_t pattern2;
+
+ // Byte lanes may be clear in the mask to indicate no testing on that lane.
+ datamask = bitmask;
+
+ /* Add offset to both test regions to not clobber boot stuff
+ * when running from L2 for NAND boot.
+ */
+ p += 0x10000000; // FIXME? was: 0x4000000; // make sure base is out of the way of cores for tuning
+
+ pattern2 = pattern1;
+ for (k = 0; k < (1 << 20); k += (1 << 14)) {
+ for (j = 0; j < (1 << 12); j += (1 << 9)) {
+ for (i = 0; i < (1 << 7); i += 8) {
+ index = i + j + k;
+ p1 = p + index;
+ p2 = p1 + p2offset;
+
+ v = pattern2;
+ v1 = v; // write the same thing to same slot in both cachelines
+ pattern2 = ~pattern2; // flip bits for next slots
+
+ __bdk_dram_write64(p1, v);
+ __bdk_dram_write64(p2, v1);
+ }
+#if 1
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
+#endif
+ }
+ }
+
+#if 0
+ __bdk_dram_flush_to_mem_range(p, p + (1ULL << 20)); // max_addr is start + where k stops...
+ __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + (1ULL << 20)); // max_addr is start + where k stops...
+#endif
+ BDK_DCACHE_INVALIDATE;
+
+ /* Make a series of passes over the memory areas. */
+
+ for (int burst = 0; burst < dram_tune_use_bursts; burst++)
+ {
+ /* just read and flip the bits applying the change to both
+ * memory areas.
+ */
+ for (k = 0; k < (1 << 20); k += (1 << 14)) {
+ for (j = 0; j < (1 << 12); j += (1 << 9)) {
+ for (i = 0; i < (1 << 7); i += 8) {
+ index = i + j + k;
+ p1 = p + index;
+ p2 = p1 + p2offset;
+
+ v = ~__bdk_dram_read64(p1);
+ v1 = ~__bdk_dram_read64(p2);
+
+ __bdk_dram_write64(p1, v);
+ __bdk_dram_write64(p2, v1);
+ }
+#if 1
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
+#endif
+ }
+ }
+
+#if 0
+ __bdk_dram_flush_to_mem_range(p, p + (1ULL << 20)); // max_addr is start + where k stops...
+ __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + (1ULL << 20)); // max_addr is start + where k stops...
+#endif
+ BDK_DCACHE_INVALIDATE;
+
+ /* Look for differences in the areas. If there is a mismatch, reset
+ * both memory locations with the same pattern. Failing to do so
+ * means that on all subsequent passes the pair of locations remain
+ * out of sync giving spurious errors.
+ */
+
+ // FIXME: change the loop order so that an entire cache line is compared at one time
+ // FIXME: this is so that a read error that occurs *anywhere* on the cacheline will be caught,
+ // FIXME: rather than comparing only 1 cacheline slot at a time, where an error on a different
+ // FIXME: slot will be missed that time around
+ // Does the above make sense?
+
+ pattern2 = ~pattern1; // slots have been flipped by the above loop
+
+ for (k = 0; k < (1 << 20); k += (1 << 14)) {
+ for (j = 0; j < (1 << 12); j += (1 << 9)) {
+ for (i = 0; i < (1 << 7); i += 8) {
+ index = i + j + k;
+ p1 = p + index;
+ p2 = p1 + p2offset;
+
+ v = pattern2; // FIXME: this should predict what we find...???
+ d1 = __bdk_dram_read64(p1);
+ d2 = __bdk_dram_read64(p2);
+ pattern2 = ~pattern2; // flip for next slot
+
+ xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes
+
+ int bybit = 1;
+ uint64_t bymsk = 0xffULL; // start in byte lane 0
+ while (xor != 0) {
+ debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n",
+ burst, p1, p2, v, d1, d2);
+ if (xor & bymsk) { // error(s) in this lane
+ errors |= bybit; // set the byte error bit
+ xor &= ~bymsk; // clear byte lane in error bits
+ datamask &= ~bymsk; // clear the byte lane in the mask
+#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS
+ if (datamask == 0) { // nothing left to do
+ return errors; // completely done when errors found in all byte lanes in datamask
+ }
+#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */
+ }
+ bymsk <<= 8; // move mask into next byte lane
+ bybit <<= 1; // move bit into next byte position
+ }
+ }
+ }
+ }
+ pattern1 = ~pattern1; // flip the starting pattern for the next burst
+
+ } /* for (int burst = 0; burst < dram_tune_use_bursts; burst++) */
+ return errors;
+}
+#endif
+
+// cores to use
+#define DEFAULT_USE_CORES 44 // FIXME: was (1 << CORE_BITS)
+int dram_tune_use_cores = DEFAULT_USE_CORES; // max cores to use, override available
+int dram_tune_max_cores; // max cores available on a node
+#define CORE_SHIFT 22 // FIXME: offset into rank_address passed to test_dram_byte
+
+typedef void (*__dram_tuning_thread_t)(int arg, void *arg1);
+
+typedef struct
+{
+ bdk_node_t node;
+ int64_t num_lmcs;
+ uint64_t byte_mask;
+} test_dram_byte_info_t;
+
+static void dram_tuning_thread(int arg, void *arg1)
+{
+ test_dram_byte_info_t *test_info = arg1;
+ int core = arg;
+ uint64_t errs;
+ bdk_node_t node = test_info->node;
+ int num_lmcs, lmc;
+#if 0
+ num_lmcs = test_info->num_lmcs;
+ // map core numbers into hopefully equal groups per LMC
+ lmc = core % num_lmcs;
+#else
+ // FIXME: this code should allow running all the cores on a single LMC...
+ // if incoming num_lmcs > 0, then use as normal; if < 0 remap to a single LMC
+ if (test_info->num_lmcs >= 0) {
+ num_lmcs = test_info->num_lmcs;
+ // map core numbers into hopefully equal groups per LMC
+ lmc = core % num_lmcs;
+ } else {
+ num_lmcs = 1;
+ // incoming num_lmcs is (desired LMC - 10)
+ lmc = 10 + test_info->num_lmcs;
+ }
+#endif
+ uint64_t base_address = 0/* was: (lmc << 7); now done by callee */;
+ uint64_t bytemask = test_info->byte_mask;
+
+ /* Figure out our work memory range.
+ *
+ * Note: base_address above just provides the physical offset which determines
+ * specific LMC portions of the address space and does not have the node bits set.
+ */
+ //was: base_address = bdk_numa_get_address(node, base_address); // map to node // now done by callee
+ base_address |= (core << CORE_SHIFT); // FIXME: also put full core into address
+ if (dram_tune_dimm_offset) { // if multi-slot in some way, choose a DIMM for the core
+ base_address |= (core & (1 << (num_lmcs >> 1))) ? dram_tune_dimm_offset : 0;
+ }
+
+ debug_print("Node %d, core %d, Testing area 1 at 0x%011lx, area 2 at 0x%011lx\n",
+ node, core, base_address + AREA_BASE_OFFSET,
+ base_address + AREA_BASE_OFFSET + dram_tune_rank_offset);
+
+ errs = dram_tuning_mem_xor(node, lmc, base_address, bytemask, NULL);
+ //errs = dram_tuning_mem_rows(base_address, bytemask);
+
+ /* Report that we're done */
+ debug_print("Core %d on LMC %d node %d done with test_dram_byte with 0x%lx errs\n",
+ core, lmc, node, errs);
+
+ if (errs) {
+ bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_threads_errs, errs);
+ bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_lmc_errs[lmc], errs);
+ }
+
+ bdk_atomic_add64_nosync(&test_dram_byte_threads_done, 1);
+
+ return;
+}
+
+static void dram_tuning_thread2(int arg, void *arg1)
+{
+ test_dram_byte_info_t *test_info = arg1;
+ int core = arg;
+ uint64_t errs;
+ bdk_node_t node = test_info->node;
+ int num_lmcs = test_info->num_lmcs;
+
+ uint64_t base_address = 0; //
+ uint64_t bytemask = test_info->byte_mask;
+
+ /* Figure out our work memory range.
+ *
+ * Note: base_address above just provides the physical offset which determines
+ * specific portions of the address space and does not have the node bits set.
+ */
+ base_address = bdk_numa_get_address(node, base_address); // map to node
+ base_address |= (core << CORE_SHIFT); // FIXME: also put full core into address
+ if (dram_tune_dimm_offset) { // if multi-slot in some way, choose a DIMM for the core
+ base_address |= (core & 1) ? dram_tune_dimm_offset : 0;
+ }
+
+ debug_print("Node %d, core %d, Testing area 1 at 0x%011lx, area 2 at 0x%011lx\n",
+ node, core, base_address + AREA_BASE_OFFSET,
+ base_address + AREA_BASE_OFFSET + dram_tune_rank_offset);
+
+ errs = dram_tuning_mem_xor2(base_address, bytemask, (num_lmcs >> 1)); // 4->2, 2->1, 1->0
+ //errs = dram_tuning_mem_rows(base_address, bytemask);
+
+ /* Report that we're done */
+ debug_print("Core %d on LMC %d node %d done with test_dram_byte with 0x%lx errs\n",
+ core, lmc, node, errs);
+
+ if (errs) {
+ bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_threads_errs, errs);
+ // FIXME: this will have been done already in the called test routine
+ //bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_lmc_errs[lmc], errs);
+ }
+
+ bdk_atomic_add64_nosync(&test_dram_byte_threads_done, 1);
+
+ return;
+}
+
+static int dram_tune_use_xor2 = 1; // FIXME: do NOT default to original mem_xor (LMC-based) code
+
+static int
+run_dram_tuning_threads(bdk_node_t node, int num_lmcs, uint64_t bytemask)
+{
+ test_dram_byte_info_t test_dram_byte_info;
+ test_dram_byte_info_t *test_info = &test_dram_byte_info;
+ int total_count = 0;
+ __dram_tuning_thread_t thread_p = (dram_tune_use_xor2) ? dram_tuning_thread2 : dram_tuning_thread;
+
+ test_info->node = node;
+ test_info->num_lmcs = num_lmcs;
+ test_info->byte_mask = bytemask;
+
+ // init some global data
+ bdk_atomic_set64(&test_dram_byte_threads_done, 0);
+ bdk_atomic_set64((int64_t *)&test_dram_byte_threads_errs, 0);
+ bdk_atomic_set64((int64_t *)&test_dram_byte_lmc_errs[0], 0);
+ bdk_atomic_set64((int64_t *)&test_dram_byte_lmc_errs[1], 0);
+ bdk_atomic_set64((int64_t *)&test_dram_byte_lmc_errs[2], 0);
+ bdk_atomic_set64((int64_t *)&test_dram_byte_lmc_errs[3], 0);
+
+ /* Start threads for cores on the node */
+ if (bdk_numa_exists(node)) {
+ debug_print("Starting %d threads for test_dram_byte\n", dram_tune_use_cores);
+ for (int core = 0; core < dram_tune_use_cores; core++) {
+ if (bdk_thread_create(node, 0, thread_p, core, (void *)test_info, 0)) {
+ bdk_error("Failed to create thread %d for test_dram_byte\n", core);
+ } else {
+ total_count++;
+ }
+ }
+ }
+
+#if 0
+ /* Wait for threads to finish */
+ while (bdk_atomic_get64(&test_dram_byte_threads_done) < total_count)
+ bdk_thread_yield();
+#else
+#define TIMEOUT_SECS 5 // FIXME: long enough so a pass for a given setting will not print
+ /* Wait for threads to finish, with progress */
+ int cur_count;
+ uint64_t cur_time;
+ uint64_t period = bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_TIME) * TIMEOUT_SECS; // FIXME?
+ uint64_t timeout = bdk_clock_get_count(BDK_CLOCK_TIME) + period;
+ do {
+ bdk_thread_yield();
+ cur_count = bdk_atomic_get64(&test_dram_byte_threads_done);
+ cur_time = bdk_clock_get_count(BDK_CLOCK_TIME);
+ if (cur_time >= timeout) {
+ printf("Waiting for %d cores\n", total_count - cur_count);
+ timeout = cur_time + period;
+ }
+ } while (cur_count < total_count);
+#endif
+
+ // NOTE: this is the summary of errors across all LMCs
+ return (int)bdk_atomic_get64((int64_t *)&test_dram_byte_threads_errs);
+}
+
+/* These variables count the number of ECC errors. They should only be accessed atomically */
+extern int64_t __bdk_dram_ecc_single_bit_errors[];
+extern int64_t __bdk_dram_ecc_double_bit_errors[];
+
+#if 0
+// make the tuning test callable as a standalone
+int
+bdk_run_dram_tuning_test(int node)
+{
+ int num_lmcs = __bdk_dram_get_num_lmc(node);
+ const char *s;
+ int lmc, byte;
+ int errors;
+ uint64_t start_dram_dclk[4], start_dram_ops[4];
+ int save_use_bursts;
+
+ // check for the cores on this node, abort if not more than 1 // FIXME?
+ dram_tune_max_cores = bdk_get_num_running_cores(node);
+ if (dram_tune_max_cores < 2) {
+ //bdk_init_cores(node, 0);
+ printf("N%d: ERROR: not enough cores to run the DRAM tuning test.\n", node);
+ return 0;
+ }
+
+ // but use only a certain number of cores, at most what is available
+ if ((s = getenv("ddr_tune_use_cores")) != NULL) {
+ dram_tune_use_cores = strtoul(s, NULL, 0);
+ if (dram_tune_use_cores <= 0) // allow 0 or negative to mean all
+ dram_tune_use_cores = dram_tune_max_cores;
+ }
+ if (dram_tune_use_cores > dram_tune_max_cores)
+ dram_tune_use_cores = dram_tune_max_cores;
+
+ // save the original bursts, so we can replace it with a better number for just testing
+ save_use_bursts = dram_tune_use_bursts;
+ dram_tune_use_bursts = 1500; // FIXME: hard code bursts for the test here...
+
+ // allow override of the test repeats (bursts) per thread create
+ if ((s = getenv("ddr_tune_use_bursts")) != NULL) {
+ dram_tune_use_bursts = strtoul(s, NULL, 10);
+ }
+
+ // allow override of the test mem_xor algorithm
+ if ((s = getenv("ddr_tune_use_xor2")) != NULL) {
+ dram_tune_use_xor2 = !!strtoul(s, NULL, 10);
+ }
+
+ // FIXME? consult LMC0 only
+ BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(0));
+ if (lmcx_config.s.rank_ena) { // replace the default offset when there is more than 1 rank...
+ dram_tune_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
+ ddr_print("N%d: run_dram_tuning_test: changing rank offset to 0x%lx\n", node, dram_tune_rank_offset);
+ }
+ if (lmcx_config.s.init_status & 0x0c) { // bit 2 or 3 set indicates 2 DIMMs
+ dram_tune_dimm_offset = 1ull << (28 + lmcx_config.s.pbank_lsb + (num_lmcs/2));
+ ddr_print("N%d: run_dram_tuning_test: changing dimm offset to 0x%lx\n", node, dram_tune_dimm_offset);
+ }
+ int ddr_interface_64b = !lmcx_config.s.mode32b;
+
+ // construct the bytemask
+ int bytes_todo = (ddr_interface_64b) ? 0xff : 0x0f; // FIXME: hack?
+ uint64_t bytemask = 0;
+ for (byte = 0; byte < 8; ++byte) {
+ uint64_t bitmask;
+ if (bytes_todo & (1 << byte)) {
+ bitmask = ((!ddr_interface_64b) && (byte == 4)) ? 0x0f: 0xff;
+ bytemask |= bitmask << (8*byte); // set the bytes bits in the bytemask
+ }
+ } /* for (byte = 0; byte < 8; ++byte) */
+
+ // print current working values
+ ddr_print("N%d: run_dram_tuning_test: max %d cores, use %d cores, use %d bursts.\n",
+ node, dram_tune_max_cores, dram_tune_use_cores, dram_tune_use_bursts);
+
+ // do the setup on active LMCs
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+ // record start cycle CSRs here for utilization measure
+ start_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc));
+ start_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc));
+#if 0
+ bdk_atomic_set64(&__bdk_dram_ecc_single_bit_errors[lmc], 0);
+ bdk_atomic_set64(&__bdk_dram_ecc_double_bit_errors[lmc], 0);
+#else
+ __bdk_dram_ecc_single_bit_errors[lmc] = 0;
+ __bdk_dram_ecc_double_bit_errors[lmc] = 0;
+#endif
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ bdk_watchdog_poke();
+
+ // run the test(s)
+ // only 1 call should be enough, let the bursts, etc, control the load...
+ errors = run_dram_tuning_threads(node, num_lmcs, bytemask);
+
+ /* Check ECC error counters after the test */
+ int64_t ecc_single = 0;
+ int64_t ecc_double = 0;
+ int64_t ecc_single_errs[4];
+ int64_t ecc_double_errs[4];
+
+ // finally, print the utilizations all together, and sum the ECC errors
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+ uint64_t dclk_diff = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc)) - start_dram_dclk[lmc];
+ uint64_t ops_diff = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc)) - start_dram_ops[lmc];
+ uint64_t percent_x10 = ops_diff * 1000 / dclk_diff;
+ printf("N%d.LMC%d: ops %lu, cycles %lu, used %lu.%lu%%\n",
+ node, lmc, ops_diff, dclk_diff, percent_x10 / 10, percent_x10 % 10);
+
+ ecc_single += (ecc_single_errs[lmc] = bdk_atomic_get64(&__bdk_dram_ecc_single_bit_errors[lmc]));
+ ecc_double += (ecc_double_errs[lmc] = bdk_atomic_get64(&__bdk_dram_ecc_double_bit_errors[lmc]));
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ /* Always print any ECC errors */
+ if (ecc_single || ecc_double) {
+ printf("Test \"%s\": ECC errors, %ld/%ld/%ld/%ld corrected, %ld/%ld/%ld/%ld uncorrected\n",
+ "DRAM Tuning Test",
+ ecc_single_errs[0], ecc_single_errs[1], ecc_single_errs[2], ecc_single_errs[3],
+ ecc_double_errs[0], ecc_double_errs[1], ecc_double_errs[2], ecc_double_errs[3]);
+ }
+ if (errors || ecc_double || ecc_single) {
+ printf("Test \"%s\": FAIL: %ld single, %ld double, %d compare errors\n",
+ "DRAM Tuning Test", ecc_single, ecc_double, errors);
+ }
+
+ // restore bursts
+ dram_tune_use_bursts = save_use_bursts;
+
+ return (errors + ecc_double + ecc_single);
+}
+#endif /* 0 */
+
+#define DEFAULT_SAMPLE_GRAN 3 // sample for errors every N offset values
+#define MIN_BYTE_OFFSET -63
+#define MAX_BYTE_OFFSET +63
+int dram_tune_use_gran = DEFAULT_SAMPLE_GRAN;
+
+static int
+auto_set_dll_offset(bdk_node_t node, int dll_offset_mode,
+ int num_lmcs, int ddr_interface_64b,
+ int do_tune)
+{
+ int byte_offset;
+ //unsigned short result[9];
+ int byte;
+ int byte_delay_start[4][9];
+ int byte_delay_count[4][9];
+ uint64_t byte_delay_windows [4][9];
+ int byte_delay_best_start[4][9];
+ int byte_delay_best_count[4][9];
+ //int this_rodt;
+ uint64_t ops_sum[4], dclk_sum[4];
+ uint64_t start_dram_dclk[4], stop_dram_dclk[4];
+ uint64_t start_dram_ops[4], stop_dram_ops[4];
+ int errors, tot_errors;
+ int lmc;
+ char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write";
+ int mode_is_read = (dll_offset_mode == 2);
+ char *mode_blk = (dll_offset_mode == 2) ? " " : "";
+ int start_offset, end_offset, incr_offset;
+
+ int speed_bin = get_speed_bin(node, 0); // FIXME: just get from LMC0?
+ int low_risk_count = 0, needs_review_count = 0;
+
+ if (dram_tune_use_gran != DEFAULT_SAMPLE_GRAN) {
+ ddr_print2("N%d: Changing sample granularity from %d to %d\n",
+ node, DEFAULT_SAMPLE_GRAN, dram_tune_use_gran);
+ }
+ // ensure sample is taken at 0
+ start_offset = MIN_BYTE_OFFSET - (MIN_BYTE_OFFSET % dram_tune_use_gran);
+ end_offset = MAX_BYTE_OFFSET - (MAX_BYTE_OFFSET % dram_tune_use_gran);
+ incr_offset = dram_tune_use_gran;
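+    // Worked example (illustrative): trimming both limits to a multiple of the
+    // granularity guarantees that offset 0 is always sampled.  With the default
+    // gran of 3 the sweep is -63, -60, ..., 0, ..., 60, 63; with a gran of 4
+    // (-63 % 4 == -3, 63 % 4 == 3) it becomes -60, -56, ..., 0, ..., 56, 60.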
+
+ memset(ops_sum, 0, sizeof(ops_sum));
+ memset(dclk_sum, 0, sizeof(dclk_sum));
+ memset(byte_delay_start, 0, sizeof(byte_delay_start));
+ memset(byte_delay_count, 0, sizeof(byte_delay_count));
+ memset(byte_delay_windows, 0, sizeof(byte_delay_windows));
+ memset(byte_delay_best_start, 0, sizeof(byte_delay_best_start));
+ memset(byte_delay_best_count, 0, sizeof(byte_delay_best_count));
+
+ // FIXME? consult LMC0 only
+ BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(0));
+ if (lmcx_config.s.rank_ena) { // replace the default offset when there is more than 1 rank...
+ dram_tune_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
+ ddr_print2("N%d: Tuning multiple ranks per DIMM (rank offset 0x%lx).\n", node, dram_tune_rank_offset);
+ }
+ if (lmcx_config.s.init_status & 0x0c) { // bit 2 or 3 set indicates 2 DIMMs
+ dram_tune_dimm_offset = 1ull << (28 + lmcx_config.s.pbank_lsb + (num_lmcs/2));
+ ddr_print2("N%d: Tuning multiple DIMMs per channel (DIMM offset 0x%lx)\n", node, dram_tune_dimm_offset);
+ }
+
+ // FIXME? do this for LMC0 only
+ //BDK_CSR_INIT(comp_ctl2, node, BDK_LMCX_COMP_CTL2(0));
+ //this_rodt = comp_ctl2.s.rodt_ctl;
+
+ // construct the bytemask
+ int bytes_todo = (ddr_interface_64b) ? 0xff : 0x0f;
+ uint64_t bytemask = 0;
+ for (byte = 0; byte < 8; ++byte) {
+ if (bytes_todo & (1 << byte)) {
+ bytemask |= 0xfful << (8*byte); // set the bytes bits in the bytemask
+ }
+ } /* for (byte = 0; byte < 8; ++byte) */
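+    // For example (illustrative): a 64-bit interface (bytes_todo = 0xff) gives
+    // bytemask = 0xffffffffffffffff, while a 32-bit interface (bytes_todo = 0x0f)
+    // gives bytemask = 0x00000000ffffffff, so the unused upper byte lanes are
+    // never counted as errors.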
+
+ // now loop through selected legal values for the DLL byte offset...
+
+ for (byte_offset = start_offset; byte_offset <= end_offset; byte_offset += incr_offset) {
+
+ // do the setup on active LMCs
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+ change_dll_offset_enable(node, lmc, 0);
+
+ // set all byte lanes at once
+ load_dll_offset(node, lmc, dll_offset_mode, byte_offset, 10 /* All bytes at once */);
+ // but then clear the ECC byte lane so it should be neutral for the test...
+ load_dll_offset(node, lmc, dll_offset_mode, 0, 8);
+
+ change_dll_offset_enable(node, lmc, 1);
+
+ // record start cycle CSRs here for utilization measure
+ start_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc));
+ start_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc));
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ bdk_watchdog_poke();
+
+ // run the test(s)
+ // only 1 call should be enough, let the bursts, etc, control the load...
+ tot_errors = run_dram_tuning_threads(node, num_lmcs, bytemask);
+
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+ // record stop cycle CSRs here for utilization measure
+ stop_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc));
+ stop_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc));
+
+ // accumulate...
+ ops_sum[lmc] += stop_dram_ops[lmc] - start_dram_ops[lmc];
+ dclk_sum[lmc] += stop_dram_dclk[lmc] - start_dram_dclk[lmc];
+
+ errors = test_dram_byte_lmc_errs[lmc];
+
+ // check errors by byte, but not ECC
+ for (byte = 0; byte < 8; ++byte) {
+ if (!(bytes_todo & (1 << byte))) // is this byte lane to be done
+ continue; // no
+
+ byte_delay_windows[lmc][byte] <<= 1; // always put in a zero
+ if (errors & (1 << byte)) { // yes, an error in this byte lane
+ byte_delay_count[lmc][byte] = 0; // stop now always
+ } else { // no error in this byte lane
+ if (byte_delay_count[lmc][byte] == 0) { // first success, set run start
+ byte_delay_start[lmc][byte] = byte_offset;
+ }
+ byte_delay_count[lmc][byte] += incr_offset; // bump run length
+
+ if (byte_delay_count[lmc][byte] > byte_delay_best_count[lmc][byte]) {
+ byte_delay_best_count[lmc][byte] = byte_delay_count[lmc][byte];
+ byte_delay_best_start[lmc][byte] = byte_delay_start[lmc][byte];
+ }
+ byte_delay_windows[lmc][byte] |= 1ULL; // for pass, put in a 1
+ }
+ } /* for (byte = 0; byte < 8; ++byte) */
+
+ // only print when there are errors and verbose...
+ if (errors) {
+ debug_print("DLL %s Offset Test %3d: errors 0x%x\n",
+ mode_str, byte_offset, errors);
+ }
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ } /* for (byte_offset=-63; byte_offset<63; byte_offset += incr_offset) */
+
+ // done with testing, load up and/or print out the offsets we found...
+
+ // only when margining...
+ if (!do_tune) {
+ printf(" \n");
+ printf("-------------------------------------\n");
+#if 0
+ uint32_t mts_speed = (libdram_get_freq_from_pll(node, 0) * 2) / 1000000; // FIXME: sample LMC0
+ printf("N%d: Starting %s Timing Margining for %d MT/s.\n", node, mode_str, mts_speed);
+#else
+ printf("N%d: Starting %s Timing Margining.\n", node, mode_str);
+#endif
+ printf(" \n");
+ } /* if (!do_tune) */
+
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+#if 1
+ // FIXME FIXME
+ // FIXME: this just makes ECC always show 0
+ byte_delay_best_start[lmc][8] = start_offset;
+ byte_delay_best_count[lmc][8] = end_offset - start_offset + incr_offset;
+#endif
+
+ // disable offsets while we load...
+ change_dll_offset_enable(node, lmc, 0);
+
+ // only when margining...
+ if (!do_tune) {
+ // print the heading
+ printf(" \n");
+ printf("N%d.LMC%d: %s Timing Margin %s : ", node, lmc, mode_str, mode_blk);
+ printf(" ECC/8 ");
+ for (byte = 7; byte >= 0; byte--) {
+ printf(" Byte %d ", byte);
+ }
+ printf("\n");
+ } /* if (!do_tune) */
+
+ // print and load the offset values
+ // print the windows bit arrays
+ // only when margining...
+ if (!do_tune) {
+ printf("N%d.LMC%d: DLL %s Offset Amount %s : ", node, lmc, mode_str, mode_blk);
+ } else {
+ ddr_print("N%d.LMC%d: SW DLL %s Offset Amount %s : ", node, lmc, mode_str, mode_blk);
+ }
+ for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order
+
+ int count = byte_delay_best_count[lmc][byte];
+ if (count == 0)
+ count = incr_offset; // should make non-tested ECC byte come out 0
+
+ byte_offset = byte_delay_best_start[lmc][byte] +
+ ((count - incr_offset) / 2); // adj by incr
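+            // e.g. (illustrative): a best run starting at -15 with count 33 at
+            // granularity 3 spans offsets -15..15, so the chosen offset is its
+            // midpoint: -15 + (33 - 3)/2 = 0.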
+
+ if (!do_tune) { // do counting and special flag if margining
+ int will_need_review = !is_low_risk_winlen(speed_bin, (count - incr_offset)) &&
+ !is_low_risk_offset(speed_bin, byte_offset);
+
+ printf("%10d%c", byte_offset, (will_need_review) ? '<' :' ');
+
+ if (will_need_review)
+ needs_review_count++;
+ else
+ low_risk_count++;
+ } else { // if just tuning, make the printout less lengthy
+ ddr_print("%5d ", byte_offset);
+ }
+
+ // FIXME? should we be able to override this?
+ if (mode_is_read) // for READ offsets, always store what we found
+ load_dll_offset(node, lmc, dll_offset_mode, byte_offset, byte);
+ else // for WRITE offsets, always store 0
+ load_dll_offset(node, lmc, dll_offset_mode, 0, byte);
+
+ }
+ if (!do_tune) {
+ printf("\n");
+ } else {
+ ddr_print("\n");
+ }
+
+
+ // re-enable the offsets now that we are done loading
+ change_dll_offset_enable(node, lmc, 1);
+
+ // only when margining...
+ if (!do_tune) {
+ // print the window sizes
+ printf("N%d.LMC%d: DLL %s Window Length %s : ", node, lmc, mode_str, mode_blk);
+ for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order
+ int count = byte_delay_best_count[lmc][byte];
+ if (count == 0)
+ count = incr_offset; // should make non-tested ECC byte come out 0
+
+ // do this again since the "needs review" test is an AND...
+ byte_offset = byte_delay_best_start[lmc][byte] +
+ ((count - incr_offset) / 2); // adj by incr
+
+ int will_need_review = !is_low_risk_winlen(speed_bin, (count - incr_offset)) &&
+ !is_low_risk_offset(speed_bin, byte_offset);
+
+ printf("%10d%c", count - incr_offset, (will_need_review) ? '<' :' ');
+ }
+ printf("\n");
+
+ // print the window extents
+ printf("N%d.LMC%d: DLL %s Window Bounds %s : ", node, lmc, mode_str, mode_blk);
+ for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order
+ int start = byte_delay_best_start[lmc][byte];
+ int count = byte_delay_best_count[lmc][byte];
+ if (count == 0)
+ count = incr_offset; // should make non-tested ECC byte come out 0
+ printf(" %3d to%3d ", start,
+ start + count - incr_offset);
+ }
+ printf("\n");
+#if 0
+ // FIXME: should have a way to force these out...
+ // print the windows bit arrays
+ printf("N%d.LMC%d: DLL %s Window Bitmap%s : ", node, lmc, mode_str, mode_blk);
+ for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order
+ printf("%010lx ", byte_delay_windows[lmc][byte]);
+ }
+ printf("\n");
+#endif
+ } /* if (!do_tune) */
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ // only when margining...
+ if (!do_tune) {
+ // print the Summary line(s) here
+ printf(" \n");
+ printf("N%d: %s Timing Margining Summary : %s ", node, mode_str,
+ (needs_review_count > 0) ? "Needs Review" : "Low Risk");
+ if (needs_review_count > 0)
+ printf("(%d)", needs_review_count);
+ printf("\n");
+
+ // FIXME??? want to print here: "N0: %s Offsets have been applied already"
+
+ printf("-------------------------------------\n");
+ printf(" \n");
+ } /* if (!do_tune) */
+
+ // FIXME: we probably want this only when doing verbose...
+ // finally, print the utilizations all together
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+ uint64_t percent_x10 = ops_sum[lmc] * 1000 / dclk_sum[lmc];
+ ddr_print2("N%d.LMC%d: ops %lu, cycles %lu, used %lu.%lu%%\n",
+ node, lmc, ops_sum[lmc], dclk_sum[lmc], percent_x10 / 10, percent_x10 % 10);
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ // FIXME: only when verbose, or only when there are errors?
+ // run the test one last time
+ // print whether there are errors or not, but only when verbose...
+ bdk_watchdog_poke();
+ debug_print("N%d: %s: Start running test one last time\n", node, __FUNCTION__);
+ tot_errors = run_dram_tuning_threads(node, num_lmcs, bytemask);
+ debug_print("N%d: %s: Finished running test one last time\n", node, __FUNCTION__);
+ if (tot_errors)
+ ddr_print2("%s Timing Final Test: errors 0x%x\n", mode_str, tot_errors);
+
+ return (do_tune) ? tot_errors : !!(needs_review_count > 0);
+}
+
+#define USE_L2_WAYS_LIMIT 0 // non-zero to enable L2 ways limiting
+
+/*
+ * Automatically adjust the DLL offset for the data bytes
+ */
+int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune)
+{
+ int ddr_interface_64b;
+ int save_ecc_ena[4];
+ bdk_lmcx_config_t lmc_config;
+ int lmc, num_lmcs = __bdk_dram_get_num_lmc(node);
+ const char *s;
+#if USE_L2_WAYS_LIMIT
+ int ways, ways_print = 0;
+#endif
+#if 0
+ int dram_tune_use_rodt = -1, save_rodt[4];
+ bdk_lmcx_comp_ctl2_t comp_ctl2;
+#endif
+ int loops = 1, loop;
+ uint64_t orig_coremask;
+ int errs = 0;
+
+ // enable any non-running cores on this node
+ orig_coremask = bdk_get_running_coremask(node);
+ ddr_print4("N%d: %s: Starting cores (mask was 0x%lx)\n",
+ node, __FUNCTION__, orig_coremask);
+ bdk_init_cores(node, ~0ULL & ~orig_coremask);
+ dram_tune_max_cores = bdk_get_num_running_cores(node);
+
+ // but use only a certain number of cores, at most what is available
+ if ((s = getenv("ddr_tune_use_cores")) != NULL) {
+ dram_tune_use_cores = strtoul(s, NULL, 0);
+ if (dram_tune_use_cores <= 0) // allow 0 or negative to mean all
+ dram_tune_use_cores = dram_tune_max_cores;
+ }
+ if (dram_tune_use_cores > dram_tune_max_cores)
+ dram_tune_use_cores = dram_tune_max_cores;
+
+ // see if we want to do the tuning more than once per LMC...
+ if ((s = getenv("ddr_tune_use_loops"))) {
+ loops = strtoul(s, NULL, 0);
+ }
+
+ // see if we want to change the granularity of the byte_offset sampling
+ if ((s = getenv("ddr_tune_use_gran"))) {
+ dram_tune_use_gran = strtoul(s, NULL, 0);
+ }
+
+ // allow override of the test repeats (bursts) per thread create
+ if ((s = getenv("ddr_tune_use_bursts")) != NULL) {
+ dram_tune_use_bursts = strtoul(s, NULL, 10);
+ }
+
+#if 0
+ // allow override of Read ODT setting just during the tuning run(s)
+ if ((s = getenv("ddr_tune_use_rodt")) != NULL) {
+ int temp = strtoul(s, NULL, 10);
+ // validity check
+ if (temp >= 0 && temp <= 7)
+ dram_tune_use_rodt = temp;
+ }
+#endif
+
+#if 0
+ // allow override of the test pattern
+ // FIXME: a bit simplistic...
+ if ((s = getenv("ddr_tune_use_pattern")) != NULL) {
+ int patno = strtoul(s, NULL, 10);
+ if (patno == 2)
+ dram_tune_test_pattern = test_pattern_2;
+ else if (patno == 3)
+ dram_tune_test_pattern = test_pattern_3;
+ else // all other values use default
+ dram_tune_test_pattern = test_pattern_1;
+ }
+#endif
+
+ // allow override of the test mem_xor algorithm
+ if ((s = getenv("ddr_tune_use_xor2")) != NULL) {
+ dram_tune_use_xor2 = !!strtoul(s, NULL, 10);
+ }
+
+ // print current working values
+ ddr_print2("N%d: Tuning will use %d cores of max %d cores, and use %d repeats.\n",
+ node, dram_tune_use_cores, dram_tune_max_cores,
+ dram_tune_use_bursts);
+
+#if USE_L2_WAYS_LIMIT
+ // see if L2 ways are limited
+ if ((s = lookup_env_parameter("limit_l2_ways")) != NULL) {
+ ways = strtoul(s, NULL, 10);
+ ways_print = 1;
+ } else {
+ ways = bdk_l2c_get_num_assoc(node);
+ }
+#endif
+
+#if 0
+ // if RODT is to be overridden during tuning, note change
+ if (dram_tune_use_rodt >= 0) {
+ ddr_print("N%d: using RODT %d for tuning.\n",
+ node, dram_tune_use_rodt);
+ }
+#endif
+
+ // FIXME? get flag from LMC0 only
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(0));
+ ddr_interface_64b = !lmc_config.s.mode32b;
+
+ // do setup for each active LMC
+ debug_print("N%d: %s: starting LMCs setup.\n", node, __FUNCTION__);
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+
+#if 0
+ // if RODT change, save old and set new here...
+ if (dram_tune_use_rodt >= 0) {
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc));
+ save_rodt[lmc] = comp_ctl2.s.rodt_ctl;
+ comp_ctl2.s.rodt_ctl = dram_tune_use_rodt;
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(lmc), comp_ctl2.u);
+ BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc));
+ }
+#endif
+ /* Disable ECC for DRAM tests */
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ save_ecc_ena[lmc] = lmc_config.s.ecc_ena;
+ lmc_config.s.ecc_ena = 0;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u);
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+#if USE_L2_WAYS_LIMIT
+ /* Disable l2 sets for DRAM testing */
+ limit_l2_ways(node, 0, ways_print);
+#endif
+
+ // testing is done on all LMCs simultaneously
+ // FIXME: for now, loop here to show what happens multiple times
+ for (loop = 0; loop < loops; loop++) {
+ /* Perform DLL offset tuning */
+ errs = auto_set_dll_offset(node, dll_offset_mode, num_lmcs, ddr_interface_64b, do_tune);
+ }
+
+#if USE_L2_WAYS_LIMIT
+ /* Restore the l2 set configuration */
+ limit_l2_ways(node, ways, ways_print);
+#endif
+
+ // perform cleanup on all active LMCs
+ debug_print("N%d: %s: starting LMCs cleanup.\n", node, __FUNCTION__);
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+
+ /* Restore ECC for DRAM tests */
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ lmc_config.s.ecc_ena = save_ecc_ena[lmc];
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u);
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+#if 0
+ // if RODT change, restore old here...
+ if (dram_tune_use_rodt >= 0) {
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc));
+ comp_ctl2.s.rodt_ctl = save_rodt[lmc];
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(lmc), comp_ctl2.u);
+ BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc));
+ }
+#endif
+ // finally, see if there are any read offset overrides after tuning
+ // FIXME: provide a way to do write offsets also??
+ if (dll_offset_mode == 2) {
+ for (int by = 0; by < 9; by++) {
+ if ((s = lookup_env_parameter("ddr%d_tune_byte%d", lmc, by)) != NULL) {
+ int dllro = strtoul(s, NULL, 10);
+ change_dll_offset_enable(node, lmc, 0);
+ load_dll_offset(node, lmc, /* read */2, dllro, by);
+ change_dll_offset_enable(node, lmc, 1);
+ }
+ }
+ }
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ // finish up...
+
+#if 0
+ // if RODT was overridden during tuning, note restore
+ if (dram_tune_use_rodt >= 0) {
+ ddr_print("N%d: restoring RODT %d after tuning.\n",
+ node, save_rodt[0]); // FIXME? use LMC0
+ }
+#endif
+
+ // put any cores on this node, that were not running at the start, back into reset
+ uint64_t reset_coremask = bdk_get_running_coremask(node) & ~orig_coremask;
+ if (reset_coremask) {
+ ddr_print4("N%d: %s: Stopping cores 0x%lx\n", node, __FUNCTION__,
+ reset_coremask);
+ bdk_reset_cores(node, reset_coremask);
+ } else {
+ ddr_print4("N%d: %s: leaving cores set to 0x%lx\n", node, __FUNCTION__,
+ orig_coremask);
+ }
+
+ return errs;
+
+} /* perform_dll_offset_tuning */
+
+/////////////////////////////////////////////////////////////////////////////////////////////
+
+///// HW-assist byte DLL offset tuning //////
+
+#if 1
+// setup defaults for byte test pattern array
+// take these first two from the HRM section 6.9.13
+static const uint64_t byte_pattern_0[] = {
+ 0xFFAAFFFFFF55FFFFULL, // GP0
+ 0x55555555AAAAAAAAULL, // GP1
+ 0xAA55AAAAULL, // GP2
+};
+static const uint64_t byte_pattern_1[] = {
+ 0xFBF7EFDFBF7FFEFDULL, // GP0
+ 0x0F1E3C78F0E1C387ULL, // GP1
+ 0xF0E1BF7FULL, // GP2
+};
+// this is from Andrew via LFSR with PRBS=0xFFFFAAAA
+static const uint64_t byte_pattern_2[] = {
+ 0xEE55AADDEE55AADDULL, // GP0
+ 0x55AADDEE55AADDEEULL, // GP1
+ 0x55EEULL, // GP2
+};
+// this is from Mike via LFSR with PRBS=0x4A519909
+static const uint64_t byte_pattern_3[] = {
+ 0x0088CCEE0088CCEEULL, // GP0
+ 0xBB552211BB552211ULL, // GP1
+ 0xBB00ULL, // GP2
+};
+
+static const uint64_t *byte_patterns[] = {
+ byte_pattern_0, byte_pattern_1, byte_pattern_2, byte_pattern_3 // FIXME: use all we have
+};
+#define NUM_BYTE_PATTERNS ((int)(sizeof(byte_patterns)/sizeof(uint64_t *)))
+
+#define DEFAULT_BYTE_BURSTS 32 // FIXME: this is what the longest test usually has
+int dram_tune_byte_bursts = DEFAULT_BYTE_BURSTS;
+#endif
+
+static void
+setup_hw_pattern(bdk_node_t node, int lmc, const uint64_t *pattern_p)
+{
+ /*
+ 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern of choice.
+ a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower (rising edge) 64 bits of data.
+ b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper (falling edge) 64 bits of data.
+ c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower (rising edge <7:0>) and upper
+ (falling edge <15:8>) ECC data.
+ */
+ DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE0(lmc), pattern_p[0]);
+ DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE1(lmc), pattern_p[1]);
+ DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE2(lmc), pattern_p[2]);
+}
+
+#define DEFAULT_PRBS 0xFFFFAAAAUL /* FIXME: maybe try 0x4A519909UL */
+
+static void
+setup_lfsr_pattern(bdk_node_t node, int lmc, uint64_t data)
+{
+ uint32_t prbs;
+ const char *s;
+
+ if ((s = getenv("ddr_lfsr_prbs"))) {
+ prbs = strtoul(s, NULL, 0);
+ } else
+ prbs = DEFAULT_PRBS; // FIXME: from data arg?
+
+ /*
+ 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
+ here data comes from the LFSR generating a PRBS pattern
+ CHAR_CTL.EN = 0
+ CHAR_CTL.SEL = 0; // for PRBS
+ CHAR_CTL.DR = 1;
+ CHAR_CTL.PRBS = setup for whatever type of PRBS to send
+ CHAR_CTL.SKEW_ON = 1;
+ */
+ BDK_CSR_INIT(char_ctl, node, BDK_LMCX_CHAR_CTL(lmc));
+ char_ctl.s.en = 0;
+ char_ctl.s.sel = 0;
+ char_ctl.s.dr = 1;
+ char_ctl.s.prbs = prbs;
+ char_ctl.s.skew_on = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CHAR_CTL(lmc), char_ctl.u);
+}
+
+int
+choose_best_hw_patterns(bdk_node_t node, int lmc, int mode)
+{
+ int new_mode = mode;
+ const char *s;
+
+ switch (mode) {
+ case DBTRAIN_TEST: // always choose LFSR if chip supports it
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX)) {
+ int lfsr_enable = 1;
+ if ((s = getenv("ddr_allow_lfsr"))) { // override?
+ lfsr_enable = !!strtoul(s, NULL, 0);
+ }
+ if (lfsr_enable)
+ new_mode = DBTRAIN_LFSR;
+ }
+ break;
+ case DBTRAIN_DBI: // possibly can allow LFSR use?
+ break;
+ case DBTRAIN_LFSR: // forced already
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) {
+ ddr_print("ERROR: illegal HW assist mode %d\n", mode);
+ new_mode = DBTRAIN_TEST;
+ }
+ break;
+ default:
+ ddr_print("ERROR: unknown HW assist mode %d\n", mode);
+ }
+
+ if (new_mode != mode)
+ VB_PRT(VBL_DEV2, "choose_best_hw_patterns: changing mode %d to %d\n", mode, new_mode);
+
+ return new_mode;
+}
+
+int
+run_best_hw_patterns(bdk_node_t node, int lmc, uint64_t phys_addr,
+ int mode, uint64_t *xor_data)
+{
+ int pattern;
+ const uint64_t *pattern_p;
+ int errs, errors = 0;
+
+ // FIXME? always choose LFSR if chip supports it???
+ mode = choose_best_hw_patterns(node, lmc, mode);
+
+ if (mode == DBTRAIN_LFSR) {
+ setup_lfsr_pattern(node, lmc, 0);
+ errors = test_dram_byte_hw(node, lmc, phys_addr, mode, xor_data);
+ VB_PRT(VBL_DEV2, "%s: LFSR at A:0x%012lx errors 0x%x\n",
+ __FUNCTION__, phys_addr, errors);
+ } else {
+ for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
+ pattern_p = byte_patterns[pattern];
+ setup_hw_pattern(node, lmc, pattern_p);
+
+ errs = test_dram_byte_hw(node, lmc, phys_addr, mode, xor_data);
+
+ VB_PRT(VBL_DEV2, "%s: PATTERN %d at A:0x%012lx errors 0x%x\n",
+ __FUNCTION__, pattern, phys_addr, errs);
+
+ errors |= errs;
+ } /* for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) */
+ }
+ return errors;
+}
+
+static void
+hw_assist_test_dll_offset(bdk_node_t node, int dll_offset_mode,
+ int lmc, int bytelane)
+{
+ int byte_offset, new_best_offset[9];
+ int rank_delay_start[4][9];
+ int rank_delay_count[4][9];
+ int rank_delay_best_start[4][9];
+ int rank_delay_best_count[4][9];
+ int errors[4], off_errors, tot_errors;
+ int num_lmcs = __bdk_dram_get_num_lmc(node);
+ int rank_mask, rankx, active_ranks;
+ int pattern;
+ const uint64_t *pattern_p;
+ int byte;
+ char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write";
+ int pat_best_offset[9];
+ uint64_t phys_addr;
+ int pat_beg, pat_end;
+ int rank_beg, rank_end;
+ int byte_lo, byte_hi;
+ uint64_t hw_rank_offset;
+ // FIXME? always choose LFSR if chip supports it???
+ int mode = choose_best_hw_patterns(node, lmc, DBTRAIN_TEST);
+
+ if (bytelane == 0x0A) { // all bytelanes
+ byte_lo = 0;
+ byte_hi = 8;
+ } else { // just 1
+ byte_lo = byte_hi = bytelane;
+ }
+
+ BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(lmc));
+ rank_mask = lmcx_config.s.init_status;
+ // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
+ hw_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
+
+ debug_print("N%d: %s: starting LMC%d with rank offset 0x%lx\n",
+ node, __FUNCTION__, lmc, hw_rank_offset);
+
+ // start of pattern loop
+ // we do the set of tests for each pattern supplied...
+
+ memset(new_best_offset, 0, sizeof(new_best_offset));
+ for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
+
+ memset(pat_best_offset, 0, sizeof(pat_best_offset));
+
+ if (mode == DBTRAIN_TEST) {
+ pattern_p = byte_patterns[pattern];
+ setup_hw_pattern(node, lmc, pattern_p);
+ } else {
+ setup_lfsr_pattern(node, lmc, 0);
+ }
+
+ // now loop through all legal values for the DLL byte offset...
+
+#define BYTE_OFFSET_INCR 3 // FIXME: make this tunable?
+
+ tot_errors = 0;
+
+ memset(rank_delay_count, 0, sizeof(rank_delay_count));
+ memset(rank_delay_start, 0, sizeof(rank_delay_start));
+ memset(rank_delay_best_count, 0, sizeof(rank_delay_best_count));
+ memset(rank_delay_best_start, 0, sizeof(rank_delay_best_start));
+
+ for (byte_offset = -63; byte_offset < 64; byte_offset += BYTE_OFFSET_INCR) {
+
+ // do the setup on the active LMC
+ // set the bytelanes DLL offsets
+ change_dll_offset_enable(node, lmc, 0);
+ load_dll_offset(node, lmc, dll_offset_mode, byte_offset, bytelane); // FIXME? bytelane?
+ change_dll_offset_enable(node, lmc, 1);
+
+ bdk_watchdog_poke();
+
+ // run the test on each rank
+ // only 1 call per rank should be enough, let the bursts, loops, etc, control the load...
+
+ off_errors = 0; // errors for this byte_offset, all ranks
+
+ active_ranks = 0;
+
+ for (rankx = 0; rankx < 4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ phys_addr = hw_rank_offset * active_ranks;
+ // FIXME: now done by test_dram_byte_hw()
+ //phys_addr |= (lmc << 7);
+ //phys_addr = bdk_numa_get_address(node, phys_addr); // map to node
+
+ active_ranks++;
+
+ // NOTE: return is a now a bitmask of the erroring bytelanes..
+ errors[rankx] = test_dram_byte_hw(node, lmc, phys_addr, mode, NULL);
+
+ for (byte = byte_lo; byte <= byte_hi; byte++) { // do bytelane(s)
+
+ // check errors
+ if (errors[rankx] & (1 << byte)) { // yes, an error in the byte lane in this rank
+ off_errors |= (1 << byte);
+
+ ddr_print5("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: Address 0x%012lx errors 0x%x\n",
+ node, lmc, rankx, bytelane, mode_str,
+ byte_offset, phys_addr, errors[rankx]);
+
+ if (rank_delay_count[rankx][byte] > 0) { // had started run
+ ddr_print5("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: stopping a run here\n",
+ node, lmc, rankx, bytelane, mode_str, byte_offset);
+ rank_delay_count[rankx][byte] = 0; // stop now
+ }
+ // FIXME: else had not started run - nothing else to do?
+ } else { // no error in the byte lane
+ if (rank_delay_count[rankx][byte] == 0) { // first success, set run start
+ ddr_print5("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: starting a run here\n",
+ node, lmc, rankx, bytelane, mode_str, byte_offset);
+ rank_delay_start[rankx][byte] = byte_offset;
+ }
+ rank_delay_count[rankx][byte] += BYTE_OFFSET_INCR; // bump run length
+
+ // is this now the biggest window?
+ if (rank_delay_count[rankx][byte] > rank_delay_best_count[rankx][byte]) {
+ rank_delay_best_count[rankx][byte] = rank_delay_count[rankx][byte];
+ rank_delay_best_start[rankx][byte] = rank_delay_start[rankx][byte];
+ debug_print("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: updating best to %d/%d\n",
+ node, lmc, rankx, bytelane, mode_str, byte_offset,
+ rank_delay_best_start[rankx][byte], rank_delay_best_count[rankx][byte]);
+ }
+ }
+ } /* for (byte = byte_lo; byte <= byte_hi; byte++) */
+ } /* for (rankx = 0; rankx < 4; rankx++) */
+
+ tot_errors |= off_errors;
+
+ } /* for (byte_offset = -63; byte_offset < 64; byte_offset += BYTE_OFFSET_INCR) */
+
+ // now choose the best byte_offsets for this pattern according to the best windows of the tested ranks
+ // calculate offset by constructing an average window from the rank windows
+ for (byte = byte_lo; byte <= byte_hi; byte++) {
+
+ pat_beg = -999;
+ pat_end = 999;
+
+ for (rankx = 0; rankx < 4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ rank_beg = rank_delay_best_start[rankx][byte];
+ pat_beg = max(pat_beg, rank_beg);
+ rank_end = rank_beg + rank_delay_best_count[rankx][byte] - BYTE_OFFSET_INCR;
+ pat_end = min(pat_end, rank_end);
+
+ ddr_print5("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test: Rank Window %3d:%3d\n",
+ node, lmc, rankx, bytelane, mode_str, rank_beg, rank_end);
+
+ } /* for (rankx = 0; rankx < 4; rankx++) */
+
+ pat_best_offset[byte] = (pat_end + pat_beg) / 2;
+ ddr_print4("N%d.LMC%d: Bytelane %d DLL %s Offset Test: Pattern %d Average %3d\n",
+ node, lmc, byte, mode_str, pattern, pat_best_offset[byte]);
+
+#if 0
+ // FIXME: next print the window counts
+ sprintf(sbuffer, "N%d.LMC%d Pattern %d: DLL %s Offset Count ",
+ node, lmc, pattern, mode_str);
+ printf("%-45s : ", sbuffer);
+ printf(" %3d", byte_delay_best_count);
+ printf("\n");
+#endif
+
+ new_best_offset[byte] += pat_best_offset[byte]; // sum the pattern averages
+ } /* for (byte = byte_lo; byte <= byte_hi; byte++) */
+ } /* for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) */
+ // end of pattern loop
+
+ ddr_print("N%d.LMC%d: HW DLL %s Offset Amount : ",
+ node, lmc, mode_str);
+
+    for (byte = byte_hi; byte >= byte_lo; --byte) { // print in descending byte index order
+ new_best_offset[byte] = divide_nint(new_best_offset[byte], NUM_BYTE_PATTERNS); // create the new average NINT
+
+ // print the best offsets from all patterns
+
+ if (bytelane == 0x0A) // print just the offset of all the bytes
+ ddr_print("%5d ", new_best_offset[byte]);
+ else
+ ddr_print("(byte %d) %5d ", byte, new_best_offset[byte]);
+
+
+#if 1
+ // done with testing, load up the best offsets we found...
+ change_dll_offset_enable(node, lmc, 0); // disable offsets while we load...
+ load_dll_offset(node, lmc, dll_offset_mode, new_best_offset[byte], byte);
+ change_dll_offset_enable(node, lmc, 1); // re-enable the offsets now that we are done loading
+#endif
+ } /* for (byte = byte_hi; byte >= byte_lo; --byte) */
+
+ ddr_print("\n");
+
+#if 0
+ // run the test one last time
+ // print whether there are errors or not, but only when verbose...
+ tot_errors = run_test_dram_byte_threads(node, num_lmcs, bytemask);
+ printf("N%d.LMC%d: Bytelane %d DLL %s Offset Final Test: errors 0x%x\n",
+ node, lmc, bytelane, mode_str, tot_errors);
+#endif
+}
+
+/*
+ * Automatically adjust the DLL offset for the selected bytelane using hardware-assist
+ */
+int perform_HW_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int bytelane)
+{
+ int save_ecc_ena[4];
+ bdk_lmcx_config_t lmc_config;
+ int lmc, num_lmcs = __bdk_dram_get_num_lmc(node);
+ const char *s;
+ //bdk_lmcx_comp_ctl2_t comp_ctl2;
+ int loops = 1, loop;
+
+ // see if we want to do the tuning more than once per LMC...
+ if ((s = getenv("ddr_tune_ecc_loops"))) {
+ loops = strtoul(s, NULL, 0);
+ }
+
+ // allow override of the test repeats (bursts)
+ if ((s = getenv("ddr_tune_byte_bursts")) != NULL) {
+ dram_tune_byte_bursts = strtoul(s, NULL, 10);
+ }
+
+ // print current working values
+ ddr_print2("N%d: H/W Tuning for bytelane %d will use %d loops, %d bursts, and %d patterns.\n",
+ node, bytelane, loops, dram_tune_byte_bursts,
+ NUM_BYTE_PATTERNS);
+
+ // FIXME? get flag from LMC0 only
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(0));
+
+ // do once for each active LMC
+
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+
+ ddr_print4("N%d: H/W Tuning: starting LMC%d bytelane %d tune.\n", node, lmc, bytelane);
+
+ /* Enable ECC for the HW tests */
+ // NOTE: we do enable ECC, but the HW tests used will not generate "visible" errors
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ save_ecc_ena[lmc] = lmc_config.s.ecc_ena;
+ lmc_config.s.ecc_ena = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u);
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+
+ // testing is done on a single LMC at a time
+ // FIXME: for now, loop here to show what happens multiple times
+ for (loop = 0; loop < loops; loop++) {
+ /* Perform DLL offset tuning */
+ //auto_set_dll_offset(node, 1 /* 1=write */, lmc, bytelane);
+ hw_assist_test_dll_offset(node, 2 /* 2=read */, lmc, bytelane);
+ }
+
+ // perform cleanup on active LMC
+ ddr_print4("N%d: H/W Tuning: finishing LMC%d bytelane %d tune.\n", node, lmc, bytelane);
+
+ /* Restore ECC for DRAM tests */
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ lmc_config.s.ecc_ena = save_ecc_ena[lmc];
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u);
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+
+ // finally, see if there are any read offset overrides after tuning
+ for (int by = 0; by < 9; by++) {
+ if ((s = lookup_env_parameter("ddr%d_tune_byte%d", lmc, by)) != NULL) {
+ int dllro = strtoul(s, NULL, 10);
+ change_dll_offset_enable(node, lmc, 0);
+ load_dll_offset(node, lmc, 2 /* 2=read */, dllro, by);
+ change_dll_offset_enable(node, lmc, 1);
+ }
+ }
+
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ // finish up...
+
+ return 0;
+
+} /* perform_HW_dll_offset_tuning */
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-util.h b/src/vendorcode/cavium/bdk/libdram/dram-util.h
new file mode 100644
index 0000000000..f8ab6c1552
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/dram-util.h
@@ -0,0 +1,96 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/**
+ * Small utility functions for use by libdram internally. These
+ * are not meant for users of the libdram API.
+ */
+
+/**
+ * Standard min(a,b) macro
+ */
+#define min(X, Y) \
+ ({ typeof (X) __x = (X); \
+ typeof (Y) __y = (Y); \
+ (__x < __y) ? __x : __y; })
+
+/**
+ * Standard max(a,b) macro
+ */
+#define max(X, Y) \
+ ({ typeof (X) __x = (X); typeof(Y) __y = (Y); \
+ (__x > __y) ? __x : __y; })
+
+/**
+ * Absolute value of an integer
+ *
+ * @param v
+ *
+ * @return
+ */
+static inline int64_t _abs(int64_t v)
+{
+ return (v < 0) ? -v : v;
+}
+
+/**
+ * Sign of an integer
+ *
+ * @param v
+ *
+ * @return
+ */
+static inline int64_t _sign(int64_t v)
+{
+ return v < 0;
+}
+
+/**
+ * Divide and round results up to the next higher integer.
+ *
+ * @param dividend
+ * @param divisor
+ *
+ * @return
+ */
+static inline uint64_t divide_roundup(uint64_t dividend, uint64_t divisor)
+{
+ return (dividend + divisor - 1) / divisor;
+}
+
diff --git a/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.c b/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.c
new file mode 100644
index 0000000000..cdc799744f
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.c
@@ -0,0 +1,2165 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+/* $Revision: 102369 $ */
+
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-l2c.h"
+#include "dram-internal.h"
+
+/* Define DDR_DEBUG to debug the DDR interface. This also enables the
+** output necessary for review by Cavium Inc. */
+/* #define DDR_DEBUG */
+
+
+static int global_ddr_clock_initialized = 0;
+static int global_ddr_memory_preserved = 0;
+
+#if 1
+uint64_t max_p1 = 0UL;
+#endif
+
+/*
+ * SDRAM Physical Address (figure 6-2 from the HRM)
+ * 7 6 3 2 0
+ * +---------+----+----------------------+---------------+--------+---+------+-----+
+ * | Dimm |Rank| Row | Col | Bank | C | Col | Bus |
+ * +---------+----+----------------------+---------------+--------+---+------+-----+
+ * | ^ | | ^ | | |
+ * 0 or 1 | | 12-18 bits | 6-8 bits | 1 or 2 bits
+ * bit | 0 or 1 bit LMC_CONFIG[ROW_LSB]+X | (X=1 or 2, resp)
+ * | |
+ * LMC_CONFIG[PBANK_LSB]+X 3 or 4 bits
+ *
+ * Bus = Selects the byte on the 72-bit DDR3 bus
+ * Col = Column Address for the memory part (10-12 bits)
+ * C = Selects the LMC that services the reference
+ * (2 bits for 4 LMC mode, 1 bit for 2 LMC mode; X=width)
+ * Bank = Bank Address for the memory part (DDR3=3 bits, DDR4=3 or 4 bits)
+ * Row = Row Address for the memory part (12-18 bits)
+ * Rank = Optional Rank Address for dual-rank DIMMs
+ * (present when LMC_CONFIG[RANK_ENA] is set)
+ * Dimm = Optional DIMM address (present with more than 1 DIMM)
+ */
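+
+/*
+ * Illustrative sketch (not used by the init flow below): the DLL tuning code in
+ * dram-tune-ddr3.c derives its rank and DIMM address strides from this layout
+ * using LMC_CONFIG[PBANK_LSB] and LMC_CONFIG[RANK_ENA], roughly:
+ *
+ *   rank_offset = 1ull << (28 + pbank_lsb - rank_ena + (num_lmcs / 2));
+ *   dimm_offset = 1ull << (28 + pbank_lsb + (num_lmcs / 2));
+ *
+ * so that the second rank (or second DIMM) of a channel can be reached by
+ * adding a single power-of-two offset to a test address.
+ */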
+
+
+/**
+ * Divide and round results to the nearest integer.
+ *
+ * @param dividend
+ * @param divisor
+ *
+ * @return
+ */
+uint64_t divide_nint(uint64_t dividend, uint64_t divisor)
+{
+    uint64_t quotient, remainder;
+    quotient = dividend / divisor;
+    remainder = dividend % divisor;
+    return quotient + ((remainder * 2) >= divisor);
+}
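+
+/* For example: divide_nint(7, 2) == 4 and divide_nint(5, 2) == 3 (a remainder
+ * of exactly half the divisor rounds up), while divide_nint(10, 4) == 3. */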
+
+/* Sometimes the pass/fail results for all possible delay settings
+ * determined by the read-leveling sequence are too forgiving. This
+ * usually occurs for DCLK speeds below 300 MHz. As a result the
+ * passing range is exaggerated. This function accepts the bitmask
+ * results from the sequence and truncates the passing range to a
+ * reasonable range and recomputes the proper deskew setting.
+ */
+
+/* Default ODT config must disable ODT */
+/* Must be const (read only) so that the structure is in flash */
+const dimm_odt_config_t disable_odt_config[] = {
+ /* DDR4 needs an additional field in the struct (odt_mask2) */
+ /* DIMMS ODT_ENA ODT_MASK ODT_MASK1 ODT_MASK2 QS_DIC RODT_CTL */
+ /* ===== ======= ======== ========= ========= ====== ======== */
+ /* 1 */ { 0, 0x0000, {.u = 0x0000}, {.u = 0x0000}, 0, 0x0000 },
+ /* 2 */ { 0, 0x0000, {.u = 0x0000}, {.u = 0x0000}, 0, 0x0000 },
+ /* 3 */ { 0, 0x0000, {.u = 0x0000}, {.u = 0x0000}, 0, 0x0000 },
+ /* 4 */ { 0, 0x0000, {.u = 0x0000}, {.u = 0x0000}, 0, 0x0000 },
+};
+/* Memory controller setup function */
+static int init_octeon_dram_interface(bdk_node_t node,
+ const ddr_configuration_t *ddr_configuration,
+ uint32_t ddr_hertz,
+ uint32_t cpu_hertz,
+ uint32_t ddr_ref_hertz,
+ int board_type,
+ int board_rev_maj,
+ int board_rev_min,
+ int ddr_interface_num,
+ uint32_t ddr_interface_mask)
+{
+ uint32_t mem_size_mbytes = 0;
+ int lmc_restart_retries = 0;
+
+ const char *s;
+ if ((s = lookup_env_parameter("ddr_timing_hertz")) != NULL)
+ ddr_hertz = strtoul(s, NULL, 0);
+
+ restart_lmc_init:
+
+ /* Poke the watchdog timer so it doesn't expire during DRAM init */
+ bdk_watchdog_poke();
+
+ mem_size_mbytes = init_octeon3_ddr3_interface(node,
+ ddr_configuration,
+ ddr_hertz,
+ cpu_hertz,
+ ddr_ref_hertz,
+ board_type,
+ board_rev_maj,
+ board_rev_min,
+ ddr_interface_num,
+ ddr_interface_mask);
+#define DEFAULT_RESTART_RETRIES 3
+ if (mem_size_mbytes == 0) { // means restart is possible
+ if (lmc_restart_retries < DEFAULT_RESTART_RETRIES) {
+ lmc_restart_retries++;
+ ddr_print("N%d.LMC%d Configuration problem: attempting LMC reset and init restart %d\n",
+ node, ddr_interface_num, lmc_restart_retries);
+ // re-assert RESET first, as that is the assumption of the init code
+ if (!ddr_memory_preserved(node))
+ cn88xx_lmc_ddr3_reset(node, ddr_interface_num, LMC_DDR3_RESET_ASSERT);
+ goto restart_lmc_init;
+ } else {
+ error_print("INFO: N%d.LMC%d Configuration: fatal problem remains after %d LMC init retries - Resetting node...\n",
+ node, ddr_interface_num, lmc_restart_retries);
+ bdk_wait_usec(500000);
+ bdk_reset_chip(node);
+ }
+ }
+
+ error_print("N%d.LMC%d Configuration Completed: %d MB\n",
+ node, ddr_interface_num, mem_size_mbytes);
+ return mem_size_mbytes;
+}
+
+#define DO_LIKE_RANDOM_XOR 1
+
+#if !DO_LIKE_RANDOM_XOR
+/*
+ * Suggested testing patterns.
+ *
+ * 0xFFFF_FFFF_FFFF_FFFF
+ * 0xAAAA_AAAA_AAAA_AAAA
+ * 0xFFFF_FFFF_FFFF_FFFF
+ * 0xAAAA_AAAA_AAAA_AAAA
+ * 0x5555_5555_5555_5555
+ * 0xAAAA_AAAA_AAAA_AAAA
+ * 0xFFFF_FFFF_FFFF_FFFF
+ * 0xAAAA_AAAA_AAAA_AAAA
+ * 0xFFFF_FFFF_FFFF_FFFF
+ * 0x5555_5555_5555_5555
+ * 0xFFFF_FFFF_FFFF_FFFF
+ * 0x5555_5555_5555_5555
+ * 0xAAAA_AAAA_AAAA_AAAA
+ * 0x5555_5555_5555_5555
+ * 0xFFFF_FFFF_FFFF_FFFF
+ * 0x5555_5555_5555_5555
+ *
+ * or possibly
+ *
+ * 0xFDFD_FDFD_FDFD_FDFD
+ * 0x8787_8787_8787_8787
+ * 0xFEFE_FEFE_FEFE_FEFE
+ * 0xC3C3_C3C3_C3C3_C3C3
+ * 0x7F7F_7F7F_7F7F_7F7F
+ * 0xE1E1_E1E1_E1E1_E1E1
+ * 0xBFBF_BFBF_BFBF_BFBF
+ * 0xF0F0_F0F0_F0F0_F0F0
+ * 0xDFDF_DFDF_DFDF_DFDF
+ * 0x7878_7878_7878_7878
+ * 0xEFEF_EFEF_EFEF_EFEF
+ * 0x3C3C_3C3C_3C3C_3C3C
+ * 0xF7F7_F7F7_F7F7_F7F7
+ * 0x1E1E_1E1E_1E1E_1E1E
+ * 0xFBFB_FBFB_FBFB_FBFB
+ * 0x0F0F_0F0F_0F0F_0F0F
+ */
+
+static const uint64_t test_pattern[] = {
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+ 0xAAAAAAAAAAAAAAAAULL,
+ 0x5555555555555555ULL,
+};
+#endif /* !DO_LIKE_RANDOM_XOR */
+
+int test_dram_byte(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, uint64_t *xor_data)
+{
+ uint64_t p1, p2, d1, d2;
+ uint64_t v, v1;
+ uint64_t p2offset = 0x10000000;
+ uint64_t datamask;
+ uint64_t xor;
+ int i, j, k;
+ int errors = 0;
+ int index;
+#if DO_LIKE_RANDOM_XOR
+ uint64_t pattern1 = bdk_rng_get_random64();
+ uint64_t this_pattern;
+#endif
+ uint64_t bad_bits[2] = {0,0};
+
+ // When doing in parallel, the caller must provide full 8-byte bitmask.
+ // Byte lanes may be clear in the mask to indicate no testing on that lane.
+ datamask = bitmask;
+
+ // final address must include LMC and node
+ p |= (lmc<<7); /* Map address into proper interface */
+ p = bdk_numa_get_address(node, p); /* Map to node */
+
+ // Not on THUNDER: p |= 1ull<<63;
+
+ /* Add offset to both test regions to not clobber boot stuff
+ * when running from L2.
+ */
+ p += 0x10000000; // FIXME? was: 0x4000000; // make sure base is out of the way of boot
+
+ /* The loop ranges and increments walk through a range of addresses avoiding bits that alias
+ * to different memory interfaces (LMCs) on the CN88XX; ie we want to limit activity to a
+ * single memory channel.
+ */
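+ /* Purely as an illustration of the loop bounds below (derived from the
+  * strides, not an authoritative statement about the LMC hash): each region
+  * gets the low 128 bytes of every 512-byte block, within the low 4 KB of
+  * every 16 KB block, across a 1 MB span -- 64 * 8 * 16 = 8192 qwords,
+  * i.e. 64 KB written per test region.
+  */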
+
+ /* Store something into each location first */
+ // NOTE: the ordering of loops is purposeful: fill full cachelines and flush
+ for (k = 0; k < (1 << 20); k += (1 << 14)) {
+ for (j = 0; j < (1 << 12); j += (1 << 9)) {
+ for (i = 0; i < (1 << 7); i += 8) {
+ index = i + j + k;
+ p1 = p + index;
+ p2 = p1 + p2offset;
+
+#if DO_LIKE_RANDOM_XOR
+ v = pattern1 * p1;
+ v1 = v; // write the same thing to both areas
+#else
+ v = 0ULL;
+ v1 = v;
+#endif
+ __bdk_dram_write64(p1, v);
+ __bdk_dram_write64(p2, v1);
+
+ /* Write back and invalidate the cache lines
+ *
+ * For OCX we cannot limit the number of L2 ways
+ * so instead we just write back and invalidate
+ * the L2 cache lines. This is not possible
+ * when booting remotely, however, so this is
+ * only enabled for U-Boot right now.
+ * Potentially the BDK can also take advantage
+ * of this.
+ */
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
+ }
+ }
+ }
+
+ BDK_DCACHE_INVALIDATE;
+
+#if DO_LIKE_RANDOM_XOR
+ this_pattern = bdk_rng_get_random64();
+#endif
+
+ // modify the contents of each location in some way
+ // NOTE: the ordering of loops is purposeful: modify full cachelines and flush
+ for (k = 0; k < (1 << 20); k += (1 << 14)) {
+ for (j = 0; j < (1 << 12); j += (1 << 9)) {
+ for (i = 0; i < (1 << 7); i += 8) {
+ index = i + j + k;
+ p1 = p + index;
+ p2 = p1 + p2offset;
+#if DO_LIKE_RANDOM_XOR
+ v = __bdk_dram_read64(p1) ^ this_pattern;
+ v1 = __bdk_dram_read64(p2) ^ this_pattern;
+#else
+ v = test_pattern[index%(sizeof(test_pattern)/sizeof(uint64_t))];
+ v &= datamask;
+ v1 = ~v;
+#endif
+
+ debug_print("[0x%016llX]: 0x%016llX, [0x%016llX]: 0x%016llX\n",
+ p1, v, p2, v1);
+
+ __bdk_dram_write64(p1, v);
+ __bdk_dram_write64(p2, v1);
+
+ /* Write back and invalidate the cache lines
+ *
+ * For OCX we cannot limit the number of L2 ways
+ * so instead we just write back and invalidate
+ * the L2 cache lines. This is not possible
+ * when booting remotely, however, so this is
+ * only enabled for U-Boot right now.
+ * Potentially the BDK can also take advantage
+ * of this.
+ */
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
+ }
+ }
+ }
+
+ BDK_DCACHE_INVALIDATE;
+
+ // test the contents of each location by predicting what should be there
+ // NOTE: the ordering of loops is purposeful: test full cachelines to detect
+ // an error occurring in any slot thereof
+ for (k = 0; k < (1 << 20); k += (1 << 14)) {
+ for (j = 0; j < (1 << 12); j += (1 << 9)) {
+ for (i = 0; i < (1 << 7); i += 8) {
+ index = i + j + k;
+ p1 = p + index;
+ p2 = p1 + p2offset;
+#if DO_LIKE_RANDOM_XOR
+ v = (p1 * pattern1) ^ this_pattern; // FIXME: this should predict what we find...???
+ d1 = __bdk_dram_read64(p1);
+ d2 = __bdk_dram_read64(p2);
+#else
+ v = test_pattern[index%(sizeof(test_pattern)/sizeof(uint64_t))];
+ d1 = __bdk_dram_read64(p1);
+ d2 = ~__bdk_dram_read64(p2);
+#endif
+ debug_print("[0x%016llX]: 0x%016llX, [0x%016llX]: 0x%016llX\n",
+ p1, d1, p2, d2);
+
+ xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes
+
+ if (!xor)
+ continue;
+
+ // accumulate bad bits
+ bad_bits[0] |= xor;
+ //bad_bits[1] |= ~mpr_data1 & 0xffUL; // cannot do ECC here
+
+ int bybit = 1;
+ uint64_t bymsk = 0xffULL; // start in byte lane 0
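+ /* Worked example (illustrative only): if xor == 0x0000000000ff0000 after
+  * masking, only byte lane 2 (bits 23:16) differs, so the walk below sets
+  * bit 2 in 'errors' and clears lane 2 from datamask before continuing.
+  */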
+ while (xor != 0) {
+ debug_print("ERROR: [0x%016llX] [0x%016llX] expected 0x%016llX xor %016llX\n",
+ p1, p2, v, xor);
+ if (xor & bymsk) { // error(s) in this lane
+ errors |= bybit; // set the byte error bit
+ xor &= ~bymsk; // clear byte lane in error bits
+ datamask &= ~bymsk; // clear the byte lane in the mask
+ if (datamask == 0) { // nothing left to do
+ goto done_now; // completely done when errors found in all byte lanes in datamask
+ }
+ }
+ bymsk <<= 8; // move mask into next byte lane
+ bybit <<= 1; // move bit into next byte position
+ }
+ }
+ }
+ }
+
+ done_now:
+ if (xor_data != NULL) { // send the bad bits back...
+ xor_data[0] = bad_bits[0];
+ xor_data[1] = bad_bits[1]; // let it be zeroed
+ }
+ return errors;
+}
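+
+/* Hypothetical caller sketch (not the actual call sites, which live elsewhere
+ * in libdram): test all eight data byte lanes of LMC 0 and report failures.
+ *
+ *   uint64_t xor_bits[2];
+ *   int fail_mask = test_dram_byte(node, 0, 0, 0xffffffffffffffffULL, xor_bits);
+ *   for (int lane = 0; lane < 8; lane++)
+ *       if (fail_mask & (1 << lane))
+ *           ddr_print("byte lane %d failed, bad bits 0x%016llx\n",
+ *                     lane, (xor_bits[0] >> (8 * lane)) & 0xffULL);
+ */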
+
+// NOTE: "mode" argument:
+// DBTRAIN_TEST: for testing using GP patterns, includes ECC
+// DBTRAIN_DBI: for DBI deskew training behavior (uses GP patterns)
+// DBTRAIN_LFSR: for testing using LFSR patterns, includes ECC
+// NOTE: trust the caller to specify the correct/supported mode
+//
+int test_dram_byte_hw(bdk_node_t node, int ddr_interface_num,
+ uint64_t p, int mode, uint64_t *xor_data)
+{
+ uint64_t p1;
+ uint64_t k;
+ int errors = 0;
+
+ uint64_t mpr_data0, mpr_data1;
+ uint64_t bad_bits[2] = {0,0};
+
+ int node_address, lmc, dimm;
+ int prank, lrank;
+ int bank, row, col;
+ int save_or_dis;
+ int byte;
+ int ba_loop, ba_bits;
+
+ bdk_lmcx_rlevel_ctl_t rlevel_ctl;
+ bdk_lmcx_dbtrain_ctl_t dbtrain_ctl;
+
+ int bank_errs;
+
+ // FIXME: K iterations set to 4 for now.
+ // FIXME: decrement to increase iterations.
+ // FIXME: must be no less than 22 to stay above an LMC hash field.
+ int kshift = 26;
+ const char *s;
+
+ // allow override default setting for kshift
+ if ((s = getenv("ddr_tune_set_kshift")) != NULL) {
+ int temp = strtoul(s, NULL, 0);
+ if ((temp < 22) || (temp > 27)) {
+ ddr_print("N%d.LMC%d: ILLEGAL override of kshift to %d, using default %d\n",
+ node, ddr_interface_num, temp, kshift);
+ } else {
+ VB_PRT(VBL_DEV2, "N%d.LMC%d: overriding kshift (%d) to %d\n",
+ node, ddr_interface_num, kshift, temp);
+ kshift = temp;
+ }
+ }
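+ /* For reference (simple arithmetic on the address loop below): with the
+  * default kshift of 26 the k loop covers the 256 MB window in
+  * 2^28/2^26 = 4 steps; overriding kshift to 22 would give 2^28/2^22 = 64 steps.
+  */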
+
+ /*
+ 1) Make sure that RLEVEL_CTL[OR_DIS] = 0.
+ */
+ rlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num));
+ save_or_dis = rlevel_ctl.s.or_dis;
+ rlevel_ctl.s.or_dis = 0; /* or_dis must be disabled for this sequence */
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num), rlevel_ctl.u);
+
+ /*
+ NOTE: this step done in the calling routine(s)
+ 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern of choice.
+ a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower (rising edge) 64 bits of data.
+ b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper (falling edge) 64 bits of data.
+ c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower (rising edge <7:0>) and upper
+ (falling edge <15:8>) ECC data.
+ */
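+ /* For example (mirroring the writes done by the DBI switchover code later in
+  * this file), a caller might program a clk/2 pattern like this:
+  *
+  *   DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE0(lmc), 0x00ff00ff00ff00ffULL);
+  *   DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE1(lmc), 0x00ff00ff00ff00ffULL);
+  *   DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE2(lmc), 0x0000ULL);
+  */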
+
+ // final address must include LMC and node
+ p |= (ddr_interface_num << 7); /* Map address into proper interface */
+ p = bdk_numa_get_address(node, p); /* Map to node */
+
+ /*
+ * Add base offset to both test regions to not clobber u-boot stuff
+ * when running from L2 for NAND boot.
+ */
+ p += 0x10000000; // offset to 256MB
+
+ errors = 0;
+
+ bdk_dram_address_extract_info(p, &node_address, &lmc, &dimm, &prank, &lrank, &bank, &row, &col);
+ VB_PRT(VBL_DEV2, "test_dram_byte_hw: START at A:0x%012lx, N%d L%d D%d R%d/%d B%1x Row:%05x Col:%05x\n",
+ p, node_address, lmc, dimm, prank, lrank, bank, row, col);
+
+ // only check once per call, and ignore if no match...
+ if ((int)node != node_address) {
+ error_print("ERROR: Node address mismatch; ignoring...\n");
+ return 0;
+ }
+ if (lmc != ddr_interface_num) {
+ error_print("ERROR: LMC address mismatch\n");
+ return 0;
+ }
+
+ /*
+ 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as it’s a one-shot operation).
+ This is to get into the habit of resetting PHY’s SILO to the original 0 location.
+ */
+ BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
+ phy_ctl.s.phy_reset = 1);
+
+ /* Walk through a range of addresses avoiding bits that alias
+ * interfaces on the CN88XX.
+ */
+
+ // FIXME: want to try to keep the K increment from affecting the LMC via hash,
+ // FIXME: so keep it above bit 21
+ // NOTE: we also want to keep k less than the base offset of bit 28 (256MB)
+
+ for (k = 0; k < (1UL << 28); k += (1UL << kshift)) {
+
+ // FIXME: the sequence will iterate over 1/2 cacheline
+ // FIXME: for each unit specified in "read_cmd_count",
+ // FIXME: so, we setup each sequence to do the max cachelines it can
+
+ p1 = p + k;
+
+ bdk_dram_address_extract_info(p1, &node_address, &lmc, &dimm, &prank, &lrank, &bank, &row, &col);
+ VB_PRT(VBL_DEV3, "test_dram_byte_hw: NEXT iteration at A:0x%012lx, N%d L%d D%d R%d/%d B%1x Row:%05x Col:%05x\n",
+ p1, node_address, lmc, dimm, prank, lrank, bank, row, col);
+
+ /*
+ 2) Setup the fields of the CSR DBTRAIN_CTL as follows:
+ a. COL, ROW, BA, BG, PRANK point to the starting point of the address.
+ You can just set them all to 0.
+ b. RW_TRAIN – set this to 1.
+ c. TCCD_L – set this to 0.
+ d. READ_CMD_COUNT – instructs the sequence how many writes/reads to issue.
+ It is a 5-bit field, so set it to 31 for the maximum # of r/w.
+ */
+ dbtrain_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DBTRAIN_CTL(ddr_interface_num));
+ dbtrain_ctl.s.column_a = col;
+ dbtrain_ctl.s.row_a = row;
+ dbtrain_ctl.s.bg = (bank >> 2) & 3;
+ dbtrain_ctl.s.prank = (dimm * 2) + prank; // FIXME?
+ dbtrain_ctl.s.lrank = lrank; // FIXME?
+ dbtrain_ctl.s.activate = (mode == DBTRAIN_DBI);
+ dbtrain_ctl.s.write_ena = 1;
+ dbtrain_ctl.s.read_cmd_count = 31; // max count pass 1.x
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) // added 81xx and 83xx
+ dbtrain_ctl.s.cmd_count_ext = 3; // max count pass 2.x
+ else
+ dbtrain_ctl.s.cmd_count_ext = 0; // max count pass 1.x
+ dbtrain_ctl.s.rw_train = 1;
+ dbtrain_ctl.s.tccd_sel = (mode == DBTRAIN_DBI);
+
+ // LFSR should only be on when chip supports it...
+ dbtrain_ctl.s.lfsr_pattern_sel = (mode == DBTRAIN_LFSR) ? 1 : 0;
+
+ bank_errs = 0;
+
+ // for each address, iterate over the 4 "banks" in the BA
+ for (ba_loop = 0, ba_bits = bank & 3;
+ ba_loop < 4;
+ ba_loop++, ba_bits = (ba_bits + 1) & 3)
+ {
+ dbtrain_ctl.s.ba = ba_bits;
+ DRAM_CSR_WRITE(node, BDK_LMCX_DBTRAIN_CTL(ddr_interface_num), dbtrain_ctl.u);
+
+ VB_PRT(VBL_DEV3, "test_dram_byte_hw: DBTRAIN: Pr:%d Lr:%d Bg:%d Ba:%d Row:%05x Col:%05x\n",
+ dbtrain_ctl.s.prank, dbtrain_ctl.s.lrank,
+ dbtrain_ctl.s.bg, dbtrain_ctl.s.ba, row, col);
+ /*
+ 4) Kick off the sequence (SEQ_CTL[SEQ_SEL] = 14, SEQ_CTL[INIT_START] = 1).
+ 5) Poll on SEQ_CTL[SEQ_COMPLETE] for completion.
+ */
+ perform_octeon3_ddr3_sequence(node, prank, ddr_interface_num, 14);
+
+ /*
+ 6) Read MPR_DATA0 and MPR_DATA1 for results:
+ a. MPR_DATA0[MPR_DATA<63:0>] – comparison results for DQ63:DQ0.
+ (1 means MATCH, 0 means FAIL).
+ b. MPR_DATA1[MPR_DATA<7:0>] – comparison results for ECC bit7:0.
+ */
+ mpr_data0 = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA0(ddr_interface_num));
+ mpr_data1 = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA1(ddr_interface_num));
+
+ /*
+ 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as it’s a one-shot operation).
+ This is to get into the habit of resetting PHY’s SILO to the original 0 location.
+ */
+ BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
+ phy_ctl.s.phy_reset = 1);
+
+ if (mode == DBTRAIN_DBI)
+ continue; // bypass any error checking or updating when DBI mode
+
+ // data bytes
+ if (~mpr_data0) {
+ for (byte = 0; byte < 8; byte++) {
+ if ((~mpr_data0 >> (8 * byte)) & 0xffUL)
+ bank_errs |= (1 << byte);
+ }
+ // accumulate bad bits
+ bad_bits[0] |= ~mpr_data0;
+ }
+
+ // include ECC byte errors
+ if (~mpr_data1 & 0xffUL) {
+ bank_errs |= (1 << 8);
+ bad_bits[1] |= ~mpr_data1 & 0xffUL;
+ }
+
+ } /* for (int ba_loop = 0; ba_loop < 4; ba_loop++) */
+
+ errors |= bank_errs;
+
+ } /* end for (k=...) */
+
+ rlevel_ctl.s.or_dis = save_or_dis;
+ DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num), rlevel_ctl.u);
+
+ if ((mode != DBTRAIN_DBI) && (xor_data != NULL)) { // send the bad bits back...
+ xor_data[0] = bad_bits[0];
+ xor_data[1] = bad_bits[1];
+ }
+
+ return errors;
+}
+
+static void set_ddr_memory_preserved(bdk_node_t node)
+{
+ global_ddr_memory_preserved |= 0x1 << node;
+}
+
+int ddr_memory_preserved(bdk_node_t node)
+{
+ return (global_ddr_memory_preserved & (0x1 << node)) != 0;
+}
+
+void perform_ddr_init_sequence(bdk_node_t node, int rank_mask,
+ int ddr_interface_num)
+{
+ const char *s;
+ int ddr_init_loops = 1;
+ int rankx;
+
+ if ((s = lookup_env_parameter("ddr%d_init_loops", ddr_interface_num)) != NULL)
+ ddr_init_loops = strtoul(s, NULL, 0);
+
+ while (ddr_init_loops--) {
+ for (rankx = 0; rankx < 8; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ perform_octeon3_ddr3_sequence(node, (1 << rankx),
+ ddr_interface_num, 0); /* power-up/init */
+
+ bdk_wait_usec(1000); /* Wait a while. */
+
+ if ((s = lookup_env_parameter("ddr_sequence1")) != NULL) {
+ int sequence1;
+ sequence1 = strtoul(s, NULL, 0);
+ perform_octeon3_ddr3_sequence(node, (1 << rankx),
+ ddr_interface_num, sequence1);
+ }
+
+ if ((s = lookup_env_parameter("ddr_sequence2")) != NULL) {
+ int sequence2;
+ sequence2 = strtoul(s, NULL, 0);
+ perform_octeon3_ddr3_sequence(node, (1 << rankx),
+ ddr_interface_num, sequence2);
+ }
+ }
+ }
+}
+
+static void set_ddr_clock_initialized(bdk_node_t node, int ddr_interface, int inited_flag)
+{
+ int bit = node * 8 + ddr_interface;
+ if (inited_flag)
+ global_ddr_clock_initialized |= (0x1 << bit);
+ else
+ global_ddr_clock_initialized &= ~(0x1 << bit);
+}
+static int ddr_clock_initialized(bdk_node_t node, int ddr_interface)
+{
+ int bit = node * 8 + ddr_interface;
+ return (!!(global_ddr_clock_initialized & (0x1 << bit)));
+}
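+/* Both helpers above pack the flags as one bit per (node, LMC) pair:
+ * bit = node * 8 + ddr_interface, e.g. node 1, LMC 2 -> bit 10. */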
+
+
+static void cn78xx_lmc_dreset_init (bdk_node_t node, int ddr_interface_num)
+{
+ /*
+ * This is the embodiment of the 6.9.4 LMC DRESET Initialization section below.
+ *
+ * The remainder of this section describes the sequence for LMCn.
+ *
+ * 1. If not done already, write LMC(0..3)_DLL_CTL2 to its reset value
+ * (except without changing the LMC(0..3)_DLL_CTL2[INTF_EN] value from
+ * that set in the prior Step 3), including LMC(0..3)_DLL_CTL2[DRESET] = 1.
+ *
+ * 2. Without changing any other LMC(0..3)_DLL_CTL2 fields, write
+ * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] = 1.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(ddr_interface_num),
+ c.s.dll_bringup = 1);
+
+ /*
+ * 3. Read LMC(0..3)_DLL_CTL2 and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(ddr_interface_num));
+
+ /*
+ * 4. Wait for a minimum of 10 LMC CK cycles.
+ */
+
+ bdk_wait_usec(1);
+
+ /*
+ * 5. Without changing any other fields in LMC(0..3)_DLL_CTL2, write
+ * LMC(0..3)_DLL_CTL2[QUAD_DLL_ENA] = 1.
+ * LMC(0..3)_DLL_CTL2[QUAD_DLL_ENA] must not change after this point
+ * without restarting the LMCn DRESET initialization sequence.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(ddr_interface_num),
+ c.s.quad_dll_ena = 1);
+
+ /*
+ * 6. Read LMC(0..3)_DLL_CTL2 and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(ddr_interface_num));
+
+ /*
+ * 7. Wait a minimum of 10 us.
+ */
+
+ bdk_wait_usec(10);
+
+ /*
+ * 8. Without changing any other fields in LMC(0..3)_DLL_CTL2, write
+ * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] = 0.
+ * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] must not change after this point
+ * without restarting the LMCn DRESET initialization sequence.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(ddr_interface_num),
+ c.s.dll_bringup = 0);
+
+ /*
+ * 9. Read LMC(0..3)_DLL_CTL2 and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(ddr_interface_num));
+
+ /*
+ * 10. Without changing any other fields in LMC(0..3)_DLL_CTL2, write
+ * LMC(0..3)_DLL_CTL2[DRESET] = 0.
+ * LMC(0..3)_DLL_CTL2[DRESET] must not change after this point without
+ * restarting the LMCn DRESET initialization sequence.
+ *
+ * After completing LMCn DRESET initialization, all LMC CSRs may be
+ * accessed. Prior to completing LMC DRESET initialization, only
+ * LMC(0..3)_DDR_PLL_CTL, LMC(0..3)_DLL_CTL2, LMC(0..3)_RESET_CTL, and
+ * LMC(0..3)_COMP_CTL2 LMC CSRs can be accessed.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(ddr_interface_num),
+ c.s.dreset = 0);
+
+ /*
+ * NEW STEP - necessary for O73, O78 P2.0, O75, and T88 P2.0
+ * McBuggin: #24821
+ *
+ * 11. Wait for a minimum of 10 LMC CK cycles.
+ */
+
+ bdk_wait_usec(1);
+}
+
+/*static*/ void cn88xx_lmc_ddr3_reset(bdk_node_t node, int ddr_interface_num, int reset)
+{
+ /*
+ * 4. Deassert DDRn_RESET_L pin by writing LMC(0..3)_RESET_CTL[DDR3RST] = 1
+ * without modifying any other LMC(0..3)_RESET_CTL fields.
+ * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
+ * 6. Wait a minimum of 500us. This guarantees the necessary T = 500us
+ * delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE* assertion.
+ */
+ ddr_print("LMC%d %s DDR_RESET_L\n", ddr_interface_num,
+ (reset == LMC_DDR3_RESET_DEASSERT) ? "De-asserting" : "Asserting");
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_RESET_CTL(ddr_interface_num),
+ c.cn8.ddr3rst = reset);
+ BDK_CSR_READ(node, BDK_LMCX_RESET_CTL(ddr_interface_num));
+ bdk_wait_usec(500);
+}
+
+int initialize_ddr_clock(bdk_node_t node,
+ const ddr_configuration_t *ddr_configuration,
+ uint32_t cpu_hertz,
+ uint32_t ddr_hertz,
+ uint32_t ddr_ref_hertz,
+ int ddr_interface_num,
+ uint32_t ddr_interface_mask
+ )
+{
+ const char *s;
+
+ if (ddr_clock_initialized(node, ddr_interface_num))
+ return 0;
+
+ if (!ddr_clock_initialized(node, 0)) { /* Do this once */
+ int i;
+ bdk_lmcx_reset_ctl_t reset_ctl;
+ /* Check to see if memory is to be preserved and set global flag */
+ for (i=3; i>=0; --i) {
+ if ((ddr_interface_mask & (1 << i)) == 0)
+ continue;
+ reset_ctl.u = BDK_CSR_READ(node, BDK_LMCX_RESET_CTL(i));
+ if (reset_ctl.s.ddr3psv == 1) {
+ ddr_print("LMC%d Preserving memory\n", i);
+ set_ddr_memory_preserved(node);
+
+ /* Re-initialize flags */
+ reset_ctl.cn8.ddr3pwarm = 0;
+ reset_ctl.cn8.ddr3psoft = 0;
+ reset_ctl.s.ddr3psv = 0;
+ DRAM_CSR_WRITE(node, BDK_LMCX_RESET_CTL(i), reset_ctl.u);
+ }
+ }
+ }
+
+ if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX)) {
+
+ bdk_lmcx_ddr_pll_ctl_t ddr_pll_ctl;
+ const dimm_config_t *dimm_config_table = ddr_configuration->dimm_config_table;
+
+ /* ddr_type only indicates DDR4 or DDR3 */
+ int ddr_type = get_ddr_type(node, &dimm_config_table[0]);
+
+ /*
+ * 6.9 LMC Initialization Sequence
+ *
+ * There are 14 parts to the LMC initialization procedure:
+ *
+ * 1. LMC interface enable initialization
+ *
+ * 2. DDR PLL initialization
+ *
+ * 3. LMC CK initialization
+ *
+ * 4. LMC DRESET initialization
+ *
+ * 5. LMC CK local initialization
+ *
+ * 6. LMC RESET initialization
+ *
+ * 7. Early LMC initialization
+ *
+ * 8. LMC offset training
+ *
+ * 9. LMC internal Vref training
+ *
+ * 10. LMC deskew training
+ *
+ * 11. LMC write leveling
+ *
+ * 12. LMC read leveling
+ *
+ * 13. DRAM Vref Training for DDR4
+ *
+ * 14. Final LMC initialization
+ *
+ * CN88XX supports two modes:
+ *
+ * - two-LMC mode: both LMCs 2/3 must not be enabled
+ * (LMC2/3_DLL_CTL2[DRESET] must be set to 1 and LMC2/3_DLL_CTL2[INTF_EN]
+ * must be set to 0) and both LMCs 0/1 must be enabled.
+ *
+ * - four-LMC mode: all four LMCs 0..3 must be enabled.
+ *
+ * Steps 4 and 6..14 should each be performed for each enabled LMC (either
+ * twice or four times). Steps 1..3 and 5 are more global in nature and
+ * each must be executed exactly once (not once per LMC) each time the
+ * DDR PLL changes or is first brought up. Steps 1..3 and 5 need not be
+ * performed if the DDR PLL is stable.
+ *
+ * Generally, the steps are performed in order. The exception is that the
+ * CK local initialization (step 5) must be performed after some DRESET
+ * initializations (step 4) and before other DRESET initializations when
+ * the DDR PLL is brought up or changed. (The CK local initialization
+ * uses information from some LMCs to bring up the other local CKs.) The
+ * following text describes these ordering requirements in more detail.
+ *
+ * Following any chip reset, the DDR PLL must be brought up, and all 14
+ * steps should be executed. Subsequently, it is possible to execute only
+ * steps 4 and 6..14, or to execute only steps 8..14.
+ *
+ * The remainder of this section covers these initialization steps in
+ * sequence.
+ */
+
+ if (ddr_interface_num == 0) { /* Do this once */
+ bdk_lmcx_dll_ctl2_t dll_ctl2;
+ int loop_interface_num;
+
+ /*
+ * 6.9.1 LMC Interface-Enable Initialization
+ *
+ * LMC interface-enable initialization (Step 1) must be performed only
+ * once, not once per LMC in four-LMC mode. This step is not required
+ * in two-LMC mode.
+ *
+ * Perform the following three substeps for the LMC interface-enable
+ * initialization:
+ *
+ * 1. Without changing any other LMC2_DLL_CTL2 fields (LMC(0..3)_DLL_CTL2
+ * should be at their reset values after Step 1), write
+ * LMC2_DLL_CTL2[INTF_EN] = 1 if four-LMC mode is desired.
+ *
+ * 2. Without changing any other LMC3_DLL_CTL2 fields, write
+ * LMC3_DLL_CTL2[INTF_EN] = 1 if four-LMC mode is desired.
+ *
+ * 3. Read LMC2_DLL_CTL2 and wait for the result.
+ *
+ * The LMC2_DLL_CTL2[INTF_EN] and LMC3_DLL_CTL2[INTF_EN] values should
+ * not be changed by software from this point.
+ *
+ */
+
+ /* Put all LMCs into DRESET here; these are the reset values... */
+ for (loop_interface_num = 0; loop_interface_num < 4; ++loop_interface_num) {
+ if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
+ continue;
+
+ dll_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(loop_interface_num));
+
+ dll_ctl2.s.byp_setting = 0;
+ dll_ctl2.s.byp_sel = 0;
+ dll_ctl2.s.quad_dll_ena = 0;
+ dll_ctl2.s.dreset = 1;
+ dll_ctl2.s.dll_bringup = 0;
+ dll_ctl2.s.intf_en = 0;
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL2(loop_interface_num), dll_ctl2.u);
+ }
+
+ /* Now set INTF_EN for *ONLY* LMC2/3 if they are to be active on 88XX. */
+ /* Do *NOT* touch LMC0/1 INTF_EN=0 setting on 88XX. */
+ /* But we do have to set LMC1 INTF_EN=1 on 83XX if we want it active... */
+ /* Note that 81xx has only LMC0 so the mask should reflect that. */
+ for (loop_interface_num = (CAVIUM_IS_MODEL(CAVIUM_CN83XX)) ? 1 : 2;
+ loop_interface_num < 4; ++loop_interface_num) {
+ if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
+ continue;
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(loop_interface_num),
+ c.s.intf_en = 1);
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(loop_interface_num));
+ }
+
+ /*
+ * 6.9.2 DDR PLL Initialization
+ *
+ * DDR PLL initialization (Step 2) must be performed for each chip reset
+ * and whenever the DDR clock speed changes. This step needs to be
+ * performed only once, not once per LMC.
+ *
+ * Perform the following eight substeps to initialize the DDR PLL:
+ *
+ * 1. If not done already, write all fields in LMC(0..1)_DDR_PLL_CTL and
+ * LMC(0..1)_DLL_CTL2 to their reset values, including:
+ *
+ * .. LMC0_DDR_PLL_CTL[DDR_DIV_RESET] = 1
+ * .. LMC0_DLL_CTL2[DRESET] = 1
+ *
+ * This substep is not necessary after a chip reset.
+ *
+ */
+
+ ddr_pll_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(0));
+
+ ddr_pll_ctl.cn83xx.reset_n = 0;
+ ddr_pll_ctl.cn83xx.ddr_div_reset = 1;
+ ddr_pll_ctl.cn83xx.phy_dcok = 0;
+ ddr_pll_ctl.cn83xx.dclk_invert = 0;
+
+ // allow override of LMC0 desired setting for DCLK_INVERT
+ if ((s = lookup_env_parameter("ddr0_set_dclk_invert")) != NULL) {
+ ddr_pll_ctl.cn83xx.dclk_invert = !!strtoul(s, NULL, 0);
+ ddr_print("LMC0: override DDR_PLL_CTL[dclk_invert] to %d\n",
+ ddr_pll_ctl.cn83xx.dclk_invert);
+ }
+
+ // always write LMC0 CSR, it must be active
+ DRAM_CSR_WRITE(node, BDK_LMCX_DDR_PLL_CTL(0), ddr_pll_ctl.u);
+ ddr_print("%-45s : 0x%016lx\n", "LMC0: DDR_PLL_CTL", ddr_pll_ctl.u);
+
+ // only when LMC1 is active
+ // NOTE: 81xx has only 1 LMC, and 83xx can operate in 1-LMC mode
+ if (ddr_interface_mask & 0x2) {
+
+ ddr_pll_ctl.cn83xx.dclk_invert ^= 1; /* DEFAULT: Toggle dclk_invert from LMC0 */
+
+ // allow override of LMC1 desired setting for DCLK_INVERT
+ if ((s = lookup_env_parameter("ddr1_set_dclk_invert")) != NULL) {
+ ddr_pll_ctl.cn83xx.dclk_invert = !!strtoul(s, NULL, 0);
+ ddr_print("LMC1: override DDR_PLL_CTL[dclk_invert] to %d\n",
+ ddr_pll_ctl.cn83xx.dclk_invert);
+ }
+
+ // always write LMC1 CSR when it is active
+ DRAM_CSR_WRITE(node, BDK_LMCX_DDR_PLL_CTL(1), ddr_pll_ctl.u);
+ ddr_print("%-45s : 0x%016lx\n", "LMC1: DDR_PLL_CTL", ddr_pll_ctl.u);
+ }
+
+ /*
+ * 2. If the current DRAM contents are not preserved (see
+ * LMC(0..3)_RESET_ CTL[DDR3PSV]), this is also an appropriate time to
+ * assert the RESET# pin of the DDR3/DDR4 DRAM parts. If desired, write
+ * LMC0_RESET_ CTL[DDR3RST] = 0 without modifying any other
+ * LMC0_RESET_CTL fields to assert the DDR_RESET_L pin. No action is
+ * required here to assert DDR_RESET_L following a chip reset. Refer to
+ * Section 6.9.6. Do this for all enabled LMCs.
+ */
+
+ for (loop_interface_num = 0;
+ ( !ddr_memory_preserved(node)) && loop_interface_num < 4;
+ ++loop_interface_num)
+ {
+
+ if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
+ continue;
+
+ cn88xx_lmc_ddr3_reset(node, loop_interface_num, LMC_DDR3_RESET_ASSERT);
+ }
+
+ /*
+ * 3. Without changing any other LMC0_DDR_PLL_CTL values, write LMC0_DDR_
+ * PLL_CTL[CLKF] with a value that gives a desired DDR PLL speed. The
+ * LMC0_DDR_PLL_CTL[CLKF] value should be selected in conjunction with
+ * the post-scalar divider values for LMC (LMC0_DDR_PLL_CTL[DDR_PS_EN])
+ * so that the desired LMC CK speed is produced (all enabled LMCs
+ * must run the same speed). Section 5.14 describes
+ * LMC0_DDR_PLL_CTL[CLKF] and LMC0_DDR_PLL_CTL[DDR_PS_EN] programmings
+ * that produce the desired LMC CK speed. Section 6.9.3 describes LMC CK
+ * initialization, which can be done separately from the DDR PLL
+ * initialization described in this section.
+ *
+ * The LMC0_DDR_PLL_CTL[CLKF] value must not change after this point
+ * without restarting this SDRAM PLL initialization sequence.
+ */
+
+ {
+ /* CLKF = (DCLK * (CLKR+1) * EN(1, 2, 3, 4, 5, 6, 7, 8, 10, 12))/DREF - 1 */
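+ /* Worked example (assumed, not board-specific values): targeting
+  * DCLK = 800 MHz from DREF = 50 MHz with CLKR = 0 and EN = 6 gives
+  * CLKF = (800e6 * 1 * 6)/50e6 - 1 = 95, a PLL of 50 MHz * 96 = 4800 MHz
+  * (the highest legal PLL, which the search below favors), 4800/6 = 800 MHz
+  * exactly, and BWADJ = (95+1)/10 = 9.
+  */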
+ int en_idx, save_en_idx, best_en_idx=0;
+ uint64_t clkf, clkr, max_clkf = 127;
+ uint64_t best_clkf=0, best_clkr=0;
+ uint64_t best_pll_MHz = 0;
+ uint64_t pll_MHz;
+ uint64_t min_pll_MHz = 800;
+ uint64_t max_pll_MHz = 5000;
+ uint64_t error;
+ uint64_t best_error;
+ uint64_t best_calculated_ddr_hertz = 0;
+ uint64_t calculated_ddr_hertz = 0;
+ uint64_t orig_ddr_hertz = ddr_hertz;
+ static const int _en[] = {1, 2, 3, 4, 5, 6, 7, 8, 10, 12};
+ int override_pll_settings;
+ int new_bwadj;
+
+ error = best_error = ddr_hertz; /* Init to max error */
+
+ ddr_print("DDR Reference Hertz = %d\n", ddr_ref_hertz);
+
+ while (best_error == ddr_hertz) {
+
+ for (clkr = 0; clkr < 4; ++clkr) {
+ for (en_idx=sizeof(_en)/sizeof(int)-1; en_idx>=0; --en_idx) {
+ save_en_idx = en_idx;
+ clkf = ((ddr_hertz) * (clkr+1) * (_en[save_en_idx]));
+ clkf = divide_nint(clkf, ddr_ref_hertz) - 1;
+ pll_MHz = ddr_ref_hertz * (clkf+1) / (clkr+1) / 1000000;
+ calculated_ddr_hertz = ddr_ref_hertz * (clkf + 1) / ((clkr + 1) * (_en[save_en_idx]));
+ error = ddr_hertz - calculated_ddr_hertz;
+
+ if ((pll_MHz < min_pll_MHz) || (pll_MHz > max_pll_MHz)) continue;
+ if (clkf > max_clkf) continue; /* PLL requires clkf to be limited */
+ if (_abs(error) > _abs(best_error)) continue;
+
+ VB_PRT(VBL_TME, "clkr: %2lu, en[%d]: %2d, clkf: %4lu, pll_MHz: %4lu, ddr_hertz: %8lu, error: %8ld\n",
+ clkr, save_en_idx, _en[save_en_idx], clkf, pll_MHz, calculated_ddr_hertz, error);
+
+ /* Favor the highest PLL frequency. */
+ if ((_abs(error) < _abs(best_error)) || (pll_MHz > best_pll_MHz)) {
+ best_pll_MHz = pll_MHz;
+ best_calculated_ddr_hertz = calculated_ddr_hertz;
+ best_error = error;
+ best_clkr = clkr;
+ best_clkf = clkf;
+ best_en_idx = save_en_idx;
+ }
+ }
+ }
+
+ override_pll_settings = 0;
+
+ if ((s = lookup_env_parameter("ddr_pll_clkr")) != NULL) {
+ best_clkr = strtoul(s, NULL, 0);
+ override_pll_settings = 1;
+ }
+ if ((s = lookup_env_parameter("ddr_pll_clkf")) != NULL) {
+ best_clkf = strtoul(s, NULL, 0);
+ override_pll_settings = 1;
+ }
+ if ((s = lookup_env_parameter("ddr_pll_en_idx")) != NULL) {
+ best_en_idx = strtoul(s, NULL, 0);
+ override_pll_settings = 1;
+ }
+
+ if (override_pll_settings) {
+ best_pll_MHz = ddr_ref_hertz * (best_clkf+1) / (best_clkr+1) / 1000000;
+ best_calculated_ddr_hertz = ddr_ref_hertz * (best_clkf + 1) / ((best_clkr + 1) * (_en[best_en_idx]));
+ best_error = ddr_hertz - best_calculated_ddr_hertz;
+ }
+
+ ddr_print("clkr: %2lu, en[%d]: %2d, clkf: %4lu, pll_MHz: %4lu, ddr_hertz: %8lu, error: %8ld <==\n",
+ best_clkr, best_en_idx, _en[best_en_idx], best_clkf, best_pll_MHz,
+ best_calculated_ddr_hertz, best_error);
+
+ /* Try lowering the frequency if we can't get a working configuration */
+ if (best_error == ddr_hertz) {
+ if (ddr_hertz < orig_ddr_hertz - 10000000)
+ break;
+ ddr_hertz -= 1000000;
+ best_error = ddr_hertz;
+ }
+
+ } /* while (best_error == ddr_hertz) */
+
+
+ if (best_error == ddr_hertz) {
+ error_print("ERROR: Can not compute a legal DDR clock speed configuration.\n");
+ return(-1);
+ }
+
+ new_bwadj = (best_clkf + 1) / 10;
+ VB_PRT(VBL_TME, "bwadj: %2d\n", new_bwadj);
+
+ if ((s = lookup_env_parameter("ddr_pll_bwadj")) != NULL) {
+ new_bwadj = strtoul(s, NULL, 0);
+ VB_PRT(VBL_TME, "bwadj: %2d\n", new_bwadj);
+ }
+
+ for (loop_interface_num = 0; loop_interface_num<2; ++loop_interface_num) {
+ if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
+ continue;
+
+ // make sure we preserve any settings already there
+ ddr_pll_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
+ ddr_print("LMC%d: DDR_PLL_CTL : 0x%016lx\n",
+ loop_interface_num, ddr_pll_ctl.u);
+
+ ddr_pll_ctl.cn83xx.ddr_ps_en = best_en_idx;
+ ddr_pll_ctl.cn83xx.clkf = best_clkf;
+ ddr_pll_ctl.cn83xx.clkr = best_clkr;
+ ddr_pll_ctl.cn83xx.reset_n = 0;
+ ddr_pll_ctl.cn83xx.bwadj = new_bwadj;
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num), ddr_pll_ctl.u);
+ ddr_print("LMC%d: DDR_PLL_CTL : 0x%016lx\n",
+ loop_interface_num, ddr_pll_ctl.u);
+ }
+ }
+
+
+ for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) {
+ if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
+ continue;
+
+ /*
+ * 4. Read LMC0_DDR_PLL_CTL and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
+
+ /*
+ * 5. Wait a minimum of 3 us.
+ */
+
+ bdk_wait_usec(3); /* Wait 3 us */
+
+ /*
+ * 6. Write LMC0_DDR_PLL_CTL[RESET_N] = 1 without changing any other
+ * LMC0_DDR_PLL_CTL values.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num),
+ c.cn83xx.reset_n = 1);
+
+ /*
+ * 7. Read LMC0_DDR_PLL_CTL and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
+
+ /*
+ * 8. Wait a minimum of 25 us.
+ */
+
+ bdk_wait_usec(25); /* Wait 25 us */
+
+ } /* for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) */
+
+ for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) {
+ if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
+ continue;
+ /*
+ * 6.9.3 LMC CK Initialization
+ *
+ * DDR PLL initialization must be completed prior to starting LMC CK
+ * initialization.
+ *
+ * Perform the following substeps to initialize the LMC CK. Perform
+ * substeps 1..3 for both LMC0 and LMC1.
+ *
+ * 1. Without changing any other LMC(0..3)_DDR_PLL_CTL values, write
+ * LMC(0..3)_DDR_PLL_CTL[DDR_DIV_RESET] = 1 and
+ * LMC(0..3)_DDR_PLL_CTL[DDR_PS_EN] with the appropriate value to get the
+ * desired LMC CK speed. Section 5.14 discusses CLKF and DDR_PS_EN
+ * programmings. The LMC(0..3)_DDR_PLL_CTL[DDR_PS_EN] must not change
+ * after this point without restarting this LMC CK initialization
+ * sequence.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num),
+ c.cn83xx.ddr_div_reset = 1);
+
+ /*
+ * 2. Without changing any other fields in LMC(0..3)_DDR_PLL_CTL, write
+ * LMC(0..3)_DDR_PLL_CTL[DDR4_MODE] = 0.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num),
+ c.cn83xx.ddr4_mode = (ddr_type == DDR4_DRAM) ? 1 : 0);
+
+ /*
+ * 3. Read LMC(0..3)_DDR_PLL_CTL and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
+
+ /*
+ * 4. Wait a minimum of 1 us.
+ */
+
+ bdk_wait_usec(1); /* Wait 1 us */
+
+ /*
+ * 5. Without changing any other fields in LMC(0..3)_DDR_PLL_CTL, write
+ * LMC(0..3)_DDR_PLL_CTL[PHY_DCOK] = 1.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num),
+ c.cn83xx.phy_dcok = 1);
+
+ /*
+ * 6. Read LMC(0..3)_DDR_PLL_CTL and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
+
+ /*
+ * 7. Wait a minimum of 20 us.
+ */
+
+ bdk_wait_usec(20); /* Wait 20 us */
+
+ /*
+ * 8. Without changing any other LMC(0..3)_COMP_CTL2 values, write
+ * LMC(0..3)_COMP_CTL2[CK_CTL,CONTROL_CTL,CMD_CTL] to the desired
+ * DDR*_CK_*_P control and command signals drive strength.
+ */
+
+ {
+ bdk_lmcx_comp_ctl2_t comp_ctl2;
+ const ddr3_custom_config_t *custom_lmc_config = &ddr_configuration->custom_lmc_config;
+
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(loop_interface_num));
+
+ comp_ctl2.s.dqx_ctl = 4; /* Default 4=34.3 ohm */
+ comp_ctl2.s.ck_ctl =
+ (custom_lmc_config->ck_ctl == 0) ? 4 : custom_lmc_config->ck_ctl; /* Default 4=34.3 ohm */
+ comp_ctl2.s.cmd_ctl =
+ (custom_lmc_config->cmd_ctl == 0) ? 4 : custom_lmc_config->cmd_ctl; /* Default 4=34.3 ohm */
+
+ comp_ctl2.s.rodt_ctl = 0x4; /* 60 ohm */
+
+ // These need to be done here, not later in Step 6.9.7.
+ // NOTE: these are/will be specific to a chip; for now, set to 0
+ // should we provide overrides for these?
+ comp_ctl2.s.ntune_offset = 0;
+ comp_ctl2.s.ptune_offset = 0;
+
+ // now do any overrides...
+ if ((s = lookup_env_parameter("ddr_ck_ctl")) != NULL) {
+ comp_ctl2.s.ck_ctl = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_cmd_ctl")) != NULL) {
+ comp_ctl2.s.cmd_ctl = strtoul(s, NULL, 0);
+ }
+
+ if ((s = lookup_env_parameter("ddr_dqx_ctl")) != NULL) {
+ comp_ctl2.s.dqx_ctl = strtoul(s, NULL, 0);
+ }
+
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(loop_interface_num), comp_ctl2.u);
+ }
+
+ /*
+ * 9. Read LMC(0..3)_DDR_PLL_CTL and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
+
+ /*
+ * 10. Wait a minimum of 200 ns.
+ */
+
+ bdk_wait_usec(1); /* Wait 1 us */
+
+ /*
+ * 11. Without changing any other LMC(0..3)_DDR_PLL_CTL values, write
+ * LMC(0..3)_DDR_PLL_CTL[DDR_DIV_RESET] = 0.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num),
+ c.cn83xx.ddr_div_reset = 0);
+
+ /*
+ * 12. Read LMC(0..3)_DDR_PLL_CTL and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
+
+ /*
+ * 13. Wait a minimum of 200 ns.
+ */
+ bdk_wait_usec(1); /* Wait 1 us */
+
+ } /* for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) */
+
+ } /* if (ddr_interface_num == 0) */ /* Do this once */
+
+ if (ddr_interface_num == 0) { /* Do this once */
+ bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
+
+ /*
+ * 6.9.4 LMC DRESET Initialization
+ *
+ * All of the DDR PLL, LMC global CK, and LMC interface enable
+ * initializations must be completed prior to starting this LMC DRESET
+ * initialization (Step 4).
+ *
+ * This LMC DRESET step is done for all enabled LMCs.
+ *
+ * There are special constraints on the ordering of DRESET initialization
+ * (Steps 4) and CK local initialization (Step 5) whenever CK local
+ * initialization must be executed. CK local initialization must be
+ * executed whenever the DDR PLL is being brought up (for each chip reset
+ * and whenever the DDR clock speed changes).
+ *
+ * When Step 5 must be executed in the two-LMC mode case:
+ * - LMC0 DRESET initialization must occur before Step 5.
+ * - LMC1 DRESET initialization must occur after Step 5.
+ *
+ * When Step 5 must be executed in the four-LMC mode case:
+ * - LMC2 and LMC3 DRESET initialization must occur before Step 5.
+ * - LMC0 and LMC1 DRESET initialization must occur after Step 5.
+ */
+
+ if ((ddr_interface_mask == 0x1) || (ddr_interface_mask == 0x3)) {
+ /* ONE-LMC MODE FOR 81XX AND 83XX BEFORE STEP 5 */
+ /* TWO-LMC MODE BEFORE STEP 5 */
+ cn78xx_lmc_dreset_init(node, 0);
+
+ } else if (ddr_interface_mask == 0xf) {
+ /* FOUR-LMC MODE BEFORE STEP 5 */
+ cn78xx_lmc_dreset_init(node, 2);
+ cn78xx_lmc_dreset_init(node, 3);
+ }
+
+ /*
+ * 6.9.5 LMC CK Local Initialization
+ *
+ * All of DDR PLL, LMC global CK, and LMC interface-enable
+ * initializations must be completed prior to starting this LMC CK local
+ * initialization (Step 5).
+ *
+ * LMC CK Local initialization must be performed for each chip reset and
+ * whenever the DDR clock speed changes. This step needs to be performed
+ * only once, not once per LMC.
+ *
+ * There are special constraints on the ordering of DRESET initialization
+ * (Steps 4) and CK local initialization (Step 5) whenever CK local
+ * initialization must be executed. CK local initialization must be
+ * executed whenever the DDR PLL is being brought up (for each chip reset
+ * and whenever the DDR clock speed changes).
+ *
+ * When Step 5 must be executed in the two-LMC mode case:
+ * - LMC0 DRESET initialization must occur before Step 5.
+ * - LMC1 DRESET initialization must occur after Step 5.
+ *
+ * When Step 5 must be executed in the four-LMC mode case:
+ * - LMC2 and LMC3 DRESET initialization must occur before Step 5.
+ * - LMC0 and LMC1 DRESET initialization must occur after Step 5.
+ *
+ * LMC CK local initialization is different depending on whether two-LMC
+ * or four-LMC modes are desired.
+ */
+
+ if (ddr_interface_mask == 0x3) {
+ /*
+ * 6.9.5.1 LMC CK Local Initialization for Two-LMC Mode
+ *
+ * 1. Write LMC0_DLL_CTL3 to its reset value. (Note that
+ * LMC0_DLL_CTL3[DLL_90_BYTE_SEL] = 0x2 .. 0x8 should also work.)
+ */
+
+ ddr_dll_ctl3.u = 0;
+ ddr_dll_ctl3.s.dclk90_recal_dis = 1;
+ ddr_dll_ctl3.s.dll90_byte_sel = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(0), ddr_dll_ctl3.u);
+
+ /*
+ * 2. Read LMC0_DLL_CTL3 and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(0));
+
+ /*
+ * 3. Without changing any other fields in LMC0_DLL_CTL3, write
+ * LMC0_DLL_CTL3[DCLK90_FWD] = 1. Writing LMC0_DLL_CTL3[DCLK90_FWD] = 1
+ * causes clock-delay information to be forwarded from LMC0 to LMC1.
+ */
+
+ ddr_dll_ctl3.s.dclk90_fwd = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(0), ddr_dll_ctl3.u);
+
+ /*
+ * 4. Read LMC0_DLL_CTL3 and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(0));
+ } /* if (ddr_interface_mask == 0x3) */
+
+ if (ddr_interface_mask == 0xf) {
+ /*
+ * 6.9.5.2 LMC CK Local Initialization for Four-LMC Mode
+ *
+ * 1. Write LMC2_DLL_CTL3 to its reset value except
+ * LMC2_DLL_CTL3[DLL90_BYTE_SEL] = 0x7.
+ */
+
+ ddr_dll_ctl3.u = 0;
+ ddr_dll_ctl3.s.dclk90_recal_dis = 1;
+ ddr_dll_ctl3.s.dll90_byte_sel = 7;
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(2), ddr_dll_ctl3.u);
+
+ /*
+ * 2. Write LMC3_DLL_CTL3 to its reset value except
+ * LMC3_DLL_CTL3[DLL90_BYTE_SEL] = 0x0.
+ */
+
+ ddr_dll_ctl3.u = 0;
+ ddr_dll_ctl3.s.dclk90_recal_dis = 1;
+ ddr_dll_ctl3.s.dll90_byte_sel = 0; /* HRM wants 0, not 2 */
+ DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(3), ddr_dll_ctl3.u); /* HRM wants LMC3 */
+
+ /*
+ * 3. Read LMC3_DLL_CTL3 and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(3));
+
+ /*
+ * 4. Without changing any other fields in LMC2_DLL_CTL3, write
+ * LMC2_DLL_CTL3[DCLK90_FWD] = 1 and LMC2_DLL_CTL3[DCLK90_RECAL_DIS] = 1.
+ * Writing LMC2_DLL_CTL3[DCLK90_FWD] = 1 causes LMC 2 to forward
+ * clock-delay information to LMC0. Setting
+ * LMC2_DLL_CTL3[DCLK90_RECAL_DIS] to 1 prevents LMC2 from periodically
+ * recalibrating this delay information.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(2),
+ c.s.dclk90_fwd = 1;
+ c.s.dclk90_recal_dis = 1);
+
+ /*
+ * 5. Without changing any other fields in LMC3_DLL_CTL3, write
+ * LMC3_DLL_CTL3[DCLK90_FWD] = 1 and LMC3_DLL_CTL3[DCLK90_RECAL_DIS] = 1.
+ * Writing LMC3_DLL_CTL3[DCLK90_FWD] = 1 causes LMC3 to forward
+ * clock-delay information to LMC1. Setting
+ * LMC3_DLL_CTL3[DCLK90_RECAL_DIS] to 1 prevents LMC3 from periodically
+ * recalibrating this delay information.
+ */
+
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(3),
+ c.s.dclk90_fwd = 1;
+ c.s.dclk90_recal_dis = 1);
+
+ /*
+ * 6. Read LMC3_DLL_CTL3 and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(3));
+ } /* if (ddr_interface_mask == 0xf) */
+
+
+ /* ONE-LMC MODE AFTER STEP 5 - NOTHING */
+
+ /* TWO-LMC MODE AFTER STEP 5 */
+ if (ddr_interface_mask == 0x3) {
+ cn78xx_lmc_dreset_init(node, 1);
+ }
+
+ /* FOUR-LMC MODE AFTER STEP 5 */
+ if (ddr_interface_mask == 0xf) {
+ cn78xx_lmc_dreset_init(node, 0);
+ cn78xx_lmc_dreset_init(node, 1);
+
+ /* Enable periodic recalibration of the DDR90 delay line. */
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(0),
+ c.s.dclk90_recal_dis = 0);
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(1),
+ c.s.dclk90_recal_dis = 0);
+ }
+
+
+ /* Enable fine tune mode for all LMCs */
+ for (int lmc = 0; lmc<4; ++lmc) {
+ if ((ddr_interface_mask & (1 << lmc)) == 0)
+ continue;
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(lmc),
+ c.s.fine_tune_mode = 1);
+ }
+
+ /* Enable the trim circuit on the appropriate channels to
+ adjust the DDR clock duty cycle for chips that support
+ it. */
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ bdk_lmcx_phy_ctl_t lmc_phy_ctl;
+ int loop_interface_num;
+
+ for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) {
+ if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
+ continue;
+
+ lmc_phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(loop_interface_num));
+ lmc_phy_ctl.cn83xx.lv_mode = (~loop_interface_num) & 1; /* Odd LMCs = 0, Even LMCs = 1 */
+
+ ddr_print("LMC%d: PHY_CTL : 0x%016lx\n",
+ loop_interface_num, lmc_phy_ctl.u);
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(loop_interface_num), lmc_phy_ctl.u);
+ }
+ }
+
+ } /* Do this once */
+
+ } /* if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX)) */
+
+ set_ddr_clock_initialized(node, ddr_interface_num, 1);
+ return(0);
+}
+void
+perform_lmc_reset(bdk_node_t node, int ddr_interface_num)
+{
+ /*
+ * 6.9.6 LMC RESET Initialization
+ *
+ * The purpose of this step is to assert/deassert the RESET# pin at the
+ * DDR3/DDR4 parts.
+ *
+ * This LMC RESET step is done for all enabled LMCs.
+ *
+ * It may be appropriate to skip this step if the DDR3/DDR4 DRAM parts
+ * are in self refresh and are currently preserving their
+ * contents. (Software can determine this via
+ * LMC(0..3)_RESET_CTL[DDR3PSV] in some circumstances.) The remainder of
+ * this section assumes that the DRAM contents need not be preserved.
+ *
+ * The remainder of this section assumes that the CN78XX DDRn_RESET_L pin
+ * is attached to the RESET# pin of the attached DDR3/DDR4 parts, as will
+ * be appropriate in many systems.
+ *
+ * (In other systems, such as ones that can preserve DDR3/DDR4 part
+ * contents while CN78XX is powered down, it will not be appropriate to
+ * directly attach the CN78XX DDRn_RESET_L pin to DRESET# of the
+ * DDR3/DDR4 parts, and this section may not apply.)
+ *
+ * The remainder of this section describes the sequence for LMCn.
+ *
+ * Perform the following six substeps for LMC reset initialization:
+ *
+ * 1. If not done already, assert DDRn_RESET_L pin by writing
+ * LMC(0..3)_RESET_ CTL[DDR3RST] = 0 without modifying any other
+ * LMC(0..3)_RESET_CTL fields.
+ */
+
+ if ( !ddr_memory_preserved(node)) {
+ /*
+ * 2. Read LMC(0..3)_RESET_CTL and wait for the result.
+ */
+
+ BDK_CSR_READ(node, BDK_LMCX_RESET_CTL(ddr_interface_num));
+
+ /*
+ * 3. Wait until RESET# assertion-time requirement from JEDEC DDR3/DDR4
+ * specification is satisfied (200 us during a power-on ramp, 100ns when
+ * power is already stable).
+ */
+
+ bdk_wait_usec(200);
+
+ /*
+ * 4. Deassert DDRn_RESET_L pin by writing LMC(0..3)_RESET_CTL[DDR3RST] = 1
+ * without modifying any other LMC(0..3)_RESET_CTL fields.
+ * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
+ * 6. Wait a minimum of 500us. This guarantees the necessary T = 500us
+ * delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE* assertion.
+ */
+ cn88xx_lmc_ddr3_reset(node, ddr_interface_num, LMC_DDR3_RESET_DEASSERT);
+
+ /* Toggle Reset Again */
+ /* That is, assert, then de-assert, one more time */
+ cn88xx_lmc_ddr3_reset(node, ddr_interface_num, LMC_DDR3_RESET_ASSERT);
+ cn88xx_lmc_ddr3_reset(node, ddr_interface_num, LMC_DDR3_RESET_DEASSERT);
+
+ } /* if ( !ddr_memory_preserved(node)) */
+}
+
+///////////////////////////////////////////////////////////
+// start of DBI switchover
+
+/* first pattern example:
+ GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
+ GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
+ GENERAL_PURPOSE2.DATA == 16'h0000;
+*/
+const uint64_t dbi_pattern[3] = { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000ULL };
+
+// Perform switchover to DBI
+static void dbi_switchover_interface(int node, int lmc)
+{
+ bdk_lmcx_modereg_params0_t modereg_params0;
+ bdk_lmcx_modereg_params3_t modereg_params3;
+ bdk_lmcx_phy_ctl_t phy_ctl;
+ bdk_lmcx_config_t lmcx_config;
+ bdk_lmcx_ddr_pll_ctl_t ddr_pll_ctl;
+ int rank_mask, rankx, active_ranks;
+ uint64_t phys_addr, rank_offset;
+ int num_lmcs, errors;
+ int dbi_settings[9], byte, unlocked, retries;
+ int ecc_ena;
+ int rank_max = 1; // FIXME: make this 4 to try all the ranks
+
+ ddr_pll_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(0));
+
+ lmcx_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ rank_mask = lmcx_config.s.init_status;
+ ecc_ena = lmcx_config.s.ecc_ena;
+
+ // FIXME: must filter out any non-supported configs
+ // ie, no DDR3, no x4 devices, no 81XX
+ if ((ddr_pll_ctl.cn83xx.ddr4_mode == 0) ||
+ (lmcx_config.s.mode_x4dev == 1) ||
+ CAVIUM_IS_MODEL(CAVIUM_CN81XX) )
+ {
+ ddr_print("N%d.LMC%d: DBI switchover: inappropriate device; EXITING...\n",
+ node, lmc);
+ return;
+ }
+
+ // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
+ num_lmcs = __bdk_dram_get_num_lmc(node);
+ rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
+
+ ddr_print("N%d.LMC%d: DBI switchover: rank mask 0x%x, rank size 0x%016llx.\n",
+ node, lmc, rank_mask, (unsigned long long)rank_offset);
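+ // Arithmetic illustration only (field values are configuration-dependent):
+ // with pbank_lsb = 1, rank_ena = 1 and four LMCs, rank_offset =
+ // 1ull << (28 + 1 - 1 + 2) = 1 << 30, i.e. 1 GB between ranks.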
+
+ /* 1. conduct the current init sequence as usual all the way
+ after software write leveling.
+ */
+
+ read_DAC_DBI_settings(node, lmc, /*DBI*/0, dbi_settings);
+
+ display_DAC_DBI_settings(node, lmc, /* DBI */0, ecc_ena, dbi_settings, " INIT");
+
+ /* 2. set DBI related CSRs as below and issue MR write.
+ MODEREG_PARAMS3.WR_DBI=1
+ MODEREG_PARAMS3.RD_DBI=1
+ PHY_CTL.DBI_MODE_ENA=1
+ */
+ modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(lmc));
+
+ modereg_params3.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS3(lmc));
+ modereg_params3.s.wr_dbi = 1;
+ modereg_params3.s.rd_dbi = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS3(lmc), modereg_params3.u);
+
+ phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(lmc));
+ phy_ctl.s.dbi_mode_ena = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(lmc), phy_ctl.u);
+
+ /*
+ There are two options for data to send. Let's start with (1); we could move to (2) in the future:
+
+ 1) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 0 (or for older chips where this does not exist)
+ set data directly in these registers. This will yield a clk/2 pattern:
+ GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
+ GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
+ GENERAL_PURPOSE2.DATA == 16'h0000;
+ 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
+ here data comes from the LFSR generating a PRBS pattern
+ CHAR_CTL.EN = 0
+ CHAR_CTL.SEL = 0; // for PRBS
+ CHAR_CTL.DR = 1;
+ CHAR_CTL.PRBS = setup for whatever type of PRBS to send
+ CHAR_CTL.SKEW_ON = 1;
+ */
+ DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE0(lmc), dbi_pattern[0]);
+ DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE1(lmc), dbi_pattern[1]);
+ DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE2(lmc), dbi_pattern[2]);
+
+ /*
+ 3. adjust cas_latency (only necessary if RD_DBI is set).
+ here is my code for doing this:
+
+ if (csr_model.MODEREG_PARAMS3.RD_DBI.value == 1) begin
+ case (csr_model.MODEREG_PARAMS0.CL.value)
+ 0,1,2,3,4: csr_model.MODEREG_PARAMS0.CL.value += 2; // CL 9-13 -> 11-15
+ 5: begin
+ // CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3
+ if((csr_model.MODEREG_PARAMS0.CWL.value==1 || csr_model.MODEREG_PARAMS0.CWL.value==3))
+ csr_model.MODEREG_PARAMS0.CL.value = 7; // 14->16
+ else
+ csr_model.MODEREG_PARAMS0.CL.value = 13; // 14->17
+ end
+ 6: csr_model.MODEREG_PARAMS0.CL.value = 8; // 15->18
+ 7: csr_model.MODEREG_PARAMS0.CL.value = 14; // 16->19
+ 8: csr_model.MODEREG_PARAMS0.CL.value = 15; // 18->21
+ default:
+ `cn_fatal(("Error mem_cfg (%s) CL (%d) with RD_DBI=1, I am not sure what to do.",
+ mem_cfg, csr_model.MODEREG_PARAMS3.RD_DBI.value))
+ endcase
+ end
+ */
+ if (modereg_params3.s.rd_dbi == 1) {
+ int old_cl, new_cl, old_cwl;
+
+ old_cl = modereg_params0.s.cl;
+ old_cwl = modereg_params0.s.cwl;
+
+ switch (old_cl) {
+ case 0: case 1: case 2: case 3: case 4: new_cl = old_cl + 2; break; // 9-13->11-15
+ // CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3
+ case 5: new_cl = ((old_cwl == 1) || (old_cwl == 3)) ? 7 : 13; break;
+ case 6: new_cl = 8; break; // 15->18
+ case 7: new_cl = 14; break; // 16->19
+ case 8: new_cl = 15; break; // 18->21
+ default:
+ error_print("ERROR: Bad CL value (%d) for DBI switchover.\n", old_cl);
+ // FIXME: need to error exit here...
+ old_cl = -1;
+ new_cl = -1;
+ break;
+ }
+ ddr_print("N%d.LMC%d: DBI switchover: CL ADJ: old_cl 0x%x, old_cwl 0x%x, new_cl 0x%x.\n",
+ node, lmc, old_cl, old_cwl, new_cl);
+ modereg_params0.s.cl = new_cl;
+ DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(lmc), modereg_params0.u);
+ }
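+ // Example of the mapping above: an encoded CL of 6 (CL=15) becomes an
+ // encoded CL of 8 (CL=18), i.e. read latency grows by 3 with RD_DBI on.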
+
+ /*
+ 4. issue MRW to MR0 (CL) and MR5 (DBI), using LMC sequence SEQ_CTL[SEQ_SEL] = MRW.
+ */
+ // Use the default values, from the CSRs fields
+ // also, do B-sides for RDIMMs...
+
+ for (rankx = 0; rankx < 4; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ // for RDIMMs, B-side writes should get done automatically when the A-side is written
+ ddr4_mrw(node, lmc, rankx, -1/* use_default*/, 0/*MRreg*/, 0 /*A-side*/); /* MR0 */
+ ddr4_mrw(node, lmc, rankx, -1/* use_default*/, 5/*MRreg*/, 0 /*A-side*/); /* MR5 */
+
+ } /* for (rankx = 0; rankx < 4; rankx++) */
+
+ /*
+ 5. conduct DBI bit deskew training via the General Purpose R/W sequence (dbtrain).
+ may need to run this over and over to get a lock (I need up to 5 in simulation):
+ SEQ_CTL[SEQ_SEL] = RW_TRAINING (15)
+ DBTRAIN_CTL.CMD_COUNT_EXT = all 1's
+ DBTRAIN_CTL.READ_CMD_COUNT = all 1's
+ DBTRAIN_CTL.TCCD_SEL = set according to MODEREG_PARAMS3[TCCD_L]
+ DBTRAIN_CTL.RW_TRAIN = 1
+ DBTRAIN_CTL.READ_DQ_COUNT = dont care
+ DBTRAIN_CTL.WRITE_ENA = 1;
+ DBTRAIN_CTL.ACTIVATE = 1;
+ DBTRAIN_CTL LRANK, PRANK, ROW_A, BG, BA, COLUMN_A = set to a valid address
+ */
+
+ // NOW - do the training
+ ddr_print("N%d.LMC%d: DBI switchover: TRAINING begins...\n",
+ node, lmc);
+
+ active_ranks = 0;
+ for (rankx = 0; rankx < rank_max; rankx++) {
+ if (!(rank_mask & (1 << rankx)))
+ continue;
+
+ phys_addr = rank_offset * active_ranks;
+ // FIXME: now done by test_dram_byte_hw()
+ //phys_addr |= (lmc << 7);
+ //phys_addr = bdk_numa_get_address(node, phys_addr); // map to node
+
+ active_ranks++;
+
+ retries = 0;
+
+#if 0
+ phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(lmc));
+ phy_ctl.s.phy_reset = 1; // FIXME: this may reset too much?
+ DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(lmc), phy_ctl.u);
+#endif
+
+restart_training:
+
+ // NOTE: return is a bitmask of the erroring bytelanes - we only print it
+ errors = test_dram_byte_hw(node, lmc, phys_addr, DBTRAIN_DBI, NULL);
+
+ ddr_print("N%d.LMC%d: DBI switchover: TEST: rank %d, phys_addr 0x%lx, errors 0x%x.\n",
+ node, lmc, rankx, phys_addr, errors);
+
+ // NEXT - check for locking
+ unlocked = 0;
+ read_DAC_DBI_settings(node, lmc, /*DBI*/0, dbi_settings);
+
+ for (byte = 0; byte < (8+ecc_ena); byte++) {
+ unlocked += (dbi_settings[byte] & 1) ^ 1;
+ }
+
+ // FIXME: print out the DBI settings array after each rank?
+ if (rank_max > 1) // only when doing more than 1 rank
+ display_DAC_DBI_settings(node, lmc, /* DBI */0, ecc_ena, dbi_settings, " RANK");
+
+ if (unlocked > 0) {
+ ddr_print("N%d.LMC%d: DBI switchover: LOCK: %d still unlocked.\n",
+ node, lmc, unlocked);
+
+ retries++;
+ if (retries < 10) {
+ goto restart_training;
+ } else {
+ ddr_print("N%d.LMC%d: DBI switchover: LOCK: %d retries exhausted.\n",
+ node, lmc, retries);
+ }
+ }
+ } /* for (rankx = 0; rankx < rank_max; rankx++) */
+
+ // print out the final DBI settings array
+ display_DAC_DBI_settings(node, lmc, /* DBI */0, ecc_ena, dbi_settings, "FINAL");
+}
+// end of DBI switchover
+///////////////////////////////////////////////////////////
+
+uint32_t measure_octeon_ddr_clock(bdk_node_t node,
+ const ddr_configuration_t *ddr_configuration,
+ uint32_t cpu_hertz,
+ uint32_t ddr_hertz,
+ uint32_t ddr_ref_hertz,
+ int ddr_interface_num,
+ uint32_t ddr_interface_mask)
+{
+ uint64_t core_clocks;
+ uint64_t ddr_clocks;
+ uint64_t calc_ddr_hertz;
+
+ if (ddr_configuration) {
+ if (initialize_ddr_clock(node,
+ ddr_configuration,
+ cpu_hertz,
+ ddr_hertz,
+ ddr_ref_hertz,
+ ddr_interface_num,
+ ddr_interface_mask) != 0)
+ return 0;
+ }
+
+ /* Dynamically determine the DDR clock speed */
+ core_clocks = bdk_clock_get_count(BDK_CLOCK_TIME);
+ ddr_clocks = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num));
+ bdk_wait_usec(100000); /* 100ms */
+ ddr_clocks = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num)) - ddr_clocks;
+ core_clocks = bdk_clock_get_count(BDK_CLOCK_TIME) - core_clocks;
+ calc_ddr_hertz = ddr_clocks * bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_TIME) / core_clocks;
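+ /* Illustrative numbers (assuming, say, a 100 MHz time base): over the
+  * ~100 ms window core_clocks ~= 10,000,000 ticks; if ddr_clocks counted
+  * ~80,000,000 DCLKs, calc_ddr_hertz ~= 80e6 * 100e6 / 10e6 = 800 MHz.
+  */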
+
+ /* Asim doesn't have a DDR clock, force the measurement to be correct */
+ if (bdk_is_platform(BDK_PLATFORM_ASIM))
+ calc_ddr_hertz = ddr_hertz;
+
+ ddr_print("LMC%d: Measured DDR clock: %lu, cpu clock: %u, ddr clocks: %lu\n",
+ ddr_interface_num, calc_ddr_hertz, cpu_hertz, ddr_clocks);
+
+ /* Check for unreasonable settings. */
+ if (calc_ddr_hertz == 0) {
+ error_print("DDR clock misconfigured. Exiting.\n");
+ exit(1);
+ }
+ return calc_ddr_hertz;
+}
+
+int octeon_ddr_initialize(bdk_node_t node,
+ uint32_t cpu_hertz,
+ uint32_t ddr_hertz,
+ uint32_t ddr_ref_hertz,
+ uint32_t ddr_interface_mask,
+ const ddr_configuration_t *ddr_configuration,
+ uint32_t *measured_ddr_hertz,
+ int board_type,
+ int board_rev_maj,
+ int board_rev_min)
+{
+ uint32_t ddr_config_valid_mask = 0;
+ int memsize_mbytes = 0;
+ const char *s;
+ int retval;
+ int interface_index;
+ uint32_t ddr_max_speed = 1210000000; /* needs to be this high for DDR4 */
+ uint32_t calc_ddr_hertz = -1;
+
+#ifndef OCTEON_SDK_VERSION_STRING
+# define OCTEON_SDK_VERSION_STRING "Development Build"
+#endif
+
+ ddr_print(OCTEON_SDK_VERSION_STRING": $Revision: 102369 $\n");
+
+#ifdef CAVIUM_ONLY
+ /* Override speed restrictions to support internal testing. */
+ ddr_max_speed = 1210000000;
+#endif /* CAVIUM_ONLY */
+
+ if (ddr_hertz > ddr_max_speed) {
+ error_print("DDR clock speed %u exceeds maximum speed supported by "
+ "processor, reducing to %uHz\n",
+ ddr_hertz, ddr_max_speed);
+ ddr_hertz = ddr_max_speed;
+ }
+
+ // Do this earlier so we can return without doing unnecessary things...
+ /* Check for DIMM 0 socket populated for each LMC present */
+ for (interface_index = 0; interface_index < 4; ++interface_index) {
+ if ((ddr_interface_mask & (1 << interface_index)) &&
+ (validate_dimm(node, &ddr_configuration[interface_index].dimm_config_table[0])) == 1)
+ {
+ ddr_config_valid_mask |= (1 << interface_index);
+ }
+ }
+
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) {
+ int four_lmc_mode = 1;
+
+ // Validate that it can only be 2-LMC mode or 4-LMC mode
+ if ((ddr_config_valid_mask != 0x03) && (ddr_config_valid_mask != 0x0f)) {
+ puts("ERROR: Invalid LMC configuration detected.\n");
+ return -1;
+ }
+
+ if ((s = lookup_env_parameter("ddr_four_lmc")) != NULL)
+ four_lmc_mode = !!strtoul(s, NULL, 0);
+
+ if (!four_lmc_mode) {
+ puts("Forcing two-LMC Mode.\n");
+ ddr_config_valid_mask &= ~(3<<2); /* Invalidate LMC[2:3] */
+ }
+ }
+
+ if (!ddr_config_valid_mask) {
+ puts("ERROR: No valid DIMMs detected on any DDR interface.\n");
+ return -1;
+ }
+
+ {
+ /*
+
+ rdf_cnt: Defines the sample point of the LMC response data in
+ the DDR-clock/core-clock crossing. For optimal
+ performance set to 10 * (DDR-clock period/core-clock
+ period) - 1. To disable set to 0. All other values
+ are reserved.
+ */
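+ /* Example (hypothetical clock values): cpu_hertz = 1.6 GHz and
+  * ddr_hertz = 800 MHz give rdf_cnt = (10 * 1.6e9 / 8e8) - 1 = 19
+  * (the value is clamped to 255 below).
+  */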
+
+ uint64_t rdf_cnt;
+ BDK_CSR_INIT(l2c_ctl, node, BDK_L2C_CTL);
+ /* It is more convenient to compute the ratio using clock
+ frequencies rather than clock periods. */
+ rdf_cnt = (((uint64_t) 10 * cpu_hertz) / ddr_hertz) - 1;
+ rdf_cnt = rdf_cnt<256 ? rdf_cnt : 255;
+ l2c_ctl.s.rdf_cnt = rdf_cnt;
+
+ if ((s = lookup_env_parameter("early_fill_count")) != NULL)
+ l2c_ctl.s.rdf_cnt = strtoul(s, NULL, 0);
+
+ ddr_print("%-45s : %d, cpu_hertz:%u, ddr_hertz:%u\n", "EARLY FILL COUNT ",
+ l2c_ctl.s.rdf_cnt, cpu_hertz, ddr_hertz);
+ DRAM_CSR_WRITE(node, BDK_L2C_CTL, l2c_ctl.u);
+ }
+
+ /* Check to see if we should limit the number of L2 ways. */
+ if ((s = lookup_env_parameter("limit_l2_ways")) != NULL) {
+ int ways = strtoul(s, NULL, 10);
+ limit_l2_ways(node, ways, 1);
+ }
+
+ /* We measure the DDR frequency by counting DDR clocks. We can
+ * confirm or adjust the expected frequency as necessary. We use
+ * the measured frequency to make accurate timing calculations
+ * used to configure the controller.
+ */
+ for (interface_index = 0; interface_index < 4; ++interface_index) {
+ uint32_t tmp_hertz;
+
+ if (! (ddr_config_valid_mask & (1 << interface_index)))
+ continue;
+
+ try_again:
+ // if we are LMC0
+ if (interface_index == 0) {
+ // if we are asking for 100 MHz refclk, we can only get it via alternate, so switch to it
+ if (ddr_ref_hertz == 100000000) {
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(0), c.s.dclk_alt_refclk_sel = 1);
+ bdk_wait_usec(1000); // wait 1 msec
+ } else {
+ // if we are NOT asking for 100MHz, then reset to (assumed) 50MHz and go on
+ DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(0), c.s.dclk_alt_refclk_sel = 0);
+ bdk_wait_usec(1000); // wait 1 msec
+ }
+ }
+
+ tmp_hertz = measure_octeon_ddr_clock(node,
+ &ddr_configuration[interface_index],
+ cpu_hertz,
+ ddr_hertz,
+ ddr_ref_hertz,
+ interface_index,
+ ddr_config_valid_mask);
+
+ // if we are LMC0 and we are asked for 100 MHz refclk,
+ // we must be sure it is available
+ // If not, we print an error message, set to 50MHz, and go on...
+ if ((interface_index == 0) && (ddr_ref_hertz == 100000000)) {
+ // validate that the clock returned is close enough to the clock desired
+ // FIXME: is 5% close enough?
+ int hertz_diff = _abs((int)tmp_hertz - (int)ddr_hertz);
+            if (hertz_diff > ((int)ddr_hertz * 5 / 100)) { // nope, diff is greater than 5%
+ ddr_print("N%d: DRAM init: requested 100 MHz refclk NOT FOUND\n", node);
+ ddr_ref_hertz = bdk_clock_get_rate(node, BDK_CLOCK_MAIN_REF);
+ set_ddr_clock_initialized(node, 0, 0); // clear the flag before trying again!!
+ goto try_again;
+ } else {
+ ddr_print("N%d: DRAM Init: requested 100 MHz refclk FOUND and SELECTED.\n", node);
+ }
+ }
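+        // Illustrative check (added comment): if 1200 MHz was requested but only
+        // ~600 MHz was measured, the 600 MHz difference exceeds the 5% window
+        // (60 MHz), so the code above falls back to the main refclk and retries.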
+
+ if (tmp_hertz > 0)
+ calc_ddr_hertz = tmp_hertz;
+
+ } /* for (interface_index = 0; interface_index < 4; ++interface_index) */
+
+ if (measured_ddr_hertz)
+ *measured_ddr_hertz = calc_ddr_hertz;
+
+ memsize_mbytes = 0;
+ for (interface_index = 0; interface_index < 4; ++interface_index) {
+ if (! (ddr_config_valid_mask & (1 << interface_index))) { // if LMC has no DIMMs found
+ if (ddr_interface_mask & (1 << interface_index)) { // but the LMC is present
+ for (int i = 0; i < DDR_CFG_T_MAX_DIMMS; i++) {
+ // check for slot presence
+ if (validate_dimm(node, &ddr_configuration[interface_index].dimm_config_table[i]) == 0)
+ printf("N%d.LMC%d.DIMM%d: Not Present\n", node, interface_index, i);
+ }
+ error_print("N%d.LMC%d Configuration Completed: 0 MB\n", node, interface_index);
+ }
+ continue;
+ }
+
+ retval = init_octeon_dram_interface(node,
+ &ddr_configuration[interface_index],
+ calc_ddr_hertz, /* Configure using measured value */
+ cpu_hertz,
+ ddr_ref_hertz,
+ board_type,
+ board_rev_maj,
+ board_rev_min,
+ interface_index,
+ ddr_config_valid_mask);
+ if (retval > 0)
+ memsize_mbytes += retval;
+ }
+
+ if (memsize_mbytes == 0)
+ /* All interfaces failed to initialize, so return error */
+ return -1;
+
+    // switch over to DBI mode only for chips that support it, and only if enabled by envvar
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ int do_dbi = 0;
+ if ((s = lookup_env_parameter("ddr_dbi_switchover")) != NULL) {
+ do_dbi = !!strtoul(s, NULL, 10);
+ }
+ if (do_dbi) {
+ ddr_print("DBI Switchover starting...\n");
+ for (interface_index = 0; interface_index < 4; ++interface_index) {
+ if (! (ddr_config_valid_mask & (1 << interface_index)))
+ continue;
+ dbi_switchover_interface(node, interface_index);
+ }
+ printf("DBI Switchover finished.\n");
+ }
+ }
+
+ // limit memory size if desired...
+ if ((s = lookup_env_parameter("limit_dram_mbytes")) != NULL) {
+ unsigned int mbytes = strtoul(s, NULL, 10);
+ if (mbytes > 0) {
+ memsize_mbytes = mbytes;
+ printf("Limiting DRAM size to %d MBytes based on limit_dram_mbytes env. variable\n",
+ mbytes);
+ }
+ }
+
+ return memsize_mbytes;
+}
+
diff --git a/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.h b/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.h
new file mode 100644
index 0000000000..b691e5286b
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/lib_octeon_shared.h
@@ -0,0 +1,124 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+
+extern const dimm_odt_config_t disable_odt_config[];
+
+#define rttnom_none 0 /* Rtt_Nom disabled */
+#define rttnom_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */
+#define rttnom_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */
+#define rttnom_40ohm 3 /* RZQ/6 = 240/6 = 40 ohms */
+#define rttnom_20ohm 4 /* RZQ/12 = 240/12 = 20 ohms */
+#define rttnom_30ohm 5 /* RZQ/8 = 240/8 = 30 ohms */
+#define rttnom_rsrv1 6 /* Reserved */
+#define rttnom_rsrv2 7 /* Reserved */
+
+#define rttwr_none 0 /* Dynamic ODT off */
+#define rttwr_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */
+#define rttwr_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */
+#define rttwr_rsrv1 3 /* Reserved */
+
+#define dic_40ohm 0 /* RZQ/6 = 240/6 = 40 ohms */
+#define dic_34ohm 1 /* RZQ/7 = 240/7 = 34 ohms */
+
+#define driver_24_ohm 1
+#define driver_27_ohm 2
+#define driver_30_ohm 3
+#define driver_34_ohm 4
+#define driver_40_ohm 5
+#define driver_48_ohm 6
+#define driver_60_ohm 7
+
+#define rodt_ctl_none 0
+#define rodt_ctl_20_ohm 1
+#define rodt_ctl_30_ohm 2
+#define rodt_ctl_40_ohm 3
+#define rodt_ctl_60_ohm 4
+#define rodt_ctl_120_ohm 5
+
+#define ddr4_rttnom_none 0 /* Rtt_Nom disabled */
+#define ddr4_rttnom_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */
+#define ddr4_rttnom_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */
+#define ddr4_rttnom_40ohm 3 /* RZQ/6 = 240/6 = 40 ohms */
+#define ddr4_rttnom_240ohm 4 /* RZQ/1 = 240/1 = 240 ohms */
+#define ddr4_rttnom_48ohm 5 /* RZQ/5 = 240/5 = 48 ohms */
+#define ddr4_rttnom_80ohm 6 /* RZQ/3 = 240/3 = 80 ohms */
+#define ddr4_rttnom_34ohm 7 /* RZQ/7 = 240/7 = 34 ohms */
+
+#define ddr4_rttwr_none 0 /* Dynamic ODT off */
+#define ddr4_rttwr_120ohm 1 /* RZQ/2 = 240/2 = 120 ohms */
+#define ddr4_rttwr_240ohm 2 /* RZQ/1 = 240/1 = 240 ohms */
+#define ddr4_rttwr_HiZ 3 /* HiZ */
+/* This setting will be available for cn78xx cn88xx pass 2 and cn73xx
+ pass 1. It is disabled for now. */
+//#define ddr4_rttwr_80ohm 4 /* RZQ/3 = 240/3 = 80 ohms */
+
+#define ddr4_dic_34ohm 0 /* RZQ/7 = 240/7 = 34 ohms */
+#define ddr4_dic_48ohm 1 /* RZQ/5 = 240/5 = 48 ohms */
+
+#define ddr4_rttpark_none 0 /* Rtt_Park disabled */
+#define ddr4_rttpark_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */
+#define ddr4_rttpark_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */
+#define ddr4_rttpark_40ohm 3 /* RZQ/6 = 240/6 = 40 ohms */
+#define ddr4_rttpark_240ohm 4 /* RZQ/1 = 240/1 = 240 ohms */
+#define ddr4_rttpark_48ohm 5 /* RZQ/5 = 240/5 = 48 ohms */
+#define ddr4_rttpark_80ohm 6 /* RZQ/3 = 240/3 = 80 ohms */
+#define ddr4_rttpark_34ohm 7 /* RZQ/7 = 240/7 = 34 ohms */
+
+#define ddr4_driver_26_ohm 2
+#define ddr4_driver_30_ohm 3
+#define ddr4_driver_34_ohm 4
+#define ddr4_driver_40_ohm 5
+#define ddr4_driver_48_ohm 6
+
+#define ddr4_dqx_driver_24_ohm 1
+#define ddr4_dqx_driver_27_ohm 2
+#define ddr4_dqx_driver_30_ohm 3
+#define ddr4_dqx_driver_34_ohm 4
+#define ddr4_dqx_driver_40_ohm 5
+#define ddr4_dqx_driver_48_ohm 6
+#define ddr4_dqx_driver_60_ohm 7
+
+#define ddr4_rodt_ctl_none 0
+#define ddr4_rodt_ctl_40_ohm 1
+#define ddr4_rodt_ctl_60_ohm 2
+#define ddr4_rodt_ctl_80_ohm 3
+#define ddr4_rodt_ctl_120_ohm 4
+#define ddr4_rodt_ctl_240_ohm 5
+#define ddr4_rodt_ctl_34_ohm 6
+#define ddr4_rodt_ctl_48_ohm 7
diff --git a/src/vendorcode/cavium/bdk/libdram/libdram-config-load.c b/src/vendorcode/cavium/bdk/libdram/libdram-config-load.c
new file mode 100644
index 0000000000..5173290187
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/libdram-config-load.c
@@ -0,0 +1,262 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+
+/**
+ * Load a "odt_*rank_config" structure
+ *
+ * @param cfg Config to fill
+ * @param ranks Number of ranks we're loading (1, 2, or 4)
+ * @param num_dimms Number of DIMMs populated on the interface (1-based)
+ * @param lmc Which LMC this is for
+ * @param node Node we're loading for
+ */
+static void load_rank_data(dram_config_t *cfg, int ranks, int num_dimms, int lmc, bdk_node_t node)
+{
+ /* Get a pointer to the structure we are filling */
+ dimm_odt_config_t *c;
+ switch (ranks)
+ {
+ case 1:
+ c = &cfg->config[lmc].odt_1rank_config[num_dimms - 1];
+ break;
+ case 2:
+ c = &cfg->config[lmc].odt_2rank_config[num_dimms - 1];
+ break;
+ case 4:
+ c = &cfg->config[lmc].odt_4rank_config[num_dimms - 1];
+ break;
+ default:
+ bdk_fatal("Unexpected number of ranks\n");
+ break;
+ }
+
+ /* Fill the global items */
+ c->odt_ena = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_DQX_CTL, ranks, num_dimms, lmc, node);
+ c->odt_mask = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_WODT_MASK, ranks, num_dimms, lmc, node);
+
+ /* Fill the per rank items */
+ int rank = 0;
+ c->odt_mask1.s.pasr_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_PASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.asr_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_ASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.srt_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_SRT, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_00_ext = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node) >> 2;
+ c->odt_mask1.s.dic_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_DIC, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_nom_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_NOM, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.db_output_impedance = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_DB_OUTPUT_IMPEDANCE, ranks, num_dimms, lmc, node);
+ rank = 1;
+ c->odt_mask1.s.pasr_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_PASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.asr_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_ASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.srt_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_SRT, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_01_ext = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node) >> 2;
+ c->odt_mask1.s.dic_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_DIC, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_nom_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_NOM, ranks, num_dimms, rank, lmc, node);
+ rank = 2;
+ c->odt_mask1.s.pasr_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_PASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.asr_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_ASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.srt_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_SRT, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_10_ext = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node) >> 2;
+ c->odt_mask1.s.dic_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_DIC, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_nom_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_NOM, ranks, num_dimms, rank, lmc, node);
+ rank = 3;
+ c->odt_mask1.s.pasr_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_PASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.asr_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_ASR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.srt_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_SRT, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_wr_11_ext = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_WR, ranks, num_dimms, rank, lmc, node) >> 2;
+ c->odt_mask1.s.dic_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_DIC, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask1.s.rtt_nom_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE1_RTT_NOM, ranks, num_dimms, rank, lmc, node);
+ rank = 0;
+ c->odt_mask2.s.rtt_park_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_RTT_PARK, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vref_value_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_VALUE, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vref_range_00 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_RANGE, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vrefdq_train_en = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREFDQ_TRAIN_EN, ranks, num_dimms, lmc, node);
+ rank = 1;
+ c->odt_mask2.s.rtt_park_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_RTT_PARK, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vref_value_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_VALUE, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vref_range_01 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_RANGE, ranks, num_dimms, rank, lmc, node);
+ rank = 2;
+ c->odt_mask2.s.rtt_park_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_RTT_PARK, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vref_value_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_VALUE, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vref_range_10 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_RANGE, ranks, num_dimms, rank, lmc, node);
+ rank = 3;
+ c->odt_mask2.s.rtt_park_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_RTT_PARK, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vref_value_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_VALUE, ranks, num_dimms, rank, lmc, node);
+ c->odt_mask2.s.vref_range_11 = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_MODE2_VREF_RANGE, ranks, num_dimms, rank, lmc, node);
+
+ /* Fill more global items */
+ c->qs_dic = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_RODT_CTL, ranks, num_dimms, lmc, node);
+ c->rodt_ctl = bdk_config_get_int(BDK_CONFIG_DDR_RANKS_RODT_MASK, ranks, num_dimms, lmc, node);
+}
+
+/**
+ * Load a DRAM configuration based on the current bdk-config settings
+ *
+ * @param node Node the DRAM config is for
+ *
+ * @return Pointer to __libdram_global_cfg, a global structure. Returns NULL if bdk-config
+ * lacks information about DRAM.
+ */
+const dram_config_t *libdram_config_load(bdk_node_t node)
+{
+ dram_config_t *cfg = &__libdram_global_cfg;
+ const int MAX_LMCS = sizeof(cfg->config) / sizeof(cfg->config[0]);
+
+ /* Make all fields for the node default to zero */
+ memset(cfg, 0, sizeof(*cfg));
+
+ /* Fill the SPD data first as some parameters need to know the DRAM type
+ to lookup the correct values */
+ for (int lmc = 0; lmc < MAX_LMCS; lmc++)
+ {
+ for (int dimm = 0; dimm < DDR_CFG_T_MAX_DIMMS; dimm++)
+ {
+ int spd_addr = bdk_config_get_int(BDK_CONFIG_DDR_SPD_ADDR, dimm, lmc, node);
+ if (spd_addr)
+ {
+ cfg->config[lmc].dimm_config_table[dimm].spd_addr = spd_addr;
+ }
+ else
+ {
+ int spd_size;
+ const void *spd_data = bdk_config_get_blob(&spd_size, BDK_CONFIG_DDR_SPD_DATA, dimm, lmc, node);
+ if (spd_data && spd_size)
+ cfg->config[lmc].dimm_config_table[dimm].spd_ptr = spd_data;
+ }
+ }
+ }
+
+    /* Check that we know how to get DIMM information. If not, return failure */
+ if (!cfg->config[0].dimm_config_table[0].spd_addr && !cfg->config[0].dimm_config_table[0].spd_ptr)
+ return NULL;
+
+ cfg->name = "Loaded from bdk-config";
+ for (int lmc = 0; lmc < MAX_LMCS; lmc++)
+ {
+ for (int num_dimms = 1; num_dimms <= DDR_CFG_T_MAX_DIMMS; num_dimms++)
+ {
+ load_rank_data(cfg, 1, num_dimms, lmc, node);
+ load_rank_data(cfg, 2, num_dimms, lmc, node);
+ load_rank_data(cfg, 4, num_dimms, lmc, node);
+ }
+
+ ddr_configuration_t *c = &cfg->config[lmc];
+ ddr3_custom_config_t *custom = &c->custom_lmc_config;
+ custom->min_rtt_nom_idx = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MIN_RTT_NOM_IDX, lmc, node);
+ custom->max_rtt_nom_idx = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MAX_RTT_NOM_IDX, lmc, node);
+ custom->min_rodt_ctl = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MIN_RODT_CTL, lmc, node);
+ custom->max_rodt_ctl = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MAX_RODT_CTL, lmc, node);
+ custom->ck_ctl = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_CK_CTL, lmc, node);
+ custom->cmd_ctl = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_CMD_CTL, lmc, node);
+ custom->ctl_ctl = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_CTL_CTL, lmc, node);
+ custom->min_cas_latency = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MIN_CAS_LATENCY, lmc, node);
+ custom->offset_en = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_OFFSET_EN, lmc, node);
+ custom->offset_udimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_OFFSET, "UDIMM", lmc, node);
+ custom->offset_rdimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_OFFSET, "RDIMM", lmc, node);
+ custom->rlevel_compute = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_RLEVEL_COMPUTE, lmc, node);
+ custom->rlevel_comp_offset_udimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_RLEVEL_COMP_OFFSET, "UDIMM", lmc, node);
+ custom->rlevel_comp_offset_rdimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_RLEVEL_COMP_OFFSET, "RDIMM", lmc, node);
+ custom->ddr2t_udimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_DDR2T, "UDIMM", lmc, node);
+ custom->ddr2t_rdimm = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_DDR2T, "RDIMM", lmc, node);
+ custom->disable_sequential_delay_check = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_DISABLE_SEQUENTIAL_DELAY_CHECK, lmc, node);
+ custom->maximum_adjacent_rlevel_delay_increment
+ = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MAXIMUM_ADJACENT_RLEVEL_DELAY_INCREMENT, lmc, node);
+ custom->parity = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_PARITY, lmc, node);
+ custom->fprch2 = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_FPRCH2, lmc, node);
+ custom->mode32b = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MODE32B, lmc, node);
+ custom->measured_vref = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_MEASURED_VREF, lmc, node);
+
+ /* CN80XX only supports 32bit mode */
+ if (cavium_is_altpkg(CAVIUM_CN81XX))
+ custom->mode32b = 1;
+
+ /* Loop through 8 bytes, plus ecc byte */
+ #define NUM_BYTES 9 /* Max bytes on LMC (8 plus ECC) */
+ static int8_t dll_write_offset[NUM_BYTES];
+ static int8_t dll_read_offset[NUM_BYTES];
+ for (int b = 0; b < NUM_BYTES; b++)
+ {
+ dll_write_offset[b] = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_DLL_WRITE_OFFSET, b, lmc, node);
+ dll_read_offset[b] = bdk_config_get_int(BDK_CONFIG_DDR_CUSTOM_DLL_READ_OFFSET, b, lmc, node);
+ }
+ custom->dll_write_offset = dll_write_offset;
+ custom->dll_read_offset = dll_read_offset;
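+        // Note (added comment): dll_write_offset/dll_read_offset are static, so
+        // every LMC's custom config points at the same storage; the values read
+        // in the last loop iteration are the ones that remain visible.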
+ }
+
+ int is_ddr4 = (cfg->config[0].odt_1rank_config[0].odt_mask2.u != 0);
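+    // (added note) odt_mask2 carries DDR4-only mode-register settings
+    // (RTT_PARK, VrefDQ), so a non-zero value above is taken to mean DDR4.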
+ int speed = bdk_config_get_int(BDK_CONFIG_DDR_SPEED, node);
+ switch (speed)
+ {
+ case 0: // AUTO
+ cfg->ddr_clock_hertz = 0;
+ break;
+ case 800:
+ case 1600:
+ case 2400:
+ cfg->ddr_clock_hertz = (uint64_t)speed * 1000000 / 2;
+ break;
+ case 666:
+ cfg->ddr_clock_hertz = 333333333;
+ break;
+ case 1066:
+ cfg->ddr_clock_hertz = 533333333;
+ break;
+ case 1333:
+ cfg->ddr_clock_hertz = 666666666;
+ break;
+ case 1866:
+ if (is_ddr4)
+ cfg->ddr_clock_hertz = 940000000;
+ else
+ cfg->ddr_clock_hertz = 933333333;
+ break;
+ case 2133:
+ cfg->ddr_clock_hertz = 1050000000;
+ break;
+ default:
+ bdk_warn("Unsupported DRAM speed of %d MT/s\n", speed);
+ cfg->ddr_clock_hertz = speed * 1000000 / 2;
+ break;
+ }
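+    // Note (added for clarity): the configured DCLK is half the DDR data rate
+    // in MT/s, e.g. 2400 MT/s -> 1200 MHz; the non-integral cases (666, 1066,
+    // 1333, 1866, 2133 MT/s) are mapped explicitly above rather than computed.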
+
+ return cfg;
+}
diff --git a/src/vendorcode/cavium/bdk/libdram/libdram.c b/src/vendorcode/cavium/bdk/libdram/libdram.c
new file mode 100644
index 0000000000..b19486694c
--- /dev/null
+++ b/src/vendorcode/cavium/bdk/libdram/libdram.c
@@ -0,0 +1,718 @@
+/***********************license start***********************************
+* Copyright (c) 2003-2017 Cavium Inc. (support@cavium.com). All rights
+* reserved.
+*
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials provided
+* with the distribution.
+*
+* * Neither the name of Cavium Inc. nor the names of
+* its contributors may be used to endorse or promote products
+* derived from this software without specific prior written
+* permission.
+*
+* This Software, including technical data, may be subject to U.S. export
+* control laws, including the U.S. Export Administration Act and its
+* associated regulations, and may be subject to export or import
+* regulations in other countries.
+*
+* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
+* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
+* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
+* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
+* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
+* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
+* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
+* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
+* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
+* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
+***********************license end**************************************/
+#include <bdk.h>
+#include "libbdk-arch/bdk-csrs-mio_fus.h"
+#include "dram-internal.h"
+
+/* This global variable is accessed through dram_is_verbose() to determine
+ the verbosity level. Use that function instead of setting it directly */
+dram_verbosity_t dram_verbosity = VBL_OFF; /* init this here so we can set a non-zero default */
+
+static uint32_t measured_ddr_hertz[BDK_NUMA_MAX_NODES];
+
+/* The various DRAM configs in the libdram/configs directory need space
+ to store the DRAM config. Since only one config is ever in active use
+ at a time, store the configs in __libdram_global_cfg. In a multi-node
+ setup, independent calls to get the DRAM config will load first node 0's
+ config, then node 1's */
+dram_config_t __libdram_global_cfg;
+
+static void bdk_dram_clear_mem(bdk_node_t node)
+{
+ if (!bdk_is_platform(BDK_PLATFORM_ASIM)) {
+ uint64_t mbytes = bdk_dram_get_size_mbytes(node);
+ uint64_t skip = (node == bdk_numa_master()) ? bdk_dram_get_top_of_bdk() : 0;
+ uint64_t len = (mbytes << 20) - skip;
+
+ BDK_TRACE(DRAM, "N%d: Clearing DRAM\n", node);
+ if (skip)
+ {
+ /* All memory below skip may contain valid data, so we can't clear
+ it. We still need to make sure all cache lines in this area are
+ fully dirty so that ECC bits will be updated on store. A single
+ write to the cache line isn't good enough because partial LMC
+ writes may be enabled */
+ ddr_print("N%d: Rewriting DRAM: start 0 length 0x%lx\n", node, skip);
+ volatile uint64_t *ptr = bdk_phys_to_ptr(bdk_numa_get_address(node, 8));
+ /* The above pointer got address 8 to avoid NULL pointer checking
+ in bdk_phys_to_ptr(). Correct it here */
+ ptr--;
+ uint64_t *end = bdk_phys_to_ptr(bdk_numa_get_address(node, skip));
+ while (ptr < end)
+ {
+ *ptr = *ptr;
+ ptr++;
+ }
+ }
+ ddr_print("N%d: Clearing DRAM: start 0x%lx length 0x%lx\n", node, skip, len);
+ bdk_zero_memory(bdk_phys_to_ptr(bdk_numa_get_address(node, skip)), len);
+ BDK_TRACE(DRAM, "N%d: DRAM clear complete\n", node);
+ }
+}
+
+static void bdk_dram_clear_ecc(bdk_node_t node)
+{
+ /* Clear any DRAM errors set during init */
+ BDK_TRACE(DRAM, "N%d: Clearing LMC ECC errors\n", node);
+ int num_lmc = __bdk_dram_get_num_lmc(node);
+ for (int lmc = 0; lmc < num_lmc; lmc++) {
+ DRAM_CSR_WRITE(node, BDK_LMCX_INT(lmc), BDK_CSR_READ(node, BDK_LMCX_INT(lmc)));
+ }
+}
+
+static void bdk_dram_enable_ecc_reporting(bdk_node_t node)
+{
+ /* Enable LMC ECC error HW reporting */
+ int num_lmc = __bdk_dram_get_num_lmc(node);
+
+ BDK_TRACE(DRAM, "N%d: Enable LMC ECC error reporting\n", node);
+
+ for (int lmc = 0; lmc < num_lmc; lmc++) {
+
+ // NOTE: this must be done for pass 2.x
+ // enable ECC interrupts to allow ECC error info in LMCX_INT
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ DRAM_CSR_WRITE(node, BDK_LMCX_INT_ENA_W1S(lmc), -1ULL);
+ BDK_CSR_INIT(lmc_int_ena_w1s, node, BDK_LMCX_INT_ENA_W1S(lmc));
+ ddr_print("N%d.LMC%d: %-36s : 0x%08lx\n",
+ node, lmc, "LMC_INT_ENA_W1S", lmc_int_ena_w1s.u);
+ }
+ }
+}
+
+static void bdk_dram_disable_ecc_reporting(bdk_node_t node)
+{
+ /* Disable LMC ECC error HW reporting */
+ int num_lmc = __bdk_dram_get_num_lmc(node);
+
+ BDK_TRACE(DRAM, "N%d: Disable LMC ECC error reporting\n", node);
+
+ for (int lmc = 0; lmc < num_lmc; lmc++) {
+
+ // NOTE: this must be done for pass 2.x
+ // disable ECC interrupts to prevent ECC error info in LMCX_INT
+ if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
+ DRAM_CSR_WRITE(node, BDK_LMCX_INT_ENA_W1C(lmc), -1ULL);
+ BDK_CSR_INIT(lmc_int_ena_w1c, node, BDK_LMCX_INT_ENA_W1C(lmc));
+ ddr_print("N%d.LMC%d: %-36s : 0x%08lx\n",
+ node, lmc, "LMC_INT_ENA_W1C", lmc_int_ena_w1c.u);
+ }
+ }
+}
+
+// this routine simply makes the calls to the tuning routines and returns any errors
+static int bdk_libdram_tune_node(int node)
+{
+ int errs, tot_errs;
+ int do_dllro_hw = 0; // default to NO
+ int do_dllwo = 0; // default to NO
+ int do_eccdll = 0; // default to NO
+ const char *str;
+ BDK_CSR_INIT(lmc_config, node, BDK_LMCX_CONFIG(0)); // FIXME: probe LMC0
+ do_eccdll = (lmc_config.s.ecc_ena != 0); // change to ON if ECC enabled
+
+ // FIXME!!! make 81xx always use HW-assist tuning
+ if (CAVIUM_IS_MODEL(CAVIUM_CN81XX))
+ do_dllro_hw = 1;
+
+ // Automatically tune the data byte DLL read offsets
+ // always done by default, but allow use of HW-assist
+ // NOTE: HW-assist will also tune the ECC byte
+ str = getenv("ddr_tune_hw_offsets");
+ if (str)
+ do_dllro_hw = !!strtoul(str, NULL, 0);
+ BDK_TRACE(DRAM, "N%d: Starting DLL Read Offset Tuning for LMCs\n", node);
+ if (!do_dllro_hw || (lmc_config.s.mode32b != 0)) {
+ errs = perform_dll_offset_tuning(node, 2, /* tune */1);
+ } else {
+ errs = perform_HW_dll_offset_tuning(node, /* read */2, 0x0A/* all bytelanes */);
+ }
+    BDK_TRACE(DRAM, "N%d: Finished DLL Read Offset Tuning for LMCs, %d errors\n",
+ node, errs);
+ tot_errs = errs;
+
+ // disabled by default for now, does not seem to be needed?
+ // Automatically tune the data byte DLL write offsets
+ // allow override of default setting
+ str = getenv("ddr_tune_write_offsets");
+ if (str)
+ do_dllwo = !!strtoul(str, NULL, 0);
+ if (do_dllwo) {
+ BDK_TRACE(DRAM, "N%d: Starting DLL Write Offset Tuning for LMCs\n", node);
+ errs = perform_dll_offset_tuning(node, /* write */1, /* tune */1);
+        BDK_TRACE(DRAM, "N%d: Finished DLL Write Offset Tuning for LMCs, %d errors\n",
+ node, errs);
+ tot_errs += errs;
+ }
+
+ // disabled by default for now, does not seem to be needed much?
+ // Automatically tune the ECC byte DLL read offsets
+ // FIXME? allow override of the filtering
+ // FIXME? allow programmatic override, not via envvar?
+ str = getenv("ddr_tune_ecc_enable");
+ if (str)
+ do_eccdll = !!strtoul(str, NULL, 10);
+ if (do_eccdll && !do_dllro_hw && (lmc_config.s.mode32b == 0)) { // do not do HW-assist twice for ECC
+ BDK_TRACE(DRAM, "N%d: Starting ECC DLL Read Offset Tuning for LMCs\n", node);
+ errs = perform_HW_dll_offset_tuning(node, 2, 8/* ECC bytelane */);
+ BDK_TRACE(DRAM, "N%d: Finished ECC DLL Read Offset Tuning for LMCs, %d errors\n",
+ node, errs);
+ tot_errs += errs;
+ }
+
+ return tot_errs;
+}
+
+// this routine makes the calls to the tuning routines when criteria are met
+// intended to be called for automated tuning, to apply filtering...
+
+#define IS_DDR4 1
+#define IS_DDR3 0
+#define IS_RDIMM 1
+#define IS_UDIMM 0
+#define IS_1SLOT 1
+#define IS_2SLOT 0
+
+// FIXME: DDR3 is not tuned
+static const uint32_t ddr_speed_filter[2][2][2] = {
+ [IS_DDR4] = {
+ [IS_RDIMM] = {
+ [IS_1SLOT] = 940,
+ [IS_2SLOT] = 800
+ },
+ [IS_UDIMM] = {
+ [IS_1SLOT] = 1050,
+ [IS_2SLOT] = 940
+ },
+ },
+ [IS_DDR3] = {
+ [IS_RDIMM] = {
+ [IS_1SLOT] = 0, // disabled
+ [IS_2SLOT] = 0 // disabled
+ },
+ [IS_UDIMM] = {
+ [IS_1SLOT] = 0, // disabled
+ [IS_2SLOT] = 0 // disabled
+ }
+ }
+};
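+// Illustrative use of the table above (not from the original source): a DDR4
+// RDIMM single-slot configuration is auto-tuned only when the sampled DCLK
+// exceeds ddr_speed_filter[IS_DDR4][IS_RDIMM][IS_1SLOT] == 940 MHz; the DDR3
+// entries are all 0, which disables auto-tuning for DDR3.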
+
+static int bdk_libdram_maybe_tune_node(int node)
+{
+ const char *str;
+
+ // FIXME: allow an override here so that all configs can be tuned or none
+ // If the envvar is defined, always either force it or avoid it accordingly
+ if ((str = getenv("ddr_tune_all_configs")) != NULL) {
+ int tune_it = !!strtoul(str, NULL, 0);
+ printf("N%d: DRAM auto-tuning %s.\n", node, (tune_it) ? "forced" : "avoided");
+ return (tune_it) ? bdk_libdram_tune_node(node) : 0;
+ }
+
+ // filter the tuning calls here...
+ // determine if we should/can run automatically for this configuration
+ //
+ // FIXME: tune only when the configuration indicates it will help:
+ // DDR type, RDIMM or UDIMM, 1-slot or 2-slot, and speed
+ //
+ uint32_t ddr_speed = divide_nint(libdram_get_freq_from_pll(node, 0), 1000000); // sample LMC0
+ BDK_CSR_INIT(lmc_config, node, BDK_LMCX_CONFIG(0)); // sample LMC0
+
+ int is_ddr4 = !!__bdk_dram_is_ddr4(node, 0);
+ int is_rdimm = !!__bdk_dram_is_rdimm(node, 0);
+ int is_1slot = !!(lmc_config.s.init_status < 4); // HACK, should do better
+ int do_tune = 0;
+
+ uint32_t ddr_min_speed = ddr_speed_filter[is_ddr4][is_rdimm][is_1slot];
+ do_tune = (ddr_min_speed && (ddr_speed > ddr_min_speed));
+
+ ddr_print("N%d: DDR%d %cDIMM %d-slot at %d MHz %s eligible for auto-tuning.\n",
+ node, (is_ddr4)?4:3, (is_rdimm)?'R':'U', (is_1slot)?1:2,
+ ddr_speed, (do_tune)?"is":"is not");
+
+ // call the tuning routines, done filtering...
+ return ((do_tune) ? bdk_libdram_tune_node(node) : 0);
+}
+
+/**
+ * This is the main DRAM init function. Users of libdram should call this function,
+ * avoiding the other internal function. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to initialize. This may not be the same node as the one running the code
+ * @param dram_config
+ * DRAM configuration to use
+ * @param ddr_clock_override
+ *               If non-zero, this overrides the DRAM clock speed in the config structure. This
+ *               allows quick testing of different DRAM speeds without modifying the basic
+ *               config. If zero, the DRAM speed in the config is used.
+ *
+ * @return Amount of memory in MB. Zero or negative is a failure.
+ */
+int libdram_config(int node, const dram_config_t *dram_config, int ddr_clock_override)
+{
+ if (bdk_is_platform(BDK_PLATFORM_ASIM))
+ return bdk_dram_get_size_mbytes(node);
+
+ /* Boards may need to mux the TWSI connection between THUNDERX and the BMC.
+       This allows the BMC to monitor DIMM temperatures and health */
+ int gpio_select = bdk_config_get_int(BDK_CONFIG_DRAM_CONFIG_GPIO);
+ if (gpio_select != -1)
+ bdk_gpio_initialize(bdk_numa_master(), gpio_select, 1, 1);
+
+ /* Read all the SPDs and store them in the device tree. They are needed by
+ later software to populate SMBIOS information */
+ for (int lmc = 0; lmc < 4; lmc++)
+ for (int dimm = 0; dimm < DDR_CFG_T_MAX_DIMMS; dimm++)
+ read_entire_spd(node, (dram_config_t *)dram_config, lmc, dimm);
+
+ const ddr_configuration_t *ddr_config = dram_config->config;
+ int ddr_clock_hertz = (ddr_clock_override) ? ddr_clock_override : dram_config->ddr_clock_hertz;
+ if (ddr_clock_hertz == 0) // 0 == AUTO
+ {
+ ddr_clock_hertz = dram_get_default_spd_speed(node, ddr_config);
+ if (ddr_clock_hertz < 0) {
+ printf("N%d: DRAM init: AUTO clock ILLEGAL configuration\n", node);
+ return -1;
+ }
+ }
+ int errs;
+
+    // At this point, we only know the desired clock rate (ddr_clock_hertz).
+    // We do not know whether we are configuring RDIMMs.
+    // We also do not yet know if the 100 MHz alternate refclk is actually available.
+    // So, if we are being asked for 2133 MT/s or better, we still need to:
+    // 1. probe for RDIMMs (if there are none, the 50 MHz refclk is good enough)
+    // 2. determine if the 100 MHz refclk is there, and switch to it before starting any configuration
+    //
+    // NOTES:
+    // 1. dclk_alt_refclk_sel need only be set on LMC0 (see above disabled code)
+    // 2. we first probe to see if the alternate refclk is needed, and configure it only if so
+    // 3. then go on to configure at the selected refclk
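+    // Illustrative example (added comment): a DDR4 RDIMM config requesting
+    // 2400 MT/s (ddr_clock_hertz = 1200000000) exceeds the 1 GHz threshold
+    // checked below, so the 100 MHz alternate refclk is requested for LMC0.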
+ int ddr_refclk_hertz = bdk_clock_get_rate(node, BDK_CLOCK_MAIN_REF);
+ int alt_refclk = bdk_config_get_int(BDK_CONFIG_DDR_ALT_REFCLK, node);
+
+ char *str = getenv("ddr_100mhz_refclk");
+ if (str) { // if the envvar was found, force it to that setting
+ int do_100mhz = !!strtoul(str, NULL, 0);
+ alt_refclk = (do_100mhz) ? 100 : 50;
+ }
+
+ dram_verbosity = bdk_config_get_int(BDK_CONFIG_DRAM_VERBOSE);
+
+ // Here we check for fuses that limit the number of LMCs we can configure,
+ // but only on 83XX and 88XX...
+ int lmc_limit = 4;
+ if (CAVIUM_IS_MODEL(CAVIUM_CN88XX) || CAVIUM_IS_MODEL(CAVIUM_CN83XX)) {
+ BDK_CSR_INIT(mio_fus_dat2, node, BDK_MIO_FUS_DAT2);
+ if (mio_fus_dat2.s.lmc_half) {
+ lmc_limit = (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) ? 2 : 1; // limit LMCs to half present
+            error_print("Only %d LMC(s) supported for this Thunder model\n", lmc_limit);
+ }
+ }
+
+ /* We need to calculate the interface mask based on the provided SPD
+ addresses/contents */
+ uint32_t interface_mask = 0;
+ for (int i = 0; i < lmc_limit; i++)
+ {
+ // We need to check only DIMM 0 of each LMC for possible presence of the LMC.
+ // This trusts that the board database is correctly configured.
+ // Empty DIMM slots in present LMCs will be detected later.
+ if (ddr_config[i].dimm_config_table[0].spd_addr ||
+ ddr_config[i].dimm_config_table[0].spd_ptr)
+ interface_mask |= 1 << i;
+
+ // we know whether alternate refclk is always wanted
+ // we also know already if we want 2133 MT/s
+ // if alt refclk not always wanted, then probe DDR and DIMM type
+ // if DDR4 and RDIMMs, then set desired refclk to 100MHz, otherwise to default (50MHz)
+ // depend on ddr_initialize() to do the refclk selection and validation
+ if (i == 0) { // only check for LMC0
+ if (alt_refclk) { // if alternate refclk was specified, let it override everything
+ ddr_refclk_hertz = alt_refclk * 1000000;
+ ddr_print("N%d: DRAM init: %d MHz refclk is REQUESTED ALWAYS\n", node, alt_refclk);
+ } else if (ddr_clock_hertz > 1000000000) { // if more than 2000 MT/s
+ int ddr_type = get_ddr_type(node, &ddr_config[0].dimm_config_table[0]);
+ int spd_dimm_type = get_dimm_module_type(node, &ddr_config[0].dimm_config_table[0], ddr_type);
+ // is DDR4 and RDIMM just to be sure
+ if ((ddr_type == DDR4_DRAM) &&
+ ((spd_dimm_type == 1) || (spd_dimm_type == 5) || (spd_dimm_type == 8))) {
+ ddr_refclk_hertz = 100000000; // yes, we require 100MHz refclk, so set it
+ ddr_print("N%d: DRAM init: 100 MHz refclk is REQUIRED\n", node);
+ }
+ } // if (ddr_clock_hertz > 1000000000)
+ } // if (i == 0)
+ }
+
+ BDK_TRACE(DRAM, "N%d: DRAM init started (hertz=%d, refclk=%d, config=%p)\n",
+ node, ddr_clock_hertz, ddr_refclk_hertz, dram_config);
+ debug_print("N%d: DRAM init started (hertz=%d, refclk=%d, config=%p)\n",
+ node, ddr_clock_hertz, ddr_refclk_hertz, dram_config);
+
+ BDK_TRACE(DRAM, "N%d: Calling DRAM init\n", node);
+ measured_ddr_hertz[node] = 0;
+ int mbytes = octeon_ddr_initialize(node,
+ bdk_clock_get_rate(node, BDK_CLOCK_RCLK),
+ ddr_clock_hertz,
+ ddr_refclk_hertz,
+ interface_mask,
+ ddr_config,
+ &measured_ddr_hertz[node],
+ 0,
+ 0,
+ 0);
+ BDK_TRACE(DRAM, "N%d: DRAM init returned %d, measured %u Hz\n",
+ node, mbytes, measured_ddr_hertz[node]);
+
+ // do not tune or mess with memory if there was an init problem...
+ if (mbytes > 0) {
+
+ bdk_dram_disable_ecc_reporting(node);
+
+ // call the tuning routines, with filtering...
+ BDK_TRACE(DRAM, "N%d: Calling DRAM tuning\n", node);
+ errs = bdk_libdram_maybe_tune_node(node);
+ BDK_TRACE(DRAM, "N%d: DRAM tuning returned %d errors\n",
+ node, errs);
+
+ // finally, clear memory and any left-over ECC errors
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ bdk_dram_enable_ecc_reporting(node);
+ }
+
+ /* Boards may need to mux the TWSI connection between THUNDERX and the BMC.
+       This allows the BMC to monitor DIMM temperatures and health */
+ if (gpio_select != -1)
+ bdk_gpio_initialize(bdk_numa_master(), gpio_select, 1, 0);
+
+ return mbytes;
+}
+
+/**
+ * This is the main DRAM tuning function. Users of libdram should call this function,
+ * avoiding the other internal function. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to tune. This may not be the same node as the one running the code
+ *
+ * @return Success or Fail
+ */
+int libdram_tune(int node)
+{
+ int tot_errs;
+ int l2c_is_locked = bdk_l2c_is_locked(node);
+
+ dram_verbosity = bdk_config_get_int(BDK_CONFIG_DRAM_VERBOSE);
+
+ // the only way this entry point should be called is from a MENU item,
+ // so, enable any non-running cores on this node, and leave them
+ // running at the end...
+ ddr_print("N%d: %s: Starting cores (mask was 0x%lx)\n",
+ node, __FUNCTION__, bdk_get_running_coremask(node));
+ bdk_init_cores(node, ~0ULL);
+
+ // must test for L2C locked here, cannot go on with it unlocked
+ // FIXME: but we only need to worry about Node 0???
+ if (node == 0) {
+ if (!l2c_is_locked) { // is unlocked, must lock it now
+ ddr_print("N%d: %s: L2C was unlocked - locking it now\n", node, __FUNCTION__);
+ // FIXME: this should be common-ized; it currently matches bdk_init()...
+ bdk_l2c_lock_mem_region(node, 0, bdk_l2c_get_cache_size_bytes(node) * 3 / 4);
+ } else {
+ ddr_print("N%d: %s: L2C was already locked - continuing\n", node, __FUNCTION__);
+ }
+ } else {
+ ddr_print("N%d: %s: non-zero node, not worrying about L2C lock status\n", node, __FUNCTION__);
+ }
+
+ // call the tuning routines, no filtering...
+ tot_errs = bdk_libdram_tune_node(node);
+
+ // FIXME: only for node 0, unlock L2C if it was unlocked before...
+ if (node == 0) {
+ if (!l2c_is_locked) { // it was Node 0 and unlocked, must re-unlock it now
+ ddr_print("N%d: Node 0 L2C was unlocked before - unlocking it now\n", node);
+ // FIXME: this should be common-ized; it currently matches bdk_init()...
+ bdk_l2c_unlock_mem_region(node, 0, bdk_l2c_get_cache_size_bytes(node) * 3 / 4);
+ } else {
+ ddr_print("N%d: %s: L2C was already locked - leaving it locked\n", node, __FUNCTION__);
+ }
+ } else {
+ ddr_print("N%d: %s: non-zero node, not worrying about L2C lock status\n", node, __FUNCTION__);
+ }
+
+ // make sure to clear memory and any ECC errs when done...
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ return tot_errs;
+}
+
+/**
+ * This is the main function for DRAM margining of Write Voltage.
+ * Users of libdram should call this function,
+ * avoiding the other internal function. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to test. This may not be the same node as the one running the code
+ *
+ * @return Success or Fail
+ */
+static
+int libdram_margin_write_voltage(int node)
+{
+ int tot_errs;
+
+ // call the margining routine
+ tot_errs = perform_margin_write_voltage(node);
+
+ // make sure to clear memory and any ECC errs when done...
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ return tot_errs;
+}
+
+/**
+ * This is the main function for DRAM margining of Read Voltage.
+ * Users of libdram should call this function,
+ * avoiding the other internal function. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to test. This may not be the same node as the one running the code
+ *
+ * @return Success or Fail
+ */
+static
+int libdram_margin_read_voltage(int node)
+{
+ int tot_errs;
+
+ // call the margining routine
+ tot_errs = perform_margin_read_voltage(node);
+
+ // make sure to clear memory and any ECC errs when done...
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ return tot_errs;
+}
+
+/**
+ * This is the main function for DRAM margining of Write Timing.
+ * Users of libdram should call this function,
+ * avoiding the other internal function. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to test. This may not be the same node as the one running the code
+ *
+ * @return Success or Fail
+ */
+static
+int libdram_margin_write_timing(int node)
+{
+ int tot_errs;
+
+ // call the tuning routine, tell it we are margining not tuning...
+ tot_errs = perform_dll_offset_tuning(node, /* write offsets */1, /* margin */0);
+
+ // make sure to clear memory and any ECC errs when done...
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ return tot_errs;
+}
+
+/**
+ * This is the main function for DRAM margining of Read Timing.
+ * Users of libdram should call this function,
+ * avoiding the other internal function. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to test. This may not be the same node as the one running the code
+ *
+ * @return Success or Fail
+ */
+static
+int libdram_margin_read_timing(int node)
+{
+ int tot_errs;
+
+ // call the tuning routine, tell it we are margining not tuning...
+ tot_errs = perform_dll_offset_tuning(node, /* read offsets */2, /* margin */0);
+
+ // make sure to clear memory and any ECC errs when done...
+ bdk_dram_clear_mem(node);
+ bdk_dram_clear_ecc(node);
+
+ return tot_errs;
+}
+
+/**
+ * This is the main function for all DRAM margining.
+ * Users of libdram should call this function,
+ * avoiding the other internal function. As a rule, functions starting with
+ * "libdram_*" are part of the external API and should be used.
+ *
+ * @param node Node to test. This may not be the same node as the one running the code
+ *
+ * @return Success or Fail
+ */
+int libdram_margin(int node)
+{
+ int ret_rt, ret_wt, ret_rv, ret_wv;
+    const char *risk[2] = { "Low Risk", "Needs Review" };
+ int l2c_is_locked = bdk_l2c_is_locked(node);
+
+ // for now, no margining on 81xx, until we can reduce the dynamic runtime size...
+ if (CAVIUM_IS_MODEL(CAVIUM_CN81XX)) {
+ printf("Sorry, margining is not available on 81xx yet...\n");
+ return 0;
+ }
+
+ dram_verbosity = bdk_config_get_int(BDK_CONFIG_DRAM_VERBOSE);
+
+ // the only way this entry point should be called is from a MENU item,
+ // so, enable any non-running cores on this node, and leave them
+ // running at the end...
+ ddr_print("N%d: %s: Starting cores (mask was 0x%lx)\n",
+ node, __FUNCTION__, bdk_get_running_coremask(node));
+ bdk_init_cores(node, ~0ULL);
+
+ // must test for L2C locked here, cannot go on with it unlocked
+ // FIXME: but we only need to worry about Node 0???
+ if (node == 0) {
+ if (!l2c_is_locked) { // is unlocked, must lock it now
+ ddr_print("N%d: %s: L2C was unlocked - locking it now\n", node, __FUNCTION__);
+ // FIXME: this should be common-ized; it currently matches bdk_init()...
+ bdk_l2c_lock_mem_region(node, 0, bdk_l2c_get_cache_size_bytes(node) * 3 / 4);
+ } else {
+ ddr_print("N%d: %s: L2C was already locked - continuing\n", node, __FUNCTION__);
+ }
+ } else {
+ ddr_print("N%d: %s: non-zero node, not worrying about L2C lock status\n", node, __FUNCTION__);
+ }
+
+ debug_print("N%d: Starting DRAM Margin ALL\n", node);
+ ret_rt = libdram_margin_read_timing(node);
+ ret_wt = libdram_margin_write_timing(node);
+ ret_rv = libdram_margin_read_voltage(node);
+ ret_wv = libdram_margin_write_voltage(node);
+ debug_print("N%d: DRAM Margin ALL finished\n", node);
+
+ /*
+ >>> Summary from DDR Margining tool:
+ >>> N0: Read Timing Margin : Low Risk
+ >>> N0: Write Timing Margin : Low Risk
+ >>> N0: Read Voltage Margin : Low Risk
+ >>> N0: Write Voltage Margin : Low Risk
+ */
+ printf(" \n");
+ printf("-------------------------------------\n");
+ printf(" \n");
+ printf("Summary from DDR Margining tool\n");
+ printf("N%d: Read Timing Margin : %s\n", node, risk[!!ret_rt]);
+ printf("N%d: Write Timing Margin : %s\n", node, risk[!!ret_wt]);
+
+ // these may not have been done due to DDR3 and/or THUNDER pass 1.x
+ // FIXME? would it be better to print an appropriate message here?
+ if (ret_rv != -1) printf("N%d: Read Voltage Margin : %s\n", node, risk[!!ret_rv]);
+ if (ret_wv != -1) printf("N%d: Write Voltage Margin : %s\n", node, risk[!!ret_wv]);
+
+ printf(" \n");
+ printf("-------------------------------------\n");
+ printf(" \n");
+
+ // FIXME: only for node 0, unlock L2C if it was unlocked before...
+ if (node == 0) {
+ if (!l2c_is_locked) { // it was Node 0 and unlocked, must re-unlock it now
+ ddr_print("N%d: Node 0 L2C was unlocked before - unlocking it now\n", node);
+ // FIXME: this should be common-ized; it currently matches bdk_init()...
+ bdk_l2c_unlock_mem_region(node, 0, bdk_l2c_get_cache_size_bytes(node) * 3 / 4);
+ } else {
+ ddr_print("N%d: %s: L2C was already locked - leaving it locked\n", node, __FUNCTION__);
+ }
+ } else {
+ ddr_print("N%d: %s: non-zero node, not worrying about L2C lock status\n", node, __FUNCTION__);
+ }
+
+ return 0;
+}
+
+/**
+ * Get the measured DRAM frequency after a call to libdram_config
+ *
+ * @param node Node to get frequency for
+ *
+ * @return Frequency in Hz
+ */
+uint32_t libdram_get_freq(int node)
+{
+ return measured_ddr_hertz[node];
+}
+
+/**
+ * Get the measured DRAM frequency from the DDR_PLL_CTL CSR
+ *
+ * @param node Node to get frequency for
+ *
+ * @return Frequency in Hz
+ */
+uint32_t libdram_get_freq_from_pll(int node, int lmc)
+{
+ static const uint8_t _en[] = {1, 2, 3, 4, 5, 6, 7, 8, 10, 12};
+ BDK_CSR_INIT(c, node, BDK_LMCX_DDR_PLL_CTL(0));
+ // we check the alternate refclk select bit in LMC0 to indicate 100MHz use
+ // assumption: the alternate refclk is setup for 100MHz
+ uint64_t ddr_ref_hertz = (c.s.dclk_alt_refclk_sel) ? 100000000 : bdk_clock_get_rate(node, BDK_CLOCK_MAIN_REF);
+ uint64_t en = _en[c.cn83xx.ddr_ps_en];
+ uint64_t calculated_ddr_hertz = ddr_ref_hertz * (c.cn83xx.clkf + 1) / ((c.cn83xx.clkr + 1) * en);
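+    // Worked example (illustrative values): with a 50 MHz refclk, clkf = 47,
+    // clkr = 1 and ddr_ps_en = 0 (en = 1), this computes
+    // 50 MHz * 48 / (2 * 1) = 1200 MHz, i.e. a DDR4-2400 DCLK.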
+ return calculated_ddr_hertz;
+}
+
+#ifndef DRAM_CSR_WRITE_INLINE
+void dram_csr_write(bdk_node_t node, const char *csr_name, bdk_csr_type_t type, int busnum, int size, uint64_t address, uint64_t value)
+{
+ VB_PRT(VBL_CSRS, "N%d: DDR Config %s[%016lx] => %016lx\n", node, csr_name, address, value);
+ bdk_csr_write(node, type, busnum, size, address, value);
+}
+#endif