aboutsummaryrefslogtreecommitdiff
path: root/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c')
-rw-r--r--src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c1365
1 files changed, 437 insertions, 928 deletions
diff --git a/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c b/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c
index e0e9d4442c..2c6a105dae 100644
--- a/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c
+++ b/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c
@@ -39,6 +39,13 @@
#include <bdk.h>
#include "dram-internal.h"
+#include <string.h>
+#include <lame_string.h> /* for strtoul */
+#include <libbdk-hal/bdk-atomic.h>
+#include <libbdk-hal/bdk-clock.h>
+#include <libbdk-hal/bdk-rng.h>
+#include <libbdk-os/bdk-init.h>
+
// if enhanced verbosity levels are defined, use them
#if defined(VB_PRT)
#define ddr_print2(format, ...) VB_PRT(VBL_FAE, format, ##__VA_ARGS__)
@@ -185,14 +192,14 @@ get_speed_bin(bdk_node_t node, int lmc)
// FIXME: is this reasonable speed "binning"?
if (mts_speed >= 1700) {
- if (mts_speed >= 2000)
- ret = 2;
- else
- ret = 1;
+ if (mts_speed >= 2000)
+ ret = 2;
+ else
+ ret = 1;
}
debug_print("N%d.LMC%d: %s: returning bin %d for MTS %d\n",
- node, lmc, __FUNCTION__, ret, mts_speed);
+ node, lmc, __FUNCTION__, ret, mts_speed);
return ret;
}
@@ -261,25 +268,25 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask,
// add this loop to fill memory with the test pattern first
// loops are ordered so that only entire cachelines are written
for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
- for (k = 0; k < K_MAX; k += K_INC) {
- for (j = 0; j < J_MAX; j += J_INC) {
- p1 = p + ii + k + j;
- p2 = p1 + p2offset;
- for (i = 0, ix = 0; i < I_MAX; i += I_INC, ix++) {
+ for (k = 0; k < K_MAX; k += K_INC) {
+ for (j = 0; j < J_MAX; j += J_INC) {
+ p1 = p + ii + k + j;
+ p2 = p1 + p2offset;
+ for (i = 0, ix = 0; i < I_MAX; i += I_INC, ix++) {
- v = dram_tune_test_pattern[ix];
- v1 = v; // write the same thing to both areas
+ v = dram_tune_test_pattern[ix];
+ v1 = v; // write the same thing to both areas
- __bdk_dram_write64(p1 + i, v);
- __bdk_dram_write64(p2 + i, v1);
+ __bdk_dram_write64(p1 + i, v);
+ __bdk_dram_write64(p2 + i, v1);
- }
+ }
#if ENABLE_WBIL2
- BDK_CACHE_WBI_L2(p1);
- BDK_CACHE_WBI_L2(p2);
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
#endif
- }
- }
+ }
+ }
} /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */
#endif
@@ -291,12 +298,12 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask,
// loops are ordered so that only a single 64-bit slot is written to each cacheline at one time,
// then the cachelines are forced out; this should maximize read/write traffic
for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
- for (k = 0; k < K_MAX; k += K_INC) {
- for (i = 0; i < I_MAX; i += I_INC) {
- for (j = 0; j < J_MAX; j += J_INC) {
+ for (k = 0; k < K_MAX; k += K_INC) {
+ for (i = 0; i < I_MAX; i += I_INC) {
+ for (j = 0; j < J_MAX; j += J_INC) {
- p1 = p + ii + k + j;
- p2 = p1 + p2offset;
+ p1 = p + ii + k + j;
+ p2 = p1 + p2offset;
#if ENABLE_PREFETCH
if (j < (J_MAX - J_INC)) {
@@ -304,20 +311,20 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask,
BDK_PREFETCH(p2 + J_INC, BDK_CACHE_LINE_SIZE);
}
#endif
-
- v = pattern1 * (p1 + i);
- v1 = v; // write the same thing to both areas
- __bdk_dram_write64(p1 + i, v);
- __bdk_dram_write64(p2 + i, v1);
+ v = pattern1 * (p1 + i);
+ v1 = v; // write the same thing to both areas
+
+ __bdk_dram_write64(p1 + i, v);
+ __bdk_dram_write64(p2 + i, v1);
#if ENABLE_WBIL2
- BDK_CACHE_WBI_L2(p1);
- BDK_CACHE_WBI_L2(p2);
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
#endif
- }
- }
- }
+ }
+ }
+ }
} /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */
BDK_DCACHE_INVALIDATE;
@@ -329,24 +336,24 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask,
for (int burst = 0; burst < 1/* was: dram_tune_use_bursts*/; burst++)
{
- uint64_t this_pattern = bdk_rng_get_random64();
- pattern2 ^= this_pattern;
+ uint64_t this_pattern = bdk_rng_get_random64();
+ pattern2 ^= this_pattern;
/* XOR the data with a random value, applying the change to both
* memory areas.
*/
#if ENABLE_PREFETCH
- BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE);
- BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE);
#endif
- for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
- for (k = 0; k < K_MAX; k += K_INC) {
- for (i = 0; i < I_MAX; i += I_INC) { // FIXME: rearranged, did not make much difference?
- for (j = 0; j < J_MAX; j += J_INC) {
+ for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
+ for (k = 0; k < K_MAX; k += K_INC) {
+ for (i = 0; i < I_MAX; i += I_INC) { // FIXME: rearranged, did not make much difference?
+ for (j = 0; j < J_MAX; j += J_INC) {
- p1 = p + ii + k + j;
- p2 = p1 + p2offset;
+ p1 = p + ii + k + j;
+ p2 = p1 + p2offset;
#if ENABLE_PREFETCH
if (j < (J_MAX - J_INC)) {
@@ -354,26 +361,26 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask,
BDK_PREFETCH(p2 + J_INC, BDK_CACHE_LINE_SIZE);
}
#endif
-
- v = __bdk_dram_read64(p1 + i) ^ this_pattern;
- v1 = __bdk_dram_read64(p2 + i) ^ this_pattern;
+
+ v = __bdk_dram_read64(p1 + i) ^ this_pattern;
+ v1 = __bdk_dram_read64(p2 + i) ^ this_pattern;
#if ENABLE_WBIL2
- BDK_CACHE_INV_L2(p1);
- BDK_CACHE_INV_L2(p2);
+ BDK_CACHE_INV_L2(p1);
+ BDK_CACHE_INV_L2(p2);
#endif
- __bdk_dram_write64(p1 + i, v);
- __bdk_dram_write64(p2 + i, v1);
+ __bdk_dram_write64(p1 + i, v);
+ __bdk_dram_write64(p2 + i, v1);
#if ENABLE_WBIL2
- BDK_CACHE_WBI_L2(p1);
- BDK_CACHE_WBI_L2(p2);
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
#endif
- }
- }
- }
- } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */
+ }
+ }
+ }
+ } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */
BDK_DCACHE_INVALIDATE;
@@ -381,8 +388,8 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask,
node, lmc);
#if ENABLE_PREFETCH
- BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE);
- BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE);
+ BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE);
#endif
/* Look for differences in the areas. If there is a mismatch, reset
@@ -390,18 +397,18 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask,
* means that on all subsequent passes the pair of locations remain
* out of sync giving spurious errors.
*/
- // FIXME: change the loop order so that an entire cache line is compared at one time
- // FIXME: this is so that a read error that occurs *anywhere* on the cacheline will be caught,
- // FIXME: rather than comparing only 1 cacheline slot at a time, where an error on a different
- // FIXME: slot will be missed that time around
- // Does the above make sense?
+ // FIXME: change the loop order so that an entire cache line is compared at one time
+ // FIXME: this is so that a read error that occurs *anywhere* on the cacheline will be caught,
+ // FIXME: rather than comparing only 1 cacheline slot at a time, where an error on a different
+ // FIXME: slot will be missed that time around
+ // Does the above make sense?
- for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
- for (k = 0; k < K_MAX; k += K_INC) {
- for (j = 0; j < J_MAX; j += J_INC) {
+ for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
+ for (k = 0; k < K_MAX; k += K_INC) {
+ for (j = 0; j < J_MAX; j += J_INC) {
- p1 = p + ii + k + j;
- p2 = p1 + p2offset;
+ p1 = p + ii + k + j;
+ p2 = p1 + p2offset;
#if ENABLE_PREFETCH
if (j < (J_MAX - J_INC)) {
@@ -409,15 +416,15 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask,
BDK_PREFETCH(p2 + J_INC, BDK_CACHE_LINE_SIZE);
}
#endif
-
- // process entire cachelines in the innermost loop
- for (i = 0; i < I_MAX; i += I_INC) {
- v = ((p1 + i) * pattern1) ^ pattern2; // FIXME: this should predict what we find...???
- d1 = __bdk_dram_read64(p1 + i);
- d2 = __bdk_dram_read64(p2 + i);
+ // process entire cachelines in the innermost loop
+ for (i = 0; i < I_MAX; i += I_INC) {
+
+ v = ((p1 + i) * pattern1) ^ pattern2; // FIXME: this should predict what we find...???
+ d1 = __bdk_dram_read64(p1 + i);
+ d2 = __bdk_dram_read64(p2 + i);
- xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes
+ xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes
if (!xor)
continue;
@@ -426,32 +433,32 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask,
bad_bits[0] |= xor;
//bad_bits[1] |= ~mpr_data1 & 0xffUL; // cannot do ECC here
- int bybit = 1;
- uint64_t bymsk = 0xffULL; // start in byte lane 0
- while (xor != 0) {
- debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n",
- burst, p1, p2, v, d1, d2);
- if (xor & bymsk) { // error(s) in this lane
- errors |= bybit; // set the byte error bit
- xor &= ~bymsk; // clear byte lane in error bits
- datamask &= ~bymsk; // clear the byte lane in the mask
+ int bybit = 1;
+ uint64_t bymsk = 0xffULL; // start in byte lane 0
+ while (xor != 0) {
+ debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n",
+ burst, p1, p2, v, d1, d2);
+ if (xor & bymsk) { // error(s) in this lane
+ errors |= bybit; // set the byte error bit
+ xor &= ~bymsk; // clear byte lane in error bits
+ datamask &= ~bymsk; // clear the byte lane in the mask
#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS
- if (datamask == 0) { // nothing left to do
- return errors; // completely done when errors found in all byte lanes in datamask
- }
+ if (datamask == 0) { // nothing left to do
+ return errors; // completely done when errors found in all byte lanes in datamask
+ }
#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */
- }
- bymsk <<= 8; // move mask into next byte lane
- bybit <<= 1; // move bit into next byte position
- }
- }
+ }
+ bymsk <<= 8; // move mask into next byte lane
+ bybit <<= 1; // move bit into next byte position
+ }
+ }
#if ENABLE_WBIL2
- BDK_CACHE_WBI_L2(p1);
- BDK_CACHE_WBI_L2(p2);
+ BDK_CACHE_WBI_L2(p1);
+ BDK_CACHE_WBI_L2(p2);
#endif
- }
- }
- } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */
+ }
+ }
+ } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */
debug_print("N%d.LMC%d: dram_tuning_mem_xor: done TEST loop\n",
node, lmc);
@@ -476,278 +483,6 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask,
#define EXTRACT(v, lsb, width) (((v) >> (lsb)) & ((1ull << (width)) - 1))
#define LMCNO(address, xbits) (EXTRACT(address, 7, xbits) ^ EXTRACT(address, 20, xbits) ^ EXTRACT(address, 12, xbits))
-static int dram_tuning_mem_xor2(uint64_t p, uint64_t bitmask, int xbits)
-{
- uint64_t p1, p2, d1, d2;
- uint64_t v, vpred;
- uint64_t p2offset = dram_tune_rank_offset; // FIXME?
- uint64_t datamask;
- uint64_t xor;
- uint64_t ii;
- uint64_t pattern1 = bdk_rng_get_random64();
- uint64_t pattern2 = 0;
- int errors = 0;
- int errs_by_lmc[4] = { 0,0,0,0 };
- int lmc;
- uint64_t vbase, vincr;
-
- // Byte lanes may be clear in the mask to indicate no testing on that lane.
- datamask = bitmask;
-
- /* Add offset to both test regions to not clobber boot stuff
- * when running from L2 for NAND boot.
- */
- p += AREA_BASE_OFFSET; // make sure base is out of the way of boot
-
- // move the multiplies outside the loop
- vbase = p * pattern1;
- vincr = 8 * pattern1;
-
-#define II_INC (1ULL << 3)
-#define II_MAX (1ULL << 22) // stop where the core ID bits start
-
- // walk the memory areas by 8-byte words
- v = vbase;
- for (ii = 0; ii < II_MAX; ii += II_INC) {
-
- p1 = p + ii;
- p2 = p1 + p2offset;
-
- __bdk_dram_write64(p1, v);
- __bdk_dram_write64(p2, v);
-
- v += vincr;
- }
-
- __bdk_dram_flush_to_mem_range(p , p + II_MAX);
- __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + II_MAX);
- BDK_DCACHE_INVALIDATE;
-
- /* Make a series of passes over the memory areas. */
-
- for (int burst = 0; burst < dram_tune_use_bursts; burst++)
- {
- uint64_t this_pattern = bdk_rng_get_random64();
- pattern2 ^= this_pattern;
-
- /* XOR the data with a random value, applying the change to both
- * memory areas.
- */
-#if 0
- BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE);
- BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE);
-#endif
- for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!!
-
- p1 = p + ii;
- p2 = p1 + p2offset;
-
- d1 = __bdk_dram_read64(p1) ^ this_pattern;
- d2 = __bdk_dram_read64(p2) ^ this_pattern;
-
- __bdk_dram_write64(p1, d1);
- __bdk_dram_write64(p2, d2);
-
- }
- __bdk_dram_flush_to_mem_range(p , p + II_MAX);
- __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + II_MAX);
- BDK_DCACHE_INVALIDATE;
-
- /* Look for differences in the areas. If there is a mismatch, reset
- * both memory locations with the same pattern. Failing to do so
- * means that on all subsequent passes the pair of locations remain
- * out of sync giving spurious errors.
- */
-#if 0
- BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE);
- BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE);
-#endif
- vpred = vbase;
- for (ii = 0; ii < II_MAX; ii += II_INC) {
-
- p1 = p + ii;
- p2 = p1 + p2offset;
-
- v = vpred ^ pattern2; // this should predict what we find...
- d1 = __bdk_dram_read64(p1);
- d2 = __bdk_dram_read64(p2);
- vpred += vincr;
-
- xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes
- if (!xor) // no errors
- continue;
-
- lmc = LMCNO(p1, xbits); // FIXME: LMC should be SAME for p1 and p2!!!
- if (lmc != (int)LMCNO(p2, xbits)) {
- printf("ERROR: LMCs for addresses [0x%016lX] (%lld) and [0x%016lX] (%lld) differ!!!\n",
- p1, LMCNO(p1, xbits), p2, LMCNO(p2, xbits));
- }
- int bybit = 1;
- uint64_t bymsk = 0xffULL; // start in byte lane 0
- while (xor != 0) {
- debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n",
- burst, p1, p2, v, d1, d2);
- if (xor & bymsk) { // error(s) in this lane
- errs_by_lmc[lmc] |= bybit; // set the byte error bit in the LMCs errors
- errors |= bybit; // set the byte error bit
- xor &= ~bymsk; // clear byte lane in error bits
- //datamask &= ~bymsk; // clear the byte lane in the mask
- }
- bymsk <<= 8; // move mask into next byte lane
- bybit <<= 1; // move bit into next byte position
- } /* while (xor != 0) */
- } /* for (ii = 0; ii < II_MAX; ii += II_INC) */
- } /* for (int burst = 0; burst < dram_tune_use_bursts; burst++) */
-
- // update the global LMC error states
- for (lmc = 0; lmc < 4; lmc++) {
- if (errs_by_lmc[lmc]) {
- bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_lmc_errs[lmc], errs_by_lmc[lmc]);
- }
- }
-
- return errors;
-}
-
-#if 0
-static int dram_tuning_mem_rows(uint64_t p, uint64_t bitmask)
-{
- uint64_t p1, p2, d1, d2;
- uint64_t v, v1;
- uint64_t p2offset = dram_tune_rank_offset; // FIXME?
- uint64_t datamask;
- uint64_t xor;
- int i, j, k, ii;
- int errors = 0;
- int index;
- uint64_t pattern1 = 0; // FIXME: maybe this could be from a table?
- uint64_t pattern2;
-
- // Byte lanes may be clear in the mask to indicate no testing on that lane.
- datamask = bitmask;
-
- /* Add offset to both test regions to not clobber boot stuff
- * when running from L2 for NAND boot.
- */
- p += 0x10000000; // FIXME? was: 0x4000000; // make sure base is out of the way of cores for tuning
-
- pattern2 = pattern1;
- for (k = 0; k < (1 << 20); k += (1 << 14)) {
- for (j = 0; j < (1 << 12); j += (1 << 9)) {
- for (i = 0; i < (1 << 7); i += 8) {
- index = i + j + k;
- p1 = p + index;
- p2 = p1 + p2offset;
-
- v = pattern2;
- v1 = v; // write the same thing to same slot in both cachelines
- pattern2 = ~pattern2; // flip bits for next slots
-
- __bdk_dram_write64(p1, v);
- __bdk_dram_write64(p2, v1);
- }
-#if 1
- BDK_CACHE_WBI_L2(p1);
- BDK_CACHE_WBI_L2(p2);
-#endif
- }
- }
-
-#if 0
- __bdk_dram_flush_to_mem_range(p, p + (1ULL << 20)); // max_addr is start + where k stops...
- __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + (1ULL << 20)); // max_addr is start + where k stops...
-#endif
- BDK_DCACHE_INVALIDATE;
-
- /* Make a series of passes over the memory areas. */
-
- for (int burst = 0; burst < dram_tune_use_bursts; burst++)
- {
- /* just read and flip the bits applying the change to both
- * memory areas.
- */
- for (k = 0; k < (1 << 20); k += (1 << 14)) {
- for (j = 0; j < (1 << 12); j += (1 << 9)) {
- for (i = 0; i < (1 << 7); i += 8) {
- index = i + j + k;
- p1 = p + index;
- p2 = p1 + p2offset;
-
- v = ~__bdk_dram_read64(p1);
- v1 = ~__bdk_dram_read64(p2);
-
- __bdk_dram_write64(p1, v);
- __bdk_dram_write64(p2, v1);
- }
-#if 1
- BDK_CACHE_WBI_L2(p1);
- BDK_CACHE_WBI_L2(p2);
-#endif
- }
- }
-
-#if 0
- __bdk_dram_flush_to_mem_range(p, p + (1ULL << 20)); // max_addr is start + where k stops...
- __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + (1ULL << 20)); // max_addr is start + where k stops...
-#endif
- BDK_DCACHE_INVALIDATE;
-
- /* Look for differences in the areas. If there is a mismatch, reset
- * both memory locations with the same pattern. Failing to do so
- * means that on all subsequent passes the pair of locations remain
- * out of sync giving spurious errors.
- */
-
- // FIXME: change the loop order so that an entire cache line is compared at one time
- // FIXME: this is so that a read error that occurs *anywhere* on the cacheline will be caught,
- // FIXME: rather than comparing only 1 cacheline slot at a time, where an error on a different
- // FIXME: slot will be missed that time around
- // Does the above make sense?
-
- pattern2 = ~pattern1; // slots have been flipped by the above loop
-
- for (k = 0; k < (1 << 20); k += (1 << 14)) {
- for (j = 0; j < (1 << 12); j += (1 << 9)) {
- for (i = 0; i < (1 << 7); i += 8) {
- index = i + j + k;
- p1 = p + index;
- p2 = p1 + p2offset;
-
- v = pattern2; // FIXME: this should predict what we find...???
- d1 = __bdk_dram_read64(p1);
- d2 = __bdk_dram_read64(p2);
- pattern2 = ~pattern2; // flip for next slot
-
- xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes
-
- int bybit = 1;
- uint64_t bymsk = 0xffULL; // start in byte lane 0
- while (xor != 0) {
- debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n",
- burst, p1, p2, v, d1, d2);
- if (xor & bymsk) { // error(s) in this lane
- errors |= bybit; // set the byte error bit
- xor &= ~bymsk; // clear byte lane in error bits
- datamask &= ~bymsk; // clear the byte lane in the mask
-#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS
- if (datamask == 0) { // nothing left to do
- return errors; // completely done when errors found in all byte lanes in datamask
- }
-#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */
- }
- bymsk <<= 8; // move mask into next byte lane
- bybit <<= 1; // move bit into next byte position
- }
- }
- }
- }
- pattern1 = ~pattern1; // flip the starting pattern for the next burst
-
- } /* for (int burst = 0; burst < dram_tune_use_bursts; burst++) */
- return errors;
-}
-#endif
-
// cores to use
#define DEFAULT_USE_CORES 44 // FIXME: was (1 << CORE_BITS)
int dram_tune_use_cores = DEFAULT_USE_CORES; // max cores to use, override available
@@ -763,109 +498,6 @@ typedef struct
uint64_t byte_mask;
} test_dram_byte_info_t;
-static void dram_tuning_thread(int arg, void *arg1)
-{
- test_dram_byte_info_t *test_info = arg1;
- int core = arg;
- uint64_t errs;
- bdk_node_t node = test_info->node;
- int num_lmcs, lmc;
-#if 0
- num_lmcs = test_info->num_lmcs;
- // map core numbers into hopefully equal groups per LMC
- lmc = core % num_lmcs;
-#else
- // FIXME: this code should allow running all the cores on a single LMC...
- // if incoming num_lmcs > 0, then use as normal; if < 0 remap to a single LMC
- if (test_info->num_lmcs >= 0) {
- num_lmcs = test_info->num_lmcs;
- // map core numbers into hopefully equal groups per LMC
- lmc = core % num_lmcs;
- } else {
- num_lmcs = 1;
- // incoming num_lmcs is (desired LMC - 10)
- lmc = 10 + test_info->num_lmcs;
- }
-#endif
- uint64_t base_address = 0/* was: (lmc << 7); now done by callee */;
- uint64_t bytemask = test_info->byte_mask;
-
- /* Figure out our work memory range.
- *
- * Note: base_address above just provides the physical offset which determines
- * specific LMC portions of the address space and does not have the node bits set.
- */
- //was: base_address = bdk_numa_get_address(node, base_address); // map to node // now done by callee
- base_address |= (core << CORE_SHIFT); // FIXME: also put full core into address
- if (dram_tune_dimm_offset) { // if multi-slot in some way, choose a DIMM for the core
- base_address |= (core & (1 << (num_lmcs >> 1))) ? dram_tune_dimm_offset : 0;
- }
-
- debug_print("Node %d, core %d, Testing area 1 at 0x%011lx, area 2 at 0x%011lx\n",
- node, core, base_address + AREA_BASE_OFFSET,
- base_address + AREA_BASE_OFFSET + dram_tune_rank_offset);
-
- errs = dram_tuning_mem_xor(node, lmc, base_address, bytemask, NULL);
- //errs = dram_tuning_mem_rows(base_address, bytemask);
-
- /* Report that we're done */
- debug_print("Core %d on LMC %d node %d done with test_dram_byte with 0x%lx errs\n",
- core, lmc, node, errs);
-
- if (errs) {
- bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_threads_errs, errs);
- bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_lmc_errs[lmc], errs);
- }
-
- bdk_atomic_add64_nosync(&test_dram_byte_threads_done, 1);
-
- return;
-}
-
-static void dram_tuning_thread2(int arg, void *arg1)
-{
- test_dram_byte_info_t *test_info = arg1;
- int core = arg;
- uint64_t errs;
- bdk_node_t node = test_info->node;
- int num_lmcs = test_info->num_lmcs;
-
- uint64_t base_address = 0; //
- uint64_t bytemask = test_info->byte_mask;
-
- /* Figure out our work memory range.
- *
- * Note: base_address above just provides the physical offset which determines
- * specific portions of the address space and does not have the node bits set.
- */
- base_address = bdk_numa_get_address(node, base_address); // map to node
- base_address |= (core << CORE_SHIFT); // FIXME: also put full core into address
- if (dram_tune_dimm_offset) { // if multi-slot in some way, choose a DIMM for the core
- base_address |= (core & 1) ? dram_tune_dimm_offset : 0;
- }
-
- debug_print("Node %d, core %d, Testing area 1 at 0x%011lx, area 2 at 0x%011lx\n",
- node, core, base_address + AREA_BASE_OFFSET,
- base_address + AREA_BASE_OFFSET + dram_tune_rank_offset);
-
- errs = dram_tuning_mem_xor2(base_address, bytemask, (num_lmcs >> 1)); // 4->2, 2->1, 1->0
- //errs = dram_tuning_mem_rows(base_address, bytemask);
-
- /* Report that we're done */
- debug_print("Core %d on LMC %d node %d done with test_dram_byte with 0x%lx errs\n",
- core, lmc, node, errs);
-
- if (errs) {
- bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_threads_errs, errs);
- // FIXME: this will have been done already in the called test routine
- //bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_lmc_errs[lmc], errs);
- }
-
- bdk_atomic_add64_nosync(&test_dram_byte_threads_done, 1);
-
- return;
-}
-
static int dram_tune_use_xor2 = 1; // FIXME: do NOT default to original mem_xor (LMC-based) code
static int
@@ -874,7 +506,6 @@ run_dram_tuning_threads(bdk_node_t node, int num_lmcs, uint64_t bytemask)
test_dram_byte_info_t test_dram_byte_info;
test_dram_byte_info_t *test_info = &test_dram_byte_info;
int total_count = 0;
- __dram_tuning_thread_t thread_p = (dram_tune_use_xor2) ? dram_tuning_thread2 : dram_tuning_thread;
test_info->node = node;
test_info->num_lmcs = num_lmcs;
@@ -890,20 +521,14 @@ run_dram_tuning_threads(bdk_node_t node, int num_lmcs, uint64_t bytemask)
/* Start threads for cores on the node */
if (bdk_numa_exists(node)) {
- debug_print("Starting %d threads for test_dram_byte\n", dram_tune_use_cores);
- for (int core = 0; core < dram_tune_use_cores; core++) {
- if (bdk_thread_create(node, 0, thread_p, core, (void *)test_info, 0)) {
- bdk_error("Failed to create thread %d for test_dram_byte\n", core);
- } else {
- total_count++;
- }
- }
+ /* FIXME(dhendrix): We shouldn't hit this. */
+ die("bdk_numa_exists() is non-zero\n");
}
#if 0
/* Wait for threads to finish */
while (bdk_atomic_get64(&test_dram_byte_threads_done) < total_count)
- bdk_thread_yield();
+ bdk_thread_yield();
#else
#define TIMEOUT_SECS 5 // FIXME: long enough so a pass for a given setting will not print
/* Wait for threads to finish, with progress */
@@ -912,7 +537,7 @@ run_dram_tuning_threads(bdk_node_t node, int num_lmcs, uint64_t bytemask)
uint64_t period = bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_TIME) * TIMEOUT_SECS; // FIXME?
uint64_t timeout = bdk_clock_get_count(BDK_CLOCK_TIME) + period;
do {
- bdk_thread_yield();
+// bdk_thread_yield(); /* FIXME(dhendrix): don't yield... */
cur_count = bdk_atomic_get64(&test_dram_byte_threads_done);
cur_time = bdk_clock_get_count(BDK_CLOCK_TIME);
if (cur_time >= timeout) {
@@ -927,136 +552,10 @@ run_dram_tuning_threads(bdk_node_t node, int num_lmcs, uint64_t bytemask)
}
/* These variables count the number of ECC errors. They should only be accessed atomically */
-extern int64_t __bdk_dram_ecc_single_bit_errors[];
+/* FIXME(dhendrix): redundant declaration in original BDK sources */
+//extern int64_t __bdk_dram_ecc_single_bit_errors[];
extern int64_t __bdk_dram_ecc_double_bit_errors[];
-#if 0
-// make the tuning test callable as a standalone
-int
-bdk_run_dram_tuning_test(int node)
-{
- int num_lmcs = __bdk_dram_get_num_lmc(node);
- const char *s;
- int lmc, byte;
- int errors;
- uint64_t start_dram_dclk[4], start_dram_ops[4];
- int save_use_bursts;
-
- // check for the cores on this node, abort if not more than 1 // FIXME?
- dram_tune_max_cores = bdk_get_num_running_cores(node);
- if (dram_tune_max_cores < 2) {
- //bdk_init_cores(node, 0);
- printf("N%d: ERROR: not enough cores to run the DRAM tuning test.\n", node);
- return 0;
- }
-
- // but use only a certain number of cores, at most what is available
- if ((s = getenv("ddr_tune_use_cores")) != NULL) {
- dram_tune_use_cores = strtoul(s, NULL, 0);
- if (dram_tune_use_cores <= 0) // allow 0 or negative to mean all
- dram_tune_use_cores = dram_tune_max_cores;
- }
- if (dram_tune_use_cores > dram_tune_max_cores)
- dram_tune_use_cores = dram_tune_max_cores;
-
- // save the original bursts, so we can replace it with a better number for just testing
- save_use_bursts = dram_tune_use_bursts;
- dram_tune_use_bursts = 1500; // FIXME: hard code bursts for the test here...
-
- // allow override of the test repeats (bursts) per thread create
- if ((s = getenv("ddr_tune_use_bursts")) != NULL) {
- dram_tune_use_bursts = strtoul(s, NULL, 10);
- }
-
- // allow override of the test mem_xor algorithm
- if ((s = getenv("ddr_tune_use_xor2")) != NULL) {
- dram_tune_use_xor2 = !!strtoul(s, NULL, 10);
- }
-
- // FIXME? consult LMC0 only
- BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(0));
- if (lmcx_config.s.rank_ena) { // replace the default offset when there is more than 1 rank...
- dram_tune_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
- ddr_print("N%d: run_dram_tuning_test: changing rank offset to 0x%lx\n", node, dram_tune_rank_offset);
- }
- if (lmcx_config.s.init_status & 0x0c) { // bit 2 or 3 set indicates 2 DIMMs
- dram_tune_dimm_offset = 1ull << (28 + lmcx_config.s.pbank_lsb + (num_lmcs/2));
- ddr_print("N%d: run_dram_tuning_test: changing dimm offset to 0x%lx\n", node, dram_tune_dimm_offset);
- }
- int ddr_interface_64b = !lmcx_config.s.mode32b;
-
- // construct the bytemask
- int bytes_todo = (ddr_interface_64b) ? 0xff : 0x0f; // FIXME: hack?
- uint64_t bytemask = 0;
- for (byte = 0; byte < 8; ++byte) {
- uint64_t bitmask;
- if (bytes_todo & (1 << byte)) {
- bitmask = ((!ddr_interface_64b) && (byte == 4)) ? 0x0f: 0xff;
- bytemask |= bitmask << (8*byte); // set the bytes bits in the bytemask
- }
- } /* for (byte = 0; byte < 8; ++byte) */
-
- // print current working values
- ddr_print("N%d: run_dram_tuning_test: max %d cores, use %d cores, use %d bursts.\n",
- node, dram_tune_max_cores, dram_tune_use_cores, dram_tune_use_bursts);
-
- // do the setup on active LMCs
- for (lmc = 0; lmc < num_lmcs; lmc++) {
- // record start cycle CSRs here for utilization measure
- start_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc));
- start_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc));
-#if 0
- bdk_atomic_set64(&__bdk_dram_ecc_single_bit_errors[lmc], 0);
- bdk_atomic_set64(&__bdk_dram_ecc_double_bit_errors[lmc], 0);
-#else
- __bdk_dram_ecc_single_bit_errors[lmc] = 0;
- __bdk_dram_ecc_double_bit_errors[lmc] = 0;
-#endif
- } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
-
- bdk_watchdog_poke();
-
- // run the test(s)
- // only 1 call should be enough, let the bursts, etc, control the load...
- errors = run_dram_tuning_threads(node, num_lmcs, bytemask);
-
- /* Check ECC error counters after the test */
- int64_t ecc_single = 0;
- int64_t ecc_double = 0;
- int64_t ecc_single_errs[4];
- int64_t ecc_double_errs[4];
-
- // finally, print the utilizations all together, and sum the ECC errors
- for (lmc = 0; lmc < num_lmcs; lmc++) {
- uint64_t dclk_diff = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc)) - start_dram_dclk[lmc];
- uint64_t ops_diff = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc)) - start_dram_ops[lmc];
- uint64_t percent_x10 = ops_diff * 1000 / dclk_diff;
- printf("N%d.LMC%d: ops %lu, cycles %lu, used %lu.%lu%%\n",
- node, lmc, ops_diff, dclk_diff, percent_x10 / 10, percent_x10 % 10);
-
- ecc_single += (ecc_single_errs[lmc] = bdk_atomic_get64(&__bdk_dram_ecc_single_bit_errors[lmc]));
- ecc_double += (ecc_double_errs[lmc] = bdk_atomic_get64(&__bdk_dram_ecc_double_bit_errors[lmc]));
- } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
-
- /* Always print any ECC errors */
- if (ecc_single || ecc_double) {
- printf("Test \"%s\": ECC errors, %ld/%ld/%ld/%ld corrected, %ld/%ld/%ld/%ld uncorrected\n",
- "DRAM Tuning Test",
- ecc_single_errs[0], ecc_single_errs[1], ecc_single_errs[2], ecc_single_errs[3],
- ecc_double_errs[0], ecc_double_errs[1], ecc_double_errs[2], ecc_double_errs[3]);
- }
- if (errors || ecc_double || ecc_single) {
- printf("Test \"%s\": FAIL: %ld single, %ld double, %d compare errors\n",
- "DRAM Tuning Test", ecc_single, ecc_double, errors);
- }
-
- // restore bursts
- dram_tune_use_bursts = save_use_bursts;
-
- return (errors + ecc_double + ecc_single);
-}
-#endif /* 0 */
-
#define DEFAULT_SAMPLE_GRAN 3 // sample for errors every N offset values
#define MIN_BYTE_OFFSET -63
#define MAX_BYTE_OFFSET +63
@@ -1064,8 +563,8 @@ int dram_tune_use_gran = DEFAULT_SAMPLE_GRAN;
static int
auto_set_dll_offset(bdk_node_t node, int dll_offset_mode,
- int num_lmcs, int ddr_interface_64b,
- int do_tune)
+ int num_lmcs, int ddr_interface_64b,
+ int do_tune)
{
int byte_offset;
//unsigned short result[9];
@@ -1081,17 +580,17 @@ auto_set_dll_offset(bdk_node_t node, int dll_offset_mode,
uint64_t start_dram_ops[4], stop_dram_ops[4];
int errors, tot_errors;
int lmc;
- char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write";
+ const char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write"; /* FIXME(dhendrix): const */
int mode_is_read = (dll_offset_mode == 2);
- char *mode_blk = (dll_offset_mode == 2) ? " " : "";
+ const char *mode_blk = (dll_offset_mode == 2) ? " " : ""; /* FIXME(dhendrix): const */
int start_offset, end_offset, incr_offset;
int speed_bin = get_speed_bin(node, 0); // FIXME: just get from LMC0?
int low_risk_count = 0, needs_review_count = 0;
if (dram_tune_use_gran != DEFAULT_SAMPLE_GRAN) {
- ddr_print2("N%d: Changing sample granularity from %d to %d\n",
- node, DEFAULT_SAMPLE_GRAN, dram_tune_use_gran);
+ ddr_print2("N%d: Changing sample granularity from %d to %d\n",
+ node, DEFAULT_SAMPLE_GRAN, dram_tune_use_gran);
}
// ensure sample is taken at 0
start_offset = MIN_BYTE_OFFSET - (MIN_BYTE_OFFSET % dram_tune_use_gran);
@@ -1109,12 +608,14 @@ auto_set_dll_offset(bdk_node_t node, int dll_offset_mode,
// FIXME? consult LMC0 only
BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(0));
if (lmcx_config.s.rank_ena) { // replace the default offset when there is more than 1 rank...
- dram_tune_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
- ddr_print2("N%d: Tuning multiple ranks per DIMM (rank offset 0x%lx).\n", node, dram_tune_rank_offset);
+ dram_tune_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
+ /* FIXME(dhendrix): %lx --> %llx */
+ ddr_print2("N%d: Tuning multiple ranks per DIMM (rank offset 0x%llx).\n", node, dram_tune_rank_offset);
}
if (lmcx_config.s.init_status & 0x0c) { // bit 2 or 3 set indicates 2 DIMMs
- dram_tune_dimm_offset = 1ull << (28 + lmcx_config.s.pbank_lsb + (num_lmcs/2));
- ddr_print2("N%d: Tuning multiple DIMMs per channel (DIMM offset 0x%lx)\n", node, dram_tune_dimm_offset);
+ dram_tune_dimm_offset = 1ull << (28 + lmcx_config.s.pbank_lsb + (num_lmcs/2));
+ /* FIXME(dhendrix): %lx --> %llx */
+ ddr_print2("N%d: Tuning multiple DIMMs per channel (DIMM offset 0x%llx)\n", node, dram_tune_dimm_offset);
}
// FIXME? do this for LMC0 only
@@ -1125,76 +626,76 @@ auto_set_dll_offset(bdk_node_t node, int dll_offset_mode,
int bytes_todo = (ddr_interface_64b) ? 0xff : 0x0f;
uint64_t bytemask = 0;
for (byte = 0; byte < 8; ++byte) {
- if (bytes_todo & (1 << byte)) {
- bytemask |= 0xfful << (8*byte); // set the bytes bits in the bytemask
- }
+ if (bytes_todo & (1 << byte)) {
+ bytemask |= 0xfful << (8*byte); // set the bytes bits in the bytemask
+ }
} /* for (byte = 0; byte < 8; ++byte) */
// now loop through selected legal values for the DLL byte offset...
for (byte_offset = start_offset; byte_offset <= end_offset; byte_offset += incr_offset) {
- // do the setup on active LMCs
- for (lmc = 0; lmc < num_lmcs; lmc++) {
- change_dll_offset_enable(node, lmc, 0);
-
- // set all byte lanes at once
- load_dll_offset(node, lmc, dll_offset_mode, byte_offset, 10 /* All bytes at once */);
- // but then clear the ECC byte lane so it should be neutral for the test...
- load_dll_offset(node, lmc, dll_offset_mode, 0, 8);
-
- change_dll_offset_enable(node, lmc, 1);
-
- // record start cycle CSRs here for utilization measure
- start_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc));
- start_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc));
- } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
-
- bdk_watchdog_poke();
-
- // run the test(s)
- // only 1 call should be enough, let the bursts, etc, control the load...
- tot_errors = run_dram_tuning_threads(node, num_lmcs, bytemask);
-
- for (lmc = 0; lmc < num_lmcs; lmc++) {
- // record stop cycle CSRs here for utilization measure
- stop_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc));
- stop_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc));
-
- // accumulate...
- ops_sum[lmc] += stop_dram_ops[lmc] - start_dram_ops[lmc];
- dclk_sum[lmc] += stop_dram_dclk[lmc] - start_dram_dclk[lmc];
-
- errors = test_dram_byte_lmc_errs[lmc];
-
- // check errors by byte, but not ECC
- for (byte = 0; byte < 8; ++byte) {
- if (!(bytes_todo & (1 << byte))) // is this byte lane to be done
- continue; // no
-
- byte_delay_windows[lmc][byte] <<= 1; // always put in a zero
- if (errors & (1 << byte)) { // yes, an error in this byte lane
- byte_delay_count[lmc][byte] = 0; // stop now always
- } else { // no error in this byte lane
- if (byte_delay_count[lmc][byte] == 0) { // first success, set run start
- byte_delay_start[lmc][byte] = byte_offset;
- }
- byte_delay_count[lmc][byte] += incr_offset; // bump run length
-
- if (byte_delay_count[lmc][byte] > byte_delay_best_count[lmc][byte]) {
- byte_delay_best_count[lmc][byte] = byte_delay_count[lmc][byte];
- byte_delay_best_start[lmc][byte] = byte_delay_start[lmc][byte];
- }
- byte_delay_windows[lmc][byte] |= 1ULL; // for pass, put in a 1
- }
- } /* for (byte = 0; byte < 8; ++byte) */
-
- // only print when there are errors and verbose...
- if (errors) {
- debug_print("DLL %s Offset Test %3d: errors 0x%x\n",
- mode_str, byte_offset, errors);
- }
- } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+ // do the setup on active LMCs
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+ change_dll_offset_enable(node, lmc, 0);
+
+ // set all byte lanes at once
+ load_dll_offset(node, lmc, dll_offset_mode, byte_offset, 10 /* All bytes at once */);
+ // but then clear the ECC byte lane so it should be neutral for the test...
+ load_dll_offset(node, lmc, dll_offset_mode, 0, 8);
+
+ change_dll_offset_enable(node, lmc, 1);
+
+ // record start cycle CSRs here for utilization measure
+ start_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc));
+ start_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc));
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
+
+ bdk_watchdog_poke();
+
+ // run the test(s)
+ // only 1 call should be enough, let the bursts, etc, control the load...
+ tot_errors = run_dram_tuning_threads(node, num_lmcs, bytemask);
+
+ for (lmc = 0; lmc < num_lmcs; lmc++) {
+ // record stop cycle CSRs here for utilization measure
+ stop_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc));
+ stop_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc));
+
+ // accumulate...
+ ops_sum[lmc] += stop_dram_ops[lmc] - start_dram_ops[lmc];
+ dclk_sum[lmc] += stop_dram_dclk[lmc] - start_dram_dclk[lmc];
+
+ errors = test_dram_byte_lmc_errs[lmc];
+
+ // check errors by byte, but not ECC
+ for (byte = 0; byte < 8; ++byte) {
+ if (!(bytes_todo & (1 << byte))) // is this byte lane to be done
+ continue; // no
+
+ byte_delay_windows[lmc][byte] <<= 1; // always put in a zero
+ if (errors & (1 << byte)) { // yes, an error in this byte lane
+ byte_delay_count[lmc][byte] = 0; // stop now always
+ } else { // no error in this byte lane
+ if (byte_delay_count[lmc][byte] == 0) { // first success, set run start
+ byte_delay_start[lmc][byte] = byte_offset;
+ }
+ byte_delay_count[lmc][byte] += incr_offset; // bump run length
+
+ if (byte_delay_count[lmc][byte] > byte_delay_best_count[lmc][byte]) {
+ byte_delay_best_count[lmc][byte] = byte_delay_count[lmc][byte];
+ byte_delay_best_start[lmc][byte] = byte_delay_start[lmc][byte];
+ }
+ byte_delay_windows[lmc][byte] |= 1ULL; // for pass, put in a 1
+ }
+ } /* for (byte = 0; byte < 8; ++byte) */
+
+ // only print when there are errors and verbose...
+ if (errors) {
+ debug_print("DLL %s Offset Test %3d: errors 0x%x\n",
+ mode_str, byte_offset, errors);
+ }
+ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
} /* for (byte_offset=-63; byte_offset<63; byte_offset += incr_offset) */
@@ -1202,153 +703,154 @@ auto_set_dll_offset(bdk_node_t node, int dll_offset_mode,
// only when margining...
if (!do_tune) {
- printf(" \n");
- printf("-------------------------------------\n");
+ printf(" \n");
+ printf("-------------------------------------\n");
#if 0
- uint32_t mts_speed = (libdram_get_freq_from_pll(node, 0) * 2) / 1000000; // FIXME: sample LMC0
- printf("N%d: Starting %s Timing Margining for %d MT/s.\n", node, mode_str, mts_speed);
+ uint32_t mts_speed = (libdram_get_freq_from_pll(node, 0) * 2) / 1000000; // FIXME: sample LMC0
+ printf("N%d: Starting %s Timing Margining for %d MT/s.\n", node, mode_str, mts_speed);
#else
- printf("N%d: Starting %s Timing Margining.\n", node, mode_str);
+ printf("N%d: Starting %s Timing Margining.\n", node, mode_str);
#endif
- printf(" \n");
+ printf(" \n");
} /* if (!do_tune) */
for (lmc = 0; lmc < num_lmcs; lmc++) {
#if 1
- // FIXME FIXME
- // FIXME: this just makes ECC always show 0
- byte_delay_best_start[lmc][8] = start_offset;
- byte_delay_best_count[lmc][8] = end_offset - start_offset + incr_offset;
+ // FIXME FIXME
+ // FIXME: this just makes ECC always show 0
+ byte_delay_best_start[lmc][8] = start_offset;
+ byte_delay_best_count[lmc][8] = end_offset - start_offset + incr_offset;
#endif
- // disable offsets while we load...
- change_dll_offset_enable(node, lmc, 0);
-
- // only when margining...
- if (!do_tune) {
- // print the heading
- printf(" \n");
- printf("N%d.LMC%d: %s Timing Margin %s : ", node, lmc, mode_str, mode_blk);
- printf(" ECC/8 ");
- for (byte = 7; byte >= 0; byte--) {
- printf(" Byte %d ", byte);
- }
- printf("\n");
- } /* if (!do_tune) */
-
- // print and load the offset values
- // print the windows bit arrays
- // only when margining...
- if (!do_tune) {
+ // disable offsets while we load...
+ change_dll_offset_enable(node, lmc, 0);
+
+ // only when margining...
+ if (!do_tune) {
+ // print the heading
+ printf(" \n");
+ printf("N%d.LMC%d: %s Timing Margin %s : ", node, lmc, mode_str, mode_blk);
+ printf(" ECC/8 ");
+ for (byte = 7; byte >= 0; byte--) {
+ printf(" Byte %d ", byte);
+ }
+ printf("\n");
+ } /* if (!do_tune) */
+
+ // print and load the offset values
+ // print the windows bit arrays
+ // only when margining...
+ if (!do_tune) {
printf("N%d.LMC%d: DLL %s Offset Amount %s : ", node, lmc, mode_str, mode_blk);
} else {
ddr_print("N%d.LMC%d: SW DLL %s Offset Amount %s : ", node, lmc, mode_str, mode_blk);
}
- for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order
+ for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order
- int count = byte_delay_best_count[lmc][byte];
- if (count == 0)
- count = incr_offset; // should make non-tested ECC byte come out 0
-
- byte_offset = byte_delay_best_start[lmc][byte] +
- ((count - incr_offset) / 2); // adj by incr
+ int count = byte_delay_best_count[lmc][byte];
+ if (count == 0)
+ count = incr_offset; // should make non-tested ECC byte come out 0
- if (!do_tune) { // do counting and special flag if margining
+ byte_offset = byte_delay_best_start[lmc][byte] +
+ ((count - incr_offset) / 2); // adj by incr
+
+ if (!do_tune) { // do counting and special flag if margining
int will_need_review = !is_low_risk_winlen(speed_bin, (count - incr_offset)) &&
- !is_low_risk_offset(speed_bin, byte_offset);
+ !is_low_risk_offset(speed_bin, byte_offset);
printf("%10d%c", byte_offset, (will_need_review) ? '<' :' ');
- if (will_need_review)
- needs_review_count++;
- else
- low_risk_count++;
- } else { // if just tuning, make the printout less lengthy
+ if (will_need_review)
+ needs_review_count++;
+ else
+ low_risk_count++;
+ } else { // if just tuning, make the printout less lengthy
ddr_print("%5d ", byte_offset);
}
- // FIXME? should we be able to override this?
- if (mode_is_read) // for READ offsets, always store what we found
- load_dll_offset(node, lmc, dll_offset_mode, byte_offset, byte);
- else // for WRITE offsets, always store 0
- load_dll_offset(node, lmc, dll_offset_mode, 0, byte);
+ // FIXME? should we be able to override this?
+ if (mode_is_read) // for READ offsets, always store what we found
+ load_dll_offset(node, lmc, dll_offset_mode, byte_offset, byte);
+ else // for WRITE offsets, always store 0
+ load_dll_offset(node, lmc, dll_offset_mode, 0, byte);
- }
- if (!do_tune) {
+ }
+ if (!do_tune) {
printf("\n");
} else {
ddr_print("\n");
}
- // re-enable the offsets now that we are done loading
- change_dll_offset_enable(node, lmc, 1);
-
- // only when margining...
- if (!do_tune) {
- // print the window sizes
- printf("N%d.LMC%d: DLL %s Window Length %s : ", node, lmc, mode_str, mode_blk);
- for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order
- int count = byte_delay_best_count[lmc][byte];
- if (count == 0)
- count = incr_offset; // should make non-tested ECC byte come out 0
-
- // do this again since the "needs review" test is an AND...
- byte_offset = byte_delay_best_start[lmc][byte] +
- ((count - incr_offset) / 2); // adj by incr
-
- int will_need_review = !is_low_risk_winlen(speed_bin, (count - incr_offset)) &&
- !is_low_risk_offset(speed_bin, byte_offset);
-
- printf("%10d%c", count - incr_offset, (will_need_review) ? '<' :' ');
- }
- printf("\n");
-
- // print the window extents
- printf("N%d.LMC%d: DLL %s Window Bounds %s : ", node, lmc, mode_str, mode_blk);
- for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order
- int start = byte_delay_best_start[lmc][byte];
- int count = byte_delay_best_count[lmc][byte];
- if (count == 0)
- count = incr_offset; // should make non-tested ECC byte come out 0
- printf(" %3d to%3d ", start,
- start + count - incr_offset);
- }
- printf("\n");
+ // re-enable the offsets now that we are done loading
+ change_dll_offset_enable(node, lmc, 1);
+
+ // only when margining...
+ if (!do_tune) {
+ // print the window sizes
+ printf("N%d.LMC%d: DLL %s Window Length %s : ", node, lmc, mode_str, mode_blk);
+ for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order
+ int count = byte_delay_best_count[lmc][byte];
+ if (count == 0)
+ count = incr_offset; // should make non-tested ECC byte come out 0
+
+ // do this again since the "needs review" test is an AND...
+ byte_offset = byte_delay_best_start[lmc][byte] +
+ ((count - incr_offset) / 2); // adj by incr
+
+ int will_need_review = !is_low_risk_winlen(speed_bin, (count - incr_offset)) &&
+ !is_low_risk_offset(speed_bin, byte_offset);
+
+ printf("%10d%c", count - incr_offset, (will_need_review) ? '<' :' ');
+ }
+ printf("\n");
+
+ // print the window extents
+ printf("N%d.LMC%d: DLL %s Window Bounds %s : ", node, lmc, mode_str, mode_blk);
+ for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order
+ int start = byte_delay_best_start[lmc][byte];
+ int count = byte_delay_best_count[lmc][byte];
+ if (count == 0)
+ count = incr_offset; // should make non-tested ECC byte come out 0
+ printf(" %3d to%3d ", start,
+ start + count - incr_offset);
+ }
+ printf("\n");
#if 0
- // FIXME: should have a way to force these out...
- // print the windows bit arrays
- printf("N%d.LMC%d: DLL %s Window Bitmap%s : ", node, lmc, mode_str, mode_blk);
- for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order
- printf("%010lx ", byte_delay_windows[lmc][byte]);
- }
- printf("\n");
+ // FIXME: should have a way to force these out...
+ // print the windows bit arrays
+ printf("N%d.LMC%d: DLL %s Window Bitmap%s : ", node, lmc, mode_str, mode_blk);
+ for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order
+ printf("%010lx ", byte_delay_windows[lmc][byte]);
+ }
+ printf("\n");
#endif
- } /* if (!do_tune) */
+ } /* if (!do_tune) */
} /* for (lmc = 0; lmc < num_lmcs; lmc++) */
// only when margining...
if (!do_tune) {
- // print the Summary line(s) here
- printf(" \n");
- printf("N%d: %s Timing Margining Summary : %s ", node, mode_str,
- (needs_review_count > 0) ? "Needs Review" : "Low Risk");
- if (needs_review_count > 0)
- printf("(%d)", needs_review_count);
- printf("\n");
-
- // FIXME??? want to print here: "N0: %s Offsets have been applied already"
-
- printf("-------------------------------------\n");
- printf(" \n");
+ // print the Summary line(s) here
+ printf(" \n");
+ printf("N%d: %s Timing Margining Summary : %s ", node, mode_str,
+ (needs_review_count > 0) ? "Needs Review" : "Low Risk");
+ if (needs_review_count > 0)
+ printf("(%d)", needs_review_count);
+ printf("\n");
+
+ // FIXME??? want to print here: "N0: %s Offsets have been applied already"
+
+ printf("-------------------------------------\n");
+ printf(" \n");
} /* if (!do_tune) */
// FIXME: we probably want this only when doing verbose...
// finally, print the utilizations all together
for (lmc = 0; lmc < num_lmcs; lmc++) {
- uint64_t percent_x10 = ops_sum[lmc] * 1000 / dclk_sum[lmc];
- ddr_print2("N%d.LMC%d: ops %lu, cycles %lu, used %lu.%lu%%\n",
- node, lmc, ops_sum[lmc], dclk_sum[lmc], percent_x10 / 10, percent_x10 % 10);
+ uint64_t percent_x10 = ops_sum[lmc] * 1000 / dclk_sum[lmc];
+ /* FIXME(dhendrix): %lu --> %llu */
+ ddr_print2("N%d.LMC%d: ops %llu, cycles %llu, used %llu.%llu%%\n",
+ node, lmc, ops_sum[lmc], dclk_sum[lmc], percent_x10 / 10, percent_x10 % 10);
} /* for (lmc = 0; lmc < num_lmcs; lmc++) */
// FIXME: only when verbose, or only when there are errors?
@@ -1359,7 +861,7 @@ auto_set_dll_offset(bdk_node_t node, int dll_offset_mode,
tot_errors = run_dram_tuning_threads(node, num_lmcs, bytemask);
debug_print("N%d: %s: Finished running test one last time\n", node, __FUNCTION__);
if (tot_errors)
- ddr_print2("%s Timing Final Test: errors 0x%x\n", mode_str, tot_errors);
+ ddr_print2("%s Timing Final Test: errors 0x%x\n", mode_str, tot_errors);
return (do_tune) ? tot_errors : !!(needs_review_count > 0);
}
@@ -1389,28 +891,30 @@ int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune)
// enable any non-running cores on this node
orig_coremask = bdk_get_running_coremask(node);
- ddr_print4("N%d: %s: Starting cores (mask was 0x%lx)\n",
- node, __FUNCTION__, orig_coremask);
- bdk_init_cores(node, ~0ULL & ~orig_coremask);
+ /* FIXME(dhendrix): %lx --> %llx */
+ ddr_print4("N%d: %s: Starting cores (mask was 0x%llx)\n",
+ node, __FUNCTION__, orig_coremask);
+ /* FIXME(dhendrix): don't call bdk_init_cores(). */
+// bdk_init_cores(node, ~0ULL & ~orig_coremask);
dram_tune_max_cores = bdk_get_num_running_cores(node);
// but use only a certain number of cores, at most what is available
if ((s = getenv("ddr_tune_use_cores")) != NULL) {
- dram_tune_use_cores = strtoul(s, NULL, 0);
- if (dram_tune_use_cores <= 0) // allow 0 or negative to mean all
- dram_tune_use_cores = dram_tune_max_cores;
+ dram_tune_use_cores = strtoul(s, NULL, 0);
+ if (dram_tune_use_cores <= 0) // allow 0 or negative to mean all
+ dram_tune_use_cores = dram_tune_max_cores;
}
if (dram_tune_use_cores > dram_tune_max_cores)
- dram_tune_use_cores = dram_tune_max_cores;
+ dram_tune_use_cores = dram_tune_max_cores;
// see if we want to do the tuning more than once per LMC...
if ((s = getenv("ddr_tune_use_loops"))) {
- loops = strtoul(s, NULL, 0);
+ loops = strtoul(s, NULL, 0);
}
// see if we want to change the granularity of the byte_offset sampling
if ((s = getenv("ddr_tune_use_gran"))) {
- dram_tune_use_gran = strtoul(s, NULL, 0);
+ dram_tune_use_gran = strtoul(s, NULL, 0);
}
// allow override of the test repeats (bursts) per thread create
@@ -1422,9 +926,9 @@ int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune)
// allow override of Read ODT setting just during the tuning run(s)
if ((s = getenv("ddr_tune_use_rodt")) != NULL) {
int temp = strtoul(s, NULL, 10);
- // validity check
- if (temp >= 0 && temp <= 7)
- dram_tune_use_rodt = temp;
+ // validity check
+ if (temp >= 0 && temp <= 7)
+ dram_tune_use_rodt = temp;
}
#endif
@@ -1432,13 +936,13 @@ int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune)
// allow override of the test pattern
// FIXME: a bit simplistic...
if ((s = getenv("ddr_tune_use_pattern")) != NULL) {
- int patno = strtoul(s, NULL, 10);
- if (patno == 2)
- dram_tune_test_pattern = test_pattern_2;
- else if (patno == 3)
- dram_tune_test_pattern = test_pattern_3;
- else // all other values use default
- dram_tune_test_pattern = test_pattern_1;
+ int patno = strtoul(s, NULL, 10);
+ if (patno == 2)
+ dram_tune_test_pattern = test_pattern_2;
+ else if (patno == 3)
+ dram_tune_test_pattern = test_pattern_3;
+ else // all other values use default
+ dram_tune_test_pattern = test_pattern_1;
}
#endif
@@ -1449,24 +953,24 @@ int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune)
// print current working values
ddr_print2("N%d: Tuning will use %d cores of max %d cores, and use %d repeats.\n",
- node, dram_tune_use_cores, dram_tune_max_cores,
- dram_tune_use_bursts);
+ node, dram_tune_use_cores, dram_tune_max_cores,
+ dram_tune_use_bursts);
#if USE_L2_WAYS_LIMIT
// see if L2 ways are limited
if ((s = lookup_env_parameter("limit_l2_ways")) != NULL) {
- ways = strtoul(s, NULL, 10);
- ways_print = 1;
+ ways = strtoul(s, NULL, 10);
+ ways_print = 1;
} else {
- ways = bdk_l2c_get_num_assoc(node);
+ ways = bdk_l2c_get_num_assoc(node);
}
#endif
#if 0
// if RODT is to be overridden during tuning, note change
if (dram_tune_use_rodt >= 0) {
- ddr_print("N%d: using RODT %d for tuning.\n",
- node, dram_tune_use_rodt);
+ ddr_print("N%d: using RODT %d for tuning.\n",
+ node, dram_tune_use_rodt);
}
#endif
@@ -1479,21 +983,21 @@ int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune)
for (lmc = 0; lmc < num_lmcs; lmc++) {
#if 0
- // if RODT change, save old and set new here...
- if (dram_tune_use_rodt >= 0) {
- comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc));
- save_rodt[lmc] = comp_ctl2.s.rodt_ctl;
- comp_ctl2.s.rodt_ctl = dram_tune_use_rodt;
- DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(lmc), comp_ctl2.u);
- BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc));
- }
+ // if RODT change, save old and set new here...
+ if (dram_tune_use_rodt >= 0) {
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc));
+ save_rodt[lmc] = comp_ctl2.s.rodt_ctl;
+ comp_ctl2.s.rodt_ctl = dram_tune_use_rodt;
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(lmc), comp_ctl2.u);
+ BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc));
+ }
#endif
- /* Disable ECC for DRAM tests */
- lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
- save_ecc_ena[lmc] = lmc_config.s.ecc_ena;
- lmc_config.s.ecc_ena = 0;
- DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u);
- lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ /* Disable ECC for DRAM tests */
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ save_ecc_ena[lmc] = lmc_config.s.ecc_ena;
+ lmc_config.s.ecc_ena = 0;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u);
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
} /* for (lmc = 0; lmc < num_lmcs; lmc++) */
@@ -1505,8 +1009,8 @@ int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune)
// testing is done on all LMCs simultaneously
// FIXME: for now, loop here to show what happens multiple times
for (loop = 0; loop < loops; loop++) {
- /* Perform DLL offset tuning */
- errs = auto_set_dll_offset(node, dll_offset_mode, num_lmcs, ddr_interface_64b, do_tune);
+ /* Perform DLL offset tuning */
+ errs = auto_set_dll_offset(node, dll_offset_mode, num_lmcs, ddr_interface_64b, do_tune);
}
#if USE_L2_WAYS_LIMIT
@@ -1518,21 +1022,21 @@ int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune)
debug_print("N%d: %s: starting LMCs cleanup.\n", node, __FUNCTION__);
for (lmc = 0; lmc < num_lmcs; lmc++) {
- /* Restore ECC for DRAM tests */
- lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
- lmc_config.s.ecc_ena = save_ecc_ena[lmc];
- DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u);
- lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ /* Restore ECC for DRAM tests */
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ lmc_config.s.ecc_ena = save_ecc_ena[lmc];
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u);
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
#if 0
- // if RODT change, restore old here...
- if (dram_tune_use_rodt >= 0) {
- comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc));
- comp_ctl2.s.rodt_ctl = save_rodt[lmc];
- DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(lmc), comp_ctl2.u);
- BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc));
- }
+ // if RODT change, restore old here...
+ if (dram_tune_use_rodt >= 0) {
+ comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc));
+ comp_ctl2.s.rodt_ctl = save_rodt[lmc];
+ DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(lmc), comp_ctl2.u);
+ BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc));
+ }
#endif
- // finally, see if there are any read offset overrides after tuning
+ // finally, see if there are any read offset overrides after tuning
// FIXME: provide a way to do write offsets also??
if (dll_offset_mode == 2) {
for (int by = 0; by < 9; by++) {
@@ -1551,20 +1055,24 @@ int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune)
#if 0
// if RODT was overridden during tuning, note restore
if (dram_tune_use_rodt >= 0) {
- ddr_print("N%d: restoring RODT %d after tuning.\n",
- node, save_rodt[0]); // FIXME? use LMC0
+ ddr_print("N%d: restoring RODT %d after tuning.\n",
+ node, save_rodt[0]); // FIXME? use LMC0
}
#endif
// put any cores on this node, that were not running at the start, back into reset
- uint64_t reset_coremask = bdk_get_running_coremask(node) & ~orig_coremask;
+ /* FIXME(dhendrix): don't reset cores... */
+// uint64_t reset_coremask = bdk_get_running_coremask(node) & ~orig_coremask;
+ uint64_t reset_coremask = 0;
if (reset_coremask) {
- ddr_print4("N%d: %s: Stopping cores 0x%lx\n", node, __FUNCTION__,
- reset_coremask);
- bdk_reset_cores(node, reset_coremask);
+ /* FIXME(dhendrix): %lx --> %llx */
+ ddr_print4("N%d: %s: Stopping cores 0x%llx\n", node, __FUNCTION__,
+ reset_coremask);
+ bdk_reset_cores(node, reset_coremask);
} else {
- ddr_print4("N%d: %s: leaving cores set to 0x%lx\n", node, __FUNCTION__,
- orig_coremask);
+ /* FIXME(dhendrix): %lx --> %llx */
+ ddr_print4("N%d: %s: leaving cores set to 0x%llx\n", node, __FUNCTION__,
+ orig_coremask);
}
return errs;
@@ -1656,7 +1164,8 @@ setup_lfsr_pattern(bdk_node_t node, int lmc, uint64_t data)
DRAM_CSR_WRITE(node, BDK_LMCX_CHAR_CTL(lmc), char_ctl.u);
}
-int
+/* FIXME(dhendrix): made static to avoid need for prototype */
+static int
choose_best_hw_patterns(bdk_node_t node, int lmc, int mode)
{
int new_mode = mode;
@@ -1705,7 +1214,7 @@ run_best_hw_patterns(bdk_node_t node, int lmc, uint64_t phys_addr,
if (mode == DBTRAIN_LFSR) {
setup_lfsr_pattern(node, lmc, 0);
errors = test_dram_byte_hw(node, lmc, phys_addr, mode, xor_data);
- VB_PRT(VBL_DEV2, "%s: LFSR at A:0x%012lx errors 0x%x\n",
+ VB_PRT(VBL_DEV2, "%s: LFSR at A:0x%012llx errors 0x%x\n",
__FUNCTION__, phys_addr, errors);
} else {
for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
@@ -1714,7 +1223,7 @@ run_best_hw_patterns(bdk_node_t node, int lmc, uint64_t phys_addr,
errs = test_dram_byte_hw(node, lmc, phys_addr, mode, xor_data);
- VB_PRT(VBL_DEV2, "%s: PATTERN %d at A:0x%012lx errors 0x%x\n",
+ VB_PRT(VBL_DEV2, "%s: PATTERN %d at A:0x%012llx errors 0x%x\n",
__FUNCTION__, pattern, phys_addr, errs);
errors |= errs;
@@ -1738,7 +1247,7 @@ hw_assist_test_dll_offset(bdk_node_t node, int dll_offset_mode,
int pattern;
const uint64_t *pattern_p;
int byte;
- char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write";
+ const char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write";
int pat_best_offset[9];
uint64_t phys_addr;
int pat_beg, pat_end;
@@ -1769,7 +1278,7 @@ hw_assist_test_dll_offset(bdk_node_t node, int dll_offset_mode,
memset(new_best_offset, 0, sizeof(new_best_offset));
for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
- memset(pat_best_offset, 0, sizeof(pat_best_offset));
+ memset(pat_best_offset, 0, sizeof(pat_best_offset));
if (mode == DBTRAIN_TEST) {
pattern_p = byte_patterns[pattern];
@@ -1778,47 +1287,47 @@ hw_assist_test_dll_offset(bdk_node_t node, int dll_offset_mode,
setup_lfsr_pattern(node, lmc, 0);
}
- // now loop through all legal values for the DLL byte offset...
+ // now loop through all legal values for the DLL byte offset...
#define BYTE_OFFSET_INCR 3 // FIXME: make this tunable?
- tot_errors = 0;
+ tot_errors = 0;
- memset(rank_delay_count, 0, sizeof(rank_delay_count));
- memset(rank_delay_start, 0, sizeof(rank_delay_start));
- memset(rank_delay_best_count, 0, sizeof(rank_delay_best_count));
- memset(rank_delay_best_start, 0, sizeof(rank_delay_best_start));
+ memset(rank_delay_count, 0, sizeof(rank_delay_count));
+ memset(rank_delay_start, 0, sizeof(rank_delay_start));
+ memset(rank_delay_best_count, 0, sizeof(rank_delay_best_count));
+ memset(rank_delay_best_start, 0, sizeof(rank_delay_best_start));
- for (byte_offset = -63; byte_offset < 64; byte_offset += BYTE_OFFSET_INCR) {
+ for (byte_offset = -63; byte_offset < 64; byte_offset += BYTE_OFFSET_INCR) {
- // do the setup on the active LMC
- // set the bytelanes DLL offsets
- change_dll_offset_enable(node, lmc, 0);
- load_dll_offset(node, lmc, dll_offset_mode, byte_offset, bytelane); // FIXME? bytelane?
- change_dll_offset_enable(node, lmc, 1);
+ // do the setup on the active LMC
+ // set the bytelanes DLL offsets
+ change_dll_offset_enable(node, lmc, 0);
+ load_dll_offset(node, lmc, dll_offset_mode, byte_offset, bytelane); // FIXME? bytelane?
+ change_dll_offset_enable(node, lmc, 1);
- bdk_watchdog_poke();
+ bdk_watchdog_poke();
- // run the test on each rank
- // only 1 call per rank should be enough, let the bursts, loops, etc, control the load...
-
- off_errors = 0; // errors for this byte_offset, all ranks
+ // run the test on each rank
+ // only 1 call per rank should be enough, let the bursts, loops, etc, control the load...
+
+ off_errors = 0; // errors for this byte_offset, all ranks
active_ranks = 0;
- for (rankx = 0; rankx < 4; rankx++) {
+ for (rankx = 0; rankx < 4; rankx++) {
if (!(rank_mask & (1 << rankx)))
continue;
- phys_addr = hw_rank_offset * active_ranks;
- // FIXME: now done by test_dram_byte_hw()
+ phys_addr = hw_rank_offset * active_ranks;
+ // FIXME: now done by test_dram_byte_hw()
//phys_addr |= (lmc << 7);
//phys_addr = bdk_numa_get_address(node, phys_addr); // map to node
active_ranks++;
// NOTE: return is a now a bitmask of the erroring bytelanes..
- errors[rankx] = test_dram_byte_hw(node, lmc, phys_addr, mode, NULL);
+ errors[rankx] = test_dram_byte_hw(node, lmc, phys_addr, mode, NULL);
for (byte = byte_lo; byte <= byte_hi; byte++) { // do bytelane(s)
@@ -1826,7 +1335,7 @@ hw_assist_test_dll_offset(bdk_node_t node, int dll_offset_mode,
if (errors[rankx] & (1 << byte)) { // yes, an error in the byte lane in this rank
off_errors |= (1 << byte);
- ddr_print5("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: Address 0x%012lx errors 0x%x\n",
+ ddr_print5("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: Address 0x%012llx errors 0x%x\n",
node, lmc, rankx, bytelane, mode_str,
byte_offset, phys_addr, errors[rankx]);
@@ -1854,13 +1363,13 @@ hw_assist_test_dll_offset(bdk_node_t node, int dll_offset_mode,
}
}
} /* for (byte = byte_lo; byte <= byte_hi; byte++) */
- } /* for (rankx = 0; rankx < 4; rankx++) */
+ } /* for (rankx = 0; rankx < 4; rankx++) */
- tot_errors |= off_errors;
+ tot_errors |= off_errors;
- } /* for (byte_offset = -63; byte_offset < 64; byte_offset += BYTE_OFFSET_INCR) */
+ } /* for (byte_offset = -63; byte_offset < 64; byte_offset += BYTE_OFFSET_INCR) */
- // now choose the best byte_offsets for this pattern according to the best windows of the tested ranks
+ // now choose the best byte_offsets for this pattern according to the best windows of the tested ranks
// calculate offset by constructing an average window from the rank windows
for (byte = byte_lo; byte <= byte_hi; byte++) {
@@ -1928,7 +1437,7 @@ hw_assist_test_dll_offset(bdk_node_t node, int dll_offset_mode,
// print whether there are errors or not, but only when verbose...
tot_errors = run_test_dram_byte_threads(node, num_lmcs, bytemask);
printf("N%d.LMC%d: Bytelane %d DLL %s Offset Final Test: errors 0x%x\n",
- node, lmc, bytelane, mode_str, tot_errors);
+ node, lmc, bytelane, mode_str, tot_errors);
#endif
}
@@ -1946,7 +1455,7 @@ int perform_HW_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int bytel
// see if we want to do the tuning more than once per LMC...
if ((s = getenv("ddr_tune_ecc_loops"))) {
- loops = strtoul(s, NULL, 0);
+ loops = strtoul(s, NULL, 0);
}
// allow override of the test repeats (bursts)
@@ -1956,8 +1465,8 @@ int perform_HW_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int bytel
// print current working values
ddr_print2("N%d: H/W Tuning for bytelane %d will use %d loops, %d bursts, and %d patterns.\n",
- node, bytelane, loops, dram_tune_byte_bursts,
- NUM_BYTE_PATTERNS);
+ node, bytelane, loops, dram_tune_byte_bursts,
+ NUM_BYTE_PATTERNS);
// FIXME? get flag from LMC0 only
lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(0));
@@ -1966,42 +1475,42 @@ int perform_HW_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int bytel
for (lmc = 0; lmc < num_lmcs; lmc++) {
- ddr_print4("N%d: H/W Tuning: starting LMC%d bytelane %d tune.\n", node, lmc, bytelane);
-
- /* Enable ECC for the HW tests */
- // NOTE: we do enable ECC, but the HW tests used will not generate "visible" errors
- lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
- save_ecc_ena[lmc] = lmc_config.s.ecc_ena;
- lmc_config.s.ecc_ena = 1;
- DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u);
- lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
-
- // testing is done on a single LMC at a time
- // FIXME: for now, loop here to show what happens multiple times
- for (loop = 0; loop < loops; loop++) {
- /* Perform DLL offset tuning */
- //auto_set_dll_offset(node, 1 /* 1=write */, lmc, bytelane);
- hw_assist_test_dll_offset(node, 2 /* 2=read */, lmc, bytelane);
- }
-
- // perform cleanup on active LMC
- ddr_print4("N%d: H/W Tuning: finishing LMC%d bytelane %d tune.\n", node, lmc, bytelane);
-
- /* Restore ECC for DRAM tests */
- lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
- lmc_config.s.ecc_ena = save_ecc_ena[lmc];
- DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u);
- lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
-
- // finally, see if there are any read offset overrides after tuning
- for (int by = 0; by < 9; by++) {
- if ((s = lookup_env_parameter("ddr%d_tune_byte%d", lmc, by)) != NULL) {
- int dllro = strtoul(s, NULL, 10);
- change_dll_offset_enable(node, lmc, 0);
- load_dll_offset(node, lmc, 2 /* 2=read */, dllro, by);
- change_dll_offset_enable(node, lmc, 1);
- }
- }
+ ddr_print4("N%d: H/W Tuning: starting LMC%d bytelane %d tune.\n", node, lmc, bytelane);
+
+ /* Enable ECC for the HW tests */
+ // NOTE: we do enable ECC, but the HW tests used will not generate "visible" errors
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ save_ecc_ena[lmc] = lmc_config.s.ecc_ena;
+ lmc_config.s.ecc_ena = 1;
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u);
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+
+ // testing is done on a single LMC at a time
+ // FIXME: for now, loop here to show what happens multiple times
+ for (loop = 0; loop < loops; loop++) {
+ /* Perform DLL offset tuning */
+ //auto_set_dll_offset(node, 1 /* 1=write */, lmc, bytelane);
+ hw_assist_test_dll_offset(node, 2 /* 2=read */, lmc, bytelane);
+ }
+
+ // perform cleanup on active LMC
+ ddr_print4("N%d: H/W Tuning: finishing LMC%d bytelane %d tune.\n", node, lmc, bytelane);
+
+ /* Restore ECC for DRAM tests */
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+ lmc_config.s.ecc_ena = save_ecc_ena[lmc];
+ DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u);
+ lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
+
+ // finally, see if there are any read offset overrides after tuning
+ for (int by = 0; by < 9; by++) {
+ if ((s = lookup_env_parameter("ddr%d_tune_byte%d", lmc, by)) != NULL) {
+ int dllro = strtoul(s, NULL, 10);
+ change_dll_offset_enable(node, lmc, 0);
+ load_dll_offset(node, lmc, 2 /* 2=read */, dllro, by);
+ change_dll_offset_enable(node, lmc, 1);
+ }
+ }
} /* for (lmc = 0; lmc < num_lmcs; lmc++) */