From 7d48ac5c7dfb52fc470bbad1013b4d460bc6a1e0 Mon Sep 17 00:00:00 2001 From: David Hendricks Date: Fri, 9 Mar 2018 14:30:38 -0800 Subject: soc/cavium: Integrate BDK files into coreboot * Make it compile. * Fix whitespace errors. * Fix printf formats. * Add missing headers includes * Guard headers with ifdefs Compile DRAM init code in romstage. Compile QLM, PCIe, RNG, PHY, GPIO, MDIO init code in ramstage. Change-Id: I0a93219a14bfb6ebe41103a825d5032b11e7f2c6 Signed-off-by: David Hendricks Reviewed-on: https://review.coreboot.org/25089 Reviewed-by: Philipp Deppenwiese Tested-by: build bot (Jenkins) --- src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c | 1365 +++++++------------- 1 file changed, 437 insertions(+), 928 deletions(-) (limited to 'src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c') diff --git a/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c b/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c index e0e9d4442c..2c6a105dae 100644 --- a/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c +++ b/src/vendorcode/cavium/bdk/libdram/dram-tune-ddr3.c @@ -39,6 +39,13 @@ #include #include "dram-internal.h" +#include +#include /* for strtoul */ +#include +#include +#include +#include + // if enhanced verbosity levels are defined, use them #if defined(VB_PRT) #define ddr_print2(format, ...) VB_PRT(VBL_FAE, format, ##__VA_ARGS__) @@ -185,14 +192,14 @@ get_speed_bin(bdk_node_t node, int lmc) // FIXME: is this reasonable speed "binning"? if (mts_speed >= 1700) { - if (mts_speed >= 2000) - ret = 2; - else - ret = 1; + if (mts_speed >= 2000) + ret = 2; + else + ret = 1; } debug_print("N%d.LMC%d: %s: returning bin %d for MTS %d\n", - node, lmc, __FUNCTION__, ret, mts_speed); + node, lmc, __FUNCTION__, ret, mts_speed); return ret; } @@ -261,25 +268,25 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, // add this loop to fill memory with the test pattern first // loops are ordered so that only entire cachelines are written for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!! - for (k = 0; k < K_MAX; k += K_INC) { - for (j = 0; j < J_MAX; j += J_INC) { - p1 = p + ii + k + j; - p2 = p1 + p2offset; - for (i = 0, ix = 0; i < I_MAX; i += I_INC, ix++) { + for (k = 0; k < K_MAX; k += K_INC) { + for (j = 0; j < J_MAX; j += J_INC) { + p1 = p + ii + k + j; + p2 = p1 + p2offset; + for (i = 0, ix = 0; i < I_MAX; i += I_INC, ix++) { - v = dram_tune_test_pattern[ix]; - v1 = v; // write the same thing to both areas + v = dram_tune_test_pattern[ix]; + v1 = v; // write the same thing to both areas - __bdk_dram_write64(p1 + i, v); - __bdk_dram_write64(p2 + i, v1); + __bdk_dram_write64(p1 + i, v); + __bdk_dram_write64(p2 + i, v1); - } + } #if ENABLE_WBIL2 - BDK_CACHE_WBI_L2(p1); - BDK_CACHE_WBI_L2(p2); + BDK_CACHE_WBI_L2(p1); + BDK_CACHE_WBI_L2(p2); #endif - } - } + } + } } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */ #endif @@ -291,12 +298,12 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, // loops are ordered so that only a single 64-bit slot is written to each cacheline at one time, // then the cachelines are forced out; this should maximize read/write traffic for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!! 
- for (k = 0; k < K_MAX; k += K_INC) { - for (i = 0; i < I_MAX; i += I_INC) { - for (j = 0; j < J_MAX; j += J_INC) { + for (k = 0; k < K_MAX; k += K_INC) { + for (i = 0; i < I_MAX; i += I_INC) { + for (j = 0; j < J_MAX; j += J_INC) { - p1 = p + ii + k + j; - p2 = p1 + p2offset; + p1 = p + ii + k + j; + p2 = p1 + p2offset; #if ENABLE_PREFETCH if (j < (J_MAX - J_INC)) { @@ -304,20 +311,20 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, BDK_PREFETCH(p2 + J_INC, BDK_CACHE_LINE_SIZE); } #endif - - v = pattern1 * (p1 + i); - v1 = v; // write the same thing to both areas - __bdk_dram_write64(p1 + i, v); - __bdk_dram_write64(p2 + i, v1); + v = pattern1 * (p1 + i); + v1 = v; // write the same thing to both areas + + __bdk_dram_write64(p1 + i, v); + __bdk_dram_write64(p2 + i, v1); #if ENABLE_WBIL2 - BDK_CACHE_WBI_L2(p1); - BDK_CACHE_WBI_L2(p2); + BDK_CACHE_WBI_L2(p1); + BDK_CACHE_WBI_L2(p2); #endif - } - } - } + } + } + } } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */ BDK_DCACHE_INVALIDATE; @@ -329,24 +336,24 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, for (int burst = 0; burst < 1/* was: dram_tune_use_bursts*/; burst++) { - uint64_t this_pattern = bdk_rng_get_random64(); - pattern2 ^= this_pattern; + uint64_t this_pattern = bdk_rng_get_random64(); + pattern2 ^= this_pattern; /* XOR the data with a random value, applying the change to both * memory areas. */ #if ENABLE_PREFETCH - BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE); - BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE); + BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE); + BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE); #endif - for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!! - for (k = 0; k < K_MAX; k += K_INC) { - for (i = 0; i < I_MAX; i += I_INC) { // FIXME: rearranged, did not make much difference? - for (j = 0; j < J_MAX; j += J_INC) { + for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!! + for (k = 0; k < K_MAX; k += K_INC) { + for (i = 0; i < I_MAX; i += I_INC) { // FIXME: rearranged, did not make much difference? 
+ for (j = 0; j < J_MAX; j += J_INC) { - p1 = p + ii + k + j; - p2 = p1 + p2offset; + p1 = p + ii + k + j; + p2 = p1 + p2offset; #if ENABLE_PREFETCH if (j < (J_MAX - J_INC)) { @@ -354,26 +361,26 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, BDK_PREFETCH(p2 + J_INC, BDK_CACHE_LINE_SIZE); } #endif - - v = __bdk_dram_read64(p1 + i) ^ this_pattern; - v1 = __bdk_dram_read64(p2 + i) ^ this_pattern; + + v = __bdk_dram_read64(p1 + i) ^ this_pattern; + v1 = __bdk_dram_read64(p2 + i) ^ this_pattern; #if ENABLE_WBIL2 - BDK_CACHE_INV_L2(p1); - BDK_CACHE_INV_L2(p2); + BDK_CACHE_INV_L2(p1); + BDK_CACHE_INV_L2(p2); #endif - __bdk_dram_write64(p1 + i, v); - __bdk_dram_write64(p2 + i, v1); + __bdk_dram_write64(p1 + i, v); + __bdk_dram_write64(p2 + i, v1); #if ENABLE_WBIL2 - BDK_CACHE_WBI_L2(p1); - BDK_CACHE_WBI_L2(p2); + BDK_CACHE_WBI_L2(p1); + BDK_CACHE_WBI_L2(p2); #endif - } - } - } - } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */ + } + } + } + } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */ BDK_DCACHE_INVALIDATE; @@ -381,8 +388,8 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, node, lmc); #if ENABLE_PREFETCH - BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE); - BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE); + BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE); + BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE); #endif /* Look for differences in the areas. If there is a mismatch, reset @@ -390,18 +397,18 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, * means that on all subsequent passes the pair of locations remain * out of sync giving spurious errors. */ - // FIXME: change the loop order so that an entire cache line is compared at one time - // FIXME: this is so that a read error that occurs *anywhere* on the cacheline will be caught, - // FIXME: rather than comparing only 1 cacheline slot at a time, where an error on a different - // FIXME: slot will be missed that time around - // Does the above make sense? + // FIXME: change the loop order so that an entire cache line is compared at one time + // FIXME: this is so that a read error that occurs *anywhere* on the cacheline will be caught, + // FIXME: rather than comparing only 1 cacheline slot at a time, where an error on a different + // FIXME: slot will be missed that time around + // Does the above make sense? - for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!! - for (k = 0; k < K_MAX; k += K_INC) { - for (j = 0; j < J_MAX; j += J_INC) { + for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!! + for (k = 0; k < K_MAX; k += K_INC) { + for (j = 0; j < J_MAX; j += J_INC) { - p1 = p + ii + k + j; - p2 = p1 + p2offset; + p1 = p + ii + k + j; + p2 = p1 + p2offset; #if ENABLE_PREFETCH if (j < (J_MAX - J_INC)) { @@ -409,15 +416,15 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, BDK_PREFETCH(p2 + J_INC, BDK_CACHE_LINE_SIZE); } #endif - - // process entire cachelines in the innermost loop - for (i = 0; i < I_MAX; i += I_INC) { - v = ((p1 + i) * pattern1) ^ pattern2; // FIXME: this should predict what we find...??? - d1 = __bdk_dram_read64(p1 + i); - d2 = __bdk_dram_read64(p2 + i); + // process entire cachelines in the innermost loop + for (i = 0; i < I_MAX; i += I_INC) { + + v = ((p1 + i) * pattern1) ^ pattern2; // FIXME: this should predict what we find...??? 
+ d1 = __bdk_dram_read64(p1 + i); + d2 = __bdk_dram_read64(p2 + i); - xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes + xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes if (!xor) continue; @@ -426,32 +433,32 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, bad_bits[0] |= xor; //bad_bits[1] |= ~mpr_data1 & 0xffUL; // cannot do ECC here - int bybit = 1; - uint64_t bymsk = 0xffULL; // start in byte lane 0 - while (xor != 0) { - debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n", - burst, p1, p2, v, d1, d2); - if (xor & bymsk) { // error(s) in this lane - errors |= bybit; // set the byte error bit - xor &= ~bymsk; // clear byte lane in error bits - datamask &= ~bymsk; // clear the byte lane in the mask + int bybit = 1; + uint64_t bymsk = 0xffULL; // start in byte lane 0 + while (xor != 0) { + debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n", + burst, p1, p2, v, d1, d2); + if (xor & bymsk) { // error(s) in this lane + errors |= bybit; // set the byte error bit + xor &= ~bymsk; // clear byte lane in error bits + datamask &= ~bymsk; // clear the byte lane in the mask #if EXIT_WHEN_ALL_LANES_HAVE_ERRORS - if (datamask == 0) { // nothing left to do - return errors; // completely done when errors found in all byte lanes in datamask - } + if (datamask == 0) { // nothing left to do + return errors; // completely done when errors found in all byte lanes in datamask + } #endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */ - } - bymsk <<= 8; // move mask into next byte lane - bybit <<= 1; // move bit into next byte position - } - } + } + bymsk <<= 8; // move mask into next byte lane + bybit <<= 1; // move bit into next byte position + } + } #if ENABLE_WBIL2 - BDK_CACHE_WBI_L2(p1); - BDK_CACHE_WBI_L2(p2); + BDK_CACHE_WBI_L2(p1); + BDK_CACHE_WBI_L2(p2); #endif - } - } - } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */ + } + } + } /* for (ii = 0; ii < (1ULL << 31); ii += (1ULL << 29)) */ debug_print("N%d.LMC%d: dram_tuning_mem_xor: done TEST loop\n", node, lmc); @@ -476,278 +483,6 @@ int dram_tuning_mem_xor(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, #define EXTRACT(v, lsb, width) (((v) >> (lsb)) & ((1ull << (width)) - 1)) #define LMCNO(address, xbits) (EXTRACT(address, 7, xbits) ^ EXTRACT(address, 20, xbits) ^ EXTRACT(address, 12, xbits)) -static int dram_tuning_mem_xor2(uint64_t p, uint64_t bitmask, int xbits) -{ - uint64_t p1, p2, d1, d2; - uint64_t v, vpred; - uint64_t p2offset = dram_tune_rank_offset; // FIXME? - uint64_t datamask; - uint64_t xor; - uint64_t ii; - uint64_t pattern1 = bdk_rng_get_random64(); - uint64_t pattern2 = 0; - int errors = 0; - int errs_by_lmc[4] = { 0,0,0,0 }; - int lmc; - uint64_t vbase, vincr; - - // Byte lanes may be clear in the mask to indicate no testing on that lane. - datamask = bitmask; - - /* Add offset to both test regions to not clobber boot stuff - * when running from L2 for NAND boot. 
- */ - p += AREA_BASE_OFFSET; // make sure base is out of the way of boot - - // move the multiplies outside the loop - vbase = p * pattern1; - vincr = 8 * pattern1; - -#define II_INC (1ULL << 3) -#define II_MAX (1ULL << 22) // stop where the core ID bits start - - // walk the memory areas by 8-byte words - v = vbase; - for (ii = 0; ii < II_MAX; ii += II_INC) { - - p1 = p + ii; - p2 = p1 + p2offset; - - __bdk_dram_write64(p1, v); - __bdk_dram_write64(p2, v); - - v += vincr; - } - - __bdk_dram_flush_to_mem_range(p , p + II_MAX); - __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + II_MAX); - BDK_DCACHE_INVALIDATE; - - /* Make a series of passes over the memory areas. */ - - for (int burst = 0; burst < dram_tune_use_bursts; burst++) - { - uint64_t this_pattern = bdk_rng_get_random64(); - pattern2 ^= this_pattern; - - /* XOR the data with a random value, applying the change to both - * memory areas. - */ -#if 0 - BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE); - BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE); -#endif - for (ii = 0; ii < II_MAX; ii += II_INC) { // FIXME? extend the range of memory tested!! - - p1 = p + ii; - p2 = p1 + p2offset; - - d1 = __bdk_dram_read64(p1) ^ this_pattern; - d2 = __bdk_dram_read64(p2) ^ this_pattern; - - __bdk_dram_write64(p1, d1); - __bdk_dram_write64(p2, d2); - - } - __bdk_dram_flush_to_mem_range(p , p + II_MAX); - __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + II_MAX); - BDK_DCACHE_INVALIDATE; - - /* Look for differences in the areas. If there is a mismatch, reset - * both memory locations with the same pattern. Failing to do so - * means that on all subsequent passes the pair of locations remain - * out of sync giving spurious errors. - */ -#if 0 - BDK_PREFETCH(p , BDK_CACHE_LINE_SIZE); - BDK_PREFETCH(p + p2offset, BDK_CACHE_LINE_SIZE); -#endif - vpred = vbase; - for (ii = 0; ii < II_MAX; ii += II_INC) { - - p1 = p + ii; - p2 = p1 + p2offset; - - v = vpred ^ pattern2; // this should predict what we find... - d1 = __bdk_dram_read64(p1); - d2 = __bdk_dram_read64(p2); - vpred += vincr; - - xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes - if (!xor) // no errors - continue; - - lmc = LMCNO(p1, xbits); // FIXME: LMC should be SAME for p1 and p2!!! 
- if (lmc != (int)LMCNO(p2, xbits)) { - printf("ERROR: LMCs for addresses [0x%016lX] (%lld) and [0x%016lX] (%lld) differ!!!\n", - p1, LMCNO(p1, xbits), p2, LMCNO(p2, xbits)); - } - int bybit = 1; - uint64_t bymsk = 0xffULL; // start in byte lane 0 - while (xor != 0) { - debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n", - burst, p1, p2, v, d1, d2); - if (xor & bymsk) { // error(s) in this lane - errs_by_lmc[lmc] |= bybit; // set the byte error bit in the LMCs errors - errors |= bybit; // set the byte error bit - xor &= ~bymsk; // clear byte lane in error bits - //datamask &= ~bymsk; // clear the byte lane in the mask - } - bymsk <<= 8; // move mask into next byte lane - bybit <<= 1; // move bit into next byte position - } /* while (xor != 0) */ - } /* for (ii = 0; ii < II_MAX; ii += II_INC) */ - } /* for (int burst = 0; burst < dram_tune_use_bursts; burst++) */ - - // update the global LMC error states - for (lmc = 0; lmc < 4; lmc++) { - if (errs_by_lmc[lmc]) { - bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_lmc_errs[lmc], errs_by_lmc[lmc]); - } - } - - return errors; -} - -#if 0 -static int dram_tuning_mem_rows(uint64_t p, uint64_t bitmask) -{ - uint64_t p1, p2, d1, d2; - uint64_t v, v1; - uint64_t p2offset = dram_tune_rank_offset; // FIXME? - uint64_t datamask; - uint64_t xor; - int i, j, k, ii; - int errors = 0; - int index; - uint64_t pattern1 = 0; // FIXME: maybe this could be from a table? - uint64_t pattern2; - - // Byte lanes may be clear in the mask to indicate no testing on that lane. - datamask = bitmask; - - /* Add offset to both test regions to not clobber boot stuff - * when running from L2 for NAND boot. - */ - p += 0x10000000; // FIXME? was: 0x4000000; // make sure base is out of the way of cores for tuning - - pattern2 = pattern1; - for (k = 0; k < (1 << 20); k += (1 << 14)) { - for (j = 0; j < (1 << 12); j += (1 << 9)) { - for (i = 0; i < (1 << 7); i += 8) { - index = i + j + k; - p1 = p + index; - p2 = p1 + p2offset; - - v = pattern2; - v1 = v; // write the same thing to same slot in both cachelines - pattern2 = ~pattern2; // flip bits for next slots - - __bdk_dram_write64(p1, v); - __bdk_dram_write64(p2, v1); - } -#if 1 - BDK_CACHE_WBI_L2(p1); - BDK_CACHE_WBI_L2(p2); -#endif - } - } - -#if 0 - __bdk_dram_flush_to_mem_range(p, p + (1ULL << 20)); // max_addr is start + where k stops... - __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + (1ULL << 20)); // max_addr is start + where k stops... -#endif - BDK_DCACHE_INVALIDATE; - - /* Make a series of passes over the memory areas. */ - - for (int burst = 0; burst < dram_tune_use_bursts; burst++) - { - /* just read and flip the bits applying the change to both - * memory areas. - */ - for (k = 0; k < (1 << 20); k += (1 << 14)) { - for (j = 0; j < (1 << 12); j += (1 << 9)) { - for (i = 0; i < (1 << 7); i += 8) { - index = i + j + k; - p1 = p + index; - p2 = p1 + p2offset; - - v = ~__bdk_dram_read64(p1); - v1 = ~__bdk_dram_read64(p2); - - __bdk_dram_write64(p1, v); - __bdk_dram_write64(p2, v1); - } -#if 1 - BDK_CACHE_WBI_L2(p1); - BDK_CACHE_WBI_L2(p2); -#endif - } - } - -#if 0 - __bdk_dram_flush_to_mem_range(p, p + (1ULL << 20)); // max_addr is start + where k stops... - __bdk_dram_flush_to_mem_range(p + p2offset, p + p2offset + (1ULL << 20)); // max_addr is start + where k stops... -#endif - BDK_DCACHE_INVALIDATE; - - /* Look for differences in the areas. If there is a mismatch, reset - * both memory locations with the same pattern. 
Failing to do so - * means that on all subsequent passes the pair of locations remain - * out of sync giving spurious errors. - */ - - // FIXME: change the loop order so that an entire cache line is compared at one time - // FIXME: this is so that a read error that occurs *anywhere* on the cacheline will be caught, - // FIXME: rather than comparing only 1 cacheline slot at a time, where an error on a different - // FIXME: slot will be missed that time around - // Does the above make sense? - - pattern2 = ~pattern1; // slots have been flipped by the above loop - - for (k = 0; k < (1 << 20); k += (1 << 14)) { - for (j = 0; j < (1 << 12); j += (1 << 9)) { - for (i = 0; i < (1 << 7); i += 8) { - index = i + j + k; - p1 = p + index; - p2 = p1 + p2offset; - - v = pattern2; // FIXME: this should predict what we find...??? - d1 = __bdk_dram_read64(p1); - d2 = __bdk_dram_read64(p2); - pattern2 = ~pattern2; // flip for next slot - - xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes - - int bybit = 1; - uint64_t bymsk = 0xffULL; // start in byte lane 0 - while (xor != 0) { - debug_print("ERROR(%03d): [0x%016lX] [0x%016lX] expected 0x%016lX d1 %016lX d2 %016lX\n", - burst, p1, p2, v, d1, d2); - if (xor & bymsk) { // error(s) in this lane - errors |= bybit; // set the byte error bit - xor &= ~bymsk; // clear byte lane in error bits - datamask &= ~bymsk; // clear the byte lane in the mask -#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS - if (datamask == 0) { // nothing left to do - return errors; // completely done when errors found in all byte lanes in datamask - } -#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */ - } - bymsk <<= 8; // move mask into next byte lane - bybit <<= 1; // move bit into next byte position - } - } - } - } - pattern1 = ~pattern1; // flip the starting pattern for the next burst - - } /* for (int burst = 0; burst < dram_tune_use_bursts; burst++) */ - return errors; -} -#endif - // cores to use #define DEFAULT_USE_CORES 44 // FIXME: was (1 << CORE_BITS) int dram_tune_use_cores = DEFAULT_USE_CORES; // max cores to use, override available @@ -763,109 +498,6 @@ typedef struct uint64_t byte_mask; } test_dram_byte_info_t; -static void dram_tuning_thread(int arg, void *arg1) -{ - test_dram_byte_info_t *test_info = arg1; - int core = arg; - uint64_t errs; - bdk_node_t node = test_info->node; - int num_lmcs, lmc; -#if 0 - num_lmcs = test_info->num_lmcs; - // map core numbers into hopefully equal groups per LMC - lmc = core % num_lmcs; -#else - // FIXME: this code should allow running all the cores on a single LMC... - // if incoming num_lmcs > 0, then use as normal; if < 0 remap to a single LMC - if (test_info->num_lmcs >= 0) { - num_lmcs = test_info->num_lmcs; - // map core numbers into hopefully equal groups per LMC - lmc = core % num_lmcs; - } else { - num_lmcs = 1; - // incoming num_lmcs is (desired LMC - 10) - lmc = 10 + test_info->num_lmcs; - } -#endif - uint64_t base_address = 0/* was: (lmc << 7); now done by callee */; - uint64_t bytemask = test_info->byte_mask; - - /* Figure out our work memory range. - * - * Note: base_address above just provides the physical offset which determines - * specific LMC portions of the address space and does not have the node bits set. 
- */ - //was: base_address = bdk_numa_get_address(node, base_address); // map to node // now done by callee - base_address |= (core << CORE_SHIFT); // FIXME: also put full core into address - if (dram_tune_dimm_offset) { // if multi-slot in some way, choose a DIMM for the core - base_address |= (core & (1 << (num_lmcs >> 1))) ? dram_tune_dimm_offset : 0; - } - - debug_print("Node %d, core %d, Testing area 1 at 0x%011lx, area 2 at 0x%011lx\n", - node, core, base_address + AREA_BASE_OFFSET, - base_address + AREA_BASE_OFFSET + dram_tune_rank_offset); - - errs = dram_tuning_mem_xor(node, lmc, base_address, bytemask, NULL); - //errs = dram_tuning_mem_rows(base_address, bytemask); - - /* Report that we're done */ - debug_print("Core %d on LMC %d node %d done with test_dram_byte with 0x%lx errs\n", - core, lmc, node, errs); - - if (errs) { - bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_threads_errs, errs); - bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_lmc_errs[lmc], errs); - } - - bdk_atomic_add64_nosync(&test_dram_byte_threads_done, 1); - - return; -} - -static void dram_tuning_thread2(int arg, void *arg1) -{ - test_dram_byte_info_t *test_info = arg1; - int core = arg; - uint64_t errs; - bdk_node_t node = test_info->node; - int num_lmcs = test_info->num_lmcs; - - uint64_t base_address = 0; // - uint64_t bytemask = test_info->byte_mask; - - /* Figure out our work memory range. - * - * Note: base_address above just provides the physical offset which determines - * specific portions of the address space and does not have the node bits set. - */ - base_address = bdk_numa_get_address(node, base_address); // map to node - base_address |= (core << CORE_SHIFT); // FIXME: also put full core into address - if (dram_tune_dimm_offset) { // if multi-slot in some way, choose a DIMM for the core - base_address |= (core & 1) ? dram_tune_dimm_offset : 0; - } - - debug_print("Node %d, core %d, Testing area 1 at 0x%011lx, area 2 at 0x%011lx\n", - node, core, base_address + AREA_BASE_OFFSET, - base_address + AREA_BASE_OFFSET + dram_tune_rank_offset); - - errs = dram_tuning_mem_xor2(base_address, bytemask, (num_lmcs >> 1)); // 4->2, 2->1, 1->0 - //errs = dram_tuning_mem_rows(base_address, bytemask); - - /* Report that we're done */ - debug_print("Core %d on LMC %d node %d done with test_dram_byte with 0x%lx errs\n", - core, lmc, node, errs); - - if (errs) { - bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_threads_errs, errs); - // FIXME: this will have been done already in the called test routine - //bdk_atomic_fetch_and_bset64_nosync(&test_dram_byte_lmc_errs[lmc], errs); - } - - bdk_atomic_add64_nosync(&test_dram_byte_threads_done, 1); - - return; -} - static int dram_tune_use_xor2 = 1; // FIXME: do NOT default to original mem_xor (LMC-based) code static int @@ -874,7 +506,6 @@ run_dram_tuning_threads(bdk_node_t node, int num_lmcs, uint64_t bytemask) test_dram_byte_info_t test_dram_byte_info; test_dram_byte_info_t *test_info = &test_dram_byte_info; int total_count = 0; - __dram_tuning_thread_t thread_p = (dram_tune_use_xor2) ? 
dram_tuning_thread2 : dram_tuning_thread; test_info->node = node; test_info->num_lmcs = num_lmcs; @@ -890,20 +521,14 @@ run_dram_tuning_threads(bdk_node_t node, int num_lmcs, uint64_t bytemask) /* Start threads for cores on the node */ if (bdk_numa_exists(node)) { - debug_print("Starting %d threads for test_dram_byte\n", dram_tune_use_cores); - for (int core = 0; core < dram_tune_use_cores; core++) { - if (bdk_thread_create(node, 0, thread_p, core, (void *)test_info, 0)) { - bdk_error("Failed to create thread %d for test_dram_byte\n", core); - } else { - total_count++; - } - } + /* FIXME(dhendrix): We shouldn't hit this. */ + die("bdk_numa_exists() is non-zero\n"); } #if 0 /* Wait for threads to finish */ while (bdk_atomic_get64(&test_dram_byte_threads_done) < total_count) - bdk_thread_yield(); + bdk_thread_yield(); #else #define TIMEOUT_SECS 5 // FIXME: long enough so a pass for a given setting will not print /* Wait for threads to finish, with progress */ @@ -912,7 +537,7 @@ run_dram_tuning_threads(bdk_node_t node, int num_lmcs, uint64_t bytemask) uint64_t period = bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_TIME) * TIMEOUT_SECS; // FIXME? uint64_t timeout = bdk_clock_get_count(BDK_CLOCK_TIME) + period; do { - bdk_thread_yield(); +// bdk_thread_yield(); /* FIXME(dhendrix): don't yield... */ cur_count = bdk_atomic_get64(&test_dram_byte_threads_done); cur_time = bdk_clock_get_count(BDK_CLOCK_TIME); if (cur_time >= timeout) { @@ -927,136 +552,10 @@ run_dram_tuning_threads(bdk_node_t node, int num_lmcs, uint64_t bytemask) } /* These variables count the number of ECC errors. They should only be accessed atomically */ -extern int64_t __bdk_dram_ecc_single_bit_errors[]; +/* FIXME(dhendrix): redundant declaration in original BDK sources */ +//extern int64_t __bdk_dram_ecc_single_bit_errors[]; extern int64_t __bdk_dram_ecc_double_bit_errors[]; -#if 0 -// make the tuning test callable as a standalone -int -bdk_run_dram_tuning_test(int node) -{ - int num_lmcs = __bdk_dram_get_num_lmc(node); - const char *s; - int lmc, byte; - int errors; - uint64_t start_dram_dclk[4], start_dram_ops[4]; - int save_use_bursts; - - // check for the cores on this node, abort if not more than 1 // FIXME? - dram_tune_max_cores = bdk_get_num_running_cores(node); - if (dram_tune_max_cores < 2) { - //bdk_init_cores(node, 0); - printf("N%d: ERROR: not enough cores to run the DRAM tuning test.\n", node); - return 0; - } - - // but use only a certain number of cores, at most what is available - if ((s = getenv("ddr_tune_use_cores")) != NULL) { - dram_tune_use_cores = strtoul(s, NULL, 0); - if (dram_tune_use_cores <= 0) // allow 0 or negative to mean all - dram_tune_use_cores = dram_tune_max_cores; - } - if (dram_tune_use_cores > dram_tune_max_cores) - dram_tune_use_cores = dram_tune_max_cores; - - // save the original bursts, so we can replace it with a better number for just testing - save_use_bursts = dram_tune_use_bursts; - dram_tune_use_bursts = 1500; // FIXME: hard code bursts for the test here... - - // allow override of the test repeats (bursts) per thread create - if ((s = getenv("ddr_tune_use_bursts")) != NULL) { - dram_tune_use_bursts = strtoul(s, NULL, 10); - } - - // allow override of the test mem_xor algorithm - if ((s = getenv("ddr_tune_use_xor2")) != NULL) { - dram_tune_use_xor2 = !!strtoul(s, NULL, 10); - } - - // FIXME? consult LMC0 only - BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(0)); - if (lmcx_config.s.rank_ena) { // replace the default offset when there is more than 1 rank... 
- dram_tune_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2)); - ddr_print("N%d: run_dram_tuning_test: changing rank offset to 0x%lx\n", node, dram_tune_rank_offset); - } - if (lmcx_config.s.init_status & 0x0c) { // bit 2 or 3 set indicates 2 DIMMs - dram_tune_dimm_offset = 1ull << (28 + lmcx_config.s.pbank_lsb + (num_lmcs/2)); - ddr_print("N%d: run_dram_tuning_test: changing dimm offset to 0x%lx\n", node, dram_tune_dimm_offset); - } - int ddr_interface_64b = !lmcx_config.s.mode32b; - - // construct the bytemask - int bytes_todo = (ddr_interface_64b) ? 0xff : 0x0f; // FIXME: hack? - uint64_t bytemask = 0; - for (byte = 0; byte < 8; ++byte) { - uint64_t bitmask; - if (bytes_todo & (1 << byte)) { - bitmask = ((!ddr_interface_64b) && (byte == 4)) ? 0x0f: 0xff; - bytemask |= bitmask << (8*byte); // set the bytes bits in the bytemask - } - } /* for (byte = 0; byte < 8; ++byte) */ - - // print current working values - ddr_print("N%d: run_dram_tuning_test: max %d cores, use %d cores, use %d bursts.\n", - node, dram_tune_max_cores, dram_tune_use_cores, dram_tune_use_bursts); - - // do the setup on active LMCs - for (lmc = 0; lmc < num_lmcs; lmc++) { - // record start cycle CSRs here for utilization measure - start_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc)); - start_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc)); -#if 0 - bdk_atomic_set64(&__bdk_dram_ecc_single_bit_errors[lmc], 0); - bdk_atomic_set64(&__bdk_dram_ecc_double_bit_errors[lmc], 0); -#else - __bdk_dram_ecc_single_bit_errors[lmc] = 0; - __bdk_dram_ecc_double_bit_errors[lmc] = 0; -#endif - } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ - - bdk_watchdog_poke(); - - // run the test(s) - // only 1 call should be enough, let the bursts, etc, control the load... 
- errors = run_dram_tuning_threads(node, num_lmcs, bytemask); - - /* Check ECC error counters after the test */ - int64_t ecc_single = 0; - int64_t ecc_double = 0; - int64_t ecc_single_errs[4]; - int64_t ecc_double_errs[4]; - - // finally, print the utilizations all together, and sum the ECC errors - for (lmc = 0; lmc < num_lmcs; lmc++) { - uint64_t dclk_diff = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc)) - start_dram_dclk[lmc]; - uint64_t ops_diff = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc)) - start_dram_ops[lmc]; - uint64_t percent_x10 = ops_diff * 1000 / dclk_diff; - printf("N%d.LMC%d: ops %lu, cycles %lu, used %lu.%lu%%\n", - node, lmc, ops_diff, dclk_diff, percent_x10 / 10, percent_x10 % 10); - - ecc_single += (ecc_single_errs[lmc] = bdk_atomic_get64(&__bdk_dram_ecc_single_bit_errors[lmc])); - ecc_double += (ecc_double_errs[lmc] = bdk_atomic_get64(&__bdk_dram_ecc_double_bit_errors[lmc])); - } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ - - /* Always print any ECC errors */ - if (ecc_single || ecc_double) { - printf("Test \"%s\": ECC errors, %ld/%ld/%ld/%ld corrected, %ld/%ld/%ld/%ld uncorrected\n", - "DRAM Tuning Test", - ecc_single_errs[0], ecc_single_errs[1], ecc_single_errs[2], ecc_single_errs[3], - ecc_double_errs[0], ecc_double_errs[1], ecc_double_errs[2], ecc_double_errs[3]); - } - if (errors || ecc_double || ecc_single) { - printf("Test \"%s\": FAIL: %ld single, %ld double, %d compare errors\n", - "DRAM Tuning Test", ecc_single, ecc_double, errors); - } - - // restore bursts - dram_tune_use_bursts = save_use_bursts; - - return (errors + ecc_double + ecc_single); -} -#endif /* 0 */ - #define DEFAULT_SAMPLE_GRAN 3 // sample for errors every N offset values #define MIN_BYTE_OFFSET -63 #define MAX_BYTE_OFFSET +63 @@ -1064,8 +563,8 @@ int dram_tune_use_gran = DEFAULT_SAMPLE_GRAN; static int auto_set_dll_offset(bdk_node_t node, int dll_offset_mode, - int num_lmcs, int ddr_interface_64b, - int do_tune) + int num_lmcs, int ddr_interface_64b, + int do_tune) { int byte_offset; //unsigned short result[9]; @@ -1081,17 +580,17 @@ auto_set_dll_offset(bdk_node_t node, int dll_offset_mode, uint64_t start_dram_ops[4], stop_dram_ops[4]; int errors, tot_errors; int lmc; - char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write"; + const char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write"; /* FIXME(dhendrix): const */ int mode_is_read = (dll_offset_mode == 2); - char *mode_blk = (dll_offset_mode == 2) ? " " : ""; + const char *mode_blk = (dll_offset_mode == 2) ? " " : ""; /* FIXME(dhendrix): const */ int start_offset, end_offset, incr_offset; int speed_bin = get_speed_bin(node, 0); // FIXME: just get from LMC0? int low_risk_count = 0, needs_review_count = 0; if (dram_tune_use_gran != DEFAULT_SAMPLE_GRAN) { - ddr_print2("N%d: Changing sample granularity from %d to %d\n", - node, DEFAULT_SAMPLE_GRAN, dram_tune_use_gran); + ddr_print2("N%d: Changing sample granularity from %d to %d\n", + node, DEFAULT_SAMPLE_GRAN, dram_tune_use_gran); } // ensure sample is taken at 0 start_offset = MIN_BYTE_OFFSET - (MIN_BYTE_OFFSET % dram_tune_use_gran); @@ -1109,12 +608,14 @@ auto_set_dll_offset(bdk_node_t node, int dll_offset_mode, // FIXME? consult LMC0 only BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(0)); if (lmcx_config.s.rank_ena) { // replace the default offset when there is more than 1 rank... 
- dram_tune_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2)); - ddr_print2("N%d: Tuning multiple ranks per DIMM (rank offset 0x%lx).\n", node, dram_tune_rank_offset); + dram_tune_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2)); + /* FIXME(dhendrix): %lx --> %llx */ + ddr_print2("N%d: Tuning multiple ranks per DIMM (rank offset 0x%llx).\n", node, dram_tune_rank_offset); } if (lmcx_config.s.init_status & 0x0c) { // bit 2 or 3 set indicates 2 DIMMs - dram_tune_dimm_offset = 1ull << (28 + lmcx_config.s.pbank_lsb + (num_lmcs/2)); - ddr_print2("N%d: Tuning multiple DIMMs per channel (DIMM offset 0x%lx)\n", node, dram_tune_dimm_offset); + dram_tune_dimm_offset = 1ull << (28 + lmcx_config.s.pbank_lsb + (num_lmcs/2)); + /* FIXME(dhendrix): %lx --> %llx */ + ddr_print2("N%d: Tuning multiple DIMMs per channel (DIMM offset 0x%llx)\n", node, dram_tune_dimm_offset); } // FIXME? do this for LMC0 only @@ -1125,76 +626,76 @@ auto_set_dll_offset(bdk_node_t node, int dll_offset_mode, int bytes_todo = (ddr_interface_64b) ? 0xff : 0x0f; uint64_t bytemask = 0; for (byte = 0; byte < 8; ++byte) { - if (bytes_todo & (1 << byte)) { - bytemask |= 0xfful << (8*byte); // set the bytes bits in the bytemask - } + if (bytes_todo & (1 << byte)) { + bytemask |= 0xfful << (8*byte); // set the bytes bits in the bytemask + } } /* for (byte = 0; byte < 8; ++byte) */ // now loop through selected legal values for the DLL byte offset... for (byte_offset = start_offset; byte_offset <= end_offset; byte_offset += incr_offset) { - // do the setup on active LMCs - for (lmc = 0; lmc < num_lmcs; lmc++) { - change_dll_offset_enable(node, lmc, 0); - - // set all byte lanes at once - load_dll_offset(node, lmc, dll_offset_mode, byte_offset, 10 /* All bytes at once */); - // but then clear the ECC byte lane so it should be neutral for the test... - load_dll_offset(node, lmc, dll_offset_mode, 0, 8); - - change_dll_offset_enable(node, lmc, 1); - - // record start cycle CSRs here for utilization measure - start_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc)); - start_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc)); - } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ - - bdk_watchdog_poke(); - - // run the test(s) - // only 1 call should be enough, let the bursts, etc, control the load... - tot_errors = run_dram_tuning_threads(node, num_lmcs, bytemask); - - for (lmc = 0; lmc < num_lmcs; lmc++) { - // record stop cycle CSRs here for utilization measure - stop_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc)); - stop_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc)); - - // accumulate... 
- ops_sum[lmc] += stop_dram_ops[lmc] - start_dram_ops[lmc]; - dclk_sum[lmc] += stop_dram_dclk[lmc] - start_dram_dclk[lmc]; - - errors = test_dram_byte_lmc_errs[lmc]; - - // check errors by byte, but not ECC - for (byte = 0; byte < 8; ++byte) { - if (!(bytes_todo & (1 << byte))) // is this byte lane to be done - continue; // no - - byte_delay_windows[lmc][byte] <<= 1; // always put in a zero - if (errors & (1 << byte)) { // yes, an error in this byte lane - byte_delay_count[lmc][byte] = 0; // stop now always - } else { // no error in this byte lane - if (byte_delay_count[lmc][byte] == 0) { // first success, set run start - byte_delay_start[lmc][byte] = byte_offset; - } - byte_delay_count[lmc][byte] += incr_offset; // bump run length - - if (byte_delay_count[lmc][byte] > byte_delay_best_count[lmc][byte]) { - byte_delay_best_count[lmc][byte] = byte_delay_count[lmc][byte]; - byte_delay_best_start[lmc][byte] = byte_delay_start[lmc][byte]; - } - byte_delay_windows[lmc][byte] |= 1ULL; // for pass, put in a 1 - } - } /* for (byte = 0; byte < 8; ++byte) */ - - // only print when there are errors and verbose... - if (errors) { - debug_print("DLL %s Offset Test %3d: errors 0x%x\n", - mode_str, byte_offset, errors); - } - } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ + // do the setup on active LMCs + for (lmc = 0; lmc < num_lmcs; lmc++) { + change_dll_offset_enable(node, lmc, 0); + + // set all byte lanes at once + load_dll_offset(node, lmc, dll_offset_mode, byte_offset, 10 /* All bytes at once */); + // but then clear the ECC byte lane so it should be neutral for the test... + load_dll_offset(node, lmc, dll_offset_mode, 0, 8); + + change_dll_offset_enable(node, lmc, 1); + + // record start cycle CSRs here for utilization measure + start_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc)); + start_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc)); + } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ + + bdk_watchdog_poke(); + + // run the test(s) + // only 1 call should be enough, let the bursts, etc, control the load... + tot_errors = run_dram_tuning_threads(node, num_lmcs, bytemask); + + for (lmc = 0; lmc < num_lmcs; lmc++) { + // record stop cycle CSRs here for utilization measure + stop_dram_dclk[lmc] = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(lmc)); + stop_dram_ops[lmc] = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(lmc)); + + // accumulate... + ops_sum[lmc] += stop_dram_ops[lmc] - start_dram_ops[lmc]; + dclk_sum[lmc] += stop_dram_dclk[lmc] - start_dram_dclk[lmc]; + + errors = test_dram_byte_lmc_errs[lmc]; + + // check errors by byte, but not ECC + for (byte = 0; byte < 8; ++byte) { + if (!(bytes_todo & (1 << byte))) // is this byte lane to be done + continue; // no + + byte_delay_windows[lmc][byte] <<= 1; // always put in a zero + if (errors & (1 << byte)) { // yes, an error in this byte lane + byte_delay_count[lmc][byte] = 0; // stop now always + } else { // no error in this byte lane + if (byte_delay_count[lmc][byte] == 0) { // first success, set run start + byte_delay_start[lmc][byte] = byte_offset; + } + byte_delay_count[lmc][byte] += incr_offset; // bump run length + + if (byte_delay_count[lmc][byte] > byte_delay_best_count[lmc][byte]) { + byte_delay_best_count[lmc][byte] = byte_delay_count[lmc][byte]; + byte_delay_best_start[lmc][byte] = byte_delay_start[lmc][byte]; + } + byte_delay_windows[lmc][byte] |= 1ULL; // for pass, put in a 1 + } + } /* for (byte = 0; byte < 8; ++byte) */ + + // only print when there are errors and verbose... 
+ if (errors) { + debug_print("DLL %s Offset Test %3d: errors 0x%x\n", + mode_str, byte_offset, errors); + } + } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ } /* for (byte_offset=-63; byte_offset<63; byte_offset += incr_offset) */ @@ -1202,153 +703,154 @@ auto_set_dll_offset(bdk_node_t node, int dll_offset_mode, // only when margining... if (!do_tune) { - printf(" \n"); - printf("-------------------------------------\n"); + printf(" \n"); + printf("-------------------------------------\n"); #if 0 - uint32_t mts_speed = (libdram_get_freq_from_pll(node, 0) * 2) / 1000000; // FIXME: sample LMC0 - printf("N%d: Starting %s Timing Margining for %d MT/s.\n", node, mode_str, mts_speed); + uint32_t mts_speed = (libdram_get_freq_from_pll(node, 0) * 2) / 1000000; // FIXME: sample LMC0 + printf("N%d: Starting %s Timing Margining for %d MT/s.\n", node, mode_str, mts_speed); #else - printf("N%d: Starting %s Timing Margining.\n", node, mode_str); + printf("N%d: Starting %s Timing Margining.\n", node, mode_str); #endif - printf(" \n"); + printf(" \n"); } /* if (!do_tune) */ for (lmc = 0; lmc < num_lmcs; lmc++) { #if 1 - // FIXME FIXME - // FIXME: this just makes ECC always show 0 - byte_delay_best_start[lmc][8] = start_offset; - byte_delay_best_count[lmc][8] = end_offset - start_offset + incr_offset; + // FIXME FIXME + // FIXME: this just makes ECC always show 0 + byte_delay_best_start[lmc][8] = start_offset; + byte_delay_best_count[lmc][8] = end_offset - start_offset + incr_offset; #endif - // disable offsets while we load... - change_dll_offset_enable(node, lmc, 0); - - // only when margining... - if (!do_tune) { - // print the heading - printf(" \n"); - printf("N%d.LMC%d: %s Timing Margin %s : ", node, lmc, mode_str, mode_blk); - printf(" ECC/8 "); - for (byte = 7; byte >= 0; byte--) { - printf(" Byte %d ", byte); - } - printf("\n"); - } /* if (!do_tune) */ - - // print and load the offset values - // print the windows bit arrays - // only when margining... - if (!do_tune) { + // disable offsets while we load... + change_dll_offset_enable(node, lmc, 0); + + // only when margining... + if (!do_tune) { + // print the heading + printf(" \n"); + printf("N%d.LMC%d: %s Timing Margin %s : ", node, lmc, mode_str, mode_blk); + printf(" ECC/8 "); + for (byte = 7; byte >= 0; byte--) { + printf(" Byte %d ", byte); + } + printf("\n"); + } /* if (!do_tune) */ + + // print and load the offset values + // print the windows bit arrays + // only when margining... 
+ if (!do_tune) { printf("N%d.LMC%d: DLL %s Offset Amount %s : ", node, lmc, mode_str, mode_blk); } else { ddr_print("N%d.LMC%d: SW DLL %s Offset Amount %s : ", node, lmc, mode_str, mode_blk); } - for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order + for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order - int count = byte_delay_best_count[lmc][byte]; - if (count == 0) - count = incr_offset; // should make non-tested ECC byte come out 0 - - byte_offset = byte_delay_best_start[lmc][byte] + - ((count - incr_offset) / 2); // adj by incr + int count = byte_delay_best_count[lmc][byte]; + if (count == 0) + count = incr_offset; // should make non-tested ECC byte come out 0 - if (!do_tune) { // do counting and special flag if margining + byte_offset = byte_delay_best_start[lmc][byte] + + ((count - incr_offset) / 2); // adj by incr + + if (!do_tune) { // do counting and special flag if margining int will_need_review = !is_low_risk_winlen(speed_bin, (count - incr_offset)) && - !is_low_risk_offset(speed_bin, byte_offset); + !is_low_risk_offset(speed_bin, byte_offset); printf("%10d%c", byte_offset, (will_need_review) ? '<' :' '); - if (will_need_review) - needs_review_count++; - else - low_risk_count++; - } else { // if just tuning, make the printout less lengthy + if (will_need_review) + needs_review_count++; + else + low_risk_count++; + } else { // if just tuning, make the printout less lengthy ddr_print("%5d ", byte_offset); } - // FIXME? should we be able to override this? - if (mode_is_read) // for READ offsets, always store what we found - load_dll_offset(node, lmc, dll_offset_mode, byte_offset, byte); - else // for WRITE offsets, always store 0 - load_dll_offset(node, lmc, dll_offset_mode, 0, byte); + // FIXME? should we be able to override this? + if (mode_is_read) // for READ offsets, always store what we found + load_dll_offset(node, lmc, dll_offset_mode, byte_offset, byte); + else // for WRITE offsets, always store 0 + load_dll_offset(node, lmc, dll_offset_mode, 0, byte); - } - if (!do_tune) { + } + if (!do_tune) { printf("\n"); } else { ddr_print("\n"); } - // re-enable the offsets now that we are done loading - change_dll_offset_enable(node, lmc, 1); - - // only when margining... - if (!do_tune) { - // print the window sizes - printf("N%d.LMC%d: DLL %s Window Length %s : ", node, lmc, mode_str, mode_blk); - for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order - int count = byte_delay_best_count[lmc][byte]; - if (count == 0) - count = incr_offset; // should make non-tested ECC byte come out 0 - - // do this again since the "needs review" test is an AND... - byte_offset = byte_delay_best_start[lmc][byte] + - ((count - incr_offset) / 2); // adj by incr - - int will_need_review = !is_low_risk_winlen(speed_bin, (count - incr_offset)) && - !is_low_risk_offset(speed_bin, byte_offset); - - printf("%10d%c", count - incr_offset, (will_need_review) ? 
'<' :' '); - } - printf("\n"); - - // print the window extents - printf("N%d.LMC%d: DLL %s Window Bounds %s : ", node, lmc, mode_str, mode_blk); - for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order - int start = byte_delay_best_start[lmc][byte]; - int count = byte_delay_best_count[lmc][byte]; - if (count == 0) - count = incr_offset; // should make non-tested ECC byte come out 0 - printf(" %3d to%3d ", start, - start + count - incr_offset); - } - printf("\n"); + // re-enable the offsets now that we are done loading + change_dll_offset_enable(node, lmc, 1); + + // only when margining... + if (!do_tune) { + // print the window sizes + printf("N%d.LMC%d: DLL %s Window Length %s : ", node, lmc, mode_str, mode_blk); + for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order + int count = byte_delay_best_count[lmc][byte]; + if (count == 0) + count = incr_offset; // should make non-tested ECC byte come out 0 + + // do this again since the "needs review" test is an AND... + byte_offset = byte_delay_best_start[lmc][byte] + + ((count - incr_offset) / 2); // adj by incr + + int will_need_review = !is_low_risk_winlen(speed_bin, (count - incr_offset)) && + !is_low_risk_offset(speed_bin, byte_offset); + + printf("%10d%c", count - incr_offset, (will_need_review) ? '<' :' '); + } + printf("\n"); + + // print the window extents + printf("N%d.LMC%d: DLL %s Window Bounds %s : ", node, lmc, mode_str, mode_blk); + for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order + int start = byte_delay_best_start[lmc][byte]; + int count = byte_delay_best_count[lmc][byte]; + if (count == 0) + count = incr_offset; // should make non-tested ECC byte come out 0 + printf(" %3d to%3d ", start, + start + count - incr_offset); + } + printf("\n"); #if 0 - // FIXME: should have a way to force these out... - // print the windows bit arrays - printf("N%d.LMC%d: DLL %s Window Bitmap%s : ", node, lmc, mode_str, mode_blk); - for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order - printf("%010lx ", byte_delay_windows[lmc][byte]); - } - printf("\n"); + // FIXME: should have a way to force these out... + // print the windows bit arrays + printf("N%d.LMC%d: DLL %s Window Bitmap%s : ", node, lmc, mode_str, mode_blk); + for (byte = 8; byte >= 0; --byte) { // print in "normal" reverse index order + printf("%010lx ", byte_delay_windows[lmc][byte]); + } + printf("\n"); #endif - } /* if (!do_tune) */ + } /* if (!do_tune) */ } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ // only when margining... if (!do_tune) { - // print the Summary line(s) here - printf(" \n"); - printf("N%d: %s Timing Margining Summary : %s ", node, mode_str, - (needs_review_count > 0) ? "Needs Review" : "Low Risk"); - if (needs_review_count > 0) - printf("(%d)", needs_review_count); - printf("\n"); - - // FIXME??? want to print here: "N0: %s Offsets have been applied already" - - printf("-------------------------------------\n"); - printf(" \n"); + // print the Summary line(s) here + printf(" \n"); + printf("N%d: %s Timing Margining Summary : %s ", node, mode_str, + (needs_review_count > 0) ? "Needs Review" : "Low Risk"); + if (needs_review_count > 0) + printf("(%d)", needs_review_count); + printf("\n"); + + // FIXME??? want to print here: "N0: %s Offsets have been applied already" + + printf("-------------------------------------\n"); + printf(" \n"); } /* if (!do_tune) */ // FIXME: we probably want this only when doing verbose... 
// finally, print the utilizations all together for (lmc = 0; lmc < num_lmcs; lmc++) { - uint64_t percent_x10 = ops_sum[lmc] * 1000 / dclk_sum[lmc]; - ddr_print2("N%d.LMC%d: ops %lu, cycles %lu, used %lu.%lu%%\n", - node, lmc, ops_sum[lmc], dclk_sum[lmc], percent_x10 / 10, percent_x10 % 10); + uint64_t percent_x10 = ops_sum[lmc] * 1000 / dclk_sum[lmc]; + /* FIXME(dhendrix): %lu --> %llu */ + ddr_print2("N%d.LMC%d: ops %llu, cycles %llu, used %llu.%llu%%\n", + node, lmc, ops_sum[lmc], dclk_sum[lmc], percent_x10 / 10, percent_x10 % 10); } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ // FIXME: only when verbose, or only when there are errors? @@ -1359,7 +861,7 @@ auto_set_dll_offset(bdk_node_t node, int dll_offset_mode, tot_errors = run_dram_tuning_threads(node, num_lmcs, bytemask); debug_print("N%d: %s: Finished running test one last time\n", node, __FUNCTION__); if (tot_errors) - ddr_print2("%s Timing Final Test: errors 0x%x\n", mode_str, tot_errors); + ddr_print2("%s Timing Final Test: errors 0x%x\n", mode_str, tot_errors); return (do_tune) ? tot_errors : !!(needs_review_count > 0); } @@ -1389,28 +891,30 @@ int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune) // enable any non-running cores on this node orig_coremask = bdk_get_running_coremask(node); - ddr_print4("N%d: %s: Starting cores (mask was 0x%lx)\n", - node, __FUNCTION__, orig_coremask); - bdk_init_cores(node, ~0ULL & ~orig_coremask); + /* FIXME(dhendrix): %lx --> %llx */ + ddr_print4("N%d: %s: Starting cores (mask was 0x%llx)\n", + node, __FUNCTION__, orig_coremask); + /* FIXME(dhendrix): don't call bdk_init_cores(). */ +// bdk_init_cores(node, ~0ULL & ~orig_coremask); dram_tune_max_cores = bdk_get_num_running_cores(node); // but use only a certain number of cores, at most what is available if ((s = getenv("ddr_tune_use_cores")) != NULL) { - dram_tune_use_cores = strtoul(s, NULL, 0); - if (dram_tune_use_cores <= 0) // allow 0 or negative to mean all - dram_tune_use_cores = dram_tune_max_cores; + dram_tune_use_cores = strtoul(s, NULL, 0); + if (dram_tune_use_cores <= 0) // allow 0 or negative to mean all + dram_tune_use_cores = dram_tune_max_cores; } if (dram_tune_use_cores > dram_tune_max_cores) - dram_tune_use_cores = dram_tune_max_cores; + dram_tune_use_cores = dram_tune_max_cores; // see if we want to do the tuning more than once per LMC... if ((s = getenv("ddr_tune_use_loops"))) { - loops = strtoul(s, NULL, 0); + loops = strtoul(s, NULL, 0); } // see if we want to change the granularity of the byte_offset sampling if ((s = getenv("ddr_tune_use_gran"))) { - dram_tune_use_gran = strtoul(s, NULL, 0); + dram_tune_use_gran = strtoul(s, NULL, 0); } // allow override of the test repeats (bursts) per thread create @@ -1422,9 +926,9 @@ int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune) // allow override of Read ODT setting just during the tuning run(s) if ((s = getenv("ddr_tune_use_rodt")) != NULL) { int temp = strtoul(s, NULL, 10); - // validity check - if (temp >= 0 && temp <= 7) - dram_tune_use_rodt = temp; + // validity check + if (temp >= 0 && temp <= 7) + dram_tune_use_rodt = temp; } #endif @@ -1432,13 +936,13 @@ int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune) // allow override of the test pattern // FIXME: a bit simplistic... 
if ((s = getenv("ddr_tune_use_pattern")) != NULL) { - int patno = strtoul(s, NULL, 10); - if (patno == 2) - dram_tune_test_pattern = test_pattern_2; - else if (patno == 3) - dram_tune_test_pattern = test_pattern_3; - else // all other values use default - dram_tune_test_pattern = test_pattern_1; + int patno = strtoul(s, NULL, 10); + if (patno == 2) + dram_tune_test_pattern = test_pattern_2; + else if (patno == 3) + dram_tune_test_pattern = test_pattern_3; + else // all other values use default + dram_tune_test_pattern = test_pattern_1; } #endif @@ -1449,24 +953,24 @@ int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune) // print current working values ddr_print2("N%d: Tuning will use %d cores of max %d cores, and use %d repeats.\n", - node, dram_tune_use_cores, dram_tune_max_cores, - dram_tune_use_bursts); + node, dram_tune_use_cores, dram_tune_max_cores, + dram_tune_use_bursts); #if USE_L2_WAYS_LIMIT // see if L2 ways are limited if ((s = lookup_env_parameter("limit_l2_ways")) != NULL) { - ways = strtoul(s, NULL, 10); - ways_print = 1; + ways = strtoul(s, NULL, 10); + ways_print = 1; } else { - ways = bdk_l2c_get_num_assoc(node); + ways = bdk_l2c_get_num_assoc(node); } #endif #if 0 // if RODT is to be overridden during tuning, note change if (dram_tune_use_rodt >= 0) { - ddr_print("N%d: using RODT %d for tuning.\n", - node, dram_tune_use_rodt); + ddr_print("N%d: using RODT %d for tuning.\n", + node, dram_tune_use_rodt); } #endif @@ -1479,21 +983,21 @@ int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune) for (lmc = 0; lmc < num_lmcs; lmc++) { #if 0 - // if RODT change, save old and set new here... - if (dram_tune_use_rodt >= 0) { - comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc)); - save_rodt[lmc] = comp_ctl2.s.rodt_ctl; - comp_ctl2.s.rodt_ctl = dram_tune_use_rodt; - DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(lmc), comp_ctl2.u); - BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc)); - } + // if RODT change, save old and set new here... 
+ if (dram_tune_use_rodt >= 0) { + comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc)); + save_rodt[lmc] = comp_ctl2.s.rodt_ctl; + comp_ctl2.s.rodt_ctl = dram_tune_use_rodt; + DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(lmc), comp_ctl2.u); + BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc)); + } #endif - /* Disable ECC for DRAM tests */ - lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); - save_ecc_ena[lmc] = lmc_config.s.ecc_ena; - lmc_config.s.ecc_ena = 0; - DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u); - lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); + /* Disable ECC for DRAM tests */ + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); + save_ecc_ena[lmc] = lmc_config.s.ecc_ena; + lmc_config.s.ecc_ena = 0; + DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u); + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ @@ -1505,8 +1009,8 @@ int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune) // testing is done on all LMCs simultaneously // FIXME: for now, loop here to show what happens multiple times for (loop = 0; loop < loops; loop++) { - /* Perform DLL offset tuning */ - errs = auto_set_dll_offset(node, dll_offset_mode, num_lmcs, ddr_interface_64b, do_tune); + /* Perform DLL offset tuning */ + errs = auto_set_dll_offset(node, dll_offset_mode, num_lmcs, ddr_interface_64b, do_tune); } #if USE_L2_WAYS_LIMIT @@ -1518,21 +1022,21 @@ int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune) debug_print("N%d: %s: starting LMCs cleanup.\n", node, __FUNCTION__); for (lmc = 0; lmc < num_lmcs; lmc++) { - /* Restore ECC for DRAM tests */ - lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); - lmc_config.s.ecc_ena = save_ecc_ena[lmc]; - DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u); - lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); + /* Restore ECC for DRAM tests */ + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); + lmc_config.s.ecc_ena = save_ecc_ena[lmc]; + DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u); + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); #if 0 - // if RODT change, restore old here... - if (dram_tune_use_rodt >= 0) { - comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc)); - comp_ctl2.s.rodt_ctl = save_rodt[lmc]; - DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(lmc), comp_ctl2.u); - BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc)); - } + // if RODT change, restore old here... + if (dram_tune_use_rodt >= 0) { + comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc)); + comp_ctl2.s.rodt_ctl = save_rodt[lmc]; + DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(lmc), comp_ctl2.u); + BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(lmc)); + } #endif - // finally, see if there are any read offset overrides after tuning + // finally, see if there are any read offset overrides after tuning // FIXME: provide a way to do write offsets also?? if (dll_offset_mode == 2) { for (int by = 0; by < 9; by++) { @@ -1551,20 +1055,24 @@ int perform_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int do_tune) #if 0 // if RODT was overridden during tuning, note restore if (dram_tune_use_rodt >= 0) { - ddr_print("N%d: restoring RODT %d after tuning.\n", - node, save_rodt[0]); // FIXME? use LMC0 + ddr_print("N%d: restoring RODT %d after tuning.\n", + node, save_rodt[0]); // FIXME? 
use LMC0 } #endif // put any cores on this node, that were not running at the start, back into reset - uint64_t reset_coremask = bdk_get_running_coremask(node) & ~orig_coremask; + /* FIXME(dhendrix): don't reset cores... */ +// uint64_t reset_coremask = bdk_get_running_coremask(node) & ~orig_coremask; + uint64_t reset_coremask = 0; if (reset_coremask) { - ddr_print4("N%d: %s: Stopping cores 0x%lx\n", node, __FUNCTION__, - reset_coremask); - bdk_reset_cores(node, reset_coremask); + /* FIXME(dhendrix): %lx --> %llx */ + ddr_print4("N%d: %s: Stopping cores 0x%llx\n", node, __FUNCTION__, + reset_coremask); + bdk_reset_cores(node, reset_coremask); } else { - ddr_print4("N%d: %s: leaving cores set to 0x%lx\n", node, __FUNCTION__, - orig_coremask); + /* FIXME(dhendrix): %lx --> %llx */ + ddr_print4("N%d: %s: leaving cores set to 0x%llx\n", node, __FUNCTION__, + orig_coremask); } return errs; @@ -1656,7 +1164,8 @@ setup_lfsr_pattern(bdk_node_t node, int lmc, uint64_t data) DRAM_CSR_WRITE(node, BDK_LMCX_CHAR_CTL(lmc), char_ctl.u); } -int +/* FIXME(dhendrix): made static to avoid need for prototype */ +static int choose_best_hw_patterns(bdk_node_t node, int lmc, int mode) { int new_mode = mode; @@ -1705,7 +1214,7 @@ run_best_hw_patterns(bdk_node_t node, int lmc, uint64_t phys_addr, if (mode == DBTRAIN_LFSR) { setup_lfsr_pattern(node, lmc, 0); errors = test_dram_byte_hw(node, lmc, phys_addr, mode, xor_data); - VB_PRT(VBL_DEV2, "%s: LFSR at A:0x%012lx errors 0x%x\n", + VB_PRT(VBL_DEV2, "%s: LFSR at A:0x%012llx errors 0x%x\n", __FUNCTION__, phys_addr, errors); } else { for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) { @@ -1714,7 +1223,7 @@ run_best_hw_patterns(bdk_node_t node, int lmc, uint64_t phys_addr, errs = test_dram_byte_hw(node, lmc, phys_addr, mode, xor_data); - VB_PRT(VBL_DEV2, "%s: PATTERN %d at A:0x%012lx errors 0x%x\n", + VB_PRT(VBL_DEV2, "%s: PATTERN %d at A:0x%012llx errors 0x%x\n", __FUNCTION__, pattern, phys_addr, errs); errors |= errs; @@ -1738,7 +1247,7 @@ hw_assist_test_dll_offset(bdk_node_t node, int dll_offset_mode, int pattern; const uint64_t *pattern_p; int byte; - char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write"; + const char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write"; int pat_best_offset[9]; uint64_t phys_addr; int pat_beg, pat_end; @@ -1769,7 +1278,7 @@ hw_assist_test_dll_offset(bdk_node_t node, int dll_offset_mode, memset(new_best_offset, 0, sizeof(new_best_offset)); for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) { - memset(pat_best_offset, 0, sizeof(pat_best_offset)); + memset(pat_best_offset, 0, sizeof(pat_best_offset)); if (mode == DBTRAIN_TEST) { pattern_p = byte_patterns[pattern]; @@ -1778,47 +1287,47 @@ hw_assist_test_dll_offset(bdk_node_t node, int dll_offset_mode, setup_lfsr_pattern(node, lmc, 0); } - // now loop through all legal values for the DLL byte offset... + // now loop through all legal values for the DLL byte offset... #define BYTE_OFFSET_INCR 3 // FIXME: make this tunable? 
- tot_errors = 0; + tot_errors = 0; - memset(rank_delay_count, 0, sizeof(rank_delay_count)); - memset(rank_delay_start, 0, sizeof(rank_delay_start)); - memset(rank_delay_best_count, 0, sizeof(rank_delay_best_count)); - memset(rank_delay_best_start, 0, sizeof(rank_delay_best_start)); + memset(rank_delay_count, 0, sizeof(rank_delay_count)); + memset(rank_delay_start, 0, sizeof(rank_delay_start)); + memset(rank_delay_best_count, 0, sizeof(rank_delay_best_count)); + memset(rank_delay_best_start, 0, sizeof(rank_delay_best_start)); - for (byte_offset = -63; byte_offset < 64; byte_offset += BYTE_OFFSET_INCR) { + for (byte_offset = -63; byte_offset < 64; byte_offset += BYTE_OFFSET_INCR) { - // do the setup on the active LMC - // set the bytelanes DLL offsets - change_dll_offset_enable(node, lmc, 0); - load_dll_offset(node, lmc, dll_offset_mode, byte_offset, bytelane); // FIXME? bytelane? - change_dll_offset_enable(node, lmc, 1); + // do the setup on the active LMC + // set the bytelanes DLL offsets + change_dll_offset_enable(node, lmc, 0); + load_dll_offset(node, lmc, dll_offset_mode, byte_offset, bytelane); // FIXME? bytelane? + change_dll_offset_enable(node, lmc, 1); - bdk_watchdog_poke(); + bdk_watchdog_poke(); - // run the test on each rank - // only 1 call per rank should be enough, let the bursts, loops, etc, control the load... - - off_errors = 0; // errors for this byte_offset, all ranks + // run the test on each rank + // only 1 call per rank should be enough, let the bursts, loops, etc, control the load... + + off_errors = 0; // errors for this byte_offset, all ranks active_ranks = 0; - for (rankx = 0; rankx < 4; rankx++) { + for (rankx = 0; rankx < 4; rankx++) { if (!(rank_mask & (1 << rankx))) continue; - phys_addr = hw_rank_offset * active_ranks; - // FIXME: now done by test_dram_byte_hw() + phys_addr = hw_rank_offset * active_ranks; + // FIXME: now done by test_dram_byte_hw() //phys_addr |= (lmc << 7); //phys_addr = bdk_numa_get_address(node, phys_addr); // map to node active_ranks++; // NOTE: return is a now a bitmask of the erroring bytelanes.. 
- errors[rankx] = test_dram_byte_hw(node, lmc, phys_addr, mode, NULL); + errors[rankx] = test_dram_byte_hw(node, lmc, phys_addr, mode, NULL); for (byte = byte_lo; byte <= byte_hi; byte++) { // do bytelane(s) @@ -1826,7 +1335,7 @@ hw_assist_test_dll_offset(bdk_node_t node, int dll_offset_mode, if (errors[rankx] & (1 << byte)) { // yes, an error in the byte lane in this rank off_errors |= (1 << byte); - ddr_print5("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: Address 0x%012lx errors 0x%x\n", + ddr_print5("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: Address 0x%012llx errors 0x%x\n", node, lmc, rankx, bytelane, mode_str, byte_offset, phys_addr, errors[rankx]); @@ -1854,13 +1363,13 @@ hw_assist_test_dll_offset(bdk_node_t node, int dll_offset_mode, } } } /* for (byte = byte_lo; byte <= byte_hi; byte++) */ - } /* for (rankx = 0; rankx < 4; rankx++) */ + } /* for (rankx = 0; rankx < 4; rankx++) */ - tot_errors |= off_errors; + tot_errors |= off_errors; - } /* for (byte_offset = -63; byte_offset < 64; byte_offset += BYTE_OFFSET_INCR) */ + } /* for (byte_offset = -63; byte_offset < 64; byte_offset += BYTE_OFFSET_INCR) */ - // now choose the best byte_offsets for this pattern according to the best windows of the tested ranks + // now choose the best byte_offsets for this pattern according to the best windows of the tested ranks // calculate offset by constructing an average window from the rank windows for (byte = byte_lo; byte <= byte_hi; byte++) { @@ -1928,7 +1437,7 @@ hw_assist_test_dll_offset(bdk_node_t node, int dll_offset_mode, // print whether there are errors or not, but only when verbose... tot_errors = run_test_dram_byte_threads(node, num_lmcs, bytemask); printf("N%d.LMC%d: Bytelane %d DLL %s Offset Final Test: errors 0x%x\n", - node, lmc, bytelane, mode_str, tot_errors); + node, lmc, bytelane, mode_str, tot_errors); #endif } @@ -1946,7 +1455,7 @@ int perform_HW_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int bytel // see if we want to do the tuning more than once per LMC... if ((s = getenv("ddr_tune_ecc_loops"))) { - loops = strtoul(s, NULL, 0); + loops = strtoul(s, NULL, 0); } // allow override of the test repeats (bursts) @@ -1956,8 +1465,8 @@ int perform_HW_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int bytel // print current working values ddr_print2("N%d: H/W Tuning for bytelane %d will use %d loops, %d bursts, and %d patterns.\n", - node, bytelane, loops, dram_tune_byte_bursts, - NUM_BYTE_PATTERNS); + node, bytelane, loops, dram_tune_byte_bursts, + NUM_BYTE_PATTERNS); // FIXME? 
get flag from LMC0 only lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(0)); @@ -1966,42 +1475,42 @@ int perform_HW_dll_offset_tuning(bdk_node_t node, int dll_offset_mode, int bytel for (lmc = 0; lmc < num_lmcs; lmc++) { - ddr_print4("N%d: H/W Tuning: starting LMC%d bytelane %d tune.\n", node, lmc, bytelane); - - /* Enable ECC for the HW tests */ - // NOTE: we do enable ECC, but the HW tests used will not generate "visible" errors - lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); - save_ecc_ena[lmc] = lmc_config.s.ecc_ena; - lmc_config.s.ecc_ena = 1; - DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u); - lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); - - // testing is done on a single LMC at a time - // FIXME: for now, loop here to show what happens multiple times - for (loop = 0; loop < loops; loop++) { - /* Perform DLL offset tuning */ - //auto_set_dll_offset(node, 1 /* 1=write */, lmc, bytelane); - hw_assist_test_dll_offset(node, 2 /* 2=read */, lmc, bytelane); - } - - // perform cleanup on active LMC - ddr_print4("N%d: H/W Tuning: finishing LMC%d bytelane %d tune.\n", node, lmc, bytelane); - - /* Restore ECC for DRAM tests */ - lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); - lmc_config.s.ecc_ena = save_ecc_ena[lmc]; - DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u); - lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); - - // finally, see if there are any read offset overrides after tuning - for (int by = 0; by < 9; by++) { - if ((s = lookup_env_parameter("ddr%d_tune_byte%d", lmc, by)) != NULL) { - int dllro = strtoul(s, NULL, 10); - change_dll_offset_enable(node, lmc, 0); - load_dll_offset(node, lmc, 2 /* 2=read */, dllro, by); - change_dll_offset_enable(node, lmc, 1); - } - } + ddr_print4("N%d: H/W Tuning: starting LMC%d bytelane %d tune.\n", node, lmc, bytelane); + + /* Enable ECC for the HW tests */ + // NOTE: we do enable ECC, but the HW tests used will not generate "visible" errors + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); + save_ecc_ena[lmc] = lmc_config.s.ecc_ena; + lmc_config.s.ecc_ena = 1; + DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u); + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); + + // testing is done on a single LMC at a time + // FIXME: for now, loop here to show what happens multiple times + for (loop = 0; loop < loops; loop++) { + /* Perform DLL offset tuning */ + //auto_set_dll_offset(node, 1 /* 1=write */, lmc, bytelane); + hw_assist_test_dll_offset(node, 2 /* 2=read */, lmc, bytelane); + } + + // perform cleanup on active LMC + ddr_print4("N%d: H/W Tuning: finishing LMC%d bytelane %d tune.\n", node, lmc, bytelane); + + /* Restore ECC for DRAM tests */ + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); + lmc_config.s.ecc_ena = save_ecc_ena[lmc]; + DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(lmc), lmc_config.u); + lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc)); + + // finally, see if there are any read offset overrides after tuning + for (int by = 0; by < 9; by++) { + if ((s = lookup_env_parameter("ddr%d_tune_byte%d", lmc, by)) != NULL) { + int dllro = strtoul(s, NULL, 10); + change_dll_offset_enable(node, lmc, 0); + load_dll_offset(node, lmc, 2 /* 2=read */, dllro, by); + change_dll_offset_enable(node, lmc, 1); + } + } } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ -- cgit v1.2.3
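
A note on the format-string changes in this part of the patch: the FIXME(dhendrix) comments switch uint64_t arguments (phys_addr, reset_coremask, orig_coremask) from %lx to %llx. The sketch below is illustrative only (the address value is made up); it shows the assumption behind that change, namely that uint64_t is unsigned long long on the targeted toolchains, and the <inttypes.h> alternative that avoids hard-coding a length modifier.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t phys_addr = 0x12345678abcULL;   /* made-up address for illustration */

    /* What the patch does: print a uint64_t with the "ll" length modifier.
     * This is only correct when uint64_t is unsigned long long (true for the
     * toolchains targeted here, but not guaranteed by the C standard), hence
     * the explicit cast in this sketch. */
    printf("A:0x%012llx\n", (unsigned long long)phys_addr);

    /* Portable alternative: PRIx64 expands to the right conversion specifier
     * for uint64_t on whatever toolchain compiles this. */
    printf("A:0x%012" PRIx64 "\n", phys_addr);

    return 0;
}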
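
Two smaller cleanups in these hunks follow a common pattern: choose_best_hw_patterns() is made static so no prototype in a shared header is needed, and mode_str becomes const char * because it only ever points at string literals. The helper below is hypothetical; it merely mirrors the mode_str ternary to show both ideas in a self-contained form.

#include <stdio.h>

/* Hypothetical helper mirroring the patch's mode_str ternary:
 * dll_offset_mode == 2 selects the read offsets, anything else write.
 * String literals are read-only, so the pointer is const-qualified,
 * matching the char * to const char * change above. */
static const char *mode_name(int dll_offset_mode)
{
    return (dll_offset_mode == 2) ? "Read" : "Write";
}

int main(void)
{
    /* Because mode_name() has internal linkage, no header prototype is
     * required and -Wmissing-prototypes stays quiet, which is the same
     * reasoning the patch gives for making choose_best_hw_patterns()
     * static. */
    printf("%s offset tuning\n", mode_name(2));
    return 0;
}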
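
The ECC handling around each tuning pass uses a save/modify/restore sequence: read LMC(x)_CONFIG, remember ecc_ena, flip it, write the CSR, then read it back before testing begins. The sketch below models only that pattern with made-up accessors (csr_read/csr_write, with bit 0 standing in for ecc_ena); it is not BDK code and the names are placeholders.

#include <stdint.h>
#include <stdio.h>

/* Made-up stand-ins for the CSR accessors; bit 0 plays the role of ecc_ena. */
static uint64_t fake_lmc_config = 0x1;
static uint64_t csr_read(void)        { return fake_lmc_config; }
static void     csr_write(uint64_t v) { fake_lmc_config = v; }

int main(void)
{
    /* Save and clear: remember the current ECC enable, then disable it for
     * the duration of the software DRAM test. */
    uint64_t cfg = csr_read();
    uint64_t save_ecc_ena = cfg & 0x1;
    cfg &= ~0x1ULL;
    csr_write(cfg);
    (void)csr_read();   /* read back so the write is known to have landed */

    /* ... memory test would run here ... */

    /* Restore: put the saved ECC enable bit back after testing. */
    cfg = csr_read();
    cfg = (cfg & ~0x1ULL) | save_ecc_ena;
    csr_write(cfg);
    (void)csr_read();

    printf("ecc_ena restored to %u\n", (unsigned)(fake_lmc_config & 0x1));
    return 0;
}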