From 846be446d3099544c2d1029ca2b7884c1641441d Mon Sep 17 00:00:00 2001 From: Huayang Duan Date: Fri, 30 Aug 2019 18:01:19 +0800 Subject: soc/mediatek/mt8183: Use cached calibration result for faster bootup Load calibration params from flash. If the format of the params is correct, use these calibration params for fast calibration to reduce the bootup time. Bootup time of DRAM partial calibration: - 1,349,385 usecs with low frequency - 924,698 usecs with middle frequency - 1,270,089 usecs with high frequency 3,544,172 usecs in total. Bootup time of DRAM fast calibration: - 216,663 usecs with low frequency - 328,220 usecs with middle frequency - 322,612 usecs with high frequency 867,495 usecs in total. BUG=b:139099592 BRANCH=none TEST=Boots correctly on Kukui Change-Id: I9ef4265dd369a1c276bb02294696556df927e7bc Signed-off-by: Huayang Duan Reviewed-on: https://review.coreboot.org/c/coreboot/+/35164 Reviewed-by: Hung-Te Lin Tested-by: build bot (Jenkins) --- src/soc/mediatek/mt8183/dramc_init_setting.c | 16 + src/soc/mediatek/mt8183/dramc_pi_calibration_api.c | 508 +++++++++++++-------- src/soc/mediatek/mt8183/emi.c | 13 +- src/soc/mediatek/mt8183/include/soc/emi.h | 45 +- src/soc/mediatek/mt8183/include/soc/memlayout.ld | 3 + src/soc/mediatek/mt8183/memory.c | 4 +- 6 files changed, 394 insertions(+), 195 deletions(-) (limited to 'src/soc/mediatek/mt8183') diff --git a/src/soc/mediatek/mt8183/dramc_init_setting.c b/src/soc/mediatek/mt8183/dramc_init_setting.c index b8491d3360..f4905e244f 100644 --- a/src/soc/mediatek/mt8183/dramc_init_setting.c +++ b/src/soc/mediatek/mt8183/dramc_init_setting.c @@ -612,6 +612,22 @@ static void dramc_duty_set_dqs_delay(u8 chn, const s8 *s_dqsDelay) static void dramc_duty_calibration(const struct sdram_params *params, u8 freq_group) { + switch (params->source) { + case DRAMC_PARAM_SOURCE_SDRAM_CONFIG: + break; + case DRAMC_PARAM_SOURCE_FLASH: + dramc_dbg("bypass duty calibration\n"); + + for (u8 chn = 0; chn < CHANNEL_MAX; chn++) { + dramc_duty_set_clk_delay(chn, params->duty_clk_delay[chn]); + dramc_duty_set_dqs_delay(chn, params->duty_dqs_delay[chn]); + } + return; + default: + die("Invalid DRAM param source %u\n", params->source); + return; + } + s8 clkDelay[CHANNEL_MAX] = {0x0}; s8 dqsDelay[CHANNEL_MAX][DQS_NUMBER] = {0x0}; diff --git a/src/soc/mediatek/mt8183/dramc_pi_calibration_api.c b/src/soc/mediatek/mt8183/dramc_pi_calibration_api.c index 794e4f0efd..6537af0987 100644 --- a/src/soc/mediatek/mt8183/dramc_pi_calibration_api.c +++ b/src/soc/mediatek/mt8183/dramc_pi_calibration_api.c @@ -248,19 +248,28 @@ static void dramc_write_leveling(u8 chn, u8 rank, u8 freq_group, } static void dramc_cmd_bus_training(u8 chn, u8 rank, u8 freq_group, - const struct sdram_params *params) + const struct sdram_params *params, const bool fast_calib) { - u32 cbt_cs, mr12_value; + u32 final_vref, clk_dly, cmd_dly, cs_dly; - cbt_cs = params->cbt_cs_dly[chn][rank]; - mr12_value = params->cbt_final_vref[chn][rank]; + clk_dly = params->cbt_clk_dly[chn][rank]; + cmd_dly = params->cbt_cmd_dly[chn][rank]; + cs_dly = params->cbt_cs_dly[chn][rank]; + final_vref = params->cbt_final_vref[chn][rank]; - /* CBT adjust cs */ - clrsetbits_le32(&ch[chn].phy.shu[0].rk[rank].ca_cmd[9], - SHU1_CA_CMD9_RG_RK_ARFINE_TUNE_CS_MASK, cbt_cs << 0); + if (fast_calib) { + /* Set CLK and CA delay */ + clrsetbits_le32(&ch[chn].phy.shu[0].rk[rank].ca_cmd[9], + (0x3f << 8) | (0x3f << 24), + (cmd_dly << 8) | (clk_dly << 24)); + udelay(1); + } + + /* Set CLK and CS delay */ + clrsetbits_le32(&ch[chn].phy.shu[0].rk[rank].ca_cmd[9], 0x3f, cs_dly << 0); /* CBT set vref */ - dramc_mode_reg_write_by_rank(chn, rank, 12, mr12_value); + dramc_mode_reg_write_by_rank(chn, rank, 12, final_vref); } static void dramc_read_dbi_onoff(bool on) @@ -781,6 +790,13 @@ static void dramc_rx_dqs_gating_cal_pre(u8 chn, u8 rank) } +static void set_selph_gating_value(uint32_t *addr, u8 dly, u8 dly_p1) +{ + clrsetbits_le32(addr, 0x77777777, + (dly << 0) | (dly << 8) | (dly << 16) | (dly << 24) | + (dly_p1 << 4) | (dly_p1 << 12) | (dly_p1 << 20) | (dly_p1 << 28)); +} + static void dramc_write_dqs_gating_result(u8 chn, u8 rank, u8 *best_coarse_tune2t, u8 *best_coarse_tune0p5t, u8 *best_coarse_tune2t_p1, u8 *best_coarse_tune0p5t_p1) @@ -840,66 +856,17 @@ static void dramc_write_dqs_gating_result(u8 chn, u8 rank, (best_coarse_0p5t_rodt_p1[0] << 4) | (best_coarse_0p5t_rodt_p1[1] << 12)); } -static void dramc_rx_dqs_gating_cal(u8 chn, u8 rank, u8 freq_group, - const struct sdram_params *params) +static void dramc_rx_dqs_gating_cal_partial(u8 chn, u8 rank, + u32 coarse_start, u32 coarse_end, u8 freqDiv, + u8 *pass_begin, u8 *pass_count, u8 *pass_count_1, u8 *dqs_done, + u8 *dqs_high, u8 *dqs_transition, u8 *dly_coarse_large_cnt, + u8 *dly_coarse_0p5t_cnt, u8 *dly_fine_tune_cnt) { - u8 dqs, fsp, freqDiv = 4; - u8 pass_begin[DQS_NUMBER] = {0}, pass_count[DQS_NUMBER] = {0}, - pass_count_1[DQS_NUMBER] = {0}, dqs_done[DQS_NUMBER] = {0}; - u8 min_coarse_tune2t[DQS_NUMBER], min_coarse_tune0p5t[DQS_NUMBER], - min_fine_tune[DQS_NUMBER]; - u8 best_fine_tune[DQS_NUMBER], best_coarse_tune0p5t[DQS_NUMBER], - best_coarse_tune2t[DQS_NUMBER]; - u8 best_coarse_tune0p5t_p1[DQS_NUMBER], best_coarse_tune2t_p1[DQS_NUMBER]; - u8 dqs_high[DQS_NUMBER] = {0}, dqs_transition[DQS_NUMBER] = {0}; - u8 dly_coarse_large_cnt[DQS_NUMBER] = {0}, dly_coarse_0p5t_cnt[DQS_NUMBER] = {0}, - dly_fine_tune_cnt[DQS_NUMBER] = {0}; - u32 coarse_start, coarse_end; + u8 dqs; u32 debug_cnt[DQS_NUMBER]; - struct reg_value regs_bak[] = { - {&ch[chn].ao.stbcal, 0x0}, - {&ch[chn].ao.stbcal1, 0x0}, - {&ch[chn].ao.ddrconf0, 0x0}, - {&ch[chn].ao.spcmd, 0x0}, - {&ch[chn].ao.refctrl0, 0x0}, - {&ch[chn].phy.b[0].dq[6], 0x0}, - {&ch[chn].phy.b[1].dq[6], 0x0}, - }; - for (size_t i = 0; i < ARRAY_SIZE(regs_bak); i++) - regs_bak[i].value = read32(regs_bak[i].addr); - - fsp = get_freq_fsq(freq_group); - dramc_rx_dqs_isi_pulse_cg_switch(chn, false); - - MR01Value[fsp] |= 0x80; - dramc_mode_reg_write_by_rank(chn, rank, 0x1, MR01Value[fsp]); - dramc_rx_dqs_gating_cal_pre(chn, rank); - - u32 dummy_rd_backup = read32(&ch[chn].ao.dummy_rd); - dramc_engine2_init(chn, rank, TEST2_1_CAL, 0xaa000023, true); - - switch (freq_group) { - case LP4X_DDR1600: - coarse_start = 18; - break; - case LP4X_DDR2400: - coarse_start = 25; - break; - case LP4X_DDR3200: - coarse_start = 25; - break; - case LP4X_DDR3600: - coarse_start = 21; - break; - default: - die("Invalid DDR frequency group %u\n", freq_group); - return; - } - coarse_end = coarse_start + 12; - - dramc_dbg("[Gating]\n"); - for (u32 coarse_tune = coarse_start; coarse_tune < coarse_end; coarse_tune++) { + for (u32 coarse_tune = coarse_start; coarse_tune < coarse_end; + coarse_tune++) { u32 dly_coarse_large_rodt = 0, dly_coarse_0p5t_rodt = 0; u32 dly_coarse_large_rodt_p1 = 4, dly_coarse_0p5t_rodt_p1 = 4; @@ -914,39 +881,23 @@ static void dramc_rx_dqs_gating_cal(u8 chn, u8 rank, u8 freq_group, dly_coarse_large_rodt = value >> 3; dly_coarse_0p5t_rodt = value - (dly_coarse_large_rodt << 3); - value = (dly_coarse_large << 3) + dly_coarse_0p5t - 11; dly_coarse_large_rodt_p1 = value >> 3; dly_coarse_0p5t_rodt_p1 = value - (dly_coarse_large_rodt_p1 << 3); } - clrsetbits_le32(&ch[chn].ao.shu[0].rk[rank].selph_dqsg0, - 0x77777777, - (dly_coarse_large << 0) | (dly_coarse_large << 8) | - (dly_coarse_large << 16) | (dly_coarse_large << 24) | - (dly_coarse_large_p1 << 4) | (dly_coarse_large_p1 << 12) | - (dly_coarse_large_p1 << 20) | (dly_coarse_large_p1 << 28)); - clrsetbits_le32(&ch[chn].ao.shu[0].rk[rank].selph_dqsg1, - 0x77777777, - (dly_coarse_0p5t << 0) | (dly_coarse_0p5t << 8) | - (dly_coarse_0p5t << 16) | (dly_coarse_0p5t << 24) | - (dly_coarse_0p5t_p1 << 4) | (dly_coarse_0p5t_p1 << 12) | - (dly_coarse_0p5t_p1 << 20) | (dly_coarse_0p5t_p1 << 28)); - clrsetbits_le32(&ch[chn].ao.shu[0].rk[rank].selph_odten0, - 0x77777777, - (dly_coarse_large_rodt << 0) | (dly_coarse_large_rodt << 8) | - (dly_coarse_large_rodt << 16) | (dly_coarse_large_rodt << 24) | - (dly_coarse_large_rodt_p1 << 4) | (dly_coarse_large_rodt_p1 << 12) | - (dly_coarse_large_rodt_p1 << 20) | (dly_coarse_large_rodt_p1 << 28)); - clrsetbits_le32(&ch[chn].ao.shu[0].rk[rank].selph_odten1, - 0x77777777, - (dly_coarse_0p5t_rodt << 0) | (dly_coarse_0p5t_rodt << 8) | - (dly_coarse_0p5t_rodt << 16) | (dly_coarse_0p5t_rodt << 24) | - (dly_coarse_0p5t_rodt_p1 << 4) | (dly_coarse_0p5t_rodt_p1 << 12) | - (dly_coarse_0p5t_rodt_p1 << 20) | (dly_coarse_0p5t_rodt_p1 << 28)); - - for (u8 dly_fine_xt = 0; dly_fine_xt < DQS_GW_FINE_END; dly_fine_xt += 4) { + set_selph_gating_value(&ch[chn].ao.shu[0].rk[rank].selph_dqsg0, + dly_coarse_large, dly_coarse_large_p1); + set_selph_gating_value(&ch[chn].ao.shu[0].rk[rank].selph_dqsg1, + dly_coarse_0p5t, dly_coarse_0p5t_p1); + set_selph_gating_value(&ch[chn].ao.shu[0].rk[rank].selph_odten0, + dly_coarse_large_rodt, dly_coarse_large_rodt_p1); + set_selph_gating_value(&ch[chn].ao.shu[0].rk[rank].selph_odten1, + dly_coarse_0p5t_rodt, dly_coarse_0p5t_rodt_p1); + + for (u8 dly_fine_xt = 0; dly_fine_xt < DQS_GW_FINE_END; + dly_fine_xt += 4) { dramc_set_gating_mode(chn, 0); write32(&ch[chn].ao.shu[0].rk[rank].dqsien, dly_fine_xt | (dly_fine_xt << 8)); @@ -961,9 +912,9 @@ static void dramc_rx_dqs_gating_cal(u8 chn, u8 rank, u8 freq_group, dramc_engine2_run(chn, TE_OP_READ_CHECK); u32 result_r = read32(&ch[chn].phy.misc_stberr_rk0_r) & - MISC_STBERR_RK_R_STBERR_RK_R_MASK; + MISC_STBERR_RK_R_STBERR_RK_R_MASK; u32 result_f = read32(&ch[chn].phy.misc_stberr_rk0_f) & - MISC_STBERR_RK_F_STBERR_RK_F_MASK; + MISC_STBERR_RK_F_STBERR_RK_F_MASK; debug_cnt[0] = read32(&ch[chn].nao.dqsgnwcnt[0]); debug_cnt[1] = (debug_cnt[0] >> 16) & 0xffff; debug_cnt[0] &= 0xffff; @@ -990,20 +941,95 @@ static void dramc_rx_dqs_gating_cal(u8 chn, u8 rank, u8 freq_group, dramc_dbg("\n"); if (dramc_find_gating_window(result_r, result_f, debug_cnt, - dly_coarse_large, dly_coarse_0p5t, pass_begin, pass_count, - pass_count_1, &dly_fine_xt, dqs_high, dqs_done)) + dly_coarse_large, dly_coarse_0p5t, pass_begin, + pass_count, pass_count_1, &dly_fine_xt, + dqs_high, dqs_done)) coarse_tune = coarse_end; } } +} + +static void dramc_rx_dqs_gating_cal(u8 chn, u8 rank, u8 freq_group, + const struct sdram_params *params, const bool fast_calib) +{ + u8 dqs, fsp, freqDiv = 4; + u8 pass_begin[DQS_NUMBER] = {0}, pass_count[DQS_NUMBER] = {0}, + pass_count_1[DQS_NUMBER] = {0}, dqs_done[DQS_NUMBER] = {0}; + u8 min_coarse_tune2t[DQS_NUMBER], min_coarse_tune0p5t[DQS_NUMBER], + min_fine_tune[DQS_NUMBER]; + u8 best_fine_tune[DQS_NUMBER], best_coarse_tune0p5t[DQS_NUMBER], + best_coarse_tune2t[DQS_NUMBER]; + u8 best_coarse_tune0p5t_p1[DQS_NUMBER], best_coarse_tune2t_p1[DQS_NUMBER]; + u8 dqs_high[DQS_NUMBER] = {0}, dqs_transition[DQS_NUMBER] = {0}; + u8 dly_coarse_large_cnt[DQS_NUMBER] = {0}, dly_coarse_0p5t_cnt[DQS_NUMBER] = {0}, + dly_fine_tune_cnt[DQS_NUMBER] = {0}; + u32 coarse_start, coarse_end; + + struct reg_value regs_bak[] = { + {&ch[chn].ao.stbcal, 0x0}, + {&ch[chn].ao.stbcal1, 0x0}, + {&ch[chn].ao.ddrconf0, 0x0}, + {&ch[chn].ao.spcmd, 0x0}, + {&ch[chn].ao.refctrl0, 0x0}, + {&ch[chn].phy.b[0].dq[6], 0x0}, + {&ch[chn].phy.b[1].dq[6], 0x0}, + }; + for (size_t i = 0; i < ARRAY_SIZE(regs_bak); i++) + regs_bak[i].value = read32(regs_bak[i].addr); - dramc_engine2_end(chn, dummy_rd_backup); + fsp = get_freq_fsq(freq_group); + dramc_rx_dqs_isi_pulse_cg_switch(chn, false); - for (dqs = 0; dqs < DQS_NUMBER; dqs++) { - pass_count[dqs] = dqs_transition[dqs]; - min_fine_tune[dqs] = dly_fine_tune_cnt[dqs]; - min_coarse_tune0p5t[dqs] = dly_coarse_0p5t_cnt[dqs]; - min_coarse_tune2t[dqs] = dly_coarse_large_cnt[dqs]; + MR01Value[fsp] |= 0x80; + dramc_mode_reg_write_by_rank(chn, rank, 0x1, MR01Value[fsp]); + dramc_rx_dqs_gating_cal_pre(chn, rank); + u32 dummy_rd_backup = read32(&ch[chn].ao.dummy_rd); + dramc_engine2_init(chn, rank, TEST2_1_CAL, 0xaa000023, true); + + switch (freq_group) { + case LP4X_DDR1600: + coarse_start = 18; + break; + case LP4X_DDR2400: + coarse_start = 25; + break; + case LP4X_DDR3200: + coarse_start = 25; + break; + case LP4X_DDR3600: + coarse_start = 21; + break; + default: + die("Invalid DDR frequency group %u\n", freq_group); + return; + } + coarse_end = coarse_start + 12; + + dramc_dbg("[Gating]\n"); + + if (!fast_calib) { + dramc_rx_dqs_gating_cal_partial(chn, rank, + coarse_start, coarse_end, + freqDiv, pass_begin, pass_count, pass_count_1, dqs_done, + dqs_high, dqs_transition, dly_coarse_large_cnt, + dly_coarse_0p5t_cnt, dly_fine_tune_cnt); + dramc_engine2_end(chn, dummy_rd_backup); + } + + for (dqs = 0; dqs < DQS_NUMBER; dqs++) { + if (fast_calib) { + dramc_dbg("[bypass Gating params] dqs: %d\n", dqs); + pass_count[dqs] = params->gating_pass_count[chn][rank][dqs]; + min_fine_tune[dqs] = params->gating_fine_tune[chn][rank][dqs]; + min_coarse_tune0p5t[dqs] = params->gating05T[chn][rank][dqs]; + min_coarse_tune2t[dqs] = params->gating2T[chn][rank][dqs]; + } else { + pass_count[dqs] = dqs_transition[dqs]; + min_fine_tune[dqs] = dly_fine_tune_cnt[dqs]; + min_coarse_tune0p5t[dqs] = dly_coarse_0p5t_cnt[dqs]; + min_coarse_tune2t[dqs] = dly_coarse_large_cnt[dqs]; + } u8 tmp_offset = pass_count[dqs] * DQS_GW_FINE_STEP / 2; u8 tmp_value = min_fine_tune[dqs] + tmp_offset; best_fine_tune[dqs] = tmp_value % RX_DLY_DQSIENSTB_LOOP; @@ -1548,9 +1574,35 @@ static void dramc_set_dqdqs_dly(u8 chn, u8 rank, enum CAL_TYPE type, u8 *small_v dramc_set_tx_dly_factor(chn, rank, type, small_value, dly); } +static void dramc_set_tx_dly_center(struct per_byte_dly *center_dly, + const struct win_perbit_dly *vref_dly) +{ + int index; + struct per_byte_dly *dly; + + for (u8 byte = 0; byte < DQS_NUMBER; byte++) { + dly = ¢er_dly[byte]; + dly->min_center = 0xffff; + dly->max_center = 0; + + for (u8 bit = 0; bit < DQS_BIT_NUMBER; bit++) { + index = bit + 8 * byte; + if (vref_dly[index].win_center < dly->min_center) + dly->min_center = vref_dly[index].win_center; + if (vref_dly[index].win_center > dly->max_center) + dly->max_center = vref_dly[index].win_center; + } + dramc_dbg("center_dly[%d].min_center = %d, " + "center_dly[%d].max_center = %d\n", + byte, center_dly[byte].min_center, + byte, center_dly[byte].max_center); + } +} + static void dramc_set_tx_best_dly(u8 chn, u8 rank, bool bypass_tx, struct win_perbit_dly *vref_dly, enum CAL_TYPE type, u8 freq_group, - u16 *tx_dq_precal_result, u16 dly_cell_unit, const struct sdram_params *params) + u16 *tx_dq_precal_result, u16 dly_cell_unit, const struct sdram_params *params, + const bool fast_calib) { int index, clock_rate; u8 use_delay_cell; @@ -1581,20 +1633,23 @@ static void dramc_set_tx_best_dly(u8 chn, u8 rank, bool bypass_tx, else use_delay_cell = 0; - for (u8 byte = 0; byte < DQS_NUMBER; byte++) { - center_dly[byte].min_center = 0xffff; - center_dly[byte].max_center = 0; - - for (u8 bit = 0; bit < DQS_BIT_NUMBER; bit++) { - index = bit + 8 * byte; - if (vref_dly[index].win_center < center_dly[byte].min_center) - center_dly[byte].min_center = vref_dly[index].win_center; - if (vref_dly[index].win_center > center_dly[byte].max_center) - center_dly[byte].max_center = vref_dly[index].win_center; + if (fast_calib && bypass_tx) { + dramc_dbg("bypass TX\n"); + for (u8 byte = 0; byte < DQS_NUMBER; byte++) { + center_dly[byte].min_center = params->tx_center_min[chn][rank][byte]; + center_dly[byte].max_center = params->tx_center_max[chn][rank][byte]; + for (u8 bit = 0; bit < DQS_BIT_NUMBER; bit++) { + index = bit + 8 * byte; + vref_dly[index].win_center = + params->tx_win_center[chn][rank][index]; + vref_dly[index].best_first = + params->tx_first_pass[chn][rank][index]; + vref_dly[index].best_last = + params->tx_last_pass[chn][rank][index]; + } } - dramc_dbg("[channel %d] [rank %d] byte:%d, center_dly[byte].min_center:%d, center_dly[byte].max_center:%d\n", - chn, rank, byte, center_dly[byte].min_center, - center_dly[byte].max_center); + } else { + dramc_set_tx_dly_center(center_dly, vref_dly); } for (u8 byte = 0; byte < DQS_NUMBER; byte++) { @@ -1697,13 +1752,57 @@ static u32 dram_k_perbit(u8 chn, enum CAL_TYPE type) return err_value; } +static void dramc_window_perbit_cal_partial(u8 chn, u8 rank, + s16 dly_begin, s16 dly_end, s16 dly_step, + enum CAL_TYPE type, u8 *small_value, u8 vref_scan_enable, + struct win_perbit_dly *win_perbit) +{ + u32 finish_bit = 0; + + for (s16 dly = dly_begin; dly < dly_end; dly += dly_step) { + dramc_set_dqdqs_dly(chn, rank, type, small_value, dly); + + u32 err_value = dram_k_perbit(chn, type); + if (!vref_scan_enable) + dramc_dbg("%d ", dly); + + for (size_t bit = 0; bit < DQ_DATA_WIDTH; bit++) { + bool bit_fail = (err_value & ((u32) 1 << bit)) != 0; + + /* pass window bigger than 7, + * consider as real pass window. + */ + if (dramc_check_dqdqs_win(&(win_perbit[bit]), + dly, dly_end, bit_fail) > 7) + finish_bit |= (1 << bit); + + if (vref_scan_enable) + continue; + + dramc_dbg("%s", bit_fail ? "x" : "o"); + if (bit % DQS_BIT_NUMBER == 7) + dramc_dbg(" "); + } + + if (!vref_scan_enable) + dramc_dbg(" [MSB]\n"); + + if (finish_bit == 0xffff && (err_value & 0xffff) == 0xffff) { + dramc_dbg("all bits window found, " + "early break! delay=%#x\n", dly); + break; + } + } +} + static u8 dramc_window_perbit_cal(u8 chn, u8 rank, u8 freq_group, - enum CAL_TYPE type, const struct sdram_params *params) + enum CAL_TYPE type, const struct sdram_params *params, + const bool fast_calib) { u8 vref = 0, vref_begin = 0, vref_end = 1, vref_step = 1, vref_use = 0; u8 vref_scan_enable = 0, small_reg_value = 0xff; - s16 dly, dly_begin = 0, dly_end = 0, dly_step = 1; - u32 dummy_rd_bak_engine2 = 0, err_value, finish_bit, win_min_max = 0; + s16 dly_begin = 0, dly_end = 0, dly_step = 1; + u32 dummy_rd_bak_engine2 = 0, finish_bit, win_min_max = 0; static u16 dq_precal_result[DQS_NUMBER]; struct vref_perbit_dly vref_dly; struct win_perbit_dly win_perbit[DQ_DATA_WIDTH]; @@ -1711,16 +1810,30 @@ static u8 dramc_window_perbit_cal(u8 chn, u8 rank, u8 freq_group, u8 fsp = get_freq_fsq(freq_group); u8 vref_range = !fsp; + bool bypass_tx = !fsp; dramc_get_vref_prop(rank, type, fsp, &vref_scan_enable, &vref_begin, &vref_end); dramc_get_dly_range(chn, rank, type, freq_group, dq_precal_result, &dly_begin, &dly_end, params); + if (fast_calib) { + if (type == RX_WIN_TEST_ENG && vref_scan_enable == 1) { + vref_begin = params->rx_vref[chn]; + vref_end = vref_begin + 1; + dramc_dbg("bypass RX vref: %d\n", vref_begin); + } else if (type == TX_WIN_DQ_ONLY) { + vref_begin = params->tx_vref[chn][rank]; + vref_end = vref_begin + 1; + dramc_dbg("bypass TX vref: %d\n", vref_begin); + } + vref_dly.best_vref = vref_begin; + } + if ((type == RX_WIN_RD_DQC || type == RX_WIN_TEST_ENG) && fsp == FSP_0) dly_step = 2; - dramc_dbg("[channel %d] [rank %d] type:%d, vref_enable:%d, vref range[%d:%d]\n", + dramc_dbg("[channel %d] [rank %d] type: %d, vref_enable: %d, vref range[%d : %d]\n", chn, rank, type, vref_scan_enable, vref_begin, vref_end); if (type == TX_WIN_DQ_ONLY || type == TX_WIN_DQ_DQM) { @@ -1735,6 +1848,17 @@ static u8 dramc_window_perbit_cal(u8 chn, u8 rank, u8 freq_group, vref_step = 2; } + if (fast_calib && bypass_tx && + (type == TX_WIN_DQ_ONLY || type == TX_WIN_DQ_DQM)) { + dramc_set_tx_best_dly(chn, rank, true, vref_dly.perbit_dly, + type, freq_group, dq_precal_result, dly_cell_unit, + params, fast_calib); + + if (vref_scan_enable) + dramc_set_vref(chn, rank, type, vref_dly.best_vref); + return 0; + } + if (type == RX_WIN_RD_DQC) { dramc_rx_rd_dqc_init(chn, rank); } else { @@ -1770,36 +1894,20 @@ static u8 dramc_window_perbit_cal(u8 chn, u8 rank, u8 freq_group, RX_DQ, FIRST_DQ_DELAY); } - for (dly = dly_begin; dly < dly_end; dly += dly_step) { - dramc_set_dqdqs_dly(chn, rank, type, &small_reg_value, dly); - - err_value = dram_k_perbit(chn, type); - if (!vref_scan_enable) - dramc_dbg("%d ", dly); - + if (fast_calib && + (type == RX_WIN_RD_DQC || type == RX_WIN_TEST_ENG)) { + dramc_dbg("bypass RX params\n"); for (size_t bit = 0; bit < DQ_DATA_WIDTH; bit++) { - bool bit_fail = (err_value & ((u32) 1 << bit)) != 0; - - /* pass window bigger than 7, consider as real pass window */ - if (dramc_check_dqdqs_win(&(win_perbit[bit]), - dly, dly_end, bit_fail) > 7) - finish_bit |= (1 << bit); - - if (vref_scan_enable) - continue; - dramc_dbg("%s", bit_fail ? "x" : "o"); - if (bit % DQS_BIT_NUMBER == 7) - dramc_dbg(" "); - } - - if (!vref_scan_enable) - dramc_dbg(" [MSB]\n"); - - if (finish_bit == 0xffff && (err_value & 0xffff) == 0xffff) { - dramc_dbg("all bits window found, early break! delay=0x%x\n", - dly); - break; + win_perbit[bit].best_first = + params->rx_firspass[chn][rank][bit]; + win_perbit[bit].best_last = + params->rx_lastpass[chn][rank][bit]; } + } else { + dramc_window_perbit_cal_partial(chn, rank, + dly_begin, dly_end, dly_step, + type, &small_reg_value, + vref_scan_enable, win_perbit); } for (size_t bit = 0; bit < DQ_DATA_WIDTH; bit++) @@ -1807,7 +1915,8 @@ static u8 dramc_window_perbit_cal(u8 chn, u8 rank, u8 freq_group, win_perbit[bit].best_first, win_perbit[bit].best_last, win_perbit[bit].best_last - win_perbit[bit].best_first); - if (dramk_calc_best_vref(type, vref_use, &vref_dly, win_perbit, &win_min_max)) + if (dramk_calc_best_vref(type, vref_use, &vref_dly, + win_perbit, &win_min_max)) break; } @@ -1822,8 +1931,9 @@ static u8 dramc_window_perbit_cal(u8 chn, u8 rank, u8 freq_group, if (type == RX_WIN_RD_DQC || type == RX_WIN_TEST_ENG) dramc_set_rx_best_dly(chn, rank, vref_dly.perbit_dly); else - dramc_set_tx_best_dly(chn, rank, false, vref_dly.perbit_dly, type, - freq_group, dq_precal_result, dly_cell_unit, params); + dramc_set_tx_best_dly(chn, rank, false, + vref_dly.perbit_dly, type, freq_group, + dq_precal_result, dly_cell_unit, params, fast_calib); if (vref_scan_enable && type == TX_WIN_DQ_ONLY) dramc_set_vref(chn, rank, type, vref_dly.best_vref); @@ -1858,7 +1968,8 @@ static void dramc_dle_factor_handler(u8 chn, u8 val, u8 freq_group) dram_phy_reset(chn); } -static u8 dramc_rx_datlat_cal(u8 chn, u8 rank, u8 freq_group, const struct sdram_params *params) +static u8 dramc_rx_datlat_cal(u8 chn, u8 rank, u8 freq_group, + const struct sdram_params *params, const bool fast_calib) { u32 datlat, begin = 0, first = 0, sum = 0, best_step; u32 datlat_start = 7; @@ -1871,38 +1982,43 @@ static u8 dramc_rx_datlat_cal(u8 chn, u8 rank, u8 freq_group, const struct sdram u32 dummy_rd_backup = read32(&ch[chn].ao.dummy_rd); dramc_engine2_init(chn, rank, TEST2_1_CAL, TEST2_2_CAL, false); - for (datlat = datlat_start; datlat < DATLAT_TAP_NUMBER; datlat++) { - dramc_dle_factor_handler(chn, datlat, freq_group); - - u32 err = dramc_engine2_run(chn, TE_OP_WRITE_READ_CHECK); - if (err == 0) { - if (begin == 0) { - first = datlat; - begin = 1; - } - if (begin == 1) { - sum++; - if (sum > 4) - break; + if (fast_calib) { + best_step = params->rx_datlat[chn][rank]; + dramc_dbg("bypass DATLAT, best_step: %d\n", best_step); + } else { + for (datlat = datlat_start; datlat < DATLAT_TAP_NUMBER; datlat++) { + dramc_dle_factor_handler(chn, datlat, freq_group); + + u32 err = dramc_engine2_run(chn, TE_OP_WRITE_READ_CHECK); + if (err == 0) { + if (begin == 0) { + first = datlat; + begin = 1; + } + if (begin == 1) { + sum++; + if (sum > 4) + break; + } + } else { + if (begin == 1) + begin = 0xff; } - } else { - if (begin == 1) - begin = 0xff; - } - dramc_dbg("Datlat=%2d, err_value=0x%4x, sum=%d\n", datlat, err, sum); - } + dramc_dbg("Datlat=%2d, err_value=0x%4x, sum=%d\n", datlat, err, sum); + } - dramc_engine2_end(chn, dummy_rd_backup); + dramc_engine2_end(chn, dummy_rd_backup); - assert(sum != 0); + assert(sum != 0); - if (sum <= 3) - best_step = first + (sum >> 1); - else - best_step = first + 2; - dramc_dbg("First_step=%d, total pass=%d, best_step=%d\n", - begin, sum, best_step); + if (sum <= 3) + best_step = first + (sum >> 1); + else + best_step = first + 2; + dramc_dbg("First_step=%d, total pass=%d, best_step=%d\n", + begin, sum, best_step); + } dramc_dle_factor_handler(chn, best_step, freq_group); @@ -1997,20 +2113,40 @@ static void dramc_rx_dqs_gating_post_process(u8 chn, u8 freq_group) void dramc_calibrate_all_channels(const struct sdram_params *pams, u8 freq_group) { + bool fast_calib; + switch (pams->source) { + case DRAMC_PARAM_SOURCE_SDRAM_CONFIG: + fast_calib = false; + break; + case DRAMC_PARAM_SOURCE_FLASH: + fast_calib = true; + break; + default: + die("Invalid DRAM param source %u\n", pams->source); + return; + } + u8 rx_datlat[RANK_MAX] = {0}; for (u8 chn = 0; chn < CHANNEL_MAX; chn++) { for (u8 rk = RANK_0; rk < RANK_MAX; rk++) { dramc_show("Start K ch:%d, rank:%d\n", chn, rk); dramc_auto_refresh_switch(chn, false); - dramc_cmd_bus_training(chn, rk, freq_group, pams); + dramc_cmd_bus_training(chn, rk, freq_group, pams, + fast_calib); dramc_write_leveling(chn, rk, freq_group, pams->wr_level); dramc_auto_refresh_switch(chn, true); - dramc_rx_dqs_gating_cal(chn, rk, freq_group, pams); - dramc_window_perbit_cal(chn, rk, freq_group, RX_WIN_RD_DQC, pams); - dramc_window_perbit_cal(chn, rk, freq_group, TX_WIN_DQ_DQM, pams); - dramc_window_perbit_cal(chn, rk, freq_group, TX_WIN_DQ_ONLY, pams); - rx_datlat[rk] = dramc_rx_datlat_cal(chn, rk, freq_group, pams); - dramc_window_perbit_cal(chn, rk, freq_group, RX_WIN_TEST_ENG, pams); + dramc_rx_dqs_gating_cal(chn, rk, freq_group, pams, + fast_calib); + dramc_window_perbit_cal(chn, rk, freq_group, + RX_WIN_RD_DQC, pams, fast_calib); + dramc_window_perbit_cal(chn, rk, freq_group, + TX_WIN_DQ_DQM, pams, fast_calib); + dramc_window_perbit_cal(chn, rk, freq_group, + TX_WIN_DQ_ONLY, pams, fast_calib); + rx_datlat[rk] = dramc_rx_datlat_cal(chn, rk, freq_group, + pams, fast_calib); + dramc_window_perbit_cal(chn, rk, freq_group, + RX_WIN_TEST_ENG, pams, fast_calib); } dramc_rx_dqs_gating_post_process(chn, freq_group); diff --git a/src/soc/mediatek/mt8183/emi.c b/src/soc/mediatek/mt8183/emi.c index 8bd8a39a35..3b5b2a7631 100644 --- a/src/soc/mediatek/mt8183/emi.c +++ b/src/soc/mediatek/mt8183/emi.c @@ -343,10 +343,10 @@ void enable_emi_dcm(void) static void do_calib(const struct sdram_params *params, u8 freq_group) { - dramc_show("Start K freq group:%d\n", frequency_table[freq_group]); + dramc_show("Start K, current clock is:%d\n", params->frequency); dramc_calibrate_all_channels(params, freq_group); dramc_ac_timing_optimize(freq_group); - dramc_show("%s K freq group:%d finish!\n", __func__, frequency_table[freq_group]); + dramc_show("K finish with clock:%d\n", params->frequency); } static void after_calib(void) @@ -355,18 +355,23 @@ static void after_calib(void) dramc_runtime_config(); } -void mt_set_emi(const struct sdram_params *params) +void mt_set_emi(const struct sdram_params *freq_params) { const u8 *freq_tbl; + const int shuffle = DRAM_DFS_SHUFFLE_1; u8 current_freqsel; + const struct sdram_params *params; if (CONFIG(MT8183_DRAM_EMCP)) freq_tbl = freq_shuffle_emcp; else freq_tbl = freq_shuffle; - current_freqsel = freq_tbl[DRAM_DFS_SHUFFLE_1]; + + current_freqsel = freq_tbl[shuffle]; + params = &freq_params[shuffle]; init_dram(params, current_freqsel); do_calib(params, current_freqsel); + after_calib(); } diff --git a/src/soc/mediatek/mt8183/include/soc/emi.h b/src/soc/mediatek/mt8183/include/soc/emi.h index 15889eeca4..ab21bc7e12 100644 --- a/src/soc/mediatek/mt8183/include/soc/emi.h +++ b/src/soc/mediatek/mt8183/include/soc/emi.h @@ -20,10 +20,49 @@ #include #include +enum DRAMC_PARAM_SOURCE { + DRAMC_PARAM_SOURCE_SDRAM_INVALID = 0, + DRAMC_PARAM_SOURCE_SDRAM_CONFIG, + DRAMC_PARAM_SOURCE_FLASH, +}; + struct sdram_params { + u16 source; /* DRAMC_PARAM_SOURCE */ + u16 frequency; u8 wr_level[CHANNEL_MAX][RANK_MAX][DQS_NUMBER]; - u8 cbt_cs_dly[CHANNEL_MAX][RANK_MAX]; + + /* DUTY */ + s8 duty_clk_delay[CHANNEL_MAX]; + s8 duty_dqs_delay[CHANNEL_MAX][DQS_NUMBER]; + + /* CBT */ u8 cbt_final_vref[CHANNEL_MAX][RANK_MAX]; + u8 cbt_clk_dly[CHANNEL_MAX][RANK_MAX]; + u8 cbt_cmd_dly[CHANNEL_MAX][RANK_MAX]; + u8 cbt_cs_dly[CHANNEL_MAX][RANK_MAX]; + + /* Gating */ + u8 gating2T[CHANNEL_MAX][RANK_MAX][DQS_NUMBER]; + u8 gating05T[CHANNEL_MAX][RANK_MAX][DQS_NUMBER]; + u8 gating_fine_tune[CHANNEL_MAX][RANK_MAX][DQS_NUMBER]; + u8 gating_pass_count[CHANNEL_MAX][RANK_MAX][DQS_NUMBER]; + + /* TX perbit */ + u8 tx_vref[CHANNEL_MAX][RANK_MAX]; + u16 tx_center_min[CHANNEL_MAX][RANK_MAX][DQS_NUMBER]; + u16 tx_center_max[CHANNEL_MAX][RANK_MAX][DQS_NUMBER]; + u16 tx_win_center[CHANNEL_MAX][RANK_MAX][DQ_DATA_WIDTH]; + u16 tx_first_pass[CHANNEL_MAX][RANK_MAX][DQ_DATA_WIDTH]; + u16 tx_last_pass[CHANNEL_MAX][RANK_MAX][DQ_DATA_WIDTH]; + + /* datlat */ + u8 rx_datlat[CHANNEL_MAX][RANK_MAX]; + + /* RX perbit */ + u8 rx_vref[CHANNEL_MAX]; + s16 rx_firspass[CHANNEL_MAX][RANK_MAX][DQ_DATA_WIDTH]; + u8 rx_lastpass[CHANNEL_MAX][RANK_MAX][DQ_DATA_WIDTH]; + u32 emi_cona_val; u32 emi_conh_val; u32 emi_conf_val; @@ -46,7 +85,7 @@ int complex_mem_test(u8 *start, unsigned int len); size_t sdram_size(void); const struct sdram_params *get_sdram_config(void); void enable_emi_dcm(void); -void mt_set_emi(const struct sdram_params *params); -void mt_mem_init(const struct sdram_params *params); +void mt_set_emi(const struct sdram_params *freq_params); +void mt_mem_init(const struct sdram_params *freq_params); #endif /* SOC_MEDIATEK_MT8183_EMI_H */ diff --git a/src/soc/mediatek/mt8183/include/soc/memlayout.ld b/src/soc/mediatek/mt8183/include/soc/memlayout.ld index 73c880afbf..82e404f790 100644 --- a/src/soc/mediatek/mt8183/include/soc/memlayout.ld +++ b/src/soc/mediatek/mt8183/include/soc/memlayout.ld @@ -24,6 +24,8 @@ */ #define SRAM_L2C_START(addr) SYMBOL(sram_l2c, addr) #define SRAM_L2C_END(addr) SYMBOL(esram_l2c, addr) +#define DRAM_INIT_CODE(addr, size) \ + REGION(dram_init_code, addr, size, 4) SECTIONS { @@ -42,6 +44,7 @@ SECTIONS SRAM_L2C_START(0x00200000) OVERLAP_DECOMPRESSOR_VERSTAGE_ROMSTAGE(0x00201000, 188K) BOOTBLOCK(0x00230000, 64K) + DRAM_INIT_CODE(0x00240000, 256K) SRAM_L2C_END(0x00280000) DRAM_START(0x40000000) diff --git a/src/soc/mediatek/mt8183/memory.c b/src/soc/mediatek/mt8183/memory.c index b2c744198b..67f6c65363 100644 --- a/src/soc/mediatek/mt8183/memory.c +++ b/src/soc/mediatek/mt8183/memory.c @@ -19,12 +19,12 @@ #include #include -void mt_mem_init(const struct sdram_params *params) +void mt_mem_init(const struct sdram_params *freq_params) { u64 rank_size[RANK_MAX]; /* memory calibration */ - mt_set_emi(params); + mt_set_emi(freq_params); if (CONFIG(MEMORY_TEST)) { size_t r; -- cgit v1.2.3