/* * Copyright (C) 2011-2014 Apple Inc. All rights reserved. * * This document is the property of Apple Inc. * It is considered confidential and proprietary. * * This document may not be reproduced or transmitted in any form, * in whole or in part, without the express written permission of * Apple Inc. */ #include #include #include #include #include #include #include #include #include #include #include #include // PRBS7 patterns used for ca calibration. amc_phy_params.cacalib_sw_loops * amc_phy_params.cacalib_hw_loops must equal 64 // We extend the size by 8 and repeat the first 8 values because when swloop=64, we don't go outside the array for programming CACALPAT1-7 static const uint32_t CA_PRBS7_PATTERNS[CA_NUM_PATTERNS + 8] = { 0x2550B, 0xCF135, 0xC4342, 0x67BFF, 0x825A0, 0x1487E, 0x984EF, 0xEA43E, 0x0B277, 0xA388D, 0xE5E5F, 0x96DDe, 0x8CC91, 0x720D1, 0xE1649, 0xA8ACA, 0x466E2, 0x73381, 0x1A14F, 0xFEC40, 0x93698, 0x49C83, 0xEEC28, 0x35563, 0x692CE, 0xE4D0F, 0x6DAD8, 0xDAA1B, 0xA70AB, 0xDB94B, 0x5C7AD, 0x8DFC1, 0x897D7, 0xB70C3, 0x7DAB0, 0x7C9E0, 0x87EE6, 0xD186C, 0x04816, 0x3E714, 0xCAA73, 0x01350, 0xFB706, 0x5668A, 0xD507A, 0x3AF02, 0xF4D67, 0xCB923, 0xFA456, 0xAD18C, 0x836F0, 0xEEF78, 0xCE265, 0x3F444, 0x31D75, 0x575DA, 0x2E77C, 0x6C988, 0x21D1D, 0xF1621, 0x0E931, 0x668AF, 0x792A6, 0x42EF4, 0x2550B, 0xCF135, 0xC4342, 0x67BFF, 0x825A0, 0x1487E, 0x984EF, 0xEA43E, }; // PRBS patterns for Wrdq and Rddq (for the one after Wrdq) calibration static const uint32_t DQ_PRBS7_PATTERNS[DQ_NUM_PATTERNS] = { 0x85858585, 0x4a4a4a4a, 0x9a9a9a9a, 0x9e9e9e9e, 0xa1a1a1a1, 0x88888888, 0xffffffff, 0xcfcfcfcf, 0xd0d0d0d0, 0x04040404, 0x3f3f3f3f, 0x29292929, 0x77777777, 0x30303030, 0x1f1f1f1f, 0xd4d4d4d4, 0x3b3b3b3b, 0x16161616, 0x5e5e5e5e, 0x47474747, 0x2f2f2f2f, 0xcbcbcbcb, 0xefefefef, 0x2d2d2d2d, 0x48484848, 0x19191919, 0x68686868, 0xe4e4e4e4, 0x24242424, 0xc2c2c2c2, 0x65656565, 0x51515151, 0x71717171, 0x8c8c8c8c, 0xc0c0c0c0, 0xe6e6e6e6, 0xa7a7a7a7, 0x34343434, 0x20202020, 
0xfdfdfdfd, 0x4c4c4c4c, 0x26262626, 0x41414141, 0x93939393, 0x14141414, 0xdddddddd, 0xb1b1b1b1, 0x6a6a6a6a, 0x67676767, 0xd2d2d2d2, 0x87878787, 0xc9c9c9c9, 0x6c6c6c6c, 0xdbdbdbdb, 0x0d0d0d0d, 0xb5b5b5b5, 0x55555555, 0x4e4e4e4e, 0xa5a5a5a5, 0xb7b7b7b7, 0xd6d6d6d6, 0xb8b8b8b8, 0xe0e0e0e0, 0x1b1b1b1b, 0xebebebeb, 0x12121212, 0x61616161, 0x6e6e6e6e, 0x58585858, 0xfbfbfbfb, 0xf0f0f0f0, 0xf9f9f9f9, 0x73737373, 0x0f0f0f0f, 0x36363636, 0xa3a3a3a3, 0x0b0b0b0b, 0x09090909, 0x8a8a8a8a, 0x7c7c7c7c, 0x39393939, 0x95959595, 0xa8a8a8a8, 0x02020202, 0x83838383, 0xf6f6f6f6, 0x45454545, 0xacacacac, 0x3d3d3d3d, 0xaaaaaaaa, 0x81818181, 0x75757575, 0xb3b3b3b3, 0xe9e9e9e9, 0x91919191, 0x97979797, 0x2b2b2b2b, 0xf4f4f4f4, 0xc6c6c6c6, 0x5a5a5a5a, 0x78787878, 0x06060606, 0xbcbcbcbc, 0xdfdfdfdf, 0x32323232, 0x9c9c9c9c, 0x22222222, 0x7e7e7e7e, 0xbabababa, 0x63636363, 0xedededed, 0xaeaeaeae, 0xbebebebe, 0x5c5c5c5c, 0xc4c4c4c4, 0xd9d9d9d9, 0x8e8e8e8e, 0x43434343, 0x10101010, 0xe2e2e2e2, 0x98989898, 0x1d1d1d1d, 0x57575757, 0xcdcdcdcd, 0x53535353, 0xf2f2f2f2, 0x7a7a7a7a, 0x85858585 }; static uint32_t ca_patterns_mask[CA_NUM_PATTERNS]; static int32_t mdllcode[AMC_NUM_CHANNELS][2]; // 1 for AMP_DQ and 1 for AMP_CA // Used to save calibration values for each bit per channel and rank for every iteration static int32_t ca_cal_per_loopchrnk_right[CA_MAX_LOOP_CHN_RNK][CA_NUM_BITS]; static int32_t ca_cal_per_loopchrnk_left[CA_MAX_LOOP_CHN_RNK][CA_NUM_BITS]; // ca data aggregated over all iterations static int32_t ca_cal_per_chrnk_right[AMC_NUM_CHANNELS * AMC_NUM_RANKS][CA_NUM_BITS]; static int32_t ca_cal_per_chrnk_left[AMC_NUM_CHANNELS * AMC_NUM_RANKS][CA_NUM_BITS]; // rddq data aggregated over all iterations static int32_t rddq_cal_per_chrnk_right[AMC_NUM_CHANNELS * AMC_NUM_RANKS][DQ_TOTAL_BITS]; static int32_t rddq_cal_per_chrnk_left[AMC_NUM_CHANNELS * AMC_NUM_RANKS][DQ_TOTAL_BITS]; // wrdq data aggregated over all iterations static int32_t wrdq_cal_per_chrnk_right[AMC_NUM_CHANNELS * 
AMC_NUM_RANKS][DQ_TOTAL_BITS]; static int32_t wrdq_cal_per_chrnk_left[AMC_NUM_CHANNELS * AMC_NUM_RANKS][DQ_TOTAL_BITS]; // wrlvl data aggregated over all iterations, we save value for 4 byte lanes + the ca value static int32_t wrlvl_cal_per_chrnk_rise[AMC_NUM_CHANNELS * AMC_NUM_RANKS][DQ_NUM_BYTES + 1]; static int32_t wrlvl_cal_per_chrnk_fall[AMC_NUM_CHANNELS * AMC_NUM_RANKS][DQ_NUM_BYTES + 1]; // This array will hold the contents of memory that will be used for dq calibration static uint8_t dqcal_saved_data[AMC_NUM_RANKS][sizeof(DQ_PRBS7_PATTERNS) * AMC_NUM_CHANNELS]__aligned(32); // This array will hold the calibration values to be saved to the PMU for resume boot static uint8_t cal_pmu_bits[CALIB_PMU_BYTES] = { 0 }; // Static local function declarations static void calibrate_ca(void); static void calibrate_rddq(bool after_wrddqcal); static void calibrate_wrlvl(void); static void calibrate_wrdq(void); static void save_masterdll_values(void); static void generate_ca_patterns_mask(void); static void amp_program_ca_patterns(uint32_t ch, uint32_t rnk, uint32_t swloop); static void amp_init_ca_offset_and_deskew(uint32_t ch); static uint32_t amp_mask_ca_bits(uint32_t ch, uint32_t mr_cmd); static void amp_push_casdll_out(uint32_t ch, int32_t offset); static void amp_enable_cacal_mode(bool enable, uint32_t ch); static void amp_run_cacal(uint32_t ch); static void amp_push_ctl_out(uint32_t ch, uint32_t dly_val); static void amp_setup_rddq_cal(uint32_t ch, uint32_t rnk); static void amp_set_rddq_sdll(uint32_t ch, uint32_t byte, uint32_t offset); static void amp_run_rddqcal(uint32_t ch); static void amp_wrlvl_init(void); static void amp_phy_update(uint32_t ch, uint32_t update); static void amp_set_cawrlvl_sdll(uint32_t ch, uint32_t offset, bool set_dly_sel); static void amp_set_dqwrlvl_sdll(uint32_t ch, uint32_t byte, uint32_t offset, bool set_dly_sel); static void amp_run_wrlvlcal(uint32_t ch, uint32_t wrlvlrun); static void amp_set_wrdq_sdll(uint32_t ch, uint32_t byte, 
int32_t offset); static void run_cacal_sequence(uint32_t ch, uint32_t rnk, uint32_t mr_cmd, uint32_t mask_bits, uint32_t swloop); static void find_cacal_right_side_failing_point(uint32_t ch, uint32_t rnk, uint32_t mr_cmd, uint32_t combined_mask, uint32_t swloop); static void find_cacal_right_side_passing_point(uint32_t ch, uint32_t rnk, uint32_t mr_cmd, uint32_t combined_mask, uint32_t swloop); static void enter_cacal_mode(uint32_t ch, uint32_t rnk, uint32_t mr_cmd, bool enter); static void find_cacal_left_side_failing_point(uint32_t ch, uint32_t rnk, uint32_t combined_mask, uint32_t swloop); static void find_cacal_left_side_passing_point(uint32_t ch, uint32_t rnk, uint32_t combined_mask, uint32_t swloop); static void ca_program_final_values(void); static void find_rddqcal_right_side_failing_point(uint32_t ch, uint32_t rnk, bool after_wrddqcal); static void find_rddqcal_right_side_passing_point(uint32_t ch, uint32_t rnk, int32_t *start_b, bool after_wrddqcal); static void find_rddqcal_left_side_failing_point(uint32_t ch, uint32_t rnk, bool after_wrddqcal); static void find_rddqcal_left_side_passing_point(uint32_t ch, uint32_t rnk, int32_t *start_b, bool after_wrddqcal); static void rddq_program_final_values(bool after_wrddqcal); static uint32_t wrlvl_encode_dlyval(uint32_t ch, uint32_t phy_type, uint32_t val); static uint32_t wrlvl_encode_clk90dly(uint32_t ch, uint32_t val); static void push_wrlvl_to_0s_region(uint32_t ch, uint32_t rnk); static void find_wrlvl_0to1_transition(uint32_t ch, uint32_t rnk); static void find_wrlvl_1to0_transition(uint32_t ch, uint32_t rnk); static void wrlvl_program_final_values(void); static void find_wrdqcal_right_side_failing_point(uint32_t ch, uint32_t rnk); static void find_wrdqcal_right_side_passing_point(uint32_t ch, uint32_t rnk, int32_t *start_b); static void find_wrdqcal_left_side_failing_point(uint32_t ch, uint32_t rnk); static void find_wrdqcal_left_side_passing_point(uint32_t ch, uint32_t rnk, int32_t *start_b); static void 
wrdq_program_final_values(void);
static uint32_t wr_rd_pattern_result(uint32_t ch, uint32_t rnk, uint32_t sdll_value);
static void save_restore_ca_wrlvl_regs(uint32_t save_or_restore);
static void save_restore_memory_region(bool dqcal_start);
static int32_t find_center_of_eye(int32_t left_pos_val, int32_t right_pos_val);
static int32_t find_common_endpoint(int32_t val0, int32_t val1, uint32_t min_or_max);

///////////////////////////////////////////////////////////////////////////////
////// Global functions
///////////////////////////////////////////////////////////////////////////////

#ifdef AMP_SWIZZLE
// Programs the CA calibration byte-select and bit-select mapping registers
// (both the AMP_DQ and the AMP_CA instance) for every channel.
// The register writes are only compiled when AMP_SWIZZLE == AMP_SWIZZLE_PER_J34M;
// for any other AMP_SWIZZLE value the function body is empty.
void amp_swizzle_init(void)
{
#if (AMP_SWIZZLE == AMP_SWIZZLE_PER_J34M)
	// per
	// NOTE(review): the reference that originally followed "per" is missing here -- confirm against the upstream file
	int ch;

	for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
		// Set up the CA Byte Select Mapping for both DQ and CA
		rAMP_CACALBYTESEL(AMP_DQ,ch) = 0x00000001;
		rAMP_CACALBYTESEL(AMP_CA,ch) = 0x00000001;

		// Set up the CA Bit Select Bit Mapping for both DQ and CA
		rAMP_CACALBITSELMAP(AMP_DQ,ch,0) = 0x01234567;
		rAMP_CACALBITSELMAP(AMP_DQ,ch,1) = 0xabcdef67;
		rAMP_CACALBITSELMAP(AMP_DQ,ch,2) = 0x0000ef89;

		rAMP_CACALBITSELMAP(AMP_CA,ch,0) = 0x01234567;
		rAMP_CACALBITSELMAP(AMP_CA,ch,1) = 0xabcdef67;
		rAMP_CACALBITSELMAP(AMP_CA,ch,2) = 0x0000ef89;
	}
#endif // end of (AMP_SWIZZLE == AMP_SWIZZLE_PER_J34M)
}
#endif

// Brings up the AMP PHY (DQ and CA instances) on every channel.
// In order: DLL update control, optional swizzle setup, AMP enable, drive
// strength, per-frequency-slot settings from amc_phy_params, read-DQ deskew
// defaults, DLL lock time, impedance auto-cal, DLL update interval, a DLL enable
// pulse followed by a DLL update, AMP init and finally impedance calibration.
// Most analog-related steps are compiled out when SUPPORT_FPGA is set.
//
// resume: when true (and neither SUPPORT_FPGA nor AMP_CALIBRATION_SKIP applies)
//         the saved CA and WrLvl offsets are restored at the end of the function.
void amc_phy_init(bool resume)
{
	uint32_t ch, f;
#if !SUPPORT_FPGA
	uint32_t rd, dq;
#endif

	// First pass: program the DLL update control of both PHY instances
	for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
		rAMP_DLLUPDTCTRL(AMP_DQ, ch) = 0x00017307;
		rAMP_DLLUPDTCTRL(AMP_CA, ch) = 0x00017307;
	}

	// Stub in this file (see amc_phy_enable_dqs_pulldown)
	amc_phy_enable_dqs_pulldown(false);

#ifdef AMP_SWIZZLE
	amp_swizzle_init();
#endif

	// Second pass: full per-channel bring-up; statement order is part of the init sequence
	for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
		rAMP_AMPEN(AMP_DQ, ch) = 1;
		rAMP_AMPEN(AMP_CA, ch) = 1;

#if !SUPPORT_FPGA
		rAMP_DQDQSDS(ch) = amc_phy_params.drive_strength;
		rAMP_NONDQDS(ch) = amc_phy_params.drive_strength;
#endif

		for (f = 0; f < AMP_FREQUENCY_SLOTS; f++) {
			rAMP_DIFFMODE_FREQ(ch, f) = 0x00000121;
		}

#if !SUPPORT_FPGA
		// Default read-DQ deskew for every (rd, dq) pair
		for (rd = 0; rd < AMP_MAX_RD; rd++) {
			for (dq = 0; dq < AMP_MAX_DQ; dq++) {
				rAMP_RDDQDESKEW_CTRL(ch, rd, dq) = 0x00000006;
			}
		}

		rAMP_DLLLOCKTIM(AMP_DQ, ch) = 0x000d0013;
		rAMP_DLLLOCKTIM(AMP_CA, ch) = 0x000d0013;
#endif

		// Per-frequency-slot DLL scale and read-capture settings come from amc_phy_params
		for (f = 0; f < AMP_FREQUENCY_SLOTS; f++) {
			rAMP_DQSINDLLSCL_FREQ(ch, f) = amc_phy_params.freq[f].dqsindllscl;
			rAMP_CAOUTDLLSCL_FREQ(ch, f) = amc_phy_params.freq[f].caoutdllscl;
		}

		for (f = 0; f < AMP_FREQUENCY_SLOTS; f++) {
			rAMP_RDCAPCFG_FREQ(AMP_DQ, ch, f) = amc_phy_params.freq[f].rdcapcfg;
		}

#if !SUPPORT_FPGA
		rAMP_DLLUPDTCTRL(AMP_DQ, ch) = 0x00017507;
		rAMP_DLLUPDTCTRL(AMP_CA, ch) = 0x00017507;

		// A zero imp_auto_cal parameter selects the built-in default value
		if (!amc_phy_params.imp_auto_cal) {
			rAMP_IMPAUTOCAL(AMP_DQ, ch) = 0x000103ac;
			rAMP_IMPAUTOCAL(AMP_CA, ch) = 0x000103ac;
		} else {
			rAMP_IMPAUTOCAL(AMP_DQ, ch) = amc_phy_params.imp_auto_cal;
			rAMP_IMPAUTOCAL(AMP_CA, ch) = amc_phy_params.imp_auto_cal;
		}
#endif

#if !SUPPORT_FPGA
		// Keep DLL update interval to 0, it will be restored after write calibration.
		// CoreOS: M7 init sequence change
#if SUB_PLATFORM_S7002
		rAMP_DLLUPDTINTVL(AMP_DQ, ch) = 0x10200000;
		rAMP_DLLUPDTINTVL(AMP_CA, ch) = 0x10200000;
#else
		rAMP_DLLUPDTINTVL(AMP_DQ, ch) = 0x1020005a;
		rAMP_DLLUPDTINTVL(AMP_CA, ch) = 0x1020005a;
#endif
# ifdef rAMP_MDLLFREQBINDISABLE
		rAMP_MDLLFREQBINDISABLE(AMP_DQ, ch) = 0x00000008;
		rAMP_MDLLFREQBINDISABLE(AMP_CA, ch) = 0x00000008;
# endif
#else
		rAMP_DLLUPDTINTVL(AMP_DQ, ch) = 0;
		rAMP_DLLUPDTINTVL(AMP_CA, ch) = 0;
# ifdef rAMP_MDLLFREQBINDISABLE
		rAMP_MDLLFREQBINDISABLE(AMP_DQ, ch) = 0x0000000F;
		rAMP_MDLLFREQBINDISABLE(AMP_CA, ch) = 0x0000000F;
# endif
#endif

#if !SUPPORT_FPGA
		// Pulse bit 0 of DLLEN (0x100 -> 0x101 -> 0x100) on both instances, then run a DLL update
		rAMP_DLLEN(AMP_DQ, ch) = 0x00000100;
		rAMP_DLLEN(AMP_CA, ch) = 0x00000100;
		rAMP_DLLEN(AMP_DQ, ch) = 0x00000101;
		rAMP_DLLEN(AMP_CA, ch) = 0x00000101;
		rAMP_DLLEN(AMP_DQ, ch) = 0x00000100;
		rAMP_DLLEN(AMP_CA, ch) = 0x00000100;

		amc_phy_run_dll_update(ch);
#endif

		rAMP_AMPINIT(AMP_DQ, ch) = 0x00000001;
		rAMP_AMPINIT(AMP_CA, ch) = 0x00000001;

#if !SUPPORT_FPGA
		// Start impedance calibration on both instances, then poll bit 0 until it clears
		rAMP_IMPCALCMD(AMP_CA, ch) = 0x00000101;
		rAMP_IMPCALCMD(AMP_DQ, ch) = 0x00000101;
		while (rAMP_IMPCALCMD(AMP_CA, ch) & 0x1) {}
while (rAMP_IMPCALCMD(AMP_DQ, ch) & 0x1) {} // Wait 5 us after Impedence Calibration to avoid McPhyPending // preventing the SRFSM from exiting SR. spin(5); #endif } #if !SUPPORT_FPGA #ifndef AMP_CALIBRATION_SKIP // Restore CA and WrLvl offsets from PMU if (resume) save_restore_ca_wrlvl_regs(CALIB_RESTORE); #endif #endif } void amc_phy_enable_dqs_pulldown(bool enable) { // Stub because H6 init sequence does not recommend doing this } void amc_phy_scale_dll(int freqsel, int factor) { } void amc_phy_run_dll_update(uint8_t ch) { rAMP_DLLUPDTCMD(AMP_DQ, ch) = 0x00000001; rAMP_DLLUPDTCMD(AMP_CA, ch) = 0x00000001; while ((rAMP_DLLUPDTCMD(AMP_DQ, ch) & 0x1) != 0) ; while ((rAMP_DLLUPDTCMD(AMP_CA, ch) & 0x1) != 0) ; } void amc_phy_bypass_prep(int step) { } void amc_phy_finalize() { } // Perform CA, RDDQ, and WRLVL calibration void amc_phy_calibration_ca_rddq_cal(bool resume) { #ifndef AMP_CALIBRATION_SKIP if ((amc_phy_params.cacalib_hw_loops * amc_phy_params.cacalib_sw_loops) != CA_NUM_PATTERNS) panic("Memory calibration: hwloops (%d) and swloops (%d) values are unexpected\n", amc_phy_params.cacalib_hw_loops, amc_phy_params.cacalib_sw_loops); amc_calibration_start(true); if (!resume) calibrate_ca(); /* * The first RdDq Cal is using MRR32 and MRR40. It's needed for WrDq calibration. Hence run before WrDq. * The second Rd Dq calibration is PRBS pattern based, which needs Wr Dq calibration done. Hence done after WrDq. * PRBS patterns help in reducing aliasing, hence needed for better accuracy. 
*/ calibrate_rddq(false); if (!resume) { calibrate_wrlvl(); // Save off the CA and WrLvl offsets to PMU save_restore_ca_wrlvl_regs(CALIB_SAVE); } amc_calibration_start(false); #endif } void amc_phy_calibration_wrdq_cal(bool resume) { #ifndef AMP_CALIBRATION_SKIP if (resume) save_restore_memory_region(true); amc_calibration_start(true); // ok to keep PSQWQCTL0 and PSQWQCTL1 at their value setup for wrdqcal even for the rddqcal that follows amc_wrdqcal_start(true); calibrate_wrdq(); /* * The first RdDq Cal is using MRR32 and MRR40. It's needed for WrDq calibration. Hence run before WrDq. * The second Rd Dq calibration is PRBS pattern based, which needs Wr Dq calibration done. Hence done after WrDq. * PRBS patterns help in reducing aliasing, hence needed for better accuracy. */ calibrate_rddq(true); amc_wrdqcal_start(false); amc_calibration_start(false); if (resume) save_restore_memory_region(false); #endif uint8_t ch; for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) { // Restore AMP DLL update interval to POR value // CoreOS: M7 init sequence change #if !SUPPORT_FPGA #if SUB_PLATFORM_S7002 rAMP_DLLUPDTINTVL(AMP_DQ, ch) = 0x1020005a; rAMP_DLLUPDTINTVL(AMP_CA, ch) = 0x1020005a; #endif #endif // Enable AMP clock gating only after Wrdqcal is done rAMP_AMPCLK(AMP_DQ, ch) = 0x00010000; rAMP_AMPCLK(AMP_CA, ch) = 0x00010000; } } /////////////////////////////////////////////////////////////////////////////// ////// Local functions /////////////////////////////////////////////////////////////////////////////// // To dump calibration results from iBoot menu command static int dump_mem_calibration_info(int argc, struct cmd_arg *args) { uint32_t ch, byte, bit; dprintf(DEBUG_INFO, "Memory calibration results\n"); for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) { dprintf(DEBUG_INFO, "Channel %d\n", ch); dprintf(DEBUG_INFO, "\tCA SDLL: 0x%02x\n", rAMP_CASDLLCTRL(ch) & DLLVAL_BITS); dprintf(DEBUG_INFO, "\t\tPer Bit Deskew: "); for (bit = 0; bit < CA_NUM_BITS; bit++) dprintf(DEBUG_INFO, "0x%02x ", 
rAMP_CADESKEW_CTRL(ch, bit) & DESKEW_CTRL_BITS); dprintf(DEBUG_INFO, "\n\t\tCS, CK, CKE Deskew: 0x%02x", rAMP_CKDESKEW_CTRL(ch) & DESKEW_CTRL_BITS); dprintf(DEBUG_INFO, "\n"); dprintf(DEBUG_INFO, "\tCA WrLvlSDLL: 0x%02x\n", rAMP_CAWRLVLSDLLCODE(ch) & DLLVAL_BITS); dprintf(DEBUG_INFO, "\tDQ WrLvlSDLL: "); for (byte = 0; byte < DQ_NUM_BYTES; byte++) dprintf(DEBUG_INFO, "0x%02x ", rAMP_DQWRLVLSDLLCODE(ch, byte) & DLLVAL_BITS); dprintf(DEBUG_INFO, "\n"); dprintf(DEBUG_INFO, "\tRead DQ:\n"); for (byte = 0; byte < DQ_NUM_BYTES; byte++) { dprintf(DEBUG_INFO, "\t\tByte %d SDLL: 0x%02x\n", byte, rAMP_DQSDLLCTRL_RD(ch, byte) & DLLVAL_BITS); dprintf(DEBUG_INFO, "\t\t\tPer Bit Deskew: "); for (bit = 0; bit < DQ_NUM_BITS_PER_BYTE; bit++) dprintf(DEBUG_INFO, "0x%02x ", rAMP_RDDQDESKEW_CTRL(ch, byte, bit) & DESKEW_CTRL_BITS); dprintf(DEBUG_INFO, "\n"); } dprintf(DEBUG_INFO, "\tWrite DQ:\n"); for (byte = 0; byte < DQ_NUM_BYTES; byte++) { dprintf(DEBUG_INFO, "\t\tByte %d SDLL: 0x%02x\n", byte, rAMP_DQSDLLCTRL_WR(ch, byte) & DLLVAL_BITS); dprintf(DEBUG_INFO, "\t\t\tPer Bit Deskew: "); for (bit = 0; bit < DQ_NUM_BITS_PER_BYTE; bit++) dprintf(DEBUG_INFO, "0x%02x ", rAMP_WRDQDESKEW_CTRL(ch, byte, bit) & DESKEW_CTRL_BITS); dprintf(DEBUG_INFO, "\n"); } } return 0; } MENU_COMMAND_DEBUG(memcal_info, dump_mem_calibration_info, "Prints memory calibration results", NULL); static void calibrate_ca(void) { uint32_t ch, rnk, swloop, mask_bits; generate_ca_patterns_mask(); // Required since the dll values may change slightly during calibration save_masterdll_values(); // Calibration sequence is to be run for each rank in each channel, amc_phy_params.cacalib_sw_loops number of times for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) { for (rnk = 0; rnk < AMC_NUM_RANKS; rnk++) { for (swloop = 0; swloop < amc_phy_params.cacalib_sw_loops; swloop++) { amp_program_ca_patterns(ch, rnk, swloop); amp_init_ca_offset_and_deskew(ch); // Training of CA Bits 0-3 and 5-8: MR41 cmd (training cmd must be sent before 
cacalibmode is enabled in AMP) mask_bits = amp_mask_ca_bits(ch, MR41); amc_mrcmd_to_ch_rnk(MR_WRITE, ch, rnk, MR41, MR41 << 2); amp_enable_cacal_mode(true, ch); run_cacal_sequence(ch, rnk, MR41, mask_bits, swloop); amp_enable_cacal_mode(false, ch); amp_init_ca_offset_and_deskew(ch); // Training of CA Bits 4 and 9: MR48 cmd (training cmd must be sent before cacalibmode is enabled in AMP) mask_bits = amp_mask_ca_bits(ch, MR48); amc_mrcmd_to_ch_rnk(MR_WRITE, ch, rnk, MR48, MR48 << 2); amp_enable_cacal_mode(true, ch); run_cacal_sequence(ch, rnk, MR48, mask_bits, swloop); amp_enable_cacal_mode(false, ch); amp_init_ca_offset_and_deskew(ch); // Exit CA Training mode: MR42 amc_mrcmd_to_ch_rnk(MR_WRITE, ch, rnk, MR42, MR42 << 2); } } } // By now, we have compiled right and left edges of passing window for all CA bits over a number of iterations // Aggregate the results, and find the center point of the window, and program it ca_program_final_values(); } static void calibrate_rddq(bool after_wrddqcal) { uint32_t ch, rnk, data; // step7 if (after_wrddqcal == false) { for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) { for (rnk = 0; rnk < AMC_NUM_RANKS; rnk++) { amp_setup_rddq_cal(ch, rnk); amc_mrcmd_to_ch_rnk(MR_READ, ch, rnk, MR5, (uintptr_t)&data); amc_enable_rddqcal(true); // Find the left and right edges of the eye find_rddqcal_right_side_failing_point(ch, rnk, false); find_rddqcal_left_side_failing_point(ch, rnk, false); amc_enable_rddqcal(false); } } } // step10 else { for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) { for (rnk = 0; rnk < AMC_NUM_RANKS; rnk++) { // Find the left and right edges of the eye using PRBS patterns // These results will be more accurate find_rddqcal_right_side_failing_point(ch, rnk, true); find_rddqcal_left_side_failing_point(ch, rnk, true); } } } // Now that we have per bit left and right endpoints for each channel and rank, aggregate and program final values rddq_program_final_values(after_wrddqcal); } // Align the clock signal with the DQ signals static 
void calibrate_wrlvl(void) { uint32_t ch, rnk; uint32_t data, cawrlvlsdll; amp_wrlvl_init(); for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) { cawrlvlsdll = rAMP_CAWRLVLSDLLCODE(ch); for (rnk = 0; rnk < AMC_NUM_RANKS; rnk++) { data = 0x80 + RD_LATENCY_ENCODE; // 0x80 is added here to set the Write Level bit (bit 7) to 1 amc_mrcmd_to_ch_rnk(MR_WRITE, ch, rnk, MR2, data); // find the region where all bits return a 0 push_wrlvl_to_0s_region(ch, rnk); // push out the clock signal until all bits return a 1 find_wrlvl_0to1_transition(ch, rnk); // now go back towards the transition edge found earlier, but from this side of the edge find_wrlvl_1to0_transition(ch, rnk); // reset cawrlvlsdllcode to original value (0), before sending cmd to exit wrlvl mode (MR2) amp_set_cawrlvl_sdll(ch, cawrlvlsdll, false); data = RD_LATENCY_ENCODE; amc_mrcmd_to_ch_rnk(MR_WRITE, ch, rnk, MR2, data); } } // Program the final wrlvl values wrlvl_program_final_values(); } static void calibrate_wrdq(void) { uint32_t ch, rnk; for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) { for (rnk = 0; rnk < AMC_NUM_RANKS; rnk++) { find_wrdqcal_right_side_failing_point(ch, rnk); find_wrdqcal_left_side_failing_point(ch, rnk); } } wrdq_program_final_values(); } static void save_masterdll_values(void) { uint32_t ch; for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) { mdllcode[ch][AMP_DQ] = (rAMP_MDLLCODE(AMP_DQ, ch) & DLLVAL_BITS); mdllcode[ch][AMP_CA] = (rAMP_MDLLCODE(AMP_CA, ch) & DLLVAL_BITS); } } static void generate_ca_patterns_mask(void) { uint32_t index, patr, patf = 0; uint32_t mask = 0; // generate the pattern to be used for each CA calibration iteration for (index = 0; index < (amc_phy_params.cacalib_sw_loops * amc_phy_params.cacalib_hw_loops); index++) { patr = (CA_PRBS7_PATTERNS[index]) & CA_ALL_BITS; patf = (CA_PRBS7_PATTERNS[index]) >> CA_NUM_BITS; mask = patr ^ patf; ca_patterns_mask[index] = mask; } } static void amp_program_ca_patterns(uint32_t ch, uint32_t rnk, uint32_t swloop) { uint32_t cacalctrl, p; // Program 
rank, hardware loop count, and timing params // Timing params are taken from lpddr3 jedec spec cacalctrl = (rnk << 24) | ((amc_phy_params.cacalib_hw_loops - 1) << 16) | (16 << 8) | (10 << 0); rAMP_CACALCTRL(AMP_DQ, ch) = cacalctrl; rAMP_CACALCTRL(AMP_CA, ch) = cacalctrl; for (p = 0; p < AMP_MAX_PATTERNS; p++) { rAMP_CACALPAT(AMP_DQ, ch, p) = CA_PRBS7_PATTERNS[(swloop * amc_phy_params.cacalib_hw_loops) + p]; rAMP_CACALPAT(AMP_CA, ch, p) = CA_PRBS7_PATTERNS[(swloop * amc_phy_params.cacalib_hw_loops) + p]; } } // (Re-)Initialize ca offset and deskew registers static void amp_init_ca_offset_and_deskew(uint32_t ch) { uint8_t d; int32_t camdllcode = mdllcode[ch][AMP_CA]; // ensure negative sign is set with mdllcode value for ca offset (mdllcode guaranteed by designers not to be negative) amp_push_casdll_out(ch, (-1 * camdllcode)); // Clear cadeskewctrl registers for (d = 0; d < CA_NUM_BITS; d++) rAMP_CADESKEW_CTRL(ch, d) = 0; } static uint32_t amp_mask_ca_bits(uint32_t ch, uint32_t mr_cmd) { uint32_t mask_bits; // Assuming no byte swizzling if (mr_cmd == MR41) { // MR41: Mask out bits 9 and 4 mask_bits = 0x210; } else if (mr_cmd == MR48) { // MR48: Mask out bits 0-3 and bits 5-8 mask_bits = 0x1EF; } else { // No bits are masked out mask_bits = 0; } rAMP_CACALMASK(AMP_DQ, ch) = mask_bits; rAMP_CACALMASK(AMP_CA, ch) = mask_bits; return mask_bits; } static void amp_push_casdll_out(uint32_t ch, int32_t offset) { uint32_t ca_bit; uint32_t cadeskewcode; int32_t camdllcode = mdllcode[ch][AMP_CA]; if (offset > 0) { // New equation given by Rakesh: if offset is within DELIMIT_POS_ADJ_CASDLL steps of camdllcode, limit it to (master dll - DELIMIT_POS_ADJ_CASDLL) if (offset >= (camdllcode - DELIMIT_POS_ADJ_CASDLL)) { uint8_t difference = (uint8_t) (offset - (camdllcode - DELIMIT_POS_ADJ_CASDLL)); offset = camdllcode - DELIMIT_POS_ADJ_CASDLL; if (difference >= MAX_DESKEW_PROGRAMMED) cadeskewcode = MAX_DESKEW_PROGRAMMED; else cadeskewcode = difference; // Adjust deskew registers for 
each ca bit for (ca_bit = 0; ca_bit < CA_NUM_BITS; ca_bit++) rAMP_CADESKEW_CTRL(ch, ca_bit) = cadeskewcode; } } rAMP_CASDLLCTRL(ch) = (1 << 24) | INT_TO_OFFSET(offset); while (rAMP_CASDLLCTRL(ch) & (1 << 24)); } static void amp_enable_cacal_mode(bool enable, uint32_t ch) { // Set or clear CACalMode bit if (enable) rAMP_CACALRUN(AMP_CA, ch) |= CACALRUN_CACALMODE; else rAMP_CACALRUN(AMP_CA, ch) &= ~CACALRUN_CACALMODE; } static void amp_run_cacal(uint32_t ch) { // DQ must be set before CA rAMP_CACALRUN(AMP_DQ, ch) |= CACALRUN_RUNCACAL; // CACalMode should already be set rAMP_CACALRUN(AMP_CA, ch) |= CACALRUN_RUNCACAL; // Poll on the DQ register while(rAMP_CACALRUN(AMP_DQ, ch) & CACALRUN_RUNCACAL); } static void amp_push_ctl_out(uint32_t ch, uint32_t dly_val) { uint32_t cadramsigdly; rAMP_TESTMODE(AMP_CA, ch) = TESTMODE_FORCECKELOW; // Fix for Radar 10790574 - Hold Violation on CKE if (dly_val >= 0xd) cadramsigdly = (3 << 4); else if (dly_val >= 0xa) cadramsigdly = (2 << 4); else if (dly_val >= 0x8) cadramsigdly = (1 << 4); else cadramsigdly = (0 << 4); rAMP_DRAMSIGDLY(AMP_CA, ch, 0) = cadramsigdly; rAMP_CSDESKEW_CTRL(ch) = dly_val; rAMP_CKDESKEW_CTRL(ch) = dly_val; rAMP_CKEDESKEW_CTRL(ch) = dly_val; rAMP_TESTMODE(AMP_CA, ch) = 0; } static void amp_setup_rddq_cal(uint32_t ch, uint32_t rnk) { // At this point the AMC's READLEVELING should already be setup as 0x00000300 // Make DQCALCTRL.DQCalPatSel (bits 1:0) match READLEVELING.RdLvlPatOpt rAMP_DQCALCTRL(ch) = (rnk << 16) | (RDDQ_LOOPCNT << 8) | (3 << 0); } // This functions set the slave dll for a particular byte lane of RDDQ as specified in the offset parameter static void amp_set_rddq_sdll(uint32_t ch, uint32_t byte, uint32_t offset) { rAMP_DQSDLLCTRL_RD(ch, byte) = (1 << 24) | offset; // Wait for Run bit to clear while(rAMP_DQSDLLCTRL_RD(ch, byte) & (1 << 24)); } static void amp_run_rddqcal(uint32_t ch) { rAMP_DQCALRUN(ch) = 1; while (rAMP_DQCALRUN(ch) & 1); } static void amp_wrlvl_init(void) { uint32_t ch; for (ch = 
0; ch < AMC_NUM_CHANNELS; ch++) { // Write Leveling Timing Control Registers to program tMRD and tWLO timing params // Taking these values from jedec_lpddr3s8_4gb_x32_1600.soma // tWLO has a max value of 20ns for 1600 freq // tWLMRD has a min value of 40tck for 1600 freq rAMP_DQWRLVLTIM(ch) = (12 << 8) | (16 << 0); } } // follow a certain sequence required when wrlvl dlysel is to be updated static void amp_phy_update(uint32_t ch, uint32_t update) { // Release CKE and disable phyupdt to allow normal operation if (!update) rAMP_TESTMODE(AMP_CA, ch) = 0; // issue phyupdt to block AMC traffic rAMP_CAPHYUPDTCRTL(ch) = update; // wait for the phyupdt change to take effect. there is only 1 bit in the status reg: bit 0. while((rAMP_CAPHYUPDTSTATUS(ch) & 1) != (update & 1)); // CKE must be low when updating dlysel to avoid glitches if (update) rAMP_TESTMODE(AMP_CA, ch) = TESTMODE_FORCECKELOW; } // Must ensure that WrLvL SDLLs are programmed with all precautions to avoid glitch on clock signals static void amp_set_cawrlvl_sdll(uint32_t ch, uint32_t offset, bool set_dly_sel) { if (set_dly_sel) { // Must send phyupdt to AMC to avoid traffic while CKE is low amp_phy_update(ch, 1); // Ok to set directly to final value (instead of incrementing) since CKE is low rAMP_CAWRLVLSDLLCODE(ch) = offset; /* * Since M7 memory clock much slower than Alcatraz, need to toggle phyupdt to ensure * refreshes that have piled up due to phyupdt being set are flushed out */ if (amc_phy_params.wrlvl_togglephyupdt) { amp_phy_update(ch, 0); amp_phy_update(ch, 1); } // program the dlysel value with CKE low to avoid glitches to DRAM rAMP_CAWRLVLCLKDLYSEL(ch) = wrlvl_encode_dlyval(ch, AMP_CA, offset); // disable phyupdt and release CKE back to high amp_phy_update(ch, 0); } else { uint32_t cawrlvlsdll = rAMP_CAWRLVLSDLLCODE(ch); int32_t step = (cawrlvlsdll < offset) ? 
1 : -1; // when CKE is not low, need to step by 1 to avoid glitches to DRAM for ( ; cawrlvlsdll != offset; cawrlvlsdll += step) rAMP_CAWRLVLSDLLCODE(ch) = cawrlvlsdll + step; } } // Must ensure that WrLvL SDLLs are programmed with all precautions to avoid glitches static void amp_set_dqwrlvl_sdll(uint32_t ch, uint32_t byte, uint32_t offset, bool set_dly_sel) { if (set_dly_sel) { uint32_t dqwrlvldlychainctrl = rAMP_DQWRLVLDLYCHAINCTRL(ch, byte); // Must send phyupdt to AMC to avoid traffic while CKE is low amp_phy_update(ch, 1); // Ok to set directory final value (instead of incrementing) since CKE is low rAMP_DQWRLVLSDLLCODE(ch, byte) = offset; /* * Since M7 memory clock much slower than Alcatraz, need to toggle phyupdt to ensure * refreshes that have piled up due to phyupdt being set are flushed out */ if (amc_phy_params.wrlvl_togglephyupdt) { amp_phy_update(ch, 0); amp_phy_update(ch, 1); } // program dlysel (also, preserve bits 17:16) with CKE low to avoid glitches to DRAM rAMP_DQWRLVLDLYCHAINCTRL(ch, byte) = (dqwrlvldlychainctrl & 0x00030000) | wrlvl_encode_dlyval(ch, AMP_DQ, offset); // disable phyupdt and release CKE back to high amp_phy_update(ch, 0); } else { uint32_t dqwrlvlsdll = rAMP_DQWRLVLSDLLCODE(ch, byte); int32_t step = (dqwrlvlsdll < offset) ? 
1 : -1;
		// when CKE is not low, need to step by 1 to avoid glitches to DRAM
		for ( ; dqwrlvlsdll != offset; dqwrlvlsdll += step)
			rAMP_DQWRLVLSDLLCODE(ch, byte) = dqwrlvlsdll + step;
	}
}

// Kick off a write-leveling calibration run on channel 'ch'.
// Writes 'wrlvlrun' to the DQWRLVLRUN register and busy-waits until the register reads back as zero.
static void amp_run_wrlvlcal(uint32_t ch, uint32_t wrlvlrun)
{
	rAMP_DQWRLVLRUN(ch) = wrlvlrun;
	while(rAMP_DQWRLVLRUN(ch));
}

// This function sets the slave dll for a particular byte lane of WRDQ as specified in the offset parameter.
//   ch     - channel index (also selects the cached master dll code in mdllcode[ch][AMP_DQ])
//   byte   - byte lane whose WR SDLL (and, if needed, deskew) registers are programmed
//   offset - signed sdll offset; positive values may be clamped (see below)
// For a positive offset at or beyond (mdllcode - DELIMIT_POS_ADJ_WRDQSDLL), the sdll is clamped to that limit and
// the excess (capped at DQ_MAX_DESKEW_PER_BIT) is written to the per-bit WRDQ deskew and the WRDQS deskew registers.
static void amp_set_wrdq_sdll(uint32_t ch, uint32_t byte, int32_t offset)
{
	uint32_t dq_bit;
	uint32_t dqdeskewcode;
	int32_t dqmdllcode = mdllcode[ch][AMP_DQ];

	if (offset > 0) {
		// New equation given by Rakesh: if offset is within DELIMIT_POS_ADJ_WRDQSDLL steps of dqmdllcode, limit it to (master dll - DELIMIT_POS_ADJ_WRDQSDLL)
		if (offset >= (dqmdllcode - DELIMIT_POS_ADJ_WRDQSDLL)) {
			// NOTE(review): the excess is truncated to 8 bits before the cap is applied; presumably offsets never
			// exceed the limit by more than 255 -- confirm against callers.
			uint8_t difference = (uint8_t) (offset - (dqmdllcode - DELIMIT_POS_ADJ_WRDQSDLL));
			offset = dqmdllcode - DELIMIT_POS_ADJ_WRDQSDLL;

			if (difference >= DQ_MAX_DESKEW_PER_BIT)
				dqdeskewcode = DQ_MAX_DESKEW_PER_BIT;
			else
				dqdeskewcode = difference;

			// Adjust deskew registers for each dq bit
			for (dq_bit = 0; dq_bit < DQ_NUM_BITS_PER_BYTE; dq_bit++)
				rAMP_WRDQDESKEW_CTRL(ch, byte, dq_bit) = dqdeskewcode;

			// Also update the Data Mask (DM), controlled by the DQSDESKEW register
			rAMP_WRDQSDESKEW_CTRL(ch, byte) = dqdeskewcode;
		}

		// set wrlvlclk90dly (bits 17:16 of wrlvldlychainctrl reg) if positive sdll value
		// Only the low two bits of the current register value are preserved.
		rAMP_DQWRLVLDLYCHAINCTRL(ch, byte) = (wrlvl_encode_clk90dly(ch, offset) << 16) | (rAMP_DQWRLVLDLYCHAINCTRL(ch, byte) & 0x3);
	}

	// Write the (possibly clamped) offset together with bit 24 (the Run bit)
	rAMP_DQSDLLCTRL_WR(ch, byte) = (1 << 24) | INT_TO_OFFSET(offset);

	// Wait for Run bit to clear
	while(rAMP_DQSDLLCTRL_WR(ch, byte) & (1 << 24));
}

// Run one software loop of CA calibration for the given channel and rank.
//   mr_cmd    - mode register command forwarded to the right-side search (used when (re)entering CaCal mode)
//   mask_bits - CA bits the caller wants ignored
//   swloop    - software loop index; selects which ca_patterns_mask[] entries belong to this loop
// Builds the combined "ignore" mask, then searches the right side followed by the left side of the window.
static void run_cacal_sequence(uint32_t ch, uint32_t rnk, uint32_t mr_cmd, uint32_t mask_bits, uint32_t swloop)
{
	uint32_t combined_mask, hwloop;
	uint32_t pat_mask = 0;

	// OR together the transition masks of every pattern used by the hw loops of this sw loop
	for (hwloop = 0; hwloop < amc_phy_params.cacalib_hw_loops; hwloop++)
		pat_mask |= ca_patterns_mask[(swloop * amc_phy_params.cacalib_hw_loops) + hwloop];

	// This represents the bits that don't have a transition on any of the patterns used during the hwloop calibration
	combined_mask = mask_bits | (CA_ALL_BITS - pat_mask);

	// To find the FAIL <-> PASS <-> FAIL window
	find_cacal_right_side_failing_point(ch, rnk, mr_cmd, combined_mask, swloop);
	find_cacal_left_side_failing_point(ch, rnk, combined_mask, swloop);
}

// Establish the right edge of the window by finding the point where all CA bits fail.
// The CK/CKE/CS delay is pushed out in FINER_STEP_SZ steps (exiting and re-entering CaCal mode around each change)
// until every unmasked bit of the accumulated calibration result is 0, or MAX_DESKEW_OFFSET is reached.
// In either case the right-side passing-point search is then invoked, and the CK delay is finally reset to 0.
static void find_cacal_right_side_failing_point(uint32_t ch, uint32_t rnk, uint32_t mr_cmd, uint32_t combined_mask, uint32_t swloop)
{
	bool all_bits_fail = false;
	uint32_t cacalresult = 0;
	uint32_t push_ck_out = 0;

	cacalresult = CA_ALL_BITS;

	// Increase delay to the right until all bits fail
	do {
		amp_run_cacal(ch);

		// Results are AND-accumulated: once a bit has failed at any step it stays failed
		cacalresult = cacalresult & (rAMP_CACALRESULT(ch) & CA_ALL_BITS);

		if ((cacalresult & (CA_ALL_BITS ^ combined_mask)) != 0) {
			all_bits_fail = false;
			push_ck_out = push_ck_out + FINER_STEP_SZ;

			amp_init_ca_offset_and_deskew(ch);

			// Make AMP and DRAM exit CaCal Mode in order to update the CK, CKE, and CS delays
			enter_cacal_mode(ch, rnk, mr_cmd, false);

			// Update CK, CKE, and CS signal delays
			amp_push_ctl_out(ch, push_ck_out);

			// Re-enter CaCal mode
			enter_cacal_mode(ch, rnk, mr_cmd, true);
		} else {
			all_bits_fail = true;

			// Do a per bit calculation of when they start passing again
			find_cacal_right_side_passing_point(ch, rnk, mr_cmd, combined_mask, swloop);
		}
	} while ((push_ck_out < MAX_DESKEW_OFFSET) && (all_bits_fail == false));

	if ((push_ck_out >= MAX_DESKEW_OFFSET) && (all_bits_fail == false)) {
		dprintf(DEBUG_INFO, "Memory CA calibration: Unable to find right side failing point for channel %d\n", ch);

		// Failing point cannot be found, continuing to passing point assuming failure at this setting
		find_cacal_right_side_passing_point(ch, rnk, mr_cmd, combined_mask, swloop);
	}

	// Reset CK delay back to 0
	if (rAMP_CKDESKEW_CTRL(ch)) {
		// Exit CaCal Mode for AMP and DRAM before modifying CK, CKE, and CS signals
		enter_cacal_mode(ch, rnk, mr_cmd, false);

		// Ok from Rakesh to set to 0 directly instead of decrementing by 1
		amp_push_ctl_out(ch, 0);

		// Re-enable CACal Mode
		enter_cacal_mode(ch, rnk, mr_cmd, true);
	}
}

// Finds the passing region on the right edge of window.
// Starting from the current CK deskew (or, if that is already 0, from the CA sdll value), steps back towards the
// passing region and records, per unmasked CA bit, the first tap of a run of passes in
// ca_cal_per_loopchrnk_right[loop/ch/rnk][bit]. A bit is considered done once its pass counter has reached
// SOLID_PASS_DETECT. Panics if the sdll exceeds MAX_SDLL_VAL before all bits are done.
static void find_cacal_right_side_passing_point(uint32_t ch, uint32_t rnk, uint32_t mr_cmd, uint32_t combined_mask, uint32_t swloop)
{
	bool switch_from_cktoca;
	int32_t tap_value;
	uint32_t cacalresult;
	int32_t camdllcode;
	int32_t saved_val;
	uint32_t all_bits_pass;
	uint32_t BitPass[CA_NUM_BITS] = { 0 };
	uint32_t SolidBitPass[CA_NUM_BITS] = { 0 };
	uint32_t step_incr;
	uint8_t bit_indx;
	uint32_t ckdeskew;
	uint32_t loopchrnk_indx;

	all_bits_pass = 0;
	step_incr = FINER_STEP_SZ;
	camdllcode = mdllcode[ch][AMP_CA];
	ckdeskew = rAMP_CKDESKEW_CTRL(ch);

	// For every swloop, we'll save passing values for each channel & rank
	loopchrnk_indx = (swloop * AMC_NUM_CHANNELS * AMC_NUM_RANKS) + (ch * AMC_NUM_RANKS) + rnk;

	if (ckdeskew) {
		tap_value = ckdeskew;
		switch_from_cktoca = false;
	} else {
		// Since clock delay is already down to 0, use the slave delay.
		// We only have 2 knobs to turn for delay: clock and sdll
		tap_value = (rAMP_CASDLLCTRL(ch) & DLLVAL_BITS);
		tap_value = OFFSET_TO_INT(tap_value);
		switch_from_cktoca = true;
	}

	// combined_mask contains don't care bits (due to pattern) or masked bits (MR41 or MR48), so consider those done
	for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++)
		if ((combined_mask & (1 << bit_indx)) != 0)
			BitPass[bit_indx] = 1;

	// Finding Right side passing point on per bit level. Moving Right to Left to find point where it turns from FAIL TO PASS
	do {
		if (switch_from_cktoca == false) {
			// Make AMP and DRAM exit CaCal Mode in order to update the CK, CKE, and CS delays
			enter_cacal_mode(ch, rnk, mr_cmd, false);

			// Update CK, CKE, and CS signal delays
			amp_push_ctl_out(ch, tap_value);

			// Re-enter CaCal mode
			enter_cacal_mode(ch, rnk, mr_cmd, true);
		} else {
			amp_push_casdll_out(ch, tap_value);
		}

		// Run the ca calibration in hw
		amp_run_cacal(ch);
		cacalresult = rAMP_CACALRESULT(ch) & CA_ALL_BITS;

		// Make sure that each Bit sees a transition from 0 to 1 on CaCalresult Register
		for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++) {
			// For bits that are not masked, need to check pass/fail
			if ((combined_mask & (1 << bit_indx)) == 0) {
				if ((BitPass[bit_indx] == 0) && ((cacalresult & (1 << bit_indx)) != 0)) {
					if (SolidBitPass[bit_indx] == SOLID_PASS_DETECT) {
						// Bit has passed for SOLID_PASS_DETECT number of times, consider it done.
						BitPass[bit_indx] = 1;
					} else if (SolidBitPass[bit_indx] > 0) {
						SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1;
					} else {
						// This is the first time this bit has passed, save this point in the array
						SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1;
						if (switch_from_cktoca == false) {
							// MdllCode is considered '0' in this case
							saved_val = -1 * (tap_value + camdllcode);
						} else {
							saved_val = tap_value;
						}
						ca_cal_per_loopchrnk_right[loopchrnk_indx][bit_indx] = saved_val;
					}
				} else {
					// Bit failed to pass calibration, reset the SolidBitPass value to 0
					// (this branch is also taken for bits already marked done; their counter is no longer consulted)
					SolidBitPass[bit_indx] = 0;
				}
			}
		}

		all_bits_pass = 1;
		for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++) {
			all_bits_pass = all_bits_pass & BitPass[bit_indx];
			if (all_bits_pass == 0)
				break;
		}

		// If ALL bits are not passing - keep moving ca signals from Right to Left
		if (all_bits_pass == 0) {
			// CK deskew has been walked down to 0: switch knobs and continue from the current CA sdll value
			if ((tap_value == 0) && (switch_from_cktoca == false)) {
				switch_from_cktoca = true;
				tap_value = (rAMP_CASDLLCTRL(ch) & DLLVAL_BITS);
				tap_value = OFFSET_TO_INT(tap_value);
			}

			// CK deskew is decremented, CA sdll is incremented
			if (switch_from_cktoca == false) {
				tap_value = tap_value - step_incr;
			} else {
				tap_value = tap_value + step_incr;
			}
		}
	} while ((tap_value <= MAX_SDLL_VAL) && (all_bits_pass == 0));

	if (all_bits_pass == 0) {
		panic("Memory CA calibration: Unable to find passing point for all bits on the right side");
	}
}

// Enter or exit CA calibration mode on both the DRAM (via mode register writes) and the AMP.
//   enter == true : write MR41 (and additionally 'mr_cmd' if it differs from MR41), then enable CaCal mode in the AMP
//   enter == false: disable CaCal mode in the AMP, then write MR42 to the DRAM
static void enter_cacal_mode(uint32_t ch, uint32_t rnk, uint32_t mr_cmd, bool enter)
{
	// For entry, send MR41 command to DRAM before AMP register is changed
	if (enter) {
		// Re-enter CaCal Mode with MR41 always since some DRAMs don't support entering this mode with MR48
		amc_mrcmd_to_ch_rnk(MR_WRITE, ch, rnk, MR41, MR41 << 2);
		if (mr_cmd != MR41)
			amc_mrcmd_to_ch_rnk(MR_WRITE, ch, rnk, mr_cmd, mr_cmd << 2);
		amp_enable_cacal_mode(true, ch);
	}
	// For exit, change AMP register before sending DRAM command (MR42)
	else {
		amp_enable_cacal_mode(false, ch);
		amc_mrcmd_to_ch_rnk(MR_WRITE, ch, rnk, MR42, MR42 << 2);
	}
}

// Find the left edge of the CA window: push the CA sdll out in COARSE_STEP_SZ steps until every unmasked bit of the
// accumulated result has failed, then hand over to the left-side passing-point search.
// If the sdll runs out of taps first, the passing-point search is started from the last setting anyway.
static void find_cacal_left_side_failing_point(uint32_t ch, uint32_t rnk, uint32_t combined_mask, uint32_t swloop)
{
	// At this point, we've already played with all possible CK delays. At the end of find_cacal_right_side_failing_point routine,
	// we reset the CK delays to 0.
	// Loop through CaSDLLOvrVal from -MasterDLL to +Max until all failing points on the left side are found
	uint32_t all_bits_fail;
	uint32_t step_incr;
	int32_t push_ca_out;
	uint32_t cacalresult;
	int32_t camdllcode;
	uint32_t max_caleft_point_reached;
	int32_t max_caleft_point_val;
	int32_t casdllctrl, ca0deskewctrl;

	all_bits_fail = 0;
	cacalresult = CA_ALL_BITS;
	step_incr = COARSE_STEP_SZ;
	max_caleft_point_reached = 0;

	camdllcode = mdllcode[ch][AMP_CA];
	max_caleft_point_val = camdllcode + MAX_DESKEW_OFFSET - DELIMIT_POS_ADJ_CASDLL;

	casdllctrl = rAMP_CASDLLCTRL(ch) & DLLVAL_BITS;
	casdllctrl= OFFSET_TO_INT(casdllctrl);
	ca0deskewctrl = rAMP_CADESKEW_CTRL(ch, 0);

	// ca0deskewctrl will be non-zero only if casdll reached (camdllcode - DELIMIT_POS_ADJ_CASDLL)
	push_ca_out = casdllctrl + ca0deskewctrl;

	// Increment CaSDLLOvrVal from -ve Master Code to +MAX_SDLL_VAL
	do {
		if (push_ca_out >= max_caleft_point_val) {
			max_caleft_point_reached = 1;
		}

		// Push out this new ca offset
		amp_push_casdll_out(ch, push_ca_out);

		// run the calibration in hw
		amp_run_cacal(ch);
		cacalresult = cacalresult & (rAMP_CACALRESULT(ch) & CA_ALL_BITS);

		// combined mask has don't care bits (based on pattern) and masked bits (based on MR41 or MR48) that we should ignore
		if ((cacalresult & (CA_ALL_BITS ^ combined_mask)) != 0) {
			all_bits_fail = 0;
		} else {
			all_bits_fail = 1;

			// Now, we have found the left edge of window. Find the passing point for all bits
			find_cacal_left_side_passing_point(ch, rnk, combined_mask, swloop);
		}

		// increase the offset
		if (all_bits_fail == 0)
			push_ca_out = push_ca_out + step_incr;

		if ((push_ca_out > MAX_SDLL_VAL) && (all_bits_fail == 0)) {
			panic("Memory CA calibration: Unable to find failing point for all bits on the left side");
		}

		// Forcefully ending this loop as there are no more sdll taps left to proceed ahead
		if (max_caleft_point_reached && (all_bits_fail == 0)) {
			dprintf(DEBUG_INFO, "Memory CA calibration: SDLL ran out of taps when trying to find left side failing point\n");
			find_cacal_left_side_passing_point(ch, rnk, combined_mask, swloop);
			all_bits_fail = 1;
		}
	} while ((push_ca_out <= MAX_SDLL_VAL) && (all_bits_fail == 0) && (max_caleft_point_reached == 0));
}

// Starting from the current CA sdll (+ CA0 deskew) setting, step the sdll down in FINER_STEP_SZ steps and record,
// per unmasked CA bit, the first tap of a run of passes in ca_cal_per_loopchrnk_left[loop/ch/rnk][bit].
// A bit is considered done once its pass counter has reached SOLID_PASS_DETECT.
static void find_cacal_left_side_passing_point(uint32_t ch, uint32_t rnk, uint32_t combined_mask, uint32_t swloop)
{
	uint32_t loopchrnk_indx;
	uint32_t BitPass[CA_NUM_BITS] = { 0 };
	uint32_t SolidBitPass[CA_NUM_BITS] = { 0 };
	int32_t tap_value;
	uint32_t cacalresult;
	int32_t camdllcode;
	int32_t ca0deskewctrl;
	uint32_t all_bits_pass;
	uint32_t step_incr;
	uint32_t bit_indx;

	loopchrnk_indx = (swloop * AMC_NUM_CHANNELS * AMC_NUM_RANKS) + (ch * AMC_NUM_RANKS) + rnk;

	tap_value = rAMP_CASDLLCTRL(ch) & DLLVAL_BITS;
	tap_value = OFFSET_TO_INT(tap_value);
	ca0deskewctrl = rAMP_CADESKEW_CTRL(ch, 0);
	camdllcode = mdllcode[ch][AMP_CA];

	step_incr = FINER_STEP_SZ;

	all_bits_pass = 0;

	// ca0deskewctrl will be non-zero only if casdll reached (camdllcode - DELIMIT_POS_ADJ_CASDLL)
	tap_value += ca0deskewctrl;

	// combined_mask contains don't care bits (due to pattern) or masked bits (MR41 or MR48), so consider those passed
	for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++)
		if ((combined_mask & (1 << bit_indx)) != 0)
			BitPass[bit_indx] = 1;

	// Finding Left side passing point on per bit level. Move Left to Right to find point where it turns from FAIL TO PASS
	do {
		// Push out this new ca offset
		amp_push_casdll_out(ch, tap_value);

		// Run the calibration in hw
		amp_run_cacal(ch);
		cacalresult = rAMP_CACALRESULT(ch) & CA_ALL_BITS;

		// Make sure that each Bit sees a transition from 0 to 1 on CaCalresult Register
		for (bit_indx=0; bit_indx < CA_NUM_BITS; bit_indx++) {
			// check pass/fail for bits not masked
			if ((combined_mask & (1 << bit_indx)) == 0) {
				if ((BitPass[bit_indx] == 0) && ((cacalresult & (1 << bit_indx)) != 0)) {
					if (SolidBitPass[bit_indx] == SOLID_PASS_DETECT) {
						// bit has passed SOLID_PASS_DETECT straight times, consider it done
						BitPass[bit_indx] = 1;
					} else if (SolidBitPass[bit_indx] > 0) {
						SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1;
					} else {
						// first time bit has passed, record this value
						SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1;
						ca_cal_per_loopchrnk_left[loopchrnk_indx][bit_indx] = tap_value;
					}
				} else {
					// bit failed calibration, reset the SolidBitPass value back to 0
					SolidBitPass[bit_indx] = 0;
				}
			}
		}

		all_bits_pass = 1;
		for (bit_indx=0; bit_indx < CA_NUM_BITS; bit_indx++) {
			all_bits_pass = all_bits_pass & BitPass[bit_indx];
			if (all_bits_pass == 0)
				break;
		}

		// If ALL bits are not passing - keep moving from Left to Right Side of window
		if (all_bits_pass == 0) {
			tap_value = tap_value - step_incr;
		}

		// NOTE(review): when tap_value lands exactly on -camdllcode with bits still failing, neither this panic ('<')
		// nor the loop condition below ('>') is true, so the function returns silently without testing that tap and
		// without a recorded passing point for the remaining bits. Confirm whether '<=' or '>=' was intended.
		if ((tap_value < (-1 * camdllcode)) && (all_bits_pass == 0)) {
			// print error message as Left Failing Point cannot be found
			all_bits_pass = 1;
			panic("Memory CA calibration: Unable to find passing point for all bits on the left side");
		}
	} while ((tap_value > (-1 * camdllcode)) && (all_bits_pass == 0));
}

// Combine the per-loop CA calibration results into one eye per channel and bit, compute each bit's center, and
// program the CA sdll, the CK/CS push-out and the per-bit CA deskew registers for every channel.
// Panics if a computed deskew is negative or larger than MAX_DESKEW_PROGRAMMED.
static void ca_program_final_values(void)
{
	uint32_t loopchrnk0_indx, loopchrnk1_indx, chrnk0_indx, chrnk1_indx, ch;
	uint32_t bit_indx;
	int32_t ca_bit_center[CA_NUM_BITS];
	int32_t ca_bit_deskew[CA_NUM_BITS];
	int32_t tmp_left_pos_val, tmp_right_pos_val;
	// NOTE(review): on the single-rank path these are only assigned when the bit has a transition in some swloop;
	// presumably the pattern set guarantees that for every bit -- confirm, otherwise they are read unassigned below.
	int32_t left_pos_val;
	int32_t right_pos_val;
	int32_t camdllcode;
	int32_t min_ca_bit_center;
	int32_t adj_ca_bit_center;
	uint32_t cs_adj_val;
	int32_t rank_val[AMP_MAX_RANKS_PER_CHAN];
	uint32_t swloop, hwloop;
	uint32_t mask;
	uint32_t comb_mask, tmp_mask;
	uint32_t mask_txn_detect;

	for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
		camdllcode = mdllcode[ch][AMP_CA];

		// Calculate the Center Points for each CA bit
		for (bit_indx=0; bit_indx < CA_NUM_BITS; bit_indx++) {
			comb_mask = 0x0;
			mask_txn_detect = 0x0;
			tmp_mask = 0x0;

			// Compute the aggr eye over multiple swloop and hwloop for all ranks
			for (swloop = 0; swloop < amc_phy_params.cacalib_sw_loops; swloop++) {
				mask = 0x0;
				for (hwloop=0; hwloop < amc_phy_params.cacalib_hw_loops; hwloop++)
					mask = mask | ca_patterns_mask[(swloop * amc_phy_params.cacalib_hw_loops) + hwloop];

				// An explanation of the masks is below. Note that we only recorded result for a bit from a particular iteration if the bit had a transition.
				// mask: for pattern(s) sent in this swloop, indicates if the bit had a transition
				// tmp_mask: aggregates mask over all loops, including current swloop
				// comb_mask: aggregates mask over all loops, up to the previous swloop. After it is used to generate mask_txn_detect, it catches up to the same value as tmp_mask
				// mask_txn_detect: indicates the first time a bit transitioned was in this swloop
				tmp_mask = tmp_mask | mask;
				mask_txn_detect = tmp_mask ^ comb_mask;
				comb_mask = comb_mask | mask;

				/*
				 * Rank 0
				 */

				loopchrnk0_indx = (swloop * AMC_NUM_CHANNELS * AMC_NUM_RANKS) + (ch * AMC_NUM_RANKS) + 0;
				chrnk0_indx = (ch * AMC_NUM_RANKS) + 0;

				/* Left side */

				// lookup the value in the left side for this bit given loop, ch, and rnk
				rank_val[0] = ca_cal_per_loopchrnk_left[loopchrnk0_indx][bit_indx];
				tmp_left_pos_val = rank_val[0];

				// If this is the first time this bit transitioned, just put it in the aggregate result array
				if (mask_txn_detect & (1 << bit_indx)) {
					left_pos_val = tmp_left_pos_val;
					ca_cal_per_chrnk_left[chrnk0_indx][bit_indx] = left_pos_val;
				} else if (mask & (1 << bit_indx)) {
					// This is not the 1st time this bit transitioned so there is a recorded result already, but since we have a new result
					// to compare with, find the value that would cover both points and put that in the array
					left_pos_val = ca_cal_per_chrnk_left[chrnk0_indx][bit_indx];
					left_pos_val = find_common_endpoint(tmp_left_pos_val, left_pos_val, MIN_ENDPT);
					ca_cal_per_chrnk_left[chrnk0_indx][bit_indx] = left_pos_val;
				}

				/* Right side */

				// lookup the value in the right side for this bit given loop, ch, and rnk
				rank_val[0] = ca_cal_per_loopchrnk_right[loopchrnk0_indx][bit_indx];
				tmp_right_pos_val = rank_val[0];

				// If this is the first time this bit transitioned, just put it in the aggregate result array
				if (mask_txn_detect & (1 << bit_indx)) {
					right_pos_val = tmp_right_pos_val;
					ca_cal_per_chrnk_right[chrnk0_indx][bit_indx] = right_pos_val;
				} else if (mask & (1 << bit_indx)) {
					// This is not the 1st time this bit transitioned so there is a recorded result already, but since we have a new result
					// to compare with, find the value that would cover both points and put that in the array
					right_pos_val = ca_cal_per_chrnk_right[chrnk0_indx][bit_indx];
					right_pos_val = find_common_endpoint(tmp_right_pos_val, right_pos_val, MAX_ENDPT);
					ca_cal_per_chrnk_right[chrnk0_indx][bit_indx] = right_pos_val;
				}

				if (AMC_NUM_RANKS > 1) {
					/*
					 * Rank 1
					 */

					// rank_val[] only has AMP_MAX_RANKS_PER_CHAN entries; refuse to go further if that is not enough
					if (AMC_NUM_RANKS > AMP_MAX_RANKS_PER_CHAN)
						panic("amp_v2: AMC_NUM_RANKS = %d is more than hw is capable of supporting (%d)\n", AMC_NUM_RANKS, AMP_MAX_RANKS_PER_CHAN);

					loopchrnk1_indx = (swloop * AMC_NUM_CHANNELS * AMC_NUM_RANKS) + (ch * AMC_NUM_RANKS) + 1;
					chrnk1_indx = (ch * AMC_NUM_RANKS) + 1;

					/* Left side */

					// lookup the value in the left side for this bit given loop, ch, and rnk
					rank_val[1] = ca_cal_per_loopchrnk_left[loopchrnk1_indx][bit_indx];
					tmp_left_pos_val = rank_val[1];

					// If this is the first time this bit transitioned, just put it in the aggregate result array
					if (mask_txn_detect & (1 << bit_indx)) {
						left_pos_val = tmp_left_pos_val;
						ca_cal_per_chrnk_left[chrnk1_indx][bit_indx] = left_pos_val;
					} else if (mask & (1 << bit_indx)) {
						// This is not the 1st time this bit transitioned so there is a recorded result already, but since we have a new result
						// to compare with, find the value that would cover both points and put that in the array
						left_pos_val = ca_cal_per_chrnk_left[chrnk1_indx][bit_indx];
						left_pos_val = find_common_endpoint(tmp_left_pos_val, left_pos_val, MIN_ENDPT);
						ca_cal_per_chrnk_left[chrnk1_indx][bit_indx] = left_pos_val;
					}

					/* Right side */

					// lookup the value in the right side for this bit given loop, ch, and rnk
					rank_val[1] = ca_cal_per_loopchrnk_right[loopchrnk1_indx][bit_indx];
					tmp_right_pos_val = rank_val[1];

					// If this is the first time this bit transitioned, just put it in the aggregate result array
					if (mask_txn_detect & (1 << bit_indx)) {
						right_pos_val = tmp_right_pos_val;
						ca_cal_per_chrnk_right[chrnk1_indx][bit_indx] = right_pos_val;
					} else if (mask & (1 << bit_indx)) {
						// This is not the 1st time this bit transitioned so there is a recorded result already, but since we have a new result
						// to compare with, find the value that would cover both points and put that in the array
						right_pos_val = ca_cal_per_chrnk_right[chrnk1_indx][bit_indx];
						right_pos_val = find_common_endpoint(tmp_right_pos_val, right_pos_val, MAX_ENDPT);
						ca_cal_per_chrnk_right[chrnk1_indx][bit_indx] = right_pos_val;
					}

					// Find the common endpoint for both ranks
					left_pos_val = find_common_endpoint(ca_cal_per_chrnk_left[chrnk0_indx][bit_indx], ca_cal_per_chrnk_left[chrnk1_indx][bit_indx], MIN_ENDPT);
					right_pos_val = find_common_endpoint(ca_cal_per_chrnk_right[chrnk0_indx][bit_indx], ca_cal_per_chrnk_right[chrnk1_indx][bit_indx], MAX_ENDPT);
				}
			}

			// At this point, the left edge and the right edge of the eye for this channel and bit are defined by left_pos_val and right_pos_val
			// Find the center of the eye
			ca_bit_center[bit_indx] = find_center_of_eye(left_pos_val, right_pos_val);
		}

		// Since center for each bit may be different, find the min val
		// Min val will get programmed to the sdll, while the other bits will require deskew
		min_ca_bit_center = ca_bit_center[0];

		for (bit_indx = 1; bit_indx < CA_NUM_BITS; bit_indx++) {
			if (ca_bit_center[bit_indx] < min_ca_bit_center)
				min_ca_bit_center = ca_bit_center[bit_indx];
		}

		// for positive sdll, clamp it to mdllcode - DELIMIT_POS_ADJ_CASDLL
		if (min_ca_bit_center > (camdllcode - DELIMIT_POS_ADJ_CASDLL)) {
			min_ca_bit_center = camdllcode - DELIMIT_POS_ADJ_CASDLL;
		}

		// Since the min value of all bits is chosen for sdll, if the rest of the bits need more delay, compute their deskew
		for (bit_indx=0; bit_indx < CA_NUM_BITS; bit_indx++) {
			if (ca_bit_center[bit_indx] < min_ca_bit_center)
				panic("Memory CA Calibration: ca_bit_center[%d] = (%d) < min_ca_bit_center = %d\n", bit_indx, ca_bit_center[bit_indx], min_ca_bit_center);

			ca_bit_deskew[bit_indx] = ca_bit_center[bit_indx] - min_ca_bit_center;
		}

		// If min < -camdllcode, then we will clamp the sdll to -mdll
		// and put the remaining delay on the CK signals
		if (min_ca_bit_center < (-1 * camdllcode)) {
			cs_adj_val = (-1 * min_ca_bit_center) - camdllcode;
			adj_ca_bit_center = (-1 * camdllcode);
		} else {
			cs_adj_val = 0;
			adj_ca_bit_center = min_ca_bit_center;
		}

		/*
		 * Finally, program the values
		 */

		// Validate every deskew value before any register is written
		for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++) {
			// Make sure deskew value programmed is not negative and is <= MAX_DESKEW_PROGRAMMED
			if ((ca_bit_deskew[bit_indx] < 0) || (ca_bit_deskew[bit_indx] > MAX_DESKEW_PROGRAMMED))
				panic("Memory CA Calibration: ca_bit_deskew[%d] = %d invalid\n", bit_indx, ca_bit_deskew[bit_indx]);
		}

		// Push the remainder of the delay to CK signals (if adj_ca_bit_center was clamped to -camdllcode)
		amp_push_ctl_out(ch, cs_adj_val);

		// Program the SDLL with the adjusted min value
		amp_push_casdll_out(ch, adj_ca_bit_center);

		// Program the CA Deskew values for each bit
		for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++) {
			rAMP_CADESKEW_CTRL(ch, bit_indx) = ca_bit_deskew[bit_indx];
		}
	}
}

// Loop through PerBitDeskewCode ranges for rddq until failing points for each byte (& bit) are found.
static void find_rddqcal_right_side_failing_point(uint32_t ch, uint32_t rnk, bool after_wrddqcal) { uint32_t dq_deskew; uint32_t all_bits_fail; uint32_t bits_fail_b[DQ_NUM_BYTES] = { 0 }; uint32_t rddqcalresult; uint32_t mask_b[DQ_NUM_BYTES]; int32_t start_b[DQ_NUM_BYTES]; uint32_t byte, bit; all_bits_fail = 0; dq_deskew = 0; // set the rddq sdll to negative dqmdllcode mdllcode[ch][AMP_DQ] = (rAMP_MDLLCODE(AMP_DQ, ch) & DLLVAL_BITS); for (byte = 0; byte < DQ_NUM_BYTES; byte++) { amp_set_rddq_sdll(ch, byte, (1 << SIGN_BIT_POS) + mdllcode[ch][AMP_DQ]); // initialize the mask for each byte lane mask_b[byte] = 0xFF << (byte * 8); } rddqcalresult = 0xFFFFFFFF; // PerBit Deskew lines cannot be pushed beyond DQ_MAX_DESKEW_PER_BIT value do { for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (bits_fail_b[byte] == 0) { for (bit = 0; bit < DQ_NUM_BITS_PER_BYTE; bit++) rAMP_RDDQDESKEW_CTRL(ch, byte, bit) = dq_deskew; } } // Call Basic run Dq Cal Commands if (after_wrddqcal == false) { amp_run_rddqcal(ch); rddqcalresult &= rAMP_DQCALRESULT(ch); } else { rddqcalresult &= wr_rd_pattern_result(ch, rnk, dq_deskew); } for (byte = 0; byte < DQ_NUM_BYTES; byte++) { // if all bits haven't failed yet and this run shows all bits failing, we have found the failing point for this byte if ((bits_fail_b[byte] == 0) && ((rddqcalresult & mask_b[byte]) == 0)) { bits_fail_b[byte] = 1; start_b[byte] = dq_deskew; } } all_bits_fail = bits_fail_b[0] & bits_fail_b[1] & bits_fail_b[2] & bits_fail_b[3]; if (all_bits_fail == 1) { // If failing point has been found for all bits, find the passing point now find_rddqcal_right_side_passing_point(ch, rnk, start_b, after_wrddqcal); } else { // To find right failing point, make more negative adjustment to the sdll (same as incrementing deskew) dq_deskew = dq_deskew + COARSE_STEP_SZ; } } while ((dq_deskew <= DQ_MAX_DESKEW_PER_BIT) && (all_bits_fail == 0)); if ((dq_deskew > DQ_MAX_DESKEW_PER_BIT) && (all_bits_fail == 0)) { // print error message as Right Failing 
Point cannot be found dprintf(DEBUG_INFO, "Memory Rddq cal: Right side failing point not found, max deskew limit reach for channel %d", ch); // Assume failure at this setting, and continue to passing point for (byte = 0; byte < DQ_NUM_BYTES; byte++) { // if all bits haven't failed yet, assign start_b for this byte to current reg setting if (bits_fail_b[byte] == 0) start_b[byte] = dq_deskew - COARSE_STEP_SZ; } find_rddqcal_right_side_passing_point(ch, rnk, start_b, after_wrddqcal); } // Reset deskew for all bits to 0 for (byte = 0; byte < DQ_NUM_BYTES; byte++) for (bit = 0; bit < DQ_NUM_BITS_PER_BYTE; bit++) rAMP_RDDQDESKEW_CTRL(ch, byte, bit) = 0; } // Purpose of this function is to start from right side failing point and find locations for every DQ bit // until the start of passing window for that bit is found // Save all this locations to compute the center of window static void find_rddqcal_right_side_passing_point(uint32_t ch, uint32_t rnk, int32_t *start_b, bool after_wrddqcal) { uint32_t chrnk_indx; bool switch_from_dqstodq, max_tap_value_reached; int32_t tap_value_b[DQ_NUM_BYTES]; uint32_t BitPass[DQ_TOTAL_BITS] = { 0 }; uint32_t SolidBitPass[DQ_TOTAL_BITS] = { 0 }; uint32_t rddqcalresult; uint32_t all_bits_pass; uint32_t all_bits_pass_b[DQ_NUM_BYTES] = { 0 }; uint32_t step_incr; uint32_t bit_indx, byte; int32_t dqmdllcode; int32_t saved_val; chrnk_indx = (ch * AMC_NUM_RANKS) + rnk; all_bits_pass = 0; switch_from_dqstodq = false; max_tap_value_reached = false; rddqcalresult = 0xFFFFFFFF; step_incr = FINER_STEP_SZ; dqmdllcode = mdllcode[ch][AMP_DQ]; for (byte = 0; byte < DQ_NUM_BYTES; byte++) { tap_value_b[byte] = start_b[byte]; } // Moving Right to Left to find point where each bit turns from FAIL TO PASS do { if (switch_from_dqstodq == false) { // continue to update per bit deskew until all bits pass for each byte lane for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (all_bits_pass_b[byte] == 0) { for (bit_indx = 0; bit_indx < DQ_NUM_BITS_PER_BYTE; 
bit_indx++) rAMP_RDDQDESKEW_CTRL(ch, byte, bit_indx) = tap_value_b[byte]; } } } else { // adjust rddq sdll until all bits pass for each byte lane for (byte = 0; byte < DQ_NUM_BYTES; byte++) if (all_bits_pass_b[byte] == 0) amp_set_rddq_sdll(ch, byte, INT_TO_OFFSET(tap_value_b[byte])); } // Run rddq calibration in hw if (after_wrddqcal == false) { amp_run_rddqcal(ch); rddqcalresult = rAMP_DQCALRESULT(ch); } else { rddqcalresult = wr_rd_pattern_result(ch, rnk, tap_value_b[0]); } // Make sure that each Bit sees a transition from 0 to 1 on DqCalresult Register for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) { byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE // Check if this bit passed during the calibration (not necessarily for first time) if ((BitPass[bit_indx] == 0) && ((rddqcalresult & (1 << bit_indx)) != 0)) { // Has this bit passed SOLID_PASS_DETECT number of times? Then consider it done if (SolidBitPass[bit_indx] == SOLID_PASS_DETECT) { BitPass[bit_indx] = 1; } else if (SolidBitPass[bit_indx] > 0) { SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1; } else { // bit passed for the first time, record this value in the global array as the right edge SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1; byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE if (switch_from_dqstodq == false) // consider mdllcode as '0' since sdll is set to -mdllcode saved_val = -1 * (tap_value_b[byte] + dqmdllcode); else saved_val = tap_value_b[byte]; rddq_cal_per_chrnk_right[chrnk_indx][bit_indx] = saved_val; } } else { // bit failed calibration, reset the pass count to 0 SolidBitPass[bit_indx] = 0; } } all_bits_pass = 1; for (byte = 0; byte < DQ_NUM_BYTES; byte++) all_bits_pass_b[byte] = 1; for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) { byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE // Did all the bits pass (SOLID_PASS_DETECT number of times) in this byte lane? 
// If anyone of the bits failed, then the byte flag is cleared all_bits_pass_b[byte] = all_bits_pass_b[byte] & BitPass[bit_indx]; // Did all bits in all byte lanes pass? all_bits_pass = all_bits_pass & BitPass[bit_indx]; } // If ALL bits are not passing - keep moving from Right to Left Side of window (by adding less negative adjustment to mdll) if (all_bits_pass == 0) { // Even if one of the byte lanes arrives early to tap_value = 0. Remain here until all byte lane catch up before proceeding to pushing out dq // check for all bytes reaching 0 on the tap value (could be deskew or sdll) int32_t all_bytes_tap = tap_value_b[0]; for (byte = 1; (byte < DQ_NUM_BYTES) && (all_bytes_tap == 0); byte++) { all_bytes_tap += tap_value_b[byte]; } // if the tap_value for all bytes has reached 0 on the deskew, make the transition to SDLL if ((all_bytes_tap == 0) && (switch_from_dqstodq == false)) { switch_from_dqstodq = true; for (byte = 0; byte < DQ_NUM_BYTES; byte++) { tap_value_b[byte] = (rAMP_DQSDLLCTRL_RD(ch, byte) & DLLVAL_BITS); tap_value_b[byte] = OFFSET_TO_INT(tap_value_b[byte]); } } // To find right side passing point, add less negative adjustment to mdll (same as decrementing deskew) // For deskew taps, we just decrement by step_incr if we haven't reached 0 yet if (switch_from_dqstodq == false) { for (byte = 0; byte < DQ_NUM_BYTES; byte++) if (tap_value_b[byte] > 0) tap_value_b[byte] -= step_incr; } else { // For sdll taps, increment it for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (all_bits_pass_b[byte] == 0) { tap_value_b[byte] += step_incr; } } } } // trigger for loop to end if any of the bytes reach max tap value for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (!max_tap_value_reached) max_tap_value_reached = (tap_value_b[byte] > MAX_SDLL_VAL); if (max_tap_value_reached) { if (all_bits_pass == 0) panic("Memory rddq calibration: Unable to find right side passing point, max tap value reached"); break; } } } while ((!max_tap_value_reached) && (all_bits_pass == 0)); } 
// Purpose of this function is to start push DQS out till left side failing point of Data window is found static void find_rddqcal_left_side_failing_point(uint32_t ch, uint32_t rnk, bool after_wrddqcal) { int32_t rddqsdll[DQ_NUM_BYTES]; uint32_t rddqcalresult; uint32_t all_bits_fail; uint32_t all_bits_fail_b[DQ_NUM_BYTES] = { 0 }; uint32_t step_incr; uint32_t mask_b[DQ_NUM_BYTES]; int32_t start_b[DQ_NUM_BYTES]; uint32_t byte; bool max_tap_value_reached = false; all_bits_fail = 0; rddqcalresult = 0xFFFFFFFF; step_incr = COARSE_STEP_SZ; for (byte = 0; byte < DQ_NUM_BYTES; byte++) { // initialize the mask for each byte lane mask_b[byte] = 0xFF << (byte * 8); // Get the starting values for RD DQS SDLL rddqsdll[byte] = rAMP_DQSDLLCTRL_RD(ch, byte); rddqsdll[byte] = OFFSET_TO_INT(rddqsdll[byte]); } // To find left failing point, keep adding less negative adjustment to mdll do { for (byte = 0; byte < DQ_NUM_BYTES; byte++) { // set the new sdll for this byte lane if all bits are not yet failing if (all_bits_fail_b[byte] == 0) amp_set_rddq_sdll(ch, byte, INT_TO_OFFSET(rddqsdll[byte])); } // Run rddqcal in hw if (after_wrddqcal == false) { amp_run_rddqcal(ch); rddqcalresult &= rAMP_DQCALRESULT(ch); } else { rddqcalresult &= wr_rd_pattern_result(ch, rnk, rddqsdll[0]); } // If the result of all bits in this byte show a fail, record this as the failing point all_bits_fail = 1; for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if ((all_bits_fail_b[byte] == 0) && ((rddqcalresult & mask_b[byte]) == 0)) { all_bits_fail_b[byte] = 1; start_b[byte] = rddqsdll[byte]; } all_bits_fail &= all_bits_fail_b[byte]; } // all bytes fail, call the function to find left passing point if (all_bits_fail == 1) { find_rddqcal_left_side_passing_point(ch, rnk, start_b, after_wrddqcal); } else { // if the byte has not yet failed, find the next sdll value to be set for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (all_bits_fail_b[byte] == 0) { rddqsdll[byte] += step_incr; } } } for (byte = 0; byte < 
DQ_NUM_BYTES; byte++) { // none of the previous bytes reached max_tap_value, then update the boolean if (!max_tap_value_reached) { max_tap_value_reached = (rddqsdll[byte] > MAX_SDLL_VAL); if (max_tap_value_reached) { dprintf(DEBUG_INFO, "Memory rddq calibration: Unable to find left failing point, max tap value reached for ch %d byte %d", ch, byte); break; } } } if (max_tap_value_reached) { // Continue to passing point if any of the bytes reaches max value and not all bits are failing if (all_bits_fail == 0) { for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (all_bits_fail_b[byte] == 0) start_b[byte] = MAX_SDLL_VAL; } find_rddqcal_left_side_passing_point(ch, rnk, start_b, after_wrddqcal); } } } while ((!max_tap_value_reached) && (all_bits_fail == 0)); } // Purpose of this function is to start from left side failing point and find passing locations for every DQ bit on left side of window // Save all the locations to compute the center of window later // To find left passing point, move to the right from the failing point, which means keep adding more negative adjustment to mdll static void find_rddqcal_left_side_passing_point(uint32_t ch, uint32_t rnk, int32_t *start_b, bool after_wrddqcal) { uint32_t chrnk_indx; bool max_tap_value_reached = false; int32_t tap_value_b[DQ_NUM_BYTES]; uint32_t BitPass[DQ_TOTAL_BITS] = { 0 }; uint32_t SolidBitPass[DQ_TOTAL_BITS] = { 0 }; uint32_t rddqcalresult; uint32_t all_bits_pass; uint32_t all_bits_pass_b[DQ_NUM_BYTES] = { 0 }; uint32_t step_incr; uint32_t bit_indx, byte; chrnk_indx = (ch * AMC_NUM_RANKS) + rnk; all_bits_pass = 0; rddqcalresult = 0xFFFFFFFF; step_incr = FINER_STEP_SZ; for (byte = 0; byte < DQ_NUM_BYTES; byte++) { tap_value_b[byte] = start_b[byte]; } // Finding Left side passing point on per bit level. 
Moving Left to Right (keep adding more negative adj to mdll) to find point where it turns from FAIL TO PASS do { for (byte = 0; byte < DQ_NUM_BYTES; byte++) { // if we haven't found all bits passing for this byte, push out new sdll value if (all_bits_pass_b[byte] == 0) amp_set_rddq_sdll(ch, byte, INT_TO_OFFSET(tap_value_b[byte])); } // Run rddqcal in hw if (after_wrddqcal == false) { amp_run_rddqcal(ch); rddqcalresult = rAMP_DQCALRESULT(ch); } else { rddqcalresult = wr_rd_pattern_result(ch, rnk, tap_value_b[0]); } // Make sure that each Bit sees a transition from 0 to 1 on DqCalresult Register for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) { // Check if this bit passed during the calibration (not necessarily for first time) if ((BitPass[bit_indx] == 0) && ((rddqcalresult & (1 << bit_indx)) != 0)) { // Has this bit passed SOLID_PASS_DETECT number of times? Then consider it done if (SolidBitPass[bit_indx] == SOLID_PASS_DETECT) { BitPass[bit_indx] = 1; } else if (SolidBitPass[bit_indx] > 0) { SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1; } else { // bit passed for the first time, record this value in the global array as the left edge SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1; byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE rddq_cal_per_chrnk_left[chrnk_indx][bit_indx] = tap_value_b[byte]; } } else { // bit failed calibration, reset the pass count to 0 SolidBitPass[bit_indx] = 0; } } all_bits_pass = 1; for (byte = 0; byte < DQ_NUM_BYTES; byte++) all_bits_pass_b[byte] = 1; for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) { byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE // Did all the bits pass (SOLID_PASS_DETECT number of times) in this byte lane? // If anyone of the bits failed, then the byte flag is cleared all_bits_pass_b[byte] = all_bits_pass_b[byte] & BitPass[bit_indx]; // Did all bits in all byte lanes pass? 
all_bits_pass = all_bits_pass & BitPass[bit_indx]; } // If ALL bits are not passing - keep moving from Left to Right Side of window (by adding more negative adjustment to mdll) if (all_bits_pass == 0) { for (byte = 0; byte < DQ_NUM_BYTES; byte++) { // if this byte lane does not have all passing bits, adjust this byte's sdll if (all_bits_pass_b[byte] == 0) { tap_value_b[byte] -= step_incr; } } } // check for end of loop condition for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (!max_tap_value_reached) max_tap_value_reached = (tap_value_b[byte] < (-1 * MAX_SDLL_VAL)); if (max_tap_value_reached) { if (all_bits_pass == 0) panic("Memory rddq calibration: Unable to find left passing point, max tap value reached"); break; } // panic if we get beyond -dqmdllcode, since we really shouldn't have to go that far if ((all_bits_pass == 0) && (tap_value_b[byte] < (-1 * mdllcode[ch][AMP_DQ]))) panic("Memory rddq calibration: Not yet found left passing point but SDLL < -dqmdllcode for ch %d byte %d", ch, byte); } } while ((!max_tap_value_reached) && (all_bits_pass == 0)); } static void rddq_program_final_values(bool after_wrddqcal) { uint32_t ch, bit_indx, byte; uint32_t chrnk0_indx, chrnk1_indx; int32_t rddq_bit_center[DQ_TOTAL_BITS]; int32_t rddq_bit_deskew[DQ_TOTAL_BITS]; int32_t left_pos_val; int32_t right_pos_val; int32_t max_rddq_center[DQ_NUM_BYTES]; int32_t dqmdllcode; for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) { dqmdllcode = mdllcode[ch][AMP_DQ]; // find the center point of passing window for each bit over all ranks for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) { chrnk0_indx = (ch * AMC_NUM_RANKS) + 0; left_pos_val = rddq_cal_per_chrnk_left[chrnk0_indx][bit_indx]; right_pos_val = rddq_cal_per_chrnk_right[chrnk0_indx][bit_indx]; if (AMC_NUM_RANKS > 1) { chrnk1_indx = (ch * AMC_NUM_RANKS) + 1; // find the endpoint that covers both ranks left_pos_val = find_common_endpoint(rddq_cal_per_chrnk_left[chrnk0_indx][bit_indx], 
rddq_cal_per_chrnk_left[chrnk1_indx][bit_indx], MIN_ENDPT); right_pos_val = find_common_endpoint(rddq_cal_per_chrnk_right[chrnk0_indx][bit_indx], rddq_cal_per_chrnk_right[chrnk1_indx][bit_indx], MAX_ENDPT); } // find center of the eye for this bit rddq_bit_center[bit_indx] = find_center_of_eye(left_pos_val, right_pos_val); } // , Need additional shift to DQ offset if (after_wrddqcal) { int8_t signed_byte_center_point[DQ_TOTAL_BITS]; // convert to signed bytes first as required by shift function for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) signed_byte_center_point[bit_indx] = (int8_t) rddq_bit_center[bit_indx]; // call platform specific amc routine to apply apropriate shifts depending on DRAM vendor amc_dram_shift_dq_offset(signed_byte_center_point, DQ_TOTAL_BITS); // convert shifted signed bytes back to signed ints for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) rddq_bit_center[bit_indx] = (int32_t) signed_byte_center_point[bit_indx]; } // initialize the max centerpoint to the 1st bit's center point in each byte lane for (byte = 0; byte < DQ_NUM_BYTES; byte++) max_rddq_center[byte] = rddq_bit_center[byte * DQ_NUM_BITS_PER_BYTE]; // Find the maximum CenterPoint per byte lane given each bit's center point for (bit_indx=0; bit_indx < DQ_TOTAL_BITS; bit_indx++) { byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE // if this bit's center point is greater than current max, make it the new max (we'll program this to sdll, and other values will require deskew) if (rddq_bit_center[bit_indx] > max_rddq_center[byte]) max_rddq_center[byte] = rddq_bit_center[bit_indx]; } // if the max for each byte lane is < -dqmdllcode, clamp it to -dqmdllcode (the remainder will go on per bit deskew) for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (max_rddq_center[byte] < (-1 * dqmdllcode)) max_rddq_center[byte] = (-1 * dqmdllcode); } // Compute the individual deskew values: any bits with center point < max for its byte lane will require deskew // Each bit's center 
is guaranteed to be <= max for its byte lane // Deskewing means adding more negative adjustment for this bit in addition to the sdll, which is clamped on the negative side to -dqmdllcode for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) { byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE if (rddq_bit_center[bit_indx] > max_rddq_center[byte]) panic("Memory Rddq calibration: rddq_bit_center[%d] = %d > max_rddq_center[%d] = %d\n", bit_indx, rddq_bit_center[bit_indx], byte, max_rddq_center[byte]); rddq_bit_deskew[bit_indx] = max_rddq_center[byte] - rddq_bit_center[bit_indx]; if ((rddq_bit_deskew[bit_indx] < 0) || (rddq_bit_deskew[bit_indx] > DQ_MAX_DESKEW_PER_BIT)) panic("Memory Rddq calibration: rddq_bit_deskew[%d] = %d invalid\n", bit_indx, rddq_bit_deskew[bit_indx]); } // Program the SDLL and deskew per bit for each byte lane for (byte = 0; byte < DQ_NUM_BYTES; byte++) { amp_set_rddq_sdll(ch, byte, INT_TO_OFFSET(max_rddq_center[byte])); // per bit deskew for this byte lane for (bit_indx = 0; bit_indx < DQ_NUM_BITS_PER_BYTE; bit_indx++) { rAMP_RDDQDESKEW_CTRL(ch, byte, bit_indx) = rddq_bit_deskew[(byte * DQ_NUM_BITS_PER_BYTE) + bit_indx]; } } } // for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) } static uint32_t wrlvl_encode_dlyval(uint32_t ch, uint32_t phy_type, uint32_t val) { uint32_t ret_val, mdll; mdll = mdllcode[ch][phy_type]; if (val < ( (2 * mdll) - 2 )) { ret_val = 0; } else if (val < ( 2 * mdll )) { ret_val = 1; } else if (val < ( (2 * mdll) + 3 )) { ret_val = 2; } else { ret_val = 3; } return ret_val; } static uint32_t wrlvl_encode_clk90dly(uint32_t ch, uint32_t val) { uint32_t ret_val, mdll; mdll = mdllcode[ch][AMP_DQ]; if (val < (mdll - 2)) { ret_val = 0; } else if (val < mdll) { ret_val = 1; } else if (val < (mdll + 3)) { ret_val = 2; } else { ret_val = 3; } return ret_val; } static void push_wrlvl_to_0s_region(uint32_t ch, uint32_t rnk) { uint32_t wrlvldata, byte; uint32_t cawrlvlcode = 0; bool max_tap_value_reached = false; uint32_t wrlvlrun = 
0xF; uint32_t dqwrlvlcode[DQ_NUM_BYTES] = { 0 }; // Note that incrementing cawrlvl sdll has opposite effect of incrementing dqwrlvl do { // If any byte lane shows that it returned a value of 1 - push ca wrlvl sdll out by 1 tap cawrlvlcode++; amp_set_cawrlvl_sdll(ch, cawrlvlcode, false); for (byte = 0; byte < DQ_NUM_BYTES; byte++) { // if this byte already showed a 0 during last run, push dqwrlvl sdll by 1 tap // this is done to ensure this byte remains at 0 despite cawrlvl sdll being incremented above if ((wrlvlrun & (1 << byte)) == 0) { dqwrlvlcode[byte]++; amp_set_dqwrlvl_sdll(ch, byte, dqwrlvlcode[byte], false); } } // Run Wrlvl calibration in hw amp_run_wrlvlcal(ch, wrlvlrun); // result in reported in AMPWRLVLDATA register wrlvldata = rAMP_DQWRLVLDATA(ch); // check if all bits for this byte returned a 0, then this byte is done for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (wrlvlrun & (1 << byte)) { if ((wrlvldata & (0xFF << (byte * DQ_NUM_BITS_PER_BYTE))) == 0) wrlvlrun &= ~(1 << byte); } } // Exit if ca or dq wrlvl sdlls reach max tap value if (cawrlvlcode == MAX_CAWRLVL_CODE) { max_tap_value_reached = true; if (wrlvlrun) panic("Memory Wrlvl calibration: CA sdll reached max tap value, yet all bytes not all 0s"); } else { for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (dqwrlvlcode[byte] == MAX_DQWRLVL_CODE) { if (wrlvlrun) panic("Memory Wrlvl calibration: DQ%d sdll reached max tap value, yet all bytes not all 0s", byte); max_tap_value_reached = true; break; } } } } while (wrlvlrun && !max_tap_value_reached); } // Keep incrementing dqsdll until the byte shows 1s again. 
This counters the casdll that was incremented previously in order to show 0s static void find_wrlvl_0to1_transition(uint32_t ch, uint32_t rnk) { uint32_t chrnk_indx, byte; uint32_t wrlvlrun, wrlvldata; bool max_tap_value_reached = false; uint32_t dqwrlvlcode[DQ_NUM_BYTES]; uint32_t cawrlvlcode = rAMP_CAWRLVLSDLLCODE(ch); wrlvlrun = 0xF; wrlvldata = 0; for (byte = 0; byte < DQ_NUM_BYTES; byte++) dqwrlvlcode[byte] = rAMP_DQWRLVLSDLLCODE(ch, byte); chrnk_indx = (ch * AMC_NUM_RANKS) + rnk; do { for (byte = 0; byte < DQ_NUM_BYTES; byte++) { // if this byte is still showing a 0, increment the sdll if (wrlvlrun & (1 << byte)) { dqwrlvlcode[byte]++; amp_set_dqwrlvl_sdll(ch, byte, dqwrlvlcode[byte], false); } } // run the wrlvl calibration in hw amp_run_wrlvlcal(ch, wrlvlrun); wrlvldata = rAMP_DQWRLVLDATA(ch); // check if all bits return a 1 for this byte, then this byte is done for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (wrlvlrun & (1 << byte)) { if (((int) (wrlvldata & (0xFF << (byte * DQ_NUM_BITS_PER_BYTE)))) == (0xFF << (byte * DQ_NUM_BITS_PER_BYTE))) wrlvlrun &= ~(1 << byte); } } // Exit if any of the byte lane's sdll reaches max for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (dqwrlvlcode[byte] == MAX_DQWRLVL_CODE) { if (wrlvlrun) panic("Memory Wrlvl calibration: DQ%d sdll reached max tap value, yet all bytes not all 1s", byte); max_tap_value_reached = true; break; } } } while (wrlvlrun && !max_tap_value_reached); // save the per byte codes for this channel and rank for (byte = 0; byte < DQ_NUM_BYTES; byte++) wrlvl_cal_per_chrnk_rise[chrnk_indx][byte] = dqwrlvlcode[byte]; // in the "5th byte" entry, save the cawrlvl code wrlvl_cal_per_chrnk_rise[chrnk_indx][byte] = cawrlvlcode; } // Go back towards the 0s region (that was found earlier). 
Note: not trying to find the next edge, just the previous edge that was found already static void find_wrlvl_1to0_transition(uint32_t ch, uint32_t rnk) { uint32_t chrnk_indx, byte; uint32_t wrlvlrun, wrlvldata; bool max_tap_value_reached = false; uint32_t dqwrlvlcode[DQ_NUM_BYTES]; uint32_t cawrlvlcode = rAMP_CAWRLVLSDLLCODE(ch); bool incr_cawrlvl = false; chrnk_indx = (ch * AMC_NUM_RANKS) + rnk; wrlvlrun = 0xF; // jump ahead by SOLID_PASS_DETECT into the 1s region for (byte = 0; byte < DQ_NUM_BYTES; byte++) { dqwrlvlcode[byte] = rAMP_DQWRLVLSDLLCODE(ch, byte); dqwrlvlcode[byte] += (SOLID_PASS_DETECT + 1); // + 1 because code is decremented before programming the sdll } do { // Make sure dqwrlvlsdll > 0, otherwise switch to cawrlvlsdll for (byte = 0; (byte < DQ_NUM_BYTES) && !incr_cawrlvl; byte++) { if (dqwrlvlcode[byte] == 0) incr_cawrlvl = true; } // if we've reached 0 on any dqwrlvlsdll that were being decremented, switch to incrementing the cawrlvlsdll (same effect) if (incr_cawrlvl) { cawrlvlcode++; amp_set_cawrlvl_sdll(ch, cawrlvlcode, false); // In order to keep bytes that have transitioned to 0 to stay there, increment dqwrlvlsdll (counters effect of incrementing cawrlvlsdll) for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if ((wrlvlrun & (1 << byte)) == 0) { dqwrlvlcode[byte]++; amp_set_dqwrlvl_sdll(ch, byte, dqwrlvlcode[byte], false); } } } else { // if run bit is set for this byte, push out the new sdll value after decrementing by 1 for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (wrlvlrun & (1 << byte)) { dqwrlvlcode[byte]--; amp_set_dqwrlvl_sdll(ch, byte, dqwrlvlcode[byte], false); } } } // run the wrlvl calibration in hw amp_run_wrlvlcal(ch, wrlvlrun); wrlvldata = rAMP_DQWRLVLDATA(ch); // check if all bits for this byte returned a 0, then this byte is done for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (wrlvlrun & (1 << byte)) { if ((wrlvldata & (0xFF << (byte * DQ_NUM_BITS_PER_BYTE))) == 0) wrlvlrun &= ~(1 << byte); } } // check if we've reached max 
tap value if (incr_cawrlvl && (cawrlvlcode == MAX_CAWRLVL_CODE)) { max_tap_value_reached = true; if (wrlvlrun) panic("Memory Wrlvl calibration: max tap value reached, yet all bytes not back to 0s"); } } while (wrlvlrun && !max_tap_value_reached); // save the per byte codes for this channel and rank for (byte = 0; byte < DQ_NUM_BYTES; byte++) wrlvl_cal_per_chrnk_fall[chrnk_indx][byte] = dqwrlvlcode[byte]; // in the "5th byte" entry, save the cawrlvl code wrlvl_cal_per_chrnk_fall[chrnk_indx][byte] = cawrlvlcode; } static void wrlvl_program_final_values(void) { uint32_t ch, chrnk0_indx, chrnk1_indx; uint32_t rank_rise_val[AMP_MAX_RANKS_PER_CHAN], rank_fall_val[AMP_MAX_RANKS_PER_CHAN]; uint32_t edge_pos[AMP_MAX_RANKS_PER_CHAN]; uint32_t common_edge_pos, min_edge_pos; uint32_t byte; uint32_t saved_val[DQ_NUM_BYTES + 1]; for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) { // we go upto DQ_NUM_BYTES + 1 to also take into account the cawrlvlcode that is stored in the 5th element for (byte = 0; byte < (DQ_NUM_BYTES + 1); byte++) { // Rank 0 chrnk0_indx = (ch * AMC_NUM_RANKS) + 0; rank_rise_val[0] = wrlvl_cal_per_chrnk_rise[chrnk0_indx][byte]; rank_fall_val[0] = wrlvl_cal_per_chrnk_fall[chrnk0_indx][byte]; // average of 2 values is the edge for this rank edge_pos[0] = (rank_rise_val[0] + rank_fall_val[0]) >> 1; common_edge_pos = edge_pos[0]; // Adjust for Dual ranks if (AMC_NUM_RANKS > 1) { chrnk1_indx = (ch * AMC_NUM_RANKS) + 1; rank_rise_val[1] = wrlvl_cal_per_chrnk_rise[chrnk1_indx][byte]; rank_fall_val[1] = wrlvl_cal_per_chrnk_fall[chrnk1_indx][byte]; edge_pos[1] = (rank_rise_val[1] + rank_fall_val[1]) >> 1; // common_edge_pos between both ranks is simply their average common_edge_pos = (edge_pos[0] + edge_pos[1]) >> 1; } // save the wrlvlsdll for each byte (and the ca) saved_val[byte] = common_edge_pos; } // Find the min among all bytes (and the ca) min_edge_pos = saved_val[DQ_NUM_BYTES]; // initialize min as the cawrlvlsdll for (byte = 0; byte < DQ_NUM_BYTES; byte++) if 
(saved_val[byte] < min_edge_pos) min_edge_pos = saved_val[byte]; // We'll subtract the min from all 5 sdlls, including ca // so the byte sdlls which are in opposite direction also need to be asjusted for (byte = 0; byte < (DQ_NUM_BYTES + 1); byte++) { saved_val[byte] -= min_edge_pos; // Program the values into the registers if (byte == DQ_NUM_BYTES) { // cawrlvl (use dlysel, which will require phyupdt and forceckelow) amp_set_cawrlvl_sdll(ch, saved_val[byte], true); } else { // dqwrlvl (use dlysel, which will require phyupdt and forceckelow) amp_set_dqwrlvl_sdll(ch, byte, saved_val[byte], true); } } } } // Keep pushing out WRDQS lines (controlled by WRDQM registers, oddly) until right side failing point is found static void find_wrdqcal_right_side_failing_point(uint32_t ch, uint32_t rnk) { uint32_t push_dqs_out; uint32_t all_bits_fail; uint32_t bits_fail_b[DQ_NUM_BYTES] = { 0 }; uint32_t wrdqcalresult_cumulative; uint32_t mask_b[DQ_NUM_BYTES]; int32_t start_b[DQ_NUM_BYTES]; uint32_t byte; uint32_t cawrlvlcode = rAMP_CAWRLVLSDLLCODE(ch); all_bits_fail = 0; push_dqs_out = 0; wrdqcalresult_cumulative = 0xFFFFFFFF; // set the wrdq sdll to negative dqmdllcode mdllcode[ch][AMP_DQ] = (rAMP_MDLLCODE(AMP_DQ, ch) & DLLVAL_BITS); for (byte = 0; byte < DQ_NUM_BYTES; byte++) { amp_set_wrdq_sdll(ch, byte, (-1 * mdllcode[ch][AMP_DQ])); // initialize the mask for each byte lane mask_b[byte] = 0xFF << (byte * 8); } do { // NOTE: When DQS are pushed out then - cawrlvl sdll needs to be pushed out as well with equal taps // can use dlysel (with phyupdt and forceckelow) amp_set_cawrlvl_sdll(ch, cawrlvlcode + push_dqs_out, true); // Keep pushing per bit DQS (controlled by DM regs, oddly) out until all bytes start to fail for (byte = 0; byte < DQ_NUM_BYTES; byte++) rAMP_WRDMDESKEW_CTRL(ch, byte) = push_dqs_out; // Perform the WrDq calibration with PRBS patterns wrdqcalresult_cumulative &= wr_rd_pattern_result(ch, rnk, push_dqs_out); all_bits_fail = 1; for (byte = 0; byte < DQ_NUM_BYTES; 
byte++) { if (bits_fail_b[byte] == 0) { // if all bits fail for this byte for the 1st time, we've found the right failing point if ((wrdqcalresult_cumulative & mask_b[byte]) == 0) { bits_fail_b[byte] = 1; start_b[byte] = push_dqs_out; } } all_bits_fail &= bits_fail_b[byte]; } // if all bits in all bytes fail, find the right passing point if (all_bits_fail == 1) { find_wrdqcal_right_side_passing_point(ch, rnk, start_b); } else { // increase the deskew since all bits are not yet failing push_dqs_out += COARSE_STEP_SZ; } } while ((push_dqs_out <= DQ_MAX_DESKEW_PER_BIT) && (all_bits_fail == 0)); if ((push_dqs_out > DQ_MAX_DESKEW_PER_BIT) && (all_bits_fail == 0)) { // Right Failing Point cannot be found dprintf(DEBUG_INFO, "Memory Wrdq calibration: Max deskew reached, but right failing point not found for ch %d", ch); // Assume failure point is current reg setting for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (bits_fail_b[byte] == 0) start_b[byte] = push_dqs_out - COARSE_STEP_SZ; } // conitnue to passing point find_wrdqcal_right_side_passing_point(ch, rnk, start_b); } // Before quitting restore the cawrlvlsdll and per byte deskew back to original values. 
// can use dlysel (with phyupdt and forceckelow) amp_set_cawrlvl_sdll(ch, cawrlvlcode, true); for (byte = 0; byte < DQ_NUM_BYTES; byte++) rAMP_WRDMDESKEW_CTRL(ch, byte) = 0; } // Keep decreasing per byte deskew until right passing point is found static void find_wrdqcal_right_side_passing_point(uint32_t ch, uint32_t rnk, int32_t *start_b) { uint32_t chrnk_indx; bool switch_from_dqstodq, max_tap_value_reached; int32_t tap_value_b[DQ_NUM_BYTES]; uint32_t BitPass[DQ_TOTAL_BITS] = { 0 }; uint32_t SolidBitPass[DQ_TOTAL_BITS] = { 0 }; uint32_t wrdqcalresult; uint32_t all_bits_pass; uint32_t all_bits_pass_b[DQ_NUM_BYTES] = { 0 }; uint32_t step_incr; uint32_t bit_indx, byte; int32_t dqmdllcode, max_tap_value; int32_t saved_val, max_value; uint32_t cawrlvlcode = rAMP_CAWRLVLSDLLCODE(ch); chrnk_indx = (ch * AMC_NUM_RANKS) + rnk; all_bits_pass = 0; switch_from_dqstodq = false; max_tap_value_reached = false; step_incr = FINER_STEP_SZ; dqmdllcode = mdllcode[ch][AMP_DQ]; max_tap_value = dqmdllcode - DELIMIT_POS_ADJ_WRDQSDLL + DQ_MAX_DESKEW_PER_BIT; // initialize tap_values to max of all bytes' start values max_value = start_b[0]; for (byte = 1; byte < DQ_NUM_BYTES; byte++) max_value = (start_b[byte] > max_value) ? 
start_b[byte] : max_value; for (byte = 0; byte < DQ_NUM_BYTES; byte++) { tap_value_b[byte] = max_value; } // Any change in DM_DESKEW registers will require an equal change to cawrlvl sdll do { if (switch_from_dqstodq == false) { // cawrlvlcode is decremented with tap_value_b each time // can use dlysel (with phyupdt and forceckelow) amp_set_cawrlvl_sdll(ch, cawrlvlcode, true); // Keep pushing per bit DQS out until all bytes start to fail for (byte = 0; byte < DQ_NUM_BYTES; byte++) rAMP_WRDMDESKEW_CTRL(ch, byte) = tap_value_b[byte]; } else { // adjust wrdq sdll until all bits pass for each byte lane for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (all_bits_pass_b[byte] == 0) amp_set_wrdq_sdll(ch, byte, tap_value_b[byte]); } } // Send the PRBS patterns and read them back to see which bits are passing or failing wrdqcalresult = wr_rd_pattern_result(ch, rnk, tap_value_b[0] + tap_value_b[1] + tap_value_b[2] + tap_value_b[3]); // Make sure that each Bit sees a transition from 0 (fail) to 1 (pass) on wrdqcalresult for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) { byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE // Check if this bit passed during the calibration (not necessarily for first time) if ((BitPass[bit_indx] == 0) && ((wrdqcalresult & (1 << bit_indx)) != 0)) { // Has this bit passed SOLID_PASS_DETECT number of times? 
Then consider it done if (SolidBitPass[bit_indx] == SOLID_PASS_DETECT) { BitPass[bit_indx] = 1; } else if (SolidBitPass[bit_indx] > 0) { SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1; } else { // bit passed for the first time, record this value in the global array as the right edge SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1; byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE if (switch_from_dqstodq == false) // consider mdllcode as '0' since sdll is set to -mdllcode saved_val = -1 * (tap_value_b[byte] + dqmdllcode); else saved_val = tap_value_b[byte]; wrdq_cal_per_chrnk_right[chrnk_indx][bit_indx] = saved_val; } } else { // bit failed calibration, reset the pass count to 0 SolidBitPass[bit_indx] = 0; } } all_bits_pass = 1; for (byte = 0; byte < DQ_NUM_BYTES; byte++) all_bits_pass_b[byte] = 1; for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) { byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE // Did all the bits pass (SOLID_PASS_DETECT number of times) in this byte lane? // If anyone of the bits failed, then the byte flag is cleared all_bits_pass_b[byte] = all_bits_pass_b[byte] & BitPass[bit_indx]; // Did all bits in all byte lanes pass? all_bits_pass = all_bits_pass & BitPass[bit_indx]; } // If ALL bits are not passing - keep moving from Right to Left Side of window (by adding less negative adjustment to mdll) if (all_bits_pass == 0) { // Even if one of the byte lanes arrives early to tap_value = 0. 
Remain here until all byte lane catch up before proceeding to pushing out dq // check for all bytes reaching 0 on the tap value (could be deskew or sdll) uint32_t all_bytes_tap = tap_value_b[0]; for (byte = 1; (byte < DQ_NUM_BYTES) && (all_bytes_tap == 0); byte++) { all_bytes_tap += tap_value_b[byte]; } // if the tap_value for all bytes has reached 0 on the deskew, make the transition to SDLL if ((all_bytes_tap == 0) && (switch_from_dqstodq == false)) { switch_from_dqstodq = true; for (byte = 0; byte < DQ_NUM_BYTES; byte++) { tap_value_b[byte] = (rAMP_DQSDLLCTRL_WR(ch, byte) & DLLVAL_BITS); tap_value_b[byte] = OFFSET_TO_INT(tap_value_b[byte]); } } // To find right side passing point, add less negative adjustment to mdll (same as decrementing deskew) // For deskew taps, we just decrement by step_incr if we haven't reached 0 yet // Note: All deskew taps will reach 0 at the same time since their start values are equal, and they are decremented regardless of pass or fail if (switch_from_dqstodq == false) { // Also decrement cawrlvlsdllcode along with tap_value_b if (tap_value_b[0] > 0) cawrlvlcode -= step_incr; for (byte = 0; byte < DQ_NUM_BYTES; byte++) if (tap_value_b[byte] > 0) tap_value_b[byte] -= step_incr; } else { // For sdll taps, increment for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (all_bits_pass_b[byte] == 0) { tap_value_b[byte] += step_incr; } } } } // trigger for loop to end if any of the bytes reach max tap value for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (!max_tap_value_reached) max_tap_value_reached = (tap_value_b[byte] > max_tap_value); if (max_tap_value_reached) { if (all_bits_pass == 0) panic("Memory wrdq calibration: Unable to find right side passing point for channel %d, max tap value reached. 
start_b[] = {0x%x 0x%x 0x%x 0x%x}", ch, start_b[0], start_b[1], start_b[2], start_b[3]); } } } while ((!max_tap_value_reached) && (all_bits_pass == 0)); } // To find left failing point, keep adding less negative adjustment to mdll static void find_wrdqcal_left_side_failing_point(uint32_t ch, uint32_t rnk) { int32_t wrdqsdll[DQ_NUM_BYTES]; uint32_t wrdqcalresult; uint32_t all_bits_fail; uint32_t all_bits_fail_b[DQ_NUM_BYTES] = { 0 }; uint32_t step_incr; uint32_t mask_b[DQ_NUM_BYTES]; int32_t start_b[DQ_NUM_BYTES]; uint32_t byte; bool max_tap_value_reached = false; int32_t dqmdllcode, max_tap_value; dqmdllcode = mdllcode[ch][AMP_DQ]; max_tap_value = dqmdllcode - DELIMIT_POS_ADJ_WRDQSDLL + DQ_MAX_DESKEW_PER_BIT; all_bits_fail = 0; wrdqcalresult = 0xFFFFFFFF; step_incr = COARSE_STEP_SZ; for (byte = 0; byte < DQ_NUM_BYTES; byte++) { // initialize the mask for each byte lane mask_b[byte] = 0xFF << (byte * 8); // Get the starting values for WR DQS SDLL wrdqsdll[byte] = rAMP_DQSDLLCTRL_WR(ch, byte) & DLLVAL_BITS; wrdqsdll[byte] = OFFSET_TO_INT(wrdqsdll[byte]); // Add per-bit deskew to wrdqsdll[byte] if sdll reached mdll - DELIMIT_POS_ADJ_WRDQSDLL (otherwise, deskew should be 0) // At this point per-bit deskew should be the same for each bit in this byte. 
Use bit 0's deskew value wrdqsdll[byte] += rAMP_WRDQDESKEW_CTRL(ch, byte, 0); } // Start with sdll value for which right passing point was found, then increase (less negative) until all bits fail do { for (byte = 0; byte < DQ_NUM_BYTES; byte++) { // set the new sdll for this byte lane if all bits are not yet failing if (all_bits_fail_b[byte] == 0) amp_set_wrdq_sdll(ch, byte, wrdqsdll[byte]); } // Send the PRBS patterns and read them back to see which bits are passing or failing wrdqcalresult &= wr_rd_pattern_result(ch, rnk, wrdqsdll[0] + wrdqsdll[1] + wrdqsdll[2] + wrdqsdll[3]); // If the result of all bits in this byte show a fail, record this as the failing point all_bits_fail = 1; for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if ((all_bits_fail_b[byte] == 0) && ((wrdqcalresult & mask_b[byte]) == 0)) { all_bits_fail_b[byte] = 1; start_b[byte] = wrdqsdll[byte]; } all_bits_fail &= all_bits_fail_b[byte]; } if (all_bits_fail == 1) { find_wrdqcal_left_side_passing_point (ch, rnk, start_b); } else { // if the byte has not yet failed, find the next sdll value to be set for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (all_bits_fail_b[byte] == 0) { wrdqsdll[byte] += step_incr; } } } for (byte = 0; byte < DQ_NUM_BYTES; byte++) { // none of the previous bytes reached max_tap_value, then update the boolean if (!max_tap_value_reached) { max_tap_value_reached = (wrdqsdll[byte] > max_tap_value); if (max_tap_value_reached) { dprintf(DEBUG_INFO, "Memory wrdq calibration: Unable to find left failing point, max tap value reached for ch %d byte %d", ch, byte); break; } } } if (max_tap_value_reached) { // Continue to passing point if any of the bytes reaches max value and not all bits are failing if (all_bits_fail == 0) { for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (all_bits_fail_b[byte] == 0) start_b[byte] = max_tap_value; } find_wrdqcal_left_side_passing_point(ch, rnk, start_b); } } } while ((!max_tap_value_reached) && (all_bits_fail == 0)); } static void 
find_wrdqcal_left_side_passing_point(uint32_t ch, uint32_t rnk, int32_t *start_b) { uint32_t chrnk_indx; bool max_tap_value_reached = false; int32_t tap_value_b[DQ_NUM_BYTES]; uint32_t BitPass[DQ_TOTAL_BITS] = { 0 }; uint32_t SolidBitPass[DQ_TOTAL_BITS] = { 0 }; uint32_t wrdqcalresult; uint32_t all_bits_pass; uint32_t all_bits_pass_b[DQ_NUM_BYTES] = { 0 }; uint32_t step_incr; uint32_t bit_indx, byte; int32_t dqmdllcode, max_tap_value; dqmdllcode = mdllcode[ch][AMP_DQ]; max_tap_value = -1 * dqmdllcode; chrnk_indx = (ch * AMC_NUM_RANKS) + rnk; all_bits_pass = 0; wrdqcalresult = 0xFFFFFFFF; step_incr = FINER_STEP_SZ; for (byte = 0; byte < DQ_NUM_BYTES; byte++) { tap_value_b[byte] = start_b[byte]; } // Finding Left side failing point on per bit level. Moving Left to Right (keep adding more negative adj to mdll) to find point where it turns from FAIL TO PASS do { // adjust wrdq sdll until all bits pass for each byte lane for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (all_bits_pass_b[byte] == 0) amp_set_wrdq_sdll(ch, byte, tap_value_b[byte]); } // Send the PRBS patterns and read them back to see which bits are passing or failing wrdqcalresult = wr_rd_pattern_result(ch, rnk, tap_value_b[0] + tap_value_b[1] + tap_value_b[2] + tap_value_b[3]); // Make sure that each Bit sees a transition from 0 (fail) to 1 (pass) on wrdqcalresult for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) { // Check if this bit passed during the calibration (not necessarily for first time) if ((BitPass[bit_indx] == 0) && ((wrdqcalresult & (1 << bit_indx)) != 0)) { // Has this bit passed SOLID_PASS_DETECT number of times? 
Then consider it done if (SolidBitPass[bit_indx] == SOLID_PASS_DETECT) { BitPass[bit_indx] = 1; } else if (SolidBitPass[bit_indx] > 0) { SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1; } else { // bit passed for the first time, record this value in the global array as the right edge SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1; byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE wrdq_cal_per_chrnk_left[chrnk_indx][bit_indx] = tap_value_b[byte]; } } else { // bit failed calibration, reset the pass count to 0 SolidBitPass[bit_indx] = 0; } } all_bits_pass = 1; for (byte = 0; byte < DQ_NUM_BYTES; byte++) all_bits_pass_b[byte] = 1; for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) { byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE // Did all the bits pass (SOLID_PASS_DETECT number of times) in this byte lane? // If anyone of the bits failed, then the byte flag is cleared all_bits_pass_b[byte] = all_bits_pass_b[byte] & BitPass[bit_indx]; // Did all bits in all byte lanes pass? all_bits_pass = all_bits_pass & BitPass[bit_indx]; } // If ALL bits are not passing - keep moving from Left to Right Side of window (by adding more negative adjustment to mdll) if (all_bits_pass == 0) { // For sdll taps, increment for neg tap_val, decrement for positive for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (all_bits_pass_b[byte] == 0) { tap_value_b[byte] -= step_incr; } } } // check for end of loop condition for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (!max_tap_value_reached) max_tap_value_reached = (tap_value_b[byte] < max_tap_value); if (max_tap_value_reached) { if (all_bits_pass_b[byte] == 0) panic("Memory wrdq calibration: Unable to find left passing point, max tap value reached. 
start_b[] = {0x%x, 0x%x, 0x%x, 0x%x}", start_b[0], start_b[1], start_b[2], start_b[3]); break; } } } while ((!max_tap_value_reached) && (all_bits_pass == 0)); } static void wrdq_program_final_values(void) { uint32_t ch, bit_indx, byte; uint32_t chrnk0_indx, chrnk1_indx; int32_t dqmdllcode; int32_t wrdq_bit_center[DQ_TOTAL_BITS]; int32_t wrdq_bit_deskew[DQ_TOTAL_BITS]; int32_t left_pos_val; int32_t right_pos_val; int32_t min_wrdq_center[DQ_NUM_BYTES]; int32_t min_dq_deskew_code, max_dq_deskew_code; for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) { dqmdllcode = mdllcode[ch][AMP_DQ]; // find the center point of passing window for each bit over all ranks for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) { chrnk0_indx = (ch * AMC_NUM_RANKS) + 0; left_pos_val = wrdq_cal_per_chrnk_left[chrnk0_indx][bit_indx]; right_pos_val = wrdq_cal_per_chrnk_right[chrnk0_indx][bit_indx]; if (AMC_NUM_RANKS > 1) { chrnk1_indx = (ch * AMC_NUM_RANKS) + 1; // find the endpoint that covers both ranks left_pos_val = find_common_endpoint(wrdq_cal_per_chrnk_left[chrnk0_indx][bit_indx], wrdq_cal_per_chrnk_left[chrnk1_indx][bit_indx], MIN_ENDPT); right_pos_val = find_common_endpoint(wrdq_cal_per_chrnk_right[chrnk0_indx][bit_indx], wrdq_cal_per_chrnk_right[chrnk1_indx][bit_indx], MAX_ENDPT); } // find center of the eye for this bit wrdq_bit_center[bit_indx] = find_center_of_eye(left_pos_val, right_pos_val); } // , Need additional shift to DQ offset int8_t signed_byte_center_point[DQ_TOTAL_BITS]; // convert to signed bytes first as required by the shift function for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) signed_byte_center_point[bit_indx] = (int8_t) wrdq_bit_center[bit_indx]; // call platform specific amc routine to apply apropriate shifts depending on DRAM vendor amc_dram_shift_dq_offset(signed_byte_center_point, DQ_TOTAL_BITS); // convert shifted signed bytes back to offset format for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) wrdq_bit_center[bit_indx] = (int32_t) 
signed_byte_center_point[bit_indx]; // initialize the min centerpoint to the 1st bit's center point in each byte lane for (byte = 0; byte < DQ_NUM_BYTES; byte++) min_wrdq_center[byte] = wrdq_bit_center[byte * DQ_NUM_BITS_PER_BYTE]; // Find the min CenterPoint per byte lane given each bit's center point for (bit_indx=0; bit_indx < DQ_TOTAL_BITS; bit_indx++) { byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE // if this bit's center point is less than current min, make it the new min // if this bit's center point is less than current min, make it the new min if (wrdq_bit_center[bit_indx] < min_wrdq_center[byte]) min_wrdq_center[byte] = wrdq_bit_center[bit_indx]; } // for positive value, clamp it to mdllcode - DELIMIT_POS_ADJ_WRDQSDLL for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (min_wrdq_center[byte] > (dqmdllcode - DELIMIT_POS_ADJ_WRDQSDLL)) min_wrdq_center[byte] = (dqmdllcode - DELIMIT_POS_ADJ_WRDQSDLL); } // Compute the individual deskew values: any bits with center point > min for its byte lane will require deskew // Each bit's center is guaranteed to be >= min for its byte lane // Deskewing means adding more positive adjustment for this bit in addition to the sdll, which is clamped on the negative side to -dqmdllcode // and clamped on the positive side to (mdllcode - DELIMIT_POS_ADJ_WRDQSDLL) for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) { byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE if (wrdq_bit_center[bit_indx] < min_wrdq_center[byte]) panic("Memory Wrdq Calibration: wrdq_bit_center[%d] = (%d) < min_wrdq_center[%d] = %d\n", bit_indx, wrdq_bit_center[bit_indx], byte, min_wrdq_center[byte]); wrdq_bit_deskew[bit_indx] = wrdq_bit_center[bit_indx] - min_wrdq_center[byte]; // Make sure deskew value programmed is not negative and is <= DQ_MAX_DESKEW_PER_BIT if ((wrdq_bit_deskew[bit_indx] < 0) || (wrdq_bit_deskew[bit_indx] > DQ_MAX_DESKEW_PER_BIT)) panic("Memory Wrdq Calibration: wrdq_bit_deskew[%d] = %d invalid\n", bit_indx, 
wrdq_bit_deskew[bit_indx]); } // if the min for each byte lane is < -dqmdllcode, we'll need to adjust/clamp it to -dqmdllcode for (byte = 0; byte < DQ_NUM_BYTES; byte++) { if (min_wrdq_center[byte] < (-1 * dqmdllcode)) { int32_t dqs_deskew = (-1 * dqmdllcode) - min_wrdq_center[byte]; // put the remainder on DQS rAMP_WRDMDESKEW_CTRL(ch, byte) = dqs_deskew; min_wrdq_center[byte] = (-1 * dqmdllcode); } } // Program the SDLL and deskew per bit for each byte lane for (byte = 0; byte < DQ_NUM_BYTES; byte++) { amp_set_wrdq_sdll(ch, byte, min_wrdq_center[byte]); // init the min and max deskew values for each byte to the 1st bit in the byte min_dq_deskew_code = wrdq_bit_deskew[(byte * DQ_NUM_BITS_PER_BYTE)]; max_dq_deskew_code = wrdq_bit_deskew[(byte * DQ_NUM_BITS_PER_BYTE)]; // per bit deskew for this byte lane for (bit_indx = 0; bit_indx < DQ_NUM_BITS_PER_BYTE; bit_indx++) { rAMP_WRDQDESKEW_CTRL(ch, byte, bit_indx) = wrdq_bit_deskew[(byte * DQ_NUM_BITS_PER_BYTE) + bit_indx]; // is this bit the new min or max? if (wrdq_bit_deskew[(byte * DQ_NUM_BITS_PER_BYTE) + bit_indx] < min_dq_deskew_code) min_dq_deskew_code = wrdq_bit_deskew[(byte * DQ_NUM_BITS_PER_BYTE) + bit_indx]; else if (wrdq_bit_deskew[(byte * DQ_NUM_BITS_PER_BYTE) + bit_indx] > max_dq_deskew_code) max_dq_deskew_code = wrdq_bit_deskew[(byte * DQ_NUM_BITS_PER_BYTE) + bit_indx]; } // find midpoint of deskew registers for this byte, and program it to DM (controlled by DQS regs, oddly) rAMP_WRDQSDESKEW_CTRL(ch, byte) = (min_dq_deskew_code + max_dq_deskew_code) >> 1; } } // for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) } // This function writes PRBS7 patterns to dram for given channel and rank, // and reads them back. 
Read back values are compared with data that was written static uint32_t wr_rd_pattern_result(uint32_t ch, uint32_t rnk, uint32_t sdll_value) { uint32_t chrnk_indx, result, result_per_wr_and_rdbk; uint32_t pattern_indx, pattern, readback_data; uint32_t col, word; uint64_t mem_region, mem_addr; uint32_t all_bits = 0xFFFFFFFF; uint32_t consecutive_cols_per_chrnk = (DQ_CONSECUTIVE_BYTES_PER_CHRNK / DQ_BYTES_PER_COL); result = all_bits; result_per_wr_and_rdbk = all_bits; chrnk_indx = (ch * AMC_NUM_RANKS) + rnk; pattern_indx = sdll_value & DLLVAL_BITS; // sdll tap indexes into pattern array // need APB read inserted in function wr_rd_pattern_result rAMP_RDFIFOPTRSTS(ch); // write the patterns to memory 4 bytes at a time // interleaving applies every DQ_CONSECUTIVE_BYTES_PER_CHRNK bytes, so recompute the address at that point // Note that bank and row are fixed for (col = 0; col < DQ_NUM_PATTERNS; col += consecutive_cols_per_chrnk) { mem_region = amc_get_uncached_dram_virt_addr(ch, rnk, DQ_BANK, DQ_ROW, col); // next 16 words (or columns) are consecutively stored in a [channel,rank] combo for (word = 0; word < consecutive_cols_per_chrnk; word++) { mem_addr = mem_region + (uint64_t)((word << 2)); // last pattern in array is dummy value, so skip it pattern = DQ_PRBS7_PATTERNS[pattern_indx % (DQ_NUM_PATTERNS - 1)]; pattern_indx++; // write the pattern *(uint32_t *)mem_addr = pattern; } } pattern_indx = sdll_value & DLLVAL_BITS; // Now, read back the patterns (have to do it in a separate loop than the writes to get more robust calib values) for (col = 0; col < DQ_NUM_PATTERNS; col += consecutive_cols_per_chrnk) { mem_region = amc_get_uncached_dram_virt_addr(ch, rnk, DQ_BANK, DQ_ROW, col); // next 16 words (or columns) are consecutively stored in a [channel,rank] combo for (word = 0; word < consecutive_cols_per_chrnk; word++) { mem_addr = mem_region + (uint64_t)((word << 2)); // last pattern in array is dummy value, so skip it pattern = DQ_PRBS7_PATTERNS[pattern_indx % 
(DQ_NUM_PATTERNS - 1)]; pattern_indx++; // read the pattern readback_data = *(uint32_t *)mem_addr; // records if read back value was different than written value by // clearing bits that are different in the final result result_per_wr_and_rdbk &= ~(readback_data ^ pattern); } // result variable accumulates the results of all pattern matching results for a given sdll_value result &= result_per_wr_and_rdbk; } // failing bits are clear, passing bits are set return result; } // Save or restore ca and wrlvl registers for resume boot // Registers must be stored/retrieved in exactly the order below static void save_restore_ca_wrlvl_regs(uint32_t save_or_restore) { uint32_t ch, bit_indx, byte; uint32_t byte_pos = 0; if (save_or_restore == CALIB_SAVE) { for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) { // save the CA registers for this channel uint8_t casdll = (uint8_t) (rAMP_CASDLLCTRL(ch) & DLLVAL_BITS); for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++) { uint8_t ca_deskew = (uint8_t) (rAMP_CADESKEW_CTRL(ch, bit_indx) & DESKEW_CTRL_BITS); uint8_t ca_offset = INT_TO_OFFSET(OFFSET_TO_INT(casdll) + OFFSET_TO_INT(ca_deskew)); cal_pmu_bits[byte_pos++] = ca_offset; } // CK, CS, and CKE share the same value uint8_t ck_deskew = (uint8_t) (rAMP_CKDESKEW_CTRL(ch) & DESKEW_CTRL_BITS); cal_pmu_bits[byte_pos++] = ck_deskew; // save the WrLvl registers for this channel (4 DQ SDLLs and 1 CA SDLL) for (byte = 0; byte < (DQ_NUM_BYTES + 1); byte++) { // cawrlvlsdll is stored as the "5th" byte if (byte == DQ_NUM_BYTES) cal_pmu_bits[byte_pos++] = (uint8_t) (rAMP_CAWRLVLSDLLCODE(ch) & DLLVAL_BITS); else cal_pmu_bits[byte_pos++] = (uint8_t) (rAMP_DQWRLVLSDLLCODE(ch, byte) & DLLVAL_BITS); } } #if !SUB_PLATFORM_S7002 // Save the cal_pmu_bits array to PMU nvram if (power_store_memory_calibration((void *) cal_pmu_bits, CALIB_NUM_BYTES_TO_SAVE) == 0) { #if AMP_NO_PMU_PANIC printf("Unable to save memory calibration values to PMU nvram\n"); #else panic("Unable to save memory calibration values to PMU 
nvram\n"); #endif } #endif // #if !SUB_PLATFORM_S7002 } else { #if !SUB_PLATFORM_S7002 // Retrieve cal_pmu_bits array from PMU nvram if (power_load_memory_calibration((void *) cal_pmu_bits, CALIB_NUM_BYTES_TO_SAVE) == 0) { #if AMP_NO_PMU_PANIC printf("Unable to load memory calibration values from PMU nvram\n"); #else panic("Unable to load memory calibration values from PMU nvram\n"); #endif } #endif // #if !SUB_PLATFORM_S7002 save_masterdll_values(); for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) { int8_t casdll = OFFSET_TO_INT(cal_pmu_bits[byte_pos]); int32_t ca_offset[CA_NUM_BITS]; for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++) { ca_offset[bit_indx] = OFFSET_TO_INT(cal_pmu_bits[byte_pos]); byte_pos++; if (ca_offset[bit_indx] < casdll) casdll = ca_offset[bit_indx]; } // get the value pushed on CK, CS, CKE signals, add it to casdll casdll -= OFFSET_TO_INT(cal_pmu_bits[byte_pos]); byte_pos++; uint8_t caclk = 0; // check if sdll < -camdllcode, then clamp to it. If sdll > camdllcode - DELIMIT_POS_ADJ_CASDLL, then also clamp it. 
if (casdll < (-1 * mdllcode[ch][AMP_CA])) { caclk = (uint8_t) ((-1 * casdll) - mdllcode[ch][AMP_CA]); casdll = (-1 * mdllcode[ch][AMP_CA]); } else if (casdll > (mdllcode[ch][AMP_CA] - DELIMIT_POS_ADJ_CASDLL)) { casdll = mdllcode[ch][AMP_CA] - DELIMIT_POS_ADJ_CASDLL; } // write the casdll register, and caclk into ctl signals amp_push_casdll_out(ch, casdll); if (caclk) amp_push_ctl_out(ch, caclk); // compute deskew and write to the per bit deskew registers for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++) { uint8_t ca_deskew = (uint8_t) (ca_offset[bit_indx] - (casdll - caclk)); rAMP_CADESKEW_CTRL(ch, bit_indx) = ca_deskew;; } for (byte = 0; byte < (DQ_NUM_BYTES + 1); byte++) { uint8_t wrlvlsdll = cal_pmu_bits[byte_pos++]; // At this point, DRAM is not in WRLVL mode so can use dlysel with forceckelow if (byte == DQ_NUM_BYTES) amp_set_cawrlvl_sdll(ch, wrlvlsdll, true); else amp_set_dqwrlvl_sdll(ch, byte, wrlvlsdll, true); } } } } // Bit packing functions are not needed (enough space in PMU to be rid of this complexity) #if 0 // Inserts the data at given byte and bit position in the cal_pmu_bits array // Assumes that num_bits is always <= 8 static void cal_save_value(uint8_t data, uint32_t num_bits, uint32_t *bit_pos, uint32_t *byte_pos) { uint32_t space_in_this_byte; uint8_t mask; if (((*bit_pos) > 7) || ((*byte_pos) >= CALIB_PMU_BYTES)) panic("Error! bit position %d > 7 or byte position %d > capacity (%d)\n", *bit_pos, *byte_pos, CALIB_PMU_BYTES); // how many bits left in this byte? 
space_in_this_byte = 8 - (*bit_pos); // we'll grab as many bits from the data as there is space in this byte if (space_in_this_byte >= num_bits) mask = (1 << num_bits) - 1; else mask = (1 << space_in_this_byte) - 1; // Set the data value at given byte (only as many bits as space and making sure to preserve the other bits in this byte) cal_pmu_bits[*byte_pos] |= ((data & mask) << *bit_pos); if (space_in_this_byte < num_bits) { // any remainder bits get saved to the next byte cal_pmu_bits[(*byte_pos) + 1] = (data >> space_in_this_byte); (*byte_pos)++; *bit_pos = num_bits - space_in_this_byte; } else if (space_in_this_byte == num_bits) { (*byte_pos)++; *bit_pos = 0; } else { (*bit_pos) += num_bits; } } // Retrieve the data at given byte and bit position in the cal_pmu_bits array // Assumes that num_bits is always <= 8 static uint8_t cal_retrieve_value(uint32_t num_bits, uint32_t *bit_pos, uint32_t *byte_pos) { uint32_t space_in_this_byte; uint8_t mask, remainder_mask, ret_val; if (((*bit_pos) > 7) || ((*byte_pos) >= CALIB_PMU_BYTES)) panic("Error! bit position %d > 7 or byte position %d > capacity (%d)\n", *bit_pos, *byte_pos, CALIB_PMU_BYTES); // how many bits left in this byte? 
space_in_this_byte = 8 - (*bit_pos); // we'll grab as many bits from the array as there is space in this byte (max of num_bits) if (space_in_this_byte >= num_bits) mask = (1 << num_bits) - 1; else { mask = (1 << space_in_this_byte) - 1; remainder_mask = (1 << (num_bits - space_in_this_byte)) - 1; } // Get the data value at given byte (only as many bits as space) ret_val = (cal_pmu_bits[*byte_pos] >> *bit_pos) & mask; if (space_in_this_byte < num_bits) { // any remainder bits get loaded from the next byte ret_val |= (cal_pmu_bits[(*byte_pos) + 1] & remainder_mask) << space_in_this_byte; (*byte_pos)++; *bit_pos = num_bits - space_in_this_byte; } else if (space_in_this_byte == num_bits) { (*byte_pos)++; *bit_pos = 0; } else { (*bit_pos) += num_bits; } return ret_val; } #endif // Before starting dq calibration, saves the contents of dram region that will be written to with calibration patterns. // After calibration is complete, restores the contents back to DRAM. static void save_restore_memory_region(bool dqcal_start) { uint32_t rnk; volatile uintptr_t mem_addr, src, dest; mem_addr = SDRAM_BASE_UNCACHED; for (rnk = 0; rnk < AMC_NUM_RANKS; rnk++) { mem_addr |= (rnk << DQ_ADDR_RANK_BIT); if (dqcal_start) { dest = (uintptr_t) &(dqcal_saved_data[rnk][0]); src = mem_addr; } else { dest = mem_addr; src = (uintptr_t) &(dqcal_saved_data[rnk][0]); } // we'll be writing (or have written) the patterns for each channel memcpy((void *) dest, (void *) src, sizeof(DQ_PRBS7_PATTERNS) * AMC_NUM_CHANNELS); } } #if 0 // Given an input where bit SIGN_BIT_POS represents the sign, and the rest is magnitude // separate out the sign and magnitude and return those values to the caller static void get_offset_sign_magnitude(uint32_t offset, uint32_t *neg_bit_set, uint32_t *tap_val) { *neg_bit_set = (offset & (1 << SIGN_BIT_POS)) >> SIGN_BIT_POS; *tap_val = offset - (*neg_bit_set << SIGN_BIT_POS); } #endif static int32_t find_center_of_eye(int32_t left_pos_val, int32_t right_pos_val) { if 
(left_pos_val < right_pos_val) panic("Memory calibration: find_center_of_eye: Left value (0x%x) is < right value (0x%x)", left_pos_val, right_pos_val); // center of 2 signed integers is simply their average return ((left_pos_val + right_pos_val) / 2); } // Select the value that would include the other value in the eye static int32_t find_common_endpoint(int32_t val0, int32_t val1, uint32_t min_or_max) { int32_t retVal = val0; // For the right endpoint, select the rightmost value on the number line (max value) if (min_or_max == MAX_ENDPT) { retVal = (val0 > val1) ? val0 : val1; } // For the left endpoint, select the leftmost value (min value) else { retVal = (val0 < val1) ? val0 : val1; } return retVal; }