3191 lines
114 KiB
C
3191 lines
114 KiB
C
|
/*
 * Copyright (C) 2011-2014 Apple Inc. All rights reserved.
 *
 * This document is the property of Apple Inc.
 * It is considered confidential and proprietary.
 *
 * This document may not be reproduced or transmitted in any form,
 * in whole or in part, without the express written permission of
 * Apple Inc.
 */
|
||
|
|
||
|
#include <debug.h>
|
||
|
#include <sys/menu.h>
|
||
|
#include <drivers/amc/amc.h>
|
||
|
#include <drivers/amc/amc_phy.h>
|
||
|
#include <drivers/amc/amc_regs.h>
|
||
|
#include <drivers/amp/amp_v2.h>
|
||
|
#include <drivers/amp/amp_v2_calibration.h>
|
||
|
#include <drivers/dram.h>
|
||
|
#include <drivers/miu.h>
|
||
|
#include <drivers/power.h>
|
||
|
#include <platform.h>
|
||
|
#include <sys.h>
|
||
|
|
||
|
// PRBS7 patterns used for ca calibration. amc_phy_params.cacalib_sw_loops * amc_phy_params.cacalib_hw_loops must equal 64
|
||
|
// We extend the size by 8 and repeat the first 8 values because when swloop=64, we don't go outside the array for programming CACALPAT1-7
|
||
|
static const uint32_t CA_PRBS7_PATTERNS[CA_NUM_PATTERNS + 8] = {
|
||
|
0x2550B, 0xCF135, 0xC4342, 0x67BFF, 0x825A0, 0x1487E, 0x984EF, 0xEA43E,
|
||
|
0x0B277, 0xA388D, 0xE5E5F, 0x96DDe, 0x8CC91, 0x720D1, 0xE1649, 0xA8ACA,
|
||
|
0x466E2, 0x73381, 0x1A14F, 0xFEC40, 0x93698, 0x49C83, 0xEEC28, 0x35563,
|
||
|
0x692CE, 0xE4D0F, 0x6DAD8, 0xDAA1B, 0xA70AB, 0xDB94B, 0x5C7AD, 0x8DFC1,
|
||
|
0x897D7, 0xB70C3, 0x7DAB0, 0x7C9E0, 0x87EE6, 0xD186C, 0x04816, 0x3E714,
|
||
|
0xCAA73, 0x01350, 0xFB706, 0x5668A, 0xD507A, 0x3AF02, 0xF4D67, 0xCB923,
|
||
|
0xFA456, 0xAD18C, 0x836F0, 0xEEF78, 0xCE265, 0x3F444, 0x31D75, 0x575DA,
|
||
|
0x2E77C, 0x6C988, 0x21D1D, 0xF1621, 0x0E931, 0x668AF, 0x792A6, 0x42EF4,
|
||
|
0x2550B, 0xCF135, 0xC4342, 0x67BFF, 0x825A0, 0x1487E, 0x984EF, 0xEA43E,
|
||
|
};
|
||
|
|
||
|
|
||
|
// PRBS patterns for Wrdq and Rddq (for the one after Wrdq) calibration
|
||
|
static const uint32_t DQ_PRBS7_PATTERNS[DQ_NUM_PATTERNS] = {
|
||
|
0x85858585, 0x4a4a4a4a, 0x9a9a9a9a, 0x9e9e9e9e, 0xa1a1a1a1, 0x88888888, 0xffffffff, 0xcfcfcfcf,
|
||
|
0xd0d0d0d0, 0x04040404, 0x3f3f3f3f, 0x29292929, 0x77777777, 0x30303030, 0x1f1f1f1f, 0xd4d4d4d4,
|
||
|
0x3b3b3b3b, 0x16161616, 0x5e5e5e5e, 0x47474747, 0x2f2f2f2f, 0xcbcbcbcb, 0xefefefef, 0x2d2d2d2d,
|
||
|
0x48484848, 0x19191919, 0x68686868, 0xe4e4e4e4, 0x24242424, 0xc2c2c2c2, 0x65656565, 0x51515151,
|
||
|
0x71717171, 0x8c8c8c8c, 0xc0c0c0c0, 0xe6e6e6e6, 0xa7a7a7a7, 0x34343434, 0x20202020, 0xfdfdfdfd,
|
||
|
0x4c4c4c4c, 0x26262626, 0x41414141, 0x93939393, 0x14141414, 0xdddddddd, 0xb1b1b1b1, 0x6a6a6a6a,
|
||
|
0x67676767, 0xd2d2d2d2, 0x87878787, 0xc9c9c9c9, 0x6c6c6c6c, 0xdbdbdbdb, 0x0d0d0d0d, 0xb5b5b5b5,
|
||
|
0x55555555, 0x4e4e4e4e, 0xa5a5a5a5, 0xb7b7b7b7, 0xd6d6d6d6, 0xb8b8b8b8, 0xe0e0e0e0, 0x1b1b1b1b,
|
||
|
0xebebebeb, 0x12121212, 0x61616161, 0x6e6e6e6e, 0x58585858, 0xfbfbfbfb, 0xf0f0f0f0, 0xf9f9f9f9,
|
||
|
0x73737373, 0x0f0f0f0f, 0x36363636, 0xa3a3a3a3, 0x0b0b0b0b, 0x09090909, 0x8a8a8a8a, 0x7c7c7c7c,
|
||
|
0x39393939, 0x95959595, 0xa8a8a8a8, 0x02020202, 0x83838383, 0xf6f6f6f6, 0x45454545, 0xacacacac,
|
||
|
0x3d3d3d3d, 0xaaaaaaaa, 0x81818181, 0x75757575, 0xb3b3b3b3, 0xe9e9e9e9, 0x91919191, 0x97979797,
|
||
|
0x2b2b2b2b, 0xf4f4f4f4, 0xc6c6c6c6, 0x5a5a5a5a, 0x78787878, 0x06060606, 0xbcbcbcbc, 0xdfdfdfdf,
|
||
|
0x32323232, 0x9c9c9c9c, 0x22222222, 0x7e7e7e7e, 0xbabababa, 0x63636363, 0xedededed, 0xaeaeaeae,
|
||
|
0xbebebebe, 0x5c5c5c5c, 0xc4c4c4c4, 0xd9d9d9d9, 0x8e8e8e8e, 0x43434343, 0x10101010, 0xe2e2e2e2,
|
||
|
0x98989898, 0x1d1d1d1d, 0x57575757, 0xcdcdcdcd, 0x53535353, 0xf2f2f2f2, 0x7a7a7a7a, 0x85858585
|
||
|
};
|
||
|
|
||
|
|
||
|
static uint32_t ca_patterns_mask[CA_NUM_PATTERNS];
|
||
|
static int32_t mdllcode[AMC_NUM_CHANNELS][2]; // 1 for AMP_DQ and 1 for AMP_CA
|
||
|
|
||
|
// Used to save calibration values for each bit per channel and rank for every iteration
|
||
|
static int32_t ca_cal_per_loopchrnk_right[CA_MAX_LOOP_CHN_RNK][CA_NUM_BITS];
|
||
|
static int32_t ca_cal_per_loopchrnk_left[CA_MAX_LOOP_CHN_RNK][CA_NUM_BITS];
|
||
|
|
||
|
// ca data aggregated over all iterations
|
||
|
static int32_t ca_cal_per_chrnk_right[AMC_NUM_CHANNELS * AMC_NUM_RANKS][CA_NUM_BITS];
|
||
|
static int32_t ca_cal_per_chrnk_left[AMC_NUM_CHANNELS * AMC_NUM_RANKS][CA_NUM_BITS];
|
||
|
|
||
|
// rddq data aggregated over all iterations
|
||
|
static int32_t rddq_cal_per_chrnk_right[AMC_NUM_CHANNELS * AMC_NUM_RANKS][DQ_TOTAL_BITS];
|
||
|
static int32_t rddq_cal_per_chrnk_left[AMC_NUM_CHANNELS * AMC_NUM_RANKS][DQ_TOTAL_BITS];
|
||
|
|
||
|
// wrdq data aggregated over all iterations
|
||
|
static int32_t wrdq_cal_per_chrnk_right[AMC_NUM_CHANNELS * AMC_NUM_RANKS][DQ_TOTAL_BITS];
|
||
|
static int32_t wrdq_cal_per_chrnk_left[AMC_NUM_CHANNELS * AMC_NUM_RANKS][DQ_TOTAL_BITS];
|
||
|
|
||
|
// wrlvl data aggregated over all iterations, we save value for 4 byte lanes + the ca value
|
||
|
static int32_t wrlvl_cal_per_chrnk_rise[AMC_NUM_CHANNELS * AMC_NUM_RANKS][DQ_NUM_BYTES + 1];
|
||
|
static int32_t wrlvl_cal_per_chrnk_fall[AMC_NUM_CHANNELS * AMC_NUM_RANKS][DQ_NUM_BYTES + 1];
|
||
|
|
||
|
// This array will hold the contents of memory that will be used for dq calibration
|
||
|
static uint8_t dqcal_saved_data[AMC_NUM_RANKS][sizeof(DQ_PRBS7_PATTERNS) * AMC_NUM_CHANNELS]__aligned(32);
|
||
|
|
||
|
// This array will hold the calibration values to be saved to the PMU for resume boot
|
||
|
static uint8_t cal_pmu_bits[CALIB_PMU_BYTES] = { 0 };
|
||
|
|
||
|
// Static local function declarations
|
||
|
static void calibrate_ca(void);
|
||
|
static void calibrate_rddq(bool after_wrddqcal);
|
||
|
static void calibrate_wrlvl(void);
|
||
|
static void calibrate_wrdq(void);
|
||
|
static void save_masterdll_values(void);
|
||
|
static void generate_ca_patterns_mask(void);
|
||
|
static void amp_program_ca_patterns(uint32_t ch, uint32_t rnk, uint32_t swloop);
|
||
|
static void amp_init_ca_offset_and_deskew(uint32_t ch);
|
||
|
static uint32_t amp_mask_ca_bits(uint32_t ch, uint32_t mr_cmd);
|
||
|
static void amp_push_casdll_out(uint32_t ch, int32_t offset);
|
||
|
static void amp_enable_cacal_mode(bool enable, uint32_t ch);
|
||
|
static void amp_run_cacal(uint32_t ch);
|
||
|
static void amp_push_ctl_out(uint32_t ch, uint32_t dly_val);
|
||
|
static void amp_setup_rddq_cal(uint32_t ch, uint32_t rnk);
|
||
|
static void amp_set_rddq_sdll(uint32_t ch, uint32_t byte, uint32_t offset);
|
||
|
static void amp_run_rddqcal(uint32_t ch);
|
||
|
static void amp_wrlvl_init(void);
|
||
|
static void amp_phy_update(uint32_t ch, uint32_t update);
|
||
|
static void amp_set_cawrlvl_sdll(uint32_t ch, uint32_t offset, bool set_dly_sel);
|
||
|
static void amp_set_dqwrlvl_sdll(uint32_t ch, uint32_t byte, uint32_t offset, bool set_dly_sel);
|
||
|
static void amp_run_wrlvlcal(uint32_t ch, uint32_t wrlvlrun);
|
||
|
static void amp_set_wrdq_sdll(uint32_t ch, uint32_t byte, int32_t offset);
|
||
|
static void run_cacal_sequence(uint32_t ch, uint32_t rnk, uint32_t mr_cmd, uint32_t mask_bits, uint32_t swloop);
|
||
|
static void find_cacal_right_side_failing_point(uint32_t ch, uint32_t rnk, uint32_t mr_cmd, uint32_t combined_mask, uint32_t swloop);
|
||
|
static void find_cacal_right_side_passing_point(uint32_t ch, uint32_t rnk, uint32_t mr_cmd, uint32_t combined_mask, uint32_t swloop);
|
||
|
static void enter_cacal_mode(uint32_t ch, uint32_t rnk, uint32_t mr_cmd, bool enter);
|
||
|
static void find_cacal_left_side_failing_point(uint32_t ch, uint32_t rnk, uint32_t combined_mask, uint32_t swloop);
|
||
|
static void find_cacal_left_side_passing_point(uint32_t ch, uint32_t rnk, uint32_t combined_mask, uint32_t swloop);
|
||
|
static void ca_program_final_values(void);
|
||
|
static void find_rddqcal_right_side_failing_point(uint32_t ch, uint32_t rnk, bool after_wrddqcal);
|
||
|
static void find_rddqcal_right_side_passing_point(uint32_t ch, uint32_t rnk, int32_t *start_b, bool after_wrddqcal);
|
||
|
static void find_rddqcal_left_side_failing_point(uint32_t ch, uint32_t rnk, bool after_wrddqcal);
|
||
|
static void find_rddqcal_left_side_passing_point(uint32_t ch, uint32_t rnk, int32_t *start_b, bool after_wrddqcal);
|
||
|
static void rddq_program_final_values(bool after_wrddqcal);
|
||
|
static uint32_t wrlvl_encode_dlyval(uint32_t ch, uint32_t phy_type, uint32_t val);
|
||
|
static uint32_t wrlvl_encode_clk90dly(uint32_t ch, uint32_t val);
|
||
|
static void push_wrlvl_to_0s_region(uint32_t ch, uint32_t rnk);
|
||
|
static void find_wrlvl_0to1_transition(uint32_t ch, uint32_t rnk);
|
||
|
static void find_wrlvl_1to0_transition(uint32_t ch, uint32_t rnk);
|
||
|
static void wrlvl_program_final_values(void);
|
||
|
static void find_wrdqcal_right_side_failing_point(uint32_t ch, uint32_t rnk);
|
||
|
static void find_wrdqcal_right_side_passing_point(uint32_t ch, uint32_t rnk, int32_t *start_b);
|
||
|
static void find_wrdqcal_left_side_failing_point(uint32_t ch, uint32_t rnk);
|
||
|
static void find_wrdqcal_left_side_passing_point(uint32_t ch, uint32_t rnk, int32_t *start_b);
|
||
|
static void wrdq_program_final_values(void);
|
||
|
static uint32_t wr_rd_pattern_result(uint32_t ch, uint32_t rnk, uint32_t sdll_value);
|
||
|
static void save_restore_ca_wrlvl_regs(uint32_t save_or_restore);
|
||
|
static void save_restore_memory_region(bool dqcal_start);
|
||
|
static int32_t find_center_of_eye(int32_t left_pos_val, int32_t right_pos_val);
|
||
|
static int32_t find_common_endpoint(int32_t val0, int32_t val1, uint32_t min_or_max);
|
||
|
|
||
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
////// Global functions
|
||
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
||
|
#ifdef AMP_SWIZZLE
|
||
|
void amp_swizzle_init(void)
|
||
|
{
|
||
|
#if (AMP_SWIZZLE == AMP_SWIZZLE_PER_J34M) // per <rdar://15498696>
|
||
|
int ch;
|
||
|
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
// Set up the CA Byte Select Mapping for both DQ and CA
|
||
|
rAMP_CACALBYTESEL(AMP_DQ,ch) = 0x00000001;
|
||
|
rAMP_CACALBYTESEL(AMP_CA,ch) = 0x00000001;
|
||
|
|
||
|
// Set up the CA Bit Select Bit Mapping for both DQ and CA
|
||
|
rAMP_CACALBITSELMAP(AMP_DQ,ch,0) = 0x01234567;
|
||
|
rAMP_CACALBITSELMAP(AMP_DQ,ch,1) = 0xabcdef67;
|
||
|
rAMP_CACALBITSELMAP(AMP_DQ,ch,2) = 0x0000ef89;
|
||
|
rAMP_CACALBITSELMAP(AMP_CA,ch,0) = 0x01234567;
|
||
|
rAMP_CACALBITSELMAP(AMP_CA,ch,1) = 0xabcdef67;
|
||
|
rAMP_CACALBITSELMAP(AMP_CA,ch,2) = 0x0000ef89;
|
||
|
}
|
||
|
#endif // end of (AMP_SWIZZLE == AMP_SWIZZLE_PER_J34M)
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
void amc_phy_init(bool resume)
|
||
|
{
|
||
|
uint32_t ch, f;
|
||
|
#if !SUPPORT_FPGA
|
||
|
uint32_t rd, dq;
|
||
|
#endif
|
||
|
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
rAMP_DLLUPDTCTRL(AMP_DQ, ch) = 0x00017307;
|
||
|
rAMP_DLLUPDTCTRL(AMP_CA, ch) = 0x00017307;
|
||
|
}
|
||
|
amc_phy_enable_dqs_pulldown(false);
|
||
|
|
||
|
#ifdef AMP_SWIZZLE
|
||
|
amp_swizzle_init();
|
||
|
#endif
|
||
|
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
rAMP_AMPEN(AMP_DQ, ch) = 1;
|
||
|
rAMP_AMPEN(AMP_CA, ch) = 1;
|
||
|
|
||
|
#if !SUPPORT_FPGA
|
||
|
|
||
|
rAMP_DQDQSDS(ch) = amc_phy_params.drive_strength;
|
||
|
rAMP_NONDQDS(ch) = amc_phy_params.drive_strength;
|
||
|
#endif
|
||
|
|
||
|
for (f = 0; f < AMP_FREQUENCY_SLOTS; f++) {
|
||
|
rAMP_DIFFMODE_FREQ(ch, f) = 0x00000121;
|
||
|
}
|
||
|
|
||
|
#if !SUPPORT_FPGA
|
||
|
for (rd = 0; rd < AMP_MAX_RD; rd++) {
|
||
|
for (dq = 0; dq < AMP_MAX_DQ; dq++) {
|
||
|
rAMP_RDDQDESKEW_CTRL(ch, rd, dq) = 0x00000006;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
rAMP_DLLLOCKTIM(AMP_DQ, ch) = 0x000d0013;
|
||
|
rAMP_DLLLOCKTIM(AMP_CA, ch) = 0x000d0013;
|
||
|
#endif
|
||
|
|
||
|
for (f = 0; f < AMP_FREQUENCY_SLOTS; f++) {
|
||
|
rAMP_DQSINDLLSCL_FREQ(ch, f) = amc_phy_params.freq[f].dqsindllscl;
|
||
|
rAMP_CAOUTDLLSCL_FREQ(ch, f) = amc_phy_params.freq[f].caoutdllscl;
|
||
|
}
|
||
|
|
||
|
for (f = 0; f < AMP_FREQUENCY_SLOTS; f++) {
|
||
|
rAMP_RDCAPCFG_FREQ(AMP_DQ, ch, f) = amc_phy_params.freq[f].rdcapcfg;
|
||
|
}
|
||
|
|
||
|
#if !SUPPORT_FPGA
|
||
|
rAMP_DLLUPDTCTRL(AMP_DQ, ch) = 0x00017507;
|
||
|
rAMP_DLLUPDTCTRL(AMP_CA, ch) = 0x00017507;
|
||
|
|
||
|
if (!amc_phy_params.imp_auto_cal) {
|
||
|
rAMP_IMPAUTOCAL(AMP_DQ, ch) = 0x000103ac;
|
||
|
rAMP_IMPAUTOCAL(AMP_CA, ch) = 0x000103ac;
|
||
|
}
|
||
|
else {
|
||
|
rAMP_IMPAUTOCAL(AMP_DQ, ch) = amc_phy_params.imp_auto_cal;
|
||
|
rAMP_IMPAUTOCAL(AMP_CA, ch) = amc_phy_params.imp_auto_cal;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
#if !SUPPORT_FPGA
|
||
|
// Keep DLL update interval to 0, it will restored after write calibration.
|
||
|
// <rdar://problem/15777393> CoreOS: M7 init sequence change
|
||
|
#if SUB_PLATFORM_S7002
|
||
|
rAMP_DLLUPDTINTVL(AMP_DQ, ch) = 0x10200000;
|
||
|
rAMP_DLLUPDTINTVL(AMP_CA, ch) = 0x10200000;
|
||
|
#else
|
||
|
rAMP_DLLUPDTINTVL(AMP_DQ, ch) = 0x1020005a;
|
||
|
rAMP_DLLUPDTINTVL(AMP_CA, ch) = 0x1020005a;
|
||
|
#endif
|
||
|
|
||
|
# ifdef rAMP_MDLLFREQBINDISABLE
|
||
|
rAMP_MDLLFREQBINDISABLE(AMP_DQ, ch) = 0x00000008;
|
||
|
rAMP_MDLLFREQBINDISABLE(AMP_CA, ch) = 0x00000008;
|
||
|
# endif
|
||
|
#else
|
||
|
rAMP_DLLUPDTINTVL(AMP_DQ, ch) = 0;
|
||
|
rAMP_DLLUPDTINTVL(AMP_CA, ch) = 0;
|
||
|
|
||
|
# ifdef rAMP_MDLLFREQBINDISABLE
|
||
|
rAMP_MDLLFREQBINDISABLE(AMP_DQ, ch) = 0x0000000F;
|
||
|
rAMP_MDLLFREQBINDISABLE(AMP_CA, ch) = 0x0000000F;
|
||
|
# endif
|
||
|
#endif
|
||
|
|
||
|
#if !SUPPORT_FPGA
|
||
|
rAMP_DLLEN(AMP_DQ, ch) = 0x00000100;
|
||
|
rAMP_DLLEN(AMP_CA, ch) = 0x00000100;
|
||
|
|
||
|
rAMP_DLLEN(AMP_DQ, ch) = 0x00000101;
|
||
|
rAMP_DLLEN(AMP_CA, ch) = 0x00000101;
|
||
|
|
||
|
rAMP_DLLEN(AMP_DQ, ch) = 0x00000100;
|
||
|
rAMP_DLLEN(AMP_CA, ch) = 0x00000100;
|
||
|
|
||
|
amc_phy_run_dll_update(ch);
|
||
|
#endif
|
||
|
|
||
|
rAMP_AMPINIT(AMP_DQ, ch) = 0x00000001;
|
||
|
rAMP_AMPINIT(AMP_CA, ch) = 0x00000001;
|
||
|
|
||
|
#if !SUPPORT_FPGA
|
||
|
rAMP_IMPCALCMD(AMP_CA, ch) = 0x00000101;
|
||
|
rAMP_IMPCALCMD(AMP_DQ, ch) = 0x00000101;
|
||
|
|
||
|
while (rAMP_IMPCALCMD(AMP_CA, ch) & 0x1) {}
|
||
|
while (rAMP_IMPCALCMD(AMP_DQ, ch) & 0x1) {}
|
||
|
|
||
|
// Wait 5 us after Impedence Calibration to avoid McPhyPending
|
||
|
// preventing the SRFSM from exiting SR.
|
||
|
spin(5);
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
#if !SUPPORT_FPGA
|
||
|
#ifndef AMP_CALIBRATION_SKIP
|
||
|
// Restore CA and WrLvl offsets from PMU
|
||
|
if (resume)
|
||
|
save_restore_ca_wrlvl_regs(CALIB_RESTORE);
|
||
|
#endif
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
void amc_phy_enable_dqs_pulldown(bool enable)
|
||
|
{
|
||
|
// Stub because H6 init sequence does not recommend doing this
|
||
|
}
|
||
|
|
||
|
void amc_phy_scale_dll(int freqsel, int factor)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
void amc_phy_run_dll_update(uint8_t ch)
|
||
|
{
|
||
|
rAMP_DLLUPDTCMD(AMP_DQ, ch) = 0x00000001;
|
||
|
rAMP_DLLUPDTCMD(AMP_CA, ch) = 0x00000001;
|
||
|
|
||
|
while ((rAMP_DLLUPDTCMD(AMP_DQ, ch) & 0x1) != 0) ;
|
||
|
while ((rAMP_DLLUPDTCMD(AMP_CA, ch) & 0x1) != 0) ;
|
||
|
}
|
||
|
|
||
|
void amc_phy_bypass_prep(int step)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
void amc_phy_finalize()
|
||
|
{
|
||
|
}
|
||
|
|
||
|
// Perform CA, RDDQ, and WRLVL calibration
|
||
|
void amc_phy_calibration_ca_rddq_cal(bool resume)
|
||
|
{
|
||
|
#ifndef AMP_CALIBRATION_SKIP
|
||
|
|
||
|
if ((amc_phy_params.cacalib_hw_loops * amc_phy_params.cacalib_sw_loops) != CA_NUM_PATTERNS)
|
||
|
panic("Memory calibration: hwloops (%d) and swloops (%d) values are unexpected\n",
|
||
|
amc_phy_params.cacalib_hw_loops, amc_phy_params.cacalib_sw_loops);
|
||
|
|
||
|
amc_calibration_start(true);
|
||
|
|
||
|
if (!resume)
|
||
|
calibrate_ca();
|
||
|
|
||
|
/*
|
||
|
* The first RdDq Cal is using MRR32 and MRR40. It's needed for WrDq calibration. Hence run before WrDq.
|
||
|
* The second Rd Dq calibration is PRBS pattern based, which needs Wr Dq calibration done. Hence done after WrDq.
|
||
|
* PRBS patterns help in reducing aliasing, hence needed for better accuracy.
|
||
|
*/
|
||
|
calibrate_rddq(false);
|
||
|
|
||
|
if (!resume) {
|
||
|
calibrate_wrlvl();
|
||
|
// Save off the CA and WrLvl offsets to PMU
|
||
|
save_restore_ca_wrlvl_regs(CALIB_SAVE);
|
||
|
}
|
||
|
|
||
|
amc_calibration_start(false);
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
void amc_phy_calibration_wrdq_cal(bool resume)
|
||
|
{
|
||
|
#ifndef AMP_CALIBRATION_SKIP
|
||
|
if (resume)
|
||
|
save_restore_memory_region(true);
|
||
|
|
||
|
amc_calibration_start(true);
|
||
|
|
||
|
// ok to keep PSQWQCTL0 and PSQWQCTL1 at their value setup for wrdqcal even for the rddqcal that follows
|
||
|
amc_wrdqcal_start(true);
|
||
|
|
||
|
calibrate_wrdq();
|
||
|
|
||
|
/*
|
||
|
* The first RdDq Cal is using MRR32 and MRR40. It's needed for WrDq calibration. Hence run before WrDq.
|
||
|
* The second Rd Dq calibration is PRBS pattern based, which needs Wr Dq calibration done. Hence done after WrDq.
|
||
|
* PRBS patterns help in reducing aliasing, hence needed for better accuracy.
|
||
|
*/
|
||
|
calibrate_rddq(true);
|
||
|
|
||
|
amc_wrdqcal_start(false);
|
||
|
|
||
|
amc_calibration_start(false);
|
||
|
|
||
|
if (resume)
|
||
|
save_restore_memory_region(false);
|
||
|
#endif
|
||
|
|
||
|
uint8_t ch;
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
// Restore AMP DLL update interval to POR value
|
||
|
// <rdar://problem/15777393> CoreOS: M7 init sequence change
|
||
|
#if !SUPPORT_FPGA
|
||
|
#if SUB_PLATFORM_S7002
|
||
|
rAMP_DLLUPDTINTVL(AMP_DQ, ch) = 0x1020005a;
|
||
|
rAMP_DLLUPDTINTVL(AMP_CA, ch) = 0x1020005a;
|
||
|
#endif
|
||
|
#endif
|
||
|
|
||
|
// Enable AMP clock gating only after Wrdqcal is done
|
||
|
rAMP_AMPCLK(AMP_DQ, ch) = 0x00010000;
|
||
|
rAMP_AMPCLK(AMP_CA, ch) = 0x00010000;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
////// Local functions
|
||
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
||
|
// To dump calibration results from iBoot menu command
|
||
|
static int dump_mem_calibration_info(int argc, struct cmd_arg *args)
|
||
|
{
|
||
|
uint32_t ch, byte, bit;
|
||
|
|
||
|
dprintf(DEBUG_INFO, "Memory calibration results\n");
|
||
|
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
dprintf(DEBUG_INFO, "Channel %d\n", ch);
|
||
|
|
||
|
dprintf(DEBUG_INFO, "\tCA SDLL: 0x%02x\n", rAMP_CASDLLCTRL(ch) & DLLVAL_BITS);
|
||
|
|
||
|
dprintf(DEBUG_INFO, "\t\tPer Bit Deskew: ");
|
||
|
for (bit = 0; bit < CA_NUM_BITS; bit++)
|
||
|
dprintf(DEBUG_INFO, "0x%02x ", rAMP_CADESKEW_CTRL(ch, bit) & DESKEW_CTRL_BITS);
|
||
|
|
||
|
dprintf(DEBUG_INFO, "\n\t\tCS, CK, CKE Deskew: 0x%02x", rAMP_CKDESKEW_CTRL(ch) & DESKEW_CTRL_BITS);
|
||
|
|
||
|
dprintf(DEBUG_INFO, "\n");
|
||
|
|
||
|
dprintf(DEBUG_INFO, "\tCA WrLvlSDLL: 0x%02x\n", rAMP_CAWRLVLSDLLCODE(ch) & DLLVAL_BITS);
|
||
|
dprintf(DEBUG_INFO, "\tDQ WrLvlSDLL: ");
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
dprintf(DEBUG_INFO, "0x%02x ", rAMP_DQWRLVLSDLLCODE(ch, byte) & DLLVAL_BITS);
|
||
|
|
||
|
dprintf(DEBUG_INFO, "\n");
|
||
|
|
||
|
dprintf(DEBUG_INFO, "\tRead DQ:\n");
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
dprintf(DEBUG_INFO, "\t\tByte %d SDLL: 0x%02x\n", byte, rAMP_DQSDLLCTRL_RD(ch, byte) & DLLVAL_BITS);
|
||
|
|
||
|
dprintf(DEBUG_INFO, "\t\t\tPer Bit Deskew: ");
|
||
|
for (bit = 0; bit < DQ_NUM_BITS_PER_BYTE; bit++)
|
||
|
dprintf(DEBUG_INFO, "0x%02x ", rAMP_RDDQDESKEW_CTRL(ch, byte, bit) & DESKEW_CTRL_BITS);
|
||
|
|
||
|
dprintf(DEBUG_INFO, "\n");
|
||
|
|
||
|
}
|
||
|
|
||
|
dprintf(DEBUG_INFO, "\tWrite DQ:\n");
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
dprintf(DEBUG_INFO, "\t\tByte %d SDLL: 0x%02x\n", byte, rAMP_DQSDLLCTRL_WR(ch, byte) & DLLVAL_BITS);
|
||
|
|
||
|
dprintf(DEBUG_INFO, "\t\t\tPer Bit Deskew: ");
|
||
|
for (bit = 0; bit < DQ_NUM_BITS_PER_BYTE; bit++)
|
||
|
dprintf(DEBUG_INFO, "0x%02x ", rAMP_WRDQDESKEW_CTRL(ch, byte, bit) & DESKEW_CTRL_BITS);
|
||
|
|
||
|
dprintf(DEBUG_INFO, "\n");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
MENU_COMMAND_DEBUG(memcal_info, dump_mem_calibration_info, "Prints memory calibration results", NULL);
|
||
|
|
||
|
static void calibrate_ca(void)
|
||
|
{
|
||
|
uint32_t ch, rnk, swloop, mask_bits;
|
||
|
|
||
|
generate_ca_patterns_mask();
|
||
|
|
||
|
// Required since the dll values may change slightly during calibration
|
||
|
save_masterdll_values();
|
||
|
|
||
|
// Calibration sequence is to be run for each rank in each channel, amc_phy_params.cacalib_sw_loops number of times
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
for (rnk = 0; rnk < AMC_NUM_RANKS; rnk++) {
|
||
|
for (swloop = 0; swloop < amc_phy_params.cacalib_sw_loops; swloop++) {
|
||
|
amp_program_ca_patterns(ch, rnk, swloop);
|
||
|
|
||
|
amp_init_ca_offset_and_deskew(ch);
|
||
|
|
||
|
// Training of CA Bits 0-3 and 5-8: MR41 cmd (training cmd must be sent before cacalibmode is enabled in AMP)
|
||
|
mask_bits = amp_mask_ca_bits(ch, MR41);
|
||
|
amc_mrcmd_to_ch_rnk(MR_WRITE, ch, rnk, MR41, MR41 << 2);
|
||
|
amp_enable_cacal_mode(true, ch);
|
||
|
run_cacal_sequence(ch, rnk, MR41, mask_bits, swloop);
|
||
|
amp_enable_cacal_mode(false, ch);
|
||
|
|
||
|
amp_init_ca_offset_and_deskew(ch);
|
||
|
|
||
|
// Training of CA Bits 4 and 9: MR48 cmd (training cmd must be sent before cacalibmode is enabled in AMP)
|
||
|
mask_bits = amp_mask_ca_bits(ch, MR48);
|
||
|
amc_mrcmd_to_ch_rnk(MR_WRITE, ch, rnk, MR48, MR48 << 2);
|
||
|
amp_enable_cacal_mode(true, ch);
|
||
|
run_cacal_sequence(ch, rnk, MR48, mask_bits, swloop);
|
||
|
amp_enable_cacal_mode(false, ch);
|
||
|
|
||
|
amp_init_ca_offset_and_deskew(ch);
|
||
|
|
||
|
// Exit CA Training mode: MR42
|
||
|
amc_mrcmd_to_ch_rnk(MR_WRITE, ch, rnk, MR42, MR42 << 2);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// By now, we have compiled right and left edges of passing window for all CA bits over a number of iterations
|
||
|
// Aggregate the results, and find the center point of the window, and program it
|
||
|
ca_program_final_values();
|
||
|
}
|
||
|
|
||
|
static void calibrate_rddq(bool after_wrddqcal)
|
||
|
{
|
||
|
uint32_t ch, rnk, data;
|
||
|
|
||
|
// step7
|
||
|
if (after_wrddqcal == false) {
|
||
|
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
for (rnk = 0; rnk < AMC_NUM_RANKS; rnk++) {
|
||
|
amp_setup_rddq_cal(ch, rnk);
|
||
|
|
||
|
amc_mrcmd_to_ch_rnk(MR_READ, ch, rnk, MR5, (uintptr_t)&data);
|
||
|
|
||
|
amc_enable_rddqcal(true);
|
||
|
|
||
|
// Find the left and right edges of the eye
|
||
|
find_rddqcal_right_side_failing_point(ch, rnk, false);
|
||
|
find_rddqcal_left_side_failing_point(ch, rnk, false);
|
||
|
|
||
|
amc_enable_rddqcal(false);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// step10
|
||
|
else {
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
for (rnk = 0; rnk < AMC_NUM_RANKS; rnk++) {
|
||
|
// Find the left and right edges of the eye using PRBS patterns
|
||
|
// These results will be more accurate
|
||
|
find_rddqcal_right_side_failing_point(ch, rnk, true);
|
||
|
find_rddqcal_left_side_failing_point(ch, rnk, true);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Now that we have per bit left and right endpoints for each channel and rank, aggregate and program final values
|
||
|
rddq_program_final_values(after_wrddqcal);
|
||
|
}
|
||
|
|
||
|
// Align the clock signal with the DQ signals
|
||
|
static void calibrate_wrlvl(void)
|
||
|
{
|
||
|
uint32_t ch, rnk;
|
||
|
uint32_t data, cawrlvlsdll;
|
||
|
|
||
|
amp_wrlvl_init();
|
||
|
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
|
||
|
cawrlvlsdll = rAMP_CAWRLVLSDLLCODE(ch);
|
||
|
|
||
|
for (rnk = 0; rnk < AMC_NUM_RANKS; rnk++) {
|
||
|
data = 0x80 + RD_LATENCY_ENCODE; // 0x80 is added here to set the Write Level bit (bit 7) to 1
|
||
|
|
||
|
amc_mrcmd_to_ch_rnk(MR_WRITE, ch, rnk, MR2, data);
|
||
|
|
||
|
// find the region where all bits return a 0
|
||
|
push_wrlvl_to_0s_region(ch, rnk);
|
||
|
|
||
|
// push out the clock signal until all bits return a 1
|
||
|
find_wrlvl_0to1_transition(ch, rnk);
|
||
|
|
||
|
// now go back towards the transition edge found earlier, but from this side of the edge
|
||
|
find_wrlvl_1to0_transition(ch, rnk);
|
||
|
|
||
|
// reset cawrlvlsdllcode to original value (0), before sending cmd to exit wrlvl mode (MR2)
|
||
|
amp_set_cawrlvl_sdll(ch, cawrlvlsdll, false);
|
||
|
|
||
|
data = RD_LATENCY_ENCODE;
|
||
|
amc_mrcmd_to_ch_rnk(MR_WRITE, ch, rnk, MR2, data);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Program the final wrlvl values
|
||
|
wrlvl_program_final_values();
|
||
|
}
|
||
|
|
||
|
static void calibrate_wrdq(void)
|
||
|
{
|
||
|
uint32_t ch, rnk;
|
||
|
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
for (rnk = 0; rnk < AMC_NUM_RANKS; rnk++) {
|
||
|
find_wrdqcal_right_side_failing_point(ch, rnk);
|
||
|
find_wrdqcal_left_side_failing_point(ch, rnk);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
wrdq_program_final_values();
|
||
|
}
|
||
|
|
||
|
static void save_masterdll_values(void)
|
||
|
{
|
||
|
uint32_t ch;
|
||
|
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
mdllcode[ch][AMP_DQ] = (rAMP_MDLLCODE(AMP_DQ, ch) & DLLVAL_BITS);
|
||
|
mdllcode[ch][AMP_CA] = (rAMP_MDLLCODE(AMP_CA, ch) & DLLVAL_BITS);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void generate_ca_patterns_mask(void)
|
||
|
{
|
||
|
uint32_t index, patr, patf = 0;
|
||
|
uint32_t mask = 0;
|
||
|
|
||
|
// generate the pattern to be used for each CA calibration iteration
|
||
|
for (index = 0; index < (amc_phy_params.cacalib_sw_loops * amc_phy_params.cacalib_hw_loops); index++) {
|
||
|
patr = (CA_PRBS7_PATTERNS[index]) & CA_ALL_BITS;
|
||
|
patf = (CA_PRBS7_PATTERNS[index]) >> CA_NUM_BITS;
|
||
|
mask = patr ^ patf;
|
||
|
ca_patterns_mask[index] = mask;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void amp_program_ca_patterns(uint32_t ch, uint32_t rnk, uint32_t swloop)
|
||
|
{
|
||
|
uint32_t cacalctrl, p;
|
||
|
|
||
|
// Program rank, hardware loop count, and timing params
|
||
|
// Timing params are taken from lpddr3 jedec spec
|
||
|
cacalctrl = (rnk << 24) | ((amc_phy_params.cacalib_hw_loops - 1) << 16) | (16 << 8) | (10 << 0);
|
||
|
|
||
|
rAMP_CACALCTRL(AMP_DQ, ch) = cacalctrl;
|
||
|
rAMP_CACALCTRL(AMP_CA, ch) = cacalctrl;
|
||
|
|
||
|
for (p = 0; p < AMP_MAX_PATTERNS; p++) {
|
||
|
rAMP_CACALPAT(AMP_DQ, ch, p) = CA_PRBS7_PATTERNS[(swloop * amc_phy_params.cacalib_hw_loops) + p];
|
||
|
rAMP_CACALPAT(AMP_CA, ch, p) = CA_PRBS7_PATTERNS[(swloop * amc_phy_params.cacalib_hw_loops) + p];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// (Re-)Initialize ca offset and deskew registers
|
||
|
static void amp_init_ca_offset_and_deskew(uint32_t ch)
|
||
|
{
|
||
|
uint8_t d;
|
||
|
int32_t camdllcode = mdllcode[ch][AMP_CA];
|
||
|
|
||
|
// ensure negative sign is set with mdllcode value for ca offset (mdllcode guaranteed by designers not to be negative)
|
||
|
amp_push_casdll_out(ch, (-1 * camdllcode));
|
||
|
|
||
|
// Clear cadeskewctrl registers
|
||
|
for (d = 0; d < CA_NUM_BITS; d++)
|
||
|
rAMP_CADESKEW_CTRL(ch, d) = 0;
|
||
|
}
|
||
|
|
||
|
static uint32_t amp_mask_ca_bits(uint32_t ch, uint32_t mr_cmd)
|
||
|
{
|
||
|
uint32_t mask_bits;
|
||
|
|
||
|
// Assuming no byte swizzling
|
||
|
if (mr_cmd == MR41) {
|
||
|
// MR41: Mask out bits 9 and 4
|
||
|
mask_bits = 0x210;
|
||
|
} else if (mr_cmd == MR48) {
|
||
|
// MR48: Mask out bits 0-3 and bits 5-8
|
||
|
mask_bits = 0x1EF;
|
||
|
} else {
|
||
|
// No bits are masked out
|
||
|
mask_bits = 0;
|
||
|
}
|
||
|
|
||
|
rAMP_CACALMASK(AMP_DQ, ch) = mask_bits;
|
||
|
rAMP_CACALMASK(AMP_CA, ch) = mask_bits;
|
||
|
|
||
|
return mask_bits;
|
||
|
}
|
||
|
|
||
|
static void amp_push_casdll_out(uint32_t ch, int32_t offset)
|
||
|
{
|
||
|
uint32_t ca_bit;
|
||
|
uint32_t cadeskewcode;
|
||
|
int32_t camdllcode = mdllcode[ch][AMP_CA];
|
||
|
|
||
|
if (offset > 0) {
|
||
|
// New equation given by Rakesh: if offset is within DELIMIT_POS_ADJ_CASDLL steps of camdllcode, limit it to (master dll - DELIMIT_POS_ADJ_CASDLL)
|
||
|
if (offset >= (camdllcode - DELIMIT_POS_ADJ_CASDLL)) {
|
||
|
uint8_t difference = (uint8_t) (offset - (camdllcode - DELIMIT_POS_ADJ_CASDLL));
|
||
|
offset = camdllcode - DELIMIT_POS_ADJ_CASDLL;
|
||
|
|
||
|
if (difference >= MAX_DESKEW_PROGRAMMED)
|
||
|
cadeskewcode = MAX_DESKEW_PROGRAMMED;
|
||
|
else
|
||
|
cadeskewcode = difference;
|
||
|
|
||
|
// Adjust deskew registers for each ca bit
|
||
|
for (ca_bit = 0; ca_bit < CA_NUM_BITS; ca_bit++)
|
||
|
rAMP_CADESKEW_CTRL(ch, ca_bit) = cadeskewcode;
|
||
|
|
||
|
}
|
||
|
}
|
||
|
|
||
|
rAMP_CASDLLCTRL(ch) = (1 << 24) | INT_TO_OFFSET(offset);
|
||
|
while (rAMP_CASDLLCTRL(ch) & (1 << 24));
|
||
|
}
|
||
|
|
||
|
static void amp_enable_cacal_mode(bool enable, uint32_t ch)
|
||
|
{
|
||
|
// Set or clear CACalMode bit
|
||
|
if (enable)
|
||
|
rAMP_CACALRUN(AMP_CA, ch) |= CACALRUN_CACALMODE;
|
||
|
else
|
||
|
rAMP_CACALRUN(AMP_CA, ch) &= ~CACALRUN_CACALMODE;
|
||
|
}
|
||
|
|
||
|
static void amp_run_cacal(uint32_t ch)
|
||
|
{
|
||
|
// DQ must be set before CA
|
||
|
rAMP_CACALRUN(AMP_DQ, ch) |= CACALRUN_RUNCACAL;
|
||
|
// CACalMode should already be set
|
||
|
rAMP_CACALRUN(AMP_CA, ch) |= CACALRUN_RUNCACAL;
|
||
|
// Poll on the DQ register
|
||
|
while(rAMP_CACALRUN(AMP_DQ, ch) & CACALRUN_RUNCACAL);
|
||
|
}
|
||
|
|
||
|
static void amp_push_ctl_out(uint32_t ch, uint32_t dly_val)
|
||
|
{
|
||
|
uint32_t cadramsigdly;
|
||
|
|
||
|
rAMP_TESTMODE(AMP_CA, ch) = TESTMODE_FORCECKELOW;
|
||
|
|
||
|
// Fix for Radar 10790574 - Hold Violation on CKE
|
||
|
if (dly_val >= 0xd)
|
||
|
cadramsigdly = (3 << 4);
|
||
|
else if (dly_val >= 0xa)
|
||
|
cadramsigdly = (2 << 4);
|
||
|
else if (dly_val >= 0x8)
|
||
|
cadramsigdly = (1 << 4);
|
||
|
else
|
||
|
cadramsigdly = (0 << 4);
|
||
|
|
||
|
rAMP_DRAMSIGDLY(AMP_CA, ch, 0) = cadramsigdly;
|
||
|
rAMP_CSDESKEW_CTRL(ch) = dly_val;
|
||
|
rAMP_CKDESKEW_CTRL(ch) = dly_val;
|
||
|
rAMP_CKEDESKEW_CTRL(ch) = dly_val;
|
||
|
|
||
|
rAMP_TESTMODE(AMP_CA, ch) = 0;
|
||
|
}
|
||
|
|
||
|
static void amp_setup_rddq_cal(uint32_t ch, uint32_t rnk)
|
||
|
{
|
||
|
// At this point the AMC's READLEVELING should already be setup as 0x00000300
|
||
|
// Make DQCALCTRL.DQCalPatSel (bits 1:0) match READLEVELING.RdLvlPatOpt
|
||
|
rAMP_DQCALCTRL(ch) = (rnk << 16) | (RDDQ_LOOPCNT << 8) | (3 << 0);
|
||
|
}
|
||
|
|
||
|
// This functions set the slave dll for a particular byte lane of RDDQ as specified in the offset parameter
|
||
|
static void amp_set_rddq_sdll(uint32_t ch, uint32_t byte, uint32_t offset)
|
||
|
{
|
||
|
rAMP_DQSDLLCTRL_RD(ch, byte) = (1 << 24) | offset;
|
||
|
// Wait for Run bit to clear
|
||
|
while(rAMP_DQSDLLCTRL_RD(ch, byte) & (1 << 24));
|
||
|
}
|
||
|
|
||
|
static void amp_run_rddqcal(uint32_t ch)
|
||
|
{
|
||
|
rAMP_DQCALRUN(ch) = 1;
|
||
|
while (rAMP_DQCALRUN(ch) & 1);
|
||
|
}
|
||
|
|
||
|
static void amp_wrlvl_init(void)
|
||
|
{
|
||
|
uint32_t ch;
|
||
|
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
// Write Leveling Timing Control Registers to program tMRD and tWLO timing params
|
||
|
// Taking these values from jedec_lpddr3s8_4gb_x32_1600.soma
|
||
|
// tWLO has a max value of 20ns for 1600 freq
|
||
|
// tWLMRD has a min value of 40tck for 1600 freq
|
||
|
|
||
|
rAMP_DQWRLVLTIM(ch) = (12 << 8) | (16 << 0);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// follow a certain sequence required when wrlvl dlysel is to be updated
|
||
|
static void amp_phy_update(uint32_t ch, uint32_t update) {
|
||
|
// Release CKE and disable phyupdt to allow normal operation
|
||
|
if (!update)
|
||
|
rAMP_TESTMODE(AMP_CA, ch) = 0;
|
||
|
|
||
|
// issue phyupdt to block AMC traffic
|
||
|
rAMP_CAPHYUPDTCRTL(ch) = update;
|
||
|
// wait for the phyupdt change to take effect. there is only 1 bit in the status reg: bit 0.
|
||
|
while((rAMP_CAPHYUPDTSTATUS(ch) & 1) != (update & 1));
|
||
|
|
||
|
// CKE must be low when updating dlysel to avoid glitches
|
||
|
if (update)
|
||
|
rAMP_TESTMODE(AMP_CA, ch) = TESTMODE_FORCECKELOW;
|
||
|
}
|
||
|
|
||
|
// Must ensure that WrLvL SDLLs are programmed with all precautions to avoid glitch on clock signals
|
||
|
static void amp_set_cawrlvl_sdll(uint32_t ch, uint32_t offset, bool set_dly_sel)
|
||
|
{
|
||
|
if (set_dly_sel) {
|
||
|
// Must send phyupdt to AMC to avoid traffic while CKE is low
|
||
|
amp_phy_update(ch, 1);
|
||
|
|
||
|
// Ok to set directly to final value (instead of incrementing) since CKE is low
|
||
|
rAMP_CAWRLVLSDLLCODE(ch) = offset;
|
||
|
|
||
|
/*
|
||
|
* Since M7 memory clock much slower than Alcatraz, need to toggle phyupdt to ensure
|
||
|
* refreshes that have piled up due to phyupdt being set are flushed out
|
||
|
*/
|
||
|
if (amc_phy_params.wrlvl_togglephyupdt) {
|
||
|
amp_phy_update(ch, 0);
|
||
|
amp_phy_update(ch, 1);
|
||
|
}
|
||
|
|
||
|
// program the dlysel value with CKE low to avoid glitches to DRAM
|
||
|
rAMP_CAWRLVLCLKDLYSEL(ch) = wrlvl_encode_dlyval(ch, AMP_CA, offset);
|
||
|
|
||
|
// disable phyupdt and release CKE back to high
|
||
|
amp_phy_update(ch, 0);
|
||
|
} else {
|
||
|
uint32_t cawrlvlsdll = rAMP_CAWRLVLSDLLCODE(ch);
|
||
|
int32_t step = (cawrlvlsdll < offset) ? 1 : -1;
|
||
|
|
||
|
// when CKE is not low, need to step by 1 to avoid glitches to DRAM
|
||
|
for ( ; cawrlvlsdll != offset; cawrlvlsdll += step)
|
||
|
rAMP_CAWRLVLSDLLCODE(ch) = cawrlvlsdll + step;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Must ensure that WrLvL SDLLs are programmed with all precautions to avoid glitches
|
||
|
static void amp_set_dqwrlvl_sdll(uint32_t ch, uint32_t byte, uint32_t offset, bool set_dly_sel)
|
||
|
{
|
||
|
if (set_dly_sel) {
|
||
|
uint32_t dqwrlvldlychainctrl = rAMP_DQWRLVLDLYCHAINCTRL(ch, byte);
|
||
|
|
||
|
// Must send phyupdt to AMC to avoid traffic while CKE is low
|
||
|
amp_phy_update(ch, 1);
|
||
|
|
||
|
// Ok to set directory final value (instead of incrementing) since CKE is low
|
||
|
rAMP_DQWRLVLSDLLCODE(ch, byte) = offset;
|
||
|
|
||
|
/*
|
||
|
* Since M7 memory clock much slower than Alcatraz, need to toggle phyupdt to ensure
|
||
|
* refreshes that have piled up due to phyupdt being set are flushed out
|
||
|
*/
|
||
|
if (amc_phy_params.wrlvl_togglephyupdt) {
|
||
|
amp_phy_update(ch, 0);
|
||
|
amp_phy_update(ch, 1);
|
||
|
}
|
||
|
|
||
|
// program dlysel (also, preserve bits 17:16) with CKE low to avoid glitches to DRAM
|
||
|
rAMP_DQWRLVLDLYCHAINCTRL(ch, byte) = (dqwrlvldlychainctrl & 0x00030000) | wrlvl_encode_dlyval(ch, AMP_DQ, offset);
|
||
|
|
||
|
// disable phyupdt and release CKE back to high
|
||
|
amp_phy_update(ch, 0);
|
||
|
} else {
|
||
|
uint32_t dqwrlvlsdll = rAMP_DQWRLVLSDLLCODE(ch, byte);
|
||
|
int32_t step = (dqwrlvlsdll < offset) ? 1 : -1;
|
||
|
|
||
|
// when CKE is not low, need to step by 1 to avoid glitches to DRAM
|
||
|
for ( ; dqwrlvlsdll != offset; dqwrlvlsdll += step)
|
||
|
rAMP_DQWRLVLSDLLCODE(ch, byte) = dqwrlvlsdll + step;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void amp_run_wrlvlcal(uint32_t ch, uint32_t wrlvlrun)
|
||
|
{
|
||
|
rAMP_DQWRLVLRUN(ch) = wrlvlrun;
|
||
|
while(rAMP_DQWRLVLRUN(ch));
|
||
|
}
|
||
|
|
||
|
// This functions set the slave dll for a particular byte lane of WRDQ as specified in the offset parameter
|
||
|
static void amp_set_wrdq_sdll(uint32_t ch, uint32_t byte, int32_t offset)
|
||
|
{
|
||
|
uint32_t dq_bit;
|
||
|
uint32_t dqdeskewcode;
|
||
|
int32_t dqmdllcode = mdllcode[ch][AMP_DQ];
|
||
|
|
||
|
if (offset > 0) {
|
||
|
// New equation given by Rakesh: if offset is within DELIMIT_POS_ADJ_WRDQSDLL steps of dqmdllcode, limit it to (master dll - DELIMIT_POS_ADJ_WRDQSDLL)
|
||
|
if (offset >= (dqmdllcode - DELIMIT_POS_ADJ_WRDQSDLL)) {
|
||
|
uint8_t difference = (uint8_t) (offset - (dqmdllcode - DELIMIT_POS_ADJ_WRDQSDLL));
|
||
|
offset = dqmdllcode - DELIMIT_POS_ADJ_WRDQSDLL;
|
||
|
|
||
|
if (difference >= DQ_MAX_DESKEW_PER_BIT)
|
||
|
dqdeskewcode = DQ_MAX_DESKEW_PER_BIT;
|
||
|
else
|
||
|
dqdeskewcode = difference;
|
||
|
|
||
|
// Adjust deskew registers for each dq bit
|
||
|
for (dq_bit = 0; dq_bit < DQ_NUM_BITS_PER_BYTE; dq_bit++)
|
||
|
rAMP_WRDQDESKEW_CTRL(ch, byte, dq_bit) = dqdeskewcode;
|
||
|
|
||
|
// Also update the Data Mask (DM), controlled by the DQSDESKEW register
|
||
|
rAMP_WRDQSDESKEW_CTRL(ch, byte) = dqdeskewcode;
|
||
|
}
|
||
|
|
||
|
// set wrlvlclk90dly (bits 17:16 of wrlvldlychainctrl reg) if positive sdll value
|
||
|
rAMP_DQWRLVLDLYCHAINCTRL(ch, byte) = (wrlvl_encode_clk90dly(ch, offset) << 16) | (rAMP_DQWRLVLDLYCHAINCTRL(ch, byte) & 0x3);
|
||
|
}
|
||
|
|
||
|
rAMP_DQSDLLCTRL_WR(ch, byte) = (1 << 24) | INT_TO_OFFSET(offset);
|
||
|
// Wait for Run bit to clear
|
||
|
while(rAMP_DQSDLLCTRL_WR(ch, byte) & (1 << 24));
|
||
|
}
|
||
|
|
||
|
static void run_cacal_sequence(uint32_t ch, uint32_t rnk, uint32_t mr_cmd, uint32_t mask_bits, uint32_t swloop)
|
||
|
{
|
||
|
uint32_t combined_mask, hwloop;
|
||
|
uint32_t pat_mask = 0;
|
||
|
|
||
|
for (hwloop = 0; hwloop < amc_phy_params.cacalib_hw_loops; hwloop++)
|
||
|
pat_mask |= ca_patterns_mask[(swloop * amc_phy_params.cacalib_hw_loops) + hwloop];
|
||
|
|
||
|
// This represents the bits that don't have a transition on any of the patterns used during the hwloop calibration
|
||
|
combined_mask = mask_bits | (CA_ALL_BITS - pat_mask);
|
||
|
|
||
|
// To find the FAIL <-> PASS <-> FAIL window
|
||
|
find_cacal_right_side_failing_point(ch, rnk, mr_cmd, combined_mask, swloop);
|
||
|
find_cacal_left_side_failing_point(ch, rnk, combined_mask, swloop);
|
||
|
}
|
||
|
|
||
|
// Establish the right edge of the window by finding the point where all CA bits fail
|
||
|
static void find_cacal_right_side_failing_point(uint32_t ch, uint32_t rnk, uint32_t mr_cmd, uint32_t combined_mask, uint32_t swloop)
|
||
|
{
|
||
|
bool all_bits_fail = false;
|
||
|
uint32_t cacalresult = 0;
|
||
|
uint32_t push_ck_out = 0;
|
||
|
|
||
|
cacalresult = CA_ALL_BITS;
|
||
|
|
||
|
// Increase delay to the right until all bits fail
|
||
|
do {
|
||
|
amp_run_cacal(ch);
|
||
|
cacalresult = cacalresult & (rAMP_CACALRESULT(ch) & CA_ALL_BITS);
|
||
|
|
||
|
if ((cacalresult & (CA_ALL_BITS ^ combined_mask)) != 0) {
|
||
|
all_bits_fail = false;
|
||
|
push_ck_out = push_ck_out + FINER_STEP_SZ;
|
||
|
|
||
|
amp_init_ca_offset_and_deskew(ch);
|
||
|
|
||
|
// Make AMP and DRAM exit CaCal Mode in order to update the CK, CKE, and CS delays
|
||
|
enter_cacal_mode(ch, rnk, mr_cmd, false);
|
||
|
|
||
|
// Update CK, CKE, and CS signal delays
|
||
|
amp_push_ctl_out(ch, push_ck_out);
|
||
|
|
||
|
// Re-enter CaCal mode
|
||
|
enter_cacal_mode(ch, rnk, mr_cmd, true);
|
||
|
} else {
|
||
|
all_bits_fail = true;
|
||
|
|
||
|
// Do a per bit calculation of when they start passing again
|
||
|
find_cacal_right_side_passing_point(ch, rnk, mr_cmd, combined_mask, swloop);
|
||
|
}
|
||
|
} while ((push_ck_out < MAX_DESKEW_OFFSET) && (all_bits_fail == false));
|
||
|
|
||
|
if ((push_ck_out >= MAX_DESKEW_OFFSET) && (all_bits_fail == false)) {
|
||
|
dprintf(DEBUG_INFO, "Memory CA calibration: Unable to find right side failing point for channel %d\n", ch);
|
||
|
|
||
|
// Failing point cannot be found, continuing to passing point assuming failure at this setting
|
||
|
find_cacal_right_side_passing_point(ch, rnk, mr_cmd, combined_mask, swloop);
|
||
|
}
|
||
|
|
||
|
// Reset CK delay back to 0
|
||
|
if (rAMP_CKDESKEW_CTRL(ch)) {
|
||
|
// Exit CaCal Mode for AMP and DRAM before modifying CK, CKE, and CS signals
|
||
|
enter_cacal_mode(ch, rnk, mr_cmd, false);
|
||
|
|
||
|
// Ok from Rakesh to set to 0 directly instead of decrementing by 1
|
||
|
amp_push_ctl_out(ch, 0);
|
||
|
|
||
|
// Re-enable CACal Mode
|
||
|
enter_cacal_mode(ch, rnk, mr_cmd, true);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Finds the passing region on the right edge of window
|
||
|
static void find_cacal_right_side_passing_point(uint32_t ch, uint32_t rnk, uint32_t mr_cmd, uint32_t combined_mask, uint32_t swloop)
|
||
|
{
|
||
|
bool switch_from_cktoca;
|
||
|
int32_t tap_value;
|
||
|
uint32_t cacalresult;
|
||
|
int32_t camdllcode;
|
||
|
int32_t saved_val;
|
||
|
uint32_t all_bits_pass;
|
||
|
uint32_t BitPass[CA_NUM_BITS] = { 0 };
|
||
|
uint32_t SolidBitPass[CA_NUM_BITS] = { 0 };
|
||
|
uint32_t step_incr;
|
||
|
uint8_t bit_indx;
|
||
|
uint32_t ckdeskew;
|
||
|
uint32_t loopchrnk_indx;
|
||
|
|
||
|
all_bits_pass = 0;
|
||
|
step_incr = FINER_STEP_SZ;
|
||
|
camdllcode = mdllcode[ch][AMP_CA];
|
||
|
ckdeskew = rAMP_CKDESKEW_CTRL(ch);
|
||
|
|
||
|
// For every swloop, we'll save passing values for each channel & rank
|
||
|
loopchrnk_indx = (swloop * AMC_NUM_CHANNELS * AMC_NUM_RANKS) + (ch * AMC_NUM_RANKS) + rnk;
|
||
|
|
||
|
if (ckdeskew) {
|
||
|
tap_value = ckdeskew;
|
||
|
switch_from_cktoca = false;
|
||
|
} else {
|
||
|
// Since clock delay is already down to 0, use the slave delay.
|
||
|
// We only have 2 knobs to turn for delay: clock and sdll
|
||
|
tap_value = (rAMP_CASDLLCTRL(ch) & DLLVAL_BITS);
|
||
|
tap_value = OFFSET_TO_INT(tap_value);
|
||
|
switch_from_cktoca = true;
|
||
|
}
|
||
|
|
||
|
// combined_mask contains don't care bits (due to pattern) or masked bits (MR41 or MR48), so consider those done
|
||
|
for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++)
|
||
|
if ((combined_mask & (1 << bit_indx)) != 0)
|
||
|
BitPass[bit_indx] = 1;
|
||
|
|
||
|
// Finding Right side passing point on per bit level. Moving Right to Left to find point where it turns from FAIL TO PASS
|
||
|
do {
|
||
|
if (switch_from_cktoca == false) {
|
||
|
// Make AMP and DRAM exit CaCal Mode in order to update the CK, CKE, and CS delays
|
||
|
enter_cacal_mode(ch, rnk, mr_cmd, false);
|
||
|
|
||
|
// Update CK, CKE, and CS signal delays
|
||
|
amp_push_ctl_out(ch, tap_value);
|
||
|
|
||
|
// Re-enter CaCal mode
|
||
|
enter_cacal_mode(ch, rnk, mr_cmd, true);
|
||
|
} else {
|
||
|
amp_push_casdll_out(ch, tap_value);
|
||
|
}
|
||
|
|
||
|
// Run the ca calibration in hw
|
||
|
amp_run_cacal(ch);
|
||
|
cacalresult = rAMP_CACALRESULT(ch) & CA_ALL_BITS;
|
||
|
|
||
|
// Make sure that each Bit sees a transition from 0 to 1 on CaCalresult Register
|
||
|
for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++) {
|
||
|
// For bits that are not masked, need to check pass/fail
|
||
|
if ((combined_mask & (1 << bit_indx)) == 0) {
|
||
|
if ((BitPass[bit_indx] == 0) && ((cacalresult & (1 << bit_indx)) != 0)) {
|
||
|
if (SolidBitPass[bit_indx] == SOLID_PASS_DETECT) {
|
||
|
// Bit has passed for SOLID_PASS_DETECT number of times, consider it done.
|
||
|
BitPass[bit_indx] = 1;
|
||
|
} else if (SolidBitPass[bit_indx] > 0) {
|
||
|
SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1;
|
||
|
} else {
|
||
|
// This is the first time this bit has passed, save this point in the array
|
||
|
SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1;
|
||
|
if (switch_from_cktoca == false) {
|
||
|
// MdllCode is considered '0' in this case
|
||
|
saved_val = -1 * (tap_value + camdllcode);
|
||
|
} else {
|
||
|
saved_val = tap_value;
|
||
|
}
|
||
|
|
||
|
ca_cal_per_loopchrnk_right[loopchrnk_indx][bit_indx] = saved_val;
|
||
|
}
|
||
|
} else {
|
||
|
// Bit failed to pass calibration, reset the SolidBitPass value to 0
|
||
|
SolidBitPass[bit_indx] = 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
all_bits_pass = 1;
|
||
|
for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++) {
|
||
|
all_bits_pass = all_bits_pass & BitPass[bit_indx];
|
||
|
if (all_bits_pass == 0)
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
// If ALL bits are not passing - keep moving ca signals from Right to Left
|
||
|
if (all_bits_pass == 0) {
|
||
|
if ((tap_value == 0) && (switch_from_cktoca == false)) {
|
||
|
switch_from_cktoca = true;
|
||
|
|
||
|
tap_value = (rAMP_CASDLLCTRL(ch) & DLLVAL_BITS);
|
||
|
tap_value = OFFSET_TO_INT(tap_value);
|
||
|
}
|
||
|
|
||
|
if (switch_from_cktoca == false) {
|
||
|
tap_value = tap_value - step_incr;
|
||
|
} else {
|
||
|
tap_value = tap_value + step_incr;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
} while ((tap_value <= MAX_SDLL_VAL) && (all_bits_pass == 0));
|
||
|
|
||
|
if (all_bits_pass == 0) {
|
||
|
panic("Memory CA calibration: Unable to find passing point for all bits on the right side");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void enter_cacal_mode(uint32_t ch, uint32_t rnk, uint32_t mr_cmd, bool enter)
|
||
|
{
|
||
|
// For entry, send MR41 command to DRAM before AMP register is changed
|
||
|
if (enter) {
|
||
|
// Re-enter CaCal Mode with MR41 always since some DRAMs don't support entering this mode with MR48
|
||
|
amc_mrcmd_to_ch_rnk(MR_WRITE, ch, rnk, MR41, MR41 << 2);
|
||
|
if (mr_cmd != MR41)
|
||
|
amc_mrcmd_to_ch_rnk(MR_WRITE, ch, rnk, mr_cmd, mr_cmd << 2);
|
||
|
|
||
|
amp_enable_cacal_mode(true, ch);
|
||
|
}
|
||
|
|
||
|
// For exit, change AMP register before sending DRAM command (MR42)
|
||
|
else {
|
||
|
amp_enable_cacal_mode(false, ch);
|
||
|
amc_mrcmd_to_ch_rnk(MR_WRITE, ch, rnk, MR42, MR42 << 2);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void find_cacal_left_side_failing_point(uint32_t ch, uint32_t rnk, uint32_t combined_mask, uint32_t swloop)
|
||
|
{
|
||
|
// At this point, we've already played with all possible CK delays. At the end of find_cacal_right_side_failing_point routine,
|
||
|
// we reset the CK delays to 0.
|
||
|
// Loop through CaSDLLOvrVal from -MasterDLL to +Max until all failing points on the left side are found
|
||
|
|
||
|
uint32_t all_bits_fail;
|
||
|
uint32_t step_incr;
|
||
|
int32_t push_ca_out;
|
||
|
uint32_t cacalresult;
|
||
|
int32_t camdllcode;
|
||
|
uint32_t max_caleft_point_reached;
|
||
|
int32_t max_caleft_point_val;
|
||
|
int32_t casdllctrl, ca0deskewctrl;
|
||
|
|
||
|
all_bits_fail = 0;
|
||
|
cacalresult = CA_ALL_BITS;
|
||
|
step_incr = COARSE_STEP_SZ;
|
||
|
max_caleft_point_reached = 0;
|
||
|
|
||
|
camdllcode = mdllcode[ch][AMP_CA];
|
||
|
max_caleft_point_val = camdllcode + MAX_DESKEW_OFFSET - DELIMIT_POS_ADJ_CASDLL;
|
||
|
|
||
|
casdllctrl = rAMP_CASDLLCTRL(ch) & DLLVAL_BITS;
|
||
|
casdllctrl= OFFSET_TO_INT(casdllctrl);
|
||
|
ca0deskewctrl = rAMP_CADESKEW_CTRL(ch, 0);
|
||
|
|
||
|
// ca0deskewctrl will be non-zero only if casdll reached (camdllcode - DELIMIT_POS_ADJ_CASDLL)
|
||
|
push_ca_out = casdllctrl + ca0deskewctrl;
|
||
|
|
||
|
// Increment CaSDLLOvrVal from -ve Master Code to +MAX_SDLL_VAL
|
||
|
do {
|
||
|
if (push_ca_out >= max_caleft_point_val) {
|
||
|
max_caleft_point_reached = 1;
|
||
|
}
|
||
|
// Push out this new ca offset
|
||
|
amp_push_casdll_out(ch, push_ca_out);
|
||
|
|
||
|
// run the calibration in hw
|
||
|
amp_run_cacal(ch);
|
||
|
cacalresult = cacalresult & (rAMP_CACALRESULT(ch) & CA_ALL_BITS);
|
||
|
|
||
|
// combined mask has don't care bits (based on pattern) and masked bits (based on MR41 or MR48) that we should ignore
|
||
|
if ((cacalresult & (CA_ALL_BITS ^ combined_mask)) != 0) {
|
||
|
all_bits_fail = 0;
|
||
|
} else {
|
||
|
all_bits_fail = 1;
|
||
|
|
||
|
// Now, we have found the left edge of window. Find the passing point for all bits
|
||
|
find_cacal_left_side_passing_point(ch, rnk, combined_mask, swloop);
|
||
|
}
|
||
|
|
||
|
// increase the offset
|
||
|
if (all_bits_fail == 0)
|
||
|
push_ca_out = push_ca_out + step_incr;
|
||
|
|
||
|
if ((push_ca_out > MAX_SDLL_VAL) && (all_bits_fail == 0)) {
|
||
|
panic("Memory CA calibration: Unable to find failing point for all bits on the left side");
|
||
|
}
|
||
|
|
||
|
// Forcefully ending this loop as there are no more sdll taps left to proceed ahead
|
||
|
if (max_caleft_point_reached && (all_bits_fail == 0))
|
||
|
{
|
||
|
dprintf(DEBUG_INFO, "Memory CA calibration: SDLL ran out of taps when trying to find left side failing point\n");
|
||
|
|
||
|
find_cacal_left_side_passing_point(ch, rnk, combined_mask, swloop);
|
||
|
all_bits_fail = 1;
|
||
|
}
|
||
|
} while ((push_ca_out <= MAX_SDLL_VAL) && (all_bits_fail == 0) && (max_caleft_point_reached == 0));
|
||
|
}
|
||
|
|
||
|
static void find_cacal_left_side_passing_point(uint32_t ch, uint32_t rnk, uint32_t combined_mask, uint32_t swloop)
|
||
|
{
|
||
|
uint32_t loopchrnk_indx;
|
||
|
uint32_t BitPass[CA_NUM_BITS] = { 0 };
|
||
|
uint32_t SolidBitPass[CA_NUM_BITS] = { 0 };
|
||
|
int32_t tap_value;
|
||
|
uint32_t cacalresult;
|
||
|
int32_t camdllcode;
|
||
|
int32_t ca0deskewctrl;
|
||
|
uint32_t all_bits_pass;
|
||
|
uint32_t step_incr;
|
||
|
uint32_t bit_indx;
|
||
|
|
||
|
loopchrnk_indx = (swloop * AMC_NUM_CHANNELS * AMC_NUM_RANKS) + (ch * AMC_NUM_RANKS) + rnk;
|
||
|
|
||
|
tap_value = rAMP_CASDLLCTRL(ch) & DLLVAL_BITS;
|
||
|
tap_value = OFFSET_TO_INT(tap_value);
|
||
|
ca0deskewctrl = rAMP_CADESKEW_CTRL(ch, 0);
|
||
|
camdllcode = mdllcode[ch][AMP_CA];
|
||
|
|
||
|
step_incr = FINER_STEP_SZ;
|
||
|
all_bits_pass = 0;
|
||
|
|
||
|
// ca0deskewctrl will be non-zero only if casdll reached (camdllcode - DELIMIT_POS_ADJ_CASDLL)
|
||
|
tap_value += ca0deskewctrl;
|
||
|
|
||
|
// combined_mask contains don't care bits (due to pattern) or masked bits (MR41 or MR48), so consider those passed
|
||
|
for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++)
|
||
|
if ((combined_mask & (1 << bit_indx)) != 0)
|
||
|
BitPass[bit_indx] = 1;
|
||
|
|
||
|
// Finding Left side passing point on per bit level. Move Left to Right to find point where it turns from FAIL TO PASS
|
||
|
do {
|
||
|
// Push out this new ca offset
|
||
|
amp_push_casdll_out(ch, tap_value);
|
||
|
|
||
|
// Run the calibration in hw
|
||
|
amp_run_cacal(ch);
|
||
|
cacalresult = rAMP_CACALRESULT(ch) & CA_ALL_BITS;
|
||
|
|
||
|
// Make sure that each Bit sees a transition from 0 to 1 on CaCalresult Register
|
||
|
for (bit_indx=0; bit_indx < CA_NUM_BITS; bit_indx++) {
|
||
|
// check pass/fail for bits not masked
|
||
|
if ((combined_mask & (1 << bit_indx)) == 0) {
|
||
|
if ((BitPass[bit_indx] == 0) && ((cacalresult & (1 << bit_indx)) != 0)) {
|
||
|
if (SolidBitPass[bit_indx] == SOLID_PASS_DETECT) {
|
||
|
// bit has passed SOLID_PASS_DETECT straight times, consider it done
|
||
|
BitPass[bit_indx] = 1;
|
||
|
} else if (SolidBitPass[bit_indx] > 0) {
|
||
|
SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1;
|
||
|
} else {
|
||
|
// first time bit has passed, record this value
|
||
|
SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1;
|
||
|
|
||
|
ca_cal_per_loopchrnk_left[loopchrnk_indx][bit_indx] = tap_value; }
|
||
|
} else {
|
||
|
// bit failed calibration, reset the SolidBitPass value back to 0
|
||
|
SolidBitPass[bit_indx] = 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
all_bits_pass = 1;
|
||
|
for (bit_indx=0; bit_indx < CA_NUM_BITS; bit_indx++) {
|
||
|
all_bits_pass = all_bits_pass & BitPass[bit_indx];
|
||
|
if (all_bits_pass == 0)
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
// If ALL bits are not passing - keep moving from Left to Right Side of window
|
||
|
if (all_bits_pass == 0) {
|
||
|
tap_value = tap_value - step_incr;
|
||
|
}
|
||
|
|
||
|
if ((tap_value < (-1 * camdllcode)) && (all_bits_pass == 0)) {
|
||
|
// print error message as Left Failing Point cannot be found
|
||
|
all_bits_pass = 1;
|
||
|
panic("Memory CA calibration: Unable to find passing point for all bits on the left side");
|
||
|
}
|
||
|
} while ((tap_value > (-1 * camdllcode)) && (all_bits_pass == 0));
|
||
|
}
|
||
|
|
||
|
static void ca_program_final_values(void)
|
||
|
{
|
||
|
uint32_t loopchrnk0_indx, loopchrnk1_indx, chrnk0_indx, chrnk1_indx, ch;
|
||
|
uint32_t bit_indx;
|
||
|
int32_t ca_bit_center[CA_NUM_BITS];
|
||
|
int32_t ca_bit_deskew[CA_NUM_BITS];
|
||
|
int32_t tmp_left_pos_val, tmp_right_pos_val;
|
||
|
int32_t left_pos_val;
|
||
|
int32_t right_pos_val;
|
||
|
int32_t camdllcode;
|
||
|
int32_t min_ca_bit_center;
|
||
|
int32_t adj_ca_bit_center;
|
||
|
uint32_t cs_adj_val;
|
||
|
|
||
|
int32_t rank_val[AMP_MAX_RANKS_PER_CHAN];
|
||
|
uint32_t swloop, hwloop;
|
||
|
uint32_t mask;
|
||
|
uint32_t comb_mask, tmp_mask;
|
||
|
uint32_t mask_txn_detect;
|
||
|
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
camdllcode = mdllcode[ch][AMP_CA];
|
||
|
|
||
|
// Calculate the Center Points for each CA bit
|
||
|
for (bit_indx=0; bit_indx < CA_NUM_BITS; bit_indx++) {
|
||
|
comb_mask = 0x0;
|
||
|
mask_txn_detect = 0x0;
|
||
|
tmp_mask = 0x0;
|
||
|
|
||
|
// Compute the aggr eye over multiple swloop and hwloop for all ranks
|
||
|
for (swloop = 0; swloop < amc_phy_params.cacalib_sw_loops; swloop++) {
|
||
|
mask = 0x0;
|
||
|
for (hwloop=0; hwloop < amc_phy_params.cacalib_hw_loops; hwloop++)
|
||
|
mask = mask | ca_patterns_mask[(swloop * amc_phy_params.cacalib_hw_loops) + hwloop];
|
||
|
|
||
|
// An explanation of the masks is below. Note that we only recorded result for a bit from a particular iteration if the bit had a transition.
|
||
|
// mask: for pattern(s) sent in this swloop, indicates if the bit had a transition
|
||
|
// tmp_mask: aggregates mask over all loops, including current swloop
|
||
|
// comb_mask: aggregates mask over all loops, upto the last iteration of the swloop. After it is used to generate mask_txn_detect, it catches upto same value as tmp_mask
|
||
|
// mask_txn_detect: indicates the first time a bit transitioned was in this swloop
|
||
|
tmp_mask = tmp_mask | mask;
|
||
|
mask_txn_detect = tmp_mask ^ comb_mask;
|
||
|
comb_mask = comb_mask | mask;
|
||
|
|
||
|
/*
|
||
|
* Rank 0
|
||
|
*/
|
||
|
|
||
|
loopchrnk0_indx = (swloop * AMC_NUM_CHANNELS * AMC_NUM_RANKS) + (ch * AMC_NUM_RANKS) + 0;
|
||
|
chrnk0_indx = (ch * AMC_NUM_RANKS) + 0;
|
||
|
|
||
|
/* Left side */
|
||
|
|
||
|
// lookup the value in the left side for this bit given loop, ch, and rnk
|
||
|
rank_val[0] = ca_cal_per_loopchrnk_left[loopchrnk0_indx][bit_indx];
|
||
|
tmp_left_pos_val = rank_val[0];
|
||
|
|
||
|
// If this is the first time this bit transitioned, just put it in the aggregate result array
|
||
|
if (mask_txn_detect & (1 << bit_indx)) {
|
||
|
left_pos_val = tmp_left_pos_val;
|
||
|
ca_cal_per_chrnk_left[chrnk0_indx][bit_indx] = left_pos_val;
|
||
|
} else if (mask & (1 << bit_indx)) {
|
||
|
// This is not the 1st time this bit transitioned so there is a recorded result already, but since we have a new result
|
||
|
// to compare with, find the value that would cover both points and put that in the array
|
||
|
left_pos_val = ca_cal_per_chrnk_left[chrnk0_indx][bit_indx];
|
||
|
left_pos_val = find_common_endpoint(tmp_left_pos_val, left_pos_val, MIN_ENDPT);
|
||
|
ca_cal_per_chrnk_left[chrnk0_indx][bit_indx] = left_pos_val;
|
||
|
}
|
||
|
|
||
|
/* Right side */
|
||
|
|
||
|
// lookup the value in the right side for this bit given loop, ch, and rnk
|
||
|
rank_val[0] = ca_cal_per_loopchrnk_right[loopchrnk0_indx][bit_indx];
|
||
|
tmp_right_pos_val = rank_val[0];
|
||
|
|
||
|
// If this is the first time this bit transitioned, just put it in the aggregate result array
|
||
|
if (mask_txn_detect & (1 << bit_indx)) {
|
||
|
right_pos_val = tmp_right_pos_val;
|
||
|
ca_cal_per_chrnk_right[chrnk0_indx][bit_indx] = right_pos_val;
|
||
|
} else if (mask & (1 << bit_indx)) {
|
||
|
// This is not the 1st time this bit transitioned so there is a recorded result already, but since we have a new result
|
||
|
// to compare with, find the value that would cover both points and put that in the array
|
||
|
right_pos_val = ca_cal_per_chrnk_right[chrnk0_indx][bit_indx];
|
||
|
right_pos_val = find_common_endpoint(tmp_right_pos_val, right_pos_val, MAX_ENDPT);
|
||
|
ca_cal_per_chrnk_right[chrnk0_indx][bit_indx] = right_pos_val;
|
||
|
}
|
||
|
|
||
|
if (AMC_NUM_RANKS > 1) {
|
||
|
/*
|
||
|
* Rank 1
|
||
|
*/
|
||
|
|
||
|
if (AMC_NUM_RANKS > AMP_MAX_RANKS_PER_CHAN)
|
||
|
panic("amp_v2: AMC_NUM_RANKS = %d is more than hw is capable of supporting (%d)\n", AMC_NUM_RANKS, AMP_MAX_RANKS_PER_CHAN);
|
||
|
|
||
|
loopchrnk1_indx = (swloop * AMC_NUM_CHANNELS * AMC_NUM_RANKS) + (ch * AMC_NUM_RANKS) + 1;
|
||
|
chrnk1_indx = (ch * AMC_NUM_RANKS) + 1;
|
||
|
|
||
|
/* Left side */
|
||
|
|
||
|
// lookup the value in the left side for this bit given loop, ch, and rnk
|
||
|
rank_val[1] = ca_cal_per_loopchrnk_left[loopchrnk1_indx][bit_indx];
|
||
|
tmp_left_pos_val = rank_val[1];
|
||
|
|
||
|
// If this is the first time this bit transitioned, just put it in the aggregate result array
|
||
|
if (mask_txn_detect & (1 << bit_indx)) {
|
||
|
left_pos_val = tmp_left_pos_val;
|
||
|
ca_cal_per_chrnk_left[chrnk1_indx][bit_indx] = left_pos_val;
|
||
|
} else if (mask & (1 << bit_indx)) {
|
||
|
// This is not the 1st time this bit transitioned so there is a recorded result already, but since we have a new result
|
||
|
// to compare with, find the value that would cover both points and put that in the array
|
||
|
left_pos_val = ca_cal_per_chrnk_left[chrnk1_indx][bit_indx];
|
||
|
left_pos_val = find_common_endpoint(tmp_left_pos_val, left_pos_val, MIN_ENDPT);
|
||
|
ca_cal_per_chrnk_left[chrnk1_indx][bit_indx] = left_pos_val;
|
||
|
}
|
||
|
|
||
|
/* Right side */
|
||
|
|
||
|
// lookup the value in the right side for this bit given loop, ch, and rnk
|
||
|
rank_val[1] = ca_cal_per_loopchrnk_right[loopchrnk1_indx][bit_indx];
|
||
|
tmp_right_pos_val = rank_val[1];
|
||
|
|
||
|
// If this is the first time this bit transitioned, just put it in the aggregate result array
|
||
|
if (mask_txn_detect & (1 << bit_indx)) {
|
||
|
right_pos_val = tmp_right_pos_val;
|
||
|
ca_cal_per_chrnk_right[chrnk1_indx][bit_indx] = right_pos_val;
|
||
|
} else if (mask & (1 << bit_indx)) {
|
||
|
// This is not the 1st time this bit transitioned so there is a recorded result already, but since we have a new result
|
||
|
// to compare with, find the value that would cover both points and put that in the array
|
||
|
right_pos_val = ca_cal_per_chrnk_right[chrnk1_indx][bit_indx];
|
||
|
right_pos_val = find_common_endpoint(tmp_right_pos_val, right_pos_val, MAX_ENDPT);
|
||
|
ca_cal_per_chrnk_right[chrnk1_indx][bit_indx] = right_pos_val;
|
||
|
}
|
||
|
|
||
|
// Find the common endpoint for both ranks
|
||
|
left_pos_val = find_common_endpoint(ca_cal_per_chrnk_left[chrnk0_indx][bit_indx], ca_cal_per_chrnk_left[chrnk1_indx][bit_indx], MIN_ENDPT);
|
||
|
right_pos_val = find_common_endpoint(ca_cal_per_chrnk_right[chrnk0_indx][bit_indx], ca_cal_per_chrnk_right[chrnk1_indx][bit_indx], MAX_ENDPT);
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
// At this point, the left edge and the right edge of the eye for this channel and bit are defined by left_pos_val and right_pos_val
|
||
|
// Find the center of the eye
|
||
|
ca_bit_center[bit_indx] = find_center_of_eye(left_pos_val, right_pos_val);
|
||
|
}
|
||
|
|
||
|
// Since center for each bit may be different, find the min val
|
||
|
// Min val will get programmed to the sdll, while the other bits will require deskew
|
||
|
min_ca_bit_center = ca_bit_center[0];
|
||
|
|
||
|
for (bit_indx = 1; bit_indx < CA_NUM_BITS; bit_indx++) {
|
||
|
if (ca_bit_center[bit_indx] < min_ca_bit_center)
|
||
|
min_ca_bit_center = ca_bit_center[bit_indx];
|
||
|
}
|
||
|
|
||
|
// for positive sdll, clamp it to mdllcode - DELIMIT_POS_ADJ_CASDLL
|
||
|
if (min_ca_bit_center > (camdllcode - DELIMIT_POS_ADJ_CASDLL)) {
|
||
|
min_ca_bit_center = camdllcode - DELIMIT_POS_ADJ_CASDLL;
|
||
|
}
|
||
|
|
||
|
// Since the min value of all bits is chosen for sdll, if the rest of the bits need more delay, compute their deskew
|
||
|
for (bit_indx=0; bit_indx < CA_NUM_BITS; bit_indx++) {
|
||
|
|
||
|
if (ca_bit_center[bit_indx] < min_ca_bit_center)
|
||
|
panic("Memory CA Calibration: ca_bit_center[%d] = (%d) < min_ca_bit_center = %d\n", bit_indx, ca_bit_center[bit_indx], min_ca_bit_center);
|
||
|
|
||
|
ca_bit_deskew[bit_indx] = ca_bit_center[bit_indx] - min_ca_bit_center;
|
||
|
}
|
||
|
|
||
|
// If min < -camdllcode, then we will clamp the sdll to -mdll
|
||
|
// and put the remaining delay on the CK signals
|
||
|
if (min_ca_bit_center < (-1 * camdllcode)) {
|
||
|
cs_adj_val = (-1 * min_ca_bit_center) - camdllcode;
|
||
|
adj_ca_bit_center = (-1 * camdllcode);
|
||
|
} else {
|
||
|
cs_adj_val = 0;
|
||
|
adj_ca_bit_center = min_ca_bit_center;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Finally, program the values
|
||
|
*/
|
||
|
|
||
|
for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++) {
|
||
|
// Make sure deskew value programmed is not negative and is <= MAX_DESKEW_PROGRAMMED
|
||
|
if ((ca_bit_deskew[bit_indx] < 0) || (ca_bit_deskew[bit_indx] > MAX_DESKEW_PROGRAMMED))
|
||
|
panic("Memory CA Calibration: ca_bit_deskew[%d] = %d invalid\n", bit_indx, ca_bit_deskew[bit_indx]);
|
||
|
}
|
||
|
|
||
|
// Push the remainder of the delay to CK signals (if adj_CaBitCenterPoint_val_data was clamped to camdll)
|
||
|
amp_push_ctl_out(ch, cs_adj_val);
|
||
|
|
||
|
// Program the SDLL with the adjusted min value
|
||
|
amp_push_casdll_out(ch, adj_ca_bit_center);
|
||
|
|
||
|
// Program the CA Deskew values for each bit
|
||
|
for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++) {
|
||
|
rAMP_CADESKEW_CTRL(ch, bit_indx) = ca_bit_deskew[bit_indx];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Loop through PerBitDeskewCode ranges for rddq until failing points for each byte (& bit) are found.
|
||
|
static void find_rddqcal_right_side_failing_point(uint32_t ch, uint32_t rnk, bool after_wrddqcal)
|
||
|
{
|
||
|
uint32_t dq_deskew;
|
||
|
uint32_t all_bits_fail;
|
||
|
uint32_t bits_fail_b[DQ_NUM_BYTES] = { 0 };
|
||
|
uint32_t rddqcalresult;
|
||
|
uint32_t mask_b[DQ_NUM_BYTES];
|
||
|
int32_t start_b[DQ_NUM_BYTES];
|
||
|
uint32_t byte, bit;
|
||
|
|
||
|
all_bits_fail = 0;
|
||
|
dq_deskew = 0;
|
||
|
|
||
|
// set the rddq sdll to negative dqmdllcode
|
||
|
mdllcode[ch][AMP_DQ] = (rAMP_MDLLCODE(AMP_DQ, ch) & DLLVAL_BITS);
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
amp_set_rddq_sdll(ch, byte, (1 << SIGN_BIT_POS) + mdllcode[ch][AMP_DQ]);
|
||
|
|
||
|
// initialize the mask for each byte lane
|
||
|
mask_b[byte] = 0xFF << (byte * 8);
|
||
|
}
|
||
|
|
||
|
rddqcalresult = 0xFFFFFFFF;
|
||
|
|
||
|
// PerBit Deskew lines cannot be pushed beyond DQ_MAX_DESKEW_PER_BIT value
|
||
|
do {
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (bits_fail_b[byte] == 0) {
|
||
|
for (bit = 0; bit < DQ_NUM_BITS_PER_BYTE; bit++)
|
||
|
rAMP_RDDQDESKEW_CTRL(ch, byte, bit) = dq_deskew;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Call Basic run Dq Cal Commands
|
||
|
if (after_wrddqcal == false) {
|
||
|
amp_run_rddqcal(ch);
|
||
|
rddqcalresult &= rAMP_DQCALRESULT(ch);
|
||
|
} else {
|
||
|
rddqcalresult &= wr_rd_pattern_result(ch, rnk, dq_deskew);
|
||
|
}
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
// if all bits haven't failed yet and this run shows all bits failing, we have found the failing point for this byte
|
||
|
if ((bits_fail_b[byte] == 0) && ((rddqcalresult & mask_b[byte]) == 0)) {
|
||
|
bits_fail_b[byte] = 1;
|
||
|
start_b[byte] = dq_deskew;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
all_bits_fail = bits_fail_b[0] & bits_fail_b[1] & bits_fail_b[2] & bits_fail_b[3];
|
||
|
|
||
|
if (all_bits_fail == 1) {
|
||
|
// If failing point has been found for all bits, find the passing point now
|
||
|
find_rddqcal_right_side_passing_point(ch, rnk, start_b, after_wrddqcal);
|
||
|
} else {
|
||
|
// To find right failing point, make more negative adjustment to the sdll (same as incrementing deskew)
|
||
|
dq_deskew = dq_deskew + COARSE_STEP_SZ;
|
||
|
}
|
||
|
|
||
|
} while ((dq_deskew <= DQ_MAX_DESKEW_PER_BIT) && (all_bits_fail == 0));
|
||
|
|
||
|
if ((dq_deskew > DQ_MAX_DESKEW_PER_BIT) && (all_bits_fail == 0)) {
|
||
|
// print error message as Right Failing Point cannot be found
|
||
|
dprintf(DEBUG_INFO, "Memory Rddq cal: Right side failing point not found, max deskew limit reach for channel %d", ch);
|
||
|
|
||
|
// Assume failure at this setting, and continue to passing point
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
// if all bits haven't failed yet, assign start_b for this byte to current reg setting
|
||
|
if (bits_fail_b[byte] == 0)
|
||
|
start_b[byte] = dq_deskew - COARSE_STEP_SZ;
|
||
|
}
|
||
|
|
||
|
find_rddqcal_right_side_passing_point(ch, rnk, start_b, after_wrddqcal);
|
||
|
}
|
||
|
|
||
|
// Reset deskew for all bits to 0
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
for (bit = 0; bit < DQ_NUM_BITS_PER_BYTE; bit++)
|
||
|
rAMP_RDDQDESKEW_CTRL(ch, byte, bit) = 0;
|
||
|
}
|
||
|
|
||
|
// Purpose of this function is to start from right side failing point and find locations for every DQ bit
|
||
|
// until the start of passing window for that bit is found
|
||
|
// Save all these locations to compute the center of the window
|
||
|
static void find_rddqcal_right_side_passing_point(uint32_t ch, uint32_t rnk, int32_t *start_b, bool after_wrddqcal)
|
||
|
{
|
||
|
uint32_t chrnk_indx;
|
||
|
bool switch_from_dqstodq, max_tap_value_reached;
|
||
|
int32_t tap_value_b[DQ_NUM_BYTES];
|
||
|
uint32_t BitPass[DQ_TOTAL_BITS] = { 0 };
|
||
|
uint32_t SolidBitPass[DQ_TOTAL_BITS] = { 0 };
|
||
|
uint32_t rddqcalresult;
|
||
|
uint32_t all_bits_pass;
|
||
|
uint32_t all_bits_pass_b[DQ_NUM_BYTES] = { 0 };
|
||
|
uint32_t step_incr;
|
||
|
uint32_t bit_indx, byte;
|
||
|
int32_t dqmdllcode;
|
||
|
int32_t saved_val;
|
||
|
|
||
|
chrnk_indx = (ch * AMC_NUM_RANKS) + rnk;
|
||
|
all_bits_pass = 0;
|
||
|
switch_from_dqstodq = false;
|
||
|
max_tap_value_reached = false;
|
||
|
rddqcalresult = 0xFFFFFFFF;
|
||
|
step_incr = FINER_STEP_SZ;
|
||
|
dqmdllcode = mdllcode[ch][AMP_DQ];
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
tap_value_b[byte] = start_b[byte];
|
||
|
}
|
||
|
|
||
|
// Moving Right to Left to find point where each bit turns from FAIL TO PASS
|
||
|
do {
|
||
|
if (switch_from_dqstodq == false) {
|
||
|
// continue to update per bit deskew until all bits pass for each byte lane
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (all_bits_pass_b[byte] == 0) {
|
||
|
for (bit_indx = 0; bit_indx < DQ_NUM_BITS_PER_BYTE; bit_indx++)
|
||
|
rAMP_RDDQDESKEW_CTRL(ch, byte, bit_indx) = tap_value_b[byte];
|
||
|
}
|
||
|
}
|
||
|
} else {
|
||
|
// adjust rddq sdll until all bits pass for each byte lane
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
if (all_bits_pass_b[byte] == 0)
|
||
|
amp_set_rddq_sdll(ch, byte, INT_TO_OFFSET(tap_value_b[byte]));
|
||
|
}
|
||
|
|
||
|
// Run rddq calibration in hw
|
||
|
if (after_wrddqcal == false) {
|
||
|
amp_run_rddqcal(ch);
|
||
|
rddqcalresult = rAMP_DQCALRESULT(ch);
|
||
|
} else {
|
||
|
rddqcalresult = wr_rd_pattern_result(ch, rnk, tap_value_b[0]);
|
||
|
}
|
||
|
|
||
|
// Make sure that each Bit sees a transition from 0 to 1 on DqCalresult Register
|
||
|
for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) {
|
||
|
|
||
|
byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE
|
||
|
|
||
|
// Check if this bit passed during the calibration (not necessarily for first time)
|
||
|
if ((BitPass[bit_indx] == 0) && ((rddqcalresult & (1 << bit_indx)) != 0)) {
|
||
|
// Has this bit passed SOLID_PASS_DETECT number of times? Then consider it done
|
||
|
if (SolidBitPass[bit_indx] == SOLID_PASS_DETECT) {
|
||
|
BitPass[bit_indx] = 1;
|
||
|
} else if (SolidBitPass[bit_indx] > 0) {
|
||
|
SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1;
|
||
|
} else {
|
||
|
// bit passed for the first time, record this value in the global array as the right edge
|
||
|
SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1;
|
||
|
|
||
|
byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE
|
||
|
|
||
|
if (switch_from_dqstodq == false)
|
||
|
// consider mdllcode as '0' since sdll is set to -mdllcode
|
||
|
saved_val = -1 * (tap_value_b[byte] + dqmdllcode);
|
||
|
else
|
||
|
saved_val = tap_value_b[byte];
|
||
|
|
||
|
rddq_cal_per_chrnk_right[chrnk_indx][bit_indx] = saved_val;
|
||
|
}
|
||
|
} else {
|
||
|
// bit failed calibration, reset the pass count to 0
|
||
|
SolidBitPass[bit_indx] = 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
all_bits_pass = 1;
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
all_bits_pass_b[byte] = 1;
|
||
|
|
||
|
for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) {
|
||
|
byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE
|
||
|
|
||
|
// Did all the bits pass (SOLID_PASS_DETECT number of times) in this byte lane?
|
||
|
// If anyone of the bits failed, then the byte flag is cleared
|
||
|
all_bits_pass_b[byte] = all_bits_pass_b[byte] & BitPass[bit_indx];
|
||
|
|
||
|
// Did all bits in all byte lanes pass?
|
||
|
all_bits_pass = all_bits_pass & BitPass[bit_indx];
|
||
|
}
|
||
|
|
||
|
// If ALL bits are not passing - keep moving from Right to Left Side of window (by adding less negative adjustment to mdll)
|
||
|
if (all_bits_pass == 0) {
|
||
|
// Even if one of the byte lanes arrives early to tap_value = 0. Remain here until all byte lane catch up before proceeding to pushing out dq
|
||
|
|
||
|
// check for all bytes reaching 0 on the tap value (could be deskew or sdll)
|
||
|
int32_t all_bytes_tap = tap_value_b[0];
|
||
|
for (byte = 1; (byte < DQ_NUM_BYTES) && (all_bytes_tap == 0); byte++) {
|
||
|
all_bytes_tap += tap_value_b[byte];
|
||
|
}
|
||
|
|
||
|
// if the tap_value for all bytes has reached 0 on the deskew, make the transition to SDLL
|
||
|
if ((all_bytes_tap == 0) && (switch_from_dqstodq == false)) {
|
||
|
switch_from_dqstodq = true;
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
tap_value_b[byte] = (rAMP_DQSDLLCTRL_RD(ch, byte) & DLLVAL_BITS);
|
||
|
tap_value_b[byte] = OFFSET_TO_INT(tap_value_b[byte]);
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
// To find right side passing point, add less negative adjustment to mdll (same as decrementing deskew)
|
||
|
|
||
|
// For deskew taps, we just decrement by step_incr if we haven't reached 0 yet
|
||
|
if (switch_from_dqstodq == false) {
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
if (tap_value_b[byte] > 0)
|
||
|
tap_value_b[byte] -= step_incr;
|
||
|
} else {
|
||
|
// For sdll taps, increment it
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (all_bits_pass_b[byte] == 0) {
|
||
|
tap_value_b[byte] += step_incr;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// trigger for loop to end if any of the bytes reach max tap value
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (!max_tap_value_reached)
|
||
|
max_tap_value_reached = (tap_value_b[byte] > MAX_SDLL_VAL);
|
||
|
|
||
|
if (max_tap_value_reached) {
|
||
|
if (all_bits_pass == 0)
|
||
|
panic("Memory rddq calibration: Unable to find right side passing point, max tap value reached");
|
||
|
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
} while ((!max_tap_value_reached) && (all_bits_pass == 0));
|
||
|
}
|
||
|
|
||
|
// Purpose of this function is to push DQS out until the left side failing point of the data window is found
|
||
|
static void find_rddqcal_left_side_failing_point(uint32_t ch, uint32_t rnk, bool after_wrddqcal)
|
||
|
{
|
||
|
int32_t rddqsdll[DQ_NUM_BYTES];
|
||
|
uint32_t rddqcalresult;
|
||
|
uint32_t all_bits_fail;
|
||
|
uint32_t all_bits_fail_b[DQ_NUM_BYTES] = { 0 };
|
||
|
uint32_t step_incr;
|
||
|
uint32_t mask_b[DQ_NUM_BYTES];
|
||
|
int32_t start_b[DQ_NUM_BYTES];
|
||
|
uint32_t byte;
|
||
|
bool max_tap_value_reached = false;
|
||
|
|
||
|
all_bits_fail = 0;
|
||
|
rddqcalresult = 0xFFFFFFFF;
|
||
|
step_incr = COARSE_STEP_SZ;
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
// initialize the mask for each byte lane
|
||
|
mask_b[byte] = 0xFF << (byte * 8);
|
||
|
|
||
|
// Get the starting values for RD DQS SDLL
|
||
|
rddqsdll[byte] = rAMP_DQSDLLCTRL_RD(ch, byte);
|
||
|
rddqsdll[byte] = OFFSET_TO_INT(rddqsdll[byte]);
|
||
|
}
|
||
|
|
||
|
// To find left failing point, keep adding less negative adjustment to mdll
|
||
|
do {
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
// set the new sdll for this byte lane if all bits are not yet failing
|
||
|
if (all_bits_fail_b[byte] == 0)
|
||
|
amp_set_rddq_sdll(ch, byte, INT_TO_OFFSET(rddqsdll[byte]));
|
||
|
}
|
||
|
|
||
|
// Run rddqcal in hw
|
||
|
if (after_wrddqcal == false) {
|
||
|
amp_run_rddqcal(ch);
|
||
|
rddqcalresult &= rAMP_DQCALRESULT(ch);
|
||
|
} else {
|
||
|
rddqcalresult &= wr_rd_pattern_result(ch, rnk, rddqsdll[0]);
|
||
|
}
|
||
|
|
||
|
// If the result of all bits in this byte show a fail, record this as the failing point
|
||
|
all_bits_fail = 1;
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if ((all_bits_fail_b[byte] == 0) && ((rddqcalresult & mask_b[byte]) == 0)) {
|
||
|
all_bits_fail_b[byte] = 1;
|
||
|
start_b[byte] = rddqsdll[byte];
|
||
|
}
|
||
|
|
||
|
all_bits_fail &= all_bits_fail_b[byte];
|
||
|
}
|
||
|
|
||
|
// all bytes fail, call the function to find left passing point
|
||
|
if (all_bits_fail == 1) {
|
||
|
find_rddqcal_left_side_passing_point(ch, rnk, start_b, after_wrddqcal);
|
||
|
} else {
|
||
|
// if the byte has not yet failed, find the next sdll value to be set
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (all_bits_fail_b[byte] == 0) {
|
||
|
rddqsdll[byte] += step_incr;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
// none of the previous bytes reached max_tap_value, then update the boolean
|
||
|
if (!max_tap_value_reached) {
|
||
|
max_tap_value_reached = (rddqsdll[byte] > MAX_SDLL_VAL);
|
||
|
|
||
|
if (max_tap_value_reached) {
|
||
|
dprintf(DEBUG_INFO, "Memory rddq calibration: Unable to find left failing point, max tap value reached for ch %d byte %d", ch, byte);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (max_tap_value_reached) {
|
||
|
// Continue to passing point if any of the bytes reaches max value and not all bits are failing
|
||
|
if (all_bits_fail == 0) {
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (all_bits_fail_b[byte] == 0)
|
||
|
start_b[byte] = MAX_SDLL_VAL;
|
||
|
}
|
||
|
|
||
|
find_rddqcal_left_side_passing_point(ch, rnk, start_b, after_wrddqcal);
|
||
|
}
|
||
|
}
|
||
|
} while ((!max_tap_value_reached) && (all_bits_fail == 0));
|
||
|
}
|
||
|
|
||
|
// Purpose of this function is to start from left side failing point and find passing locations for every DQ bit on left side of window
|
||
|
// Save all the locations to compute the center of window later
|
||
|
// To find left passing point, move to the right from the failing point, which means keep adding more negative adjustment to mdll
|
||
|
static void find_rddqcal_left_side_passing_point(uint32_t ch, uint32_t rnk, int32_t *start_b, bool after_wrddqcal)
|
||
|
{
|
||
|
uint32_t chrnk_indx;
|
||
|
bool max_tap_value_reached = false;
|
||
|
int32_t tap_value_b[DQ_NUM_BYTES];
|
||
|
uint32_t BitPass[DQ_TOTAL_BITS] = { 0 };
|
||
|
uint32_t SolidBitPass[DQ_TOTAL_BITS] = { 0 };
|
||
|
uint32_t rddqcalresult;
|
||
|
uint32_t all_bits_pass;
|
||
|
uint32_t all_bits_pass_b[DQ_NUM_BYTES] = { 0 };
|
||
|
uint32_t step_incr;
|
||
|
uint32_t bit_indx, byte;
|
||
|
|
||
|
chrnk_indx = (ch * AMC_NUM_RANKS) + rnk;
|
||
|
all_bits_pass = 0;
|
||
|
rddqcalresult = 0xFFFFFFFF;
|
||
|
step_incr = FINER_STEP_SZ;
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
tap_value_b[byte] = start_b[byte];
|
||
|
}
|
||
|
|
||
|
// Finding Left side passing point on per bit level. Moving Left to Right (keep adding more negative adj to mdll) to find point where it turns from FAIL TO PASS
|
||
|
do {
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
// if we haven't found all bits passing for this byte, push out new sdll value
|
||
|
if (all_bits_pass_b[byte] == 0)
|
||
|
amp_set_rddq_sdll(ch, byte, INT_TO_OFFSET(tap_value_b[byte]));
|
||
|
}
|
||
|
|
||
|
// Run rddqcal in hw
|
||
|
if (after_wrddqcal == false) {
|
||
|
amp_run_rddqcal(ch);
|
||
|
rddqcalresult = rAMP_DQCALRESULT(ch);
|
||
|
} else {
|
||
|
rddqcalresult = wr_rd_pattern_result(ch, rnk, tap_value_b[0]);
|
||
|
}
|
||
|
|
||
|
// Make sure that each Bit sees a transition from 0 to 1 on DqCalresult Register
|
||
|
for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) {
|
||
|
|
||
|
// Check if this bit passed during the calibration (not necessarily for first time)
|
||
|
if ((BitPass[bit_indx] == 0) && ((rddqcalresult & (1 << bit_indx)) != 0)) {
|
||
|
// Has this bit passed SOLID_PASS_DETECT number of times? Then consider it done
|
||
|
if (SolidBitPass[bit_indx] == SOLID_PASS_DETECT) {
|
||
|
BitPass[bit_indx] = 1;
|
||
|
} else if (SolidBitPass[bit_indx] > 0) {
|
||
|
SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1;
|
||
|
} else {
|
||
|
// bit passed for the first time, record this value in the global array as the left edge
|
||
|
SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1;
|
||
|
|
||
|
byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE
|
||
|
|
||
|
rddq_cal_per_chrnk_left[chrnk_indx][bit_indx] = tap_value_b[byte];
|
||
|
}
|
||
|
} else {
|
||
|
// bit failed calibration, reset the pass count to 0
|
||
|
SolidBitPass[bit_indx] = 0;
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
all_bits_pass = 1;
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
all_bits_pass_b[byte] = 1;
|
||
|
|
||
|
for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) {
|
||
|
byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE
|
||
|
|
||
|
// Did all the bits pass (SOLID_PASS_DETECT number of times) in this byte lane?
|
||
|
// If anyone of the bits failed, then the byte flag is cleared
|
||
|
all_bits_pass_b[byte] = all_bits_pass_b[byte] & BitPass[bit_indx];
|
||
|
|
||
|
// Did all bits in all byte lanes pass?
|
||
|
all_bits_pass = all_bits_pass & BitPass[bit_indx];
|
||
|
}
|
||
|
|
||
|
// If ALL bits are not passing - keep moving from Left to Right Side of window (by adding more negative adjustment to mdll)
|
||
|
if (all_bits_pass == 0) {
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
|
||
|
// if this byte lane does not have all passing bits, adjust this byte's sdll
|
||
|
if (all_bits_pass_b[byte] == 0) {
|
||
|
tap_value_b[byte] -= step_incr;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// check for end of loop condition
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (!max_tap_value_reached)
|
||
|
max_tap_value_reached = (tap_value_b[byte] < (-1 * MAX_SDLL_VAL));
|
||
|
|
||
|
if (max_tap_value_reached) {
|
||
|
if (all_bits_pass == 0)
|
||
|
panic("Memory rddq calibration: Unable to find left passing point, max tap value reached");
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
// panic if we get beyond -dqmdllcode, since we really shouldn't have to go that far
|
||
|
if ((all_bits_pass == 0) && (tap_value_b[byte] < (-1 * mdllcode[ch][AMP_DQ])))
|
||
|
panic("Memory rddq calibration: Not yet found left passing point but SDLL < -dqmdllcode for ch %d byte %d", ch, byte);
|
||
|
}
|
||
|
} while ((!max_tap_value_reached) && (all_bits_pass == 0));
|
||
|
}
|
||
|
|
||
|
static void rddq_program_final_values(bool after_wrddqcal)
|
||
|
{
|
||
|
uint32_t ch, bit_indx, byte;
|
||
|
uint32_t chrnk0_indx, chrnk1_indx;
|
||
|
int32_t rddq_bit_center[DQ_TOTAL_BITS];
|
||
|
int32_t rddq_bit_deskew[DQ_TOTAL_BITS];
|
||
|
int32_t left_pos_val;
|
||
|
int32_t right_pos_val;
|
||
|
int32_t max_rddq_center[DQ_NUM_BYTES];
|
||
|
int32_t dqmdllcode;
|
||
|
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
dqmdllcode = mdllcode[ch][AMP_DQ];
|
||
|
|
||
|
// find the center point of passing window for each bit over all ranks
|
||
|
for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) {
|
||
|
|
||
|
chrnk0_indx = (ch * AMC_NUM_RANKS) + 0;
|
||
|
left_pos_val = rddq_cal_per_chrnk_left[chrnk0_indx][bit_indx];
|
||
|
right_pos_val = rddq_cal_per_chrnk_right[chrnk0_indx][bit_indx];
|
||
|
|
||
|
if (AMC_NUM_RANKS > 1) {
|
||
|
|
||
|
chrnk1_indx = (ch * AMC_NUM_RANKS) + 1;
|
||
|
|
||
|
// find the endpoint that covers both ranks
|
||
|
left_pos_val = find_common_endpoint(rddq_cal_per_chrnk_left[chrnk0_indx][bit_indx],
|
||
|
rddq_cal_per_chrnk_left[chrnk1_indx][bit_indx],
|
||
|
MIN_ENDPT);
|
||
|
right_pos_val = find_common_endpoint(rddq_cal_per_chrnk_right[chrnk0_indx][bit_indx],
|
||
|
rddq_cal_per_chrnk_right[chrnk1_indx][bit_indx],
|
||
|
MAX_ENDPT);
|
||
|
}
|
||
|
|
||
|
// find center of the eye for this bit
|
||
|
rddq_bit_center[bit_indx] = find_center_of_eye(left_pos_val, right_pos_val);
|
||
|
}
|
||
|
|
||
|
// <rdar://problem/13439594>, <rdar://problem/13888162> Need additional shift to DQ offset
|
||
|
if (after_wrddqcal) {
|
||
|
|
||
|
int8_t signed_byte_center_point[DQ_TOTAL_BITS];
|
||
|
|
||
|
// convert to signed bytes first as required by shift function
|
||
|
for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++)
|
||
|
signed_byte_center_point[bit_indx] = (int8_t) rddq_bit_center[bit_indx];
|
||
|
|
||
|
// call platform specific amc routine to apply apropriate shifts depending on DRAM vendor
|
||
|
amc_dram_shift_dq_offset(signed_byte_center_point, DQ_TOTAL_BITS);
|
||
|
|
||
|
// convert shifted signed bytes back to signed ints
|
||
|
for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++)
|
||
|
rddq_bit_center[bit_indx] = (int32_t) signed_byte_center_point[bit_indx];
|
||
|
}
|
||
|
|
||
|
// initialize the max centerpoint to the 1st bit's center point in each byte lane
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
max_rddq_center[byte] = rddq_bit_center[byte * DQ_NUM_BITS_PER_BYTE];
|
||
|
|
||
|
// Find the maximum CenterPoint per byte lane given each bit's center point
|
||
|
for (bit_indx=0; bit_indx < DQ_TOTAL_BITS; bit_indx++) {
|
||
|
|
||
|
byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE
|
||
|
|
||
|
// if this bit's center point is greater than current max, make it the new max (we'll program this to sdll, and other values will require deskew)
|
||
|
if (rddq_bit_center[bit_indx] > max_rddq_center[byte])
|
||
|
max_rddq_center[byte] = rddq_bit_center[bit_indx];
|
||
|
}
|
||
|
|
||
|
// if the max for each byte lane is < -dqmdllcode, clamp it to -dqmdllcode (the remainder will go on per bit deskew)
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
|
||
|
if (max_rddq_center[byte] < (-1 * dqmdllcode))
|
||
|
max_rddq_center[byte] = (-1 * dqmdllcode);
|
||
|
}
|
||
|
|
||
|
// Compute the individual deskew values: any bits with center point < max for its byte lane will require deskew
|
||
|
// Each bit's center is guaranteed to be <= max for its byte lane
|
||
|
// Deskewing means adding more negative adjustment for this bit in addition to the sdll, which is clamped on the negative side to -dqmdllcode
|
||
|
for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) {
|
||
|
|
||
|
byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE
|
||
|
|
||
|
if (rddq_bit_center[bit_indx] > max_rddq_center[byte])
|
||
|
panic("Memory Rddq calibration: rddq_bit_center[%d] = %d > max_rddq_center[%d] = %d\n", bit_indx, rddq_bit_center[bit_indx], byte, max_rddq_center[byte]);
|
||
|
|
||
|
rddq_bit_deskew[bit_indx] = max_rddq_center[byte] - rddq_bit_center[bit_indx];
|
||
|
|
||
|
if ((rddq_bit_deskew[bit_indx] < 0) || (rddq_bit_deskew[bit_indx] > DQ_MAX_DESKEW_PER_BIT))
|
||
|
panic("Memory Rddq calibration: rddq_bit_deskew[%d] = %d invalid\n", bit_indx, rddq_bit_deskew[bit_indx]);
|
||
|
}
|
||
|
|
||
|
// Program the SDLL and deskew per bit for each byte lane
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
amp_set_rddq_sdll(ch, byte, INT_TO_OFFSET(max_rddq_center[byte]));
|
||
|
|
||
|
// per bit deskew for this byte lane
|
||
|
for (bit_indx = 0; bit_indx < DQ_NUM_BITS_PER_BYTE; bit_indx++) {
|
||
|
rAMP_RDDQDESKEW_CTRL(ch, byte, bit_indx) = rddq_bit_deskew[(byte * DQ_NUM_BITS_PER_BYTE) + bit_indx];
|
||
|
}
|
||
|
}
|
||
|
} // for (ch = 0; ch < AMC_NUM_CHANNELS; ch++)
|
||
|
}
|
||
|
|
||
|
static uint32_t wrlvl_encode_dlyval(uint32_t ch, uint32_t phy_type, uint32_t val) {
|
||
|
|
||
|
uint32_t ret_val, mdll;
|
||
|
|
||
|
mdll = mdllcode[ch][phy_type];
|
||
|
|
||
|
if (val < ( (2 * mdll) - 2 )) {
|
||
|
ret_val = 0;
|
||
|
} else if (val < ( 2 * mdll )) {
|
||
|
ret_val = 1;
|
||
|
} else if (val < ( (2 * mdll) + 3 )) {
|
||
|
ret_val = 2;
|
||
|
} else {
|
||
|
ret_val = 3;
|
||
|
}
|
||
|
|
||
|
return ret_val;
|
||
|
}
|
||
|
|
||
|
static uint32_t wrlvl_encode_clk90dly(uint32_t ch, uint32_t val) {
|
||
|
|
||
|
uint32_t ret_val, mdll;
|
||
|
|
||
|
mdll = mdllcode[ch][AMP_DQ];
|
||
|
|
||
|
if (val < (mdll - 2)) {
|
||
|
ret_val = 0;
|
||
|
} else if (val < mdll) {
|
||
|
ret_val = 1;
|
||
|
} else if (val < (mdll + 3)) {
|
||
|
ret_val = 2;
|
||
|
} else {
|
||
|
ret_val = 3;
|
||
|
}
|
||
|
|
||
|
return ret_val;
|
||
|
}
|
||
|
|
||
|
static void push_wrlvl_to_0s_region(uint32_t ch, uint32_t rnk)
|
||
|
{
|
||
|
uint32_t wrlvldata, byte;
|
||
|
uint32_t cawrlvlcode = 0;
|
||
|
bool max_tap_value_reached = false;
|
||
|
uint32_t wrlvlrun = 0xF;
|
||
|
uint32_t dqwrlvlcode[DQ_NUM_BYTES] = { 0 };
|
||
|
|
||
|
// Note that incrementing cawrlvl sdll has opposite effect of incrementing dqwrlvl
|
||
|
|
||
|
do {
|
||
|
// If any byte lane shows that it returned a value of 1 - push ca wrlvl sdll out by 1 tap
|
||
|
cawrlvlcode++;
|
||
|
amp_set_cawrlvl_sdll(ch, cawrlvlcode, false);
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
// if this byte already showed a 0 during last run, push dqwrlvl sdll by 1 tap
|
||
|
// this is done to ensure this byte remains at 0 despite cawrlvl sdll being incremented above
|
||
|
if ((wrlvlrun & (1 << byte)) == 0) {
|
||
|
dqwrlvlcode[byte]++;
|
||
|
amp_set_dqwrlvl_sdll(ch, byte, dqwrlvlcode[byte], false);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Run Wrlvl calibration in hw
|
||
|
amp_run_wrlvlcal(ch, wrlvlrun);
|
||
|
|
||
|
// result in reported in AMPWRLVLDATA register
|
||
|
wrlvldata = rAMP_DQWRLVLDATA(ch);
|
||
|
|
||
|
// check if all bits for this byte returned a 0, then this byte is done
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (wrlvlrun & (1 << byte)) {
|
||
|
if ((wrlvldata & (0xFF << (byte * DQ_NUM_BITS_PER_BYTE))) == 0)
|
||
|
wrlvlrun &= ~(1 << byte);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Exit if ca or dq wrlvl sdlls reach max tap value
|
||
|
if (cawrlvlcode == MAX_CAWRLVL_CODE) {
|
||
|
max_tap_value_reached = true;
|
||
|
if (wrlvlrun)
|
||
|
panic("Memory Wrlvl calibration: CA sdll reached max tap value, yet all bytes not all 0s");
|
||
|
} else {
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (dqwrlvlcode[byte] == MAX_DQWRLVL_CODE) {
|
||
|
if (wrlvlrun)
|
||
|
panic("Memory Wrlvl calibration: DQ%d sdll reached max tap value, yet all bytes not all 0s", byte);
|
||
|
max_tap_value_reached = true;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
} while (wrlvlrun && !max_tap_value_reached);
|
||
|
}
|
||
|
|
||
|
// Keep incrementing dqsdll until the byte shows 1s again. This counters the casdll that was incremented previously in order to show 0s
|
||
|
static void find_wrlvl_0to1_transition(uint32_t ch, uint32_t rnk)
|
||
|
{
|
||
|
uint32_t chrnk_indx, byte;
|
||
|
uint32_t wrlvlrun, wrlvldata;
|
||
|
bool max_tap_value_reached = false;
|
||
|
uint32_t dqwrlvlcode[DQ_NUM_BYTES];
|
||
|
uint32_t cawrlvlcode = rAMP_CAWRLVLSDLLCODE(ch);
|
||
|
|
||
|
wrlvlrun = 0xF;
|
||
|
wrlvldata = 0;
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
dqwrlvlcode[byte] = rAMP_DQWRLVLSDLLCODE(ch, byte);
|
||
|
|
||
|
chrnk_indx = (ch * AMC_NUM_RANKS) + rnk;
|
||
|
|
||
|
do {
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
// if this byte is still showing a 0, increment the sdll
|
||
|
if (wrlvlrun & (1 << byte)) {
|
||
|
dqwrlvlcode[byte]++;
|
||
|
amp_set_dqwrlvl_sdll(ch, byte, dqwrlvlcode[byte], false);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// run the wrlvl calibration in hw
|
||
|
amp_run_wrlvlcal(ch, wrlvlrun);
|
||
|
|
||
|
wrlvldata = rAMP_DQWRLVLDATA(ch);
|
||
|
|
||
|
// check if all bits return a 1 for this byte, then this byte is done
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (wrlvlrun & (1 << byte)) {
|
||
|
if (((int) (wrlvldata & (0xFF << (byte * DQ_NUM_BITS_PER_BYTE)))) == (0xFF << (byte * DQ_NUM_BITS_PER_BYTE)))
|
||
|
wrlvlrun &= ~(1 << byte);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Exit if any of the byte lane's sdll reaches max
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (dqwrlvlcode[byte] == MAX_DQWRLVL_CODE) {
|
||
|
if (wrlvlrun)
|
||
|
panic("Memory Wrlvl calibration: DQ%d sdll reached max tap value, yet all bytes not all 1s", byte);
|
||
|
max_tap_value_reached = true;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
} while (wrlvlrun && !max_tap_value_reached);
|
||
|
|
||
|
// save the per byte codes for this channel and rank
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
wrlvl_cal_per_chrnk_rise[chrnk_indx][byte] = dqwrlvlcode[byte];
|
||
|
// in the "5th byte" entry, save the cawrlvl code
|
||
|
wrlvl_cal_per_chrnk_rise[chrnk_indx][byte] = cawrlvlcode;
|
||
|
}
|
||
|
|
||
|
// Go back towards the 0s region (that was found earlier). Note: not trying to find the next edge, just the previous edge that was found already
|
||
|
static void find_wrlvl_1to0_transition(uint32_t ch, uint32_t rnk)
|
||
|
{
|
||
|
uint32_t chrnk_indx, byte;
|
||
|
uint32_t wrlvlrun, wrlvldata;
|
||
|
bool max_tap_value_reached = false;
|
||
|
uint32_t dqwrlvlcode[DQ_NUM_BYTES];
|
||
|
uint32_t cawrlvlcode = rAMP_CAWRLVLSDLLCODE(ch);
|
||
|
bool incr_cawrlvl = false;
|
||
|
|
||
|
chrnk_indx = (ch * AMC_NUM_RANKS) + rnk;
|
||
|
wrlvlrun = 0xF;
|
||
|
|
||
|
// jump ahead by SOLID_PASS_DETECT into the 1s region
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
dqwrlvlcode[byte] = rAMP_DQWRLVLSDLLCODE(ch, byte);
|
||
|
dqwrlvlcode[byte] += (SOLID_PASS_DETECT + 1); // + 1 because code is decremented before programming the sdll
|
||
|
}
|
||
|
|
||
|
do {
|
||
|
// Make sure dqwrlvlsdll > 0, otherwise switch to cawrlvlsdll
|
||
|
for (byte = 0; (byte < DQ_NUM_BYTES) && !incr_cawrlvl; byte++) {
|
||
|
if (dqwrlvlcode[byte] == 0)
|
||
|
incr_cawrlvl = true;
|
||
|
}
|
||
|
|
||
|
// if we've reached 0 on any dqwrlvlsdll that were being decremented, switch to incrementing the cawrlvlsdll (same effect)
|
||
|
if (incr_cawrlvl) {
|
||
|
cawrlvlcode++;
|
||
|
amp_set_cawrlvl_sdll(ch, cawrlvlcode, false);
|
||
|
|
||
|
// In order to keep bytes that have transitioned to 0 to stay there, increment dqwrlvlsdll (counters effect of incrementing cawrlvlsdll)
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if ((wrlvlrun & (1 << byte)) == 0) {
|
||
|
dqwrlvlcode[byte]++;
|
||
|
amp_set_dqwrlvl_sdll(ch, byte, dqwrlvlcode[byte], false);
|
||
|
}
|
||
|
}
|
||
|
} else {
|
||
|
// if run bit is set for this byte, push out the new sdll value after decrementing by 1
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (wrlvlrun & (1 << byte)) {
|
||
|
dqwrlvlcode[byte]--;
|
||
|
amp_set_dqwrlvl_sdll(ch, byte, dqwrlvlcode[byte], false);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// run the wrlvl calibration in hw
|
||
|
amp_run_wrlvlcal(ch, wrlvlrun);
|
||
|
|
||
|
wrlvldata = rAMP_DQWRLVLDATA(ch);
|
||
|
|
||
|
// check if all bits for this byte returned a 0, then this byte is done
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (wrlvlrun & (1 << byte)) {
|
||
|
if ((wrlvldata & (0xFF << (byte * DQ_NUM_BITS_PER_BYTE))) == 0)
|
||
|
wrlvlrun &= ~(1 << byte);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// check if we've reached max tap value
|
||
|
if (incr_cawrlvl && (cawrlvlcode == MAX_CAWRLVL_CODE)) {
|
||
|
max_tap_value_reached = true;
|
||
|
if (wrlvlrun)
|
||
|
panic("Memory Wrlvl calibration: max tap value reached, yet all bytes not back to 0s");
|
||
|
}
|
||
|
|
||
|
} while (wrlvlrun && !max_tap_value_reached);
|
||
|
|
||
|
// save the per byte codes for this channel and rank
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
wrlvl_cal_per_chrnk_fall[chrnk_indx][byte] = dqwrlvlcode[byte];
|
||
|
|
||
|
// in the "5th byte" entry, save the cawrlvl code
|
||
|
wrlvl_cal_per_chrnk_fall[chrnk_indx][byte] = cawrlvlcode;
|
||
|
}
|
||
|
|
||
|
static void wrlvl_program_final_values(void)
|
||
|
{
|
||
|
uint32_t ch, chrnk0_indx, chrnk1_indx;
|
||
|
uint32_t rank_rise_val[AMP_MAX_RANKS_PER_CHAN], rank_fall_val[AMP_MAX_RANKS_PER_CHAN];
|
||
|
uint32_t edge_pos[AMP_MAX_RANKS_PER_CHAN];
|
||
|
uint32_t common_edge_pos, min_edge_pos;
|
||
|
uint32_t byte;
|
||
|
uint32_t saved_val[DQ_NUM_BYTES + 1];
|
||
|
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
// we go upto DQ_NUM_BYTES + 1 to also take into account the cawrlvlcode that is stored in the 5th element
|
||
|
for (byte = 0; byte < (DQ_NUM_BYTES + 1); byte++) {
|
||
|
|
||
|
// Rank 0
|
||
|
chrnk0_indx = (ch * AMC_NUM_RANKS) + 0;
|
||
|
rank_rise_val[0] = wrlvl_cal_per_chrnk_rise[chrnk0_indx][byte];
|
||
|
rank_fall_val[0] = wrlvl_cal_per_chrnk_fall[chrnk0_indx][byte];
|
||
|
// average of 2 values is the edge for this rank
|
||
|
edge_pos[0] = (rank_rise_val[0] + rank_fall_val[0]) >> 1;
|
||
|
common_edge_pos = edge_pos[0];
|
||
|
|
||
|
// Adjust for Dual ranks
|
||
|
if (AMC_NUM_RANKS > 1) {
|
||
|
chrnk1_indx = (ch * AMC_NUM_RANKS) + 1;
|
||
|
rank_rise_val[1] = wrlvl_cal_per_chrnk_rise[chrnk1_indx][byte];
|
||
|
rank_fall_val[1] = wrlvl_cal_per_chrnk_fall[chrnk1_indx][byte];
|
||
|
edge_pos[1] = (rank_rise_val[1] + rank_fall_val[1]) >> 1;
|
||
|
|
||
|
// common_edge_pos between both ranks is simply their average
|
||
|
common_edge_pos = (edge_pos[0] + edge_pos[1]) >> 1;
|
||
|
}
|
||
|
|
||
|
// save the wrlvlsdll for each byte (and the ca)
|
||
|
saved_val[byte] = common_edge_pos;
|
||
|
}
|
||
|
|
||
|
// Find the min among all bytes (and the ca)
|
||
|
min_edge_pos = saved_val[DQ_NUM_BYTES]; // initialize min as the cawrlvlsdll
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
if (saved_val[byte] < min_edge_pos)
|
||
|
min_edge_pos = saved_val[byte];
|
||
|
|
||
|
// We'll subtract the min from all 5 sdlls, including ca
|
||
|
// so the byte sdlls which are in opposite direction also need to be asjusted
|
||
|
for (byte = 0; byte < (DQ_NUM_BYTES + 1); byte++) {
|
||
|
saved_val[byte] -= min_edge_pos;
|
||
|
|
||
|
// Program the values into the registers
|
||
|
if (byte == DQ_NUM_BYTES) {
|
||
|
// cawrlvl (use dlysel, which will require phyupdt and forceckelow)
|
||
|
amp_set_cawrlvl_sdll(ch, saved_val[byte], true);
|
||
|
} else {
|
||
|
// dqwrlvl (use dlysel, which will require phyupdt and forceckelow)
|
||
|
amp_set_dqwrlvl_sdll(ch, byte, saved_val[byte], true);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
// Keep pushing out WRDQS lines (controlled by WRDQM registers, oddly) until right side failing point is found
|
||
|
static void find_wrdqcal_right_side_failing_point(uint32_t ch, uint32_t rnk)
|
||
|
{
|
||
|
uint32_t push_dqs_out;
|
||
|
uint32_t all_bits_fail;
|
||
|
uint32_t bits_fail_b[DQ_NUM_BYTES] = { 0 };
|
||
|
uint32_t wrdqcalresult_cumulative;
|
||
|
uint32_t mask_b[DQ_NUM_BYTES];
|
||
|
int32_t start_b[DQ_NUM_BYTES];
|
||
|
uint32_t byte;
|
||
|
uint32_t cawrlvlcode = rAMP_CAWRLVLSDLLCODE(ch);
|
||
|
|
||
|
all_bits_fail = 0;
|
||
|
push_dqs_out = 0;
|
||
|
wrdqcalresult_cumulative = 0xFFFFFFFF;
|
||
|
|
||
|
// set the wrdq sdll to negative dqmdllcode
|
||
|
mdllcode[ch][AMP_DQ] = (rAMP_MDLLCODE(AMP_DQ, ch) & DLLVAL_BITS);
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
amp_set_wrdq_sdll(ch, byte, (-1 * mdllcode[ch][AMP_DQ]));
|
||
|
|
||
|
// initialize the mask for each byte lane
|
||
|
mask_b[byte] = 0xFF << (byte * 8);
|
||
|
}
|
||
|
|
||
|
do {
|
||
|
// NOTE: When DQS are pushed out then - cawrlvl sdll needs to be pushed out as well with equal taps
|
||
|
// can use dlysel (with phyupdt and forceckelow)
|
||
|
amp_set_cawrlvl_sdll(ch, cawrlvlcode + push_dqs_out, true);
|
||
|
|
||
|
// Keep pushing per bit DQS (controlled by DM regs, oddly) out until all bytes start to fail
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
rAMP_WRDMDESKEW_CTRL(ch, byte) = push_dqs_out;
|
||
|
|
||
|
// Perform the WrDq calibration with PRBS patterns
|
||
|
wrdqcalresult_cumulative &= wr_rd_pattern_result(ch, rnk, push_dqs_out);
|
||
|
|
||
|
all_bits_fail = 1;
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (bits_fail_b[byte] == 0) {
|
||
|
// if all bits fail for this byte for the 1st time, we've found the right failing point
|
||
|
if ((wrdqcalresult_cumulative & mask_b[byte]) == 0) {
|
||
|
bits_fail_b[byte] = 1;
|
||
|
start_b[byte] = push_dqs_out;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
all_bits_fail &= bits_fail_b[byte];
|
||
|
}
|
||
|
|
||
|
// if all bits in all bytes fail, find the right passing point
|
||
|
if (all_bits_fail == 1) {
|
||
|
find_wrdqcal_right_side_passing_point(ch, rnk, start_b);
|
||
|
} else {
|
||
|
// increase the deskew since all bits are not yet failing
|
||
|
push_dqs_out += COARSE_STEP_SZ;
|
||
|
}
|
||
|
|
||
|
} while ((push_dqs_out <= DQ_MAX_DESKEW_PER_BIT) && (all_bits_fail == 0));
|
||
|
|
||
|
if ((push_dqs_out > DQ_MAX_DESKEW_PER_BIT) && (all_bits_fail == 0)) {
|
||
|
// Right Failing Point cannot be found
|
||
|
dprintf(DEBUG_INFO, "Memory Wrdq calibration: Max deskew reached, but right failing point not found for ch %d", ch);
|
||
|
|
||
|
// Assume failure point is current reg setting
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (bits_fail_b[byte] == 0)
|
||
|
start_b[byte] = push_dqs_out - COARSE_STEP_SZ;
|
||
|
}
|
||
|
|
||
|
// conitnue to passing point
|
||
|
find_wrdqcal_right_side_passing_point(ch, rnk, start_b);
|
||
|
}
|
||
|
|
||
|
// Before quitting restore the cawrlvlsdll and per byte deskew back to original values.
|
||
|
// can use dlysel (with phyupdt and forceckelow)
|
||
|
amp_set_cawrlvl_sdll(ch, cawrlvlcode, true);
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
rAMP_WRDMDESKEW_CTRL(ch, byte) = 0;
|
||
|
}
|
||
|
|
||
|
// Keep decreasing per byte deskew until right passing point is found
|
||
|
static void find_wrdqcal_right_side_passing_point(uint32_t ch, uint32_t rnk, int32_t *start_b)
|
||
|
{
|
||
|
uint32_t chrnk_indx;
|
||
|
bool switch_from_dqstodq, max_tap_value_reached;
|
||
|
int32_t tap_value_b[DQ_NUM_BYTES];
|
||
|
uint32_t BitPass[DQ_TOTAL_BITS] = { 0 };
|
||
|
uint32_t SolidBitPass[DQ_TOTAL_BITS] = { 0 };
|
||
|
uint32_t wrdqcalresult;
|
||
|
uint32_t all_bits_pass;
|
||
|
uint32_t all_bits_pass_b[DQ_NUM_BYTES] = { 0 };
|
||
|
uint32_t step_incr;
|
||
|
uint32_t bit_indx, byte;
|
||
|
int32_t dqmdllcode, max_tap_value;
|
||
|
int32_t saved_val, max_value;
|
||
|
uint32_t cawrlvlcode = rAMP_CAWRLVLSDLLCODE(ch);
|
||
|
|
||
|
chrnk_indx = (ch * AMC_NUM_RANKS) + rnk;
|
||
|
all_bits_pass = 0;
|
||
|
switch_from_dqstodq = false;
|
||
|
max_tap_value_reached = false;
|
||
|
step_incr = FINER_STEP_SZ;
|
||
|
dqmdllcode = mdllcode[ch][AMP_DQ];
|
||
|
max_tap_value = dqmdllcode - DELIMIT_POS_ADJ_WRDQSDLL + DQ_MAX_DESKEW_PER_BIT;
|
||
|
|
||
|
// initialize tap_values to max of all bytes' start values
|
||
|
max_value = start_b[0];
|
||
|
for (byte = 1; byte < DQ_NUM_BYTES; byte++)
|
||
|
max_value = (start_b[byte] > max_value) ? start_b[byte] : max_value;
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
tap_value_b[byte] = max_value;
|
||
|
}
|
||
|
|
||
|
// Any change in DM_DESKEW registers will require an equal change to cawrlvl sdll
|
||
|
do {
|
||
|
if (switch_from_dqstodq == false) {
|
||
|
// cawrlvlcode is decremented with tap_value_b each time
|
||
|
// can use dlysel (with phyupdt and forceckelow)
|
||
|
amp_set_cawrlvl_sdll(ch, cawrlvlcode, true);
|
||
|
|
||
|
// Keep pushing per bit DQS out until all bytes start to fail
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
rAMP_WRDMDESKEW_CTRL(ch, byte) = tap_value_b[byte];
|
||
|
} else {
|
||
|
// adjust wrdq sdll until all bits pass for each byte lane
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (all_bits_pass_b[byte] == 0)
|
||
|
amp_set_wrdq_sdll(ch, byte, tap_value_b[byte]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Send the PRBS patterns and read them back to see which bits are passing or failing
|
||
|
wrdqcalresult = wr_rd_pattern_result(ch, rnk, tap_value_b[0] + tap_value_b[1] + tap_value_b[2] + tap_value_b[3]);
|
||
|
|
||
|
// Make sure that each Bit sees a transition from 0 (fail) to 1 (pass) on wrdqcalresult
|
||
|
for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) {
|
||
|
|
||
|
byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE
|
||
|
|
||
|
// Check if this bit passed during the calibration (not necessarily for first time)
|
||
|
if ((BitPass[bit_indx] == 0) && ((wrdqcalresult & (1 << bit_indx)) != 0)) {
|
||
|
// Has this bit passed SOLID_PASS_DETECT number of times? Then consider it done
|
||
|
if (SolidBitPass[bit_indx] == SOLID_PASS_DETECT) {
|
||
|
BitPass[bit_indx] = 1;
|
||
|
} else if (SolidBitPass[bit_indx] > 0) {
|
||
|
SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1;
|
||
|
} else {
|
||
|
// bit passed for the first time, record this value in the global array as the right edge
|
||
|
SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1;
|
||
|
|
||
|
byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE
|
||
|
|
||
|
if (switch_from_dqstodq == false)
|
||
|
// consider mdllcode as '0' since sdll is set to -mdllcode
|
||
|
saved_val = -1 * (tap_value_b[byte] + dqmdllcode);
|
||
|
else
|
||
|
saved_val = tap_value_b[byte];
|
||
|
|
||
|
wrdq_cal_per_chrnk_right[chrnk_indx][bit_indx] = saved_val;
|
||
|
}
|
||
|
} else {
|
||
|
// bit failed calibration, reset the pass count to 0
|
||
|
SolidBitPass[bit_indx] = 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
all_bits_pass = 1;
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
all_bits_pass_b[byte] = 1;
|
||
|
|
||
|
for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) {
|
||
|
byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE
|
||
|
|
||
|
// Did all the bits pass (SOLID_PASS_DETECT number of times) in this byte lane?
|
||
|
// If anyone of the bits failed, then the byte flag is cleared
|
||
|
all_bits_pass_b[byte] = all_bits_pass_b[byte] & BitPass[bit_indx];
|
||
|
|
||
|
// Did all bits in all byte lanes pass?
|
||
|
all_bits_pass = all_bits_pass & BitPass[bit_indx];
|
||
|
}
|
||
|
|
||
|
// If ALL bits are not passing - keep moving from Right to Left Side of window (by adding less negative adjustment to mdll)
|
||
|
if (all_bits_pass == 0) {
|
||
|
// Even if one of the byte lanes arrives early to tap_value = 0. Remain here until all byte lane catch up before proceeding to pushing out dq
|
||
|
|
||
|
// check for all bytes reaching 0 on the tap value (could be deskew or sdll)
|
||
|
uint32_t all_bytes_tap = tap_value_b[0];
|
||
|
for (byte = 1; (byte < DQ_NUM_BYTES) && (all_bytes_tap == 0); byte++) {
|
||
|
all_bytes_tap += tap_value_b[byte];
|
||
|
}
|
||
|
|
||
|
// if the tap_value for all bytes has reached 0 on the deskew, make the transition to SDLL
|
||
|
if ((all_bytes_tap == 0) && (switch_from_dqstodq == false)) {
|
||
|
switch_from_dqstodq = true;
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
tap_value_b[byte] = (rAMP_DQSDLLCTRL_WR(ch, byte) & DLLVAL_BITS);
|
||
|
tap_value_b[byte] = OFFSET_TO_INT(tap_value_b[byte]);
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
// To find right side passing point, add less negative adjustment to mdll (same as decrementing deskew)
|
||
|
|
||
|
// For deskew taps, we just decrement by step_incr if we haven't reached 0 yet
|
||
|
// Note: All deskew taps will reach 0 at the same time since their start values are equal, and they are decremented regardless of pass or fail
|
||
|
if (switch_from_dqstodq == false) {
|
||
|
|
||
|
// Also decrement cawrlvlsdllcode along with tap_value_b
|
||
|
if (tap_value_b[0] > 0)
|
||
|
cawrlvlcode -= step_incr;
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
if (tap_value_b[byte] > 0)
|
||
|
tap_value_b[byte] -= step_incr;
|
||
|
} else {
|
||
|
// For sdll taps, increment
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (all_bits_pass_b[byte] == 0) {
|
||
|
tap_value_b[byte] += step_incr;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// trigger for loop to end if any of the bytes reach max tap value
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (!max_tap_value_reached)
|
||
|
max_tap_value_reached = (tap_value_b[byte] > max_tap_value);
|
||
|
|
||
|
if (max_tap_value_reached) {
|
||
|
if (all_bits_pass == 0)
|
||
|
panic("Memory wrdq calibration: Unable to find right side passing point for channel %d, max tap value reached. start_b[] = {0x%x 0x%x 0x%x 0x%x}", ch, start_b[0], start_b[1], start_b[2], start_b[3]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
} while ((!max_tap_value_reached) && (all_bits_pass == 0));
|
||
|
}
|
||
|
|
||
|
// To find left failing point, keep adding less negative adjustment to mdll
|
||
|
static void find_wrdqcal_left_side_failing_point(uint32_t ch, uint32_t rnk)
|
||
|
{
|
||
|
int32_t wrdqsdll[DQ_NUM_BYTES];
|
||
|
uint32_t wrdqcalresult;
|
||
|
uint32_t all_bits_fail;
|
||
|
uint32_t all_bits_fail_b[DQ_NUM_BYTES] = { 0 };
|
||
|
uint32_t step_incr;
|
||
|
uint32_t mask_b[DQ_NUM_BYTES];
|
||
|
int32_t start_b[DQ_NUM_BYTES];
|
||
|
uint32_t byte;
|
||
|
bool max_tap_value_reached = false;
|
||
|
int32_t dqmdllcode, max_tap_value;
|
||
|
|
||
|
dqmdllcode = mdllcode[ch][AMP_DQ];
|
||
|
max_tap_value = dqmdllcode - DELIMIT_POS_ADJ_WRDQSDLL + DQ_MAX_DESKEW_PER_BIT;
|
||
|
|
||
|
all_bits_fail = 0;
|
||
|
wrdqcalresult = 0xFFFFFFFF;
|
||
|
step_incr = COARSE_STEP_SZ;
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
// initialize the mask for each byte lane
|
||
|
mask_b[byte] = 0xFF << (byte * 8);
|
||
|
|
||
|
// Get the starting values for WR DQS SDLL
|
||
|
wrdqsdll[byte] = rAMP_DQSDLLCTRL_WR(ch, byte) & DLLVAL_BITS;
|
||
|
wrdqsdll[byte] = OFFSET_TO_INT(wrdqsdll[byte]);
|
||
|
|
||
|
|
||
|
// Add per-bit deskew to wrdqsdll[byte] if sdll reached mdll - DELIMIT_POS_ADJ_WRDQSDLL (otherwise, deskew should be 0)
|
||
|
// At this point per-bit deskew should be the same for each bit in this byte. Use bit 0's deskew value
|
||
|
wrdqsdll[byte] += rAMP_WRDQDESKEW_CTRL(ch, byte, 0);
|
||
|
}
|
||
|
|
||
|
// Start with sdll value for which right passing point was found, then increase (less negative) until all bits fail
|
||
|
do {
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
// set the new sdll for this byte lane if all bits are not yet failing
|
||
|
if (all_bits_fail_b[byte] == 0)
|
||
|
amp_set_wrdq_sdll(ch, byte, wrdqsdll[byte]);
|
||
|
}
|
||
|
|
||
|
// Send the PRBS patterns and read them back to see which bits are passing or failing
|
||
|
wrdqcalresult &= wr_rd_pattern_result(ch, rnk, wrdqsdll[0] + wrdqsdll[1] + wrdqsdll[2] + wrdqsdll[3]);
|
||
|
|
||
|
// If the result of all bits in this byte show a fail, record this as the failing point
|
||
|
all_bits_fail = 1;
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if ((all_bits_fail_b[byte] == 0) && ((wrdqcalresult & mask_b[byte]) == 0)) {
|
||
|
all_bits_fail_b[byte] = 1;
|
||
|
start_b[byte] = wrdqsdll[byte];
|
||
|
}
|
||
|
|
||
|
all_bits_fail &= all_bits_fail_b[byte];
|
||
|
}
|
||
|
|
||
|
if (all_bits_fail == 1) {
|
||
|
find_wrdqcal_left_side_passing_point (ch, rnk, start_b);
|
||
|
} else {
|
||
|
// if the byte has not yet failed, find the next sdll value to be set
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (all_bits_fail_b[byte] == 0) {
|
||
|
wrdqsdll[byte] += step_incr;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
// none of the previous bytes reached max_tap_value, then update the boolean
|
||
|
if (!max_tap_value_reached) {
|
||
|
max_tap_value_reached = (wrdqsdll[byte] > max_tap_value);
|
||
|
|
||
|
if (max_tap_value_reached) {
|
||
|
dprintf(DEBUG_INFO, "Memory wrdq calibration: Unable to find left failing point, max tap value reached for ch %d byte %d", ch, byte);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (max_tap_value_reached) {
|
||
|
// Continue to passing point if any of the bytes reaches max value and not all bits are failing
|
||
|
if (all_bits_fail == 0) {
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (all_bits_fail_b[byte] == 0)
|
||
|
start_b[byte] = max_tap_value;
|
||
|
}
|
||
|
|
||
|
find_wrdqcal_left_side_passing_point(ch, rnk, start_b);
|
||
|
}
|
||
|
}
|
||
|
} while ((!max_tap_value_reached) && (all_bits_fail == 0));
|
||
|
}
|
||
|
|
||
|
static void find_wrdqcal_left_side_passing_point(uint32_t ch, uint32_t rnk, int32_t *start_b)
|
||
|
{
|
||
|
uint32_t chrnk_indx;
|
||
|
bool max_tap_value_reached = false;
|
||
|
int32_t tap_value_b[DQ_NUM_BYTES];
|
||
|
uint32_t BitPass[DQ_TOTAL_BITS] = { 0 };
|
||
|
uint32_t SolidBitPass[DQ_TOTAL_BITS] = { 0 };
|
||
|
uint32_t wrdqcalresult;
|
||
|
uint32_t all_bits_pass;
|
||
|
uint32_t all_bits_pass_b[DQ_NUM_BYTES] = { 0 };
|
||
|
uint32_t step_incr;
|
||
|
uint32_t bit_indx, byte;
|
||
|
int32_t dqmdllcode, max_tap_value;
|
||
|
|
||
|
dqmdllcode = mdllcode[ch][AMP_DQ];
|
||
|
max_tap_value = -1 * dqmdllcode;
|
||
|
|
||
|
chrnk_indx = (ch * AMC_NUM_RANKS) + rnk;
|
||
|
all_bits_pass = 0;
|
||
|
wrdqcalresult = 0xFFFFFFFF;
|
||
|
step_incr = FINER_STEP_SZ;
|
||
|
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
tap_value_b[byte] = start_b[byte];
|
||
|
}
|
||
|
|
||
|
// Finding Left side failing point on per bit level. Moving Left to Right (keep adding more negative adj to mdll) to find point where it turns from FAIL TO PASS
|
||
|
do {
|
||
|
// adjust wrdq sdll until all bits pass for each byte lane
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (all_bits_pass_b[byte] == 0)
|
||
|
amp_set_wrdq_sdll(ch, byte, tap_value_b[byte]);
|
||
|
}
|
||
|
|
||
|
// Send the PRBS patterns and read them back to see which bits are passing or failing
|
||
|
wrdqcalresult = wr_rd_pattern_result(ch, rnk, tap_value_b[0] + tap_value_b[1] + tap_value_b[2] + tap_value_b[3]);
|
||
|
|
||
|
// Make sure that each Bit sees a transition from 0 (fail) to 1 (pass) on wrdqcalresult
|
||
|
for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) {
|
||
|
// Check if this bit passed during the calibration (not necessarily for first time)
|
||
|
if ((BitPass[bit_indx] == 0) && ((wrdqcalresult & (1 << bit_indx)) != 0)) {
|
||
|
// Has this bit passed SOLID_PASS_DETECT number of times? Then consider it done
|
||
|
if (SolidBitPass[bit_indx] == SOLID_PASS_DETECT) {
|
||
|
BitPass[bit_indx] = 1;
|
||
|
} else if (SolidBitPass[bit_indx] > 0) {
|
||
|
SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1;
|
||
|
} else {
|
||
|
// bit passed for the first time, record this value in the global array as the right edge
|
||
|
SolidBitPass[bit_indx] = SolidBitPass[bit_indx] + 1;
|
||
|
|
||
|
byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE
|
||
|
|
||
|
wrdq_cal_per_chrnk_left[chrnk_indx][bit_indx] = tap_value_b[byte];
|
||
|
}
|
||
|
} else {
|
||
|
// bit failed calibration, reset the pass count to 0
|
||
|
SolidBitPass[bit_indx] = 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
all_bits_pass = 1;
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
all_bits_pass_b[byte] = 1;
|
||
|
|
||
|
for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) {
|
||
|
byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE
|
||
|
|
||
|
// Did all the bits pass (SOLID_PASS_DETECT number of times) in this byte lane?
|
||
|
// If anyone of the bits failed, then the byte flag is cleared
|
||
|
all_bits_pass_b[byte] = all_bits_pass_b[byte] & BitPass[bit_indx];
|
||
|
|
||
|
// Did all bits in all byte lanes pass?
|
||
|
all_bits_pass = all_bits_pass & BitPass[bit_indx];
|
||
|
}
|
||
|
|
||
|
// If ALL bits are not passing - keep moving from Left to Right Side of window (by adding more negative adjustment to mdll)
|
||
|
if (all_bits_pass == 0) {
|
||
|
// For sdll taps, increment for neg tap_val, decrement for positive
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (all_bits_pass_b[byte] == 0) {
|
||
|
tap_value_b[byte] -= step_incr;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// check for end of loop condition
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (!max_tap_value_reached)
|
||
|
max_tap_value_reached = (tap_value_b[byte] < max_tap_value);
|
||
|
|
||
|
if (max_tap_value_reached) {
|
||
|
if (all_bits_pass_b[byte] == 0)
|
||
|
panic("Memory wrdq calibration: Unable to find left passing point, max tap value reached. start_b[] = {0x%x, 0x%x, 0x%x, 0x%x}", start_b[0], start_b[1], start_b[2], start_b[3]);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
} while ((!max_tap_value_reached) && (all_bits_pass == 0));
|
||
|
}
|
||
|
|
||
|
static void wrdq_program_final_values(void)
|
||
|
{
|
||
|
uint32_t ch, bit_indx, byte;
|
||
|
uint32_t chrnk0_indx, chrnk1_indx;
|
||
|
int32_t dqmdllcode;
|
||
|
int32_t wrdq_bit_center[DQ_TOTAL_BITS];
|
||
|
int32_t wrdq_bit_deskew[DQ_TOTAL_BITS];
|
||
|
int32_t left_pos_val;
|
||
|
int32_t right_pos_val;
|
||
|
int32_t min_wrdq_center[DQ_NUM_BYTES];
|
||
|
int32_t min_dq_deskew_code, max_dq_deskew_code;
|
||
|
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
dqmdllcode = mdllcode[ch][AMP_DQ];
|
||
|
|
||
|
// find the center point of passing window for each bit over all ranks
|
||
|
for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) {
|
||
|
|
||
|
chrnk0_indx = (ch * AMC_NUM_RANKS) + 0;
|
||
|
left_pos_val = wrdq_cal_per_chrnk_left[chrnk0_indx][bit_indx];
|
||
|
right_pos_val = wrdq_cal_per_chrnk_right[chrnk0_indx][bit_indx];
|
||
|
|
||
|
if (AMC_NUM_RANKS > 1) {
|
||
|
|
||
|
chrnk1_indx = (ch * AMC_NUM_RANKS) + 1;
|
||
|
|
||
|
// find the endpoint that covers both ranks
|
||
|
left_pos_val = find_common_endpoint(wrdq_cal_per_chrnk_left[chrnk0_indx][bit_indx],
|
||
|
wrdq_cal_per_chrnk_left[chrnk1_indx][bit_indx],
|
||
|
MIN_ENDPT);
|
||
|
right_pos_val = find_common_endpoint(wrdq_cal_per_chrnk_right[chrnk0_indx][bit_indx],
|
||
|
wrdq_cal_per_chrnk_right[chrnk1_indx][bit_indx],
|
||
|
MAX_ENDPT);
|
||
|
}
|
||
|
|
||
|
// find center of the eye for this bit
|
||
|
wrdq_bit_center[bit_indx] = find_center_of_eye(left_pos_val, right_pos_val);
|
||
|
}
|
||
|
|
||
|
|
||
|
// <rdar://problem/13439594>, <rdar://problem/13888162> Need additional shift to DQ offset
|
||
|
int8_t signed_byte_center_point[DQ_TOTAL_BITS];
|
||
|
|
||
|
// convert to signed bytes first as required by the shift function
|
||
|
for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++)
|
||
|
signed_byte_center_point[bit_indx] = (int8_t) wrdq_bit_center[bit_indx];
|
||
|
|
||
|
// call platform specific amc routine to apply apropriate shifts depending on DRAM vendor
|
||
|
amc_dram_shift_dq_offset(signed_byte_center_point, DQ_TOTAL_BITS);
|
||
|
|
||
|
// convert shifted signed bytes back to offset format
|
||
|
for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++)
|
||
|
wrdq_bit_center[bit_indx] = (int32_t) signed_byte_center_point[bit_indx];
|
||
|
|
||
|
|
||
|
// initialize the min centerpoint to the 1st bit's center point in each byte lane
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++)
|
||
|
min_wrdq_center[byte] = wrdq_bit_center[byte * DQ_NUM_BITS_PER_BYTE];
|
||
|
|
||
|
// Find the min CenterPoint per byte lane given each bit's center point
|
||
|
for (bit_indx=0; bit_indx < DQ_TOTAL_BITS; bit_indx++) {
|
||
|
|
||
|
byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE
|
||
|
|
||
|
// if this bit's center point is less than current min, make it the new min
|
||
|
// if this bit's center point is less than current min, make it the new min
|
||
|
if (wrdq_bit_center[bit_indx] < min_wrdq_center[byte])
|
||
|
min_wrdq_center[byte] = wrdq_bit_center[bit_indx];
|
||
|
}
|
||
|
|
||
|
// for positive value, clamp it to mdllcode - DELIMIT_POS_ADJ_WRDQSDLL
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (min_wrdq_center[byte] > (dqmdllcode - DELIMIT_POS_ADJ_WRDQSDLL))
|
||
|
min_wrdq_center[byte] = (dqmdllcode - DELIMIT_POS_ADJ_WRDQSDLL);
|
||
|
}
|
||
|
|
||
|
// Compute the individual deskew values: any bits with center point > min for its byte lane will require deskew
|
||
|
// Each bit's center is guaranteed to be >= min for its byte lane
|
||
|
// Deskewing means adding more positive adjustment for this bit in addition to the sdll, which is clamped on the negative side to -dqmdllcode
|
||
|
// and clamped on the positive side to (mdllcode - DELIMIT_POS_ADJ_WRDQSDLL)
|
||
|
for (bit_indx = 0; bit_indx < DQ_TOTAL_BITS; bit_indx++) {
|
||
|
|
||
|
byte = bit_indx >> 3; // bit_indx / DQ_NUM_BITS_PER_BYTE
|
||
|
|
||
|
if (wrdq_bit_center[bit_indx] < min_wrdq_center[byte])
|
||
|
panic("Memory Wrdq Calibration: wrdq_bit_center[%d] = (%d) < min_wrdq_center[%d] = %d\n", bit_indx, wrdq_bit_center[bit_indx], byte, min_wrdq_center[byte]);
|
||
|
|
||
|
wrdq_bit_deskew[bit_indx] = wrdq_bit_center[bit_indx] - min_wrdq_center[byte];
|
||
|
|
||
|
// Make sure deskew value programmed is not negative and is <= DQ_MAX_DESKEW_PER_BIT
|
||
|
if ((wrdq_bit_deskew[bit_indx] < 0) || (wrdq_bit_deskew[bit_indx] > DQ_MAX_DESKEW_PER_BIT))
|
||
|
panic("Memory Wrdq Calibration: wrdq_bit_deskew[%d] = %d invalid\n", bit_indx, wrdq_bit_deskew[bit_indx]);
|
||
|
}
|
||
|
|
||
|
// if the min for each byte lane is < -dqmdllcode, we'll need to adjust/clamp it to -dqmdllcode
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
if (min_wrdq_center[byte] < (-1 * dqmdllcode)) {
|
||
|
int32_t dqs_deskew = (-1 * dqmdllcode) - min_wrdq_center[byte];
|
||
|
// <rdar://problem/14116888> put the remainder on DQS
|
||
|
rAMP_WRDMDESKEW_CTRL(ch, byte) = dqs_deskew;
|
||
|
min_wrdq_center[byte] = (-1 * dqmdllcode);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Program the SDLL and deskew per bit for each byte lane
|
||
|
for (byte = 0; byte < DQ_NUM_BYTES; byte++) {
|
||
|
amp_set_wrdq_sdll(ch, byte, min_wrdq_center[byte]);
|
||
|
|
||
|
// init the min and max deskew values for each byte to the 1st bit in the byte
|
||
|
min_dq_deskew_code = wrdq_bit_deskew[(byte * DQ_NUM_BITS_PER_BYTE)];
|
||
|
max_dq_deskew_code = wrdq_bit_deskew[(byte * DQ_NUM_BITS_PER_BYTE)];
|
||
|
// per bit deskew for this byte lane
|
||
|
for (bit_indx = 0; bit_indx < DQ_NUM_BITS_PER_BYTE; bit_indx++) {
|
||
|
rAMP_WRDQDESKEW_CTRL(ch, byte, bit_indx) = wrdq_bit_deskew[(byte * DQ_NUM_BITS_PER_BYTE) + bit_indx];
|
||
|
|
||
|
// is this bit the new min or max?
|
||
|
if (wrdq_bit_deskew[(byte * DQ_NUM_BITS_PER_BYTE) + bit_indx] < min_dq_deskew_code)
|
||
|
min_dq_deskew_code = wrdq_bit_deskew[(byte * DQ_NUM_BITS_PER_BYTE) + bit_indx];
|
||
|
else if (wrdq_bit_deskew[(byte * DQ_NUM_BITS_PER_BYTE) + bit_indx] > max_dq_deskew_code)
|
||
|
max_dq_deskew_code = wrdq_bit_deskew[(byte * DQ_NUM_BITS_PER_BYTE) + bit_indx];
|
||
|
}
|
||
|
|
||
|
// find midpoint of deskew registers for this byte, and program it to DM (controlled by DQS regs, oddly)
|
||
|
rAMP_WRDQSDESKEW_CTRL(ch, byte) = (min_dq_deskew_code + max_dq_deskew_code) >> 1;
|
||
|
}
|
||
|
} // for (ch = 0; ch < AMC_NUM_CHANNELS; ch++)
|
||
|
}
|
||
|
|
||
|
// This function writes PRBS7 patterns to dram for given channel and rank,
|
||
|
// and reads them back. Read back values are compared with data that was written
|
||
|
static uint32_t wr_rd_pattern_result(uint32_t ch, uint32_t rnk, uint32_t sdll_value)
|
||
|
{
|
||
|
uint32_t chrnk_indx, result, result_per_wr_and_rdbk;
|
||
|
uint32_t pattern_indx, pattern, readback_data;
|
||
|
uint32_t col, word;
|
||
|
uint64_t mem_region, mem_addr;
|
||
|
uint32_t all_bits = 0xFFFFFFFF;
|
||
|
uint32_t consecutive_cols_per_chrnk = (DQ_CONSECUTIVE_BYTES_PER_CHRNK / DQ_BYTES_PER_COL);
|
||
|
|
||
|
result = all_bits;
|
||
|
result_per_wr_and_rdbk = all_bits;
|
||
|
|
||
|
chrnk_indx = (ch * AMC_NUM_RANKS) + rnk;
|
||
|
pattern_indx = sdll_value & DLLVAL_BITS; // sdll tap indexes into pattern array
|
||
|
|
||
|
// <rdar://problem/14017861> need APB read inserted in function wr_rd_pattern_result
|
||
|
rAMP_RDFIFOPTRSTS(ch);
|
||
|
|
||
|
// write the patterns to memory 4 bytes at a time
|
||
|
// interleaving applies every DQ_CONSECUTIVE_BYTES_PER_CHRNK bytes, so recompute the address at that point
|
||
|
// Note that bank and row are fixed
|
||
|
for (col = 0; col < DQ_NUM_PATTERNS; col += consecutive_cols_per_chrnk) {
|
||
|
|
||
|
mem_region = amc_get_uncached_dram_virt_addr(ch, rnk, DQ_BANK, DQ_ROW, col);
|
||
|
|
||
|
// next 16 words (or columns) are consecutively stored in a [channel,rank] combo
|
||
|
for (word = 0; word < consecutive_cols_per_chrnk; word++) {
|
||
|
|
||
|
mem_addr = mem_region + (uint64_t)((word << 2));
|
||
|
|
||
|
// last pattern in array is dummy value, so skip it
|
||
|
pattern = DQ_PRBS7_PATTERNS[pattern_indx % (DQ_NUM_PATTERNS - 1)];
|
||
|
pattern_indx++;
|
||
|
|
||
|
// write the pattern
|
||
|
*(uint32_t *)mem_addr = pattern;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
pattern_indx = sdll_value & DLLVAL_BITS;
|
||
|
|
||
|
// Now, read back the patterns (have to do it in a separate loop than the writes to get more robust calib values)
|
||
|
for (col = 0; col < DQ_NUM_PATTERNS; col += consecutive_cols_per_chrnk) {
|
||
|
|
||
|
mem_region = amc_get_uncached_dram_virt_addr(ch, rnk, DQ_BANK, DQ_ROW, col);
|
||
|
|
||
|
// next 16 words (or columns) are consecutively stored in a [channel,rank] combo
|
||
|
for (word = 0; word < consecutive_cols_per_chrnk; word++) {
|
||
|
|
||
|
mem_addr = mem_region + (uint64_t)((word << 2));
|
||
|
|
||
|
// last pattern in array is dummy value, so skip it
|
||
|
pattern = DQ_PRBS7_PATTERNS[pattern_indx % (DQ_NUM_PATTERNS - 1)];
|
||
|
pattern_indx++;
|
||
|
|
||
|
// read the pattern
|
||
|
readback_data = *(uint32_t *)mem_addr;
|
||
|
|
||
|
// records if read back value was different than written value by
|
||
|
// clearing bits that are different in the final result
|
||
|
result_per_wr_and_rdbk &= ~(readback_data ^ pattern);
|
||
|
}
|
||
|
|
||
|
// result variable accumulates the results of all pattern matching results for a given sdll_value
|
||
|
result &= result_per_wr_and_rdbk;
|
||
|
}
|
||
|
|
||
|
// failing bits are clear, passing bits are set
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
// Save or restore ca and wrlvl registers for resume boot
|
||
|
// Registers must be stored/retrieved in exactly the order below
|
||
|
static void save_restore_ca_wrlvl_regs(uint32_t save_or_restore)
|
||
|
{
|
||
|
uint32_t ch, bit_indx, byte;
|
||
|
uint32_t byte_pos = 0;
|
||
|
|
||
|
if (save_or_restore == CALIB_SAVE) {
|
||
|
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
|
||
|
// save the CA registers for this channel
|
||
|
uint8_t casdll = (uint8_t) (rAMP_CASDLLCTRL(ch) & DLLVAL_BITS);
|
||
|
|
||
|
for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++) {
|
||
|
uint8_t ca_deskew = (uint8_t) (rAMP_CADESKEW_CTRL(ch, bit_indx) & DESKEW_CTRL_BITS);
|
||
|
uint8_t ca_offset = INT_TO_OFFSET(OFFSET_TO_INT(casdll) + OFFSET_TO_INT(ca_deskew));
|
||
|
cal_pmu_bits[byte_pos++] = ca_offset;
|
||
|
}
|
||
|
|
||
|
// CK, CS, and CKE share the same value
|
||
|
uint8_t ck_deskew = (uint8_t) (rAMP_CKDESKEW_CTRL(ch) & DESKEW_CTRL_BITS);
|
||
|
cal_pmu_bits[byte_pos++] = ck_deskew;
|
||
|
|
||
|
// save the WrLvl registers for this channel (4 DQ SDLLs and 1 CA SDLL)
|
||
|
for (byte = 0; byte < (DQ_NUM_BYTES + 1); byte++) {
|
||
|
|
||
|
// cawrlvlsdll is stored as the "5th" byte
|
||
|
if (byte == DQ_NUM_BYTES)
|
||
|
cal_pmu_bits[byte_pos++] = (uint8_t) (rAMP_CAWRLVLSDLLCODE(ch) & DLLVAL_BITS);
|
||
|
else
|
||
|
cal_pmu_bits[byte_pos++] = (uint8_t) (rAMP_DQWRLVLSDLLCODE(ch, byte) & DLLVAL_BITS);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#if !SUB_PLATFORM_S7002
|
||
|
// Save the cal_pmu_bits array to PMU nvram
|
||
|
if (power_store_memory_calibration((void *) cal_pmu_bits, CALIB_NUM_BYTES_TO_SAVE) == 0)
|
||
|
{
|
||
|
#if AMP_NO_PMU_PANIC
|
||
|
printf("Unable to save memory calibration values to PMU nvram\n");
|
||
|
#else
|
||
|
panic("Unable to save memory calibration values to PMU nvram\n");
|
||
|
#endif
|
||
|
}
|
||
|
#endif // #if !SUB_PLATFORM_S7002
|
||
|
|
||
|
} else {
|
||
|
|
||
|
#if !SUB_PLATFORM_S7002
|
||
|
// Retrieve cal_pmu_bits array from PMU nvram
|
||
|
if (power_load_memory_calibration((void *) cal_pmu_bits, CALIB_NUM_BYTES_TO_SAVE) == 0)
|
||
|
{
|
||
|
#if AMP_NO_PMU_PANIC
|
||
|
printf("Unable to load memory calibration values from PMU nvram\n");
|
||
|
#else
|
||
|
panic("Unable to load memory calibration values from PMU nvram\n");
|
||
|
#endif
|
||
|
}
|
||
|
#endif // #if !SUB_PLATFORM_S7002
|
||
|
|
||
|
save_masterdll_values();
|
||
|
|
||
|
for (ch = 0; ch < AMC_NUM_CHANNELS; ch++) {
|
||
|
|
||
|
int8_t casdll = OFFSET_TO_INT(cal_pmu_bits[byte_pos]);
|
||
|
int32_t ca_offset[CA_NUM_BITS];
|
||
|
|
||
|
for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++) {
|
||
|
ca_offset[bit_indx] = OFFSET_TO_INT(cal_pmu_bits[byte_pos]);
|
||
|
byte_pos++;
|
||
|
if (ca_offset[bit_indx] < casdll)
|
||
|
casdll = ca_offset[bit_indx];
|
||
|
}
|
||
|
|
||
|
// get the value pushed on CK, CS, CKE signals, add it to casdll
|
||
|
casdll -= OFFSET_TO_INT(cal_pmu_bits[byte_pos]);
|
||
|
byte_pos++;
|
||
|
uint8_t caclk = 0;
|
||
|
|
||
|
// check if sdll < -camdllcode, then clamp to it. If sdll > camdllcode - DELIMIT_POS_ADJ_CASDLL, then also clamp it.
|
||
|
if (casdll < (-1 * mdllcode[ch][AMP_CA])) {
|
||
|
caclk = (uint8_t) ((-1 * casdll) - mdllcode[ch][AMP_CA]);
|
||
|
casdll = (-1 * mdllcode[ch][AMP_CA]);
|
||
|
} else if (casdll > (mdllcode[ch][AMP_CA] - DELIMIT_POS_ADJ_CASDLL)) {
|
||
|
casdll = mdllcode[ch][AMP_CA] - DELIMIT_POS_ADJ_CASDLL;
|
||
|
}
|
||
|
|
||
|
// write the casdll register, and caclk into ctl signals
|
||
|
amp_push_casdll_out(ch, casdll);
|
||
|
if (caclk)
|
||
|
amp_push_ctl_out(ch, caclk);
|
||
|
|
||
|
// compute deskew and write to the per bit deskew registers
|
||
|
for (bit_indx = 0; bit_indx < CA_NUM_BITS; bit_indx++) {
|
||
|
uint8_t ca_deskew = (uint8_t) (ca_offset[bit_indx] - (casdll - caclk));
|
||
|
rAMP_CADESKEW_CTRL(ch, bit_indx) = ca_deskew;;
|
||
|
}
|
||
|
|
||
|
for (byte = 0; byte < (DQ_NUM_BYTES + 1); byte++) {
|
||
|
uint8_t wrlvlsdll = cal_pmu_bits[byte_pos++];
|
||
|
|
||
|
// At this point, DRAM is not in WRLVL mode so can use dlysel with forceckelow
|
||
|
if (byte == DQ_NUM_BYTES)
|
||
|
amp_set_cawrlvl_sdll(ch, wrlvlsdll, true);
|
||
|
else
|
||
|
amp_set_dqwrlvl_sdll(ch, byte, wrlvlsdll, true);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
// Bit packing functions are not needed (enough space in PMU to be rid of this complexity)
|
||
|
#if 0
|
||
|
// Inserts the data at given byte and bit position in the cal_pmu_bits array
|
||
|
// Assumes that num_bits is always <= 8
|
||
|
static void cal_save_value(uint8_t data, uint32_t num_bits, uint32_t *bit_pos, uint32_t *byte_pos)
|
||
|
{
|
||
|
uint32_t space_in_this_byte;
|
||
|
uint8_t mask;
|
||
|
|
||
|
if (((*bit_pos) > 7) || ((*byte_pos) >= CALIB_PMU_BYTES))
|
||
|
panic("Error! bit position %d > 7 or byte position %d > capacity (%d)\n", *bit_pos, *byte_pos, CALIB_PMU_BYTES);
|
||
|
|
||
|
// how many bits left in this byte?
|
||
|
space_in_this_byte = 8 - (*bit_pos);
|
||
|
|
||
|
// we'll grab as many bits from the data as there is space in this byte
|
||
|
if (space_in_this_byte >= num_bits)
|
||
|
mask = (1 << num_bits) - 1;
|
||
|
else
|
||
|
mask = (1 << space_in_this_byte) - 1;
|
||
|
|
||
|
// Set the data value at given byte (only as many bits as space and making sure to preserve the other bits in this byte)
|
||
|
cal_pmu_bits[*byte_pos] |= ((data & mask) << *bit_pos);
|
||
|
|
||
|
if (space_in_this_byte < num_bits) {
|
||
|
// any remainder bits get saved to the next byte
|
||
|
cal_pmu_bits[(*byte_pos) + 1] = (data >> space_in_this_byte);
|
||
|
(*byte_pos)++;
|
||
|
*bit_pos = num_bits - space_in_this_byte;
|
||
|
} else if (space_in_this_byte == num_bits) {
|
||
|
(*byte_pos)++;
|
||
|
*bit_pos = 0;
|
||
|
} else {
|
||
|
(*bit_pos) += num_bits;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Retrieve the data at given byte and bit position in the cal_pmu_bits array
|
||
|
// Assumes that num_bits is always <= 8
|
||
|
static uint8_t cal_retrieve_value(uint32_t num_bits, uint32_t *bit_pos, uint32_t *byte_pos)
|
||
|
{
|
||
|
uint32_t space_in_this_byte;
|
||
|
uint8_t mask, remainder_mask, ret_val;
|
||
|
|
||
|
if (((*bit_pos) > 7) || ((*byte_pos) >= CALIB_PMU_BYTES))
|
||
|
panic("Error! bit position %d > 7 or byte position %d > capacity (%d)\n", *bit_pos, *byte_pos, CALIB_PMU_BYTES);
|
||
|
|
||
|
// how many bits left in this byte?
|
||
|
space_in_this_byte = 8 - (*bit_pos);
|
||
|
|
||
|
// we'll grab as many bits from the array as there is space in this byte (max of num_bits)
|
||
|
if (space_in_this_byte >= num_bits)
|
||
|
mask = (1 << num_bits) - 1;
|
||
|
else {
|
||
|
mask = (1 << space_in_this_byte) - 1;
|
||
|
remainder_mask = (1 << (num_bits - space_in_this_byte)) - 1;
|
||
|
}
|
||
|
|
||
|
// Get the data value at given byte (only as many bits as space)
|
||
|
ret_val = (cal_pmu_bits[*byte_pos] >> *bit_pos) & mask;
|
||
|
|
||
|
if (space_in_this_byte < num_bits) {
|
||
|
// any remainder bits get loaded from the next byte
|
||
|
ret_val |= (cal_pmu_bits[(*byte_pos) + 1] & remainder_mask) << space_in_this_byte;
|
||
|
(*byte_pos)++;
|
||
|
*bit_pos = num_bits - space_in_this_byte;
|
||
|
} else if (space_in_this_byte == num_bits) {
|
||
|
(*byte_pos)++;
|
||
|
*bit_pos = 0;
|
||
|
} else {
|
||
|
(*bit_pos) += num_bits;
|
||
|
}
|
||
|
|
||
|
return ret_val;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
|
||
|
// Before starting dq calibration, saves the contents of dram region that will be written to with calibration patterns.
|
||
|
// After calibration is complete, restores the contents back to DRAM.
|
||
|
static void save_restore_memory_region(bool dqcal_start)
|
||
|
{
|
||
|
uint32_t rnk;
|
||
|
volatile uintptr_t mem_addr, src, dest;
|
||
|
|
||
|
mem_addr = SDRAM_BASE_UNCACHED;
|
||
|
|
||
|
for (rnk = 0; rnk < AMC_NUM_RANKS; rnk++) {
|
||
|
mem_addr |= (rnk << DQ_ADDR_RANK_BIT);
|
||
|
|
||
|
if (dqcal_start) {
|
||
|
dest = (uintptr_t) &(dqcal_saved_data[rnk][0]);
|
||
|
src = mem_addr;
|
||
|
} else {
|
||
|
dest = mem_addr;
|
||
|
src = (uintptr_t) &(dqcal_saved_data[rnk][0]);
|
||
|
}
|
||
|
|
||
|
// we'll be writing (or have written) the patterns for each channel
|
||
|
memcpy((void *) dest, (void *) src, sizeof(DQ_PRBS7_PATTERNS) * AMC_NUM_CHANNELS);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#if 0
|
||
|
// Given an input where bit SIGN_BIT_POS represents the sign, and the rest is magnitude
|
||
|
// separate out the sign and magnitude and return those values to the caller
|
||
|
static void get_offset_sign_magnitude(uint32_t offset, uint32_t *neg_bit_set, uint32_t *tap_val)
|
||
|
{
|
||
|
*neg_bit_set = (offset & (1 << SIGN_BIT_POS)) >> SIGN_BIT_POS;
|
||
|
*tap_val = offset - (*neg_bit_set << SIGN_BIT_POS);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
// Returns the midpoint between the two signed endpoints of an eye.
//
//   left_pos_val   left endpoint; must be >= right_pos_val
//   right_pos_val  right endpoint
//
// Calls panic() when left_pos_val < right_pos_val.
// The result is (left + right) / 2 using integer division, so an odd sum is
// truncated toward zero.
static int32_t find_center_of_eye(int32_t left_pos_val, int32_t right_pos_val)
{
	int32_t endpoint_sum;

	// endpoints must come in ordered as left >= right
	if (!(left_pos_val >= right_pos_val))
		panic("Memory calibration: find_center_of_eye: Left value (0x%x) is < right value (0x%x)", left_pos_val, right_pos_val);

	// the center of two signed integers is their average
	endpoint_sum = left_pos_val + right_pos_val;

	return endpoint_sum / 2;
}
|
||
|
|
||
|
// Select the value that would include the other value in the eye
|
||
|
static int32_t find_common_endpoint(int32_t val0, int32_t val1, uint32_t min_or_max)
|
||
|
{
|
||
|
int32_t retVal = val0;
|
||
|
|
||
|
// For the right endpoint, select the rightmost value on the number line (max value)
|
||
|
if (min_or_max == MAX_ENDPT) {
|
||
|
retVal = (val0 > val1) ? val0 : val1;
|
||
|
}
|
||
|
// For the left endpoint, select the leftmost value (min value)
|
||
|
else {
|
||
|
retVal = (val0 < val1) ? val0 : val1;
|
||
|
}
|
||
|
|
||
|
return retVal;
|
||
|
}
|