/*----------------------------------------------------------------------------*
 * Copyright Statement:                                                       *
 *                                                                            *
 *   This software/firmware and related documentation ("MediaTek Software")   *
 * are protected under international and related jurisdictions'copyright laws *
 * as unpublished works. The information contained herein is confidential and *
 * proprietary to MediaTek Inc. Without the prior written permission of       *
 * MediaTek Inc., any reproduction, modification, use or disclosure of        *
 * MediaTek Software, and information contained herein, in whole or in part,  *
 * shall be strictly prohibited.                                              *
 * MediaTek Inc. Copyright (C) 2010. All rights reserved.                     *
 *                                                                            *
 *   BY OPENING THIS FILE, RECEIVER HEREBY UNEQUIVOCALLY ACKNOWLEDGES AND     *
 * AGREES TO THE FOLLOWING:                                                   *
 *                                                                            *
 *   1)Any and all intellectual property rights (including without            *
 * limitation, patent, copyright, and trade secrets) in and to this           *
 * Software/firmware and related documentation ("MediaTek Software") shall    *
 * remain the exclusive property of MediaTek Inc. Any and all intellectual    *
 * property rights (including without limitation, patent, copyright, and      *
 * trade secrets) in and to any modifications and derivatives to MediaTek     *
 * Software, whoever made, shall also remain the exclusive property of        *
 * MediaTek Inc.  Nothing herein shall be construed as any transfer of any    *
 * title to any intellectual property right in MediaTek Software to Receiver. *
 *                                                                            *
 *   2)This MediaTek Software Receiver received from MediaTek Inc. and/or its *
 * representatives is provided to Receiver on an "AS IS" basis only.          *
 * MediaTek Inc. expressly disclaims all warranties, expressed or implied,    *
 * including but not limited to any implied warranties of merchantability,    *
 * non-infringement and fitness for a particular purpose and any warranties   *
 * arising out of course of performance, course of dealing or usage of trade. *
 * MediaTek Inc. does not provide any warranty whatsoever with respect to the *
 * software of any third party which may be used by, incorporated in, or      *
 * supplied with the MediaTek Software, and Receiver agrees to look only to   *
 * such third parties for any warranty claim relating thereto.  Receiver      *
 * expressly acknowledges that it is Receiver's sole responsibility to obtain *
 * from any third party all proper licenses contained in or delivered with    *
 * MediaTek Software.  MediaTek is not responsible for any MediaTek Software  *
 * releases made to Receiver's specifications or to conform to a particular   *
 * standard or open forum.                                                    *
 *                                                                            *
 *   3)Receiver further acknowledge that Receiver may, either presently       *
 * and/or in the future, instruct MediaTek Inc. to assist it in the           *
 * development and the implementation, in accordance with Receiver's designs, *
 * of certain softwares relating to Receiver's product(s) (the "Services").   *
 * Except as may be otherwise agreed to in writing, no warranties of any      *
 * kind, whether express or implied, are given by MediaTek Inc. with respect  *
 * to the Services provided, and the Services are provided on an "AS IS"      *
 * basis. Receiver further acknowledges that the Services may contain errors  *
 * that testing is important and it is solely responsible for fully testing   *
 * the Services and/or derivatives thereof before they are used, sublicensed  *
 * or distributed. Should there be any third party action brought against     *
 * MediaTek Inc. arising out of or relating to the Services, Receiver agree   *
 * to fully indemnify and hold MediaTek Inc. harmless.  If the parties        *
 * mutually agree to enter into or continue a business relationship or other  *
 * arrangement, the terms and conditions set forth herein shall remain        *
 * effective and, unless explicitly stated otherwise, shall prevail in the    *
 * event of a conflict in the terms in any agreements entered into between    *
 * the parties.                                                               *
 *                                                                            *
 *   4)Receiver's sole and exclusive remedy and MediaTek Inc.'s entire and    *
 * cumulative liability with respect to MediaTek Software released hereunder  *
 * will be, at MediaTek Inc.'s sole discretion, to replace or revise the      *
 * MediaTek Software at issue.                                                *
 *                                                                            *
 *   5)The transaction contemplated hereunder shall be construed in           *
 * accordance with the laws of Singapore, excluding its conflict of laws      *
 * principles.  Any disputes, controversies or claims arising thereof and     *
 * related thereto shall be settled via arbitration in Singapore, under the   *
 * then current rules of the International Chamber of Commerce (ICC).  The    *
 * arbitration shall be conducted in English. The awards of the arbitration   *
 * shall be final and binding upon both parties and shall be entered and      *
 * enforceable in any court of competent jurisdiction.                        *
 *---------------------------------------------------------------------------*/

//-----------------------------------------------------------------------------
// Include files
//-----------------------------------------------------------------------------
#include "dramc_common.h"
#include "dramc_int_global.h"
#include "x_hal_io.h"
#include "sv_c_data_traffic.h"
#include "dramc_pi_api_pcddr.h"

/* Upper bound on a bitmap width, in bits. Users are outside this chunk. */
#define BITMAP_BITS_MAX         128

/* Largest CLK PI delay value. Presumably the top of a 5-bit PI code range -- confirm at the use sites. */
#define MAX_CLK_PI_DELAY        31

/* Sentinel value; used below as the initial value of s2RxDelayPreCal.
 * Presumably means "no pass range available" -- confirm at the use sites. */
#define PASS_RANGE_NA   0x7fff

/* File-local flag, initially 0. Readers/writers are outside this chunk
 * (presumably records that write leveling has completed -- confirm). */
static U8 fgwrlevel_done = 0;

#if __ETT__
/* Only built when __ETT__ is non-zero; initially 1. Use sites are outside this chunk. */
U8 gETT_WHILE_1_flag = 1;
#endif

/* Mode register values per channel and per rank.
 * Cleared to all-zero bytes by vInitGlobalVariablesByCondition(). */
ddr_mr_val_t gMRVal[CHANNEL_NUM][RANK_MAX];

/* One byte per FSP (array length FSP_MAX) for each of the listed mode registers.
 * Presumably software copies of the MRxx contents -- confirm at the use sites. */
U8 u1MR01Value[FSP_MAX];
U8 u1MR02Value[FSP_MAX];
U8 u1MR03Value[FSP_MAX];
U8 u1MR11Value[FSP_MAX];
U8 u1MR18Value[FSP_MAX];
U8 u1MR19Value[FSP_MAX];
U8 u1MR20Value[FSP_MAX];
U8 u1MR21Value[FSP_MAX];
U8 u1MR22Value[FSP_MAX];
U8 u1MR51Value[FSP_MAX];

/* One byte per rank (array length RANK_MAX) for each of the listed mode registers. */
U8 u1MR04Value[RANK_MAX];
U8 u1MR13Value[RANK_MAX];
U8 u1MR26Value[RANK_MAX];
U8 u1MR30Value[RANK_MAX];

/* One byte per channel, per rank and per FSP for MR12 and MR14. */
U8 u1MR12Value[CHANNEL_NUM][RANK_MAX][FSP_MAX];
U8 u1MR14Value[CHANNEL_NUM][RANK_MAX][FSP_MAX];
/* Per-rank MR0 value; the first two entries start at 0xffff.
 * NOTE(review): the two-element initialiser assumes RANK_MAX >= 2. */
U16 gu2MR0_Value[RANK_MAX] = {0xffff, 0xffff};

/* Per-channel, per-DQ-bit "first pass" values, built only for the pinmux
 * per-bit auto tests (RX and TX variants are gated independently). */
#if PINMUX_AUTO_TEST_PER_BIT_RX
S16 gFinalRXPerbitFirstPass[CHANNEL_NUM][DQ_DATA_WIDTH];
#endif
#if PINMUX_AUTO_TEST_PER_BIT_TX
S16 gFinalTXPerbitFirstPass[CHANNEL_NUM][DQ_DATA_WIDTH];
#endif

/* Result storage built only when FOR_HQA_TEST_USED is defined.
 * Dimensions are as declared: per channel, then per rank / per DQ bit / per DQS
 * where present. The trailing [2] on the *MinMax arrays presumably holds a
 * {min, max} pair -- confirm at the writers, which are outside this chunk. */
#ifdef FOR_HQA_TEST_USED
U16 gFinalRXPerbitWin[CHANNEL_NUM][RANK_MAX][DQ_DATA_WIDTH];
U16 gFinalTXPerbitWin[CHANNEL_NUM][RANK_MAX][DQ_DATA_WIDTH];
U16 gFinalTXPerbitWin_min_max[CHANNEL_NUM][RANK_MAX];
U16 gFinalTXPerbitWin_min_margin[CHANNEL_NUM][RANK_MAX];
U16 gFinalTXPerbitWin_min_margin_bit[CHANNEL_NUM][RANK_MAX];
S8 gFinalClkDuty[CHANNEL_NUM];
U32 gFinalClkDutyMinMax[CHANNEL_NUM][2];
S8 gFinalDQSDuty[CHANNEL_NUM][DQS_NUMBER];
U32 gFinalDQSDutyMinMax[CHANNEL_NUM][DQS_NUMBER][2];
#endif

/* Final RX Vref per channel and rank, two entries each (presumably one per byte -- confirm). */
U8 gFinalRXVrefDQ[CHANNEL_NUM][RANK_MAX][2];
/* Final TX Vref per channel and rank. */
U8 gFinalTXVrefDQ[CHANNEL_NUM][RANK_MAX];

/*
 * Eye-scan control flags. The same six globals are defined in every build;
 * only their initial values differ:
 *
 *   flag                                   RELEASE  CFG_DRAM_LOG_TO_STORAGE  otherwise
 *   gEye_Scan_color_flag                      0               0                  1
 *   gRX_EYE_Scan_flag                         0               1                  0
 *   gRX_EYE_Scan_only_higheset_freq_flag      1               1                  1
 *   gTX_EYE_Scan_flag                         1               1                  0
 *   gTX_EYE_Scan_only_higheset_freq_flag      1               1                  1
 *   gEye_Scan_unterm_highest_flag             0               0                  0
 *
 * RELEASE takes precedence when both RELEASE and CFG_DRAM_LOG_TO_STORAGE are defined.
 * The consumers of these flags are outside this chunk.
 */
#if defined(RELEASE)
U8 gEye_Scan_color_flag = 0;
U8 gRX_EYE_Scan_flag = 0;
U8 gRX_EYE_Scan_only_higheset_freq_flag = 1;
U8 gTX_EYE_Scan_flag = 1;
U8 gTX_EYE_Scan_only_higheset_freq_flag = 1;
U8 gEye_Scan_unterm_highest_flag = 0;
#elif  (defined(CFG_DRAM_LOG_TO_STORAGE))
U8 gEye_Scan_color_flag = 0;
U8 gRX_EYE_Scan_flag = 1;
U8 gRX_EYE_Scan_only_higheset_freq_flag = 1;
U8 gTX_EYE_Scan_flag = 1;
U8 gTX_EYE_Scan_only_higheset_freq_flag = 1;
U8 gEye_Scan_unterm_highest_flag = 0;
#else
U8 gEye_Scan_color_flag = 1;
U8 gRX_EYE_Scan_flag = 0;
U8 gRX_EYE_Scan_only_higheset_freq_flag = 1;
U8 gTX_EYE_Scan_flag = 0;
U8 gTX_EYE_Scan_only_higheset_freq_flag = 1;
U8 gEye_Scan_unterm_highest_flag = 0;
#endif

#ifdef FOR_HQA_REPORT_USED
/* HQA log switch: defaults to 1 when logging to storage is configured, otherwise 0. */
#if defined(CFG_DRAM_LOG_TO_STORAGE)
U8 gHQALog_flag = 1;
#else
U8 gHQALog_flag = 0;
#endif
/* Initially 0; written outside this chunk (presumably the RX delay-cell size in ps at 0.75V -- confirm). */
U16 gHQALOG_RX_delay_cell_ps_075V = 0;
#endif

#if (TX_AUTO_K_ENABLE && TX_AUTO_K_WORKAROUND)
/* Backup storage for rank-1 DQM/DQ MCK, UI and PI settings, built only for the
 * TX auto-K workaround. The PI backups hold two entries each (presumably one per byte -- confirm). */
U32 u4DQM_MCK_RK1_backup;
U32 u4DQM_UI_RK1_backup;
U32 u4DQM_PI_RK1_backup[2];
U32 u4DQ_MCK_RK1_backup;
U32 u4DQ_UI_RK1_backup;
U32 u4DQ_PI_RK1_backup[2];
#endif

#if DDR_ENABLE_RX_DVS_CAL
/* Per-rank, per-DQS value used by RX DVS calibration (users are outside this chunk). */
U8 u1DVS_increase[RANK_MAX][DQS_NUMBER];
#endif

static S32 wrlevel_dqs_final_delay[RANK_MAX][DQS_NUMBER]; // indexed [rank][DQS]; there is no channel dimension
/* One entry per DFS shuffle (presumably the Vcore setting for that shuffle -- confirm). */
U32 u4gVcore[DRAM_DFS_SHUFFLE_MAX];

/* Jitter-meter delay-cell table, DRAM_DFS_SRAM_MAX entries. */
JMETER_DELAYCELL_T JMeter_DelayCell_Table[DRAM_DFS_SRAM_MAX];

/* Initially 0; written outside this chunk (presumably the number of delay cells per clock period -- confirm). */
U16 u2g_num_dlycell_perT = 0;
/* RX Vref values kept per channel, rank, ODT on/off and byte; zero-initialised. */
U8 gFinalRXVrefDQForSpeedUp[CHANNEL_NUM][RANK_MAX][2/*ODT_onoff*/][2/*2bytes*/] = {{{{0}}}};
U32 gDramcSwImpedanceResult[IMP_DRV_MAX] = { 0 };//ODT_ON/OFF x DRVP/DRVN/ODTP/ODTN

/* Per-DQS pair of signed offsets (meaning of the [2] index is not visible in this chunk). */
S16 gu2RX_DQS_Duty_Offset[DQS_NUMBER][2];

/* RX delay pre-calibration is enabled exactly when the RX per-bit pinmux auto test is disabled.
 * NOTE(review): the expansion is not parenthesised; safe in the #if below, take care in wider expressions. */
#define RX_DELAY_PRE_CAL !PINMUX_AUTO_TEST_PER_BIT_RX
#if RX_DELAY_PRE_CAL
/* Starts at the PASS_RANGE_NA sentinel. */
S16 s2RxDelayPreCal=PASS_RANGE_NA;
#endif

#if MRW_CHECK_ONLY
/* Record of mode register values per channel, rank, FSP and MR index (MRW check builds only). */
U16 u2MRRecord[CHANNEL_NUM][RANK_MAX][FSP_MAX][MR_NUM];
#endif
#if MRW_CHECK_ONLY || MRW_BACKUP
/* Per-rank FSP flag; element 0 is explicitly FSP_0 and the remaining elements are zero-initialised. */
U8 gFSPWR_Flag[RANK_MAX]={FSP_0};
#endif

#if PRINT_CALIBRATION_SUMMARY
/*
 * vSetCalibrationResult
 * Record the outcome of one calibration step in the context's summary bitmaps.
 *
 * Two U32 bitmaps are updated, using bit number ucCalType in each:
 *   execute bit: 1 = calibration was executed, 0 = it was not executed
 *   result bit : 1 = no valid result,          0 = a result is available
 *
 *   ucResult        execute  result
 *   DRAM_FAIL          1       1
 *   DRAM_OK            1       0
 *   DRAM_FAST_K        0       0
 *   anything else      0       1     (treated as "NO K")
 *
 * SW impedance results are stored in p->SWImpCalExecute / p->SWImpCalResult;
 * every other type is stored per channel/rank, selected by p->channel and p->rank.
 *
 * @param p          DRAMC context.
 * @param ucCalType  Calibration type; used as the bit index.
 * @param ucResult   DRAM_FAIL, DRAM_OK, DRAM_FAST_K, or any other value for "NO K".
 */
void vSetCalibrationResult(DRAMC_CTX_T *p, U8 ucCalType, U8 ucResult)
{
    U32 *pu4Execute, *pu4Result;
    U32 u4Mask;

    // The flags are 32-bit bitmaps: a type of 32 or more has no bit, and shifting
    // by it would be undefined behaviour.
    if (ucCalType >= 32)
        return;

    // Unsigned shift: "1 << 31" on a signed int is undefined.
    u4Mask = (U32)1 << ucCalType;

    if (ucCalType == DRAM_CALIBRATION_SW_IMPEDANCE)
    {
        pu4Execute = &p->SWImpCalExecute;
        pu4Result = &p->SWImpCalResult;
    }
    else
    {
        pu4Execute = &p->aru4CalExecuteFlag[p->channel][p->rank];
        pu4Result = &p->aru4CalResultFlag[p->channel][p->rank];
    }

    if (ucResult == DRAM_FAIL)  // Calibration FAIL
    {
        *pu4Execute |= u4Mask;  // execution done
        *pu4Result |= u4Mask;   // no result found
    }
    else if (ucResult == DRAM_OK)  // Calibration OK
    {
        *pu4Execute |= u4Mask;  // execution done
        *pu4Result &= ~u4Mask;  // result found
    }
    else if (ucResult == DRAM_FAST_K)  // FAST K
    {
        *pu4Execute &= ~u4Mask; // no execution
        *pu4Result &= ~u4Mask;  // result found
    }
    else  // NO K
    {
        *pu4Execute &= ~u4Mask; // no execution
        *pu4Result |= u4Mask;   // no result found
    }
}

#if PRINT_CALIBRATION_SUMMARY_FASTK_CHECK
/*
 * Fast_K_CheckResult
 * Run a quick functional check for a calibration type and record the outcome
 * in the context's fast-K bitmaps (bit number ucCalType, selected by
 * p->channel and p->rank).
 *
 *   TX_PERBIT / DATLAT / RX_PERBIT : test engine write-read-check, XTALK pattern
 *   RX_RDDQC                       : RDDQC run
 *   GATING                         : test engine read with the audio pattern, then
 *                                    both DQSG counters must read 0x4646
 *   any other type                 : nothing is run and no flag is changed
 *
 * When a check ran, the execute bit is set and the result bit is cleared on
 * pass (check value 0) or set on fail.
 *
 * @param p          DRAMC context.
 * @param ucCalType  Calibration type to check.
 */
void Fast_K_CheckResult(DRAMC_CTX_T *p, U8 ucCalType)
{
    U32 CheckResult=0xFFFFFFFF; // non-zero means FAIL; stays set unless a check passes
    U32 debug_cnt[2];
    BOOL FastK_Check_flag=0;
    U32 *Pointer_FastKExecute,*Pointer_FastKResult;
    U32 u4Mask;

    Pointer_FastKExecute = &p->FastKExecuteFlag[p->channel][p->rank];
    Pointer_FastKResult = &p->FastKResultFlag[p->channel][p->rank];

    if ((ucCalType==DRAM_CALIBRATION_TX_PERBIT)||(ucCalType==DRAM_CALIBRATION_DATLAT)||(ucCalType==DRAM_CALIBRATION_RX_PERBIT))
    {
        DramcEngine2Init(p, p->test2_1, p->test2_2, TEST_XTALK_PATTERN, 0, TE_NO_UI_SHIFT);
        CheckResult = DramcEngine2Run(p,TE_OP_WRITE_READ_CHECK , TEST_XTALK_PATTERN);
        DramcEngine2End(p);
        FastK_Check_flag=1;
    }
    else if (ucCalType==DRAM_CALIBRATION_RX_RDDQC)
    {
        DramcRxWinRDDQCInit(p);
        CheckResult = DramcRxWinRDDQCRun(p);
        DramcRxWinRDDQCEnd(p);
        FastK_Check_flag=1;
    }
    else if (ucCalType==DRAM_CALIBRATION_GATING)
    {
        DramcEngine2Init(p, 0x55000000, 0xaa000000 |0x23, TEST_AUDIO_PATTERN, 0, TE_NO_UI_SHIFT);

        //Gating Counter Reset
        DramPhyReset(p);
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL2), 1,
                MISC_STBCAL2_DQSG_CNT_RST);
        mcDELAY_US(1);
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL2), 0,
                MISC_STBCAL2_DQSG_CNT_RST);

        // Only the DQSG counters are inspected; the engine's own return value is not used.
        DramcEngine2Run(p, TE_OP_READ_CHECK, TEST_AUDIO_PATTERN);

        debug_cnt[0] = u4IO32Read4B(DRAMC_REG_ADDR(DDRPHY_REG_CAL_DQSG_CNT_B0));
        debug_cnt[1] = u4IO32Read4B(DRAMC_REG_ADDR(DDRPHY_REG_CAL_DQSG_CNT_B1));

        //mcSHOW_DBG_MSG(" 0x%X  ",u4DebugCnt))
        if (debug_cnt[0]==0x4646 && debug_cnt[1]==0x4646)
            CheckResult=0;

        DramcEngine2End(p);
        FastK_Check_flag=1;
    }

    // No check exists for this calibration type: leave both bitmaps untouched.
    if (FastK_Check_flag != 1)
        return;

    // Unsigned shift: "1 << 31" on a signed int is undefined.
    u4Mask = (U32)1 << ucCalType;

    if (CheckResult == 0)
    {
        //mcSHOW_DBG_MSG(" [FAST K CHECK]->PASS\n"))
        *Pointer_FastKResult &= ~u4Mask; // result PASS
    }
    else
    {
        //mcSHOW_DBG_MSG(" [FAST K CHECK]->FAIL\n"))
        *Pointer_FastKResult |= u4Mask; // result FAIL
    }
    *Pointer_FastKExecute |= u4Mask; // Executed
}
#endif

/*
 * Display names for the calibration summary, indexed by calibration type
 * (the same index vPrintCalibrationResult() uses as the flag bit number).
 * Names are space-padded to a common width so the printed columns line up.
 * NOTE(review): 15 names are listed; if DRAM_CALIBRATION_MAX is larger, the
 * remaining entries are NULL pointers -- confirm against the enum.
 */
const char *szCalibStatusName[DRAM_CALIBRATION_MAX]=
{
    "SW Impedance             ",
    "DUTY Scan                ",
    "ZQ Calibration           ",
    "Jitter Meter             ",
    "CBT Training             ",
    "Write leveling PI        ",
    "Write leveling DLY       ",
    "DUTYCYCLE_MONITOR        ",
    "RX DQS gating            ",
    "RX DQ/DQS(RDDQC)         ",
    "RX DQ/DQS(RDDQC DQM ONLY)",
    "TX DQ/DQS                ",
    "RX DATLAT                ",
    "RX DQ/DQS(Engine)        ",
    "TX OE                    ",
};

/*
 * vPrintCalibrationResult
 * Print the calibration summary for the current frequency: for every supported
 * channel and rank, decode the execute/result flag pair of each calibration
 * type and log its status.
 *
 *   execute  result   reported as
 *      1       1      @_@FAIL@_@   (also marks the rank as failed)
 *      1       0      PASS
 *      0       0      FAST K
 *      0       1      NO K
 *
 * With PRINT_CALIBRATION_SUMMARY_DETAIL every type is printed; otherwise only
 * failures are. When SLT is defined, a failure logs a fatal-error marker and
 * then spins forever. "All Pass." is printed for a rank without failures.
 *
 * @param p  DRAMC context (frequency, supported channel/rank counts, flags).
 */
void vPrintCalibrationResult(DRAMC_CTX_T *p)
{
    U8 ucCHIdx, ucRankIdx, ucCalIdx;
    U32 ucCalResult_All, ucCalExecute_All;
    U8 ucCalResult, ucCalExecute;
    U8 u1CalibrationFail;

    mcSHOW_DBG_MSG("\n\n[Calibration Summary] Freqency %d\n", p->frequency);
#if __SLT__
    mcSHOW_PARSER_MSG(("\n\n[Calibration Summary] Freqency %d\n", p->frequency));
#endif

    //for(ucFreqIdx=0; ucFreqIdx<DRAM_DFS_SHUFFLE_MAX; ucFreqIdx++)
    {
        //mcSHOW_DBG_MSG("==Freqency = %d==\n", get_FreqTbl_by_shuffleIndex(p,ucFreqIdx)->frequency);
        for(ucCHIdx=0; ucCHIdx<p->support_channel_num; ucCHIdx++)
        {
            for(ucRankIdx=0; ucRankIdx<p->support_rank_num; ucRankIdx++)
            {
                // Failure tracking and the flag words are per channel/rank.
                u1CalibrationFail =0;
                ucCalExecute_All = p->aru4CalExecuteFlag[ucCHIdx][ucRankIdx];
                ucCalResult_All = p->aru4CalResultFlag[ucCHIdx][ucRankIdx];
                mcSHOW_DBG_MSG("CH %d, Rank %d\n", ucCHIdx, ucRankIdx);
                //mcSHOW_DBG_MSG("[vPrintCalibrationResult] Channel = %d, Rank= %d, Freq.= %d, (ucCalExecute_All 0x%x, ucCalResult_All 0x%x)\n", ucCHIdx, ucRankIdx, ucFreqIdx, ucCalExecute_All, ucCalResult_All);
#if __SLT__
		mcSHOW_PARSER_MSG(("CH %d, Rank %d\n", ucCHIdx, ucRankIdx));
#endif
                for(ucCalIdx =0; ucCalIdx<DRAM_CALIBRATION_MAX; ucCalIdx++)
                {
                    if(ucCalIdx==0)
                    {
                        // Index 0 takes its flags from the dedicated SW impedance words,
                        // not from the per-channel/rank bitmaps.
                        // NOTE(review): the whole word is truncated to U8 (not masked with 0x1),
                        // so the ==1 tests below only match when bit 0 is the only low bit set.
                        ucCalExecute = (U8)p->SWImpCalExecute; //for SW Impedence
                        ucCalResult = (U8)p->SWImpCalResult; //for SW Impedence
                    }
                    else if(ucCalIdx == DRAM_CALIBRATION_CA_TRAIN)
                    {
                        // CA training is always reported as "NO K", whatever the stored flags say.
                        ucCalExecute = 0;
                        ucCalResult = 1;  //NO K
                    }
                    else
                    {
                        // Every other type: extract bit number ucCalIdx from each bitmap.
                        ucCalExecute = (U8)((ucCalExecute_All >>ucCalIdx) & 0x1);
                        ucCalResult =  (U8)((ucCalResult_All >>ucCalIdx) & 0x1);
                    }

                    #if PRINT_CALIBRATION_SUMMARY_DETAIL
                    mcSHOW_DBG_MSG("%s: ", szCalibStatusName[ucCalIdx]);
                    if(ucCalExecute==1 && ucCalResult ==1) // executed and fail
                    {
                        u1CalibrationFail =1;
                        mcSHOW_DBG_MSG("%s\n", "@_@FAIL@_@");
#if __SLT__
			mcSHOW_PARSER_MSG(("%s: %s\n", szCalibStatusName[ucCalIdx], ((ucCalResult == 0) ? "OK" : "Fail")));
#endif
#if defined(SLT)
                        // SLT builds stop here: log the fatal marker and spin forever.
                        mcSHOW_ERR_MSG("[dramc] DRAM_FATAL_ERR_FLAG = 0x80000000\n");
                        while (1);
#endif
                    }
                    else if (ucCalExecute==1 && ucCalResult ==0) // DRAM_OK
                    {
                        mcSHOW_DBG_MSG("%s\n", "PASS");
                    }
                    else if (ucCalExecute==0 && ucCalResult ==0) // DRAM_FAST K
                    {
                        mcSHOW_DBG_MSG("%s\n", "FAST K");
                    }
                    else //DRAM_NO K
                    {
                        mcSHOW_DBG_MSG("%s\n", "NO K");
                    }

                    #else
                    // Non-detail build: only failures are printed.
                    if(ucCalExecute==1 && ucCalResult ==1) // executed and fail
                    {
                        u1CalibrationFail =1;
                        mcSHOW_DBG_MSG("%s: %s\n", szCalibStatusName[ucCalIdx],"@_@FAIL@_@");
#if defined(SLT)
                        // SLT builds stop here: log the fatal marker and spin forever.
                        mcSHOW_ERR_MSG("[dramc] DRAM_FATAL_ERR_FLAG = 0x80000000\n");
                        while (1);
#endif
                    }
                    #endif
                }

                if(u1CalibrationFail ==0)
                {
                    mcSHOW_DBG_MSG("All Pass.\n");
#if __SLT__
		    mcSHOW_PARSER_MSG(("All Pass.\n\n"));
#endif
                }
                mcSHOW_DBG_MSG("\n");
            }
        }
    }

}
#endif


/*
 * vInitGlobalVariablesByCondition
 * Reset the global mode register table gMRVal to all-zero bytes.
 *
 * @param p  DRAMC context; not read by this function.
 */
void vInitGlobalVariablesByCondition(DRAMC_CTX_T *p)
{
    /* gMRVal is a true array here, so sizeof covers the whole table. */
    memset(gMRVal, 0, sizeof gMRVal);
}

/*
 * PCDDR4 O1 bit mapping, one row of 16 entries per channel.
 * Only the CH-A row is spelled out, and it is the identity mapping (entry i == i).
 * Rows for any further channels are not listed and are therefore zero-initialised.
 */
const U8 PCDDR4_O1_Mapping_POP[CHANNEL_NUM][16] ={
	//CH-A
	{
		0, 1, 2, 3, 4, 5, 6, 7,
		8, 9, 10, 11, 12, 13, 14, 15
	},
};

/*
 * PCDDR3 O1 mapping (DRAM -> APHY), one row of 16 entries per channel.
 * Only the CH-A row is spelled out, and it is the identity mapping (entry i == i).
 * Rows for CH-B..CH-D are not listed and are therefore zero-initialised.
 */
const U8 PCDDR3_O1_Mapping_POP[CHANNEL_NUM][16] ={
    //CH-A
	{
		0, 1, 2, 3, 4, 5, 6, 7,
		8, 9, 10, 11, 12, 13, 14, 15
	},
    //CH-B
    //CH-C
    //CH-D
};

/*
 * PCDDR3 O1 mapping (DRAM -> APHY) for the DSC variant, one row of 16 entries per channel.
 * Only the CH-A row is spelled out, and it is the identity mapping (entry i == i).
 * Rows for CH-B..CH-D are not listed and are therefore zero-initialised.
 */
const U8 PCDDR3_O1_Mapping_POP_DSC[CHANNEL_NUM][16] ={
    //CH-A
	{
		0, 1, 2, 3, 4, 5, 6, 7,
		8, 9, 10, 11, 12, 13, 14, 15
    },
    //CH-B
    //CH-C
    //CH-D
};


#if (fcFOR_CHIP_ID == fcGriffin)
/*
 * To process per-bit related mapping. Use DRAM DQ as index to find APHY bit.
 * PCDDR4 table: one row of DQ_DATA_WIDTH entries per channel. The listed row
 * (channel 0) is a permutation of 0..15; rows for further channels are reserved
 * and left zero-initialised. Returned by get_dq_dramc2phy_mapping() for DDR4.
 */
const U8 pcddr4_dq_dramc2phy_mapping[CHANNEL_NUM][DQ_DATA_WIDTH] = {
    {
		12, 13, 14, 15, 9, 3, 11, 1,
		10, 0, 4, 5, 6, 2, 8, 7,
    },
#if CHANNEL_NUM > 1
    //RSV
#endif
};

/*
 * PCDDR3 DRAM-DQ -> APHY-bit table for the DSC pinmux: one row of DQ_DATA_WIDTH
 * entries per channel. The listed row (channel 0) is a permutation of 0..15;
 * rows for further channels are reserved and left zero-initialised.
 * Returned by get_dq_dramc2phy_mapping() for DDR3 when p->DRAMPinmux == PINMUX_DSC.
 */
const U8 pcddr3_dq_dramc2phy_mapping_DSC[CHANNEL_NUM][DQ_DATA_WIDTH] = {
    {
        12, 13, 14, 15, 11, 1, 9, 3,
		7, 4, 2, 8, 0, 6, 5, 10,
    },
#if CHANNEL_NUM > 1
    //RSV
#endif
};

/*
 * PCDDR3 DRAM-DQ -> APHY-bit table for the KGD case: one row of DQ_DATA_WIDTH
 * entries per channel. The listed row (channel 0) is a permutation of 0..15 that
 * keeps entries 0..7 within 0..7 and entries 8..15 within 8..15; rows for further
 * channels are reserved and left zero-initialised.
 * Returned by get_dq_dramc2phy_mapping() for DDR3 with any pinmux other than PINMUX_DSC.
 */
const U8 pcddr3_dq_dramc2phy_mapping_KGD[CHANNEL_NUM][DQ_DATA_WIDTH] = {
    {
        7, 4, 5, 6, 0, 3, 2, 1,
		15, 12, 14, 13, 9, 10, 8, 11,
    },
#if CHANNEL_NUM > 1
    //RSV
#endif
};

/*
 * get_dq_dramc2phy_mapping
 * Select the DRAM-DQ -> APHY-bit table row for the current channel.
 *
 *   DDR4 family             -> pcddr4_dq_dramc2phy_mapping
 *   DDR3 family, PINMUX_DSC -> pcddr3_dq_dramc2phy_mapping_DSC
 *   DDR3 family, otherwise  -> pcddr3_dq_dramc2phy_mapping_KGD
 *
 * @param p  DRAMC context (DRAM type, p->DRAMPinmux, p->channel).
 * @return   Pointer to DQ_DATA_WIDTH entries for p->channel. For any other
 *           DRAM type an error is logged, ASSERT(0) fires and NULL is returned.
 */
U8 const * get_dq_dramc2phy_mapping(DRAMC_CTX_T *p)
{
    if (is_ddr4_family(p))
    {
        return pcddr4_dq_dramc2phy_mapping[p->channel];
    }

    if (!is_ddr3_family(p))
    {
        /* Neither DDR4 nor DDR3: there is no table for this DRAM type. */
        mcSHOW_ERR_MSG("%s: Invalid dram type!!\n", __func__);
        ASSERT(0);
        return NULL;
    }

    /* DDR3: the pinmux decides between the DSC and KGD tables. */
    return (p->DRAMPinmux == PINMUX_DSC)
            ? pcddr3_dq_dramc2phy_mapping_DSC[p->channel]
            : pcddr3_dq_dramc2phy_mapping_KGD[p->channel];
}
#endif

/*
 * vBeforeCalibration
 * Put DRAMC/DDRPHY into the state the calibration flow expects. In order:
 *   - turn DCM off and disable HW gating tracking;
 *   - fix CKE on for all ranks and channels;
 *   - turn write DBI off;
 *   - (DDR_IMPEDANCE_TRACKING_ENABLE) program the impedance-update disables;
 *   - program the ZQ counters and the per-channel ZQCS mask arrangement;
 *   - clear the ARPI offset latch enables and the TX auto-K / TX tracking sub-clock controls;
 *   - disable HW DQSOSC and per-rank refresh;
 *   - program MATYPE and clear MBIST_HOLDB.
 *
 * p->rank is restored to its entry value before returning.
 *
 * @param p  DRAMC context.
 */
void vBeforeCalibration(DRAMC_CTX_T *p)
{
    U8 rank, rank_bak;

    EnableDramcPhyDCM(p, DCM_OFF); //Let CLK always free-run

#if DDR_ENABLE_RX_DVS_CAL || DDR_ENABLE_RX_TRACKING
    DramcRxInputDelayTrackingInit_byFreq(p);
#endif

    DramcHWGatingOnOff(p, 0); //disable gating tracking

    CKEFixOnOff(p, CKE_WRITE_TO_ALL_RANK, CKE_FIXON, CKE_WRITE_TO_ALL_CHANNEL); //Let CLK always on during calibration

#if ENABLE_TMRRI_NEW_MODE
    SetCKE2RankIndependent(p); //CKE should be controlled independently
#endif

    //WDBI-OFF
    vIO32WriteFldAlign_All(DRAMC_REG_SHU_TX_SET0, 0x0, SHU_TX_SET0_DBIWR);

#ifdef DDR_IMPEDANCE_TRACKING_ENABLE
    // Set the shuffle RGs that control IMPCAL HW tracking.
    // u1DisImpHw = 0 (HW tracking enabled) only for a DDR4-family part running at
    // DDR4_MRFSP_TERM_FREQ or above; DDR3 and every other case keep it disabled (1).
    // Defaulting to 1 also covers a part that is neither DDR3 nor DDR4, where the
    // threshold would otherwise have been read uninitialized.
    U8 u1DisImpHw = 1;

    if (!is_ddr3_family(p) && is_ddr4_family(p) && (p->frequency >= DDR4_MRFSP_TERM_FREQ))
        u1DisImpHw = 0;

    vIO32WriteFldMulti_All(DDRPHY_REG_MISC_SHU_IMPEDAMCE_UPD_DIS1,
        P_Fld(0x1, MISC_SHU_IMPEDAMCE_UPD_DIS1_ODTN_UPD_DIS) |
        P_Fld(0x1, MISC_SHU_IMPEDAMCE_UPD_DIS1_DRVN_UPD_DIS) |
        P_Fld(u1DisImpHw, MISC_SHU_IMPEDAMCE_UPD_DIS1_DRVP_UPD_DIS) |
        P_Fld(0x1, MISC_SHU_IMPEDAMCE_UPD_DIS1_CMD2_ODTN_UPD_DIS) |
        P_Fld(0x1, MISC_SHU_IMPEDAMCE_UPD_DIS1_CMD2_DRVN_UPD_DIS) |
        P_Fld(0x1, MISC_SHU_IMPEDAMCE_UPD_DIS1_CMD2_DRVP_UPD_DIS) |
        P_Fld(0x1, MISC_SHU_IMPEDAMCE_UPD_DIS1_CMD1_ODTN_UPD_DIS) |
        P_Fld(0x1, MISC_SHU_IMPEDAMCE_UPD_DIS1_CMD1_DRVN_UPD_DIS) |
        P_Fld(0x1, MISC_SHU_IMPEDAMCE_UPD_DIS1_CMD1_DRVP_UPD_DIS) |
        P_Fld(0x1, MISC_SHU_IMPEDAMCE_UPD_DIS1_CS_ODTN_UPD_DIS) |
        P_Fld(0x1, MISC_SHU_IMPEDAMCE_UPD_DIS1_CS_DRVN_UPD_DIS) |
        P_Fld(0x1, MISC_SHU_IMPEDAMCE_UPD_DIS1_CS_DRVP_UPD_DIS) |
        P_Fld(0x1, MISC_SHU_IMPEDAMCE_UPD_DIS1_WCK_ODTN_UPD_DIS) |
        P_Fld(0x1, MISC_SHU_IMPEDAMCE_UPD_DIS1_WCK_DRVN_UPD_DIS) |
        P_Fld(0x1, MISC_SHU_IMPEDAMCE_UPD_DIS1_WCK_DRVP_UPD_DIS) |
        P_Fld(u1DisImpHw, MISC_SHU_IMPEDAMCE_UPD_DIS1_DQ_ODTN_UPD_DIS) |
        P_Fld(u1DisImpHw, MISC_SHU_IMPEDAMCE_UPD_DIS1_DQ_DRVN_UPD_DIS) |
        P_Fld(u1DisImpHw, MISC_SHU_IMPEDAMCE_UPD_DIS1_DQ_DRVP_UPD_DIS) |
        P_Fld(u1DisImpHw, MISC_SHU_IMPEDAMCE_UPD_DIS1_DQS_ODTN_UPD_DIS) |
        P_Fld(u1DisImpHw, MISC_SHU_IMPEDAMCE_UPD_DIS1_DQS_DRVN_UPD_DIS) |
        P_Fld(u1DisImpHw, MISC_SHU_IMPEDAMCE_UPD_DIS1_DQS_DRVP_UPD_DIS));
    vIO32WriteFldMulti_All(DDRPHY_REG_SHU_MISC_SW_IMPCAL,
        P_Fld(u1DisImpHw, SHU_MISC_SW_IMPCAL_DQ1_DRVN_UPD_DIS) |
        P_Fld(u1DisImpHw, SHU_MISC_SW_IMPCAL_DQ1_DRVP_UPD_DIS) |
        P_Fld(u1DisImpHw, SHU_MISC_SW_IMPCAL_DQ1_ODTN_UPD_DIS) |
        P_Fld(u1DisImpHw, SHU_MISC_SW_IMPCAL_DQS1_DRVN_UPD_DIS) |
        P_Fld(u1DisImpHw, SHU_MISC_SW_IMPCAL_DQS1_DRVP_UPD_DIS) |
        P_Fld(u1DisImpHw, SHU_MISC_SW_IMPCAL_DQS1_ODTN_UPD_DIS) |
        P_Fld(1, SHU_MISC_SW_IMPCAL_DQ2_DRVN_UPD_DIS) |
        P_Fld(1, SHU_MISC_SW_IMPCAL_DQ2_DRVP_UPD_DIS) |
        P_Fld(1, SHU_MISC_SW_IMPCAL_DQ2_ODTN_UPD_DIS) |
        P_Fld(1, SHU_MISC_SW_IMPCAL_DQS2_DRVN_UPD_DIS) |
        P_Fld(1, SHU_MISC_SW_IMPCAL_DQS2_DRVP_UPD_DIS) |
        P_Fld(1, SHU_MISC_SW_IMPCAL_DQS2_ODTN_UPD_DIS));
    vIO32WriteFldAlign_All(DDRPHY_REG_SHU_MISC_IMPCAL1, (u1DisImpHw? 0x0:0x40), SHU_MISC_IMPCAL1_IMPCALCNT);

    vIO32WriteFldAlign_All(DDRPHY_REG_SHU_MISC_DRVING1, u1DisImpHw, SHU_MISC_DRVING1_DIS_IMPCAL_HW);
    vIO32WriteFldAlign_All(DDRPHY_REG_SHU_MISC_DRVING1, 0x1, SHU_MISC_DRVING1_DIS_IMP_ODTN_TRACK);
    vIO32WriteFldAlign_All(DDRPHY_REG_SHU_MISC_DRVING2, 0x1, SHU_MISC_DRVING2_DIS_IMPCAL_ODT_EN);
    vIO32WriteFldAlign_All(DDRPHY_REG_SHU_CA_CMD12, u1DisImpHw, SHU_CA_CMD12_RG_RIMP_UNTERM_EN);
#endif

    vIO32WriteFldMulti_All(DDRPHY_REG_MISC_CLK_CTRL, P_Fld(1, MISC_CLK_CTRL_DVFS_CLK_MEM_SEL)
                                                    | P_Fld(1, MISC_CLK_CTRL_DVFS_MEM_CK_MUX_UPDATE_EN));


    vIO32WriteFldMulti_All(DRAMC_REG_SHU_ZQ_SET0,
            P_Fld(0x1ff, SHU_ZQ_SET0_ZQCSCNT) | //Every refresh number to issue ZQCS commands, only for DDR3/LPDDR2/LPDDR3/LPDDR4
            P_Fld(0x1b, SHU_ZQ_SET0_TZQLAT));
    vIO32WriteFldAlign_All(DRAMC_REG_ZQ_SET1, 0x20, ZQ_SET1_HWZQ_RTSWCMD_CNT); /* cc add for MP */

    if (p->support_channel_num == CHANNEL_SINGLE)
    {
        //single channel, ZQCSDUAL=0, ZQCSMASK=0
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_ZQ_SET0), P_Fld(0, ZQ_SET0_ZQCSDUAL) | P_Fld(0x0, ZQ_SET0_ZQCSMASK));
    }
    else if (p->support_channel_num == CHANNEL_DUAL)
    {
        // HW ZQ command is channel interleaving since 2 channel share the same ZQ pin.
        #ifdef DDR_ZQCS_ENABLE
        // dual channel, ZQCSDUAL=1, and CHA ZQCSMASK=1, CHB ZQCSMASK=0

        vIO32WriteFldMulti_All(DRAMC_REG_ZQ_SET0, P_Fld(1, ZQ_SET0_ZQCSDUAL) |
                                               P_Fld(0, ZQ_SET0_ZQCSMASK_OPT) |
                                               P_Fld(0, ZQ_SET0_ZQMASK_CGAR) |
                                               P_Fld(0, ZQ_SET0_ZQCS_MASK_SEL_CGAR));

        // DRAMC CHA(CHN0):ZQCSMASK=1, DRAMC CHB(CHN1):ZQCSMASK=0.
        // ZQCSMASK setting: (Ch A, Ch B) = (1,0) or (0,1)
        // if CHA.ZQCSMASK=1, and then set CHA.ZQCALDISB=1 first, else set CHB.ZQCALDISB=1 first
        channel_backup_and_set(p, CHANNEL_A);
        vIO32WriteFldAlign(DRAMC_REG_ZQ_SET0, 1, ZQ_SET0_ZQCSMASK);
        channel_set(p, CHANNEL_B);
        vIO32WriteFldAlign(DRAMC_REG_ZQ_SET0, 0, ZQ_SET0_ZQCSMASK);
        channel_restore(p);

        // DRAMC CHA(CHN0):ZQ_SET0_ZQCS_MASK_SEL=0, DRAMC CHB(CHN1):ZQ_SET0_ZQCS_MASK_SEL=0.
        vIO32WriteFldAlign_All(DRAMC_REG_ZQ_SET0, 0, ZQ_SET0_ZQCS_MASK_SEL);
        #endif
    }
#if (CHANNEL_NUM > 2)
    else if (p->support_channel_num == CHANNEL_FOURTH)
    {
        // HW ZQ command is channel interleaving since 2 channel share the same ZQ pin.
        #ifdef DDR_ZQCS_ENABLE
        // four channels, ZQCSDUAL=1, ZQCSMASK: CHA=1, CHB=0, CHC=0, CHD=1

        vIO32WriteFldMulti_All(DRAMC_REG_ZQ_SET0, P_Fld(1, ZQ_SET0_ZQCSDUAL) |
                                               P_Fld(0, ZQ_SET0_ZQCALL) |
                                               P_Fld(0, ZQ_SET0_ZQ_SRF_OPT) |
                                               P_Fld(0, ZQ_SET0_ZQCSMASK_OPT) |
                                               P_Fld(0, ZQ_SET0_ZQMASK_CGAR) |
                                               P_Fld(0, ZQ_SET0_ZQCS_MASK_SEL_CGAR));

        // ZQCSMASK setting: (Ch A, Ch C) = (1,0) or (0,1), (Ch B, Ch D) = (1,0) or (0,1)
        // if CHA.ZQCSMASK=1, and then set CHA.ZQCALDISB=1 first, else set CHB.ZQCALDISB=1 first
    #if fcFOR_CHIP_ID == fcGriffin
        channel_backup_and_set(p, CHANNEL_A);
        vIO32WriteFldAlign(DRAMC_REG_ZQ_SET0, 1, ZQ_SET0_ZQCSMASK);
        channel_set(p, CHANNEL_B);
        vIO32WriteFldAlign(DRAMC_REG_ZQ_SET0, 0, ZQ_SET0_ZQCSMASK);
        channel_set(p, CHANNEL_C);
        vIO32WriteFldAlign(DRAMC_REG_ZQ_SET0, 0, ZQ_SET0_ZQCSMASK);
        channel_set(p, CHANNEL_D);
        vIO32WriteFldAlign(DRAMC_REG_ZQ_SET0, 1, ZQ_SET0_ZQCSMASK);
        channel_restore(p);
    #endif

        // ZQ_SET0_ZQCS_MASK_SEL=0 on every channel.
        vIO32WriteFldAlign_All(DRAMC_REG_ZQ_SET0, 0, ZQ_SET0_ZQCS_MASK_SEL);
        #endif
    }
#endif

    // Set 0 to be able to adjust TX DQS/DQ/DQM PI during calibration, for new cross rank mode.
    vIO32WriteFldAlign_All(DDRPHY_REG_SHU_B0_DQ2, 0, SHU_B0_DQ2_RG_ARPI_OFFSET_LAT_EN_B0);
    vIO32WriteFldAlign_All(DDRPHY_REG_SHU_B1_DQ2, 0, SHU_B1_DQ2_RG_ARPI_OFFSET_LAT_EN_B1);
    vIO32WriteFldAlign_All(DDRPHY_REG_SHU_B2_DQ2, 0, SHU_B2_DQ2_RG_ARPI_OFFSET_LAT_EN_B2);

    vIO32WriteFldAlign_All(DRAMC_REG_DCM_SUB_CTRL, 0x0, DCM_SUB_CTRL_SUBCLK_CTRL_TX_AUTOK);

    // ARPI_DQ SW mode mux, TX DQ use 1: PHY Reg 0: DRAMC Reg
    #if ENABLE_PA_IMPRO_FOR_TX_TRACKING
    vIO32WriteFldAlign_All(DRAMC_REG_DCM_SUB_CTRL, 0, DCM_SUB_CTRL_SUBCLK_CTRL_TX_TRACKING);
    #endif
    //Disable HW MR18/19 to prevent fail case when doing SW MR18/19 in DQSOSCAuto
    vIO32WriteFldMulti_All(DRAMC_REG_DQSOSCR,
        P_Fld(0x1, DQSOSCR_DQSOSCRDIS) |
        P_Fld(0x0, DQSOSCR_TDQS2DQ_UPD_BLOCKING));

    // Disable refresh on every supported rank. The rank has to be selected before
    // each write: without vSetRank() the loop only repeated the same write and the
    // backup/restore of p->rank around it had no effect.
    rank_bak = p->rank;
    for (rank = RANK_0; rank < p->support_rank_num; rank++)
    {
        vSetRank(p, rank);
        vIO32WriteFldAlign_All(DRAMC_REG_RK_REF_CTRL, 0x1, RK_REF_CTRL_REFDIS); //disable ab refresh
    }
    vSetRank(p, rank_bak);

    vIO32WriteFldAlign_All(DRAMC_REG_SHU_MATYPE, u1MaType, SHU_MATYPE_MATYPE);
    vIO32WriteFldAlign_All(DDRPHY_REG_MISC_CTRL5, 0, MISC_CTRL5_R_MBIST_HOLDB); //cc MP

    #if __IPMv2_TO_BE_PORTING__
    TX_Path_Algorithm(p);
    #endif
}

/*
 * vAfterCalibration
 * Hand control back to hardware once calibration has finished. In order:
 *   - (DDR_ENABLE_READ_DBI) enable read DBI in the DRAM mode registers;
 *   - (__IPMv2_TO_BE_PORTING__ && DDR_ENABLE_WRITE_DBI) enable write DBI likewise;
 *   - return CKE to dynamic (HW-controlled) mode for all ranks and channels;
 *   - program the dummy-read rank count with p->support_rank_num;
 *   - re-enable auto refresh;
 *   - clear DQSOSCR_TXUPDMODE.
 *
 * @param p  DRAMC context.
 */
void vAfterCalibration(DRAMC_CTX_T *p)
{
//    U8 backup_channel,u1ChannelIdx;

#if DDR_ENABLE_READ_DBI
    EnableDRAMModeRegReadDBIAfterCalibration(p);
#endif

#if __IPMv2_TO_BE_PORTING__

#if DDR_ENABLE_WRITE_DBI
    EnableDRAMModeRegWriteDBIAfterCalibration(p);
#endif
#endif

    CKEFixOnOff(p, CKE_WRITE_TO_ALL_RANK, CKE_DYNAMIC, CKE_WRITE_TO_ALL_CHANNEL); //After CKE FIX on/off, CKE should be returned to dynamic (control by HW)

    vIO32WriteFldAlign_All(DRAMC_REG_DUMMY_RD, p->support_rank_num, DUMMY_RD_RANK_NUM);

    vAutoRefreshSwitch(p, ENABLE);

	// NOTE(review): this _All write wraps the register in DRAMC_REG_ADDR(), unlike the
	// other _All writes in this function -- confirm that is intended.
	vIO32WriteFldAlign_All(DRAMC_REG_ADDR(DRAMC_REG_DQSOSCR), 0, DQSOSCR_TXUPDMODE);
}

/*
 * DramcTriggerAndWait
 * Pulse a trigger field (write 0, then 1) and poll a response field until it
 * becomes non-zero or the polling budget is used up.
 *
 * The response field is read at least once, with a 1us delay after every read,
 * for at most DDR_HW_AUTOK_POLLING_CNT polls.
 *
 * @param p           DRAMC context.
 * @param TriggerReg  Register address/field to trigger.
 * @param RepondsReg  Register address/field to poll for the response.
 * @return DRAM_OK when the response field became non-zero, DRAM_FAIL on time out.
 */
static DRAM_STATUS_T DramcTriggerAndWait(DRAMC_CTX_T *p, REG_TRANSFER_T TriggerReg, REG_TRANSFER_T RepondsReg)
{
//    U32 u4TimeCnt = TIME_OUT_CNT;
    // @Darren, Rx HW AutoK simulation time
    // RX delay all range -511~255, step:4,DDR800semi + TEST2_OFF=0x100 => 8661us/per rank
    // RX delay all range -327~252, step:8,DDR800semi, TEST2_OFF=0x100 => 3276us/per rank
    U32 u4TimeCnt = DDR_HW_AUTOK_POLLING_CNT;
    U32 u4RespFlag = 0; // raw register field value, not a DRAM_STATUS_T

    vIO32WriteFldAlign(DRAMC_REG_ADDR(TriggerReg.u4Addr), 0, TriggerReg.u4Fld); // Init EN status
    vIO32WriteFldAlign(DRAMC_REG_ADDR(TriggerReg.u4Addr), 1, TriggerReg.u4Fld);
    do
    {
        u4RespFlag = u4IO32ReadFldAlign(DRAMC_REG_ADDR(RepondsReg.u4Addr), RepondsReg.u4Fld);
        if (u4TimeCnt > 0) // never wrap below zero, even with a zero polling budget
            u4TimeCnt--;
        mcDELAY_US(1);
    } while ((u4RespFlag == 0) && (u4TimeCnt > 0));

    // Decide on the response itself, not on the counter: a response seen on the
    // very last poll also leaves the counter at 0 and must not count as a time out.
    if (u4RespFlag == 0)
    {
        mcSHOW_DBG_MSG("[DramcTriggerAndWait] Wait 0x%x respond fail (time out)\n", RepondsReg.u4Addr);
        return DRAM_FAIL;
    }

    return DRAM_OK;
}

/*
 * O1PathOnOff
 * Switch the DQ "O1" receive path on or off for byte 0 and byte 1.
 *
 * Only when u1OnOff == ON: set VREF_UNTERM_EN, set the RX in-gate / in-buffer
 * "OPT" fields to 1 and program the RX Vref select to 0xe. These are not undone
 * by the OFF path here; per the note below they are restored by the calibration
 * flow that called this.
 *
 * For both ON and OFF: write u1OnOff into O1_SEL, BIAS_PS, SMT_EN, the DQ/DQS
 * IN_BUFF_EN fields and BUFF_EN_SEL, write (u1OnOff << 1) | u1OnOff into
 * FIX_IN_GATE_EN, then wait 1us.
 *
 * @param p        DRAMC context.
 * @param u1OnOff  ON to enable the path; the value is written directly into the
 *                 enable fields (presumably only ON/OFF, i.e. 1/0, is expected -- confirm).
 */
void O1PathOnOff(DRAMC_CTX_T *p, U8 u1OnOff)
{

    U8 u1VrefSel;

    if (u1OnOff == ON)
    {
        // These RG will be restored when leaving each calibration flow
        // -------------------------------------------------------
        // VREF_UNTERM_EN
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_VREF), 1, SHU_B0_VREF_RG_RX_ARDQ_VREF_UNTERM_EN_B0);
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_VREF), 1, SHU_B1_VREF_RG_RX_ARDQ_VREF_UNTERM_EN_B1);

        // Griffin uses the _All write variant for these two fields; other chips use the plain write.
#if (fcFOR_CHIP_ID == fcGriffin)
        vIO32WriteFldAlign_All(DRAMC_REG_ADDR(DDRPHY_REG_MISC_RX_IN_GATE_EN_CTRL),1, MISC_RX_IN_GATE_EN_CTRL_RX_IN_GATE_EN_OPT);
        vIO32WriteFldAlign_All(DRAMC_REG_ADDR(DDRPHY_REG_MISC_RX_IN_BUFF_EN_CTRL),1, MISC_RX_IN_BUFF_EN_CTRL_RX_IN_BUFF_EN_OPT);
#else
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_RX_IN_GATE_EN_CTRL),1, MISC_RX_IN_GATE_EN_CTRL_RX_IN_GATE_EN_OPT);
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_RX_IN_BUFF_EN_CTRL),1, MISC_RX_IN_BUFF_EN_CTRL_RX_IN_BUFF_EN_OPT);
#endif
        // Fixed RX Vref selection used while the O1 path is on.
        u1VrefSel = 0xe;

        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ5), u1VrefSel, SHU_B0_DQ5_RG_RX_ARDQ_VREF_SEL_B0);
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ5), u1VrefSel, SHU_B1_DQ5_RG_RX_ARDQ_VREF_SEL_B1);
        }

    // DQ_O1 enable/release
    // -------------------------------------------------------
    // Actually this RG naming is O1_EN in APHY
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ6), u1OnOff, B0_DQ6_RG_RX_ARDQ_O1_SEL_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ6), u1OnOff, B0_DQ6_RG_RX_ARDQ_BIAS_PS_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ6), u1OnOff, B1_DQ6_RG_RX_ARDQ_O1_SEL_B1);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ6), u1OnOff, B1_DQ6_RG_RX_ARDQ_BIAS_PS_B1);

    // DQ_IN_BUFF_EN
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ3),
                        P_Fld(u1OnOff, B0_DQ3_RG_RX_ARDQ_SMT_EN_B0) |
                        P_Fld(u1OnOff, B0_DQ3_RG_RX_ARDQ_IN_BUFF_EN_B0) |
                        P_Fld(u1OnOff, B0_DQ3_RG_RX_ARDQS0_IN_BUFF_EN_B0));
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ3),
                        P_Fld(u1OnOff, B1_DQ3_RG_RX_ARDQ_SMT_EN_B1) |
                        P_Fld(u1OnOff, B1_DQ3_RG_RX_ARDQ_IN_BUFF_EN_B1) |
                        P_Fld(u1OnOff, B1_DQ3_RG_RX_ARDQS0_IN_BUFF_EN_B1));

    // DQ_BUFF_EN_SEL
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_PHY3), u1OnOff, B0_PHY3_RG_RX_ARDQ_BUFF_EN_SEL_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_PHY3), u1OnOff, B1_PHY3_RG_RX_ARDQ_BUFF_EN_SEL_B1);

    // Gating always ON
    // The field receives u1OnOff in both of its two low bits: 0x3 for ON, 0x0 for OFF.
#if (fcFOR_CHIP_ID == fcGriffin)
    vIO32WriteFldAlign_All(DRAMC_REG_ADDR(DDRPHY_REG_MISC_RX_IN_GATE_EN_CTRL),(u1OnOff << 1) | u1OnOff, MISC_RX_IN_GATE_EN_CTRL_FIX_IN_GATE_EN);
#else
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_RX_IN_GATE_EN_CTRL),(u1OnOff << 1) | u1OnOff, MISC_RX_IN_GATE_EN_CTRL_FIX_IN_GATE_EN);
#endif
    mcDELAY_US(1);
}


/*
 * u1IsPhaseMode
 *  Tell whether the current DDR loop mode is OPEN_LOOP_MODE or
 *  SEMI_OPEN_LOOP_MODE.
 *  @param p    Pointer of the DRAMC context.
 *  @retval     TRUE for open / semi-open loop mode, FALSE for any other mode.
 */
U8 u1IsPhaseMode(DRAMC_CTX_T *p)
{
    if (vGet_DDR_Loop_Mode(p) == OPEN_LOOP_MODE)
    {
        return TRUE;
    }

    if (vGet_DDR_Loop_Mode(p) == SEMI_OPEN_LOOP_MODE)
    {
        return TRUE;
    }

    // Remaining modes: DDR800_CLOSE_LOOP and NORMAL_CLOSE_LOOP
    return FALSE;
}

/*
 * DramcZQCalibration
 *  Trigger a ZQCL run-time SW command on the given rank and mark the
 *  DRAM_CALIBRATION_ZQ item as DRAM_OK.
 *  @param p      Pointer of the DRAMC context.
 *  @param rank   Rank the command is issued to.
 *  @retval       Always DRAM_OK.
 */
DRAM_STATUS_T DramcZQCalibration(DRAMC_CTX_T * p,U8 rank)
{
    RTSWCMD_PARAM_T stZqCmd;

    // Start from an all-zero command descriptor and fill in only the ZQCL fields.
    memset((void *)&stZqCmd, 0, sizeof(stZqCmd));
    stZqCmd.selector = RUNTIME_SWCMD_ZQCL;
    stZqCmd.rank = rank;

    DramcTriggerRTSWCMD(p, &stZqCmd);

    vSetCalibrationResult(p, DRAM_CALIBRATION_ZQ, DRAM_OK);
    return DRAM_OK;
}

//-------------------------------------------------------------------------
/** DramcWriteLeveling
 *  Start Write Leveling calibration.
 *  (The helpers below come first; DramcWriteLeveling() itself is defined after them.)
 *  @param p                Pointer of context created by DramcCtxCreate.
 *  @param isAutoK          (u8): AUTOK_OFF runs the software scan, any other value runs the hardware auto-K engine.
 *  @param stDelayBase      (WLEV_DELAY_BASED_T): PI_BASED or DLY_BASED scan.
 *  @retval status          (DRAM_STATUS_T): DRAM_OK or DRAM_FAIL
 */
//-------------------------------------------------------------------------
#define WRITE_LEVELING_MOVD_DQS 1//UI

/*
 * u1MCK2UI_DivShift
 *  Return the shift amount used to convert MCK to UI for the current
 *  divider mode: MCK_TO_4UI_SHIFT in DIV4_MODE, MCK_TO_8UI_SHIFT otherwise.
 */
U8 u1MCK2UI_DivShift(DRAMC_CTX_T *p)
{
    return (vGet_Div_Mode(p) == DIV4_MODE) ? MCK_TO_4UI_SHIFT : MCK_TO_8UI_SHIFT;
}

/*
 * ExecuteMoveDramCDelay
 *  Shift one (MCK, UI) delay pair by iShiftUI.
 *  The pair is read from the two register fields, flattened to a single
 *  count as (MCK << shift) + UI, moved by iShiftUI, then split back into
 *  MCK / UI and written to the same fields.
 *  @param p         Pointer of the DRAMC context.
 *  @param ui_reg    Address/field of the UI part.
 *  @param mck_reg   Address/field of the MCK part.
 *  @param iShiftUI  Signed shift to apply.
 *  @retval          DRAM_OK, or DRAM_FAIL when the shifted delay would be
 *                   negative (both fields are then written as 0).
 */
static DRAM_STATUS_T ExecuteMoveDramCDelay(DRAMC_CTX_T *p,
                                                    REG_TRANSFER_T ui_reg,
                                                    REG_TRANSFER_T mck_reg,
                                                    S8 iShiftUI)
{
    const U8 u1DivShift = u1MCK2UI_DivShift(p);
    DRAM_STATUS_T eResult = DRAM_OK;
    U32 u4UI, u4MCK;
    S32 s4Flat, s4Shifted;

    // Current setting; bit 'u1DivShift' of the UI field is masked off before use.
    u4UI = u4IO32ReadFldAlign(DRAMC_REG_ADDR(ui_reg.u4Addr), ui_reg.u4Fld) & (~(1 << u1DivShift));
    u4MCK = u4IO32ReadFldAlign(DRAMC_REG_ADDR(mck_reg.u4Addr), mck_reg.u4Fld);

    // Flatten to one count, then apply the requested shift.
    s4Flat = (u4MCK << u1DivShift) + u4UI;
    s4Shifted = s4Flat + iShiftUI;

    if (s4Shifted >= 0)
    {
        // Split the flat count back into its MCK and UI parts.
        u4MCK = s4Shifted >> u1DivShift;
        u4UI = s4Shifted - (u4MCK << u1DivShift);
    }
    else
    {
        // Cannot move below zero: clamp both parts and report the failure.
        u4MCK = 0;
        u4UI = 0;
        eResult = DRAM_FAIL;
    }

    vIO32WriteFldAlign(DRAMC_REG_ADDR(ui_reg.u4Addr), u4UI, ui_reg.u4Fld);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(mck_reg.u4Addr), u4MCK, mck_reg.u4Fld);

    return eResult;
}

/*
 * _LoopAryToDelay
 *  Apply ExecuteMoveDramCDelay() to entries of two parallel register tables.
 *  BYTE_0 visits the even entries, BYTE_1 the odd entries, any other
 *  selector visits every entry.
 *  @param ui_reg / mck_reg  Parallel tables of UI and MCK fields.
 *  @param u1RG_num          Number of entries in each table.
 *  @param iShiftUI          Shift forwarded to ExecuteMoveDramCDelay().
 *  @param eByteIdx          Byte selector.
 */
static void _LoopAryToDelay(DRAMC_CTX_T *p,
                                  REG_TRANSFER_T *ui_reg,
                                  REG_TRANSFER_T *mck_reg,
                                  U8 u1RG_num,
                                  S8 iShiftUI,
                                  BYTES_T eByteIdx)
{
    const U8 u1OneByteOnly = (eByteIdx == BYTE_0) || (eByteIdx == BYTE_1);
    const U8 u1Stride = u1OneByteOnly ? 2 : 1;
    U8 u1Entry = (eByteIdx == BYTE_1) ? 1 : 0;

    while (u1Entry < u1RG_num)
    {
        ExecuteMoveDramCDelay(p, ui_reg[u1Entry], mck_reg[u1Entry], iShiftUI);
        u1Entry += u1Stride;
    }
}

/*
 * PCDDR_ShiftDQSUI
 *  Shift the DQS0/DQS1 delay fields (DLY_DQSx / TXDLY_DQSx) of the
 *  selected byte(s) by iShiftUI.
 */
void PCDDR_ShiftDQSUI(DRAMC_CTX_T *p, S8 iShiftUI, BYTES_T eByteIdx)
{
    // Entry order is Byte0, Byte1 in both tables.
    REG_TRANSFER_T astDqsUI[] =
    {
        {DRAMC_REG_SHURK_SELPH_DQS1, SHURK_SELPH_DQS1_DLY_DQS0},
        {DRAMC_REG_SHURK_SELPH_DQS1, SHURK_SELPH_DQS1_DLY_DQS1},
    };
    REG_TRANSFER_T astDqsMCK[] =
    {
        {DRAMC_REG_SHURK_SELPH_DQS0, SHURK_SELPH_DQS0_TXDLY_DQS0},
        {DRAMC_REG_SHURK_SELPH_DQS0, SHURK_SELPH_DQS0_TXDLY_DQS1},
    };

    _LoopAryToDelay(p, astDqsUI, astDqsMCK,
                    sizeof(astDqsUI) / sizeof(astDqsUI[0]),
                    iShiftUI, eByteIdx);
}

/*
 * PCDDR_ShiftDQS_OENUI
 *  Shift the DQS0/DQS1 OEN delay fields (DLY_OEN_DQSx / TXDLY_OEN_DQSx)
 *  of the selected byte(s) by iShiftUI.
 */
void PCDDR_ShiftDQS_OENUI(DRAMC_CTX_T *p, S8 iShiftUI, BYTES_T eByteIdx)
{
    // Entry order is Byte0, Byte1 in both tables.
    REG_TRANSFER_T astOenUI[] =
    {
        {DRAMC_REG_SHURK_SELPH_DQS1, SHURK_SELPH_DQS1_DLY_OEN_DQS0},
        {DRAMC_REG_SHURK_SELPH_DQS1, SHURK_SELPH_DQS1_DLY_OEN_DQS1},
    };
    REG_TRANSFER_T astOenMCK[] =
    {
        {DRAMC_REG_SHURK_SELPH_DQS0, SHURK_SELPH_DQS0_TXDLY_OEN_DQS0},
        {DRAMC_REG_SHURK_SELPH_DQS0, SHURK_SELPH_DQS0_TXDLY_OEN_DQS1},
    };

    _LoopAryToDelay(p, astOenUI, astOenMCK,
                    sizeof(astOenUI) / sizeof(astOenUI[0]),
                    iShiftUI, eByteIdx);
}

/*
 * ShiftDQUI
 *  Shift the DQM and DQ delay fields (DLY_DQMx / DLY_DQx with their
 *  TXDLY counterparts) of the selected byte(s) by iShiftUI, for the
 *  currently selected rank.
 */
void ShiftDQUI(DRAMC_CTX_T *p, S8 iShiftUI, BYTES_T eByteIdx)
{
    // Entries alternate Byte0 / Byte1 so that even = Byte0 and odd = Byte1.
    REG_TRANSFER_T astDqUI[] =
    {
        {DRAMC_REG_SHURK_SELPH_DQ3, SHURK_SELPH_DQ3_DLY_DQM0},
        {DRAMC_REG_SHURK_SELPH_DQ3, SHURK_SELPH_DQ3_DLY_DQM1},
        {DRAMC_REG_SHURK_SELPH_DQ2, SHURK_SELPH_DQ2_DLY_DQ0},
        {DRAMC_REG_SHURK_SELPH_DQ2, SHURK_SELPH_DQ2_DLY_DQ1},
    };
    REG_TRANSFER_T astDqMCK[] =
    {
        {DRAMC_REG_SHURK_SELPH_DQ1, SHURK_SELPH_DQ1_TXDLY_DQM0},
        {DRAMC_REG_SHURK_SELPH_DQ1, SHURK_SELPH_DQ1_TXDLY_DQM1},
        {DRAMC_REG_SHURK_SELPH_DQ0, SHURK_SELPH_DQ0_TXDLY_DQ0},
        {DRAMC_REG_SHURK_SELPH_DQ0, SHURK_SELPH_DQ0_TXDLY_DQ1},
    };

    _LoopAryToDelay(p, astDqUI, astDqMCK,
                    sizeof(astDqUI) / sizeof(astDqUI[0]),
                    iShiftUI, eByteIdx);
}

/*
 * ShiftDQUI_AllRK
 *  Run ShiftDQUI() on every supported rank, then restore the rank that
 *  was selected on entry.
 */
void ShiftDQUI_AllRK(DRAMC_CTX_T *p, S8 iShiftUI, BYTES_T eByteIdx)
{
    U8 u1SavedRank = u1GetRank(p);
    U8 u1Rank = RANK_0;

    while (u1Rank < p->support_rank_num)
    {
        vSetRank(p, u1Rank);
        ShiftDQUI(p, iShiftUI, eByteIdx);
        u1Rank++;
    }

    vSetRank(p, u1SavedRank);
}

/*
 * ShiftDQ_OENUI
 *  Shift the DQM_OEN and DQ_OEN delay fields (DLY_OEN_DQMx / DLY_OEN_DQx
 *  with their TXDLY counterparts) of the selected byte(s) by iShiftUI,
 *  for the currently selected rank.
 */
void ShiftDQ_OENUI(DRAMC_CTX_T *p, S8 iShiftUI, BYTES_T eByteIdx)
{
    // Entries alternate Byte0 / Byte1 so that even = Byte0 and odd = Byte1.
    REG_TRANSFER_T astOenUI[] =
    {
        {DRAMC_REG_SHURK_SELPH_DQ3, SHURK_SELPH_DQ3_DLY_OEN_DQM0},
        {DRAMC_REG_SHURK_SELPH_DQ3, SHURK_SELPH_DQ3_DLY_OEN_DQM1},
        {DRAMC_REG_SHURK_SELPH_DQ2, SHURK_SELPH_DQ2_DLY_OEN_DQ0},
        {DRAMC_REG_SHURK_SELPH_DQ2, SHURK_SELPH_DQ2_DLY_OEN_DQ1},
    };
    REG_TRANSFER_T astOenMCK[] =
    {
        {DRAMC_REG_SHURK_SELPH_DQ1, SHURK_SELPH_DQ1_TXDLY_OEN_DQM0},
        {DRAMC_REG_SHURK_SELPH_DQ1, SHURK_SELPH_DQ1_TXDLY_OEN_DQM1},
        {DRAMC_REG_SHURK_SELPH_DQ0, SHURK_SELPH_DQ0_TXDLY_OEN_DQ0},
        {DRAMC_REG_SHURK_SELPH_DQ0, SHURK_SELPH_DQ0_TXDLY_OEN_DQ1},
    };

    _LoopAryToDelay(p, astOenUI, astOenMCK,
                    sizeof(astOenUI) / sizeof(astOenUI[0]),
                    iShiftUI, eByteIdx);
}

/*
 * ShiftDQ_OENUI_AllRK
 *  Run ShiftDQ_OENUI() on every supported rank, then restore the rank
 *  that was selected on entry.
 */
void ShiftDQ_OENUI_AllRK(DRAMC_CTX_T *p, S8 iShiftUI, BYTES_T eByteIdx)
{
    U8 u1SavedRank = u1GetRank(p);
    U8 u1Rank = RANK_0;

    while (u1Rank < p->support_rank_num)
    {
        vSetRank(p, u1Rank);
        ShiftDQ_OENUI(p, iShiftUI, eByteIdx);
        u1Rank++;
    }

    vSetRank(p, u1SavedRank);
}

/*
 * ShiftDQSWCK_UI
 *  Shift both the DQS delay and the DQS OEN delay of the selected
 *  byte(s) by the same iShiftUI amount.
 */
void ShiftDQSWCK_UI(DRAMC_CTX_T *p, S8 iShiftUI, BYTES_T eByteIdx)
{
    // DQS first ...
    PCDDR_ShiftDQSUI(p, iShiftUI, eByteIdx);

    // ... then its output-enable.
    PCDDR_ShiftDQS_OENUI(p, iShiftUI, eByteIdx);
}


//static void vSetDramMRWriteLevelingOnOff(DRAMC_CTX_T *p, U8 u1OnOff)
/*
 * vSetDramMRWriteLevelingOnOff
 *  Set or clear bit 7 of the cached MR1 value for the current
 *  channel/rank and write it to the DRAM.
 *  DDR3 family: only MR1 is written.
 *  DDR4 family: on enable, MR2 is first written with bits [11:9] cleared;
 *  on disable, the cached MR2 is written back after MR1.
 *  Other families: only the cached MR1 value is updated.
 *  @param p        Pointer of the DRAMC context.
 *  @param u1OnOff  Non-zero sets MR1 bit 7, zero clears it.
 */
void vSetDramMRWriteLevelingOnOff(DRAMC_CTX_T *p, U8 u1OnOff)
{
    // Update the cached MR1 first (bit 7 = 1 when on).
    if (!u1OnOff)
    {
        gMRVal[p->channel][p->rank].mr01 &= 0xff7f;
    }
    else
    {
        gMRVal[p->channel][p->rank].mr01 |= 0x80;
    }

    if (is_ddr3_family(p))
    {
        DramcModeRegWriteByRank(p, p->rank, 1,gMRVal[p->channel][p->rank].mr01);
        return;
    }

    if (!is_ddr4_family(p))
    {
        return;
    }

    /* Disable Dynamic ODT before entering write leveling */
    if (u1OnOff == ENABLE)
    {
        DramcModeRegWriteByRank(p, p->rank, 2, gMRVal[p->channel][p->rank].mr02 & ~(0X7 << 9));
    }

    DramcModeRegWriteByRank(p, p->rank, 1,gMRVal[p->channel][p->rank].mr01);

    /* Restore MR2 */
    if (u1OnOff == DISABLE)
    {
        DramcModeRegWriteByRank(p, p->rank, 2, gMRVal[p->channel][p->rank].mr02);
    }
}


#if (DDR_ENABLE_WRITE_LEVELING_CAL == 1)
#define DQPI_PER_UI (32)
#define STORAGED_DLY_UNIT (24)
#define CBT_WLEV_AUTOK_PI_RANGE_MAX 192
/*
 * WriteLevelingScanRange_PI
 *  Choose the write leveling scan window and step.
 *  PI_BASED : begin = WRITE_LEVELING_MOVD_DQS * 32 - MAX_CLK_PI_DELAY - 1,
 *             end = begin + 192; step/bound are 16/32 (open loop),
 *             8/32 (semi-open loop) or 1/64 (other modes).
 *  otherwise: delay cell scan 0 .. 2 * STORAGED_DLY_UNIT, step 1, bound 1024.
 *  @param ps4DlyBegin  [out] first delay of the scan.
 *  @param ps4DlyEnd    [out] last delay of the scan.
 *  @param pu1PIStep    [out] scan step.
 *  @param pPI_bound    [out] PI bound.
 *  @param stDelayBase  PI_BASED or delay-cell based scan.
 */
static void WriteLevelingScanRange_PI(DRAMC_CTX_T *p, S32 *ps4DlyBegin, S32 *ps4DlyEnd, U8 *pu1PIStep, S16 *pPI_bound, WLEV_DELAY_BASED_T stDelayBase)
{
    S32 s4First, s4Last;
    U8 u1Step;
    S16 s2Bound;

    if (stDelayBase != PI_BASED)
    {
        // Delay cell scan range (stDelayBase == DLY_BASED)
        s4First = 0;
        s4Last = 2 * STORAGED_DLY_UNIT;

        u1Step = 1;      // One step is 1/4 delay cell
        s2Bound = 1024;  // No boundary as delay cell based
    }
    else
    {
        // PI scan range
        s4First = WRITE_LEVELING_MOVD_DQS * 32 - MAX_CLK_PI_DELAY - 1;
        s4Last = s4First + 192;

        if (vGet_DDR_Loop_Mode(p) == OPEN_LOOP_MODE)
        {
            u1Step = 16;
            s2Bound = 32;
        }
        else if (vGet_DDR_Loop_Mode(p) == SEMI_OPEN_LOOP_MODE)
        {
            u1Step = 8;
            s2Bound = 32;
        }
        else
        {
            u1Step = 1;
            s2Bound = 64;
        }
    }

    mcSHOW_DBG_MSG2("Begin: %d, End: %d, Step: %d, Bound: %d\n", s4First, s4Last, u1Step, s2Bound);

    *ps4DlyBegin = s4First;
    *ps4DlyEnd = s4Last;
    *pu1PIStep = u1Step;
    *pPI_bound = s2Bound;
}

#if ENABLE_WDQS_MODE_2
/*
 * WriteLevelingPosCal
 *  Replace the per-rank write leveling results of RANK_0 and RANK_1 by
 *  their average (per byte) and program that value into the DQS delay
 *  fields.
 *  A rank-to-rank difference of 9 or more on either byte is reported as a
 *  warning (and hangs when CHECK_HQA_CRITERIA is set).
 *  @param p            Pointer of the DRAMC context.
 *  @param stDelayBase  PI_BASED programs ARPI_PBYTE_Bx; otherwise the
 *                      TX_ARWCK / TX_ARWCKB delay fields are programmed.
 *  @retval             DRAM_OK.
 */
DRAM_STATUS_T WriteLevelingPosCal(DRAMC_CTX_T *p, WLEV_DELAY_BASED_T stDelayBase)
{
    DRAM_RANK_T backup_rank = u1GetRank(p);
    U8 wrlevel_dqs_delay[DQS_NUMBER] = {0};
    U8 rank_i = 0;

    // Rank-to-rank sanity check on both bytes.
    if((wrlevel_dqs_final_delay[RANK_0][0] - wrlevel_dqs_final_delay[RANK_1][0])>=9 ||
        (wrlevel_dqs_final_delay[RANK_0][0] - wrlevel_dqs_final_delay[RANK_1][0])<=-9 ||
        (wrlevel_dqs_final_delay[RANK_0][1] - wrlevel_dqs_final_delay[RANK_1][1])>=9 ||
        (wrlevel_dqs_final_delay[RANK_0][1] - wrlevel_dqs_final_delay[RANK_1][1])<=-9 )
    {
        mcSHOW_ERR_MSG("[WARNING] Larger WL R2R !!\n");
        #if CHECK_HQA_CRITERIA
        while(1);
        #endif
    }

    // Average of the two ranks, per byte.
    wrlevel_dqs_delay[0] = (wrlevel_dqs_final_delay[RANK_0][0] + wrlevel_dqs_final_delay[RANK_1][0]) >> 1;
    wrlevel_dqs_delay[1] = (wrlevel_dqs_final_delay[RANK_0][1] + wrlevel_dqs_final_delay[RANK_1][1]) >> 1;

    wrlevel_dqs_final_delay[RANK_0][0] = wrlevel_dqs_final_delay[RANK_1][0] = wrlevel_dqs_delay[0];
    wrlevel_dqs_final_delay[RANK_0][1] = wrlevel_dqs_final_delay[RANK_1][1] = wrlevel_dqs_delay[1];

    // NOTE(review): the loop starts at the current rank, not RANK_0, so lower
    // ranks are not reprogrammed here - confirm this is intended by the caller.
    for (rank_i = p->rank; rank_i < p->support_rank_num; rank_i++)
    {
        vSetRank(p, rank_i);

        // set to best values for  DQS
        if (stDelayBase == PI_BASED)
        {
            // Adjust DQS output delay.
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ), wrlevel_dqs_delay[0], SHU_RK_B0_DQ_ARPI_PBYTE_B0);
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ), wrlevel_dqs_delay[1], SHU_RK_B1_DQ_ARPI_PBYTE_B1);
        }
        else // stDelayBase == DLY_BASED
        {
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_TXDLY3), wrlevel_dqs_delay[0], SHU_RK_B0_TXDLY3_TX_ARWCK_DLY_B0);
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_TXDLY3), wrlevel_dqs_delay[1], SHU_RK_B1_TXDLY3_TX_ARWCK_DLY_B1);
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_TXDLY3), wrlevel_dqs_delay[0], SHU_RK_B0_TXDLY3_TX_ARWCKB_DLY_B0);
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_TXDLY3), wrlevel_dqs_delay[1], SHU_RK_B1_TXDLY3_TX_ARWCKB_DLY_B1);
        }
    }

    vSetRank(p, backup_rank);

    mcSHOW_DBG_MSG("[WriteLevelingPosCal] DQS PI B0/B1 = %d/%d\n", wrlevel_dqs_delay[0], wrlevel_dqs_delay[1]);

    // The function is declared to return a status; without an explicit
    // return the value seen by a caller would be undefined.
    return DRAM_OK;
}
#endif
/*
 * vSetWlevDly
 *  Write u2Delay into the ARPI_PBYTE field of byte 0 and/or byte 1,
 *  depending on eByteIdx (BYTE_0, BYTE_1 or ALL_BYTES).
 */
static void vSetWlevDly(DRAMC_CTX_T *p, U16 u2Delay, BYTES_T eByteIdx)
{
    const U8 u1AllBytes = (eByteIdx == ALL_BYTES);

    if (u1AllBytes || (eByteIdx == BYTE_0))
    {
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ), u2Delay, SHU_RK_B0_DQ_ARPI_PBYTE_B0);
    }

    if (u1AllBytes || (eByteIdx == BYTE_1))
    {
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ), u2Delay, SHU_RK_B1_DQ_ARPI_PBYTE_B1);
    }
}
/*
 * cbt_wlev_autok_raw_data
 *  Read the auto-K raw result words of one pin.
 *  bitmap[0] always comes from CBT_WLEV_ATK_RESULT0 (+ pin * 8);
 *  bitmap[1] comes from CBT_WLEV_ATK_RESULT1 (+ pin * 8) only when more
 *  than 32 steps were requested, otherwise it is filled with all ones.
 */
static void cbt_wlev_autok_raw_data(DRAMC_CTX_T *p, u32 *bitmap, u8 steps, u8 pin)
{
	bitmap[0] = u4IO32Read4B(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK_RESULT0 + pin * 8));

	if (steps <= 32) {
		/* second word not read for this sweep length */
		bitmap[1] = 0xFFFFFFFF;
		return;
	}

	bitmap[1] = u4IO32Read4B(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK_RESULT1 + pin * 8));
}

/*
 * cbt_wlev_autok_max_pass_win
 *  Read the auto-K "max pass window" result of one pin.
 *  bitmap[0] receives the MAX_PW_INIT field and bitmap[1] the MAX_PW_LEN
 *  field. The low two bits of 'pin' select the BITn field, bit 2 of 'pin'
 *  is added to the register address.
 */
static void cbt_wlev_autok_max_pass_win(DRAMC_CTX_T *p, u32 *bitmap, u8 pin)
{
	const u8 reg_ofs = pin & 0x4;
	const u8 fld_sel = pin & 0x3;

	switch (fld_sel)
	{
		case 0:
			bitmap[0] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK2_RESULT2 + reg_ofs),
					CBT_WLEV_ATK2_RESULT2_CBT_WLEV_ATK_MAX_PW_INIT_BIT0);
			bitmap[1] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK2_RESULT0 + reg_ofs),
					CBT_WLEV_ATK2_RESULT0_CBT_WLEV_ATK_MAX_PW_LEN_BIT0);
			break;
		case 1:
			bitmap[0] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK2_RESULT2 + reg_ofs),
					CBT_WLEV_ATK2_RESULT2_CBT_WLEV_ATK_MAX_PW_INIT_BIT1);
			bitmap[1] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK2_RESULT0 + reg_ofs),
					CBT_WLEV_ATK2_RESULT0_CBT_WLEV_ATK_MAX_PW_LEN_BIT1);
			break;
		case 2:
			bitmap[0] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK2_RESULT2 + reg_ofs),
					CBT_WLEV_ATK2_RESULT2_CBT_WLEV_ATK_MAX_PW_INIT_BIT2);
			bitmap[1] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK2_RESULT0 + reg_ofs),
					CBT_WLEV_ATK2_RESULT0_CBT_WLEV_ATK_MAX_PW_LEN_BIT2);
			break;
		case 3:
			bitmap[0] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK2_RESULT2 + reg_ofs),
					CBT_WLEV_ATK2_RESULT2_CBT_WLEV_ATK_MAX_PW_INIT_BIT3);
			bitmap[1] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK2_RESULT0 + reg_ofs),
					CBT_WLEV_ATK2_RESULT0_CBT_WLEV_ATK_MAX_PW_LEN_BIT3);
			break;
		default:
			/* not reachable: fld_sel is masked to 0..3 */
			break;
	}
}

/*
 * cbt_wlev_train_autok
 *  Program the CBT/WLEV auto-K engine, start it and poll for its response.
 *  Sequence: set early-break and SW-control fields, program the
 *  init/len/res PI sweep, set WLEV_ATKEN, poll CBT_WLEV_ATK_RESPONSE, then
 *  hand control back (ARPIDQS_SW = 1, UIDQS_SW = 0xF) and clear WLEV_ATKEN.
 *  @param p           Pointer of the DRAMC context.
 *  @param autok_mode  Must be AUTOK_DQS (asserted).
 *  @param initpi      Written to CBT_WLEV_ATK_INITPI.
 *  @param lenpi       Written to CBT_WLEV_ATK_LENPI.
 *  @param respi       Written to CBT_WLEV_ATK_RESPI; also scales the
 *                     early-break length.
 *  @param pin_num     Not used by this function.
 *  @retval            Always 0; a missing response is only logged.
 */
static int cbt_wlev_train_autok(DRAMC_CTX_T *p, ATUOK_MODE_T autok_mode,
	    u8 initpi, u8 lenpi, u8 respi, u8 pin_num)
{
	u32 cnt, ready;

	if (autok_mode != AUTOK_DQS)
	{
		/* Log before asserting so the reason is visible even if ASSERT halts. */
		mcSHOW_DBG_MSG("DDR4 only support AUTOK_DQS\n");
		ASSERT(autok_mode == AUTOK_DQS);
	}

	/*
	 * It takes 3.6us for one step.
	 * Max times is 64, about 3.6 * 64 = 231us.
	 */
	cnt = TIME_OUT_CNT * 3;

	vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK_CTRL1),
		P_Fld(1, CBT_WLEV_ATK_CTRL1_CBT_WLEV_ATK_EBEN) |
		P_Fld(WLEV_AUTOK_EB_THRESHOLD >> respi, CBT_WLEV_ATK_CTRL1_CBT_WLEV_ATK_EBLEN) |
		P_Fld(0, CBT_WLEV_ATK_CTRL1_CBT_ATK_CA1UI64PI) |
		P_Fld(0, CBT_WLEV_ATK_CTRL1_UIDQS_SW));

	vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK_CTRL0),
			P_Fld(0, CBT_WLEV_ATK_CTRL0_ARPIDQS_SW));

	vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK_CTRL0),
			P_Fld(0x3, CBT_WLEV_ATK_CTRL0_CBT_WLEV_ATK_INTV));

	vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK_CTRL0),
			P_Fld(lenpi, CBT_WLEV_ATK_CTRL0_CBT_WLEV_ATK_LENPI) |
			P_Fld(respi, CBT_WLEV_ATK_CTRL0_CBT_WLEV_ATK_RESPI) |
			P_Fld(initpi, CBT_WLEV_ATK_CTRL0_CBT_WLEV_ATK_INITPI));

	/* Start the engine and poll for its response. */
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK_CTRL0),
		P_Fld(1, CBT_WLEV_ATK_CTRL0_WLEV_ATKEN));
	do {
		ready = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_STATUS),
				CBT_WLEV_STATUS_CBT_WLEV_ATK_RESPONSE);
		cnt--;
		mcDELAY_US(1);
	}
	#if FOR_DV_SIMULATION_USED
	while (ready == 0);
	#else
	while ((ready == 0) && (cnt > 0));
	#endif

	/*
	 * Judge the time-out by the response itself: the counter can reach 0 on
	 * the very poll that sees the response (and is free-running in the DV
	 * simulation loop), which must not be reported as a failure.
	 */
	if (ready == 0){
		mcSHOW_DBG_MSG("[cbt_autok] Resp fail (time out) for DQSTrain\n");
	}

	vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK_CTRL0),
			P_Fld(1, CBT_WLEV_ATK_CTRL0_ARPIDQS_SW));
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK_CTRL1),
			P_Fld(0xF, CBT_WLEV_ATK_CTRL1_UIDQS_SW));
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_ATK_CTRL0),
			P_Fld(0, CBT_WLEV_ATK_CTRL0_WLEV_ATKEN));

	return 0;
}

/*
 * get_wlev_autok_sweep_max_cnt
 *  Clamp the requested number of sweep steps.
 *  In phase mode (open / semi-open loop) the limit is p2u - pi_dly because
 *  the auto-K engine can not cross a UI; otherwise the limit is
 *  CBT_WLEV_AUTOK_PI_RANGE_MAX + 1 (step count including the start point).
 *  @retval  lenpi, or the limit when lenpi exceeds it.
 */
static u8 get_wlev_autok_sweep_max_cnt(DRAMC_CTX_T *p, u8 pi_dly, u8 lenpi, u8 p2u)
{
	u8 limit;

	if (u1IsPhaseMode(p) != TRUE)
	{
		/* step count, which includes start point */
		limit = CBT_WLEV_AUTOK_PI_RANGE_MAX + 1;
	}
	else
	{
		/* for OPEN/SEMI, ATK can not cross UI */
		limit = p2u - pi_dly;
	}

	if (lenpi > limit)
	{
		return limit;
	}

	return lenpi;
}
/*
 * WLEV_autok_result
 *  Convert the auto-K hardware result into a DQS delay per pin.
 *
 *  Pass-window mode (IPM_VERSION >= 21): the delay is
 *  u1Initpi + pw_init * (1 << u1Respi); the result is DRAM_OK as soon as
 *  any pin reports a non-zero pw_init or pw_len.
 *
 *  Raw-data mode: the 64-entry result bitmap of each pin is walked with a
 *  small state machine (status 0 = no '0' seen yet, 1 = last sample was
 *  '0', >1 = number of consecutive non-zero samples + 1). A pin is done
 *  when the run of non-zero samples is long enough; the result is DRAM_OK
 *  only when every pin is done.
 *
 *  @param p                  Pointer of the DRAMC context.
 *  @param u1Initpi           Start PI of the sweep.
 *  @param u1Lenpi            Sweep length passed to the raw-data reader.
 *  @param u1Respi            Sweep resolution as a shift count.
 *  @param wrlevel_dqs_delay  [out] delay per pin.
 *  @param pin_num            Number of pins to evaluate.
 *  @retval                   DRAM_OK or DRAM_FAIL.
 */
static DRAM_STATUS_T WLEV_autok_result(DRAMC_CTX_T *p,
	u8 u1Initpi, u8 u1Lenpi, u8 u1Respi,
	S32 *wrlevel_dqs_delay, u8 pin_num)
{
	DRAM_STATUS_T KResult = DRAM_FAIL;
	U8 n, use_rawdata, u1DoneFlg;
	U8 u1Delay, u1CmpResult_tmp, u1Status;
	U32 bitmap[2];

#if IPM_VERSION >= 21
	use_rawdata = 0;
#else
	use_rawdata = 1;
#endif


	if (use_rawdata==0)
	{
		mcSHOW_DBG_MSG2("PASS WINDOW MODE\n");
		for (n = 0;n < pin_num;n++)
		{
			cbt_wlev_autok_max_pass_win(p, bitmap, n); //bitmap[0]: pw_init, bitmap[1]: pw_len
			wrlevel_dqs_delay[n] = u1Initpi + bitmap[0] * (1<<u1Respi);
			mcSHOW_DBG_MSG2("PIN%d: u1Initpi = 0x%x, bitmap[0] = 0x%x, bitmap[1] = 0x%x\n", n, u1Initpi, bitmap[0], bitmap[1]);
			if ((bitmap[0] != 0) || (bitmap[1] != 0))
			{
				KResult = DRAM_OK;
			}
		}
	}
	else
	{
		mcSHOW_DBG_MSG2("RAW DATA MODE\n");
		// Bits of pins that are not evaluated are preset as "done".
		if ((p->data_width == DATA_WIDTH_16BIT))
			u1DoneFlg = 0xfc;
		else
			u1DoneFlg = 0xf0;

		for (n = 0;n < pin_num;n++)
		{
			// Each pin runs its own state machine: restart it here. Without
			// this the first comparison below reads an uninitialized value.
			u1Status = 0;

			cbt_wlev_autok_raw_data(p, bitmap, u1Lenpi, n); //bitmap[0]: delay=0~31, bitmap[1]: delay=32~63
			mcSHOW_DBG_MSG2("PIN%d: bitmap0 = 0x%x, bitmap1 = 0x%x\n", n, bitmap[0], bitmap[1]);

			for (u1Delay = 0; u1Delay < 64; u1Delay++)
			{
				if (u1Delay < 32)
				{
					u1CmpResult_tmp = (bitmap[0]>>u1Delay)&0x1;
				}
				else
				{
					u1CmpResult_tmp = (bitmap[1]>>(u1Delay-32))&0x1;
				}

				mcSHOW_DBG_MSG2("u1Delay %d=> Pin %d: %d  \n", u1Initpi+(u1Delay<<u1Respi), n, u1CmpResult_tmp);

				if ((u1Status == 0) && (u1CmpResult_tmp == 0))
				{
					// first '0' sample: arm the detector
					u1Status = 1;
				}
				else if ((u1Status >= 1) && (u1CmpResult_tmp == 0))
				{
					// back to '0': restart counting
					u1Status = 1;
				}
				else if ((u1Status >= 1) && (u1CmpResult_tmp != 0))
				{
					// one more consecutive non-zero sample
					u1Status++;
				}

				#if ENABLE_DDR800_OPEN_LOOP_MODE_OPTION
				if (u1IsPhaseMode(p) == TRUE)
				{
					if(u1Status == 2)
					{
						wrlevel_dqs_delay[n] = u1Delay; //45 degree = 8*PI
						u1DoneFlg |= (0x01 << n);
					}
				}
				else
				#endif
				{
					if (((u1Status <<u1Respi) > WLEV_PASS_CRITERIA) || ((u1Delay == 63) && (u1Status > 1)))
					{
						wrlevel_dqs_delay[n] = u1Delay - ((u1Status - 1) <<u1Respi);
						u1DoneFlg |= (0x01 << n);
					}
				}
			}
		}
		if (u1DoneFlg == 0xff) // all bytes are done
		{
			KResult = DRAM_OK;
		}
	}

	return KResult;
}

/*
 * get_wlev_autok_respi
 *  Translate a step size into the auto-K resolution, i.e. the shift count
 *  n (below AUTOK_RESPI_MAX) for which (1 << n) == u1StepSize.
 *  @retval  The matching shift count, or 0 when no shift count matches.
 */
AUTOK_PI_RESOLUTION get_wlev_autok_respi(DRAMC_CTX_T *p, U8 u1StepSize)
{
    U8 u1Respi = 0;
    U8 u1Shift;

    for (u1Shift = 0; u1Shift < AUTOK_RESPI_MAX; u1Shift++)
    {
        if (u1StepSize != (0x1 << u1Shift))
        {
            continue;
        }

        u1Respi = u1Shift;
        break;
    }

    mcSHOW_DBG_MSG2("WL_AUTOK_RESPI = %d (stepsize = %d)\n", u1Respi, u1StepSize);
    return u1Respi;
}
/*
 * WLEV_AUTOK
 *  Run write leveling with the hardware auto-K engine.
 *  The sweep parameters (init PI, resolution, length) are derived from the
 *  scan range, the engine is run, its result is decoded into
 *  wrlevel_dqs_delay[] and copied to wrlevel_dqs_final_delay[] of the
 *  current rank for BYTE_0 and BYTE_1.
 *  @param s4DlyBegin / s4DlyEnd  Scan range.
 *  @param u1PIStep               Scan step, converted to a resolution shift.
 *  @param PI_bound               Not used by this function.
 *  @param wrlevel_dqs_delay      [out] delay per byte.
 *  @param stDelayBase            Not used by this function.
 *  @retval                       Status returned by WLEV_autok_result().
 */
static DRAM_STATUS_T WLEV_AUTOK(DRAMC_CTX_T *p, U16 s4DlyBegin, U16 s4DlyEnd, U8 u1PIStep, S32 PI_bound, S32* wrlevel_dqs_delay, WLEV_DELAY_BASED_T stDelayBase)
{
    DRAM_STATUS_T eStatus;
    U8 u1Pin;
    U8 u1AutoKType = AUTOK_DQS;
    U8 u1Respi, u1Initpi, u1DelayStep, u1Lenpi;

    // Derive the engine sweep parameters from the requested scan range.
    u1Respi = get_wlev_autok_respi(p, u1PIStep);
    u1Initpi = s4DlyBegin;
    u1DelayStep = get_wlev_autok_sweep_max_cnt(p, u1Initpi, s4DlyEnd - s4DlyBegin + 1, 32);
    u1Lenpi = (u1DelayStep- 1) >> u1Respi;

    mcSHOW_DBG_MSG2("\n **wlev autok: initpi: %02d, respi: %02d, u1lenpi : %02d, u1AutoKType : %02d-->\n", u1Initpi, u1Respi, u1Lenpi, u1AutoKType);
    mcSHOW_DBG_MSG2("\n **wlev autok: s4DlyBegin: %02d, s4DlyEnd: %02d, u1DelayStep : %02d\n", s4DlyBegin, s4DlyEnd, u1DelayStep);

    // Run the engine, then decode what it found.
    cbt_wlev_train_autok(p, u1AutoKType, u1Initpi, u1Lenpi, u1Respi, DQS_NUMBER);
    eStatus = WLEV_autok_result(p, u1Initpi, u1Lenpi, u1Respi, wrlevel_dqs_delay, DQS_NUMBER);

#if (fcFOR_CHIP_ID == fcGriffin)
    if (0) //NeedByteSwap(p))
    {
        S32 s4Swap = wrlevel_dqs_delay[BYTE_0];

        wrlevel_dqs_delay[BYTE_0] = wrlevel_dqs_delay[BYTE_1];
        wrlevel_dqs_delay[BYTE_1] = s4Swap;
    }
#endif

    wrlevel_dqs_final_delay[p->rank][BYTE_0] = wrlevel_dqs_delay[BYTE_0];
    wrlevel_dqs_final_delay[p->rank][BYTE_1] = wrlevel_dqs_delay[BYTE_1];

    u1Pin = 0;
    while (u1Pin < DQS_NUMBER)
    {
        mcSHOW_DBG_MSG("PIN %d: final delay = %d\n", u1Pin, wrlevel_dqs_final_delay[p->rank][u1Pin]);
        u1Pin++;
    }

    return eStatus;
}

/*
 * WLEV_SWK
 *  Software write leveling scan.
 *  Sweeps the delay from s4DlyBegin to s4DlyEnd in u1PIStep steps. For each
 *  step the delay is applied (clock PI for iDelay <= 0, DQS PI otherwise),
 *  a compare is triggered and the per-byte result is fed into a small state
 *  machine (status 0 = no '0' seen yet, 1 = last sample was '0', >1 =
 *  consecutive non-zero samples + 1). A byte is done once
 *  status * u1PIStep > 7 (or at iDelay == s4DlyEnd - 1 with status > 1); its
 *  delay is stored in wrlevel_dqs_final_delay[rank][byte].
 *  @param s4DlyBegin / s4DlyEnd  Scan range.
 *  @param u1PIStep               Scan step.
 *  @param PI_bound               PI wrap boundary; crossing it shifts DQS by UI.
 *  @param stDelayBase            Must be PI_BASED (asserted).
 *  @retval                       DRAM_OK when every byte is done, else DRAM_FAIL.
 *  NOTE(review): the return type is U8 while a DRAM_STATUS_T value is returned.
 *  NOTE(review): SET_PATTERN_MANUALLY_FOR_DEBUG is #defined after this
 *  function in the file; unless it is also defined earlier, the #if tests
 *  below see it as 0 - confirm.
 */
static U8 WLEV_SWK(DRAMC_CTX_T *p, S32 s4DlyBegin, S32 s4DlyEnd, U8 u1PIStep, U16 PI_bound, WLEV_DELAY_BASED_T stDelayBase)
{
#if (SET_PATTERN_MANUALLY_FOR_DEBUG == 1)
    U32 u4dq_o1 = 0;
#endif
	U32 ucDoneFlg;
    U8 byte_i;
    U8 ucHW_cmp_raw_data =0, u1OverBoundCnt = 0;
    U8 uccmp_result[DQS_NUMBER], ucStatus[DQS_NUMBER];
    DRAM_STATUS_T KResult = DRAM_FAIL;
    S32 iDelay;
    if(stDelayBase == DLY_BASED)
    {
        mcSHOW_DBG_MSG2("DDR4 don't support DLY_BASED\n");
        ASSERT(stDelayBase == PI_BASED);
    }
    // Reset the per-byte state machine and the stored result of this rank.
    for (byte_i = 0; byte_i < (S32)(p->data_width / DQS_BIT_NUMBER); byte_i++)
    {
        ucStatus[byte_i] = 0;
        wrlevel_dqs_final_delay[p->rank][byte_i] = 0;
    }

    // Bits of bytes that are not scanned are preset as "done", so that
    // 0xff means "all scanned bytes are done".
    if ((p->data_width == DATA_WIDTH_16BIT))
        ucDoneFlg = 0xfc;
    else
        ucDoneFlg = 0xf0;

    mcSHOW_DBG_MSG2("[Write Leveling]\n");
    mcSHOW_DBG_MSG2("delay  byte0  byte1  byte2  byte3\n\n");

    for (iDelay = s4DlyBegin; iDelay <= s4DlyEnd; iDelay += u1PIStep)
    {
        // NOTE(review): u1dly is only wrapped below and never read afterwards;
        // the registers are programmed from iDelay - confirm this is intended.
        u8 u1dly = iDelay;
        if (stDelayBase == PI_BASED)
        {
            u1dly = u1dly % PI_bound;
            // Each time the scan crosses another PI_bound multiple, move DQS
            // by the equivalent number of UI.
            if ((iDelay / PI_bound) == (u1OverBoundCnt + 1))
            {
                u1OverBoundCnt++;
                //_bitmap_stored_num = 0;
                ShiftDQSWCK_UI(p, PI_bound / DQPI_PER_UI, ALL_BYTES);
            }
        }
    
        ucHW_cmp_raw_data = 0;
        if (iDelay <= 0)
        {
            // Adjust Clk output delay.
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_CA_CMD), -iDelay, SHU_RK_CA_CMD_RG_ARPI_CLK);
        }
        else
        {
            // Positive delay: program the DQS PI of both bytes.
            vSetWlevDly(p, iDelay, ALL_BYTES);
        }
        #ifdef ETT_PRINT_FORMAT
            mcSHOW_DBG_MSG2("  %d    ", iDelay);
        #else
            mcSHOW_DBG_MSG2("  %2d    ", iDelay);
        #endif
#if (SET_PATTERN_MANUALLY_FOR_DEBUG == 1)
        //Trigger DQS pulse
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_CTRL0), 1, CBT_WLEV_CTRL0_CBT_WLEV_DQS_TRIG);
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_CTRL0), 0, CBT_WLEV_CTRL0_CBT_WLEV_DQS_TRIG);

        // Wait tWLO (20ns) before receiving result, especially in DV SIM.
        mcDELAY_US(1);

        //Read DQ_O1 from register
        u4dq_o1 = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DQO1), MISC_DQO1_DQO1_RO);
        mcSHOW_DBG_MSG2("  O1(%X)  ", u4dq_o1);
        // Bit 0 of each byte lane of O1 becomes that byte's compare bit.
        for (byte_i = 0; byte_i < (p->data_width / DQS_BIT_NUMBER); byte_i++)
        {
            ucHW_cmp_raw_data |= (U8)((u4dq_o1 >> byte_i * 8) & 0x1) << byte_i;
        }
#else
        // Trigger and wait
        REG_TRANSFER_T TriggerReg = {DRAMC_REG_CBT_WLEV_CTRL0, CBT_WLEV_CTRL0_WLEV_DQSPATEN};
        REG_TRANSFER_T RepondsReg = {DRAMC_REG_CBT_WLEV_STATUS, CBT_WLEV_STATUS_WLEV_CMP_CPT};
        KResult = DramcTriggerAndWait(p, TriggerReg, RepondsReg);

        // Read result
        ucHW_cmp_raw_data = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_STATUS), CBT_WLEV_STATUS_WLEV_CMP_ERR);

        // Clear the trigger again for the next step.
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_CTRL0), 0, CBT_WLEV_CTRL0_WLEV_DQSPATEN);
#endif // SET_PATTERN_MANUALLY_FOR_DEBUG
#if WLEV_O1_PINMUX_WORKAROUND
        // Swap the SW K result of byte0/byte1
        U8 B1B0 = 0;
        (ucHW_cmp_raw_data & 0x1)? (B1B0 |= 0x2): (B1B0 &= ~0x2);
        (ucHW_cmp_raw_data & 0x2)? (B1B0 |= 0x1): (B1B0 &= ~0x1);
        ucHW_cmp_raw_data = B1B0;
#endif

        for (byte_i = 0; byte_i < (p->data_width / DQS_BIT_NUMBER); byte_i++)
        {
             uccmp_result[byte_i] = (U8)((ucHW_cmp_raw_data >> byte_i) & 0x1);
        
             mcSHOW_DBG_MSG2("  %x   ", uccmp_result[byte_i]);
        
             // State machine: arm on the first '0', restart on every later
             // '0', count consecutive non-zero samples once armed.
             if ((ucStatus[byte_i] == 0) && (uccmp_result[byte_i] == 0))
             {
                 ucStatus[byte_i] = 1;
             }
             else if ((ucStatus[byte_i] >= 1) && (uccmp_result[byte_i] == 0))
             {
                 ucStatus[byte_i] = 1;
             }
             else if ((ucStatus[byte_i] >= 1) && (uccmp_result[byte_i] != 0))
             {
                 ucStatus[byte_i]++;
             }
        
             if ((ucDoneFlg & (0x01 << byte_i)) == 0)// result not found of byte yet
             {
                 if ((ucStatus[byte_i] * u1PIStep > 7) || ((iDelay == s4DlyEnd - 1) && (ucStatus[byte_i] > 1)))
                 {
                     // Step back (status - 2) steps from the current delay,
                     // i.e. to the first non-zero sample of the run.
                     wrlevel_dqs_final_delay[p->rank][byte_i] = iDelay - (ucStatus[byte_i] - 2) * u1PIStep;
                     ucDoneFlg |= (0x01 << byte_i);
                     //mcSHOW_DBG_MSG("(record %d) ", wrlevel_dqs_final_delay[p->rank][byte_i]);
                 }
             }
        }
        mcSHOW_DBG_MSG2("\n");
        if (ucDoneFlg == 0xff)
        {
            mcSHOW_DBG_MSG2(" Early break \n");
            break;
        }
    }
    // Undo every UI shift that was applied while crossing PI_bound.
    if (u1OverBoundCnt > 0)
        ShiftDQSWCK_UI(p, -u1OverBoundCnt * (PI_bound / DQPI_PER_UI), ALL_BYTES);

    if (ucDoneFlg == 0xff)
    {
        // all bytes are done
        fgwrlevel_done = 1;
        KResult = DRAM_OK;
    }
    else
    {
        KResult = DRAM_FAIL;
        #if __FLASH_TOOL_DA__
        PINInfo_flashtool.WL_ERR_FLAG|=(0x1<<(p->channel*2+p->rank));
        #endif
    }
    mcSHOW_DBG_MSG2("pass bytecount = 0x%x (0xff: all bytes pass) \n\n", ucDoneFlg);
    return KResult;
}

#define SET_PATTERN_MANUALLY_FOR_DEBUG 0

DRAM_STATUS_T DramcWriteLeveling(DRAMC_CTX_T *p, u8 isAutoK, WLEV_DELAY_BASED_T stDelayBase)
{
// Note that below procedure is based on "ODT off"
    DRAM_STATUS_T KResult = DRAM_FAIL;

//    U8 *uiLPDDR_O1_Mapping = NULL;
    U32 u4value = 0;	// u4value1 = 0, u4dq_o1 = 0, u4dq_o1_tmp[DQS_NUMBER];
    U8 byte_i, rank_i;
#if (SUPPORT_SAVE_TIME_FOR_CALIBRATION && BYPASS_WRITELEVELING)
	U8 ucDoneFlg;
#endif
//    S32 iDelay, ClockDelayMax;
//    U8 ucStatus[DQS_NUMBER], ucdq_o1[DQS_NUMBER], ucdq_o1_shift[DQS_NUMBER];
//    U8 ucHW_cmp_raw_data, uccmp_result[DQS_NUMBER];
    DRAM_RANK_T backup_rank;

    S32 wrlevel_dq_delay_pi[DQS_NUMBER]; // 3 is channel number
    S32 wrlevel_dqs_delay[DQS_NUMBER]; // 3 is channel number
//    u8 idx = 0;
//    U16 _bitmap_stored_num = 0;

    S32 s4DlyBegin, s4DlyEnd;
    U8 u1PIStep;
//    U8 jj = 0, u1OverBoundCnt = 0;
    S16 PI_bound = 64;

    // error handling
    if (!p)
    {
        mcSHOW_ERR_MSG("context NULL\n");
        return DRAM_FAIL;
    }

    mcDUMP_REG_MSG("\n[dumpRG] DramcWriteLeveling \n");
#if VENDER_JV_LOG
        vPrintCalibrationBasicInfo_ForJV(p);
#else
        vPrintCalibrationBasicInfo(p);
#endif

    fgwrlevel_done = 0;
    backup_rank = u1GetRank(p);

    //DramcRankSwap(p, p->rank);
    //tx_rank_sel is selected by SW //Lewis@20180604: tx_rank_sel is selected by SW in WL if TMRRI design has changed.
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_SET0), p->rank, TX_SET0_TXRANK);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_SET0), 1, TX_SET0_TXRANKFIX); //TXRANKFIX should be write after TXRANK

    // backup mode settings
    U32 u4RegBackupAddress[] =
    {
        (MIX_RG_CHECK(DRAMC_REG_CBT_WLEV_CTRL0)),
        (MIX_RG_CHECK(DRAMC_REG_CBT_WLEV_CTRL1)),
        (MIX_RG_CHECK(DRAMC_REG_CBT_WLEV_CTRL3)),
        (MIX_RG_CHECK(DRAMC_REG_CBT_WLEV_CTRL5)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B0_VREF)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B1_VREF)),
        (MIX_RG_CHECK(DDRPHY_REG_B0_DQ5)),           //in O1PathOnOff()
        (MIX_RG_CHECK(DDRPHY_REG_B1_DQ5)),           //in O1PathOnOff()
        (MIX_RG_CHECK(DRAMC_REG_DRAMC_PD_CTRL)),
    };

    U32 u4MixRgBackupAddress[][2] = {
        {DDRPHY_REG_MISC_RX_IN_GATE_EN_CTRL, MISC_RX_IN_GATE_EN_CTRL_RX_IN_GATE_EN_OPT},
        {DDRPHY_REG_MISC_RX_IN_GATE_EN_CTRL, MISC_RX_IN_GATE_EN_CTRL_FIX_IN_GATE_EN},
        {DDRPHY_REG_MISC_RX_IN_BUFF_EN_CTRL, MISC_RX_IN_BUFF_EN_CTRL_RX_IN_BUFF_EN_OPT},
    };
    DramcBackupRegisters(p, u4RegBackupAddress, sizeof(u4RegBackupAddress) / sizeof(U32), TO_ONE_CHANNEL);
    DramcBackupMixedRG(p, u4MixRgBackupAddress, ARRAY_SIZE(u4MixRgBackupAddress), TO_ALL_CHANNEL);

    //default set DRAM FAIL
    vSetCalibrationResult(p, DRAM_CALIBRATION_WRITE_LEVEL_PI, DRAM_FAIL);

#if MRW_CHECK_ONLY
    mcSHOW_MRW_MSG("\n==[MR Dump] %s==\n", __func__);
#endif

    if (p->isWLevInitShift[p->channel] == FALSE)
    {
        U8 u1DQSAdditionUI = 0;

    #if DDR_CS_DELAY_1T
        u1DQSAdditionUI = 2;
    #endif
        // It must be PI_BASED or FAIL!!
        ASSERT(stDelayBase == PI_BASED);

        p->isWLevInitShift[p->channel] = TRUE;

        // This flow would be excuted just one time, so all ranks(maybe rank0/1) should be adjusted at once.
        ShiftDQUI_AllRK(p, u1DQSAdditionUI -WRITE_LEVELING_MOVD_DQS, ALL_BYTES);
        ShiftDQ_OENUI_AllRK(p, u1DQSAdditionUI -WRITE_LEVELING_MOVD_DQS, ALL_BYTES);
        ShiftDQSWCK_UI(p, u1DQSAdditionUI -WRITE_LEVELING_MOVD_DQS, ALL_BYTES);
        // Set DQS PI-based delay to 0
        vSetWlevDly(p, 0, ALL_BYTES);
    }

    // decide algorithm parameters according to freq.(PI mode/ phase mode)
    WriteLevelingScanRange_PI(p, &s4DlyBegin, &s4DlyEnd, &u1PIStep, &PI_bound, stDelayBase);

#if FOR_DV_SIMULATION_USED
    u1PIStep = 2;
#endif

    // Not support autok to delay cell based mode.
    if (stDelayBase == DLY_BASED)
        isAutoK = FALSE;


#if (SUPPORT_SAVE_TIME_FOR_CALIBRATION && BYPASS_WRITELEVELING)
    if (p->femmc_Ready == 1)
    {
        wrlevel_dqs_final_delay[p->rank][0] = p->pSavetimeData->u1WriteLeveling_bypass_Save[p->channel][p->rank][0];
        wrlevel_dqs_final_delay[p->rank][1] = p->pSavetimeData->u1WriteLeveling_bypass_Save[p->channel][p->rank][1];

        ucDoneFlg = 0xff;
        KResult = DRAM_OK;
        vSetCalibrationResult(p, DRAM_CALIBRATION_WRITE_LEVEL_PI, DRAM_FAST_K);
    }
    else
#endif
    {
        // free-run dramc/ddrphy clk (DCMEN2=0, MIOCKCTRLOFF=1, PHYCLKDYNGEN=0, COMBCLKCTRL=0)
        // free-run dram clk(APHYCKCG_FIXOFF =1, TCKFIXON=1)
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_DRAMC_PD_CTRL),
            P_Fld(0, DRAMC_PD_CTRL_DCMEN2) |
            P_Fld(1, DRAMC_PD_CTRL_MIOCKCTRLOFF) |
            P_Fld(0, DRAMC_PD_CTRL_PHYCLKDYNGEN) |
            P_Fld(0, DRAMC_PD_CTRL_COMBCLKCTRL) |
            P_Fld(1, DRAMC_PD_CTRL_APHYCKCG_FIXOFF) |
            P_Fld(1, DRAMC_PD_CTRL_TCKFIXON));

        //Make CKE fixed at 1 (Don't enter power down, Put this before issuing MRS): CKEFIXON = 1
        CKEFixOnOff(p, p->rank, CKE_FIXON, TO_ONE_CHANNEL);

        //PHY RX Setting for Write Leveling
        //Let IO to O1 path valid, Enable SMT_EN
        O1PathOnOff(p, ON);

        // Set for auto gen WCK pattern
        //set_cbt_wlev_intv(p);//ddr3 4,porting later

        //Write leveling enable ON, WRITE_LEVEL_EN should be set before MRW-WL-mode fire, or the tWLDQSEN will violate spec
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_CTRL0), 1, CBT_WLEV_CTRL0_WRITE_LEVEL_EN);

        // Issue MR to enable dram write leveling mode
        vSetDramMRWriteLevelingOnOff(p, ENABLE);

        //wait tWLDQSEN after enabling write leveling mode
        mcDELAY_US(1);
#if 1
            // Adjust MCK number to generate 8 WCK pulse
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_CTRL3), 0xa, CBT_WLEV_CTRL3_DQSBX_G);
		//vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_CTRL3), 0x0, CBT_WLEV_CTRL3_DQSBY_G);
        // Issue MR to enable dram write leveling mode
        //wait tWLDQSEN after enabling write leveling mode
        // Set write leveling pattern
#else
       if(p->pDFSTable->divmode ==DIV4_MODE ){//ddr3 ok
               vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_CTRL3), 0xa, CBT_WLEV_CTRL3_DQSBX_G);

               vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_CTRL3), 0xa, CBT_WLEV_CTRL3_DQSBY_G);

            }
       if(p->pDFSTable->divmode==DIV8_MODE){
                // Used by auto generate pattern
               vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_CTRL3), 0xa, CBT_WLEV_CTRL3_DQSBX_G);

               vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_CTRL3), 0xa, CBT_WLEV_CTRL3_DQSBY_G);
                // Used by auto generate pattern
               vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_CTRL3), 0xa, CBT_WLEV_CTRL3_DQSBX1_G);

               vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_CTRL3), 0xa, CBT_WLEV_CTRL3_DQSBY1_G);
            }
#endif

        vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_CTRL0), 1, CBT_WLEV_CTRL0_CBTMASKDQSOE);

        // select DQS
        u4value = 0x3;//select byte 0/1, it means DQS 0/1
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_CTRL0), u4value, CBT_WLEV_CTRL0_CBT_WLEV_DQS_SEL);

        // wait tWLMRD (40 nCL / 40 ns) before DQS pulse (DDR3 / LPDDR3)
        mcDELAY_US(1);

        //Proceed write leveling...
        //Initilize sw parameters
//        ClockDelayMax = MAX_TX_DQSDLY_TAPS;
        if(isAutoK == AUTOK_OFF)
        {
            KResult = WLEV_SWK(p, s4DlyBegin, s4DlyEnd, u1PIStep, PI_bound, PI_BASED);
        }
        else
        {
            KResult = WLEV_AUTOK(p, s4DlyBegin, s4DlyEnd, u1PIStep, PI_bound, wrlevel_dqs_delay, PI_BASED);
        }
    }
    vSetCalibrationResult(p, DRAM_CALIBRATION_WRITE_LEVEL_PI, KResult);
#if defined(FOR_HQA_TEST_USED) && defined(FOR_HQA_REPORT_USED)
    if (gHQALog_flag == 1)
    {
        for (byte_i = 0; byte_i < (p->data_width / DQS_BIT_NUMBER); byte_i++)
        {
            HQA_Log_Message_for_Report(p, p->channel, p->rank, HQA_REPORT_FORMAT1, "", "WriteLeveling_DQS", byte_i, wrlevel_dqs_final_delay[p->rank][byte_i], NULL);
        }
    }
#endif

#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
    if (p->femmc_Ready == 0)
    {
		p->pSavetimeData->u1WriteLeveling_bypass_Save[p->channel][p->rank][0] = wrlevel_dqs_final_delay[p->rank][0];
		p->pSavetimeData->u1WriteLeveling_bypass_Save[p->channel][p->rank][1] = wrlevel_dqs_final_delay[p->rank][1];
    }
#endif

    vSetDramMRWriteLevelingOnOff(p, DISABLE); // Disable DDR write leveling mode:  issue MR2[7] to enable write leveling
    // Write leveling enable OFF
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_CBT_WLEV_CTRL0), 0, CBT_WLEV_CTRL0_WRITE_LEVEL_EN);
    //Disable DQ_O1, SELO1ASO=0 for power saving
    O1PathOnOff(p, OFF);
    //tx_rank_sel is selected by HW
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_SET0), 0, TX_SET0_TXRANK);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_SET0), 0, TX_SET0_TXRANKFIX); //TXRANKFIX should be write after TXRANK
    //restore registers.
    
    DramcRestoreRegisters(p, u4RegBackupAddress, sizeof(u4RegBackupAddress) / sizeof(U32), TO_ONE_CHANNEL);
    DramcRestoreMixedRG(p, u4MixRgBackupAddress, ARRAY_SIZE(u4MixRgBackupAddress), TO_ALL_CHANNEL);

    // Calculate DQS "PI" delay, nothing to do with delay cell
    for (byte_i = 0; byte_i < (p->data_width / DQS_BIT_NUMBER); byte_i++)
    {
        mcSHOW_DBG_MSG("Write leveling (Byte %d): %d", byte_i, wrlevel_dqs_final_delay[p->rank][byte_i]);
        mcDUMP_REG_MSG("Write leveling (Byte %d): %d", byte_i, wrlevel_dqs_final_delay[p->rank][byte_i]);
        if (wrlevel_dqs_final_delay[p->rank][byte_i] >= PI_bound)
        {
            ShiftDQSWCK_UI(p, (wrlevel_dqs_final_delay[p->rank][byte_i] / PI_bound) * (PI_bound / DQPI_PER_UI), byte_i);
            wrlevel_dqs_final_delay[p->rank][byte_i] %= PI_bound;
        }
        wrlevel_dqs_delay[byte_i] = wrlevel_dqs_final_delay[p->rank][byte_i];
        mcSHOW_DBG_MSG(" => %d\n", wrlevel_dqs_delay[byte_i]);
        mcDUMP_REG_MSG(" => %d\n", wrlevel_dqs_delay[byte_i]);
#if __SLT__
        mcSHOW_PARSER_MSG(("[%d Mbps][CH%d][RK%d][WL]DQS%d dly: %d\n",p->frequency*2, p->channel,p->rank,byte_i, wrlevel_dqs_delay[byte_i]));
#endif
    }

    for (byte_i = 0; byte_i < (p->data_width / DQS_BIT_NUMBER); byte_i++)
    {
        wrlevel_dq_delay_pi[byte_i] = wrlevel_dqs_delay[byte_i] + 0x10;
        if (wrlevel_dq_delay_pi[byte_i] >= 0x40)
        {
            wrlevel_dq_delay_pi[byte_i] -= 0x40;
            ShiftDQUI_AllRK(p, 2, byte_i);
            ShiftDQ_OENUI_AllRK(p, 2, byte_i);
         }
    }
    for (rank_i = p->rank; rank_i < RANK_MAX; rank_i++)
    {
        vSetRank(p, rank_i);
        //updata dq
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ),
                     P_Fld(wrlevel_dq_delay_pi[0], SHU_RK_B0_DQ_SW_ARPI_DQ_B0) |
                     P_Fld(wrlevel_dq_delay_pi[0], SHU_RK_B0_DQ_SW_ARPI_DQM_B0));
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ),
                     P_Fld(wrlevel_dq_delay_pi[1], SHU_RK_B1_DQ_SW_ARPI_DQ_B1) |
                     P_Fld(wrlevel_dq_delay_pi[1], SHU_RK_B1_DQ_SW_ARPI_DQM_B1));

        // set to best values for  DQS
        if (stDelayBase == PI_BASED)
        {
            // Adjust DQS output delay.
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ), wrlevel_dqs_delay[0], SHU_RK_B0_DQ_ARPI_PBYTE_B0);
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ), wrlevel_dqs_delay[1], SHU_RK_B1_DQ_ARPI_PBYTE_B1);
        }
        else // stDelayBase == DLY_BASED
        {
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_TXDLY3), wrlevel_dqs_delay[0], SHU_RK_B0_TXDLY3_TX_ARWCK_DLY_B0);
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_TXDLY3), wrlevel_dqs_delay[1], SHU_RK_B1_TXDLY3_TX_ARWCK_DLY_B1);
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_TXDLY3), wrlevel_dqs_delay[0], SHU_RK_B0_TXDLY3_TX_ARWCKB_DLY_B0);
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_TXDLY3), wrlevel_dqs_delay[1], SHU_RK_B1_TXDLY3_TX_ARWCKB_DLY_B1);
        }
    }
    vSetRank(p, backup_rank);
    mcSHOW_DBG_MSG3("[DramcWriteLeveling] Done\n\n");
#if __SLT__
    mcSHOW_PARSER_MSG(("[DramcWriteLeveling] Done\n\n"));
#endif
    return KResult;
}
#endif //DDR_ENABLE_WRITE_LEVELING_CAL
#if 0
/*
 * Issue one run-time software command and poll for its response.
 *
 * p                    - DRAMC context (channel/rank are taken from it).
 * runtime_SW_cmd_sel   - command selector written to SWCMD_EN_RTSWCMD_SEL.
 *
 * The function programs the command selector, count, age and rank fields,
 * sets RTSWCMDEN, polls SPCMDRESP3_RTSWCMD_RESPONSE up to TIME_OUT_CNT times
 * (with mcDELAY_US(1) between polls) and finally clears RTSWCMDEN again.
 * A missing response is only logged; nothing is returned to the caller.
 */
static void RunTime_SW_Cmd(DRAMC_CTX_T *p, RUNTIME_SWCMD_SEL_T runtime_SW_cmd_sel)
{
    U32 u4Response = 0;
    U32 u4TimeCnt = TIME_OUT_CNT;

    // Select a RT SW command
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SWCMD_EN), runtime_SW_cmd_sel, SWCMD_EN_RTSWCMD_SEL);

    // Set _CNT, _AGE, _RANK
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_RTSWCMD_CNT), 0x30, RTSWCMD_CNT_RTSWCMD_CNT);
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SWCMD_CTRL2),
            P_Fld(0, SWCMD_CTRL2_RTSWCMD_AGE) |
            P_Fld(p->rank, SWCMD_CTRL2_RTSWCMD_RK));

    // For CAS_FS/CAS_OFF, override RTSWCMD_RK with 2'b11 so the rank value
    // cannot cause a "no match" for these commands.
    if ((runtime_SW_cmd_sel == RUNTIME_SWCMD_CAS_FS) || (runtime_SW_cmd_sel == RUNTIME_SWCMD_CAS_OFF))
    {
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SWCMD_CTRL2), 0x3, SWCMD_CTRL2_RTSWCMD_RK);
    }

    // Trigger RT SW command
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SWCMD_EN), 1, SWCMD_EN_RTSWCMDEN);

    // Poll for the response; the loop ends on a non-zero response or when the
    // poll budget is exhausted.
    do
    {
        u4Response = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SPCMDRESP3), SPCMDRESP3_RTSWCMD_RESPONSE);
        u4TimeCnt --;
        mcDELAY_US(1);
    }while ((u4Response == 0) && (u4TimeCnt > 0));

    // Decide on the response itself, not on the counter: a response that
    // arrives on the very last poll leaves u4TimeCnt == 0 but is a success.
    if (u4Response == 0)//time out
    {
        mcSHOW_DBG_MSG("[LP5 RT SW Cmd ] Resp fail (time out)\n");
    }

    // Release RT SW command
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SWCMD_EN), 0, SWCMD_EN_RTSWCMDEN);

}

/*
 * Convert a signed step into its register encoding.
 *
 * Zero and negative steps are returned as their magnitude; positive steps
 * are returned as the step value with bit 3 (0x08) OR-ed in.
 */
static U8 FetchRGSettingVal(int step_val)
{
    return (step_val > 0) ? ((U8)step_val | 0x08) : (U8)(-step_val);
}
#endif

/*
 * Write 0 to the TX delay registers (TXDLY0/1/3 of CA, B0, B1 and B2 as
 * listed below) for every rank from RANK_0 up to RANK_MAX, then re-select
 * the rank that was active on entry.
 *
 * NOTE(review): unlike most register accesses in this file, the addresses
 * here are not wrapped in DRAMC_REG_ADDR() -- confirm this is intended.
 */
void vResetDelayChainBeforeCalibration(DRAMC_CTX_T *p)
{
    U8 u1SavedRank = u1GetRank(p);
    U8 u1Rk;

    for (u1Rk = RANK_0; u1Rk < RANK_MAX; u1Rk++)
    {
        vSetRank(p, u1Rk);

        /* CA and byte 1 (grouped as "Tx CA" by the original author) */
        vIO32Write4B(DDRPHY_REG_SHU_RK_CA_TXDLY0, 0);
        vIO32Write4B(DDRPHY_REG_SHU_RK_CA_TXDLY1, 0);
        vIO32Write4B(DDRPHY_REG_SHU_RK_B1_TXDLY0, 0);
        vIO32Write4B(DDRPHY_REG_SHU_RK_B1_TXDLY1, 0);
        vIO32Write4B(DDRPHY_REG_SHU_RK_B1_TXDLY3, 0);

        /* Byte 0 and byte 2 (grouped as "Tx DQ") */
        vIO32Write4B(DDRPHY_REG_SHU_RK_B0_TXDLY0, 0);
        vIO32Write4B(DDRPHY_REG_SHU_RK_B0_TXDLY1, 0);
        vIO32Write4B(DDRPHY_REG_SHU_RK_B2_TXDLY0, 0);
        vIO32Write4B(DDRPHY_REG_SHU_RK_B2_TXDLY1, 0);

        /* TXDLY3 of byte 0 and byte 2 (grouped as "Tx DQM/WCK") */
        vIO32Write4B(DDRPHY_REG_SHU_RK_B0_TXDLY3, 0);
        vIO32Write4B(DDRPHY_REG_SHU_RK_B2_TXDLY3, 0);
    }

    /* Put the rank selection back the way the caller left it */
    vSetRank(p, u1SavedRank);
}


// Reset the PHY to prevent glitches when the DQS gating delay or the RX DQS input delay is changed.
// [Lynx] Evere_st: a single channel cannot be reset on its own. All DramC and all PHY have to be reset together
// (presumably why every write below uses the *_All variant).
//
// Sequence, as coded:
//   1. set RX_SET0_RDATRST, then MISC_CTRL1_R_DMPHYRST
//   2. clear the DQS/DQ STBEN_RESETB fields of B0, B1, B2
//   3. mcDELAY_US(1)
//   4. set the same STBEN_RESETB fields again, in reverse byte order (B2, B1, B0)
//   5. clear MISC_CTRL1_R_DMPHYRST, then RX_SET0_RDATRST
void DramPhyReset(DRAMC_CTX_T *p)
{
    // Evere_st changed the reset order: reset DQS before DQ, and move the PHY reset to the end.
    vIO32WriteFldAlign_All(DRAMC_REG_ADDR(DRAMC_REG_RX_SET0), 1, RX_SET0_RDATRST);// read data counter reset
    vIO32WriteFldAlign_All(DRAMC_REG_ADDR(DDRPHY_REG_MISC_CTRL1), 1, MISC_CTRL1_R_DMPHYRST);

    //RG_ARCMD_RESETB & RG_ARDQ_RESETB_B0/1 are only reset once at init (per Justin Chan), so they are not touched here.
    ///TODO: need to confirm RG_ARCMD_RESETB & RG_ARDQ_RESETB_B0/1 is reset at mem.c
    vIO32WriteFldMulti_All(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ9),
            P_Fld(0, B0_DQ9_RG_RX_ARDQS0_STBEN_RESETB_B0) |
            P_Fld(0, B0_DQ9_RG_RX_ARDQ_STBEN_RESETB_B0));
    vIO32WriteFldMulti_All(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ9),
            P_Fld(0, B1_DQ9_RG_RX_ARDQS0_STBEN_RESETB_B1) |
            P_Fld(0, B1_DQ9_RG_RX_ARDQ_STBEN_RESETB_B1));
    vIO32WriteFldMulti_All(DRAMC_REG_ADDR(DDRPHY_REG_B2_DQ9),
            P_Fld(0, B2_DQ9_RG_RX_ARDQS0_STBEN_RESETB_B2) |
            P_Fld(0, B2_DQ9_RG_RX_ARDQ_STBEN_RESETB_B2));
    mcDELAY_US(1);// original note says "delay 10ns"; NOTE(review): mcDELAY_US(1) presumably waits 1us -- confirm which is required
    // Release the STBEN resets in reverse byte order (B2 -> B1 -> B0)
    vIO32WriteFldMulti_All(DRAMC_REG_ADDR(DDRPHY_REG_B2_DQ9),
	        P_Fld(1, B2_DQ9_RG_RX_ARDQS0_STBEN_RESETB_B2) |
	        P_Fld(1, B2_DQ9_RG_RX_ARDQ_STBEN_RESETB_B2));
    vIO32WriteFldMulti_All(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ9),
            P_Fld(1, B1_DQ9_RG_RX_ARDQS0_STBEN_RESETB_B1) |
            P_Fld(1, B1_DQ9_RG_RX_ARDQ_STBEN_RESETB_B1));
    vIO32WriteFldMulti_All(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ9),
            P_Fld(1, B0_DQ9_RG_RX_ARDQS0_STBEN_RESETB_B0) |
            P_Fld(1, B0_DQ9_RG_RX_ARDQ_STBEN_RESETB_B0));

    // Finally drop the PHY reset and the read data counter reset, in the opposite order they were raised
    vIO32WriteFldAlign_All(DRAMC_REG_ADDR(DDRPHY_REG_MISC_CTRL1), 0, MISC_CTRL1_R_DMPHYRST);
    vIO32WriteFldAlign_All(DRAMC_REG_ADDR(DRAMC_REG_RX_SET0), 0, RX_SET0_RDATRST);// read data counter reset
}

#if (DDR_ENABLE_GATING_CAL == 1)
#define GATING_PATTERN_NUM          0x23
#define GATING_GOLDEND_DQSCNT_DDR4       0x2323
#define RXDQS_GATING_AUTO_DBG_REG_NUM    6
#define GATING_DDR3_WA        1
/* Preamble & Postamble setting. Currently use macro to define.
 * Later may use speed or MR setting to decide
 * !!! REVIEW !!!
 */

#if GATING_ADJUST_TXDLY_FOR_TRACKING
U8 u1TXDLY_Cal_min =0xff, u1TXDLY_Cal_max=0;
U8 ucbest_coarse_mck_backup[RANK_MAX][DQS_NUMBER];
U8 ucbest_coarse_ui_backup[RANK_MAX][DQS_NUMBER];
U8 ucbest_coarse_mck_P1_backup[RANK_MAX][DQS_NUMBER];
U8 ucbest_coarse_ui_P1_backup[RANK_MAX][DQS_NUMBER];
#endif

/*
 * Combined level of the DQS "lead" and "lag" samples for one byte.
 * The enumerator names spell out which of the two is high or low.
 * NOTE(review): the code that produces these states is outside this view.
 */
enum st_lead_lag {
	ST_LEAD_LAG_INVALID = 0,	/* no valid sample (also the zero-initialised value) */
	ST_LEAD_LAG_BOTH_HIGH,
	ST_LEAD_HIGH_LAG_LOW,
	ST_LEAD_LOW_LAG_HIGH,
	ST_LEAD_LAG_BOTH_LOW,
};

/*
 * States of the per-byte gating search state machine.
 * NOTE(review): the transition logic is outside this view; the declaration
 * order presumably mirrors the search order (high -> transition -> low -> done).
 */
enum st_gating {
	ST_GATING_IDLE = 0,
	ST_GATING_SEARCH_HIGH,
	ST_GATING_SEARCH_TRANS,
	ST_GATING_SEARCH_LOW,
	ST_GATING_DONE,
};

/*
 * Per-DQS bookkeeping for the lead/lag based gating search: every array is
 * indexed by DQS (byte) number and has DQS_NUMBER entries.
 */
struct rxdqs_gating_leadlag_fsm_ctrl {
	enum st_lead_lag lead_lag_cur_st[DQS_NUMBER];	/* current lead/lag state */
	enum st_lead_lag lead_lag_lst_st[DQS_NUMBER];	/* presumably the previous ("last") lead/lag state */

	enum st_gating gating_cur_st[DQS_NUMBER];	/* current search state */
	enum st_gating gating_nxt_st[DQS_NUMBER];	/* next search state */
};

#if __SLT__
unsigned char dram_slt_log_enable = 0;
#endif

/*
 * Working DQSIEN delay and step settings for gating calibration.
 * The MCK and P1 members are commented out, leaving only UI/PI values.
 * NOTE(review): users of this struct are outside this view; the field
 * descriptions are inferred from the names.
 */
struct rxdqs_gating_cal {
	//U8 dqsien_dly_mck;
	U8 dqsien_dly_ui;	/* DQSIEN delay, UI part */
	U8 dqsien_dly_pi;	/* DQSIEN delay, PI part */

	//U8 dqsien_dly_mck_p1;
	//U8 dqsien_dly_ui_p1;

	U8 dqsien_pi_adj_step;	/* presumably the PI increment per scan step */

	U8 dqsien_pi_per_ui;	/* presumably the number of PI steps in one UI */
	//U8 dqsien_ui_per_mck;
	//U8 dqsien_freq_div;
};

/*
 * Per-DQS sampling results collected while searching for the gating
 * transition, plus the state machine that consumes them. All arrays are
 * indexed by DQS (byte) number.
 * NOTE(review): the code filling these fields is outside this view; the
 * field descriptions are inferred from the names.
 */
struct rxdqs_gating_trans {
	U8 dqs_lead[DQS_NUMBER];	/* sampled "lead" flag */
	U8 dqs_lag[DQS_NUMBER];	/* sampled "lag" flag */
	U8 dqs_high[DQS_NUMBER];
#if GATING_LEADLAG_LOW_LEVEL_CHECK
    U8 dqs_low[DQS_NUMBER];	/* only present when the low-level check is compiled in */
#endif
	U8 dqs_transition[DQS_NUMBER];
	/* presumably the MCK/UI/PI delay at which the lead/lag transition was seen */
	U8 dqsien_dly_mck_leadlag[DQS_NUMBER];
	U8 dqsien_dly_ui_leadlag[DQS_NUMBER];
	U8 dqsien_dly_pi_leadlag[DQS_NUMBER];
    struct rxdqs_gating_leadlag_fsm_ctrl fsm;
};

/*
 * Best gating delay found for each DQS (byte), split into UI and PI parts.
 * rxdqs_gating_misc_process() logs these values and zeroes the entries of
 * unused bytes.
 *
 * NOTE(review): the MCK and P1 members are commented out here, but code
 * guarded by DDR_GATING_ADJUST_TXDLY_FOR_TRACKING and
 * __IPMv2_TO_BE_PORTING__ still refers to best_dqsien_dly_mck,
 * best_dqsien_dly_mck_p1, best_dqsien_dly_ui_p1 and best_dqsien_dly_pi_p1;
 * that code cannot compile against this definition.
 */
struct rxdqs_gating_best_win {
	//U8 best_dqsien_dly_mck[DQS_NUMBER];
	U8 best_dqsien_dly_ui[DQS_NUMBER];
	U8 best_dqsien_dly_pi[DQS_NUMBER];
	//U8 best_dqsien_dly_mck_p1[DQS_NUMBER];
	//U8 best_dqsien_dly_ui_p1[DQS_NUMBER];
	//U8 best_dqsien_dly_pi_p1[DQS_NUMBER];
};

/*
 * Parameters for the hardware gating auto-calibration. Each member is
 * written to the like-named field of MISC_DQSIEN_AUTOK_CFG0 by
 * rxdqs_gating_auto_cal_cfg().
 */
struct rxdqs_gating_auto_param {
	U8 early_break;	/* -> DQSIEN_AUTOK_EARLY_BREAK_EN (logged as ENABLE/DISABLE) */
	U8 dbg_mode;	/* -> DQSIEN_AUTOK_DEBUG_MODE_EN */

	U8 init_mck;	/* -> DQSIEN_AUTOK_INI_MCK */
	U8 init_ui;	/* -> DQSIEN_AUTOK_INI__UI */
	U8 end_mck;	/* -> DQSIEN_AUTOK_END_MCK */
	U8 end_ui;	/* -> DQSIEN_AUTOK_END__UI */
	U8 pi_offset;	/* -> DQSIEN_AUTOK_PI_OFFSET */

	U8 burst_len;	/* -> DQSIEN_AUTOK_BURST_LENGTH */
};

/* Compile-time switch for the gating autok workaround; when non-zero the
 * two workaround globals below are defined. */
#define ENABLE_GATING_AUTOK_WA          1

#if ENABLE_GATING_AUTOK_WA
/* NOTE(review): users of these globals are outside this view. Also note that
 * identifiers starting with a double underscore are reserved in C. */
U8 __wa__gating_swk_for_autok = 0;
U8 __wa__gating_autok_init_ui[RANK_MAX] = { 0 };	/* one entry per rank */
#endif

#ifndef MIN
/*
 * Smaller of two values.
 * Both arguments may be evaluated more than once, so do not pass
 * expressions with side effects (e.g. MIN(i++, j)).
 */
#define MIN(_a, _b) ((_a) < (_b) ? (_a) : (_b))
#endif

/*
 * Compute the UI position at which gating calibration starts (the
 * algorithm is reused from DV).
 *
 * The DQSIEN UI fields of byte 0 and byte 1 are read and combined with
 * MIN(); the result is then moved back by 3 UI. A value below 3 is
 * reported as an error and asserted. On Griffin builds, DDR3-family parts
 * override the result with a fixed value chosen by frequency.
 *
 * u1AutoK is currently unused.
 */
U8 get_gating_start_pos(DRAMC_CTX_T *p, U8 u1AutoK)
{
	U32 b0_ui = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQSIEN_DLY), SHU_RK_B0_DQSIEN_DLY_DQSIEN_UI_B0);
	U32 b1_ui = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQSIEN_DLY), SHU_RK_B1_DQSIEN_DLY_DQSIEN_UI_B1);
	U32 start_ui = MIN(b0_ui, b1_ui);

	if (start_ui < 3) {
		/* Not enough room to back off by 3 UI */
		mcSHOW_ERR_MSG("[%s] Gating UI = %d too small for calibration\n", __func__, start_ui);
		ASSERT(0);
	} else {
		start_ui -= 3;
	}

#if fcFOR_CHIP_ID == fcGriffin
	/* Griffin + DDR3 family: fixed start position per supported frequency */
	if (is_ddr3_family(p)) {
		if (p->frequency == DDR2133_FREQ) {
			start_ui = 6;
		} else if (p->frequency == DDR1866_FREQ) {
			start_ui = 8;
		} else {
			mcSHOW_DBG_MSG("%s: Unsupported freq\n", __func__);
			ASSERT(0);
		}
	}
#endif
	mcSHOW_DBG_MSG("[%s] calculated gating ui = %d \n", __func__, start_ui);

	return start_ui;
}

#if GATING_RODT_LATANCY_EN
/*
 * Return the divider used to split an RODT UI count into MCK and UI parts:
 * 8 in DIV16 mode, 4 in DIV8 mode and 2 in any other divider mode.
 */
U8 get_rodt_mck2ui(DRAMC_CTX_T *p)
{
	if (vGet_Div_Mode(p) == DIV16_MODE)
		return 8;

	return (vGet_Div_Mode(p) == DIV8_MODE) ? 4 : 2;
}
#endif

/*
 * Post-process the gating calibration result.
 *
 * p               - DRAMC context; data_width, channel and rank are read.
 * rxdqs_best_win  - best UI/PI delay per DQS; entries of unused bytes are
 *                   zeroed here, the rest are only read.
 *
 * Steps: zero the results (and, if compiled in, the coarse backups) of bytes
 * beyond data_width/DQS_BIT_NUMBER, log the best delay of each used byte,
 * optionally update the TX delay tracking globals, and optionally program
 * the INI_UIPI registers for RDSEL tracking.
 *
 * NOTE(review): the globals are zeroed under GATING_ADJUST_TXDLY_FOR_TRACKING
 * but updated under DDR_GATING_ADJUST_TXDLY_FOR_TRACKING -- confirm whether
 * two different macros are intended. The DDR_* guarded code also reads
 * best_dqsien_dly_mck, which is commented out of struct rxdqs_gating_best_win.
 */
static void rxdqs_gating_misc_process(DRAMC_CTX_T *p,
    struct rxdqs_gating_best_win *rxdqs_best_win)
{
#if DDR_GATING_ADJUST_TXDLY_FOR_TRACKING
    U8 u1TX_dly_DQSgated = 0;
#endif
    U8 dqs_i;

    /* Set the result of unused bytes (if any) to 0. */
    for (dqs_i = (p->data_width/DQS_BIT_NUMBER); dqs_i < DQS_NUMBER; dqs_i++) {
            rxdqs_best_win->best_dqsien_dly_ui[dqs_i] = 0;
            rxdqs_best_win->best_dqsien_dly_pi[dqs_i]= 0;

#if GATING_ADJUST_TXDLY_FOR_TRACKING
        /* Chained assignments: both the UI and the MCK backup become 0 */
        ucbest_coarse_mck_backup[p->rank][dqs_i] =
            ucbest_coarse_ui_backup[p->rank][dqs_i] = 0;
        ucbest_coarse_mck_P1_backup[p->rank][dqs_i] =
            ucbest_coarse_ui_P1_backup[p->rank][dqs_i] = 0;
#endif
    }

    /* Report (and optionally record) the result of every byte in use */
    for (dqs_i=0; dqs_i<(p->data_width/DQS_BIT_NUMBER); dqs_i++) {
#ifdef FOR_HQA_REPORT_USED
        /* NOTE(review): DQSINCTL does not depend on dqs_i, so it is logged once per byte */
        HQA_Log_Message_for_Report(p, p->channel, p->rank, HQA_REPORT_FORMAT6, "DQSINCTL ", "", 0,
            u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_RK_DQSCTL), MISC_SHU_RK_DQSCTL_DQSINCTL), NULL);
        HQA_Log_Message_for_Report(p, p->channel, p->rank, HQA_REPORT_FORMAT0,
            "Gating_Center_", "05T", dqs_i, rxdqs_best_win->best_dqsien_dly_ui[dqs_i], NULL);
        HQA_Log_Message_for_Report(p, p->channel, p->rank, HQA_REPORT_FORMAT0,
            "Gating_Center_", "PI", dqs_i, rxdqs_best_win->best_dqsien_dly_pi[dqs_i], NULL);
#endif

        /*TINFO="best DQS%d delay(2T, 0.5T, PI) = (%d, %d, %d)\n", dqs_i, rxdqs_best_win.best_dqsien_dly_mck[dqs_i], rxdqs_best_win.best_dqsien_dly_ui[dqs_i], rxdqs_best_win.best_dqsien_dly_pi[dqs_i])); */
        mcSHOW_DBG_MSG("best DQS%d dly(UI, PI) = (%d, %d)\n", dqs_i,
            rxdqs_best_win->best_dqsien_dly_ui[dqs_i],
            rxdqs_best_win->best_dqsien_dly_pi[dqs_i]);
        mcDUMP_REG_MSG("best DQS%d dly(UI, PI) = (%d, %d)\n", dqs_i,
            rxdqs_best_win->best_dqsien_dly_ui[dqs_i],
            rxdqs_best_win->best_dqsien_dly_pi[dqs_i]);
        /* cc mark mcFPRINTF((fp_A60501,"best DQS%d dly(MCK, UI, PI) = (%d, %d, %d)\n", dqs_i,
            rxdqs_best_win.best_dqsien_dly_mck[dqs_i],
            rxdqs_best_win.best_dqsien_dly_ui[dqs_i],
            rxdqs_best_win.best_dqsien_dly_pi[dqs_i]));
        */

#if DDR_GATING_ADJUST_TXDLY_FOR_TRACKING
    /* Total delay in UI (mck * 16 + ui), then scaled down by the divider mode */
    u1TX_dly_DQSgated = (rxdqs_best_win->best_dqsien_dly_mck[dqs_i] << 4) +
        rxdqs_best_win->best_dqsien_dly_ui[dqs_i];

    if (vGet_Div_Mode(p) == DIV16_MODE)
        u1TX_dly_DQSgated >>= 4;
    else if (vGet_Div_Mode(p) == DIV8_MODE)
        u1TX_dly_DQSgated >>= 3;
    else
        u1TX_dly_DQSgated >>= 2;

    /* Track the smallest value seen so far */
    if (u1TX_dly_DQSgated < u1TXDLY_Cal_min)
        u1TXDLY_Cal_min = u1TX_dly_DQSgated;

    ucbest_coarse_ui_backup[p->rank][dqs_i] = rxdqs_best_win->best_dqsien_dly_ui[dqs_i];
    ucbest_coarse_mck_backup[p->rank][dqs_i] = rxdqs_best_win->best_dqsien_dly_mck[dqs_i];
#endif
    }

    mcSHOW_DBG_MSG("\n");
    //cc mark mcFPRINTF((fp_A60501,"\n"));

    /* Disabled: P1 reporting and max TXDLY tracking */
#if 0
    for (dqs_i=0; dqs_i<(p->data_width/DQS_BIT_NUMBER); dqs_i++) {
        /*TINFO="best DQS%d P1 delay(2T, 0.5T, PI) = (%d, %d, %d)\n", dqs_i, rxdqs_best_win.best_dqsien_dly_mck_p1[dqs_i], rxdqs_best_win.best_dqsien_dly_ui_p1[dqs_i], rxdqs_best_win.best_dqsien_dly_pi_p1[dqs_i]*/
        mcSHOW_DBG_MSG("best DQS%d P1 dly(MCK, UI, PI) = (%d, %d, %d)\n", dqs_i,
            rxdqs_best_win->best_dqsien_dly_mck_p1[dqs_i],
            rxdqs_best_win->best_dqsien_dly_ui_p1[dqs_i],
            rxdqs_best_win->best_dqsien_dly_pi_p1[dqs_i]);
        mcDUMP_REG_MSG("best DQS%d P1 dly(MCK, UI, PI) = (%d, %d, %d)\n", dqs_i,
            rxdqs_best_win->best_dqsien_dly_mck_p1[dqs_i],
            rxdqs_best_win->best_dqsien_dly_ui_p1[dqs_i],
            rxdqs_best_win->best_dqsien_dly_pi_p1[dqs_i]);
        /* cc mark mcFPRINTF((fp_A60501,"best DQS%d P1 dly(2T, 0.5T, PI) = (%d, %d, %d)\n", dqs_i,
            rxdqs_best_win.best_dqsien_dly_mck_p1[dqs_i],
            rxdqs_best_win.best_dqsien_dly_ui_p1[dqs_i],
            rxdqs_best_win.best_dqsien_dly_pi_p1[dqs_i]));
        */

#if DDR_GATING_ADJUST_TXDLY_FOR_TRACKING
        // find max gating TXDLY (should be in P1)
        u1TX_dly_DQSgated = (rxdqs_best_win->best_dqsien_dly_mck_p1[dqs_i] << 4) +
            rxdqs_best_win->best_dqsien_dly_ui_p1[dqs_i];

        if (vGet_Div_Mode(p) == DIV16_MODE)
            u1TX_dly_DQSgated >>= 4;
        else if (vGet_Div_Mode(p) == DIV8_MODE)
            u1TX_dly_DQSgated >>= 3;
        else
            u1TX_dly_DQSgated >>= 2;

        if(u1TX_dly_DQSgated > u1TXDLY_Cal_max)
            u1TXDLY_Cal_max = u1TX_dly_DQSgated;

        ucbest_coarse_ui_P1_backup[p->rank][dqs_i] = rxdqs_best_win->best_dqsien_dly_ui_p1[dqs_i];
        ucbest_coarse_mck_P1_backup[p->rank][dqs_i] = rxdqs_best_win->best_dqsien_dly_mck_p1[dqs_i];
#endif
    }
#endif

#if RDSEL_TRACKING_EN
	/* Program the initial UI/PI values of byte 0 and byte 1.
	 * NOTE(review): the backup arrays used here are only defined when
	 * GATING_ADJUST_TXDLY_FOR_TRACKING is set -- confirm the two options
	 * are always enabled together. */
	//Byte 0
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_INI_UIPI),
		(ucbest_coarse_mck_backup[p->rank][0] << 4) | (ucbest_coarse_ui_backup[p->rank][0]),
		SHU_RK_B0_INI_UIPI_CURR_INI_UI_B0);//UI
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_INI_UIPI), rxdqs_best_win->best_dqsien_dly_pi[0],
		SHU_RK_B0_INI_UIPI_CURR_INI_PI_B0); //PI
	//Byte 1
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_INI_UIPI),
		(ucbest_coarse_mck_backup[p->rank][1] << 4) | (ucbest_coarse_ui_backup[p->rank][1]),
		SHU_RK_B1_INI_UIPI_CURR_INI_UI_B1);//UI
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_INI_UIPI),
		rxdqs_best_win->best_dqsien_dly_pi[1], SHU_RK_B1_INI_UIPI_CURR_INI_PI_B1); //PI
#endif

}

#if __IPMv2_TO_BE_PORTING__
/*
 * Pulse the gating autok software resets: set the RK0, RK1 and common
 * SW_RST fields of MISC_DQSIEN_AUTOK_CFG0, call mcDELAY_US(1), then clear
 * the same three fields again.
 */
static void rxdqs_gating_auto_cal_reset(DRAMC_CTX_T *p)
{
	/* Assert: rank 0, rank 1 and common reset */
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DQSIEN_AUTOK_CFG0),
		P_Fld(1, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_RK0_SW_RST) |
		P_Fld(1, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_RK1_SW_RST) |
		P_Fld(1, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_SW_RST));

	/* Hold the reset before releasing it */
	mcDELAY_US(1);

	/* Deassert the same three resets */
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DQSIEN_AUTOK_CFG0),
		P_Fld(0, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_RK0_SW_RST) |
		P_Fld(0, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_RK1_SW_RST) |
		P_Fld(0, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_SW_RST));
}
/*
 * Configure the PHY/DRAMC for hardware gating auto-calibration.
 *
 * p           - DRAMC context; frequency, dram_type and rank are read.
 * auto_param  - autok parameters written to MISC_DQSIEN_AUTOK_CFG0.
 *
 * The function first resets the autok logic, then applies the settings
 * labelled "same as SW calibration", the autok-specific parameters and the
 * "DV settings", and finally logs the parameters. It does not start the
 * calibration.
 */
static void rxdqs_gating_auto_cal_cfg(DRAMC_CTX_T *p,
	struct rxdqs_gating_auto_param *auto_param)
{
	/* Before start calibration, reset all state machine and all rank's state */
	rxdqs_gating_auto_cal_reset(p);


	/*-----------
	 * Normal Setting, Same as SW calibration
	 *---------------*/
	/* NOTE(review): magic frequency value 800; reason not visible here */
	if (p->frequency == 800) {
		vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL1),
			0x1, MISC_STBCAL1_STBCNT_SW_RST);
	}

	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL1),
		0x1, MISC_STBCAL1_STBCNT_SHU_RST_EN);

	/* SELPH_MODE = BY RANK (the same field is set again in the "New Rank Mode" write below) */
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL2),
		0x1, MISC_STBCAL2_DQSIEN_SELPH_BY_RANK_EN);

	/* NOTE(review): STB_PICG_EARLY_1T_EN is also set unconditionally a few
	 * lines below, which makes this LPDDR5-only write redundant -- confirm. */
	if (p->dram_type == TYPE_LPDDR5) {
		vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL2),
			0x1, MISC_STBCAL2_STB_PICG_EARLY_1T_EN);
	}

	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL1),
		0x1, MISC_STBCAL1_DIS_PI_TRACK_AS_NOT_RD);

	/* PICG_EARLY_EN */
	/* NOTE(review): the next two writes are identical (both B0_DQ6 / ..._B0).
	 * The second one presumably should target B1_DQ6 / ..._B1, as the
	 * B0/B1 pairs further down do -- confirm against the register map. */
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ6),
		0x1, B0_DQ6_RG_RX_ARDQ_OP_BIAS_SW_EN_B0);
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ6),
		0x1, B0_DQ6_RG_RX_ARDQ_OP_BIAS_SW_EN_B0);
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL2),
		0x1, MISC_STBCAL2_STB_PICG_EARLY_1T_EN);

	/* BURST_MODE */
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_STBCAL),
		0x1, MISC_SHU_STBCAL_DQSIEN_BURST_MODE);

	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ9),
		0x1, B0_DQ9_RG_RX_ARDQS0_DQSIENMODE_B0);
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ9),
		0x1, B1_DQ9_RG_RX_ARDQS0_DQSIENMODE_B1);

	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ6),
		0x2, B0_DQ6_RG_RX_ARDQ_BIAS_VREF_SEL_B0);
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ6),
		0x2, B1_DQ6_RG_RX_ARDQ_BIAS_VREF_SEL_B1);
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL),
		0x1, MISC_STBCAL_DQSIENMODE);

	/* New Rank Mode */
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL2),
		P_Fld(0x1, MISC_STBCAL2_STB_IG_XRANK_CG_RST) |
		P_Fld(0x1, MISC_STBCAL2_STB_RST_BY_RANK) |
		P_Fld(0x1, MISC_STBCAL2_DQSIEN_SELPH_BY_RANK_EN));

	/* Enable UI lead/lag detection on byte 0 and byte 1 */
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_PHY2),
		0x1, B0_PHY2_RG_RX_ARDQS_DQSIEN_UI_LEAD_LAG_EN_B0);
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_PHY2),
		0x1, B1_PHY2_RG_RX_ARDQS_DQSIEN_UI_LEAD_LAG_EN_B1);

	/* dummy read */
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_DUMMY_RD),
		0x1, DUMMY_RD_DUMMY_RD_PA_OPT);
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_CG_CTRL0),
		0x1, MISC_CG_CTRL0_RG_CG_PHY_OFF_DIABLE);

    // Workaround (added by Yulia) for the auto K pattern length; applies to all projects before IPM_V2.
    // The dummy read burst length should be controlled by DQSIEN_AUTOK_BURST_LENGTH, but for now only the
    // dummy read length (DMY_RD_LEN) can be used, so it is hard-coded to 3 instead of auto_param->burst_len.
    // DMY_RD_LEN: 0 for BL8, 1 for BL16, 3 for BL32
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_RK_DUMMY_RD_ADR), 3/*auto_param->burst_len*/, RK_DUMMY_RD_ADR_DMY_RD_LEN);

	/* Rank selection is decided by HW. Although dummy read is used, TA2 has higher priority */
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TEST2_A4),
		0x4, TEST2_A4_TESTAGENTRKSEL);

	//vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL2), 1,
	//	MISC_STBCAL2_STBENCMPEN);

	/*-----------
	 * Auto calibration setting
	 *-------------------*/
	/* The inner #if repeats the guard this function is presumably already compiled under */
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DQSIEN_AUTOK_CFG0),
	    #if __IPMv2_TO_BE_PORTING__
		P_Fld(auto_param->init_mck, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_INI_MCK) |
		P_Fld(auto_param->init_ui, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_INI__UI) |
		P_Fld(auto_param->end_mck, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_END_MCK) |
		P_Fld(auto_param->end_ui, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_END__UI) |
		#endif
		P_Fld(auto_param->pi_offset, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_PI_OFFSET) |
		P_Fld(p->rank, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_CUR_RANK) |
		P_Fld(auto_param->burst_len, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_BURST_LENGTH) |
		P_Fld(0x1, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_B0_EN) |
		P_Fld(0x1, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_B1_EN));

	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DQSIEN_AUTOK_CFG0),
		auto_param->dbg_mode, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_DEBUG_MODE_EN);

	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DQSIEN_AUTOK_CFG0),
		auto_param->early_break, MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_EARLY_BREAK_EN);

	/*---------
	 * DV settings
	 *-------------------*/
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL),
		0x0, MISC_STBCAL_PICGEN);
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_STBCAL),
		P_Fld(0x0, MISC_SHU_STBCAL_STBCALEN) |
		P_Fld(0x0, MISC_SHU_STBCAL_STB_SELPHCALEN));

	/* Log the parameters that were just programmed */
	mcSHOW_DBG_MSG("[Gating] AUTO K with param:\n");
	mcSHOW_DBG_MSG("\tinit_mck: %d, init_ui: %d, end_mck: %d, end_ui: %d\n",
		auto_param->init_mck, auto_param->init_ui,
		auto_param->end_mck, auto_param->end_ui);
	mcSHOW_DBG_MSG("\tpi_offset: %d, early_break: %s\n", auto_param->pi_offset,
		(auto_param->early_break)? "ENABLE" : "DISABLE");
}

/* Log the start of gating auto K and set the DQSIEN_AUTOK_GO field to 1. */
static void rxdqs_gating_auto_cal_trigger(DRAMC_CTX_T *p)
{
	mcSHOW_DBG_MSG("[Gating] AUTO K start...\n");

	/* GO = 1 */
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DQSIEN_AUTOK_CFG0), 1,
		MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_GO);
}

/*
 * Log the end of gating auto K, clear the DQSIEN_AUTOK_GO field and then
 * reset the autok logic via rxdqs_gating_auto_cal_reset().
 */
static void rxdqs_gating_auto_cal_stop(DRAMC_CTX_T *p)
{
	mcSHOW_DBG_MSG("[Gating] AUTO K stop...\n");

	/* GO = 0 */
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DQSIEN_AUTOK_CFG0), 0,
		MISC_DQSIEN_AUTOK_CFG0_DQSIEN_AUTOK_GO);

	/* Leave the autok state machine in its reset state */
	rxdqs_gating_auto_cal_reset(p);
}

/*
 * Auto K: convert the result calibrated by HW for one byte and write it to
 * the delay registers.
 *
 * p         - DRAMC context.
 * mck/ui/pi - calibrated delay as reported by HW, in the current divider mode.
 * byte      - 0 selects the B0 registers, any other value the B1 registers.
 * mck2ui    - number of UI per MCK used for the P1 calculation.
 * freq_div  - UI offset added to the P0 delay to obtain the P1 delay.
 * best_win  - receives the final per-byte values.
 *
 * NOTE(review): the best_win members written below (..._mck, ..._mck_p1,
 * ..._ui_p1, ..._pi_p1) are commented out of struct rxdqs_gating_best_win,
 * so this function cannot compile against that definition as it stands.
 */
static void rxdqs_gating_auto_final_set(DRAMC_CTX_T *p, U8 mck,
	U8 ui, U8 pi, U8 byte, U8 mck2ui, U8 freq_div, struct rxdqs_gating_best_win *best_win)
{
	U8 mck_p1, ui_p1;
	U8 total_ui;
#if GATING_RODT_LATANCY_EN
	U8 mck_rodt, ui_rodt;
	U8 mck_rodt_p1, ui_rodt_p1;
#endif
	U16 value;

	///TODO: Temp solution. Use vGet_Div_Mode( ) instead later
	/* Flatten (mck, ui) into a UI count using the divider mode read from SHU_COMMON0.
	 * NOTE(review): total_ui is a U8, so (mck << 4) + ui truncates once mck reaches 16. */
	if (u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SHU_COMMON0), SHU_COMMON0_FREQDIV8))
		total_ui = (mck << 4) + ui; /* 1:16 mode */
	else if (u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SHU_COMMON0), SHU_COMMON0_FREQDIV4))
		total_ui = (mck << 3) + ui; /* 1: 8 mode */
	else
		total_ui = (mck << 2) + ui; /* 1: 4 mode */

	/* RG is always 1:16 mode */
	mck = (total_ui >> 4);
	ui = (total_ui & 0xf);

    mcSHOW_DBG_MSG("[Gating][RG] Final result: (%d, %d, %d)\n", mck, ui, pi);
	value = mck * mck2ui + ui; /* Total UI number */
	/* P1 = P0 + freq_div UI, split back into MCK and UI */
	mck_p1 = (value + freq_div) / mck2ui;
	ui_p1 = (value + freq_div) % mck2ui;
    mcSHOW_DBG_MSG("[Gating][RG] Final result P1: (%d, %d)\n", mck_p1, ui_p1);
#if GATING_RODT_LATANCY_EN
	/* RODT delay = gating delay - 11 UI, when that is not negative */
	if (value >= 11) {
		U8 rodt_mck2ui = get_rodt_mck2ui(p);

		value -= 11;
		mck_rodt = value / rodt_mck2ui;
		ui_rodt = value % rodt_mck2ui;

		mck_rodt_p1 = mck_rodt;
		ui_rodt_p1 = ui_rodt;
	} else {
		mck_rodt = 0;
		ui_rodt = 0;

		/* NOTE(review): fallback P1 values of 4/4 are unexplained -- confirm */
		mck_rodt_p1 = 4;
		ui_rodt_p1 = 4;
		mcSHOW_DBG_MSG("[Warning] RODT cannot be -11UI\n");
	}
#endif

    /* Record the final values for the caller; P1 uses the same PI as P0 */
    best_win->best_dqsien_dly_mck[byte] = mck;
    best_win->best_dqsien_dly_mck_p1[byte] = mck_p1;
    best_win->best_dqsien_dly_ui[byte] = ui;
    best_win->best_dqsien_dly_ui_p1[byte] = ui_p1;
    best_win->best_dqsien_dly_pi[byte] = pi;
    best_win->best_dqsien_dly_pi_p1[byte] = pi;

    #if __IPMv2_TO_BE_PORTING__
	if (byte == 0) {
		/* Set DQSIEN delay in MCK and UI */
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQSIEN_MCK_UI_DLY),
			P_Fld(mck, SHU_RK_B0_DQSIEN_MCK_UI_DLY_DQSIEN_MCK_P0_B0) |
			P_Fld(ui, SHU_RK_B0_DQSIEN_MCK_UI_DLY_DQSIEN_UI_P0_B0) |
			P_Fld(mck_p1, SHU_RK_B0_DQSIEN_MCK_UI_DLY_DQSIEN_MCK_P1_B0) |
			P_Fld(ui_p1, SHU_RK_B0_DQSIEN_MCK_UI_DLY_DQSIEN_UI_P1_B0));

		vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQSIEN_PI_DLY),
			pi, SHU_RK_B0_DQSIEN_PI_DLY_DQSIEN_PI_B0);
	#if GATING_RODT_LATANCY_EN
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_RODTEN_MCK_UI_DLY),
			P_Fld(mck_rodt,
			SHU_RK_B0_RODTEN_MCK_UI_DLY_RODTEN_MCK_P0_B0) |
			P_Fld(ui_rodt,
			SHU_RK_B0_RODTEN_MCK_UI_DLY_RODTEN_UI_P0_B0) |
			P_Fld(mck_rodt_p1,
			SHU_RK_B0_RODTEN_MCK_UI_DLY_RODTEN_MCK_P1_B0) |
			P_Fld(ui_rodt_p1,
			SHU_RK_B0_RODTEN_MCK_UI_DLY_RODTEN_UI_P1_B0));
	#endif
	} else {
		/* Same programming for byte 1 */
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQSIEN_MCK_UI_DLY),
			P_Fld(mck, SHU_RK_B1_DQSIEN_MCK_UI_DLY_DQSIEN_MCK_P0_B1) |
			P_Fld(ui, SHU_RK_B1_DQSIEN_MCK_UI_DLY_DQSIEN_UI_P0_B1) |
			P_Fld(mck_p1, SHU_RK_B1_DQSIEN_MCK_UI_DLY_DQSIEN_MCK_P1_B1) |
			P_Fld(ui_p1, SHU_RK_B1_DQSIEN_MCK_UI_DLY_DQSIEN_UI_P1_B1));
		vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQSIEN_PI_DLY),
			pi, SHU_RK_B1_DQSIEN_PI_DLY_DQSIEN_PI_B1);
	#if GATING_RODT_LATANCY_EN
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_RODTEN_MCK_UI_DLY),
			P_Fld(mck_rodt,
			SHU_RK_B1_RODTEN_MCK_UI_DLY_RODTEN_MCK_P0_B1) |
			P_Fld(ui_rodt,
			SHU_RK_B1_RODTEN_MCK_UI_DLY_RODTEN_UI_P0_B1) |
			P_Fld(mck_rodt_p1,
			SHU_RK_B1_RODTEN_MCK_UI_DLY_RODTEN_MCK_P1_B1) |
			P_Fld(ui_rodt_p1,
			SHU_RK_B1_RODTEN_MCK_UI_DLY_RODTEN_UI_P1_B1));
	#endif
	}
	#endif
}

/* Address stride between the autok STATUS registers of consecutive bytes;
 * rxdqs_gating_auto_cal_status() multiplies it by the byte index and adds
 * the result to the B0 register address. */
#define RXDQS_GATING_AUTO_CAL_STATUS_BYTE_OFFSET		0x40

/*
 * Wait for the HW DQSIEN auto-calibration (AUTOK) to finish on every byte
 * lane, log the outcome and apply the results of the lanes that passed.
 *
 * The status registers of lane N are addressed as the B0/RK0 register plus
 * N * RXDQS_GATING_AUTO_CAL_STATUS_BYTE_OFFSET. A lane counts as finished
 * once its DONE or ERR flag is set; an errored lane is forced to "done" so
 * the polling loop can terminate.
 *
 * @p:          DRAMC context
 * @auto_param: AUTOK configuration; early_break and dbg_mode are read here
 * @mck2ui:     forwarded unchanged to rxdqs_gating_auto_final_set()
 * @freq_div:   forwarded unchanged to rxdqs_gating_auto_final_set()
 *
 * Return: DRAM_OK if no lane reported an error, DRAM_FAIL otherwise.
 *
 * NOTE(review): the polling loop has no timeout. If the HW never raises
 * DONE or ERR for a lane this function does not return.
 */
static DRAM_STATUS_T rxdqs_gating_auto_cal_status(DRAMC_CTX_T *p,
	struct rxdqs_gating_auto_param *auto_param, U8 mck2ui, U8 freq_div)
{
	/*
	 * Zero-initialised on purpose: the register reads that fill these
	 * arrays are compiled out unless __IPMv2_TO_BE_PORTING__ is set, but
	 * the values are still logged and handed to the final-set helper.
	 */
	U8 mck_center[DQS_NUMBER] = { 0 }, ui_center[DQS_NUMBER] = { 0 }, pi_center[DQS_NUMBER] = { 0 };
	U8 mck_left[DQS_NUMBER] = { 0 }, ui_left[DQS_NUMBER] = { 0 }, pi_left[DQS_NUMBER] = { 0 };
	U8 mck_right[DQS_NUMBER] = { 0 }, ui_right[DQS_NUMBER] = { 0 }, pi_right[DQS_NUMBER] = { 0 };
	U8 done[DQS_NUMBER] = { 0 }, error[DQS_NUMBER] = { 0 };
	struct rxdqs_gating_best_win rxdqs_best_win;
	DRAM_STATUS_T ret;
	U8 done_bytes, total_bytes;
	U8 byte_ofst;
	U8 dqs_i;

	/*
	 * Lanes that fail never reach rxdqs_gating_auto_final_set(), so clear
	 * the best-window record up front; it is passed to
	 * rxdqs_gating_misc_process() regardless of the outcome.
	 */
	memset(&rxdqs_best_win, 0, sizeof(rxdqs_best_win));

	total_bytes = p->data_width / DQS_BIT_NUMBER;
	done_bytes = 0;
	ret = DRAM_OK;

	while (done_bytes < total_bytes) {
		for (dqs_i = 0; dqs_i < total_bytes; dqs_i++) {
			/* If already done, skip this byte */
			if (done[dqs_i])
				continue;

			byte_ofst = dqs_i * RXDQS_GATING_AUTO_CAL_STATUS_BYTE_OFFSET;

			done[dqs_i] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(
				DDRPHY_REG_DQSIEN_AUTOK_B0_RK0_STATUS0 + byte_ofst),
				DQSIEN_AUTOK_B0_RK0_STATUS0_AUTOK_DONE_B0_RK0);
			error[dqs_i] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(
				DDRPHY_REG_DQSIEN_AUTOK_B0_RK0_STATUS0 + byte_ofst),
				DQSIEN_AUTOK_B0_RK0_STATUS0_AUTOK_ERR_B0_RK0);

			/* If autok fail, done flag will not be asserted. */
			if (!done[dqs_i] && !error[dqs_i])
				continue;

			if (error[dqs_i] == 0) {
				/* Done and pass: collect the window found by HW */
			#if __IPMv2_TO_BE_PORTING__
				mck_center[dqs_i] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(
					DDRPHY_REG_DQSIEN_AUTOK_B0_RK0_STATUS0 + byte_ofst),
					DQSIEN_AUTOK_B0_RK0_STATUS0_DQSIEN_AUTOK_C_MCK_B0_RK0);
				ui_center[dqs_i] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(
					DDRPHY_REG_DQSIEN_AUTOK_B0_RK0_STATUS0 + byte_ofst),
					DQSIEN_AUTOK_B0_RK0_STATUS0_DQSIEN_AUTOK_C__UI_B0_RK0);
				pi_center[dqs_i] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(
					DDRPHY_REG_DQSIEN_AUTOK_B0_RK0_STATUS0 + byte_ofst),
					DQSIEN_AUTOK_B0_RK0_STATUS0_DQSIEN_AUTOK_C__PI_B0_RK0);

				mck_left[dqs_i] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(
					DDRPHY_REG_DQSIEN_AUTOK_B0_RK0_STATUS1 + byte_ofst),
					DQSIEN_AUTOK_B0_RK0_STATUS1_DQSIEN_AUTOK_L_MCK_B0_RK0);
				ui_left[dqs_i] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(
					DDRPHY_REG_DQSIEN_AUTOK_B0_RK0_STATUS1 + byte_ofst),
					DQSIEN_AUTOK_B0_RK0_STATUS1_DQSIEN_AUTOK_L__UI_B0_RK0);
				pi_left[dqs_i] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(
					DDRPHY_REG_DQSIEN_AUTOK_B0_RK0_STATUS1 + byte_ofst),
					DQSIEN_AUTOK_B0_RK0_STATUS1_DQSIEN_AUTOK_L__PI_B0_RK0);

				/* If early break mode not enabled, right boundary could be found */
				if (auto_param->early_break == DISABLE) {
					mck_right[dqs_i] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(
						DDRPHY_REG_DQSIEN_AUTOK_B0_RK0_STATUS1 + byte_ofst),
						DQSIEN_AUTOK_B0_RK0_STATUS1_DQSIEN_AUTOK_R_MCK_B0_RK0);
					ui_right[dqs_i] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(
						DDRPHY_REG_DQSIEN_AUTOK_B0_RK0_STATUS1 + byte_ofst),
						DQSIEN_AUTOK_B0_RK0_STATUS1_DQSIEN_AUTOK_R__UI_B0_RK0);
					pi_right[dqs_i] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(
						DDRPHY_REG_DQSIEN_AUTOK_B0_RK0_STATUS1 + byte_ofst),
						DQSIEN_AUTOK_B0_RK0_STATUS1_DQSIEN_AUTOK_R__PI_B0_RK0);
				}
			#endif
			} else {
				/* If error occurred for this byte, it will be treated as a DONE condition */
				done[dqs_i] = 1;
			}

			/*
			 * In debug mode the AUTOK debug status registers are
			 * dumped for every finished lane, pass or fail.
			 */
			if (auto_param->dbg_mode == ENABLE) {
				U32 dbg_reg_addr;
				U32 dbg_reg_idx;
				U32 dbg_reg_val;

				dbg_reg_addr = DRAMC_REG_ADDR(
					DDRPHY_REG_DQSIEN_AUTOK_B0_RK0_DBG_STATUS0 + byte_ofst);
				for (dbg_reg_idx = 0;
					dbg_reg_idx < RXDQS_GATING_AUTO_DBG_REG_NUM;
					dbg_reg_idx++, dbg_reg_addr += 4) {
					dbg_reg_val = u4IO32Read4B(dbg_reg_addr);

					mcSHOW_ERR_MSG("B%d Gating AUTOK DBG Status-%d: [0x%08x]\n",
						dqs_i, dbg_reg_idx, dbg_reg_val);
				}
			}

			done_bytes++;
		}

		mcDELAY_MS(1);
	}

	/* Log it */
	for (dqs_i = 0; dqs_i < total_bytes; dqs_i++) {
		mcSHOW_DBG_MSG("[Gating][%s] AUTOK of CH-%d, Rk-%d, Byte-%d:\n",
			error[dqs_i]? "Fail" : "Pass", p->channel, p->rank, dqs_i);

		if (!done[dqs_i])
			continue;

		if (error[dqs_i]) {
			ret = DRAM_FAIL;
			continue;
		}

		mcSHOW_DBG_MSG("\tcenter(%d, %d, %d)\n",
					mck_center[dqs_i], ui_center[dqs_i], pi_center[dqs_i]);
		mcSHOW_DBG_MSG("\tleft(%d, %d, %d)\n",
					mck_left[dqs_i], ui_left[dqs_i], pi_left[dqs_i]);

		if (auto_param->early_break == DISABLE) {
			mcSHOW_DBG_MSG("\tright(%d, %d, %d)\n",
						mck_right[dqs_i], ui_right[dqs_i], pi_right[dqs_i]);
		}

		/* If passed, shall set the result to RG */
		rxdqs_gating_auto_final_set(p, mck_center[dqs_i],
				ui_center[dqs_i], pi_center[dqs_i], dqs_i, mck2ui, freq_div, &rxdqs_best_win);
	}

	rxdqs_gating_auto_cal_stop(p);
	rxdqs_gating_misc_process(p, &rxdqs_best_win);

	return ret;
}

/*
 * Run the HW (AUTOK) RX DQS gating calibration on the current channel/rank.
 *
 * Backs up the registers listed below, builds the AUTOK search window
 * (32 UI starting at the gating start position), configures and triggers
 * the HW engine, collects the result, then restores the registers and
 * resets the PHY.
 *
 * Return: the status reported by rxdqs_gating_auto_cal_status().
 */
static DRAM_STATUS_T dramc_rx_dqs_gating_auto_cal(DRAMC_CTX_T *p)
{
	struct rxdqs_gating_auto_param cfg;
	DRAM_STATUS_T status;
	U8 ui_first, ui_last;
	U8 ui_per_mck_rg, ui_per_mck_hw, div;

	U32 saved_regs[ ] = {
		(DRAMC_REG_ADDR(DRAMC_REG_DUMMY_RD)),
		(DRAMC_REG_ADDR(DDRPHY_REG_MISC_CG_CTRL0)),
		(DRAMC_REG_ADDR(DRAMC_REG_TEST2_A4)),
		(DRAMC_REG_ADDR(DRAMC_REG_RK_DUMMY_RD_ADR))
	};

	DramcBackupRegisters(p, saved_regs,
		sizeof (saved_regs) / sizeof (U32), TO_ONE_CHANNEL);

	memset((void *)&cfg, 0, sizeof cfg);

	/* The HW engine and the RG fields use different MCK-to-UI ratios */
	if (vGet_Div_Mode(p) == DIV16_MODE) {
		ui_per_mck_hw = 16;
	} else if (vGet_Div_Mode(p) == DIV8_MODE) {
		ui_per_mck_hw = 8;
	} else {
		ui_per_mck_hw = 4;
	}

	/* Pick the first UI of the search window */
#if ENABLE_GATING_AUTOK_WA
	if (__wa__gating_autok_init_ui[p->rank] > 3) {
		ui_first = __wa__gating_autok_init_ui[p->rank] - 3;
	} else
#endif
	{
		ui_first = u1GetGatingStartPos(p, AUTOK_ON);
	}
	ui_last = ui_first + 32;

	/* Search window, split into MCK and UI using the HW ratio */
	cfg.init_mck = ui_first / ui_per_mck_hw;
	cfg.init_ui = ui_first % ui_per_mck_hw;
	cfg.end_mck = ui_last / ui_per_mck_hw;
	cfg.end_ui = ui_last % ui_per_mck_hw;

	/* Remaining AUTOK parameters */
	cfg.early_break = ENABLE;
	cfg.dbg_mode = ENABLE;
	cfg.pi_offset = 2; /* 2 ^ 2 = 4 */
	cfg.burst_len = RXDQS_BURST_LEN_8;

	/* 1 mck = ? UI. Decided by (Tmck/Tck) * (Tck/Twck) */
	ui_per_mck_rg = DQS_GW_UI_PER_MCK;
	div = (vGet_Div_Mode(p) == DIV4_MODE) ? 2 : 4;

#if FOR_DV_SIMULATION_USED == 1
	cal_sv_rand_args_t *psra = get_psra();

	/*
	 * DV simulation may override some parameters.
	 * NOTE(review): pi_offset is a numeric value above (2) but is set to
	 * ENABLE/DISABLE here - confirm this is intended.
	 */
	if (psra) {
		cfg.early_break =
			    psra->dqsien_autok_early_break_en? ENABLE: DISABLE;
		cfg.dbg_mode =
			    psra->dqsien_autok_dbg_mode_en? ENABLE: DISABLE;
		cfg.pi_offset =
			    psra->dqsien_autok_pi_offset? ENABLE: DISABLE;
	}
#endif /* FOR_DV_SIMULATION_USED == 1 */

	rxdqs_gating_auto_cal_cfg(p, &cfg);

	/* Kick the HW engine and wait for its verdict */
	rxdqs_gating_auto_cal_trigger(p);
	status = rxdqs_gating_auto_cal_status(p, &cfg, ui_per_mck_rg, div);

	DramcRestoreRegisters(p, saved_regs,
		sizeof (saved_regs) / sizeof (U32), TO_ONE_CHANNEL);

	DramPhyReset(p);

	return status;
}
#endif //__IPMv2_TO_BE_PORTING__

/*
 * Prepare the PHY and the test engine for SW-mode RX DQS gating calibration.
 *
 * Sets MISC_STBCAL2_STBENCMPEN, enables the DQS gating counter and pulses
 * its reset, writes the current rank into MISC_CTRL1_R_DMSTBENCMP_RK_OPT and
 * initialises test engine 2 with the audio pattern.
 *
 * @p:                 DRAMC context
 * @use_enhanced_rdqs: currently has no effect (the branch below is empty)
 */
static void rxdqs_gating_sw_cal_init(DRAMC_CTX_T *p, U8 use_enhanced_rdqs)
{
#if (fcFOR_CHIP_ID == fcGriffin)
	/* Remember the broadcast setting and switch broadcast on for the writes below */
	U32 bc_bak = GetDramcBroadcast();
	DramcBroadcastOnOff(DRAMC_BROADCAST_ON);
#endif

	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL2), 1,
		MISC_STBCAL2_STBENCMPEN);

	/* enable & reset DQS counter */
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL2), 1,
		MISC_STBCAL2_DQSG_CNT_EN);
	mcDELAY_US(4); /* wait 1 auto refresh after DQS Counter enable */

	/* Pulse the counter reset: assert, wait, de-assert */
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL2), 1,
		MISC_STBCAL2_DQSG_CNT_RST);
	mcDELAY_US(1); /* delay 2T */
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL2), 0,
		MISC_STBCAL2_DQSG_CNT_RST);

	/* Program the rank currently being calibrated */
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_CTRL1),
		u1GetRank(p), MISC_CTRL1_R_DMSTBENCMP_RK_OPT);

	DramcEngine2Init(p, 0x55000000, 0xaa000000 | GATING_PATTERN_NUM,
		TEST_AUDIO_PATTERN, 0, TE_NO_UI_SHIFT);

	if (use_enhanced_rdqs) {
		/* TBD. Enter Enhanced RDQS training mode */
	}

#if (fcFOR_CHIP_ID == fcGriffin)
	/* Put the broadcast setting back */
	DramcBroadcastOnOff(bc_bak);
#endif
}

/*
 * Program the best DQSIEN delay (UI and PI) found by calibration.
 *
 * Bytes 0 and 1 are written to the B0/B1 DQSIEN_DLY registers of the current
 * channel mapping. When data_width is DATA_WIDTH_32BIT, bytes 2 and 3 are
 * written to the same B0/B1 registers after temporarily switching the PHY
 * mapping to CHANNEL_B; the previous mapping is restored afterwards.
 *
 * @p:        DRAMC context
 * @best_win: per-byte best UI/PI delays to program
 */
static void rxdqs_gating_set_final_result(DRAMC_CTX_T *p,
	struct rxdqs_gating_best_win *best_win)
{
	/* Bytes 0 and 1 */
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQSIEN_DLY),
		P_Fld(best_win->best_dqsien_dly_ui[0], SHU_RK_B0_DQSIEN_DLY_DQSIEN_UI_B0) |
		P_Fld(best_win->best_dqsien_dly_pi[0], SHU_RK_B0_DQSIEN_DLY_DQSIEN_PI_B0));
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQSIEN_DLY),
		P_Fld(best_win->best_dqsien_dly_ui[1], SHU_RK_B1_DQSIEN_DLY_DQSIEN_UI_B1) |
		P_Fld(best_win->best_dqsien_dly_pi[1], SHU_RK_B1_DQSIEN_DLY_DQSIEN_PI_B1));

	if (p->data_width == DATA_WIDTH_32BIT) {
		/* Bytes 2 and 3 are reached through the CHANNEL_B mapping */
		DRAM_CHANNEL_T ch_bak = p->channel;
		vSetPHY2ChannelMapping(p, CHANNEL_B);

		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQSIEN_DLY),
			P_Fld(best_win->best_dqsien_dly_ui[2], SHU_RK_B0_DQSIEN_DLY_DQSIEN_UI_B0) |
			P_Fld(best_win->best_dqsien_dly_pi[2], SHU_RK_B0_DQSIEN_DLY_DQSIEN_PI_B0));
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQSIEN_DLY),
			P_Fld(best_win->best_dqsien_dly_ui[3], SHU_RK_B1_DQSIEN_DLY_DQSIEN_UI_B1) |
			P_Fld(best_win->best_dqsien_dly_pi[3], SHU_RK_B1_DQSIEN_DLY_DQSIEN_PI_B1));

		/* Restore the caller's channel mapping */
		vSetPHY2ChannelMapping(p, ch_bak);
	}
}

/*
 * Apply the DQSIEN delay currently under test to every byte lane.
 *
 * The same UI/PI pair from @rxdqs_cal is written to the B0 and B1
 * DQSIEN_DLY registers. When data_width is DATA_WIDTH_32BIT the writes are
 * repeated with the PHY mapping temporarily switched to CHANNEL_B.
 *
 * @p:         DRAMC context
 * @rxdqs_cal: holds the dqsien_dly_ui / dqsien_dly_pi values to program
 */
static void rxdqs_gating_set_dqsien_dly(DRAMC_CTX_T *p,
	struct rxdqs_gating_cal *rxdqs_cal)
{
	/* Set DQSIEN delay in UI and PI */
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQSIEN_DLY),
		P_Fld(rxdqs_cal->dqsien_dly_ui, SHU_RK_B0_DQSIEN_DLY_DQSIEN_UI_B0) |
		P_Fld(rxdqs_cal->dqsien_dly_pi, SHU_RK_B0_DQSIEN_DLY_DQSIEN_PI_B0));
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQSIEN_DLY),
		P_Fld(rxdqs_cal->dqsien_dly_ui, SHU_RK_B1_DQSIEN_DLY_DQSIEN_UI_B1) |
		P_Fld(rxdqs_cal->dqsien_dly_pi, SHU_RK_B1_DQSIEN_DLY_DQSIEN_PI_B1));

	if (p->data_width == DATA_WIDTH_32BIT) {
		/* Same values again through the CHANNEL_B mapping */
		DRAM_CHANNEL_T ch_bak = p->channel;
		vSetPHY2ChannelMapping(p, CHANNEL_B);

		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQSIEN_DLY),
			P_Fld(rxdqs_cal->dqsien_dly_ui, SHU_RK_B0_DQSIEN_DLY_DQSIEN_UI_B0) |
			P_Fld(rxdqs_cal->dqsien_dly_pi, SHU_RK_B0_DQSIEN_DLY_DQSIEN_PI_B0));
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQSIEN_DLY),
			P_Fld(rxdqs_cal->dqsien_dly_ui, SHU_RK_B1_DQSIEN_DLY_DQSIEN_UI_B1) |
			P_Fld(rxdqs_cal->dqsien_dly_pi, SHU_RK_B1_DQSIEN_DLY_DQSIEN_PI_B1));

		/* Restore the caller's channel mapping */
		vSetPHY2ChannelMapping(p, ch_bak);
	}
}

/*
 * Start one SW gating measurement: reset the PHY, pulse the DQS gating
 * counter reset and run a read-check with the audio pattern on test engine 2.
 *
 * @p:         DRAMC context
 * @rxdqs_cal: not used by this function
 */
static void rxdqs_gating_sw_cal_trigger(DRAMC_CTX_T *p,
	struct rxdqs_gating_cal *rxdqs_cal)
{
	DramPhyReset(p);

	/* Pulse DQSG_CNT_RST (assert, wait 1us, de-assert) using the _All write variant */
	vIO32WriteFldAlign_All(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL2), 1,
		MISC_STBCAL2_DQSG_CNT_RST);
	mcDELAY_US(1);
	vIO32WriteFldAlign_All(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL2), 0,
		MISC_STBCAL2_DQSG_CNT_RST);

	/* enable TE2, audio pattern */
	DramcEngine2Run(p, TE_OP_READ_CHECK, TEST_AUDIO_PATTERN);
}

/*
 * Advance the per-byte gating search state machine by one sample.
 *
 * The (lead, lag) flags already stored in @rxdqs_trans for @byte are decoded
 * into a lead/lag state, which then drives the gating FSM:
 *   IDLE -> SEARCH_HIGH -> SEARCH_TRANS -> (SEARCH_LOW ->) DONE
 * While (1, 1) is observed, @dly_ui / @dly_pi are latched as the last
 * "both high" position. Debounce thresholds are expressed in PI, so sample
 * counts are scaled by @pi_step before being compared.
 *
 * A state that takes no decision leaves gating_nxt_st[] untouched, i.e. the
 * value written by an earlier call is committed again.
 */
static void rxdqs_gating_update_fsm(DRAMC_CTX_T *p,
	struct rxdqs_gating_trans *rxdqs_trans, U8 byte, U8 dly_ui, U8 dly_pi, U8 pi_step)
{
	struct rxdqs_gating_leadlag_fsm_ctrl *ctl = &rxdqs_trans->fsm;
	/* Samples spent in SEARCH_LOW, kept across calls for each byte */
	static U8 low_search_steps[DQS_NUMBER] = { 0 };
	U8 glitch_limit_pi = 16;
	U8 debounce_pi;
	U8 flags;

	/* Debounce threshold in PI */
	if (is_ddr4_family(p)) {
		debounce_pi = 32;
	} else {
#if GATING_DDR3_WA
		debounce_pi = 8;
#else
		debounce_pi = 16;
#endif
	}

	/* Decode (lead, lag) into the lead/lag state */
	flags = (rxdqs_trans->dqs_lead[byte] << 1) | rxdqs_trans->dqs_lag[byte];

	if (flags == 0x0) {
		ctl->lead_lag_cur_st[byte] = ST_LEAD_LAG_BOTH_LOW;
	} else if (flags == 0x1) {
		ctl->lead_lag_cur_st[byte] = ST_LEAD_LOW_LAG_HIGH;
	} else if (flags == 0x2) {
		ctl->lead_lag_cur_st[byte] = ST_LEAD_HIGH_LAG_LOW;
	} else if (flags == 0x3) {
		ctl->lead_lag_cur_st[byte] = ST_LEAD_LAG_BOTH_HIGH;
	} else {
		ctl->lead_lag_cur_st[byte] = ST_LEAD_LAG_INVALID;
	}

	switch (ctl->gating_cur_st[byte]) {
	case ST_GATING_IDLE:
		/* Wait for the first (1, 1) sample */
		if (ctl->lead_lag_cur_st[byte] != ST_LEAD_LAG_BOTH_HIGH) {
			ctl->gating_nxt_st[byte] = ST_GATING_IDLE;
			break;
		}

		rxdqs_trans->dqs_high[byte] = 1;
		rxdqs_trans->dqs_low[byte] = 0;
		rxdqs_trans->dqs_transition[byte] = 1;
		ctl->gating_nxt_st[byte] = ST_GATING_SEARCH_HIGH;
		break;

	case ST_GATING_SEARCH_HIGH:
		/* (1, 1) has to hold for the whole debounce window */
		if (ctl->lead_lag_cur_st[byte] != ST_LEAD_LAG_BOTH_HIGH) {
			ctl->gating_nxt_st[byte] = ST_GATING_IDLE;
			break;
		}

		rxdqs_trans->dqs_high[byte]++;
		rxdqs_trans->dqsien_dly_ui_leadlag[byte] = dly_ui;
		rxdqs_trans->dqsien_dly_pi_leadlag[byte] = dly_pi;

		if ((rxdqs_trans->dqs_high[byte] * pi_step) >= debounce_pi) {
			ctl->gating_nxt_st[byte] = ST_GATING_SEARCH_TRANS;
		}
		break;

	case ST_GATING_SEARCH_TRANS:
		if (ctl->lead_lag_cur_st[byte] == ST_LEAD_LAG_BOTH_HIGH) {
			/* Note, may have such case (1, 1)->(1, 0)->(1, 1)->(1, 0) */
			rxdqs_trans->dqs_high[byte]++;
			rxdqs_trans->dqs_transition[byte] = 1;
			rxdqs_trans->dqsien_dly_ui_leadlag[byte] = dly_ui;
			rxdqs_trans->dqsien_dly_pi_leadlag[byte] = dly_pi;
		} else if (ctl->lead_lag_cur_st[byte] == ST_LEAD_HIGH_LAG_LOW) {
			rxdqs_trans->dqs_transition[byte]++;
		} else if (ctl->lead_lag_cur_st[byte] == ST_LEAD_LAG_BOTH_LOW) {
#if GATING_LEADLAG_LOW_LEVEL_CHECK
			rxdqs_trans->dqs_low[byte] = 1;
			low_search_steps[byte] = 0;
			ctl->gating_nxt_st[byte] = ST_GATING_SEARCH_LOW;
#else
			ctl->gating_nxt_st[byte] = ST_GATING_DONE;
#endif
		} else {
			ctl->gating_nxt_st[byte] = ST_GATING_IDLE;
		}
		break;

	case ST_GATING_SEARCH_LOW:
#if GATING_LEADLAG_LOW_LEVEL_CHECK
		low_search_steps[byte]++;

		if (ctl->lead_lag_cur_st[byte] == ST_LEAD_LAG_BOTH_LOW) {
			rxdqs_trans->dqs_low[byte]++;

			if ((rxdqs_trans->dqs_low[byte] * pi_step) >= debounce_pi) {
#if GATING_DDR3_WA
				if (is_ddr3_family(p)) {
					/* DDR3 additionally needs low + transition + high to span 32 PI */
					if ((rxdqs_trans->dqs_low[byte] + rxdqs_trans->dqs_transition[byte] + rxdqs_trans->dqs_high[byte]) * pi_step >= 32)
						ctl->gating_nxt_st[byte] = ST_GATING_DONE;
				} else
#endif
				{
					ctl->gating_nxt_st[byte] = ST_GATING_DONE;
				}
			}
		} else if (ctl->lead_lag_cur_st[byte] == ST_LEAD_LOW_LAG_HIGH) {
			/* (0, 0) -> (0, 1) before the debounce threshold
			 * is met is not acceptable
			 */
			ctl->gating_nxt_st[byte] = ST_GATING_IDLE;
		} else {
			/* tDQSCK varies, so the flags may bounce back to
			 * (1, 0) or (1, 1) from (0, 0). Tolerate that: restart
			 * the (0, 0) count without dropping to IDLE ...
			 */
			rxdqs_trans->dqs_low[byte] = 0;

			/* ... unless (0, 0) is still unstable glitch_limit_pi PI
			 * after entering SEARCH_LOW; then treat it as a glitch
			 * and go back to IDLE
			 */
			if ((low_search_steps[byte] * pi_step) >= glitch_limit_pi)
				ctl->gating_nxt_st[byte] = ST_GATING_IDLE;
		}
#endif
		break;

	case ST_GATING_DONE:
	default:
		break;
	}

	mcSHOW_DBG_MSG6("%s: dqs_i %d lead_lag state from %d --> %d\n", __func__,
		byte, ctl->lead_lag_lst_st[byte], ctl->lead_lag_cur_st[byte]);
	mcSHOW_DBG_MSG6("%s: dqs_i %d gating state from %d --> %d\n", __func__,
		byte, ctl->gating_cur_st[byte], ctl->gating_nxt_st[byte]);

	/* Commit the transition */
	ctl->gating_cur_st[byte] = ctl->gating_nxt_st[byte];
	ctl->lead_lag_lst_st[byte] = ctl->lead_lag_cur_st[byte];
}

/*
 * Sample the STBEN lead/lag flags and feed them to the gating FSM for every
 * byte lane.
 *
 * Byte 0 is read from the current channel. For any other lane the flags are
 * read only on Griffin, from CHANNEL_B, and always stored in slot 1.
 *
 * NOTE(review): on non-Griffin builds, and for lanes above 1, the FSM is
 * stepped with whatever dqs_lead[]/dqs_lag[] already hold for that lane -
 * confirm this is intended.
 */
static void rxdqs_gating_get_leadlag(DRAMC_CTX_T *p,
	struct rxdqs_gating_trans *rxdqs_trans,
	struct rxdqs_gating_cal *rxdqs_cal)
{
	U8 lane = 0;

	while (lane < (p->data_width / DQS_BIT_NUMBER)) {
		if (lane != 0) {
		#if (fcFOR_CHIP_ID == fcGriffin)
			/* Upper lane: same B0 flags, seen through CHANNEL_B */
			channel_backup_and_set(p, CHANNEL_B);
			rxdqs_trans->dqs_lead[1] = u4IO32ReadFldAlign(
				DRAMC_REG_ADDR(DDRPHY_REG_MISC_PHY_RGS_STBEN_B0),
				MISC_PHY_RGS_STBEN_B0_AD_RX_ARDQS0_STBEN_LEAD_B0);
			rxdqs_trans->dqs_lag[1] = u4IO32ReadFldAlign(
				DRAMC_REG_ADDR(DDRPHY_REG_MISC_PHY_RGS_STBEN_B0),
				MISC_PHY_RGS_STBEN_B0_AD_RX_ARDQS0_STBEN_LAG_B0);
			channel_restore(p);
		#endif
		} else {
			rxdqs_trans->dqs_lead[0] = u4IO32ReadFldAlign(
				DRAMC_REG_ADDR(DDRPHY_REG_MISC_PHY_RGS_STBEN_B0),
				MISC_PHY_RGS_STBEN_B0_AD_RX_ARDQS0_STBEN_LEAD_B0);
			rxdqs_trans->dqs_lag[0] = u4IO32ReadFldAlign(
				DRAMC_REG_ADDR(DDRPHY_REG_MISC_PHY_RGS_STBEN_B0),
				MISC_PHY_RGS_STBEN_B0_AD_RX_ARDQS0_STBEN_LAG_B0);
		}

		/* Step this lane's FSM at the delay currently under test */
		rxdqs_gating_update_fsm(p, rxdqs_trans, lane,
			rxdqs_cal->dqsien_dly_ui, rxdqs_cal->dqsien_dly_pi,
			rxdqs_cal->dqsien_pi_adj_step);

		lane++;
	}
}

/*
 * Run one SW gating measurement at the DQSIEN delay held in @rxdqs_cal and
 * update the search state of every byte that has not passed yet.
 *
 * Steps: trigger the test engine, read the burst-compare error flag and the
 * DQS gating counter, sample (lead, lag) and step the FSM. For a byte whose
 * error flag is clear, whose counter equals GATING_GOLDEND_DQSCNT_DDR4 and
 * whose FSM reached ST_GATING_DONE, the best delay is computed as
 *   last (1, 1) position + (transition samples * PI step) / 2
 * and stored in @best_win as a UI/PI pair. A byte failing the error/counter
 * check has its FSM forced back to ST_GATING_IDLE.
 *
 * @p:               DRAMC context
 * @rxdqs_trans:     per-byte lead/lag samples and FSM state (updated)
 * @rxdqs_cal:       delay under test, PI step and PI-per-UI ratio
 * @pass_byte_count: in/out bitmask, bit N set once byte N has passed
 * @best_win:        receives the best UI/PI delay of newly passed bytes
 *
 * Return: always 0.
 */
static U8 rxdqs_gating_sw_cal(DRAMC_CTX_T *p,
	struct rxdqs_gating_trans *rxdqs_trans,
	struct rxdqs_gating_cal *rxdqs_cal, U8 *pass_byte_count,
	struct rxdqs_gating_best_win *best_win)
{
	U8 gating_error[DQS_NUMBER];
	U32 debug_cnt[DQS_NUMBER];
	U32 debug_pass_cnt;
	U16 total_pi;
	U8 dqs_i;
	U8 passed_bytes;
	U8 ui2pi, offset, pass_count;

	/*
	 * Not every slot is read back from HW below (slot 1 only on Griffin,
	 * slots 2/3 never for the error flag), yet all of them are logged and
	 * compared, so start from a defined value.
	 */
	memset(gating_error, 0, sizeof(gating_error));
	memset(debug_cnt, 0, sizeof(debug_cnt));
	passed_bytes = *pass_byte_count;

	rxdqs_gating_sw_cal_trigger(p, rxdqs_cal);

	/* Burst-compare error flag of the rank under calibration */
	if (p->rank == RANK_0) {
		gating_error[0] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(
			DDRPHY_REG_MISC_RD_DET_ERR_FLAG_B0),
			MISC_RD_DET_ERR_FLAG_B0_RD_BURST_CMP_ERR_RK0_B0);
	#if (fcFOR_CHIP_ID == fcGriffin)
		channel_backup_and_set(p, CHANNEL_B);
		gating_error[1] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(
			DDRPHY_REG_MISC_RD_DET_ERR_FLAG_B0),
			MISC_RD_DET_ERR_FLAG_B0_RD_BURST_CMP_ERR_RK0_B0);
		channel_restore(p);
	#endif
	} else {
		gating_error[0] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(
			DDRPHY_REG_MISC_RD_DET_ERR_FLAG_B0),
			MISC_RD_DET_ERR_FLAG_B0_RD_BURST_CMP_ERR_RK1_B0);
	#if (fcFOR_CHIP_ID == fcGriffin)
		channel_backup_and_set(p, CHANNEL_B);
		gating_error[1] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(
			DDRPHY_REG_MISC_RD_DET_ERR_FLAG_B0),
			MISC_RD_DET_ERR_FLAG_B0_RD_BURST_CMP_ERR_RK1_B0);
		channel_restore(p);
	#endif
	}

	/* read DQS counter
	 * Note: DQS counter is no longer used as pass condition. Here
	 * Read it and log it is just as debug method. Any way, DQS counter
	 * can still be used as a clue: it will be n*0x23 when gating is correct
	 */
	debug_cnt[0] = u4IO32Read4B(DRAMC_REG_ADDR(DDRPHY_REG_CAL_DQSG_CNT_B0));
#if (fcFOR_CHIP_ID == fcGriffin)
	channel_backup_and_set(p, CHANNEL_B);
	debug_cnt[1] = u4IO32Read4B(DRAMC_REG_ADDR(DDRPHY_REG_CAL_DQSG_CNT_B0));
	channel_restore(p);
#endif
	if (p->data_width == DATA_WIDTH_32BIT) {
		/*
		 * Go through DRAMC_REG_ADDR() like every other access in this
		 * function; the raw register define was passed here before.
		 */
		channel_backup_and_set(p, CHANNEL_B);
		debug_cnt[2] = u4IO32Read4B(DRAMC_REG_ADDR(DDRPHY_REG_CAL_DQSG_CNT_B0));
		debug_cnt[3] = u4IO32Read4B(DRAMC_REG_ADDR(DDRPHY_REG_CAL_DQSG_CNT_B1));
		channel_restore(p);
	}

	/* read (lead, lag) */
	rxdqs_gating_get_leadlag(p, rxdqs_trans, rxdqs_cal);

	mcSHOW_DBG_MSG("%d %d | ", rxdqs_cal->dqsien_dly_ui,
		rxdqs_cal->dqsien_dly_pi);
	mcSHOW_DBG_MSG("B1->B0 | %x %x | %x %x | (%d %d) (%d %d)\n",
		debug_cnt[1], debug_cnt[0], gating_error[1], gating_error[0],
		rxdqs_trans->dqs_lead[1], rxdqs_trans->dqs_lag[1],
		rxdqs_trans->dqs_lead[0], rxdqs_trans->dqs_lag[0]);

	debug_pass_cnt = GATING_GOLDEND_DQSCNT_DDR4;

	/* Decide the window center */
	for (dqs_i = 0; dqs_i < (p->data_width / DQS_BIT_NUMBER); dqs_i++) {
		if (passed_bytes & (1 << dqs_i))
			continue;

		if ((gating_error[dqs_i] == 0) && (debug_cnt[dqs_i] == debug_pass_cnt)) {
			/* Calculate DQSIEN position */
			if (rxdqs_trans->fsm.gating_cur_st[dqs_i] == ST_GATING_DONE) {
				/* Half of the transition region, in PI */
				pass_count = rxdqs_trans->dqs_transition[dqs_i];
				offset = (pass_count * rxdqs_cal->dqsien_pi_adj_step) / 2;

				/* Last (1, 1) position plus the offset, all in PI */
				ui2pi = rxdqs_cal->dqsien_pi_per_ui;
				total_pi = rxdqs_trans->dqsien_dly_pi_leadlag[dqs_i] + offset +
					rxdqs_trans->dqsien_dly_ui_leadlag[dqs_i] * ui2pi;

				/* PI */
				best_win->best_dqsien_dly_pi[dqs_i] = total_pi % ui2pi;

				/* UI */
				rxdqs_trans->dqsien_dly_ui_leadlag[dqs_i] = total_pi / ui2pi;
				best_win->best_dqsien_dly_ui[dqs_i] =
					rxdqs_trans->dqsien_dly_ui_leadlag[dqs_i];

#if __SLT__
				mcSHOW_SLT_MSG(("best dqsien dly found for B%d: (%d, %d)\n", dqs_i,
					best_win->best_dqsien_dly_ui[dqs_i],
					best_win->best_dqsien_dly_pi[dqs_i]));
				mcSHOW_PARSER_MSG(("[%d Mbps][CH%d][RK%d][Gating] best DQS%d dly(UI, PI) = (%d, %d)\n",
					p->frequency*2, p->channel, p->rank,dqs_i, best_win->best_dqsien_dly_ui[dqs_i], best_win->best_dqsien_dly_pi[dqs_i]));
#endif
				mcSHOW_DBG_MSG("best dqsien dly found for B%d: (%d, %d)\n", dqs_i,
					best_win->best_dqsien_dly_ui[dqs_i],
					best_win->best_dqsien_dly_pi[dqs_i]);
				passed_bytes |= 1 << dqs_i;

				/* Stop early once every byte of this width has passed */
				if (((p->data_width == DATA_WIDTH_16BIT) && (passed_bytes == 0x3)) ||
					((p->data_width == DATA_WIDTH_32BIT) && (passed_bytes == 0xf))) {
					break;
				}
			}
		} else {
			/* Clear lead lag info in case lead/lag flag toggled
			 * while gating counter & gating error still incorrect
			 */
			rxdqs_trans->fsm.gating_cur_st[dqs_i] = ST_GATING_IDLE;
		}
	}

	*pass_byte_count = passed_bytes;

	return 0;
}

/*
 * SW-mode RX DQS gating calibration for the current rank.
 *
 * Sweeps the DQSIEN delay over 16 UI from the gating start position (every
 * PI step inside each UI) until each byte lane has found its best delay,
 * records DRAM_OK for DRAM_CALIBRATION_GATING when all lanes passed, then
 * programs the best delays, restores the backed-up registers and resets
 * the PHY.
 *
 * Return: DRAM_FAIL if @p is NULL, otherwise DRAM_OK.
 * NOTE(review): DRAM_OK is returned even when a lane found no passing tap;
 * the failure is only reported via the log / calibration result.
 */
static DRAM_STATUS_T dramc_rx_dqs_gating_sw_cal(DRAMC_CTX_T *p,
	U8 use_enhance_rdqs)
{
	struct rxdqs_gating_cal cal;
	struct rxdqs_gating_trans trans;
	struct rxdqs_gating_best_win best;
	U8 ui_first, ui_limit;
	U8 pi_per_ui;
	U8 done_mask;		/* bit N set once byte lane N has passed */
	U8 lane;
	U8 any_lane_failed = 0;

#if DDR_GATING_ADJUST_TXDLY_FOR_TRACKING
	U8 u1TX_dly_DQSgated = 0;
#endif

	if (p == NULL) {
		mcSHOW_ERR_MSG("[Error] Context NULL\n");
		return DRAM_FAIL;
	}

	U32 saved_regs[ ] = {
		(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ6)),
		(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ6)),
		(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ10)),
		(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ10)),
		(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL1)),
		(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL2)),
		(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_ODTCTRL)),
	};

	/* Save the registers this calibration touches */
	DramcBackupRegisters(p, saved_regs,
		sizeof (saved_regs) / sizeof (U32), TO_ALL_CHANNEL);

	memset(&cal, 0, sizeof(cal));
	memset(&trans, 0, sizeof(trans));
	memset(&best, 0, sizeof(best));

	rxdqs_gating_sw_cal_init(p, use_enhance_rdqs);

	/* 1 UI = ? PI */
	pi_per_ui = DQS_GW_PI_PER_UI;

	if (is_ddr3_family(p)) {
		cal.dqsien_pi_adj_step = 1;
	} else {
		cal.dqsien_pi_adj_step = DQS_GW_FINE_STEP;
	}
	cal.dqsien_pi_per_ui = pi_per_ui;

	ui_first = get_gating_start_pos(p, AUTOK_OFF);
	ui_limit = ui_first + 16;
	done_mask = 0;

	/* Coarse (UI) sweep with a fine (PI) sweep inside every UI */
	cal.dqsien_dly_ui = ui_first;
	while (cal.dqsien_dly_ui < ui_limit) {
		for (cal.dqsien_dly_pi = 0; cal.dqsien_dly_pi < pi_per_ui;
			cal.dqsien_dly_pi += cal.dqsien_pi_adj_step) {
			rxdqs_gating_set_dqsien_dly(p, &cal);
			rxdqs_gating_sw_cal(p, &trans, &cal, &done_mask, &best);

			if (((p->data_width == DATA_WIDTH_16BIT) && (done_mask == 0x3)) ||
				((p->data_width == DATA_WIDTH_32BIT) && (done_mask == 0xf))) {
				/* Every lane passed: also end the coarse sweep */
				cal.dqsien_dly_ui = ui_limit;
				break;
			}
		}

		cal.dqsien_dly_ui += DQS_GW_COARSE_STEP;
	}

	/* Report each lane that never found a passing tap */
	for (lane = 0; lane < (p->data_width / DQS_BIT_NUMBER); lane++) {
		if (done_mask & (1 << lane))
			continue;

		any_lane_failed = 1;
#if __SLT__
		mcSHOW_SLT_MSG(("error, no pass taps in DQS_%d!\n", lane));
		ASSERT(0);
#else
		mcSHOW_ERR_MSG("error, no pass taps in DQS_%d!\n", lane);
#endif
	}

	if (!any_lane_failed)
		vSetCalibrationResult(p, DRAM_CALIBRATION_GATING, DRAM_OK);

	rxdqs_gating_misc_process(p, &best);

	mcSHOW_DBG_MSG("[Gating] SW calibration Done\n");
#if __SLT__
	mcSHOW_PARSER_MSG(("[DramcRxdqsGatingCal] Done\n\n"));
#endif
	DramcEngine2End(p);
	rxdqs_gating_set_final_result(p, &best);

	DramcRestoreRegisters(p, saved_regs,
		sizeof (saved_regs) / sizeof (U32), TO_ALL_CHANNEL);

	DramPhyReset(p);

	return DRAM_OK;
}

/*
 * Rx DQS gating calibration entry point.
 *
 * Marks DRAM_CALIBRATION_GATING as DRAM_FAIL up front and then runs the SW
 * calibration. The HW auto-calibration path and its workaround are compiled
 * out (#if 0), so @autok currently has no effect.
 *
 * @p:                 DRAMC context
 * @autok:             request HW auto calibration (ignored while disabled)
 * @use_enhanced_rdqs: forwarded to dramc_rx_dqs_gating_sw_cal()
 *
 * Return: the value returned by dramc_rx_dqs_gating_sw_cal().
 */
DRAM_STATUS_T dramc_rx_dqs_gating_cal(DRAMC_CTX_T *p,
	    U8 autok, U8 use_enhanced_rdqs)
{
	//DRAM_STATUS_T ret;

	mcDUMP_REG_MSG("\n[dumpRG] DramcGating \n");
	vPrintCalibrationBasicInfo(p);

	/* Disabled workaround: run a SW calibration pass ahead of AUTOK */
#if 0//ENABLE_GATING_AUTOK_WA
             if (autok) {
                 __wa__gating_swk_for_autok = 1;
                 dramc_rx_dqs_gating_sw_cal(p, use_enhanced_rdqs);
                 __wa__gating_swk_for_autok = 0;
             }
#endif

     /* Default to FAIL; the SW calibration sets DRAM_OK when every byte passes */
    vSetCalibrationResult(p, DRAM_CALIBRATION_GATING, DRAM_FAIL);

	/* Disabled: try HW auto calibration first and fall back to
	 * SW mode only if it fails.
	 */
#if 0
	if (autok) {
		ret = dramc_rx_dqs_gating_auto_cal(p);
		if (ret == DRAM_OK) {
			vSetCalibrationResult(p, DRAM_CALIBRATION_GATING, DRAM_OK);
			return DRAM_OK;
		}

		mcSHOW_ERR_MSG("[Error] Gating auto calibration fail!!\n");
	}
#endif

	mcSHOW_DBG_MSG("[Gating] SW mode calibration\n");

	return dramc_rx_dqs_gating_sw_cal(p, use_enhanced_rdqs);
}

///TODO: wait for porting +++
#if (DDR_ENABLE_GATING_CAL && DDR_GATING_ADJUST_TXDLY_FOR_TRACKING)
void DramcRxdqsGatingPostProcess(DRAMC_CTX_T *p)
{
    U8 dqs_i, u1RankRxDVS = 0;
    U8 u1RankIdx, u1RankMax, u1RankBak;
    S8 s1ChangeDQSINCTL;
#if XRTRTR_NEW_CROSS_RK_MODE
    U16 u2PHSINCTL = 0;
    U32 u4Rank_Sel_MCK_P0[2], u4Rank_Sel_MCK_P1[2], u4RANKINCTL_STB;
#endif
#if RDSEL_TRACKING_EN
    U32 u4PI_value[2] = {0};
#endif
    U32 backup_rank;
    U32 u4ReadDQSINCTL, u4ReadRODT, u4ReadTXDLY[RANK_MAX][DQS_NUMBER], u4ReadTXDLY_P1[RANK_MAX][DQS_NUMBER], u4RankINCTL_ROOT, u4XRTR2R, reg_TX_dly_DQSgated_min = 0;
    U8 mck2ui_shift;

    backup_rank = u1GetRank(p);

#ifdef XRTR2R_PERFORM_ENHANCE_DQSG_RX_DLY
    if (vGet_Div_Mode(p) == DIV8_MODE)
    {
        // wei-jen: DQSgated_min should be 2 when freq >= 1333, 1 when freq < 1333
        if (p->frequency >= 1333)
        {
            reg_TX_dly_DQSgated_min = 2;
        }
        else
        {
            reg_TX_dly_DQSgated_min = 1;
        }
    }
    else // for LPDDR4 1:4 mode
    {
        // 1866,1600,1333,1200  : reg_TX_dly_DQSgated (min) =2
        reg_TX_dly_DQSgated_min = 2;
    }
#else
    // wei-jen: DQSgated_min should be 3 when freq >= 1333, 2 when freq < 1333
    if (p->frequency >= 1333)
    {
        reg_TX_dly_DQSgated_min = 3;
    }
    else
    {
        reg_TX_dly_DQSgated_min = 2;
    }
#endif

    //Sylv_ia MP setting is switched to new mode, so RANKRXDVS can be set as 0 (review by HJ Huang)
#if 0
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_SHU_B0_DQ7), u1RankRxDVS, SHU_B0_DQ7_R_DMRANKRXDVS_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_SHU_B1_DQ7), u1RankRxDVS, SHU_B1_DQ7_R_DMRANKRXDVS_B1);
#endif
    // === End of DVS setting =====

    s1ChangeDQSINCTL = reg_TX_dly_DQSgated_min - u1TXDLY_Cal_min;

    mcDUMP_REG_MSG("\n[dumpRG] RxdqsGatingPostProcess\n");

    mcSHOW_DBG_MSG("[RxdqsGatingPostProcess] freq %d\n"
                    "ChangeDQSINCTL %d, reg_TX_dly_DQSgated_min %d, u1TXDLY_Cal_min %d\n",
                        p->frequency,
                        s1ChangeDQSINCTL, reg_TX_dly_DQSgated_min, u1TXDLY_Cal_min);
    mcDUMP_REG_MSG("[RxdqsGatingPostProcess] freq %d\n"
                       "ChangeDQSINCTL %d, reg_TX_dly_DQSgated_min %d, u1TXDLY_Cal_min %d\n",
                        p->frequency,
                        s1ChangeDQSINCTL, reg_TX_dly_DQSgated_min, u1TXDLY_Cal_min);

    if (vGet_Div_Mode(p) == DIV16_MODE)
        mck2ui_shift = 4;
    else if (vGet_Div_Mode(p) == DIV8_MODE)
        mck2ui_shift = 3;
    else
        mck2ui_shift = 2;

    if (s1ChangeDQSINCTL != 0)  // need to change DQSINCTL and TXDLY of each byte
    {
        u1TXDLY_Cal_min += s1ChangeDQSINCTL;
        u1TXDLY_Cal_max += s1ChangeDQSINCTL;

        if (p->support_rank_num == RANK_DUAL)
            u1RankMax = RANK_MAX;
        else
             u1RankMax = RANK_1;

        for (u1RankIdx = 0; u1RankIdx < u1RankMax; u1RankIdx++)
        {
            mcSHOW_DBG_MSG2("Rank: %d\n", u1RankIdx);
            mcDUMP_REG_MSG("Rank: %d\n", u1RankIdx);

            for (dqs_i = 0; dqs_i < (p->data_width / DQS_BIT_NUMBER); dqs_i++)
            {
                U8 total_ui, total_ui_P1;
                total_ui = (ucbest_coarse_mck_backup[u1RankIdx][dqs_i] << 4) + ucbest_coarse_ui_backup[u1RankIdx][dqs_i];
                total_ui_P1 = (ucbest_coarse_mck_P1_backup[u1RankIdx][dqs_i] << 4) + ucbest_coarse_ui_P1_backup[u1RankIdx][dqs_i];

                total_ui += (s1ChangeDQSINCTL << mck2ui_shift);
                total_ui_P1 += (s1ChangeDQSINCTL << mck2ui_shift);

                ucbest_coarse_mck_backup[u1RankIdx][dqs_i] = (total_ui >> 4);
                ucbest_coarse_ui_backup[u1RankIdx][dqs_i] = total_ui & 0xf;

                ucbest_coarse_mck_P1_backup[u1RankIdx][dqs_i] = (total_ui_P1 >> 4);
                ucbest_coarse_ui_P1_backup[u1RankIdx][dqs_i] = total_ui_P1 & 0xf;

                mcSHOW_DBG_MSG("best DQS%d dly(2T, 0.5T) = (%d, %d)\n", dqs_i, ucbest_coarse_mck_backup[u1RankIdx][dqs_i], ucbest_coarse_ui_backup[u1RankIdx][dqs_i]);
                mcDUMP_REG_MSG("PostProcess best DQS%d dly(2T, 0.5T) = (%d, %d)\n", dqs_i, ucbest_coarse_mck_backup[u1RankIdx][dqs_i], ucbest_coarse_ui_backup[u1RankIdx][dqs_i]);
            }
            for (dqs_i = 0; dqs_i < (p->data_width / DQS_BIT_NUMBER); dqs_i++)
            {
                mcSHOW_DBG_MSG("best DQS%d P1 dly(2T, 0.5T) = (%d, %d)\n", dqs_i, ucbest_coarse_mck_P1_backup[u1RankIdx][dqs_i], ucbest_coarse_ui_P1_backup[u1RankIdx][dqs_i]);
                mcDUMP_REG_MSG("PostProcess best DQS%d P1 dly(2T, 0.5T) = (%d, %d)\n", dqs_i, ucbest_coarse_mck_P1_backup[u1RankIdx][dqs_i], ucbest_coarse_ui_P1_backup[u1RankIdx][dqs_i]);
            }
        }

        for (u1RankIdx = 0; u1RankIdx < u1RankMax; u1RankIdx++)
        {
            vSetRank(p, u1RankIdx);
            // 4T or 2T coarse tune
            /* Set DQSIEN delay in MCK and UI */
            #if __IPMv2_TO_BE_PORTING__
            vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQSIEN_MCK_UI_DLY),
                P_Fld(ucbest_coarse_mck_backup[u1RankIdx][0],
                SHU_RK_B0_DQSIEN_MCK_UI_DLY_DQSIEN_MCK_P0_B0) |
                P_Fld(ucbest_coarse_ui_backup[u1RankIdx][0],
                SHU_RK_B0_DQSIEN_MCK_UI_DLY_DQSIEN_UI_P0_B0) |
                P_Fld(ucbest_coarse_mck_P1_backup[u1RankIdx][0],
                SHU_RK_B0_DQSIEN_MCK_UI_DLY_DQSIEN_MCK_P1_B0) |
                P_Fld(ucbest_coarse_ui_P1_backup[u1RankIdx][0],
                SHU_RK_B0_DQSIEN_MCK_UI_DLY_DQSIEN_UI_P1_B0));

            vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQSIEN_MCK_UI_DLY),
                P_Fld(ucbest_coarse_mck_backup[u1RankIdx][1],
                SHU_RK_B1_DQSIEN_MCK_UI_DLY_DQSIEN_MCK_P0_B1) |
                P_Fld(ucbest_coarse_ui_backup[u1RankIdx][1],
                SHU_RK_B1_DQSIEN_MCK_UI_DLY_DQSIEN_UI_P0_B1) |
                P_Fld(ucbest_coarse_mck_P1_backup[u1RankIdx][1],
                SHU_RK_B1_DQSIEN_MCK_UI_DLY_DQSIEN_MCK_P1_B1) |
                P_Fld(ucbest_coarse_ui_P1_backup[u1RankIdx][1],
                SHU_RK_B1_DQSIEN_MCK_UI_DLY_DQSIEN_UI_P1_B1));
            #endif
        #if RDSEL_TRACKING_EN
            //Byte 0
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_INI_UIPI),
                (ucbest_coarse_mck_backup[u1RankIdx][0] << 4) | (ucbest_coarse_ui_backup[u1RankIdx][0]),
                SHU_RK_B0_INI_UIPI_CURR_INI_UI_B0);//UI
            //Byte 1
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_INI_UIPI),
                (ucbest_coarse_mck_backup[u1RankIdx][1] << 4) | (ucbest_coarse_ui_backup[u1RankIdx][1]),
                SHU_RK_B1_INI_UIPI_CURR_INI_UI_B1);//UI
        #endif
        }
    }
    vSetRank(p, backup_rank);

    u4ReadDQSINCTL = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_RK_DQSCTL),
        MISC_SHU_RK_DQSCTL_DQSINCTL);
    mcDUMP_REG_MSG("u4ReadDQSINCTL=%d\n", u4ReadDQSINCTL);
    u4ReadDQSINCTL -= s1ChangeDQSINCTL;

    #if DDR_ENABLE_READ_DBI
    if (p->DBI_R_onoff[p->dram_fsp])
    {
        u4ReadDQSINCTL++;
        #if 0//cc mark for reg not found
        u4ReadRODT = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SHU_ODTCTRL), SHU_ODTCTRL_RODT);
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SHU_ODTCTRL), u4ReadRODT + 1, SHU_ODTCTRL_RODT); //update RODT value when READ_DBI is on
        #endif
    }
    #endif

#if XRTRTR_NEW_CROSS_RK_MODE
		for (dqs_i = 0; dqs_i < (p->data_width / DQS_BIT_NUMBER); dqs_i++)
		{
			if (ucbest_coarse_mck_backup[RANK_0][dqs_i] > ucbest_coarse_mck_backup[RANK_1][dqs_i])
			{
				u4Rank_Sel_MCK_P0[dqs_i] = (ucbest_coarse_mck_backup[RANK_0][dqs_i] > 0)? (ucbest_coarse_mck_backup[RANK_0][dqs_i] - 1): 0;
				u4Rank_Sel_MCK_P1[dqs_i] = (ucbest_coarse_mck_P1_backup[RANK_0][dqs_i] > 0)? (ucbest_coarse_mck_P1_backup[RANK_0][dqs_i] - 1): 0;
			}
			else
			{
				u4Rank_Sel_MCK_P0[dqs_i] = (ucbest_coarse_mck_backup[RANK_1][dqs_i] > 0)? (ucbest_coarse_mck_backup[RANK_1][dqs_i] - 1): 0;
				u4Rank_Sel_MCK_P1[dqs_i] = (ucbest_coarse_mck_P1_backup[RANK_1][dqs_i] > 0)? (ucbest_coarse_mck_P1_backup[RANK_1][dqs_i] - 1): 0;
			}
		}
		#if __IPMv2_TO_BE_PORTING__
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_RANK_SELPH_UI_DLY),
            P_Fld(u4Rank_Sel_MCK_P0[0], SHU_B0_RANK_SELPH_UI_DLY_RANKSEL_MCK_DLY_P0_B0) |
            P_Fld(u4Rank_Sel_MCK_P1[0], SHU_B0_RANK_SELPH_UI_DLY_RANKSEL_MCK_DLY_P1_B0));
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_RANK_SELPH_UI_DLY),
            P_Fld(u4Rank_Sel_MCK_P0[1], SHU_B1_RANK_SELPH_UI_DLY_RANKSEL_MCK_DLY_P0_B1) |
            P_Fld(u4Rank_Sel_MCK_P1[1], SHU_B1_RANK_SELPH_UI_DLY_RANKSEL_MCK_DLY_P1_B1));
        #endif

		u4RANKINCTL_STB = (u4ReadDQSINCTL > 2)? (u4ReadDQSINCTL - 2): 0;
		u2PHSINCTL = (u4ReadDQSINCTL == 0)? 0: (u4ReadDQSINCTL - 1);
		vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_RANKCTL), u4RANKINCTL_STB, MISC_SHU_RANKCTL_RANKINCTL_STB);
		vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_MISC_RANK_SEL_STB), u2PHSINCTL, SHU_MISC_RANK_SEL_STB_RANK_SEL_PHSINCTL);
#endif

#ifdef XRTR2R_PERFORM_ENHANCE_DQSG_RX_DLY
    // Wei-Jen: RANKINCTL_RXDLY = RANKINCTL = RankINCTL_ROOT = u4ReadDQSINCTL-2, if XRTR2R_PERFORM_ENHANCE_DQSG_RX_DLY enable
    // Wei-Jen: New algorithm : u4ReadDQSINCTL-2 >= 0
    if (u4ReadDQSINCTL >= 2)
    {
        u4RankINCTL_ROOT = u4ReadDQSINCTL - 2;
    }
    else
    {
        u4RankINCTL_ROOT = 0;
        mcSHOW_ERR_MSG("u4RankINCTL_ROOT <2, Please check\n");
#if (__ETT__)
        while (1);
#endif
    }
#else
    //Modify for corner IC failed at HQA test XTLV
    if (u4ReadDQSINCTL >= 3)
    {
        u4RankINCTL_ROOT = u4ReadDQSINCTL - 3;
    }
    else
    {
        u4RankINCTL_ROOT = 0;
        mcSHOW_ERR_MSG("u4RankINCTL_ROOT <3, Risk for supporting 1066/RL8\n");
    }
#endif

    //DQSINCTL
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_RK_DQSCTL),
        u4ReadDQSINCTL, MISC_SHU_RK_DQSCTL_DQSINCTL);  //Rank0 DQSINCTL
    vSetRank(p, RANK_1);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_RK_DQSCTL),
        u4ReadDQSINCTL, MISC_SHU_RK_DQSCTL_DQSINCTL);  //Rank1 DQSINCTL
    vSetRank(p, backup_rank);

    //No need to update RODT. If we update RODT, also need to update SELPH_ODTEN0_TXDLY
    //vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SHU_ODTCTRL), u4ReadDQSINCTL, SHU_ODTCTRL_RODT);           //RODT = DQSINCTL

    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_RANKCTL),
        u4ReadDQSINCTL, MISC_SHU_RANKCTL_RANKINCTL_PHY);  //RANKINCTL_PHY = DQSINCTL
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_RANKCTL),
        u4RankINCTL_ROOT, MISC_SHU_RANKCTL_RANKINCTL);  //RANKINCTL= DQSINCTL -3
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_RANKCTL),
        u4RankINCTL_ROOT, MISC_SHU_RANKCTL_RANKINCTL_ROOT1);  //RANKINCTL_ROOT1= DQSINCTL -3

#ifdef XRTR2R_PERFORM_ENHANCE_DQSG_RX_DLY
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_RANKCTL),
        u4RankINCTL_ROOT, MISC_SHU_RANKCTL_RANKINCTL_RXDLY);

    u4XRTR2R = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SHU_ACTIM_XRT), SHU_ACTIM_XRT_XRTR2R);

    mcSHOW_DBG_MSG2("TX_dly_DQSgated check: min %d  max %d, ChangeDQSINCTL=%d\n", u1TXDLY_Cal_min, u1TXDLY_Cal_max, s1ChangeDQSINCTL);
    mcSHOW_DBG_MSG2("DQSINCTL=%d, RANKINCTL=%d, u4XRTR2R=%d\n", u4ReadDQSINCTL, u4RankINCTL_ROOT, u4XRTR2R);
    mcDUMP_REG_MSG("TX_dly_DQSgated check: min %d  max %d, ChangeDQSINCTL=%d\n", u1TXDLY_Cal_min, u1TXDLY_Cal_max, s1ChangeDQSINCTL);
    mcDUMP_REG_MSG("DQSINCTL=%d, RANKINCTL=%d, u4XRTR2R=%d\n", u4ReadDQSINCTL, u4RankINCTL_ROOT, u4XRTR2R);
#else
    //XRTR2R=A-phy forbidden margin(6T) + reg_TX_dly_DQSgated (max) +Roundup(tDQSCKdiff/MCK+0.25MCK)+1(05T sel_ph margin)-1(forbidden margin overlap part)
    //Roundup(tDQSCKdiff/MCK+1UI) =1~2 all LP3 and LP4 timing
    //u4XRTR2R= 8 + u1TXDLY_Cal_max;  // 6+ u1TXDLY_Cal_max +2

    //Modify for corner IC failed at HQA test XTLV @ 3200MHz
    u4XRTR2R = 8 + u1TXDLY_Cal_max + 1;  // 6+ u1TXDLY_Cal_max +2
    if (u4XRTR2R > 12)
    {
        u4XRTR2R = 12;
        mcSHOW_ERR_MSG("XRTR2R > 12, Max value is 12\n");
    }
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SHU_ACTIM_XRT), u4XRTR2R, SHU_ACTIM_XRT_XRTR2R);

    mcSHOW_DBG_MSG2("TX_dly_DQSgated check: min %d  max %d, ChangeDQSINCTL=%d\n", u1TXDLY_Cal_min, u1TXDLY_Cal_max, s1ChangeDQSINCTL);
    mcSHOW_DBG_MSG2("DQSINCTL=%d, RANKINCTL=%d, u4XRTR2R=%d\n", u4ReadDQSINCTL, u4RankINCTL_ROOT, u4XRTR2R);
    mcDUMP_REG_MSG("TX_dly_DQSgated check: min %d  max %d, ChangeDQSINCTL=%d\n", u1TXDLY_Cal_min, u1TXDLY_Cal_max, s1ChangeDQSINCTL);
    mcDUMP_REG_MSG("DQSINCTL=%d, RANKINCTL=%d, u4XRTR2R=%d\n", u4ReadDQSINCTL, u4RankINCTL_ROOT, u4XRTR2R);
#endif

    vSetRank(p, backup_rank);
}

/*
 * Reset the file-level TX delay trackers u1TXDLY_Cal_min / u1TXDLY_Cal_max
 * to their starting values.
 *
 * The minimum tracker starts at 0xff and the maximum tracker at 0.
 * NOTE(review): presumably chosen so the first measured value replaces
 * both - confirm against the gating calibration code that updates them.
 *
 * p: DRAM context; not referenced by this routine.
 */
void DramcRxdqsGatingPreProcess(DRAMC_CTX_T *p)
{
    /* Running maximum starts from the bottom of the range. */
    u1TXDLY_Cal_max = 0;

    /* Running minimum starts from 0xff. */
    u1TXDLY_Cal_min = 0xff;
}
#endif
#endif

#if DDR_RDDQC_PINMUX_WORKAROUND
/*
 * Program the MRR pin-mux selectors used by RDDQC for the current channel.
 *
 * The 16-entry DQ mapping is looked up by p->DRAMPinmux and p->channel.
 * Entries 0-7 are written to the DQ0-DQ7 selector fields of
 * MISC_MRR_PINMUX_CTRL0 and entries 8-15 to the DQ8-DQ15 selector fields of
 * MISC_MRR_PINMUX_CTRL1.
 *
 * p: DRAM context. DRAMPinmux and channel index the table directly and are
 *    not range-checked here.
 */
void DDR4_RDDQCPinmuxWorkaround(DRAMC_CTX_T *p)
{
    const U8 *DDR_RDDQC_Mapping;

    /*
     * static const: the table never changes, so give it static storage
     * instead of re-initialising an automatic array on every call.
     */
    static const U8 DDR4_RDDQC_Mapping_POP[PINMUX_MAX][CHANNEL_NUM][16] =
    {
        {
        // for DSC
            //CH-A
            {
                0, 1, 2, 3, 4, 5, 6, 7,
                8, 9, 10, 11, 12, 13, 14, 15
            },
            #if (CHANNEL_NUM>1)
            //CH-B
            {
                0, 1, 2, 3, 4, 5, 6, 7,
                8, 9, 10, 11, 12, 13, 14, 15
            },
            #endif
            #if (CHANNEL_NUM>2)
            //CH-C
            {
                0, 1, 6, 7, 4, 5, 3, 2,
                9, 8, 11, 10, 15, 14, 12, 13
            },
            //CH-D
            {
                1, 0, 5, 4, 7, 2, 3, 6,
                8, 9, 11, 10, 12, 14, 13, 15
            },
            #endif
        },
        {
        // for LPBK
            // TODO: need porting
            // No initializers are given here, so every entry of this
            // pin-mux type is zero.
        },
        {
        // for EMCP
            //CH-A
            {
                1, 0, 3, 2, 4, 7, 6, 5,
                8, 9, 10, 12, 15, 14, 11, 13
            },
            #if (CHANNEL_NUM>1)
            //CH-B
            {
                0, 1, 7, 4, 2, 5, 6, 3,
                9, 8, 10, 12, 11, 14, 13, 15
            },
            #endif
            #if (CHANNEL_NUM>2)
            //CH-C
            {
                1, 0, 3, 2, 4, 7, 6, 5,
                8, 9, 10, 12, 15, 14, 11, 13
            },
            //CH-D
            {
                0, 1, 7, 4, 2, 5, 6, 3,
                9, 8, 10, 12, 11, 14, 13, 15
            },
            #endif
        }
    };

	// The row decays to a pointer to const; no cast is needed.
	DDR_RDDQC_Mapping = DDR4_RDDQC_Mapping_POP[p->DRAMPinmux][p->channel];

	//Set RDDQC pinmux
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_MISC_MRR_PINMUX_CTRL0),
		P_Fld(DDR_RDDQC_Mapping[0], MISC_MRR_PINMUX_CTRL0_MRR_PINMUX_SEL_DQ0) |
		P_Fld(DDR_RDDQC_Mapping[1], MISC_MRR_PINMUX_CTRL0_MRR_PINMUX_SEL_DQ1) |
		P_Fld(DDR_RDDQC_Mapping[2], MISC_MRR_PINMUX_CTRL0_MRR_PINMUX_SEL_DQ2) |
		P_Fld(DDR_RDDQC_Mapping[3], MISC_MRR_PINMUX_CTRL0_MRR_PINMUX_SEL_DQ3) |
		P_Fld(DDR_RDDQC_Mapping[4], MISC_MRR_PINMUX_CTRL0_MRR_PINMUX_SEL_DQ4) |
		P_Fld(DDR_RDDQC_Mapping[5], MISC_MRR_PINMUX_CTRL0_MRR_PINMUX_SEL_DQ5) |
		P_Fld(DDR_RDDQC_Mapping[6], MISC_MRR_PINMUX_CTRL0_MRR_PINMUX_SEL_DQ6) |
		P_Fld(DDR_RDDQC_Mapping[7], MISC_MRR_PINMUX_CTRL0_MRR_PINMUX_SEL_DQ7));
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_MISC_MRR_PINMUX_CTRL1),
		P_Fld(DDR_RDDQC_Mapping[8], MISC_MRR_PINMUX_CTRL1_MRR_PINMUX_SEL_DQ8) |
		P_Fld(DDR_RDDQC_Mapping[9], MISC_MRR_PINMUX_CTRL1_MRR_PINMUX_SEL_DQ9) |
		P_Fld(DDR_RDDQC_Mapping[10], MISC_MRR_PINMUX_CTRL1_MRR_PINMUX_SEL_DQ10) |
		P_Fld(DDR_RDDQC_Mapping[11], MISC_MRR_PINMUX_CTRL1_MRR_PINMUX_SEL_DQ11) |
		P_Fld(DDR_RDDQC_Mapping[12], MISC_MRR_PINMUX_CTRL1_MRR_PINMUX_SEL_DQ12) |
		P_Fld(DDR_RDDQC_Mapping[13], MISC_MRR_PINMUX_CTRL1_MRR_PINMUX_SEL_DQ13) |
		P_Fld(DDR_RDDQC_Mapping[14], MISC_MRR_PINMUX_CTRL1_MRR_PINMUX_SEL_DQ14) |
		P_Fld(DDR_RDDQC_Mapping[15], MISC_MRR_PINMUX_CTRL1_MRR_PINMUX_SEL_DQ15));
}
#endif

/*
 * Set up the controller and the DRAM for an RDDQC (MPR read) RX calibration.
 *
 * In program order this routine:
 *   1. clears the read-DBI field in SHU_B0/B1/B2_DQ7,
 *   2. sets DRAMCTRL_ALEBLOCK,
 *   3. triggers a RUNTIME_SWCMD_PREA command on the current rank,
 *   4. writes MR3 with the cached mr03 value OR-ed with the bits selected by
 *      DDR_MPRR_MODE (bits [1:0] are cleared when is_ddr3_family(p) is true),
 *   5. programs MPRR_CTRL1 (mode) and MPRR_CTRL0 (golden patterns),
 *   6. applies the RDDQC pin-mux workaround when it is compiled in,
 *   7. sets R_DMRXDLY_CG_IG in SHU_B0/B1/B2_DQ8.
 *
 * The cached mr03 entry in gMRVal is not modified.
 *
 * NOTE: when the mode is none of SERIAL/PARALLEL/STAGGERED, only an error is
 * logged; the MR3 value stays 0 and is still written.
 *
 * Returns DRAM_OK unconditionally.
 */
U32 DramcRxWinRDDQCInit(DRAMC_CTX_T *p)
{
	RTSWCMD_PARAM_T swcmd;
	U32 mr3_value = 0;
	dram_mpr_mode_t mode = DDR_MPRR_MODE;

	//vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_DRAMC_PD_CTRL), 0, DRAMC_PD_CTRL_PHYCLKDYNGEN);

	// Read DBI off on all three bytes
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ7), 0, SHU_B0_DQ7_R_DMDQMDBI_SHU_B0);
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ7), 0, SHU_B1_DQ7_R_DMDQMDBI_SHU_B1);
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B2_DQ7), 0, SHU_B2_DQ7_R_DMDQMDBI_SHU_B2);

	// Block ALE
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_DRAMCTRL), 1, DRAMCTRL_ALEBLOCK);

	// PREA on the rank under calibration
	memset(&swcmd, 0, sizeof(swcmd));
	swcmd.rank = p->rank;
	swcmd.selector = RUNTIME_SWCMD_PREA;
	DramcTriggerRTSWCMD(p, &swcmd);

	// Build the MR3 value that switches the DRAM into the requested MPR mode
	if (mode == SERIAL_MODE)
	{
		mr3_value = gMRVal[p->channel][p->rank].mr03 | 0x4;
	}
	else if (mode == PARALLEL_MODE)
	{
		mr3_value = gMRVal[p->channel][p->rank].mr03 | 0x804;
	}
	else if (mode == STAGGERED_MODE)
	{
		mr3_value = gMRVal[p->channel][p->rank].mr03 | 0x1004;
	}
	else
	{
		mcSHOW_ERR_MSG("Error mpr mode!");
	}

	// DDR3 family: force MR3 bits [1:0] to zero
	if (is_ddr3_family(p))
	{
		mr3_value &= ~0x3;
	}

	DramcModeRegWriteByRank(p, p->rank, 3, mr3_value);

	vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_MPRR_CTRL1), mode & 0x3, MPRR_CTRL1_MPRR_MODE);

	// Golden patterns the MPR read data is compared against
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_MPRR_CTRL0),
		P_Fld(0x55, MPRR_CTRL0_MPRR0_GOLDEN) |
		P_Fld(0x33, MPRR_CTRL0_MPRR1_GOLDEN) |
		P_Fld(0xf, MPRR_CTRL0_MPRR2_GOLDEN) |
		P_Fld(0x0, MPRR_CTRL0_MPRR3_GOLDEN));

#if DDR_RDDQC_PINMUX_WORKAROUND
	// Translate pin order by MRR bit sel
	DDR4_RDDQCPinmuxWorkaround(p);
#endif

	// Open gated clock, by KaiHsin   (DCM)
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ8),
		P_Fld(1, SHU_B0_DQ8_R_DMRXDLY_CG_IG_B0));
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ8),
		P_Fld(1, SHU_B1_DQ8_R_DMRXDLY_CG_IG_B1));
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B2_DQ8),
		P_Fld(1, SHU_B2_DQ8_R_DMRXDLY_CG_IG_B2));

	return DRAM_OK;
}

/*
 * Issue one "RD DQ Calibration" read and return its compare result.
 *
 * A zero-filled RTSWCMD parameter block is set up with selector
 * RUNTIME_SWCMD_MPRR, the current rank and rddqc_rff_enable_dqmk = FALSE,
 * then handed to DramcTriggerRTSWCMD(). The return value is the block's
 * rddqc_rff_cmp_result field as read after that call; the per-bit RX
 * calibration in this file treats a set bit as a failing bit.
 *
 * NOTE(review): the previous header listed register-level steps
 * (SWCMD_CTRL1_RDDQC_LP_ENB = 1, RDDQCEN = 1, wait rddqc_response = 1,
 * read compare result, RDDQCEN = 0). None of them is performed directly
 * here; if still accurate they happen inside DramcTriggerRTSWCMD() - confirm.
 */
U32 DramcRxWinRDDQCRun(DRAMC_CTX_T *p)
{
	RTSWCMD_PARAM_T swcmd;
	U32 cmp_result;

	memset(&swcmd, 0, sizeof(swcmd));

	swcmd.rank = p->rank;
	swcmd.rddqc_rff_enable_dqmk = FALSE;
	swcmd.selector = RUNTIME_SWCMD_MPRR;

	DramcTriggerRTSWCMD(p, &swcmd);

	cmp_result = swcmd.rddqc_rff_cmp_result;
	return cmp_result;
}

/*
 * Leave RDDQC mode after calibration.
 *
 * Clears SWCMD_CTRL2_RTSWCMD_RK, clears DRAMCTRL_ALEBLOCK, and rewrites MR3
 * of the current rank with the cached mr03 value from gMRVal.
 *
 * Returns DRAM_OK unconditionally.
 */
U32 DramcRxWinRDDQCEnd(DRAMC_CTX_T *p)
{
	U32 mr3_cached;

	// Recover MPC Rank
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SWCMD_CTRL2), 0x0, SWCMD_CTRL2_RTSWCMD_RK);

	// Open ALE again
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_DRAMCTRL), 0, DRAMCTRL_ALEBLOCK);

	// Write the cached MR3 value back to the DRAM
	mr3_cached = gMRVal[p->channel][p->rank].mr03;
	DramcModeRegWriteByRank(p, p->rank, 3, mr3_cached);

	return DRAM_OK;
}

/*
 * Write the same RX delay into the rising and falling delay fields of two DQ
 * bits, for byte 0 and byte 1.
 *
 * p:      DRAM context (used by the register-address macro).
 * ii:     register selector; the RXDLY0 address of each byte is advanced by
 *         ii * 4 and the RXDLY0 field definitions (ARDQ0/ARDQ1, R and F) are
 *         applied to the register at that address.
 * iDelay: delay value written to all four fields of each register.
 */
static void SetRxDqDelay(DRAMC_CTX_T *p, U8 ii, S16 iDelay)
{
	/* Offset from RXDLY0 to the register selected by ii. */
	const int reg_step = ii * 4;

	/* Byte 0 */
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_RXDLY0 + reg_step),
		P_Fld(iDelay, SHU_RK_B0_RXDLY0_RX_ARDQ0_R_DLY_B0) |
		P_Fld(iDelay, SHU_RK_B0_RXDLY0_RX_ARDQ0_F_DLY_B0) |
		P_Fld(iDelay, SHU_RK_B0_RXDLY0_RX_ARDQ1_R_DLY_B0) |
		P_Fld(iDelay, SHU_RK_B0_RXDLY0_RX_ARDQ1_F_DLY_B0));

	/* Byte 1 */
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_RXDLY0 + reg_step),
		P_Fld(iDelay, SHU_RK_B1_RXDLY0_RX_ARDQ0_R_DLY_B1) |
		P_Fld(iDelay, SHU_RK_B1_RXDLY0_RX_ARDQ0_F_DLY_B1) |
		P_Fld(iDelay, SHU_RK_B1_RXDLY0_RX_ARDQ1_R_DLY_B1) |
		P_Fld(iDelay, SHU_RK_B1_RXDLY0_RX_ARDQ1_F_DLY_B1));
}

/*
 * Apply one step of the RX delay sweep.
 *
 * iDelay > 0 : the value is written to the DQM delay fields (RXDLY4) of both
 *              bytes, the PHY is reset, and the same value is then written
 *              to the DQ delay registers via SetRxDqDelay() for ii = 0..3.
 * iDelay <= 0: the magnitude (-iDelay) is written to the DQS delay fields
 *              (RXDLY5) of both bytes, then the PHY is reset. DQ and DQM
 *              delays are not touched on this path.
 */
static void SetRxDqDqsDelay(DRAMC_CTX_T *p, S16 iDelay)
{
	U8 reg_idx;

	if (iDelay > 0)
	{
		// Adjust DQM output delay.
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_RXDLY4),
			P_Fld(iDelay, SHU_RK_B0_RXDLY4_RX_ARDQM0_R_DLY_B0) |
			P_Fld(iDelay, SHU_RK_B0_RXDLY4_RX_ARDQM0_F_DLY_B0));
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_RXDLY4),
			P_Fld(iDelay, SHU_RK_B1_RXDLY4_RX_ARDQM0_R_DLY_B1) |
			P_Fld(iDelay, SHU_RK_B1_RXDLY4_RX_ARDQM0_F_DLY_B1));
		DramPhyReset(p);

		// Adjust DQ output delay.
		for (reg_idx = 0; reg_idx < 4; reg_idx++)
		{
			SetRxDqDelay(p, reg_idx, iDelay);
		}
	}
	else
	{
		// Zero or negative sweep value: its magnitude becomes the DQS delay.
		const int dqs_delay = -iDelay;

		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_RXDLY5),
			P_Fld(dqs_delay, SHU_RK_B0_RXDLY5_RX_ARDQS0_R_DLY_B0) |
			P_Fld(dqs_delay, SHU_RK_B0_RXDLY5_RX_ARDQS0_F_DLY_B0));
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_RXDLY5),
			P_Fld(dqs_delay, SHU_RK_B1_RXDLY5_RX_ARDQS0_R_DLY_B1) |
			P_Fld(dqs_delay, SHU_RK_B1_RXDLY5_RX_ARDQS0_F_DLY_B1));
		DramPhyReset(p);
	}
}

DRAM_STATUS_T DramcRxWindowPerbitCal(DRAMC_CTX_T *p,
                                              RX_PATTERN_OPTION_T u1UseTestEngine,
                                              U8 *u1AssignedVref,
                                              U8 isAutoK)
{
    U8 ii, u1BitIdx, u1ByteIdx;
#if ENABLE_EYESCAN_GRAPH
    U32 u1vrefidx;
	S16 u4DelayStep=1;
#endif
    U8 ucbit_first, ucbit_last;
    S16 iDelay = 0, S16DelayBegin = 0;
    U16 u16DelayEnd = 0, u16DelayStep = 1;
    U32 uiFinishCount;
    U32 u4err_value, u4fail_bit;
    PASS_WIN_DATA_T WinPerBit[DQ_DATA_WIDTH + RDDQC_ADD_DMI_NUM], FinalWinPerBit[DQ_DATA_WIDTH + RDDQC_ADD_DMI_NUM];
    S32 iDQSDlyPerbyte[DQS_NUMBER], iDQMDlyPerbyte[DQS_NUMBER];//, iFinalDQSDly[DQS_NUMBER];
    U8 u1VrefScanEnable = FALSE;
    U16 u2TempWinSum[DQS_NUMBER]={0}, u2rx_window_sum[DQS_NUMBER]={0}, u2TmpDQMSum=0;
    U16 u2VrefLevel, u2FinalVref [DQS_NUMBER]= {0xe, 0xe};
    U16 u2VrefBegin, u2VrefEnd, u2VrefStep;
//    U32 u4fail_bit_R, u4fail_bit_F;
    U8  u1RXEyeScanEnable=0,u1PrintCalibrationProc;
    U16 u1min_bit_by_vref[DQS_NUMBER];
    U16 u1min_winsize_by_vref[DQS_NUMBER];
    U16 u1min_winsize[DQS_NUMBER]={0};
#if __SLT__
    U16 u1min_bit[DQS_NUMBER]={0};
#endif
    U8 u1CalDQMNum = 0;
    U32 u4PassFlags = 0xFFFF;
    #if PINMUX_AUTO_TEST_PER_BIT_RX
    U8 check_pinmux_flag[DQ_DATA_WIDTH] = {0};
    #endif
#if __SLT__
	U16 slt_u1min_bit_by_vref[DQS_NUMBER][RX_VREF_RANGE_END]={0};
	U16 slt_u1min_winsize_by_vref[DQS_NUMBER][RX_VREF_RANGE_END]={0};
#endif


#if ENABLE_EYESCAN_GRAPH
    U8 EyeScan_index[DQ_DATA_WIDTH + RDDQC_ADD_DMI_NUM] = {0};
    U8 u1pass_in_this_vref_flag[DQ_DATA_WIDTH + RDDQC_ADD_DMI_NUM];
#endif

    U8 backup_rank, u1KnownVref[2]={0xff, 0xff};

	// error handling
	if (!p)
	{
		mcSHOW_ERR_MSG("context NULL\n");
		return DRAM_FAIL;
	}

#if DDR_RDDQC_PINMUX_WORKAROUND
	U32 u4RegBackupAddress[] =
	{
		(DRAMC_REG_ADDR(DDRPHY_REG_MISC_MRR_PINMUX_CTRL0)),
		(DRAMC_REG_ADDR(DDRPHY_REG_MISC_MRR_PINMUX_CTRL1)),
	};

	//Back up dramC register
	DramcBackupRegisters(p, u4RegBackupAddress, ARRAY_SIZE(u4RegBackupAddress), TO_ONE_CHANNEL);
#endif

	if (u1UseTestEngine == PATTERN_TEST_ENGINE)
		u1RXEyeScanEnable = GetEyeScanEnable(p, 1);

#if (FEATURE_RDDQC_K_DMI == TRUE)
	if (u1UseTestEngine == PATTERN_RDDQC)
	{
		u1CalDQMNum = 2;
		iDQMDlyPerbyte[0] = -0xFFFFFF;
		iDQMDlyPerbyte[1] = -0xFFFFFF;
	}
	else
#endif
	{
		u1CalDQMNum = 0;
		iDQMDlyPerbyte[0] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_RXDLY4), SHU_RK_B0_RXDLY4_RX_ARDQM0_R_DLY_B0);
		iDQMDlyPerbyte[1] = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_RXDLY4), SHU_RK_B1_RXDLY4_RX_ARDQM0_R_DLY_B1);

	}

#if ENABLE_EYESCAN_GRAPH
	if (u1UseTestEngine == PATTERN_TEST_ENGINE)
	{
		for(u1vrefidx=0; u1vrefidx<EYESCAN_RX_VREF_RANGE_END;u1vrefidx++)
		{
			for (u1BitIdx = 0; u1BitIdx < DQ_DATA_WIDTH; u1BitIdx++)
			{
				for(ii=0; ii<EYESCAN_BROKEN_NUM; ii++)
				{
					gEyeScan_Min[u1vrefidx][u1BitIdx][ii] = EYESCAN_DATA_INVALID;
					gEyeScan_Max[u1vrefidx][u1BitIdx][ii] = EYESCAN_DATA_INVALID;

					gEyeScan_ContinueVrefHeight[u1BitIdx] = 0;
					gEyeScan_TotalPassCount[u1BitIdx] = 0;
				}
			}
		}
	}
#endif


	//When doing RxWindowPerbitCal, should make sure that auto refresh is disable
	if (u1UseTestEngine == PATTERN_TEST_ENGINE)
		vAutoRefreshSwitch(p, ENABLE);

	//CKEFixOnOff(p, p->rank, CKE_FIXON, CKE_WRITE_TO_ONE_CHANNEL);

	backup_rank = u1GetRank(p);

	//defult set result fail. When window found, update the result as oK
	if (u1UseTestEngine == PATTERN_TEST_ENGINE)
	{
		vSetCalibrationResult(p, DRAM_CALIBRATION_RX_PERBIT, DRAM_FAIL);

		// Something wrong with TA2 pattern -- SI, which causes RX autoK fail.
		if (isAutoK == TRUE)
		{
			DramcEngine2Init(p, p->test2_1, p->test2_2, TEST_XTALK_PATTERN, 0, TE_NO_UI_SHIFT);
		}
		else
		{
#if ENABLE_K_WITH_WORST_SI_UI_SHIFT
			DramcEngine2Init(p, p->test2_1, p->test2_2, p->test_pattern, 0, TE_UI_SHIFT);//UI_SHIFT + LEN1
#else
			DramcEngine2Init(p, p->test2_1, p->test2_2, p->test_pattern, 0, TE_NO_UI_SHIFT);
#endif
		}
	}
	else
	{
		vSetCalibrationResult(p, DRAM_CALIBRATION_RX_RDDQC, DRAM_FAIL);
		DramcRxWinRDDQCInit(p);
	}

	// Intialize, diable RX Vref
	u2VrefBegin = 0;
	u2VrefEnd = 0;
	u2VrefStep = 1;

    if ((u1UseTestEngine == PATTERN_TEST_ENGINE))
    {
    #if (FOR_DV_SIMULATION_USED==0)
        if ((p->rank==RANK_0) || (p->frequency >= RX_VREF_DUAL_RANK_K_FREQ) || (u1RXEyeScanEnable==1))
            u1VrefScanEnable =1;
    #else
            u1VrefScanEnable =0;
    #endif
    }

    u1PrintCalibrationProc = ((u1VrefScanEnable == 0) || (u1RXEyeScanEnable == 1) || (u1AssignedVref != NULL));

#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
    if (p->femmc_Ready == 1 && ((p->Bypass_RDDQC && u1UseTestEngine == PATTERN_RDDQC) || (p->Bypass_RXWINDOW && u1UseTestEngine == PATTERN_TEST_ENGINE)))
    {
        mcSHOW_DBG_MSG("[FAST_K] Bypass RX Calibration\n");
    }
    else
#endif
    {

        mcDUMP_REG_MSG("\n[dumpRG] %s\n",u1UseTestEngine==PATTERN_RDDQC?"RDDQC":"DramcRxWindowPerbitCal");
#if VENDER_JV_LOG
#if 0 //BU don't want customer knows our RX's ability
        if (u1UseTestEngine == 1)
            vPrintCalibrationBasicInfo_ForJV(p);
#endif
#else
        vPrintCalibrationBasicInfo(p);
#if __SLT__
	vPrintCalibrationBasicInfoDiag(p);
#endif
#endif
        mcSHOW_DBG_MSG2("Start DQ dly to find pass range UseTestEngine =%d\n", u1UseTestEngine);
    }

    mcSHOW_DBG_MSG2("UseTestEngine: %d\n", u1UseTestEngine);
    mcSHOW_DBG_MSG("RX Vref Scan: %d\n", u1VrefScanEnable);

    if (u1VrefScanEnable)
    {
        if ((Get_Vref_Calibration_OnOff(p) == VREF_CALI_OFF) && (u1RXEyeScanEnable == 0))
        {
            u2VrefBegin = 0;
            u2VrefEnd = 0;
            u1KnownVref[0] = gFinalRXVrefDQForSpeedUp[p->channel][p->rank][p->odt_onoff][0];// byte 0
            u1KnownVref[1] = gFinalRXVrefDQForSpeedUp[p->channel][p->rank][p->odt_onoff][1];// byte 1

            if (u1UseTestEngine == PATTERN_TEST_ENGINE && ((u1KnownVref[0] == 0) || (u1KnownVref[1] == 0)))
            {
//                mcSHOW_ERR_MSG("\nWrong frequency K order= %d\n");
                #if __ETT__
                while (1);
                #endif
            }
        }
        else if (u1AssignedVref != NULL)  // need to specify RX Vref and don't scan RX Vref.
        {
            u2VrefBegin = 0;
            u2VrefEnd = 0;
            u1KnownVref[0] = u1AssignedVref[0];  // byte 0
            u1KnownVref[1] = u1AssignedVref[1];  // byte 1
        }
        else
        {
            #if FOR_DV_SIMULATION_USED
            u2VrefBegin = RX_VREF_RANGE_BEGIN;
            #else
            if (u1RXEyeScanEnable == 0)
            {
		u2VrefBegin = 0;
                u2VrefEnd = RX_VREF_RANGE_END - 1;//21;
                mcSHOW_DBG_MSG("\nSet Vref Range= %d -> %d\n",u2VrefBegin,u2VrefEnd);
            }
            else
            {
                u2VrefBegin = 0;//Lewis@20160817: Enlarge RX Vref range for eye scan
                u2VrefEnd = RX_VREF_RANGE_END - 1;
                mcSHOW_DBG_MSG("\nSet Eyescan Vref Range= %d -> %d\n",u2VrefBegin,u2VrefEnd);
            }
        #endif
        }

        u2VrefStep = RX_VREF_RANGE_STEP;

        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ5), 1, B0_DQ5_RG_RX_ARDQ_VREF_EN_B0);
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ5), 1, B1_DQ5_RG_RX_ARDQ_VREF_EN_B1);
    }
    else // Disable RX Vref
    {
        u2VrefBegin = 0;
        u2VrefEnd = 0;
        u2VrefStep = 1;
    }

#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
    if (p->femmc_Ready == 1 && ((p->Bypass_RDDQC && u1UseTestEngine == PATTERN_RDDQC) || (p->Bypass_RXWINDOW && u1UseTestEngine == PATTERN_TEST_ENGINE)))
    {
        // load RX DQS and DQM delay from eMMC
        for (u1ByteIdx = 0; u1ByteIdx < (p->data_width / DQS_BIT_NUMBER); u1ByteIdx++)
        {
            if (u1VrefScanEnable)
            {
                // load RX Vref from eMMC
            #if ( SUPPORT_SAVE_TIME_FOR_CALIBRATION && BYPASS_VREF_CAL)
                u2FinalVref[u1ByteIdx] = p->pSavetimeData->u1RxWinPerbitVref_Save[p->channel][u1ByteIdx];
            #endif
            }

            iDQSDlyPerbyte[u1ByteIdx] = p->pSavetimeData->u1RxWinPerbit_DQS[p->channel][p->rank][u1ByteIdx];
            iDQMDlyPerbyte[u1ByteIdx] = p->pSavetimeData->u1RxWinPerbit_DQM[p->channel][p->rank][u1ByteIdx];
        }

        // load RX DQ delay from eMMC
        for (u1BitIdx = 0; u1BitIdx < 16; u1BitIdx++)
        {
            FinalWinPerBit[u1BitIdx].best_dqdly = p->pSavetimeData->u1RxWinPerbit_DQ[p->channel][p->rank][u1BitIdx];
        }

        if (u1UseTestEngine == PATTERN_TEST_ENGINE)
            vSetCalibrationResult(p, DRAM_CALIBRATION_RX_PERBIT, DRAM_FAST_K);
        else
            vSetCalibrationResult(p, DRAM_CALIBRATION_RX_RDDQC, DRAM_FAST_K);
    }
    else
#endif
    {
		if (p->frequency >= DDR2667_FREQ) {
			S16DelayBegin = -48;
		} else if (p->frequency >= DDR1600_FREQ) {
			S16DelayBegin= -63;
		} else {
			S16DelayBegin= -127;
		}
		u16DelayEnd = 63;

#if FOR_DV_SIMULATION_USED
		if (p->frequency <= DDR1600_FREQ)
			u16DelayStep = 8;
		else
			u16DelayStep = 4;

		//if RDDQD, roughly calibration
		//if (u1UseTestEngine == PATTERN_RDDQC)
		//	u16DelayStep <<= 1;
#else
		u16DelayStep = 2;

#if RX_DELAY_PRE_CAL
		if (u1UseTestEngine == PATTERN_RDDQC) {
			s2RxDelayPreCal =PASS_RANGE_NA;
		} else {
			S16DelayBegin = s2RxDelayPreCal - 10;  // for test engine
			if (S16DelayBegin < -127)
				S16DelayBegin = -127;
		}
#endif

		if(u1UseTestEngine == PATTERN_RDDQC) //if RDDQD, roughly calibration
			u16DelayStep <<= 1;
#endif

        mcSHOW_DBG_MSG("\nRX Vref %d -> %d, step: %d\n", u2VrefBegin, u2VrefEnd, u2VrefStep);
        mcSHOW_DBG_MSG("\nRX Delay %d -> %d, step: %d\n", S16DelayBegin, u16DelayEnd, u16DelayStep);

        for (u2VrefLevel = u2VrefBegin; u2VrefLevel <= u2VrefEnd; u2VrefLevel += u2VrefStep)
        {
            if (u1VrefScanEnable == TRUE)
            {
                //Set RX Vref Here
                if (u1KnownVref[0] != 0xff && u1KnownVref[1] != 0xff)
                {
#if __SLT__
                    mcSHOW_SLT_MSG(("\nSet Vref, RX VrefLevel [Byte0]: %d [Byte1]: %d\n", u2VrefLevel, u2VrefLevel));
#endif
                    mcSHOW_DBG_MSG("\nSet Vref, RX VrefLevel [Byte0]: %d\n", u1KnownVref[0]);
                    mcSHOW_DBG_MSG("                         [Byte1]: %d\n", u1KnownVref[1]);

                    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ5), u1KnownVref[0], SHU_B0_DQ5_RG_RX_ARDQ_VREF_SEL_B0);
                    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ5), u1KnownVref[1], SHU_B1_DQ5_RG_RX_ARDQ_VREF_SEL_B1);
                }
                else  //normal vref calibration
                {
#if __SLT__
                    mcSHOW_SLT_MSG(("\nSet Vref, RX VrefLevel [Byte0]: %d [Byte1]: %d\n", u2VrefLevel, u2VrefLevel));
#endif
                    mcSHOW_DBG_MSG("\nSet Vref, RX VrefLevel [Byte0]: %d\n", u2VrefLevel);
                    mcSHOW_DBG_MSG("                         [Byte1]: %d\n", u2VrefLevel);
                    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ5), u2VrefLevel, SHU_B0_DQ5_RG_RX_ARDQ_VREF_SEL_B0);
                    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ5), u2VrefLevel, SHU_B1_DQ5_RG_RX_ARDQ_VREF_SEL_B1);
                }
            }

            // 1.delay DQ ,find the pass widnow (left boundary).
            // 2.delay DQS find the pass window (right boundary).
            // 3.Find the best DQ / DQS to satify the middle value of the overall pass window per bit
            // 4.Set DQS delay to the max per byte, delay DQ to de-skew

            // initialize parameters
            uiFinishCount = 0;

            for (u1ByteIdx = 0; u1ByteIdx < (p->data_width / DQS_BIT_NUMBER); u1ByteIdx++)
            {
                u2TempWinSum[u1ByteIdx] = 0;
                u1min_bit_by_vref[u1ByteIdx] = 0xffff;
                u1min_winsize_by_vref[u1ByteIdx] = 0xffff;
            }

            for (u1BitIdx = 0; u1BitIdx < p->data_width + u1CalDQMNum; u1BitIdx++)
            {
                WinPerBit[u1BitIdx].first_pass = (S16)PASS_RANGE_NA;
                WinPerBit[u1BitIdx].last_pass = (S16)PASS_RANGE_NA;
                FinalWinPerBit[u1BitIdx].first_pass = (S16)PASS_RANGE_NA;
                FinalWinPerBit[u1BitIdx].last_pass = (S16)PASS_RANGE_NA;

                #if ENABLE_EYESCAN_GRAPH
                gEyeScan_CaliDelay[u1BitIdx/8] = 0;
                gEyeScan_DelayCellPI[u1BitIdx] = 0;
                EyeScan_index[u1BitIdx] = 0;
                u1pass_in_this_vref_flag[u1BitIdx] = 0;
                #endif
            }

			// Adjust DQM output delay to 0
			vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_RXDLY4),
				P_Fld(0, SHU_RK_B0_RXDLY4_RX_ARDQM0_R_DLY_B0) |
				P_Fld(0, SHU_RK_B0_RXDLY4_RX_ARDQM0_F_DLY_B0));
			vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_RXDLY4),
				P_Fld(0, SHU_RK_B1_RXDLY4_RX_ARDQM0_R_DLY_B1) |
				P_Fld(0, SHU_RK_B1_RXDLY4_RX_ARDQM0_F_DLY_B1));

			// Adjust DQS output delay to 0
			vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_RXDLY5),
				P_Fld(0, SHU_RK_B0_RXDLY5_RX_ARDQS0_R_DLY_B0) |
				P_Fld(0, SHU_RK_B0_RXDLY5_RX_ARDQS0_F_DLY_B0));
			vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_RXDLY5),
				P_Fld(0, SHU_RK_B1_RXDLY5_RX_ARDQS0_R_DLY_B1) |
				P_Fld(0, SHU_RK_B1_RXDLY5_RX_ARDQS0_F_DLY_B1));
            // Adjust DQ output delay to 0
            //every 2bit dq have the same delay register address
            for (ii = 0; ii < 4; ii++)
            {
                #if PINMUX_AUTO_TEST_PER_BIT_RX
                if(gRX_check_per_bit_flag == 1)
                {
                    //not reset delay cell
                }
                else
                #endif
                {
                    SetRxDqDelay(p, ii, 0);
                }
            }

            for (iDelay = S16DelayBegin; iDelay <= u16DelayEnd; iDelay += u16DelayStep)
            {
                SetRxDqDqsDelay(p, iDelay);

                if (u1UseTestEngine == PATTERN_TEST_ENGINE)
                {
                    u4err_value = DramcEngine2Run(p, TE_OP_WRITE_READ_CHECK, p->test_pattern);
                }
                else
                {
                    u4err_value = DramcRxWinRDDQCRun(p);
                }
                if(u1PrintCalibrationProc)
                {
                #ifdef ETT_PRINT_FORMAT
                    mcSHOW_DBG_MSG2("%d, [0]", iDelay);
                #else
                    mcSHOW_DBG_MSG2("iDelay= %4d, [0]", iDelay);
                #endif
                }

                // check fail bit ,0 ok ,others fail
                for (u1BitIdx = 0; u1BitIdx < p->data_width + u1CalDQMNum; u1BitIdx++)
                {
                    u4fail_bit = u4err_value & ((U32)1 << u1BitIdx);

                    if (WinPerBit[u1BitIdx].first_pass == PASS_RANGE_NA)
                    {
                        if (u4fail_bit == 0) //compare correct: pass
                        {
                            WinPerBit[u1BitIdx].first_pass = iDelay;

                        #if RX_DELAY_PRE_CAL
                            if((u1UseTestEngine == PATTERN_RDDQC) && (s2RxDelayPreCal == PASS_RANGE_NA))
                            {
                                s2RxDelayPreCal = iDelay;
                            }

                            if((u1UseTestEngine == PATTERN_TEST_ENGINE) && (iDelay == S16DelayBegin))
                            {
                                mcSHOW_ERR_MSG("RX_DELAY_PRE_CAL: Warning, possible miss RX window boundary\n");
                                #if __ETT__
                                //DDR800, RX window too big, will pass at DQS dealy 127 (-127). Therefore, don't stop.
                                if(p->frequency >= 600)
                                    while(1);
                                #endif
                            }
                        #endif

                        #if ENABLE_EYESCAN_GRAPH
                                u1pass_in_this_vref_flag[u1BitIdx]=1;
                        #endif
                        }
                    }
                    else if (WinPerBit[u1BitIdx].last_pass == PASS_RANGE_NA)
                    {
                        //mcSHOW_DBG_MSG("fb%d \n", u4fail_bit);

                        if (u4fail_bit != 0) //compare error : fail
                        {
                            WinPerBit[u1BitIdx].last_pass = (iDelay - 1);
                        }
                        else if (iDelay > (u16DelayEnd - u16DelayStep))
                        {
                            WinPerBit[u1BitIdx].last_pass = iDelay;
                        }

                        if (WinPerBit[u1BitIdx].last_pass != PASS_RANGE_NA)
                        {
                            if ((WinPerBit[u1BitIdx].last_pass - WinPerBit[u1BitIdx].first_pass)\
                                >= (FinalWinPerBit[u1BitIdx].last_pass - FinalWinPerBit[u1BitIdx].first_pass))
                            {
                                //if window size bigger than RX_PASS_WIN_CRITERIA, consider as real pass window. If not, don't update finish counte and won't do early break;
                                if ((WinPerBit[u1BitIdx].last_pass - WinPerBit[u1BitIdx].first_pass) > RX_PASS_WIN_CRITERIA)
                                    uiFinishCount |= (1 << u1BitIdx);

                                //update bigger window size
                                FinalWinPerBit[u1BitIdx].first_pass = WinPerBit[u1BitIdx].first_pass;
                                #if PINMUX_AUTO_TEST_PER_BIT_RX
                                if (check_pinmux_flag[u1BitIdx] == 0) {
                                    gFinalRXPerbitFirstPass[p->channel][u1BitIdx] = WinPerBit[u1BitIdx].first_pass;
                                    check_pinmux_flag[u1BitIdx] = 1;
                                }
                                #endif
                                FinalWinPerBit[u1BitIdx].last_pass = WinPerBit[u1BitIdx].last_pass;
                            }

                            #if ENABLE_EYESCAN_GRAPH && !FOR_DV_SIMULATION_USED
                            if (u1UseTestEngine == PATTERN_TEST_ENGINE)
                            {
                                if (EyeScan_index[u1BitIdx] < EYESCAN_BROKEN_NUM)
                                {
                                    gEyeScan_Min[u2VrefLevel/EYESCAN_GRAPH_RX_VREF_STEP][u1BitIdx][EyeScan_index[u1BitIdx]] = WinPerBit[u1BitIdx].first_pass;
                                    gEyeScan_Max[u2VrefLevel/EYESCAN_GRAPH_RX_VREF_STEP][u1BitIdx][EyeScan_index[u1BitIdx]] = WinPerBit[u1BitIdx].last_pass;
                                    mcSHOW_DBG_MSG3("\nu2VrefLevel=%d, u1BitIdx=%d, index=%d (%d, %d)==\n",u2VrefLevel, u1BitIdx, EyeScan_index[u1BitIdx], gEyeScan_Min[u2VrefLevel/EYESCAN_GRAPH_RX_VREF_STEP][u1BitIdx][EyeScan_index[u1BitIdx]], gEyeScan_Max[u2VrefLevel/EYESCAN_GRAPH_RX_VREF_STEP][u1BitIdx][EyeScan_index[u1BitIdx]]);
                                    EyeScan_index[u1BitIdx]=EyeScan_index[u1BitIdx]+1;
                                }
                            }
                            #endif

                            //reset tmp window
                            WinPerBit[u1BitIdx].first_pass = PASS_RANGE_NA;
                            WinPerBit[u1BitIdx].last_pass = PASS_RANGE_NA;
                        }
                    }

                    if(u1PrintCalibrationProc)
                    {
                        if (u1BitIdx % DQS_BIT_NUMBER == 0)
                        {
                            mcSHOW_DBG_MSG2(" ");
                        }

                        if (u4fail_bit == 0)
                        {
                            mcSHOW_DBG_MSG2("o");
                            #if ENABLE_EYESCAN_GRAPH
                            gEyeScan_TotalPassCount[u1BitIdx]+=(u4DelayStep*EYESCAN_GRAPH_RX_VREF_STEP);
                            #endif
                        }
                        else
                        {
                            mcSHOW_DBG_MSG2("x");
                        }

                       #if ENABLE_EYESCAN_GRAPH
                       gEyeScan_TotalPassCount[u1BitIdx]+=(u4DelayStep*EYESCAN_GRAPH_RX_VREF_STEP);
                       #endif
                    }
                }

                if(u1PrintCalibrationProc)
                {
                    mcSHOW_DBG_MSG2(" [MSB]\n");
                }

            #if (FEATURE_RDDQC_K_DMI == TRUE)
                if (u1CalDQMNum != 0)
                    u4PassFlags = 0x3FFFF;
                else
            #endif
                    u4PassFlags = 0xFFFF;

                //if all bits window found and all bits turns to fail again, early break;
                if(uiFinishCount == u4PassFlags)
                {
                    if(u1UseTestEngine)
                        vSetCalibrationResult(p, DRAM_CALIBRATION_RX_PERBIT, DRAM_OK);
                    else
                        vSetCalibrationResult(p, DRAM_CALIBRATION_RX_RDDQC, DRAM_OK);

                    if((u1VrefScanEnable==0)  || u1RXEyeScanEnable)
                    {
                        if((u4err_value & u4PassFlags) == u4PassFlags)
                        {
                                #if !REDUCE_LOG_FOR_PRELOADER
                                mcSHOW_DBG_MSG("\nRX all bits window found, early break!\n");
                                #endif
                                break;  //early break
                        }
                    }
                }
            }

            //find u1min_bit_by_vref/u1min_winsize_by_vref/u2TempWinSum
            for (u1BitIdx = 0; u1BitIdx < p->data_width + u1CalDQMNum ; u1BitIdx++)
            {
                u1ByteIdx = u1BitIdx/DQS_BIT_NUMBER;

                if(!isAutoK)
                    FinalWinPerBit[u1BitIdx].win_size = FinalWinPerBit[u1BitIdx].last_pass - FinalWinPerBit[u1BitIdx].first_pass + (FinalWinPerBit[u1BitIdx].last_pass==FinalWinPerBit[u1BitIdx].first_pass?0:1);

                #if 0
                if(FinalWinPerBit[u1BitIdx].first_pass == PASS_RANGE_NA)
                    FinalWinPerBit[u1BitIdx].win_size = 0;
                else
                    FinalWinPerBit[u1BitIdx].win_size= FinalWinPerBit[u1BitIdx].last_pass- FinalWinPerBit[u1BitIdx].first_pass + u4DelayStep;
                #endif

                if (FinalWinPerBit[u1BitIdx].win_size < u1min_winsize_by_vref[u1ByteIdx])
                {
                    u1min_bit_by_vref[u1ByteIdx]= u1BitIdx;
                    u1min_winsize_by_vref[u1ByteIdx] = FinalWinPerBit[u1BitIdx].win_size;
                }

                u2TempWinSum[u1ByteIdx] += FinalWinPerBit[u1BitIdx].win_size;  //Sum of DQ Windows for vref selection

                #if ENABLE_EYESCAN_GRAPH
                gEyeScan_WinSize[u2VrefLevel/EYESCAN_GRAPH_RX_VREF_STEP][u1BitIdx] = FinalWinPerBit[u1BitIdx].win_size;

                #endif
            }

            //choose Vref
            for (u1ByteIdx=0; u1ByteIdx<(p->data_width/DQS_BIT_NUMBER); u1ByteIdx++)
            {
                if((u1min_winsize_by_vref[u1ByteIdx] > u1min_winsize[u1ByteIdx]) ||
                  ((u1min_winsize_by_vref[u1ByteIdx] == u1min_winsize[u1ByteIdx]) && (u2TempWinSum[u1ByteIdx] > u2rx_window_sum[u1ByteIdx])))
                {
                    u2rx_window_sum[u1ByteIdx] = u2TempWinSum[u1ByteIdx];
                    u1min_winsize[u1ByteIdx] = u1min_winsize_by_vref[u1ByteIdx];
#if __SLT__
                    u1min_bit[u1ByteIdx] = u1min_bit_by_vref[u1ByteIdx];
#endif
                    if(u1KnownVref[u1ByteIdx] != 0xff)
                    {
                        u2FinalVref[u1ByteIdx] = u1KnownVref[u1ByteIdx];
                    }
                    else
                    {
                        u2FinalVref[u1ByteIdx] = u2VrefLevel;
                    }
#if __SLT__
					mcSHOW_SLT_MSG(("RX Vref B%d= %d, ", u1ByteIdx, u2FinalVref[u1ByteIdx]));
					mcSHOW_SLT_MSG(("Window Sum %d, worse bit %d, min window %d\n", u2TempWinSum[u1ByteIdx], u1min_bit_by_vref[u1ByteIdx], u1min_winsize_by_vref[u1ByteIdx]));
					slt_u1min_bit_by_vref[u1ByteIdx][u2FinalVref[u1ByteIdx]] = u1min_bit_by_vref[u1ByteIdx];
					slt_u1min_winsize_by_vref[u1ByteIdx][u2FinalVref[u1ByteIdx]] = u1min_winsize_by_vref[u1ByteIdx];
#endif

                    mcSHOW_DBG_MSG2("RX Vref B%d= %d, ", u1ByteIdx, u2FinalVref[u1ByteIdx]);
                    mcSHOW_DBG_MSG2("Window Sum %d, worse bit %d, min window %d\n", u2TempWinSum[u1ByteIdx], u1min_bit_by_vref[u1ByteIdx], u1min_winsize_by_vref[u1ByteIdx]);
                    for (u1BitIdx = (u1ByteIdx * DQS_BIT_NUMBER); u1BitIdx < (u1ByteIdx * DQS_BIT_NUMBER) + DQS_BIT_NUMBER; u1BitIdx++)
                    {
                        FinalWinPerBit[u1BitIdx].win_center = (FinalWinPerBit[u1BitIdx].last_pass + FinalWinPerBit[u1BitIdx].first_pass) >> 1;     // window center of each DQ bit

                        if(u1PrintCalibrationProc)
                        {
                        #ifdef ETT_PRINT_FORMAT
                            mcSHOW_DBG_MSG("iDelay=%d, Bit %d, Center %d (%d ~ %d) %d\n", iDelay, u1BitIdx, FinalWinPerBit[u1BitIdx].win_center, FinalWinPerBit[u1BitIdx].first_pass, FinalWinPerBit[u1BitIdx].last_pass, FinalWinPerBit[u1BitIdx].win_size);
                        #else
                            mcSHOW_DBG_MSG("iDelay=%d, Bit %2d, Center %3d (%4d ~ %4d) %d\n", iDelay, u1BitIdx, FinalWinPerBit[u1BitIdx].win_center, FinalWinPerBit[u1BitIdx].first_pass, FinalWinPerBit[u1BitIdx].last_pass, FinalWinPerBit[u1BitIdx].win_size);
                        #endif
                        }

#ifdef FOR_HQA_TEST_USED
                        if (u1UseTestEngine == PATTERN_TEST_ENGINE)
                        {
                            gFinalRXPerbitWin[p->channel][p->rank][u1BitIdx] = FinalWinPerBit[u1BitIdx].win_size;
                        }
#endif
#if __FLASH_TOOL_DA__
                        if (u1UseTestEngine == PATTERN_RDDQC)
                        {
                            PINInfo_flashtool.DQ_RX_WIN_SIZE[p->channel][p->rank][u1BitIdx]= FinalWinPerBit[u1BitIdx].win_size;
                        }
#endif
                    }

                #if (FEATURE_RDDQC_K_DMI == TRUE)
                    if (u1CalDQMNum != 0)
                    {
                        FinalWinPerBit[p->data_width + u1ByteIdx].win_center = (FinalWinPerBit[p->data_width + u1ByteIdx].last_pass + FinalWinPerBit[p->data_width + u1ByteIdx].first_pass) >> 1;     // window center of DQM bit
                        mcSHOW_DBG_MSG("iDelay=%d, DQM %d, Center %d (%d ~ %d) %d\n", iDelay, u1ByteIdx, FinalWinPerBit[p->data_width + u1ByteIdx].win_center, FinalWinPerBit[p->data_width + u1ByteIdx].first_pass, FinalWinPerBit[p->data_width + u1ByteIdx].last_pass, FinalWinPerBit[u1BitIdx].win_size);
                    }
                #endif

                }
            }

#if ENABLE_EYESCAN_GRAPH
            for (u1BitIdx = 0; u1BitIdx < p->data_width; u1BitIdx++)
            {
                if (u1pass_in_this_vref_flag[u1BitIdx])
                {
                    U8 continuevrefheightfirstpass, continuevrefheightlastpass;

                    continuevrefheightfirstpass = gEyeScan_ContinueVrefHeight[u1BitIdx] & 0xff;
                    if (continuevrefheightfirstpass == 0)
                    {
                        continuevrefheightfirstpass = u2VrefLevel;
                    }
                    continuevrefheightlastpass = u2VrefLevel;
                    gEyeScan_ContinueVrefHeight[u1BitIdx] = (continuevrefheightlastpass<<8) | continuevrefheightfirstpass;
                }
            }

#endif

            if((u1min_winsize[0]>RX_PASS_WIN_CRITERIA && u1min_winsize[1]>RX_PASS_WIN_CRITERIA) && (u2TempWinSum[0] < (u2rx_window_sum[0]*95/100)) && (u2TempWinSum[1] < (u2rx_window_sum[1]*95/100))&& u1RXEyeScanEnable == 0)
            {
                //mcSHOW_DBG_MSG("\nRX Vref found, early break!\n");
                u2VrefLevel = u2VrefEnd;
                break;//max vref found, early break;
            }
        }
        #ifdef FOR_HQA_REPORT_USED
        if (u1UseTestEngine == PATTERN_TEST_ENGINE)
        {
            for (u1ByteIdx=0; u1ByteIdx<(p->data_width/DQS_BIT_NUMBER); u1ByteIdx++)
            {
                if(((u1min_winsize[u1ByteIdx] * gHQALOG_RX_delay_cell_ps_075V * DDRPhyFMeter() * 2) )/1000000 < 50)
                {
                    mcSHOW_ERR_MSG("[WARNING] Smaller RX win !!\n");
                    #if CHECK_HQA_CRITERIA
                    while(1);
                    #endif
                }
            }
        }
        #endif

        if (u1UseTestEngine == PATTERN_TEST_ENGINE)
        {
            DramcEngine2End(p);
            vAutoRefreshSwitch(p, DISABLE);
        }
        else
        {
            DramcRxWinRDDQCEnd(p);
        }

        //CKEFixOnOff(p, p->rank, CKE_DYNAMIC, CKE_WRITE_TO_ONE_CHANNEL);

        if ((u1UseTestEngine == PATTERN_RDDQC) && (is_ddr3_family(p))) {
            /* DDR3 may only send data back on DQ0 of each byte  All DQs within a byte share the same result */
            for (u1ByteIdx = 0; u1ByteIdx < (p->data_width / DQS_BIT_NUMBER); u1ByteIdx++) {
                PASS_WIN_DATA_T tmp = FinalWinPerBit[u1ByteIdx * DQS_BIT_NUMBER];

                for (u1BitIdx = 0; u1BitIdx < DQS_BIT_NUMBER; u1BitIdx++)
                    FinalWinPerBit[u1BitIdx + u1ByteIdx * DQS_BIT_NUMBER] = tmp;
            }
        }
#if __SLT__
	if (u1VrefScanEnable || is_ddr3_family(p)) {
		for (u1BitIdx = 0; u1BitIdx < p->data_width; u1BitIdx++) {
			mcSHOW_PARSER_MSG(("[%d Mbps][CH%d][RK%d][RX] Bit%d Center %d (%d ~ %d) %d\n",\
				p->frequency*2, p->channel,p->rank,\
				u1BitIdx, FinalWinPerBit[u1BitIdx].win_center, FinalWinPerBit[u1BitIdx].first_pass,\
				FinalWinPerBit[u1BitIdx].last_pass, FinalWinPerBit[u1BitIdx].win_size));
		}

		mcSHOW_PARSER_MSG(("[%d Mbps][CH%d][RK%d][RX] Best Vref %d, Window Min %d at DQ%d, Window Sum %d\n",\
			p->frequency*2,p->channel, p->rank, u2FinalVref[0], u1min_winsize[0], u1min_bit[0], u2rx_window_sum[0]));
	}
#endif
        // 3
        //As per byte, check max DQS delay in 8-bit. Except for the bit of max DQS delay, delay DQ to fulfill setup time = hold time
        for (u1ByteIdx = 0; u1ByteIdx < (p->data_width / DQS_BIT_NUMBER); u1ByteIdx++)
        {
            u2TmpDQMSum =0;
            ucbit_first = DQS_BIT_NUMBER * u1ByteIdx;
            ucbit_last = DQS_BIT_NUMBER * u1ByteIdx + DQS_BIT_NUMBER - 1;
            iDQSDlyPerbyte[u1ByteIdx] = MAX_RX_DQSDLY_TAPS;

            for (u1BitIdx = ucbit_first; u1BitIdx <= ucbit_last; u1BitIdx++)
            {
                // find out the min Center value in this byte (a negative min center is negated below to become the DQS delay)
                if (FinalWinPerBit[u1BitIdx].win_center < iDQSDlyPerbyte[u1ByteIdx])
                {
                    iDQSDlyPerbyte[u1ByteIdx] = FinalWinPerBit[u1BitIdx].win_center;
                }
            }
        #if (FEATURE_RDDQC_K_DMI == TRUE)
            if (u1CalDQMNum != 0)
            {
                if (iDQSDlyPerbyte[u1ByteIdx] > FinalWinPerBit[p->data_width + u1ByteIdx].win_center)
                    iDQSDlyPerbyte[u1ByteIdx] = FinalWinPerBit[p->data_width + u1ByteIdx].win_center;
            }
        #endif

            if (iDQSDlyPerbyte[u1ByteIdx] > 0)  // Delay DQS=0, Delay DQ only
            {
                iDQSDlyPerbyte[u1ByteIdx] = 0;
            }
            else  //Need to delay DQS
            {
                iDQSDlyPerbyte[u1ByteIdx] = -iDQSDlyPerbyte[u1ByteIdx];
            }

            // we delay DQ or DQS to let DQS sample the middle of rx pass window for all the 8 bits,
            for (u1BitIdx = ucbit_first; u1BitIdx <= ucbit_last; u1BitIdx++)
            {
                FinalWinPerBit[u1BitIdx].best_dqdly = iDQSDlyPerbyte[u1ByteIdx] + FinalWinPerBit[u1BitIdx].win_center;
                u2TmpDQMSum += FinalWinPerBit[u1BitIdx].best_dqdly;
#if ENABLE_EYESCAN_GRAPH
                    gEyeScan_DelayCellPI[u1BitIdx] = FinalWinPerBit[u1BitIdx].best_dqdly;
#endif
            }
        #if (FEATURE_RDDQC_K_DMI == TRUE)
            if (u1CalDQMNum != 0)
                iDQMDlyPerbyte[u1ByteIdx] = iDQSDlyPerbyte[u1ByteIdx] + FinalWinPerBit[p->data_width + u1ByteIdx].win_center;
        #else
            // calculate DQM as average of 8 DQ delay
            iDQMDlyPerbyte[u1ByteIdx] = u2TmpDQMSum / DQS_BIT_NUMBER;
        #endif

#ifdef FOR_HQA_REPORT_USED
            HQA_Log_Message_for_Report(p, p->channel, p->rank, HQA_REPORT_FORMAT0, "RX_Window_Center_", "DQS", u1ByteIdx, iDQSDlyPerbyte[u1ByteIdx], NULL);
            HQA_Log_Message_for_Report(p, p->channel, p->rank, HQA_REPORT_FORMAT0, "RX_Window_Center_", "DQM", u1ByteIdx, iDQMDlyPerbyte[u1ByteIdx], NULL);
            for (u1BitIdx = ucbit_first; u1BitIdx <= ucbit_last; u1BitIdx++)
            {
                HQA_Log_Message_for_Report(p, p->channel, p->rank, HQA_REPORT_FORMAT1, "RX_Window_Center_", "DQ", u1BitIdx, FinalWinPerBit[u1BitIdx].win_center, NULL);
            }
#endif
        }
    }

    if (u1VrefScanEnable == TRUE)
    {
         vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ5), u2FinalVref[0], SHU_B0_DQ5_RG_RX_ARDQ_VREF_SEL_B0);
         vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ5), u2FinalVref[1], SHU_B1_DQ5_RG_RX_ARDQ_VREF_SEL_B1);
#if __SLT__
	 mcSHOW_PARSER_MSG(("\nFinal RX Vref %d, apply to both rank0 and 1\n", u2FinalVref[0]));
#endif
    }

    // set dqs delay, (dqm delay)
    for (u1ByteIdx = 0; u1ByteIdx < (p->data_width / DQS_BIT_NUMBER); u1ByteIdx++)
    {
        //Set RX Final Vref Here
        if (u1VrefScanEnable == TRUE)
        {
        #if SUPPORT_SAVE_TIME_FOR_CALIBRATION
            if (p->femmc_Ready == 0)
            {
                p->pSavetimeData->u1RxWinPerbitVref_Save[p->channel][u1ByteIdx] = u2FinalVref[u1ByteIdx];
            }
        #endif
#if __SLT__
			mcSHOW_SLT_MSG(("\nFinal RX Vref Byte %d = %d ", u1ByteIdx, u2FinalVref[u1ByteIdx]));
			mcSHOW_SLT_MSG(("worse bit %d, min window %d\n", slt_u1min_bit_by_vref[u1ByteIdx][u2FinalVref[u1ByteIdx]] , slt_u1min_winsize_by_vref[u1ByteIdx][u2FinalVref[u1ByteIdx]]));
			mcSHOW_SLT_MSG(("u2g_num_dlycell_perT = %d \n", u2g_num_dlycell_perT));
			if ((slt_u1min_winsize_by_vref[u1ByteIdx][u2FinalVref[u1ByteIdx]]*4) < u2g_num_dlycell_perT)
			{
				mcSHOW_SLT_MSG(("[WARNING] Smaller RX win !!\n"));
				mcSHOW_SLT_MSG(("min RX win < 0.5UI \n"));
				ASSERT(0);
			}
#endif

            mcSHOW_DBG_MSG("\nFinal RX Vref Byte %d = %d", u1ByteIdx, u2FinalVref[u1ByteIdx]);
            mcDUMP_REG_MSG("\nFinal RX Vref Byte %d = %d", u1ByteIdx, u2FinalVref[u1ByteIdx]);

            // When only calibrate RX Vref for Rank 0, apply the same value for Rank 1.
            if (p->rank == RANK_0)
            {
                gFinalRXVrefDQ[p->channel][RANK_0][u1ByteIdx] = (U8) u2FinalVref[u1ByteIdx];
                gFinalRXVrefDQForSpeedUp[p->channel][RANK_0][p->odt_onoff][u1ByteIdx] = (U8) u2FinalVref[u1ByteIdx];
                mcSHOW_DBG_MSG(" to rank0");
                mcDUMP_REG_MSG(" to rank0");
            }
            gFinalRXVrefDQ[p->channel][RANK_1][u1ByteIdx] = (U8) u2FinalVref[u1ByteIdx];
            gFinalRXVrefDQForSpeedUp[p->channel][RANK_1][p->odt_onoff][u1ByteIdx] = (U8) u2FinalVref[u1ByteIdx];
            mcSHOW_DBG_MSG(" to rank1\n");
            mcDUMP_REG_MSG(" to rank1\n");
        }

    #if SUPPORT_SAVE_TIME_FOR_CALIBRATION
        if (p->femmc_Ready == 0)
        {
            p->pSavetimeData->u1RxWinPerbit_DQS[p->channel][p->rank][u1ByteIdx] = (U32)iDQSDlyPerbyte[u1ByteIdx];
        #if (FEATURE_RDDQC_K_DMI == TRUE)
            if (u1CalDQMNum != 0)
        #endif
            {
                p->pSavetimeData->u1RxWinPerbit_DQM[p->channel][p->rank][u1ByteIdx] = (U32)iDQMDlyPerbyte[u1ByteIdx];
            }
        }
    #endif
    }

#if DUMP_TA2_WINDOW_SIZE_RX_TX
    //RX
    if (u1UseTestEngine == PATTERN_TEST_ENGINE)
    {
        U32 u4B0Tatal =0;
        U32 u4B1Tatal =0;
        mcSHOW_DBG_MSG("RX window per bit CH[%d] Rank[%d] window size\n", p->channel, p->rank);
        for (u1BitIdx = 0; u1BitIdx < p->data_width; u1BitIdx++)
        {
            mcSHOW_DBG_MSG("DQ[%d] size = %d\n", u1BitIdx, gFinalRXPerbitWin[p->channel][p->rank][u1BitIdx]);
            if(u1BitIdx < 8)
            {
                u4B0Tatal += gFinalRXPerbitWin[p->channel][p->rank][u1BitIdx];
            }
            else
            {
                u4B1Tatal += gFinalRXPerbitWin[p->channel][p->rank][u1BitIdx];
            }
        }
        mcSHOW_DBG_MSG("total rx window size B0: %d B1: %d\n", u4B0Tatal, u4B1Tatal);
    }
#endif

#if (fcFOR_CHIP_ID == fcGriffin)
	/* For fcCheetah DSC (BGA) pinmux, need swap DQS delay */
	if (p->DRAMPinmux == PINMUX_DSC) {
		S32 tmp;

		tmp = iDQSDlyPerbyte[0];
		iDQSDlyPerbyte[0] = iDQSDlyPerbyte[1];
		iDQSDlyPerbyte[1] = tmp;
	}
#endif

	// set dqs delay, (dqm delay)
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_RXDLY5),
		P_Fld((U32)iDQSDlyPerbyte[0], SHU_RK_B0_RXDLY5_RX_ARDQS0_R_DLY_B0) |
		P_Fld((U32)iDQSDlyPerbyte[0], SHU_RK_B0_RXDLY5_RX_ARDQS0_F_DLY_B0));
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_RXDLY4),
		P_Fld((U32)iDQMDlyPerbyte[0], SHU_RK_B0_RXDLY4_RX_ARDQM0_R_DLY_B0) |
		P_Fld((U32)iDQMDlyPerbyte[0], SHU_RK_B0_RXDLY4_RX_ARDQM0_F_DLY_B0));
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_RXDLY5),
		P_Fld((U32)iDQSDlyPerbyte[1], SHU_RK_B1_RXDLY5_RX_ARDQS0_R_DLY_B1) |
		P_Fld((U32)iDQSDlyPerbyte[1], SHU_RK_B1_RXDLY5_RX_ARDQS0_F_DLY_B1));
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_RXDLY4),
		P_Fld((U32)iDQMDlyPerbyte[1], SHU_RK_B1_RXDLY4_RX_ARDQM0_R_DLY_B1) |
		P_Fld((U32)iDQMDlyPerbyte[1], SHU_RK_B1_RXDLY4_RX_ARDQM0_F_DLY_B1));

	// set dq delay
	for (u1BitIdx = 0; u1BitIdx < DQS_BIT_NUMBER; u1BitIdx += 2)
	{

		//U8 const *mapping_tbl = get_dq_dramc2phy_mapping(p);

		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_RXDLY0 + u1BitIdx * 2),
			P_Fld(((U32)FinalWinPerBit[u1BitIdx].best_dqdly), SHU_RK_B0_RXDLY0_RX_ARDQ0_R_DLY_B0) |
			P_Fld(((U32)FinalWinPerBit[u1BitIdx].best_dqdly), SHU_RK_B0_RXDLY0_RX_ARDQ0_F_DLY_B0) |
			P_Fld(((U32)FinalWinPerBit[u1BitIdx + 1].best_dqdly), SHU_RK_B0_RXDLY0_RX_ARDQ1_R_DLY_B0) |
			P_Fld(((U32)FinalWinPerBit[u1BitIdx + 1].best_dqdly), SHU_RK_B0_RXDLY0_RX_ARDQ1_F_DLY_B0));

		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_RXDLY0 + u1BitIdx * 2),
			P_Fld((U32)FinalWinPerBit[u1BitIdx + 8].best_dqdly, SHU_RK_B1_RXDLY0_RX_ARDQ0_R_DLY_B1) |
			P_Fld((U32)FinalWinPerBit[u1BitIdx + 8].best_dqdly, SHU_RK_B1_RXDLY0_RX_ARDQ0_F_DLY_B1) |
			P_Fld((U32)FinalWinPerBit[u1BitIdx + 9].best_dqdly, SHU_RK_B1_RXDLY0_RX_ARDQ1_R_DLY_B1) |
			P_Fld((U32)FinalWinPerBit[u1BitIdx + 9].best_dqdly, SHU_RK_B1_RXDLY0_RX_ARDQ1_F_DLY_B1));
}

#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
    if (p->femmc_Ready == 0)
	{
        for (u1BitIdx = 0; u1BitIdx < 16; u1BitIdx++)
    	{
         	p->pSavetimeData->u1RxWinPerbit_DQ[p->channel][p->rank][u1BitIdx] = (U32)FinalWinPerBit[u1BitIdx].best_dqdly;
#if RUNTIME_SHMOO_RELEATED_FUNCTION
         	p->pSavetimeData->u1RxWinPerbitDQ_firsbypass_Save[p->channel][p->rank][u1BitIdx] = (U32)FinalWinPerBit[u1BitIdx].first_pass;
        	p->pSavetimeData->u1RxWinPerbitDQ_lastbypass_Save[p->channel][p->rank][u1BitIdx] = (U32)FinalWinPerBit[u1BitIdx].last_pass;
#endif
      	}
	}
#endif

    DramPhyReset(p);

#if DDR_RDDQC_PINMUX_WORKAROUND
    DramcRestoreRegisters(p, u4RegBackupAddress, sizeof(u4RegBackupAddress) / sizeof(U32), TO_ONE_CHANNEL);
#endif

    vSetRank(p, backup_rank);

    vPrintCalibrationBasicInfo(p);

#ifdef ETT_PRINT_FORMAT
    mcSHOW_DBG_MSG("DQS Delay:\nDQS0 = %d, DQS1 = %d\n"
                    "DQM Delay:\nDQM0 = %d, DQM1 = %d\n",
                        iDQSDlyPerbyte[0], iDQSDlyPerbyte[1],
                        iDQMDlyPerbyte[0], iDQMDlyPerbyte[1]);
    mcDUMP_REG_MSG("DQS Delay:\nDQS0 = %d, DQS1 = %d\n"
                    "DQM Delay:\nDQM0 = %d, DQM1 = %d\n",
                        iDQSDlyPerbyte[0], iDQSDlyPerbyte[1],
                        iDQMDlyPerbyte[0], iDQMDlyPerbyte[1]);
#else
    mcSHOW_DBG_MSG("DQS Delay:\nDQS0 = %2d, DQS1 = %2d\n"
                    "DQM Delay:\nDQM0 = %2d, DQM1 = %2d\n",
                        iDQSDlyPerbyte[0], iDQSDlyPerbyte[1],
                        iDQMDlyPerbyte[0], iDQMDlyPerbyte[1]);
    mcDUMP_REG_MSG("DQS Delay:\nDQS0 = %2d, DQS1 = %2d\n"
                    "DQM Delay:\nDQM0 = %2d, DQM1 = %2d\n",
                        iDQSDlyPerbyte[0], iDQSDlyPerbyte[1],
                        iDQMDlyPerbyte[0], iDQMDlyPerbyte[1]);
#endif
    mcSHOW_DBG_MSG("DQ Delay:\n");
    mcDUMP_REG_MSG("DQ Delay:\n");

    for (u1BitIdx = 0; u1BitIdx < p->data_width; u1BitIdx = u1BitIdx + 4)
    {
#ifdef ETT_PRINT_FORMAT
        mcSHOW_DBG_MSG("DQ%d =%d, DQ%d =%d, DQ%d =%d, DQ%d =%d\n", u1BitIdx, FinalWinPerBit[u1BitIdx].best_dqdly, u1BitIdx+1, FinalWinPerBit[u1BitIdx+1].best_dqdly, u1BitIdx+2, FinalWinPerBit[u1BitIdx+2].best_dqdly, u1BitIdx+3, FinalWinPerBit[u1BitIdx+3].best_dqdly);
        mcDUMP_REG_MSG("DQ%d =%d, DQ%d =%d, DQ%d =%d, DQ%d =%d\n", u1BitIdx, FinalWinPerBit[u1BitIdx].best_dqdly, u1BitIdx+1, FinalWinPerBit[u1BitIdx+1].best_dqdly, u1BitIdx+2, FinalWinPerBit[u1BitIdx+2].best_dqdly, u1BitIdx+3, FinalWinPerBit[u1BitIdx+3].best_dqdly);
#else
        mcSHOW_DBG_MSG("DQ%2d =%2d, DQ%2d =%2d, DQ%2d =%2d, DQ%2d =%2d\n", u1BitIdx, FinalWinPerBit[u1BitIdx].best_dqdly, u1BitIdx+1, FinalWinPerBit[u1BitIdx+1].best_dqdly, u1BitIdx+2, FinalWinPerBit[u1BitIdx+2].best_dqdly, u1BitIdx+3, FinalWinPerBit[u1BitIdx+3].best_dqdly);
        mcDUMP_REG_MSG("DQ%2d =%2d, DQ%2d =%2d, DQ%2d =%2d, DQ%2d =%2d\n", u1BitIdx, FinalWinPerBit[u1BitIdx].best_dqdly, u1BitIdx+1, FinalWinPerBit[u1BitIdx+1].best_dqdly, u1BitIdx+2, FinalWinPerBit[u1BitIdx+2].best_dqdly, u1BitIdx+3, FinalWinPerBit[u1BitIdx+3].best_dqdly);
#endif
    }
    mcSHOW_DBG_MSG("\n\n");
    mcSHOW_DBG_MSG3("[DramcRxWindowPerbitCal] Done\n");
#if __SLT__
    mcSHOW_PARSER_MSG(("\n"));
    mcSHOW_PARSER_MSG(("[DramcRxWindowPerbitCal] Done\n\n"));
#endif
    return DRAM_OK;

    // Log example  ==> Need to update
    /*
------------------------------------------------------
Start calculate dq time and dqs time /
Find max DQS delay per byte / Adjust DQ delay to align DQS...
------------------------------------------------------
bit# 0 : dq time=11 dqs time= 8
bit# 1 : dq time=11 dqs time= 8
bit# 2 : dq time=11 dqs time= 6
bit# 3 : dq time=10 dqs time= 8
bit# 4 : dq time=11 dqs time= 8
bit# 5 : dq time=10 dqs time= 8
bit# 6 : dq time=11 dqs time= 8
bit# 7 : dq time= 9 dqs time= 6
----seperate line----
bit# 8 : dq time=12 dqs time= 7
bit# 9 : dq time=10 dqs time= 8
bit#10 : dq time=11 dqs time= 8
bit#11 : dq time=10 dqs time= 8
bit#12 : dq time=11 dqs time= 8
bit#13 : dq time=11 dqs time= 8
bit#14 : dq time=11 dqs time= 8
bit#15 : dq time=12 dqs time= 8
----seperate line----
bit#16 : dq time=11 dqs time= 7
bit#17 : dq time=10 dqs time= 8
bit#18 : dq time=11 dqs time= 7
bit#19 : dq time=11 dqs time= 6
bit#20 : dq time=10 dqs time= 9
bit#21 : dq time=11 dqs time=10
bit#22 : dq time=11 dqs time=10
bit#23 : dq time= 9 dqs time= 9
----seperate line----
bit#24 : dq time=12 dqs time= 6
bit#25 : dq time=13 dqs time= 6
bit#26 : dq time=13 dqs time= 7
bit#27 : dq time=11 dqs time= 7
bit#28 : dq time=12 dqs time= 8
bit#29 : dq time=10 dqs time= 8
bit#30 : dq time=13 dqs time= 7
bit#31 : dq time=11 dqs time= 8
----seperate line----
==================================================
    dramc_rxdqs_perbit_swcal_v2
    channel=2(2:cha, 3:chb) apply = 1
==================================================
DQS Delay :
 DQS0 = 0 DQS1 = 0 DQS2 = 0 DQS3 = 0
DQ Delay :
DQ 0 =  1 DQ 1 =  1 DQ 2 =  2 DQ 3 =  1
DQ 4 =  1 DQ 5 =  1 DQ 6 =  1 DQ 7 =  1
DQ 8 =  2 DQ 9 =  1 DQ10 =  1 DQ11 =  1
DQ12 =  1 DQ13 =  1 DQ14 =  1 DQ15 =  2
DQ16 =  2 DQ17 =  1 DQ18 =  2 DQ19 =  2
DQ20 =  0 DQ21 =  0 DQ22 =  0 DQ23 =  0
DQ24 =  3 DQ25 =  3 DQ26 =  3 DQ27 =  2
DQ28 =  2 DQ29 =  1 DQ30 =  3 DQ31 =  1
_______________________________________________________________
   */
}

#if ENABLE_DDR_CS_ADJUST
/* NOTE(review): presumably the largest CS PI tap value; not referenced in the
 * visible part of this file -- confirm before relying on it. */
#define MAX_CS_PI_DELAY         63

/*
 * Snapshot of the CS / CMD delay settings, filled by get_cmd_delay() and
 * consumed by update_cs_delay() / update_cmd_delay().
 *
 * Every array is indexed by channel (CHANNEL_A, CHANNEL_B).  get_cmd_delay()
 * fills the PI arrays for each channel it visits, but only the CHANNEL_A
 * entry of the UI arrays; the CHANNEL_B UI entries are left untouched.
 */
struct cmd_delay {
    U8 cs_ui[2];    /* SHU_SELPH_CA5_DLY_CS field */
    U16 cs_pi[2];   /* SHU_RK_CA_CMD_RG_ARPI_CS field */
    U8 cmd_ui[2];   /* SHU_SELPH_CA5_DLY_WE field, used as the UI base for all CMD signals */
    U16 cmd_pi[2];  /* SHU_RK_CA_CMD_RG_ARPI_CMD field */
};

/*
 * get_cmd_delay - capture the current CS/CMD delay register settings.
 *
 * @p:     DRAM context; p->support_channel_num selects how many channels are
 *         visited (one more on Griffin builds).
 * @delay: output snapshot.
 *
 * For every visited channel the CS and CMD PI fields of SHU_RK_CA_CMD are
 * read with that channel selected via channel_backup_and_set() and the
 * previous channel restored afterwards.  The UI values are read once, into
 * the CHANNEL_A slot only, from SHU_SELPH_CA5 (DLY_CS for CS, DLY_WE as the
 * representative CMD UI).  The CHANNEL_B UI slots are not written.
 */
static void get_cmd_delay(DRAMC_CTX_T *p, struct cmd_delay *delay)
{
    /* Number of per-channel slots available in struct cmd_delay. */
    const U8 slots = (U8)(sizeof(delay->cs_pi) / sizeof(delay->cs_pi[0]));
    U8 ch;
    U8 ch_max;

    ch_max = p->support_channel_num;
#if (fcFOR_CHIP_ID == fcGriffin)
    ch_max++;
#endif

    /*
     * The snapshot arrays have a fixed length; never let the channel count
     * (plus the Griffin increment) index past them.
     */
    if (ch_max > slots)
        ch_max = slots;

    for (ch = CHANNEL_A; ch < ch_max; ch++) {
        channel_backup_and_set(p, ch);
        delay->cs_pi[ch] = u4IO32ReadFldAlign(DDRPHY_REG_SHU_RK_CA_CMD, SHU_RK_CA_CMD_RG_ARPI_CS);
        delay->cmd_pi[ch] = u4IO32ReadFldAlign(DDRPHY_REG_SHU_RK_CA_CMD, SHU_RK_CA_CMD_RG_ARPI_CMD);
        channel_restore(p);
    }

    delay->cs_ui[CHANNEL_A] = u4IO32ReadFldAlign(DRAMC_REG_SHU_SELPH_CA5, SHU_SELPH_CA5_DLY_CS);

    /* All other CMD UI shall be the same */
    delay->cmd_ui[CHANNEL_A] = u4IO32ReadFldAlign(DRAMC_REG_SHU_SELPH_CA5, SHU_SELPH_CA5_DLY_WE);
}

/*
 * update_cs_delay - program the CS PI tap and the UI delay of the CS group.
 *
 * @p:          DRAM context (p->DRAMPinmux selects which channel owns CS PI).
 * @orig_delay: snapshot from get_cmd_delay(); provides the PI value used when
 *              restoring and the base UI value in both modes.
 * @pi_offset:  requested delay; split into a PI part (offset % 64) and a UI
 *              part ((offset / 64) * 2).  Ignored when @to_default is set.
 * @to_default: non-zero restores the saved PI value and adds no UI offset.
 *
 * With the DSC pinmux the PI field is written on CHANNEL_B (and the saved
 * CHANNEL_B value is the one restored); otherwise the current channel is
 * used.  The same UI value is then written to the CS, CKE, ODT, RESET and CS1
 * fields of SHU_SELPH_CA5 and to CKE1 of SHU_SELPH_CA6.
 */
static void update_cs_delay(DRAMC_CTX_T *p, struct cmd_delay *orig_delay, U16 pi_offset, U8 to_default)
{
    const int dsc_pinmux = (p->DRAMPinmux == PINMUX_DSC);
    U16 cs_pi;
    U16 ui_step;
    int cs_ui;

    if (!to_default) {
        cs_pi = pi_offset % 64;
        ui_step = (pi_offset / 64) * 2;
    } else {
        cs_pi = dsc_pinmux ? orig_delay->cs_pi[CHANNEL_B]
                           : orig_delay->cs_pi[CHANNEL_A];
        ui_step = 0;
    }

    /* UI base always comes from the CHANNEL_A snapshot. */
    cs_ui = orig_delay->cs_ui[CHANNEL_A] + ui_step;

    /* PI tap: routed to CHANNEL_B for the DSC pinmux only. */
    if (dsc_pinmux) {
        channel_backup_and_set(p, CHANNEL_B);
    }
    vIO32WriteFldAlign(DDRPHY_REG_SHU_RK_CA_CMD, cs_pi, SHU_RK_CA_CMD_RG_ARPI_CS);
    if (dsc_pinmux) {
        channel_restore(p);
    }

    /* UI delay: every signal of the CS group gets the same value. */
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHU_SELPH_CA5),
        P_Fld(cs_ui, SHU_SELPH_CA5_DLY_CS) |
        P_Fld(cs_ui, SHU_SELPH_CA5_DLY_CKE) |
        P_Fld(cs_ui, SHU_SELPH_CA5_DLY_ODT) |
        P_Fld(cs_ui, SHU_SELPH_CA5_DLY_RESET) |
        P_Fld(cs_ui, SHU_SELPH_CA5_DLY_CS1));

    vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHU_SELPH_CA6),
        P_Fld(cs_ui, SHU_SELPH_CA6_DLY_CKE1));
}

/*
 * update_cmd_delay - program the CMD PI tap and the UI delay of all
 * command/address signals.
 *
 * @p:          DRAM context (p->DRAMPinmux selects where the CS PI is written).
 * @orig_delay: snapshot from get_cmd_delay(); provides the CHANNEL_A CMD PI
 *              used when restoring and the base CMD UI in both modes.
 * @pi_offset:  requested delay; split into a PI part (offset % 64) and a UI
 *              part ((offset / 64) * 2).  Ignored when @to_default is set.
 * @to_default: non-zero restores the saved PI value and adds no UI offset.
 *
 * Write order: ARPI_CMD on all channels, ARPI_CLK on CHANNEL_B, ARPI_CS on
 * the current channel (DSC pinmux) or on CHANNEL_B (otherwise), then the UI
 * fields of SHU_SELPH_CA5..CA8.
 */
static void update_cmd_delay(DRAMC_CTX_T *p, struct cmd_delay *orig_delay, U16 pi_offset, U8 to_default)
{
    U16 cmd_pi;
    U16 ui_step;
    int cmd_ui;

    cmd_pi = to_default ? orig_delay->cmd_pi[CHANNEL_A] : (pi_offset % 64);
    ui_step = to_default ? 0 : ((pi_offset / 64) * 2);

    /* UI base always comes from the CHANNEL_A snapshot. */
    cmd_ui = orig_delay->cmd_ui[CHANNEL_A] + ui_step;

    /* PI taps */
    vIO32WriteFldAlign_All(DDRPHY_REG_SHU_RK_CA_CMD, cmd_pi, SHU_RK_CA_CMD_RG_ARPI_CMD);

    channel_backup_and_set(p, CHANNEL_B);
    vIO32WriteFldAlign(DDRPHY_REG_SHU_RK_CA_CMD, cmd_pi, SHU_RK_CA_CMD_RG_ARPI_CLK);
    channel_restore(p);

    if (p->DRAMPinmux != PINMUX_DSC) {
        channel_backup_and_set(p, CHANNEL_B);
        vIO32WriteFldAlign(DDRPHY_REG_SHU_RK_CA_CMD, cmd_pi, SHU_RK_CA_CMD_RG_ARPI_CS);
        channel_restore(p);
    } else {
        vIO32WriteFldAlign(DDRPHY_REG_SHU_RK_CA_CMD, cmd_pi, SHU_RK_CA_CMD_RG_ARPI_CS);
    }

    /* UI delay: WE / CAS / RAS */
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHU_SELPH_CA5),
        P_Fld(cmd_ui, SHU_SELPH_CA5_DLY_WE) |
        P_Fld(cmd_ui, SHU_SELPH_CA5_DLY_CAS) |
        P_Fld(cmd_ui, SHU_SELPH_CA5_DLY_RAS));

    /* UI delay: bank address */
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHU_SELPH_CA6),
        P_Fld(cmd_ui, SHU_SELPH_CA6_DLY_BA0) |
        P_Fld(cmd_ui, SHU_SELPH_CA6_DLY_BA1) |
        P_Fld(cmd_ui, SHU_SELPH_CA6_DLY_BA2));

    /* UI delay: row address 0..7 */
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHU_SELPH_CA7),
        P_Fld(cmd_ui, SHU_SELPH_CA7_DLY_RA0) |
        P_Fld(cmd_ui, SHU_SELPH_CA7_DLY_RA1) |
        P_Fld(cmd_ui, SHU_SELPH_CA7_DLY_RA2) |
        P_Fld(cmd_ui, SHU_SELPH_CA7_DLY_RA3) |
        P_Fld(cmd_ui, SHU_SELPH_CA7_DLY_RA4) |
        P_Fld(cmd_ui, SHU_SELPH_CA7_DLY_RA5) |
        P_Fld(cmd_ui, SHU_SELPH_CA7_DLY_RA6) |
        P_Fld(cmd_ui, SHU_SELPH_CA7_DLY_RA7));

    /* UI delay: row address 8..15 */
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHU_SELPH_CA8),
        P_Fld(cmd_ui, SHU_SELPH_CA8_DLY_RA8) |
        P_Fld(cmd_ui, SHU_SELPH_CA8_DLY_RA9) |
        P_Fld(cmd_ui, SHU_SELPH_CA8_DLY_RA10) |
        P_Fld(cmd_ui, SHU_SELPH_CA8_DLY_RA11) |
        P_Fld(cmd_ui, SHU_SELPH_CA8_DLY_RA12) |
        P_Fld(cmd_ui, SHU_SELPH_CA8_DLY_RA13) |
        P_Fld(cmd_ui, SHU_SELPH_CA8_DLY_RA14) |
        P_Fld(cmd_ui, SHU_SELPH_CA8_DLY_RA15));
}

/*
 * DramModeRegReInit - toggle the reset control and rewrite the mode registers
 * of the current rank from the cached values in gMRVal.
 *
 * @p: DRAM context; p->channel / p->rank select the gMRVal entry and the rank
 *     that is written.
 *
 * Sequence: MISC_CTRL1_R_DMDA_RRESETB_I = 0 on all channels, CKE fixed off,
 * 200us wait, RRESETB_I = 1, 500us wait, CKE fixed on.  Then MR2, MR3, MR1
 * are written for both DRAM families.  Non-DDR3 parts additionally get MR4,
 * MR5 and three MR6 writes.  MR0 is written last, followed by the
 * family-specific software ZQ routine and a final 1us wait.
 */
void DramModeRegReInit(DRAMC_CTX_T *p)
{
    int ddr3;
    int pass;

    vIO32WriteFldAlign_All(DRAMC_REG_ADDR(DDRPHY_REG_MISC_CTRL1), 0, MISC_CTRL1_R_DMDA_RRESETB_I);
    CKEFixOnOff(p, TO_ALL_RANK, CKE_FIXOFF, TO_ALL_CHANNEL);
    mcDELAY_US(200);
    vIO32WriteFldAlign_All(DRAMC_REG_ADDR(DDRPHY_REG_MISC_CTRL1), 1, MISC_CTRL1_R_DMDA_RRESETB_I);
    mcDELAY_US(500);
    CKEFixOnOff(p, TO_ALL_RANK, CKE_FIXON, TO_ALL_CHANNEL);

    ddr3 = is_ddr3_family(p) ? 1 : 0;

    /* Mode registers common to both families, in this order. */
    DramcModeRegWriteByRank(p, p->rank, 2, gMRVal[p->channel][p->rank].mr02);
    DramcModeRegWriteByRank(p, p->rank, 3, gMRVal[p->channel][p->rank].mr03);
    DramcModeRegWriteByRank(p, p->rank, 1, gMRVal[p->channel][p->rank].mr01);

    if (!ddr3) {
        DramcModeRegWriteByRank(p, p->rank, 4, gMRVal[p->channel][p->rank].mr04);
        DramcModeRegWriteByRank(p, p->rank, 5, gMRVal[p->channel][p->rank].mr05);

        /*
         * MR6 is written twice with bit 7 set, 1us apart, then once with the
         * cached value.  NOTE(review): bit 7 is presumably the DDR4 VrefDQ
         * training-enable bit -- confirm against the JEDEC MR6 definition.
         */
        for (pass = 0; pass < 2; pass++) {
            DramcModeRegWriteByRank(p, p->rank, 6, gMRVal[p->channel][p->rank].mr06 | 0x80);
            mcDELAY_US(1);
        }
        DramcModeRegWriteByRank(p, p->rank, 6, gMRVal[p->channel][p->rank].mr06);
    }

    /* MR0 goes last for both families, right before ZQ. */
    DramcModeRegWriteByRank(p, p->rank, 0, gMRVal[p->channel][p->rank].mr00);

    if (ddr3) {
        DDR3_SWZQ(p, p->rank);
    } else {
        DDR4_SWZQ(p, p->rank);
    }

    mcDELAY_US(1); /* for ZQ */
}

/*
 * Centre the CS delay and then the CMD delay inside their passing windows.
 *
 * For each of the two signals the delay is swept from pi_start up to (but not
 * including) pi_end in pi_step increments. At every tap the PHY is reset and a
 * test-engine write/read check is run. After a failing tap the default delays
 * are restored and the DRAM mode registers are re-initialised before the scan
 * continues.
 *
 * Window rules:
 *   - first_pass is the first passing tap.
 *   - A failing tap less than 20 taps after first_pass discards the window as
 *     a glitch; otherwise it closes the window (last_pass = failing tap) and
 *     ends the scan.
 *   - If the scan reaches its last tap while still passing, the window is
 *     closed at that tap, subject to the same 20-tap minimum. The original
 *     code tested "delay >= pi_end" inside a loop bounded by "delay < pi_end",
 *     so this case was unreachable and such windows were reported as
 *     "Window not found".
 *
 * When a window is found its midpoint is programmed; otherwise the delays read
 * by get_cmd_delay() at entry are restored.
 *
 * NOTE(review): on the failing-tap path last_pass is the failing tap itself,
 * not the tap before it; that behaviour is kept unchanged here.
 *
 * @param p  DRAM controller context.
 */
void DramcAdjustCommandBus(DRAMC_CTX_T *p)
{
    const U8 min_window = 20; /* shorter pass runs are treated as glitches */
    U8 pi_start, pi_end, pi_step;
    U8 first_pass, last_pass, win_size;
    U8 delay, final_delay;
    U32 compare_result;
    struct cmd_delay default_delay;

    vPrintCalibrationBasicInfo(p);

    DramcEngine2Init(p, p->test2_1, p->test2_2, p->test_pattern, 0, TE_UI_SHIFT);//UI_SHIFT + LEN1

    /* Backup */
    get_cmd_delay(p, &default_delay);

    pi_start = 0;
    pi_end = 127;
#if FOR_DV_SIMULATION_USED
    pi_step = 16;
#else
    pi_step = 1;
#endif

    /* CS */
    first_pass = 0xff;
    last_pass = 0xff;

    mcSHOW_DBG_MSG("%s: CS Adjust start\n", __func__);

    for (delay = pi_start; delay < pi_end; delay += pi_step) {
        update_cs_delay(p, &default_delay, delay, FALSE);
        DramPhyReset(p);

        compare_result = DramcEngine2Run(p, TE_OP_WRITE_READ_CHECK, p->test_pattern);

        mcSHOW_DBG_MSG("%d: result: 0x%x\n", delay, compare_result);

        if (compare_result) {
            /* Bring the DRAM back to a known-good state before the next tap. */
            update_cs_delay(p, &default_delay, 0, TRUE);
            DramModeRegReInit(p);

            if (first_pass != 0xff) {
                if (delay - first_pass < min_window) {
                    first_pass = 0xff;
                } else {
                    last_pass = delay;
                    break;
                }
            }
        } else {
            if (first_pass == 0xff)
                first_pass = delay;

            /* Last tap of the scan and still passing: close the window here. */
            if ((delay + pi_step >= pi_end) &&
                (delay + pi_step - first_pass >= min_window))
                last_pass = delay;
        }
    }

    if (first_pass != 0xff && last_pass != 0xff) {
        win_size = (last_pass - first_pass) + 1;
        final_delay = (first_pass + last_pass) >> 1;

        mcSHOW_DBG_MSG("%s: CS win_size = %d, final_delay = %d\n", __func__,
            win_size, final_delay);

        update_cs_delay(p, &default_delay, final_delay, FALSE);
    } else {
        mcSHOW_DBG_MSG("%s: CS Window not found, restore default value\n", __func__);
        update_cs_delay(p, &default_delay, 0, TRUE);
    }

    /* CMD */
    last_pass = 0xff;
    first_pass = 0xff;

    mcSHOW_DBG_MSG("%s: CMD Adjust start\n", __func__);

    for (delay = pi_start; delay < pi_end; delay += pi_step) {
        update_cmd_delay(p, &default_delay, delay, FALSE);
        DramPhyReset(p);

        compare_result = DramcEngine2Run(p, TE_OP_WRITE_READ_CHECK, p->test_pattern);

        mcSHOW_DBG_MSG("%d: result: 0x%x\n", delay, compare_result);

        if (compare_result) {
            /* Bring the DRAM back to a known-good state before the next tap. */
            update_cmd_delay(p, &default_delay, 0, TRUE);
            DramModeRegReInit(p);

            if (first_pass != 0xff) {
                if (delay - first_pass < min_window) {
                    first_pass = 0xff;
                } else {
                    last_pass = delay;
                    break;
                }
            }
        } else {
            if (first_pass == 0xff)
                first_pass = delay;

            /* Last tap of the scan and still passing: close the window here. */
            if ((delay + pi_step >= pi_end) &&
                (delay + pi_step - first_pass >= min_window))
                last_pass = delay;
        }
    }

    if (first_pass != 0xff && last_pass != 0xff) {
        win_size = (last_pass - first_pass) + 1;
        final_delay = (first_pass + last_pass) >> 1;

        mcSHOW_DBG_MSG("%s: CMD win_size = %d, final_delay = %d\n", __func__,
            win_size, final_delay);

        update_cmd_delay(p, &default_delay, final_delay, FALSE);
    } else {
        mcSHOW_DBG_MSG("%s: CMD Window not found, restore default value\n", __func__);
        update_cmd_delay(p, &default_delay, 0, TRUE);
    }

    DramPhyReset(p);
    DramcEngine2End(p);
}
#endif

#if DDR_ENABLE_RX_DVS_CAL
/*
 * Read the four RX DVS status flags (rising/falling x lead/lag) of one byte
 * lane from MISC_FT_STATUS0/1 and return their bitwise OR.
 *
 * Byte 1 previously read the R_LAG_B1 field for both the rising-lead and the
 * rising-lag flag, so R_LEAD_B1 was never sampled; it now reads R_LEAD_B1,
 * mirroring the byte-0 branch.
 *
 * @param p       DRAM controller context.
 * @param u1byte  Byte lane: 0 selects the B0 fields, any other value the B1 fields.
 * @return        OR of the four flags (0 when none of them is set).
 */
static U8 DramcRxDVSCal(DRAMC_CTX_T *p, U8 u1byte)
{
    U8 u1rising_lead, u1falling_lead, u1rising_lag, u1falling_lag, u1lead_lag;

    if (u1byte == 0)
    {
        u1rising_lead = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_FT_STATUS0), MISC_FT_STATUS0_AD_RX_ARDQ_DVS_R_LEAD_B0);
        u1falling_lead = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_FT_STATUS1), MISC_FT_STATUS1_AD_RX_ARDQ_DVS_F_LEAD_B0);
        u1rising_lag = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_FT_STATUS0), MISC_FT_STATUS0_AD_RX_ARDQ_DVS_R_LAG_B0);
        u1falling_lag = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_FT_STATUS1), MISC_FT_STATUS1_AD_RX_ARDQ_DVS_F_LAG_B0);
    }
    else //byte1
    {
        u1rising_lead = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_FT_STATUS0), MISC_FT_STATUS0_AD_RX_ARDQ_DVS_R_LEAD_B1);
        u1falling_lead = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_FT_STATUS1), MISC_FT_STATUS1_AD_RX_ARDQ_DVS_F_LEAD_B1);
        u1rising_lag = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_FT_STATUS0), MISC_FT_STATUS0_AD_RX_ARDQ_DVS_R_LAG_B1);
        u1falling_lag = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_FT_STATUS1), MISC_FT_STATUS1_AD_RX_ARDQ_DVS_F_LAG_B1);
    }

    mcSHOW_DBG_MSG2("Byte%d | LEAD(%d %d) | LAG(%d %d)\n", u1byte, u1rising_lead, u1falling_lead, u1rising_lag, u1falling_lag);

    u1lead_lag = (u1rising_lead | u1falling_lead | u1rising_lag | u1falling_lag);

    return u1lead_lag;
}

/*
 * RX DVS window calibration for the current rank.
 *
 * With the DVS enable bits set and the DQ/DQM RX delays zeroed, the RX DQ/DQS
 * delay is swept from -80 to 100 in steps of 4. At each step the test engine
 * is run and, per byte lane, the DVS lead/lag flags are sampled:
 *   - the first step with no flag set and no error in that byte opens a window;
 *   - the next step with a flag set or an error closes it. Windows narrower
 *     than 7 are discarded as fake and the search continues.
 * The per-byte window size is converted into u1DVS_increase[rank][byte] as
 * (window - 8) >> 3 (0 when the window is 8 or less).
 *
 * Fixes relative to the previous revision:
 *   - the three "Byte%d find ..." debug messages had unbalanced parentheses,
 *     no terminating ';' and no argument for %d;
 *   - the DramcRestoreRegisters() call was missing its closing parenthesis;
 *   - the registers listed in u4RegBackupAddress were restored at the end
 *     without ever having been backed up; DramcBackupRegisters() is now
 *     called before the first register is modified.
 *
 * @param p  DRAM controller context.
 * @return   DRAM_FAIL when p is NULL, DRAM_OK otherwise.
 */
DRAM_STATUS_T DramcRxDVSWindowCal(DRAMC_CTX_T *p)
{
    U8 ii, u1ByteIdx;
    S16 iDelay = 0, S16DelayBegin = 0;
    U16 u16DelayEnd = 0, u16DelayStep = 1;
    U32 u4err_value;

    U8 u1lead_lag, u1DVS_first_flag[DQS_NUMBER]={0}, u1DVS_first_pass[DQS_NUMBER]={0}, u1DVS_pass_window[DQS_NUMBER]={0}, u1finish_flag[DQS_NUMBER]={0};
    U32 u4RegBackupAddress[] =
    {
        (DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ11)),
        (DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ11)),
        (DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_RXDLY0)),
        (DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_RXDLY0)),
        (DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_RXDLY5)),
        (DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_RXDLY5)),
        (DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_RXDLY4)),
        (DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_RXDLY4)),
    };

    // error handling
    if (!p)
    {
        mcSHOW_ERR_MSG("context NULL\n");
        return DRAM_FAIL;
    }

    mcSHOW_DBG_MSG("\n RX DVS calibration \n");

    //When doing RxWindowPerbitCal, should make sure that auto refresh is disable
    vAutoRefreshSwitch(p, DISABLE);
    //CKEFixOnOff(p, p->rank, CKE_FIXON, CKE_WRITE_TO_ONE_CHANNEL);

    // Save every register this routine modifies so the restore at the end is meaningful
    DramcBackupRegisters(p, u4RegBackupAddress, sizeof(u4RegBackupAddress) / sizeof(U32));

    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ11), 1, SHU_B0_DQ11_RG_RX_ARDQ_DVS_EN_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ11), 1, SHU_B1_DQ11_RG_RX_ARDQ_DVS_EN_B1);

#if ENABLE_K_WITH_WORST_SI_UI_SHIFT
    DramcEngine2Init(p, p->test2_1, p->test2_2, p->test_pattern, 0, TE_UI_SHIFT);//PIC Need to check if need to use UI_SHIFT;//UI_SHIFT + LEN1
#else
    DramcEngine2Init(p, p->test2_1, p->test2_2, TEST_XTALK_PATTERN, 0, TE_NO_UI_SHIFT);
#endif

    // Sweep range of the RX DQ/DQS delay
    u16DelayStep = 4;
    S16DelayBegin = -80;
    u16DelayEnd = 100;

    mcSHOW_DBG_MSG("\nRX Delay %d -> %d, step: %d\n", S16DelayBegin, u16DelayEnd, u16DelayStep);

    // Adjust DQM output delay to 0
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_RXDLY4),
            P_Fld(0, SHU_RK_B0_RXDLY4_RX_ARDQM0_R_DLY_B0) |
            P_Fld(0, SHU_RK_B0_RXDLY4_RX_ARDQM0_F_DLY_B0));
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_RXDLY4),
            P_Fld(0, SHU_RK_B1_RXDLY4_RX_ARDQM0_R_DLY_B1) |
            P_Fld(0, SHU_RK_B1_RXDLY4_RX_ARDQM0_F_DLY_B1));

    // Adjust DQ output delay to 0
    //every 2bit dq have the same delay register address
    for (ii = 0; ii < 4; ii++)
        SetRxDqDelay(p, ii, 0);

    // non-autok flow
    for (iDelay = S16DelayBegin; iDelay <= u16DelayEnd; iDelay += u16DelayStep)
    {
        SetRxDqDqsDelay(p, iDelay);

        u4err_value = DramcEngine2Run(p, TE_OP_WRITE_READ_CHECK, p->test_pattern);

        mcSHOW_DBG_MSG2("iDelay= %d, err_value: 0x%x", iDelay, u4err_value);

        for(u1ByteIdx=0; u1ByteIdx<(p->data_width/DQS_BIT_NUMBER); u1ByteIdx++)
        {
            u1lead_lag = DramcRxDVSCal(p, u1ByteIdx);

            if ((u1lead_lag == 0) && (u1DVS_first_flag[u1ByteIdx] == 0) && (((u4err_value >> (u1ByteIdx<<3)) & 0xff) == 0))
            {
                // iDelay may be negative; the U8 store wraps, and the U8 window
                // computed below is still correct modulo 256.
                u1DVS_first_pass[u1ByteIdx] = iDelay;
                u1DVS_first_flag[u1ByteIdx] = 1;
                mcSHOW_DBG_MSG("Byte%d find first pass delay\n", u1ByteIdx);
            }
            else if (((u1lead_lag == 1) || (((u4err_value >> (u1ByteIdx<<3)) & 0xff) != 0)) && (u1DVS_first_flag[u1ByteIdx] == 1) && (u1finish_flag[u1ByteIdx] == 0))
            {
                u1DVS_pass_window[u1ByteIdx] = iDelay - u1DVS_first_pass[u1ByteIdx] - u16DelayStep;

                if (u1DVS_pass_window[u1ByteIdx] < 7) //if window size bigger than 7, consider as real pass window.
                {
                    u1DVS_pass_window[u1ByteIdx] = 0;
                    u1DVS_first_flag[u1ByteIdx] = 0;
                    mcSHOW_DBG_MSG("Byte%d find fake window\n", u1ByteIdx);
                }
                else
                {
                    u1finish_flag[u1ByteIdx] = 1;
                    mcSHOW_DBG_MSG("Byte%d find pass window\n", u1ByteIdx);
                }
            }
        }

        if ((u1finish_flag[0]==1) && (u1finish_flag[1]==1))
        {
            mcSHOW_DBG_MSG("Two byte DVS window find, early break!\n");
            break;
        }
    }

    DramcEngine2End(p);

    for (u1ByteIdx = 0; u1ByteIdx < (p->data_width / DQS_BIT_NUMBER); u1ByteIdx++)
    {
        u1DVS_increase[p->rank][u1ByteIdx] = (u1DVS_pass_window[u1ByteIdx] > 8)? ((u1DVS_pass_window[u1ByteIdx] - 8) >> 3): 0;
        mcSHOW_DBG_MSG("\nByte %d final DVS window size(M) %d, DVS increase %d\n", u1ByteIdx, u1DVS_pass_window[u1ByteIdx], u1DVS_increase[p->rank][u1ByteIdx]);
    }

    DramcRestoreRegisters(p, u4RegBackupAddress, sizeof(u4RegBackupAddress) / sizeof(U32));

    vAutoRefreshSwitch(p, ENABLE);

    DramPhyReset(p);

    vPrintCalibrationBasicInfo(p);

    mcSHOW_DBG_MSG("\n\n");
    mcSHOW_DBG_MSG3("[DramcRxDVSWindowCal] Done\n");

    return DRAM_OK;
}

void DramcDramcRxDVSCalPostProcess(DRAMC_CTX_T *p)
{
    U8 rank_i, u1ByteIdx, u1DVS_increase_final, u1DVS_dly_final[DQS_NUMBER]={0};
    U8 backup_rank = p->rank;

    for (u1ByteIdx = 0; u1ByteIdx < (p->data_width / DQS_BIT_NUMBER); u1ByteIdx++)
    {
        if (p->support_rank_num == RANK_DUAL)
            u1DVS_increase_final = (u1DVS_increase[RANK_0][u1ByteIdx] < u1DVS_increase[RANK_1][u1ByteIdx])? u1DVS_increase[RANK_0][u1ByteIdx] : u1DVS_increase[RANK_1][u1ByteIdx];
        else
            u1DVS_increase_final = u1DVS_increase[p->rank][u1ByteIdx];

        if (u1ByteIdx == 0)
        {
            u1DVS_dly_final[u1ByteIdx] = u1DVS_increase_final + (u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ11), SHU_B0_DQ11_RG_RX_ARDQ_DVS_DLY_B0));
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ11), u1DVS_dly_final[u1ByteIdx], SHU_B0_DQ11_RG_RX_ARDQ_DVS_DLY_B0);
        }
		else //byte1
	    {
	        u1DVS_dly_final[u1ByteIdx] = u1DVS_increase_final + (u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ11), SHU_B1_DQ11_RG_RX_ARDQ_DVS_DLY_B1));
	        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ11), u1DVS_dly_final[u1ByteIdx], SHU_B1_DQ11_RG_RX_ARDQ_DVS_DLY_B1);
	    }
        mcSHOW_DBG_MSG("Byte%d final DVS delay: %d\n", u1ByteIdx, u1DVS_dly_final[u1ByteIdx]);
    }

    for(rank_i=RANK_0; rank_i< p->support_rank_num; rank_i++)
    {
        vSetRank(p, rank_i);
        DramcRxWindowPerbitCal(p, PATTERN_TEST_ENGINE, DVS_CAL_KEEP_VREF, AUTOK_OFF);
    }

    if ((DramcRxDVSCal(p, 0) == 1) || (DramcRxDVSCal(p, 1) == 1)) //Prevent set wrong DV dly
    {
        mcSHOW_ERR_MSG("Final DVS delay is out of RX window\n");
        for (u1ByteIdx = 0; u1ByteIdx < (p->data_width / DQS_BIT_NUMBER); u1ByteIdx++)
        {
            if (u1DVS_dly_final[u1ByteIdx] > 0)
            {
                u1DVS_dly_final[u1ByteIdx] -= 1;
                if (u1ByteIdx == 0)
                {
                    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ11), u1DVS_dly_final[u1ByteIdx], SHU_B0_DQ11_RG_RX_ARDQ_DVS_DLY_B0);
                }
		        else //byte1
	            {
	                vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ11), u1DVS_dly_final[u1ByteIdx], SHU_B1_DQ11_RG_RX_ARDQ_DVS_DLY_B1);
	            }
            }
            for(rank_i=RANK_0; rank_i< p->support_rank_num; rank_i++)
            {
                vSetRank(p, rank_i);
                DramcRxWindowPerbitCal(p, PATTERN_TEST_ENGINE, DVS_CAL_KEEP_VREF, AUTOK_OFF);
            }
        }
    }

    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ11), 1, SHU_B0_DQ11_RG_RX_ARDQ_DVS_EN_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ11), 1, SHU_B1_DQ11_RG_RX_ARDQ_DVS_EN_B1);

    vSetRank(p, backup_rank);
}
#endif

#if DDR_ENABLE_DATLAT_CAL
/*
 * Program a DATLAT value and the settings derived from it on all channels.
 *
 * DATLAT_DSEL / DATLAT_DSEL_PHY equal curr_val when the RX pipe bypass is
 * enabled (or when curr_val is 0); otherwise they are curr_val - 1.
 * The RX clock-gating extension bits follow fixed thresholds:
 *   curr_val >= 8  -> EXTEND1,  >= 14 -> EXTEND2,  >= 19 -> EXTEND3.
 * The PHY is reset afterwards.
 *
 * @param p         DRAM controller context.
 * @param curr_val  DATLAT value to program.
 */
void dle_factor_handler(DRAMC_CTX_T *p, U8 curr_val)
{
    U8 dsel = curr_val;
    U8 ext1 = 0, ext2 = 0, ext3 = 0;

    /* Without RX pipe bypass, DSEL trails DATLAT by one (but never goes below 0). */
    if (!u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_MISC_RX_PIPE_CTRL), SHU_MISC_RX_PIPE_CTRL_RX_PIPE_BYPASS_EN)
        && (curr_val >= 1))
    {
        dsel = curr_val - 1;
    }

    vIO32WriteFldMulti_All(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_RDAT),
            P_Fld(curr_val, MISC_SHU_RDAT_DATLAT) |
            P_Fld(dsel, MISC_SHU_RDAT_DATLAT_DSEL) |
            P_Fld(dsel, MISC_SHU_RDAT_DATLAT_DSEL_PHY));

    // Had been adjusted for 868 already.
    if (curr_val >= 8)
        ext1 = 1;
    if (curr_val >= 14)
        ext2 = 1;
    if (curr_val >= 19)
        ext3 = 1;

    vIO32WriteFldMulti_All(DRAMC_REG_ADDR(DRAMC_REG_SHU_RX_CG_SET0),
            P_Fld(ext1, SHU_RX_CG_SET0_READ_START_EXTEND1) |
            P_Fld(ext1, SHU_RX_CG_SET0_DLE_LAST_EXTEND1) |
            P_Fld(ext2, SHU_RX_CG_SET0_READ_START_EXTEND2) |
            P_Fld(ext2, SHU_RX_CG_SET0_DLE_LAST_EXTEND2) |
            P_Fld(ext3, SHU_RX_CG_SET0_READ_START_EXTEND3) |
            P_Fld(ext3, SHU_RX_CG_SET0_DLE_LAST_EXTEND3));

    DramPhyReset(p);
}

/* Best DATLAT tap chosen by DramcRxdatlatCal() for each rank; read back by DramcDualRankRxdatlatCal(). */
static U8 aru1RxDatlatResult[RANK_MAX];
/*
 * DATLAT calibration for the current channel/rank.
 *
 * Unless a saved value is reused (SUPPORT_SAVE_TIME_FOR_CALIBRATION && BYPASS_DATLAT
 * with p->femmc_Ready == 1), every DATLAT tap from u2DatlatBegin up to
 * DATLAT_TAP_NUMBER - 1 is programmed through dle_factor_handler() and checked
 * with a test-engine write/read run. The first contiguous run of passing taps
 * is counted in ucsum (the scan stops early once 5 taps have passed) and the
 * chosen tap is:
 *     ucsum == 0      -> calibration failed, MISC_SHU_RDAT is written back
 *     ucsum 1..3      -> ucfirst + (ucsum >> 1)
 *     ucsum >= 4      -> ucfirst + 1
 * The chosen tap is stored in aru1RxDatlatResult[p->rank] and programmed.
 *
 * The outcome is reported through vSetCalibrationResult(); the return value is
 * DRAM_FAIL only for a NULL context and DRAM_OK in every other case, including
 * when no tap passes.
 *
 * NOTE(review): on the saved-value path DramcEngine2Init() is called but
 * DramcEngine2End() is not - confirm this is intended.
 *
 * @param p  DRAM controller context.
 * @return   DRAM_FAIL if p is NULL, otherwise DRAM_OK.
 */
DRAM_STATUS_T DramcRxdatlatCal(DRAMC_CTX_T *p)
{
    U8 ii;// ucStartCalVal = 0;
    U32 u4prv_register_080;
    U32 u4err_value = 0xffffffff;
    U8 ucfirst, ucbegin, ucsum, ucbest_step;// ucpipe_num = 0;
    U16 u2DatlatBegin;

    // error handling
    if (!p)
    {
        mcSHOW_ERR_MSG("context NULL\n");
        return DRAM_FAIL;
    }

    mcDUMP_REG_MSG("\n[dumpRG] DramcRxdatlatCal\n");
    #if VENDER_JV_LOG
    vPrintCalibrationBasicInfo_ForJV(p);
    #else
    vPrintCalibrationBasicInfo(p);
    #endif

    vAutoRefreshSwitch(p, ENABLE);
    // pre-save the whole MISC_SHU_RDAT register so it can be written back on failure
    u4prv_register_080 = u4IO32Read4B(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_RDAT));

    //default set FAIL
    vSetCalibrationResult(p, DRAM_CALIBRATION_DATLAT, DRAM_FAIL);

    // init best_step to default
    ucbest_step = (U8) u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_RDAT), MISC_SHU_RDAT_DATLAT);
    mcSHOW_DBG_MSG("DATLAT Default: 0x%x\n", ucbest_step);
    mcDUMP_REG_MSG("DATLAT Default: 0x%x\n", ucbest_step);

    // 1.set DATLAT tap (u2DatlatBegin .. DATLAT_TAP_NUMBER - 1)
    // 2.run test engine 2
    // 3.check result and count the passing taps
    // 4.pick the final DATLAT from the first passing tap and the pass count

    // Initialize
    // ucbegin: 0 = no passing tap seen yet, 1 = inside the pass window, 0xff = window closed
    ucfirst = 0xff;
    ucbegin = 0;
    ucsum = 0;

    DramcEngine2Init(p, p->test2_1, p->test2_2, p->test_pattern, 0, TE_UI_SHIFT);//UI_SHIFT + LEN1

#if FOR_DV_SIMULATION_USED
    // NOTE(review): wraps if the default DATLAT is below 5 - confirm that cannot happen
    u2DatlatBegin = ucbest_step - 5;
#else
    u2DatlatBegin = 0;
#endif

#if (SUPPORT_SAVE_TIME_FOR_CALIBRATION && BYPASS_DATLAT)
	if (p->femmc_Ready == 1)
	{
        // reuse the value saved by an earlier full calibration
        ucbest_step = p->pSavetimeData->u1RxDatlat_Save[p->channel][p->rank];
    }
    else
#endif
    {
        for (ii = u2DatlatBegin; ii < DATLAT_TAP_NUMBER; ii++)
        {
            // 1
            dle_factor_handler(p, ii);

            // 2
            u4err_value = DramcEngine2Run(p, TE_OP_WRITE_READ_CHECK, p->test_pattern);

            // 3
            if (u4err_value == 0)
            {
                if (ucbegin == 0)
                {
                    // first tap which is pass
                    ucfirst = ii;
                    ucbegin = 1;
                }
                if (ucbegin == 1)
                {
                    ucsum++;

                    if (ucsum > 4)
                        break;  //early break.
                }
            }
            else
            {
                if (ucbegin == 1)
                {
                    // pass range end; later passing taps are no longer counted
                    ucbegin = 0xff;
                }
            }

        #ifdef ETT_PRINT_FORMAT
            mcSHOW_DBG_MSG("%d, 0x%x, sum = %d\n", ii, u4err_value, ucsum);
        #else
            mcSHOW_DBG_MSG("TAP = %2d, err_value = 0x%8x,  sum = %d\n", ii, u4err_value, ucsum);
        #endif
        }
        DramcEngine2End(p);

        // 4
        if (ucsum == 0)
        {
            mcSHOW_ERR_MSG("no DATLAT taps pass, DATLAT calibration fail!\n");
        }
        else if (ucsum <= 3)
        {
            ucbest_step = ucfirst + (ucsum >> 1);
        }
        else // window is larger than 3
        {
            ucbest_step = ucfirst + 1;
        }
        // if (sum == 0) FAIL!!
        // if (sum == 1) best_step = first
        // if (sum >= 2) best_step = first + 1

    }
    // when ucsum == 0 this records the default DATLAT read at entry
    aru1RxDatlatResult[p->rank] = ucbest_step;


#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
    if (p->femmc_Ready == 0)
    {
        p->pSavetimeData->u1RxDatlat_Save[p->channel][p->rank] = ucbest_step;
    }
#endif

    mcSHOW_DBG_MSG("best_step = %d\n\n", ucbest_step);
    mcDUMP_REG_MSG("best_step=%d\n\n", ucbest_step);
#if __SLT__
    mcSHOW_PARSER_MSG(("\n[%d Mbps][CH%d][RK%d][DATLAT] pattern=%d first_step=%d total pass=%d best_step=%d\n",
	p->frequency*2, p->channel, p->rank, p->test_pattern, ucfirst, ucsum, ucbest_step));
#endif
#if __A60868_TO_BE_PORTING__
#if __ETT__
    // halt when the initial DATLAT differs from the calibrated one by more than 1
    U8 _init_Datlat_value = vDramcACTimingGetDatLat(p);
    if ((_init_Datlat_value > (ucbest_step + 1)) || (_init_Datlat_value < (ucbest_step - 1)))
    {
        mcSHOW_DBG_MSG("[WARNING!!] Datlat initial value(%d) = best_step(%d) %c %d, out of range!\n\n",
                           _init_Datlat_value,
                           ucbest_step,
                           (ucbest_step > _init_Datlat_value)? '-': '+',
                           abs(ucbest_step - _init_Datlat_value));
        while (1);
    }
#endif
#endif

#if defined(FOR_HQA_TEST_USED) && defined(FOR_HQA_REPORT_USED)
    HQA_Log_Message_for_Report(p, p->channel, p->rank, HQA_REPORT_FORMAT2, "DATLAT", "", 0, ucbest_step, NULL);
#endif

#if (SUPPORT_SAVE_TIME_FOR_CALIBRATION && BYPASS_DATLAT)
    if (p->femmc_Ready == 1)
    {
        dle_factor_handler(p, ucbest_step);
        vSetCalibrationResult(p, DRAM_CALIBRATION_DATLAT, DRAM_FAST_K);
    }
    else
#endif
    {
        if (ucsum < 4)
        {
            mcSHOW_DBG_MSG2("[NOTICE] CH%d, DatlatSum %d\n", p->channel, ucsum);
        }

        if (ucsum == 0)
        {
            mcSHOW_ERR_MSG("DATLAT calibration fail, write back to default values!\n");
            vIO32Write4B_All(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_RDAT), u4prv_register_080);
            vSetCalibrationResult(p, DRAM_CALIBRATION_DATLAT, DRAM_FAIL);
        }
        else
        {
            dle_factor_handler(p, ucbest_step);
            vSetCalibrationResult(p, DRAM_CALIBRATION_DATLAT, DRAM_OK);
        }
    }

    vAutoRefreshSwitch(p, DISABLE);
#if __SLT__
    mcSHOW_PARSER_MSG(("[DramcRxdatlatCal] Done\n\n"));
#endif
    mcSHOW_DBG_MSG3("[DramcRxdatlatCal] Done\n");
	return DRAM_OK;
}

/*
 * Choose and program the final DATLAT shared by all ranks.
 *
 * Uses the larger of the two per-rank results in aru1RxDatlatResult[] on
 * dual-rank parts and the rank-0 result otherwise; adds one when read DBI is
 * enabled for the current FSP (DDR_ENABLE_READ_DBI builds). The value is
 * programmed through dle_factor_handler() and, when RDSEL_TRACKING_EN is set,
 * also written to SHU_MISC_RDSEL_TRACK_DMDATLAT_I.
 *
 * @param p  DRAM controller context.
 * @return   Always DRAM_OK.
 */
DRAM_STATUS_T DramcDualRankRxdatlatCal(DRAMC_CTX_T *p)
{
    const U8 rk0_datlat = aru1RxDatlatResult[0];
    const U8 rk1_datlat = aru1RxDatlatResult[1];
    U8 datlat = rk0_datlat;

    /* Rank 1 only matters on dual-rank parts, and only when it is not smaller. */
    if ((p->support_rank_num == RANK_DUAL) && (rk1_datlat >= rk0_datlat))
        datlat = rk1_datlat;

#if DDR_ENABLE_READ_DBI
    if (p->DBI_R_onoff[p->dram_fsp])
        datlat++;
#endif

    dle_factor_handler(p, datlat);

#if RDSEL_TRACKING_EN
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_MISC_RDSEL_TRACK), datlat, SHU_MISC_RDSEL_TRACK_DMDATLAT_I);
#endif

    mcSHOW_DBG_MSG("[DualRankRxdatlatCal] RK0: %d, RK1: %d, Final_Datlat %d\n", rk0_datlat, rk1_datlat, datlat);
    mcDUMP_REG_MSG("[DualRankRxdatlatCal] RK0: %d, RK1: %d, Final_Datlat %d\n", rk0_datlat, rk1_datlat, datlat);

    return DRAM_OK;
}
#endif // DDR_ENABLE_DATLAT_CAL

///TODO: Fix this option
#if DDR_ENABLE_TX_PERBIT_CAL

//=============================================================
///// DramC TX per-bit calibration ----------Begin--------------
//=============================================================
//-------------------------------------------------------------------------
/** DramcTxWindowPerbitCal (v2)
 *  TX DQS per bit SW calibration.
 *  @param p                Pointer of context created by DramcCtxCreate.
 *  @param  apply           (U8): 0 don't apply the register we set  1 apply the register we set ,default don't apply.
 *  @retval status          (DRAM_STATUS_T): DRAM_OK or DRAM_FAIL
 */
//-------------------------------------------------------------------------
// TX Vref scan range (begin / end / step); DV simulation builds use a much shorter range.
#if FOR_DV_SIMULATION_USED
#define TX_VREF_RANGE_BEGIN       0
#define TX_VREF_RANGE_END           2 // shortened range for DV simulation
#define TX_VREF_RANGE_STEP         2
#else
#define TX_VREF_RANGE_BEGIN       16
#define TX_VREF_RANGE_END           50 // binary 110010
#define TX_VREF_RANGE_STEP         2
#endif

// PI taps per carry step used by TxWinTransferDelayToUIPI(): 64 taps carry 2 UI, 32 taps (phase mode) carry 1 UI.
#define TX_DQ_UI_TO_PI_TAP         64 // 1 PI = tCK/64, total 128 PI, 1UI = 32 PI
#define TX_PHASE_DQ_UI_TO_PI_TAP         32 // 1 PI = tCK/64, total 128 PI, 1UI = 32 PI for DDR800 semi open loop mode
// NOTE(review): the constants below are not used in the visible part of the file; meanings inferred from their names only.
#define TX_VREF_DATA_NUM 50
#define TX_VREF_PASS_CONDITION 0
#define TX_PASS_WIN_CRITERIA    7
#define TX_VREF_BOUNDARY_NOT_READY 0xff

/*
 * Per-Vref summary record.
 * NOTE(review): no use of this type is visible in this part of the file; the
 * field descriptions below are inferred from the names and should be confirmed
 * against the TX per-bit calibration code.
 */
typedef struct _PASS_WIN_DATA_BY_VREF_T
{
    U16 u2VrefUsed;              // presumably the Vref value this record describes
    U16 u2WinSum_byVref;         // presumably the sum of all bit windows at this Vref
    U8 u1WorseBitWinSize_byVref; // presumably the smallest per-bit window at this Vref
    U8 u1WorseBitIdx_byVref;     // presumably the index of the bit with that smallest window
} PASS_WIN_DATA_BY_VREF_T;

/*
 * Look up the TX DQ output-enable shift used in WDQS mode 2 for the current
 * frequency.
 *
 *   1866                 -> 19
 *   1600, 1200, 800, 600 -> 15
 *   933                  -> 11
 *   400                  -> 16
 *
 * Any other frequency logs an error, spins forever in __ETT__ builds, and
 * otherwise yields 0.
 *
 * @param p  DRAM controller context (p->frequency is the lookup key).
 * @return   The shift value from the table above, or 0 for an unknown frequency.
 */
U16 WDQSMode2AcTxOEShift(DRAMC_CTX_T *p)
{
    if (p->frequency == 1866)
        return 19;

    if ((p->frequency == 1600) || (p->frequency == 1200) ||
        (p->frequency == 800) || (p->frequency == 600))
        return 15;

    if (p->frequency == 933)
        return 11;

    if (p->frequency == 400)
        return 16;

    mcSHOW_ERR_MSG("[WDQSMode2AcTxOEShift] frequency err!\n");
#if __ETT__
    while (1);
#endif

    return 0;
}

/*
 * Split a linear TX delay into PI, UI and MCK components, for both DQ and DQ OE.
 *
 *   pi      = delay modulo the PI tap count (32 in phase mode, 64 otherwise)
 *   UI sum  = (delay / tap count), doubled outside phase mode
 *   ui/mck  = UI sum split by the u1MCK2UI_DivShift() shift (low bits / high bits)
 *   ui_oe/mck_oe = the same split applied to (UI sum - OE shift)
 *
 * When adj_to_center is set and the loop mode is CLOSE_LOOP_MODE, a PI value
 * within 10 taps of either end of its range is moved by half the tap count and
 * the UI sum is adjusted by one in the opposite direction.
 *
 * @param p              DRAM controller context.
 * @param delay          Linear delay to convert.
 * @param adj_to_center  Enable the PI re-centring described above.
 * @param dly_all        Output; must not be NULL (asserted).
 */
void TxWinTransferDelayToUIPI(DRAMC_CTX_T *p, U16 delay, BOOL adj_to_center, struct tx_dly_for_all *dly_all)
{
    DDR800_MODE_T loop_mode = vGet_DDR_Loop_Mode(p);
    U8 pi_taps = (u1IsPhaseMode(p) == TRUE) ? TX_PHASE_DQ_UI_TO_PI_TAP : TX_DQ_UI_TO_PI_TAP;
    U8 mck_shift;
    U8 oe_shift = 0;
    U8 carry_shift;
    U16 ui_total;
    U16 mck_part;

    ASSERT(dly_all != NULL);

    mck_shift = u1MCK2UI_DivShift(p);

#if (fcFOR_CHIP_ID == fcGriffin)
    oe_shift = 6; /* from Jouling */
#elif ENABLE_WDQS_MODE_2
    oe_shift = WDQSMode2AcTxOEShift(p);
#else
    oe_shift = TX_DQ_OE_SHIFT_LP4;
#endif

    dly_all->pi = (U8)(delay & (pi_taps - 1));

    /* 1:8 mode for 2UI carry, DDR800 1:4 mode for 1UI carry */
    carry_shift = u1IsPhaseMode(p) ? 0 : 1;
    ui_total = (delay / pi_taps) << carry_shift;

    if (adj_to_center && (loop_mode == CLOSE_LOOP_MODE))
    {
        if (dly_all->pi < 10)
        {
            dly_all->pi += (pi_taps) >> 1;
            ui_total--;
        }
        else if (dly_all->pi > pi_taps - 10)
        {
            dly_all->pi -= (pi_taps) >> 1;
            ui_total++;
        }
    }

    /* DQ: high bits are MCK, the remainder is UI. */
    mck_part = ui_total >> mck_shift;
    dly_all->ui = ui_total - (mck_part << mck_shift);
    dly_all->mck = mck_part;

    /* DQ OE: same split after subtracting the OE shift. */
    ui_total -= oe_shift;
    mck_part = ui_total >> mck_shift;
    dly_all->ui_oe = ui_total - (mck_part << mck_shift);
    dly_all->mck_oe = mck_part;
}
#if 1
/*
 * Print the TX per-bit window data, one line per bit pair (bit n of byte 0
 * next to bit n + 8 of byte 1): first pass, last pass, window size and centre.
 * With __FLASH_TOOL_DA__ the window sizes are also copied into
 * PINInfo_flashtool; with DUMP_TA2_WINDOW_SIZE_RX_TX the per-byte totals of the
 * window sizes are printed at the end.
 *
 * @param p              DRAM controller context.
 * @param WinPerBitData  Per-bit window data, indexed by DQ bit (indices up to
 *                       DQS_BIT_NUMBER + 7 are read).
 */
static void TxPrintWidnowInfo(DRAMC_CTX_T *p, PASS_WIN_DATA_T WinPerBitData[])
{
    U8 bit;

    for (bit = 0; bit < DQS_BIT_NUMBER; bit++)
    {
    #ifdef ETT_PRINT_FORMAT
        mcSHOW_DBG_MSG("TX Bit%d (%d~%d) %d %d,   Bit%d (%d~%d) %d %d,",
            bit,
            WinPerBitData[bit].first_pass,
            WinPerBitData[bit].last_pass,
            WinPerBitData[bit].win_size,
            WinPerBitData[bit].win_center,
            bit + 8,
            WinPerBitData[bit + 8].first_pass,
            WinPerBitData[bit + 8].last_pass,
            WinPerBitData[bit + 8].win_size,
            WinPerBitData[bit + 8].win_center);
    #else
        mcSHOW_DBG_MSG("TX Bit%2d (%2d~%2d) %2d %2d,   Bit%2d (%2d~%2d) %2d %2d,",
            bit,
            WinPerBitData[bit].first_pass,
            WinPerBitData[bit].last_pass,
            WinPerBitData[bit].win_size,
            WinPerBitData[bit].win_center,
            bit + 8,
            WinPerBitData[bit + 8].first_pass,
            WinPerBitData[bit + 8].last_pass,
            WinPerBitData[bit + 8].win_size,
            WinPerBitData[bit + 8].win_center);
    #endif
    #if __FLASH_TOOL_DA__
        PINInfo_flashtool.DQ_TX_WIN_SIZE[p->channel][p->rank][bit] = WinPerBitData[bit].win_size;
        PINInfo_flashtool.DQ_TX_WIN_SIZE[p->channel][p->rank][bit+8] = WinPerBitData[bit+8].win_size;
    #endif
        mcSHOW_DBG_MSG("\n");
    }
    mcSHOW_DBG_MSG("\n");
#if DUMP_TA2_WINDOW_SIZE_RX_TX
    {
        /* total[0] accumulates bits 0..7, total[1] everything above. */
        U32 total[2] = {0, 0};

        for (bit = 0; bit < p->data_width; bit++)
            total[(bit < 8) ? 0 : 1] += WinPerBitData[bit].win_size;

        mcSHOW_DBG_MSG("\ntotal tx window size B0: %d B1: %d\n", total[0], total[1]);
    }
#endif
}
#endif

/*
 * Prepare the TX delay registers before a TX per-bit calibration pass.
 *
 * For every calibration type except TX_DQ_DQS_MOVE_DQM_ONLY the B0/B1 TXDLY0
 * and TXDLY1 registers are cleared (skipped when PINMUX_AUTO_TEST_PER_BIT_TX
 * is built in and gTX_check_per_bit_flag is 1) and the DQM delay field of each
 * byte is set to 0.
 * For TX_DQ_DQS_MOVE_DQM_ONLY nothing is cleared; when TX_K_DQM_MODE == 2 the
 * TEST2_A0_WRCLR0 and TEST2_A4_TEST2_DQMTGL fields are set to 1 instead.
 *
 * @param p        DRAM controller context.
 * @param calType  Calibration type being started.
 */
static void TXPerbitCalibrationInit(DRAMC_CTX_T *p, U8 calType)
{
    if (calType == TX_DQ_DQS_MOVE_DQM_ONLY)
    {
#if (TX_K_DQM_MODE == 2)
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TEST2_A0), 0x1, TEST2_A0_WRCLR0);
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TEST2_A4), 0x1, TEST2_A4_TEST2_DQMTGL);
#endif
        return;
    }

    //Set TX delay chain to 0
#if PINMUX_AUTO_TEST_PER_BIT_TX
    if (gTX_check_per_bit_flag != 1) /* per-bit pinmux check keeps the delay cells as they are */
#endif
    {
        vIO32Write4B(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_TXDLY0), 0);
        vIO32Write4B(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_TXDLY1), 0);
        vIO32Write4B(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_TXDLY0), 0);
        vIO32Write4B(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_TXDLY1), 0);
    }

    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_TXDLY3), 0x0, SHU_RK_B0_TXDLY3_TX_ARDQM0_DLY_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_TXDLY3), 0x0, SHU_RK_B1_TXDLY3_TX_ARDQM0_DLY_B1);

    ///TODO: the DQS2DQ PI/UI software-control bits (R_DMARPIDQ_SW / ARUIDQ_SW)
    /// are not touched here; decide whether the original setting needs a backup.
}

// Build switch for the TX tDQS2DQ pre-calibration; 0 compiles the feature out.
#define TX_TDQS2DQ_PRE_CAL 0
#if TX_TDQS2DQ_PRE_CAL
//  (1) DDR800 1:4 mode
//  (2) DDR1200/1600 1:4 mode
//  (3) 1:8 mode
// These three conditions have different MCK2UI/UI2PI ratios, so the TX DQS2DQ value must be recorded separately for each.
// Only (2) and (3) are recorded here; DDR800 1:4 mode skips recording DQS2DQ.
U16 u2DQS2DQ_Pre_Cal[CHANNEL_NUM][RANK_MAX][2/*DIV_Mode*/] = {0};
#endif

/*
 * TXScanRange_PI
 *  Work out the begin/end of the TX delay scan window, in PI units, for the
 *  rank currently selected in the context.
 *
 *  The window begins at the smallest per-byte DQS "virtual delay", which is
 *  assembled from the MCK field read from SHURK_SELPH_DQS0, the UI field read
 *  from SHURK_SELPH_DQS1 and wrlevel_dqs_final_delay[rank][byte] (PI).
 *  The window is one MCK wide.
 *
 *  @param p         DRAMC context; rank, data_width and loop mode are read.
 *  @param calType   Calibration type. Only consulted when
 *                   (TX_K_DQM_MODE == 1) && DDR_TX_K_DQM_WITH_WDBI, where a
 *                   DQM-only pass starts one MCK earlier.
 *  @param pu2Begin  Out: first delay of the scan window.
 *  @param pu2End    Out: *pu2Begin plus one MCK.
 */
static void TXScanRange_PI(DRAMC_CTX_T *p, DRAM_TX_PER_BIT_CALIBRATION_TYTE_T calType, U16 *pu2Begin, U16 *pu2End)
{
    U8 u1MCK2UI, u1UI2PI, u1ByteIdx;
    U8 u1DqsMck, u1DqsUi;
    U32 u4RegValue_TXDLY, u4RegValue_dly;
    U16 u2TempVirtualDelay, u2SmallestVirtualDelay = 0xffff;
    U16 u2DQDelayBegin = 0, u2DQDelayEnd = 0;
    U16 u2MckInPI;

    u4RegValue_TXDLY = u4IO32Read4B(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQS0));
    u4RegValue_dly = u4IO32Read4B(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQS1));

    u1MCK2UI = u1MCK2UI_DivShift(p);

    if (vGet_DDR_Loop_Mode(p) == DDR800_CLOSE_LOOP)
        u1UI2PI = 6;
    else
        u1UI2PI = 5;

    /* One MCK expressed in PI; shared by the DBI adjustment and the window length. */
    u2MckInPI = (1 << u1MCK2UI) << u1UI2PI;

    // find smallest DQS delay
    for (u1ByteIdx = 0; u1ByteIdx < (p->data_width / DQS_BIT_NUMBER); u1ByteIdx++)
    {
        /* Each byte owns a 4-bit slot in the register; the low 3 bits hold the value. */
        u1DqsMck = (u4RegValue_TXDLY >> (u1ByteIdx << 2)) & 0x7;// MCK
        u1DqsUi = (u4RegValue_dly >> (u1ByteIdx << 2)) & 0x7;// UI
        //wrlevel_dqs_final_delay[p->rank][u1ByteIdx]  ==> PI
        u2TempVirtualDelay = (((u1DqsMck << u1MCK2UI) + u1DqsUi) << u1UI2PI) + wrlevel_dqs_final_delay[p->rank][u1ByteIdx];

        if (u2TempVirtualDelay < u2SmallestVirtualDelay)
        {
            u2SmallestVirtualDelay = u2TempVirtualDelay;
        }

        //mcSHOW_DBG_MSG2("Original DQS_B%d VirtualDelay %d = (%d %d %d)\n", u1ByteIdx, u2TempVirtualDelay,
        //                u1DqsMck, u1DqsUi, wrlevel_dqs_final_delay[p->rank][u1ByteIdx]);
    }

    u2DQDelayBegin = u2SmallestVirtualDelay;

    #if TX_TDQS2DQ_PRE_CAL
    if (u2DQS2DQ_Pre_Cal[p->channel][p->rank][vGet_Div_Mode(p)] > 0)
    {
        U16 u2TmpShift;
        mcSHOW_DBG_MSG("TX_TDQS2DQ_PRE_CAL : change DQ begin %d -->", u2DQDelayBegin);

        u2TmpShift = (u2DQS2DQ_Pre_Cal[p->channel][p->rank][vGet_Div_Mode(p)]* p->frequency) / 1000;
        if (u2TmpShift >= 15)
            u2TmpShift -= 15;
        else
            u2TmpShift = 0;

        u2DQDelayBegin += u2TmpShift;
        mcSHOW_DBG_MSG("%d (+%d)\n", u2DQDelayBegin, u2TmpShift);
    }
    #endif

    #if (TX_K_DQM_MODE == 1) && DDR_TX_K_DQM_WITH_WDBI
    if (calType == TX_DQ_DQS_MOVE_DQM_ONLY)
    {
        // DBI on, calibration range -1MCK
        // Use the same MCK->PI conversion as the window length below, and
        // saturate at 0 so the unsigned begin value cannot wrap around.
        if (u2DQDelayBegin > u2MckInPI)
            u2DQDelayBegin -= u2MckInPI;
        else
            u2DQDelayBegin = 0;
    }
    #endif
    /* Scan range: 1MCK */
    u2DQDelayEnd = u2DQDelayBegin + u2MckInPI;

    *pu2Begin = u2DQDelayBegin;
    *pu2End = u2DQDelayEnd;

    #if 0//TX_TDQS2DQ_PRE_CAL
    mcSHOW_DBG_MSG("TXScanRange_PI %d~%d\n", u2DQDelayBegin, u2DQDelayEnd);
    #endif
}


/*
 * TXScanRange_Vref
 *  Report the TX Vref sweep parameters to the caller.
 *
 *  @param p                 DRAMC context (only read in the optional
 *                           fast-K / porting code paths).
 *  @param u1VrefScanEnable  Non-zero to sweep Vref; zero yields begin = end = 0.
 *  @param pu2Range          Out: always written as 1.
 *  @param pu2Begin          Out: first Vref level.
 *  @param pu2End            Out: last Vref level.
 *  @param pu2Step           Out: 8 when FOR_DV_SIMULATION_USED, otherwise
 *                           TX_VREF_RANGE_STEP.
 */
static void TXScanRange_Vref(DRAMC_CTX_T *p, U8 u1VrefScanEnable, U16* pu2Range, U16 *pu2Begin, U16 *pu2End, U16 *pu2Step)
{
    /* With the sweep disabled both bounds remain 0. */
    U16 u2SweepFirst = 0, u2SweepLast = 0;

    if (u1VrefScanEnable)
    {
    #if (SUPPORT_SAVE_TIME_FOR_CALIBRATION && BYPASS_VREF_CAL)
        if (p->femmc_Ready == 1)
        {
            /* Fast K: restrict the sweep to the Vref saved earlier. */
            u2SweepFirst = p->pSavetimeData->u1TxWindowPerbitVref_Save[p->channel][p->rank];
            u2SweepLast = u2SweepFirst + 1;
        }
        else
    #endif
        {
        #if __IPMv2_TO_BE_PORTING__
            if (p->odt_onoff != ODT_OFF)
            {
                /* range 0 */
                u2SweepFirst = TX_VREF_RANGE_BEGIN;
                u2SweepLast = TX_VREF_RANGE_END;
            }
            else if (p->dram_type == TYPE_LPDDR4)
            {
                /* range 1: 300/1100(VDDQ) = 27.2% */
                u2SweepFirst = 13 - 5;
                u2SweepLast = 13 + 5;
            }
            else
            {
                /* range 1: 290/600(VDDQ) = 48.3% */
                u2SweepFirst = 27 - 5;
                u2SweepLast = 27 + 5;
            }
        #endif
            /* Temporary setting: a fixed 0..50 sweep, which replaces any
             * choice made by the porting block above. */
            u2SweepFirst = 0;
            u2SweepLast = 50;
        }
    }

    *pu2Range = 1; ///TODO: CONFIRM DDR4 seems to use RANGE1 for TERMed case
    *pu2Begin = u2SweepFirst;
    *pu2End = u2SweepLast;
#if FOR_DV_SIMULATION_USED
    *pu2Step = 8;
#else
    *pu2Step = TX_VREF_RANGE_STEP;
#endif
}

/*
 * TxChooseVref
 *  Pick the final TX Vref from the per-Vref calibration summaries.
 *
 *  When TX_VREF_PASS_CONDITION is enabled, the longest run of consecutive
 *  entries whose worst-bit window exceeds TX_VREF_PASS_CONDITION is located
 *  and the midpoint of that run's first and last Vref is returned.
 *  If no such run exists (or the feature is disabled), the entry with the
 *  largest worst-bit window is returned, ties broken by the larger window sum.
 *
 *  @param p          DRAMC context (only used by the __SLT__ log).
 *  @param pVrefInfo  Per-Vref results, in the order they were scanned.
 *  @param u1VrefNum  Number of valid entries in pVrefInfo.
 *  @return           Selected Vref value; 0 if no entry has a non-zero window.
 */
static U16 TxChooseVref(DRAMC_CTX_T *p, PASS_WIN_DATA_BY_VREF_T pVrefInfo[], U8 u1VrefNum)
{
    U8 u1VrefIdx, u1WinSizeOfWorseBit = 0;
    U8 u1WorseBitIdx = 0;
#if TX_VREF_PASS_CONDITION
    U8 u1VrefPassBegin = TX_VREF_BOUNDARY_NOT_READY, u1VrefPassEnd = TX_VREF_BOUNDARY_NOT_READY, u1TempPassNum = 0, u1MaxVerfPassNum = 0;
    U8 u1VrefPassEnd_Final = TX_VREF_BOUNDARY_NOT_READY;
    U8 u1VrefPassBegin_Final = TX_VREF_BOUNDARY_NOT_READY;
#endif
    U16 u2MaxWinSum = 0;
    U16 u2FinalVref = 0;

    for (u1VrefIdx = 0; u1VrefIdx < u1VrefNum; u1VrefIdx++)
    {
#if __SLT__
        mcSHOW_SLT_MSG(("TX Vref=%d, minBit %d, minWin=%d, winSum=%d\n",
            pVrefInfo[u1VrefIdx].u2VrefUsed,
            pVrefInfo[u1VrefIdx].u1WorseBitIdx_byVref,
            pVrefInfo[u1VrefIdx].u1WorseBitWinSize_byVref,
            pVrefInfo[u1VrefIdx].u2WinSum_byVref));
#endif
        mcSHOW_DBG_MSG("TX Vref=%d, minBit %d, minWin=%d, winSum=%d\n",
            pVrefInfo[u1VrefIdx].u2VrefUsed,
            pVrefInfo[u1VrefIdx].u1WorseBitIdx_byVref,
            pVrefInfo[u1VrefIdx].u1WorseBitWinSize_byVref,
            pVrefInfo[u1VrefIdx].u2WinSum_byVref);

        #if TX_VREF_PASS_CONDITION
        if ((pVrefInfo[u1VrefIdx].u1WorseBitWinSize_byVref > TX_VREF_PASS_CONDITION))
        {
            /* Passing entry: open a new run or extend the current one. */
            if (u1VrefPassBegin == TX_VREF_BOUNDARY_NOT_READY)
            {
                u1VrefPassBegin = pVrefInfo[u1VrefIdx].u2VrefUsed;
                u1TempPassNum = 1;
            }
            else
                u1TempPassNum ++;

            /* A run that reaches the last entry is closed here. */
            if (u1VrefIdx == u1VrefNum - 1)
            {
                u1VrefPassEnd = pVrefInfo[u1VrefIdx].u2VrefUsed;
                if (u1TempPassNum > u1MaxVerfPassNum)
                {
                    u1VrefPassBegin_Final = u1VrefPassBegin;
                    u1VrefPassEnd_Final = u1VrefPassEnd;
                    u1MaxVerfPassNum = u1TempPassNum;
                }
            }
        }
        else
        {
            if ((u1VrefPassBegin != TX_VREF_BOUNDARY_NOT_READY) && (u1VrefPassEnd == TX_VREF_BOUNDARY_NOT_READY))
            {
                /* Failing entry closes the open run. The run's last member is
                 * the previous entry (an open run implies u1VrefIdx >= 1), so
                 * take its Vref directly instead of assuming the sweep step. */
                u1VrefPassEnd = pVrefInfo[u1VrefIdx - 1].u2VrefUsed;
                if (u1TempPassNum > u1MaxVerfPassNum)
                {
                    u1VrefPassBegin_Final = u1VrefPassBegin;
                    u1VrefPassEnd_Final = u1VrefPassEnd;
                    u1MaxVerfPassNum = u1TempPassNum;
                }
                /* Re-arm with the same sentinel the checks above compare against. */
                u1VrefPassBegin = TX_VREF_BOUNDARY_NOT_READY;
                u1VrefPassEnd = TX_VREF_BOUNDARY_NOT_READY;
                u1TempPassNum = 0;
            }
        }
        #endif
    }

    #if TX_VREF_PASS_CONDITION
    //if((u1VrefPassBegin_Final !=TX_VREF_BOUNDARY_NOT_READY) && (u1VrefPassEnd_Final!=TX_VREF_BOUNDARY_NOT_READY))
    if (u1MaxVerfPassNum > 0)
    {
        // vref pass window found: use the midpoint of the longest passing run
        u2FinalVref = (u1VrefPassBegin_Final + u1VrefPassEnd_Final) >> 1;
        mcSHOW_DBG_MSG("[TxChooseVref] Window > %d, Vref (%d~%d), Final Vref %d\n", TX_VREF_PASS_CONDITION, u1VrefPassBegin_Final, u1VrefPassEnd_Final, u2FinalVref);
    }
    else
    #endif
    {
        // no passing run found: fall back to the entry with the best worst-bit window
        for (u1VrefIdx = 0; u1VrefIdx < u1VrefNum; u1VrefIdx++)
        {
            if ((pVrefInfo[u1VrefIdx].u1WorseBitWinSize_byVref > u1WinSizeOfWorseBit) ||
                ((pVrefInfo[u1VrefIdx].u1WorseBitWinSize_byVref == u1WinSizeOfWorseBit) && (pVrefInfo[u1VrefIdx].u2WinSum_byVref > u2MaxWinSum)))
            {
                u1WinSizeOfWorseBit = pVrefInfo[u1VrefIdx].u1WorseBitWinSize_byVref;
                u1WorseBitIdx = pVrefInfo[u1VrefIdx].u1WorseBitIdx_byVref;
                u2MaxWinSum = pVrefInfo[u1VrefIdx].u2WinSum_byVref;
                u2FinalVref = pVrefInfo[u1VrefIdx].u2VrefUsed;
            }
        }
        mcSHOW_DBG_MSG("[TxChooseVref] Worse bit %d, Min win %d, Win sum %d, Final Vref %d\n", u1WorseBitIdx, u1WinSizeOfWorseBit, u2MaxWinSum, u2FinalVref);
        mcDUMP_REG_MSG("[TxChooseVref] Worse bit %d, Min win %d, Win sum %d, Final Vref %d\n", u1WorseBitIdx, u1WinSizeOfWorseBit, u2MaxWinSum, u2FinalVref);
#if __SLT__
        mcSHOW_PARSER_MSG(("[%d Mbps][CH%d][RK%d][TX] Best Vref %d  VrefRange %d, Window Min %d at DQ%d, Window Sum %d\n",
            p->frequency*2,p->channel, p->rank, u2FinalVref, (gMRVal[p->channel][p->rank].mr06>>6) & 0x1, u1WinSizeOfWorseBit, u1WorseBitIdx, u2MaxWinSum));
#endif
    }

    return u2FinalVref;
}


/*
 * DramcTXSetVref
 *  Program the TX Vref range/value into MR6 of the current rank and update
 *  the cached copy in gMRVal. Does nothing unless is_ddr4_family(p) is true.
 *
 *  MR6 is written three times: first with bit 7 set together with the range
 *  bit, then with the value added, and finally with bit 7 cleared.
 *  NOTE(review): bit 7 / bit 6 / bits [5:0] look like the DDR4 MR6 VrefDQ
 *  training-enable / range / value fields - confirm against JESD79-4.
 *
 *  @param p            DRAMC context (channel and rank select the target).
 *  @param u1VrefRange  Range selector; only bit 0 is used.
 *  @param u1VrefValue  Vref level; only bits [5:0] are used.
 */
void DramcTXSetVref(DRAMC_CTX_T *p, U8 u1VrefRange, U8 u1VrefValue)
{
    U16 mr06;

    if (!is_ddr4_family(p))
        return;

    /* Keep each argument inside its own field so an out-of-range input
     * cannot spill into the range bit or the bit-7 enable. */
    u1VrefRange &= 0x1;
    u1VrefValue &= 0x3f;

    mr06 = gMRVal[p->channel][p->rank].mr06;
    mr06 &= ~0xff;

    /* Step 1: set bit 7 and the range. */
    mr06 |= (0x1 << 7) | (u1VrefRange << 6);
    DramcModeRegWriteByRank(p, p->rank, 0x6, mr06);

    /* Step 2: add the Vref value while bit 7 is still set. */
    mr06 |= u1VrefValue;
    DramcModeRegWriteByRank(p, p->rank, 0x6, mr06);

    /* Step 3: clear bit 7, keeping range and value. */
    mr06 &= ~(0x1 << 7);
    DramcModeRegWriteByRank(p, p->rank, 0x6, mr06);

    gMRVal[p->channel][p->rank].mr06 = mr06;
    mcSHOW_DBG_MSG("[%s] TX Vref : CH%d Rank%d, TX Range %d Vref %d\n",
        __func__, p->channel, p->rank, u1VrefRange, (u1VrefValue & 0x3f));
}


/*
 * TXSetFinalVref
 *  Apply the chosen TX Vref through DramcTXSetVref() and log it.
 *  Nothing is done when is_ddr3_family(p) is true.
 *
 *  @param p             DRAMC context.
 *  @param u2FinalRange  Vref range to program.
 *  @param u2FinalVref   Vref value to program; also stored (as U8) in
 *                       gFinalTXVrefDQ when FOR_HQA_TEST_USED is defined.
 */
static void TXSetFinalVref(DRAMC_CTX_T *p, U16 u2FinalRange, U16 u2FinalVref)
{
    if (!is_ddr3_family(p))
    {
        DramcTXSetVref(p, u2FinalRange, u2FinalVref);

    #ifdef FOR_HQA_TEST_USED
        /* Record the result for the HQA report. */
        gFinalTXVrefDQ[p->channel][p->rank] = (U8) u2FinalVref;
    #endif

    #if VENDER_JV_LOG
        mcSHOW_DBG_MSG5("\nFinal TX Range %d Vref %d\n\n", u2FinalRange, u2FinalVref);
    #else
        mcSHOW_DBG_MSG("\nFinal TX Range %d Vref %d\n\n", u2FinalRange, u2FinalVref);
    #if __SLT__
        mcSHOW_PARSER_MSG(("\nFinal TX Range %d Vref %d\n\n", u2FinalRange, u2FinalVref));
    #endif
    #endif
    }
}

#if 0
#if ENABLE_TX_TRACKING
/*
 * TXUpdateTXTracking (array-parameter variant)
 *  This definition sits inside an enclosing "#if 0" and is therefore never
 *  compiled. The definition that follows it in this file has the same name
 *  but takes a struct tx_dly_per_byte pointer instead of the two PI arrays.
 *
 *  For DQ-only calibration it copies ucdq_pi[0..1] into SHURK_PI and into the
 *  DQS2DQ_CAL1 (source) / DQS2DQ_CAL2 (target) fields; for both DQ-only and
 *  DQM-only calibration it copies ucdqm_pi[0..1] into SHURK_PI and the
 *  DQS2DQ_CAL5 (target DQM) fields.
 */
#if !BYPASS_CALIBRATION
static
#endif
void TXUpdateTXTracking(DRAMC_CTX_T *p, DRAM_TX_PER_BIT_CALIBRATION_TYTE_T calType, U8 ucdq_pi[], U8 ucdqm_pi[])
{
     if (calType == TX_DQ_DQS_MOVE_DQ_ONLY || calType == TX_DQ_DQS_MOVE_DQM_ONLY)
     {
         // Copy the calibrated PI values into the DRAMC registers used by TX DQ tracking
         if (calType == TX_DQ_DQS_MOVE_DQ_ONLY)
         {
            vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_PI),
                            P_Fld(ucdq_pi[0], SHURK_PI_RK0_ARPI_DQ_B0) | P_Fld(ucdq_pi[1], SHURK_PI_RK0_ARPI_DQ_B1));

            // Source DQ
            vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_DQS2DQ_CAL1),
                                                    P_Fld(ucdq_pi[1], SHURK_DQS2DQ_CAL1_BOOT_ORIG_UI_RK0_DQ1) |
                                                    P_Fld(ucdq_pi[0], SHURK_DQS2DQ_CAL1_BOOT_ORIG_UI_RK0_DQ0));
            // Target DQ
             vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_DQS2DQ_CAL2),
                                                     P_Fld(ucdq_pi[1], SHURK_DQS2DQ_CAL2_BOOT_TARG_UI_RK0_DQ1) |
                                                     P_Fld(ucdq_pi[0], SHURK_DQS2DQ_CAL2_BOOT_TARG_UI_RK0_DQ0));
         }

         // DQM fields are written for both DQ-only and DQM-only calibration
         //if(calType ==TX_DQ_DQS_MOVE_DQM_ONLY || (calType ==TX_DQ_DQS_MOVE_DQ_ONLY))
         {
            vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_PI),
                            P_Fld(ucdqm_pi[0], SHURK_PI_RK0_ARPI_DQM_B0) | P_Fld(ucdqm_pi[1], SHURK_PI_RK0_ARPI_DQM_B1));

            // Target DQM
             vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_DQS2DQ_CAL5),
                                                 P_Fld(ucdqm_pi[1], SHURK_DQS2DQ_CAL5_BOOT_TARG_UI_RK0_DQM1) |
                                                 P_Fld(ucdqm_pi[0], SHURK_DQS2DQ_CAL5_BOOT_TARG_UI_RK0_DQM0));
         }
     }


#if 0// for LP3 , TX tracking will be disable, don't need to set DQ delay in DramC.
     ///TODO: check LP3 byte mapping of dramC
     vIO32WriteFldMulti(DRAMC_REG_SHURK0_PI + (CHANNEL_A << POS_BANK_NUM), \
                              P_Fld(ucdq_final_pi[0], SHURK0_PI_RK0_ARPI_DQ_B0) | P_Fld(ucdq_final_pi[1], SHURK0_PI_RK0_ARPI_DQ_B1));

     vIO32WriteFldMulti(DRAMC_REG_SHURK0_PI + SHIFT_TO_CHB_ADDR, \
                              P_Fld(ucdq_final_pi[2], SHURK0_PI_RK0_ARPI_DQ_B0) | P_Fld(ucdq_final_pi[3], SHURK0_PI_RK0_ARPI_DQ_B1));
#endif

}
#endif //End ENABLE_TX_TRACKING
#endif

#if 1
#if DDR_ENABLE_TX_TRACKING
/*
 * TXUpdateTXTracking
 *  Mirror the calibrated DQ / DQM PI delays into the DRAMC registers that
 *  hold the TX tracking copies.
 *
 *  - TX_DQ_DQS_MOVE_DQ_ONLY : writes the DQ PI fields (SHURK_PI, DQS2DQ_CAL1,
 *                             DQS2DQ_CAL2) and then the DQM PI fields.
 *  - TX_DQ_DQS_MOVE_DQM_ONLY: writes only the DQM PI fields (SHURK_PI,
 *                             DQS2DQ_CAL5).
 *  - any other calType      : no register is touched.
 *
 *  @param p             DRAMC context.
 *  @param calType       Calibration type that produced the delays.
 *  @param dly_per_byte  Per-byte delays; dq_pi[0..1] and dqm_pi[0..1] are read.
 */
#if !BYPASS_CALIBRATION
static
#endif
void TXUpdateTXTracking(DRAMC_CTX_T *p, DRAM_TX_PER_BIT_CALIBRATION_TYTE_T calType, struct tx_dly_per_byte *dly_per_byte)
{
    U8 fgDqPass = (calType == TX_DQ_DQS_MOVE_DQ_ONLY);
    U8 fgDqmPass = (calType == TX_DQ_DQS_MOVE_DQM_ONLY);

    if (fgDqPass)
    {
        /* DQ PI copy used by TX DQ tracking */
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_PI),
            P_Fld(dly_per_byte->dq_pi[0], SHURK_PI_RK0_ARPI_DQ_B0) |
            P_Fld(dly_per_byte->dq_pi[1], SHURK_PI_RK0_ARPI_DQ_B1));

        /* Source DQ */
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_DQS2DQ_CAL1),
            P_Fld(dly_per_byte->dq_pi[0], SHURK_DQS2DQ_CAL1_BOOT_ORIG_UI_RK0_DQ0) |
            P_Fld(dly_per_byte->dq_pi[1], SHURK_DQS2DQ_CAL1_BOOT_ORIG_UI_RK0_DQ1));

        /* Target DQ */
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_DQS2DQ_CAL2),
            P_Fld(dly_per_byte->dq_pi[0], SHURK_DQS2DQ_CAL2_BOOT_TARG_UI_RK0_DQ0) |
            P_Fld(dly_per_byte->dq_pi[1], SHURK_DQS2DQ_CAL2_BOOT_TARG_UI_RK0_DQ1));
    }

    if (fgDqPass || fgDqmPass)
    {
        /* DQM PI copy: written for both accepted calibration types */
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_PI),
            P_Fld(dly_per_byte->dqm_pi[0], SHURK_PI_RK0_ARPI_DQM_B0) |
            P_Fld(dly_per_byte->dqm_pi[1], SHURK_PI_RK0_ARPI_DQM_B1));

        /* Target DQM */
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_DQS2DQ_CAL5),
            P_Fld(dly_per_byte->dqm_pi[0], SHURK_DQS2DQ_CAL5_BOOT_TARG_UI_RK0_DQM0) |
            P_Fld(dly_per_byte->dqm_pi[1], SHURK_DQS2DQ_CAL5_BOOT_TARG_UI_RK0_DQM1));
    }

#if 0// for LP3 , TX tracking will be disable, don't need to set DQ delay in DramC.
     ///TODO: check LP3 byte mapping of dramC
     vIO32WriteFldMulti(DRAMC_REG_SHURK0_PI + (CHANNEL_A << POS_BANK_NUM), \
                              P_Fld(ucdq_final_pi[0], SHURK0_PI_RK0_ARPI_DQ_B0) | P_Fld(ucdq_final_pi[1], SHURK0_PI_RK0_ARPI_DQ_B1));

     vIO32WriteFldMulti(DRAMC_REG_SHURK0_PI + SHIFT_TO_CHB_ADDR, \
                              P_Fld(ucdq_final_pi[2], SHURK0_PI_RK0_ARPI_DQ_B0) | P_Fld(ucdq_final_pi[3], SHURK0_PI_RK0_ARPI_DQ_B1));
#endif
}
#endif //End DDR_ENABLE_TX_TRACKING


/*
 * TXSetDelayReg_DQ
 *  Write the TX DQ delay settings to hardware.
 *
 *  The PI delays of byte 0 and byte 1 are always written. The MCK and UI
 *  delays (data and output-enable, four entries each) are written only when
 *  u1UpdateRegUI is non-zero.
 *
 *  @param p              DRAMC context.
 *  @param u1UpdateRegUI  Non-zero to also refresh the MCK/UI registers.
 *  @param dly_per_byte   Delay values; dq_mck, dq_mck_oe, dq_ui, dq_ui_oe
 *                        (indices 0..3) and dq_pi (indices 0..1) are read.
 */
#if !BYPASS_CALIBRATION
static
#endif
void TXSetDelayReg_DQ(DRAMC_CTX_T *p, U8 u1UpdateRegUI, struct tx_dly_per_byte *dly_per_byte)
{
    if (u1UpdateRegUI != 0)
    {
        /* TXDLY (MCK) for DQ and its OEN, grouped per entry */
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ0),
            P_Fld(dly_per_byte->dq_mck[0], SHURK_SELPH_DQ0_TXDLY_DQ0) |
            P_Fld(dly_per_byte->dq_mck_oe[0], SHURK_SELPH_DQ0_TXDLY_OEN_DQ0) |
            P_Fld(dly_per_byte->dq_mck[1], SHURK_SELPH_DQ0_TXDLY_DQ1) |
            P_Fld(dly_per_byte->dq_mck_oe[1], SHURK_SELPH_DQ0_TXDLY_OEN_DQ1) |
            P_Fld(dly_per_byte->dq_mck[2], SHURK_SELPH_DQ0_TXDLY_DQ2) |
            P_Fld(dly_per_byte->dq_mck_oe[2], SHURK_SELPH_DQ0_TXDLY_OEN_DQ2) |
            P_Fld(dly_per_byte->dq_mck[3], SHURK_SELPH_DQ0_TXDLY_DQ3) |
            P_Fld(dly_per_byte->dq_mck_oe[3], SHURK_SELPH_DQ0_TXDLY_OEN_DQ3));

        /* DLY_DQ[2:0] (UI) for DQ and its OEN, grouped per entry */
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ2),
            P_Fld(dly_per_byte->dq_ui[0], SHURK_SELPH_DQ2_DLY_DQ0) |
            P_Fld(dly_per_byte->dq_ui_oe[0], SHURK_SELPH_DQ2_DLY_OEN_DQ0) |
            P_Fld(dly_per_byte->dq_ui[1], SHURK_SELPH_DQ2_DLY_DQ1) |
            P_Fld(dly_per_byte->dq_ui_oe[1], SHURK_SELPH_DQ2_DLY_OEN_DQ1) |
            P_Fld(dly_per_byte->dq_ui[2], SHURK_SELPH_DQ2_DLY_DQ2) |
            P_Fld(dly_per_byte->dq_ui_oe[2], SHURK_SELPH_DQ2_DLY_OEN_DQ2) |
            P_Fld(dly_per_byte->dq_ui[3], SHURK_SELPH_DQ2_DLY_DQ3) |
            P_Fld(dly_per_byte->dq_ui_oe[3], SHURK_SELPH_DQ2_DLY_OEN_DQ3));
    }

    /* PI delay of byte 0, then byte 1 */
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ), dly_per_byte->dq_pi[0], SHU_RK_B0_DQ_SW_ARPI_DQ_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ), dly_per_byte->dq_pi[1], SHU_RK_B1_DQ_SW_ARPI_DQ_B1);
}

/*
 * TXSetDelayReg_DQM
 *  Write the TX DQM delay settings to hardware.
 *
 *  The PI delays of byte 0 and byte 1 are always written. The MCK and UI
 *  delays (data and output-enable, four entries each) are written only when
 *  u1UpdateRegUI is non-zero.
 *
 *  @param p              DRAMC context.
 *  @param u1UpdateRegUI  Non-zero to also refresh the MCK/UI registers.
 *  @param dly_per_byte   Delay values; dqm_mck, dqm_mck_oe, dqm_ui, dqm_ui_oe
 *                        (indices 0..3) and dqm_pi (indices 0..1) are read.
 */
#if !BYPASS_CALIBRATION
static
#endif
void TXSetDelayReg_DQM(DRAMC_CTX_T *p, U8 u1UpdateRegUI, struct tx_dly_per_byte *dly_per_byte)
{
    if (u1UpdateRegUI != 0)
    {
        /* TXDLY (MCK) for DQM and its OEN, grouped per entry */
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ1),
            P_Fld(dly_per_byte->dqm_mck[0], SHURK_SELPH_DQ1_TXDLY_DQM0) |
            P_Fld(dly_per_byte->dqm_mck_oe[0], SHURK_SELPH_DQ1_TXDLY_OEN_DQM0) |
            P_Fld(dly_per_byte->dqm_mck[1], SHURK_SELPH_DQ1_TXDLY_DQM1) |
            P_Fld(dly_per_byte->dqm_mck_oe[1], SHURK_SELPH_DQ1_TXDLY_OEN_DQM1) |
            P_Fld(dly_per_byte->dqm_mck[2], SHURK_SELPH_DQ1_TXDLY_DQM2) |
            P_Fld(dly_per_byte->dqm_mck_oe[2], SHURK_SELPH_DQ1_TXDLY_OEN_DQM2) |
            P_Fld(dly_per_byte->dqm_mck[3], SHURK_SELPH_DQ1_TXDLY_DQM3) |
            P_Fld(dly_per_byte->dqm_mck_oe[3], SHURK_SELPH_DQ1_TXDLY_OEN_DQM3));

        /* DLY_DQM[2:0] (UI) for DQM and its OEN, grouped per entry */
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ3),
            P_Fld(dly_per_byte->dqm_ui[0], SHURK_SELPH_DQ3_DLY_DQM0) |
            P_Fld(dly_per_byte->dqm_ui_oe[0], SHURK_SELPH_DQ3_DLY_OEN_DQM0) |
            P_Fld(dly_per_byte->dqm_ui[1], SHURK_SELPH_DQ3_DLY_DQM1) |
            P_Fld(dly_per_byte->dqm_ui_oe[1], SHURK_SELPH_DQ3_DLY_OEN_DQM1) |
            P_Fld(dly_per_byte->dqm_ui[2], SHURK_SELPH_DQ3_DLY_DQM2) |
            P_Fld(dly_per_byte->dqm_ui_oe[2], SHURK_SELPH_DQ3_DLY_OEN_DQM2) |
            P_Fld(dly_per_byte->dqm_ui[3], SHURK_SELPH_DQ3_DLY_DQM3) |
            P_Fld(dly_per_byte->dqm_ui_oe[3], SHURK_SELPH_DQ3_DLY_OEN_DQM3));
    }

    /* PI delay of byte 0, then byte 1 */
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ), dly_per_byte->dqm_pi[0], SHU_RK_B0_DQ_SW_ARPI_DQM_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ), dly_per_byte->dqm_pi[1], SHU_RK_B1_DQ_SW_ARPI_DQM_B1);
}
#endif

#if TX_AUTO_K_ENABLE
/*
 * Tx_Auto_K_Init
 *  Configure the TX auto-K registers before a run.
 *
 *  Depending on calType the DQ and/or DQM PI auto-K enables are set and the
 *  matching PI_INIT fields are loaded with ucdq_pi. Afterwards the PI control
 *  is switched away from SW mode, the PI length and pass-window threshold are
 *  programmed, and the TX OE auto-adjust is enabled.
 *
 *  @param p         DRAMC context.
 *  @param calType   DQ+DQM, DQM-only, or (any other value) DQ-only.
 *  @param ucdq_pi   Initial PI position written to every selected PI_INIT field.
 *  @param u1PI_Len  Value for TX_ATK_PI_LEN.
 */
static void Tx_Auto_K_Init(DRAMC_CTX_T *p, DRAM_TX_PER_BIT_CALIBRATION_TYTE_T calType, U8 ucdq_pi, U8 u1PI_Len)
{
    U8 u1PassPiThrd = 0xa;
    U8 u1EarlyBreak = 0x0;

#if FOR_DV_SIMULATION_USED == 1
    /* DV simulation may override the threshold and early-break setting. */
    cal_sv_rand_args_t *pSvArgs = get_psra();

    if (pSvArgs)
    {
        u1PassPiThrd = pSvArgs->tx_atk_pass_pi_thrd & 0xFF;
        u1EarlyBreak = pSvArgs->tx_atk_early_break & 0xFF;
    }
#endif

#if ENABLE_PA_IMPRO_FOR_TX_AUTOK
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_DCM_SUB_CTRL), 0x1, DCM_SUB_CTRL_SUBCLK_CTRL_TX_AUTOK);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_CG_SET0), 0x1, TX_CG_SET0_TX_ATK_CLKRUN);
#endif

    switch (calType)
    {
    case TX_DQ_DQS_MOVE_DQ_DQM:
        /* Enable auto K for both DQ and DQM, same start position for all four fields. */
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET1),
            P_Fld(0x1, TX_ATK_SET1_TX_ATK_DQ_PI_EN) |
            P_Fld(0x1, TX_ATK_SET1_TX_ATK_DQM_PI_EN));
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET0),
            P_Fld(ucdq_pi, TX_ATK_SET0_TX_ATK_DQ_B0_PI_INIT) |
            P_Fld(ucdq_pi, TX_ATK_SET0_TX_ATK_DQ_B1_PI_INIT) |
            P_Fld(ucdq_pi, TX_ATK_SET0_TX_ATK_DQM_B0_PI_INIT) |
            P_Fld(ucdq_pi, TX_ATK_SET0_TX_ATK_DQM_B1_PI_INIT));
        break;

    case TX_DQ_DQS_MOVE_DQM_ONLY:
        /* DQM only */
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET1), 0x1, TX_ATK_SET1_TX_ATK_DQM_PI_EN);
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET0),
            P_Fld(ucdq_pi, TX_ATK_SET0_TX_ATK_DQM_B0_PI_INIT) |
            P_Fld(ucdq_pi, TX_ATK_SET0_TX_ATK_DQM_B1_PI_INIT));
        break;

    default:
        /* Every other calibration type is handled as DQ only. */
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET1), 0x1, TX_ATK_SET1_TX_ATK_DQ_PI_EN);
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET0),
            P_Fld(ucdq_pi, TX_ATK_SET0_TX_ATK_DQ_B0_PI_INIT) |
            P_Fld(ucdq_pi, TX_ATK_SET0_TX_ATK_DQ_B1_PI_INIT));
        break;
    }

    /* Switch PI SW mode to HW mode (control by DRAMC not APHY) */
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_CTRL1), 0, MISC_CTRL1_R_DMARPIDQ_SW);

    /* Auto-K PI length and the PI pass-window threshold */
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET1),
        P_Fld(u1PI_Len, TX_ATK_SET1_TX_ATK_PI_LEN) |
        P_Fld(u1PassPiThrd, TX_ATK_SET1_TX_ATK_PASS_PI_THRD));
#if (fcFOR_CHIP_ID == fcIPM) //Fix at Mar_gaux
    /* Early break setting (0 unless overridden for DV simulation) */
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET1), u1EarlyBreak, TX_ATK_SET1_TX_ATK_EARLY_BREAK);
#endif

    /* Enable OE auto adjust */
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHU_TX_SET1),
        P_Fld(0x3, SHU_TX_SET1_TXOEN_AUTOSET_DQ_OFFSET) |
        P_Fld(0x3, SHU_TX_SET1_TXOEN_AUTOSET_DQS_OFFSET) |
        P_Fld(0x1, SHU_TX_SET1_TXOEN_AUTOSET_EN));

#if TX_AUTO_K_DEBUG_ENABLE
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET1), 0x1, TX_ATK_SET1_TX_ATK_DBG_EN);
#endif
}

/*
 * Tx_Auto_K_complete_check
 *  Poll TX_ATK_DONE in 1 us steps until it reads 1, giving up (with an error
 *  message) once more than 100000 polls have elapsed. Then inspect
 *  TX_ATK_FIND_PW: when it reads 1 the TX per-bit calibration result is set
 *  to DRAM_OK, otherwise an error is logged and the result is left untouched.
 *
 *  @param p  DRAMC context.
 */
static void Tx_Auto_K_complete_check(DRAMC_CTX_T *p)
{
    U32 u4Polls;

    for (u4Polls = 0;
         u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_RESULT8), TX_ATK_RESULT8_TX_ATK_DONE) != 0x1;
         )
    {
        mcDELAY_US(1);
        //mcSHOW_DBG_MSG("Wait! TX auto K is not done!\n");
        if (++u4Polls > 100000)
        {
            mcSHOW_ERR_MSG("Error! TX auto K is not done!\n");
            break;
        }
    }

    /* The pass-window flag is checked regardless of how the wait ended. */
    if (u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_RESULT8), TX_ATK_RESULT8_TX_ATK_FIND_PW) != 0x1)
    {
        mcSHOW_ERR_MSG("Error! TX auto K is fail!\n");
        return;
    }

    vSetCalibrationResult(p, DRAM_CALIBRATION_TX_PERBIT, DRAM_OK);
    mcSHOW_DBG_MSG("Tx auto K, all bit find passs window\n");
}

/*
 * Tx_Auto_K_Clear
 *  Return the TX auto-K logic to its idle configuration. The writes below
 *  reverse the settings applied by Tx_Auto_K_Init(): trigger and debug enable
 *  are cleared, OE auto adjust is turned off, PI control is handed back to SW
 *  mode, the state machine is cleared, and the PI length / DQ / DQM enables
 *  are zeroed.
 *  NOTE(review): DCM_SUB_CTRL_SUBCLK_CTRL_TX_AUTOK, which Tx_Auto_K_Init sets
 *  under ENABLE_PA_IMPRO_FOR_TX_AUTOK, is not cleared here - confirm that is
 *  intended.
 *
 *  @param p  DRAMC context.
 */
static void Tx_Auto_K_Clear(DRAMC_CTX_T *p)
{
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET1), 0x0, TX_ATK_SET1_TX_ATK_TRIG); //Disable Tx auto K
    // Turn off the OE auto adjust that Tx_Auto_K_Init enabled
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SHU_TX_SET1), 0x0, SHU_TX_SET1_TXOEN_AUTOSET_EN);
    // Put the PI control back to SW mode (Tx_Auto_K_Init wrote 0 here)
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_CTRL1), 0x1, MISC_CTRL1_R_DMARPIDQ_SW);
    // Debug enable is cleared unconditionally, whether or not TX_AUTO_K_DEBUG_ENABLE set it
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET1), 0x0, TX_ATK_SET1_TX_ATK_DBG_EN);
    // Pulse CLR: write 1 and then 0
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET1), 0x1, TX_ATK_SET1_TX_ATK_CLR); //Clear state machine
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET1), 0x0, TX_ATK_SET1_TX_ATK_CLR);
    // Zero the PI length and both PI enables
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET1),
                                 P_Fld(0x0, TX_ATK_SET1_TX_ATK_PI_LEN) |
                                 P_Fld(0x0, TX_ATK_SET1_TX_ATK_DQ_PI_EN) |
                                 P_Fld(0x0, TX_ATK_SET1_TX_ATK_DQM_PI_EN));
#if ENABLE_PA_IMPRO_FOR_TX_AUTOK
    // Release the auto-K clock-run request set in Tx_Auto_K_Init
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_CG_SET0), 0x0, TX_CG_SET0_TX_ATK_CLKRUN);
#endif
}

#if TX_AUTO_K_WORKAROUND
/*
 * Tx_Auto_K_DQM_Workaround
 *  Write the saved RK1 DQM delay registers (MCK, UI and the two per-byte PI
 *  registers) back through DRAMC_REG_ADDR() for the rank currently selected
 *  in the context. The source values are the file-scope *_RK1_backup
 *  variables, which are captured outside this function.
 *  NOTE(review): the two PI writes replace the whole 32-bit SHU_RK_Bx_DQ
 *  registers, which also carry the DQ PI fields - confirm that is intended.
 *
 *  @param p  DRAMC context; presumably switched to RK0 by the caller - TODO confirm.
 */
static void Tx_Auto_K_DQM_Workaround(DRAMC_CTX_T *p)
{
    //Set RK1 DQM DLY to RK0
    vIO32Write4B(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ1), u4DQM_MCK_RK1_backup);
    vIO32Write4B(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ3), u4DQM_UI_RK1_backup);
    vIO32Write4B(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ), u4DQM_PI_RK1_backup[0]);
    vIO32Write4B(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ), u4DQM_PI_RK1_backup[1]);
}
/*
 * Tx_Auto_K_DQ_Workaround
 *  Write the saved RK1 DQ delay registers (MCK, UI and the two per-byte PI
 *  registers) back through DRAMC_REG_ADDR() for the rank currently selected
 *  in the context. The source values are the file-scope u4DQ_*_RK1_backup
 *  variables (vSwitchWriteDBISettings() captures them when p->rank is RANK_1).
 *
 *  @param p  DRAMC context; presumably switched to RK0 by the caller - TODO confirm.
 */
static void Tx_Auto_K_DQ_Workaround(DRAMC_CTX_T *p)
{
    //Set RK1 DQ DLY to RK0
    vIO32Write4B(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ0), u4DQ_MCK_RK1_backup);
    vIO32Write4B(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ2), u4DQ_UI_RK1_backup);
    vIO32Write4B(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ), u4DQ_PI_RK1_backup[0]);
    vIO32Write4B(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ), u4DQ_PI_RK1_backup[1]);
}
#endif

#if TX_AUTO_K_DEBUG_ENABLE
/*
 * Tx_Auto_K_Debug_Message
 *  Dump the per-bit TX auto-K pass/fail map. For every DQ bit the debug bit
 *  selector is programmed and the four 32-bit DBG_BIT_STATUS registers are
 *  captured; the map is then printed one delay step per line, 'o' for a set
 *  status bit and 'x' for a clear one.
 *
 *  @param p         DRAMC context; p->data_width bits are dumped
 *                   (assumes p->data_width <= DQ_DATA_WIDTH - TODO confirm).
 *  @param u1PI_Len  PI length setting: 0 prints 48 steps, otherwise
 *                   32 * (1 + u1PI_Len) steps, limited to the 128 captured.
 */
static void Tx_Auto_K_Debug_Message(DRAMC_CTX_T *p, U8 u1PI_Len)
{
    U8 u1bit_num = 0, u1BitIdx;
    U16 u2Length = 0, u2Length_max = 0;
    U32 u4status;
    U32 u4status_bit[4][DQ_DATA_WIDTH];

    if (u1PI_Len == 0)
        u2Length_max = 48;
    else
        u2Length_max = 32 * (1 + u1PI_Len);

    /* Only four 32-bit status words exist per bit; never index past them. */
    if (u2Length_max > (32 * 4))
        u2Length_max = 32 * 4;

    for (u1BitIdx = 0; u1BitIdx < p->data_width; u1BitIdx++)
    {
        /* Select which DQ bit the status registers report, then capture them. */
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET1), u1BitIdx, TX_ATK_SET1_TX_ATK_DBG_BIT_SEL);

        u4status_bit[0][u1BitIdx] = u4IO32Read4B(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_DBG_BIT_STATUS1));
        u4status_bit[1][u1BitIdx] = u4IO32Read4B(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_DBG_BIT_STATUS2));
        u4status_bit[2][u1BitIdx] = u4IO32Read4B(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_DBG_BIT_STATUS3));
        u4status_bit[3][u1BitIdx] = u4IO32Read4B(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_DBG_BIT_STATUS4));
    }

    mcSHOW_DBG_MSG2("Debug TX DQ PASS/FAIL status:\n");

    for (u2Length = 0; u2Length < u2Length_max; u2Length++)
    {
        mcSHOW_DBG_MSG2("Delay=%d ", u2Length);

        for (u1bit_num = 0; u1bit_num < p->data_width; u1bit_num++)
        {
            /* Delay step N lives in word N/32, bit N%32 of that DQ bit's status. */
            u4status = ((u4status_bit[u2Length / 32][u1bit_num] >> (u2Length % 32)) & 0x1);

            if (u4status == 0)
            {
                mcSHOW_DBG_MSG2("x");
            }
            else
            {
                mcSHOW_DBG_MSG2("o");
            }

            /* End the row after the last DQ bit. */
            if (u1bit_num == (p->data_width - 1))
            {
                mcSHOW_DBG_MSG2(" \n");
            }
        }
    }

    //mcSHOW_DBG_MSG("Debug DQ PASS(1)/FAIL(0) bit: %d, STATUS1: 0x%x, STATUS2: 0x%x, STATUS3: 0x%x, STATUS4: 0x%x,\n",u1BitIdx,u4status_bit[0][u1BitIdx],u4status_bit[1][u1BitIdx],u4status_bit[2][u1BitIdx],u4status_bit[3][u1BitIdx]);
}
#endif
#endif

#if (TX_K_DQM_MODE == 1) && (DDR_TX_K_DQM_WITH_WDBI || DDR_ENABLE_WRITE_DBI)
/*
 * vSwitchWriteDBISettings
 *  Turn write DBI on or off: shift the TX DQ/DQM delay by one MCK (-1 when
 *  turning on, +1 when turning off), update the DRAM mode register for the
 *  current FSP, and switch the controller-side write DBI setting.
 *  With the TX auto-K workaround built in, the RK1 DQ delay registers are
 *  snapshotted afterwards when the current rank is RANK_1.
 *
 *  @param p        DRAMC context.
 *  @param u1OnOff  Non-zero enables write DBI, zero disables it.
 */
void vSwitchWriteDBISettings(DRAMC_CTX_T *p, U8 u1OnOff)
{
    S8 s1MckShift = 1;

    if (u1OnOff)
    {
        /* Tx DQ/DQM -1 MCK for write DBI ON */
        s1MckShift = -1;
    }
    DramcWriteShiftMCKForWriteDBI(p, s1MckShift);

    SetDramModeRegForWriteDBIOnOff(p, p->dram_fsp, u1OnOff);
    DramcWriteDBIOnOff(p, u1OnOff);

    #if (TX_AUTO_K_ENABLE && TX_AUTO_K_WORKAROUND)
    if (p->rank == RANK_1)
    {
        /* Keep a copy of the RK1 DQ MCK / UI / PI registers for the workaround. */
        u4DQ_MCK_RK1_backup = u4IO32Read4B(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ0));
        u4DQ_UI_RK1_backup = u4IO32Read4B(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ2));
        u4DQ_PI_RK1_backup[0] = u4IO32Read4B(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ));
        u4DQ_PI_RK1_backup[1] = u4IO32Read4B(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ));
    }
    #endif
}
#endif

DRAM_STATUS_T DramcTxWindowPerbitCal(DRAMC_CTX_T *p, DRAM_TX_PER_BIT_CALIBRATION_TYTE_T calType, U8 u1VrefScanEnable, U8 isAutoK)
{
    struct tx_dly_for_all dly_all;
    struct tx_dly_per_byte dly_per_byte;
    U8 u1BitTemp, u1BitIdx, u1ByteIdx, u1RankIdx, backup_rank;
    U32 uiFinishCount;
    PASS_WIN_DATA_T WinPerBit[DQ_DATA_WIDTH], VrefWinPerBit[DQ_DATA_WIDTH], FinalWinPerBit[DQ_DATA_WIDTH];

    U16 uiDelay, u2DQDelayBegin, u2DQDelayEnd, u2DQDelayStep = 1;

    //cc mark U8 ucdq_pi, ucdq_ui_small, ucdq_ui_large, ucdq_oen_ui_small, ucdq_oen_ui_large;
    U8 ucdq_ui_small_reg_value, u1UpdateRegUI;  // for UI and TXDLY change check, if different , set reg.

    //cc mark U8 ucdq_reg_pi[DQS_NUMBER], ucdq_reg_ui_large[DQS_NUMBER], ucdq_reg_ui_small[DQS_NUMBER];
    //cc mark U8 ucdq_reg_oen_ui_large[DQS_NUMBER], ucdq_reg_oen_ui_small[DQS_NUMBER];

    //cc mark U8 ucdq_reg_dqm_pi[DQS_NUMBER] = {0}, ucdq_reg_dqm_ui_large[DQS_NUMBER] = {0}, ucdq_reg_dqm_ui_small[DQS_NUMBER] = {0};
    //cc mark U8 ucdq_reg_dqm_oen_ui_large[DQS_NUMBER] = {0}, ucdq_reg_dqm_oen_ui_small[DQS_NUMBER] = {0};

#if 1//TX_DQM_CALC_MAX_MIN_CENTER
    U16 u2DQM_Delay;  // LP4 only
    U16 u2Center_min[DQS_NUMBER] = {0}, u2Center_max[DQS_NUMBER] = {0};
#endif
    U8 u1EnableDelayCell = 0;
    U16 u2DelayCellOfst[DQ_DATA_WIDTH] = {0};
    U32 u4err_value, u4fail_bit;
    U16 u2FinalRange = 0, u2FinalVref;
    U16 u2VrefLevel, u2VrefBegin, u2VrefEnd, u2VrefStep;
    U16 u2TempWinSum, u2MaxWindowSum = 0;//, u2tx_window_sum[TX_VREF_DATA_NUM]={0};
//    U32 u4TempRegValue;
    U8 u1min_bit = 0, u1min_winsize = 0;
    U8 u1VrefIdx = 0;
    U8 u1PIDiff;
    PASS_WIN_DATA_BY_VREF_T VrefInfo[TX_VREF_DATA_NUM];

    if (!p)
    {
        mcSHOW_ERR_MSG("context NULL\n");
        return DRAM_FAIL;
    }

#if 0//TX_AUTO_K_ENABLE
    U8 u1PI_Len, u1dq_shift;
    U32 PwMaxInitReg[4] = {DRAMC_REG_TX_ATK_RESULT0, DRAMC_REG_TX_ATK_RESULT1, DRAMC_REG_TX_ATK_RESULT2, DRAMC_REG_TX_ATK_RESULT3};
    U32 PwMaxLenReg[4] = {DRAMC_REG_TX_ATK_RESULT4, DRAMC_REG_TX_ATK_RESULT5, DRAMC_REG_TX_ATK_RESULT6, DRAMC_REG_TX_ATK_RESULT7};
    U32 u4Length = 0;
#if TX_AUTO_K_WORKAROUND
    U8 u1backup_Rank = 0;
#endif
#if TX_AUTO_K_WORKAROUND
    U32 u4RegBackupAddress[] =
    {
        (DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ0)),
        (DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ2)),
        (DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ)),
        (DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ)),
        (DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ1)),
        (DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ3)),
        (DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ)),
        (DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ)),
    };
#endif
#endif

#if 0
    if(0)//(p->channel==CHANNEL_D)
    {
        ucdq_reg_ui_large[0] = ucdq_reg_ui_large[1] = 2;
        ucdq_reg_ui_small[0] = ucdq_reg_ui_small[1] = 1;
        ucdq_reg_oen_ui_large[0] = ucdq_reg_oen_ui_large[1] = 1;
        ucdq_reg_oen_ui_small[0] = ucdq_reg_oen_ui_small[1] = 6;
        ucdq_reg_pi[0] = ucdq_reg_pi[1] = 20;


        ucdq_reg_dqm_ui_large[0] = ucdq_reg_dqm_ui_large[1] = 2;
        ucdq_reg_dqm_ui_small[0] = ucdq_reg_dqm_ui_small[1] = 1;
        ucdq_reg_dqm_oen_ui_large[0] = ucdq_reg_dqm_oen_ui_large[1] = 1;
        ucdq_reg_dqm_oen_ui_small[0] = ucdq_reg_dqm_oen_ui_small[1] = 6;
        ucdq_reg_dqm_pi[0] = ucdq_reg_dqm_pi[1] = 20;

        TXSetDelayReg_DQ(p, TRUE, ucdq_reg_ui_large, ucdq_reg_oen_ui_large, ucdq_reg_ui_small, ucdq_reg_oen_ui_small, ucdq_reg_pi);
        TXSetDelayReg_DQM(p, TRUE, ucdq_reg_dqm_ui_large, ucdq_reg_dqm_oen_ui_large, ucdq_reg_dqm_ui_small, ucdq_reg_dqm_oen_ui_small, ucdq_reg_dqm_pi);
        return;
    }
#endif

    mcDUMP_REG_MSG("\n[dumpRG] DramcTxWindowPerbitCal\n");
#if VENDER_JV_LOG
    if (calType == TX_DQ_DQS_MOVE_DQ_ONLY)
        vPrintCalibrationBasicInfo_ForJV(p);
#else
        vPrintCalibrationBasicInfo(p);
#if __SLT__
	vPrintCalibrationBasicInfoDiag(p);
#endif
#endif
#if __SLT__
    if (calType ==TX_DQ_DQS_MOVE_DQ_ONLY && u1VrefScanEnable==1)
        vPrintCalibrationBasicInfoDiag(p);
#endif

    backup_rank = u1GetRank(p);
	vAutoRefreshSwitch(p, ENABLE);
    if (is_ddr3_family(p))
        u1VrefScanEnable = FALSE;

    TXPerbitCalibrationInit(p, calType);
    TXScanRange_PI(p, calType, &u2DQDelayBegin, &u2DQDelayEnd);
    TXScanRange_Vref(p, u1VrefScanEnable, &u2FinalRange, &u2VrefBegin, &u2VrefEnd, &u2VrefStep);

    //default set FAIL
    vSetCalibrationResult(p, DRAM_CALIBRATION_TX_PERBIT, DRAM_FAIL);

#if TX_AUTO_K_ENABLE
    /* HW AUTOK */
    if (isAutoK) {
        ///TODO: HW NOT support for now
        return DRAM_OK;
    }
#endif

    /* SW calibration */
#if 0
    if (isAutoK)
    {
#if TX_AUTO_K_ENABLE
    //CKEFixOnOff(p, p->rank, CKE_FIXON, CKE_WRITE_TO_ONE_CHANNEL); //Let CLK always on

    //Set base address of TX MCK and UI
    u1UpdateRegUI = 1;
    uiDelay = u2DQDelayBegin;
    u1PI_Len = 3;
    TxWinTransferDelayToUIPI(p, uiDelay, 0, &ucdq_ui_large, &ucdq_ui_small, &ucdq_pi, &ucdq_oen_ui_large, &ucdq_oen_ui_small);

    for (u1ByteIdx = 0; u1ByteIdx < DQS_NUMBER; u1ByteIdx++)
    {
        if (u1UpdateRegUI)
        {
            ucdq_reg_ui_large[u1ByteIdx] = ucdq_ui_large;
            ucdq_reg_ui_small[u1ByteIdx] = ucdq_ui_small;
            ucdq_reg_oen_ui_large[u1ByteIdx] = ucdq_oen_ui_large;
            ucdq_reg_oen_ui_small[u1ByteIdx] = ucdq_oen_ui_small;

            ucdq_reg_dqm_ui_large[u1ByteIdx] = ucdq_ui_large;
            ucdq_reg_dqm_ui_small[u1ByteIdx] = ucdq_ui_small;
            ucdq_reg_dqm_oen_ui_large[u1ByteIdx] = ucdq_oen_ui_large;
            ucdq_reg_dqm_oen_ui_small[u1ByteIdx] = ucdq_oen_ui_small;
        }

            ucdq_reg_pi[u1ByteIdx] = ucdq_pi;
            ucdq_reg_dqm_pi[u1ByteIdx] = ucdq_pi;
    }

#if TX_AUTO_K_WORKAROUND
    if (p->rank == 1)
    {
        u1backup_Rank = 1;
        p->rank = 0;
        DramcBackupRegisters(p, u4RegBackupAddress, sizeof(u4RegBackupAddress) / sizeof(U32));
    }
#endif

    if (calType == TX_DQ_DQS_MOVE_DQ_ONLY || calType == TX_DQ_DQS_MOVE_DQ_DQM)
    {
        TXSetDelayReg_DQ(p, u1UpdateRegUI, ucdq_reg_ui_large, ucdq_reg_oen_ui_large, ucdq_reg_ui_small, ucdq_reg_oen_ui_small, ucdq_reg_pi);
        mcSHOW_DBG_MSG("TX Auto-K set begin delay DQ MCK: %d, UI: %d, PI: %d\n", ucdq_reg_ui_large[0], ucdq_reg_ui_small[0], ucdq_reg_pi[0]);

    #if TX_AUTO_K_WORKAROUND
        if ((calType == TX_DQ_DQS_MOVE_DQ_ONLY) && (u1backup_Rank == 1))
            Tx_Auto_K_DQM_Workaround(p); //Set best DLY value of RK1 DQM to RK0 DQM
    #endif
    }
    if (calType == TX_DQ_DQS_MOVE_DQM_ONLY || calType == TX_DQ_DQS_MOVE_DQ_DQM)
    {
        TXSetDelayReg_DQM(p, u1UpdateRegUI, ucdq_reg_dqm_ui_large, ucdq_reg_dqm_oen_ui_large, ucdq_reg_dqm_ui_small, ucdq_reg_dqm_oen_ui_small, ucdq_reg_dqm_pi);
        mcSHOW_DBG_MSG("TX Auto-K set begin delay DQM MCK: %d, UI: %d, PI: %d\n", ucdq_reg_dqm_ui_large[0], ucdq_reg_dqm_ui_small[0], ucdq_reg_dqm_pi[0]);

    #if TX_AUTO_K_WORKAROUND
        if ((calType == TX_DQ_DQS_MOVE_DQM_ONLY) && (u1backup_Rank == 1))
            Tx_Auto_K_DQ_Workaround(p); //Set best DLY value of RK1 DQ to RK0 DQ
    #endif
    }

#if TX_AUTO_K_WORKAROUND
    if (u1backup_Rank == 1)
        p->rank = 1;
#endif

    //Tx_Auto_K_Init(p, calType, ucdq_pi, u1PI_Len); //u1PI_Len = 1 means that PI len is 64 PI
#endif
    }
#endif
    {
    if (vGet_DDR_Loop_Mode(p) == SEMI_OPEN_LOOP_MODE)
        u2DQDelayStep = (1 << 3);
    else if (vGet_DDR_Loop_Mode(p) == OPEN_LOOP_MODE)
        u2DQDelayStep = (1 << 4);
    else if (calType == TX_DQ_DQS_MOVE_DQ_DQM)
        u2DQDelayStep = 2;
    else
        u2DQDelayStep = 1;

#if FOR_DV_SIMULATION_USED
    u2DQDelayStep = 8;
#endif
    }

#if 0
    mcSHOW_DBG_MSG("[TxWindowPerbitCal] calType=%d, VrefScanEnable %d (Range %d,  VrefBegin %d, u2VrefEnd %d)\n"
                    "\nBegin, DQ Scan Range %d~%d\n",
                    calType, u1VrefScanEnable, u2FinalRange, u2VrefBegin, u2VrefEnd, u2DQDelayBegin, u2DQDelayEnd);
#endif

#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
    if (p->femmc_Ready == 1 && (p->Bypass_TXWINDOW))
    {
        for (u1ByteIdx = 0; u1ByteIdx < (p->data_width / DQS_BIT_NUMBER); u1ByteIdx++)
        {
            u2Center_min[u1ByteIdx] = p->pSavetimeData->u1TxCenter_min_Save[p->channel][p->rank][u1ByteIdx];
            u2Center_max[u1ByteIdx] = p->pSavetimeData->u1TxCenter_max_Save[p->channel][p->rank][u1ByteIdx];

            for (u1BitIdx = 0; u1BitIdx < DQS_BIT_NUMBER; u1BitIdx++)
            {
                u1BitTemp = u1ByteIdx * DQS_BIT_NUMBER + u1BitIdx;
                FinalWinPerBit[u1BitTemp].win_center = p->pSavetimeData->u1Txwin_center_Save[p->channel][p->rank][u1BitTemp];
            }
        }
        vSetCalibrationResult(p, DRAM_CALIBRATION_TX_PERBIT, DRAM_FAST_K);
    }
    else
#endif
    {
#if ENABLE_K_WITH_WORST_SI_UI_SHIFT
#if (TX_K_DQM_MODE == 2)
        if (calType == TX_DQ_DQS_MOVE_DQM_ONLY)
        {
            DramcEngine2Init(p, p->test2_1, p->test2_2, p->test_pattern, 0, TE_NO_UI_SHIFT);//UI_SHIFT + LEN1
        }
        else
#endif
{
        DramcEngine2Init(p, p->test2_1, p->test2_2, p->test_pattern, 0, TE_UI_SHIFT);//UI_SHIFT + LEN1
}
#else
        DramcEngine2Init(p, p->test2_1, p->test2_2, TEST_XTALK_PATTERN, 0, TE_NO_UI_SHIFT);
#endif

        for (u2VrefLevel = u2VrefBegin; u2VrefLevel <= u2VrefEnd; u2VrefLevel += u2VrefStep)
        {
            // SET tx Vref (DQ) here, DDR3 no need to do this.
            if (u1VrefScanEnable)
            {
            #if (!REDUCE_LOG_FOR_PRELOADER)
                mcSHOW_DBG_MSG("\n\n\tTX VrefRange %d, VrefLevel=%d\n", u2FinalRange, u2VrefLevel);
            #endif

            #if VENDER_JV_LOG
                if (calType == TX_DQ_DQS_MOVE_DQ_ONLY)
                {
                    mcSHOW_DBG_MSG5("\n\tTX VrefRange %d, VrefLevel=%d\n", u2FinalRange, u2VrefLevel);
                }
            #endif

                DramcTXSetVref(p, u2FinalRange, u2VrefLevel);
            }
            else
            {
                mcSHOW_DBG_MSG("\n\n\tTX Vref Scan disable\n");
            }

            // initialize parameters
            uiFinishCount = 0;
            u2TempWinSum = 0;
            ucdq_ui_small_reg_value = 0xff;

            for (u1BitIdx = 0; u1BitIdx < p->data_width; u1BitIdx++)
            {
                WinPerBit[u1BitIdx].first_pass = (S16)PASS_RANGE_NA;
                WinPerBit[u1BitIdx].last_pass = (S16)PASS_RANGE_NA;
                VrefWinPerBit[u1BitIdx].first_pass = (S16)PASS_RANGE_NA;
                VrefWinPerBit[u1BitIdx].last_pass = (S16)PASS_RANGE_NA;
            }

#if 0
            if (isAutoK)
            {
        #if TX_AUTO_K_ENABLE
            Tx_Auto_K_Init(p, calType, ucdq_pi, u1PI_Len); //u1PI_Len = 1 means that PI len is 64 PI
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET1), 0x1, TX_ATK_SET1_TX_ATK_TRIG); //TX Auto K start
        #endif
            }
#endif
            {
            //Move DQ delay ,  1 PI = tCK/64, total 128 PI, 1UI = 32 PI
            //For data rate 3200, max tDQS2DQ is 2.56UI (82 PI)
            //For data rate 4266, max tDQS2DQ is 3.41UI (109 PI)
            for (uiDelay = u2DQDelayBegin; uiDelay < u2DQDelayEnd; uiDelay += u2DQDelayStep)
            {
                u1UpdateRegUI = 0;
                TxWinTransferDelayToUIPI(p, uiDelay, 0, &dly_all);

                // Check if TX UI changed, if not change , don't need to set reg again
                if (ucdq_ui_small_reg_value != dly_all.ui)
                {
                    u1UpdateRegUI = 1;
                    ucdq_ui_small_reg_value = dly_all.ui;
                }

                for (u1ByteIdx = 0; u1ByteIdx < (p->data_width / DQS_BIT_NUMBER); u1ByteIdx++)
                {
                    if (u1UpdateRegUI)
                    {
                        dly_per_byte.dq_mck[u1ByteIdx] = dly_all.mck;
                        dly_per_byte.dq_ui[u1ByteIdx] = dly_all.ui;
                        dly_per_byte.dq_mck_oe[u1ByteIdx] = dly_all.mck_oe;
                        dly_per_byte.dq_ui_oe[u1ByteIdx] = dly_all.ui_oe;

                        dly_per_byte.dqm_mck[u1ByteIdx] = dly_all.mck;
                        dly_per_byte.dqm_ui[u1ByteIdx] = dly_all.ui;
                        dly_per_byte.dqm_mck_oe[u1ByteIdx] = dly_all.mck_oe;
                        dly_per_byte.dqm_ui_oe[u1ByteIdx] = dly_all.ui_oe;
                    }

                    dly_per_byte.dq_pi[u1ByteIdx] = dly_all.pi;
                    dly_per_byte.dqm_pi[u1ByteIdx] = dly_all.pi;
                }

                if (calType == TX_DQ_DQS_MOVE_DQ_ONLY || calType == TX_DQ_DQS_MOVE_DQ_DQM)
                {
                    TXSetDelayReg_DQ(p, u1UpdateRegUI, &dly_per_byte);
                }

                if (calType == TX_DQ_DQS_MOVE_DQM_ONLY || calType == TX_DQ_DQS_MOVE_DQ_DQM)
                {
                    TXSetDelayReg_DQM(p, u1UpdateRegUI, &dly_per_byte);
                }

                u4err_value = 0;
#if ENABLE_K_WITH_WORST_SI_UI_SHIFT
                //DramcEngine2SetPat(p, p->test_pattern, 0, 0, TE_UI_SHIFT);
                u4err_value = DramcEngine2Run(p, TE_OP_WRITE_READ_CHECK, p->test_pattern);
#else
                //audio + xtalk pattern
                DramcEngine2SetPat(p, TEST_AUDIO_PATTERN, 0, 0, TE_NO_UI_SHIFT);
                u4err_value = DramcEngine2Run(p, TE_OP_WRITE_READ_CHECK, TEST_AUDIO_PATTERN);
                DramcEngine2SetPat(p, TEST_XTALK_PATTERN, 0, 1, TE_NO_UI_SHIFT);
                u4err_value |= DramcEngine2Run(p, TE_OP_WRITE_READ_CHECK, TEST_XTALK_PATTERN);
#endif
                //audio + xtalk pattern
                //u4err_value = 0;
                //DramcEngine2SetPat(p, TEST_AUDIO_PATTERN, 0, 0);
                //u4err_value = DramcEngine2Run(p, TE_OP_WRITE_READ_CHECK, TEST_AUDIO_PATTERN);
                //DramcEngine2SetPat(p, TEST_XTALK_PATTERN, 0, 1);
                //u4err_value |= DramcEngine2Run(p, TE_OP_WRITE_READ_CHECK, TEST_XTALK_PATTERN);

                if (u1VrefScanEnable == 0 && (calType != TX_DQ_DQS_MOVE_DQM_ONLY))
                {
                    //mcSHOW_DBG_MSG("Delay=%3d |%2d %2d %3d| %2d %2d| 0x%8x [0]",uiDelay, ucdq_ui_large,ucdq_ui_small, ucdq_pi, ucdq_oen_ui_large,ucdq_oen_ui_small, u4err_value);
                #ifdef ETT_PRINT_FORMAT
                    if (u4err_value != 0)
                    {
                        mcSHOW_DBG_MSG2("%d |%d %d %d|[0]", uiDelay, dly_all.mck, dly_all.ui, dly_all.pi);
                    }
                #else
                    mcSHOW_DBG_MSG2("Delay=%3d |%2d %2d %3d| 0x%8x [0]", uiDelay, dly_all.mck, dly_all.ui, dly_all.pi, u4err_value);
                #endif
                }

                // check fail bit ,0 ok ,others fail
                for (u1BitIdx = 0; u1BitIdx < p->data_width; u1BitIdx++)
                {
                    u4fail_bit = u4err_value & ((U32)1 << u1BitIdx);

                    if (u1VrefScanEnable == 0 && (calType != TX_DQ_DQS_MOVE_DQM_ONLY))
                    {
                        if(u4err_value != 0)
                        {
                            if (u1BitIdx % DQS_BIT_NUMBER == 0)
                            {
                                mcSHOW_DBG_MSG2(" ");
                            }

                            if (u4fail_bit == 0)
                            {
                                 mcSHOW_DBG_MSG2("o");
                            }
                            else
                            {
                                mcSHOW_DBG_MSG2("x");
                            }
                        }
                    }

                    if (WinPerBit[u1BitIdx].first_pass == PASS_RANGE_NA)
                    {
                        if (u4fail_bit == 0) //compare correct: pass
                        {
                            WinPerBit[u1BitIdx].first_pass = uiDelay;

                        #if TX_TDQS2DQ_PRE_CAL
                            if ((calType == TX_DQ_DQS_MOVE_DQ_ONLY) && (u1VrefScanEnable == FALSE))
                            {
                                if (u2DQS2DQ_Pre_Cal[p->channel][p->rank][vGet_Div_Mode(p)] == 0)
                                {
                                    u2DQS2DQ_Pre_Cal[p->channel][p->rank][vGet_Div_Mode(p)] = ((uiDelay - u2DQDelayBegin)* 1000) / p->frequency;
                                }

                                if (uiDelay == u2DQDelayBegin)
                                {
                                    mcSHOW_ERR_MSG("TX_TDQS2DQ_PRE_CAL: Warning, possible miss TX window boundary\n");
                                #if __ETT__
                                    while (1);
                                #endif
                                }
                            }
                        #endif
                        }
                    }
                    else if (WinPerBit[u1BitIdx].last_pass == PASS_RANGE_NA)
                    {
                        if (u4fail_bit != 0) //compare error : fail
                        {
                            WinPerBit[u1BitIdx].last_pass = uiDelay - u2DQDelayStep;
                        }
                        else if (uiDelay > (u2DQDelayEnd - u2DQDelayStep))
                        {
                            WinPerBit[u1BitIdx].last_pass = uiDelay;
                        }

                        if (WinPerBit[u1BitIdx].last_pass != PASS_RANGE_NA)
                        {
                            if ((WinPerBit[u1BitIdx].last_pass - WinPerBit[u1BitIdx].first_pass) >= (VrefWinPerBit[u1BitIdx].last_pass - VrefWinPerBit[u1BitIdx].first_pass))
                            {
                                if ((VrefWinPerBit[u1BitIdx].last_pass != PASS_RANGE_NA) && (VrefWinPerBit[u1BitIdx].last_pass - VrefWinPerBit[u1BitIdx].first_pass) > 0)
                                {
                                    mcSHOW_DBG_MSG2("Bit[%d] Bigger window update %d > %d, window broken?\n", u1BitIdx, \
                                        (WinPerBit[u1BitIdx].last_pass - WinPerBit[u1BitIdx].first_pass), (VrefWinPerBit[u1BitIdx].last_pass - VrefWinPerBit[u1BitIdx].first_pass));
                                }

                                //If the window size is bigger than TX_PASS_WIN_CRITERIA, consider it a real pass window. If not, don't update the finish count and don't do the early break.
                                if ((WinPerBit[u1BitIdx].last_pass - WinPerBit[u1BitIdx].first_pass) > TX_PASS_WIN_CRITERIA)
                                    uiFinishCount |= (1 << u1BitIdx);

                                //update bigger window size
                                VrefWinPerBit[u1BitIdx].first_pass = WinPerBit[u1BitIdx].first_pass;
                                VrefWinPerBit[u1BitIdx].last_pass = WinPerBit[u1BitIdx].last_pass;
                            }

                            //reset tmp window
                            WinPerBit[u1BitIdx].first_pass = PASS_RANGE_NA;
                            WinPerBit[u1BitIdx].last_pass = PASS_RANGE_NA;
                        }
                     }
                }

                if(u1VrefScanEnable==0 && (calType != TX_DQ_DQS_MOVE_DQM_ONLY))
                {
                    if(u4err_value != 0)
                    {
                        mcSHOW_DBG_MSG2(" [MSB]\n");
                    }
                }

                //If every bit's window has been found and all bits have turned to fail again, early break.
                if (uiFinishCount == 0xffff)
                {
                    vSetCalibrationResult(p, DRAM_CALIBRATION_TX_PERBIT, DRAM_OK);
                #if !REDUCE_LOG_FOR_PRELOADER
                #ifdef ETT_PRINT_FORMAT
                    mcSHOW_DBG_MSG2("TX calibration finding left boundary early break. PI DQ delay=0x%B\n", uiDelay);
                #else
                    mcSHOW_DBG_MSG2("TX calibration finding left boundary early break. PI DQ delay=0x%2x\n", uiDelay);
                #endif
                #endif
                    break;  //early break
                }
            }
            }

#if 0
            if (isAutoK)
            {
        #if TX_AUTO_K_ENABLE
                Tx_Auto_K_complete_check(p);
            #if TX_AUTO_K_DEBUG_ENABLE
                Tx_Auto_K_Debug_Message(p, u1PI_Len);
            #endif
        #endif
            }
#endif

            // (1) calculate per bit window size
            // (2) find out min win of all DQ bits
            // (3) calculate perbit window center
            u1min_winsize = 0xff;
            u1min_bit = 0xff;
            for (u1BitIdx = 0; u1BitIdx < p->data_width; u1BitIdx++)
            {
            #if 0
                if (isAutoK)
                {
            #if TX_AUTO_K_ENABLE
                u1dq_shift = ((u1BitIdx % 4) * 8);
                VrefWinPerBit[u1BitIdx].first_pass = u2DQDelayBegin - ucdq_pi + ((u4IO32Read4B(DRAMC_REG_ADDR(PwMaxInitReg[u1BitIdx / 4])) & (0xff << u1dq_shift)) >> u1dq_shift);
                VrefWinPerBit[u1BitIdx].last_pass = ((u4IO32Read4B(DRAMC_REG_ADDR(PwMaxLenReg[u1BitIdx / 4])) & (0xff << u1dq_shift)) >> u1dq_shift) + VrefWinPerBit[u1BitIdx].first_pass;
                VrefWinPerBit[u1BitIdx].win_size = ((u4IO32Read4B(DRAMC_REG_ADDR(PwMaxLenReg[u1BitIdx / 4])) & (0xff << u1dq_shift)) >> u1dq_shift);

                if (u1PI_Len == 0)
                    u4Length = 48;
                else
                    u4Length = 32 * (1 + u1PI_Len);

                if ((VrefWinPerBit[u1BitIdx].first_pass == (int)(u2DQDelayBegin - ucdq_pi)) || (VrefWinPerBit[u1BitIdx].last_pass == (int)(u2DQDelayBegin + u4Length)))
                {
                    mcSHOW_ERR_MSG("Error! Probably miss pass window!\n");
                }

                mcSHOW_DBG_MSG("TX DQ bit %d, first pass: %d, last pass: %d\n", u1BitIdx, VrefWinPerBit[u1BitIdx].first_pass, VrefWinPerBit[u1BitIdx].last_pass);
            #else
                //if(VrefWinPerBit[u1BitIdx].last_pass == VrefWinPerBit[u1BitIdx].first_pass)
                if (VrefWinPerBit[u1BitIdx].first_pass == PASS_RANGE_NA)
                    VrefWinPerBit[u1BitIdx].win_size = 0;
                else
                    VrefWinPerBit[u1BitIdx].win_size = VrefWinPerBit[u1BitIdx].last_pass - VrefWinPerBit[u1BitIdx].first_pass + u2DQDelayStep;
            #endif
                }
                else
            #endif
                {
                    if (VrefWinPerBit[u1BitIdx].first_pass == PASS_RANGE_NA)
                        VrefWinPerBit[u1BitIdx].win_size = 0;
                    else
                        VrefWinPerBit[u1BitIdx].win_size = VrefWinPerBit[u1BitIdx].last_pass - VrefWinPerBit[u1BitIdx].first_pass + u2DQDelayStep;
                }

                if (VrefWinPerBit[u1BitIdx].win_size < u1min_winsize)
                {
                    u1min_bit = u1BitIdx;
                    u1min_winsize = VrefWinPerBit[u1BitIdx].win_size;
                }

                u2TempWinSum += VrefWinPerBit[u1BitIdx].win_size;  //Sum of CA Windows for vref selection

            #if VENDER_JV_LOG
                if (calType == TX_DQ_DQS_MOVE_DQ_ONLY)
                {
                    mcSHOW_DBG_MSG5("TX Bit%d, %d%%\n", u1BitIdx, (VrefWinPerBit[u1BitIdx].win_size * 100 + 31) / 32);
                }
            #endif


                // calculate per bit window position and print
                VrefWinPerBit[u1BitIdx].win_center = (VrefWinPerBit[u1BitIdx].first_pass + VrefWinPerBit[u1BitIdx].last_pass) >> 1;
            #if PINMUX_AUTO_TEST_PER_BIT_TX
                gFinalTXPerbitFirstPass[p->channel][u1BitIdx] = VrefWinPerBit[u1BitIdx].first_pass;
            #endif
            }


        #if 1//__ETT__
            if (u1VrefScanEnable == 0)
            {
#if __SLT__
                dram_slt_log_enable = 1;
#endif
                //mcSHOW_DBG_MSG("\n\tCH=%d, VrefRange= %d, VrefLevel = %d\n", p->channel, u2FinalRange, u2VrefLevel);
                TxPrintWidnowInfo(p, VrefWinPerBit);
#if __SLT__
                dram_slt_log_enable = 0;
#endif
            }
        #endif

            if (u1VrefScanEnable == 1)
            {
                if (u2TempWinSum > u2MaxWindowSum)
                    u2MaxWindowSum = u2TempWinSum;

                VrefInfo[u1VrefIdx].u2VrefUsed = u2VrefLevel;
                VrefInfo[u1VrefIdx].u1WorseBitWinSize_byVref = u1min_winsize;
                VrefInfo[u1VrefIdx].u1WorseBitIdx_byVref = u1min_bit;
                VrefInfo[u1VrefIdx].u2WinSum_byVref = u2TempWinSum;
                u1VrefIdx ++;
            }

#if 0
        #if TX_AUTO_K_ENABLE
            if (isAutoK)
                Tx_Auto_K_Clear(p);
        #endif
#endif

        #if TX_VREF_PASS_CONDITION
            if (u1VrefScanEnable && (u2TempWinSum < (u2MaxWindowSum * 95 / 100)) && (u1min_winsize < TX_VREF_PASS_CONDITION))
        #else
            if (u1VrefScanEnable && (u2TempWinSum < (u2MaxWindowSum * 95 / 100)) && (u1min_winsize > TX_PASS_WIN_CRITERIA))
        #endif
            {
#if __SLT__
                mcSHOW_SLT_MSG(("\nTX Vref early break, caculate TX vref\n"));
#endif

                mcSHOW_DBG_MSG("\nTX Vref early break, caculate TX vref\n");
                break;
            }

#if 0
        #if TX_AUTO_K_ENABLE
            Tx_Auto_K_Clear(p);
        #endif
#endif
        }

        DramcEngine2End(p);

#if 0
    #if (TX_AUTO_K_ENABLE && TX_AUTO_K_WORKAROUND)
        if ((isAutoK) && (p->rank == RANK_1))
        {
            vSetRank(p, RANK_0);
            DramcRestoreRegisters(p, u4RegBackupAddress, sizeof(u4RegBackupAddress) / sizeof(U32));
            vSetRank(p, backup_rank);
        }
    #endif
#endif

        if (u1VrefScanEnable == 0)// ..if time domain (not vref scan) , calculate window center of all bits.
        {
            // Calculate the center of DQ pass window
            // Record center sum of each byte
            for (u1ByteIdx = 0; u1ByteIdx < (p->data_width / DQS_BIT_NUMBER); u1ByteIdx++)
            {
            #if 1//TX_DQM_CALC_MAX_MIN_CENTER
                u2Center_min[u1ByteIdx] = 0xffff;
                u2Center_max[u1ByteIdx] = 0;
            #endif

                for (u1BitIdx = 0; u1BitIdx < DQS_BIT_NUMBER; u1BitIdx++)
                {
                    u1BitTemp = u1ByteIdx * DQS_BIT_NUMBER + u1BitIdx;
                    memcpy(FinalWinPerBit, VrefWinPerBit, sizeof(PASS_WIN_DATA_T) * DQ_DATA_WIDTH);

                    if (FinalWinPerBit[u1BitTemp].win_center < u2Center_min[u1ByteIdx])
                        u2Center_min[u1ByteIdx] = FinalWinPerBit[u1BitTemp].win_center;

                    if (FinalWinPerBit[u1BitTemp].win_center > u2Center_max[u1ByteIdx])
                        u2Center_max[u1ByteIdx] = FinalWinPerBit[u1BitTemp].win_center;

                #ifdef FOR_HQA_TEST_USED
                    if ((calType == TX_DQ_DQS_MOVE_DQ_ONLY) && (u1VrefScanEnable == 0))
                    {
                        gFinalTXPerbitWin[p->channel][p->rank][u1BitTemp] = FinalWinPerBit[u1BitTemp].win_size;
                    }
                #endif
                }
            }

        #if SUPPORT_SAVE_TIME_FOR_CALIBRATION
            if (p->femmc_Ready == 0)//save firtst run pass value
            {
                for (u1ByteIdx = 0; u1ByteIdx < (p->data_width / DQS_BIT_NUMBER); u1ByteIdx++)
                {
                    if (calType == TX_DQ_DQS_MOVE_DQ_ONLY) // && u1VrefScanEnable==0
                    {
                        p->pSavetimeData->u1TxCenter_min_Save[p->channel][p->rank][u1ByteIdx] = u2Center_min[u1ByteIdx];
                        p->pSavetimeData->u1TxCenter_max_Save[p->channel][p->rank][u1ByteIdx] = u2Center_max[u1ByteIdx];

                        for (u1BitIdx = 0; u1BitIdx < DQS_BIT_NUMBER; u1BitIdx++)
                        {
                            u1BitTemp = u1ByteIdx * DQS_BIT_NUMBER + u1BitIdx;
                            p->pSavetimeData->u1Txwin_center_Save[p->channel][p->rank][u1BitTemp] = FinalWinPerBit[u1BitTemp].win_center;
                        }
                    }
                }
            }
        #endif
        }
    }

    // SET tx Vref (DQ) = u2FinalVref, LP3 no need to set this.
    if (u1VrefScanEnable)
    {
    #if SUPPORT_SAVE_TIME_FOR_CALIBRATION && BYPASS_VREF_CAL
        if (p->femmc_Ready == 1 && (p->Bypass_TXWINDOW))
        {
            u2FinalVref = p->pSavetimeData->u1TxWindowPerbitVref_Save[p->channel][p->rank];
        }
        else
    #endif
        {
            u2FinalVref = TxChooseVref(p, VrefInfo, u1VrefIdx);
        #if SUPPORT_SAVE_TIME_FOR_CALIBRATION
            if (p->femmc_Ready == 0)////save firtst run Vref value
            {
                p->pSavetimeData->u1TxWindowPerbitVref_Save[p->channel][p->rank] = u2FinalVref;
            }
        #endif
        }

        TXSetFinalVref(p, u2FinalRange, u2FinalVref);
        return DRAM_OK;
    }
    else
    {
    #if SUPPORT_SAVE_TIME_FOR_CALIBRATION
        if (p->femmc_Ready == 0)////save firtst run Vref value
        {
            //p->pSavetimeData->u1TxWindowPerbitVref_Save[p->channel][p->rank] = u1MR14Value[p->channel][p->rank][p->dram_fsp] & 0x3f;
        }
    #endif
    }
#if __SLT__
	if ((calType == TX_DQ_DQS_MOVE_DQ_ONLY)) {
		for (u1BitIdx = 0; u1BitIdx < DQS_BIT_NUMBER; u1BitIdx++) {
				mcSHOW_PARSER_MSG(("[%d Mbps][CH%d][RK%d][TX] Bit%d (%d~%d) %d %d,   Bit%d (%d~%d) %d %d,\n",	p->frequency*2, p->channel,p->rank,\
					u1BitIdx, FinalWinPerBit[u1BitIdx].first_pass, FinalWinPerBit[u1BitIdx].last_pass, FinalWinPerBit[u1BitIdx].win_size, FinalWinPerBit[u1BitIdx].win_center, \
					u1BitIdx+8, FinalWinPerBit[u1BitIdx+8].first_pass, FinalWinPerBit[u1BitIdx+8].last_pass, FinalWinPerBit[u1BitIdx+8].win_size, FinalWinPerBit[u1BitIdx+8].win_center));
		}
	}

	if ((calType == TX_DQ_DQS_MOVE_DQ_ONLY) && (u1VrefScanEnable == 0))
	{
		if(u1min_winsize < 16)
		{
			mcSHOW_SLT_MSG(("[WARNING] Smaller TX win < 0.5UI !!\n"));
			mcSHOW_SLT_MSG(("[WARNING] u1min_bit = %d,u1min_winsize = %d\n", u1min_bit, u1min_winsize));
			ASSERT(0);
		}
	}
#endif


#ifdef FOR_HQA_TEST_USED
    if (calType == TX_DQ_DQS_MOVE_DQ_ONLY)
    {
        gFinalTXPerbitWin_min_max[p->channel][p->rank] = u1min_winsize;
        if(u1min_winsize<16)
        {
            mcSHOW_ERR_MSG("[WARNING] Smaller TX win !!\n");
        #if CHECK_HQA_CRITERIA
            ASSERT(0);
        #endif
        }
    }
#endif

    if ((calType == TX_DQ_DQS_MOVE_DQ_ONLY) && (p->u2DelayCellTimex100 != 0))
    {
        u1EnableDelayCell = 1;
        mcSHOW_DBG_MSG("[TX_PER_BIT_DELAY_CELL] DelayCellTimex100 =%d/100 ps\n", p->u2DelayCellTimex100);
        mcDUMP_REG_MSG("[TX_PER_BIT_DELAY_CELL] DelayCellTimex100 =%d/100 ps\n", p->u2DelayCellTimex100);
#if __SLT__
        mcSHOW_PARSER_MSG(("[TX_PER_BIT_DELAY_CELL] DelayCellTimex100 =%d/100 ps\n", p->u2DelayCellTimex100));
#endif
    }

    //Calculate the center of DQ pass window
    //average the center delay
    for (u1ByteIdx = 0; u1ByteIdx < (p->data_width / DQS_BIT_NUMBER); u1ByteIdx++)
    {
        mcSHOW_DBG_MSG(" == TX Byte %d ==\n", u1ByteIdx);
        mcDUMP_REG_MSG(" == TX Byte %d ==\n", u1ByteIdx);
#if __SLT__
        mcSHOW_SLT_MSG((" == TX Byte %d ==\n", u1ByteIdx));
#endif
        u2DQM_Delay = ((u2Center_min[u1ByteIdx] + u2Center_max[u1ByteIdx]) >> 1); //(max +min)/2

        if (u1EnableDelayCell == 0)
        {
            uiDelay = u2DQM_Delay;
        }
        else// if(calType == TX_DQ_DQS_MOVE_DQ_ONLY)
        {
            uiDelay = u2Center_min[u1ByteIdx];  // for DQ PI delay , will adjust with delay cell

            // calculate delay cell perbit
            for (u1BitIdx = 0; u1BitIdx < DQS_BIT_NUMBER; u1BitIdx++)
            {
                u1BitTemp = u1ByteIdx * DQS_BIT_NUMBER + u1BitIdx;
                u1PIDiff = FinalWinPerBit[u1BitTemp].win_center - u2Center_min[u1ByteIdx];
                if (p->u2DelayCellTimex100 != 0)
                {
                    u2DelayCellOfst[u1BitTemp] = (u1PIDiff * 100000000 / (p->frequency << 6)) / p->u2DelayCellTimex100;

                    mcSHOW_DBG_MSG("u2DelayCellOfst[%d]=%d cells (%d PI)\n", u1BitTemp, u2DelayCellOfst[u1BitTemp], u1PIDiff);
                    mcDUMP_REG_MSG("u2DelayCellOfst[%d]=%d cells (%d PI)\n", u1BitTemp, u2DelayCellOfst[u1BitTemp], u1PIDiff);
#if __SLT__
		    mcSHOW_PARSER_MSG(("delay_cell_offset[%d]=%d cells (%d PI)\n", u1BitTemp, u2DelayCellOfst[u1BitTemp], u1PIDiff));
#endif
                    if(u2DelayCellOfst[u1BitTemp]>15)
                    {
                        mcSHOW_DBG_MSG("[WARNING] TX DQ%d delay cell %d >15, adjust to 15 cell\n", u1BitIdx, u2DelayCellOfst[u1BitTemp]);
                        u2DelayCellOfst[u1BitTemp] =15;
                    }
                }
                else
                {
                    mcSHOW_ERR_MSG("Error: Cell time (p->u2DelayCellTimex100) is 0 \n");
                    break;
                }
            }

        }

        TxWinTransferDelayToUIPI(p, uiDelay, 1, &dly_all);
        dly_per_byte.dq_mck[u1ByteIdx] = dly_all.mck;
        dly_per_byte.dq_ui[u1ByteIdx] = dly_all.ui;
        dly_per_byte.dq_pi[u1ByteIdx] = dly_all.pi;
        dly_per_byte.dq_mck_oe[u1ByteIdx] = dly_all.mck_oe;
        dly_per_byte.dq_ui_oe[u1ByteIdx] = dly_all.ui_oe;

        TxWinTransferDelayToUIPI(p, u2DQM_Delay, 1, &dly_all);
        dly_per_byte.dqm_mck[u1ByteIdx] = dly_all.mck;
        dly_per_byte.dqm_ui[u1ByteIdx] = dly_all.ui;
        dly_per_byte.dqm_pi[u1ByteIdx] = dly_all.pi;
        dly_per_byte.dqm_mck_oe[u1ByteIdx] = dly_all.mck_oe;
        dly_per_byte.dqm_ui_oe[u1ByteIdx] = dly_all.ui_oe;

        if (calType == TX_DQ_DQS_MOVE_DQ_ONLY || calType == TX_DQ_DQS_MOVE_DQ_DQM)
        {
            mcSHOW_DBG_MSG("Update DQ  dly =%d (%d ,%d, %d)  DQ  OEN =(%d ,%d)\n", uiDelay,
                dly_per_byte.dq_mck[u1ByteIdx], dly_per_byte.dq_ui[u1ByteIdx],
                dly_per_byte.dq_pi[u1ByteIdx], dly_per_byte.dq_mck_oe[u1ByteIdx],
                dly_per_byte.dq_ui_oe[u1ByteIdx]);
            mcDUMP_REG_MSG("Update DQ  dly =%d (%d ,%d, %d)  DQ  OEN =(%d ,%d)\n", uiDelay,
                dly_per_byte.dq_mck[u1ByteIdx], dly_per_byte.dq_ui[u1ByteIdx],
                dly_per_byte.dq_pi[u1ByteIdx], dly_per_byte.dq_mck_oe[u1ByteIdx],
                dly_per_byte.dq_ui_oe[u1ByteIdx]);
#if __SLT__
            mcSHOW_PARSER_MSG(("\nByte%d, DQ PI dly = %d, DQM PI dly = %d\n",  u1ByteIdx, uiDelay, u2DQM_Delay));
            mcSHOW_PARSER_MSG(("Final DQ PI dly(LargeUI, SmallUI, PI) = (%d ,%d, %d)\n", dly_per_byte.dq_mck[u1ByteIdx], dly_per_byte.dq_ui[u1ByteIdx], dly_per_byte.dq_pi[u1ByteIdx]));
            mcSHOW_PARSER_MSG(("OEN DQ PI dly(LargeUI, SmallUI, PI) = (%d ,%d, %d)\n\n", dly_per_byte.dqm_mck_oe[u1ByteIdx], dly_per_byte.dqm_ui_oe[u1ByteIdx], dly_per_byte.dqm_pi[u1ByteIdx]));

			mcSHOW_SLT_MSG(("Update DQ dly = %d (%d ,%d, %d)  DQ  OEN = (%d ,%d)\n", uiDelay,
				dly_per_byte.dq_mck[u1ByteIdx], dly_per_byte.dq_ui[u1ByteIdx],
				dly_per_byte.dq_pi[u1ByteIdx], dly_per_byte.dq_mck_oe[u1ByteIdx],
				dly_per_byte.dq_ui_oe[u1ByteIdx]));
#endif
        }

        //if(calType ==TX_DQ_DQS_MOVE_DQM_ONLY || calType== TX_DQ_DQS_MOVE_DQ_DQM)
        {
            mcSHOW_DBG_MSG("Update DQM dly =%d (%d ,%d, %d)  DQM OEN =(%d ,%d)\n", u2DQM_Delay,
                dly_per_byte.dqm_mck[u1ByteIdx], dly_per_byte.dqm_ui[u1ByteIdx],
                dly_per_byte.dqm_pi[u1ByteIdx], dly_per_byte.dqm_mck_oe[u1ByteIdx],
                dly_per_byte.dqm_ui_oe[u1ByteIdx]);
            mcDUMP_REG_MSG("Update DQM dly =%d (%d ,%d, %d)  DQM OEN =(%d ,%d)\n", u2DQM_Delay,
                dly_per_byte.dqm_mck[u1ByteIdx], dly_per_byte.dqm_ui[u1ByteIdx],
                dly_per_byte.dqm_pi[u1ByteIdx], dly_per_byte.dqm_mck_oe[u1ByteIdx],
                dly_per_byte.dqm_ui_oe[u1ByteIdx]);
#if __SLT__
			mcSHOW_SLT_MSG(("Update DQM dly = %d (%d ,%d, %d)  DQM OEN = (%d ,%d)\n\n", u2DQM_Delay,
				dly_per_byte.dqm_mck[u1ByteIdx], dly_per_byte.dqm_ui[u1ByteIdx],
				dly_per_byte.dqm_pi[u1ByteIdx], dly_per_byte.dqm_mck_oe[u1ByteIdx],
				dly_per_byte.dqm_ui_oe[u1ByteIdx]));
#endif
        }
        mcSHOW_DBG_MSG("\n");

#ifdef FOR_HQA_REPORT_USED
        if (calType == TX_DQ_DQS_MOVE_DQ_ONLY)
        {
            for (u1BitIdx = 0; u1BitIdx < p->data_width; u1BitIdx++)
            {
                HQA_Log_Message_for_Report(p, p->channel, p->rank, HQA_REPORT_FORMAT1, "TX_Window_Center_", "DQ", u1BitIdx, FinalWinPerBit[u1BitIdx].win_center, NULL);
            }
        }

        if (calType == TX_DQ_DQS_MOVE_DQM_ONLY)
        {
            HQA_Log_Message_for_Report(p, p->channel, p->rank, HQA_REPORT_FORMAT0, "TX_Window_Center_", "DQM", u1ByteIdx, u2DQM_Delay, NULL);
        }
#if 0
        HQA_Log_Message_for_Report(p, p->channel, p->rank, HQA_REPORT_FORMAT1, "TX_Window_Center_", "LargeUI", u1ByteIdx, ucdq_reg_ui_large[u1ByteIdx], NULL);
        HQA_Log_Message_for_Report(p, p->channel, p->rank, HQA_REPORT_FORMAT0, "TX_Window_Center_", "SmallUI", u1ByteIdx, ucdq_reg_ui_small[u1ByteIdx], NULL);
        HQA_Log_Message_for_Report(p, p->channel, p->rank, HQA_REPORT_FORMAT0, "TX_Window_Center_", "PI", u1ByteIdx, ucdq_reg_pi[u1ByteIdx], NULL);
#endif
#endif

    }


#if REG_ACCESS_PORTING_DGB
    RegLogEnable = 1;
#endif

        /* p->rank = RANK_0, save to Reg Rank0 and Rank1, p->rank = RANK_1, save to Reg Rank1 */
        for (u1RankIdx = p->rank; u1RankIdx < RANK_MAX; u1RankIdx++)
        {
            vSetRank(p, u1RankIdx);

            if (calType == TX_DQ_DQS_MOVE_DQ_ONLY || calType == TX_DQ_DQS_MOVE_DQ_DQM)
            {
                TXSetDelayReg_DQ(p, TRUE, &dly_per_byte);
            }

            TXSetDelayReg_DQM(p, TRUE, &dly_per_byte);

             if (u1EnableDelayCell)
             {
                //U8 const *mapping_tbl = get_dq_dramc2phy_mapping(p);

                vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_TXDLY0),
                           P_Fld(u2DelayCellOfst[3], SHU_RK_B0_TXDLY0_TX_ARDQ3_DLY_B0)
                         | P_Fld(u2DelayCellOfst[2], SHU_RK_B0_TXDLY0_TX_ARDQ2_DLY_B0)
                         | P_Fld(u2DelayCellOfst[1], SHU_RK_B0_TXDLY0_TX_ARDQ1_DLY_B0)
                         | P_Fld(u2DelayCellOfst[0], SHU_RK_B0_TXDLY0_TX_ARDQ0_DLY_B0));
                 vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_TXDLY1),
                           P_Fld(u2DelayCellOfst[7], SHU_RK_B0_TXDLY1_TX_ARDQ7_DLY_B0)
                         | P_Fld(u2DelayCellOfst[6], SHU_RK_B0_TXDLY1_TX_ARDQ6_DLY_B0)
                         | P_Fld(u2DelayCellOfst[5], SHU_RK_B0_TXDLY1_TX_ARDQ5_DLY_B0)
                         | P_Fld(u2DelayCellOfst[4], SHU_RK_B0_TXDLY1_TX_ARDQ4_DLY_B0));
                 vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_TXDLY0),
                           P_Fld(u2DelayCellOfst[11], SHU_RK_B1_TXDLY0_TX_ARDQ3_DLY_B1)
                         | P_Fld(u2DelayCellOfst[10], SHU_RK_B1_TXDLY0_TX_ARDQ2_DLY_B1)
                         | P_Fld(u2DelayCellOfst[9], SHU_RK_B1_TXDLY0_TX_ARDQ1_DLY_B1)
                         | P_Fld(u2DelayCellOfst[8], SHU_RK_B1_TXDLY0_TX_ARDQ0_DLY_B1));
                 vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_TXDLY1),
                           P_Fld(u2DelayCellOfst[15], SHU_RK_B1_TXDLY1_TX_ARDQ7_DLY_B1)
                         | P_Fld(u2DelayCellOfst[14], SHU_RK_B1_TXDLY1_TX_ARDQ6_DLY_B1)
                         | P_Fld(u2DelayCellOfst[13], SHU_RK_B1_TXDLY1_TX_ARDQ5_DLY_B1)
                         | P_Fld(u2DelayCellOfst[12], SHU_RK_B1_TXDLY1_TX_ARDQ4_DLY_B1));
             }

        #if DDR_ENABLE_TX_TRACKING
            TXUpdateTXTracking(p, calType, &dly_per_byte);
        #endif
        }

        vSetRank(p, backup_rank);

#if 0
        if (isAutoK)
        {
    #if TX_AUTO_K_ENABLE
        #if TX_AUTO_K_WORKAROUND
            vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_TX_ATK_SET0),
                                 P_Fld(ucdq_reg_pi[0], TX_ATK_SET0_TX_ATK_DQ_B0_PI_INIT) |
                                 P_Fld(ucdq_reg_pi[1], TX_ATK_SET0_TX_ATK_DQ_B1_PI_INIT) |
                                 P_Fld(ucdq_reg_dqm_pi[0], TX_ATK_SET0_TX_ATK_DQM_B0_PI_INIT) |
                                 P_Fld(ucdq_reg_dqm_pi[1], TX_ATK_SET0_TX_ATK_DQM_B1_PI_INIT)); //If TX auto-k is enable, TX_PI will be switch to PI_INIT
        #endif
    #endif
        }
#endif

#if REG_ACCESS_PORTING_DGB
    RegLogEnable = 0;
#endif

#if 0
#if (TX_AUTO_K_ENABLE && TX_AUTO_K_WORKAROUND)
    if ((isAutoK) && (p->rank == RANK_1) && (calType == TX_DQ_DQS_MOVE_DQ_DQM))
    {
        u4DQM_MCK_RK1_backup = u4IO32Read4B(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ1));
        u4DQM_UI_RK1_backup = u4IO32Read4B(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ3));
        u4DQM_PI_RK1_backup[0] = u4IO32Read4B(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ));
        u4DQM_PI_RK1_backup[1] = u4IO32Read4B(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ));
        u4DQ_MCK_RK1_backup = u4IO32Read4B(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ0));
        u4DQ_UI_RK1_backup = u4IO32Read4B(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ2));
        u4DQ_PI_RK1_backup[0] = u4IO32Read4B(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ));
        u4DQ_PI_RK1_backup[1] = u4IO32Read4B(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ));
    }
#endif
#endif

#if (TX_K_DQM_MODE == 2)
    if (calType == TX_DQ_DQS_MOVE_DQM_ONLY)
    {
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TEST2_A0), 0x0, TEST2_A0_WRCLR0);
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TEST2_A4), 0x0, TEST2_A4_TEST2_DQMTGL);
    }
#endif

	vAutoRefreshSwitch(p, DISABLE);
    mcSHOW_DBG_MSG3("[TxWindowPerbitCal] Done\n\n");
#if __SLT__
    mcSHOW_PARSER_MSG(("[TxWindowPerbitCal] Done\n\n"));
#endif
#if 0
    vIO32WriteFldAlign_All(DRAMC_REG_ADDR(DRAMC_REG_PADCTL4), 1, PADCTL4_CKEFIXON);  // test only
#endif

    return DRAM_OK;
}


#endif //DDR_ENABLE_TX_PERBIT_CAL

#if ENABLE_EYESCAN_GRAPH	// && CFG_LPDDR_ENABLE
/*
 * Dramc_K_TX_EyeScan_Log
 *
 * Collects TX eye-scan data for the current channel/rank.
 *
 * For every TX Vref level from u2VrefBegin to u2VrefEnd (step u2VrefStep) the
 * DQ and DQM delays of both bytes are swept over 64 positions, from -32 to +31
 * relative to the delay currently programmed in the registers. At each
 * position the DRAMC test engine is run and the per-bit pass windows are
 * recorded into the gEyeScan_* globals.
 *
 * The registers listed in u4RegBackupAddress are backed up before the sweep
 * and restored afterwards; the TX Vref is re-programmed from the value saved
 * in backup_u1MR14Value.
 *
 * Returns immediately, without touching any register, when
 * GetEyeScanEnable(p, 2) reports DISABLE.
 */
void Dramc_K_TX_EyeScan_Log(DRAMC_CTX_T *p)
{
    U8 ucindex, u1BitIdx, u1ByteIdx;
    U8 ii, u1vrefidx;
    // WinPerBit:      window currently being tracked for each bit
    // VrefWinPerBit:  largest window found for each bit at the current Vref level
    // FinalWinPerBit: copy of VrefWinPerBit taken at the best Vref level seen so far
    PASS_WIN_DATA_T WinPerBit[DQ_DATA_WIDTH], VrefWinPerBit[DQ_DATA_WIDTH], FinalWinPerBit[DQ_DATA_WIDTH];
    // Only indices 0 and 1 of these arrays are used by the sweep below.
    U16 tx_pi_delay[4], tx_dqm_pi_delay[4];
    U16 uiDelay;
    U16 u2VrefLevel, u2VrefBegin, u2VrefEnd, u2VrefStep, u2VrefRange;
    //U8 ucdq_pi, ucdq_ui_small, ucdq_ui_large,ucdq_oen_ui_small, ucdq_oen_ui_large;
    U32 uiFinishCount;  // written below but never read inside this function
    U16 u2TempWinSum, u2tx_window_sum=0;
    U32 u4err_value, u4fail_bit;
    #if 1//TX_DQM_CALC_MAX_MIN_CENTER
    // NOTE(review): these arrays are only assigned inside the "better Vref found"
    // branch of the Vref loop. If that branch is never taken (every bit has a
    // zero-size window at every Vref level) they are read uninitialized when the
    // final centre is computed after the loop -- confirm whether that can happen.
    U16 u2Center_min[DQS_NUMBER],u2Center_max[DQS_NUMBER];
    #endif
    struct tx_dly_for_all dly_all;

    U16 TXPerbitWin_min_max = 0;    // largest "smallest per-bit window" seen over all Vref levels
    U32 min_winsize;

    U8 EyeScan_index[DQ_DATA_WIDTH];    // per bit: number of windows already stored for the current Vref level

    U16 backup_u1MR14Value;
    U8 u1pass_in_this_vref_flag[DQ_DATA_WIDTH];

    U8 u1MCK2UI, u1UI2PI;

    // Registers modified by the delay sweep; restored before returning.
    U32 u4RegBackupAddress[] =
    {
        (DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ0)),
        (DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ2)),
        (DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ1)),
        (DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ3)),
        (DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ)),
        (DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ)),
    };

    if (GetEyeScanEnable(p, 2)==DISABLE) return;

    //if (gTX_EYE_Scan_only_higheset_freq_flag==1 && p->frequency != u2DFSGetHighestFreq(p)) return;

    //backup register value
    DramcBackupRegisters(p, u4RegBackupAddress, sizeof(u4RegBackupAddress)/sizeof(U32), TO_ONE_CHANNEL);

    backup_u1MR14Value = u1MR14Value[p->channel][p->rank][p->dram_fsp];
    //Jimmy Temp
    // Presumably replaces the cached value above with the value read back from
    // mode register 14 -- confirm against DramcModeRegReadByRank().
    DramcModeRegReadByRank(p, p->rank, 14, &backup_u1MR14Value);

    if (gFinalTXVrefDQ[p->channel][p->rank] ==0) //Set final TX Vref as default value
        gFinalTXVrefDQ[p->channel][p->rank] = u1MR14Value[p->channel][p->rank][p->dram_fsp];

    //set initial values
    // Mark every stored window edge as invalid and clear the per-bit counters.
    for(u1vrefidx=0; u1vrefidx<=VREF_VOLTAGE_TABLE_NUM_LP5-1;u1vrefidx++)
    {
        for (u1BitIdx = 0; u1BitIdx < p->data_width; u1BitIdx++)
        {
            for(ii=0; ii<EYESCAN_BROKEN_NUM; ii++)
            {
                gEyeScan_Min[u1vrefidx][u1BitIdx][ii] = EYESCAN_DATA_INVALID;
                gEyeScan_Max[u1vrefidx][u1BitIdx][ii] = EYESCAN_DATA_INVALID;
            }
            gEyeScan_ContinueVrefHeight[u1BitIdx] = 0;
            gEyeScan_TotalPassCount[u1BitIdx] = 0;
        }
    }


    u1MCK2UI = u1MCK2UI_DivShift(p);

    //if (vGet_DDR800_Mode(p) == DDR800_CLOSE_LOOP)
    //    u1UI2PI = 6;
    //else
        u1UI2PI = 5;    // UI fields are shifted left by this amount when building the combined delay


    // Rebuild the currently programmed DQ and DQM delay of each byte as one
    // number: (MCK field << (u1MCK2UI+u1UI2PI)) + (UI field << u1UI2PI) + PI field,
    // with the PI field multiplied by 8 when u1IsPhaseMode(p) is TRUE.
    // Byte 0 reads the DQ0/DQM0/B0 fields; every other byte index reads the
    // DQ1/DQM1/B1 fields.
    for(u1ByteIdx=0; u1ByteIdx < p->data_width/DQS_BIT_NUMBER; u1ByteIdx++)
    {
        if (u1ByteIdx == 0)
        {
            tx_pi_delay[u1ByteIdx] = (u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ0), SHURK_SELPH_DQ0_TXDLY_DQ0)<<(u1MCK2UI+u1UI2PI)) +
                          (u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ2), SHURK_SELPH_DQ2_DLY_DQ0)<<u1UI2PI) +
                          u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ), SHU_RK_B0_DQ_SW_ARPI_DQ_B0)*(u1IsPhaseMode(p)==TRUE ? 8 : 1);

            tx_dqm_pi_delay[u1ByteIdx] = (u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ1), SHURK_SELPH_DQ1_TXDLY_DQM0)<<(u1MCK2UI+u1UI2PI)) +
                              (u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ3), SHURK_SELPH_DQ3_DLY_DQM0)<<u1UI2PI) +
                              u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ), SHU_RK_B0_DQ_SW_ARPI_DQM_B0)*(u1IsPhaseMode(p)==TRUE ? 8 : 1);
        }
        else
        {
            tx_pi_delay[u1ByteIdx] = (u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ0), SHURK_SELPH_DQ0_TXDLY_DQ1)<<(u1MCK2UI+u1UI2PI)) +
                          (u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ2), SHURK_SELPH_DQ2_DLY_DQ1)<<u1UI2PI) +
                          u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ), SHU_RK_B1_DQ_SW_ARPI_DQ_B1)*(u1IsPhaseMode(p)==TRUE ? 8 : 1);

            tx_dqm_pi_delay[u1ByteIdx] = (u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ1), SHURK_SELPH_DQ1_TXDLY_DQM1)<<(u1MCK2UI+u1UI2PI)) +
                              (u4IO32ReadFldAlign(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ3), SHURK_SELPH_DQ3_DLY_DQM1)<<u1UI2PI) +
                              u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ), SHU_RK_B1_DQ_SW_ARPI_DQM_B1)*(u1IsPhaseMode(p)==TRUE ? 8 : 1);
        }
    }

    u2VrefRange = 0;
    u2VrefBegin = 0;
    u2VrefEnd = DDR_VREF_VOLTAGE_TABLE_NUM - 1;
    u2VrefStep = EYESCAN_GRAPH_CATX_VREF_STEP;
    mcSHOW_DBG_MSG3("\nTX Vref %d -> %d, step: %d\n", u2VrefBegin, u2VrefEnd, u2VrefStep);

#if ENABLE_K_WITH_WORST_SI_UI_SHIFT
    DramcEngine2Init(p, p->test2_1, p->test2_2, p->test_pattern, 0, TE_UI_SHIFT);//UI_SHIFT + LEN1
#else
    DramcEngine2Init(p, p->test2_1, p->test2_2, TEST_XTALK_PATTERN, 0, TE_NO_UI_SHIFT);
#endif


    // Outer loop: one pass per TX Vref level.
    for(u2VrefLevel = u2VrefBegin; u2VrefLevel <= u2VrefEnd; u2VrefLevel += u2VrefStep)
    {
        //set vref
//fra        u1MR14Value[p->channel][p->rank][p->dram_fsp] = (u2VrefLevel | (u2VrefRange<<6));
        DramcTXSetVref(p, u2VrefRange, u2VrefLevel);
        mcSHOW_DBG_MSG3("\n\n Set TX VrefRange %d, VrefLevel=%d\n", u2VrefRange, u2VrefLevel);

        // initialize parameters
        uiFinishCount = 0;
        u2TempWinSum =0;

        for (u1BitIdx = 0; u1BitIdx < p->data_width; u1BitIdx++)
        {
            WinPerBit[u1BitIdx].first_pass = (S16)PASS_RANGE_NA;
            WinPerBit[u1BitIdx].last_pass = (S16)PASS_RANGE_NA;
            VrefWinPerBit[u1BitIdx].first_pass = (S16)PASS_RANGE_NA;
            VrefWinPerBit[u1BitIdx].last_pass = (S16)PASS_RANGE_NA;

            gEyeScan_DelayCellPI[u1BitIdx] = 0;

            EyeScan_index[u1BitIdx] = 0;
            u1pass_in_this_vref_flag[u1BitIdx] = 0;
        }

        // Inner loop: sweep the delay offset 0..63 (step 8 when u1IsPhaseMode(p)
        // is TRUE, otherwise 1). The delay written is "saved delay + offset - 32"
        // for DQ byte 0, DQ byte 1, DQM byte 0 and DQM byte 1 in turn.
        for (uiDelay=0; uiDelay<64; uiDelay+=(u1IsPhaseMode(p)==TRUE ? 8 : 1))
        {
            TxWinTransferDelayToUIPI(p, tx_pi_delay[0] + uiDelay - 32, 0, &dly_all);
            vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ0),
                                             P_Fld(dly_all.mck, SHURK_SELPH_DQ0_TXDLY_DQ0) |
                                             P_Fld(dly_all.mck_oe, SHURK_SELPH_DQ0_TXDLY_OEN_DQ0));
            vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ2),
                                             P_Fld(dly_all.ui, SHURK_SELPH_DQ2_DLY_DQ0) |
                                             P_Fld(dly_all.ui_oe, SHURK_SELPH_DQ2_DLY_OEN_DQ0));
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ), dly_all.pi, SHU_RK_B0_DQ_SW_ARPI_DQ_B0);

            TxWinTransferDelayToUIPI(p, tx_pi_delay[1] + uiDelay - 32, 0, &dly_all);
            vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ0),
                                             P_Fld(dly_all.mck, SHURK_SELPH_DQ0_TXDLY_DQ1) |
                                             P_Fld(dly_all.mck_oe, SHURK_SELPH_DQ0_TXDLY_OEN_DQ1));
            vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ2),
                                             P_Fld(dly_all.ui, SHURK_SELPH_DQ2_DLY_DQ1) |
                                             P_Fld(dly_all.ui_oe, SHURK_SELPH_DQ2_DLY_OEN_DQ1));
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ), dly_all.pi, SHU_RK_B1_DQ_SW_ARPI_DQ_B1);

            TxWinTransferDelayToUIPI(p, tx_dqm_pi_delay[0] + uiDelay - 32, 0, &dly_all);
            vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ1),
                                            P_Fld(dly_all.mck, SHURK_SELPH_DQ1_TXDLY_DQM0) |
                                            P_Fld(dly_all.mck_oe, SHURK_SELPH_DQ1_TXDLY_OEN_DQM0));
           vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ3),
                                            P_Fld(dly_all.ui, SHURK_SELPH_DQ3_DLY_DQM0) |
                                            P_Fld(dly_all.ui_oe, SHURK_SELPH_DQ3_DLY_OEN_DQM0));
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ), dly_all.pi,  SHU_RK_B0_DQ_SW_ARPI_DQM_B0);

            TxWinTransferDelayToUIPI(p, tx_dqm_pi_delay[1] + uiDelay - 32, 0, &dly_all);
            vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ1),
                                             P_Fld(dly_all.mck, SHURK_SELPH_DQ1_TXDLY_DQM1) |
                                             P_Fld(dly_all.mck_oe, SHURK_SELPH_DQ1_TXDLY_OEN_DQM1));
            vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ3),
                                             P_Fld(dly_all.ui, SHURK_SELPH_DQ3_DLY_DQM1) |
                                             P_Fld(dly_all.ui_oe, SHURK_SELPH_DQ3_DLY_OEN_DQM1));
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ), dly_all.pi, SHU_RK_B1_DQ_SW_ARPI_DQM_B1);

            // Run the test engine at this delay; each bit of u4err_value is the
            // fail flag of the DQ bit with the same index (see the check below).
            u4err_value=0;
#if ENABLE_K_WITH_WORST_SI_UI_SHIFT
            //DramcEngine2SetPat(p, p->test_pattern, 0, 0, TE_UI_SHIFT);
            u4err_value = DramcEngine2Run(p, TE_OP_WRITE_READ_CHECK, p->test_pattern);
#else
            //audio + xtalk pattern
            DramcEngine2SetPat(p, TEST_AUDIO_PATTERN, 0, 0, TE_NO_UI_SHIFT);
            u4err_value = DramcEngine2Run(p, TE_OP_WRITE_READ_CHECK, TEST_AUDIO_PATTERN);
            DramcEngine2SetPat(p, TEST_XTALK_PATTERN, 0, 1, TE_NO_UI_SHIFT);
            u4err_value |= DramcEngine2Run(p, TE_OP_WRITE_READ_CHECK, TEST_XTALK_PATTERN);
#endif
            // audio + xtalk pattern
            //u4err_value=0;
            //DramcEngine2SetPat(p,TEST_AUDIO_PATTERN, 0, 0, TE_NO_UI_SHIFT);
            //u4err_value = DramcEngine2Run(p, TE_OP_WRITE_READ_CHECK, TEST_AUDIO_PATTERN);
            //DramcEngine2SetPat(p,TEST_XTALK_PATTERN, 0, 1, TE_NO_UI_SHIFT);
            //u4err_value |= DramcEngine2Run(p, TE_OP_WRITE_READ_CHECK, TEST_XTALK_PATTERN);

            // check fail bit ,0 ok ,others fail
            for (u1BitIdx = 0; u1BitIdx < p->data_width; u1BitIdx++)
            {
                u4fail_bit = u4err_value&((U32)1<<u1BitIdx);

                // Every passing delay step adds EYESCAN_GRAPH_CATX_VREF_STEP to the bit's total.
                if (u4fail_bit == 0)
                {
                    gEyeScan_TotalPassCount[u1BitIdx]+=EYESCAN_GRAPH_CATX_VREF_STEP;
                }

                if(WinPerBit[u1BitIdx].first_pass== PASS_RANGE_NA)
                {
                    // No window open yet: a pass opens one at this delay.
                    if(u4fail_bit==0) //compare correct: pass
                    {
                        WinPerBit[u1BitIdx].first_pass = uiDelay;
                        u1pass_in_this_vref_flag[u1BitIdx] = 1;
                    }
                }
                else if(WinPerBit[u1BitIdx].last_pass == PASS_RANGE_NA)
                {
                    // A window is open: close it on the first failure, or at the
                    // last delay step if the bit is still passing there.
                    // NOTE(review): with a step of 8 the sweep ends at uiDelay == 56,
                    // so "uiDelay>=63" is never true and a window still open at the
                    // end of the sweep is not closed -- confirm this is intended.
                    if(u4fail_bit !=0) //compare error : fail
                    {
                        WinPerBit[u1BitIdx].last_pass  = (uiDelay-1);
                    }
                    else if (uiDelay>=63)
                    {
                        WinPerBit[u1BitIdx].last_pass  = 63;
                    }

                    if(WinPerBit[u1BitIdx].last_pass  !=PASS_RANGE_NA)
                    {
                        // The window just closed. Keep it as this Vref level's window
                        // if it is at least as wide as the one kept so far.
                        if((WinPerBit[u1BitIdx].last_pass -WinPerBit[u1BitIdx].first_pass) >= (VrefWinPerBit[u1BitIdx].last_pass -VrefWinPerBit[u1BitIdx].first_pass))
                        {
                            //if window size bigger than 7, consider as real pass window. If not, don't update finish counte and won't do early break;
                            if((WinPerBit[u1BitIdx].last_pass -WinPerBit[u1BitIdx].first_pass) >7)
                                uiFinishCount |= (1<<u1BitIdx);

                            //update bigger window size
                            VrefWinPerBit[u1BitIdx].first_pass = WinPerBit[u1BitIdx].first_pass;
                            VrefWinPerBit[u1BitIdx].last_pass = WinPerBit[u1BitIdx].last_pass;
                        }


                            // Store every closed window (up to EYESCAN_BROKEN_NUM per
                            // bit and Vref level) for the eye-scan graph.
                            if (EyeScan_index[u1BitIdx] < EYESCAN_BROKEN_NUM)
                            {
#if VENDER_JV_LOG || defined(RELEASE)
                                gEyeScan_Min[(u2VrefLevel+u2VrefRange*30)/EYESCAN_GRAPH_CATX_VREF_STEP][u1BitIdx][EyeScan_index[u1BitIdx]] = WinPerBit[u1BitIdx].first_pass;
                                gEyeScan_Max[(u2VrefLevel+u2VrefRange*30)/EYESCAN_GRAPH_CATX_VREF_STEP][u1BitIdx][EyeScan_index[u1BitIdx]] = WinPerBit[u1BitIdx].last_pass;
#else
//fra                                gEyeScan_Min[(u2VrefLevel+u2VrefRange*30)/EYESCAN_GRAPH_CATX_VREF_STEP][u1BitIdx][EyeScan_index[u1BitIdx]] = WinPerBit[u1BitIdx].first_pass + tx_pi_delay[u1BitIdx/8]-32;
//fra                                gEyeScan_Max[(u2VrefLevel+u2VrefRange*30)/EYESCAN_GRAPH_CATX_VREF_STEP][u1BitIdx][EyeScan_index[u1BitIdx]] = WinPerBit[u1BitIdx].last_pass + tx_pi_delay[u1BitIdx/8]-32;
                                gEyeScan_Min[(u2VrefLevel+u2VrefRange*30)/EYESCAN_GRAPH_CATX_VREF_STEP][u1BitIdx][EyeScan_index[u1BitIdx]] = (S8) WinPerBit[u1BitIdx].first_pass;
                                gEyeScan_Max[(u2VrefLevel+u2VrefRange*30)/EYESCAN_GRAPH_CATX_VREF_STEP][u1BitIdx][EyeScan_index[u1BitIdx]] = (S8) WinPerBit[u1BitIdx].last_pass;
                                // NOTE(review): the log below indexes gEyeScan_Min/Max with
                                // u2VrefLevel only, while the stores above also add
                                // u2VrefRange*30; the two differ once u2VrefRange is 1.
                                mcSHOW_DBG_MSG3("VrefRange %d, VrefLevel=%d, u1BitIdx=%d, index=%d (%d, %d)==\n",u2VrefRange,u2VrefLevel, u1BitIdx, EyeScan_index[u1BitIdx], gEyeScan_Min[u2VrefLevel/EYESCAN_GRAPH_CATX_VREF_STEP][u1BitIdx][EyeScan_index[u1BitIdx]], gEyeScan_Max[u2VrefLevel/EYESCAN_GRAPH_CATX_VREF_STEP][u1BitIdx][EyeScan_index[u1BitIdx]]);
                                gEyeScan_MinMax_store_delay[u1BitIdx/8] =  tx_pi_delay[u1BitIdx/8]-32; /* save this information for HQA pass/fail judgement used */
#endif
                                EyeScan_index[u1BitIdx]=EyeScan_index[u1BitIdx]+1;
                            }


                        //reset tmp window
                        WinPerBit[u1BitIdx].first_pass = PASS_RANGE_NA;
                        WinPerBit[u1BitIdx].last_pass = PASS_RANGE_NA;
                    }
                 }
               }
        }

        // Per-bit window size at this Vref level. When first_pass == last_pass
        // (including the "no window" case where both are PASS_RANGE_NA) the size is 0.
        min_winsize = 0xffff;
        for (u1BitIdx = 0; u1BitIdx < p->data_width; u1BitIdx++)
        {
            VrefWinPerBit[u1BitIdx].win_size = VrefWinPerBit[u1BitIdx].last_pass- VrefWinPerBit[u1BitIdx].first_pass +(VrefWinPerBit[u1BitIdx].last_pass==VrefWinPerBit[u1BitIdx].first_pass?0:1);

            if (VrefWinPerBit[u1BitIdx].win_size < min_winsize)
            {
                min_winsize = VrefWinPerBit[u1BitIdx].win_size;
            }

            u2TempWinSum += VrefWinPerBit[u1BitIdx].win_size;  //Sum of CA Windows for vref selection

            gEyeScan_WinSize[(u2VrefLevel+u2VrefRange*30)/EYESCAN_GRAPH_CATX_VREF_STEP][u1BitIdx] = VrefWinPerBit[u1BitIdx].win_size;

#ifdef FOR_HQA_TEST_USED
            // At the Vref range/level that matches the saved MR14 value, also
            // record the window size as the final per-bit TX window.
            if((((backup_u1MR14Value>>6)&1) == u2VrefRange) && ((backup_u1MR14Value&0x3f)==u2VrefLevel))
            {
                gFinalTXPerbitWin[p->channel][p->rank][u1BitIdx] = VrefWinPerBit[u1BitIdx].win_size;
            }
#endif

        }

        // This Vref level is better than the best so far when its smallest
        // per-bit window is larger, or equal with a larger window sum.
        if ((min_winsize > TXPerbitWin_min_max) || ((min_winsize == TXPerbitWin_min_max) && (u2TempWinSum >u2tx_window_sum)))
        {
            TXPerbitWin_min_max = min_winsize;
            u2tx_window_sum =u2TempWinSum;

            //Calculate the center of DQ pass window
            // Record center sum of each byte
            for (u1ByteIdx=0; u1ByteIdx<(p->data_width/DQS_BIT_NUMBER); u1ByteIdx++)
            {
        #if 1//TX_DQM_CALC_MAX_MIN_CENTER
                u2Center_min[u1ByteIdx] = 0xffff;
                u2Center_max[u1ByteIdx] = 0;
        #endif

                for (u1BitIdx=0; u1BitIdx<DQS_BIT_NUMBER; u1BitIdx++)
                {
                    ucindex = u1ByteIdx * DQS_BIT_NUMBER + u1BitIdx;
                    FinalWinPerBit[ucindex].first_pass = VrefWinPerBit[ucindex].first_pass;
                    FinalWinPerBit[ucindex].last_pass =  VrefWinPerBit[ucindex].last_pass;
                    FinalWinPerBit[ucindex].win_size = VrefWinPerBit[ucindex].win_size;
                    FinalWinPerBit[ucindex].win_center = (FinalWinPerBit[ucindex].first_pass + FinalWinPerBit[ucindex].last_pass) >> 1;

                    // Track the smallest and largest bit centre within the byte.
                    if(FinalWinPerBit[ucindex].win_center < u2Center_min[u1ByteIdx])
                        u2Center_min[u1ByteIdx] = FinalWinPerBit[ucindex].win_center;

                    if(FinalWinPerBit[ucindex].win_center > u2Center_max[u1ByteIdx])
                        u2Center_max[u1ByteIdx] = FinalWinPerBit[ucindex].win_center;
                }
            }
        }


        // For non-LPDDR5 parts, after range 0 / level 50 has been processed the
        // scan switches to range 1 / level 20; the loop increment is then applied
        // on top of that level.
        if(u2VrefRange==0 && u2VrefLevel ==50 && p->dram_type!=TYPE_LPDDR5)
        {
            u2VrefRange = 1;
            u2VrefLevel = 20;
        }

        for (u1BitIdx = 0; u1BitIdx < p->data_width; u1BitIdx++)
        {
            if (u1pass_in_this_vref_flag[u1BitIdx]) gEyeScan_ContinueVrefHeight[u1BitIdx]+=EYESCAN_GRAPH_CATX_VREF_STEP;  //count pass number of continue vref
        }
    }

    DramcEngine2End(p);

    //Calculate the center of DQ pass window
    //average the center delay
    // See the NOTE(review) at the declaration of u2Center_min/u2Center_max:
    // they hold defined values only if a best Vref level was selected above.
    for (u1ByteIdx=0; u1ByteIdx<(p->data_width/DQS_BIT_NUMBER); u1ByteIdx++)
    {
        uiDelay = ((u2Center_min[u1ByteIdx] + u2Center_max[u1ByteIdx])>>1); //(max +min)/2

#if VENDER_JV_LOG || defined(RELEASE)
        gEyeScan_CaliDelay[u1ByteIdx] = uiDelay;
#else
        // Convert the sweep offset back to an absolute delay.
        gEyeScan_CaliDelay[u1ByteIdx] = uiDelay + tx_pi_delay[u1ByteIdx]-32;
#endif
    }


    //restore to orignal value
    DramcRestoreRegisters(p, u4RegBackupAddress, sizeof(u4RegBackupAddress)/sizeof(U32), TO_ONE_CHANNEL);

    //restore Vref
    u2VrefRange = backup_u1MR14Value>>6;
    u2VrefLevel = backup_u1MR14Value & 0x3f;

    DramcTXSetVref(p, u2VrefRange, u2VrefLevel);
    u1MR14Value[p->channel][p->rank][p->dram_fsp] = backup_u1MR14Value;

}
#endif

#if DDR_ENABLE_TX_OE_CAL
#define TX_OE_PATTERN_USE_TA2 1
#define TX_OE_SCAN_FULL_RANGE 0

/*
 * DramcTxOECalibration
 *
 * Calibrates the TX OE (OEN) delay of DQ/DQM for the current channel/rank.
 *
 * The calibration result is first set to DRAM_FAIL. Then either:
 *  - (SUPPORT_SAVE_TIME_FOR_CALIBRATION and p->femmc_Ready == 1) the per-byte
 *    OEN delays are loaded from p->pSavetimeData and the result is set to
 *    DRAM_FAST_K, or
 *  - the OEN delay of both bytes is swept over a range derived from the
 *    smallest DQ delay currently programmed, the test pattern is run at each
 *    tap, and for every byte the midpoint between the first and last passing
 *    tap is chosen. A byte with no passing tap falls back to
 *    (smallest DQ delay - TX_DQ_OE_SHIFT_LP4) and the result is DRAM_FAIL.
 *
 * In both cases the selected delays are written to the OEN fields of
 * SHURK_SELPH_DQ0..DQ3 and, when p->femmc_Ready == 0, saved to
 * p->pSavetimeData.
 *
 * A delay value is split as: large = delay >> 3, small = delay & 0x7.
 */
void DramcTxOECalibration(DRAMC_CTX_T *p)
{
    /* ucBegin/ucEnd: first/last passing tap per byte; 0xff means "not found yet". */
    U8 u1ByteIdx, ucBegin[2] = {0xff, 0xff}, ucEnd[2] = {0xff, 0xff}, ucbest_step[2];
    //U8 ucbegin=0xff, , ucfirst, ucsum, ucbest_step;
    U32 u4RegValue_TXDLY, u4RegValue_dly, u4err_value;
    U16 u2Delay, u2TempVirtualDelay, u2SmallestVirtualDelay = 0xffff;
    U16 u2DQOEN_DelayBegin, u2DQEN_DelayEnd;
    U8 ucdq_ui_large_bak[DQS_NUMBER], ucdq_ui_small_bak[DQS_NUMBER];
    U8 ucdq_oen_ui_large[2], ucdq_oen_ui_small[2];
    U8 ucdq_current_ui_large, ucdq_current_ui_small;
    //U8 ucdq_ui_large_reg_value=0xff, ucdq_ui_small_reg_value=0xff;
    U8 ucdq_final_dqm_oen_ui_large[DQS_NUMBER] = {0}, ucdq_final_dqm_oen_ui_small[DQS_NUMBER] = {0};
    DRAM_STATUS_T KResult;
    U8 u1TxDQOEShift = 0;

    u1TxDQOEShift = TX_DQ_OE_SHIFT_LP4;

    #if TX_OE_PATTERN_USE_TA2
    mcSHOW_DBG_MSG("\n[DramC_TX_OE_Calibration] TA2\n");
    #else
    mcSHOW_DBG_MSG("\n[DramC_TX_OE_Calibration] DMA\n");
    #endif

    mcDUMP_REG_MSG("\n[dumpRG] DramcTXOECalibration\n");
#if VENDER_JV_LOG
    vPrintCalibrationBasicInfo_ForJV(p);
#else
    vPrintCalibrationBasicInfo(p);
#endif

    //default set FAIL
    vSetCalibrationResult(p, DRAM_CALIBRATION_TX_OE, DRAM_FAIL);

#if (SUPPORT_SAVE_TIME_FOR_CALIBRATION)
    if (p->femmc_Ready == 1)
    {
        /* Fast-K: reuse the delays saved by a previous full calibration. */
        for (u1ByteIdx = 0; u1ByteIdx < DQS_NUMBER; u1ByteIdx++)
        {
            ucdq_oen_ui_large[u1ByteIdx] = p->pSavetimeData->u1TX_OE_DQ_MCK[p->channel][p->rank][u1ByteIdx];
            ucdq_oen_ui_small[u1ByteIdx] = p->pSavetimeData->u1TX_OE_DQ_UI[p->channel][p->rank][u1ByteIdx];
        }
        vSetCalibrationResult(p, DRAM_CALIBRATION_TX_OE, DRAM_FAST_K);
    }
    else
#endif
    {
        u4RegValue_TXDLY = u4IO32Read4B(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ0));
        u4RegValue_dly = u4IO32Read4B(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ2));

        // find smallest DQ byte delay
        // Each byte's large/small delay is a 3-bit field at bit offset (byte * 4);
        // the combined delay is (large << 3) + small.
        for (u1ByteIdx = 0; u1ByteIdx < (p->data_width / DQS_BIT_NUMBER); u1ByteIdx++)
        {
            ucdq_ui_large_bak[u1ByteIdx] = (u4RegValue_TXDLY >> (u1ByteIdx * 4)) & 0x7;
            ucdq_ui_small_bak[u1ByteIdx] = (u4RegValue_dly >> (u1ByteIdx * 4)) & 0x7;

            u2TempVirtualDelay = (ucdq_ui_large_bak[u1ByteIdx] << 3) + ucdq_ui_small_bak[u1ByteIdx];
            if (u2TempVirtualDelay < u2SmallestVirtualDelay)
            {
                u2SmallestVirtualDelay = u2TempVirtualDelay;
            }

            mcSHOW_DBG_MSG("Original DQ_B%d (%d %d) =%d, OEN = %d\n", u1ByteIdx, ucdq_ui_large_bak[u1ByteIdx], ucdq_ui_small_bak[u1ByteIdx], u2TempVirtualDelay, u2TempVirtualDelay - u1TxDQOEShift);
        }

        #if TX_OE_PATTERN_USE_TA2
        DramcEngine2Init(p, p->test2_1, 0xaa000200, TEST_SSOXTALK_PATTERN | 0x80, 0, TE_NO_UI_SHIFT);
        #else
        DramcDmaEngine((DRAMC_CTX_T *)p, 0x50000000, 0x60000000, 0xff00, 8, DMA_PREPARE_DATA_ONLY, p->support_channel_num);
        #endif

        #if TX_OE_SCAN_FULL_RANGE
        // -17~+8 UI
        if (u2SmallestVirtualDelay >= 17)
            u2DQOEN_DelayBegin = u2SmallestVirtualDelay - 17;
        else
            u2DQOEN_DelayBegin = 0;

        u2DQEN_DelayEnd = u2DQOEN_DelayBegin + 25;

        #else // reduce range to speed up
        if (u2SmallestVirtualDelay >= 7)
            u2DQOEN_DelayBegin = u2SmallestVirtualDelay - 6;
        else
            u2DQOEN_DelayBegin = 0;

        u2DQEN_DelayEnd = u2DQOEN_DelayBegin + 13;
        #endif

        /* Sweep the OEN delay; the same value is applied to DQ and DQM of both bytes. */
        for (u2Delay = u2DQOEN_DelayBegin; u2Delay <= u2DQEN_DelayEnd; u2Delay++)
        {
            ucdq_current_ui_large = (u2Delay >> 3);
            ucdq_current_ui_small = u2Delay & 0x7;
            //mcSHOW_DBG_MSG("\nucdq_oen_ui %d %d ", ucdq_oen_ui_large, ucdq_oen_ui_small);

            vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ0), \
                                            P_Fld(ucdq_current_ui_large, SHURK_SELPH_DQ0_TXDLY_OEN_DQ0) | \
                                            P_Fld(ucdq_current_ui_large, SHURK_SELPH_DQ0_TXDLY_OEN_DQ1) );

            vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ1), \
                                            P_Fld(ucdq_current_ui_large, SHURK_SELPH_DQ1_TXDLY_OEN_DQM0) | \
                                            P_Fld(ucdq_current_ui_large, SHURK_SELPH_DQ1_TXDLY_OEN_DQM1));

            // DLY_DQ[2:0]
            vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ2), \
                                            P_Fld(ucdq_current_ui_small, SHURK_SELPH_DQ2_DLY_OEN_DQ0) | \
                                            P_Fld(ucdq_current_ui_small, SHURK_SELPH_DQ2_DLY_OEN_DQ1) );

            // DLY_DQM[2:0]
            vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ3), \
                                            P_Fld(ucdq_current_ui_small, SHURK_SELPH_DQ3_DLY_OEN_DQM0) | \
                                            P_Fld(ucdq_current_ui_small, SHURK_SELPH_DQ3_DLY_OEN_DQM1));

            #if TX_OE_PATTERN_USE_TA2
            u4err_value = DramcEngine2Run(p, TE_OP_WRITE_READ_CHECK, TEST_SSOXTALK_PATTERN);
            #else
            u4err_value = DramcDmaEngine((DRAMC_CTX_T *)p, 0x50000000, 0x60000000, 0xff00, 8, DMA_CHECK_DATA_ACCESS_AND_COMPARE, p->support_channel_num);
            #endif

            // A byte passes at this tap when its 8 error bits are all zero;
            // remember the first and the most recent passing tap per byte.
            for (u1ByteIdx = 0; u1ByteIdx < (p->data_width / DQS_BIT_NUMBER); u1ByteIdx++)
            {
                if (((u4err_value >> (u1ByteIdx << 3)) & 0xff) == 0)
                {
                    if (ucBegin[u1ByteIdx] == 0xff)
                        ucBegin[u1ByteIdx] = u2Delay;

                    ucEnd[u1ByteIdx] = u2Delay;
                }
            }

#ifdef ETT_PRINT_FORMAT
            mcSHOW_DBG_MSG("%d, 0x%X, End_B0=%d End_B1=%d\n", u2Delay, u4err_value, ucEnd[0], ucEnd[1]);
#else
            mcSHOW_DBG_MSG("TAP=%2d, err_value=0x%8x, End_B0=%d End_B1=%d\n", u2Delay, u4err_value, ucEnd[0], ucEnd[1]);
#endif

            // Early break: stop once an error shows up after both bytes have
            // already found the start of their pass window.
            // The mask must be parenthesized ("!=" binds tighter than "&"), and
            // "not found" is 0xff, matching the sentinel tested above.
            if (((u4err_value & 0xffff) != 0) && (ucBegin[0] != 0xff) && (ucBegin[1] != 0xff))
                break; // early break;
        }

        #if TX_OE_PATTERN_USE_TA2
        DramcEngine2End(p);
        #endif

        // Pick the final OEN delay of each byte.
        for (u1ByteIdx = 0; u1ByteIdx < DQS_NUMBER; u1ByteIdx++)
        {
            if (ucEnd[u1ByteIdx] == 0xff)
            {
                ucbest_step[u1ByteIdx] = u2SmallestVirtualDelay - u1TxDQOEShift;  //backup original delay, will be used if pass window not found.
                mcSHOW_ERR_MSG("Byte %d no TX OE taps pass, calibration fail!\n", u1ByteIdx);
            }
            else // at least one tap passed: use the midpoint of the first and last passing tap
            {
                ucbest_step[u1ByteIdx] = (ucBegin[u1ByteIdx] + ucEnd[u1ByteIdx]) >> 1;
            }
            mcSHOW_DBG_MSG("Byte%d end_step=%d  best_step=%d\n", u1ByteIdx, ucEnd[u1ByteIdx], ucbest_step[u1ByteIdx]);

            ucdq_oen_ui_large[u1ByteIdx] = (ucbest_step[u1ByteIdx] >> 3);
            ucdq_oen_ui_small[u1ByteIdx] = ucbest_step[u1ByteIdx] & 0x7;
        }
        if ((ucEnd[0]== 0xff)  || (ucEnd[1]== 0xff))
            KResult = DRAM_FAIL;
        else // all bytes are done
            KResult = DRAM_OK;

        vSetCalibrationResult(p, DRAM_CALIBRATION_TX_OE, KResult);
    }

    for (u1ByteIdx = 0; u1ByteIdx < DQS_NUMBER; u1ByteIdx++)
    {
        mcSHOW_DBG_MSG("Byte%d TX OE(2T, 0.5T) = (%d, %d)\n", u1ByteIdx, ucdq_oen_ui_large[u1ByteIdx], ucdq_oen_ui_small[u1ByteIdx]);
        mcDUMP_REG_MSG("Byte%d TX OE(2T, 0.5T) = (%d, %d)\n", u1ByteIdx, ucdq_oen_ui_large[u1ByteIdx], ucdq_oen_ui_small[u1ByteIdx]);
    }
    mcSHOW_DBG_MSG("\n\n");

    // Program the selected OEN delays; DQ and DQM of a byte get the same value.
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ0), \
                                    P_Fld(ucdq_oen_ui_large[0], SHURK_SELPH_DQ0_TXDLY_OEN_DQ0) | \
                                    P_Fld(ucdq_oen_ui_large[1], SHURK_SELPH_DQ0_TXDLY_OEN_DQ1));

    vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ1), \
                                    P_Fld(ucdq_oen_ui_large[0], SHURK_SELPH_DQ1_TXDLY_OEN_DQM0) | \
                                    P_Fld(ucdq_oen_ui_large[1], SHURK_SELPH_DQ1_TXDLY_OEN_DQM1));
    // DLY_DQ[2:0]
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ2), \
                                    P_Fld(ucdq_oen_ui_small[0], SHURK_SELPH_DQ2_DLY_OEN_DQ0) | \
                                    P_Fld(ucdq_oen_ui_small[1], SHURK_SELPH_DQ2_DLY_OEN_DQ1) );
    // DLY_DQM[2:0]
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_SHURK_SELPH_DQ3), \
                                    P_Fld(ucdq_oen_ui_small[0], SHURK_SELPH_DQ3_DLY_OEN_DQM0) | \
                                    P_Fld(ucdq_oen_ui_small[1], SHURK_SELPH_DQ3_DLY_OEN_DQM1));

    #if (SUPPORT_SAVE_TIME_FOR_CALIBRATION)
    if (p->femmc_Ready == 0)
    {
        /* Save the result so a later boot can take the fast-K path. */
        for (u1ByteIdx = 0; u1ByteIdx < DQS_NUMBER; u1ByteIdx++)
        {
            p->pSavetimeData->u1TX_OE_DQ_MCK[p->channel][p->rank][u1ByteIdx] = ucdq_oen_ui_large[u1ByteIdx];
            p->pSavetimeData->u1TX_OE_DQ_UI[p->channel][p->rank][u1ByteIdx] = ucdq_oen_ui_small[u1ByteIdx];
        }
    }
    #endif

}
#endif

/**
 * CmdOEOnOff
 *  Program CMD_DEC_CTRL0.CS0FORCE1_SRK and CMD_DEC_CTRL0.CS1FORCE1_SRK.
 *  @param p                    DRAMC context (selects the channel for the single-channel write).
 *  @param u1OnOff              ON writes 0 to both fields; any other value writes 1.
 *  @param CmdOeDisChannelNUM   TO_ALL_CHANNEL uses the "_All" register write; any other
 *                              value writes only the register addressed through p.
 */
void CmdOEOnOff(DRAMC_CTX_T *p, U8 u1OnOff, CHANNEL_RANK_SEL_T CmdOeDisChannelNUM)
{
    const U8 u1ForceVal = (u1OnOff == ON) ? 0 : 1;

    if (CmdOeDisChannelNUM != TO_ALL_CHANNEL)
    {
        // Single channel: only the register resolved by DRAMC_REG_ADDR() is written
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_CMD_DEC_CTRL0),
            P_Fld(u1ForceVal, CMD_DEC_CTRL0_CS0FORCE1_SRK) |
            P_Fld(u1ForceVal, CMD_DEC_CTRL0_CS1FORCE1_SRK));
        return;
    }

    // All channels
    vIO32WriteFldMulti_All(DRAMC_REG_CMD_DEC_CTRL0,
        P_Fld(u1ForceVal, CMD_DEC_CTRL0_CS0FORCE1_SRK) |
        P_Fld(u1ForceVal, CMD_DEC_CTRL0_CS1FORCE1_SRK));
}

/**
 * OEDisable
 *  "OE disable" sequence used before the jitter meter runs on the current channel:
 *   - B0/B1 DQ2:      DQM/DQ  OE_TIE_SEL = 0, OE_TIE_EN = 1 (0xff for the DQ field)
 *   - SHU B0/B1 DQ13: DQS/DQSB/WCK/WCKB OE_TIE_SEL = 0, OE_TIE_EN = 1
 *   - CmdOEOnOff(DISABLE) and CKEFixOnOff(CKE_FIXOFF) for this channel only
 *  The function does not save the previous register values.
 *  @param p    DRAMC context.
 */
static void OEDisable(DRAMC_CTX_T *p)
{
    //OE disable - start
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ2),
        P_Fld( 0 , B0_DQ2_RG_TX_ARDQM_OE_TIE_SEL_B0 ) |
        P_Fld( 1       , B0_DQ2_RG_TX_ARDQM_OE_TIE_EN_B0  ) |
        P_Fld( 0 , B0_DQ2_RG_TX_ARDQ_OE_TIE_SEL_B0 ) |
        P_Fld( 0xff       , B0_DQ2_RG_TX_ARDQ_OE_TIE_EN_B0  ));
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ2),
        P_Fld( 0 , B1_DQ2_RG_TX_ARDQM_OE_TIE_SEL_B1 ) |
        P_Fld( 1       , B1_DQ2_RG_TX_ARDQM_OE_TIE_EN_B1  ) |
        P_Fld( 0 , B1_DQ2_RG_TX_ARDQ_OE_TIE_SEL_B1 ) |
        P_Fld( 0xff       , B1_DQ2_RG_TX_ARDQ_OE_TIE_EN_B1  ));

    // The SHU_B0_DQ13_* fields belong to SHU_B0_DQ13 (same register as the B1 write
    // below uses for byte 1); writing them to another register would corrupt it.
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ13),
        P_Fld( 0 , SHU_B0_DQ13_RG_TX_ARDQS_OE_TIE_SEL_B0 ) |
        P_Fld( 1       , SHU_B0_DQ13_RG_TX_ARDQS_OE_TIE_EN_B0  ) |
        P_Fld( 0 , SHU_B0_DQ13_RG_TX_ARWCK_OE_TIE_SEL_B0 ) |
        P_Fld( 1       , SHU_B0_DQ13_RG_TX_ARWCK_OE_TIE_EN_B0  ) |
        P_Fld( 0 , SHU_B0_DQ13_RG_TX_ARDQSB_OE_TIE_SEL_B0  ) |
        P_Fld( 1       , SHU_B0_DQ13_RG_TX_ARDQSB_OE_TIE_EN_B0 ) |
        P_Fld( 0 , SHU_B0_DQ13_RG_TX_ARWCKB_OE_TIE_SEL_B0 ) |
        P_Fld( 1       , SHU_B0_DQ13_RG_TX_ARWCKB_OE_TIE_EN_B0  ));

    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ13),
        P_Fld( 0 , SHU_B1_DQ13_RG_TX_ARDQS_OE_TIE_SEL_B1 ) |
        P_Fld( 1       , SHU_B1_DQ13_RG_TX_ARDQS_OE_TIE_EN_B1  ) |
        P_Fld( 0 , SHU_B1_DQ13_RG_TX_ARWCK_OE_TIE_SEL_B1 ) |
        P_Fld( 1       , SHU_B1_DQ13_RG_TX_ARWCK_OE_TIE_EN_B1  ) |
        P_Fld( 0 , SHU_B1_DQ13_RG_TX_ARDQSB_OE_TIE_SEL_B1  ) |
        P_Fld( 1       , SHU_B1_DQ13_RG_TX_ARDQSB_OE_TIE_EN_B1 ) |
        P_Fld( 0 , SHU_B1_DQ13_RG_TX_ARWCKB_OE_TIE_SEL_B1 ) |
        P_Fld( 1       , SHU_B1_DQ13_RG_TX_ARWCKB_OE_TIE_EN_B1  ));

    // Command OE and CKE are handled for the current channel only
    CmdOEOnOff(p, DISABLE, TO_ONE_CHANNEL);
    CKEFixOnOff(p, TO_ALL_RANK, CKE_FIXOFF, TO_ONE_CHANNEL);
    //OE disable - end
}


//-------------------------------------------------------------------------
/** DramcMiockJmeter
 *  Start the MIOCK jitter meter.
 *  @param p                Pointer to the context created by DramcCtxCreate.
 *  @param isAutoK          (U8): non-zero to scan with the HW AutoK engine, 0 to scan by SW.
 *  @retval delay_cell_ps   (U16): delay cell time in units of 1/100 ps, or 0 on failure.
 */
//-------------------------------------------------------------------------

#if DDR_ENABLE_8PHASE_CAL || DDR_ENABLE_MIOCK_JMETER_CAL
/**
 * DramcJmeterAutoK
 *  Run one hardware AutoK jitter-meter scan and copy the reported transitions
 *  into pJmtrInfo.
 *
 *  Sequence: clear the AutoK FSM, program MISC_JM_8PH_AUTOK_CFG0/CFG1 from
 *  pJmtrInfo (start / end / step), trigger through DramcTriggerAndWait(), read
 *  TRANS_CNT and the TRANS_POSn fields from JM_8PH_AUTOK_STATUS0..2, then clear
 *  the FSM again.
 *
 *  @param p            DRAMC context.
 *  @param pJmtrInfo    in:  u2JmDlyStart, u2JmDlyEnd, u1JmDlyStep
 *                      out: u1TransCnt, JmtrInfo[i].u1JmDelay, JmtrInfo[i].u1TransLevel
 *  @retval DRAM_FAIL   returned immediately when the trigger/wait step yields DRAM_FAIL
 *                      (no results are read and the final FSM clear is skipped);
 *                      otherwise the accumulated eResponse is returned.
 */
static DRAM_STATUS_T DramcJmeterAutoK(DRAMC_CTX_T *p, JMETER_T *pJmtrInfo)
{
    DRAM_STATUS_T eResponse = DRAM_OK;
    REG_TRANSFER_T TransPosReg = {DDRPHY_REG_JM_8PH_AUTOK_STATUS0, JM_8PH_AUTOK_STATUS0_JM_8PH_AUTOK_TRANS_POS0};
    U16 u2Jm_dly_start = pJmtrInfo->u2JmDlyStart;
    U16 u2Jm_dly_end = pJmtrInfo->u2JmDlyEnd; /*512 not divisible by step. It will cause HW AutoK not finish*/
    U16 u2Jm_dly_step = pJmtrInfo->u1JmDlyStep;
    U8 u1AutoKTransCnt = 0, u1TransDir = 0;
    U8 u1AutoKSel = 0; //0x0: B0, 0x1: B1 and 0x2: CA
    U8 u1IntVal = 0x0 /*Set 1 equals to 16 JM delay*/, u1DbgEn = DISABLE;
    U32 TransDbgRegAddr = 0, TransDbgRegAddrFld0 = 0, TransDbgRegAddrFld1 = 0, TransDbgRegAddrFld2 = 0;
    U8 u1LastCnt = 0, u1OccurCnt = 0, u1NextCnt = 0; 
    U8 u1EarlyBreakEn = 1, u1EarlyBreakThd = 0, u1Eb_Fail = FALSE, u1TransOverFlow = FALSE; // for 8-Phase Find 1 valid transition 
    U16 u2SampleCnt = 100, u1TransPos = 0; // Set 1 equals to 64 cycles * 50

    #if (FOR_DV_SIMULATION_USED == 1)
    u2SampleCnt = 2; // Saving time for DV sim
    #endif

    /* 
    For Jmeter Find 3 valid transition (1T), 
    < DDR2400 don't enable early break. It will not find 3 valid transition point.
    */
    // NOTE(review): early break is enabled unconditionally here; only the threshold
    // depends on frequency (3 at >= 1200, otherwise 2) - confirm against the note above.
    u1EarlyBreakEn = 1;
    u1EarlyBreakThd = (p->frequency >= 1200) ? 0x3 : 0x2;
    
    // Clear FSM to the IDLE state (SW reset)
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_JM_8PH_AUTOK_CFG0), 0x1, MISC_JM_8PH_AUTOK_CFG0_JM_8PH_AUTOK_CLEAR);
    mcDELAY_US(1);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_JM_8PH_AUTOK_CFG0), 0x0, MISC_JM_8PH_AUTOK_CFG0_JM_8PH_AUTOK_CLEAR);

    // Scan range, step, target selection, debug and early-break settings.
    // Clock gating control (1: clk gating, 0: clk free-run)
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_MISC_JM_8PH_AUTOK_CFG0), P_Fld(u2Jm_dly_start, MISC_JM_8PH_AUTOK_CFG0_JM_8PH_AUTOK_BEGIN)
                                        | P_Fld(u2Jm_dly_end-1, MISC_JM_8PH_AUTOK_CFG0_JM_8PH_AUTOK_LEN)
                                        | P_Fld(u2Jm_dly_step, MISC_JM_8PH_AUTOK_CFG0_JM_8PH_AUTOK_STEP)
                                        | P_Fld(u1AutoKSel, MISC_JM_8PH_AUTOK_CFG0_JM_8PH_AUTOK_SEL)
                                        | P_Fld(u1DbgEn, MISC_JM_8PH_AUTOK_CFG0_JM_8PH_AUTOK_DBG_EN)
                                        | P_Fld(u1EarlyBreakEn, MISC_JM_8PH_AUTOK_CFG0_JM_8PH_AUTOK_BREAK_EN)
                                        | P_Fld(u1EarlyBreakThd, MISC_JM_8PH_AUTOK_CFG0_JM_8PH_AUTOK_BREAK_THRD)
                                        | P_Fld(0x0, MISC_JM_8PH_AUTOK_CFG0_JM_8PH_AUTOK_CG_CTRL));

    // Set sample two transition interval JM DQS delay restriction
    // Set 1 equals to 16 JM DQS delay
    // High/low thresholds are both programmed to half of the sample count.
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_MISC_JM_8PH_AUTOK_CFG1), P_Fld(u1IntVal, MISC_JM_8PH_AUTOK_CFG1_JM_8PH_AUTOK_TRANS_INTVAL)
                                        | P_Fld(0x1, MISC_JM_8PH_AUTOK_CFG1_JM_8PH_AUTOK_SAMPLE_DLY)
                                        | P_Fld((u2SampleCnt>>1), MISC_JM_8PH_AUTOK_CFG1_JM_8PH_AUTOK_SAMPLE_H_THRD)
                                        | P_Fld((u2SampleCnt>>1), MISC_JM_8PH_AUTOK_CFG1_JM_8PH_AUTOK_SAMPLE_L_THRD)
                                        | P_Fld(u2SampleCnt, MISC_JM_8PH_AUTOK_CFG1_JM_8PH_AUTOK_SAMPLE_CNT));

    // Trigger and Wait
    REG_TRANSFER_T TriggerReg = {DDRPHY_REG_MISC_JM_8PH_AUTOK_CFG0, MISC_JM_8PH_AUTOK_CFG0_JM_8PH_AUTOK_TRIG};
    REG_TRANSFER_T RepondsReg = {DDRPHY_REG_JM_8PH_AUTOK_STATUS0, JM_8PH_AUTOK_STATUS0_JM_8PH_AUTOK_DONE};
    // MISC_JM_8PH_AUTOK_CFG0_JM_8PH_AUTOK_TRIG = 1 for Jmeter HW AutoK
    // MISC_JM_8PH_AUTOK_CFG0_JM_8PH_AUTOK_TRIG = 2 for 8-Phase HW AutoK (Design is FAIL)
    eResponse |= DramcTriggerAndWait(p, TriggerReg, RepondsReg);
    // On failure nothing below runs: pJmtrInfo outputs are left untouched and the
    // FSM is not cleared again.
    if (eResponse == DRAM_FAIL)
        return eResponse;

    // Report the transition count (L->H or H->L)
    pJmtrInfo->u1TransCnt = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_JM_8PH_AUTOK_STATUS0), JM_8PH_AUTOK_STATUS0_JM_8PH_AUTOK_TRANS_CNT);
    u1Eb_Fail = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_JM_8PH_AUTOK_STATUS1), JM_8PH_AUTOK_STATUS1_JM_8PH_AUTOK_EB_FAIL);

    mcSHOW_DBG_MSG3("[DramcJmeterAutoK] TransCnt = %d, Early Break Flag = %s\n", pJmtrInfo->u1TransCnt, u1Eb_Fail?"FAIL":"PASS");

    // For each reported transition, pick the status register/field that holds its
    // position. Only the first transition's direction is read from hardware; the
    // following ones are derived by toggling it.
    for (u1AutoKTransCnt = 0; u1AutoKTransCnt < pJmtrInfo->u1TransCnt; u1AutoKTransCnt++)
    {
        switch (u1AutoKTransCnt)
        {
            case 0:
                TransPosReg.u4Addr = DDRPHY_REG_JM_8PH_AUTOK_STATUS0;
                TransPosReg.u4Fld = JM_8PH_AUTOK_STATUS0_JM_8PH_AUTOK_TRANS_POS0;
                // Report first transition direction (1: rising, 0: falling) for init
                u1TransDir = u4IO32ReadFldAlign(DRAMC_REG_ADDR(TransPosReg.u4Addr), JM_8PH_AUTOK_STATUS0_JM_8PH_AUTOK_TRANS_DIR);
                break;
            case 1:
                TransPosReg.u4Addr = DDRPHY_REG_JM_8PH_AUTOK_STATUS0;
                TransPosReg.u4Fld = JM_8PH_AUTOK_STATUS0_JM_8PH_AUTOK_TRANS_POS1;
                u1TransDir = !u1TransDir;
                break;
            case 2:
                TransPosReg.u4Addr = DDRPHY_REG_JM_8PH_AUTOK_STATUS0;
                TransPosReg.u4Fld = JM_8PH_AUTOK_STATUS0_JM_8PH_AUTOK_TRANS_POS2;
                u1TransDir = !u1TransDir;
                break;
            case 3:
                TransPosReg.u4Addr = DDRPHY_REG_JM_8PH_AUTOK_STATUS1;
                TransPosReg.u4Fld = JM_8PH_AUTOK_STATUS1_JM_8PH_AUTOK_TRANS_POS3;
                u1TransDir = !u1TransDir;
                break;
            case 4:
                TransPosReg.u4Addr = DDRPHY_REG_JM_8PH_AUTOK_STATUS1;
                TransPosReg.u4Fld = JM_8PH_AUTOK_STATUS1_JM_8PH_AUTOK_TRANS_POS4;
                u1TransDir = !u1TransDir;
                break;
            case 5:
                TransPosReg.u4Addr = DDRPHY_REG_JM_8PH_AUTOK_STATUS1;
                TransPosReg.u4Fld = JM_8PH_AUTOK_STATUS1_JM_8PH_AUTOK_TRANS_POS5;
                u1TransDir = !u1TransDir;
                break;
            case 6:
                TransPosReg.u4Addr = DDRPHY_REG_JM_8PH_AUTOK_STATUS2;
                TransPosReg.u4Fld = JM_8PH_AUTOK_STATUS2_JM_8PH_AUTOK_TRANS_POS6;
                u1TransDir = !u1TransDir;
                break;
            default:
                // NOTE(review): only TRANS_POS0..6 are mapped. For an index >= 7 this
                // only logs; the code after the switch still re-reads the previous
                // TransPosReg and stores into JmtrInfo[index]. Confirm that TRANS_CNT
                // cannot exceed 7 and that it fits the JmtrInfo[] array bound.
                mcSHOW_ERR_MSG("[DramcJmeterAutoK] fail at JM_8PH_AUTOK_TRANS_POS%d incorrect !!!\n", u1AutoKTransCnt);
                break;
        }

        u1TransPos = u4IO32ReadFldAlign(DRAMC_REG_ADDR(TransPosReg.u4Addr), TransPosReg.u4Fld);
        pJmtrInfo->JmtrInfo[u1AutoKTransCnt].u1JmDelay = u1TransPos;
        pJmtrInfo->JmtrInfo[u1AutoKTransCnt].u1TransLevel = u1TransDir;
        mcSHOW_DBG_MSG3("[DramcJmeterAutoK] TransCnt%d, TransPos = %d, TransLevel = %d\n", u1AutoKTransCnt, u1TransPos, u1TransDir);
    }

    // Optional debug dump; u1DbgEn is initialised to DISABLE and never changed in
    // this function, so this branch is taken only if that initialiser is edited.
    if (u1DbgEn)
    {
        TransDbgRegAddrFld0 = JM_8PH_AUTOK_DBG_STATUS0_JM_8PH_AUTOK_TRANS0_CNT0;
        TransDbgRegAddrFld1 = JM_8PH_AUTOK_DBG_STATUS0_JM_8PH_AUTOK_TRANS0_CNT1;
        TransDbgRegAddrFld2 = JM_8PH_AUTOK_DBG_STATUS0_JM_8PH_AUTOK_TRANS0_CNT2;

        for (u1AutoKTransCnt = 0; u1AutoKTransCnt < pJmtrInfo->u1TransCnt; u1AutoKTransCnt++)
        {
            // One debug status register per transition, 4 bytes apart
            TransDbgRegAddr = DDRPHY_REG_JM_8PH_AUTOK_DBG_STATUS0 + (u1AutoKTransCnt << 2);

            u1LastCnt = u4IO32ReadFldAlign(DRAMC_REG_ADDR(TransDbgRegAddr), TransDbgRegAddrFld0);
            u1OccurCnt = u4IO32ReadFldAlign(DRAMC_REG_ADDR(TransDbgRegAddr), TransDbgRegAddrFld1);
            u1NextCnt = u4IO32ReadFldAlign(DRAMC_REG_ADDR(TransDbgRegAddr), TransDbgRegAddrFld2);
    
            mcSHOW_DBG_MSG3("[DramcJmeterAutoK] DBG%d, LastCnt = %d, OccurCnt = %d, NextCnt = %d\n", u1AutoKTransCnt, u1LastCnt, u1OccurCnt, u1NextCnt);
        }

        u1TransOverFlow = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_JM_8PH_AUTOK_STATUS1), JM_8PH_AUTOK_STATUS1_JM_8PH_AUTOK_TRANS_OVERFLOW);
        mcSHOW_DBG_MSG3("[DramcJmeterAutoK]  Trans Overflow = %d\n", u1TransOverFlow);
    }

    // Clear FSM to the IDLE state (SW reset)
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_JM_8PH_AUTOK_CFG0), 0x1, MISC_JM_8PH_AUTOK_CFG0_JM_8PH_AUTOK_CLEAR);
    mcDELAY_US(1);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_JM_8PH_AUTOK_CFG0), 0x0, MISC_JM_8PH_AUTOK_CFG0_JM_8PH_AUTOK_CLEAR);

    return eResponse;
}

/**
 * DramcJmeterInit
 *  Put the PHY/DRAMC of the current channel into jitter-meter measurement mode.
 *  Calls OEDisable() first, then programs rank-select, DLL phase-detect, clock
 *  gating, RX eye-scan and JM enable/select registers, and finally enables
 *  RX_MIOCK_JIT_EN with the eye scan left disabled.
 *  The function overwrites registers without saving them; saving/restoring is
 *  left to the caller.
 *  @param p            DRAMC context.
 *  @param u1IsJmtrK    When not TRUE, STBCALEN and STB_SELPHCALEN are additionally
 *                      cleared in MISC_SHU_STBCAL.
 */
static void DramcJmeterInit(DRAMC_CTX_T *p, U8 u1IsJmtrK)
{
    OEDisable(p);

    //DramcHWGatingOnOff(p, 0); // disable Gating tracking for DQS PI, Remove to vApplyConfigBeforeCalibration
    if(u1IsJmtrK != TRUE)
    {
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_STBCAL), P_Fld(0x0, MISC_SHU_STBCAL_STBCALEN)
                                                           | P_Fld(0x0, MISC_SHU_STBCAL_STB_SELPHCALEN));
    }

    // @A60868 for *RANK_SEL_SER_EN* = 0 to DA_RX_ARDQ_RANK_SEL_TXD_*[0]
    //              for *RANK_SEL_SER_EN* = 1 to DA_RX_ARDQ_RANK_SEL_TXD_*[7:0]
    // The *RANK_SEL_SER_EN* = 0 is old mode.
    // The *RANK_SEL_SER_EN* = 1 is new mode when background no any access.
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ11), 0, SHU_B0_DQ11_RG_RX_ARDQ_RANK_SEL_SER_EN_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ11), 0, SHU_B1_DQ11_RG_RX_ARDQ_RANK_SEL_SER_EN_B1);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_CA_CMD11), 0, SHU_CA_CMD11_RG_RX_ARCA_RANK_SEL_SER_EN_CA);

    //@Darren, DLL off to stable fix middle transion from high to low or low to high at high vcore
    // Order below: B0, B1, then (Griffin only) channel B CA, and the current
    // channel's CA last ("Master last").
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DLL1), P_Fld(0x0, SHU_B0_DLL1_RG_ARDLL_PHDET_EN_B0)
                                        | P_Fld(0x0, SHU_B0_DLL1_RG_ARDLL_PHDET_OUT_SEL_B0));
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DLL1), P_Fld(0x0, SHU_B1_DLL1_RG_ARDLL_PHDET_EN_B1)
                                        | P_Fld(0x0, SHU_B1_DLL1_RG_ARDLL_PHDET_OUT_SEL_B1));

#if (fcFOR_CHIP_ID == fcGriffin)
    // Temporarily switch the context to CHANNEL_B for this one write
    channel_backup_and_set(p, CHANNEL_B);
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_CA_DLL1), P_Fld(0x0, SHU_CA_DLL1_RG_ARDLL_PHDET_EN_CA)
                                    | P_Fld(0x0, SHU_CA_DLL1_RG_ARDLL_PHDET_OUT_SEL_CA));
    channel_restore(p);
#endif
    /* Master last */
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_CA_DLL1), P_Fld(0x0, SHU_CA_DLL1_RG_ARDLL_PHDET_EN_CA)
                                        | P_Fld(0x0, SHU_CA_DLL1_RG_ARDLL_PHDET_OUT_SEL_CA));

    //MCK4X CG
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_CTRL1), 0, MISC_CTRL1_R_DMDQSIENCG_EN);
    //@A60868, DQS PI mode for JMTR
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DLL_ARPI2), 0, SHU_B0_DLL_ARPI2_RG_ARPI_CG_DQSIEN_B0); // DQS PI mode
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DLL_ARPI2), 0, SHU_B1_DLL_ARPI2_RG_ARPI_CG_DQSIEN_B1); // DQS PI mode
    //@IPMV2, RX_EYE_SCAN_CG_CTRL set 0 : clock free-run, set 1 : clock gated by CW Hsieh
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DUTYSCAN1), 0, MISC_DUTYSCAN1_RX_EYE_SCAN_CG_CTRL); // enable toggle cnt
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DRAMC_REG_TX_CG_SET1), P_Fld(0xf, TX_CG_SET1_DRAMC_ARPI_CG_MPDIV_DATA_FR)
                                                           | P_Fld(0xf, TX_CG_SET1_DRAMC_ARPI_CG_MCK_DATA_FR));
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_DRAMC_PD_CTRL), 1, DRAMC_PD_CTRL_APHYCKCG_FIXOFF);
    //@IPMV2, end
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_CTRL4), 0, MISC_CTRL4_R_OPT2_CG_DQSIEN); // Remove to Golden settings for Jmeter clock
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL), 0, MISC_STBCAL_DQSIENCG_NORMAL_EN); // @Darren need confirm for DQS*_ERR_CNT, APHY PICG freerun
    //@A60868, End

    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ6), 1, B0_DQ6_RG_RX_ARDQ_EYE_DLY_DQS_BYPASS_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ6), 1, B1_DQ6_RG_RX_ARDQ_EYE_DLY_DQS_BYPASS_B1);

    // Eye scan is switched on here and switched off again at the end of this
    // function (see "for counter clear" below).
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DUTYSCAN1), 1, MISC_DUTYSCAN1_RX_EYE_SCAN_EN);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DUTYSCAN1), 0x1, MISC_DUTYSCAN1_EYESCAN_DQS_SYNC_EN);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ5), 1, B0_DQ5_RG_RX_ARDQ_EYE_EN_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ5), 1, B1_DQ5_RG_RX_ARDQ_EYE_EN_B1);
    // The VREF enable bit lives in a different register depending on CHIP_PROCESS
    #if (CHIP_PROCESS<=5)
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_PHY_VREF_CTRL1), 1, SHU_RK_B0_PHY_VREF_CTRL1_RG_RX_ARDQ_VREF_EN_RK_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_PHY_VREF_CTRL1), 1, SHU_RK_B1_PHY_VREF_CTRL1_RG_RX_ARDQ_VREF_EN_RK_B1);
    #else
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ5), 1, B0_DQ5_RG_RX_ARDQ_VREF_EN_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ5), 1, B1_DQ5_RG_RX_ARDQ_VREF_EN_B1);
    #endif
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ3), 1, B0_DQ3_RG_RX_ARDQ_SMT_EN_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ3), 1, B1_DQ3_RG_RX_ARDQ_SMT_EN_B1);
    //@A60868, JMTR en

    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_PHY2), 1, B0_PHY2_RG_RX_ARDQS_JM_EN_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_PHY2), 1, B1_PHY2_RG_RX_ARDQS_JM_EN_B1);
    //@A60868, End

    // JM_SEL is a DQ6 field on Griffin and a PHY2 field on other chips
#if (fcFOR_CHIP_ID == fcGriffin)
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ6), 1, B0_DQ6_RG_RX_ARDQ_JM_SEL_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ6), 1, B1_DQ6_RG_RX_ARDQ_JM_SEL_B1);
#else
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_PHY2), 1, B0_PHY2_RG_RX_ARDQS_JM_SEL_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_PHY2), 1, B1_PHY2_RG_RX_ARDQS_JM_SEL_B1);
#endif

    //Enable MIOCK jitter meter mode ( RG_RX_MIOCK_JIT_EN=1)
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DUTYSCAN1), 1, MISC_DUTYSCAN1_RX_MIOCK_JIT_EN);

    //Disable DQ eye scan (write 0), for counter clear
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DUTYSCAN1), 0, MISC_DUTYSCAN1_RX_EYE_SCAN_EN);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DUTYSCAN1), 0, MISC_DUTYSCAN1_DQSERRCNT_DIS);
}

/**
 * DramcJmeterCalib
 *  Software jitter-meter scan. Steps the JM delay from u2JmDlyStart up to (but not
 *  including) u2JmDlyEnd in u1JmDlyStep increments, samples the eye-scan counters
 *  at each step, reduces them to a 0/1 level, and records every level change in
 *  pJmtrInfo. The scan stops early once CYCLE_1T transitions have been recorded.
 *  @param p            DRAMC context.
 *  @param pJmtrInfo    in:  u2JmDlyStart, u2JmDlyEnd, u1JmDlyStep
 *                      out: JmtrInfo[n].u1JmDelay / u1TransLevel for each transition
 *                           and u1TransCnt (written only when a transition is found).
 *  @param u1IsJmtrK    TRUE enables the per-step debug print (unless
 *                      DDR_INIT_TIME_PROFILING is defined).
 */
static void DramcJmeterCalib(DRAMC_CTX_T *p, JMETER_T *pJmtrInfo, U8 u1IsJmtrK)
{
    const U16 u2DlyBegin = pJmtrInfo->u2JmDlyStart;
    const U16 u2DlyLimit = pJmtrInfo->u2JmDlyEnd;
    const U16 u2DlyInc = pJmtrInfo->u1JmDlyStep;
    U16 u2Dly;
    U16 u2PrevLevel = 0xffff;   // level seen at the previous change (or first sample)
    U16 u2TransIdx = 0;         // number of transitions recorded so far
    U8 u1HavePrev = FALSE;      // FALSE until the first sample has been taken

    for (u2Dly = u2DlyBegin; u2Dly < u2DlyLimit; u2Dly += u2DlyInc)
    {
        U32 u4Samples, u4Ones;
        U16 u2Level;

        //@A60868, Set CLK delay (RG_*_RX_ARDQS_JM_DLY_B*)
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_PHY2), u2Dly, B0_PHY2_RG_RX_ARDQS_JM_DLY_B0);
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_PHY2), u2Dly, B1_PHY2_RG_RX_ARDQS_JM_DLY_B1);

        // Pulse reg_sw_rst (1 then 0) to reset the eye scan counters
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DUTYSCAN1), 1, MISC_DUTYSCAN1_REG_SW_RST);
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DUTYSCAN1), 0, MISC_DUTYSCAN1_REG_SW_RST);

        // Run the eye scan for 10us, then stop it so the counters are latched
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DUTYSCAN1), 1, MISC_DUTYSCAN1_RX_EYE_SCAN_EN);
        mcDELAY_US(10);
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DUTYSCAN1), 0, MISC_DUTYSCAN1_RX_EYE_SCAN_EN);

        // Latched counters: total toggles and DQS0 error count
        u4Samples = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DUTY_TOGGLE_CNT), MISC_DUTY_TOGGLE_CNT_TOGGLE_CNT);
        u4Ones = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DUTY_DQS0_ERR_CNT), MISC_DUTY_DQS0_ERR_CNT_DQS0_ERR_CNT);

#ifndef DDR_INIT_TIME_PROFILING
        if (u1IsJmtrK == TRUE){
            mcSHOW_DBG_MSG2("%d : %d, %d\n", u2Dly, u4Samples, u4Ones);
        }
#endif

        // Majority vote: level is 1 when at least half of the samples are ones
        u2Level = (u4Ones < (u4Samples / 2)) ? 0 : 1;

        if (u1HavePrev == FALSE)
        {
            // First sample only seeds the reference level
            u2PrevLevel = u2Level;
            u1HavePrev = TRUE;
            continue;
        }

        if (u2Level == u2PrevLevel)
        {
            continue;   // no transition at this delay
        }

        // Level changed: record the delay and the new level
        u2PrevLevel = u2Level;
        pJmtrInfo->JmtrInfo[u2TransIdx].u1JmDelay = u2Dly;
        pJmtrInfo->JmtrInfo[u2TransIdx].u1TransLevel = u2Level;

        u2TransIdx++;
        pJmtrInfo->u1TransCnt = u2TransIdx;
        if (u2TransIdx == CYCLE_1T)
        {
            break; // 1T early break
        }
    }
}
#endif

#if DDR_ENABLE_MIOCK_JMETER_CAL
/**
 * DramcMiockJmeter
 *  Measure the delay cell time of the current channel with the MIOCK jitter meter.
 *
 *  Backs up the registers touched by the measurement, enters jitter-meter mode
 *  (DramcJmeterInit), and sweeps the DQSIEN PI (0..62). For each PI value one
 *  scan is run (HW AutoK or SW) until a scan reports CYCLE_1T or CYCLE_05T
 *  transitions. The registers are restored on every exit path, and the number of
 *  delay cells per 1T is converted into a delay cell time using the real PHY
 *  frequency.
 *
 *  On success DRAM_CALIBRATION_JITTER_METER is set to DRAM_OK and
 *  u2g_num_dlycell_perT is updated; on failure the result stays DRAM_FAIL.
 *
 *  @param p        DRAMC context.
 *  @param isAutoK  Non-zero: scan with DramcJmeterAutoK(); zero: scan with DramcJmeterCalib().
 *  @return         Delay cell time in units of 1/100 ps, or 0 when the HW AutoK
 *                  fails, fewer than 0.5T worth of transitions were found, or the
 *                  measured cell count is 0.
 */
U16 DramcMiockJmeter(DRAMC_CTX_T *p, U8 isAutoK)
{
    U8 backup_rank, u1RankIdx;
    U16 ucstart_period = 0, ucmiddle_period = 0, ucend_period = 0;
    // Read PCW
    U16 u2real_freq, u2real_period;
    // DQSIEN
    U8 u1RxGatingPI = 0, u1RxGatingPI_start = 0, u1RxGatingPI_end = 63;
    // Jmeter Scan
    JMETER_T JmtrInfo;
//    U8 u1JmtrPrintCnt = 0;
    DRAM_STATUS_T eStatus = DRAM_OK;
    U8 u1AutoKFailed = FALSE;
    U16 num_dlycell_perT;
    U16 delay_cell_ps=0;
#if (fcFOR_CHIP_ID == fcGriffin)
    U32 bak_SHU_CA_DLL1_CHB;
#endif
    U32 u4RegBackupAddress[] =
    {
        (MIX_RG_CHECK(DDRPHY_REG_MISC_DUTYSCAN1)),
        (MIX_RG_CHECK(DDRPHY_REG_B0_DQ6)),
        (MIX_RG_CHECK(DDRPHY_REG_B1_DQ6)),
        (MIX_RG_CHECK(DDRPHY_REG_B0_DQ5)),
        (MIX_RG_CHECK(DDRPHY_REG_B1_DQ5)),
        (MIX_RG_CHECK(DDRPHY_REG_B0_DQ3)),
        (MIX_RG_CHECK(DDRPHY_REG_B1_DQ3)),
        (MIX_RG_CHECK(DDRPHY_REG_MISC_CTRL1)),
        (MIX_RG_CHECK(DDRPHY_REG_MISC_CTRL4)),
        (MIX_RG_CHECK(DDRPHY_REG_B0_PHY2)),
        (MIX_RG_CHECK(DDRPHY_REG_B1_PHY2)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B0_DLL_ARPI2)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B1_DLL_ARPI2)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B0_DQ11)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B1_DQ11)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_CA_CMD11)),
        (MIX_RG_CHECK(DDRPHY_REG_MISC_STBCAL)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_RK_B0_DQSIEN_DLY)), // need porting to Jmeter
        (MIX_RG_CHECK(DRAMC_REG_TX_CG_SET1)),
        (MIX_RG_CHECK(DRAMC_REG_DRAMC_PD_CTRL)),
        (MIX_RG_CHECK(DDRPHY_REG_MISC_JM_8PH_AUTOK_CFG0)),
        (MIX_RG_CHECK(DDRPHY_REG_MISC_JM_8PH_AUTOK_CFG1)),
        (MIX_RG_CHECK(DDRPHY_REG_B0_DQ2)),
        (MIX_RG_CHECK(DDRPHY_REG_B1_DQ2)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B0_DQ13)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B1_DQ13)),
        (MIX_RG_CHECK(DRAMC_REG_CMD_DEC_CTRL0)), //in CmdOEOnOff()
        (MIX_RG_CHECK(DRAMC_REG_CKECTRL)), //in CKEFixOnOff()
        (MIX_RG_CHECK(DDRPHY_REG_SHU_CA_DLL1)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B0_DLL1)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B1_DLL1)),
    };

    backup_rank = u1GetRank(p);
    mcSHOW_DBG_MSG("[DramcMiockJmeter] for CH%d\n", p->channel);

    //backup register value
    DramcBackupRegisters(p, u4RegBackupAddress, sizeof(u4RegBackupAddress) / sizeof(U32), TO_ONE_CHANNEL);
#if (fcFOR_CHIP_ID == fcGriffin) /* CHB DLL PHDET_EN_CA shall be dis/en as sequence required */
    channel_backup_and_set(p, CHANNEL_B);
    bak_SHU_CA_DLL1_CHB = u4IO32Read4B(DDRPHY_REG_SHU_CA_DLL1);
    channel_restore(p);
#endif

    //default set fail
    vSetCalibrationResult(p, DRAM_CALIBRATION_JITTER_METER, DRAM_FAIL);

    //mcDUMP_REG_MSG("\n[dumpRG] DramcMiockJmeter\n");
#if VENDER_JV_LOG
    vPrintCalibrationBasicInfo_ForJV(p);
#else
    vPrintCalibrationBasicInfo(p);
#endif

    DramcJmeterInit(p, TRUE);

    for (u1RxGatingPI = u1RxGatingPI_start; u1RxGatingPI < u1RxGatingPI_end; u1RxGatingPI++)
    {
        mcSHOW_DBG_MSG2("\nu1RxGatingPI = %d\n", u1RxGatingPI);

        // Apply the same DQSIEN PI to every supported rank, then return to the
        // rank that was active on entry.
        for (u1RankIdx = RANK_0; u1RankIdx < p->support_rank_num; u1RankIdx++)
        {
            vSetRank(p, u1RankIdx);
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQSIEN_DLY), u1RxGatingPI, SHU_RK_B0_DQSIEN_DLY_DQSIEN_PI_B0); // for rank*_B0
        }
        vSetRank(p, backup_rank);

        // Fresh scan descriptor for each PI value (u2JmDlyStart stays 0)
        memset(&JmtrInfo, 0, sizeof(JmtrInfo));

        if (isAutoK)
        {
            JmtrInfo.u2JmDlyEnd = 509; /*512 not divisible by step. It will cause HW AutoK not finish*/
            JmtrInfo.u1JmDlyStep = 4;
            eStatus |= DramcJmeterAutoK(p, &JmtrInfo);
            if (eStatus == DRAM_FAIL)
            {
                mcSHOW_ERR_MSG("Jmeter Hw AutoK is FAIL !!!\n");
                // Do not return from here: the registers changed by
                // DramcJmeterInit() must be restored first (done after the loop).
                u1AutoKFailed = TRUE;
                break;
            }
        }
        else
        {
        #if (fcFOR_CHIP_ID == fcGriffin)
            JmtrInfo.u2JmDlyEnd = 128;
        #else
            JmtrInfo.u2JmDlyEnd = 512;
        #endif
            JmtrInfo.u1JmDlyStep = 4;
            DramcJmeterCalib(p, &JmtrInfo, TRUE);
        }

        if ((JmtrInfo.u1TransCnt == CYCLE_1T) || (JmtrInfo.u1TransCnt == CYCLE_05T)) // (1T or 0.5T)
            break;
    }

    //restore to orignal value
    DramcRestoreRegisters(p, u4RegBackupAddress, sizeof(u4RegBackupAddress) / sizeof(U32), TO_ONE_CHANNEL);
#if (fcFOR_CHIP_ID == fcGriffin)
    channel_backup_and_set(p, CHANNEL_B);
    vIO32Write4B(DDRPHY_REG_SHU_CA_DLL1, bak_SHU_CA_DLL1_CHB);
    channel_restore(p);
#endif

    if (u1AutoKFailed == TRUE)
    {
        delay_cell_ps = 0; //for LP3 and LP4 lookup table used
        return delay_cell_ps;
    }

#if 0 //@Darren, for debug print
    for (u1JmtrPrintCnt = 0; u1JmtrPrintCnt < JmtrInfo.u1TransCnt; u1JmtrPrintCnt++)
    {
            mcSHOW_DBG_MSG("\n\t JmtrInfo.JmtrInfo[%d].u1JmDelay = %d\n", u1JmtrPrintCnt, JmtrInfo.JmtrInfo[u1JmtrPrintCnt].u1JmDelay);
            mcSHOW_DBG_MSG("\n\t JmtrInfo.JmtrInfo[%d].u1TransLevel = %d\n", u1JmtrPrintCnt, JmtrInfo.JmtrInfo[u1JmtrPrintCnt].u1TransLevel);
    }
    mcSHOW_DBG_MSG("\n\tMIOCK jitter meter - ucsearch_state = %d\n", ucsearch_state);
#endif

    if (JmtrInfo.u1TransCnt != CYCLE_1T)
    {
        if (JmtrInfo.u1TransCnt != CYCLE_05T)
        {
            mcSHOW_DBG_MSG("\n\tMIOCK jitter meter - ch=%d\n", p->channel);
            mcSHOW_DBG_MSG("\tLess than 0.5T data. Cannot calculate delay cell time\n\n");

            delay_cell_ps = 0;   //for LP3 and LP4 lookup table used

            return delay_cell_ps;
        }
        else
        {   // for 0.5T
            // Half a period was seen between the first and last transition, so
            // 1T = 2 * (middle - start) delay cells
            ucstart_period = JmtrInfo.JmtrInfo[0].u1JmDelay;
            ucmiddle_period = JmtrInfo.JmtrInfo[JmtrInfo.u1TransCnt-1].u1JmDelay;
            num_dlycell_perT = (ucmiddle_period - ucstart_period) * 2;
        }
    }
    else
    {   // for 1T
        // A full period was seen: 1T = (end - start) delay cells
        ucstart_period = JmtrInfo.JmtrInfo[0].u1JmDelay;
        ucend_period = JmtrInfo.JmtrInfo[JmtrInfo.u1TransCnt-1].u1JmDelay;
        num_dlycell_perT = (ucend_period - ucstart_period);
    }

    // Guard the division below; the calibration result stays DRAM_FAIL
    if (num_dlycell_perT == 0)
    {
        mcSHOW_ERR_MSG("[DramcMiockJmeter] 0 dly cells per 1T. Cannot calculate delay cell time\n");
        delay_cell_ps = 0;
        return delay_cell_ps;
    }

    vSetCalibrationResult(p, DRAM_CALIBRATION_JITTER_METER, DRAM_OK);

    // 1T in ps, from the real PHY frequency in MHz
    u2real_freq = DDRPhyGetRealFreq(p);
    u2real_period = (U16) (1000000 / u2real_freq);

    //calculate delay cell time (x100, i.e. units of 1/100 ps)
    delay_cell_ps = u2real_period * 100 / num_dlycell_perT;

    if (JmtrInfo.u1TransCnt == CYCLE_1T)
    { // 1T
        mcSHOW_DBG_MSG("\n\tMIOCK jitter meter\tch=%d\n\n"
                        "1T = (%d-%d) = %d dly cells\n"
                        "Clock freq = %d MHz, period = %d ps, 1 dly cell = %d/100 ps\n\n",
                            p->channel,
                            ucend_period, ucstart_period, num_dlycell_perT,
                            u2real_freq, u2real_period, delay_cell_ps);
    }
    else
    { // 0.5T
        mcSHOW_DBG_MSG("\n\tMIOCK jitter meter\tch=%d\n\n"
                        "1T = (%d-%d)*2 = %d dly cells\n"
                        "Clock freq = %d MHz, period = %d ps, 1 dly cell = %d/100 ps\n\n",
                            p->channel,
                            ucmiddle_period, ucstart_period, num_dlycell_perT,
                            u2real_freq, u2real_period, delay_cell_ps);
    }
    u2g_num_dlycell_perT = num_dlycell_perT;

    return delay_cell_ps;
}

/**
 * get_DelayCell_by_Vcore
 *  Look up the cached delay cell time for a given Vcore value.
 *  Scans JMeter_DelayCell_Table from SRAM_SHU0 up to DRAM_DFS_SRAM_MAX and stops
 *  at the first entry whose Vcore matches.
 *  @param check_vcore_value    Vcore value to search for.
 *  @return                     delay_cell_ps of the first matching entry, or 0
 *                              when no entry matches.
 */
static U16 get_DelayCell_by_Vcore(U32 check_vcore_value)
{
    U8 u1ShuIdx = SRAM_SHU0;
    U16 u2CellPs = 0; // stays 0 when nothing matches

    while (u1ShuIdx < DRAM_DFS_SRAM_MAX)
    {
        if (JMeter_DelayCell_Table[u1ShuIdx].Vcore == check_vcore_value)
        {
            u2CellPs = JMeter_DelayCell_Table[u1ShuIdx].delay_cell_ps;
            break;
        }
        u1ShuIdx++;
    }

    return u2CellPs;
}

/* "picoseconds per delay cell" depends on Vcore only (frequency doesn't matter)
 * 1. Retrieve current freq's vcore voltage using pmic API
 * 2. Perform delay cell time calculation (Bypass if shuffle vcore value is the same as before)
 */
/**
 * GetVcoreDelayCellTime
 *  Return the delay cell time for the current Vcore, calibrating it on demand.
 *
 *  Order of sources:
 *   1. Fast-K data (p->pSavetimeData) when SUPPORT_SAVE_TIME_FOR_CALIBRATION is
 *      enabled and p->femmc_Ready == 1.
 *   2. JMeter_DelayCell_Table entry matching the current Vcore.
 *   3. DramcMiockJmeter() when p->frequency > 600; with FOR_HQA_TEST_USED the
 *      lookup table GetVcoreDelayCellTimeFromTable() is used as fallback / for
 *      lower frequencies.
 *  A newly obtained non-zero value is cached in the first free table slot.
 *
 *  @param p    DRAMC context.
 *  @return     Delay cell time (x100, same unit as DramcMiockJmeter()), or 0 when
 *              no source produced a value.
 */
U16 GetVcoreDelayCellTime(DRAMC_CTX_T *p)
{
    U32 curr_vcore_value = 0;
    U16 delay_cell_ps;
    U8  vcore_i=0;

#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
    if(p->femmc_Ready==1)
    {
        mcSHOW_DBG_MSG("[FAST_K] Freq=%d, DelayCellTimex100=%d\n", p->frequency, p->pSavetimeData->u2DelayCellTimex100);
        return p->pSavetimeData->u2DelayCellTimex100;
    }
#endif

    // In DV simulation the Vcore is not read and stays 0
#if FOR_DV_SIMULATION_USED==0
    curr_vcore_value = dramc_get_vcore_voltage();
#endif

    /* find delay cell by curr_vcore_value */
    delay_cell_ps = get_DelayCell_by_Vcore(curr_vcore_value);

    if (delay_cell_ps == 0)
    {
        /* not found!! */

        /* can K JMeter */
        if (p->frequency > 600)
        {
            delay_cell_ps = DramcMiockJmeter(p, AUTOK_OFF);

#ifdef FOR_HQA_TEST_USED
            /* if K fail, then look up table */
            if (delay_cell_ps == 0) delay_cell_ps = GetVcoreDelayCellTimeFromTable(p); //lookup table
#endif
        }
#ifdef FOR_HQA_TEST_USED
        else
        {
            /* save K JMeter's time, use look up table directly */
            delay_cell_ps = GetVcoreDelayCellTimeFromTable(p); //lookup table
        }
#endif
        if (delay_cell_ps == 0)
        {
            //not found finally
            mcSHOW_ERR_MSG("[%s] Get Delay Cell by Vcore fail!!\n", __func__);
#if __ETT__
            ASSERT(0);
#endif
        }
        else
        {
            /* save values
             * Only a valid (non-zero) result is cached. Caching 0 together with
             * the Vcore would make that entry match first on every later lookup,
             * hiding any good value stored afterwards in a later slot and using up
             * a new slot on each call.
             */
            for(vcore_i=0; vcore_i<DRAM_DFS_SRAM_MAX; vcore_i++)
            {
                if (JMeter_DelayCell_Table[vcore_i].Vcore == 0) //first free slot: save current vcore's delay cell
                {
                    JMeter_DelayCell_Table[vcore_i].delay_cell_ps = delay_cell_ps;
                    JMeter_DelayCell_Table[vcore_i].Vcore = curr_vcore_value;

                    mcSHOW_DBG_MSG3("DelayCellTimex100 (VCORE=%d, cell=%d)\n", JMeter_DelayCell_Table[vcore_i].Vcore, delay_cell_ps);
                    break;
                }
            }
        }
    }

#if __ETT__
    mcSHOW_DBG_MSG("[%s] Freq=%d, VCORE=%d, cell=%d\n", __func__, p->frequency, curr_vcore_value, delay_cell_ps);
#endif

#ifdef FOR_HQA_REPORT_USED
    if (gHQALog_flag == 1)
    {
        HQA_LOG_Print_Prefix_String(p); mcSHOW_DBG_MSG("delaycell_CBT %d\n", delay_cell_ps);
    }
#endif

    // Record the value for a later fast-K boot
#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
    if(p->femmc_Ready==0)
    {
        p->pSavetimeData->u2DelayCellTimex100 = delay_cell_ps;
    }
#endif

    return delay_cell_ps;
}

/**
 * Get_RX_DelayCell
 *  Fill gHQALOG_RX_delay_cell_ps_075V once: switch Vcore to the VMDDR setting,
 *  obtain the delay cell time through GetVcoreDelayCellTime(), then put Vcore
 *  back with vSetVcoreByFreq().
 *  Compiled to an empty function unless FOR_HQA_REPORT_USED is defined and
 *  FOR_DV_SIMULATION_USED is 0.
 *  @param p    DRAMC context.
 */
void Get_RX_DelayCell(DRAMC_CTX_T *p)
{
#if defined(FOR_HQA_REPORT_USED) && (FOR_DV_SIMULATION_USED==0)

    #if SUPPORT_SAVE_TIME_FOR_CALIBRATION
    // gHQALOG_RX_delay_cell_ps_075V is not used in fastk (Only needed in HQA report and eyescan log).
    if (p->femmc_Ready == 1)
    {
        return;
    }
    #endif

    // Already measured earlier: nothing to do
    if (gHQALOG_RX_delay_cell_ps_075V != 0)
    {
        return;
    }

#if __ETT__
    mcSHOW_DBG_MSG("RX delay cell calibration (%d):\n", hqa_vmddr_class);

    // Classes 1/2/3 select HV/NV/LV; any other class leaves Vcore unchanged
    if (hqa_vmddr_class == 1)
    {
        dramc_set_vcore_voltage(_SEL_VMDDR(HV));
    }
    else if (hqa_vmddr_class == 2)
    {
        dramc_set_vcore_voltage(_SEL_VMDDR(NV));
    }
    else if (hqa_vmddr_class == 3)
    {
        dramc_set_vcore_voltage(_SEL_VMDDR(LV));
    }
#else
    // set vcore to RX used 0.75V
    dramc_set_vcore_voltage(SEL_VMDDR);  //set vmddr voltage to vcore to K RX delay cell
#endif

    gHQALOG_RX_delay_cell_ps_075V = GetVcoreDelayCellTime(p);

    // set vcore back
    vSetVcoreByFreq(p);
#endif
}
#endif

//-------------------------------------------------------------------------
/** Dramc8PhaseCal
 *  start 8-Phase Calibration.
 *  @param p                Pointer of context created by DramcCtxCreate.
 *  @param block_no         (U8): block 0 or 1.
 *  @retval status          (DRAM_STATUS_T): DRAM_OK or DRAM_FAIL
 */
//-------------------------------------------------------------------------
#if DDR_ENABLE_8PHASE_CAL
/*
 * Dramc8PhaseCal
 * Searches the MIDPI 8-phase delay (RG_ARPI_MIDPI_8PH_DLY of B0/B1/CA) with the
 * jitter meter and programs the final value on all channels.
 *
 * The search is a three-stage state machine (e8Phase_SM):
 *   - DQS_8PH_DEGREE_0   : DQSIEN PI = 16, 8PH_DLY = 0; records R0, the first
 *                          high-level transition delay reported by the jitter meter.
 *   - DQS_8PH_DEGREE_180 : DQSIEN PI = 48, 8PH_DLY = 0; records R180 (> R0) and
 *                          derives the target R = R0 + (R180 - R0) / 4.
 *   - DQS_8PH_DEGREE_45  : DQSIEN PI = 24, sweeps 8PH_DLY over 0..63 and keeps the
 *                          delay whose transition P is closest to R (min |P - R|).
 *                          The sweep stops early on an exact match or after more
 *                          than u1early_break_cnt non-improving steps.
 *
 * The calibration is skipped (DRAM_OK) when p->frequency < 1866 or when
 * Get_MDL_Used_Flag() == GET_MDL_USED. With SUPPORT_SAVE_TIME_FOR_CALIBRATION and
 * p->femmc_Ready == 1 the stored value is applied without running the search.
 * If no usable transition is found, the 8PH_DLY read before the sweep is
 * written back and DRAM_FAIL is returned.
 *
 * @param p  DRAMC context; NULL yields DRAM_FAIL.
 * @return   DRAM_OK or DRAM_FAIL.
 */
DRAM_STATUS_T Dramc8PhaseCal(DRAMC_CTX_T *p)
{
    u8 isAutoK = 0; // never changed in this function, so the DramcJmeterCalib() branch below is the one taken
    U8 u18Ph_dly_loop_break = FALSE; // set in the 45-degree stage to stop the 8PH_DLY sweep early
    U8 u1DqsienPI = 0;
    DQS_8_PHASE_T e8Phase_SM = DQS_8PH_DEGREE_0;
    U8 u18Ph_dly_final = 0xff;
    U8 u18Ph_dly = 0, u18Ph_start = 0, u18Ph_end = 0;

    // R0/R180: first high-level transition at the 0/180 degree stages; R: target derived from them
    U16 u2R0 = 0xffff, u2R180 = 0xffff, u2R = 0xffff;
    // P: transition measured at the 45 degree stage for the current 8PH_DLY
    U16 u2P = 0xffff, ucdqs_dly = 0;
    // |P - R| for the current step and the smallest value seen so far
    S16 s2Err_code = 0x7fff, s2Err_code_min = 0x7fff;

    U8 backup_rank, u1RankIdx, u18PhDlyBackup = 0;
    U8 u1loop_cnt = 0, u1early_break_cnt = 5;

    // Jmeter Scan
    JMETER_T JmtrInfo;
    U8 u1JmtrPrintCnt = 0;

    U32 u4backup_broadcast= GetDramcBroadcast();
    DRAM_STATUS_T eDRAMStatus = DRAM_OK;

#ifdef DUMP_INIT_RG_LOG_TO_DE //for FT dump 3733 dram_init.c
    return DRAM_OK;
#endif

    u1DqsienPI = 0x0;

    // error handling
    if (!p)
    {
        mcSHOW_ERR_MSG("context NULL\n");
        return DRAM_FAIL;
    }

    // Registers saved before the jitter-meter setup and restored at exit
    U32 u4RegBackupAddress[] =
    {
        (MIX_RG_CHECK(DDRPHY_REG_MISC_DUTYSCAN1)),
        (MIX_RG_CHECK(DDRPHY_REG_B0_DQ6)),
        (MIX_RG_CHECK(DDRPHY_REG_B1_DQ6)),
        (MIX_RG_CHECK(DDRPHY_REG_B0_DQ5)),
        (MIX_RG_CHECK(DDRPHY_REG_B1_DQ5)),
        (MIX_RG_CHECK(DDRPHY_REG_B0_DQ3)),
        (MIX_RG_CHECK(DDRPHY_REG_B1_DQ3)),
        (MIX_RG_CHECK(DDRPHY_REG_MISC_CTRL1)),
        (MIX_RG_CHECK(DDRPHY_REG_MISC_CTRL4)),
        (MIX_RG_CHECK(DDRPHY_REG_B0_PHY2)),
        (MIX_RG_CHECK(DDRPHY_REG_B1_PHY2)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B0_DLL_ARPI2)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B1_DLL_ARPI2)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B0_DQ11)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B1_DQ11)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_CA_CMD11)),
        (MIX_RG_CHECK(DDRPHY_REG_MISC_STBCAL)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_RK_B0_DQSIEN_DLY)), // need porting to Jmeter
        (MIX_RG_CHECK(DRAMC_REG_TX_CG_SET1)),
        (MIX_RG_CHECK(DRAMC_REG_DRAMC_PD_CTRL)),
        (MIX_RG_CHECK(DDRPHY_REG_MISC_JM_8PH_AUTOK_CFG0)),
        (MIX_RG_CHECK(DDRPHY_REG_MISC_JM_8PH_AUTOK_CFG1)),
        #if __IPMv2_TO_BE_PORTING__
        (MIX_RG_CHECK(DDRPHY_REG_MISC_JMETER)),
        #endif
        //(DRAMC_REG_ADDR(DDRPHY_REG_MISC_STBCAL2)), // for gating on/off backup/restore
        //(DRAMC_REG_ADDR(DDRPHY_REG_MISC_DVFSCTL2)), // for gating on/off backup/restore
        (DRAMC_REG_ADDR(DDRPHY_REG_MISC_SHU_STBCAL)), // for gating on/off backup/restore
#if 0
        (MIX_RG_CHECK(DDRPHY_REG_B0_DLL_ARPI0)),
        (MIX_RG_CHECK(DDRPHY_REG_B1_DLL_ARPI0)),
        (MIX_RG_CHECK(DDRPHY_REG_CA_DLL_ARPI0)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B0_DQ6)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B1_DQ6)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_CA_CMD6)),
#endif
        (MIX_RG_CHECK(DDRPHY_REG_SHU_CA_DLL1)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B0_DLL1)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B1_DLL1)),
        (MIX_RG_CHECK(DDRPHY_REG_B0_DQ2)),
        (MIX_RG_CHECK(DDRPHY_REG_B1_DQ2)),
        //(MIX_RG_CHECK(DDRPHY_REG_CA_CMD2)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B0_DQ13)),
        (MIX_RG_CHECK(DDRPHY_REG_SHU_B1_DQ13)),
        //(MIX_RG_CHECK(DDRPHY_REG_SHU_CA_CMD13)),
        (MIX_RG_CHECK(DRAMC_REG_CMD_DEC_CTRL0)), //in CmdOEOnOff()
        (MIX_RG_CHECK(DRAMC_REG_CKECTRL)), //in CKEFixOnOff()
    };

    // Nothing to calibrate below 1866 or when the MDL-used flag is set; report success.
    if ((p->frequency < 1866) || (Get_MDL_Used_Flag() == GET_MDL_USED))
    {
        //mcSHOW_ERR_MSG("skip 8-Phase Calib Freq is %d < 1866 !!!\n", p->frequency);
        return DRAM_OK;
    }

    mcDUMP_REG_MSG("\n[dumpRG] Dramc8PhaseCal\n");
#if VENDER_JV_LOG
    vPrintCalibrationBasicInfo_ForJV(p);
#else
    vPrintCalibrationBasicInfo(p);
#endif

    backup_rank = u1GetRank(p);
    DramcBroadcastOnOff(DRAMC_BROADCAST_OFF);

#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
    // Fast-K: reuse the stored delay; registers were not backed up, so exit skips the restore too.
    if(p->femmc_Ready == 1)
    {
        u18Ph_dly_final = p->pSavetimeData->u18Ph_dly;
        goto exit;
    }
#endif

    //backup register value
    DramcBackupRegisters(p, u4RegBackupAddress, sizeof(u4RegBackupAddress) / sizeof(U32), TO_ONE_CHANNEL);

    DramcJmeterInit(p, FALSE);
    // Remember the 8PH_DLY currently programmed (read from B0) as the fallback value on failure
    u18PhDlyBackup = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ1), SHU_B0_DQ1_RG_ARPI_MIDPI_8PH_DLY_B0);

    for (e8Phase_SM = DQS_8PH_DEGREE_0; e8Phase_SM < DQS_8PH_DEGREE_MAX; e8Phase_SM++)
    {
        // Per-stage DQSIEN PI and 8PH_DLY sweep range [u18Ph_start, u18Ph_end)
        switch (e8Phase_SM)
        {
            case DQS_8PH_DEGREE_0:
                u1DqsienPI = 16;
                u18Ph_start = 0;
                u18Ph_end = 1;
                break;
            case DQS_8PH_DEGREE_180:
                u1DqsienPI = 48;
                u18Ph_start = 0;
                u18Ph_end = 1;
                break;
            case DQS_8PH_DEGREE_45:
                u1DqsienPI = 24;
                u18Ph_start = 0;
                u18Ph_end = 64;
                break;
            default:
                mcSHOW_ERR_MSG("u18Phase_SM err!\n");
                #if __ETT__
                ASSERT(0);
                #endif
        }

        mcSHOW_DBG_MSG2("\n[Dramc8PhaseCal] SM_%d, 8PH_DLY (%d~%d), DQSIEN PI = %d, DV_8PH_DLY = %d\n", e8Phase_SM, u18Ph_start, u18Ph_end, u1DqsienPI, u18PhDlyBackup);

        //to see 1T(H,L) or 1T(L,H) from delaycell=0 to 127
        //NOTE: Must set dual ranks for Rx path
        for (u1RankIdx = RANK_0; u1RankIdx < p->support_rank_num; u1RankIdx++)
        {
            vSetRank(p, u1RankIdx);
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQSIEN_DLY), u1DqsienPI, SHU_RK_B0_DQSIEN_DLY_DQSIEN_PI_B0); // for rank*_B0
        }
        vSetRank(p, backup_rank);

        for (u18Ph_dly = u18Ph_start; u18Ph_dly < u18Ph_end; u18Ph_dly++)
        {
            // Apply the candidate 8PH_DLY to B0, B1 and CA of the current channel
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ1), u18Ph_dly, SHU_B0_DQ1_RG_ARPI_MIDPI_8PH_DLY_B0);
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ1), u18Ph_dly, SHU_B1_DQ1_RG_ARPI_MIDPI_8PH_DLY_B1);
            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_CA_CMD1), u18Ph_dly, SHU_CA_CMD1_RG_ARPI_MIDPI_8PH_DLY_CA);

            memset(&JmtrInfo, 0, sizeof(JmtrInfo));

            if (isAutoK)
            {
                JmtrInfo.u2JmDlyEnd = 509; /*512 not divisible by step. It will cause HW AutoK not finish*/
                JmtrInfo.u1JmDlyStep = 1;
                eDRAMStatus |= DramcJmeterAutoK(p, &JmtrInfo);
                if (eDRAMStatus == DRAM_FAIL)
                {
                    mcSHOW_ERR_MSG("8Phase Hw AutoK is FAIL (to Default) !!!\n");
                    u18Ph_dly_final = u18PhDlyBackup; //rollback to init settings
                    goto exit;
                }
            }
            else
            {
                JmtrInfo.u2JmDlyEnd = 512;
                JmtrInfo.u1JmDlyStep = 1;
                DramcJmeterCalib(p, &JmtrInfo, FALSE);
            }

            // Walk the recorded transitions; each stage breaks out once it has accepted one.
            // Reaching the end without a break is treated as "no transition found" below.
            for (u1JmtrPrintCnt = 0; u1JmtrPrintCnt < JmtrInfo.u1TransCnt; u1JmtrPrintCnt++)
            {
                if (JmtrInfo.JmtrInfo[u1JmtrPrintCnt].u1TransLevel == 1) // find the High Level
                {
                    ucdqs_dly = JmtrInfo.JmtrInfo[u1JmtrPrintCnt].u1JmDelay;

                    if (e8Phase_SM == DQS_8PH_DEGREE_0)
                    {
                        u2R0 = ucdqs_dly;
                        mcSHOW_DBG_MSG2("[8PH=%d] R0 (H) = %d\n", u18Ph_dly, u2R0);
                        break; // break ucdqs_dly for loop
                    }
                    else if (e8Phase_SM == DQS_8PH_DEGREE_180)
                    {
                        u2R180 = ucdqs_dly;
                        if (u2R180 > u2R0)
                        {
                            u2R = u2R0 + ((u2R180 - u2R0) >> 2); // u2R180 >= u2R0 for (u1R180 - u1R0)/4 for 180 degree. /2 for 90 degree
                            mcSHOW_DBG_MSG2("[8PH=%d] R = %d, R180 (H) = %d\n", u18Ph_dly, u2R, u2R180);
                            break; // break ucdqs_dly for loop
                        }
                    }
                    else if (e8Phase_SM == DQS_8PH_DEGREE_45)
                    {
                        u2P = ucdqs_dly;
                        if (u2P > u2R0) // u2P ~= DQS_8PH_DEGREE_180
                        {
                            // Absolute to find min diff
                            if (u2R > u2P)
                                s2Err_code = u2R - u2P;
                            else
                                s2Err_code = u2P - u2R;

                            if (s2Err_code == 0)
                            {
                                // Exact match: take it and stop sweeping
                                s2Err_code_min = s2Err_code;
                                u18Ph_dly_final = u18Ph_dly;
                                u18Ph_dly_loop_break = TRUE;
                            }
                            else if (s2Err_code < s2Err_code_min)
                            {
                                // New best candidate: restart the early-break counter
                                s2Err_code_min = s2Err_code;
                                u18Ph_dly_final = u18Ph_dly;
                                u1loop_cnt = 0;
                            }
                            else if (s2Err_code >= s2Err_code_min) // 8PH_DLY deviate from 45 degree edge (H)
                            {
                                // check early break for u18Ph_dly for loop
                                u1loop_cnt++;
                                if (u1loop_cnt > u1early_break_cnt)
                                    u18Ph_dly_loop_break = TRUE;
                            }

                            mcSHOW_DBG_MSG2("[8PH=%d] diff (P-R) = %d, eb_cnt = %d, P = %d\n", u18Ph_dly, s2Err_code, u1loop_cnt, u2P);

                            break; // if (s2Err_code == s2Err_code_min) for next u18Ph_dly
                        }
                    }
                }
            }

            if (JmtrInfo.u1TransCnt == u1JmtrPrintCnt) // Error handling when no usable transition was found
            {
                u18Ph_dly_final = u18PhDlyBackup; //rollback to init settings
                eDRAMStatus = DRAM_FAIL;
                mcSHOW_ERR_MSG("\n[Dramc8PhaseCal] 8Phase SM_%d is fail (to Default) !!!\n", e8Phase_SM);
                goto exit;
            }

            if (u18Ph_dly_loop_break == TRUE) // early break for for (u18Ph_dly = u18Ph_start...
                break;
        }
    }

exit:
#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
    // Normal (non fast-K) run: store the result for later fast-K boots
    if(p->femmc_Ready == 0)
    {
        p->pSavetimeData->u18Ph_dly = u18Ph_dly_final;
    }
#endif
    mcSHOW_DBG_MSG("\n[Dramc8PhaseCal] Final 8PH_DLY = %d\n\n", u18Ph_dly_final);
    //mcDUMP_REG_MSG("\n[Dramc8PhaseCal] Final 8PH_DLY = %d\n\n", u18Ph_dly_final);

    // Program the final delay through the *_All write variant (B0, B1 and CA)
    vIO32WriteFldAlign_All(DDRPHY_REG_SHU_B0_DQ1, u18Ph_dly_final, SHU_B0_DQ1_RG_ARPI_MIDPI_8PH_DLY_B0);
    vIO32WriteFldAlign_All(DDRPHY_REG_SHU_B1_DQ1, u18Ph_dly_final, SHU_B1_DQ1_RG_ARPI_MIDPI_8PH_DLY_B1);
    vIO32WriteFldAlign_All(DDRPHY_REG_SHU_CA_CMD1, u18Ph_dly_final, SHU_CA_CMD1_RG_ARPI_MIDPI_8PH_DLY_CA);

#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
    if(p->femmc_Ready == 0)
#endif
    {
        //restore to original value
        DramcRestoreRegisters(p, u4RegBackupAddress, sizeof(u4RegBackupAddress) / sizeof(U32), TO_ONE_CHANNEL);
    }
    DramcBroadcastOnOff(u4backup_broadcast);

    return eDRAMStatus;
}
#endif

#if DDR_ENABLE_SW_IMPED_CAL
/* Impedance has a total of 19 steps, but the value mapping to hardware is 0~15, 29~31
* This function adjusts the passed value u4ImpVal by the step count s1StepCnt
* After adjustment, if value is 1. Too large (val > 31) -> set to max 31
*                               2. Too small (val < 0) -> set to min 0
*                               3. Value is between 15 & 29, adjust accordingly ( 15 < value < 29 )
* returns: Impedance value after adjustment
*/
#if SUPPORT_HYNIX_RX_DQS_WEAK_PULL || APPLY_SIGNAL_WAVEFORM_SETTINGS_ADJUST
/*
 * SwImpedanceAdjust
 * Adds a signed step count to an impedance code and folds the result back
 * into the code range used by the hardware.
 *
 * @param u4ImpVal   Impedance code to adjust.
 * @param s1StepCnt  Signed number of steps to add.
 * @return           Adjusted code, limited to 0..31.
 *
 * NOTE(review): a negative step that lands in 16..28 is also shifted up by 13
 * (e.g. 29 with step -1 becomes 31) -- confirm that is the intended behaviour.
 */
static U32 SwImpedanceAdjust(U32 u4ImpVal, S8 s1StepCnt)
{
    S32 s4Code = (S32)u4ImpVal + s1StepCnt;

    // Codes 16..28 are not used by the hardware mapping: shift them by (29 - 16)
    if ((s4Code >= 16) && (s4Code <= 28))
    {
        s4Code += 29 - 16;
    }

    // Saturate to the valid code range
    if (s4Code > 31)
    {
        return 31;
    }
    if (s4Code < 0)
    {
        return 0;
    }

    return (U32)s4Code;
}
#endif

/*
 * DramcSwImpedanceSaveRegister
 * Writes the SW impedance calibration results held in gDramcSwImpedanceResult[]
 * (DRVP/DRVN/ODTP/ODTN) into the SHU_MISC_DRVING* register fields for DQ, DQS
 * and CMD/CLK. Broadcast is switched ON for the writes and the previous
 * broadcast setting is restored before returning.
 *
 * When SUPPORT_HYNIX_RX_DQS_WEAK_PULL is enabled and the vendor is Hynix, the
 * DQS fields receive the results adjusted by +2 steps through
 * SwImpedanceAdjust(); all other fields always receive the unadjusted results.
 *
 * @param p  DRAMC context (only read for vendor_id in the Hynix build option).
 */
void DramcSwImpedanceSaveRegister(DRAMC_CTX_T *p)
{
    U32 backup_broadcast;

    backup_broadcast = GetDramcBroadcast();

    DramcBroadcastOnOff(DRAMC_BROADCAST_ON);
    //DQ
    vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING1,
            P_Fld(gDramcSwImpedanceResult[DRVP], SHU_MISC_DRVING1_DQDRVP2) |
            P_Fld(gDramcSwImpedanceResult[DRVN], SHU_MISC_DRVING1_DQDRVN2));
    vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING2,
            P_Fld(gDramcSwImpedanceResult[DRVP], SHU_MISC_DRVING2_DQDRVP1) |
            P_Fld(gDramcSwImpedanceResult[DRVN], SHU_MISC_DRVING2_DQDRVN1));
    vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING3,
            P_Fld(gDramcSwImpedanceResult[ODTP], SHU_MISC_DRVING3_DQODTP2) |
            P_Fld(gDramcSwImpedanceResult[ODTN], SHU_MISC_DRVING3_DQODTN2));
    vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING4,
            P_Fld(gDramcSwImpedanceResult[ODTP], SHU_MISC_DRVING4_DQODTP1) |
            P_Fld(gDramcSwImpedanceResult[ODTN], SHU_MISC_DRVING4_DQODTN1));
    //new registers (03/17/2021) for byte2
    vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING8,
            P_Fld(gDramcSwImpedanceResult[DRVP], SHU_MISC_DRVING8_DQDRVP3) |
            P_Fld(gDramcSwImpedanceResult[DRVN], SHU_MISC_DRVING8_DQDRVN3));
    vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING8,
            P_Fld(gDramcSwImpedanceResult[ODTP], SHU_MISC_DRVING8_DQODTP3) |
            P_Fld(gDramcSwImpedanceResult[ODTN], SHU_MISC_DRVING8_DQODTN3));

    //DQS
    #if SUPPORT_HYNIX_RX_DQS_WEAK_PULL
    if (p->vendor_id == VENDOR_HYNIX)
    {
        // gDramcSwImpedanceResult[] is a one-dimensional table indexed by drive type
        // (as everywhere else in this function), so read it with the named indices.
        const U32 u4DqsDrvp = SwImpedanceAdjust(gDramcSwImpedanceResult[DRVP], 2);
        const U32 u4DqsDrvn = SwImpedanceAdjust(gDramcSwImpedanceResult[DRVN], 2);
        const U32 u4DqsOdtp = SwImpedanceAdjust(gDramcSwImpedanceResult[ODTP], 2);
        const U32 u4DqsOdtn = SwImpedanceAdjust(gDramcSwImpedanceResult[ODTN], 2);

        vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING1,
                P_Fld(u4DqsDrvp, SHU_MISC_DRVING1_DQSDRVP2) |
                P_Fld(u4DqsDrvn, SHU_MISC_DRVING1_DQSDRVN2));
        vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING1,
                P_Fld(u4DqsDrvp, SHU_MISC_DRVING1_DQSDRVP1) |
                P_Fld(u4DqsDrvn, SHU_MISC_DRVING1_DQSDRVN1));
        vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING3,
                P_Fld(u4DqsOdtp, SHU_MISC_DRVING3_DQSODTP2) |
                P_Fld(u4DqsOdtn, SHU_MISC_DRVING3_DQSODTN2));
        vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING3,
                P_Fld(u4DqsOdtp, SHU_MISC_DRVING3_DQSODTP) |
                P_Fld(u4DqsOdtn, SHU_MISC_DRVING3_DQSODTN));
        //new registers (03/17/2021) for byte2
        vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING7,
                P_Fld(u4DqsDrvp, SHU_MISC_DRVING7_DQSDRVP3) |
                P_Fld(u4DqsDrvn, SHU_MISC_DRVING7_DQSDRVN3));
        vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING7,
                P_Fld(u4DqsOdtp, SHU_MISC_DRVING7_DQSODTP3) |
                P_Fld(u4DqsOdtn, SHU_MISC_DRVING7_DQSODTN3));
    }
    else
    #endif
    {
        vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING1,
                P_Fld(gDramcSwImpedanceResult[DRVP], SHU_MISC_DRVING1_DQSDRVP2) |
                P_Fld(gDramcSwImpedanceResult[DRVN], SHU_MISC_DRVING1_DQSDRVN2));
        vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING1,
                P_Fld(gDramcSwImpedanceResult[DRVP], SHU_MISC_DRVING1_DQSDRVP1) |
                P_Fld(gDramcSwImpedanceResult[DRVN], SHU_MISC_DRVING1_DQSDRVN1));
        vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING3,
                P_Fld(gDramcSwImpedanceResult[ODTP], SHU_MISC_DRVING3_DQSODTP2) |
                P_Fld(gDramcSwImpedanceResult[ODTN], SHU_MISC_DRVING3_DQSODTN2));
        vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING3,
                P_Fld(gDramcSwImpedanceResult[ODTP], SHU_MISC_DRVING3_DQSODTP) |
                P_Fld(gDramcSwImpedanceResult[ODTN], SHU_MISC_DRVING3_DQSODTN));
        //new registers (03/17/2021) for byte2
        vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING7,
                P_Fld(gDramcSwImpedanceResult[DRVP], SHU_MISC_DRVING7_DQSDRVP3) |
                P_Fld(gDramcSwImpedanceResult[DRVN], SHU_MISC_DRVING7_DQSDRVN3));
        vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING7,
                P_Fld(gDramcSwImpedanceResult[ODTP], SHU_MISC_DRVING7_DQSODTP3) |
                P_Fld(gDramcSwImpedanceResult[ODTN], SHU_MISC_DRVING7_DQSODTN3));
    }

    //CMD & CLK    CKE and CS shares CMD
    vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING2,
            P_Fld(gDramcSwImpedanceResult[DRVP], SHU_MISC_DRVING2_CMDDRVP2) |
            P_Fld(gDramcSwImpedanceResult[DRVN], SHU_MISC_DRVING2_CMDDRVN2));
    vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING2,
            P_Fld(gDramcSwImpedanceResult[DRVP], SHU_MISC_DRVING2_CMDDRVP1) |
            P_Fld(gDramcSwImpedanceResult[DRVN], SHU_MISC_DRVING2_CMDDRVN1));
    vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING4,
            P_Fld(gDramcSwImpedanceResult[ODTP], SHU_MISC_DRVING4_CMDODTP2) |
            P_Fld(gDramcSwImpedanceResult[ODTN], SHU_MISC_DRVING4_CMDODTN2));
    vIO32WriteFldMulti(DDRPHY_REG_SHU_MISC_DRVING4,
            P_Fld(gDramcSwImpedanceResult[ODTP], SHU_MISC_DRVING4_CMDODTP1) |
            P_Fld(gDramcSwImpedanceResult[ODTN], SHU_MISC_DRVING4_CMDODTN1));

    DramcBroadcastOnOff(backup_broadcast);
}

//-------------------------------------------------------------------------
/** vImpCalVrefSel
 *  Set IMP_VREF_SEL for DRVP, DRVN, Run-time/Tracking
 * Definitions to make IMPCAL_VREF_SEL function more readable */
//-------------------------------------------------------------------------
/* Stage selectors for vImpCalVrefSel(). Stages other than IMPCAL_STAGE_TRACKING
 * are used directly as the index into the Vref tables below. */
#define IMPCAL_STAGE_DRVP     0
#define IMPCAL_STAGE_DRVN     1
#define IMPCAL_STAGE_ODTP     2
#define IMPCAL_STAGE_ODTN     3
#define IMPCAL_STAGE_TRACKING 4

/* IMP_VREF_SEL values per calibration stage, one table for PCDDR4 and one for PCDDR3 */

static const U8 ImpPCDDR4VrefSel[IMP_DRV_MAX] = {0x37, 0x2b, 0x2b, 0x2b}; /* DRVP  DRVN  ODTP  ODTN */
static const U8 ImpPCDDR3VrefSel[IMP_DRV_MAX] = {0x2b, 0x2b, 0x2b, 0x2b}; /* DRVP  DRVN  ODTP  ODTN */

/* Refer to "IMPCAL Settings" document register "RG_RIMP_VREF_SEL" settings */
// @Maoauo: DRVP/ODTN for IMP tracking. But DRVN not support IMP tracking. (before La_fite)
// DRVP/DRVN/ODTN for IMP tracking after Pe_trus
/*
 * vImpCalVrefSel
 * Programs RG_RIMP_VREF_SEL_DRVP in SHU_CA_CMD12 with the Vref value that
 * belongs to the requested impedance calibration stage.
 *
 * @param p              DRAMC context (selects the DDR3 or DDR4 table).
 * @param u1ImpCalStage  Table index for the stage, or IMPCAL_STAGE_TRACKING,
 *                       which uses the DRVP entry.
 */
static void vImpCalVrefSel(DRAMC_CTX_T *p, U8 u1ImpCalStage)
{
    // DDR3 and DDR4 use the same register field
    U32 u4VrefFld = SHU_CA_CMD12_RG_RIMP_VREF_SEL_DRVP;
    const U8 *pu1VrefTable = ImpPCDDR4VrefSel;
    U8 u1VrefSel = 0;

    if (is_ddr3_family(p))
    {
        pu1VrefTable = ImpPCDDR3VrefSel;
    }

    // Tracking only follows DRVP; every other stage indexes the table directly
    if (u1ImpCalStage != IMPCAL_STAGE_TRACKING)
    {
        u1VrefSel = pu1VrefTable[u1ImpCalStage];
    }
    else
    {
        u1VrefSel = pu1VrefTable[DRVP];
    }

    // Report the selection before it is written
    mcSHOW_DBG_MSG3("[vImpCalVrefSel] IMP_VREF_SEL 0x%x, IMPCAL stage:%u\n",
                      u1VrefSel, u1ImpCalStage);

    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_CA_CMD12), u1VrefSel, u4VrefFld);
}

/*
 * DramcSwImpCalResult
 * Steps the drive code in the given SHU_MISC_IMPCAL1 field upwards from 0 and
 * stops at the first code whose comparator output (RGS_RIMPCALOUT) matches the
 * expected level: 1 when the field is IMPDRVP, 0 for any other field.
 *
 * @param p        DRAMC context.
 * @param drvType  Name of the drive type, used in log messages only.
 * @param u4Fld    Register field that receives the drive code.
 * @return         The code at which the search stopped; 32 when no code matched.
 *                 Both 0 and 32 are logged as a calibration failure.
 */
static U32 DramcSwImpCalResult(DRAMC_CTX_T *p, char *drvType, U32 u4Fld)
{
    U32 u4Code = 0;
    U32 u4CalOut = 0;
    U32 u4ExpectedOut = 0;

    if (u4Fld == SHU_MISC_IMPCAL1_IMPDRVP)
    {
        u4ExpectedOut = 1;
    }

    while (u4Code < 32)
    {
#if (fcFOR_CHIP_ID == fcGriffin)
        // Valid codes are 0~15 and 29~31: jump over the gap
        if (u4Code == 16)
        {
            u4Code = 29;
        }
#endif

        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_MISC_IMPCAL1), u4Code, u4Fld);
        mcDELAY_US(1);
        u4CalOut = u4IO32ReadFldAlign((DDRPHY_REG_MISC_PHY_RGS_CMD), MISC_PHY_RGS_CMD_RGS_RIMPCALOUT);
        mcSHOW_DBG_MSG2("OCD %s=%d ,CALOUT=%d\n", drvType, u4Code, u4CalOut);

        if (u4CalOut == u4ExpectedOut)
        {
            // First code that produces the expected comparator output
            mcSHOW_DBG_MSG2("\nOCD %s calibration OK! %s=%d\n\n", drvType, drvType, u4Code);
            break;
        }

        u4Code++;
    }

    // Stopping at either end of the range means no usable result was found
    if ((u4Code == 0) || (u4Code == 32))
    {
        mcSHOW_DBG_MSG2("\nOCD %s calibration FAIL! %s=%d\n\n", drvType, drvType, u4Code);
    }

    return u4Code;
}

/*
 * DramcSwImpedance_ODTP_Calculation
 * Derives the ODTP code from a calibrated DRVP code:
 *   scaled = ((DRVP + 2) * 66 + 99) / 100
 *   ODTP   = scaled - offset, floored at 0
 * where offset is 2 for the DDR4 family and 6 otherwise.
 *
 * @param p              DRAMC context (DRAM family and odt_onoff are read).
 * @param u4DRVP_Result  Calibrated DRVP code.
 * @return               ODTP code; 0 for the DDR3 family when ODT is off.
 */
static U32 DramcSwImpedance_ODTP_Calculation(DRAMC_CTX_T *p, U32 u4DRVP_Result)
{
    U32 u4Scaled;
    U8 u1Offset;

    // DDR3 with ODT disabled does not use ODTP
    if (is_ddr3_family(p) && !p->odt_onoff)
    {
        return 0;
    }

    if (is_ddr4_family(p))
    {
        u1Offset = 2;
    }
    else
    {
        u1Offset = 6;
    }

    u4Scaled = ((u4DRVP_Result + 2) * 66 + 99) / 100;

    // Subtract the family offset without going below zero
    if (u4Scaled < u1Offset)
    {
        return 0;
    }

    return u4Scaled - u1Offset;
}

/*
DramcSwImpedanceCal_Init()
===============================================================================
Author & date:
    -> Hongbo Li (MTK13176) -> 03/17/2021
Functionality:
    -> Register initial setting for impedance calibration flow.
Reason for adding:
    -> Simplify DramcSwImpedanceCal() flow.
Called by:
    -> only by DramcSwImpedanceCal()
Misc:
    -> requires MISC_IMP_CTRL1_RG_RIMP_DDR3_SEL set to 0x1 for both DDR4
       and DDR3 flow (MISC_IMP_CTRL1_RG_RIMP_DDR4_SEL to 0x0)
    -> CMD12_RG_RIMP_REV ...... <0> => 1 ; <1> => 0
    -> Verified with Designer
*/
static void DramcSwImpedanceCal_Init(DRAMC_CTX_T *p)
{
     vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_CA_CMD12), 1, SHU_CA_CMD12_RG_RIMP_REV);
    //RG_IMPCAL_VREF_SEL (now set in vImpCalVrefSel())
    //RG_IMPCAL_LP3_EN=0, RG_IMPCAL_LP4_EN=1
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_IMP_CTRL1), 0, MISC_IMP_CTRL1_RG_RIMP_PRE_EN);
    // CALI_ENN cleared, IMPPDP and IMPPDN set
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_MISC_IMPCAL), P_Fld(0, MISC_IMPCAL_IMPCAL_CALI_ENN) | P_Fld(1, MISC_IMPCAL_IMPCAL_IMPPDP) |
                        P_Fld(1, MISC_IMPCAL_IMPCAL_IMPPDN));    //RG_RIMP_BIAS_EN and RG_RIMP_VREF_EN move to IMPPDP and IMPPDN

    /* PHYPLL test monitor OD enable & test mode monitor enable: both are written to 0 here */
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_PHYPLL1), P_Fld(0, PHYPLL1_RG_RPHYPLL_TSTOD_EN) | P_Fld(0, PHYPLL1_RG_RPHYPLL_TSTCK_EN));

    // Enable the impedance block and its Vref; DDR3_SEL = 1 and DDR4_SEL = 0 for both DRAM types
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_MISC_IMP_CTRL1), P_Fld(1, MISC_IMP_CTRL1_RG_IMP_EN) |
                        P_Fld(1, MISC_IMP_CTRL1_RG_RIMP_DDR3_SEL) |
                        P_Fld(1, MISC_IMP_CTRL1_RG_RIMP_VREF_EN) |
                        P_Fld(0, MISC_IMP_CTRL1_RG_RIMP_DDR4_SEL)); // as defined, DDR4 keeps this low so it should be set to 0; waiting for verification
    mcDELAY_US(1); // 1us delay after the enable write and before CALI_EN -- NOTE(review): presumably settling time, confirm with designer

    // Start calibration mode with both drive codes at 0
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_IMPCAL), 1, MISC_IMPCAL_IMPCAL_CALI_EN);
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_MISC_IMPCAL1), P_Fld(0, SHU_MISC_IMPCAL1_IMPDRVN) | P_Fld(0, SHU_MISC_IMPCAL1_IMPDRVP));
}

/*
 * DramcSwImpedanceCal
 * Software impedance calibration. The sequence is:
 *   1. Turn broadcast off, mark DRAM_CALIBRATION_SW_IMPEDANCE as DRAM_FAIL,
 *      clear the DMSUS fields and disable HW impedance tracking on all channels.
 *   2. Switch to CHANNEL_A, back up MISC_IMPCAL and run DramcSwImpedanceCal_Init().
 *   3. Search the DRVP and DRVN codes with DramcSwImpCalResult() (ODTP and ODTN
 *      are skipped by the loop), then restore MISC_IMPCAL.
 *   4. Store the results in gDramcSwImpedanceResult[]: DRVP and DRVN as found,
 *      ODTP derived by DramcSwImpedance_ODTP_Calculation(), ODTN = 0.
 *   5. Optionally save/load the results for fast-K and apply the waveform
 *      adjustment, depending on build options.
 *   6. Mark the calibration DRAM_OK (unless the __FLASH_TOOL_DA__ range check
 *      flags a value), select the tracking Vref for the DDR4 family, and restore
 *      the channel mapping and broadcast setting.
 *
 * @param p  DRAMC context.
 * @return   Always DRAM_OK; the pass/fail state is reported through
 *           vSetCalibrationResult().
 */
DRAM_STATUS_T DramcSwImpedanceCal(DRAMC_CTX_T *p)
{
    U32 u4DRVP_Result = 0xff, u4DRVN_Result = 0xff;
    U8 backup_channel;
    U32 backup_broadcast;
    U8 u1DrvType = 0, u1CALI_ENP = 0, u1CALI_ENN = 0;
    U32 u4SwImpCalResult = 0, u4DrvFld = 0;
    char *drvStr = "NULL";
    U32 backup_IMP_CAL;

    backup_broadcast = GetDramcBroadcast();
    DramcBroadcastOnOff(DRAMC_BROADCAST_OFF);

    //default set FAIL
    vSetCalibrationResult(p, DRAM_CALIBRATION_SW_IMPEDANCE, DRAM_FAIL);

    //RG dump Log
    mcDUMP_REG_MSG("\n[dumpRG] DramcSwImpedenceCal\n");
#if VENDER_JV_LOG
    vPrintCalibrationBasicInfo_ForJV(p);
#else
    vPrintCalibrationBasicInfo(p);
#endif
    //Suspend control: ARDMSUS_10 / RIMP_DMSUS_10 and their LP_SEL fields are all written to 0
    vIO32WriteFldMulti_All(DDRPHY_REG_MISC_LP_CTRL, P_Fld(0x0, MISC_LP_CTRL_RG_ARDMSUS_10) |
                        P_Fld(0x0, MISC_LP_CTRL_RG_ARDMSUS_10_LP_SEL) |
                        P_Fld(0x0, MISC_LP_CTRL_RG_RIMP_DMSUS_10) |
                        P_Fld(0x0, MISC_LP_CTRL_RG_RIMP_DMSUS_10_LP_SEL));

    //Disable IMP HW Tracking
    //Hw Imp tracking disable for all channels Because SwImpCal will be K again when resume from DDR reserved mode
    vIO32WriteFldAlign_All(DDRPHY_REG_MISC_IMPCAL, 0, MISC_IMPCAL_IMPCAL_HW);

    // The calibration itself runs on channel A; the caller's channel is restored at the end
    backup_channel = p->channel;
    vSetPHY2ChannelMapping(p, CHANNEL_A);
    backup_IMP_CAL = u4IO32Read4B(DRAMC_REG_ADDR(DDRPHY_REG_MISC_IMPCAL));

    DramcSwImpedanceCal_Init(p);

    //PCDDR34: DRVP/DRVN calibration start
    for (u1DrvType = DRVP; u1DrvType < IMP_DRV_MAX; u1DrvType++)
    {
        if (u1DrvType == ODTN || u1DrvType == ODTP) // no use, skip ODTN & ODTP
            continue;

        /* Set IMP_VREF_SEL value for the stage being calibrated */
        vImpCalVrefSel(p, u1DrvType);

        switch (u1DrvType)
        {
            case DRVP:
                drvStr = "DRVP";
                u1CALI_ENP = 0x1;
                u1CALI_ENN = 0x0;
                u4DrvFld = SHU_MISC_IMPCAL1_IMPDRVP;
                u4DRVP_Result = 0;
                break;
            case DRVN:
                drvStr = "DRVN";
                u1CALI_ENP = 0x0;
                u1CALI_ENN = 0x0; // 0x1 change to ODTN path
                u4DrvFld = SHU_MISC_IMPCAL1_IMPDRVN;
                break;
            default:
                mcSHOW_ERR_MSG("[DramcSwImpedanceCal] Warnning: Need confirm u1DrvType for SW IMP Calibration !!!\n");
                break;
        }

        // @A60868 for DRVn/p and ODTn select
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_IMPCAL), u1CALI_ENP, MISC_IMPCAL_IMPCAL_CALI_ENP);  //MISC_IMP_CTRL1_RG_IMP_OCD_PUCMP_EN move to CALI_ENP
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_IMPCAL), u1CALI_ENN, MISC_IMPCAL_IMPCAL_CALI_ENN);  //MISC_IMP_CTRL1_RG_RIMP_ODT_EN move to CALI_ENN
        //DRVP=DRVP_FINAL (0 while DRVP itself is being searched, the found DRVP code afterwards)
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_MISC_IMPCAL1), u4DRVP_Result, SHU_MISC_IMPCAL1_IMPDRVP);  //PUCMP_EN move to CALI_ENP
        //RIMP_DRV05 for LP4/5
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_CA_CMD12), 1, SHU_CA_CMD12_RG_RIMP_DRV05);
        //If RGS_TX_OCD_IMPCALOUTX=1
        //RG_IMPX_DRVN++;
        //Else save RG_IMPX_DRVN value and assign to DRVN
        u4SwImpCalResult = DramcSwImpCalResult(p, drvStr, u4DrvFld);
        switch (u1DrvType)
        {
            case DRVP:
                u4DRVP_Result = u4SwImpCalResult;
                break;
            case DRVN:
                u4DRVN_Result = u4SwImpCalResult;
                break;
            default:
                mcSHOW_ERR_MSG("[DramcSwImpedanceCal] Warnning: Need confirm u4SwImpCalResult for SW IMP Calibration !!!\n");
                break;
        }
    }
    //Register Restore
    vIO32Write4B(DRAMC_REG_ADDR(DDRPHY_REG_MISC_IMPCAL), backup_IMP_CAL);

    mcSHOW_DBG_MSG("[SwImpedanceCal] DRVP=%d, DRVN=%d\n", u4DRVP_Result, u4DRVN_Result);
#if __SLT__
    mcSHOW_PARSER_MSG(("Final Impdance Cal Result: DRVP = %d, DRVN = %d\n", u4DRVP_Result, u4DRVN_Result));
#endif

    gDramcSwImpedanceResult[DRVP] = u4DRVP_Result;
    gDramcSwImpedanceResult[DRVN] = u4DRVN_Result;
    gDramcSwImpedanceResult[ODTP] = DramcSwImpedance_ODTP_Calculation(p, u4DRVP_Result);//do calculation on ODTP value
    gDramcSwImpedanceResult[ODTN] = 0;

#if RUNTIME_SHMOO_RELEATED_FUNCTION && SUPPORT_SAVE_TIME_FOR_CALIBRATION
    {
        U8 u1drv;
        {
            // Normal run: store the results. Fast-K run: replace them with the stored ones.
            for (u1drv = 0; u1drv < 4; u1drv++)
            {
                if (p->femmc_Ready == 0)
                    p->pSavetimeData->u1SwImpedanceResule[u1drv] = gDramcSwImpedanceResult[u1drv];
                else
                {
                    gDramcSwImpedanceResult[u1drv] = p->pSavetimeData->u1SwImpedanceResule[u1drv];
                    vSetCalibrationResult(p, DRAM_CALIBRATION_SW_IMPEDANCE, DRAM_FAST_K);
                }
            }
        }
    }
#endif

    mcSHOW_DBG_MSG("Reg: DRVP=%d, DRVN=%d, ODTP=%d\n", gDramcSwImpedanceResult[DRVP],
                                    gDramcSwImpedanceResult[DRVN], gDramcSwImpedanceResult[ODTP]);
    mcDUMP_REG_MSG("Reg: DRVP=%d, DRVN=%d, ODTP=%d\n", gDramcSwImpedanceResult[DRVP],
                                    gDramcSwImpedanceResult[DRVN], gDramcSwImpedanceResult[ODTP]);

#if APPLY_SIGNAL_WAVEFORM_SETTINGS_ADJUST
    if(is_ddr4_family(p)||is_ddr3_family(p))
    {
            gDramcSwImpedanceResult[DRVP] = SwImpedanceAdjust(gDramcSwImpedanceResult[DRVP], gDramcSwImpedanceAdjust[DRVP]);
            gDramcSwImpedanceResult[ODTP] = SwImpedanceAdjust(gDramcSwImpedanceResult[ODTP], gDramcSwImpedanceAdjust[ODTP]);
    }
    else
    {
        mcSHOW_ERR_MSG("fatal error, NO such DRAM type. Error log location: %s\n", __func__);
    }
    // The third value printed is the ODTP result, so label it as ODTP
    mcSHOW_DBG_MSG("Reg: DRVP=%d, DRVN=%d, ODTP=%d (After Adjust)\n", gDramcSwImpedanceResult[DRVP],
                                        gDramcSwImpedanceResult[DRVN], gDramcSwImpedanceResult[ODTP]);
#endif

#if __FLASH_TOOL_DA__
    // Flag the first result that sits at either end of the code range; the status then stays as set above
    if((gDramcSwImpedanceResult[ODTP] ==0)||(gDramcSwImpedanceResult[ODTP] >=31))
    {
        mcSHOW_DBG_MSG("[WARNING] freq_region = %d, ODTP = %d ==> unexpect value\n", freq_region, gDramcSwImpedanceResult[ODTP]);
        PINInfo_flashtool.IMP_ERR_FLAG |= (0x1<<(freq_region+ODTP));
    }
    else if((gDramcSwImpedanceResult[DRVP] ==0)||(gDramcSwImpedanceResult[DRVP] >=31))
    {
        mcSHOW_DBG_MSG("[WARNING] freq_region = %d, DRVP = %d ==> unexpect value\n", freq_region, gDramcSwImpedanceResult[DRVP]);
        PINInfo_flashtool.IMP_ERR_FLAG |= (0x1<<(freq_region+DRVP));
    }
    else if((gDramcSwImpedanceResult[DRVN] ==0)||(gDramcSwImpedanceResult[DRVN] >=31))
    {
        mcSHOW_DBG_MSG("[WARNING] freq_region = %d, DRVN = %d ==> unexpect value\n", freq_region, gDramcSwImpedanceResult[DRVN]);
        PINInfo_flashtool.IMP_ERR_FLAG |= (0x1<<(freq_region+DRVN));
    }
    else
#endif
    {
        vSetCalibrationResult(p, DRAM_CALIBRATION_SW_IMPEDANCE, DRAM_OK);
    }

    // DDR4 family: leave the Vref selection at the tracking (DRVP) value
    if (is_ddr4_family(p))
        vImpCalVrefSel(p, IMPCAL_STAGE_TRACKING);

    mcSHOW_DBG_MSG3("[DramcSwImpedanceCal] Done\n\n");

    vSetPHY2ChannelMapping(p, backup_channel);
    DramcBroadcastOnOff(backup_broadcast);

    return DRAM_OK;
}
#endif //DDR_ENABLE_SW_IMPED_CAL

#if (TX_K_DQM_MODE == 1) && (ENABLE_WRITE_DBI || DDR_TX_K_DQM_WITH_WDBI)
/*
 * DramcWriteShiftMCKForWriteDBI
 * Converts a shift given in MCK into UI using the shift amount returned by
 * u1MCK2UI_DivShift() and applies it to all bytes through ShiftDQUI().
 *
 * @param p          DRAMC context.
 * @param iShiftMCK  Signed shift in MCK units.
 */
void DramcWriteShiftMCKForWriteDBI(DRAMC_CTX_T *p, S8 iShiftMCK)
{
    U8 u1DivShift = u1MCK2UI_DivShift(p);
    S8 s1UiPerMck = (S8)(1 << u1DivShift);   // number of UI in one MCK
    S8 s1UiShift = iShiftMCK * s1UiPerMck;

    ShiftDQUI(p, s1UiShift, ALL_BYTES);
}
#endif

#if DDR_ENABLE_DUTY_CAL
/* Log switches for the duty calibration code (their users are outside this excerpt) */
#define DutyPrintAllLog         0
#define DutyPrintCalibrationLog 1

/* Duty offset range -- NOTE(review): presumably the delay-cell sweep limits; confirm at the use site */
#define DUTY_OFFSET_START -8
#define DUTY_OFFSET_END 8

/* Clock PI range */
#define CLOCK_PI_START 0
#define CLOCK_PI_END 63

/* Clock PI step: 16 for DV simulation builds, 2 otherwise */
#if FOR_DV_SIMULATION_USED
#define CLOCK_PI_STEP 16
#else
#define CLOCK_PI_STEP 2
#endif

/* Clock duty limits, expressed in units of 0.01% */
#define ClockDutyFailLowerBound 4500    // 45%
#define ClockDutyFailUpperBound 5500    // 55%
#define ClockDutyMiddleBound    5000    // 50%

/*
 * DramcClockDutySetClkDelayCell
 * Programs the clock duty delay cells from a signed duty delay:
 *   - negative: |scDutyDelay| goes to TX_ARCLK_DLY / TX_ARCLKB_DLY, the DLYB pair is 0
 *   - positive: scDutyDelay goes to TX_ARCLK_DLYB / TX_ARCLKB_DLYB, the DLY pair is 0
 *   - zero    : both pairs are 0
 * When use_rev_bit is set, MCK4X_DLY_EN is set for a negative delay and
 * MCK4XB_DLY_EN for a positive one; otherwise both enables are written as 0.
 *
 * @param p            DRAMC context.
 * @param u1RankIdx    Not used by this function.
 * @param scDutyDelay  Signed duty delay.
 * @param use_rev_bit  Non-zero to drive the MCK4X / MCK4XB delay enables.
 */
void DramcClockDutySetClkDelayCell(DRAMC_CTX_T *p, unsigned char u1RankIdx, signed char scDutyDelay, unsigned char use_rev_bit)
{
    // No per-shuffle or per-rank offset is applied (the shuffle loop is disabled for this chip)
    unsigned int uiRegOffset = 0;
    unsigned char ucClkDly = 0, ucClkDlyB = 0;
    unsigned char ucMck4xEn = 0, ucMck4xbEn = 0;

    if (scDutyDelay < 0)
    {
        ucClkDly = -scDutyDelay;
        if (use_rev_bit)
        {
            ucMck4xEn = 1;
        }
    }
    else if (scDutyDelay > 0)
    {
        ucClkDlyB = scDutyDelay;
        if (use_rev_bit)
        {
            ucMck4xbEn = 1;
        }
    }

    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_CA_TXDLY3) + uiRegOffset,
        P_Fld(ucClkDly, SHU_RK_CA_TXDLY3_TX_ARCLK_DLY) |
        P_Fld(ucClkDly, SHU_RK_CA_TXDLY3_TX_ARCLKB_DLY));
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_CA_TXDLY4) + uiRegOffset,
        P_Fld(ucClkDlyB, SHU_RK_CA_TXDLY4_TX_ARCLK_DLYB) |
        P_Fld(ucClkDlyB, SHU_RK_CA_TXDLY4_TX_ARCLKB_DLYB));

    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_CA_MCK4X_EN),
        P_Fld(ucMck4xEn, CA_MCK4X_EN_RG_TX_ARCLK_MCK4X_DLY_EN) |
        P_Fld(ucMck4xbEn, CA_MCK4X_EN_RG_TX_ARCLK_MCK4XB_DLY_EN));
}

/*
 * Program the DQS duty delay cells of both bytes from signed duty delay values.
 *
 * p            DRAMC context; p->support_rank_num bounds the register write loop.
 * scDutyDelay  Array of two signed delays, one per DQS index. A negative value
 *              writes its magnitude to the DLY fields (DLYB = 0), a positive
 *              value writes it to the DLYB fields (DLY = 0), zero clears both.
 * use_rev_bit  When non-zero, MCK4X_DLY_EN is written as 1 for a negative
 *              delay and MCK4XB_DLY_EN as 1 for a positive delay. In every
 *              other case both enables are written as 0.
 */
void DQSDutyScan_SetDqsDelayCell(DRAMC_CTX_T *p, signed char *scDutyDelay, unsigned char use_rev_bit)
{
    unsigned char u1DQSIdx, u1RankIdx = 0;
    unsigned char u1Delay[2], u1DelayB[2];
    unsigned char ucRev_Bit0[2]={0,0}, ucRev_Bit1[2]={0,0};

    /* Split each signed delay into its DLY / DLYB value and the matching enable bits. */
    for (u1DQSIdx = 0; u1DQSIdx < 2; u1DQSIdx++)
    {
        if (scDutyDelay[u1DQSIdx] < 0)
        {
            u1Delay[u1DQSIdx] = -(scDutyDelay[u1DQSIdx]);
            u1DelayB[u1DQSIdx] = 0;

            if (use_rev_bit)
            {
                ucRev_Bit0[u1DQSIdx] = 1;
                ucRev_Bit1[u1DQSIdx] = 0;
            }
        }
        else if (scDutyDelay[u1DQSIdx] > 0)
        {
            u1Delay[u1DQSIdx] = 0;
            u1DelayB[u1DQSIdx] = scDutyDelay[u1DQSIdx];

            if (use_rev_bit)
            {
                ucRev_Bit0[u1DQSIdx] = 0;
                ucRev_Bit1[u1DQSIdx] = 1;
            }
        }
        else
        {
            u1Delay[u1DQSIdx] = 0;
            u1DelayB[u1DQSIdx] = 0;

            if (use_rev_bit)
            {
                ucRev_Bit0[u1DQSIdx] = 0;
                ucRev_Bit1[u1DQSIdx] = 0;
            }
        }
    }

#if fcFOR_CHIP_ID == fcGriffin
    /*
     * Griffin writes DQS index 0 to the B1 registers and index 1 to B0.
     * The swap must happen exactly once: doing it inside the rank loop
     * toggled the mapping on every iteration, so with two ranks the last
     * writes used the un-swapped values.
     */
    {
        unsigned char tmp;

        tmp = u1Delay[0];
        u1Delay[0] = u1Delay[1];
        u1Delay[1] = tmp;

        tmp = u1DelayB[0];
        u1DelayB[0] = u1DelayB[1];
        u1DelayB[1] = tmp;
    }
#endif

    /*
     * NOTE(review): nothing in this loop selects a rank, so as far as this
     * function shows every iteration writes the same registers - confirm
     * whether a rank switch is expected here.
     */
    for (u1RankIdx = 0; u1RankIdx < p->support_rank_num; u1RankIdx++)
    {
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_TXDLY2),
            P_Fld(u1Delay[0], SHU_RK_B0_TXDLY2_TX_ARDQS0_DLY_B0) |
            P_Fld(u1Delay[0], SHU_RK_B0_TXDLY2_TX_ARDQS0B_DLY_B0));
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_TXDLY2),
            P_Fld(u1DelayB[0], SHU_RK_B0_TXDLY2_TX_ARDQS0_DLYB_B0) |
            P_Fld(u1DelayB[0], SHU_RK_B0_TXDLY2_TX_ARDQS0B_DLYB_B0));

        vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_TXDLY2),
            P_Fld(u1Delay[1], SHU_RK_B1_TXDLY2_TX_ARDQS0_DLY_B1) |
            P_Fld(u1Delay[1], SHU_RK_B1_TXDLY2_TX_ARDQS0B_DLY_B1));
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_TXDLY2),
            P_Fld(u1DelayB[1], SHU_RK_B1_TXDLY2_TX_ARDQS0_DLYB_B1) |
            P_Fld(u1DelayB[1], SHU_RK_B1_TXDLY2_TX_ARDQS0B_DLYB_B1));
    }

    /*
     * NOTE(review): DQS index 1 uses the B2_MCK4X_EN register here, while
     * DutyScan_Calibration_Flow programs B1_MCK4X_EN for the second byte -
     * confirm which register is intended.
     */
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B0_MCK4X_EN),
        P_Fld(ucRev_Bit0[0], B0_MCK4X_EN_RG_TX_ARDQS_MCK4X_DLY_EN_B0) |
        P_Fld(ucRev_Bit1[0], B0_MCK4X_EN_RG_TX_ARDQS_MCK4XB_DLY_EN_B0));
    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B2_MCK4X_EN),
        P_Fld(ucRev_Bit0[1], B2_MCK4X_EN_RG_TX_ARDQS_MCK4X_DLY_EN_B2) |
        P_Fld(ucRev_Bit1[1], B2_MCK4X_EN_RG_TX_ARDQS_MCK4XB_DLY_EN_B2));
}

// The offset calibrated here belongs to the duty measurement circuit itself;
// it is not related to DQ/DQM/DQS.
// The circuit that measures duty is not very accurate, so its offset has to be
// calibrated ("K") first. Only after this offset is known can the circuit be
// used to measure duty.
// The same kind of circuit measures DQ/DQS/DQM, and every byte has its own:
// B0, B2 and CA each have one circuit.
//   - The CA circuit measures CLK duty.
//   - The B0/B2 circuits measure DQ/DQM/DQS duty.
/*
 * Convert a calibration step index into the signed offset it stands for.
 *
 * val      Index into the offset code sequence, valid range 0..14.
 * return   The signed offset of that step: index 0 maps to -7, index 7 to 0
 *          and index 14 to +7. An index outside 0..14 (for example the 0xff
 *          "not found" marker used by DutyScan_Offset_Calibration) returns 0
 *          instead of reading past the end of the table.
 */
signed char DutyScan_Offset_Convert(unsigned char val)
{
    /* Codes above 8 carry a negative offset whose magnitude is in the low 3 bits. */
    static const unsigned char calibration_sequence[15]={0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7};
    unsigned char code;

    if (val >= sizeof(calibration_sequence) / sizeof(calibration_sequence[0]))
    {
        return 0;
    }

    code = calibration_sequence[val];

    return ((signed char)(code > 8 ? 0 - (code & 0x7) : code));
}

/*
 * Calibrate the RX offset code (OFFC) for the B0, B1 and CA groups.
 *
 * Flow, as implemented below:
 *   1. Print the calibration banner and program the CA/B0/B1 register fields
 *      (LPBK_EN, DDR3/DDR4_SEL, DATA_SWAP, VREF_SEL, DUTY_VCAL_*, OFFSETC_EN).
 *   2. Step OFFC through calibration_sequence[] (logged as -7..+7 through
 *      DutyScan_Offset_Convert) and read back one O1 bit per group.
 *   3. For each group remember the first index whose readback is 0.
 *   4. Clear OFFSETC_EN and write the remembered code back to OFFC.
 *
 * p  DRAMC context, passed to the print helpers and channel switch helpers.
 *
 * Nothing is returned; the two failure cases are only logged, and the function
 * spins forever in them when __ETT__ is set.
 */
void DutyScan_Offset_Calibration(DRAMC_CTX_T *p)
{
    // OFFC codes in the order they are tried.
    unsigned char calibration_sequence[15]={0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7};
    unsigned char i, read_val_b0, read_val_b1, read_val_ca;
    // First sequence index whose readback was 0; 0xff means "not found yet".
    unsigned char cal_i_b0=0xff, cal_i_b1=0xff, cal_i_ca=0xff;

#if VENDER_JV_LOG
    vPrintCalibrationBasicInfo_ForJV(p);
#else
    vPrintCalibrationBasicInfo(p);
#endif


#if DutyPrintCalibrationLog
    mcSHOW_DBG_MSG(("[Duty_Offset_Calibration]\n\n"));
#endif

        // CA group setup: loopback on, DDR3 select, data swap = 2, VREF select = 0xB, OFFC cleared.
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_CA_CMD6),
            P_Fld(1, CA_CMD6_RG_TX_ARCMD_DATA_SWAP_EN) |
            P_Fld(1, CA_CMD6_RG_RX_ARCMD_LPBK_EN) |
            P_Fld(0, CA_CMD6_RG_TX_ARCMD_DDR4_SEL) |
            P_Fld(1, CA_CMD6_RG_TX_ARCMD_DDR3_SEL) |
            P_Fld(2, CA_CMD6_RG_TX_ARCMD_DATA_SWAP));
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_CA_CMD5),
			P_Fld(0, SHU_CA_CMD5_RG_RX_ARCMD_VREF_BYPASS) |
			P_Fld(0xB, SHU_CA_CMD5_RG_RX_ARCMD_VREF_SEL));
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_CA_PHY3),
			P_Fld(1, CA_PHY3_RG_RX_ARCA_DUTY_VCAL_0P5EN_CA));
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_CA_CMD0),
			P_Fld(0, SHU_RK_CA_CMD0_RG_RX_ARCA2_OFFC));
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_CA_CMD5),
			P_Fld(1, CA_CMD5_RG_RX_ARCMD_VREF_EN) |
			P_Fld(1, CA_CMD5_RG_RX_ARCMD_EYE_VREF_EN));

		// B0 group setup: same field values as the CA group above.
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ6),
			P_Fld(1, B0_DQ6_RG_RX_ARDQ_LPBK_EN_B0) |
            P_Fld(0, B0_DQ6_RG_TX_ARDQ_DDR4_SEL_B0) |
			P_Fld(1, B0_DQ6_RG_TX_ARDQ_DDR3_SEL_B0));
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ6),
            P_Fld(1, B0_DQ6_RG_TX_ARDQ_DATA_SWAP_EN_B0) |
            P_Fld(2, B0_DQ6_RG_TX_ARDQ_DATA_SWAP_B0));
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ5),
			P_Fld(0,SHU_B0_DQ5_RG_RX_ARDQ_VREF_BYPASS_B0) |
			P_Fld(0xB,SHU_B0_DQ5_RG_RX_ARDQ_VREF_SEL_B0));
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B0_PHY3),
			P_Fld(1,B0_PHY3_RG_RX_ARDQ_DUTY_VCAL_0P5EN_B0));
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ0),
			P_Fld(0,SHU_RK_B0_DQ0_RG_RX_ARDQ2_OFFC_B0));
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ5),
			P_Fld(1,B0_DQ5_RG_RX_ARDQ_VREF_EN_B0) |
			P_Fld(1,B0_DQ5_RG_RX_ARDQ_EYE_VREF_EN_B0));

		// B1 group setup: same field values as the B0 group above.
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ6),
			P_Fld(1, B1_DQ6_RG_RX_ARDQ_LPBK_EN_B1) |
            P_Fld(0, B1_DQ6_RG_TX_ARDQ_DDR4_SEL_B1) |
			P_Fld(1, B1_DQ6_RG_TX_ARDQ_DDR3_SEL_B1));
        vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ6),
            P_Fld(1, B1_DQ6_RG_TX_ARDQ_DATA_SWAP_EN_B1) |
            P_Fld(2, B1_DQ6_RG_TX_ARDQ_DATA_SWAP_B1));
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ5),
			P_Fld(0,SHU_B1_DQ5_RG_RX_ARDQ_VREF_BYPASS_B1) |
			P_Fld(0xB,SHU_B1_DQ5_RG_RX_ARDQ_VREF_SEL_B1));
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B1_PHY3),
			P_Fld(1,B1_PHY3_RG_RX_ARDQ_DUTY_VCAL_0P5EN_B1));
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ0),
			P_Fld(0,SHU_RK_B1_DQ0_RG_RX_ARDQ2_OFFC_B1));
		vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ5),
			P_Fld(1,B1_DQ5_RG_RX_ARDQ_VREF_EN_B1) |
			P_Fld(1,B1_DQ5_RG_RX_ARDQ_EYE_VREF_EN_B1));

        //vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_CA_DLL_ARPI2),
        //    P_Fld(0x0, SHU_CA_DLL_ARPI2_RG_ARPI_MPDIV_CG_CA));

    // Wait 1us after the setup above, then set DUTY_VCAL_EN on all three groups.
    mcDELAY_US(1);
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_CA_PHY3),
		P_Fld(1, CA_PHY3_RG_RX_ARCA_DUTY_VCAL_EN_CA));
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B0_PHY3),
		P_Fld(1, B0_PHY3_RG_RX_ARDQ_DUTY_VCAL_EN_B0));
	vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B1_PHY3),
		P_Fld(1, B1_PHY3_RG_RX_ARDQ_DUTY_VCAL_EN_B1));

    // Set OFFSETC_EN for the duration of the sweep.
    // NOTE(review): these three writes pass the register without DRAMC_REG_ADDR(),
    // unlike the matching OFFSETC_EN=0 writes after the sweep - confirm this is intended.
    vIO32WriteFldAlign((DDRPHY_REG_SHU_CA_CMD11), 0x1, SHU_CA_CMD11_RG_RX_ARCA_OFFSETC_EN_CA);
    vIO32WriteFldAlign((DDRPHY_REG_SHU_B0_DQ11), 0x1, SHU_B0_DQ11_RG_RX_ARDQ_OFFSETC_EN_B0);
    vIO32WriteFldAlign((DDRPHY_REG_SHU_B1_DQ11), 0x1, SHU_B1_DQ11_RG_RX_ARDQ_OFFSETC_EN_B1);

#if DutyPrintCalibrationLog
    mcSHOW_DBG_MSG("\tB0\tB1\tCA\n");
    mcSHOW_DBG_MSG("===========================\n");
#endif

    // Sweep all 15 OFFC codes; the same code is applied to B0, B1 and CA at each step.
    for(i=0; i<15; i++)
    {
		vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ0),calibration_sequence[i],SHU_RK_B0_DQ0_RG_RX_ARDQ2_OFFC_B0);
		vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ0),calibration_sequence[i],SHU_RK_B1_DQ0_RG_RX_ARDQ2_OFFC_B1);
		vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_CA_CMD0),calibration_sequence[i],SHU_RK_CA_CMD0_RG_RX_ARCA2_OFFC);

        mcDELAY_US(1);
		read_val_b0 = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_AD_RX_DQ_O1),MISC_AD_RX_DQ_O1_AD_RX_ARDQ_O1_B0_BIT2);
		read_val_ca = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_AD_RX_CMD_O1),MISC_AD_RX_CMD_O1_AD_RX_ARCA2_O1);
        // The B1 readback uses the same register/field as B0, read after switching to CHANNEL_B.
        channel_backup_and_set(p, CHANNEL_B);
		read_val_b1 = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_AD_RX_DQ_O1), MISC_AD_RX_DQ_O1_AD_RX_ARDQ_O1_B0_BIT2);
        channel_restore(p);
#if DutyPrintCalibrationLog
        mcSHOW_DBG_MSG("%d\t%d\t%d\t%d\n",
            DutyScan_Offset_Convert(i), read_val_b0, read_val_b1, read_val_ca);
#endif

        // Latch only the first index at which each readback is 0.
        if (read_val_b0 == 0 && cal_i_b0==0xff)
        {
            cal_i_b0 = i;
        }

        if (read_val_b1 == 0 && cal_i_b1==0xff)
        {
            cal_i_b1 = i;
        }

        if (read_val_ca == 0 && cal_i_ca==0xff)
        {
            cal_i_ca = i;
        }
    }

    // Failure case 1: some group already read 0 at the very first step (offset -7).
    if (cal_i_b0==0 || cal_i_b1==0 || cal_i_ca==0)
    {
#if DutyPrintCalibrationLog
        mcSHOW_DBG_MSG("offset calibration i=-7 and AD_RX_*RDQ_O1_B*<2>/AD_RX_*RCA2_O1 ==0 !!\n");
#endif
#if __ETT__
        while(1);
#endif
    }
    // Failure case 2: some group never read 0 and its last readback (offset +7) is 1.
    else if ((read_val_b0==1 && cal_i_b0==0xff) || (read_val_b1==1 && cal_i_b1==0xff) || (read_val_ca==1 && cal_i_ca==0xff))
    {
#if DutyPrintCalibrationLog
        mcSHOW_DBG_MSG("offset calibration i=7 and AD_RX_*RDQ_O1_B*<2>/AD_RX_*RCA2_O1 ==1 !!\n");
#endif
#if __ETT__
        while(1);
#endif

    }
    else
    {
#if DutyPrintCalibrationLog
        mcSHOW_DBG_MSG("===========================\n");
        mcSHOW_DBG_MSG("\tB0:%d\tB1:%d\tCA:%d\n",DutyScan_Offset_Convert(cal_i_b0),DutyScan_Offset_Convert(cal_i_b1),DutyScan_Offset_Convert(cal_i_ca));
#endif
    }

    //cc replace  vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_SHU_B0_DQ0), 0, SHU_B0_DQ0_RG_TX_ARDQS0_DRVP_PRE_B0_BIT0);
    //cc replace  vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_SHU_B2_DQ0), 0, SHU_B2_DQ0_RG_TX_ARDQS0_DRVP_PRE_B2_BIT0);
    //cc replace  vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_SHU_CA_CMD0), 0, SHU_CA_CMD0_RG_TX_ARCLK_DRVP_PRE_BIT0);
	// Clear OFFSETC_EN again now that the sweep is finished.
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ11),0,SHU_B0_DQ11_RG_RX_ARDQ_OFFSETC_EN_B0);
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ11),0,SHU_B1_DQ11_RG_RX_ARDQ_OFFSETC_EN_B1);
	vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_CA_CMD11),0,SHU_CA_CMD11_RG_RX_ARCA_OFFSETC_EN_CA);

    // Write back the code found for each group. A group with no result (0xff) is
    // skipped, which leaves its OFFC at the last swept code (0x7).
    if (cal_i_b0!=0xff) vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ0), calibration_sequence[cal_i_b0], SHU_RK_B0_DQ0_RG_RX_ARDQ2_OFFC_B0);
    if (cal_i_b1!=0xff) vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ0), calibration_sequence[cal_i_b1], SHU_RK_B1_DQ0_RG_RX_ARDQ2_OFFC_B1);
    if (cal_i_ca!=0xff) vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_CA_CMD0), calibration_sequence[cal_i_ca], SHU_RK_CA_CMD0_RG_RX_ARCA2_OFFC);

    return;
}

#if defined(YH_SWEEP_IC)
// One duty sweep result record, as consumed by YH_Sweep_IC_Print_Result.
// Values appear to be in units of 0.01% (they are compared against limits such
// as 4550/5450 that the print routine describes as 45.5%/54.5%).
typedef struct _YH_SWEEP_IC_T
{
    unsigned int maxduty;   // maximum duty
    unsigned int minduty;   // minimum duty
    unsigned int dutydiff;  // printed as "max-min Duty" by the print routine
    unsigned int avgduty;   // presumably the average duty; not read in the print routine
} YH_SWEEP_IC_T;

// Results indexed as [k_type][channel][byte]; the print routine iterates k_type over 0..3.
YH_SWEEP_IC_T gYH_Sweep_IC_test_result[4][CHANNEL_NUM][DQS_NUMBER];

#define YH_SWEEP_IC_PASS_CRITERIO 1 // 0: FT  1: SLT
/*
 * Print a PASS/FAIL verdict for every entry of gYH_Sweep_IC_test_result.
 *
 * Limits depend on YH_SWEEP_IC_PASS_CRITERIO (0: FT, 1: SLT) and are in the
 * same units as the stored results:
 *   - CLK (only reported for CHANNEL_B): max-min above the CLK limit -> FAIL0
 *   - DQS: max-min above the DQS limit                              -> FAIL1
 *   - DQ/DQM: min duty below, max duty above, or max-min above the
 *     respective DQ limits                                          -> FAIL2
 * SLT limits: CLK max-min > 5.3%, DQS max-min > 5.8%,
 *             DQ/DQM max duty > 54.5% or min duty < 45.5% or max-min > 5.8%.
 *
 * p  DRAMC context; not used by this routine.
 */
void YH_Sweep_IC_Print_Result(DRAMC_CTX_T *p)
{
#if YH_SWEEP_IC_PASS_CRITERIO
    const unsigned int u4ClkDiffLimit = 530;
    const unsigned int u4DqsDiffLimit = 580;
    const unsigned int u4DqMinLimit = 4550;
    const unsigned int u4DqMaxLimit = 5450;
    const unsigned int u4DqDiffLimit = 580;
#else
    const unsigned int u4ClkDiffLimit = 450;
    const unsigned int u4DqsDiffLimit = 500;
    const unsigned int u4DqMinLimit = 4600;
    const unsigned int u4DqMaxLimit = 5400;
    const unsigned int u4DqDiffLimit = 500;
#endif
    unsigned char u1KType, u1Ch, u1Byte, u1ByteCnt;

    mcSHOW_DBG_MSG2("\n\n YH Sweep IC Print Result =========\n");

    for (u1KType = 0; u1KType < 4; u1KType++)
    {
        /* CLK has a single entry per channel, every other type has two bytes. */
        u1ByteCnt = (u1KType == DutyScan_Calibration_K_CLK) ? 1 : 2;

        for (u1Ch = 0; u1Ch < CHANNEL_NUM; u1Ch++)
        {
            for (u1Byte = 0; u1Byte < u1ByteCnt; u1Byte++)
            {
                const YH_SWEEP_IC_T *pResult = &gYH_Sweep_IC_test_result[u1KType][u1Ch][u1Byte];

                if (u1KType == DutyScan_Calibration_K_CLK && u1Ch == CHANNEL_B)
                {
                    mcSHOW_DBG_MSG2("CH%d CLK max-min Duty %d%% : ",u1Ch, pResult->dutydiff);
                    if (pResult->dutydiff > u4ClkDiffLimit)
                    {
                        mcSHOW_DBG_MSG2("FAIL0\n");
                    }
                    else
                    {
                        mcSHOW_DBG_MSG2("PASS\n");
                    }
                }

                if (u1KType == DutyScan_Calibration_K_DQS)
                {
                    mcSHOW_DBG_MSG2("CH%d DQS Byte %d max-min Duty %d%% : ",u1Ch, u1Byte, pResult->dutydiff);
                    if (pResult->dutydiff > u4DqsDiffLimit)
                    {
                        mcSHOW_DBG_MSG2("FAIL1\n");
                    }
                    else
                    {
                        mcSHOW_DBG_MSG2("PASS\n");
                    }
                }

                if (u1KType == DutyScan_Calibration_K_DQ || u1KType == DutyScan_Calibration_K_DQM)
                {
                    mcSHOW_DBG_MSG2("CH%d %s Byte %d max Duty %d%%, min Duty %d%% : ",u1Ch, u1KType == DutyScan_Calibration_K_DQ ? "DQ" : "DQM", u1Byte, pResult->maxduty, pResult->minduty);
                    if (pResult->minduty < u4DqMinLimit || pResult->maxduty > u4DqMaxLimit || pResult->dutydiff > u4DqDiffLimit)
                    {
                        mcSHOW_DBG_MSG2("FAIL2\n");
                    }
                    else
                    {
                        mcSHOW_DBG_MSG2("PASS\n");
                    }
                }
            }
        }
    }
}
#endif

signed char gcFinal_K_Duty_clk_delay_cell[DQS_NUMBER];
signed char gcFinal_K_Duty_DQS_delay_cell[DQS_NUMBER];
DRAM_STATUS_T DutyScan_Calibration_Flow(DRAMC_CTX_T *p, unsigned char k_type, unsigned char use_rev_bit)
        {
    DRAM_STATUS_T KResult = DRAM_FAIL;
    signed char scinner_duty_ofst, scFinal_clk_delay_cell[DQS_NUMBER]={0,0};
    signed char scinner_duty_ofst_start = 0, scinner_duty_ofst_end = 0;
    int  s4PICnt, s4PIBegin, s4PIEnd, s4PICnt_mod64;
    signed char i, swap_idx, ucdqs_i, ucdqs_i_count=2;
//    unsigned char u1ByteIdx;
    unsigned char ucDelay, ucDelayB;
    unsigned char ucRev_Bit0=0, ucRev_Bit1=0;
    unsigned int u4DutyDiff, u4DutyDiff_Limit=900;

    unsigned char vref_sel_value[2], cal_out_value;
    int duty_value[2];
    int final_duty;

    unsigned int ucperiod_duty_max=0, ucperiod_duty_min=0xffffffff;
    unsigned int ucperiod_duty_averige=0, ucFinal_period_duty_averige[DQS_NUMBER]={0,0}, ucmost_approach_50_percent=0xffffffff;
    unsigned int ucFinal_period_duty_max[DQS_NUMBER] = {0,0}, ucFinal_period_duty_min[DQS_NUMBER] = {0,0};
    unsigned int ucperiod_duty_max_clk_dly=0, ucperiod_duty_min_clk_dly=0;
    unsigned int ucFinal_duty_max_clk_dly[DQS_NUMBER]={0},ucFinal_duty_min_clk_dly[DQS_NUMBER]={0};
    unsigned char str_clk_duty[]="CLK", str_dqs_duty[]="DQS", str_dq_duty[]="DQ", str_dqm_duty[]="DQM";
    unsigned char *str_who_am_I=str_clk_duty;
    unsigned char early_break_count=0;

    mcSHOW_DBG_MSG2("\n[DutyScan_Calibration_Flow] %s Calibration\n", use_rev_bit==0 ? "First" : "Second");
    mcSHOW_DBG_MSG2("\n[DutyScan_Calibration_Flow] k_type=%d, use_rev_bit=%d\n", k_type, use_rev_bit);
    /*TINFO="\n[DutyScan_Calibration_Flow] k_type=%d\n", k_type */


    if (k_type == DutyScan_Calibration_K_CLK)
        {
#if DutyPrintCalibrationLog
        mcSHOW_DBG_MSG("\n[  *PHDET_EN*=0  \n");
#endif
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_CA_DLL1), 0x0, SHU_CA_DLL1_RG_ARDLL_PHDET_EN_CA);
        }
    else
        {
        // DQS duty test 3
        //mcSHOW_DBG_MSG2(("\n[*PHDET_EN*=0]\n"));
#if DutyPrintCalibrationLog
        mcSHOW_DBG_MSG2("[*PI*RESETB*=0  *PHDET_EN*=0  *PI_RESETB*=1]\n");
#endif
        /*TINFO="[*PI*RESETB*=0  *PHDET_EN*=0  *PI_RESETB*=1]\n" */
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DLL_ARPI0), 0x0, B0_DLL_ARPI0_RG_ARPI_RESETB_B0);
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_DLL_ARPI0), 0x0, B1_DLL_ARPI0_RG_ARPI_RESETB_B1);

        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DLL1), 0x0, SHU_B0_DLL1_RG_ARDLL_PHDET_EN_B0);
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DLL1), 0x0, SHU_B1_DLL1_RG_ARDLL_PHDET_EN_B1);

        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DLL_ARPI0), 0x1, B0_DLL_ARPI0_RG_ARPI_RESETB_B0);
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_DLL_ARPI0), 0x1, B1_DLL_ARPI0_RG_ARPI_RESETB_B1);
        }

    //CLK Source Select (DQ/DQM/DQS/CLK)
    if (k_type == DutyScan_Calibration_K_DQ) // K DQB0_PHY3_RG_RX_ARDQ_DUTY_VCAL_EN_B0
        {
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_PHY3), 0, B0_PHY3_RG_RX_ARDQ_DUTY_VCAL_CLK_SEL_B0); // cc: APHY intf change
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_PHY3), 0, B1_PHY3_RG_RX_ARDQ_DUTY_VCAL_CLK_SEL_B1); // cc: APHY intf change

        ucdqs_i_count = 2;
        str_who_am_I = (unsigned char*)str_dq_duty;
        scinner_duty_ofst_start = 0;
        scinner_duty_ofst_end = 0;
        }
    else if (k_type == DutyScan_Calibration_K_DQM) // K DQM cc note: design not support...
        {
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_PHY3), 0, B0_PHY3_RG_RX_ARDQ_DUTY_VCAL_CLK_SEL_B0); //cc: APHY intf change

        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_PHY3), 0, B1_PHY3_RG_RX_ARDQ_DUTY_VCAL_CLK_SEL_B1); //cc: APHY intf change

        ucdqs_i_count = 2;
        str_who_am_I = (unsigned char*)str_dqm_duty;
        scinner_duty_ofst_start = 0;
        scinner_duty_ofst_end = 0;
        }
    else if (k_type == DutyScan_Calibration_K_DQS) // K DQS
    {
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_PHY3), 1, B0_PHY3_RG_RX_ARDQ_DUTY_VCAL_CLK_SEL_B0); //cc: APHY intf change
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_PHY3), 1, B1_PHY3_RG_RX_ARDQ_DUTY_VCAL_CLK_SEL_B1); //cc: APHY intf change

        ucdqs_i_count = 2;
        str_who_am_I = (unsigned char*)str_dqs_duty;
    scinner_duty_ofst_start = DUTY_OFFSET_START;
    scinner_duty_ofst_end = DUTY_OFFSET_END;

#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
        p->pSavetimeData->u1dqs_use_rev_bit = use_rev_bit;
#endif
    }
    else if (k_type == DutyScan_Calibration_K_CLK) // K CLK CA_PHY3_RG_RX_ARCA_DUTY_VCAL_CLK_SEL_CA
    {
        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_CA_PHY3), 1, CA_PHY3_RG_RX_ARCA_DUTY_VCAL_CLK_SEL_CA); // cc: APHY intf change

        ucdqs_i_count = 1;
        str_who_am_I = (unsigned char*)str_clk_duty;
    scinner_duty_ofst_start = DUTY_OFFSET_START;
    scinner_duty_ofst_end = DUTY_OFFSET_END;

#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
        p->pSavetimeData->u1clk_use_rev_bit = use_rev_bit;
#endif
    }

#if defined(YH_SWEEP_IC) || FT_DSIM_USED
        scinner_duty_ofst_start=0;
        scinner_duty_ofst_end=0;
#endif

    if (k_type == DutyScan_Calibration_K_CLK)
    {
        u4DutyDiff_Limit = 530;
    }
    else
    {
        u4DutyDiff_Limit = 580;
    }


    if (k_type == DutyScan_Calibration_K_CLK)
    {
        s4PIBegin = 0;
        s4PIEnd = 0;
    }
    else
    {
        s4PIBegin = CLOCK_PI_START;
        s4PIEnd = CLOCK_PI_END;
    }

    for(ucdqs_i = 0; ucdqs_i < ucdqs_i_count; ucdqs_i++)
    {
#if DutyPrintCalibrationLog
        if (k_type == DutyScan_Calibration_K_CLK)
        {
            mcSHOW_DBG_MSG2("\n[CLK Duty scan]\n");
            /*TINFO="\n[CLK Duty scan]\n", ucdqs_i */
        }
        else
        {
            mcSHOW_DBG_MSG2("\n[%s B%d Duty scan]\n", str_who_am_I, ucdqs_i);
            /*TINFO="\n[%s B%d Duty scan]\n", str_who_am_I, ucdqs_i */
        }
#endif

        ucmost_approach_50_percent=0xffffffff;
        early_break_count=0;

        signed char duty_step = 1;
        signed char pi_step = CLOCK_PI_STEP;
#if FOR_DV_SIMULATION_USED
        duty_step = 8;
        pi_step = 16;
#endif

        for(scinner_duty_ofst=scinner_duty_ofst_start; scinner_duty_ofst<=scinner_duty_ofst_end; scinner_duty_ofst += duty_step)
        {
            ucperiod_duty_max = 0;
            ucperiod_duty_min = 100000;

            if (scinner_duty_ofst<0)
            {
                ucDelay = -scinner_duty_ofst;
                ucDelayB = 0;

                if (use_rev_bit)
            {
                    ucRev_Bit0 = 1;
                    ucRev_Bit1 = 0;
                }
            }
            else if (scinner_duty_ofst>0)
            {
                ucDelay = 0;
                ucDelayB= scinner_duty_ofst;

                if (use_rev_bit)
            {
                    ucRev_Bit0 = 0;
                    ucRev_Bit1 = 1;
                }
            }
            else
            {
                ucDelay = 0;
                ucDelayB= 0;

                if (use_rev_bit)
            {
                    ucRev_Bit0 = 0;
                    ucRev_Bit1 = 0;
                }
            }

            if (k_type == DutyScan_Calibration_K_DQS)
            {
                    if (ucdqs_i == 0)
            {
                #if fcFOR_CHIP_ID == fcGriffin
                    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_TXDLY2), P_Fld(ucDelay, SHU_RK_B1_TXDLY2_TX_ARDQS0_DLY_B1)
                                                                                | P_Fld(ucDelay, SHU_RK_B1_TXDLY2_TX_ARDQS0B_DLY_B1)
                                                                                | P_Fld(ucDelayB, SHU_RK_B1_TXDLY2_TX_ARDQS0_DLYB_B1)
                                                                                | P_Fld(ucDelayB, SHU_RK_B1_TXDLY2_TX_ARDQS0B_DLYB_B1));
                #else
                    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_TXDLY2), P_Fld(ucDelay, SHU_RK_B0_TXDLY2_TX_ARDQS0_DLY_B0)
                                                                                | P_Fld(ucDelay, SHU_RK_B0_TXDLY2_TX_ARDQS0B_DLY_B0)
                                                                                | P_Fld(ucDelayB, SHU_RK_B0_TXDLY2_TX_ARDQS0_DLYB_B0)
                                                                                | P_Fld(ucDelayB, SHU_RK_B0_TXDLY2_TX_ARDQS0B_DLYB_B0));

                    #endif
                    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B0_MCK4X_EN), P_Fld(ucRev_Bit0, B0_MCK4X_EN_RG_TX_ARDQS_MCK4X_DLY_EN_B0)
                                                                           | P_Fld(ucRev_Bit1, B0_MCK4X_EN_RG_TX_ARDQS_MCK4XB_DLY_EN_B0));

                }
                else
                {
                #if fcFOR_CHIP_ID == fcGriffin
                    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_TXDLY2), P_Fld(ucDelay, SHU_RK_B0_TXDLY2_TX_ARDQS0_DLY_B0)
                                                                                | P_Fld(ucDelay, SHU_RK_B0_TXDLY2_TX_ARDQS0B_DLY_B0)
                                                                                | P_Fld(ucDelayB, SHU_RK_B0_TXDLY2_TX_ARDQS0_DLYB_B0)
                                                                                | P_Fld(ucDelayB, SHU_RK_B0_TXDLY2_TX_ARDQS0B_DLYB_B0));
                #else
                    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_TXDLY2), P_Fld(ucDelay, SHU_RK_B1_TXDLY2_TX_ARDQS0_DLY_B1)
                                                                                | P_Fld(ucDelay, SHU_RK_B1_TXDLY2_TX_ARDQS0B_DLY_B1)
                                                                                | P_Fld(ucDelayB, SHU_RK_B1_TXDLY2_TX_ARDQS0_DLYB_B1)
                                                                                | P_Fld(ucDelayB, SHU_RK_B1_TXDLY2_TX_ARDQS0B_DLYB_B1));
                            #endif

                    vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_B1_MCK4X_EN), P_Fld(ucRev_Bit0, B1_MCK4X_EN_RG_TX_ARDQS_MCK4X_DLY_EN_B1)
                                                                           | P_Fld(ucRev_Bit1, B1_MCK4X_EN_RG_TX_ARDQS_MCK4XB_DLY_EN_B1));
                                    }

                            }

            if (k_type == DutyScan_Calibration_K_CLK)
                                    {
                vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_CA_TXDLY3), P_Fld(ucDelay, SHU_RK_CA_TXDLY3_TX_ARCLK_DLY)
                                                                            | P_Fld(ucDelay, SHU_RK_CA_TXDLY3_TX_ARCLKB_DLY));
                vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_CA_TXDLY4), P_Fld(ucDelayB, SHU_RK_CA_TXDLY4_TX_ARCLK_DLYB)
                                                                            | P_Fld(ucDelayB, SHU_RK_CA_TXDLY4_TX_ARCLKB_DLYB));

                vIO32WriteFldMulti(DRAMC_REG_ADDR(DDRPHY_REG_CA_MCK4X_EN), P_Fld(ucRev_Bit0, CA_MCK4X_EN_RG_TX_ARCLK_MCK4X_DLY_EN)
                                                                         | P_Fld(ucRev_Bit1, CA_MCK4X_EN_RG_TX_ARCLK_MCK4XB_DLY_EN));
                        }

            for(s4PICnt=s4PIBegin; s4PICnt<=s4PIEnd; s4PICnt += pi_step)
                        {
                    s4PICnt_mod64 = (s4PICnt+64)&0x3f;//s4PICnt_mod64 = (s4PICnt+64)%64;
#if DutyPrintAllLog
                    //if(scinner_duty_ofst!=DUTY_OFFSET_START)
                    mcSHOW_DBG_MSG2("PI= %d\n", s4PICnt_mod64);
                            #endif

                if (k_type == DutyScan_Calibration_K_DQS)
                            {
                    if (ucdqs_i==0)
                                    {
                     #if fcFOR_CHIP_ID == fcGriffin
                        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ), s4PICnt_mod64, SHU_RK_B1_DQ_ARPI_PBYTE_B1);
                     #else
                        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ), s4PICnt_mod64, SHU_RK_B0_DQ_ARPI_PBYTE_B0);
                                #endif
                            }
                    else
                    {
                    #if fcFOR_CHIP_ID == fcGriffin
                        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ), s4PICnt_mod64, SHU_RK_B0_DQ_ARPI_PBYTE_B0);
                    #else
                        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ), s4PICnt_mod64, SHU_RK_B1_DQ_ARPI_PBYTE_B1);
                            #endif
                                    }
                            }
                    else
                    if (k_type == DutyScan_Calibration_K_CLK)
                    {
                        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_CA_CMD), s4PICnt_mod64, SHU_RK_CA_CMD_RG_ARPI_CLK);
                        }                    
                        else
                    if (k_type == DutyScan_Calibration_K_DQ)
                        {
                    if (ucdqs_i==0)
                            {
                    #if fcFOR_CHIP_ID == fcGriffin
                        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ), s4PICnt_mod64, SHU_RK_B1_DQ_SW_ARPI_DQ_B1);
                    #else
                        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ), s4PICnt_mod64, SHU_RK_B0_DQ_SW_ARPI_DQ_B0);
                                #endif
                            }
            else
            {
                    #if fcFOR_CHIP_ID == fcGriffin
                            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ), s4PICnt_mod64, SHU_RK_B0_DQ_SW_ARPI_DQ_B0);
                    #else
                            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ), s4PICnt_mod64, SHU_RK_B1_DQ_SW_ARPI_DQ_B1);
#endif
                        }
                    }
                    else
                    if (k_type == DutyScan_Calibration_K_DQM)
                    {
                    if (ucdqs_i==0)
                    {
                        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B0_DQ), s4PICnt_mod64, SHU_RK_B0_DQ_SW_ARPI_DQM_B0);
                    }
                    else
                    {
                        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_RK_B1_DQ), s4PICnt_mod64, SHU_RK_B1_DQ_SW_ARPI_DQM_B1);
                        }
                    }

                    for(swap_idx=0; swap_idx<2; swap_idx++)
                    {
                        if (k_type == DutyScan_Calibration_K_CLK)
                        {
                            if (swap_idx==0)
                            {
                            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_CA_MCK4X_EN), 2, CA_MCK4X_EN_RG_TX_ARCLK_DATA_SWAP);
                            }
                            else
                            {
                            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_CA_MCK4X_EN), 3, CA_MCK4X_EN_RG_TX_ARCLK_DATA_SWAP);
                            }

                            vref_sel_value[swap_idx]= 0;
                        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_CA_CMD5), vref_sel_value[swap_idx]>>1, SHU_CA_CMD5_RG_RX_ARCMD_VREF_SEL);
                        vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_CA_PHY3), (vref_sel_value[swap_idx]&1)==1? 1 : 0, CA_PHY3_RG_RX_ARCA_DUTY_VCAL_0P5EN_CA);
                        }
                    else
                    {
                        if (ucdqs_i==0)
                        {
                            if (swap_idx==0)
                            {
                                vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ6), 2, B0_DQ6_RG_TX_ARDQ_DATA_SWAP_B0);
                            }
                            else
                            {
                                vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ6), 3, B0_DQ6_RG_TX_ARDQ_DATA_SWAP_B0);
                            }

                            vref_sel_value[swap_idx]= 0;
                            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ5), vref_sel_value[swap_idx]>>1, SHU_B0_DQ5_RG_RX_ARDQ_VREF_SEL_B0);
                            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_PHY3), (vref_sel_value[swap_idx]&1)==1? 1 : 0, B0_PHY3_RG_RX_ARDQ_DUTY_VCAL_0P5EN_B0);
                        }
                        else
                        {
                            if (swap_idx==0)
                            {
                                vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ6), 2, B1_DQ6_RG_TX_ARDQ_DATA_SWAP_B1);
                            }
                            else
                            {
                                vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ6), 3, B1_DQ6_RG_TX_ARDQ_DATA_SWAP_B1);
                            }

                            vref_sel_value[swap_idx]= 0;
                            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ5), vref_sel_value[swap_idx]>>1, SHU_B1_DQ5_RG_RX_ARDQ_VREF_SEL_B1);
                            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_PHY3), (vref_sel_value[swap_idx]&1)==1? 1: 0, B1_PHY3_RG_RX_ARDQ_DUTY_VCAL_0P5EN_B1);
                        }
                        }

                    for(i=5; i>=0; i--)
                        {
                            if (k_type == DutyScan_Calibration_K_CLK)
                            {
                                vref_sel_value[swap_idx] |= (1<<i);
                            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_CA_CMD5), vref_sel_value[swap_idx]>>1, SHU_CA_CMD5_RG_RX_ARCMD_VREF_SEL);
                            vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_CA_PHY3), (vref_sel_value[swap_idx]&1)==1?0:1, CA_PHY3_RG_RX_ARCA_DUTY_VCAL_0P5EN_CA);

                                mcDELAY_US(1);

                            cal_out_value = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_AD_RX_CMD_O1), MISC_AD_RX_CMD_O1_AD_RX_ARCA2_O1);

                                if (cal_out_value == 0)
                                {
                                    vref_sel_value[swap_idx] &= ~(1<<i);
                                }
                            }
                                    else
                            {
                                if (ucdqs_i==0)
                                {
                                vref_sel_value[swap_idx] |= (1<<i);
                                vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DQ5), vref_sel_value[swap_idx]>>1, SHU_B0_DQ5_RG_RX_ARDQ_VREF_SEL_B0);
                                //cc replace  vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_SHU_B0_DQ0), (vref_sel_value[swap_idx]&1)==1?0:1, SHU_B0_DQ0_RG_TX_ARDQS0_DRVP_PRE_B0_BIT1);
                                vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_PHY3), (vref_sel_value[swap_idx]&1)==1?0:1, B0_PHY3_RG_RX_ARDQ_DUTY_VCAL_0P5EN_B0);
  
                                mcDELAY_US(1);

                                cal_out_value = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_AD_RX_DQ_O1), MISC_AD_RX_DQ_O1_AD_RX_ARDQ_O1_B0_BIT2);
                            }
                            else
                            {
                                vref_sel_value[swap_idx] |= (1<<i);
                                vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DQ5), vref_sel_value[swap_idx]>>1, SHU_B1_DQ5_RG_RX_ARDQ_VREF_SEL_B1);
                                //cc repalce  vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_SHU_B2_DQ0), (vref_sel_value[swap_idx]&1)==1?0:1, SHU_B0_DQ0_RG_TX_ARDQS0_DRVP_PRE_B0_BIT1);
                                vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_PHY3), (vref_sel_value[swap_idx]&1)==1?0:1, B1_PHY3_RG_RX_ARDQ_DUTY_VCAL_0P5EN_B1);

                                mcDELAY_US(1);
                                channel_backup_and_set(p, CHANNEL_B);
                                cal_out_value = u4IO32ReadFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_MISC_AD_RX_DQ_O1),MISC_AD_RX_DQ_O1_AD_RX_ARDQ_O1_B0_BIT2);
                                channel_restore(p);
                                }
  
#if DutyPrintAllLog
                            //mcSHOW_DBG_MSG2(("Fra i=%d vref_sel_value[swap_idx]=%x, cal_out=%d\n",i, vref_sel_value[swap_idx], cal_out_value));
#endif

                                if (cal_out_value == 0)
                                {
                                    vref_sel_value[swap_idx] &= ~(1<<i);
                                }
                            }
                        }
                   }


                    for(swap_idx=0; swap_idx<2; swap_idx++)
                    {
                        if (vref_sel_value[swap_idx]<=31)
                        {
                        duty_value[swap_idx] = (vref_sel_value[swap_idx]-23)*69+5050;
                        }
                        else
                        {
                        duty_value[swap_idx] = (vref_sel_value[swap_idx]-32)*55+5600;
                        }
                    }

#if DutyPrintAllLog
                mcSHOW_DBG_MSG2("\t[%d][%d] B%d : Vref_Sel=0x%x, Swap Vref_Sel=0x%x\n", scinner_duty_ofst, s4PICnt, ucdqs_i, vref_sel_value[0], vref_sel_value[1]);
                mcSHOW_DBG_MSG2("\t[%d][%d] B%d : duty_value=%d, Swap duty_value=%d\n", scinner_duty_ofst, s4PICnt, ucdqs_i, duty_value[0], duty_value[1]);
#endif

                    final_duty = 5000+((duty_value[0]-duty_value[1])/2);

                if (final_duty > (int) ucperiod_duty_max)
                    {
                        ucperiod_duty_max = final_duty;
                        ucperiod_duty_max_clk_dly = s4PICnt;
                    }
                if (final_duty < (int) ucperiod_duty_min)
                    {
                        ucperiod_duty_min = final_duty;
                        ucperiod_duty_min_clk_dly = s4PICnt;
                    }

#if DutyPrintAllLog
                mcSHOW_DBG_MSG2("\t[%d][%d] B%d : Final_Duty=%d\n", scinner_duty_ofst, s4PICnt, ucdqs_i, final_duty);
#endif
                }



            ucperiod_duty_averige = (ucperiod_duty_max + ucperiod_duty_min)>>1;

#if DutyPrintCalibrationLog
            if (k_type==DutyScan_Calibration_K_CLK)
                {
                mcSHOW_DBG_MSG2("[%d] CLK\n",scinner_duty_ofst);
                /*TINFO="[%d] CLK\n",scinner_duty_ofst */
                }
                else
                {
                mcSHOW_DBG_MSG2("[%d] %s%d\n",scinner_duty_ofst, str_who_am_I, ucdqs_i);
                /*TINFO="[%d] %s%d\n",scinner_duty_ofst, str_who_am_I, ucdqs_i */
                }
            mcSHOW_DBG_MSG2("\tMAX Duty = %d%%(X100), CLK PI=%d\n",ucperiod_duty_max, ucperiod_duty_max_clk_dly);
            /*TINFO="\tMAX Duty = %d%%(X100), CLK PI=%d\n",ucperiod_duty_max, ucperiod_duty_max_clk_dly */
            mcSHOW_DBG_MSG2("\tMIN Duty = %d%%(X100), CLK PI=%d\n",ucperiod_duty_min, ucperiod_duty_min_clk_dly);
            /*TINFO="\tMIN Duty = %d%%(X100), CLK PI=%d\n",ucperiod_duty_min, ucperiod_duty_min_clk_dly */
            mcSHOW_DBG_MSG2("\tAVG Duty = %d%%(X100)\n", ucperiod_duty_averige);
            /*TINFO="\tAVG Duty = %d%%(X100)\n", ucperiod_duty_averige */
#endif 

            if (ucperiod_duty_averige >= ClockDutyMiddleBound)
            {
                if ((scinner_duty_ofst<=0 && ((ucperiod_duty_averige-ClockDutyMiddleBound+(ucperiod_duty_max-ucperiod_duty_min)/2) <= ucmost_approach_50_percent)) ||
                     (scinner_duty_ofst>0 && ((ucperiod_duty_averige-ClockDutyMiddleBound+(ucperiod_duty_max-ucperiod_duty_min)/2) < ucmost_approach_50_percent)))
            {
                    ucmost_approach_50_percent = ucperiod_duty_averige-ClockDutyMiddleBound+(ucperiod_duty_max-ucperiod_duty_min)/2;
                    scFinal_clk_delay_cell[ucdqs_i] = scinner_duty_ofst;
                    ucFinal_period_duty_averige[ucdqs_i] = ucperiod_duty_averige;
                    ucFinal_period_duty_max[ucdqs_i] = ucperiod_duty_max;
                    ucFinal_period_duty_min[ucdqs_i] = ucperiod_duty_min;
                    ucFinal_duty_max_clk_dly[ucdqs_i] = ucperiod_duty_max_clk_dly;
                    ucFinal_duty_min_clk_dly[ucdqs_i] = ucperiod_duty_min_clk_dly;
#if DutyPrintCalibrationLog
                    mcSHOW_DBG_MSG2("\t!!! ucmost_approach_50_percent = %d%%(X100) !!!\n",ucmost_approach_50_percent);
                    /*TINFO="!!! ucmost_approach_50_percent = %d%%(X100) !!!\n",ucmost_approach_50_percent */
#endif
                    early_break_count = 0;
            }
            else
            {
                    if (scinner_duty_ofst>0) early_break_count ++;
#if DutyPrintAllLog==0
                    if (early_break_count>=2) break; //early break;
    #endif			
                }
            }
            else
            {
                if ((scinner_duty_ofst<=0 && ((ClockDutyMiddleBound-ucperiod_duty_averige+(ucperiod_duty_max-ucperiod_duty_min)/2) <= ucmost_approach_50_percent)) ||
                    (scinner_duty_ofst>0 && ((ClockDutyMiddleBound-ucperiod_duty_averige+(ucperiod_duty_max-ucperiod_duty_min)/2) < ucmost_approach_50_percent)))
            {
                    ucmost_approach_50_percent = ClockDutyMiddleBound-ucperiod_duty_averige+(ucperiod_duty_max-ucperiod_duty_min)/2;
                    scFinal_clk_delay_cell[ucdqs_i] = scinner_duty_ofst;
                    ucFinal_period_duty_averige[ucdqs_i] = ucperiod_duty_averige;
                    ucFinal_period_duty_max[ucdqs_i] = ucperiod_duty_max;
                    ucFinal_period_duty_min[ucdqs_i] = ucperiod_duty_min;
                    ucFinal_duty_max_clk_dly[ucdqs_i] = ucperiod_duty_max_clk_dly;
                    ucFinal_duty_min_clk_dly[ucdqs_i] = ucperiod_duty_min_clk_dly;
#if DutyPrintCalibrationLog
                    mcSHOW_DBG_MSG2("\t!!! ucmost_approach_50_percent = %d%%(X100) !!!\n",ucmost_approach_50_percent);
                /*TINFO="!!! ucmost_approach_50_percent = %d%%(X100) !!!\n",ucmost_approach_50_percent */
#endif
                early_break_count = 0;
            }
            else
            {
                    if (scinner_duty_ofst>0) early_break_count ++;
#if DutyPrintAllLog==0
                    if (early_break_count>=2) break; //early break;
#endif
                }
            }

#if DutyPrintCalibrationLog
            mcSHOW_DBG_MSG2("\n");
            /*TINFO="\n" */
#endif
        }
    }

    for(ucdqs_i = 0; ucdqs_i < ucdqs_i_count; ucdqs_i++)
    {
        //for SLT, use ERR_MSG to force print log

        if (k_type == DutyScan_Calibration_K_CLK)
        {
            mcSHOW_DBG_MSG("\n==%s %d==\n", str_who_am_I, ucdqs_i);
            /*TINFO="\n==%s ==\n", str_who_am_I */
        }
        else
        {
            mcSHOW_DBG_MSG("\n==%s %d ==\n", str_who_am_I, ucdqs_i);
            /*TINFO="\n==%s %d ==\n", str_who_am_I, ucdqs_i */
        }
        mcSHOW_DBG_MSG("Final %s duty delay cell = %d\n", str_who_am_I, scFinal_clk_delay_cell[ucdqs_i]);
        /*TINFO="Final %s duty delay cell = %d\n", str_who_am_I, scFinal_clk_delay_cell[ucdqs_i] */
        mcSHOW_DBG_MSG("[%d] MAX Duty = %d%%(X100), DQS PI = %d\n",scFinal_clk_delay_cell[ucdqs_i], ucFinal_period_duty_max[ucdqs_i], ucFinal_duty_max_clk_dly[ucdqs_i]);
        /*TINFO="[%d] MAX Duty = %d%%(X100), DQS PI = %d\n",scFinal_clk_delay_cell[ucdqs_i], ucFinal_period_duty_max[ucdqs_i], ucFinal_duty_max_clk_dly[ucdqs_i] */
        mcSHOW_DBG_MSG("[%d] MIN Duty = %d%%(X100), DQS PI = %d\n",scFinal_clk_delay_cell[ucdqs_i], ucFinal_period_duty_min[ucdqs_i], ucFinal_duty_min_clk_dly[ucdqs_i]);
        /*TINFO="[%d] MIN Duty = %d%%(X100), DQS PI = %d\n",scFinal_clk_delay_cell[ucdqs_i], ucFinal_period_duty_min[ucdqs_i], ucFinal_duty_min_clk_dly[ucdqs_i] */
        mcSHOW_DBG_MSG("[%d] AVG Duty = %d%%(X100)\n", scFinal_clk_delay_cell[ucdqs_i], ucFinal_period_duty_averige[ucdqs_i]);
        /*TINFO="[%d] AVG Duty = %d%%(X100)\n", scFinal_clk_delay_cell[ucdqs_i], ucFinal_period_duty_averige[ucdqs_i] */
    }

#if FT_DSIM_USED
    FT_Duty_Compare_PassFail(p->channel, k_type, ucFinal_period_duty_max[0] , ucFinal_period_duty_min[0],ucFinal_period_duty_max[1] , ucFinal_period_duty_min[1]);
#else
    for(ucdqs_i=0; ucdqs_i<ucdqs_i_count; ucdqs_i++)
    {
        u4DutyDiff = ucFinal_period_duty_max[ucdqs_i] - ucFinal_period_duty_min[ucdqs_i];

#if DQS_DUTY_SLT_CONDITION_TEST
        if (k_type == DutyScan_Calibration_K_CLK || (k_type == DutyScan_Calibration_K_DQS))
        {
            u4DQSDutyDiff_Rec[p->channel][ucdqs_i][u1GlobalTestCnt]=u4DutyDiff;

            u4DQSDutyDutyDly[p->channel][ucdqs_i] = scFinal_clk_delay_cell[ucdqs_i];

            if(u4DutyDiff > u4DQSDutyDiff_Max[p->channel][ucdqs_i])
                u4DQSDutyDiff_Max[p->channel][ucdqs_i] = u4DutyDiff;

            if(u4DutyDiff < u4DQSDutyDiff_Min[p->channel][ucdqs_i])
                u4DQSDutyDiff_Min[p->channel][ucdqs_i] = u4DutyDiff;

            u4DQSDutyDiff_Avrg[p->channel][ucdqs_i]  += u4DutyDiff;
        }
            #endif

#if defined(YH_SWEEP_IC)
        gYH_Sweep_IC_test_result[k_type][p->channel][ucdqs_i].maxduty = ucFinal_period_duty_max[ucdqs_i];
        gYH_Sweep_IC_test_result[k_type][p->channel][ucdqs_i].minduty = ucFinal_period_duty_min[ucdqs_i];
        gYH_Sweep_IC_test_result[k_type][p->channel][ucdqs_i].dutydiff = u4DutyDiff;
        gYH_Sweep_IC_test_result[k_type][p->channel][ucdqs_i].avgduty = ucFinal_period_duty_averige[ucdqs_i];
	        #else
        if ((((k_type == DutyScan_Calibration_K_CLK) || (k_type == DutyScan_Calibration_K_DQS)) && (u4DutyDiff < u4DutyDiff_Limit)) ||
	           (((k_type == DutyScan_Calibration_K_DQ) || (k_type == DutyScan_Calibration_K_DQM)) && ((u4DutyDiff < u4DutyDiff_Limit) && (ucFinal_period_duty_averige[ucdqs_i] >= 4550 && ucFinal_period_duty_averige[ucdqs_i] <= 5450))))
	        {
	            if (k_type == DutyScan_Calibration_K_CLK)
	            {
                mcSHOW_DBG_MSG("\nCH%d %s Duty spec in!! Max-Min= %d%%\n",p->channel, str_who_am_I, u4DutyDiff);
	                /*TINFO="\nCH%d %s Duty spec in!! Max-Min= %d%%\n",p->channel, str_who_am_I, u4DutyDiff */
	            }
	            else
	            {
                mcSHOW_DBG_MSG("\nCH%d %s %d Duty spec in!! Max-Min= %d%%\n",p->channel, str_who_am_I, ucdqs_i, u4DutyDiff);
	                /*TINFO="\nCH%d %s %d Duty spec in!! Max-Min= %d%%\n",p->channel, str_who_am_I, ucdqs_i, u4DutyDiff */
	            }
	            KResult = DRAM_OK;
	        }
	        else
	        {
	            if (k_type == DutyScan_Calibration_K_CLK)
	            {
                mcSHOW_DBG_MSG("\nCH%d %s Duty spec out!! Max-Min= %d%% >%d%%\n", p->channel, str_who_am_I, u4DutyDiff, u4DutyDiff_Limit);
	                /*TINFO="\nCH%d %s Duty spec out!! Max-Min= %d%% >8%%\n", p->channel, str_who_am_I, u4DutyDiff */
	            }
	            else
	            {
                mcSHOW_DBG_MSG("\nCH%d %s %d Duty spec out!! Max-Min= %d%% >%d%%\n", p->channel, str_who_am_I, ucdqs_i, u4DutyDiff, u4DutyDiff_Limit);
	                /*TINFO="\nCH%d %s %d Duty spec out!! Max-Min= %d%% >8%%\n", p->channel, str_who_am_I, ucdqs_i, u4DutyDiff */
	            }
            //#if defined(SLT)
                    //while(1); //stop here
            //#endif

            #if __ETT__

                #if DQS_DUTY_SLT_CONDITION_TEST
                retStatus = DRAM_FAIL;
                #else
                    while(1); //stop here
	            #endif

	            #endif
	        }
#endif
    }

#endif

    if (k_type == DutyScan_Calibration_K_DQS)
    {
#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
        if(p->femmc_Ready==0)
        {
            p->pSavetimeData->s1DQSDuty_clk_delay_cell[p->channel][0] = scFinal_clk_delay_cell[0];
            p->pSavetimeData->s1DQSDuty_clk_delay_cell[p->channel][1] = scFinal_clk_delay_cell[1];
        }
#endif

        // backup K DQS final values
        gcFinal_K_Duty_DQS_delay_cell[RANK_0] = scFinal_clk_delay_cell[0];
        gcFinal_K_Duty_DQS_delay_cell[RANK_1] = scFinal_clk_delay_cell[1];

        DQSDutyScan_SetDqsDelayCell(p, scFinal_clk_delay_cell, use_rev_bit);

		//according to JC's suggestion @ 2019/07/25
		if (use_rev_bit==1)
    {
			if (scFinal_clk_delay_cell[0] == 8 || scFinal_clk_delay_cell[0] == -8 || scFinal_clk_delay_cell[1] == 8 || scFinal_clk_delay_cell[1] == -8)
        {
				mcSHOW_ERR_MSG(("Warning !! DQS Duty is 8 or -8 !!\n"));
        }
		}
#ifdef FOR_HQA_TEST_USED
        gFinalDQSDuty[p->channel][0] = scFinal_clk_delay_cell[0];
        gFinalDQSDuty[p->channel][1] = scFinal_clk_delay_cell[1];
        gFinalDQSDutyMinMax[p->channel][0][0] = ucFinal_period_duty_min[0];
        gFinalDQSDutyMinMax[p->channel][0][1] = ucFinal_period_duty_max[0];
        gFinalDQSDutyMinMax[p->channel][1][0] = ucFinal_period_duty_min[1];
        gFinalDQSDutyMinMax[p->channel][1][1] = ucFinal_period_duty_max[1];
#endif
    }

    if (k_type == DutyScan_Calibration_K_CLK)
    {
        DramcClockDutySetClkDelayCell(p, RANK_0, scFinal_clk_delay_cell[0], use_rev_bit);
        if(p->support_rank_num == RANK_DUAL)
        {
            DramcClockDutySetClkDelayCell(p, RANK_1, scFinal_clk_delay_cell[0], use_rev_bit);
        }

        // backup K CLK final values
        gcFinal_K_Duty_clk_delay_cell[RANK_0] = scFinal_clk_delay_cell[0];
        gcFinal_K_Duty_clk_delay_cell[RANK_1] = scFinal_clk_delay_cell[0];
		//according to JC's suggestion @ 2019/07/25
		if (use_rev_bit==1)
    {
			if (scFinal_clk_delay_cell[0] == 8 || scFinal_clk_delay_cell[0] == -8)
        {
				mcSHOW_ERR_MSG(("Warning !! CLK Duty is 8 or -8 !!\n"));
        }
    }
#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
        if(p->femmc_Ready==0)
        {
            p->pSavetimeData->s1ClockDuty_clk_delay_cell[p->channel][RANK_0] = scFinal_clk_delay_cell[0];
            p->pSavetimeData->s1ClockDuty_clk_delay_cell[p->channel][RANK_1] = scFinal_clk_delay_cell[0];
        }
#endif

#ifdef FOR_HQA_TEST_USED
        gFinalClkDuty[p->channel] = scFinal_clk_delay_cell[0];
        gFinalClkDutyMinMax[p->channel][0] = ucFinal_period_duty_min[0];
        gFinalClkDutyMinMax[p->channel][1] = ucFinal_period_duty_max[0];
#endif
    }

    DramPhyReset(p);
    mcSHOW_DBG_MSG("[DutyScan_Calibration_Flow] ====Done====\n");
    /*TINFO="[DutyScan_Calibration_Flow] ====Done====\n" */

    return KResult;
}

/*
 * DramcNewDutyCalibration
 *
 * Top-level duty-scan calibration entry for the channel currently selected
 * in the context.
 *
 * The calibration result is first marked DRAM_FAIL and the current rank is
 * saved and forced to RANK_0.  Depending on build options and runtime state,
 * the function then takes one of these paths:
 *
 *   - DUAL_FREQ_K and frequency <= DDR1600_FREQ: nothing is calibrated
 *     (the result stays DRAM_FAIL).
 *   - SUPPORT_SAVE_TIME_FOR_CALIBRATION and p->femmc_Ready == 1: the delay
 *     cells stored in p->pSavetimeData are applied, result = DRAM_FAST_K.
 *   - Duty mode DUTY_DEFAULT / DUTY_LAST_K (only when FT_DSIM_USED is 0):
 *     the values in the gcFinal_K_Duty_* globals are applied (cleared to 0
 *     first for DUTY_DEFAULT), result = DRAM_NO_K.
 *   - Otherwise: full calibration of CLK and DQS (plus DQ and DQM when
 *     APPLY_DQDQM_DUTY_CALIBRATION is set); result = DRAM_OK only if every
 *     DutyScan_Calibration_Flow() call returned 0.
 *
 * The rank that was active on entry is restored on every exit path.
 *
 * @param p  DRAM controller context.
 */
void DramcNewDutyCalibration(DRAMC_CTX_T *p)
{
    unsigned char use_rev_bit = 0;
    DRAM_STATUS_T u2FailStatusByCh = {0};
    U8 u1backup_rank;

#if FT_DSIM_USED==0
    /* Registers saved before the scan and restored once it has finished. */
    unsigned int u4RegBackupAddress[] =
    {
        (DDRPHY_REG_B0_DQ6),
        (DDRPHY_REG_SHU_B0_DQ5),
        (DDRPHY_REG_B0_PHY3),
        (DDRPHY_REG_CA_MCK4X_EN),
        (DDRPHY_REG_B0_DQ5),
        (DDRPHY_REG_CA_CMD6),
        (DDRPHY_REG_SHU_CA_CMD5),
        (DDRPHY_REG_CA_PHY3),
        (DDRPHY_REG_CA_CMD5),
        (DDRPHY_REG_B1_PHY3),
        (DDRPHY_REG_B0_DLL_ARPI0),
        (DDRPHY_REG_B1_DLL_ARPI0),
        (DDRPHY_REG_SHU_B0_DLL1),
        (DDRPHY_REG_SHU_B1_DLL1),
        (DDRPHY_REG_B0_MCK4X_EN),
        (DDRPHY_REG_B1_MCK4X_EN),
        (DDRPHY_REG_SHU_RK_B0_DQ0),
        (DDRPHY_REG_SHU_RK_B1_DQ0),
        (DDRPHY_REG_SHU_RK_CA_CMD0),
        (DDRPHY_REG_SHU_RK_B0_DQ),
        (DDRPHY_REG_SHU_RK_B1_DQ),
        (DDRPHY_REG_SHU_RK_CA_CMD),
        (DDRPHY_REG_SHU_B1_DQ5),
        (DDRPHY_REG_B1_DQ6),
        (DDRPHY_REG_SHU_CA_DLL1),
    };
#endif

    /* Pessimistic default; upgraded below once a path completes. */
    vSetCalibrationResult(p, DRAM_CALIBRATION_DUTY_SCAN, DRAM_FAIL);

    u1backup_rank = u1GetRank(p);
    vSetRank(p, RANK_0);

#if DUAL_FREQ_K
    /* DDR1600 is not final used, just do CBT for later high-freq calibration */
    if (p->frequency <= DDR1600_FREQ)
    {
        /* Do not leave the context stuck on RANK_0 for the caller. */
        vSetRank(p, u1backup_rank);
        return;
    }
#endif

#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
    if (p->femmc_Ready == 1)
    {
        /* Fast-K: re-apply the delay cells saved by an earlier calibration. */
        DramcClockDutySetClkDelayCell(p, RANK_0, p->pSavetimeData->s1ClockDuty_clk_delay_cell[p->channel][RANK_0], use_rev_bit);
        if (p->support_rank_num == RANK_DUAL)
        {
            DramcClockDutySetClkDelayCell(p, RANK_1, p->pSavetimeData->s1ClockDuty_clk_delay_cell[p->channel][RANK_1], use_rev_bit);
        }
        DQSDutyScan_SetDqsDelayCell(p, p->pSavetimeData->s1DQSDuty_clk_delay_cell[p->channel], use_rev_bit);
        vSetCalibrationResult(p, DRAM_CALIBRATION_DUTY_SCAN, DRAM_FAST_K);
        vSetRank(p, u1backup_rank);
        return;
    }
#endif

#if !FT_DSIM_USED
    if (Get_Duty_Calibration_Mode(p) == DUTY_DEFAULT)
    {
        //clear global variables to 0 delay
        gcFinal_K_Duty_clk_delay_cell[RANK_0] = 0;
        gcFinal_K_Duty_clk_delay_cell[RANK_1] = 0;
        gcFinal_K_Duty_DQS_delay_cell[RANK_0] = 0;
        gcFinal_K_Duty_DQS_delay_cell[RANK_1] = 0;
    }

    if ((Get_Duty_Calibration_Mode(p) == DUTY_DEFAULT) || (Get_Duty_Calibration_Mode(p) == DUTY_LAST_K))
    {
        /* No scan in these modes: apply whatever the globals currently hold. */
        DramcClockDutySetClkDelayCell(p, RANK_0, gcFinal_K_Duty_clk_delay_cell[RANK_0], use_rev_bit);
        if (p->support_rank_num == RANK_DUAL)
        {
            DramcClockDutySetClkDelayCell(p, RANK_1, gcFinal_K_Duty_clk_delay_cell[RANK_1], use_rev_bit);
        }
        /*
         * The DQS delay cells come from the DQS result array, matching both
         * the values stored into pSavetimeData just below and the re-apply
         * at the end of the full calibration path.
         */
        DQSDutyScan_SetDqsDelayCell(p, gcFinal_K_Duty_DQS_delay_cell, use_rev_bit);
#if SUPPORT_SAVE_TIME_FOR_CALIBRATION
        if (p->femmc_Ready == 0)
        {
            p->pSavetimeData->s1ClockDuty_clk_delay_cell[p->channel][RANK_0] = gcFinal_K_Duty_clk_delay_cell[RANK_0];
            p->pSavetimeData->s1ClockDuty_clk_delay_cell[p->channel][RANK_1] = gcFinal_K_Duty_clk_delay_cell[RANK_1];
            p->pSavetimeData->s1DQSDuty_clk_delay_cell[p->channel][RANK_0] = gcFinal_K_Duty_DQS_delay_cell[RANK_0];
            p->pSavetimeData->s1DQSDuty_clk_delay_cell[p->channel][RANK_1] = gcFinal_K_Duty_DQS_delay_cell[RANK_1];
        }
#endif

        vSetCalibrationResult(p, DRAM_CALIBRATION_DUTY_SCAN, DRAM_NO_K);
        vSetRank(p, u1backup_rank);
        return;
    }
#endif

    //Clk free run
    EnableDramcPhyDCM(p, 0);

    //Fix rank to rank0
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_SET0), 0, TX_SET0_TXRANK);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_SET0), 1, TX_SET0_TXRANKFIX);

    /* cc add according to ANA Mark's comment */
    /*
     * NOTE(review): these SHU_*_DLL2 LPBK_SEL fields are set to 1 here, are
     * not in the backup list, and are not written again in this function
     * (the clean-up at the end clears LPBK_SEL fields in B0_DQ6 / B1_DQ6 /
     * CA_CMD6 instead) -- confirm that this is intended.
     */
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B0_DLL2), 0x1, SHU_B0_DLL2_RG_ARDQ_RG_TX_LPBK_SEL);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_B1_DLL2), 0x1, SHU_B1_DLL2_RG_ARDQ_RG_TX_LPBK_SEL);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_SHU_CA_DLL2), 0x1, SHU_CA_DLL2_RG_ARCMD_RG_TX_LPBK_SEL);

    //backup register value
#if FT_DSIM_USED==0
    DramcBackupRegisters(p, u4RegBackupAddress, sizeof(u4RegBackupAddress)/sizeof(u4RegBackupAddress[0]), TO_ONE_CHANNEL);
#endif

    DutyScan_Offset_Calibration(p);

    /* Accumulate the per-type results; any non-zero status keeps DRAM_FAIL. */
    u2FailStatusByCh = DutyScan_Calibration_Flow(p, DutyScan_Calibration_K_CLK, 1);
    u2FailStatusByCh |= DutyScan_Calibration_Flow(p, DutyScan_Calibration_K_DQS, 1);

#if APPLY_DQDQM_DUTY_CALIBRATION
    u2FailStatusByCh |= DutyScan_Calibration_Flow(p, DutyScan_Calibration_K_DQ, 0);
    u2FailStatusByCh |= DutyScan_Calibration_Flow(p, DutyScan_Calibration_K_DQM, 0);
#endif

#if FT_DSIM_USED==0
    //restore to orignal value
    DramcRestoreRegisters(p, u4RegBackupAddress, sizeof(u4RegBackupAddress)/sizeof(u4RegBackupAddress[0]), TO_ONE_CHANNEL);
#endif

    if (u2FailStatusByCh == 0)
    {
        vSetCalibrationResult(p, DRAM_CALIBRATION_DUTY_SCAN, DRAM_OK);
    }

    //Set K DQS MCK4X_DLY_EN and MCK4XB_DLY_EN again, this is especially for K DQS because other bit fields need to be restored.
    DQSDutyScan_SetDqsDelayCell(p, gcFinal_K_Duty_DQS_delay_cell, use_rev_bit);

    /* Release the rank that was fixed for the duration of the scan. */
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_SET0), 0, TX_SET0_TXRANK);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DRAMC_REG_TX_SET0), 0, TX_SET0_TXRANKFIX);

    /* cc add */
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B0_DQ6), 0x0, B0_DQ6_RG_TX_ARDQ_LPBK_SEL_B0);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_B1_DQ6), 0x0, B1_DQ6_RG_TX_ARDQ_LPBK_SEL_B1);
    vIO32WriteFldAlign(DRAMC_REG_ADDR(DDRPHY_REG_CA_CMD6), 0x0, CA_CMD6_RG_TX_ARCA_LPBK_SEL_CA);

    mcSHOW_DBG_MSG("[DramcNewDutyCalibration] Done\n\n");
    vSetRank(p, u1backup_rank);
}
#endif

