/*
 * Copyright (C) 2012 Altera Corporation <www.altera.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  - Neither the name of the Altera Corporation nor the
 *    names of its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL ALTERA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <common.h>
#include <asm/io.h>
#include <asm/errno.h>
#include <wait_bit.h>
#include "cadence_qspi.h"

/* Polling interval constant; not referenced by the code visible in this file. */
#define CQSPI_REG_POLL_US			(1) /* 1us */
/* Number of 1us polls of CMDCTRL before a flash command is declared timed out */
#define CQSPI_REG_RETRY				(10000)
/* Consecutive idle readings required before the controller is treated as idle */
#define CQSPI_POLL_IDLE_RETRY			(3)

/* Multiplier used to convert an SRAM fill level (words) into bytes */
#define CQSPI_FIFO_WIDTH			(4)

/* Not referenced by the code visible in this file. */
#define CQSPI_REG_SRAM_THRESHOLD_WORDS		(50)

/* Transfer mode */
#define CQSPI_INST_TYPE_SINGLE			(0)
#define CQSPI_INST_TYPE_DUAL			(1)
#define CQSPI_INST_TYPE_QUAD			(2)

/* Largest payload accepted by cadence_qspi_apb_command_read() */
#define CQSPI_STIG_DATA_LEN_MAX			(8)
/* Mask applied to the AHB base before it is written to INDIRECTTRIGGER */
#define CQSPI_INDIRECTTRIGGER_ADDR_MASK		(0xFFFFF)

/* Clock cycles accounted per dummy byte when programming the read instruction */
#ifdef CONFIG_SPI_FLASH_MACRONIX
#define CQSPI_DUMMY_CLKS_PER_BYTE		(6)
#else
#define CQSPI_DUMMY_CLKS_PER_BYTE		(8)
#endif /* CONFIG_SPI_FLASH_MACRONIX */

/* Upper bound applied to the dummy byte count of a read command */
#define CQSPI_DUMMY_BYTES_MAX			(4)


/*
 * Half of the SRAM expressed in bytes. CQSPI_REG_SRAM_SIZE_WORD is not
 * defined in this file (presumably it comes from cadence_qspi.h - TODO
 * confirm).
 */
#define CQSPI_REG_SRAM_FILL_THRESHOLD	\
	((CQSPI_REG_SRAM_SIZE_WORD / 2) * CQSPI_FIFO_WIDTH)
/****************************************************************************
 * Controller's configuration and status register (offset from QSPI_BASE)
 ****************************************************************************/
/*
 * CONFIG: enable bit, clock polarity/phase, direct access enable, chip
 * select decoder and lines, baud divisor field and the idle status bit.
 */
#define	CQSPI_REG_CONFIG			0x00
#define	CQSPI_REG_CONFIG_CLK_POL_LSB		1
#define	CQSPI_REG_CONFIG_CLK_PHA_LSB		2
#define	CQSPI_REG_CONFIG_ENABLE_MASK		BIT(0)
#define	CQSPI_REG_CONFIG_DIRECT_MASK		BIT(7)
#define	CQSPI_REG_CONFIG_DECODE_MASK		BIT(9)
#define	CQSPI_REG_CONFIG_XIP_IMM_MASK		BIT(18)
#define	CQSPI_REG_CONFIG_CHIPSELECT_LSB		10
#define	CQSPI_REG_CONFIG_BAUD_LSB		19
#define	CQSPI_REG_CONFIG_IDLE_LSB		31
#define	CQSPI_REG_CONFIG_CHIPSELECT_MASK	0xF
#define	CQSPI_REG_CONFIG_BAUD_MASK		0xF

/* Read instruction: programmed by cadence_qspi_apb_indirect_read_setup() */
#define	CQSPI_REG_RD_INSTR			0x04
#define	CQSPI_REG_RD_INSTR_OPCODE_LSB		0
#define	CQSPI_REG_RD_INSTR_TYPE_INSTR_LSB	8
#define	CQSPI_REG_RD_INSTR_TYPE_ADDR_LSB	12
#define	CQSPI_REG_RD_INSTR_TYPE_DATA_LSB	16
#define	CQSPI_REG_RD_INSTR_MODE_EN_LSB		20
#define	CQSPI_REG_RD_INSTR_DUMMY_LSB		24
#define	CQSPI_REG_RD_INSTR_TYPE_INSTR_MASK	0x3
#define	CQSPI_REG_RD_INSTR_TYPE_ADDR_MASK	0x3
#define	CQSPI_REG_RD_INSTR_TYPE_DATA_MASK	0x3
#define	CQSPI_REG_RD_INSTR_DUMMY_MASK		0x1F

/* Write instruction: programmed by cadence_qspi_apb_indirect_write_setup() */
#define	CQSPI_REG_WR_INSTR			0x08
#define	CQSPI_REG_WR_INSTR_OPCODE_LSB		0

/* Four 8-bit delay fields, written by cadence_qspi_apb_delay() */
#define	CQSPI_REG_DELAY				0x0C
#define	CQSPI_REG_DELAY_TSLCH_LSB		0
#define	CQSPI_REG_DELAY_TCHSH_LSB		8
#define	CQSPI_REG_DELAY_TSD2D_LSB		16
#define	CQSPI_REG_DELAY_TSHSL_LSB		24
#define	CQSPI_REG_DELAY_TSLCH_MASK		0xFF
#define	CQSPI_REG_DELAY_TCHSH_MASK		0xFF
#define	CQSPI_REG_DELAY_TSD2D_MASK		0xFF
#define	CQSPI_REG_DELAY_TSHSL_MASK		0xFF

/* Read data capture: bypass bit and delay field, see readdata_capture() */
#define	CQSPI_READLCAPTURE			0x10
#define	CQSPI_READLCAPTURE_BYPASS_LSB		0
#define	CQSPI_READLCAPTURE_DELAY_LSB		1
#define	CQSPI_READLCAPTURE_DELAY_MASK		0xF

/* Device size: address byte count, page size and block size fields */
#define	CQSPI_REG_SIZE				0x14
#define	CQSPI_REG_SIZE_ADDRESS_LSB		0
#define	CQSPI_REG_SIZE_PAGE_LSB			4
#define	CQSPI_REG_SIZE_BLOCK_LSB		16
#define	CQSPI_REG_SIZE_ADDRESS_MASK		0xF
#define	CQSPI_REG_SIZE_PAGE_MASK		0xFFF
#define	CQSPI_REG_SIZE_BLOCK_MASK		0x3F

/* Written with plat->sram_size and the masked AHB base respectively */
#define	CQSPI_REG_SRAMPARTITION			0x18
#define	CQSPI_REG_INDIRECTTRIGGER		0x1C

/* REMAP is cleared during init; MODE_BIT holds the mode byte for reads */
#define	CQSPI_REG_REMAP				0x24
#define	CQSPI_REG_MODE_BIT			0x28

/* SRAM fill level: read level in the low half, write level in the high half */
#define	CQSPI_REG_SDRAMLEVEL			0x2C
#define	CQSPI_REG_SDRAMLEVEL_RD_LSB		0
#define	CQSPI_REG_SDRAMLEVEL_WR_LSB		16
#define	CQSPI_REG_SDRAMLEVEL_RD_MASK		0xFFFF
#define	CQSPI_REG_SDRAMLEVEL_WR_MASK		0xFFFF

/* Interrupt status/mask; the mask is written to 0 during init */
#define	CQSPI_REG_IRQSTATUS			0x40
#define	CQSPI_REG_IRQMASK			0x44

/* Indirect read control/status */
#define	CQSPI_REG_INDIRECTRD			0x60
#define	CQSPI_REG_INDIRECTRD_START_MASK		BIT(0)
#define	CQSPI_REG_INDIRECTRD_CANCEL_MASK	BIT(1)
#define	CQSPI_REG_INDIRECTRD_INPROGRESS_MASK	BIT(2)
#define	CQSPI_REG_INDIRECTRD_DONE_MASK		BIT(5)

/* Indirect read watermark, flash start address and byte count */
#define	CQSPI_REG_INDIRECTRDWATERMARK		0x64
#define	CQSPI_REG_INDIRECTRDSTARTADDR		0x68
#define	CQSPI_REG_INDIRECTRDBYTES		0x6C

/* Flash command control, driven by cadence_qspi_apb_exec_flash_cmd() */
#define	CQSPI_REG_CMDCTRL			0x90
#define	CQSPI_REG_CMDCTRL_EXECUTE_MASK		BIT(0)
#define	CQSPI_REG_CMDCTRL_INPROGRESS_MASK	BIT(1)
#define	CQSPI_REG_CMDCTRL_DUMMY_LSB		7
#define	CQSPI_REG_CMDCTRL_WR_BYTES_LSB		12
#define	CQSPI_REG_CMDCTRL_WR_EN_LSB		15
#define	CQSPI_REG_CMDCTRL_ADD_BYTES_LSB		16
#define	CQSPI_REG_CMDCTRL_ADDR_EN_LSB		19
#define	CQSPI_REG_CMDCTRL_RD_BYTES_LSB		20
#define	CQSPI_REG_CMDCTRL_RD_EN_LSB		23
#define	CQSPI_REG_CMDCTRL_OPCODE_LSB		24
#define	CQSPI_REG_CMDCTRL_DUMMY_MASK		0x1F
#define	CQSPI_REG_CMDCTRL_WR_BYTES_MASK		0x7
#define	CQSPI_REG_CMDCTRL_ADD_BYTES_MASK	0x3
#define	CQSPI_REG_CMDCTRL_RD_BYTES_MASK		0x7
#define	CQSPI_REG_CMDCTRL_OPCODE_MASK		0xFF

/* Indirect write control/status */
#define	CQSPI_REG_INDIRECTWR			0x70
#define	CQSPI_REG_INDIRECTWR_START_MASK		BIT(0)
#define	CQSPI_REG_INDIRECTWR_CANCEL_MASK	BIT(1)
#define	CQSPI_REG_INDIRECTWR_INPROGRESS_MASK	BIT(2)
#define	CQSPI_REG_INDIRECTWR_DONE_MASK		BIT(5)

/* Indirect write watermark, flash start address and byte count */
#define	CQSPI_REG_INDIRECTWRWATERMARK		0x74
#define	CQSPI_REG_INDIRECTWRSTARTADDR		0x78
#define	CQSPI_REG_INDIRECTWRBYTES		0x7C
/* Written with 0x8 before each indirect read is started */
#define CQSPI_INDIRECT_TRIGGER_ADDR_RANGE_REG	0x80

/* Address and data registers used by the flash command path */
#define	CQSPI_REG_CMDADDRESS			0x94
#define	CQSPI_REG_CMDREADDATALOWER		0xA0
#define	CQSPI_REG_CMDREADDATAUPPER		0xA4
#define	CQSPI_REG_CMDWRITEDATALOWER		0xA8
#define	CQSPI_REG_CMDWRITEDATAUPPER		0xAC

/* PHY/DLL registers; only touched when CONFIG_CADENCE_PHY_MODE is defined */
#define CQSPI_PHY_CONFIG_REG			0xB4
#define CQSPI_PHY_CONFIG_TX_DLL_DELAY_LSB	16
#define CQSPI_PHY_CONFIG_RESET_LSB		30
#define CQSPI_PHY_CONFIG_RESYNC_LSB		31
#define CQSPI_PHY_MASTER_INITIAL_DELAY_LSB	0
#define CQSPI_PHY_MASTER_CONTROL_REG		0xB8
#define CQSPI_DLL_OBSERVABLE_LOWER_REG		0xBC
#define DLL_OBSERVABLE_LOWER_DLL_LOCK_INC_MASK	0x1

/* Not referenced by the code visible in this file. */
#define QSPI_REF_CLK				400000000

/*
 * Evaluates to 1 when the idle bit of the CONFIG register is set, else 0.
 * The argument is parenthesized so that any pointer expression can be passed.
 */
#define CQSPI_REG_IS_IDLE(base)					\
	((readl((base) + CQSPI_REG_CONFIG) >>			\
		CQSPI_REG_CONFIG_IDLE_LSB) & 0x1)

/*
 * (tdelay_ns - tsclk_ns) / tref_ns, i.e. the requested delay minus one
 * serial clock period, expressed in reference clock periods (truncated).
 */
#define CQSPI_CAL_DELAY(tdelay_ns, tref_ns, tsclk_ns)		\
	((((tdelay_ns) - (tsclk_ns)) / (tref_ns)))

/* Read-side fill level field of the SRAM level register. */
#define CQSPI_GET_RD_SRAM_LEVEL(reg_base)			\
	(((readl((reg_base) + CQSPI_REG_SDRAMLEVEL)) >>		\
	CQSPI_REG_SDRAMLEVEL_RD_LSB) & CQSPI_REG_SDRAMLEVEL_RD_MASK)

/* Write-side fill level field of the SRAM level register. */
#define CQSPI_GET_WR_SRAM_LEVEL(reg_base)			\
	(((readl((reg_base) + CQSPI_REG_SDRAMLEVEL)) >>		\
	CQSPI_REG_SDRAMLEVEL_WR_LSB) & CQSPI_REG_SDRAMLEVEL_WR_MASK)

/*
 * Assemble a flash address from a big-endian byte sequence.
 *
 * @addr_buf:   address bytes, most significant byte first
 * @addr_width: 4 selects a four-byte address; any other value uses the
 *              first three bytes only
 *
 * Returns the assembled address.
 */
static unsigned int cadence_qspi_apb_cmd2addr(const unsigned char *addr_buf,
	unsigned int addr_width)
{
	unsigned int value = addr_buf[2];

	value |= addr_buf[1] << 8;
	value |= addr_buf[0] << 16;

	if (addr_width != 4)
		return value;

	/* Four-byte addressing: make room for, and append, the last byte. */
	return (value << 8) | addr_buf[3];
}

/*
 * Build the flash address used for indirect access from the address bytes
 * of a command buffer (most significant byte first).
 *
 * @addr_buf:   address bytes
 * @addr_width: 4 selects a four-byte address; any other value uses the
 *              first three bytes only
 *
 * Returns the assembled address.
 */
static unsigned int cadence_qspi_indac_addr(const unsigned char *addr_buf,
	unsigned int addr_width)
{
	unsigned int addr = 0;
	unsigned int idx;

	/* The first three bytes are always part of the address. */
	for (idx = 0; idx < 3; idx++)
		addr = (addr << 8) | addr_buf[idx];

	if (addr_width == 4)
		addr = (addr << 8) | addr_buf[3];

	return addr;
}

/* Set the enable bit of the CONFIG register, leaving all other bits as is. */
void cadence_qspi_apb_controller_enable(void *reg_base)
{
	unsigned int cfg = readl(reg_base + CQSPI_REG_CONFIG);

	writel(cfg | CQSPI_REG_CONFIG_ENABLE_MASK,
	       reg_base + CQSPI_REG_CONFIG);
}

/* Clear the enable bit of the CONFIG register, leaving all other bits as is. */
void cadence_qspi_apb_controller_disable(void *reg_base)
{
	unsigned int cfg = readl(reg_base + CQSPI_REG_CONFIG);

	writel(cfg & ~CQSPI_REG_CONFIG_ENABLE_MASK,
	       reg_base + CQSPI_REG_CONFIG);
}

/* Clear the direct access bit of the CONFIG register. */
void disable_qspi_direct_access(void *reg_base)
{
	u32 cfg = readl(reg_base + CQSPI_REG_CONFIG);

	cfg &= ~(CQSPI_REG_CONFIG_DIRECT_MASK);
	writel(cfg, reg_base + CQSPI_REG_CONFIG);
}

/* Set the direct access bit of the CONFIG register. */
void enable_qspi_direct_access(void *reg_base)
{
	u32 cfg = readl(reg_base + CQSPI_REG_CONFIG);

	cfg |= CQSPI_REG_CONFIG_DIRECT_MASK;
	writel(cfg, reg_base + CQSPI_REG_CONFIG);
}

/*
 * Poll the controller until it reports idle.
 *
 * The idle bit must be observed CQSPI_POLL_IDLE_RETRY times in a row before
 * the controller is considered truly idle; any busy reading restarts the
 * count. Polling is bounded by a 5000 ms timer.
 *
 * Return 1 if idle, otherwise return 0 (busy after the timeout expired).
 */
static unsigned int cadence_qspi_wait_idle(void *reg_base)
{
	/* timeout in unit of ms */
	const unsigned int timeout = 5000;
	unsigned int start, count = 0;

	start = get_timer(0);
	while (get_timer(start) < timeout) {
		if (CQSPI_REG_IS_IDLE(reg_base))
			count++;
		else
			count = 0;
		/*
		 * Ensure the QSPI controller is in true idle state after
		 * reading back the same idle status consecutively
		 */
		if (count >= CQSPI_POLL_IDLE_RETRY)
			return 1;
	}

	/*
	 * Timeout, still in busy mode. Report the bound that actually
	 * applied (elapsed time), not an unrelated retry count.
	 */
	printf("QSPI: QSPI is still busy after polling for %u ms.\n",
	       timeout);
	return 0;
}

/*
 * Program the read data capture register.
 *
 * @reg_base: controller register base
 * @bypass:   non-zero sets the bypass bit, zero clears it
 * @delay:    value for the delay field (truncated to the field width)
 *
 * The controller is disabled while the register is updated and enabled
 * again afterwards.
 */
void cadence_qspi_apb_readdata_capture(void *reg_base,
				unsigned int bypass, unsigned int delay)
{
	const unsigned int bypass_bit = 1 << CQSPI_READLCAPTURE_BYPASS_LSB;
	unsigned int capture;

	cadence_qspi_apb_controller_disable(reg_base);

	capture = readl(reg_base + CQSPI_READLCAPTURE);

	/* Bypass bit follows the caller's request. */
	capture = bypass ? (capture | bypass_bit) : (capture & ~bypass_bit);

	/* Replace the delay field with the new value. */
	capture &= ~(CQSPI_READLCAPTURE_DELAY_MASK
		<< CQSPI_READLCAPTURE_DELAY_LSB);
	capture |= ((delay & CQSPI_READLCAPTURE_DELAY_MASK)
		<< CQSPI_READLCAPTURE_DELAY_LSB);

	writel(capture, reg_base + CQSPI_READLCAPTURE);

	cadence_qspi_apb_controller_enable(reg_base);
}

/*
 * Program the baud rate divisor field of the CONFIG register.
 *
 * @reg_base:   controller register base
 * @ref_clk_hz: reference clock frequency in Hz
 * @sclk_hz:    requested serial clock frequency in Hz
 *
 * The arithmetic below maps a ref_clk/sclk ratio of 2, 4, 6, ... to field
 * values 0, 1, 2, ... and rounds towards the slower clock when the ratio is
 * not an exact even number. The result is clamped to the largest value the
 * field can hold: previously a computed value of 16 was masked to 0, which
 * selected the fastest clock instead of the slowest. A requested frequency
 * of 0 selects the largest divisor rather than dividing by zero.
 *
 * The controller is disabled while the register is updated and enabled
 * again afterwards.
 */
void cadence_qspi_apb_config_baudrate_div(void *reg_base,
	unsigned int ref_clk_hz, unsigned int sclk_hz)
{
	unsigned int reg;
	unsigned int div;

	cadence_qspi_apb_controller_disable(reg_base);
	reg = readl(reg_base + CQSPI_REG_CONFIG);
	reg &= ~(CQSPI_REG_CONFIG_BAUD_MASK << CQSPI_REG_CONFIG_BAUD_LSB);

	if (!sclk_hz) {
		/* No usable target frequency: fall back to the slowest clock. */
		div = CQSPI_REG_CONFIG_BAUD_MASK;
	} else {
		div = ref_clk_hz / sclk_hz;

		if (div > 32)
			div = 32;

		if (div & 1) {
			/* Odd ratio: halving already rounds to a slower clock. */
			div = (div / 2);
		} else if (ref_clk_hz % sclk_hz) {
			/*
			 * Even ratio with a remainder: ensure generated SCLK
			 * doesn't exceed user specified sclk_hz.
			 */
			div = (div / 2);
		} else {
			/* Exact even ratio. */
			div = (div / 2) - 1;
		}
	}

	/* Saturate instead of letting the field mask wrap the value. */
	if (div > CQSPI_REG_CONFIG_BAUD_MASK)
		div = CQSPI_REG_CONFIG_BAUD_MASK;

	debug("%s: ref_clk %dHz sclk %dHz Div 0x%x\n", __func__,
	      ref_clk_hz, sclk_hz, div);

	div = (div & CQSPI_REG_CONFIG_BAUD_MASK) << CQSPI_REG_CONFIG_BAUD_LSB;
	reg |= div;
	writel(reg, reg_base + CQSPI_REG_CONFIG);

	cadence_qspi_apb_controller_enable(reg_base);
}

/*
 * Read bytes_to_read bytes from the AHB data window into rxbuf using
 * byte-wide accesses.
 *
 * On little-endian builds this is a single readsb() call. On big-endian
 * builds the leading multiple-of-4 part is read byte by byte, then four more
 * bytes are read into a scratch buffer and the trailing (bytes_to_read % 4)
 * bytes are taken from it in reverse order.
 */
static void cadence_qspi_readb(void *ahbbase, u8 *rxbuf,
                unsigned int bytes_to_read)
{
#ifdef CONFIG_SYS_BIG_ENDIAN
	int i, unaligned_bytes;
	u8 tmp[4];
	u8 *ptr = (u8 *) ahbbase;

	/* number of trailing bytes beyond the last full 4-byte group */
	unaligned_bytes = bytes_to_read % 4;

	/* read all the data which are aligned */
	for (i = 0; i < bytes_to_read - unaligned_bytes; i++) {
		(*(u8 *)(rxbuf + i)) = readb(ptr + i);
	}

	/*
	 * access up to the 4-byte aligned data
	 * NOTE(review): these four reads are issued even when unaligned_bytes
	 * is 0 - confirm this is intended.
	 */
	for (i = 0; i < 4; i++)
		tmp[i] = readb(ptr);

	/* swap this data to the rxbuf */
	for (i = 0; i < unaligned_bytes; i++)
		rxbuf[bytes_to_read - unaligned_bytes + i] = tmp[4 - i - 1];
#else
	readsb(ahbbase, rxbuf, bytes_to_read);
#endif
}

/*
 * Write write_bytes bytes from txbuf to the AHB data window using byte-wide
 * accesses.
 *
 * Returns 0 on success. On big-endian builds byte-wide writes are refused
 * and -EINVAL is returned without touching the hardware.
 */
static int cadence_qspi_writeb(void *ahbbase, const u8 *txbuf,
                unsigned int write_bytes)
{
#ifdef CONFIG_SYS_BIG_ENDIAN
	/* For BE systems, the issue arises due to the LE hardware.
	 * The writes to SRAM are in bytes and SRAM writes to flash
	 * in burst of halfwords and this leads to all sorts of
	 * endian swapping along the way. The only way to fix this
	 * is if we do extra bytes writes (force aligning) but that
	 * is DANGEROUS! Force the upper layers to write 4-byte aligned
	 */
	return -EINVAL;
#else
	writesb(ahbbase, txbuf, write_bytes);
	/* The caller tests the result, so success must be reported. */
	return 0;
#endif
}

/*
 * Read 32-bit words from the AHB data window into rxbuf.
 *
 * @ahbbase:       AHB data window
 * @rxbuf:         destination buffer
 * @bytes_to_read: despite its name this is a count of 32-bit WORDS (the
 *                 caller passes its byte count shifted right by two)
 */
static void cadence_qspi_readl(void *ahbbase, u8 *rxbuf,
				unsigned int bytes_to_read)
{
#ifdef CONFIG_SYS_BIG_ENDIAN
	int i;

	for (i = 0; i < bytes_to_read; i++)
		(*(u32 *)(rxbuf + i * 4)) = readl(ahbbase + i * 4);
#else
	readsl(ahbbase, rxbuf, bytes_to_read);
#endif
}

/*
 * Write 32-bit words from txbuf to the AHB data window.
 *
 * @ahbbase:     AHB data window
 * @txbuf:       source buffer
 * @write_bytes: despite its name this is a count of 32-bit WORDS (the
 *               caller passes its byte count shifted right by two)
 */
static void cadence_qspi_writel(void *ahbbase, const u8 *txbuf,
				unsigned int write_bytes)
{
#ifndef CONFIG_SYS_BIG_ENDIAN
	writesl(ahbbase, txbuf, write_bytes);
#else
	int word;

	/* Every word goes to the same window address. */
	for (word = 0; word < write_bytes; word++)
		writel((*(u32 *)(txbuf + word * 4)), ahbbase);
#endif
}

/*
 * Select the SPI clock polarity and phase.
 *
 * @reg_base: controller register base
 * @clk_pol:  clock polarity, only bit 0 is used
 * @clk_pha:  clock phase, only bit 0 is used
 *
 * The controller is disabled while the CONFIG register is updated and
 * enabled again afterwards.
 */
void cadence_qspi_apb_set_clk_mode(void *reg_base,
	unsigned int clk_pol, unsigned int clk_pha)
{
	unsigned int reg;

	cadence_qspi_apb_controller_disable(reg_base);
	reg = readl(reg_base + CQSPI_REG_CONFIG);

	/*
	 * Clear both mode bits. The bit positions must be turned into masks
	 * individually; OR-ing the positions and shifting once would clear an
	 * unrelated bit and leave stale polarity/phase settings in place.
	 */
	reg &= ~(BIT(CQSPI_REG_CONFIG_CLK_POL_LSB) |
		 BIT(CQSPI_REG_CONFIG_CLK_PHA_LSB));

	reg |= ((clk_pol & 0x1) << CQSPI_REG_CONFIG_CLK_POL_LSB);
	reg |= ((clk_pha & 0x1) << CQSPI_REG_CONFIG_CLK_PHA_LSB);

	writel(reg, reg_base + CQSPI_REG_CONFIG);

	cadence_qspi_apb_controller_enable(reg_base);
}

/*
 * Program the chip select field and the decoder bit of the CONFIG register.
 *
 * @reg_base:       controller register base
 * @chip_select:    chip select number
 * @decoder_enable: non-zero sets the decoder bit and writes chip_select
 *                  unchanged; zero clears the decoder bit and writes a
 *                  one-cold pattern instead
 *
 * The controller is disabled while the register is updated and enabled
 * again afterwards.
 */
void cadence_qspi_apb_chipselect(void *reg_base,
	unsigned int chip_select, unsigned int decoder_enable)
{
	unsigned int cfg;
	unsigned int cs_field;

	cadence_qspi_apb_controller_disable(reg_base);

	debug("%s : chipselect %d decode %d\n", __func__, chip_select,
	      decoder_enable);

	cfg = readl(reg_base + CQSPI_REG_CONFIG);

	if (!decoder_enable) {
		cfg &= ~CQSPI_REG_CONFIG_DECODE_MASK;
		/*
		 * Without the decoder the field is one-cold:
		 * CS0 -> 4b'1110, CS1 -> 4b'1101,
		 * CS2 -> 4b'1011, CS3 -> 4b'0111
		 */
		cs_field = 0xF & ~(1 << chip_select);
	} else {
		cfg |= CQSPI_REG_CONFIG_DECODE_MASK;
		cs_field = chip_select;
	}

	/* Replace the chip select field. */
	cfg &= ~(CQSPI_REG_CONFIG_CHIPSELECT_MASK
			<< CQSPI_REG_CONFIG_CHIPSELECT_LSB);
	cfg |= (cs_field & CQSPI_REG_CONFIG_CHIPSELECT_MASK)
			<< CQSPI_REG_CONFIG_CHIPSELECT_LSB;
	writel(cfg, reg_base + CQSPI_REG_CONFIG);

	cadence_qspi_apb_controller_enable(reg_base);
}

/*
 * Program the four delay fields of the DELAY register.
 *
 * @reg_base: controller register base
 * @ref_clk:  reference clock frequency in Hz
 * @sclk_hz:  serial clock frequency in Hz
 * @tshsl_ns, @tsd2d_ns, @tchsh_ns, @tslch_ns: requested delays in ns
 *
 * Each delay is converted with CQSPI_CAL_DELAY(), incremented by one
 * and truncated to its 8-bit field. The controller is disabled while the
 * register is written and enabled again afterwards.
 *
 * NOTE(review): the arithmetic is unsigned, so a requested delay shorter
 * than one serial clock period wraps around before the division - confirm
 * callers never pass such values.
 */
void cadence_qspi_apb_delay(void *reg_base,
	unsigned int ref_clk, unsigned int sclk_hz,
	unsigned int tshsl_ns, unsigned int tsd2d_ns,
	unsigned int tchsh_ns, unsigned int tslch_ns)
{
	unsigned int ref_period_ns;
	unsigned int sclk_period_ns;
	unsigned int shsl, chsh, slch, sd2d;
	unsigned int delay_reg = 0;

	cadence_qspi_apb_controller_disable(reg_base);

	/* Clock periods in ns. */
	ref_period_ns = (1000000000) / ref_clk;
	sclk_period_ns = (1000000000) / sclk_hz;

	/* One extra cycle to round up. */
	shsl = 1 + CQSPI_CAL_DELAY(tshsl_ns, ref_period_ns, sclk_period_ns);
	chsh = 1 + CQSPI_CAL_DELAY(tchsh_ns, ref_period_ns, sclk_period_ns);
	slch = 1 + CQSPI_CAL_DELAY(tslch_ns, ref_period_ns, sclk_period_ns);
	sd2d = 1 + CQSPI_CAL_DELAY(tsd2d_ns, ref_period_ns, sclk_period_ns);

	delay_reg |= (shsl & CQSPI_REG_DELAY_TSHSL_MASK)
			<< CQSPI_REG_DELAY_TSHSL_LSB;
	delay_reg |= (chsh & CQSPI_REG_DELAY_TCHSH_MASK)
			<< CQSPI_REG_DELAY_TCHSH_LSB;
	delay_reg |= (slch & CQSPI_REG_DELAY_TSLCH_MASK)
			<< CQSPI_REG_DELAY_TSLCH_LSB;
	delay_reg |= (sd2d & CQSPI_REG_DELAY_TSD2D_MASK)
			<< CQSPI_REG_DELAY_TSD2D_LSB;
	writel(delay_reg, reg_base + CQSPI_REG_DELAY);

	cadence_qspi_apb_controller_enable(reg_base);
}

/*
 * Bring the controller into a known state for indirect access.
 *
 * With the controller disabled: optionally initialise the PHY/DLL
 * (CONFIG_CADENCE_PHY_MODE), program page and block size, clear the remap
 * register, set the SRAM partition, mask all interrupts and turn off direct
 * access. The controller is then enabled and the function waits until it
 * reports idle.
 *
 * @plat: platform data providing regbase, page_size, block_size, sram_size
 */
void cadence_qspi_apb_controller_init(struct cadence_spi_platdata *plat)
{
	unsigned long reg;
	/* only used by the PHY mode block below */
	unsigned long timeout;

	cadence_qspi_apb_controller_disable(plat->regbase);

#ifdef CONFIG_CADENCE_PHY_MODE
	/* phy mode initialization */
	reg = 0x00000000;
	writel(reg, plat->regbase + CQSPI_PHY_CONFIG_REG);
	reg = 0x14 << CQSPI_PHY_MASTER_INITIAL_DELAY_LSB;
	writel(reg, plat->regbase + CQSPI_PHY_MASTER_CONTROL_REG);
	reg = 0x4 << CQSPI_PHY_CONFIG_TX_DLL_DELAY_LSB;
	reg |= (0x1 << CQSPI_PHY_CONFIG_RESYNC_LSB);
	reg |= (0x1 << CQSPI_PHY_CONFIG_RESET_LSB);
	writel(reg, plat->regbase + CQSPI_PHY_CONFIG_REG);

	/* poll the DLL lock bit; give up (with a message) after 0xffff polls */
	timeout = 0xffff;
	do {
		if (timeout-- == 0) {
			printf("timeout waiting for qspi dll lock\n");
			break;
		}
		reg = readl(plat->regbase + CQSPI_DLL_OBSERVABLE_LOWER_REG);

	} while ((reg & DLL_OBSERVABLE_LOWER_DLL_LOCK_INC_MASK)
			!= DLL_OBSERVABLE_LOWER_DLL_LOCK_INC_MASK);

	udelay(100);
#endif

	/* Configure the device size and address bytes */
	reg = readl(plat->regbase + CQSPI_REG_SIZE);
	/* Clear the previous value */
	reg &= ~(CQSPI_REG_SIZE_PAGE_MASK << CQSPI_REG_SIZE_PAGE_LSB);
	reg &= ~(CQSPI_REG_SIZE_BLOCK_MASK << CQSPI_REG_SIZE_BLOCK_LSB);
	/* the platform values are OR-ed in without being masked to field width */
	reg |= (plat->page_size << CQSPI_REG_SIZE_PAGE_LSB);
	reg |= (plat->block_size << CQSPI_REG_SIZE_BLOCK_LSB);
	writel(reg, plat->regbase + CQSPI_REG_SIZE);

	/* Configure the remap address register, no remap */
	writel(0, plat->regbase + CQSPI_REG_REMAP);

	/* Indirect mode configurations */
	writel((plat->sram_size), plat->regbase + CQSPI_REG_SRAMPARTITION);

	/* Disable all interrupts */
	writel(0, plat->regbase + CQSPI_REG_IRQMASK);

	/* disable direct access mode */
	disable_qspi_direct_access(plat->regbase);
	
	/* re-enable apb controller */
	cadence_qspi_apb_controller_enable(plat->regbase);

	/*
	 * wait till qspi is idle; each call polls for up to its own timeout
	 * and the loop repeats until idle is reported
	 */
	while (!cadence_qspi_wait_idle(plat->regbase));
	udelay(10);

	return;
}

/*
 * Issue a flash command through the CMDCTRL register and wait for it to
 * finish.
 *
 * @reg_base: controller register base
 * @reg:      CMDCTRL value describing the command (execute bit not set)
 *
 * The in-progress bit is polled up to CQSPI_REG_RETRY times, 1us apart.
 * Returns 0 on success or -EIO if the command is still in progress after
 * the last poll.
 */
static int cadence_qspi_apb_exec_flash_cmd(void *reg_base,
	unsigned int reg)
{
	unsigned int retry;

	/* Write the CMDCTRL without start execution. */
	writel(reg, reg_base + CQSPI_REG_CMDCTRL);
	/* Start execute */
	reg |= CQSPI_REG_CMDCTRL_EXECUTE_MASK;
	writel(reg, reg_base + CQSPI_REG_CMDCTRL);

	/*
	 * Count down in the loop header so that retry is exactly 0 only when
	 * every poll was used up. A post-decrement in the loop condition
	 * leaves the counter wrapped on exhaustion and at 0 on a last-poll
	 * success, which inverts the timeout check.
	 */
	for (retry = CQSPI_REG_RETRY; retry; retry--) {
		reg = readl(reg_base + CQSPI_REG_CMDCTRL);
		if ((reg & CQSPI_REG_CMDCTRL_INPROGRESS_MASK) == 0)
			break;
		udelay(1);
	}

	if (!retry) {
		printf("QSPI: flash command execution timeout\n");
		return -EIO;
	}

	/* Polling QSPI idle status. */
	while (!cadence_qspi_wait_idle(reg_base));

	return 0;
}

/*
 * Execute a flash command that returns up to CQSPI_STIG_DATA_LEN_MAX bytes.
 * For command RDID, RDSR.
 *
 * @reg_base: controller register base
 * @cmdlen:   length of cmdbuf, must be non-zero
 * @cmdbuf:   command buffer; only the opcode in cmdbuf[0] is used
 * @rxlen:    number of bytes to read
 * @rxbuf:    destination buffer, must not be NULL
 *
 * Returns 0 on success, -EINVAL on invalid arguments, or the error returned
 * by the command execution.
 */
int cadence_qspi_apb_command_read(void *reg_base,
	unsigned int cmdlen, const u8 *cmdbuf, unsigned int rxlen,
	u8 *rxbuf)
{
	unsigned int reg;
	unsigned int i;
	int status;

	if (!cmdlen || rxlen > CQSPI_STIG_DATA_LEN_MAX || rxbuf == NULL ||
	    cmdbuf == NULL) {
		printf("QSPI: Invalid input arguments cmdlen %d rxlen %d\n",
		       cmdlen, rxlen);
		return -EINVAL;
	}

	/* Cast first: shifting a promoted int into the sign bit is undefined. */
	reg = (unsigned int)cmdbuf[0] << CQSPI_REG_CMDCTRL_OPCODE_LSB;

	reg |= (0x1 << CQSPI_REG_CMDCTRL_RD_EN_LSB);

	/* 0 means 1 byte. */
	reg |= (((rxlen - 1) & CQSPI_REG_CMDCTRL_RD_BYTES_MASK)
		<< CQSPI_REG_CMDCTRL_RD_BYTES_LSB);
	status = cadence_qspi_apb_exec_flash_cmd(reg_base, reg);
	if (status != 0)
		return status;

	/*
	 * Unpack the data registers least significant byte first. Working on
	 * the register value with shifts gives the same byte order on both
	 * little- and big-endian CPUs.
	 */
	reg = readl(reg_base + CQSPI_REG_CMDREADDATALOWER);
	for (i = 0; i < rxlen && i < 4; i++)
		rxbuf[i] = (reg >> (8 * i)) & 0xFF;

	if (rxlen > 4) {
		reg = readl(reg_base + CQSPI_REG_CMDREADDATAUPPER);
		for (i = 4; i < rxlen; i++)
			rxbuf[i] = (reg >> (8 * (i - 4))) & 0xFF;
	}
	return 0;
}

/*
 * Execute a flash command that optionally carries an address and up to
 * 8 bytes of data.
 * For commands: WRSR, WREN, WRDI, CHIP_ERASE, BE, etc.
 *
 * @reg_base: controller register base
 * @cmdlen:   length of cmdbuf (1..5); 4 or 5 means opcode plus a 3- or
 *            4-byte address
 * @cmdbuf:   opcode followed by the optional address bytes
 * @txlen:    number of data bytes (0..8)
 * @txbuf:    data bytes, may be NULL only when txlen is 0
 *
 * Returns 0 on success, -EINVAL on invalid arguments, or the error returned
 * by the command execution.
 */
int cadence_qspi_apb_command_write(void *reg_base, unsigned int cmdlen,
	const u8 *cmdbuf, unsigned int txlen,  const u8 *txbuf)
{
	unsigned int reg = 0;
	unsigned int addr_value;
	unsigned int wr_data;
	unsigned int i;
	int status;

	if (!cmdlen || cmdlen > 5 || txlen > 8 || cmdbuf == NULL ||
	    (txlen && txbuf == NULL)) {
		printf("QSPI: Invalid input arguments cmdlen %d txlen %d\n",
		       cmdlen, txlen);
		return -EINVAL;
	}

	/* Cast first: shifting a promoted int into the sign bit is undefined. */
	reg |= (unsigned int)cmdbuf[0] << CQSPI_REG_CMDCTRL_OPCODE_LSB;

	if (cmdlen == 4 || cmdlen == 5) {
		/* Command with address */
		reg |= (0x1 << CQSPI_REG_CMDCTRL_ADDR_EN_LSB);
		/* Address byte count field: cmdlen 4 -> 2, cmdlen 5 -> 3. */
		reg |= ((cmdlen - 2) & CQSPI_REG_CMDCTRL_ADD_BYTES_MASK)
			<< CQSPI_REG_CMDCTRL_ADD_BYTES_LSB;
		/* Get address */
		addr_value = cadence_qspi_apb_cmd2addr(&cmdbuf[1],
			cmdlen >= 5 ? 4 : 3);

		writel(addr_value, reg_base + CQSPI_REG_CMDADDRESS);
	}

	if (txlen) {
		/* writing data = yes */
		reg |= (0x1 << CQSPI_REG_CMDCTRL_WR_EN_LSB);
		reg |= ((txlen - 1) & CQSPI_REG_CMDCTRL_WR_BYTES_MASK)
			<< CQSPI_REG_CMDCTRL_WR_BYTES_LSB;

		/*
		 * Pack the data least significant byte first. Building the
		 * value with shifts yields the same register contents on
		 * little- and big-endian CPUs for BOTH data words, and unused
		 * bytes are zero instead of stale stack contents.
		 */
		wr_data = 0;
		for (i = 0; i < txlen && i < 4; i++)
			wr_data |= (unsigned int)txbuf[i] << (8 * i);
		writel(wr_data, reg_base +
			CQSPI_REG_CMDWRITEDATALOWER);

		if (txlen > 4) {
			wr_data = 0;
			for (i = 4; i < txlen; i++)
				wr_data |= (unsigned int)txbuf[i]
						<< (8 * (i - 4));
			writel(wr_data, reg_base +
				CQSPI_REG_CMDWRITEDATAUPPER);
		}
	}

	/* Execute the command */
	status = cadence_qspi_apb_exec_flash_cmd(reg_base, reg);
	if (status != 0)
		return status;

	udelay(30);
	return 0;
}

/*
 * Prepare the controller for an indirect read.
 * Opcode + Address (3/4 bytes) + dummy bytes (0-4 bytes)
 *
 * Programs the indirect trigger address, the read instruction register
 * (opcode, transfer types, mode bit enable, dummy clocks), the read start
 * address and the address-size field of the SIZE register.
 *
 * @plat:   platform data (regbase, ahbbase)
 * @cmdlen: length of cmdbuf in bytes
 * @cmdbuf: opcode followed by the address bytes; any remaining length is
 *          counted as dummy bytes
 *
 * Always returns 0.
 */
int cadence_qspi_apb_indirect_read_setup(struct cadence_spi_platdata *plat,
	unsigned int cmdlen, const u8 *cmdbuf)
{
	unsigned int reg;
	unsigned int rd_reg;
	unsigned int addr_value;
	unsigned int dummy_clk;
	unsigned int dummy_bytes;
	unsigned int addr_bytes;

	/*
	 * Identify addr_byte. All NOR flash device drivers are using fast read
	 * which always expecting 1 dummy byte, 1 cmd byte and 3/4 addr byte.
	 * With that, the length is in value of 5 or 6. Only FRAM chip from
	 * ramtron using normal read (which won't need dummy byte).
	 * Unlikely NOR flash using normal read due to performance issue.
	 */
	if (cmdlen >= 5)
		/* to cater fast read where cmd + addr + dummy */
		addr_bytes = cmdlen - 2;
	else
		/* for normal read (only ramtron as of now) */
		addr_bytes = cmdlen - 1;

	/* Setup the indirect trigger address */
	writel(((u32)plat->ahbbase & CQSPI_INDIRECTTRIGGER_ADDR_MASK),
	       plat->regbase + CQSPI_REG_INDIRECTTRIGGER);

	/* Configure the opcode */
	rd_reg = cmdbuf[0] << CQSPI_REG_RD_INSTR_OPCODE_LSB;

#ifdef CONFIG_SPI_FLASH_QUAD
	/*
	 * Both the data and the address transfer type fields are set to
	 * CQSPI_INST_TYPE_QUAD; the instruction type field is left at 0.
	 */
	rd_reg |= CQSPI_INST_TYPE_QUAD << CQSPI_REG_RD_INSTR_TYPE_DATA_LSB;
	rd_reg |= CQSPI_INST_TYPE_QUAD << CQSPI_REG_RD_INSTR_TYPE_ADDR_LSB;
#endif

	/* The remaining length is dummy bytes. */
	dummy_bytes = cmdlen - addr_bytes - 1;

	/* Get address */
#ifdef CONFIG_SYS_BIG_ENDIAN
	addr_value = cadence_qspi_apb_cmd2addr(&cmdbuf[1], 
				((dummy_bytes) ? addr_bytes - 1: addr_bytes));
#else
	addr_value = cadence_qspi_indac_addr(&cmdbuf[1], addr_bytes);
#endif /* CONFIG_SYS_BIG_ENDIAN */
	writel(addr_value, plat->regbase + CQSPI_REG_INDIRECTRDSTARTADDR);

	if (dummy_bytes) {
		if (dummy_bytes > CQSPI_DUMMY_BYTES_MAX)
			dummy_bytes = CQSPI_DUMMY_BYTES_MAX;

#ifndef CONFIG_QSPI_QUAD_MODE
		/* This configuration causes a byte skew in the 4-read 
		 * direction observed in Lantiq Cadence based QSPI IP
		 */
		rd_reg |= (1 << CQSPI_REG_RD_INSTR_MODE_EN_LSB);
#endif /* CONFIG_QSPI_QUAD_MODE */

		/* mode byte value: 0x0 for SPL XIP builds, 0xFF otherwise */
#if defined(CONFIG_SPL_SPI_XIP) && defined(CONFIG_SPL_BUILD)
		writel(0x0, plat->regbase + CQSPI_REG_MODE_BIT);
#else
		writel(0xFF, plat->regbase + CQSPI_REG_MODE_BIT);
#endif
		/* Convert to clock cycles. */
		dummy_clk = dummy_bytes * CQSPI_DUMMY_CLKS_PER_BYTE;

#ifndef CONFIG_QSPI_QUAD_MODE
		/* Need to minus the mode byte (8 clocks). 
		 * In the Cadence IP for Intel chips, this causes
		 * two bytes shift on the line in 4-read mode
		 */
		dummy_clk -= CQSPI_DUMMY_CLKS_PER_BYTE;
#endif /* CONFIG_QSPI_QUAD_MODE */

		if (dummy_clk)
			rd_reg |= (dummy_clk & CQSPI_REG_RD_INSTR_DUMMY_MASK)
				<< CQSPI_REG_RD_INSTR_DUMMY_LSB;
	}
	writel(rd_reg, plat->regbase + CQSPI_REG_RD_INSTR);

	/* set device size */
	reg = readl(plat->regbase + CQSPI_REG_SIZE);
	reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK;
#ifdef CONFIG_SPI_FLASH_QUAD
	/* addr_bytes contains dummy bytes
	 * The controller needs to know the
	 * exact dummy/addr bytes as described
	 * in the flash datasheet
	 */
	reg |= (addr_bytes - 1 - dummy_bytes);
#else
	reg |= (addr_bytes - 1);
#endif
	writel(reg, plat->regbase + CQSPI_REG_SIZE);
	return 0;
}

/* Return the read-side fill level field of the SRAM level register. */
static u32 cadence_qspi_get_rd_sram_level(struct cadence_spi_platdata *plat)
{
	const u32 level = readl(plat->regbase + CQSPI_REG_SDRAMLEVEL);

	return (level >> CQSPI_REG_SDRAMLEVEL_RD_LSB) &
		CQSPI_REG_SDRAMLEVEL_RD_MASK;
}

/*
 * Wait until the read SRAM level becomes non-zero.
 *
 * The level is sampled up to 10000 times with a 1us delay after each empty
 * reading. Returns the non-zero level, or -ETIMEDOUT if it stayed at zero.
 */
static int cadence_qspi_wait_for_data(struct cadence_spi_platdata *plat)
{
	unsigned int attempt;
	u32 level;

	for (attempt = 0; attempt < 10000; attempt++) {
		level = cadence_qspi_get_rd_sram_level(plat);
		if (level)
			return level;
		udelay(1);
	}

	return -ETIMEDOUT;
}

/*
 * Run an indirect read that was prepared by
 * cadence_qspi_apb_indirect_read_setup() and copy the data to rxbuf.
 *
 * @plat:  platform data (regbase, ahbbase)
 * @n_rx:  number of bytes to read
 * @rxbuf: destination buffer
 *
 * Data is drained from the SRAM in chunks of at most 64 bytes, using word
 * accesses when both the buffer and the chunk length are 4-byte aligned and
 * byte accesses otherwise.
 *
 * Returns 0 on success. On a data timeout or completion error the indirect
 * read is cancelled and the negative error code is returned.
 */
int cadence_qspi_apb_indirect_read_execute(struct cadence_spi_platdata *plat,
	unsigned int n_rx, u8 *rxbuf)
{
	unsigned int remaining = n_rx;
	unsigned int bytes_to_read = 0;
	int ret;

#ifdef CONFIG_SYS_BIG_ENDIAN
	/* Ugly work around for unaligned access to flash in BE mode.
	 * So we trick the hardware to read 4 bytes aligned data so that
	 * we can access the last byte read by the hardware in the SRAM
	 * and do the swap for the last half-word. The hardware is in LE
	 * mode, hence data read for half-word will not be what we want
	 * it to be at the correct memory area
	*/
	unsigned int unaligned_read;
	if (n_rx % 4) {
		/* round the transfer length up to the next multiple of 4 */
		unaligned_read = (n_rx + (4 - (n_rx % 4)));
		writel(unaligned_read, plat->regbase + CQSPI_REG_INDIRECTRDBYTES);
	} else {
		writel(n_rx, plat->regbase + CQSPI_REG_INDIRECTRDBYTES);
	}
#else
	writel(n_rx, plat->regbase + CQSPI_REG_INDIRECTRDBYTES);
#endif
	/* set the trigger address range correctly else the
	 * sram level will go beyond the read fifo level at
	 * high freq. causing an exception during reads
	 */
	writel(0x8, plat->regbase + CQSPI_INDIRECT_TRIGGER_ADDR_RANGE_REG);
	/* Start the indirect read transfer */
	writel(CQSPI_REG_INDIRECTRD_START_MASK,
	       plat->regbase + CQSPI_REG_INDIRECTRD);

	while (remaining > 0) {
		ret = cadence_qspi_wait_for_data(plat);
		if (ret < 0) {
			/* This is the read path; say so in the message. */
			printf("Indirect read timed out (%i)\n", ret);
			goto failrd;
		}

		/* SRAM fill level, in words */
		bytes_to_read = ret;

		while (bytes_to_read != 0) {
			/* words -> bytes, capped by what is left and by 64 */
			bytes_to_read *= CQSPI_FIFO_WIDTH;
			bytes_to_read = bytes_to_read > remaining ?
					remaining : bytes_to_read;
			bytes_to_read = bytes_to_read > 64 ?
					64 : bytes_to_read;
			/* Handle non-4-byte aligned access to avoid data abort. */
			if (((uintptr_t)rxbuf % 4) || (bytes_to_read % 4))
				cadence_qspi_readb(plat->ahbbase, rxbuf, bytes_to_read);
			else
				cadence_qspi_readl(plat->ahbbase, rxbuf, bytes_to_read >> 2);

			rxbuf += bytes_to_read;
			remaining -= bytes_to_read;
			bytes_to_read = cadence_qspi_get_rd_sram_level(plat);
		}
	}

	/* Check indirect done status */
	ret = wait_for_bit("QSPI", plat->regbase + CQSPI_REG_INDIRECTRD,
			   CQSPI_REG_INDIRECTRD_DONE_MASK, 1, 10, 0);
	if (ret) {
		printf("Indirect read completion error (%i)\n", ret);
		goto failrd;
	}

	/* wait till qspi bus is ready */
	while (!cadence_qspi_wait_idle(plat->regbase));

	/* Clear indirect completion status */
	writel(CQSPI_REG_INDIRECTRD_DONE_MASK,
	       plat->regbase + CQSPI_REG_INDIRECTRD);

	return 0;

failrd:
	/* Cancel the indirect read */
	writel(CQSPI_REG_INDIRECTRD_CANCEL_MASK,
	       plat->regbase + CQSPI_REG_INDIRECTRD);
	return ret;
}

/* Opcode + Address (3/4 bytes) */
int cadence_qspi_apb_indirect_write_setup(struct cadence_spi_platdata *plat,
	unsigned int cmdlen, const u8 *cmdbuf)
{
	unsigned int reg;
	unsigned int addr_bytes = cmdlen > 4 ? 4 : 3;

	if (cmdlen < 4 || cmdbuf == NULL) {
		printf("QSPI: Invalid input argument, len %d cmdbuf 0x%08x\n",
		       cmdlen, (unsigned int)cmdbuf);
		return -EINVAL;
	}
	/* Setup the indirect trigger address */
	writel(((u32)plat->ahbbase & CQSPI_INDIRECTTRIGGER_ADDR_MASK),
	       plat->regbase + CQSPI_REG_INDIRECTTRIGGER);

	/* Configure the opcode */
	reg = cmdbuf[0] << CQSPI_REG_WR_INSTR_OPCODE_LSB;
#ifdef CONFIG_SPI_FLASH_QUAD
	reg |= CQSPI_INST_TYPE_QUAD << CQSPI_REG_RD_INSTR_TYPE_DATA_LSB;
	reg |= CQSPI_INST_TYPE_QUAD << CQSPI_REG_RD_INSTR_TYPE_ADDR_LSB;
#endif
	writel(reg, plat->regbase + CQSPI_REG_WR_INSTR);

	/* Setup write address. */
#ifdef CONFIG_SYS_BIG_ENDIAN
	reg = cadence_qspi_indac_addr(&cmdbuf[1], addr_bytes);
#else
	reg = cadence_qspi_apb_cmd2addr(&cmdbuf[1], addr_bytes);
#endif /* CONFIG_SYS_BIG_ENDIAN */
	writel(reg, plat->regbase + CQSPI_REG_INDIRECTWRSTARTADDR);

	reg = readl(plat->regbase + CQSPI_REG_SIZE);
	reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK;
	reg |= (addr_bytes - 1);
	writel(reg, plat->regbase + CQSPI_REG_SIZE);
	return 0;
}

/*
 * Run an indirect write that was prepared by
 * cadence_qspi_apb_indirect_write_setup(), feeding txbuf to the controller.
 *
 * @plat:  platform data (regbase, ahbbase, page_size)
 * @n_tx:  number of bytes to write
 * @txbuf: source buffer
 *
 * Data is pushed in chunks of at most one page; after each chunk the
 * function waits for the write SRAM level to drop to zero.
 *
 * Returns 0 on success. On any failure the indirect write is cancelled and
 * the error code is returned.
 */
int cadence_qspi_apb_indirect_write_execute(struct cadence_spi_platdata *plat,
	unsigned int n_tx, const u8 *txbuf)
{
	const unsigned int page_size = plat->page_size;
	unsigned int left = n_tx;
	unsigned int chunk;
	int ret;

	/* Total number of bytes of this indirect write */
	writel(n_tx, plat->regbase + CQSPI_REG_INDIRECTWRBYTES);

	/* Kick off the transfer */
	writel(CQSPI_REG_INDIRECTWR_START_MASK,
	       plat->regbase + CQSPI_REG_INDIRECTWR);

	while (left > 0) {
		chunk = left;
		if (chunk > page_size)
			chunk = page_size;

		/* Word access only when buffer and length are 4-byte aligned. */
		if (!((uintptr_t)txbuf % 4) && !(chunk % 4)) {
			cadence_qspi_writel(plat->ahbbase, txbuf, chunk >> 2);
		} else {
			ret = cadence_qspi_writeb(plat->ahbbase, txbuf, chunk);
			if (ret) {
				printf("Unaligned write failed! For BE systems, avoid unaligned writes\n");
				goto failwr;
			}
		}

		/* Wait for the write SRAM level to drain */
		ret = wait_for_bit("QSPI", plat->regbase + CQSPI_REG_SDRAMLEVEL,
				   CQSPI_REG_SDRAMLEVEL_WR_MASK <<
				   CQSPI_REG_SDRAMLEVEL_WR_LSB, 0, 10, 0);
		if (ret) {
			printf("Indirect write timed out (%i)\n", ret);
			goto failwr;
		}

		left -= chunk;
		txbuf += chunk;
	}

	/* The in-progress bit must clear before the write counts as done */
	ret = wait_for_bit("QSPI", plat->regbase + CQSPI_REG_INDIRECTWR,
				CQSPI_REG_INDIRECTWR_INPROGRESS_MASK, 0, 10, 0);
	if (ret) {
		printf("Indirect write completion error (%i)\n", ret);
		goto failwr;
	}

	/* wait till SPI bus is ready */
	while (!cadence_qspi_wait_idle(plat->regbase));

	/* Acknowledge the completion status */
	writel(CQSPI_REG_INDIRECTWR_DONE_MASK,
	       plat->regbase + CQSPI_REG_INDIRECTWR);

	return 0;

failwr:
	/* Abort the indirect write */
	writel(CQSPI_REG_INDIRECTWR_CANCEL_MASK,
	       plat->regbase + CQSPI_REG_INDIRECTWR);
	return ret;
}

/*
 * Switch the controller to XiP operation.
 *
 * Sets the enable, direct access and immediate-XiP bits in CONFIG, writes
 * xip_dummy to the mode bit register and turns on the mode-bit enable in the
 * read instruction register.
 *
 * @reg_base:  controller register base
 * @xip_dummy: value written to the mode bit register
 */
void cadence_qspi_apb_enter_xip(void *reg_base, char xip_dummy)
{
	unsigned int val;

	/* enter XiP mode immediately and enable direct mode */
	val = readl(reg_base + CQSPI_REG_CONFIG);
	val |= CQSPI_REG_CONFIG_ENABLE_MASK |
	       CQSPI_REG_CONFIG_DIRECT_MASK |
	       CQSPI_REG_CONFIG_XIP_IMM_MASK;
	writel(val, reg_base + CQSPI_REG_CONFIG);

	/* keep the XiP mode */
	writel(xip_dummy, reg_base + CQSPI_REG_MODE_BIT);

	/* Enable mode bit at devrd */
	val = readl(reg_base + CQSPI_REG_RD_INSTR);
	val |= (1 << CQSPI_REG_RD_INSTR_MODE_EN_LSB);
	writel(val, reg_base + CQSPI_REG_RD_INSTR);
}
