/* Copyright (c) 2008-2013, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */
/*
 * SPI driver for Qualcomm MSM platforms
 *
 */
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/irq.h>
#include <linux/platform_device.h>
#include <linux/spi/spi.h>
#include <linux/interrupt.h>
#include <linux/err.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/workqueue.h>
#include <linux/io.h>
#include <linux/debugfs.h>
#include <mach/msm_spi.h>
#include <linux/dma-mapping.h>
#include <linux/sched.h>
#include <mach/dma.h>
#include <asm/atomic.h>
#include <linux/mutex.h>
#include <linux/gpio.h>
#include <linux/remote_spinlock.h>
#include <linux/pm_qos.h>
#include <linux/of.h>
#include <linux/of_gpio.h>
#include <linux/pm_runtime.h>
#include "spi_qsd.h"

/*
 * Forward declarations for two static functions whose definitions are not
 * in this part of the file, so that earlier code may reference them.
 */
static int msm_spi_pm_resume_runtime(struct device *device);
static int msm_spi_pm_suspend_runtime(struct device *device);

static inline int msm_spi_configure_gsbi(struct msm_spi *dd,
					struct platform_device *pdev)
{
	struct resource *resource;
	unsigned long   gsbi_mem_phys_addr;
	size_t          gsbi_mem_size;
	void __iomem    *gsbi_base;

	resource  = platform_get_resource(pdev, IORESOURCE_MEM, 1);
	if (!resource)
		return 0;

	gsbi_mem_phys_addr = resource->start;
	gsbi_mem_size = resource_size(resource);
	if (!devm_request_mem_region(&pdev->dev, gsbi_mem_phys_addr,
					gsbi_mem_size, SPI_DRV_NAME))
		return -ENXIO;

	gsbi_base = devm_ioremap(&pdev->dev, gsbi_mem_phys_addr,
					gsbi_mem_size);
	if (!gsbi_base)
		return -ENXIO;

	/* Set GSBI to SPI mode */
	writel_relaxed(GSBI_SPI_CONFIG, gsbi_base + GSBI_CTRL_REG);

	return 0;
}

/*
 * msm_spi_register_init - bring the core registers to a known state
 * @dd: controller instance
 *
 * Writes 1 to SPI_SW_RESET, requests the RESET operational state, then
 * zeroes SPI_OPERATIONAL, SPI_CONFIG and SPI_IO_MODES. When dd->qup_ver is
 * non-zero QUP_OPERATIONAL_MASK is zeroed as well. The result of the state
 * change is not checked.
 */
static inline void msm_spi_register_init(struct msm_spi *dd)
{
	writel_relaxed(1, dd->base + SPI_SW_RESET);
	msm_spi_set_state(dd, SPI_OP_STATE_RESET);

	writel_relaxed(0, dd->base + SPI_OPERATIONAL);
	writel_relaxed(0, dd->base + SPI_CONFIG);
	writel_relaxed(0, dd->base + SPI_IO_MODES);

	if (!dd->qup_ver)
		return;

	writel_relaxed(0, dd->base + QUP_OPERATIONAL_MASK);
}

/*
 * msm_spi_request_gpios - claim every valid GPIO listed in dd->spi_gpios
 * @dd: controller instance
 *
 * Entries with a negative GPIO number are skipped. If any request fails,
 * the GPIOs claimed so far are released again and the gpio_request() error
 * code is returned.
 *
 * Returns 0 on success or the negative error from gpio_request().
 */
static inline int msm_spi_request_gpios(struct msm_spi *dd)
{
	int idx;
	int rc;

	for (idx = 0; idx < ARRAY_SIZE(spi_rsrcs); idx++) {
		if (dd->spi_gpios[idx] < 0)
			continue;

		rc = gpio_request(dd->spi_gpios[idx], spi_rsrcs[idx]);
		if (!rc)
			continue;

		dev_err(dd->dev,
			"%s: gpio_request for pin %d failed with error %d\n",
			__func__, dd->spi_gpios[idx], rc);

		/* Unwind: release everything requested before the failure */
		while (--idx >= 0) {
			if (dd->spi_gpios[idx] >= 0)
				gpio_free(dd->spi_gpios[idx]);
		}
		return rc;
	}

	return 0;
}

/*
 * msm_spi_free_gpios - release the SPI signal and chip-select GPIOs
 * @dd: controller instance
 *
 * Frees every non-negative entry of dd->spi_gpios, then every chip-select
 * GPIO still marked valid, clearing its valid flag afterwards.
 */
static inline void msm_spi_free_gpios(struct msm_spi *dd)
{
	int n;

	for (n = 0; n < ARRAY_SIZE(spi_rsrcs); n++) {
		if (dd->spi_gpios[n] < 0)
			continue;
		gpio_free(dd->spi_gpios[n]);
	}

	for (n = 0; n < ARRAY_SIZE(spi_cs_rsrcs); n++) {
		if (!dd->cs_gpios[n].valid)
			continue;
		gpio_free(dd->cs_gpios[n].gpio_num);
		dd->cs_gpios[n].valid = 0;
	}
}

/**
 * msm_spi_clk_max_rate: find the highest clock rate not above a request
 * @clk the clock to query
 * @rate requested frequency in Hz
 * @return @rate if it is directly achievable, otherwise the best rate found
 *         below it, or -EINVAL when even the lowest rate exceeds @rate
 *
 * clk_round_rate() is treated as a ceiling function here. A floor is
 * approximated by probing it at most ten times: starting at @rate, the probe
 * point moves by a step that is halved (and whose direction is flipped)
 * every time the probe overshoots in the current direction.
 */
static long msm_spi_clk_max_rate(struct clk *clk, unsigned long rate)
{
	long floor_rate, best, step, rounded;
	long dir = -1;
	long probe = rate;
	int  tries;

	rounded = clk_round_rate(clk, rate);
	if (rounded == rate)
		return rate;

	/* Exact match unavailable: find out what the slowest rate is */
	floor_rate = clk_round_rate(clk, 0);
	if (floor_rate > rate)
		return -EINVAL;

	best = floor_rate;
	step = (rate - floor_rate) >> 1;

	for (tries = 10; tries && step; tries--) {
		probe += step * dir;
		rounded = clk_round_rate(clk, probe);

		/* Remember the closest rate seen so far that is below @rate */
		if ((rounded < rate) && (rounded > best))
			best = rounded;

		/* Overshot in the current direction: turn around, finer step */
		if (((rounded > rate) && (dir > 0)) ||
		    ((rounded < rate) && (dir < 0))) {
			dir = -dir;
			step >>= 1;
		}
	}

	return best;
}

static void msm_spi_clock_set(struct msm_spi *dd, int speed)
{
	long rate;
	int rc;

	rate = msm_spi_clk_max_rate(dd->clk, speed);
	if (rate < 0) {
		dev_err(dd->dev,
		"%s: no match found for requested clock frequency:%d",
			__func__, speed);
		return;
	}

	rc = clk_set_rate(dd->clk, rate);
	if (!rc)
		dd->clock_speed = rate;
}

/*
 * msm_spi_calculate_size - decode FIFO geometry from its register encoding
 * @fifo_size:  out, FIFO depth in 32-bit words
 * @block_size: out, block size in bytes
 * @block:      encoded block size: 0, 1 or 2 for 1, 4 or 8 words
 * @mult:       encoded FIFO multiplier: 0..3 for 2, 4, 8 or 16 blocks
 *
 * Returns 0 on success. Returns -EINVAL, without touching either output,
 * when @block or @mult is outside its valid range.
 */
static int msm_spi_calculate_size(int *fifo_size,
				  int *block_size,
				  int block,
				  int mult)
{
	static const int words_per_block[] = { 1, 4, 8 };
	int words;

	if (block < 0 || block > 2)
		return -EINVAL;
	if (mult < 0 || mult > 3)
		return -EINVAL;

	words = words_per_block[block];

	/* The multiplier field encodes 2^(mult + 1) blocks per FIFO */
	*fifo_size = words << (mult + 1);
	*block_size = words * sizeof(u32); /* in bytes */

	return 0;
}

/*
 * get_next_transfer - advance dd->cur_transfer to the next list entry
 * @dd: controller instance
 *
 * Does nothing when the current transfer is the last one of the current
 * message. Otherwise the following transfer becomes current and the
 * read/write buffer pointers are reloaded from it.
 */
static void get_next_transfer(struct msm_spi *dd)
{
	struct list_head *next = dd->cur_transfer->transfer_list.next;

	/* Reached the list head again: no further transfer */
	if (next == &dd->cur_msg->transfers)
		return;

	dd->cur_transfer = list_entry(next, struct spi_transfer,
				      transfer_list);
	dd->write_buf = dd->cur_transfer->tx_buf;
	dd->read_buf = dd->cur_transfer->rx_buf;
}

static void __init msm_spi_calculate_fifo_size(struct msm_spi *dd)
{
	u32 spi_iom;
	int block;
	int mult;

	spi_iom = readl_relaxed(dd->base + SPI_IO_MODES);

	block = (spi_iom & SPI_IO_M_INPUT_BLOCK_SIZE) >> INPUT_BLOCK_SZ_SHIFT;
	mult = (spi_iom & SPI_IO_M_INPUT_FIFO_SIZE) >> INPUT_FIFO_SZ_SHIFT;
	if (msm_spi_calculate_size(&dd->input_fifo_size, &dd->input_block_size,
				   block, mult)) {
		goto fifo_size_err;
	}

	block = (spi_iom & SPI_IO_M_OUTPUT_BLOCK_SIZE) >> OUTPUT_BLOCK_SZ_SHIFT;
	mult = (spi_iom & SPI_IO_M_OUTPUT_FIFO_SIZE) >> OUTPUT_FIFO_SZ_SHIFT;
	if (msm_spi_calculate_size(&dd->output_fifo_size,
				   &dd->output_block_size, block, mult)) {
		goto fifo_size_err;
	}
	/* DM mode is not available for this block size */
	if (dd->input_block_size == 4 || dd->output_block_size == 4)
		dd->use_dma = 0;

	if (dd->use_dma) {
		dd->input_burst_size = max(dd->input_block_size,
					DM_BURST_SIZE);
		dd->output_burst_size = max(dd->output_block_size,
					DM_BURST_SIZE);
	}
	return;

fifo_size_err:
	dd->use_dma = 0;
	pr_err("%s: invalid FIFO size, SPI_IO_MODES=0x%x\n", __func__, spi_iom);
	return;
}

/*
 * msm_spi_read_word_from_fifo - pop one word from the input FIFO
 * @dd: controller instance
 *
 * Reads SPI_INPUT_FIFO once. With a read buffer, up to dd->bytes_per_word
 * bytes are stored most-significant byte first, bounded by
 * dd->rx_bytes_remaining. Without a read buffer the word is discarded and
 * only the remaining-byte counter is reduced (clamped at zero).
 *
 * For multi-transfer messages the per-transfer word counter is reset when
 * everything has been received; once the current transfer's length has been
 * consumed, reception moves on to the next transfer of the message, if any.
 */
static void msm_spi_read_word_from_fifo(struct msm_spi *dd)
{
	u32   data_in;
	int   i;
	int   shift;

	data_in = readl_relaxed(dd->base + SPI_INPUT_FIFO);
	if (dd->read_buf) {
		for (i = 0; (i < dd->bytes_per_word) &&
			     dd->rx_bytes_remaining; i++) {
			/* The data format depends on bytes_per_word:
			   4 bytes: 0x12345678
			   3 bytes: 0x00123456
			   2 bytes: 0x00001234
			   1 byte : 0x00000012
			*/
			shift = 8 * (dd->bytes_per_word - i - 1);
			/*
			 * Shift the unsigned word down before masking. The
			 * former (0xFF << shift) overflowed a signed int
			 * when shift was 24.
			 */
			*dd->read_buf++ = (data_in >> shift) & 0xFF;
			dd->rx_bytes_remaining--;
		}
	} else {
		if (dd->rx_bytes_remaining >= dd->bytes_per_word)
			dd->rx_bytes_remaining -= dd->bytes_per_word;
		else
			dd->rx_bytes_remaining = 0;
	}

	dd->read_xfr_cnt++;
	if (dd->multi_xfr) {
		if (!dd->rx_bytes_remaining)
			dd->read_xfr_cnt = 0;
		else if ((dd->read_xfr_cnt * dd->bytes_per_word) ==
						dd->read_len) {
			/* Current rx transfer is full: switch to the next */
			struct spi_transfer *t = dd->cur_rx_transfer;
			if (t->transfer_list.next != &dd->cur_msg->transfers) {
				t = list_entry(t->transfer_list.next,
						struct spi_transfer,
						transfer_list);
				dd->read_buf = t->rx_buf;
				dd->read_len = t->len;
				dd->read_xfr_cnt = 0;
				dd->cur_rx_transfer = t;
			}
		}
	}
}

/*
 * msm_spi_is_valid_state - report whether SPI_STATE has its VALID bit set
 * @dd: controller instance
 */
static inline bool msm_spi_is_valid_state(struct msm_spi *dd)
{
	return (readl_relaxed(dd->base + SPI_STATE) & SPI_OP_STATE_VALID) != 0;
}

/*
 * msm_spi_udelay - delay for the given number of microseconds
 * @delay_usecs: delay length; 0 means no delay at all
 *
 * Delays above 20us sleep via usleep_range(); shorter ones busy-wait with
 * udelay(), since a context switch would cost more than the delay itself.
 */
static inline void msm_spi_udelay(unsigned long delay_usecs)
{
	if (!delay_usecs)
		return;

	if (delay_usecs > 20)
		usleep_range(delay_usecs, delay_usecs);
	else
		udelay(delay_usecs);
}

/*
 * msm_spi_wait_valid - poll until the SPI state register reports VALID
 * @dd: controller instance
 *
 * The polling interval is derived from the current clock speed
 * (10 * USEC_PER_SEC / clock_speed) with a lower bound of
 * SPI_DELAY_THRESHOLD. The overall timeout is computed from that interval
 * scaled by SPI_DEFAULT_TIMEOUT.
 *
 * Returns 0 once the state is valid, -EINVAL if dd->clock_speed is 0, or
 * -ETIMEDOUT on timeout. On timeout the current message, if any, has its
 * status set to -EIO.
 */
static inline int msm_spi_wait_valid(struct msm_spi *dd)
{
	unsigned long delay = 0;
	unsigned long timeout = 0;

	if (dd->clock_speed == 0)
		return -EINVAL;
	/*
	 * Based on the SPI clock speed, sufficient time
	 * should be given for the SPI state transition
	 * to occur
	 */
	delay = (10 * USEC_PER_SEC) / dd->clock_speed;
	/*
	 * For small delay values, the default timeout would
	 * be one jiffy
	 */
	if (delay < SPI_DELAY_THRESHOLD)
		delay = SPI_DELAY_THRESHOLD;

	/* Adding one to round off to the nearest jiffy */
	timeout = jiffies + msecs_to_jiffies(delay * SPI_DEFAULT_TIMEOUT) + 1;
	while (!msm_spi_is_valid_state(dd)) {
		if (time_after(jiffies, timeout)) {
			/* Re-check once so a late transition is not missed */
			if (msm_spi_is_valid_state(dd))
				return 0;

			if (dd->cur_msg)
				dd->cur_msg->status = -EIO;
			/* One literal: the split one printed "statenot" */
			dev_err(dd->dev,
				"%s: SPI operational state not valid\n",
				__func__);
			return -ETIMEDOUT;
		}
		msm_spi_udelay(delay);
	}
	return 0;
}

static inline int msm_spi_set_state(struct msm_spi *dd,
				    enum msm_spi_state state)
{
	enum msm_spi_state cur_state;
	if (msm_spi_wait_valid(dd))
		return -EIO;
	cur_state = readl_relaxed(dd->base + SPI_STATE);
	/* Per spec:
	   For PAUSE_STATE to RESET_STATE, two writes of (10) are required */
	if (((cur_state & SPI_OP_STATE) == SPI_OP_STATE_PAUSE) &&
			(state == SPI_OP_STATE_RESET)) {
		writel_relaxed(SPI_OP_STATE_CLEAR_BITS, dd->base + SPI_STATE);
		writel_relaxed(SPI_OP_STATE_CLEAR_BITS, dd->base + SPI_STATE);
	} else {
		writel_relaxed((cur_state & ~SPI_OP_STATE) | state,
		       dd->base + SPI_STATE);
	}
	if (msm_spi_wait_valid(dd))
		return -EIO;

	return 0;
}

/*
 * msm_spi_add_configs - fold word size and direction flags into SPI_CONFIG
 * @dd:     controller instance
 * @config: in/out, SPI_CONFIG value being assembled
 * @n:      value for the SPI_CFG_N field
 *
 * Clears SPI_NO_INPUT/SPI_NO_OUTPUT and installs @n in the N field. In
 * data-mover mode with dd->read_len equal to zero, NO_INPUT is set when
 * there is no read buffer and NO_OUTPUT when there is no write buffer.
 */
static inline void msm_spi_add_configs(struct msm_spi *dd, u32 *config, int n)
{
	u32 cfg = *config;

	cfg &= ~(SPI_NO_INPUT|SPI_NO_OUTPUT);
	/* Re-writing an unchanged N field is harmless, so no compare */
	cfg = (cfg & ~SPI_CFG_N) | n;

	if ((dd->mode == SPI_DMOV_MODE) && (!dd->read_len)) {
		if (!dd->read_buf)
			cfg |= SPI_NO_INPUT;
		if (!dd->write_buf)
			cfg |= SPI_NO_OUTPUT;
	}

	*config = cfg;
}

static void msm_spi_set_config(struct msm_spi *dd, int bpw)
{
	u32 spi_config;

	spi_config = readl_relaxed(dd->base + SPI_CONFIG);

	if (dd->cur_msg->spi->mode & SPI_CPHA)
		spi_config &= ~SPI_CFG_INPUT_FIRST;
	else
		spi_config |= SPI_CFG_INPUT_FIRST;
	if (dd->cur_msg->spi->mode & SPI_LOOP)
		spi_config |= SPI_CFG_LOOPBACK;
	else
		spi_config &= ~SPI_CFG_LOOPBACK;
	msm_spi_add_configs(dd, &spi_config, bpw-1);
	writel_relaxed(spi_config, dd->base + SPI_CONFIG);
	msm_spi_set_qup_config(dd, bpw);
}

/*
 * msm_spi_setup_dm_transfer - prepare the data-mover commands for one chunk
 * @dd: controller instance
 *
 * Works out how many bytes go out in this chunk (bounded by
 * dd->max_trfr_len), splits that amount into whole bursts (moved with a
 * "box" command) and an unaligned remainder (moved through the padding
 * buffers with a "single" command), fills in the tx and rx command
 * structures accordingly, and finally programs the MX output/input count
 * registers. The commands are only prepared here; queueing them is done by
 * the caller.
 */
static void msm_spi_setup_dm_transfer(struct msm_spi *dd)
{
	dmov_box *box;
	int bytes_to_send, bytes_sent;
	int tx_num_rows, rx_num_rows;
	u32 num_transfers;

	/* Fresh chunk: reset the per-direction interrupt counters */
	atomic_set(&dd->rx_irq_called, 0);
	atomic_set(&dd->tx_irq_called, 0);
	if (dd->write_len && !dd->read_len) {
		/* WR-WR transfer */
		bytes_sent = dd->cur_msg_len - dd->tx_bytes_remaining;
		dd->write_buf = dd->temp_buf;
	} else {
		bytes_sent = dd->cur_transfer->len - dd->tx_bytes_remaining;
		/* For WR-RD transfer, bytes_sent can be negative */
		if (bytes_sent < 0)
			bytes_sent = 0;
	}
	/* We'll send in chunks of SPI_MAX_LEN if larger than
	 * 4K bytes for targets that have only 12 bits in
	 * QUP_MAX_OUTPUT_CNT register. If the target supports
	 * more than 12bits then we send the data in chunks of
	 * the infinite_mode value that is defined in the
	 * corresponding board file.
	 */
	if (!dd->pdata->infinite_mode)
		dd->max_trfr_len = SPI_MAX_LEN;
	else
		dd->max_trfr_len = (dd->pdata->infinite_mode) *
			   (dd->bytes_per_word);

	bytes_to_send = min_t(u32, dd->tx_bytes_remaining,
			      dd->max_trfr_len);

	dd->cur_msg->actual_length = bytes_to_send;
	num_transfers = DIV_ROUND_UP(bytes_to_send, dd->bytes_per_word);
	/* Remainder that does not fill a whole burst, per direction */
	dd->tx_unaligned_len = bytes_to_send % dd->output_burst_size;
	dd->rx_unaligned_len = bytes_to_send % dd->input_burst_size;
	/* Number of whole bursts, per direction */
	tx_num_rows = bytes_to_send / dd->output_burst_size;
	rx_num_rows = bytes_to_send / dd->input_burst_size;

	dd->mode = SPI_DMOV_MODE;

	/*
	 * With at least one whole burst the command list starts at the box
	 * command; otherwise it starts directly at the single/padding command.
	 */
	if (tx_num_rows) {
		/* src in 16 MSB, dst in 16 LSB */
		box = &dd->tx_dmov_cmd->box;
		box->src_row_addr = dd->cur_transfer->tx_dma + bytes_sent;
		box->src_dst_len
			= (dd->output_burst_size << 16) | dd->output_burst_size;
		box->num_rows = (tx_num_rows << 16) | tx_num_rows;
		box->row_offset = (dd->output_burst_size << 16) | 0;

		dd->tx_dmov_cmd->cmd_ptr = CMD_PTR_LP |
				   DMOV_CMD_ADDR(dd->tx_dmov_cmd_dma +
				   offsetof(struct spi_dmov_cmd, box));
	} else {
		dd->tx_dmov_cmd->cmd_ptr = CMD_PTR_LP |
				   DMOV_CMD_ADDR(dd->tx_dmov_cmd_dma +
				   offsetof(struct spi_dmov_cmd, single_pad));
	}

	if (rx_num_rows) {
		/* src in 16 MSB, dst in 16 LSB */
		box = &dd->rx_dmov_cmd->box;
		box->dst_row_addr = dd->cur_transfer->rx_dma + bytes_sent;
		box->src_dst_len
			= (dd->input_burst_size << 16) | dd->input_burst_size;
		box->num_rows = (rx_num_rows << 16) | rx_num_rows;
		box->row_offset = (0 << 16) | dd->input_burst_size;

		dd->rx_dmov_cmd->cmd_ptr = CMD_PTR_LP |
				   DMOV_CMD_ADDR(dd->rx_dmov_cmd_dma +
				   offsetof(struct spi_dmov_cmd, box));
	} else {
		dd->rx_dmov_cmd->cmd_ptr = CMD_PTR_LP |
				   DMOV_CMD_ADDR(dd->rx_dmov_cmd_dma +
				   offsetof(struct spi_dmov_cmd, single_pad));
	}

	/*
	 * No tx remainder: flag the box command with CMD_LC. Otherwise clear
	 * CMD_LC on the box and stage the tail bytes in the zeroed tx padding
	 * buffer, which is sent as one full burst.
	 */
	if (!dd->tx_unaligned_len) {
		dd->tx_dmov_cmd->box.cmd |= CMD_LC;
	} else {
		dmov_s *tx_cmd = &(dd->tx_dmov_cmd->single_pad);
		u32 tx_offset = dd->cur_transfer->len - dd->tx_unaligned_len;

		/* Multi-transfer write: the tail is at the end of the message */
		if ((dd->multi_xfr) && (dd->read_len <= 0))
			tx_offset = dd->cur_msg_len - dd->tx_unaligned_len;

		dd->tx_dmov_cmd->box.cmd &= ~CMD_LC;

		memset(dd->tx_padding, 0, dd->output_burst_size);
		if (dd->write_buf)
			memcpy(dd->tx_padding, dd->write_buf + tx_offset,
			       dd->tx_unaligned_len);

		tx_cmd->src = dd->tx_padding_dma;
		tx_cmd->len = dd->output_burst_size;
	}

	/* Same scheme for rx: the tail lands in the rx padding buffer */
	if (!dd->rx_unaligned_len) {
		dd->rx_dmov_cmd->box.cmd |= CMD_LC;
	} else {
		dmov_s *rx_cmd = &(dd->rx_dmov_cmd->single_pad);
		dd->rx_dmov_cmd->box.cmd &= ~CMD_LC;

		memset(dd->rx_padding, 0, dd->input_burst_size);
		rx_cmd->dst = dd->rx_padding_dma;
		rx_cmd->len = dd->input_burst_size;
	}

	/* This also takes care of the padding dummy buf
	   Since this is set to the correct length, the
	   dummy bytes won't be actually sent */
	if (dd->multi_xfr) {
		u32 write_transfers = 0;
		u32 read_transfers = 0;

		if (dd->write_len > 0) {
			write_transfers = DIV_ROUND_UP(dd->write_len,
						       dd->bytes_per_word);
			writel_relaxed(write_transfers,
				       dd->base + SPI_MX_OUTPUT_COUNT);
		}
		if (dd->read_len > 0) {
			/*
			 *  The read following a write transfer must take
			 *  into account, that the bytes pertaining to
			 *  the write transfer needs to be discarded,
			 *  before the actual read begins.
			 */
			read_transfers = DIV_ROUND_UP(dd->read_len +
						      dd->write_len,
						      dd->bytes_per_word);
			writel_relaxed(read_transfers,
				       dd->base + SPI_MX_INPUT_COUNT);
		}
	} else {
		/* Single transfer: same word count in each active direction */
		if (dd->write_buf)
			writel_relaxed(num_transfers,
				       dd->base + SPI_MX_OUTPUT_COUNT);
		if (dd->read_buf)
			writel_relaxed(num_transfers,
				       dd->base + SPI_MX_INPUT_COUNT);
	}
}

static void msm_spi_enqueue_dm_commands(struct msm_spi *dd)
{
	dma_coherent_pre_ops();
	if (dd->write_buf)
		msm_dmov_enqueue_cmd(dd->tx_dma_chan, &dd->tx_hdr);
	if (dd->read_buf)
		msm_dmov_enqueue_cmd(dd->rx_dma_chan, &dd->rx_hdr);
}

/*
 * msm_spi_dm_send_next - start the next data-mover chunk, if there is one
 * @dd: controller instance
 *
 * On targets that do not support infinite mode the SPI core can send at
 * most 4K or 64K transfers at a time, depending on the size of the
 * MAX_OUTPUT_COUNT register, so long transfers are sent in several chunks.
 * When a chunk completes, this function either sets up and starts the next
 * chunk or reports that nothing is left. On targets that support infinite
 * mode all bytes are sent as a single chunk.
 *
 * Returns 1 when another chunk was started and its completion must be
 * awaited, 0 when nothing further was started (not in data-mover mode,
 * nothing left to send, or a state change failed).
 */
static int msm_spi_dm_send_next(struct msm_spi *dd)
{
	/* By now we should have sent all the bytes in FIFO mode,
	 * However to make things right, we'll check anyway.
	 */
	if (dd->mode != SPI_DMOV_MODE)
		return 0;

	/* On targets which do not support infinite mode,
	   we need to send more chunks if we sent the maximum last time */
	if (dd->tx_bytes_remaining > dd->max_trfr_len) {
		dd->tx_bytes_remaining -= dd->max_trfr_len;
		if (msm_spi_set_state(dd, SPI_OP_STATE_RESET))
			return 0;
		dd->read_len = dd->write_len = 0;
		msm_spi_setup_dm_transfer(dd);
		msm_spi_enqueue_dm_commands(dd);
		if (msm_spi_set_state(dd, SPI_OP_STATE_RUN))
			return 0;
		return 1;
	} else if (dd->read_len && dd->write_len) {
		/* Write part of a write-then-read is done: start the read */
		dd->tx_bytes_remaining -= dd->cur_transfer->len;
		if (list_is_last(&dd->cur_transfer->transfer_list,
					    &dd->cur_msg->transfers))
			return 0;
		get_next_transfer(dd);
		if (msm_spi_set_state(dd, SPI_OP_STATE_PAUSE))
			return 0;
		dd->tx_bytes_remaining = dd->read_len + dd->write_len;
		/* Receive into the temporary buffer */
		dd->read_buf = dd->temp_buf;
		dd->read_len = dd->write_len = -1;
		msm_spi_setup_dm_transfer(dd);
		msm_spi_enqueue_dm_commands(dd);
		if (msm_spi_set_state(dd, SPI_OP_STATE_RUN))
			return 0;
		return 1;
	}
	return 0;
}

static inline void msm_spi_ack_transfer(struct msm_spi *dd)
{
	writel_relaxed(SPI_OP_MAX_INPUT_DONE_FLAG |
		       SPI_OP_MAX_OUTPUT_DONE_FLAG,
		       dd->base + SPI_OPERATIONAL);
	/* Ensure done flag was cleared before proceeding further */
	mb();
}

/*
 * msm_spi_qup_irq - top-level QUP interrupt handler
 * @irq:    interrupt number
 * @dev_id: the struct msm_spi registered for this interrupt
 *
 * Figures out which interrupt occurred and dispatches to the error, input
 * and output handlers, OR-ing their results together. Interrupts arriving
 * while the device is runtime-suspended are logged and reported as
 * IRQ_NONE. If a handler marked the transfer as done, the waiter on
 * dd->transfer_complete is woken.
 */
static inline irqreturn_t msm_spi_qup_irq(int irq, void *dev_id)
{
	struct msm_spi *dd = dev_id;
	u32 ret = IRQ_NONE;
	u32 op;

	if (pm_runtime_suspended(dd->dev)) {
		dev_warn(dd->dev, "QUP: pm runtime suspend, irq:%d\n", irq);
		return ret;
	}

	/* Error handling wants the spi_master stored as driver data */
	if (readl_relaxed(dd->base + SPI_ERROR_FLAGS) ||
	    readl_relaxed(dd->base + QUP_ERROR_FLAGS))
		ret |= msm_spi_error_irq(irq, dev_get_drvdata(dd->dev));

	op = readl_relaxed(dd->base + SPI_OPERATIONAL);

	if (op & SPI_OP_INPUT_SERVICE_FLAG) {
		writel_relaxed(SPI_OP_INPUT_SERVICE_FLAG,
			       dd->base + SPI_OPERATIONAL);
		/* Service flag must be cleared before processing continues */
		mb();
		ret |= msm_spi_input_irq(irq, dev_id);
	}

	if (op & SPI_OP_OUTPUT_SERVICE_FLAG) {
		writel_relaxed(SPI_OP_OUTPUT_SERVICE_FLAG,
			       dd->base + SPI_OPERATIONAL);
		/* Service flag must be cleared before processing continues */
		mb();
		ret |= msm_spi_output_irq(irq, dev_id);
	}

	if (dd->done) {
		complete(&dd->transfer_complete);
		dd->done = 0;
	}

	return ret;
}

static irqreturn_t msm_spi_input_irq(int irq, void *dev_id)
{
	struct msm_spi	       *dd = dev_id;

	dd->stat_rx++;

	if (dd->mode == SPI_MODE_NONE)
		return IRQ_HANDLED;

	if (dd->mode == SPI_DMOV_MODE) {
		u32 op = readl_relaxed(dd->base + SPI_OPERATIONAL);
		if ((!dd->read_buf || op & SPI_OP_MAX_INPUT_DONE_FLAG) &&
		    (!dd->write_buf || op & SPI_OP_MAX_OUTPUT_DONE_FLAG)) {
			msm_spi_ack_transfer(dd);
			if (dd->rx_unaligned_len == 0) {
				if (atomic_inc_return(&dd->rx_irq_called) == 1)
					return IRQ_HANDLED;
			}
			msm_spi_complete(dd);
			return IRQ_HANDLED;
		}
		return IRQ_NONE;
	}

	if (dd->mode == SPI_FIFO_MODE) {
		while ((readl_relaxed(dd->base + SPI_OPERATIONAL) &
			SPI_OP_IP_FIFO_NOT_EMPTY) &&
			(dd->rx_bytes_remaining > 0)) {
			msm_spi_read_word_from_fifo(dd);
		}
		if (dd->rx_bytes_remaining == 0)
			msm_spi_complete(dd);
	}

	return IRQ_HANDLED;
}

static void msm_spi_write_word_to_fifo(struct msm_spi *dd)
{
	u32    word;
	u8     byte;
	int    i;

	word = 0;
	if (dd->write_buf) {
		for (i = 0; (i < dd->bytes_per_word) &&
			     dd->tx_bytes_remaining; i++) {
			dd->tx_bytes_remaining--;
			byte = *dd->write_buf++;
			word |= (byte << (BITS_PER_BYTE * (3 - i)));
		}
	} else
		if (dd->tx_bytes_remaining > dd->bytes_per_word)
			dd->tx_bytes_remaining -= dd->bytes_per_word;
		else
			dd->tx_bytes_remaining = 0;
	dd->write_xfr_cnt++;
	if (dd->multi_xfr) {
		if (!dd->tx_bytes_remaining) {
			dd->cur_msg->actual_length += dd->write_xfr_cnt;
			dd->write_xfr_cnt = 0;
		} else if ((dd->write_xfr_cnt * dd->bytes_per_word) ==
						dd->write_len) {
			struct spi_transfer *t = dd->cur_tx_transfer;
			if (t->transfer_list.next != &dd->cur_msg->transfers) {
				t = list_entry(t->transfer_list.next,
						struct spi_transfer,
						transfer_list);
				dd->write_buf = t->tx_buf;
				dd->write_len = t->len;
				dd->cur_msg->actual_length += dd->write_xfr_cnt;
				dd->write_xfr_cnt = 0;
				dd->cur_tx_transfer = t;
			}
		}
	}
	writel_relaxed(word, dd->base + SPI_OUTPUT_FIFO);
}

/*
 * msm_spi_write_rmn_to_fifo - feed pending tx data into the output FIFO
 * @dd: controller instance
 *
 * Writes words until no tx bytes remain, dd->input_fifo_size words have
 * been written in this call, or the hardware reports the output FIFO full.
 */
static inline void msm_spi_write_rmn_to_fifo(struct msm_spi *dd)
{
	int written;

	for (written = 0;
	     (dd->tx_bytes_remaining > 0) && (written < dd->input_fifo_size);
	     written++) {
		if (readl_relaxed(dd->base + SPI_OPERATIONAL) &
		    SPI_OP_OUTPUT_FIFO_FULL)
			break;
		msm_spi_write_word_to_fifo(dd);
	}
}

/*
 * msm_spi_output_irq - handle an output-service interrupt
 * @irq:    interrupt number (unused)
 * @dev_id: the struct msm_spi for this controller
 *
 * Data-mover mode: only tx-only transactions (no read buffer) are finished
 * from here, once the MAX output done flag is set; completion happens on
 * the second such interrupt. Anything else returns IRQ_NONE.
 *
 * FIFO mode: the output FIFO has room, so more pending data is written.
 */
static irqreturn_t msm_spi_output_irq(int irq, void *dev_id)
{
	struct msm_spi *dd = dev_id;

	dd->stat_tx++;

	if (dd->mode == SPI_MODE_NONE)
		return IRQ_HANDLED;

	if (dd->mode == SPI_DMOV_MODE) {
		/* This is the only place completion is signalled from tx */
		if (dd->read_buf != NULL ||
		    !(readl_relaxed(dd->base + SPI_OPERATIONAL) &
		      SPI_OP_MAX_OUTPUT_DONE_FLAG))
			return IRQ_NONE;

		msm_spi_ack_transfer(dd);
		if (atomic_inc_return(&dd->tx_irq_called) != 1)
			msm_spi_complete(dd);
		return IRQ_HANDLED;
	}

	/* Output FIFO is empty. Transmit any outstanding write data. */
	if (dd->mode == SPI_FIFO_MODE)
		msm_spi_write_rmn_to_fifo(dd);

	return IRQ_HANDLED;
}

static irqreturn_t msm_spi_error_irq(int irq, void *dev_id)
{
	struct spi_master	*master = dev_id;
	struct msm_spi          *dd = spi_master_get_devdata(master);
	u32                      spi_err;

	spi_err = readl_relaxed(dd->base + SPI_ERROR_FLAGS);
	if (spi_err & SPI_ERR_OUTPUT_OVER_RUN_ERR)
		dev_warn(master->dev.parent, "SPI output overrun error\n");
	if (spi_err & SPI_ERR_INPUT_UNDER_RUN_ERR)
		dev_warn(master->dev.parent, "SPI input underrun error\n");
	if (spi_err & SPI_ERR_OUTPUT_UNDER_RUN_ERR)
		dev_warn(master->dev.parent, "SPI output underrun error\n");
	msm_spi_get_clk_err(dd, &spi_err);
	if (spi_err & SPI_ERR_CLK_OVER_RUN_ERR)
		dev_warn(master->dev.parent, "SPI clock overrun error\n");
	if (spi_err & SPI_ERR_CLK_UNDER_RUN_ERR)
		dev_warn(master->dev.parent, "SPI clock underrun error\n");
	msm_spi_clear_error_flags(dd);
	msm_spi_ack_clk_err(dd);
	/* Ensure clearing of QUP_ERROR_FLAGS was completed */
	mb();
	return IRQ_HANDLED;
}

/*
 * msm_spi_map_dma_buffers - DMA-map the buffers of the current transfer
 * @dd: controller instance
 *
 * For a single transfer the client's tx/rx buffers are mapped directly.
 * For multi-transfer messages (WR-WR or WR-RD) a temporary bounce buffer of
 * dd->cur_msg_len bytes is allocated in dd->temp_buf:
 *  - WR-WR: both tx buffers are copied into it and it is mapped for tx;
 *  - WR-RD: the first tx buffer is mapped directly and the bounce buffer is
 *    mapped for rx, with the handle stored in the second transfer.
 *
 * Returns 0 on success, -ENOMEM on allocation or mapping failure, or
 * -EINVAL if a multi-transfer message lacks a required buffer. On failure
 * the bounce buffer is freed and any tx mapping already made is undone.
 */
static int msm_spi_map_dma_buffers(struct msm_spi *dd)
{
	struct device *dev;
	struct spi_transfer *first_xfr;
	struct spi_transfer *nxt_xfr = NULL;
	void *tx_buf, *rx_buf;
	unsigned tx_len, rx_len;
	int ret = -EINVAL;

	dev = &dd->cur_msg->spi->dev;
	first_xfr = dd->cur_transfer;
	tx_buf = (void *)first_xfr->tx_buf;
	rx_buf = first_xfr->rx_buf;
	tx_len = rx_len = first_xfr->len;

	/*
	 * For WR-WR and WR-RD transfers, we allocate our own temporary
	 * buffer and copy the data to/from the client buffers.
	 */
	if (dd->multi_xfr) {
		dd->temp_buf = kzalloc(dd->cur_msg_len,
				       GFP_KERNEL | __GFP_DMA);
		if (!dd->temp_buf)
			return -ENOMEM;
		nxt_xfr = list_entry(first_xfr->transfer_list.next,
				     struct spi_transfer, transfer_list);

		if (dd->write_len && !dd->read_len) {
			if (!first_xfr->tx_buf || !nxt_xfr->tx_buf)
				goto error;

			memcpy(dd->temp_buf, first_xfr->tx_buf, first_xfr->len);
			memcpy(dd->temp_buf + first_xfr->len, nxt_xfr->tx_buf,
			       nxt_xfr->len);
			tx_buf = dd->temp_buf;
			tx_len = dd->cur_msg_len;
		} else {
			if (!first_xfr->tx_buf || !nxt_xfr->rx_buf)
				goto error;

			rx_buf = dd->temp_buf;
			rx_len = dd->cur_msg_len;
		}
	}
	if (tx_buf != NULL) {
		first_xfr->tx_dma = dma_map_single(dev, tx_buf,
						   tx_len, DMA_TO_DEVICE);
		/* Check against the same device the mapping was made for */
		if (dma_mapping_error(dev, first_xfr->tx_dma)) {
			dev_err(dev, "dma %cX %d bytes error\n",
				'T', tx_len);
			ret = -ENOMEM;
			goto error;
		}
	}
	if (rx_buf != NULL) {
		dma_addr_t dma_handle;
		dma_handle = dma_map_single(dev, rx_buf,
					    rx_len, DMA_FROM_DEVICE);
		if (dma_mapping_error(dev, dma_handle)) {
			dev_err(dev, "dma %cX %d bytes error\n",
				'R', rx_len);
			/* Undo the tx mapping using the mapping device */
			if (tx_buf != NULL)
				dma_unmap_single(dev, first_xfr->tx_dma,
						 tx_len, DMA_TO_DEVICE);
			ret = -ENOMEM;
			goto error;
		}
		if (dd->multi_xfr)
			nxt_xfr->rx_dma = dma_handle;
		else
			first_xfr->rx_dma = dma_handle;
	}
	return 0;

error:
	/* kfree(NULL) is a no-op, so this is safe without a bounce buffer */
	kfree(dd->temp_buf);
	dd->temp_buf = NULL;
	return ret;
}

/*
 * msm_spi_unmap_dma_buffers - undo DMA mappings and collect received data
 * @dd: controller instance
 *
 * Client-mapped messages (is_dma_mapped) only get the rx padding copy-back.
 *
 * Multi-transfer messages: a WR-WR message unmaps the single tx mapping of
 * cur_msg_len bytes. Otherwise the rx mapping of the current transfer and
 * the tx mapping of the previous transfer are released, and received data
 * (including any unaligned tail from the padding buffer) is copied from
 * the temporary buffer into the client's rx buffer. The temporary buffer is
 * then freed.
 *
 * Single transfers: rx and tx mappings are released, then any unaligned
 * rx tail is copied from the padding buffer into the read buffer.
 */
static void msm_spi_unmap_dma_buffers(struct msm_spi *dd)
{
	struct device *dev = &dd->cur_msg->spi->dev;
	struct spi_transfer *cur = dd->cur_transfer;
	u32 pad_off;

	if (!dd->cur_msg->is_dma_mapped && dd->multi_xfr) {
		if (dd->write_len && !dd->read_len) {
			dma_unmap_single(dev, cur->tx_dma, dd->cur_msg_len,
					 DMA_TO_DEVICE);
		} else {
			struct spi_transfer *prev =
				list_entry(cur->transfer_list.prev,
					   struct spi_transfer,
					   transfer_list);

			if (cur->rx_buf)
				dma_unmap_single(dev, cur->rx_dma,
						 dd->cur_msg_len,
						 DMA_FROM_DEVICE);
			if (prev->tx_buf)
				dma_unmap_single(dev, prev->tx_dma,
						 prev->len, DMA_TO_DEVICE);

			if (dd->read_buf) {
				dma_coherent_post_ops();
				if (dd->rx_unaligned_len) {
					pad_off = dd->cur_msg_len -
						  dd->rx_unaligned_len;
					memcpy(dd->read_buf + pad_off,
					       dd->rx_padding,
					       dd->rx_unaligned_len);
				}
				/* Skip the bytes clocked in during the write */
				memcpy(cur->rx_buf,
				       dd->read_buf + prev->len,
				       cur->len);
			}
		}

		kfree(dd->temp_buf);
		dd->temp_buf = NULL;
		return;
	}

	if (!dd->cur_msg->is_dma_mapped) {
		if (cur->rx_buf)
			dma_unmap_single(dev, cur->rx_dma, cur->len,
					 DMA_FROM_DEVICE);
		if (cur->tx_buf)
			dma_unmap_single(dev, cur->tx_dma, cur->len,
					 DMA_TO_DEVICE);
	}

	/* If we padded the transfer, we copy it from the padding buf */
	if (dd->rx_unaligned_len && dd->read_buf) {
		pad_off = cur->len - dd->rx_unaligned_len;
		dma_coherent_post_ops();
		memcpy(dd->read_buf + pad_off, dd->rx_padding,
		       dd->rx_unaligned_len);
	}
}

/**
 * msm_use_dm - decide whether the data mover handles this transfer
 * @dd:       device
 * @tr:       transfer
 * @bpw:      bits per word of the transfer
 *
 * The data mover is used only when all of the following hold:
 * 1. DMA is enabled for the device.
 * 2. The message is at least 3 input blocks long.
 * 3. A multi-transfer message is of the WR-WR or WR-RD kind, i.e. read_len
 *    or write_len has been set.
 * 4. Any tx/rx buffer of @tr is aligned to the DMA cache alignment.
 * 5. If @tr requests a chip-select change, @bpw is 8, 16 or 32.
 *
 * Returns 1 to use the data mover, 0 to fall back to FIFO mode.
 */
static inline int msm_use_dm(struct msm_spi *dd, struct spi_transfer *tr,
			     u8 bpw)
{
	u32 align = dma_get_cache_alignment();
	bool std_bpw = (bpw == 8) || (bpw == 16) || (bpw == 32);

	if (!dd->use_dma)
		return 0;

	if (dd->cur_msg_len < 3*dd->input_block_size)
		return 0;

	if (dd->multi_xfr && !dd->read_len && !dd->write_len)
		return 0;

	if (tr->tx_buf && !IS_ALIGNED((size_t)tr->tx_buf, align))
		return 0;

	if (tr->rx_buf && !IS_ALIGNED((size_t)tr->rx_buf, align))
		return 0;

	if (tr->cs_change && !std_bpw)
		return 0;

	return 1;
}

/*
 * get_last_transfer - make the final transfer of the message current
 * @dd: controller instance
 *
 * Walks forward from dd->cur_transfer until the last entry of the current
 * message's transfer list and stores it in dd->cur_transfer.
 */
static void get_last_transfer(struct msm_spi *dd)
{
	struct list_head *head = &dd->cur_msg->transfers;
	struct spi_transfer *xfr = dd->cur_transfer;

	while (xfr->transfer_list.next != head)
		xfr = list_entry(xfr->transfer_list.next,
				 struct spi_transfer, transfer_list);

	dd->cur_transfer = xfr;
}

/*
 * msm_spi_process_transfer - run the current transfer (group) to completion
 * @dd: controller instance
 *
 * Sets up byte counters, word size and clock, chooses between FIFO and
 * data-mover mode, programs SPI_IO_MODES, SPI_CONFIG and SPI_IO_CONTROL,
 * starts the transfer and waits for its completion (restarting the data
 * mover for follow-up chunks as needed). On a timeout the message status is
 * set to -EIO. Finally the DMA buffers are unmapped (data-mover mode), the
 * core is put back into RESET state and MX_CS_MODE is cleared.
 *
 * Oversized multi-transfer messages in internal-loopback mode are rejected
 * with an error message and the function returns without touching the
 * hardware.
 */
static void msm_spi_process_transfer(struct msm_spi *dd)
{
	u8  bpw;
	u32 spi_ioc;
	u32 spi_iom;
	u32 spi_ioc_orig;
	u32 max_speed;
	u32 chip_select;
	u32 read_count;
	u32 timeout;
	u32 int_loopback = 0;

	dd->tx_bytes_remaining = dd->cur_msg_len;
	dd->rx_bytes_remaining = dd->cur_msg_len;
	dd->read_buf           = dd->cur_transfer->rx_buf;
	dd->write_buf          = dd->cur_transfer->tx_buf;
	init_completion(&dd->transfer_complete);

	/* Word size: transfer setting, else device setting, else 8 bits */
	if (dd->cur_transfer->bits_per_word) {
		bpw = dd->cur_transfer->bits_per_word;
	} else {
		if (dd->cur_msg->spi->bits_per_word)
			bpw = dd->cur_msg->spi->bits_per_word;
		else
			bpw = 8;
	}
	dd->bytes_per_word = (bpw + 7) / 8;

	/* Speed: transfer setting, else the device's maximum */
	if (dd->cur_transfer->speed_hz)
		max_speed = dd->cur_transfer->speed_hz;
	else
		max_speed = dd->cur_msg->spi->max_speed_hz;
	if (!dd->clock_speed || max_speed != dd->clock_speed)
		msm_spi_clock_set(dd, max_speed);

	read_count = DIV_ROUND_UP(dd->cur_msg_len, dd->bytes_per_word);
	if (dd->cur_msg->spi->mode & SPI_LOOP)
		int_loopback = 1;
	if (int_loopback && dd->multi_xfr &&
			(read_count > dd->input_fifo_size)) {
		/* Trailing space keeps the joined literals from running together */
		if (dd->read_len && dd->write_len)
			pr_err(
			"%s:Internal Loopback does not support > fifo size "
			"for write-then-read transactions\n",
			__func__);
		else if (dd->write_len && !dd->read_len)
			pr_err(
			"%s:Internal Loopback does not support > fifo size "
			"for write-then-write transactions\n",
			__func__);
		return;
	}
	if (!msm_use_dm(dd, dd->cur_transfer, bpw)) {
		dd->mode = SPI_FIFO_MODE;
		if (dd->multi_xfr) {
			dd->read_len = dd->cur_transfer->len;
			dd->write_len = dd->cur_transfer->len;
		}
		/* read_count cannot exceed fifo_size, and only one READ COUNT
		   interrupt is generated per transaction, so for transactions
		   larger than fifo size READ COUNT must be disabled.
		   For those transactions we usually move to Data Mover mode.
		*/
		if (read_count <= dd->input_fifo_size) {
			writel_relaxed(read_count,
				       dd->base + SPI_MX_READ_COUNT);
			msm_spi_set_write_count(dd, read_count);
		} else {
			writel_relaxed(0, dd->base + SPI_MX_READ_COUNT);
			msm_spi_set_write_count(dd, 0);
		}
	} else {
		dd->mode = SPI_DMOV_MODE;
		if (dd->write_len && dd->read_len) {
			dd->tx_bytes_remaining = dd->write_len;
			dd->rx_bytes_remaining = dd->read_len;
		}
	}

	/* Write mode - fifo or data mover*/
	spi_iom = readl_relaxed(dd->base + SPI_IO_MODES);
	spi_iom &= ~(SPI_IO_M_INPUT_MODE | SPI_IO_M_OUTPUT_MODE);
	spi_iom = (spi_iom | (dd->mode << OUTPUT_MODE_SHIFT));
	spi_iom = (spi_iom | (dd->mode << INPUT_MODE_SHIFT));
	/* Turn on packing for data mover */
	if (dd->mode == SPI_DMOV_MODE)
		spi_iom |= SPI_IO_M_PACK_EN | SPI_IO_M_UNPACK_EN;
	else
		spi_iom &= ~(SPI_IO_M_PACK_EN | SPI_IO_M_UNPACK_EN);
	writel_relaxed(spi_iom, dd->base + SPI_IO_MODES);

	msm_spi_set_config(dd, bpw);

	spi_ioc = readl_relaxed(dd->base + SPI_IO_CONTROL);
	spi_ioc_orig = spi_ioc;
	if (dd->cur_msg->spi->mode & SPI_CPOL)
		spi_ioc |= SPI_IO_C_CLK_IDLE_HIGH;
	else
		spi_ioc &= ~SPI_IO_C_CLK_IDLE_HIGH;
	chip_select = dd->cur_msg->spi->chip_select << 2;
	if ((spi_ioc & SPI_IO_C_CS_SELECT) != chip_select)
		spi_ioc = (spi_ioc & ~SPI_IO_C_CS_SELECT) | chip_select;
	if (!dd->cur_transfer->cs_change)
		spi_ioc |= SPI_IO_C_MX_CS_MODE;
	/* Only touch the register when something actually changed */
	if (spi_ioc != spi_ioc_orig)
		writel_relaxed(spi_ioc, dd->base + SPI_IO_CONTROL);

	if (dd->mode == SPI_DMOV_MODE) {
		msm_spi_setup_dm_transfer(dd);
		msm_spi_enqueue_dm_commands(dd);
	}
	/* The output fifo interrupt handler will handle all writes after
	   the first. Restricting this to one write avoids contention
	   issues and race conditions between this thread and the int handler
	*/
	else if (dd->mode == SPI_FIFO_MODE) {
		if (msm_spi_prepare_for_write(dd))
			goto transfer_end;
		msm_spi_start_write(dd, read_count);
	}

	/* Only enter the RUN state after the first word is written into
	   the output FIFO. Otherwise, the output FIFO EMPTY interrupt
	   might fire before the first word is written resulting in a
	   possible race condition.
	 */
	if (msm_spi_set_state(dd, SPI_OP_STATE_RUN))
		goto transfer_end;

	/* FIXME: Can be reduced once system level DDR access is corrected */
	/*
	 * NOTE(review): a max_speed of 0 would make the inner DIV_ROUND_UP
	 * evaluate to 0 and divide by zero here - confirm callers never pass
	 * a zero speed.
	 */
	timeout = 1000 * msecs_to_jiffies(
	      DIV_ROUND_UP(dd->cur_msg_len * 8,
		 DIV_ROUND_UP(max_speed, MSEC_PER_SEC)));

	/* Assume success, this might change later upon transaction result */
	dd->cur_msg->status = 0;
	do {
		if (!wait_for_completion_timeout(&dd->transfer_complete,
						 timeout)) {
			dev_err(dd->dev, "%s: SPI transaction "
					 "timeout\n", __func__);
			dd->cur_msg->status = -EIO;
			dd->cur_msg->actual_length = 0;
			if (dd->mode == SPI_DMOV_MODE) {
				msm_dmov_flush(dd->tx_dma_chan, 1);
				msm_dmov_flush(dd->rx_dma_chan, 1);
			}

			/* Before unmap dma buffer make sure that
			 * we are in the last transfer of SPI message
			 * so that prev_xfr points to a valid entry.
			 */
			get_last_transfer(dd);
			break;
		}
	} while (msm_spi_dm_send_next(dd));

	msm_spi_udelay(dd->cur_transfer->delay_usecs);
transfer_end:
	if (dd->mode == SPI_DMOV_MODE)
		msm_spi_unmap_dma_buffers(dd);
	dd->mode = SPI_MODE_NONE;

	msm_spi_set_state(dd, SPI_OP_STATE_RESET);
	writel_relaxed(spi_ioc & ~SPI_IO_C_MX_CS_MODE,
		       dd->base + SPI_IO_CONTROL);
}

static void get_transfer_length(struct msm_spi *dd)
{
	struct spi_transfer *tr;
	int num_xfrs = 0;
	int readlen = 0;
	int writelen = 0;

	dd->cur_msg_len = 0;
	dd->multi_xfr = 0;
	dd->read_len = dd->write_len = 0;

	list_for_each_entry(tr, &dd->cur_msg->transfers, transfer_list) {
		if (tr->tx_buf)
			writelen += tr->len;
		if (tr->rx_buf)
			readlen += tr->len;
		dd->cur_msg_len += tr->len;
		num_xfrs++;
	}

	if (num_xfrs == 2) {
		struct spi_transfer *first_xfr = dd->cur_transfer;

		dd->multi_xfr = 1;
		tr = list_entry(first_xfr->transfer_list.next,
				struct spi_transfer,
				transfer_list);
		/*
		 * We update dd->read_len and dd->write_len only
		 * for WR-WR and WR-RD transfers.
		 */
		if ((first_xfr->tx_buf) && (!first_xfr->rx_buf)) {
			if (((tr->tx_buf) && (!tr->rx_buf)) ||
			    ((!tr->tx_buf) && (tr->rx_buf))) {
				dd->read_len = readlen;
				dd->write_len = writelen;
			}
		}
	} else if (num_xfrs > 1)
		dd->multi_xfr = 1;
}

/*
 * Group the current transfer with the transfers that follow it for as long
 * as consecutive transfers share the same cs_change value.
 *
 * dd->cur_msg_len is set to the summed length of the group.
 * Returns the number of transfers in the group (at least 1).
 */
static inline int combine_transfers(struct msm_spi *dd)
{
	struct list_head *head = &dd->cur_msg->transfers;
	struct spi_transfer *cur = dd->cur_transfer;
	int grouped;

	dd->cur_msg_len = dd->cur_transfer->len;
	for (grouped = 1; cur->transfer_list.next != head; grouped++) {
		struct spi_transfer *next =
			list_entry(cur->transfer_list.next,
				   struct spi_transfer,
				   transfer_list);

		if (next->cs_change != cur->cs_change)
			break;
		dd->cur_msg_len += next->len;
		cur = next;
	}
	return grouped;
}

/*
 * Set (set_flag true) or clear (set_flag false) SPI_IO_C_FORCE_CS in
 * SPI_IO_CONTROL. The register is only written when the value changes.
 */
static inline void write_force_cs(struct msm_spi *dd, bool set_flag)
{
	const u32 current_ioc = readl_relaxed(dd->base + SPI_IO_CONTROL);
	const u32 wanted_ioc = set_flag ?
		(current_ioc | SPI_IO_C_FORCE_CS) :
		(current_ioc & ~SPI_IO_C_FORCE_CS);

	if (wanted_ioc == current_ioc)
		return;

	writel_relaxed(wanted_ioc, dd->base + SPI_IO_CONTROL);
}

/*
 * Process every transfer of dd->cur_msg.
 *
 * Steps:
 *  - request the chip-select GPIO for the target device if it has one, it
 *    has not been requested yet and the device is not in loopback mode;
 *  - if the QUP version flag is set or any transfer asks for a post-transfer
 *    delay, run the transfers one by one (toggling FORCE_CS on QUP v2);
 *  - otherwise either group transfers with combine_transfers() or handle
 *    the single / WR-WR / WR-RD case in one go.
 *
 * The result is reported through dd->cur_msg->status; the function itself
 * returns nothing. On the "error" path the chip-select GPIO is released.
 */
static void msm_spi_process_message(struct msm_spi *dd)
{
	int xfrs_grped = 0;
	int cs_num;
	int rc;
	bool xfer_delay = false;
	struct spi_transfer *tr;

	dd->write_xfr_cnt = dd->read_xfr_cnt = 0;
	cs_num = dd->cur_msg->spi->chip_select;
	if ((!(dd->cur_msg->spi->mode & SPI_LOOP)) &&
		(!(dd->cs_gpios[cs_num].valid)) &&
		(dd->cs_gpios[cs_num].gpio_num >= 0)) {
		rc = gpio_request(dd->cs_gpios[cs_num].gpio_num,
				spi_cs_rsrcs[cs_num]);
		if (rc) {
			dev_err(dd->dev, "gpio_request for pin %d failed with "
				"error %d\n", dd->cs_gpios[cs_num].gpio_num,
				rc);
			/*
			 * Nothing was transferred: make sure the caller's
			 * completion sees the failure instead of whatever
			 * status the message carried when it was queued.
			 */
			dd->cur_msg->status = rc;
			return;
		}
		dd->cs_gpios[cs_num].valid = 1;
	}

	/* Does any transfer of this message request a delay? */
	list_for_each_entry(tr,
				&dd->cur_msg->transfers,
				transfer_list) {
		if (tr->delay_usecs) {
			dev_info(dd->dev, "SPI slave requests delay per txn :%d\n",
					tr->delay_usecs);
			xfer_delay = true;
			break;
		}
	}

	/* Don't combine xfers if delay is needed after every xfer */
	if (dd->qup_ver || xfer_delay) {
		if (dd->qup_ver)
			write_force_cs(dd, 0);
		list_for_each_entry(dd->cur_transfer,
				&dd->cur_msg->transfers,
				transfer_list) {
			struct spi_transfer *t = dd->cur_transfer;
			struct spi_transfer *nxt;

			/*
			 * When another transfer follows, force CS only if it
			 * shares this transfer's cs_change setting.
			 */
			if (t->transfer_list.next != &dd->cur_msg->transfers) {
				nxt = list_entry(t->transfer_list.next,
						struct spi_transfer,
						transfer_list);

				if (dd->qup_ver &&
					t->cs_change == nxt->cs_change)
					write_force_cs(dd, 1);
				else if (dd->qup_ver)
					write_force_cs(dd, 0);
			}

			dd->cur_msg_len = dd->cur_transfer->len;
			msm_spi_process_transfer(dd);
		}
	} else {
		dd->cur_transfer = list_first_entry(&dd->cur_msg->transfers,
						    struct spi_transfer,
						    transfer_list);
		get_transfer_length(dd);
		if (dd->multi_xfr && !dd->read_len && !dd->write_len) {
			/*
			 * Handling of multi-transfers.
			 * FIFO mode is used by default
			 */
			list_for_each_entry(dd->cur_transfer,
					    &dd->cur_msg->transfers,
					    transfer_list) {
				/*
				 * NOTE(review): a zero-length transfer aborts
				 * the rest of the message without touching
				 * cur_msg->status - confirm this is intended.
				 */
				if (!dd->cur_transfer->len)
					goto error;
				if (xfrs_grped) {
					/* Already covered by the last group */
					xfrs_grped--;
					continue;
				} else {
					dd->read_len = dd->write_len = 0;
					xfrs_grped = combine_transfers(dd);
				}

				dd->cur_tx_transfer = dd->cur_transfer;
				dd->cur_rx_transfer = dd->cur_transfer;
				msm_spi_process_transfer(dd);
				xfrs_grped--;
			}
		} else {
			/* Handling of a single transfer or
			 * WR-WR or WR-RD transfers
			 */
			if ((!dd->cur_msg->is_dma_mapped) &&
			    (msm_use_dm(dd, dd->cur_transfer,
					dd->cur_transfer->bits_per_word))) {
				/* Mapping of DMA buffers */
				int ret = msm_spi_map_dma_buffers(dd);
				if (ret < 0) {
					dd->cur_msg->status = ret;
					goto error;
				}
			}

			dd->cur_tx_transfer = dd->cur_transfer;
			dd->cur_rx_transfer = dd->cur_transfer;
			msm_spi_process_transfer(dd);
		}
	}

	return;

error:
	if (dd->cs_gpios[cs_num].valid) {
		gpio_free(dd->cs_gpios[cs_num].gpio_num);
		dd->cs_gpios[cs_num].valid = 0;
	}
}


/*
 * Pull messages from the queue and process them.
 *
 * Runs in worker context (see msm_spi_workq() / msm_spi_kthread()).
 * Holds dd->core_lock (and the remote lock when dd->use_rlock is set) for
 * the whole drain; dd->queue_lock only protects the list itself and is
 * dropped while a message is being processed and completed.
 */
static void msm_spi_message_handler(struct msm_spi *dd)
{
	unsigned long        flags;
	u32                  status_error = 0;

	pm_runtime_get_sync(dd->dev);

	mutex_lock(&dd->core_lock);

	/*
	 * Counter-part of system-suspend when runtime-pm is not enabled.
	 * This way, resume can be left empty and device will be put in
	 * active mode only if client requests anything on the bus
	 */
	if (!pm_runtime_enabled(dd->dev))
		msm_spi_pm_resume_runtime(dd->dev);

	if (dd->use_rlock)
		remote_mutex_lock(&dd->r_lock);

	/*
	 * If the core is not in a valid state every queued message is
	 * completed with -EIO below instead of being processed.
	 */
	if (!msm_spi_is_valid_state(dd)) {
		dev_err(dd->dev, "%s: SPI operational state not valid\n",
			__func__);
		status_error = 1;
	}

	spin_lock_irqsave(&dd->queue_lock, flags);
	dd->transfer_pending = 1;
	while (!list_empty(&dd->queue)) {
		/* Detach the head message, then work on it unlocked */
		dd->cur_msg = list_entry(dd->queue.next,
					 struct spi_message, queue);
		list_del_init(&dd->cur_msg->queue);
		spin_unlock_irqrestore(&dd->queue_lock, flags);
		dd->cur_msg->actual_length = 0;
		if (status_error)
			dd->cur_msg->status = -EIO;
		else
			msm_spi_process_message(dd);
		if (dd->cur_msg->complete)
			dd->cur_msg->complete(dd->cur_msg->context);
		/* Re-take the lock before re-checking the queue */
		spin_lock_irqsave(&dd->queue_lock, flags);
	}
	dd->transfer_pending = 0;
	spin_unlock_irqrestore(&dd->queue_lock, flags);

	if (dd->use_rlock)
		remote_mutex_unlock(&dd->r_lock);

	mutex_unlock(&dd->core_lock);

	pm_runtime_mark_last_busy(dd->dev);
	pm_runtime_put_autosuspend(dd->dev);

	/* If needed, this can be done after the current message is complete,
	   and work can be continued upon resume. No motivation for now. */
	if (dd->suspended)
		wake_up_interruptible(&dd->continue_suspend);
}

/* Workqueue entry point: drain the message queue of the owning controller. */
static void msm_spi_workq(struct work_struct *work)
{
	msm_spi_message_handler(container_of(work, struct msm_spi, work_data));
}

/* kthread-worker entry point: drain the message queue of the owning controller. */
static void msm_spi_kthread(struct kthread_work *work)
{
	msm_spi_message_handler(container_of(work, struct msm_spi,
					     spi_kthread_work));
}

/*
 * Kick the message handler: on the kthread worker when the platform data
 * selects MSM_SPI_THREAD_RT, on the driver workqueue otherwise.
 */
static inline void msm_spi_queue_thread(struct msm_spi *dd)
{
	if (dd->pdata->thread_mode != MSM_SPI_THREAD_RT) {
		queue_work(dd->workqueue, &dd->work_data);
		return;
	}

	queue_kthread_work(&dd->spi_kthread_worker, &dd->spi_kthread_work);
}

/*
 * Queue a message for asynchronous processing.
 *
 * Every transfer is validated first: its speed must not exceed the
 * platform maximum, a non-zero bits_per_word must lie in 4..32, and at
 * least one of tx_buf / rx_buf must be set. A message with no transfers
 * or without a completion callback is rejected.
 *
 * Returns 0 once the message is queued, -EINVAL on a validation failure.
 */
static int msm_spi_transfer(struct spi_device *spi, struct spi_message *msg)
{
	struct msm_spi	*dd;
	unsigned long    flags;
	struct spi_transfer *tr;

	dd = spi_master_get_devdata(spi->master);

	if (list_empty(&msg->transfers) || !msg->complete)
		return -EINVAL;

	list_for_each_entry(tr, &msg->transfers, transfer_list) {
		/* Check message parameters */
		if (tr->speed_hz > dd->pdata->max_clock_speed ||
		    (tr->bits_per_word &&
		     (tr->bits_per_word < 4 || tr->bits_per_word > 32)) ||
		    (tr->tx_buf == NULL && tr->rx_buf == NULL)) {
			dev_err(&spi->dev, "Invalid transfer: %u Hz, %d bpw, "
					   "tx=%p, rx=%p\n",
					    tr->speed_hz, tr->bits_per_word,
					    tr->tx_buf, tr->rx_buf);
			return -EINVAL;
		}
	}

	spin_lock_irqsave(&dd->queue_lock, flags);
	list_add_tail(&msg->queue, &dd->queue);
	spin_unlock_irqrestore(&dd->queue_lock, flags);
	msm_spi_queue_thread(dd);
	return 0;
}

#if defined(USE_NECPF_KERNEL_CRASHLOG)
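/*
 * Rotary project: implementation for m25p80_panic_write(). (BUG#35205)
 *   This is called from panic(), so it must be atomic code.
 *   Ported from the u-boot flash driver.
 *   Ideally the flash and SPI layers would be implemented separately,
 *   but that has been omitted here.
 */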

/*
 * GPIO
 */

/* from gpio.h */

/* GPIO TLMM: Direction */
#define GPIO_INPUT      0
#define GPIO_OUTPUT     1

/* GPIO TLMM: Pullup/Pulldown */
#define GPIO_NO_PULL    0
#define GPIO_PULL_DOWN  1
#define GPIO_KEEPER     2
#define GPIO_PULL_UP    3

/* from iomap.h */

/*
 * Per-pin TLMM register addresses: pin x owns a 0x10-byte window, with the
 * configuration register at offset 0x1000 and the in/out register at
 * offset 0x1004 from MSM_TLMM_BASE.
 */
#define GPIO_CONFIG_ADDR(x) (MSM_TLMM_BASE + 0x1000 + (x)*0x10)
#define GPIO_IN_OUT_ADDR(x) (MSM_TLMM_BASE + 0x1004 + (x)*0x10)

/*
 * SPI
 */

/* from spi.h */

/* Flags accepted by spi_xfer() / gsbi_spi_read() / gsbi_spi_write() */
#define SPI_XFER_BEGIN 0x01	/* Assert CS before transfer */
#define SPI_XFER_END   0x02	/* Deassert CS after transfer */

/* from ipq_spi.h */

/*
 * Register field definitions used by the polled panic-path code below.
 * Where a *_MSK macro exists it is the mask handed to msk_writel() and the
 * neighbouring macro is the value written into that field.
 */
#define QUP_STATE_VALID_BIT 2
#define QUP_STATE_VALID 1
#define QUP_STATE_MASK 0x3
#define QUP_CONFIG_MINI_CORE_MSK                (0x0F << 8)
#define QUP_CONFIG_MINI_CORE_SPI                (1 << 8)
#define SPI_QUP_CONF_INPUT_MSK                  (1 << 7)
#define SPI_QUP_CONF_INPUT_ENA                  (0 << 7)
#define SPI_QUP_CONF_OUTPUT_MSK                 (1 << 6)
#define SPI_QUP_CONF_OUTPUT_ENA                 (0 << 6)
#define QUP_STATE_RUN_STATE                     0x1
#define QUP_STATE_RESET_STATE                   0x0
#define SPI_BIT_WORD_MSK                        0x1F
#define SPI_8_BIT_WORD                          0x07
#define LOOP_BACK_MSK                           (1 << 8)
#define NO_LOOP_BACK                            (0 << 8)
#define SLAVE_OPERATION_MSK                     (1 << 5)
#define SLAVE_OPERATION                         (0 << 5)
#define CLK_ALWAYS_ON                           (0 << 9)
#define MX_CS_MODE                              (0 << 8)
#define NO_TRI_STATE                            (1 << 0)
#define OUTPUT_BIT_SHIFT_MSK                    (1 << 16)
#define OUTPUT_BIT_SHIFT_EN                     (1 << 16)
#define INPUT_BLOCK_MODE_MSK                    (0x03 << 12)
#define INPUT_BLOCK_MODE                        (0x01 << 12)
#define OUTPUT_BLOCK_MODE_MSK                   (0x03 << 10)
#define OUTPUT_BLOCK_MODE                       (0x01 << 10)

#define SPI_INPUT_FIRST_MODE                    (1 << 9)
#define SPI_IO_CONTROL_CLOCK_IDLE_HIGH          (1 << 10)
/* Number of bytes moved per FIFO service request by the polled code */
#define SPI_INPUT_BLOCK_SIZE 			4
#define SPI_OUTPUT_BLOCK_SIZE			4
/* GPIO numbers passed to spi_gpio_tlmm_config() for the GSBI5 SPI pins */
#define GSBI5_SPI_CLK                           21
#define GSBI5_SPI_MISO                          19
#define GSBI5_SPI_MOSI                          18
#define GSBI5_SPI_CS_0                          20
#define GSBI5_SPI_CS_1                          61
#define GSBI5_SPI_CS_2                          62
#define GSBI5_SPI_CS_3                          2

#define GPIO_FUNC_ENABLE                        1
#define GPIO_FUNC_DISABLE                       0
#define FUNC_SEL_1                              1
#define FUNC_SEL_3                              3
#define FUNC_SEL_GPIO                           0
#define GPIO_DRV_STR_10MA                       0x4
#define GPIO_DRV_STR_11MA                       0x7
/* Bit position set/cleared in the GPIO in/out register by CS_change() */
#define GPIO_OUT                                1

/* State selectors accepted by config_spi_state() */
#define SPI_RESET_STATE 0
#define SPI_RUN_STATE   1
/* Mode selectors accepted by spi_set_mode() */
#define GSBI_SPI_MODE_0                         0
#define GSBI_SPI_MODE_1                         1
#define GSBI_SPI_MODE_2                         2
#define GSBI_SPI_MODE_3                         3

/* from ipq_spi.c */

#define DUMMY_DATA_VAL          0
/* Maximum number of polls in check_bit_state() / check_fifo_status() */
#define TIMEOUT_CNT             100
/*
 * Dimensions of gsbi_pin_conf[][][].
 * NOTE(review): NUM_PORTS is 3 but only one port is initialised there.
 */
#define NUM_PORTS               3
#define NUM_GSBI_PINS           3
#define TLMM_ARGS               6
/* Column indices into one gsbi_pin_conf[][] row */
#define GSBI_PIN_IDX            0
#define FUNC_SEL_IDX            1
#define GPIO_DIR_IDX            2
#define PULL_CONF_IDX           3
#define DRV_STR_IDX             4
#define GPIO_EN_IDX             5

/*
 * SPI flash
 */

/* from spi_flash_internal.h */

#define SPI_FLASH_PROG_TIMEOUT 2 /* 2 seconds */
/* Extra command byte appended by spi_flash_cmd_read_fast() */
#define FAST_READ_DUMMY_BYTE 1

/* Spansion new commands for 4-byte address access */
#define CMD_4READ_ARRAY_FAST 0x0c
#define CMD_4PAGE_PROGRAM 0x12

#define CMD_READ_STATUS 0x05
#define CMD_WRITE_ENABLE 0x06

/* Common status */
#define STATUS_WIP 0x01

/*
 * GPIO
 */
static void
spi_gpio_tlmm_config(unsigned int gpio, unsigned int func,
		     unsigned int out, unsigned int pull,
		     unsigned int drvstr, unsigned int oe)
{
	unsigned int val = 0;
	const volatile void __iomem *addr = GPIO_CONFIG_ADDR(gpio);

	val |= pull;
	val |= func << 2;
	val |= drvstr << 6;
	val |= oe << 9;
	writel_relaxed(val, addr);
}

/* Function to assert and De-assert chip select */
static void
CS_change(int enable)
{
	unsigned int cs_gpio = GSBI5_SPI_CS_0;
	const volatile void __iomem *addr = GPIO_IN_OUT_ADDR(cs_gpio);
	uint32_t val = readl_relaxed(addr);

	val &= (~(1 << GPIO_OUT));
	if (!enable)
		val |= (1 << GPIO_OUT);
	writel_relaxed(val, addr);
}

/*
 * TLMM settings for the SPI clock/data pins, indexed as
 * [port][pin][argument]; the argument columns are selected with the
 * *_IDX macros (pin number, function select, direction, pull, drive
 * strength, enable) and consumed by gsbi_pin_config().
 *
 * Only port 0 (GSBI5) is initialised explicitly; the remaining
 * NUM_PORTS - 1 entries are zero-filled by the compiler.
 */
static unsigned int gsbi_pin_conf[NUM_PORTS][NUM_GSBI_PINS][TLMM_ARGS] = {
	{
		/* GSBI5 CLK */
		{GSBI5_SPI_CLK,  FUNC_SEL_1, GPIO_INPUT,
		 GPIO_PULL_DOWN, GPIO_DRV_STR_11MA, GPIO_FUNC_DISABLE},
		/* GSBI5 MISO */
		{GSBI5_SPI_MISO, FUNC_SEL_1, GPIO_INPUT,
		 GPIO_PULL_DOWN, GPIO_DRV_STR_10MA, GPIO_FUNC_DISABLE},
		/* GSBI5 MOSI */
		{GSBI5_SPI_MOSI, FUNC_SEL_1, GPIO_INPUT,
		 GPIO_PULL_DOWN, GPIO_DRV_STR_10MA, GPIO_FUNC_DISABLE}
	}
};

/*
 * GSBIn TLMM configuration.
 *
 * Applies the gsbi_pin_conf[port_num] rows for SPI_CLK, SPI_MISO and
 * SPI_MOSI, then configures GSBI5_SPI_CS_0 as a pulled-up GPIO output and
 * de-asserts it. port_num is not range-checked and cs_num is not used.
 */
static void
gsbi_pin_config(unsigned int port_num, int cs_num)
{
	unsigned int pin;

	for (pin = 0; pin < NUM_GSBI_PINS; pin++) {
		const unsigned int *cfg = gsbi_pin_conf[port_num][pin];

		spi_gpio_tlmm_config(cfg[GSBI_PIN_IDX], cfg[FUNC_SEL_IDX],
				     cfg[GPIO_DIR_IDX], cfg[PULL_CONF_IDX],
				     cfg[DRV_STR_IDX], cfg[GPIO_EN_IDX]);
	}

	/* Chip select is driven manually as a plain GPIO */
	spi_gpio_tlmm_config(GSBI5_SPI_CS_0, FUNC_SEL_GPIO, GPIO_OUTPUT,
			     GPIO_PULL_UP, GPIO_DRV_STR_10MA,
			     GPIO_FUNC_ENABLE);
	CS_change(0);
}

/*
 * SPI
 */

/*
 * Read-modify-write helper: replace the bits selected by 'msk' in the
 * register at 'addr' with the corresponding bits of 'val'.
 */
static void
msk_writel(const volatile void __iomem *addr, u32 msk, u32 val)
{
	const u32 current_val = readl_relaxed(addr);

	writel_relaxed((current_val & ~msk) | (val & msk), addr);
}

/*
 * Poll bit 'bit_num' of the register at 'reg_addr' until it equals 'val'.
 *
 * Waits 'us_delay' microseconds between polls and gives up after
 * TIMEOUT_CNT polls. Returns 0 on match, -ETIMEDOUT otherwise.
 */
static int
check_bit_state(
	const volatile void __iomem *reg_addr,
	int bit_num, int val, int us_delay)
{
	unsigned int remaining = TIMEOUT_CNT;

	for (;;) {
		unsigned int bit = (readl_relaxed(reg_addr) >> bit_num) & 0x01;

		if (bit == val)
			return 0;
		if (--remaining == 0)
			return -ETIMEDOUT;
		udelay(us_delay);
	}
}

/* Check whether GSBIn_QUP State is valid */
static int
check_qup_state_valid(const struct msm_spi *dd)
{

	return check_bit_state(
		dd->base + SPI_STATE, QUP_STATE_VALID_BIT, QUP_STATE_VALID, 1);
}

/*
 * Move the GSBIn core to SPI_RUN_STATE or SPI_RESET_STATE.
 *
 * The state-valid bit is awaited both before and after the state field is
 * rewritten. Returns 0 on success, -ETIMEDOUT if the state never becomes
 * valid, or -EINVAL for an unknown 'state'.
 */
static int
config_spi_state(const struct msm_spi *dd, unsigned int state)
{
	uint32_t target;
	uint32_t val;
	int ret;

	ret = check_qup_state_valid(dd);
	if (ret != 0)
		return ret;

	switch (state) {
	case SPI_RUN_STATE:
		target = QUP_STATE_RUN_STATE;
		break;
	case SPI_RESET_STATE:
		target = QUP_STATE_RESET_STATE;
		break;
	default:
		pr_err("err: unsupported GSBI SPI state : %d\n", state);
		return -EINVAL;
	}

	/* Replace only the state field, keep the other bits as read */
	val = (readl_relaxed(dd->base + SPI_STATE) & ~QUP_STATE_MASK) | target;
	writel_relaxed(val, dd->base + SPI_STATE);

	return check_qup_state_valid(dd);
}

/* Reset entire QUP and all mini cores */
static void
spi_reset(const struct msm_spi *dd)
{
	writel_relaxed(0x1, dd->base + SPI_SW_RESET);
	udelay(5);
}

/*
 * Set GSBIn SPI Mode.
 *
 * Modes 0 and 2 select "input first" in SPI_CONFIG, modes 1 and 3 do not.
 * Modes 2 and 3 select an idle-high clock in SPI_IO_CONTROL, modes 0 and 1
 * idle-low. An unsupported mode is logged and leaves the registers
 * untouched.
 */
static void
spi_set_mode(const struct msm_spi *dd, unsigned int mode)
{
	unsigned int clk_idle_state;
	unsigned int input_first_mode;
	uint32_t val;

	switch (mode) {
	case GSBI_SPI_MODE_0:
		clk_idle_state = 0;
		input_first_mode = SPI_INPUT_FIRST_MODE;
		break;
	case GSBI_SPI_MODE_1:
		clk_idle_state = 0;
		input_first_mode = 0;
		break;
	case GSBI_SPI_MODE_2:
		clk_idle_state = 1;
		input_first_mode = SPI_INPUT_FIRST_MODE;
		break;
	case GSBI_SPI_MODE_3:
		clk_idle_state = 1;
		input_first_mode = 0;
		break;
	default:
		pr_err("err : unsupported spi mode : %d\n", mode);
		return;
	}

	/*
	 * Clear the field before applying the new value so that modes 1/3
	 * really turn "input first" off when it was previously set.
	 */
	val = readl_relaxed(dd->base + SPI_CONFIG);
	val &= ~SPI_INPUT_FIRST_MODE;
	val |= input_first_mode;
	writel_relaxed(val, dd->base + SPI_CONFIG);

	val = readl_relaxed(dd->base + SPI_IO_CONTROL);
	if (clk_idle_state)
		val |= SPI_IO_CONTROL_CLOCK_IDLE_HIGH;
	else
		val &= ~SPI_IO_CONTROL_CLOCK_IDLE_HIGH;
	writel_relaxed(val, dd->base + SPI_IO_CONTROL);
}

/*
 * GSBIn SPI Hardware Initialisation for the polled (panic) path.
 *
 * Software-resets the QUP, puts it in the RESET state and then programs
 * QUP_CONFIG, SPI_CONFIG, SPI_IO_CONTROL and SPI_IO_MODES, selects SPI
 * mode 0 and clears both error-flag enable registers.
 *
 * Returns 0 on success or the error from config_spi_state().
 */
static int
spi_hw_init(const struct msm_spi *dd)
{
	int ret;

	/* GSBI module configuration */
	spi_reset(dd);

	/* Set the GSBIn QUP state */
	ret = config_spi_state(dd, SPI_RESET_STATE);
	if (ret)
		return ret;

#if 0	/* Already configured by msm_spi_configure_gsbi() in msm_spi_probe(). */
	/* Configure GSBI_CTRL register to set protocol_mode to SPI:011 */
	clrsetbits_le32(ds->regs->gsbi_ctrl,
			PROTOCOL_CODE_MSK, PROTOCOL_CODE_SPI);
#endif

	/*
	 * Configure Mini core to SPI core with Input Output enabled,
	 * SPI master, N = 8 bits
	 */
	msk_writel(dd->base + QUP_CONFIG,
		   (QUP_CONFIG_MINI_CORE_MSK | SPI_QUP_CONF_INPUT_MSK |
		    SPI_QUP_CONF_OUTPUT_MSK | SPI_BIT_WORD_MSK),
		   (QUP_CONFIG_MINI_CORE_SPI | SPI_QUP_CONF_INPUT_ENA |
		    SPI_QUP_CONF_OUTPUT_ENA | SPI_8_BIT_WORD));

	/*
	 * Configure Input first SPI protocol,
	 * SPI master mode and no loopback
	 */
	msk_writel(dd->base + SPI_CONFIG,
		   (LOOP_BACK_MSK | SLAVE_OPERATION_MSK),
		   (NO_LOOP_BACK | SLAVE_OPERATION));

	/*
	 * Configure SPI IO Control Register
	 * CLK_ALWAYS_ON = 0
	 * MX_CS_MODE = 0
	 * NO_TRI_STATE = 1
	 *
	 * This is a full register write, not a masked update: every other
	 * bit of SPI_IO_CONTROL is written as 0.
	 */
	writel_relaxed((CLK_ALWAYS_ON | MX_CS_MODE | NO_TRI_STATE),
	       dd->base + SPI_IO_CONTROL);

	/*
	 * Configure SPI IO Modes.
	 * OUTPUT_BIT_SHIFT_EN = 1
	 * INPUT_MODE = Block Mode
	 * OUTPUT MODE = Block Mode
	 */
	msk_writel(dd->base + SPI_IO_MODES,
		   (OUTPUT_BIT_SHIFT_MSK | INPUT_BLOCK_MODE_MSK |
		    OUTPUT_BLOCK_MODE_MSK),
		   (OUTPUT_BIT_SHIFT_EN | INPUT_BLOCK_MODE |
		    OUTPUT_BLOCK_MODE));

	spi_set_mode(dd, GSBI_SPI_MODE_0);

	/* Disable Error mask */
	writel_relaxed(0, dd->base + QUP_ERROR_FLAGS_EN);
	writel_relaxed(0, dd->base + SPI_ERROR_FLAGS_EN);

	return 0;
}

/*
 * Prepare the bus for polled access: configure the SPI pins, run the
 * runtime-resume handler and initialise the controller.
 *
 * Returns 0 on success or -EIO when spi_hw_init() fails. The result of
 * msm_spi_pm_resume_runtime() is not checked.
 */
static int
spi_claim_bus(const struct msm_spi *dd)
{
	/* GPIO Configuration for SPI port */
	gsbi_pin_config(0, 0);

	msm_spi_pm_resume_runtime(dd->dev);

	return spi_hw_init(dd) ? -EIO : 0;
}

/*
 * Check whether the input or output FIFO has data to be serviced.
 *
 * Polls the register at 'reg_addr' up to TIMEOUT_CNT times, back to back,
 * until either SPI_OP_OUTPUT_SERVICE_FLAG or SPI_OP_INPUT_SERVICE_FLAG is
 * set. The flags are tested before the poll budget, so a flag seen on the
 * very last poll still counts as success.
 *
 * Returns 0 when a service flag is set, -ETIMEDOUT otherwise.
 */
static int
check_fifo_status(const volatile void __iomem *reg_addr)
{
	unsigned int polls_left;

	for (polls_left = TIMEOUT_CNT; polls_left; polls_left--) {
		unsigned int val = readl_relaxed(reg_addr);

		if (val & (SPI_OP_OUTPUT_SERVICE_FLAG |
			   SPI_OP_INPUT_SERVICE_FLAG))
			return 0;
	}

	return -ETIMEDOUT;
}

/*
 * Function to write data to OUTPUT FIFO
 */
static void
spi_write_byte(const struct msm_spi *dd, unsigned char data)
{

	/* Wait for space in the FIFO */
	while ((readl_relaxed(dd->base + SPI_OPERATIONAL)) &
	       SPI_OP_OUTPUT_FIFO_FULL) {
		udelay(1);
	}

	/* Write the byte of data */
	writel_relaxed(data, dd->base + SPI_OUTPUT_FIFO);
}

/*
 * Function to read data from Input FIFO
 */
static unsigned char
spi_read_byte(const struct msm_spi *dd)
{

	/* Wait for Data in FIFO */
	while (!(readl_relaxed(dd->base + SPI_OPERATIONAL) &
		 SPI_OP_IP_FIFO_NOT_EMPTY)) {
		udelay(1);
	}

	/* Read a byte of data */
	return readl_relaxed(dd->base + SPI_INPUT_FIFO) & 0xff;
}

/*
 * Drain the input FIFO.
 *
 * Words are read and discarded while the input FIFO is not empty. Once it
 * reads empty, the loop only ends when the output FIFO is empty too and
 * the input FIFO is still empty on a further read. There is no timeout.
 */
static void
flush_fifos(const struct msm_spi *dd)
{
	for (;;) {
		if (readl_relaxed(dd->base + SPI_OPERATIONAL) &
		    SPI_OP_IP_FIFO_NOT_EMPTY) {
			/* Discard one word of stale input */
			(void)readl_relaxed(dd->base + SPI_INPUT_FIFO);
			continue;
		}

		/* Output still pending: more input may yet arrive */
		if (readl_relaxed(dd->base + SPI_OPERATIONAL) &
		    SPI_OP_OP_FIFO_NOT_EMPTY)
			continue;

		if (!(readl_relaxed(dd->base + SPI_OPERATIONAL) &
		      SPI_OP_IP_FIFO_NOT_EMPTY))
			return;
	}
}

/*
 * Read 'bytes' bytes from the Input FIFO into 'data_buffer'.
 *
 * flags: SPI_XFER_BEGIN asserts chip select before the transfer,
 *        SPI_XFER_END flushes the FIFOs, de-asserts chip select and puts
 *        the core back in RESET after the transfer.
 *
 * Returns 0 on success, or the error from config_spi_state() /
 * check_fifo_status(). On a FIFO timeout chip select is de-asserted and the
 * core is reset; on a config_spi_state() failure the function returns
 * immediately, leaving chip select as it was set on entry.
 */
static int
gsbi_spi_read(const struct msm_spi *dd, u8 *data_buffer,
	      unsigned int bytes, unsigned long flags)
{
	uint32_t val;
	unsigned int i;
	unsigned int read_bytes = bytes;
	unsigned int fifo_count;
	int ret = 0;
	int state_config;

	if (flags & SPI_XFER_BEGIN) {
		CS_change(1);	/* Assert chip select */
	}

	/* Configure no of bytes to read */
	state_config = config_spi_state(dd, SPI_RESET_STATE);
	if (state_config) {
		pr_err("Err: %s:%d state_config:%d\n",
		       __func__, __LINE__, state_config);
		return state_config;
	}

	/* The counts are programmed while the core is in RESET */
	writel_relaxed(bytes, dd->base + SPI_MX_OUTPUT_COUNT);
	writel_relaxed(bytes, dd->base + SPI_MX_INPUT_COUNT);

	state_config = config_spi_state(dd, SPI_RUN_STATE);
	if (state_config) {
		pr_err("Err: %s:%d state_config:%d\n",
		       __func__, __LINE__, state_config);
		return state_config;
	}

	while (read_bytes) {

		ret = check_fifo_status(dd->base + SPI_OPERATIONAL);
		if (ret != 0) {
			pr_err("Err: %s:%d ret:%d\n", __func__, __LINE__, ret);
			goto out;
		}

		val = readl_relaxed(dd->base + SPI_OPERATIONAL);
		if (val & SPI_OP_INPUT_SERVICE_FLAG) {
			/*
			 * acknowledge to hw that software will
			 * read input data
			 */
			val &= SPI_OP_INPUT_SERVICE_FLAG;
			writel_relaxed(val, dd->base + SPI_OPERATIONAL);

			/* At most one block per service request */
			fifo_count = ((read_bytes > SPI_INPUT_BLOCK_SIZE) ?
				      SPI_INPUT_BLOCK_SIZE : read_bytes);

			for (i = 0; i < fifo_count; i++) {
				*data_buffer = spi_read_byte(dd);
				data_buffer++;
				read_bytes--;
			}
		}

		/*
		 * NOTE(review): 'val' was masked down to the input flag above,
		 * so when the input branch ran this test presumably cannot
		 * succeed in the same iteration (assumes the two flags are
		 * distinct bits) - confirm this is intended.
		 */
		if (val & SPI_OP_OUTPUT_SERVICE_FLAG) {
			/*
			 * acknowledge to hw that software will
			 * write output data
			 */
			val &= SPI_OP_OUTPUT_SERVICE_FLAG;
			writel_relaxed(val, dd->base + SPI_OPERATIONAL);

			fifo_count = ((read_bytes > SPI_OUTPUT_BLOCK_SIZE) ?
				      SPI_OUTPUT_BLOCK_SIZE : read_bytes);

			for (i = 0; i < fifo_count; i++) {
				/*
				 * Write dummy data byte for the device
				 * to shift in actual data. Most of the SPI devices
				 * accepts dummy data value as 0. In case of any
				 * other value change DUMMY_DATA_VAL.
				 */
				spi_write_byte(dd, DUMMY_DATA_VAL);
			}
		}
	}

	if (flags & SPI_XFER_END) {
		flush_fifos(dd);
		goto out;
	}

	/* Transfer continues: leave chip select and core state untouched */
	return ret;

out:
	CS_change(0);		/* Deassert CS */

	/*
	 * Put the SPI Core back in the Reset State
	 * to end the transfer
	 */
	(void)config_spi_state(dd, SPI_RESET_STATE);

	return ret;

}

/*
 * Write 'bytes' bytes from 'cmd_buffer' to the Output FIFO, reading back
 * and discarding the same number of bytes from the Input FIFO.
 *
 * flags: SPI_XFER_BEGIN asserts chip select before the transfer,
 *        SPI_XFER_END flushes the FIFOs, de-asserts chip select and puts
 *        the core back in RESET after the transfer.
 *
 * With bytes == 0 the transfer loop is skipped, so the call only performs
 * the chip-select / state handling selected by 'flags'.
 *
 * Returns 0 on success, or the error from config_spi_state() /
 * check_fifo_status(). On a config_spi_state() failure the function
 * returns immediately, leaving chip select as it was set on entry.
 */
static int
gsbi_spi_write(
	const struct msm_spi *dd, const u8 *cmd_buffer,
	unsigned int bytes, unsigned long flags)
{
	uint32_t val;
	unsigned int i;
	unsigned int write_len = bytes;
	unsigned int read_len = bytes;
	unsigned int fifo_count;
	int ret = 0;
	int state_config;

	if (flags & SPI_XFER_BEGIN) {
		CS_change(1);	/* Select the chip select */
	}

	state_config = config_spi_state(dd, SPI_RESET_STATE);
	if (state_config) {
		pr_err("Err: %s:%d state_config:%d\n",
		       __func__, __LINE__, state_config);
		return state_config;
	}

	/* No of bytes to be written in Output FIFO */
	writel_relaxed(bytes, dd->base + SPI_MX_OUTPUT_COUNT);
	writel_relaxed(bytes, dd->base + SPI_MX_INPUT_COUNT);
	state_config = config_spi_state(dd, SPI_RUN_STATE);
	if (state_config) {
		pr_err("Err: %s:%d state_config:%d\n",
		       __func__, __LINE__, state_config);
		return state_config;
	}

	/*
	 * read_len considered to ensure that we read the dummy data for the
	 * write we performed. This is needed to ensure with WR-RD transaction
	 * to get the actual data on the subsequent read cycle that happens
	 */
	while (write_len || read_len) {
		ret = check_fifo_status(dd->base + SPI_OPERATIONAL);
		if (ret != 0) {
			pr_err("Err: %s:%d ret:%d\n", __func__, __LINE__, ret);
			goto out;
		}

		val = readl_relaxed(dd->base + SPI_OPERATIONAL);
		if (val & SPI_OP_OUTPUT_SERVICE_FLAG) {
			/*
			 * acknowledge to hw that software will write
			 * expected output data
			 */
			val &= SPI_OP_OUTPUT_SERVICE_FLAG;
			writel_relaxed(val, dd->base + SPI_OPERATIONAL);

			/* At most one block per service request */
			if (write_len > SPI_OUTPUT_BLOCK_SIZE)
                                fifo_count = SPI_OUTPUT_BLOCK_SIZE;
                        else
                                fifo_count = write_len;

			for (i = 0; i < fifo_count; i++) {
				/* Write actual data to output FIFO */
				spi_write_byte(dd, *cmd_buffer);
				cmd_buffer++;
				write_len--;
			}
		}
		/*
		 * NOTE(review): 'val' was masked down to the output flag
		 * above, so when the output branch ran this test presumably
		 * cannot succeed in the same iteration (assumes the two flags
		 * are distinct bits) - confirm this is intended.
		 */
		if (val & SPI_OP_INPUT_SERVICE_FLAG) {
			/*
			 * acknowledge to hw that software
			 * will read input data
			 */
			val &= SPI_OP_INPUT_SERVICE_FLAG;
			writel_relaxed(val, dd->base + SPI_OPERATIONAL);

			if (read_len > SPI_INPUT_BLOCK_SIZE)
				fifo_count = SPI_INPUT_BLOCK_SIZE;
			else
				fifo_count = read_len;

			for (i = 0; i < fifo_count; i++) {
				/* Read dummy data for the data written */
				(void)spi_read_byte(dd);

				/* Decrement the read count after reading the
				 * dummy data from the device, so the loop
				 * keeps running until every written byte has
				 * had its dummy byte read back.
				 */
				read_len--;
			}
		}
	}

	if (flags & SPI_XFER_END) {
		flush_fifos(dd);
		goto out;
	}

	/* Transfer continues: leave chip select and core state untouched */
	return ret;

out:
	CS_change(0);		/* Deassert CS */

	/*
	 * Put the SPI Core back in the Reset State
	 * to end the transfer
	 */
	(void)config_spi_state(dd, SPI_RESET_STATE);

	return ret;
}

/*
 * Polled transfer of 'bitlen' bits (must be a multiple of 8).
 *
 * dout != NULL: the bytes are written first (input is discarded).
 * din  != NULL: the bytes are then read into 'din'.
 * both NULL   : only the chip-select / state handling selected by 'flags'
 *               (SPI_XFER_BEGIN / SPI_XFER_END) is performed.
 *
 * Returns 0 on success, -EINVAL for a bit length that is not a whole
 * number of bytes, or the error from gsbi_spi_write() / gsbi_spi_read().
 */
static int
spi_xfer(const struct msm_spi *dd, unsigned int bitlen,
	 const void *dout, void *din, unsigned long flags)
{
	unsigned int len;
	const u8 *txp = dout;
	u8 *rxp = din;
	int ret = 0;

	if (bitlen & 0x07) {
		pr_err("Err : Invalid bit length\n");
		return -EINVAL;
	}

	len = bitlen >> 3;

	if (dout != NULL) {
		ret = gsbi_spi_write(dd, txp, len, flags);
		if (ret != 0) {
			pr_err("Err: %s:%d ret:%d\n", __func__, __LINE__, ret);
			return ret;
		}
	}

	if (din != NULL)
		return gsbi_spi_read(dd, rxp, len, flags);

	if ((din == NULL) && (dout == NULL)) {
		/* To handle only when chip select change is needed */
		ret = gsbi_spi_write(dd, NULL, 0, flags);
	}

	return ret;
}

/*
 * SPI flash
 */

/*
 * Fixed description of the flash part used by the panic-write path:
 * 256-byte program pages, 4-byte addressing and the matching 4-byte
 * address fast-read / page-program opcodes.
 */
static const struct spi_flash {
	u32 page_size;		/* Write (page) size */
	u32 addr_width;		/* 3 or 4 byte address width */
	u8 read_opcode;
	u8 write_opcode;
} necpf_flash = {
	.page_size	= 256,
	.addr_width	= 4,
	.read_opcode	= CMD_4READ_ARRAY_FAST,
	.write_opcode	= CMD_4PAGE_PROGRAM,
};

/*
 * Store 'addr' into cmd[1..addr_width], most significant byte first.
 * cmd[0] is the actual command and is left untouched.
 *
 * For address widths below 4 the unused bytes up to cmd[4] are zeroed, so
 * cmd[1..4] is always fully written; 'cmd' must therefore hold at least
 * 5 bytes (or 1 + addr_width bytes if that is larger).
 *
 * The address is peeled off one byte at a time so that no shift count
 * depends on addr_width.
 */
static void
spi_flash_addr(u32 addr_width, u32 addr, u8 *cmd)
{
	u32 idx;

	/* Zero the tail that a short address does not cover */
	for (idx = 4; idx > addr_width; idx--)
		cmd[idx] = 0;

	/* Least significant byte goes last */
	for (idx = addr_width; idx > 0; idx--) {
		cmd[idx] = addr & 0xff;
		addr >>= 8;
	}
}

/* Command length in bytes: one opcode byte followed by the address bytes. */
static int
flash_cmdsz(u32 addr_width)
{
	const u32 opcode_bytes = 1;

	return opcode_bytes + addr_width;
}

/*
 * Send a command and, optionally, a data phase in one chip-select cycle.
 *
 * cmd/cmd_len      : command bytes, sent with SPI_XFER_BEGIN.
 * data_out/data_in : buffers for the data phase (either may be NULL).
 * data_len         : length of the data phase; when 0 the command itself
 *                    is sent with SPI_XFER_END and no data phase follows.
 *
 * Returns 0 on success or the error from spi_xfer().
 */
static int
spi_flash_read_write(
	const struct msm_spi *dd, const u8 *cmd, size_t cmd_len,
	const u8 *data_out, u8 *data_in, size_t data_len)
{
	unsigned long flags = SPI_XFER_BEGIN;
	int ret;

	/*
	 * Commands may be as short as one byte, so only the bytes that are
	 * really part of 'cmd' are read for the trace; the rest print as 00.
	 */
	pr_devel("[%s] %s%s data_len:%zu cmd = {%02x %02x %02x %02x %02x}\n",
		 __func__, data_out ? "W" : "", data_in ? "R" : "", data_len,
		 cmd_len > 0 ? cmd[0] : 0, cmd_len > 1 ? cmd[1] : 0,
		 cmd_len > 2 ? cmd[2] : 0, cmd_len > 3 ? cmd[3] : 0,
		 cmd_len > 4 ? cmd[4] : 0);

	if (data_len == 0)
		flags |= SPI_XFER_END;

	ret = spi_xfer(dd, cmd_len * 8, cmd, NULL, flags);
	if (ret) {
		pr_err("SF: Failed to send command (%zu bytes): %d\n",
		       cmd_len, ret);
	} else if (data_len != 0) {
		ret = spi_xfer(
			dd, data_len * 8, data_out, data_in, SPI_XFER_END);
		if (ret)
			pr_err("SF: Failed to transfer %zu "
			       "bytes of data: %d\n", data_len, ret);
	}

	return ret;
}

/* Send 'cmd' and read 'data_len' bytes of response into 'data'. */
static int
spi_flash_cmd_read(const struct msm_spi *dd, const u8 *cmd,
		   size_t cmd_len, void *data, size_t data_len)
{
	const u8 *no_tx_data = NULL;

	return spi_flash_read_write(dd, cmd, cmd_len,
				    no_tx_data, data, data_len);
}

/* Send the one-byte command 'cmd' and read 'len' bytes into 'response'. */
static int
spi_flash_cmd(const struct msm_spi *dd, u8 cmd, void *response, size_t len)
{
	const size_t opcode_len = 1;

	return spi_flash_cmd_read(dd, &cmd, opcode_len, response, len);
}

/* Enable writing on the SPI flash by sending CMD_WRITE_ENABLE on its own. */
static inline int
spi_flash_cmd_write_enable(const struct msm_spi *dd)
{
	void *no_response = NULL;

	return spi_flash_cmd(dd, CMD_WRITE_ENABLE, no_response, 0);
}

/* Send 'cmd' followed by 'data_len' bytes taken from 'data'. */
static int
spi_flash_cmd_write(const struct msm_spi *dd, const u8 *cmd, size_t cmd_len,
		    const void *data, size_t data_len)
{
	u8 *no_rx_data = NULL;

	return spi_flash_read_write(dd, cmd, cmd_len,
				    data, no_rx_data, data_len);
}

/*
 * Claim the bus, then send 'cmd' and read 'data_len' bytes into 'data'.
 *
 * Returns 0 on success, the error from spi_claim_bus() when the bus cannot
 * be prepared, or the error from spi_flash_cmd_read(). The bus is not
 * released afterwards (there is no release counterpart in this path).
 */
static int
spi_flash_read_common(const struct msm_spi *dd, const u8 *cmd, size_t cmd_len,
		      void *data, size_t data_len)
{
	int ret;

	/* Do not touch the FIFOs if the controller could not be set up */
	ret = spi_claim_bus(dd);
	if (ret) {
		pr_err("SF: unable to claim SPI bus\n");
		return ret;
	}

	return spi_flash_cmd_read(dd, cmd, cmd_len, data, data_len);
}

/*
 * Currently unused.
 *
 * Fast-read 'len' bytes starting at flash offset 'offset' into 'data'.
 * The command is the read opcode, the address bytes and one dummy byte.
 */
int spi_flash_cmd_read_fast(const struct msm_spi *dd, u32 offset,
			    size_t len, void *data);
int
spi_flash_cmd_read_fast(
	const struct msm_spi *dd, u32 offset, size_t len, void *data)
{
	const u32 width = necpf_flash.addr_width;
	u8 cmd[6];

	cmd[0] = necpf_flash.read_opcode;
	spi_flash_addr(width, offset, cmd);
	cmd[5] = 0x00;

	pr_devel("%s:%d cmd: %02x %02x %02x %02x %02x\n", __func__, __LINE__,
		 cmd[0], cmd[1], cmd[2], cmd[3], cmd[4]);

	return spi_flash_read_common(dd, cmd,
				     flash_cmdsz(width) + FAST_READ_DUMMY_BYTE,
				     data, len);
}

/*
 * Send 'cmd' and keep reading one status byte, about once per millisecond,
 * until all bits of 'poll_bit' read back as 0 or 'sec' seconds have passed.
 *
 * Returns 0 when the bits cleared in time, the spi_xfer() error when the
 * command could not be sent, and -1 on a read failure or on timeout.
 * A timeout of 0 seconds performs no read and is reported as a timeout.
 */
static int
spi_flash_cmd_poll_bit(
	const struct msm_spi *dd, unsigned long sec, u8 cmd, u8 poll_bit)
{
	unsigned long ms = sec * 1000;
	unsigned long ms_cnt;
	int ret;
	/*
	 * Start out "busy" so the final check is well defined even when the
	 * loop below never runs (sec == 0).
	 */
	u8 status = poll_bit;

	ret = spi_xfer(dd, 8, &cmd, NULL, SPI_XFER_BEGIN);
	if (ret) {
		pr_err("SF: Failed to send command %02x: %d\n", cmd, ret);
		return ret;
	}

	for (ms_cnt = 0; ms_cnt < ms; ms_cnt++) {
		/* Chip select stays asserted between the status reads */
		ret = spi_xfer(dd, 8, NULL, &status, 0);
		if (ret) {
			pr_err("Err: %s:%d ret:%d\n", __func__, __LINE__, ret);
			return -1;
		}
		if ((status & poll_bit) == 0)
			break;
		udelay(1000);
	}

	/* End the transaction: no data, just SPI_XFER_END handling */
	spi_xfer(dd, 0, NULL, NULL, SPI_XFER_END);

	if ((status & poll_bit) == 0)
		return 0;

	/* Timed out */
	pr_err("SF: time out!\n");
	return -1;
}

static int
spi_flash_cmd_wait_ready(const struct msm_spi *dd, unsigned long timeout)
{
	return spi_flash_cmd_poll_bit(
		dd, timeout, CMD_READ_STATUS, STATUS_WIP);
}

/*
 * Program 'len' bytes from 'buf' into the flash starting at 'offset'.
 *
 * The data is split into chunks that never cross a page boundary. Each
 * chunk is programmed with: write-enable, page-program command + data,
 * then a wait for the write-in-progress bit to clear. The loop stops at
 * the first failing step.
 *
 * Returns 0 on success or the error of the failing step. The bus is
 * claimed at the start and not released.
 */
static int
spi_flash_cmd_write_multi(
	const struct msm_spi *dd, u32 offset, size_t len, const u_char *buf)
{
	const struct spi_flash *flash = &necpf_flash;
	unsigned long page_addr, byte_addr, page_size;
	size_t chunk_len, actual;
	int ret;
	u8 cmd[5];

	/* Split the start offset into page number and offset within the page */
	page_size = flash->page_size;
	page_addr = offset / page_size;
	byte_addr = offset % page_size;

	ret = spi_claim_bus(dd);
	if (ret) {
		pr_err("SF: unable to claim SPI bus\n");
		return ret;
	}

	cmd[0] = flash->write_opcode;
	for (actual = 0; actual < len; actual += chunk_len) {
		/* Never write past the end of the current page */
		chunk_len = min(
			(unsigned long)(len - actual), page_size - byte_addr);

		if (flash->addr_width == 4) {
			cmd[1] = page_addr >> 16;
			cmd[2] = page_addr >> 8;
			cmd[3] = page_addr;
			cmd[4] = byte_addr;
		} else {
			/* cmd[4] is left unset here but still traced below */
			cmd[1] = page_addr >> 8;
			cmd[2] = page_addr;
			cmd[3] = byte_addr;
		}
		pr_devel("PP: 0x%p => cmd = { 0x%02x 0x%02x%02x%02x 0x%02x }"
			 " chunk_len = %zu\n", buf + actual,
			 cmd[0], cmd[1], cmd[2], cmd[3], cmd[4], chunk_len);

		ret = spi_flash_cmd_write_enable(dd);
		if (ret < 0) {
			pr_err("SF: enabling write failed\n");
			break;
		}

		ret = spi_flash_cmd_write(
			dd, cmd, flash_cmdsz(flash->addr_width),
			buf + actual, chunk_len);
		if (ret < 0) {
			pr_err("SF: write failed\n");
			break;
		}

		ret = spi_flash_cmd_wait_ready(dd, SPI_FLASH_PROG_TIMEOUT);
		if (ret)
			break;

		/* Every chunk after the first starts on a page boundary */
		page_addr++;
		byte_addr = 0;
	}

	pr_info("SF: program %s %zu bytes @ %#x\n",
		 ret ? "failure" : "success", len, offset);

//	spi_release_bus(flash->spi);
	return ret;
}

/*
 * spi_master panic_transfer hook: write @len bytes from @buf to the
 * crash-log flash at offset @to.
 *
 * @to is truncated to 32 bits before being handed to the flash writer.
 * *@retlen is set to @len on success and to 0 on failure.  Returns the
 * result of spi_flash_cmd_write_multi().
 */
static int
msm_spi_panic_transfer(
	struct spi_device *spi, loff_t to, size_t len,
	size_t *retlen, const u_char *buf)
{
	const struct msm_spi *dd = spi_master_get_devdata(spi->master);
	int ret;

	/* len is a size_t, so it must be printed with %zu, not %d. */
	pr_devel("[%s] to:0x%llx len:%zu\n", __func__, to, len);
	ret = spi_flash_cmd_write_multi(dd, (uint32_t)to, len, buf);
	*retlen = ret ? 0 : len;
	return ret;
}

#endif	/* USE_NECPF_KERNEL_CRASHLOG */

static int msm_spi_setup(struct spi_device *spi)
{
	struct msm_spi	*dd;
	int              rc = 0;
	u32              spi_ioc;
	u32              spi_config;
	u32              mask;

	if (spi->bits_per_word < 4 || spi->bits_per_word > 32) {
		dev_err(&spi->dev, "%s: invalid bits_per_word %d\n",
			__func__, spi->bits_per_word);
		rc = -EINVAL;
	}
	if (spi->chip_select > SPI_NUM_CHIPSELECTS-1) {
		dev_err(&spi->dev, "%s, chip select %d exceeds max value %d\n",
			__func__, spi->chip_select, SPI_NUM_CHIPSELECTS - 1);
		rc = -EINVAL;
	}

	if (rc)
		goto err_setup_exit;

	dd = spi_master_get_devdata(spi->master);

	pm_runtime_get_sync(dd->dev);

	mutex_lock(&dd->core_lock);

	/* Counter-part of system-suspend when runtime-pm is not enabled. */
	if (!pm_runtime_enabled(dd->dev))
		msm_spi_pm_resume_runtime(dd->dev);

	if (dd->use_rlock)
		remote_mutex_lock(&dd->r_lock);

	spi_ioc = readl_relaxed(dd->base + SPI_IO_CONTROL);
	mask = SPI_IO_C_CS_N_POLARITY_0 << spi->chip_select;
	if (spi->mode & SPI_CS_HIGH)
		spi_ioc |= mask;
	else
		spi_ioc &= ~mask;
	if (spi->mode & SPI_CPOL)
		spi_ioc |= SPI_IO_C_CLK_IDLE_HIGH;
	else
		spi_ioc &= ~SPI_IO_C_CLK_IDLE_HIGH;

	writel_relaxed(spi_ioc, dd->base + SPI_IO_CONTROL);

	spi_config = readl_relaxed(dd->base + SPI_CONFIG);
	if (spi->mode & SPI_LOOP)
		spi_config |= SPI_CFG_LOOPBACK;
	else
		spi_config &= ~SPI_CFG_LOOPBACK;
	if (spi->mode & SPI_CPHA)
		spi_config &= ~SPI_CFG_INPUT_FIRST;
	else
		spi_config |= SPI_CFG_INPUT_FIRST;
	writel_relaxed(spi_config, dd->base + SPI_CONFIG);

	/* Ensure previous write completed before disabling the clocks */
	mb();

	if (dd->use_rlock)
		remote_mutex_unlock(&dd->r_lock);

	/* Counter-part of system-resume when runtime-pm is not enabled. */
	if (!pm_runtime_enabled(dd->dev))
		msm_spi_pm_suspend_runtime(dd->dev);

	mutex_unlock(&dd->core_lock);

	pm_runtime_mark_last_busy(dd->dev);
	pm_runtime_put_autosuspend(dd->dev);

err_setup_exit:
	return rc;
}

#ifdef CONFIG_DEBUG_FS
/*
 * debugfs write handler: store @val into the 32-bit register whose
 * mapped address was registered as the file's private data.
 */
static int debugfs_iomem_x32_set(void *data, u64 val)
{
	void *reg_addr = data;

	writel_relaxed(val, reg_addr);
	/* Make sure the register write has completed before returning. */
	mb();

	return 0;
}

/*
 * debugfs read handler: fetch the 32-bit register whose mapped address
 * was registered as the file's private data and return it through @val.
 */
static int debugfs_iomem_x32_get(void *data, u64 *val)
{
	void *reg_addr = data;

	*val = readl_relaxed(reg_addr);
	/* Make sure the register read has completed before returning. */
	mb();

	return 0;
}

/*
 * File operations for the per-register debugfs files: values are read
 * and written through the two helpers above and formatted as a
 * zero-padded 32-bit hex number.
 */
DEFINE_SIMPLE_ATTRIBUTE(fops_iomem_x32, debugfs_iomem_x32_get,
			debugfs_iomem_x32_set, "0x%08llx\n");

/*
 * Create a debugfs directory named after the device and populate it with
 * one file per entry of debugfs_spi_regs[], each backed by the
 * corresponding controller register.  Nothing is created when the
 * directory itself cannot be created.
 */
static void spi_debugfs_init(struct msm_spi *dd)
{
	unsigned int idx;

	dd->dent_spi = debugfs_create_dir(dev_name(dd->dev), NULL);
	if (!dd->dent_spi)
		return;

	for (idx = 0; idx < ARRAY_SIZE(debugfs_spi_regs); idx++) {
		void *reg_addr = dd->base + debugfs_spi_regs[idx].offset;

		dd->debugfs_spi_regs[idx] =
			debugfs_create_file(debugfs_spi_regs[idx].name,
					    debugfs_spi_regs[idx].mode,
					    dd->dent_spi,
					    reg_addr,
					    &fops_iomem_x32);
	}
}

/*
 * Remove the device's debugfs directory together with everything in it
 * and forget the cached dentries.  Does nothing when no directory was
 * created.
 */
static void spi_debugfs_exit(struct msm_spi *dd)
{
	unsigned int idx;

	if (!dd->dent_spi)
		return;

	debugfs_remove_recursive(dd->dent_spi);
	dd->dent_spi = NULL;

	/* The per-register dentries went away with the directory. */
	for (idx = 0; idx < ARRAY_SIZE(debugfs_spi_regs); idx++)
		dd->debugfs_spi_regs[idx] = NULL;
}
#else
/* Without CONFIG_DEBUG_FS the debugfs hooks compile to empty stubs. */
static void spi_debugfs_init(struct msm_spi *dd) {}
static void spi_debugfs_exit(struct msm_spi *dd) {}
#endif

/* ===Device attributes begin=== */
/*
 * sysfs "stats" read handler: print the controller's FIFO/block/burst
 * geometry, DMA channel configuration and the interrupt and DMA-error
 * counters into @buf.  Returns the snprintf() result.
 */
static ssize_t show_stats(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	struct spi_master *ctlr = dev_get_drvdata(dev);
	struct msm_spi *dd = spi_master_get_devdata(ctlr);
	int written;

	written = snprintf(buf, PAGE_SIZE,
			"Device       %s\n"
			"rx fifo_size = %d spi words\n"
			"tx fifo_size = %d spi words\n"
			"use_dma ?    %s\n"
			"rx block size = %d bytes\n"
			"tx block size = %d bytes\n"
			"input burst size = %d bytes\n"
			"output burst size = %d bytes\n"
			"DMA configuration:\n"
			"tx_ch=%d, rx_ch=%d, tx_crci= %d, rx_crci=%d\n"
			"--statistics--\n"
			"Rx isrs  = %d\n"
			"Tx isrs  = %d\n"
			"DMA error  = %d\n"
			"--debug--\n"
			"NA yet\n",
			dev_name(dev),
			dd->input_fifo_size,
			dd->output_fifo_size,
			dd->use_dma ? "yes" : "no",
			dd->input_block_size,
			dd->output_block_size,
			dd->input_burst_size,
			dd->output_burst_size,
			dd->tx_dma_chan,
			dd->rx_dma_chan,
			dd->tx_dma_crci,
			dd->rx_dma_crci,
			/* PIO and DataMover completions are reported together */
			dd->stat_rx + dd->stat_dmov_rx,
			dd->stat_tx + dd->stat_dmov_tx,
			dd->stat_dmov_tx_err + dd->stat_dmov_rx_err);

	return written;
}

/*
 * sysfs "stats" write handler: any write resets all statistics counters.
 * The written data itself is ignored; @count is returned so the whole
 * write is reported as consumed.
 */
static ssize_t set_stats(struct device *dev, struct device_attribute *attr,
			 const char *buf, size_t count)
{
	/*
	 * The device's drvdata is the spi_master (see probe and
	 * show_stats()); the driver state hangs off it.  Using drvdata
	 * directly as a struct msm_spi would zero unrelated memory.
	 */
	struct spi_master *master = dev_get_drvdata(dev);
	struct msm_spi *dd = spi_master_get_devdata(master);

	dd->stat_rx = 0;
	dd->stat_tx = 0;
	dd->stat_dmov_rx = 0;
	dd->stat_dmov_tx = 0;
	dd->stat_dmov_rx_err = 0;
	dd->stat_dmov_tx_err = 0;
	return count;
}

/* "stats": world-readable, owner-writable (a write resets the counters). */
static DEVICE_ATTR(stats, S_IRUGO | S_IWUSR, show_stats, set_stats);

/* NULL-terminated list of the attributes exported by this driver. */
static struct attribute *dev_attrs[] = {
	&dev_attr_stats.attr,
	NULL,
};

/* Attribute group handed to sysfs_create_group()/sysfs_remove_group(). */
static struct attribute_group dev_attr_grp = {
	.attrs = dev_attrs,
};
/* ===Device attributes end=== */

/**
 * spi_dmov_tx_complete_func - DataMover tx completion callback
 * @cmd:    the completed command; embedded in struct msm_spi as tx_hdr
 * @result: DMOV_RSLT_* status bits for the command
 * @err:    optional flush/error data, may be NULL
 *
 * Executed in IRQ context (Data Mover's IRQ) DataMover's
 * spinlock @msm_dmov_lock held.
 *
 * On a successful completion the transfer is only signalled once the
 * tx_irq_called counter shows this is not the first completion event.
 * On error or flush the current message (if any) is marked -EIO and the
 * transfer is signalled unconditionally.
 */
static void spi_dmov_tx_complete_func(struct msm_dmov_cmd *cmd,
				      unsigned int result,
				      struct msm_dmov_errdata *err)
{
	struct msm_spi *dd;

	if (!(result & DMOV_RSLT_VALID)) {
		pr_err("Invalid DMOV result: rc=0x%08x, cmd = %p\n",
		       result, cmd);
		return;
	}
	/* restore original context */
	dd = container_of(cmd, struct msm_spi, tx_hdr);
	if (result & DMOV_RSLT_DONE) {
		dd->stat_dmov_tx++;
		/* First event of the pair: wait for the second one. */
		if ((atomic_inc_return(&dd->tx_irq_called) == 1))
			return;
		complete(&dd->transfer_complete);
	} else {
		/* Error or flush */
		if (result & DMOV_RSLT_ERROR) {
			dev_err(dd->dev, "DMA error (0x%08x)\n", result);
			dd->stat_dmov_tx_err++;
		}
		if (result & DMOV_RSLT_FLUSH) {
			/*
			 * Flushing normally happens in process of
			 * removing, when we are waiting for outstanding
			 * DMA commands to be flushed.
			 */
			dev_info(dd->dev,
				 "DMA channel flushed (0x%08x)\n", result);
		}
		if (err)
			dev_err(dd->dev,
				"Flush data(%08x %08x %08x %08x %08x %08x)\n",
				err->flush[0], err->flush[1], err->flush[2],
				err->flush[3], err->flush[4], err->flush[5]);
		/*
		 * A flush can arrive while no message is being processed;
		 * do not dereference a NULL cur_msg in IRQ context.
		 */
		if (dd->cur_msg)
			dd->cur_msg->status = -EIO;
		complete(&dd->transfer_complete);
	}
}

/**
 * spi_dmov_rx_complete_func - DataMover rx completion callback
 * @cmd:    the completed command; embedded in struct msm_spi as rx_hdr
 * @result: DMOV_RSLT_* status bits for the command
 * @err:    optional flush/error data, may be NULL
 *
 * Executed in IRQ context (Data Mover's IRQ)
 * DataMover's spinlock @msm_dmov_lock held.
 *
 * On a successful completion the transfer is only signalled once the
 * rx_irq_called counter shows this is not the first completion event.
 * On error or flush the current message (if any) is marked -EIO and the
 * transfer is signalled unconditionally.
 */
static void spi_dmov_rx_complete_func(struct msm_dmov_cmd *cmd,
				      unsigned int result,
				      struct msm_dmov_errdata *err)
{
	struct msm_spi *dd;

	if (!(result & DMOV_RSLT_VALID)) {
		pr_err("Invalid DMOV result(rc = 0x%08x, cmd = %p)\n",
		       result, cmd);
		return;
	}
	/* restore original context */
	dd = container_of(cmd, struct msm_spi, rx_hdr);
	if (result & DMOV_RSLT_DONE) {
		dd->stat_dmov_rx++;
		/* First event of the pair: wait for the second one. */
		if (atomic_inc_return(&dd->rx_irq_called) == 1)
			return;
		complete(&dd->transfer_complete);
	} else {
		/* Error or flush */
		if (result & DMOV_RSLT_ERROR) {
			dev_err(dd->dev, "DMA error(0x%08x)\n", result);
			dd->stat_dmov_rx_err++;
		}
		if (result & DMOV_RSLT_FLUSH) {
			dev_info(dd->dev,
				"DMA channel flushed(0x%08x)\n", result);
		}
		if (err)
			dev_err(dd->dev,
				"Flush data(%08x %08x %08x %08x %08x %08x)\n",
				err->flush[0], err->flush[1], err->flush[2],
				err->flush[3], err->flush[4], err->flush[5]);
		/*
		 * A flush can arrive while no message is being processed;
		 * do not dereference a NULL cur_msg in IRQ context.
		 */
		if (dd->cur_msg)
			dd->cur_msg->status = -EIO;
		complete(&dd->transfer_complete);
	}
}

static inline u32 get_chunk_size(struct msm_spi *dd, int input_burst_size,
			int output_burst_size)
{
	u32 cache_line = dma_get_cache_alignment();
	int burst_size = (input_burst_size > output_burst_size) ?
		input_burst_size : output_burst_size;

	return (roundup(sizeof(struct spi_dmov_cmd), DM_BYTE_ALIGN) +
			  roundup(burst_size, cache_line))*2;
}

static void msm_spi_teardown_dma(struct msm_spi *dd)
{
	int limit = 0;

	if (!dd->use_dma)
		return;

	while (dd->mode == SPI_DMOV_MODE && limit++ < 50) {
		msm_dmov_flush(dd->tx_dma_chan, 1);
		msm_dmov_flush(dd->rx_dma_chan, 1);
		msleep(10);
	}

	dma_free_coherent(NULL,
		get_chunk_size(dd, dd->input_burst_size, dd->output_burst_size),
		dd->tx_dmov_cmd,
		dd->tx_dmov_cmd_dma);
	dd->tx_dmov_cmd = dd->rx_dmov_cmd = NULL;
	dd->tx_padding = dd->rx_padding = NULL;
}

/*
 * Allocate and pre-initialise the DataMover command structures.
 *
 * One coherent chunk (sized by get_chunk_size()) is carved up, in order,
 * into: the tx command, the rx command, the tx padding buffer and the rx
 * padding buffer.  Each piece's CPU pointer and DMA address are derived
 * with the same alignment so they stay in step.  The fixed parts of the
 * box and single-pad commands (mode, CRCI, FIFO address) and the
 * completion callbacks are filled in here.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static __init int msm_spi_init_dma(struct msm_spi *dd)
{
	dmov_box *box;
	u32 cache_line = dma_get_cache_alignment();

	/* Allocate all as one chunk, since all is smaller than page size */

	/* We pass a NULL device, since a real one would require a
	   coherent_dma_mask in the device definition; we're okay with
	   using the system pool */
	dd->tx_dmov_cmd
		= dma_alloc_coherent(NULL,
			get_chunk_size(dd, dd->input_burst_size,
				dd->output_burst_size),
			&dd->tx_dmov_cmd_dma, GFP_KERNEL);
	if (dd->tx_dmov_cmd == NULL)
		return -ENOMEM;

	/* DMA addresses should be 64 bit aligned */
	dd->rx_dmov_cmd = (struct spi_dmov_cmd *)
			  ALIGN((size_t)&dd->tx_dmov_cmd[1], DM_BYTE_ALIGN);
	dd->rx_dmov_cmd_dma = ALIGN(dd->tx_dmov_cmd_dma +
			      sizeof(struct spi_dmov_cmd), DM_BYTE_ALIGN);

	/* Buffers should be aligned to cache line */
	dd->tx_padding = (u8 *)ALIGN((size_t)&dd->rx_dmov_cmd[1], cache_line);
	dd->tx_padding_dma = ALIGN(dd->rx_dmov_cmd_dma +
			      sizeof(struct spi_dmov_cmd), cache_line);
	dd->rx_padding = (u8 *)ALIGN((size_t)(dd->tx_padding +
		dd->output_burst_size), cache_line);
	dd->rx_padding_dma = ALIGN(dd->tx_padding_dma + dd->output_burst_size,
				      cache_line);

	/* Setup DM commands */
	/* rx: box command reading from the controller's input FIFO */
	box = &(dd->rx_dmov_cmd->box);
	box->cmd = CMD_MODE_BOX | CMD_SRC_CRCI(dd->rx_dma_crci);
	box->src_row_addr = (uint32_t)dd->mem_phys_addr + SPI_INPUT_FIFO;
	dd->rx_hdr.cmdptr = DMOV_CMD_PTR_LIST |
				   DMOV_CMD_ADDR(dd->rx_dmov_cmd_dma +
				   offsetof(struct spi_dmov_cmd, cmd_ptr));
	dd->rx_hdr.complete_func = spi_dmov_rx_complete_func;

	/* tx: box command writing to the controller's output FIFO */
	box = &(dd->tx_dmov_cmd->box);
	box->cmd = CMD_MODE_BOX | CMD_DST_CRCI(dd->tx_dma_crci);
	box->dst_row_addr = (uint32_t)dd->mem_phys_addr + SPI_OUTPUT_FIFO;
	dd->tx_hdr.cmdptr = DMOV_CMD_PTR_LIST |
			    DMOV_CMD_ADDR(dd->tx_dmov_cmd_dma +
			    offsetof(struct spi_dmov_cmd, cmd_ptr));
	dd->tx_hdr.complete_func = spi_dmov_tx_complete_func;

	/* Single-mode commands flagged CMD_LC, one per direction */
	dd->tx_dmov_cmd->single_pad.cmd = CMD_MODE_SINGLE | CMD_LC |
					  CMD_DST_CRCI(dd->tx_dma_crci);
	dd->tx_dmov_cmd->single_pad.dst = (uint32_t)dd->mem_phys_addr +
					   SPI_OUTPUT_FIFO;
	dd->rx_dmov_cmd->single_pad.cmd = CMD_MODE_SINGLE | CMD_LC |
					  CMD_SRC_CRCI(dd->rx_dma_crci);
	dd->rx_dmov_cmd->single_pad.src = (uint32_t)dd->mem_phys_addr +
					  SPI_INPUT_FIFO;

	/* Clear remaining activities on channel */
	msm_dmov_flush(dd->tx_dma_chan, 1);
	msm_dmov_flush(dd->rx_dma_chan, 1);

	return 0;
}

struct msm_spi_platform_data *msm_spi_dt_to_pdata(struct platform_device *pdev)
{
	struct device_node *node = pdev->dev.of_node;
	struct msm_spi_platform_data *pdata;

	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
	if (!pdata) {
		pr_err("Unable to allocate platform data\n");
		return NULL;
	}

	of_property_read_u32(node, "spi-max-frequency",
			&pdata->max_clock_speed);
	of_property_read_u32(node, "infinite_mode",
			&pdata->infinite_mode);

	return pdata;
}

static int msm_spi_init_thread(struct platform_device *pdev, struct msm_spi *dd)
{
	int ret;
	struct sched_param param;
	struct spi_master *master;

	if (dd->pdata->thread_mode == MSM_SPI_THREAD_RT) {
		init_kthread_worker(&dd->spi_kthread_worker);
		init_kthread_work(&dd->spi_kthread_work, msm_spi_kthread);
		dd->spi_kthread = kthread_run(kthread_worker_fn,
				(void *)&dd->spi_kthread_worker, "msm-spi-thread");
		if (IS_ERR(dd->spi_kthread)) {
			pr_err("Unable to create msm_spi kthread\n");
			return -1;
		}

		if (dd->pdata->thread_priority) {
			param.sched_priority = dd->pdata->thread_priority;
			ret = sched_setscheduler(dd->spi_kthread, SCHED_FIFO, &param);
			if (ret)
				pr_err("%s : Error setting priority, error: %d\n",
						__func__, ret);
		}
	} else {
		master = platform_get_drvdata(pdev);
		INIT_WORK(&dd->work_data, msm_spi_workq);
		dd->workqueue = create_singlethread_workqueue(
				dev_name(master->dev.parent));
		if (!dd->workqueue) {
			pr_err("Unable to create msm_spi workqueue\n");
			return -1;
		}
	}

	return 0;
}

/*
 * Tear down whichever message-processing context is in use: the RT
 * kthread when platform data selected MSM_SPI_THREAD_RT, the workqueue
 * otherwise (including when there is no platform data at all).
 */
static void msm_spi_destroy_thread(struct msm_spi *dd)
{
	/* Platform data is optional; without it the workqueue is used. */
	if (dd->pdata && dd->pdata->thread_mode == MSM_SPI_THREAD_RT)
		kthread_stop(dd->spi_kthread);
	else
		destroy_workqueue(dd->workqueue);
}

/*
 * IRQ affinity notifier: when the affinity of the controller's interrupt
 * changes, apply the same CPU mask to the RT message thread.  Nothing is
 * done in workqueue mode or when no platform data is present.
 */
static void
msm_spi_irq_affinity_notify(struct irq_affinity_notify *notify,
			    const cpumask_t *mask)
{
	struct msm_spi *dd =
		container_of(notify, struct msm_spi, irq_notify);

	/* Platform data is optional; only the RT mode has a thread to move. */
	if (!dd->pdata || dd->pdata->thread_mode != MSM_SPI_THREAD_RT)
		return;

	sched_setaffinity(pid_nr(task_pid(dd->spi_kthread)), mask);
}

/*
 * kref release hook for the IRQ affinity notifier.  The notifier is
 * embedded in struct msm_spi, so there is nothing to free here.
 */
static void msm_spi_irq_affinity_release(struct kref *ref)
{
}

/*
 * msm_spi_probe - bind the driver to one SPI controller instance
 *
 * Allocates the spi_master, gathers platform data (from the device tree
 * or from board data), GPIO numbers, memory and DMA resources, creates
 * the message-processing thread, maps the registers, optionally sets up
 * the remote mutex, brings the clocks up long enough to initialise the
 * core and DMA, then leaves the controller suspended with clocks off and
 * registers the master, the sysfs attributes and the debugfs files.
 *
 * Returns 0 on success or a negative errno; on failure everything
 * acquired so far is released through the error labels at the bottom.
 */
static int __init msm_spi_probe(struct platform_device *pdev)
{
	struct spi_master      *master;
	struct msm_spi	       *dd;
	struct resource	       *resource;
	int                     rc = -ENXIO;
	int                     locked = 0;
	int                     i = 0;
	int                     clk_enabled = 0;
	int                     pclk_enabled = 0;
	struct msm_spi_platform_data *pdata;
	enum of_gpio_flags flags;

	master = spi_alloc_master(&pdev->dev, sizeof(struct msm_spi));
	if (!master) {
		rc = -ENOMEM;
		dev_err(&pdev->dev, "master allocation failed\n");
		goto err_probe_exit;
	}

	master->bus_num        = pdev->id;
	master->mode_bits      = SPI_SUPPORTED_MODES;
	master->num_chipselect = SPI_NUM_CHIPSELECTS;
	master->setup          = msm_spi_setup;
	master->transfer       = msm_spi_transfer;
#if defined(USE_NECPF_KERNEL_CRASHLOG)
	master->panic_transfer = msm_spi_panic_transfer;
#endif	/* USE_NECPF_KERNEL_CRASHLOG */
	platform_set_drvdata(pdev, master);
	dd = spi_master_get_devdata(master);

	if (pdev->dev.of_node) {
		dd->qup_ver = SPI_QUP_VERSION_BFAM;
		master->dev.of_node = pdev->dev.of_node;
		pdata = msm_spi_dt_to_pdata(pdev);
		if (!pdata) {
			rc = -ENOMEM;
			/* The master is already allocated: drop it too. */
			goto err_probe_res;
		}

		rc = of_property_read_u32(pdev->dev.of_node,
				"cell-index", &pdev->id);
		if (rc)
			dev_warn(&pdev->dev,
				"using default bus_num %d\n", pdev->id);
		else
			master->bus_num = pdev->id;

		for (i = 0; i < ARRAY_SIZE(spi_rsrcs); ++i) {
			dd->spi_gpios[i] = of_get_gpio_flags(pdev->dev.of_node,
								i, &flags);
		}

		for (i = 0; i < ARRAY_SIZE(spi_cs_rsrcs); ++i) {
			dd->cs_gpios[i].gpio_num = of_get_named_gpio_flags(
						pdev->dev.of_node, "cs-gpios",
						i, &flags);
			dd->cs_gpios[i].valid = 0;
		}
	} else {
		pdata = pdev->dev.platform_data;
		dd->qup_ver = SPI_QUP_VERSION_NONE;

		/* GPIOs come as IO resources: bus lines first, then CS. */
		for (i = 0; i < ARRAY_SIZE(spi_rsrcs); ++i) {
			resource = platform_get_resource(pdev, IORESOURCE_IO,
							i);
			dd->spi_gpios[i] = resource ? resource->start : -1;
		}

		for (i = 0; i < ARRAY_SIZE(spi_cs_rsrcs); ++i) {
			resource = platform_get_resource(pdev, IORESOURCE_IO,
						i + ARRAY_SIZE(spi_rsrcs));
			dd->cs_gpios[i].gpio_num = resource ?
							resource->start : -1;
			dd->cs_gpios[i].valid = 0;
		}
	}

	dd->pdata = pdata;
	resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!resource) {
		rc = -ENXIO;
		goto err_probe_res;
	}

	dd->mem_phys_addr = resource->start;
	dd->mem_size = resource_size(resource);

	if (pdata) {
		if (pdata->dma_config) {
			rc = pdata->dma_config();
			if (rc) {
				dev_warn(&pdev->dev,
					"%s: DM mode not supported\n",
					__func__);
				dd->use_dma = 0;
				goto skip_dma_resources;
			}
		}
		/* DMA resource 0: rx/tx channels; resource 1: rx/tx CRCIs. */
		resource = platform_get_resource(pdev, IORESOURCE_DMA, 0);
		if (resource) {
			dd->rx_dma_chan = resource->start;
			dd->tx_dma_chan = resource->end;
			resource = platform_get_resource(pdev, IORESOURCE_DMA,
							1);
			if (!resource) {
				rc = -ENXIO;
				goto err_probe_res;
			}

			dd->rx_dma_crci = resource->start;
			dd->tx_dma_crci = resource->end;
			dd->use_dma = 1;
			master->dma_alignment =	dma_get_cache_alignment();
		}
	}

skip_dma_resources:

	spin_lock_init(&dd->queue_lock);
	mutex_init(&dd->core_lock);
	INIT_LIST_HEAD(&dd->queue);
	init_waitqueue_head(&dd->continue_suspend);

	/*
	 * Capture the error code: rc may be 0 at this point, and returning
	 * 0 after dropping the master would report a failed probe as
	 * successful.
	 */
	rc = msm_spi_init_thread(pdev, dd);
	if (rc)
		goto err_probe_thread;

	if (!devm_request_mem_region(&pdev->dev, dd->mem_phys_addr,
					dd->mem_size, SPI_DRV_NAME)) {
		rc = -ENXIO;
		goto err_probe_reqmem;
	}

	dd->base = devm_ioremap(&pdev->dev, dd->mem_phys_addr, dd->mem_size);
	if (!dd->base) {
		rc = -ENOMEM;
		goto err_probe_reqmem;
	}

	if (pdata && pdata->rsl_id) {
		struct remote_mutex_id rmid;
		rmid.r_spinlock_id = pdata->rsl_id;
		rmid.delay_us = SPI_TRYLOCK_DELAY;

		rc = remote_mutex_init(&dd->r_lock, &rmid);
		if (rc) {
			/* Argument order matches the format: func, then id. */
			dev_err(&pdev->dev, "%s: unable to init remote_mutex "
				"(%s), (rc=%d)\n", __func__,
				rmid.r_spinlock_id, rc);
			goto err_probe_rlock_init;
		}

		dd->use_rlock = 1;
		dd->pm_lat = pdata->pm_lat;
		pm_qos_add_request(&qos_req_list, PM_QOS_CPU_DMA_LATENCY,
					PM_QOS_DEFAULT_VALUE);
	}

	mutex_lock(&dd->core_lock);
	if (dd->use_rlock)
		remote_mutex_lock(&dd->r_lock);

	locked = 1;
	dd->dev = &pdev->dev;
	dd->clk = clk_get(&pdev->dev, "core_clk");
	if (IS_ERR(dd->clk)) {
		dev_err(&pdev->dev, "%s: unable to get core_clk\n", __func__);
		rc = PTR_ERR(dd->clk);
		goto err_probe_clk_get;
	}

	dd->pclk = clk_get(&pdev->dev, "iface_clk");
	if (IS_ERR(dd->pclk)) {
		dev_err(&pdev->dev, "%s: unable to get iface_clk\n", __func__);
		rc = PTR_ERR(dd->pclk);
		goto err_probe_pclk_get;
	}

	if (pdata && pdata->max_clock_speed)
		msm_spi_clock_set(dd, dd->pdata->max_clock_speed);

	rc = clk_prepare_enable(dd->clk);
	if (rc) {
		dev_err(&pdev->dev, "%s: unable to enable core_clk\n",
			__func__);
		goto err_probe_clk_enable;
	}

	clk_enabled = 1;
	rc = clk_prepare_enable(dd->pclk);
	if (rc) {
		dev_err(&pdev->dev, "%s: unable to enable iface_clk\n",
		__func__);
		goto err_probe_pclk_enable;
	}

	pclk_enabled = 1;
	rc = msm_spi_configure_gsbi(dd, pdev);
	if (rc)
		goto err_probe_gsbi;

	msm_spi_calculate_fifo_size(dd);
	if (dd->use_dma) {
		rc = msm_spi_init_dma(dd);
		if (rc)
			goto err_probe_dma;
	}

	msm_spi_register_init(dd);
	/*
	 * The SPI core generates a bogus input overrun error on some targets,
	 * when a transition from run to reset state occurs and if the FIFO has
	 * an odd number of entries. Hence we disable the INPUT_OVER_RUN_ERR_EN
	 * bit.
	 */
	msm_spi_enable_error_flags(dd);

	writel_relaxed(SPI_IO_C_NO_TRI_STATE, dd->base + SPI_IO_CONTROL);
	rc = msm_spi_set_state(dd, SPI_OP_STATE_RESET);
	if (rc)
		goto err_probe_state;

	/* Core is initialised; leave it clock-gated until first use. */
	clk_disable_unprepare(dd->clk);
	clk_disable_unprepare(dd->pclk);
	clk_enabled = 0;
	pclk_enabled = 0;

	dd->suspended = 1;
	dd->transfer_pending = 0;
	dd->multi_xfr = 0;
	dd->mode = SPI_MODE_NONE;

	rc = msm_spi_request_irq(dd, pdev, master);
	if (rc)
		goto err_probe_irq;

	msm_spi_disable_irqs(dd);
	if (dd->use_rlock)
		remote_mutex_unlock(&dd->r_lock);

	dd->irq_notify.notify = msm_spi_irq_affinity_notify;
	dd->irq_notify.release = msm_spi_irq_affinity_release;
	irq_set_affinity_notifier(dd->irq_in, &dd->irq_notify);

	mutex_unlock(&dd->core_lock);
	locked = 0;

	pm_runtime_set_autosuspend_delay(&pdev->dev, MSEC_PER_SEC);
	pm_runtime_use_autosuspend(&pdev->dev);
	pm_runtime_enable(&pdev->dev);

	rc = spi_register_master(master);
	if (rc)
		goto err_probe_reg_master;

	rc = sysfs_create_group(&(dd->dev->kobj), &dev_attr_grp);
	if (rc) {
		dev_err(&pdev->dev, "failed to create dev. attrs : %d\n", rc);
		goto err_attrs;
	}

	spi_debugfs_init(dd);

	return 0;

err_attrs:
	spi_unregister_master(master);
err_probe_reg_master:
	pm_runtime_disable(&pdev->dev);
err_probe_irq:
err_probe_state:
	msm_spi_teardown_dma(dd);
err_probe_dma:
err_probe_gsbi:
	if (pclk_enabled)
		clk_disable_unprepare(dd->pclk);
err_probe_pclk_enable:
	if (clk_enabled)
		clk_disable_unprepare(dd->clk);
err_probe_clk_enable:
	clk_put(dd->pclk);
err_probe_pclk_get:
	clk_put(dd->clk);
err_probe_clk_get:
	if (locked) {
		if (dd->use_rlock)
			remote_mutex_unlock(&dd->r_lock);

		mutex_unlock(&dd->core_lock);
	}
err_probe_rlock_init:
err_probe_reqmem:
	msm_spi_destroy_thread(dd);
err_probe_thread:
err_probe_res:
	spi_master_put(master);
err_probe_exit:
	return rc;
}

#ifdef CONFIG_PM
/*
 * Runtime-PM suspend callback (also called directly from
 * msm_spi_setup() and msm_spi_suspend()).
 *
 * Marks the controller suspended so that nothing new is queued, waits
 * for the pending transfer to finish, then disables the interrupts and
 * clocks, releases the GPIOs and relaxes the PM QoS request.
 * Always returns 0.
 */
static int msm_spi_pm_suspend_runtime(struct device *device)
{
	struct platform_device *pdev = to_platform_device(device);
	struct spi_master *master = platform_get_drvdata(pdev);
	struct msm_spi	  *dd;
	unsigned long	   irq_flags;

	dev_dbg(device, "pm_runtime: suspending...\n");
	if (!master)
		return 0;

	dd = spi_master_get_devdata(master);
	if (!dd || dd->suspended)
		return 0;

	/* Flip the flag under the queue lock so no new message slips in. */
	spin_lock_irqsave(&dd->queue_lock, irq_flags);
	dd->suspended = 1;
	spin_unlock_irqrestore(&dd->queue_lock, irq_flags);

	/*
	 * Wait for the in-flight transfer to drain.  The wait is
	 * interruptible and has no timeout; its result is not checked.
	 */
	wait_event_interruptible(dd->continue_suspend,
		!dd->transfer_pending);

	msm_spi_disable_irqs(dd);
	clk_disable_unprepare(dd->clk);
	clk_disable_unprepare(dd->pclk);

	/* Hand the clk, miso, mosi and cs GPIOs back. */
	if (dd->pdata && dd->pdata->gpio_release)
		dd->pdata->gpio_release();

	msm_spi_free_gpios(dd);

	if (pm_qos_request_active(&qos_req_list))
		pm_qos_update_request(&qos_req_list,
				PM_QOS_DEFAULT_VALUE);

	return 0;
}

/*
 * Runtime-PM resume callback (also called directly from
 * msm_spi_setup()).
 *
 * Tightens the PM QoS request, configures and claims the GPIOs, turns
 * the clocks on and re-enables the interrupts.  The controller is only
 * marked as resumed once every step has succeeded.
 *
 * Returns 0 on success (or when there is nothing to do) and a negative
 * errno if the GPIOs or clocks could not be brought up.
 */
static int msm_spi_pm_resume_runtime(struct device *device)
{
	struct platform_device *pdev = to_platform_device(device);
	struct spi_master *master = platform_get_drvdata(pdev);
	struct msm_spi	  *dd;
	int ret = 0;

	dev_dbg(device, "pm_runtime: resuming...\n");
	if (!master)
		goto resume_exit;
	dd = spi_master_get_devdata(master);
	if (!dd)
		goto resume_exit;

	if (!dd->suspended)
		return 0;

	if (pm_qos_request_active(&qos_req_list))
		pm_qos_update_request(&qos_req_list,
				  dd->pm_lat);

	/*
	 * Configure the spi clk, miso, mosi and cs gpio.  Platform data is
	 * optional, so check it like the suspend path does.
	 */
	if (dd->pdata && dd->pdata->gpio_config) {
		ret = dd->pdata->gpio_config();
		if (ret) {
			dev_err(dd->dev,
					"%s: error configuring GPIOs\n",
					__func__);
			return ret;
		}
	}

	ret = msm_spi_request_gpios(dd);
	if (ret)
		return ret;

	/*
	 * Do not report the controller as resumed with a clock that failed
	 * to start; undo what was acquired here and stay suspended.
	 */
	ret = clk_prepare_enable(dd->clk);
	if (ret) {
		dev_err(dd->dev, "%s: unable to enable core_clk\n", __func__);
		goto err_free_gpios;
	}

	ret = clk_prepare_enable(dd->pclk);
	if (ret) {
		dev_err(dd->dev, "%s: unable to enable iface_clk\n", __func__);
		goto err_disable_clk;
	}

	msm_spi_enable_irqs(dd);
	dd->suspended = 0;
resume_exit:
	return 0;

err_disable_clk:
	clk_disable_unprepare(dd->clk);
err_free_gpios:
	msm_spi_free_gpios(dd);
	return ret;
}

/*
 * System-sleep suspend callback.
 *
 * If runtime-PM is enabled and has already suspended the device there is
 * nothing left to do; otherwise the runtime suspend routine is invoked
 * by hand.  Always returns 0.
 */
static int msm_spi_suspend(struct device *device)
{
	struct platform_device *pdev;
	struct spi_master *master;

	if (pm_runtime_enabled(device) && pm_runtime_suspended(device))
		return 0;

	pdev = to_platform_device(device);
	master = platform_get_drvdata(pdev);

	dev_dbg(device, "system suspend");
	if (master && spi_master_get_devdata(master))
		msm_spi_pm_suspend_runtime(device);

	return 0;
}

/*
 * System-sleep resume callback.  Deliberately does no hardware work and
 * always returns 0; see the comment in the body for who resumes the
 * controller instead.
 */
static int msm_spi_resume(struct device *device)
{
	/*
	 * Rely on runtime-PM to call resume if it is enabled.
	 * Even if it is not enabled, rely on the first client transaction
	 * to turn the clocks on and configure the GPIOs.
	 */
	dev_dbg(device, "system resume");
	return 0;
}
#else
/* Without CONFIG_PM the power-management callbacks are all absent. */
#define msm_spi_suspend NULL
#define msm_spi_resume NULL
#define msm_spi_pm_suspend_runtime NULL
#define msm_spi_pm_resume_runtime NULL
#endif /* CONFIG_PM */

/*
 * msm_spi_remove - unbind the driver from a controller
 *
 * Drops the PM QoS request (if one was added), uninstalls the IRQ
 * affinity notifier, removes the debugfs and sysfs entries, tears down
 * DMA, disables runtime-PM, releases the clocks and the message thread
 * and finally unregisters and drops the master.  Always returns 0.
 */
static int __devexit msm_spi_remove(struct platform_device *pdev)
{
	struct spi_master *master = platform_get_drvdata(pdev);
	struct msm_spi    *dd = spi_master_get_devdata(master);

	/* The request is only added in probe when an rsl_id is configured. */
	if (pm_qos_request_active(&qos_req_list))
		pm_qos_remove_request(&qos_req_list);

	/*
	 * The notifier is embedded in dd, which goes away with the master;
	 * uninstall it so the IRQ core is not left with a stale pointer.
	 */
	irq_set_affinity_notifier(dd->irq_in, NULL);

	spi_debugfs_exit(dd);
	sysfs_remove_group(&pdev->dev.kobj, &dev_attr_grp);

	msm_spi_teardown_dma(dd);

	pm_runtime_disable(&pdev->dev);
	pm_runtime_set_suspended(&pdev->dev);
	clk_put(dd->clk);
	clk_put(dd->pclk);
	msm_spi_destroy_thread(dd);
	platform_set_drvdata(pdev, NULL);
	spi_unregister_master(master);
	spi_master_put(master);

	return 0;
}

/* Device-tree compatible strings handled by this driver (sentinel-terminated). */
static struct of_device_id msm_spi_dt_match[] = {
	{
		.compatible = "qcom,spi-qup-v2",
	},
	{}
};

/*
 * Power-management callbacks: system sleep uses msm_spi_suspend/resume,
 * runtime-PM uses the *_runtime pair; no runtime idle callback is set.
 */
static const struct dev_pm_ops msm_spi_dev_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(msm_spi_suspend, msm_spi_resume)
	SET_RUNTIME_PM_OPS(msm_spi_pm_suspend_runtime,
			msm_spi_pm_resume_runtime, NULL)
};

/*
 * Platform driver descriptor.  No .probe is set here; the probe routine
 * is supplied separately through platform_driver_probe() in
 * msm_spi_init().
 */
static struct platform_driver msm_spi_driver = {
	.driver		= {
		.name	= SPI_DRV_NAME,
		.owner	= THIS_MODULE,
		.pm		= &msm_spi_dev_pm_ops,
		.of_match_table = msm_spi_dt_match,
	},
	/*
	 * NOTE(review): msm_spi_remove() is annotated __devexit but is
	 * referenced through __exit_p() rather than __devexit_p() -
	 * confirm which pairing is intended.
	 */
	.remove		= __exit_p(msm_spi_remove),
};

/*
 * Driver entry point, run at subsys_initcall time: register the platform
 * driver and probe the devices that are already present.
 */
static int __init msm_spi_init(void)
{
	int rc;

	rc = platform_driver_probe(&msm_spi_driver, msm_spi_probe);

	return rc;
}
subsys_initcall(msm_spi_init);

/* Module exit: unregister the platform driver registered in msm_spi_init(). */
static void __exit msm_spi_exit(void)
{
	platform_driver_unregister(&msm_spi_driver);
}
module_exit(msm_spi_exit);

MODULE_LICENSE("GPL v2");
MODULE_VERSION("0.4");
MODULE_ALIAS("platform:"SPI_DRV_NAME);
