/*
 * File      : spi-davinci.c
 * This file is part of RT-Thread RTOS
 * COPYRIGHT (C) 2006, RT-Thread Development Team
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Change Logs:
 * Date           Author		Notes
 * 2011-01-13     weety		first version
 */

#include <rthw.h>
#include <rtthread.h>
#include <rtdevice.h>
#include <dm36x.h>
#include <edma.h>
#include "spi-davinci.h"

/*
 * Branch hint placeholder: evaluates to its argument. The expansion is
 * parenthesised so that it composes safely with surrounding operators
 * (e.g. "!unlikely(a == b)").
 */
#define unlikely(x) (x)

/* Compiler-only memory barrier; cpu_relax() is used inside busy-wait loops. */
#define barrier() __asm__ __volatile__("": : :"memory")
#define cpu_relax() barrier()

/*
 * Set SPI_DEBUG to 1 to get driver traces on the console. "dev" must be a
 * pointer to an object exposing "parent.name". When disabled the macro still
 * expands to a single statement and references "dev" so that callers do not
 * end up with unused variables.
 */
#define SPI_DEBUG			0
#if SPI_DEBUG
#define spi_dbg(dev, fmt, ...) \
	do { \
		rt_kprintf("%s:", (dev)->parent.name); \
		rt_kprintf(fmt, ##__VA_ARGS__); \
	} while(0)
#else
#define spi_dbg(dev, fmt, ...) \
	do { \
		(void)(dev); \
	} while(0)
#endif

#define SZ_64K 0x10000
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))

#define SPI_NO_RESOURCE		((resource_size_t)-1)

#define SPI_MAX_CHIPSELECT	2

#define CS_DEFAULT	0xFF

#define __iomem
#define BIT(nr)			(1UL << (nr))

/* SPIFMTn: bits and field positions OR-ed into the value written to SPIFMT0 */
#define SPIFMT_PHASE_MASK	BIT(16)
#define SPIFMT_POLARITY_MASK	BIT(17)
#define SPIFMT_DISTIMER_MASK	BIT(18)
#define SPIFMT_SHIFTDIR_MASK	BIT(20)
#define SPIFMT_WAITENA_MASK	BIT(21)
#define SPIFMT_PARITYENA_MASK	BIT(22)
#define SPIFMT_ODD_PARITY_MASK	BIT(23)
#define SPIFMT_WDELAY_MASK	0x3f000000u
#define SPIFMT_WDELAY_SHIFT	24
#define SPIFMT_PRESCALE_SHIFT	8

/* SPIPC0 */
#define SPIPC0_DIFUN_MASK	BIT(11)		/* MISO */
#define SPIPC0_DOFUN_MASK	BIT(10)		/* MOSI */
#define SPIPC0_CLKFUN_MASK	BIT(9)		/* CLK */
#define SPIPC0_SPIENA_MASK	BIT(8)		/* nREADY */

/* SPIINT bit groups (set/cleared around a transfer) and SPILVL values */
#define SPIINT_MASKALL		0x0101035F
#define SPIINT_MASKINT		0x0000015F
#define SPI_INTLVL_1		0x000001FF
#define SPI_INTLVL_0		0x00000000

/* SPIDAT1 (upper 16 bit defines) */
#define SPIDAT1_CSHOLD_MASK	BIT(12)

/* SPIGCR1 */
#define SPIGCR1_CLKMOD_MASK	BIT(1)
#define SPIGCR1_MASTER_MASK     BIT(0)
#define SPIGCR1_POWERDOWN_MASK	BIT(8)
#define SPIGCR1_LOOPBACK_MASK	BIT(16)
#define SPIGCR1_SPIENA_MASK	BIT(24)

/* SPIBUF: status flags read together with the received data */
#define SPIBUF_TXFULL_MASK	BIT(29)
#define SPIBUF_RXEMPTY_MASK	BIT(31)

/*
 * SPIDELAY: four 8-bit delay fields.
 *
 * The masks are built from unsigned constants: shifting a signed 0xFF left
 * by 24 would move a one into the sign bit, which is undefined behaviour and
 * yields a negative mask on common compilers.
 */
#define SPIDELAY_C2TDELAY_SHIFT 24
#define SPIDELAY_C2TDELAY_MASK  (0xFFu << SPIDELAY_C2TDELAY_SHIFT)
#define SPIDELAY_T2CDELAY_SHIFT 16
#define SPIDELAY_T2CDELAY_MASK  (0xFFu << SPIDELAY_T2CDELAY_SHIFT)
#define SPIDELAY_T2EDELAY_SHIFT 8
#define SPIDELAY_T2EDELAY_MASK  (0xFFu << SPIDELAY_T2EDELAY_SHIFT)
#define SPIDELAY_C2EDELAY_SHIFT 0
#define SPIDELAY_C2EDELAY_MASK  0xFFu

/* Error Masks (bits of the SPIFLG status register) */
#define SPIFLG_DLEN_ERR_MASK		BIT(0)
#define SPIFLG_TIMEOUT_MASK		BIT(1)
#define SPIFLG_PARERR_MASK		BIT(2)
#define SPIFLG_DESYNC_MASK		BIT(3)
#define SPIFLG_BITERR_MASK		BIT(4)
#define SPIFLG_OVRRUN_MASK		BIT(6)
#define SPIFLG_BUF_INIT_ACTIVE_MASK	BIT(24)
/* Union of the flags that abort a transfer (BUF_INIT_ACTIVE is not included) */
#define SPIFLG_ERROR_MASK		(SPIFLG_DLEN_ERR_MASK \
				| SPIFLG_TIMEOUT_MASK | SPIFLG_PARERR_MASK \
				| SPIFLG_DESYNC_MASK | SPIFLG_BITERR_MASK \
				| SPIFLG_OVRRUN_MASK)

/* SPIINT bit set while a DMA transfer is running */
#define SPIINT_DMA_REQ_EN	BIT(16)

/* SPI Controller registers (byte offsets from the controller base address) */
#define SPIGCR0		0x00
#define SPIGCR1		0x04
#define SPIINT		0x08
#define SPILVL		0x0c
#define SPIFLG		0x10
#define SPIPC0		0x14
#define SPIDAT1		0x3c
#define SPIBUF		0x40
#define SPIDELAY	0x48
#define SPIDEF		0x4c
#define SPIFMT0		0x50
/* We have 2 DMA channels per CS, one for RX and one for TX */
struct davinci_spi_dma {
	int			tx_channel;	/* EDMA channel feeding SPIDAT1 */
	int			rx_channel;	/* EDMA channel draining SPIBUF */
	/* param slot from edma_alloc_slot(); linked to itself, TX links to it */
	int			dummy_param_slot;
	enum dma_event_q	eventq;		/* event queue passed to edma_alloc_channel() */
};

/* SPI Controller driver's private data. */
struct davinci_spi {
	struct rt_spi_bus parent;	/* bus object registered with rt_spi_bus_register() */
	struct clk		*clk;	/* functional clock; its rate drives the prescaler */

	u8			version;	/* SPI_VERSION_1 or SPI_VERSION_2 */
	void __iomem		*base;	/* controller register base */
	u32			irq;	/* interrupt number handled by davinci_spi_irq() */
	struct rt_completion	done;	/* signalled by the IRQ handler or DMA callback */

	const void		*tx;	/* read cursor in the send buffer, NULL if none */
	void			*rx;	/* write cursor in the receive buffer, NULL if none */
#define SMP_CACHE_BYTES 32
#define SPI_TMP_BUFSZ	(SMP_CACHE_BYTES + 1)
	/* DMA receive target used when the message has no receive buffer */
	u8			rx_tmp_buf[SPI_TMP_BUFSZ];
	int			rcount;	/* words still to be received */
	int			wcount;	/* words still to be sent */
	struct davinci_spi_dma	dma;

	/* word-size specific accessors chosen by davinci_spi_setup_transfer() */
	void			(*get_rx)(u32 rx_data, struct davinci_spi *);
	u32			(*get_tx)(struct davinci_spi *);

	u8			bytes_per_word[SPI_MAX_CHIPSELECT];	/* 1 or 2, per chip select */
	/* per chip select: a GPIO number, or SPI_INTERN_CS for the controller's own line */
	u8			chip_sel[SPI_MAX_CHIPSELECT];
	/* optional configuration; davinci_spi_default_cfg is used when NULL */
	struct davinci_spi_config *controller_data;
	/* when non-zero, the upper half of SPIDAT1 is rewritten before DMA starts */
	int			cshold_bug;
};

/* Zero-initialised fallback used when a controller has no controller_data. */
static struct davinci_spi_config davinci_spi_default_cfg;

/* Data-cache maintenance helpers defined elsewhere; used around DMA buffers. */
extern void mmu_clean_dcache(rt_uint32_t buffer, rt_uint32_t size);
extern void mmu_invalidate_dcache(rt_uint32_t buffer, rt_uint32_t size);

/*
 * Store the low byte of a received word at the receive cursor and advance
 * the cursor by one byte. Does nothing when there is no receive buffer.
 */
static void davinci_spi_rx_buf_u8(u32 data, struct davinci_spi *dspi)
{
	u8 *dst = dspi->rx;

	if (!dst)
		return;

	*dst = (u8)data;
	dspi->rx = dst + 1;
}

/*
 * Store the low 16 bits of a received word at the receive cursor and advance
 * the cursor by one 16-bit element. Does nothing when there is no receive
 * buffer.
 */
static void davinci_spi_rx_buf_u16(u32 data, struct davinci_spi *dspi)
{
	u16 *dst = dspi->rx;

	if (!dst)
		return;

	*dst = (u16)data;
	dspi->rx = dst + 1;
}

/*
 * Fetch the next byte to transmit and advance the send cursor. Returns 0
 * (and leaves the cursor alone) when there is no send buffer.
 */
static u32 davinci_spi_tx_buf_u8(struct davinci_spi *dspi)
{
	const u8 *src = dspi->tx;

	if (!src)
		return 0;

	dspi->tx = src + 1;
	return *src;
}

/*
 * Fetch the next 16-bit word to transmit and advance the send cursor.
 * Returns 0 (and leaves the cursor alone) when there is no send buffer.
 */
static u32 davinci_spi_tx_buf_u16(struct davinci_spi *dspi)
{
	const u16 *src = dspi->tx;

	if (!src)
		return 0;

	dspi->tx = src + 1;
	return *src;
}

/* Read-modify-write: set the given bits in the 32-bit register at addr. */
static inline void set_io_bits(void __iomem *addr, u32 bits)
{
	const u32 current = readl(addr);

	writel(current | bits, addr);
}

/* Read-modify-write: clear the given bits in the 32-bit register at addr. */
static inline void clear_io_bits(void __iomem *addr, u32 bits)
{
	const u32 current = readl(addr);

	writel(current & ~bits, addr);
}

/*
 * Interface to control the chip select signal
 */
static void davinci_spi_chipselect(struct rt_spi_device *spi, int value)
{
	struct davinci_spi *dspi;
	u8 chip_sel = (u8)spi->parent.user_data;
	u16 spidat1 = CS_DEFAULT;
	bool gpio_chipsel = RT_FALSE;

	dspi = spi->bus->parent.user_data;

	if (chip_sel < SPI_MAX_CHIPSELECT &&
				dspi->chip_sel[chip_sel] != SPI_INTERN_CS)
		gpio_chipsel = RT_TRUE;

	/*
	 * Board specific chip select logic decides the polarity and cs
	 * line for the controller
	 */
	if (gpio_chipsel) {
		if (value == 0)
			gpio_set_value(dspi->chip_sel[chip_sel], 0);
		else
			gpio_set_value(dspi->chip_sel[chip_sel], 1);
	} else {
		spidat1 = readw(dspi->base + SPIDAT1 + 2);
		if (value == 0) {
			spidat1 |= SPIDAT1_CSHOLD_MASK;
			spidat1 &= ~(0x1 << chip_sel);
		} else {
			spidat1 &= ~SPIDAT1_CSHOLD_MASK;
			spidat1 |= 0x03;
		}
		rt_kprintf("0x%04x\n", spidat1);

		writew(spidat1, dspi->base + SPIDAT1 + 2);
	}
}

/**
 * davinci_spi_get_prescale - Calculates the prescale value for a bus speed
 * @dspi: controller whose functional clock rate is used
 * @max_speed_hz: the maximum rate the SPI clock can run at
 *
 * The divider is clock_rate / max_speed_hz rounded up, so the resulting SPI
 * clock does not exceed the requested maximum. Dividers outside 3..256 are
 * clamped to that range and a message is printed; a requested speed of 0
 * selects the largest divider instead of dividing by zero.
 *
 * Returns: divider - 1 (always in 2..255), ready to be programmed into the
 * prescale field. This function never returns an error.
 */
static inline int davinci_spi_get_prescale(struct davinci_spi *dspi,
							u32 max_speed_hz)
{
	int ret;

	if (max_speed_hz == 0) {
		rt_kprintf("spi clock freq too low\n");
		return 256 - 1;
	}

	ret = DIV_ROUND_UP(clk_get_rate(dspi->clk), max_speed_hz);

	if (ret < 3) {
		rt_kprintf("spi clock freq too high\n");
		ret = 3;
	}
	if (ret > 256) {
		rt_kprintf("spi clock freq too low\n");
		ret = 256;
	}

	return ret - 1;
}

/**
 * davinci_spi_setup_transfer - This functions will determine transfer method
 * @spi: spi device on which data transfer to be done
 * @t: spi transfer in which transfer info is filled
 *
 * This function determines data transfer method (8/16/32 bit transfer).
 * It will also set the SPI Clock Control register according to
 * SPI slave device freq.
 */
static int davinci_spi_setup_transfer(struct rt_spi_device *spi,
		struct rt_spi_configuration *cfg)
{

	struct davinci_spi *dspi;
	struct davinci_spi_config *spicfg;
	u8 bits_per_word = 0;
	u32 hz = 0, spifmt = 0, prescale = 0;
	u8 chip_select = (u8)spi->parent.user_data;

	dspi = spi->bus->parent.user_data;

	bits_per_word = cfg->data_width;
	hz = cfg->max_hz;

	/*
	 * Assign function pointer to appropriate transfer method
	 * 8bit, 16bit or 32bit transfer
	 */
	if (bits_per_word <= 8 && bits_per_word >= 2) {
		dspi->get_rx = davinci_spi_rx_buf_u8;
		dspi->get_tx = davinci_spi_tx_buf_u8;
		dspi->bytes_per_word[chip_select] = 1;
	} else if (bits_per_word <= 16 && bits_per_word >= 2) {
		dspi->get_rx = davinci_spi_rx_buf_u16;
		dspi->get_tx = davinci_spi_tx_buf_u16;
		dspi->bytes_per_word[chip_select] = 2;
	} else
		return -RT_ERROR;

	/* Set up SPIFMTn register, unique to this chipselect. */

	prescale = davinci_spi_get_prescale(dspi, hz);
	if (prescale < 0)
		return prescale;

	spifmt = (prescale << SPIFMT_PRESCALE_SHIFT) | (bits_per_word & 0x1f);

	if (!(cfg->mode & RT_SPI_MSB))
		spifmt |= SPIFMT_SHIFTDIR_MASK;

	if (cfg->mode & RT_SPI_CPOL)
		spifmt |= SPIFMT_POLARITY_MASK;

	if (!(cfg->mode & RT_SPI_CPHA))
		spifmt |= SPIFMT_PHASE_MASK;

	/*
	 * Version 1 hardware supports two basic SPI modes:
	 *  - Standard SPI mode uses 4 pins, with chipselect
	 *  - 3 pin SPI is a 4 pin variant without CS (SPI_NO_CS)
	 *	(distinct from SPI_3WIRE, with just one data wire;
	 *	or similar variants without MOSI or without MISO)
	 *
	 * Version 2 hardware supports an optional handshaking signal,
	 * so it can support two more modes:
	 *  - 5 pin SPI variant is standard SPI plus SPI_READY
	 *  - 4 pin with enable is (SPI_READY | SPI_NO_CS)
	 */

	if (dspi->version == SPI_VERSION_2) {

		u32 delay = 0;

		spifmt |= ((spicfg->wdelay << SPIFMT_WDELAY_SHIFT)
							& SPIFMT_WDELAY_MASK);

		if (spicfg->odd_parity)
			spifmt |= SPIFMT_ODD_PARITY_MASK;

		if (spicfg->parity_enable)
			spifmt |= SPIFMT_PARITYENA_MASK;

		if (spicfg->timer_disable) {
			spifmt |= SPIFMT_DISTIMER_MASK;
		} else {
			delay |= (spicfg->c2tdelay << SPIDELAY_C2TDELAY_SHIFT)
						& SPIDELAY_C2TDELAY_MASK;
			delay |= (spicfg->t2cdelay << SPIDELAY_T2CDELAY_SHIFT)
						& SPIDELAY_T2CDELAY_MASK;
		}

		if (cfg->mode & RT_SPI_READY) {
			spifmt |= SPIFMT_WAITENA_MASK;
			delay |= (spicfg->t2edelay << SPIDELAY_T2EDELAY_SHIFT)
						& SPIDELAY_T2EDELAY_MASK;
			delay |= (spicfg->c2edelay << SPIDELAY_C2EDELAY_SHIFT)
						& SPIDELAY_C2EDELAY_MASK;
		}

		writel(delay, dspi->base + SPIDELAY);
	}

	writel(spifmt, dspi->base + SPIFMT0);

	return 0;
}

/*
 * Disabled: kept from the Linux driver for reference only. It still uses the
 * Linux spi_device/spi_master types and is never compiled in this port.
 */
#if 0
/**
 * davinci_spi_setup - This functions will set default transfer method
 * @spi: spi device on which data transfer to be done
 *
 * This functions sets the default transfer method.
 */
static int davinci_spi_setup(struct spi_device *spi)
{
	int retval = 0;
	struct davinci_spi *dspi;
	struct davinci_spi_platform_data *pdata;

	dspi = spi_master_get_devdata(spi->master);
	pdata = dspi->pdata;

	/* if bits per word length is zero then set it default 8 */
	if (!spi->bits_per_word)
		spi->bits_per_word = 8;

	if (!(spi->mode & SPI_NO_CS)) {
		if ((pdata->chip_sel == NULL) ||
		    (pdata->chip_sel[spi->chip_select] == SPI_INTERN_CS))
			set_io_bits(dspi->base + SPIPC0, 1 << spi->chip_select);

	}

	if (spi->mode & SPI_READY)
		set_io_bits(dspi->base + SPIPC0, SPIPC0_SPIENA_MASK);

	if (spi->mode & SPI_LOOP)
		set_io_bits(dspi->base + SPIGCR1, SPIGCR1_LOOPBACK_MASK);
	else
		clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_LOOPBACK_MASK);

	return retval;
}
#endif

/*
 * Translate SPIFLG error bits into an RT-Thread error code.
 *
 * Time-out, desynchronisation and bit errors are checked first, in that
 * order. Data-length, parity, overrun and buffer-init flags are only
 * examined on SPI_VERSION_2 controllers. Returns 0 when none of the examined
 * flags is set.
 */
static int davinci_spi_check_error(struct davinci_spi *dspi, int int_status)
{
	struct rt_device *sdev = &dspi->parent.parent;
	int err = 0;

	if (int_status & SPIFLG_TIMEOUT_MASK) {
		spi_dbg(sdev, "SPI Time-out Error\n");
		err = -RT_ETIMEOUT;
	} else if (int_status & SPIFLG_DESYNC_MASK) {
		spi_dbg(sdev, "SPI Desynchronization Error\n");
		err = -RT_EIO;
	} else if (int_status & SPIFLG_BITERR_MASK) {
		spi_dbg(sdev, "SPI Bit error\n");
		err = -RT_EIO;
	} else if (dspi->version == SPI_VERSION_2) {
		if (int_status & SPIFLG_DLEN_ERR_MASK) {
			spi_dbg(sdev, "SPI Data Length Error\n");
			err = -RT_EIO;
		} else if (int_status & SPIFLG_PARERR_MASK) {
			spi_dbg(sdev, "SPI Parity Error\n");
			err = -RT_EIO;
		} else if (int_status & SPIFLG_OVRRUN_MASK) {
			spi_dbg(sdev, "SPI Data Overrun error\n");
			err = -RT_EIO;
		} else if (int_status & SPIFLG_BUF_INIT_ACTIVE_MASK) {
			spi_dbg(sdev, "SPI Buffer Init Active\n");
			err = -RT_EBUSY;
		}
	}

	return err;
}

/**
 * davinci_spi_process_events - service one round of controller events
 * @dspi: the controller data
 *
 * Reads SPIBUF once and, if a word is available and still expected, hands it
 * to the receive accessor. Then reads SPIFLG; when any error flag is set the
 * error bits are returned and nothing is transmitted. Otherwise, if words
 * remain to be sent and the SPIBUF snapshot did not report the transmitter
 * full, the next word is merged into the low half of SPIDAT1.
 *
 * Returns 0 when no error flag is set, otherwise the SPIFLG error bits.
 */
static int davinci_spi_process_events(struct davinci_spi *dspi)
{
	u32 rxbuf, flags, txreg;

	rxbuf = readl(dspi->base + SPIBUF);

	if (!(rxbuf & SPIBUF_RXEMPTY_MASK) && dspi->rcount > 0) {
		dspi->get_rx(rxbuf & 0xFFFF, dspi);
		dspi->rcount--;
	}

	flags = readl(dspi->base + SPIFLG);
	flags &= SPIFLG_ERROR_MASK;
	if (flags)
		return flags;

	if (!(rxbuf & SPIBUF_TXFULL_MASK) && dspi->wcount > 0) {
		/* keep the control bits in the upper half, replace the data */
		txreg = readl(dspi->base + SPIDAT1);
		dspi->wcount--;
		txreg &= ~0xFFFF;
		txreg |= 0xFFFF & dspi->get_tx(dspi);
		writel(txreg, dspi->base + SPIDAT1);
	}

	return 0;
}

/*
 * EDMA completion callback shared by the RX and TX channels.
 *
 * Stops the channel, zeroes the matching word counter on successful
 * completion, and signals the waiting transfer once both counters are zero
 * or as soon as any channel reports a status other than DMA_COMPLETE.
 */
static void davinci_spi_dma_callback(unsigned lch, u16 status, void *data)
{
	struct davinci_spi *dspi = data;
	struct davinci_spi_dma *chans = &dspi->dma;
	const int completed = (status == DMA_COMPLETE);

	edma_stop(lch);

	if (completed) {
		if (lch == chans->rx_channel)
			dspi->rcount = 0;
		if (lch == chans->tx_channel)
			dspi->wcount = 0;
	}

	if (!completed || (!dspi->wcount && !dspi->rcount))
		rt_completion_done(&dspi->done);
}

/**
 * davinci_spi_bufs - functions which will handle transfer data
 * @spi: spi device on which data transfer to be done
 * @t: spi transfer in which transfer info is filled
 *
 * This function will put data to be transferred into data register
 * of SPI controller and then wait until the completion will be marked
 * by the IRQ Handler.
 */
static int davinci_spi_bufs(struct rt_spi_device *spi, struct rt_spi_message *msg)
{
	struct davinci_spi *dspi;
	int data_type, ret;
	u32 tx_data, spidat1;
	u32 errors = 0;
	struct davinci_spi_config *spicfg;
	unsigned rx_buf_count;
	struct rt_device *sdev;
	u8 chip_select = (u8)spi->parent.user_data;

	dspi = spi->bus->parent.user_data;
	spicfg = (struct davinci_spi_config *)dspi->controller_data;
	if (!spicfg)
		spicfg = &davinci_spi_default_cfg;
	sdev = &dspi->parent.parent;

	/* convert len to words based on bits_per_word */
	data_type = dspi->bytes_per_word[chip_select];

	dspi->tx = msg->send_buf;
	dspi->rx = msg->recv_buf;
	dspi->wcount = msg->length / data_type;
	dspi->rcount = dspi->wcount;

	spidat1 = readl(dspi->base + SPIDAT1);

	clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_POWERDOWN_MASK);
	set_io_bits(dspi->base + SPIGCR1, SPIGCR1_SPIENA_MASK);

	rt_completion_init(&(dspi->done));

	if (msg->cs_take)
		davinci_spi_chipselect(spi, 0);

	if (spicfg->io_type == SPI_IO_TYPE_INTR)
		set_io_bits(dspi->base + SPIINT, SPIINT_MASKINT);

	if (msg->length > 0) {
		if (spicfg->io_type != SPI_IO_TYPE_DMA) {
			/* start the transfer */
			dspi->wcount--;
			tx_data = dspi->get_tx(dspi);
			spidat1 &= 0xFFFF0000;
			spidat1 |= tx_data & 0xFFFF;
			writel(spidat1, dspi->base + SPIDAT1);
		} else {
			struct davinci_spi_dma *dma;
			unsigned long tx_reg, rx_reg;
			struct edmacc_param param;
			void *rx_buf;
			int b, c;

			dma = &dspi->dma;

			tx_reg = (unsigned long)dspi->base + SPIDAT1;
			rx_reg = (unsigned long)dspi->base + SPIBUF;

			/*
			 * Transmit DMA setup
			 *
			 * If there is transmit data, map the transmit buffer, set it
			 * as the source of data and set the source B index to data
			 * size. If there is no transmit data, set the transmit register
			 * as the source of data, and set the source B index to zero.
			 *
			 * The destination is always the transmit register itself. And
			 * the destination never increments.
			 */

			if (msg->send_buf) {
				mmu_clean_dcache((rt_uint32_t)msg->send_buf, (rt_uint32_t)msg->length);
			}

			/*
			 * If number of words is greater than 65535, then we need
			 * to configure a 3 dimension transfer.  Use the BCNTRLD
			 * feature to allow for transfers that aren't even multiples
			 * of 65535 (or any other possible b size) by first transferring
			 * the remainder amount then grabbing the next N blocks of
			 * 65535 words.
			 */

			c = dspi->wcount / (SZ_64K - 1);	/* N 65535 Blocks */
			b = dspi->wcount - c * (SZ_64K - 1);	/* Remainder */
			if (b)
				c++;
			else
				b = SZ_64K - 1;

			param.opt = TCINTEN | EDMA_TCC(dma->tx_channel);
			param.src = msg->send_buf ? msg->send_buf : tx_reg;
			param.a_b_cnt = b << 16 | data_type;
			param.dst = tx_reg;
			param.src_dst_bidx = msg->send_buf ? data_type : 0;
			param.link_bcntrld = 0xffffffff;
			param.src_dst_cidx = msg->send_buf ? data_type : 0;
			param.ccnt = c;
			edma_write_slot(dma->tx_channel, &param);
			edma_link(dma->tx_channel, dma->dummy_param_slot);

			/*
			 * Receive DMA setup
			 *
			 * If there is receive buffer, use it to receive data. If there
			 * is none provided, use a temporary receive buffer. Set the
			 * destination B index to 0 so effectively only one byte is used
			 * in the temporary buffer (address does not increment).
			 *
			 * The source of receive data is the receive data register. The
			 * source address never increments.
			 */

			if (msg->recv_buf) {
				rx_buf = msg->recv_buf;
				rx_buf_count = msg->length;
			} else {
				rx_buf = dspi->rx_tmp_buf;
				rx_buf_count = sizeof(dspi->rx_tmp_buf);
			}

			mmu_invalidate_dcache((rt_uint32_t)rx_buf, (rt_uint32_t)rx_buf_count);

			param.opt = TCINTEN | EDMA_TCC(dma->rx_channel);
			param.src = rx_reg;
			param.a_b_cnt = b << 16 | data_type;
			param.dst = rx_buf;
			param.src_dst_bidx = (msg->recv_buf ? data_type : 0) << 16;
			param.link_bcntrld = 0xffffffff;
			param.src_dst_cidx = (msg->recv_buf ? data_type : 0) << 16;
			param.ccnt = c;
			edma_write_slot(dma->rx_channel, &param);

			if (dspi->cshold_bug)
				writew(spidat1 >> 16, dspi->base + SPIDAT1 + 2);

			edma_start(dma->rx_channel);
			edma_start(dma->tx_channel);
			set_io_bits(dspi->base + SPIINT, SPIINT_DMA_REQ_EN);
		}

		/* Wait for the transfer to complete */
		if (spicfg->io_type != SPI_IO_TYPE_POLL) {
			rt_completion_wait(&(dspi->done), RT_WAITING_FOREVER);
		} else {
			while (dspi->rcount > 0 || dspi->wcount > 0) {
				errors = davinci_spi_process_events(dspi);
				if (errors)
					break;
				cpu_relax();
			}
		}
	}

	if (msg->cs_release)
		davinci_spi_chipselect(spi, 1);

	clear_io_bits(dspi->base + SPIINT, SPIINT_MASKALL);
	if (spicfg->io_type == SPI_IO_TYPE_DMA) {
		clear_io_bits(dspi->base + SPIINT, SPIINT_DMA_REQ_EN);
	}

	clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_SPIENA_MASK);
	set_io_bits(dspi->base + SPIGCR1, SPIGCR1_POWERDOWN_MASK);

	/*
	 * Check for bit error, desync error,parity error,timeout error and
	 * receive overflow errors
	 */
	if (errors) {
		ret = davinci_spi_check_error(dspi, errors);
		rt_kprintf("%s: error reported but no error found!\n",
							spi->bus->parent.parent.name);
		return ret;
	}

	if (dspi->rcount != 0 || dspi->wcount != 0) {
		spi_dbg(sdev, "SPI data transfer error\n");
		return -RT_EIO;
	}

	return msg->length;
}

/**
 * davinci_spi_irq - Interrupt handler for SPI Master Controller
 * @irq: IRQ number for this SPI Master
 * @data: the struct davinci_spi registered together with the handler
 *
 * Services the pending receive/transmit work through
 * davinci_spi_process_events(). If that reports an error, the SPIINT_MASKINT
 * interrupt sources are disabled. The transfer completion is signalled when
 * an error occurred or when no words remain in either direction, so that
 * davinci_spi_bufs() can go ahead.
 */
static void davinci_spi_irq(int irq, void *data)
{
	struct davinci_spi *dspi = data;
	const int err = davinci_spi_process_events(dspi);
	int finished;

	if (err != 0)
		clear_io_bits(dspi->base + SPIINT, SPIINT_MASKINT);

	finished = (!dspi->rcount && !dspi->wcount);
	if (finished || err)
		rt_completion_done(&dspi->done);
}

/*
 * Reserve the EDMA resources used by the DMA transfer path: the RX channel,
 * the TX channel and one extra parameter slot that is linked to itself.
 *
 * Returns 0 on success. On any failure everything acquired so far is
 * released again and -RT_EFULL is returned.
 */
static int davinci_spi_request_dma(struct davinci_spi *dspi)
{
	struct davinci_spi_dma *dma = &dspi->dma;
	int slot;

	if (edma_alloc_channel(dma->rx_channel, davinci_spi_dma_callback, dspi,
								dma->eventq) < 0) {
		rt_kprintf("Unable to request DMA channel for SPI RX\n");
		return -RT_EFULL;
	}

	if (edma_alloc_channel(dma->tx_channel, davinci_spi_dma_callback, dspi,
								dma->eventq) < 0) {
		rt_kprintf("Unable to request DMA channel for SPI TX\n");
		edma_free_channel(dma->rx_channel);
		return -RT_EFULL;
	}

	slot = edma_alloc_slot(EDMA_CTLR(dma->tx_channel), EDMA_SLOT_ANY);
	if (slot < 0) {
		rt_kprintf("Unable to request SPI TX DMA param slot\n");
		edma_free_channel(dma->tx_channel);
		edma_free_channel(dma->rx_channel);
		return -RT_EFULL;
	}

	dma->dummy_param_slot = slot;
	edma_link(dma->dummy_param_slot, dma->dummy_param_slot);

	return 0;
}

/* rt_spi_ops "configure" hook: forwards to davinci_spi_setup_transfer(). */
static rt_err_t configure(struct rt_spi_device *device,
                          struct rt_spi_configuration *configuration)
{
	const int rc = davinci_spi_setup_transfer(device, configuration);

	return rc;
}

static rt_uint32_t xfer(struct rt_spi_device *device, struct rt_spi_message *message)
{
	return davinci_spi_bufs(device, message);
};



static struct rt_spi_ops davinci_spi_ops =
{
    configure,
    xfer
};

/*
 * Crude busy-wait: spins 5000 iterations of a volatile counter per requested
 * unit. The real duration depends on the CPU clock and is not calibrated.
 */
static void udelay (volatile rt_uint32_t us)
{
	volatile rt_int32_t spin;

	while (us > 0)
	{
		for (spin = 5000; spin > 0; spin--)
		{
			/* burn cycles */
		}
		us--;
	}
}

/*
 * Route the pins used by SPI1 through the pin multiplexer: sets bit 31 of
 * PINMUX3, and replaces the low six bits of PINMUX4 with 0x05.
 */
void spi_pin_cfg(void)
{
	rt_uint32_t mux;

	/* PINMUX3: SPI1 */
	mux = davinci_readl(PINMUX3);
	mux |= 0x80000000;
	davinci_writel(mux, PINMUX3);

	/* PINMUX4: SPI1 (0x00000015 was tried before 0x05) */
	mux = davinci_readl(PINMUX4);
	mux = (mux & 0xffffffc0) | 0x05;
	davinci_writel(mux, PINMUX4);
}

/**
 * davinci_spi_probe - probe function for SPI Master Controller
 * @pdev: platform_device structure which contains plateform specific data
 *
 * According to Linux Device Model this function will be invoked by Linux
 * with platform_device struct which contains the device specific info.
 * This function will map the SPI controller's memory, register IRQ,
 * Reset SPI controller and setting its registers to default value.
 * It will invoke spi_bitbang_start to create work queue so that client driver
 * can register transfer method to work queue.
 */
static int davinci_spi_probe(struct davinci_spi *dspi, char *spi_bus_name)
{
	int i = 0, ret = 0;
	u32 spipc0;

	spi_pin_cfg();
	psc_change_state(DAVINCI_DM365_LPSC_SPI1, PSC_ENABLE);

	dspi->base = DM3XX_SPI1_BASE;//spi;

	dspi->irq = IRQ_DM3XX_SPINT1_0;

	rt_hw_interrupt_install(dspi->irq, davinci_spi_irq, dspi, spi_bus_name);
	rt_hw_interrupt_umask(dspi->irq);

	dspi->clk = clk_get("SPICLK");

	dspi->version = SPI_VERSION_1;
	dspi->chip_sel[0] = 29;//SPI_INTERN_CS;
	dspi->chip_sel[1] = 0;//GPIO0

	dspi->dma.rx_channel = 15;
	dspi->dma.tx_channel = 14;
	dspi->dma.eventq = EVENTQ_3;

	ret = davinci_spi_request_dma(dspi);
	if (ret)
		goto err;

	rt_kprintf("%s: DMA: supported\n", spi_bus_name);
	rt_kprintf("%s: DMA: RX channel: %d, TX channel: %d, "
			"event queue: %d\n", spi_bus_name, dspi->dma.rx_channel, 
			dspi->dma.tx_channel, dspi->dma.eventq);

	dspi->get_rx = davinci_spi_rx_buf_u8;
	dspi->get_tx = davinci_spi_tx_buf_u8;

	rt_completion_init(&dspi->done);

	/* Reset In/OUT SPI module */
	writel(0, dspi->base + SPIGCR0);
	udelay(100);
	writel(1, dspi->base + SPIGCR0);

	/* Set up SPIPC0.  CS and ENA init is done in davinci_spi_setup */
	spipc0 = SPIPC0_DIFUN_MASK | SPIPC0_DOFUN_MASK | SPIPC0_CLKFUN_MASK;
	writel(spipc0, dspi->base + SPIPC0);

	/* initialize chip selects */
	for (i = 0; i < SPI_MAX_CHIPSELECT; i++) {
		if (dspi->chip_sel[i] != SPI_INTERN_CS)
			gpio_direction_output(dspi->chip_sel[i], 1);
	}

	if (0)
		writel(SPI_INTLVL_1, dspi->base + SPILVL);
	else
		writel(SPI_INTLVL_0, dspi->base + SPILVL);

	writel(CS_DEFAULT, dspi->base + SPIDEF);

	/* master mode default */
	set_io_bits(dspi->base + SPIGCR1, SPIGCR1_CLKMOD_MASK);
	set_io_bits(dspi->base + SPIGCR1, SPIGCR1_MASTER_MASK);
	set_io_bits(dspi->base + SPIGCR1, SPIGCR1_POWERDOWN_MASK);

	//set_io_bits(dspi->base + SPIGCR1, SPIGCR1_LOOPBACK_MASK);//LOOP BACK mode

	rt_kprintf("%s: Controller at 0x%p\n", spi_bus_name, dspi->base);

	dspi->parent.parent.user_data = dspi;

	return rt_spi_bus_register(&dspi->parent, spi_bus_name, &davinci_spi_ops);

	return ret;

free_dma:
	edma_free_channel(dspi->dma.tx_channel);
	edma_free_channel(dspi->dma.rx_channel);
	edma_free_slot(dspi->dma.dummy_param_slot);

err:
	return ret;
}


int rt_hw_spi_init(void)
{
    /* register spi bus */
    {
        static struct davinci_spi dspi;
		rt_memset(&dspi, 0, sizeof(dspi));
		davinci_spi_probe(&dspi, "spi1");
    }
    /* attach cs */
    {
        static struct rt_spi_device spi_device;
        rt_spi_bus_attach_device(&spi_device, "spi10", "spi1", (void *)0);
    }
	{
        static struct rt_spi_device spi_device;
        rt_spi_bus_attach_device(&spi_device, "spi11", "spi1", (void *)1);
    }

    return 0;
}