/**
 * \file
 *
 * \brief DMACA DMA Controller Driver
 *
 * - Compiler:           IAR EWAVR32 and GNU GCC for AVR32
 * - Supported devices:  All devices
 * - AppNote:
 *
 * \author               Atmel Corporation: http://www.atmel.com \n
 *                       Support and FAQ: http://support.atmel.no/
 *
 * \page License
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. The name of Atmel may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 *
 * 4. This software may only be redistributed and used in connection with an
 * Atmel AVR product.
 *
 * THIS SOFTWARE IS PROVIDED BY ATMEL "AS IS" AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT ARE
 * EXPRESSLY AND SPECIFICALLY DISCLAIMED. IN NO EVENT SHALL ATMEL BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */
#include <assert.h>
#include <bitops.h>
#include <buffer.h>
#include <dmapool.h>
#include <interrupt.h>
#include <io.h>
#include <irq_handler.h>
#include <status-codes.h>
#include <chip/memory-map.h>
#include <chip/irq-map.h>
#include <dmac/dma_controller.h>
#include <dmac/dmaca.h>

#include <app/config_dmapool.h>

#include "dmac_util.h"
#include "dmaca_regs.h"

#ifdef CONFIG_DMACA_NR_CHANNELS
# if CONFIG_DMACA_NR_CHANNELS > CHIP_DMACA_NR_CHANNELS
#  error Too many DMACA channels requested
# endif
# define NR_CHANNELS	CONFIG_DMACA_NR_CHANNELS
#else
# define NR_CHANNELS	CHIP_DMACA_NR_CHANNELS
#endif

#ifndef dmaca_desc_physmem_pool
# define dmaca_desc_physmem_pool	hsb_sram_pool
#endif

/**
 * \brief Bit numbers of the per-channel state flags
 *
 * Each enumerator is a bit index (not a mask) used with the bit
 * operations on the \c flags member of struct dmaca_channel.
 */
enum dmaca_chan_flag {
	/** The channel has been handed out to a client */
	DMACA_CHAN_ALLOCATED	= 0,
	/** The channel accepts newly submitted requests */
	DMACA_CHAN_ENABLED	= 1,
	/** At least one transfer is currently in progress */
	DMACA_CHAN_ACTIVE	= 2,
};

/**
 * \brief Driver state for one DMACA hardware channel
 *
 * The generic channel handle handed to clients is the embedded \c dch
 * member; dmaca_channel_of() converts it back to this structure.
 */
struct dmaca_channel {
	/** Pool from which this channel's hardware descriptors are allocated */
	struct dma_pool		desc_pool;
	/** Requests submitted to this channel that have not completed yet */
	struct slist		req_queue;
	/** Buffers belonging to the queued requests, in submission order */
	struct slist		buf_queue;
	/** Base address of this channel's register block */
	void			*regs;
	/** Bit mask identifying this channel (1 << channel index) */
	uint8_t			mask;
	/** State bits, indexed by enum dmaca_chan_flag */
	unsigned long		flags;
	/** Source address used for DMA_FROM_DEVICE transfers */
	phys_addr_t		rx_reg_addr;
	/** Destination address used for all other transfers */
	phys_addr_t		tx_reg_addr;
	/** Generic DMA channel interface exposed to clients */
	struct dmac_channel	dch;
};

/** Per-channel driver state, indexed by hardware channel number */
static struct dmaca_channel	dmaca_channel[NR_CHANNELS];
/**
 * Number of dmaca_controller_get() calls not yet balanced by
 * dmaca_controller_put(); the controller is enabled while non-zero.
 */
static int			dmaca_use_count;

/**
 * \brief Convert a generic DMA channel handle to its DMACA channel
 *
 * \param ch Generic channel embedded as the \c dch member of a
 * struct dmaca_channel
 *
 * \return The struct dmaca_channel containing \a ch
 */
static struct dmaca_channel *dmaca_channel_of(struct dmac_channel *ch)
{
	struct dmaca_channel	*chan;

	chan = container_of(ch, struct dmaca_channel, dch);

	return chan;
}

/**
 * \brief Take a reference on the controller
 *
 * The first reference enables the controller by writing
 * DMACA_DMACFG_DMA_EN to the DMACFG register.
 */
static void dmaca_controller_get(void)
{
	int	prev_users = dmaca_use_count;

	dmaca_use_count = prev_users + 1;

	/* Only the transition from "unused" to "used" touches hardware */
	if (prev_users == 0)
		dmaca_write_reg(DMACFG, DMACA_DMACFG_DMA_EN);
}

/**
 * \brief Drop a reference on the controller
 *
 * Dropping the last reference disables the controller by writing 0 to
 * the DMACFG register.
 */
static void dmaca_controller_put(void)
{
	dmaca_use_count -= 1;

	/* Only the transition from "used" to "unused" touches hardware */
	if (dmaca_use_count == 0)
		dmaca_write_reg(DMACFG, 0);
}

/**
 * \brief Get the numeric id of a channel
 *
 * \param chan The channel
 *
 * \return The base-2 logarithm of the channel's bit mask
 */
static unsigned int dmaca_chan_id(struct dmaca_channel *chan)
{
	unsigned int	id;

	id = ilog2(chan->mask);

	return id;
}

/**
 * \brief Look at the first request in a channel's request queue
 *
 * The request stays on the queue.
 *
 * \param chan The channel
 *
 * \return The request at the head of \a chan's request queue
 */
static inline struct dmac_request *dmaca_chan_peek_head_req(
		struct dmaca_channel *chan)
{
	struct dmac_request	*head;

	head = slist_peek_head(&chan->req_queue, struct dmac_request, node);

	return head;
}

/**
 * \brief Remove the first request from a channel's request queue
 *
 * \param chan The channel
 *
 * \return The request that was at the head of \a chan's request queue
 */
static inline struct dmac_request *dmaca_chan_pop_head_req(
		struct dmaca_channel *chan)
{
	struct dmac_request	*head;

	head = slist_pop_head(&chan->req_queue, struct dmac_request, node);

	return head;
}

/**
 * \brief Look at the first buffer in a channel's buffer queue
 *
 * The buffer stays on the queue.
 *
 * \param chan The channel
 *
 * \return The buffer at the head of \a chan's buffer queue
 */
static inline struct buffer *dmaca_chan_peek_head_buf(
		struct dmaca_channel *chan)
{
	struct buffer	*head;

	head = slist_peek_head(&chan->buf_queue, struct buffer, node);

	return head;
}

/**
 * \brief Allocate a hardware descriptor from a channel's private pool
 *
 * \param chan The channel whose descriptor pool is used
 * \param phys Passed on to dma_pool_alloc(); receives the physical
 * address of the descriptor
 *
 * \return The descriptor, or NULL when none could be allocated (the
 * callers in this file treat a NULL result as allocation failure)
 */
static struct dmaca_hw_desc *dmaca_chan_alloc_desc(struct dmaca_channel *chan,
		phys_addr_t *phys)
{
	struct dmaca_hw_desc	*desc;

	desc = dma_pool_alloc(&chan->desc_pool, phys);

	return desc;
}

/**
 * \brief Return a hardware descriptor to a channel's private pool
 *
 * \param chan The channel whose descriptor pool \a desc came from
 * \param desc The descriptor to release
 */
static void dmaca_chan_free_desc(struct dmaca_channel *chan,
		struct dmaca_hw_desc *desc)
{
	struct dma_pool	*pool = &chan->desc_pool;

	dma_pool_free(pool, desc);
}

/**
 * \brief Release the hardware descriptor attached to a buffer
 *
 * After this call, \a buf no longer references a descriptor, which is
 * how the rest of the driver recognizes buffers that have not been
 * handed to the hardware.
 *
 * \param chan The channel that owns the descriptor
 * \param buf The buffer whose descriptor is released
 */
static void dmaca_chan_buf_done(struct dmaca_channel *chan, struct buffer *buf)
{
	struct dmaca_hw_desc	*hw_desc;

	hw_desc = buf->dma_desc;
	dmaca_chan_free_desc(chan, hw_desc);

	buf->dma_desc = NULL;
}

/**
 * \brief Complete a request and notify its owner
 *
 * Records \a status in the request, hands the request's buffer list
 * back via slist_give_back_head(), and finally invokes the request's
 * completion callback if one is set.
 *
 * \param chan The channel the request was submitted to
 * \param req The request being completed
 * \param status Completion status stored in \a req
 */
static void dmaca_chan_req_done(struct dmaca_channel *chan,
		struct dmac_request *req, int status)
{
	req->status = status;

	/* The buffer list must be restored before the client sees it */
	slist_give_back_head(&req->buf_list, &chan->buf_queue);

	if (!req->req_done)
		return;

	req->req_done(&chan->dch, req);
}

/**
 * \brief Stop a channel and fail everything queued on it
 *
 * Installed as the \c reset operation of the generic channel. The
 * channel is marked as not enabled and not active, its interrupts are
 * masked, and the hardware channel is disabled. Descriptors attached
 * to queued buffers are then released, and every queued request is
 * completed with -STATUS_IO_ERROR. Finally the channel is marked
 * enabled again so new requests can be submitted.
 *
 * NOTE(review): the ACTIVE flag is cleared here without a matching
 * dmaca_controller_put() -- confirm the controller use count stays
 * balanced when an active channel is reset.
 *
 * \param dch The generic channel to reset
 */
static void dmaca_chan_reset(struct dmac_channel *dch)
{
	struct dmaca_channel	*chan = dmaca_channel_of(dch);
	struct dmac_request	*req;
	struct buffer		*buf;
	unsigned long		iflags;

	/* Dump the channel registers before touching anything */
	dbg_verbose("dmaca ch%u reset: e%x s%08x d%08x l%08x c%08x:%08x\n",
			dmaca_chan_id(chan), dmaca_read_reg(CH_EN),
			dmaca_chan_read_reg(chan, SAR),
			dmaca_chan_read_reg(chan, DAR),
			dmaca_chan_read_reg(chan, LLP),
			dmaca_chan_read_reg(chan, CTLH),
			dmaca_chan_read_reg(chan, CTLL));
	dbg_verbose("  status %x\n", dmaca_read_reg(STATUS_INT));

	/* First, prevent any queue operations while we're flushing */
	iflags = cpu_irq_save();
	clear_bit(DMACA_CHAN_ENABLED, &chan->flags);
	clear_bit(DMACA_CHAN_ACTIVE, &chan->flags);
	dmaca_clear_chan_bit(chan, MASK_TFR);
	dmaca_clear_chan_bit(chan, MASK_BLOCK);
	dmaca_clear_chan_bit(chan, MASK_ERR);
	cpu_irq_restore(iflags);

	/* Disable the channel */
	dmaca_clear_chan_bit(chan, CH_EN);
	/* Busy-wait until the hardware reports the channel as disabled */
	while (dmaca_test_chan_bit(chan, CH_EN))
		barrier();

	/*
	 * Free the DMA descriptors. The walk stops at the first buffer
	 * that has no descriptor attached.
	 */
	slist_for_each(&chan->buf_queue, buf, node) {
		if (!buf->dma_desc)
			break;

		dmaca_chan_buf_done(chan, buf);
	}

	/* Terminate all requests */
	while (!slist_is_empty(&chan->req_queue)) {
		req = dmaca_chan_pop_head_req(chan);
		dmaca_chan_req_done(chan, req, -STATUS_IO_ERROR);
	}

	/* Allow queueing new requests */
	set_bit(DMACA_CHAN_ENABLED, &chan->flags);
}

/**
 * \brief Hand as many queued buffers as possible to the hardware
 *
 * Builds a linked list of hardware descriptors, one per queued buffer,
 * and starts the channel. If the buffer queue is empty, or the first
 * descriptor cannot be allocated, the channel interrupts are masked
 * and the controller reference is dropped instead.
 *
 * The channel must be enabled, not active, and the hardware channel
 * must be disabled on entry (asserted below). Past the empty-queue
 * check, interrupts are temporarily enabled while descriptors are
 * being filled in, and are disabled again before returning.
 *
 * The last descriptor of the chain always gets a link pointer of zero;
 * the link pointer is never left holding an indeterminate or stale
 * value.
 *
 * \param chan The channel whose queue is to be processed
 */
static void dmaca_chan_process_queue(struct dmaca_channel *chan)
{
	struct dmaca_hw_desc	*desc;
	struct dmaca_hw_desc	*desc_next;
	struct buffer		*buf;
	struct buffer		*buf_next;
	phys_addr_t		phys;
	phys_addr_t		phys_next = 0;
	struct dmac_request	*req;

	assert(!dmaca_test_chan_bit(chan, CH_EN));
	assert(test_bit(DMACA_CHAN_ENABLED, &chan->flags));
	assert(!test_bit(DMACA_CHAN_ACTIVE, &chan->flags));

	dbg_printf("dmaca ch%u: processing queue...\n", dmaca_chan_id(chan));

	if (slist_is_empty(&chan->buf_queue)) {
		dbg_printf("  - no buffers\n");
		assert(slist_is_empty(&chan->req_queue));
		goto queue_is_empty;
	}

	set_bit(DMACA_CHAN_ACTIVE, &chan->flags);
	cpu_irq_enable();

	desc = dmaca_chan_alloc_desc(chan, &phys);
	if (!desc) {
		dbg_error("dmaca ch%u: failed to allocate descriptor\n",
				dmaca_chan_id(chan));
		cpu_irq_disable();
		goto no_desc;
	}

	/* The hardware fetches the first descriptor from this address */
	dmaca_chan_write_reg(chan, LLP, phys);

	/*
	 * Submit as many buffers as possible. We stop when reaching
	 * the end of the queue, or when we cannot allocate any more
	 * descriptors.
	 *
	 * At this point, we know there's at least one entry in the
	 * queue, and we have already allocated the first descriptor,
	 * so we'll always submit at least one buffer.
	 *
	 * If the last buffer submitted doesn't cause an interrupt on
	 * its own (not last in a request, or no request callback),
	 * we'll still get a "transfer complete" interrupt so that we
	 * can advance the queue.
	 */
	cpu_irq_disable();
	req = dmaca_chan_peek_head_req(chan);
	slist_for_each_safe(&chan->buf_queue, buf, buf_next, node) {
		unsigned int	xfer_width;
		unsigned int	burst_length;
		uint32_t	ctll;

		/* Ran out of descriptors on the previous iteration */
		if (!desc)
			break;

		/* Pre-allocate the successor so we know whether to chain */
		desc_next = NULL;
		if (slist_node_is_valid(&chan->buf_queue, &buf_next->node))
			desc_next = dmaca_chan_alloc_desc(chan, &phys_next);

		/*
		 * Without a successor there is nothing to link to. Make
		 * sure the link pointer is well-defined instead of
		 * reusing whatever phys_next happened to hold.
		 */
		if (!desc_next)
			phys_next = 0;

		cpu_irq_enable();

		xfer_width = req->reg_width;
		burst_length = req->burst_length;
		assert(xfer_width <= DMAC_REG_WIDTH_32BIT);

		ctll = DMACA_CTLL_DST_TR_WIDTH(xfer_width)
			| DMACA_CTLL_SRC_TR_WIDTH(xfer_width)
			| DMACA_CTLL_DST_MSIZE(burst_length)
			| DMACA_CTLL_SRC_MSIZE(burst_length);
		if (desc_next)
			ctll |= DMACA_CTLL_LLP_D_EN | DMACA_CTLL_LLP_S_EN;

		if (req->direction == DMA_FROM_DEVICE) {
			/* Peripheral register -> memory buffer */
			ctll |= DMACA_CTLL_DINC_INCREMENT
				| DMACA_CTLL_SINC_NO_CHANGE
				| DMACA_CTLL_TT_P2M;
			desc->sar = chan->rx_reg_addr;
			desc->dar = buf->addr.phys;
		} else {
			/* Memory buffer -> peripheral register */
			ctll |= DMACA_CTLL_DINC_NO_CHANGE
				| DMACA_CTLL_SINC_INCREMENT
				| DMACA_CTLL_TT_M2P;
			desc->sar = buf->addr.phys;
			desc->dar = chan->tx_reg_addr;
		}

		/* Always interrupt on the last descriptor of the chain */
		if (!desc_next)
			ctll |= DMACA_CTLL_INT_EN;

		/*
		 * Interrupt at the end of a request that has a callback,
		 * then move on to the request owning the next buffer.
		 */
		if (slist_node_is_last(&req->buf_list, &buf->node)) {
			if (req->req_done)
				ctll |= DMACA_CTLL_INT_EN;
			req = slist_peek_next(&req->node,
					struct dmac_request, node);
		}

		desc->llp = phys_next;
		desc->ctll = ctll;
		/* Block size is given in units of the transfer width */
		desc->ctlh = DMACA_CTLH_BLOCK_TS(buf->len >> xfer_width);

		dbg_printf("  D%08lx: s%08x d%08x l%08x c%08x:%08x\n",
				phys, desc->sar, desc->dar, desc->llp,
				desc->ctlh, desc->ctll);

		buf->dma_desc = desc;
		desc = desc_next;
		phys = phys_next;
		cpu_irq_disable();
	}

	/* The compiler must not move any stores beyond this point */
	barrier();
	dmaca_chan_write_reg(chan, CTLL,
			DMACA_CTLL_LLP_D_EN | DMACA_CTLL_LLP_S_EN);

	/* Enable interrupts and enable the channel */
	dmaca_set_chan_bit(chan, MASK_TFR);
	dmaca_set_chan_bit(chan, MASK_BLOCK);
	dmaca_set_chan_bit(chan, MASK_ERR);
	dmaca_set_chan_bit(chan, CH_EN);

	dbg_printf("  CH_EN: %x\n", dmaca_read_reg(CH_EN));

	return;

no_desc:
	clear_bit(DMACA_CHAN_ACTIVE, &chan->flags);

queue_is_empty:
	/* Nothing to do: mask the channel interrupts and release the controller */
	dmaca_clear_chan_bit(chan, MASK_TFR);
	dmaca_clear_chan_bit(chan, MASK_BLOCK);
	dmaca_clear_chan_bit(chan, MASK_ERR);
	dmaca_controller_put();
}

/**
 * \brief Queue a request on a channel
 *
 * Installed as the \c submit_req operation of the generic channel.
 * Must be called with interrupts enabled on an allocated channel
 * (asserted). If the channel is enabled, the request and its buffers
 * are appended to the channel's queues, and the queue is kicked if the
 * channel is currently idle. If the channel is not enabled, the
 * request is completed immediately with -STATUS_IO_ERROR.
 *
 * \param dch The generic channel to submit to
 * \param req The request to submit
 */
static void dmaca_chan_submit_req(struct dmac_channel *dch,
		struct dmac_request *req)
{
	struct dmaca_channel	*chan = dmaca_channel_of(dch);
	bool			rejected = false;

	dbg_printf("dmaca ch%u: submit req %p\n", dmaca_chan_id(chan), req);

	assert(cpu_irq_is_enabled());
	assert(test_bit(DMACA_CHAN_ALLOCATED, &chan->flags));
	dmac_verify_req(req);

	/* Start from a clean slate in case the request is being reused */
	req->status = -STATUS_IN_PROGRESS;
	req->bytes_xfered = 0;

	/* The queues are shared with the interrupt handler */
	cpu_irq_disable();
	if (likely(test_bit(DMACA_CHAN_ENABLED, &chan->flags))) {
		slist_insert_tail(&chan->req_queue, &req->node);
		slist_borrow_to_tail(&chan->buf_queue, &req->buf_list);

		/* An idle channel needs an explicit kick */
		if (!test_bit(DMACA_CHAN_ACTIVE, &chan->flags)) {
			dmaca_controller_get();
			dmaca_chan_process_queue(chan);
		}
	} else {
		rejected = true;
	}
	cpu_irq_enable();

	/* Report the failure outside the critical section */
	if (rejected)
		dmaca_chan_req_done(chan, req, -STATUS_IO_ERROR);
}

/**
 * \brief Handle completion of everything handed to the hardware
 *
 * Called when the hardware channel is disabled (asserted). Clears and
 * masks the transfer and block interrupts, then walks the request
 * queue: every buffer that has a descriptor is popped, accounted for
 * in the owning request's \c bytes_xfered and has its descriptor
 * released. Requests whose buffers were all submitted are completed
 * with status 0. The walk stops at the first buffer without a
 * descriptor. Finally the channel is marked inactive and the queue is
 * processed again to submit any remaining buffers.
 *
 * \param chan The channel that finished its transfers
 */
static void dmaca_chan_all_done(struct dmaca_channel *chan)
{
	struct dmac_request	*req;

	assert(!dmaca_test_chan_bit(chan, CH_EN));

	dmaca_write_reg(CLEAR_TFR, chan->mask);
	dmaca_write_reg(CLEAR_BLOCK, chan->mask);

	dmaca_clear_chan_bit(chan, MASK_TFR);
	dmaca_clear_chan_bit(chan, MASK_BLOCK);

	dbg_printf("dmaca ch%u: all done\n", dmaca_chan_id(chan));

	/* The controller is idle; terminate all submitted requests normally */
	while (!slist_is_empty(&chan->req_queue)) {
		struct buffer	*buf;
		size_t		bytes_xfered = 0;

		req = dmaca_chan_peek_head_req(chan);
		do {
			buf = dmaca_chan_peek_head_buf(chan);
			/*
			 * A buffer without a descriptor was never handed
			 * to the hardware: the request is only partially
			 * done, so record progress and leave it queued.
			 */
			if (!buf->dma_desc) {
				req->bytes_xfered += bytes_xfered;
				goto done;
			}

			slist_pop_head_node(&chan->buf_queue);
			bytes_xfered += buf->len;
			dmaca_chan_buf_done(chan, buf);
		/* Stop after the last buffer belonging to this request */
		} while (&buf->node != req->buf_list.last);

		dmaca_chan_pop_head_req(chan);
		req->bytes_xfered += bytes_xfered;
		dmaca_chan_req_done(chan, req, 0);
	}

done:
	/* Mark the channel idle, then submit whatever is still queued */
	clear_bit(DMACA_CHAN_ACTIVE, &chan->flags);
	dmaca_chan_process_queue(chan);
}

/**
 * \brief Complete buffers that the hardware has finished with so far
 *
 * Called on a "block complete" interrupt. Clears the block interrupt
 * and samples the channel's LLP register. If the channel has already
 * been disabled, everything submitted is handled by
 * dmaca_chan_all_done(). Otherwise, buffers are popped from the head
 * of the queue and their descriptors released until one of the
 * following holds for the head buffer:
 *   - it is the last buffer in the queue,
 *   - the buffer following it has not been given a descriptor,
 *   - its descriptor's link pointer equals the sampled LLP value.
 *
 * Requests whose last buffer is popped this way are completed with
 * status 0.
 *
 * \param chan The channel to scan
 */
static void dmaca_chan_scan_queue(struct dmaca_channel *chan)
{
	struct dmac_request	*req;
	phys_addr_t		llp;

	dmaca_write_reg(CLEAR_BLOCK, chan->mask);

	/* Sample LLP before checking CH_EN so the value is known-valid */
	llp = dmaca_chan_read_reg(chan, LLP);

	/* LLP is only valid if the channel is active */
	if (!dmaca_test_chan_bit(chan, CH_EN)) {
		dmaca_chan_all_done(chan);
		return;
	}

	dbg_printf("dmaca ch%u scan queue: LLP=%08lx\n",
			dmaca_chan_id(chan), llp);

	assert(!slist_is_empty(&chan->req_queue));
	assert(!slist_is_empty(&chan->buf_queue));

	req = dmaca_chan_peek_head_req(chan);
	while (1) {
		struct dmaca_hw_desc	*desc;
		struct buffer		*buf;
		struct buffer		*buf_next;

		buf = dmaca_chan_peek_head_buf(chan);
		if (slist_node_is_last(&chan->buf_queue, &buf->node))
			break;

		/*
		 * Look at the buffer that actually follows buf in the
		 * queue: pass the next node itself, not the address of
		 * the link field.
		 */
		buf_next = slist_entry(buf->node.next, struct buffer, node);
		if (!buf_next->dma_desc)
			break;

		/* The hardware has not moved past this descriptor yet */
		desc = buf->dma_desc;
		if (desc->llp == llp)
			break;

		slist_pop_head_node(&chan->buf_queue);
		req->bytes_xfered += buf->len;
		dmaca_chan_buf_done(chan, buf);

		/* Finished the request's final buffer: complete it */
		if (slist_node_is_last(&req->buf_list, &buf->node)) {
			slist_pop_head_node(&chan->req_queue);
			dmaca_chan_req_done(chan, req, 0);
			req = dmaca_chan_peek_head_req(chan);
		}
	}
}

/**
 * \brief Handle an error interrupt on a channel
 *
 * Completes whatever finished before the error, then pops the request
 * at the head of the queue, clears and masks all of the channel's
 * interrupts, marks the channel inactive, and completes the popped
 * request with -STATUS_BAD_ADDRESS.
 *
 * NOTE(review): when CH_EN is already clear, dmaca_chan_scan_queue()
 * defers to dmaca_chan_all_done(), which may complete requests and
 * restart the queue -- confirm the asserts below hold in that case.
 *
 * \param chan The channel that reported the error
 */
static void dmaca_chan_error(struct dmaca_channel *chan)
{
	struct dmac_request	*req;

	dbg_printf("dmaca ch%u error\n", dmaca_chan_id(chan));
	dbg_printf("  SAR %08x DAR %08x\n", dmaca_chan_read_reg(chan, SAR),
			dmaca_chan_read_reg(chan, DAR));

	/* Terminate any completed requests normally */
	dmaca_chan_scan_queue(chan);

	assert(!slist_is_empty(&chan->req_queue));
	assert(!slist_is_empty(&chan->buf_queue));
	assert(!dmaca_test_chan_bit(chan, CH_EN));

	/* Terminate the request that caused the error */
	req = dmaca_chan_pop_head_req(chan);

	/* Clear and disable all channel interrupts */
	dmaca_write_reg(CLEAR_TFR, chan->mask);
	dmaca_write_reg(CLEAR_BLOCK, chan->mask);
	dmaca_write_reg(CLEAR_ERR, chan->mask);
	dmaca_clear_chan_bit(chan, MASK_TFR);
	dmaca_clear_chan_bit(chan, MASK_BLOCK);
	dmaca_clear_chan_bit(chan, MASK_ERR);

	/* The client must flush and/or restart the queue if necessary */
	clear_bit(DMACA_CHAN_ACTIVE, &chan->flags);

	/*
	 * This must come last since it might start flushing the queue
	 * and/or resubmitting requests.
	 */
	dmaca_chan_req_done(chan, req, -STATUS_BAD_ADDRESS);

	dbg_printf("dmaca ch%u error recovery complete\n", dmaca_chan_id(chan));
}

/**
 * \brief DMACA interrupt handler
 *
 * Reads the block, transfer and error status registers once, then
 * dispatches at most one event per channel. Errors take priority over
 * "transfer complete", which takes priority over "block complete".
 *
 * \param data Handler context; not used
 */
static void dmaca_interrupt(void *data)
{
	uint32_t	status_block;
	uint32_t	status_tfr;
	uint32_t	status_err;
	unsigned int	ch;

	status_block = dmaca_read_reg(STATUS_BLOCK);
	status_tfr = dmaca_read_reg(STATUS_TFR);
	status_err = dmaca_read_reg(STATUS_ERR);

	dbg_printf("dmaca interrupt: %x/%x/%x\n", status_block,
			status_tfr, status_err);

	for (ch = 0; ch < NR_CHANNELS; ch++) {
		struct dmaca_channel	*chan = &dmaca_channel[ch];
		uint32_t		chan_bit = chan->mask;

		if (status_err & chan_bit) {
			dmaca_chan_error(chan);
			continue;
		}

		if (status_tfr & chan_bit) {
			dmaca_chan_all_done(chan);
			continue;
		}

		if (status_block & chan_bit)
			dmaca_chan_scan_queue(chan);
	}
}
DEFINE_IRQ_HANDLER(dmaca, dmaca_interrupt, 0);

/**
 * \brief Initialize a freshly allocated channel
 *
 * Resets the channel's queues, records its register base, bit mask and
 * peripheral register addresses, installs the generic channel
 * operations, programs the channel's configuration registers and
 * finally marks the channel as enabled.
 *
 * \param chan The channel to initialize
 * \param index Hardware channel number; must be below NR_CHANNELS
 * \param rx_periph Peripheral id used as the source handshake
 * interface, or DMAC_PERIPH_NONE
 * \param tx_periph Peripheral id used as the destination handshake
 * interface, or DMAC_PERIPH_NONE
 * \param rx_reg_addr Source address for DMA_FROM_DEVICE transfers
 * \param tx_reg_addr Destination address for all other transfers
 *
 * At least one of \a rx_periph and \a tx_periph must be a real
 * peripheral (asserted).
 */
static void dmaca_chan_init(struct dmaca_channel *chan, unsigned int index,
		enum dmac_periph_id rx_periph, enum dmac_periph_id tx_periph,
		phys_addr_t rx_reg_addr, phys_addr_t tx_reg_addr)
{
	uint32_t	cfgh;

	/* chan->mask is 8 bits wide, so channels 0 through 7 fit */
	build_assert(NR_CHANNELS <= 8);
	assert(index < NR_CHANNELS);
	assert(rx_periph != DMAC_PERIPH_NONE || tx_periph != DMAC_PERIPH_NONE);

	slist_init(&chan->req_queue);
	slist_init(&chan->buf_queue);
	chan->regs = (void *)(DMACA_BASE + index * DMACA_CHAN_REGS_SIZE);
	chan->mask = 1 << index;
	chan->rx_reg_addr = rx_reg_addr;
	chan->tx_reg_addr = tx_reg_addr;

	chan->dch.submit_req = dmaca_chan_submit_req;
	chan->dch.reset = dmaca_chan_reset;
	chan->dch.max_buffer_size = 2048;

	/* Select the handshake interfaces for the peripherals in use */
	cfgh = 0;
	if (rx_periph != DMAC_PERIPH_NONE)
		cfgh |= DMACA_CFGH_SRC_PER(dmaca_get_periph_id(rx_periph));
	if (tx_periph != DMAC_PERIPH_NONE)
		cfgh |= DMACA_CFGH_DST_PER(dmaca_get_periph_id(tx_periph));

	/* Bring the channel registers into a known state */
	dmaca_chan_write_reg(chan, LLP, 0);
	dmaca_chan_write_reg(chan, CTLL, 0);
	dmaca_chan_write_reg(chan, CTLH, 0);
	dmaca_chan_write_reg(chan, CFGL, 0);
	dmaca_chan_write_reg(chan, CFGH, cfgh);

	/* From here on the channel accepts requests */
	set_bit(DMACA_CHAN_ENABLED, &chan->flags);
}

/**
 * \brief Allocate and initialize a free DMACA channel
 *
 * Must be called with interrupts enabled (asserted). The channels are
 * tried in index order; the first one whose ALLOCATED flag could be
 * atomically claimed is initialized and returned.
 *
 * \param dmac The DMA controller; not used by this function
 * \param rx_periph Peripheral id for the receive direction
 * \param tx_periph Peripheral id for the transmit direction
 * \param rx_reg_addr Address of the peripheral's receive register
 * \param tx_reg_addr Address of the peripheral's transmit register
 *
 * \return The generic channel handle, or NULL if every channel is
 * already allocated
 */
struct dmac_channel *dmaca_alloc_channel(struct dma_controller *dmac,
		enum dmac_periph_id rx_periph, enum dmac_periph_id tx_periph,
		phys_addr_t rx_reg_addr, phys_addr_t tx_reg_addr)
{
	unsigned int	idx;

	dbg_printf("dmaca alloc_channel: %u[%08lx]/%u[%08lx]\n",
			rx_periph, rx_reg_addr, tx_periph, tx_reg_addr);

	assert(cpu_irq_is_enabled());

	for (idx = 0; idx < ARRAY_LEN(dmaca_channel); idx++) {
		struct dmaca_channel	*candidate = &dmaca_channel[idx];

		dbg_printf("  ch%u flags: 0x%lx\n", idx, candidate->flags);

		/* Somebody else owns this one; try the next */
		if (atomic_test_and_set_bit(DMACA_CHAN_ALLOCATED,
					&candidate->flags))
			continue;

		dmaca_chan_init(candidate, idx, rx_periph, tx_periph,
				rx_reg_addr, tx_reg_addr);

		return &candidate->dch;
	}

	return NULL;
}

/**
 * \brief Release a channel obtained from dmaca_alloc_channel()
 *
 * The channel must be allocated, enabled and idle, and both of its
 * queues must be empty (all asserted).
 *
 * \param dmac The DMA controller; not used by this function
 * \param dch The generic channel handle to release
 */
void dmaca_free_channel(struct dma_controller *dmac,
		struct dmac_channel *dch)
{
	struct dmaca_channel	*chan = dmaca_channel_of(dch);
	unsigned long		*state = &chan->flags;

	/* Nothing may be queued or in flight when the channel is freed */
	assert(slist_is_empty(&chan->req_queue));
	assert(slist_is_empty(&chan->buf_queue));
	assert(test_bit(DMACA_CHAN_ALLOCATED, &chan->flags));
	assert(test_bit(DMACA_CHAN_ENABLED, &chan->flags));
	assert(!test_bit(DMACA_CHAN_ACTIVE, &chan->flags));

	/* Stop accepting requests before making the channel available */
	atomic_clear_bit(DMACA_CHAN_ENABLED, state);
	atomic_clear_bit(DMACA_CHAN_ALLOCATED, state);
}

/**
 * \brief The DMACA controller instance
 *
 * Provides the channel allocation and release operations implemented
 * by this driver.
 */
struct dma_controller dmaca_controller = {
	.alloc_chan	= dmaca_alloc_channel,
	.free_chan	= dmaca_free_channel,
};

void dmaca_init(void)
{
	unsigned int		i;

	/*
	 * We need a descriptor pool for each channel. If we use a
	 * shared pool for all channels, we risk that one channel ends
	 * up consuming all of the descriptors, so that other channels
	 * are not able to process any buffers at all. In this
	 * situation, it's very difficult to resume queue processing
	 * for those channels since we don't have any obvious trigger
	 * mechanisms.
	 */
	for (i = 0; i < ARRAY_LEN(dmaca_channel); i++) {
		struct dmaca_channel	*chan;

		chan = &dmaca_channel[i];
		dma_pool_init_coherent_physmem(&chan->desc_pool,
				&dmaca_desc_physmem_pool,
				CONFIG_DMACA_NR_DESCRIPTORS,
				sizeof(struct dmaca_hw_desc), 2);
	}

	setup_irq_handler(DMACA_IRQ, dmaca, 0, &dmaca_controller);
}
