   1/*
   2 * Copyright (C) 2006-2009 DENX Software Engineering.
   3 *
   4 * Author: Yuri Tikhonov <yur@emcraft.com>
   5 *
   6 * Further porting to arch/powerpc by
   7 * 	Anatolij Gustschin <agust@denx.de>
   8 *
   9 * This program is free software; you can redistribute it and/or modify it
  10 * under the terms of the GNU General Public License as published by the Free
  11 * Software Foundation; either version 2 of the License, or (at your option)
  12 * any later version.
  13 *
  14 * This program is distributed in the hope that it will be useful, but WITHOUT
  15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  17 * more details.
  18 *
  19 * You should have received a copy of the GNU General Public License along with
  20 * this program; if not, write to the Free Software Foundation, Inc., 59
  21 * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  22 *
  23 * The full GNU General Public License is included in this distribution in the
  24 * file called COPYING.
  25 */
  26
  27/*
  28 * This driver supports the asynchronous DMA copy and RAID engines available
  29 * on the AMCC PPC440SPe Processors.
  30 * Based on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
  31 * ADMA driver written by D.Williams.
  32 */
  33
  34#include <linux/init.h>
  35#include <linux/module.h>
  36#include <linux/async_tx.h>
  37#include <linux/delay.h>
  38#include <linux/dma-mapping.h>
  39#include <linux/spinlock.h>
  40#include <linux/interrupt.h>
  41#include <linux/slab.h>
  42#include <linux/uaccess.h>
  43#include <linux/proc_fs.h>
  44#include <linux/of.h>
  45#include <linux/of_platform.h>
  46#include <asm/dcr.h>
  47#include <asm/dcr-regs.h>
  48#include "adma.h"
  49#include "../dmaengine.h"
  50
  51enum ppc_adma_init_code {
  52	PPC_ADMA_INIT_OK = 0,
  53	PPC_ADMA_INIT_MEMRES,
  54	PPC_ADMA_INIT_MEMREG,
  55	PPC_ADMA_INIT_ALLOC,
  56	PPC_ADMA_INIT_COHERENT,
  57	PPC_ADMA_INIT_CHANNEL,
  58	PPC_ADMA_INIT_IRQ1,
  59	PPC_ADMA_INIT_IRQ2,
  60	PPC_ADMA_INIT_REGISTER
  61};
  62
  63static char *ppc_adma_errors[] = {
  64	[PPC_ADMA_INIT_OK] = "ok",
  65	[PPC_ADMA_INIT_MEMRES] = "failed to get memory resource",
  66	[PPC_ADMA_INIT_MEMREG] = "failed to request memory region",
  67	[PPC_ADMA_INIT_ALLOC] = "failed to allocate memory for adev "
  68				"structure",
  69	[PPC_ADMA_INIT_COHERENT] = "failed to allocate coherent memory for "
  70				   "hardware descriptors",
  71	[PPC_ADMA_INIT_CHANNEL] = "failed to allocate memory for channel",
  72	[PPC_ADMA_INIT_IRQ1] = "failed to request first irq",
  73	[PPC_ADMA_INIT_IRQ2] = "failed to request second irq",
  74	[PPC_ADMA_INIT_REGISTER] = "failed to register dma async device",
  75};
  76
  77static enum ppc_adma_init_code
  78ppc440spe_adma_devices[PPC440SPE_ADMA_ENGINES_NUM];
  79
  80struct ppc_dma_chan_ref {
  81	struct dma_chan *chan;
  82	struct list_head node;
  83};
  84
  85/* The list of channels exported by ppc440spe ADMA */
  86struct list_head
  87ppc440spe_adma_chan_list = LIST_HEAD_INIT(ppc440spe_adma_chan_list);
  88
  89/* This flag is set when we want to refetch the xor chain in the
  90 * interrupt handler
  91 */
  92static u32 do_xor_refetch;
  93
  94/* Pointer to DMA0, DMA1 CP/CS FIFO */
  95static void *ppc440spe_dma_fifo_buf;
  96
  97/* Pointers to the last CDBs submitted to DMA0, DMA1 */
  98static struct ppc440spe_adma_desc_slot *chan_last_sub[3];
  99static struct ppc440spe_adma_desc_slot *chan_first_cdb[3];
 100
 101/* Pointer to last linked and submitted xor CB */
 102static struct ppc440spe_adma_desc_slot *xor_last_linked;
 103static struct ppc440spe_adma_desc_slot *xor_last_submit;
 104
 105/* This array is used in data-check operations for storing a pattern */
 106static char ppc440spe_qword[16];
 107
 108static atomic_t ppc440spe_adma_err_irq_ref;
 109static dcr_host_t ppc440spe_mq_dcr_host;
 110static unsigned int ppc440spe_mq_dcr_len;
 111
 112/* Since RXOR operations use a common register (MQ0_CF2H) for setting up
 113 * the block size of transactions, we do not allow more than one RXOR
 114 * transaction to be active at a time. This variable records whether RXOR
 115 * is currently active (PPC440SPE_RXOR_RUN bit is set) or not
 116 * (PPC440SPE_RXOR_RUN is clear).
 117 */
 118static unsigned long ppc440spe_rxor_state;
 119
 120/* These are used in enable & check routines
 121 */
 122static u32 ppc440spe_r6_enabled;
 123static struct ppc440spe_adma_chan *ppc440spe_r6_tchan;
 124static struct completion ppc440spe_r6_test_comp;
 125
 126static int ppc440spe_adma_dma2rxor_prep_src(
 127		struct ppc440spe_adma_desc_slot *desc,
 128		struct ppc440spe_rxor *cursor, int index,
 129		int src_cnt, u32 addr);
 130static void ppc440spe_adma_dma2rxor_set_src(
 131		struct ppc440spe_adma_desc_slot *desc,
 132		int index, dma_addr_t addr);
 133static void ppc440spe_adma_dma2rxor_set_mult(
 134		struct ppc440spe_adma_desc_slot *desc,
 135		int index, u8 mult);
 136
 137#ifdef ADMA_LL_DEBUG
 138#define ADMA_LL_DBG(x) ({ if (1) x; 0; })
 139#else
 140#define ADMA_LL_DBG(x) ({ if (0) x; 0; })
 141#endif
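
/*
 * Editorial note with a usage sketch (illustrative, not from the original
 * source): ADMA_LL_DBG() wraps the low-level dump helpers below so that a
 * call such as
 *
 *	ADMA_LL_DBG(print_cb(chan, desc->hw_desc));
 *
 * is executed only when ADMA_LL_DEBUG is defined; otherwise the "if (0)"
 * branch lets the compiler type-check the call and then discard it.
 */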
 142
 143static void print_cb(struct ppc440spe_adma_chan *chan, void *block)
 144{
 145	struct dma_cdb *cdb;
 146	struct xor_cb *cb;
 147	int i;
 148
 149	switch (chan->device->id) {
 150	case 0:
 151	case 1:
 152		cdb = block;
 153
 154		pr_debug("CDB at %p [%d]:\n"
 155			"\t attr 0x%02x opc 0x%02x cnt 0x%08x\n"
 156			"\t sg1u 0x%08x sg1l 0x%08x\n"
 157			"\t sg2u 0x%08x sg2l 0x%08x\n"
 158			"\t sg3u 0x%08x sg3l 0x%08x\n",
 159			cdb, chan->device->id,
 160			cdb->attr, cdb->opc, le32_to_cpu(cdb->cnt),
 161			le32_to_cpu(cdb->sg1u), le32_to_cpu(cdb->sg1l),
 162			le32_to_cpu(cdb->sg2u), le32_to_cpu(cdb->sg2l),
 163			le32_to_cpu(cdb->sg3u), le32_to_cpu(cdb->sg3l)
 164		);
 165		break;
 166	case 2:
 167		cb = block;
 168
 169		pr_debug("CB at %p [%d]:\n"
 170			"\t cbc 0x%08x cbbc 0x%08x cbs 0x%08x\n"
 171			"\t cbtah 0x%08x cbtal 0x%08x\n"
 172			"\t cblah 0x%08x cblal 0x%08x\n",
 173			cb, chan->device->id,
 174			cb->cbc, cb->cbbc, cb->cbs,
 175			cb->cbtah, cb->cbtal,
 176			cb->cblah, cb->cblal);
 177		for (i = 0; i < 16; i++) {
 178			if (i && !cb->ops[i].h && !cb->ops[i].l)
 179				continue;
 180			pr_debug("\t ops[%2d]: h 0x%08x l 0x%08x\n",
 181				i, cb->ops[i].h, cb->ops[i].l);
 182		}
 183		break;
 184	}
 185}
 186
 187static void print_cb_list(struct ppc440spe_adma_chan *chan,
 188			  struct ppc440spe_adma_desc_slot *iter)
 189{
 190	for (; iter; iter = iter->hw_next)
 191		print_cb(chan, iter->hw_desc);
 192}
 193
 194static void prep_dma_xor_dbg(int id, dma_addr_t dst, dma_addr_t *src,
 195			     unsigned int src_cnt)
 196{
 197	int i;
 198
 199	pr_debug("\n%s(%d):\nsrc: ", __func__, id);
 200	for (i = 0; i < src_cnt; i++)
 201		pr_debug("\t0x%016llx ", src[i]);
 202	pr_debug("dst:\n\t0x%016llx\n", dst);
 203}
 204
 205static void prep_dma_pq_dbg(int id, dma_addr_t *dst, dma_addr_t *src,
 206			    unsigned int src_cnt)
 207{
 208	int i;
 209
 210	pr_debug("\n%s(%d):\nsrc: ", __func__, id);
 211	for (i = 0; i < src_cnt; i++)
 212		pr_debug("\t0x%016llx ", src[i]);
 213	pr_debug("dst: ");
 214	for (i = 0; i < 2; i++)
 215		pr_debug("\t0x%016llx ", dst[i]);
 216}
 217
 218static void prep_dma_pqzero_sum_dbg(int id, dma_addr_t *src,
 219				    unsigned int src_cnt,
 220				    const unsigned char *scf)
 221{
 222	int i;
 223
 224	pr_debug("\n%s(%d):\nsrc(coef): ", __func__, id);
 225	if (scf) {
 226		for (i = 0; i < src_cnt; i++)
 227			pr_debug("\t0x%016llx(0x%02x) ", src[i], scf[i]);
 228	} else {
 229		for (i = 0; i < src_cnt; i++)
 230			pr_debug("\t0x%016llx(no) ", src[i]);
 231	}
 232
 233	pr_debug("dst: ");
 234	for (i = 0; i < 2; i++)
 235		pr_debug("\t0x%016llx ", src[src_cnt + i]);
 236}
 237
 238/******************************************************************************
 239 * Command (Descriptor) Blocks low-level routines
 240 ******************************************************************************/
 241/**
 242 * ppc440spe_desc_init_interrupt - initialize the descriptor for INTERRUPT
 243 * pseudo operation
 244 */
 245static void ppc440spe_desc_init_interrupt(struct ppc440spe_adma_desc_slot *desc,
 246					  struct ppc440spe_adma_chan *chan)
 247{
 248	struct xor_cb *p;
 249
 250	switch (chan->device->id) {
 251	case PPC440SPE_XOR_ID:
 252		p = desc->hw_desc;
 253		memset(desc->hw_desc, 0, sizeof(struct xor_cb));
 254		/* NOP with Command Block Complete Enable */
 255		p->cbc = XOR_CBCR_CBCE_BIT;
 256		break;
 257	case PPC440SPE_DMA0_ID:
 258	case PPC440SPE_DMA1_ID:
 259		memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
 260		/* NOP with interrupt */
 261		set_bit(PPC440SPE_DESC_INT, &desc->flags);
 262		break;
 263	default:
 264		printk(KERN_ERR "Unsupported id %d in %s\n", chan->device->id,
 265				__func__);
 266		break;
 267	}
 268}
 269
 270/**
 271 * ppc440spe_desc_init_null_xor - initialize the descriptor for NULL XOR
 272 * pseudo operation
 273 */
 274static void ppc440spe_desc_init_null_xor(struct ppc440spe_adma_desc_slot *desc)
 275{
 276	memset(desc->hw_desc, 0, sizeof(struct xor_cb));
 277	desc->hw_next = NULL;
 278	desc->src_cnt = 0;
 279	desc->dst_cnt = 1;
 280}
 281
 282/**
 283 * ppc440spe_desc_init_xor - initialize the descriptor for XOR operation
 284 */
 285static void ppc440spe_desc_init_xor(struct ppc440spe_adma_desc_slot *desc,
 286					 int src_cnt, unsigned long flags)
 287{
 288	struct xor_cb *hw_desc = desc->hw_desc;
 289
 290	memset(desc->hw_desc, 0, sizeof(struct xor_cb));
 291	desc->hw_next = NULL;
 292	desc->src_cnt = src_cnt;
 293	desc->dst_cnt = 1;
 294
 295	hw_desc->cbc = XOR_CBCR_TGT_BIT | src_cnt;
 296	if (flags & DMA_PREP_INTERRUPT)
 297		/* Enable interrupt on completion */
 298		hw_desc->cbc |= XOR_CBCR_CBCE_BIT;
 299}
 300
 301/**
 302 * ppc440spe_desc_init_dma2pq - initialize the descriptor for PQ
 303 * operation in DMA2 controller
 304 */
 305static void ppc440spe_desc_init_dma2pq(struct ppc440spe_adma_desc_slot *desc,
 306		int dst_cnt, int src_cnt, unsigned long flags)
 307{
 308	struct xor_cb *hw_desc = desc->hw_desc;
 309
 310	memset(desc->hw_desc, 0, sizeof(struct xor_cb));
 311	desc->hw_next = NULL;
 312	desc->src_cnt = src_cnt;
 313	desc->dst_cnt = dst_cnt;
 314	memset(desc->reverse_flags, 0, sizeof(desc->reverse_flags));
 315	desc->descs_per_op = 0;
 316
 317	hw_desc->cbc = XOR_CBCR_TGT_BIT;
 318	if (flags & DMA_PREP_INTERRUPT)
 319		/* Enable interrupt on completion */
 320		hw_desc->cbc |= XOR_CBCR_CBCE_BIT;
 321}
 322
 323#define DMA_CTRL_FLAGS_LAST	DMA_PREP_FENCE
 324#define DMA_PREP_ZERO_P		(DMA_CTRL_FLAGS_LAST << 1)
 325#define DMA_PREP_ZERO_Q		(DMA_PREP_ZERO_P << 1)
 326
 327/**
 328 * ppc440spe_desc_init_dma01pq - initialize the descriptors for PQ operation
 329 * with DMA0/1
 330 */
 331static void ppc440spe_desc_init_dma01pq(struct ppc440spe_adma_desc_slot *desc,
 332				int dst_cnt, int src_cnt, unsigned long flags,
 333				unsigned long op)
 334{
 335	struct dma_cdb *hw_desc;
 336	struct ppc440spe_adma_desc_slot *iter;
 337	u8 dopc;
 338
 339	/* Common initialization of a PQ descriptor chain */
 340	set_bits(op, &desc->flags);
 341	desc->src_cnt = src_cnt;
 342	desc->dst_cnt = dst_cnt;
 343
 344	/* Use WXOR MULTICAST if both P and Q are being computed,
 345	 * MV_SG1_SG2 if only Q is.
 346	 */
 347	dopc = (desc->dst_cnt == DMA_DEST_MAX_NUM) ?
 348		DMA_CDB_OPC_MULTICAST : DMA_CDB_OPC_MV_SG1_SG2;
 349
 350	list_for_each_entry(iter, &desc->group_list, chain_node) {
 351		hw_desc = iter->hw_desc;
 352		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
 353
 354		if (likely(!list_is_last(&iter->chain_node,
 355				&desc->group_list))) {
 356			/* set 'next' pointer */
 357			iter->hw_next = list_entry(iter->chain_node.next,
 358				struct ppc440spe_adma_desc_slot, chain_node);
 359			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
 360		} else {
 361			/* this is the last descriptor.
 362			 * this slot will be pasted from ADMA level
 363			 * each time it wants to configure parameters
 364			 * of the transaction (src, dst, ...)
 365			 */
 366			iter->hw_next = NULL;
 367			if (flags & DMA_PREP_INTERRUPT)
 368				set_bit(PPC440SPE_DESC_INT, &iter->flags);
 369			else
 370				clear_bit(PPC440SPE_DESC_INT, &iter->flags);
 371		}
 372	}
 373
 374	/* Set OPS depending on WXOR/RXOR type of operation */
 375	if (!test_bit(PPC440SPE_DESC_RXOR, &desc->flags)) {
 376		/* This is a WXOR only chain:
 377		 * - first descriptors are for zeroing destinations
 378		 *   if PPC440SPE_ZERO_P/Q set;
 379		 * - the remaining descriptors are for GF-XOR operations.
 380		 */
 381		iter = list_first_entry(&desc->group_list,
 382					struct ppc440spe_adma_desc_slot,
 383					chain_node);
 384
 385		if (test_bit(PPC440SPE_ZERO_P, &desc->flags)) {
 386			hw_desc = iter->hw_desc;
 387			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
 388			iter = list_first_entry(&iter->chain_node,
 389					struct ppc440spe_adma_desc_slot,
 390					chain_node);
 391		}
 392
 393		if (test_bit(PPC440SPE_ZERO_Q, &desc->flags)) {
 394			hw_desc = iter->hw_desc;
 395			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
 396			iter = list_first_entry(&iter->chain_node,
 397					struct ppc440spe_adma_desc_slot,
 398					chain_node);
 399		}
 400
 401		list_for_each_entry_from(iter, &desc->group_list, chain_node) {
 402			hw_desc = iter->hw_desc;
 403			hw_desc->opc = dopc;
 404		}
 405	} else {
 406		/* This is either RXOR-only or mixed RXOR/WXOR */
 407
 408		/* The first 1 or 2 slots in chain are always RXOR,
 409		 * if need to calculate P & Q, then there are two
 410		 * RXOR slots; if only P or only Q, then there is one
 411		 */
 412		iter = list_first_entry(&desc->group_list,
 413					struct ppc440spe_adma_desc_slot,
 414					chain_node);
 415		hw_desc = iter->hw_desc;
 416		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
 417
 418		if (desc->dst_cnt == DMA_DEST_MAX_NUM) {
 419			iter = list_first_entry(&iter->chain_node,
 420						struct ppc440spe_adma_desc_slot,
 421						chain_node);
 422			hw_desc = iter->hw_desc;
 423			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
 424		}
 425
 426		/* The remaining descs (if any) are WXORs */
 427		if (test_bit(PPC440SPE_DESC_WXOR, &desc->flags)) {
 428			iter = list_first_entry(&iter->chain_node,
 429						struct ppc440spe_adma_desc_slot,
 430						chain_node);
 431			list_for_each_entry_from(iter, &desc->group_list,
 432						chain_node) {
 433				hw_desc = iter->hw_desc;
 434				hw_desc->opc = dopc;
 435			}
 436		}
 437	}
 438}
 439
 440/**
 441 * ppc440spe_desc_init_dma01pqzero_sum - initialize the descriptor
 442 * for PQ_ZERO_SUM operation
 443 */
 444static void ppc440spe_desc_init_dma01pqzero_sum(
 445				struct ppc440spe_adma_desc_slot *desc,
 446				int dst_cnt, int src_cnt)
 447{
 448	struct dma_cdb *hw_desc;
 449	struct ppc440spe_adma_desc_slot *iter;
 450	int i = 0;
 451	u8 dopc = (dst_cnt == 2) ? DMA_CDB_OPC_MULTICAST :
 452				   DMA_CDB_OPC_MV_SG1_SG2;
 453	/*
 454	 * Initialize starting from the 2nd or 3rd descriptor, depending
 455	 * on dst_cnt. The first one or two slots are for cloning P
 456	 * and/or Q to chan->pdest and/or chan->qdest as we have
 457	 * to preserve original P/Q.
 458	 */
 459	iter = list_first_entry(&desc->group_list,
 460				struct ppc440spe_adma_desc_slot, chain_node);
 461	iter = list_entry(iter->chain_node.next,
 462			  struct ppc440spe_adma_desc_slot, chain_node);
 463
 464	if (dst_cnt > 1) {
 465		iter = list_entry(iter->chain_node.next,
 466				  struct ppc440spe_adma_desc_slot, chain_node);
 467	}
 468	/* initialize each source descriptor in chain */
 469	list_for_each_entry_from(iter, &desc->group_list, chain_node) {
 470		hw_desc = iter->hw_desc;
 471		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
 472		iter->src_cnt = 0;
 473		iter->dst_cnt = 0;
 474
 475		/* This is a ZERO_SUM operation:
 476		 * - <src_cnt> descriptors starting from 2nd or 3rd
 477		 *   descriptor are for GF-XOR operations;
 478		 * - remaining <dst_cnt> descriptors are for checking the result
 479		 */
 480		if (i++ < src_cnt)
 481			/* MV_SG1_SG2 if only Q is being verified
 482			 * MULTICAST if both P and Q are being verified
 483			 */
 484			hw_desc->opc = dopc;
 485		else
 486			/* DMA_CDB_OPC_DCHECK128 operation */
 487			hw_desc->opc = DMA_CDB_OPC_DCHECK128;
 488
 489		if (likely(!list_is_last(&iter->chain_node,
 490					 &desc->group_list))) {
 491			/* set 'next' pointer */
 492			iter->hw_next = list_entry(iter->chain_node.next,
 493						struct ppc440spe_adma_desc_slot,
 494						chain_node);
 495		} else {
 496			/* this is the last descriptor.
 497			 * this slot will be pasted from ADMA level
 498			 * each time it wants to configure parameters
 499			 * of the transaction (src, dst, ...)
 500			 */
 501			iter->hw_next = NULL;
 502			/* always enable interrupt generation since we get
 503			 * the status of pqzero from the handler
 504			 */
 505			set_bit(PPC440SPE_DESC_INT, &iter->flags);
 506		}
 507	}
 508	desc->src_cnt = src_cnt;
 509	desc->dst_cnt = dst_cnt;
 510}
 511
 512/**
 513 * ppc440spe_desc_init_memcpy - initialize the descriptor for MEMCPY operation
 514 */
 515static void ppc440spe_desc_init_memcpy(struct ppc440spe_adma_desc_slot *desc,
 516					unsigned long flags)
 517{
 518	struct dma_cdb *hw_desc = desc->hw_desc;
 519
 520	memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
 521	desc->hw_next = NULL;
 522	desc->src_cnt = 1;
 523	desc->dst_cnt = 1;
 524
 525	if (flags & DMA_PREP_INTERRUPT)
 526		set_bit(PPC440SPE_DESC_INT, &desc->flags);
 527	else
 528		clear_bit(PPC440SPE_DESC_INT, &desc->flags);
 529
 530	hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
 531}
 532
 533/**
 534 * ppc440spe_desc_init_memset - initialize the descriptor for MEMSET operation
 535 */
 536static void ppc440spe_desc_init_memset(struct ppc440spe_adma_desc_slot *desc,
 537					int value, unsigned long flags)
 538{
 539	struct dma_cdb *hw_desc = desc->hw_desc;
 540
 541	memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
 542	desc->hw_next = NULL;
 543	desc->src_cnt = 1;
 544	desc->dst_cnt = 1;
 545
 546	if (flags & DMA_PREP_INTERRUPT)
 547		set_bit(PPC440SPE_DESC_INT, &desc->flags);
 548	else
 549		clear_bit(PPC440SPE_DESC_INT, &desc->flags);
 550
 551	hw_desc->sg1u = hw_desc->sg1l = cpu_to_le32((u32)value);
 552	hw_desc->sg3u = hw_desc->sg3l = cpu_to_le32((u32)value);
 553	hw_desc->opc = DMA_CDB_OPC_DFILL128;
 554}
 555
 556/**
 557 * ppc440spe_desc_set_src_addr - set source address into the descriptor
 558 */
 559static void ppc440spe_desc_set_src_addr(struct ppc440spe_adma_desc_slot *desc,
 560					struct ppc440spe_adma_chan *chan,
 561					int src_idx, dma_addr_t addrh,
 562					dma_addr_t addrl)
 563{
 564	struct dma_cdb *dma_hw_desc;
 565	struct xor_cb *xor_hw_desc;
 566	phys_addr_t addr64, tmplow, tmphi;
 567
 568	switch (chan->device->id) {
 569	case PPC440SPE_DMA0_ID:
 570	case PPC440SPE_DMA1_ID:
 571		if (!addrh) {
 572			addr64 = addrl;
 573			tmphi = (addr64 >> 32);
 574			tmplow = (addr64 & 0xFFFFFFFF);
 575		} else {
 576			tmphi = addrh;
 577			tmplow = addrl;
 578		}
 579		dma_hw_desc = desc->hw_desc;
 580		dma_hw_desc->sg1l = cpu_to_le32((u32)tmplow);
 581		dma_hw_desc->sg1u |= cpu_to_le32((u32)tmphi);
 582		break;
 583	case PPC440SPE_XOR_ID:
 584		xor_hw_desc = desc->hw_desc;
 585		xor_hw_desc->ops[src_idx].l = addrl;
 586		xor_hw_desc->ops[src_idx].h |= addrh;
 587		break;
 588	}
 589}
 590
 591/**
 592 * ppc440spe_desc_set_src_mult - set source address mult into the descriptor
 593 */
 594static void ppc440spe_desc_set_src_mult(struct ppc440spe_adma_desc_slot *desc,
 595			struct ppc440spe_adma_chan *chan, u32 mult_index,
 596			int sg_index, unsigned char mult_value)
 597{
 598	struct dma_cdb *dma_hw_desc;
 599	struct xor_cb *xor_hw_desc;
 600	u32 *psgu;
 601
 602	switch (chan->device->id) {
 603	case PPC440SPE_DMA0_ID:
 604	case PPC440SPE_DMA1_ID:
 605		dma_hw_desc = desc->hw_desc;
 606
 607		switch (sg_index) {
 608		/* for RXOR operations set multiplier
 609		 * into source cued address
 610		 */
 611		case DMA_CDB_SG_SRC:
 612			psgu = &dma_hw_desc->sg1u;
 613			break;
 614		/* for WXOR operations set multiplier
 615		 * into destination cued address(es)
 616		 */
 617		case DMA_CDB_SG_DST1:
 618			psgu = &dma_hw_desc->sg2u;
 619			break;
 620		case DMA_CDB_SG_DST2:
 621			psgu = &dma_hw_desc->sg3u;
 622			break;
 623		default:
 624			BUG();
 625		}
 626
 627		*psgu |= cpu_to_le32(mult_value << mult_index);
 628		break;
 629	case PPC440SPE_XOR_ID:
 630		xor_hw_desc = desc->hw_desc;
 631		break;
 632	default:
 633		BUG();
 634	}
 635}
 636
 637/**
 638 * ppc440spe_desc_set_dest_addr - set destination address into the descriptor
 639 */
 640static void ppc440spe_desc_set_dest_addr(struct ppc440spe_adma_desc_slot *desc,
 641				struct ppc440spe_adma_chan *chan,
 642				dma_addr_t addrh, dma_addr_t addrl,
 643				u32 dst_idx)
 644{
 645	struct dma_cdb *dma_hw_desc;
 646	struct xor_cb *xor_hw_desc;
 647	phys_addr_t addr64, tmphi, tmplow;
 648	u32 *psgu, *psgl;
 649
 650	switch (chan->device->id) {
 651	case PPC440SPE_DMA0_ID:
 652	case PPC440SPE_DMA1_ID:
 653		if (!addrh) {
 654			addr64 = addrl;
 655			tmphi = (addr64 >> 32);
 656			tmplow = (addr64 & 0xFFFFFFFF);
 657		} else {
 658			tmphi = addrh;
 659			tmplow = addrl;
 660		}
 661		dma_hw_desc = desc->hw_desc;
 662
 663		psgu = dst_idx ? &dma_hw_desc->sg3u : &dma_hw_desc->sg2u;
 664		psgl = dst_idx ? &dma_hw_desc->sg3l : &dma_hw_desc->sg2l;
 665
 666		*psgl = cpu_to_le32((u32)tmplow);
 667		*psgu |= cpu_to_le32((u32)tmphi);
 668		break;
 669	case PPC440SPE_XOR_ID:
 670		xor_hw_desc = desc->hw_desc;
 671		xor_hw_desc->cbtal = addrl;
 672		xor_hw_desc->cbtah |= addrh;
 673		break;
 674	}
 675}
 676
 677/**
 678 * ppc440spe_desc_set_byte_count - set number of data bytes involved
 679 * into the operation
 680 */
 681static void ppc440spe_desc_set_byte_count(struct ppc440spe_adma_desc_slot *desc,
 682				struct ppc440spe_adma_chan *chan,
 683				u32 byte_count)
 684{
 685	struct dma_cdb *dma_hw_desc;
 686	struct xor_cb *xor_hw_desc;
 687
 688	switch (chan->device->id) {
 689	case PPC440SPE_DMA0_ID:
 690	case PPC440SPE_DMA1_ID:
 691		dma_hw_desc = desc->hw_desc;
 692		dma_hw_desc->cnt = cpu_to_le32(byte_count);
 693		break;
 694	case PPC440SPE_XOR_ID:
 695		xor_hw_desc = desc->hw_desc;
 696		xor_hw_desc->cbbc = byte_count;
 697		break;
 698	}
 699}
 700
 701/**
 702 * ppc440spe_desc_set_rxor_block_size - set RXOR block size
 703 */
 704static inline void ppc440spe_desc_set_rxor_block_size(u32 byte_count)
 705{
 706	/* assume that byte_count is aligned to a 512-byte boundary;
 707	 * thus write it directly to the register (bits 23:31 are
 708	 * reserved there).
 709	 */
 710	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CF2H, byte_count);
 711}
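
/*
 * Illustrative example (an editorial assumption, not code from this driver):
 * for 4 KB RAID stripes the caller would pass byte_count = 4096 (0x1000),
 * which meets the 512-byte alignment requirement and can go straight into
 * MQ0_CF2H:
 *
 *	ppc440spe_desc_set_rxor_block_size(4096);
 */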
 712
 713/**
 714 * ppc440spe_desc_set_dcheck - set CHECK pattern
 715 */
 716static void ppc440spe_desc_set_dcheck(struct ppc440spe_adma_desc_slot *desc,
 717				struct ppc440spe_adma_chan *chan, u8 *qword)
 718{
 719	struct dma_cdb *dma_hw_desc;
 720
 721	switch (chan->device->id) {
 722	case PPC440SPE_DMA0_ID:
 723	case PPC440SPE_DMA1_ID:
 724		dma_hw_desc = desc->hw_desc;
 725		iowrite32(qword[0], &dma_hw_desc->sg3l);
 726		iowrite32(qword[4], &dma_hw_desc->sg3u);
 727		iowrite32(qword[8], &dma_hw_desc->sg2l);
 728		iowrite32(qword[12], &dma_hw_desc->sg2u);
 729		break;
 730	default:
 731		BUG();
 732	}
 733}
 734
 735/**
 736 * ppc440spe_xor_set_link - set link address in xor CB
 737 */
 738static void ppc440spe_xor_set_link(struct ppc440spe_adma_desc_slot *prev_desc,
 739				struct ppc440spe_adma_desc_slot *next_desc)
 740{
 741	struct xor_cb *xor_hw_desc = prev_desc->hw_desc;
 742
 743	if (unlikely(!next_desc || !(next_desc->phys))) {
 744		printk(KERN_ERR "%s: next_desc=0x%p; next_desc->phys=0x%llx\n",
 745			__func__, next_desc,
 746			next_desc ? next_desc->phys : 0);
 747		BUG();
 748	}
 749
 750	xor_hw_desc->cbs = 0;
 751	xor_hw_desc->cblal = next_desc->phys;
 752	xor_hw_desc->cblah = 0;
 753	xor_hw_desc->cbc |= XOR_CBCR_LNK_BIT;
 754}
 755
 756/**
 757 * ppc440spe_desc_set_link - set the address of descriptor following this
 758 * descriptor in chain
 759 */
 760static void ppc440spe_desc_set_link(struct ppc440spe_adma_chan *chan,
 761				struct ppc440spe_adma_desc_slot *prev_desc,
 762				struct ppc440spe_adma_desc_slot *next_desc)
 763{
 764	unsigned long flags;
 765	struct ppc440spe_adma_desc_slot *tail = next_desc;
 766
 767	if (unlikely(!prev_desc || !next_desc ||
 768		(prev_desc->hw_next && prev_desc->hw_next != next_desc))) {
 769		/* If the previous 'next' is being overwritten, something is
 770		 * wrong; though we may refetch from append to initiate list
 771		 * processing, and in that case it's ok.
 772		 */
 773		printk(KERN_ERR "%s: prev_desc=0x%p; next_desc=0x%p; "
 774			"prev->hw_next=0x%p\n", __func__, prev_desc,
 775			next_desc, prev_desc ? prev_desc->hw_next : 0);
 776		BUG();
 777	}
 778
 779	local_irq_save(flags);
 780
 781	/* do s/w chaining both for DMA and XOR descriptors */
 782	prev_desc->hw_next = next_desc;
 783
 784	switch (chan->device->id) {
 785	case PPC440SPE_DMA0_ID:
 786	case PPC440SPE_DMA1_ID:
 787		break;
 788	case PPC440SPE_XOR_ID:
 789		/* bind descriptor to the chain */
 790		while (tail->hw_next)
 791			tail = tail->hw_next;
 792		xor_last_linked = tail;
 793
 794		if (prev_desc == xor_last_submit)
 795			/* do not link to the last submitted CB */
 796			break;
 797		ppc440spe_xor_set_link(prev_desc, next_desc);
 798		break;
 799	}
 800
 801	local_irq_restore(flags);
 802}
 803
 804/**
 805 * ppc440spe_desc_get_src_addr - extract the source address from the descriptor
 806 */
 807static u32 ppc440spe_desc_get_src_addr(struct ppc440spe_adma_desc_slot *desc,
 808				struct ppc440spe_adma_chan *chan, int src_idx)
 809{
 810	struct dma_cdb *dma_hw_desc;
 811	struct xor_cb *xor_hw_desc;
 812
 813	switch (chan->device->id) {
 814	case PPC440SPE_DMA0_ID:
 815	case PPC440SPE_DMA1_ID:
 816		dma_hw_desc = desc->hw_desc;
 817		/* May have 0, 1, 2, or 3 sources */
 818		switch (dma_hw_desc->opc) {
 819		case DMA_CDB_OPC_NO_OP:
 820		case DMA_CDB_OPC_DFILL128:
 821			return 0;
 822		case DMA_CDB_OPC_DCHECK128:
 823			if (unlikely(src_idx)) {
 824				printk(KERN_ERR "%s: try to get %d source for"
 825				    " DCHECK128\n", __func__, src_idx);
 826				BUG();
 827			}
 828			return le32_to_cpu(dma_hw_desc->sg1l);
 829		case DMA_CDB_OPC_MULTICAST:
 830		case DMA_CDB_OPC_MV_SG1_SG2:
 831			if (unlikely(src_idx > 2)) {
 832				printk(KERN_ERR "%s: try to get %d source from"
 833				    " DMA descr\n", __func__, src_idx);
 834				BUG();
 835			}
 836			if (src_idx) {
 837				if (le32_to_cpu(dma_hw_desc->sg1u) &
 838				    DMA_CUED_XOR_WIN_MSK) {
 839					u8 region;
 840
 841					if (src_idx == 1)
 842						return le32_to_cpu(
 843						    dma_hw_desc->sg1l) +
 844							desc->unmap_len;
 845
 846					region = (le32_to_cpu(
 847					    dma_hw_desc->sg1u)) >>
 848						DMA_CUED_REGION_OFF;
 849
 850					region &= DMA_CUED_REGION_MSK;
 851					switch (region) {
 852					case DMA_RXOR123:
 853						return le32_to_cpu(
 854						    dma_hw_desc->sg1l) +
 855							(desc->unmap_len << 1);
 856					case DMA_RXOR124:
 857						return le32_to_cpu(
 858						    dma_hw_desc->sg1l) +
 859							(desc->unmap_len * 3);
 860					case DMA_RXOR125:
 861						return le32_to_cpu(
 862						    dma_hw_desc->sg1l) +
 863							(desc->unmap_len << 2);
 864					default:
 865						printk(KERN_ERR
 866						    "%s: try to"
 867						    " get src3 for region %02x "
 868						    "PPC440SPE_DESC_RXOR12?\n",
 869						    __func__, region);
 870						BUG();
 871					}
 872				} else {
 873					printk(KERN_ERR
 874						"%s: try to get %d"
 875						" source for non-cued descr\n",
 876						__func__, src_idx);
 877					BUG();
 878				}
 879			}
 880			return le32_to_cpu(dma_hw_desc->sg1l);
 881		default:
 882			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
 883				__func__, dma_hw_desc->opc);
 884			BUG();
 885		}
 886		return le32_to_cpu(dma_hw_desc->sg1l);
 887	case PPC440SPE_XOR_ID:
 888		/* May have up to 16 sources */
 889		xor_hw_desc = desc->hw_desc;
 890		return xor_hw_desc->ops[src_idx].l;
 891	}
 892	return 0;
 893}
 894
 895/**
 896 * ppc440spe_desc_get_dest_addr - extract the destination address from the
 897 * descriptor
 898 */
 899static u32 ppc440spe_desc_get_dest_addr(struct ppc440spe_adma_desc_slot *desc,
 900				struct ppc440spe_adma_chan *chan, int idx)
 901{
 902	struct dma_cdb *dma_hw_desc;
 903	struct xor_cb *xor_hw_desc;
 904
 905	switch (chan->device->id) {
 906	case PPC440SPE_DMA0_ID:
 907	case PPC440SPE_DMA1_ID:
 908		dma_hw_desc = desc->hw_desc;
 909
 910		if (likely(!idx))
 911			return le32_to_cpu(dma_hw_desc->sg2l);
 912		return le32_to_cpu(dma_hw_desc->sg3l);
 913	case PPC440SPE_XOR_ID:
 914		xor_hw_desc = desc->hw_desc;
 915		return xor_hw_desc->cbtal;
 916	}
 917	return 0;
 918}
 919
 920/**
 921 * ppc440spe_desc_get_src_num - extract the number of source addresses from
 922 * the descriptor
 923 */
 924static u32 ppc440spe_desc_get_src_num(struct ppc440spe_adma_desc_slot *desc,
 925				struct ppc440spe_adma_chan *chan)
 926{
 927	struct dma_cdb *dma_hw_desc;
 928	struct xor_cb *xor_hw_desc;
 929
 930	switch (chan->device->id) {
 931	case PPC440SPE_DMA0_ID:
 932	case PPC440SPE_DMA1_ID:
 933		dma_hw_desc = desc->hw_desc;
 934
 935		switch (dma_hw_desc->opc) {
 936		case DMA_CDB_OPC_NO_OP:
 937		case DMA_CDB_OPC_DFILL128:
 938			return 0;
 939		case DMA_CDB_OPC_DCHECK128:
 940			return 1;
 941		case DMA_CDB_OPC_MV_SG1_SG2:
 942		case DMA_CDB_OPC_MULTICAST:
 943			/*
 944			 * Only RXOR operations have more
 945			 * than one source
 946			 */
 947			if (le32_to_cpu(dma_hw_desc->sg1u) &
 948			    DMA_CUED_XOR_WIN_MSK) {
 949				/* RXOR op, there are 2 or 3 sources */
 950				if (((le32_to_cpu(dma_hw_desc->sg1u) >>
 951				    DMA_CUED_REGION_OFF) &
 952				      DMA_CUED_REGION_MSK) == DMA_RXOR12) {
 953					/* RXOR 1-2 */
 954					return 2;
 955				} else {
 956					/* RXOR 1-2-3/1-2-4/1-2-5 */
 957					return 3;
 958				}
 959			}
 960			return 1;
 961		default:
 962			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
 963				__func__, dma_hw_desc->opc);
 964			BUG();
 965		}
 966	case PPC440SPE_XOR_ID:
 967		/* up to 16 sources */
 968		xor_hw_desc = desc->hw_desc;
 969		return xor_hw_desc->cbc & XOR_CDCR_OAC_MSK;
 970	default:
 971		BUG();
 972	}
 973	return 0;
 974}
 975
 976/**
 977 * ppc440spe_desc_get_dst_num - get the number of destination addresses in
 978 * this descriptor
 979 */
 980static u32 ppc440spe_desc_get_dst_num(struct ppc440spe_adma_desc_slot *desc,
 981				struct ppc440spe_adma_chan *chan)
 982{
 983	struct dma_cdb *dma_hw_desc;
 984
 985	switch (chan->device->id) {
 986	case PPC440SPE_DMA0_ID:
 987	case PPC440SPE_DMA1_ID:
 988		/* May be 1 or 2 destinations */
 989		dma_hw_desc = desc->hw_desc;
 990		switch (dma_hw_desc->opc) {
 991		case DMA_CDB_OPC_NO_OP:
 992		case DMA_CDB_OPC_DCHECK128:
 993			return 0;
 994		case DMA_CDB_OPC_MV_SG1_SG2:
 995		case DMA_CDB_OPC_DFILL128:
 996			return 1;
 997		case DMA_CDB_OPC_MULTICAST:
 998			if (desc->dst_cnt == 2)
 999				return 2;
1000			else
1001				return 1;
1002		default:
1003			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
1004				__func__, dma_hw_desc->opc);
1005			BUG();
1006		}
1007	case PPC440SPE_XOR_ID:
1008		/* Always only 1 destination */
1009		return 1;
1010	default:
1011		BUG();
1012	}
1013	return 0;
1014}
1015
1016/**
1017 * ppc440spe_desc_get_link - get the address of the descriptor that
1018 * follows this one
1019 */
1020static inline u32 ppc440spe_desc_get_link(struct ppc440spe_adma_desc_slot *desc,
1021					struct ppc440spe_adma_chan *chan)
1022{
1023	if (!desc->hw_next)
1024		return 0;
1025
1026	return desc->hw_next->phys;
1027}
1028
1029/**
1030 * ppc440spe_desc_is_aligned - check alignment
1031 */
1032static inline int ppc440spe_desc_is_aligned(
1033	struct ppc440spe_adma_desc_slot *desc, int num_slots)
1034{
1035	return (desc->idx & (num_slots - 1)) ? 0 : 1;
1036}
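
/*
 * Editorial illustration: for an operation that needs num_slots == 4
 * contiguous slots, only descriptors whose pool index is a multiple of 4
 * (idx 0, 4, 8, ...) count as aligned, since idx & (num_slots - 1) is zero
 * exactly for those.
 */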
1037
1038/**
1039 * ppc440spe_chan_xor_slot_count - get the number of slots necessary for
1040 * XOR operation
1041 */
1042static int ppc440spe_chan_xor_slot_count(size_t len, int src_cnt,
1043			int *slots_per_op)
1044{
1045	int slot_cnt;
1046
1047	/* each XOR descriptor provides up to 16 source operands */
1048	slot_cnt = *slots_per_op = (src_cnt + XOR_MAX_OPS - 1)/XOR_MAX_OPS;
1049
1050	if (likely(len <= PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT))
1051		return slot_cnt;
1052
1053	printk(KERN_ERR "%s: len %zu > max %d !!\n",
1054		__func__, len, PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT);
1055	BUG();
1056	return slot_cnt;
1057}
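
/*
 * Worked example (editorial, assuming XOR_MAX_OPS is 16 since each XOR CB
 * provides up to 16 source operands): an XOR over 20 sources needs
 * (20 + 16 - 1) / 16 = 2 slots per operation, while anything up to 16
 * sources fits in a single slot.
 */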
1058
1059/**
1060 * ppc440spe_dma2_pq_slot_count - get the number of slots necessary for
1061 * DMA2 PQ operation
1062 */
1063static int ppc440spe_dma2_pq_slot_count(dma_addr_t *srcs,
1064		int src_cnt, size_t len)
1065{
1066	signed long long order = 0;
1067	int state = 0;
1068	int addr_count = 0;
1069	int i;
1070	for (i = 1; i < src_cnt; i++) {
1071		dma_addr_t cur_addr = srcs[i];
1072		dma_addr_t old_addr = srcs[i-1];
1073		switch (state) {
1074		case 0:
1075			if (cur_addr == old_addr + len) {
1076				/* direct RXOR */
1077				order = 1;
1078				state = 1;
1079				if (i == src_cnt-1)
1080					addr_count++;
1081			} else if (old_addr == cur_addr + len) {
1082				/* reverse RXOR */
1083				order = -1;
1084				state = 1;
1085				if (i == src_cnt-1)
1086					addr_count++;
1087			} else {
1088				state = 3;
1089			}
1090			break;
1091		case 1:
1092			if (i == src_cnt-2 || (order == -1
1093				&& cur_addr != old_addr - len)) {
1094				order = 0;
1095				state = 0;
1096				addr_count++;
1097			} else if (cur_addr == old_addr + len*order) {
1098				state = 2;
1099				if (i == src_cnt-1)
1100					addr_count++;
1101			} else if (cur_addr == old_addr + 2*len) {
1102				state = 2;
1103				if (i == src_cnt-1)
1104					addr_count++;
1105			} else if (cur_addr == old_addr + 3*len) {
1106				state = 2;
1107				if (i == src_cnt-1)
1108					addr_count++;
1109			} else {
1110				order = 0;
1111				state = 0;
1112				addr_count++;
1113			}
1114			break;
1115		case 2:
1116			order = 0;
1117			state = 0;
1118			addr_count++;
1119			break;
1120		}
1121		if (state == 3)
1122			break;
1123	}
1124	if (src_cnt <= 1 || (state != 1 && state != 2)) {
1125		pr_err("%s: src_cnt=%d, state=%d, addr_count=%d, order=%lld\n",
1126			__func__, src_cnt, state, addr_count, order);
1127		for (i = 0; i < src_cnt; i++)
1128			pr_err("\t[%d] 0x%llx \n", i, srcs[i]);
1129		BUG();
1130	}
1131
1132	return (addr_count + XOR_MAX_OPS - 1) / XOR_MAX_OPS;
1133}
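
/*
 * Worked example (editorial): three sources at B, B + len and B + 2*len form
 * a single "direct" RXOR region, so the walk above ends in state 2 with
 * addr_count == 1 and one descriptor slot is enough. Source layouts that do
 * not match the recognized RXOR patterns (e.g. a non-contiguous first pair)
 * end in state 3 or 0 and hit the BUG() above, since DMA2 PQ only handles
 * RXOR-able address sequences.
 */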
1134
1135
1136/******************************************************************************
1137 * ADMA channel low-level routines
1138 ******************************************************************************/
1139
1140static u32
1141ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan);
1142static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan);
1143
1144/**
1145 * ppc440spe_adma_device_clear_eot_status - interrupt ack to XOR or DMA engine
1146 */
1147static void ppc440spe_adma_device_clear_eot_status(
1148					struct ppc440spe_adma_chan *chan)
1149{
1150	struct dma_regs *dma_reg;
1151	struct xor_regs *xor_reg;
1152	u8 *p = chan->device->dma_desc_pool_virt;
1153	struct dma_cdb *cdb;
1154	u32 rv, i;
1155
1156	switch (chan->device->id) {
1157	case PPC440SPE_DMA0_ID:
1158	case PPC440SPE_DMA1_ID:
1159		/* read FIFO to ack */
1160		dma_reg = chan->device->dma_reg;
1161		while ((rv = ioread32(&dma_reg->csfpl))) {
1162			i = rv & DMA_CDB_ADDR_MSK;
1163			cdb = (struct dma_cdb *)&p[i -
1164			    (u32)chan->device->dma_desc_pool];
1165
1166			/* Clear opcode to ack. This is necessary for
1167			 * ZeroSum operations only
1168			 */
1169			cdb->opc = 0;
1170
1171			if (test_bit(PPC440SPE_RXOR_RUN,
1172			    &ppc440spe_rxor_state)) {
1173				/* probably this is a completed RXOR op,
1174				 * get pointer to CDB using the fact that
1175				 * physical and virtual addresses of CDB
1176				 * in pools have the same offsets
1177				 */
1178				if (le32_to_cpu(cdb->sg1u) &
1179				    DMA_CUED_XOR_BASE) {
1180					/* this is a RXOR */
1181					clear_bit(PPC440SPE_RXOR_RUN,
1182						  &ppc440spe_rxor_state);
1183				}
1184			}
1185
1186			if (rv & DMA_CDB_STATUS_MSK) {
1187				/* ZeroSum check failed
1188				 */
1189				struct ppc440spe_adma_desc_slot *iter;
1190				dma_addr_t phys = rv & ~DMA_CDB_MSK;
1191
1192				/*
1193				 * Update the status of corresponding
1194				 * descriptor.
1195				 */
1196				list_for_each_entry(iter, &chan->chain,
1197				    chain_node) {
1198					if (iter->phys == phys)
1199						break;
1200				}
1201				/*
1202			 * if we cannot find the corresponding
1203			 * slot, it's a bug
1204				 */
1205				BUG_ON(&iter->chain_node == &chan->chain);
1206
1207				if (iter->xor_check_result) {
1208					if (test_bit(PPC440SPE_DESC_PCHECK,
1209						     &iter->flags)) {
1210						*iter->xor_check_result |=
1211							SUM_CHECK_P_RESULT;
1212					} else
1213					if (test_bit(PPC440SPE_DESC_QCHECK,
1214						     &iter->flags)) {
1215						*iter->xor_check_result |=
1216							SUM_CHECK_Q_RESULT;
1217					} else
1218						BUG();
1219				}
1220			}
1221		}
1222
1223		rv = ioread32(&dma_reg->dsts);
1224		if (rv) {
1225			pr_err("DMA%d err status: 0x%x\n",
1226			       chan->device->id, rv);
1227			/* write back to clear */
1228			iowrite32(rv, &dma_reg->dsts);
1229		}
1230		break;
1231	case PPC440SPE_XOR_ID:
1232		/* reset status bits to ack */
1233		xor_reg = chan->device->xor_reg;
1234		rv = ioread32be(&xor_reg->sr);
1235		iowrite32be(rv, &xor_reg->sr);
1236
1237		if (rv & (XOR_IE_ICBIE_BIT|XOR_IE_ICIE_BIT|XOR_IE_RPTIE_BIT)) {
1238			if (rv & XOR_IE_RPTIE_BIT) {
1239				/* Read PLB Timeout Error.
1240				 * Try to resubmit the CB
1241				 */
1242				u32 val = ioread32be(&xor_reg->ccbalr);
1243
1244				iowrite32be(val, &xor_reg->cblalr);
1245
1246				val = ioread32be(&xor_reg->crsr);
1247				iowrite32be(val | XOR_CRSR_XAE_BIT,
1248					    &xor_reg->crsr);
1249			} else
1250				pr_err("XOR ERR 0x%x status\n", rv);
1251			break;
1252		}
1253
1254		/*  if the XORcore is idle, but there are unprocessed CBs
1255		 * then refetch the s/w chain here
1256		 */
1257		if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) &&
1258		    do_xor_refetch)
1259			ppc440spe_chan_append(chan);
1260		break;
1261	}
1262}
1263
1264/**
1265 * ppc440spe_chan_is_busy - get the channel status
1266 */
1267static int ppc440spe_chan_is_busy(struct ppc440spe_adma_chan *chan)
1268{
1269	struct dma_regs *dma_reg;
1270	struct xor_regs *xor_reg;
1271	int busy = 0;
1272
1273	switch (chan->device->id) {
1274	case PPC440SPE_DMA0_ID:
1275	case PPC440SPE_DMA1_ID:
1276		dma_reg = chan->device->dma_reg;
1277		/*  if command FIFO's head and tail pointers are equal and
1278		 * status tail is the same as command, then channel is free
1279		 */
1280		if (ioread16(&dma_reg->cpfhp) != ioread16(&dma_reg->cpftp) ||
1281		    ioread16(&dma_reg->cpftp) != ioread16(&dma_reg->csftp))
1282			busy = 1;
1283		break;
1284	case PPC440SPE_XOR_ID:
1285		/* use the special status bit for the XORcore
1286		 */
1287		xor_reg = chan->device->xor_reg;
1288		busy = (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) ? 1 : 0;
1289		break;
1290	}
1291
1292	return busy;
1293}
1294
1295/**
1296 * ppc440spe_chan_set_first_xor_descriptor -  init XORcore chain
1297 */
1298static void ppc440spe_chan_set_first_xor_descriptor(
1299				struct ppc440spe_adma_chan *chan,
1300				struct ppc440spe_adma_desc_slot *next_desc)
1301{
1302	struct xor_regs *xor_reg = chan->device->xor_reg;
1303
1304	if (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)
1305		printk(KERN_INFO "%s: Warn: XORcore is running "
1306			"while trying to set the first CDB!\n",
1307			__func__);
1308
1309	xor_last_submit = xor_last_linked = next_desc;
1310
1311	iowrite32be(XOR_CRSR_64BA_BIT, &xor_reg->crsr);
1312
1313	iowrite32be(next_desc->phys, &xor_reg->cblalr);
1314	iowrite32be(0, &xor_reg->cblahr);
1315	iowrite32be(ioread32be(&xor_reg->cbcr) | XOR_CBCR_LNK_BIT,
1316		    &xor_reg->cbcr);
1317
1318	chan->hw_chain_inited = 1;
1319}
1320
1321/**
1322 * ppc440spe_dma_put_desc - put DMA0,1 descriptor to FIFO.
1323 * called with irqs disabled
1324 */
1325static void ppc440spe_dma_put_desc(struct ppc440spe_adma_chan *chan,
1326		struct ppc440spe_adma_desc_slot *desc)
1327{
1328	u32 pcdb;
1329	struct dma_regs *dma_reg = chan->device->dma_reg;
1330
1331	pcdb = desc->phys;
1332	if (!test_bit(PPC440SPE_DESC_INT, &desc->flags))
1333		pcdb |= DMA_CDB_NO_INT;
1334
1335	chan_last_sub[chan->device->id] = desc;
1336
1337	ADMA_LL_DBG(print_cb(chan, desc->hw_desc));
1338
1339	iowrite32(pcdb, &dma_reg->cpfpl);
1340}
1341
1342/**
1343 * ppc440spe_chan_append - update the h/w chain in the channel
1344 */
1345static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan)
1346{
1347	struct xor_regs *xor_reg;
1348	struct ppc440spe_adma_desc_slot *iter;
1349	struct xor_cb *xcb;
1350	u32 cur_desc;
1351	unsigned long flags;
1352
1353	local_irq_save(flags);
1354
1355	switch (chan->device->id) {
1356	case PPC440SPE_DMA0_ID:
1357	case PPC440SPE_DMA1_ID:
1358		cur_desc = ppc440spe_chan_get_current_descriptor(chan);
1359
1360		if (likely(cur_desc)) {
1361			iter = chan_last_sub[chan->device->id];
1362			BUG_ON(!iter);
1363		} else {
1364			/* first peer */
1365			iter = chan_first_cdb[chan->device->id];
1366			BUG_ON(!iter);
1367			ppc440spe_dma_put_desc(chan, iter);
1368			chan->hw_chain_inited = 1;
1369		}
1370
1371		/* is there something new to append */
1372		if (!iter->hw_next)
1373			break;
1374
1375		/* flush descriptors from the s/w queue to fifo */
1376		list_for_each_entry_continue(iter, &chan->chain, chain_node) {
1377			ppc440spe_dma_put_desc(chan, iter);
1378			if (!iter->hw_next)
1379				break;
1380		}
1381		break;
1382	case PPC440SPE_XOR_ID:
1383		/* update h/w links and refetch */
1384		if (!xor_last_submit->hw_next)
1385			break;
1386
1387		xor_reg = chan->device->xor_reg;
1388		/* the last linked CDB has to generate an interrupt
1389		 * so that we are able to append the next lists to h/w
1390		 * regardless of the XOR engine state at the moment
1391		 * these next lists are appended
1392		 */
1393		xcb = xor_last_linked->hw_desc;
1394		xcb->cbc |= XOR_CBCR_CBCE_BIT;
1395
1396		if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)) {
1397			/* XORcore is idle. Refetch now */
1398			do_xor_refetch = 0;
1399			ppc440spe_xor_set_link(xor_last_submit,
1400				xor_last_submit->hw_next);
1401
1402			ADMA_LL_DBG(print_cb_list(chan,
1403				xor_last_submit->hw_next));
1404
1405			xor_last_submit = xor_last_linked;
1406			iowrite32be(ioread32be(&xor_reg->crsr) |
1407				    XOR_CRSR_RCBE_BIT | XOR_CRSR_64BA_BIT,
1408				    &xor_reg->crsr);
1409		} else {
1410			/* XORcore is running. Refetch later in the handler */
1411			do_xor_refetch = 1;
1412		}
1413
1414		break;
1415	}
1416
1417	local_irq_restore(flags);
1418}
1419
1420/**
1421 * ppc440spe_chan_get_current_descriptor - get the currently executed descriptor
1422 */
1423static u32
1424ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan)
1425{
1426	struct dma_regs *dma_reg;
1427	struct xor_regs *xor_reg;
1428
1429	if (unlikely(!chan->hw_chain_inited))
1430		/* h/w descriptor chain is not initialized yet */
1431		return 0;
1432
1433	switch (chan->device->id) {
1434	case PPC440SPE_DMA0_ID:
1435	case PPC440SPE_DMA1_ID:
1436		dma_reg = chan->device->dma_reg;
1437		return ioread32(&dma_reg->acpl) & (~DMA_CDB_MSK);
1438	case PPC440SPE_XOR_ID:
1439		xor_reg = chan->device->xor_reg;
1440		return ioread32be(&xor_reg->ccbalr);
1441	}
1442	return 0;
1443}
1444
1445/**
1446 * ppc440spe_chan_run - enable the channel
1447 */
1448static void ppc440spe_chan_run(struct ppc440spe_adma_chan *chan)
1449{
1450	struct xor_regs *xor_reg;
1451
1452	switch (chan->device->id) {
1453	case PPC440SPE_DMA0_ID:
1454	case PPC440SPE_DMA1_ID:
1455		/* DMAs are always enabled, do nothing */
1456		break;
1457	case PPC440SPE_XOR_ID:
1458		/* drain write buffer */
1459		xor_reg = chan->device->xor_reg;
1460
1461		/* fetch descriptor pointed to in <link> */
1462		iowrite32be(XOR_CRSR_64BA_BIT | XOR_CRSR_XAE_BIT,
1463			    &xor_reg->crsr);
1464		break;
1465	}
1466}
1467
1468/******************************************************************************
1469 * ADMA device level
1470 ******************************************************************************/
1471
1472static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan);
1473static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan);
1474
1475static dma_cookie_t
1476ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx);
1477
1478static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *tx,
1479				    dma_addr_t addr, int index);
1480static void
1481ppc440spe_adma_memcpy_xor_set_src(struct ppc440spe_adma_desc_slot *tx,
1482				  dma_addr_t addr, int index);
1483
1484static void
1485ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *tx,
1486			   dma_addr_t *paddr, unsigned long flags);
1487static void
1488ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *tx,
1489			  dma_addr_t addr, int index);
1490static void
1491ppc440spe_adma_pq_set_src_mult(struct ppc440spe_adma_desc_slot *tx,
1492			       unsigned char mult, int index, int dst_pos);
1493static void
1494ppc440spe_adma_pqzero_sum_set_dest(struct ppc440spe_adma_desc_slot *tx,
1495				   dma_addr_t paddr, dma_addr_t qaddr);
1496
1497static struct page *ppc440spe_rxor_srcs[32];
1498
1499/**
1500 * ppc440spe_can_rxor - check if the operands may be processed with RXOR
1501 */
1502static int ppc440spe_can_rxor(struct page **srcs, int src_cnt, size_t len)
1503{
1504	int i, order = 0, state = 0;
1505	int idx = 0;
1506
1507	if (unlikely(!(src_cnt > 1)))
1508		return 0;
1509
1510	BUG_ON(src_cnt > ARRAY_SIZE(ppc440spe_rxor_srcs));
1511
1512	/* Skip holes in the source list before checking */
1513	for (i = 0; i < src_cnt; i++) {
1514		if (!srcs[i])
1515			continue;
1516		ppc440spe_rxor_srcs[idx++] = srcs[i];
1517	}
1518	src_cnt = idx;
1519
1520	for (i = 1; i < src_cnt; i++) {
1521		char *cur_addr = page_address(ppc440spe_rxor_srcs[i]);
1522		char *old_addr = page_address(ppc440spe_rxor_srcs[i - 1]);
1523
1524		switch (state) {
1525		case 0:
1526			if (cur_addr == old_addr + len) {
1527				/* direct RXOR */
1528				order = 1;
1529				state = 1;
1530			} else if (old_addr == cur_addr + len) {
1531				/* reverse RXOR */
1532				order = -1;
1533				state = 1;
1534			} else
1535				goto out;
1536			break;
1537		case 1:
1538			if ((i == src_cnt - 2) ||
1539			    (order == -1 && cur_addr != old_addr - len)) {
1540				order = 0;
1541				state = 0;
1542			} else if ((cur_addr == old_addr + len * order) ||
1543				   (cur_addr == old_addr + 2 * len) ||
1544				   (cur_addr == old_addr + 3 * len)) {
1545				state = 2;
1546			} else {
1547				order = 0;
1548				state = 0;
1549			}
1550			break;
1551		case 2:
1552			order = 0;
1553			state = 0;
1554			break;
1555		}
1556	}
1557
1558out:
1559	if (state == 1 || state == 2)
1560		return 1;
1561
1562	return 0;
1563}
1564
1565/**
1566 * ppc440spe_adma_estimate - estimate the efficiency of processing
1567 *	the given operation on this channel. It's assumed that 'chan' is
1568 *	capable of processing the 'cap' type of operation.
1569 * @chan: channel to use
1570 * @cap: type of transaction
1571 * @dst_lst: array of destination pointers
1572 * @dst_cnt: number of destination operands
1573 * @src_lst: array of source pointers
1574 * @src_cnt: number of source operands
1575 * @src_sz: size of each source operand
1576 */
1577static int ppc440spe_adma_estimate(struct dma_chan *chan,
1578	enum dma_transaction_type cap, struct page **dst_lst, int dst_cnt,
1579	struct page **src_lst, int src_cnt, size_t src_sz)
1580{
1581	int ef = 1;
1582
1583	if (cap == DMA_PQ || cap == DMA_PQ_VAL) {
1584		/* If RAID-6 capabilities were not activated don't try
1585		 * to use them
1586		 */
1587		if (unlikely(!ppc440spe_r6_enabled))
1588			return -1;
1589	}
1590	/*  In the current implementation of the ppc440spe ADMA driver it
1591	 * makes sense to special-case only the PQ operation, because it
1592	 * may be processed:
1593	 * (1) either using the Biskup method on DMA2;
1594	 * (2) or on DMA0/1.
1595	 *  Thus we favour (1) if the sources are suitable;
1596	 * otherwise let it be processed on one of the DMA0/1 engines.
1597	 *  In the sum_product case, where the destination is also a
1598	 * source, process it on DMA0/1 only.
1599	 */
1600	if (cap == DMA_PQ && chan->chan_id == PPC440SPE_XOR_ID) {
1601
1602		if (dst_cnt == 1 && src_cnt == 2 && dst_lst[0] == src_lst[1])
1603			ef = 0; /* sum_product case, process on DMA0/1 */
1604		else if (ppc440spe_can_rxor(src_lst, src_cnt, src_sz))
1605			ef = 3; /* override (DMA0/1 + idle) */
1606		else
1607			ef = 0; /* can't process on DMA2 if !rxor */
1608	}
1609
1610	/* channel idleness increases the priority */
1611	if (likely(ef) &&
1612	    !ppc440spe_chan_is_busy(to_ppc440spe_adma_chan(chan)))
1613		ef++;
1614
1615	return ef;
1616}
1617
1618struct dma_chan *
1619ppc440spe_async_tx_find_best_channel(enum dma_transaction_type cap,
1620	struct page **dst_lst, int dst_cnt, struct page **src_lst,
1621	int src_cnt, size_t src_sz)
1622{
1623	struct dma_chan *best_chan = NULL;
1624	struct ppc_dma_chan_ref *ref;
1625	int best_rank = -1;
1626
1627	if (unlikely(!src_sz))
1628		return NULL;
1629	if (src_sz > PAGE_SIZE) {
1630		/*
1631		 * should a user of the API ever pass > PAGE_SIZE requests,
1632		 * we sort out the cases where temporary page-sized buffers
1633		 * are used.
1634		 */
1635		switch (cap) {
1636		case DMA_PQ:
1637			if (src_cnt == 1 && dst_lst[1] == src_lst[0])
1638				return NULL;
1639			if (src_cnt == 2 && dst_lst[1] == src_lst[1])
1640				return NULL;
1641			break;
1642		case DMA_PQ_VAL:
1643		case DMA_XOR_VAL:
1644			return NULL;
1645		default:
1646			break;
1647		}
1648	}
1649
1650	list_for_each_entry(ref, &ppc440spe_adma_chan_list, node) {
1651		if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
1652			int rank;
1653
1654			rank = ppc440spe_adma_estimate(ref->chan, cap, dst_lst,
1655					dst_cnt, src_lst, src_cnt, src_sz);
1656			if (rank > best_rank) {
1657				best_rank = rank;
1658				best_chan = ref->chan;
1659			}
1660		}
1661	}
1662
1663	return best_chan;
1664}
1665EXPORT_SYMBOL_GPL(ppc440spe_async_tx_find_best_channel);
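
/*
 * Illustrative usage sketch (an editorial assumption, not code from this
 * driver): a RAID-6 client that keeps its source pages in
 * blocks[0..src_cnt-1] and the P/Q destination pages in blocks[src_cnt] and
 * blocks[src_cnt + 1] could pick an engine roughly like this:
 *
 *	struct dma_chan *chan;
 *
 *	chan = ppc440spe_async_tx_find_best_channel(DMA_PQ,
 *			&blocks[src_cnt], 2, blocks, src_cnt, len);
 *	if (!chan)
 *		... fall back to a software implementation ...
 *
 * The highest-ranked capable channel (as scored by ppc440spe_adma_estimate()
 * above) is returned, or NULL if none is suitable.
 */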
1666
1667/**
1668 * ppc440spe_get_group_entry - get the group entry with index entry_idx
1669 * @tdesc: the last allocated slot in the group.
1670 */
1671static struct ppc440spe_adma_desc_slot *
1672ppc440spe_get_group_entry(struct ppc440spe_adma_desc_slot *tdesc, u32 entry_idx)
1673{
1674	struct ppc440spe_adma_desc_slot *iter = tdesc->group_head;
1675	int i = 0;
1676
1677	if (entry_idx < 0 || entry_idx >= (tdesc->src_cnt + tdesc->dst_cnt)) {
1678		printk("%s: entry_idx %d, src_cnt %d, dst_cnt %d\n",
1679			__func__, entry_idx, tdesc->src_cnt, tdesc->dst_cnt);
1680		BUG();
1681	}
1682
1683	list_for_each_entry(iter, &tdesc->group_list, chain_node) {
1684		if (i++ == entry_idx)
1685			break;
1686	}
1687	return iter;
1688}
1689
1690/**
1691 * ppc440spe_adma_free_slots - flags descriptor slots for reuse
1692 * @slot: Slot to free
1693 * Caller must hold &ppc440spe_chan->lock while calling this function
1694 */
1695static void ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot,
1696				      struct ppc440spe_adma_chan *chan)
1697{
1698	int stride = slot->slots_per_op;
1699
1700	while (stride--) {
1701		slot->slots_per_op = 0;
1702		slot = list_entry(slot->slot_node.next,
1703				struct ppc440spe_adma_desc_slot,
1704				slot_node);
1705	}
1706}
1707
1708static void ppc440spe_adma_unmap(struct ppc440spe_adma_chan *chan,
1709				 struct ppc440spe_adma_desc_slot *desc)
1710{
1711	u32 src_cnt, dst_cnt;
1712	dma_addr_t addr;
1713
1714	/*
1715	 * get the number of sources & destinations
1716	 * included in this descriptor and unmap
1717	 * them all
1718	 */
1719	src_cnt = ppc440spe_desc_get_src_num(desc, chan);
1720	dst_cnt = ppc440spe_desc_get_dst_num(desc, chan);
1721
1722	/* unmap destinations */
1723	if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
1724		while (dst_cnt--) {
1725			addr = ppc440spe_desc_get_dest_addr(
1726				desc, chan, dst_cnt);
1727			dma_unmap_page(chan->device->dev,
1728					addr, desc->unmap_len,
1729					DMA_FROM_DEVICE);
1730		}
1731	}
1732
1733	/* unmap sources */
1734	if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
1735		while (src_cnt--) {
1736			addr = ppc440spe_desc_get_src_addr(
1737				desc, chan, src_cnt);
1738			dma_unmap_page(chan->device->dev,
1739					addr, desc->unmap_len,
1740					DMA_TO_DEVICE);
1741		}
1742	}
1743}
1744
1745/**
1746 * ppc440spe_adma_run_tx_complete_actions - call functions to be called
1747 * upon completion
1748 */
1749static dma_cookie_t ppc440spe_adma_run_tx_complete_actions(
1750		struct ppc440spe_adma_desc_slot *desc,
1751		struct ppc440spe_adma_chan *chan,
1752		dma_cookie_t cookie)
1753{
1754	int i;
1755
1756	BUG_ON(desc->async_tx.cookie < 0);
1757	if (desc->async_tx.cookie > 0) {
1758		cookie = desc->async_tx.cookie;
1759		desc->async_tx.cookie = 0;
1760
1761		/* call the callback (must not sleep or submit new
1762		 * operations to this channel)
1763		 */
1764		if (desc->async_tx.callback)
1765			desc->async_tx.callback(
1766				desc->async_tx.callback_param);
1767
1768		/* unmap dma addresses
1769		 * (unmap_single vs unmap_page?)
1770		 *
1771		 * actually, ppc's dma_unmap_page() functions are empty, so
1772		 * the following code is just for the sake of completeness
1773		 */
1774		if (chan && chan->needs_unmap && desc->group_head &&
1775		     desc->unmap_len) {
1776			struct ppc440spe_adma_desc_slot *unmap =
1777							desc->group_head;
1778			/* assume 1 slot per op always */
1779			u32 slot_count = unmap->slot_cnt;
1780
1781			/* Run through the group list and unmap addresses */
1782			for (i = 0; i < slot_count; i++) {
1783				BUG_ON(!unmap);
1784				ppc440spe_adma_unmap(chan, unmap);
1785				unmap = unmap->hw_next;
1786			}
1787		}
1788	}
1789
1790	/* run dependent operations */
1791	dma_run_dependencies(&desc->async_tx);
1792
1793	return cookie;
1794}
1795
1796/**
1797 * ppc440spe_adma_clean_slot - clean up CDB slot (if ack is set)
1798 */
1799static int ppc440spe_adma_clean_slot(struct ppc440spe_adma_desc_slot *desc,
1800		struct ppc440spe_adma_chan *chan)
1801{
1802	/* the client is allowed to attach dependent operations
1803	 * until 'ack' is set
1804	 */
1805	if (!async_tx_test_ack(&desc->async_tx))
1806		return 0;
1807
1808	/* leave the last descriptor in the chain
1809	 * so we can append to it
1810	 */
1811	if (list_is_last(&desc->chain_node, &chan->chain) ||
1812	    desc->phys == ppc440spe_chan_get_current_descriptor(chan))
1813		return 1;
1814
1815	if (chan->device->id != PPC440SPE_XOR_ID) {
1816		/* our DMA interrupt handler clears opc field of
1817		 * each processed descriptor. For all types of
1818		 * operations except for ZeroSum we do not actually
1819		 * need ack from the interrupt handler. ZeroSum is a
1820		 * special case since the result of this operation
1821		 * is available from the handler only, so if we see
1822		 * such type of descriptor (which is unprocessed yet)
1823		 * then leave it in chain.
1824		 */
1825		struct dma_cdb *cdb = desc->hw_desc;
1826		if (cdb->opc == DMA_CDB_OPC_DCHECK128)
1827			return 1;
1828	}
1829
1830	dev_dbg(chan->device->common.dev, "\tfree slot %llx: %d stride: %d\n",
1831		desc->phys, desc->idx, desc->slots_per_op);
1832
1833	list_del(&desc->chain_node);
1834	ppc440spe_adma_free_slots(desc, chan);
1835	return 0;
1836}
1837
1838/**
1839 * __ppc440spe_adma_slot_cleanup - this is the common clean-up routine
1840 *	which runs through the channel's CDB list until it reaches the
1841 *	descriptor currently being processed. When the routine determines that
1842 *	all CDBs of a group are completed, the corresponding callbacks (if any)
1843 *	are called and the slots are freed.
1844 */
1845static void __ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan)
1846{
1847	struct ppc440spe_adma_desc_slot *iter, *_iter, *group_start = NULL;
1848	dma_cookie_t cookie = 0;
1849	u32 current_desc = ppc440spe_chan_get_current_descriptor(chan);
1850	int busy = ppc440spe_chan_is_busy(chan);
1851	int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
1852
1853	dev_dbg(chan->device->common.dev, "ppc440spe adma%d: %s\n",
1854		chan->device->id, __func__);
1855
1856	if (!current_desc) {
1857		/*  There were no transactions yet, so
1858		 * nothing to clean
1859		 */
1860		return;
1861	}
1862
1863	/* free completed slots from the chain starting with
1864	 * the oldest descriptor
1865	 */
1866	list_for_each_entry_safe(iter, _iter, &chan->chain,
1867					chain_node) {
1868		dev_dbg(chan->device->common.dev, "\tcookie: %d slot: %d "
1869		    "busy: %d this_desc: %#llx next_desc: %#x "
1870		    "cur: %#x ack: %d\n",
1871		    iter->async_tx.cookie, iter->idx, busy, iter->phys,
1872		    ppc440spe_desc_get_link(iter, chan), current_desc,
1873		    async_tx_test_ack(&iter->async_tx));
1874		prefetch(_iter);
1875		prefetch(&_iter->async_tx);
1876
1877		/* do not advance past the current descriptor loaded into the
1878		 * hardware channel; subsequent descriptors are either in process
1879		 * or have not been submitted
1880		 */
1881		if (seen_current)
1882			break;
1883
1884		/* stop the search if we reach the current descriptor and the
1885		 * channel is busy, or if it appears that the current descriptor
1886		 * needs to be re-read (i.e. has been appended to)
1887		 */
1888		if (iter->phys == current_desc) {
1889			BUG_ON(seen_current++);
1890			if (busy || ppc440spe_desc_get_link(iter, chan)) {
1891				/* not all descriptors of the group have
1892				 * been completed; exit.
1893				 */
1894				break;
1895			}
1896		}
1897
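		/* Group accounting note: the first descriptor of each operation
		 * records the number of slots remaining in the group (slot_cnt)
		 * and the slots used per operation (slots_per_op).  A descriptor
		 * with slot_cnt > slots_per_op therefore starts a multi-op group;
		 * the walk below counts slot_cnt down by slots_per_op per member
		 * and only runs the completion actions once the whole group has
		 * been traversed.
		 */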
1898		/* detect the start of a group transaction */
1899		if (!slot_cnt && !slots_per_op) {
1900			slot_cnt = iter->slot_cnt;
1901			slots_per_op = iter->slots_per_op;
1902			if (slot_cnt <= slots_per_op) {
1903				slot_cnt = 0;
1904				slots_per_op = 0;
1905			}
1906		}
1907
1908		if (slot_cnt) {
1909			if (!group_start)
1910				group_start = iter;
1911			slot_cnt -= slots_per_op;
1912		}
1913
1914		/* all the members of a group are complete */
1915		if (slots_per_op != 0 && slot_cnt == 0) {
1916			struct ppc440spe_adma_desc_slot *grp_iter, *_grp_iter;
1917			int end_of_chain = 0;
1918
1919			/* clean up the group */
1920			slot_cnt = group_start->slot_cnt;
1921			grp_iter = group_start;
1922			list_for_each_entry_safe_from(grp_iter, _grp_iter,
1923				&chan->chain, chain_node) {
1924
1925				cookie = ppc440spe_adma_run_tx_complete_actions(
1926					grp_iter, chan, cookie);
1927
1928				slot_cnt -= slots_per_op;
1929				end_of_chain = ppc440spe_adma_clean_slot(
1930				    grp_iter, chan);
1931				if (end_of_chain && slot_cnt) {
1932					/* Should wait for ZeroSum completion */
1933					if (cookie > 0)
1934						chan->common.completed_cookie = cookie;
1935					return;
1936				}
1937
1938				if (slot_cnt == 0 || end_of_chain)
1939					break;
1940			}
1941
1942			/* the group should be complete at this point */
1943			BUG_ON(slot_cnt);
1944
1945			slots_per_op = 0;
1946			group_start = NULL;
1947			if (end_of_chain)
1948				break;
1949			else
1950				continue;
1951		} else if (slots_per_op) /* wait for group completion */
1952			continue;
1953
1954		cookie = ppc440spe_adma_run_tx_complete_actions(iter, chan,
1955		    cookie);
1956
1957		if (ppc440spe_adma_clean_slot(iter, chan))
1958			break;
1959	}
1960
1961	BUG_ON(!seen_current);
1962
1963	if (cookie > 0) {
1964		chan->common.completed_cookie = cookie;
1965		pr_debug("\tcompleted cookie %d\n", cookie);
1966	}
1967
1968}
1969
1970/**
1971 * ppc440spe_adma_tasklet - clean up watch-dog initiator
1972 */
1973static void ppc440spe_adma_tasklet(unsigned long data)
1974{
1975	struct ppc440spe_adma_chan *chan = (struct ppc440spe_adma_chan *) data;
1976
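	/* Take the channel lock with a nesting annotation: the cleanup may
	 * submit dependent operations to another channel whose lock belongs
	 * to the same lock class, and lockdep would otherwise flag that as
	 * possible recursive locking.
	 */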
1977	spin_lock_nested(&chan->lock, SINGLE_DEPTH_NESTING);
1978	__ppc440spe_adma_slot_cleanup(chan);
1979	spin_unlock(&chan->lock);
1980}
1981
1982/**
1983 * ppc440spe_adma_slot_cleanup - clean up scheduled initiator
1984 */
1985static void ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan)
1986{
1987	spin_lock_bh(&chan->lock);
1988	__ppc440spe_adma_slot_cleanup(chan);
1989	spin_unlock_bh(&chan->lock);
1990}
1991
1992/**
1993 * ppc440spe_adma_alloc_slots - allocate free slots (if any)
1994 */
1995static struct ppc440spe_adma_desc_slot *ppc440spe_adma_alloc_slots(
1996		struct ppc440spe_adma_chan *chan, int num_slots,
1997		int slots_per_op)
1998{
1999	struct ppc440spe_adma_desc_slot *iter = NULL, *_iter;
2000	struct ppc440spe_adma_desc_slot *alloc_start = NULL;
2001	struct list_head chain = LIST_HEAD_INIT(chain);
2002	int slots_found, retry = 0;
2003
2005	BUG_ON(!num_slots || !slots_per_op);
2006	 * start the search from the last allocated descriptor;
2007	 * if a contiguous allocation cannot be found, start searching
2008	 * from the beginning of the list
2009	 */
2010retry:
2011	slots_found = 0;
2012	if (retry == 0)
2013		iter = chan->last_used;
2014	else
2015		iter = list_entry(&chan->all_slots,
2016				  struct ppc440spe_adma_desc_slot,
2017				  slot_node);
2018	list_for_each_entry_safe_continue(iter, _iter, &chan->all_slots,
2019	    slot_node) {
2020		prefetch(_iter);
2021		prefetch(&_iter->async_tx);
2022		if (iter->slots_per_op) {
2023			slots_found = 0;
2024			continue;
2025		}
2026
2027		/* start the allocation if the slot is correctly aligned */
2028		if (!slots_found++)
2029			alloc_start = iter;
2030
2031		if (slots_found == num_slots) {
2032			struct ppc440spe_adma_desc_slot *alloc_tail = NULL;
2033			struct ppc440spe_adma_desc_slot *last_used = NULL;
2034
2035			iter = alloc_start;
2036			while (num_slots) {
2037				int i;
2038				/* pre-ack all but the last descriptor */
2039				if (num_slots != slots_per_op)
2040					async_tx_ack(&iter->async_tx);
2041
2042				list_add_tail(&iter->chain_node, &chain);
2043				alloc_tail = iter;
2044				iter->async_tx.cookie = 0;
2045				iter->hw_next = NULL;
2046				iter->flags = 0;
2047				iter->slot_cnt = num_slots;
2048				iter->xor_check_result = NULL;
2049				for (i = 0; i < slots_per_op; i++) {
2050					iter->slots_per_op = slots_per_op - i;
2051					last_used = iter;
2052					iter = list_entry(iter->slot_node.next,
2053						struct ppc440spe_adma_desc_slot,
2054						slot_node);
2055				}
2056				num_slots -= slots_per_op;
2057			}
2058			alloc_tail->group_head = alloc_start;
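			/* -EBUSY is the conventional "allocated but not yet
			 * submitted" cookie; the real cookie is assigned
			 * later in tx_submit().
			 */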
2059			alloc_tail->async_tx.cookie = -EBUSY;
2060			list_splice(&chain, &alloc_tail->group_list);
2061			chan->last_used = last_used;
2062			return alloc_tail;
2063		}
2064	}
2065	if (!retry++)
2066		goto retry;
2067
2068	/* try to free some slots if the allocation fails */
2069	tasklet_schedule(&chan->irq_tasklet);
2070	return NULL;
2071}
2072
2073/**
2074 * ppc440spe_adma_alloc_chan_resources - allocate pools for CDB slots
2075 */
2076static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan)
2077{
2078	struct ppc440spe_adma_chan *ppc440spe_chan;
2079	struct ppc440spe_adma_desc_slot *slot = NULL;
2080	char *hw_desc;
2081	int i, db_sz;
2082	int init;
2083
2084	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
2085	init = ppc440spe_chan->slots_allocated ? 0 : 1;
2086	chan->chan_id = ppc440spe_chan->device->id;
2087
2088	/* Allocate descriptor slots */
2089	i = ppc440spe_chan->slots_allocated;
2090	if (ppc440spe_chan->device->id != PPC440SPE_XOR_ID)
2091		db_sz = sizeof(struct dma_cdb);
2092	else
2093		db_sz = sizeof(struct xor_cb);
2094
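	/* Carve the device's coherent descriptor pool into db_sz-sized
	 * hardware descriptors; each one gets a software slot that records
	 * its virtual and physical (dma) addresses.
	 */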
2095	for (; i < (ppc440spe_chan->device->pool_size / db_sz); i++) {
2096		slot = kzalloc(sizeof(struct ppc440spe_adma_desc_slot),
2097			       GFP_KERNEL);
2098		if (!slot) {
2099			printk(KERN_INFO "SPE ADMA Channel only initialized"
2100				" %d descriptor slots\n", i--);
2101			break;
2102		}
2103
2104		hw_desc = (char *) ppc440spe_chan->device->dma_desc_pool_virt;
2105		slot->hw_desc = (void *) &hw_desc[i * db_sz];
2106		dma_async_tx_descriptor_init(&slot->async_tx, chan);
2107		slot->async_tx.tx_submit = ppc440spe_adma_tx_submit;
2108		INIT_LIST_HEAD(&slot->chain_node);
2109		INIT_LIST_HEAD(&slot->slot_node);
2110		INIT_LIST_HEAD(&slot->group_list);
2111		slot->phys = ppc440spe_chan->device->dma_desc_pool + i * db_sz;
2112		slot->idx = i;
2113
2114		spin_lock_bh(&ppc440spe_chan->lock);
2115		ppc440spe_chan->slots_allocated++;
2116		list_add_tail(&slot->slot_node, &ppc440spe_chan->all_slots);
2117		spin_unlock_bh(&ppc440spe_chan->lock);
2118	}
2119
2120	if (i && !ppc440spe_chan->last_used) {
2121		ppc440spe_chan->last_used =
2122			list_entry(ppc440spe_chan->all_slots.next,
2123				struct ppc440spe_adma_desc_slot,
2124				slot_node);
2125	}
2126
2127	dev_dbg(ppc440spe_chan->device->common.dev,
2128		"ppc440spe adma%d: allocated %d descriptor slots\n",
2129		ppc440spe_chan->device->id, i);
2130
2131	/* initialize the channel and the chain with a null operation */
2132	if (init) {
2133		switch (ppc440spe_chan->device->id) {
2134		case PPC440SPE_DMA0_ID:
2135		case PPC440SPE_DMA1_ID:
2136			ppc440spe_chan->hw_chain_inited = 0;
2137			/* Use WXOR for self-testing */
2138			if (!ppc440spe_r6_tchan)
2139				ppc440spe_r6_tchan = ppc440spe_chan;
2140			break;
2141		case PPC440SPE_XOR_ID:
2142			ppc440spe_chan_start_null_xor(ppc440spe_chan);
2143			break;
2144		default:
2145			BUG();
2146		}
2147		ppc440spe_chan->needs_unmap = 1;
2148	}
2149
2150	return (i > 0) ? i : -ENOMEM;
2151}
2152
2153/**
2154 * ppc440spe_rxor_set_region - set RXOR region mask into the CDB operand
2155 */
2156static void ppc440spe_rxor_set_region(struct ppc440spe_adma_desc_slot *desc,
2157	u8 xor_arg_no, u32 mask)
2158{
2159	struct xor_cb *xcb = desc->hw_desc;
2160
2161	xcb->ops[xor_arg_no].h |= mask;
2162}
2163
2164/**
2165 * ppc440spe_rxor_set_src - set RXOR source address into the CDB operand
2166 */
2167static void ppc440spe_rxor_set_src(struct ppc440spe_adma_desc_slot *desc,
2168	u8 xor_arg_no, dma_addr_t addr)
2169{
2170	struct xor_cb *xcb = desc->hw_desc;
2171
2172	xcb->ops[xor_arg_no].h |= DMA_CUED_XOR_BASE;
2173	xcb->ops[xor_arg_no].l = addr;
2174}
2175
2176/**
2177 * ppc440spe_rxor_set_mult - set RXOR multiplier into the CDB operand
2178 */
2179static void ppc440spe_rxor_set_mult(struct ppc440spe_adma_desc_slot *desc,
2180	u8 xor_arg_no, u8 idx, u8 mult)
2181{
2182	struct xor_cb *xcb = desc->hw_desc;
2183
2184	xcb->ops[xor_arg_no].h |= mult << (DMA_CUED_MULT1_OFF + idx * 8);
2185}
2186
2187/**
2188 * ppc440spe_adma_check_threshold - append CDBs to h/w chain if threshold
2189 *	has been reached
2190 */
2191static void ppc440spe_adma_check_threshold(struct ppc440spe_adma_chan *chan)
2192{
2193	dev_dbg(chan->device->common.dev, "ppc440spe adma%d: pending: %d\n",
2194		chan->device->id, chan->pending);
2195
2196	if (chan->pending >= PPC440SPE_ADMA_THRESHOLD) {
2197		chan->pending = 0;
2198		ppc440spe_chan_append(chan);
2199	}
2200}
2201
2202/**
2203 * ppc440spe_adma_tx_submit - submit new descriptor group to the channel
2204 *	(the descriptors are not necessarily appended to the h/w
2205 *	chains right away)
2206 */
2207static dma_cookie_t ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx)
2208{
2209	struct ppc440spe_adma_desc_slot *sw_desc;
2210	struct ppc440spe_adma_chan *chan = to_ppc440spe_adma_chan(tx->chan);
2211	struct ppc440spe_adma_desc_slot *group_start, *old_chain_tail;
2212	int slot_cnt;
2213	int slots_per_op;
2214	dma_cookie_t cookie;
2215
2216	sw_desc = tx_to_ppc440spe_adma_slot(tx);
2217
2218	group_start = sw_desc->group_head;
2219	slot_cnt = group_start->slot_cnt;
2220	slots_per_op = group_start->slots_per_op;
2221
2222	spin_lock_bh(&chan->lock);
2223	cookie = dma_cookie_assign(tx);
2224
2225	if (unlikely(list_empty(&chan->chain))) {
2226		/* first peer */
2227		list_splice_init(&sw_desc->group_list, &chan->chain);
2228		chan_first_cdb[chan->device->id] = group_start;
2229	} else {
2230		/* not the first peer; bind the CDBs to the chain */
2231		old_chain_tail = list_entry(chan->chain.prev,
2232					struct ppc440spe_adma_desc_slot,
2233					chain_node);
2234		list_splice_init(&sw_desc->group_list,
2235		    &old_chain_tail->chain_node);
2236		/* fix up the hardware chain */
2237		ppc440spe_desc_set_link(chan, old_chain_tail, group_start);
2238	}
2239
2240	/* increment the pending count by the number of operations */
2241	chan->pending += slot_cnt / slots_per_op;
2242	ppc440spe_adma_check_threshold(chan);
2243	spin_unlock_bh(&chan->lock);
2244
2245	dev_dbg(chan->device->common.dev,
2246		"ppc440spe adma%d: %s cookie: %d slot: %d tx %p\n",
2247		chan->device->id, __func__,
2248		sw_desc->async_tx.cookie, sw_desc->idx, sw_desc);
2249
2250	return cookie;
2251}
2252
2253/**
2254 * ppc440spe_adma_prep_dma_interrupt - prepare CDB for a pseudo DMA operation
2255 */
2256static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_interrupt(
2257		struct dma_chan *chan, unsigned long flags)
2258{
2259	struct ppc440spe_adma_chan *ppc440spe_chan;
2260	struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
2261	int slot_cnt, slots_per_op;
2262
2263	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
2264
2265	dev_dbg(ppc440spe_chan->device->common.dev,
2266		"ppc440spe adma%d: %s\n", ppc440spe_chan->device->id,
2267		__func__);
2268
2269	spin_lock_bh(&ppc440spe_chan->lock);
2270	slot_cnt = slots_per_op = 1;
2271	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
2272			slots_per_op);
2273	if (sw_desc) {
2274		group_start = sw_desc->group_head;
2275		ppc440spe_desc_init_interrupt(group_start, ppc440spe_chan);
2276		group_start->unmap_len = 0;
2277		sw_desc->async_tx.flags = flags;
2278	}
2279	spin_unlock_bh(&ppc440spe_chan->lock);
2280
2281	return sw_desc ? &sw_desc->async_tx : NULL;
2282}
2283
2284/**
2285 * ppc440spe_adma_prep_dma_memcpy - prepare CDB for a MEMCPY operation
2286 */
2287static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memcpy(
2288		struct dma_chan *chan, dma_addr_t dma_dest,
2289		dma_addr_t dma_src, size_t len, unsigned long flags)
2290{
2291	struct ppc440spe_adma_chan *ppc440spe_chan;
2292	struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
2293	int slot_cnt, slots_per_op;
2294
2295	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
2296
2297	if (unlikely(!len))
2298		return NULL;
2299
2300	BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT);
2301
2302	spin_lock_bh(&ppc440spe_chan->lock);
2303
2304	dev_dbg(ppc440spe_chan->device->common.dev,
2305		"ppc440spe adma%d: %s len: %u int_en %d\n",
2306		ppc440spe_chan->device->id, __func__, len,
2307		flags & DMA_PREP_INTERRUPT ? 1 : 0);
2308	slot_cnt = slots_per_op = 1;
2309	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
2310		slots_per_op);
2311	if (sw_desc) {
2312		group_start = sw_desc->group_head;
2313		ppc440spe_desc_init_memcpy(group_start, flags);
2314		ppc440spe_adma_set_dest(group_start, dma_dest, 0);
2315		ppc440spe_adma_memcpy_xor_set_src(group_start, dma_src, 0);
2316		ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len);
2317		sw_desc->unmap_len = len;
2318		sw_desc->async_tx.flags = flags;
2319	}
2320	spin_unlock_bh(&ppc440spe_chan->lock);
2321
2322	return sw_desc ? &sw_desc->async_tx : NULL;
2323}
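/*
 * Illustration only (not part of this driver): a dmaengine client would
 * typically drive the prep/submit callbacks above along these lines.
 * Error handling is omitted; 'chan', 'dst', 'src', 'len', 'my_callback'
 * and 'my_arg' are hypothetical values provided by the caller.
 *
 *	struct dma_async_tx_descriptor *tx;
 *	dma_cookie_t cookie;
 *
 *	tx = chan->device->device_prep_dma_memcpy(chan, dst, src, len,
 *						   DMA_PREP_INTERRUPT);
 *	if (tx) {
 *		tx->callback = my_callback;
 *		tx->callback_param = my_arg;
 *		cookie = tx->tx_submit(tx);	-- ppc440spe_adma_tx_submit()
 *		dma_async_issue_pending(chan);
 *	}
 */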
2324
2325/**
2326 * ppc440spe_adma_prep_dma_memset - prepare CDB for a MEMSET operation
2327 */
2328static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memset(
2329		struct dma_chan *chan, dma_addr_t dma_dest, int value,
2330		size_t len, unsigned long flags)
2331{
2332	struct ppc440spe_adma_chan *ppc440spe_chan;
2333	struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
2334	int slot_cnt, slots_per_op;
2335
2336	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
2337
2338	if (unlikely(!len))
2339		return NULL;
2340
2341	BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT);
2342
2343	spin_lock_bh(&ppc440spe_chan->lock);
2344
2345	dev_dbg(ppc440spe_chan->device->common.dev,
2346		"ppc440spe adma%d: %s cal: %u len: %u int_en %d\n",
2347		ppc440spe_chan->device->id, __func__, value, len,
2348		flags & DMA_PREP_INTERRUPT ? 1 : 0);
2349
2350	slot_cnt = slots_per_op = 1;
2351	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
2352		slots_per_op);
2353	if (sw_desc) {
2354		group_start = sw_desc->group_head;
2355		ppc440spe_desc_init_memset(group_start, value, flags);
2356		ppc440spe_adma_set_dest(group_start, dma_dest, 0);
2357		ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len);
2358		sw_desc->unmap_len = len;
2359		sw_desc->async_tx.flags = flags;
2360	}
2361	spin_unlock_bh(&ppc440spe_chan->lock);
2362
2363	return sw_desc ? &sw_desc->async_tx : NULL;
2364}
2365
2366/**
2367 * ppc440spe_adma_prep_dma_xor - prepare CDB for a XOR operation
2368 */
2369static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor(
2370		struct dma_chan *chan, dma_addr_t dma_dest,
2371		dma_addr_t *dma_src, u32 src_cnt, size_t len,
2372		unsigned long flags)
2373{
2374	struct ppc440spe_adma_chan *ppc440spe_chan;
2375	struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
2376	int slot_cnt, slots_per_op;
2377
2378	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
2379
2380	ADMA_LL_DBG(prep_dma_xor_dbg(ppc440spe_chan->device->id,
2381				     dma_dest, dma_src, src_cnt));
2382	if (unlikely(!len))
2383		return NULL;
2384	BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT);
2385
2386	dev_dbg(ppc440spe_chan->device->common.dev,
2387		"ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n",
2388		ppc440spe_chan->device->id, __func__, src_cnt, len,
2389		flags & DMA_PREP_INTERRUPT ? 1 : 0);
2390
2391	spin_lock_bh(&ppc440spe_chan->lock);
2392	slot_cnt = ppc440spe_chan_xor_slot_count(len, src_cnt, &slots_per_op);
2393	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
2394			slots_per_op);
2395	if (sw_desc) {
2396		group_start = sw_desc->group_head;
2397		ppc440spe_desc_init_xor(group_start, src_cnt, flags);
2398		ppc440spe_adma_set_dest(group_start, dma_dest, 0);
2399		while (src_cnt--)
2400			ppc440spe_adma_memcpy_xor_set_src(group_start,
2401				dma_src[src_cnt], src_cnt);
2402		ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len);
2403		sw_desc->unmap_len = len;
2404		sw_desc->async_tx.flags = flags;
2405	}
2406	spin_unlock_bh(&ppc440spe_chan->lock);
2407
2408	return sw_desc ? &sw_desc->async_tx : NULL;
2409}
2410
2411static inline void
2412ppc440spe_desc_set_xor_src_cnt(struct ppc440spe_adma_desc_slot *desc,
2413				int src_cnt);
2414static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor);
2415
2416/**
2417 * ppc440spe_adma_init_dma2rxor_slot - set up the RXOR source layout in the slot's CDBs
2418 */
2419static void ppc440spe_adma_init_dma2rxor_slot(
2420		struct ppc440spe_adma_desc_slot *desc,
2421		dma_addr_t *src, int src_cnt)
2422{
2423	int i;
2424
2425	/* initialize CDB */
2426	for (i = 0; i < src_cnt; i++) {
2427		ppc440spe_adma_dma2rxor_prep_src(desc, &desc->rxor_cursor, i,
2428						 desc->src_cnt, (u32)src[i]);
2429	}
2430}
2431
2432/**
2433 * ppc440spe_dma01_prep_mult - prepare a Q-only multiply operation
2434 * where the destination is also a source
2435 */
2436static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_mult(
2437		struct ppc440spe_adma_chan *ppc440spe_chan,
2438		dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
2439		const unsigned char *scf, size_t len, unsigned long flags)
2440{
2441	struct ppc440spe_adma_desc_slot *sw_desc = NULL;
2442	unsigned long op = 0;
2443	int slot_cnt;
2444
2445	set_bit(PPC440SPE_DESC_WXOR, &op);
2446	slot_cnt = 2;
2447
2448	spin_lock_bh(&ppc440spe_chan->lock);
2449
2450	/* use WXOR, each descriptor occupies one slot */
2451	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
2452	if (sw_desc) {
2453		struct ppc440spe_adma_chan *chan;
2454		struct ppc440spe_adma_desc_slot *iter;
2455		struct dma_cdb *hw_desc;
2456
2457		chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
2458		set_bits(op, &sw_desc->flags);
2459		sw_desc->src_cnt = src_cnt;
2460		sw_desc->dst_cnt = dst_cnt;
2461		/* First descriptor, zero data in the destination and copy it
2462		 * to q page using MULTICAST transfer.
2463		 */
2464		iter = list_first_entry(&sw_desc->group_list,
2465					struct ppc440spe_adma_desc_slot,
2466					chain_node);
2467		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
2468		/* set 'next' pointer */
2469		iter->hw_next = list_entry(iter->chain_node.next,
2470					   struct ppc440spe_adma_desc_slot,
2471					   chain_node);
2472		clear_bit(PPC440SPE_DESC_INT, &iter->flags);
2473		hw_desc = iter->hw_desc;
2474		hw_desc->opc = DMA_CDB_OPC_MULTICAST;
2475
2476		ppc440spe_desc_set_dest_addr(iter, chan,
2477					     DMA_CUED_XOR_BASE, dst[0], 0);
2478		ppc440spe_desc_set_dest_addr(iter, chan, 0, dst[1], 1);
2479		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
2480					    src[0]);
2481		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
2482		iter->unmap_len = len;
2483
2484		/*
2485		 * Second descriptor, multiply data from the q page
2486		 * and store the result in real destination.
2487		 */
2488		iter = list_first_entry(&iter->chain_node,
2489					struct ppc440spe_adma_desc_slot,
2490					chain_node);
2491		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
2492		iter->hw_next = NULL;
2493		if (flags & DMA_PREP_INTERRUPT)
2494			set_bit(PPC440SPE_DESC_INT, &iter->flags);
2495		else
2496			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
2497
2498		hw_desc = iter->hw_desc;
2499		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
2500		ppc440spe_desc_set_src_addr(iter, chan, 0,
2501					    DMA_CUED_XOR_HB, dst[1]);
2502		ppc440spe_desc_set_dest_addr(iter, chan,
2503					     DMA_CUED_XOR_BASE, dst[0], 0);
2504
2505		ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
2506					    DMA_CDB_SG_DST1, scf[0]);
2507		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
2508		iter->unmap_len = len;
2509		sw_desc->async_tx.flags = flags;
2510	}
2511
2512	spin_unlock_bh(&ppc440spe_chan->lock);
2513
2514	return sw_desc;
2515}
2516
2517/**
2518 * ppc440spe_dma01_prep_sum_product -
2519 * Dx = A*(P+Pxy) + B*(Q+Qxy) operation where destination is also
2520 * the source.
2521 */
2522static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_sum_product(
2523		struct ppc440spe_adma_chan *ppc440spe_chan,
2524		dma_addr_t *dst, dma_addr_t *src, int src_cnt,
2525		const unsigned char *scf, size_t len, unsigned long flags)
2526{
2527	struct ppc440spe_adma_desc_slot *sw_desc = NULL;
2528	unsigned long op = 0;
2529	int slot_cnt;
2530
2531	set_bit(PPC440SPE_DESC_WXOR, &op);
2532	slot_cnt = 3;
2533
2534	spin_lock_bh(&ppc440spe_chan->lock);
2535
2536	/* WXOR, each descriptor occupies one slot */
2537	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
2538	if (sw_desc) {
2539		struct ppc440spe_adma_chan *chan;
2540		struct ppc440spe_adma_desc_slot *iter;
2541		struct dma_cdb *hw_desc;
2542
2543		chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
2544		set_bits(op, &sw_desc->flags);
2545		sw_desc->src_cnt = src_cnt;
2546		sw_desc->dst_cnt = 1;
2547		/* 1st descriptor, src[1] data to q page and zero destination */
2548		iter = list_first_entry(&sw_desc->group_list,
2549					struct ppc440spe_adma_desc_slot,
2550					chain_node);
2551		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
2552		iter->hw_next = list_entry(iter->chain_node.next,
2553					   struct ppc440spe_adma_desc_slot,
2554					   chain_node);
2555		clear_bit(PPC440SPE_DESC_INT, &iter->flags);
2556		hw_desc = iter->hw_desc;
2557		hw_desc->opc = DMA_CDB_OPC_MULTICAST;
2558
2559		ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
2560					     *dst, 0);
2561		ppc440spe_desc_set_dest_addr(iter, chan, 0,
2562					     ppc440spe_chan->qdest, 1);
2563		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
2564					    src[1]);
2565		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
2566		iter->unmap_len = len;
2567
2568		/* 2nd descriptor, multiply src[1] data and store the
2569		 * result in destination */
2570		iter = list_first_entry(&iter->chain_node,
2571					struct ppc440spe_adma_desc_slot,
2572					chain_node);
2573		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
2574		/* set 'next' pointer */
2575		iter->hw_next = list_entry(iter->chain_node.next,
2576					   struct ppc440spe_adma_desc_slot,
2577					   chain_node);
2578		if (flags & DMA_PREP_INTERRUPT)
2579			set_bit(PPC440SPE_DESC_INT, &iter->flags);
2580		else
2581			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
2582
2583		hw_desc = iter->hw_desc;
2584		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
2585		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
2586					    ppc440spe_chan->qdest);
2587		ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
2588					     *dst, 0);
2589		ppc440spe_desc_set_src_mult(iter, chan,	DMA_CUED_MULT1_OFF,
2590					    DMA_CDB_SG_DST1, scf[1]);
2591		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
2592		iter->unmap_len = len;
2593
2594		/*
2595		 * 3rd descriptor, multiply src[0] data and xor it
2596		 * with destination
2597		 */
2598		iter = list_first_entry(&iter->chain_node,
2599					struct ppc440spe_adma_desc_slot,
2600					chain_node);
2601		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
2602		iter->hw_next = NULL;
2603		if (flags & DMA_PREP_INTERRUPT)
2604			set_bit(PPC440SPE_DESC_INT, &iter->flags);
2605		else
2606			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
2607
2608		hw_desc = iter->hw_desc;
2609		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
2610		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
2611					    src[0]);
2612		ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
2613					     *dst, 0);
2614		ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
2615					    DMA_CDB_SG_DST1, scf[0]);
2616		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
2617		iter->unmap_len = len;
2618		sw_desc->async_tx.flags = flags;
2619	}
2620
2621	spin_unlock_bh(&ppc440spe_chan->lock);
2622
2623	return sw_desc;
2624}
2625
2626static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_pq(
2627		struct ppc440spe_adma_chan *ppc440spe_chan,
2628		dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
2629		const unsigned char *scf, size_t len, unsigned long flags)
2630{
2631	int slot_cnt;
2632	struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter;
2633	unsigned long op = 0;
2634	unsigned char mult = 1;
2635
2636	pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n",
2637		 __func__, dst_cnt, src_cnt, len);
2638	/*  select operations WXOR/RXOR depending on the
2639	 * source addresses of the operands and the number
2640	 * of destinations (RXOR supports only Q-parity calculations)
2641	 */
2642	set_bit(PPC440SPE_DESC_WXOR, &op);
2643	if (!test_and_set_bit(PPC440SPE_RXOR_RUN, &ppc440spe_rxor_state)) {
2644		/* no active RXOR;
2645		 * do RXOR if:
2646		 * - there is more than one source,
2647		 * - len is aligned on a 512-byte boundary,
2648		 * - the source addresses fit one of the 4 possible region layouts.
2649		 */
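		/* RXOR region layouts checked below: R1 and R2 must be
		 * adjacent (src[1] == src[0] + len); an optional third region
		 * may follow R2 immediately (R1-2-3), one block further
		 * (R1-2-4) or two blocks further (R1-2-5).
		 */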
2650		if (src_cnt > 1 &&
2651		    !(len & MQ0_CF2H_RXOR_BS_MASK) &&
2652		    (src[0] + len) == src[1]) {
2653			/* may do RXOR R1 R2 */
2654			set_bit(PPC440SPE_DESC_RXOR, &op);
2655			if (src_cnt != 2) {
2656				/* may try to enhance region of RXOR */
2657				if ((src[1] + len) == src[2]) {
2658					/* do RXOR R1 R2 R3 */
2659					set_bit(PPC440SPE_DESC_RXOR123,
2660						&op);
2661				} else if ((src[1] + len * 2) == src[2]) {
2662					/* do RXOR R1 R2 R4 */
2663					set_bit(PPC440SPE_DESC_RXOR124, &op);
2664				} else if ((src[1] + len * 3) == src[2]) {
2665					/* do RXOR R1 R2 R5 */
2666					set_bit(PPC440SPE_DESC_RXOR125,
2667						&op);
2668				} else {
2669					/* do RXOR R1 R2 */
2670					set_bit(PPC440SPE_DESC_RXOR12,
2671						&op);
2672				}
2673			} else {
2674				/* do RXOR R1 R2 */
2675				set_bit(PPC440SPE_DESC_RXOR12, &op);
2676			}
2677		}
2678
2679		if (!test_bit(PPC440SPE_DESC_RXOR, &op)) {
2680			/* cannot do this operation with RXOR */
2681			clear_bit(PPC440SPE_RXOR_RUN,
2682				&ppc440spe_rxor_state);
2683		} else {
2684			/* can do; set block size right now */
2685			ppc440spe_desc_set_rxor_block_size(len);
2686		}
2687	}
2688
2689	/* Number of necessary slots depends on operation type selected */
2690	if (!test_bit(PPC440SPE_DESC_RXOR, &op)) {
2691		/*  This is a WXOR-only chain. Need descriptors for each
2692		 * source to GF-XOR them with WXOR, and need descriptors
2693		 * for each destination to zero them with WXOR
2694		 */
2695		slot_cnt = src_cnt;
2696
2697		if (flags & DMA_PREP_ZERO_P) {
2698			slot_cnt++;
2699			set_bit(PPC440SPE_ZERO_P, &op);
2700		}
2701		if (flags & DMA_PREP_ZERO_Q) {
2702			slot_cnt++;
2703			set_bit(PPC440SPE_ZERO_Q, &op);
2704		}
2705	} else {
2706		/*  Need 1 or 2 descriptors for the RXOR operation, and
2707		 * (src_cnt - (2 or 3)) descriptors for WXOR of the
2708		 * remaining sources (if any)
2709		 */
2710		slot_cnt = dst_cnt;
2711
2712		if (flags & DMA_PREP_ZERO_P)
2713			set_bit(PPC440SPE_ZERO_P, &op);
2714		if (flags & DMA_PREP_ZERO_Q)
2715			set_bit(PPC440SPE_ZERO_Q, &op);
2716
2717		if (test_bit(PPC440SPE_DESC_RXOR12, &op))
2718			slot_cnt += src_cnt - 2;
2719		else
2720			slot_cnt += src_cnt - 3;
2721
2722		/*  Thus we have either an RXOR-only chain or a
2723		 * mixed RXOR/WXOR chain
2724		 */
2725		if (slot_cnt == dst_cnt)
2726			/* RXOR only chain */
2727			clear_bit(PPC440SPE_DESC_WXOR, &op);
2728	}
2729
2730	spin_lock_bh(&ppc440spe_chan->lock);
2731	/* for both RXOR/WXOR each descriptor occupies one slot */
2732	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
2733	if (sw_desc) {
2734		ppc440spe_desc_init_dma01pq(sw_desc, dst_cnt, src_cnt,
2735				flags, op);
2736
2737		/* setup dst/src/mult */
2738		pr_debug("%s: set dst descriptor 0, 1: 0x%016llx, 0x%016llx\n",
2739			 __func__, dst[0], dst[1]);
2740		ppc440spe_adma_pq_set_dest(sw_desc, dst, flags);
2741		while (src_cnt--) {
2742			ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt],
2743						  src_cnt);
2744
2745			/* NOTE: "Multi = 0 is equivalent to = 1", as
2746			 * stated in 440SPSPe_RAID6_Addendum_UM_1_17.pdf,
2747			 * doesn't work for RXOR with DMA0/1! Instead, multi=0
2748			 * leads to zeroing the source data after RXOR.
2749			 * So, for the P case, set up mult=1 explicitly.
2750			 */
2751			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
2752				mult = scf[src_cnt];
2753			ppc440spe_adma_pq_set_src_mult(sw_desc,
2754				mult, src_cnt,  dst_cnt - 1);
2755		}
2756
2757		/* Set up the byte count for each slot just allocated */
2758		sw_desc->async_tx.flags = flags;
2759		list_for_each_entry(iter, &sw_desc->group_list,
2760				chain_node) {
2761			ppc440spe_desc_set_byte_count(iter,
2762				ppc440spe_chan, len);
2763			iter->unmap_len = len;
2764		}
2765	}
2766	spin_unlock_bh(&ppc440spe_chan->lock);
2767
2768	return sw_desc;
2769}
2770
2771static struct ppc440spe_adma_desc_slot *ppc440spe_dma2_prep_pq(
2772		struct ppc440spe_adma_chan *ppc440spe_chan,
2773		dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
2774		const unsigned char *scf, size_t len, unsigned long flags)
2775{
2776	int slot_cnt, descs_per_op;
2777	struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter;
2778	unsigned long op = 0;
2779	unsigned char mult = 1;
2780
2781	BUG_ON(!dst_cnt);
2782	/*pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n",
2783		 __func__, dst_cnt, src_cnt, len);*/
2784
2785	spin_lock_bh(&ppc440spe_chan->lock);
2786	descs_per_op = ppc440spe_dma2_pq_slot_count(src, src_cnt, len);
2787	if (descs_per_op < 0) {
2788		spin_unlock_bh(&ppc440spe_chan->lock);
2789		return NULL;
2790	}
2791
2792	/* depending on number of sources we have 1 or 2 RXOR chains */
2793	slot_cnt = descs_per_op * dst_cnt;
2794
2795	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
2796	if (sw_desc) {
2797		op = slot_cnt;
2798		sw_desc->async_tx.flags = flags;
2799		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
2800			ppc440spe_desc_init_dma2pq(iter, dst_cnt, src_cnt,
2801				--op ? 0 : flags);
2802			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
2803				len);
2804			iter->unmap_len = len;
2805
2806			ppc440spe_init_rxor_cursor(&(iter->rxor_cursor));
2807			iter->rxor_cursor.len = len;
2808			iter->descs_per_op = descs_per_op;
2809		}
2810		op = 0;
2811		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
2812			op++;
2813			if (op % descs_per_op == 0)
2814				ppc440spe_adma_init_dma2rxor_slot(iter, src,
2815								  src_cnt);
2816			if (likely(!list_is_last(&iter->chain_node,
2817						 &sw_desc->group_list))) {
2818				/* set 'next' pointer */
2819				iter->hw_next =
2820					list_entry(iter->chain_node.next,
2821						struct ppc440spe_adma_desc_slot,
2822						chain_node);
2823				ppc440spe_xor_set_link(iter, iter->hw_next);
2824			} else {
2825				/* this is the last descriptor. */
2826				iter->hw_next = NULL;
2827			}
2828		}
2829
2830		/* fixup head descriptor */
2831		sw_desc->dst_cnt = dst_cnt;
2832		if (flags & DMA_PREP_ZERO_P)
2833			set_bit(PPC440SPE_ZERO_P, &sw_desc->flags);
2834		if (flags & DMA_PREP_ZERO_Q)
2835			set_bit(PPC440SPE_ZERO_Q, &sw_desc->flags);
2836
2837		/* setup dst/src/mult */
2838		ppc440spe_adma_pq_set_dest(sw_desc, dst, flags);
2839
2840		while (src_cnt--) {
2841			/* handle descriptors (if dst_cnt == 2) inside
2842			 * the ppc440spe_adma_pq_set_srcxxx() functions
2843			 */
2844			ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt],
2845						  src_cnt);
2846			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
2847				mult = scf[src_cnt];
2848			ppc440spe_adma_pq_set_src_mult(sw_desc,
2849					mult, src_cnt, dst_cnt - 1);
2850		}
2851	}
2852	spin_unlock_bh(&ppc440spe_chan->lock);
2853	ppc440spe_desc_set_rxor_block_size(len);
2854	return sw_desc;
2855}
2856
2857/**
2858 * ppc440spe_adma_prep_dma_pq - prepare CDB (group) for a GF-XOR operation
2859 */
2860static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pq(
2861		struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
2862		unsigned int src_cnt, const unsigned char *scf,
2863		size_t len, unsigned long flags)
2864{
2865	struct ppc440spe_adma_chan *ppc440spe_chan;
2866	struct ppc440spe_adma_desc_slot *sw_desc = NULL;
2867	int dst_cnt = 0;
2868
2869	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
2870
2871	ADMA_LL_DBG(prep_dma_pq_dbg(ppc440spe_chan->device->id,
2872				    dst, src, src_cnt));
2873	BUG_ON(!len);
2874	BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT);
2875	BUG_ON(!src_cnt);
2876
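	/* Special cases where the Q destination overlaps one of the sources
	 * (typically hit by the RAID-6 recovery helpers): handle them on
	 * DMA0/1 with dedicated multiply and sum-product descriptor chains.
	 */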
2877	if (src_cnt == 1 && dst[1] == src[0]) {
2878		dma_addr_t dest[2];
2879
2880		/* dst[1] is real destination (Q) */
2881		dest[0] = dst[1];
2882		/* this is the page to multicast source data to */
2883		dest[1] = ppc440spe_chan->qdest;
2884		sw_desc = ppc440spe_dma01_prep_mult(ppc440spe_chan,
2885				dest, 2, src, src_cnt, scf, len, flags);
2886		return sw_desc ? &sw_desc->async_tx : NULL;
2887	}
2888
2889	if (src_cnt == 2 && dst[1] == src[1]) {
2890		sw_desc = ppc440spe_dma01_prep_sum_product(ppc440spe_chan,
2891					&dst[1], src, 2, scf, len, flags);
2892		return sw_desc ? &sw_desc->async_tx : NULL;
2893	}
2894
2895	if (!(flags & DMA_PREP_PQ_DISABLE_P)) {
2896		BUG_ON(!dst[0]);
2897		dst_cnt++;
2898		flags |= DMA_PREP_ZERO_P;
2899	}
2900
2901	if (!(flags & DMA_PREP_PQ_DISABLE_Q)) {
2902		BUG_ON(!dst[1]);
2903		dst_cnt++;
2904		flags |= DMA_PREP_ZERO_Q;
2905	}
2906
2907	BUG_ON(!dst_cnt);
2908
2909	dev_dbg(ppc440spe_chan->device->common.dev,
2910		"ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n",
2911		ppc440spe_chan->device->id, __func__, src_cnt, len,
2912		flags & DMA_PREP_INTERRUPT ? 1 : 0);
2913
2914	switch (ppc440spe_chan->device->id) {
2915	case PPC440SPE_DMA0_ID:
2916	case PPC440SPE_DMA1_ID:
2917		sw_desc = ppc440spe_dma01_prep_pq(ppc440spe_chan,
2918				dst, dst_cnt, src, src_cnt, scf,
2919				len, flags);
2920		break;
2921
2922	case PPC440SPE_XOR_ID:
2923		sw_desc = ppc440spe_dma2_prep_pq(ppc440spe_chan,
2924				dst, dst_cnt, src, src_cnt, scf,
2925				len, flags);
2926		break;
2927	}
2928
2929	return sw_desc ? &sw_desc->async_tx : NULL;
2930}
2931
2932/**
2933 * ppc440spe_adma_prep_dma_pqzero_sum - prepare CDB group for
2934 * a PQ_ZERO_SUM operation
2935 */
2936static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pqzero_sum(
2937		struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
2938		unsigned int src_cnt, const unsigned char *scf, size_t len,
2939		enum sum_check_flags *pqres, unsigned long flags)
2940{
2941	struct ppc440spe_adma_chan *ppc440spe_chan;
2942	struct ppc440spe_adma_desc_slot *sw_desc, *iter;
2943	dma_addr_t pdest, qdest;
2944	int slot_cnt, slots_per_op, idst, dst_cnt;
2945
2946	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
2947
2948	if (flags & DMA_PREP_PQ_DISABLE_P)
2949		pdest = 0;
2950	else
2951		pdest = pq[0];
2952
2953	if (flags & DMA_PREP_PQ_DISABLE_Q)
2954		qdest = 0;
2955	else
2956		qdest = pq[1];
2957
2958	ADMA_LL_DBG(prep_dma_pqzero_sum_dbg(ppc440spe_chan->device->id,
2959					    src, src_cnt, scf));
2960
2961	/* Always use WXOR for P/Q calculations (two destinations).
2962	 * Need 1 or 2 extra slots to verify results are zero.
2963	 */
2964	idst = dst_cnt = (pdest && qdest) ? 2 : 1;
2965
2966	/* One additional slot per destination to clone P/Q
2967	 * before calculation (we have to preserve destinations).
2968	 */
2969	slot_cnt = src_cnt + dst_cnt * 2;
2970	slots_per_op = 1;
2971
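	/* Resulting chain layout (a sketch): dst_cnt CDBs that copy the
	 * original P/Q into the channel's spare pdest/qdest pages, src_cnt
	 * WXOR CDBs that XOR the re-computed parity into those pages
	 * (leaving zeroes if the data is consistent), and dst_cnt DCHECK128
	 * CDBs that verify the pages against the zero pattern.
	 */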
2972	spin_lock_bh(&ppc440spe_chan->lock);
2973	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
2974					     slots_per_op);
2975	if (sw_desc) {
2976		ppc440spe_desc_init_dma01pqzero_sum(sw_desc, dst_cnt, src_cnt);
2977
2978		/* Setup byte count for each slot just allocated */
2979		sw_desc->async_tx.flags = flags;
2980		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
2981			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
2982						      len);
2983			iter->unmap_len = len;
2984		}
2985
2986		if (pdest) {
2987			struct dma_cdb *hw_desc;
2988			struct ppc440spe_adma_chan *chan;
2989
2990			iter = sw_desc->group_head;
2991			chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
2992			memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
2993			iter->hw_next = list_entry(iter->chain_node.next,
2994						struct ppc440spe_adma_desc_slot,
2995						chain_node);
2996			hw_desc = iter->hw_desc;
2997			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
2998			iter->src_cnt = 0;
2999			iter->dst_cnt = 0;
3000			ppc440spe_desc_set_dest_addr(iter, chan, 0,
3001						     ppc440spe_chan->pdest, 0);
3002			ppc440spe_desc_set_src_addr(iter, chan, 0, 0, pdest);
3003			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
3004						      len);
3005			iter->unmap_len = 0;
3006			/* override pdest to preserve original P */
3007			pdest = ppc440spe_chan->pdest;
3008		}
3009		if (qdest) {
3010			struct dma_cdb *hw_desc;
3011			struct ppc440spe_adma_chan *chan;
3012
3013			iter = list_first_entry(&sw_desc->group_list,
3014						struct ppc440spe_adma_desc_slot,
3015						chain_node);
3016			chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
3017
3018			if (pdest) {
3019				iter = list_entry(iter->chain_node.next,
3020						struct ppc440spe_adma_desc_slot,
3021						chain_node);
3022			}
3023
3024			memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
3025			iter->hw_next = list_entry(iter->chain_node.next,
3026						struct ppc440spe_adma_desc_slot,
3027						chain_node);
3028			hw_desc = iter->hw_desc;
3029			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
3030			iter->src_cnt = 0;
3031			iter->dst_cnt = 0;
3032			ppc440spe_desc_set_dest_addr(iter, chan, 0,
3033						     ppc440spe_chan->qdest, 0);
3034			ppc440spe_desc_set_src_addr(iter, chan, 0, 0, qdest);
3035			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
3036						      len);
3037			iter->unmap_len = 0;
3038			/* override qdest to preserve original Q */
3039			qdest = ppc440spe_chan->qdest;
3040		}
3041
3042		/* Setup destinations for P/Q ops */
3043		ppc440spe_adma_pqzero_sum_set_dest(sw_desc, pdest, qdest);
3044
3045		/* Setup zero QWORDs into DCHECK CDBs */
3046		idst = dst_cnt;
3047		list_for_each_entry_reverse(iter, &sw_desc->group_list,
3048					    chain_node) {
3049			/*
3050			 * The last CDB corresponds to the Q-parity check,
3051			 * the one before it corresponds to the
3052			 * P-parity check
3053			 */
3054			if (idst == DMA_DEST_MAX_NUM) {
3055				if (idst == dst_cnt) {
3056					set_bit(PPC440SPE_DESC_QCHECK,
3057						&iter->flags);
3058				} else {
3059					set_bit(PPC440SPE_DESC_PCHECK,
3060						&iter->flags);
3061				}
3062			} else {
3063				if (qdest) {
3064					set_bit(PPC440SPE_DESC_QCHECK,
3065						&iter->flags);
3066				} else {
3067					set_bit(PPC440SPE_DESC_PCHECK,
3068						&iter->flags);
3069				}
3070			}
3071			iter->xor_check_result = pqres;
3072
3073			/*
3074			 * set it to zero; if the check fails, the result
3075			 * will be updated
3076			 */
3077			*iter->xor_check_result = 0;
3078			ppc440spe_desc_set_dcheck(iter, ppc440spe_chan,
3079				ppc440spe_qword);
3080
3081			if (!(--dst_cnt))
3082				break;
3083		}
3084
3085		/* Setup sources and mults for P/Q ops */
3086		list_for_each_entry_continue_reverse(iter, &sw_desc->group_list,
3087						     chain_node) {
3088			struct ppc440spe_adma_chan *chan;
3089			u32 mult_dst;
3090
3091			chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
3092			ppc440spe_desc_set_src_addr(iter, chan, 0,
3093						    DMA_CUED_XOR_HB,
3094						    src[src_cnt - 1]);
3095			if (qdest) {
3096				mult_dst = (dst_cnt - 1) ? DMA_CDB_SG_DST2 :
3097							   DMA_CDB_SG_DST1;
3098				ppc440spe_desc_set_src_mult(iter, chan,
3099							    DMA_CUED_MULT1_OFF,
3100							    mult_dst,
3101							    scf[src_cnt - 1]);
3102			}
3103			if (!(--src_cnt))
3104				break;
3105		}
3106	}
3107	spin_unlock_bh(&ppc440spe_chan->lock);
3108	return sw_desc ? &sw_desc->async_tx : NULL;
3109}
3110
3111/**
3112 * ppc440spe_adma_prep_dma_xor_zero_sum - prepare CDB group for
3113 * XOR ZERO_SUM operation
3114 */
3115static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor_zero_sum(
3116		struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
3117		size_t len, enum sum_check_flags *result, unsigned long flags)
3118{
3119	struct dma_async_tx_descriptor *tx;
3120	dma_addr_t pq[2];
3121
3122	/* validate P, disable Q */
3123	pq[0] = src[0];
3124	pq[1] = 0;
3125	flags |= DMA_PREP_PQ_DISABLE_Q;
3126
3127	tx = ppc440spe_adma_prep_dma_pqzero_sum(chan, pq, &src[1],
3128						src_cnt - 1, 0, len,
3129						result, flags);
3130	return tx;
3131}
3132
3133/**
3134 * ppc440spe_adma_set_dest - set destination address into descriptor
3135 */
3136static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *sw_desc,
3137		dma_addr_t addr, int index)
3138{
3139	struct ppc440spe_adma_chan *chan;
3140
3141	BUG_ON(index >= sw_desc->dst_cnt);
3142
3143	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
3144
3145	switch (chan->device->id) {
3146	case PPC440SPE_DMA0_ID:
3147	case PPC440SPE_DMA1_ID:
3148		/* to do: support transfer lengths >
3149		 * PPC440SPE_ADMA_DMA/XOR_MAX_BYTE_COUNT
3150		 */
3151		ppc440spe_desc_set_dest_addr(sw_desc->group_head,
3152			chan, 0, addr, index);
3153		break;
3154	case PPC440SPE_XOR_ID:
3155		sw_desc = ppc440spe_get_group_entry(sw_desc, index);
3156		ppc440spe_desc_set_dest_addr(sw_desc,
3157			chan, 0, addr, index);
3158		break;
3159	}
3160}
3161
3162static void ppc440spe_adma_pq_zero_op(struct ppc440spe_adma_desc_slot *iter,
3163		struct ppc440spe_adma_chan *chan, dma_addr_t addr)
3164{
3165	/*  To clear destinations update the descriptor
3166	 * (P or Q depending on index) as follows:
3167	 * addr is destination (0 corresponds to SG2):
3168	 */
3169	ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, addr, 0);
3170
3171	/* ... and the addr is source: */
3172	ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, addr);
3173
3174	/* addr is always SG2, so the mult is always DST1 */
3175	ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
3176				    DMA_CDB_SG_DST1, 1);
3177}
3178
3179/**
3180 * ppc440spe_adma_pq_set_dest - set destination address into descriptor
3181 * for the PQXOR operation
3182 */
3183static void ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *sw_desc,
3184		dma_addr_t *addrs, unsigned long flags)
3185{
3186	struct ppc440spe_adma_desc_slot *iter;
3187	struct ppc440spe_adma_chan *chan;
3188	dma_addr_t paddr, qaddr;
3189	dma_addr_t addr = 0, ppath, qpath;
3190	int index = 0, i;
3191
3192	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
3193
3194	if (flags & DMA_PREP_PQ_DISABLE_P)
3195		paddr = 0;
3196	else
3197		paddr = addrs[0];
3198
3199	if (flags & DMA_PREP_PQ_DISABLE_Q)
3200		qaddr = 0;
3201	else
3202		qaddr = addrs[1];
3203
3204	if (!paddr || !qaddr)
3205		addr = paddr ? paddr : qaddr;
3206
3207	switch (chan->device->id) {
3208	case PPC440SPE_DMA0_ID:
3209	case PPC440SPE_DMA1_ID:
3210		/* walk through the WXOR source list and set P/Q-destinations
3211		 * for each slot:
3212		 */
3213		if (!test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
3214			/* This is a WXOR-only chain; it may have 1 or 2 zeroing descs */
3215			if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
3216				index++;
3217			if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
3218				index++;
3219
3220			iter = ppc440spe_get_group_entry(sw_desc, index);
3221			if (addr) {
3222				/* one destination */
3223				list_for_each_entry_from(iter,
3224					&sw_desc->group_list, chain_node)
3225					ppc440spe_desc_set_dest_addr(iter, chan,
3226						DMA_CUED_XOR_BASE, addr, 0);
3227			} else {
3228				/* two destinations */
3229				list_for_each_entry_from(iter,
3230					&sw_desc->group_list, chain_node) {
3231					ppc440spe_desc_set_dest_addr(iter, chan,
3232						DMA_CUED_XOR_BASE, paddr, 0);
3233					ppc440spe_desc_set_dest_addr(iter, chan,
3234						DMA_CUED_XOR_BASE, qaddr, 1);
3235				}
3236			}
3237
3238			if (index) {
3239				/*  To clear destinations update the descriptor
3240				 * (1st,2nd, or both depending on flags)
3241				 */
3242				index = 0;
3243				if (test_bit(PPC440SPE_ZERO_P,
3244						&sw_desc->flags)) {
3245					iter = ppc440spe_get_group_entry(
3246							sw_desc, index++);
3247					ppc440spe_adma_pq_zero_op(iter, chan,
3248							paddr);
3249				}
3250
3251				if (test_bit(PPC440SPE_ZERO_Q,
3252						&sw_desc->flags)) {
3253					iter = ppc440spe_get_group_entry(
3254							sw_desc, index++);
3255					ppc440spe_adma_pq_zero_op(iter, chan,
3256							qaddr);
3257				}
3258
3259				return;
3260			}
3261		} else {
3262			/* This is RXOR-only or RXOR/WXOR mixed chain */
3263
3264			/* If we want to include the destinations in the calculation,
3265			 * then make the dest addresses cued with mult=1 (XOR).
3266			 */
3267			ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ?
3268					DMA_CUED_XOR_HB :
3269					DMA_CUED_XOR_BASE |
3270						(1 << DMA_CUED_MULT1_OFF);
3271			qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ?
3272					DMA_CUED_XOR_HB :
3273					DMA_CUED_XOR_BASE |
3274						(1 << DMA_CUED_MULT1_OFF);
3275
3276			/* Setup destination(s) in RXOR slot(s) */
3277			iter = ppc440spe_get_group_entry(sw_desc, index++);
3278			ppc440spe_desc_set_dest_addr(iter, chan,
3279						paddr ? ppath : qpath,
3280						paddr ? paddr : qaddr, 0);
3281			if (!addr) {
3282				/* two destinations */
3283				iter = ppc440spe_get_group_entry(sw_desc,
3284								 index++);
3285				ppc440spe_desc_set_dest_addr(iter, chan,
3286						qpath, qaddr, 0);
3287			}
3288
3289			if (test_bit(PPC440SPE_DESC_WXOR, &sw_desc->flags)) {
3290				/* Setup destination(s) in remaining WXOR
3291				 * slots
3292				 */
3293				iter = ppc440spe_get_group_entry(sw_desc,
3294								 index);
3295				if (addr) {
3296					/* one destination */
3297					list_for_each_entry_from(iter,
3298					    &sw_desc->group_list,
3299					    chain_node)
3300						ppc440spe_desc_set_dest_addr(
3301							iter, chan,
3302							DMA_CUED_XOR_BASE,
3303							addr, 0);
3304
3305				} else {
3306					/* two destinations */
3307					list_for_each_entry_from(iter,
3308					    &sw_desc->group_list,
3309					    chain_node) {
3310						ppc440spe_desc_set_dest_addr(
3311							iter, chan,
3312							DMA_CUED_XOR_BASE,
3313							paddr, 0);
3314						ppc440spe_desc_set_dest_addr(
3315							iter, chan,
3316							DMA_CUED_XOR_BASE,
3317							qaddr, 1);
3318					}
3319				}
3320			}
3321
3322		}
3323		break;
3324
3325	case PPC440SPE_XOR_ID:
3326		/* DMA2 descriptors have only 1 destination, so there are
3327		 * two chains - one for each dest.
3328		 * If we want to include the destination in the calculation,
3329		 * then make the dest addresses cued with mult=1 (XOR).
3330		 */
3331		ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ?
3332				DMA_CUED_XOR_HB :
3333				DMA_CUED_XOR_BASE |
3334					(1 << DMA_CUED_MULT1_OFF);
3335
3336		qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ?
3337				DMA_CUED_XOR_HB :
3338				DMA_CUED_XOR_BASE |
3339					(1 << DMA_CUED_MULT1_OFF);
3340
3341		iter = ppc440spe_get_group_entry(sw_desc, 0);
3342		for (i = 0; i < sw_desc->descs_per_op; i++) {
3343			ppc440spe_desc_set_dest_addr(iter, chan,
3344				paddr ? ppath : qpath,
3345				paddr ? paddr : qaddr, 0);
3346			iter = list_entry(iter->chain_node.next,
3347					  struct ppc440spe_adma_desc_slot,
3348					  chain_node);
3349		}
3350
3351		if (!addr) {
3352			/* Two destinations; setup Q here */
3353			iter = ppc440spe_get_group_entry(sw_desc,
3354				sw_desc->descs_per_op);
3355			for (i = 0; i < sw_desc->descs_per_op; i++) {
3356				ppc440spe_desc_set_dest_addr(iter,
3357					chan, qpath, qaddr, 0);
3358				iter = list_entry(iter->chain_node.next,
3359						struct ppc440spe_adma_desc_slot,
3360						chain_node);
3361			}
3362		}
3363
3364		break;
3365	}
3366}
3367
3368/**
3369 * ppc440spe_adma_pqzero_sum_set_dest - set destination address into descriptor
3370 * for the PQ_ZERO_SUM operation
3371 */
3372static void ppc440spe_adma_pqzero_sum_set_dest(
3373		struct ppc440spe_adma_desc_slot *sw_desc,
3374		dma_addr_t paddr, dma_addr_t qaddr)
3375{
3376	struct ppc440spe_adma_desc_slot *iter, *end;
3377	struct ppc440spe_adma_chan *chan;
3378	dma_addr_t addr = 0;
3379	int idx;
3380
3381	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
3382
3383	/* walk through the WXOR source list and set P/Q-destinations
3384	 * for each slot
3385	 */
3386	idx = (paddr && qaddr) ? 2 : 1;
3387	/* set end */
3388	list_for_each_entry_reverse(end, &sw_desc->group_list,
3389				    chain_node) {
3390		if (!(--idx))
3391			break;
3392	}
3393	/* set start */
3394	idx = (paddr && qaddr) ? 2 : 1;
3395	iter = ppc440spe_get_group_entry(sw_desc, idx);
3396
3397	if (paddr && qaddr) {
3398		/* two destinations */
3399		list_for_each_entry_from(iter, &sw_desc->group_list,
3400					 chain_node) {
3401			if (unlikely(iter == end))
3402				break;
3403			ppc440spe_desc_set_dest_addr(iter, chan,
3404						DMA_CUED_XOR_BASE, paddr, 0);
3405			ppc440spe_desc_set_dest_addr(iter, chan,
3406						DMA_CUED_XOR_BASE, qaddr, 1);
3407		}
3408	} else {
3409		/* one destination */
3410		addr = paddr ? paddr : qaddr;
3411		list_for_each_entry_from(iter, &sw_desc->group_list,
3412					 chain_node) {
3413			if (unlikely(iter == end))
3414				break;
3415			ppc440spe_desc_set_dest_addr(iter, chan,
3416						DMA_CUED_XOR_BASE, addr, 0);
3417		}
3418	}
3419
3420	/*  The remaining descriptors are DATACHECK. They need no
3421	 * destination; their addresses are instead used as sources
3422	 * for the check operation. So, set addr as the source.
3423	 */
3424	ppc440spe_desc_set_src_addr(end, chan, 0, 0, addr ? addr : paddr);
3425
3426	if (!addr) {
3427		end = list_entry(end->chain_node.next,
3428				 struct ppc440spe_adma_desc_slot, chain_node);
3429		ppc440spe_desc_set_src_addr(end, chan, 0, 0, qaddr);
3430	}
3431}
3432
3433/**
3434 * ppc440spe_desc_set_xor_src_cnt - set source count into descriptor
3435 */
3436static inline void ppc440spe_desc_set_xor_src_cnt(
3437			struct ppc440spe_adma_desc_slot *desc,
3438			int src_cnt)
3439{
3440	struct xor_cb *hw_desc = desc->hw_desc;
3441
3442	hw_desc->cbc &= ~XOR_CDCR_OAC_MSK;
3443	hw_desc->cbc |= src_cnt;
3444}
3445
3446/**
3447 * ppc440spe_adma_pq_set_src - set source address into descriptor
3448 */
3449static void ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *sw_desc,
3450		dma_addr_t addr, int index)
3451{
3452	struct ppc440spe_adma_chan *chan;
3453	dma_addr_t haddr = 0;
3454	struct ppc440spe_adma_desc_slot *iter = NULL;
3455
3456	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
3457
3458	switch (chan->device->id) {
3459	case PPC440SPE_DMA0_ID:
3460	case PPC440SPE_DMA1_ID:
3461		/* DMA0,1 may do: WXOR, RXOR, RXOR+WXORs chain
3462		 */
3463		if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
3464			/* RXOR-only or RXOR/WXOR operation */
3465			int iskip = test_bit(PPC440SPE_DESC_RXOR12,
3466				&sw_desc->flags) ?  2 : 3;
3467
3468			if (index == 0) {
3469				/* 1st slot (RXOR) */
3470				/* setup sources region (R1-2-3, R1-2-4,
3471				 * or R1-2-5)
3472				 */
3473				if (test_bit(PPC440SPE_DESC_RXOR12,
3474						&sw_desc->flags))
3475					haddr = DMA_RXOR12 <<
3476						DMA_CUED_REGION_OFF;
3477				else if (test_bit(PPC440SPE_DESC_RXOR123,
3478				    &sw_desc->flags))
3479					haddr = DMA_RXOR123 <<
3480						DMA_CUED_REGION_OFF;
3481				else if (test_bit(PPC440SPE_DESC_RXOR124,
3482				    &sw_desc->flags))
3483					haddr = DMA_RXOR124 <<
3484						DMA_CUED_REGION_OFF;
3485				else if (test_bit(PPC440SPE_DESC_RXOR125,
3486				    &sw_desc->flags))
3487					haddr = DMA_RXOR125 <<
3488						DMA_CUED_REGION_OFF;
3489				else
3490					BUG();
3491				haddr |= DMA_CUED_XOR_BASE;
3492				iter = ppc440spe_get_group_entry(sw_desc, 0);
3493			} else if (index < iskip) {
3494				/* 1st slot (RXOR):
3495				 * the source address is set only once (at index
3496				 * 0), so skip the first <iskip> indexes here
3497				 */
3498				iter = NULL;
3499			} else {
3500				/* 2nd/3rd and subsequent slots (WXOR);
3501				 * skip the first slot, which holds the RXOR
3502				 */
3503				haddr = DMA_CUED_XOR_HB;
3504				iter = ppc440spe_get_group_entry(sw_desc,
3505				    index - iskip + sw_desc->dst_cnt);
3506			}
3507		} else {
3508			int znum = 0;
3509
3510			/* WXOR-only operation; skip the first slots, which
3511			 * are used for zeroing the destinations
3512			 */
3513			if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
3514				znum++;
3515			if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
3516				znum++;
3517
3518			haddr = DMA_CUED_XOR_HB;
3519			iter = ppc440spe_get_group_entry(sw_desc,
3520					index + znum);
3521		}
3522
3523		if (likely(iter)) {
3524			ppc440spe_desc_set_src_addr(iter, chan, 0, haddr, addr);
3525
3526			if (!index &&
3527			    test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags) &&
3528			    sw_desc->dst_cnt == 2) {
3529				/* if we have two destinations for RXOR, then
3530				 * set up the source in the second descriptor too
3531				 */
3532				iter = ppc440spe_get_group_entry(sw_desc, 1);
3533				ppc440spe_desc_set_src_addr(iter, chan, 0,
3534					haddr, addr);
3535			}
3536		}
3537		break;
3538
3539	case PPC440SPE_XOR_ID:
3540		/* DMA2 may do Biskup */
3541		iter = sw_desc->group_head;
3542		if (iter->dst_cnt == 2) {
3543			/* both P & Q calculations required; set P src here */
3544			ppc440spe_adma_dma2rxor_set_src(iter, index, addr);
3545
3546			/* this is for Q */
3547			iter = ppc440spe_get_group_entry(sw_desc,
3548				sw_desc->descs_per_op);
3549		}
3550		ppc440spe_adma_dma2rxor_set_src(iter, index, addr);
3551		break;
3552	}
3553}
3554
3555/**
3556 * ppc440spe_adma_memcpy_xor_set_src - set source address into descriptor
3557 */
3558static void ppc440spe_adma_memcpy_xor_set_src(
3559		struct ppc440spe_adma_desc_slot *sw_desc,
3560		dma_addr_t addr, int index)
3561{
3562	struct ppc440spe_adma_chan *chan;
3563
3564	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
3565	sw_desc = sw_desc->group_head;
3566
3567	if (likely(sw_desc))
3568		ppc440spe_desc_set_src_addr(sw_desc, chan, index, 0, addr);
3569}
3570
3571/**
3572 * ppc440spe_adma_dma2rxor_inc_addr - advance the RXOR operand cursor
3573 */
3574static void ppc440spe_adma_dma2rxor_inc_addr(
3575		struct ppc440spe_adma_desc_slot *desc,
3576		struct ppc440spe_rxor *cursor, int index, int src_cnt)
3577{
3578	cursor->addr_count++;
3579	if (index == src_cnt - 1) {
3580		ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count);
3581	} else if (cursor->addr_count == XOR_MAX_OPS) {
3582		ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count);
3583		cursor->addr_count = 0;
3584		cursor->desc_count++;
3585	}
3586}
3587
3588/**
3589 * ppc440spe_adma_dma2rxor_prep_src - setup RXOR types in DMA2 CDB
3590 */
3591static int ppc440spe_adma_dma2rxor_prep_src(
3592		struct ppc440spe_adma_desc_slot *hdesc,
3593		struct ppc440spe_rxor *cursor, int index,
3594		int src_cnt, u32 addr)
3595{
3596	int rval = 0;
3597	u32 sign;
3598	struct ppc440spe_adma_desc_slot *desc = hdesc;
3599	int i;
3600
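	/* Simple state machine over the incoming source addresses:
	 * state 0 - looking for the start of a new RXOR group;
	 * state 1 - a pair has been started; decide whether it stays a
	 *           two-address region (R1-2) or extends to a three-address
	 *           region (R1-2-3/4/5);
	 * state 2 - the current group is complete, the next address starts
	 *           a new one.
	 */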
3601	for (i = 0; i < cursor->desc_count; i++) {
3602		desc = list_entry(hdesc->chain_node.next,
3603				  struct ppc440spe_adma_desc_slot,
3604				  chain_node);
3605	}
3606
3607	switch (cursor->state) {
3608	case 0:
3609		if (addr == cursor->addrl + cursor->len) {
3610			/* direct RXOR */
3611			cursor->state = 1;
3612			cursor->xor_count++;
3613			if (index == src_cnt-1) {
3614				ppc440spe_rxor_set_region(desc,
3615					cursor->addr_count,
3616					DMA_RXOR12 << DMA_CUED_REGION_OFF);
3617				ppc440spe_adma_dma2rxor_inc_addr(
3618					desc, cursor, index, src_cnt);
3619			}
3620		} else if (cursor->addrl == addr + cursor->len) {
3621			/* reverse RXOR */
3622			cursor->state = 1;
3623			cursor->xor_count++;
3624			set_bit(cursor->addr_count, &desc->reverse_flags[0]);
3625			if (index == src_cnt-1) {
3626				ppc440spe_rxor_set_region(desc,
3627					cursor->addr_count,
3628					DMA_RXOR12 << DMA_CUED_REGION_OFF);
3629				ppc440spe_adma_dma2rxor_inc_addr(
3630					desc, cursor, index, src_cnt);
3631			}
3632		} else {
3633			printk(KERN_ERR "Cannot build "
3634				"DMA2 RXOR command block.\n");
3635			BUG();
3636		}
3637		break;
3638	case 1:
3639		sign = test_bit(cursor->addr_count,
3640				desc->reverse_flags)
3641			? -1 : 1;
3642		if (index == src_cnt-2 || (sign == -1
3643			&& addr != cursor->addrl - 2*cursor->len)) {
3644			cursor->state = 0;
3645			cursor->xor_count = 1;
3646			cursor->addrl = addr;
3647			ppc440spe_rxor_set_region(desc,
3648				cursor->addr_count,
3649				DMA_RXOR12 << DMA_CUED_REGION_OFF);
3650			ppc440spe_adma_dma2rxor_inc_addr(
3651				desc, cursor, index, src_cnt);
3652		} else if (addr == cursor->addrl + 2*sign*cursor->len) {
3653			cursor->state = 2;
3654			cursor->xor_count = 0;
3655			ppc440spe_rxor_set_region(desc,
3656				cursor->addr_count,
3657				DMA_RXOR123 << DMA_CUED_REGION_OFF);
3658			if (index == src_cnt-1) {
3659				ppc440spe_adma_dma2rxor_inc_addr(
3660					desc, cursor, index, src_cnt);
3661			}
3662		} else if (addr == cursor->addrl + 3*cursor->len) {
3663			cursor->state = 2;
3664			cursor->xor_count = 0;
3665			ppc440spe_rxor_set_region(desc,
3666				cursor->addr_count,
3667				DMA_RXOR124 << DMA_CUED_REGION_OFF);
3668			if (index == src_cnt-1) {
3669				ppc440spe_adma_dma2rxor_inc_addr(
3670					desc, cursor, index, src_cnt);
3671			}
3672		} else if (addr == cursor->addrl + 4*cursor->len) {
3673			cursor->state = 2;
3674			cursor->xor_count = 0;
3675			ppc440spe_rxor_set_region(desc,
3676				cursor->addr_count,
3677				DMA_RXOR125 << DMA_CUED_REGION_OFF);
3678			if (index == src_cnt-1) {
3679				ppc440spe_adma_dma2rxor_inc_addr(
3680					desc, cursor, index, src_cnt);
3681			}
3682		} else {
3683			cursor->state = 0;
3684			cursor->xor_count = 1;
3685			cursor->addrl = addr;
3686			ppc440spe_rxor_set_region(desc,
3687				cursor->addr_count,
3688				DMA_RXOR12 << DMA_CUED_REGION_OFF);
3689			ppc440spe_adma_dma2rxor_inc_addr(
3690				desc, cursor, index, src_cnt);
3691		}
3692		break;
3693	case 2:
3694		cursor->state = 0;
3695		cursor->addrl = addr;
3696		cursor->xor_count++;
3697		if (index) {
3698			ppc440spe_adma_dma2rxor_inc_addr(
3699				desc, cursor, index, src_cnt);
3700		}
3701		break;
3702	}
3703
3704	return rval;
3705}
3706
3707/**
3708 * ppc440spe_adma_dma2rxor_set_src - set RXOR source address; it is assumed
3709 *	that ppc440spe_adma_dma2rxor_prep_src() has already been called
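 *
 * The loop below walks the prepared regions to find the one that covers
 * source @index: a region encoded as RXOR12 consumes two source operands,
 * any wider encoding (RXOR123/124/125) consumes three.  Only the first
 * operand of a direct region (or the last operand of a reversed region)
 * actually carries the address; the remaining operands are skipped.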
3710 */
3711static void ppc440spe_adma_dma2rxor_set_src(
3712		struct ppc440spe_adma_desc_slot *desc,
3713		int index, dma_addr_t addr)
3714{
3715	struct xor_cb *xcb = desc->hw_desc;
3716	int k = 0, op = 0, lop = 0;
3717
3718	/* get the RXOR operand which corresponds to index addr */
3719	while (op <= index) {
3720		lop = op;
3721		if (k == XOR_MAX_OPS) {
3722			k = 0;
3723			desc = list_entry(desc->chain_node.next,
3724				struct ppc440spe_adma_desc_slot, chain_node);
3725			xcb = desc->hw_desc;
3726
3727		}
3728		if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) ==
3729		    (DMA_RXOR12 << DMA_CUED_REGION_OFF))
3730			op += 2;
3731		else
3732			op += 3;
3733	}
3734
3735	BUG_ON(k < 1);
3736
3737	if (test_bit(k-1, desc->reverse_flags)) {
3738		/* reverse operand order; put last op in RXOR group */
3739		if (index == op - 1)
3740			ppc440spe_rxor_set_src(desc, k - 1, addr);
3741	} else {
3742		/* direct operand order; put first op in RXOR group */
3743		if (index == lop)
3744			ppc440spe_rxor_set_src(desc, k - 1, addr);
3745	}
3746}
3747
3748/**
3749 * ppc440spe_adma_dma2rxor_set_mult - set RXOR multipliers; it is assumed
3750 *	that ppc440spe_adma_dma2rxor_prep_src() has already been called
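 *
 * The region lookup mirrors ppc440spe_adma_dma2rxor_set_src(); the
 * multiplier is then stored at position (index - lop) within a direct
 * region, or (op - index - 1) within a reversed one.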
3751 */
3752static void ppc440spe_adma_dma2rxor_set_mult(
3753		struct ppc440spe_adma_desc_slot *desc,
3754		int index, u8 mult)
3755{
3756	struct xor_cb *xcb = desc->hw_desc;
3757	int k = 0, op = 0, lop = 0;
3758
3759	/* get the RXOR operand which corresponds to index mult */
3760	while (op <= index) {
3761		lop = op;
3762		if (k == XOR_MAX_OPS) {
3763			k = 0;
3764			desc = list_entry(desc->chain_node.next,
3765					  struct ppc440spe_adma_desc_slot,
3766					  chain_node);
3767			xcb = desc->hw_desc;
3768
3769		}
3770		if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) ==
3771		    (DMA_RXOR12 << DMA_CUED_REGION_OFF))
3772			op += 2;
3773		else
3774			op += 3;
3775	}
3776
3777	BUG_ON(k < 1);
3778	if (test_bit(k-1, desc->reverse_flags)) {
3779		/* reverse order */
3780		ppc440spe_rxor_set_mult(desc, k - 1, op - index - 1, mult);
3781	} else {
3782		/* direct order */
3783		ppc440spe_rxor_set_mult(desc, k - 1, index - lop, mult);
3784	}
3785}
3786
3787/**
3788 * ppc440spe_init_rxor_cursor - reset the RXOR address cursor (initial state 2: the first address opens a new region)
3789 */
3790static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor)
3791{
3792	memset(cursor, 0, sizeof(struct ppc440spe_rxor));
3793	cursor->state = 2;
3794}
3795
3796/**
3797 * ppc440spe_adma_pq_set_src_mult - set multiplication coefficient into
3798 * descriptor for the PQXOR operation
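 *
 * For DMA0/1, sources that belong to the RXOR prologue (the first two or
 * three, depending on the region type) have their coefficients packed at
 * DMA_CUED_MULT1_OFF + 8 * index, while the remaining WXOR sources carry
 * the coefficient in their own descriptor.  The DMA2 (XOR engine) case
 * delegates to ppc440spe_adma_dma2rxor_set_mult().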
3799 */
3800static void ppc440spe_adma_pq_set_src_mult(
3801		struct ppc440spe_adma_desc_slot *sw_desc,
3802		unsigned char mult, int index, int dst_pos)
3803{
3804	struct ppc440spe_adma_chan *chan;
3805	u32 mult_idx, mult_dst;
3806	struct ppc440spe_adma_desc_slot *iter = NULL, *iter1 = NULL;
3807
3808	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
3809
3810	switch (chan->device->id) {
3811	case PPC440SPE_DMA0_ID:
3812	case PPC440SPE_DMA1_ID:
3813		if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
3814			int region = test_bit(PPC440SPE_DESC_RXOR12,
3815					&sw_desc->flags) ? 2 : 3;
3816
3817			if (index < region) {
3818				/* RXOR multipliers */
3819				iter = ppc440spe_get_group_entry(sw_desc,
3820					sw_desc->dst_cnt - 1);
3821				if (sw_desc->dst_cnt == 2)
3822					iter1 = ppc440spe_get_group_entry(
3823							sw_desc, 0);
3824
3825				mult_idx = DMA_CUED_MULT1_OFF + (index << 3);
3826				mult_dst = DMA_CDB_SG_SRC;
3827			} else {
3828				/* WXOR multiplier */
3829				iter = ppc440spe_get_group_entry(sw_desc,
3830							index - region +
3831							sw_desc->dst_cnt);
3832				mult_idx = DMA_CUED_MULT1_OFF;
3833				mult_dst = dst_pos ? DMA_CDB_SG_DST2 :
3834						     DMA_CDB_SG_DST1;
3835			}
3836		} else {
3837			int znum = 0;
3838
3839			/* WXOR-only;
3840			 * skip the first slots, which hold the destinations
3841			 * (when P/Q zeroing takes place)
3842			 */
3843			if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
3844				znum++;
3845			if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
3846				znum++;
3847
3848			iter = ppc440spe_get_group_entry(sw_desc, index + znum);
3849			mult_idx = DMA_CUED_MULT1_OFF;
3850			mult_dst = dst_pos ? DMA_CDB_SG_DST2 : DMA_CDB_SG_DST1;
3851		}
3852
3853		if (likely(iter)) {
3854			ppc440spe_desc_set_src_mult(iter, chan,
3855				mult_idx, mult_dst, mult);
3856
3857			if (unlikely(iter1)) {
3858				/* if we have two destinations for RXOR, then
3859				 * we've just set Q mult. Set-up P now.
3860				 */
3861				ppc440spe_desc_set_src_mult(iter1, chan,
3862					mult_idx, mult_dst, 1);
3863			}
3864
3865		}
3866		break;
3867
3868	case PPC440SPE_XOR_ID:
3869		iter = sw_desc->group_head;
3870		if (sw_desc->dst_cnt == 2) {
3871			/* both P & Q calculations required; set P mult here */
3872			ppc440spe_adma_dma2rxor_set_mult(iter, index, 1);
3873
3874			/* and then set Q mult */
3875			iter = ppc440spe_get_group_entry(sw_desc,
3876			       sw_desc->descs_per_op);
3877		}
3878		ppc440spe_adma_dma2rxor_set_mult(iter, index, mult);
3879		break;
3880	}
3881}
3882
3883/**
3884 * ppc440spe_adma_free_chan_resources - free the resources allocated to the channel
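 *
 * Note: one descriptor may legitimately remain on the channel chain
 * (presumably the initial null descriptor that is left there on purpose),
 * which is why only in_use_descs > 1 triggers the warning below.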
3885 */
3886static void ppc440spe_adma_free_chan_resources(struct dma_chan *chan)
3887{
3888	struct ppc440spe_adma_chan *ppc440spe_chan;
3889	struct ppc440spe_adma_desc_slot *iter, *_iter;
3890	int in_use_descs = 0;
3891
3892	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
3893	ppc440spe_adma_slot_cleanup(ppc440spe_chan);
3894
3895	spin_lock_bh(&ppc440spe_chan->lock);
3896	list_for_each_entry_safe(iter, _iter, &ppc440spe_chan->chain,
3897					chain_node) {
3898		in_use_descs++;
3899		list_del(&iter->chain_node);
3900	}
3901	list_for_each_entry_safe_reverse(iter, _iter,
3902			&ppc440spe_chan->all_slots, slot_node) {
3903		list_del(&iter->slot_node);
3904		kfree(iter);
3905		ppc440spe_chan->slots_allocated--;
3906	}
3907	ppc440spe_chan->last_used = NULL;
3908
3909	dev_dbg(ppc440spe_chan->device->common.dev,
3910		"ppc440spe adma%d %s slots_allocated %d\n",
3911		ppc440spe_chan->device->id,
3912		__func__, ppc440spe_chan->slots_allocated);
3913	spin_unlock_bh(&ppc440spe_chan->lock);
3914
3915	/* one is OK since we left it there on purpose */
3916	if (in_use_descs > 1)
3917		printk(KERN_ERR "SPE: Freeing %d in use descriptors!\n",
3918			in_use_descs - 1);
3919}
3920
3921/**
3922 * ppc440spe_adma_tx_status - poll the status of an ADMA transaction
3923 * @chan: ADMA channel handle
3924 * @cookie: ADMA transaction identifier
3925 * @txstate: a holder for the current state of the channel
3926 */
3927static enum dma_status ppc440spe_adma_tx_status(struct dma_chan *chan,
3928			dma_cookie_t cookie, struct dma_tx_state *txstate)
3929{
3930	struct ppc440spe_adma_chan *ppc440spe_chan;
3931	enum dma_status ret;
3932
3933	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
3934	ret = dma_cookie_status(chan, cookie, txstate);
3935	if (ret == DMA_SUCCESS)
3936		return ret;
3937
3938	ppc440spe_adma_slot_cleanup(ppc440spe_chan);
3939
3940	return dma_cookie_status(chan, cookie, txstate);
3941}
3942
3943/**
3944 * ppc440spe_adma_eot_handler - end of transfer interrupt handler
3945 */
3946static irqreturn_t ppc440spe_adma_eot_handler(int irq, void *data)
3947{
3948	struct ppc440spe_adma_chan *chan = data;
3949
3950	dev_dbg(chan->device->common.dev,
3951		"ppc440spe adma%d: %s\n", chan->device->id, __func__);
3952
3953	tasklet_schedule(&chan->irq_tasklet);
3954	ppc440spe_adma_device_clear_eot_status(chan);
3955
3956	return IRQ_HANDLED;
3957}
3958
3959/**
3960 * ppc440spe_adma_err_handler - DMA error interrupt handler;
3961 *	do the same things as an EOT handler
3962 */
3963static irqreturn_t ppc440spe_adma_err_handler(int irq, void *data)
3964{
3965	struct ppc440spe_adma_chan *chan = data;
3966
3967	dev_dbg(chan->device->common.dev,
3968		"ppc440spe adma%d: %s\n", chan->device->id, __func__);
3969
3970	tasklet_schedule(&chan->irq_tasklet);
3971	ppc440spe_adma_device_clear_eot_status(chan);
3972
3973	return IRQ_HANDLED;
3974}
3975
3976/**
3977 * ppc440spe_test_callback - called when test operation has been done
3978 */
3979static void ppc440spe_test_callback(void *unused)
3980{
3981	complete(&ppc440spe_r6_test_comp);
3982}
3983
3984/**
3985 * ppc440spe_adma_issue_pending - flush all pending descriptors to h/w
3986 */
3987static void ppc440spe_adma_issue_pending(struct dma_chan *chan)
3988{
3989	struct ppc440spe_adma_chan *ppc440spe_chan;
3990
3991	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
3992	dev_dbg(ppc440spe_chan->device->common.dev,
3993		"ppc440spe adma%d: %s %d \n", ppc440spe_chan->device->id,
3994		__func__, ppc440spe_chan->pending);
3995
3996	if (ppc440spe_chan->pending) {
3997		ppc440spe_chan->pending = 0;
3998		ppc440spe_chan_append(ppc440spe_chan);
3999	}
4000}
4001
4002/**
4003 * ppc440spe_chan_start_null_xor - initiate the first XOR operation (the DMA
4004 *	engines use FIFOs, as opposed to the descriptor chains used by the XOR
4005 *	engine, so this is an XOR-specific operation)
4006 */
4007static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan)
4008{
4009	struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
4010	dma_cookie_t cookie;
4011	int slot_cnt, slots_per_op;
4012
4013	dev_dbg(chan->device->common.dev,
4014		"ppc440spe adma%d: %s\n", chan->device->id, __func__);
4015
4016	spin_lock_bh(&chan->lock);
4017	slot_cnt = ppc440spe_chan_xor_slot_count(0, 2, &slots_per_op);
4018	sw_desc = ppc440spe_adma_alloc_slots(chan, slot_cnt, slots_per_op);
4019	if (sw_desc) {
4020		group_start = sw_desc->group_head;
4021		list_splice_init(&sw_desc->group_list, &chan->chain);
4022		async_tx_ack(&sw_desc->async_tx);
4023		ppc440spe_desc_init_null_xor(group_start);
4024
4025		cookie = dma_cookie_assign(&sw_desc->async_tx);
4026
4027		/* initialize the completed cookie to be less than
4028		 * the most recently used cookie
4029		 */
4030		chan->common.completed_cookie = cookie - 1;
4031
4032		/* channel should not be busy */
4033		BUG_ON(ppc440spe_chan_is_busy(chan));
4034
4035		/* set the descriptor address */
4036		ppc440spe_chan_set_first_xor_descriptor(chan, sw_desc);
4037
4038		/* run the descriptor */
4039		ppc440spe_chan_run(chan);
4040	} else
4041		printk(KERN_ERR "ppc440spe adma%d"
4042			" failed to allocate null descriptor\n",
4043			chan->device->id);
4044	spin_unlock_bh(&chan->lock);
4045}
4046
4047/**
4048 * ppc440spe_test_raid6 - test whether RAID-6 capabilities were enabled successfully.
4049 *	For this we just perform one WXOR operation with the same source
4050 *	and destination addresses, the GF-multiplier is 1; so if RAID-6
4051 *	capabilities are enabled then we'll get src/dst filled with zero.
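 *	In other words, since the destination page is also the source and the
 *	multiplier is 1, the engine effectively computes dst = dst XOR src = 0,
 *	so an all-zero page after the operation means the engine works.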
4052 */
4053static int ppc440spe_test_raid6(struct ppc440spe_adma_chan *chan)
4054{
4055	struct ppc440spe_adma_desc_slot *sw_desc, *iter;
4056	struct page *pg;
4057	char *a;
4058	dma_addr_t dma_addr, addrs[2];
4059	unsigned long op = 0;
4060	int rval = 0;
4061
4062	set_bit(PPC440SPE_DESC_WXOR, &op);
4063
4064	pg = alloc_page(GFP_KERNEL);
4065	if (!pg)
4066		return -ENOMEM;
4067
4068	spin_lock_bh(&chan->lock);
4069	sw_desc = ppc440spe_adma_alloc_slots(chan, 1, 1);
4070	if (sw_desc) {
4071		/* 1 src, 1 dst, int_ena, WXOR */
4072		ppc440spe_desc_init_dma01pq(sw_desc, 1, 1, 1, op);
4073		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
4074			ppc440spe_desc_set_byte_count(iter, chan, PAGE_SIZE);
4075			iter->unmap_len = PAGE_SIZE;
4076		}
4077	} else {
4078		rval = -EFAULT;
4079		spin_unlock_bh(&chan->lock);
4080		goto exit;
4081	}
4082	spin_unlock_bh(&chan->lock);
4083
4084	/* Fill the test page with ones */
4085	memset(page_address(pg), 0xFF, PAGE_SIZE);
4086	dma_addr = dma_map_page(chan->device->dev, pg, 0,
4087				PAGE_SIZE, DMA_BIDIRECTIONAL);
4088
4089	/* Setup addresses */
4090	ppc440spe_adma_pq_set_src(sw_desc, dma_addr, 0);
4091	ppc440spe_adma_pq_set_src_mult(sw_desc, 1, 0, 0);
4092	addrs[0] = dma_addr;
4093	addrs[1] = 0;
4094	ppc440spe_adma_pq_set_dest(sw_desc, addrs, DMA_PREP_PQ_DISABLE_Q);
4095
4096	async_tx_ack(&sw_desc->async_tx);
4097	sw_desc->async_tx.callback = ppc440spe_test_callback;
4098	sw_desc->async_tx.callback_param = NULL;
4099
4100	init_completion(&ppc440spe_r6_test_comp);
4101
4102	ppc440spe_adma_tx_submit(&sw_desc->async_tx);
4103	ppc440spe_adma_issue_pending(&chan->common);
4104
4105	wait_for_completion(&ppc440spe_r6_test_comp);
4106
4107	/* Now check if the test page is zeroed */
4108	a = page_address(pg);
4109	if ((*(u32 *)a) == 0 && memcmp(a, a+4, PAGE_SIZE-4) == 0) {
4110		/* page is zero - RAID-6 enabled */
4111		rval = 0;
4112	} else {
4113		/* RAID-6 was not enabled */
4114		rval = -EINVAL;
4115	}
4116exit:
4117	__free_page(pg);
4118	return rval;
4119}
4120
4121static void ppc440spe_adma_init_capabilities(struct ppc440spe_adma_device *adev)
4122{
4123	switch (adev->id) {
4124	case PPC440SPE_DMA0_ID:
4125	case PPC440SPE_DMA1_ID:
4126		dma_cap_set(DMA_MEMCPY, adev->common.cap_mask);
4127		dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask);
4128		dma_cap_set(DMA_MEMSET, adev->common.cap_mask);
4129		dma_cap_set(DMA_PQ, adev->common.cap_mask);
4130		dma_cap_set(DMA_PQ_VAL, adev->common.cap_mask);
4131		dma_cap_set(DMA_XOR_VAL, adev->common.cap_mask);
4132		break;
4133	case PPC440SPE_XOR_ID:
4134		dma_cap_set(DMA_XOR, adev->common.cap_mask);
4135		dma_cap_set(DMA_PQ, adev->common.cap_mask);
4136		dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask);
4138		break;
4139	}
4140
4141	/* Set base routines */
4142	adev->common.device_alloc_chan_resources =
4143				ppc440spe_adma_alloc_chan_resources;
4144	adev->common.device_free_chan_resources =
4145				ppc440spe_adma_free_chan_resources;
4146	adev->common.device_tx_status = ppc440spe_adma_tx_status;
4147	adev->common.device_issue_pending = ppc440spe_adma_issue_pending;
4148
4149	/* Set prep routines based on capability */
4150	if (dma_has_cap(DMA_MEMCPY, adev->common.cap_mask)) {
4151		adev->common.device_prep_dma_memcpy =
4152			ppc440spe_adma_prep_dma_memcpy;
4153	}
4154	if (dma_has_cap(DMA_MEMSET, adev->common.cap_mask)) {
4155		adev->common.device_prep_dma_memset =
4156			ppc440spe_adma_prep_dma_memset;
4157	}
4158	if (dma_has_cap(DMA_XOR, adev->common.cap_mask)) {
4159		adev->common.max_xor = XOR_MAX_OPS;
4160		adev->common.device_prep_dma_xor =
4161			ppc440spe_adma_prep_dma_xor;
4162	}
4163	if (dma_has_cap(DMA_PQ, adev->common.cap_mask)) {
4164		switch (adev->id) {
4165		case PPC440SPE_DMA0_ID:
4166			dma_set_maxpq(&adev->common,
4167				DMA0_FIFO_SIZE / sizeof(struct dma_cdb), 0);
4168			break;
4169		case PPC440SPE_DMA1_ID:
4170			dma_set_maxpq(&adev->common,
4171				DMA1_FIFO_SIZE / sizeof(struct dma_cdb), 0);
4172			break;
4173		case PPC440SPE_XOR_ID:
4174			adev->common.max_pq = XOR_MAX_OPS * 3;
4175			break;
4176		}
4177		adev->common.device_prep_dma_pq =
4178			ppc440spe_adma_prep_dma_pq;
4179	}
4180	if (dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask)) {
4181		switch (adev->id) {
4182		case PPC440SPE_DMA0_ID:
4183			adev->common.max_pq = DMA0_FIFO_SIZE /
4184						sizeof(struct dma_cdb);
4185			break;
4186		case PPC440SPE_DMA1_ID:
4187			adev->common.max_pq = DMA1_FIFO_SIZE /
4188						sizeof(struct dma_cdb);
4189			break;
4190		}
4191		adev->common.device_prep_dma_pq_val =
4192			ppc440spe_adma_prep_dma_pqzero_sum;
4193	}
4194	if (dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask)) {
4195		switch (adev->id) {
4196		case PPC440SPE_DMA0_ID:
4197			adev->common.max_xor = DMA0_FIFO_SIZE /
4198						sizeof(struct dma_cdb);
4199			break;
4200		case PPC440SPE_DMA1_ID:
4201			adev->common.max_xor = DMA1_FIFO_SIZE /
4202						sizeof(struct dma_cdb);
4203			break;
4204		}
4205		adev->common.device_prep_dma_xor_val =
4206			ppc440spe_adma_prep_dma_xor_zero_sum;
4207	}
4208	if (dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask)) {
4209		adev->common.device_prep_dma_interrupt =
4210			ppc440spe_adma_prep_dma_interrupt;
4211	}
4212	pr_info("%s: AMCC(R) PPC440SP(E) ADMA Engine: "
4213	  "( %s%s%s%s%s%s%s)\n",
4214	  dev_name(adev->dev),
4215	  dma_has_cap(DMA_PQ, adev->common.cap_mask) ? "pq " : "",
4216	  dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask) ? "pq_val " : "",
4217	  dma_has_cap(DMA_XOR, adev->common.cap_mask) ? "xor " : "",
4218	  dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask) ? "xor_val " : "",
4219	  dma_has_cap(DMA_MEMCPY, adev->common.cap_mask) ? "memcpy " : "",
4220	  dma_has_cap(DMA_MEMSET, adev->common.cap_mask)  ? "memset " : "",
4221	  dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask) ? "intr " : "");
4222}
4223
4224static int ppc440spe_adma_setup_irqs(struct ppc440spe_adma_device *adev,
4225				     struct ppc440spe_adma_chan *chan,
4226				     int *initcode)
4227{
4228	struct platform_device *ofdev;
4229	struct device_node *np;
4230	int ret;
4231
4232	ofdev = container_of(adev->dev, struct platform_device, dev);
4233	np = ofdev->dev.of_node;
4234	if (adev->id != PPC440SPE_XOR_ID) {
4235		adev->err_irq = irq_of_parse_and_map(np, 1);
4236		if (adev->err_irq == NO_IRQ) {
4237			dev_warn(adev->dev, "no err irq resource?\n");
4238			*initcode = PPC_ADMA_INIT_IRQ2;
4239			adev->err_irq = -ENXIO;
4240		} else
4241			atomic_inc(&ppc440spe_adma_err_irq_ref);
4242	} else {
4243		adev->err_irq = -ENXIO;
4244	}
4245
4246	adev->irq = irq_of_parse_and_map(np, 0);
4247	if (adev->irq == NO_IRQ) {
4248		dev_err(adev->dev, "no irq resource\n");
4249		*initcode = PPC_ADMA_INIT_IRQ1;
4250		ret = -ENXIO;
4251		goto err_irq_map;
4252	}
4253	dev_dbg(adev->dev, "irq %d, err irq %d\n",
4254		adev->irq, adev->err_irq);
4255
4256	ret = request_irq(adev->irq, ppc440spe_adma_eot_handler,
4257			  0, dev_driver_string(adev->dev), chan);
4258	if (ret) {
4259		dev_err(adev->dev, "can't request irq %d\n",
4260			adev->irq);
4261		*initcode = PPC_ADMA_INIT_IRQ1;
4262		ret = -EIO;
4263		goto err_req1;
4264	}
4265
4266	/* only DMA engines have a separate error IRQ
4267	 * so it's OK if err_irq < 0 in the XOR engine case.
4268	 */
4269	if (adev->err_irq > 0) {
4270		/* both DMA engines share common error IRQ */
4271		ret = request_irq(adev->err_irq,
4272				  ppc440spe_adma_err_handler,
4273				  IRQF_SHARED,
4274				  dev_driver_string(adev->dev),
4275				  chan);
4276		if (ret) {
4277			dev_err(adev->dev, "can't request irq %d\n",
4278				adev->err_irq);
4279			*initcode = PPC_ADMA_INIT_IRQ2;
4280			ret = -EIO;
4281			goto err_req2;
4282		}
4283	}
4284
4285	if (adev->id == PPC440SPE_XOR_ID) {
4286		/* enable XOR engine interrupts */
4287		iowrite32be(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT |
4288			    XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT,
4289			    &adev->xor_reg->ier);
4290	} else {
4291		u32 mask, enable;
4292
4293		np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
4294		if (!np) {
4295			pr_err("%s: can't find I2O device tree node\n",
4296				__func__);
4297			ret = -ENODEV;
4298			goto err_req2;
4299		}
4300		adev->i2o_reg = of_iomap(np, 0);
4301		if (!adev->i2o_reg) {
4302			pr_err("%s: failed to map I2O registers\n", __func__);
4303			of_node_put(np);
4304			ret = -EINVAL;
4305			goto err_req2;
4306		}
4307		of_node_put(np);
4308		/* Unmask 'CS FIFO Attention' interrupts and
4309		 * enable generating interrupts on errors
4310		 */
4311		enable = (adev->id == PPC440SPE_DMA0_ID) ?
4312			 ~(I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) :
4313			 ~(I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM);
4314		mask = ioread32(&adev->i2o_reg->iopim) & enable;
4315		iowrite32(mask, &adev->i2o_reg->iopim);
4316	}
4317	return 0;
4318
4319err_req2:
4320	free_irq(adev->irq, chan);
4321err_req1:
4322	irq_dispose_mapping(adev->irq);
4323err_irq_map:
4324	if (adev->err_irq > 0) {
4325		if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref))
4326			irq_dispose_mapping(adev->err_irq);
4327	}
4328	return ret;
4329}
4330
4331static void ppc440spe_adma_release_irqs(struct ppc440spe_adma_device *adev,
4332					struct ppc440spe_adma_chan *chan)
4333{
4334	u32 mask, disable;
4335
4336	if (adev->id == PPC440SPE_XOR_ID) {
4337		/* disable XOR engine interrupts */
4338		mask = ioread32be(&adev->xor_reg->ier);
4339		mask &= ~(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT |
4340			  XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT);
4341		iowrite32be(mask, &adev->xor_reg->ier);
4342	} else {
4343		/* disable DMAx engine interrupts */
4344		disable = (adev->id == PPC440SPE_DMA0_ID) ?
4345			  (I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) :
4346			  (I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM);
4347		mask = ioread32(&adev->i2o_reg->iopim) | disable;
4348		iowrite32(mask, &adev->i2o_reg->iopim);
4349	}
4350	free_irq(adev->irq, chan);
4351	irq_dispose_mapping(adev->irq);
4352	if (adev->err_irq > 0) {
4353		free_irq(adev->err_irq, chan);
4354		if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref)) {
4355			irq_dispose_mapping(adev->err_irq);
4356			iounmap(adev->i2o_reg);
4357		}
4358	}
4359}
4360
4361/**
4362 * ppc440spe_adma_probe - probe the asynch device
4363 */
4364static int __devinit ppc440spe_adma_probe(struct platform_device *ofdev)
4365{
4366	struct device_node *np = ofdev->dev.of_node;
4367	struct resource res;
4368	struct ppc440spe_adma_device *adev;
4369	struct ppc440spe_adma_chan *chan;
4370	struct ppc_dma_chan_ref *ref, *_ref;
4371	int ret = 0, initcode = PPC_ADMA_INIT_OK;
4372	const u32 *idx;
4373	int len;
4374	void *regs;
4375	u32 id, pool_size;
4376
4377	if (of_device_is_compatible(np, "amcc,xor-accelerator")) {
4378		id = PPC440SPE_XOR_ID;
4379		/* As far as the XOR engine is concerned, it does not
4380		 * use FIFOs but a linked list, so there is no dependency
4381		 * between the pool size to allocate and the engine configuration.
4382		 */
4383		pool_size = PAGE_SIZE << 1;
4384	} else {
4385		/* it is DMA0 or DMA1 */
4386		idx = of_get_property(np, "cell-index", &len);
4387		if (!idx || (len != sizeof(u32))) {
4388			dev_err(&ofdev->dev, "Device node %s has missing "
4389				"or invalid cell-index property\n",
4390				np->full_name);
4391			return -EINVAL;
4392		}
4393		id = *idx;
4394		/* The DMA0/1 engines use a FIFO to maintain CDBs, so we
4395		 * should size the pool according to this FIFO. Thus, the
4396		 * pool size depends on the FIFO depth: the pool must provide
4397		 * as many CDBs as the FIFO can hold CDB pointers.
4399		 * That is
4400		 *   CDB size = 32B;
4401		 *   CDBs number = (DMA0_FIFO_SIZE >> 3);
4402		 *   Pool size = CDBs number * CDB size =
4403		 *      = (DMA0_FIFO_SIZE >> 3) << 5 = DMA0_FIFO_SIZE << 2.
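		 * For example, with a (hypothetical) 4KB FIFO: it holds
		 * 512 CDB pointers, so the pool must provide 512 * 32B = 16KB,
		 * i.e. four times the FIFO size.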
4404		 */
4405		pool_size = (id == PPC440SPE_DMA0_ID) ?
4406			    DMA0_FIFO_SIZE : DMA1_FIFO_SIZE;
4407		pool_size <<= 2;
4408	}
4409
4410	if (of_address_to_resource(np, 0, &res)) {
4411		dev_err(&ofdev->dev, "failed to get memory resource\n");
4412		initcode = PPC_ADMA_INIT_MEMRES;
4413		ret = -ENODEV;
4414		goto out;
4415	}
4416
4417	if (!request_mem_region(res.start, resource_size(&res),
4418				dev_driver_string(&ofdev->dev))) {
4419		dev_err(&ofdev->dev, "failed to request memory region %pR\n",
4420			&res);
4421		initcode = PPC_ADMA_INIT_MEMREG;
4422		ret = -EBUSY;
4423		goto out;
4424	}
4425
4426	/* create a device */
4427	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
4428	if (!adev) {
4429		dev_err(&ofdev->dev, "failed to allocate device\n");
4430		initcode = PPC_ADMA_INIT_ALLOC;
4431		ret = -ENOMEM;
4432		goto err_adev_alloc;
4433	}
4434
4435	adev->id = id;
4436	adev->pool_size = pool_size;
4437	/* allocate coherent memory for hardware descriptors */
4438	adev->dma_desc_pool_virt = dma_alloc_coherent(&ofdev->dev,
4439					adev->pool_size, &adev->dma_desc_pool,
4440					GFP_KERNEL);
4441	if (adev->dma_desc_pool_virt == NULL) {
4442		dev_err(&ofdev->dev, "failed to allocate %d bytes of coherent "
4443			"memory for hardware descriptors\n",
4444			adev->pool_size);
4445		initcode = PPC_ADMA_INIT_COHERENT;
4446		ret = -ENOMEM;
4447		goto err_dma_alloc;
4448	}
4449	dev_dbg(&ofdev->dev, "allocated descriptor pool virt 0x%p phys 0x%llx\n",
4450		adev->dma_desc_pool_virt, (u64)adev->dma_desc_pool);
4451
4452	regs = ioremap(res.start, resource_size(&res));
4453	if (!regs) {
4454		dev_err(&ofdev->dev, "failed to ioremap regs!\n");
4455		goto err_regs_alloc;
4456	}
4457
4458	if (adev->id == PPC440SPE_XOR_ID) {
4459		adev->xor_reg = regs;
4460		/* Reset XOR */
4461		iowrite32be(XOR_CRSR_XASR_BIT, &adev->xor_reg->crsr);
4462		iowrite32be(XOR_CRSR_64BA_BIT, &adev->xor_reg->crrr);
4463	} else {
4464		size_t fifo_size = (adev->id == PPC440SPE_DMA0_ID) ?
4465				   DMA0_FIFO_SIZE : DMA1_FIFO_SIZE;
4466		adev->dma_reg = regs;
4467		/* DMAx_FIFO_SIZE is defined in bytes,
4468		 * <fsiz> is defined as a number of CDB pointers (8 bytes each).
4469		 * DMA FIFO Length = CSlength + CPlength, where
4470		 * CSlength = CPlength = (fsiz + 1) * 8.
4471		 */
4472		iowrite32(DMA_FIFO_ENABLE | ((fifo_size >> 3) - 2),
4473			  &adev->dma_reg->fsiz);
4474		/* Configure DMA engine */
4475		iowrite32(DMA_CFG_DXEPR_HP | DMA_CFG_DFMPP_HP | DMA_CFG_FALGN,
4476			  &adev->dma_reg->cfg);
4477		/* Clear Status */
4478		iowrite32(~0, &adev->dma_reg->dsts);
4479	}
4480
4481	adev->dev = &ofdev->dev;
4482	adev->common.dev = &ofdev->dev;
4483	INIT_LIST_HEAD(&adev->common.channels);
4484	dev_set_drvdata(&ofdev->dev, adev);
4485
4486	/* create a channel */
4487	chan = kzalloc(sizeof(*chan), GFP_KERNEL);
4488	if (!chan) {
4489		dev_err(&ofdev->dev, "can't allocate channel structure\n");
4490		initcode = PPC_ADMA_INIT_CHANNEL;
4491		ret = -ENOMEM;
4492		goto err_chan_alloc;
4493	}
4494
4495	spin_lock_init(&chan->lock);
4496	INIT_LIST_HEAD(&chan->chain);
4497	INIT_LIST_HEAD(&chan->all_slots);
4498	chan->device = adev;
4499	chan->common.device = &adev->common;
4500	dma_cookie_init(&chan->common);
4501	list_add_tail(&chan->common.device_node, &adev->common.channels);
4502	tasklet_init(&chan->irq_tasklet, ppc440spe_adma_tasklet,
4503		     (unsigned long)chan);
4504
4505	/* allocate and map helper pages for async validation or
4506	 * async_mult/async_sum_product operations on DMA0/1.
4507	 */
4508	if (adev->id != PPC440SPE_XOR_ID) {
4509		chan->pdest_page = alloc_page(GFP_KERNEL);
4510		chan->qdest_page = alloc_page(GFP_KERNEL);
4511		if (!chan->pdest_page ||
4512		    !chan->qdest_page) {
4513			if (chan->pdest_page)
4514				__free_page(chan->pdest_page);
4515			if (chan->qdest_page)
4516				__free_page(chan->qdest_page);
4517			ret = -ENOMEM;
4518			goto err_page_alloc;
4519		}
4520		chan->pdest = dma_map_page(&ofdev->dev, chan->pdest_page, 0,
4521					   PAGE_SIZE, DMA_BIDIRECTIONAL);
4522		chan->qdest = dma_map_page(&ofdev->dev, chan->qdest_page, 0,
4523					   PAGE_SIZE, DMA_BIDIRECTIONAL);
4524	}
4525
4526	ref = kmalloc(sizeof(*ref), GFP_KERNEL);
4527	if (ref) {
4528		ref->chan = &chan->common;
4529		INIT_LIST_HEAD(&ref->node);
4530		list_add_tail(&ref->node, &ppc440spe_adma_chan_list);
4531	} else {
4532		dev_err(&ofdev->dev, "failed to allocate channel reference!\n");
4533		ret = -ENOMEM;
4534		goto err_ref_alloc;
4535	}
4536
4537	ret = ppc440spe_adma_setup_irqs(adev, chan, &initcode);
4538	if (ret)
4539		goto err_irq;
4540
4541	ppc440spe_adma_init_capabilities(adev);
4542
4543	ret = dma_async_device_register(&adev->common);
4544	if (ret) {
4545		initcode = PPC_ADMA_INIT_REGISTER;
4546		dev_err(&ofdev->dev, "failed to register dma device\n");
4547		goto err_dev_reg;
4548	}
4549
4550	goto out;
4551
4552err_dev_reg:
4553	ppc440spe_adma_release_irqs(adev, chan);
4554err_irq:
4555	list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list, node) {
4556		if (chan == to_ppc440spe_adma_chan(ref->chan)) {
4557			list_del(&ref->node);
4558			kfree(ref);
4559		}
4560	}
4561err_ref_alloc:
4562	if (adev->id != PPC440SPE_XOR_ID) {
4563		dma_unmap_page(&ofdev->dev, chan->pdest,
4564			       PAGE_SIZE, DMA_BIDIRECTIONAL);
4565		dma_unmap_page(&ofdev->dev, chan->qdest,
4566			       PAGE_SIZE, DMA_BIDIRECTIONAL);
4567		__free_page(chan->pdest_page);
4568		__free_page(chan->qdest_page);
4569	}
4570err_page_alloc:
4571	kfree(chan);
4572err_chan_alloc:
4573	if (adev->id == PPC440SPE_XOR_ID)
4574		iounmap(adev->xor_reg);
4575	else
4576		iounmap(adev->dma_reg);
4577err_regs_alloc:
4578	dma_free_coherent(adev->dev, adev->pool_size,
4579			  adev->dma_desc_pool_virt,
4580			  adev->dma_desc_pool);
4581err_dma_alloc:
4582	kfree(adev);
4583err_adev_alloc:
4584	release_mem_region(res.start, resource_size(&res));
4585out:
4586	if (id < PPC440SPE_ADMA_ENGINES_NUM)
4587		ppc440spe_adma_devices[id] = initcode;
4588
4589	return ret;
4590}
4591
4592/**
4593 * ppc440spe_adma_remove - remove the asynch device
4594 */
4595static int __devexit ppc440spe_adma_remove(struct platform_device *ofdev)
4596{
4597	struct ppc440spe_adma_device *adev = dev_get_drvdata(&ofdev->dev);
4598	struct device_node *np = ofdev->dev.of_node;
4599	struct resource res;
4600	struct dma_chan *chan, *_chan;
4601	struct ppc_dma_chan_ref *ref, *_ref;
4602	struct ppc440spe_adma_chan *ppc440spe_chan;
4603
4604	dev_set_drvdata(&ofdev->dev, NULL);
4605	if (adev->id < PPC440SPE_ADMA_ENGINES_NUM)
4606		ppc440spe_adma_devices[adev->id] = -1;
4607
4608	dma_async_device_unregister(&adev->common);
4609
4610	list_for_each_entry_safe(chan, _chan, &adev->common.channels,
4611				 device_node) {
4612		ppc440spe_chan = to_ppc440spe_adma_chan(chan);
4613		ppc440spe_adma_release_irqs(adev, ppc440spe_chan);
4614		tasklet_kill(&ppc440spe_chan->irq_tasklet);
4615		if (adev->id != PPC440SPE_XOR_ID) {
4616			dma_unmap_page(&ofdev->dev, ppc440spe_chan->pdest,
4617					PAGE_SIZE, DMA_BIDIRECTIONAL);
4618			dma_unmap_page(&ofdev->dev, ppc440spe_chan->qdest,
4619					PAGE_SIZE, DMA_BIDIRECTIONAL);
4620			__free_page(ppc440spe_chan->pdest_page);
4621			__free_page(ppc440spe_chan->qdest_page);
4622		}
4623		list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list,
4624					 node) {
4625			if (ppc440spe_chan ==
4626			    to_ppc440spe_adma_chan(ref->chan)) {
4627				list_del(&ref->node);
4628				kfree(ref);
4629			}
4630		}
4631		list_del(&chan->device_node);
4632		kfree(ppc440spe_chan);
4633	}
4634
4635	dma_free_coherent(adev->dev, adev->pool_size,
4636			  adev->dma_desc_pool_virt, adev->dma_desc_pool);
4637	if (adev->id == PPC440SPE_XOR_ID)
4638		iounmap(adev->xor_reg);
4639	else
4640		iounmap(adev->dma_reg);
4641	of_address_to_resource(np, 0, &res);
4642	release_mem_region(res.start, resource_size(&res));
4643	kfree(adev);
4644	return 0;
4645}
4646
4647/*
4648 * /sys driver interface to enable h/w RAID-6 capabilities
4649 * Files created in e.g. /sys/devices/plb.0/400100100.dma0/driver/
4650 * directory are "devices", "enable" and "poly".
4651 * "devices" shows available engines.
4652 * "enable" is used to enable RAID-6 capabilities or to check
4653 * whether they have been activated.
4654 * "poly" allows setting/checking used polynomial (for PPC440SPe only).
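 *
 * Example usage (the exact sysfs path depends on the platform device name,
 * as in the directory example above):
 *   cat <driver dir>/devices
 *   echo <activation key> > <driver dir>/enable
 *   echo 0x14d > <driver dir>/poly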
4655 */
4656
4657static ssize_t show_ppc440spe_devices(struct device_driver *dev, char *buf)
4658{
4659	ssize_t size = 0;
4660	int i;
4661
4662	for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++) {
4663		if (ppc440spe_adma_devices[i] == -1)
4664			continue;
4665		size += snprintf(buf + size, PAGE_SIZE - size,
4666				 "PPC440SP(E)-ADMA.%d: %s\n", i,
4667				 ppc_adma_errors[ppc440spe_adma_devices[i]]);
4668	}
4669	return size;
4670}
4671
4672static ssize_t show_ppc440spe_r6enable(struct device_driver *dev, char *buf)
4673{
4674	return snprintf(buf, PAGE_SIZE,
4675			"PPC440SP(e) RAID-6 capabilities are %sABLED.\n",
4676			ppc440spe_r6_enabled ? "EN" : "DIS");
4677}
4678
4679static ssize_t store_ppc440spe_r6enable(struct device_driver *dev,
4680					const char *buf, size_t count)
4681{
4682	unsigned long val;
4683
4684	if (!count || count > 11)
4685		return -EINVAL;
4686
4687	if (!ppc440spe_r6_tchan)
4688		return -EFAULT;
4689
4690	/* Write a key */
4691	sscanf(buf, "%lx", &val);
4692	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_XORBA, val);
4693	isync();
4694
4695	/* Verify whether it really works now */
4696	if (ppc440spe_test_raid6(ppc440spe_r6_tchan) == 0) {
4697		pr_info("PPC440SP(e) RAID-6 has been activated "
4698			"successfully\n");
4699		ppc440spe_r6_enabled = 1;
4700	} else {
4701		pr_info("PPC440SP(e) RAID-6 hasn't been activated!"
4702			" Wrong key?\n");
4703		ppc440spe_r6_enabled = 0;
4704	}
4705	return count;
4706}
4707
4708static ssize_t show_ppc440spe_r6poly(struct device_driver *dev, char *buf)
4709{
4710	ssize_t size = 0;
4711	u32 reg;
4712
4713#ifdef CONFIG_440SP
4714	/* 440SP has fixed polynomial */
4715	reg = 0x4d;
4716#else
4717	reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL);
4718	reg >>= MQ0_CFBHL_POLY;
4719	reg &= 0xFF;
4720#endif
4721
4722	size = snprintf(buf, PAGE_SIZE, "PPC440SP(e) RAID-6 driver "
4723			"uses 0x1%02x polynomial.\n", reg);
4724	return size;
4725}
4726
4727static ssize_t store_ppc440spe_r6poly(struct device_driver *dev,
4728				      const char *buf, size_t count)
4729{
4730	unsigned long reg, val;
4731
4732#ifdef CONFIG_440SP
4733	/* 440SP uses default 0x14D polynomial only */
4734	return -EINVAL;
4735#endif
4736
4737	if (!count || count > 6)
4738		return -EINVAL;
4739
4740	/* e.g., 0x14D or 0x11D */
4741	sscanf(buf, "%lx", &val);
4742
4743	if (val & ~0x1FF)
4744		return -EINVAL;
4745
4746	val &= 0xFF;
4747	reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL);
4748	reg &= ~(0xFF << MQ0_CFBHL_POLY);
4749	reg |= val << MQ0_CFBHL_POLY;
4750	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL, reg);
4751
4752	return count;
4753}
4754
4755static DRIVER_ATTR(devices, S_IRUGO, show_ppc440spe_devices, NULL);
4756static DRIVER_ATTR(enable, S_IRUGO | S_IWUSR, show_ppc440spe_r6enable,
4757		   store_ppc440spe_r6enable);
4758static DRIVER_ATTR(poly, S_IRUGO | S_IWUSR, show_ppc440spe_r6poly,
4759		   store_ppc440spe_r6poly);
4760
4761/*
4762 * Common initialisation for RAID engines; allocate memory for
4763 * DMAx FIFOs, perform configuration common for all DMA engines.
4764 * Further DMA engine specific configuration is done at probe time.
4765 */
4766static int ppc440spe_configure_raid_devices(void)
4767{
4768	struct device_node *np;
4769	struct resource i2o_res;
4770	struct i2o_regs __iomem *i2o_reg;
4771	dcr_host_t i2o_dcr_host;
4772	unsigned int dcr_base, dcr_len;
4773	int i, ret;
4774
4775	np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
4776	if (!np) {
4777		pr_err("%s: can't find I2O device tree node\n",
4778			__func__);
4779		return -ENODEV;
4780	}
4781
4782	if (of_address_to_resource(np, 0, &i2o_res)) {
4783		of_node_put(np);
4784		return -EINVAL;
4785	}
4786
4787	i2o_reg = of_iomap(np, 0);
4788	if (!i2o_reg) {
4789		pr_err("%s: failed to map I2O registers\n", __func__);
4790		of_node_put(np);
4791		return -EINVAL;
4792	}
4793
4794	/* Get I2O DCRs base */
4795	dcr_base = dcr_resource_start(np, 0);
4796	dcr_len = dcr_resource_len(np, 0);
4797	if (!dcr_base && !dcr_len) {
4798		pr_err("%s: can't get DCR registers base/len!\n",
4799			np->full_name);
4800		of_node_put(np);
4801		iounmap(i2o_reg);
4802		return -ENODEV;
4803	}
4804
4805	i2o_dcr_host = dcr_map(np, dcr_base, dcr_len);
4806	if (!DCR_MAP_OK(i2o_dcr_host)) {
4807		pr_err("%s: failed to map DCRs!\n", np->full_name);
4808		of_node_put(np);
4809		iounmap(i2o_reg);
4810		return -ENODEV;
4811	}
4812	of_node_put(np);
4813
4814	/* Provide memory regions for DMA's FIFOs: I2O, DMA0 and DMA1 share
4815	 * the base address of FIFO memory space.
4816	 * Actually we need twice as much physical memory as programmed in the
4817	 * <fsiz> register (because there are two FIFOs for each DMA: CP and CS).
4818	 */
4819	ppc440spe_dma_fifo_buf = kmalloc((DMA0_FIFO_SIZE + DMA1_FIFO_SIZE) << 1,
4820					 GFP_KERNEL);
4821	if (!ppc440spe_dma_fifo_buf) {
4822		pr_err("%s: DMA FIFO buffer allocation failed.\n", __func__);
4823		iounmap(i2o_reg);
4824		dcr_unmap(i2o_dcr_host, dcr_len);
4825		return -ENOMEM;
4826	}
4827
4828	/*
4829	 * Configure h/w
4830	 */
4831	/* Reset I2O/DMA */
4832	mtdcri(SDR0, DCRN_SDR0_SRST, DCRN_SDR0_SRST_I2ODMA);
4833	mtdcri(SDR0, DCRN_SDR0_SRST, 0);
4834
4835	/* Setup the base address of mmaped registers */
4836	dcr_write(i2o_dcr_host, DCRN_I2O0_IBAH, (u32)(i2o_res.start >> 32));
4837	dcr_write(i2o_dcr_host, DCRN_I2O0_IBAL, (u32)(i2o_res.start) |
4838						I2O_REG_ENABLE);
4839	dcr_unmap(i2o_dcr_host, dcr_len);
4840
4841	/* Setup FIFO memory space base address */
4842	iowrite32(0, &i2o_reg->ifbah);
4843	iowrite32(((u32)__pa(ppc440spe_dma_fifo_buf)), &i2o_reg->ifbal);
4844
4845	/* set zero FIFO size for I2O, so the whole
4846	 * ppc440spe_dma_fifo_buf is used by DMAs.
4847	 * DMAx FIFOs will be configured at probe time.
4848	 */
4849	iowrite32(0, &i2o_reg->ifsiz);
4850	iounmap(i2o_reg);
4851
4852	/* To prepare WXOR/RXOR functionality we need access to
4853	 * Memory Queue Module DCRs (the capability is finally enabled
4854	 * via the /sys interface of the ppc440spe ADMA driver).
4855	 */
4856	np = of_find_compatible_node(NULL, NULL, "ibm,mq-440spe");
4857	if (!np) {
4858		pr_err("%s: can't find MQ device tree node\n",
4859			__func__);
4860		ret = -ENODEV;
4861		goto out_free;
4862	}
4863
4864	/* Get MQ DCRs base */
4865	dcr_base = dcr_resource_start(np, 0);
4866	dcr_len = dcr_resource_len(np, 0);
4867	if (!dcr_base && !dcr_len) {
4868		pr_err("%s: can't get DCR registers base/len!\n",
4869			np->full_name);
4870		ret = -ENODEV;
4871		goto out_mq;
4872	}
4873
4874	ppc440spe_mq_dcr_host = dcr_map(np, dcr_base, dcr_len);
4875	if (!DCR_MAP_OK(ppc440spe_mq_dcr_host)) {
4876		pr_err("%s: failed to map DCRs!\n", np->full_name);
4877		ret = -ENODEV;
4878		goto out_mq;
4879	}
4880	of_node_put(np);
4881	ppc440spe_mq_dcr_len = dcr_len;
4882
4883	/* Set HB alias */
4884	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_BAUH, DMA_CUED_XOR_HB);
4885
4886	/* Set:
4887	 * - LL transaction passing limit to 1;
4888	 * - Memory controller cycle limit to 1;
4889	 * - Galois Polynomial to 0x14d (default)
4890	 */
4891	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL,
4892		  (1 << MQ0_CFBHL_TPLM) | (1 << MQ0_CFBHL_HBCL) |
4893		  (PPC440SPE_DEFAULT_POLY << MQ0_CFBHL_POLY));
4894
4895	atomic_set(&ppc440spe_adma_err_irq_ref, 0);
4896	for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++)
4897		ppc440spe_adma_devices[i] = -1;
4898
4899	return 0;
4900
4901out_mq:
4902	of_node_put(np);
4903out_free:
4904	kfree(ppc440spe_dma_fifo_buf);
4905	return ret;
4906}
4907
4908static const struct of_device_id ppc440spe_adma_of_match[] __devinitconst = {
4909	{ .compatible	= "ibm,dma-440spe", },
4910	{ .compatible	= "amcc,xor-accelerator", },
4911	{},
4912};
4913MODULE_DEVICE_TABLE(of, ppc440spe_adma_of_match);
4914
4915static struct platform_driver ppc440spe_adma_driver = {
4916	.probe = ppc440spe_adma_probe,
4917	.remove = __devexit_p(ppc440spe_adma_remove),
4918	.driver = {
4919		.name = "PPC440SP(E)-ADMA",
4920		.owner = THIS_MODULE,
4921		.of_match_table = ppc440spe_adma_of_match,
4922	},
4923};
4924
4925static __init int ppc440spe_adma_init(void)
4926{
4927	int ret;
4928
4929	ret = ppc440spe_configure_raid_devices();
4930	if (ret)
4931		return ret;
4932
4933	ret = platform_driver_register(&ppc440spe_adma_driver);
4934	if (ret) {
4935		pr_err("%s: failed to register platform driver\n",
4936			__func__);
4937		goto out_reg;
4938	}
4939
4940	/* Initialization status */
4941	ret = driver_create_file(&ppc440spe_adma_driver.driver,
4942				 &driver_attr_devices);
4943	if (ret)
4944		goto out_dev;
4945
4946	/* RAID-6 h/w enable entry */
4947	ret = driver_create_file(&ppc440spe_adma_driver.driver,
4948				 &driver_attr_enable);
4949	if (ret)
4950		goto out_en;
4951
4952	/* GF polynomial to use */
4953	ret = driver_create_file(&ppc440spe_adma_driver.driver,
4954				 &driver_attr_poly);
4955	if (!ret)
4956		return ret;
4957
4958	driver_remove_file(&ppc440spe_adma_driver.driver,
4959			   &driver_attr_enable);
4960out_en:
4961	driver_remove_file(&ppc440spe_adma_driver.driver,
4962			   &driver_attr_devices);
4963out_dev:
4964	/* User will not be able to enable h/w RAID-6 */
4965	pr_err("%s: failed to create RAID-6 driver interface\n",
4966		__func__);
4967	platform_driver_unregister(&ppc440spe_adma_driver);
4968out_reg:
4969	dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len);
4970	kfree(ppc440spe_dma_fifo_buf);
4971	return ret;
4972}
4973
4974static void __exit ppc440spe_adma_exit(void)
4975{
4976	driver_remove_file(&ppc440spe_adma_driver.driver,
4977			   &driver_attr_poly);
4978	driver_remove_file(&ppc440spe_adma_driver.driver,
4979			   &driver_attr_enable);
4980	driver_remove_file(&ppc440spe_adma_driver.driver,
4981			   &driver_attr_devices);
4982	platform_driver_unregister(&ppc440spe_adma_driver);
4983	dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len);
4984	kfree(ppc440spe_dma_fifo_buf);
4985}
4986
4987arch_initcall(ppc440spe_adma_init);
4988module_exit(ppc440spe_adma_exit);
4989
4990MODULE_AUTHOR("Yuri Tikhonov <yur@emcraft.com>");
4991MODULE_DESCRIPTION("PPC440SPE ADMA Engine Driver");
4992MODULE_LICENSE("GPL");