   1/*
   2 * This file is provided under a dual BSD/GPLv2 license.  When using or
   3 * redistributing this file, you may do so under either license.
   4 *
   5 * GPL LICENSE SUMMARY
   6 *
   7 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
   8 *
   9 * This program is free software; you can redistribute it and/or modify it
  10 * under the terms and conditions of the GNU General Public License,
  11 * version 2, as published by the Free Software Foundation.
  12 *
  13 * This program is distributed in the hope that it will be useful, but WITHOUT
  14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  16 * more details.
  17 *
  18 * You should have received a copy of the GNU General Public License along with
  19 * this program; if not, write to the Free Software Foundation, Inc.,
  20 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  21 *
  22 * The full GNU General Public License is included in this distribution in
  23 * the file called "COPYING".
  24 *
  25 * BSD LICENSE
  26 *
  27 * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
  28 *
  29 * Redistribution and use in source and binary forms, with or without
  30 * modification, are permitted provided that the following conditions are met:
  31 *
  32 *   * Redistributions of source code must retain the above copyright
  33 *     notice, this list of conditions and the following disclaimer.
  34 *   * Redistributions in binary form must reproduce the above copyright
  35 *     notice, this list of conditions and the following disclaimer in
  36 *     the documentation and/or other materials provided with the
  37 *     distribution.
  38 *   * Neither the name of Intel Corporation nor the names of its
  39 *     contributors may be used to endorse or promote products derived
  40 *     from this software without specific prior written permission.
  41 *
  42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  43 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  45 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  46 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  47 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  48 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  49 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  50 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  51 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  52 * POSSIBILITY OF SUCH DAMAGE.
  53 */
  54
  55/*
  56 * Support routines for v3+ hardware
  57 */
  58
  59#include <linux/pci.h>
  60#include <linux/gfp.h>
  61#include <linux/dmaengine.h>
  62#include <linux/dma-mapping.h>
  63#include <linux/prefetch.h>
  64#include "registers.h"
  65#include "hw.h"
  66#include "dma.h"
  67#include "dma_v2.h"
  68
  69/* ioat hardware assumes at least two sources for raid operations */
  70#define src_cnt_to_sw(x) ((x) + 2)
  71#define src_cnt_to_hw(x) ((x) - 2)
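/*
 * e.g. an 8 source xor is programmed into the descriptor as
 * src_cnt_to_hw(8) == 6 and read back as src_cnt_to_sw(6) == 8
 */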
  72
  73/* provide a lookup table for setting the source address in the base or
  74 * extended descriptor of an xor or pq descriptor
  75 */
  76static const u8 xor_idx_to_desc = 0xe0;
  77static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
  78static const u8 pq_idx_to_desc = 0xf8;
  79static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
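/*
 * bit 'idx' of {xor,pq}_idx_to_desc selects whether source 'idx' lives in
 * the base descriptor (bit clear, descs[0]) or the extended descriptor
 * (bit set, descs[1]); {xor,pq}_idx_to_field then gives the raw field slot
 * inside that descriptor.  For example xor source 5 (0xe0 has bit 5 set)
 * is written to field 0 of the extended descriptor, while pq source 2
 * (0xf8 has bit 2 clear) is written to field 5 of the base descriptor.
 */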
  80
  81static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
  82{
  83	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
  84
  85	return raw->field[xor_idx_to_field[idx]];
  86}
  87
  88static void xor_set_src(struct ioat_raw_descriptor *descs[2],
  89			dma_addr_t addr, u32 offset, int idx)
  90{
  91	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
  92
  93	raw->field[xor_idx_to_field[idx]] = addr + offset;
  94}
  95
  96static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
  97{
  98	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
  99
 100	return raw->field[pq_idx_to_field[idx]];
 101}
 102
 103static void pq_set_src(struct ioat_raw_descriptor *descs[2],
 104		       dma_addr_t addr, u32 offset, u8 coef, int idx)
 105{
 106	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
 107	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
 108
 109	raw->field[pq_idx_to_field[idx]] = addr + offset;
 110	pq->coef[idx] = coef;
 111}
 112
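/*
 * ioat3_dma_unmap - undo the dma mappings for a completed descriptor,
 * including the xor/pq cases that may span an extended descriptor
 */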
 113static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
 114			    struct ioat_ring_ent *desc, int idx)
 115{
 116	struct ioat_chan_common *chan = &ioat->base;
 117	struct pci_dev *pdev = chan->device->pdev;
 118	size_t len = desc->len;
 119	size_t offset = len - desc->hw->size;
 120	struct dma_async_tx_descriptor *tx = &desc->txd;
 121	enum dma_ctrl_flags flags = tx->flags;
 122
 123	switch (desc->hw->ctl_f.op) {
 124	case IOAT_OP_COPY:
 125		if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
 126			ioat_dma_unmap(chan, flags, len, desc->hw);
 127		break;
 128	case IOAT_OP_FILL: {
 129		struct ioat_fill_descriptor *hw = desc->fill;
 130
 131		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
 132			ioat_unmap(pdev, hw->dst_addr - offset, len,
 133				   PCI_DMA_FROMDEVICE, flags, 1);
 134		break;
 135	}
 136	case IOAT_OP_XOR_VAL:
 137	case IOAT_OP_XOR: {
 138		struct ioat_xor_descriptor *xor = desc->xor;
 139		struct ioat_ring_ent *ext;
 140		struct ioat_xor_ext_descriptor *xor_ex = NULL;
 141		int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
 142		struct ioat_raw_descriptor *descs[2];
 143		int i;
 144
 145		if (src_cnt > 5) {
 146			ext = ioat2_get_ring_ent(ioat, idx + 1);
 147			xor_ex = ext->xor_ex;
 148		}
 149
 150		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
 151			descs[0] = (struct ioat_raw_descriptor *) xor;
 152			descs[1] = (struct ioat_raw_descriptor *) xor_ex;
 153			for (i = 0; i < src_cnt; i++) {
 154				dma_addr_t src = xor_get_src(descs, i);
 155
 156				ioat_unmap(pdev, src - offset, len,
 157					   PCI_DMA_TODEVICE, flags, 0);
 158			}
 159
 160			/* dest is a source in xor validate operations */
 161			if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
 162				ioat_unmap(pdev, xor->dst_addr - offset, len,
 163					   PCI_DMA_TODEVICE, flags, 1);
 164				break;
 165			}
 166		}
 167
 168		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
 169			ioat_unmap(pdev, xor->dst_addr - offset, len,
 170				   PCI_DMA_FROMDEVICE, flags, 1);
 171		break;
 172	}
 173	case IOAT_OP_PQ_VAL:
 174	case IOAT_OP_PQ: {
 175		struct ioat_pq_descriptor *pq = desc->pq;
 176		struct ioat_ring_ent *ext;
 177		struct ioat_pq_ext_descriptor *pq_ex = NULL;
 178		int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
 179		struct ioat_raw_descriptor *descs[2];
 180		int i;
 181
 182		if (src_cnt > 3) {
 183			ext = ioat2_get_ring_ent(ioat, idx + 1);
 184			pq_ex = ext->pq_ex;
 185		}
 186
 187		/* in the 'continue' case don't unmap the dests as sources */
 188		if (dmaf_p_disabled_continue(flags))
 189			src_cnt--;
 190		else if (dmaf_continue(flags))
 191			src_cnt -= 3;
 192
 193		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
 194			descs[0] = (struct ioat_raw_descriptor *) pq;
 195			descs[1] = (struct ioat_raw_descriptor *) pq_ex;
 196			for (i = 0; i < src_cnt; i++) {
 197				dma_addr_t src = pq_get_src(descs, i);
 198
 199				ioat_unmap(pdev, src - offset, len,
 200					   PCI_DMA_TODEVICE, flags, 0);
 201			}
 202
 203			/* the dests are sources in pq validate operations */
  204			if (pq->ctl_f.op == IOAT_OP_PQ_VAL) {
 205				if (!(flags & DMA_PREP_PQ_DISABLE_P))
 206					ioat_unmap(pdev, pq->p_addr - offset,
 207						   len, PCI_DMA_TODEVICE, flags, 0);
 208				if (!(flags & DMA_PREP_PQ_DISABLE_Q))
 209					ioat_unmap(pdev, pq->q_addr - offset,
 210						   len, PCI_DMA_TODEVICE, flags, 0);
 211				break;
 212			}
 213		}
 214
 215		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
 216			if (!(flags & DMA_PREP_PQ_DISABLE_P))
 217				ioat_unmap(pdev, pq->p_addr - offset, len,
 218					   PCI_DMA_BIDIRECTIONAL, flags, 1);
 219			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
 220				ioat_unmap(pdev, pq->q_addr - offset, len,
 221					   PCI_DMA_BIDIRECTIONAL, flags, 1);
 222		}
 223		break;
 224	}
 225	default:
 226		dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
 227			__func__, desc->hw->ctl_f.op);
 228	}
 229}
 230
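/* true if this operation spills extra sources into a second (extended) descriptor */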
 231static bool desc_has_ext(struct ioat_ring_ent *desc)
 232{
 233	struct ioat_dma_descriptor *hw = desc->hw;
 234
 235	if (hw->ctl_f.op == IOAT_OP_XOR ||
 236	    hw->ctl_f.op == IOAT_OP_XOR_VAL) {
 237		struct ioat_xor_descriptor *xor = desc->xor;
 238
 239		if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
 240			return true;
 241	} else if (hw->ctl_f.op == IOAT_OP_PQ ||
 242		   hw->ctl_f.op == IOAT_OP_PQ_VAL) {
 243		struct ioat_pq_descriptor *pq = desc->pq;
 244
 245		if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
 246			return true;
 247	}
 248
 249	return false;
 250}
 251
 252/**
 253 * __cleanup - reclaim used descriptors
 254 * @ioat: channel (ring) to clean
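 * @phys_complete: completion writeback address of the last descriptor retired by hardware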
 255 *
 256 * The difference from the dma_v2.c __cleanup() is that this routine
 257 * handles extended descriptors and dma-unmapping raid operations.
 258 */
 259static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
 260{
 261	struct ioat_chan_common *chan = &ioat->base;
 262	struct ioat_ring_ent *desc;
 263	bool seen_current = false;
 264	int idx = ioat->tail, i;
 265	u16 active;
 266
 267	dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
 268		__func__, ioat->head, ioat->tail, ioat->issued);
 269
 270	active = ioat2_ring_active(ioat);
 271	for (i = 0; i < active && !seen_current; i++) {
 272		struct dma_async_tx_descriptor *tx;
 273
 274		smp_read_barrier_depends();
 275		prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
 276		desc = ioat2_get_ring_ent(ioat, idx + i);
 277		dump_desc_dbg(ioat, desc);
 278		tx = &desc->txd;
 279		if (tx->cookie) {
 280			chan->completed_cookie = tx->cookie;
 281			ioat3_dma_unmap(ioat, desc, idx + i);
 282			tx->cookie = 0;
 283			if (tx->callback) {
 284				tx->callback(tx->callback_param);
 285				tx->callback = NULL;
 286			}
 287		}
 288
 289		if (tx->phys == phys_complete)
 290			seen_current = true;
 291
 292		/* skip extended descriptors */
 293		if (desc_has_ext(desc)) {
 294			BUG_ON(i + 1 >= active);
 295			i++;
 296		}
 297	}
 298	smp_mb(); /* finish all descriptor reads before incrementing tail */
 299	ioat->tail = idx + i;
 300	BUG_ON(active && !seen_current); /* no active descs have written a completion? */
 301	chan->last_completion = phys_complete;
 302
 303	if (active - i == 0) {
 304		dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
 305			__func__);
 306		clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
 307		mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
 308	}
 309	/* 5 microsecond delay per pending descriptor */
 310	writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK),
 311	       chan->device->reg_base + IOAT_INTRDELAY_OFFSET);
 312}
 313
 314static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
 315{
 316	struct ioat_chan_common *chan = &ioat->base;
 317	unsigned long phys_complete;
 318
 319	spin_lock_bh(&chan->cleanup_lock);
 320	if (ioat_cleanup_preamble(chan, &phys_complete))
 321		__cleanup(ioat, phys_complete);
 322	spin_unlock_bh(&chan->cleanup_lock);
 323}
 324
 325static void ioat3_cleanup_event(unsigned long data)
 326{
 327	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
 328
 329	ioat3_cleanup(ioat);
 330	writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
 331}
 332
 333static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
 334{
 335	struct ioat_chan_common *chan = &ioat->base;
 336	unsigned long phys_complete;
 337
 338	ioat2_quiesce(chan, 0);
 339	if (ioat_cleanup_preamble(chan, &phys_complete))
 340		__cleanup(ioat, phys_complete);
 341
 342	__ioat2_restart_chan(ioat);
 343}
 344
 345static void ioat3_timer_event(unsigned long data)
 346{
 347	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
 348	struct ioat_chan_common *chan = &ioat->base;
 349
 350	if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
 351		unsigned long phys_complete;
 352		u64 status;
 353
 354		status = ioat_chansts(chan);
 355
  356		/* when halted due to errors, check for channel
 357		 * programming errors before advancing the completion state
 358		 */
 359		if (is_ioat_halted(status)) {
 360			u32 chanerr;
 361
 362			chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
 363			dev_err(to_dev(chan), "%s: Channel halted (%x)\n",
 364				__func__, chanerr);
 365			if (test_bit(IOAT_RUN, &chan->state))
 366				BUG_ON(is_ioat_bug(chanerr));
 367			else /* we never got off the ground */
 368				return;
 369		}
 370
 371		/* if we haven't made progress and we have already
 372		 * acknowledged a pending completion once, then be more
 373		 * forceful with a restart
 374		 */
 375		spin_lock_bh(&chan->cleanup_lock);
 376		if (ioat_cleanup_preamble(chan, &phys_complete))
 377			__cleanup(ioat, phys_complete);
 378		else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) {
 379			spin_lock_bh(&ioat->prep_lock);
 380			ioat3_restart_channel(ioat);
 381			spin_unlock_bh(&ioat->prep_lock);
 382		} else {
 383			set_bit(IOAT_COMPLETION_ACK, &chan->state);
 384			mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
 385		}
 386		spin_unlock_bh(&chan->cleanup_lock);
 387	} else {
 388		u16 active;
 389
  390		/* if the ring is idle, empty, and oversized, try to step
 391		 * down the size
 392		 */
 393		spin_lock_bh(&chan->cleanup_lock);
 394		spin_lock_bh(&ioat->prep_lock);
 395		active = ioat2_ring_active(ioat);
 396		if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
 397			reshape_ring(ioat, ioat->alloc_order-1);
 398		spin_unlock_bh(&ioat->prep_lock);
 399		spin_unlock_bh(&chan->cleanup_lock);
 400
 401		/* keep shrinking until we get back to our minimum
 402		 * default size
 403		 */
 404		if (ioat->alloc_order > ioat_get_alloc_order())
 405			mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
 406	}
 407}
 408
 409static enum dma_status
 410ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie,
 411		struct dma_tx_state *txstate)
 412{
 413	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 414
 415	if (ioat_tx_status(c, cookie, txstate) == DMA_SUCCESS)
 416		return DMA_SUCCESS;
 417
 418	ioat3_cleanup(ioat);
 419
 420	return ioat_tx_status(c, cookie, txstate);
 421}
 422
 423static struct dma_async_tx_descriptor *
 424ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
 425		       size_t len, unsigned long flags)
 426{
 427	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 428	struct ioat_ring_ent *desc;
 429	size_t total_len = len;
 430	struct ioat_fill_descriptor *fill;
 431	u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
 432	int num_descs, idx, i;
 433
 434	num_descs = ioat2_xferlen_to_descs(ioat, len);
 435	if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0)
 436		idx = ioat->head;
 437	else
 438		return NULL;
 439	i = 0;
 440	do {
 441		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
 442
 443		desc = ioat2_get_ring_ent(ioat, idx + i);
 444		fill = desc->fill;
 445
 446		fill->size = xfer_size;
 447		fill->src_data = src_data;
 448		fill->dst_addr = dest;
 449		fill->ctl = 0;
 450		fill->ctl_f.op = IOAT_OP_FILL;
 451
 452		len -= xfer_size;
 453		dest += xfer_size;
 454		dump_desc_dbg(ioat, desc);
 455	} while (++i < num_descs);
 456
 457	desc->txd.flags = flags;
 458	desc->len = total_len;
 459	fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
 460	fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
 461	fill->ctl_f.compl_write = 1;
 462	dump_desc_dbg(ioat, desc);
 463
  464	/* we leave the channel locked to ensure in-order submission */
 465	return &desc->txd;
 466}
 467
 468static struct dma_async_tx_descriptor *
 469__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
 470		      dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
 471		      size_t len, unsigned long flags)
 472{
 473	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 474	struct ioat_ring_ent *compl_desc;
 475	struct ioat_ring_ent *desc;
 476	struct ioat_ring_ent *ext;
 477	size_t total_len = len;
 478	struct ioat_xor_descriptor *xor;
 479	struct ioat_xor_ext_descriptor *xor_ex = NULL;
 480	struct ioat_dma_descriptor *hw;
 481	int num_descs, with_ext, idx, i;
 482	u32 offset = 0;
 483	u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
 484
 485	BUG_ON(src_cnt < 2);
 486
 487	num_descs = ioat2_xferlen_to_descs(ioat, len);
 488	/* we need 2x the number of descriptors to cover greater than 5
 489	 * sources
 490	 */
 491	if (src_cnt > 5) {
 492		with_ext = 1;
 493		num_descs *= 2;
 494	} else
 495		with_ext = 0;
 496
 497	/* completion writes from the raid engine may pass completion
 498	 * writes from the legacy engine, so we need one extra null
 499	 * (legacy) descriptor to ensure all completion writes arrive in
 500	 * order.
 501	 */
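	/* e.g. a single-segment 8 source xor therefore reserves three ring
	 * slots: the xor descriptor, its extended descriptor and the null
	 * completion descriptor
	 */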
 502	if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0)
 503		idx = ioat->head;
 504	else
 505		return NULL;
 506	i = 0;
 507	do {
 508		struct ioat_raw_descriptor *descs[2];
 509		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
 510		int s;
 511
 512		desc = ioat2_get_ring_ent(ioat, idx + i);
 513		xor = desc->xor;
 514
 515		/* save a branch by unconditionally retrieving the
  516		 * extended descriptor; xor_set_src() knows not to write
 517		 * to it in the single descriptor case
 518		 */
 519		ext = ioat2_get_ring_ent(ioat, idx + i + 1);
 520		xor_ex = ext->xor_ex;
 521
 522		descs[0] = (struct ioat_raw_descriptor *) xor;
 523		descs[1] = (struct ioat_raw_descriptor *) xor_ex;
 524		for (s = 0; s < src_cnt; s++)
 525			xor_set_src(descs, src[s], offset, s);
 526		xor->size = xfer_size;
 527		xor->dst_addr = dest + offset;
 528		xor->ctl = 0;
 529		xor->ctl_f.op = op;
 530		xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
 531
 532		len -= xfer_size;
 533		offset += xfer_size;
 534		dump_desc_dbg(ioat, desc);
 535	} while ((i += 1 + with_ext) < num_descs);
 536
 537	/* last xor descriptor carries the unmap parameters and fence bit */
 538	desc->txd.flags = flags;
 539	desc->len = total_len;
 540	if (result)
 541		desc->result = result;
 542	xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
 543
 544	/* completion descriptor carries interrupt bit */
 545	compl_desc = ioat2_get_ring_ent(ioat, idx + i);
 546	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
 547	hw = compl_desc->hw;
 548	hw->ctl = 0;
 549	hw->ctl_f.null = 1;
 550	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
 551	hw->ctl_f.compl_write = 1;
 552	hw->size = NULL_DESC_BUFFER_SIZE;
 553	dump_desc_dbg(ioat, compl_desc);
 554
  555	/* we leave the channel locked to ensure in-order submission */
 556	return &compl_desc->txd;
 557}
 558
 559static struct dma_async_tx_descriptor *
 560ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
 561	       unsigned int src_cnt, size_t len, unsigned long flags)
 562{
 563	return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
 564}
 565
 566struct dma_async_tx_descriptor *
 567ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
 568		    unsigned int src_cnt, size_t len,
 569		    enum sum_check_flags *result, unsigned long flags)
 570{
 571	/* the cleanup routine only sets bits on validate failure, it
 572	 * does not clear bits on validate success... so clear it here
 573	 */
 574	*result = 0;
 575
 576	return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
 577				     src_cnt - 1, len, flags);
 578}
 579
 580static void
 581dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
 582{
 583	struct device *dev = to_dev(&ioat->base);
 584	struct ioat_pq_descriptor *pq = desc->pq;
 585	struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
 586	struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
 587	int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
 588	int i;
 589
 590	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
 591		" sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
 592		desc_id(desc), (unsigned long long) desc->txd.phys,
 593		(unsigned long long) (pq_ex ? pq_ex->next : pq->next),
 594		desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
 595		pq->ctl_f.compl_write,
 596		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
 597		pq->ctl_f.src_cnt);
 598	for (i = 0; i < src_cnt; i++)
 599		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
 600			(unsigned long long) pq_get_src(descs, i), pq->coef[i]);
 601	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
 602	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
 603}
 604
 605static struct dma_async_tx_descriptor *
 606__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
 607		     const dma_addr_t *dst, const dma_addr_t *src,
 608		     unsigned int src_cnt, const unsigned char *scf,
 609		     size_t len, unsigned long flags)
 610{
 611	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 612	struct ioat_chan_common *chan = &ioat->base;
 613	struct ioat_ring_ent *compl_desc;
 614	struct ioat_ring_ent *desc;
 615	struct ioat_ring_ent *ext;
 616	size_t total_len = len;
 617	struct ioat_pq_descriptor *pq;
 618	struct ioat_pq_ext_descriptor *pq_ex = NULL;
 619	struct ioat_dma_descriptor *hw;
 620	u32 offset = 0;
 621	u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
 622	int i, s, idx, with_ext, num_descs;
 623
 624	dev_dbg(to_dev(chan), "%s\n", __func__);
 625	/* the engine requires at least two sources (we provide
 626	 * at least 1 implied source in the DMA_PREP_CONTINUE case)
 627	 */
 628	BUG_ON(src_cnt + dmaf_continue(flags) < 2);
 629
 630	num_descs = ioat2_xferlen_to_descs(ioat, len);
 631	/* we need 2x the number of descriptors to cover greater than 3
 632	 * sources (we need 1 extra source in the q-only continuation
  633	 * case and 3 extra sources in the p+q continuation case).
 634	 */
 635	if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
 636	    (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
 637		with_ext = 1;
 638		num_descs *= 2;
 639	} else
 640		with_ext = 0;
 641
 642	/* completion writes from the raid engine may pass completion
 643	 * writes from the legacy engine, so we need one extra null
 644	 * (legacy) descriptor to ensure all completion writes arrive in
 645	 * order.
 646	 */
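	/* e.g. a single-segment 8 source pq likewise reserves three ring
	 * slots: base descriptor, extended descriptor and null completion
	 * descriptor
	 */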
 647	if (likely(num_descs) &&
 648	    ioat2_check_space_lock(ioat, num_descs+1) == 0)
 649		idx = ioat->head;
 650	else
 651		return NULL;
 652	i = 0;
 653	do {
 654		struct ioat_raw_descriptor *descs[2];
 655		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
 656
 657		desc = ioat2_get_ring_ent(ioat, idx + i);
 658		pq = desc->pq;
 659
 660		/* save a branch by unconditionally retrieving the
  661		 * extended descriptor; pq_set_src() knows not to write
 662		 * to it in the single descriptor case
 663		 */
 664		ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
 665		pq_ex = ext->pq_ex;
 666
 667		descs[0] = (struct ioat_raw_descriptor *) pq;
 668		descs[1] = (struct ioat_raw_descriptor *) pq_ex;
 669
 670		for (s = 0; s < src_cnt; s++)
 671			pq_set_src(descs, src[s], offset, scf[s], s);
 672
 673		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
 674		if (dmaf_p_disabled_continue(flags))
 675			pq_set_src(descs, dst[1], offset, 1, s++);
 676		else if (dmaf_continue(flags)) {
 677			pq_set_src(descs, dst[0], offset, 0, s++);
 678			pq_set_src(descs, dst[1], offset, 1, s++);
 679			pq_set_src(descs, dst[1], offset, 0, s++);
 680		}
 681		pq->size = xfer_size;
 682		pq->p_addr = dst[0] + offset;
 683		pq->q_addr = dst[1] + offset;
 684		pq->ctl = 0;
 685		pq->ctl_f.op = op;
 686		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
 687		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
 688		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
 689
 690		len -= xfer_size;
 691		offset += xfer_size;
 692	} while ((i += 1 + with_ext) < num_descs);
 693
 694	/* last pq descriptor carries the unmap parameters and fence bit */
 695	desc->txd.flags = flags;
 696	desc->len = total_len;
 697	if (result)
 698		desc->result = result;
 699	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
 700	dump_pq_desc_dbg(ioat, desc, ext);
 701
 702	/* completion descriptor carries interrupt bit */
 703	compl_desc = ioat2_get_ring_ent(ioat, idx + i);
 704	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
 705	hw = compl_desc->hw;
 706	hw->ctl = 0;
 707	hw->ctl_f.null = 1;
 708	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
 709	hw->ctl_f.compl_write = 1;
 710	hw->size = NULL_DESC_BUFFER_SIZE;
 711	dump_desc_dbg(ioat, compl_desc);
 712
  713	/* we leave the channel locked to ensure in-order submission */
 714	return &compl_desc->txd;
 715}
 716
 717static struct dma_async_tx_descriptor *
 718ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
 719	      unsigned int src_cnt, const unsigned char *scf, size_t len,
 720	      unsigned long flags)
 721{
 722	/* specify valid address for disabled result */
 723	if (flags & DMA_PREP_PQ_DISABLE_P)
 724		dst[0] = dst[1];
 725	if (flags & DMA_PREP_PQ_DISABLE_Q)
 726		dst[1] = dst[0];
 727
 728	/* handle the single source multiply case from the raid6
 729	 * recovery path
 730	 */
 731	if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
 732		dma_addr_t single_source[2];
 733		unsigned char single_source_coef[2];
 734
 735		BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
 736		single_source[0] = src[0];
 737		single_source[1] = src[0];
 738		single_source_coef[0] = scf[0];
 739		single_source_coef[1] = 0;
 740
 741		return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
 742					    single_source_coef, len, flags);
 743	} else
 744		return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
 745					    len, flags);
 746}
 747
 748struct dma_async_tx_descriptor *
 749ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
 750		  unsigned int src_cnt, const unsigned char *scf, size_t len,
 751		  enum sum_check_flags *pqres, unsigned long flags)
 752{
 753	/* specify valid address for disabled result */
 754	if (flags & DMA_PREP_PQ_DISABLE_P)
 755		pq[0] = pq[1];
 756	if (flags & DMA_PREP_PQ_DISABLE_Q)
 757		pq[1] = pq[0];
 758
 759	/* the cleanup routine only sets bits on validate failure, it
 760	 * does not clear bits on validate success... so clear it here
 761	 */
 762	*pqres = 0;
 763
 764	return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
 765				    flags);
 766}
 767
 768static struct dma_async_tx_descriptor *
 769ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
 770		 unsigned int src_cnt, size_t len, unsigned long flags)
 771{
 772	unsigned char scf[src_cnt];
 773	dma_addr_t pq[2];
 774
 775	memset(scf, 0, src_cnt);
 776	pq[0] = dst;
 777	flags |= DMA_PREP_PQ_DISABLE_Q;
 778	pq[1] = dst; /* specify valid address for disabled result */
 779
 780	return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
 781				    flags);
 782}
 783
 784struct dma_async_tx_descriptor *
 785ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
 786		     unsigned int src_cnt, size_t len,
 787		     enum sum_check_flags *result, unsigned long flags)
 788{
 789	unsigned char scf[src_cnt];
 790	dma_addr_t pq[2];
 791
 792	/* the cleanup routine only sets bits on validate failure, it
 793	 * does not clear bits on validate success... so clear it here
 794	 */
 795	*result = 0;
 796
 797	memset(scf, 0, src_cnt);
 798	pq[0] = src[0];
 799	flags |= DMA_PREP_PQ_DISABLE_Q;
 800	pq[1] = pq[0]; /* specify valid address for disabled result */
 801
 802	return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf,
 803				    len, flags);
 804}
 805
 806static struct dma_async_tx_descriptor *
 807ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
 808{
 809	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 810	struct ioat_ring_ent *desc;
 811	struct ioat_dma_descriptor *hw;
 812
 813	if (ioat2_check_space_lock(ioat, 1) == 0)
 814		desc = ioat2_get_ring_ent(ioat, ioat->head);
 815	else
 816		return NULL;
 817
 818	hw = desc->hw;
 819	hw->ctl = 0;
 820	hw->ctl_f.null = 1;
 821	hw->ctl_f.int_en = 1;
 822	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
 823	hw->ctl_f.compl_write = 1;
 824	hw->size = NULL_DESC_BUFFER_SIZE;
 825	hw->src_addr = 0;
 826	hw->dst_addr = 0;
 827
 828	desc->txd.flags = flags;
 829	desc->len = 1;
 830
 831	dump_desc_dbg(ioat, desc);
 832
  833	/* we leave the channel locked to ensure in-order submission */
 834	return &desc->txd;
 835}
 836
 837static void __devinit ioat3_dma_test_callback(void *dma_async_param)
 838{
 839	struct completion *cmp = dma_async_param;
 840
 841	complete(cmp);
 842}
 843
 844#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
 845static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
 846{
 847	int i, src_idx;
 848	struct page *dest;
 849	struct page *xor_srcs[IOAT_NUM_SRC_TEST];
 850	struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
 851	dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
 852	dma_addr_t dma_addr, dest_dma;
 853	struct dma_async_tx_descriptor *tx;
 854	struct dma_chan *dma_chan;
 855	dma_cookie_t cookie;
 856	u8 cmp_byte = 0;
 857	u32 cmp_word;
 858	u32 xor_val_result;
 859	int err = 0;
 860	struct completion cmp;
 861	unsigned long tmo;
 862	struct device *dev = &device->pdev->dev;
 863	struct dma_device *dma = &device->common;
 864
 865	dev_dbg(dev, "%s\n", __func__);
 866
 867	if (!dma_has_cap(DMA_XOR, dma->cap_mask))
 868		return 0;
 869
 870	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
 871		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
 872		if (!xor_srcs[src_idx]) {
 873			while (src_idx--)
 874				__free_page(xor_srcs[src_idx]);
 875			return -ENOMEM;
 876		}
 877	}
 878
 879	dest = alloc_page(GFP_KERNEL);
 880	if (!dest) {
 881		while (src_idx--)
 882			__free_page(xor_srcs[src_idx]);
 883		return -ENOMEM;
 884	}
 885
 886	/* Fill in src buffers */
 887	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
 888		u8 *ptr = page_address(xor_srcs[src_idx]);
 889		for (i = 0; i < PAGE_SIZE; i++)
 890			ptr[i] = (1 << src_idx);
 891	}
 892
 893	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
 894		cmp_byte ^= (u8) (1 << src_idx);
 895
 896	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
 897			(cmp_byte << 8) | cmp_byte;
 898
 899	memset(page_address(dest), 0, PAGE_SIZE);
 900
 901	dma_chan = container_of(dma->channels.next, struct dma_chan,
 902				device_node);
 903	if (dma->device_alloc_chan_resources(dma_chan) < 1) {
 904		err = -ENODEV;
 905		goto out;
 906	}
 907
 908	/* test xor */
 909	dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
 910	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
 911		dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
 912					   DMA_TO_DEVICE);
 913	tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
 914				      IOAT_NUM_SRC_TEST, PAGE_SIZE,
 915				      DMA_PREP_INTERRUPT);
 916
 917	if (!tx) {
 918		dev_err(dev, "Self-test xor prep failed\n");
 919		err = -ENODEV;
 920		goto free_resources;
 921	}
 922
 923	async_tx_ack(tx);
 924	init_completion(&cmp);
 925	tx->callback = ioat3_dma_test_callback;
 926	tx->callback_param = &cmp;
 927	cookie = tx->tx_submit(tx);
 928	if (cookie < 0) {
 929		dev_err(dev, "Self-test xor setup failed\n");
 930		err = -ENODEV;
 931		goto free_resources;
 932	}
 933	dma->device_issue_pending(dma_chan);
 934
 935	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
 936
 937	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
 938		dev_err(dev, "Self-test xor timed out\n");
 939		err = -ENODEV;
 940		goto free_resources;
 941	}
 942
 943	dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
 944	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
 945		u32 *ptr = page_address(dest);
 946		if (ptr[i] != cmp_word) {
 947			dev_err(dev, "Self-test xor failed compare\n");
 948			err = -ENODEV;
 949			goto free_resources;
 950		}
 951	}
 952	dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE);
 953
 954	/* skip validate if the capability is not present */
 955	if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
 956		goto free_resources;
 957
  958	/* validate the sources with the destination page */
 959	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
 960		xor_val_srcs[i] = xor_srcs[i];
 961	xor_val_srcs[i] = dest;
 962
 963	xor_val_result = 1;
 964
 965	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
 966		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
 967					   DMA_TO_DEVICE);
 968	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
 969					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
 970					  &xor_val_result, DMA_PREP_INTERRUPT);
 971	if (!tx) {
 972		dev_err(dev, "Self-test zero prep failed\n");
 973		err = -ENODEV;
 974		goto free_resources;
 975	}
 976
 977	async_tx_ack(tx);
 978	init_completion(&cmp);
 979	tx->callback = ioat3_dma_test_callback;
 980	tx->callback_param = &cmp;
 981	cookie = tx->tx_submit(tx);
 982	if (cookie < 0) {
 983		dev_err(dev, "Self-test zero setup failed\n");
 984		err = -ENODEV;
 985		goto free_resources;
 986	}
 987	dma->device_issue_pending(dma_chan);
 988
 989	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
 990
 991	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
 992		dev_err(dev, "Self-test validate timed out\n");
 993		err = -ENODEV;
 994		goto free_resources;
 995	}
 996
 997	if (xor_val_result != 0) {
 998		dev_err(dev, "Self-test validate failed compare\n");
 999		err = -ENODEV;
1000		goto free_resources;
1001	}
1002
1003	/* skip memset if the capability is not present */
1004	if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask))
1005		goto free_resources;
1006
1007	/* test memset */
1008	dma_addr = dma_map_page(dev, dest, 0,
1009			PAGE_SIZE, DMA_FROM_DEVICE);
1010	tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
1011					 DMA_PREP_INTERRUPT);
1012	if (!tx) {
1013		dev_err(dev, "Self-test memset prep failed\n");
1014		err = -ENODEV;
1015		goto free_resources;
1016	}
1017
1018	async_tx_ack(tx);
1019	init_completion(&cmp);
1020	tx->callback = ioat3_dma_test_callback;
1021	tx->callback_param = &cmp;
1022	cookie = tx->tx_submit(tx);
1023	if (cookie < 0) {
1024		dev_err(dev, "Self-test memset setup failed\n");
1025		err = -ENODEV;
1026		goto free_resources;
1027	}
1028	dma->device_issue_pending(dma_chan);
1029
1030	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1031
1032	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
1033		dev_err(dev, "Self-test memset timed out\n");
1034		err = -ENODEV;
1035		goto free_resources;
1036	}
1037
1038	for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
1039		u32 *ptr = page_address(dest);
1040		if (ptr[i]) {
1041			dev_err(dev, "Self-test memset failed compare\n");
1042			err = -ENODEV;
1043			goto free_resources;
1044		}
1045	}
1046
1047	/* test for non-zero parity sum */
1048	xor_val_result = 0;
1049	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1050		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
1051					   DMA_TO_DEVICE);
1052	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
1053					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
1054					  &xor_val_result, DMA_PREP_INTERRUPT);
1055	if (!tx) {
1056		dev_err(dev, "Self-test 2nd zero prep failed\n");
1057		err = -ENODEV;
1058		goto free_resources;
1059	}
1060
1061	async_tx_ack(tx);
1062	init_completion(&cmp);
1063	tx->callback = ioat3_dma_test_callback;
1064	tx->callback_param = &cmp;
1065	cookie = tx->tx_submit(tx);
1066	if (cookie < 0) {
 1067		dev_err(dev, "Self-test 2nd zero setup failed\n");
1068		err = -ENODEV;
1069		goto free_resources;
1070	}
1071	dma->device_issue_pending(dma_chan);
1072
1073	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1074
1075	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
1076		dev_err(dev, "Self-test 2nd validate timed out\n");
1077		err = -ENODEV;
1078		goto free_resources;
1079	}
1080
1081	if (xor_val_result != SUM_CHECK_P_RESULT) {
1082		dev_err(dev, "Self-test validate failed compare\n");
1083		err = -ENODEV;
1084		goto free_resources;
1085	}
1086
1087free_resources:
1088	dma->device_free_chan_resources(dma_chan);
1089out:
1090	src_idx = IOAT_NUM_SRC_TEST;
1091	while (src_idx--)
1092		__free_page(xor_srcs[src_idx]);
1093	__free_page(dest);
1094	return err;
1095}
1096
1097static int __devinit ioat3_dma_self_test(struct ioatdma_device *device)
1098{
1099	int rc = ioat_dma_self_test(device);
1100
1101	if (rc)
1102		return rc;
1103
1104	rc = ioat_xor_val_self_test(device);
1105	if (rc)
1106		return rc;
1107
1108	return 0;
1109}
1110
1111static int ioat3_reset_hw(struct ioat_chan_common *chan)
1112{
1113	/* throw away whatever the channel was doing and get it
1114	 * initialized, with ioat3 specific workarounds
1115	 */
1116	struct ioatdma_device *device = chan->device;
1117	struct pci_dev *pdev = device->pdev;
1118	u32 chanerr;
1119	u16 dev_id;
1120	int err;
1121
1122	ioat2_quiesce(chan, msecs_to_jiffies(100));
1123
1124	chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
1125	writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
1126
1127	/* -= IOAT ver.3 workarounds =- */
1128	/* Write CHANERRMSK_INT with 3E07h to mask out the errors
1129	 * that can cause stability issues for IOAT ver.3, and clear any
1130	 * pending errors
1131	 */
1132	pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
1133	err = pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr);
1134	if (err) {
1135		dev_err(&pdev->dev, "channel error register unreachable\n");
1136		return err;
1137	}
1138	pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr);
1139
1140	/* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
1141	 * (workaround for spurious config parity error after restart)
1142	 */
1143	pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
1144	if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
1145		pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
1146
1147	return ioat2_reset_sync(chan, msecs_to_jiffies(200));
1148}
1149
1150int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
1151{
1152	struct pci_dev *pdev = device->pdev;
1153	int dca_en = system_has_dca_enabled(pdev);
1154	struct dma_device *dma;
1155	struct dma_chan *c;
1156	struct ioat_chan_common *chan;
1157	bool is_raid_device = false;
1158	int err;
1159	u32 cap;
1160
1161	device->enumerate_channels = ioat2_enumerate_channels;
1162	device->reset_hw = ioat3_reset_hw;
1163	device->self_test = ioat3_dma_self_test;
1164	dma = &device->common;
1165	dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
1166	dma->device_issue_pending = ioat2_issue_pending;
1167	dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
1168	dma->device_free_chan_resources = ioat2_free_chan_resources;
1169
1170	dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
1171	dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
1172
1173	cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
1174
1175	/* dca is incompatible with raid operations */
1176	if (dca_en && (cap & (IOAT_CAP_XOR|IOAT_CAP_PQ)))
1177		cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ);
1178
1179	if (cap & IOAT_CAP_XOR) {
1180		is_raid_device = true;
1181		dma->max_xor = 8;
1182		dma->xor_align = 6;
1183
1184		dma_cap_set(DMA_XOR, dma->cap_mask);
1185		dma->device_prep_dma_xor = ioat3_prep_xor;
1186
1187		dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1188		dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
1189	}
1190	if (cap & IOAT_CAP_PQ) {
1191		is_raid_device = true;
1192		dma_set_maxpq(dma, 8, 0);
1193		dma->pq_align = 6;
1194
1195		dma_cap_set(DMA_PQ, dma->cap_mask);
1196		dma->device_prep_dma_pq = ioat3_prep_pq;
1197
1198		dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
1199		dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
1200
1201		if (!(cap & IOAT_CAP_XOR)) {
1202			dma->max_xor = 8;
1203			dma->xor_align = 6;
1204
1205			dma_cap_set(DMA_XOR, dma->cap_mask);
1206			dma->device_prep_dma_xor = ioat3_prep_pqxor;
1207
1208			dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1209			dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
1210		}
1211	}
1212	if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) {
1213		dma_cap_set(DMA_MEMSET, dma->cap_mask);
1214		dma->device_prep_dma_memset = ioat3_prep_memset_lock;
1215	}
1216
1217
1218	if (is_raid_device) {
1219		dma->device_tx_status = ioat3_tx_status;
1220		device->cleanup_fn = ioat3_cleanup_event;
1221		device->timer_fn = ioat3_timer_event;
1222	} else {
1223		dma->device_tx_status = ioat_dma_tx_status;
1224		device->cleanup_fn = ioat2_cleanup_event;
1225		device->timer_fn = ioat2_timer_event;
1226	}
1227
1228	#ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA
1229	dma_cap_clear(DMA_PQ_VAL, dma->cap_mask);
1230	dma->device_prep_dma_pq_val = NULL;
1231	#endif
1232
1233	#ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
1234	dma_cap_clear(DMA_XOR_VAL, dma->cap_mask);
1235	dma->device_prep_dma_xor_val = NULL;
1236	#endif
1237
1238	err = ioat_probe(device);
1239	if (err)
1240		return err;
1241	ioat_set_tcp_copy_break(262144);
1242
1243	list_for_each_entry(c, &dma->channels, device_node) {
1244		chan = to_chan_common(c);
1245		writel(IOAT_DMA_DCA_ANY_CPU,
1246		       chan->reg_base + IOAT_DCACTRL_OFFSET);
1247	}
1248
1249	err = ioat_register(device);
1250	if (err)
1251		return err;
1252
1253	ioat_kobject_add(device, &ioat2_ktype);
1254
1255	if (dca)
1256		device->dca = ioat3_dca_init(pdev, device->reg_base);
1257
1258	return 0;
1259}