   1/*
   2 * Intel MIC Platform Software Stack (MPSS)
   3 *
   4 * Copyright(c) 2016 Intel Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License, version 2, as
   8 * published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13 * General Public License for more details.
  14 *
  15 * The full GNU General Public License is included in this distribution in
  16 * the file called "COPYING".
  17 *
  18 * Intel Virtio Over PCIe (VOP) driver.
  19 *
  20 */
  21#include <linux/sched.h>
  22#include <linux/poll.h>
  23#include <linux/dma-mapping.h>
  24
  25#include <linux/mic_common.h>
  26#include "../common/mic_dev.h"
  27
  28#include <linux/mic_ioctl.h>
  29#include "vop_main.h"
  30
  31/* Helper API to obtain the VOP PCIe device */
  32static inline struct device *vop_dev(struct vop_vdev *vdev)
  33{
  34	return vdev->vpdev->dev.parent;
  35}
  36
  37/* Helper API to check if a virtio device is initialized */
  38static inline int vop_vdev_inited(struct vop_vdev *vdev)
  39{
  40	if (!vdev)
  41		return -EINVAL;
  42	/* Device has not been created yet */
  43	if (!vdev->dd || !vdev->dd->type) {
  44		dev_err(vop_dev(vdev), "%s %d err %d\n",
  45			__func__, __LINE__, -EINVAL);
  46		return -EINVAL;
  47	}
  48	/* Device has been removed/deleted */
  49	if (vdev->dd->type == -1) {
  50		dev_dbg(vop_dev(vdev), "%s %d err %d\n",
  51			__func__, __LINE__, -ENODEV);
  52		return -ENODEV;
  53	}
  54	return 0;
  55}
  56
  57static void _vop_notify(struct vringh *vrh)
  58{
  59	struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
  60	struct vop_vdev *vdev = vvrh->vdev;
  61	struct vop_device *vpdev = vdev->vpdev;
  62	s8 db = vdev->dc->h2c_vdev_db;
  63
  64	if (db != -1)
  65		vpdev->hw_ops->send_intr(vpdev, db);
  66}
  67
  68static void vop_virtio_init_post(struct vop_vdev *vdev)
  69{
  70	struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
  71	struct vop_device *vpdev = vdev->vpdev;
  72	int i, used_size;
  73
  74	for (i = 0; i < vdev->dd->num_vq; i++) {
  75		used_size = PAGE_ALIGN(sizeof(u16) * 3 +
  76				sizeof(struct vring_used_elem) *
  77				le16_to_cpu(vqconfig[i].num));
  78		if (!le64_to_cpu(vqconfig[i].used_address)) {
  79			dev_warn(vop_dev(vdev), "used_address zero??\n");
  80			continue;
  81		}
  82		vdev->vvr[i].vrh.vring.used =
  83			(void __force *)vpdev->hw_ops->ioremap(
  84			vpdev,
  85			le64_to_cpu(vqconfig[i].used_address),
  86			used_size);
  87	}
  88
  89	vdev->dc->used_address_updated = 0;
  90
  91	dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
  92		 __func__, vdev->virtio_id);
  93}
  94
  95static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
  96{
  97	int i;
  98
  99	dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
 100		__func__, vdev->dd->status, vdev->virtio_id);
 101
 102	for (i = 0; i < vdev->dd->num_vq; i++)
 103		/*
 104		 * Avoid lockdep false positive. The + 1 is for the vop
 105		 * mutex which is held in the reset devices code path.
 106		 */
 107		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
 108
 109	/* 0 status means "reset" */
 110	vdev->dd->status = 0;
 111	vdev->dc->vdev_reset = 0;
 112	vdev->dc->host_ack = 1;
 113
 114	for (i = 0; i < vdev->dd->num_vq; i++) {
 115		struct vringh *vrh = &vdev->vvr[i].vrh;
 116
 117		vdev->vvr[i].vring.info->avail_idx = 0;
 118		vrh->completed = 0;
 119		vrh->last_avail_idx = 0;
 120		vrh->last_used_idx = 0;
 121	}
 122
 123	for (i = 0; i < vdev->dd->num_vq; i++)
 124		mutex_unlock(&vdev->vvr[i].vr_mutex);
 125}
 126
 127static void vop_virtio_reset_devices(struct vop_info *vi)
 128{
 129	struct list_head *pos, *tmp;
 130	struct vop_vdev *vdev;
 131
 132	list_for_each_safe(pos, tmp, &vi->vdev_list) {
 133		vdev = list_entry(pos, struct vop_vdev, list);
 134		vop_virtio_device_reset(vdev);
 135		vdev->poll_wake = 1;
 136		wake_up(&vdev->waitq);
 137	}
 138}
 139
 140static void vop_bh_handler(struct work_struct *work)
 141{
 142	struct vop_vdev *vdev = container_of(work, struct vop_vdev,
 143			virtio_bh_work);
 144
 145	if (vdev->dc->used_address_updated)
 146		vop_virtio_init_post(vdev);
 147
 148	if (vdev->dc->vdev_reset)
 149		vop_virtio_device_reset(vdev);
 150
 151	vdev->poll_wake = 1;
 152	wake_up(&vdev->waitq);
 153}
 154
 155static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
 156{
 157	struct vop_vdev *vdev = data;
 158	struct vop_device *vpdev = vdev->vpdev;
 159
 160	vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
 161	schedule_work(&vdev->virtio_bh_work);
 162	return IRQ_HANDLED;
 163}
 164
 165static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
 166{
 167	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
 168	int ret = 0, retry, i;
 169	struct vop_device *vpdev = vdev->vpdev;
 170	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
 171	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
 172	s8 db = bootparam->h2c_config_db;
 173
 174	mutex_lock(&vi->vop_mutex);
 175	for (i = 0; i < vdev->dd->num_vq; i++)
 176		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
 177
 178	if (db == -1 || vdev->dd->type == -1) {
 179		ret = -EIO;
 180		goto exit;
 181	}
 182
 183	memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
 184	vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
 185	vpdev->hw_ops->send_intr(vpdev, db);
 186
 187	for (retry = 100; retry--;) {
 188		ret = wait_event_timeout(wake, vdev->dc->guest_ack,
 189					 msecs_to_jiffies(100));
 190		if (ret)
 191			break;
 192	}
 193
 194	dev_dbg(vop_dev(vdev),
 195		"%s %d retry: %d\n", __func__, __LINE__, retry);
 196	vdev->dc->config_change = 0;
 197	vdev->dc->guest_ack = 0;
 198exit:
 199	for (i = 0; i < vdev->dd->num_vq; i++)
 200		mutex_unlock(&vdev->vvr[i].vr_mutex);
 201	mutex_unlock(&vi->vop_mutex);
 202	return ret;
 203}
 204
 205static int vop_copy_dp_entry(struct vop_vdev *vdev,
 206			     struct mic_device_desc *argp, __u8 *type,
 207			     struct mic_device_desc **devpage)
 208{
 209	struct vop_device *vpdev = vdev->vpdev;
 210	struct mic_device_desc *devp;
 211	struct mic_vqconfig *vqconfig;
 212	int ret = 0, i;
 213	bool slot_found = false;
 214
 215	vqconfig = mic_vq_config(argp);
 216	for (i = 0; i < argp->num_vq; i++) {
 217		if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
 218			ret =  -EINVAL;
 219			dev_err(vop_dev(vdev), "%s %d err %d\n",
 220				__func__, __LINE__, ret);
 221			goto exit;
 222		}
 223	}
 224
 225	/* Find the first free device page entry */
 226	for (i = sizeof(struct mic_bootparam);
 227		i < MIC_DP_SIZE - mic_total_desc_size(argp);
 228		i += mic_total_desc_size(devp)) {
 229		devp = vpdev->hw_ops->get_dp(vpdev) + i;
 230		if (devp->type == 0 || devp->type == -1) {
 231			slot_found = true;
 232			break;
 233		}
 234	}
 235	if (!slot_found) {
 236		ret =  -EINVAL;
 237		dev_err(vop_dev(vdev), "%s %d err %d\n",
 238			__func__, __LINE__, ret);
 239		goto exit;
 240	}
 241	/*
  242	 * Save off the type before doing the memcpy. The type will be set at
  243	 * the end, after all initialization for the new device has completed.
 244	 */
 245	*type = argp->type;
 246	argp->type = 0;
 247	memcpy(devp, argp, mic_desc_size(argp));
 248
 249	*devpage = devp;
 250exit:
 251	return ret;
 252}
 253
 254static void vop_init_device_ctrl(struct vop_vdev *vdev,
 255				 struct mic_device_desc *devpage)
 256{
 257	struct mic_device_ctrl *dc;
 258
 259	dc = (void *)devpage + mic_aligned_desc_size(devpage);
 260
 261	dc->config_change = 0;
 262	dc->guest_ack = 0;
 263	dc->vdev_reset = 0;
 264	dc->host_ack = 0;
 265	dc->used_address_updated = 0;
 266	dc->c2h_vdev_db = -1;
 267	dc->h2c_vdev_db = -1;
 268	vdev->dc = dc;
 269}
 270
 271static int vop_virtio_add_device(struct vop_vdev *vdev,
 272				 struct mic_device_desc *argp)
 273{
 274	struct vop_info *vi = vdev->vi;
 275	struct vop_device *vpdev = vi->vpdev;
 276	struct mic_device_desc *dd = NULL;
 277	struct mic_vqconfig *vqconfig;
 278	int vr_size, i, j, ret;
 279	u8 type = 0;
 280	s8 db = -1;
 281	char irqname[16];
 282	struct mic_bootparam *bootparam;
 283	u16 num;
 284	dma_addr_t vr_addr;
 285
 286	bootparam = vpdev->hw_ops->get_dp(vpdev);
 287	init_waitqueue_head(&vdev->waitq);
 288	INIT_LIST_HEAD(&vdev->list);
 289	vdev->vpdev = vpdev;
 290
 291	ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
 292	if (ret) {
 293		dev_err(vop_dev(vdev), "%s %d err %d\n",
 294			__func__, __LINE__, ret);
 295		kfree(vdev);
 296		return ret;
 297	}
 298
 299	vop_init_device_ctrl(vdev, dd);
 300
 301	vdev->dd = dd;
 302	vdev->virtio_id = type;
 303	vqconfig = mic_vq_config(dd);
 304	INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);
 305
 306	for (i = 0; i < dd->num_vq; i++) {
 307		struct vop_vringh *vvr = &vdev->vvr[i];
 308		struct mic_vring *vr = &vdev->vvr[i].vring;
 309
 310		num = le16_to_cpu(vqconfig[i].num);
 311		mutex_init(&vvr->vr_mutex);
 312		vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
 313			sizeof(struct _mic_vring_info));
 314		vr->va = (void *)
 315			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 316					 get_order(vr_size));
 317		if (!vr->va) {
 318			ret = -ENOMEM;
 319			dev_err(vop_dev(vdev), "%s %d err %d\n",
 320				__func__, __LINE__, ret);
 321			goto err;
 322		}
 323		vr->len = vr_size;
 324		vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
 325		vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
 326		vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
 327					 DMA_BIDIRECTIONAL);
 328		if (dma_mapping_error(&vpdev->dev, vr_addr)) {
 329			free_pages((unsigned long)vr->va, get_order(vr_size));
 330			ret = -ENOMEM;
 331			dev_err(vop_dev(vdev), "%s %d err %d\n",
 332				__func__, __LINE__, ret);
 333			goto err;
 334		}
 335		vqconfig[i].address = cpu_to_le64(vr_addr);
 336
 337		vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
 338		ret = vringh_init_kern(&vvr->vrh,
 339				       *(u32 *)mic_vq_features(vdev->dd),
 340				       num, false, vr->vr.desc, vr->vr.avail,
 341				       vr->vr.used);
 342		if (ret) {
 343			dev_err(vop_dev(vdev), "%s %d err %d\n",
 344				__func__, __LINE__, ret);
 345			goto err;
 346		}
 347		vringh_kiov_init(&vvr->riov, NULL, 0);
 348		vringh_kiov_init(&vvr->wiov, NULL, 0);
 349		vvr->head = USHRT_MAX;
 350		vvr->vdev = vdev;
 351		vvr->vrh.notify = _vop_notify;
 352		dev_dbg(&vpdev->dev,
 353			"%s %d index %d va %p info %p vr_size 0x%x\n",
 354			__func__, __LINE__, i, vr->va, vr->info, vr_size);
 355		vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
 356					get_order(VOP_INT_DMA_BUF_SIZE));
 357		vvr->buf_da = dma_map_single(&vpdev->dev,
 358					  vvr->buf, VOP_INT_DMA_BUF_SIZE,
 359					  DMA_BIDIRECTIONAL);
 360	}
 361
 362	snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
 363		 vdev->virtio_id);
 364	vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
 365	vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
 366			_vop_virtio_intr_handler, irqname, vdev,
 367			vdev->virtio_db);
 368	if (IS_ERR(vdev->virtio_cookie)) {
 369		ret = PTR_ERR(vdev->virtio_cookie);
 370		dev_dbg(&vpdev->dev, "request irq failed\n");
 371		goto err;
 372	}
 373
 374	vdev->dc->c2h_vdev_db = vdev->virtio_db;
 375
 376	/*
 377	 * Order the type update with previous stores. This write barrier
 378	 * is paired with the corresponding read barrier before the uncached
 379	 * system memory read of the type, on the card while scanning the
 380	 * device page.
 381	 */
 382	smp_wmb();
 383	dd->type = type;
 384	argp->type = type;
 385
 386	if (bootparam) {
 387		db = bootparam->h2c_config_db;
 388		if (db != -1)
 389			vpdev->hw_ops->send_intr(vpdev, db);
 390	}
 391	dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
 392	return 0;
 393err:
 394	vqconfig = mic_vq_config(dd);
 395	for (j = 0; j < i; j++) {
 396		struct vop_vringh *vvr = &vdev->vvr[j];
 397
 398		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
 399				 vvr->vring.len, DMA_BIDIRECTIONAL);
 400		free_pages((unsigned long)vvr->vring.va,
 401			   get_order(vvr->vring.len));
 402	}
 403	return ret;
 404}
 405
 406static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
 407			   struct vop_device *vpdev)
 408{
 409	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
 410	s8 db;
 411	int ret, retry;
 412	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
 413
 414	devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
 415	db = bootparam->h2c_config_db;
 416	if (db != -1)
 417		vpdev->hw_ops->send_intr(vpdev, db);
 418	else
 419		goto done;
 420	for (retry = 15; retry--;) {
 421		ret = wait_event_timeout(wake, devp->guest_ack,
 422					 msecs_to_jiffies(1000));
 423		if (ret)
 424			break;
 425	}
 426done:
 427	devp->config_change = 0;
 428	devp->guest_ack = 0;
 429}
 430
 431static void vop_virtio_del_device(struct vop_vdev *vdev)
 432{
 433	struct vop_info *vi = vdev->vi;
 434	struct vop_device *vpdev = vdev->vpdev;
 435	int i;
 436	struct mic_vqconfig *vqconfig;
 437	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
 438
 439	if (!bootparam)
 440		goto skip_hot_remove;
 441	vop_dev_remove(vi, vdev->dc, vpdev);
 442skip_hot_remove:
 443	vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
 444	flush_work(&vdev->virtio_bh_work);
 445	vqconfig = mic_vq_config(vdev->dd);
 446	for (i = 0; i < vdev->dd->num_vq; i++) {
 447		struct vop_vringh *vvr = &vdev->vvr[i];
 448
 449		dma_unmap_single(&vpdev->dev,
 450				 vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
 451				 DMA_BIDIRECTIONAL);
 452		free_pages((unsigned long)vvr->buf,
 453			   get_order(VOP_INT_DMA_BUF_SIZE));
 454		vringh_kiov_cleanup(&vvr->riov);
 455		vringh_kiov_cleanup(&vvr->wiov);
 456		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
 457				 vvr->vring.len, DMA_BIDIRECTIONAL);
 458		free_pages((unsigned long)vvr->vring.va,
 459			   get_order(vvr->vring.len));
 460	}
 461	/*
 462	 * Order the type update with previous stores. This write barrier
 463	 * is paired with the corresponding read barrier before the uncached
 464	 * system memory read of the type, on the card while scanning the
 465	 * device page.
 466	 */
 467	smp_wmb();
 468	vdev->dd->type = -1;
 469}
 470
 471/*
 472 * vop_sync_dma - Wrapper for synchronous DMAs.
 473 *
  474 * @vdev - The VOP virtio device on whose behalf the synchronous DMA
  475 * is performed.
 476 * @dst - destination DMA address.
 477 * @src - source DMA address.
 478 * @len - size of the transfer.
 479 *
  480 * Return 0 on success, a negative error code on failure.
 481 */
 482static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
 483			size_t len)
 484{
 485	int err = 0;
 486	struct dma_device *ddev;
 487	struct dma_async_tx_descriptor *tx;
 488	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
 489	struct dma_chan *vop_ch = vi->dma_ch;
 490
 491	if (!vop_ch) {
 492		err = -EBUSY;
 493		goto error;
 494	}
 495	ddev = vop_ch->device;
 496	tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
 497		DMA_PREP_FENCE);
 498	if (!tx) {
 499		err = -ENOMEM;
 500		goto error;
 501	} else {
 502		dma_cookie_t cookie;
 503
 504		cookie = tx->tx_submit(tx);
 505		if (dma_submit_error(cookie)) {
 506			err = -ENOMEM;
 507			goto error;
 508		}
 509		dma_async_issue_pending(vop_ch);
 510		err = dma_sync_wait(vop_ch, cookie);
 511	}
 512error:
 513	if (err)
 514		dev_err(&vi->vpdev->dev, "%s %d err %d\n",
 515			__func__, __LINE__, err);
 516	return err;
 517}
 518
 519#define VOP_USE_DMA true
 520
 521/*
 522 * Initiates the copies across the PCIe bus from card memory to a user
 523 * space buffer. When transfers are done using DMA, source/destination
 524 * addresses and transfer length must follow the alignment requirements of
 525 * the MIC DMA engine.
 526 */
 527static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
 528				   size_t len, u64 daddr, size_t dlen,
 529				   int vr_idx)
 530{
 531	struct vop_device *vpdev = vdev->vpdev;
 532	void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
 533	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
 534	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
 535	size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
 536	bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
 537	size_t dma_offset, partlen;
 538	int err;
 539
 540	if (!VOP_USE_DMA) {
 541		if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
 542			err = -EFAULT;
 543			dev_err(vop_dev(vdev), "%s %d err %d\n",
 544				__func__, __LINE__, err);
 545			goto err;
 546		}
 547		vdev->in_bytes += len;
 548		err = 0;
 549		goto err;
 550	}
 551
 552	dma_offset = daddr - round_down(daddr, dma_alignment);
 553	daddr -= dma_offset;
 554	len += dma_offset;
 555	/*
 556	 * X100 uses DMA addresses as seen by the card so adding
  557	 * the aperture base is not required for DMA. However, X200
  558	 * requires DMA addresses to be an offset into the BAR, so
  559	 * add the aperture base for X200.
 560	 */
 561	if (x200)
 562		daddr += vpdev->aper->pa;
 563	while (len) {
 564		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
 565		err = vop_sync_dma(vdev, vvr->buf_da, daddr,
 566				   ALIGN(partlen, dma_alignment));
 567		if (err) {
 568			dev_err(vop_dev(vdev), "%s %d err %d\n",
 569				__func__, __LINE__, err);
 570			goto err;
 571		}
 572		if (copy_to_user(ubuf, vvr->buf + dma_offset,
 573				 partlen - dma_offset)) {
 574			err = -EFAULT;
 575			dev_err(vop_dev(vdev), "%s %d err %d\n",
 576				__func__, __LINE__, err);
 577			goto err;
 578		}
 579		daddr += partlen;
 580		ubuf += partlen;
 581		dbuf += partlen;
 582		vdev->in_bytes_dma += partlen;
 583		vdev->in_bytes += partlen;
 584		len -= partlen;
 585		dma_offset = 0;
 586	}
 587	err = 0;
 588err:
 589	vpdev->hw_ops->iounmap(vpdev, dbuf);
 590	dev_dbg(vop_dev(vdev),
 591		"%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
 592		__func__, ubuf, dbuf, len, vr_idx);
 593	return err;
 594}
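/*
 * Worked example of the alignment fix-up above (illustrative numbers only,
 * assuming a DMA engine whose copy_align yields a 64-byte dma_alignment):
 * for daddr = 0x1003 and len = 100, dma_offset =
 * 0x1003 - round_down(0x1003, 64) = 3, so daddr becomes 0x1000 and len
 * becomes 103. The DMA then moves ALIGN(103, 64) = 128 bytes from the
 * aligned card address into the bounce buffer vvr->buf, and copy_to_user()
 * skips the first dma_offset = 3 bytes when returning the 100 requested
 * bytes to user space.
 */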
 595
 596/*
 597 * Initiates copies across the PCIe bus from a user space buffer to card
 598 * memory. When transfers are done using DMA, source/destination addresses
 599 * and transfer length must follow the alignment requirements of the MIC
 600 * DMA engine.
 601 */
 602static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
 603				     size_t len, u64 daddr, size_t dlen,
 604				     int vr_idx)
 605{
 606	struct vop_device *vpdev = vdev->vpdev;
 607	void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
 608	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
 609	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
 610	size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
 611	bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
 612	size_t partlen;
 613	bool dma = VOP_USE_DMA;
 614	int err = 0;
 615
 616	if (daddr & (dma_alignment - 1)) {
 617		vdev->tx_dst_unaligned += len;
 618		dma = false;
 619	} else if (ALIGN(len, dma_alignment) > dlen) {
 620		vdev->tx_len_unaligned += len;
 621		dma = false;
 622	}
 623
 624	if (!dma)
 625		goto memcpy;
 626
 627	/*
 628	 * X100 uses DMA addresses as seen by the card so adding
  629	 * the aperture base is not required for DMA. However, X200
  630	 * requires DMA addresses to be an offset into the BAR, so
  631	 * add the aperture base for X200.
 632	 */
 633	if (x200)
 634		daddr += vpdev->aper->pa;
 635	while (len) {
 636		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
 637
 638		if (copy_from_user(vvr->buf, ubuf, partlen)) {
 639			err = -EFAULT;
 640			dev_err(vop_dev(vdev), "%s %d err %d\n",
 641				__func__, __LINE__, err);
 642			goto err;
 643		}
 644		err = vop_sync_dma(vdev, daddr, vvr->buf_da,
 645				   ALIGN(partlen, dma_alignment));
 646		if (err) {
 647			dev_err(vop_dev(vdev), "%s %d err %d\n",
 648				__func__, __LINE__, err);
 649			goto err;
 650		}
 651		daddr += partlen;
 652		ubuf += partlen;
 653		dbuf += partlen;
 654		vdev->out_bytes_dma += partlen;
 655		vdev->out_bytes += partlen;
 656		len -= partlen;
 657	}
 658memcpy:
 659	/*
 660	 * We are copying to IO below and should ideally use something
 661	 * like copy_from_user_toio(..) if it existed.
 662	 */
 663	if (copy_from_user((void __force *)dbuf, ubuf, len)) {
 664		err = -EFAULT;
 665		dev_err(vop_dev(vdev), "%s %d err %d\n",
 666			__func__, __LINE__, err);
 667		goto err;
 668	}
 669	vdev->out_bytes += len;
 670	err = 0;
 671err:
 672	vpdev->hw_ops->iounmap(vpdev, dbuf);
 673	dev_dbg(vop_dev(vdev),
 674		"%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
 675		__func__, ubuf, dbuf, len, vr_idx);
 676	return err;
 677}
 678
 679#define MIC_VRINGH_READ true
 680
 681/* Determine the total number of bytes consumed in a VRINGH KIOV */
 682static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
 683{
 684	int i;
 685	u32 total = iov->consumed;
 686
 687	for (i = 0; i < iov->i; i++)
 688		total += iov->iov[i].iov_len;
 689	return total;
 690}
 691
 692/*
 693 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
 694 * This API is heavily based on the vringh_iov_xfer(..) implementation
 695 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
 696 * and vringh_iov_push_kern(..) directly is because there is no
 697 * way to override the VRINGH xfer(..) routines as of v3.10.
 698 */
 699static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
 700			   void __user *ubuf, size_t len, bool read, int vr_idx,
 701			   size_t *out_len)
 702{
 703	int ret = 0;
 704	size_t partlen, tot_len = 0;
 705
 706	while (len && iov->i < iov->used) {
 707		struct kvec *kiov = &iov->iov[iov->i];
 708
 709		partlen = min(kiov->iov_len, len);
 710		if (read)
 711			ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
 712						      (u64)kiov->iov_base,
 713						      kiov->iov_len,
 714						      vr_idx);
 715		else
 716			ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
 717							(u64)kiov->iov_base,
 718							kiov->iov_len,
 719							vr_idx);
 720		if (ret) {
 721			dev_err(vop_dev(vdev), "%s %d err %d\n",
 722				__func__, __LINE__, ret);
 723			break;
 724		}
 725		len -= partlen;
 726		ubuf += partlen;
 727		tot_len += partlen;
 728		iov->consumed += partlen;
 729		kiov->iov_len -= partlen;
 730		kiov->iov_base += partlen;
 731		if (!kiov->iov_len) {
 732			/* Fix up old iov element then increment. */
 733			kiov->iov_len = iov->consumed;
 734			kiov->iov_base -= iov->consumed;
 735
 736			iov->consumed = 0;
 737			iov->i++;
 738		}
 739	}
 740	*out_len = tot_len;
 741	return ret;
 742}
 743
 744/*
 745 * Use the standard VRINGH infrastructure in the kernel to fetch new
 746 * descriptors, initiate the copies and update the used ring.
 747 */
 748static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
 749{
 750	int ret = 0;
 751	u32 iovcnt = copy->iovcnt;
 752	struct iovec iov;
 753	struct iovec __user *u_iov = copy->iov;
 754	void __user *ubuf = NULL;
 755	struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
 756	struct vringh_kiov *riov = &vvr->riov;
 757	struct vringh_kiov *wiov = &vvr->wiov;
 758	struct vringh *vrh = &vvr->vrh;
 759	u16 *head = &vvr->head;
 760	struct mic_vring *vr = &vvr->vring;
 761	size_t len = 0, out_len;
 762
 763	copy->out_len = 0;
 764	/* Fetch a new IOVEC if all previous elements have been processed */
 765	if (riov->i == riov->used && wiov->i == wiov->used) {
 766		ret = vringh_getdesc_kern(vrh, riov, wiov,
 767					  head, GFP_KERNEL);
 768		/* Check if there are available descriptors */
 769		if (ret <= 0)
 770			return ret;
 771	}
 772	while (iovcnt) {
 773		if (!len) {
 774			/* Copy over a new iovec from user space. */
 775			ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
 776			if (ret) {
 777				ret = -EINVAL;
 778				dev_err(vop_dev(vdev), "%s %d err %d\n",
 779					__func__, __LINE__, ret);
 780				break;
 781			}
 782			len = iov.iov_len;
 783			ubuf = iov.iov_base;
 784		}
 785		/* Issue all the read descriptors first */
 786		ret = vop_vringh_copy(vdev, riov, ubuf, len,
 787				      MIC_VRINGH_READ, copy->vr_idx, &out_len);
 788		if (ret) {
 789			dev_err(vop_dev(vdev), "%s %d err %d\n",
 790				__func__, __LINE__, ret);
 791			break;
 792		}
 793		len -= out_len;
 794		ubuf += out_len;
 795		copy->out_len += out_len;
 796		/* Issue the write descriptors next */
 797		ret = vop_vringh_copy(vdev, wiov, ubuf, len,
 798				      !MIC_VRINGH_READ, copy->vr_idx, &out_len);
 799		if (ret) {
 800			dev_err(vop_dev(vdev), "%s %d err %d\n",
 801				__func__, __LINE__, ret);
 802			break;
 803		}
 804		len -= out_len;
 805		ubuf += out_len;
 806		copy->out_len += out_len;
 807		if (!len) {
 808			/* One user space iovec is now completed */
 809			iovcnt--;
 810			u_iov++;
 811		}
 812		/* Exit loop if all elements in KIOVs have been processed. */
 813		if (riov->i == riov->used && wiov->i == wiov->used)
 814			break;
 815	}
 816	/*
 817	 * Update the used ring if a descriptor was available and some data was
 818	 * copied in/out and the user asked for a used ring update.
 819	 */
 820	if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
 821		u32 total = 0;
 822
 823		/* Determine the total data consumed */
 824		total += vop_vringh_iov_consumed(riov);
 825		total += vop_vringh_iov_consumed(wiov);
 826		vringh_complete_kern(vrh, *head, total);
 827		*head = USHRT_MAX;
 828		if (vringh_need_notify_kern(vrh) > 0)
 829			vringh_notify(vrh);
 830		vringh_kiov_cleanup(riov);
 831		vringh_kiov_cleanup(wiov);
 832		/* Update avail idx for user space */
 833		vr->info->avail_idx = vrh->last_avail_idx;
 834	}
 835	return ret;
 836}
 837
 838static inline int vop_verify_copy_args(struct vop_vdev *vdev,
 839				       struct mic_copy_desc *copy)
 840{
 841	if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
 842		return -EINVAL;
 843	return 0;
 844}
 845
 846/* Copy a specified number of virtio descriptors in a chain */
 847static int vop_virtio_copy_desc(struct vop_vdev *vdev,
 848				struct mic_copy_desc *copy)
 849{
 850	int err;
 851	struct vop_vringh *vvr;
 852
 853	err = vop_verify_copy_args(vdev, copy);
 854	if (err)
 855		return err;
 856
 857	vvr = &vdev->vvr[copy->vr_idx];
 858	mutex_lock(&vvr->vr_mutex);
 859	if (!vop_vdevup(vdev)) {
 860		err = -ENODEV;
 861		dev_err(vop_dev(vdev), "%s %d err %d\n",
 862			__func__, __LINE__, err);
 863		goto err;
 864	}
 865	err = _vop_virtio_copy(vdev, copy);
 866	if (err) {
 867		dev_err(vop_dev(vdev), "%s %d err %d\n",
 868			__func__, __LINE__, err);
 869	}
 870err:
 871	mutex_unlock(&vvr->vr_mutex);
 872	return err;
 873}
 874
 875static int vop_open(struct inode *inode, struct file *f)
 876{
 877	struct vop_vdev *vdev;
 878	struct vop_info *vi = container_of(f->private_data,
 879		struct vop_info, miscdev);
 880
 881	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
 882	if (!vdev)
 883		return -ENOMEM;
 884	vdev->vi = vi;
 885	mutex_init(&vdev->vdev_mutex);
 886	f->private_data = vdev;
 887	init_completion(&vdev->destroy);
 888	complete(&vdev->destroy);
 889	return 0;
 890}
 891
 892static int vop_release(struct inode *inode, struct file *f)
 893{
 894	struct vop_vdev *vdev = f->private_data, *vdev_tmp;
 895	struct vop_info *vi = vdev->vi;
 896	struct list_head *pos, *tmp;
 897	bool found = false;
 898
 899	mutex_lock(&vdev->vdev_mutex);
 900	if (vdev->deleted)
 901		goto unlock;
 902	mutex_lock(&vi->vop_mutex);
 903	list_for_each_safe(pos, tmp, &vi->vdev_list) {
 904		vdev_tmp = list_entry(pos, struct vop_vdev, list);
 905		if (vdev == vdev_tmp) {
 906			vop_virtio_del_device(vdev);
 907			list_del(pos);
 908			found = true;
 909			break;
 910		}
 911	}
 912	mutex_unlock(&vi->vop_mutex);
 913unlock:
 914	mutex_unlock(&vdev->vdev_mutex);
 915	if (!found)
 916		wait_for_completion(&vdev->destroy);
 917	f->private_data = NULL;
 918	kfree(vdev);
 919	return 0;
 920}
 921
 922static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 923{
 924	struct vop_vdev *vdev = f->private_data;
 925	struct vop_info *vi = vdev->vi;
 926	void __user *argp = (void __user *)arg;
 927	int ret;
 928
 929	switch (cmd) {
 930	case MIC_VIRTIO_ADD_DEVICE:
 931	{
 932		struct mic_device_desc dd, *dd_config;
 933
 934		if (copy_from_user(&dd, argp, sizeof(dd)))
 935			return -EFAULT;
 936
 937		if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
 938		    dd.num_vq > MIC_MAX_VRINGS)
 939			return -EINVAL;
 940
 941		dd_config = kzalloc(mic_desc_size(&dd), GFP_KERNEL);
 942		if (!dd_config)
 943			return -ENOMEM;
 944		if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) {
 945			ret = -EFAULT;
 946			goto free_ret;
 947		}
 948		/* Ensure desc has not changed between the two reads */
 949		if (memcmp(&dd, dd_config, sizeof(dd))) {
 950			ret = -EINVAL;
 951			goto free_ret;
 952		}
 953		mutex_lock(&vdev->vdev_mutex);
 954		mutex_lock(&vi->vop_mutex);
 955		ret = vop_virtio_add_device(vdev, dd_config);
 956		if (ret)
 957			goto unlock_ret;
 958		list_add_tail(&vdev->list, &vi->vdev_list);
 959unlock_ret:
 960		mutex_unlock(&vi->vop_mutex);
 961		mutex_unlock(&vdev->vdev_mutex);
 962free_ret:
 963		kfree(dd_config);
 964		return ret;
 965	}
 966	case MIC_VIRTIO_COPY_DESC:
 967	{
 968		struct mic_copy_desc copy;
 969
 970		mutex_lock(&vdev->vdev_mutex);
 971		ret = vop_vdev_inited(vdev);
 972		if (ret)
 973			goto _unlock_ret;
 974
 975		if (copy_from_user(&copy, argp, sizeof(copy))) {
 976			ret = -EFAULT;
 977			goto _unlock_ret;
 978		}
 979
 980		ret = vop_virtio_copy_desc(vdev, &copy);
 981		if (ret < 0)
 982			goto _unlock_ret;
 983		if (copy_to_user(
 984			&((struct mic_copy_desc __user *)argp)->out_len,
 985			&copy.out_len, sizeof(copy.out_len)))
 986			ret = -EFAULT;
 987_unlock_ret:
 988		mutex_unlock(&vdev->vdev_mutex);
 989		return ret;
 990	}
 991	case MIC_VIRTIO_CONFIG_CHANGE:
 992	{
 993		void *buf;
 994
 995		mutex_lock(&vdev->vdev_mutex);
 996		ret = vop_vdev_inited(vdev);
 997		if (ret)
 998			goto __unlock_ret;
 999		buf = kzalloc(vdev->dd->config_len, GFP_KERNEL);
1000		if (!buf) {
1001			ret = -ENOMEM;
1002			goto __unlock_ret;
1003		}
1004		if (copy_from_user(buf, argp, vdev->dd->config_len)) {
1005			ret = -EFAULT;
1006			goto done;
1007		}
1008		ret = vop_virtio_config_change(vdev, buf);
1009done:
1010		kfree(buf);
1011__unlock_ret:
1012		mutex_unlock(&vdev->vdev_mutex);
1013		return ret;
1014	}
1015	default:
1016		return -ENOIOCTLCMD;
 1017	}
1018	return 0;
1019}
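/*
 * A rough user space sketch (not part of this driver) of how a host-side
 * daemon, for example the MPSS mpssd service, might drive the ioctl
 * interface above. Error handling and #includes are omitted; the names
 * alloc_net_desc(), iov and iovcnt are assumptions for illustration, while
 * the descriptor and copy structures come from the uapi headers included
 * at the top of this file (<linux/mic_common.h>, <linux/mic_ioctl.h>).
 *
 *	int fd = open("/dev/vop_virtio0", O_RDWR);
 *	struct mic_device_desc *desc = alloc_net_desc();	// hypothetical
 *
 *	// Register a new virtio device with the card.
 *	if (ioctl(fd, MIC_VIRTIO_ADD_DEVICE, desc) < 0)
 *		perror("MIC_VIRTIO_ADD_DEVICE");
 *
 *	// Later, move data for one descriptor chain on vring 0 and ask for
 *	// a used ring update.
 *	struct mic_copy_desc copy = {
 *		.iov = iov,		// user space struct iovec array
 *		.iovcnt = iovcnt,
 *		.vr_idx = 0,
 *		.update_used = 1,
 *	};
 *	if (ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy) < 0)
 *		perror("MIC_VIRTIO_COPY_DESC");
 *	// On success, copy.out_len holds the number of bytes transferred.
 */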
1020
1021/*
1022 * We return POLLIN | POLLOUT from poll when new buffers are enqueued, and
1023 * not when previously enqueued buffers may be available. This means that
1024 * in the card->host (TX) path, when userspace is unblocked by poll it
1025 * must drain all available descriptors or it can stall.
1026 */
1027static unsigned int vop_poll(struct file *f, poll_table *wait)
1028{
1029	struct vop_vdev *vdev = f->private_data;
1030	int mask = 0;
1031
1032	mutex_lock(&vdev->vdev_mutex);
1033	if (vop_vdev_inited(vdev)) {
1034		mask = POLLERR;
1035		goto done;
1036	}
1037	poll_wait(f, &vdev->waitq, wait);
1038	if (vop_vdev_inited(vdev)) {
1039		mask = POLLERR;
1040	} else if (vdev->poll_wake) {
1041		vdev->poll_wake = 0;
1042		mask = POLLIN | POLLOUT;
1043	}
1044done:
1045	mutex_unlock(&vdev->vdev_mutex);
1046	return mask;
1047}
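/*
 * A minimal user space sketch (assumed code, not part of this driver) of an
 * event loop that honours the contract described above vop_poll(): because
 * poll() only reports newly enqueued buffers, every POLLIN | POLLOUT wake-up
 * must be followed by draining descriptors until none remain.
 * process_all_descs() is a hypothetical helper that keeps issuing
 * MIC_VIRTIO_COPY_DESC until no descriptor is left.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	while (poll(&pfd, 1, -1) >= 0) {
 *		if (pfd.revents & POLLERR)
 *			break;
 *		if (pfd.revents & (POLLIN | POLLOUT))
 *			process_all_descs(fd);
 *	}
 */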
1048
1049static inline int
1050vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
1051		 unsigned long *size, unsigned long *pa)
1052{
1053	struct vop_device *vpdev = vdev->vpdev;
1054	unsigned long start = MIC_DP_SIZE;
1055	int i;
1056
1057	/*
1058	 * MMAP interface is as follows:
1059	 * offset				region
1060	 * 0x0					virtio device_page
1061	 * 0x1000				first vring
1062	 * 0x1000 + size of 1st vring		second vring
1063	 * ....
1064	 */
1065	if (!offset) {
1066		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
1067		*size = MIC_DP_SIZE;
1068		return 0;
1069	}
1070
1071	for (i = 0; i < vdev->dd->num_vq; i++) {
1072		struct vop_vringh *vvr = &vdev->vvr[i];
1073
1074		if (offset == start) {
1075			*pa = virt_to_phys(vvr->vring.va);
1076			*size = vvr->vring.len;
1077			return 0;
1078		}
1079		start += vvr->vring.len;
1080	}
1081	return -1;
1082}
1083
1084/*
1085 * Maps the device page and virtio rings to user space for readonly access.
1086 */
1087static int vop_mmap(struct file *f, struct vm_area_struct *vma)
1088{
1089	struct vop_vdev *vdev = f->private_data;
1090	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
1091	unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
1092	int i, err;
1093
1094	err = vop_vdev_inited(vdev);
1095	if (err)
1096		goto ret;
1097	if (vma->vm_flags & VM_WRITE) {
1098		err = -EACCES;
1099		goto ret;
1100	}
1101	while (size_rem) {
1102		i = vop_query_offset(vdev, offset, &size, &pa);
1103		if (i < 0) {
1104			err = -EINVAL;
1105			goto ret;
1106		}
1107		err = remap_pfn_range(vma, vma->vm_start + offset,
1108				      pa >> PAGE_SHIFT, size,
1109				      vma->vm_page_prot);
1110		if (err)
1111			goto ret;
1112		size_rem -= size;
1113		offset += size;
1114	}
1115ret:
1116	return err;
1117}
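/*
 * Illustrative user space sketch of the read-only mmap layout implemented by
 * vop_query_offset()/vop_mmap() above: the device page sits at offset 0 and
 * each vring follows at the running offset of the previous region. dp_size
 * and vr_len[] are placeholders here; in practice they are derived from
 * MIC_DP_SIZE and the ring sizes published in the device page.
 *
 *	size_t total = dp_size + vr_len[0] + vr_len[1];
 *	char *va = mmap(NULL, total, PROT_READ, MAP_SHARED, fd, 0);
 *
 *	void *dev_page = va;			// device page at offset 0
 *	void *vr0 = va + dp_size;		// first vring
 *	void *vr1 = va + dp_size + vr_len[0];	// second vring
 *
 *	// Requesting PROT_WRITE would fail: the VM_WRITE check above
 *	// returns -EACCES.
 */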
1118
1119static const struct file_operations vop_fops = {
1120	.open = vop_open,
1121	.release = vop_release,
1122	.unlocked_ioctl = vop_ioctl,
1123	.poll = vop_poll,
1124	.mmap = vop_mmap,
1125	.owner = THIS_MODULE,
1126};
1127
1128int vop_host_init(struct vop_info *vi)
1129{
1130	int rc;
1131	struct miscdevice *mdev;
1132	struct vop_device *vpdev = vi->vpdev;
1133
1134	INIT_LIST_HEAD(&vi->vdev_list);
1135	vi->dma_ch = vpdev->dma_ch;
1136	mdev = &vi->miscdev;
1137	mdev->minor = MISC_DYNAMIC_MINOR;
1138	snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
1139	mdev->name = vi->name;
1140	mdev->fops = &vop_fops;
1141	mdev->parent = &vpdev->dev;
1142
1143	rc = misc_register(mdev);
1144	if (rc)
1145		dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
1146	return rc;
1147}
1148
1149void vop_host_uninit(struct vop_info *vi)
1150{
1151	struct list_head *pos, *tmp;
1152	struct vop_vdev *vdev;
1153
1154	mutex_lock(&vi->vop_mutex);
1155	vop_virtio_reset_devices(vi);
1156	list_for_each_safe(pos, tmp, &vi->vdev_list) {
1157		vdev = list_entry(pos, struct vop_vdev, list);
1158		list_del(pos);
1159		reinit_completion(&vdev->destroy);
1160		mutex_unlock(&vi->vop_mutex);
1161		mutex_lock(&vdev->vdev_mutex);
1162		vop_virtio_del_device(vdev);
1163		vdev->deleted = true;
1164		mutex_unlock(&vdev->vdev_mutex);
1165		complete(&vdev->destroy);
1166		mutex_lock(&vi->vop_mutex);
1167	}
1168	mutex_unlock(&vi->vop_mutex);
1169	misc_deregister(&vi->miscdev);
1170}