   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2018-2020 Intel Corporation.
   4 * Copyright (C) 2020 Red Hat, Inc.
   5 *
   6 * Author: Tiwei Bie <tiwei.bie@intel.com>
   7 *         Jason Wang <jasowang@redhat.com>
   8 *
   9 * Thanks to Michael S. Tsirkin for the valuable comments and
  10 * suggestions, and thanks to Cunming Liang and Zhihong Wang for
  11 * all their support.
  12 */
  13
  14#include <linux/kernel.h>
  15#include <linux/module.h>
  16#include <linux/cdev.h>
  17#include <linux/device.h>
  18#include <linux/mm.h>
  19#include <linux/iommu.h>
  20#include <linux/uuid.h>
  21#include <linux/vdpa.h>
  22#include <linux/nospec.h>
  23#include <linux/vhost.h>
  24#include <linux/virtio_net.h>
  26
  27#include "vhost.h"
  28
  29enum {
  30	VHOST_VDPA_BACKEND_FEATURES =
  31	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
  32	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
  33};
  34
  35#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
  36
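/* Per-device state: one instance is created for each vdpa device bound
 * to this driver and is exposed to userspace as a vhost-vdpa character
 * device (see vhost_vdpa_probe() below). */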
  37struct vhost_vdpa {
  38	struct vhost_dev vdev;
  39	struct iommu_domain *domain;
  40	struct vhost_virtqueue *vqs;
  41	struct completion completion;
  42	struct vdpa_device *vdpa;
  43	struct device dev;
  44	struct cdev cdev;
  45	atomic_t opened;
  46	int nvqs;
  47	int virtio_id;
  48	int minor;
  49	struct eventfd_ctx *config_ctx;
  50	int in_batch;
  51};
  52
  53static DEFINE_IDA(vhost_vdpa_ida);
  54
  55static dev_t vhost_vdpa_major;
  56
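/* vhost work callback: a kick signalled by userspace on the vring's
 * kick eventfd is relayed to the parent vdpa device's doorbell. */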
  57static void handle_vq_kick(struct vhost_work *work)
  58{
  59	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
  60						  poll.work);
  61	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
  62	const struct vdpa_config_ops *ops = v->vdpa->config;
  63
  64	ops->kick_vq(v->vdpa, vq - v->vqs);
  65}
  66
  67static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
  68{
  69	struct vhost_virtqueue *vq = private;
  70	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
  71
  72	if (call_ctx)
  73		eventfd_signal(call_ctx, 1);
  74
  75	return IRQ_HANDLED;
  76}
  77
  78static irqreturn_t vhost_vdpa_config_cb(void *private)
  79{
  80	struct vhost_vdpa *v = private;
  81	struct eventfd_ctx *config_ctx = v->config_ctx;
  82
  83	if (config_ctx)
  84		eventfd_signal(config_ctx, 1);
  85
  86	return IRQ_HANDLED;
  87}
  88
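/* If the parent device exposes a dedicated interrupt for this
 * virtqueue, register an irq-bypass producer for it so the interrupt
 * can be matched with a consumer (e.g. a KVM irqfd) and delivered to
 * the guest without bouncing through the call eventfd in software. */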
  89static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
  90{
  91	struct vhost_virtqueue *vq = &v->vqs[qid];
  92	const struct vdpa_config_ops *ops = v->vdpa->config;
  93	struct vdpa_device *vdpa = v->vdpa;
  94	int ret, irq;
  95
  96	if (!ops->get_vq_irq)
  97		return;
  98
  99	irq = ops->get_vq_irq(vdpa, qid);
 100	spin_lock(&vq->call_ctx.ctx_lock);
 101	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 102	if (!vq->call_ctx.ctx || irq < 0) {
 103		spin_unlock(&vq->call_ctx.ctx_lock);
 104		return;
 105	}
 106
 107	vq->call_ctx.producer.token = vq->call_ctx.ctx;
 108	vq->call_ctx.producer.irq = irq;
 109	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
 110	spin_unlock(&vq->call_ctx.ctx_lock);
 111}
 112
 113static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
 114{
 115	struct vhost_virtqueue *vq = &v->vqs[qid];
 116
 117	spin_lock(&vq->call_ctx.ctx_lock);
 118	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 119	spin_unlock(&vq->call_ctx.ctx_lock);
 120}
 121
 122static void vhost_vdpa_reset(struct vhost_vdpa *v)
 123{
 124	struct vdpa_device *vdpa = v->vdpa;
 125
 126	vdpa_reset(vdpa);
 127	v->in_batch = 0;
 128}
 129
 130static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
 131{
 132	struct vdpa_device *vdpa = v->vdpa;
 133	const struct vdpa_config_ops *ops = vdpa->config;
 134	u32 device_id;
 135
 136	device_id = ops->get_device_id(vdpa);
 137
 138	if (copy_to_user(argp, &device_id, sizeof(device_id)))
 139		return -EFAULT;
 140
 141	return 0;
 142}
 143
 144static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
 145{
 146	struct vdpa_device *vdpa = v->vdpa;
 147	const struct vdpa_config_ops *ops = vdpa->config;
 148	u8 status;
 149
 150	status = ops->get_status(vdpa);
 151
 152	if (copy_to_user(statusp, &status, sizeof(status)))
 153		return -EFAULT;
 154
 155	return 0;
 156}
 157
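/* VHOST_VDPA_SET_STATUS: forward the virtio status byte to the parent
 * device.  Setting DRIVER_OK also wires up the per-virtqueue interrupt
 * bypass producers; clearing it tears them down again. */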
 158static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
 159{
 160	struct vdpa_device *vdpa = v->vdpa;
 161	const struct vdpa_config_ops *ops = vdpa->config;
 162	u8 status, status_old;
 163	int nvqs = v->nvqs;
 164	u16 i;
 165
 166	if (copy_from_user(&status, statusp, sizeof(status)))
 167		return -EFAULT;
 168
 169	status_old = ops->get_status(vdpa);
 170
 171	/*
  172	 * Userspace shouldn't remove status bits unless it resets
  173	 * the status to 0.
 174	 */
 175	if (status != 0 && (ops->get_status(vdpa) & ~status) != 0)
 176		return -EINVAL;
 177
 178	ops->set_status(vdpa, status);
 179
 180	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
 181		for (i = 0; i < nvqs; i++)
 182			vhost_vdpa_setup_vq_irq(v, i);
 183
 184	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
 185		for (i = 0; i < nvqs; i++)
 186			vhost_vdpa_unsetup_vq_irq(v, i);
 187
 188	return 0;
 189}
 190
 191static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
 192				      struct vhost_vdpa_config *c)
 193{
 194	long size = 0;
 195
 196	switch (v->virtio_id) {
 197	case VIRTIO_ID_NET:
 198		size = sizeof(struct virtio_net_config);
 199		break;
 200	}
 201
 202	if (c->len == 0)
 203		return -EINVAL;
 204
 205	if (c->len > size - c->off)
 206		return -E2BIG;
 207
 208	return 0;
 209}
 210
 211static long vhost_vdpa_get_config(struct vhost_vdpa *v,
 212				  struct vhost_vdpa_config __user *c)
 213{
 214	struct vdpa_device *vdpa = v->vdpa;
 215	struct vhost_vdpa_config config;
 216	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 217	u8 *buf;
 218
 219	if (copy_from_user(&config, c, size))
 220		return -EFAULT;
 221	if (vhost_vdpa_config_validate(v, &config))
 222		return -EINVAL;
 223	buf = kvzalloc(config.len, GFP_KERNEL);
 224	if (!buf)
 225		return -ENOMEM;
 226
 227	vdpa_get_config(vdpa, config.off, buf, config.len);
 228
 229	if (copy_to_user(c->buf, buf, config.len)) {
 230		kvfree(buf);
 231		return -EFAULT;
 232	}
 233
 234	kvfree(buf);
 235	return 0;
 236}
 237
 238static long vhost_vdpa_set_config(struct vhost_vdpa *v,
 239				  struct vhost_vdpa_config __user *c)
 240{
 241	struct vdpa_device *vdpa = v->vdpa;
 242	const struct vdpa_config_ops *ops = vdpa->config;
 243	struct vhost_vdpa_config config;
 244	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 245	u8 *buf;
 246
 247	if (copy_from_user(&config, c, size))
 248		return -EFAULT;
 249	if (vhost_vdpa_config_validate(v, &config))
 250		return -EINVAL;
 251	buf = kvzalloc(config.len, GFP_KERNEL);
 252	if (!buf)
 253		return -ENOMEM;
 254
 255	if (copy_from_user(buf, c->buf, config.len)) {
 256		kvfree(buf);
 257		return -EFAULT;
 258	}
 259
 260	ops->set_config(vdpa, config.off, buf, config.len);
 261
 262	kvfree(buf);
 263	return 0;
 264}
 265
 266static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
 267{
 268	struct vdpa_device *vdpa = v->vdpa;
 269	const struct vdpa_config_ops *ops = vdpa->config;
 270	u64 features;
 271
 272	features = ops->get_features(vdpa);
 273
 274	if (copy_to_user(featurep, &features, sizeof(features)))
 275		return -EFAULT;
 276
 277	return 0;
 278}
 279
 280static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
 281{
 282	struct vdpa_device *vdpa = v->vdpa;
 283	const struct vdpa_config_ops *ops = vdpa->config;
 284	u64 features;
 285
 286	/*
 287	 * It's not allowed to change the features after they have
 288	 * been negotiated.
 289	 */
 290	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
 291		return -EBUSY;
 292
 293	if (copy_from_user(&features, featurep, sizeof(features)))
 294		return -EFAULT;
 295
 296	if (vdpa_set_features(vdpa, features))
 297		return -EINVAL;
 298
 299	return 0;
 300}
 301
 302static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
 303{
 304	struct vdpa_device *vdpa = v->vdpa;
 305	const struct vdpa_config_ops *ops = vdpa->config;
 306	u16 num;
 307
 308	num = ops->get_vq_num_max(vdpa);
 309
 310	if (copy_to_user(argp, &num, sizeof(num)))
 311		return -EFAULT;
 312
 313	return 0;
 314}
 315
 316static void vhost_vdpa_config_put(struct vhost_vdpa *v)
 317{
 318	if (v->config_ctx)
 319		eventfd_ctx_put(v->config_ctx);
 320}
 321
 322static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
 323{
 324	struct vdpa_callback cb;
 325	int fd;
 326	struct eventfd_ctx *ctx;
 327
 328	cb.callback = vhost_vdpa_config_cb;
 329	cb.private = v->vdpa;
 330	if (copy_from_user(&fd, argp, sizeof(fd)))
  331		return -EFAULT;
 332
 333	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
 334	swap(ctx, v->config_ctx);
 335
 336	if (!IS_ERR_OR_NULL(ctx))
 337		eventfd_ctx_put(ctx);
 338
 339	if (IS_ERR(v->config_ctx))
 340		return PTR_ERR(v->config_ctx);
 341
 342	v->vdpa->config->set_config_cb(v->vdpa, &cb);
 343
 344	return 0;
 345}
 346
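/* Per-virtqueue ioctls: the generic vhost_vring_ioctl() keeps the
 * vhost_virtqueue bookkeeping up to date, and the relevant state
 * (addresses, base index, call callback, queue size) is then
 * propagated to the parent device through the vdpa config ops. */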
 347static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 348				   void __user *argp)
 349{
 350	struct vdpa_device *vdpa = v->vdpa;
 351	const struct vdpa_config_ops *ops = vdpa->config;
 352	struct vdpa_vq_state vq_state;
 353	struct vdpa_callback cb;
 354	struct vhost_virtqueue *vq;
 355	struct vhost_vring_state s;
 356	u32 idx;
 357	long r;
 358
 359	r = get_user(idx, (u32 __user *)argp);
 360	if (r < 0)
 361		return r;
 362
 363	if (idx >= v->nvqs)
 364		return -ENOBUFS;
 365
 366	idx = array_index_nospec(idx, v->nvqs);
 367	vq = &v->vqs[idx];
 368
 369	switch (cmd) {
 370	case VHOST_VDPA_SET_VRING_ENABLE:
 371		if (copy_from_user(&s, argp, sizeof(s)))
 372			return -EFAULT;
 373		ops->set_vq_ready(vdpa, idx, s.num);
 374		return 0;
 375	case VHOST_GET_VRING_BASE:
 376		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
 377		if (r)
 378			return r;
 379
 380		vq->last_avail_idx = vq_state.avail_index;
 381		break;
 382	}
 383
 384	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
 385	if (r)
 386		return r;
 387
 388	switch (cmd) {
 389	case VHOST_SET_VRING_ADDR:
 390		if (ops->set_vq_address(vdpa, idx,
 391					(u64)(uintptr_t)vq->desc,
 392					(u64)(uintptr_t)vq->avail,
 393					(u64)(uintptr_t)vq->used))
 394			r = -EINVAL;
 395		break;
 396
 397	case VHOST_SET_VRING_BASE:
 398		vq_state.avail_index = vq->last_avail_idx;
 399		if (ops->set_vq_state(vdpa, idx, &vq_state))
 400			r = -EINVAL;
 401		break;
 402
 403	case VHOST_SET_VRING_CALL:
 404		if (vq->call_ctx.ctx) {
 405			cb.callback = vhost_vdpa_virtqueue_cb;
 406			cb.private = vq;
 407		} else {
 408			cb.callback = NULL;
 409			cb.private = NULL;
 410		}
 411		ops->set_vq_cb(vdpa, idx, &cb);
 412		vhost_vdpa_setup_vq_irq(v, idx);
 413		break;
 414
 415	case VHOST_SET_VRING_NUM:
 416		ops->set_vq_num(vdpa, idx, vq->num);
 417		break;
 418	}
 419
 420	return r;
 421}
 422
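/*
 * Top-level ioctl dispatcher.  A rough sketch of the userspace setup
 * flow (illustrative only; error handling and vring setup omitted):
 *
 *	int fd = open("/dev/vhost-vdpa-0", O_RDWR);
 *	ioctl(fd, VHOST_SET_OWNER);
 *	ioctl(fd, VHOST_GET_FEATURES, &features);
 *	ioctl(fd, VHOST_SET_FEATURES, &features);
 *	...
 *	status |= VIRTIO_CONFIG_S_DRIVER_OK;
 *	ioctl(fd, VHOST_VDPA_SET_STATUS, &status);
 */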
 423static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 424				      unsigned int cmd, unsigned long arg)
 425{
 426	struct vhost_vdpa *v = filep->private_data;
 427	struct vhost_dev *d = &v->vdev;
 428	void __user *argp = (void __user *)arg;
 429	u64 __user *featurep = argp;
 430	u64 features;
 431	long r;
 432
 433	if (cmd == VHOST_SET_BACKEND_FEATURES) {
 434		r = copy_from_user(&features, featurep, sizeof(features));
 435		if (r)
  436			return -EFAULT;
 437		if (features & ~VHOST_VDPA_BACKEND_FEATURES)
 438			return -EOPNOTSUPP;
 439		vhost_set_backend_features(&v->vdev, features);
 440		return 0;
 441	}
 442
 443	mutex_lock(&d->mutex);
 444
 445	switch (cmd) {
 446	case VHOST_VDPA_GET_DEVICE_ID:
 447		r = vhost_vdpa_get_device_id(v, argp);
 448		break;
 449	case VHOST_VDPA_GET_STATUS:
 450		r = vhost_vdpa_get_status(v, argp);
 451		break;
 452	case VHOST_VDPA_SET_STATUS:
 453		r = vhost_vdpa_set_status(v, argp);
 454		break;
 455	case VHOST_VDPA_GET_CONFIG:
 456		r = vhost_vdpa_get_config(v, argp);
 457		break;
 458	case VHOST_VDPA_SET_CONFIG:
 459		r = vhost_vdpa_set_config(v, argp);
 460		break;
 461	case VHOST_GET_FEATURES:
 462		r = vhost_vdpa_get_features(v, argp);
 463		break;
 464	case VHOST_SET_FEATURES:
 465		r = vhost_vdpa_set_features(v, argp);
 466		break;
 467	case VHOST_VDPA_GET_VRING_NUM:
 468		r = vhost_vdpa_get_vring_num(v, argp);
 469		break;
 470	case VHOST_SET_LOG_BASE:
 471	case VHOST_SET_LOG_FD:
 472		r = -ENOIOCTLCMD;
 473		break;
 474	case VHOST_VDPA_SET_CONFIG_CALL:
 475		r = vhost_vdpa_set_config_call(v, argp);
 476		break;
 477	case VHOST_GET_BACKEND_FEATURES:
 478		features = VHOST_VDPA_BACKEND_FEATURES;
  479		r = copy_to_user(featurep, &features, sizeof(features)) ? -EFAULT : 0;
 480		break;
 481	default:
 482		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
 483		if (r == -ENOIOCTLCMD)
 484			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
 485		break;
 486	}
 487
 488	mutex_unlock(&d->mutex);
 489	return r;
 490}
 491
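/* Remove all IOTLB entries overlapping [start, last] and unpin their
 * backing pages, marking pages the device may have written to as dirty
 * and fixing up the mm's pinned-page accounting. */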
 492static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
 493{
 494	struct vhost_dev *dev = &v->vdev;
 495	struct vhost_iotlb *iotlb = dev->iotlb;
 496	struct vhost_iotlb_map *map;
 497	struct page *page;
 498	unsigned long pfn, pinned;
 499
 500	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
 501		pinned = map->size >> PAGE_SHIFT;
 502		for (pfn = map->addr >> PAGE_SHIFT;
 503		     pinned > 0; pfn++, pinned--) {
 504			page = pfn_to_page(pfn);
 505			if (map->perm & VHOST_ACCESS_WO)
 506				set_page_dirty_lock(page);
 507			unpin_user_page(page);
 508		}
 509		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
 510		vhost_iotlb_map_free(iotlb, map);
 511	}
 512}
 513
 514static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
 515{
 516	struct vhost_dev *dev = &v->vdev;
 517
 518	vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1);
 519	kfree(dev->iotlb);
 520	dev->iotlb = NULL;
 521}
 522
 523static int perm_to_iommu_flags(u32 perm)
 524{
 525	int flags = 0;
 526
 527	switch (perm) {
 528	case VHOST_ACCESS_WO:
 529		flags |= IOMMU_WRITE;
 530		break;
 531	case VHOST_ACCESS_RO:
 532		flags |= IOMMU_READ;
 533		break;
 534	case VHOST_ACCESS_RW:
 535		flags |= (IOMMU_WRITE | IOMMU_READ);
 536		break;
 537	default:
  538		WARN(1, "invalid vhost IOTLB permission\n");
 539		break;
 540	}
 541
 542	return flags | IOMMU_CACHE;
 543}
 544
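/* Record a mapping in the vhost IOTLB and mirror it to the device by
 * one of three paths: the device's own dma_map op, a full set_map
 * replay of the IOTLB (deferred while an IOTLB batch is in flight), or
 * the platform IOMMU domain set up in vhost_vdpa_alloc_domain(). */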
 545static int vhost_vdpa_map(struct vhost_vdpa *v,
 546			  u64 iova, u64 size, u64 pa, u32 perm)
 547{
 548	struct vhost_dev *dev = &v->vdev;
 549	struct vdpa_device *vdpa = v->vdpa;
 550	const struct vdpa_config_ops *ops = vdpa->config;
 551	int r = 0;
 552
 553	r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
 554				  pa, perm);
 555	if (r)
 556		return r;
 557
 558	if (ops->dma_map) {
 559		r = ops->dma_map(vdpa, iova, size, pa, perm);
 560	} else if (ops->set_map) {
 561		if (!v->in_batch)
 562			r = ops->set_map(vdpa, dev->iotlb);
 563	} else {
 564		r = iommu_map(v->domain, iova, pa, size,
 565			      perm_to_iommu_flags(perm));
 566	}
 567
 568	if (r)
 569		vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
 570
 571	return r;
 572}
 573
 574static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
 575{
 576	struct vhost_dev *dev = &v->vdev;
 577	struct vdpa_device *vdpa = v->vdpa;
 578	const struct vdpa_config_ops *ops = vdpa->config;
 579
 580	vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1);
 581
 582	if (ops->dma_map) {
 583		ops->dma_unmap(vdpa, iova, size);
 584	} else if (ops->set_map) {
 585		if (!v->in_batch)
 586			ops->set_map(vdpa, dev->iotlb);
 587	} else {
 588		iommu_unmap(v->domain, iova, size);
 589	}
 590}
 591
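/* Handle a VHOST_IOTLB_UPDATE message: pin the whole userspace range
 * up front, then map it in runs of physically contiguous pages so that
 * large regions need only a few vhost_vdpa_map() calls. */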
 592static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 593					   struct vhost_iotlb_msg *msg)
 594{
 595	struct vhost_dev *dev = &v->vdev;
 596	struct vhost_iotlb *iotlb = dev->iotlb;
 597	struct page **page_list;
 598	struct vm_area_struct **vmas;
 599	unsigned int gup_flags = FOLL_LONGTERM;
 600	unsigned long map_pfn, last_pfn = 0;
 601	unsigned long npages, lock_limit;
 602	unsigned long i, nmap = 0;
 603	u64 iova = msg->iova;
 604	long pinned;
 605	int ret = 0;
 606
 607	if (vhost_iotlb_itree_first(iotlb, msg->iova,
 608				    msg->iova + msg->size - 1))
 609		return -EEXIST;
 610
 611	if (msg->perm & VHOST_ACCESS_WO)
 612		gup_flags |= FOLL_WRITE;
 613
 614	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
 615	if (!npages)
 616		return -EINVAL;
 617
 618	page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
 619	vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *),
 620			      GFP_KERNEL);
 621	if (!page_list || !vmas) {
 622		ret = -ENOMEM;
 623		goto free;
 624	}
 625
 626	mmap_read_lock(dev->mm);
 627
 628	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 629	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
 630		ret = -ENOMEM;
 631		goto unlock;
 632	}
 633
 634	pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags,
 635				page_list, vmas);
 636	if (npages != pinned) {
 637		if (pinned < 0) {
 638			ret = pinned;
 639		} else {
 640			unpin_user_pages(page_list, pinned);
 641			ret = -ENOMEM;
 642		}
 643		goto unlock;
 644	}
 645
 646	iova &= PAGE_MASK;
 647	map_pfn = page_to_pfn(page_list[0]);
 648
  649	/* One extra iteration so the final chunk is mapped inside the loop. */
 650	for (i = 0; i <= npages; i++) {
 651		unsigned long this_pfn;
 652		u64 csize;
 653
 654		/* The last chunk may have no valid PFN next to it */
 655		this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL;
 656
 657		if (last_pfn && (this_pfn == -1UL ||
 658				 this_pfn != last_pfn + 1)) {
  659			/* Map a contiguous chunk of pinned memory */
 660			csize = last_pfn - map_pfn + 1;
 661			ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT,
 662					     map_pfn << PAGE_SHIFT,
 663					     msg->perm);
 664			if (ret) {
  665				/*
  666				 * Unpin the pages that were pinned but not
  667				 * yet handed to vhost_vdpa_map(), i.e.
  668				 * everything from index nmap onwards.  The
  669				 * vhost_vdpa_unmap() in the failure path
  670				 * below unpins the already-mapped pages and
  671				 * fixes up their pinned-page accounting.
  672				 * This asymmetric scheme keeps the common
  673				 * case cheap: all pages are pinned up front
  674				 * in a single call, and this is the only
  675				 * callsite of vhost_vdpa_map() for the range.
  676				 */
 677				unpin_user_pages(&page_list[nmap],
 678						 npages - nmap);
 679				goto out;
 680			}
 681			atomic64_add(csize, &dev->mm->pinned_vm);
 682			nmap += csize;
 683			iova += csize << PAGE_SHIFT;
 684			map_pfn = this_pfn;
 685		}
 686		last_pfn = this_pfn;
 687	}
 688
 689	WARN_ON(nmap != npages);
 690out:
 691	if (ret)
 692		vhost_vdpa_unmap(v, msg->iova, msg->size);
 693unlock:
 694	mmap_read_unlock(dev->mm);
 695free:
 696	kvfree(vmas);
 697	kvfree(page_list);
 698	return ret;
 699}
 700
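/*
 * IOTLB message dispatcher, reached through the vhost chardev write
 * path.  When VHOST_BACKEND_F_IOTLB_BATCH has been negotiated, a
 * typical sequence from userspace would look like (sketch only):
 *
 *	VHOST_IOTLB_BATCH_BEGIN
 *	VHOST_IOTLB_UPDATE ...		(one per memory region)
 *	VHOST_IOTLB_BATCH_END
 *
 * so that devices using set_map() rebuild their tables just once.
 */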
 701static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
 702					struct vhost_iotlb_msg *msg)
 703{
 704	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
 705	struct vdpa_device *vdpa = v->vdpa;
 706	const struct vdpa_config_ops *ops = vdpa->config;
 707	int r = 0;
 708
 709	r = vhost_dev_check_owner(dev);
 710	if (r)
 711		return r;
 712
 713	switch (msg->type) {
 714	case VHOST_IOTLB_UPDATE:
 715		r = vhost_vdpa_process_iotlb_update(v, msg);
 716		break;
 717	case VHOST_IOTLB_INVALIDATE:
 718		vhost_vdpa_unmap(v, msg->iova, msg->size);
 719		break;
 720	case VHOST_IOTLB_BATCH_BEGIN:
 721		v->in_batch = true;
 722		break;
 723	case VHOST_IOTLB_BATCH_END:
 724		if (v->in_batch && ops->set_map)
 725			ops->set_map(vdpa, dev->iotlb);
 726		v->in_batch = false;
 727		break;
 728	default:
 729		r = -EINVAL;
 730		break;
 731	}
 732
 733	return r;
 734}
 735
 736static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
 737					 struct iov_iter *from)
 738{
 739	struct file *file = iocb->ki_filp;
 740	struct vhost_vdpa *v = file->private_data;
 741	struct vhost_dev *dev = &v->vdev;
 742
 743	return vhost_chr_write_iter(dev, from);
 744}
 745
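/* For parent devices that have no DMA translation of their own (no
 * set_map or dma_map op), allocate an IOMMU domain on the DMA device's
 * bus and attach to it so vhost_vdpa_map() can program the platform
 * IOMMU directly.  A cache-coherent IOMMU is required. */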
 746static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
 747{
 748	struct vdpa_device *vdpa = v->vdpa;
 749	const struct vdpa_config_ops *ops = vdpa->config;
 750	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
 751	struct bus_type *bus;
 752	int ret;
 753
  754	/* The device wants to do DMA by itself */
 755	if (ops->set_map || ops->dma_map)
 756		return 0;
 757
 758	bus = dma_dev->bus;
 759	if (!bus)
 760		return -EFAULT;
 761
 762	if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
 763		return -ENOTSUPP;
 764
 765	v->domain = iommu_domain_alloc(bus);
 766	if (!v->domain)
 767		return -EIO;
 768
 769	ret = iommu_attach_device(v->domain, dma_dev);
 770	if (ret)
 771		goto err_attach;
 772
 773	return 0;
 774
 775err_attach:
 776	iommu_domain_free(v->domain);
 777	return ret;
 778}
 779
 780static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
 781{
 782	struct vdpa_device *vdpa = v->vdpa;
 783	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
 784
 785	if (v->domain) {
 786		iommu_detach_device(v->domain, dma_dev);
 787		iommu_domain_free(v->domain);
 788	}
 789
 790	v->domain = NULL;
 791}
 792
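/* open(): only one opener is allowed at a time.  Resets the device and
 * initializes the vhost_dev with one vhost_virtqueue per device
 * virtqueue, a fresh IOTLB and, if needed, an IOMMU domain. */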
 793static int vhost_vdpa_open(struct inode *inode, struct file *filep)
 794{
 795	struct vhost_vdpa *v;
 796	struct vhost_dev *dev;
 797	struct vhost_virtqueue **vqs;
 798	int nvqs, i, r, opened;
 799
 800	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
 801
 802	opened = atomic_cmpxchg(&v->opened, 0, 1);
 803	if (opened)
 804		return -EBUSY;
 805
 806	nvqs = v->nvqs;
 807	vhost_vdpa_reset(v);
 808
 809	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
 810	if (!vqs) {
 811		r = -ENOMEM;
 812		goto err;
 813	}
 814
 815	dev = &v->vdev;
 816	for (i = 0; i < nvqs; i++) {
 817		vqs[i] = &v->vqs[i];
 818		vqs[i]->handle_kick = handle_vq_kick;
 819	}
 820	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
 821		       vhost_vdpa_process_iotlb_msg);
 822
 823	dev->iotlb = vhost_iotlb_alloc(0, 0);
 824	if (!dev->iotlb) {
 825		r = -ENOMEM;
 826		goto err_init_iotlb;
 827	}
 828
 829	r = vhost_vdpa_alloc_domain(v);
 830	if (r)
 831		goto err_init_iotlb;
 832
 833	filep->private_data = v;
 834
 835	return 0;
 836
 837err_init_iotlb:
 838	vhost_dev_cleanup(&v->vdev);
 839	kfree(vqs);
 840err:
 841	atomic_dec(&v->opened);
 842	return r;
 843}
 844
 845static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
 846{
 847	struct vhost_virtqueue *vq;
 848	int i;
 849
 850	for (i = 0; i < v->nvqs; i++) {
 851		vq = &v->vqs[i];
 852		if (vq->call_ctx.producer.irq)
 853			irq_bypass_unregister_producer(&vq->call_ctx.producer);
 854	}
 855}
 856
 857static int vhost_vdpa_release(struct inode *inode, struct file *filep)
 858{
 859	struct vhost_vdpa *v = filep->private_data;
 860	struct vhost_dev *d = &v->vdev;
 861
 862	mutex_lock(&d->mutex);
 863	filep->private_data = NULL;
 864	vhost_vdpa_reset(v);
 865	vhost_dev_stop(&v->vdev);
 866	vhost_vdpa_iotlb_free(v);
 867	vhost_vdpa_free_domain(v);
 868	vhost_vdpa_config_put(v);
 869	vhost_vdpa_clean_irq(v);
 870	vhost_dev_cleanup(&v->vdev);
 871	kfree(v->vdev.vqs);
 872	mutex_unlock(&d->mutex);
 873
 874	atomic_dec(&v->opened);
 875	complete(&v->completion);
 876
 877	return 0;
 878}
 879
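/* Doorbell mapping: if the parent device exposes per-virtqueue
 * notification areas, userspace can mmap() them and kick the device by
 * writing to the mapped page directly instead of using the kick
 * eventfd. */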
 880#ifdef CONFIG_MMU
 881static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
 882{
 883	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
 884	struct vdpa_device *vdpa = v->vdpa;
 885	const struct vdpa_config_ops *ops = vdpa->config;
 886	struct vdpa_notification_area notify;
 887	struct vm_area_struct *vma = vmf->vma;
 888	u16 index = vma->vm_pgoff;
 889
 890	notify = ops->get_vq_notification(vdpa, index);
 891
 892	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 893	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
 894			    notify.addr >> PAGE_SHIFT, PAGE_SIZE,
 895			    vma->vm_page_prot))
 896		return VM_FAULT_SIGBUS;
 897
 898	return VM_FAULT_NOPAGE;
 899}
 900
 901static const struct vm_operations_struct vhost_vdpa_vm_ops = {
 902	.fault = vhost_vdpa_fault,
 903};
 904
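/*
 * The vq index is passed as the page offset.  A rough userspace sketch
 * (illustrative only) for mapping the doorbell of virtqueue "idx":
 *
 *	doorbell = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED,
 *			fd, idx * page_size);
 */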
 905static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
 906{
 907	struct vhost_vdpa *v = vma->vm_file->private_data;
 908	struct vdpa_device *vdpa = v->vdpa;
 909	const struct vdpa_config_ops *ops = vdpa->config;
 910	struct vdpa_notification_area notify;
 911	unsigned long index = vma->vm_pgoff;
 912
 913	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
 914		return -EINVAL;
 915	if ((vma->vm_flags & VM_SHARED) == 0)
 916		return -EINVAL;
 917	if (vma->vm_flags & VM_READ)
 918		return -EINVAL;
 919	if (index > 65535)
 920		return -EINVAL;
 921	if (!ops->get_vq_notification)
 922		return -ENOTSUPP;
 923
  924	/* To be safe and easily modelled by userspace, we only
  925	 * support a doorbell that sits on a page boundary and
 926	 * does not share the page with other registers.
 927	 */
 928	notify = ops->get_vq_notification(vdpa, index);
 929	if (notify.addr & (PAGE_SIZE - 1))
 930		return -EINVAL;
 931	if (vma->vm_end - vma->vm_start != notify.size)
 932		return -ENOTSUPP;
 933
 934	vma->vm_ops = &vhost_vdpa_vm_ops;
 935	return 0;
 936}
 937#endif /* CONFIG_MMU */
 938
 939static const struct file_operations vhost_vdpa_fops = {
 940	.owner		= THIS_MODULE,
 941	.open		= vhost_vdpa_open,
 942	.release	= vhost_vdpa_release,
 943	.write_iter	= vhost_vdpa_chr_write_iter,
 944	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
 945#ifdef CONFIG_MMU
 946	.mmap		= vhost_vdpa_mmap,
 947#endif /* CONFIG_MMU */
 948	.compat_ioctl	= compat_ptr_ioctl,
 949};
 950
 951static void vhost_vdpa_release_dev(struct device *device)
 952{
 953	struct vhost_vdpa *v =
 954	       container_of(device, struct vhost_vdpa, dev);
 955
 956	ida_simple_remove(&vhost_vdpa_ida, v->minor);
 957	kfree(v->vqs);
 958	kfree(v);
 959}
 960
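/* Bus probe: called when a vdpa device is bound to this driver.
 * Allocates the vhost_vdpa instance and registers the character device
 * that userspace (e.g. QEMU's vhost-vdpa backend) opens. */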
 961static int vhost_vdpa_probe(struct vdpa_device *vdpa)
 962{
 963	const struct vdpa_config_ops *ops = vdpa->config;
 964	struct vhost_vdpa *v;
 965	int minor;
 966	int r;
 967
  968	/* Currently, we only accept network devices. */
 969	if (ops->get_device_id(vdpa) != VIRTIO_ID_NET)
 970		return -ENOTSUPP;
 971
 972	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 973	if (!v)
 974		return -ENOMEM;
 975
 976	minor = ida_simple_get(&vhost_vdpa_ida, 0,
 977			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
 978	if (minor < 0) {
 979		kfree(v);
 980		return minor;
 981	}
 982
 983	atomic_set(&v->opened, 0);
 984	v->minor = minor;
 985	v->vdpa = vdpa;
 986	v->nvqs = vdpa->nvqs;
 987	v->virtio_id = ops->get_device_id(vdpa);
 988
 989	device_initialize(&v->dev);
 990	v->dev.release = vhost_vdpa_release_dev;
 991	v->dev.parent = &vdpa->dev;
 992	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
 993	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
 994			       GFP_KERNEL);
 995	if (!v->vqs) {
 996		r = -ENOMEM;
 997		goto err;
 998	}
 999
1000	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
1001	if (r)
1002		goto err;
1003
1004	cdev_init(&v->cdev, &vhost_vdpa_fops);
1005	v->cdev.owner = THIS_MODULE;
1006
1007	r = cdev_device_add(&v->cdev, &v->dev);
1008	if (r)
1009		goto err;
1010
1011	init_completion(&v->completion);
1012	vdpa_set_drvdata(vdpa, v);
1013
1014	return 0;
1015
1016err:
1017	put_device(&v->dev);
1018	return r;
1019}
1020
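/* Bus remove: delete the character device, then wait until any
 * remaining opener releases it before dropping the last reference. */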
1021static void vhost_vdpa_remove(struct vdpa_device *vdpa)
1022{
1023	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
1024	int opened;
1025
1026	cdev_device_del(&v->cdev, &v->dev);
1027
1028	do {
1029		opened = atomic_cmpxchg(&v->opened, 0, 1);
1030		if (!opened)
1031			break;
1032		wait_for_completion(&v->completion);
1033	} while (1);
1034
1035	put_device(&v->dev);
1036}
1037
1038static struct vdpa_driver vhost_vdpa_driver = {
1039	.driver = {
1040		.name	= "vhost_vdpa",
1041	},
1042	.probe	= vhost_vdpa_probe,
1043	.remove	= vhost_vdpa_remove,
1044};
1045
1046static int __init vhost_vdpa_init(void)
1047{
1048	int r;
1049
1050	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
1051				"vhost-vdpa");
1052	if (r)
1053		goto err_alloc_chrdev;
1054
1055	r = vdpa_register_driver(&vhost_vdpa_driver);
1056	if (r)
1057		goto err_vdpa_register_driver;
1058
1059	return 0;
1060
1061err_vdpa_register_driver:
1062	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1063err_alloc_chrdev:
1064	return r;
1065}
1066module_init(vhost_vdpa_init);
1067
1068static void __exit vhost_vdpa_exit(void)
1069{
1070	vdpa_unregister_driver(&vhost_vdpa_driver);
1071	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1072}
1073module_exit(vhost_vdpa_exit);
1074
1075MODULE_VERSION("0.0.1");
1076MODULE_LICENSE("GPL v2");
1077MODULE_AUTHOR("Intel Corporation");
1078MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");