   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2018-2020 Intel Corporation.
   4 * Copyright (C) 2020 Red Hat, Inc.
   5 *
   6 * Author: Tiwei Bie <tiwei.bie@intel.com>
   7 *         Jason Wang <jasowang@redhat.com>
   8 *
   9 * Thanks to Michael S. Tsirkin for the valuable comments and
  10 * suggestions, and thanks to Cunming Liang and Zhihong Wang for all
  11 * their support.
  12 */
  13
  14#include <linux/kernel.h>
  15#include <linux/module.h>
  16#include <linux/cdev.h>
  17#include <linux/device.h>
  18#include <linux/mm.h>
  19#include <linux/slab.h>
  20#include <linux/iommu.h>
  21#include <linux/uuid.h>
  22#include <linux/vdpa.h>
  23#include <linux/nospec.h>
  24#include <linux/vhost.h>
  25
  26#include "vhost.h"
  27
  28enum {
  29	VHOST_VDPA_BACKEND_FEATURES =
  30	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
  31	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
  32};
  33
  34#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
  35
  36struct vhost_vdpa {
  37	struct vhost_dev vdev;
  38	struct iommu_domain *domain;
  39	struct vhost_virtqueue *vqs;
  40	struct completion completion;
  41	struct vdpa_device *vdpa;
  42	struct device dev;
  43	struct cdev cdev;
  44	atomic_t opened;
  45	int nvqs;
  46	int virtio_id;
  47	int minor;
  48	struct eventfd_ctx *config_ctx;
  49	int in_batch;
  50	struct vdpa_iova_range range;
  51};
  52
  53static DEFINE_IDA(vhost_vdpa_ida);
  54
  55static dev_t vhost_vdpa_major;
  56
  57static void handle_vq_kick(struct vhost_work *work)
  58{
  59	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
  60						  poll.work);
  61	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
  62	const struct vdpa_config_ops *ops = v->vdpa->config;
  63
  64	ops->kick_vq(v->vdpa, vq - v->vqs);
  65}
  66
  67static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
  68{
  69	struct vhost_virtqueue *vq = private;
  70	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
  71
  72	if (call_ctx)
  73		eventfd_signal(call_ctx, 1);
  74
  75	return IRQ_HANDLED;
  76}
  77
  78static irqreturn_t vhost_vdpa_config_cb(void *private)
  79{
  80	struct vhost_vdpa *v = private;
  81	struct eventfd_ctx *config_ctx = v->config_ctx;
  82
  83	if (config_ctx)
  84		eventfd_signal(config_ctx, 1);
  85
  86	return IRQ_HANDLED;
  87}
  88
  89static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
  90{
  91	struct vhost_virtqueue *vq = &v->vqs[qid];
  92	const struct vdpa_config_ops *ops = v->vdpa->config;
  93	struct vdpa_device *vdpa = v->vdpa;
  94	int ret, irq;
  95
  96	if (!ops->get_vq_irq)
  97		return;
  98
  99	irq = ops->get_vq_irq(vdpa, qid);
 100	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 101	if (!vq->call_ctx.ctx || irq < 0)
 102		return;
 103
 104	vq->call_ctx.producer.token = vq->call_ctx.ctx;
 105	vq->call_ctx.producer.irq = irq;
 106	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
 107	if (unlikely(ret))
  108		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n",
 109			 qid, vq->call_ctx.producer.token, ret);
 110}
 111
 112static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
 113{
 114	struct vhost_virtqueue *vq = &v->vqs[qid];
 115
 116	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 117}
 118
 119static void vhost_vdpa_reset(struct vhost_vdpa *v)
 120{
 121	struct vdpa_device *vdpa = v->vdpa;
 122
 123	vdpa_reset(vdpa);
 124	v->in_batch = 0;
 125}
 126
 127static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
 128{
 129	struct vdpa_device *vdpa = v->vdpa;
 130	const struct vdpa_config_ops *ops = vdpa->config;
 131	u32 device_id;
 132
 133	device_id = ops->get_device_id(vdpa);
 134
 135	if (copy_to_user(argp, &device_id, sizeof(device_id)))
 136		return -EFAULT;
 137
 138	return 0;
 139}
 140
 141static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
 142{
 143	struct vdpa_device *vdpa = v->vdpa;
 144	const struct vdpa_config_ops *ops = vdpa->config;
 145	u8 status;
 146
 147	status = ops->get_status(vdpa);
 148
 149	if (copy_to_user(statusp, &status, sizeof(status)))
 150		return -EFAULT;
 151
 152	return 0;
 153}
 154
 155static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
 156{
 157	struct vdpa_device *vdpa = v->vdpa;
 158	const struct vdpa_config_ops *ops = vdpa->config;
 159	u8 status, status_old;
 160	int nvqs = v->nvqs;
 161	u16 i;
 162
 163	if (copy_from_user(&status, statusp, sizeof(status)))
 164		return -EFAULT;
 165
 166	status_old = ops->get_status(vdpa);
 167
 168	/*
  169	 * Userspace shouldn't remove status bits unless it resets
  170	 * the status to 0.
 171	 */
 172	if (status != 0 && (ops->get_status(vdpa) & ~status) != 0)
 173		return -EINVAL;
 174
 175	ops->set_status(vdpa, status);
 176
 177	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
 178		for (i = 0; i < nvqs; i++)
 179			vhost_vdpa_setup_vq_irq(v, i);
 180
 181	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
 182		for (i = 0; i < nvqs; i++)
 183			vhost_vdpa_unsetup_vq_irq(v, i);
 184
 185	return 0;
 186}
 187
 188static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
 189				      struct vhost_vdpa_config *c)
 190{
 191	struct vdpa_device *vdpa = v->vdpa;
 192	long size = vdpa->config->get_config_size(vdpa);
 193
 194	if (c->len == 0)
 195		return -EINVAL;
 196
 197	if (c->len > size - c->off)
 198		return -E2BIG;
 199
 200	return 0;
 201}
 202
 203static long vhost_vdpa_get_config(struct vhost_vdpa *v,
 204				  struct vhost_vdpa_config __user *c)
 205{
 206	struct vdpa_device *vdpa = v->vdpa;
 207	struct vhost_vdpa_config config;
 208	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 209	u8 *buf;
 210
 211	if (copy_from_user(&config, c, size))
 212		return -EFAULT;
 213	if (vhost_vdpa_config_validate(v, &config))
 214		return -EINVAL;
 215	buf = kvzalloc(config.len, GFP_KERNEL);
 216	if (!buf)
 217		return -ENOMEM;
 218
 219	vdpa_get_config(vdpa, config.off, buf, config.len);
 220
 221	if (copy_to_user(c->buf, buf, config.len)) {
 222		kvfree(buf);
 223		return -EFAULT;
 224	}
 225
 226	kvfree(buf);
 227	return 0;
 228}
 229
 230static long vhost_vdpa_set_config(struct vhost_vdpa *v,
 231				  struct vhost_vdpa_config __user *c)
 232{
 233	struct vdpa_device *vdpa = v->vdpa;
 234	const struct vdpa_config_ops *ops = vdpa->config;
 235	struct vhost_vdpa_config config;
 236	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 237	u8 *buf;
 238
 239	if (copy_from_user(&config, c, size))
 240		return -EFAULT;
 241	if (vhost_vdpa_config_validate(v, &config))
 242		return -EINVAL;
 243
 244	buf = vmemdup_user(c->buf, config.len);
 245	if (IS_ERR(buf))
 246		return PTR_ERR(buf);
 247
 248	ops->set_config(vdpa, config.off, buf, config.len);
 249
 250	kvfree(buf);
 251	return 0;
 252}
 253
 254static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
 255{
 256	struct vdpa_device *vdpa = v->vdpa;
 257	const struct vdpa_config_ops *ops = vdpa->config;
 258	u64 features;
 259
 260	features = ops->get_features(vdpa);
 261
 262	if (copy_to_user(featurep, &features, sizeof(features)))
 263		return -EFAULT;
 264
 265	return 0;
 266}
 267
 268static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
 269{
 270	struct vdpa_device *vdpa = v->vdpa;
 271	const struct vdpa_config_ops *ops = vdpa->config;
 272	u64 features;
 273
 274	/*
 275	 * It's not allowed to change the features after they have
 276	 * been negotiated.
 277	 */
 278	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
 279		return -EBUSY;
 280
 281	if (copy_from_user(&features, featurep, sizeof(features)))
 282		return -EFAULT;
 283
 284	if (vdpa_set_features(vdpa, features))
 285		return -EINVAL;
 286
 287	return 0;
 288}
 289
 290static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
 291{
 292	struct vdpa_device *vdpa = v->vdpa;
 293	const struct vdpa_config_ops *ops = vdpa->config;
 294	u16 num;
 295
 296	num = ops->get_vq_num_max(vdpa);
 297
 298	if (copy_to_user(argp, &num, sizeof(num)))
 299		return -EFAULT;
 300
 301	return 0;
 302}
 303
 304static void vhost_vdpa_config_put(struct vhost_vdpa *v)
 305{
 306	if (v->config_ctx) {
 307		eventfd_ctx_put(v->config_ctx);
 308		v->config_ctx = NULL;
 309	}
 310}
 311
 312static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
 313{
 314	struct vdpa_callback cb;
 315	int fd;
 316	struct eventfd_ctx *ctx;
 317
 318	cb.callback = vhost_vdpa_config_cb;
 319	cb.private = v;
 320	if (copy_from_user(&fd, argp, sizeof(fd)))
 321		return  -EFAULT;
 322
 323	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
 324	swap(ctx, v->config_ctx);
 325
 326	if (!IS_ERR_OR_NULL(ctx))
 327		eventfd_ctx_put(ctx);
 328
 329	if (IS_ERR(v->config_ctx)) {
 330		long ret = PTR_ERR(v->config_ctx);
 331
 332		v->config_ctx = NULL;
 333		return ret;
 334	}
 335
 336	v->vdpa->config->set_config_cb(v->vdpa, &cb);
 337
 338	return 0;
 339}
 340
 341static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
 342{
 343	struct vhost_vdpa_iova_range range = {
 344		.first = v->range.first,
 345		.last = v->range.last,
 346	};
 347
 348	if (copy_to_user(argp, &range, sizeof(range)))
 349		return -EFAULT;
 350	return 0;
 351}
 352
 353static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 354				   void __user *argp)
 355{
 356	struct vdpa_device *vdpa = v->vdpa;
 357	const struct vdpa_config_ops *ops = vdpa->config;
 358	struct vdpa_vq_state vq_state;
 359	struct vdpa_callback cb;
 360	struct vhost_virtqueue *vq;
 361	struct vhost_vring_state s;
 362	u32 idx;
 363	long r;
 364
 365	r = get_user(idx, (u32 __user *)argp);
 366	if (r < 0)
 367		return r;
 368
 369	if (idx >= v->nvqs)
 370		return -ENOBUFS;
 371
 372	idx = array_index_nospec(idx, v->nvqs);
 373	vq = &v->vqs[idx];
 374
 375	switch (cmd) {
 376	case VHOST_VDPA_SET_VRING_ENABLE:
 377		if (copy_from_user(&s, argp, sizeof(s)))
 378			return -EFAULT;
 379		ops->set_vq_ready(vdpa, idx, s.num);
 380		return 0;
 381	case VHOST_GET_VRING_BASE:
 382		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
 383		if (r)
 384			return r;
 385
 386		vq->last_avail_idx = vq_state.split.avail_index;
 387		break;
 388	}
 389
 390	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
 391	if (r)
 392		return r;
 393
 394	switch (cmd) {
 395	case VHOST_SET_VRING_ADDR:
 396		if (ops->set_vq_address(vdpa, idx,
 397					(u64)(uintptr_t)vq->desc,
 398					(u64)(uintptr_t)vq->avail,
 399					(u64)(uintptr_t)vq->used))
 400			r = -EINVAL;
 401		break;
 402
 403	case VHOST_SET_VRING_BASE:
 404		vq_state.split.avail_index = vq->last_avail_idx;
 405		if (ops->set_vq_state(vdpa, idx, &vq_state))
 406			r = -EINVAL;
 407		break;
 408
 409	case VHOST_SET_VRING_CALL:
 410		if (vq->call_ctx.ctx) {
 411			cb.callback = vhost_vdpa_virtqueue_cb;
 412			cb.private = vq;
 413		} else {
 414			cb.callback = NULL;
 415			cb.private = NULL;
 416		}
 417		ops->set_vq_cb(vdpa, idx, &cb);
 418		vhost_vdpa_setup_vq_irq(v, idx);
 419		break;
 420
 421	case VHOST_SET_VRING_NUM:
 422		ops->set_vq_num(vdpa, idx, vq->num);
 423		break;
 424	}
 425
 426	return r;
 427}
 428
 429static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 430				      unsigned int cmd, unsigned long arg)
 431{
 432	struct vhost_vdpa *v = filep->private_data;
 433	struct vhost_dev *d = &v->vdev;
 434	void __user *argp = (void __user *)arg;
 435	u64 __user *featurep = argp;
 436	u64 features;
 437	long r = 0;
 438
 439	if (cmd == VHOST_SET_BACKEND_FEATURES) {
 440		if (copy_from_user(&features, featurep, sizeof(features)))
 441			return -EFAULT;
 442		if (features & ~VHOST_VDPA_BACKEND_FEATURES)
 443			return -EOPNOTSUPP;
 444		vhost_set_backend_features(&v->vdev, features);
 445		return 0;
 446	}
 447
 448	mutex_lock(&d->mutex);
 449
 450	switch (cmd) {
 451	case VHOST_VDPA_GET_DEVICE_ID:
 452		r = vhost_vdpa_get_device_id(v, argp);
 453		break;
 454	case VHOST_VDPA_GET_STATUS:
 455		r = vhost_vdpa_get_status(v, argp);
 456		break;
 457	case VHOST_VDPA_SET_STATUS:
 458		r = vhost_vdpa_set_status(v, argp);
 459		break;
 460	case VHOST_VDPA_GET_CONFIG:
 461		r = vhost_vdpa_get_config(v, argp);
 462		break;
 463	case VHOST_VDPA_SET_CONFIG:
 464		r = vhost_vdpa_set_config(v, argp);
 465		break;
 466	case VHOST_GET_FEATURES:
 467		r = vhost_vdpa_get_features(v, argp);
 468		break;
 469	case VHOST_SET_FEATURES:
 470		r = vhost_vdpa_set_features(v, argp);
 471		break;
 472	case VHOST_VDPA_GET_VRING_NUM:
 473		r = vhost_vdpa_get_vring_num(v, argp);
 474		break;
 475	case VHOST_SET_LOG_BASE:
 476	case VHOST_SET_LOG_FD:
 477		r = -ENOIOCTLCMD;
 478		break;
 479	case VHOST_VDPA_SET_CONFIG_CALL:
 480		r = vhost_vdpa_set_config_call(v, argp);
 481		break;
 482	case VHOST_GET_BACKEND_FEATURES:
 483		features = VHOST_VDPA_BACKEND_FEATURES;
 484		if (copy_to_user(featurep, &features, sizeof(features)))
 485			r = -EFAULT;
 486		break;
 487	case VHOST_VDPA_GET_IOVA_RANGE:
 488		r = vhost_vdpa_get_iova_range(v, argp);
 489		break;
 490	default:
 491		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
 492		if (r == -ENOIOCTLCMD)
 493			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
 494		break;
 495	}
 496
 497	mutex_unlock(&d->mutex);
 498	return r;
 499}
 500
 501static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
 502{
 503	struct vhost_dev *dev = &v->vdev;
 504	struct vhost_iotlb *iotlb = dev->iotlb;
 505	struct vhost_iotlb_map *map;
 506	struct page *page;
 507	unsigned long pfn, pinned;
 508
 509	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
 510		pinned = map->size >> PAGE_SHIFT;
 511		for (pfn = map->addr >> PAGE_SHIFT;
 512		     pinned > 0; pfn++, pinned--) {
 513			page = pfn_to_page(pfn);
 514			if (map->perm & VHOST_ACCESS_WO)
 515				set_page_dirty_lock(page);
 516			unpin_user_page(page);
 517		}
 518		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
 519		vhost_iotlb_map_free(iotlb, map);
 520	}
 521}
 522
 523static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
 524{
 525	struct vhost_dev *dev = &v->vdev;
 526
 527	vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1);
 528	kfree(dev->iotlb);
 529	dev->iotlb = NULL;
 530}
 531
 532static int perm_to_iommu_flags(u32 perm)
 533{
 534	int flags = 0;
 535
 536	switch (perm) {
 537	case VHOST_ACCESS_WO:
 538		flags |= IOMMU_WRITE;
 539		break;
 540	case VHOST_ACCESS_RO:
 541		flags |= IOMMU_READ;
 542		break;
 543	case VHOST_ACCESS_RW:
 544		flags |= (IOMMU_WRITE | IOMMU_READ);
 545		break;
 546	default:
  547		WARN(1, "invalid vhost IOTLB permission\n");
 548		break;
 549	}
 550
 551	return flags | IOMMU_CACHE;
 552}
 553
 554static int vhost_vdpa_map(struct vhost_vdpa *v,
 555			  u64 iova, u64 size, u64 pa, u32 perm)
 556{
 557	struct vhost_dev *dev = &v->vdev;
 558	struct vdpa_device *vdpa = v->vdpa;
 559	const struct vdpa_config_ops *ops = vdpa->config;
 560	int r = 0;
 561
 562	r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
 563				  pa, perm);
 564	if (r)
 565		return r;
 566
 567	if (ops->dma_map) {
 568		r = ops->dma_map(vdpa, iova, size, pa, perm);
 569	} else if (ops->set_map) {
 570		if (!v->in_batch)
 571			r = ops->set_map(vdpa, dev->iotlb);
 572	} else {
 573		r = iommu_map(v->domain, iova, pa, size,
 574			      perm_to_iommu_flags(perm));
 575	}
 576
 577	if (r)
 578		vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
 579	else
 580		atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
 581
 582	return r;
 583}
 584
 585static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
 586{
 587	struct vhost_dev *dev = &v->vdev;
 588	struct vdpa_device *vdpa = v->vdpa;
 589	const struct vdpa_config_ops *ops = vdpa->config;
 590
 591	vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1);
 592
 593	if (ops->dma_map) {
 594		ops->dma_unmap(vdpa, iova, size);
 595	} else if (ops->set_map) {
 596		if (!v->in_batch)
 597			ops->set_map(vdpa, dev->iotlb);
 598	} else {
 599		iommu_unmap(v->domain, iova, size);
 600	}
 601}
 602
 603static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 604					   struct vhost_iotlb_msg *msg)
 605{
 606	struct vhost_dev *dev = &v->vdev;
 607	struct vhost_iotlb *iotlb = dev->iotlb;
 608	struct page **page_list;
 609	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
 610	unsigned int gup_flags = FOLL_LONGTERM;
 611	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
 612	unsigned long lock_limit, sz2pin, nchunks, i;
 613	u64 iova = msg->iova;
 614	long pinned;
 615	int ret = 0;
 616
 617	if (msg->iova < v->range.first || !msg->size ||
 618	    msg->iova > U64_MAX - msg->size + 1 ||
 619	    msg->iova + msg->size - 1 > v->range.last)
 620		return -EINVAL;
 621
 622	if (vhost_iotlb_itree_first(iotlb, msg->iova,
 623				    msg->iova + msg->size - 1))
 624		return -EEXIST;
 625
 626	/* Limit the use of memory for bookkeeping */
 627	page_list = (struct page **) __get_free_page(GFP_KERNEL);
 628	if (!page_list)
 629		return -ENOMEM;
 630
 631	if (msg->perm & VHOST_ACCESS_WO)
 632		gup_flags |= FOLL_WRITE;
 633
 634	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
 635	if (!npages) {
 636		ret = -EINVAL;
 637		goto free;
 638	}
 639
 640	mmap_read_lock(dev->mm);
 641
 642	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 643	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
 644		ret = -ENOMEM;
 645		goto unlock;
 646	}
 647
 648	cur_base = msg->uaddr & PAGE_MASK;
 649	iova &= PAGE_MASK;
 650	nchunks = 0;
 651
 652	while (npages) {
 653		sz2pin = min_t(unsigned long, npages, list_size);
 654		pinned = pin_user_pages(cur_base, sz2pin,
 655					gup_flags, page_list, NULL);
 656		if (sz2pin != pinned) {
 657			if (pinned < 0) {
 658				ret = pinned;
 659			} else {
 660				unpin_user_pages(page_list, pinned);
 661				ret = -ENOMEM;
 662			}
 663			goto out;
 664		}
 665		nchunks++;
 666
 667		if (!last_pfn)
 668			map_pfn = page_to_pfn(page_list[0]);
 669
 670		for (i = 0; i < pinned; i++) {
 671			unsigned long this_pfn = page_to_pfn(page_list[i]);
 672			u64 csize;
 673
 674			if (last_pfn && (this_pfn != last_pfn + 1)) {
 675				/* Pin a contiguous chunk of memory */
 676				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
 677				ret = vhost_vdpa_map(v, iova, csize,
 678						     map_pfn << PAGE_SHIFT,
 679						     msg->perm);
 680				if (ret) {
 681					/*
 682					 * Unpin the pages that are left unmapped
 683					 * from this point on in the current
 684					 * page_list. The remaining outstanding
 685					 * ones which may stride across several
 686					 * chunks will be covered in the common
 687					 * error path subsequently.
 688					 */
 689					unpin_user_pages(&page_list[i],
 690							 pinned - i);
 691					goto out;
 692				}
 693
 694				map_pfn = this_pfn;
 695				iova += csize;
 696				nchunks = 0;
 697			}
 698
 699			last_pfn = this_pfn;
 700		}
 701
 702		cur_base += pinned << PAGE_SHIFT;
 703		npages -= pinned;
 704	}
 705
  706	/* Map the remaining pinned chunk */
 707	ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
 708			     map_pfn << PAGE_SHIFT, msg->perm);
 709out:
 710	if (ret) {
 711		if (nchunks) {
 712			unsigned long pfn;
 713
 714			/*
  715			 * Unpin the outstanding pages which were meant to be
  716			 * mapped but were not, due to a vdpa_map() or
  717			 * pin_user_pages() failure.
 718			 *
 719			 * Mapped pages are accounted in vdpa_map(), hence
 720			 * the corresponding unpinning will be handled by
 721			 * vdpa_unmap().
 722			 */
 723			WARN_ON(!last_pfn);
 724			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
 725				unpin_user_page(pfn_to_page(pfn));
 726		}
 727		vhost_vdpa_unmap(v, msg->iova, msg->size);
 728	}
 729unlock:
 730	mmap_read_unlock(dev->mm);
 731free:
 732	free_page((unsigned long)page_list);
 733	return ret;
 734}
 735
 736static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
 737					struct vhost_iotlb_msg *msg)
 738{
 739	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
 740	struct vdpa_device *vdpa = v->vdpa;
 741	const struct vdpa_config_ops *ops = vdpa->config;
 742	int r = 0;
 743
 744	mutex_lock(&dev->mutex);
 745
 746	r = vhost_dev_check_owner(dev);
 747	if (r)
 748		goto unlock;
 749
 750	switch (msg->type) {
 751	case VHOST_IOTLB_UPDATE:
 752		r = vhost_vdpa_process_iotlb_update(v, msg);
 753		break;
 754	case VHOST_IOTLB_INVALIDATE:
 755		vhost_vdpa_unmap(v, msg->iova, msg->size);
 756		break;
 757	case VHOST_IOTLB_BATCH_BEGIN:
 758		v->in_batch = true;
 759		break;
 760	case VHOST_IOTLB_BATCH_END:
 761		if (v->in_batch && ops->set_map)
 762			ops->set_map(vdpa, dev->iotlb);
 763		v->in_batch = false;
 764		break;
 765	default:
 766		r = -EINVAL;
 767		break;
 768	}
 769unlock:
 770	mutex_unlock(&dev->mutex);
 771
 772	return r;
 773}
 774
 775static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
 776					 struct iov_iter *from)
 777{
 778	struct file *file = iocb->ki_filp;
 779	struct vhost_vdpa *v = file->private_data;
 780	struct vhost_dev *dev = &v->vdev;
 781
 782	return vhost_chr_write_iter(dev, from);
 783}
 784
 785static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
 786{
 787	struct vdpa_device *vdpa = v->vdpa;
 788	const struct vdpa_config_ops *ops = vdpa->config;
 789	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
 790	struct bus_type *bus;
 791	int ret;
 792
  793	/* Device wants to do DMA by itself */
 794	if (ops->set_map || ops->dma_map)
 795		return 0;
 796
 797	bus = dma_dev->bus;
 798	if (!bus)
 799		return -EFAULT;
 800
 801	if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
 802		return -ENOTSUPP;
 803
 804	v->domain = iommu_domain_alloc(bus);
 805	if (!v->domain)
 806		return -EIO;
 807
 808	ret = iommu_attach_device(v->domain, dma_dev);
 809	if (ret)
 810		goto err_attach;
 811
 812	return 0;
 813
 814err_attach:
 815	iommu_domain_free(v->domain);
 816	return ret;
 817}
 818
 819static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
 820{
 821	struct vdpa_device *vdpa = v->vdpa;
 822	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
 823
 824	if (v->domain) {
 825		iommu_detach_device(v->domain, dma_dev);
 826		iommu_domain_free(v->domain);
 827	}
 828
 829	v->domain = NULL;
 830}
 831
 832static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
 833{
 834	struct vdpa_iova_range *range = &v->range;
 835	struct vdpa_device *vdpa = v->vdpa;
 836	const struct vdpa_config_ops *ops = vdpa->config;
 837
 838	if (ops->get_iova_range) {
 839		*range = ops->get_iova_range(vdpa);
 840	} else if (v->domain && v->domain->geometry.force_aperture) {
 841		range->first = v->domain->geometry.aperture_start;
 842		range->last = v->domain->geometry.aperture_end;
 843	} else {
 844		range->first = 0;
 845		range->last = ULLONG_MAX;
 846	}
 847}
 848
 849static int vhost_vdpa_open(struct inode *inode, struct file *filep)
 850{
 851	struct vhost_vdpa *v;
 852	struct vhost_dev *dev;
 853	struct vhost_virtqueue **vqs;
 854	int nvqs, i, r, opened;
 855
 856	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
 857
 858	opened = atomic_cmpxchg(&v->opened, 0, 1);
 859	if (opened)
 860		return -EBUSY;
 861
 862	nvqs = v->nvqs;
 863	vhost_vdpa_reset(v);
 864
 865	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
 866	if (!vqs) {
 867		r = -ENOMEM;
 868		goto err;
 869	}
 870
 871	dev = &v->vdev;
 872	for (i = 0; i < nvqs; i++) {
 873		vqs[i] = &v->vqs[i];
 874		vqs[i]->handle_kick = handle_vq_kick;
 875	}
 876	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
 877		       vhost_vdpa_process_iotlb_msg);
 878
 879	dev->iotlb = vhost_iotlb_alloc(0, 0);
 880	if (!dev->iotlb) {
 881		r = -ENOMEM;
 882		goto err_init_iotlb;
 883	}
 884
 885	r = vhost_vdpa_alloc_domain(v);
 886	if (r)
 887		goto err_init_iotlb;
 888
 889	vhost_vdpa_set_iova_range(v);
 890
 891	filep->private_data = v;
 892
 893	return 0;
 894
 895err_init_iotlb:
 896	vhost_dev_cleanup(&v->vdev);
 897	kfree(vqs);
 898err:
 899	atomic_dec(&v->opened);
 900	return r;
 901}
 902
 903static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
 904{
 905	int i;
 906
 907	for (i = 0; i < v->nvqs; i++)
 908		vhost_vdpa_unsetup_vq_irq(v, i);
 909}
 910
 911static int vhost_vdpa_release(struct inode *inode, struct file *filep)
 912{
 913	struct vhost_vdpa *v = filep->private_data;
 914	struct vhost_dev *d = &v->vdev;
 915
 916	mutex_lock(&d->mutex);
 917	filep->private_data = NULL;
 918	vhost_vdpa_reset(v);
 919	vhost_dev_stop(&v->vdev);
 920	vhost_vdpa_iotlb_free(v);
 921	vhost_vdpa_free_domain(v);
 922	vhost_vdpa_config_put(v);
 923	vhost_vdpa_clean_irq(v);
 924	vhost_dev_cleanup(&v->vdev);
 925	kfree(v->vdev.vqs);
 926	mutex_unlock(&d->mutex);
 927
 928	atomic_dec(&v->opened);
 929	complete(&v->completion);
 930
 931	return 0;
 932}
 933
 934#ifdef CONFIG_MMU
 935static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
 936{
 937	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
 938	struct vdpa_device *vdpa = v->vdpa;
 939	const struct vdpa_config_ops *ops = vdpa->config;
 940	struct vdpa_notification_area notify;
 941	struct vm_area_struct *vma = vmf->vma;
 942	u16 index = vma->vm_pgoff;
 943
 944	notify = ops->get_vq_notification(vdpa, index);
 945
 946	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 947	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
 948			    notify.addr >> PAGE_SHIFT, PAGE_SIZE,
 949			    vma->vm_page_prot))
 950		return VM_FAULT_SIGBUS;
 951
 952	return VM_FAULT_NOPAGE;
 953}
 954
 955static const struct vm_operations_struct vhost_vdpa_vm_ops = {
 956	.fault = vhost_vdpa_fault,
 957};
 958
 959static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
 960{
 961	struct vhost_vdpa *v = vma->vm_file->private_data;
 962	struct vdpa_device *vdpa = v->vdpa;
 963	const struct vdpa_config_ops *ops = vdpa->config;
 964	struct vdpa_notification_area notify;
 965	unsigned long index = vma->vm_pgoff;
 966
 967	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
 968		return -EINVAL;
 969	if ((vma->vm_flags & VM_SHARED) == 0)
 970		return -EINVAL;
 971	if (vma->vm_flags & VM_READ)
 972		return -EINVAL;
 973	if (index > 65535)
 974		return -EINVAL;
 975	if (!ops->get_vq_notification)
 976		return -ENOTSUPP;
 977
  978	/* To be safe and easily modelled by userspace, we only
  979	 * support a doorbell that sits on a page boundary and
  980	 * does not share its page with other registers.
  981	 */
 981	 */
 982	notify = ops->get_vq_notification(vdpa, index);
 983	if (notify.addr & (PAGE_SIZE - 1))
 984		return -EINVAL;
 985	if (vma->vm_end - vma->vm_start != notify.size)
 986		return -ENOTSUPP;
 987
 988	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 989	vma->vm_ops = &vhost_vdpa_vm_ops;
 990	return 0;
 991}
 992#endif /* CONFIG_MMU */
 993
 994static const struct file_operations vhost_vdpa_fops = {
 995	.owner		= THIS_MODULE,
 996	.open		= vhost_vdpa_open,
 997	.release	= vhost_vdpa_release,
 998	.write_iter	= vhost_vdpa_chr_write_iter,
 999	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
1000#ifdef CONFIG_MMU
1001	.mmap		= vhost_vdpa_mmap,
1002#endif /* CONFIG_MMU */
1003	.compat_ioctl	= compat_ptr_ioctl,
1004};
1005
1006static void vhost_vdpa_release_dev(struct device *device)
1007{
1008	struct vhost_vdpa *v =
1009	       container_of(device, struct vhost_vdpa, dev);
1010
1011	ida_simple_remove(&vhost_vdpa_ida, v->minor);
1012	kfree(v->vqs);
1013	kfree(v);
1014}
1015
1016static int vhost_vdpa_probe(struct vdpa_device *vdpa)
1017{
1018	const struct vdpa_config_ops *ops = vdpa->config;
1019	struct vhost_vdpa *v;
1020	int minor;
1021	int r;
1022
1023	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
1024	if (!v)
1025		return -ENOMEM;
1026
1027	minor = ida_simple_get(&vhost_vdpa_ida, 0,
1028			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
1029	if (minor < 0) {
1030		kfree(v);
1031		return minor;
1032	}
1033
1034	atomic_set(&v->opened, 0);
1035	v->minor = minor;
1036	v->vdpa = vdpa;
1037	v->nvqs = vdpa->nvqs;
1038	v->virtio_id = ops->get_device_id(vdpa);
1039
1040	device_initialize(&v->dev);
1041	v->dev.release = vhost_vdpa_release_dev;
1042	v->dev.parent = &vdpa->dev;
1043	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
1044	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
1045			       GFP_KERNEL);
1046	if (!v->vqs) {
1047		r = -ENOMEM;
1048		goto err;
1049	}
1050
1051	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
1052	if (r)
1053		goto err;
1054
1055	cdev_init(&v->cdev, &vhost_vdpa_fops);
1056	v->cdev.owner = THIS_MODULE;
1057
1058	r = cdev_device_add(&v->cdev, &v->dev);
1059	if (r)
1060		goto err;
1061
1062	init_completion(&v->completion);
1063	vdpa_set_drvdata(vdpa, v);
1064
1065	return 0;
1066
1067err:
1068	put_device(&v->dev);
1069	return r;
1070}
1071
1072static void vhost_vdpa_remove(struct vdpa_device *vdpa)
1073{
1074	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
1075	int opened;
1076
1077	cdev_device_del(&v->cdev, &v->dev);
1078
1079	do {
1080		opened = atomic_cmpxchg(&v->opened, 0, 1);
1081		if (!opened)
1082			break;
1083		wait_for_completion(&v->completion);
1084	} while (1);
1085
1086	put_device(&v->dev);
1087}
1088
1089static struct vdpa_driver vhost_vdpa_driver = {
1090	.driver = {
1091		.name	= "vhost_vdpa",
1092	},
1093	.probe	= vhost_vdpa_probe,
1094	.remove	= vhost_vdpa_remove,
1095};
1096
1097static int __init vhost_vdpa_init(void)
1098{
1099	int r;
1100
1101	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
1102				"vhost-vdpa");
1103	if (r)
1104		goto err_alloc_chrdev;
1105
1106	r = vdpa_register_driver(&vhost_vdpa_driver);
1107	if (r)
1108		goto err_vdpa_register_driver;
1109
1110	return 0;
1111
1112err_vdpa_register_driver:
1113	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1114err_alloc_chrdev:
1115	return r;
1116}
1117module_init(vhost_vdpa_init);
1118
1119static void __exit vhost_vdpa_exit(void)
1120{
1121	vdpa_unregister_driver(&vhost_vdpa_driver);
1122	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1123}
1124module_exit(vhost_vdpa_exit);
1125
1126MODULE_VERSION("0.0.1");
1127MODULE_LICENSE("GPL v2");
1128MODULE_AUTHOR("Intel Corporation");
1129MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");
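
For context, here is a minimal userspace sketch of how a VMM might probe one of the /dev/vhost-vdpa-N character devices created by this driver, exercising the ioctls handled in vhost_vdpa_unlocked_ioctl() above. The ioctl numbers and struct vhost_vdpa_iova_range come from the <linux/vhost.h> UAPI header; the "/dev/vhost-vdpa-0" path, the ordering of calls, and the error handling are illustrative assumptions, not part of the kernel source above.

/*
 * Illustrative userspace sketch (not part of the kernel file above):
 * open a vhost-vdpa device and query its basic properties.
 * The device node name is an assumption; the minor number depends
 * on probe order.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

int main(void)
{
	__u32 device_id;
	__u16 vq_num;
	__u64 features;
	struct vhost_vdpa_iova_range range;

	int fd = open("/dev/vhost-vdpa-0", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Take ownership of the device before issuing other requests. */
	if (ioctl(fd, VHOST_SET_OWNER, NULL))
		perror("VHOST_SET_OWNER");

	/* Served by vhost_vdpa_get_device_id() above. */
	if (!ioctl(fd, VHOST_VDPA_GET_DEVICE_ID, &device_id))
		printf("virtio device id: %u\n", device_id);

	/* Served by vhost_vdpa_get_vring_num() above. */
	if (!ioctl(fd, VHOST_VDPA_GET_VRING_NUM, &vq_num))
		printf("max vring size: %u\n", vq_num);

	/* Served by vhost_vdpa_get_features() above. */
	if (!ioctl(fd, VHOST_GET_FEATURES, &features))
		printf("features: 0x%llx\n", (unsigned long long)features);

	/* Served by vhost_vdpa_get_iova_range() above. */
	if (!ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, &range))
		printf("iova range: [0x%llx, 0x%llx]\n",
		       (unsigned long long)range.first,
		       (unsigned long long)range.last);

	close(fd);
	return 0;
}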