   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2018-2020 Intel Corporation.
   4 * Copyright (C) 2020 Red Hat, Inc.
   5 *
   6 * Author: Tiwei Bie <tiwei.bie@intel.com>
   7 *         Jason Wang <jasowang@redhat.com>
   8 *
   9 * Thanks to Michael S. Tsirkin for the valuable comments and
  10 * suggestions, and thanks to Cunming Liang and Zhihong Wang for
  11 * all their support.
  12 */
  13
  14#include <linux/kernel.h>
  15#include <linux/module.h>
  16#include <linux/cdev.h>
  17#include <linux/device.h>
  18#include <linux/mm.h>
  19#include <linux/slab.h>
  20#include <linux/iommu.h>
  21#include <linux/uuid.h>
  22#include <linux/vdpa.h>
  23#include <linux/nospec.h>
  24#include <linux/vhost.h>
  25
  26#include "vhost.h"
  27
  28enum {
  29	VHOST_VDPA_BACKEND_FEATURES =
  30	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
  31	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
  32	(1ULL << VHOST_BACKEND_F_IOTLB_ASID),
  33};
  34
  35#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
  36
  37#define VHOST_VDPA_IOTLB_BUCKETS 16
  38
  39struct vhost_vdpa_as {
  40	struct hlist_node hash_link;
  41	struct vhost_iotlb iotlb;
  42	u32 id;
  43};
  44
  45struct vhost_vdpa {
  46	struct vhost_dev vdev;
  47	struct iommu_domain *domain;
  48	struct vhost_virtqueue *vqs;
  49	struct completion completion;
  50	struct vdpa_device *vdpa;
  51	struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
  52	struct device dev;
  53	struct cdev cdev;
  54	atomic_t opened;
  55	u32 nvqs;
  56	int virtio_id;
  57	int minor;
  58	struct eventfd_ctx *config_ctx;
  59	int in_batch;
  60	struct vdpa_iova_range range;
  61	u32 batch_asid;
  62	bool suspended;
  63};
  64
  65static DEFINE_IDA(vhost_vdpa_ida);
  66
  67static dev_t vhost_vdpa_major;
  68
  69static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
  70				   struct vhost_iotlb *iotlb, u64 start,
  71				   u64 last, u32 asid);
  72
  73static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
  74{
  75	struct vhost_vdpa_as *as = container_of(iotlb, struct
  76						vhost_vdpa_as, iotlb);
  77	return as->id;
  78}
  79
  80static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
  81{
  82	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
  83	struct vhost_vdpa_as *as;
  84
  85	hlist_for_each_entry(as, head, hash_link)
  86		if (as->id == asid)
  87			return as;
  88
  89	return NULL;
  90}
  91
  92static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid)
  93{
  94	struct vhost_vdpa_as *as = asid_to_as(v, asid);
  95
  96	if (!as)
  97		return NULL;
  98
  99	return &as->iotlb;
 100}
 101
 102static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
 103{
 104	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
 105	struct vhost_vdpa_as *as;
 106
 107	if (asid_to_as(v, asid))
 108		return NULL;
 109
 110	if (asid >= v->vdpa->nas)
 111		return NULL;
 112
 113	as = kmalloc(sizeof(*as), GFP_KERNEL);
 114	if (!as)
 115		return NULL;
 116
 117	vhost_iotlb_init(&as->iotlb, 0, 0);
 118	as->id = asid;
 119	hlist_add_head(&as->hash_link, head);
 120
 121	return as;
 122}
 123
 124static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
 125						      u32 asid)
 126{
 127	struct vhost_vdpa_as *as = asid_to_as(v, asid);
 128
 129	if (as)
 130		return as;
 131
 132	return vhost_vdpa_alloc_as(v, asid);
 133}
 134
 135static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid)
 136{
 137	struct vdpa_device *vdpa = v->vdpa;
 138	const struct vdpa_config_ops *ops = vdpa->config;
 139
 140	if (ops->reset_map)
 141		ops->reset_map(vdpa, asid);
 142}
 143
 144static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
 145{
 146	struct vhost_vdpa_as *as = asid_to_as(v, asid);
 147
 148	if (!as)
 149		return -EINVAL;
 150
 151	hlist_del(&as->hash_link);
 152	vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
  153	/*
  154	 * Devices with a vendor-specific IOMMU may need to restore
  155	 * the iotlb to its initial or default state, which cannot be
  156	 * done by the whole-range unmap call above. Give them
  157	 * a chance to clean up or reset the map to the desired
  158	 * state.
  159	 */
 160	vhost_vdpa_reset_map(v, asid);
 161	kfree(as);
 162
 163	return 0;
 164}
 165
 166static void handle_vq_kick(struct vhost_work *work)
 167{
 168	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
 169						  poll.work);
 170	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
 171	const struct vdpa_config_ops *ops = v->vdpa->config;
 172
 173	ops->kick_vq(v->vdpa, vq - v->vqs);
 174}
 175
 176static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
 177{
 178	struct vhost_virtqueue *vq = private;
 179	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
 180
 181	if (call_ctx)
 182		eventfd_signal(call_ctx);
 183
 184	return IRQ_HANDLED;
 185}
 186
 187static irqreturn_t vhost_vdpa_config_cb(void *private)
 188{
 189	struct vhost_vdpa *v = private;
 190	struct eventfd_ctx *config_ctx = v->config_ctx;
 191
 192	if (config_ctx)
 193		eventfd_signal(config_ctx);
 194
 195	return IRQ_HANDLED;
 196}
 197
 198static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
 199{
 200	struct vhost_virtqueue *vq = &v->vqs[qid];
 201	const struct vdpa_config_ops *ops = v->vdpa->config;
 202	struct vdpa_device *vdpa = v->vdpa;
 203	int ret, irq;
 204
 205	if (!ops->get_vq_irq)
 206		return;
 207
 208	irq = ops->get_vq_irq(vdpa, qid);
 209	if (irq < 0)
 210		return;
 211
 212	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 213	if (!vq->call_ctx.ctx)
 214		return;
 215
 216	vq->call_ctx.producer.token = vq->call_ctx.ctx;
 217	vq->call_ctx.producer.irq = irq;
 218	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
 219	if (unlikely(ret))
  220		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration failed, ret = %d\n",
 221			 qid, vq->call_ctx.producer.token, ret);
 222}
 223
 224static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
 225{
 226	struct vhost_virtqueue *vq = &v->vqs[qid];
 227
 228	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 229}
 230
 231static int _compat_vdpa_reset(struct vhost_vdpa *v)
 232{
 233	struct vdpa_device *vdpa = v->vdpa;
 234	u32 flags = 0;
 235
 236	v->suspended = false;
 237
 238	if (v->vdev.vqs) {
 239		flags |= !vhost_backend_has_feature(v->vdev.vqs[0],
 240						    VHOST_BACKEND_F_IOTLB_PERSIST) ?
 241			 VDPA_RESET_F_CLEAN_MAP : 0;
 242	}
 243
 244	return vdpa_reset(vdpa, flags);
 245}
 246
 247static int vhost_vdpa_reset(struct vhost_vdpa *v)
 248{
 249	v->in_batch = 0;
 250	return _compat_vdpa_reset(v);
 251}
 252
 253static long vhost_vdpa_bind_mm(struct vhost_vdpa *v)
 254{
 255	struct vdpa_device *vdpa = v->vdpa;
 256	const struct vdpa_config_ops *ops = vdpa->config;
 257
 258	if (!vdpa->use_va || !ops->bind_mm)
 259		return 0;
 260
 261	return ops->bind_mm(vdpa, v->vdev.mm);
 262}
 263
 264static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v)
 265{
 266	struct vdpa_device *vdpa = v->vdpa;
 267	const struct vdpa_config_ops *ops = vdpa->config;
 268
 269	if (!vdpa->use_va || !ops->unbind_mm)
 270		return;
 271
 272	ops->unbind_mm(vdpa);
 273}
 274
 275static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
 276{
 277	struct vdpa_device *vdpa = v->vdpa;
 278	const struct vdpa_config_ops *ops = vdpa->config;
 279	u32 device_id;
 280
 281	device_id = ops->get_device_id(vdpa);
 282
 283	if (copy_to_user(argp, &device_id, sizeof(device_id)))
 284		return -EFAULT;
 285
 286	return 0;
 287}
 288
 289static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
 290{
 291	struct vdpa_device *vdpa = v->vdpa;
 292	const struct vdpa_config_ops *ops = vdpa->config;
 293	u8 status;
 294
 295	status = ops->get_status(vdpa);
 296
 297	if (copy_to_user(statusp, &status, sizeof(status)))
 298		return -EFAULT;
 299
 300	return 0;
 301}
 302
 303static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
 304{
 305	struct vdpa_device *vdpa = v->vdpa;
 306	const struct vdpa_config_ops *ops = vdpa->config;
 307	u8 status, status_old;
 308	u32 nvqs = v->nvqs;
 309	int ret;
 310	u16 i;
 311
 312	if (copy_from_user(&status, statusp, sizeof(status)))
 313		return -EFAULT;
 314
 315	status_old = ops->get_status(vdpa);
 316
 317	/*
  318	 * Userspace shouldn't remove status bits unless it resets
  319	 * the status to 0.
 320	 */
 321	if (status != 0 && (status_old & ~status) != 0)
 322		return -EINVAL;
 323
 324	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
 325		for (i = 0; i < nvqs; i++)
 326			vhost_vdpa_unsetup_vq_irq(v, i);
 327
 328	if (status == 0) {
 329		ret = _compat_vdpa_reset(v);
 330		if (ret)
 331			return ret;
 332	} else
 333		vdpa_set_status(vdpa, status);
 334
 335	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
 336		for (i = 0; i < nvqs; i++)
 337			vhost_vdpa_setup_vq_irq(v, i);
 338
 339	return 0;
 340}
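
/*
 * Illustrative userspace sketch (not part of this driver): the status
 * protocol enforced above only lets bits be added, so a device is
 * brought up step by step and torn down by writing 0. This assumes
 * "fd" is an open vhost-vdpa character device; the ioctl and
 * VIRTIO_CONFIG_S_* names come from <linux/vhost.h> and
 * <linux/virtio_config.h>:
 *
 *	__u8 s = 0;
 *
 *	ioctl(fd, VHOST_VDPA_SET_STATUS, &s);		// reset
 *	s = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER;
 *	ioctl(fd, VHOST_VDPA_SET_STATUS, &s);
 *	s |= VIRTIO_CONFIG_S_FEATURES_OK;	// after VHOST_SET_FEATURES
 *	ioctl(fd, VHOST_VDPA_SET_STATUS, &s);
 *	s |= VIRTIO_CONFIG_S_DRIVER_OK;		// start the device
 *	ioctl(fd, VHOST_VDPA_SET_STATUS, &s);
 */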
 341
 342static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
 343				      struct vhost_vdpa_config *c)
 344{
 345	struct vdpa_device *vdpa = v->vdpa;
 346	size_t size = vdpa->config->get_config_size(vdpa);
 347
 348	if (c->len == 0 || c->off > size)
 349		return -EINVAL;
 350
 351	if (c->len > size - c->off)
 352		return -E2BIG;
 353
 354	return 0;
 355}
 356
 357static long vhost_vdpa_get_config(struct vhost_vdpa *v,
 358				  struct vhost_vdpa_config __user *c)
 359{
 360	struct vdpa_device *vdpa = v->vdpa;
 361	struct vhost_vdpa_config config;
 362	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 363	u8 *buf;
 364
 365	if (copy_from_user(&config, c, size))
 366		return -EFAULT;
 367	if (vhost_vdpa_config_validate(v, &config))
 368		return -EINVAL;
 369	buf = kvzalloc(config.len, GFP_KERNEL);
 370	if (!buf)
 371		return -ENOMEM;
 372
 373	vdpa_get_config(vdpa, config.off, buf, config.len);
 374
 375	if (copy_to_user(c->buf, buf, config.len)) {
 376		kvfree(buf);
 377		return -EFAULT;
 378	}
 379
 380	kvfree(buf);
 381	return 0;
 382}
 383
 384static long vhost_vdpa_set_config(struct vhost_vdpa *v,
 385				  struct vhost_vdpa_config __user *c)
 386{
 387	struct vdpa_device *vdpa = v->vdpa;
 388	struct vhost_vdpa_config config;
 389	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 390	u8 *buf;
 391
 392	if (copy_from_user(&config, c, size))
 393		return -EFAULT;
 394	if (vhost_vdpa_config_validate(v, &config))
 395		return -EINVAL;
 396
 397	buf = vmemdup_user(c->buf, config.len);
 398	if (IS_ERR(buf))
 399		return PTR_ERR(buf);
 400
 401	vdpa_set_config(vdpa, config.off, buf, config.len);
 402
 403	kvfree(buf);
 404	return 0;
 405}
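
/*
 * Illustrative userspace sketch (not part of this driver): the config
 * space accessors above take a struct vhost_vdpa_config whose "buf" is
 * a flexible array, so the caller allocates the header and payload in
 * one block. Assuming "fd" is an open vhost-vdpa device and "len"
 * config bytes at offset 0 are wanted:
 *
 *	struct vhost_vdpa_config *c = malloc(sizeof(*c) + len);
 *
 *	c->off = 0;
 *	c->len = len;
 *	if (ioctl(fd, VHOST_VDPA_GET_CONFIG, c))
 *		perror("VHOST_VDPA_GET_CONFIG");
 *	// on success c->buf holds the device config bytes
 */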
 406
 407static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
 408{
 409	struct vdpa_device *vdpa = v->vdpa;
 410	const struct vdpa_config_ops *ops = vdpa->config;
 411
 412	return ops->suspend;
 413}
 414
 415static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
 416{
 417	struct vdpa_device *vdpa = v->vdpa;
 418	const struct vdpa_config_ops *ops = vdpa->config;
 419
 420	return ops->resume;
 421}
 422
 423static bool vhost_vdpa_has_desc_group(const struct vhost_vdpa *v)
 424{
 425	struct vdpa_device *vdpa = v->vdpa;
 426	const struct vdpa_config_ops *ops = vdpa->config;
 427
 428	return ops->get_vq_desc_group;
 429}
 430
 431static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
 432{
 433	struct vdpa_device *vdpa = v->vdpa;
 434	const struct vdpa_config_ops *ops = vdpa->config;
 435	u64 features;
 436
 437	features = ops->get_device_features(vdpa);
 438
 439	if (copy_to_user(featurep, &features, sizeof(features)))
 440		return -EFAULT;
 441
 442	return 0;
 443}
 444
 445static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v)
 446{
 447	struct vdpa_device *vdpa = v->vdpa;
 448	const struct vdpa_config_ops *ops = vdpa->config;
 449
 450	if (!ops->get_backend_features)
 451		return 0;
 452	else
 453		return ops->get_backend_features(vdpa);
 454}
 455
 456static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
 457{
 458	struct vdpa_device *vdpa = v->vdpa;
 459	const struct vdpa_config_ops *ops = vdpa->config;
 460
 461	return (!ops->set_map && !ops->dma_map) || ops->reset_map ||
 462	       vhost_vdpa_get_backend_features(v) & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
 463}
 464
 465static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
 466{
 467	struct vdpa_device *vdpa = v->vdpa;
 468	const struct vdpa_config_ops *ops = vdpa->config;
 469	struct vhost_dev *d = &v->vdev;
 470	u64 actual_features;
 471	u64 features;
 472	int i;
 473
 474	/*
 475	 * It's not allowed to change the features after they have
 476	 * been negotiated.
 477	 */
 478	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
 479		return -EBUSY;
 480
 481	if (copy_from_user(&features, featurep, sizeof(features)))
 482		return -EFAULT;
 483
 484	if (vdpa_set_features(vdpa, features))
 485		return -EINVAL;
 486
 487	/* let the vqs know what has been configured */
 488	actual_features = ops->get_driver_features(vdpa);
 489	for (i = 0; i < d->nvqs; ++i) {
 490		struct vhost_virtqueue *vq = d->vqs[i];
 491
 492		mutex_lock(&vq->mutex);
 493		vq->acked_features = actual_features;
 494		mutex_unlock(&vq->mutex);
 495	}
 496
 497	return 0;
 498}
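
/*
 * Illustrative userspace sketch (not part of this driver): feature
 * negotiation mirrors virtio. The device features are read with
 * VHOST_GET_FEATURES, the subset the caller understands is written
 * back with VHOST_SET_FEATURES, and this must happen before
 * FEATURES_OK is set (see the -EBUSY check above). Assuming "fd" is an
 * open vhost-vdpa device and "wanted" is the caller's feature mask:
 *
 *	__u64 dev_features, features;
 *
 *	ioctl(fd, VHOST_GET_FEATURES, &dev_features);
 *	features = dev_features & wanted;
 *	if (ioctl(fd, VHOST_SET_FEATURES, &features))
 *		perror("VHOST_SET_FEATURES");
 */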
 499
 500static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
 501{
 502	struct vdpa_device *vdpa = v->vdpa;
 503	const struct vdpa_config_ops *ops = vdpa->config;
 504	u16 num;
 505
 506	num = ops->get_vq_num_max(vdpa);
 507
 508	if (copy_to_user(argp, &num, sizeof(num)))
 509		return -EFAULT;
 510
 511	return 0;
 512}
 513
 514static void vhost_vdpa_config_put(struct vhost_vdpa *v)
 515{
 516	if (v->config_ctx) {
 517		eventfd_ctx_put(v->config_ctx);
 518		v->config_ctx = NULL;
 519	}
 520}
 521
 522static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
 523{
 524	struct vdpa_callback cb;
 525	int fd;
 526	struct eventfd_ctx *ctx;
 527
 528	cb.callback = vhost_vdpa_config_cb;
 529	cb.private = v;
 530	if (copy_from_user(&fd, argp, sizeof(fd)))
 531		return  -EFAULT;
 532
 533	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
 534	swap(ctx, v->config_ctx);
 535
 536	if (!IS_ERR_OR_NULL(ctx))
 537		eventfd_ctx_put(ctx);
 538
 539	if (IS_ERR(v->config_ctx)) {
 540		long ret = PTR_ERR(v->config_ctx);
 541
 542		v->config_ctx = NULL;
 543		return ret;
 544	}
 545
 546	v->vdpa->config->set_config_cb(v->vdpa, &cb);
 547
 548	return 0;
 549}
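
/*
 * Illustrative userspace sketch (not part of this driver): the config
 * interrupt is delivered through an eventfd handed over with
 * VHOST_VDPA_SET_CONFIG_CALL (passing VHOST_FILE_UNBIND detaches it).
 * Assuming "fd" is an open vhost-vdpa device:
 *
 *	int efd = eventfd(0, EFD_CLOEXEC);
 *
 *	if (ioctl(fd, VHOST_VDPA_SET_CONFIG_CALL, &efd))
 *		perror("VHOST_VDPA_SET_CONFIG_CALL");
 *	// poll()/read() efd to learn about config space changes
 */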
 550
 551static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
 552{
 553	struct vhost_vdpa_iova_range range = {
 554		.first = v->range.first,
 555		.last = v->range.last,
 556	};
 557
 558	if (copy_to_user(argp, &range, sizeof(range)))
 559		return -EFAULT;
 560	return 0;
 561}
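
/*
 * Illustrative userspace sketch (not part of this driver): before
 * sending IOTLB updates, a VMM would typically query the usable IOVA
 * window so it only maps addresses inside [first, last]. Assuming
 * "fd" is an open vhost-vdpa device:
 *
 *	struct vhost_vdpa_iova_range r;
 *
 *	if (!ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, &r))
 *		printf("usable IOVA window: 0x%llx-0x%llx\n",
 *		       (unsigned long long)r.first,
 *		       (unsigned long long)r.last);
 */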
 562
 563static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
 564{
 565	struct vdpa_device *vdpa = v->vdpa;
 566	const struct vdpa_config_ops *ops = vdpa->config;
 567	u32 size;
 568
 569	size = ops->get_config_size(vdpa);
 570
 571	if (copy_to_user(argp, &size, sizeof(size)))
 572		return -EFAULT;
 573
 574	return 0;
 575}
 576
 577static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
 578{
 579	struct vdpa_device *vdpa = v->vdpa;
 580
 581	if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)))
 582		return -EFAULT;
 583
 584	return 0;
 585}
 586
  587/* After a successful return of this ioctl the device must not process more
  588 * virtqueue descriptors. The device can answer reads or writes of config
 589 * fields as if it were not suspended. In particular, writing to "queue_enable"
 590 * with a value of 1 will not make the device start processing buffers.
 591 */
 592static long vhost_vdpa_suspend(struct vhost_vdpa *v)
 593{
 594	struct vdpa_device *vdpa = v->vdpa;
 595	const struct vdpa_config_ops *ops = vdpa->config;
 596	int ret;
 597
 598	if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK))
 599		return 0;
 600
 601	if (!ops->suspend)
 602		return -EOPNOTSUPP;
 603
 604	ret = ops->suspend(vdpa);
 605	if (!ret)
 606		v->suspended = true;
 607
 608	return ret;
 609}
 610
 611/* After a successful return of this ioctl the device resumes processing
 612 * virtqueue descriptors. The device becomes fully operational the same way it
 613 * was before it was suspended.
 614 */
 615static long vhost_vdpa_resume(struct vhost_vdpa *v)
 616{
 617	struct vdpa_device *vdpa = v->vdpa;
 618	const struct vdpa_config_ops *ops = vdpa->config;
 619	int ret;
 620
 621	if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK))
 622		return 0;
 623
 624	if (!ops->resume)
 625		return -EOPNOTSUPP;
 626
 627	ret = ops->resume(vdpa);
 628	if (!ret)
 629		v->suspended = false;
 630
 631	return ret;
 632}
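
/*
 * Illustrative userspace sketch (not part of this driver): both
 * operations are plain argument-less ioctls, typically used around a
 * stop/copy phase of live migration. This assumes "fd" is an open
 * vhost-vdpa device whose backend advertised VHOST_BACKEND_F_SUSPEND
 * and VHOST_BACKEND_F_RESUME:
 *
 *	if (ioctl(fd, VHOST_VDPA_SUSPEND))
 *		perror("VHOST_VDPA_SUSPEND");
 *	// ... read back vring state, config space, etc. ...
 *	if (ioctl(fd, VHOST_VDPA_RESUME))
 *		perror("VHOST_VDPA_RESUME");
 */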
 633
 634static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 635				   void __user *argp)
 636{
 637	struct vdpa_device *vdpa = v->vdpa;
 638	const struct vdpa_config_ops *ops = vdpa->config;
 639	struct vdpa_vq_state vq_state;
 640	struct vdpa_callback cb;
 641	struct vhost_virtqueue *vq;
 642	struct vhost_vring_state s;
 643	u32 idx;
 644	long r;
 645
 646	r = get_user(idx, (u32 __user *)argp);
 647	if (r < 0)
 648		return r;
 649
 650	if (idx >= v->nvqs)
 651		return -ENOBUFS;
 652
 653	idx = array_index_nospec(idx, v->nvqs);
 654	vq = &v->vqs[idx];
 655
 656	switch (cmd) {
 657	case VHOST_VDPA_SET_VRING_ENABLE:
 658		if (copy_from_user(&s, argp, sizeof(s)))
 659			return -EFAULT;
 660		ops->set_vq_ready(vdpa, idx, s.num);
 661		return 0;
 662	case VHOST_VDPA_GET_VRING_GROUP:
 663		if (!ops->get_vq_group)
 664			return -EOPNOTSUPP;
 665		s.index = idx;
 666		s.num = ops->get_vq_group(vdpa, idx);
 667		if (s.num >= vdpa->ngroups)
 668			return -EIO;
 669		else if (copy_to_user(argp, &s, sizeof(s)))
 670			return -EFAULT;
 671		return 0;
 672	case VHOST_VDPA_GET_VRING_DESC_GROUP:
 673		if (!vhost_vdpa_has_desc_group(v))
 674			return -EOPNOTSUPP;
 675		s.index = idx;
 676		s.num = ops->get_vq_desc_group(vdpa, idx);
 677		if (s.num >= vdpa->ngroups)
 678			return -EIO;
 679		else if (copy_to_user(argp, &s, sizeof(s)))
 680			return -EFAULT;
 681		return 0;
 682	case VHOST_VDPA_SET_GROUP_ASID:
 683		if (copy_from_user(&s, argp, sizeof(s)))
 684			return -EFAULT;
 685		if (s.num >= vdpa->nas)
 686			return -EINVAL;
 687		if (!ops->set_group_asid)
 688			return -EOPNOTSUPP;
 689		return ops->set_group_asid(vdpa, idx, s.num);
 690	case VHOST_VDPA_GET_VRING_SIZE:
 691		if (!ops->get_vq_size)
 692			return -EOPNOTSUPP;
 693		s.index = idx;
 694		s.num = ops->get_vq_size(vdpa, idx);
 695		if (copy_to_user(argp, &s, sizeof(s)))
 696			return -EFAULT;
 697		return 0;
 698	case VHOST_GET_VRING_BASE:
 699		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
 700		if (r)
 701			return r;
 702
 703		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
 704			vq->last_avail_idx = vq_state.packed.last_avail_idx |
 705					     (vq_state.packed.last_avail_counter << 15);
 706			vq->last_used_idx = vq_state.packed.last_used_idx |
 707					    (vq_state.packed.last_used_counter << 15);
 708		} else {
 709			vq->last_avail_idx = vq_state.split.avail_index;
 710		}
 711		break;
 712	}
 713
 714	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
 715	if (r)
 716		return r;
 717
 718	switch (cmd) {
 719	case VHOST_SET_VRING_ADDR:
 720		if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
 721			return -EINVAL;
 722
 723		if (ops->set_vq_address(vdpa, idx,
 724					(u64)(uintptr_t)vq->desc,
 725					(u64)(uintptr_t)vq->avail,
 726					(u64)(uintptr_t)vq->used))
 727			r = -EINVAL;
 728		break;
 729
 730	case VHOST_SET_VRING_BASE:
 731		if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
 732			return -EINVAL;
 733
 734		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
 735			vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
 736			vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
 737			vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
 738			vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
 739		} else {
 740			vq_state.split.avail_index = vq->last_avail_idx;
 741		}
 742		r = ops->set_vq_state(vdpa, idx, &vq_state);
 743		break;
 744
 745	case VHOST_SET_VRING_CALL:
 746		if (vq->call_ctx.ctx) {
 747			cb.callback = vhost_vdpa_virtqueue_cb;
 748			cb.private = vq;
 749			cb.trigger = vq->call_ctx.ctx;
 750		} else {
 751			cb.callback = NULL;
 752			cb.private = NULL;
 753			cb.trigger = NULL;
 754		}
 755		ops->set_vq_cb(vdpa, idx, &cb);
 756		vhost_vdpa_setup_vq_irq(v, idx);
 757		break;
 758
 759	case VHOST_SET_VRING_NUM:
 760		ops->set_vq_num(vdpa, idx, vq->num);
 761		break;
 762	}
 763
 764	return r;
 765}
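
/*
 * Note on the packed ring encoding used in the handler above: the
 * 16-bit last_avail_idx/last_used_idx values exchanged with userspace
 * carry the ring index in bits 0..14 and the wrap counter in bit 15.
 * For example, an index of 5 with the wrap counter set is passed as
 * 0x8005 (5 | (1 << 15)) and split back into packed.last_avail_idx = 5
 * and packed.last_avail_counter = 1.
 */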
 766
 767static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 768				      unsigned int cmd, unsigned long arg)
 769{
 770	struct vhost_vdpa *v = filep->private_data;
 771	struct vhost_dev *d = &v->vdev;
 772	void __user *argp = (void __user *)arg;
 773	u64 __user *featurep = argp;
 774	u64 features;
 775	long r = 0;
 776
 777	if (cmd == VHOST_SET_BACKEND_FEATURES) {
 778		if (copy_from_user(&features, featurep, sizeof(features)))
 779			return -EFAULT;
 780		if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
 781				 BIT_ULL(VHOST_BACKEND_F_DESC_ASID) |
 782				 BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST) |
 783				 BIT_ULL(VHOST_BACKEND_F_SUSPEND) |
 784				 BIT_ULL(VHOST_BACKEND_F_RESUME) |
 785				 BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK)))
 786			return -EOPNOTSUPP;
 787		if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
 788		     !vhost_vdpa_can_suspend(v))
 789			return -EOPNOTSUPP;
 790		if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) &&
 791		     !vhost_vdpa_can_resume(v))
 792			return -EOPNOTSUPP;
 793		if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
 794		    !(features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)))
 795			return -EINVAL;
 796		if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
 797		     !vhost_vdpa_has_desc_group(v))
 798			return -EOPNOTSUPP;
 799		if ((features & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) &&
 800		     !vhost_vdpa_has_persistent_map(v))
 801			return -EOPNOTSUPP;
 802		vhost_set_backend_features(&v->vdev, features);
 803		return 0;
 804	}
 805
 806	mutex_lock(&d->mutex);
 807
 808	switch (cmd) {
 809	case VHOST_VDPA_GET_DEVICE_ID:
 810		r = vhost_vdpa_get_device_id(v, argp);
 811		break;
 812	case VHOST_VDPA_GET_STATUS:
 813		r = vhost_vdpa_get_status(v, argp);
 814		break;
 815	case VHOST_VDPA_SET_STATUS:
 816		r = vhost_vdpa_set_status(v, argp);
 817		break;
 818	case VHOST_VDPA_GET_CONFIG:
 819		r = vhost_vdpa_get_config(v, argp);
 820		break;
 821	case VHOST_VDPA_SET_CONFIG:
 822		r = vhost_vdpa_set_config(v, argp);
 823		break;
 824	case VHOST_GET_FEATURES:
 825		r = vhost_vdpa_get_features(v, argp);
 826		break;
 827	case VHOST_SET_FEATURES:
 828		r = vhost_vdpa_set_features(v, argp);
 829		break;
 830	case VHOST_VDPA_GET_VRING_NUM:
 831		r = vhost_vdpa_get_vring_num(v, argp);
 832		break;
 833	case VHOST_VDPA_GET_GROUP_NUM:
 834		if (copy_to_user(argp, &v->vdpa->ngroups,
 835				 sizeof(v->vdpa->ngroups)))
 836			r = -EFAULT;
 837		break;
 838	case VHOST_VDPA_GET_AS_NUM:
 839		if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
 840			r = -EFAULT;
 841		break;
 842	case VHOST_SET_LOG_BASE:
 843	case VHOST_SET_LOG_FD:
 844		r = -ENOIOCTLCMD;
 845		break;
 846	case VHOST_VDPA_SET_CONFIG_CALL:
 847		r = vhost_vdpa_set_config_call(v, argp);
 848		break;
 849	case VHOST_GET_BACKEND_FEATURES:
 850		features = VHOST_VDPA_BACKEND_FEATURES;
 851		if (vhost_vdpa_can_suspend(v))
 852			features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
 853		if (vhost_vdpa_can_resume(v))
 854			features |= BIT_ULL(VHOST_BACKEND_F_RESUME);
 855		if (vhost_vdpa_has_desc_group(v))
 856			features |= BIT_ULL(VHOST_BACKEND_F_DESC_ASID);
 857		if (vhost_vdpa_has_persistent_map(v))
 858			features |= BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
 859		features |= vhost_vdpa_get_backend_features(v);
 860		if (copy_to_user(featurep, &features, sizeof(features)))
 861			r = -EFAULT;
 862		break;
 863	case VHOST_VDPA_GET_IOVA_RANGE:
 864		r = vhost_vdpa_get_iova_range(v, argp);
 865		break;
 866	case VHOST_VDPA_GET_CONFIG_SIZE:
 867		r = vhost_vdpa_get_config_size(v, argp);
 868		break;
 869	case VHOST_VDPA_GET_VQS_COUNT:
 870		r = vhost_vdpa_get_vqs_count(v, argp);
 871		break;
 872	case VHOST_VDPA_SUSPEND:
 873		r = vhost_vdpa_suspend(v);
 874		break;
 875	case VHOST_VDPA_RESUME:
 876		r = vhost_vdpa_resume(v);
 877		break;
 878	default:
 879		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
 880		if (r == -ENOIOCTLCMD)
 881			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
 882		break;
 883	}
 884
 885	if (r)
 886		goto out;
 887
 888	switch (cmd) {
 889	case VHOST_SET_OWNER:
 890		r = vhost_vdpa_bind_mm(v);
 891		if (r)
 892			vhost_dev_reset_owner(d, NULL);
 893		break;
 894	}
 895out:
 896	mutex_unlock(&d->mutex);
 897	return r;
 898}
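
/*
 * Illustrative userspace sketch (not part of this driver): backend
 * feature negotiation is usually done right after open(), since it
 * gates IOTLB batching, ASID support, suspend/resume and so on (see
 * the checks in the ioctl handler above). Assuming "fd" is an open
 * vhost-vdpa device:
 *
 *	__u64 backend;
 *
 *	ioctl(fd, VHOST_GET_BACKEND_FEATURES, &backend);
 *	backend &= (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
 *		   (1ULL << VHOST_BACKEND_F_IOTLB_BATCH);
 *	if (ioctl(fd, VHOST_SET_BACKEND_FEATURES, &backend))
 *		perror("VHOST_SET_BACKEND_FEATURES");
 */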
 899static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
 900				     struct vhost_iotlb_map *map, u32 asid)
 901{
 902	struct vdpa_device *vdpa = v->vdpa;
 903	const struct vdpa_config_ops *ops = vdpa->config;
 904	if (ops->dma_map) {
 905		ops->dma_unmap(vdpa, asid, map->start, map->size);
 906	} else if (ops->set_map == NULL) {
 907		iommu_unmap(v->domain, map->start, map->size);
 908	}
 909}
 910
 911static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
 912				u64 start, u64 last, u32 asid)
 913{
 914	struct vhost_dev *dev = &v->vdev;
 915	struct vhost_iotlb_map *map;
 916	struct page *page;
 917	unsigned long pfn, pinned;
 918
 919	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
 920		pinned = PFN_DOWN(map->size);
 921		for (pfn = PFN_DOWN(map->addr);
 922		     pinned > 0; pfn++, pinned--) {
 923			page = pfn_to_page(pfn);
 924			if (map->perm & VHOST_ACCESS_WO)
 925				set_page_dirty_lock(page);
 926			unpin_user_page(page);
 927		}
 928		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
 929		vhost_vdpa_general_unmap(v, map, asid);
 930		vhost_iotlb_map_free(iotlb, map);
 931	}
 932}
 933
 934static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
 935				u64 start, u64 last, u32 asid)
 936{
 937	struct vhost_iotlb_map *map;
 938	struct vdpa_map_file *map_file;
 939
 940	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
 941		map_file = (struct vdpa_map_file *)map->opaque;
 942		fput(map_file->file);
 943		kfree(map_file);
 944		vhost_vdpa_general_unmap(v, map, asid);
 945		vhost_iotlb_map_free(iotlb, map);
 946	}
 947}
 948
 949static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
 950				   struct vhost_iotlb *iotlb, u64 start,
 951				   u64 last, u32 asid)
 952{
 953	struct vdpa_device *vdpa = v->vdpa;
 954
 955	if (vdpa->use_va)
 956		return vhost_vdpa_va_unmap(v, iotlb, start, last, asid);
 957
 958	return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid);
 959}
 960
 961static int perm_to_iommu_flags(u32 perm)
 962{
 963	int flags = 0;
 964
 965	switch (perm) {
 966	case VHOST_ACCESS_WO:
 967		flags |= IOMMU_WRITE;
 968		break;
 969	case VHOST_ACCESS_RO:
 970		flags |= IOMMU_READ;
 971		break;
 972	case VHOST_ACCESS_RW:
 973		flags |= (IOMMU_WRITE | IOMMU_READ);
 974		break;
 975	default:
  976		WARN(1, "invalid vhost IOTLB permission\n");
 977		break;
 978	}
 979
 980	return flags | IOMMU_CACHE;
 981}
 982
 983static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
 984			  u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
 985{
 986	struct vhost_dev *dev = &v->vdev;
 987	struct vdpa_device *vdpa = v->vdpa;
 988	const struct vdpa_config_ops *ops = vdpa->config;
 989	u32 asid = iotlb_to_asid(iotlb);
 990	int r = 0;
 991
 992	r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
 993				      pa, perm, opaque);
 994	if (r)
 995		return r;
 996
 997	if (ops->dma_map) {
 998		r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
 999	} else if (ops->set_map) {
1000		if (!v->in_batch)
1001			r = ops->set_map(vdpa, asid, iotlb);
1002	} else {
1003		r = iommu_map(v->domain, iova, pa, size,
1004			      perm_to_iommu_flags(perm),
1005			      GFP_KERNEL_ACCOUNT);
1006	}
1007	if (r) {
1008		vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
1009		return r;
1010	}
1011
1012	if (!vdpa->use_va)
1013		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
1014
1015	return 0;
1016}
1017
1018static void vhost_vdpa_unmap(struct vhost_vdpa *v,
1019			     struct vhost_iotlb *iotlb,
1020			     u64 iova, u64 size)
1021{
1022	struct vdpa_device *vdpa = v->vdpa;
1023	const struct vdpa_config_ops *ops = vdpa->config;
1024	u32 asid = iotlb_to_asid(iotlb);
1025
1026	vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid);
1027
1028	if (ops->set_map) {
1029		if (!v->in_batch)
1030			ops->set_map(vdpa, asid, iotlb);
1031	}
1032
1033}
1034
1035static int vhost_vdpa_va_map(struct vhost_vdpa *v,
1036			     struct vhost_iotlb *iotlb,
1037			     u64 iova, u64 size, u64 uaddr, u32 perm)
1038{
1039	struct vhost_dev *dev = &v->vdev;
1040	u64 offset, map_size, map_iova = iova;
1041	struct vdpa_map_file *map_file;
1042	struct vm_area_struct *vma;
1043	int ret = 0;
1044
1045	mmap_read_lock(dev->mm);
1046
1047	while (size) {
1048		vma = find_vma(dev->mm, uaddr);
1049		if (!vma) {
1050			ret = -EINVAL;
1051			break;
1052		}
1053		map_size = min(size, vma->vm_end - uaddr);
1054		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
1055			!(vma->vm_flags & (VM_IO | VM_PFNMAP))))
1056			goto next;
1057
1058		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
1059		if (!map_file) {
1060			ret = -ENOMEM;
1061			break;
1062		}
1063		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
1064		map_file->offset = offset;
1065		map_file->file = get_file(vma->vm_file);
1066		ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
1067				     perm, map_file);
1068		if (ret) {
1069			fput(map_file->file);
1070			kfree(map_file);
1071			break;
1072		}
1073next:
1074		size -= map_size;
1075		uaddr += map_size;
1076		map_iova += map_size;
1077	}
1078	if (ret)
1079		vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);
1080
1081	mmap_read_unlock(dev->mm);
1082
1083	return ret;
1084}
1085
1086static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
1087			     struct vhost_iotlb *iotlb,
1088			     u64 iova, u64 size, u64 uaddr, u32 perm)
1089{
1090	struct vhost_dev *dev = &v->vdev;
1091	struct page **page_list;
1092	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
1093	unsigned int gup_flags = FOLL_LONGTERM;
1094	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
1095	unsigned long lock_limit, sz2pin, nchunks, i;
1096	u64 start = iova;
1097	long pinned;
1098	int ret = 0;
1099
1100	/* Limit the use of memory for bookkeeping */
1101	page_list = (struct page **) __get_free_page(GFP_KERNEL);
1102	if (!page_list)
1103		return -ENOMEM;
1104
1105	if (perm & VHOST_ACCESS_WO)
1106		gup_flags |= FOLL_WRITE;
1107
1108	npages = PFN_UP(size + (iova & ~PAGE_MASK));
1109	if (!npages) {
1110		ret = -EINVAL;
1111		goto free;
1112	}
1113
1114	mmap_read_lock(dev->mm);
1115
1116	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
1117	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
1118		ret = -ENOMEM;
1119		goto unlock;
1120	}
1121
1122	cur_base = uaddr & PAGE_MASK;
1123	iova &= PAGE_MASK;
1124	nchunks = 0;
1125
1126	while (npages) {
1127		sz2pin = min_t(unsigned long, npages, list_size);
1128		pinned = pin_user_pages(cur_base, sz2pin,
1129					gup_flags, page_list);
1130		if (sz2pin != pinned) {
1131			if (pinned < 0) {
1132				ret = pinned;
1133			} else {
1134				unpin_user_pages(page_list, pinned);
1135				ret = -ENOMEM;
1136			}
1137			goto out;
1138		}
1139		nchunks++;
1140
1141		if (!last_pfn)
1142			map_pfn = page_to_pfn(page_list[0]);
1143
1144		for (i = 0; i < pinned; i++) {
1145			unsigned long this_pfn = page_to_pfn(page_list[i]);
1146			u64 csize;
1147
1148			if (last_pfn && (this_pfn != last_pfn + 1)) {
 1149				/* Map the contiguous chunk of pinned memory */
1150				csize = PFN_PHYS(last_pfn - map_pfn + 1);
1151				ret = vhost_vdpa_map(v, iotlb, iova, csize,
1152						     PFN_PHYS(map_pfn),
1153						     perm, NULL);
1154				if (ret) {
1155					/*
1156					 * Unpin the pages that are left unmapped
1157					 * from this point on in the current
1158					 * page_list. The remaining outstanding
1159					 * ones which may stride across several
1160					 * chunks will be covered in the common
1161					 * error path subsequently.
1162					 */
1163					unpin_user_pages(&page_list[i],
1164							 pinned - i);
1165					goto out;
1166				}
1167
1168				map_pfn = this_pfn;
1169				iova += csize;
1170				nchunks = 0;
1171			}
1172
1173			last_pfn = this_pfn;
1174		}
1175
1176		cur_base += PFN_PHYS(pinned);
1177		npages -= pinned;
1178	}
1179
 1180	/* Map the remaining chunk of pinned pages */
1181	ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
1182			     PFN_PHYS(map_pfn), perm, NULL);
1183out:
1184	if (ret) {
1185		if (nchunks) {
1186			unsigned long pfn;
1187
1188			/*
 1189			 * Unpin the outstanding pages which were pinned
 1190			 * but not yet mapped due to vdpa_map() or
 1191			 * pin_user_pages() failure.
1192			 *
1193			 * Mapped pages are accounted in vdpa_map(), hence
1194			 * the corresponding unpinning will be handled by
1195			 * vdpa_unmap().
1196			 */
1197			WARN_ON(!last_pfn);
1198			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
1199				unpin_user_page(pfn_to_page(pfn));
1200		}
1201		vhost_vdpa_unmap(v, iotlb, start, size);
1202	}
1203unlock:
1204	mmap_read_unlock(dev->mm);
1205free:
1206	free_page((unsigned long)page_list);
1207	return ret;
1208
1209}
1210
1211static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
1212					   struct vhost_iotlb *iotlb,
1213					   struct vhost_iotlb_msg *msg)
1214{
1215	struct vdpa_device *vdpa = v->vdpa;
1216
1217	if (msg->iova < v->range.first || !msg->size ||
1218	    msg->iova > U64_MAX - msg->size + 1 ||
1219	    msg->iova + msg->size - 1 > v->range.last)
1220		return -EINVAL;
1221
1222	if (vhost_iotlb_itree_first(iotlb, msg->iova,
1223				    msg->iova + msg->size - 1))
1224		return -EEXIST;
1225
1226	if (vdpa->use_va)
1227		return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
1228					 msg->uaddr, msg->perm);
1229
1230	return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr,
1231				 msg->perm);
1232}
1233
1234static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
1235					struct vhost_iotlb_msg *msg)
1236{
1237	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
1238	struct vdpa_device *vdpa = v->vdpa;
1239	const struct vdpa_config_ops *ops = vdpa->config;
1240	struct vhost_iotlb *iotlb = NULL;
1241	struct vhost_vdpa_as *as = NULL;
1242	int r = 0;
1243
1244	mutex_lock(&dev->mutex);
1245
1246	r = vhost_dev_check_owner(dev);
1247	if (r)
1248		goto unlock;
1249
1250	if (msg->type == VHOST_IOTLB_UPDATE ||
1251	    msg->type == VHOST_IOTLB_BATCH_BEGIN) {
1252		as = vhost_vdpa_find_alloc_as(v, asid);
1253		if (!as) {
1254			dev_err(&v->dev, "can't find and alloc asid %d\n",
1255				asid);
1256			r = -EINVAL;
1257			goto unlock;
1258		}
1259		iotlb = &as->iotlb;
1260	} else
1261		iotlb = asid_to_iotlb(v, asid);
1262
1263	if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
1264		if (v->in_batch && v->batch_asid != asid) {
1265			dev_info(&v->dev, "batch id %d asid %d\n",
1266				 v->batch_asid, asid);
1267		}
1268		if (!iotlb)
1269			dev_err(&v->dev, "no iotlb for asid %d\n", asid);
1270		r = -EINVAL;
1271		goto unlock;
1272	}
1273
1274	switch (msg->type) {
1275	case VHOST_IOTLB_UPDATE:
1276		r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
1277		break;
1278	case VHOST_IOTLB_INVALIDATE:
1279		vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
1280		break;
1281	case VHOST_IOTLB_BATCH_BEGIN:
1282		v->batch_asid = asid;
1283		v->in_batch = true;
1284		break;
1285	case VHOST_IOTLB_BATCH_END:
1286		if (v->in_batch && ops->set_map)
1287			ops->set_map(vdpa, asid, iotlb);
1288		v->in_batch = false;
1289		break;
1290	default:
1291		r = -EINVAL;
1292		break;
1293	}
1294unlock:
1295	mutex_unlock(&dev->mutex);
1296
1297	return r;
1298}
1299
1300static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
1301					 struct iov_iter *from)
1302{
1303	struct file *file = iocb->ki_filp;
1304	struct vhost_vdpa *v = file->private_data;
1305	struct vhost_dev *dev = &v->vdev;
1306
1307	return vhost_chr_write_iter(dev, from);
1308}
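
/*
 * Illustrative userspace sketch (not part of this driver): IOTLB
 * messages reach vhost_vdpa_process_iotlb_msg() through plain write()s
 * of struct vhost_msg_v2 on the char device (this requires the
 * VHOST_BACKEND_F_IOTLB_MSG_V2 backend feature). Assuming "fd" is an
 * open vhost-vdpa device and the buffer at "uaddr" should be mapped
 * read/write at IOVA "iova":
 *
 *	struct vhost_msg_v2 msg = {
 *		.type = VHOST_IOTLB_MSG_V2,
 *		.asid = 0,
 *		.iotlb = {
 *			.iova  = iova,
 *			.size  = size,
 *			.uaddr = uaddr,
 *			.perm  = VHOST_ACCESS_RW,
 *			.type  = VHOST_IOTLB_UPDATE,
 *		},
 *	};
 *
 *	if (write(fd, &msg, sizeof(msg)) != sizeof(msg))
 *		perror("VHOST_IOTLB_UPDATE");
 */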
1309
1310static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
1311{
1312	struct vdpa_device *vdpa = v->vdpa;
1313	const struct vdpa_config_ops *ops = vdpa->config;
1314	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
1315	const struct bus_type *bus;
1316	int ret;
1317
 1318	/* Device wants to do DMA by itself */
1319	if (ops->set_map || ops->dma_map)
1320		return 0;
1321
1322	bus = dma_dev->bus;
1323	if (!bus)
1324		return -EFAULT;
1325
1326	if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) {
1327		dev_warn_once(&v->dev,
1328			      "Failed to allocate domain, device is not IOMMU cache coherent capable\n");
1329		return -ENOTSUPP;
1330	}
1331
1332	v->domain = iommu_domain_alloc(bus);
1333	if (!v->domain)
1334		return -EIO;
1335
1336	ret = iommu_attach_device(v->domain, dma_dev);
1337	if (ret)
1338		goto err_attach;
1339
1340	return 0;
1341
1342err_attach:
1343	iommu_domain_free(v->domain);
1344	v->domain = NULL;
1345	return ret;
1346}
1347
1348static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
1349{
1350	struct vdpa_device *vdpa = v->vdpa;
1351	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
1352
1353	if (v->domain) {
1354		iommu_detach_device(v->domain, dma_dev);
1355		iommu_domain_free(v->domain);
1356	}
1357
1358	v->domain = NULL;
1359}
1360
1361static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
1362{
1363	struct vdpa_iova_range *range = &v->range;
1364	struct vdpa_device *vdpa = v->vdpa;
1365	const struct vdpa_config_ops *ops = vdpa->config;
1366
1367	if (ops->get_iova_range) {
1368		*range = ops->get_iova_range(vdpa);
1369	} else if (v->domain && v->domain->geometry.force_aperture) {
1370		range->first = v->domain->geometry.aperture_start;
1371		range->last = v->domain->geometry.aperture_end;
1372	} else {
1373		range->first = 0;
1374		range->last = ULLONG_MAX;
1375	}
1376}
1377
1378static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
1379{
1380	struct vhost_vdpa_as *as;
1381	u32 asid;
1382
1383	for (asid = 0; asid < v->vdpa->nas; asid++) {
1384		as = asid_to_as(v, asid);
1385		if (as)
1386			vhost_vdpa_remove_as(v, asid);
1387	}
1388
1389	vhost_vdpa_free_domain(v);
1390	vhost_dev_cleanup(&v->vdev);
1391	kfree(v->vdev.vqs);
1392	v->vdev.vqs = NULL;
1393}
1394
1395static int vhost_vdpa_open(struct inode *inode, struct file *filep)
1396{
1397	struct vhost_vdpa *v;
1398	struct vhost_dev *dev;
1399	struct vhost_virtqueue **vqs;
1400	int r, opened;
1401	u32 i, nvqs;
1402
1403	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
1404
1405	opened = atomic_cmpxchg(&v->opened, 0, 1);
1406	if (opened)
1407		return -EBUSY;
1408
1409	nvqs = v->nvqs;
1410	r = vhost_vdpa_reset(v);
1411	if (r)
1412		goto err;
1413
1414	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
1415	if (!vqs) {
1416		r = -ENOMEM;
1417		goto err;
1418	}
1419
1420	dev = &v->vdev;
1421	for (i = 0; i < nvqs; i++) {
1422		vqs[i] = &v->vqs[i];
1423		vqs[i]->handle_kick = handle_vq_kick;
1424	}
1425	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
1426		       vhost_vdpa_process_iotlb_msg);
1427
1428	r = vhost_vdpa_alloc_domain(v);
1429	if (r)
1430		goto err_alloc_domain;
1431
1432	vhost_vdpa_set_iova_range(v);
1433
1434	filep->private_data = v;
1435
1436	return 0;
1437
1438err_alloc_domain:
1439	vhost_vdpa_cleanup(v);
1440err:
1441	atomic_dec(&v->opened);
1442	return r;
1443}
1444
1445static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
1446{
1447	u32 i;
1448
1449	for (i = 0; i < v->nvqs; i++)
1450		vhost_vdpa_unsetup_vq_irq(v, i);
1451}
1452
1453static int vhost_vdpa_release(struct inode *inode, struct file *filep)
1454{
1455	struct vhost_vdpa *v = filep->private_data;
1456	struct vhost_dev *d = &v->vdev;
1457
1458	mutex_lock(&d->mutex);
1459	filep->private_data = NULL;
1460	vhost_vdpa_clean_irq(v);
1461	vhost_vdpa_reset(v);
1462	vhost_dev_stop(&v->vdev);
1463	vhost_vdpa_unbind_mm(v);
1464	vhost_vdpa_config_put(v);
1465	vhost_vdpa_cleanup(v);
1466	mutex_unlock(&d->mutex);
1467
1468	atomic_dec(&v->opened);
1469	complete(&v->completion);
1470
1471	return 0;
1472}
1473
1474#ifdef CONFIG_MMU
1475static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
1476{
1477	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
1478	struct vdpa_device *vdpa = v->vdpa;
1479	const struct vdpa_config_ops *ops = vdpa->config;
1480	struct vdpa_notification_area notify;
1481	struct vm_area_struct *vma = vmf->vma;
1482	u16 index = vma->vm_pgoff;
1483
1484	notify = ops->get_vq_notification(vdpa, index);
1485
1486	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1487	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
1488			    PFN_DOWN(notify.addr), PAGE_SIZE,
1489			    vma->vm_page_prot))
1490		return VM_FAULT_SIGBUS;
1491
1492	return VM_FAULT_NOPAGE;
1493}
1494
1495static const struct vm_operations_struct vhost_vdpa_vm_ops = {
1496	.fault = vhost_vdpa_fault,
1497};
1498
1499static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
1500{
1501	struct vhost_vdpa *v = vma->vm_file->private_data;
1502	struct vdpa_device *vdpa = v->vdpa;
1503	const struct vdpa_config_ops *ops = vdpa->config;
1504	struct vdpa_notification_area notify;
1505	unsigned long index = vma->vm_pgoff;
1506
1507	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1508		return -EINVAL;
1509	if ((vma->vm_flags & VM_SHARED) == 0)
1510		return -EINVAL;
1511	if (vma->vm_flags & VM_READ)
1512		return -EINVAL;
1513	if (index > 65535)
1514		return -EINVAL;
1515	if (!ops->get_vq_notification)
1516		return -ENOTSUPP;
1517
 1518	/* To be safe and easily modelled by userspace, we only
 1519	 * support a doorbell that sits on a page boundary and
1520	 * does not share the page with other registers.
1521	 */
1522	notify = ops->get_vq_notification(vdpa, index);
1523	if (notify.addr & (PAGE_SIZE - 1))
1524		return -EINVAL;
1525	if (vma->vm_end - vma->vm_start != notify.size)
1526		return -ENOTSUPP;
1527
1528	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
1529	vma->vm_ops = &vhost_vdpa_vm_ops;
1530	return 0;
1531}
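
/*
 * Illustrative userspace sketch (not part of this driver): given the
 * checks above, a virtqueue doorbell is mapped write-only and shared,
 * one page per queue, with the queue index encoded in the mmap offset.
 * Assuming "fd" is an open vhost-vdpa device and "qid" the queue index
 * (writing the queue index is the typical virtio-style kick, but the
 * exact doorbell value is device specific):
 *
 *	long psz = sysconf(_SC_PAGESIZE);
 *	void *db = mmap(NULL, psz, PROT_WRITE, MAP_SHARED, fd,
 *			(off_t)qid * psz);
 *
 *	if (db != MAP_FAILED)
 *		*(volatile __u16 *)db = qid;
 */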
1532#endif /* CONFIG_MMU */
1533
1534static const struct file_operations vhost_vdpa_fops = {
1535	.owner		= THIS_MODULE,
1536	.open		= vhost_vdpa_open,
1537	.release	= vhost_vdpa_release,
1538	.write_iter	= vhost_vdpa_chr_write_iter,
1539	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
1540#ifdef CONFIG_MMU
1541	.mmap		= vhost_vdpa_mmap,
1542#endif /* CONFIG_MMU */
1543	.compat_ioctl	= compat_ptr_ioctl,
1544};
1545
1546static void vhost_vdpa_release_dev(struct device *device)
1547{
1548	struct vhost_vdpa *v =
1549	       container_of(device, struct vhost_vdpa, dev);
1550
1551	ida_simple_remove(&vhost_vdpa_ida, v->minor);
1552	kfree(v->vqs);
1553	kfree(v);
1554}
1555
1556static int vhost_vdpa_probe(struct vdpa_device *vdpa)
1557{
1558	const struct vdpa_config_ops *ops = vdpa->config;
1559	struct vhost_vdpa *v;
1560	int minor;
1561	int i, r;
1562
 1563	/* We can't support a platform IOMMU device with more than
 1564	 * one group or address space.
1565	 */
1566	if (!ops->set_map && !ops->dma_map &&
1567	    (vdpa->ngroups > 1 || vdpa->nas > 1))
1568		return -EOPNOTSUPP;
1569
1570	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
1571	if (!v)
1572		return -ENOMEM;
1573
1574	minor = ida_simple_get(&vhost_vdpa_ida, 0,
1575			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
1576	if (minor < 0) {
1577		kfree(v);
1578		return minor;
1579	}
1580
1581	atomic_set(&v->opened, 0);
1582	v->minor = minor;
1583	v->vdpa = vdpa;
1584	v->nvqs = vdpa->nvqs;
1585	v->virtio_id = ops->get_device_id(vdpa);
1586
1587	device_initialize(&v->dev);
1588	v->dev.release = vhost_vdpa_release_dev;
1589	v->dev.parent = &vdpa->dev;
1590	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
1591	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
1592			       GFP_KERNEL);
1593	if (!v->vqs) {
1594		r = -ENOMEM;
1595		goto err;
1596	}
1597
1598	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
1599	if (r)
1600		goto err;
1601
1602	cdev_init(&v->cdev, &vhost_vdpa_fops);
1603	v->cdev.owner = THIS_MODULE;
1604
1605	r = cdev_device_add(&v->cdev, &v->dev);
1606	if (r)
1607		goto err;
1608
1609	init_completion(&v->completion);
1610	vdpa_set_drvdata(vdpa, v);
1611
1612	for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
1613		INIT_HLIST_HEAD(&v->as[i]);
1614
1615	return 0;
1616
1617err:
1618	put_device(&v->dev);
1619	return r;
1620}
1621
1622static void vhost_vdpa_remove(struct vdpa_device *vdpa)
1623{
1624	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
1625	int opened;
1626
1627	cdev_device_del(&v->cdev, &v->dev);
1628
1629	do {
1630		opened = atomic_cmpxchg(&v->opened, 0, 1);
1631		if (!opened)
1632			break;
1633		wait_for_completion(&v->completion);
1634	} while (1);
1635
1636	put_device(&v->dev);
1637}
1638
1639static struct vdpa_driver vhost_vdpa_driver = {
1640	.driver = {
1641		.name	= "vhost_vdpa",
1642	},
1643	.probe	= vhost_vdpa_probe,
1644	.remove	= vhost_vdpa_remove,
1645};
1646
1647static int __init vhost_vdpa_init(void)
1648{
1649	int r;
1650
1651	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
1652				"vhost-vdpa");
1653	if (r)
1654		goto err_alloc_chrdev;
1655
1656	r = vdpa_register_driver(&vhost_vdpa_driver);
1657	if (r)
1658		goto err_vdpa_register_driver;
1659
1660	return 0;
1661
1662err_vdpa_register_driver:
1663	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1664err_alloc_chrdev:
1665	return r;
1666}
1667module_init(vhost_vdpa_init);
1668
1669static void __exit vhost_vdpa_exit(void)
1670{
1671	vdpa_unregister_driver(&vhost_vdpa_driver);
1672	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1673}
1674module_exit(vhost_vdpa_exit);
1675
1676MODULE_VERSION("0.0.1");
1677MODULE_LICENSE("GPL v2");
1678MODULE_AUTHOR("Intel Corporation");
1679MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");