v6.13.7
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2018-2020 Intel Corporation.
   4 * Copyright (C) 2020 Red Hat, Inc.
   5 *
   6 * Author: Tiwei Bie <tiwei.bie@intel.com>
   7 *         Jason Wang <jasowang@redhat.com>
   8 *
   9 * Thanks to Michael S. Tsirkin for the valuable comments and
  10 * suggestions.  And thanks to Cunming Liang and Zhihong Wang for all
  11 * their support.
  12 */
  13
  14#include <linux/kernel.h>
  15#include <linux/module.h>
  16#include <linux/cdev.h>
  17#include <linux/device.h>
  18#include <linux/mm.h>
  19#include <linux/slab.h>
  20#include <linux/iommu.h>
  21#include <linux/uuid.h>
  22#include <linux/vdpa.h>
  23#include <linux/nospec.h>
  24#include <linux/vhost.h>
  25
  26#include "vhost.h"
  27
  28enum {
  29	VHOST_VDPA_BACKEND_FEATURES =
  30	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
  31	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
  32	(1ULL << VHOST_BACKEND_F_IOTLB_ASID),
  33};
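/*
 * Illustrative sketch (not part of the driver): a userspace VMM would
 * typically negotiate these backend features right after taking ownership
 * of a /dev/vhost-vdpa-N node. The device path and the feature mask below
 * are examples only:
 *
 *	int fd = open("/dev/vhost-vdpa-0", O_RDWR);
 *	__u64 features;
 *
 *	ioctl(fd, VHOST_SET_OWNER);
 *	ioctl(fd, VHOST_GET_BACKEND_FEATURES, &features);
 *	features &= (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
 *		    (1ULL << VHOST_BACKEND_F_IOTLB_BATCH);
 *	ioctl(fd, VHOST_SET_BACKEND_FEATURES, &features);
 */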
  34
  35#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
  36
  37#define VHOST_VDPA_IOTLB_BUCKETS 16
  38
  39struct vhost_vdpa_as {
  40	struct hlist_node hash_link;
  41	struct vhost_iotlb iotlb;
  42	u32 id;
  43};
  44
  45struct vhost_vdpa {
  46	struct vhost_dev vdev;
  47	struct iommu_domain *domain;
  48	struct vhost_virtqueue *vqs;
  49	struct completion completion;
  50	struct vdpa_device *vdpa;
  51	struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
  52	struct device dev;
  53	struct cdev cdev;
  54	atomic_t opened;
  55	u32 nvqs;
  56	int virtio_id;
  57	int minor;
  58	struct eventfd_ctx *config_ctx;
  59	int in_batch;
  60	struct vdpa_iova_range range;
  61	u32 batch_asid;
  62	bool suspended;
  63};
  64
  65static DEFINE_IDA(vhost_vdpa_ida);
  66
  67static dev_t vhost_vdpa_major;
  68
  69static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
  70				   struct vhost_iotlb *iotlb, u64 start,
  71				   u64 last, u32 asid);
  72
  73static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
  74{
  75	struct vhost_vdpa_as *as = container_of(iotlb, struct
  76						vhost_vdpa_as, iotlb);
  77	return as->id;
  78}
  79
  80static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
  81{
  82	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
  83	struct vhost_vdpa_as *as;
  84
  85	hlist_for_each_entry(as, head, hash_link)
  86		if (as->id == asid)
  87			return as;
  88
  89	return NULL;
  90}
  91
  92static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid)
  93{
  94	struct vhost_vdpa_as *as = asid_to_as(v, asid);
  95
  96	if (!as)
  97		return NULL;
  98
  99	return &as->iotlb;
 100}
 101
 102static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
 103{
 104	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
 105	struct vhost_vdpa_as *as;
 106
 107	if (asid_to_as(v, asid))
 108		return NULL;
 109
 110	if (asid >= v->vdpa->nas)
 111		return NULL;
 112
 113	as = kmalloc(sizeof(*as), GFP_KERNEL);
 114	if (!as)
 115		return NULL;
 116
 117	vhost_iotlb_init(&as->iotlb, 0, 0);
 118	as->id = asid;
 119	hlist_add_head(&as->hash_link, head);
 120
 121	return as;
 122}
 123
 124static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
 125						      u32 asid)
 126{
 127	struct vhost_vdpa_as *as = asid_to_as(v, asid);
 128
 129	if (as)
 130		return as;
 131
 132	return vhost_vdpa_alloc_as(v, asid);
 133}
 134
 135static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid)
 136{
 137	struct vdpa_device *vdpa = v->vdpa;
 138	const struct vdpa_config_ops *ops = vdpa->config;
 139
 140	if (ops->reset_map)
 141		ops->reset_map(vdpa, asid);
 142}
 143
 144static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
 145{
 146	struct vhost_vdpa_as *as = asid_to_as(v, asid);
 147
 148	if (!as)
 149		return -EINVAL;
 150
 151	hlist_del(&as->hash_link);
 152	vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
 153	/*
  154	 * Devices with a vendor-specific IOMMU may need to restore
  155	 * the iotlb to its initial or default state, which cannot be
  156	 * done by the full-range unmap call above. Give them
  157	 * a chance to clean up or reset the map to the desired
 158	 * state.
 159	 */
 160	vhost_vdpa_reset_map(v, asid);
 161	kfree(as);
 162
 163	return 0;
 164}
 165
 166static void handle_vq_kick(struct vhost_work *work)
 167{
 168	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
 169						  poll.work);
 170	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
 171	const struct vdpa_config_ops *ops = v->vdpa->config;
 172
 173	ops->kick_vq(v->vdpa, vq - v->vqs);
 174}
 175
 176static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
 177{
 178	struct vhost_virtqueue *vq = private;
 179	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
 180
 181	if (call_ctx)
 182		eventfd_signal(call_ctx);
 183
 184	return IRQ_HANDLED;
 185}
 186
 187static irqreturn_t vhost_vdpa_config_cb(void *private)
 188{
 189	struct vhost_vdpa *v = private;
 190	struct eventfd_ctx *config_ctx = v->config_ctx;
 191
 192	if (config_ctx)
 193		eventfd_signal(config_ctx);
 194
 195	return IRQ_HANDLED;
 196}
 197
 198static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
 199{
 200	struct vhost_virtqueue *vq = &v->vqs[qid];
 201	const struct vdpa_config_ops *ops = v->vdpa->config;
 202	struct vdpa_device *vdpa = v->vdpa;
 203	int ret, irq;
 204
 205	if (!ops->get_vq_irq)
 206		return;
 207
 208	irq = ops->get_vq_irq(vdpa, qid);
 209	if (irq < 0)
 210		return;
 211
 
 212	if (!vq->call_ctx.ctx)
 213		return;
 214
 
 215	vq->call_ctx.producer.irq = irq;
 216	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
 217	if (unlikely(ret))
 218		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret =  %d\n",
 219			 qid, vq->call_ctx.producer.token, ret);
 220}
 221
 222static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
 223{
 224	struct vhost_virtqueue *vq = &v->vqs[qid];
 225
 226	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 227}
 228
 229static int _compat_vdpa_reset(struct vhost_vdpa *v)
 230{
 231	struct vdpa_device *vdpa = v->vdpa;
 232	u32 flags = 0;
 233
 234	v->suspended = false;
 235
 236	if (v->vdev.vqs) {
 237		flags |= !vhost_backend_has_feature(v->vdev.vqs[0],
 238						    VHOST_BACKEND_F_IOTLB_PERSIST) ?
 239			 VDPA_RESET_F_CLEAN_MAP : 0;
 240	}
 241
 242	return vdpa_reset(vdpa, flags);
 243}
 244
 245static int vhost_vdpa_reset(struct vhost_vdpa *v)
 246{
 247	v->in_batch = 0;
 248	return _compat_vdpa_reset(v);
 249}
 250
 251static long vhost_vdpa_bind_mm(struct vhost_vdpa *v)
 252{
 253	struct vdpa_device *vdpa = v->vdpa;
 254	const struct vdpa_config_ops *ops = vdpa->config;
 255
 256	if (!vdpa->use_va || !ops->bind_mm)
 257		return 0;
 258
 259	return ops->bind_mm(vdpa, v->vdev.mm);
 260}
 261
 262static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v)
 263{
 264	struct vdpa_device *vdpa = v->vdpa;
 265	const struct vdpa_config_ops *ops = vdpa->config;
 266
 267	if (!vdpa->use_va || !ops->unbind_mm)
 268		return;
 269
 270	ops->unbind_mm(vdpa);
 271}
 272
 273static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
 274{
 275	struct vdpa_device *vdpa = v->vdpa;
 276	const struct vdpa_config_ops *ops = vdpa->config;
 277	u32 device_id;
 278
 279	device_id = ops->get_device_id(vdpa);
 280
 281	if (copy_to_user(argp, &device_id, sizeof(device_id)))
 282		return -EFAULT;
 283
 284	return 0;
 285}
 286
 287static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
 288{
 289	struct vdpa_device *vdpa = v->vdpa;
 290	const struct vdpa_config_ops *ops = vdpa->config;
 291	u8 status;
 292
 293	status = ops->get_status(vdpa);
 294
 295	if (copy_to_user(statusp, &status, sizeof(status)))
 296		return -EFAULT;
 297
 298	return 0;
 299}
 300
 301static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
 302{
 303	struct vdpa_device *vdpa = v->vdpa;
 304	const struct vdpa_config_ops *ops = vdpa->config;
 305	u8 status, status_old;
 306	u32 nvqs = v->nvqs;
 307	int ret;
 308	u16 i;
 309
 310	if (copy_from_user(&status, statusp, sizeof(status)))
 311		return -EFAULT;
 312
 313	status_old = ops->get_status(vdpa);
 314
 315	/*
  316	 * Userspace shouldn't remove status bits unless it resets
  317	 * the status to 0.
 318	 */
 319	if (status != 0 && (status_old & ~status) != 0)
 320		return -EINVAL;
 321
 322	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
 323		for (i = 0; i < nvqs; i++)
 324			vhost_vdpa_unsetup_vq_irq(v, i);
 325
 326	if (status == 0) {
 327		ret = _compat_vdpa_reset(v);
 328		if (ret)
 329			return ret;
 330	} else
 331		vdpa_set_status(vdpa, status);
 332
 333	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
 334		for (i = 0; i < nvqs; i++)
 335			vhost_vdpa_setup_vq_irq(v, i);
 336
 337	return 0;
 338}
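/*
 * Illustrative sketch (not part of the driver): userspace drives the usual
 * virtio status handshake through this ioctl. Writing 0 resets the device,
 * and afterwards bits may only be added, never removed. Features are
 * negotiated with VHOST_SET_FEATURES before FEATURES_OK is set:
 *
 *	__u8 s = 0;
 *
 *	ioctl(fd, VHOST_VDPA_SET_STATUS, &s);
 *	s = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER;
 *	ioctl(fd, VHOST_VDPA_SET_STATUS, &s);
 *	s |= VIRTIO_CONFIG_S_FEATURES_OK;
 *	ioctl(fd, VHOST_VDPA_SET_STATUS, &s);
 *	s |= VIRTIO_CONFIG_S_DRIVER_OK;
 *	ioctl(fd, VHOST_VDPA_SET_STATUS, &s);
 */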
 339
 340static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
 341				      struct vhost_vdpa_config *c)
 342{
 343	struct vdpa_device *vdpa = v->vdpa;
 344	size_t size = vdpa->config->get_config_size(vdpa);
 345
 346	if (c->len == 0 || c->off > size)
 347		return -EINVAL;
 348
 349	if (c->len > size - c->off)
 350		return -E2BIG;
 351
 352	return 0;
 353}
 354
 355static long vhost_vdpa_get_config(struct vhost_vdpa *v,
 356				  struct vhost_vdpa_config __user *c)
 357{
 358	struct vdpa_device *vdpa = v->vdpa;
 359	struct vhost_vdpa_config config;
 360	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 361	u8 *buf;
 362
 363	if (copy_from_user(&config, c, size))
 364		return -EFAULT;
 365	if (vhost_vdpa_config_validate(v, &config))
 366		return -EINVAL;
 367	buf = kvzalloc(config.len, GFP_KERNEL);
 368	if (!buf)
 369		return -ENOMEM;
 370
 371	vdpa_get_config(vdpa, config.off, buf, config.len);
 372
 373	if (copy_to_user(c->buf, buf, config.len)) {
 374		kvfree(buf);
 375		return -EFAULT;
 376	}
 377
 378	kvfree(buf);
 379	return 0;
 380}
 381
 382static long vhost_vdpa_set_config(struct vhost_vdpa *v,
 383				  struct vhost_vdpa_config __user *c)
 384{
 385	struct vdpa_device *vdpa = v->vdpa;
 386	struct vhost_vdpa_config config;
 387	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 388	u8 *buf;
 389
 390	if (copy_from_user(&config, c, size))
 391		return -EFAULT;
 392	if (vhost_vdpa_config_validate(v, &config))
 393		return -EINVAL;
 394
 395	buf = vmemdup_user(c->buf, config.len);
 396	if (IS_ERR(buf))
 397		return PTR_ERR(buf);
 398
 399	vdpa_set_config(vdpa, config.off, buf, config.len);
 400
 401	kvfree(buf);
 402	return 0;
 403}
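/*
 * Illustrative sketch (not part of the driver): userspace reaches the device
 * config space through the variable-length struct vhost_vdpa_config, e.g.
 * reading the 6-byte MAC of a vDPA net device (assuming, as for
 * virtio_net_config, that it sits at config offset 0):
 *
 *	struct vhost_vdpa_config *cfg = calloc(1, sizeof(*cfg) + 6);
 *
 *	cfg->off = 0;
 *	cfg->len = 6;
 *	ioctl(fd, VHOST_VDPA_GET_CONFIG, cfg);
 *	(cfg->buf now holds the MAC address)
 */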
 404
 405static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
 406{
 407	struct vdpa_device *vdpa = v->vdpa;
 408	const struct vdpa_config_ops *ops = vdpa->config;
 409
 410	return ops->suspend;
 411}
 412
 413static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
 414{
 415	struct vdpa_device *vdpa = v->vdpa;
 416	const struct vdpa_config_ops *ops = vdpa->config;
 417
 418	return ops->resume;
 419}
 420
 421static bool vhost_vdpa_has_desc_group(const struct vhost_vdpa *v)
 422{
 423	struct vdpa_device *vdpa = v->vdpa;
 424	const struct vdpa_config_ops *ops = vdpa->config;
 425
 426	return ops->get_vq_desc_group;
 427}
 428
 429static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
 430{
 431	struct vdpa_device *vdpa = v->vdpa;
 432	const struct vdpa_config_ops *ops = vdpa->config;
 433	u64 features;
 434
 435	features = ops->get_device_features(vdpa);
 436
 437	if (copy_to_user(featurep, &features, sizeof(features)))
 438		return -EFAULT;
 439
 440	return 0;
 441}
 442
 443static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v)
 444{
 445	struct vdpa_device *vdpa = v->vdpa;
 446	const struct vdpa_config_ops *ops = vdpa->config;
 447
 448	if (!ops->get_backend_features)
 449		return 0;
 450	else
 451		return ops->get_backend_features(vdpa);
 452}
 453
 454static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
 455{
 456	struct vdpa_device *vdpa = v->vdpa;
 457	const struct vdpa_config_ops *ops = vdpa->config;
 458
 459	return (!ops->set_map && !ops->dma_map) || ops->reset_map ||
 460	       vhost_vdpa_get_backend_features(v) & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
 461}
 462
 463static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
 464{
 465	struct vdpa_device *vdpa = v->vdpa;
 466	const struct vdpa_config_ops *ops = vdpa->config;
 467	struct vhost_dev *d = &v->vdev;
 468	u64 actual_features;
 469	u64 features;
 470	int i;
 471
 472	/*
 473	 * It's not allowed to change the features after they have
 474	 * been negotiated.
 475	 */
 476	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
 477		return -EBUSY;
 478
 479	if (copy_from_user(&features, featurep, sizeof(features)))
 480		return -EFAULT;
 481
 482	if (vdpa_set_features(vdpa, features))
 483		return -EINVAL;
 484
 485	/* let the vqs know what has been configured */
 486	actual_features = ops->get_driver_features(vdpa);
 487	for (i = 0; i < d->nvqs; ++i) {
 488		struct vhost_virtqueue *vq = d->vqs[i];
 489
 490		mutex_lock(&vq->mutex);
 491		vq->acked_features = actual_features;
 492		mutex_unlock(&vq->mutex);
 493	}
 494
 495	return 0;
 496}
 497
 498static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
 499{
 500	struct vdpa_device *vdpa = v->vdpa;
 501	const struct vdpa_config_ops *ops = vdpa->config;
 502	u16 num;
 503
 504	num = ops->get_vq_num_max(vdpa);
 505
 506	if (copy_to_user(argp, &num, sizeof(num)))
 507		return -EFAULT;
 508
 509	return 0;
 510}
 511
 512static void vhost_vdpa_config_put(struct vhost_vdpa *v)
 513{
 514	if (v->config_ctx) {
 515		eventfd_ctx_put(v->config_ctx);
 516		v->config_ctx = NULL;
 517	}
 518}
 519
 520static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
 521{
 522	struct vdpa_callback cb;
 523	int fd;
 524	struct eventfd_ctx *ctx;
 525
 526	cb.callback = vhost_vdpa_config_cb;
 527	cb.private = v;
 528	if (copy_from_user(&fd, argp, sizeof(fd)))
 529		return  -EFAULT;
 530
 531	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
 532	swap(ctx, v->config_ctx);
 533
 534	if (!IS_ERR_OR_NULL(ctx))
 535		eventfd_ctx_put(ctx);
 536
 537	if (IS_ERR(v->config_ctx)) {
 538		long ret = PTR_ERR(v->config_ctx);
 539
 540		v->config_ctx = NULL;
 541		return ret;
 542	}
 543
 544	v->vdpa->config->set_config_cb(v->vdpa, &cb);
 545
 546	return 0;
 547}
 548
 549static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
 550{
 551	struct vhost_vdpa_iova_range range = {
 552		.first = v->range.first,
 553		.last = v->range.last,
 554	};
 555
 556	if (copy_to_user(argp, &range, sizeof(range)))
 557		return -EFAULT;
 558	return 0;
 559}
 560
 561static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
 562{
 563	struct vdpa_device *vdpa = v->vdpa;
 564	const struct vdpa_config_ops *ops = vdpa->config;
 565	u32 size;
 566
 567	size = ops->get_config_size(vdpa);
 568
 569	if (copy_to_user(argp, &size, sizeof(size)))
 570		return -EFAULT;
 571
 572	return 0;
 573}
 574
 575static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
 576{
 577	struct vdpa_device *vdpa = v->vdpa;
 578
 579	if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)))
 580		return -EFAULT;
 581
 582	return 0;
 583}
 584
  585/* After a successful return of this ioctl the device must not process more
  586 * virtqueue descriptors. The device can answer reads or writes of config
 587 * fields as if it were not suspended. In particular, writing to "queue_enable"
 588 * with a value of 1 will not make the device start processing buffers.
 589 */
 590static long vhost_vdpa_suspend(struct vhost_vdpa *v)
 591{
 592	struct vdpa_device *vdpa = v->vdpa;
 593	const struct vdpa_config_ops *ops = vdpa->config;
 594	int ret;
 595
 596	if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK))
 597		return 0;
 598
 599	if (!ops->suspend)
 600		return -EOPNOTSUPP;
 601
 602	ret = ops->suspend(vdpa);
 603	if (!ret)
 604		v->suspended = true;
 605
 606	return ret;
 607}
 608
 609/* After a successful return of this ioctl the device resumes processing
 610 * virtqueue descriptors. The device becomes fully operational the same way it
 611 * was before it was suspended.
 612 */
 613static long vhost_vdpa_resume(struct vhost_vdpa *v)
 614{
 615	struct vdpa_device *vdpa = v->vdpa;
 616	const struct vdpa_config_ops *ops = vdpa->config;
 617	int ret;
 618
 619	if (!(ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK))
 620		return 0;
 621
 622	if (!ops->resume)
 623		return -EOPNOTSUPP;
 624
 625	ret = ops->resume(vdpa);
 626	if (!ret)
 627		v->suspended = false;
 628
 629	return ret;
 630}
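/*
 * Illustrative sketch (not part of the driver): a VMM that wants to read
 * stable virtqueue state (e.g. for live migration) would check the
 * VHOST_BACKEND_F_SUSPEND backend feature and then bracket the state save
 * with these ioctls:
 *
 *	if (backend_features & (1ULL << VHOST_BACKEND_F_SUSPEND)) {
 *		ioctl(fd, VHOST_VDPA_SUSPEND);
 *		(read vq state via VHOST_GET_VRING_BASE, etc.)
 *		ioctl(fd, VHOST_VDPA_RESUME);
 *	}
 */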
 631
 632static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 633				   void __user *argp)
 634{
 635	struct vdpa_device *vdpa = v->vdpa;
 636	const struct vdpa_config_ops *ops = vdpa->config;
 637	struct vdpa_vq_state vq_state;
 638	struct vdpa_callback cb;
 639	struct vhost_virtqueue *vq;
 640	struct vhost_vring_state s;
 641	u32 idx;
 642	long r;
 643
 644	r = get_user(idx, (u32 __user *)argp);
 645	if (r < 0)
 646		return r;
 647
 648	if (idx >= v->nvqs)
 649		return -ENOBUFS;
 650
 651	idx = array_index_nospec(idx, v->nvqs);
 652	vq = &v->vqs[idx];
 653
 654	switch (cmd) {
 655	case VHOST_VDPA_SET_VRING_ENABLE:
 656		if (copy_from_user(&s, argp, sizeof(s)))
 657			return -EFAULT;
 658		ops->set_vq_ready(vdpa, idx, s.num);
 659		return 0;
 660	case VHOST_VDPA_GET_VRING_GROUP:
 661		if (!ops->get_vq_group)
 662			return -EOPNOTSUPP;
 663		s.index = idx;
 664		s.num = ops->get_vq_group(vdpa, idx);
 665		if (s.num >= vdpa->ngroups)
 666			return -EIO;
 667		else if (copy_to_user(argp, &s, sizeof(s)))
 668			return -EFAULT;
 669		return 0;
 670	case VHOST_VDPA_GET_VRING_DESC_GROUP:
 671		if (!vhost_vdpa_has_desc_group(v))
 672			return -EOPNOTSUPP;
 673		s.index = idx;
 674		s.num = ops->get_vq_desc_group(vdpa, idx);
 675		if (s.num >= vdpa->ngroups)
 676			return -EIO;
 677		else if (copy_to_user(argp, &s, sizeof(s)))
 678			return -EFAULT;
 679		return 0;
 680	case VHOST_VDPA_SET_GROUP_ASID:
 681		if (copy_from_user(&s, argp, sizeof(s)))
 682			return -EFAULT;
 683		if (s.num >= vdpa->nas)
 684			return -EINVAL;
 685		if (!ops->set_group_asid)
 686			return -EOPNOTSUPP;
 687		return ops->set_group_asid(vdpa, idx, s.num);
 688	case VHOST_VDPA_GET_VRING_SIZE:
 689		if (!ops->get_vq_size)
 690			return -EOPNOTSUPP;
 691		s.index = idx;
 692		s.num = ops->get_vq_size(vdpa, idx);
 693		if (copy_to_user(argp, &s, sizeof(s)))
 694			return -EFAULT;
 695		return 0;
 696	case VHOST_GET_VRING_BASE:
 697		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
 698		if (r)
 699			return r;
 700
 701		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
 702			vq->last_avail_idx = vq_state.packed.last_avail_idx |
 703					     (vq_state.packed.last_avail_counter << 15);
 704			vq->last_used_idx = vq_state.packed.last_used_idx |
 705					    (vq_state.packed.last_used_counter << 15);
 706		} else {
 707			vq->last_avail_idx = vq_state.split.avail_index;
 708		}
 709		break;
 710	case VHOST_SET_VRING_CALL:
 711		if (vq->call_ctx.ctx) {
 712			if (ops->get_status(vdpa) &
 713			    VIRTIO_CONFIG_S_DRIVER_OK)
 714				vhost_vdpa_unsetup_vq_irq(v, idx);
 715			vq->call_ctx.producer.token = NULL;
 716		}
 717		break;
 718	}
 719
 720	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
 721	if (r)
 722		return r;
 723
 724	switch (cmd) {
 725	case VHOST_SET_VRING_ADDR:
 726		if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
 727			return -EINVAL;
 728
 729		if (ops->set_vq_address(vdpa, idx,
 730					(u64)(uintptr_t)vq->desc,
 731					(u64)(uintptr_t)vq->avail,
 732					(u64)(uintptr_t)vq->used))
 733			r = -EINVAL;
 734		break;
 735
 736	case VHOST_SET_VRING_BASE:
 737		if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
 738			return -EINVAL;
 739
 740		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
 741			vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
 742			vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
 743			vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
 744			vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
 745		} else {
 746			vq_state.split.avail_index = vq->last_avail_idx;
 747		}
 748		r = ops->set_vq_state(vdpa, idx, &vq_state);
 749		break;
 750
 751	case VHOST_SET_VRING_CALL:
 752		if (vq->call_ctx.ctx) {
 753			cb.callback = vhost_vdpa_virtqueue_cb;
 754			cb.private = vq;
 755			cb.trigger = vq->call_ctx.ctx;
 756			vq->call_ctx.producer.token = vq->call_ctx.ctx;
 757			if (ops->get_status(vdpa) &
 758			    VIRTIO_CONFIG_S_DRIVER_OK)
 759				vhost_vdpa_setup_vq_irq(v, idx);
 760		} else {
 761			cb.callback = NULL;
 762			cb.private = NULL;
 763			cb.trigger = NULL;
 764		}
  765		ops->set_vq_cb(vdpa, idx, &cb);
 766		break;
 767
 768	case VHOST_SET_VRING_NUM:
 769		ops->set_vq_num(vdpa, idx, vq->num);
 770		break;
 771	}
 772
 773	return r;
 774}
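/*
 * Illustrative sketch (not part of the driver): the usual per-virtqueue
 * setup sequence issued by userspace before DRIVER_OK, shown for queue 0.
 * desc_iova/avail_iova/used_iova and kick_efd/call_efd are placeholders for
 * IOVAs previously mapped through the IOTLB and for eventfds created by the
 * VMM; error handling is omitted:
 *
 *	struct vhost_vring_state num  = { .index = 0, .num = 256 };
 *	struct vhost_vring_state base = { .index = 0, .num = 0 };
 *	struct vhost_vring_addr  addr = { .index = 0,
 *					  .desc_user_addr = desc_iova,
 *					  .avail_user_addr = avail_iova,
 *					  .used_user_addr = used_iova };
 *	struct vhost_vring_file  kick = { .index = 0, .fd = kick_efd };
 *	struct vhost_vring_file  call = { .index = 0, .fd = call_efd };
 *	struct vhost_vring_state en   = { .index = 0, .num = 1 };
 *
 *	ioctl(fd, VHOST_SET_VRING_NUM, &num);
 *	ioctl(fd, VHOST_SET_VRING_BASE, &base);
 *	ioctl(fd, VHOST_SET_VRING_ADDR, &addr);
 *	ioctl(fd, VHOST_SET_VRING_KICK, &kick);
 *	ioctl(fd, VHOST_SET_VRING_CALL, &call);
 *	ioctl(fd, VHOST_VDPA_SET_VRING_ENABLE, &en);
 */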
 775
 776static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 777				      unsigned int cmd, unsigned long arg)
 778{
 779	struct vhost_vdpa *v = filep->private_data;
 780	struct vhost_dev *d = &v->vdev;
 781	void __user *argp = (void __user *)arg;
 782	u64 __user *featurep = argp;
 783	u64 features;
 784	long r = 0;
 785
 786	if (cmd == VHOST_SET_BACKEND_FEATURES) {
 787		if (copy_from_user(&features, featurep, sizeof(features)))
 788			return -EFAULT;
 789		if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
 790				 BIT_ULL(VHOST_BACKEND_F_DESC_ASID) |
 791				 BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST) |
 792				 BIT_ULL(VHOST_BACKEND_F_SUSPEND) |
 793				 BIT_ULL(VHOST_BACKEND_F_RESUME) |
 794				 BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK)))
 795			return -EOPNOTSUPP;
 796		if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
 797		     !vhost_vdpa_can_suspend(v))
 798			return -EOPNOTSUPP;
 799		if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) &&
 800		     !vhost_vdpa_can_resume(v))
 801			return -EOPNOTSUPP;
 802		if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
 803		    !(features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)))
 804			return -EINVAL;
 805		if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
 806		     !vhost_vdpa_has_desc_group(v))
 807			return -EOPNOTSUPP;
 808		if ((features & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) &&
 809		     !vhost_vdpa_has_persistent_map(v))
 810			return -EOPNOTSUPP;
 811		vhost_set_backend_features(&v->vdev, features);
 812		return 0;
 813	}
 814
 815	mutex_lock(&d->mutex);
 816
 817	switch (cmd) {
 818	case VHOST_VDPA_GET_DEVICE_ID:
 819		r = vhost_vdpa_get_device_id(v, argp);
 820		break;
 821	case VHOST_VDPA_GET_STATUS:
 822		r = vhost_vdpa_get_status(v, argp);
 823		break;
 824	case VHOST_VDPA_SET_STATUS:
 825		r = vhost_vdpa_set_status(v, argp);
 826		break;
 827	case VHOST_VDPA_GET_CONFIG:
 828		r = vhost_vdpa_get_config(v, argp);
 829		break;
 830	case VHOST_VDPA_SET_CONFIG:
 831		r = vhost_vdpa_set_config(v, argp);
 832		break;
 833	case VHOST_GET_FEATURES:
 834		r = vhost_vdpa_get_features(v, argp);
 835		break;
 836	case VHOST_SET_FEATURES:
 837		r = vhost_vdpa_set_features(v, argp);
 838		break;
 839	case VHOST_VDPA_GET_VRING_NUM:
 840		r = vhost_vdpa_get_vring_num(v, argp);
 841		break;
 842	case VHOST_VDPA_GET_GROUP_NUM:
 843		if (copy_to_user(argp, &v->vdpa->ngroups,
 844				 sizeof(v->vdpa->ngroups)))
 845			r = -EFAULT;
 846		break;
 847	case VHOST_VDPA_GET_AS_NUM:
 848		if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
 849			r = -EFAULT;
 850		break;
 851	case VHOST_SET_LOG_BASE:
 852	case VHOST_SET_LOG_FD:
 853		r = -ENOIOCTLCMD;
 854		break;
 855	case VHOST_VDPA_SET_CONFIG_CALL:
 856		r = vhost_vdpa_set_config_call(v, argp);
 857		break;
 858	case VHOST_GET_BACKEND_FEATURES:
 859		features = VHOST_VDPA_BACKEND_FEATURES;
 860		if (vhost_vdpa_can_suspend(v))
 861			features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
 862		if (vhost_vdpa_can_resume(v))
 863			features |= BIT_ULL(VHOST_BACKEND_F_RESUME);
 864		if (vhost_vdpa_has_desc_group(v))
 865			features |= BIT_ULL(VHOST_BACKEND_F_DESC_ASID);
 866		if (vhost_vdpa_has_persistent_map(v))
 867			features |= BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
 868		features |= vhost_vdpa_get_backend_features(v);
 869		if (copy_to_user(featurep, &features, sizeof(features)))
 870			r = -EFAULT;
 871		break;
 872	case VHOST_VDPA_GET_IOVA_RANGE:
 873		r = vhost_vdpa_get_iova_range(v, argp);
 874		break;
 875	case VHOST_VDPA_GET_CONFIG_SIZE:
 876		r = vhost_vdpa_get_config_size(v, argp);
 877		break;
 878	case VHOST_VDPA_GET_VQS_COUNT:
 879		r = vhost_vdpa_get_vqs_count(v, argp);
 880		break;
 881	case VHOST_VDPA_SUSPEND:
 882		r = vhost_vdpa_suspend(v);
 883		break;
 884	case VHOST_VDPA_RESUME:
 885		r = vhost_vdpa_resume(v);
 886		break;
 887	default:
 888		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
 889		if (r == -ENOIOCTLCMD)
 890			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
 891		break;
 892	}
 893
 894	if (r)
 895		goto out;
 896
 897	switch (cmd) {
 898	case VHOST_SET_OWNER:
 899		r = vhost_vdpa_bind_mm(v);
 900		if (r)
 901			vhost_dev_reset_owner(d, NULL);
 902		break;
 903	}
 904out:
 905	mutex_unlock(&d->mutex);
 906	return r;
  907}

  908static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
 909				     struct vhost_iotlb_map *map, u32 asid)
 910{
 911	struct vdpa_device *vdpa = v->vdpa;
 912	const struct vdpa_config_ops *ops = vdpa->config;
 913	if (ops->dma_map) {
 914		ops->dma_unmap(vdpa, asid, map->start, map->size);
 915	} else if (ops->set_map == NULL) {
 916		iommu_unmap(v->domain, map->start, map->size);
 917	}
 918}
 919
 920static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
 921				u64 start, u64 last, u32 asid)
 922{
 923	struct vhost_dev *dev = &v->vdev;
 924	struct vhost_iotlb_map *map;
 925	struct page *page;
 926	unsigned long pfn, pinned;
 927
 928	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
 929		pinned = PFN_DOWN(map->size);
 930		for (pfn = PFN_DOWN(map->addr);
 931		     pinned > 0; pfn++, pinned--) {
 932			page = pfn_to_page(pfn);
 933			if (map->perm & VHOST_ACCESS_WO)
 934				set_page_dirty_lock(page);
 935			unpin_user_page(page);
 936		}
 937		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
 938		vhost_vdpa_general_unmap(v, map, asid);
 939		vhost_iotlb_map_free(iotlb, map);
 940	}
 941}
 942
 943static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
 944				u64 start, u64 last, u32 asid)
 945{
 946	struct vhost_iotlb_map *map;
 947	struct vdpa_map_file *map_file;
 948
 949	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
 950		map_file = (struct vdpa_map_file *)map->opaque;
 951		fput(map_file->file);
 952		kfree(map_file);
 953		vhost_vdpa_general_unmap(v, map, asid);
 954		vhost_iotlb_map_free(iotlb, map);
 955	}
 956}
 957
 958static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
 959				   struct vhost_iotlb *iotlb, u64 start,
 960				   u64 last, u32 asid)
 961{
 962	struct vdpa_device *vdpa = v->vdpa;
 963
 964	if (vdpa->use_va)
 965		return vhost_vdpa_va_unmap(v, iotlb, start, last, asid);
 966
 967	return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid);
 968}
 969
 970static int perm_to_iommu_flags(u32 perm)
 971{
 972	int flags = 0;
 973
 974	switch (perm) {
 975	case VHOST_ACCESS_WO:
 976		flags |= IOMMU_WRITE;
 977		break;
 978	case VHOST_ACCESS_RO:
 979		flags |= IOMMU_READ;
 980		break;
 981	case VHOST_ACCESS_RW:
 982		flags |= (IOMMU_WRITE | IOMMU_READ);
 983		break;
 984	default:
  985		WARN(1, "invalid vhost IOTLB permission\n");
 986		break;
 987	}
 988
 989	return flags | IOMMU_CACHE;
 990}
 991
 992static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
 993			  u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
 994{
 995	struct vhost_dev *dev = &v->vdev;
 996	struct vdpa_device *vdpa = v->vdpa;
 997	const struct vdpa_config_ops *ops = vdpa->config;
 998	u32 asid = iotlb_to_asid(iotlb);
 999	int r = 0;
1000
1001	r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
1002				      pa, perm, opaque);
1003	if (r)
1004		return r;
1005
1006	if (ops->dma_map) {
1007		r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
1008	} else if (ops->set_map) {
1009		if (!v->in_batch)
1010			r = ops->set_map(vdpa, asid, iotlb);
1011	} else {
1012		r = iommu_map(v->domain, iova, pa, size,
1013			      perm_to_iommu_flags(perm),
1014			      GFP_KERNEL_ACCOUNT);
1015	}
1016	if (r) {
1017		vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
1018		return r;
1019	}
1020
1021	if (!vdpa->use_va)
1022		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
1023
1024	return 0;
1025}
1026
1027static void vhost_vdpa_unmap(struct vhost_vdpa *v,
1028			     struct vhost_iotlb *iotlb,
1029			     u64 iova, u64 size)
1030{
1031	struct vdpa_device *vdpa = v->vdpa;
1032	const struct vdpa_config_ops *ops = vdpa->config;
1033	u32 asid = iotlb_to_asid(iotlb);
1034
1035	vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid);
1036
1037	if (ops->set_map) {
1038		if (!v->in_batch)
1039			ops->set_map(vdpa, asid, iotlb);
1040	}
 1041	}
1042}
1043
1044static int vhost_vdpa_va_map(struct vhost_vdpa *v,
1045			     struct vhost_iotlb *iotlb,
1046			     u64 iova, u64 size, u64 uaddr, u32 perm)
1047{
1048	struct vhost_dev *dev = &v->vdev;
1049	u64 offset, map_size, map_iova = iova;
1050	struct vdpa_map_file *map_file;
1051	struct vm_area_struct *vma;
1052	int ret = 0;
1053
1054	mmap_read_lock(dev->mm);
1055
1056	while (size) {
1057		vma = find_vma(dev->mm, uaddr);
1058		if (!vma) {
1059			ret = -EINVAL;
1060			break;
1061		}
1062		map_size = min(size, vma->vm_end - uaddr);
1063		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
1064			!(vma->vm_flags & (VM_IO | VM_PFNMAP))))
1065			goto next;
1066
1067		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
1068		if (!map_file) {
1069			ret = -ENOMEM;
1070			break;
1071		}
1072		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
1073		map_file->offset = offset;
1074		map_file->file = get_file(vma->vm_file);
1075		ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
1076				     perm, map_file);
1077		if (ret) {
1078			fput(map_file->file);
1079			kfree(map_file);
1080			break;
1081		}
1082next:
1083		size -= map_size;
1084		uaddr += map_size;
1085		map_iova += map_size;
1086	}
1087	if (ret)
1088		vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);
1089
1090	mmap_read_unlock(dev->mm);
1091
1092	return ret;
1093}
1094
1095static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
1096			     struct vhost_iotlb *iotlb,
1097			     u64 iova, u64 size, u64 uaddr, u32 perm)
1098{
1099	struct vhost_dev *dev = &v->vdev;
1100	struct page **page_list;
1101	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
1102	unsigned int gup_flags = FOLL_LONGTERM;
1103	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
1104	unsigned long lock_limit, sz2pin, nchunks, i;
1105	u64 start = iova;
1106	long pinned;
1107	int ret = 0;
1108
1109	/* Limit the use of memory for bookkeeping */
1110	page_list = (struct page **) __get_free_page(GFP_KERNEL);
1111	if (!page_list)
1112		return -ENOMEM;
1113
1114	if (perm & VHOST_ACCESS_WO)
1115		gup_flags |= FOLL_WRITE;
1116
1117	npages = PFN_UP(size + (iova & ~PAGE_MASK));
1118	if (!npages) {
1119		ret = -EINVAL;
1120		goto free;
1121	}
1122
1123	mmap_read_lock(dev->mm);
1124
1125	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
1126	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
1127		ret = -ENOMEM;
1128		goto unlock;
1129	}
1130
1131	cur_base = uaddr & PAGE_MASK;
1132	iova &= PAGE_MASK;
1133	nchunks = 0;
1134
1135	while (npages) {
1136		sz2pin = min_t(unsigned long, npages, list_size);
1137		pinned = pin_user_pages(cur_base, sz2pin,
1138					gup_flags, page_list);
1139		if (sz2pin != pinned) {
1140			if (pinned < 0) {
1141				ret = pinned;
1142			} else {
1143				unpin_user_pages(page_list, pinned);
1144				ret = -ENOMEM;
1145			}
1146			goto out;
1147		}
1148		nchunks++;
1149
1150		if (!last_pfn)
1151			map_pfn = page_to_pfn(page_list[0]);
1152
1153		for (i = 0; i < pinned; i++) {
1154			unsigned long this_pfn = page_to_pfn(page_list[i]);
1155			u64 csize;
1156
1157			if (last_pfn && (this_pfn != last_pfn + 1)) {
1158				/* Pin a contiguous chunk of memory */
1159				csize = PFN_PHYS(last_pfn - map_pfn + 1);
1160				ret = vhost_vdpa_map(v, iotlb, iova, csize,
1161						     PFN_PHYS(map_pfn),
1162						     perm, NULL);
1163				if (ret) {
1164					/*
1165					 * Unpin the pages that are left unmapped
1166					 * from this point on in the current
1167					 * page_list. The remaining outstanding
1168					 * ones which may stride across several
1169					 * chunks will be covered in the common
1170					 * error path subsequently.
1171					 */
1172					unpin_user_pages(&page_list[i],
1173							 pinned - i);
1174					goto out;
1175				}
1176
1177				map_pfn = this_pfn;
1178				iova += csize;
1179				nchunks = 0;
1180			}
1181
1182			last_pfn = this_pfn;
1183		}
1184
1185		cur_base += PFN_PHYS(pinned);
1186		npages -= pinned;
1187	}
1188
 1189	/* Map the remaining contiguous chunk */
1190	ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
1191			     PFN_PHYS(map_pfn), perm, NULL);
1192out:
1193	if (ret) {
1194		if (nchunks) {
1195			unsigned long pfn;
1196
1197			/*
1198			 * Unpin the outstanding pages which are yet to be
1199			 * mapped but haven't due to vdpa_map() or
1200			 * pin_user_pages() failure.
1201			 *
1202			 * Mapped pages are accounted in vdpa_map(), hence
1203			 * the corresponding unpinning will be handled by
1204			 * vdpa_unmap().
1205			 */
1206			WARN_ON(!last_pfn);
1207			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
1208				unpin_user_page(pfn_to_page(pfn));
1209		}
1210		vhost_vdpa_unmap(v, iotlb, start, size);
1211	}
1212unlock:
1213	mmap_read_unlock(dev->mm);
1214free:
1215	free_page((unsigned long)page_list);
1216	return ret;
1217
1218}
1219
1220static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
1221					   struct vhost_iotlb *iotlb,
1222					   struct vhost_iotlb_msg *msg)
1223{
1224	struct vdpa_device *vdpa = v->vdpa;
1225
1226	if (msg->iova < v->range.first || !msg->size ||
1227	    msg->iova > U64_MAX - msg->size + 1 ||
1228	    msg->iova + msg->size - 1 > v->range.last)
1229		return -EINVAL;
1230
1231	if (vhost_iotlb_itree_first(iotlb, msg->iova,
1232				    msg->iova + msg->size - 1))
1233		return -EEXIST;
1234
1235	if (vdpa->use_va)
1236		return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
1237					 msg->uaddr, msg->perm);
1238
1239	return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr,
1240				 msg->perm);
1241}
1242
1243static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
1244					struct vhost_iotlb_msg *msg)
1245{
1246	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
1247	struct vdpa_device *vdpa = v->vdpa;
1248	const struct vdpa_config_ops *ops = vdpa->config;
1249	struct vhost_iotlb *iotlb = NULL;
1250	struct vhost_vdpa_as *as = NULL;
1251	int r = 0;
1252
1253	mutex_lock(&dev->mutex);
1254
1255	r = vhost_dev_check_owner(dev);
1256	if (r)
1257		goto unlock;
1258
1259	if (msg->type == VHOST_IOTLB_UPDATE ||
1260	    msg->type == VHOST_IOTLB_BATCH_BEGIN) {
1261		as = vhost_vdpa_find_alloc_as(v, asid);
1262		if (!as) {
1263			dev_err(&v->dev, "can't find and alloc asid %d\n",
1264				asid);
1265			r = -EINVAL;
1266			goto unlock;
1267		}
1268		iotlb = &as->iotlb;
1269	} else
1270		iotlb = asid_to_iotlb(v, asid);
1271
1272	if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
1273		if (v->in_batch && v->batch_asid != asid) {
1274			dev_info(&v->dev, "batch id %d asid %d\n",
1275				 v->batch_asid, asid);
1276		}
1277		if (!iotlb)
1278			dev_err(&v->dev, "no iotlb for asid %d\n", asid);
1279		r = -EINVAL;
1280		goto unlock;
1281	}
1282
1283	switch (msg->type) {
1284	case VHOST_IOTLB_UPDATE:
1285		r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
1286		break;
1287	case VHOST_IOTLB_INVALIDATE:
1288		vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
1289		break;
1290	case VHOST_IOTLB_BATCH_BEGIN:
1291		v->batch_asid = asid;
1292		v->in_batch = true;
1293		break;
1294	case VHOST_IOTLB_BATCH_END:
1295		if (v->in_batch && ops->set_map)
1296			ops->set_map(vdpa, asid, iotlb);
 1297		v->in_batch = false;
1298		break;
1299	default:
1300		r = -EINVAL;
1301		break;
1302	}
1303unlock:
1304	mutex_unlock(&dev->mutex);
1305
1306	return r;
1307}
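/*
 * Illustrative sketch (not part of the driver): IOTLB messages arrive via
 * write() on the vhost-vdpa fd as struct vhost_msg_v2 (VHOST_IOTLB_MSG_V2).
 * Assuming VHOST_BACKEND_F_IOTLB_BATCH was negotiated, a batched mapping of
 * one buffer into ASID 0 could look like this (iova, buf and len are
 * placeholders):
 *
 *	struct vhost_msg_v2 msg = { .type = VHOST_IOTLB_MSG_V2, .asid = 0 };
 *
 *	msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN;
 *	write(fd, &msg, sizeof(msg));
 *
 *	msg.iotlb.type = VHOST_IOTLB_UPDATE;
 *	msg.iotlb.iova = iova;
 *	msg.iotlb.uaddr = (__u64)(uintptr_t)buf;
 *	msg.iotlb.size = len;
 *	msg.iotlb.perm = VHOST_ACCESS_RW;
 *	write(fd, &msg, sizeof(msg));
 *
 *	msg.iotlb.type = VHOST_IOTLB_BATCH_END;
 *	write(fd, &msg, sizeof(msg));
 */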
1308
1309static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
1310					 struct iov_iter *from)
1311{
1312	struct file *file = iocb->ki_filp;
1313	struct vhost_vdpa *v = file->private_data;
1314	struct vhost_dev *dev = &v->vdev;
1315
1316	return vhost_chr_write_iter(dev, from);
1317}
1318
1319static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
1320{
1321	struct vdpa_device *vdpa = v->vdpa;
1322	const struct vdpa_config_ops *ops = vdpa->config;
 1323	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
1324	int ret;
1325
 1326	/* Device wants to do DMA by itself */
1327	if (ops->set_map || ops->dma_map)
1328		return 0;
1329
1330	if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) {
1331		dev_warn_once(&v->dev,
 1332			      "Failed to allocate domain, device is not IOMMU cache coherent capable\n");
1333		return -ENOTSUPP;
1334	}
1335
1336	v->domain = iommu_paging_domain_alloc(dma_dev);
1337	if (IS_ERR(v->domain)) {
1338		ret = PTR_ERR(v->domain);
1339		v->domain = NULL;
1340		return ret;
1341	}
1342
1343	ret = iommu_attach_device(v->domain, dma_dev);
1344	if (ret)
1345		goto err_attach;
1346
1347	return 0;
1348
1349err_attach:
1350	iommu_domain_free(v->domain);
1351	v->domain = NULL;
1352	return ret;
1353}
1354
1355static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
1356{
1357	struct vdpa_device *vdpa = v->vdpa;
1358	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
1359
1360	if (v->domain) {
1361		iommu_detach_device(v->domain, dma_dev);
1362		iommu_domain_free(v->domain);
1363	}
1364
1365	v->domain = NULL;
1366}
1367
1368static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
1369{
1370	struct vdpa_iova_range *range = &v->range;
1371	struct vdpa_device *vdpa = v->vdpa;
1372	const struct vdpa_config_ops *ops = vdpa->config;
1373
1374	if (ops->get_iova_range) {
1375		*range = ops->get_iova_range(vdpa);
1376	} else if (v->domain && v->domain->geometry.force_aperture) {
1377		range->first = v->domain->geometry.aperture_start;
1378		range->last = v->domain->geometry.aperture_end;
1379	} else {
1380		range->first = 0;
1381		range->last = ULLONG_MAX;
1382	}
1383}
1384
1385static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
1386{
1387	struct vhost_vdpa_as *as;
1388	u32 asid;
1389
1390	for (asid = 0; asid < v->vdpa->nas; asid++) {
1391		as = asid_to_as(v, asid);
1392		if (as)
1393			vhost_vdpa_remove_as(v, asid);
1394	}
1395
1396	vhost_vdpa_free_domain(v);
1397	vhost_dev_cleanup(&v->vdev);
1398	kfree(v->vdev.vqs);
1399	v->vdev.vqs = NULL;
1400}
1401
1402static int vhost_vdpa_open(struct inode *inode, struct file *filep)
1403{
1404	struct vhost_vdpa *v;
1405	struct vhost_dev *dev;
1406	struct vhost_virtqueue **vqs;
1407	int r, opened;
1408	u32 i, nvqs;
1409
1410	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
1411
1412	opened = atomic_cmpxchg(&v->opened, 0, 1);
1413	if (opened)
1414		return -EBUSY;
1415
1416	nvqs = v->nvqs;
1417	r = vhost_vdpa_reset(v);
1418	if (r)
1419		goto err;
1420
1421	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
1422	if (!vqs) {
1423		r = -ENOMEM;
1424		goto err;
1425	}
1426
1427	dev = &v->vdev;
1428	for (i = 0; i < nvqs; i++) {
1429		vqs[i] = &v->vqs[i];
1430		vqs[i]->handle_kick = handle_vq_kick;
1431		vqs[i]->call_ctx.ctx = NULL;
1432	}
1433	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
1434		       vhost_vdpa_process_iotlb_msg);
1435
1436	r = vhost_vdpa_alloc_domain(v);
1437	if (r)
1438		goto err_alloc_domain;
1439
1440	vhost_vdpa_set_iova_range(v);
1441
1442	filep->private_data = v;
1443
1444	return 0;
1445
1446err_alloc_domain:
1447	vhost_vdpa_cleanup(v);
1448err:
1449	atomic_dec(&v->opened);
1450	return r;
1451}
1452
1453static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
1454{
1455	u32 i;
1456
1457	for (i = 0; i < v->nvqs; i++)
1458		vhost_vdpa_unsetup_vq_irq(v, i);
1459}
1460
1461static int vhost_vdpa_release(struct inode *inode, struct file *filep)
1462{
1463	struct vhost_vdpa *v = filep->private_data;
1464	struct vhost_dev *d = &v->vdev;
1465
1466	mutex_lock(&d->mutex);
1467	filep->private_data = NULL;
1468	vhost_vdpa_clean_irq(v);
1469	vhost_vdpa_reset(v);
1470	vhost_dev_stop(&v->vdev);
1471	vhost_vdpa_unbind_mm(v);
1472	vhost_vdpa_config_put(v);
1473	vhost_vdpa_cleanup(v);
1474	mutex_unlock(&d->mutex);
1475
1476	atomic_dec(&v->opened);
1477	complete(&v->completion);
1478
1479	return 0;
1480}
1481
1482#ifdef CONFIG_MMU
1483static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
1484{
1485	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
1486	struct vdpa_device *vdpa = v->vdpa;
1487	const struct vdpa_config_ops *ops = vdpa->config;
1488	struct vdpa_notification_area notify;
1489	struct vm_area_struct *vma = vmf->vma;
1490	u16 index = vma->vm_pgoff;
1491
1492	notify = ops->get_vq_notification(vdpa, index);
1493
 1494	return vmf_insert_pfn(vma, vmf->address & PAGE_MASK, PFN_DOWN(notify.addr));
1495}
1496
1497static const struct vm_operations_struct vhost_vdpa_vm_ops = {
1498	.fault = vhost_vdpa_fault,
1499};
1500
1501static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
1502{
1503	struct vhost_vdpa *v = vma->vm_file->private_data;
1504	struct vdpa_device *vdpa = v->vdpa;
1505	const struct vdpa_config_ops *ops = vdpa->config;
1506	struct vdpa_notification_area notify;
1507	unsigned long index = vma->vm_pgoff;
1508
1509	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1510		return -EINVAL;
1511	if ((vma->vm_flags & VM_SHARED) == 0)
1512		return -EINVAL;
1513	if (vma->vm_flags & VM_READ)
1514		return -EINVAL;
1515	if (index > 65535)
1516		return -EINVAL;
1517	if (!ops->get_vq_notification)
1518		return -ENOTSUPP;
1519
 1520	/* To be safe and easily modelled by userspace, we only
 1521	 * support doorbells that sit on a page boundary and
 1522	 * do not share the page with other registers.
1523	 */
1524	notify = ops->get_vq_notification(vdpa, index);
1525	if (notify.addr & (PAGE_SIZE - 1))
1526		return -EINVAL;
1527	if (vma->vm_end - vma->vm_start != notify.size)
1528		return -ENOTSUPP;
1529
1530	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
1531	vma->vm_ops = &vhost_vdpa_vm_ops;
1532	return 0;
1533}
1534#endif /* CONFIG_MMU */
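/*
 * Illustrative sketch (not part of the driver): userspace maps the
 * notification (doorbell) page of queue qid by encoding the queue index in
 * the mmap offset; the mapping must be write-only and shared. page_size is
 * a placeholder for the system page size:
 *
 *	void *db = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED,
 *			fd, qid * page_size);
 *
 * A kick is then typically a write to that page, avoiding the kick
 * eventfd/syscall path.
 */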
1535
1536static const struct file_operations vhost_vdpa_fops = {
1537	.owner		= THIS_MODULE,
1538	.open		= vhost_vdpa_open,
1539	.release	= vhost_vdpa_release,
1540	.write_iter	= vhost_vdpa_chr_write_iter,
1541	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
1542#ifdef CONFIG_MMU
1543	.mmap		= vhost_vdpa_mmap,
1544#endif /* CONFIG_MMU */
1545	.compat_ioctl	= compat_ptr_ioctl,
1546};
1547
1548static void vhost_vdpa_release_dev(struct device *device)
1549{
1550	struct vhost_vdpa *v =
1551	       container_of(device, struct vhost_vdpa, dev);
1552
1553	ida_free(&vhost_vdpa_ida, v->minor);
1554	kfree(v->vqs);
1555	kfree(v);
1556}
1557
1558static int vhost_vdpa_probe(struct vdpa_device *vdpa)
1559{
1560	const struct vdpa_config_ops *ops = vdpa->config;
1561	struct vhost_vdpa *v;
1562	int minor;
1563	int i, r;
1564
 1565	/* We can't support platform IOMMU devices with more than
 1566	 * one group or address space.
 1567	 */
1568	if (!ops->set_map && !ops->dma_map &&
1569	    (vdpa->ngroups > 1 || vdpa->nas > 1))
1570		return -EOPNOTSUPP;
1571
1572	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
1573	if (!v)
1574		return -ENOMEM;
1575
1576	minor = ida_alloc_max(&vhost_vdpa_ida, VHOST_VDPA_DEV_MAX - 1,
1577			      GFP_KERNEL);
1578	if (minor < 0) {
1579		kfree(v);
1580		return minor;
1581	}
1582
1583	atomic_set(&v->opened, 0);
1584	v->minor = minor;
1585	v->vdpa = vdpa;
1586	v->nvqs = vdpa->nvqs;
1587	v->virtio_id = ops->get_device_id(vdpa);
1588
1589	device_initialize(&v->dev);
1590	v->dev.release = vhost_vdpa_release_dev;
1591	v->dev.parent = &vdpa->dev;
1592	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
1593	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
1594			       GFP_KERNEL);
1595	if (!v->vqs) {
1596		r = -ENOMEM;
1597		goto err;
1598	}
1599
1600	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
1601	if (r)
1602		goto err;
1603
1604	cdev_init(&v->cdev, &vhost_vdpa_fops);
1605	v->cdev.owner = THIS_MODULE;
1606
1607	r = cdev_device_add(&v->cdev, &v->dev);
1608	if (r)
1609		goto err;
1610
1611	init_completion(&v->completion);
1612	vdpa_set_drvdata(vdpa, v);
1613
1614	for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
1615		INIT_HLIST_HEAD(&v->as[i]);
1616
1617	return 0;
1618
1619err:
 1620	put_device(&v->dev);
1621	return r;
1622}
1623
1624static void vhost_vdpa_remove(struct vdpa_device *vdpa)
1625{
1626	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
1627	int opened;
1628
1629	cdev_device_del(&v->cdev, &v->dev);
1630
1631	do {
1632		opened = atomic_cmpxchg(&v->opened, 0, 1);
1633		if (!opened)
1634			break;
1635		wait_for_completion(&v->completion);
1636	} while (1);
1637
1638	put_device(&v->dev);
1639}
1640
1641static struct vdpa_driver vhost_vdpa_driver = {
1642	.driver = {
1643		.name	= "vhost_vdpa",
1644	},
1645	.probe	= vhost_vdpa_probe,
1646	.remove	= vhost_vdpa_remove,
1647};
1648
1649static int __init vhost_vdpa_init(void)
1650{
1651	int r;
1652
1653	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
1654				"vhost-vdpa");
1655	if (r)
1656		goto err_alloc_chrdev;
1657
1658	r = vdpa_register_driver(&vhost_vdpa_driver);
1659	if (r)
1660		goto err_vdpa_register_driver;
1661
1662	return 0;
1663
1664err_vdpa_register_driver:
1665	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1666err_alloc_chrdev:
1667	return r;
1668}
1669module_init(vhost_vdpa_init);
1670
1671static void __exit vhost_vdpa_exit(void)
1672{
1673	vdpa_unregister_driver(&vhost_vdpa_driver);
1674	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1675}
1676module_exit(vhost_vdpa_exit);
1677
1678MODULE_VERSION("0.0.1");
1679MODULE_LICENSE("GPL v2");
1680MODULE_AUTHOR("Intel Corporation");
1681MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");
v6.2
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2018-2020 Intel Corporation.
   4 * Copyright (C) 2020 Red Hat, Inc.
   5 *
   6 * Author: Tiwei Bie <tiwei.bie@intel.com>
   7 *         Jason Wang <jasowang@redhat.com>
   8 *
   9 * Thanks Michael S. Tsirkin for the valuable comments and
  10 * suggestions.  And thanks to Cunming Liang and Zhihong Wang for all
  11 * their supports.
  12 */
  13
  14#include <linux/kernel.h>
  15#include <linux/module.h>
  16#include <linux/cdev.h>
  17#include <linux/device.h>
  18#include <linux/mm.h>
  19#include <linux/slab.h>
  20#include <linux/iommu.h>
  21#include <linux/uuid.h>
  22#include <linux/vdpa.h>
  23#include <linux/nospec.h>
  24#include <linux/vhost.h>
  25
  26#include "vhost.h"
  27
  28enum {
  29	VHOST_VDPA_BACKEND_FEATURES =
  30	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
  31	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
  32	(1ULL << VHOST_BACKEND_F_IOTLB_ASID),
  33};
  34
  35#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
  36
  37#define VHOST_VDPA_IOTLB_BUCKETS 16
  38
  39struct vhost_vdpa_as {
  40	struct hlist_node hash_link;
  41	struct vhost_iotlb iotlb;
  42	u32 id;
  43};
  44
  45struct vhost_vdpa {
  46	struct vhost_dev vdev;
  47	struct iommu_domain *domain;
  48	struct vhost_virtqueue *vqs;
  49	struct completion completion;
  50	struct vdpa_device *vdpa;
  51	struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
  52	struct device dev;
  53	struct cdev cdev;
  54	atomic_t opened;
  55	u32 nvqs;
  56	int virtio_id;
  57	int minor;
  58	struct eventfd_ctx *config_ctx;
  59	int in_batch;
  60	struct vdpa_iova_range range;
  61	u32 batch_asid;
 
  62};
  63
  64static DEFINE_IDA(vhost_vdpa_ida);
  65
  66static dev_t vhost_vdpa_major;
  67
  68static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
  69				   struct vhost_iotlb *iotlb, u64 start,
  70				   u64 last, u32 asid);
  71
  72static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
  73{
  74	struct vhost_vdpa_as *as = container_of(iotlb, struct
  75						vhost_vdpa_as, iotlb);
  76	return as->id;
  77}
  78
  79static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
  80{
  81	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
  82	struct vhost_vdpa_as *as;
  83
  84	hlist_for_each_entry(as, head, hash_link)
  85		if (as->id == asid)
  86			return as;
  87
  88	return NULL;
  89}
  90
  91static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid)
  92{
  93	struct vhost_vdpa_as *as = asid_to_as(v, asid);
  94
  95	if (!as)
  96		return NULL;
  97
  98	return &as->iotlb;
  99}
 100
 101static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
 102{
 103	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
 104	struct vhost_vdpa_as *as;
 105
 106	if (asid_to_as(v, asid))
 107		return NULL;
 108
 109	if (asid >= v->vdpa->nas)
 110		return NULL;
 111
 112	as = kmalloc(sizeof(*as), GFP_KERNEL);
 113	if (!as)
 114		return NULL;
 115
 116	vhost_iotlb_init(&as->iotlb, 0, 0);
 117	as->id = asid;
 118	hlist_add_head(&as->hash_link, head);
 119
 120	return as;
 121}
 122
 123static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
 124						      u32 asid)
 125{
 126	struct vhost_vdpa_as *as = asid_to_as(v, asid);
 127
 128	if (as)
 129		return as;
 130
 131	return vhost_vdpa_alloc_as(v, asid);
 132}
 133
 
 
 
 
 
 
 
 
 
 134static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
 135{
 136	struct vhost_vdpa_as *as = asid_to_as(v, asid);
 137
 138	if (!as)
 139		return -EINVAL;
 140
 141	hlist_del(&as->hash_link);
 142	vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
 
 
 
 
 
 
 
 
 143	kfree(as);
 144
 145	return 0;
 146}
 147
 148static void handle_vq_kick(struct vhost_work *work)
 149{
 150	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
 151						  poll.work);
 152	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
 153	const struct vdpa_config_ops *ops = v->vdpa->config;
 154
 155	ops->kick_vq(v->vdpa, vq - v->vqs);
 156}
 157
 158static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
 159{
 160	struct vhost_virtqueue *vq = private;
 161	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
 162
 163	if (call_ctx)
 164		eventfd_signal(call_ctx, 1);
 165
 166	return IRQ_HANDLED;
 167}
 168
 169static irqreturn_t vhost_vdpa_config_cb(void *private)
 170{
 171	struct vhost_vdpa *v = private;
 172	struct eventfd_ctx *config_ctx = v->config_ctx;
 173
 174	if (config_ctx)
 175		eventfd_signal(config_ctx, 1);
 176
 177	return IRQ_HANDLED;
 178}
 179
 180static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
 181{
 182	struct vhost_virtqueue *vq = &v->vqs[qid];
 183	const struct vdpa_config_ops *ops = v->vdpa->config;
 184	struct vdpa_device *vdpa = v->vdpa;
 185	int ret, irq;
 186
 187	if (!ops->get_vq_irq)
 188		return;
 189
 190	irq = ops->get_vq_irq(vdpa, qid);
 191	if (irq < 0)
 192		return;
 193
 194	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 195	if (!vq->call_ctx.ctx)
 196		return;
 197
 198	vq->call_ctx.producer.token = vq->call_ctx.ctx;
 199	vq->call_ctx.producer.irq = irq;
 200	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
 201	if (unlikely(ret))
 202		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret =  %d\n",
 203			 qid, vq->call_ctx.producer.token, ret);
 204}
 205
 206static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
 207{
 208	struct vhost_virtqueue *vq = &v->vqs[qid];
 209
 210	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 211}
 212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 213static int vhost_vdpa_reset(struct vhost_vdpa *v)
 214{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 215	struct vdpa_device *vdpa = v->vdpa;
 
 216
 217	v->in_batch = 0;
 
 218
 219	return vdpa_reset(vdpa);
 220}
 221
 222static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
 223{
 224	struct vdpa_device *vdpa = v->vdpa;
 225	const struct vdpa_config_ops *ops = vdpa->config;
 226	u32 device_id;
 227
 228	device_id = ops->get_device_id(vdpa);
 229
 230	if (copy_to_user(argp, &device_id, sizeof(device_id)))
 231		return -EFAULT;
 232
 233	return 0;
 234}
 235
 236static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
 237{
 238	struct vdpa_device *vdpa = v->vdpa;
 239	const struct vdpa_config_ops *ops = vdpa->config;
 240	u8 status;
 241
 242	status = ops->get_status(vdpa);
 243
 244	if (copy_to_user(statusp, &status, sizeof(status)))
 245		return -EFAULT;
 246
 247	return 0;
 248}
 249
 250static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
 251{
 252	struct vdpa_device *vdpa = v->vdpa;
 253	const struct vdpa_config_ops *ops = vdpa->config;
 254	u8 status, status_old;
 255	u32 nvqs = v->nvqs;
 256	int ret;
 257	u16 i;
 258
 259	if (copy_from_user(&status, statusp, sizeof(status)))
 260		return -EFAULT;
 261
 262	status_old = ops->get_status(vdpa);
 263
 264	/*
 265	 * Userspace shouldn't remove status bits unless reset the
 266	 * status to 0.
 267	 */
 268	if (status != 0 && (status_old & ~status) != 0)
 269		return -EINVAL;
 270
 271	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
 272		for (i = 0; i < nvqs; i++)
 273			vhost_vdpa_unsetup_vq_irq(v, i);
 274
 275	if (status == 0) {
 276		ret = vdpa_reset(vdpa);
 277		if (ret)
 278			return ret;
 279	} else
 280		vdpa_set_status(vdpa, status);
 281
 282	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
 283		for (i = 0; i < nvqs; i++)
 284			vhost_vdpa_setup_vq_irq(v, i);
 285
 286	return 0;
 287}
 288
 289static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
 290				      struct vhost_vdpa_config *c)
 291{
 292	struct vdpa_device *vdpa = v->vdpa;
 293	size_t size = vdpa->config->get_config_size(vdpa);
 294
 295	if (c->len == 0 || c->off > size)
 296		return -EINVAL;
 297
 298	if (c->len > size - c->off)
 299		return -E2BIG;
 300
 301	return 0;
 302}
 303
 304static long vhost_vdpa_get_config(struct vhost_vdpa *v,
 305				  struct vhost_vdpa_config __user *c)
 306{
 307	struct vdpa_device *vdpa = v->vdpa;
 308	struct vhost_vdpa_config config;
 309	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 310	u8 *buf;
 311
 312	if (copy_from_user(&config, c, size))
 313		return -EFAULT;
 314	if (vhost_vdpa_config_validate(v, &config))
 315		return -EINVAL;
 316	buf = kvzalloc(config.len, GFP_KERNEL);
 317	if (!buf)
 318		return -ENOMEM;
 319
 320	vdpa_get_config(vdpa, config.off, buf, config.len);
 321
 322	if (copy_to_user(c->buf, buf, config.len)) {
 323		kvfree(buf);
 324		return -EFAULT;
 325	}
 326
 327	kvfree(buf);
 328	return 0;
 329}
 330
 331static long vhost_vdpa_set_config(struct vhost_vdpa *v,
 332				  struct vhost_vdpa_config __user *c)
 333{
 334	struct vdpa_device *vdpa = v->vdpa;
 335	struct vhost_vdpa_config config;
 336	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 337	u8 *buf;
 338
 339	if (copy_from_user(&config, c, size))
 340		return -EFAULT;
 341	if (vhost_vdpa_config_validate(v, &config))
 342		return -EINVAL;
 343
 344	buf = vmemdup_user(c->buf, config.len);
 345	if (IS_ERR(buf))
 346		return PTR_ERR(buf);
 347
 348	vdpa_set_config(vdpa, config.off, buf, config.len);
 349
 350	kvfree(buf);
 351	return 0;
 352}
 353
 354static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
 355{
 356	struct vdpa_device *vdpa = v->vdpa;
 357	const struct vdpa_config_ops *ops = vdpa->config;
 358
 359	return ops->suspend;
 360}
 361
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 362static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
 363{
 364	struct vdpa_device *vdpa = v->vdpa;
 365	const struct vdpa_config_ops *ops = vdpa->config;
 366	u64 features;
 367
 368	features = ops->get_device_features(vdpa);
 369
 370	if (copy_to_user(featurep, &features, sizeof(features)))
 371		return -EFAULT;
 372
 373	return 0;
 374}
 375
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 376static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
 377{
 378	struct vdpa_device *vdpa = v->vdpa;
 379	const struct vdpa_config_ops *ops = vdpa->config;
 380	u64 features;
 381
 382	/*
 383	 * It's not allowed to change the features after they have
 384	 * been negotiated.
 385	 */
 386	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
 387		return -EBUSY;
 388
 389	if (copy_from_user(&features, featurep, sizeof(features)))
 390		return -EFAULT;
 391
 392	if (vdpa_set_features(vdpa, features))
 393		return -EINVAL;
 394
 395	return 0;
 396}
 397
 398static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
 399{
 400	struct vdpa_device *vdpa = v->vdpa;
 401	const struct vdpa_config_ops *ops = vdpa->config;
 402	u16 num;
 403
 404	num = ops->get_vq_num_max(vdpa);
 405
 406	if (copy_to_user(argp, &num, sizeof(num)))
 407		return -EFAULT;
 408
 409	return 0;
 410}
 411
 412static void vhost_vdpa_config_put(struct vhost_vdpa *v)
 413{
 414	if (v->config_ctx) {
 415		eventfd_ctx_put(v->config_ctx);
 416		v->config_ctx = NULL;
 417	}
 418}
 419
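/*
 * Illustrative usage sketch (an assumption, not taken from this file):
 * config-change interrupts are delivered through an eventfd, and
 * VHOST_FILE_UNBIND detaches it again:
 *
 *	int efd = eventfd(0, EFD_CLOEXEC);
 *	ioctl(fd, VHOST_VDPA_SET_CONFIG_CALL, &efd);
 *	...
 *	efd = VHOST_FILE_UNBIND;
 *	ioctl(fd, VHOST_VDPA_SET_CONFIG_CALL, &efd);
 */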
 420static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
 421{
 422	struct vdpa_callback cb;
 423	int fd;
 424	struct eventfd_ctx *ctx;
 425
 426	cb.callback = vhost_vdpa_config_cb;
 427	cb.private = v;
 428	if (copy_from_user(&fd, argp, sizeof(fd)))
  429		return -EFAULT;
 430
 431	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
 432	swap(ctx, v->config_ctx);
 433
 434	if (!IS_ERR_OR_NULL(ctx))
 435		eventfd_ctx_put(ctx);
 436
 437	if (IS_ERR(v->config_ctx)) {
 438		long ret = PTR_ERR(v->config_ctx);
 439
 440		v->config_ctx = NULL;
 441		return ret;
 442	}
 443
 444	v->vdpa->config->set_config_cb(v->vdpa, &cb);
 445
 446	return 0;
 447}
 448
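/*
 * Illustrative usage sketch (an assumption, not taken from this file):
 * userspace should query the usable IOVA window before issuing IOTLB
 * updates, since mappings outside [first, last] are rejected:
 *
 *	struct vhost_vdpa_iova_range r;
 *	ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, &r);
 */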
 449static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
 450{
 451	struct vhost_vdpa_iova_range range = {
 452		.first = v->range.first,
 453		.last = v->range.last,
 454	};
 455
 456	if (copy_to_user(argp, &range, sizeof(range)))
 457		return -EFAULT;
 458	return 0;
 459}
 460
 461static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
 462{
 463	struct vdpa_device *vdpa = v->vdpa;
 464	const struct vdpa_config_ops *ops = vdpa->config;
 465	u32 size;
 466
 467	size = ops->get_config_size(vdpa);
 468
 469	if (copy_to_user(argp, &size, sizeof(size)))
 470		return -EFAULT;
 471
 472	return 0;
 473}
 474
 475static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
 476{
 477	struct vdpa_device *vdpa = v->vdpa;
 478
 479	if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)))
 480		return -EFAULT;
 481
 482	return 0;
 483}
 484
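/*
 * Note: VHOST_VDPA_SUSPEND is only usable when the parent device provides a
 * suspend() op; userspace is expected (illustrative expectation, not stated
 * here) to check VHOST_GET_BACKEND_FEATURES for VHOST_BACKEND_F_SUSPEND
 * first, since the ioctl below simply returns -EOPNOTSUPP otherwise.
 */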
  485/* After a successful return of this ioctl the device must not process more
  486 * virtqueue descriptors. The device can still answer reads or writes of
  487 * config fields as if it were not suspended. In particular, writing 1 to
  488 * "queue_enable" will not make the device start processing buffers.
  489 */
 490static long vhost_vdpa_suspend(struct vhost_vdpa *v)
 491{
 492	struct vdpa_device *vdpa = v->vdpa;
 493	const struct vdpa_config_ops *ops = vdpa->config;
 494
 495	if (!ops->suspend)
 496		return -EOPNOTSUPP;
 497
 498	return ops->suspend(vdpa);
 499}
 500
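/*
 * Illustrative usage sketch (an assumption, not taken from this file): the
 * vhost-vdpa specific per-virtqueue ioctls below carry a
 * struct vhost_vring_state whose .index selects the virtqueue:
 *
 *	struct vhost_vring_state s = { .index = qidx, .num = 1 };
 *	ioctl(fd, VHOST_VDPA_SET_VRING_ENABLE, &s);	/* enable vq qidx */
 *	ioctl(fd, VHOST_VDPA_GET_VRING_GROUP, &s);	/* s.num = vq group */
 */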
 501static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 502				   void __user *argp)
 503{
 504	struct vdpa_device *vdpa = v->vdpa;
 505	const struct vdpa_config_ops *ops = vdpa->config;
 506	struct vdpa_vq_state vq_state;
 507	struct vdpa_callback cb;
 508	struct vhost_virtqueue *vq;
 509	struct vhost_vring_state s;
 510	u32 idx;
 511	long r;
 512
 513	r = get_user(idx, (u32 __user *)argp);
 514	if (r < 0)
 515		return r;
 516
 517	if (idx >= v->nvqs)
 518		return -ENOBUFS;
 519
 520	idx = array_index_nospec(idx, v->nvqs);
 521	vq = &v->vqs[idx];
 522
 523	switch (cmd) {
 524	case VHOST_VDPA_SET_VRING_ENABLE:
 525		if (copy_from_user(&s, argp, sizeof(s)))
 526			return -EFAULT;
 527		ops->set_vq_ready(vdpa, idx, s.num);
 528		return 0;
 529	case VHOST_VDPA_GET_VRING_GROUP:
 530		if (!ops->get_vq_group)
 531			return -EOPNOTSUPP;
 532		s.index = idx;
 533		s.num = ops->get_vq_group(vdpa, idx);
 534		if (s.num >= vdpa->ngroups)
 535			return -EIO;
 536		else if (copy_to_user(argp, &s, sizeof(s)))
 537			return -EFAULT;
 538		return 0;
 539	case VHOST_VDPA_SET_GROUP_ASID:
 540		if (copy_from_user(&s, argp, sizeof(s)))
 541			return -EFAULT;
 542		if (s.num >= vdpa->nas)
 543			return -EINVAL;
 544		if (!ops->set_group_asid)
 545			return -EOPNOTSUPP;
 546		return ops->set_group_asid(vdpa, idx, s.num);
 547	case VHOST_GET_VRING_BASE:
 548		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
 549		if (r)
 550			return r;
 551
 552		vq->last_avail_idx = vq_state.split.avail_index;
 553		break;
 554	}
 555
 556	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
 557	if (r)
 558		return r;
 559
 560	switch (cmd) {
 561	case VHOST_SET_VRING_ADDR:
 562		if (ops->set_vq_address(vdpa, idx,
 563					(u64)(uintptr_t)vq->desc,
 564					(u64)(uintptr_t)vq->avail,
 565					(u64)(uintptr_t)vq->used))
 566			r = -EINVAL;
 567		break;
 568
 569	case VHOST_SET_VRING_BASE:
 570		vq_state.split.avail_index = vq->last_avail_idx;
 571		if (ops->set_vq_state(vdpa, idx, &vq_state))
 572			r = -EINVAL;
 573		break;
 574
 575	case VHOST_SET_VRING_CALL:
 576		if (vq->call_ctx.ctx) {
 577			cb.callback = vhost_vdpa_virtqueue_cb;
 578			cb.private = vq;
 579		} else {
 580			cb.callback = NULL;
 581			cb.private = NULL;
 582		}
 583		ops->set_vq_cb(vdpa, idx, &cb);
 584		vhost_vdpa_setup_vq_irq(v, idx);
 585		break;
 586
 587	case VHOST_SET_VRING_NUM:
 588		ops->set_vq_num(vdpa, idx, vq->num);
 589		break;
 590	}
 591
 592	return r;
 593}
 594
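/*
 * Illustrative usage sketch (an assumption, not taken from this file):
 * backend feature negotiation happens before any IOTLB traffic:
 *
 *	__u64 f;
 *	ioctl(fd, VHOST_GET_BACKEND_FEATURES, &f);
 *	f &= (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
 *	     (1ULL << VHOST_BACKEND_F_IOTLB_BATCH);
 *	ioctl(fd, VHOST_SET_BACKEND_FEATURES, &f);
 */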
 595static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 596				      unsigned int cmd, unsigned long arg)
 597{
 598	struct vhost_vdpa *v = filep->private_data;
 599	struct vhost_dev *d = &v->vdev;
 600	void __user *argp = (void __user *)arg;
 601	u64 __user *featurep = argp;
 602	u64 features;
 603	long r = 0;
 604
 605	if (cmd == VHOST_SET_BACKEND_FEATURES) {
 606		if (copy_from_user(&features, featurep, sizeof(features)))
 607			return -EFAULT;
 608		if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
 609				 BIT_ULL(VHOST_BACKEND_F_SUSPEND)))
 610			return -EOPNOTSUPP;
 611		if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
 612		     !vhost_vdpa_can_suspend(v))
 613			return -EOPNOTSUPP;
 614		vhost_set_backend_features(&v->vdev, features);
 615		return 0;
 616	}
 617
 618	mutex_lock(&d->mutex);
 619
 620	switch (cmd) {
 621	case VHOST_VDPA_GET_DEVICE_ID:
 622		r = vhost_vdpa_get_device_id(v, argp);
 623		break;
 624	case VHOST_VDPA_GET_STATUS:
 625		r = vhost_vdpa_get_status(v, argp);
 626		break;
 627	case VHOST_VDPA_SET_STATUS:
 628		r = vhost_vdpa_set_status(v, argp);
 629		break;
 630	case VHOST_VDPA_GET_CONFIG:
 631		r = vhost_vdpa_get_config(v, argp);
 632		break;
 633	case VHOST_VDPA_SET_CONFIG:
 634		r = vhost_vdpa_set_config(v, argp);
 635		break;
 636	case VHOST_GET_FEATURES:
 637		r = vhost_vdpa_get_features(v, argp);
 638		break;
 639	case VHOST_SET_FEATURES:
 640		r = vhost_vdpa_set_features(v, argp);
 641		break;
 642	case VHOST_VDPA_GET_VRING_NUM:
 643		r = vhost_vdpa_get_vring_num(v, argp);
 644		break;
 645	case VHOST_VDPA_GET_GROUP_NUM:
 646		if (copy_to_user(argp, &v->vdpa->ngroups,
 647				 sizeof(v->vdpa->ngroups)))
 648			r = -EFAULT;
 649		break;
 650	case VHOST_VDPA_GET_AS_NUM:
 651		if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
 652			r = -EFAULT;
 653		break;
 654	case VHOST_SET_LOG_BASE:
 655	case VHOST_SET_LOG_FD:
 656		r = -ENOIOCTLCMD;
 657		break;
 658	case VHOST_VDPA_SET_CONFIG_CALL:
 659		r = vhost_vdpa_set_config_call(v, argp);
 660		break;
 661	case VHOST_GET_BACKEND_FEATURES:
 662		features = VHOST_VDPA_BACKEND_FEATURES;
 663		if (vhost_vdpa_can_suspend(v))
 664			features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
 665		if (copy_to_user(featurep, &features, sizeof(features)))
 666			r = -EFAULT;
 667		break;
 668	case VHOST_VDPA_GET_IOVA_RANGE:
 669		r = vhost_vdpa_get_iova_range(v, argp);
 670		break;
 671	case VHOST_VDPA_GET_CONFIG_SIZE:
 672		r = vhost_vdpa_get_config_size(v, argp);
 673		break;
 674	case VHOST_VDPA_GET_VQS_COUNT:
 675		r = vhost_vdpa_get_vqs_count(v, argp);
 676		break;
 677	case VHOST_VDPA_SUSPEND:
 678		r = vhost_vdpa_suspend(v);
 679		break;
 680	default:
 681		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
 682		if (r == -ENOIOCTLCMD)
 683			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
 684		break;
 685	}
 686
 687	mutex_unlock(&d->mutex);
 688	return r;
 689}
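/*
 * Tear down one mapping on the device side: devices supplying dma_map are
 * assumed to pair it with dma_unmap; devices using set_map are resynchronized
 * by the callers after the iotlb is updated; otherwise the mapping lives in
 * the platform IOMMU domain.
 */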
 690static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
 691				     struct vhost_iotlb_map *map, u32 asid)
 692{
 693	struct vdpa_device *vdpa = v->vdpa;
 694	const struct vdpa_config_ops *ops = vdpa->config;
 695	if (ops->dma_map) {
 696		ops->dma_unmap(vdpa, asid, map->start, map->size);
 697	} else if (ops->set_map == NULL) {
 698		iommu_unmap(v->domain, map->start, map->size);
 699	}
 700}
 701
 702static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
 703				u64 start, u64 last, u32 asid)
 704{
 705	struct vhost_dev *dev = &v->vdev;
 706	struct vhost_iotlb_map *map;
 707	struct page *page;
 708	unsigned long pfn, pinned;
 709
 710	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
 711		pinned = PFN_DOWN(map->size);
 712		for (pfn = PFN_DOWN(map->addr);
 713		     pinned > 0; pfn++, pinned--) {
 714			page = pfn_to_page(pfn);
 715			if (map->perm & VHOST_ACCESS_WO)
 716				set_page_dirty_lock(page);
 717			unpin_user_page(page);
 718		}
 719		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
 720		vhost_vdpa_general_unmap(v, map, asid);
 721		vhost_iotlb_map_free(iotlb, map);
 722	}
 723}
 724
 725static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
 726				u64 start, u64 last, u32 asid)
 727{
 728	struct vhost_iotlb_map *map;
 729	struct vdpa_map_file *map_file;
 730
 731	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
 732		map_file = (struct vdpa_map_file *)map->opaque;
 733		fput(map_file->file);
 734		kfree(map_file);
 735		vhost_vdpa_general_unmap(v, map, asid);
 736		vhost_iotlb_map_free(iotlb, map);
 737	}
 738}
 739
 740static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
 741				   struct vhost_iotlb *iotlb, u64 start,
 742				   u64 last, u32 asid)
 743{
 744	struct vdpa_device *vdpa = v->vdpa;
 745
 746	if (vdpa->use_va)
 747		return vhost_vdpa_va_unmap(v, iotlb, start, last, asid);
 748
 749	return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid);
 750}
 751
 752static int perm_to_iommu_flags(u32 perm)
 753{
 754	int flags = 0;
 755
 756	switch (perm) {
 757	case VHOST_ACCESS_WO:
 758		flags |= IOMMU_WRITE;
 759		break;
 760	case VHOST_ACCESS_RO:
 761		flags |= IOMMU_READ;
 762		break;
 763	case VHOST_ACCESS_RW:
 764		flags |= (IOMMU_WRITE | IOMMU_READ);
 765		break;
 766	default:
  767		WARN(1, "invalid vhost IOTLB permission\n");
 768		break;
 769	}
 770
 771	return flags | IOMMU_CACHE;
 772}
 773
 774static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
 775			  u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
 776{
 777	struct vhost_dev *dev = &v->vdev;
 778	struct vdpa_device *vdpa = v->vdpa;
 779	const struct vdpa_config_ops *ops = vdpa->config;
 780	u32 asid = iotlb_to_asid(iotlb);
 781	int r = 0;
 782
 783	r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
 784				      pa, perm, opaque);
 785	if (r)
 786		return r;
 787
 788	if (ops->dma_map) {
 789		r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
 790	} else if (ops->set_map) {
 791		if (!v->in_batch)
 792			r = ops->set_map(vdpa, asid, iotlb);
 793	} else {
 794		r = iommu_map(v->domain, iova, pa, size,
 795			      perm_to_iommu_flags(perm));
 796	}
 797	if (r) {
 798		vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
 799		return r;
 800	}
 801
 802	if (!vdpa->use_va)
 803		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
 804
 805	return 0;
 806}
 807
 808static void vhost_vdpa_unmap(struct vhost_vdpa *v,
 809			     struct vhost_iotlb *iotlb,
 810			     u64 iova, u64 size)
 811{
 812	struct vdpa_device *vdpa = v->vdpa;
 813	const struct vdpa_config_ops *ops = vdpa->config;
 814	u32 asid = iotlb_to_asid(iotlb);
 815
 816	vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid);
 817
 818	if (ops->set_map) {
 819		if (!v->in_batch)
 820			ops->set_map(vdpa, asid, iotlb);
 821	}
  822	/* If we are in the middle of batch processing, delay freeing
  823	 * the AS until BATCH_END.
  824	 */
 825	if (!v->in_batch && !iotlb->nmaps)
 826		vhost_vdpa_remove_as(v, asid);
 827}
 828
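/*
 * VA (use_va) mapping: walk the VMAs covering [uaddr, uaddr + size) and
 * record a reference to each shared, file-backed VMA (skipping VM_IO and
 * VM_PFNMAP areas) so the parent device can translate the user addresses
 * itself; on failure, anything mapped so far is unwound.
 */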
 829static int vhost_vdpa_va_map(struct vhost_vdpa *v,
 830			     struct vhost_iotlb *iotlb,
 831			     u64 iova, u64 size, u64 uaddr, u32 perm)
 832{
 833	struct vhost_dev *dev = &v->vdev;
 834	u64 offset, map_size, map_iova = iova;
 835	struct vdpa_map_file *map_file;
 836	struct vm_area_struct *vma;
 837	int ret = 0;
 838
 839	mmap_read_lock(dev->mm);
 840
 841	while (size) {
 842		vma = find_vma(dev->mm, uaddr);
 843		if (!vma) {
 844			ret = -EINVAL;
 845			break;
 846		}
 847		map_size = min(size, vma->vm_end - uaddr);
 848		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
 849			!(vma->vm_flags & (VM_IO | VM_PFNMAP))))
 850			goto next;
 851
 852		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
 853		if (!map_file) {
 854			ret = -ENOMEM;
 855			break;
 856		}
 857		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
 858		map_file->offset = offset;
 859		map_file->file = get_file(vma->vm_file);
 860		ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
 861				     perm, map_file);
 862		if (ret) {
 863			fput(map_file->file);
 864			kfree(map_file);
 865			break;
 866		}
 867next:
 868		size -= map_size;
 869		uaddr += map_size;
 870		map_iova += map_size;
 871	}
 872	if (ret)
 873		vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);
 874
 875	mmap_read_unlock(dev->mm);
 876
 877	return ret;
 878}
 879
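/*
 * PA mapping: pin the user pages one page_list at a time, coalesce physically
 * contiguous runs of pfns into chunks, and hand each chunk to
 * vhost_vdpa_map(); pinned pages are accounted against RLIMIT_MEMLOCK via
 * mm->pinned_vm and unwound on any failure.
 */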
 880static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
 881			     struct vhost_iotlb *iotlb,
 882			     u64 iova, u64 size, u64 uaddr, u32 perm)
 883{
 884	struct vhost_dev *dev = &v->vdev;
 885	struct page **page_list;
 886	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
 887	unsigned int gup_flags = FOLL_LONGTERM;
 888	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
 889	unsigned long lock_limit, sz2pin, nchunks, i;
 890	u64 start = iova;
 891	long pinned;
 892	int ret = 0;
 893
 894	/* Limit the use of memory for bookkeeping */
 895	page_list = (struct page **) __get_free_page(GFP_KERNEL);
 896	if (!page_list)
 897		return -ENOMEM;
 898
 899	if (perm & VHOST_ACCESS_WO)
 900		gup_flags |= FOLL_WRITE;
 901
 902	npages = PFN_UP(size + (iova & ~PAGE_MASK));
 903	if (!npages) {
 904		ret = -EINVAL;
 905		goto free;
 906	}
 907
 908	mmap_read_lock(dev->mm);
 909
 910	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
 911	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
 912		ret = -ENOMEM;
 913		goto unlock;
 914	}
 915
 916	cur_base = uaddr & PAGE_MASK;
 917	iova &= PAGE_MASK;
 918	nchunks = 0;
 919
 920	while (npages) {
 921		sz2pin = min_t(unsigned long, npages, list_size);
 922		pinned = pin_user_pages(cur_base, sz2pin,
 923					gup_flags, page_list, NULL);
 924		if (sz2pin != pinned) {
 925			if (pinned < 0) {
 926				ret = pinned;
 927			} else {
 928				unpin_user_pages(page_list, pinned);
 929				ret = -ENOMEM;
 930			}
 931			goto out;
 932		}
 933		nchunks++;
 934
 935		if (!last_pfn)
 936			map_pfn = page_to_pfn(page_list[0]);
 937
 938		for (i = 0; i < pinned; i++) {
 939			unsigned long this_pfn = page_to_pfn(page_list[i]);
 940			u64 csize;
 941
 942			if (last_pfn && (this_pfn != last_pfn + 1)) {
  943				/* Map the contiguous chunk pinned so far */
 944				csize = PFN_PHYS(last_pfn - map_pfn + 1);
 945				ret = vhost_vdpa_map(v, iotlb, iova, csize,
 946						     PFN_PHYS(map_pfn),
 947						     perm, NULL);
 948				if (ret) {
 949					/*
 950					 * Unpin the pages that are left unmapped
 951					 * from this point on in the current
 952					 * page_list. The remaining outstanding
 953					 * ones which may stride across several
 954					 * chunks will be covered in the common
 955					 * error path subsequently.
 956					 */
 957					unpin_user_pages(&page_list[i],
 958							 pinned - i);
 959					goto out;
 960				}
 961
 962				map_pfn = this_pfn;
 963				iova += csize;
 964				nchunks = 0;
 965			}
 966
 967			last_pfn = this_pfn;
 968		}
 969
 970		cur_base += PFN_PHYS(pinned);
 971		npages -= pinned;
 972	}
 973
  974	/* Map the remaining chunk */
 975	ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
 976			     PFN_PHYS(map_pfn), perm, NULL);
 977out:
 978	if (ret) {
 979		if (nchunks) {
 980			unsigned long pfn;
 981
  982			/*
  983			 * Unpin the outstanding pages that were pinned but
  984			 * never mapped because vhost_vdpa_map() or
  985			 * pin_user_pages() failed.
  986			 *
  987			 * Pages that were mapped are accounted in
  988			 * vhost_vdpa_map(), so their unpinning is handled by
  989			 * vhost_vdpa_unmap() below.
  990			 */
 991			WARN_ON(!last_pfn);
 992			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
 993				unpin_user_page(pfn_to_page(pfn));
 994		}
 995		vhost_vdpa_unmap(v, iotlb, start, size);
 996	}
 997unlock:
 998	mmap_read_unlock(dev->mm);
 999free:
1000	free_page((unsigned long)page_list);
1001	return ret;
1002
1003}
1004
1005static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
1006					   struct vhost_iotlb *iotlb,
1007					   struct vhost_iotlb_msg *msg)
1008{
1009	struct vdpa_device *vdpa = v->vdpa;
1010
1011	if (msg->iova < v->range.first || !msg->size ||
1012	    msg->iova > U64_MAX - msg->size + 1 ||
1013	    msg->iova + msg->size - 1 > v->range.last)
1014		return -EINVAL;
1015
1016	if (vhost_iotlb_itree_first(iotlb, msg->iova,
1017				    msg->iova + msg->size - 1))
1018		return -EEXIST;
1019
1020	if (vdpa->use_va)
1021		return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
1022					 msg->uaddr, msg->perm);
1023
1024	return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr,
1025				 msg->perm);
1026}
1027
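/*
 * Illustrative usage sketch (an assumption, not taken from this file): IOTLB
 * messages arrive via write(2) on the vhost-vdpa fd as struct vhost_msg_v2
 * (see vhost_vdpa_chr_write_iter() below), e.g. a single update:
 *
 *	struct vhost_msg_v2 msg = {
 *		.type = VHOST_IOTLB_MSG_V2,
 *		.asid = 0,
 *		.iotlb = {
 *			.iova  = iova,
 *			.size  = size,
 *			.uaddr = (__u64)(uintptr_t)buf,
 *			.perm  = VHOST_ACCESS_RW,
 *			.type  = VHOST_IOTLB_UPDATE,
 *		},
 *	};
 *	write(fd, &msg, sizeof(msg));
 */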
1028static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
1029					struct vhost_iotlb_msg *msg)
1030{
1031	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
1032	struct vdpa_device *vdpa = v->vdpa;
1033	const struct vdpa_config_ops *ops = vdpa->config;
1034	struct vhost_iotlb *iotlb = NULL;
1035	struct vhost_vdpa_as *as = NULL;
1036	int r = 0;
1037
1038	mutex_lock(&dev->mutex);
1039
1040	r = vhost_dev_check_owner(dev);
1041	if (r)
1042		goto unlock;
1043
1044	if (msg->type == VHOST_IOTLB_UPDATE ||
1045	    msg->type == VHOST_IOTLB_BATCH_BEGIN) {
1046		as = vhost_vdpa_find_alloc_as(v, asid);
1047		if (!as) {
 1048			dev_err(&v->dev, "can't find or allocate asid %d\n",
 1049				asid);
1050			r = -EINVAL;
1051			goto unlock;
1052		}
1053		iotlb = &as->iotlb;
1054	} else
1055		iotlb = asid_to_iotlb(v, asid);
1056
1057	if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
1058		if (v->in_batch && v->batch_asid != asid) {
1059			dev_info(&v->dev, "batch id %d asid %d\n",
1060				 v->batch_asid, asid);
1061		}
1062		if (!iotlb)
1063			dev_err(&v->dev, "no iotlb for asid %d\n", asid);
1064		r = -EINVAL;
1065		goto unlock;
1066	}
1067
1068	switch (msg->type) {
1069	case VHOST_IOTLB_UPDATE:
1070		r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
1071		break;
1072	case VHOST_IOTLB_INVALIDATE:
1073		vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
1074		break;
1075	case VHOST_IOTLB_BATCH_BEGIN:
1076		v->batch_asid = asid;
1077		v->in_batch = true;
1078		break;
1079	case VHOST_IOTLB_BATCH_END:
1080		if (v->in_batch && ops->set_map)
1081			ops->set_map(vdpa, asid, iotlb);
1082		v->in_batch = false;
1083		if (!iotlb->nmaps)
1084			vhost_vdpa_remove_as(v, asid);
1085		break;
1086	default:
1087		r = -EINVAL;
1088		break;
1089	}
1090unlock:
1091	mutex_unlock(&dev->mutex);
1092
1093	return r;
1094}
1095
1096static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
1097					 struct iov_iter *from)
1098{
1099	struct file *file = iocb->ki_filp;
1100	struct vhost_vdpa *v = file->private_data;
1101	struct vhost_dev *dev = &v->vdev;
1102
1103	return vhost_chr_write_iter(dev, from);
1104}
1105
1106static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
1107{
1108	struct vdpa_device *vdpa = v->vdpa;
1109	const struct vdpa_config_ops *ops = vdpa->config;
1110	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
1111	struct bus_type *bus;
1112	int ret;
1113
 1114	/* Device wants to do DMA by itself */
1115	if (ops->set_map || ops->dma_map)
1116		return 0;
1117
1118	bus = dma_dev->bus;
1119	if (!bus)
1120		return -EFAULT;
1121
1122	if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY))
1123		return -ENOTSUPP;
1124
1125	v->domain = iommu_domain_alloc(bus);
1126	if (!v->domain)
1127		return -EIO;
1128
1129	ret = iommu_attach_device(v->domain, dma_dev);
1130	if (ret)
1131		goto err_attach;
1132
1133	return 0;
1134
1135err_attach:
1136	iommu_domain_free(v->domain);
1137	return ret;
1138}
1139
1140static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
1141{
1142	struct vdpa_device *vdpa = v->vdpa;
1143	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
1144
1145	if (v->domain) {
1146		iommu_detach_device(v->domain, dma_dev);
1147		iommu_domain_free(v->domain);
1148	}
1149
1150	v->domain = NULL;
1151}
1152
1153static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
1154{
1155	struct vdpa_iova_range *range = &v->range;
1156	struct vdpa_device *vdpa = v->vdpa;
1157	const struct vdpa_config_ops *ops = vdpa->config;
1158
1159	if (ops->get_iova_range) {
1160		*range = ops->get_iova_range(vdpa);
1161	} else if (v->domain && v->domain->geometry.force_aperture) {
1162		range->first = v->domain->geometry.aperture_start;
1163		range->last = v->domain->geometry.aperture_end;
1164	} else {
1165		range->first = 0;
1166		range->last = ULLONG_MAX;
1167	}
1168}
1169
1170static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
1171{
1172	struct vhost_vdpa_as *as;
1173	u32 asid;
1174
1175	for (asid = 0; asid < v->vdpa->nas; asid++) {
1176		as = asid_to_as(v, asid);
1177		if (as)
1178			vhost_vdpa_remove_as(v, asid);
1179	}
1180
1181	vhost_dev_cleanup(&v->vdev);
1182	kfree(v->vdev.vqs);
1183}
1184
1185static int vhost_vdpa_open(struct inode *inode, struct file *filep)
1186{
1187	struct vhost_vdpa *v;
1188	struct vhost_dev *dev;
1189	struct vhost_virtqueue **vqs;
1190	int r, opened;
1191	u32 i, nvqs;
1192
1193	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
1194
1195	opened = atomic_cmpxchg(&v->opened, 0, 1);
1196	if (opened)
1197		return -EBUSY;
1198
1199	nvqs = v->nvqs;
1200	r = vhost_vdpa_reset(v);
1201	if (r)
1202		goto err;
1203
1204	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
1205	if (!vqs) {
1206		r = -ENOMEM;
1207		goto err;
1208	}
1209
1210	dev = &v->vdev;
1211	for (i = 0; i < nvqs; i++) {
1212		vqs[i] = &v->vqs[i];
1213		vqs[i]->handle_kick = handle_vq_kick;
1214	}
1215	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
1216		       vhost_vdpa_process_iotlb_msg);
1217
1218	r = vhost_vdpa_alloc_domain(v);
1219	if (r)
1220		goto err_alloc_domain;
1221
1222	vhost_vdpa_set_iova_range(v);
1223
1224	filep->private_data = v;
1225
1226	return 0;
1227
1228err_alloc_domain:
1229	vhost_vdpa_cleanup(v);
1230err:
1231	atomic_dec(&v->opened);
1232	return r;
1233}
1234
1235static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
1236{
1237	u32 i;
1238
1239	for (i = 0; i < v->nvqs; i++)
1240		vhost_vdpa_unsetup_vq_irq(v, i);
1241}
1242
1243static int vhost_vdpa_release(struct inode *inode, struct file *filep)
1244{
1245	struct vhost_vdpa *v = filep->private_data;
1246	struct vhost_dev *d = &v->vdev;
1247
1248	mutex_lock(&d->mutex);
1249	filep->private_data = NULL;
1250	vhost_vdpa_clean_irq(v);
1251	vhost_vdpa_reset(v);
1252	vhost_dev_stop(&v->vdev);
1253	vhost_vdpa_free_domain(v);
1254	vhost_vdpa_config_put(v);
1255	vhost_vdpa_cleanup(v);
1256	mutex_unlock(&d->mutex);
1257
1258	atomic_dec(&v->opened);
1259	complete(&v->completion);
1260
1261	return 0;
1262}
1263
1264#ifdef CONFIG_MMU
1265static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
1266{
1267	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
1268	struct vdpa_device *vdpa = v->vdpa;
1269	const struct vdpa_config_ops *ops = vdpa->config;
1270	struct vdpa_notification_area notify;
1271	struct vm_area_struct *vma = vmf->vma;
1272	u16 index = vma->vm_pgoff;
1273
1274	notify = ops->get_vq_notification(vdpa, index);
1275
1276	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1277	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
1278			    PFN_DOWN(notify.addr), PAGE_SIZE,
1279			    vma->vm_page_prot))
1280		return VM_FAULT_SIGBUS;
1281
1282	return VM_FAULT_NOPAGE;
1283}
1284
1285static const struct vm_operations_struct vhost_vdpa_vm_ops = {
1286	.fault = vhost_vdpa_fault,
1287};
1288
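/*
 * Illustrative usage sketch (an assumption, not taken from this file): a
 * virtqueue doorbell is mapped write-only, one page per queue, with the
 * queue index encoded in the file offset:
 *
 *	void *db = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
 *			qidx * page_size);
 */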
1289static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
1290{
1291	struct vhost_vdpa *v = vma->vm_file->private_data;
1292	struct vdpa_device *vdpa = v->vdpa;
1293	const struct vdpa_config_ops *ops = vdpa->config;
1294	struct vdpa_notification_area notify;
1295	unsigned long index = vma->vm_pgoff;
1296
1297	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1298		return -EINVAL;
1299	if ((vma->vm_flags & VM_SHARED) == 0)
1300		return -EINVAL;
1301	if (vma->vm_flags & VM_READ)
1302		return -EINVAL;
1303	if (index > 65535)
1304		return -EINVAL;
1305	if (!ops->get_vq_notification)
1306		return -ENOTSUPP;
1307
 1308	/* To be safe and easily modelled by userspace, we only
 1309	 * support doorbells that sit on a page boundary and do not
 1310	 * share the page with other registers.
 1311	 */
1312	notify = ops->get_vq_notification(vdpa, index);
1313	if (notify.addr & (PAGE_SIZE - 1))
1314		return -EINVAL;
1315	if (vma->vm_end - vma->vm_start != notify.size)
1316		return -ENOTSUPP;
1317
1318	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
1319	vma->vm_ops = &vhost_vdpa_vm_ops;
1320	return 0;
1321}
1322#endif /* CONFIG_MMU */
1323
1324static const struct file_operations vhost_vdpa_fops = {
1325	.owner		= THIS_MODULE,
1326	.open		= vhost_vdpa_open,
1327	.release	= vhost_vdpa_release,
1328	.write_iter	= vhost_vdpa_chr_write_iter,
1329	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
1330#ifdef CONFIG_MMU
1331	.mmap		= vhost_vdpa_mmap,
1332#endif /* CONFIG_MMU */
1333	.compat_ioctl	= compat_ptr_ioctl,
1334};
1335
1336static void vhost_vdpa_release_dev(struct device *device)
1337{
1338	struct vhost_vdpa *v =
1339	       container_of(device, struct vhost_vdpa, dev);
1340
1341	ida_simple_remove(&vhost_vdpa_ida, v->minor);
1342	kfree(v->vqs);
1343	kfree(v);
1344}
1345
1346static int vhost_vdpa_probe(struct vdpa_device *vdpa)
1347{
1348	const struct vdpa_config_ops *ops = vdpa->config;
1349	struct vhost_vdpa *v;
1350	int minor;
1351	int i, r;
1352
 1353	/* We can't support platform IOMMU devices with more than one
 1354	 * group or address space.
 1355	 */
1356	if (!ops->set_map && !ops->dma_map &&
1357	    (vdpa->ngroups > 1 || vdpa->nas > 1))
1358		return -EOPNOTSUPP;
1359
1360	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
1361	if (!v)
1362		return -ENOMEM;
1363
1364	minor = ida_simple_get(&vhost_vdpa_ida, 0,
1365			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
1366	if (minor < 0) {
1367		kfree(v);
1368		return minor;
1369	}
1370
1371	atomic_set(&v->opened, 0);
1372	v->minor = minor;
1373	v->vdpa = vdpa;
1374	v->nvqs = vdpa->nvqs;
1375	v->virtio_id = ops->get_device_id(vdpa);
1376
1377	device_initialize(&v->dev);
1378	v->dev.release = vhost_vdpa_release_dev;
1379	v->dev.parent = &vdpa->dev;
1380	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
1381	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
1382			       GFP_KERNEL);
1383	if (!v->vqs) {
1384		r = -ENOMEM;
1385		goto err;
1386	}
1387
1388	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
1389	if (r)
1390		goto err;
1391
1392	cdev_init(&v->cdev, &vhost_vdpa_fops);
1393	v->cdev.owner = THIS_MODULE;
1394
1395	r = cdev_device_add(&v->cdev, &v->dev);
1396	if (r)
1397		goto err;
1398
1399	init_completion(&v->completion);
1400	vdpa_set_drvdata(vdpa, v);
1401
1402	for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
1403		INIT_HLIST_HEAD(&v->as[i]);
1404
1405	return 0;
1406
1407err:
1408	put_device(&v->dev);
1409	ida_simple_remove(&vhost_vdpa_ida, v->minor);
1410	return r;
1411}
1412
1413static void vhost_vdpa_remove(struct vdpa_device *vdpa)
1414{
1415	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
1416	int opened;
1417
1418	cdev_device_del(&v->cdev, &v->dev);
1419
1420	do {
1421		opened = atomic_cmpxchg(&v->opened, 0, 1);
1422		if (!opened)
1423			break;
1424		wait_for_completion(&v->completion);
1425	} while (1);
1426
1427	put_device(&v->dev);
1428}
1429
1430static struct vdpa_driver vhost_vdpa_driver = {
1431	.driver = {
1432		.name	= "vhost_vdpa",
1433	},
1434	.probe	= vhost_vdpa_probe,
1435	.remove	= vhost_vdpa_remove,
1436};
1437
1438static int __init vhost_vdpa_init(void)
1439{
1440	int r;
1441
1442	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
1443				"vhost-vdpa");
1444	if (r)
1445		goto err_alloc_chrdev;
1446
1447	r = vdpa_register_driver(&vhost_vdpa_driver);
1448	if (r)
1449		goto err_vdpa_register_driver;
1450
1451	return 0;
1452
1453err_vdpa_register_driver:
1454	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1455err_alloc_chrdev:
1456	return r;
1457}
1458module_init(vhost_vdpa_init);
1459
1460static void __exit vhost_vdpa_exit(void)
1461{
1462	vdpa_unregister_driver(&vhost_vdpa_driver);
1463	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1464}
1465module_exit(vhost_vdpa_exit);
1466
1467MODULE_VERSION("0.0.1");
1468MODULE_LICENSE("GPL v2");
1469MODULE_AUTHOR("Intel Corporation");
1470MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");