v6.8
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2018-2020 Intel Corporation.
   4 * Copyright (C) 2020 Red Hat, Inc.
   5 *
   6 * Author: Tiwei Bie <tiwei.bie@intel.com>
   7 *         Jason Wang <jasowang@redhat.com>
   8 *
   9 * Thanks to Michael S. Tsirkin for the valuable comments and
  10 * suggestions, and thanks to Cunming Liang and Zhihong Wang for
  11 * all their support.
  12 */
  13
  14#include <linux/kernel.h>
  15#include <linux/module.h>
  16#include <linux/cdev.h>
  17#include <linux/device.h>
  18#include <linux/mm.h>
  19#include <linux/slab.h>
  20#include <linux/iommu.h>
  21#include <linux/uuid.h>
  22#include <linux/vdpa.h>
  23#include <linux/nospec.h>
  24#include <linux/vhost.h>
  25
  26#include "vhost.h"
  27
  28enum {
  29	VHOST_VDPA_BACKEND_FEATURES =
  30	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
  31	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
  32	(1ULL << VHOST_BACKEND_F_IOTLB_ASID),
  33};
  34
  35#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
  36
  37#define VHOST_VDPA_IOTLB_BUCKETS 16
  38
  39struct vhost_vdpa_as {
  40	struct hlist_node hash_link;
  41	struct vhost_iotlb iotlb;
  42	u32 id;
  43};
  44
  45struct vhost_vdpa {
  46	struct vhost_dev vdev;
  47	struct iommu_domain *domain;
  48	struct vhost_virtqueue *vqs;
  49	struct completion completion;
  50	struct vdpa_device *vdpa;
  51	struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
  52	struct device dev;
  53	struct cdev cdev;
  54	atomic_t opened;
  55	u32 nvqs;
  56	int virtio_id;
  57	int minor;
  58	struct eventfd_ctx *config_ctx;
  59	int in_batch;
  60	struct vdpa_iova_range range;
  61	u32 batch_asid;
  62	bool suspended;
  63};
  64
  65static DEFINE_IDA(vhost_vdpa_ida);
  66
  67static dev_t vhost_vdpa_major;
  68
  69static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
  70				   struct vhost_iotlb *iotlb, u64 start,
  71				   u64 last, u32 asid);
  72
  73static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
  74{
  75	struct vhost_vdpa_as *as = container_of(iotlb, struct
  76						vhost_vdpa_as, iotlb);
  77	return as->id;
  78}
  79
  80static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
  81{
  82	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
  83	struct vhost_vdpa_as *as;
  84
  85	hlist_for_each_entry(as, head, hash_link)
  86		if (as->id == asid)
  87			return as;
  88
  89	return NULL;
  90}
  91
  92static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid)
  93{
  94	struct vhost_vdpa_as *as = asid_to_as(v, asid);
  95
  96	if (!as)
  97		return NULL;
  98
  99	return &as->iotlb;
 100}
 101
 102static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
 103{
 104	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
 105	struct vhost_vdpa_as *as;
 106
 107	if (asid_to_as(v, asid))
 108		return NULL;
 109
 110	if (asid >= v->vdpa->nas)
 111		return NULL;
 112
 113	as = kmalloc(sizeof(*as), GFP_KERNEL);
 114	if (!as)
 115		return NULL;
 116
 117	vhost_iotlb_init(&as->iotlb, 0, 0);
 118	as->id = asid;
 119	hlist_add_head(&as->hash_link, head);
 120
 121	return as;
 122}
 123
 124static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
 125						      u32 asid)
 126{
 127	struct vhost_vdpa_as *as = asid_to_as(v, asid);
 128
 129	if (as)
 130		return as;
 131
 132	return vhost_vdpa_alloc_as(v, asid);
 133}
 134
 135static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid)
 136{
 137	struct vdpa_device *vdpa = v->vdpa;
 138	const struct vdpa_config_ops *ops = vdpa->config;
 139
 140	if (ops->reset_map)
 141		ops->reset_map(vdpa, asid);
 142}
 143
 144static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
 145{
 146	struct vhost_vdpa_as *as = asid_to_as(v, asid);
 147
 148	if (!as)
 149		return -EINVAL;
 150
 151	hlist_del(&as->hash_link);
 152	vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
 153	/*
 154	 * Devices with vendor specific IOMMU may need to restore
 155	 * iotlb to the initial or default state, which cannot be
  156	 * cleaned up by the full-range unmap call above. Give them
 157	 * a chance to clean up or reset the map to the desired
 158	 * state.
 159	 */
 160	vhost_vdpa_reset_map(v, asid);
 161	kfree(as);
 162
 163	return 0;
 164}
 165
 166static void handle_vq_kick(struct vhost_work *work)
 167{
 168	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
 169						  poll.work);
 170	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
 171	const struct vdpa_config_ops *ops = v->vdpa->config;
 172
 173	ops->kick_vq(v->vdpa, vq - v->vqs);
 174}
 175
 176static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
 177{
 178	struct vhost_virtqueue *vq = private;
 179	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
 180
 181	if (call_ctx)
 182		eventfd_signal(call_ctx);
 183
 184	return IRQ_HANDLED;
 185}
 186
 187static irqreturn_t vhost_vdpa_config_cb(void *private)
 188{
 189	struct vhost_vdpa *v = private;
 190	struct eventfd_ctx *config_ctx = v->config_ctx;
 191
 192	if (config_ctx)
 193		eventfd_signal(config_ctx);
 194
 195	return IRQ_HANDLED;
 196}
 197
 198static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
 199{
 200	struct vhost_virtqueue *vq = &v->vqs[qid];
 201	const struct vdpa_config_ops *ops = v->vdpa->config;
 202	struct vdpa_device *vdpa = v->vdpa;
 203	int ret, irq;
 204
 205	if (!ops->get_vq_irq)
 206		return;
 207
 208	irq = ops->get_vq_irq(vdpa, qid);
 209	if (irq < 0)
 210		return;
 211
 212	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 213	if (!vq->call_ctx.ctx)
 214		return;
 215
 216	vq->call_ctx.producer.token = vq->call_ctx.ctx;
 217	vq->call_ctx.producer.irq = irq;
 218	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
 219	if (unlikely(ret))
 220		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret =  %d\n",
 221			 qid, vq->call_ctx.producer.token, ret);
 222}
 223
 224static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
 225{
 226	struct vhost_virtqueue *vq = &v->vqs[qid];
 227
 228	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 229}
 230
 231static int _compat_vdpa_reset(struct vhost_vdpa *v)
 232{
 233	struct vdpa_device *vdpa = v->vdpa;
 234	u32 flags = 0;
 235
 236	v->suspended = false;
 237
 238	if (v->vdev.vqs) {
 239		flags |= !vhost_backend_has_feature(v->vdev.vqs[0],
 240						    VHOST_BACKEND_F_IOTLB_PERSIST) ?
 241			 VDPA_RESET_F_CLEAN_MAP : 0;
 242	}
 243
 244	return vdpa_reset(vdpa, flags);
 245}
 246
 247static int vhost_vdpa_reset(struct vhost_vdpa *v)
 248{
 249	v->in_batch = 0;
 250	return _compat_vdpa_reset(v);
 251}
 252
 253static long vhost_vdpa_bind_mm(struct vhost_vdpa *v)
 254{
 255	struct vdpa_device *vdpa = v->vdpa;
 256	const struct vdpa_config_ops *ops = vdpa->config;
 257
 258	if (!vdpa->use_va || !ops->bind_mm)
 259		return 0;
 260
 261	return ops->bind_mm(vdpa, v->vdev.mm);
 262}
 263
 264static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v)
 265{
 266	struct vdpa_device *vdpa = v->vdpa;
 267	const struct vdpa_config_ops *ops = vdpa->config;
 268
 269	if (!vdpa->use_va || !ops->unbind_mm)
 270		return;
 271
 272	ops->unbind_mm(vdpa);
 273}
 274
 275static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
 276{
 277	struct vdpa_device *vdpa = v->vdpa;
 278	const struct vdpa_config_ops *ops = vdpa->config;
 279	u32 device_id;
 280
 281	device_id = ops->get_device_id(vdpa);
 282
 283	if (copy_to_user(argp, &device_id, sizeof(device_id)))
 284		return -EFAULT;
 285
 286	return 0;
 287}
 288
 289static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
 290{
 291	struct vdpa_device *vdpa = v->vdpa;
 292	const struct vdpa_config_ops *ops = vdpa->config;
 293	u8 status;
 294
 295	status = ops->get_status(vdpa);
 296
 297	if (copy_to_user(statusp, &status, sizeof(status)))
 298		return -EFAULT;
 299
 300	return 0;
 301}
 302
 303static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
 304{
 305	struct vdpa_device *vdpa = v->vdpa;
 306	const struct vdpa_config_ops *ops = vdpa->config;
 307	u8 status, status_old;
 308	u32 nvqs = v->nvqs;
 309	int ret;
 310	u16 i;
 311
 312	if (copy_from_user(&status, statusp, sizeof(status)))
 313		return -EFAULT;
 314
 315	status_old = ops->get_status(vdpa);
 316
 317	/*
  318	 * Userspace shouldn't remove status bits unless it resets
  319	 * the status to 0.
 320	 */
 321	if (status != 0 && (status_old & ~status) != 0)
 322		return -EINVAL;
 323
 324	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
 325		for (i = 0; i < nvqs; i++)
 326			vhost_vdpa_unsetup_vq_irq(v, i);
 327
 328	if (status == 0) {
 329		ret = _compat_vdpa_reset(v);
 330		if (ret)
 331			return ret;
 332	} else
 333		vdpa_set_status(vdpa, status);
 334
 335	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
 336		for (i = 0; i < nvqs; i++)
 337			vhost_vdpa_setup_vq_irq(v, i);
 338
  338
 339	return 0;
 340}
 341
 342static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
 343				      struct vhost_vdpa_config *c)
 344{
 345	struct vdpa_device *vdpa = v->vdpa;
 346	size_t size = vdpa->config->get_config_size(vdpa);
 347
 348	if (c->len == 0 || c->off > size)
 349		return -EINVAL;
 350
 351	if (c->len > size - c->off)
 352		return -E2BIG;
 353
 354	return 0;
 355}
 356
 357static long vhost_vdpa_get_config(struct vhost_vdpa *v,
 358				  struct vhost_vdpa_config __user *c)
 359{
 360	struct vdpa_device *vdpa = v->vdpa;
 361	struct vhost_vdpa_config config;
 362	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 363	u8 *buf;
 364
 365	if (copy_from_user(&config, c, size))
 366		return -EFAULT;
 367	if (vhost_vdpa_config_validate(v, &config))
 368		return -EINVAL;
 369	buf = kvzalloc(config.len, GFP_KERNEL);
 370	if (!buf)
 371		return -ENOMEM;
 372
 373	vdpa_get_config(vdpa, config.off, buf, config.len);
 374
 375	if (copy_to_user(c->buf, buf, config.len)) {
 376		kvfree(buf);
 377		return -EFAULT;
 378	}
 379
 380	kvfree(buf);
 381	return 0;
 382}
 383
 384static long vhost_vdpa_set_config(struct vhost_vdpa *v,
 385				  struct vhost_vdpa_config __user *c)
 386{
 387	struct vdpa_device *vdpa = v->vdpa;
 388	struct vhost_vdpa_config config;
 389	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 390	u8 *buf;
 391
 392	if (copy_from_user(&config, c, size))
 393		return -EFAULT;
 394	if (vhost_vdpa_config_validate(v, &config))
 395		return -EINVAL;
 396
 397	buf = vmemdup_user(c->buf, config.len);
 398	if (IS_ERR(buf))
 399		return PTR_ERR(buf);
 400
 401	vdpa_set_config(vdpa, config.off, buf, config.len);
 402
 403	kvfree(buf);
 404	return 0;
 405}
 406
 407static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
 408{
 409	struct vdpa_device *vdpa = v->vdpa;
 410	const struct vdpa_config_ops *ops = vdpa->config;
 411
 412	return ops->suspend;
 413}
 414
 415static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
 416{
 417	struct vdpa_device *vdpa = v->vdpa;
 418	const struct vdpa_config_ops *ops = vdpa->config;
 419
 420	return ops->resume;
 421}
 422
 423static bool vhost_vdpa_has_desc_group(const struct vhost_vdpa *v)
 424{
 425	struct vdpa_device *vdpa = v->vdpa;
 426	const struct vdpa_config_ops *ops = vdpa->config;
 427
 428	return ops->get_vq_desc_group;
 429}
 430
 431static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
 432{
 433	struct vdpa_device *vdpa = v->vdpa;
 434	const struct vdpa_config_ops *ops = vdpa->config;
 435	u64 features;
 436
 437	features = ops->get_device_features(vdpa);
 438
 439	if (copy_to_user(featurep, &features, sizeof(features)))
 440		return -EFAULT;
 441
 442	return 0;
 443}
 444
 445static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v)
 446{
 447	struct vdpa_device *vdpa = v->vdpa;
 448	const struct vdpa_config_ops *ops = vdpa->config;
 449
 450	if (!ops->get_backend_features)
 451		return 0;
 452	else
 453		return ops->get_backend_features(vdpa);
 454}
 455
 456static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
 457{
 458	struct vdpa_device *vdpa = v->vdpa;
 459	const struct vdpa_config_ops *ops = vdpa->config;
 460
 461	return (!ops->set_map && !ops->dma_map) || ops->reset_map ||
 462	       vhost_vdpa_get_backend_features(v) & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
 463}
 464
 465static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
 466{
 467	struct vdpa_device *vdpa = v->vdpa;
 468	const struct vdpa_config_ops *ops = vdpa->config;
 469	struct vhost_dev *d = &v->vdev;
 470	u64 actual_features;
 471	u64 features;
 472	int i;
 473
 474	/*
 475	 * It's not allowed to change the features after they have
 476	 * been negotiated.
 477	 */
 478	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
 479		return -EBUSY;
 480
 481	if (copy_from_user(&features, featurep, sizeof(features)))
 482		return -EFAULT;
 483
 484	if (vdpa_set_features(vdpa, features))
 485		return -EINVAL;
 486
 487	/* let the vqs know what has been configured */
 488	actual_features = ops->get_driver_features(vdpa);
 489	for (i = 0; i < d->nvqs; ++i) {
 490		struct vhost_virtqueue *vq = d->vqs[i];
 491
 492		mutex_lock(&vq->mutex);
 493		vq->acked_features = actual_features;
 494		mutex_unlock(&vq->mutex);
 495	}
 496
 497	return 0;
 498}
 499
 500static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
 501{
 502	struct vdpa_device *vdpa = v->vdpa;
 503	const struct vdpa_config_ops *ops = vdpa->config;
 504	u16 num;
 505
 506	num = ops->get_vq_num_max(vdpa);
 507
 508	if (copy_to_user(argp, &num, sizeof(num)))
 509		return -EFAULT;
 510
 511	return 0;
 512}
 513
 514static void vhost_vdpa_config_put(struct vhost_vdpa *v)
 515{
 516	if (v->config_ctx) {
 517		eventfd_ctx_put(v->config_ctx);
 518		v->config_ctx = NULL;
 519	}
 520}
 521
 522static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
 523{
 524	struct vdpa_callback cb;
 525	int fd;
 526	struct eventfd_ctx *ctx;
 527
 528	cb.callback = vhost_vdpa_config_cb;
 529	cb.private = v;
 530	if (copy_from_user(&fd, argp, sizeof(fd)))
 531		return  -EFAULT;
 532
 533	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
 534	swap(ctx, v->config_ctx);
 535
 536	if (!IS_ERR_OR_NULL(ctx))
 537		eventfd_ctx_put(ctx);
 538
 539	if (IS_ERR(v->config_ctx)) {
 540		long ret = PTR_ERR(v->config_ctx);
 541
 542		v->config_ctx = NULL;
 543		return ret;
 544	}
 545
 546	v->vdpa->config->set_config_cb(v->vdpa, &cb);
 547
 548	return 0;
 549}
 550
 551static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
 552{
 553	struct vhost_vdpa_iova_range range = {
 554		.first = v->range.first,
 555		.last = v->range.last,
 556	};
 557
 558	if (copy_to_user(argp, &range, sizeof(range)))
 559		return -EFAULT;
 560	return 0;
 561}
 562
 563static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
 564{
 565	struct vdpa_device *vdpa = v->vdpa;
 566	const struct vdpa_config_ops *ops = vdpa->config;
 567	u32 size;
 568
 569	size = ops->get_config_size(vdpa);
 570
 571	if (copy_to_user(argp, &size, sizeof(size)))
 572		return -EFAULT;
 573
 574	return 0;
 575}
 576
 577static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
 578{
 579	struct vdpa_device *vdpa = v->vdpa;
 580
 581	if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)))
 582		return -EFAULT;
 583
 584	return 0;
 585}
 586
  587/* After a successful return of this ioctl the device must not process more
  588 * virtqueue descriptors. The device can still answer reads or writes of config
 589 * fields as if it were not suspended. In particular, writing to "queue_enable"
 590 * with a value of 1 will not make the device start processing buffers.
 591 */
 592static long vhost_vdpa_suspend(struct vhost_vdpa *v)
 593{
 594	struct vdpa_device *vdpa = v->vdpa;
 595	const struct vdpa_config_ops *ops = vdpa->config;
 596	int ret;
 597
 598	if (!ops->suspend)
 599		return -EOPNOTSUPP;
 600
 601	ret = ops->suspend(vdpa);
 602	if (!ret)
 603		v->suspended = true;
 604
 605	return ret;
 606}
 607
 608/* After a successful return of this ioctl the device resumes processing
 609 * virtqueue descriptors. The device becomes fully operational the same way it
 610 * was before it was suspended.
 611 */
 612static long vhost_vdpa_resume(struct vhost_vdpa *v)
 613{
 614	struct vdpa_device *vdpa = v->vdpa;
 615	const struct vdpa_config_ops *ops = vdpa->config;
 616	int ret;
 617
 618	if (!ops->resume)
 619		return -EOPNOTSUPP;
 620
 621	ret = ops->resume(vdpa);
 622	if (!ret)
 623		v->suspended = false;
 624
 625	return ret;
 626}
 627
 628static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 629				   void __user *argp)
 630{
 631	struct vdpa_device *vdpa = v->vdpa;
 632	const struct vdpa_config_ops *ops = vdpa->config;
 633	struct vdpa_vq_state vq_state;
 634	struct vdpa_callback cb;
 635	struct vhost_virtqueue *vq;
 636	struct vhost_vring_state s;
 637	u32 idx;
 638	long r;
 639
 640	r = get_user(idx, (u32 __user *)argp);
 641	if (r < 0)
 642		return r;
 643
 644	if (idx >= v->nvqs)
 645		return -ENOBUFS;
 646
 647	idx = array_index_nospec(idx, v->nvqs);
 648	vq = &v->vqs[idx];
 649
 650	switch (cmd) {
 651	case VHOST_VDPA_SET_VRING_ENABLE:
 652		if (copy_from_user(&s, argp, sizeof(s)))
 653			return -EFAULT;
 654		ops->set_vq_ready(vdpa, idx, s.num);
 655		return 0;
 656	case VHOST_VDPA_GET_VRING_GROUP:
 657		if (!ops->get_vq_group)
 658			return -EOPNOTSUPP;
 659		s.index = idx;
 660		s.num = ops->get_vq_group(vdpa, idx);
 661		if (s.num >= vdpa->ngroups)
 662			return -EIO;
 663		else if (copy_to_user(argp, &s, sizeof(s)))
 664			return -EFAULT;
 665		return 0;
 666	case VHOST_VDPA_GET_VRING_DESC_GROUP:
 667		if (!vhost_vdpa_has_desc_group(v))
 668			return -EOPNOTSUPP;
 669		s.index = idx;
 670		s.num = ops->get_vq_desc_group(vdpa, idx);
 671		if (s.num >= vdpa->ngroups)
 672			return -EIO;
 673		else if (copy_to_user(argp, &s, sizeof(s)))
 674			return -EFAULT;
 675		return 0;
 676	case VHOST_VDPA_SET_GROUP_ASID:
 677		if (copy_from_user(&s, argp, sizeof(s)))
 678			return -EFAULT;
 679		if (s.num >= vdpa->nas)
 680			return -EINVAL;
 681		if (!ops->set_group_asid)
 682			return -EOPNOTSUPP;
 683		return ops->set_group_asid(vdpa, idx, s.num);
 684	case VHOST_GET_VRING_BASE:
 685		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
 686		if (r)
 687			return r;
 688
 689		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
 690			vq->last_avail_idx = vq_state.packed.last_avail_idx |
 691					     (vq_state.packed.last_avail_counter << 15);
 692			vq->last_used_idx = vq_state.packed.last_used_idx |
 693					    (vq_state.packed.last_used_counter << 15);
 694		} else {
 695			vq->last_avail_idx = vq_state.split.avail_index;
 696		}
 697		break;
 698	}
 699
 700	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
 701	if (r)
 702		return r;
 703
 704	switch (cmd) {
 705	case VHOST_SET_VRING_ADDR:
 706		if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
 707			return -EINVAL;
 708
 709		if (ops->set_vq_address(vdpa, idx,
 710					(u64)(uintptr_t)vq->desc,
 711					(u64)(uintptr_t)vq->avail,
 712					(u64)(uintptr_t)vq->used))
 713			r = -EINVAL;
 714		break;
 715
 716	case VHOST_SET_VRING_BASE:
 717		if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
 718			return -EINVAL;
 719
 720		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
 721			vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
 722			vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
 723			vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
 724			vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
 725		} else {
 726			vq_state.split.avail_index = vq->last_avail_idx;
 727		}
 728		r = ops->set_vq_state(vdpa, idx, &vq_state);
 729		break;
 730
 731	case VHOST_SET_VRING_CALL:
 732		if (vq->call_ctx.ctx) {
 733			cb.callback = vhost_vdpa_virtqueue_cb;
 734			cb.private = vq;
 735			cb.trigger = vq->call_ctx.ctx;
 736		} else {
 737			cb.callback = NULL;
 738			cb.private = NULL;
 739			cb.trigger = NULL;
 740		}
 741		ops->set_vq_cb(vdpa, idx, &cb);
 742		vhost_vdpa_setup_vq_irq(v, idx);
 743		break;
 744
 745	case VHOST_SET_VRING_NUM:
 746		ops->set_vq_num(vdpa, idx, vq->num);
 747		break;
 748	}
 749
 750	return r;
 751}
 752
 753static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 754				      unsigned int cmd, unsigned long arg)
 755{
 756	struct vhost_vdpa *v = filep->private_data;
 757	struct vhost_dev *d = &v->vdev;
 758	void __user *argp = (void __user *)arg;
 759	u64 __user *featurep = argp;
 760	u64 features;
 761	long r = 0;
 762
 763	if (cmd == VHOST_SET_BACKEND_FEATURES) {
 764		if (copy_from_user(&features, featurep, sizeof(features)))
 765			return -EFAULT;
 766		if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
 767				 BIT_ULL(VHOST_BACKEND_F_DESC_ASID) |
 768				 BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST) |
 769				 BIT_ULL(VHOST_BACKEND_F_SUSPEND) |
 770				 BIT_ULL(VHOST_BACKEND_F_RESUME) |
 771				 BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK)))
 772			return -EOPNOTSUPP;
 773		if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
 774		     !vhost_vdpa_can_suspend(v))
 775			return -EOPNOTSUPP;
 776		if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) &&
 777		     !vhost_vdpa_can_resume(v))
 778			return -EOPNOTSUPP;
 779		if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
 780		    !(features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)))
 781			return -EINVAL;
 782		if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
 783		     !vhost_vdpa_has_desc_group(v))
 784			return -EOPNOTSUPP;
 785		if ((features & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) &&
 786		     !vhost_vdpa_has_persistent_map(v))
 787			return -EOPNOTSUPP;
 788		vhost_set_backend_features(&v->vdev, features);
 789		return 0;
 790	}
 791
 792	mutex_lock(&d->mutex);
 793
 794	switch (cmd) {
 795	case VHOST_VDPA_GET_DEVICE_ID:
 796		r = vhost_vdpa_get_device_id(v, argp);
 797		break;
 798	case VHOST_VDPA_GET_STATUS:
 799		r = vhost_vdpa_get_status(v, argp);
 800		break;
 801	case VHOST_VDPA_SET_STATUS:
 802		r = vhost_vdpa_set_status(v, argp);
 803		break;
 804	case VHOST_VDPA_GET_CONFIG:
 805		r = vhost_vdpa_get_config(v, argp);
 806		break;
 807	case VHOST_VDPA_SET_CONFIG:
 808		r = vhost_vdpa_set_config(v, argp);
 809		break;
 810	case VHOST_GET_FEATURES:
 811		r = vhost_vdpa_get_features(v, argp);
 812		break;
 813	case VHOST_SET_FEATURES:
 814		r = vhost_vdpa_set_features(v, argp);
 815		break;
 816	case VHOST_VDPA_GET_VRING_NUM:
 817		r = vhost_vdpa_get_vring_num(v, argp);
 818		break;
 819	case VHOST_VDPA_GET_GROUP_NUM:
 820		if (copy_to_user(argp, &v->vdpa->ngroups,
 821				 sizeof(v->vdpa->ngroups)))
 822			r = -EFAULT;
 823		break;
 824	case VHOST_VDPA_GET_AS_NUM:
 825		if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
 826			r = -EFAULT;
 827		break;
 828	case VHOST_SET_LOG_BASE:
 829	case VHOST_SET_LOG_FD:
 830		r = -ENOIOCTLCMD;
 831		break;
 832	case VHOST_VDPA_SET_CONFIG_CALL:
 833		r = vhost_vdpa_set_config_call(v, argp);
 834		break;
 835	case VHOST_GET_BACKEND_FEATURES:
 836		features = VHOST_VDPA_BACKEND_FEATURES;
 837		if (vhost_vdpa_can_suspend(v))
 838			features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
 839		if (vhost_vdpa_can_resume(v))
 840			features |= BIT_ULL(VHOST_BACKEND_F_RESUME);
 841		if (vhost_vdpa_has_desc_group(v))
 842			features |= BIT_ULL(VHOST_BACKEND_F_DESC_ASID);
 843		if (vhost_vdpa_has_persistent_map(v))
 844			features |= BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
 845		features |= vhost_vdpa_get_backend_features(v);
 846		if (copy_to_user(featurep, &features, sizeof(features)))
 847			r = -EFAULT;
 848		break;
 849	case VHOST_VDPA_GET_IOVA_RANGE:
 850		r = vhost_vdpa_get_iova_range(v, argp);
 851		break;
 852	case VHOST_VDPA_GET_CONFIG_SIZE:
 853		r = vhost_vdpa_get_config_size(v, argp);
 854		break;
 855	case VHOST_VDPA_GET_VQS_COUNT:
 856		r = vhost_vdpa_get_vqs_count(v, argp);
 857		break;
 858	case VHOST_VDPA_SUSPEND:
 859		r = vhost_vdpa_suspend(v);
 860		break;
 861	case VHOST_VDPA_RESUME:
 862		r = vhost_vdpa_resume(v);
 863		break;
 864	default:
 865		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
 866		if (r == -ENOIOCTLCMD)
 867			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
 868		break;
 869	}
 870
 871	if (r)
 872		goto out;
 873
 874	switch (cmd) {
 875	case VHOST_SET_OWNER:
 876		r = vhost_vdpa_bind_mm(v);
 877		if (r)
 878			vhost_dev_reset_owner(d, NULL);
 879		break;
 880	}
 881out:
 882	mutex_unlock(&d->mutex);
 883	return r;
 884}
 885static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
 886				     struct vhost_iotlb_map *map, u32 asid)
 887{
 888	struct vdpa_device *vdpa = v->vdpa;
 889	const struct vdpa_config_ops *ops = vdpa->config;
 890	if (ops->dma_map) {
 891		ops->dma_unmap(vdpa, asid, map->start, map->size);
 892	} else if (ops->set_map == NULL) {
 893		iommu_unmap(v->domain, map->start, map->size);
 894	}
 895}
 896
 897static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
 898				u64 start, u64 last, u32 asid)
 899{
 900	struct vhost_dev *dev = &v->vdev;
 901	struct vhost_iotlb_map *map;
 902	struct page *page;
 903	unsigned long pfn, pinned;
 904
 905	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
 906		pinned = PFN_DOWN(map->size);
 907		for (pfn = PFN_DOWN(map->addr);
 908		     pinned > 0; pfn++, pinned--) {
 909			page = pfn_to_page(pfn);
 910			if (map->perm & VHOST_ACCESS_WO)
 911				set_page_dirty_lock(page);
 912			unpin_user_page(page);
 913		}
 914		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
 915		vhost_vdpa_general_unmap(v, map, asid);
 916		vhost_iotlb_map_free(iotlb, map);
 917	}
 918}
 919
 920static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
 921				u64 start, u64 last, u32 asid)
 922{
 923	struct vhost_iotlb_map *map;
 924	struct vdpa_map_file *map_file;
 925
 926	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
 927		map_file = (struct vdpa_map_file *)map->opaque;
 928		fput(map_file->file);
 929		kfree(map_file);
 930		vhost_vdpa_general_unmap(v, map, asid);
 931		vhost_iotlb_map_free(iotlb, map);
 932	}
 933}
 934
 935static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
 936				   struct vhost_iotlb *iotlb, u64 start,
 937				   u64 last, u32 asid)
 938{
 939	struct vdpa_device *vdpa = v->vdpa;
 940
 941	if (vdpa->use_va)
 942		return vhost_vdpa_va_unmap(v, iotlb, start, last, asid);
 943
 944	return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid);
 945}
 946
 947static int perm_to_iommu_flags(u32 perm)
 948{
 949	int flags = 0;
 950
 951	switch (perm) {
 952	case VHOST_ACCESS_WO:
 953		flags |= IOMMU_WRITE;
 954		break;
 955	case VHOST_ACCESS_RO:
 956		flags |= IOMMU_READ;
 957		break;
 958	case VHOST_ACCESS_RW:
 959		flags |= (IOMMU_WRITE | IOMMU_READ);
 960		break;
 961	default:
  962		WARN(1, "invalid vhost IOTLB permission\n");
 963		break;
 964	}
 965
 966	return flags | IOMMU_CACHE;
 967}
 968
 969static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
 970			  u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
 971{
 972	struct vhost_dev *dev = &v->vdev;
 973	struct vdpa_device *vdpa = v->vdpa;
 974	const struct vdpa_config_ops *ops = vdpa->config;
 975	u32 asid = iotlb_to_asid(iotlb);
 976	int r = 0;
 977
 978	r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
 979				      pa, perm, opaque);
 980	if (r)
 981		return r;
 982
 983	if (ops->dma_map) {
 984		r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
 985	} else if (ops->set_map) {
 986		if (!v->in_batch)
 987			r = ops->set_map(vdpa, asid, iotlb);
 988	} else {
 989		r = iommu_map(v->domain, iova, pa, size,
 990			      perm_to_iommu_flags(perm),
 991			      GFP_KERNEL_ACCOUNT);
 992	}
 993	if (r) {
 994		vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
 995		return r;
 996	}
 997
 998	if (!vdpa->use_va)
 999		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
1000
1001	return 0;
1002}
1003
1004static void vhost_vdpa_unmap(struct vhost_vdpa *v,
1005			     struct vhost_iotlb *iotlb,
1006			     u64 iova, u64 size)
1007{
1008	struct vdpa_device *vdpa = v->vdpa;
1009	const struct vdpa_config_ops *ops = vdpa->config;
1010	u32 asid = iotlb_to_asid(iotlb);
1011
1012	vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid);
1013
1014	if (ops->set_map) {
1015		if (!v->in_batch)
1016			ops->set_map(vdpa, asid, iotlb);
1017	}
1018
1019}
1020
1021static int vhost_vdpa_va_map(struct vhost_vdpa *v,
1022			     struct vhost_iotlb *iotlb,
1023			     u64 iova, u64 size, u64 uaddr, u32 perm)
1024{
1025	struct vhost_dev *dev = &v->vdev;
1026	u64 offset, map_size, map_iova = iova;
1027	struct vdpa_map_file *map_file;
1028	struct vm_area_struct *vma;
1029	int ret = 0;
1030
1031	mmap_read_lock(dev->mm);
1032
1033	while (size) {
1034		vma = find_vma(dev->mm, uaddr);
1035		if (!vma) {
1036			ret = -EINVAL;
1037			break;
1038		}
1039		map_size = min(size, vma->vm_end - uaddr);
1040		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
1041			!(vma->vm_flags & (VM_IO | VM_PFNMAP))))
1042			goto next;
1043
1044		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
1045		if (!map_file) {
1046			ret = -ENOMEM;
1047			break;
1048		}
1049		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
1050		map_file->offset = offset;
1051		map_file->file = get_file(vma->vm_file);
1052		ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
1053				     perm, map_file);
1054		if (ret) {
1055			fput(map_file->file);
1056			kfree(map_file);
1057			break;
1058		}
1059next:
1060		size -= map_size;
1061		uaddr += map_size;
1062		map_iova += map_size;
1063	}
1064	if (ret)
1065		vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);
1066
1067	mmap_read_unlock(dev->mm);
1068
1069	return ret;
1070}
1071
1072static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
1073			     struct vhost_iotlb *iotlb,
1074			     u64 iova, u64 size, u64 uaddr, u32 perm)
1075{
1076	struct vhost_dev *dev = &v->vdev;
1077	struct page **page_list;
1078	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
1079	unsigned int gup_flags = FOLL_LONGTERM;
1080	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
1081	unsigned long lock_limit, sz2pin, nchunks, i;
1082	u64 start = iova;
1083	long pinned;
1084	int ret = 0;
1085
1086	/* Limit the use of memory for bookkeeping */
1087	page_list = (struct page **) __get_free_page(GFP_KERNEL);
1088	if (!page_list)
1089		return -ENOMEM;
1090
1091	if (perm & VHOST_ACCESS_WO)
1092		gup_flags |= FOLL_WRITE;
1093
1094	npages = PFN_UP(size + (iova & ~PAGE_MASK));
1095	if (!npages) {
1096		ret = -EINVAL;
1097		goto free;
1098	}
1099
1100	mmap_read_lock(dev->mm);
1101
1102	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
1103	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
1104		ret = -ENOMEM;
1105		goto unlock;
1106	}
1107
1108	cur_base = uaddr & PAGE_MASK;
1109	iova &= PAGE_MASK;
1110	nchunks = 0;
1111
1112	while (npages) {
1113		sz2pin = min_t(unsigned long, npages, list_size);
1114		pinned = pin_user_pages(cur_base, sz2pin,
1115					gup_flags, page_list);
1116		if (sz2pin != pinned) {
1117			if (pinned < 0) {
1118				ret = pinned;
1119			} else {
1120				unpin_user_pages(page_list, pinned);
1121				ret = -ENOMEM;
1122			}
1123			goto out;
1124		}
1125		nchunks++;
1126
1127		if (!last_pfn)
1128			map_pfn = page_to_pfn(page_list[0]);
1129
1130		for (i = 0; i < pinned; i++) {
1131			unsigned long this_pfn = page_to_pfn(page_list[i]);
1132			u64 csize;
1133
1134			if (last_pfn && (this_pfn != last_pfn + 1)) {
1135				/* Pin a contiguous chunk of memory */
1136				csize = PFN_PHYS(last_pfn - map_pfn + 1);
1137				ret = vhost_vdpa_map(v, iotlb, iova, csize,
1138						     PFN_PHYS(map_pfn),
1139						     perm, NULL);
1140				if (ret) {
1141					/*
1142					 * Unpin the pages that are left unmapped
1143					 * from this point on in the current
1144					 * page_list. The remaining outstanding
1145					 * ones which may stride across several
1146					 * chunks will be covered in the common
1147					 * error path subsequently.
1148					 */
1149					unpin_user_pages(&page_list[i],
1150							 pinned - i);
1151					goto out;
1152				}
1153
1154				map_pfn = this_pfn;
1155				iova += csize;
1156				nchunks = 0;
1157			}
1158
1159			last_pfn = this_pfn;
1160		}
1161
1162		cur_base += PFN_PHYS(pinned);
1163		npages -= pinned;
1164	}
1165
 1166	/* Map the last contiguous chunk of pinned pages */
1167	ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
1168			     PFN_PHYS(map_pfn), perm, NULL);
1169out:
1170	if (ret) {
1171		if (nchunks) {
1172			unsigned long pfn;
1173
1174			/*
 1175			 * Unpin the outstanding pages that were pinned but
 1176			 * not yet mapped, due to vhost_vdpa_map() or
 1177			 * pin_user_pages() failure.
 1178			 *
 1179			 * Mapped pages are accounted in vhost_vdpa_map(),
 1180			 * hence the corresponding unpinning will be handled
 1181			 * by vhost_vdpa_unmap().
1182			 */
1183			WARN_ON(!last_pfn);
1184			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
1185				unpin_user_page(pfn_to_page(pfn));
1186		}
1187		vhost_vdpa_unmap(v, iotlb, start, size);
1188	}
1189unlock:
1190	mmap_read_unlock(dev->mm);
1191free:
1192	free_page((unsigned long)page_list);
1193	return ret;
1194
1195}
1196
1197static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
1198					   struct vhost_iotlb *iotlb,
1199					   struct vhost_iotlb_msg *msg)
1200{
1201	struct vdpa_device *vdpa = v->vdpa;
1202
1203	if (msg->iova < v->range.first || !msg->size ||
1204	    msg->iova > U64_MAX - msg->size + 1 ||
1205	    msg->iova + msg->size - 1 > v->range.last)
1206		return -EINVAL;
1207
1208	if (vhost_iotlb_itree_first(iotlb, msg->iova,
1209				    msg->iova + msg->size - 1))
1210		return -EEXIST;
1211
1212	if (vdpa->use_va)
1213		return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
1214					 msg->uaddr, msg->perm);
1215
1216	return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr,
1217				 msg->perm);
1218}
1219
1220static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
1221					struct vhost_iotlb_msg *msg)
1222{
1223	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
1224	struct vdpa_device *vdpa = v->vdpa;
1225	const struct vdpa_config_ops *ops = vdpa->config;
1226	struct vhost_iotlb *iotlb = NULL;
1227	struct vhost_vdpa_as *as = NULL;
1228	int r = 0;
1229
1230	mutex_lock(&dev->mutex);
1231
1232	r = vhost_dev_check_owner(dev);
1233	if (r)
1234		goto unlock;
1235
1236	if (msg->type == VHOST_IOTLB_UPDATE ||
1237	    msg->type == VHOST_IOTLB_BATCH_BEGIN) {
1238		as = vhost_vdpa_find_alloc_as(v, asid);
1239		if (!as) {
1240			dev_err(&v->dev, "can't find and alloc asid %d\n",
1241				asid);
1242			r = -EINVAL;
1243			goto unlock;
1244		}
1245		iotlb = &as->iotlb;
1246	} else
1247		iotlb = asid_to_iotlb(v, asid);
1248
1249	if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
1250		if (v->in_batch && v->batch_asid != asid) {
1251			dev_info(&v->dev, "batch id %d asid %d\n",
1252				 v->batch_asid, asid);
1253		}
1254		if (!iotlb)
1255			dev_err(&v->dev, "no iotlb for asid %d\n", asid);
1256		r = -EINVAL;
1257		goto unlock;
1258	}
1259
1260	switch (msg->type) {
1261	case VHOST_IOTLB_UPDATE:
1262		r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
1263		break;
1264	case VHOST_IOTLB_INVALIDATE:
1265		vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
1266		break;
1267	case VHOST_IOTLB_BATCH_BEGIN:
1268		v->batch_asid = asid;
1269		v->in_batch = true;
1270		break;
1271	case VHOST_IOTLB_BATCH_END:
1272		if (v->in_batch && ops->set_map)
1273			ops->set_map(vdpa, asid, iotlb);
1274		v->in_batch = false;
1275		break;
1276	default:
1277		r = -EINVAL;
1278		break;
1279	}
1280unlock:
1281	mutex_unlock(&dev->mutex);
1282
1283	return r;
1284}
1285
1286static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
1287					 struct iov_iter *from)
1288{
1289	struct file *file = iocb->ki_filp;
1290	struct vhost_vdpa *v = file->private_data;
1291	struct vhost_dev *dev = &v->vdev;
1292
1293	return vhost_chr_write_iter(dev, from);
1294}
1295
1296static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
1297{
1298	struct vdpa_device *vdpa = v->vdpa;
1299	const struct vdpa_config_ops *ops = vdpa->config;
1300	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
1301	const struct bus_type *bus;
1302	int ret;
1303
 1304	/* Device wants to do DMA by itself */
1305	if (ops->set_map || ops->dma_map)
1306		return 0;
1307
1308	bus = dma_dev->bus;
1309	if (!bus)
1310		return -EFAULT;
1311
1312	if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) {
1313		dev_warn_once(&v->dev,
1314			      "Failed to allocate domain, device is not IOMMU cache coherent capable\n");
1315		return -ENOTSUPP;
1316	}
1317
1318	v->domain = iommu_domain_alloc(bus);
1319	if (!v->domain)
1320		return -EIO;
1321
1322	ret = iommu_attach_device(v->domain, dma_dev);
1323	if (ret)
1324		goto err_attach;
1325
1326	return 0;
1327
1328err_attach:
1329	iommu_domain_free(v->domain);
1330	v->domain = NULL;
1331	return ret;
1332}
1333
1334static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
1335{
1336	struct vdpa_device *vdpa = v->vdpa;
1337	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
1338
1339	if (v->domain) {
1340		iommu_detach_device(v->domain, dma_dev);
1341		iommu_domain_free(v->domain);
1342	}
1343
1344	v->domain = NULL;
1345}
1346
1347static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
1348{
1349	struct vdpa_iova_range *range = &v->range;
1350	struct vdpa_device *vdpa = v->vdpa;
1351	const struct vdpa_config_ops *ops = vdpa->config;
1352
1353	if (ops->get_iova_range) {
1354		*range = ops->get_iova_range(vdpa);
1355	} else if (v->domain && v->domain->geometry.force_aperture) {
1356		range->first = v->domain->geometry.aperture_start;
1357		range->last = v->domain->geometry.aperture_end;
1358	} else {
1359		range->first = 0;
1360		range->last = ULLONG_MAX;
1361	}
1362}
1363
1364static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
1365{
1366	struct vhost_vdpa_as *as;
1367	u32 asid;
1368
1369	for (asid = 0; asid < v->vdpa->nas; asid++) {
1370		as = asid_to_as(v, asid);
1371		if (as)
1372			vhost_vdpa_remove_as(v, asid);
1373	}
1374
1375	vhost_vdpa_free_domain(v);
1376	vhost_dev_cleanup(&v->vdev);
1377	kfree(v->vdev.vqs);
1378	v->vdev.vqs = NULL;
1379}
1380
1381static int vhost_vdpa_open(struct inode *inode, struct file *filep)
1382{
1383	struct vhost_vdpa *v;
1384	struct vhost_dev *dev;
1385	struct vhost_virtqueue **vqs;
1386	int r, opened;
1387	u32 i, nvqs;
1388
1389	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
1390
1391	opened = atomic_cmpxchg(&v->opened, 0, 1);
1392	if (opened)
1393		return -EBUSY;
1394
1395	nvqs = v->nvqs;
1396	r = vhost_vdpa_reset(v);
1397	if (r)
1398		goto err;
1399
1400	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
1401	if (!vqs) {
1402		r = -ENOMEM;
1403		goto err;
1404	}
1405
1406	dev = &v->vdev;
1407	for (i = 0; i < nvqs; i++) {
1408		vqs[i] = &v->vqs[i];
1409		vqs[i]->handle_kick = handle_vq_kick;
1410	}
1411	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
1412		       vhost_vdpa_process_iotlb_msg);
1413
1414	r = vhost_vdpa_alloc_domain(v);
1415	if (r)
1416		goto err_alloc_domain;
1417
1418	vhost_vdpa_set_iova_range(v);
1419
1420	filep->private_data = v;
1421
1422	return 0;
1423
1424err_alloc_domain:
1425	vhost_vdpa_cleanup(v);
1426err:
1427	atomic_dec(&v->opened);
1428	return r;
1429}
1430
1431static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
1432{
1433	u32 i;
1434
1435	for (i = 0; i < v->nvqs; i++)
1436		vhost_vdpa_unsetup_vq_irq(v, i);
1437}
1438
1439static int vhost_vdpa_release(struct inode *inode, struct file *filep)
1440{
1441	struct vhost_vdpa *v = filep->private_data;
1442	struct vhost_dev *d = &v->vdev;
1443
1444	mutex_lock(&d->mutex);
1445	filep->private_data = NULL;
1446	vhost_vdpa_clean_irq(v);
1447	vhost_vdpa_reset(v);
1448	vhost_dev_stop(&v->vdev);
1449	vhost_vdpa_unbind_mm(v);
1450	vhost_vdpa_config_put(v);
1451	vhost_vdpa_cleanup(v);
1452	mutex_unlock(&d->mutex);
1453
1454	atomic_dec(&v->opened);
1455	complete(&v->completion);
1456
1457	return 0;
1458}
1459
1460#ifdef CONFIG_MMU
1461static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
1462{
1463	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
1464	struct vdpa_device *vdpa = v->vdpa;
1465	const struct vdpa_config_ops *ops = vdpa->config;
1466	struct vdpa_notification_area notify;
1467	struct vm_area_struct *vma = vmf->vma;
1468	u16 index = vma->vm_pgoff;
1469
1470	notify = ops->get_vq_notification(vdpa, index);
1471
1472	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1473	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
1474			    PFN_DOWN(notify.addr), PAGE_SIZE,
1475			    vma->vm_page_prot))
1476		return VM_FAULT_SIGBUS;
1477
1478	return VM_FAULT_NOPAGE;
1479}
1480
1481static const struct vm_operations_struct vhost_vdpa_vm_ops = {
1482	.fault = vhost_vdpa_fault,
1483};
1484
1485static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
1486{
1487	struct vhost_vdpa *v = vma->vm_file->private_data;
1488	struct vdpa_device *vdpa = v->vdpa;
1489	const struct vdpa_config_ops *ops = vdpa->config;
1490	struct vdpa_notification_area notify;
1491	unsigned long index = vma->vm_pgoff;
1492
1493	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1494		return -EINVAL;
1495	if ((vma->vm_flags & VM_SHARED) == 0)
1496		return -EINVAL;
1497	if (vma->vm_flags & VM_READ)
1498		return -EINVAL;
1499	if (index > 65535)
1500		return -EINVAL;
1501	if (!ops->get_vq_notification)
1502		return -ENOTSUPP;
1503
 1504	/* To be safe and easily modelled by userspace, we only
 1505	 * support doorbells that sit on a page boundary and do
 1506	 * not share the page with other registers.
1507	 */
1508	notify = ops->get_vq_notification(vdpa, index);
1509	if (notify.addr & (PAGE_SIZE - 1))
1510		return -EINVAL;
1511	if (vma->vm_end - vma->vm_start != notify.size)
1512		return -ENOTSUPP;
1513
1514	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
1515	vma->vm_ops = &vhost_vdpa_vm_ops;
1516	return 0;
1517}
1518#endif /* CONFIG_MMU */
1519
1520static const struct file_operations vhost_vdpa_fops = {
1521	.owner		= THIS_MODULE,
1522	.open		= vhost_vdpa_open,
1523	.release	= vhost_vdpa_release,
1524	.write_iter	= vhost_vdpa_chr_write_iter,
1525	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
1526#ifdef CONFIG_MMU
1527	.mmap		= vhost_vdpa_mmap,
1528#endif /* CONFIG_MMU */
1529	.compat_ioctl	= compat_ptr_ioctl,
1530};
1531
1532static void vhost_vdpa_release_dev(struct device *device)
1533{
1534	struct vhost_vdpa *v =
1535	       container_of(device, struct vhost_vdpa, dev);
1536
1537	ida_simple_remove(&vhost_vdpa_ida, v->minor);
1538	kfree(v->vqs);
1539	kfree(v);
1540}
1541
1542static int vhost_vdpa_probe(struct vdpa_device *vdpa)
1543{
1544	const struct vdpa_config_ops *ops = vdpa->config;
1545	struct vhost_vdpa *v;
1546	int minor;
1547	int i, r;
1548
 1549	/* We can't support a platform IOMMU device with more than
 1550	 * one group or address space.
1551	 */
1552	if (!ops->set_map && !ops->dma_map &&
1553	    (vdpa->ngroups > 1 || vdpa->nas > 1))
1554		return -EOPNOTSUPP;
1555
1556	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
1557	if (!v)
1558		return -ENOMEM;
1559
1560	minor = ida_simple_get(&vhost_vdpa_ida, 0,
1561			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
1562	if (minor < 0) {
1563		kfree(v);
1564		return minor;
1565	}
1566
1567	atomic_set(&v->opened, 0);
1568	v->minor = minor;
1569	v->vdpa = vdpa;
1570	v->nvqs = vdpa->nvqs;
1571	v->virtio_id = ops->get_device_id(vdpa);
1572
1573	device_initialize(&v->dev);
1574	v->dev.release = vhost_vdpa_release_dev;
1575	v->dev.parent = &vdpa->dev;
1576	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
1577	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
1578			       GFP_KERNEL);
1579	if (!v->vqs) {
1580		r = -ENOMEM;
1581		goto err;
1582	}
1583
1584	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
1585	if (r)
1586		goto err;
1587
1588	cdev_init(&v->cdev, &vhost_vdpa_fops);
1589	v->cdev.owner = THIS_MODULE;
1590
1591	r = cdev_device_add(&v->cdev, &v->dev);
1592	if (r)
1593		goto err;
1594
1595	init_completion(&v->completion);
1596	vdpa_set_drvdata(vdpa, v);
1597
1598	for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
1599		INIT_HLIST_HEAD(&v->as[i]);
1600
1601	return 0;
1602
1603err:
1604	put_device(&v->dev);
1605	return r;
1606}
1607
1608static void vhost_vdpa_remove(struct vdpa_device *vdpa)
1609{
1610	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
1611	int opened;
1612
1613	cdev_device_del(&v->cdev, &v->dev);
1614
1615	do {
1616		opened = atomic_cmpxchg(&v->opened, 0, 1);
1617		if (!opened)
1618			break;
1619		wait_for_completion(&v->completion);
1620	} while (1);
1621
1622	put_device(&v->dev);
1623}
1624
1625static struct vdpa_driver vhost_vdpa_driver = {
1626	.driver = {
1627		.name	= "vhost_vdpa",
1628	},
1629	.probe	= vhost_vdpa_probe,
1630	.remove	= vhost_vdpa_remove,
1631};
1632
1633static int __init vhost_vdpa_init(void)
1634{
1635	int r;
1636
1637	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
1638				"vhost-vdpa");
1639	if (r)
1640		goto err_alloc_chrdev;
1641
1642	r = vdpa_register_driver(&vhost_vdpa_driver);
1643	if (r)
1644		goto err_vdpa_register_driver;
1645
1646	return 0;
1647
1648err_vdpa_register_driver:
1649	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1650err_alloc_chrdev:
1651	return r;
1652}
1653module_init(vhost_vdpa_init);
1654
1655static void __exit vhost_vdpa_exit(void)
1656{
1657	vdpa_unregister_driver(&vhost_vdpa_driver);
1658	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1659}
1660module_exit(vhost_vdpa_exit);
1661
1662MODULE_VERSION("0.0.1");
1663MODULE_LICENSE("GPL v2");
1664MODULE_AUTHOR("Intel Corporation");
1665MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");
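
The ioctls above are reached from userspace through the character device this driver registers (named "vhost-vdpa-%u"). The short sketch below is not part of the kernel tree; it only shows how a VMM-like program might probe such a device. The node path /dev/vhost-vdpa-0 and the minimal error handling are illustrative assumptions, and only ioctls defined in <linux/vhost.h> are used.

/*
 * Hypothetical userspace sketch: probe a vhost-vdpa device via the
 * ioctls implemented by the driver above. The device node path is an
 * assumption; adjust it to the actual minor number on your system.
 */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

int main(void)
{
	uint32_t device_id;
	uint64_t features, backend_features;
	struct vhost_vdpa_iova_range iova;
	int fd = open("/dev/vhost-vdpa-0", O_RDWR);	/* assumed node name */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Take ownership; vhost_vdpa_unlocked_ioctl() also binds the mm here. */
	if (ioctl(fd, VHOST_SET_OWNER, NULL))
		perror("VHOST_SET_OWNER");

	/* Served by vhost_vdpa_get_device_id() and vhost_vdpa_get_features(). */
	if (!ioctl(fd, VHOST_VDPA_GET_DEVICE_ID, &device_id))
		printf("virtio device id: %u\n", device_id);
	if (!ioctl(fd, VHOST_GET_FEATURES, &features))
		printf("device features: 0x%llx\n", (unsigned long long)features);

	/* Negotiate backend features before sending IOTLB messages. */
	if (!ioctl(fd, VHOST_GET_BACKEND_FEATURES, &backend_features) &&
	    !ioctl(fd, VHOST_SET_BACKEND_FEATURES, &backend_features))
		printf("backend features: 0x%llx\n",
		       (unsigned long long)backend_features);

	/* Served by vhost_vdpa_get_iova_range(). */
	if (!ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, &iova))
		printf("iova range: [0x%llx, 0x%llx]\n",
		       (unsigned long long)iova.first,
		       (unsigned long long)iova.last);

	close(fd);
	return 0;
}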
v5.14.15
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2018-2020 Intel Corporation.
   4 * Copyright (C) 2020 Red Hat, Inc.
   5 *
   6 * Author: Tiwei Bie <tiwei.bie@intel.com>
   7 *         Jason Wang <jasowang@redhat.com>
   8 *
   9 * Thanks Michael S. Tsirkin for the valuable comments and
  10 * suggestions.  And thanks to Cunming Liang and Zhihong Wang for all
  11 * their supports.
  12 */
  13
  14#include <linux/kernel.h>
  15#include <linux/module.h>
  16#include <linux/cdev.h>
  17#include <linux/device.h>
  18#include <linux/mm.h>
  19#include <linux/slab.h>
  20#include <linux/iommu.h>
  21#include <linux/uuid.h>
  22#include <linux/vdpa.h>
  23#include <linux/nospec.h>
  24#include <linux/vhost.h>
  25
  26#include "vhost.h"
  27
  28enum {
  29	VHOST_VDPA_BACKEND_FEATURES =
  30	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
  31	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
 
  32};
  33
  34#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
  35
 
 
 
 
 
 
 
 
  36struct vhost_vdpa {
  37	struct vhost_dev vdev;
  38	struct iommu_domain *domain;
  39	struct vhost_virtqueue *vqs;
  40	struct completion completion;
  41	struct vdpa_device *vdpa;
 
  42	struct device dev;
  43	struct cdev cdev;
  44	atomic_t opened;
  45	int nvqs;
  46	int virtio_id;
  47	int minor;
  48	struct eventfd_ctx *config_ctx;
  49	int in_batch;
  50	struct vdpa_iova_range range;
 
 
  51};
  52
  53static DEFINE_IDA(vhost_vdpa_ida);
  54
  55static dev_t vhost_vdpa_major;
  56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  57static void handle_vq_kick(struct vhost_work *work)
  58{
  59	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
  60						  poll.work);
  61	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
  62	const struct vdpa_config_ops *ops = v->vdpa->config;
  63
  64	ops->kick_vq(v->vdpa, vq - v->vqs);
  65}
  66
  67static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
  68{
  69	struct vhost_virtqueue *vq = private;
  70	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
  71
  72	if (call_ctx)
  73		eventfd_signal(call_ctx, 1);
  74
  75	return IRQ_HANDLED;
  76}
  77
  78static irqreturn_t vhost_vdpa_config_cb(void *private)
  79{
  80	struct vhost_vdpa *v = private;
  81	struct eventfd_ctx *config_ctx = v->config_ctx;
  82
  83	if (config_ctx)
  84		eventfd_signal(config_ctx, 1);
  85
  86	return IRQ_HANDLED;
  87}
  88
  89static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
  90{
  91	struct vhost_virtqueue *vq = &v->vqs[qid];
  92	const struct vdpa_config_ops *ops = v->vdpa->config;
  93	struct vdpa_device *vdpa = v->vdpa;
  94	int ret, irq;
  95
  96	if (!ops->get_vq_irq)
  97		return;
  98
  99	irq = ops->get_vq_irq(vdpa, qid);
 
 
 
 100	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 101	if (!vq->call_ctx.ctx || irq < 0)
 102		return;
 103
 104	vq->call_ctx.producer.token = vq->call_ctx.ctx;
 105	vq->call_ctx.producer.irq = irq;
 106	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
 107	if (unlikely(ret))
 108		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret =  %d\n",
 109			 qid, vq->call_ctx.producer.token, ret);
 110}
 111
 112static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
 113{
 114	struct vhost_virtqueue *vq = &v->vqs[qid];
 115
 116	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 117}
 118
 119static void vhost_vdpa_reset(struct vhost_vdpa *v)
 120{
 121	struct vdpa_device *vdpa = v->vdpa;
 
 122
 123	vdpa_reset(vdpa);
 
 
 
 
 
 
 
 
 
 
 
 
 124	v->in_batch = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 125}
 126
 127static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
 128{
 129	struct vdpa_device *vdpa = v->vdpa;
 130	const struct vdpa_config_ops *ops = vdpa->config;
 131	u32 device_id;
 132
 133	device_id = ops->get_device_id(vdpa);
 134
 135	if (copy_to_user(argp, &device_id, sizeof(device_id)))
 136		return -EFAULT;
 137
 138	return 0;
 139}
 140
 141static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
 142{
 143	struct vdpa_device *vdpa = v->vdpa;
 144	const struct vdpa_config_ops *ops = vdpa->config;
 145	u8 status;
 146
 147	status = ops->get_status(vdpa);
 148
 149	if (copy_to_user(statusp, &status, sizeof(status)))
 150		return -EFAULT;
 151
 152	return 0;
 153}
 154
 155static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
 156{
 157	struct vdpa_device *vdpa = v->vdpa;
 158	const struct vdpa_config_ops *ops = vdpa->config;
 159	u8 status, status_old;
 160	int nvqs = v->nvqs;
 
 161	u16 i;
 162
 163	if (copy_from_user(&status, statusp, sizeof(status)))
 164		return -EFAULT;
 165
 166	status_old = ops->get_status(vdpa);
 167
 168	/*
 169	 * Userspace shouldn't remove status bits unless reset the
 170	 * status to 0.
 171	 */
 172	if (status != 0 && (ops->get_status(vdpa) & ~status) != 0)
 173		return -EINVAL;
 174
 175	ops->set_status(vdpa, status);
 
 
 
 
 
 
 
 
 
 176
 177	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
 178		for (i = 0; i < nvqs; i++)
 179			vhost_vdpa_setup_vq_irq(v, i);
 180
 181	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
 182		for (i = 0; i < nvqs; i++)
 183			vhost_vdpa_unsetup_vq_irq(v, i);
 184
 185	return 0;
 186}
 187
 188static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
 189				      struct vhost_vdpa_config *c)
 190{
 191	struct vdpa_device *vdpa = v->vdpa;
 192	long size = vdpa->config->get_config_size(vdpa);
 193
 194	if (c->len == 0)
 195		return -EINVAL;
 196
 197	if (c->len > size - c->off)
 198		return -E2BIG;
 199
 200	return 0;
 201}
 202
 203static long vhost_vdpa_get_config(struct vhost_vdpa *v,
 204				  struct vhost_vdpa_config __user *c)
 205{
 206	struct vdpa_device *vdpa = v->vdpa;
 207	struct vhost_vdpa_config config;
 208	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 209	u8 *buf;
 210
 211	if (copy_from_user(&config, c, size))
 212		return -EFAULT;
 213	if (vhost_vdpa_config_validate(v, &config))
 214		return -EINVAL;
 215	buf = kvzalloc(config.len, GFP_KERNEL);
 216	if (!buf)
 217		return -ENOMEM;
 218
 219	vdpa_get_config(vdpa, config.off, buf, config.len);
 220
 221	if (copy_to_user(c->buf, buf, config.len)) {
 222		kvfree(buf);
 223		return -EFAULT;
 224	}
 225
 226	kvfree(buf);
 227	return 0;
 228}
 229
 230static long vhost_vdpa_set_config(struct vhost_vdpa *v,
 231				  struct vhost_vdpa_config __user *c)
 232{
 233	struct vdpa_device *vdpa = v->vdpa;
 234	const struct vdpa_config_ops *ops = vdpa->config;
 235	struct vhost_vdpa_config config;
 236	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 237	u8 *buf;
 238
 239	if (copy_from_user(&config, c, size))
 240		return -EFAULT;
 241	if (vhost_vdpa_config_validate(v, &config))
 242		return -EINVAL;
 243
 244	buf = vmemdup_user(c->buf, config.len);
 245	if (IS_ERR(buf))
 246		return PTR_ERR(buf);
 247
 248	ops->set_config(vdpa, config.off, buf, config.len);
 249
 250	kvfree(buf);
 251	return 0;
 252}
 253
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 254static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
 255{
 256	struct vdpa_device *vdpa = v->vdpa;
 257	const struct vdpa_config_ops *ops = vdpa->config;
 258	u64 features;
 259
 260	features = ops->get_features(vdpa);
 261
 262	if (copy_to_user(featurep, &features, sizeof(features)))
 263		return -EFAULT;
 264
 265	return 0;
 266}
 267
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 268static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
 269{
 270	struct vdpa_device *vdpa = v->vdpa;
 271	const struct vdpa_config_ops *ops = vdpa->config;
 
 
 272	u64 features;
 
 273
 274	/*
 275	 * It's not allowed to change the features after they have
 276	 * been negotiated.
 277	 */
 278	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
 279		return -EBUSY;
 280
 281	if (copy_from_user(&features, featurep, sizeof(features)))
 282		return -EFAULT;
 283
 284	if (vdpa_set_features(vdpa, features))
 285		return -EINVAL;
 286
 
 
 
 
 
 
 
 
 
 
 287	return 0;
 288}
 289
 290static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
 291{
 292	struct vdpa_device *vdpa = v->vdpa;
 293	const struct vdpa_config_ops *ops = vdpa->config;
 294	u16 num;
 295
 296	num = ops->get_vq_num_max(vdpa);
 297
 298	if (copy_to_user(argp, &num, sizeof(num)))
 299		return -EFAULT;
 300
 301	return 0;
 302}
 303
 304static void vhost_vdpa_config_put(struct vhost_vdpa *v)
 305{
 306	if (v->config_ctx) {
 307		eventfd_ctx_put(v->config_ctx);
 308		v->config_ctx = NULL;
 309	}
 310}
 311
 312static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
 313{
 314	struct vdpa_callback cb;
 315	int fd;
 316	struct eventfd_ctx *ctx;
 317
 318	cb.callback = vhost_vdpa_config_cb;
 319	cb.private = v;
 320	if (copy_from_user(&fd, argp, sizeof(fd)))
 321		return  -EFAULT;
 322
 323	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
 324	swap(ctx, v->config_ctx);
 325
 326	if (!IS_ERR_OR_NULL(ctx))
 327		eventfd_ctx_put(ctx);
 328
 329	if (IS_ERR(v->config_ctx)) {
 330		long ret = PTR_ERR(v->config_ctx);
 331
 332		v->config_ctx = NULL;
 333		return ret;
 334	}
 335
 336	v->vdpa->config->set_config_cb(v->vdpa, &cb);
 337
 338	return 0;
 339}
 340
 341static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
 342{
 343	struct vhost_vdpa_iova_range range = {
 344		.first = v->range.first,
 345		.last = v->range.last,
 346	};
 347
 348	if (copy_to_user(argp, &range, sizeof(range)))
 349		return -EFAULT;
 350	return 0;
 351}
 352
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 353static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 354				   void __user *argp)
 355{
 356	struct vdpa_device *vdpa = v->vdpa;
 357	const struct vdpa_config_ops *ops = vdpa->config;
 358	struct vdpa_vq_state vq_state;
 359	struct vdpa_callback cb;
 360	struct vhost_virtqueue *vq;
 361	struct vhost_vring_state s;
 362	u32 idx;
 363	long r;
 364
 365	r = get_user(idx, (u32 __user *)argp);
 366	if (r < 0)
 367		return r;
 368
 369	if (idx >= v->nvqs)
 370		return -ENOBUFS;
 371
 372	idx = array_index_nospec(idx, v->nvqs);
 373	vq = &v->vqs[idx];
 374
 375	switch (cmd) {
 376	case VHOST_VDPA_SET_VRING_ENABLE:
 377		if (copy_from_user(&s, argp, sizeof(s)))
 378			return -EFAULT;
 379		ops->set_vq_ready(vdpa, idx, s.num);
 380		return 0;
 381	case VHOST_GET_VRING_BASE:
 382		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
 383		if (r)
 384			return r;
 385
 386		vq->last_avail_idx = vq_state.split.avail_index;
 387		break;
 388	}
 389
 390	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
 391	if (r)
 392		return r;
 393
 394	switch (cmd) {
 395	case VHOST_SET_VRING_ADDR:
 396		if (ops->set_vq_address(vdpa, idx,
 397					(u64)(uintptr_t)vq->desc,
 398					(u64)(uintptr_t)vq->avail,
 399					(u64)(uintptr_t)vq->used))
 400			r = -EINVAL;
 401		break;
 402
 403	case VHOST_SET_VRING_BASE:
 404		vq_state.split.avail_index = vq->last_avail_idx;
 405		if (ops->set_vq_state(vdpa, idx, &vq_state))
 406			r = -EINVAL;
 407		break;
 408
 409	case VHOST_SET_VRING_CALL:
 410		if (vq->call_ctx.ctx) {
 411			cb.callback = vhost_vdpa_virtqueue_cb;
 412			cb.private = vq;
 413		} else {
 414			cb.callback = NULL;
 415			cb.private = NULL;
 416		}
 417		ops->set_vq_cb(vdpa, idx, &cb);
 418		vhost_vdpa_setup_vq_irq(v, idx);
 419		break;
 420
 421	case VHOST_SET_VRING_NUM:
 422		ops->set_vq_num(vdpa, idx, vq->num);
 423		break;
 424	}
 425
 426	return r;
 427}
 428
 429static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 430				      unsigned int cmd, unsigned long arg)
 431{
 432	struct vhost_vdpa *v = filep->private_data;
 433	struct vhost_dev *d = &v->vdev;
 434	void __user *argp = (void __user *)arg;
 435	u64 __user *featurep = argp;
 436	u64 features;
 437	long r = 0;
 438
 439	if (cmd == VHOST_SET_BACKEND_FEATURES) {
 440		if (copy_from_user(&features, featurep, sizeof(features)))
 441			return -EFAULT;
 442		if (features & ~VHOST_VDPA_BACKEND_FEATURES)
 443			return -EOPNOTSUPP;
 444		vhost_set_backend_features(&v->vdev, features);
 445		return 0;
 446	}
 447
 448	mutex_lock(&d->mutex);
 449
 450	switch (cmd) {
 451	case VHOST_VDPA_GET_DEVICE_ID:
 452		r = vhost_vdpa_get_device_id(v, argp);
 453		break;
 454	case VHOST_VDPA_GET_STATUS:
 455		r = vhost_vdpa_get_status(v, argp);
 456		break;
 457	case VHOST_VDPA_SET_STATUS:
 458		r = vhost_vdpa_set_status(v, argp);
 459		break;
 460	case VHOST_VDPA_GET_CONFIG:
 461		r = vhost_vdpa_get_config(v, argp);
 462		break;
 463	case VHOST_VDPA_SET_CONFIG:
 464		r = vhost_vdpa_set_config(v, argp);
 465		break;
 466	case VHOST_GET_FEATURES:
 467		r = vhost_vdpa_get_features(v, argp);
 468		break;
 469	case VHOST_SET_FEATURES:
 470		r = vhost_vdpa_set_features(v, argp);
 471		break;
 472	case VHOST_VDPA_GET_VRING_NUM:
 473		r = vhost_vdpa_get_vring_num(v, argp);
 474		break;
 475	case VHOST_SET_LOG_BASE:
 476	case VHOST_SET_LOG_FD:
 477		r = -ENOIOCTLCMD;
 478		break;
 479	case VHOST_VDPA_SET_CONFIG_CALL:
 480		r = vhost_vdpa_set_config_call(v, argp);
 481		break;
 482	case VHOST_GET_BACKEND_FEATURES:
 483		features = VHOST_VDPA_BACKEND_FEATURES;
 484		if (copy_to_user(featurep, &features, sizeof(features)))
 485			r = -EFAULT;
 486		break;
 487	case VHOST_VDPA_GET_IOVA_RANGE:
 488		r = vhost_vdpa_get_iova_range(v, argp);
 489		break;
 490	default:
 491		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
 492		if (r == -ENOIOCTLCMD)
 493			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
 494		break;
 495	}
 496
 497	mutex_unlock(&d->mutex);
 498	return r;
 499}
 500
 501static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
 502{
 503	struct vhost_dev *dev = &v->vdev;
 504	struct vhost_iotlb *iotlb = dev->iotlb;
 505	struct vhost_iotlb_map *map;
 506	struct page *page;
 507	unsigned long pfn, pinned;
 508
 509	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
 510		pinned = map->size >> PAGE_SHIFT;
 511		for (pfn = map->addr >> PAGE_SHIFT;
 512		     pinned > 0; pfn++, pinned--) {
 513			page = pfn_to_page(pfn);
 514			if (map->perm & VHOST_ACCESS_WO)
 515				set_page_dirty_lock(page);
 516			unpin_user_page(page);
 517		}
 518		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
 519		vhost_iotlb_map_free(iotlb, map);
 520	}
 521}
 522
 523static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
 524{
 525	struct vhost_dev *dev = &v->vdev;
 526
 527	vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1);
 528	kfree(dev->iotlb);
 529	dev->iotlb = NULL;
 530}
 531
 532static int perm_to_iommu_flags(u32 perm)
 533{
 534	int flags = 0;
 535
 536	switch (perm) {
 537	case VHOST_ACCESS_WO:
 538		flags |= IOMMU_WRITE;
 539		break;
 540	case VHOST_ACCESS_RO:
 541		flags |= IOMMU_READ;
 542		break;
 543	case VHOST_ACCESS_RW:
 544		flags |= (IOMMU_WRITE | IOMMU_READ);
 545		break;
 546	default:
 547		WARN(1, "invalid vhost IOTLB permission\n");
 548		break;
 549	}
 550
 551	return flags | IOMMU_CACHE;
 552}
 553
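/*
 * Mapping follows one of three paths depending on the device: a device
 * with a ->dma_map op translates on its own, a device with ->set_map is
 * handed the whole IOTLB (deferred while an IOTLB batch is in flight),
 * and anything else goes through the IOMMU domain allocated in
 * vhost_vdpa_alloc_domain().
 */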
 554static int vhost_vdpa_map(struct vhost_vdpa *v,
 555			  u64 iova, u64 size, u64 pa, u32 perm)
 556{
 557	struct vhost_dev *dev = &v->vdev;
 558	struct vdpa_device *vdpa = v->vdpa;
 559	const struct vdpa_config_ops *ops = vdpa->config;
 560	int r = 0;
 561
 562	r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
 563				  pa, perm);
 564	if (r)
 565		return r;
 566
 567	if (ops->dma_map) {
 568		r = ops->dma_map(vdpa, iova, size, pa, perm);
 569	} else if (ops->set_map) {
 570		if (!v->in_batch)
 571			r = ops->set_map(vdpa, dev->iotlb);
 572	} else {
 573		r = iommu_map(v->domain, iova, pa, size,
 574			      perm_to_iommu_flags(perm));
 575	}
 576
 577	if (r)
 578		vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
 579	else
 580		atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
 581
 582	return r;
 583}
 584
 585static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
 586{
 587	struct vhost_dev *dev = &v->vdev;
 588	struct vdpa_device *vdpa = v->vdpa;
 589	const struct vdpa_config_ops *ops = vdpa->config;
 590
 591	vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1);
 592
 593	if (ops->dma_map) {
 594		ops->dma_unmap(vdpa, iova, size);
 595	} else if (ops->set_map) {
 596		if (!v->in_batch)
 597			ops->set_map(vdpa, dev->iotlb);
 598	} else {
 599		iommu_unmap(v->domain, iova, size);
 600	}
 601}
 602
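/*
 * Pin the userspace range described by msg, accumulate physically
 * contiguous runs of pages and map each run with a single
 * vhost_vdpa_map() call; pinned pages are charged against
 * RLIMIT_MEMLOCK through mm->pinned_vm.
 */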
 603static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 604					   struct vhost_iotlb_msg *msg)
 605{
 606	struct vhost_dev *dev = &v->vdev;
 607	struct vhost_iotlb *iotlb = dev->iotlb;
 608	struct page **page_list;
 609	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
 610	unsigned int gup_flags = FOLL_LONGTERM;
 611	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
 612	unsigned long lock_limit, sz2pin, nchunks, i;
 613	u64 iova = msg->iova;
 614	long pinned;
 615	int ret = 0;
 616
 617	if (msg->iova < v->range.first || !msg->size ||
 618	    msg->iova > U64_MAX - msg->size + 1 ||
 619	    msg->iova + msg->size - 1 > v->range.last)
 620		return -EINVAL;
 621
 622	if (vhost_iotlb_itree_first(iotlb, msg->iova,
 623				    msg->iova + msg->size - 1))
 624		return -EEXIST;
 625
 626	/* Limit the use of memory for bookkeeping */
 627	page_list = (struct page **) __get_free_page(GFP_KERNEL);
 628	if (!page_list)
 629		return -ENOMEM;
 630
 631	if (msg->perm & VHOST_ACCESS_WO)
 632		gup_flags |= FOLL_WRITE;
 633
 634	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
 635	if (!npages) {
 636		ret = -EINVAL;
 637		goto free;
 638	}
 639
 640	mmap_read_lock(dev->mm);
 641
 642	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 643	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
 644		ret = -ENOMEM;
 645		goto unlock;
 646	}
 647
 648	cur_base = msg->uaddr & PAGE_MASK;
 649	iova &= PAGE_MASK;
 650	nchunks = 0;
 651
 652	while (npages) {
 653		sz2pin = min_t(unsigned long, npages, list_size);
 654		pinned = pin_user_pages(cur_base, sz2pin,
 655					gup_flags, page_list, NULL);
 656		if (sz2pin != pinned) {
 657			if (pinned < 0) {
 658				ret = pinned;
 659			} else {
 660				unpin_user_pages(page_list, pinned);
 661				ret = -ENOMEM;
 662			}
 663			goto out;
 664		}
 665		nchunks++;
 666
 667		if (!last_pfn)
 668			map_pfn = page_to_pfn(page_list[0]);
 669
 670		for (i = 0; i < pinned; i++) {
 671			unsigned long this_pfn = page_to_pfn(page_list[i]);
 672			u64 csize;
 673
 674			if (last_pfn && (this_pfn != last_pfn + 1)) {
 675				/* Map the contiguous chunk of pinned pages that just ended */
 676				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
 677				ret = vhost_vdpa_map(v, iova, csize,
 678						     map_pfn << PAGE_SHIFT,
 679						     msg->perm);
 680				if (ret) {
 681					/*
 682					 * Unpin the pages in the current
 683					 * page_list that are left unmapped
 684					 * from this point on. The remaining
 685					 * outstanding ones, which may span
 686					 * several chunks, are covered by the
 687					 * common error path below.
 688					 */
 689					unpin_user_pages(&page_list[i],
 690							 pinned - i);
 691					goto out;
 692				}
 693
 694				map_pfn = this_pfn;
 695				iova += csize;
 696				nchunks = 0;
 697			}
 698
 699			last_pfn = this_pfn;
 700		}
 701
 702		cur_base += pinned << PAGE_SHIFT;
 703		npages -= pinned;
 704	}
 705
 706	/* Map the remaining contiguous chunk of pinned pages */
 707	ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
 708			     map_pfn << PAGE_SHIFT, msg->perm);
 709out:
 710	if (ret) {
 711		if (nchunks) {
 712			unsigned long pfn;
 713
 714			/*
 715			 * Unpin the outstanding pages that were pinned but
 716			 * never mapped because vhost_vdpa_map() or
 717			 * pin_user_pages() failed.
 718			 *
 719			 * Pages that were mapped are accounted in
 720			 * vhost_vdpa_map(), so their unpinning is handled
 721			 * by vhost_vdpa_unmap() below.
 722			 */
 723			WARN_ON(!last_pfn);
 724			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
 725				unpin_user_page(pfn_to_page(pfn));
 726		}
 727		vhost_vdpa_unmap(v, msg->iova, msg->size);
 728	}
 729unlock:
 730	mmap_read_unlock(dev->mm);
 731free:
 732	free_page((unsigned long)page_list);
 733	return ret;
 734}
 735
 736static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
 737					struct vhost_iotlb_msg *msg)
 738{
 739	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
 740	struct vdpa_device *vdpa = v->vdpa;
 741	const struct vdpa_config_ops *ops = vdpa->config;
 742	int r = 0;
 743
 744	mutex_lock(&dev->mutex);
 745
 746	r = vhost_dev_check_owner(dev);
 747	if (r)
 748		goto unlock;
 749
 750	switch (msg->type) {
 751	case VHOST_IOTLB_UPDATE:
 752		r = vhost_vdpa_process_iotlb_update(v, msg);
 753		break;
 754	case VHOST_IOTLB_INVALIDATE:
 755		vhost_vdpa_unmap(v, msg->iova, msg->size);
 756		break;
 757	case VHOST_IOTLB_BATCH_BEGIN:
 758		v->in_batch = true;
 759		break;
 760	case VHOST_IOTLB_BATCH_END:
 761		if (v->in_batch && ops->set_map)
 762			ops->set_map(vdpa, dev->iotlb);
 763		v->in_batch = false;
 764		break;
 765	default:
 766		r = -EINVAL;
 767		break;
 768	}
 769unlock:
 770	mutex_unlock(&dev->mutex);
 771
 772	return r;
 773}
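/*
 * Illustrative userspace sketch (not part of the driver): once the
 * VHOST_BACKEND_F_IOTLB_MSG_V2 backend feature has been negotiated,
 * mappings are installed by writing IOTLB messages to the character
 * device; 'iova', 'size' and 'buf' are placeholders:
 *
 *	struct vhost_msg_v2 msg = {
 *		.type = VHOST_IOTLB_MSG_V2,
 *		.iotlb = {
 *			.iova  = iova,
 *			.size  = size,
 *			.uaddr = (__u64)(uintptr_t)buf,
 *			.perm  = VHOST_ACCESS_RW,
 *			.type  = VHOST_IOTLB_UPDATE,
 *		},
 *	};
 *
 *	write(fd, &msg, sizeof(msg));
 *
 * A VHOST_IOTLB_INVALIDATE message of the same form removes the range
 * again.
 */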
 774
 775static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
 776					 struct iov_iter *from)
 777{
 778	struct file *file = iocb->ki_filp;
 779	struct vhost_vdpa *v = file->private_data;
 780	struct vhost_dev *dev = &v->vdev;
 781
 782	return vhost_chr_write_iter(dev, from);
 783}
 784
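/*
 * Only devices without their own DMA translation (neither ->dma_map nor
 * ->set_map) get an IOMMU domain here; the bus must advertise
 * IOMMU_CAP_CACHE_COHERENCY because every mapping is created with
 * IOMMU_CACHE (see perm_to_iommu_flags()).
 */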
 785static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
 786{
 787	struct vdpa_device *vdpa = v->vdpa;
 788	const struct vdpa_config_ops *ops = vdpa->config;
 789	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
 790	struct bus_type *bus;
 791	int ret;
 792
 793	/* Device wants to do DMA by itself */
 794	if (ops->set_map || ops->dma_map)
 795		return 0;
 796
 797	bus = dma_dev->bus;
 798	if (!bus)
 799		return -EFAULT;
 800
 801	if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
 802		return -ENOTSUPP;
 803
 804	v->domain = iommu_domain_alloc(bus);
 805	if (!v->domain)
 806		return -EIO;
 807
 808	ret = iommu_attach_device(v->domain, dma_dev);
 809	if (ret)
 810		goto err_attach;
 811
 812	return 0;
 813
 814err_attach:
 815	iommu_domain_free(v->domain);
 816	return ret;
 817}
 818
 819static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
 820{
 821	struct vdpa_device *vdpa = v->vdpa;
 822	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
 823
 824	if (v->domain) {
 825		iommu_detach_device(v->domain, dma_dev);
 826		iommu_domain_free(v->domain);
 827	}
 828
 829	v->domain = NULL;
 830}
 831
 832static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
 833{
 834	struct vdpa_iova_range *range = &v->range;
 835	struct vdpa_device *vdpa = v->vdpa;
 836	const struct vdpa_config_ops *ops = vdpa->config;
 837
 838	if (ops->get_iova_range) {
 839		*range = ops->get_iova_range(vdpa);
 840	} else if (v->domain && v->domain->geometry.force_aperture) {
 841		range->first = v->domain->geometry.aperture_start;
 842		range->last = v->domain->geometry.aperture_end;
 843	} else {
 844		range->first = 0;
 845		range->last = ULLONG_MAX;
 846	}
 847}
 848
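/*
 * The character device is exclusive: the opened flag guarantees a single
 * user at a time, and every open starts from a reset device with a fresh
 * vhost_dev, IOTLB and, if required, IOMMU domain.
 */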
 849static int vhost_vdpa_open(struct inode *inode, struct file *filep)
 850{
 851	struct vhost_vdpa *v;
 852	struct vhost_dev *dev;
 853	struct vhost_virtqueue **vqs;
 854	int nvqs, i, r, opened;
 855
 856	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
 857
 858	opened = atomic_cmpxchg(&v->opened, 0, 1);
 859	if (opened)
 860		return -EBUSY;
 861
 862	nvqs = v->nvqs;
 863	vhost_vdpa_reset(v);
 864
 865	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
 866	if (!vqs) {
 867		r = -ENOMEM;
 868		goto err;
 869	}
 870
 871	dev = &v->vdev;
 872	for (i = 0; i < nvqs; i++) {
 873		vqs[i] = &v->vqs[i];
 874		vqs[i]->handle_kick = handle_vq_kick;
 875	}
 876	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
 877		       vhost_vdpa_process_iotlb_msg);
 878
 879	dev->iotlb = vhost_iotlb_alloc(0, 0);
 880	if (!dev->iotlb) {
 881		r = -ENOMEM;
 882		goto err_init_iotlb;
 883	}
 884
 885	r = vhost_vdpa_alloc_domain(v);
 886	if (r)
 887		goto err_init_iotlb;
 888
 889	vhost_vdpa_set_iova_range(v);
 890
 891	filep->private_data = v;
 892
 893	return 0;
 894
 895err_init_iotlb:
 896	vhost_dev_cleanup(&v->vdev);
 897	kfree(vqs);
 898err:
 899	atomic_dec(&v->opened);
 900	return r;
 901}
 902
 903static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
 904{
 905	int i;
 906
 907	for (i = 0; i < v->nvqs; i++)
 908		vhost_vdpa_unsetup_vq_irq(v, i);
 909}
 910
 911static int vhost_vdpa_release(struct inode *inode, struct file *filep)
 912{
 913	struct vhost_vdpa *v = filep->private_data;
 914	struct vhost_dev *d = &v->vdev;
 915
 916	mutex_lock(&d->mutex);
 917	filep->private_data = NULL;
 918	vhost_vdpa_reset(v);
 919	vhost_dev_stop(&v->vdev);
 920	vhost_vdpa_iotlb_free(v);
 921	vhost_vdpa_free_domain(v);
 922	vhost_vdpa_config_put(v);
 923	vhost_vdpa_clean_irq(v);
 924	vhost_dev_cleanup(&v->vdev);
 925	kfree(v->vdev.vqs);
 926	mutex_unlock(&d->mutex);
 927
 928	atomic_dec(&v->opened);
 929	complete(&v->completion);
 930
 931	return 0;
 932}
 933
 934#ifdef CONFIG_MMU
 935static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
 936{
 937	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
 938	struct vdpa_device *vdpa = v->vdpa;
 939	const struct vdpa_config_ops *ops = vdpa->config;
 940	struct vdpa_notification_area notify;
 941	struct vm_area_struct *vma = vmf->vma;
 942	u16 index = vma->vm_pgoff;
 943
 944	notify = ops->get_vq_notification(vdpa, index);
 945
 946	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 947	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
 948			    notify.addr >> PAGE_SHIFT, PAGE_SIZE,
 949			    vma->vm_page_prot))
 950		return VM_FAULT_SIGBUS;
 951
 952	return VM_FAULT_NOPAGE;
 953}
 954
 955static const struct vm_operations_struct vhost_vdpa_vm_ops = {
 956	.fault = vhost_vdpa_fault,
 957};
 958
 959static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
 960{
 961	struct vhost_vdpa *v = vma->vm_file->private_data;
 962	struct vdpa_device *vdpa = v->vdpa;
 963	const struct vdpa_config_ops *ops = vdpa->config;
 964	struct vdpa_notification_area notify;
 965	unsigned long index = vma->vm_pgoff;
 966
 967	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
 968		return -EINVAL;
 969	if ((vma->vm_flags & VM_SHARED) == 0)
 970		return -EINVAL;
 971	if (vma->vm_flags & VM_READ)
 972		return -EINVAL;
 973	if (index > 65535)
 974		return -EINVAL;
 975	if (!ops->get_vq_notification)
 976		return -ENOTSUPP;
 977
 978	/* To be safe and easily modelled by userspace, we only
 979	 * support doorbells that sit on a page boundary and do not
 980	 * share the page with other registers.
 981	 */
 982	notify = ops->get_vq_notification(vdpa, index);
 983	if (notify.addr & (PAGE_SIZE - 1))
 984		return -EINVAL;
 985	if (vma->vm_end - vma->vm_start != notify.size)
 986		return -ENOTSUPP;
 987
 988	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 989	vma->vm_ops = &vhost_vdpa_vm_ops;
 990	return 0;
 991}
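/*
 * Illustrative userspace sketch (not part of the driver): the checks
 * above expose one doorbell page per virtqueue, selected by the mmap
 * offset in pages; 'idx' and 'page_size' are placeholders:
 *
 *	void *db = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED,
 *			fd, idx * page_size);
 *
 * The mapping must be shared and write-only (VM_READ is rejected) and is
 * only available when the parent device implements get_vq_notification()
 * with a page-aligned, page-sized notification area.
 */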
 992#endif /* CONFIG_MMU */
 993
 994static const struct file_operations vhost_vdpa_fops = {
 995	.owner		= THIS_MODULE,
 996	.open		= vhost_vdpa_open,
 997	.release	= vhost_vdpa_release,
 998	.write_iter	= vhost_vdpa_chr_write_iter,
 999	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
1000#ifdef CONFIG_MMU
1001	.mmap		= vhost_vdpa_mmap,
1002#endif /* CONFIG_MMU */
1003	.compat_ioctl	= compat_ptr_ioctl,
1004};
1005
1006static void vhost_vdpa_release_dev(struct device *device)
1007{
1008	struct vhost_vdpa *v =
1009	       container_of(device, struct vhost_vdpa, dev);
1010
1011	ida_simple_remove(&vhost_vdpa_ida, v->minor);
1012	kfree(v->vqs);
1013	kfree(v);
1014}
1015
1016static int vhost_vdpa_probe(struct vdpa_device *vdpa)
1017{
1018	const struct vdpa_config_ops *ops = vdpa->config;
1019	struct vhost_vdpa *v;
1020	int minor;
1021	int r;
1022
1023	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
1024	if (!v)
1025		return -ENOMEM;
1026
1027	minor = ida_simple_get(&vhost_vdpa_ida, 0,
1028			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
1029	if (minor < 0) {
1030		kfree(v);
1031		return minor;
1032	}
1033
1034	atomic_set(&v->opened, 0);
1035	v->minor = minor;
1036	v->vdpa = vdpa;
1037	v->nvqs = vdpa->nvqs;
1038	v->virtio_id = ops->get_device_id(vdpa);
1039
1040	device_initialize(&v->dev);
1041	v->dev.release = vhost_vdpa_release_dev;
1042	v->dev.parent = &vdpa->dev;
1043	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
1044	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
1045			       GFP_KERNEL);
1046	if (!v->vqs) {
1047		r = -ENOMEM;
1048		goto err;
1049	}
1050
1051	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
1052	if (r)
1053		goto err;
1054
1055	cdev_init(&v->cdev, &vhost_vdpa_fops);
1056	v->cdev.owner = THIS_MODULE;
1057
1058	r = cdev_device_add(&v->cdev, &v->dev);
1059	if (r)
1060		goto err;
1061
1062	init_completion(&v->completion);
1063	vdpa_set_drvdata(vdpa, v);
1064
1065	return 0;
1066
1067err:
1068	put_device(&v->dev);
1069	return r;
1070}
1071
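/*
 * Removal races with an open file descriptor: claiming the opened flag
 * blocks any further open(), and if the device was already open the
 * release path signals v->completion before the final put_device().
 */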
1072static void vhost_vdpa_remove(struct vdpa_device *vdpa)
1073{
1074	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
1075	int opened;
1076
1077	cdev_device_del(&v->cdev, &v->dev);
1078
1079	do {
1080		opened = atomic_cmpxchg(&v->opened, 0, 1);
1081		if (!opened)
1082			break;
1083		wait_for_completion(&v->completion);
1084	} while (1);
1085
1086	put_device(&v->dev);
1087}
1088
1089static struct vdpa_driver vhost_vdpa_driver = {
1090	.driver = {
1091		.name	= "vhost_vdpa",
1092	},
1093	.probe	= vhost_vdpa_probe,
1094	.remove	= vhost_vdpa_remove,
1095};
1096
1097static int __init vhost_vdpa_init(void)
1098{
1099	int r;
1100
1101	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
1102				"vhost-vdpa");
1103	if (r)
1104		goto err_alloc_chrdev;
1105
1106	r = vdpa_register_driver(&vhost_vdpa_driver);
1107	if (r)
1108		goto err_vdpa_register_driver;
1109
1110	return 0;
1111
1112err_vdpa_register_driver:
1113	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1114err_alloc_chrdev:
1115	return r;
1116}
1117module_init(vhost_vdpa_init);
1118
1119static void __exit vhost_vdpa_exit(void)
1120{
1121	vdpa_unregister_driver(&vhost_vdpa_driver);
1122	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1123}
1124module_exit(vhost_vdpa_exit);
1125
1126MODULE_VERSION("0.0.1");
1127MODULE_LICENSE("GPL v2");
1128MODULE_AUTHOR("Intel Corporation");
1129MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");