drivers/vfio/container.c (v6.8)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *
 * VFIO container (/dev/vfio/vfio)
 */
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/capability.h>
#include <linux/iommu.h>
#include <linux/miscdevice.h>
#include <linux/vfio.h>
#include <uapi/linux/vfio.h>

#include "vfio.h"

struct vfio_container {
	struct kref			kref;
	struct list_head		group_list;
	struct rw_semaphore		group_lock;
	struct vfio_iommu_driver	*iommu_driver;
	void				*iommu_data;
	bool				noiommu;
};

static struct vfio {
	struct list_head		iommu_drivers_list;
	struct mutex			iommu_drivers_lock;
} vfio;

static void *vfio_noiommu_open(unsigned long arg)
{
	if (arg != VFIO_NOIOMMU_IOMMU)
		return ERR_PTR(-EINVAL);
	if (!capable(CAP_SYS_RAWIO))
		return ERR_PTR(-EPERM);

	return NULL;
}

static void vfio_noiommu_release(void *iommu_data)
{
}

static long vfio_noiommu_ioctl(void *iommu_data,
			       unsigned int cmd, unsigned long arg)
{
	if (cmd == VFIO_CHECK_EXTENSION)
		return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;

	return -ENOTTY;
}

static int vfio_noiommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group, enum vfio_group_type type)
{
	return 0;
}

static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}

static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};

/*
 * Only noiommu containers can use vfio-noiommu and noiommu containers can only
 * use vfio-noiommu.
 */
static bool vfio_iommu_driver_allowed(struct vfio_container *container,
				      const struct vfio_iommu_driver *driver)
{
	if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
		return true;
	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
}

/*
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	if (WARN_ON(!ops->register_device != !ops->unregister_device))
		return -EINVAL;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
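
/*
 * Example (illustrative, not part of this file): an IOMMU backend such
 * as drivers/vfio/vfio_iommu_type1.c registers itself through the pair
 * of functions above from its module init, roughly:
 *
 *	static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops = {
 *		.name	= "vfio-iommu-type1",
 *		.owner	= THIS_MODULE,
 *		.open	= vfio_iommu_type1_open,
 *		...
 *	};
 *
 *	static int __init vfio_iommu_type1_init(void)
 *	{
 *		return vfio_register_iommu_driver(&vfio_iommu_driver_ops);
 *	}
 *
 * Note that register_device/unregister_device must be supplied as a
 * pair (both set or both NULL); the WARN_ON in
 * vfio_register_iommu_driver() rejects half-set pairs.
 */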

/*
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}

void vfio_device_container_register(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver =
		device->group->container->iommu_driver;

	if (iommu_driver && iommu_driver->ops->register_device)
		iommu_driver->ops->register_device(
			device->group->container->iommu_data, device);
}

void vfio_device_container_unregister(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver =
		device->group->container->iommu_driver;

	if (iommu_driver && iommu_driver->ops->unregister_device)
		iommu_driver->ops->unregister_device(
			device->group->container->iommu_data, device);
}

static long
vfio_container_ioctl_check_extension(struct vfio_container *container,
				     unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

				if (!list_empty(&container->group_list) &&
				    !vfio_iommu_driver_allowed(container,
							       driver))
					continue;
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group,
						group->type);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!vfio_iommu_driver_allowed(container, driver))
			continue;
		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}

static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_container_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);
	}

	return ret;
}

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};

struct vfio_container *vfio_container_from_file(struct file *file)
{
	struct vfio_container *container;

	/* Sanity check, is this really our fd? */
	if (file->f_op != &vfio_fops)
		return NULL;

	container = file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */
	return container;
}

static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

int vfio_container_attach_group(struct vfio_container *container,
				struct vfio_group *group)
{
	struct vfio_iommu_driver *driver;
	int ret = 0;

	lockdep_assert_held(&group->group_lock);

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto out_unlock_container;
	}

	if (group->type == VFIO_IOMMU) {
		ret = iommu_group_claim_dma_owner(group->iommu_group, group);
		if (ret)
			goto out_unlock_container;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group,
						group->type);
		if (ret) {
			if (group->type == VFIO_IOMMU)
				iommu_group_release_dma_owner(
					group->iommu_group);
			goto out_unlock_container;
		}
	}

	group->container = container;
	group->container_users = 1;
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);

out_unlock_container:
	up_write(&container->group_lock);
	return ret;
}

void vfio_group_detach_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	lockdep_assert_held(&group->group_lock);
	WARN_ON(group->container_users != 1);

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	if (group->type == VFIO_IOMMU)
		iommu_group_release_dma_owner(group->iommu_group);

	group->container = NULL;
	group->container_users = 0;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}

int vfio_group_use_container(struct vfio_group *group)
{
	lockdep_assert_held(&group->group_lock);

	/*
	 * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
	 * VFIO_SET_IOMMU hasn't been done yet.
	 */
	if (!group->container->iommu_driver)
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	get_file(group->opened_file);
	group->container_users++;
	return 0;
}

void vfio_group_unuse_container(struct vfio_group *group)
{
	lockdep_assert_held(&group->group_lock);

	WARN_ON(group->container_users <= 1);
	group->container_users--;
	fput(group->opened_file);
}

int vfio_device_container_pin_pages(struct vfio_device *device,
				    dma_addr_t iova, int npage,
				    int prot, struct page **pages)
{
	struct vfio_container *container = device->group->container;
	struct iommu_group *iommu_group = device->group->iommu_group;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	if (unlikely(!driver || !driver->ops->pin_pages))
		return -ENOTTY;
	return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
				      npage, prot, pages);
}

void vfio_device_container_unpin_pages(struct vfio_device *device,
				       dma_addr_t iova, int npage)
{
	struct vfio_container *container = device->group->container;

	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
		return;

	container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
						  npage);
}

int vfio_device_container_dma_rw(struct vfio_device *device,
				 dma_addr_t iova, void *data,
				 size_t len, bool write)
{
	struct vfio_container *container = device->group->container;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (unlikely(!driver || !driver->ops->dma_rw))
		return -ENOTTY;
	return driver->ops->dma_rw(container->iommu_data, iova, data, len,
				   write);
}
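
/*
 * Example (illustrative, not part of this file): vfio drivers do not
 * call the three container helpers above directly; they go through the
 * vfio_pin_pages()/vfio_unpin_pages()/vfio_dma_rw() wrappers in
 * vfio_main.c, which route here when the device sits in a container.
 * A mediated-device driver might do, roughly:
 *
 *	struct page *page;
 *
 *	if (vfio_pin_pages(vdev, iova, 1, IOMMU_READ | IOMMU_WRITE,
 *			   &page) == 1) {
 *		... access the pinned page ...
 *		vfio_unpin_pages(vdev, iova, 1);
 *	}
 *
 * where vdev is the driver's struct vfio_device and iova is a DMA
 * address the user previously mapped into the container.
 */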

int __init vfio_container_init(void)
{
	int ret;

	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
		ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
		if (ret)
			goto err_misc;
	}
	return 0;

err_misc:
	misc_deregister(&vfio_dev);
	return ret;
}

void vfio_container_cleanup(void)
{
	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
		vfio_unregister_iommu_driver(&vfio_noiommu_ops);
	misc_deregister(&vfio_dev);
	mutex_destroy(&vfio.iommu_drivers_lock);
}

MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
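
Usage sketch (not part of the kernel tree): the container API this file
implements is driven from userspace roughly as below, adapted from
Documentation/driver-api/vfio.rst. The group number 26 is a placeholder,
error handling is minimal, and the group-side ioctl
(VFIO_GROUP_SET_CONTAINER) is implemented in group.c rather than here.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

int main(void)
{
	int container, group;

	/* Create a new container by opening the chardev this file backs */
	container = open("/dev/vfio/vfio", O_RDWR);

	/* vfio_fops_unl_ioctl() answers these two directly */
	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
		return 1;	/* unknown API version */
	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
		return 1;	/* no registered driver supports type1 */

	/* Open the group and bind it to the container (group.c side) */
	group = open("/dev/vfio/26", O_RDWR);
	if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container))
		return 1;

	/*
	 * Only now is the container privileged enough for VFIO_SET_IOMMU,
	 * which walks the registered drivers in vfio_ioctl_set_iommu().
	 */
	if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU))
		return 1;

	printf("type1 IOMMU enabled on container\n");
	return 0;
}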