// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 *
 * VFIO container (/dev/vfio/vfio)
 */
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/capability.h>
#include <linux/iommu.h>
#include <linux/miscdevice.h>
#include <linux/vfio.h>
#include <uapi/linux/vfio.h>

#include "vfio.h"

struct vfio_container {
	struct kref			kref;
	struct list_head		group_list;
	struct rw_semaphore		group_lock;
	struct vfio_iommu_driver	*iommu_driver;
	void				*iommu_data;
	bool				noiommu;
};

static struct vfio {
	struct list_head		iommu_drivers_list;
	struct mutex			iommu_drivers_lock;
} vfio;

static void *vfio_noiommu_open(unsigned long arg)
{
	if (arg != VFIO_NOIOMMU_IOMMU)
		return ERR_PTR(-EINVAL);
	if (!capable(CAP_SYS_RAWIO))
		return ERR_PTR(-EPERM);

	return NULL;
}

static void vfio_noiommu_release(void *iommu_data)
{
}

static long vfio_noiommu_ioctl(void *iommu_data,
			       unsigned int cmd, unsigned long arg)
{
	if (cmd == VFIO_CHECK_EXTENSION)
		return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;

	return -ENOTTY;
}

static int vfio_noiommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group, enum vfio_group_type type)
{
	return 0;
}

static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}

static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};

/*
 * Only noiommu containers can use vfio-noiommu and noiommu containers can only
 * use vfio-noiommu.
 */
static bool vfio_iommu_driver_allowed(struct vfio_container *container,
				      const struct vfio_iommu_driver *driver)
{
	if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
		return true;
	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
}

/*
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	if (WARN_ON(!ops->register_device != !ops->unregister_device))
		return -EINVAL;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
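
/*
 * Illustrative sketch, not part of this file: an IOMMU backend (e.g. the
 * type1 driver) registers its ops at module init and unregisters them on
 * exit. The "my_*" names are placeholders. Note that .register_device and
 * .unregister_device must be provided as a pair or not at all; the
 * WARN_ON() in vfio_register_iommu_driver() rejects anything else.
 *
 *	static const struct vfio_iommu_driver_ops my_iommu_ops = {
 *		.name		= "my-iommu",
 *		.owner		= THIS_MODULE,
 *		.open		= my_iommu_open,
 *		.release	= my_iommu_release,
 *		.ioctl		= my_iommu_ioctl,
 *		.attach_group	= my_iommu_attach_group,
 *		.detach_group	= my_iommu_detach_group,
 *	};
 *
 *	static int __init my_iommu_init(void)
 *	{
 *		return vfio_register_iommu_driver(&my_iommu_ops);
 *	}
 *
 *	static void __exit my_iommu_exit(void)
 *	{
 *		vfio_unregister_iommu_driver(&my_iommu_ops);
 *	}
 */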

/*
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref. Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}

void vfio_device_container_register(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver =
		device->group->container->iommu_driver;

	if (iommu_driver && iommu_driver->ops->register_device)
		iommu_driver->ops->register_device(
			device->group->container->iommu_data, device);
}

void vfio_device_container_unregister(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver =
		device->group->container->iommu_driver;

	if (iommu_driver && iommu_driver->ops->unregister_device)
		iommu_driver->ops->unregister_device(
			device->group->container->iommu_data, device);
}

static long
vfio_container_ioctl_check_extension(struct vfio_container *container,
				     unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result. If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

				if (!list_empty(&container->group_list) &&
				    !vfio_iommu_driver_allowed(container,
							       driver))
					continue;
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}
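
/*
 * Illustrative userspace sketch, not part of this file: probing for a
 * supported backend before committing to VFIO_SET_IOMMU. Error handling
 * is omitted.
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *
 *	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
 *		return -1;	// unknown API version
 *
 *	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
 *		return -1;	// no type1 backend available
 */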

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group,
						group->type);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users. Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources. There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!vfio_iommu_driver_allowed(container, driver))
			continue;
		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them. We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}
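
/*
 * Illustrative userspace sketch, not part of this file: as the comment
 * above explains, VFIO_SET_IOMMU succeeds only after at least one group
 * has been added to the container. The group number 26 is a placeholder.
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *	int group = open("/dev/vfio/26", O_RDWR);
 *
 *	// Adding the group is what privileges the container...
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *
 *	// ...and only then may an IOMMU backend be enabled.
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 */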

static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_container_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);
	}

	return ret;
}

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

static const struct file_operations vfio_fops = {
	.owner = THIS_MODULE,
	.open = vfio_fops_open,
	.release = vfio_fops_release,
	.unlocked_ioctl = vfio_fops_unl_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
};

struct vfio_container *vfio_container_from_file(struct file *file)
{
	struct vfio_container *container;

	/* Sanity check, is this really our fd? */
	if (file->f_op != &vfio_fops)
		return NULL;

	container = file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */
	return container;
}

static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

int vfio_container_attach_group(struct vfio_container *container,
				struct vfio_group *group)
{
	struct vfio_iommu_driver *driver;
	int ret = 0;

	lockdep_assert_held(&group->group_lock);

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto out_unlock_container;
	}

	if (group->type == VFIO_IOMMU) {
		ret = iommu_group_claim_dma_owner(group->iommu_group, group);
		if (ret)
			goto out_unlock_container;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group,
						group->type);
		if (ret) {
			if (group->type == VFIO_IOMMU)
				iommu_group_release_dma_owner(
					group->iommu_group);
			goto out_unlock_container;
		}
	}

	group->container = container;
	group->container_users = 1;
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);

out_unlock_container:
	up_write(&container->group_lock);
	return ret;
}
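
/*
 * For orientation, a rough sketch of the kernel-side path that lands
 * here (the actual handler lives in group.c and may differ in detail):
 * the VFIO_GROUP_SET_CONTAINER ioctl resolves the user-supplied fd and
 * attaches the group, roughly:
 *
 *	f = fdget(fd);
 *	container = vfio_container_from_file(f.file);
 *	if (container)
 *		ret = vfio_container_attach_group(container, group);
 *	fdput(f);
 */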

void vfio_group_detach_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	lockdep_assert_held(&group->group_lock);
	WARN_ON(group->container_users != 1);

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	if (group->type == VFIO_IOMMU)
		iommu_group_release_dma_owner(group->iommu_group);

	group->container = NULL;
	group->container_users = 0;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}

int vfio_group_use_container(struct vfio_group *group)
{
	lockdep_assert_held(&group->group_lock);

	/*
	 * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
	 * VFIO_SET_IOMMU hasn't been done yet.
	 */
	if (!group->container->iommu_driver)
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	get_file(group->opened_file);
	group->container_users++;
	return 0;
}

void vfio_group_unuse_container(struct vfio_group *group)
{
	lockdep_assert_held(&group->group_lock);

	WARN_ON(group->container_users <= 1);
	group->container_users--;
	fput(group->opened_file);
}

int vfio_device_container_pin_pages(struct vfio_device *device,
				    dma_addr_t iova, int npage,
				    int prot, struct page **pages)
{
	struct vfio_container *container = device->group->container;
	struct iommu_group *iommu_group = device->group->iommu_group;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	if (unlikely(!driver || !driver->ops->pin_pages))
		return -ENOTTY;
	return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
				      npage, prot, pages);
}

void vfio_device_container_unpin_pages(struct vfio_device *device,
				       dma_addr_t iova, int npage)
{
	struct vfio_container *container = device->group->container;

	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
		return;

	container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
						  npage);
}

int vfio_device_container_dma_rw(struct vfio_device *device,
				 dma_addr_t iova, void *data,
				 size_t len, bool write)
{
	struct vfio_container *container = device->group->container;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (unlikely(!driver || !driver->ops->dma_rw))
		return -ENOTTY;
	return driver->ops->dma_rw(container->iommu_data, iova, data, len,
				   write);
}
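
/*
 * Illustrative sketch, not part of this file: drivers for mediated or
 * emulated devices typically reach the three helpers above through the
 * exported wrappers (vfio_pin_pages() and friends); 'iova', 'buffer' and
 * 'len' are placeholders.
 *
 *	struct page *pages[1];
 *
 *	// Pin one page of the user's IOVA space for read/write access.
 *	ret = vfio_pin_pages(device, iova, 1,
 *			     IOMMU_READ | IOMMU_WRITE, pages);
 *	...
 *	vfio_unpin_pages(device, iova, 1);
 *
 *	// Or copy to/from the IOVA space without pinning.
 *	ret = vfio_dma_rw(device, iova, buffer, len, true);
 */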

int __init vfio_container_init(void)
{
	int ret;

	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
		ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
		if (ret)
			goto err_misc;
	}
	return 0;

err_misc:
	misc_deregister(&vfio_dev);
	return ret;
}

void vfio_container_cleanup(void)
{
	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
		vfio_unregister_iommu_driver(&vfio_noiommu_ops);
	misc_deregister(&vfio_dev);
	mutex_destroy(&vfio.iommu_drivers_lock);
}

MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");