Git Repo - linux.git/blame - drivers/vfio/container.c
vfio: Rename vfio_device_assign/unassign_container()
[linux.git] / drivers / vfio / container.c
CommitLineData
cdc71fe4
JG
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
4 *
5 * VFIO container (/dev/vfio/vfio)
6 */
7#include <linux/file.h>
8#include <linux/slab.h>
9#include <linux/fs.h>
10#include <linux/capability.h>
11#include <linux/iommu.h>
12#include <linux/miscdevice.h>
13#include <linux/vfio.h>
14#include <uapi/linux/vfio.h>
15
16#include "vfio.h"
17
18struct vfio_container {
19 struct kref kref;
20 struct list_head group_list;
21 struct rw_semaphore group_lock;
22 struct vfio_iommu_driver *iommu_driver;
23 void *iommu_data;
24 bool noiommu;
25};
26
27static struct vfio {
28 struct list_head iommu_drivers_list;
29 struct mutex iommu_drivers_lock;
30} vfio;
31
#ifdef CONFIG_VFIO_NOIOMMU
/* Backing variable of the enable_unsafe_noiommu_mode module parameter. */
bool vfio_noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode, vfio_noiommu, bool, 0644);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
#endif
38
39static void *vfio_noiommu_open(unsigned long arg)
40{
41 if (arg != VFIO_NOIOMMU_IOMMU)
42 return ERR_PTR(-EINVAL);
43 if (!capable(CAP_SYS_RAWIO))
44 return ERR_PTR(-EPERM);
45
46 return NULL;
47}
48
/* release() callback of the no-IOMMU backend: open() allocated nothing. */
static void vfio_noiommu_release(void *iommu_data)
{
	/* intentionally empty */
}
52
53static long vfio_noiommu_ioctl(void *iommu_data,
54 unsigned int cmd, unsigned long arg)
55{
56 if (cmd == VFIO_CHECK_EXTENSION)
57 return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
58
59 return -ENOTTY;
60}
61
62static int vfio_noiommu_attach_group(void *iommu_data,
63 struct iommu_group *iommu_group, enum vfio_group_type type)
64{
65 return 0;
66}
67
/* detach_group() callback of the no-IOMMU backend: nothing to tear down. */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
	/* intentionally empty */
}
72
73static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
74 .name = "vfio-noiommu",
75 .owner = THIS_MODULE,
76 .open = vfio_noiommu_open,
77 .release = vfio_noiommu_release,
78 .ioctl = vfio_noiommu_ioctl,
79 .attach_group = vfio_noiommu_attach_group,
80 .detach_group = vfio_noiommu_detach_group,
81};
82
83/*
84 * Only noiommu containers can use vfio-noiommu and noiommu containers can only
85 * use vfio-noiommu.
86 */
87static bool vfio_iommu_driver_allowed(struct vfio_container *container,
88 const struct vfio_iommu_driver *driver)
89{
90 if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
91 return true;
92 return container->noiommu == (driver->ops == &vfio_noiommu_ops);
93}
94
95/*
96 * IOMMU driver registration
97 */
98int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
99{
100 struct vfio_iommu_driver *driver, *tmp;
101
102 if (WARN_ON(!ops->register_device != !ops->unregister_device))
103 return -EINVAL;
104
105 driver = kzalloc(sizeof(*driver), GFP_KERNEL);
106 if (!driver)
107 return -ENOMEM;
108
109 driver->ops = ops;
110
111 mutex_lock(&vfio.iommu_drivers_lock);
112
113 /* Check for duplicates */
114 list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
115 if (tmp->ops == ops) {
116 mutex_unlock(&vfio.iommu_drivers_lock);
117 kfree(driver);
118 return -EINVAL;
119 }
120 }
121
122 list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
123
124 mutex_unlock(&vfio.iommu_drivers_lock);
125
126 return 0;
127}
128EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
129
130void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
131{
132 struct vfio_iommu_driver *driver;
133
134 mutex_lock(&vfio.iommu_drivers_lock);
135 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
136 if (driver->ops == ops) {
137 list_del(&driver->vfio_next);
138 mutex_unlock(&vfio.iommu_drivers_lock);
139 kfree(driver);
140 return;
141 }
142 }
143 mutex_unlock(&vfio.iommu_drivers_lock);
144}
145EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
146
147/*
148 * Container objects - containers are created when /dev/vfio/vfio is
149 * opened, but their lifecycle extends until the last user is done, so
150 * it's freed via kref. Must support container/group/device being
151 * closed in any order.
152 */
153static void vfio_container_release(struct kref *kref)
154{
155 struct vfio_container *container;
156 container = container_of(kref, struct vfio_container, kref);
157
158 kfree(container);
159}
160
161static void vfio_container_get(struct vfio_container *container)
162{
163 kref_get(&container->kref);
164}
165
166static void vfio_container_put(struct vfio_container *container)
167{
168 kref_put(&container->kref, vfio_container_release);
169}
170
171void vfio_device_container_register(struct vfio_device *device)
172{
173 struct vfio_iommu_driver *iommu_driver =
174 device->group->container->iommu_driver;
175
176 if (iommu_driver && iommu_driver->ops->register_device)
177 iommu_driver->ops->register_device(
178 device->group->container->iommu_data, device);
179}
180
181void vfio_device_container_unregister(struct vfio_device *device)
182{
183 struct vfio_iommu_driver *iommu_driver =
184 device->group->container->iommu_driver;
185
186 if (iommu_driver && iommu_driver->ops->unregister_device)
187 iommu_driver->ops->unregister_device(
188 device->group->container->iommu_data, device);
189}
190
191long vfio_container_ioctl_check_extension(struct vfio_container *container,
192 unsigned long arg)
193{
194 struct vfio_iommu_driver *driver;
195 long ret = 0;
196
197 down_read(&container->group_lock);
198
199 driver = container->iommu_driver;
200
201 switch (arg) {
202 /* No base extensions yet */
203 default:
204 /*
205 * If no driver is set, poll all registered drivers for
206 * extensions and return the first positive result. If
207 * a driver is already set, further queries will be passed
208 * only to that driver.
209 */
210 if (!driver) {
211 mutex_lock(&vfio.iommu_drivers_lock);
212 list_for_each_entry(driver, &vfio.iommu_drivers_list,
213 vfio_next) {
214
215 if (!list_empty(&container->group_list) &&
216 !vfio_iommu_driver_allowed(container,
217 driver))
218 continue;
219 if (!try_module_get(driver->ops->owner))
220 continue;
221
222 ret = driver->ops->ioctl(NULL,
223 VFIO_CHECK_EXTENSION,
224 arg);
225 module_put(driver->ops->owner);
226 if (ret > 0)
227 break;
228 }
229 mutex_unlock(&vfio.iommu_drivers_lock);
230 } else
231 ret = driver->ops->ioctl(container->iommu_data,
232 VFIO_CHECK_EXTENSION, arg);
233 }
234
235 up_read(&container->group_lock);
236
237 return ret;
238}
239
240/* hold write lock on container->group_lock */
241static int __vfio_container_attach_groups(struct vfio_container *container,
242 struct vfio_iommu_driver *driver,
243 void *data)
244{
245 struct vfio_group *group;
246 int ret = -ENODEV;
247
248 list_for_each_entry(group, &container->group_list, container_next) {
249 ret = driver->ops->attach_group(data, group->iommu_group,
250 group->type);
251 if (ret)
252 goto unwind;
253 }
254
255 return ret;
256
257unwind:
258 list_for_each_entry_continue_reverse(group, &container->group_list,
259 container_next) {
260 driver->ops->detach_group(data, group->iommu_group);
261 }
262
263 return ret;
264}
265
266static long vfio_ioctl_set_iommu(struct vfio_container *container,
267 unsigned long arg)
268{
269 struct vfio_iommu_driver *driver;
270 long ret = -ENODEV;
271
272 down_write(&container->group_lock);
273
274 /*
275 * The container is designed to be an unprivileged interface while
276 * the group can be assigned to specific users. Therefore, only by
277 * adding a group to a container does the user get the privilege of
278 * enabling the iommu, which may allocate finite resources. There
279 * is no unset_iommu, but by removing all the groups from a container,
280 * the container is deprivileged and returns to an unset state.
281 */
282 if (list_empty(&container->group_list) || container->iommu_driver) {
283 up_write(&container->group_lock);
284 return -EINVAL;
285 }
286
287 mutex_lock(&vfio.iommu_drivers_lock);
288 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
289 void *data;
290
291 if (!vfio_iommu_driver_allowed(container, driver))
292 continue;
293 if (!try_module_get(driver->ops->owner))
294 continue;
295
296 /*
297 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
298 * so test which iommu driver reported support for this
299 * extension and call open on them. We also pass them the
300 * magic, allowing a single driver to support multiple
301 * interfaces if they'd like.
302 */
303 if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
304 module_put(driver->ops->owner);
305 continue;
306 }
307
308 data = driver->ops->open(arg);
309 if (IS_ERR(data)) {
310 ret = PTR_ERR(data);
311 module_put(driver->ops->owner);
312 continue;
313 }
314
315 ret = __vfio_container_attach_groups(container, driver, data);
316 if (ret) {
317 driver->ops->release(data);
318 module_put(driver->ops->owner);
319 continue;
320 }
321
322 container->iommu_driver = driver;
323 container->iommu_data = data;
324 break;
325 }
326
327 mutex_unlock(&vfio.iommu_drivers_lock);
328 up_write(&container->group_lock);
329
330 return ret;
331}
332
333static long vfio_fops_unl_ioctl(struct file *filep,
334 unsigned int cmd, unsigned long arg)
335{
336 struct vfio_container *container = filep->private_data;
337 struct vfio_iommu_driver *driver;
338 void *data;
339 long ret = -EINVAL;
340
341 if (!container)
342 return ret;
343
344 switch (cmd) {
345 case VFIO_GET_API_VERSION:
346 ret = VFIO_API_VERSION;
347 break;
348 case VFIO_CHECK_EXTENSION:
349 ret = vfio_container_ioctl_check_extension(container, arg);
350 break;
351 case VFIO_SET_IOMMU:
352 ret = vfio_ioctl_set_iommu(container, arg);
353 break;
354 default:
355 driver = container->iommu_driver;
356 data = container->iommu_data;
357
358 if (driver) /* passthrough all unrecognized ioctls */
359 ret = driver->ops->ioctl(data, cmd, arg);
360 }
361
362 return ret;
363}
364
365static int vfio_fops_open(struct inode *inode, struct file *filep)
366{
367 struct vfio_container *container;
368
369 container = kzalloc(sizeof(*container), GFP_KERNEL);
370 if (!container)
371 return -ENOMEM;
372
373 INIT_LIST_HEAD(&container->group_list);
374 init_rwsem(&container->group_lock);
375 kref_init(&container->kref);
376
377 filep->private_data = container;
378
379 return 0;
380}
381
382static int vfio_fops_release(struct inode *inode, struct file *filep)
383{
384 struct vfio_container *container = filep->private_data;
385 struct vfio_iommu_driver *driver = container->iommu_driver;
386
387 if (driver && driver->ops->notify)
388 driver->ops->notify(container->iommu_data,
389 VFIO_IOMMU_CONTAINER_CLOSE);
390
391 filep->private_data = NULL;
392
393 vfio_container_put(container);
394
395 return 0;
396}
397
398static const struct file_operations vfio_fops = {
399 .owner = THIS_MODULE,
400 .open = vfio_fops_open,
401 .release = vfio_fops_release,
402 .unlocked_ioctl = vfio_fops_unl_ioctl,
403 .compat_ioctl = compat_ptr_ioctl,
404};
405
406struct vfio_container *vfio_container_from_file(struct file *file)
407{
408 struct vfio_container *container;
409
410 /* Sanity check, is this really our fd? */
411 if (file->f_op != &vfio_fops)
412 return NULL;
413
414 container = file->private_data;
415 WARN_ON(!container); /* fget ensures we don't race vfio_release */
416 return container;
417}
418
419static struct miscdevice vfio_dev = {
420 .minor = VFIO_MINOR,
421 .name = "vfio",
422 .fops = &vfio_fops,
423 .nodename = "vfio/vfio",
424 .mode = S_IRUGO | S_IWUGO,
425};
426
427int vfio_container_attach_group(struct vfio_container *container,
428 struct vfio_group *group)
429{
430 struct vfio_iommu_driver *driver;
431 int ret = 0;
432
c82e81ab 433 lockdep_assert_held(&group->group_lock);
cdc71fe4
JG
434
435 if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
436 return -EPERM;
437
438 down_write(&container->group_lock);
439
440 /* Real groups and fake groups cannot mix */
441 if (!list_empty(&container->group_list) &&
442 container->noiommu != (group->type == VFIO_NO_IOMMU)) {
443 ret = -EPERM;
444 goto out_unlock_container;
445 }
446
447 if (group->type == VFIO_IOMMU) {
448 ret = iommu_group_claim_dma_owner(group->iommu_group, group);
449 if (ret)
450 goto out_unlock_container;
451 }
452
453 driver = container->iommu_driver;
454 if (driver) {
455 ret = driver->ops->attach_group(container->iommu_data,
456 group->iommu_group,
457 group->type);
458 if (ret) {
459 if (group->type == VFIO_IOMMU)
460 iommu_group_release_dma_owner(
461 group->iommu_group);
462 goto out_unlock_container;
463 }
464 }
465
466 group->container = container;
467 group->container_users = 1;
468 container->noiommu = (group->type == VFIO_NO_IOMMU);
469 list_add(&group->container_next, &container->group_list);
470
471 /* Get a reference on the container and mark a user within the group */
472 vfio_container_get(container);
473
474out_unlock_container:
475 up_write(&container->group_lock);
476 return ret;
477}
478
479void vfio_group_detach_container(struct vfio_group *group)
480{
481 struct vfio_container *container = group->container;
482 struct vfio_iommu_driver *driver;
483
c82e81ab 484 lockdep_assert_held(&group->group_lock);
cdc71fe4
JG
485 WARN_ON(group->container_users != 1);
486
487 down_write(&container->group_lock);
488
489 driver = container->iommu_driver;
490 if (driver)
491 driver->ops->detach_group(container->iommu_data,
492 group->iommu_group);
493
494 if (group->type == VFIO_IOMMU)
495 iommu_group_release_dma_owner(group->iommu_group);
496
497 group->container = NULL;
498 group->container_users = 0;
499 list_del(&group->container_next);
500
501 /* Detaching the last group deprivileges a container, remove iommu */
502 if (driver && list_empty(&container->group_list)) {
503 driver->ops->release(container->iommu_data);
504 module_put(driver->ops->owner);
505 container->iommu_driver = NULL;
506 container->iommu_data = NULL;
507 }
508
509 up_write(&container->group_lock);
510
511 vfio_container_put(container);
512}
513
04f930c3 514int vfio_group_use_container(struct vfio_group *group)
cdc71fe4 515{
c82e81ab 516 lockdep_assert_held(&group->group_lock);
cdc71fe4
JG
517
518 if (!group->container || !group->container->iommu_driver ||
519 WARN_ON(!group->container_users))
520 return -EINVAL;
521
522 if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
523 return -EPERM;
524
525 get_file(group->opened_file);
526 group->container_users++;
527 return 0;
528}
529
04f930c3 530void vfio_group_unuse_container(struct vfio_group *group)
cdc71fe4 531{
04f930c3 532 lockdep_assert_held(&group->group_lock);
bab6fabc 533
04f930c3
JG
534 WARN_ON(group->container_users <= 1);
535 group->container_users--;
536 fput(group->opened_file);
cdc71fe4
JG
537}
538
539/*
540 * Pin contiguous user pages and return their associated host pages for local
541 * domain only.
542 * @device [in] : device
543 * @iova [in] : starting IOVA of user pages to be pinned.
544 * @npage [in] : count of pages to be pinned. This count should not
545 * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
546 * @prot [in] : protection flags
547 * @pages[out] : array of host pages
548 * Return error or number of pages pinned.
549 *
550 * A driver may only call this function if the vfio_device was created
551 * by vfio_register_emulated_iommu_dev().
552 */
553int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
554 int npage, int prot, struct page **pages)
555{
556 struct vfio_container *container;
557 struct vfio_group *group = device->group;
558 struct vfio_iommu_driver *driver;
559 int ret;
560
561 if (!pages || !npage || !vfio_assert_device_open(device))
562 return -EINVAL;
563
564 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
565 return -E2BIG;
566
567 /* group->container cannot change while a vfio device is open */
568 container = group->container;
569 driver = container->iommu_driver;
570 if (likely(driver && driver->ops->pin_pages))
571 ret = driver->ops->pin_pages(container->iommu_data,
572 group->iommu_group, iova,
573 npage, prot, pages);
574 else
575 ret = -ENOTTY;
576
577 return ret;
578}
579EXPORT_SYMBOL(vfio_pin_pages);
580
581/*
582 * Unpin contiguous host pages for local domain only.
583 * @device [in] : device
584 * @iova [in] : starting address of user pages to be unpinned.
585 * @npage [in] : count of pages to be unpinned. This count should not
586 * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
587 */
588void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
589{
590 struct vfio_container *container;
591 struct vfio_iommu_driver *driver;
592
593 if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
594 return;
595
596 if (WARN_ON(!vfio_assert_device_open(device)))
597 return;
598
599 /* group->container cannot change while a vfio device is open */
600 container = device->group->container;
601 driver = container->iommu_driver;
602
603 driver->ops->unpin_pages(container->iommu_data, iova, npage);
604}
605EXPORT_SYMBOL(vfio_unpin_pages);
606
607/*
608 * This interface allows the CPUs to perform some sort of virtual DMA on
609 * behalf of the device.
610 *
611 * CPUs read/write from/into a range of IOVAs pointing to user space memory
612 * into/from a kernel buffer.
613 *
614 * As the read/write of user space memory is conducted via the CPUs and is
615 * not a real device DMA, it is not necessary to pin the user space memory.
616 *
617 * @device [in] : VFIO device
618 * @iova [in] : base IOVA of a user space buffer
619 * @data [in] : pointer to kernel buffer
620 * @len [in] : kernel buffer length
621 * @write : indicate read or write
622 * Return error code on failure or 0 on success.
623 */
624int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
625 size_t len, bool write)
626{
627 struct vfio_container *container;
628 struct vfio_iommu_driver *driver;
629 int ret = 0;
630
631 if (!data || len <= 0 || !vfio_assert_device_open(device))
632 return -EINVAL;
633
634 /* group->container cannot change while a vfio device is open */
635 container = device->group->container;
636 driver = container->iommu_driver;
637
638 if (likely(driver && driver->ops->dma_rw))
639 ret = driver->ops->dma_rw(container->iommu_data,
640 iova, data, len, write);
641 else
642 ret = -ENOTTY;
643 return ret;
644}
645EXPORT_SYMBOL(vfio_dma_rw);
646
647int __init vfio_container_init(void)
648{
649 int ret;
650
651 mutex_init(&vfio.iommu_drivers_lock);
652 INIT_LIST_HEAD(&vfio.iommu_drivers_list);
653
654 ret = misc_register(&vfio_dev);
655 if (ret) {
656 pr_err("vfio: misc device register failed\n");
657 return ret;
658 }
659
660 if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
661 ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
662 if (ret)
663 goto err_misc;
664 }
665 return 0;
666
667err_misc:
668 misc_deregister(&vfio_dev);
669 return ret;
670}
671
672void vfio_container_cleanup(void)
673{
674 if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
675 vfio_unregister_iommu_driver(&vfio_noiommu_ops);
676 misc_deregister(&vfio_dev);
677 mutex_destroy(&vfio.iommu_drivers_lock);
678}
This page took 0.122025 seconds and 4 git commands to generate.