]>
Commit | Line | Data |
---|---|---|
cdc71fe4 JG |
1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* | |
3 | * Copyright (C) 2012 Red Hat, Inc. All rights reserved. | |
4 | * | |
5 | * VFIO container (/dev/vfio/vfio) | |
6 | */ | |
7 | #include <linux/file.h> | |
8 | #include <linux/slab.h> | |
9 | #include <linux/fs.h> | |
10 | #include <linux/capability.h> | |
11 | #include <linux/iommu.h> | |
12 | #include <linux/miscdevice.h> | |
13 | #include <linux/vfio.h> | |
14 | #include <uapi/linux/vfio.h> | |
15 | ||
16 | #include "vfio.h" | |
17 | ||
/*
 * State behind one open of the VFIO container device.  Allocated in
 * vfio_fops_open() and freed by vfio_container_release() once the last
 * reference is dropped.
 */
struct vfio_container {
	/* Reference count; the final put frees the container. */
	struct kref kref;
	/* Groups attached to this container, linked via their container_next. */
	struct list_head group_list;
	/*
	 * Held for write while group_list or the iommu_driver/iommu_data pair
	 * is changed, and for read by the CHECK_EXTENSION path.
	 */
	struct rw_semaphore group_lock;
	/* Backend chosen by VFIO_SET_IOMMU; NULL while no backend is set. */
	struct vfio_iommu_driver *iommu_driver;
	/* Cookie returned by the backend's open op, passed back to its ops. */
	void *iommu_data;
	/* True when the most recently attached group was a VFIO_NO_IOMMU group. */
	bool noiommu;
};
26 | ||
/* File-wide registry of IOMMU backend drivers. */
static struct vfio {
	/* Registered struct vfio_iommu_driver entries, linked via vfio_next. */
	struct list_head iommu_drivers_list;
	/* Taken around every walk or modification of iommu_drivers_list. */
	struct mutex iommu_drivers_lock;
} vfio;
31 | ||
cdc71fe4 JG |
32 | static void *vfio_noiommu_open(unsigned long arg) |
33 | { | |
34 | if (arg != VFIO_NOIOMMU_IOMMU) | |
35 | return ERR_PTR(-EINVAL); | |
36 | if (!capable(CAP_SYS_RAWIO)) | |
37 | return ERR_PTR(-EPERM); | |
38 | ||
39 | return NULL; | |
40 | } | |
41 | ||
/* release op of the vfio-noiommu backend; there is nothing to tear down. */
static void vfio_noiommu_release(void *iommu_data)
{
	/* Intentionally empty. */
}
45 | ||
46 | static long vfio_noiommu_ioctl(void *iommu_data, | |
47 | unsigned int cmd, unsigned long arg) | |
48 | { | |
49 | if (cmd == VFIO_CHECK_EXTENSION) | |
50 | return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0; | |
51 | ||
52 | return -ENOTTY; | |
53 | } | |
54 | ||
55 | static int vfio_noiommu_attach_group(void *iommu_data, | |
56 | struct iommu_group *iommu_group, enum vfio_group_type type) | |
57 | { | |
58 | return 0; | |
59 | } | |
60 | ||
/* detach_group op of the vfio-noiommu backend: nothing to undo. */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
	/* Intentionally empty. */
}
65 | ||
/*
 * Operations table of the built-in "vfio-noiommu" backend.  Note that it
 * supplies no register_device/unregister_device, pin_pages/unpin_pages or
 * dma_rw ops.
 */
static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
75 | ||
76 | /* | |
77 | * Only noiommu containers can use vfio-noiommu and noiommu containers can only | |
78 | * use vfio-noiommu. | |
79 | */ | |
80 | static bool vfio_iommu_driver_allowed(struct vfio_container *container, | |
81 | const struct vfio_iommu_driver *driver) | |
82 | { | |
83 | if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU)) | |
84 | return true; | |
85 | return container->noiommu == (driver->ops == &vfio_noiommu_ops); | |
86 | } | |
87 | ||
88 | /* | |
89 | * IOMMU driver registration | |
90 | */ | |
91 | int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) | |
92 | { | |
93 | struct vfio_iommu_driver *driver, *tmp; | |
94 | ||
95 | if (WARN_ON(!ops->register_device != !ops->unregister_device)) | |
96 | return -EINVAL; | |
97 | ||
98 | driver = kzalloc(sizeof(*driver), GFP_KERNEL); | |
99 | if (!driver) | |
100 | return -ENOMEM; | |
101 | ||
102 | driver->ops = ops; | |
103 | ||
104 | mutex_lock(&vfio.iommu_drivers_lock); | |
105 | ||
106 | /* Check for duplicates */ | |
107 | list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) { | |
108 | if (tmp->ops == ops) { | |
109 | mutex_unlock(&vfio.iommu_drivers_lock); | |
110 | kfree(driver); | |
111 | return -EINVAL; | |
112 | } | |
113 | } | |
114 | ||
115 | list_add(&driver->vfio_next, &vfio.iommu_drivers_list); | |
116 | ||
117 | mutex_unlock(&vfio.iommu_drivers_lock); | |
118 | ||
119 | return 0; | |
120 | } | |
121 | EXPORT_SYMBOL_GPL(vfio_register_iommu_driver); | |
122 | ||
123 | void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops) | |
124 | { | |
125 | struct vfio_iommu_driver *driver; | |
126 | ||
127 | mutex_lock(&vfio.iommu_drivers_lock); | |
128 | list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { | |
129 | if (driver->ops == ops) { | |
130 | list_del(&driver->vfio_next); | |
131 | mutex_unlock(&vfio.iommu_drivers_lock); | |
132 | kfree(driver); | |
133 | return; | |
134 | } | |
135 | } | |
136 | mutex_unlock(&vfio.iommu_drivers_lock); | |
137 | } | |
138 | EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); | |
139 | ||
140 | /* | |
141 | * Container objects - containers are created when /dev/vfio/vfio is | |
142 | * opened, but their lifecycle extends until the last user is done, so | |
143 | * it's freed via kref. Must support container/group/device being | |
144 | * closed in any order. | |
145 | */ | |
146 | static void vfio_container_release(struct kref *kref) | |
147 | { | |
148 | struct vfio_container *container; | |
149 | container = container_of(kref, struct vfio_container, kref); | |
150 | ||
151 | kfree(container); | |
152 | } | |
153 | ||
154 | static void vfio_container_get(struct vfio_container *container) | |
155 | { | |
156 | kref_get(&container->kref); | |
157 | } | |
158 | ||
159 | static void vfio_container_put(struct vfio_container *container) | |
160 | { | |
161 | kref_put(&container->kref, vfio_container_release); | |
162 | } | |
163 | ||
164 | void vfio_device_container_register(struct vfio_device *device) | |
165 | { | |
166 | struct vfio_iommu_driver *iommu_driver = | |
167 | device->group->container->iommu_driver; | |
168 | ||
169 | if (iommu_driver && iommu_driver->ops->register_device) | |
170 | iommu_driver->ops->register_device( | |
171 | device->group->container->iommu_data, device); | |
172 | } | |
173 | ||
174 | void vfio_device_container_unregister(struct vfio_device *device) | |
175 | { | |
176 | struct vfio_iommu_driver *iommu_driver = | |
177 | device->group->container->iommu_driver; | |
178 | ||
179 | if (iommu_driver && iommu_driver->ops->unregister_device) | |
180 | iommu_driver->ops->unregister_device( | |
181 | device->group->container->iommu_data, device); | |
182 | } | |
183 | ||
0d8227b6 JG |
184 | static long |
185 | vfio_container_ioctl_check_extension(struct vfio_container *container, | |
186 | unsigned long arg) | |
cdc71fe4 JG |
187 | { |
188 | struct vfio_iommu_driver *driver; | |
189 | long ret = 0; | |
190 | ||
191 | down_read(&container->group_lock); | |
192 | ||
193 | driver = container->iommu_driver; | |
194 | ||
195 | switch (arg) { | |
196 | /* No base extensions yet */ | |
197 | default: | |
198 | /* | |
199 | * If no driver is set, poll all registered drivers for | |
200 | * extensions and return the first positive result. If | |
201 | * a driver is already set, further queries will be passed | |
202 | * only to that driver. | |
203 | */ | |
204 | if (!driver) { | |
205 | mutex_lock(&vfio.iommu_drivers_lock); | |
206 | list_for_each_entry(driver, &vfio.iommu_drivers_list, | |
207 | vfio_next) { | |
208 | ||
209 | if (!list_empty(&container->group_list) && | |
210 | !vfio_iommu_driver_allowed(container, | |
211 | driver)) | |
212 | continue; | |
213 | if (!try_module_get(driver->ops->owner)) | |
214 | continue; | |
215 | ||
216 | ret = driver->ops->ioctl(NULL, | |
217 | VFIO_CHECK_EXTENSION, | |
218 | arg); | |
219 | module_put(driver->ops->owner); | |
220 | if (ret > 0) | |
221 | break; | |
222 | } | |
223 | mutex_unlock(&vfio.iommu_drivers_lock); | |
224 | } else | |
225 | ret = driver->ops->ioctl(container->iommu_data, | |
226 | VFIO_CHECK_EXTENSION, arg); | |
227 | } | |
228 | ||
229 | up_read(&container->group_lock); | |
230 | ||
231 | return ret; | |
232 | } | |
233 | ||
234 | /* hold write lock on container->group_lock */ | |
235 | static int __vfio_container_attach_groups(struct vfio_container *container, | |
236 | struct vfio_iommu_driver *driver, | |
237 | void *data) | |
238 | { | |
239 | struct vfio_group *group; | |
240 | int ret = -ENODEV; | |
241 | ||
242 | list_for_each_entry(group, &container->group_list, container_next) { | |
243 | ret = driver->ops->attach_group(data, group->iommu_group, | |
244 | group->type); | |
245 | if (ret) | |
246 | goto unwind; | |
247 | } | |
248 | ||
249 | return ret; | |
250 | ||
251 | unwind: | |
252 | list_for_each_entry_continue_reverse(group, &container->group_list, | |
253 | container_next) { | |
254 | driver->ops->detach_group(data, group->iommu_group); | |
255 | } | |
256 | ||
257 | return ret; | |
258 | } | |
259 | ||
260 | static long vfio_ioctl_set_iommu(struct vfio_container *container, | |
261 | unsigned long arg) | |
262 | { | |
263 | struct vfio_iommu_driver *driver; | |
264 | long ret = -ENODEV; | |
265 | ||
266 | down_write(&container->group_lock); | |
267 | ||
268 | /* | |
269 | * The container is designed to be an unprivileged interface while | |
270 | * the group can be assigned to specific users. Therefore, only by | |
271 | * adding a group to a container does the user get the privilege of | |
272 | * enabling the iommu, which may allocate finite resources. There | |
273 | * is no unset_iommu, but by removing all the groups from a container, | |
274 | * the container is deprivileged and returns to an unset state. | |
275 | */ | |
276 | if (list_empty(&container->group_list) || container->iommu_driver) { | |
277 | up_write(&container->group_lock); | |
278 | return -EINVAL; | |
279 | } | |
280 | ||
281 | mutex_lock(&vfio.iommu_drivers_lock); | |
282 | list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { | |
283 | void *data; | |
284 | ||
285 | if (!vfio_iommu_driver_allowed(container, driver)) | |
286 | continue; | |
287 | if (!try_module_get(driver->ops->owner)) | |
288 | continue; | |
289 | ||
290 | /* | |
291 | * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION, | |
292 | * so test which iommu driver reported support for this | |
293 | * extension and call open on them. We also pass them the | |
294 | * magic, allowing a single driver to support multiple | |
295 | * interfaces if they'd like. | |
296 | */ | |
297 | if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) { | |
298 | module_put(driver->ops->owner); | |
299 | continue; | |
300 | } | |
301 | ||
302 | data = driver->ops->open(arg); | |
303 | if (IS_ERR(data)) { | |
304 | ret = PTR_ERR(data); | |
305 | module_put(driver->ops->owner); | |
306 | continue; | |
307 | } | |
308 | ||
309 | ret = __vfio_container_attach_groups(container, driver, data); | |
310 | if (ret) { | |
311 | driver->ops->release(data); | |
312 | module_put(driver->ops->owner); | |
313 | continue; | |
314 | } | |
315 | ||
316 | container->iommu_driver = driver; | |
317 | container->iommu_data = data; | |
318 | break; | |
319 | } | |
320 | ||
321 | mutex_unlock(&vfio.iommu_drivers_lock); | |
322 | up_write(&container->group_lock); | |
323 | ||
324 | return ret; | |
325 | } | |
326 | ||
327 | static long vfio_fops_unl_ioctl(struct file *filep, | |
328 | unsigned int cmd, unsigned long arg) | |
329 | { | |
330 | struct vfio_container *container = filep->private_data; | |
331 | struct vfio_iommu_driver *driver; | |
332 | void *data; | |
333 | long ret = -EINVAL; | |
334 | ||
335 | if (!container) | |
336 | return ret; | |
337 | ||
338 | switch (cmd) { | |
339 | case VFIO_GET_API_VERSION: | |
340 | ret = VFIO_API_VERSION; | |
341 | break; | |
342 | case VFIO_CHECK_EXTENSION: | |
343 | ret = vfio_container_ioctl_check_extension(container, arg); | |
344 | break; | |
345 | case VFIO_SET_IOMMU: | |
346 | ret = vfio_ioctl_set_iommu(container, arg); | |
347 | break; | |
348 | default: | |
349 | driver = container->iommu_driver; | |
350 | data = container->iommu_data; | |
351 | ||
352 | if (driver) /* passthrough all unrecognized ioctls */ | |
353 | ret = driver->ops->ioctl(data, cmd, arg); | |
354 | } | |
355 | ||
356 | return ret; | |
357 | } | |
358 | ||
359 | static int vfio_fops_open(struct inode *inode, struct file *filep) | |
360 | { | |
361 | struct vfio_container *container; | |
362 | ||
0886196c | 363 | container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT); |
cdc71fe4 JG |
364 | if (!container) |
365 | return -ENOMEM; | |
366 | ||
367 | INIT_LIST_HEAD(&container->group_list); | |
368 | init_rwsem(&container->group_lock); | |
369 | kref_init(&container->kref); | |
370 | ||
371 | filep->private_data = container; | |
372 | ||
373 | return 0; | |
374 | } | |
375 | ||
376 | static int vfio_fops_release(struct inode *inode, struct file *filep) | |
377 | { | |
378 | struct vfio_container *container = filep->private_data; | |
cdc71fe4 JG |
379 | |
380 | filep->private_data = NULL; | |
381 | ||
382 | vfio_container_put(container); | |
383 | ||
384 | return 0; | |
385 | } | |
386 | ||
/*
 * File operations of the container device.  vfio_container_from_file() also
 * uses the address of this table to recognize container files.
 */
static const struct file_operations vfio_fops = {
	.owner = THIS_MODULE,
	.open = vfio_fops_open,
	.release = vfio_fops_release,
	.unlocked_ioctl = vfio_fops_unl_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
};
394 | ||
395 | struct vfio_container *vfio_container_from_file(struct file *file) | |
396 | { | |
397 | struct vfio_container *container; | |
398 | ||
399 | /* Sanity check, is this really our fd? */ | |
400 | if (file->f_op != &vfio_fops) | |
401 | return NULL; | |
402 | ||
403 | container = file->private_data; | |
404 | WARN_ON(!container); /* fget ensures we don't race vfio_release */ | |
405 | return container; | |
406 | } | |
407 | ||
/*
 * Misc device for the container, registered in vfio_container_init().  It is
 * given the node name "vfio/vfio" and read/write permission for user, group
 * and other.
 */
static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};
415 | ||
416 | int vfio_container_attach_group(struct vfio_container *container, | |
417 | struct vfio_group *group) | |
418 | { | |
419 | struct vfio_iommu_driver *driver; | |
420 | int ret = 0; | |
421 | ||
c82e81ab | 422 | lockdep_assert_held(&group->group_lock); |
cdc71fe4 JG |
423 | |
424 | if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) | |
425 | return -EPERM; | |
426 | ||
427 | down_write(&container->group_lock); | |
428 | ||
429 | /* Real groups and fake groups cannot mix */ | |
430 | if (!list_empty(&container->group_list) && | |
431 | container->noiommu != (group->type == VFIO_NO_IOMMU)) { | |
432 | ret = -EPERM; | |
433 | goto out_unlock_container; | |
434 | } | |
435 | ||
436 | if (group->type == VFIO_IOMMU) { | |
437 | ret = iommu_group_claim_dma_owner(group->iommu_group, group); | |
438 | if (ret) | |
439 | goto out_unlock_container; | |
440 | } | |
441 | ||
442 | driver = container->iommu_driver; | |
443 | if (driver) { | |
444 | ret = driver->ops->attach_group(container->iommu_data, | |
445 | group->iommu_group, | |
446 | group->type); | |
447 | if (ret) { | |
448 | if (group->type == VFIO_IOMMU) | |
449 | iommu_group_release_dma_owner( | |
450 | group->iommu_group); | |
451 | goto out_unlock_container; | |
452 | } | |
453 | } | |
454 | ||
455 | group->container = container; | |
456 | group->container_users = 1; | |
457 | container->noiommu = (group->type == VFIO_NO_IOMMU); | |
458 | list_add(&group->container_next, &container->group_list); | |
459 | ||
460 | /* Get a reference on the container and mark a user within the group */ | |
461 | vfio_container_get(container); | |
462 | ||
463 | out_unlock_container: | |
464 | up_write(&container->group_lock); | |
465 | return ret; | |
466 | } | |
467 | ||
468 | void vfio_group_detach_container(struct vfio_group *group) | |
469 | { | |
470 | struct vfio_container *container = group->container; | |
471 | struct vfio_iommu_driver *driver; | |
472 | ||
c82e81ab | 473 | lockdep_assert_held(&group->group_lock); |
cdc71fe4 JG |
474 | WARN_ON(group->container_users != 1); |
475 | ||
476 | down_write(&container->group_lock); | |
477 | ||
478 | driver = container->iommu_driver; | |
479 | if (driver) | |
480 | driver->ops->detach_group(container->iommu_data, | |
481 | group->iommu_group); | |
482 | ||
483 | if (group->type == VFIO_IOMMU) | |
484 | iommu_group_release_dma_owner(group->iommu_group); | |
485 | ||
486 | group->container = NULL; | |
487 | group->container_users = 0; | |
488 | list_del(&group->container_next); | |
489 | ||
490 | /* Detaching the last group deprivileges a container, remove iommu */ | |
491 | if (driver && list_empty(&container->group_list)) { | |
492 | driver->ops->release(container->iommu_data); | |
493 | module_put(driver->ops->owner); | |
494 | container->iommu_driver = NULL; | |
495 | container->iommu_data = NULL; | |
496 | } | |
497 | ||
498 | up_write(&container->group_lock); | |
499 | ||
500 | vfio_container_put(container); | |
501 | } | |
502 | ||
04f930c3 | 503 | int vfio_group_use_container(struct vfio_group *group) |
cdc71fe4 | 504 | { |
c82e81ab | 505 | lockdep_assert_held(&group->group_lock); |
cdc71fe4 | 506 | |
2a3dab19 JG |
507 | /* |
508 | * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but | |
509 | * VFIO_SET_IOMMU hasn't been done yet. | |
510 | */ | |
511 | if (!group->container->iommu_driver) | |
cdc71fe4 JG |
512 | return -EINVAL; |
513 | ||
514 | if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) | |
515 | return -EPERM; | |
516 | ||
517 | get_file(group->opened_file); | |
518 | group->container_users++; | |
519 | return 0; | |
520 | } | |
521 | ||
04f930c3 | 522 | void vfio_group_unuse_container(struct vfio_group *group) |
cdc71fe4 | 523 | { |
04f930c3 | 524 | lockdep_assert_held(&group->group_lock); |
bab6fabc | 525 | |
04f930c3 JG |
526 | WARN_ON(group->container_users <= 1); |
527 | group->container_users--; | |
528 | fput(group->opened_file); | |
cdc71fe4 JG |
529 | } |
530 | ||
8da7a0e7 YL |
531 | int vfio_device_container_pin_pages(struct vfio_device *device, |
532 | dma_addr_t iova, int npage, | |
533 | int prot, struct page **pages) | |
cdc71fe4 | 534 | { |
8da7a0e7 YL |
535 | struct vfio_container *container = device->group->container; |
536 | struct iommu_group *iommu_group = device->group->iommu_group; | |
4741f2e9 | 537 | struct vfio_iommu_driver *driver = container->iommu_driver; |
cdc71fe4 JG |
538 | |
539 | if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) | |
540 | return -E2BIG; | |
541 | ||
4741f2e9 JG |
542 | if (unlikely(!driver || !driver->ops->pin_pages)) |
543 | return -ENOTTY; | |
544 | return driver->ops->pin_pages(container->iommu_data, iommu_group, iova, | |
545 | npage, prot, pages); | |
cdc71fe4 | 546 | } |
cdc71fe4 | 547 | |
8da7a0e7 YL |
548 | void vfio_device_container_unpin_pages(struct vfio_device *device, |
549 | dma_addr_t iova, int npage) | |
cdc71fe4 | 550 | { |
8da7a0e7 YL |
551 | struct vfio_container *container = device->group->container; |
552 | ||
cdc71fe4 JG |
553 | if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES)) |
554 | return; | |
555 | ||
4741f2e9 JG |
556 | container->iommu_driver->ops->unpin_pages(container->iommu_data, iova, |
557 | npage); | |
cdc71fe4 | 558 | } |
cdc71fe4 | 559 | |
8da7a0e7 YL |
560 | int vfio_device_container_dma_rw(struct vfio_device *device, |
561 | dma_addr_t iova, void *data, | |
562 | size_t len, bool write) | |
cdc71fe4 | 563 | { |
8da7a0e7 | 564 | struct vfio_container *container = device->group->container; |
4741f2e9 | 565 | struct vfio_iommu_driver *driver = container->iommu_driver; |
cdc71fe4 | 566 | |
4741f2e9 JG |
567 | if (unlikely(!driver || !driver->ops->dma_rw)) |
568 | return -ENOTTY; | |
569 | return driver->ops->dma_rw(container->iommu_data, iova, data, len, | |
570 | write); | |
cdc71fe4 | 571 | } |
cdc71fe4 JG |
572 | |
573 | int __init vfio_container_init(void) | |
574 | { | |
575 | int ret; | |
576 | ||
577 | mutex_init(&vfio.iommu_drivers_lock); | |
578 | INIT_LIST_HEAD(&vfio.iommu_drivers_list); | |
579 | ||
580 | ret = misc_register(&vfio_dev); | |
581 | if (ret) { | |
582 | pr_err("vfio: misc device register failed\n"); | |
583 | return ret; | |
584 | } | |
585 | ||
586 | if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) { | |
587 | ret = vfio_register_iommu_driver(&vfio_noiommu_ops); | |
588 | if (ret) | |
589 | goto err_misc; | |
590 | } | |
591 | return 0; | |
592 | ||
593 | err_misc: | |
594 | misc_deregister(&vfio_dev); | |
595 | return ret; | |
596 | } | |
597 | ||
598 | void vfio_container_cleanup(void) | |
599 | { | |
600 | if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) | |
601 | vfio_unregister_iommu_driver(&vfio_noiommu_ops); | |
602 | misc_deregister(&vfio_dev); | |
603 | mutex_destroy(&vfio.iommu_drivers_lock); | |
604 | } | |
81ab9890 JG |
605 | |
/*
 * Module aliases for the container device: one keyed on the VFIO_MINOR misc
 * minor and one on the "vfio/vfio" device node name.
 * NOTE(review): presumably these let the module be loaded on demand when the
 * node is opened - confirm against the module alias documentation.
 */
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");