// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 * Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>
#include <linux/pm_runtime.h>
#include <linux/interval_tree.h>
#include <linux/iova_bitmap.h>
#include "vfio.h"

#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"
static struct vfio {
	struct class			*class;
	struct list_head		group_list;
	struct mutex			group_lock; /* locks group_list */
	struct ida			group_ida;
	dev_t				group_devt;
	struct class			*device_class;
	struct ida			device_ida;
} vfio;

static DEFINE_XARRAY(vfio_device_set_xa);
static const struct file_operations vfio_group_fops;

int vfio_assign_device_set(struct vfio_device *device, void *set_id)
{
	unsigned long idx = (unsigned long)set_id;
	struct vfio_device_set *new_dev_set;
	struct vfio_device_set *dev_set;

	if (WARN_ON(!set_id))
		return -EINVAL;

	/*
	 * Atomically acquire a singleton object in the xarray for this set_id
	 */
	xa_lock(&vfio_device_set_xa);
	dev_set = xa_load(&vfio_device_set_xa, idx);
	if (dev_set)
		goto found_get_ref;
	xa_unlock(&vfio_device_set_xa);

	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
	if (!new_dev_set)
		return -ENOMEM;
	mutex_init(&new_dev_set->lock);
	INIT_LIST_HEAD(&new_dev_set->device_list);
	new_dev_set->set_id = set_id;

	xa_lock(&vfio_device_set_xa);
	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
			       GFP_KERNEL);
	if (!dev_set) {
		dev_set = new_dev_set;
		goto found_get_ref;
	}

	kfree(new_dev_set);
	if (xa_is_err(dev_set)) {
		xa_unlock(&vfio_device_set_xa);
		return xa_err(dev_set);
	}

found_get_ref:
	dev_set->device_count++;
	xa_unlock(&vfio_device_set_xa);
	mutex_lock(&dev_set->lock);
	device->dev_set = dev_set;
	list_add_tail(&device->dev_set_list, &dev_set->device_list);
	mutex_unlock(&dev_set->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(vfio_assign_device_set);

static void vfio_release_device_set(struct vfio_device *device)
{
	struct vfio_device_set *dev_set = device->dev_set;

	if (!dev_set)
		return;

	mutex_lock(&dev_set->lock);
	list_del(&device->dev_set_list);
	mutex_unlock(&dev_set->lock);

	xa_lock(&vfio_device_set_xa);
	if (!--dev_set->device_count) {
		__xa_erase(&vfio_device_set_xa,
			   (unsigned long)dev_set->set_id);
		mutex_destroy(&dev_set->lock);
		kfree(dev_set);
	}
	xa_unlock(&vfio_device_set_xa);
}

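/*
 * Editor's illustrative sketch (not part of the original file, not
 * built): how a hypothetical driver might use vfio_assign_device_set()
 * before registration so that devices sharing one reset domain land in
 * the same vfio_device_set. The "hypo_" name is invented for this
 * example.
 */
#if 0
static int hypo_vfio_pci_setup(struct pci_dev *pdev, struct vfio_device *vdev)
{
	/*
	 * Any stable pointer works as @set_id; using the shared bus
	 * pointer groups every device on that bus into one set, so
	 * dev_set->lock serializes their open/close/reset paths.
	 */
	return vfio_assign_device_set(vdev, pdev->bus);
}
#endif
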
/*
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *
__vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	/*
	 * group->iommu_group from the vfio.group_list cannot be NULL
	 * under the vfio.group_lock.
	 */
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			refcount_inc(&group->drivers);
			return group;
		}
	}
	return NULL;
}

static struct vfio_group *
vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = __vfio_group_get_from_iommu(iommu_group);
	mutex_unlock(&vfio.group_lock);
	return group;
}

static void vfio_group_release(struct device *dev)
{
	struct vfio_group *group = container_of(dev, struct vfio_group, dev);

	mutex_destroy(&group->device_lock);
	mutex_destroy(&group->group_lock);
	WARN_ON(group->iommu_group);
	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
	kfree(group);
}

static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
					   enum vfio_group_type type)
{
	struct vfio_group *group;
	int minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
	if (minor < 0) {
		kfree(group);
		return ERR_PTR(minor);
	}

	device_initialize(&group->dev);
	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
	group->dev.class = vfio.class;
	group->dev.release = vfio_group_release;
	cdev_init(&group->cdev, &vfio_group_fops);
	group->cdev.owner = THIS_MODULE;

	refcount_set(&group->drivers, 1);
	mutex_init(&group->group_lock);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	group->iommu_group = iommu_group;
	/* put in vfio_group_release() */
	iommu_group_ref_get(iommu_group);
	group->type = type;
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	return group;
}

static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
		enum vfio_group_type type)
{
	struct vfio_group *group;
	struct vfio_group *ret;
	int err;

	group = vfio_group_alloc(iommu_group, type);
	if (IS_ERR(group))
		return group;

	err = dev_set_name(&group->dev, "%s%d",
			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
			   iommu_group_id(iommu_group));
	if (err) {
		ret = ERR_PTR(err);
		goto err_put;
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	ret = __vfio_group_get_from_iommu(iommu_group);
	if (ret)
		goto err_unlock;

	err = cdev_device_add(&group->cdev, &group->dev);
	if (err) {
		ret = ERR_PTR(err);
		goto err_unlock;
	}

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);
	return group;

err_unlock:
	mutex_unlock(&vfio.group_lock);
err_put:
	put_device(&group->dev);
	return ret;
}

static void vfio_device_remove_group(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	struct iommu_group *iommu_group;

	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);

	/* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */
	if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock))
		return;
	list_del(&group->vfio_next);

	/*
	 * We could concurrently probe another driver in the group that might
	 * race vfio_device_remove_group() with vfio_get_group(), so we have to
	 * ensure that the sysfs is all cleaned up under lock otherwise the
	 * cdev_device_add() will fail due to the name already existing.
	 */
	cdev_device_del(&group->cdev, &group->dev);

	mutex_lock(&group->group_lock);
	/*
	 * These data structures all have paired operations that can only be
	 * undone when the caller holds a live reference on the device. Since
	 * all pairs must be undone these WARN_ON's indicate some caller did not
	 * properly hold the group reference.
	 */
	WARN_ON(!list_empty(&group->device_list));
	WARN_ON(group->notifier.head);

	/*
	 * Revoke all users of group->iommu_group. At this point we know there
	 * are no devices active because we are unplugging the last one. Setting
	 * iommu_group to NULL blocks all new users.
	 */
	if (group->container)
		vfio_group_detach_container(group);
	iommu_group = group->iommu_group;
	group->iommu_group = NULL;
	mutex_unlock(&group->group_lock);
	mutex_unlock(&vfio.group_lock);

	iommu_group_put(iommu_group);
	put_device(&group->dev);
}

/*
 * Device objects - create, release, get, put, search
 */
/* Device reference always implies a group reference */
static void vfio_device_put_registration(struct vfio_device *device)
{
	if (refcount_dec_and_test(&device->refcount))
		complete(&device->comp);
}

static bool vfio_device_try_get_registration(struct vfio_device *device)
{
	return refcount_inc_not_zero(&device->refcount);
}

static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev &&
		    vfio_device_try_get_registration(device)) {
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}

/*
 * VFIO driver API
 */
/* Release helper called by vfio_put_device() */
static void vfio_device_release(struct device *dev)
{
	struct vfio_device *device =
			container_of(dev, struct vfio_device, device);

	vfio_release_device_set(device);
	ida_free(&vfio.device_ida, device->index);

	/*
	 * kvfree() cannot be done here due to a life cycle mess in
	 * vfio-ccw. Before the ccw part is fixed all drivers are
	 * required to support @release and call vfio_free_device()
	 * from there.
	 */
	device->ops->release(device);
}

/*
 * Allocate and initialize vfio_device so it can be registered to vfio
 * core.
 *
 * Drivers should use the wrapper vfio_alloc_device() for allocation.
 * @size is the size of the structure to be allocated, including any
 * private data used by the driver.
 *
 * Driver may provide an @init callback to cover device private data.
 *
 * On a successful return, use vfio_put_device() to release the structure.
 */
struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
				       const struct vfio_device_ops *ops)
{
	struct vfio_device *device;
	int ret;

	if (WARN_ON(size < sizeof(struct vfio_device)))
		return ERR_PTR(-EINVAL);

	device = kvzalloc(size, GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	ret = vfio_init_device(device, dev, ops);
	if (ret)
		goto out_free;
	return device;

out_free:
	kvfree(device);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(_vfio_alloc_device);

/*
 * Initialize a vfio_device so it can be registered to vfio core.
 *
 * Only the vfio-ccw driver should call this interface.
 */
int vfio_init_device(struct vfio_device *device, struct device *dev,
		     const struct vfio_device_ops *ops)
{
	int ret;

	ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL);
	if (ret < 0) {
		dev_dbg(dev, "Failed to allocate device index\n");
		return ret;
	}

	device->index = ret;
	init_completion(&device->comp);
	device->dev = dev;
	device->ops = ops;

	if (ops->init) {
		ret = ops->init(device);
		if (ret)
			goto out_uninit;
	}

	device_initialize(&device->device);
	device->device.release = vfio_device_release;
	device->device.class = vfio.device_class;
	device->device.parent = device->dev;
	return 0;

out_uninit:
	vfio_release_device_set(device);
	ida_free(&vfio.device_ida, device->index);
	return ret;
}
EXPORT_SYMBOL_GPL(vfio_init_device);

/*
 * The helper called by a driver's @release callback to free the device
 * structure. Drivers which don't have private data to clean up can
 * simply use this helper as their @release.
 */
void vfio_free_device(struct vfio_device *device)
{
	kvfree(device);
}
EXPORT_SYMBOL_GPL(vfio_free_device);

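/*
 * Editor's illustrative sketch (not part of the original file, not
 * built): the expected allocation pattern for a driver embedding
 * vfio_device in its own state. The vfio_alloc_device() wrapper
 * requires the vfio_device member to sit at offset zero. The "hypo_"
 * names (including hypo_dev_ops, sketched further below) are invented
 * for this example.
 */
#if 0
struct hypo_device {
	struct vfio_device vdev;	/* must be the first member */
	void __iomem *regs;
};

static int hypo_probe(struct device *dev)
{
	struct hypo_device *hd;

	hd = vfio_alloc_device(hypo_device, vdev, dev, &hypo_dev_ops);
	if (IS_ERR(hd))
		return PTR_ERR(hd);
	dev_set_drvdata(dev, hd);
	return vfio_register_group_dev(&hd->vdev);
}
#endif
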
static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
		enum vfio_group_type type)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	int ret;

	iommu_group = iommu_group_alloc();
	if (IS_ERR(iommu_group))
		return ERR_CAST(iommu_group);

	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
	if (ret)
		goto out_put_group;
	ret = iommu_group_add_device(iommu_group, dev);
	if (ret)
		goto out_put_group;

	group = vfio_create_group(iommu_group, type);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_remove_device;
	}
	iommu_group_put(iommu_group);
	return group;

out_remove_device:
	iommu_group_remove_device(dev);
out_put_group:
	iommu_group_put(iommu_group);
	return ERR_PTR(ret);
}

static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group && vfio_noiommu) {
		/*
		 * With noiommu enabled, create an IOMMU group for devices that
		 * don't already have one, implying no IOMMU hardware/driver
		 * exists. Taint the kernel because we're about to give a DMA
		 * capable device to a user without IOMMU protection.
		 */
		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
		if (!IS_ERR(group)) {
			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
		}
		return group;
	}

	if (!iommu_group)
		return ERR_PTR(-EINVAL);

	/*
	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
	 * restore cache coherency. It has to be checked here because it is only
	 * valid for cases where we are using iommu groups.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
		iommu_group_put(iommu_group);
		return ERR_PTR(-EINVAL);
	}

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group)
		group = vfio_create_group(iommu_group, VFIO_IOMMU);

	/* The vfio_group holds a reference to the iommu_group */
	iommu_group_put(iommu_group);
	return group;
}

static int __vfio_register_dev(struct vfio_device *device,
		struct vfio_group *group)
{
	struct vfio_device *existing_device;
	int ret;

	/*
	 * In all cases group is the output of one of the group allocation
	 * functions and we have group->drivers incremented for us.
	 */
	if (IS_ERR(group))
		return PTR_ERR(group);

	/*
	 * If the driver doesn't specify a set then the device is added to a
	 * singleton set just for itself.
	 */
	if (!device->dev_set)
		vfio_assign_device_set(device, device);

	existing_device = vfio_group_get_device(group, device->dev);
	if (existing_device) {
		/*
		 * group->iommu_group is non-NULL because we hold the drivers
		 * refcount.
		 */
		dev_WARN(device->dev, "Device already exists on group %d\n",
			 iommu_group_id(group->iommu_group));
		vfio_device_put_registration(existing_device);
		ret = -EBUSY;
		goto err_out;
	}

	/* Our reference on group is moved to the device */
	device->group = group;

	ret = dev_set_name(&device->device, "vfio%d", device->index);
	if (ret)
		goto err_out;

	ret = device_add(&device->device);
	if (ret)
		goto err_out;

	/* Refcounting can't start until the driver calls register */
	refcount_set(&device->refcount, 1);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return 0;
err_out:
	vfio_device_remove_group(device);
	return ret;
}

int vfio_register_group_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_group_find_or_alloc(device->dev));
}
EXPORT_SYMBOL_GPL(vfio_register_group_dev);

/*
 * Register a virtual device without IOMMU backing. The user of this
 * device must not be able to directly trigger unmediated DMA.
 */
int vfio_register_emulated_iommu_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
}
EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);

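/*
 * Editor's illustrative sketch (not part of the original file, not
 * built): the remove path that pairs with the registration above -
 * unregister first, then drop the allocation reference taken by
 * vfio_alloc_device(). The "hypo_" names are invented for this example.
 */
#if 0
static void hypo_remove(struct device *dev)
{
	struct hypo_device *hd = dev_get_drvdata(dev);

	vfio_unregister_group_dev(&hd->vdev);
	vfio_put_device(&hd->vdev);	/* final put triggers ->release() */
}
#endif
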
static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = ERR_PTR(-ENODEV);

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		int ret;

		if (it->ops->match) {
			ret = it->ops->match(it, buf);
			if (ret < 0) {
				device = ERR_PTR(ret);
				break;
			}
		} else {
			ret = !strcmp(dev_name(it->dev), buf);
		}

		if (ret && vfio_device_try_get_registration(it)) {
			device = it;
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed. Open file descriptors for the device... */
void vfio_unregister_group_dev(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	unsigned int i = 0;
	bool interrupted = false;
	long rc;

	vfio_device_put_registration(device);
	rc = try_wait_for_completion(&device->comp);
	while (rc <= 0) {
		if (device->ops->request)
			device->ops->request(device, i++);

		if (interrupted) {
			rc = wait_for_completion_timeout(&device->comp,
							 HZ * 10);
		} else {
			rc = wait_for_completion_interruptible_timeout(
				&device->comp, HZ * 10);
			if (rc < 0) {
				interrupted = true;
				dev_warn(device->dev,
					 "Device is currently in use, task \"%s\" (%d) blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	}

	mutex_lock(&group->device_lock);
	list_del(&device->group_next);
	mutex_unlock(&group->device_lock);

	/* Balances device_add in register path */
	device_del(&device->device);

	vfio_device_remove_group(device);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);

/*
 * VFIO Group fd, /dev/vfio/$GROUP
 */
/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset. Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_ioctl_unset_container(struct vfio_group *group)
{
	int ret = 0;

	mutex_lock(&group->group_lock);
	if (!group->container) {
		ret = -EINVAL;
		goto out_unlock;
	}
	if (group->container_users != 1) {
		ret = -EBUSY;
		goto out_unlock;
	}
	vfio_group_detach_container(group);

out_unlock:
	mutex_unlock(&group->group_lock);
	return ret;
}

static int vfio_group_ioctl_set_container(struct vfio_group *group,
					  int __user *arg)
{
	struct vfio_container *container;
	struct fd f;
	int ret;
	int fd;

	if (get_user(fd, arg))
		return -EFAULT;

	f = fdget(fd);
	if (!f.file)
		return -EBADF;

	mutex_lock(&group->group_lock);
	if (group->container || WARN_ON(group->container_users)) {
		ret = -EINVAL;
		goto out_unlock;
	}
	if (!group->iommu_group) {
		ret = -ENODEV;
		goto out_unlock;
	}

	container = vfio_container_from_file(f.file);
	ret = -EINVAL;
	if (container) {
		ret = vfio_container_attach_group(container, group);
		goto out_unlock;
	}

out_unlock:
	mutex_unlock(&group->group_lock);
	fdput(f);
	return ret;
}

static const struct file_operations vfio_device_fops;

/* true if the vfio_device has open_device() called but not close_device() */
bool vfio_assert_device_open(struct vfio_device *device)
{
	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}

static int vfio_device_first_open(struct vfio_device *device)
{
	int ret;

	lockdep_assert_held(&device->dev_set->lock);

	if (!try_module_get(device->dev->driver->owner))
		return -ENODEV;

	/*
	 * Here we pass the KVM pointer with the group under the lock. If the
	 * device driver will use it, it must obtain a reference and release it
	 * during close_device.
	 */
	mutex_lock(&device->group->group_lock);
	ret = vfio_device_assign_container(device);
	if (ret)
		goto err_module_put;

	device->kvm = device->group->kvm;
	if (device->ops->open_device) {
		ret = device->ops->open_device(device);
		if (ret)
			goto err_container;
	}
	vfio_device_container_register(device);
	mutex_unlock(&device->group->group_lock);
	return 0;

err_container:
	device->kvm = NULL;
	vfio_device_unassign_container(device);
err_module_put:
	mutex_unlock(&device->group->group_lock);
	module_put(device->dev->driver->owner);
	return ret;
}

static void vfio_device_last_close(struct vfio_device *device)
{
	lockdep_assert_held(&device->dev_set->lock);

	mutex_lock(&device->group->group_lock);
	vfio_device_container_unregister(device);
	if (device->ops->close_device)
		device->ops->close_device(device);
	device->kvm = NULL;
	vfio_device_unassign_container(device);
	mutex_unlock(&device->group->group_lock);
	module_put(device->dev->driver->owner);
}

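/*
 * Editor's illustrative sketch (not part of the original file, not
 * built): the shape of a driver's vfio_device_ops whose
 * open_device/close_device callbacks are invoked from the
 * first-open/last-close paths above. The "hypo_" callbacks are
 * invented for this example.
 */
#if 0
static const struct vfio_device_ops hypo_dev_ops = {
	.name		= "hypo-vfio",
	.init		= hypo_init,		/* fill driver-private state */
	.release	= vfio_free_device,	/* no private cleanup needed */
	.open_device	= hypo_open_device,	/* first user of the device */
	.close_device	= hypo_close_device,	/* last user went away */
	.read		= hypo_read,
	.write		= hypo_write,
	.mmap		= hypo_mmap,
	.ioctl		= hypo_ioctl,
};
#endif
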
static struct file *vfio_device_open(struct vfio_device *device)
{
	struct file *filep;
	int ret;

	mutex_lock(&device->dev_set->lock);
	device->open_count++;
	if (device->open_count == 1) {
		ret = vfio_device_first_open(device);
		if (ret)
			goto err_unlock;
	}
	mutex_unlock(&device->dev_set->lock);

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_close_device;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented. Now there's need.
	 */
	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);

	if (device->group->type == VFIO_NO_IOMMU)
		dev_warn(device->dev, "vfio-noiommu device opened by user (%s:%d)\n",
			 current->comm, task_pid_nr(current));
	/*
	 * On success the ref of device is moved to the file and
	 * put in vfio_device_fops_release()
	 */
	return filep;

err_close_device:
	mutex_lock(&device->dev_set->lock);
	if (device->open_count == 1)
		vfio_device_last_close(device);
err_unlock:
	device->open_count--;
	mutex_unlock(&device->dev_set->lock);
	return ERR_PTR(ret);
}

static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
					  char __user *arg)
{
	struct vfio_device *device;
	struct file *filep;
	char *buf;
	int fdno;
	int ret;

	buf = strndup_user(arg, PAGE_SIZE);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	device = vfio_device_get_from_name(group, buf);
	kfree(buf);
	if (IS_ERR(device))
		return PTR_ERR(device);

	fdno = get_unused_fd_flags(O_CLOEXEC);
	if (fdno < 0) {
		ret = fdno;
		goto err_put_device;
	}

	filep = vfio_device_open(device);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_put_fdno;
	}

	fd_install(fdno, filep);
	return fdno;

err_put_fdno:
	put_unused_fd(fdno);
err_put_device:
	vfio_device_put_registration(device);
	return ret;
}

static int vfio_group_ioctl_get_status(struct vfio_group *group,
				       struct vfio_group_status __user *arg)
{
	unsigned long minsz = offsetofend(struct vfio_group_status, flags);
	struct vfio_group_status status;

	if (copy_from_user(&status, arg, minsz))
		return -EFAULT;

	if (status.argsz < minsz)
		return -EINVAL;

	status.flags = 0;

	mutex_lock(&group->group_lock);
	if (!group->iommu_group) {
		mutex_unlock(&group->group_lock);
		return -ENODEV;
	}

	if (group->container)
		status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
				VFIO_GROUP_FLAGS_VIABLE;
	else if (!iommu_group_dma_owner_claimed(group->iommu_group))
		status.flags |= VFIO_GROUP_FLAGS_VIABLE;
	mutex_unlock(&group->group_lock);

	if (copy_to_user(arg, &status, minsz))
		return -EFAULT;
	return 0;
}

static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	void __user *uarg = (void __user *)arg;

	switch (cmd) {
	case VFIO_GROUP_GET_DEVICE_FD:
		return vfio_group_ioctl_get_device_fd(group, uarg);
	case VFIO_GROUP_GET_STATUS:
		return vfio_group_ioctl_get_status(group, uarg);
	case VFIO_GROUP_SET_CONTAINER:
		return vfio_group_ioctl_set_container(group, uarg);
	case VFIO_GROUP_UNSET_CONTAINER:
		return vfio_group_ioctl_unset_container(group);
	default:
		return -ENOTTY;
	}
}

static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group =
		container_of(inode->i_cdev, struct vfio_group, cdev);
	int ret;

	mutex_lock(&group->group_lock);

	/*
	 * drivers can be zero if this races with vfio_device_remove_group();
	 * it will be stable at 0 under the group lock.
	 */
	if (refcount_read(&group->drivers) == 0) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
		ret = -EPERM;
		goto out_unlock;
	}

	/*
	 * Do we need multiple instances of the group open? Seems not.
	 */
	if (group->opened_file) {
		ret = -EBUSY;
		goto out_unlock;
	}
	group->opened_file = filep;
	filep->private_data = group;
	ret = 0;
out_unlock:
	mutex_unlock(&group->group_lock);
	return ret;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	mutex_lock(&group->group_lock);
	/*
	 * Device FDs hold a group file reference, therefore the group release
	 * is only called when there are no open devices.
	 */
	WARN_ON(group->notifier.head);
	if (group->container)
		vfio_group_detach_container(group);
	group->opened_file = NULL;
	mutex_unlock(&group->group_lock);
	return 0;
}

static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};

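/*
 * Editor's illustrative sketch (not part of the original file, not
 * built): the userspace side of the group-fd protocol served by
 * vfio_group_fops above. This would live in a user program, not in the
 * kernel; error handling is elided and the "hypo_" name is invented.
 */
#if 0
int hypo_user_get_device_fd(const char *group_path, int container_fd,
			    const char *dev_name)
{
	struct vfio_group_status status = { .argsz = sizeof(status) };
	int group_fd = open(group_path, O_RDWR);	/* /dev/vfio/<N> */

	ioctl(group_fd, VFIO_GROUP_GET_STATUS, &status);
	if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE))
		return -1;	/* not all group devices bound to vfio */
	ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container_fd);
	return ioctl(group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_name);
}
#endif
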
/*
 * Wrapper around pm_runtime_resume_and_get().
 * Return error code on failure or 0 on success.
 */
static inline int vfio_device_pm_runtime_get(struct vfio_device *device)
{
	struct device *dev = device->dev;

	if (dev->driver && dev->driver->pm) {
		int ret;

		ret = pm_runtime_resume_and_get(dev);
		if (ret) {
			dev_info_ratelimited(dev,
				"vfio: runtime resume failed %d\n", ret);
			return -EIO;
		}
	}

	return 0;
}

/*
 * Wrapper around pm_runtime_put().
 */
static inline void vfio_device_pm_runtime_put(struct vfio_device *device)
{
	struct device *dev = device->dev;

	if (dev->driver && dev->driver->pm)
		pm_runtime_put(dev);
}

/*
 * VFIO Device fd
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;

	mutex_lock(&device->dev_set->lock);
	vfio_assert_device_open(device);
	if (device->open_count == 1)
		vfio_device_last_close(device);
	device->open_count--;
	mutex_unlock(&device->dev_set->lock);

	vfio_device_put_registration(device);

	return 0;
}

/*
 * vfio_mig_get_next_state - Compute the next step in the FSM
 * @cur_fsm - The current state the device is in
 * @new_fsm - The target state to reach
 * @next_fsm - Pointer to the next step to get to new_fsm
 *
 * Return 0 upon success, otherwise -errno
 * Upon success the next step in the state progression between cur_fsm and
 * new_fsm will be set in next_fsm.
 *
 * This breaks down requests for combination transitions into smaller steps and
 * returns the next step to get to new_fsm. The function may need to be called
 * multiple times before reaching new_fsm.
 */
int vfio_mig_get_next_state(struct vfio_device *device,
			    enum vfio_device_mig_state cur_fsm,
			    enum vfio_device_mig_state new_fsm,
			    enum vfio_device_mig_state *next_fsm)
{
	enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 };
	/*
	 * The coding in this table requires the driver to implement the
	 * following FSM arcs:
	 *         RESUMING -> STOP
	 *         STOP -> RESUMING
	 *         STOP -> STOP_COPY
	 *         STOP_COPY -> STOP
	 *
	 * If P2P is supported then the driver must also implement these FSM
	 * arcs:
	 *         RUNNING -> RUNNING_P2P
	 *         RUNNING_P2P -> RUNNING
	 *         RUNNING_P2P -> STOP
	 *         STOP -> RUNNING_P2P
	 * Without P2P the driver must implement:
	 *         RUNNING -> STOP
	 *         STOP -> RUNNING
	 *
	 * The coding will step through multiple states for some combination
	 * transitions; if all optional features are supported, this means the
	 * following ones:
	 *         RESUMING -> STOP -> RUNNING_P2P
	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING
	 *         RESUMING -> STOP -> STOP_COPY
	 *         RUNNING -> RUNNING_P2P -> STOP
	 *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
	 *         RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING_P2P -> STOP -> STOP_COPY
	 *         STOP -> RUNNING_P2P -> RUNNING
	 *         STOP_COPY -> STOP -> RESUMING
	 *         STOP_COPY -> STOP -> RUNNING_P2P
	 *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
	 */
	static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_STOP_COPY] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RESUMING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING_P2P] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_ERROR] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
	};

	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING_P2P] =
			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
		[VFIO_DEVICE_STATE_ERROR] = ~0U,
	};

	if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
		    (state_flags_table[cur_fsm] & device->migration_flags) !=
			state_flags_table[cur_fsm]))
		return -EINVAL;

	if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
	    (state_flags_table[new_fsm] & device->migration_flags) !=
			state_flags_table[new_fsm])
		return -EINVAL;

	/*
	 * Arcs touching optional and unsupported states are skipped over. The
	 * driver will instead see an arc from the original state to the next
	 * logical state, as per the above comment.
	 */
	*next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
	while ((state_flags_table[*next_fsm] & device->migration_flags) !=
			state_flags_table[*next_fsm])
		*next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];

	return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);

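/*
 * Editor's illustrative sketch (not part of the original file, not
 * built): how a migration driver's migration_set_state() typically
 * consumes vfio_mig_get_next_state(), stepping one supported arc at a
 * time until the requested state is reached. hypo_cur_state() and
 * hypo_do_one_arc() are invented driver-side helpers, not real kernel
 * symbols.
 */
#if 0
static int hypo_step_to_state(struct vfio_device *vdev,
			      enum vfio_device_mig_state new_state)
{
	enum vfio_device_mig_state next;
	int ret;

	while (hypo_cur_state(vdev) != new_state) {
		ret = vfio_mig_get_next_state(vdev, hypo_cur_state(vdev),
					      new_state, &next);
		if (ret)
			return ret;	/* illegal transition requested */
		ret = hypo_do_one_arc(vdev, next);	/* device-specific */
		if (ret)
			return ret;
	}
	return 0;
}
#endif
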
/*
 * Convert the driver's struct file into a FD number and return it to userspace
 */
static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
				   struct vfio_device_feature_mig_state *mig)
{
	int ret;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		ret = fd;
		goto out_fput;
	}

	mig->data_fd = fd;
	if (copy_to_user(arg, mig, sizeof(*mig))) {
		ret = -EFAULT;
		goto out_put_unused;
	}
	fd_install(fd, filp);
	return 0;

out_put_unused:
	put_unused_fd(fd);
out_fput:
	fput(filp);
	return ret;
}

static int
vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
					   u32 flags, void __user *arg,
					   size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_mig_state, data_fd);
	struct vfio_device_feature_mig_state mig;
	struct file *filp = NULL;
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET |
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;

	if (copy_from_user(&mig, arg, minsz))
		return -EFAULT;

	if (flags & VFIO_DEVICE_FEATURE_GET) {
		enum vfio_device_mig_state curr_state;

		ret = device->mig_ops->migration_get_state(device,
							   &curr_state);
		if (ret)
			return ret;
		mig.device_state = curr_state;
		goto out_copy;
	}

	/* Handle the VFIO_DEVICE_FEATURE_SET */
	filp = device->mig_ops->migration_set_state(device, mig.device_state);
	if (IS_ERR(filp) || !filp)
		goto out_copy;

	return vfio_ioct_mig_return_fd(filp, arg, &mig);
out_copy:
	mig.data_fd = -1;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	if (IS_ERR(filp))
		return PTR_ERR(filp);
	return 0;
}

static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
					       u32 flags, void __user *arg,
					       size_t argsz)
{
	struct vfio_device_feature_migration mig = {
		.flags = device->migration_flags,
	};
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

/* Ranges should fit into a single kernel page */
#define LOG_MAX_RANGES \
	(PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))

static int
vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
					u32 flags, void __user *arg,
					size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_control,
			    ranges);
	struct vfio_device_feature_dma_logging_range __user *ranges;
	struct vfio_device_feature_dma_logging_control control;
	struct vfio_device_feature_dma_logging_range range;
	struct rb_root_cached root = RB_ROOT_CACHED;
	struct interval_tree_node *nodes;
	u64 iova_end;
	u32 nnodes;
	int i, ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET,
				 sizeof(control));
	if (ret != 1)
		return ret;

	if (copy_from_user(&control, arg, minsz))
		return -EFAULT;

	nnodes = control.num_ranges;
	if (!nnodes)
		return -EINVAL;

	if (nnodes > LOG_MAX_RANGES)
		return -E2BIG;

	ranges = u64_to_user_ptr(control.ranges);
	nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
			      GFP_KERNEL);
	if (!nodes)
		return -ENOMEM;

	for (i = 0; i < nnodes; i++) {
		if (copy_from_user(&range, &ranges[i], sizeof(range))) {
			ret = -EFAULT;
			goto end;
		}
		if (!IS_ALIGNED(range.iova, control.page_size) ||
		    !IS_ALIGNED(range.length, control.page_size)) {
			ret = -EINVAL;
			goto end;
		}

		if (check_add_overflow(range.iova, range.length, &iova_end) ||
		    iova_end > ULONG_MAX) {
			ret = -EOVERFLOW;
			goto end;
		}

		nodes[i].start = range.iova;
		nodes[i].last = range.iova + range.length - 1;
		if (interval_tree_iter_first(&root, nodes[i].start,
					     nodes[i].last)) {
			/* Range overlapping */
			ret = -EINVAL;
			goto end;
		}
		interval_tree_insert(nodes + i, &root);
	}

	ret = device->log_ops->log_start(device, &root, nnodes,
					 &control.page_size);
	if (ret)
		goto end;

	if (copy_to_user(arg, &control, sizeof(control))) {
		ret = -EFAULT;
		device->log_ops->log_stop(device);
	}

end:
	kfree(nodes);
	return ret;
}

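/*
 * Editor's illustrative sketch (not part of the original file, not
 * built): a userspace caller starting DMA dirty tracking over one IOVA
 * range, matching the layout parsed by
 * vfio_ioctl_device_feature_logging_start() above. This would live in a
 * user program; error handling is elided and the "hypo_" name is
 * invented.
 */
#if 0
int hypo_user_start_dirty_tracking(int device_fd, __u64 iova, __u64 length)
{
	struct vfio_device_feature_dma_logging_range range = {
		.iova = iova,
		.length = length,
	};
	struct {
		struct vfio_device_feature hdr;
		struct vfio_device_feature_dma_logging_control ctrl;
	} buf = {
		.hdr.argsz = sizeof(buf),
		.hdr.flags = VFIO_DEVICE_FEATURE_SET |
			     VFIO_DEVICE_FEATURE_DMA_LOGGING_START,
		.ctrl.page_size = 4096,
		.ctrl.num_ranges = 1,
		.ctrl.ranges = (__u64)(uintptr_t)&range,
	};

	return ioctl(device_fd, VFIO_DEVICE_FEATURE, &buf);
}
#endif
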
static int
vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
				       u32 flags, void __user *arg,
				       size_t argsz)
{
	int ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET, 0);
	if (ret != 1)
		return ret;

	return device->log_ops->log_stop(device);
}

static int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
					  unsigned long iova, size_t length,
					  void *opaque)
{
	struct vfio_device *device = opaque;

	return device->log_ops->log_read_and_clear(device, iova, length, iter);
}

static int
vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
					 u32 flags, void __user *arg,
					 size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_report,
			    bitmap);
	struct vfio_device_feature_dma_logging_report report;
	struct iova_bitmap *iter;
	u64 iova_end;
	int ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(report));
	if (ret != 1)
		return ret;

	if (copy_from_user(&report, arg, minsz))
		return -EFAULT;

	if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
		return -EINVAL;

	if (check_add_overflow(report.iova, report.length, &iova_end) ||
	    iova_end > ULONG_MAX)
		return -EOVERFLOW;

	iter = iova_bitmap_alloc(report.iova, report.length,
				 report.page_size,
				 u64_to_user_ptr(report.bitmap));
	if (IS_ERR(iter))
		return PTR_ERR(iter);

	ret = iova_bitmap_for_each(iter, device,
				   vfio_device_log_read_and_clear);

	iova_bitmap_free(iter);
	return ret;
}

static int vfio_ioctl_device_feature(struct vfio_device *device,
				     struct vfio_device_feature __user *arg)
{
	size_t minsz = offsetofend(struct vfio_device_feature, flags);
	struct vfio_device_feature feature;

	if (copy_from_user(&feature, arg, minsz))
		return -EFAULT;

	if (feature.argsz < minsz)
		return -EINVAL;

	/* Check unknown flags */
	if (feature.flags &
	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
		return -EINVAL;

	/* GET & SET are mutually exclusive except with PROBE */
	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
		return -EINVAL;

	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
	case VFIO_DEVICE_FEATURE_MIGRATION:
		return vfio_ioctl_device_feature_migration(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
		return vfio_ioctl_device_feature_mig_device_state(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
		return vfio_ioctl_device_feature_logging_start(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
		return vfio_ioctl_device_feature_logging_stop(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
		return vfio_ioctl_device_feature_logging_report(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	default:
		if (unlikely(!device->ops->device_feature))
			return -EINVAL;
		return device->ops->device_feature(device, feature.flags,
						   arg->data,
						   feature.argsz - minsz);
	}
}

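/*
 * Editor's illustrative sketch (not part of the original file, not
 * built): userspace encoding of a VFIO_DEVICE_FEATURE call that
 * satisfies the checks above - argsz covers the header plus payload and
 * exactly one of GET/SET is given. This queries the migration
 * capability flags; the "hypo_" name is invented.
 */
#if 0
int hypo_user_query_migration(int device_fd, __u64 *out_flags)
{
	struct {
		struct vfio_device_feature hdr;
		struct vfio_device_feature_migration mig;
	} buf = {
		.hdr.argsz = sizeof(buf),
		.hdr.flags = VFIO_DEVICE_FEATURE_GET |
			     VFIO_DEVICE_FEATURE_MIGRATION,
	};

	if (ioctl(device_fd, VFIO_DEVICE_FEATURE, &buf))
		return -1;	/* errno set; ENOTTY if unsupported */
	*out_flags = buf.mig.flags;
	return 0;
}
#endif
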
static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;
	int ret;

	ret = vfio_device_pm_runtime_get(device);
	if (ret)
		return ret;

	switch (cmd) {
	case VFIO_DEVICE_FEATURE:
		ret = vfio_ioctl_device_feature(device, (void __user *)arg);
		break;

	default:
		if (unlikely(!device->ops->ioctl))
			ret = -EINVAL;
		else
			ret = device->ops->ioctl(device, cmd, arg);
		break;
	}

	vfio_device_pm_runtime_put(device);
	return ret;
}

1531 | static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf, | |
1532 | size_t count, loff_t *ppos) | |
1533 | { | |
1534 | struct vfio_device *device = filep->private_data; | |
1535 | ||
1536 | if (unlikely(!device->ops->read)) | |
1537 | return -EINVAL; | |
1538 | ||
6df62c5b | 1539 | return device->ops->read(device, buf, count, ppos); |
cba3345c AW |
1540 | } |
1541 | ||
1542 | static ssize_t vfio_device_fops_write(struct file *filep, | |
1543 | const char __user *buf, | |
1544 | size_t count, loff_t *ppos) | |
1545 | { | |
1546 | struct vfio_device *device = filep->private_data; | |
1547 | ||
1548 | if (unlikely(!device->ops->write)) | |
1549 | return -EINVAL; | |
1550 | ||
6df62c5b | 1551 | return device->ops->write(device, buf, count, ppos); |
cba3345c AW |
1552 | } |
1553 | ||
1554 | static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) | |
1555 | { | |
1556 | struct vfio_device *device = filep->private_data; | |
1557 | ||
1558 | if (unlikely(!device->ops->mmap)) | |
1559 | return -EINVAL; | |
1560 | ||
6df62c5b | 1561 | return device->ops->mmap(device, vma); |
cba3345c AW |
1562 | } |
1563 | ||
cba3345c AW |
1564 | static const struct file_operations vfio_device_fops = { |
1565 | .owner = THIS_MODULE, | |
1566 | .release = vfio_device_fops_release, | |
1567 | .read = vfio_device_fops_read, | |
1568 | .write = vfio_device_fops_write, | |
1569 | .unlocked_ioctl = vfio_device_fops_unl_ioctl, | |
407e9ef7 | 1570 | .compat_ioctl = compat_ptr_ioctl, |
cba3345c AW |
1571 | .mmap = vfio_device_fops_mmap, |
1572 | }; | |
1573 | ||
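These fops are thin trampolines into the per-driver struct vfio_device_ops. A schematic counterpart on the driver side, with hypothetical my_* callbacks standing in for a real driver's handlers:

static const struct vfio_device_ops my_vfio_ops = {
	.name		= "my-vfio-driver",
	.read		= my_read,	/* reached via vfio_device_fops_read() */
	.write		= my_write,	/* reached via vfio_device_fops_write() */
	.mmap		= my_mmap,	/* reached via vfio_device_fops_mmap() */
	.ioctl		= my_ioctl,	/* default arm of the ioctl switch above */
	.device_feature	= my_device_feature, /* VFIO_DEVICE_FEATURE fallback */
};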
50d63b5b JG |
1574 | /** |
1575 | * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file | |
1576 | * @file: VFIO group file | |
6cdd9782 | 1577 | * |
819da99a JG |
1578 | * The returned iommu_group is valid as long as a ref is held on the file. This |
1579 | * returns a reference on the group. This function is deprecated; only the SPAPR | |
1580 | * path in kvm should call it. | |
6cdd9782 | 1581 | */ |
50d63b5b | 1582 | struct iommu_group *vfio_file_iommu_group(struct file *file) |
6cdd9782 | 1583 | { |
50d63b5b | 1584 | struct vfio_group *group = file->private_data; |
3dd59a7d | 1585 | struct iommu_group *iommu_group = NULL; |
6cdd9782 | 1586 | |
4b22ef04 JG |
1587 | if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU)) |
1588 | return NULL; | |
1589 | ||
1590 | if (!vfio_file_is_group(file)) | |
50d63b5b | 1591 | return NULL; |
3dd59a7d JG |
1592 | |
1593 | mutex_lock(&group->group_lock); | |
1594 | if (group->iommu_group) { | |
1595 | iommu_group = group->iommu_group; | |
1596 | iommu_group_ref_get(iommu_group); | |
1597 | } | |
1598 | mutex_unlock(&group->group_lock); | |
1599 | return iommu_group; | |
6cdd9782 | 1600 | } |
50d63b5b | 1601 | EXPORT_SYMBOL_GPL(vfio_file_iommu_group); |
6cdd9782 | 1602 | |
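Since this helper takes a reference on the group, callers must drop it when done. A minimal sketch of the expected pattern (the file pointer is an assumption):

struct iommu_group *grp = vfio_file_iommu_group(file);

if (grp) {
	pr_debug("vfio group backed by iommu group %d\n", iommu_group_id(grp));
	iommu_group_put(grp);	/* balance the reference taken by the helper */
}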
4b22ef04 JG |
1603 | /** |
1604 | * vfio_file_is_group - True if the file is usable with VFIO APIs | |
1605 | * @file: VFIO group file | |
1606 | */ | |
1607 | bool vfio_file_is_group(struct file *file) | |
1608 | { | |
1609 | return file->f_op == &vfio_group_fops; | |
1610 | } | |
1611 | EXPORT_SYMBOL_GPL(vfio_file_is_group); | |
1612 | ||
a905ad04 JG |
1613 | /** |
1614 | * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file | |
1615 | * is always CPU cache coherent | |
1616 | * @file: VFIO group file | |
c0560f51 | 1617 | * |
a905ad04 JG |
1618 | * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop |
1619 | * bit in DMA transactions. A return of false indicates that the user has | |
1620 | * rights to access additional instructions such as wbinvd on x86. | |
c0560f51 | 1621 | */ |
a905ad04 | 1622 | bool vfio_file_enforced_coherent(struct file *file) |
c0560f51 | 1623 | { |
a905ad04 JG |
1624 | struct vfio_group *group = file->private_data; |
1625 | bool ret; | |
c0560f51 | 1626 | |
b1b8132a | 1627 | if (!vfio_file_is_group(file)) |
a905ad04 | 1628 | return true; |
c0560f51 | 1629 | |
c82e81ab | 1630 | mutex_lock(&group->group_lock); |
e0e29bdb | 1631 | if (group->container) { |
1408640d JG |
1632 | ret = vfio_container_ioctl_check_extension(group->container, |
1633 | VFIO_DMA_CC_IOMMU); | |
e0e29bdb JG |
1634 | } else { |
1635 | /* | |
1636 | * Since the coherency state is determined only once a container | |
1637 | * is attached, the user must do so before they can prove they | |
1638 | * have permission. | |
1639 | */ | |
1640 | ret = true; | |
c0560f51 | 1641 | } |
c82e81ab | 1642 | mutex_unlock(&group->group_lock); |
a905ad04 | 1643 | return ret; |
c0560f51 | 1644 | } |
a905ad04 | 1645 | EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); |
c0560f51 | 1646 | |
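A consumer such as KVM keys its non-coherent DMA handling off this helper. A simplified sketch of that decision, assuming the x86 arch hook and a valid kvm pointer:

/* If DMA may bypass CPU caches, the user may legitimately need wbinvd. */
if (!vfio_file_enforced_coherent(file))
	/* paired with kvm_arch_unregister_noncoherent_dma() on teardown */
	kvm_arch_register_noncoherent_dma(kvm);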
ba70a89f JG |
1647 | /** |
1648 | * vfio_file_set_kvm - Link a kvm with VFIO drivers | |
1649 | * @file: VFIO group file | |
1650 | * @kvm: KVM to link | |
1651 | * | |
421cfe65 MR |
1652 | * When a VFIO device is first opened, the KVM will be available in | |
1653 | * device->kvm if one was associated with the group. | |
ba70a89f JG |
1654 | */ |
1655 | void vfio_file_set_kvm(struct file *file, struct kvm *kvm) | |
6cdd9782 | 1656 | { |
ba70a89f | 1657 | struct vfio_group *group = file->private_data; |
6cdd9782 | 1658 | |
b1b8132a | 1659 | if (!vfio_file_is_group(file)) |
ba70a89f | 1660 | return; |
5d6dee80 | 1661 | |
c82e81ab | 1662 | mutex_lock(&group->group_lock); |
ba70a89f | 1663 | group->kvm = kvm; |
c82e81ab | 1664 | mutex_unlock(&group->group_lock); |
5d6dee80 | 1665 | } |
ba70a89f | 1666 | EXPORT_SYMBOL_GPL(vfio_file_set_kvm); |
5d6dee80 | 1667 | |
6a985ae8 JG |
1668 | /** |
1669 | * vfio_file_has_dev - True if the VFIO file is a handle for the device | |
1670 | * @file: VFIO file to check | |
1671 | * @device: Device that must be part of the file | |
1672 | * | |
1673 | * Returns true if the given file has permission to manipulate the given device. | |
1674 | */ | |
1675 | bool vfio_file_has_dev(struct file *file, struct vfio_device *device) | |
6cdd9782 | 1676 | { |
6a985ae8 | 1677 | struct vfio_group *group = file->private_data; |
6cdd9782 | 1678 | |
b1b8132a | 1679 | if (!vfio_file_is_group(file)) |
6a985ae8 JG |
1680 | return false; |
1681 | ||
1682 | return group == device->group; | |
88d7ab89 | 1683 | } |
6a985ae8 | 1684 | EXPORT_SYMBOL_GPL(vfio_file_has_dev); |
88d7ab89 | 1685 | |
3b9a2d57 | 1686 | /* |
d7a8d5ed AW |
1687 | * Sub-module support |
1688 | */ | |
1689 | /* | |
1690 | * Helper for managing a buffer of info chain capabilities: allocate or | |
1691 | * reallocate a buffer with additional @size, filling in @id and @version | |
1692 | * of the capability. A pointer to the new capability is returned. | |
1693 | * | |
1694 | * NB. The chain is based at the head of the buffer, so new entries are | |
1695 | * added to the tail; vfio_info_cap_shift() should be called to fix up the | |
1696 | * next offsets prior to copying to the user buffer. | |
1697 | */ | |
1698 | struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, | |
1699 | size_t size, u16 id, u16 version) | |
1700 | { | |
1701 | void *buf; | |
1702 | struct vfio_info_cap_header *header, *tmp; | |
1703 | ||
1704 | buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); | |
1705 | if (!buf) { | |
1706 | kfree(caps->buf); | |
6641085e | 1707 | caps->buf = NULL; |
d7a8d5ed AW |
1708 | caps->size = 0; |
1709 | return ERR_PTR(-ENOMEM); | |
1710 | } | |
1711 | ||
1712 | caps->buf = buf; | |
1713 | header = buf + caps->size; | |
1714 | ||
1715 | /* Eventually copied to the user buffer, so zero it */ | |
1716 | memset(header, 0, size); | |
1717 | ||
1718 | header->id = id; | |
1719 | header->version = version; | |
1720 | ||
1721 | /* Add to the end of the capability chain */ | |
5ba6de98 | 1722 | for (tmp = buf; tmp->next; tmp = buf + tmp->next) |
d7a8d5ed AW |
1723 | ; /* nothing */ |
1724 | ||
1725 | tmp->next = caps->size; | |
1726 | caps->size += size; | |
1727 | ||
1728 | return header; | |
1729 | } | |
1730 | EXPORT_SYMBOL_GPL(vfio_info_cap_add); | |
1731 | ||
1732 | void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) | |
1733 | { | |
1734 | struct vfio_info_cap_header *tmp; | |
5ba6de98 | 1735 | void *buf = (void *)caps->buf; |
d7a8d5ed | 1736 | |
5ba6de98 | 1737 | for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset) |
d7a8d5ed AW |
1738 | tmp->next += offset; |
1739 | } | |
b3c0a866 | 1740 | EXPORT_SYMBOL(vfio_info_cap_shift); |
d7a8d5ed | 1741 | |
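Putting the two helpers together: a region-info handler builds the chain in a scratch buffer, then rebases the next offsets to the chain's final position in the user buffer before copying out. A condensed sketch modeled on the PCI driver's pattern; info, arg, size, and the payload fill-in are assumptions:

struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
struct vfio_info_cap_header *header;
int ret = 0;

header = vfio_info_cap_add(&caps, size, VFIO_REGION_INFO_CAP_TYPE, 1);
if (IS_ERR(header))
	return PTR_ERR(header);
/* ... fill the payload that immediately follows *header ... */

if (caps.size) {
	info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
	if (info.argsz >= sizeof(info) + caps.size) {
		info.cap_offset = sizeof(info);
		/* chain offsets become relative to the user buffer start */
		vfio_info_cap_shift(&caps, sizeof(info));
		if (copy_to_user((void __user *)arg + sizeof(info),
				 caps.buf, caps.size))
			ret = -EFAULT;
	}
	kfree(caps.buf);
}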
dda01f78 AW |
1742 | int vfio_info_add_capability(struct vfio_info_cap *caps, |
1743 | struct vfio_info_cap_header *cap, size_t size) | |
b3c0a866 KW |
1744 | { |
1745 | struct vfio_info_cap_header *header; | |
b3c0a866 | 1746 | |
dda01f78 | 1747 | header = vfio_info_cap_add(caps, size, cap->id, cap->version); |
b3c0a866 KW |
1748 | if (IS_ERR(header)) |
1749 | return PTR_ERR(header); | |
1750 | ||
dda01f78 | 1751 | memcpy(header + 1, cap + 1, size - sizeof(*header)); |
b3c0a866 | 1752 | |
b3c0a866 KW |
1753 | return 0; |
1754 | } | |
b3c0a866 | 1755 | EXPORT_SYMBOL(vfio_info_add_capability); |
2169037d | 1756 | |
c747f08a KW |
1757 | int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, |
1758 | int max_irq_type, size_t *data_size) | |
1759 | { | |
1760 | unsigned long minsz; | |
1761 | size_t size; | |
1762 | ||
1763 | minsz = offsetofend(struct vfio_irq_set, count); | |
1764 | ||
1765 | if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) || | |
1766 | (hdr->count >= (U32_MAX - hdr->start)) || | |
1767 | (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK | | |
1768 | VFIO_IRQ_SET_ACTION_TYPE_MASK))) | |
1769 | return -EINVAL; | |
1770 | ||
1771 | if (data_size) | |
1772 | *data_size = 0; | |
1773 | ||
1774 | if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs) | |
1775 | return -EINVAL; | |
1776 | ||
1777 | switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { | |
1778 | case VFIO_IRQ_SET_DATA_NONE: | |
1779 | size = 0; | |
1780 | break; | |
1781 | case VFIO_IRQ_SET_DATA_BOOL: | |
1782 | size = sizeof(uint8_t); | |
1783 | break; | |
1784 | case VFIO_IRQ_SET_DATA_EVENTFD: | |
1785 | size = sizeof(int32_t); | |
1786 | break; | |
1787 | default: | |
1788 | return -EINVAL; | |
1789 | } | |
1790 | ||
1791 | if (size) { | |
1792 | if (hdr->argsz - minsz < hdr->count * size) | |
1793 | return -EINVAL; | |
1794 | ||
1795 | if (!data_size) | |
1796 | return -EINVAL; | |
1797 | ||
1798 | *data_size = hdr->count * size; | |
1799 | } | |
1800 | ||
1801 | return 0; | |
1802 | } | |
1803 | EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); | |
1804 | ||
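A driver's VFIO_DEVICE_SET_IRQS path typically runs its header through this validator and then copies in exactly *data_size bytes. A condensed sketch following the PCI driver's shape; max (the IRQ count for the requested index) and the later dispatch are assumptions:

struct vfio_irq_set hdr;
unsigned long minsz = offsetofend(struct vfio_irq_set, count);
size_t data_size = 0;
u8 *data = NULL;
int ret;

if (copy_from_user(&hdr, (void __user *)arg, minsz))
	return -EFAULT;

ret = vfio_set_irqs_validate_and_prepare(&hdr, max, VFIO_PCI_NUM_IRQS,
					 &data_size);
if (ret)
	return ret;

if (data_size) {
	data = memdup_user((void __user *)(arg + minsz), data_size);
	if (IS_ERR(data))
		return PTR_ERR(data);
}
/* ... dispatch on hdr.index / hdr.flags, then kfree(data) ... */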
3b9a2d57 | 1805 | /* |
cba3345c AW |
1806 | * Module/class support |
1807 | */ | |
1808 | static char *vfio_devnode(struct device *dev, umode_t *mode) | |
1809 | { | |
1810 | return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); | |
1811 | } | |
1812 | ||
c41da462 JG |
1813 | static int __init vfio_init(void) |
1814 | { | |
1815 | int ret; | |
1816 | ||
1817 | ida_init(&vfio.group_ida); | |
1818 | ida_init(&vfio.device_ida); | |
1819 | mutex_init(&vfio.group_lock); | |
1820 | INIT_LIST_HEAD(&vfio.group_list); | |
1821 | ||
1822 | ret = vfio_container_init(); | |
1823 | if (ret) | |
1824 | return ret; | |
1825 | ||
d1099901 | 1826 | /* /dev/vfio/$GROUP */ |
cba3345c AW |
1827 | vfio.class = class_create(THIS_MODULE, "vfio"); |
1828 | if (IS_ERR(vfio.class)) { | |
1829 | ret = PTR_ERR(vfio.class); | |
3c28a761 | 1830 | goto err_group_class; |
cba3345c AW |
1831 | } |
1832 | ||
1833 | vfio.class->devnode = vfio_devnode; | |
1834 | ||
3c28a761 YL |
1835 | /* /sys/class/vfio-dev/vfioX */ |
1836 | vfio.device_class = class_create(THIS_MODULE, "vfio-dev"); | |
1837 | if (IS_ERR(vfio.device_class)) { | |
1838 | ret = PTR_ERR(vfio.device_class); | |
1839 | goto err_dev_class; | |
1840 | } | |
1841 | ||
8bcb64a5 | 1842 | ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio"); |
cba3345c | 1843 | if (ret) |
d1099901 | 1844 | goto err_alloc_chrdev; |
cba3345c | 1845 | |
a13b1e47 | 1846 | pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); |
cba3345c AW |
1847 | return 0; |
1848 | ||
d1099901 | 1849 | err_alloc_chrdev: |
3c28a761 YL |
1850 | class_destroy(vfio.device_class); |
1851 | vfio.device_class = NULL; | |
1852 | err_dev_class: | |
cba3345c AW |
1853 | class_destroy(vfio.class); |
1854 | vfio.class = NULL; | |
3c28a761 | 1855 | err_group_class: |
c41da462 | 1856 | vfio_container_cleanup(); |
cba3345c AW |
1857 | return ret; |
1858 | } | |
1859 | ||
1860 | static void __exit vfio_cleanup(void) | |
1861 | { | |
1862 | WARN_ON(!list_empty(&vfio.group_list)); | |
1863 | ||
3c28a761 | 1864 | ida_destroy(&vfio.device_ida); |
9cef7391 | 1865 | ida_destroy(&vfio.group_ida); |
8bcb64a5 | 1866 | unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); |
3c28a761 YL |
1867 | class_destroy(vfio.device_class); |
1868 | vfio.device_class = NULL; | |
cba3345c | 1869 | class_destroy(vfio.class); |
c41da462 | 1870 | vfio_container_cleanup(); |
cba3345c | 1871 | vfio.class = NULL; |
2fd585f4 | 1872 | xa_destroy(&vfio_device_set_xa); |
cba3345c AW |
1873 | } |
1874 | ||
1875 | module_init(vfio_init); | |
1876 | module_exit(vfio_cleanup); | |
1877 | ||
1878 | MODULE_VERSION(DRIVER_VERSION); | |
1879 | MODULE_LICENSE("GPL v2"); | |
1880 | MODULE_AUTHOR(DRIVER_AUTHOR); | |
1881 | MODULE_DESCRIPTION(DRIVER_DESC); | |
d1099901 AW |
1882 | MODULE_ALIAS_MISCDEV(VFIO_MINOR); |
1883 | MODULE_ALIAS("devname:vfio/vfio"); | |
0ca582fd | 1884 | MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce"); |