]>
Commit | Line | Data |
---|---|---|
1dcac3e1 XFR |
1 | /* |
2 | * vfio based subchannel assignment support | |
3 | * | |
4 | * Copyright 2017 IBM Corp. | |
5 | * Author(s): Dong Jia Shi <[email protected]> | |
6 | * Xiao Feng Ren <[email protected]> | |
7 | * Pierre Morel <[email protected]> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or (at | |
10 | * your option) any version. See the COPYING file in the top-level | |
11 | * directory. | |
12 | */ | |
13 | ||
e9808d09 | 14 | #include "qemu/osdep.h" |
1dcac3e1 | 15 | #include <linux/vfio.h> |
c14e706c | 16 | #include <linux/vfio_ccw.h> |
1dcac3e1 XFR |
17 | #include <sys/ioctl.h> |
18 | ||
1dcac3e1 XFR |
19 | #include "qapi/error.h" |
20 | #include "hw/sysbus.h" | |
21 | #include "hw/vfio/vfio.h" | |
22 | #include "hw/vfio/vfio-common.h" | |
23 | #include "hw/s390x/s390-ccw.h" | |
24 | #include "hw/s390x/ccw-device.h" | |
4886b3e9 | 25 | #include "qemu/error-report.h" |
1dcac3e1 XFR |
26 | |
27 | #define TYPE_VFIO_CCW "vfio-ccw" | |
28 | typedef struct VFIOCCWDevice { | |
29 | S390CCWDevice cdev; | |
30 | VFIODevice vdev; | |
c14e706c DJS |
31 | uint64_t io_region_size; |
32 | uint64_t io_region_offset; | |
33 | struct ccw_io_region *io_region; | |
4886b3e9 | 34 | EventNotifier io_notifier; |
1dcac3e1 XFR |
35 | } VFIOCCWDevice; |
36 | ||
37 | static void vfio_ccw_compute_needs_reset(VFIODevice *vdev) | |
38 | { | |
39 | vdev->needs_reset = false; | |
40 | } | |
41 | ||
42 | /* | |
43 | * We don't need vfio_hot_reset_multi and vfio_eoi operations for | |
44 | * vfio_ccw device now. | |
45 | */ | |
46 | struct VFIODeviceOps vfio_ccw_ops = { | |
47 | .vfio_compute_needs_reset = vfio_ccw_compute_needs_reset, | |
48 | }; | |
49 | ||
66dc50f7 | 50 | static IOInstEnding vfio_ccw_handle_request(SubchDev *sch) |
8ca2b376 | 51 | { |
66dc50f7 | 52 | S390CCWDevice *cdev = sch->driver_data; |
8ca2b376 XFR |
53 | VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); |
54 | struct ccw_io_region *region = vcdev->io_region; | |
55 | int ret; | |
56 | ||
57 | QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB)); | |
58 | QEMU_BUILD_BUG_ON(sizeof(region->scsw_area) != sizeof(SCSW)); | |
59 | QEMU_BUILD_BUG_ON(sizeof(region->irb_area) != sizeof(IRB)); | |
60 | ||
61 | memset(region, 0, sizeof(*region)); | |
62 | ||
66dc50f7 HP |
63 | memcpy(region->orb_area, &sch->orb, sizeof(ORB)); |
64 | memcpy(region->scsw_area, &sch->curr_status.scsw, sizeof(SCSW)); | |
8ca2b376 XFR |
65 | |
66 | again: | |
67 | ret = pwrite(vcdev->vdev.fd, region, | |
68 | vcdev->io_region_size, vcdev->io_region_offset); | |
69 | if (ret != vcdev->io_region_size) { | |
70 | if (errno == EAGAIN) { | |
71 | goto again; | |
72 | } | |
73 | error_report("vfio-ccw: wirte I/O region failed with errno=%d", errno); | |
66dc50f7 HP |
74 | ret = -errno; |
75 | } else { | |
76 | ret = region->ret_code; | |
77 | } | |
78 | switch (ret) { | |
79 | case 0: | |
80 | return IOINST_CC_EXPECTED; | |
81 | case -EBUSY: | |
82 | return IOINST_CC_BUSY; | |
83 | case -ENODEV: | |
84 | case -EACCES: | |
85 | return IOINST_CC_NOT_OPERATIONAL; | |
86 | case -EFAULT: | |
87 | default: | |
88 | sch_gen_unit_exception(sch); | |
89 | css_inject_io_interrupt(sch); | |
90 | return IOINST_CC_EXPECTED; | |
8ca2b376 | 91 | } |
8ca2b376 XFR |
92 | } |
93 | ||
1dcac3e1 XFR |
94 | static void vfio_ccw_reset(DeviceState *dev) |
95 | { | |
96 | CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); | |
97 | S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); | |
98 | VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); | |
99 | ||
100 | ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET); | |
101 | } | |
102 | ||
4886b3e9 DJS |
103 | static void vfio_ccw_io_notifier_handler(void *opaque) |
104 | { | |
105 | VFIOCCWDevice *vcdev = opaque; | |
8ca2b376 XFR |
106 | struct ccw_io_region *region = vcdev->io_region; |
107 | S390CCWDevice *cdev = S390_CCW_DEVICE(vcdev); | |
108 | CcwDevice *ccw_dev = CCW_DEVICE(cdev); | |
109 | SubchDev *sch = ccw_dev->sch; | |
110 | SCSW *s = &sch->curr_status.scsw; | |
334e7685 | 111 | PMCW *p = &sch->curr_status.pmcw; |
8ca2b376 XFR |
112 | IRB irb; |
113 | int size; | |
4886b3e9 DJS |
114 | |
115 | if (!event_notifier_test_and_clear(&vcdev->io_notifier)) { | |
116 | return; | |
117 | } | |
8ca2b376 XFR |
118 | |
119 | size = pread(vcdev->vdev.fd, region, vcdev->io_region_size, | |
120 | vcdev->io_region_offset); | |
121 | if (size == -1) { | |
122 | switch (errno) { | |
123 | case ENODEV: | |
124 | /* Generate a deferred cc 3 condition. */ | |
125 | s->flags |= SCSW_FLAGS_MASK_CC; | |
126 | s->ctrl &= ~SCSW_CTRL_MASK_STCTL; | |
127 | s->ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND); | |
128 | goto read_err; | |
129 | case EFAULT: | |
130 | /* Memory problem, generate channel data check. */ | |
131 | s->ctrl &= ~SCSW_ACTL_START_PEND; | |
132 | s->cstat = SCSW_CSTAT_DATA_CHECK; | |
133 | s->ctrl &= ~SCSW_CTRL_MASK_STCTL; | |
134 | s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | | |
135 | SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; | |
136 | goto read_err; | |
137 | default: | |
138 | /* Error, generate channel program check. */ | |
139 | s->ctrl &= ~SCSW_ACTL_START_PEND; | |
140 | s->cstat = SCSW_CSTAT_PROG_CHECK; | |
141 | s->ctrl &= ~SCSW_CTRL_MASK_STCTL; | |
142 | s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | | |
143 | SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; | |
144 | goto read_err; | |
145 | } | |
146 | } else if (size != vcdev->io_region_size) { | |
147 | /* Information transfer error, generate channel-control check. */ | |
148 | s->ctrl &= ~SCSW_ACTL_START_PEND; | |
149 | s->cstat = SCSW_CSTAT_CHN_CTRL_CHK; | |
150 | s->ctrl &= ~SCSW_CTRL_MASK_STCTL; | |
151 | s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | | |
152 | SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; | |
153 | goto read_err; | |
154 | } | |
155 | ||
156 | memcpy(&irb, region->irb_area, sizeof(IRB)); | |
157 | ||
158 | /* Update control block via irb. */ | |
159 | copy_scsw_to_guest(s, &irb.scsw); | |
160 | ||
334e7685 DJS |
161 | /* If a uint check is pending, copy sense data. */ |
162 | if ((s->dstat & SCSW_DSTAT_UNIT_CHECK) && | |
163 | (p->chars & PMCW_CHARS_MASK_CSENSE)) { | |
164 | memcpy(sch->sense_data, irb.ecw, sizeof(irb.ecw)); | |
165 | } | |
166 | ||
8ca2b376 XFR |
167 | read_err: |
168 | css_inject_io_interrupt(sch); | |
4886b3e9 DJS |
169 | } |
170 | ||
171 | static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp) | |
172 | { | |
173 | VFIODevice *vdev = &vcdev->vdev; | |
174 | struct vfio_irq_info *irq_info; | |
175 | struct vfio_irq_set *irq_set; | |
176 | size_t argsz; | |
177 | int32_t *pfd; | |
178 | ||
179 | if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) { | |
180 | error_setg(errp, "vfio: unexpected number of io irqs %u", | |
181 | vdev->num_irqs); | |
182 | return; | |
183 | } | |
184 | ||
28e22d4b | 185 | argsz = sizeof(*irq_info); |
4886b3e9 DJS |
186 | irq_info = g_malloc0(argsz); |
187 | irq_info->index = VFIO_CCW_IO_IRQ_INDEX; | |
188 | irq_info->argsz = argsz; | |
189 | if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, | |
190 | irq_info) < 0 || irq_info->count < 1) { | |
191 | error_setg_errno(errp, errno, "vfio: Error getting irq info"); | |
192 | goto out_free_info; | |
193 | } | |
194 | ||
195 | if (event_notifier_init(&vcdev->io_notifier, 0)) { | |
196 | error_setg_errno(errp, errno, | |
197 | "vfio: Unable to init event notifier for IO"); | |
198 | goto out_free_info; | |
199 | } | |
200 | ||
201 | argsz = sizeof(*irq_set) + sizeof(*pfd); | |
202 | irq_set = g_malloc0(argsz); | |
203 | irq_set->argsz = argsz; | |
204 | irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | | |
205 | VFIO_IRQ_SET_ACTION_TRIGGER; | |
206 | irq_set->index = VFIO_CCW_IO_IRQ_INDEX; | |
207 | irq_set->start = 0; | |
208 | irq_set->count = 1; | |
209 | pfd = (int32_t *) &irq_set->data; | |
210 | ||
211 | *pfd = event_notifier_get_fd(&vcdev->io_notifier); | |
212 | qemu_set_fd_handler(*pfd, vfio_ccw_io_notifier_handler, NULL, vcdev); | |
213 | if (ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { | |
214 | error_setg(errp, "vfio: Failed to set up io notification"); | |
215 | qemu_set_fd_handler(*pfd, NULL, NULL, vcdev); | |
216 | event_notifier_cleanup(&vcdev->io_notifier); | |
217 | } | |
218 | ||
219 | g_free(irq_set); | |
220 | ||
221 | out_free_info: | |
222 | g_free(irq_info); | |
223 | } | |
224 | ||
225 | static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev) | |
226 | { | |
227 | struct vfio_irq_set *irq_set; | |
228 | size_t argsz; | |
229 | int32_t *pfd; | |
230 | ||
231 | argsz = sizeof(*irq_set) + sizeof(*pfd); | |
232 | irq_set = g_malloc0(argsz); | |
233 | irq_set->argsz = argsz; | |
234 | irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | | |
235 | VFIO_IRQ_SET_ACTION_TRIGGER; | |
236 | irq_set->index = VFIO_CCW_IO_IRQ_INDEX; | |
237 | irq_set->start = 0; | |
238 | irq_set->count = 1; | |
239 | pfd = (int32_t *) &irq_set->data; | |
240 | *pfd = -1; | |
241 | ||
242 | if (ioctl(vcdev->vdev.fd, VFIO_DEVICE_SET_IRQS, irq_set)) { | |
243 | error_report("vfio: Failed to de-assign device io fd: %m"); | |
244 | } | |
245 | ||
246 | qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier), | |
247 | NULL, NULL, vcdev); | |
248 | event_notifier_cleanup(&vcdev->io_notifier); | |
249 | ||
250 | g_free(irq_set); | |
251 | } | |
252 | ||
c14e706c DJS |
253 | static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) |
254 | { | |
255 | VFIODevice *vdev = &vcdev->vdev; | |
256 | struct vfio_region_info *info; | |
257 | int ret; | |
258 | ||
259 | /* Sanity check device */ | |
260 | if (!(vdev->flags & VFIO_DEVICE_FLAGS_CCW)) { | |
261 | error_setg(errp, "vfio: Um, this isn't a vfio-ccw device"); | |
262 | return; | |
263 | } | |
264 | ||
265 | if (vdev->num_regions < VFIO_CCW_CONFIG_REGION_INDEX + 1) { | |
266 | error_setg(errp, "vfio: Unexpected number of the I/O region %u", | |
267 | vdev->num_regions); | |
268 | return; | |
269 | } | |
270 | ||
271 | ret = vfio_get_region_info(vdev, VFIO_CCW_CONFIG_REGION_INDEX, &info); | |
272 | if (ret) { | |
273 | error_setg_errno(errp, -ret, "vfio: Error getting config info"); | |
274 | return; | |
275 | } | |
276 | ||
277 | vcdev->io_region_size = info->size; | |
278 | if (sizeof(*vcdev->io_region) != vcdev->io_region_size) { | |
279 | error_setg(errp, "vfio: Unexpected size of the I/O region"); | |
280 | g_free(info); | |
281 | return; | |
282 | } | |
283 | ||
284 | vcdev->io_region_offset = info->offset; | |
285 | vcdev->io_region = g_malloc0(info->size); | |
286 | ||
287 | g_free(info); | |
288 | } | |
289 | ||
290 | static void vfio_ccw_put_region(VFIOCCWDevice *vcdev) | |
291 | { | |
292 | g_free(vcdev->io_region); | |
293 | } | |
294 | ||
1dcac3e1 XFR |
295 | static void vfio_put_device(VFIOCCWDevice *vcdev) |
296 | { | |
297 | g_free(vcdev->vdev.name); | |
298 | vfio_put_base_device(&vcdev->vdev); | |
299 | } | |
300 | ||
301 | static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp) | |
302 | { | |
303 | char *tmp, group_path[PATH_MAX]; | |
304 | ssize_t len; | |
305 | int groupid; | |
306 | ||
307 | tmp = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/%s/iommu_group", | |
308 | cdev->hostid.cssid, cdev->hostid.ssid, | |
309 | cdev->hostid.devid, cdev->mdevid); | |
310 | len = readlink(tmp, group_path, sizeof(group_path)); | |
311 | g_free(tmp); | |
312 | ||
313 | if (len <= 0 || len >= sizeof(group_path)) { | |
314 | error_setg(errp, "vfio: no iommu_group found"); | |
315 | return NULL; | |
316 | } | |
317 | ||
318 | group_path[len] = 0; | |
319 | ||
320 | if (sscanf(basename(group_path), "%d", &groupid) != 1) { | |
321 | error_setg(errp, "vfio: failed to read %s", group_path); | |
322 | return NULL; | |
323 | } | |
324 | ||
325 | return vfio_get_group(groupid, &address_space_memory, errp); | |
326 | } | |
327 | ||
328 | static void vfio_ccw_realize(DeviceState *dev, Error **errp) | |
329 | { | |
330 | VFIODevice *vbasedev; | |
331 | VFIOGroup *group; | |
332 | CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); | |
333 | S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); | |
334 | VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); | |
335 | S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); | |
336 | Error *err = NULL; | |
337 | ||
338 | /* Call the class init function for subchannel. */ | |
339 | if (cdc->realize) { | |
340 | cdc->realize(cdev, vcdev->vdev.sysfsdev, &err); | |
341 | if (err) { | |
342 | goto out_err_propagate; | |
343 | } | |
344 | } | |
345 | ||
346 | group = vfio_ccw_get_group(cdev, &err); | |
347 | if (!group) { | |
348 | goto out_group_err; | |
349 | } | |
350 | ||
351 | vcdev->vdev.ops = &vfio_ccw_ops; | |
352 | vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW; | |
353 | vcdev->vdev.name = g_strdup_printf("%x.%x.%04x", cdev->hostid.cssid, | |
354 | cdev->hostid.ssid, cdev->hostid.devid); | |
6a79dd46 | 355 | vcdev->vdev.dev = dev; |
1dcac3e1 XFR |
356 | QLIST_FOREACH(vbasedev, &group->device_list, next) { |
357 | if (strcmp(vbasedev->name, vcdev->vdev.name) == 0) { | |
358 | error_setg(&err, "vfio: subchannel %s has already been attached", | |
359 | vcdev->vdev.name); | |
360 | goto out_device_err; | |
361 | } | |
362 | } | |
363 | ||
364 | if (vfio_get_device(group, cdev->mdevid, &vcdev->vdev, &err)) { | |
365 | goto out_device_err; | |
366 | } | |
367 | ||
c14e706c DJS |
368 | vfio_ccw_get_region(vcdev, &err); |
369 | if (err) { | |
370 | goto out_region_err; | |
371 | } | |
372 | ||
4886b3e9 DJS |
373 | vfio_ccw_register_io_notifier(vcdev, &err); |
374 | if (err) { | |
375 | goto out_notifier_err; | |
376 | } | |
377 | ||
1dcac3e1 XFR |
378 | return; |
379 | ||
4886b3e9 DJS |
380 | out_notifier_err: |
381 | vfio_ccw_put_region(vcdev); | |
c14e706c DJS |
382 | out_region_err: |
383 | vfio_put_device(vcdev); | |
1dcac3e1 XFR |
384 | out_device_err: |
385 | vfio_put_group(group); | |
386 | out_group_err: | |
387 | if (cdc->unrealize) { | |
388 | cdc->unrealize(cdev, NULL); | |
389 | } | |
390 | out_err_propagate: | |
391 | error_propagate(errp, err); | |
392 | } | |
393 | ||
394 | static void vfio_ccw_unrealize(DeviceState *dev, Error **errp) | |
395 | { | |
396 | CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); | |
397 | S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); | |
398 | VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); | |
399 | S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); | |
400 | VFIOGroup *group = vcdev->vdev.group; | |
401 | ||
4886b3e9 | 402 | vfio_ccw_unregister_io_notifier(vcdev); |
c14e706c | 403 | vfio_ccw_put_region(vcdev); |
1dcac3e1 XFR |
404 | vfio_put_device(vcdev); |
405 | vfio_put_group(group); | |
406 | ||
407 | if (cdc->unrealize) { | |
408 | cdc->unrealize(cdev, errp); | |
409 | } | |
410 | } | |
411 | ||
412 | static Property vfio_ccw_properties[] = { | |
413 | DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev), | |
414 | DEFINE_PROP_END_OF_LIST(), | |
415 | }; | |
416 | ||
417 | static const VMStateDescription vfio_ccw_vmstate = { | |
418 | .name = TYPE_VFIO_CCW, | |
419 | .unmigratable = 1, | |
420 | }; | |
421 | ||
422 | static void vfio_ccw_class_init(ObjectClass *klass, void *data) | |
423 | { | |
424 | DeviceClass *dc = DEVICE_CLASS(klass); | |
8ca2b376 | 425 | S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); |
1dcac3e1 XFR |
426 | |
427 | dc->props = vfio_ccw_properties; | |
428 | dc->vmsd = &vfio_ccw_vmstate; | |
429 | dc->desc = "VFIO-based subchannel assignment"; | |
bd2aef10 | 430 | set_bit(DEVICE_CATEGORY_MISC, dc->categories); |
1dcac3e1 XFR |
431 | dc->realize = vfio_ccw_realize; |
432 | dc->unrealize = vfio_ccw_unrealize; | |
433 | dc->reset = vfio_ccw_reset; | |
8ca2b376 XFR |
434 | |
435 | cdc->handle_request = vfio_ccw_handle_request; | |
1dcac3e1 XFR |
436 | } |
437 | ||
438 | static const TypeInfo vfio_ccw_info = { | |
439 | .name = TYPE_VFIO_CCW, | |
440 | .parent = TYPE_S390_CCW, | |
441 | .instance_size = sizeof(VFIOCCWDevice), | |
442 | .class_init = vfio_ccw_class_init, | |
443 | }; | |
444 | ||
445 | static void register_vfio_ccw_type(void) | |
446 | { | |
447 | type_register_static(&vfio_ccw_info); | |
448 | } | |
449 | ||
450 | type_init(register_vfio_ccw_type) |