2 * vfio based subchannel assignment support
4 * Copyright 2017 IBM Corp.
5 * Copyright 2019 Red Hat, Inc.
12 * This work is licensed under the terms of the GNU GPL, version 2 or (at
13 * your option) any later version. See the COPYING file in the top-level
17 #include "qemu/osdep.h"
18 #include <linux/vfio.h>
19 #include <linux/vfio_ccw.h>
20 #include <sys/ioctl.h>
22 #include "qapi/error.h"
23 #include "hw/sysbus.h"
24 #include "hw/vfio/vfio.h"
25 #include "hw/vfio/vfio-common.h"
26 #include "hw/s390x/s390-ccw.h"
27 #include "hw/s390x/vfio-ccw.h"
28 #include "hw/qdev-properties.h"
29 #include "hw/s390x/ccw-device.h"
30 #include "exec/address-spaces.h"
31 #include "qemu/error-report.h"
32 #include "qemu/main-loop.h"
33 #include "qemu/module.h"
/*
 * Per-device state of a vfio-ccw passthrough subchannel.
 * NOTE(review): only part of the member list is visible here. Other code in
 * this file also accesses cdev, vdev, force_orb_pfch and warned_orb_pfch, so
 * confirm the full layout against the complete definition.
 */
35 struct VFIOCCWDevice {
/* Length and file offset used when pread()/pwrite()-ing the I/O region. */
38 uint64_t io_region_size;
39 uint64_t io_region_offset;
/* Local buffer for the I/O region, allocated in vfio_ccw_get_region(). */
40 struct ccw_io_region *io_region;
/* Length and file offset used when pwrite()-ing the async command region. */
41 uint64_t async_cmd_region_size;
42 uint64_t async_cmd_region_offset;
/*
 * Local buffer for the optional async command region. The halt/clear
 * handlers test this pointer for NULL before using it.
 */
43 struct ccw_cmd_region *async_cmd_region;
/* Event notifier whose fd is registered as the trigger for the I/O IRQ. */
44 EventNotifier io_notifier;
/*
 * Report a warning for this subchannel, prefixed with the device number
 * built from sch->cssid, sch->ssid and sch->devno.
 * &vcdev->warned_orb_pfch is handed to warn_report_once_cond() as its state
 * flag (presumably so the warning is printed only once per device; confirm
 * against that helper).
 * NOTE(review): the tail of the signature (the 'msg' parameter) is not
 * visible in this view.
 */
49 static inline void warn_once_pfch(VFIOCCWDevice *vcdev, SubchDev *sch,
52 warn_report_once_cond(&vcdev->warned_orb_pfch,
53 "vfio-ccw (devno %x.%x.%04x): %s",
54 sch->cssid, sch->ssid, sch->devno, msg);
/*
 * Installed as vfio_ccw_ops.vfio_compute_needs_reset.
 * Unconditionally marks the device as not needing a reset.
 */
57 static void vfio_ccw_compute_needs_reset(VFIODevice *vdev)
59 vdev->needs_reset = false;
63 * We don't need vfio_hot_reset_multi and vfio_eoi operations for
64 * vfio_ccw device now.
/*
 * Device operations table assigned to vcdev->vdev.ops in
 * vfio_ccw_get_device(). Only the compute-needs-reset hook is visibly set.
 */
66 struct VFIODeviceOps vfio_ccw_ops = {
67 .vfio_compute_needs_reset = vfio_ccw_compute_needs_reset,
/*
 * Installed as cdc->handle_request: forward a start request for @sch to the
 * host by writing the ORB and SCSW into the vfio-ccw I/O region.
 *
 * Returns an IOINST_CC_* code derived from the outcome of the write.
 * NOTE(review): the lines selecting which return value applies to which
 * result (and the declaration of 'ret') are not visible in this view.
 */
70 static IOInstEnding vfio_ccw_handle_request(SubchDev *sch)
72 S390CCWDevice *cdev = sch->driver_data;
73 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
74 struct ccw_io_region *region = vcdev->io_region;
/*
 * If the guest ORB does not have the prefetch flag set and the
 * "force-orb-pfch" property is enabled, set the flag and warn.
 */
77 if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH) && vcdev->force_orb_pfch) {
78 sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH;
79 warn_once_pfch(vcdev, sch, "PFCH flag forced");
/* The memcpy()s below rely on the region areas matching the QEMU types. */
82 QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB));
83 QEMU_BUILD_BUG_ON(sizeof(region->scsw_area) != sizeof(SCSW));
84 QEMU_BUILD_BUG_ON(sizeof(region->irb_area) != sizeof(IRB));
/* Start from a zeroed region, then fill in the ORB and the current SCSW. */
86 memset(region, 0, sizeof(*region));
88 memcpy(region->orb_area, &sch->orb, sizeof(ORB));
89 memcpy(region->scsw_area, &sch->curr_status.scsw, sizeof(SCSW));
/* Hand the request to the host driver through the device fd. */
92 ret = pwrite(vcdev->vdev.fd, region,
93 vcdev->io_region_size, vcdev->io_region_offset);
/* Anything other than a full-size write is treated as a failure. */
94 if (ret != vcdev->io_region_size) {
/* NOTE(review): the action taken on EAGAIN is not visible here. */
95 if (errno == EAGAIN) {
98 error_report("vfio-ccw: write I/O region failed with errno=%d", errno);
/* Continue with the result code stored in the local region buffer. */
101 ret = region->ret_code;
105 return IOINST_CC_EXPECTED;
107 return IOINST_CC_BUSY;
110 return IOINST_CC_NOT_OPERATIONAL;
/* Remaining path: generate a unit exception and inject an I/O interrupt. */
113 sch_gen_unit_exception(sch);
114 css_inject_io_interrupt(sch);
115 return IOINST_CC_EXPECTED;
/*
 * Installed as cdc->handle_clear: forward a clear request for @sch to the
 * host through the async command region, using VFIO_CCW_ASYNC_CMD_CSCH.
 *
 * NOTE(review): the return statements and the dispatch on the result code
 * are not visible in this view.
 */
119 static int vfio_ccw_handle_clear(SubchDev *sch)
121 S390CCWDevice *cdev = sch->driver_data;
122 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
123 struct ccw_cmd_region *region = vcdev->async_cmd_region;
/* The async command region is optional, so the buffer may be absent. */
126 if (!vcdev->async_cmd_region) {
127 /* Async command region not available, fall back to emulation */
/* Build a fresh command: everything zero except the command code. */
131 memset(region, 0, sizeof(*region));
132 region->command = VFIO_CCW_ASYNC_CMD_CSCH;
/* Submit the command to the host driver through the device fd. */
135 ret = pwrite(vcdev->vdev.fd, region,
136 vcdev->async_cmd_region_size, vcdev->async_cmd_region_offset);
/* Anything other than a full-size write is treated as a failure. */
137 if (ret != vcdev->async_cmd_region_size) {
/* NOTE(review): the action taken on EAGAIN is not visible here. */
138 if (errno == EAGAIN) {
141 error_report("vfio-ccw: write cmd region failed with errno=%d", errno);
/* Continue with the result code stored in the local region buffer. */
144 ret = region->ret_code;
/* One outcome generates a unit exception and injects an I/O interrupt. */
153 sch_gen_unit_exception(sch);
154 css_inject_io_interrupt(sch);
/*
 * Installed as cdc->handle_halt: forward a halt request for @sch to the
 * host through the async command region, using VFIO_CCW_ASYNC_CMD_HSCH.
 * Mirrors vfio_ccw_handle_clear() except for the command code.
 *
 * NOTE(review): the return statements and the dispatch on the result code
 * are not visible in this view.
 */
159 static int vfio_ccw_handle_halt(SubchDev *sch)
161 S390CCWDevice *cdev = sch->driver_data;
162 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
163 struct ccw_cmd_region *region = vcdev->async_cmd_region;
/* The async command region is optional, so the buffer may be absent. */
166 if (!vcdev->async_cmd_region) {
167 /* Async command region not available, fall back to emulation */
/* Build a fresh command: everything zero except the command code. */
171 memset(region, 0, sizeof(*region));
172 region->command = VFIO_CCW_ASYNC_CMD_HSCH;
/* Submit the command to the host driver through the device fd. */
175 ret = pwrite(vcdev->vdev.fd, region,
176 vcdev->async_cmd_region_size, vcdev->async_cmd_region_offset);
/* Anything other than a full-size write is treated as a failure. */
177 if (ret != vcdev->async_cmd_region_size) {
/* NOTE(review): the action taken on EAGAIN is not visible here. */
178 if (errno == EAGAIN) {
181 error_report("vfio-ccw: write cmd region failed with errno=%d", errno);
/* Continue with the result code stored in the local region buffer. */
184 ret = region->ret_code;
/* One outcome generates a unit exception and injects an I/O interrupt. */
194 sch_gen_unit_exception(sch);
195 css_inject_io_interrupt(sch);
/*
 * Device reset hook (installed as dc->reset): converts @dev to its
 * containing VFIOCCWDevice via DO_UPCAST and issues VFIO_DEVICE_RESET on the
 * vfio device fd. The ioctl() result is discarded on the visible line.
 */
200 static void vfio_ccw_reset(DeviceState *dev)
202 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
203 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
204 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
206 ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET);
/*
 * fd handler for the I/O event notifier (registered in
 * vfio_ccw_register_io_notifier(), with the VFIOCCWDevice as @opaque).
 *
 * Reads the I/O region back from the host, updates the subchannel status
 * from the IRB or synthesizes an error status if the read failed, then
 * injects an I/O interrupt into the guest.
 * NOTE(review): the declarations of 'size', 'irb' and 's', and the lines
 * choosing between the error branches, are not visible in this view.
 */
209 static void vfio_ccw_io_notifier_handler(void *opaque)
211 VFIOCCWDevice *vcdev = opaque;
212 struct ccw_io_region *region = vcdev->io_region;
213 S390CCWDevice *cdev = S390_CCW_DEVICE(vcdev);
214 CcwDevice *ccw_dev = CCW_DEVICE(cdev);
215 SubchDev *sch = ccw_dev->sch;
216 SCHIB *schib = &sch->curr_status;
/* Consume the pending event; the check guards the rest of the handler. */
221 if (!event_notifier_test_and_clear(&vcdev->io_notifier)) {
/* Fetch the whole I/O region into the local buffer. */
225 size = pread(vcdev->vdev.fd, region, vcdev->io_region_size,
226 vcdev->io_region_offset);
230 /* Generate a deferred cc 3 condition. */
231 schib->scsw.flags |= SCSW_FLAGS_MASK_CC;
232 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL;
233 schib->scsw.ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND);
236 /* Memory problem, generate channel data check. */
237 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND;
238 schib->scsw.cstat = SCSW_CSTAT_DATA_CHECK;
239 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL;
240 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
241 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
244 /* Error, generate channel program check. */
245 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND;
246 schib->scsw.cstat = SCSW_CSTAT_PROG_CHECK;
247 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL;
248 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
249 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
/* A read that returned a different length than the region size. */
252 } else if (size != vcdev->io_region_size) {
253 /* Information transfer error, generate channel-control check. */
254 schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND;
255 schib->scsw.cstat = SCSW_CSTAT_CHN_CTRL_CHK;
256 schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL;
257 schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
258 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
/* Take a typed copy of the IRB out of the raw region buffer. */
262 memcpy(&irb, region->irb_area, sizeof(IRB));
264 /* Update control block via irb. */
266 copy_scsw_to_guest(&s, &irb.scsw);
269 /* If a unit check is pending, copy sense data. */
270 if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) &&
271 (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) {
272 memcpy(sch->sense_data, irb.ecw, sizeof(irb.ecw));
/* Notify the guest that status is available for this subchannel. */
276 css_inject_io_interrupt(sch);
/*
 * Set up delivery of I/O completion events for @vcdev.
 *
 * Validates that the device exposes the I/O IRQ index, queries its IRQ info,
 * creates the event notifier, installs vfio_ccw_io_notifier_handler() on its
 * fd and registers that fd as the IRQ trigger. Failures are reported through
 * @errp.
 * NOTE(review): the cleanup of 'irq_info' and the exit paths are not
 * visible in this view.
 */
279 static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp)
281 VFIODevice *vdev = &vcdev->vdev;
282 struct vfio_irq_info *irq_info;
/* The device must expose at least indexes 0..VFIO_CCW_IO_IRQ_INDEX. */
286 if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) {
287 error_setg(errp, "vfio: unexpected number of io irqs %u",
/* Query the I/O IRQ; it must report at least one interrupt. */
292 argsz = sizeof(*irq_info);
293 irq_info = g_malloc0(argsz);
294 irq_info->index = VFIO_CCW_IO_IRQ_INDEX;
295 irq_info->argsz = argsz;
296 if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO,
297 irq_info) < 0 || irq_info->count < 1) {
298 error_setg_errno(errp, errno, "vfio: Error getting irq info");
302 if (event_notifier_init(&vcdev->io_notifier, 0)) {
303 error_setg_errno(errp, errno,
304 "vfio: Unable to init event notifier for IO");
/* Install the read handler on the notifier's fd. */
308 fd = event_notifier_get_fd(&vcdev->io_notifier);
309 qemu_set_fd_handler(fd, vfio_ccw_io_notifier_handler, NULL, vcdev);
/* On failure to wire up the trigger, undo the handler and the notifier. */
311 if (vfio_set_irq_signaling(vdev, VFIO_CCW_IO_IRQ_INDEX, 0,
312 VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
313 qemu_set_fd_handler(fd, NULL, NULL, vcdev);
314 event_notifier_cleanup(&vcdev->io_notifier);
/*
 * Counterpart of vfio_ccw_register_io_notifier(): detaches the trigger by
 * passing fd -1, reporting (not propagating) any error, then updates the fd
 * handler and cleans up the event notifier.
 * NOTE(review): the declaration of 'err' and the remaining arguments of the
 * qemu_set_fd_handler() call are not visible in this view.
 */
321 static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev)
325 if (vfio_set_irq_signaling(&vcdev->vdev, VFIO_CCW_IO_IRQ_INDEX, 0,
326 VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) {
327 error_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name);
330 qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier),
332 event_notifier_cleanup(&vcdev->io_notifier);
/*
 * Discover the device regions and allocate local buffers for them.
 *
 * The I/O region (VFIO_CCW_CONFIG_REGION_INDEX) is mandatory and its size
 * must equal sizeof(struct ccw_io_region). The async command region is
 * optional; when present its size must equal sizeof(struct ccw_cmd_region).
 * Errors are reported through @errp.
 * NOTE(review): 'info' is filled in by two separate lookups; where it is
 * released is not visible in this view, so confirm each result is freed.
 */
335 static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
337 VFIODevice *vdev = &vcdev->vdev;
338 struct vfio_region_info *info;
341 /* Sanity check device */
342 if (!(vdev->flags & VFIO_DEVICE_FLAGS_CCW)) {
343 error_setg(errp, "vfio: Um, this isn't a vfio-ccw device");
348 * We always expect at least the I/O region to be present. We also
349 * may have a variable number of regions governed by capabilities.
351 if (vdev->num_regions < VFIO_CCW_CONFIG_REGION_INDEX + 1) {
352 error_setg(errp, "vfio: too few regions (%u), expected at least %u",
353 vdev->num_regions, VFIO_CCW_CONFIG_REGION_INDEX + 1);
/* ret is passed negated to error_setg_errno(), i.e. used as -errno. */
357 ret = vfio_get_region_info(vdev, VFIO_CCW_CONFIG_REGION_INDEX, &info);
359 error_setg_errno(errp, -ret, "vfio: Error getting config info");
/* Refuse a region whose size differs from the layout QEMU was built with. */
363 vcdev->io_region_size = info->size;
364 if (sizeof(*vcdev->io_region) != vcdev->io_region_size) {
365 error_setg(errp, "vfio: Unexpected size of the I/O region");
370 vcdev->io_region_offset = info->offset;
371 vcdev->io_region = g_malloc0(info->size);
373 /* check for the optional async command region */
374 ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW,
375 VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD, &info);
377 vcdev->async_cmd_region_size = info->size;
378 if (sizeof(*vcdev->async_cmd_region) != vcdev->async_cmd_region_size) {
379 error_setg(errp, "vfio: Unexpected size of the async cmd region");
/* Undo the I/O buffer allocation made above before bailing out. */
380 g_free(vcdev->io_region);
384 vcdev->async_cmd_region_offset = info->offset;
385 vcdev->async_cmd_region = g_malloc0(info->size);
/*
 * Release the region buffers allocated by vfio_ccw_get_region().
 * The pointers are not reset to NULL on the visible lines.
 */
391 static void vfio_ccw_put_region(VFIOCCWDevice *vcdev)
393 g_free(vcdev->async_cmd_region);
394 g_free(vcdev->io_region);
/*
 * Counterpart of vfio_ccw_get_device(): frees the device name string that
 * was stored in vcdev->vdev.name, then releases the base vfio device.
 */
397 static void vfio_ccw_put_device(VFIOCCWDevice *vcdev)
399 g_free(vcdev->vdev.name);
400 vfio_put_base_device(&vcdev->vdev);
/*
 * Attach the vfio device for @vcdev within @group.
 *
 * The device name is "<cssid>.<ssid>.<devid>" of the host subchannel. A
 * subchannel whose name already appears in the group's device list is
 * rejected. On success the ops table, device type, name and owning
 * DeviceState are recorded in vcdev->vdev.
 * NOTE(review): the last parameter (errp) and the error exits, including
 * what happens to 'name' on failure, are not visible in this view.
 */
403 static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev,
406 char *name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid,
407 vcdev->cdev.hostid.ssid,
408 vcdev->cdev.hostid.devid);
409 VFIODevice *vbasedev;
/* Refuse to attach the same subchannel twice. */
411 QLIST_FOREACH(vbasedev, &group->device_list, next) {
412 if (strcmp(vbasedev->name, name) == 0) {
413 error_setg(errp, "vfio: subchannel %s has already been attached",
420 * All vfio-ccw devices are believed to operate in a way compatible with
421 * memory ballooning, ie. pages pinned in the host are in the current
422 * working set of the guest driver and therefore never overlap with pages
423 * available to the guest balloon driver. This needs to be set before
424 * vfio_get_device() for vfio common to handle the balloon inhibitor.
426 vcdev->vdev.balloon_allowed = true;
/* The device is looked up in the group by its mdev id. */
428 if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) {
/* Ownership of 'name' moves to vdev; vfio_ccw_put_device() frees it. */
432 vcdev->vdev.ops = &vfio_ccw_ops;
433 vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW;
434 vcdev->vdev.name = name;
435 vcdev->vdev.dev = &vcdev->cdev.parent_obj.parent_obj;
/*
 * Resolve the IOMMU group of the mediated device behind @cdev and return the
 * corresponding VFIOGroup in the system memory address space.
 *
 * The group id is the decimal number at the end of the target of the
 * sysfs "iommu_group" symlink. Errors are reported through @errp.
 * NOTE(review): readlink() does not NUL-terminate its output, and the
 * termination of group_path and the release of 'tmp' are not visible in
 * this view. Confirm both happen before basename()/sscanf() run.
 */
443 static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp)
445 char *tmp, group_path[PATH_MAX];
449 tmp = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/%s/iommu_group",
450 cdev->hostid.cssid, cdev->hostid.ssid,
451 cdev->hostid.devid, cdev->mdevid);
452 len = readlink(tmp, group_path, sizeof(group_path));
/* Reject errors, empty results, and results that filled the buffer. */
455 if (len <= 0 || len >= sizeof(group_path)) {
456 error_setg(errp, "vfio: no iommu_group found");
/* The last path component of the link target is the numeric group id. */
462 if (sscanf(basename(group_path), "%d", &groupid) != 1) {
463 error_setg(errp, "vfio: failed to read %s", group_path);
467 return vfio_get_group(groupid, &address_space_memory, errp);
/*
 * Device realize hook (installed as dc->realize).
 *
 * Visible setup order: parent class realize, group lookup, device attach,
 * region discovery, I/O notifier registration. The trailing calls release
 * those resources in reverse order and propagate the collected error to
 * @errp.
 * NOTE(review): the error checks, goto targets/labels and the declarations
 * of 'group' and 'err' are not visible in this view.
 */
470 static void vfio_ccw_realize(DeviceState *dev, Error **errp)
473 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
474 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
475 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
476 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
479 /* Call the class init function for subchannel. */
481 cdc->realize(cdev, vcdev->vdev.sysfsdev, &err);
483 goto out_err_propagate;
487 group = vfio_ccw_get_group(cdev, &err);
492 vfio_ccw_get_device(group, vcdev, &err);
497 vfio_ccw_get_region(vcdev, &err);
502 vfio_ccw_register_io_notifier(vcdev, &err);
504 goto out_notifier_err;
/* Unwind, in reverse order of the setup steps above. */
510 vfio_ccw_put_region(vcdev);
512 vfio_ccw_put_device(vcdev);
514 vfio_put_group(group);
/* The parent class unrealize hook is optional. */
516 if (cdc->unrealize) {
517 cdc->unrealize(cdev);
520 error_propagate(errp, err);
/*
 * Device unrealize hook (installed as dc->unrealize): tears down, in
 * reverse order, what vfio_ccw_realize() set up, then calls the parent
 * class unrealize hook if one is provided.
 */
523 static void vfio_ccw_unrealize(DeviceState *dev)
525 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
526 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
527 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
528 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
/* Read the group pointer up front, before the device is released below. */
529 VFIOGroup *group = vcdev->vdev.group;
531 vfio_ccw_unregister_io_notifier(vcdev);
532 vfio_ccw_put_region(vcdev);
533 vfio_ccw_put_device(vcdev);
534 vfio_put_group(group);
536 if (cdc->unrealize) {
537 cdc->unrealize(cdev);
/*
 * User-settable device properties:
 *   "sysfsdev"       - string stored in vdev.sysfsdev; passed to the parent
 *                      class realize hook in vfio_ccw_realize().
 *   "force-orb-pfch" - boolean, default false; when set,
 *                      vfio_ccw_handle_request() forces the ORB prefetch
 *                      flag on.
 */
541 static Property vfio_ccw_properties[] = {
542 DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev),
543 DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice, force_orb_pfch, false),
544 DEFINE_PROP_END_OF_LIST(),
/*
 * VMState description installed as dc->vmsd in vfio_ccw_class_init().
 * NOTE(review): the initializer is not visible in this view.
 */
547 static const VMStateDescription vfio_ccw_vmstate = {
/*
 * Class initializer for the vfio-ccw device type: installs the property
 * list, vmstate, description, category and the realize/unrealize/reset
 * hooks on the DeviceClass, and the request/halt/clear handlers on the
 * S390CCWDeviceClass. @data is not used on the visible lines.
 */
552 static void vfio_ccw_class_init(ObjectClass *klass, void *data)
554 DeviceClass *dc = DEVICE_CLASS(klass);
555 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass);
557 device_class_set_props(dc, vfio_ccw_properties);
558 dc->vmsd = &vfio_ccw_vmstate;
559 dc->desc = "VFIO-based subchannel assignment";
560 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
561 dc->realize = vfio_ccw_realize;
562 dc->unrealize = vfio_ccw_unrealize;
563 dc->reset = vfio_ccw_reset;
/* Subchannel instruction handlers that forward to the host device. */
565 cdc->handle_request = vfio_ccw_handle_request;
566 cdc->handle_halt = vfio_ccw_handle_halt;
567 cdc->handle_clear = vfio_ccw_handle_clear;
/*
 * Type description for TYPE_VFIO_CCW: derives from TYPE_S390_CCW, with
 * instances sized as VFIOCCWDevice and initialized by vfio_ccw_class_init().
 */
570 static const TypeInfo vfio_ccw_info = {
571 .name = TYPE_VFIO_CCW,
572 .parent = TYPE_S390_CCW,
573 .instance_size = sizeof(VFIOCCWDevice),
574 .class_init = vfio_ccw_class_init,
/* Register the vfio-ccw type description; hooked up through type_init(). */
577 static void register_vfio_ccw_type(void)
579 type_register_static(&vfio_ccw_info);
582 type_init(register_vfio_ccw_type)