#include <linux/vfio.h>
#include <sys/ioctl.h>
+#include "hw/hw.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci_bridge.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/module.h"
#include "qemu/option.h"
#include "qemu/range.h"
#include "qemu/units.h"
#include "sysemu/kvm.h"
+#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "pci.h"
#include "trace.h"
#include "qapi/error.h"
+#include "migration/blocker.h"
#define TYPE_VFIO_PCI "vfio-pci"
#define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
-#define TYPE_VIFO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
+#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
.gsi = vdev->intx.route.irq,
.flags = KVM_IRQFD_FLAG_RESAMPLE,
};
- struct vfio_irq_set *irq_set;
- int ret, argsz;
- int32_t *pfd;
+ Error *err = NULL;
if (vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
vdev->intx.route.mode != PCI_INTX_ENABLED ||
goto fail_irqfd;
}
- argsz = sizeof(*irq_set) + sizeof(*pfd);
-
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
- irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
- irq_set->start = 0;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
-
- *pfd = irqfd.resamplefd;
-
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
- g_free(irq_set);
- if (ret) {
- error_setg_errno(errp, -ret, "failed to setup INTx unmask fd");
+ if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
+ VFIO_IRQ_SET_ACTION_UNMASK,
+ irqfd.resamplefd, &err)) {
+ error_propagate(errp, err);
goto fail_vfio;
}
#endif
}
-static void vfio_intx_update(PCIDevice *pdev)
+static void vfio_intx_update(VFIOPCIDevice *vdev, PCIINTxRoute *route)
{
- VFIOPCIDevice *vdev = PCI_VFIO(pdev);
- PCIINTxRoute route;
Error *err = NULL;
- if (vdev->interrupt != VFIO_INT_INTx) {
- return;
- }
-
- route = pci_device_route_intx_to_irq(&vdev->pdev, vdev->intx.pin);
-
- if (!pci_intx_route_changed(&vdev->intx.route, &route)) {
- return; /* Nothing changed */
- }
-
trace_vfio_intx_update(vdev->vbasedev.name,
- vdev->intx.route.irq, route.irq);
+ vdev->intx.route.irq, route->irq);
vfio_intx_disable_kvm(vdev);
- vdev->intx.route = route;
+ vdev->intx.route = *route;
- if (route.mode != PCI_INTX_ENABLED) {
+ if (route->mode != PCI_INTX_ENABLED) {
return;
}
vfio_intx_eoi(&vdev->vbasedev);
}
+static void vfio_intx_routing_notifier(PCIDevice *pdev)
+{
+ VFIOPCIDevice *vdev = PCI_VFIO(pdev);
+ PCIINTxRoute route;
+
+ if (vdev->interrupt != VFIO_INT_INTx) {
+ return;
+ }
+
+ route = pci_device_route_intx_to_irq(&vdev->pdev, vdev->intx.pin);
+
+ if (pci_intx_route_changed(&vdev->intx.route, &route)) {
+ vfio_intx_update(vdev, &route);
+ }
+}
+
+static void vfio_irqchip_change(Notifier *notify, void *data)
+{
+ VFIOPCIDevice *vdev = container_of(notify, VFIOPCIDevice,
+ irqchip_change_notifier);
+
+ vfio_intx_update(vdev, &vdev->intx.route);
+}
+
static int vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
{
uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
- int ret, argsz, retval = 0;
- struct vfio_irq_set *irq_set;
- int32_t *pfd;
Error *err = NULL;
+ int32_t fd;
+ int ret;
+
if (!pin) {
return 0;
error_setg_errno(errp, -ret, "event_notifier_init failed");
return ret;
}
+ fd = event_notifier_get_fd(&vdev->intx.interrupt);
+ qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev);
- argsz = sizeof(*irq_set) + sizeof(*pfd);
-
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
- irq_set->start = 0;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
-
- *pfd = event_notifier_get_fd(&vdev->intx.interrupt);
- qemu_set_fd_handler(*pfd, vfio_intx_interrupt, NULL, vdev);
-
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
- if (ret) {
- error_setg_errno(errp, -ret, "failed to setup INTx fd");
- qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
+ if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
+ VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
+ error_propagate(errp, err);
+ qemu_set_fd_handler(fd, NULL, NULL, vdev);
event_notifier_cleanup(&vdev->intx.interrupt);
- retval = -errno;
- goto cleanup;
+ return -errno;
}
vfio_intx_enable_kvm(vdev, &err);
vdev->interrupt = VFIO_INT_INTx;
trace_vfio_intx_enable(vdev->vbasedev.name);
-
-cleanup:
- g_free(irq_set);
-
- return retval;
+ return 0;
}
static void vfio_intx_disable(VFIOPCIDevice *vdev)
error_report("vfio: failed to enable vectors, %d", ret);
}
} else {
- int argsz;
- struct vfio_irq_set *irq_set;
- int32_t *pfd;
-
- argsz = sizeof(*irq_set) + sizeof(*pfd);
-
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
- VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
- irq_set->start = nr;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
+ Error *err = NULL;
+ int32_t fd;
if (vector->virq >= 0) {
- *pfd = event_notifier_get_fd(&vector->kvm_interrupt);
+ fd = event_notifier_get_fd(&vector->kvm_interrupt);
} else {
- *pfd = event_notifier_get_fd(&vector->interrupt);
+ fd = event_notifier_get_fd(&vector->interrupt);
}
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
- g_free(irq_set);
- if (ret) {
- error_report("vfio: failed to modify vector, %d", ret);
+ if (vfio_set_irq_signaling(&vdev->vbasedev,
+ VFIO_PCI_MSIX_IRQ_INDEX, nr,
+ VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
}
* be re-asserted on unmask. Nothing to do if already using QEMU mode.
*/
if (vector->virq >= 0) {
- int argsz;
- struct vfio_irq_set *irq_set;
- int32_t *pfd;
-
- argsz = sizeof(*irq_set) + sizeof(*pfd);
-
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
- VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
- irq_set->start = nr;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
-
- *pfd = event_notifier_get_fd(&vector->interrupt);
+ int32_t fd = event_notifier_get_fd(&vector->interrupt);
+ Error *err = NULL;
- ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
-
- g_free(irq_set);
+ if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr,
+ VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ }
}
}
}
g_free(vdev->msi_vectors);
+ vdev->msi_vectors = NULL;
if (ret > 0 && ret != vdev->nr_vectors) {
vdev->nr_vectors = ret;
if (vdev->vendor_id == PCI_VENDOR_ID_CHELSIO &&
(vdev->device_id & 0xff00) == 0x5800) {
msix->pba_offset = 0x1000;
- } else {
+ } else if (vdev->msix_relo == OFF_AUTOPCIBAR_OFF) {
error_setg(errp, "hardware reports invalid configuration, "
"MSIX PBA outside of specified BAR");
g_free(msix);
case 0: /* kernel masked capability */
case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */
case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */
+ case PCI_EXT_CAP_ID_REBAR: /* Can't expose read-only */
trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next);
break;
default:
*/
static void vfio_register_err_notifier(VFIOPCIDevice *vdev)
{
- int ret;
- int argsz;
- struct vfio_irq_set *irq_set;
- int32_t *pfd;
+ Error *err = NULL;
+ int32_t fd;
if (!vdev->pci_aer) {
return;
return;
}
- argsz = sizeof(*irq_set) + sizeof(*pfd);
-
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
- VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
- irq_set->start = 0;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
-
- *pfd = event_notifier_get_fd(&vdev->err_notifier);
- qemu_set_fd_handler(*pfd, vfio_err_notifier_handler, NULL, vdev);
+ fd = event_notifier_get_fd(&vdev->err_notifier);
+ qemu_set_fd_handler(fd, vfio_err_notifier_handler, NULL, vdev);
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
- if (ret) {
- error_report("vfio: Failed to set up error notification");
- qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
+ if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0,
+ VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ qemu_set_fd_handler(fd, NULL, NULL, vdev);
event_notifier_cleanup(&vdev->err_notifier);
vdev->pci_aer = false;
}
- g_free(irq_set);
}
static void vfio_unregister_err_notifier(VFIOPCIDevice *vdev)
{
- int argsz;
- struct vfio_irq_set *irq_set;
- int32_t *pfd;
- int ret;
+ Error *err = NULL;
if (!vdev->pci_aer) {
return;
}
- argsz = sizeof(*irq_set) + sizeof(*pfd);
-
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
- VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
- irq_set->start = 0;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
- *pfd = -1;
-
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
- if (ret) {
- error_report("vfio: Failed to de-assign error fd: %m");
+ if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0,
+ VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
- g_free(irq_set);
qemu_set_fd_handler(event_notifier_get_fd(&vdev->err_notifier),
NULL, NULL, vdev);
event_notifier_cleanup(&vdev->err_notifier);
{
struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info),
.index = VFIO_PCI_REQ_IRQ_INDEX };
- int argsz;
- struct vfio_irq_set *irq_set;
- int32_t *pfd;
+ Error *err = NULL;
+ int32_t fd;
if (!(vdev->features & VFIO_FEATURE_ENABLE_REQ)) {
return;
return;
}
- argsz = sizeof(*irq_set) + sizeof(*pfd);
+ fd = event_notifier_get_fd(&vdev->req_notifier);
+ qemu_set_fd_handler(fd, vfio_req_notifier_handler, NULL, vdev);
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
- VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
- irq_set->start = 0;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
-
- *pfd = event_notifier_get_fd(&vdev->req_notifier);
- qemu_set_fd_handler(*pfd, vfio_req_notifier_handler, NULL, vdev);
-
- if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
- error_report("vfio: Failed to set up device request notification");
- qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
+ if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0,
+ VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ qemu_set_fd_handler(fd, NULL, NULL, vdev);
event_notifier_cleanup(&vdev->req_notifier);
} else {
vdev->req_enabled = true;
}
-
- g_free(irq_set);
}
static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
{
- int argsz;
- struct vfio_irq_set *irq_set;
- int32_t *pfd;
+ Error *err = NULL;
if (!vdev->req_enabled) {
return;
}
- argsz = sizeof(*irq_set) + sizeof(*pfd);
-
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
- VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
- irq_set->start = 0;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
- *pfd = -1;
-
- if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
- error_report("vfio: Failed to de-assign device request fd: %m");
+ if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0,
+ VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
- g_free(irq_set);
qemu_set_fd_handler(event_notifier_get_fd(&vdev->req_notifier),
NULL, NULL, vdev);
event_notifier_cleanup(&vdev->req_notifier);
return;
}
+ if (!pdev->failover_pair_id) {
+ error_setg(&vdev->migration_blocker,
+ "VFIO device doesn't support migration");
+ ret = migrate_add_blocker(vdev->migration_blocker, &err);
+ if (ret) {
+ error_propagate(errp, err);
+ error_free(vdev->migration_blocker);
+ vdev->migration_blocker = NULL;
+ return;
+ }
+ }
+
vdev->vbasedev.name = g_path_get_basename(vdev->vbasedev.sysfsdev);
vdev->vbasedev.ops = &vfio_pci_ops;
vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
vfio_intx_mmap_enable, vdev);
- pci_device_set_intx_routing_notifier(&vdev->pdev, vfio_intx_update);
+ pci_device_set_intx_routing_notifier(&vdev->pdev,
+ vfio_intx_routing_notifier);
+ vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
+ kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
ret = vfio_intx_enable(vdev, errp);
if (ret) {
- goto out_teardown;
+ goto out_deregister;
}
}
if (vdev->display != ON_OFF_AUTO_OFF) {
ret = vfio_display_probe(vdev, errp);
if (ret) {
- goto out_teardown;
+ goto out_deregister;
}
}
if (vdev->enable_ramfb && vdev->dpy == NULL) {
error_setg(errp, "ramfb=on requires display=on");
- goto out_teardown;
+ goto out_deregister;
}
if (vdev->display_xres || vdev->display_yres) {
if (vdev->dpy == NULL) {
error_setg(errp, "xres and yres properties require display=on");
- goto out_teardown;
+ goto out_deregister;
}
if (vdev->dpy->edid_regs == NULL) {
error_setg(errp, "xres and yres properties need edid support");
- goto out_teardown;
+ goto out_deregister;
}
}
return;
-out_teardown:
+out_deregister:
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+ kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
+out_teardown:
vfio_teardown_msi(vdev);
vfio_bars_exit(vdev);
error:
error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ if (vdev->migration_blocker) {
+ migrate_del_blocker(vdev->migration_blocker);
+ error_free(vdev->migration_blocker);
+ vdev->migration_blocker = NULL;
+ }
}
static void vfio_instance_finalize(Object *obj)
vfio_bars_finalize(vdev);
g_free(vdev->emulated_config_bits);
g_free(vdev->rom);
+ if (vdev->migration_blocker) {
+ migrate_del_blocker(vdev->migration_blocker);
+ error_free(vdev->migration_blocker);
+ }
/*
* XXX Leaking igd_opregion is not an oversight, we can't remove the
* fw_cfg entry therefore leaking this allocation seems like the safest
vfio_unregister_req_notifier(vdev);
vfio_unregister_err_notifier(vdev);
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+ if (vdev->irqchip_change_notifier.notify) {
+ kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
+ }
vfio_disable_interrupts(vdev);
if (vdev->intx.mmap_timer) {
timer_free(vdev->intx.mmap_timer);
DEFINE_PROP_END_OF_LIST(),
};
-static const VMStateDescription vfio_pci_vmstate = {
- .name = "vfio-pci",
- .unmigratable = 1,
-};
-
static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
dc->reset = vfio_pci_reset;
- dc->props = vfio_pci_dev_properties;
- dc->vmsd = &vfio_pci_vmstate;
+ device_class_set_props(dc, vfio_pci_dev_properties);
dc->desc = "VFIO-based PCI device assignment";
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
pdc->realize = vfio_realize;
{
DeviceClass *dc = DEVICE_CLASS(klass);
- dc->props = vfio_pci_dev_nohotplug_properties;
+ device_class_set_props(dc, vfio_pci_dev_nohotplug_properties);
dc->hotpluggable = false;
}
static const TypeInfo vfio_pci_nohotplug_dev_info = {
- .name = TYPE_VIFO_PCI_NOHOTPLUG,
+ .name = TYPE_VFIO_PCI_NOHOTPLUG,
.parent = TYPE_VFIO_PCI,
.instance_size = sizeof(VFIOPCIDevice),
.class_init = vfio_pci_nohotplug_dev_class_init,