#include <linux/vfio.h>
#include <sys/ioctl.h>
+#include "hw/hw.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci_bridge.h"
+#include "migration/vmstate.h"
#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/module.h"
#include "qemu/option.h"
#include "qemu/range.h"
#include "qemu/units.h"
#include "trace.h"
#include "qapi/error.h"
-#define MSIX_CAP_LENGTH 12
-
#define TYPE_VFIO_PCI "vfio-pci"
#define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
+/* QOM type name of the vfio-pci variant registered as "vfio-pci-nohotplug". */
+#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
+/* Misspelled ("VIFO") name, kept as an alias so existing users still build. */
+#define TYPE_VIFO_PCI_NOHOTPLUG TYPE_VFIO_PCI_NOHOTPLUG
+
static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
.gsi = vdev->intx.route.irq,
.flags = KVM_IRQFD_FLAG_RESAMPLE,
};
- struct vfio_irq_set *irq_set;
- int ret, argsz;
- int32_t *pfd;
+ Error *err = NULL;
if (vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
vdev->intx.route.mode != PCI_INTX_ENABLED ||
goto fail_irqfd;
}
- argsz = sizeof(*irq_set) + sizeof(*pfd);
-
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
- irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
- irq_set->start = 0;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
-
- *pfd = irqfd.resamplefd;
-
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
- g_free(irq_set);
- if (ret) {
- error_setg_errno(errp, -ret, "failed to setup INTx unmask fd");
+ if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
+ VFIO_IRQ_SET_ACTION_UNMASK,
+ irqfd.resamplefd, &err)) {
+ error_propagate(errp, err);
goto fail_vfio;
}
static int vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
{
uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
- int ret, argsz, retval = 0;
- struct vfio_irq_set *irq_set;
- int32_t *pfd;
Error *err = NULL;
+ int32_t fd;
+ int ret;
+
if (!pin) {
return 0;
error_setg_errno(errp, -ret, "event_notifier_init failed");
return ret;
}
+ fd = event_notifier_get_fd(&vdev->intx.interrupt);
+ qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev);
- argsz = sizeof(*irq_set) + sizeof(*pfd);
-
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
- irq_set->start = 0;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
-
- *pfd = event_notifier_get_fd(&vdev->intx.interrupt);
- qemu_set_fd_handler(*pfd, vfio_intx_interrupt, NULL, vdev);
-
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
- if (ret) {
- error_setg_errno(errp, -ret, "failed to setup INTx fd");
- qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
+ if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
+ VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
+ error_propagate(errp, err);
+ qemu_set_fd_handler(fd, NULL, NULL, vdev);
event_notifier_cleanup(&vdev->intx.interrupt);
- retval = -errno;
- goto cleanup;
+ return -errno;
}
vfio_intx_enable_kvm(vdev, &err);
vdev->interrupt = VFIO_INT_INTx;
trace_vfio_intx_enable(vdev->vbasedev.name);
-
-cleanup:
- g_free(irq_set);
-
- return retval;
+ return 0;
}
static void vfio_intx_disable(VFIOPCIDevice *vdev)
error_report("vfio: failed to enable vectors, %d", ret);
}
} else {
- int argsz;
- struct vfio_irq_set *irq_set;
- int32_t *pfd;
-
- argsz = sizeof(*irq_set) + sizeof(*pfd);
-
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
- VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
- irq_set->start = nr;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
+ Error *err = NULL;
+ int32_t fd;
if (vector->virq >= 0) {
- *pfd = event_notifier_get_fd(&vector->kvm_interrupt);
+ fd = event_notifier_get_fd(&vector->kvm_interrupt);
} else {
- *pfd = event_notifier_get_fd(&vector->interrupt);
+ fd = event_notifier_get_fd(&vector->interrupt);
}
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
- g_free(irq_set);
- if (ret) {
- error_report("vfio: failed to modify vector, %d", ret);
+ if (vfio_set_irq_signaling(&vdev->vbasedev,
+ VFIO_PCI_MSIX_IRQ_INDEX, nr,
+ VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
}
* be re-asserted on unmask. Nothing to do if already using QEMU mode.
*/
if (vector->virq >= 0) {
- int argsz;
- struct vfio_irq_set *irq_set;
- int32_t *pfd;
-
- argsz = sizeof(*irq_set) + sizeof(*pfd);
-
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
- VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
- irq_set->start = nr;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
+ int32_t fd = event_notifier_get_fd(&vector->interrupt);
+ Error *err = NULL;
- *pfd = event_notifier_get_fd(&vector->interrupt);
-
- ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
-
- g_free(irq_set);
+ if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr,
+ VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ }
}
}
if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
/* Since pci handles romfile, just print a message and return */
if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
- error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified romfile\n",
- vdev->vbasedev.name);
+ warn_report("Device at %s is known to cause system instability"
+ " issues during option rom execution",
+ vdev->vbasedev.name);
+ error_printf("Proceeding anyway since user specified romfile\n");
}
return;
}
if (vfio_blacklist_opt_rom(vdev)) {
if (dev->opts && qemu_opt_get(dev->opts, "rombar")) {
- error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified non zero value for rombar\n",
- vdev->vbasedev.name);
+ warn_report("Device at %s is known to cause system instability"
+ " issues during option rom execution",
+ vdev->vbasedev.name);
+ error_printf("Proceeding anyway since user specified"
+ " non zero value for rombar\n");
} else {
- error_printf("Warning : Rom loading for device at %s has been disabled due to system instability issues. Specify rombar=1 or romfile to force\n",
- vdev->vbasedev.name);
+ warn_report("Rom loading for device at %s has been disabled"
+ " due to system instability issues",
+ vdev->vbasedev.name);
+ error_printf("Specify rombar=1 or romfile to force\n");
return;
}
}
if (vdev->vendor_id == PCI_VENDOR_ID_CHELSIO &&
(vdev->device_id & 0xff00) == 0x5800) {
msix->pba_offset = 0x1000;
- } else {
+ } else if (vdev->msix_relo == OFF_AUTOPCIBAR_OFF) {
error_setg(errp, "hardware reports invalid configuration, "
"MSIX PBA outside of specified BAR");
g_free(msix);
case 0: /* kernel masked capability */
case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */
case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */
+ case PCI_EXT_CAP_ID_REBAR: /* Can't expose read-only */
trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next);
break;
default:
*/
static void vfio_register_err_notifier(VFIOPCIDevice *vdev)
{
- int ret;
- int argsz;
- struct vfio_irq_set *irq_set;
- int32_t *pfd;
+ Error *err = NULL; /* receives failure details from vfio_set_irq_signaling() */
+ int32_t fd; /* file descriptor obtained from vdev->err_notifier */
if (!vdev->pci_aer) {
return;
return;
}
- argsz = sizeof(*irq_set) + sizeof(*pfd);
+ fd = event_notifier_get_fd(&vdev->err_notifier);
+ qemu_set_fd_handler(fd, vfio_err_notifier_handler, NULL, vdev); /* handler is installed before fd is wired to the error IRQ */
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
- VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
- irq_set->start = 0;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
-
- *pfd = event_notifier_get_fd(&vdev->err_notifier);
- qemu_set_fd_handler(*pfd, vfio_err_notifier_handler, NULL, vdev);
-
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
- if (ret) {
- error_report("vfio: Failed to set up error notification");
- qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
+ if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0,
+ VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) { /* a non-zero return is treated as failure */
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); /* reported with the device name; nothing is returned to the caller */
+ qemu_set_fd_handler(fd, NULL, NULL, vdev); /* undo the handler installed above */
event_notifier_cleanup(&vdev->err_notifier);
vdev->pci_aer = false;
}
- g_free(irq_set);
}
static void vfio_unregister_err_notifier(VFIOPCIDevice *vdev)
{
- int argsz;
- struct vfio_irq_set *irq_set;
- int32_t *pfd;
- int ret;
+ Error *err = NULL;
if (!vdev->pci_aer) {
return;
}
- argsz = sizeof(*irq_set) + sizeof(*pfd);
-
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
- VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
- irq_set->start = 0;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
- *pfd = -1;
-
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
- if (ret) {
- error_report("vfio: Failed to de-assign error fd: %m");
+ if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0,
+ VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
- g_free(irq_set);
qemu_set_fd_handler(event_notifier_get_fd(&vdev->err_notifier),
NULL, NULL, vdev);
event_notifier_cleanup(&vdev->err_notifier);
return;
}
- qdev_unplug(&vdev->pdev.qdev, &err);
+ qdev_unplug(DEVICE(vdev), &err);
if (err) {
warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
{
struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info),
.index = VFIO_PCI_REQ_IRQ_INDEX };
- int argsz;
- struct vfio_irq_set *irq_set;
- int32_t *pfd;
+ Error *err = NULL;
+ int32_t fd;
if (!(vdev->features & VFIO_FEATURE_ENABLE_REQ)) {
return;
return;
}
- argsz = sizeof(*irq_set) + sizeof(*pfd);
+ fd = event_notifier_get_fd(&vdev->req_notifier);
+ qemu_set_fd_handler(fd, vfio_req_notifier_handler, NULL, vdev);
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
- VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
- irq_set->start = 0;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
-
- *pfd = event_notifier_get_fd(&vdev->req_notifier);
- qemu_set_fd_handler(*pfd, vfio_req_notifier_handler, NULL, vdev);
-
- if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
- error_report("vfio: Failed to set up device request notification");
- qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
+ if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0,
+ VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ qemu_set_fd_handler(fd, NULL, NULL, vdev);
event_notifier_cleanup(&vdev->req_notifier);
} else {
vdev->req_enabled = true;
}
-
- g_free(irq_set);
}
static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
{
- int argsz;
- struct vfio_irq_set *irq_set;
- int32_t *pfd;
+ Error *err = NULL;
if (!vdev->req_enabled) {
return;
}
- argsz = sizeof(*irq_set) + sizeof(*pfd);
-
- irq_set = g_malloc0(argsz);
- irq_set->argsz = argsz;
- irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
- VFIO_IRQ_SET_ACTION_TRIGGER;
- irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
- irq_set->start = 0;
- irq_set->count = 1;
- pfd = (int32_t *)&irq_set->data;
- *pfd = -1;
-
- if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
- error_report("vfio: Failed to de-assign device request fd: %m");
+ if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0,
+ VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
- g_free(irq_set);
qemu_set_fd_handler(event_notifier_get_fd(&vdev->req_notifier),
NULL, NULL, vdev);
event_notifier_cleanup(&vdev->req_notifier);
vdev->vbasedev.name = g_path_get_basename(vdev->vbasedev.sysfsdev);
vdev->vbasedev.ops = &vfio_pci_ops;
vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
- vdev->vbasedev.dev = &vdev->pdev.qdev;
+ vdev->vbasedev.dev = DEVICE(vdev);
tmp = g_strdup_printf("%s/iommu_group", vdev->vbasedev.sysfsdev);
len = readlink(tmp, group_path, sizeof(group_path));
}
}
+ if (vdev->vendor_id == PCI_VENDOR_ID_NVIDIA) {
+ ret = vfio_pci_nvidia_v100_ram_init(vdev, errp);
+ if (ret && ret != -ENODEV) {
+ error_report("Failed to setup NVIDIA V100 GPU RAM");
+ }
+ }
+
+ if (vdev->vendor_id == PCI_VENDOR_ID_IBM) {
+ ret = vfio_pci_nvlink2_init(vdev, errp);
+ if (ret && ret != -ENODEV) {
+ error_report("Failed to setup NVlink2 bridge");
+ }
+ }
+
vfio_register_err_notifier(vdev);
vfio_register_req_notifier(vdev);
vfio_setup_resetfn_quirk(vdev);
}
static const TypeInfo vfio_pci_nohotplug_dev_info = {
- .name = "vfio-pci-nohotplug",
- .parent = "vfio-pci",
+ .name = TYPE_VIFO_PCI_NOHOTPLUG, /* NOTE(review): "VIFO" looks like a misspelling of "VFIO"; it matches the spelling at the macro's #define, which expands to "vfio-pci-nohotplug" */
+ .parent = TYPE_VFIO_PCI, /* named constant replacing the former "vfio-pci" literal */
.instance_size = sizeof(VFIOPCIDevice),
.class_init = vfio_pci_nohotplug_dev_class_init,
};