X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/8e36d27c5a7b99a7adb40cfda2f92b1d97216e84..c60807dea5930a402b77172da17e79b26f1af48b:/hw/vfio/pci.c diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 18c493b49e..6520c05dee 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -26,15 +26,20 @@ #include "hw/pci/msix.h" #include "hw/pci/pci_bridge.h" #include "qemu/error-report.h" +#include "qemu/module.h" #include "qemu/option.h" #include "qemu/range.h" +#include "qemu/units.h" #include "sysemu/kvm.h" #include "sysemu/sysemu.h" #include "pci.h" #include "trace.h" #include "qapi/error.h" -#define MSIX_CAP_LENGTH 12 +#define TYPE_VFIO_PCI "vfio-pci" +#define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI) + +#define TYPE_VIFO_PCI_NOHOTPLUG "vfio-pci-nohotplug" static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); @@ -221,7 +226,7 @@ static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev) static void vfio_intx_update(PCIDevice *pdev) { - VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); + VFIOPCIDevice *vdev = PCI_VFIO(pdev); PCIINTxRoute route; Error *err = NULL; @@ -248,7 +253,7 @@ static void vfio_intx_update(PCIDevice *pdev) vfio_intx_enable_kvm(vdev, &err); if (err) { - error_reportf_err(err, WARN_PREFIX, vdev->vbasedev.name); + warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); } /* Re-enable the interrupt in cased we missed an EOI */ @@ -313,7 +318,7 @@ static int vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) vfio_intx_enable_kvm(vdev, &err); if (err) { - error_reportf_err(err, WARN_PREFIX, vdev->vbasedev.name); + warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); } vdev->interrupt = VFIO_INT_INTx; @@ -476,7 +481,7 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg, static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, MSIMessage *msg, IOHandler *handler) { - VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); + VFIOPCIDevice *vdev = PCI_VFIO(pdev); VFIOMSIVector *vector; int ret; @@ -573,7 +578,7 @@ static int vfio_msix_vector_use(PCIDevice *pdev, static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) { - VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); + VFIOPCIDevice *vdev = PCI_VFIO(pdev); VFIOMSIVector *vector = &vdev->msi_vectors[nr]; trace_vfio_msix_vector_release(vdev->vbasedev.name, nr); @@ -741,7 +746,7 @@ static void vfio_msi_disable_common(VFIOPCIDevice *vdev) vfio_intx_enable(vdev, &err); if (err) { - error_reportf_err(err, ERR_PREFIX, vdev->vbasedev.name); + error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); } } @@ -943,8 +948,10 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { /* Since pci handles romfile, just print a message and return */ if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) { - error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified romfile\n", - vdev->vbasedev.name); + warn_report("Device at %s is known to cause system instability" + " issues during option rom execution", + vdev->vbasedev.name); + error_printf("Proceeding anyway since user specified romfile\n"); } return; } @@ -969,11 +976,16 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) if (vfio_blacklist_opt_rom(vdev)) { if (dev->opts && qemu_opt_get(dev->opts, "rombar")) { - error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified non zero value for rombar\n", - vdev->vbasedev.name); + warn_report("Device at %s is known to cause system instability" + " issues during option rom execution", + vdev->vbasedev.name); + error_printf("Proceeding anyway since user specified" + " non zero value for rombar\n"); } else { - error_printf("Warning : Rom loading for device at %s has been disabled due to system instability issues. Specify rombar=1 or romfile to force\n", - vdev->vbasedev.name); + warn_report("Rom loading for device at %s has been disabled" + " due to system instability issues", + vdev->vbasedev.name); + error_printf("Specify rombar=1 or romfile to force\n"); return; } } @@ -989,7 +1001,6 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) pci_register_bar(&vdev->pdev, PCI_ROM_SLOT, PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom); - vdev->pdev.has_rom = true; vdev->rom_read_failed = false; } @@ -1086,7 +1097,7 @@ static const MemoryRegionOps vfio_vga_ops = { */ static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar) { - VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); + VFIOPCIDevice *vdev = PCI_VFIO(pdev); VFIORegion *region = &vdev->bars[bar].region; MemoryRegion *mmap_mr, *region_mr, *base_mr; PCIIORegion *r; @@ -1132,7 +1143,7 @@ static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar) */ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) { - VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); + VFIOPCIDevice *vdev = PCI_VFIO(pdev); uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val; memcpy(&emu_bits, vdev->emulated_config_bits + addr, len); @@ -1165,7 +1176,7 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, uint32_t val, int len) { - VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); + VFIOPCIDevice *vdev = PCI_VFIO(pdev); uint32_t val_le = cpu_to_le32(val); trace_vfio_pci_write_config(vdev->vbasedev.name, addr, val, len); @@ -1280,8 +1291,7 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) if (ret == -ENOTSUP) { return 0; } - error_prepend(&err, "msi_init failed: "); - error_propagate(errp, err); + error_propagate_prepend(errp, err, "msi_init failed: "); return ret; } vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0); @@ -1417,7 +1427,7 @@ static void vfio_pci_relocate_msix(VFIOPCIDevice *vdev, Error **errp) } /* 2GB max size for 32-bit BARs, cannot double if already > 1G */ - if (vdev->bars[target_bar].size > (1 * 1024 * 1024 * 1024) && + if (vdev->bars[target_bar].size > 1 * GiB && !vdev->bars[target_bar].mem64) { error_setg(errp, "Invalid MSI-X relocation BAR %d, " "no space to extend 32-bit BAR", target_bar); @@ -1523,7 +1533,7 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) if (vdev->vendor_id == PCI_VENDOR_ID_CHELSIO && (vdev->device_id & 0xff00) == 0x5800) { msix->pba_offset = 0x1000; - } else { + } else if (vdev->msix_relo == OFF_AUTOPCIBAR_OFF) { error_setg(errp, "hardware reports invalid configuration, " "MSIX PBA outside of specified BAR"); g_free(msix); @@ -1555,7 +1565,7 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp) &err); if (ret < 0) { if (ret == -ENOTSUP) { - error_report_err(err); + warn_report_err(err); return 0; } @@ -1895,15 +1905,10 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size, PCI_EXP_TYPE_ENDPOINT << 4, PCI_EXP_FLAGS_TYPE); vfio_add_emulated_long(vdev, pos + PCI_EXP_LNKCAP, - PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25, ~0); + QEMU_PCI_EXP_LNKCAP_MLW(QEMU_PCI_EXP_LNK_X1) | + QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT), ~0); vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0); } - - /* Mark the Link Status bits as emulated to allow virtual negotiation */ - vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKSTA, - pci_get_word(vdev->pdev.config + pos + - PCI_EXP_LNKSTA), - PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS); } /* @@ -2114,6 +2119,7 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) case 0: /* kernel masked capability */ case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */ case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */ + case PCI_EXT_CAP_ID_REBAR: /* Can't expose read-only */ trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next); break; default: @@ -2194,7 +2200,7 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) vfio_intx_enable(vdev, &err); if (err) { - error_reportf_err(err, ERR_PREFIX, vdev->vbasedev.name); + error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); } for (nr = 0; nr < PCI_NUM_REGIONS - 1; ++nr) { @@ -2584,13 +2590,13 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); if (ret) { /* This can fail for an old kernel or legacy PCI dev */ - trace_vfio_populate_device_get_irq_info_failure(); + trace_vfio_populate_device_get_irq_info_failure(strerror(errno)); } else if (irq_info.count == 1) { vdev->pci_aer = true; } else { - error_report(WARN_PREFIX - "Could not enable error recovery for the device", - vbasedev->name); + warn_report(VFIO_MSG_PREFIX + "Could not enable error recovery for the device", + vbasedev->name); } } @@ -2713,9 +2719,9 @@ static void vfio_req_notifier_handler(void *opaque) return; } - qdev_unplug(&vdev->pdev.qdev, &err); + qdev_unplug(DEVICE(vdev), &err); if (err) { - error_reportf_err(err, WARN_PREFIX, vdev->vbasedev.name); + warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); } } @@ -2801,15 +2807,16 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) static void vfio_realize(PCIDevice *pdev, Error **errp) { - VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); + VFIOPCIDevice *vdev = PCI_VFIO(pdev); VFIODevice *vbasedev_iter; VFIOGroup *group; - char *tmp, group_path[PATH_MAX], *group_name; + char *tmp, *subsys, group_path[PATH_MAX], *group_name; Error *err = NULL; ssize_t len; struct stat st; int groupid; int i, ret; + bool is_mdev; if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || @@ -2827,14 +2834,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) if (stat(vdev->vbasedev.sysfsdev, &st) < 0) { error_setg_errno(errp, errno, "no such host device"); - error_prepend(errp, ERR_PREFIX, vdev->vbasedev.sysfsdev); + error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.sysfsdev); return; } vdev->vbasedev.name = g_path_get_basename(vdev->vbasedev.sysfsdev); vdev->vbasedev.ops = &vfio_pci_ops; vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI; - vdev->vbasedev.dev = &vdev->pdev.qdev; + vdev->vbasedev.dev = DEVICE(vdev); tmp = g_strdup_printf("%s/iommu_group", vdev->vbasedev.sysfsdev); len = readlink(tmp, group_path, sizeof(group_path)); @@ -2869,6 +2876,27 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) } } + /* + * Mediated devices *might* operate compatibly with memory ballooning, but + * we cannot know for certain, it depends on whether the mdev vendor driver + * stays in sync with the active working set of the guest driver. Prevent + * the x-balloon-allowed option unless this is minimally an mdev device. + */ + tmp = g_strdup_printf("%s/subsystem", vdev->vbasedev.sysfsdev); + subsys = realpath(tmp, NULL); + g_free(tmp); + is_mdev = subsys && (strcmp(subsys, "/sys/bus/mdev") == 0); + free(subsys); + + trace_vfio_mdev(vdev->vbasedev.name, is_mdev); + + if (vdev->vbasedev.balloon_allowed && !is_mdev) { + error_setg(errp, "x-balloon-allowed only potentially compatible " + "with mdev devices"); + vfio_put_group(group); + goto error; + } + ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev, errp); if (ret) { vfio_put_group(group); @@ -3045,6 +3073,34 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) goto out_teardown; } } + if (vdev->enable_ramfb && vdev->dpy == NULL) { + error_setg(errp, "ramfb=on requires display=on"); + goto out_teardown; + } + if (vdev->display_xres || vdev->display_yres) { + if (vdev->dpy == NULL) { + error_setg(errp, "xres and yres properties require display=on"); + goto out_teardown; + } + if (vdev->dpy->edid_regs == NULL) { + error_setg(errp, "xres and yres properties need edid support"); + goto out_teardown; + } + } + + if (vdev->vendor_id == PCI_VENDOR_ID_NVIDIA) { + ret = vfio_pci_nvidia_v100_ram_init(vdev, errp); + if (ret && ret != -ENODEV) { + error_report("Failed to setup NVIDIA V100 GPU RAM"); + } + } + + if (vdev->vendor_id == PCI_VENDOR_ID_IBM) { + ret = vfio_pci_nvlink2_init(vdev, errp); + if (ret && ret != -ENODEV) { + error_report("Failed to setup NVlink2 bridge"); + } + } vfio_register_err_notifier(vdev); vfio_register_req_notifier(vdev); @@ -3057,13 +3113,12 @@ out_teardown: vfio_teardown_msi(vdev); vfio_bars_exit(vdev); error: - error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name); + error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name); } static void vfio_instance_finalize(Object *obj) { - PCIDevice *pci_dev = PCI_DEVICE(obj); - VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pci_dev); + VFIOPCIDevice *vdev = PCI_VFIO(obj); VFIOGroup *group = vdev->vbasedev.group; vfio_display_finalize(vdev); @@ -3083,7 +3138,7 @@ static void vfio_instance_finalize(Object *obj) static void vfio_exitfn(PCIDevice *pdev) { - VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); + VFIOPCIDevice *vdev = PCI_VFIO(pdev); vfio_unregister_req_notifier(vdev); vfio_unregister_err_notifier(vdev); @@ -3098,8 +3153,7 @@ static void vfio_exitfn(PCIDevice *pdev) static void vfio_pci_reset(DeviceState *dev) { - PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev); - VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); + VFIOPCIDevice *vdev = PCI_VFIO(dev); trace_vfio_pci_reset(vdev->vbasedev.name); @@ -3139,7 +3193,7 @@ post_reset: static void vfio_instance_init(Object *obj) { PCIDevice *pci_dev = PCI_DEVICE(obj); - VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, PCI_DEVICE(obj)); + VFIOPCIDevice *vdev = PCI_VFIO(obj); device_add_bootindex_property(obj, &vdev->bootindex, "bootindex", NULL, @@ -3161,6 +3215,8 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev), DEFINE_PROP_ON_OFF_AUTO("display", VFIOPCIDevice, display, ON_OFF_AUTO_OFF), + DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0), + DEFINE_PROP_UINT32("yres", VFIOPCIDevice, display_yres, 0), DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice, intx.mmap_timeout, 1100), DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features, @@ -3170,6 +3226,8 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), + DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice, + vbasedev.balloon_allowed, false), DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false), DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false), DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), @@ -3221,7 +3279,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) } static const TypeInfo vfio_pci_dev_info = { - .name = "vfio-pci", + .name = TYPE_VFIO_PCI, .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(VFIOPCIDevice), .class_init = vfio_pci_dev_class_init, @@ -3234,9 +3292,30 @@ static const TypeInfo vfio_pci_dev_info = { }, }; +static Property vfio_pci_dev_nohotplug_properties[] = { + DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vfio_pci_nohotplug_dev_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->props = vfio_pci_dev_nohotplug_properties; + dc->hotpluggable = false; +} + +static const TypeInfo vfio_pci_nohotplug_dev_info = { + .name = TYPE_VIFO_PCI_NOHOTPLUG, + .parent = TYPE_VFIO_PCI, + .instance_size = sizeof(VFIOPCIDevice), + .class_init = vfio_pci_nohotplug_dev_class_init, +}; + static void register_vfio_pci_dev_type(void) { type_register_static(&vfio_pci_dev_info); + type_register_static(&vfio_pci_nohotplug_dev_info); } type_init(register_vfio_pci_dev_type)