#include "hw/pci/msix.h"
#include "hw/pci/pci_bridge.h"
#include "qemu/error-report.h"
+#include "qemu/module.h"
#include "qemu/option.h"
#include "qemu/range.h"
+#include "qemu/units.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "pci.h"
#include "trace.h"
#include "qapi/error.h"
-#define MSIX_CAP_LENGTH 12
+#define TYPE_VFIO_PCI "vfio-pci"
+#define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
+
+#define TYPE_VIFO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
static void vfio_intx_update(PCIDevice *pdev)
{
- VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+ VFIOPCIDevice *vdev = PCI_VFIO(pdev);
PCIINTxRoute route;
Error *err = NULL;
vfio_intx_enable_kvm(vdev, &err);
if (err) {
- error_reportf_err(err, WARN_PREFIX, vdev->vbasedev.name);
+ warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
/* Re-enable the interrupt in cased we missed an EOI */
vfio_intx_enable_kvm(vdev, &err);
if (err) {
- error_reportf_err(err, WARN_PREFIX, vdev->vbasedev.name);
+ warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
vdev->interrupt = VFIO_INT_INTx;
static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
MSIMessage *msg, IOHandler *handler)
{
- VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+ VFIOPCIDevice *vdev = PCI_VFIO(pdev);
VFIOMSIVector *vector;
int ret;
static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
{
- VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+ VFIOPCIDevice *vdev = PCI_VFIO(pdev);
VFIOMSIVector *vector = &vdev->msi_vectors[nr];
trace_vfio_msix_vector_release(vdev->vbasedev.name, nr);
vfio_intx_enable(vdev, &err);
if (err) {
- error_reportf_err(err, ERR_PREFIX, vdev->vbasedev.name);
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
}
if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
/* Since pci handles romfile, just print a message and return */
if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
- error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified romfile\n",
- vdev->vbasedev.name);
+ warn_report("Device at %s is known to cause system instability"
+ " issues during option rom execution",
+ vdev->vbasedev.name);
+ error_printf("Proceeding anyway since user specified romfile\n");
}
return;
}
if (vfio_blacklist_opt_rom(vdev)) {
if (dev->opts && qemu_opt_get(dev->opts, "rombar")) {
- error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified non zero value for rombar\n",
- vdev->vbasedev.name);
+ warn_report("Device at %s is known to cause system instability"
+ " issues during option rom execution",
+ vdev->vbasedev.name);
+ error_printf("Proceeding anyway since user specified"
+ " non zero value for rombar\n");
} else {
- error_printf("Warning : Rom loading for device at %s has been disabled due to system instability issues. Specify rombar=1 or romfile to force\n",
- vdev->vbasedev.name);
+ warn_report("Rom loading for device at %s has been disabled"
+ " due to system instability issues",
+ vdev->vbasedev.name);
+ error_printf("Specify rombar=1 or romfile to force\n");
return;
}
}
pci_register_bar(&vdev->pdev, PCI_ROM_SLOT,
PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom);
- vdev->pdev.has_rom = true;
vdev->rom_read_failed = false;
}
*/
static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar)
{
- VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+ VFIOPCIDevice *vdev = PCI_VFIO(pdev);
VFIORegion *region = &vdev->bars[bar].region;
MemoryRegion *mmap_mr, *region_mr, *base_mr;
PCIIORegion *r;
*/
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
{
- VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+ VFIOPCIDevice *vdev = PCI_VFIO(pdev);
uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val;
memcpy(&emu_bits, vdev->emulated_config_bits + addr, len);
void vfio_pci_write_config(PCIDevice *pdev,
uint32_t addr, uint32_t val, int len)
{
- VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+ VFIOPCIDevice *vdev = PCI_VFIO(pdev);
uint32_t val_le = cpu_to_le32(val);
trace_vfio_pci_write_config(vdev->vbasedev.name, addr, val, len);
if (ret == -ENOTSUP) {
return 0;
}
- error_prepend(&err, "msi_init failed: ");
- error_propagate(errp, err);
+ error_propagate_prepend(errp, err, "msi_init failed: ");
return ret;
}
vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0);
}
/* 2GB max size for 32-bit BARs, cannot double if already > 1G */
- if (vdev->bars[target_bar].size > (1 * 1024 * 1024 * 1024) &&
+ if (vdev->bars[target_bar].size > 1 * GiB &&
!vdev->bars[target_bar].mem64) {
error_setg(errp, "Invalid MSI-X relocation BAR %d, "
"no space to extend 32-bit BAR", target_bar);
if (vdev->vendor_id == PCI_VENDOR_ID_CHELSIO &&
(vdev->device_id & 0xff00) == 0x5800) {
msix->pba_offset = 0x1000;
- } else {
+ } else if (vdev->msix_relo == OFF_AUTOPCIBAR_OFF) {
error_setg(errp, "hardware reports invalid configuration, "
"MSIX PBA outside of specified BAR");
g_free(msix);
&err);
if (ret < 0) {
if (ret == -ENOTSUP) {
- error_report_err(err);
+ warn_report_err(err);
return 0;
}
PCI_EXP_TYPE_ENDPOINT << 4,
PCI_EXP_FLAGS_TYPE);
vfio_add_emulated_long(vdev, pos + PCI_EXP_LNKCAP,
- PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25, ~0);
+ QEMU_PCI_EXP_LNKCAP_MLW(QEMU_PCI_EXP_LNK_X1) |
+ QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT), ~0);
vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0);
}
-
- /* Mark the Link Status bits as emulated to allow virtual negotiation */
- vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKSTA,
- pci_get_word(vdev->pdev.config + pos +
- PCI_EXP_LNKSTA),
- PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS);
}
/*
case 0: /* kernel masked capability */
case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */
case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */
+ case PCI_EXT_CAP_ID_REBAR: /* Can't expose read-only */
trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next);
break;
default:
vfio_intx_enable(vdev, &err);
if (err) {
- error_reportf_err(err, ERR_PREFIX, vdev->vbasedev.name);
+ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
for (nr = 0; nr < PCI_NUM_REGIONS - 1; ++nr) {
ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
if (ret) {
/* This can fail for an old kernel or legacy PCI dev */
- trace_vfio_populate_device_get_irq_info_failure();
+ trace_vfio_populate_device_get_irq_info_failure(strerror(errno));
} else if (irq_info.count == 1) {
vdev->pci_aer = true;
} else {
- error_report(WARN_PREFIX
- "Could not enable error recovery for the device",
- vbasedev->name);
+ warn_report(VFIO_MSG_PREFIX
+ "Could not enable error recovery for the device",
+ vbasedev->name);
}
}
return;
}
- qdev_unplug(&vdev->pdev.qdev, &err);
+ qdev_unplug(DEVICE(vdev), &err);
if (err) {
- error_reportf_err(err, WARN_PREFIX, vdev->vbasedev.name);
+ warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
}
static void vfio_realize(PCIDevice *pdev, Error **errp)
{
- VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+ VFIOPCIDevice *vdev = PCI_VFIO(pdev);
VFIODevice *vbasedev_iter;
VFIOGroup *group;
- char *tmp, group_path[PATH_MAX], *group_name;
+ char *tmp, *subsys, group_path[PATH_MAX], *group_name;
Error *err = NULL;
ssize_t len;
struct stat st;
int groupid;
int i, ret;
+ bool is_mdev;
if (!vdev->vbasedev.sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
if (stat(vdev->vbasedev.sysfsdev, &st) < 0) {
error_setg_errno(errp, errno, "no such host device");
- error_prepend(errp, ERR_PREFIX, vdev->vbasedev.sysfsdev);
+ error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.sysfsdev);
return;
}
vdev->vbasedev.name = g_path_get_basename(vdev->vbasedev.sysfsdev);
vdev->vbasedev.ops = &vfio_pci_ops;
vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
- vdev->vbasedev.dev = &vdev->pdev.qdev;
+ vdev->vbasedev.dev = DEVICE(vdev);
tmp = g_strdup_printf("%s/iommu_group", vdev->vbasedev.sysfsdev);
len = readlink(tmp, group_path, sizeof(group_path));
}
}
+ /*
+ * Mediated devices *might* operate compatibly with memory ballooning, but
+ * we cannot know for certain, it depends on whether the mdev vendor driver
+ * stays in sync with the active working set of the guest driver. Prevent
+ * the x-balloon-allowed option unless this is minimally an mdev device.
+ */
+ tmp = g_strdup_printf("%s/subsystem", vdev->vbasedev.sysfsdev);
+ subsys = realpath(tmp, NULL);
+ g_free(tmp);
+ is_mdev = subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
+ free(subsys);
+
+ trace_vfio_mdev(vdev->vbasedev.name, is_mdev);
+
+ if (vdev->vbasedev.balloon_allowed && !is_mdev) {
+ error_setg(errp, "x-balloon-allowed only potentially compatible "
+ "with mdev devices");
+ vfio_put_group(group);
+ goto error;
+ }
+
ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev, errp);
if (ret) {
vfio_put_group(group);
goto out_teardown;
}
}
+ if (vdev->enable_ramfb && vdev->dpy == NULL) {
+ error_setg(errp, "ramfb=on requires display=on");
+ goto out_teardown;
+ }
+ if (vdev->display_xres || vdev->display_yres) {
+ if (vdev->dpy == NULL) {
+ error_setg(errp, "xres and yres properties require display=on");
+ goto out_teardown;
+ }
+ if (vdev->dpy->edid_regs == NULL) {
+ error_setg(errp, "xres and yres properties need edid support");
+ goto out_teardown;
+ }
+ }
+
+ if (vdev->vendor_id == PCI_VENDOR_ID_NVIDIA) {
+ ret = vfio_pci_nvidia_v100_ram_init(vdev, errp);
+ if (ret && ret != -ENODEV) {
+ error_report("Failed to setup NVIDIA V100 GPU RAM");
+ }
+ }
+
+ if (vdev->vendor_id == PCI_VENDOR_ID_IBM) {
+ ret = vfio_pci_nvlink2_init(vdev, errp);
+ if (ret && ret != -ENODEV) {
+ error_report("Failed to setup NVlink2 bridge");
+ }
+ }
vfio_register_err_notifier(vdev);
vfio_register_req_notifier(vdev);
vfio_teardown_msi(vdev);
vfio_bars_exit(vdev);
error:
- error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
+ error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
static void vfio_instance_finalize(Object *obj)
{
- PCIDevice *pci_dev = PCI_DEVICE(obj);
- VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pci_dev);
+ VFIOPCIDevice *vdev = PCI_VFIO(obj);
VFIOGroup *group = vdev->vbasedev.group;
vfio_display_finalize(vdev);
static void vfio_exitfn(PCIDevice *pdev)
{
- VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+ VFIOPCIDevice *vdev = PCI_VFIO(pdev);
vfio_unregister_req_notifier(vdev);
vfio_unregister_err_notifier(vdev);
static void vfio_pci_reset(DeviceState *dev)
{
- PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
- VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+ VFIOPCIDevice *vdev = PCI_VFIO(dev);
trace_vfio_pci_reset(vdev->vbasedev.name);
static void vfio_instance_init(Object *obj)
{
PCIDevice *pci_dev = PCI_DEVICE(obj);
- VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, PCI_DEVICE(obj));
+ VFIOPCIDevice *vdev = PCI_VFIO(obj);
device_add_bootindex_property(obj, &vdev->bootindex,
"bootindex", NULL,
DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev),
DEFINE_PROP_ON_OFF_AUTO("display", VFIOPCIDevice,
display, ON_OFF_AUTO_OFF),
+ DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0),
+ DEFINE_PROP_UINT32("yres", VFIOPCIDevice, display_yres, 0),
DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice,
intx.mmap_timeout, 1100),
DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features,
DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
+ DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
+ vbasedev.balloon_allowed, false),
DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
}
static const TypeInfo vfio_pci_dev_info = {
- .name = "vfio-pci",
+ .name = TYPE_VFIO_PCI,
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(VFIOPCIDevice),
.class_init = vfio_pci_dev_class_init,
},
};
+static Property vfio_pci_dev_nohotplug_properties[] = {
+ DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vfio_pci_nohotplug_dev_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->props = vfio_pci_dev_nohotplug_properties;
+ dc->hotpluggable = false;
+}
+
+static const TypeInfo vfio_pci_nohotplug_dev_info = {
+ .name = TYPE_VIFO_PCI_NOHOTPLUG,
+ .parent = TYPE_VFIO_PCI,
+ .instance_size = sizeof(VFIOPCIDevice),
+ .class_init = vfio_pci_nohotplug_dev_class_init,
+};
+
static void register_vfio_pci_dev_type(void)
{
type_register_static(&vfio_pci_dev_info);
+ type_register_static(&vfio_pci_nohotplug_dev_info);
}
type_init(register_vfio_pci_dev_type)