+
+/*
+ * The NVIDIA GPUDirect P2P Vendor capability allows the user to specify
+ * devices as a member of a clique. Devices within the same clique ID
+ * are capable of direct P2P. It's the user's responsibility that this
+ * is correct. The spec says that this may reside at any unused config
+ * offset, but reserves and recommends hypervisors place this at C8h.
+ * The spec also states that the hypervisor should place this capability
+ * at the end of the capability list, thus next is defined as 0h.
+ *
+ * +----------------+----------------+----------------+----------------+
+ * | sig 7:0 ('P') | vndr len (8h) | next (0h) | cap id (9h) |
+ * +----------------+----------------+----------------+----------------+
+ * | rsvd 15:7(0h),id 6:3,ver 2:0(0h)| sig 23:8 ('P2') |
+ * +---------------------------------+---------------------------------+
+ *
+ * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf
+ */
+static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ DeviceState *dev = DEVICE(obj);
+ Property *prop = opaque;
+ uint8_t *ptr = qdev_get_prop_ptr(dev, prop);
+
+ visit_type_uint8(v, name, ptr, errp);
+}
+
+static void set_nv_gpudirect_clique_id(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ DeviceState *dev = DEVICE(obj);
+ Property *prop = opaque;
+ uint8_t value, *ptr = qdev_get_prop_ptr(dev, prop);
+ Error *local_err = NULL;
+
+ if (dev->realized) {
+ qdev_prop_set_after_realize(dev, name, errp);
+ return;
+ }
+
+ visit_type_uint8(v, name, &value, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ if (value & ~0xF) {
+ error_setg(errp, "Property %s: valid range 0-15", name);
+ return;
+ }
+
+ *ptr = value;
+}
+
+const PropertyInfo qdev_prop_nv_gpudirect_clique = {
+ .name = "uint4",
+ .description = "NVIDIA GPUDirect Clique ID (0 - 15)",
+ .get = get_nv_gpudirect_clique_id,
+ .set = set_nv_gpudirect_clique_id,
+};
+
+static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
+{
+ PCIDevice *pdev = &vdev->pdev;
+ int ret, pos = 0xC8;
+
+ if (vdev->nv_gpudirect_clique == 0xFF) {
+ return 0;
+ }
+
+ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) {
+ error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid device vendor");
+ return -EINVAL;
+ }
+
+ if (pci_get_byte(pdev->config + PCI_CLASS_DEVICE + 1) !=
+ PCI_BASE_CLASS_DISPLAY) {
+ error_setg(errp, "NVIDIA GPUDirect Clique ID: unsupported PCI class");
+ return -EINVAL;
+ }
+
+ ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
+ if (ret < 0) {
+ error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
+ return ret;
+ }
+
+ memset(vdev->emulated_config_bits + pos, 0xFF, 8);
+ pos += PCI_CAP_FLAGS;
+ pci_set_byte(pdev->config + pos++, 8);
+ pci_set_byte(pdev->config + pos++, 'P');
+ pci_set_byte(pdev->config + pos++, '2');
+ pci_set_byte(pdev->config + pos++, 'P');
+ pci_set_byte(pdev->config + pos++, vdev->nv_gpudirect_clique << 3);
+ pci_set_byte(pdev->config + pos, 0);
+
+ return 0;
+}
+
+int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp)
+{
+ int ret;
+
+ ret = vfio_add_nv_gpudirect_cap(vdev, errp);
+ if (ret) {
+ return ret;
+ }
+
+ return 0;
+}
+
+static void vfio_pci_nvlink2_get_tgt(Object *obj, Visitor *v,
+ const char *name,
+ void *opaque, Error **errp)
+{
+ uint64_t tgt = (uintptr_t) opaque;
+ visit_type_uint64(v, name, &tgt, errp);
+}
+
+static void vfio_pci_nvlink2_get_link_speed(Object *obj, Visitor *v,
+ const char *name,
+ void *opaque, Error **errp)
+{
+ uint32_t link_speed = (uint32_t)(uintptr_t) opaque;
+ visit_type_uint32(v, name, &link_speed, errp);
+}
+
+int vfio_pci_nvidia_v100_ram_init(VFIOPCIDevice *vdev, Error **errp)
+{
+ int ret;
+ void *p;
+ struct vfio_region_info *nv2reg = NULL;
+ struct vfio_info_cap_header *hdr;
+ struct vfio_region_info_cap_nvlink2_ssatgt *cap;
+ VFIOQuirk *quirk;
+
+ ret = vfio_get_dev_region_info(&vdev->vbasedev,
+ VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
+ PCI_VENDOR_ID_NVIDIA,
+ VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM,
+ &nv2reg);
+ if (ret) {
+ return ret;
+ }
+
+ hdr = vfio_get_region_info_cap(nv2reg, VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
+ if (!hdr) {
+ ret = -ENODEV;
+ goto free_exit;
+ }
+ cap = (void *) hdr;
+
+ p = mmap(NULL, nv2reg->size, PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_SHARED, vdev->vbasedev.fd, nv2reg->offset);
+ if (p == MAP_FAILED) {
+ ret = -errno;
+ goto free_exit;
+ }
+
+ quirk = vfio_quirk_alloc(1);
+ memory_region_init_ram_ptr(&quirk->mem[0], OBJECT(vdev), "nvlink2-mr",
+ nv2reg->size, p);
+ QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
+
+ object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
+ vfio_pci_nvlink2_get_tgt, NULL, NULL,
+ (void *) (uintptr_t) cap->tgt, NULL);
+ trace_vfio_pci_nvidia_gpu_setup_quirk(vdev->vbasedev.name, cap->tgt,
+ nv2reg->size);
+free_exit:
+ g_free(nv2reg);
+
+ return ret;
+}
+
+int vfio_pci_nvlink2_init(VFIOPCIDevice *vdev, Error **errp)
+{
+ int ret;
+ void *p;
+ struct vfio_region_info *atsdreg = NULL;
+ struct vfio_info_cap_header *hdr;
+ struct vfio_region_info_cap_nvlink2_ssatgt *captgt;
+ struct vfio_region_info_cap_nvlink2_lnkspd *capspeed;
+ VFIOQuirk *quirk;
+
+ ret = vfio_get_dev_region_info(&vdev->vbasedev,
+ VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
+ PCI_VENDOR_ID_IBM,
+ VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD,
+ &atsdreg);
+ if (ret) {
+ return ret;
+ }
+
+ hdr = vfio_get_region_info_cap(atsdreg,
+ VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
+ if (!hdr) {
+ ret = -ENODEV;
+ goto free_exit;
+ }
+ captgt = (void *) hdr;
+
+ hdr = vfio_get_region_info_cap(atsdreg,
+ VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD);
+ if (!hdr) {
+ ret = -ENODEV;
+ goto free_exit;
+ }
+ capspeed = (void *) hdr;
+
+ /* Some NVLink bridges may not have assigned ATSD */
+ if (atsdreg->size) {
+ p = mmap(NULL, atsdreg->size, PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_SHARED, vdev->vbasedev.fd, atsdreg->offset);
+ if (p == MAP_FAILED) {
+ ret = -errno;
+ goto free_exit;
+ }
+
+ quirk = vfio_quirk_alloc(1);
+ memory_region_init_ram_device_ptr(&quirk->mem[0], OBJECT(vdev),
+ "nvlink2-atsd-mr", atsdreg->size, p);
+ QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
+ }
+
+ object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
+ vfio_pci_nvlink2_get_tgt, NULL, NULL,
+ (void *) (uintptr_t) captgt->tgt, NULL);
+ trace_vfio_pci_nvlink2_setup_quirk_ssatgt(vdev->vbasedev.name, captgt->tgt,
+ atsdreg->size);
+
+ object_property_add(OBJECT(vdev), "nvlink2-link-speed", "uint32",
+ vfio_pci_nvlink2_get_link_speed, NULL, NULL,
+ (void *) (uintptr_t) capspeed->link_speed, NULL);
+ trace_vfio_pci_nvlink2_setup_quirk_lnkspd(vdev->vbasedev.name,
+ capspeed->link_speed);
+free_exit:
+ g_free(atsdreg);
+
+ return ret;
+}