* QEMU emulation of AMD IOMMU (AMD-Vi)
*
* Copyright (C) 2011 Eduard - Gabriel Munteanu
+ * Copyright (C) 2015, 2016 David Kiarie Kahurani
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* Cache implementation inspired by hw/i386/intel_iommu.c
*/
#include "qemu/osdep.h"
-#include "hw/i386/amd_iommu.h"
+#include "hw/i386/pc.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/pci_bus.h"
+#include "amd_iommu.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
+#include "hw/i386/apic_internal.h"
#include "trace.h"
+#include "hw/i386/apic-msidef.h"
/* used AMD-Vi MMIO registers */
const char *amdvi_mmio_low[] = {
uint8_t bus_num; /* bus number */
uint8_t devfn; /* device function */
AMDVIState *iommu_state; /* AMDVI - one per machine */
- MemoryRegion iommu; /* Device's address translation region */
+ MemoryRegion root; /* AMDVI Root memory map region */
+ IOMMUMemoryRegion iommu; /* Device's address translation region */
MemoryRegion iommu_ir; /* Device's interrupt remapping region */
AddressSpace as; /* device's corresponding address space */
};
s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
s->cmdbuf_enabled = s->enabled && !!(control &
AMDVI_MMIO_CONTROL_CMDBUFLEN);
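+ /* GAEn selects the 128-bit guest virtual APIC IRTE format for interrupt remapping */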
+ s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN);
/* update the flags depending on the control register */
if (s->cmdbuf_enabled) {
AMDVI_DEV_PERM_SHIFT;
}
-/* a valid entry should have V = 1 and reserved bits honoured */
+/* validate that reserved bits are honoured */
static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid,
uint64_t *dte)
{
return false;
}
- return dte[0] & AMDVI_DEV_VALID;
+ return true;
}
/* get a device table entry given the devid */
return;
}
- /* devices with V = 0 are not translated */
if (!amdvi_get_dte(s, devid, entry)) {
+ return;
+ }
+
+ /* devices with V = 0 are not translated */
+ if (!(entry[0] & AMDVI_DEV_VALID)) {
goto out;
}
return addr >= AMDVI_INT_ADDR_FIRST && addr <= AMDVI_INT_ADDR_LAST;
}
-static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr,
- bool is_write)
+static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
+ IOMMUAccessFlags flag, int iommu_idx)
{
AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
AMDVIState *s = as->iommu_state;
return ret;
}
- amdvi_do_translate(as, addr, is_write, &ret);
+ amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret);
trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
PCI_FUNC(as->devfn), addr, ret.translated_addr);
return ret;
}
+static int amdvi_get_irte(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
+ union irte *irte, uint16_t devid)
+{
+ uint64_t irte_root, offset;
+
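+ /*
+ * Legacy IRTEs are 4 bytes wide, so the table index taken from the MSI
+ * data is scaled by 4 to form the byte offset into the remapping table.
+ */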
+ irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
+ offset = (origin->data & AMDVI_IRTE_OFFSET) << 2;
+
+ trace_amdvi_ir_irte(irte_root, offset);
+
+ if (dma_memory_read(&address_space_memory, irte_root + offset,
+ irte, sizeof(*irte))) {
+ trace_amdvi_ir_err("failed to get irte");
+ return -AMDVI_IR_GET_IRTE;
+ }
+
+ trace_amdvi_ir_irte_val(irte->val);
+
+ return 0;
+}
+
+static int amdvi_int_remap_legacy(AMDVIState *iommu,
+ MSIMessage *origin,
+ MSIMessage *translated,
+ uint64_t *dte,
+ X86IOMMUIrq *irq,
+ uint16_t sid)
+{
+ int ret;
+ union irte irte;
+
+ /* get interrupt remapping table */
+ ret = amdvi_get_irte(iommu, origin, dte, &irte, sid);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (!irte.fields.valid) {
+ trace_amdvi_ir_target_abort("RemapEn is disabled");
+ return -AMDVI_IR_TARGET_ABORT;
+ }
+
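+ /* guest (virtual APIC) delivery is not emulated, so GuestMode must be clear */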
+ if (irte.fields.guest_mode) {
+ error_report_once("guest mode is not zero");
+ return -AMDVI_IR_ERR;
+ }
+
+ if (irte.fields.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
+ error_report_once("reserved int_type");
+ return -AMDVI_IR_ERR;
+ }
+
+ irq->delivery_mode = irte.fields.int_type;
+ irq->vector = irte.fields.vector;
+ irq->dest_mode = irte.fields.dm;
+ irq->redir_hint = irte.fields.rq_eoi;
+ irq->dest = irte.fields.destination;
+
+ return 0;
+}
+
+static int amdvi_get_irte_ga(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
+ struct irte_ga *irte, uint16_t devid)
+{
+ uint64_t irte_root, offset;
+
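+ /*
+ * Guest virtual APIC format IRTEs are 16 bytes wide, hence the table
+ * index from the MSI data is scaled by 16.
+ */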
+ irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
+ offset = (origin->data & AMDVI_IRTE_OFFSET) << 4;
+ trace_amdvi_ir_irte(irte_root, offset);
+
+ if (dma_memory_read(&address_space_memory, irte_root + offset,
+ irte, sizeof(*irte))) {
+ trace_amdvi_ir_err("failed to get irte_ga");
+ return -AMDVI_IR_GET_IRTE;
+ }
+
+ trace_amdvi_ir_irte_ga_val(irte->hi.val, irte->lo.val);
+ return 0;
+}
+
+static int amdvi_int_remap_ga(AMDVIState *iommu,
+ MSIMessage *origin,
+ MSIMessage *translated,
+ uint64_t *dte,
+ X86IOMMUIrq *irq,
+ uint16_t sid)
+{
+ int ret;
+ struct irte_ga irte;
+
+ /* get interrupt remapping table */
+ ret = amdvi_get_irte_ga(iommu, origin, dte, &irte, sid);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (!irte.lo.fields_remap.valid) {
+ trace_amdvi_ir_target_abort("RemapEn is disabled");
+ return -AMDVI_IR_TARGET_ABORT;
+ }
+
+ if (irte.lo.fields_remap.guest_mode) {
+ error_report_once("guest mode is not zero");
+ return -AMDVI_IR_ERR;
+ }
+
+ if (irte.lo.fields_remap.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
+ error_report_once("reserved int_type is set");
+ return -AMDVI_IR_ERR;
+ }
+
+ irq->delivery_mode = irte.lo.fields_remap.int_type;
+ irq->vector = irte.hi.fields.vector;
+ irq->dest_mode = irte.lo.fields_remap.dm;
+ irq->redir_hint = irte.lo.fields_remap.rq_eoi;
+ irq->dest = irte.lo.fields_remap.destination;
+
+ return 0;
+}
+
+static int __amdvi_int_remap_msi(AMDVIState *iommu,
+ MSIMessage *origin,
+ MSIMessage *translated,
+ uint64_t *dte,
+ X86IOMMUIrq *irq,
+ uint16_t sid)
+{
+ int ret;
+ uint8_t int_ctl;
+
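+ /*
+ * DTE[IntCtl] selects how fixed and arbitrated interrupts are handled:
+ * passed through unmapped, remapped via the interrupt table, or aborted.
+ */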
+ int_ctl = (dte[2] >> AMDVI_IR_INTCTL_SHIFT) & 3;
+ trace_amdvi_ir_intctl(int_ctl);
+
+ switch (int_ctl) {
+ case AMDVI_IR_INTCTL_PASS:
+ memcpy(translated, origin, sizeof(*origin));
+ return 0;
+ case AMDVI_IR_INTCTL_REMAP:
+ break;
+ case AMDVI_IR_INTCTL_ABORT:
+ trace_amdvi_ir_target_abort("int_ctl abort");
+ return -AMDVI_IR_TARGET_ABORT;
+ default:
+ trace_amdvi_ir_err("int_ctl reserved");
+ return -AMDVI_IR_ERR;
+ }
+
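+ /* the control register GAEn bit selects between the 128-bit and legacy 32-bit IRTE formats */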
+ if (iommu->ga_enabled) {
+ ret = amdvi_int_remap_ga(iommu, origin, translated, dte, irq, sid);
+ } else {
+ ret = amdvi_int_remap_legacy(iommu, origin, translated, dte, irq, sid);
+ }
+
+ return ret;
+}
+
+/* Interrupt remapping for MSI/MSI-X entry */
+static int amdvi_int_remap_msi(AMDVIState *iommu,
+ MSIMessage *origin,
+ MSIMessage *translated,
+ uint16_t sid)
+{
+ int ret = 0;
+ uint64_t pass = 0;
+ uint64_t dte[4] = { 0 };
+ X86IOMMUIrq irq = { 0 };
+ uint8_t dest_mode, delivery_mode;
+
+ assert(origin && translated);
+
+ /*
+ * When the IOMMU is enabled, interrupt remap requests come either from
+ * the IO-APIC or from a PCI device. A request from a PCI device carries
+ * a valid requester id, whereas a request from the IO-APIC does not.
+ */
+ if (sid == X86_IOMMU_SID_INVALID) {
+ sid = AMDVI_IOAPIC_SB_DEVID;
+ }
+
+ trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid);
+
+ /* check if device table entry is set before we go further. */
+ if (!iommu || !iommu->devtab_len) {
+ memcpy(translated, origin, sizeof(*origin));
+ goto out;
+ }
+
+ if (!amdvi_get_dte(iommu, sid, dte)) {
+ return -AMDVI_IR_ERR;
+ }
+
+ /* Check if IR is enabled in DTE */
+ if (!(dte[2] & AMDVI_IR_REMAP_ENABLE)) {
+ memcpy(translated, origin, sizeof(*origin));
+ goto out;
+ }
+
+ /* validate that we are configured with intremap=on */
+ if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) {
+ trace_amdvi_err("Interrupt remapping is enabled in the guest but "
+ "not in the host. Use intremap=on to enable interrupt "
+ "remapping in amd-iommu.");
+ return -AMDVI_IR_ERR;
+ }
+
+ if (origin->address & AMDVI_MSI_ADDR_HI_MASK) {
+ trace_amdvi_err("MSI address high 32 bits non-zero when "
+ "Interrupt Remapping enabled.");
+ return -AMDVI_IR_ERR;
+ }
+
+ if ((origin->address & AMDVI_MSI_ADDR_LO_MASK) != APIC_DEFAULT_ADDRESS) {
+ trace_amdvi_err("MSI is not from IOAPIC.");
+ return -AMDVI_IR_ERR;
+ }
+
+ /*
+ * The MSI data register bits [10:8] are used to get the upstream interrupt type.
+ *
+ * See MSI/MSI-X format:
+ * https://pdfs.semanticscholar.org/presentation/9420/c279e942eca568157711ef5c92b800c40a79.pdf
+ * (page 5)
+ */
+ delivery_mode = (origin->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 7;
+
+ switch (delivery_mode) {
+ case AMDVI_IOAPIC_INT_TYPE_FIXED:
+ case AMDVI_IOAPIC_INT_TYPE_ARBITRATED:
+ trace_amdvi_ir_delivery_mode("fixed/arbitrated");
+ ret = __amdvi_int_remap_msi(iommu, origin, translated, dte, &irq, sid);
+ if (ret < 0) {
+ goto remap_fail;
+ } else {
+ /* Translate IRQ to MSI messages */
+ x86_iommu_irq_to_msi_message(&irq, translated);
+ goto out;
+ }
+ break;
+ case AMDVI_IOAPIC_INT_TYPE_SMI:
+ error_report("SMI is not supported!");
+ ret = -AMDVI_IR_ERR;
+ break;
+ case AMDVI_IOAPIC_INT_TYPE_NMI:
+ pass = dte[3] & AMDVI_DEV_NMI_PASS_MASK;
+ trace_amdvi_ir_delivery_mode("nmi");
+ break;
+ case AMDVI_IOAPIC_INT_TYPE_INIT:
+ pass = dte[3] & AMDVI_DEV_INT_PASS_MASK;
+ trace_amdvi_ir_delivery_mode("init");
+ break;
+ case AMDVI_IOAPIC_INT_TYPE_EINT:
+ pass = dte[3] & AMDVI_DEV_EINT_PASS_MASK;
+ trace_amdvi_ir_delivery_mode("eint");
+ break;
+ default:
+ trace_amdvi_ir_delivery_mode("unsupported delivery_mode");
+ ret = -AMDVI_IR_ERR;
+ break;
+ }
+
+ if (ret < 0) {
+ goto remap_fail;
+ }
+
+ /*
+ * The MSI address register bit[2] is used to get the destination
+ * mode. A dest_mode of 1 is only valid for fixed and arbitrated
+ * interrupts.
+ */
+ dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1;
+ if (dest_mode) {
+ trace_amdvi_ir_err("invalid dest_mode");
+ ret = -AMDVI_IR_ERR;
+ goto remap_fail;
+ }
+
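+ /*
+ * NMI/INIT/ExtInt interrupts are forwarded untranslated only if the
+ * corresponding pass-through bit was set in the DTE above.
+ */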
+ if (pass) {
+ memcpy(translated, origin, sizeof(*origin));
+ } else {
+ trace_amdvi_ir_err("passthrough is not enabled");
+ ret = -AMDVI_IR_ERR;
+ goto remap_fail;
+ }
+
+out:
+ trace_amdvi_ir_remap_msi(origin->address, origin->data,
+ translated->address, translated->data);
+ return 0;
+
+remap_fail:
+ return ret;
+}
+
+static int amdvi_int_remap(X86IOMMUState *iommu,
+ MSIMessage *origin,
+ MSIMessage *translated,
+ uint16_t sid)
+{
+ return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin,
+ translated, sid);
+}
+
+static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size,
+ MemTxAttrs attrs)
+{
+ int ret;
+ MSIMessage from = { 0, 0 }, to = { 0, 0 };
+ uint16_t sid = AMDVI_IOAPIC_SB_DEVID;
+
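+ /* addr is an offset into the interrupt window; rebuild the full MSI address */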
+ from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST;
+ from.data = (uint32_t) value;
+
+ trace_amdvi_mem_ir_write_req(addr, value, size);
+
+ if (!attrs.unspecified) {
+ /* We have explicit Source ID */
+ sid = attrs.requester_id;
+ }
+
+ ret = amdvi_int_remap_msi(opaque, &from, &to, sid);
+ if (ret < 0) {
+ /* TODO: log the event using IOMMU log event interface */
+ error_report_once("failed to remap interrupt from devid 0x%x", sid);
+ return MEMTX_ERROR;
+ }
+
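+ /* deliver the translated message through the APIC's MSI handler */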
+ apic_get_class()->send_msi(&to);
+
+ trace_amdvi_mem_ir_write(to.address, to.data);
+ return MEMTX_OK;
+}
+
+static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr,
+ uint64_t *data, unsigned size,
+ MemTxAttrs attrs)
+{
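+ /* reads from the interrupt window are not meaningful; just complete the access */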
+ return MEMTX_OK;
+}
+
+static const MemoryRegionOps amdvi_ir_ops = {
+ .read_with_attrs = amdvi_mem_ir_read,
+ .write_with_attrs = amdvi_mem_ir_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ },
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ }
+};
+
static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
{
+ char name[128];
AMDVIState *s = opaque;
- AMDVIAddressSpace **iommu_as;
+ AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
int bus_num = pci_bus_num(bus);
iommu_as = s->address_spaces[bus_num];
/* set up AMD-Vi region */
if (!iommu_as[devfn]) {
+ snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn);
+
iommu_as[devfn] = g_malloc0(sizeof(AMDVIAddressSpace));
iommu_as[devfn]->bus_num = (uint8_t)bus_num;
iommu_as[devfn]->devfn = (uint8_t)devfn;
iommu_as[devfn]->iommu_state = s;
- memory_region_init_iommu(&iommu_as[devfn]->iommu, OBJECT(s),
- &s->iommu_ops, "amd-iommu", UINT64_MAX);
- address_space_init(&iommu_as[devfn]->as, &iommu_as[devfn]->iommu,
- "amd-iommu");
+ amdvi_dev_as = iommu_as[devfn];
+
+ /*
+ * Memory region relationships look like this (address ranges show
+ * only the lower 32 bits for brevity):
+ *
+ * |-----------------+-------------------+----------|
+ * |      Name       |   Address range   | Priority |
+ * |-----------------+-------------------+----------|
+ * | amdvi_root      | 00000000-ffffffff |        0 |
+ * | amdvi_iommu     | 00000000-ffffffff |        1 |
+ * | amdvi_iommu_ir  | fee00000-feefffff |       64 |
+ * |-----------------+-------------------+----------|
+ */
+ memory_region_init_iommu(&amdvi_dev_as->iommu,
+ sizeof(amdvi_dev_as->iommu),
+ TYPE_AMD_IOMMU_MEMORY_REGION,
+ OBJECT(s),
+ "amd_iommu", UINT64_MAX);
+ memory_region_init(&amdvi_dev_as->root, OBJECT(s),
+ "amdvi_root", UINT64_MAX);
+ address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name);
+ memory_region_init_io(&amdvi_dev_as->iommu_ir, OBJECT(s),
+ &amdvi_ir_ops, s, "amd_iommu_ir",
+ AMDVI_INT_ADDR_SIZE);
+ memory_region_add_subregion_overlap(&amdvi_dev_as->root,
+ AMDVI_INT_ADDR_FIRST,
+ &amdvi_dev_as->iommu_ir,
+ 64);
+ memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
+ MEMORY_REGION(&amdvi_dev_as->iommu),
+ 1);
}
return &iommu_as[devfn]->as;
}
}
};
-static void amdvi_iommu_notify_flag_changed(MemoryRegion *iommu,
+static void amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
IOMMUNotifierFlag old,
IOMMUNotifierFlag new)
{
{
amdvi_iotlb_reset(s);
- s->iommu_ops.translate = amdvi_translate;
- s->iommu_ops.notify_flag_changed = amdvi_iommu_notify_flag_changed;
s->devtab_len = 0;
s->cmdbuf_len = 0;
s->cmdbuf_head = 0;
AMDVIState *s = AMD_IOMMU_DEVICE(dev);
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
MachineState *ms = MACHINE(qdev_get_machine());
- MachineClass *mc = MACHINE_GET_CLASS(ms);
- PCMachineState *pcms =
- PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE));
- PCIBus *bus;
-
- if (!pcms) {
- error_setg(err, "Machine-type '%s' not supported by amd-iommu",
- mc->name);
- return;
- }
+ PCMachineState *pcms = PC_MACHINE(ms);
+ PCIBus *bus = pcms->bus;
- bus = pcms->bus;
s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
amdvi_uint64_equal, g_free, g_free);
x86_iommu->type = TYPE_AMD;
qdev_set_parent_bus(DEVICE(&s->pci), &bus->qbus);
object_property_set_bool(OBJECT(&s->pci), true, "realized", err);
- s->capab_offset = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0,
- AMDVI_CAPAB_SIZE);
- assert(s->capab_offset > 0);
- ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0, AMDVI_CAPAB_REG_SIZE);
- assert(ret > 0);
- ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0, AMDVI_CAPAB_REG_SIZE);
- assert(ret > 0);
+ ret = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0,
+ AMDVI_CAPAB_SIZE, err);
+ if (ret < 0) {
+ return;
+ }
+ s->capab_offset = ret;
+
+ ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0,
+ AMDVI_CAPAB_REG_SIZE, err);
+ if (ret < 0) {
+ return;
+ }
+ ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0,
+ AMDVI_CAPAB_REG_SIZE, err);
+ if (ret < 0) {
+ return;
+ }
+
+ /* Pseudo address space under the root PCI bus, used by the IO-APIC for interrupt delivery. */
+ pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);
/* set up MMIO */
memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio",
dc->vmsd = &vmstate_amdvi;
dc->hotpluggable = false;
dc_class->realize = amdvi_realize;
+ dc_class->int_remap = amdvi_int_remap;
+ /* Supported by the pc-q35-* machine types */
+ dc->user_creatable = true;
+ set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+ dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
}
static const TypeInfo amdvi = {
.name = "AMDVI-PCI",
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(AMDVIPCIState),
+ .interfaces = (InterfaceInfo[]) {
+ { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+ { },
+ },
+};
+
+static void amdvi_iommu_memory_region_class_init(ObjectClass *klass, void *data)
+{
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
+
+ imrc->translate = amdvi_translate;
+ imrc->notify_flag_changed = amdvi_iommu_notify_flag_changed;
+}
+
+static const TypeInfo amdvi_iommu_memory_region_info = {
+ .parent = TYPE_IOMMU_MEMORY_REGION,
+ .name = TYPE_AMD_IOMMU_MEMORY_REGION,
+ .class_init = amdvi_iommu_memory_region_class_init,
};
static void amdviPCI_register_types(void)
{
type_register_static(&amdviPCI);
type_register_static(&amdvi);
+ type_register_static(&amdvi_iommu_memory_region_info);
}
type_init(amdviPCI_register_types);