2 * QEMU sPAPR IOMMU (TCE) code
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "qemu/error-report.h"
24 #include "qemu/module.h"
25 #include "sysemu/kvm.h"
28 #include "migration/vmstate.h"
29 #include "sysemu/dma.h"
30 #include "exec/address-spaces.h"
33 #include "hw/ppc/spapr.h"
34 #include "hw/ppc/spapr_vio.h"
/* Size in bytes of one IOMMU page for the given page shift. */
45 #define IOMMU_PAGE_SIZE(shift) (1ULL << (shift))
/* Mask that clears the in-page offset bits for the given page shift. */
46 #define IOMMU_PAGE_MASK(shift) (~(IOMMU_PAGE_SIZE(shift) - 1))
/* List of TCE tables; searched by spapr_tce_find_by_liobn(). */
48 static QLIST_HEAD(, SpaprTceTable) spapr_tce_tables;
/*
 * Find the TCE table registered under the given LIOBN by walking
 * spapr_tce_tables. A LIOBN with any of its upper 32 bits set is
 * reported through hcall_dprintf() as out of bounds.
 */
50 SpaprTceTable *spapr_tce_find_by_liobn(target_ulong liobn)
/* Table LIOBNs are compared as 32-bit values below, so wider ones cannot match. */
54 if (liobn & 0xFFFFFFFF00000000ULL) {
55 hcall_dprintf("Request for out-of-bounds LIOBN 0x" TARGET_FMT_lx "\n",
60 QLIST_FOREACH(tcet, &spapr_tce_tables, list) {
61 if (tcet->liobn == (uint32_t)liobn) {
/* Derive the IOMMU access permissions from the permission bits of a TCE. */
69 static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce)
/* Only the bits selected by SPAPR_TCE_RW take part in the decision. */
71 switch (tce & SPAPR_TCE_RW) {
78 default: /* SPAPR_TCE_RW */
/*
 * Allocate a TCE table for the given LIOBN.
 *
 * Two sources are visible here: kvmppc_create_spapr_tce() and a zeroed
 * g_new0() array of nb_table 64-bit entries. The resulting table pointer
 * and *fd are reported through the trace point.
 */
83 static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
90 uint64_t *table = NULL;
93 table = kvmppc_create_spapr_tce(liobn, page_shift, bus_offset, nb_table,
99 table = g_new0(uint64_t, nb_table);
102 trace_spapr_iommu_new_table(liobn, table, *fd);
/*
 * Release a TCE table. The guarded branch runs when KVM is not enabled or
 * when kvmppc_remove_spapr_tce() returns non-zero.
 */
107 static void spapr_tce_free_table(uint64_t *table, int fd, uint32_t nb_table)
109 if (!kvm_enabled() ||
110 (kvmppc_remove_spapr_tce(table, fd, nb_table) != 0)) {
115 /* Called from RCU critical section */
/*
 * Translate an I/O address through the TCE table that embeds this IOMMU
 * memory region, and trace the lookup.
 */
116 static IOMMUTLBEntry spapr_tce_translate_iommu(IOMMUMemoryRegion *iommu,
118 IOMMUAccessFlags flag,
121 SpaprTceTable *tcet = container_of(iommu, SpaprTceTable, iommu);
/* Defaults used when the address does not hit a table entry. */
123 IOMMUTLBEntry ret = {
124 .target_as = &address_space_memory,
126 .translated_addr = 0,
127 .addr_mask = ~(hwaddr)0,
/* The page index of addr must fall inside the table. */
131 if ((addr >> tcet->page_shift) < tcet->nb_table) {
132 /* Check if we are in bound */
133 hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
135 tce = tcet->table[addr >> tcet->page_shift];
/* Page-align both addresses; addr_mask covers the in-page offset bits. */
136 ret.iova = addr & page_mask;
137 ret.translated_addr = tce & page_mask;
138 ret.addr_mask = ~page_mask;
139 ret.perm = spapr_tce_iommu_access_flags(tce);
141 trace_spapr_iommu_xlate(tcet->liobn, addr, ret.iova, ret.perm,
/*
 * Replay existing mappings to a notifier: walk the IOMMU region one minimum
 * page at a time, translate each address and notify for every entry whose
 * permission is not IOMMU_NONE.
 */
147 static void spapr_tce_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
149 MemoryRegion *mr = MEMORY_REGION(iommu_mr);
150 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
151 hwaddr addr, granularity;
153 SpaprTceTable *tcet = container_of(iommu_mr, SpaprTceTable, iommu);
/* Per-table flag that gates the replay below. */
155 if (tcet->skipping_replay) {
159 granularity = memory_region_iommu_get_min_page_size(iommu_mr);
161 for (addr = 0; addr < memory_region_size(mr); addr += granularity) {
162 iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, n->iommu_idx);
163 if (iotlb.perm != IOMMU_NONE) {
164 n->notify(n, &iotlb);
168 * if (2^64 - MR size) < granularity, it's possible to get an
169 * infinite loop here. This should catch such a wraparound.
171 if ((addr + granularity) < addr) {
/*
 * Migration pre_save hook: point the migration fields (mig_table,
 * mig_nb_table) at the live table so the vmstate fields describe it.
 */
177 static int spapr_tce_table_pre_save(void *opaque)
179 SpaprTceTable *tcet = SPAPR_TCE_TABLE(opaque);
/* No copy is made here: mig_table aliases the live table. */
181 tcet->mig_table = tcet->table;
182 tcet->mig_nb_table = tcet->nb_table;
184 trace_spapr_iommu_pre_save(tcet->liobn, tcet->mig_nb_table,
185 tcet->bus_offset, tcet->page_shift);
/* Return the table's IOMMU page size in bytes, i.e. 1 << page_shift. */
190 static uint64_t spapr_tce_get_min_page_size(IOMMUMemoryRegion *iommu)
192 SpaprTceTable *tcet = container_of(iommu, SpaprTceTable, iommu);
194 return 1ULL << tcet->page_shift;
/*
 * IOMMU attribute query. For IOMMU_ATTR_SPAPR_TCE_FD, and only when
 * kvmppc_has_cap_spapr_vfio() is true, the table's fd is stored through
 * data, which is written as an int.
 */
197 static int spapr_tce_get_attr(IOMMUMemoryRegion *iommu,
198 enum IOMMUMemoryRegionAttr attr, void *data)
200 SpaprTceTable *tcet = container_of(iommu, SpaprTceTable, iommu);
202 if (attr == IOMMU_ATTR_SPAPR_TCE_FD && kvmppc_has_cap_spapr_vfio()) {
203 *(int *) data = tcet->fd;
/*
 * Called when the notifier flags of the IOMMU region change. Switches the
 * table's need_vfio state on when the flags go from none to some, and off
 * when they go back to none.
 */
210 static void spapr_tce_notify_flag_changed(IOMMUMemoryRegion *iommu,
211 IOMMUNotifierFlag old,
212 IOMMUNotifierFlag new)
214 struct SpaprTceTable *tbl = container_of(iommu, SpaprTceTable, iommu);
/* Transitions between two non-empty flag sets change nothing here. */
216 if (old == IOMMU_NOTIFIER_NONE && new != IOMMU_NOTIFIER_NONE) {
217 spapr_tce_set_need_vfio(tbl, true);
218 } else if (old != IOMMU_NOTIFIER_NONE && new == IOMMU_NOTIFIER_NONE) {
219 spapr_tce_set_need_vfio(tbl, false);
/*
 * Migration post_load hook: rebuild the live TCE table from the migrated
 * copy (mig_table / mig_nb_table) and then release that copy.
 */
223 static int spapr_tce_table_post_load(void *opaque, int version_id)
225 SpaprTceTable *tcet = SPAPR_TCE_TABLE(opaque);
/*
 * Snapshot the geometry first: spapr_tce_table_disable() zeroes bus_offset
 * and page_shift, and the values are needed again to re-enable the table.
 */
226 uint32_t old_nb_table = tcet->nb_table;
227 uint64_t old_bus_offset = tcet->bus_offset;
228 uint32_t old_page_shift = tcet->page_shift;
231 spapr_vio_set_bypass(tcet->vdev, tcet->bypass);
/* A size mismatch means the current table cannot be reused. */
234 if (tcet->mig_nb_table != tcet->nb_table) {
235 spapr_tce_table_disable(tcet);
238 if (tcet->mig_nb_table) {
/* No live table at this point: enable one before copying entries in. */
239 if (!tcet->nb_table) {
240 spapr_tce_table_enable(tcet, old_page_shift, old_bus_offset,
244 memcpy(tcet->table, tcet->mig_table,
245 tcet->nb_table * sizeof(tcet->table[0]));
/*
 * NOTE(review): mig_table is filled in by VMSTATE_VARRAY_UINT32_ALLOC; if
 * that allocates with GLib (TODO confirm), g_free() is the matching release.
 */
247 free(tcet->mig_table);
248 tcet->mig_table = NULL;
251 trace_spapr_iommu_post_load(tcet->liobn, old_nb_table, tcet->nb_table,
252 tcet->bus_offset, tcet->page_shift);
/*
 * Decide whether the "spapr_iommu_ex" subsection is sent: only when the
 * table has a non-zero bus offset or a page shift other than 0xC.
 */
257 static bool spapr_tce_table_ex_needed(void *opaque)
259 SpaprTceTable *tcet = opaque;
261 return tcet->bus_offset || tcet->page_shift != 0xC;
/*
 * Optional migration subsection carrying the table's bus_offset and
 * page_shift; sent only when spapr_tce_table_ex_needed() says so.
 */
264 static const VMStateDescription vmstate_spapr_tce_table_ex = {
265 .name = "spapr_iommu_ex",
267 .minimum_version_id = 1,
268 .needed = spapr_tce_table_ex_needed,
269 .fields = (VMStateField[]) {
270 VMSTATE_UINT64(bus_offset, SpaprTceTable),
271 VMSTATE_UINT32(page_shift, SpaprTceTable),
272 VMSTATE_END_OF_LIST()
/*
 * Main migration description of a TCE table. The table contents travel
 * through the mig_table / mig_nb_table fields, which the pre_save and
 * post_load hooks tie to the live table.
 */
276 static const VMStateDescription vmstate_spapr_tce_table = {
277 .name = "spapr_iommu",
279 .minimum_version_id = 2,
280 .pre_save = spapr_tce_table_pre_save,
281 .post_load = spapr_tce_table_post_load,
282 .fields = (VMStateField []) {
/* The LIOBN in the stream has to equal the destination's LIOBN. */
284 VMSTATE_UINT32_EQUAL(liobn, SpaprTceTable, NULL),
287 VMSTATE_UINT32(mig_nb_table, SpaprTceTable),
288 VMSTATE_BOOL(bypass, SpaprTceTable),
/* Array of mig_nb_table 64-bit entries. */
289 VMSTATE_VARRAY_UINT32_ALLOC(mig_table, SpaprTceTable, mig_nb_table, 0,
290 vmstate_info_uint64, uint64_t),
292 VMSTATE_END_OF_LIST()
294 .subsections = (const VMStateDescription*[]) {
295 &vmstate_spapr_tce_table_ex,
/*
 * Realize hook: create the "tce-root-<liobn>" memory region and the
 * "tce-iommu-<liobn>" IOMMU region, add the table to spapr_tce_tables and
 * register its migration state.
 */
300 static void spapr_tce_table_realize(DeviceState *dev, Error **errp)
302 SpaprTceTable *tcet = SPAPR_TCE_TABLE(dev);
303 Object *tcetobj = OBJECT(tcet);
307 tcet->need_vfio = false;
308 tmp = g_strdup_printf("tce-root-%x", tcet->liobn);
/* The root region is created with the maximum 64-bit size. */
309 memory_region_init(&tcet->root, tcetobj, tmp, UINT64_MAX);
312 tmp = g_strdup_printf("tce-iommu-%x", tcet->liobn);
313 memory_region_init_iommu(&tcet->iommu, sizeof(tcet->iommu),
314 TYPE_SPAPR_IOMMU_MEMORY_REGION,
/* From here on spapr_tce_find_by_liobn() can find this table. */
318 QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
/* The LIOBN is passed as the vmstate instance id. */
320 vmstate_register(DEVICE(tcet), tcet->liobn, &vmstate_spapr_tce_table,
/*
 * Switch the table's need_vfio state. Callers must pass a value different
 * from the current one (asserted). Past the guard below, the table is
 * reallocated through spapr_tce_alloc_table(), the old contents are copied
 * over and the old table is freed.
 */
324 void spapr_tce_set_need_vfio(SpaprTceTable *tcet, bool need_vfio)
326 size_t table_size = tcet->nb_table * sizeof(uint64_t);
330 g_assert(need_vfio != tcet->need_vfio);
332 tcet->need_vfio = need_vfio;
/*
 * Guard: VFIO is no longer needed, or the table already has an fd and
 * kvmppc_has_cap_spapr_vfio() is true.
 */
334 if (!need_vfio || (tcet->fd != -1 && kvmppc_has_cap_spapr_vfio())) {
338 oldtable = tcet->table;
340 tcet->table = spapr_tce_alloc_table(tcet->liobn,
/* Carry the existing entries over before releasing the old table. */
346 memcpy(tcet->table, oldtable, table_size);
348 spapr_tce_free_table(oldtable, tcet->fd, tcet->nb_table);
/*
 * Create a TCE table object for the given LIOBN, attach it to owner as the
 * child property "tce-table-<liobn>" and realize it. A LIOBN that is
 * already registered is reported with error_report().
 */
353 SpaprTceTable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn)
358 if (spapr_tce_find_by_liobn(liobn)) {
359 error_report("Attempted to create TCE table with duplicate"
360 " LIOBN 0x%x", liobn);
364 tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
367 tmp = g_strdup_printf("tce-table-%x", liobn);
368 object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet), NULL);
/*
 * Drop the reference obtained from object_new(); presumably the child
 * property keeps the object alive -- TODO confirm.
 */
370 object_unref(OBJECT(tcet));
/* NOTE(review): a NULL error pointer is passed, so a realize failure is not reported here. */
372 object_property_set_bool(OBJECT(tcet), true, "realized", NULL);
/*
 * Enable a TCE table: record its geometry, allocate the table storage,
 * size the IOMMU region to nb_table pages and map it into the root region
 * at bus_offset. A table that already has entries triggers a warning.
 */
377 void spapr_tce_table_enable(SpaprTceTable *tcet,
378 uint32_t page_shift, uint64_t bus_offset,
/* A non-zero nb_table is what marks a table as enabled. */
381 if (tcet->nb_table) {
382 warn_report("trying to enable already enabled TCE table");
386 tcet->bus_offset = bus_offset;
387 tcet->page_shift = page_shift;
388 tcet->nb_table = nb_table;
389 tcet->table = spapr_tce_alloc_table(tcet->liobn,
/* Widen before shifting so the region size is computed in 64 bits. */
396 memory_region_set_size(MEMORY_REGION(&tcet->iommu),
397 (uint64_t)tcet->nb_table << tcet->page_shift);
398 memory_region_add_subregion(&tcet->root, tcet->bus_offset,
399 MEMORY_REGION(&tcet->iommu));
/*
 * Disable a TCE table: unmap the IOMMU region from the root region, shrink
 * it to size 0, free the table storage and reset the geometry fields.
 */
402 void spapr_tce_table_disable(SpaprTceTable *tcet)
/* A table without entries takes the guarded branch below. */
404 if (!tcet->nb_table) {
408 memory_region_del_subregion(&tcet->root, MEMORY_REGION(&tcet->iommu));
409 memory_region_set_size(MEMORY_REGION(&tcet->iommu), 0);
411 spapr_tce_free_table(tcet->table, tcet->fd, tcet->nb_table);
414 tcet->bus_offset = 0;
415 tcet->page_shift = 0;
/*
 * Unrealize hook: unregister the migration state, remove the table from
 * spapr_tce_tables and disable it.
 */
419 static void spapr_tce_table_unrealize(DeviceState *dev, Error **errp)
421 SpaprTceTable *tcet = SPAPR_TCE_TABLE(dev);
423 vmstate_unregister(DEVICE(tcet), &vmstate_spapr_tce_table, tcet);
/* After this the table can no longer be found by LIOBN. */
425 QLIST_REMOVE(tcet, list);
427 spapr_tce_table_disable(tcet);
/* Return a MemoryRegion pointer for the given TCE table. */
430 MemoryRegion *spapr_tce_get_iommu(SpaprTceTable *tcet)
/* Device reset hook: clear every entry of the table to zero. */
435 static void spapr_tce_reset(DeviceState *dev)
437 SpaprTceTable *tcet = SPAPR_TCE_TABLE(dev);
438 size_t table_size = tcet->nb_table * sizeof(uint64_t);
/* Nothing is cleared when the table has no entries. */
440 if (tcet->nb_table) {
441 memset(tcet->table, 0, table_size);
445 static target_ulong put_tce_emu(SpaprTceTable *tcet, target_ulong ioba,
449 hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
450 unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift;
452 if (index >= tcet->nb_table) {
453 hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x"
454 TARGET_FMT_lx "\n", ioba);
458 tcet->table[index] = tce;
460 entry.target_as = &address_space_memory,
461 entry.iova = (ioba - tcet->bus_offset) & page_mask;
462 entry.translated_addr = tce & page_mask;
463 entry.addr_mask = ~page_mask;
464 entry.perm = spapr_tce_iommu_access_flags(tce);
465 memory_region_notify_iommu(&tcet->iommu, 0, entry);
/*
 * H_PUT_TCE_INDIRECT hypercall handler.
 *
 * args[0] is the LIOBN, args[1] the starting IOBA, args[2] the guest
 * address of a list of TCEs and args[3] the number of pages. Each TCE is
 * read big-endian from the list and stored with put_tce_emu() at
 * consecutive IOMMU pages.
 */
470 static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
471 SpaprMachineState *spapr,
472 target_ulong opcode, target_ulong *args)
475 target_ulong liobn = args[0];
476 target_ulong ioba = args[1];
/* Keep the original IOBA for tracing; ioba itself advances in the loop. */
477 target_ulong ioba1 = ioba;
478 target_ulong tce_list = args[2];
479 target_ulong npages = args[3];
480 target_ulong ret = H_PARAMETER, tce = 0;
481 SpaprTceTable *tcet = spapr_tce_find_by_liobn(liobn);
482 CPUState *cs = CPU(cpu);
483 hwaddr page_mask, page_size;
/* Guard: more than 512 pages, or a list address with SPAPR_TCE_PAGE_MASK bits set. */
489 if ((npages > 512) || (tce_list & SPAPR_TCE_PAGE_MASK)) {
493 page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
494 page_size = IOMMU_PAGE_SIZE(tcet->page_shift);
497 for (i = 0; i < npages; ++i, ioba += page_size) {
498 tce = ldq_be_phys(cs->as, tce_list + i * sizeof(target_ulong));
500 ret = put_tce_emu(tcet, ioba, tce);
506 /* Trace last successful or the first problematic entry */
508 if (SPAPR_IS_PCI_LIOBN(liobn)) {
509 trace_spapr_iommu_pci_indirect(liobn, ioba1, tce_list, i, tce, ret);
511 trace_spapr_iommu_indirect(liobn, ioba1, tce_list, i, tce, ret);
/*
 * H_STUFF_TCE hypercall handler.
 *
 * args[0] is the LIOBN, args[1] the starting IOBA, args[2] the TCE value
 * and args[3] the number of pages. The same TCE value is stored with
 * put_tce_emu() at npages consecutive IOMMU pages.
 */
516 static target_ulong h_stuff_tce(PowerPCCPU *cpu, SpaprMachineState *spapr,
517 target_ulong opcode, target_ulong *args)
520 target_ulong liobn = args[0];
521 target_ulong ioba = args[1];
522 target_ulong tce_value = args[2];
523 target_ulong npages = args[3];
524 target_ulong ret = H_PARAMETER;
525 SpaprTceTable *tcet = spapr_tce_find_by_liobn(liobn);
526 hwaddr page_mask, page_size;
/* Guard: a request for more pages than the table has entries. */
532 if (npages > tcet->nb_table) {
536 page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
537 page_size = IOMMU_PAGE_SIZE(tcet->page_shift);
540 for (i = 0; i < npages; ++i, ioba += page_size) {
541 ret = put_tce_emu(tcet, ioba, tce_value);
/*
 * NOTE(review): ioba has been advanced by the loop, so the trace reports the
 * post-loop address; h_put_tce_indirect() keeps the initial one in ioba1.
 */
546 if (SPAPR_IS_PCI_LIOBN(liobn)) {
547 trace_spapr_iommu_pci_stuff(liobn, ioba, tce_value, npages, ret);
549 trace_spapr_iommu_stuff(liobn, ioba, tce_value, npages, ret);
/*
 * H_PUT_TCE hypercall handler.
 *
 * args[0] is the LIOBN, args[1] the IOBA and args[2] the TCE value, which
 * is stored through put_tce_emu(). The result is traced on the PCI or the
 * generic trace point depending on the LIOBN.
 */
555 static target_ulong h_put_tce(PowerPCCPU *cpu, SpaprMachineState *spapr,
556 target_ulong opcode, target_ulong *args)
558 target_ulong liobn = args[0];
559 target_ulong ioba = args[1];
560 target_ulong tce = args[2];
/* H_PARAMETER remains the result unless put_tce_emu() overwrites it. */
561 target_ulong ret = H_PARAMETER;
562 SpaprTceTable *tcet = spapr_tce_find_by_liobn(liobn);
565 hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
569 ret = put_tce_emu(tcet, ioba, tce);
571 if (SPAPR_IS_PCI_LIOBN(liobn)) {
572 trace_spapr_iommu_pci_put(liobn, ioba, tce, ret);
574 trace_spapr_iommu_put(liobn, ioba, tce, ret);
580 static target_ulong get_tce_emu(SpaprTceTable *tcet, target_ulong ioba,
583 unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift;
585 if (index >= tcet->nb_table) {
586 hcall_dprintf("spapr_iommu_get_tce on out-of-bounds IOBA 0x"
587 TARGET_FMT_lx "\n", ioba);
591 *tce = tcet->table[index];
/*
 * H_GET_TCE hypercall handler.
 *
 * args[0] is the LIOBN and args[1] the IOBA; the entry is fetched through
 * get_tce_emu(). The result is traced on the PCI or the generic trace
 * point depending on the LIOBN.
 */
596 static target_ulong h_get_tce(PowerPCCPU *cpu, SpaprMachineState *spapr,
597 target_ulong opcode, target_ulong *args)
599 target_ulong liobn = args[0];
600 target_ulong ioba = args[1];
/* tce starts at 0 so the trace has a defined value when the lookup fails. */
601 target_ulong tce = 0;
602 target_ulong ret = H_PARAMETER;
603 SpaprTceTable *tcet = spapr_tce_find_by_liobn(liobn);
606 hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
610 ret = get_tce_emu(tcet, ioba, &tce);
615 if (SPAPR_IS_PCI_LIOBN(liobn)) {
616 trace_spapr_iommu_pci_get(liobn, ioba, ret, tce);
618 trace_spapr_iommu_get(liobn, ioba, ret, tce);
/*
 * Describe a DMA window in the device tree node at node_off.
 *
 * Sets "ibm,#dma-address-cells" and "ibm,#dma-size-cells" to 2 and writes
 * propname as five big-endian 32-bit cells: the LIOBN, the 64-bit window
 * address (high word, low word) and the 64-bit size (high word, low word).
 */
624 int spapr_dma_dt(void *fdt, int node_off, const char *propname,
625 uint32_t liobn, uint64_t window, uint32_t size)
627 uint32_t dma_prop[5];
630 dma_prop[0] = cpu_to_be32(liobn);
631 dma_prop[1] = cpu_to_be32(window >> 32);
632 dma_prop[2] = cpu_to_be32(window & 0xFFFFFFFF);
/* High word of the size cell pair; always zero because size is a uint32_t. */
633 dma_prop[3] = 0; /* window size is 32 bits */
634 dma_prop[4] = cpu_to_be32(size);
636 ret = fdt_setprop_cell(fdt, node_off, "ibm,#dma-address-cells", 2);
641 ret = fdt_setprop_cell(fdt, node_off, "ibm,#dma-size-cells", 2);
646 ret = fdt_setprop(fdt, node_off, propname, dma_prop, sizeof(dma_prop));
/*
 * Describe the DMA window of a TCE table in the device tree: forwards to
 * spapr_dma_dt() with the table's LIOBN, a window address of 0 and a size
 * of nb_table pages.
 */
654 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
/*
 * NOTE(review): the size is passed as spapr_dma_dt()'s uint32_t parameter, so
 * a window of 4 GiB or more would not fit -- confirm callers stay below that.
 */
661 return spapr_dma_dt(fdt, node_off, propname,
662 tcet->liobn, 0, tcet->nb_table << tcet->page_shift);
/*
 * Class initializer of the TCE table device: installs the realize, reset
 * and unrealize hooks, initializes the global table list and registers the
 * TCE hypercall handlers.
 */
665 static void spapr_tce_table_class_init(ObjectClass *klass, void *data)
667 DeviceClass *dc = DEVICE_CLASS(klass);
668 dc->realize = spapr_tce_table_realize;
669 dc->reset = spapr_tce_reset;
670 dc->unrealize = spapr_tce_table_unrealize;
671 /* Reason: This is just an internal device for handling the hypercalls */
672 dc->user_creatable = false;
/* The list starts out empty; realize adds tables to it. */
674 QLIST_INIT(&spapr_tce_tables);
677 spapr_register_hypercall(H_PUT_TCE, h_put_tce);
678 spapr_register_hypercall(H_GET_TCE, h_get_tce);
679 spapr_register_hypercall(H_PUT_TCE_INDIRECT, h_put_tce_indirect);
680 spapr_register_hypercall(H_STUFF_TCE, h_stuff_tce);
683 static TypeInfo spapr_tce_table_info = {
684 .name = TYPE_SPAPR_TCE_TABLE,
685 .parent = TYPE_DEVICE,
686 .instance_size = sizeof(SpaprTceTable),
687 .class_init = spapr_tce_table_class_init,
/*
 * Class initializer of the sPAPR IOMMU memory region: wires the IOMMU
 * region callbacks to the TCE table implementations in this file.
 */
690 static void spapr_iommu_memory_region_class_init(ObjectClass *klass, void *data)
692 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
694 imrc->translate = spapr_tce_translate_iommu;
695 imrc->replay = spapr_tce_replay;
696 imrc->get_min_page_size = spapr_tce_get_min_page_size;
697 imrc->notify_flag_changed = spapr_tce_notify_flag_changed;
698 imrc->get_attr = spapr_tce_get_attr;
/* QOM type description of the sPAPR IOMMU memory region, derived from TYPE_IOMMU_MEMORY_REGION. */
701 static const TypeInfo spapr_iommu_memory_region_info = {
702 .parent = TYPE_IOMMU_MEMORY_REGION,
703 .name = TYPE_SPAPR_IOMMU_MEMORY_REGION,
704 .class_init = spapr_iommu_memory_region_class_init,
/* Register the TCE table device type and the sPAPR IOMMU memory region type. */
707 static void register_types(void)
709 type_register_static(&spapr_tce_table_info);
710 type_register_static(&spapr_iommu_memory_region_info);
/* Hand register_types() to the type_init() registration macro. */
713 type_init(register_types);