1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright © 2006-2014 Intel Corporation.
13 #define pr_fmt(fmt) "DMAR: " fmt
14 #define dev_fmt(fmt) pr_fmt(fmt)
16 #include <linux/crash_dump.h>
17 #include <linux/dma-direct.h>
18 #include <linux/dma-iommu.h>
19 #include <linux/dmi.h>
20 #include <linux/intel-svm.h>
21 #include <linux/memory.h>
22 #include <linux/pci.h>
23 #include <linux/pci-ats.h>
24 #include <linux/spinlock.h>
25 #include <linux/syscore_ops.h>
26 #include <linux/tboot.h>
29 #include "../irq_remapping.h"
30 #include "../iommu-sva-lib.h"
32 #include "cap_audit.h"
34 #define ROOT_SIZE VTD_PAGE_SIZE
35 #define CONTEXT_SIZE VTD_PAGE_SIZE
37 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
38 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
39 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
40 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
42 #define IOAPIC_RANGE_START (0xfee00000)
43 #define IOAPIC_RANGE_END (0xfeefffff)
44 #define IOVA_START_ADDR (0x1000)
46 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
48 #define MAX_AGAW_WIDTH 64
49 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
51 #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
52 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
54 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
55 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
56 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
57 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
58 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
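/*
 * Worked example (informational): with gaw == 48, __DOMAIN_MAX_PFN(48)
 * is 2^36 - 1 and DOMAIN_MAX_ADDR(48) reaches up to (2^36 - 1) << 12,
 * i.e. just below the 256TiB boundary.
 */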
60 /* IO virtual address start page frame number */
61 #define IOVA_START_PFN (1)
63 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
65 /* page table handling */
66 #define LEVEL_STRIDE (9)
67 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
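/*
 * Each page-table level decodes 9 bits of the PFN: 512 entries of 8 bytes
 * each, exactly filling one 4KiB VT-d page-table page.
 */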
69 static inline int agaw_to_level(int agaw)
74 static inline int agaw_to_width(int agaw)
76 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
79 static inline int width_to_agaw(int width)
81 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
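/*
 * Worked example for the agaw helpers above (informational): a 48-bit
 * width gives agaw = DIV_ROUND_UP(48 - 30, 9) = 2, i.e. a 4-level page
 * table, and a 57-bit width gives agaw = 3, i.e. 5 levels.
 */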
84 static inline unsigned int level_to_offset_bits(int level)
86 return (level - 1) * LEVEL_STRIDE;
89 static inline int pfn_level_offset(u64 pfn, int level)
91 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
94 static inline u64 level_mask(int level)
96 return -1ULL << level_to_offset_bits(level);
99 static inline u64 level_size(int level)
101 return 1ULL << level_to_offset_bits(level);
104 static inline u64 align_to_level(u64 pfn, int level)
106 return (pfn + level_size(level) - 1) & level_mask(level);
109 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
111 return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
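/*
 * E.g. lvl 1 covers a single 4KiB page, lvl 2 covers 512 pages (2MiB)
 * and lvl 3 covers 512 * 512 pages (1GiB).
 */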
114 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
115 are never going to work. */
116 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
118 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
120 static inline unsigned long page_to_dma_pfn(struct page *pg)
122 return mm_to_dma_pfn(page_to_pfn(pg));
124 static inline unsigned long virt_to_dma_pfn(void *p)
126 return page_to_dma_pfn(virt_to_page(p));
129 static void __init check_tylersburg_isoch(void);
130 static int rwbf_quirk;
133 * Set to 1 to panic the kernel if VT-d cannot be enabled successfully
134 * (used when the kernel is launched with TXT).
136 static int force_on = 0;
137 static int intel_iommu_tboot_noforce;
138 static int no_platform_optin;
140 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
143 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
146 static phys_addr_t root_entry_lctp(struct root_entry *re)
151 return re->lo & VTD_PAGE_MASK;
155 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
158 static phys_addr_t root_entry_uctp(struct root_entry *re)
163 return re->hi & VTD_PAGE_MASK;
166 static inline void context_clear_pasid_enable(struct context_entry *context)
168 context->lo &= ~(1ULL << 11);
171 static inline bool context_pasid_enabled(struct context_entry *context)
173 return !!(context->lo & (1ULL << 11));
176 static inline void context_set_copied(struct context_entry *context)
178 context->hi |= (1ull << 3);
181 static inline bool context_copied(struct context_entry *context)
183 return !!(context->hi & (1ULL << 3));
186 static inline bool __context_present(struct context_entry *context)
188 return (context->lo & 1);
191 bool context_present(struct context_entry *context)
193 return context_pasid_enabled(context) ?
194 __context_present(context) :
195 __context_present(context) && !context_copied(context);
198 static inline void context_set_present(struct context_entry *context)
203 static inline void context_set_fault_enable(struct context_entry *context)
205 context->lo &= (((u64)-1) << 2) | 1;
208 static inline void context_set_translation_type(struct context_entry *context,
211 context->lo &= (((u64)-1) << 4) | 3;
212 context->lo |= (value & 3) << 2;
215 static inline void context_set_address_root(struct context_entry *context,
218 context->lo &= ~VTD_PAGE_MASK;
219 context->lo |= value & VTD_PAGE_MASK;
222 static inline void context_set_address_width(struct context_entry *context,
225 context->hi |= value & 7;
228 static inline void context_set_domain_id(struct context_entry *context,
231 context->hi |= (value & ((1 << 16) - 1)) << 8;
234 static inline int context_domain_id(struct context_entry *c)
236 return((c->hi >> 8) & 0xffff);
239 static inline void context_clear_entry(struct context_entry *context)
246 * This domain is a statically identity mapping domain.
247 * 1. This domain creates a static 1:1 mapping to all usable memory.
248 * 2. It maps to each iommu if successful.
249 * 3. Each iommu maps to this domain if successful.
251 static struct dmar_domain *si_domain;
252 static int hw_pass_through = 1;
254 struct dmar_rmrr_unit {
255 struct list_head list; /* list of rmrr units */
256 struct acpi_dmar_header *hdr; /* ACPI header */
257 u64 base_address; /* reserved base address*/
258 u64 end_address; /* reserved end address */
259 struct dmar_dev_scope *devices; /* target devices */
260 int devices_cnt; /* target device count */
263 struct dmar_atsr_unit {
264 struct list_head list; /* list of ATSR units */
265 struct acpi_dmar_header *hdr; /* ACPI header */
266 struct dmar_dev_scope *devices; /* target devices */
267 int devices_cnt; /* target device count */
268 u8 include_all:1; /* include all ports */
271 struct dmar_satc_unit {
272 struct list_head list; /* list of SATC units */
273 struct acpi_dmar_header *hdr; /* ACPI header */
274 struct dmar_dev_scope *devices; /* target devices */
275 struct intel_iommu *iommu; /* the corresponding iommu */
276 int devices_cnt; /* target device count */
277 u8 atc_required:1; /* ATS is required */
280 static LIST_HEAD(dmar_atsr_units);
281 static LIST_HEAD(dmar_rmrr_units);
282 static LIST_HEAD(dmar_satc_units);
284 #define for_each_rmrr_units(rmrr) \
285 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
287 static void dmar_remove_one_dev_info(struct device *dev);
289 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
290 int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
292 int intel_iommu_enabled = 0;
293 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
295 static int dmar_map_gfx = 1;
296 static int intel_iommu_superpage = 1;
297 static int iommu_identity_mapping;
298 static int iommu_skip_te_disable;
300 #define IDENTMAP_GFX 2
301 #define IDENTMAP_AZALIA 4
303 const struct iommu_ops intel_iommu_ops;
305 static bool translation_pre_enabled(struct intel_iommu *iommu)
307 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
310 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
312 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
315 static void init_translation_status(struct intel_iommu *iommu)
319 gsts = readl(iommu->reg + DMAR_GSTS_REG);
320 if (gsts & DMA_GSTS_TES)
321 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
324 static int __init intel_iommu_setup(char *str)
330 if (!strncmp(str, "on", 2)) {
332 pr_info("IOMMU enabled\n");
333 } else if (!strncmp(str, "off", 3)) {
335 no_platform_optin = 1;
336 pr_info("IOMMU disabled\n");
337 } else if (!strncmp(str, "igfx_off", 8)) {
339 pr_info("Disable GFX device mapping\n");
340 } else if (!strncmp(str, "forcedac", 8)) {
341 pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
342 iommu_dma_forcedac = true;
343 } else if (!strncmp(str, "strict", 6)) {
344 pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
345 iommu_set_dma_strict();
346 } else if (!strncmp(str, "sp_off", 6)) {
347 pr_info("Disable supported super page\n");
348 intel_iommu_superpage = 0;
349 } else if (!strncmp(str, "sm_on", 5)) {
350 pr_info("Enable scalable mode if hardware supports\n");
352 } else if (!strncmp(str, "sm_off", 6)) {
353 pr_info("Scalable mode is disallowed\n");
355 } else if (!strncmp(str, "tboot_noforce", 13)) {
356 pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
357 intel_iommu_tboot_noforce = 1;
359 pr_notice("Unknown option - '%s'\n", str);
362 str += strcspn(str, ",");
369 __setup("intel_iommu=", intel_iommu_setup);
371 void *alloc_pgtable_page(int node)
376 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
378 vaddr = page_address(page);
382 void free_pgtable_page(void *vaddr)
384 free_page((unsigned long)vaddr);
387 static inline int domain_type_is_si(struct dmar_domain *domain)
389 return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
392 static inline bool domain_use_first_level(struct dmar_domain *domain)
394 return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
397 static inline int domain_pfn_supported(struct dmar_domain *domain,
400 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
402 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
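/*
 * I.e. a pfn is supported iff it fits within the domain's address width;
 * the addr_width < BITS_PER_LONG test just guards against an oversized
 * shift (any pfn fits in that case).
 */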
405 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
410 sagaw = cap_sagaw(iommu->cap);
411 for (agaw = width_to_agaw(max_gaw);
413 if (test_bit(agaw, &sagaw))
421 * Calculate max SAGAW for each iommu.
423 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
425 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
429 * Calculate the agaw for each iommu.
430 * "SAGAW" may differ across iommus; use a default agaw, and fall back to
431 * a smaller supported agaw for iommus that don't support the default.
433 int iommu_calculate_agaw(struct intel_iommu *iommu)
435 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
438 static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
440 return sm_supported(iommu) ?
441 ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
444 static void domain_update_iommu_coherency(struct dmar_domain *domain)
446 struct iommu_domain_info *info;
447 struct dmar_drhd_unit *drhd;
448 struct intel_iommu *iommu;
452 domain->iommu_coherency = true;
453 xa_for_each(&domain->iommu_array, i, info) {
455 if (!iommu_paging_structure_coherency(info->iommu)) {
456 domain->iommu_coherency = false;
463 /* No hardware attached; use lowest common denominator */
465 for_each_active_iommu(iommu, drhd) {
466 if (!iommu_paging_structure_coherency(iommu)) {
467 domain->iommu_coherency = false;
474 static int domain_update_iommu_superpage(struct dmar_domain *domain,
475 struct intel_iommu *skip)
477 struct dmar_drhd_unit *drhd;
478 struct intel_iommu *iommu;
481 if (!intel_iommu_superpage)
484 /* set iommu_superpage to the smallest common denominator */
486 for_each_active_iommu(iommu, drhd) {
488 if (domain && domain_use_first_level(domain)) {
489 if (!cap_fl1gp_support(iommu->cap))
492 mask &= cap_super_page_val(iommu->cap);
504 static int domain_update_device_node(struct dmar_domain *domain)
506 struct device_domain_info *info;
507 int nid = NUMA_NO_NODE;
509 spin_lock(&domain->lock);
510 list_for_each_entry(info, &domain->devices, link) {
512 * There could be multiple device numa nodes, as devices within
513 * the same domain may sit behind different IOMMUs. There is no
514 * perfect answer in such a situation, so we pick the first node
515 * we find (first come, first served).
517 nid = dev_to_node(info->dev);
518 if (nid != NUMA_NO_NODE)
521 spin_unlock(&domain->lock);
526 static void domain_update_iotlb(struct dmar_domain *domain);
528 /* Return the super pagesize bitmap if supported. */
529 static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
531 unsigned long bitmap = 0;
534 * 1-level super page supports page size of 2MiB, 2-level super page
535 * supports page size of both 2MiB and 1GiB.
537 if (domain->iommu_superpage == 1)
539 else if (domain->iommu_superpage == 2)
540 bitmap |= SZ_2M | SZ_1G;
545 /* Some capabilities may be different across iommus */
546 static void domain_update_iommu_cap(struct dmar_domain *domain)
548 domain_update_iommu_coherency(domain);
549 domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
552 * If RHSA is missing, we should default to the device numa domain
555 if (domain->nid == NUMA_NO_NODE)
556 domain->nid = domain_update_device_node(domain);
559 * First-level translation restricts the input-address to a
560 * canonical address (i.e., address bits 63:N have the same
561 * value as address bit [N-1], where N is 48-bits with 4-level
562 * paging and 57-bits with 5-level paging). Hence, skip bit
565 if (domain_use_first_level(domain))
566 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
568 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
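/*
 * E.g. with gaw == 48, a first-level domain advertises an aperture of
 * 2^47 - 1 so that IOVAs stay canonical, while a second-level domain
 * can use the full 2^48 - 1.
 */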
570 domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
571 domain_update_iotlb(domain);
574 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
577 struct root_entry *root = &iommu->root_entry[bus];
578 struct context_entry *context;
582 if (sm_supported(iommu)) {
590 context = phys_to_virt(*entry & VTD_PAGE_MASK);
592 unsigned long phy_addr;
596 context = alloc_pgtable_page(iommu->node);
600 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
601 phy_addr = virt_to_phys((void *)context);
602 *entry = phy_addr | 1;
603 __iommu_flush_cache(iommu, entry, sizeof(*entry));
605 return &context[devfn];
609 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
610 * sub-hierarchy of a candidate PCI-PCI bridge
611 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
612 * @bridge: the candidate PCI-PCI bridge
614 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
617 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
619 struct pci_dev *pdev, *pbridge;
621 if (!dev_is_pci(dev) || !dev_is_pci(bridge))
624 pdev = to_pci_dev(dev);
625 pbridge = to_pci_dev(bridge);
627 if (pbridge->subordinate &&
628 pbridge->subordinate->number <= pdev->bus->number &&
629 pbridge->subordinate->busn_res.end >= pdev->bus->number)
635 static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
637 struct dmar_drhd_unit *drhd;
641 /* We know that this device on this chipset has its own IOMMU.
642 * If we find it under a different IOMMU, then the BIOS is lying
643 * to us. Hope that the IOMMU for this device is actually
644 * disabled, and it needs no translation...
646 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
649 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
654 /* we know that this iommu should be at offset 0xa000 from vtbar */
655 drhd = dmar_find_matched_drhd_unit(pdev);
656 if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
657 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
658 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
665 static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
667 if (!iommu || iommu->drhd->ignored)
670 if (dev_is_pci(dev)) {
671 struct pci_dev *pdev = to_pci_dev(dev);
673 if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
674 pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
675 quirk_ioat_snb_local_iommu(pdev))
682 struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
684 struct dmar_drhd_unit *drhd = NULL;
685 struct pci_dev *pdev = NULL;
686 struct intel_iommu *iommu;
694 if (dev_is_pci(dev)) {
695 struct pci_dev *pf_pdev;
697 pdev = pci_real_dma_dev(to_pci_dev(dev));
699 /* VFs aren't listed in scope tables; we need to look up
700 * the PF instead to find the IOMMU. */
701 pf_pdev = pci_physfn(pdev);
703 segment = pci_domain_nr(pdev->bus);
704 } else if (has_acpi_companion(dev))
705 dev = &ACPI_COMPANION(dev)->dev;
708 for_each_iommu(iommu, drhd) {
709 if (pdev && segment != drhd->segment)
712 for_each_active_dev_scope(drhd->devices,
713 drhd->devices_cnt, i, tmp) {
715 /* For a VF use its original BDF# not that of the PF
716 * which we used for the IOMMU lookup. Strictly speaking
717 * we could do this for all PCI devices; we only need to
718 * get the BDF# from the scope table for ACPI matches. */
719 if (pdev && pdev->is_virtfn)
723 *bus = drhd->devices[i].bus;
724 *devfn = drhd->devices[i].devfn;
729 if (is_downstream_to_pci_bridge(dev, tmp))
733 if (pdev && drhd->include_all) {
736 *bus = pdev->bus->number;
737 *devfn = pdev->devfn;
744 if (iommu_is_dummy(iommu, dev))
752 static void domain_flush_cache(struct dmar_domain *domain,
753 void *addr, int size)
755 if (!domain->iommu_coherency)
756 clflush_cache_range(addr, size);
759 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
761 struct context_entry *context;
764 spin_lock(&iommu->lock);
765 context = iommu_context_addr(iommu, bus, devfn, 0);
767 ret = context_present(context);
768 spin_unlock(&iommu->lock);
772 static void free_context_table(struct intel_iommu *iommu)
774 struct context_entry *context;
777 if (!iommu->root_entry)
780 for (i = 0; i < ROOT_ENTRY_NR; i++) {
781 context = iommu_context_addr(iommu, i, 0, 0);
783 free_pgtable_page(context);
785 if (!sm_supported(iommu))
788 context = iommu_context_addr(iommu, i, 0x80, 0);
790 free_pgtable_page(context);
793 free_pgtable_page(iommu->root_entry);
794 iommu->root_entry = NULL;
797 #ifdef CONFIG_DMAR_DEBUG
798 static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, u8 bus, u8 devfn)
800 struct device_domain_info *info;
801 struct dma_pte *parent, *pte;
802 struct dmar_domain *domain;
803 struct pci_dev *pdev;
806 pdev = pci_get_domain_bus_and_slot(iommu->segment, bus, devfn);
810 info = dev_iommu_priv_get(&pdev->dev);
811 if (!info || !info->domain) {
812 pr_info("device [%02x:%02x.%d] not probed\n",
813 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
817 domain = info->domain;
818 level = agaw_to_level(domain->agaw);
819 parent = domain->pgd;
821 pr_info("no page table setup\n");
826 offset = pfn_level_offset(pfn, level);
827 pte = &parent[offset];
828 if (!pte || (dma_pte_superpage(pte) || !dma_pte_present(pte))) {
829 pr_info("PTE not present at level %d\n", level);
833 pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
838 parent = phys_to_virt(dma_pte_addr(pte));
843 void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
844 unsigned long long addr, u32 pasid)
846 struct pasid_dir_entry *dir, *pde;
847 struct pasid_entry *entries, *pte;
848 struct context_entry *ctx_entry;
849 struct root_entry *rt_entry;
850 u8 devfn = source_id & 0xff;
851 u8 bus = source_id >> 8;
852 int i, dir_index, index;
854 pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
856 /* root entry dump */
857 rt_entry = &iommu->root_entry[bus];
859 pr_info("root table entry is not present\n");
863 if (sm_supported(iommu))
864 pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
865 rt_entry->hi, rt_entry->lo);
867 pr_info("root entry: 0x%016llx", rt_entry->lo);
869 /* context entry dump */
870 ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
872 pr_info("context table entry is not present\n");
876 pr_info("context entry: hi 0x%016llx, low 0x%016llx\n",
877 ctx_entry->hi, ctx_entry->lo);
879 /* legacy mode does not require PASID entries */
880 if (!sm_supported(iommu))
883 /* get the pointer to pasid directory entry */
884 dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
886 pr_info("pasid directory entry is not present\n");
889 /* For a request without PASID, get the PASID from the context entry */
890 if (intel_iommu_sm && pasid == INVALID_IOASID)
891 pasid = PASID_RID2PASID;
893 dir_index = pasid >> PASID_PDE_SHIFT;
894 pde = &dir[dir_index];
895 pr_info("pasid dir entry: 0x%016llx\n", pde->val);
897 /* get the pointer to the pasid table entry */
898 entries = get_pasid_table_from_pde(pde);
900 pr_info("pasid table entry is not present\n");
903 index = pasid & PASID_PTE_MASK;
904 pte = &entries[index];
905 for (i = 0; i < ARRAY_SIZE(pte->val); i++)
906 pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
909 pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn);
913 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
914 unsigned long pfn, int *target_level)
916 struct dma_pte *parent, *pte;
917 int level = agaw_to_level(domain->agaw);
920 BUG_ON(!domain->pgd);
922 if (!domain_pfn_supported(domain, pfn))
923 /* Address beyond IOMMU's addressing capabilities. */
926 parent = domain->pgd;
931 offset = pfn_level_offset(pfn, level);
932 pte = &parent[offset];
933 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
935 if (level == *target_level)
938 if (!dma_pte_present(pte)) {
941 tmp_page = alloc_pgtable_page(domain->nid);
946 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
947 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
948 if (domain_use_first_level(domain)) {
949 pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
950 if (iommu_is_dma_domain(&domain->domain))
951 pteval |= DMA_FL_PTE_ACCESS;
953 if (cmpxchg64(&pte->val, 0ULL, pteval))
954 /* Someone else set it while we were thinking; use theirs. */
955 free_pgtable_page(tmp_page);
957 domain_flush_cache(domain, pte, sizeof(*pte));
962 parent = phys_to_virt(dma_pte_addr(pte));
967 *target_level = level;
972 /* return address's pte at specific level */
973 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
975 int level, int *large_page)
977 struct dma_pte *parent, *pte;
978 int total = agaw_to_level(domain->agaw);
981 parent = domain->pgd;
982 while (level <= total) {
983 offset = pfn_level_offset(pfn, total);
984 pte = &parent[offset];
988 if (!dma_pte_present(pte)) {
993 if (dma_pte_superpage(pte)) {
998 parent = phys_to_virt(dma_pte_addr(pte));
1004 /* Clear last-level PTEs; a TLB flush should follow. */
1005 static void dma_pte_clear_range(struct dmar_domain *domain,
1006 unsigned long start_pfn,
1007 unsigned long last_pfn)
1009 unsigned int large_page;
1010 struct dma_pte *first_pte, *pte;
1012 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1013 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1014 BUG_ON(start_pfn > last_pfn);
1016 /* we don't need lock here; nobody else touches the iova range */
1019 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1021 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1026 start_pfn += lvl_to_nr_pages(large_page);
1028 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1030 domain_flush_cache(domain, first_pte,
1031 (void *)pte - (void *)first_pte);
1033 } while (start_pfn && start_pfn <= last_pfn);
1036 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1037 int retain_level, struct dma_pte *pte,
1038 unsigned long pfn, unsigned long start_pfn,
1039 unsigned long last_pfn)
1041 pfn = max(start_pfn, pfn);
1042 pte = &pte[pfn_level_offset(pfn, level)];
1045 unsigned long level_pfn;
1046 struct dma_pte *level_pte;
1048 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1051 level_pfn = pfn & level_mask(level);
1052 level_pte = phys_to_virt(dma_pte_addr(pte));
1055 dma_pte_free_level(domain, level - 1, retain_level,
1056 level_pte, level_pfn, start_pfn,
1061 * Free the page table if we're below the level we want to
1062 * retain and the range covers the entire table.
1064 if (level < retain_level && !(start_pfn > level_pfn ||
1065 last_pfn < level_pfn + level_size(level) - 1)) {
1067 domain_flush_cache(domain, pte, sizeof(*pte));
1068 free_pgtable_page(level_pte);
1071 pfn += level_size(level);
1072 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1076 * clear last level (leaf) ptes and free page table pages below the
1077 * level we wish to keep intact.
1079 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1080 unsigned long start_pfn,
1081 unsigned long last_pfn,
1084 dma_pte_clear_range(domain, start_pfn, last_pfn);
1086 /* We don't need lock here; nobody else touches the iova range */
1087 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1088 domain->pgd, 0, start_pfn, last_pfn);
1091 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1092 free_pgtable_page(domain->pgd);
1097 /* When a page at a given level is being unlinked from its parent, we don't
1098 need to *modify* it at all. All we need to do is make a list of all the
1099 pages which can be freed just as soon as we've flushed the IOTLB and we
1100 know the hardware page-walk will no longer touch them.
1101 The 'pte' argument is the *parent* PTE, pointing to the page that is to be freed. */
1103 static void dma_pte_list_pagetables(struct dmar_domain *domain,
1104 int level, struct dma_pte *pte,
1105 struct list_head *freelist)
1109 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1110 list_add_tail(&pg->lru, freelist);
1115 pte = page_address(pg);
1117 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1118 dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1120 } while (!first_pte_in_page(pte));
1123 static void dma_pte_clear_level(struct dmar_domain *domain, int level,
1124 struct dma_pte *pte, unsigned long pfn,
1125 unsigned long start_pfn, unsigned long last_pfn,
1126 struct list_head *freelist)
1128 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1130 pfn = max(start_pfn, pfn);
1131 pte = &pte[pfn_level_offset(pfn, level)];
1134 unsigned long level_pfn = pfn & level_mask(level);
1136 if (!dma_pte_present(pte))
1139 /* If range covers entire pagetable, free it */
1140 if (start_pfn <= level_pfn &&
1141 last_pfn >= level_pfn + level_size(level) - 1) {
1142 /* These subordinate page tables are going away entirely. Don't
1143 bother to clear them; we're just going to *free* them. */
1144 if (level > 1 && !dma_pte_superpage(pte))
1145 dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1151 } else if (level > 1) {
1152 /* Recurse down into a level that isn't *entirely* obsolete */
1153 dma_pte_clear_level(domain, level - 1,
1154 phys_to_virt(dma_pte_addr(pte)),
1155 level_pfn, start_pfn, last_pfn,
1159 pfn = level_pfn + level_size(level);
1160 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1163 domain_flush_cache(domain, first_pte,
1164 (void *)++last_pte - (void *)first_pte);
1167 /* We can't just free the pages because the IOMMU may still be walking
1168 the page tables, and may have cached the intermediate levels. The
1169 pages can only be freed after the IOTLB flush has been done. */
1170 static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
1171 unsigned long last_pfn, struct list_head *freelist)
1173 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1174 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1175 BUG_ON(start_pfn > last_pfn);
1177 /* we don't need lock here; nobody else touches the iova range */
1178 dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1179 domain->pgd, 0, start_pfn, last_pfn, freelist);
1182 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1183 struct page *pgd_page = virt_to_page(domain->pgd);
1184 list_add_tail(&pgd_page->lru, freelist);
1189 /* iommu handling */
1190 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1192 struct root_entry *root;
1194 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1196 pr_err("Allocating root entry for %s failed\n",
1201 __iommu_flush_cache(iommu, root, ROOT_SIZE);
1202 iommu->root_entry = root;
1207 static void iommu_set_root_entry(struct intel_iommu *iommu)
1213 addr = virt_to_phys(iommu->root_entry);
1214 if (sm_supported(iommu))
1215 addr |= DMA_RTADDR_SMT;
1217 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1218 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1220 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1222 /* Make sure hardware complete it */
1223 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1224 readl, (sts & DMA_GSTS_RTPS), sts);
1226 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1228 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1229 if (sm_supported(iommu))
1230 qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
1231 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1234 void iommu_flush_write_buffer(struct intel_iommu *iommu)
1239 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1242 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1243 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1245 /* Make sure hardware complete it */
1246 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1247 readl, (!(val & DMA_GSTS_WBFS)), val);
1249 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1252 /* return value determines if we need a write buffer flush */
1253 static void __iommu_flush_context(struct intel_iommu *iommu,
1254 u16 did, u16 source_id, u8 function_mask,
1261 case DMA_CCMD_GLOBAL_INVL:
1262 val = DMA_CCMD_GLOBAL_INVL;
1264 case DMA_CCMD_DOMAIN_INVL:
1265 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1267 case DMA_CCMD_DEVICE_INVL:
1268 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1269 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1274 val |= DMA_CCMD_ICC;
1276 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1277 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1279 /* Make sure hardware complete it */
1280 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1281 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1283 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1286 /* return value determines if we need a write buffer flush */
1287 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1288 u64 addr, unsigned int size_order, u64 type)
1290 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1291 u64 val = 0, val_iva = 0;
1295 case DMA_TLB_GLOBAL_FLUSH:
1296 /* global flush doesn't need set IVA_REG */
1297 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1299 case DMA_TLB_DSI_FLUSH:
1300 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1302 case DMA_TLB_PSI_FLUSH:
1303 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1304 /* IH bit is passed in as part of address */
1305 val_iva = size_order | addr;
1310 /* Note: set drain read/write */
1313 * This is probably just to be extra safe. It looks like we can
1314 * ignore it without any impact.
1316 if (cap_read_drain(iommu->cap))
1317 val |= DMA_TLB_READ_DRAIN;
1319 if (cap_write_drain(iommu->cap))
1320 val |= DMA_TLB_WRITE_DRAIN;
1322 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1323 /* Note: Only uses first TLB reg currently */
1325 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1326 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1328 /* Make sure hardware complete it */
1329 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1330 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1332 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1334 /* check IOTLB invalidation granularity */
1335 if (DMA_TLB_IAIG(val) == 0)
1336 pr_err("Flush IOTLB failed\n");
1337 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1338 pr_debug("TLB flush request %Lx, actual %Lx\n",
1339 (unsigned long long)DMA_TLB_IIRG(type),
1340 (unsigned long long)DMA_TLB_IAIG(val));
1343 static struct device_domain_info *
1344 iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
1347 struct device_domain_info *info;
1352 spin_lock(&domain->lock);
1353 list_for_each_entry(info, &domain->devices, link) {
1354 if (info->iommu == iommu && info->bus == bus &&
1355 info->devfn == devfn) {
1356 spin_unlock(&domain->lock);
1357 return info->ats_supported ? info : NULL;
1360 spin_unlock(&domain->lock);
1365 static void domain_update_iotlb(struct dmar_domain *domain)
1367 struct device_domain_info *info;
1368 bool has_iotlb_device = false;
1370 spin_lock(&domain->lock);
1371 list_for_each_entry(info, &domain->devices, link) {
1372 if (info->ats_enabled) {
1373 has_iotlb_device = true;
1377 domain->has_iotlb_device = has_iotlb_device;
1378 spin_unlock(&domain->lock);
1381 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1383 struct pci_dev *pdev;
1385 if (!info || !dev_is_pci(info->dev))
1388 pdev = to_pci_dev(info->dev);
1389 /* For an IOMMU that supports device IOTLB throttling (DIT), we assign the
1390 * PFSID to the invalidation descriptor of a VF so that the IOMMU HW can
1391 * gauge the queue depth at the PF level. If DIT is not supported, the PFSID
1392 * field is treated as reserved and should be set to 0.
1394 if (!ecap_dit(info->iommu->ecap))
1397 struct pci_dev *pf_pdev;
1399 /* pdev will be returned if device is not a vf */
1400 pf_pdev = pci_physfn(pdev);
1401 info->pfsid = pci_dev_id(pf_pdev);
1404 #ifdef CONFIG_INTEL_IOMMU_SVM
1405 /* The PCIe spec, in its wisdom, declares that the behaviour of
1406 the device if you enable PASID support after ATS support is
1407 undefined. So always enable PASID support on devices which
1408 have it, even if we can't yet know if we're ever going to use it. */
1410 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1411 info->pasid_enabled = 1;
1413 if (info->pri_supported &&
1414 (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
1415 !pci_reset_pri(pdev) && !pci_enable_pri(pdev, PRQ_DEPTH))
1416 info->pri_enabled = 1;
1418 if (info->ats_supported && pci_ats_page_aligned(pdev) &&
1419 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1420 info->ats_enabled = 1;
1421 domain_update_iotlb(info->domain);
1422 info->ats_qdep = pci_ats_queue_depth(pdev);
1426 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1428 struct pci_dev *pdev;
1430 if (!dev_is_pci(info->dev))
1433 pdev = to_pci_dev(info->dev);
1435 if (info->ats_enabled) {
1436 pci_disable_ats(pdev);
1437 info->ats_enabled = 0;
1438 domain_update_iotlb(info->domain);
1440 #ifdef CONFIG_INTEL_IOMMU_SVM
1441 if (info->pri_enabled) {
1442 pci_disable_pri(pdev);
1443 info->pri_enabled = 0;
1445 if (info->pasid_enabled) {
1446 pci_disable_pasid(pdev);
1447 info->pasid_enabled = 0;
1452 static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
1453 u64 addr, unsigned int mask)
1457 if (!info || !info->ats_enabled)
1460 sid = info->bus << 8 | info->devfn;
1461 qdep = info->ats_qdep;
1462 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1466 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1467 u64 addr, unsigned mask)
1469 struct device_domain_info *info;
1471 if (!domain->has_iotlb_device)
1474 spin_lock(&domain->lock);
1475 list_for_each_entry(info, &domain->devices, link)
1476 __iommu_flush_dev_iotlb(info, addr, mask);
1477 spin_unlock(&domain->lock);
1480 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1481 struct dmar_domain *domain,
1482 unsigned long pfn, unsigned int pages,
1485 unsigned int aligned_pages = __roundup_pow_of_two(pages);
1486 unsigned int mask = ilog2(aligned_pages);
1487 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1488 u16 did = domain_id_iommu(domain, iommu);
1495 if (domain_use_first_level(domain)) {
1496 qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih);
1498 unsigned long bitmask = aligned_pages - 1;
1501 * PSI masks the low order bits of the base address. If the
1502 * address isn't aligned to the mask, then compute a mask value
1503 * needed to ensure the target range is flushed.
1505 if (unlikely(bitmask & pfn)) {
1506 unsigned long end_pfn = pfn + pages - 1, shared_bits;
1509 * Since end_pfn <= pfn + bitmask, the only way bits
1510 * higher than bitmask can differ in pfn and end_pfn is
1511 * by carrying. This means after masking out bitmask,
1512 * high bits starting with the first set bit in
1513 * shared_bits are all equal in both pfn and end_pfn.
1515 shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
1516 mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
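/*
 * Worked example (informational): pfn = 7, pages = 2 gives aligned_pages = 2
 * and bitmask = 1, but the range 7..8 is not 2-page aligned. With
 * end_pfn = 8, shared_bits = ~(7 ^ 8) & ~1 has its lowest set bit at
 * position 4, so mask = 4 and the 16-page aligned region 0..15 is
 * flushed, which covers 7..8.
 */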
1520 * Fallback to domain selective flush if no PSI support or
1521 * the size is too big.
1523 if (!cap_pgsel_inv(iommu->cap) ||
1524 mask > cap_max_amask_val(iommu->cap))
1525 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1528 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1533 * In caching mode, changes of pages from non-present to present require
1534 * flush. However, device IOTLB doesn't need to be flushed in this case.
1536 if (!cap_caching_mode(iommu->cap) || !map)
1537 iommu_flush_dev_iotlb(domain, addr, mask);
1540 /* Notification for newly created mappings */
1541 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1542 struct dmar_domain *domain,
1543 unsigned long pfn, unsigned int pages)
1546 * It's a non-present to present mapping. Only flush if caching mode and second level.
1549 if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
1550 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1552 iommu_flush_write_buffer(iommu);
1555 static void intel_flush_iotlb_all(struct iommu_domain *domain)
1557 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
1558 struct iommu_domain_info *info;
1561 xa_for_each(&dmar_domain->iommu_array, idx, info) {
1562 struct intel_iommu *iommu = info->iommu;
1563 u16 did = domain_id_iommu(dmar_domain, iommu);
1565 if (domain_use_first_level(dmar_domain))
1566 qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0);
1568 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1571 if (!cap_caching_mode(iommu->cap))
1572 iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH);
1576 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1579 unsigned long flags;
1581 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1584 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1585 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1586 pmen &= ~DMA_PMEN_EPM;
1587 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1589 /* wait for the protected region status bit to clear */
1590 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1591 readl, !(pmen & DMA_PMEN_PRS), pmen);
1593 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1596 static void iommu_enable_translation(struct intel_iommu *iommu)
1599 unsigned long flags;
1601 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1602 iommu->gcmd |= DMA_GCMD_TE;
1603 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1605 /* Make sure hardware complete it */
1606 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1607 readl, (sts & DMA_GSTS_TES), sts);
1609 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1612 static void iommu_disable_translation(struct intel_iommu *iommu)
1617 if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
1618 (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
1621 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1622 iommu->gcmd &= ~DMA_GCMD_TE;
1623 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1625 /* Make sure hardware complete it */
1626 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1627 readl, (!(sts & DMA_GSTS_TES)), sts);
1629 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1632 static int iommu_init_domains(struct intel_iommu *iommu)
1636 ndomains = cap_ndoms(iommu->cap);
1637 pr_debug("%s: Number of Domains supported <%d>\n",
1638 iommu->name, ndomains);
1640 spin_lock_init(&iommu->lock);
1642 iommu->domain_ids = bitmap_zalloc(ndomains, GFP_KERNEL);
1643 if (!iommu->domain_ids)
1647 * If Caching mode is set, then invalid translations are tagged
1648 * with domain-id 0, hence we need to pre-allocate it. We also
1649 * use domain-id 0 as a marker for non-allocated domain-id, so
1650 * make sure it is not used for a real domain.
1652 set_bit(0, iommu->domain_ids);
1655 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1656 * entry for first-level or pass-through translation modes should
1657 * be programmed with a domain id different from those used for
1658 * second-level or nested translation. We reserve a domain id for
1661 if (sm_supported(iommu))
1662 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1667 static void disable_dmar_iommu(struct intel_iommu *iommu)
1669 if (!iommu->domain_ids)
1673 * All iommu domains must have been detached from the devices,
1674 * hence there should be no domain IDs in use.
1676 if (WARN_ON(bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap))
1677 > NUM_RESERVED_DID))
1680 if (iommu->gcmd & DMA_GCMD_TE)
1681 iommu_disable_translation(iommu);
1684 static void free_dmar_iommu(struct intel_iommu *iommu)
1686 if (iommu->domain_ids) {
1687 bitmap_free(iommu->domain_ids);
1688 iommu->domain_ids = NULL;
1691 /* free context mapping */
1692 free_context_table(iommu);
1694 #ifdef CONFIG_INTEL_IOMMU_SVM
1695 if (pasid_supported(iommu)) {
1696 if (ecap_prs(iommu->ecap))
1697 intel_svm_finish_prq(iommu);
1699 if (vccap_pasid(iommu->vccap))
1700 ioasid_unregister_allocator(&iommu->pasid_allocator);
1706 * Check and return whether first level is used by default for
1709 static bool first_level_by_default(unsigned int type)
1711 /* Only SL is available in legacy mode */
1712 if (!scalable_mode_support())
1715 /* Only one level (either FL or SL) is available, just use it */
1716 if (intel_cap_flts_sanity() ^ intel_cap_slts_sanity())
1717 return intel_cap_flts_sanity();
1719 /* Both levels are available, decide it based on domain type */
1720 return type != IOMMU_DOMAIN_UNMANAGED;
1723 static struct dmar_domain *alloc_domain(unsigned int type)
1725 struct dmar_domain *domain;
1727 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
1731 domain->nid = NUMA_NO_NODE;
1732 if (first_level_by_default(type))
1733 domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
1734 domain->has_iotlb_device = false;
1735 INIT_LIST_HEAD(&domain->devices);
1736 spin_lock_init(&domain->lock);
1737 xa_init(&domain->iommu_array);
1742 static int domain_attach_iommu(struct dmar_domain *domain,
1743 struct intel_iommu *iommu)
1745 struct iommu_domain_info *info, *curr;
1746 unsigned long ndomains;
1747 int num, ret = -ENOSPC;
1749 info = kzalloc(sizeof(*info), GFP_KERNEL);
1753 spin_lock(&iommu->lock);
1754 curr = xa_load(&domain->iommu_array, iommu->seq_id);
1757 spin_unlock(&iommu->lock);
1762 ndomains = cap_ndoms(iommu->cap);
1763 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1764 if (num >= ndomains) {
1765 pr_err("%s: No free domain ids\n", iommu->name);
1769 set_bit(num, iommu->domain_ids);
1772 info->iommu = iommu;
1773 curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
1774 NULL, info, GFP_ATOMIC);
1776 ret = xa_err(curr) ? : -EBUSY;
1779 domain_update_iommu_cap(domain);
1781 spin_unlock(&iommu->lock);
1785 clear_bit(info->did, iommu->domain_ids);
1787 spin_unlock(&iommu->lock);
1792 static void domain_detach_iommu(struct dmar_domain *domain,
1793 struct intel_iommu *iommu)
1795 struct iommu_domain_info *info;
1797 spin_lock(&iommu->lock);
1798 info = xa_load(&domain->iommu_array, iommu->seq_id);
1799 if (--info->refcnt == 0) {
1800 clear_bit(info->did, iommu->domain_ids);
1801 xa_erase(&domain->iommu_array, iommu->seq_id);
1802 domain->nid = NUMA_NO_NODE;
1803 domain_update_iommu_cap(domain);
1806 spin_unlock(&iommu->lock);
1809 static inline int guestwidth_to_adjustwidth(int gaw)
1812 int r = (gaw - 12) % 9;
1823 static void domain_exit(struct dmar_domain *domain)
1826 LIST_HEAD(freelist);
1828 domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist);
1829 put_pages_list(&freelist);
1832 if (WARN_ON(!list_empty(&domain->devices)))
1839 * Get the PASID directory size for a scalable mode context entry.
1840 * A value of X in the PDTS field of a scalable mode context entry
1841 * indicates a PASID directory with 2^(X + 7) entries.
1843 static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1845 unsigned long pds, max_pde;
1847 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1848 pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
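/*
 * Worked example (informational, assuming PASID_PDE_SHIFT == 6): a
 * max_pasid of 2^20 gives max_pde = 2^14, so pds = 14 and the PDTS
 * field ends up as 14 - 7 = 7, i.e. 2^(7 + 7) = 16384 directory
 * entries, each covering 64 PASID-table entries.
 */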
1856 * Set the RID_PASID field of a scalable mode context entry. The
1857 * IOMMU hardware will use the PASID value set in this field for
1858 * DMA translations of DMA requests without PASID.
1861 context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1863 context->hi |= pasid & ((1 << 20) - 1);
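/* The RID_PASID value lands in the low 20 bits of context->hi; PASIDs are at most 20 bits wide. */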
1867 * Set the DTE(Device-TLB Enable) field of a scalable mode context
1870 static inline void context_set_sm_dte(struct context_entry *context)
1872 context->lo |= (1 << 2);
1876 * Set the PRE(Page Request Enable) field of a scalable mode context
1879 static inline void context_set_sm_pre(struct context_entry *context)
1881 context->lo |= (1 << 4);
1884 /* Convert value to context PASID directory size field coding. */
1885 #define context_pdts(pds) (((pds) & 0x7) << 9)
1887 static int domain_context_mapping_one(struct dmar_domain *domain,
1888 struct intel_iommu *iommu,
1889 struct pasid_table *table,
1892 struct device_domain_info *info =
1893 iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1894 u16 did = domain_id_iommu(domain, iommu);
1895 int translation = CONTEXT_TT_MULTI_LEVEL;
1896 struct context_entry *context;
1901 if (hw_pass_through && domain_type_is_si(domain))
1902 translation = CONTEXT_TT_PASS_THROUGH;
1904 pr_debug("Set context mapping for %02x:%02x.%d\n",
1905 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1907 BUG_ON(!domain->pgd);
1909 spin_lock(&iommu->lock);
1911 context = iommu_context_addr(iommu, bus, devfn, 1);
1916 if (context_present(context))
1920 * For kdump cases, old valid entries may be cached due to the
1921 * in-flight DMA and copied pgtable, but there is no unmapping
1922 * behaviour for them, thus we need an explicit cache flush for
1923 * the newly-mapped device. For kdump, at this point, the device
1924 * is supposed to finish reset at its driver probe stage, so no
1925 * in-flight DMA will exist, and we don't need to worry about it hereafter.
1928 if (context_copied(context)) {
1929 u16 did_old = context_domain_id(context);
1931 if (did_old < cap_ndoms(iommu->cap)) {
1932 iommu->flush.flush_context(iommu, did_old,
1933 (((u16)bus) << 8) | devfn,
1934 DMA_CCMD_MASK_NOBIT,
1935 DMA_CCMD_DEVICE_INVL);
1936 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
1941 context_clear_entry(context);
1943 if (sm_supported(iommu)) {
1948 /* Setup the PASID DIR pointer: */
1949 pds = context_get_sm_pds(table);
1950 context->lo = (u64)virt_to_phys(table->table) |
1953 /* Setup the RID_PASID field: */
1954 context_set_sm_rid2pasid(context, PASID_RID2PASID);
1957 * Setup the Device-TLB enable bit and Page request
1960 if (info && info->ats_supported)
1961 context_set_sm_dte(context);
1962 if (info && info->pri_supported)
1963 context_set_sm_pre(context);
1965 struct dma_pte *pgd = domain->pgd;
1968 context_set_domain_id(context, did);
1970 if (translation != CONTEXT_TT_PASS_THROUGH) {
1972 * Skip top levels of page tables for iommu which has
1973 * less agaw than default. Unnecessary for PT mode.
1975 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
1977 pgd = phys_to_virt(dma_pte_addr(pgd));
1978 if (!dma_pte_present(pgd))
1982 if (info && info->ats_supported)
1983 translation = CONTEXT_TT_DEV_IOTLB;
1985 translation = CONTEXT_TT_MULTI_LEVEL;
1987 context_set_address_root(context, virt_to_phys(pgd));
1988 context_set_address_width(context, agaw);
1991 * In pass through mode, AW must be programmed to
1992 * indicate the largest AGAW value supported by
1993 * hardware. And ASR is ignored by hardware.
1995 context_set_address_width(context, iommu->msagaw);
1998 context_set_translation_type(context, translation);
2001 context_set_fault_enable(context);
2002 context_set_present(context);
2003 if (!ecap_coherent(iommu->ecap))
2004 clflush_cache_range(context, sizeof(*context));
2007 * It's a non-present to present mapping. If hardware doesn't cache
2008 * non-present entries we only need to flush the write-buffer. If it
2009 * _does_ cache non-present entries, then it does so in the special
2010 * domain #0, which we have to flush:
2012 if (cap_caching_mode(iommu->cap)) {
2013 iommu->flush.flush_context(iommu, 0,
2014 (((u16)bus) << 8) | devfn,
2015 DMA_CCMD_MASK_NOBIT,
2016 DMA_CCMD_DEVICE_INVL);
2017 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2019 iommu_flush_write_buffer(iommu);
2021 iommu_enable_dev_iotlb(info);
2026 spin_unlock(&iommu->lock);
2031 struct domain_context_mapping_data {
2032 struct dmar_domain *domain;
2033 struct intel_iommu *iommu;
2034 struct pasid_table *table;
2037 static int domain_context_mapping_cb(struct pci_dev *pdev,
2038 u16 alias, void *opaque)
2040 struct domain_context_mapping_data *data = opaque;
2042 return domain_context_mapping_one(data->domain, data->iommu,
2043 data->table, PCI_BUS_NUM(alias),
2048 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2050 struct domain_context_mapping_data data;
2051 struct pasid_table *table;
2052 struct intel_iommu *iommu;
2055 iommu = device_to_iommu(dev, &bus, &devfn);
2059 table = intel_pasid_get_table(dev);
2061 if (!dev_is_pci(dev))
2062 return domain_context_mapping_one(domain, iommu, table,
2065 data.domain = domain;
2069 return pci_for_each_dma_alias(to_pci_dev(dev),
2070 &domain_context_mapping_cb, &data);
2073 static int domain_context_mapped_cb(struct pci_dev *pdev,
2074 u16 alias, void *opaque)
2076 struct intel_iommu *iommu = opaque;
2078 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2081 static int domain_context_mapped(struct device *dev)
2083 struct intel_iommu *iommu;
2086 iommu = device_to_iommu(dev, &bus, &devfn);
2090 if (!dev_is_pci(dev))
2091 return device_context_mapped(iommu, bus, devfn);
2093 return !pci_for_each_dma_alias(to_pci_dev(dev),
2094 domain_context_mapped_cb, iommu);
2097 /* Returns a number of VTD pages, but aligned to MM page size */
2098 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2101 host_addr &= ~PAGE_MASK;
2102 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
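/*
 * Worked example for aligned_nrpages() above (informational):
 * host_addr = 0x1234 and size = 0x2000 give PAGE_ALIGN(0x234 + 0x2000)
 * >> 12 = 3 VT-d pages, one more than the size alone would suggest.
 */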
2105 /* Return largest possible superpage level for a given mapping */
2106 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2107 unsigned long iov_pfn,
2108 unsigned long phy_pfn,
2109 unsigned long pages)
2111 int support, level = 1;
2112 unsigned long pfnmerge;
2114 support = domain->iommu_superpage;
2116 /* To use a large page, the virtual *and* physical addresses
2117 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2118 of them will mean we have to use smaller pages. So just
2119 merge them and check both at once. */
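/*
 * Worked example (informational): iov_pfn = 0x200, phy_pfn = 0x400 and
 * pages = 0x200 give pfnmerge = 0x600, whose low 9 bits are clear, so a
 * level-2 (2MiB) superpage can be used if the hardware supports it.
 */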
2120 pfnmerge = iov_pfn | phy_pfn;
2122 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2123 pages >>= VTD_STRIDE_SHIFT;
2126 pfnmerge >>= VTD_STRIDE_SHIFT;
2134 * Ensure that old small page tables are removed to make room for superpage(s).
2135 * We're going to add new large pages, so make sure we don't remove their parent
2136 * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared.
2138 static void switch_to_super_page(struct dmar_domain *domain,
2139 unsigned long start_pfn,
2140 unsigned long end_pfn, int level)
2142 unsigned long lvl_pages = lvl_to_nr_pages(level);
2143 struct iommu_domain_info *info;
2144 struct dma_pte *pte = NULL;
2147 while (start_pfn <= end_pfn) {
2149 pte = pfn_to_dma_pte(domain, start_pfn, &level);
2151 if (dma_pte_present(pte)) {
2152 dma_pte_free_pagetable(domain, start_pfn,
2153 start_pfn + lvl_pages - 1,
2156 xa_for_each(&domain->iommu_array, i, info)
2157 iommu_flush_iotlb_psi(info->iommu, domain,
2158 start_pfn, lvl_pages,
2163 start_pfn += lvl_pages;
2164 if (first_pte_in_page(pte))
2170 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2171 unsigned long phys_pfn, unsigned long nr_pages, int prot)
2173 struct dma_pte *first_pte = NULL, *pte = NULL;
2174 unsigned int largepage_lvl = 0;
2175 unsigned long lvl_pages = 0;
2179 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2181 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2184 attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
2185 attr |= DMA_FL_PTE_PRESENT;
2186 if (domain_use_first_level(domain)) {
2187 attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
2188 if (prot & DMA_PTE_WRITE)
2189 attr |= DMA_FL_PTE_DIRTY;
2192 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
2194 while (nr_pages > 0) {
2198 largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
2199 phys_pfn, nr_pages);
2201 pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2206 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2208 /* It is a large page */
2209 if (largepage_lvl > 1) {
2210 unsigned long end_pfn;
2211 unsigned long pages_to_remove;
2213 pteval |= DMA_PTE_LARGE_PAGE;
2214 pages_to_remove = min_t(unsigned long, nr_pages,
2215 nr_pte_to_next_page(pte) * lvl_pages);
2216 end_pfn = iov_pfn + pages_to_remove - 1;
2217 switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
2219 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2223 /* We don't need lock here, nobody else
2224 * touches the iova range
2226 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2228 static int dumps = 5;
2229 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2230 iov_pfn, tmp, (unsigned long long)pteval);
2233 debug_dma_dump_mappings(NULL);
2238 nr_pages -= lvl_pages;
2239 iov_pfn += lvl_pages;
2240 phys_pfn += lvl_pages;
2241 pteval += lvl_pages * VTD_PAGE_SIZE;
2243 /* If the next PTE would be the first in a new page, then we
2244 * need to flush the cache on the entries we've just written.
2245 * And then we'll need to recalculate 'pte', so clear it and
2246 * let it get set again in the if (!pte) block above.
2248 * If we're done (!nr_pages) we need to flush the cache too.
2250 * Also if we've been setting superpages, we may need to
2251 * recalculate 'pte' and switch back to smaller pages for the
2252 * end of the mapping, if the trailing size is not enough to
2253 * use another superpage (i.e. nr_pages < lvl_pages).
2256 if (!nr_pages || first_pte_in_page(pte) ||
2257 (largepage_lvl > 1 && nr_pages < lvl_pages)) {
2258 domain_flush_cache(domain, first_pte,
2259 (void *)pte - (void *)first_pte);
2267 static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
2269 struct intel_iommu *iommu = info->iommu;
2270 struct context_entry *context;
2276 spin_lock(&iommu->lock);
2277 context = iommu_context_addr(iommu, bus, devfn, 0);
2279 spin_unlock(&iommu->lock);
2283 if (sm_supported(iommu)) {
2284 if (hw_pass_through && domain_type_is_si(info->domain))
2285 did_old = FLPT_DEFAULT_DID;
2287 did_old = domain_id_iommu(info->domain, iommu);
2289 did_old = context_domain_id(context);
2292 context_clear_entry(context);
2293 __iommu_flush_cache(iommu, context, sizeof(*context));
2294 spin_unlock(&iommu->lock);
2295 iommu->flush.flush_context(iommu,
2297 (((u16)bus) << 8) | devfn,
2298 DMA_CCMD_MASK_NOBIT,
2299 DMA_CCMD_DEVICE_INVL);
2301 if (sm_supported(iommu))
2302 qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0);
2304 iommu->flush.flush_iotlb(iommu,
2310 __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH);
2313 static int domain_setup_first_level(struct intel_iommu *iommu,
2314 struct dmar_domain *domain,
2318 struct dma_pte *pgd = domain->pgd;
2323 * Skip top levels of page tables for iommu which has
2324 * less agaw than default. Unnecessary for PT mode.
2326 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2327 pgd = phys_to_virt(dma_pte_addr(pgd));
2328 if (!dma_pte_present(pgd))
2332 level = agaw_to_level(agaw);
2333 if (level != 4 && level != 5)
2336 if (pasid != PASID_RID2PASID)
2337 flags |= PASID_FLAG_SUPERVISOR_MODE;
2339 flags |= PASID_FLAG_FL5LP;
2341 if (domain->force_snooping)
2342 flags |= PASID_FLAG_PAGE_SNOOP;
2344 return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
2345 domain_id_iommu(domain, iommu),
2349 static bool dev_is_real_dma_subdevice(struct device *dev)
2351 return dev && dev_is_pci(dev) &&
2352 pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
2355 static int iommu_domain_identity_map(struct dmar_domain *domain,
2356 unsigned long first_vpfn,
2357 unsigned long last_vpfn)
2360 * RMRR range might overlap with the physical memory range; clear it first.
2363 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2365 return __domain_mapping(domain, first_vpfn,
2366 first_vpfn, last_vpfn - first_vpfn + 1,
2367 DMA_PTE_READ|DMA_PTE_WRITE);
2370 static int md_domain_init(struct dmar_domain *domain, int guest_width);
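/*
 * Build the static identity (si) domain: identity-map every usable
 * physical memory range on each online node, plus all RMRR ranges,
 * so devices attached to it get a 1:1 IOVA-to-physical mapping.
 */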
2372 static int __init si_domain_init(int hw)
2374 struct dmar_rmrr_unit *rmrr;
2378 si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY);
2382 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2383 domain_exit(si_domain);
2390 for_each_online_node(nid) {
2391 unsigned long start_pfn, end_pfn;
2394 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2395 ret = iommu_domain_identity_map(si_domain,
2396 mm_to_dma_pfn(start_pfn),
2397 mm_to_dma_pfn(end_pfn));
2404 /* Identity map the RMRRs so that devices with RMRRs can also use the si_domain. */
2407 for_each_rmrr_units(rmrr) {
2408 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2410 unsigned long long start = rmrr->base_address;
2411 unsigned long long end = rmrr->end_address;
2413 if (WARN_ON(end < start ||
2414 end >> agaw_to_width(si_domain->agaw)))
2417 ret = iommu_domain_identity_map(si_domain,
2418 mm_to_dma_pfn(start >> PAGE_SHIFT),
2419 mm_to_dma_pfn(end >> PAGE_SHIFT));
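/*
 * Attach @dev to @domain: link the device into the domain's device
 * list, allocate a PASID table in scalable mode, program the
 * RID2PASID entry (pass-through, first-level or second-level as
 * appropriate), and finally install the context entry.
 */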
2428 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2430 struct device_domain_info *info = dev_iommu_priv_get(dev);
2431 struct intel_iommu *iommu;
2435 iommu = device_to_iommu(dev, &bus, &devfn);
2439 ret = domain_attach_iommu(domain, iommu);
2442 info->domain = domain;
2443 spin_lock(&domain->lock);
2444 list_add(&info->link, &domain->devices);
2445 spin_unlock(&domain->lock);
2447 /* PASID table is mandatory for a PCI device in scalable mode. */
2448 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
2449 ret = intel_pasid_alloc_table(dev);
2451 dev_err(dev, "PASID table allocation failed\n");
2452 dmar_remove_one_dev_info(dev);
2456 /* Setup the PASID entry for requests without PASID: */
2457 if (hw_pass_through && domain_type_is_si(domain))
2458 ret = intel_pasid_setup_pass_through(iommu, domain,
2459 dev, PASID_RID2PASID);
2460 else if (domain_use_first_level(domain))
2461 ret = domain_setup_first_level(iommu, domain, dev,
2464 ret = intel_pasid_setup_second_level(iommu, domain,
2465 dev, PASID_RID2PASID);
2467 dev_err(dev, "Setup RID2PASID failed\n");
2468 dmar_remove_one_dev_info(dev);
2473 ret = domain_context_mapping(domain, dev);
2475 dev_err(dev, "Domain context map failed\n");
2476 dmar_remove_one_dev_info(dev);
2483 static bool device_has_rmrr(struct device *dev)
2485 struct dmar_rmrr_unit *rmrr;
2490 for_each_rmrr_units(rmrr) {
2492 * Return TRUE if this RMRR contains the device being queried.
2495 for_each_active_dev_scope(rmrr->devices,
2496 rmrr->devices_cnt, i, tmp)
2498 is_downstream_to_pci_bridge(dev, tmp)) {
2508 * device_rmrr_is_relaxable - Test whether the RMRR of this device
2509 * is relaxable (ie. is allowed to be not enforced under some conditions)
2510 * @dev: device handle
2512 * We assume that PCI USB devices with RMRRs have them largely
2513 * for historical reasons and that the RMRR space is not actively used post
2514 * boot. This exclusion may change if vendors begin to abuse it.
2516 * The same exception is made for graphics devices, with the requirement that
2517 * any use of the RMRR regions will be torn down before assigning the device to a guest.
2520 * Return: true if the RMRR is relaxable, false otherwise
2522 static bool device_rmrr_is_relaxable(struct device *dev)
2524 struct pci_dev *pdev;
2526 if (!dev_is_pci(dev))
2529 pdev = to_pci_dev(dev);
2530 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2537 * There are a couple cases where we need to restrict the functionality of
2538 * devices associated with RMRRs. The first is when evaluating a device for
2539 * identity mapping because problems exist when devices are moved in and out
2540 * of domains and their respective RMRR information is lost. This means that
2541 * a device with associated RMRRs will never be in a "passthrough" domain.
2542 * The second is use of the device through the IOMMU API. This interface
2543 * expects to have full control of the IOVA space for the device. We cannot
2544 * satisfy both the requirement that RMRR access is maintained and have an
2545 * unencumbered IOVA space. We also have no ability to quiesce the device's
2546 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2547 * We therefore prevent devices associated with an RMRR from participating in
2548 * the IOMMU API, which eliminates them from device assignment.
2550 * In both cases, devices which have relaxable RMRRs are not concerned by this
2551 * restriction. See device_rmrr_is_relaxable comment.
2553 static bool device_is_rmrr_locked(struct device *dev)
2555 if (!device_has_rmrr(dev))
2558 if (device_rmrr_is_relaxable(dev))
2565 * Return the required default domain type for a specific device.
2567 * @dev: the device in query
2571 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2572 * - IOMMU_DOMAIN_IDENTITY: device requires an identity mapping domain
2573 * - 0: both identity and dynamic domains work for this device
2575 static int device_def_domain_type(struct device *dev)
2577 if (dev_is_pci(dev)) {
2578 struct pci_dev *pdev = to_pci_dev(dev);
2580 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2581 return IOMMU_DOMAIN_IDENTITY;
2583 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2584 return IOMMU_DOMAIN_IDENTITY;
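/*
 * Pick the invalidation backend for this IOMMU: queued invalidation
 * when it can be enabled, otherwise register-based context and IOTLB
 * invalidation.
 */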
2590 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2593 * Start from a sane iommu hardware state.
2594 * If queued invalidation was already initialized by us
2595 * (for example, while enabling interrupt remapping), then
2596 * things are already rolling from a sane state.
2600 * Clear any previous faults.
2602 dmar_fault(-1, iommu);
2604 * Disable queued invalidation if supported and already enabled
2605 * before OS handover.
2607 dmar_disable_qi(iommu);
2610 if (dmar_enable_qi(iommu)) {
2612 * Queued Invalidate not enabled, use Register Based Invalidate
2614 iommu->flush.flush_context = __iommu_flush_context;
2615 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2616 pr_info("%s: Using Register based invalidation\n",
2619 iommu->flush.flush_context = qi_flush_context;
2620 iommu->flush.flush_iotlb = qi_flush_iotlb;
2621 pr_info("%s: Using Queued invalidation\n", iommu->name);
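/*
 * Copy one bus's context table from the previous kernel.  Domain IDs
 * found in present entries are reserved in this IOMMU's bitmap, and
 * each copied entry is marked so it can be recognized later.
 */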
2625 static int copy_context_table(struct intel_iommu *iommu,
2626 struct root_entry *old_re,
2627 struct context_entry **tbl,
2630 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2631 struct context_entry *new_ce = NULL, ce;
2632 struct context_entry *old_ce = NULL;
2633 struct root_entry re;
2634 phys_addr_t old_ce_phys;
2636 tbl_idx = ext ? bus * 2 : bus;
2637 memcpy(&re, old_re, sizeof(re));
2639 for (devfn = 0; devfn < 256; devfn++) {
2640 /* First calculate the correct index */
2641 idx = (ext ? devfn * 2 : devfn) % 256;
2644 /* First save what we may have and clean up */
2646 tbl[tbl_idx] = new_ce;
2647 __iommu_flush_cache(iommu, new_ce,
2657 old_ce_phys = root_entry_lctp(&re);
2659 old_ce_phys = root_entry_uctp(&re);
2662 if (ext && devfn == 0) {
2663 /* No LCTP, try UCTP */
2672 old_ce = memremap(old_ce_phys, PAGE_SIZE,
2677 new_ce = alloc_pgtable_page(iommu->node);
2684 /* Now copy the context entry */
2685 memcpy(&ce, old_ce + idx, sizeof(ce));
2687 if (!__context_present(&ce))
2690 did = context_domain_id(&ce);
2691 if (did >= 0 && did < cap_ndoms(iommu->cap))
2692 set_bit(did, iommu->domain_ids);
2695 * We need a marker for copied context entries. This
2696 * marker needs to work for the old format as well as
2697 * for extended context entries.
2699 * Bit 67 of the context entry is used. In the old
2700 * format this bit is available to software, in the
2701 * extended format it is the PGE bit, but PGE is ignored
2702 * by HW if PASIDs are disabled (and thus still sane).
2705 * So disable PASIDs first and then mark the entry
2706 * copied. This means that we don't copy PASID
2707 * translations from the old kernel, but this is fine as
2708 * faults there are not fatal. */
2710 context_clear_pasid_enable(&ce);
2711 context_set_copied(&ce);
2716 tbl[tbl_idx + pos] = new_ce;
2718 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
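/*
 * Copy the root/context tables left programmed by the previous
 * (crashed) kernel so in-flight DMA keeps working, then point this
 * kernel's root entries at the copies.
 */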
2727 static int copy_translation_tables(struct intel_iommu *iommu)
2729 struct context_entry **ctxt_tbls;
2730 struct root_entry *old_rt;
2731 phys_addr_t old_rt_phys;
2732 int ctxt_table_entries;
2737 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
2738 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
2739 new_ext = !!ecap_ecs(iommu->ecap);
2742 * The RTT bit can only be changed when translation is disabled,
2743 * but disabling translation means to open a window for data
2744 * corruption. So bail out and don't copy anything if we would
2745 * have to change the bit. */
2750 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
2754 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
2758 /* This is too big for the stack - allocate it from slab */
2759 ctxt_table_entries = ext ? 512 : 256;
2761 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
2765 for (bus = 0; bus < 256; bus++) {
2766 ret = copy_context_table(iommu, &old_rt[bus],
2767 ctxt_tbls, bus, ext);
2769 pr_err("%s: Failed to copy context table for bus %d\n",
2775 spin_lock(&iommu->lock);
2777 /* Context tables are copied, now write them to the root_entry table */
2778 for (bus = 0; bus < 256; bus++) {
2779 int idx = ext ? bus * 2 : bus;
2782 if (ctxt_tbls[idx]) {
2783 val = virt_to_phys(ctxt_tbls[idx]) | 1;
2784 iommu->root_entry[bus].lo = val;
2787 if (!ext || !ctxt_tbls[idx + 1])
2790 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
2791 iommu->root_entry[bus].hi = val;
2794 spin_unlock(&iommu->lock);
2798 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
2808 #ifdef CONFIG_INTEL_IOMMU_SVM
2809 static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
2811 struct intel_iommu *iommu = data;
2815 return INVALID_IOASID;
2817 * VT-d virtual command interface always uses the full 20 bit
2818 * PASID range. Host can partition guest PASID range based on
2819 * policies but it is out of guest's control.
2821 if (min < PASID_MIN || max > intel_pasid_max_id)
2822 return INVALID_IOASID;
2824 if (vcmd_alloc_pasid(iommu, &ioasid))
2825 return INVALID_IOASID;
2830 static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
2832 struct intel_iommu *iommu = data;
2837 * The sanity check of the ioasid owner is done at an upper layer, e.g. VFIO.
2838 * We can only free the PASID when all the devices are unbound.
2840 if (ioasid_find(NULL, ioasid, NULL)) {
2841 pr_alert("Cannot free active IOASID %d\n", ioasid);
2844 vcmd_free_pasid(iommu, ioasid);
2847 static void register_pasid_allocator(struct intel_iommu *iommu)
2850 * If we are running in the host, there is no need for a custom allocator,
2851 * since PASIDs are allocated from the host system-wide.
2853 if (!cap_caching_mode(iommu->cap))
2856 if (!sm_supported(iommu)) {
2857 pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
2862 * Register a custom PASID allocator if we are running in a guest;
2863 * guest PASIDs must be obtained via the virtual command interface.
2864 * There can be multiple vIOMMUs in each guest but only one allocator
2865 * is active. All vIOMMU allocators will eventually call the same host allocator.
2868 if (!vccap_pasid(iommu->vccap))
2871 pr_info("Register custom PASID allocator\n");
2872 iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
2873 iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
2874 iommu->pasid_allocator.pdata = (void *)iommu;
2875 if (ioasid_register_allocator(&iommu->pasid_allocator)) {
2876 pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
2878 * Disable scalable mode on this IOMMU if there
2879 * is no custom allocator. Mixing SM-capable and
2880 * non-SM vIOMMUs is not supported. */
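/*
 * One-time DMA remapping initialization: audit capabilities, set up
 * per-IOMMU domain IDs, root entries and invalidation, copy
 * translation tables when they were pre-enabled (kdump), build the
 * si_domain and wire up fault and page-request interrupts.
 */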
2887 static int __init init_dmars(void)
2889 struct dmar_drhd_unit *drhd;
2890 struct intel_iommu *iommu;
2893 ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL);
2897 for_each_iommu(iommu, drhd) {
2898 if (drhd->ignored) {
2899 iommu_disable_translation(iommu);
2904 * Find the max pasid size of all IOMMUs in the system.
2905 * We need to ensure the system pasid table is no bigger
2906 * than the smallest supported.
2908 if (pasid_supported(iommu)) {
2909 u32 temp = 2 << ecap_pss(iommu->ecap);
2911 intel_pasid_max_id = min_t(u32, temp,
2912 intel_pasid_max_id);
2915 intel_iommu_init_qi(iommu);
2917 ret = iommu_init_domains(iommu);
2921 init_translation_status(iommu);
2923 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
2924 iommu_disable_translation(iommu);
2925 clear_translation_pre_enabled(iommu);
2926 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
2932 * we could share the same root & context tables
2933 * among all IOMMUs. Need to split it later.
2935 ret = iommu_alloc_root_entry(iommu);
2939 if (translation_pre_enabled(iommu)) {
2940 pr_info("Translation already enabled - trying to copy translation structures\n");
2942 ret = copy_translation_tables(iommu);
2945 * We found the IOMMU with translation
2946 * enabled - but failed to copy over the
2947 * old root-entry table. Try to proceed
2948 * by disabling translation now and
2949 * allocating a clean root-entry table.
2950 * This might cause DMAR faults, but
2951 * probably the dump will still succeed.
2953 pr_err("Failed to copy translation tables from previous kernel for %s\n",
2955 iommu_disable_translation(iommu);
2956 clear_translation_pre_enabled(iommu);
2958 pr_info("Copied translation tables from previous kernel for %s\n",
2963 if (!ecap_pass_through(iommu->ecap))
2964 hw_pass_through = 0;
2965 intel_svm_check(iommu);
2969 * Now that qi is enabled on all iommus, set the root entry and flush
2970 * caches. This is required on some Intel X58 chipsets, otherwise the
2971 * flush_context function will loop forever and the boot hangs.
2973 for_each_active_iommu(iommu, drhd) {
2974 iommu_flush_write_buffer(iommu);
2975 #ifdef CONFIG_INTEL_IOMMU_SVM
2976 register_pasid_allocator(iommu);
2978 iommu_set_root_entry(iommu);
2981 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2986 iommu_identity_mapping |= IDENTMAP_GFX;
2988 check_tylersburg_isoch();
2990 ret = si_domain_init(hw_pass_through);
2997 * global invalidate context cache
2998 * global invalidate iotlb
2999 * enable translation
3001 for_each_iommu(iommu, drhd) {
3002 if (drhd->ignored) {
3004 * we always have to disable PMRs or DMA may fail on this device */
3008 iommu_disable_protect_mem_regions(iommu);
3012 iommu_flush_write_buffer(iommu);
3014 #ifdef CONFIG_INTEL_IOMMU_SVM
3015 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3017 * Calling dmar_alloc_hwirq() with dmar_global_lock held
3018 * could cause a lock race, so drop the lock around the call.
3020 up_write(&dmar_global_lock);
3021 ret = intel_svm_enable_prq(iommu);
3022 down_write(&dmar_global_lock);
3027 ret = dmar_set_interrupt(iommu);
3035 for_each_active_iommu(iommu, drhd) {
3036 disable_dmar_iommu(iommu);
3037 free_dmar_iommu(iommu);
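/*
 * Mark DMAR units that can be ignored: units whose device scope
 * contains no devices, and units that cover nothing but integrated
 * graphics (those are flagged gfx_dedicated).
 */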
3043 static void __init init_no_remapping_devices(void)
3045 struct dmar_drhd_unit *drhd;
3049 for_each_drhd_unit(drhd) {
3050 if (!drhd->include_all) {
3051 for_each_active_dev_scope(drhd->devices,
3052 drhd->devices_cnt, i, dev)
3054 /* ignore DMAR unit if no devices exist */
3055 if (i == drhd->devices_cnt)
3060 for_each_active_drhd_unit(drhd) {
3061 if (drhd->include_all)
3064 for_each_active_dev_scope(drhd->devices,
3065 drhd->devices_cnt, i, dev)
3066 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3068 if (i < drhd->devices_cnt)
3071 /* This IOMMU has *only* gfx devices. Either bypass it or
3072 set the gfx_mapped flag, as appropriate */
3073 drhd->gfx_dedicated = 1;
3079 #ifdef CONFIG_SUSPEND
3080 static int init_iommu_hw(void)
3082 struct dmar_drhd_unit *drhd;
3083 struct intel_iommu *iommu = NULL;
3085 for_each_active_iommu(iommu, drhd)
3087 dmar_reenable_qi(iommu);
3089 for_each_iommu(iommu, drhd) {
3090 if (drhd->ignored) {
3092 * we always have to disable PMRs or DMA may fail on this device */
3096 iommu_disable_protect_mem_regions(iommu);
3100 iommu_flush_write_buffer(iommu);
3101 iommu_set_root_entry(iommu);
3102 iommu_enable_translation(iommu);
3103 iommu_disable_protect_mem_regions(iommu);
3109 static void iommu_flush_all(void)
3111 struct dmar_drhd_unit *drhd;
3112 struct intel_iommu *iommu;
3114 for_each_active_iommu(iommu, drhd) {
3115 iommu->flush.flush_context(iommu, 0, 0, 0,
3116 DMA_CCMD_GLOBAL_INVL);
3117 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3118 DMA_TLB_GLOBAL_FLUSH);
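/*
 * Suspend path: disable translation and save the fault-event control,
 * data and address registers of each IOMMU so iommu_resume() can
 * restore them.
 */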
3122 static int iommu_suspend(void)
3124 struct dmar_drhd_unit *drhd;
3125 struct intel_iommu *iommu = NULL;
3128 for_each_active_iommu(iommu, drhd) {
3129 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
3131 if (!iommu->iommu_state)
3137 for_each_active_iommu(iommu, drhd) {
3138 iommu_disable_translation(iommu);
3140 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3142 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3143 readl(iommu->reg + DMAR_FECTL_REG);
3144 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3145 readl(iommu->reg + DMAR_FEDATA_REG);
3146 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3147 readl(iommu->reg + DMAR_FEADDR_REG);
3148 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3149 readl(iommu->reg + DMAR_FEUADDR_REG);
3151 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3156 for_each_active_iommu(iommu, drhd)
3157 kfree(iommu->iommu_state);
3162 static void iommu_resume(void)
3164 struct dmar_drhd_unit *drhd;
3165 struct intel_iommu *iommu = NULL;
3168 if (init_iommu_hw()) {
3170 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3172 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3176 for_each_active_iommu(iommu, drhd) {
3178 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3180 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3181 iommu->reg + DMAR_FECTL_REG);
3182 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3183 iommu->reg + DMAR_FEDATA_REG);
3184 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3185 iommu->reg + DMAR_FEADDR_REG);
3186 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3187 iommu->reg + DMAR_FEUADDR_REG);
3189 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3192 for_each_active_iommu(iommu, drhd)
3193 kfree(iommu->iommu_state);
3196 static struct syscore_ops iommu_syscore_ops = {
3197 .resume = iommu_resume,
3198 .suspend = iommu_suspend,
3201 static void __init init_iommu_pm_ops(void)
3203 register_syscore_ops(&iommu_syscore_ops);
3207 static inline void init_iommu_pm_ops(void) {}
3208 #endif /* CONFIG_PM */
3210 static int __init rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
3212 if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
3213 !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
3214 rmrr->end_address <= rmrr->base_address ||
3215 arch_rmrr_sanity_check(rmrr))
3221 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3223 struct acpi_dmar_reserved_memory *rmrr;
3224 struct dmar_rmrr_unit *rmrru;
3226 rmrr = (struct acpi_dmar_reserved_memory *)header;
3227 if (rmrr_sanity_check(rmrr)) {
3229 "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
3230 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3231 rmrr->base_address, rmrr->end_address,
3232 dmi_get_system_info(DMI_BIOS_VENDOR),
3233 dmi_get_system_info(DMI_BIOS_VERSION),
3234 dmi_get_system_info(DMI_PRODUCT_VERSION));
3235 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
3238 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3242 rmrru->hdr = header;
3244 rmrru->base_address = rmrr->base_address;
3245 rmrru->end_address = rmrr->end_address;
3247 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3248 ((void *)rmrr) + rmrr->header.length,
3249 &rmrru->devices_cnt);
3250 if (rmrru->devices_cnt && rmrru->devices == NULL)
3253 list_add(&rmrru->list, &dmar_rmrr_units);
3262 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3264 struct dmar_atsr_unit *atsru;
3265 struct acpi_dmar_atsr *tmp;
3267 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
3269 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3270 if (atsr->segment != tmp->segment)
3272 if (atsr->header.length != tmp->header.length)
3274 if (memcmp(atsr, tmp, atsr->header.length) == 0)
3281 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3283 struct acpi_dmar_atsr *atsr;
3284 struct dmar_atsr_unit *atsru;
3286 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3289 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3290 atsru = dmar_find_atsr(atsr);
3294 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
3299 * If memory is allocated from slab by ACPI _DSM method, we need to
3300 * copy the memory content because the memory buffer will be freed on exit.
3303 atsru->hdr = (void *)(atsru + 1);
3304 memcpy(atsru->hdr, hdr, hdr->length);
3305 atsru->include_all = atsr->flags & 0x1;
3306 if (!atsru->include_all) {
3307 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3308 (void *)atsr + atsr->header.length,
3309 &atsru->devices_cnt);
3310 if (atsru->devices_cnt && atsru->devices == NULL) {
3316 list_add_rcu(&atsru->list, &dmar_atsr_units);
3321 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3323 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3327 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3329 struct acpi_dmar_atsr *atsr;
3330 struct dmar_atsr_unit *atsru;
3332 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3333 atsru = dmar_find_atsr(atsr);
3335 list_del_rcu(&atsru->list);
3337 intel_iommu_free_atsr(atsru);
3343 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3347 struct acpi_dmar_atsr *atsr;
3348 struct dmar_atsr_unit *atsru;
3350 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3351 atsru = dmar_find_atsr(atsr);
3355 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
3356 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
3364 static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
3366 struct dmar_satc_unit *satcu;
3367 struct acpi_dmar_satc *tmp;
3369 list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
3371 tmp = (struct acpi_dmar_satc *)satcu->hdr;
3372 if (satc->segment != tmp->segment)
3374 if (satc->header.length != tmp->header.length)
3376 if (memcmp(satc, tmp, satc->header.length) == 0)
3383 int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
3385 struct acpi_dmar_satc *satc;
3386 struct dmar_satc_unit *satcu;
3388 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3391 satc = container_of(hdr, struct acpi_dmar_satc, header);
3392 satcu = dmar_find_satc(satc);
3396 satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
3400 satcu->hdr = (void *)(satcu + 1);
3401 memcpy(satcu->hdr, hdr, hdr->length);
3402 satcu->atc_required = satc->flags & 0x1;
3403 satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
3404 (void *)satc + satc->header.length,
3405 &satcu->devices_cnt);
3406 if (satcu->devices_cnt && !satcu->devices) {
3410 list_add_rcu(&satcu->list, &dmar_satc_units);
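/*
 * Bring up a hot-added DMAR unit: audit its capabilities against the
 * running configuration (pass-through, superpage support), initialize
 * its domain IDs, root entry and invalidation, and enable translation
 * unless the unit is ignored.
 */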
3415 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
3418 struct intel_iommu *iommu = dmaru->iommu;
3420 ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
3424 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
3425 pr_warn("%s: Doesn't support hardware pass through.\n",
3430 sp = domain_update_iommu_superpage(NULL, iommu) - 1;
3431 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
3432 pr_warn("%s: Doesn't support large page.\n",
3438 * Disable translation if already enabled prior to OS handover.
3440 if (iommu->gcmd & DMA_GCMD_TE)
3441 iommu_disable_translation(iommu);
3443 ret = iommu_init_domains(iommu);
3445 ret = iommu_alloc_root_entry(iommu);
3449 intel_svm_check(iommu);
3451 if (dmaru->ignored) {
3453 * we always have to disable PMRs or DMA may fail on this device
3456 iommu_disable_protect_mem_regions(iommu);
3460 intel_iommu_init_qi(iommu);
3461 iommu_flush_write_buffer(iommu);
3463 #ifdef CONFIG_INTEL_IOMMU_SVM
3464 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3465 ret = intel_svm_enable_prq(iommu);
3470 ret = dmar_set_interrupt(iommu);
3474 iommu_set_root_entry(iommu);
3475 iommu_enable_translation(iommu);
3477 iommu_disable_protect_mem_regions(iommu);
3481 disable_dmar_iommu(iommu);
3483 free_dmar_iommu(iommu);
3487 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
3490 struct intel_iommu *iommu = dmaru->iommu;
3492 if (!intel_iommu_enabled)
3498 ret = intel_iommu_add(dmaru);
3500 disable_dmar_iommu(iommu);
3501 free_dmar_iommu(iommu);
3507 static void intel_iommu_free_dmars(void)
3509 struct dmar_rmrr_unit *rmrru, *rmrr_n;
3510 struct dmar_atsr_unit *atsru, *atsr_n;
3511 struct dmar_satc_unit *satcu, *satc_n;
3513 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3514 list_del(&rmrru->list);
3515 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3519 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3520 list_del(&atsru->list);
3521 intel_iommu_free_atsr(atsru);
3523 list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) {
3524 list_del(&satcu->list);
3525 dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt);
3530 static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev)
3532 struct dmar_satc_unit *satcu;
3533 struct acpi_dmar_satc *satc;
3537 dev = pci_physfn(dev);
3540 list_for_each_entry_rcu(satcu, &dmar_satc_units, list) {
3541 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
3542 if (satc->segment != pci_domain_nr(dev->bus))
3544 for_each_dev_scope(satcu->devices, satcu->devices_cnt, i, tmp)
3545 if (to_pci_dev(tmp) == dev)
3554 static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
3557 struct pci_bus *bus;
3558 struct pci_dev *bridge = NULL;
3560 struct acpi_dmar_atsr *atsr;
3561 struct dmar_atsr_unit *atsru;
3562 struct dmar_satc_unit *satcu;
3564 dev = pci_physfn(dev);
3565 satcu = dmar_find_matched_satc_unit(dev);
3568 * This device supports ATS, as it is listed in the SATC table.
3569 * When the IOMMU is in legacy mode, ATS is enabled
3570 * automatically by HW for devices that require it,
3571 * so the OS should not enable ATS on this device
3572 * to avoid duplicated TLB invalidations. */
3574 return !(satcu->atc_required && !sm_supported(iommu));
3576 for (bus = dev->bus; bus; bus = bus->parent) {
3578 /* If it's an integrated device, allow ATS */
3581 /* Connected via non-PCIe: no ATS */
3582 if (!pci_is_pcie(bridge) ||
3583 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3585 /* If we found the root port, look it up in the ATSR */
3586 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3591 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3592 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3593 if (atsr->segment != pci_domain_nr(dev->bus))
3596 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3597 if (tmp == &bridge->dev)
3600 if (atsru->include_all)
3610 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3613 struct dmar_rmrr_unit *rmrru;
3614 struct dmar_atsr_unit *atsru;
3615 struct dmar_satc_unit *satcu;
3616 struct acpi_dmar_atsr *atsr;
3617 struct acpi_dmar_reserved_memory *rmrr;
3618 struct acpi_dmar_satc *satc;
3620 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
3623 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3624 rmrr = container_of(rmrru->hdr,
3625 struct acpi_dmar_reserved_memory, header);
3626 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3627 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3628 ((void *)rmrr) + rmrr->header.length,
3629 rmrr->segment, rmrru->devices,
3630 rmrru->devices_cnt);
3633 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
3634 dmar_remove_dev_scope(info, rmrr->segment,
3635 rmrru->devices, rmrru->devices_cnt);
3639 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3640 if (atsru->include_all)
3643 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3644 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3645 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3646 (void *)atsr + atsr->header.length,
3647 atsr->segment, atsru->devices,
3648 atsru->devices_cnt);
3653 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
3654 if (dmar_remove_dev_scope(info, atsr->segment,
3655 atsru->devices, atsru->devices_cnt))
3659 list_for_each_entry(satcu, &dmar_satc_units, list) {
3660 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
3661 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3662 ret = dmar_insert_dev_scope(info, (void *)(satc + 1),
3663 (void *)satc + satc->header.length,
3664 satc->segment, satcu->devices,
3665 satcu->devices_cnt);
3670 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
3671 if (dmar_remove_dev_scope(info, satc->segment,
3672 satcu->devices, satcu->devices_cnt))
3680 static int intel_iommu_memory_notifier(struct notifier_block *nb,
3681 unsigned long val, void *v)
3683 struct memory_notify *mhp = v;
3684 unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3685 unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
3689 case MEM_GOING_ONLINE:
3690 if (iommu_domain_identity_map(si_domain,
3691 start_vpfn, last_vpfn)) {
3692 pr_warn("Failed to build identity map for [%lx-%lx]\n",
3693 start_vpfn, last_vpfn);
3699 case MEM_CANCEL_ONLINE:
3701 struct dmar_drhd_unit *drhd;
3702 struct intel_iommu *iommu;
3703 LIST_HEAD(freelist);
3705 domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist);
3708 for_each_active_iommu(iommu, drhd)
3709 iommu_flush_iotlb_psi(iommu, si_domain,
3710 start_vpfn, mhp->nr_pages,
3711 list_empty(&freelist), 0);
3713 put_pages_list(&freelist);
3721 static struct notifier_block intel_iommu_memory_nb = {
3722 .notifier_call = intel_iommu_memory_notifier,
3726 static void intel_disable_iommus(void)
3728 struct intel_iommu *iommu = NULL;
3729 struct dmar_drhd_unit *drhd;
3731 for_each_iommu(iommu, drhd)
3732 iommu_disable_translation(iommu);
3735 void intel_iommu_shutdown(void)
3737 struct dmar_drhd_unit *drhd;
3738 struct intel_iommu *iommu = NULL;
3740 if (no_iommu || dmar_disabled)
3743 down_write(&dmar_global_lock);
3745 /* Disable PMRs explicitly here. */
3746 for_each_iommu(iommu, drhd)
3747 iommu_disable_protect_mem_regions(iommu);
3749 /* Make sure the IOMMUs are switched off */
3750 intel_disable_iommus();
3752 up_write(&dmar_global_lock);
3755 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
3757 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
3759 return container_of(iommu_dev, struct intel_iommu, iommu);
3762 static ssize_t version_show(struct device *dev,
3763 struct device_attribute *attr, char *buf)
3765 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3766 u32 ver = readl(iommu->reg + DMAR_VER_REG);
3767 return sprintf(buf, "%d:%d\n",
3768 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
3770 static DEVICE_ATTR_RO(version);
3772 static ssize_t address_show(struct device *dev,
3773 struct device_attribute *attr, char *buf)
3775 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3776 return sprintf(buf, "%llx\n", iommu->reg_phys);
3778 static DEVICE_ATTR_RO(address);
3780 static ssize_t cap_show(struct device *dev,
3781 struct device_attribute *attr, char *buf)
3783 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3784 return sprintf(buf, "%llx\n", iommu->cap);
3786 static DEVICE_ATTR_RO(cap);
3788 static ssize_t ecap_show(struct device *dev,
3789 struct device_attribute *attr, char *buf)
3791 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3792 return sprintf(buf, "%llx\n", iommu->ecap);
3794 static DEVICE_ATTR_RO(ecap);
3796 static ssize_t domains_supported_show(struct device *dev,
3797 struct device_attribute *attr, char *buf)
3799 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3800 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
3802 static DEVICE_ATTR_RO(domains_supported);
3804 static ssize_t domains_used_show(struct device *dev,
3805 struct device_attribute *attr, char *buf)
3807 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3808 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
3809 cap_ndoms(iommu->cap)));
3811 static DEVICE_ATTR_RO(domains_used);
3813 static struct attribute *intel_iommu_attrs[] = {
3814 &dev_attr_version.attr,
3815 &dev_attr_address.attr,
3817 &dev_attr_ecap.attr,
3818 &dev_attr_domains_supported.attr,
3819 &dev_attr_domains_used.attr,
3823 static struct attribute_group intel_iommu_group = {
3824 .name = "intel-iommu",
3825 .attrs = intel_iommu_attrs,
3828 const struct attribute_group *intel_iommu_groups[] = {
3833 static inline bool has_external_pci(void)
3835 struct pci_dev *pdev = NULL;
3837 for_each_pci_dev(pdev)
3838 if (pdev->external_facing)
3844 static int __init platform_optin_force_iommu(void)
3846 if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
3849 if (no_iommu || dmar_disabled)
3850 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
3853 * If Intel-IOMMU is disabled by default, we will apply identity
3854 * map for all devices except those marked as being untrusted.
3857 iommu_set_default_passthrough(false);
3865 static int __init probe_acpi_namespace_devices(void)
3867 struct dmar_drhd_unit *drhd;
3868 /* To avoid a -Wunused-but-set-variable warning. */
3869 struct intel_iommu *iommu __maybe_unused;
3873 for_each_active_iommu(iommu, drhd) {
3874 for_each_active_dev_scope(drhd->devices,
3875 drhd->devices_cnt, i, dev) {
3876 struct acpi_device_physical_node *pn;
3877 struct iommu_group *group;
3878 struct acpi_device *adev;
3880 if (dev->bus != &acpi_bus_type)
3883 adev = to_acpi_device(dev);
3884 mutex_lock(&adev->physical_node_lock);
3885 list_for_each_entry(pn,
3886 &adev->physical_node_list, node) {
3887 group = iommu_group_get(pn->dev);
3889 iommu_group_put(group);
3893 pn->dev->bus->iommu_ops = &intel_iommu_ops;
3894 ret = iommu_probe_device(pn->dev);
3898 mutex_unlock(&adev->physical_node_lock);
3908 static __init int tboot_force_iommu(void)
3910 if (!tboot_enabled())
3913 if (no_iommu || dmar_disabled)
3914 pr_warn("Forcing Intel-IOMMU to enabled\n");
3922 int __init intel_iommu_init(void)
3925 struct dmar_drhd_unit *drhd;
3926 struct intel_iommu *iommu;
3929 * Intel IOMMU is required for a TXT/tboot launch or platform
3930 * opt in, so enforce that.
3932 force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
3933 platform_optin_force_iommu();
3935 down_write(&dmar_global_lock);
3936 if (dmar_table_init()) {
3938 panic("tboot: Failed to initialize DMAR table\n");
3942 if (dmar_dev_scope_init() < 0) {
3944 panic("tboot: Failed to initialize DMAR device scope\n");
3948 up_write(&dmar_global_lock);
3951 * The bus notifier takes the dmar_global_lock, so lockdep will
3952 * complain later when we register it under the lock.
3954 dmar_register_bus_notifier();
3956 down_write(&dmar_global_lock);
3959 intel_iommu_debugfs_init();
3961 if (no_iommu || dmar_disabled) {
3963 * We exit the function here to ensure IOMMU's remapping and
3964 * mempool aren't setup, which means that the IOMMU's PMRs
3965 * won't be disabled via the call to init_dmars(). So disable
3966 * it explicitly here. The PMRs were setup by tboot prior to
3967 * calling SENTER, but the kernel is expected to reset/tear down the PMRs.
3970 if (intel_iommu_tboot_noforce) {
3971 for_each_iommu(iommu, drhd)
3972 iommu_disable_protect_mem_regions(iommu);
3976 * Make sure the IOMMUs are switched off, even when we
3977 * boot into a kexec kernel and the previous kernel left
3980 intel_disable_iommus();
3984 if (list_empty(&dmar_rmrr_units))
3985 pr_info("No RMRR found\n");
3987 if (list_empty(&dmar_atsr_units))
3988 pr_info("No ATSR found\n");
3990 if (list_empty(&dmar_satc_units))
3991 pr_info("No SATC found\n");
3993 init_no_remapping_devices();
3998 panic("tboot: Failed to initialize DMARs\n");
3999 pr_err("Initialization failed\n");
4002 up_write(&dmar_global_lock);
4004 init_iommu_pm_ops();
4006 down_read(&dmar_global_lock);
4007 for_each_active_iommu(iommu, drhd) {
4009 * The flush queue implementation does not perform
4010 * page-selective invalidations that are required for efficient
4011 * TLB flushes in virtual environments. The benefit of batching
4012 * is likely to be much lower than the overhead of synchronizing
4013 * the virtual and physical IOMMU page-tables.
4015 if (cap_caching_mode(iommu->cap)) {
4016 pr_info_once("IOMMU batching disallowed due to virtualization\n");
4017 iommu_set_dma_strict();
4019 iommu_device_sysfs_add(&iommu->iommu, NULL,
4022 iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
4024 up_read(&dmar_global_lock);
4026 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4027 if (si_domain && !hw_pass_through)
4028 register_memory_notifier(&intel_iommu_memory_nb);
4030 down_read(&dmar_global_lock);
4031 if (probe_acpi_namespace_devices())
4032 pr_warn("ACPI name space devices didn't probe correctly\n");
4034 /* Finally, we enable the DMA remapping hardware. */
4035 for_each_iommu(iommu, drhd) {
4036 if (!drhd->ignored && !translation_pre_enabled(iommu))
4037 iommu_enable_translation(iommu);
4039 iommu_disable_protect_mem_regions(iommu);
4041 up_read(&dmar_global_lock);
4043 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4045 intel_iommu_enabled = 1;
4050 intel_iommu_free_dmars();
4051 up_write(&dmar_global_lock);
4055 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4057 struct device_domain_info *info = opaque;
4059 domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff);
4064 * NB - intel-iommu lacks any sort of reference counting for the users of
4065 * dependent devices. If multiple endpoints have intersecting dependent
4066 * devices, unbinding the driver from any one of them will possibly leave
4067 * the others unable to operate.
4069 static void domain_context_clear(struct device_domain_info *info)
4071 if (!info->iommu || !info->dev || !dev_is_pci(info->dev))
4074 pci_for_each_dma_alias(to_pci_dev(info->dev),
4075 &domain_context_clear_one_cb, info);
4078 static void dmar_remove_one_dev_info(struct device *dev)
4080 struct device_domain_info *info = dev_iommu_priv_get(dev);
4081 struct dmar_domain *domain = info->domain;
4082 struct intel_iommu *iommu = info->iommu;
4084 if (!dev_is_real_dma_subdevice(info->dev)) {
4085 if (dev_is_pci(info->dev) && sm_supported(iommu))
4086 intel_pasid_tear_down_entry(iommu, info->dev,
4087 PASID_RID2PASID, false);
4089 iommu_disable_dev_iotlb(info);
4090 domain_context_clear(info);
4091 intel_pasid_free_table(info->dev);
4094 spin_lock(&domain->lock);
4095 list_del(&info->link);
4096 spin_unlock(&domain->lock);
4098 domain_detach_iommu(domain, iommu);
4099 info->domain = NULL;
4102 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4106 /* calculate AGAW */
4107 domain->gaw = guest_width;
4108 adjust_width = guestwidth_to_adjustwidth(guest_width);
4109 domain->agaw = width_to_agaw(adjust_width);
4111 domain->iommu_coherency = false;
4112 domain->iommu_superpage = 0;
4113 domain->max_addr = 0;
4115 /* always allocate the top pgd */
4116 domain->pgd = alloc_pgtable_page(domain->nid);
4119 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4123 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4125 struct dmar_domain *dmar_domain;
4126 struct iommu_domain *domain;
4129 case IOMMU_DOMAIN_DMA:
4130 case IOMMU_DOMAIN_DMA_FQ:
4131 case IOMMU_DOMAIN_UNMANAGED:
4132 dmar_domain = alloc_domain(type);
4134 pr_err("Can't allocate dmar_domain\n");
4137 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4138 pr_err("Domain initialization failed\n");
4139 domain_exit(dmar_domain);
4143 domain = &dmar_domain->domain;
4144 domain->geometry.aperture_start = 0;
4145 domain->geometry.aperture_end =
4146 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4147 domain->geometry.force_aperture = true;
4150 case IOMMU_DOMAIN_IDENTITY:
4151 return &si_domain->domain;
4159 static void intel_iommu_domain_free(struct iommu_domain *domain)
4161 if (domain != &si_domain->domain)
4162 domain_exit(to_dmar_domain(domain));
4165 static int prepare_domain_attach_device(struct iommu_domain *domain,
4168 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4169 struct intel_iommu *iommu;
4172 iommu = device_to_iommu(dev, NULL, NULL);
4176 if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
4179 /* check if this iommu agaw is sufficient for max mapped address */
4180 addr_width = agaw_to_width(iommu->agaw);
4181 if (addr_width > cap_mgaw(iommu->cap))
4182 addr_width = cap_mgaw(iommu->cap);
4184 if (dmar_domain->max_addr > (1LL << addr_width)) {
4185 dev_err(dev, "%s: iommu width (%d) is not "
4186 "sufficient for the mapped address (%llx)\n",
4187 __func__, addr_width, dmar_domain->max_addr);
4190 dmar_domain->gaw = addr_width;
4193 * Knock out extra levels of page tables if necessary
4195 while (iommu->agaw < dmar_domain->agaw) {
4196 struct dma_pte *pte;
4198 pte = dmar_domain->pgd;
4199 if (dma_pte_present(pte)) {
4200 dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
4201 free_pgtable_page(pte);
4203 dmar_domain->agaw--;
4209 static int intel_iommu_attach_device(struct iommu_domain *domain,
4214 if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
4215 device_is_rmrr_locked(dev)) {
4216 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4220 /* normally dev is not mapped */
4221 if (unlikely(domain_context_mapped(dev))) {
4222 struct device_domain_info *info = dev_iommu_priv_get(dev);
4225 dmar_remove_one_dev_info(dev);
4228 ret = prepare_domain_attach_device(domain, dev);
4232 return domain_add_dev_info(to_dmar_domain(domain), dev);
4235 static void intel_iommu_detach_device(struct iommu_domain *domain,
4238 dmar_remove_one_dev_info(dev);
4241 static int intel_iommu_map(struct iommu_domain *domain,
4242 unsigned long iova, phys_addr_t hpa,
4243 size_t size, int iommu_prot, gfp_t gfp)
4245 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4249 if (iommu_prot & IOMMU_READ)
4250 prot |= DMA_PTE_READ;
4251 if (iommu_prot & IOMMU_WRITE)
4252 prot |= DMA_PTE_WRITE;
4253 if (dmar_domain->set_pte_snp)
4254 prot |= DMA_PTE_SNP;
4256 max_addr = iova + size;
4257 if (dmar_domain->max_addr < max_addr) {
4260 /* check if minimum agaw is sufficient for mapped address */
4261 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4262 if (end < max_addr) {
4263 pr_err("%s: iommu width (%d) is not "
4264 "sufficient for the mapped address (%llx)\n",
4265 __func__, dmar_domain->gaw, max_addr);
4268 dmar_domain->max_addr = max_addr;
4270 /* Round up size to next multiple of PAGE_SIZE, if it and
4271 the low bits of hpa would take us onto the next page */
4272 size = aligned_nrpages(hpa, size);
4273 return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4274 hpa >> VTD_PAGE_SHIFT, size, prot);
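/*
 * The IOMMU core hands us (pgsize, pgcount) pairs; only 4KiB, 2MiB
 * and 1GiB page sizes are accepted, iova and paddr must be aligned to
 * the page size, and the whole range is forwarded to intel_iommu_map()
 * as a single request.
 */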
4277 static int intel_iommu_map_pages(struct iommu_domain *domain,
4278 unsigned long iova, phys_addr_t paddr,
4279 size_t pgsize, size_t pgcount,
4280 int prot, gfp_t gfp, size_t *mapped)
4282 unsigned long pgshift = __ffs(pgsize);
4283 size_t size = pgcount << pgshift;
4286 if (pgsize != SZ_4K && pgsize != SZ_2M && pgsize != SZ_1G)
4289 if (!IS_ALIGNED(iova | paddr, pgsize))
4292 ret = intel_iommu_map(domain, iova, paddr, size, prot, gfp);
4299 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4300 unsigned long iova, size_t size,
4301 struct iommu_iotlb_gather *gather)
4303 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4304 unsigned long start_pfn, last_pfn;
4307 /* Cope with horrid API which requires us to unmap more than the
4308 size argument if it happens to be a large-page mapping. */
4309 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
4311 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4312 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4314 start_pfn = iova >> VTD_PAGE_SHIFT;
4315 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4317 domain_unmap(dmar_domain, start_pfn, last_pfn, &gather->freelist);
4319 if (dmar_domain->max_addr == iova + size)
4320 dmar_domain->max_addr = iova;
4322 iommu_iotlb_gather_add_page(domain, gather, iova, size);
4327 static size_t intel_iommu_unmap_pages(struct iommu_domain *domain,
4329 size_t pgsize, size_t pgcount,
4330 struct iommu_iotlb_gather *gather)
4332 unsigned long pgshift = __ffs(pgsize);
4333 size_t size = pgcount << pgshift;
4335 return intel_iommu_unmap(domain, iova, size, gather);
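/*
 * Deferred flush after unmap: issue a page-selective IOTLB
 * invalidation for the gathered IOVA range on every IOMMU backing the
 * domain, then release the page-table pages collected in the gather
 * freelist.
 */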
4338 static void intel_iommu_tlb_sync(struct iommu_domain *domain,
4339 struct iommu_iotlb_gather *gather)
4341 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4342 unsigned long iova_pfn = IOVA_PFN(gather->start);
4343 size_t size = gather->end - gather->start;
4344 struct iommu_domain_info *info;
4345 unsigned long start_pfn;
4346 unsigned long nrpages;
4349 nrpages = aligned_nrpages(gather->start, size);
4350 start_pfn = mm_to_dma_pfn(iova_pfn);
4352 xa_for_each(&dmar_domain->iommu_array, i, info)
4353 iommu_flush_iotlb_psi(info->iommu, dmar_domain,
4355 list_empty(&gather->freelist), 0);
4357 put_pages_list(&gather->freelist);
4360 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4363 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4364 struct dma_pte *pte;
4368 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4369 if (pte && dma_pte_present(pte))
4370 phys = dma_pte_addr(pte) +
4371 (iova & (BIT_MASK(level_to_offset_bits(level) +
4372 VTD_PAGE_SHIFT) - 1));
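/*
 * Force-snoop support: snooping can only be enforced when every
 * device in the domain sits behind an IOMMU with snoop control.
 * First-level domains program it per PASID; second-level domains set
 * the SNP bit in each PTE via set_pte_snp.
 */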
4377 static bool domain_support_force_snooping(struct dmar_domain *domain)
4379 struct device_domain_info *info;
4380 bool support = true;
4382 assert_spin_locked(&domain->lock);
4383 list_for_each_entry(info, &domain->devices, link) {
4384 if (!ecap_sc_support(info->iommu->ecap)) {
4393 static void domain_set_force_snooping(struct dmar_domain *domain)
4395 struct device_domain_info *info;
4397 assert_spin_locked(&domain->lock);
4399 * Second level page table supports per-PTE snoop control. The
4400 * iommu_map() interface will handle this by setting SNP bit.
4402 if (!domain_use_first_level(domain)) {
4403 domain->set_pte_snp = true;
4407 list_for_each_entry(info, &domain->devices, link)
4408 intel_pasid_setup_page_snoop_control(info->iommu, info->dev,
4412 static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
4414 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4416 if (dmar_domain->force_snooping)
4419 spin_lock(&dmar_domain->lock);
4420 if (!domain_support_force_snooping(dmar_domain)) {
4421 spin_unlock(&dmar_domain->lock);
4425 domain_set_force_snooping(dmar_domain);
4426 dmar_domain->force_snooping = true;
4427 spin_unlock(&dmar_domain->lock);
4432 static bool intel_iommu_capable(enum iommu_cap cap)
4434 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4436 if (cap == IOMMU_CAP_INTR_REMAP)
4437 return irq_remapping_enabled == 1;
4438 if (cap == IOMMU_CAP_PRE_BOOT_PROTECTION)
4439 return dmar_platform_optin();
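/*
 * Per-device probe: allocate the device_domain_info, record the
 * bus/devfn/segment used for DMA, and cache whether the device and
 * its IOMMU can do ATS, PASID and PRI.
 */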
4444 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
4446 struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
4447 struct device_domain_info *info;
4448 struct intel_iommu *iommu;
4451 iommu = device_to_iommu(dev, &bus, &devfn);
4453 return ERR_PTR(-ENODEV);
4455 info = kzalloc(sizeof(*info), GFP_KERNEL);
4457 return ERR_PTR(-ENOMEM);
4459 if (dev_is_real_dma_subdevice(dev)) {
4460 info->bus = pdev->bus->number;
4461 info->devfn = pdev->devfn;
4462 info->segment = pci_domain_nr(pdev->bus);
4465 info->devfn = devfn;
4466 info->segment = iommu->segment;
4470 info->iommu = iommu;
4471 if (dev_is_pci(dev)) {
4472 if (ecap_dev_iotlb_support(iommu->ecap) &&
4473 pci_ats_supported(pdev) &&
4474 dmar_ats_supported(pdev, iommu))
4475 info->ats_supported = 1;
4477 if (sm_supported(iommu)) {
4478 if (pasid_supported(iommu)) {
4479 int features = pci_pasid_features(pdev);
4482 info->pasid_supported = features | 1;
4485 if (info->ats_supported && ecap_prs(iommu->ecap) &&
4486 pci_pri_supported(pdev))
4487 info->pri_supported = 1;
4491 dev_iommu_priv_set(dev, info);
4493 return &iommu->iommu;
4496 static void intel_iommu_release_device(struct device *dev)
4498 struct device_domain_info *info = dev_iommu_priv_get(dev);
4500 dmar_remove_one_dev_info(dev);
4501 dev_iommu_priv_set(dev, NULL);
4503 set_dma_ops(dev, NULL);
4506 static void intel_iommu_probe_finalize(struct device *dev)
4508 set_dma_ops(dev, NULL);
4509 iommu_setup_dma_ops(dev, 0, U64_MAX);
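/*
 * Reserved regions reported to the IOMMU core: every RMRR scoping
 * this device becomes a direct-mapped region (relaxable for USB and
 * graphics devices), the ISA bridge may get the low 16MiB for the
 * floppy workaround, and the IOAPIC range is always reserved.
 */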
4512 static void intel_iommu_get_resv_regions(struct device *device,
4513 struct list_head *head)
4515 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
4516 struct iommu_resv_region *reg;
4517 struct dmar_rmrr_unit *rmrr;
4518 struct device *i_dev;
4521 down_read(&dmar_global_lock);
4522 for_each_rmrr_units(rmrr) {
4523 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
4525 struct iommu_resv_region *resv;
4526 enum iommu_resv_type type;
4529 if (i_dev != device &&
4530 !is_downstream_to_pci_bridge(device, i_dev))
4533 length = rmrr->end_address - rmrr->base_address + 1;
4535 type = device_rmrr_is_relaxable(device) ?
4536 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
4538 resv = iommu_alloc_resv_region(rmrr->base_address,
4539 length, prot, type);
4543 list_add_tail(&resv->list, head);
4546 up_read(&dmar_global_lock);
4548 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
4549 if (dev_is_pci(device)) {
4550 struct pci_dev *pdev = to_pci_dev(device);
4552 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
4553 reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
4554 IOMMU_RESV_DIRECT_RELAXABLE);
4556 list_add_tail(&reg->list, head);
4559 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
4561 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
4562 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
4566 list_add_tail(&reg->list, head);
4569 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
4571 struct device_domain_info *info = dev_iommu_priv_get(dev);
4572 struct context_entry *context;
4573 struct dmar_domain *domain;
4577 domain = info->domain;
4581 spin_lock(&iommu->lock);
4583 if (!info->pasid_supported)
4586 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
4587 if (WARN_ON(!context))
4590 ctx_lo = context[0].lo;
4592 if (!(ctx_lo & CONTEXT_PASIDE)) {
4593 ctx_lo |= CONTEXT_PASIDE;
4594 context[0].lo = ctx_lo;
4596 iommu->flush.flush_context(iommu,
4597 domain_id_iommu(domain, iommu),
4598 PCI_DEVID(info->bus, info->devfn),
4599 DMA_CCMD_MASK_NOBIT,
4600 DMA_CCMD_DEVICE_INVL);
4603 /* Enable PASID support in the device, if it wasn't already */
4604 if (!info->pasid_enabled)
4605 iommu_enable_dev_iotlb(info);
4610 spin_unlock(&iommu->lock);
4615 static struct iommu_group *intel_iommu_device_group(struct device *dev)
4617 if (dev_is_pci(dev))
4618 return pci_device_group(dev);
4619 return generic_device_group(dev);
4622 static int intel_iommu_enable_sva(struct device *dev)
4624 struct device_domain_info *info = dev_iommu_priv_get(dev);
4625 struct intel_iommu *iommu;
4628 if (!info || dmar_disabled)
4631 iommu = info->iommu;
4635 if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
4638 if (intel_iommu_enable_pasid(iommu, dev))
4641 if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
4644 ret = iopf_queue_add_device(iommu->iopf_queue, dev);
4646 ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
4651 static int intel_iommu_disable_sva(struct device *dev)
4653 struct device_domain_info *info = dev_iommu_priv_get(dev);
4654 struct intel_iommu *iommu = info->iommu;
4657 ret = iommu_unregister_device_fault_handler(dev);
4659 ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
4664 static int intel_iommu_enable_iopf(struct device *dev)
4666 struct device_domain_info *info = dev_iommu_priv_get(dev);
4668 if (info && info->pri_supported)
4675 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
4678 case IOMMU_DEV_FEAT_IOPF:
4679 return intel_iommu_enable_iopf(dev);
4681 case IOMMU_DEV_FEAT_SVA:
4682 return intel_iommu_enable_sva(dev);
4690 intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
4693 case IOMMU_DEV_FEAT_IOPF:
4696 case IOMMU_DEV_FEAT_SVA:
4697 return intel_iommu_disable_sva(dev);
4704 static bool intel_iommu_is_attach_deferred(struct device *dev)
4706 struct device_domain_info *info = dev_iommu_priv_get(dev);
4708 return translation_pre_enabled(info->iommu) && !info->domain;
4712 * Check that the device does not live on an external facing PCI port that is
4713 * marked as untrusted. Such devices should not be able to apply quirks and
4714 * thus not be able to bypass the IOMMU restrictions.
4716 static bool risky_device(struct pci_dev *pdev)
4718 if (pdev->untrusted) {
4720 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
4721 pdev->vendor, pdev->device);
4722 pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
4728 static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
4729 unsigned long iova, size_t size)
4731 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4732 unsigned long pages = aligned_nrpages(iova, size);
4733 unsigned long pfn = iova >> VTD_PAGE_SHIFT;
4734 struct iommu_domain_info *info;
4737 xa_for_each(&dmar_domain->iommu_array, i, info)
4738 __mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
4741 const struct iommu_ops intel_iommu_ops = {
4742 .capable = intel_iommu_capable,
4743 .domain_alloc = intel_iommu_domain_alloc,
4744 .probe_device = intel_iommu_probe_device,
4745 .probe_finalize = intel_iommu_probe_finalize,
4746 .release_device = intel_iommu_release_device,
4747 .get_resv_regions = intel_iommu_get_resv_regions,
4748 .device_group = intel_iommu_device_group,
4749 .dev_enable_feat = intel_iommu_dev_enable_feat,
4750 .dev_disable_feat = intel_iommu_dev_disable_feat,
4751 .is_attach_deferred = intel_iommu_is_attach_deferred,
4752 .def_domain_type = device_def_domain_type,
4753 .pgsize_bitmap = SZ_4K,
4754 #ifdef CONFIG_INTEL_IOMMU_SVM
4755 .sva_bind = intel_svm_bind,
4756 .sva_unbind = intel_svm_unbind,
4757 .sva_get_pasid = intel_svm_get_pasid,
4758 .page_response = intel_svm_page_response,
4760 .default_domain_ops = &(const struct iommu_domain_ops) {
4761 .attach_dev = intel_iommu_attach_device,
4762 .detach_dev = intel_iommu_detach_device,
4763 .map_pages = intel_iommu_map_pages,
4764 .unmap_pages = intel_iommu_unmap_pages,
4765 .iotlb_sync_map = intel_iommu_iotlb_sync_map,
4766 .flush_iotlb_all = intel_flush_iotlb_all,
4767 .iotlb_sync = intel_iommu_tlb_sync,
4768 .iova_to_phys = intel_iommu_iova_to_phys,
4769 .free = intel_iommu_domain_free,
4770 .enforce_cache_coherency = intel_iommu_enforce_cache_coherency,
4774 static void quirk_iommu_igfx(struct pci_dev *dev)
4776 if (risky_device(dev))
4779 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
/* G4x/GM45 integrated gfx dmar support is totally busted. */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);

/* Broadwell igfx malfunctions with dmar */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
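
/*
 * The chipsets below need write-buffer flushing (see rwbf_quirk) even
 * though they do not advertise the RWBF capability themselves.
 */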
static void quirk_iommu_rwbf(struct pci_dev *dev)
{
	if (risky_device(dev))
		return;

	/*
	 * Mobile 4 Series Chipset neglects to set RWBF capability,
	 * but needs it. Same seems to hold for the desktop versions.
	 */
	pci_info(dev, "Forcing write-buffer flush capability\n");
	rwbf_quirk = 1;
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);

#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
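
/*
 * GGC is the GMCH Graphics Control register; the fields above decode the
 * BIOS-programmed graphics stolen-memory size and whether a VT-enabled
 * (shadow GTT) size was selected, which quirk_calpella_no_shadow_gtt()
 * checks below.
 */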
static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
	unsigned short ggc;

	if (risky_device(dev))
		return;

	if (pci_read_config_word(dev, GGC, &ggc))
		return;

	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
		dmar_map_gfx = 0;
	} else if (dmar_map_gfx) {
		/* we have to ensure the gfx device is idle before we flush */
		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
		iommu_set_dma_strict();
	}
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
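
/*
 * The quirk below flags integrated graphics devices (display class plus a
 * device-ID allow list) for which the translation-enable (TE) bit must be
 * left alone: iommu_skip_te_disable is consulted where the driver would
 * otherwise clear the TE bit.
 */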
static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
{
	unsigned short ver;

	if (!IS_GFX_DEVICE(dev))
		return;

	ver = (dev->device >> 8) & 0xff;
	if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
	    ver != 0x4e && ver != 0x8a && ver != 0x98 &&
	    ver != 0x9a && ver != 0xa7)
		return;

	if (risky_device(dev))
		return;

	pci_info(dev, "Skip IOMMU disabling for graphics\n");
	iommu_skip_te_disable = 1;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);

/* On Tylersburg chipsets, some BIOSes have been known to enable the
   ISOCH DMAR unit for the Azalia sound device, but not give it any
   TLB entries, which causes it to deadlock. Check for that. We do
   this in a function called from init_dmars(), instead of in a PCI
   quirk, because we don't want to print the obnoxious "BIOS broken"
   message if VT-d is actually disabled.
*/
static void __init check_tylersburg_isoch(void)
{
	struct pci_dev *pdev;
	uint32_t vtisochctrl;

	/* If there's no Azalia in the system anyway, forget it. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
	if (!pdev)
		return;
	if (risky_device(pdev)) {
		pci_dev_put(pdev);
		return;
	}
	pci_dev_put(pdev);

	/* System Management Registers. Might be hidden, in which case
	   we can't do the sanity check. But that's OK, because the
	   known-broken BIOSes _don't_ actually hide it, so far. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
	if (!pdev)
		return;
	if (risky_device(pdev)) {
		pci_dev_put(pdev);
		return;
	}
	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
		pci_dev_put(pdev);
		return;
	}
	pci_dev_put(pdev);

	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
	if (vtisochctrl & 1)
		return;

	/* Drop all bits other than the number of TLB entries */
	vtisochctrl &= 0x1c;

	/* If we have the recommended number of TLB entries (16), fine. */
	if (vtisochctrl == 0x10)
		return;

	/* Zero TLB entries? You get to ride the short bus to school. */
	if (!vtisochctrl) {
		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		iommu_identity_mapping |= IDENTMAP_AZALIA;
		return;
	}

	pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
		vtisochctrl);
}