1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright © 2006-2014 Intel Corporation.
13 #define pr_fmt(fmt) "DMAR: " fmt
14 #define dev_fmt(fmt) pr_fmt(fmt)
16 #include <linux/crash_dump.h>
17 #include <linux/dma-direct.h>
18 #include <linux/dmi.h>
19 #include <linux/intel-svm.h>
20 #include <linux/memory.h>
21 #include <linux/pci.h>
22 #include <linux/pci-ats.h>
23 #include <linux/spinlock.h>
24 #include <linux/syscore_ops.h>
25 #include <linux/tboot.h>
28 #include "../dma-iommu.h"
29 #include "../irq_remapping.h"
30 #include "../iommu-sva-lib.h"
32 #include "cap_audit.h"
34 #define ROOT_SIZE VTD_PAGE_SIZE
35 #define CONTEXT_SIZE VTD_PAGE_SIZE
37 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
38 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
39 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
40 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
42 #define IOAPIC_RANGE_START (0xfee00000)
43 #define IOAPIC_RANGE_END (0xfeefffff)
44 #define IOVA_START_ADDR (0x1000)
46 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
48 #define MAX_AGAW_WIDTH 64
49 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
51 #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
52 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
54 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
55 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
56 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
57 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
58 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
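/*
 * Worked example of the address-width macros above: with gaw == 48
 * (4-level paging), __DOMAIN_MAX_PFN(48) == (1ULL << 36) - 1 and
 * DOMAIN_MAX_ADDR(48) == (1ULL << 48) - VTD_PAGE_SIZE, i.e. the last
 * 4KiB page below 256TiB.
 */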
60 /* IO virtual address start page frame number */
61 #define IOVA_START_PFN (1)
63 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
65 /* page table handling */
66 #define LEVEL_STRIDE (9)
67 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
69 static inline int agaw_to_level(int agaw)
74 static inline int agaw_to_width(int agaw)
76 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
79 static inline int width_to_agaw(int width)
81 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
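/*
 * Worked example for the conversions above: a 48-bit address width gives
 * width_to_agaw(48) == DIV_ROUND_UP(18, 9) == 2, and agaw_to_width(2) ==
 * min(30 + 18, 64) == 48; a 57-bit width maps to agaw 3. Each AGAW step
 * adds one 9-bit page-table level.
 */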
84 static inline unsigned int level_to_offset_bits(int level)
86 return (level - 1) * LEVEL_STRIDE;
89 static inline int pfn_level_offset(u64 pfn, int level)
91 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
94 static inline u64 level_mask(int level)
96 return -1ULL << level_to_offset_bits(level);
99 static inline u64 level_size(int level)
101 return 1ULL << level_to_offset_bits(level);
104 static inline u64 align_to_level(u64 pfn, int level)
106 return (pfn + level_size(level) - 1) & level_mask(level);
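/*
 * Example of the level helpers: level 1 covers offset bits 0..8 of a PFN
 * and level 2 covers bits 9..17, so level_size(2) == 512 pages (2MiB) and
 * level_size(3) == 512 * 512 pages (1GiB). align_to_level(0x201, 2)
 * rounds the PFN up to 0x400.
 */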
109 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
111 return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
114 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
115 are never going to work. */
116 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
118 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
120 static inline unsigned long page_to_dma_pfn(struct page *pg)
122 return mm_to_dma_pfn(page_to_pfn(pg));
124 static inline unsigned long virt_to_dma_pfn(void *p)
126 return page_to_dma_pfn(virt_to_page(p));
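/*
 * On x86, PAGE_SHIFT and VTD_PAGE_SHIFT are both 12, so the mm-to-DMA
 * PFN conversion above is a shift by zero; it only becomes a real
 * scaling factor if the kernel page size exceeds the 4KiB VT-d page size.
 */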
129 static void __init check_tylersburg_isoch(void);
130 static int rwbf_quirk;
133 * Set to 1 to panic the kernel if VT-d can't be enabled successfully
134 * (used when the kernel is launched with TXT)
136 static int force_on = 0;
137 static int intel_iommu_tboot_noforce;
138 static int no_platform_optin;
140 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
143 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
146 static phys_addr_t root_entry_lctp(struct root_entry *re)
151 return re->lo & VTD_PAGE_MASK;
155 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
158 static phys_addr_t root_entry_uctp(struct root_entry *re)
163 return re->hi & VTD_PAGE_MASK;
166 static inline void context_set_present(struct context_entry *context)
171 static inline void context_set_fault_enable(struct context_entry *context)
173 context->lo &= (((u64)-1) << 2) | 1;
176 static inline void context_set_translation_type(struct context_entry *context,
179 context->lo &= (((u64)-1) << 4) | 3;
180 context->lo |= (value & 3) << 2;
183 static inline void context_set_address_root(struct context_entry *context,
186 context->lo &= ~VTD_PAGE_MASK;
187 context->lo |= value & VTD_PAGE_MASK;
190 static inline void context_set_address_width(struct context_entry *context,
193 context->hi |= value & 7;
196 static inline void context_set_domain_id(struct context_entry *context,
199 context->hi |= (value & ((1 << 16) - 1)) << 8;
202 static inline void context_set_pasid(struct context_entry *context)
204 context->lo |= CONTEXT_PASIDE;
207 static inline int context_domain_id(struct context_entry *c)
209 return((c->hi >> 8) & 0xffff);
212 static inline void context_clear_entry(struct context_entry *context)
218 static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
220 if (!iommu->copied_tables)
223 return test_bit(((long)bus << 8) | devfn, iommu->copied_tables);
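/*
 * The copied_tables bitmap is indexed by the 16-bit PCI requester ID
 * (bus << 8 | devfn), so it tracks one bit for each of the 64K possible
 * source-ids behind this IOMMU.
 */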
227 set_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
229 set_bit(((long)bus << 8) | devfn, iommu->copied_tables);
233 clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
235 clear_bit(((long)bus << 8) | devfn, iommu->copied_tables);
239 * This domain is a static identity mapping domain.
240 * 1. This domain creates a static 1:1 mapping to all usable memory.
241 * 2. It maps to each iommu if successful.
242 * 3. Each iommu maps to this domain if successful.
244 static struct dmar_domain *si_domain;
245 static int hw_pass_through = 1;
247 struct dmar_rmrr_unit {
248 struct list_head list; /* list of rmrr units */
249 struct acpi_dmar_header *hdr; /* ACPI header */
250 u64 base_address; /* reserved base address*/
251 u64 end_address; /* reserved end address */
252 struct dmar_dev_scope *devices; /* target devices */
253 int devices_cnt; /* target device count */
256 struct dmar_atsr_unit {
257 struct list_head list; /* list of ATSR units */
258 struct acpi_dmar_header *hdr; /* ACPI header */
259 struct dmar_dev_scope *devices; /* target devices */
260 int devices_cnt; /* target device count */
261 u8 include_all:1; /* include all ports */
264 struct dmar_satc_unit {
265 struct list_head list; /* list of SATC units */
266 struct acpi_dmar_header *hdr; /* ACPI header */
267 struct dmar_dev_scope *devices; /* target devices */
268 struct intel_iommu *iommu; /* the corresponding iommu */
269 int devices_cnt; /* target device count */
270 u8 atc_required:1; /* ATS is required */
273 static LIST_HEAD(dmar_atsr_units);
274 static LIST_HEAD(dmar_rmrr_units);
275 static LIST_HEAD(dmar_satc_units);
277 #define for_each_rmrr_units(rmrr) \
278 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
280 static void dmar_remove_one_dev_info(struct device *dev);
282 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
283 int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
285 int intel_iommu_enabled = 0;
286 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
288 static int dmar_map_gfx = 1;
289 static int intel_iommu_superpage = 1;
290 static int iommu_identity_mapping;
291 static int iommu_skip_te_disable;
293 #define IDENTMAP_GFX 2
294 #define IDENTMAP_AZALIA 4
296 const struct iommu_ops intel_iommu_ops;
298 static bool translation_pre_enabled(struct intel_iommu *iommu)
300 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
303 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
305 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
308 static void init_translation_status(struct intel_iommu *iommu)
312 gsts = readl(iommu->reg + DMAR_GSTS_REG);
313 if (gsts & DMA_GSTS_TES)
314 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
317 static int __init intel_iommu_setup(char *str)
323 if (!strncmp(str, "on", 2)) {
325 pr_info("IOMMU enabled\n");
326 } else if (!strncmp(str, "off", 3)) {
328 no_platform_optin = 1;
329 pr_info("IOMMU disabled\n");
330 } else if (!strncmp(str, "igfx_off", 8)) {
332 pr_info("Disable GFX device mapping\n");
333 } else if (!strncmp(str, "forcedac", 8)) {
334 pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
335 iommu_dma_forcedac = true;
336 } else if (!strncmp(str, "strict", 6)) {
337 pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
338 iommu_set_dma_strict();
339 } else if (!strncmp(str, "sp_off", 6)) {
340 pr_info("Disable supported super page\n");
341 intel_iommu_superpage = 0;
342 } else if (!strncmp(str, "sm_on", 5)) {
343 pr_info("Enable scalable mode if hardware supports\n");
345 } else if (!strncmp(str, "sm_off", 6)) {
346 pr_info("Scalable mode is disallowed\n");
348 } else if (!strncmp(str, "tboot_noforce", 13)) {
349 pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
350 intel_iommu_tboot_noforce = 1;
352 pr_notice("Unknown option - '%s'\n", str);
355 str += strcspn(str, ",");
362 __setup("intel_iommu=", intel_iommu_setup);
364 void *alloc_pgtable_page(int node)
369 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
371 vaddr = page_address(page);
375 void free_pgtable_page(void *vaddr)
377 free_page((unsigned long)vaddr);
380 static inline int domain_type_is_si(struct dmar_domain *domain)
382 return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
385 static inline bool domain_use_first_level(struct dmar_domain *domain)
387 return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
390 static inline int domain_pfn_supported(struct dmar_domain *domain,
393 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
395 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
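/*
 * Example: a domain with agaw 2 has a 48-bit address width, so addr_width
 * above is 36 and any pfn at or above 1UL << 36 is rejected as outside
 * the domain's addressable range.
 */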
399 * Calculate the Supported Adjusted Guest Address Widths of an IOMMU.
400 * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of
401 * the returned SAGAW.
403 static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu)
405 unsigned long fl_sagaw, sl_sagaw;
407 fl_sagaw = BIT(2) | (cap_fl5lp_support(iommu->cap) ? BIT(3) : 0);
408 sl_sagaw = cap_sagaw(iommu->cap);
410 /* Second level only. */
411 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
414 /* First level only. */
415 if (!ecap_slts(iommu->ecap))
418 return fl_sagaw & sl_sagaw;
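/*
 * The intersection above means that, in scalable mode with both first-
 * and second-level translation available, only widths usable by both are
 * reported. fl_sagaw always encodes 4-level (BIT(2)) and adds 5-level
 * (BIT(3)) when FL5LP is supported, mirroring the SAGAW bit encoding
 * used for second level.
 */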
421 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
426 sagaw = __iommu_calculate_sagaw(iommu);
427 for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) {
428 if (test_bit(agaw, &sagaw))
436 * Calculate max SAGAW for each iommu.
438 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
440 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
444 * Calculate the agaw for each iommu.
445 * "SAGAW" may be different across iommus; use a default agaw, and
446 * fall back to a smaller supported agaw for iommus that don't support the default.
448 int iommu_calculate_agaw(struct intel_iommu *iommu)
450 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
453 static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
455 return sm_supported(iommu) ?
456 ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
459 static void domain_update_iommu_coherency(struct dmar_domain *domain)
461 struct iommu_domain_info *info;
462 struct dmar_drhd_unit *drhd;
463 struct intel_iommu *iommu;
467 domain->iommu_coherency = true;
468 xa_for_each(&domain->iommu_array, i, info) {
470 if (!iommu_paging_structure_coherency(info->iommu)) {
471 domain->iommu_coherency = false;
478 /* No hardware attached; use lowest common denominator */
480 for_each_active_iommu(iommu, drhd) {
481 if (!iommu_paging_structure_coherency(iommu)) {
482 domain->iommu_coherency = false;
489 static int domain_update_iommu_superpage(struct dmar_domain *domain,
490 struct intel_iommu *skip)
492 struct dmar_drhd_unit *drhd;
493 struct intel_iommu *iommu;
496 if (!intel_iommu_superpage)
499 /* set iommu_superpage to the smallest common denominator */
501 for_each_active_iommu(iommu, drhd) {
503 if (domain && domain_use_first_level(domain)) {
504 if (!cap_fl1gp_support(iommu->cap))
507 mask &= cap_super_page_val(iommu->cap);
519 static int domain_update_device_node(struct dmar_domain *domain)
521 struct device_domain_info *info;
522 int nid = NUMA_NO_NODE;
525 spin_lock_irqsave(&domain->lock, flags);
526 list_for_each_entry(info, &domain->devices, link) {
528 * There could possibly be multiple device numa nodes as devices
529 * within the same domain may sit behind different IOMMUs. There
530 * isn't a perfect answer in such a situation, so we use a first-
531 * come, first-served policy.
533 nid = dev_to_node(info->dev);
534 if (nid != NUMA_NO_NODE)
537 spin_unlock_irqrestore(&domain->lock, flags);
542 static void domain_update_iotlb(struct dmar_domain *domain);
544 /* Return the super pagesize bitmap if supported. */
545 static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
547 unsigned long bitmap = 0;
550 * 1-level super page supports page size of 2MiB, 2-level super page
551 * supports page size of both 2MiB and 1GiB.
553 if (domain->iommu_superpage == 1)
555 else if (domain->iommu_superpage == 2)
556 bitmap |= SZ_2M | SZ_1G;
561 /* Some capabilities may be different across iommus */
562 static void domain_update_iommu_cap(struct dmar_domain *domain)
564 domain_update_iommu_coherency(domain);
565 domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
568 * If RHSA is missing, we should default to the device numa domain as a fallback.
571 if (domain->nid == NUMA_NO_NODE)
572 domain->nid = domain_update_device_node(domain);
575 * First-level translation restricts the input-address to a
576 * canonical address (i.e., address bits 63:N have the same
577 * value as address bit [N-1], where N is 48-bits with 4-level
578 * paging and 57-bits with 5-level paging). Hence, skip bit [N-1].
581 if (domain_use_first_level(domain))
582 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
584 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
586 domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
587 domain_update_iotlb(domain);
590 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
593 struct root_entry *root = &iommu->root_entry[bus];
594 struct context_entry *context;
598 * Unless the caller requested to allocate a new entry,
599 * returning a copied context entry makes no sense.
601 if (!alloc && context_copied(iommu, bus, devfn))
605 if (sm_supported(iommu)) {
613 context = phys_to_virt(*entry & VTD_PAGE_MASK);
615 unsigned long phy_addr;
619 context = alloc_pgtable_page(iommu->node);
623 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
624 phy_addr = virt_to_phys((void *)context);
625 *entry = phy_addr | 1;
626 __iommu_flush_cache(iommu, entry, sizeof(*entry));
628 return &context[devfn];
632 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
633 * sub-hierarchy of a candidate PCI-PCI bridge
634 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
635 * @bridge: the candidate PCI-PCI bridge
637 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
640 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
642 struct pci_dev *pdev, *pbridge;
644 if (!dev_is_pci(dev) || !dev_is_pci(bridge))
647 pdev = to_pci_dev(dev);
648 pbridge = to_pci_dev(bridge);
650 if (pbridge->subordinate &&
651 pbridge->subordinate->number <= pdev->bus->number &&
652 pbridge->subordinate->busn_res.end >= pdev->bus->number)
658 static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
660 struct dmar_drhd_unit *drhd;
664 /* We know that this device on this chipset has its own IOMMU.
665 * If we find it under a different IOMMU, then the BIOS is lying
666 * to us. Hope that the IOMMU for this device is actually
667 * disabled, and it needs no translation...
669 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
672 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
677 /* we know that this iommu should be at offset 0xa000 from vtbar */
678 drhd = dmar_find_matched_drhd_unit(pdev);
679 if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
680 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
681 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
688 static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
690 if (!iommu || iommu->drhd->ignored)
693 if (dev_is_pci(dev)) {
694 struct pci_dev *pdev = to_pci_dev(dev);
696 if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
697 pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
698 quirk_ioat_snb_local_iommu(pdev))
705 struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
707 struct dmar_drhd_unit *drhd = NULL;
708 struct pci_dev *pdev = NULL;
709 struct intel_iommu *iommu;
717 if (dev_is_pci(dev)) {
718 struct pci_dev *pf_pdev;
720 pdev = pci_real_dma_dev(to_pci_dev(dev));
722 /* VFs aren't listed in scope tables; we need to look up
723 * the PF instead to find the IOMMU. */
724 pf_pdev = pci_physfn(pdev);
726 segment = pci_domain_nr(pdev->bus);
727 } else if (has_acpi_companion(dev))
728 dev = &ACPI_COMPANION(dev)->dev;
731 for_each_iommu(iommu, drhd) {
732 if (pdev && segment != drhd->segment)
735 for_each_active_dev_scope(drhd->devices,
736 drhd->devices_cnt, i, tmp) {
738 /* For a VF use its original BDF# not that of the PF
739 * which we used for the IOMMU lookup. Strictly speaking
740 * we could do this for all PCI devices; we only need to
741 * get the BDF# from the scope table for ACPI matches. */
742 if (pdev && pdev->is_virtfn)
746 *bus = drhd->devices[i].bus;
747 *devfn = drhd->devices[i].devfn;
752 if (is_downstream_to_pci_bridge(dev, tmp))
756 if (pdev && drhd->include_all) {
759 *bus = pdev->bus->number;
760 *devfn = pdev->devfn;
767 if (iommu_is_dummy(iommu, dev))
775 static void domain_flush_cache(struct dmar_domain *domain,
776 void *addr, int size)
778 if (!domain->iommu_coherency)
779 clflush_cache_range(addr, size);
782 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
784 struct context_entry *context;
787 spin_lock(&iommu->lock);
788 context = iommu_context_addr(iommu, bus, devfn, 0);
790 ret = context_present(context);
791 spin_unlock(&iommu->lock);
795 static void free_context_table(struct intel_iommu *iommu)
797 struct context_entry *context;
800 if (!iommu->root_entry)
803 for (i = 0; i < ROOT_ENTRY_NR; i++) {
804 context = iommu_context_addr(iommu, i, 0, 0);
806 free_pgtable_page(context);
808 if (!sm_supported(iommu))
811 context = iommu_context_addr(iommu, i, 0x80, 0);
813 free_pgtable_page(context);
816 free_pgtable_page(iommu->root_entry);
817 iommu->root_entry = NULL;
820 #ifdef CONFIG_DMAR_DEBUG
821 static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
822 u8 bus, u8 devfn, struct dma_pte *parent, int level)
828 offset = pfn_level_offset(pfn, level);
829 pte = &parent[offset];
830 if (!pte || (dma_pte_superpage(pte) || !dma_pte_present(pte))) {
831 pr_info("PTE not present at level %d\n", level);
835 pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
840 parent = phys_to_virt(dma_pte_addr(pte));
845 void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
846 unsigned long long addr, u32 pasid)
848 struct pasid_dir_entry *dir, *pde;
849 struct pasid_entry *entries, *pte;
850 struct context_entry *ctx_entry;
851 struct root_entry *rt_entry;
852 int i, dir_index, index, level;
853 u8 devfn = source_id & 0xff;
854 u8 bus = source_id >> 8;
855 struct dma_pte *pgtable;
857 pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
859 /* root entry dump */
860 rt_entry = &iommu->root_entry[bus];
862 pr_info("root table entry is not present\n");
866 if (sm_supported(iommu))
867 pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
868 rt_entry->hi, rt_entry->lo);
870 pr_info("root entry: 0x%016llx", rt_entry->lo);
872 /* context entry dump */
873 ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
875 pr_info("context table entry is not present\n");
879 pr_info("context entry: hi 0x%016llx, low 0x%016llx\n",
880 ctx_entry->hi, ctx_entry->lo);
882 /* legacy mode does not require PASID entries */
883 if (!sm_supported(iommu)) {
884 level = agaw_to_level(ctx_entry->hi & 7);
885 pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
889 /* get the pointer to pasid directory entry */
890 dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
892 pr_info("pasid directory entry is not present\n");
895 /* For request-without-pasid, get the pasid from context entry */
896 if (intel_iommu_sm && pasid == INVALID_IOASID)
897 pasid = PASID_RID2PASID;
899 dir_index = pasid >> PASID_PDE_SHIFT;
900 pde = &dir[dir_index];
901 pr_info("pasid dir entry: 0x%016llx\n", pde->val);
903 /* get the pointer to the pasid table entry */
904 entries = get_pasid_table_from_pde(pde);
906 pr_info("pasid table entry is not present\n");
909 index = pasid & PASID_PTE_MASK;
910 pte = &entries[index];
911 for (i = 0; i < ARRAY_SIZE(pte->val); i++)
912 pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
914 if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
915 level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
916 pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
918 level = agaw_to_level((pte->val[0] >> 2) & 0x7);
919 pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK);
923 pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level);
927 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
928 unsigned long pfn, int *target_level)
930 struct dma_pte *parent, *pte;
931 int level = agaw_to_level(domain->agaw);
934 BUG_ON(!domain->pgd);
936 if (!domain_pfn_supported(domain, pfn))
937 /* Address beyond IOMMU's addressing capabilities. */
940 parent = domain->pgd;
945 offset = pfn_level_offset(pfn, level);
946 pte = &parent[offset];
947 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
949 if (level == *target_level)
952 if (!dma_pte_present(pte)) {
955 tmp_page = alloc_pgtable_page(domain->nid);
960 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
961 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
962 if (domain_use_first_level(domain)) {
963 pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
964 if (iommu_is_dma_domain(&domain->domain))
965 pteval |= DMA_FL_PTE_ACCESS;
967 if (cmpxchg64(&pte->val, 0ULL, pteval))
968 /* Someone else set it while we were thinking; use theirs. */
969 free_pgtable_page(tmp_page);
971 domain_flush_cache(domain, pte, sizeof(*pte));
976 parent = phys_to_virt(dma_pte_addr(pte));
981 *target_level = level;
986 /* return address's pte at specific level */
987 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
989 int level, int *large_page)
991 struct dma_pte *parent, *pte;
992 int total = agaw_to_level(domain->agaw);
995 parent = domain->pgd;
996 while (level <= total) {
997 offset = pfn_level_offset(pfn, total);
998 pte = &parent[offset];
1002 if (!dma_pte_present(pte)) {
1003 *large_page = total;
1007 if (dma_pte_superpage(pte)) {
1008 *large_page = total;
1012 parent = phys_to_virt(dma_pte_addr(pte));
1018 /* clear last level pte; a tlb flush should follow */
1019 static void dma_pte_clear_range(struct dmar_domain *domain,
1020 unsigned long start_pfn,
1021 unsigned long last_pfn)
1023 unsigned int large_page;
1024 struct dma_pte *first_pte, *pte;
1026 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1027 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1028 BUG_ON(start_pfn > last_pfn);
1030 /* we don't need lock here; nobody else touches the iova range */
1033 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1035 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1040 start_pfn += lvl_to_nr_pages(large_page);
1042 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1044 domain_flush_cache(domain, first_pte,
1045 (void *)pte - (void *)first_pte);
1047 } while (start_pfn && start_pfn <= last_pfn);
1050 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1051 int retain_level, struct dma_pte *pte,
1052 unsigned long pfn, unsigned long start_pfn,
1053 unsigned long last_pfn)
1055 pfn = max(start_pfn, pfn);
1056 pte = &pte[pfn_level_offset(pfn, level)];
1059 unsigned long level_pfn;
1060 struct dma_pte *level_pte;
1062 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1065 level_pfn = pfn & level_mask(level);
1066 level_pte = phys_to_virt(dma_pte_addr(pte));
1069 dma_pte_free_level(domain, level - 1, retain_level,
1070 level_pte, level_pfn, start_pfn,
1075 * Free the page table if we're below the level we want to
1076 * retain and the range covers the entire table.
1078 if (level < retain_level && !(start_pfn > level_pfn ||
1079 last_pfn < level_pfn + level_size(level) - 1)) {
1081 domain_flush_cache(domain, pte, sizeof(*pte));
1082 free_pgtable_page(level_pte);
1085 pfn += level_size(level);
1086 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1090 * clear last level (leaf) ptes and free page table pages below the
1091 * level we wish to keep intact.
1093 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1094 unsigned long start_pfn,
1095 unsigned long last_pfn,
1098 dma_pte_clear_range(domain, start_pfn, last_pfn);
1100 /* We don't need lock here; nobody else touches the iova range */
1101 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1102 domain->pgd, 0, start_pfn, last_pfn);
1105 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1106 free_pgtable_page(domain->pgd);
1111 /* When a page at a given level is being unlinked from its parent, we don't
1112 need to *modify* it at all. All we need to do is make a list of all the
1113 pages which can be freed just as soon as we've flushed the IOTLB and we
1114 know the hardware page-walk will no longer touch them.
1115 The 'pte' argument is the *parent* PTE, pointing to the page that is to be freed. */
1117 static void dma_pte_list_pagetables(struct dmar_domain *domain,
1118 int level, struct dma_pte *pte,
1119 struct list_head *freelist)
1123 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1124 list_add_tail(&pg->lru, freelist);
1129 pte = page_address(pg);
1131 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1132 dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1134 } while (!first_pte_in_page(pte));
1137 static void dma_pte_clear_level(struct dmar_domain *domain, int level,
1138 struct dma_pte *pte, unsigned long pfn,
1139 unsigned long start_pfn, unsigned long last_pfn,
1140 struct list_head *freelist)
1142 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1144 pfn = max(start_pfn, pfn);
1145 pte = &pte[pfn_level_offset(pfn, level)];
1148 unsigned long level_pfn = pfn & level_mask(level);
1150 if (!dma_pte_present(pte))
1153 /* If range covers entire pagetable, free it */
1154 if (start_pfn <= level_pfn &&
1155 last_pfn >= level_pfn + level_size(level) - 1) {
1156 /* These subordinate page tables are going away entirely. Don't
1157 bother to clear them; we're just going to *free* them. */
1158 if (level > 1 && !dma_pte_superpage(pte))
1159 dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1165 } else if (level > 1) {
1166 /* Recurse down into a level that isn't *entirely* obsolete */
1167 dma_pte_clear_level(domain, level - 1,
1168 phys_to_virt(dma_pte_addr(pte)),
1169 level_pfn, start_pfn, last_pfn,
1173 pfn = level_pfn + level_size(level);
1174 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1177 domain_flush_cache(domain, first_pte,
1178 (void *)++last_pte - (void *)first_pte);
1181 /* We can't just free the pages because the IOMMU may still be walking
1182 the page tables, and may have cached the intermediate levels. The
1183 pages can only be freed after the IOTLB flush has been done. */
1184 static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
1185 unsigned long last_pfn, struct list_head *freelist)
1187 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1188 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1189 BUG_ON(start_pfn > last_pfn);
1191 /* we don't need lock here; nobody else touches the iova range */
1192 dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1193 domain->pgd, 0, start_pfn, last_pfn, freelist);
1196 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1197 struct page *pgd_page = virt_to_page(domain->pgd);
1198 list_add_tail(&pgd_page->lru, freelist);
1203 /* iommu handling */
1204 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1206 struct root_entry *root;
1208 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1210 pr_err("Allocating root entry for %s failed\n",
1215 __iommu_flush_cache(iommu, root, ROOT_SIZE);
1216 iommu->root_entry = root;
1221 static void iommu_set_root_entry(struct intel_iommu *iommu)
1227 addr = virt_to_phys(iommu->root_entry);
1228 if (sm_supported(iommu))
1229 addr |= DMA_RTADDR_SMT;
1231 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1232 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1234 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1236 /* Make sure the hardware completes it */
1237 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1238 readl, (sts & DMA_GSTS_RTPS), sts);
1240 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1243 * Hardware invalidates all DMA remapping hardware translation
1244 * caches as part of SRTP flow.
1246 if (cap_esrtps(iommu->cap))
1249 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1250 if (sm_supported(iommu))
1251 qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
1252 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1255 void iommu_flush_write_buffer(struct intel_iommu *iommu)
1260 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1263 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1264 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1266 /* Make sure the hardware completes it */
1267 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1268 readl, (!(val & DMA_GSTS_WBFS)), val);
1270 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1273 /* The return value determines if we need a write buffer flush */
1274 static void __iommu_flush_context(struct intel_iommu *iommu,
1275 u16 did, u16 source_id, u8 function_mask,
1282 case DMA_CCMD_GLOBAL_INVL:
1283 val = DMA_CCMD_GLOBAL_INVL;
1285 case DMA_CCMD_DOMAIN_INVL:
1286 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1288 case DMA_CCMD_DEVICE_INVL:
1289 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1290 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1295 val |= DMA_CCMD_ICC;
1297 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1298 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1300 /* Make sure the hardware completes it */
1301 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1302 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1304 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1307 /* The return value determines if we need a write buffer flush */
1308 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1309 u64 addr, unsigned int size_order, u64 type)
1311 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1312 u64 val = 0, val_iva = 0;
1316 case DMA_TLB_GLOBAL_FLUSH:
1317 /* a global flush doesn't need to set IVA_REG */
1318 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1320 case DMA_TLB_DSI_FLUSH:
1321 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1323 case DMA_TLB_PSI_FLUSH:
1324 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1325 /* IH bit is passed in as part of address */
1326 val_iva = size_order | addr;
1331 /* Note: set drain read/write */
1334 * This is probably meant to be extra secure. Looks like we can
1335 * ignore it without any impact.
1337 if (cap_read_drain(iommu->cap))
1338 val |= DMA_TLB_READ_DRAIN;
1340 if (cap_write_drain(iommu->cap))
1341 val |= DMA_TLB_WRITE_DRAIN;
1343 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1344 /* Note: Only uses first TLB reg currently */
1346 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1347 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1349 /* Make sure the hardware completes it */
1350 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1351 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1353 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1355 /* check IOTLB invalidation granularity */
1356 if (DMA_TLB_IAIG(val) == 0)
1357 pr_err("Flush IOTLB failed\n");
1358 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1359 pr_debug("TLB flush request %Lx, actual %Lx\n",
1360 (unsigned long long)DMA_TLB_IIRG(type),
1361 (unsigned long long)DMA_TLB_IAIG(val));
1364 static struct device_domain_info *
1365 domain_lookup_dev_info(struct dmar_domain *domain,
1366 struct intel_iommu *iommu, u8 bus, u8 devfn)
1368 struct device_domain_info *info;
1369 unsigned long flags;
1371 spin_lock_irqsave(&domain->lock, flags);
1372 list_for_each_entry(info, &domain->devices, link) {
1373 if (info->iommu == iommu && info->bus == bus &&
1374 info->devfn == devfn) {
1375 spin_unlock_irqrestore(&domain->lock, flags);
1379 spin_unlock_irqrestore(&domain->lock, flags);
1384 static void domain_update_iotlb(struct dmar_domain *domain)
1386 struct device_domain_info *info;
1387 bool has_iotlb_device = false;
1388 unsigned long flags;
1390 spin_lock_irqsave(&domain->lock, flags);
1391 list_for_each_entry(info, &domain->devices, link) {
1392 if (info->ats_enabled) {
1393 has_iotlb_device = true;
1397 domain->has_iotlb_device = has_iotlb_device;
1398 spin_unlock_irqrestore(&domain->lock, flags);
1401 static void iommu_enable_pci_caps(struct device_domain_info *info)
1403 struct pci_dev *pdev;
1405 if (!info || !dev_is_pci(info->dev))
1408 pdev = to_pci_dev(info->dev);
1409 /* For an IOMMU that supports device IOTLB throttling (DIT), we assign
1410 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1411 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1412 * reserved, which should be set to 0.
1414 if (!ecap_dit(info->iommu->ecap))
1417 struct pci_dev *pf_pdev;
1419 /* pdev will be returned if device is not a vf */
1420 pf_pdev = pci_physfn(pdev);
1421 info->pfsid = pci_dev_id(pf_pdev);
1424 /* The PCIe spec, in its wisdom, declares that the behaviour of
1425 the device if you enable PASID support after ATS support is
1426 undefined. So always enable PASID support on devices which
1427 have it, even if we can't yet know if we're ever going to use it. */
1429 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1430 info->pasid_enabled = 1;
1432 if (info->pri_supported &&
1433 (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
1434 !pci_reset_pri(pdev) && !pci_enable_pri(pdev, PRQ_DEPTH))
1435 info->pri_enabled = 1;
1437 if (info->ats_supported && pci_ats_page_aligned(pdev) &&
1438 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1439 info->ats_enabled = 1;
1440 domain_update_iotlb(info->domain);
1441 info->ats_qdep = pci_ats_queue_depth(pdev);
1445 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1447 struct pci_dev *pdev;
1449 if (!dev_is_pci(info->dev))
1452 pdev = to_pci_dev(info->dev);
1454 if (info->ats_enabled) {
1455 pci_disable_ats(pdev);
1456 info->ats_enabled = 0;
1457 domain_update_iotlb(info->domain);
1460 if (info->pri_enabled) {
1461 pci_disable_pri(pdev);
1462 info->pri_enabled = 0;
1465 if (info->pasid_enabled) {
1466 pci_disable_pasid(pdev);
1467 info->pasid_enabled = 0;
1471 static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
1472 u64 addr, unsigned int mask)
1476 if (!info || !info->ats_enabled)
1479 sid = info->bus << 8 | info->devfn;
1480 qdep = info->ats_qdep;
1481 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1485 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1486 u64 addr, unsigned mask)
1488 struct device_domain_info *info;
1489 unsigned long flags;
1491 if (!domain->has_iotlb_device)
1494 spin_lock_irqsave(&domain->lock, flags);
1495 list_for_each_entry(info, &domain->devices, link)
1496 __iommu_flush_dev_iotlb(info, addr, mask);
1497 spin_unlock_irqrestore(&domain->lock, flags);
1500 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1501 struct dmar_domain *domain,
1502 unsigned long pfn, unsigned int pages,
1505 unsigned int aligned_pages = __roundup_pow_of_two(pages);
1506 unsigned int mask = ilog2(aligned_pages);
1507 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1508 u16 did = domain_id_iommu(domain, iommu);
1515 if (domain_use_first_level(domain)) {
1516 qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih);
1518 unsigned long bitmask = aligned_pages - 1;
1521 * PSI masks the low order bits of the base address. If the
1522 * address isn't aligned to the mask, then compute a mask value
1523 * needed to ensure the target range is flushed.
1525 if (unlikely(bitmask & pfn)) {
1526 unsigned long end_pfn = pfn + pages - 1, shared_bits;
1529 * Since end_pfn <= pfn + bitmask, the only way bits
1530 * higher than bitmask can differ in pfn and end_pfn is
1531 * by carrying. This means after masking out bitmask,
1532 * high bits starting with the first set bit in
1533 * shared_bits are all equal in both pfn and end_pfn.
1535 shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
1536 mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
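/*
 * Worked example of the mask computation above: pfn == 3 and pages == 4
 * gives aligned_pages == 4 and bitmask == 3, which overlaps pfn.
 * end_pfn == 6, pfn ^ end_pfn == 0x5, so shared_bits == ~0x5 & ~0x3 and
 * __ffs(shared_bits) == 3: an order-3 (8-page) flush aligned at pfn 0
 * covers the whole 3..6 range.
 */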
1540 * Fall back to domain selective flush if no PSI support or
1541 * the size is too big.
1543 if (!cap_pgsel_inv(iommu->cap) ||
1544 mask > cap_max_amask_val(iommu->cap))
1545 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1548 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1553 * In caching mode, changes of pages from non-present to present require
1554 * a flush. However, the device IOTLB doesn't need to be flushed in this case.
1556 if (!cap_caching_mode(iommu->cap) || !map)
1557 iommu_flush_dev_iotlb(domain, addr, mask);
1560 /* Notification for newly created mappings */
1561 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1562 struct dmar_domain *domain,
1563 unsigned long pfn, unsigned int pages)
1566 * It's a non-present to present mapping. Only flush if caching mode and second level.
1569 if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
1570 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1572 iommu_flush_write_buffer(iommu);
1575 static void intel_flush_iotlb_all(struct iommu_domain *domain)
1577 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
1578 struct iommu_domain_info *info;
1581 xa_for_each(&dmar_domain->iommu_array, idx, info) {
1582 struct intel_iommu *iommu = info->iommu;
1583 u16 did = domain_id_iommu(dmar_domain, iommu);
1585 if (domain_use_first_level(dmar_domain))
1586 qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0);
1588 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1591 if (!cap_caching_mode(iommu->cap))
1592 iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH);
1596 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1599 unsigned long flags;
1601 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1604 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1605 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1606 pmen &= ~DMA_PMEN_EPM;
1607 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1609 /* wait for the protected region status bit to clear */
1610 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1611 readl, !(pmen & DMA_PMEN_PRS), pmen);
1613 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1616 static void iommu_enable_translation(struct intel_iommu *iommu)
1619 unsigned long flags;
1621 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1622 iommu->gcmd |= DMA_GCMD_TE;
1623 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1625 /* Make sure the hardware completes it */
1626 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1627 readl, (sts & DMA_GSTS_TES), sts);
1629 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1632 static void iommu_disable_translation(struct intel_iommu *iommu)
1637 if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
1638 (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
1641 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1642 iommu->gcmd &= ~DMA_GCMD_TE;
1643 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1645 /* Make sure the hardware completes it */
1646 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1647 readl, (!(sts & DMA_GSTS_TES)), sts);
1649 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1652 static int iommu_init_domains(struct intel_iommu *iommu)
1656 ndomains = cap_ndoms(iommu->cap);
1657 pr_debug("%s: Number of Domains supported <%d>\n",
1658 iommu->name, ndomains);
1660 spin_lock_init(&iommu->lock);
1662 iommu->domain_ids = bitmap_zalloc(ndomains, GFP_KERNEL);
1663 if (!iommu->domain_ids)
1667 * If Caching mode is set, then invalid translations are tagged
1668 * with domain-id 0, hence we need to pre-allocate it. We also
1669 * use domain-id 0 as a marker for non-allocated domain-id, so
1670 * make sure it is not used for a real domain.
1672 set_bit(0, iommu->domain_ids);
1675 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1676 * entry for first-level or pass-through translation modes should
1677 * be programmed with a domain id different from those used for
1678 * second-level or nested translation. We reserve a domain id for this purpose.
1681 if (sm_supported(iommu))
1682 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1687 static void disable_dmar_iommu(struct intel_iommu *iommu)
1689 if (!iommu->domain_ids)
1693 * All iommu domains must have been detached from the devices,
1694 * hence there should be no domain IDs in use.
1696 if (WARN_ON(bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap))
1697 > NUM_RESERVED_DID))
1700 if (iommu->gcmd & DMA_GCMD_TE)
1701 iommu_disable_translation(iommu);
1704 static void free_dmar_iommu(struct intel_iommu *iommu)
1706 if (iommu->domain_ids) {
1707 bitmap_free(iommu->domain_ids);
1708 iommu->domain_ids = NULL;
1711 if (iommu->copied_tables) {
1712 bitmap_free(iommu->copied_tables);
1713 iommu->copied_tables = NULL;
1716 /* free context mapping */
1717 free_context_table(iommu);
1719 #ifdef CONFIG_INTEL_IOMMU_SVM
1720 if (pasid_supported(iommu)) {
1721 if (ecap_prs(iommu->ecap))
1722 intel_svm_finish_prq(iommu);
1724 if (vccap_pasid(iommu->vccap))
1725 ioasid_unregister_allocator(&iommu->pasid_allocator);
1731 * Check and return whether first level is used by default for DMA translation.
1734 static bool first_level_by_default(unsigned int type)
1736 /* Only SL is available in legacy mode */
1737 if (!scalable_mode_support())
1740 /* Only level (either FL or SL) is available, just use it */
1741 if (intel_cap_flts_sanity() ^ intel_cap_slts_sanity())
1742 return intel_cap_flts_sanity();
1744 /* Both levels are available, decide it based on domain type */
1745 return type != IOMMU_DOMAIN_UNMANAGED;
1748 static struct dmar_domain *alloc_domain(unsigned int type)
1750 struct dmar_domain *domain;
1752 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
1756 domain->nid = NUMA_NO_NODE;
1757 if (first_level_by_default(type))
1758 domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
1759 domain->has_iotlb_device = false;
1760 INIT_LIST_HEAD(&domain->devices);
1761 spin_lock_init(&domain->lock);
1762 xa_init(&domain->iommu_array);
1767 static int domain_attach_iommu(struct dmar_domain *domain,
1768 struct intel_iommu *iommu)
1770 struct iommu_domain_info *info, *curr;
1771 unsigned long ndomains;
1772 int num, ret = -ENOSPC;
1774 info = kzalloc(sizeof(*info), GFP_KERNEL);
1778 spin_lock(&iommu->lock);
1779 curr = xa_load(&domain->iommu_array, iommu->seq_id);
1782 spin_unlock(&iommu->lock);
1787 ndomains = cap_ndoms(iommu->cap);
1788 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1789 if (num >= ndomains) {
1790 pr_err("%s: No free domain ids\n", iommu->name);
1794 set_bit(num, iommu->domain_ids);
1797 info->iommu = iommu;
1798 curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
1799 NULL, info, GFP_ATOMIC);
1801 ret = xa_err(curr) ? : -EBUSY;
1804 domain_update_iommu_cap(domain);
1806 spin_unlock(&iommu->lock);
1810 clear_bit(info->did, iommu->domain_ids);
1812 spin_unlock(&iommu->lock);
1817 static void domain_detach_iommu(struct dmar_domain *domain,
1818 struct intel_iommu *iommu)
1820 struct iommu_domain_info *info;
1822 spin_lock(&iommu->lock);
1823 info = xa_load(&domain->iommu_array, iommu->seq_id);
1824 if (--info->refcnt == 0) {
1825 clear_bit(info->did, iommu->domain_ids);
1826 xa_erase(&domain->iommu_array, iommu->seq_id);
1827 domain->nid = NUMA_NO_NODE;
1828 domain_update_iommu_cap(domain);
1831 spin_unlock(&iommu->lock);
1834 static inline int guestwidth_to_adjustwidth(int gaw)
1837 int r = (gaw - 12) % 9;
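/*
 * The remainder above is used to round the guest width up to the next
 * width expressible as 12 + 9 * n, i.e. an exact number of 9-bit levels:
 * for example gaw 48 stays 48, while gaw 50 rounds up to 57 (assuming
 * the usual round-up-and-cap-at-64 handling that follows, not shown here).
 */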
1848 static void domain_exit(struct dmar_domain *domain)
1851 LIST_HEAD(freelist);
1853 domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist);
1854 put_pages_list(&freelist);
1857 if (WARN_ON(!list_empty(&domain->devices)))
1864 * Get the PASID directory size for scalable mode context entry.
1865 * Value of X in the PDTS field of a scalable mode context entry
1866 * indicates PASID directory with 2^(X + 7) entries.
1868 static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1870 unsigned long pds, max_pde;
1872 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1873 pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
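/*
 * Example (assuming PASID_PDE_SHIFT == 6, i.e. 64 PASIDs per directory
 * entry): a table supporting 2^20 PASIDs has max_pde == 1 << 14, so the
 * directory needs 2^14 entries and, per the 2^(X + 7) encoding described
 * above, the PDTS value works out to 7.
 */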
1881 * Set the RID_PASID field of a scalable mode context entry. The
1882 * IOMMU hardware will use the PASID value set in this field for
1883 * DMA translations of DMA requests without PASID.
1886 context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1888 context->hi |= pasid & ((1 << 20) - 1);
1892 * Set the DTE (Device-TLB Enable) field of a scalable mode context entry.
1895 static inline void context_set_sm_dte(struct context_entry *context)
1897 context->lo |= (1 << 2);
1901 * Set the PRE (Page Request Enable) field of a scalable mode context entry.
1904 static inline void context_set_sm_pre(struct context_entry *context)
1906 context->lo |= (1 << 4);
1909 /* Convert value to context PASID directory size field coding. */
1910 #define context_pdts(pds) (((pds) & 0x7) << 9)
1912 static int domain_context_mapping_one(struct dmar_domain *domain,
1913 struct intel_iommu *iommu,
1914 struct pasid_table *table,
1917 struct device_domain_info *info =
1918 domain_lookup_dev_info(domain, iommu, bus, devfn);
1919 u16 did = domain_id_iommu(domain, iommu);
1920 int translation = CONTEXT_TT_MULTI_LEVEL;
1921 struct context_entry *context;
1926 if (hw_pass_through && domain_type_is_si(domain))
1927 translation = CONTEXT_TT_PASS_THROUGH;
1929 pr_debug("Set context mapping for %02x:%02x.%d\n",
1930 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1932 BUG_ON(!domain->pgd);
1934 spin_lock(&iommu->lock);
1936 context = iommu_context_addr(iommu, bus, devfn, 1);
1941 if (context_present(context) && !context_copied(iommu, bus, devfn))
1945 * For kdump cases, old valid entries may be cached due to the
1946 * in-flight DMA and copied pgtable, but there is no unmapping
1947 * behaviour for them, thus we need an explicit cache flush for
1948 * the newly-mapped device. For kdump, at this point, the device
1949 * is supposed to finish reset at its driver probe stage, so no
1950 * in-flight DMA will exist, and we don't need to worry about it anymore. */
1953 if (context_copied(iommu, bus, devfn)) {
1954 u16 did_old = context_domain_id(context);
1956 if (did_old < cap_ndoms(iommu->cap)) {
1957 iommu->flush.flush_context(iommu, did_old,
1958 (((u16)bus) << 8) | devfn,
1959 DMA_CCMD_MASK_NOBIT,
1960 DMA_CCMD_DEVICE_INVL);
1961 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
1965 clear_context_copied(iommu, bus, devfn);
1968 context_clear_entry(context);
1970 if (sm_supported(iommu)) {
1975 /* Setup the PASID DIR pointer: */
1976 pds = context_get_sm_pds(table);
1977 context->lo = (u64)virt_to_phys(table->table) |
1980 /* Setup the RID_PASID field: */
1981 context_set_sm_rid2pasid(context, PASID_RID2PASID);
1984 * Setup the Device-TLB enable bit and the Page request enable bit:
1987 if (info && info->ats_supported)
1988 context_set_sm_dte(context);
1989 if (info && info->pri_supported)
1990 context_set_sm_pre(context);
1991 if (info && info->pasid_supported)
1992 context_set_pasid(context);
1994 struct dma_pte *pgd = domain->pgd;
1997 context_set_domain_id(context, did);
1999 if (translation != CONTEXT_TT_PASS_THROUGH) {
2001 * Skip top levels of page tables for an iommu which has
2002 * a smaller agaw than the default. Unnecessary for PT mode.
2004 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2006 pgd = phys_to_virt(dma_pte_addr(pgd));
2007 if (!dma_pte_present(pgd))
2011 if (info && info->ats_supported)
2012 translation = CONTEXT_TT_DEV_IOTLB;
2014 translation = CONTEXT_TT_MULTI_LEVEL;
2016 context_set_address_root(context, virt_to_phys(pgd));
2017 context_set_address_width(context, agaw);
2020 * In pass through mode, AW must be programmed to
2021 * indicate the largest AGAW value supported by
2022 * hardware. And ASR is ignored by hardware.
2024 context_set_address_width(context, iommu->msagaw);
2027 context_set_translation_type(context, translation);
2030 context_set_fault_enable(context);
2031 context_set_present(context);
2032 if (!ecap_coherent(iommu->ecap))
2033 clflush_cache_range(context, sizeof(*context));
2036 * It's a non-present to present mapping. If hardware doesn't cache
2037 * non-present entries we only need to flush the write-buffer. If it
2038 * _does_ cache non-present entries, then it does so in the special
2039 * domain #0, which we have to flush:
2041 if (cap_caching_mode(iommu->cap)) {
2042 iommu->flush.flush_context(iommu, 0,
2043 (((u16)bus) << 8) | devfn,
2044 DMA_CCMD_MASK_NOBIT,
2045 DMA_CCMD_DEVICE_INVL);
2046 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2048 iommu_flush_write_buffer(iommu);
2050 iommu_enable_pci_caps(info);
2055 spin_unlock(&iommu->lock);
2060 struct domain_context_mapping_data {
2061 struct dmar_domain *domain;
2062 struct intel_iommu *iommu;
2063 struct pasid_table *table;
2066 static int domain_context_mapping_cb(struct pci_dev *pdev,
2067 u16 alias, void *opaque)
2069 struct domain_context_mapping_data *data = opaque;
2071 return domain_context_mapping_one(data->domain, data->iommu,
2072 data->table, PCI_BUS_NUM(alias),
2077 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2079 struct domain_context_mapping_data data;
2080 struct pasid_table *table;
2081 struct intel_iommu *iommu;
2084 iommu = device_to_iommu(dev, &bus, &devfn);
2088 table = intel_pasid_get_table(dev);
2090 if (!dev_is_pci(dev))
2091 return domain_context_mapping_one(domain, iommu, table,
2094 data.domain = domain;
2098 return pci_for_each_dma_alias(to_pci_dev(dev),
2099 &domain_context_mapping_cb, &data);
2102 static int domain_context_mapped_cb(struct pci_dev *pdev,
2103 u16 alias, void *opaque)
2105 struct intel_iommu *iommu = opaque;
2107 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2110 static int domain_context_mapped(struct device *dev)
2112 struct intel_iommu *iommu;
2115 iommu = device_to_iommu(dev, &bus, &devfn);
2119 if (!dev_is_pci(dev))
2120 return device_context_mapped(iommu, bus, devfn);
2122 return !pci_for_each_dma_alias(to_pci_dev(dev),
2123 domain_context_mapped_cb, iommu);
2126 /* Returns a number of VTD pages, but aligned to MM page size */
2127 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2130 host_addr &= ~PAGE_MASK;
2131 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
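/*
 * Example: host_addr == 0x1234 and size == 0x2000 leaves an offset of
 * 0x234 within the first page, so PAGE_ALIGN(0x2234) == 0x3000 and the
 * mapping needs three 4KiB VT-d pages.
 */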
2134 /* Return largest possible superpage level for a given mapping */
2135 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2136 unsigned long iov_pfn,
2137 unsigned long phy_pfn,
2138 unsigned long pages)
2140 int support, level = 1;
2141 unsigned long pfnmerge;
2143 support = domain->iommu_superpage;
2145 /* To use a large page, the virtual *and* physical addresses
2146 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2147 of them will mean we have to use smaller pages. So just
2148 merge them and check both at once. */
2149 pfnmerge = iov_pfn | phy_pfn;
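/*
 * Example: if both iov_pfn and phy_pfn are multiples of 512 (2MiB
 * aligned) and at least 512 pages are being mapped, the merged PFN has
 * no bits set below VTD_STRIDE_SHIFT and a 2MiB (level 2) superpage can
 * be used; any misaligned low bit forces 4KiB mappings instead.
 */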
2151 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2152 pages >>= VTD_STRIDE_SHIFT;
2155 pfnmerge >>= VTD_STRIDE_SHIFT;
2163 * Ensure that old small page tables are removed to make room for superpage(s).
2164 * We're going to add new large pages, so make sure we don't remove their parent
2165 * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared.
2167 static void switch_to_super_page(struct dmar_domain *domain,
2168 unsigned long start_pfn,
2169 unsigned long end_pfn, int level)
2171 unsigned long lvl_pages = lvl_to_nr_pages(level);
2172 struct iommu_domain_info *info;
2173 struct dma_pte *pte = NULL;
2176 while (start_pfn <= end_pfn) {
2178 pte = pfn_to_dma_pte(domain, start_pfn, &level);
2180 if (dma_pte_present(pte)) {
2181 dma_pte_free_pagetable(domain, start_pfn,
2182 start_pfn + lvl_pages - 1,
2185 xa_for_each(&domain->iommu_array, i, info)
2186 iommu_flush_iotlb_psi(info->iommu, domain,
2187 start_pfn, lvl_pages,
2192 start_pfn += lvl_pages;
2193 if (first_pte_in_page(pte))
2199 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2200 unsigned long phys_pfn, unsigned long nr_pages, int prot)
2202 struct dma_pte *first_pte = NULL, *pte = NULL;
2203 unsigned int largepage_lvl = 0;
2204 unsigned long lvl_pages = 0;
2208 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2210 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2213 attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
2214 attr |= DMA_FL_PTE_PRESENT;
2215 if (domain_use_first_level(domain)) {
2216 attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
2217 if (prot & DMA_PTE_WRITE)
2218 attr |= DMA_FL_PTE_DIRTY;
2221 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
2223 while (nr_pages > 0) {
2227 largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
2228 phys_pfn, nr_pages);
2230 pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2235 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2237 /* It is a large page */
2238 if (largepage_lvl > 1) {
2239 unsigned long end_pfn;
2240 unsigned long pages_to_remove;
2242 pteval |= DMA_PTE_LARGE_PAGE;
2243 pages_to_remove = min_t(unsigned long, nr_pages,
2244 nr_pte_to_next_page(pte) * lvl_pages);
2245 end_pfn = iov_pfn + pages_to_remove - 1;
2246 switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
2248 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2252 /* We don't need lock here, nobody else
2253 * touches the iova range
2255 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2257 static int dumps = 5;
2258 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2259 iov_pfn, tmp, (unsigned long long)pteval);
2262 debug_dma_dump_mappings(NULL);
2267 nr_pages -= lvl_pages;
2268 iov_pfn += lvl_pages;
2269 phys_pfn += lvl_pages;
2270 pteval += lvl_pages * VTD_PAGE_SIZE;
2272 /* If the next PTE would be the first in a new page, then we
2273 * need to flush the cache on the entries we've just written.
2274 * And then we'll need to recalculate 'pte', so clear it and
2275 * let it get set again in the if (!pte) block above.
2277 * If we're done (!nr_pages) we need to flush the cache too.
2279 * Also if we've been setting superpages, we may need to
2280 * recalculate 'pte' and switch back to smaller pages for the
2281 * end of the mapping, if the trailing size is not enough to
2282 * use another superpage (i.e. nr_pages < lvl_pages).
2285 if (!nr_pages || first_pte_in_page(pte) ||
2286 (largepage_lvl > 1 && nr_pages < lvl_pages)) {
2287 domain_flush_cache(domain, first_pte,
2288 (void *)pte - (void *)first_pte);
2296 static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
2298 struct intel_iommu *iommu = info->iommu;
2299 struct context_entry *context;
2305 spin_lock(&iommu->lock);
2306 context = iommu_context_addr(iommu, bus, devfn, 0);
2308 spin_unlock(&iommu->lock);
2312 if (sm_supported(iommu)) {
2313 if (hw_pass_through && domain_type_is_si(info->domain))
2314 did_old = FLPT_DEFAULT_DID;
2316 did_old = domain_id_iommu(info->domain, iommu);
2318 did_old = context_domain_id(context);
2321 context_clear_entry(context);
2322 __iommu_flush_cache(iommu, context, sizeof(*context));
2323 spin_unlock(&iommu->lock);
2324 iommu->flush.flush_context(iommu,
2326 (((u16)bus) << 8) | devfn,
2327 DMA_CCMD_MASK_NOBIT,
2328 DMA_CCMD_DEVICE_INVL);
2330 if (sm_supported(iommu))
2331 qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0);
2333 iommu->flush.flush_iotlb(iommu,
2339 __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH);
2342 static int domain_setup_first_level(struct intel_iommu *iommu,
2343 struct dmar_domain *domain,
2347 struct dma_pte *pgd = domain->pgd;
2352 * Skip top levels of page tables for an iommu which has
2353 * a smaller agaw than the default. Unnecessary for PT mode.
2355 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2356 pgd = phys_to_virt(dma_pte_addr(pgd));
2357 if (!dma_pte_present(pgd))
2361 level = agaw_to_level(agaw);
2362 if (level != 4 && level != 5)
2365 if (pasid != PASID_RID2PASID)
2366 flags |= PASID_FLAG_SUPERVISOR_MODE;
2368 flags |= PASID_FLAG_FL5LP;
2370 if (domain->force_snooping)
2371 flags |= PASID_FLAG_PAGE_SNOOP;
2373 return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
2374 domain_id_iommu(domain, iommu),
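/* True if DMA for this device is actually issued by a different PCI device. */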
2378 static bool dev_is_real_dma_subdevice(struct device *dev)
2380 return dev && dev_is_pci(dev) &&
2381 pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
2384 static int iommu_domain_identity_map(struct dmar_domain *domain,
2385 unsigned long first_vpfn,
2386 unsigned long last_vpfn)
2389 * The RMRR range might overlap with the physical memory range,
2392 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2394 return __domain_mapping(domain, first_vpfn,
2395 first_vpfn, last_vpfn - first_vpfn + 1,
2396 DMA_PTE_READ|DMA_PTE_WRITE);
2399 static int md_domain_init(struct dmar_domain *domain, int guest_width);
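/*
 * Build the static identity (si_domain) mapping: allocate the domain,
 * then 1:1 map every usable physical memory range of each online node
 * as well as all reported RMRR regions.
 */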
2401 static int __init si_domain_init(int hw)
2403 struct dmar_rmrr_unit *rmrr;
2407 si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY);
2411 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2412 domain_exit(si_domain);
2419 for_each_online_node(nid) {
2420 unsigned long start_pfn, end_pfn;
2423 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2424 ret = iommu_domain_identity_map(si_domain,
2425 mm_to_dma_pfn(start_pfn),
2426 mm_to_dma_pfn(end_pfn));
2433 * Identity map the RMRRs so that devices with RMRRs could also use
2436 for_each_rmrr_units(rmrr) {
2437 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2439 unsigned long long start = rmrr->base_address;
2440 unsigned long long end = rmrr->end_address;
2442 if (WARN_ON(end < start ||
2443 end >> agaw_to_width(si_domain->agaw)))
2446 ret = iommu_domain_identity_map(si_domain,
2447 mm_to_dma_pfn(start >> PAGE_SHIFT),
2448 mm_to_dma_pfn(end >> PAGE_SHIFT));
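/*
 * Attach @dev to @domain: link its device_domain_info into the domain,
 * allocate a PASID table and set up the RID2PASID entry in scalable
 * mode, and finally install the context mapping.
 */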
2457 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2459 struct device_domain_info *info = dev_iommu_priv_get(dev);
2460 struct intel_iommu *iommu;
2461 unsigned long flags;
2465 iommu = device_to_iommu(dev, &bus, &devfn);
2469 ret = domain_attach_iommu(domain, iommu);
2472 info->domain = domain;
2473 spin_lock_irqsave(&domain->lock, flags);
2474 list_add(&info->link, &domain->devices);
2475 spin_unlock_irqrestore(&domain->lock, flags);
2477 /* PASID table is mandatory for a PCI device in scalable mode. */
2478 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
2479 ret = intel_pasid_alloc_table(dev);
2481 dev_err(dev, "PASID table allocation failed\n");
2482 dmar_remove_one_dev_info(dev);
2486 /* Set up the PASID entry for requests without PASID: */
2487 if (hw_pass_through && domain_type_is_si(domain))
2488 ret = intel_pasid_setup_pass_through(iommu, domain,
2489 dev, PASID_RID2PASID);
2490 else if (domain_use_first_level(domain))
2491 ret = domain_setup_first_level(iommu, domain, dev,
2494 ret = intel_pasid_setup_second_level(iommu, domain,
2495 dev, PASID_RID2PASID);
2497 dev_err(dev, "Setup RID2PASID failed\n");
2498 dmar_remove_one_dev_info(dev);
2503 ret = domain_context_mapping(domain, dev);
2505 dev_err(dev, "Domain context map failed\n");
2506 dmar_remove_one_dev_info(dev);
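/*
 * True if some RMRR device scope lists @dev itself or a PCI bridge
 * that @dev sits behind.
 */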
2513 static bool device_has_rmrr(struct device *dev)
2515 struct dmar_rmrr_unit *rmrr;
2520 for_each_rmrr_units(rmrr) {
2522 * Return TRUE if this RMRR contains the device that
2525 for_each_active_dev_scope(rmrr->devices,
2526 rmrr->devices_cnt, i, tmp)
2528 is_downstream_to_pci_bridge(dev, tmp)) {
2538 * device_rmrr_is_relaxable - Test whether the RMRR of this device
2539 * is relaxable (i.e. is allowed to not be enforced under some conditions)
2540 * @dev: device handle
2542 * We assume that PCI USB devices with RMRRs have them largely
2543 * for historical reasons and that the RMRR space is not actively used post
2544 * boot. This exclusion may change if vendors begin to abuse it.
2546 * The same exception is made for graphics devices, with the requirement that
2547 * any use of the RMRR regions will be torn down before assigning the device
2550 * Return: true if the RMRR is relaxable, false otherwise
2552 static bool device_rmrr_is_relaxable(struct device *dev)
2554 struct pci_dev *pdev;
2556 if (!dev_is_pci(dev))
2559 pdev = to_pci_dev(dev);
2560 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2567 * There are a couple of cases where we need to restrict the functionality of
2568 * devices associated with RMRRs. The first is when evaluating a device for
2569 * identity mapping because problems exist when devices are moved in and out
2570 * of domains and their respective RMRR information is lost. This means that
2571 * a device with associated RMRRs will never be in a "passthrough" domain.
2572 * The second is use of the device through the IOMMU API. This interface
2573 * expects to have full control of the IOVA space for the device. We cannot
2574 * satisfy both the requirement that RMRR access is maintained and have an
2575 * unencumbered IOVA space. We also have no ability to quiesce the device's
2576 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2577 * We therefore prevent devices associated with an RMRR from participating in
2578 * the IOMMU API, which eliminates them from device assignment.
2580 * In both cases, devices which have relaxable RMRRs are not concerned by this
2581 * restriction. See device_rmrr_is_relaxable comment.
2583 static bool device_is_rmrr_locked(struct device *dev)
2585 if (!device_has_rmrr(dev))
2588 if (device_rmrr_is_relaxable(dev))
2595 * Return the required default domain type for a specific device.
2597 * @dev: the device in question
2598 * @startup: true if this is during early boot
2601 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2602 * - IOMMU_DOMAIN_IDENTITY: device requires an identity mapping domain
2603 * - 0: both identity and dynamic domains work for this device
2605 static int device_def_domain_type(struct device *dev)
2607 if (dev_is_pci(dev)) {
2608 struct pci_dev *pdev = to_pci_dev(dev);
2610 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2611 return IOMMU_DOMAIN_IDENTITY;
2613 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2614 return IOMMU_DOMAIN_IDENTITY;
2620 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2623 * Start from a sane IOMMU hardware state.
2624 * If queued invalidation was already initialized by us
2625 * (for example, while enabling interrupt remapping), then
2626 * things are already rolling from a sane state.
2630 * Clear any previous faults.
2632 dmar_fault(-1, iommu);
2634 * Disable queued invalidation if supported and already enabled
2635 * before OS handover.
2637 dmar_disable_qi(iommu);
2640 if (dmar_enable_qi(iommu)) {
2642 * Queued invalidation not enabled; fall back to register-based invalidation
2644 iommu->flush.flush_context = __iommu_flush_context;
2645 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2646 pr_info("%s: Using Register based invalidation\n",
2649 iommu->flush.flush_context = qi_flush_context;
2650 iommu->flush.flush_iotlb = qi_flush_iotlb;
2651 pr_info("%s: Using Queued invalidation\n", iommu->name);
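/*
 * kdump support: copy one bus worth of context entries from the old
 * kernel's tables. Present entries are duplicated into freshly
 * allocated pages, their domain IDs are reserved and the entries are
 * marked as copied.
 */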
2655 static int copy_context_table(struct intel_iommu *iommu,
2656 struct root_entry *old_re,
2657 struct context_entry **tbl,
2660 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2661 struct context_entry *new_ce = NULL, ce;
2662 struct context_entry *old_ce = NULL;
2663 struct root_entry re;
2664 phys_addr_t old_ce_phys;
2666 tbl_idx = ext ? bus * 2 : bus;
2667 memcpy(&re, old_re, sizeof(re));
2669 for (devfn = 0; devfn < 256; devfn++) {
2670 /* First calculate the correct index */
2671 idx = (ext ? devfn * 2 : devfn) % 256;
2674 /* First save what we may have and clean up */
2676 tbl[tbl_idx] = new_ce;
2677 __iommu_flush_cache(iommu, new_ce,
2687 old_ce_phys = root_entry_lctp(&re);
2689 old_ce_phys = root_entry_uctp(&re);
2692 if (ext && devfn == 0) {
2693 /* No LCTP, try UCTP */
2702 old_ce = memremap(old_ce_phys, PAGE_SIZE,
2707 new_ce = alloc_pgtable_page(iommu->node);
2714 /* Now copy the context entry */
2715 memcpy(&ce, old_ce + idx, sizeof(ce));
2717 if (!context_present(&ce))
2720 did = context_domain_id(&ce);
2721 if (did >= 0 && did < cap_ndoms(iommu->cap))
2722 set_bit(did, iommu->domain_ids);
2724 set_context_copied(iommu, bus, devfn);
2728 tbl[tbl_idx + pos] = new_ce;
2730 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2739 static int copy_translation_tables(struct intel_iommu *iommu)
2741 struct context_entry **ctxt_tbls;
2742 struct root_entry *old_rt;
2743 phys_addr_t old_rt_phys;
2744 int ctxt_table_entries;
2749 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
2750 ext = !!(rtaddr_reg & DMA_RTADDR_SMT);
2751 new_ext = !!sm_supported(iommu);
2754 * The RTT bit can only be changed when translation is disabled,
2755 * but disabling translation means opening a window for data
2756 * corruption. So bail out and don't copy anything if we would
2757 * have to change the bit.
2762 iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
2763 if (!iommu->copied_tables)
2766 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
2770 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
2774 /* This is too big for the stack - allocate it from slab */
2775 ctxt_table_entries = ext ? 512 : 256;
2777 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
2781 for (bus = 0; bus < 256; bus++) {
2782 ret = copy_context_table(iommu, &old_rt[bus],
2783 ctxt_tbls, bus, ext);
2785 pr_err("%s: Failed to copy context table for bus %d\n",
2791 spin_lock(&iommu->lock);
2793 /* Context tables are copied, now write them to the root_entry table */
2794 for (bus = 0; bus < 256; bus++) {
2795 int idx = ext ? bus * 2 : bus;
2798 if (ctxt_tbls[idx]) {
2799 val = virt_to_phys(ctxt_tbls[idx]) | 1;
2800 iommu->root_entry[bus].lo = val;
2803 if (!ext || !ctxt_tbls[idx + 1])
2806 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
2807 iommu->root_entry[bus].hi = val;
2810 spin_unlock(&iommu->lock);
2814 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
2824 #ifdef CONFIG_INTEL_IOMMU_SVM
2825 static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
2827 struct intel_iommu *iommu = data;
2831 return INVALID_IOASID;
2833 * The VT-d virtual command interface always uses the full 20-bit
2834 * PASID range. The host can partition the guest PASID range based
2835 * on policies, but that is out of the guest's control.
2837 if (min < PASID_MIN || max > intel_pasid_max_id)
2838 return INVALID_IOASID;
2840 if (vcmd_alloc_pasid(iommu, &ioasid))
2841 return INVALID_IOASID;
2846 static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
2848 struct intel_iommu *iommu = data;
2853 * The sanity check of the ioasid owner is done at the upper layer, e.g. VFIO.
2854 * We can only free the PASID when all the devices are unbound.
2856 if (ioasid_find(NULL, ioasid, NULL)) {
2857 pr_alert("Cannot free active IOASID %d\n", ioasid);
2860 vcmd_free_pasid(iommu, ioasid);
2863 static void register_pasid_allocator(struct intel_iommu *iommu)
2866 * If we are running in the host, there is no need for a custom
2867 * allocator since PASIDs are allocated host system-wide.
2869 if (!cap_caching_mode(iommu->cap))
2872 if (!sm_supported(iommu)) {
2873 pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
2878 * Register a custom PASID allocator if we are running in a guest;
2879 * guest PASIDs must be obtained via the virtual command interface.
2880 * There can be multiple vIOMMUs in each guest but only one allocator
2881 * is active. All vIOMMU allocators will eventually be calling the same
2884 if (!vccap_pasid(iommu->vccap))
2887 pr_info("Register custom PASID allocator\n");
2888 iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
2889 iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
2890 iommu->pasid_allocator.pdata = (void *)iommu;
2891 if (ioasid_register_allocator(&iommu->pasid_allocator)) {
2892 pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
2894 * Disable scalable mode on this IOMMU if there
2895 * is no custom allocator. Mixing SM-capable and
2896 * non-SM vIOMMUs is not supported.
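/*
 * One-time DMAR bring-up: audit capabilities, initialize invalidation
 * and domain bookkeeping for every IOMMU, reuse or replace any
 * pre-enabled translation tables, install the root entries and build
 * the identity domain before translation is enabled.
 */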
2903 static int __init init_dmars(void)
2905 struct dmar_drhd_unit *drhd;
2906 struct intel_iommu *iommu;
2909 ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL);
2913 for_each_iommu(iommu, drhd) {
2914 if (drhd->ignored) {
2915 iommu_disable_translation(iommu);
2920 * Find the max pasid size of all IOMMUs in the system.
2921 * We need to ensure the system pasid table is no bigger
2922 * than the smallest supported.
2924 if (pasid_supported(iommu)) {
2925 u32 temp = 2 << ecap_pss(iommu->ecap);
2927 intel_pasid_max_id = min_t(u32, temp,
2928 intel_pasid_max_id);
2931 intel_iommu_init_qi(iommu);
2933 ret = iommu_init_domains(iommu);
2937 init_translation_status(iommu);
2939 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
2940 iommu_disable_translation(iommu);
2941 clear_translation_pre_enabled(iommu);
2942 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
2948 * we could share the same root & context tables
2949 * among all IOMMUs. Need to split it later.
2951 ret = iommu_alloc_root_entry(iommu);
2955 if (translation_pre_enabled(iommu)) {
2956 pr_info("Translation already enabled - trying to copy translation structures\n");
2958 ret = copy_translation_tables(iommu);
2961 * We found the IOMMU with translation
2962 * enabled - but failed to copy over the
2963 * old root-entry table. Try to proceed
2964 * by disabling translation now and
2965 * allocating a clean root-entry table.
2966 * This might cause DMAR faults, but
2967 * probably the dump will still succeed.
2969 pr_err("Failed to copy translation tables from previous kernel for %s\n",
2971 iommu_disable_translation(iommu);
2972 clear_translation_pre_enabled(iommu);
2974 pr_info("Copied translation tables from previous kernel for %s\n",
2979 if (!ecap_pass_through(iommu->ecap))
2980 hw_pass_through = 0;
2981 intel_svm_check(iommu);
2985 * Now that qi is enabled on all iommus, set the root entry and flush
2986 * caches. This is required on some Intel X58 chipsets, otherwise the
2987 * flush_context function will loop forever and the boot hangs.
2989 for_each_active_iommu(iommu, drhd) {
2990 iommu_flush_write_buffer(iommu);
2991 #ifdef CONFIG_INTEL_IOMMU_SVM
2992 register_pasid_allocator(iommu);
2994 iommu_set_root_entry(iommu);
2997 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3002 iommu_identity_mapping |= IDENTMAP_GFX;
3004 check_tylersburg_isoch();
3006 ret = si_domain_init(hw_pass_through);
3013 * global invalidate context cache
3014 * global invalidate iotlb
3015 * enable translation
3017 for_each_iommu(iommu, drhd) {
3018 if (drhd->ignored) {
3020 * we always have to disable PMRs or DMA may fail on
3024 iommu_disable_protect_mem_regions(iommu);
3028 iommu_flush_write_buffer(iommu);
3030 #ifdef CONFIG_INTEL_IOMMU_SVM
3031 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3033 * Calling dmar_alloc_hwirq() with dmar_global_lock held
3034 * could cause a lock race condition.
3036 up_write(&dmar_global_lock);
3037 ret = intel_svm_enable_prq(iommu);
3038 down_write(&dmar_global_lock);
3043 ret = dmar_set_interrupt(iommu);
3051 for_each_active_iommu(iommu, drhd) {
3052 disable_dmar_iommu(iommu);
3053 free_dmar_iommu(iommu);
3059 static void __init init_no_remapping_devices(void)
3061 struct dmar_drhd_unit *drhd;
3065 for_each_drhd_unit(drhd) {
3066 if (!drhd->include_all) {
3067 for_each_active_dev_scope(drhd->devices,
3068 drhd->devices_cnt, i, dev)
3070 /* ignore DMAR unit if no devices exist */
3071 if (i == drhd->devices_cnt)
3076 for_each_active_drhd_unit(drhd) {
3077 if (drhd->include_all)
3080 for_each_active_dev_scope(drhd->devices,
3081 drhd->devices_cnt, i, dev)
3082 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3084 if (i < drhd->devices_cnt)
3087 /* This IOMMU has *only* gfx devices. Either bypass it or
3088 * set the gfx_mapped flag, as appropriate */
3089 drhd->gfx_dedicated = 1;
3095 #ifdef CONFIG_SUSPEND
3096 static int init_iommu_hw(void)
3098 struct dmar_drhd_unit *drhd;
3099 struct intel_iommu *iommu = NULL;
3101 for_each_active_iommu(iommu, drhd)
3103 dmar_reenable_qi(iommu);
3105 for_each_iommu(iommu, drhd) {
3106 if (drhd->ignored) {
3108 * we always have to disable PMRs or DMA may fail on
3112 iommu_disable_protect_mem_regions(iommu);
3116 iommu_flush_write_buffer(iommu);
3117 iommu_set_root_entry(iommu);
3118 iommu_enable_translation(iommu);
3119 iommu_disable_protect_mem_regions(iommu);
3125 static void iommu_flush_all(void)
3127 struct dmar_drhd_unit *drhd;
3128 struct intel_iommu *iommu;
3130 for_each_active_iommu(iommu, drhd) {
3131 iommu->flush.flush_context(iommu, 0, 0, 0,
3132 DMA_CCMD_GLOBAL_INVL);
3133 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3134 DMA_TLB_GLOBAL_FLUSH);
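/*
 * Suspend path: disable translation and save the fault-reporting
 * registers (FECTL/FEDATA/FEADDR/FEUADDR) of every active IOMMU so
 * that iommu_resume() can restore them.
 */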
3138 static int iommu_suspend(void)
3140 struct dmar_drhd_unit *drhd;
3141 struct intel_iommu *iommu = NULL;
3144 for_each_active_iommu(iommu, drhd) {
3145 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
3147 if (!iommu->iommu_state)
3153 for_each_active_iommu(iommu, drhd) {
3154 iommu_disable_translation(iommu);
3156 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3158 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3159 readl(iommu->reg + DMAR_FECTL_REG);
3160 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3161 readl(iommu->reg + DMAR_FEDATA_REG);
3162 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3163 readl(iommu->reg + DMAR_FEADDR_REG);
3164 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3165 readl(iommu->reg + DMAR_FEUADDR_REG);
3167 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3172 for_each_active_iommu(iommu, drhd)
3173 kfree(iommu->iommu_state);
3178 static void iommu_resume(void)
3180 struct dmar_drhd_unit *drhd;
3181 struct intel_iommu *iommu = NULL;
3184 if (init_iommu_hw()) {
3186 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3188 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3192 for_each_active_iommu(iommu, drhd) {
3194 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3196 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3197 iommu->reg + DMAR_FECTL_REG);
3198 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3199 iommu->reg + DMAR_FEDATA_REG);
3200 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3201 iommu->reg + DMAR_FEADDR_REG);
3202 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3203 iommu->reg + DMAR_FEUADDR_REG);
3205 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3208 for_each_active_iommu(iommu, drhd)
3209 kfree(iommu->iommu_state);
3212 static struct syscore_ops iommu_syscore_ops = {
3213 .resume = iommu_resume,
3214 .suspend = iommu_suspend,
3217 static void __init init_iommu_pm_ops(void)
3219 register_syscore_ops(&iommu_syscore_ops);
3223 static inline void init_iommu_pm_ops(void) {}
3224 #endif /* CONFIG_PM */
3226 static int __init rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
3228 if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
3229 !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
3230 rmrr->end_address <= rmrr->base_address ||
3231 arch_rmrr_sanity_check(rmrr))
3237 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3239 struct acpi_dmar_reserved_memory *rmrr;
3240 struct dmar_rmrr_unit *rmrru;
3242 rmrr = (struct acpi_dmar_reserved_memory *)header;
3243 if (rmrr_sanity_check(rmrr)) {
3245 "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
3246 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3247 rmrr->base_address, rmrr->end_address,
3248 dmi_get_system_info(DMI_BIOS_VENDOR),
3249 dmi_get_system_info(DMI_BIOS_VERSION),
3250 dmi_get_system_info(DMI_PRODUCT_VERSION));
3251 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
3254 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3258 rmrru->hdr = header;
3260 rmrru->base_address = rmrr->base_address;
3261 rmrru->end_address = rmrr->end_address;
3263 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3264 ((void *)rmrr) + rmrr->header.length,
3265 &rmrru->devices_cnt);
3266 if (rmrru->devices_cnt && rmrru->devices == NULL)
3269 list_add(&rmrru->list, &dmar_rmrr_units);
3278 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3280 struct dmar_atsr_unit *atsru;
3281 struct acpi_dmar_atsr *tmp;
3283 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
3285 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3286 if (atsr->segment != tmp->segment)
3288 if (atsr->header.length != tmp->header.length)
3290 if (memcmp(atsr, tmp, atsr->header.length) == 0)
3297 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3299 struct acpi_dmar_atsr *atsr;
3300 struct dmar_atsr_unit *atsru;
3302 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3305 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3306 atsru = dmar_find_atsr(atsr);
3310 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
3315 * If memory is allocated from slab by ACPI _DSM method, we need to
3316 * copy the memory content because the memory buffer will be freed
3319 atsru->hdr = (void *)(atsru + 1);
3320 memcpy(atsru->hdr, hdr, hdr->length);
3321 atsru->include_all = atsr->flags & 0x1;
3322 if (!atsru->include_all) {
3323 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3324 (void *)atsr + atsr->header.length,
3325 &atsru->devices_cnt);
3326 if (atsru->devices_cnt && atsru->devices == NULL) {
3332 list_add_rcu(&atsru->list, &dmar_atsr_units);
3337 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3339 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3343 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3345 struct acpi_dmar_atsr *atsr;
3346 struct dmar_atsr_unit *atsru;
3348 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3349 atsru = dmar_find_atsr(atsr);
3351 list_del_rcu(&atsru->list);
3353 intel_iommu_free_atsr(atsru);
3359 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3363 struct acpi_dmar_atsr *atsr;
3364 struct dmar_atsr_unit *atsru;
3366 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3367 atsru = dmar_find_atsr(atsr);
3371 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
3372 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
3380 static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
3382 struct dmar_satc_unit *satcu;
3383 struct acpi_dmar_satc *tmp;
3385 list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
3387 tmp = (struct acpi_dmar_satc *)satcu->hdr;
3388 if (satc->segment != tmp->segment)
3390 if (satc->header.length != tmp->header.length)
3392 if (memcmp(satc, tmp, satc->header.length) == 0)
3399 int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
3401 struct acpi_dmar_satc *satc;
3402 struct dmar_satc_unit *satcu;
3404 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3407 satc = container_of(hdr, struct acpi_dmar_satc, header);
3408 satcu = dmar_find_satc(satc);
3412 satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
3416 satcu->hdr = (void *)(satcu + 1);
3417 memcpy(satcu->hdr, hdr, hdr->length);
3418 satcu->atc_required = satc->flags & 0x1;
3419 satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
3420 (void *)satc + satc->header.length,
3421 &satcu->devices_cnt);
3422 if (satcu->devices_cnt && !satcu->devices) {
3426 list_add_rcu(&satcu->list, &dmar_satc_units);
3431 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
3434 struct intel_iommu *iommu = dmaru->iommu;
3436 ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
3440 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
3441 pr_warn("%s: Doesn't support hardware pass through.\n",
3446 sp = domain_update_iommu_superpage(NULL, iommu) - 1;
3447 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
3448 pr_warn("%s: Doesn't support large page.\n",
3454 * Disable translation if already enabled prior to OS handover.
3456 if (iommu->gcmd & DMA_GCMD_TE)
3457 iommu_disable_translation(iommu);
3459 ret = iommu_init_domains(iommu);
3461 ret = iommu_alloc_root_entry(iommu);
3465 intel_svm_check(iommu);
3467 if (dmaru->ignored) {
3469 * we always have to disable PMRs or DMA may fail on this device
3472 iommu_disable_protect_mem_regions(iommu);
3476 intel_iommu_init_qi(iommu);
3477 iommu_flush_write_buffer(iommu);
3479 #ifdef CONFIG_INTEL_IOMMU_SVM
3480 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3481 ret = intel_svm_enable_prq(iommu);
3486 ret = dmar_set_interrupt(iommu);
3490 iommu_set_root_entry(iommu);
3491 iommu_enable_translation(iommu);
3493 iommu_disable_protect_mem_regions(iommu);
3497 disable_dmar_iommu(iommu);
3499 free_dmar_iommu(iommu);
3503 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
3506 struct intel_iommu *iommu = dmaru->iommu;
3508 if (!intel_iommu_enabled)
3514 ret = intel_iommu_add(dmaru);
3516 disable_dmar_iommu(iommu);
3517 free_dmar_iommu(iommu);
3523 static void intel_iommu_free_dmars(void)
3525 struct dmar_rmrr_unit *rmrru, *rmrr_n;
3526 struct dmar_atsr_unit *atsru, *atsr_n;
3527 struct dmar_satc_unit *satcu, *satc_n;
3529 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3530 list_del(&rmrru->list);
3531 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3535 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3536 list_del(&atsru->list);
3537 intel_iommu_free_atsr(atsru);
3539 list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) {
3540 list_del(&satcu->list);
3541 dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt);
3546 static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev)
3548 struct dmar_satc_unit *satcu;
3549 struct acpi_dmar_satc *satc;
3553 dev = pci_physfn(dev);
3556 list_for_each_entry_rcu(satcu, &dmar_satc_units, list) {
3557 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
3558 if (satc->segment != pci_domain_nr(dev->bus))
3560 for_each_dev_scope(satcu->devices, satcu->devices_cnt, i, tmp)
3561 if (to_pci_dev(tmp) == dev)
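/*
 * Decide whether ATS may be enabled for @dev: the SATC table takes
 * precedence if the device is listed there; otherwise walk up to the
 * root port and match it against the ATSR units of the segment.
 */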
3570 static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
3573 struct pci_bus *bus;
3574 struct pci_dev *bridge = NULL;
3576 struct acpi_dmar_atsr *atsr;
3577 struct dmar_atsr_unit *atsru;
3578 struct dmar_satc_unit *satcu;
3580 dev = pci_physfn(dev);
3581 satcu = dmar_find_matched_satc_unit(dev);
3584 * This device supports ATS as it is in the SATC table.
3585 * When the IOMMU is in legacy mode, ATS is enabled
3586 * automatically by HW for devices that require it,
3587 * hence the OS should not enable ATS on this device
3588 * to avoid duplicated TLB invalidation.
3590 return !(satcu->atc_required && !sm_supported(iommu));
3592 for (bus = dev->bus; bus; bus = bus->parent) {
3594 /* If it's an integrated device, allow ATS */
3597 /* Connected via non-PCIe: no ATS */
3598 if (!pci_is_pcie(bridge) ||
3599 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3601 /* If we found the root port, look it up in the ATSR */
3602 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3607 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3608 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3609 if (atsr->segment != pci_domain_nr(dev->bus))
3612 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3613 if (tmp == &bridge->dev)
3616 if (atsru->include_all)
3626 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3629 struct dmar_rmrr_unit *rmrru;
3630 struct dmar_atsr_unit *atsru;
3631 struct dmar_satc_unit *satcu;
3632 struct acpi_dmar_atsr *atsr;
3633 struct acpi_dmar_reserved_memory *rmrr;
3634 struct acpi_dmar_satc *satc;
3636 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
3639 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3640 rmrr = container_of(rmrru->hdr,
3641 struct acpi_dmar_reserved_memory, header);
3642 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3643 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3644 ((void *)rmrr) + rmrr->header.length,
3645 rmrr->segment, rmrru->devices,
3646 rmrru->devices_cnt);
3649 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
3650 dmar_remove_dev_scope(info, rmrr->segment,
3651 rmrru->devices, rmrru->devices_cnt);
3655 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3656 if (atsru->include_all)
3659 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3660 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3661 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3662 (void *)atsr + atsr->header.length,
3663 atsr->segment, atsru->devices,
3664 atsru->devices_cnt);
3669 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
3670 if (dmar_remove_dev_scope(info, atsr->segment,
3671 atsru->devices, atsru->devices_cnt))
3675 list_for_each_entry(satcu, &dmar_satc_units, list) {
3676 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
3677 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3678 ret = dmar_insert_dev_scope(info, (void *)(satc + 1),
3679 (void *)satc + satc->header.length,
3680 satc->segment, satcu->devices,
3681 satcu->devices_cnt);
3686 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
3687 if (dmar_remove_dev_scope(info, satc->segment,
3688 satcu->devices, satcu->devices_cnt))
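/*
 * Memory hotplug notifier for the identity domain: ranges going online
 * are identity mapped; ranges going offline are unmapped and the
 * IOTLBs of all active IOMMUs are flushed before the page-table pages
 * are released.
 */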
3696 static int intel_iommu_memory_notifier(struct notifier_block *nb,
3697 unsigned long val, void *v)
3699 struct memory_notify *mhp = v;
3700 unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3701 unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
3705 case MEM_GOING_ONLINE:
3706 if (iommu_domain_identity_map(si_domain,
3707 start_vpfn, last_vpfn)) {
3708 pr_warn("Failed to build identity map for [%lx-%lx]\n",
3709 start_vpfn, last_vpfn);
3715 case MEM_CANCEL_ONLINE:
3717 struct dmar_drhd_unit *drhd;
3718 struct intel_iommu *iommu;
3719 LIST_HEAD(freelist);
3721 domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist);
3724 for_each_active_iommu(iommu, drhd)
3725 iommu_flush_iotlb_psi(iommu, si_domain,
3726 start_vpfn, mhp->nr_pages,
3727 list_empty(&freelist), 0);
3729 put_pages_list(&freelist);
3737 static struct notifier_block intel_iommu_memory_nb = {
3738 .notifier_call = intel_iommu_memory_notifier,
3742 static void intel_disable_iommus(void)
3744 struct intel_iommu *iommu = NULL;
3745 struct dmar_drhd_unit *drhd;
3747 for_each_iommu(iommu, drhd)
3748 iommu_disable_translation(iommu);
3751 void intel_iommu_shutdown(void)
3753 struct dmar_drhd_unit *drhd;
3754 struct intel_iommu *iommu = NULL;
3756 if (no_iommu || dmar_disabled)
3759 down_write(&dmar_global_lock);
3761 /* Disable PMRs explicitly here. */
3762 for_each_iommu(iommu, drhd)
3763 iommu_disable_protect_mem_regions(iommu);
3765 /* Make sure the IOMMUs are switched off */
3766 intel_disable_iommus();
3768 up_write(&dmar_global_lock);
3771 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
3773 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
3775 return container_of(iommu_dev, struct intel_iommu, iommu);
3778 static ssize_t version_show(struct device *dev,
3779 struct device_attribute *attr, char *buf)
3781 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3782 u32 ver = readl(iommu->reg + DMAR_VER_REG);
3783 return sprintf(buf, "%d:%d\n",
3784 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
3786 static DEVICE_ATTR_RO(version);
3788 static ssize_t address_show(struct device *dev,
3789 struct device_attribute *attr, char *buf)
3791 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3792 return sprintf(buf, "%llx\n", iommu->reg_phys);
3794 static DEVICE_ATTR_RO(address);
3796 static ssize_t cap_show(struct device *dev,
3797 struct device_attribute *attr, char *buf)
3799 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3800 return sprintf(buf, "%llx\n", iommu->cap);
3802 static DEVICE_ATTR_RO(cap);
3804 static ssize_t ecap_show(struct device *dev,
3805 struct device_attribute *attr, char *buf)
3807 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3808 return sprintf(buf, "%llx\n", iommu->ecap);
3810 static DEVICE_ATTR_RO(ecap);
3812 static ssize_t domains_supported_show(struct device *dev,
3813 struct device_attribute *attr, char *buf)
3815 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3816 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
3818 static DEVICE_ATTR_RO(domains_supported);
3820 static ssize_t domains_used_show(struct device *dev,
3821 struct device_attribute *attr, char *buf)
3823 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3824 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
3825 cap_ndoms(iommu->cap)));
3827 static DEVICE_ATTR_RO(domains_used);
3829 static struct attribute *intel_iommu_attrs[] = {
3830 &dev_attr_version.attr,
3831 &dev_attr_address.attr,
3833 &dev_attr_ecap.attr,
3834 &dev_attr_domains_supported.attr,
3835 &dev_attr_domains_used.attr,
3839 static struct attribute_group intel_iommu_group = {
3840 .name = "intel-iommu",
3841 .attrs = intel_iommu_attrs,
3844 const struct attribute_group *intel_iommu_groups[] = {
3849 static inline bool has_external_pci(void)
3851 struct pci_dev *pdev = NULL;
3853 for_each_pci_dev(pdev)
3854 if (pdev->external_facing)
3860 static int __init platform_optin_force_iommu(void)
3862 if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
3865 if (no_iommu || dmar_disabled)
3866 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
3869 * If Intel-IOMMU is disabled by default, we will apply identity
3870 * map for all devices except those marked as being untrusted.
3873 iommu_set_default_passthrough(false);
3881 static int __init probe_acpi_namespace_devices(void)
3883 struct dmar_drhd_unit *drhd;
3884 /* To avoid a -Wunused-but-set-variable warning. */
3885 struct intel_iommu *iommu __maybe_unused;
3889 for_each_active_iommu(iommu, drhd) {
3890 for_each_active_dev_scope(drhd->devices,
3891 drhd->devices_cnt, i, dev) {
3892 struct acpi_device_physical_node *pn;
3893 struct iommu_group *group;
3894 struct acpi_device *adev;
3896 if (dev->bus != &acpi_bus_type)
3899 adev = to_acpi_device(dev);
3900 mutex_lock(&adev->physical_node_lock);
3901 list_for_each_entry(pn,
3902 &adev->physical_node_list, node) {
3903 group = iommu_group_get(pn->dev);
3905 iommu_group_put(group);
3909 ret = iommu_probe_device(pn->dev);
3913 mutex_unlock(&adev->physical_node_lock);
3923 static __init int tboot_force_iommu(void)
3925 if (!tboot_enabled())
3928 if (no_iommu || dmar_disabled)
3929 pr_warn("Forcing Intel-IOMMU to enabled\n");
3937 int __init intel_iommu_init(void)
3940 struct dmar_drhd_unit *drhd;
3941 struct intel_iommu *iommu;
3944 * Intel IOMMU is required for a TXT/tboot launch or platform
3945 * opt in, so enforce that.
3947 force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
3948 platform_optin_force_iommu();
3950 down_write(&dmar_global_lock);
3951 if (dmar_table_init()) {
3953 panic("tboot: Failed to initialize DMAR table\n");
3957 if (dmar_dev_scope_init() < 0) {
3959 panic("tboot: Failed to initialize DMAR device scope\n");
3963 up_write(&dmar_global_lock);
3966 * The bus notifier takes the dmar_global_lock, so lockdep will
3967 * complain later when we register it under the lock.
3969 dmar_register_bus_notifier();
3971 down_write(&dmar_global_lock);
3974 intel_iommu_debugfs_init();
3976 if (no_iommu || dmar_disabled) {
3978 * We exit the function here to ensure IOMMU's remapping and
3979 * mempool aren't set up, which means that the IOMMU's PMRs
3980 * won't be disabled via the call to init_dmars(). So disable
3981 * them explicitly here. The PMRs were set up by tboot prior to
3982 * calling SENTER, but the kernel is expected to reset/tear
3985 if (intel_iommu_tboot_noforce) {
3986 for_each_iommu(iommu, drhd)
3987 iommu_disable_protect_mem_regions(iommu);
3991 * Make sure the IOMMUs are switched off, even when we
3992 * boot into a kexec kernel and the previous kernel left
3995 intel_disable_iommus();
3999 if (list_empty(&dmar_rmrr_units))
4000 pr_info("No RMRR found\n");
4002 if (list_empty(&dmar_atsr_units))
4003 pr_info("No ATSR found\n");
4005 if (list_empty(&dmar_satc_units))
4006 pr_info("No SATC found\n");
4008 init_no_remapping_devices();
4013 panic("tboot: Failed to initialize DMARs\n");
4014 pr_err("Initialization failed\n");
4017 up_write(&dmar_global_lock);
4019 init_iommu_pm_ops();
4021 down_read(&dmar_global_lock);
4022 for_each_active_iommu(iommu, drhd) {
4024 * The flush queue implementation does not perform
4025 * page-selective invalidations that are required for efficient
4026 * TLB flushes in virtual environments. The benefit of batching
4027 * is likely to be much lower than the overhead of synchronizing
4028 * the virtual and physical IOMMU page-tables.
4030 if (cap_caching_mode(iommu->cap)) {
4031 pr_info_once("IOMMU batching disallowed due to virtualization\n");
4032 iommu_set_dma_strict();
4034 iommu_device_sysfs_add(&iommu->iommu, NULL,
4037 iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
4039 up_read(&dmar_global_lock);
4041 if (si_domain && !hw_pass_through)
4042 register_memory_notifier(&intel_iommu_memory_nb);
4044 down_read(&dmar_global_lock);
4045 if (probe_acpi_namespace_devices())
4046 pr_warn("ACPI name space devices didn't probe correctly\n");
4048 /* Finally, we enable the DMA remapping hardware. */
4049 for_each_iommu(iommu, drhd) {
4050 if (!drhd->ignored && !translation_pre_enabled(iommu))
4051 iommu_enable_translation(iommu);
4053 iommu_disable_protect_mem_regions(iommu);
4055 up_read(&dmar_global_lock);
4057 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4059 intel_iommu_enabled = 1;
4064 intel_iommu_free_dmars();
4065 up_write(&dmar_global_lock);
4069 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4071 struct device_domain_info *info = opaque;
4073 domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff);
4078 * NB - intel-iommu lacks any sort of reference counting for the users of
4079 * dependent devices. If multiple endpoints have intersecting dependent
4080 * devices, unbinding the driver from any one of them will possibly leave
4081 * the others unable to operate.
4083 static void domain_context_clear(struct device_domain_info *info)
4085 if (!info->iommu || !info->dev || !dev_is_pci(info->dev))
4088 pci_for_each_dma_alias(to_pci_dev(info->dev),
4089 &domain_context_clear_one_cb, info);
4092 static void dmar_remove_one_dev_info(struct device *dev)
4094 struct device_domain_info *info = dev_iommu_priv_get(dev);
4095 struct dmar_domain *domain = info->domain;
4096 struct intel_iommu *iommu = info->iommu;
4097 unsigned long flags;
4099 if (!dev_is_real_dma_subdevice(info->dev)) {
4100 if (dev_is_pci(info->dev) && sm_supported(iommu))
4101 intel_pasid_tear_down_entry(iommu, info->dev,
4102 PASID_RID2PASID, false);
4104 iommu_disable_dev_iotlb(info);
4105 domain_context_clear(info);
4106 intel_pasid_free_table(info->dev);
4109 spin_lock_irqsave(&domain->lock, flags);
4110 list_del(&info->link);
4111 spin_unlock_irqrestore(&domain->lock, flags);
4113 domain_detach_iommu(domain, iommu);
4114 info->domain = NULL;
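/*
 * Initialize a domain created through the IOMMU API: derive the AGAW
 * from the requested guest address width and allocate the top-level
 * page directory.
 */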
4117 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4121 /* calculate AGAW */
4122 domain->gaw = guest_width;
4123 adjust_width = guestwidth_to_adjustwidth(guest_width);
4124 domain->agaw = width_to_agaw(adjust_width);
4126 domain->iommu_coherency = false;
4127 domain->iommu_superpage = 0;
4128 domain->max_addr = 0;
4130 /* always allocate the top pgd */
4131 domain->pgd = alloc_pgtable_page(domain->nid);
4134 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4138 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4140 struct dmar_domain *dmar_domain;
4141 struct iommu_domain *domain;
4144 case IOMMU_DOMAIN_DMA:
4145 case IOMMU_DOMAIN_DMA_FQ:
4146 case IOMMU_DOMAIN_UNMANAGED:
4147 dmar_domain = alloc_domain(type);
4149 pr_err("Can't allocate dmar_domain\n");
4152 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4153 pr_err("Domain initialization failed\n");
4154 domain_exit(dmar_domain);
4158 domain = &dmar_domain->domain;
4159 domain->geometry.aperture_start = 0;
4160 domain->geometry.aperture_end =
4161 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4162 domain->geometry.force_aperture = true;
4165 case IOMMU_DOMAIN_IDENTITY:
4166 return &si_domain->domain;
4174 static void intel_iommu_domain_free(struct iommu_domain *domain)
4176 if (domain != &si_domain->domain)
4177 domain_exit(to_dmar_domain(domain));
4180 static int prepare_domain_attach_device(struct iommu_domain *domain,
4183 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4184 struct intel_iommu *iommu;
4187 iommu = device_to_iommu(dev, NULL, NULL);
4191 if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
4194 /* check if this iommu agaw is sufficient for max mapped address */
4195 addr_width = agaw_to_width(iommu->agaw);
4196 if (addr_width > cap_mgaw(iommu->cap))
4197 addr_width = cap_mgaw(iommu->cap);
4199 if (dmar_domain->max_addr > (1LL << addr_width)) {
4200 dev_err(dev, "%s: iommu width (%d) is not "
4201 "sufficient for the mapped address (%llx)\n",
4202 __func__, addr_width, dmar_domain->max_addr);
4205 dmar_domain->gaw = addr_width;
4208 * Knock out extra levels of page tables if necessary
4210 while (iommu->agaw < dmar_domain->agaw) {
4211 struct dma_pte *pte;
4213 pte = dmar_domain->pgd;
4214 if (dma_pte_present(pte)) {
4215 dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
4216 free_pgtable_page(pte);
4218 dmar_domain->agaw--;
4224 static int intel_iommu_attach_device(struct iommu_domain *domain,
4229 if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
4230 device_is_rmrr_locked(dev)) {
4231 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4235 /* normally dev is not mapped */
4236 if (unlikely(domain_context_mapped(dev))) {
4237 struct device_domain_info *info = dev_iommu_priv_get(dev);
4240 dmar_remove_one_dev_info(dev);
4243 ret = prepare_domain_attach_device(domain, dev);
4247 return domain_add_dev_info(to_dmar_domain(domain), dev);
4250 static void intel_iommu_detach_device(struct iommu_domain *domain,
4253 dmar_remove_one_dev_info(dev);
4256 static int intel_iommu_map(struct iommu_domain *domain,
4257 unsigned long iova, phys_addr_t hpa,
4258 size_t size, int iommu_prot, gfp_t gfp)
4260 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4264 if (iommu_prot & IOMMU_READ)
4265 prot |= DMA_PTE_READ;
4266 if (iommu_prot & IOMMU_WRITE)
4267 prot |= DMA_PTE_WRITE;
4268 if (dmar_domain->set_pte_snp)
4269 prot |= DMA_PTE_SNP;
4271 max_addr = iova + size;
4272 if (dmar_domain->max_addr < max_addr) {
4275 /* check if minimum agaw is sufficient for mapped address */
4276 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4277 if (end < max_addr) {
4278 pr_err("%s: iommu width (%d) is not "
4279 "sufficient for the mapped address (%llx)\n",
4280 __func__, dmar_domain->gaw, max_addr);
4283 dmar_domain->max_addr = max_addr;
4285 /* Round up size to next multiple of PAGE_SIZE, if it and
4286 the low bits of hpa would take us onto the next page */
4287 size = aligned_nrpages(hpa, size);
4288 return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4289 hpa >> VTD_PAGE_SHIFT, size, prot);
4292 static int intel_iommu_map_pages(struct iommu_domain *domain,
4293 unsigned long iova, phys_addr_t paddr,
4294 size_t pgsize, size_t pgcount,
4295 int prot, gfp_t gfp, size_t *mapped)
4297 unsigned long pgshift = __ffs(pgsize);
4298 size_t size = pgcount << pgshift;
4301 if (pgsize != SZ_4K && pgsize != SZ_2M && pgsize != SZ_1G)
4304 if (!IS_ALIGNED(iova | paddr, pgsize))
4307 ret = intel_iommu_map(domain, iova, paddr, size, prot, gfp);
4314 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4315 unsigned long iova, size_t size,
4316 struct iommu_iotlb_gather *gather)
4318 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4319 unsigned long start_pfn, last_pfn;
4322 /* Cope with horrid API which requires us to unmap more than the
4323 * size argument if it happens to be a large-page mapping. */
4324 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
4326 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4327 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4329 start_pfn = iova >> VTD_PAGE_SHIFT;
4330 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4332 domain_unmap(dmar_domain, start_pfn, last_pfn, &gather->freelist);
4334 if (dmar_domain->max_addr == iova + size)
4335 dmar_domain->max_addr = iova;
4337 iommu_iotlb_gather_add_page(domain, gather, iova, size);
4342 static size_t intel_iommu_unmap_pages(struct iommu_domain *domain,
4344 size_t pgsize, size_t pgcount,
4345 struct iommu_iotlb_gather *gather)
4347 unsigned long pgshift = __ffs(pgsize);
4348 size_t size = pgcount << pgshift;
4350 return intel_iommu_unmap(domain, iova, size, gather);
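/*
 * Flush the gathered IOVA range on every IOMMU this domain is attached
 * to, then release the page-table pages collected during unmap.
 */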
4353 static void intel_iommu_tlb_sync(struct iommu_domain *domain,
4354 struct iommu_iotlb_gather *gather)
4356 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4357 unsigned long iova_pfn = IOVA_PFN(gather->start);
4358 size_t size = gather->end - gather->start;
4359 struct iommu_domain_info *info;
4360 unsigned long start_pfn;
4361 unsigned long nrpages;
4364 nrpages = aligned_nrpages(gather->start, size);
4365 start_pfn = mm_to_dma_pfn(iova_pfn);
4367 xa_for_each(&dmar_domain->iommu_array, i, info)
4368 iommu_flush_iotlb_psi(info->iommu, dmar_domain,
4370 list_empty(&gather->freelist), 0);
4372 put_pages_list(&gather->freelist);
4375 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4378 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4379 struct dma_pte *pte;
4383 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4384 if (pte && dma_pte_present(pte))
4385 phys = dma_pte_addr(pte) +
4386 (iova & (BIT_MASK(level_to_offset_bits(level) +
4387 VTD_PAGE_SHIFT) - 1));
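/*
 * Force snooping is only possible if every IOMMU with a device in this
 * domain supports snoop control.
 */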
4392 static bool domain_support_force_snooping(struct dmar_domain *domain)
4394 struct device_domain_info *info;
4395 bool support = true;
4397 assert_spin_locked(&domain->lock);
4398 list_for_each_entry(info, &domain->devices, link) {
4399 if (!ecap_sc_support(info->iommu->ecap)) {
4408 static void domain_set_force_snooping(struct dmar_domain *domain)
4410 struct device_domain_info *info;
4412 assert_spin_locked(&domain->lock);
4414 * The second-level page table supports per-PTE snoop control. The
4415 * iommu_map() interface will handle this by setting the SNP bit.
4417 if (!domain_use_first_level(domain)) {
4418 domain->set_pte_snp = true;
4422 list_for_each_entry(info, &domain->devices, link)
4423 intel_pasid_setup_page_snoop_control(info->iommu, info->dev,
4427 static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
4429 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4430 unsigned long flags;
4432 if (dmar_domain->force_snooping)
4435 spin_lock_irqsave(&dmar_domain->lock, flags);
4436 if (!domain_support_force_snooping(dmar_domain)) {
4437 spin_unlock_irqrestore(&dmar_domain->lock, flags);
4441 domain_set_force_snooping(dmar_domain);
4442 dmar_domain->force_snooping = true;
4443 spin_unlock_irqrestore(&dmar_domain->lock, flags);
4448 static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
4450 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4452 if (cap == IOMMU_CAP_INTR_REMAP)
4453 return irq_remapping_enabled == 1;
4454 if (cap == IOMMU_CAP_PRE_BOOT_PROTECTION)
4455 return dmar_platform_optin();
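/*
 * Per-device probe: allocate the device_domain_info, record the
 * device's bus/devfn/segment and, for PCI devices, detect ATS, PASID
 * and PRI support before publishing the info via dev_iommu_priv_set().
 */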
4460 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
4462 struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
4463 struct device_domain_info *info;
4464 struct intel_iommu *iommu;
4467 iommu = device_to_iommu(dev, &bus, &devfn);
4468 if (!iommu || !iommu->iommu.ops)
4469 return ERR_PTR(-ENODEV);
4471 info = kzalloc(sizeof(*info), GFP_KERNEL);
4473 return ERR_PTR(-ENOMEM);
4475 if (dev_is_real_dma_subdevice(dev)) {
4476 info->bus = pdev->bus->number;
4477 info->devfn = pdev->devfn;
4478 info->segment = pci_domain_nr(pdev->bus);
4481 info->devfn = devfn;
4482 info->segment = iommu->segment;
4486 info->iommu = iommu;
4487 if (dev_is_pci(dev)) {
4488 if (ecap_dev_iotlb_support(iommu->ecap) &&
4489 pci_ats_supported(pdev) &&
4490 dmar_ats_supported(pdev, iommu))
4491 info->ats_supported = 1;
4493 if (sm_supported(iommu)) {
4494 if (pasid_supported(iommu)) {
4495 int features = pci_pasid_features(pdev);
4498 info->pasid_supported = features | 1;
4501 if (info->ats_supported && ecap_prs(iommu->ecap) &&
4502 pci_pri_supported(pdev))
4503 info->pri_supported = 1;
4507 dev_iommu_priv_set(dev, info);
4509 return &iommu->iommu;
4512 static void intel_iommu_release_device(struct device *dev)
4514 struct device_domain_info *info = dev_iommu_priv_get(dev);
4516 dmar_remove_one_dev_info(dev);
4517 dev_iommu_priv_set(dev, NULL);
4519 set_dma_ops(dev, NULL);
4522 static void intel_iommu_probe_finalize(struct device *dev)
4524 set_dma_ops(dev, NULL);
4525 iommu_setup_dma_ops(dev, 0, U64_MAX);
4528 static void intel_iommu_get_resv_regions(struct device *device,
4529 struct list_head *head)
4531 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
4532 struct iommu_resv_region *reg;
4533 struct dmar_rmrr_unit *rmrr;
4534 struct device *i_dev;
4537 down_read(&dmar_global_lock);
4538 for_each_rmrr_units(rmrr) {
4539 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
4541 struct iommu_resv_region *resv;
4542 enum iommu_resv_type type;
4545 if (i_dev != device &&
4546 !is_downstream_to_pci_bridge(device, i_dev))
4549 length = rmrr->end_address - rmrr->base_address + 1;
4551 type = device_rmrr_is_relaxable(device) ?
4552 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
4554 resv = iommu_alloc_resv_region(rmrr->base_address,
4555 length, prot, type);
4559 list_add_tail(&resv->list, head);
4562 up_read(&dmar_global_lock);
4564 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
4565 if (dev_is_pci(device)) {
4566 struct pci_dev *pdev = to_pci_dev(device);
4568 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
4569 reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
4570 IOMMU_RESV_DIRECT_RELAXABLE);
4572 list_add_tail(®->list, head);
4575 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
4577 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
4578 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
4582 list_add_tail(®->list, head);
4585 static struct iommu_group *intel_iommu_device_group(struct device *dev)
4587 if (dev_is_pci(dev))
4588 return pci_device_group(dev);
4589 return generic_device_group(dev);
4592 static int intel_iommu_enable_sva(struct device *dev)
4594 struct device_domain_info *info = dev_iommu_priv_get(dev);
4595 struct intel_iommu *iommu;
4598 if (!info || dmar_disabled)
4601 iommu = info->iommu;
4605 if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
4608 if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
4611 ret = iopf_queue_add_device(iommu->iopf_queue, dev);
4613 ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
4618 static int intel_iommu_disable_sva(struct device *dev)
4620 struct device_domain_info *info = dev_iommu_priv_get(dev);
4621 struct intel_iommu *iommu = info->iommu;
4624 ret = iommu_unregister_device_fault_handler(dev);
4626 ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
4631 static int intel_iommu_enable_iopf(struct device *dev)
4633 struct device_domain_info *info = dev_iommu_priv_get(dev);
4635 if (info && info->pri_supported)
4642 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
4645 case IOMMU_DEV_FEAT_IOPF:
4646 return intel_iommu_enable_iopf(dev);
4648 case IOMMU_DEV_FEAT_SVA:
4649 return intel_iommu_enable_sva(dev);
4657 intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
4660 case IOMMU_DEV_FEAT_IOPF:
4663 case IOMMU_DEV_FEAT_SVA:
4664 return intel_iommu_disable_sva(dev);
4671 static bool intel_iommu_is_attach_deferred(struct device *dev)
4673 struct device_domain_info *info = dev_iommu_priv_get(dev);
4675 return translation_pre_enabled(info->iommu) && !info->domain;
4679 * Check that the device does not live on an external-facing PCI port that is
4680 * marked as untrusted. Such devices should not be able to apply quirks and
4681 * thus not be able to bypass the IOMMU restrictions.
4683 static bool risky_device(struct pci_dev *pdev)
4685 if (pdev->untrusted) {
4687 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
4688 pdev->vendor, pdev->device);
4689 pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
4695 static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
4696 unsigned long iova, size_t size)
4698 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4699 unsigned long pages = aligned_nrpages(iova, size);
4700 unsigned long pfn = iova >> VTD_PAGE_SHIFT;
4701 struct iommu_domain_info *info;
4704 xa_for_each(&dmar_domain->iommu_array, i, info)
4705 __mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
4708 const struct iommu_ops intel_iommu_ops = {
4709 .capable = intel_iommu_capable,
4710 .domain_alloc = intel_iommu_domain_alloc,
4711 .probe_device = intel_iommu_probe_device,
4712 .probe_finalize = intel_iommu_probe_finalize,
4713 .release_device = intel_iommu_release_device,
4714 .get_resv_regions = intel_iommu_get_resv_regions,
4715 .device_group = intel_iommu_device_group,
4716 .dev_enable_feat = intel_iommu_dev_enable_feat,
4717 .dev_disable_feat = intel_iommu_dev_disable_feat,
4718 .is_attach_deferred = intel_iommu_is_attach_deferred,
4719 .def_domain_type = device_def_domain_type,
4720 .pgsize_bitmap = SZ_4K,
4721 #ifdef CONFIG_INTEL_IOMMU_SVM
4722 .sva_bind = intel_svm_bind,
4723 .sva_unbind = intel_svm_unbind,
4724 .sva_get_pasid = intel_svm_get_pasid,
4725 .page_response = intel_svm_page_response,
4727 .default_domain_ops = &(const struct iommu_domain_ops) {
4728 .attach_dev = intel_iommu_attach_device,
4729 .detach_dev = intel_iommu_detach_device,
4730 .map_pages = intel_iommu_map_pages,
4731 .unmap_pages = intel_iommu_unmap_pages,
4732 .iotlb_sync_map = intel_iommu_iotlb_sync_map,
4733 .flush_iotlb_all = intel_flush_iotlb_all,
4734 .iotlb_sync = intel_iommu_tlb_sync,
4735 .iova_to_phys = intel_iommu_iova_to_phys,
4736 .free = intel_iommu_domain_free,
4737 .enforce_cache_coherency = intel_iommu_enforce_cache_coherency,
4741 static void quirk_iommu_igfx(struct pci_dev *dev)
4743 if (risky_device(dev))
4746 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
4750 /* G4x/GM45 integrated gfx dmar support is totally busted. */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);

/* Broadwell igfx malfunctions with dmar */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
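/*
 * Illustrative sketch, not part of the driver: DECLARE_PCI_FIXUP_HEADER
 * adds an entry to the PCI fixup table that runs right after a device's
 * config header has been read, so covering one more chipset is a single
 * extra line. The device ID 0x1234 below is a made-up placeholder.
 *
 *	DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1234, quirk_iommu_igfx);
 */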
static void quirk_iommu_rwbf(struct pci_dev *dev)
{
	if (risky_device(dev))
		return;

	/*
	 * Mobile 4 Series Chipset neglects to set RWBF capability,
	 * but needs it. Same seems to hold for the desktop versions.
	 */
	pci_info(dev, "Forcing write-buffer flush capability\n");
	rwbf_quirk = 1;
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
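/*
 * Illustrative sketch, not part of the driver: rwbf_quirk is consumed
 * alongside the RWBF bit of the capability register, so a chipset
 * flagged here gets its write buffer flushed even though it does not
 * advertise the capability. The gate typically looks like:
 *
 *	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
 *		return;
 */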
#define GGC 0x52 /* Graphics Control Register */
#define GGC_MEMORY_SIZE_MASK (0xf << 8)
#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
#define GGC_MEMORY_SIZE_1M (0x1 << 8)
#define GGC_MEMORY_SIZE_2M (0x3 << 8)
#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
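/*
 * Illustrative sketch, not part of the driver: the GGC encodings above
 * all live in bits 11:8 of the register, so a raw config word is
 * decoded with a simple mask. The helper name is made up.
 *
 *	static bool ggc_vt_enabled(u16 ggc)
 *	{
 *		return (ggc & GGC_MEMORY_VT_ENABLED) != 0;
 *	}
 */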
static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
	unsigned short ggc;

	if (risky_device(dev))
		return;

	if (pci_read_config_word(dev, GGC, &ggc))
		return;

	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
		dmar_map_gfx = 0;
	} else if (dmar_map_gfx) {
		/* we have to ensure the gfx device is idle before we flush */
		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
		iommu_set_dma_strict();
	}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
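/*
 * Note (not from the original source): iommu_set_dma_strict() switches
 * the DMA API globally to strict IOTLB invalidation, i.e. mappings are
 * flushed synchronously at unmap time instead of being batched in a
 * flush queue. That is why the quirk only calls it when the graphics
 * device is still translated (dmar_map_gfx left set).
 */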
static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
{
	unsigned short ver;

	if (!IS_GFX_DEVICE(dev))
		return;

	ver = (dev->device >> 8) & 0xff;
	if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
	    ver != 0x4e && ver != 0x8a && ver != 0x98 &&
	    ver != 0x9a && ver != 0xa7)
		return;

	if (risky_device(dev))
		return;

	pci_info(dev, "Skip IOMMU disabling for graphics\n");
	iommu_skip_te_disable = 1;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
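/*
 * Worked example (not from the original source): the fixup above
 * matches PCI_ANY_ID, so quirk_igfx_skip_te_disable() runs for every
 * Intel device and relies on the checks inside it. For a hypothetical
 * graphics device ID of 0x9a49, (0x9a49 >> 8) & 0xff is 0x9a, which is
 * in the allowed list, so iommu_skip_te_disable gets set.
 */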
/* On Tylersburg chipsets, some BIOSes have been known to enable the
   ISOCH DMAR unit for the Azalia sound device, but not give it any
   TLB entries, which causes it to deadlock. Check for that. We do
   this in a function called from init_dmars(), instead of in a PCI
   quirk, because we don't want to print the obnoxious "BIOS broken"
   message if VT-d is actually disabled.
*/
static void __init check_tylersburg_isoch(void)
{
	struct pci_dev *pdev;
	uint32_t vtisochctrl;

	/* If there's no Azalia in the system anyway, forget it. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
	if (!pdev)
		return;

	if (risky_device(pdev)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/* System Management Registers. Might be hidden, in which case
	   we can't do the sanity check. But that's OK, because the
	   known-broken BIOSes _don't_ actually hide it, so far. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
	if (!pdev)
		return;

	if (risky_device(pdev)) {
		pci_dev_put(pdev);
		return;
	}

	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
	if (vtisochctrl & 1)
		return;

	/* Drop all bits other than the number of TLB entries */
	vtisochctrl &= 0x1c;

	/* If we have the recommended number of TLB entries (16), fine. */
	if (vtisochctrl == 0x10)
		return;

	/* Zero TLB entries? You get to ride the short bus to school. */
	if (!vtisochctrl) {
		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		iommu_identity_mapping |= IDENTMAP_AZALIA;
		return;
	}

	pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
		vtisochctrl);
}
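/*
 * Note (not from the original source): setting IDENTMAP_AZALIA above
 * makes device_def_domain_type() place the Azalia sound device into an
 * identity (pass-through) domain, so the mis-configured ISOCH DMAR
 * unit never has to translate its DMA.
 */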