1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright © 2006-2014 Intel Corporation.
13 #define pr_fmt(fmt) "DMAR: " fmt
14 #define dev_fmt(fmt) pr_fmt(fmt)
16 #include <linux/init.h>
17 #include <linux/bitmap.h>
18 #include <linux/debugfs.h>
19 #include <linux/export.h>
20 #include <linux/slab.h>
21 #include <linux/irq.h>
22 #include <linux/interrupt.h>
23 #include <linux/spinlock.h>
24 #include <linux/pci.h>
25 #include <linux/dmar.h>
26 #include <linux/dma-map-ops.h>
27 #include <linux/mempool.h>
28 #include <linux/memory.h>
29 #include <linux/cpu.h>
30 #include <linux/timer.h>
32 #include <linux/iova.h>
33 #include <linux/iommu.h>
34 #include <linux/dma-iommu.h>
35 #include <linux/intel-iommu.h>
36 #include <linux/intel-svm.h>
37 #include <linux/syscore_ops.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/pci-ats.h>
41 #include <linux/memblock.h>
42 #include <linux/dma-direct.h>
43 #include <linux/crash_dump.h>
44 #include <linux/numa.h>
45 #include <asm/irq_remapping.h>
46 #include <asm/cacheflush.h>
47 #include <asm/iommu.h>
49 #include "../irq_remapping.h"
50 #include "../iommu-sva-lib.h"
52 #include "cap_audit.h"
54 #define ROOT_SIZE VTD_PAGE_SIZE
55 #define CONTEXT_SIZE VTD_PAGE_SIZE
57 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
58 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
59 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
60 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
62 #define IOAPIC_RANGE_START (0xfee00000)
63 #define IOAPIC_RANGE_END (0xfeefffff)
64 #define IOVA_START_ADDR (0x1000)
66 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
68 #define MAX_AGAW_WIDTH 64
69 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
71 #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
72 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
74 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
75 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
76 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
77 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
78 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
80 /* IO virtual address start page frame number */
81 #define IOVA_START_PFN (1)
83 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
85 /* page table handling */
86 #define LEVEL_STRIDE (9)
87 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
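/*
 * AGAW (adjusted guest address width) helpers: an agaw of N corresponds to
 * an address width of 30 + N * LEVEL_STRIDE bits; each extra agaw step adds
 * one page-table level, with every level decoding LEVEL_STRIDE (9) bits.
 */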
89 static inline int agaw_to_level(int agaw)
94 static inline int agaw_to_width(int agaw)
96 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
99 static inline int width_to_agaw(int width)
101 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
104 static inline unsigned int level_to_offset_bits(int level)
106 return (level - 1) * LEVEL_STRIDE;
109 static inline int pfn_level_offset(u64 pfn, int level)
111 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
114 static inline u64 level_mask(int level)
116 return -1ULL << level_to_offset_bits(level);
119 static inline u64 level_size(int level)
121 return 1ULL << level_to_offset_bits(level);
124 static inline u64 align_to_level(u64 pfn, int level)
126 return (pfn + level_size(level) - 1) & level_mask(level);
129 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
131 return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
134 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
135 are never going to work. */
136 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
138 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
141 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
143 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
145 static inline unsigned long page_to_dma_pfn(struct page *pg)
147 return mm_to_dma_pfn(page_to_pfn(pg));
149 static inline unsigned long virt_to_dma_pfn(void *p)
151 return page_to_dma_pfn(virt_to_page(p));
154 /* global iommu list, set NULL for ignored DMAR units */
155 static struct intel_iommu **g_iommus;
157 static void __init check_tylersburg_isoch(void);
158 static int rwbf_quirk;
159 static inline struct device_domain_info *
160 dmar_search_domain_by_dev_info(int segment, int bus, int devfn);
163 * set to 1 to panic the kernel if VT-d can't be enabled successfully
164 * (used when the kernel is launched with TXT)
166 static int force_on = 0;
167 static int intel_iommu_tboot_noforce;
168 static int no_platform_optin;
170 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
173 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
176 static phys_addr_t root_entry_lctp(struct root_entry *re)
181 return re->lo & VTD_PAGE_MASK;
185 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
188 static phys_addr_t root_entry_uctp(struct root_entry *re)
193 return re->hi & VTD_PAGE_MASK;
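/*
 * Helpers for the software-available bits of a context entry. The "copied"
 * bit (bit 3 of the high qword) marks entries inherited from a previous
 * kernel (kdump) so they can be flushed before being rewritten.
 */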
196 static inline void context_clear_pasid_enable(struct context_entry *context)
198 context->lo &= ~(1ULL << 11);
201 static inline bool context_pasid_enabled(struct context_entry *context)
203 return !!(context->lo & (1ULL << 11));
206 static inline void context_set_copied(struct context_entry *context)
208 context->hi |= (1ull << 3);
211 static inline bool context_copied(struct context_entry *context)
213 return !!(context->hi & (1ULL << 3));
216 static inline bool __context_present(struct context_entry *context)
218 return (context->lo & 1);
221 bool context_present(struct context_entry *context)
223 return context_pasid_enabled(context) ?
224 __context_present(context) :
225 __context_present(context) && !context_copied(context);
228 static inline void context_set_present(struct context_entry *context)
233 static inline void context_set_fault_enable(struct context_entry *context)
235 context->lo &= (((u64)-1) << 2) | 1;
238 static inline void context_set_translation_type(struct context_entry *context,
241 context->lo &= (((u64)-1) << 4) | 3;
242 context->lo |= (value & 3) << 2;
245 static inline void context_set_address_root(struct context_entry *context,
248 context->lo &= ~VTD_PAGE_MASK;
249 context->lo |= value & VTD_PAGE_MASK;
252 static inline void context_set_address_width(struct context_entry *context,
255 context->hi |= value & 7;
258 static inline void context_set_domain_id(struct context_entry *context,
261 context->hi |= (value & ((1 << 16) - 1)) << 8;
264 static inline int context_domain_id(struct context_entry *c)
266 return((c->hi >> 8) & 0xffff);
269 static inline void context_clear_entry(struct context_entry *context)
276 * This domain is a statically allocated identity-mapping domain.
277 * 1. This domain creates a static 1:1 mapping of all usable memory.
278 * 2. It maps to each iommu if successful.
279 * 3. Each iommu maps to this domain if successful.
281 static struct dmar_domain *si_domain;
282 static int hw_pass_through = 1;
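/*
 * Iterate over the sequence ids of all IOMMUs that currently hold a
 * reference on @domain (i.e. have at least one of the domain's devices
 * attached to them).
 */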
284 #define for_each_domain_iommu(idx, domain) \
285 for (idx = 0; idx < g_num_of_iommus; idx++) \
286 if (domain->iommu_refcnt[idx])
288 struct dmar_rmrr_unit {
289 struct list_head list; /* list of rmrr units */
290 struct acpi_dmar_header *hdr; /* ACPI header */
291 u64 base_address; /* reserved base address*/
292 u64 end_address; /* reserved end address */
293 struct dmar_dev_scope *devices; /* target devices */
294 int devices_cnt; /* target device count */
297 struct dmar_atsr_unit {
298 struct list_head list; /* list of ATSR units */
299 struct acpi_dmar_header *hdr; /* ACPI header */
300 struct dmar_dev_scope *devices; /* target devices */
301 int devices_cnt; /* target device count */
302 u8 include_all:1; /* include all ports */
305 struct dmar_satc_unit {
306 struct list_head list; /* list of SATC units */
307 struct acpi_dmar_header *hdr; /* ACPI header */
308 struct dmar_dev_scope *devices; /* target devices */
309 struct intel_iommu *iommu; /* the corresponding iommu */
310 int devices_cnt; /* target device count */
311 u8 atc_required:1; /* ATS is required */
314 static LIST_HEAD(dmar_atsr_units);
315 static LIST_HEAD(dmar_rmrr_units);
316 static LIST_HEAD(dmar_satc_units);
318 #define for_each_rmrr_units(rmrr) \
319 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
321 /* number of IOMMUs; used to size the g_iommus array */
322 static int g_num_of_iommus;
324 static void domain_exit(struct dmar_domain *domain);
325 static void domain_remove_dev_info(struct dmar_domain *domain);
326 static void dmar_remove_one_dev_info(struct device *dev);
327 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
328 static int intel_iommu_attach_device(struct iommu_domain *domain,
330 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
333 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
334 int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
336 int intel_iommu_enabled = 0;
337 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
339 static int dmar_map_gfx = 1;
340 static int intel_iommu_superpage = 1;
341 static int iommu_identity_mapping;
342 static int iommu_skip_te_disable;
344 #define IDENTMAP_GFX 2
345 #define IDENTMAP_AZALIA 4
347 int intel_iommu_gfx_mapped;
348 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
350 #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
351 struct device_domain_info *get_domain_info(struct device *dev)
353 struct device_domain_info *info;
358 info = dev_iommu_priv_get(dev);
359 if (unlikely(info == DEFER_DEVICE_DOMAIN_INFO))
365 DEFINE_SPINLOCK(device_domain_lock);
366 static LIST_HEAD(device_domain_list);
369 * Iterate over elements in device_domain_list and call the specified
370 * callback @fn against each element.
372 int for_each_device_domain(int (*fn)(struct device_domain_info *info,
373 void *data), void *data)
377 struct device_domain_info *info;
379 spin_lock_irqsave(&device_domain_lock, flags);
380 list_for_each_entry(info, &device_domain_list, global) {
381 ret = fn(info, data);
383 spin_unlock_irqrestore(&device_domain_lock, flags);
387 spin_unlock_irqrestore(&device_domain_lock, flags);
392 const struct iommu_ops intel_iommu_ops;
394 static bool translation_pre_enabled(struct intel_iommu *iommu)
396 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
399 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
401 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
404 static void init_translation_status(struct intel_iommu *iommu)
408 gsts = readl(iommu->reg + DMAR_GSTS_REG);
409 if (gsts & DMA_GSTS_TES)
410 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
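/* Parse the intel_iommu= kernel command-line options. */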
413 static int __init intel_iommu_setup(char *str)
419 if (!strncmp(str, "on", 2)) {
421 pr_info("IOMMU enabled\n");
422 } else if (!strncmp(str, "off", 3)) {
424 no_platform_optin = 1;
425 pr_info("IOMMU disabled\n");
426 } else if (!strncmp(str, "igfx_off", 8)) {
428 pr_info("Disable GFX device mapping\n");
429 } else if (!strncmp(str, "forcedac", 8)) {
430 pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
431 iommu_dma_forcedac = true;
432 } else if (!strncmp(str, "strict", 6)) {
433 pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
434 iommu_set_dma_strict();
435 } else if (!strncmp(str, "sp_off", 6)) {
436 pr_info("Disable supported super page\n");
437 intel_iommu_superpage = 0;
438 } else if (!strncmp(str, "sm_on", 5)) {
439 pr_info("Enable scalable mode if hardware supports\n");
441 } else if (!strncmp(str, "sm_off", 6)) {
442 pr_info("Scalable mode is disallowed\n");
444 } else if (!strncmp(str, "tboot_noforce", 13)) {
445 pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
446 intel_iommu_tboot_noforce = 1;
448 pr_notice("Unknown option - '%s'\n", str);
451 str += strcspn(str, ",");
458 __setup("intel_iommu=", intel_iommu_setup);
460 static struct kmem_cache *iommu_domain_cache;
461 static struct kmem_cache *iommu_devinfo_cache;
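/*
 * iommu->domains is a two-level lookup table keyed by domain id: the top
 * level is indexed by the upper bits of the id, and each 256-entry second
 * level, allocated on demand, is indexed by the low 8 bits.
 */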
463 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
465 struct dmar_domain **domains;
468 domains = iommu->domains[idx];
472 return domains[did & 0xff];
475 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
476 struct dmar_domain *domain)
478 struct dmar_domain **domains;
481 if (!iommu->domains[idx]) {
482 size_t size = 256 * sizeof(struct dmar_domain *);
483 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
486 domains = iommu->domains[idx];
487 if (WARN_ON(!domains))
490 domains[did & 0xff] = domain;
493 void *alloc_pgtable_page(int node)
498 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
500 vaddr = page_address(page);
504 void free_pgtable_page(void *vaddr)
506 free_page((unsigned long)vaddr);
509 static inline void *alloc_domain_mem(void)
511 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
514 static void free_domain_mem(void *vaddr)
516 kmem_cache_free(iommu_domain_cache, vaddr);
519 static inline void * alloc_devinfo_mem(void)
521 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
524 static inline void free_devinfo_mem(void *vaddr)
526 kmem_cache_free(iommu_devinfo_cache, vaddr);
529 static inline int domain_type_is_si(struct dmar_domain *domain)
531 return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
534 static inline bool domain_use_first_level(struct dmar_domain *domain)
536 return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
539 static inline int domain_pfn_supported(struct dmar_domain *domain,
542 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
544 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
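/*
 * Return the largest agaw that does not exceed @max_gaw and is advertised
 * as supported in the iommu's SAGAW capability field.
 */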
547 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
552 sagaw = cap_sagaw(iommu->cap);
553 for (agaw = width_to_agaw(max_gaw);
555 if (test_bit(agaw, &sagaw))
563 * Calculate max SAGAW for each iommu.
565 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
567 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
571 * Calculate the agaw for each iommu.
572 * "SAGAW" may differ across iommus, so use a default agaw and fall
573 * back to a smaller supported agaw for iommus that don't support the default.
575 int iommu_calculate_agaw(struct intel_iommu *iommu)
577 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
580 /* This function only returns a single iommu in a domain */
581 struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
585 /* si_domain and vm domain should not get here. */
586 if (WARN_ON(!iommu_is_dma_domain(&domain->domain)))
589 for_each_domain_iommu(iommu_id, domain)
592 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
595 return g_iommus[iommu_id];
598 static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
600 return sm_supported(iommu) ?
601 ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
604 static void domain_update_iommu_coherency(struct dmar_domain *domain)
606 struct dmar_drhd_unit *drhd;
607 struct intel_iommu *iommu;
611 domain->iommu_coherency = true;
613 for_each_domain_iommu(i, domain) {
615 if (!iommu_paging_structure_coherency(g_iommus[i])) {
616 domain->iommu_coherency = false;
623 /* No hardware attached; use lowest common denominator */
625 for_each_active_iommu(iommu, drhd) {
626 if (!iommu_paging_structure_coherency(iommu)) {
627 domain->iommu_coherency = false;
634 static bool domain_update_iommu_snooping(struct intel_iommu *skip)
636 struct dmar_drhd_unit *drhd;
637 struct intel_iommu *iommu;
641 for_each_active_iommu(iommu, drhd) {
644 * If the hardware is operating in the scalable mode,
645 * the snooping control is always supported since we
646 * always set PASID-table-entry.PGSNP bit if the domain
647 * is managed outside (UNMANAGED).
649 if (!sm_supported(iommu) &&
650 !ecap_sc_support(iommu->ecap)) {
661 static int domain_update_iommu_superpage(struct dmar_domain *domain,
662 struct intel_iommu *skip)
664 struct dmar_drhd_unit *drhd;
665 struct intel_iommu *iommu;
668 if (!intel_iommu_superpage)
671 /* set iommu_superpage to the smallest common denominator */
673 for_each_active_iommu(iommu, drhd) {
675 if (domain && domain_use_first_level(domain)) {
676 if (!cap_fl1gp_support(iommu->cap))
679 mask &= cap_super_page_val(iommu->cap);
691 static int domain_update_device_node(struct dmar_domain *domain)
693 struct device_domain_info *info;
694 int nid = NUMA_NO_NODE;
696 assert_spin_locked(&device_domain_lock);
698 if (list_empty(&domain->devices))
701 list_for_each_entry(info, &domain->devices, link) {
706 * There could be multiple device NUMA nodes, as devices within
707 * the same domain may sit behind different IOMMUs. There is no
708 * perfect answer in such a situation, so we use a first-come,
709 * first-served policy.
711 nid = dev_to_node(info->dev);
712 if (nid != NUMA_NO_NODE)
719 static void domain_update_iotlb(struct dmar_domain *domain);
721 /* Return the super pagesize bitmap if supported. */
722 static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
724 unsigned long bitmap = 0;
727 * 1-level super page supports page size of 2MiB, 2-level super page
728 * supports page size of both 2MiB and 1GiB.
730 if (domain->iommu_superpage == 1)
732 else if (domain->iommu_superpage == 2)
733 bitmap |= SZ_2M | SZ_1G;
738 /* Some capabilities may be different across iommus */
739 static void domain_update_iommu_cap(struct dmar_domain *domain)
741 domain_update_iommu_coherency(domain);
742 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
743 domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
746 * If RHSA is missing, we should default to the device numa domain
749 if (domain->nid == NUMA_NO_NODE)
750 domain->nid = domain_update_device_node(domain);
753 * First-level translation restricts the input-address to a
754 * canonical address (i.e., address bits 63:N have the same
755 * value as address bit [N-1], where N is 48 bits with 4-level
756 * paging and 57 bits with 5-level paging). Hence, skip bit [N-1].
759 if (domain_use_first_level(domain))
760 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
762 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
764 domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
765 domain_update_iotlb(domain);
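/*
 * Return the context entry for (@bus, @devfn), allocating the context
 * table on demand. In scalable mode a root entry holds two context-table
 * pointers; devices with devfn >= 0x80 use the upper one.
 */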
768 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
771 struct root_entry *root = &iommu->root_entry[bus];
772 struct context_entry *context;
776 if (sm_supported(iommu)) {
784 context = phys_to_virt(*entry & VTD_PAGE_MASK);
786 unsigned long phy_addr;
790 context = alloc_pgtable_page(iommu->node);
794 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
795 phy_addr = virt_to_phys((void *)context);
796 *entry = phy_addr | 1;
797 __iommu_flush_cache(iommu, entry, sizeof(*entry));
799 return &context[devfn];
802 static bool attach_deferred(struct device *dev)
804 return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO;
808 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
809 * sub-hierarchy of a candidate PCI-PCI bridge
810 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
811 * @bridge: the candidate PCI-PCI bridge
813 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
816 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
818 struct pci_dev *pdev, *pbridge;
820 if (!dev_is_pci(dev) || !dev_is_pci(bridge))
823 pdev = to_pci_dev(dev);
824 pbridge = to_pci_dev(bridge);
826 if (pbridge->subordinate &&
827 pbridge->subordinate->number <= pdev->bus->number &&
828 pbridge->subordinate->busn_res.end >= pdev->bus->number)
834 static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
836 struct dmar_drhd_unit *drhd;
840 /* We know that this device on this chipset has its own IOMMU.
841 * If we find it under a different IOMMU, then the BIOS is lying
842 * to us. Hope that the IOMMU for this device is actually
843 * disabled, and it needs no translation...
845 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
848 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
853 /* we know that this iommu should be at offset 0xa000 from vtbar */
854 drhd = dmar_find_matched_drhd_unit(pdev);
855 if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
856 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
857 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
864 static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
866 if (!iommu || iommu->drhd->ignored)
869 if (dev_is_pci(dev)) {
870 struct pci_dev *pdev = to_pci_dev(dev);
872 if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
873 pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
874 quirk_ioat_snb_local_iommu(pdev))
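/*
 * Find the IOMMU that translates @dev and report the bus/devfn numbers
 * that should be used to program it. Handles SR-IOV VFs (looked up via
 * their PF), devices behind PCI bridges, and ACPI companion devices.
 */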
881 struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
883 struct dmar_drhd_unit *drhd = NULL;
884 struct pci_dev *pdev = NULL;
885 struct intel_iommu *iommu;
893 if (dev_is_pci(dev)) {
894 struct pci_dev *pf_pdev;
896 pdev = pci_real_dma_dev(to_pci_dev(dev));
898 /* VFs aren't listed in scope tables; we need to look up
899 * the PF instead to find the IOMMU. */
900 pf_pdev = pci_physfn(pdev);
902 segment = pci_domain_nr(pdev->bus);
903 } else if (has_acpi_companion(dev))
904 dev = &ACPI_COMPANION(dev)->dev;
907 for_each_iommu(iommu, drhd) {
908 if (pdev && segment != drhd->segment)
911 for_each_active_dev_scope(drhd->devices,
912 drhd->devices_cnt, i, tmp) {
914 /* For a VF use its original BDF# not that of the PF
915 * which we used for the IOMMU lookup. Strictly speaking
916 * we could do this for all PCI devices; we only need to
917 * get the BDF# from the scope table for ACPI matches. */
918 if (pdev && pdev->is_virtfn)
922 *bus = drhd->devices[i].bus;
923 *devfn = drhd->devices[i].devfn;
928 if (is_downstream_to_pci_bridge(dev, tmp))
932 if (pdev && drhd->include_all) {
935 *bus = pdev->bus->number;
936 *devfn = pdev->devfn;
943 if (iommu_is_dummy(iommu, dev))
951 static void domain_flush_cache(struct dmar_domain *domain,
952 void *addr, int size)
954 if (!domain->iommu_coherency)
955 clflush_cache_range(addr, size);
958 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
960 struct context_entry *context;
964 spin_lock_irqsave(&iommu->lock, flags);
965 context = iommu_context_addr(iommu, bus, devfn, 0);
967 ret = context_present(context);
968 spin_unlock_irqrestore(&iommu->lock, flags);
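/*
 * Free every context table referenced from the root table, then the root
 * table itself. In scalable mode each root entry references two context
 * tables (lower and upper half of the devfn space).
 */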
972 static void free_context_table(struct intel_iommu *iommu)
976 struct context_entry *context;
978 spin_lock_irqsave(&iommu->lock, flags);
979 if (!iommu->root_entry) {
982 for (i = 0; i < ROOT_ENTRY_NR; i++) {
983 context = iommu_context_addr(iommu, i, 0, 0);
985 free_pgtable_page(context);
987 if (!sm_supported(iommu))
990 context = iommu_context_addr(iommu, i, 0x80, 0);
992 free_pgtable_page(context);
995 free_pgtable_page(iommu->root_entry);
996 iommu->root_entry = NULL;
998 spin_unlock_irqrestore(&iommu->lock, flags);
1001 #ifdef CONFIG_DMAR_DEBUG
1002 static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, u8 bus, u8 devfn)
1004 struct device_domain_info *info;
1005 struct dma_pte *parent, *pte;
1006 struct dmar_domain *domain;
1009 info = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
1010 if (!info || !info->domain) {
1011 pr_info("device [%02x:%02x.%d] not probed\n",
1012 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1016 domain = info->domain;
1017 level = agaw_to_level(domain->agaw);
1018 parent = domain->pgd;
1020 pr_info("no page table setup\n");
1025 offset = pfn_level_offset(pfn, level);
1026 pte = &parent[offset];
1027 if (!pte || (dma_pte_superpage(pte) || !dma_pte_present(pte))) {
1028 pr_info("PTE not present at level %d\n", level);
1032 pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
1037 parent = phys_to_virt(dma_pte_addr(pte));
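/*
 * Dump the root, context, PASID and page-table entries involved in
 * translating @addr for the device identified by @source_id. Used for
 * diagnostics when a DMA fault is reported.
 */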
1042 void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
1043 unsigned long long addr, u32 pasid)
1045 struct pasid_dir_entry *dir, *pde;
1046 struct pasid_entry *entries, *pte;
1047 struct context_entry *ctx_entry;
1048 struct root_entry *rt_entry;
1049 u8 devfn = source_id & 0xff;
1050 u8 bus = source_id >> 8;
1051 int i, dir_index, index;
1053 pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
1055 /* root entry dump */
1056 rt_entry = &iommu->root_entry[bus];
1058 pr_info("root table entry is not present\n");
1062 if (sm_supported(iommu))
1063 pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
1064 rt_entry->hi, rt_entry->lo);
1066 pr_info("root entry: 0x%016llx", rt_entry->lo);
1068 /* context entry dump */
1069 ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
1071 pr_info("context table entry is not present\n");
1075 pr_info("context entry: hi 0x%016llx, low 0x%016llx\n",
1076 ctx_entry->hi, ctx_entry->lo);
1078 /* legacy mode does not require PASID entries */
1079 if (!sm_supported(iommu))
1082 /* get the pointer to pasid directory entry */
1083 dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
1085 pr_info("pasid directory entry is not present\n");
1088 /* For request-without-pasid, get the pasid from context entry */
1089 if (intel_iommu_sm && pasid == INVALID_IOASID)
1090 pasid = PASID_RID2PASID;
1092 dir_index = pasid >> PASID_PDE_SHIFT;
1093 pde = &dir[dir_index];
1094 pr_info("pasid dir entry: 0x%016llx\n", pde->val);
1096 /* get the pointer to the pasid table entry */
1097 entries = get_pasid_table_from_pde(pde);
1099 pr_info("pasid table entry is not present\n");
1102 index = pasid & PASID_PTE_MASK;
1103 pte = &entries[index];
1104 for (i = 0; i < ARRAY_SIZE(pte->val); i++)
1105 pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
1108 pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn);
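/*
 * Return the page-table entry for @pfn at *target_level, allocating any
 * missing intermediate page-table pages on the way down. On return,
 * *target_level holds the level at which the walk actually stopped.
 */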
1112 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
1113 unsigned long pfn, int *target_level)
1115 struct dma_pte *parent, *pte;
1116 int level = agaw_to_level(domain->agaw);
1119 BUG_ON(!domain->pgd);
1121 if (!domain_pfn_supported(domain, pfn))
1122 /* Address beyond IOMMU's addressing capabilities. */
1125 parent = domain->pgd;
1130 offset = pfn_level_offset(pfn, level);
1131 pte = &parent[offset];
1132 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
1134 if (level == *target_level)
1137 if (!dma_pte_present(pte)) {
1140 tmp_page = alloc_pgtable_page(domain->nid);
1145 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
1146 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
1147 if (domain_use_first_level(domain)) {
1148 pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
1149 if (iommu_is_dma_domain(&domain->domain))
1150 pteval |= DMA_FL_PTE_ACCESS;
1152 if (cmpxchg64(&pte->val, 0ULL, pteval))
1153 /* Someone else set it while we were thinking; use theirs. */
1154 free_pgtable_page(tmp_page);
1156 domain_flush_cache(domain, pte, sizeof(*pte));
1161 parent = phys_to_virt(dma_pte_addr(pte));
1166 *target_level = level;
1171 /* return the pte for an address at a specific level */
1172 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1174 int level, int *large_page)
1176 struct dma_pte *parent, *pte;
1177 int total = agaw_to_level(domain->agaw);
1180 parent = domain->pgd;
1181 while (level <= total) {
1182 offset = pfn_level_offset(pfn, total);
1183 pte = &parent[offset];
1187 if (!dma_pte_present(pte)) {
1188 *large_page = total;
1192 if (dma_pte_superpage(pte)) {
1193 *large_page = total;
1197 parent = phys_to_virt(dma_pte_addr(pte));
1203 /* clear last level pte; a tlb flush should follow */
1204 static void dma_pte_clear_range(struct dmar_domain *domain,
1205 unsigned long start_pfn,
1206 unsigned long last_pfn)
1208 unsigned int large_page;
1209 struct dma_pte *first_pte, *pte;
1211 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1212 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1213 BUG_ON(start_pfn > last_pfn);
1215 /* we don't need lock here; nobody else touches the iova range */
1218 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1220 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1225 start_pfn += lvl_to_nr_pages(large_page);
1227 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1229 domain_flush_cache(domain, first_pte,
1230 (void *)pte - (void *)first_pte);
1232 } while (start_pfn && start_pfn <= last_pfn);
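/*
 * Recursively free page-table pages below @retain_level that are fully
 * covered by the [@start_pfn, @last_pfn] range, clearing the parent PTE
 * whenever a whole table is released.
 */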
1235 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1236 int retain_level, struct dma_pte *pte,
1237 unsigned long pfn, unsigned long start_pfn,
1238 unsigned long last_pfn)
1240 pfn = max(start_pfn, pfn);
1241 pte = &pte[pfn_level_offset(pfn, level)];
1244 unsigned long level_pfn;
1245 struct dma_pte *level_pte;
1247 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1250 level_pfn = pfn & level_mask(level);
1251 level_pte = phys_to_virt(dma_pte_addr(pte));
1254 dma_pte_free_level(domain, level - 1, retain_level,
1255 level_pte, level_pfn, start_pfn,
1260 * Free the page table if we're below the level we want to
1261 * retain and the range covers the entire table.
1263 if (level < retain_level && !(start_pfn > level_pfn ||
1264 last_pfn < level_pfn + level_size(level) - 1)) {
1266 domain_flush_cache(domain, pte, sizeof(*pte));
1267 free_pgtable_page(level_pte);
1270 pfn += level_size(level);
1271 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1275 * clear last level (leaf) ptes and free page table pages below the
1276 * level we wish to keep intact.
1278 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1279 unsigned long start_pfn,
1280 unsigned long last_pfn,
1283 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1284 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1285 BUG_ON(start_pfn > last_pfn);
1287 dma_pte_clear_range(domain, start_pfn, last_pfn);
1289 /* We don't need lock here; nobody else touches the iova range */
1290 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1291 domain->pgd, 0, start_pfn, last_pfn);
1294 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1295 free_pgtable_page(domain->pgd);
1300 /* When a page at a given level is being unlinked from its parent, we don't
1301 need to *modify* it at all. All we need to do is make a list of all the
1302 pages which can be freed just as soon as we've flushed the IOTLB and we
1303 know the hardware page-walk will no longer touch them.
1304 The 'pte' argument is the *parent* PTE, pointing to the page that is to be freed.
1306 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1307 int level, struct dma_pte *pte,
1308 struct page *freelist)
1312 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1313 pg->freelist = freelist;
1319 pte = page_address(pg);
1321 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1322 freelist = dma_pte_list_pagetables(domain, level - 1,
1325 } while (!first_pte_in_page(pte));
1330 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1331 struct dma_pte *pte, unsigned long pfn,
1332 unsigned long start_pfn,
1333 unsigned long last_pfn,
1334 struct page *freelist)
1336 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1338 pfn = max(start_pfn, pfn);
1339 pte = &pte[pfn_level_offset(pfn, level)];
1342 unsigned long level_pfn;
1344 if (!dma_pte_present(pte))
1347 level_pfn = pfn & level_mask(level);
1349 /* If range covers entire pagetable, free it */
1350 if (start_pfn <= level_pfn &&
1351 last_pfn >= level_pfn + level_size(level) - 1) {
1352 /* These subordinate page tables are going away entirely. Don't
1353 bother to clear them; we're just going to *free* them. */
1354 if (level > 1 && !dma_pte_superpage(pte))
1355 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1361 } else if (level > 1) {
1362 /* Recurse down into a level that isn't *entirely* obsolete */
1363 freelist = dma_pte_clear_level(domain, level - 1,
1364 phys_to_virt(dma_pte_addr(pte)),
1365 level_pfn, start_pfn, last_pfn,
1369 pfn += level_size(level);
1370 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1373 domain_flush_cache(domain, first_pte,
1374 (void *)++last_pte - (void *)first_pte);
1379 /* We can't just free the pages because the IOMMU may still be walking
1380 the page tables, and may have cached the intermediate levels. The
1381 pages can only be freed after the IOTLB flush has been done. */
1382 static struct page *domain_unmap(struct dmar_domain *domain,
1383 unsigned long start_pfn,
1384 unsigned long last_pfn,
1385 struct page *freelist)
1387 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1388 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1389 BUG_ON(start_pfn > last_pfn);
1391 /* we don't need lock here; nobody else touches the iova range */
1392 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1393 domain->pgd, 0, start_pfn, last_pfn,
1397 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1398 struct page *pgd_page = virt_to_page(domain->pgd);
1399 pgd_page->freelist = freelist;
1400 freelist = pgd_page;
1408 static void dma_free_pagelist(struct page *freelist)
1412 while ((pg = freelist)) {
1413 freelist = pg->freelist;
1414 free_pgtable_page(page_address(pg));
1418 /* iommu handling */
1419 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1421 struct root_entry *root;
1422 unsigned long flags;
1424 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1426 pr_err("Allocating root entry for %s failed\n",
1431 __iommu_flush_cache(iommu, root, ROOT_SIZE);
1433 spin_lock_irqsave(&iommu->lock, flags);
1434 iommu->root_entry = root;
1435 spin_unlock_irqrestore(&iommu->lock, flags);
1440 static void iommu_set_root_entry(struct intel_iommu *iommu)
1446 addr = virt_to_phys(iommu->root_entry);
1447 if (sm_supported(iommu))
1448 addr |= DMA_RTADDR_SMT;
1450 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1451 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1453 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1455 /* Make sure hardware complete it */
1456 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1457 readl, (sts & DMA_GSTS_RTPS), sts);
1459 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1461 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1462 if (sm_supported(iommu))
1463 qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
1464 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1467 void iommu_flush_write_buffer(struct intel_iommu *iommu)
1472 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1475 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1476 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1478 /* Make sure hardware complete it */
1479 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1480 readl, (!(val & DMA_GSTS_WBFS)), val);
1482 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1485 /* return value determines if we need a write buffer flush */
1486 static void __iommu_flush_context(struct intel_iommu *iommu,
1487 u16 did, u16 source_id, u8 function_mask,
1494 case DMA_CCMD_GLOBAL_INVL:
1495 val = DMA_CCMD_GLOBAL_INVL;
1497 case DMA_CCMD_DOMAIN_INVL:
1498 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1500 case DMA_CCMD_DEVICE_INVL:
1501 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1502 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1507 val |= DMA_CCMD_ICC;
1509 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1510 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1512 /* Make sure hardware complete it */
1513 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1514 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1516 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1519 /* return value determines if we need a write buffer flush */
1520 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1521 u64 addr, unsigned int size_order, u64 type)
1523 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1524 u64 val = 0, val_iva = 0;
1528 case DMA_TLB_GLOBAL_FLUSH:
1529 /* global flush doesn't need set IVA_REG */
1530 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1532 case DMA_TLB_DSI_FLUSH:
1533 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1535 case DMA_TLB_PSI_FLUSH:
1536 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1537 /* IH bit is passed in as part of address */
1538 val_iva = size_order | addr;
1543 /* Note: set drain read/write */
1546 * This is probably done to be extra secure. Looks like we can
1547 * ignore it without any impact.
1549 if (cap_read_drain(iommu->cap))
1550 val |= DMA_TLB_READ_DRAIN;
1552 if (cap_write_drain(iommu->cap))
1553 val |= DMA_TLB_WRITE_DRAIN;
1555 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1556 /* Note: Only uses first TLB reg currently */
1558 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1559 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1561 /* Make sure hardware complete it */
1562 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1563 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1565 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1567 /* check IOTLB invalidation granularity */
1568 if (DMA_TLB_IAIG(val) == 0)
1569 pr_err("Flush IOTLB failed\n");
1570 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1571 pr_debug("TLB flush request %Lx, actual %Lx\n",
1572 (unsigned long long)DMA_TLB_IIRG(type),
1573 (unsigned long long)DMA_TLB_IAIG(val));
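/*
 * Look up the device_domain_info for (@bus, @devfn) behind @iommu within
 * @domain and return it only if the device supports ATS, so the caller
 * can decide whether to enable the device IOTLB.
 */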
1576 static struct device_domain_info *
1577 iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
1580 struct device_domain_info *info;
1582 assert_spin_locked(&device_domain_lock);
1587 list_for_each_entry(info, &domain->devices, link)
1588 if (info->iommu == iommu && info->bus == bus &&
1589 info->devfn == devfn) {
1590 if (info->ats_supported && info->dev)
1598 static void domain_update_iotlb(struct dmar_domain *domain)
1600 struct device_domain_info *info;
1601 bool has_iotlb_device = false;
1603 assert_spin_locked(&device_domain_lock);
1605 list_for_each_entry(info, &domain->devices, link)
1606 if (info->ats_enabled) {
1607 has_iotlb_device = true;
1611 if (!has_iotlb_device) {
1612 struct subdev_domain_info *sinfo;
1614 list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
1615 info = get_domain_info(sinfo->pdev);
1616 if (info && info->ats_enabled) {
1617 has_iotlb_device = true;
1623 domain->has_iotlb_device = has_iotlb_device;
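/*
 * Enable ATS (and, when CONFIG_INTEL_IOMMU_SVM is set, PASID and PRI) on
 * the PCI device described by @info if both the device and the IOMMU
 * support them, and record the resulting state in @info.
 */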
1626 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1628 struct pci_dev *pdev;
1630 assert_spin_locked(&device_domain_lock);
1632 if (!info || !dev_is_pci(info->dev))
1635 pdev = to_pci_dev(info->dev);
1636 /* For an IOMMU that supports device IOTLB throttling (DIT), we assign a
1637 * PFSID to the invalidation descriptor of a VF so that the IOMMU HW can
1638 * gauge queue depth at the PF level. If DIT is not set, PFSID is treated
1639 * as reserved and should be set to 0.
1641 if (!ecap_dit(info->iommu->ecap))
1644 struct pci_dev *pf_pdev;
1646 /* pdev will be returned if device is not a vf */
1647 pf_pdev = pci_physfn(pdev);
1648 info->pfsid = pci_dev_id(pf_pdev);
1651 #ifdef CONFIG_INTEL_IOMMU_SVM
1652 /* The PCIe spec, in its wisdom, declares that the behaviour of
1653 the device if you enable PASID support after ATS support is
1654 undefined. So always enable PASID support on devices which
1655 have it, even if we can't yet know if we're ever going to use it.
1657 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1658 info->pasid_enabled = 1;
1660 if (info->pri_supported &&
1661 (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
1662 !pci_reset_pri(pdev) && !pci_enable_pri(pdev, PRQ_DEPTH))
1663 info->pri_enabled = 1;
1665 if (info->ats_supported && pci_ats_page_aligned(pdev) &&
1666 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1667 info->ats_enabled = 1;
1668 domain_update_iotlb(info->domain);
1669 info->ats_qdep = pci_ats_queue_depth(pdev);
1673 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1675 struct pci_dev *pdev;
1677 assert_spin_locked(&device_domain_lock);
1679 if (!dev_is_pci(info->dev))
1682 pdev = to_pci_dev(info->dev);
1684 if (info->ats_enabled) {
1685 pci_disable_ats(pdev);
1686 info->ats_enabled = 0;
1687 domain_update_iotlb(info->domain);
1689 #ifdef CONFIG_INTEL_IOMMU_SVM
1690 if (info->pri_enabled) {
1691 pci_disable_pri(pdev);
1692 info->pri_enabled = 0;
1694 if (info->pasid_enabled) {
1695 pci_disable_pasid(pdev);
1696 info->pasid_enabled = 0;
1701 static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
1702 u64 addr, unsigned int mask)
1706 if (!info || !info->ats_enabled)
1709 sid = info->bus << 8 | info->devfn;
1710 qdep = info->ats_qdep;
1711 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1715 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1716 u64 addr, unsigned mask)
1718 unsigned long flags;
1719 struct device_domain_info *info;
1720 struct subdev_domain_info *sinfo;
1722 if (!domain->has_iotlb_device)
1725 spin_lock_irqsave(&device_domain_lock, flags);
1726 list_for_each_entry(info, &domain->devices, link)
1727 __iommu_flush_dev_iotlb(info, addr, mask);
1729 list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
1730 info = get_domain_info(sinfo->pdev);
1731 __iommu_flush_dev_iotlb(info, addr, mask);
1733 spin_unlock_irqrestore(&device_domain_lock, flags);
1736 static void domain_flush_piotlb(struct intel_iommu *iommu,
1737 struct dmar_domain *domain,
1738 u64 addr, unsigned long npages, bool ih)
1740 u16 did = domain->iommu_did[iommu->seq_id];
1742 if (domain->default_pasid)
1743 qi_flush_piotlb(iommu, did, domain->default_pasid,
1746 if (!list_empty(&domain->devices))
1747 qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih);
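/*
 * Page-selective-within-domain IOTLB invalidation for @pages pages starting
 * at @pfn. Falls back to a domain-selective flush when PSI is not supported
 * or the range is too large, and also flushes the device IOTLBs unless this
 * is a map of previously non-present entries under caching mode.
 */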
1750 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1751 struct dmar_domain *domain,
1752 unsigned long pfn, unsigned int pages,
1755 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1756 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1757 u16 did = domain->iommu_did[iommu->seq_id];
1764 if (domain_use_first_level(domain)) {
1765 domain_flush_piotlb(iommu, domain, addr, pages, ih);
1768 * Fallback to domain selective flush if no PSI support or
1769 * the size is too big. PSI requires page size to be 2 ^ x,
1770 * and the base address is naturally aligned to the size.
1772 if (!cap_pgsel_inv(iommu->cap) ||
1773 mask > cap_max_amask_val(iommu->cap))
1774 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1777 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1782 * In caching mode, changes of pages from non-present to present require
1783 * a flush. However, the device IOTLB doesn't need to be flushed in this case.
1785 if (!cap_caching_mode(iommu->cap) || !map)
1786 iommu_flush_dev_iotlb(domain, addr, mask);
1789 /* Notification for newly created mappings */
1790 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1791 struct dmar_domain *domain,
1792 unsigned long pfn, unsigned int pages)
1795 * It's a non-present to present mapping. Only flush if caching mode and second level.
1798 if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
1799 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1801 iommu_flush_write_buffer(iommu);
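/*
 * iommu_ops flush_iotlb_all callback: perform a domain-selective flush on
 * every IOMMU the domain is attached to, plus a device-IOTLB flush when
 * the IOMMU is not operating in caching mode.
 */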
1804 static void intel_flush_iotlb_all(struct iommu_domain *domain)
1806 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
1809 for_each_domain_iommu(idx, dmar_domain) {
1810 struct intel_iommu *iommu = g_iommus[idx];
1811 u16 did = dmar_domain->iommu_did[iommu->seq_id];
1813 if (domain_use_first_level(dmar_domain))
1814 domain_flush_piotlb(iommu, dmar_domain, 0, -1, 0);
1816 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1819 if (!cap_caching_mode(iommu->cap))
1820 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1821 0, MAX_AGAW_PFN_WIDTH);
1825 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1828 unsigned long flags;
1830 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1833 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1834 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1835 pmen &= ~DMA_PMEN_EPM;
1836 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1838 /* wait for the protected region status bit to clear */
1839 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1840 readl, !(pmen & DMA_PMEN_PRS), pmen);
1842 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1845 static void iommu_enable_translation(struct intel_iommu *iommu)
1848 unsigned long flags;
1850 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1851 iommu->gcmd |= DMA_GCMD_TE;
1852 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1854 /* Make sure hardware complete it */
1855 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1856 readl, (sts & DMA_GSTS_TES), sts);
1858 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1861 static void iommu_disable_translation(struct intel_iommu *iommu)
1866 if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
1867 (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
1870 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1871 iommu->gcmd &= ~DMA_GCMD_TE;
1872 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1874 /* Make sure hardware complete it */
1875 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1876 readl, (!(sts & DMA_GSTS_TES)), sts);
1878 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
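/*
 * Allocate the per-IOMMU domain-id bitmap and the two-level domains[]
 * lookup table, and reserve the domain ids that must not be handed out
 * to real domains.
 */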
1881 static int iommu_init_domains(struct intel_iommu *iommu)
1883 u32 ndomains, nlongs;
1886 ndomains = cap_ndoms(iommu->cap);
1887 pr_debug("%s: Number of Domains supported <%d>\n",
1888 iommu->name, ndomains);
1889 nlongs = BITS_TO_LONGS(ndomains);
1891 spin_lock_init(&iommu->lock);
1893 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1894 if (!iommu->domain_ids)
1897 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1898 iommu->domains = kzalloc(size, GFP_KERNEL);
1900 if (iommu->domains) {
1901 size = 256 * sizeof(struct dmar_domain *);
1902 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1905 if (!iommu->domains || !iommu->domains[0]) {
1906 pr_err("%s: Allocating domain array failed\n",
1908 kfree(iommu->domain_ids);
1909 kfree(iommu->domains);
1910 iommu->domain_ids = NULL;
1911 iommu->domains = NULL;
1916 * If Caching mode is set, then invalid translations are tagged
1917 * with domain-id 0, hence we need to pre-allocate it. We also
1918 * use domain-id 0 as a marker for non-allocated domain-id, so
1919 * make sure it is not used for a real domain.
1921 set_bit(0, iommu->domain_ids);
1924 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1925 * entry for first-level or pass-through translation modes should
1926 * be programmed with a domain id different from those used for
1927 * second-level or nested translation. We reserve a domain id for this purpose.
1930 if (sm_supported(iommu))
1931 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1936 static void disable_dmar_iommu(struct intel_iommu *iommu)
1938 struct device_domain_info *info, *tmp;
1939 unsigned long flags;
1941 if (!iommu->domains || !iommu->domain_ids)
1944 spin_lock_irqsave(&device_domain_lock, flags);
1945 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1946 if (info->iommu != iommu)
1949 if (!info->dev || !info->domain)
1952 __dmar_remove_one_dev_info(info);
1954 spin_unlock_irqrestore(&device_domain_lock, flags);
1956 if (iommu->gcmd & DMA_GCMD_TE)
1957 iommu_disable_translation(iommu);
1960 static void free_dmar_iommu(struct intel_iommu *iommu)
1962 if ((iommu->domains) && (iommu->domain_ids)) {
1963 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1966 for (i = 0; i < elems; i++)
1967 kfree(iommu->domains[i]);
1968 kfree(iommu->domains);
1969 kfree(iommu->domain_ids);
1970 iommu->domains = NULL;
1971 iommu->domain_ids = NULL;
1974 g_iommus[iommu->seq_id] = NULL;
1976 /* free context mapping */
1977 free_context_table(iommu);
1979 #ifdef CONFIG_INTEL_IOMMU_SVM
1980 if (pasid_supported(iommu)) {
1981 if (ecap_prs(iommu->ecap))
1982 intel_svm_finish_prq(iommu);
1984 if (vccap_pasid(iommu->vccap))
1985 ioasid_unregister_allocator(&iommu->pasid_allocator);
1991 * Check and return whether first level is used by default for DMA translation.
1994 static bool first_level_by_default(unsigned int type)
1996 /* Only SL is available in legacy mode */
1997 if (!scalable_mode_support())
2000 /* Only level (either FL or SL) is available, just use it */
2001 if (intel_cap_flts_sanity() ^ intel_cap_slts_sanity())
2002 return intel_cap_flts_sanity();
2004 /* Both levels are available, decide it based on domain type */
2005 return type != IOMMU_DOMAIN_UNMANAGED;
2008 static struct dmar_domain *alloc_domain(unsigned int type)
2010 struct dmar_domain *domain;
2012 domain = alloc_domain_mem();
2016 memset(domain, 0, sizeof(*domain));
2017 domain->nid = NUMA_NO_NODE;
2018 if (first_level_by_default(type))
2019 domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
2020 domain->has_iotlb_device = false;
2021 INIT_LIST_HEAD(&domain->devices);
2022 INIT_LIST_HEAD(&domain->subdevices);
2027 /* Must be called with iommu->lock */
2028 static int domain_attach_iommu(struct dmar_domain *domain,
2029 struct intel_iommu *iommu)
2031 unsigned long ndomains;
2034 assert_spin_locked(&device_domain_lock);
2035 assert_spin_locked(&iommu->lock);
2037 domain->iommu_refcnt[iommu->seq_id] += 1;
2038 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
2039 ndomains = cap_ndoms(iommu->cap);
2040 num = find_first_zero_bit(iommu->domain_ids, ndomains);
2042 if (num >= ndomains) {
2043 pr_err("%s: No free domain ids\n", iommu->name);
2044 domain->iommu_refcnt[iommu->seq_id] -= 1;
2048 set_bit(num, iommu->domain_ids);
2049 set_iommu_domain(iommu, num, domain);
2051 domain->iommu_did[iommu->seq_id] = num;
2052 domain->nid = iommu->node;
2054 domain_update_iommu_cap(domain);
2060 static void domain_detach_iommu(struct dmar_domain *domain,
2061 struct intel_iommu *iommu)
2065 assert_spin_locked(&device_domain_lock);
2066 assert_spin_locked(&iommu->lock);
2068 domain->iommu_refcnt[iommu->seq_id] -= 1;
2069 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
2070 num = domain->iommu_did[iommu->seq_id];
2071 clear_bit(num, iommu->domain_ids);
2072 set_iommu_domain(iommu, num, NULL);
2074 domain_update_iommu_cap(domain);
2075 domain->iommu_did[iommu->seq_id] = 0;
2079 static inline int guestwidth_to_adjustwidth(int gaw)
2082 int r = (gaw - 12) % 9;
2093 static void domain_exit(struct dmar_domain *domain)
2096 /* Remove associated devices and clear attached or cached domains */
2097 domain_remove_dev_info(domain);
2100 struct page *freelist;
2102 freelist = domain_unmap(domain, 0,
2103 DOMAIN_MAX_PFN(domain->gaw), NULL);
2104 dma_free_pagelist(freelist);
2107 free_domain_mem(domain);
2111 * Get the PASID directory size for scalable mode context entry.
2112 * Value of X in the PDTS field of a scalable mode context entry
2113 * indicates PASID directory with 2^(X + 7) entries.
2115 static inline unsigned long context_get_sm_pds(struct pasid_table *table)
2119 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
2120 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
2128 * Set the RID_PASID field of a scalable mode context entry. The
2129 * IOMMU hardware will use the PASID value set in this field for
2130 * DMA translations of DMA requests without PASID.
2133 context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
2135 context->hi |= pasid & ((1 << 20) - 1);
2139 * Set the DTE(Device-TLB Enable) field of a scalable mode context
2142 static inline void context_set_sm_dte(struct context_entry *context)
2144 context->lo |= (1 << 2);
2148 * Set the PRE(Page Request Enable) field of a scalable mode context
2151 static inline void context_set_sm_pre(struct context_entry *context)
2153 context->lo |= (1 << 4);
2156 /* Convert value to context PASID directory size field coding. */
2157 #define context_pdts(pds) (((pds) & 0x7) << 9)
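/*
 * Program the context entry for (@bus, @devfn) on @iommu so that DMA from
 * the device is translated by @domain (or passed through), then issue the
 * flushes required when the entry transitions to present.
 */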
2159 static int domain_context_mapping_one(struct dmar_domain *domain,
2160 struct intel_iommu *iommu,
2161 struct pasid_table *table,
2164 u16 did = domain->iommu_did[iommu->seq_id];
2165 int translation = CONTEXT_TT_MULTI_LEVEL;
2166 struct device_domain_info *info = NULL;
2167 struct context_entry *context;
2168 unsigned long flags;
2173 if (hw_pass_through && domain_type_is_si(domain))
2174 translation = CONTEXT_TT_PASS_THROUGH;
2176 pr_debug("Set context mapping for %02x:%02x.%d\n",
2177 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
2179 BUG_ON(!domain->pgd);
2181 spin_lock_irqsave(&device_domain_lock, flags);
2182 spin_lock(&iommu->lock);
2185 context = iommu_context_addr(iommu, bus, devfn, 1);
2190 if (context_present(context))
2194 * For kdump cases, old valid entries may be cached due to the
2195 * in-flight DMA and copied pgtable, but there is no unmapping
2196 * behaviour for them, thus we need an explicit cache flush for
2197 * the newly-mapped device. For kdump, at this point, the device
2198 * is supposed to finish reset at its driver probe stage, so no
2199 * in-flight DMA will exist, and we don't need to worry anymore hereafter.
2202 if (context_copied(context)) {
2203 u16 did_old = context_domain_id(context);
2205 if (did_old < cap_ndoms(iommu->cap)) {
2206 iommu->flush.flush_context(iommu, did_old,
2207 (((u16)bus) << 8) | devfn,
2208 DMA_CCMD_MASK_NOBIT,
2209 DMA_CCMD_DEVICE_INVL);
2210 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2215 context_clear_entry(context);
2217 if (sm_supported(iommu)) {
2222 /* Setup the PASID DIR pointer: */
2223 pds = context_get_sm_pds(table);
2224 context->lo = (u64)virt_to_phys(table->table) |
2227 /* Setup the RID_PASID field: */
2228 context_set_sm_rid2pasid(context, PASID_RID2PASID);
2231 * Setup the Device-TLB enable bit and Page request
2234 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2235 if (info && info->ats_supported)
2236 context_set_sm_dte(context);
2237 if (info && info->pri_supported)
2238 context_set_sm_pre(context);
2240 struct dma_pte *pgd = domain->pgd;
2243 context_set_domain_id(context, did);
2245 if (translation != CONTEXT_TT_PASS_THROUGH) {
2247 * Skip top levels of page tables for an iommu which has a
2248 * smaller agaw than the default. Unnecessary for PT mode.
2250 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2252 pgd = phys_to_virt(dma_pte_addr(pgd));
2253 if (!dma_pte_present(pgd))
2257 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2258 if (info && info->ats_supported)
2259 translation = CONTEXT_TT_DEV_IOTLB;
2261 translation = CONTEXT_TT_MULTI_LEVEL;
2263 context_set_address_root(context, virt_to_phys(pgd));
2264 context_set_address_width(context, agaw);
2267 * In pass through mode, AW must be programmed to
2268 * indicate the largest AGAW value supported by
2269 * hardware. And ASR is ignored by hardware.
2271 context_set_address_width(context, iommu->msagaw);
2274 context_set_translation_type(context, translation);
2277 context_set_fault_enable(context);
2278 context_set_present(context);
2279 if (!ecap_coherent(iommu->ecap))
2280 clflush_cache_range(context, sizeof(*context));
2283 * It's a non-present to present mapping. If hardware doesn't cache
2284 * non-present entries, we only need to flush the write-buffer. If it
2285 * _does_ cache non-present entries, then it does so in the special
2286 * domain #0, which we have to flush:
2288 if (cap_caching_mode(iommu->cap)) {
2289 iommu->flush.flush_context(iommu, 0,
2290 (((u16)bus) << 8) | devfn,
2291 DMA_CCMD_MASK_NOBIT,
2292 DMA_CCMD_DEVICE_INVL);
2293 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2295 iommu_flush_write_buffer(iommu);
2297 iommu_enable_dev_iotlb(info);
2302 spin_unlock(&iommu->lock);
2303 spin_unlock_irqrestore(&device_domain_lock, flags);
2308 struct domain_context_mapping_data {
2309 struct dmar_domain *domain;
2310 struct intel_iommu *iommu;
2311 struct pasid_table *table;
2314 static int domain_context_mapping_cb(struct pci_dev *pdev,
2315 u16 alias, void *opaque)
2317 struct domain_context_mapping_data *data = opaque;
2319 return domain_context_mapping_one(data->domain, data->iommu,
2320 data->table, PCI_BUS_NUM(alias),
2325 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2327 struct domain_context_mapping_data data;
2328 struct pasid_table *table;
2329 struct intel_iommu *iommu;
2332 iommu = device_to_iommu(dev, &bus, &devfn);
2336 table = intel_pasid_get_table(dev);
2338 if (!dev_is_pci(dev))
2339 return domain_context_mapping_one(domain, iommu, table,
2342 data.domain = domain;
2346 return pci_for_each_dma_alias(to_pci_dev(dev),
2347 &domain_context_mapping_cb, &data);
2350 static int domain_context_mapped_cb(struct pci_dev *pdev,
2351 u16 alias, void *opaque)
2353 struct intel_iommu *iommu = opaque;
2355 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2358 static int domain_context_mapped(struct device *dev)
2360 struct intel_iommu *iommu;
2363 iommu = device_to_iommu(dev, &bus, &devfn);
2367 if (!dev_is_pci(dev))
2368 return device_context_mapped(iommu, bus, devfn);
2370 return !pci_for_each_dma_alias(to_pci_dev(dev),
2371 domain_context_mapped_cb, iommu);
2374 /* Returns a number of VTD pages, but aligned to MM page size */
2375 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2378 host_addr &= ~PAGE_MASK;
2379 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2382 /* Return largest possible superpage level for a given mapping */
2383 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2384 unsigned long iov_pfn,
2385 unsigned long phy_pfn,
2386 unsigned long pages)
2388 int support, level = 1;
2389 unsigned long pfnmerge;
2391 support = domain->iommu_superpage;
2393 /* To use a large page, the virtual *and* physical addresses
2394 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2395 of them will mean we have to use smaller pages. So just
2396 merge them and check both at once. */
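/* e.g. iov_pfn = 0x200, phy_pfn = 0x1400, pages = 0x200 (both PFNs 2MiB
* aligned, 2MiB worth of 4KiB pages): the loop below returns level 2,
* i.e. a single 2MiB superpage, provided the domain supports superpages. */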
2397 pfnmerge = iov_pfn | phy_pfn;
2399 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2400 pages >>= VTD_STRIDE_SHIFT;
2403 pfnmerge >>= VTD_STRIDE_SHIFT;
2411 * Ensure that old small page tables are removed to make room for superpage(s).
2412 * We're going to add new large pages, so make sure we don't remove their parent
2413 * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared.
2415 static void switch_to_super_page(struct dmar_domain *domain,
2416 unsigned long start_pfn,
2417 unsigned long end_pfn, int level)
2419 unsigned long lvl_pages = lvl_to_nr_pages(level);
2420 struct dma_pte *pte = NULL;
2423 while (start_pfn <= end_pfn) {
2425 pte = pfn_to_dma_pte(domain, start_pfn, &level);
2427 if (dma_pte_present(pte)) {
2428 dma_pte_free_pagetable(domain, start_pfn,
2429 start_pfn + lvl_pages - 1,
2432 for_each_domain_iommu(i, domain)
2433 iommu_flush_iotlb_psi(g_iommus[i], domain,
2434 start_pfn, lvl_pages,
2439 start_pfn += lvl_pages;
2440 if (first_pte_in_page(pte))
2446 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2447 unsigned long phys_pfn, unsigned long nr_pages, int prot)
2449 struct dma_pte *first_pte = NULL, *pte = NULL;
2450 unsigned int largepage_lvl = 0;
2451 unsigned long lvl_pages = 0;
2455 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2457 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2460 attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
2461 attr |= DMA_FL_PTE_PRESENT;
2462 if (domain_use_first_level(domain)) {
2463 attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
2464 if (prot & DMA_PTE_WRITE)
2465 attr |= DMA_FL_PTE_DIRTY;
2468 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
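/* pteval: target page-frame address ORed with the permission/attribute
* bits collected in attr above; it is advanced as pages are mapped below. */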
2470 while (nr_pages > 0) {
2474 largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
2475 phys_pfn, nr_pages);
2477 pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2482 lvl_pages = lvl_to_nr_pages(largepage_lvl);
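/* e.g. level 1 -> 1 page (4KiB), level 2 -> 512 pages (2MiB),
* level 3 -> 262144 pages (1GiB). */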
2484 /* It is a large page */
2485 if (largepage_lvl > 1) {
2486 unsigned long end_pfn;
2487 unsigned long pages_to_remove;
2489 pteval |= DMA_PTE_LARGE_PAGE;
2490 pages_to_remove = min_t(unsigned long, nr_pages,
2491 nr_pte_to_next_page(pte) * lvl_pages);
2492 end_pfn = iov_pfn + pages_to_remove - 1;
2493 switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
2495 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2499 /* We don't need a lock here; nobody else
2500 * touches this iova range
2502 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2504 static int dumps = 5;
2505 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2506 iov_pfn, tmp, (unsigned long long)pteval);
2509 debug_dma_dump_mappings(NULL);
2514 nr_pages -= lvl_pages;
2515 iov_pfn += lvl_pages;
2516 phys_pfn += lvl_pages;
2517 pteval += lvl_pages * VTD_PAGE_SIZE;
2519 /* If the next PTE would be the first in a new page, then we
2520 * need to flush the cache on the entries we've just written.
2521 * And then we'll need to recalculate 'pte', so clear it and
2522 * let it get set again in the if (!pte) block above.
2524 * If we're done (!nr_pages) we need to flush the cache too.
2526 * Also if we've been setting superpages, we may need to
2527 * recalculate 'pte' and switch back to smaller pages for the
2528 * end of the mapping, if the trailing size is not enough to
2529 * use another superpage (i.e. nr_pages < lvl_pages).
2532 if (!nr_pages || first_pte_in_page(pte) ||
2533 (largepage_lvl > 1 && nr_pages < lvl_pages)) {
2534 domain_flush_cache(domain, first_pte,
2535 (void *)pte - (void *)first_pte);
2543 static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
2545 struct intel_iommu *iommu = info->iommu;
2546 struct context_entry *context;
2547 unsigned long flags;
2553 spin_lock_irqsave(&iommu->lock, flags);
2554 context = iommu_context_addr(iommu, bus, devfn, 0);
2556 spin_unlock_irqrestore(&iommu->lock, flags);
2560 if (sm_supported(iommu)) {
2561 if (hw_pass_through && domain_type_is_si(info->domain))
2562 did_old = FLPT_DEFAULT_DID;
2564 did_old = info->domain->iommu_did[iommu->seq_id];
2566 did_old = context_domain_id(context);
2569 context_clear_entry(context);
2570 __iommu_flush_cache(iommu, context, sizeof(*context));
2571 spin_unlock_irqrestore(&iommu->lock, flags);
2572 iommu->flush.flush_context(iommu,
2574 (((u16)bus) << 8) | devfn,
2575 DMA_CCMD_MASK_NOBIT,
2576 DMA_CCMD_DEVICE_INVL);
2578 if (sm_supported(iommu))
2579 qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0);
2581 iommu->flush.flush_iotlb(iommu,
2587 __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH);
2590 static inline void unlink_domain_info(struct device_domain_info *info)
2592 assert_spin_locked(&device_domain_lock);
2593 list_del(&info->link);
2594 list_del(&info->global);
2596 dev_iommu_priv_set(info->dev, NULL);
2599 static void domain_remove_dev_info(struct dmar_domain *domain)
2601 struct device_domain_info *info, *tmp;
2602 unsigned long flags;
2604 spin_lock_irqsave(&device_domain_lock, flags);
2605 list_for_each_entry_safe(info, tmp, &domain->devices, link)
2606 __dmar_remove_one_dev_info(info);
2607 spin_unlock_irqrestore(&device_domain_lock, flags);
2610 struct dmar_domain *find_domain(struct device *dev)
2612 struct device_domain_info *info;
2614 if (unlikely(!dev || !dev->iommu))
2617 if (unlikely(attach_deferred(dev)))
2620 /* No lock here, assumes no domain exit in normal case */
2621 info = get_domain_info(dev);
2623 return info->domain;
2628 static inline struct device_domain_info *
2629 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2631 struct device_domain_info *info;
2633 list_for_each_entry(info, &device_domain_list, global)
2634 if (info->segment == segment && info->bus == bus &&
2635 info->devfn == devfn)
2641 static int domain_setup_first_level(struct intel_iommu *iommu,
2642 struct dmar_domain *domain,
2646 struct dma_pte *pgd = domain->pgd;
2651 * Skip top levels of page tables for iommu which has
2652 * less agaw than default. Unnecessary for PT mode.
2654 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2655 pgd = phys_to_virt(dma_pte_addr(pgd));
2656 if (!dma_pte_present(pgd))
2660 level = agaw_to_level(agaw);
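/* First-level (scalable-mode) translation expects a 4-level (48-bit)
* or 5-level (57-bit) page table; anything else is rejected below. */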
2661 if (level != 4 && level != 5)
2664 if (pasid != PASID_RID2PASID)
2665 flags |= PASID_FLAG_SUPERVISOR_MODE;
2667 flags |= PASID_FLAG_FL5LP;
2669 if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
2670 flags |= PASID_FLAG_PAGE_SNOOP;
2672 return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
2673 domain->iommu_did[iommu->seq_id],
2677 static bool dev_is_real_dma_subdevice(struct device *dev)
2679 return dev && dev_is_pci(dev) &&
2680 pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
2683 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2686 struct dmar_domain *domain)
2688 struct dmar_domain *found = NULL;
2689 struct device_domain_info *info;
2690 unsigned long flags;
2693 info = alloc_devinfo_mem();
2697 if (!dev_is_real_dma_subdevice(dev)) {
2699 info->devfn = devfn;
2700 info->segment = iommu->segment;
2702 struct pci_dev *pdev = to_pci_dev(dev);
2704 info->bus = pdev->bus->number;
2705 info->devfn = pdev->devfn;
2706 info->segment = pci_domain_nr(pdev->bus);
2709 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2710 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2713 info->domain = domain;
2714 info->iommu = iommu;
2715 info->pasid_table = NULL;
2716 info->auxd_enabled = 0;
2717 INIT_LIST_HEAD(&info->subdevices);
2719 if (dev && dev_is_pci(dev)) {
2720 struct pci_dev *pdev = to_pci_dev(info->dev);
2722 if (ecap_dev_iotlb_support(iommu->ecap) &&
2723 pci_ats_supported(pdev) &&
2724 dmar_find_matched_atsr_unit(pdev))
2725 info->ats_supported = 1;
2727 if (sm_supported(iommu)) {
2728 if (pasid_supported(iommu)) {
2729 int features = pci_pasid_features(pdev);
2731 info->pasid_supported = features | 1;
2734 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2735 pci_pri_supported(pdev))
2736 info->pri_supported = 1;
2740 spin_lock_irqsave(&device_domain_lock, flags);
2742 found = find_domain(dev);
2745 struct device_domain_info *info2;
2746 info2 = dmar_search_domain_by_dev_info(info->segment, info->bus,
2749 found = info2->domain;
2755 spin_unlock_irqrestore(&device_domain_lock, flags);
2756 free_devinfo_mem(info);
2757 /* Caller must free the original domain */
2761 spin_lock(&iommu->lock);
2762 ret = domain_attach_iommu(domain, iommu);
2763 spin_unlock(&iommu->lock);
2766 spin_unlock_irqrestore(&device_domain_lock, flags);
2767 free_devinfo_mem(info);
2771 list_add(&info->link, &domain->devices);
2772 list_add(&info->global, &device_domain_list);
2774 dev_iommu_priv_set(dev, info);
2775 spin_unlock_irqrestore(&device_domain_lock, flags);
2777 /* PASID table is mandatory for a PCI device in scalable mode. */
2778 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
2779 ret = intel_pasid_alloc_table(dev);
2781 dev_err(dev, "PASID table allocation failed\n");
2782 dmar_remove_one_dev_info(dev);
2786 /* Setup the PASID entry for requests without PASID: */
2787 spin_lock_irqsave(&iommu->lock, flags);
2788 if (hw_pass_through && domain_type_is_si(domain))
2789 ret = intel_pasid_setup_pass_through(iommu, domain,
2790 dev, PASID_RID2PASID);
2791 else if (domain_use_first_level(domain))
2792 ret = domain_setup_first_level(iommu, domain, dev,
2795 ret = intel_pasid_setup_second_level(iommu, domain,
2796 dev, PASID_RID2PASID);
2797 spin_unlock_irqrestore(&iommu->lock, flags);
2799 dev_err(dev, "Setup RID2PASID failed\n");
2800 dmar_remove_one_dev_info(dev);
2805 if (dev && domain_context_mapping(domain, dev)) {
2806 dev_err(dev, "Domain context map failed\n");
2807 dmar_remove_one_dev_info(dev);
2814 static int iommu_domain_identity_map(struct dmar_domain *domain,
2815 unsigned long first_vpfn,
2816 unsigned long last_vpfn)
2819 * The RMRR range might overlap with a physical memory range; clear it first.
2822 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2824 return __domain_mapping(domain, first_vpfn,
2825 first_vpfn, last_vpfn - first_vpfn + 1,
2826 DMA_PTE_READ|DMA_PTE_WRITE);
2829 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2831 static int __init si_domain_init(int hw)
2833 struct dmar_rmrr_unit *rmrr;
2837 si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY);
2841 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2842 domain_exit(si_domain);
2849 for_each_online_node(nid) {
2850 unsigned long start_pfn, end_pfn;
2853 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2854 ret = iommu_domain_identity_map(si_domain,
2855 mm_to_dma_pfn(start_pfn),
2856 mm_to_dma_pfn(end_pfn));
2863 * Identity map the RMRRs so that devices with RMRRs can also use the si_domain.
2866 for_each_rmrr_units(rmrr) {
2867 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2869 unsigned long long start = rmrr->base_address;
2870 unsigned long long end = rmrr->end_address;
2872 if (WARN_ON(end < start ||
2873 end >> agaw_to_width(si_domain->agaw)))
2876 ret = iommu_domain_identity_map(si_domain,
2877 mm_to_dma_pfn(start >> PAGE_SHIFT),
2878 mm_to_dma_pfn(end >> PAGE_SHIFT));
2887 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2889 struct dmar_domain *ndomain;
2890 struct intel_iommu *iommu;
2893 iommu = device_to_iommu(dev, &bus, &devfn);
2897 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2898 if (ndomain != domain)
2904 static bool device_has_rmrr(struct device *dev)
2906 struct dmar_rmrr_unit *rmrr;
2911 for_each_rmrr_units(rmrr) {
2913 * Return TRUE if this RMRR contains the device that is passed in.
2916 for_each_active_dev_scope(rmrr->devices,
2917 rmrr->devices_cnt, i, tmp)
2919 is_downstream_to_pci_bridge(dev, tmp)) {
2929 * device_rmrr_is_relaxable - Test whether the RMRR of this device
2930 * is relaxable (i.e. is allowed to be left unenforced under some conditions)
2931 * @dev: device handle
2933 * We assume that PCI USB devices with RMRRs have them largely
2934 * for historical reasons and that the RMRR space is not actively used post
2935 * boot. This exclusion may change if vendors begin to abuse it.
2937 * The same exception is made for graphics devices, with the requirement that
2938 * any use of the RMRR regions will be torn down before assigning the device to a guest.
2941 * Return: true if the RMRR is relaxable, false otherwise
2943 static bool device_rmrr_is_relaxable(struct device *dev)
2945 struct pci_dev *pdev;
2947 if (!dev_is_pci(dev))
2950 pdev = to_pci_dev(dev);
2951 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2958 * There are a couple cases where we need to restrict the functionality of
2959 * devices associated with RMRRs. The first is when evaluating a device for
2960 * identity mapping because problems exist when devices are moved in and out
2961 * of domains and their respective RMRR information is lost. This means that
2962 * a device with associated RMRRs will never be in a "passthrough" domain.
2963 * The second is use of the device through the IOMMU API. This interface
2964 * expects to have full control of the IOVA space for the device. We cannot
2965 * satisfy both the requirement that RMRR access is maintained and have an
2966 * unencumbered IOVA space. We also have no ability to quiesce the device's
2967 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2968 * We therefore prevent devices associated with an RMRR from participating in
2969 * the IOMMU API, which eliminates them from device assignment.
2971 * In both cases, devices which have relaxable RMRRs are not concerned by this
2972 * restriction. See device_rmrr_is_relaxable comment.
2974 static bool device_is_rmrr_locked(struct device *dev)
2976 if (!device_has_rmrr(dev))
2979 if (device_rmrr_is_relaxable(dev))
2986 * Return the required default domain type for a specific device.
2988 * @dev: the device in query
2992 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2993 * - IOMMU_DOMAIN_IDENTITY: device requires an identity mapping domain
2994 * - 0: both identity and dynamic domains work for this device
2996 static int device_def_domain_type(struct device *dev)
2998 if (dev_is_pci(dev)) {
2999 struct pci_dev *pdev = to_pci_dev(dev);
3001 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
3002 return IOMMU_DOMAIN_IDENTITY;
3004 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
3005 return IOMMU_DOMAIN_IDENTITY;
3011 static void intel_iommu_init_qi(struct intel_iommu *iommu)
3014 * Start from a sane iommu hardware state.
3015 * If queued invalidation was already initialized by us
3016 * (for example, while enabling interrupt remapping) then
3017 * things are already rolling from a sane state.
3021 * Clear any previous faults.
3023 dmar_fault(-1, iommu);
3025 * Disable queued invalidation if supported and already enabled
3026 * before OS handover.
3028 dmar_disable_qi(iommu);
3031 if (dmar_enable_qi(iommu)) {
3033 * Queued Invalidate not enabled, use Register Based Invalidate
3035 iommu->flush.flush_context = __iommu_flush_context;
3036 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
3037 pr_info("%s: Using Register based invalidation\n",
3040 iommu->flush.flush_context = qi_flush_context;
3041 iommu->flush.flush_iotlb = qi_flush_iotlb;
3042 pr_info("%s: Using Queued invalidation\n", iommu->name);
3046 static int copy_context_table(struct intel_iommu *iommu,
3047 struct root_entry *old_re,
3048 struct context_entry **tbl,
3051 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
3052 struct context_entry *new_ce = NULL, ce;
3053 struct context_entry *old_ce = NULL;
3054 struct root_entry re;
3055 phys_addr_t old_ce_phys;
3057 tbl_idx = ext ? bus * 2 : bus;
3058 memcpy(&re, old_re, sizeof(re));
3060 for (devfn = 0; devfn < 256; devfn++) {
3061 /* First calculate the correct index */
3062 idx = (ext ? devfn * 2 : devfn) % 256;
3065 /* First save what we may have and clean up */
3067 tbl[tbl_idx] = new_ce;
3068 __iommu_flush_cache(iommu, new_ce,
3078 old_ce_phys = root_entry_lctp(&re);
3080 old_ce_phys = root_entry_uctp(&re);
3083 if (ext && devfn == 0) {
3084 /* No LCTP, try UCTP */
3093 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3098 new_ce = alloc_pgtable_page(iommu->node);
3105 /* Now copy the context entry */
3106 memcpy(&ce, old_ce + idx, sizeof(ce));
3108 if (!__context_present(&ce))
3111 did = context_domain_id(&ce);
3112 if (did >= 0 && did < cap_ndoms(iommu->cap))
3113 set_bit(did, iommu->domain_ids);
3116 * We need a marker for copied context entries. This
3117 * marker needs to work for the old format as well as
3118 * for extended context entries.
3120 * Bit 67 of the context entry is used. In the old
3121 * format this bit is available to software, in the
3122 * extended format it is the PGE bit, but PGE is ignored
3123 * by HW if PASIDs are disabled (and thus still available).
3126 * So disable PASIDs first and then mark the entry
3127 * copied. This means that we don't copy PASID
3128 * translations from the old kernel, but this is fine as
3129 * faults there are not fatal.
3131 context_clear_pasid_enable(&ce);
3132 context_set_copied(&ce);
3137 tbl[tbl_idx + pos] = new_ce;
3139 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3148 static int copy_translation_tables(struct intel_iommu *iommu)
3150 struct context_entry **ctxt_tbls;
3151 struct root_entry *old_rt;
3152 phys_addr_t old_rt_phys;
3153 int ctxt_table_entries;
3154 unsigned long flags;
3159 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3160 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
3161 new_ext = !!ecap_ecs(iommu->ecap);
3164 * The RTT bit can only be changed when translation is disabled,
3165 * but disabling translation opens a window for data
3166 * corruption. So bail out and don't copy anything if we would
3167 * have to change the bit.
3172 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3176 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3180 /* This is too big for the stack - allocate it from slab */
3181 ctxt_table_entries = ext ? 512 : 256;
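/* With extended context entries each root entry provides a lower and
* an upper context-table pointer covering 128 devfns each, hence two
* context tables per bus. */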
3183 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3187 for (bus = 0; bus < 256; bus++) {
3188 ret = copy_context_table(iommu, &old_rt[bus],
3189 ctxt_tbls, bus, ext);
3191 pr_err("%s: Failed to copy context table for bus %d\n",
3197 spin_lock_irqsave(&iommu->lock, flags);
3199 /* Context tables are copied, now write them to the root_entry table */
3200 for (bus = 0; bus < 256; bus++) {
3201 int idx = ext ? bus * 2 : bus;
3204 if (ctxt_tbls[idx]) {
3205 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3206 iommu->root_entry[bus].lo = val;
3209 if (!ext || !ctxt_tbls[idx + 1])
3212 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3213 iommu->root_entry[bus].hi = val;
3216 spin_unlock_irqrestore(&iommu->lock, flags);
3220 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3230 #ifdef CONFIG_INTEL_IOMMU_SVM
3231 static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
3233 struct intel_iommu *iommu = data;
3237 return INVALID_IOASID;
3239 * The VT-d virtual command interface always uses the full 20-bit
3240 * PASID range. The host can partition the guest PASID range based on
3241 * policy, but that is out of the guest's control.
3243 if (min < PASID_MIN || max > intel_pasid_max_id)
3244 return INVALID_IOASID;
3246 if (vcmd_alloc_pasid(iommu, &ioasid))
3247 return INVALID_IOASID;
3252 static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
3254 struct intel_iommu *iommu = data;
3259 * Sanity checking of the ioasid owner is done at an upper layer, e.g. VFIO.
3260 * We can only free the PASID when all the devices are unbound.
3262 if (ioasid_find(NULL, ioasid, NULL)) {
3263 pr_alert("Cannot free active IOASID %d\n", ioasid);
3266 vcmd_free_pasid(iommu, ioasid);
3269 static void register_pasid_allocator(struct intel_iommu *iommu)
3272 * If we are running in the host, there is no need for a custom allocator
3273 * because PASIDs are allocated from the host system-wide.
3275 if (!cap_caching_mode(iommu->cap))
3278 if (!sm_supported(iommu)) {
3279 pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
3284 * Register a custom PASID allocator if we are running in a guest;
3285 * guest PASIDs must be obtained via the virtual command interface.
3286 * There can be multiple vIOMMUs in each guest but only one allocator
3287 * is active. All vIOMMU allocators will eventually call the same host allocator.
3290 if (!vccap_pasid(iommu->vccap))
3293 pr_info("Register custom PASID allocator\n");
3294 iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
3295 iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
3296 iommu->pasid_allocator.pdata = (void *)iommu;
3297 if (ioasid_register_allocator(&iommu->pasid_allocator)) {
3298 pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
3300 * Disable scalable mode on this IOMMU if there
3301 * is no custom allocator. Mixing SM capable vIOMMU
3302 * and non-SM vIOMMU are not supported.
3309 static int __init init_dmars(void)
3311 struct dmar_drhd_unit *drhd;
3312 struct intel_iommu *iommu;
3318 * initialize and program root entry to not present
3321 for_each_drhd_unit(drhd) {
3323 * lock not needed as this is only incremented in the single-
3324 * threaded kernel __init code path; all other accesses are read-only
3327 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3331 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3334 /* Preallocate enough resources for IOMMU hot-addition */
3335 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3336 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3338 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3345 ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL);
3349 for_each_iommu(iommu, drhd) {
3350 if (drhd->ignored) {
3351 iommu_disable_translation(iommu);
3356 * Find the max pasid size of all IOMMUs in the system.
3357 * We need to ensure the system pasid table is no bigger
3358 * than the smallest supported.
3360 if (pasid_supported(iommu)) {
3361 u32 temp = 2 << ecap_pss(iommu->ecap);
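/* ecap_pss() reports the supported PASID size minus one, so e.g.
* pss == 19 -> 2 << 19 == 1 << 20 PASIDs, the full 20-bit space. */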
3363 intel_pasid_max_id = min_t(u32, temp,
3364 intel_pasid_max_id);
3367 g_iommus[iommu->seq_id] = iommu;
3369 intel_iommu_init_qi(iommu);
3371 ret = iommu_init_domains(iommu);
3375 init_translation_status(iommu);
3377 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3378 iommu_disable_translation(iommu);
3379 clear_translation_pre_enabled(iommu);
3380 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3386 * we could share the same root & context tables
3387 * among all IOMMUs; need to split this later.
3389 ret = iommu_alloc_root_entry(iommu);
3393 if (translation_pre_enabled(iommu)) {
3394 pr_info("Translation already enabled - trying to copy translation structures\n");
3396 ret = copy_translation_tables(iommu);
3399 * We found the IOMMU with translation
3400 * enabled - but failed to copy over the
3401 * old root-entry table. Try to proceed
3402 * by disabling translation now and
3403 * allocating a clean root-entry table.
3404 * This might cause DMAR faults, but
3405 * probably the dump will still succeed.
3407 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3409 iommu_disable_translation(iommu);
3410 clear_translation_pre_enabled(iommu);
3412 pr_info("Copied translation tables from previous kernel for %s\n",
3417 if (!ecap_pass_through(iommu->ecap))
3418 hw_pass_through = 0;
3419 intel_svm_check(iommu);
3423 * Now that qi is enabled on all iommus, set the root entry and flush
3424 * caches. This is required on some Intel X58 chipsets, otherwise the
3425 * flush_context function will loop forever and the boot hangs.
3427 for_each_active_iommu(iommu, drhd) {
3428 iommu_flush_write_buffer(iommu);
3429 #ifdef CONFIG_INTEL_IOMMU_SVM
3430 register_pasid_allocator(iommu);
3432 iommu_set_root_entry(iommu);
3435 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3440 iommu_identity_mapping |= IDENTMAP_GFX;
3442 check_tylersburg_isoch();
3444 ret = si_domain_init(hw_pass_through);
3451 * global invalidate context cache
3452 * global invalidate iotlb
3453 * enable translation
3455 for_each_iommu(iommu, drhd) {
3456 if (drhd->ignored) {
3458 * we always have to disable PMRs or DMA may fail on this device
3462 iommu_disable_protect_mem_regions(iommu);
3466 iommu_flush_write_buffer(iommu);
3468 #ifdef CONFIG_INTEL_IOMMU_SVM
3469 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3471 * Calling dmar_alloc_hwirq() with dmar_global_lock held
3472 * could cause a lock race, so drop the lock around the call.
3474 up_write(&dmar_global_lock);
3475 ret = intel_svm_enable_prq(iommu);
3476 down_write(&dmar_global_lock);
3481 ret = dmar_set_interrupt(iommu);
3489 for_each_active_iommu(iommu, drhd) {
3490 disable_dmar_iommu(iommu);
3491 free_dmar_iommu(iommu);
3500 static inline int iommu_domain_cache_init(void)
3504 iommu_domain_cache = kmem_cache_create("iommu_domain",
3505 sizeof(struct dmar_domain),
3510 if (!iommu_domain_cache) {
3511 pr_err("Couldn't create iommu_domain cache\n");
3518 static inline int iommu_devinfo_cache_init(void)
3522 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3523 sizeof(struct device_domain_info),
3527 if (!iommu_devinfo_cache) {
3528 pr_err("Couldn't create devinfo cache\n");
3535 static int __init iommu_init_mempool(void)
3538 ret = iova_cache_get();
3542 ret = iommu_domain_cache_init();
3546 ret = iommu_devinfo_cache_init();
3550 kmem_cache_destroy(iommu_domain_cache);
3557 static void __init iommu_exit_mempool(void)
3559 kmem_cache_destroy(iommu_devinfo_cache);
3560 kmem_cache_destroy(iommu_domain_cache);
3564 static void __init init_no_remapping_devices(void)
3566 struct dmar_drhd_unit *drhd;
3570 for_each_drhd_unit(drhd) {
3571 if (!drhd->include_all) {
3572 for_each_active_dev_scope(drhd->devices,
3573 drhd->devices_cnt, i, dev)
3575 /* ignore DMAR unit if no devices exist */
3576 if (i == drhd->devices_cnt)
3581 for_each_active_drhd_unit(drhd) {
3582 if (drhd->include_all)
3585 for_each_active_dev_scope(drhd->devices,
3586 drhd->devices_cnt, i, dev)
3587 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3589 if (i < drhd->devices_cnt)
3592 /* This IOMMU has *only* gfx devices. Either bypass it or
3593 set the gfx_mapped flag, as appropriate */
3594 drhd->gfx_dedicated = 1;
3600 #ifdef CONFIG_SUSPEND
3601 static int init_iommu_hw(void)
3603 struct dmar_drhd_unit *drhd;
3604 struct intel_iommu *iommu = NULL;
3606 for_each_active_iommu(iommu, drhd)
3608 dmar_reenable_qi(iommu);
3610 for_each_iommu(iommu, drhd) {
3611 if (drhd->ignored) {
3613 * we always have to disable PMRs or DMA may fail on this device
3617 iommu_disable_protect_mem_regions(iommu);
3621 iommu_flush_write_buffer(iommu);
3622 iommu_set_root_entry(iommu);
3623 iommu_enable_translation(iommu);
3624 iommu_disable_protect_mem_regions(iommu);
3630 static void iommu_flush_all(void)
3632 struct dmar_drhd_unit *drhd;
3633 struct intel_iommu *iommu;
3635 for_each_active_iommu(iommu, drhd) {
3636 iommu->flush.flush_context(iommu, 0, 0, 0,
3637 DMA_CCMD_GLOBAL_INVL);
3638 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3639 DMA_TLB_GLOBAL_FLUSH);
3643 static int iommu_suspend(void)
3645 struct dmar_drhd_unit *drhd;
3646 struct intel_iommu *iommu = NULL;
3649 for_each_active_iommu(iommu, drhd) {
3650 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
3652 if (!iommu->iommu_state)
3658 for_each_active_iommu(iommu, drhd) {
3659 iommu_disable_translation(iommu);
3661 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3663 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3664 readl(iommu->reg + DMAR_FECTL_REG);
3665 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3666 readl(iommu->reg + DMAR_FEDATA_REG);
3667 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3668 readl(iommu->reg + DMAR_FEADDR_REG);
3669 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3670 readl(iommu->reg + DMAR_FEUADDR_REG);
3672 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3677 for_each_active_iommu(iommu, drhd)
3678 kfree(iommu->iommu_state);
3683 static void iommu_resume(void)
3685 struct dmar_drhd_unit *drhd;
3686 struct intel_iommu *iommu = NULL;
3689 if (init_iommu_hw()) {
3691 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3693 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3697 for_each_active_iommu(iommu, drhd) {
3699 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3701 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3702 iommu->reg + DMAR_FECTL_REG);
3703 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3704 iommu->reg + DMAR_FEDATA_REG);
3705 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3706 iommu->reg + DMAR_FEADDR_REG);
3707 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3708 iommu->reg + DMAR_FEUADDR_REG);
3710 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3713 for_each_active_iommu(iommu, drhd)
3714 kfree(iommu->iommu_state);
3717 static struct syscore_ops iommu_syscore_ops = {
3718 .resume = iommu_resume,
3719 .suspend = iommu_suspend,
3722 static void __init init_iommu_pm_ops(void)
3724 register_syscore_ops(&iommu_syscore_ops);
3728 static inline void init_iommu_pm_ops(void) {}
3729 #endif /* CONFIG_PM */
3731 static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
3733 if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
3734 !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
3735 rmrr->end_address <= rmrr->base_address ||
3736 arch_rmrr_sanity_check(rmrr))
3742 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3744 struct acpi_dmar_reserved_memory *rmrr;
3745 struct dmar_rmrr_unit *rmrru;
3747 rmrr = (struct acpi_dmar_reserved_memory *)header;
3748 if (rmrr_sanity_check(rmrr)) {
3750 "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
3751 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3752 rmrr->base_address, rmrr->end_address,
3753 dmi_get_system_info(DMI_BIOS_VENDOR),
3754 dmi_get_system_info(DMI_BIOS_VERSION),
3755 dmi_get_system_info(DMI_PRODUCT_VERSION));
3756 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
3759 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3763 rmrru->hdr = header;
3765 rmrru->base_address = rmrr->base_address;
3766 rmrru->end_address = rmrr->end_address;
3768 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3769 ((void *)rmrr) + rmrr->header.length,
3770 &rmrru->devices_cnt);
3771 if (rmrru->devices_cnt && rmrru->devices == NULL)
3774 list_add(&rmrru->list, &dmar_rmrr_units);
3783 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3785 struct dmar_atsr_unit *atsru;
3786 struct acpi_dmar_atsr *tmp;
3788 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
3790 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3791 if (atsr->segment != tmp->segment)
3793 if (atsr->header.length != tmp->header.length)
3795 if (memcmp(atsr, tmp, atsr->header.length) == 0)
3802 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3804 struct acpi_dmar_atsr *atsr;
3805 struct dmar_atsr_unit *atsru;
3807 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3810 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3811 atsru = dmar_find_atsr(atsr);
3815 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
3820 * If memory is allocated from slab by ACPI _DSM method, we need to
3821 * copy the memory content because the memory buffer will be freed on exit.
3824 atsru->hdr = (void *)(atsru + 1);
3825 memcpy(atsru->hdr, hdr, hdr->length);
3826 atsru->include_all = atsr->flags & 0x1;
3827 if (!atsru->include_all) {
3828 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3829 (void *)atsr + atsr->header.length,
3830 &atsru->devices_cnt);
3831 if (atsru->devices_cnt && atsru->devices == NULL) {
3837 list_add_rcu(&atsru->list, &dmar_atsr_units);
3842 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3844 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3848 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3850 struct acpi_dmar_atsr *atsr;
3851 struct dmar_atsr_unit *atsru;
3853 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3854 atsru = dmar_find_atsr(atsr);
3856 list_del_rcu(&atsru->list);
3858 intel_iommu_free_atsr(atsru);
3864 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3868 struct acpi_dmar_atsr *atsr;
3869 struct dmar_atsr_unit *atsru;
3871 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3872 atsru = dmar_find_atsr(atsr);
3876 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
3877 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
3885 static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
3887 struct dmar_satc_unit *satcu;
3888 struct acpi_dmar_satc *tmp;
3890 list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
3892 tmp = (struct acpi_dmar_satc *)satcu->hdr;
3893 if (satc->segment != tmp->segment)
3895 if (satc->header.length != tmp->header.length)
3897 if (memcmp(satc, tmp, satc->header.length) == 0)
3904 int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
3906 struct acpi_dmar_satc *satc;
3907 struct dmar_satc_unit *satcu;
3909 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3912 satc = container_of(hdr, struct acpi_dmar_satc, header);
3913 satcu = dmar_find_satc(satc);
3917 satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
3921 satcu->hdr = (void *)(satcu + 1);
3922 memcpy(satcu->hdr, hdr, hdr->length);
3923 satcu->atc_required = satc->flags & 0x1;
3924 satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
3925 (void *)satc + satc->header.length,
3926 &satcu->devices_cnt);
3927 if (satcu->devices_cnt && !satcu->devices) {
3931 list_add_rcu(&satcu->list, &dmar_satc_units);
3936 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
3939 struct intel_iommu *iommu = dmaru->iommu;
3941 if (g_iommus[iommu->seq_id])
3944 ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
3948 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
3949 pr_warn("%s: Doesn't support hardware pass through.\n",
3953 if (!ecap_sc_support(iommu->ecap) &&
3954 domain_update_iommu_snooping(iommu)) {
3955 pr_warn("%s: Doesn't support snooping.\n",
3959 sp = domain_update_iommu_superpage(NULL, iommu) - 1;
3960 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
3961 pr_warn("%s: Doesn't support large page.\n",
3967 * Disable translation if already enabled prior to OS handover.
3969 if (iommu->gcmd & DMA_GCMD_TE)
3970 iommu_disable_translation(iommu);
3972 g_iommus[iommu->seq_id] = iommu;
3973 ret = iommu_init_domains(iommu);
3975 ret = iommu_alloc_root_entry(iommu);
3979 intel_svm_check(iommu);
3981 if (dmaru->ignored) {
3983 * we always have to disable PMRs or DMA may fail on this device
3986 iommu_disable_protect_mem_regions(iommu);
3990 intel_iommu_init_qi(iommu);
3991 iommu_flush_write_buffer(iommu);
3993 #ifdef CONFIG_INTEL_IOMMU_SVM
3994 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3995 ret = intel_svm_enable_prq(iommu);
4000 ret = dmar_set_interrupt(iommu);
4004 iommu_set_root_entry(iommu);
4005 iommu_enable_translation(iommu);
4007 iommu_disable_protect_mem_regions(iommu);
4011 disable_dmar_iommu(iommu);
4013 free_dmar_iommu(iommu);
4017 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4020 struct intel_iommu *iommu = dmaru->iommu;
4022 if (!intel_iommu_enabled)
4028 ret = intel_iommu_add(dmaru);
4030 disable_dmar_iommu(iommu);
4031 free_dmar_iommu(iommu);
4037 static void intel_iommu_free_dmars(void)
4039 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4040 struct dmar_atsr_unit *atsru, *atsr_n;
4041 struct dmar_satc_unit *satcu, *satc_n;
4043 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4044 list_del(&rmrru->list);
4045 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4049 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4050 list_del(&atsru->list);
4051 intel_iommu_free_atsr(atsru);
4053 list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) {
4054 list_del(&satcu->list);
4055 dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt);
4060 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4063 struct pci_bus *bus;
4064 struct pci_dev *bridge = NULL;
4066 struct acpi_dmar_atsr *atsr;
4067 struct dmar_atsr_unit *atsru;
4069 dev = pci_physfn(dev);
4070 for (bus = dev->bus; bus; bus = bus->parent) {
4072 /* If it's an integrated device, allow ATS */
4075 /* Connected via non-PCIe: no ATS */
4076 if (!pci_is_pcie(bridge) ||
4077 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4079 /* If we found the root port, look it up in the ATSR */
4080 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4085 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4086 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4087 if (atsr->segment != pci_domain_nr(dev->bus))
4090 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4091 if (tmp == &bridge->dev)
4094 if (atsru->include_all)
4104 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4107 struct dmar_rmrr_unit *rmrru;
4108 struct dmar_atsr_unit *atsru;
4109 struct dmar_satc_unit *satcu;
4110 struct acpi_dmar_atsr *atsr;
4111 struct acpi_dmar_reserved_memory *rmrr;
4112 struct acpi_dmar_satc *satc;
4114 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
4117 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4118 rmrr = container_of(rmrru->hdr,
4119 struct acpi_dmar_reserved_memory, header);
4120 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4121 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4122 ((void *)rmrr) + rmrr->header.length,
4123 rmrr->segment, rmrru->devices,
4124 rmrru->devices_cnt);
4127 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4128 dmar_remove_dev_scope(info, rmrr->segment,
4129 rmrru->devices, rmrru->devices_cnt);
4133 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4134 if (atsru->include_all)
4137 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4138 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4139 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4140 (void *)atsr + atsr->header.length,
4141 atsr->segment, atsru->devices,
4142 atsru->devices_cnt);
4147 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4148 if (dmar_remove_dev_scope(info, atsr->segment,
4149 atsru->devices, atsru->devices_cnt))
4153 list_for_each_entry(satcu, &dmar_satc_units, list) {
4154 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
4155 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4156 ret = dmar_insert_dev_scope(info, (void *)(satc + 1),
4157 (void *)satc + satc->header.length,
4158 satc->segment, satcu->devices,
4159 satcu->devices_cnt);
4164 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4165 if (dmar_remove_dev_scope(info, satc->segment,
4166 satcu->devices, satcu->devices_cnt))
4174 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4175 unsigned long val, void *v)
4177 struct memory_notify *mhp = v;
4178 unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4179 unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
4183 case MEM_GOING_ONLINE:
4184 if (iommu_domain_identity_map(si_domain,
4185 start_vpfn, last_vpfn)) {
4186 pr_warn("Failed to build identity map for [%lx-%lx]\n",
4187 start_vpfn, last_vpfn);
4193 case MEM_CANCEL_ONLINE:
4195 struct dmar_drhd_unit *drhd;
4196 struct intel_iommu *iommu;
4197 struct page *freelist;
4199 freelist = domain_unmap(si_domain,
4200 start_vpfn, last_vpfn,
4204 for_each_active_iommu(iommu, drhd)
4205 iommu_flush_iotlb_psi(iommu, si_domain,
4206 start_vpfn, mhp->nr_pages,
4209 dma_free_pagelist(freelist);
4217 static struct notifier_block intel_iommu_memory_nb = {
4218 .notifier_call = intel_iommu_memory_notifier,
4222 static void intel_disable_iommus(void)
4224 struct intel_iommu *iommu = NULL;
4225 struct dmar_drhd_unit *drhd;
4227 for_each_iommu(iommu, drhd)
4228 iommu_disable_translation(iommu);
4231 void intel_iommu_shutdown(void)
4233 struct dmar_drhd_unit *drhd;
4234 struct intel_iommu *iommu = NULL;
4236 if (no_iommu || dmar_disabled)
4239 down_write(&dmar_global_lock);
4241 /* Disable PMRs explicitly here. */
4242 for_each_iommu(iommu, drhd)
4243 iommu_disable_protect_mem_regions(iommu);
4245 /* Make sure the IOMMUs are switched off */
4246 intel_disable_iommus();
4248 up_write(&dmar_global_lock);
4251 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4253 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4255 return container_of(iommu_dev, struct intel_iommu, iommu);
4258 static ssize_t version_show(struct device *dev,
4259 struct device_attribute *attr, char *buf)
4261 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4262 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4263 return sprintf(buf, "%d:%d\n",
4264 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4266 static DEVICE_ATTR_RO(version);
4268 static ssize_t address_show(struct device *dev,
4269 struct device_attribute *attr, char *buf)
4271 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4272 return sprintf(buf, "%llx\n", iommu->reg_phys);
4274 static DEVICE_ATTR_RO(address);
4276 static ssize_t cap_show(struct device *dev,
4277 struct device_attribute *attr, char *buf)
4279 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4280 return sprintf(buf, "%llx\n", iommu->cap);
4282 static DEVICE_ATTR_RO(cap);
4284 static ssize_t ecap_show(struct device *dev,
4285 struct device_attribute *attr, char *buf)
4287 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4288 return sprintf(buf, "%llx\n", iommu->ecap);
4290 static DEVICE_ATTR_RO(ecap);
4292 static ssize_t domains_supported_show(struct device *dev,
4293 struct device_attribute *attr, char *buf)
4295 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4296 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4298 static DEVICE_ATTR_RO(domains_supported);
4300 static ssize_t domains_used_show(struct device *dev,
4301 struct device_attribute *attr, char *buf)
4303 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4304 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4305 cap_ndoms(iommu->cap)));
4307 static DEVICE_ATTR_RO(domains_used);
4309 static struct attribute *intel_iommu_attrs[] = {
4310 &dev_attr_version.attr,
4311 &dev_attr_address.attr,
4313 &dev_attr_ecap.attr,
4314 &dev_attr_domains_supported.attr,
4315 &dev_attr_domains_used.attr,
4319 static struct attribute_group intel_iommu_group = {
4320 .name = "intel-iommu",
4321 .attrs = intel_iommu_attrs,
4324 const struct attribute_group *intel_iommu_groups[] = {
4329 static inline bool has_external_pci(void)
4331 struct pci_dev *pdev = NULL;
4333 for_each_pci_dev(pdev)
4334 if (pdev->external_facing)
4340 static int __init platform_optin_force_iommu(void)
4342 if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
4345 if (no_iommu || dmar_disabled)
4346 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4349 * If Intel-IOMMU is disabled by default, we will apply identity
4350 * map for all devices except those marked as being untrusted.
4353 iommu_set_default_passthrough(false);
4361 static int __init probe_acpi_namespace_devices(void)
4363 struct dmar_drhd_unit *drhd;
4364 /* To avoid a -Wunused-but-set-variable warning. */
4365 struct intel_iommu *iommu __maybe_unused;
4369 for_each_active_iommu(iommu, drhd) {
4370 for_each_active_dev_scope(drhd->devices,
4371 drhd->devices_cnt, i, dev) {
4372 struct acpi_device_physical_node *pn;
4373 struct iommu_group *group;
4374 struct acpi_device *adev;
4376 if (dev->bus != &acpi_bus_type)
4379 adev = to_acpi_device(dev);
4380 mutex_lock(&adev->physical_node_lock);
4381 list_for_each_entry(pn,
4382 &adev->physical_node_list, node) {
4383 group = iommu_group_get(pn->dev);
4385 iommu_group_put(group);
4389 pn->dev->bus->iommu_ops = &intel_iommu_ops;
4390 ret = iommu_probe_device(pn->dev);
4394 mutex_unlock(&adev->physical_node_lock);
4404 int __init intel_iommu_init(void)
4407 struct dmar_drhd_unit *drhd;
4408 struct intel_iommu *iommu;
4411 * Intel IOMMU is required for a TXT/tboot launch or platform
4412 * opt in, so enforce that.
4414 force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
4415 platform_optin_force_iommu();
4417 if (iommu_init_mempool()) {
4419 panic("tboot: Failed to initialize iommu memory\n");
4423 down_write(&dmar_global_lock);
4424 if (dmar_table_init()) {
4426 panic("tboot: Failed to initialize DMAR table\n");
4430 if (dmar_dev_scope_init() < 0) {
4432 panic("tboot: Failed to initialize DMAR device scope\n");
4436 up_write(&dmar_global_lock);
4439 * The bus notifier takes the dmar_global_lock, so lockdep will
4440 * complain later when we register it under the lock.
4442 dmar_register_bus_notifier();
4444 down_write(&dmar_global_lock);
4447 intel_iommu_debugfs_init();
4449 if (no_iommu || dmar_disabled) {
4451 * We exit the function here to ensure the IOMMU's remapping and
4452 * mempool aren't set up, which means that the IOMMU's PMRs
4453 * won't be disabled via the call to init_dmars(). So disable
4454 * them explicitly here. The PMRs were set up by tboot prior to
4455 * calling SENTER, but the kernel is expected to reset/tear them down.
4458 if (intel_iommu_tboot_noforce) {
4459 for_each_iommu(iommu, drhd)
4460 iommu_disable_protect_mem_regions(iommu);
4464 * Make sure the IOMMUs are switched off, even when we
4465 * boot into a kexec kernel and the previous kernel left them enabled.
4468 intel_disable_iommus();
4472 if (list_empty(&dmar_rmrr_units))
4473 pr_info("No RMRR found\n");
4475 if (list_empty(&dmar_atsr_units))
4476 pr_info("No ATSR found\n");
4478 if (list_empty(&dmar_satc_units))
4479 pr_info("No SATC found\n");
4482 intel_iommu_gfx_mapped = 1;
4484 init_no_remapping_devices();
4489 panic("tboot: Failed to initialize DMARs\n");
4490 pr_err("Initialization failed\n");
4493 up_write(&dmar_global_lock);
4495 init_iommu_pm_ops();
4497 down_read(&dmar_global_lock);
4498 for_each_active_iommu(iommu, drhd) {
4500 * The flush queue implementation does not perform
4501 * page-selective invalidations that are required for efficient
4502 * TLB flushes in virtual environments. The benefit of batching
4503 * is likely to be much lower than the overhead of synchronizing
4504 * the virtual and physical IOMMU page-tables.
4506 if (cap_caching_mode(iommu->cap)) {
4507 pr_info_once("IOMMU batching disallowed due to virtualization\n");
4508 iommu_set_dma_strict();
4510 iommu_device_sysfs_add(&iommu->iommu, NULL,
4513 iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
4515 up_read(&dmar_global_lock);
4517 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4518 if (si_domain && !hw_pass_through)
4519 register_memory_notifier(&intel_iommu_memory_nb);
4521 down_read(&dmar_global_lock);
4522 if (probe_acpi_namespace_devices())
4523 pr_warn("ACPI name space devices didn't probe correctly\n");
4525 /* Finally, we enable the DMA remapping hardware. */
4526 for_each_iommu(iommu, drhd) {
4527 if (!drhd->ignored && !translation_pre_enabled(iommu))
4528 iommu_enable_translation(iommu);
4530 iommu_disable_protect_mem_regions(iommu);
4532 up_read(&dmar_global_lock);
4534 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4536 intel_iommu_enabled = 1;
4541 intel_iommu_free_dmars();
4542 up_write(&dmar_global_lock);
4543 iommu_exit_mempool();
4547 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4549 struct device_domain_info *info = opaque;
4551 domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff);
4556 * NB - intel-iommu lacks any sort of reference counting for the users of
4557 * dependent devices. If multiple endpoints have intersecting dependent
4558 * devices, unbinding the driver from any one of them will possibly leave
4559 * the others unable to operate.
4561 static void domain_context_clear(struct device_domain_info *info)
4563 if (!info->iommu || !info->dev || !dev_is_pci(info->dev))
4566 pci_for_each_dma_alias(to_pci_dev(info->dev),
4567 &domain_context_clear_one_cb, info);
4570 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4572 struct dmar_domain *domain;
4573 struct intel_iommu *iommu;
4574 unsigned long flags;
4576 assert_spin_locked(&device_domain_lock);
4581 iommu = info->iommu;
4582 domain = info->domain;
4584 if (info->dev && !dev_is_real_dma_subdevice(info->dev)) {
4585 if (dev_is_pci(info->dev) && sm_supported(iommu))
4586 intel_pasid_tear_down_entry(iommu, info->dev,
4587 PASID_RID2PASID, false);
4589 iommu_disable_dev_iotlb(info);
4590 domain_context_clear(info);
4591 intel_pasid_free_table(info->dev);
4594 unlink_domain_info(info);
4596 spin_lock_irqsave(&iommu->lock, flags);
4597 domain_detach_iommu(domain, iommu);
4598 spin_unlock_irqrestore(&iommu->lock, flags);
4600 free_devinfo_mem(info);
4603 static void dmar_remove_one_dev_info(struct device *dev)
4605 struct device_domain_info *info;
4606 unsigned long flags;
4608 spin_lock_irqsave(&device_domain_lock, flags);
4609 info = get_domain_info(dev);
4611 __dmar_remove_one_dev_info(info);
4612 spin_unlock_irqrestore(&device_domain_lock, flags);
4615 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4619 /* calculate AGAW */
4620 domain->gaw = guest_width;
4621 adjust_width = guestwidth_to_adjustwidth(guest_width);
4622 domain->agaw = width_to_agaw(adjust_width);
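/* Illustrative: guest_width == 48 (already AGAW-aligned, so the
* adjustment is a no-op) gives agaw == 2, i.e. a 4-level page table. */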
4624 domain->iommu_coherency = false;
4625 domain->iommu_snooping = false;
4626 domain->iommu_superpage = 0;
4627 domain->max_addr = 0;
4629 /* always allocate the top pgd */
4630 domain->pgd = alloc_pgtable_page(domain->nid);
4633 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4637 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4639 struct dmar_domain *dmar_domain;
4640 struct iommu_domain *domain;
4643 case IOMMU_DOMAIN_DMA:
4644 case IOMMU_DOMAIN_DMA_FQ:
4645 case IOMMU_DOMAIN_UNMANAGED:
4646 dmar_domain = alloc_domain(type);
4648 pr_err("Can't allocate dmar_domain\n");
4651 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4652 pr_err("Domain initialization failed\n");
4653 domain_exit(dmar_domain);
4657 domain = &dmar_domain->domain;
4658 domain->geometry.aperture_start = 0;
4659 domain->geometry.aperture_end =
4660 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4661 domain->geometry.force_aperture = true;
4664 case IOMMU_DOMAIN_IDENTITY:
4665 return &si_domain->domain;
4673 static void intel_iommu_domain_free(struct iommu_domain *domain)
4675 if (domain != &si_domain->domain)
4676 domain_exit(to_dmar_domain(domain));
4680 * Check whether a @domain could be attached to the @dev through the
4681 * aux-domain attach/detach APIs.
4684 is_aux_domain(struct device *dev, struct iommu_domain *domain)
4686 struct device_domain_info *info = get_domain_info(dev);
4688 return info && info->auxd_enabled &&
4689 domain->type == IOMMU_DOMAIN_UNMANAGED;
4692 static inline struct subdev_domain_info *
4693 lookup_subdev_info(struct dmar_domain *domain, struct device *dev)
4695 struct subdev_domain_info *sinfo;
4697 if (!list_empty(&domain->subdevices)) {
4698 list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
4699 if (sinfo->pdev == dev)
4707 static int auxiliary_link_device(struct dmar_domain *domain,
4710 struct device_domain_info *info = get_domain_info(dev);
4711 struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev);
4713 assert_spin_locked(&device_domain_lock);
4718 sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC);
4721 sinfo->domain = domain;
4723 list_add(&sinfo->link_phys, &info->subdevices);
4724 list_add(&sinfo->link_domain, &domain->subdevices);
4727 return ++sinfo->users;
4730 static int auxiliary_unlink_device(struct dmar_domain *domain,
4733 struct device_domain_info *info = get_domain_info(dev);
4734 struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev);
4737 assert_spin_locked(&device_domain_lock);
4738 if (WARN_ON(!info || !sinfo || sinfo->users <= 0))
4741 ret = --sinfo->users;
4743 list_del(&sinfo->link_phys);
4744 list_del(&sinfo->link_domain);
4751 static int aux_domain_add_dev(struct dmar_domain *domain,
4755 unsigned long flags;
4756 struct intel_iommu *iommu;
4758 iommu = device_to_iommu(dev, NULL, NULL);
4762 if (domain->default_pasid <= 0) {
4765 /* No private data needed for the default pasid */
4766 pasid = ioasid_alloc(NULL, PASID_MIN,
4767 pci_max_pasids(to_pci_dev(dev)) - 1,
4769 if (pasid == INVALID_IOASID) {
4770 pr_err("Can't allocate default pasid\n");
4773 domain->default_pasid = pasid;
4776 spin_lock_irqsave(&device_domain_lock, flags);
4777 ret = auxiliary_link_device(domain, dev);
4782 * Subdevices from the same physical device can be attached to the
4783 * same domain. For such cases, only the first subdevice attachment
4784 * needs to go through the full steps in this function. So if ret > 1, the rest of the setup can be skipped.
4791 * iommu->lock must be held to attach domain to iommu and setup the
4792 * pasid entry for second level translation.
4794 spin_lock(&iommu->lock);
4795 ret = domain_attach_iommu(domain, iommu);
4799 /* Setup the PASID entry for mediated devices: */
4800 if (domain_use_first_level(domain))
4801 ret = domain_setup_first_level(iommu, domain, dev,
4802 domain->default_pasid);
4804 ret = intel_pasid_setup_second_level(iommu, domain, dev,
4805 domain->default_pasid);
4809 spin_unlock(&iommu->lock);
4811 spin_unlock_irqrestore(&device_domain_lock, flags);
4816 domain_detach_iommu(domain, iommu);
4818 spin_unlock(&iommu->lock);
4819 auxiliary_unlink_device(domain, dev);
4821 spin_unlock_irqrestore(&device_domain_lock, flags);
4822 if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
4823 ioasid_put(domain->default_pasid);
4828 static void aux_domain_remove_dev(struct dmar_domain *domain,
4831 struct device_domain_info *info;
4832 struct intel_iommu *iommu;
4833 unsigned long flags;
4835 if (!is_aux_domain(dev, &domain->domain))
4838 spin_lock_irqsave(&device_domain_lock, flags);
4839 info = get_domain_info(dev);
4840 iommu = info->iommu;
4842 if (!auxiliary_unlink_device(domain, dev)) {
4843 spin_lock(&iommu->lock);
4844 intel_pasid_tear_down_entry(iommu, dev,
4845 domain->default_pasid, false);
4846 domain_detach_iommu(domain, iommu);
4847 spin_unlock(&iommu->lock);
4850 spin_unlock_irqrestore(&device_domain_lock, flags);
4852 if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
4853 ioasid_put(domain->default_pasid);
4856 static int prepare_domain_attach_device(struct iommu_domain *domain,
4859 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4860 struct intel_iommu *iommu;
4863 iommu = device_to_iommu(dev, NULL, NULL);
4867 if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) &&
4868 !ecap_nest(iommu->ecap)) {
4869 dev_err(dev, "%s: iommu does not support nested translation\n",
4874 /* check if this iommu agaw is sufficient for max mapped address */
4875 addr_width = agaw_to_width(iommu->agaw);
4876 if (addr_width > cap_mgaw(iommu->cap))
4877 addr_width = cap_mgaw(iommu->cap);
4879 if (dmar_domain->max_addr > (1LL << addr_width)) {
4880 dev_err(dev, "%s: iommu width (%d) is not "
4881 "sufficient for the mapped address (%llx)\n",
4882 __func__, addr_width, dmar_domain->max_addr);
4885 dmar_domain->gaw = addr_width;
4888 * Knock out extra levels of page tables if necessary
4890 while (iommu->agaw < dmar_domain->agaw) {
4891 struct dma_pte *pte;
4893 pte = dmar_domain->pgd;
4894 if (dma_pte_present(pte)) {
4895 dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
4896 free_pgtable_page(pte);
4898 dmar_domain->agaw--;
4904 static int intel_iommu_attach_device(struct iommu_domain *domain,
4909 if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
4910 device_is_rmrr_locked(dev)) {
4911 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4915 if (is_aux_domain(dev, domain))
4918 /* normally dev is not mapped */
4919 if (unlikely(domain_context_mapped(dev))) {
4920 struct dmar_domain *old_domain;
4922 old_domain = find_domain(dev);
4924 dmar_remove_one_dev_info(dev);
4927 ret = prepare_domain_attach_device(domain, dev);
4931 return domain_add_dev_info(to_dmar_domain(domain), dev);
static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
					 struct device *dev)
{
	int ret;

	if (!is_aux_domain(dev, domain))
		return -EPERM;

	ret = prepare_domain_attach_device(domain, dev);
	if (ret)
		return ret;

	return aux_domain_add_dev(to_dmar_domain(domain), dev);
}

static void intel_iommu_detach_device(struct iommu_domain *domain,
				      struct device *dev)
{
	dmar_remove_one_dev_info(dev);
}

static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
					  struct device *dev)
{
	aux_domain_remove_dev(to_dmar_domain(domain), dev);
}
#ifdef CONFIG_INTEL_IOMMU_SVM
/*
 * 2D array for converting and sanitizing IOMMU generic TLB granularity to
 * VT-d granularity. Invalidation is typically included in the unmap operation
 * as a result of DMA or VFIO unmap. However, for assigned devices the guest
 * owns the first level page tables. Invalidations of translation caches in the
 * guest are trapped and passed down to the host.
 *
 * vIOMMU in the guest will only expose first level page tables, therefore
 * we do not support IOTLB granularity for requests without PASID (second level).
 *
 * For example, to find the VT-d granularity encoding for IOTLB
 * type and page selective granularity within PASID:
 * X: indexed by iommu cache type
 * Y: indexed by enum iommu_inv_granularity
 * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR]
 */

static const int
inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = {
	/*
	 * PASID based IOTLB invalidation: PASID selective (per PASID),
	 * page selective (address granularity)
	 */
	{-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID},
	/* PASID based dev TLBs */
	{-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL},
	/* PASID cache */
	{-EINVAL, -EINVAL, -EINVAL}
};
static inline int to_vtd_granularity(int type, int granu)
{
	return inv_type_granu_table[type][granu];
}

static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules)
{
	u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT;

	/* VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc.
	 * IOMMU cache invalidate API passes granu_size in bytes, and number of
	 * granu size in contiguous memory.
	 */
	return order_base_2(nr_pages);
}
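
/*
 * Example: to_vtd_size(SZ_4K, 512) and to_vtd_size(SZ_2M, 1) both describe
 * a 2MB range, i.e. 512 contiguous 4K pages, so both return 9.
 */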
static int
intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
			   struct iommu_cache_invalidate_info *inv_info)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags;
	int cache_type;
	u8 bus, devfn;
	u16 did, sid;
	int ret = 0;
	u64 size = 0;

	if (!inv_info || !dmar_domain)
		return -EINVAL;

	if (!dev || !dev_is_pci(dev))
		return -ENODEV;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE))
		return -EINVAL;

	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);
	info = get_domain_info(dev);
	if (!info) {
		ret = -EINVAL;
		goto out_unlock;
	}
	did = dmar_domain->iommu_did[iommu->seq_id];
	sid = PCI_DEVID(bus, devfn);

	/* Size is only valid in address selective invalidation */
	if (inv_info->granularity == IOMMU_INV_GRANU_ADDR)
		size = to_vtd_size(inv_info->granu.addr_info.granule_size,
				   inv_info->granu.addr_info.nb_granules);

	for_each_set_bit(cache_type,
			 (unsigned long *)&inv_info->cache,
			 IOMMU_CACHE_INV_TYPE_NR) {
		int granu = 0;
		u64 pasid = 0;
		u64 addr = 0;

		granu = to_vtd_granularity(cache_type, inv_info->granularity);
		if (granu == -EINVAL) {
			pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n",
					   cache_type, inv_info->granularity);
			break;
		}

		/*
		 * PASID is stored in different locations based on the
		 * granularity.
		 */
		if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
		    (inv_info->granu.pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
			pasid = inv_info->granu.pasid_info.pasid;
		else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
			 (inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
			pasid = inv_info->granu.addr_info.pasid;

		switch (BIT(cache_type)) {
		case IOMMU_CACHE_INV_TYPE_IOTLB:
			/* HW will ignore LSB bits based on address mask */
			if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
			    size &&
			    (inv_info->granu.addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
				pr_err_ratelimited("User address not aligned, 0x%llx, size order %llu\n",
						   inv_info->granu.addr_info.addr, size);
			}

			/*
			 * If granu is PASID-selective, address is ignored.
			 * We use npages = -1 to indicate that.
			 */
			qi_flush_piotlb(iommu, did, pasid,
					mm_to_dma_pfn(inv_info->granu.addr_info.addr),
					(granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
					inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);

			if (!info->ats_enabled)
				break;
			/*
			 * Always flush device IOTLB if ATS is enabled. vIOMMU
			 * in the guest may assume IOTLB flush is inclusive,
			 * which is more efficient.
			 */
			fallthrough;
		case IOMMU_CACHE_INV_TYPE_DEV_IOTLB:
			/*
			 * PASID based device TLB invalidation does not support
			 * IOMMU_INV_GRANU_PASID granularity but only supports
			 * IOMMU_INV_GRANU_ADDR.
			 * The equivalent of that is we set the size to be the
			 * entire range of 64 bit. User only provides PASID info
			 * without address info. So we set addr to 0.
			 */
			if (inv_info->granularity == IOMMU_INV_GRANU_PASID) {
				size = 64 - VTD_PAGE_SHIFT;
				addr = 0;
			} else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
				addr = inv_info->granu.addr_info.addr;
			}

			if (info->ats_enabled)
				qi_flush_dev_iotlb_pasid(iommu, sid,
						info->pfsid, pasid,
						info->ats_qdep, addr,
						size);
			else
				pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n");
			break;
		default:
			dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n",
					    cache_type);
			ret = -EINVAL;
		}
	}
out_unlock:
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return ret;
}
#endif
static int intel_iommu_map(struct iommu_domain *domain,
			   unsigned long iova, phys_addr_t hpa,
			   size_t size, int iommu_prot, gfp_t gfp)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	u64 max_addr;
	int prot = 0;

	if (iommu_prot & IOMMU_READ)
		prot |= DMA_PTE_READ;
	if (iommu_prot & IOMMU_WRITE)
		prot |= DMA_PTE_WRITE;
	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
		prot |= DMA_PTE_SNP;

	max_addr = iova + size;
	if (dmar_domain->max_addr < max_addr) {
		u64 end;

		/* check if minimum agaw is sufficient for mapped address */
		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
		if (end < max_addr) {
			pr_err("%s: iommu width (%d) is not "
			       "sufficient for the mapped address (%llx)\n",
			       __func__, dmar_domain->gaw, max_addr);
			return -EFAULT;
		}
		dmar_domain->max_addr = max_addr;
	}
	/* Round up size to next multiple of PAGE_SIZE, if it and
	   the low bits of hpa would take us onto the next page */
	size = aligned_nrpages(hpa, size);
	return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
				hpa >> VTD_PAGE_SHIFT, size, prot);
}
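
/*
 * Example of the rounding done by aligned_nrpages() above: hpa 0x1800 with
 * size 0x1000 touches bytes 0x1800-0x27ff, which straddles two 4K pages,
 * so two page frames are mapped rather than one.
 */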
static int intel_iommu_map_pages(struct iommu_domain *domain,
				 unsigned long iova, phys_addr_t paddr,
				 size_t pgsize, size_t pgcount,
				 int prot, gfp_t gfp, size_t *mapped)
{
	unsigned long pgshift = __ffs(pgsize);
	size_t size = pgcount << pgshift;
	int ret;

	if (pgsize != SZ_4K && pgsize != SZ_2M && pgsize != SZ_1G)
		return -EINVAL;

	if (!IS_ALIGNED(iova | paddr, pgsize))
		return -EINVAL;

	ret = intel_iommu_map(domain, iova, paddr, size, prot, gfp);
	if (!ret)
		*mapped = size;

	return ret;
}
static size_t intel_iommu_unmap(struct iommu_domain *domain,
				unsigned long iova, size_t size,
				struct iommu_iotlb_gather *gather)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	unsigned long start_pfn, last_pfn;
	int level = 0;

	/* Cope with horrid API which requires us to unmap more than the
	   size argument if it happens to be a large-page mapping. */
	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));

	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
		size = VTD_PAGE_SIZE << level_to_offset_bits(level);

	start_pfn = iova >> VTD_PAGE_SHIFT;
	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;

	gather->freelist = domain_unmap(dmar_domain, start_pfn,
					last_pfn, gather->freelist);

	if (dmar_domain->max_addr == iova + size)
		dmar_domain->max_addr = iova;

	iommu_iotlb_gather_add_page(domain, gather, iova, size);

	return size;
}
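
/*
 * Example of the large-page behaviour above: a request to unmap 4K that sits
 * inside a 2MB superpage (level 2, i.e. 21 offset bits) is widened to the
 * full 2MB, and the widened size is what gets returned to the caller.
 */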
static size_t intel_iommu_unmap_pages(struct iommu_domain *domain,
				      unsigned long iova,
				      size_t pgsize, size_t pgcount,
				      struct iommu_iotlb_gather *gather)
{
	unsigned long pgshift = __ffs(pgsize);
	size_t size = pgcount << pgshift;

	return intel_iommu_unmap(domain, iova, size, gather);
}
static void intel_iommu_tlb_sync(struct iommu_domain *domain,
				 struct iommu_iotlb_gather *gather)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	unsigned long iova_pfn = IOVA_PFN(gather->start);
	size_t size = gather->end - gather->start;
	unsigned long start_pfn;
	unsigned long nrpages;
	int iommu_id;

	nrpages = aligned_nrpages(gather->start, size);
	start_pfn = mm_to_dma_pfn(iova_pfn);

	for_each_domain_iommu(iommu_id, dmar_domain)
		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
				      start_pfn, nrpages, !gather->freelist, 0);

	dma_free_pagelist(gather->freelist);
}
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
					    dma_addr_t iova)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct dma_pte *pte;
	int level = 0;
	u64 phys = 0;

	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
	if (pte && dma_pte_present(pte))
		phys = dma_pte_addr(pte) +
			(iova & (BIT_MASK(level_to_offset_bits(level) +
						VTD_PAGE_SHIFT) - 1));

	return phys;
}
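
/*
 * intel_iommu_iova_to_phys() example: for an IOVA translated by a 2MB
 * superpage PTE (level 2), the mask above keeps the low 21 bits of the
 * IOVA, so the result is the superpage base plus (iova & 0x1fffff).
 */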
static bool intel_iommu_capable(enum iommu_cap cap)
{
	if (cap == IOMMU_CAP_CACHE_COHERENCY)
		return domain_update_iommu_snooping(NULL);
	if (cap == IOMMU_CAP_INTR_REMAP)
		return irq_remapping_enabled == 1;

	return false;
}
static struct iommu_device *intel_iommu_probe_device(struct device *dev)
{
	struct intel_iommu *iommu;

	iommu = device_to_iommu(dev, NULL, NULL);
	if (!iommu)
		return ERR_PTR(-ENODEV);

	if (translation_pre_enabled(iommu))
		dev_iommu_priv_set(dev, DEFER_DEVICE_DOMAIN_INFO);

	return &iommu->iommu;
}

static void intel_iommu_release_device(struct device *dev)
{
	struct intel_iommu *iommu;

	iommu = device_to_iommu(dev, NULL, NULL);
	if (!iommu)
		return;

	dmar_remove_one_dev_info(dev);

	set_dma_ops(dev, NULL);
}

static void intel_iommu_probe_finalize(struct device *dev)
{
	set_dma_ops(dev, NULL);
	iommu_setup_dma_ops(dev, 0, U64_MAX);
}
static void intel_iommu_get_resv_regions(struct device *device,
					 struct list_head *head)
{
	int prot = DMA_PTE_READ | DMA_PTE_WRITE;
	struct iommu_resv_region *reg;
	struct dmar_rmrr_unit *rmrr;
	struct device *i_dev;
	int i;

	down_read(&dmar_global_lock);
	for_each_rmrr_units(rmrr) {
		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
					  i, i_dev) {
			struct iommu_resv_region *resv;
			enum iommu_resv_type type;
			size_t length;

			if (i_dev != device &&
			    !is_downstream_to_pci_bridge(device, i_dev))
				continue;

			length = rmrr->end_address - rmrr->base_address + 1;

			type = device_rmrr_is_relaxable(device) ?
				IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;

			resv = iommu_alloc_resv_region(rmrr->base_address,
						       length, prot, type);
			if (!resv)
				break;

			list_add_tail(&resv->list, head);
		}
	}
	up_read(&dmar_global_lock);

#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
	if (dev_is_pci(device)) {
		struct pci_dev *pdev = to_pci_dev(device);

		if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
			reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
						      IOMMU_RESV_DIRECT_RELAXABLE);
			if (reg)
				list_add_tail(&reg->list, head);
		}
	}
#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */

	reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
				      IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
				      0, IOMMU_RESV_MSI);
	if (!reg)
		return;
	list_add_tail(&reg->list, head);
}
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
{
	struct device_domain_info *info;
	struct context_entry *context;
	struct dmar_domain *domain;
	unsigned long flags;
	u64 ctx_lo;
	int ret;

	domain = find_domain(dev);
	if (!domain)
		return -EINVAL;

	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);

	ret = -EINVAL;
	info = get_domain_info(dev);
	if (!info || !info->pasid_supported)
		goto out;

	context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
	if (WARN_ON(!context))
		goto out;

	ctx_lo = context[0].lo;

	if (!(ctx_lo & CONTEXT_PASIDE)) {
		ctx_lo |= CONTEXT_PASIDE;
		context[0].lo = ctx_lo;
		wmb();
		iommu->flush.flush_context(iommu,
					   domain->iommu_did[iommu->seq_id],
					   PCI_DEVID(info->bus, info->devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
	}

	/* Enable PASID support in the device, if it wasn't already */
	if (!info->pasid_enabled)
		iommu_enable_dev_iotlb(info);

	ret = 0;

out:
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return ret;
}
static struct iommu_group *intel_iommu_device_group(struct device *dev)
{
	if (dev_is_pci(dev))
		return pci_device_group(dev);
	return generic_device_group(dev);
}

static int intel_iommu_enable_auxd(struct device *dev)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags;
	int ret;

	iommu = device_to_iommu(dev, NULL, NULL);
	if (!iommu || dmar_disabled)
		return -EINVAL;

	if (!sm_supported(iommu) || !pasid_supported(iommu))
		return -EINVAL;

	ret = intel_iommu_enable_pasid(iommu, dev);
	if (ret)
		return -ENODEV;

	spin_lock_irqsave(&device_domain_lock, flags);
	info = get_domain_info(dev);
	info->auxd_enabled = 1;
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}

static int intel_iommu_disable_auxd(struct device *dev)
{
	struct device_domain_info *info;
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);
	info = get_domain_info(dev);
	if (!WARN_ON(!info))
		info->auxd_enabled = 0;
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}
static int intel_iommu_enable_sva(struct device *dev)
{
	struct device_domain_info *info = get_domain_info(dev);
	struct intel_iommu *iommu;
	int ret;

	if (!info || dmar_disabled)
		return -EINVAL;

	iommu = info->iommu;
	if (!iommu)
		return -EINVAL;

	if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
		return -ENODEV;

	if (intel_iommu_enable_pasid(iommu, dev))
		return -ENODEV;

	if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
		return -EINVAL;

	ret = iopf_queue_add_device(iommu->iopf_queue, dev);
	if (!ret)
		ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);

	return ret;
}

static int intel_iommu_disable_sva(struct device *dev)
{
	struct device_domain_info *info = get_domain_info(dev);
	struct intel_iommu *iommu = info->iommu;
	int ret;

	ret = iommu_unregister_device_fault_handler(dev);
	if (!ret)
		ret = iopf_queue_remove_device(iommu->iopf_queue, dev);

	return ret;
}

static int intel_iommu_enable_iopf(struct device *dev)
{
	struct device_domain_info *info = get_domain_info(dev);

	if (info && info->pri_supported)
		return 0;

	return -ENODEV;
}
static int
intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
{
	switch (feat) {
	case IOMMU_DEV_FEAT_AUX:
		return intel_iommu_enable_auxd(dev);

	case IOMMU_DEV_FEAT_IOPF:
		return intel_iommu_enable_iopf(dev);

	case IOMMU_DEV_FEAT_SVA:
		return intel_iommu_enable_sva(dev);

	default:
		return -ENODEV;
	}
}

static int
intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
{
	switch (feat) {
	case IOMMU_DEV_FEAT_AUX:
		return intel_iommu_disable_auxd(dev);

	case IOMMU_DEV_FEAT_IOPF:
		return 0;

	case IOMMU_DEV_FEAT_SVA:
		return intel_iommu_disable_sva(dev);

	default:
		return -ENODEV;
	}
}

static bool
intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
{
	struct device_domain_info *info = get_domain_info(dev);

	if (feat == IOMMU_DEV_FEAT_AUX)
		return scalable_mode_support() && info && info->auxd_enabled;

	return false;
}
static int
intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);

	return dmar_domain->default_pasid > 0 ?
			dmar_domain->default_pasid : -EINVAL;
}

static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
					   struct device *dev)
{
	return attach_deferred(dev);
}

static int
intel_iommu_enable_nesting(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	unsigned long flags;
	int ret = -ENODEV;

	spin_lock_irqsave(&device_domain_lock, flags);
	if (list_empty(&dmar_domain->devices)) {
		dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
		dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
		ret = 0;
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return ret;
}
/*
 * Check that the device does not live on an external facing PCI port that is
 * marked as untrusted. Such devices should not be able to apply quirks and
 * thus not be able to bypass the IOMMU restrictions.
 */
static bool risky_device(struct pci_dev *pdev)
{
	if (pdev->untrusted) {
		pci_info(pdev,
			 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
			 pdev->vendor, pdev->device);
		pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
		return true;
	}
	return false;
}
static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
				       unsigned long iova, size_t size)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	unsigned long pages = aligned_nrpages(iova, size);
	unsigned long pfn = iova >> VTD_PAGE_SHIFT;
	struct intel_iommu *iommu;
	int iommu_id;

	for_each_domain_iommu(iommu_id, dmar_domain) {
		iommu = g_iommus[iommu_id];
		__mapping_notify_one(iommu, dmar_domain, pfn, pages);
	}
}
5637 const struct iommu_ops intel_iommu_ops = {
5638 .capable = intel_iommu_capable,
5639 .domain_alloc = intel_iommu_domain_alloc,
5640 .domain_free = intel_iommu_domain_free,
5641 .enable_nesting = intel_iommu_enable_nesting,
5642 .attach_dev = intel_iommu_attach_device,
5643 .detach_dev = intel_iommu_detach_device,
5644 .aux_attach_dev = intel_iommu_aux_attach_device,
5645 .aux_detach_dev = intel_iommu_aux_detach_device,
5646 .aux_get_pasid = intel_iommu_aux_get_pasid,
5647 .map_pages = intel_iommu_map_pages,
5648 .unmap_pages = intel_iommu_unmap_pages,
5649 .iotlb_sync_map = intel_iommu_iotlb_sync_map,
5650 .flush_iotlb_all = intel_flush_iotlb_all,
5651 .iotlb_sync = intel_iommu_tlb_sync,
5652 .iova_to_phys = intel_iommu_iova_to_phys,
5653 .probe_device = intel_iommu_probe_device,
5654 .probe_finalize = intel_iommu_probe_finalize,
5655 .release_device = intel_iommu_release_device,
5656 .get_resv_regions = intel_iommu_get_resv_regions,
5657 .put_resv_regions = generic_iommu_put_resv_regions,
5658 .device_group = intel_iommu_device_group,
5659 .dev_feat_enabled = intel_iommu_dev_feat_enabled,
5660 .dev_enable_feat = intel_iommu_dev_enable_feat,
5661 .dev_disable_feat = intel_iommu_dev_disable_feat,
5662 .is_attach_deferred = intel_iommu_is_attach_deferred,
5663 .def_domain_type = device_def_domain_type,
5664 .pgsize_bitmap = SZ_4K,
5665 #ifdef CONFIG_INTEL_IOMMU_SVM
5666 .cache_invalidate = intel_iommu_sva_invalidate,
5667 .sva_bind_gpasid = intel_svm_bind_gpasid,
5668 .sva_unbind_gpasid = intel_svm_unbind_gpasid,
5669 .sva_bind = intel_svm_bind,
5670 .sva_unbind = intel_svm_unbind,
5671 .sva_get_pasid = intel_svm_get_pasid,
	.page_response		= intel_svm_page_response,
#endif
};
static void quirk_iommu_igfx(struct pci_dev *dev)
{
	if (risky_device(dev))
		return;

	pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
	dmar_map_gfx = 0;
}
5685 /* G4x/GM45 integrated gfx dmar support is totally busted. */
5686 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
5687 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
5688 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
5689 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
5690 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
5691 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
5692 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
5694 /* Broadwell igfx malfunctions with dmar */
5695 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
5696 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
5697 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
5698 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
5699 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
5700 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
5701 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
5702 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
5703 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
5704 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
5705 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
5706 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
5707 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
5708 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
5709 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
5710 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
5711 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
5712 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
5713 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
5714 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
5715 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
5716 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
5717 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
5718 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
static void quirk_iommu_rwbf(struct pci_dev *dev)
{
	if (risky_device(dev))
		return;

	/*
	 * Mobile 4 Series Chipset neglects to set RWBF capability,
	 * but needs it. Same seems to hold for the desktop versions.
	 */
	pci_info(dev, "Forcing write-buffer flush capability\n");
	rwbf_quirk = 1;
}
5733 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5734 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5735 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5736 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5737 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5738 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5739 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
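
/*
 * The GGC_* values above decode bits 11:8 of the GMCH graphics control
 * register; quirk_calpella_no_shadow_gtt() below only cares whether
 * GGC_MEMORY_VT_ENABLED is set, i.e. whether the BIOS reserved GTT space
 * that the IOMMU can use for graphics translations.
 */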
static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
	unsigned short ggc;

	if (risky_device(dev))
		return;

	if (pci_read_config_word(dev, GGC, &ggc))
		return;

	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
		dmar_map_gfx = 0;
	} else if (dmar_map_gfx) {
		/* we have to ensure the gfx device is idle before we flush */
		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
		iommu_set_dma_strict();
	}
}
5770 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5771 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5772 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5773 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
{
	unsigned short ver;

	if (!IS_GFX_DEVICE(dev))
		return;

	ver = (dev->device >> 8) & 0xff;
	if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
	    ver != 0x4e && ver != 0x8a && ver != 0x98 &&
	    ver != 0x9a)
		return;

	if (risky_device(dev))
		return;

	pci_info(dev, "Skip IOMMU disabling for graphics\n");
	iommu_skip_te_disable = 1;
}
5794 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
/* On Tylersburg chipsets, some BIOSes have been known to enable the
   ISOCH DMAR unit for the Azalia sound device, but not give it any
   TLB entries, which causes it to deadlock. Check for that. We do
   this in a function called from init_dmars(), instead of in a PCI
   quirk, because we don't want to print the obnoxious "BIOS broken"
   message if VT-d is actually disabled.
*/
static void __init check_tylersburg_isoch(void)
{
	struct pci_dev *pdev;
	uint32_t vtisochctrl;

	/* If there's no Azalia in the system anyway, forget it. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
	if (!pdev)
		return;

	if (risky_device(pdev)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/* System Management Registers. Might be hidden, in which case
	   we can't do the sanity check. But that's OK, because the
	   known-broken BIOSes _don't_ actually hide it, so far. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
	if (!pdev)
		return;

	if (risky_device(pdev)) {
		pci_dev_put(pdev);
		return;
	}

	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
	if (vtisochctrl & 1)
		return;

	/* Drop all bits other than the number of TLB entries */
	vtisochctrl &= 0x1c;

	/* If we have the recommended number of TLB entries (16), fine. */
	if (vtisochctrl == 0x10)
		return;

	/* Zero TLB entries? You get to ride the short bus to school. */
	if (!vtisochctrl) {
		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		iommu_identity_mapping |= IDENTMAP_AZALIA;
		return;
	}

5861 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",