drivers/iommu/intel/iommu.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright © 2006-2014 Intel Corporation.
4  *
5  * Authors: David Woodhouse <[email protected]>,
6  *          Ashok Raj <[email protected]>,
7  *          Shaohua Li <[email protected]>,
8  *          Anil S Keshavamurthy <[email protected]>,
9  *          Fenghua Yu <[email protected]>
10  *          Joerg Roedel <[email protected]>
11  */
12
13 #define pr_fmt(fmt)     "DMAR: " fmt
14 #define dev_fmt(fmt)    pr_fmt(fmt)
15
16 #include <linux/init.h>
17 #include <linux/bitmap.h>
18 #include <linux/debugfs.h>
19 #include <linux/export.h>
20 #include <linux/slab.h>
21 #include <linux/irq.h>
22 #include <linux/interrupt.h>
23 #include <linux/spinlock.h>
24 #include <linux/pci.h>
25 #include <linux/dmar.h>
26 #include <linux/dma-map-ops.h>
27 #include <linux/mempool.h>
28 #include <linux/memory.h>
29 #include <linux/cpu.h>
30 #include <linux/timer.h>
31 #include <linux/io.h>
32 #include <linux/iova.h>
33 #include <linux/iommu.h>
34 #include <linux/dma-iommu.h>
35 #include <linux/intel-iommu.h>
36 #include <linux/intel-svm.h>
37 #include <linux/syscore_ops.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/pci-ats.h>
41 #include <linux/memblock.h>
42 #include <linux/dma-direct.h>
43 #include <linux/crash_dump.h>
44 #include <linux/numa.h>
45 #include <asm/irq_remapping.h>
46 #include <asm/cacheflush.h>
47 #include <asm/iommu.h>
48
49 #include "../irq_remapping.h"
50 #include "../iommu-sva-lib.h"
51 #include "pasid.h"
52 #include "cap_audit.h"
53
54 #define ROOT_SIZE               VTD_PAGE_SIZE
55 #define CONTEXT_SIZE            VTD_PAGE_SIZE
56
57 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
58 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
59 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
60 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
61
62 #define IOAPIC_RANGE_START      (0xfee00000)
63 #define IOAPIC_RANGE_END        (0xfeefffff)
64 #define IOVA_START_ADDR         (0x1000)
65
66 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
67
68 #define MAX_AGAW_WIDTH 64
69 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
70
71 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
72 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
73
74 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
75    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
76 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
77                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
78 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
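
/*
 * For example, with a 48-bit guest address width and 4KiB VT-d pages
 * (VTD_PAGE_SHIFT == 12):
 *
 *	__DOMAIN_MAX_PFN(48) == (1ULL << 36) - 1 == 0xfffffffff
 *	DOMAIN_MAX_ADDR(48)  == 0xfffffffff000
 *
 * i.e. the last addressable page frame and the base address of the last
 * page in the domain.
 */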
79
80 /* IO virtual address start page frame number */
81 #define IOVA_START_PFN          (1)
82
83 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
84
85 /* page table handling */
86 #define LEVEL_STRIDE            (9)
87 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
88
89 static inline int agaw_to_level(int agaw)
90 {
91         return agaw + 2;
92 }
93
94 static inline int agaw_to_width(int agaw)
95 {
96         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
97 }
98
99 static inline int width_to_agaw(int width)
100 {
101         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
102 }
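
/*
 * AGAW (adjusted guest address width) encodes the page-table depth: each
 * additional level adds LEVEL_STRIDE (9) bits on top of the 30-bit,
 * 2-level base.  For example:
 *
 *	width_to_agaw(39) == 1,  agaw_to_level(1) == 3  (3-level table)
 *	width_to_agaw(48) == 2,  agaw_to_level(2) == 4  (4-level table)
 *	agaw_to_width(3)  == 57                         (5-level table)
 */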
103
104 static inline unsigned int level_to_offset_bits(int level)
105 {
106         return (level - 1) * LEVEL_STRIDE;
107 }
108
109 static inline int pfn_level_offset(u64 pfn, int level)
110 {
111         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
112 }
113
114 static inline u64 level_mask(int level)
115 {
116         return -1ULL << level_to_offset_bits(level);
117 }
118
119 static inline u64 level_size(int level)
120 {
121         return 1ULL << level_to_offset_bits(level);
122 }
123
124 static inline u64 align_to_level(u64 pfn, int level)
125 {
126         return (pfn + level_size(level) - 1) & level_mask(level);
127 }
128
129 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
130 {
131         return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
132 }
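
/*
 * Worked example for the level helpers, assuming 4KiB VT-d pages: at
 * level 2 a single PTE covers level_size(2) == 512 pages (2MiB), the
 * index into that table is bits 17:9 of the DMA pfn
 * (pfn_level_offset(pfn, 2)), and align_to_level(pfn, 2) rounds pfn up
 * to the next 512-page boundary.
 */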
133
134 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
135    are never going to work. */
136 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
137 {
138         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
139 }
140
141 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
142 {
143         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
144 }
145 static inline unsigned long page_to_dma_pfn(struct page *pg)
146 {
147         return mm_to_dma_pfn(page_to_pfn(pg));
148 }
149 static inline unsigned long virt_to_dma_pfn(void *p)
150 {
151         return page_to_dma_pfn(virt_to_page(p));
152 }
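
/*
 * With 4KiB MM pages (the only base page size on x86), PAGE_SHIFT equals
 * VTD_PAGE_SHIFT and the conversions above are no-ops; they are kept so
 * that the MM and DMA pfn spaces stay clearly separated in the code.
 */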
153
154 /* global iommu list, set NULL for ignored DMAR units */
155 static struct intel_iommu **g_iommus;
156
157 static void __init check_tylersburg_isoch(void);
158 static int rwbf_quirk;
159 static inline struct device_domain_info *
160 dmar_search_domain_by_dev_info(int segment, int bus, int devfn);
161
162 /*
163  * set to 1 to panic the kernel if VT-d can't be enabled successfully
164  * (used when the kernel is launched with TXT)
165  */
166 static int force_on = 0;
167 static int intel_iommu_tboot_noforce;
168 static int no_platform_optin;
169
170 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
171
172 /*
173  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
174  * if marked present.
175  */
176 static phys_addr_t root_entry_lctp(struct root_entry *re)
177 {
178         if (!(re->lo & 1))
179                 return 0;
180
181         return re->lo & VTD_PAGE_MASK;
182 }
183
184 /*
185  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
186  * if marked present.
187  */
188 static phys_addr_t root_entry_uctp(struct root_entry *re)
189 {
190         if (!(re->hi & 1))
191                 return 0;
192
193         return re->hi & VTD_PAGE_MASK;
194 }
195
196 static inline void context_clear_pasid_enable(struct context_entry *context)
197 {
198         context->lo &= ~(1ULL << 11);
199 }
200
201 static inline bool context_pasid_enabled(struct context_entry *context)
202 {
203         return !!(context->lo & (1ULL << 11));
204 }
205
206 static inline void context_set_copied(struct context_entry *context)
207 {
208         context->hi |= (1ull << 3);
209 }
210
211 static inline bool context_copied(struct context_entry *context)
212 {
213         return !!(context->hi & (1ULL << 3));
214 }
215
216 static inline bool __context_present(struct context_entry *context)
217 {
218         return (context->lo & 1);
219 }
220
221 bool context_present(struct context_entry *context)
222 {
223         return context_pasid_enabled(context) ?
224              __context_present(context) :
225              __context_present(context) && !context_copied(context);
226 }
227
228 static inline void context_set_present(struct context_entry *context)
229 {
230         context->lo |= 1;
231 }
232
233 static inline void context_set_fault_enable(struct context_entry *context)
234 {
235         context->lo &= (((u64)-1) << 2) | 1;
236 }
237
238 static inline void context_set_translation_type(struct context_entry *context,
239                                                 unsigned long value)
240 {
241         context->lo &= (((u64)-1) << 4) | 3;
242         context->lo |= (value & 3) << 2;
243 }
244
245 static inline void context_set_address_root(struct context_entry *context,
246                                             unsigned long value)
247 {
248         context->lo &= ~VTD_PAGE_MASK;
249         context->lo |= value & VTD_PAGE_MASK;
250 }
251
252 static inline void context_set_address_width(struct context_entry *context,
253                                              unsigned long value)
254 {
255         context->hi |= value & 7;
256 }
257
258 static inline void context_set_domain_id(struct context_entry *context,
259                                          unsigned long value)
260 {
261         context->hi |= (value & ((1 << 16) - 1)) << 8;
262 }
263
264 static inline int context_domain_id(struct context_entry *c)
265 {
266         return((c->hi >> 8) & 0xffff);
267 }
268
269 static inline void context_clear_entry(struct context_entry *context)
270 {
271         context->lo = 0;
272         context->hi = 0;
273 }
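
/*
 * Summary of the legacy (non-scalable) context-entry fields touched by
 * the helpers above:
 *
 *	lo bit  0	present
 *	lo bit  1	fault processing disable (cleared by
 *			context_set_fault_enable())
 *	lo bits 3:2	translation type
 *	lo bits 63:12	second-level page-table root
 *	hi bits 2:0	address width (AGAW)
 *	hi bits 23:8	domain id
 */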
274
275 /*
276  * This domain is a statically identity mapping domain.
277  *      1. This domain creates a static 1:1 mapping to all usable memory.
278  *      2. It maps to each iommu if successful.
279  *      3. Each iommu maps to this domain if successful.
280  */
281 static struct dmar_domain *si_domain;
282 static int hw_pass_through = 1;
283
284 #define for_each_domain_iommu(idx, domain)                      \
285         for (idx = 0; idx < g_num_of_iommus; idx++)             \
286                 if (domain->iommu_refcnt[idx])
287
288 struct dmar_rmrr_unit {
289         struct list_head list;          /* list of rmrr units   */
290         struct acpi_dmar_header *hdr;   /* ACPI header          */
291         u64     base_address;           /* reserved base address*/
292         u64     end_address;            /* reserved end address */
293         struct dmar_dev_scope *devices; /* target devices */
294         int     devices_cnt;            /* target device count */
295 };
296
297 struct dmar_atsr_unit {
298         struct list_head list;          /* list of ATSR units */
299         struct acpi_dmar_header *hdr;   /* ACPI header */
300         struct dmar_dev_scope *devices; /* target devices */
301         int devices_cnt;                /* target device count */
302         u8 include_all:1;               /* include all ports */
303 };
304
305 struct dmar_satc_unit {
306         struct list_head list;          /* list of SATC units */
307         struct acpi_dmar_header *hdr;   /* ACPI header */
308         struct dmar_dev_scope *devices; /* target devices */
309         struct intel_iommu *iommu;      /* the corresponding iommu */
310         int devices_cnt;                /* target device count */
311         u8 atc_required:1;              /* ATS is required */
312 };
313
314 static LIST_HEAD(dmar_atsr_units);
315 static LIST_HEAD(dmar_rmrr_units);
316 static LIST_HEAD(dmar_satc_units);
317
318 #define for_each_rmrr_units(rmrr) \
319         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
320
321 /* number of registered intel_iommus, used for indexing g_iommus */
322 static int g_num_of_iommus;
323
324 static void domain_exit(struct dmar_domain *domain);
325 static void domain_remove_dev_info(struct dmar_domain *domain);
326 static void dmar_remove_one_dev_info(struct device *dev);
327 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
328 static int intel_iommu_attach_device(struct iommu_domain *domain,
329                                      struct device *dev);
330 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
331                                             dma_addr_t iova);
332
333 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
334 int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
335
336 int intel_iommu_enabled = 0;
337 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
338
339 static int dmar_map_gfx = 1;
340 static int intel_iommu_superpage = 1;
341 static int iommu_identity_mapping;
342 static int iommu_skip_te_disable;
343
344 #define IDENTMAP_GFX            2
345 #define IDENTMAP_AZALIA         4
346
347 int intel_iommu_gfx_mapped;
348 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
349
350 #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
351 struct device_domain_info *get_domain_info(struct device *dev)
352 {
353         struct device_domain_info *info;
354
355         if (!dev)
356                 return NULL;
357
358         info = dev_iommu_priv_get(dev);
359         if (unlikely(info == DEFER_DEVICE_DOMAIN_INFO))
360                 return NULL;
361
362         return info;
363 }
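
/*
 * DEFER_DEVICE_DOMAIN_INFO is a sentinel stored in the per-device iommu
 * private data for devices whose domain attachment is deferred until
 * first use; get_domain_info() hides it from callers by returning NULL.
 */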
364
365 DEFINE_SPINLOCK(device_domain_lock);
366 static LIST_HEAD(device_domain_list);
367
368 /*
369  * Iterate over elements in device_domain_list and call the specified
370  * callback @fn against each element.
371  */
372 int for_each_device_domain(int (*fn)(struct device_domain_info *info,
373                                      void *data), void *data)
374 {
375         int ret = 0;
376         unsigned long flags;
377         struct device_domain_info *info;
378
379         spin_lock_irqsave(&device_domain_lock, flags);
380         list_for_each_entry(info, &device_domain_list, global) {
381                 ret = fn(info, data);
382                 if (ret) {
383                         spin_unlock_irqrestore(&device_domain_lock, flags);
384                         return ret;
385                 }
386         }
387         spin_unlock_irqrestore(&device_domain_lock, flags);
388
389         return 0;
390 }
391
392 const struct iommu_ops intel_iommu_ops;
393
394 static bool translation_pre_enabled(struct intel_iommu *iommu)
395 {
396         return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
397 }
398
399 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
400 {
401         iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
402 }
403
404 static void init_translation_status(struct intel_iommu *iommu)
405 {
406         u32 gsts;
407
408         gsts = readl(iommu->reg + DMAR_GSTS_REG);
409         if (gsts & DMA_GSTS_TES)
410                 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
411 }
412
413 static int __init intel_iommu_setup(char *str)
414 {
415         if (!str)
416                 return -EINVAL;
417
418         while (*str) {
419                 if (!strncmp(str, "on", 2)) {
420                         dmar_disabled = 0;
421                         pr_info("IOMMU enabled\n");
422                 } else if (!strncmp(str, "off", 3)) {
423                         dmar_disabled = 1;
424                         no_platform_optin = 1;
425                         pr_info("IOMMU disabled\n");
426                 } else if (!strncmp(str, "igfx_off", 8)) {
427                         dmar_map_gfx = 0;
428                         pr_info("Disable GFX device mapping\n");
429                 } else if (!strncmp(str, "forcedac", 8)) {
430                         pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
431                         iommu_dma_forcedac = true;
432                 } else if (!strncmp(str, "strict", 6)) {
433                         pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
434                         iommu_set_dma_strict();
435                 } else if (!strncmp(str, "sp_off", 6)) {
436                         pr_info("Disable supported super page\n");
437                         intel_iommu_superpage = 0;
438                 } else if (!strncmp(str, "sm_on", 5)) {
439                         pr_info("Enable scalable mode if hardware supports\n");
440                         intel_iommu_sm = 1;
441                 } else if (!strncmp(str, "sm_off", 6)) {
442                         pr_info("Scalable mode is disallowed\n");
443                         intel_iommu_sm = 0;
444                 } else if (!strncmp(str, "tboot_noforce", 13)) {
445                         pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
446                         intel_iommu_tboot_noforce = 1;
447                 } else {
448                         pr_notice("Unknown option - '%s'\n", str);
449                 }
450
451                 str += strcspn(str, ",");
452                 while (*str == ',')
453                         str++;
454         }
455
456         return 1;
457 }
458 __setup("intel_iommu=", intel_iommu_setup);
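
/*
 * The options above may be combined on the kernel command line, separated
 * by commas.  For example:
 *
 *	intel_iommu=on,sm_on,igfx_off
 *
 * enables the IOMMU, requests scalable mode if the hardware supports it,
 * and leaves the integrated graphics device untranslated.
 */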
459
460 static struct kmem_cache *iommu_domain_cache;
461 static struct kmem_cache *iommu_devinfo_cache;
462
463 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
464 {
465         struct dmar_domain **domains;
466         int idx = did >> 8;
467
468         domains = iommu->domains[idx];
469         if (!domains)
470                 return NULL;
471
472         return domains[did & 0xff];
473 }
474
475 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
476                              struct dmar_domain *domain)
477 {
478         struct dmar_domain **domains;
479         int idx = did >> 8;
480
481         if (!iommu->domains[idx]) {
482                 size_t size = 256 * sizeof(struct dmar_domain *);
483                 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
484         }
485
486         domains = iommu->domains[idx];
487         if (WARN_ON(!domains))
488                 return;
489
490         domains[did & 0xff] = domain;
491 }
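
/*
 * iommu->domains is a two-level array indexed by domain id: the high byte
 * of the did selects a lazily allocated page of 256 pointers, the low
 * byte selects the slot within it.  E.g. did 0x1234 is stored in
 * iommu->domains[0x12][0x34].
 */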
492
493 void *alloc_pgtable_page(int node)
494 {
495         struct page *page;
496         void *vaddr = NULL;
497
498         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
499         if (page)
500                 vaddr = page_address(page);
501         return vaddr;
502 }
503
504 void free_pgtable_page(void *vaddr)
505 {
506         free_page((unsigned long)vaddr);
507 }
508
509 static inline void *alloc_domain_mem(void)
510 {
511         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
512 }
513
514 static void free_domain_mem(void *vaddr)
515 {
516         kmem_cache_free(iommu_domain_cache, vaddr);
517 }
518
519 static inline void *alloc_devinfo_mem(void)
520 {
521         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
522 }
523
524 static inline void free_devinfo_mem(void *vaddr)
525 {
526         kmem_cache_free(iommu_devinfo_cache, vaddr);
527 }
528
529 static inline int domain_type_is_si(struct dmar_domain *domain)
530 {
531         return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
532 }
533
534 static inline bool domain_use_first_level(struct dmar_domain *domain)
535 {
536         return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
537 }
538
539 static inline int domain_pfn_supported(struct dmar_domain *domain,
540                                        unsigned long pfn)
541 {
542         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
543
544         return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
545 }
546
547 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
548 {
549         unsigned long sagaw;
550         int agaw;
551
552         sagaw = cap_sagaw(iommu->cap);
553         for (agaw = width_to_agaw(max_gaw);
554              agaw >= 0; agaw--) {
555                 if (test_bit(agaw, &sagaw))
556                         break;
557         }
558
559         return agaw;
560 }
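
/*
 * cap_sagaw() is a bitmap of the AGAW values supported for second-level
 * translation: bit 1 = 39-bit/3-level, bit 2 = 48-bit/4-level and
 * bit 3 = 57-bit/5-level.  The loop above walks down from
 * width_to_agaw(max_gaw) and returns the largest AGAW the hardware
 * reports as supported, or -1 if none is.
 */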
561
562 /*
563  * Calculate max SAGAW for each iommu.
564  */
565 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
566 {
567         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
568 }
569
570 /*
571  * Calculate agaw for each iommu.
572  * "SAGAW" may differ across iommus, so use a default agaw and fall back
573  * to a smaller supported agaw for iommus that don't support the default.
574  */
575 int iommu_calculate_agaw(struct intel_iommu *iommu)
576 {
577         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
578 }
579
580 /* This function only returns a single iommu in a domain */
581 struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
582 {
583         int iommu_id;
584
585         /* si_domain and vm domain should not get here. */
586         if (WARN_ON(!iommu_is_dma_domain(&domain->domain)))
587                 return NULL;
588
589         for_each_domain_iommu(iommu_id, domain)
590                 break;
591
592         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
593                 return NULL;
594
595         return g_iommus[iommu_id];
596 }
597
598 static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
599 {
600         return sm_supported(iommu) ?
601                         ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
602 }
603
604 static void domain_update_iommu_coherency(struct dmar_domain *domain)
605 {
606         struct dmar_drhd_unit *drhd;
607         struct intel_iommu *iommu;
608         bool found = false;
609         int i;
610
611         domain->iommu_coherency = true;
612
613         for_each_domain_iommu(i, domain) {
614                 found = true;
615                 if (!iommu_paging_structure_coherency(g_iommus[i])) {
616                         domain->iommu_coherency = false;
617                         break;
618                 }
619         }
620         if (found)
621                 return;
622
623         /* No hardware attached; use lowest common denominator */
624         rcu_read_lock();
625         for_each_active_iommu(iommu, drhd) {
626                 if (!iommu_paging_structure_coherency(iommu)) {
627                         domain->iommu_coherency = false;
628                         break;
629                 }
630         }
631         rcu_read_unlock();
632 }
633
634 static bool domain_update_iommu_snooping(struct intel_iommu *skip)
635 {
636         struct dmar_drhd_unit *drhd;
637         struct intel_iommu *iommu;
638         bool ret = true;
639
640         rcu_read_lock();
641         for_each_active_iommu(iommu, drhd) {
642                 if (iommu != skip) {
643                         /*
644                          * If the hardware is operating in the scalable mode,
645                          * the snooping control is always supported since we
646                          * always set PASID-table-entry.PGSNP bit if the domain
647                          * is managed outside (UNMANAGED).
648                          */
649                         if (!sm_supported(iommu) &&
650                             !ecap_sc_support(iommu->ecap)) {
651                                 ret = false;
652                                 break;
653                         }
654                 }
655         }
656         rcu_read_unlock();
657
658         return ret;
659 }
660
661 static int domain_update_iommu_superpage(struct dmar_domain *domain,
662                                          struct intel_iommu *skip)
663 {
664         struct dmar_drhd_unit *drhd;
665         struct intel_iommu *iommu;
666         int mask = 0x3;
667
668         if (!intel_iommu_superpage)
669                 return 0;
670
671         /* set iommu_superpage to the smallest common denominator */
672         rcu_read_lock();
673         for_each_active_iommu(iommu, drhd) {
674                 if (iommu != skip) {
675                         if (domain && domain_use_first_level(domain)) {
676                                 if (!cap_fl1gp_support(iommu->cap))
677                                         mask = 0x1;
678                         } else {
679                                 mask &= cap_super_page_val(iommu->cap);
680                         }
681
682                         if (!mask)
683                                 break;
684                 }
685         }
686         rcu_read_unlock();
687
688         return fls(mask);
689 }
690
691 static int domain_update_device_node(struct dmar_domain *domain)
692 {
693         struct device_domain_info *info;
694         int nid = NUMA_NO_NODE;
695
696         assert_spin_locked(&device_domain_lock);
697
698         if (list_empty(&domain->devices))
699                 return NUMA_NO_NODE;
700
701         list_for_each_entry(info, &domain->devices, link) {
702                 if (!info->dev)
703                         continue;
704
705                 /*
706                  * There could be multiple device NUMA nodes, as devices within
707                  * the same domain may sit behind different IOMMUs. There is no
708                  * perfect answer in such a situation, so we adopt a first-come,
709                  * first-served policy.
710                  */
711                 nid = dev_to_node(info->dev);
712                 if (nid != NUMA_NO_NODE)
713                         break;
714         }
715
716         return nid;
717 }
718
719 static void domain_update_iotlb(struct dmar_domain *domain);
720
721 /* Return the super pagesize bitmap if supported. */
722 static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
723 {
724         unsigned long bitmap = 0;
725
726         /*
727          * 1-level super page supports page size of 2MiB, 2-level super page
728          * supports page size of both 2MiB and 1GiB.
729          */
730         if (domain->iommu_superpage == 1)
731                 bitmap |= SZ_2M;
732         else if (domain->iommu_superpage == 2)
733                 bitmap |= SZ_2M | SZ_1G;
734
735         return bitmap;
736 }
737
738 /* Some capabilities may be different across iommus */
739 static void domain_update_iommu_cap(struct dmar_domain *domain)
740 {
741         domain_update_iommu_coherency(domain);
742         domain->iommu_snooping = domain_update_iommu_snooping(NULL);
743         domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
744
745         /*
746          * If RHSA is missing, we should default to the device numa domain
747          * as fall back.
748          */
749         if (domain->nid == NUMA_NO_NODE)
750                 domain->nid = domain_update_device_node(domain);
751
752         /*
753          * First-level translation restricts the input-address to a
754          * canonical address (i.e., address bits 63:N have the same
755          * value as address bit [N-1], where N is 48 with 4-level
756          * paging and 57 with 5-level paging). Hence, skip bit
757          * [N-1].
758          */
759         if (domain_use_first_level(domain))
760                 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
761         else
762                 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
763
764         domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
765         domain_update_iotlb(domain);
766 }
767
768 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
769                                          u8 devfn, int alloc)
770 {
771         struct root_entry *root = &iommu->root_entry[bus];
772         struct context_entry *context;
773         u64 *entry;
774
775         entry = &root->lo;
776         if (sm_supported(iommu)) {
777                 if (devfn >= 0x80) {
778                         devfn -= 0x80;
779                         entry = &root->hi;
780                 }
781                 devfn *= 2;
782         }
783         if (*entry & 1)
784                 context = phys_to_virt(*entry & VTD_PAGE_MASK);
785         else {
786                 unsigned long phy_addr;
787                 if (!alloc)
788                         return NULL;
789
790                 context = alloc_pgtable_page(iommu->node);
791                 if (!context)
792                         return NULL;
793
794                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
795                 phy_addr = virt_to_phys((void *)context);
796                 *entry = phy_addr | 1;
797                 __iommu_flush_cache(iommu, entry, sizeof(*entry));
798         }
799         return &context[devfn];
800 }
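
/*
 * In scalable mode each root entry provides two context-table pointers:
 * root->lo covers devfn 0x00-0x7f and root->hi covers devfn 0x80-0xff.
 * Scalable-mode context entries are twice the size of legacy ones (256
 * vs 128 bits), which is why the devfn is doubled above before indexing
 * the resulting 128-entry table.
 */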
801
802 static bool attach_deferred(struct device *dev)
803 {
804         return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO;
805 }
806
807 /**
808  * is_downstream_to_pci_bridge - test if a device belongs to the PCI
809  *                               sub-hierarchy of a candidate PCI-PCI bridge
810  * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
811  * @bridge: the candidate PCI-PCI bridge
812  *
813  * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
814  */
815 static bool
816 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
817 {
818         struct pci_dev *pdev, *pbridge;
819
820         if (!dev_is_pci(dev) || !dev_is_pci(bridge))
821                 return false;
822
823         pdev = to_pci_dev(dev);
824         pbridge = to_pci_dev(bridge);
825
826         if (pbridge->subordinate &&
827             pbridge->subordinate->number <= pdev->bus->number &&
828             pbridge->subordinate->busn_res.end >= pdev->bus->number)
829                 return true;
830
831         return false;
832 }
833
834 static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
835 {
836         struct dmar_drhd_unit *drhd;
837         u32 vtbar;
838         int rc;
839
840         /* We know that this device on this chipset has its own IOMMU.
841          * If we find it under a different IOMMU, then the BIOS is lying
842          * to us. Hope that the IOMMU for this device is actually
843          * disabled, and it needs no translation...
844          */
845         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
846         if (rc) {
847                 /* "can't" happen */
848                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
849                 return false;
850         }
851         vtbar &= 0xffff0000;
852
853         /* we know that this iommu should be at offset 0xa000 from vtbar */
854         drhd = dmar_find_matched_drhd_unit(pdev);
855         if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
856                 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
857                 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
858                 return true;
859         }
860
861         return false;
862 }
863
864 static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
865 {
866         if (!iommu || iommu->drhd->ignored)
867                 return true;
868
869         if (dev_is_pci(dev)) {
870                 struct pci_dev *pdev = to_pci_dev(dev);
871
872                 if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
873                     pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
874                     quirk_ioat_snb_local_iommu(pdev))
875                         return true;
876         }
877
878         return false;
879 }
880
881 struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
882 {
883         struct dmar_drhd_unit *drhd = NULL;
884         struct pci_dev *pdev = NULL;
885         struct intel_iommu *iommu;
886         struct device *tmp;
887         u16 segment = 0;
888         int i;
889
890         if (!dev)
891                 return NULL;
892
893         if (dev_is_pci(dev)) {
894                 struct pci_dev *pf_pdev;
895
896                 pdev = pci_real_dma_dev(to_pci_dev(dev));
897
898                 /* VFs aren't listed in scope tables; we need to look up
899                  * the PF instead to find the IOMMU. */
900                 pf_pdev = pci_physfn(pdev);
901                 dev = &pf_pdev->dev;
902                 segment = pci_domain_nr(pdev->bus);
903         } else if (has_acpi_companion(dev))
904                 dev = &ACPI_COMPANION(dev)->dev;
905
906         rcu_read_lock();
907         for_each_iommu(iommu, drhd) {
908                 if (pdev && segment != drhd->segment)
909                         continue;
910
911                 for_each_active_dev_scope(drhd->devices,
912                                           drhd->devices_cnt, i, tmp) {
913                         if (tmp == dev) {
914                                 /* For a VF use its original BDF# not that of the PF
915                                  * which we used for the IOMMU lookup. Strictly speaking
916                                  * we could do this for all PCI devices; we only need to
917                                  * get the BDF# from the scope table for ACPI matches. */
918                                 if (pdev && pdev->is_virtfn)
919                                         goto got_pdev;
920
921                                 if (bus && devfn) {
922                                         *bus = drhd->devices[i].bus;
923                                         *devfn = drhd->devices[i].devfn;
924                                 }
925                                 goto out;
926                         }
927
928                         if (is_downstream_to_pci_bridge(dev, tmp))
929                                 goto got_pdev;
930                 }
931
932                 if (pdev && drhd->include_all) {
933                 got_pdev:
934                         if (bus && devfn) {
935                                 *bus = pdev->bus->number;
936                                 *devfn = pdev->devfn;
937                         }
938                         goto out;
939                 }
940         }
941         iommu = NULL;
942  out:
943         if (iommu_is_dummy(iommu, dev))
944                 iommu = NULL;
945
946         rcu_read_unlock();
947
948         return iommu;
949 }
950
951 static void domain_flush_cache(struct dmar_domain *domain,
952                                void *addr, int size)
953 {
954         if (!domain->iommu_coherency)
955                 clflush_cache_range(addr, size);
956 }
957
958 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
959 {
960         struct context_entry *context;
961         int ret = 0;
962         unsigned long flags;
963
964         spin_lock_irqsave(&iommu->lock, flags);
965         context = iommu_context_addr(iommu, bus, devfn, 0);
966         if (context)
967                 ret = context_present(context);
968         spin_unlock_irqrestore(&iommu->lock, flags);
969         return ret;
970 }
971
972 static void free_context_table(struct intel_iommu *iommu)
973 {
974         int i;
975         unsigned long flags;
976         struct context_entry *context;
977
978         spin_lock_irqsave(&iommu->lock, flags);
979         if (!iommu->root_entry)
980                 goto out;
981
982         for (i = 0; i < ROOT_ENTRY_NR; i++) {
983                 context = iommu_context_addr(iommu, i, 0, 0);
984                 if (context)
985                         free_pgtable_page(context);
986
987                 if (!sm_supported(iommu))
988                         continue;
989
990                 context = iommu_context_addr(iommu, i, 0x80, 0);
991                 if (context)
992                         free_pgtable_page(context);
993
994         }
995         free_pgtable_page(iommu->root_entry);
996         iommu->root_entry = NULL;
997 out:
998         spin_unlock_irqrestore(&iommu->lock, flags);
999 }
1000
1001 #ifdef CONFIG_DMAR_DEBUG
1002 static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, u8 bus, u8 devfn)
1003 {
1004         struct device_domain_info *info;
1005         struct dma_pte *parent, *pte;
1006         struct dmar_domain *domain;
1007         int offset, level;
1008
1009         info = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
1010         if (!info || !info->domain) {
1011                 pr_info("device [%02x:%02x.%d] not probed\n",
1012                         bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1013                 return;
1014         }
1015
1016         domain = info->domain;
1017         level = agaw_to_level(domain->agaw);
1018         parent = domain->pgd;
1019         if (!parent) {
1020                 pr_info("no page table setup\n");
1021                 return;
1022         }
1023
1024         while (1) {
1025                 offset = pfn_level_offset(pfn, level);
1026                 pte = &parent[offset];
1027                 if (!pte || (dma_pte_superpage(pte) || !dma_pte_present(pte))) {
1028                         pr_info("PTE not present at level %d\n", level);
1029                         break;
1030                 }
1031
1032                 pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
1033
1034                 if (level == 1)
1035                         break;
1036
1037                 parent = phys_to_virt(dma_pte_addr(pte));
1038                 level--;
1039         }
1040 }
1041
1042 void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
1043                           unsigned long long addr, u32 pasid)
1044 {
1045         struct pasid_dir_entry *dir, *pde;
1046         struct pasid_entry *entries, *pte;
1047         struct context_entry *ctx_entry;
1048         struct root_entry *rt_entry;
1049         u8 devfn = source_id & 0xff;
1050         u8 bus = source_id >> 8;
1051         int i, dir_index, index;
1052
1053         pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
1054
1055         /* root entry dump */
1056         rt_entry = &iommu->root_entry[bus];
1057         if (!rt_entry) {
1058                 pr_info("root table entry is not present\n");
1059                 return;
1060         }
1061
1062         if (sm_supported(iommu))
1063                 pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
1064                         rt_entry->hi, rt_entry->lo);
1065         else
1066                 pr_info("root entry: 0x%016llx\n", rt_entry->lo);
1067
1068         /* context entry dump */
1069         ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
1070         if (!ctx_entry) {
1071                 pr_info("context table entry is not present\n");
1072                 return;
1073         }
1074
1075         pr_info("context entry: hi 0x%016llx, low 0x%016llx\n",
1076                 ctx_entry->hi, ctx_entry->lo);
1077
1078         /* legacy mode does not require PASID entries */
1079         if (!sm_supported(iommu))
1080                 goto pgtable_walk;
1081
1082         /* get the pointer to pasid directory entry */
1083         dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
1084         if (!dir) {
1085                 pr_info("pasid directory entry is not present\n");
1086                 return;
1087         }
1088         /* For request-without-pasid, get the pasid from context entry */
1089         if (intel_iommu_sm && pasid == INVALID_IOASID)
1090                 pasid = PASID_RID2PASID;
1091
1092         dir_index = pasid >> PASID_PDE_SHIFT;
1093         pde = &dir[dir_index];
1094         pr_info("pasid dir entry: 0x%016llx\n", pde->val);
1095
1096         /* get the pointer to the pasid table entry */
1097         entries = get_pasid_table_from_pde(pde);
1098         if (!entries) {
1099                 pr_info("pasid table entry is not present\n");
1100                 return;
1101         }
1102         index = pasid & PASID_PTE_MASK;
1103         pte = &entries[index];
1104         for (i = 0; i < ARRAY_SIZE(pte->val); i++)
1105                 pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
1106
1107 pgtable_walk:
1108         pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn);
1109 }
1110 #endif
1111
1112 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
1113                                       unsigned long pfn, int *target_level)
1114 {
1115         struct dma_pte *parent, *pte;
1116         int level = agaw_to_level(domain->agaw);
1117         int offset;
1118
1119         BUG_ON(!domain->pgd);
1120
1121         if (!domain_pfn_supported(domain, pfn))
1122                 /* Address beyond IOMMU's addressing capabilities. */
1123                 return NULL;
1124
1125         parent = domain->pgd;
1126
1127         while (1) {
1128                 void *tmp_page;
1129
1130                 offset = pfn_level_offset(pfn, level);
1131                 pte = &parent[offset];
1132                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
1133                         break;
1134                 if (level == *target_level)
1135                         break;
1136
1137                 if (!dma_pte_present(pte)) {
1138                         uint64_t pteval;
1139
1140                         tmp_page = alloc_pgtable_page(domain->nid);
1141
1142                         if (!tmp_page)
1143                                 return NULL;
1144
1145                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
1146                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
1147                         if (domain_use_first_level(domain)) {
1148                                 pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
1149                                 if (iommu_is_dma_domain(&domain->domain))
1150                                         pteval |= DMA_FL_PTE_ACCESS;
1151                         }
1152                         if (cmpxchg64(&pte->val, 0ULL, pteval))
1153                                 /* Someone else set it while we were thinking; use theirs. */
1154                                 free_pgtable_page(tmp_page);
1155                         else
1156                                 domain_flush_cache(domain, pte, sizeof(*pte));
1157                 }
1158                 if (level == 1)
1159                         break;
1160
1161                 parent = phys_to_virt(dma_pte_addr(pte));
1162                 level--;
1163         }
1164
1165         if (!*target_level)
1166                 *target_level = level;
1167
1168         return pte;
1169 }
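
/*
 * Notes on pfn_to_dma_pte(): if *target_level is 0 the walk stops at the
 * existing leaf (or at the first superpage/non-present entry) and the
 * level reached is passed back through *target_level; otherwise
 * intermediate tables are allocated as needed until the requested level
 * is reached.  NULL is returned if the pfn is beyond the domain's
 * address width or a table allocation fails.
 */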
1170
1171 /* return address's pte at specific level */
1172 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1173                                          unsigned long pfn,
1174                                          int level, int *large_page)
1175 {
1176         struct dma_pte *parent, *pte;
1177         int total = agaw_to_level(domain->agaw);
1178         int offset;
1179
1180         parent = domain->pgd;
1181         while (level <= total) {
1182                 offset = pfn_level_offset(pfn, total);
1183                 pte = &parent[offset];
1184                 if (level == total)
1185                         return pte;
1186
1187                 if (!dma_pte_present(pte)) {
1188                         *large_page = total;
1189                         break;
1190                 }
1191
1192                 if (dma_pte_superpage(pte)) {
1193                         *large_page = total;
1194                         return pte;
1195                 }
1196
1197                 parent = phys_to_virt(dma_pte_addr(pte));
1198                 total--;
1199         }
1200         return NULL;
1201 }
1202
1203 /* clear last level pte; a tlb flush should follow */
1204 static void dma_pte_clear_range(struct dmar_domain *domain,
1205                                 unsigned long start_pfn,
1206                                 unsigned long last_pfn)
1207 {
1208         unsigned int large_page;
1209         struct dma_pte *first_pte, *pte;
1210
1211         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1212         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1213         BUG_ON(start_pfn > last_pfn);
1214
1215         /* we don't need lock here; nobody else touches the iova range */
1216         do {
1217                 large_page = 1;
1218                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1219                 if (!pte) {
1220                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1221                         continue;
1222                 }
1223                 do {
1224                         dma_clear_pte(pte);
1225                         start_pfn += lvl_to_nr_pages(large_page);
1226                         pte++;
1227                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1228
1229                 domain_flush_cache(domain, first_pte,
1230                                    (void *)pte - (void *)first_pte);
1231
1232         } while (start_pfn && start_pfn <= last_pfn);
1233 }
1234
1235 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1236                                int retain_level, struct dma_pte *pte,
1237                                unsigned long pfn, unsigned long start_pfn,
1238                                unsigned long last_pfn)
1239 {
1240         pfn = max(start_pfn, pfn);
1241         pte = &pte[pfn_level_offset(pfn, level)];
1242
1243         do {
1244                 unsigned long level_pfn;
1245                 struct dma_pte *level_pte;
1246
1247                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1248                         goto next;
1249
1250                 level_pfn = pfn & level_mask(level);
1251                 level_pte = phys_to_virt(dma_pte_addr(pte));
1252
1253                 if (level > 2) {
1254                         dma_pte_free_level(domain, level - 1, retain_level,
1255                                            level_pte, level_pfn, start_pfn,
1256                                            last_pfn);
1257                 }
1258
1259                 /*
1260                  * Free the page table if we're below the level we want to
1261                  * retain and the range covers the entire table.
1262                  */
1263                 if (level < retain_level && !(start_pfn > level_pfn ||
1264                       last_pfn < level_pfn + level_size(level) - 1)) {
1265                         dma_clear_pte(pte);
1266                         domain_flush_cache(domain, pte, sizeof(*pte));
1267                         free_pgtable_page(level_pte);
1268                 }
1269 next:
1270                 pfn += level_size(level);
1271         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1272 }
1273
1274 /*
1275  * clear last level (leaf) ptes and free page table pages below the
1276  * level we wish to keep intact.
1277  */
1278 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1279                                    unsigned long start_pfn,
1280                                    unsigned long last_pfn,
1281                                    int retain_level)
1282 {
1283         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1284         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1285         BUG_ON(start_pfn > last_pfn);
1286
1287         dma_pte_clear_range(domain, start_pfn, last_pfn);
1288
1289         /* We don't need lock here; nobody else touches the iova range */
1290         dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1291                            domain->pgd, 0, start_pfn, last_pfn);
1292
1293         /* free pgd */
1294         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1295                 free_pgtable_page(domain->pgd);
1296                 domain->pgd = NULL;
1297         }
1298 }
1299
1300 /* When a page at a given level is being unlinked from its parent, we don't
1301    need to *modify* it at all. All we need to do is make a list of all the
1302    pages which can be freed just as soon as we've flushed the IOTLB and we
1303    know the hardware page-walk will no longer touch them.
1304    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1305    be freed. */
1306 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1307                                             int level, struct dma_pte *pte,
1308                                             struct page *freelist)
1309 {
1310         struct page *pg;
1311
1312         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1313         pg->freelist = freelist;
1314         freelist = pg;
1315
1316         if (level == 1)
1317                 return freelist;
1318
1319         pte = page_address(pg);
1320         do {
1321                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1322                         freelist = dma_pte_list_pagetables(domain, level - 1,
1323                                                            pte, freelist);
1324                 pte++;
1325         } while (!first_pte_in_page(pte));
1326
1327         return freelist;
1328 }
1329
1330 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1331                                         struct dma_pte *pte, unsigned long pfn,
1332                                         unsigned long start_pfn,
1333                                         unsigned long last_pfn,
1334                                         struct page *freelist)
1335 {
1336         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1337
1338         pfn = max(start_pfn, pfn);
1339         pte = &pte[pfn_level_offset(pfn, level)];
1340
1341         do {
1342                 unsigned long level_pfn;
1343
1344                 if (!dma_pte_present(pte))
1345                         goto next;
1346
1347                 level_pfn = pfn & level_mask(level);
1348
1349                 /* If range covers entire pagetable, free it */
1350                 if (start_pfn <= level_pfn &&
1351                     last_pfn >= level_pfn + level_size(level) - 1) {
1352                         /* These subordinate page tables are going away entirely. Don't
1353                            bother to clear them; we're just going to *free* them. */
1354                         if (level > 1 && !dma_pte_superpage(pte))
1355                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1356
1357                         dma_clear_pte(pte);
1358                         if (!first_pte)
1359                                 first_pte = pte;
1360                         last_pte = pte;
1361                 } else if (level > 1) {
1362                         /* Recurse down into a level that isn't *entirely* obsolete */
1363                         freelist = dma_pte_clear_level(domain, level - 1,
1364                                                        phys_to_virt(dma_pte_addr(pte)),
1365                                                        level_pfn, start_pfn, last_pfn,
1366                                                        freelist);
1367                 }
1368 next:
1369                 pfn += level_size(level);
1370         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1371
1372         if (first_pte)
1373                 domain_flush_cache(domain, first_pte,
1374                                    (void *)++last_pte - (void *)first_pte);
1375
1376         return freelist;
1377 }
1378
1379 /* We can't just free the pages because the IOMMU may still be walking
1380    the page tables, and may have cached the intermediate levels. The
1381    pages can only be freed after the IOTLB flush has been done. */
1382 static struct page *domain_unmap(struct dmar_domain *domain,
1383                                  unsigned long start_pfn,
1384                                  unsigned long last_pfn,
1385                                  struct page *freelist)
1386 {
1387         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1388         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1389         BUG_ON(start_pfn > last_pfn);
1390
1391         /* we don't need lock here; nobody else touches the iova range */
1392         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1393                                        domain->pgd, 0, start_pfn, last_pfn,
1394                                        freelist);
1395
1396         /* free pgd */
1397         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1398                 struct page *pgd_page = virt_to_page(domain->pgd);
1399                 pgd_page->freelist = freelist;
1400                 freelist = pgd_page;
1401
1402                 domain->pgd = NULL;
1403         }
1404
1405         return freelist;
1406 }
1407
1408 static void dma_free_pagelist(struct page *freelist)
1409 {
1410         struct page *pg;
1411
1412         while ((pg = freelist)) {
1413                 freelist = pg->freelist;
1414                 free_pgtable_page(page_address(pg));
1415         }
1416 }
1417
1418 /* iommu handling */
1419 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1420 {
1421         struct root_entry *root;
1422         unsigned long flags;
1423
1424         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1425         if (!root) {
1426                 pr_err("Allocating root entry for %s failed\n",
1427                         iommu->name);
1428                 return -ENOMEM;
1429         }
1430
1431         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1432
1433         spin_lock_irqsave(&iommu->lock, flags);
1434         iommu->root_entry = root;
1435         spin_unlock_irqrestore(&iommu->lock, flags);
1436
1437         return 0;
1438 }
1439
1440 static void iommu_set_root_entry(struct intel_iommu *iommu)
1441 {
1442         u64 addr;
1443         u32 sts;
1444         unsigned long flag;
1445
1446         addr = virt_to_phys(iommu->root_entry);
1447         if (sm_supported(iommu))
1448                 addr |= DMA_RTADDR_SMT;
1449
1450         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1451         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1452
1453         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1454
1455         /* Make sure the hardware completes it */
1456         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1457                       readl, (sts & DMA_GSTS_RTPS), sts);
1458
1459         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1460
1461         iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1462         if (sm_supported(iommu))
1463                 qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
1464         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1465 }
1466
1467 void iommu_flush_write_buffer(struct intel_iommu *iommu)
1468 {
1469         u32 val;
1470         unsigned long flag;
1471
1472         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1473                 return;
1474
1475         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1476         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1477
1478         /* Make sure the hardware completes it */
1479         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1480                       readl, (!(val & DMA_GSTS_WBFS)), val);
1481
1482         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1483 }
1484
1485 /* return value determines if we need a write buffer flush */
1486 static void __iommu_flush_context(struct intel_iommu *iommu,
1487                                   u16 did, u16 source_id, u8 function_mask,
1488                                   u64 type)
1489 {
1490         u64 val = 0;
1491         unsigned long flag;
1492
1493         switch (type) {
1494         case DMA_CCMD_GLOBAL_INVL:
1495                 val = DMA_CCMD_GLOBAL_INVL;
1496                 break;
1497         case DMA_CCMD_DOMAIN_INVL:
1498                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1499                 break;
1500         case DMA_CCMD_DEVICE_INVL:
1501                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1502                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1503                 break;
1504         default:
1505                 BUG();
1506         }
1507         val |= DMA_CCMD_ICC;
1508
1509         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1510         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1511
1512         /* Make sure the hardware completes it */
1513         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1514                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1515
1516         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1517 }
1518
1519 /* return value determines if we need a write buffer flush */
1520 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1521                                 u64 addr, unsigned int size_order, u64 type)
1522 {
1523         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1524         u64 val = 0, val_iva = 0;
1525         unsigned long flag;
1526
1527         switch (type) {
1528         case DMA_TLB_GLOBAL_FLUSH:
1529                 /* global flush doesn't need to set IVA_REG */
1530                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1531                 break;
1532         case DMA_TLB_DSI_FLUSH:
1533                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1534                 break;
1535         case DMA_TLB_PSI_FLUSH:
1536                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1537                 /* IH bit is passed in as part of address */
1538                 val_iva = size_order | addr;
1539                 break;
1540         default:
1541                 BUG();
1542         }
1543         /* Note: set drain read/write */
1544 #if 0
1545         /*
1546          * This is probably only needed to be extra safe; it looks like we
1547          * can ignore it without any impact.
1548          */
1549         if (cap_read_drain(iommu->cap))
1550                 val |= DMA_TLB_READ_DRAIN;
1551 #endif
1552         if (cap_write_drain(iommu->cap))
1553                 val |= DMA_TLB_WRITE_DRAIN;
1554
1555         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1556         /* Note: Only uses first TLB reg currently */
1557         if (val_iva)
1558                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1559         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1560
1561         /* Make sure hardware completes it */
1562         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1563                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1564
1565         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1566
1567         /* check IOTLB invalidation granularity */
1568         if (DMA_TLB_IAIG(val) == 0)
1569                 pr_err("Flush IOTLB failed\n");
1570         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1571                 pr_debug("TLB flush request %Lx, actual %Lx\n",
1572                         (unsigned long long)DMA_TLB_IIRG(type),
1573                         (unsigned long long)DMA_TLB_IAIG(val));
1574 }
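/*
 * For a page-selective (PSI) invalidation, the value written to the IVA
 * register is the page-aligned address with the address-mask order in its
 * low bits (plus the IH hint, which callers fold into "addr"). For example,
 * assuming 4KiB VT-d pages, flushing 16 pages at IOVA 0x100000 uses
 * size_order == 4 and writes 0x100004 to the IVA register before the
 * command word (with IVT set) is written at tlb_offset + 8.
 */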
1575
1576 static struct device_domain_info *
1577 iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
1578                          u8 bus, u8 devfn)
1579 {
1580         struct device_domain_info *info;
1581
1582         assert_spin_locked(&device_domain_lock);
1583
1584         if (!iommu->qi)
1585                 return NULL;
1586
1587         list_for_each_entry(info, &domain->devices, link)
1588                 if (info->iommu == iommu && info->bus == bus &&
1589                     info->devfn == devfn) {
1590                         if (info->ats_supported && info->dev)
1591                                 return info;
1592                         break;
1593                 }
1594
1595         return NULL;
1596 }
1597
1598 static void domain_update_iotlb(struct dmar_domain *domain)
1599 {
1600         struct device_domain_info *info;
1601         bool has_iotlb_device = false;
1602
1603         assert_spin_locked(&device_domain_lock);
1604
1605         list_for_each_entry(info, &domain->devices, link)
1606                 if (info->ats_enabled) {
1607                         has_iotlb_device = true;
1608                         break;
1609                 }
1610
1611         if (!has_iotlb_device) {
1612                 struct subdev_domain_info *sinfo;
1613
1614                 list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
1615                         info = get_domain_info(sinfo->pdev);
1616                         if (info && info->ats_enabled) {
1617                                 has_iotlb_device = true;
1618                                 break;
1619                         }
1620                 }
1621         }
1622
1623         domain->has_iotlb_device = has_iotlb_device;
1624 }
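/*
 * domain->has_iotlb_device caches whether any device (or subdevice) in the
 * domain currently has ATS enabled, so that iommu_flush_dev_iotlb() can
 * return early without taking device_domain_lock when there is nothing to
 * invalidate.
 */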
1625
1626 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1627 {
1628         struct pci_dev *pdev;
1629
1630         assert_spin_locked(&device_domain_lock);
1631
1632         if (!info || !dev_is_pci(info->dev))
1633                 return;
1634
1635         pdev = to_pci_dev(info->dev);
1636         /* For an IOMMU that supports device IOTLB throttling (DIT), we assign
1637          * the PFSID to the invalidation descriptor of a VF so that the IOMMU HW
1638          * can gauge queue depth at the PF level. If DIT is not supported, PFSID
1639          * is treated as reserved and should be set to 0.
1640          */
1641         if (!ecap_dit(info->iommu->ecap))
1642                 info->pfsid = 0;
1643         else {
1644                 struct pci_dev *pf_pdev;
1645
1646                 /* pdev will be returned if the device is not a VF */
1647                 pf_pdev = pci_physfn(pdev);
1648                 info->pfsid = pci_dev_id(pf_pdev);
1649         }
1650
1651 #ifdef CONFIG_INTEL_IOMMU_SVM
1652         /* The PCIe spec, in its wisdom, declares that the behaviour of
1653            the device if you enable PASID support after ATS support is
1654            undefined. So always enable PASID support on devices which
1655            have it, even if we can't yet know if we're ever going to
1656            use it. */
1657         if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1658                 info->pasid_enabled = 1;
1659
1660         if (info->pri_supported &&
1661             (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1)  &&
1662             !pci_reset_pri(pdev) && !pci_enable_pri(pdev, PRQ_DEPTH))
1663                 info->pri_enabled = 1;
1664 #endif
1665         if (info->ats_supported && pci_ats_page_aligned(pdev) &&
1666             !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1667                 info->ats_enabled = 1;
1668                 domain_update_iotlb(info->domain);
1669                 info->ats_qdep = pci_ats_queue_depth(pdev);
1670         }
1671 }
1672
1673 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1674 {
1675         struct pci_dev *pdev;
1676
1677         assert_spin_locked(&device_domain_lock);
1678
1679         if (!dev_is_pci(info->dev))
1680                 return;
1681
1682         pdev = to_pci_dev(info->dev);
1683
1684         if (info->ats_enabled) {
1685                 pci_disable_ats(pdev);
1686                 info->ats_enabled = 0;
1687                 domain_update_iotlb(info->domain);
1688         }
1689 #ifdef CONFIG_INTEL_IOMMU_SVM
1690         if (info->pri_enabled) {
1691                 pci_disable_pri(pdev);
1692                 info->pri_enabled = 0;
1693         }
1694         if (info->pasid_enabled) {
1695                 pci_disable_pasid(pdev);
1696                 info->pasid_enabled = 0;
1697         }
1698 #endif
1699 }
1700
1701 static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
1702                                     u64 addr, unsigned int mask)
1703 {
1704         u16 sid, qdep;
1705
1706         if (!info || !info->ats_enabled)
1707                 return;
1708
1709         sid = info->bus << 8 | info->devfn;
1710         qdep = info->ats_qdep;
1711         qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1712                            qdep, addr, mask);
1713 }
1714
1715 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1716                                   u64 addr, unsigned mask)
1717 {
1718         unsigned long flags;
1719         struct device_domain_info *info;
1720         struct subdev_domain_info *sinfo;
1721
1722         if (!domain->has_iotlb_device)
1723                 return;
1724
1725         spin_lock_irqsave(&device_domain_lock, flags);
1726         list_for_each_entry(info, &domain->devices, link)
1727                 __iommu_flush_dev_iotlb(info, addr, mask);
1728
1729         list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
1730                 info = get_domain_info(sinfo->pdev);
1731                 __iommu_flush_dev_iotlb(info, addr, mask);
1732         }
1733         spin_unlock_irqrestore(&device_domain_lock, flags);
1734 }
1735
1736 static void domain_flush_piotlb(struct intel_iommu *iommu,
1737                                 struct dmar_domain *domain,
1738                                 u64 addr, unsigned long npages, bool ih)
1739 {
1740         u16 did = domain->iommu_did[iommu->seq_id];
1741
1742         if (domain->default_pasid)
1743                 qi_flush_piotlb(iommu, did, domain->default_pasid,
1744                                 addr, npages, ih);
1745
1746         if (!list_empty(&domain->devices))
1747                 qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih);
1748 }
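/*
 * For domains using first-level translation, IOTLB invalidation is
 * PASID-based: both the domain's default_pasid (if one is set) and
 * PASID_RID2PASID, which covers DMA requests that carry no PASID, are
 * flushed for the given address range.
 */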
1749
1750 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1751                                   struct dmar_domain *domain,
1752                                   unsigned long pfn, unsigned int pages,
1753                                   int ih, int map)
1754 {
1755         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1756         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1757         u16 did = domain->iommu_did[iommu->seq_id];
1758
1759         BUG_ON(pages == 0);
1760
1761         if (ih)
1762                 ih = 1 << 6;
1763
1764         if (domain_use_first_level(domain)) {
1765                 domain_flush_piotlb(iommu, domain, addr, pages, ih);
1766         } else {
1767                 /*
1768                  * Fallback to domain selective flush if no PSI support or
1769                  * the size is too big. PSI requires page size to be 2 ^ x,
1770                  * and the base address is naturally aligned to the size.
1771                  */
1772                 if (!cap_pgsel_inv(iommu->cap) ||
1773                     mask > cap_max_amask_val(iommu->cap))
1774                         iommu->flush.flush_iotlb(iommu, did, 0, 0,
1775                                                         DMA_TLB_DSI_FLUSH);
1776                 else
1777                         iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1778                                                         DMA_TLB_PSI_FLUSH);
1779         }
1780
1781         /*
1782          * In caching mode, changes of pages from non-present to present require
1783          * a flush. However, the device IOTLB doesn't need to be flushed in this case.
1784          */
1785         if (!cap_caching_mode(iommu->cap) || !map)
1786                 iommu_flush_dev_iotlb(domain, addr, mask);
1787 }
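/*
 * The invalidation granularity above is derived from the page count: mask
 * is the order of the smallest power-of-two region covering "pages" (e.g.
 * pages == 3 gives mask == 2, a four-page region), and the IH hint is
 * passed as bit 6 of the address handed to the PSI flush.
 */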
1788
1789 /* Notification for newly created mappings */
1790 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1791                                         struct dmar_domain *domain,
1792                                         unsigned long pfn, unsigned int pages)
1793 {
1794         /*
1795          * It's a non-present to present mapping. Only flush if in caching
1796          * mode and using second-level translation.
1797          */
1798         if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
1799                 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1800         else
1801                 iommu_flush_write_buffer(iommu);
1802 }
1803
1804 static void intel_flush_iotlb_all(struct iommu_domain *domain)
1805 {
1806         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
1807         int idx;
1808
1809         for_each_domain_iommu(idx, dmar_domain) {
1810                 struct intel_iommu *iommu = g_iommus[idx];
1811                 u16 did = dmar_domain->iommu_did[iommu->seq_id];
1812
1813                 if (domain_use_first_level(dmar_domain))
1814                         domain_flush_piotlb(iommu, dmar_domain, 0, -1, 0);
1815                 else
1816                         iommu->flush.flush_iotlb(iommu, did, 0, 0,
1817                                                  DMA_TLB_DSI_FLUSH);
1818
1819                 if (!cap_caching_mode(iommu->cap))
1820                         iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1821                                               0, MAX_AGAW_PFN_WIDTH);
1822         }
1823 }
1824
1825 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1826 {
1827         u32 pmen;
1828         unsigned long flags;
1829
1830         if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1831                 return;
1832
1833         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1834         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1835         pmen &= ~DMA_PMEN_EPM;
1836         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1837
1838         /* wait for the protected region status bit to clear */
1839         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1840                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1841
1842         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1843 }
1844
1845 static void iommu_enable_translation(struct intel_iommu *iommu)
1846 {
1847         u32 sts;
1848         unsigned long flags;
1849
1850         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1851         iommu->gcmd |= DMA_GCMD_TE;
1852         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1853
1854         /* Make sure hardware completes it */
1855         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1856                       readl, (sts & DMA_GSTS_TES), sts);
1857
1858         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1859 }
1860
1861 static void iommu_disable_translation(struct intel_iommu *iommu)
1862 {
1863         u32 sts;
1864         unsigned long flag;
1865
1866         if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
1867             (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
1868                 return;
1869
1870         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1871         iommu->gcmd &= ~DMA_GCMD_TE;
1872         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1873
1874         /* Make sure hardware completes it */
1875         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1876                       readl, (!(sts & DMA_GSTS_TES)), sts);
1877
1878         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1879 }
1880
1881 static int iommu_init_domains(struct intel_iommu *iommu)
1882 {
1883         u32 ndomains, nlongs;
1884         size_t size;
1885
1886         ndomains = cap_ndoms(iommu->cap);
1887         pr_debug("%s: Number of Domains supported <%d>\n",
1888                  iommu->name, ndomains);
1889         nlongs = BITS_TO_LONGS(ndomains);
1890
1891         spin_lock_init(&iommu->lock);
1892
1893         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1894         if (!iommu->domain_ids)
1895                 return -ENOMEM;
1896
1897         size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1898         iommu->domains = kzalloc(size, GFP_KERNEL);
1899
1900         if (iommu->domains) {
1901                 size = 256 * sizeof(struct dmar_domain *);
1902                 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1903         }
1904
1905         if (!iommu->domains || !iommu->domains[0]) {
1906                 pr_err("%s: Allocating domain array failed\n",
1907                        iommu->name);
1908                 kfree(iommu->domain_ids);
1909                 kfree(iommu->domains);
1910                 iommu->domain_ids = NULL;
1911                 iommu->domains    = NULL;
1912                 return -ENOMEM;
1913         }
1914
1915         /*
1916          * If Caching mode is set, then invalid translations are tagged
1917          * with domain-id 0, hence we need to pre-allocate it. We also
1918          * use domain-id 0 as a marker for non-allocated domain-id, so
1919          * make sure it is not used for a real domain.
1920          */
1921         set_bit(0, iommu->domain_ids);
1922
1923         /*
1924          * VT-d spec rev 3.0 (section 6.2.3.1) requires that each PASID
1925          * entry for first-level or pass-through translation modes should
1926          * be programmed with a domain id different from those used for
1927          * second-level or nested translation. We reserve a domain id for
1928          * this purpose.
1929          */
1930         if (sm_supported(iommu))
1931                 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1932
1933         return 0;
1934 }
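/*
 * The domain pointer table is two-level: iommu->domains is an array of
 * pointers to 256-entry chunks, and only chunk 0 is allocated up front.
 * For example, cap_ndoms() == 65536 yields a 256-slot first level; the
 * remaining chunks are presumably allocated on demand as domain ids in
 * their range are used (see set_iommu_domain() in domain_attach_iommu()).
 */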
1935
1936 static void disable_dmar_iommu(struct intel_iommu *iommu)
1937 {
1938         struct device_domain_info *info, *tmp;
1939         unsigned long flags;
1940
1941         if (!iommu->domains || !iommu->domain_ids)
1942                 return;
1943
1944         spin_lock_irqsave(&device_domain_lock, flags);
1945         list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1946                 if (info->iommu != iommu)
1947                         continue;
1948
1949                 if (!info->dev || !info->domain)
1950                         continue;
1951
1952                 __dmar_remove_one_dev_info(info);
1953         }
1954         spin_unlock_irqrestore(&device_domain_lock, flags);
1955
1956         if (iommu->gcmd & DMA_GCMD_TE)
1957                 iommu_disable_translation(iommu);
1958 }
1959
1960 static void free_dmar_iommu(struct intel_iommu *iommu)
1961 {
1962         if ((iommu->domains) && (iommu->domain_ids)) {
1963                 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1964                 int i;
1965
1966                 for (i = 0; i < elems; i++)
1967                         kfree(iommu->domains[i]);
1968                 kfree(iommu->domains);
1969                 kfree(iommu->domain_ids);
1970                 iommu->domains = NULL;
1971                 iommu->domain_ids = NULL;
1972         }
1973
1974         g_iommus[iommu->seq_id] = NULL;
1975
1976         /* free context mapping */
1977         free_context_table(iommu);
1978
1979 #ifdef CONFIG_INTEL_IOMMU_SVM
1980         if (pasid_supported(iommu)) {
1981                 if (ecap_prs(iommu->ecap))
1982                         intel_svm_finish_prq(iommu);
1983         }
1984         if (vccap_pasid(iommu->vccap))
1985                 ioasid_unregister_allocator(&iommu->pasid_allocator);
1986
1987 #endif
1988 }
1989
1990 /*
1991  * Check and return whether first level is used by default for
1992  * DMA translation.
1993  */
1994 static bool first_level_by_default(unsigned int type)
1995 {
1996         /* Only SL is available in legacy mode */
1997         if (!scalable_mode_support())
1998                 return false;
1999
2000         /* Only one level (either FL or SL) is available, just use it */
2001         if (intel_cap_flts_sanity() ^ intel_cap_slts_sanity())
2002                 return intel_cap_flts_sanity();
2003
2004         /* Both levels are available, decide it based on domain type */
2005         return type != IOMMU_DOMAIN_UNMANAGED;
2006 }
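/*
 * Put differently: legacy (non-scalable-mode) hardware always uses
 * second-level translation; if only one level is usable it is chosen
 * unconditionally; and when both are usable, IOMMU_DOMAIN_UNMANAGED
 * domains (e.g. device assignment through vfio) stay on the second level
 * while other domain types default to the first level.
 */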
2007
2008 static struct dmar_domain *alloc_domain(unsigned int type)
2009 {
2010         struct dmar_domain *domain;
2011
2012         domain = alloc_domain_mem();
2013         if (!domain)
2014                 return NULL;
2015
2016         memset(domain, 0, sizeof(*domain));
2017         domain->nid = NUMA_NO_NODE;
2018         if (first_level_by_default(type))
2019                 domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
2020         domain->has_iotlb_device = false;
2021         INIT_LIST_HEAD(&domain->devices);
2022         INIT_LIST_HEAD(&domain->subdevices);
2023
2024         return domain;
2025 }
2026
2027 /* Must be called with iommu->lock */
2028 static int domain_attach_iommu(struct dmar_domain *domain,
2029                                struct intel_iommu *iommu)
2030 {
2031         unsigned long ndomains;
2032         int num;
2033
2034         assert_spin_locked(&device_domain_lock);
2035         assert_spin_locked(&iommu->lock);
2036
2037         domain->iommu_refcnt[iommu->seq_id] += 1;
2038         if (domain->iommu_refcnt[iommu->seq_id] == 1) {
2039                 ndomains = cap_ndoms(iommu->cap);
2040                 num      = find_first_zero_bit(iommu->domain_ids, ndomains);
2041
2042                 if (num >= ndomains) {
2043                         pr_err("%s: No free domain ids\n", iommu->name);
2044                         domain->iommu_refcnt[iommu->seq_id] -= 1;
2045                         return -ENOSPC;
2046                 }
2047
2048                 set_bit(num, iommu->domain_ids);
2049                 set_iommu_domain(iommu, num, domain);
2050
2051                 domain->iommu_did[iommu->seq_id] = num;
2052                 domain->nid                      = iommu->node;
2053
2054                 domain_update_iommu_cap(domain);
2055         }
2056
2057         return 0;
2058 }
2059
2060 static void domain_detach_iommu(struct dmar_domain *domain,
2061                                 struct intel_iommu *iommu)
2062 {
2063         int num;
2064
2065         assert_spin_locked(&device_domain_lock);
2066         assert_spin_locked(&iommu->lock);
2067
2068         domain->iommu_refcnt[iommu->seq_id] -= 1;
2069         if (domain->iommu_refcnt[iommu->seq_id] == 0) {
2070                 num = domain->iommu_did[iommu->seq_id];
2071                 clear_bit(num, iommu->domain_ids);
2072                 set_iommu_domain(iommu, num, NULL);
2073
2074                 domain_update_iommu_cap(domain);
2075                 domain->iommu_did[iommu->seq_id] = 0;
2076         }
2077 }
2078
2079 static inline int guestwidth_to_adjustwidth(int gaw)
2080 {
2081         int agaw;
2082         int r = (gaw - 12) % 9;
2083
2084         if (r == 0)
2085                 agaw = gaw;
2086         else
2087                 agaw = gaw + 9 - r;
2088         if (agaw > 64)
2089                 agaw = 64;
2090         return agaw;
2091 }
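/*
 * The adjusted guest address width is the guest width rounded up to the
 * next 9-bit page-table level boundary above the 12-bit page offset,
 * capped at 64 bits. For example, gaw == 48 is already on a boundary
 * ((48 - 12) % 9 == 0) and stays 48, while gaw == 50 becomes
 * 50 + 9 - 2 == 57.
 */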
2092
2093 static void domain_exit(struct dmar_domain *domain)
2094 {
2095
2096         /* Remove associated devices and clear attached or cached domains */
2097         domain_remove_dev_info(domain);
2098
2099         if (domain->pgd) {
2100                 struct page *freelist;
2101
2102                 freelist = domain_unmap(domain, 0,
2103                                         DOMAIN_MAX_PFN(domain->gaw), NULL);
2104                 dma_free_pagelist(freelist);
2105         }
2106
2107         free_domain_mem(domain);
2108 }
2109
2110 /*
2111  * Get the PASID directory size for scalable mode context entry.
2112  * Value of X in the PDTS field of a scalable mode context entry
2113  * indicates PASID directory with 2^(X + 7) entries.
2114  */
2115 static inline unsigned long context_get_sm_pds(struct pasid_table *table)
2116 {
2117         int pds, max_pde;
2118
2119         max_pde = table->max_pasid >> PASID_PDE_SHIFT;
2120         pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
2121         if (pds < 7)
2122                 return 0;
2123
2124         return pds - 7;
2125 }
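/*
 * Worked example, assuming PASID_PDE_SHIFT is 6 (64 PASIDs per directory
 * entry): with table->max_pasid == 0x10000, max_pde is 0x400,
 * find_first_bit() returns 10, so pds is 3 and the PDTS field encodes a
 * PASID directory with 2^(3 + 7) == 1024 entries.
 */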
2126
2127 /*
2128  * Set the RID_PASID field of a scalable mode context entry. The
2129  * IOMMU hardware will use the PASID value set in this field for
2130  * DMA translations of DMA requests without PASID.
2131  */
2132 static inline void
2133 context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
2134 {
2135         context->hi |= pasid & ((1 << 20) - 1);
2136 }
2137
2138 /*
2139  * Set the DTE(Device-TLB Enable) field of a scalable mode context
2140  * entry.
2141  */
2142 static inline void context_set_sm_dte(struct context_entry *context)
2143 {
2144         context->lo |= (1 << 2);
2145 }
2146
2147 /*
2148  * Set the PRE(Page Request Enable) field of a scalable mode context
2149  * entry.
2150  */
2151 static inline void context_set_sm_pre(struct context_entry *context)
2152 {
2153         context->lo |= (1 << 4);
2154 }
2155
2156 /* Convert value to context PASID directory size field coding. */
2157 #define context_pdts(pds)       (((pds) & 0x7) << 9)
2158
2159 static int domain_context_mapping_one(struct dmar_domain *domain,
2160                                       struct intel_iommu *iommu,
2161                                       struct pasid_table *table,
2162                                       u8 bus, u8 devfn)
2163 {
2164         u16 did = domain->iommu_did[iommu->seq_id];
2165         int translation = CONTEXT_TT_MULTI_LEVEL;
2166         struct device_domain_info *info = NULL;
2167         struct context_entry *context;
2168         unsigned long flags;
2169         int ret;
2170
2171         WARN_ON(did == 0);
2172
2173         if (hw_pass_through && domain_type_is_si(domain))
2174                 translation = CONTEXT_TT_PASS_THROUGH;
2175
2176         pr_debug("Set context mapping for %02x:%02x.%d\n",
2177                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
2178
2179         BUG_ON(!domain->pgd);
2180
2181         spin_lock_irqsave(&device_domain_lock, flags);
2182         spin_lock(&iommu->lock);
2183
2184         ret = -ENOMEM;
2185         context = iommu_context_addr(iommu, bus, devfn, 1);
2186         if (!context)
2187                 goto out_unlock;
2188
2189         ret = 0;
2190         if (context_present(context))
2191                 goto out_unlock;
2192
2193         /*
2194          * For kdump cases, old valid entries may be cached due to the
2195          * in-flight DMA and copied pgtable, but there is no unmapping
2196          * behaviour for them, thus we need an explicit cache flush for
2197          * the newly-mapped device. For kdump, at this point, the device
2198          * is supposed to finish reset at its driver probe stage, so no
2199          * in-flight DMA will exist, and we don't need to worry about it
2200          * hereafter.
2201          */
2202         if (context_copied(context)) {
2203                 u16 did_old = context_domain_id(context);
2204
2205                 if (did_old < cap_ndoms(iommu->cap)) {
2206                         iommu->flush.flush_context(iommu, did_old,
2207                                                    (((u16)bus) << 8) | devfn,
2208                                                    DMA_CCMD_MASK_NOBIT,
2209                                                    DMA_CCMD_DEVICE_INVL);
2210                         iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2211                                                  DMA_TLB_DSI_FLUSH);
2212                 }
2213         }
2214
2215         context_clear_entry(context);
2216
2217         if (sm_supported(iommu)) {
2218                 unsigned long pds;
2219
2220                 WARN_ON(!table);
2221
2222                 /* Setup the PASID DIR pointer: */
2223                 pds = context_get_sm_pds(table);
2224                 context->lo = (u64)virt_to_phys(table->table) |
2225                                 context_pdts(pds);
2226
2227                 /* Setup the RID_PASID field: */
2228                 context_set_sm_rid2pasid(context, PASID_RID2PASID);
2229
2230                 /*
2231                  * Setup the Device-TLB enable bit and Page request
2232                  * Enable bit:
2233                  */
2234                 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2235                 if (info && info->ats_supported)
2236                         context_set_sm_dte(context);
2237                 if (info && info->pri_supported)
2238                         context_set_sm_pre(context);
2239         } else {
2240                 struct dma_pte *pgd = domain->pgd;
2241                 int agaw;
2242
2243                 context_set_domain_id(context, did);
2244
2245                 if (translation != CONTEXT_TT_PASS_THROUGH) {
2246                         /*
2247                          * Skip top levels of page tables for an IOMMU which has
2248                          * a smaller agaw than the default. Unnecessary for PT mode.
2249                          */
2250                         for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2251                                 ret = -ENOMEM;
2252                                 pgd = phys_to_virt(dma_pte_addr(pgd));
2253                                 if (!dma_pte_present(pgd))
2254                                         goto out_unlock;
2255                         }
2256
2257                         info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2258                         if (info && info->ats_supported)
2259                                 translation = CONTEXT_TT_DEV_IOTLB;
2260                         else
2261                                 translation = CONTEXT_TT_MULTI_LEVEL;
2262
2263                         context_set_address_root(context, virt_to_phys(pgd));
2264                         context_set_address_width(context, agaw);
2265                 } else {
2266                         /*
2267                          * In pass through mode, AW must be programmed to
2268                          * indicate the largest AGAW value supported by
2269                          * hardware. And ASR is ignored by hardware.
2270                          */
2271                         context_set_address_width(context, iommu->msagaw);
2272                 }
2273
2274                 context_set_translation_type(context, translation);
2275         }
2276
2277         context_set_fault_enable(context);
2278         context_set_present(context);
2279         if (!ecap_coherent(iommu->ecap))
2280                 clflush_cache_range(context, sizeof(*context));
2281
2282         /*
2283          * It's a non-present to present mapping. If hardware doesn't cache
2284          * non-present entries we only need to flush the write-buffer. If it
2285          * _does_ cache non-present entries, then it does so in the special
2286          * domain #0, which we have to flush:
2287          */
2288         if (cap_caching_mode(iommu->cap)) {
2289                 iommu->flush.flush_context(iommu, 0,
2290                                            (((u16)bus) << 8) | devfn,
2291                                            DMA_CCMD_MASK_NOBIT,
2292                                            DMA_CCMD_DEVICE_INVL);
2293                 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2294         } else {
2295                 iommu_flush_write_buffer(iommu);
2296         }
2297         iommu_enable_dev_iotlb(info);
2298
2299         ret = 0;
2300
2301 out_unlock:
2302         spin_unlock(&iommu->lock);
2303         spin_unlock_irqrestore(&device_domain_lock, flags);
2304
2305         return ret;
2306 }
2307
2308 struct domain_context_mapping_data {
2309         struct dmar_domain *domain;
2310         struct intel_iommu *iommu;
2311         struct pasid_table *table;
2312 };
2313
2314 static int domain_context_mapping_cb(struct pci_dev *pdev,
2315                                      u16 alias, void *opaque)
2316 {
2317         struct domain_context_mapping_data *data = opaque;
2318
2319         return domain_context_mapping_one(data->domain, data->iommu,
2320                                           data->table, PCI_BUS_NUM(alias),
2321                                           alias & 0xff);
2322 }
2323
2324 static int
2325 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2326 {
2327         struct domain_context_mapping_data data;
2328         struct pasid_table *table;
2329         struct intel_iommu *iommu;
2330         u8 bus, devfn;
2331
2332         iommu = device_to_iommu(dev, &bus, &devfn);
2333         if (!iommu)
2334                 return -ENODEV;
2335
2336         table = intel_pasid_get_table(dev);
2337
2338         if (!dev_is_pci(dev))
2339                 return domain_context_mapping_one(domain, iommu, table,
2340                                                   bus, devfn);
2341
2342         data.domain = domain;
2343         data.iommu = iommu;
2344         data.table = table;
2345
2346         return pci_for_each_dma_alias(to_pci_dev(dev),
2347                                       &domain_context_mapping_cb, &data);
2348 }
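/*
 * For PCI devices the context entry is programmed once per DMA alias, so
 * a device behind a PCIe-to-PCI bridge also gets entries for the requester
 * IDs the bridge may generate on its behalf; pci_for_each_dma_alias()
 * walks those aliases and stops at the first callback that returns an
 * error.
 */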
2349
2350 static int domain_context_mapped_cb(struct pci_dev *pdev,
2351                                     u16 alias, void *opaque)
2352 {
2353         struct intel_iommu *iommu = opaque;
2354
2355         return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2356 }
2357
2358 static int domain_context_mapped(struct device *dev)
2359 {
2360         struct intel_iommu *iommu;
2361         u8 bus, devfn;
2362
2363         iommu = device_to_iommu(dev, &bus, &devfn);
2364         if (!iommu)
2365                 return -ENODEV;
2366
2367         if (!dev_is_pci(dev))
2368                 return device_context_mapped(iommu, bus, devfn);
2369
2370         return !pci_for_each_dma_alias(to_pci_dev(dev),
2371                                        domain_context_mapped_cb, iommu);
2372 }
2373
2374 /* Returns the number of VT-d pages, but aligned to the MM page size */
2375 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2376                                             size_t size)
2377 {
2378         host_addr &= ~PAGE_MASK;
2379         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2380 }
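/*
 * Example, assuming 4KiB pages on both the MM and VT-d side: for
 * host_addr == 0x1234 and size == 0x2000, the in-page offset is 0x234,
 * 0x234 + 0x2000 rounds up to 0x3000, and three VT-d pages are returned.
 */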
2381
2382 /* Return largest possible superpage level for a given mapping */
2383 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2384                                           unsigned long iov_pfn,
2385                                           unsigned long phy_pfn,
2386                                           unsigned long pages)
2387 {
2388         int support, level = 1;
2389         unsigned long pfnmerge;
2390
2391         support = domain->iommu_superpage;
2392
2393         /* To use a large page, the virtual *and* physical addresses
2394            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2395            of them will mean we have to use smaller pages. So just
2396            merge them and check both at once. */
2397         pfnmerge = iov_pfn | phy_pfn;
2398
2399         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2400                 pages >>= VTD_STRIDE_SHIFT;
2401                 if (!pages)
2402                         break;
2403                 pfnmerge >>= VTD_STRIDE_SHIFT;
2404                 level++;
2405                 support--;
2406         }
2407         return level;
2408 }
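/*
 * Example, assuming the usual 9-bit stride (512 entries per table level):
 * if iov_pfn and phy_pfn are both multiples of 512 and 512 pages are being
 * mapped, the function returns level 2 (a 2MiB superpage with 4KiB base
 * pages), provided domain->iommu_superpage is at least 1.
 */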
2409
2410 /*
2411  * Ensure that old small page tables are removed to make room for superpage(s).
2412  * We're going to add new large pages, so make sure we don't remove their parent
2413  * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared.
2414  */
2415 static void switch_to_super_page(struct dmar_domain *domain,
2416                                  unsigned long start_pfn,
2417                                  unsigned long end_pfn, int level)
2418 {
2419         unsigned long lvl_pages = lvl_to_nr_pages(level);
2420         struct dma_pte *pte = NULL;
2421         int i;
2422
2423         while (start_pfn <= end_pfn) {
2424                 if (!pte)
2425                         pte = pfn_to_dma_pte(domain, start_pfn, &level);
2426
2427                 if (dma_pte_present(pte)) {
2428                         dma_pte_free_pagetable(domain, start_pfn,
2429                                                start_pfn + lvl_pages - 1,
2430                                                level + 1);
2431
2432                         for_each_domain_iommu(i, domain)
2433                                 iommu_flush_iotlb_psi(g_iommus[i], domain,
2434                                                       start_pfn, lvl_pages,
2435                                                       0, 0);
2436                 }
2437
2438                 pte++;
2439                 start_pfn += lvl_pages;
2440                 if (first_pte_in_page(pte))
2441                         pte = NULL;
2442         }
2443 }
2444
2445 static int
2446 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2447                  unsigned long phys_pfn, unsigned long nr_pages, int prot)
2448 {
2449         struct dma_pte *first_pte = NULL, *pte = NULL;
2450         unsigned int largepage_lvl = 0;
2451         unsigned long lvl_pages = 0;
2452         phys_addr_t pteval;
2453         u64 attr;
2454
2455         BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2456
2457         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2458                 return -EINVAL;
2459
2460         attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
2461         attr |= DMA_FL_PTE_PRESENT;
2462         if (domain_use_first_level(domain)) {
2463                 attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
2464                 if (prot & DMA_PTE_WRITE)
2465                         attr |= DMA_FL_PTE_DIRTY;
2466         }
2467
2468         pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
2469
2470         while (nr_pages > 0) {
2471                 uint64_t tmp;
2472
2473                 if (!pte) {
2474                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
2475                                         phys_pfn, nr_pages);
2476
2477                         pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2478                         if (!pte)
2479                                 return -ENOMEM;
2480                         first_pte = pte;
2481
2482                         lvl_pages = lvl_to_nr_pages(largepage_lvl);
2483
2484                         /* It is a large page */
2485                         if (largepage_lvl > 1) {
2486                                 unsigned long end_pfn;
2487                                 unsigned long pages_to_remove;
2488
2489                                 pteval |= DMA_PTE_LARGE_PAGE;
2490                                 pages_to_remove = min_t(unsigned long, nr_pages,
2491                                                         nr_pte_to_next_page(pte) * lvl_pages);
2492                                 end_pfn = iov_pfn + pages_to_remove - 1;
2493                                 switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
2494                         } else {
2495                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2496                         }
2497
2498                 }
2499                 /* We don't need a lock here; nobody else
2500                  * touches the IOVA range
2501                  */
2502                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2503                 if (tmp) {
2504                         static int dumps = 5;
2505                         pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2506                                 iov_pfn, tmp, (unsigned long long)pteval);
2507                         if (dumps) {
2508                                 dumps--;
2509                                 debug_dma_dump_mappings(NULL);
2510                         }
2511                         WARN_ON(1);
2512                 }
2513
2514                 nr_pages -= lvl_pages;
2515                 iov_pfn += lvl_pages;
2516                 phys_pfn += lvl_pages;
2517                 pteval += lvl_pages * VTD_PAGE_SIZE;
2518
2519                 /* If the next PTE would be the first in a new page, then we
2520                  * need to flush the cache on the entries we've just written.
2521                  * And then we'll need to recalculate 'pte', so clear it and
2522                  * let it get set again in the if (!pte) block above.
2523                  *
2524                  * If we're done (!nr_pages) we need to flush the cache too.
2525                  *
2526                  * Also if we've been setting superpages, we may need to
2527                  * recalculate 'pte' and switch back to smaller pages for the
2528                  * end of the mapping, if the trailing size is not enough to
2529                  * use another superpage (i.e. nr_pages < lvl_pages).
2530                  */
2531                 pte++;
2532                 if (!nr_pages || first_pte_in_page(pte) ||
2533                     (largepage_lvl > 1 && nr_pages < lvl_pages)) {
2534                         domain_flush_cache(domain, first_pte,
2535                                            (void *)pte - (void *)first_pte);
2536                         pte = NULL;
2537                 }
2538         }
2539
2540         return 0;
2541 }
2542
2543 static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
2544 {
2545         struct intel_iommu *iommu = info->iommu;
2546         struct context_entry *context;
2547         unsigned long flags;
2548         u16 did_old;
2549
2550         if (!iommu)
2551                 return;
2552
2553         spin_lock_irqsave(&iommu->lock, flags);
2554         context = iommu_context_addr(iommu, bus, devfn, 0);
2555         if (!context) {
2556                 spin_unlock_irqrestore(&iommu->lock, flags);
2557                 return;
2558         }
2559
2560         if (sm_supported(iommu)) {
2561                 if (hw_pass_through && domain_type_is_si(info->domain))
2562                         did_old = FLPT_DEFAULT_DID;
2563                 else
2564                         did_old = info->domain->iommu_did[iommu->seq_id];
2565         } else {
2566                 did_old = context_domain_id(context);
2567         }
2568
2569         context_clear_entry(context);
2570         __iommu_flush_cache(iommu, context, sizeof(*context));
2571         spin_unlock_irqrestore(&iommu->lock, flags);
2572         iommu->flush.flush_context(iommu,
2573                                    did_old,
2574                                    (((u16)bus) << 8) | devfn,
2575                                    DMA_CCMD_MASK_NOBIT,
2576                                    DMA_CCMD_DEVICE_INVL);
2577
2578         if (sm_supported(iommu))
2579                 qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0);
2580
2581         iommu->flush.flush_iotlb(iommu,
2582                                  did_old,
2583                                  0,
2584                                  0,
2585                                  DMA_TLB_DSI_FLUSH);
2586
2587         __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH);
2588 }
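/*
 * Teardown order above: the context entry is cleared and flushed to memory
 * under iommu->lock, then the context-cache, the PASID-cache (in scalable
 * mode), the IOTLB (domain-selective) and finally the device's ATS TLB are
 * invalidated so that no stale translations for the old domain id survive.
 */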
2589
2590 static inline void unlink_domain_info(struct device_domain_info *info)
2591 {
2592         assert_spin_locked(&device_domain_lock);
2593         list_del(&info->link);
2594         list_del(&info->global);
2595         if (info->dev)
2596                 dev_iommu_priv_set(info->dev, NULL);
2597 }
2598
2599 static void domain_remove_dev_info(struct dmar_domain *domain)
2600 {
2601         struct device_domain_info *info, *tmp;
2602         unsigned long flags;
2603
2604         spin_lock_irqsave(&device_domain_lock, flags);
2605         list_for_each_entry_safe(info, tmp, &domain->devices, link)
2606                 __dmar_remove_one_dev_info(info);
2607         spin_unlock_irqrestore(&device_domain_lock, flags);
2608 }
2609
2610 struct dmar_domain *find_domain(struct device *dev)
2611 {
2612         struct device_domain_info *info;
2613
2614         if (unlikely(!dev || !dev->iommu))
2615                 return NULL;
2616
2617         if (unlikely(attach_deferred(dev)))
2618                 return NULL;
2619
2620         /* No lock here, assumes no domain exit in normal case */
2621         info = get_domain_info(dev);
2622         if (likely(info))
2623                 return info->domain;
2624
2625         return NULL;
2626 }
2627
2628 static inline struct device_domain_info *
2629 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2630 {
2631         struct device_domain_info *info;
2632
2633         list_for_each_entry(info, &device_domain_list, global)
2634                 if (info->segment == segment && info->bus == bus &&
2635                     info->devfn == devfn)
2636                         return info;
2637
2638         return NULL;
2639 }
2640
2641 static int domain_setup_first_level(struct intel_iommu *iommu,
2642                                     struct dmar_domain *domain,
2643                                     struct device *dev,
2644                                     u32 pasid)
2645 {
2646         struct dma_pte *pgd = domain->pgd;
2647         int agaw, level;
2648         int flags = 0;
2649
2650         /*
2651          * Skip top levels of page tables for an IOMMU which has
2652          * a smaller agaw than the default. Unnecessary for PT mode.
2653          */
2654         for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2655                 pgd = phys_to_virt(dma_pte_addr(pgd));
2656                 if (!dma_pte_present(pgd))
2657                         return -ENOMEM;
2658         }
2659
2660         level = agaw_to_level(agaw);
2661         if (level != 4 && level != 5)
2662                 return -EINVAL;
2663
2664         if (pasid != PASID_RID2PASID)
2665                 flags |= PASID_FLAG_SUPERVISOR_MODE;
2666         if (level == 5)
2667                 flags |= PASID_FLAG_FL5LP;
2668
2669         if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
2670                 flags |= PASID_FLAG_PAGE_SNOOP;
2671
2672         return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
2673                                              domain->iommu_did[iommu->seq_id],
2674                                              flags);
2675 }
2676
2677 static bool dev_is_real_dma_subdevice(struct device *dev)
2678 {
2679         return dev && dev_is_pci(dev) &&
2680                pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
2681 }
2682
2683 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2684                                                     int bus, int devfn,
2685                                                     struct device *dev,
2686                                                     struct dmar_domain *domain)
2687 {
2688         struct dmar_domain *found = NULL;
2689         struct device_domain_info *info;
2690         unsigned long flags;
2691         int ret;
2692
2693         info = alloc_devinfo_mem();
2694         if (!info)
2695                 return NULL;
2696
2697         if (!dev_is_real_dma_subdevice(dev)) {
2698                 info->bus = bus;
2699                 info->devfn = devfn;
2700                 info->segment = iommu->segment;
2701         } else {
2702                 struct pci_dev *pdev = to_pci_dev(dev);
2703
2704                 info->bus = pdev->bus->number;
2705                 info->devfn = pdev->devfn;
2706                 info->segment = pci_domain_nr(pdev->bus);
2707         }
2708
2709         info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2710         info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2711         info->ats_qdep = 0;
2712         info->dev = dev;
2713         info->domain = domain;
2714         info->iommu = iommu;
2715         info->pasid_table = NULL;
2716         info->auxd_enabled = 0;
2717         INIT_LIST_HEAD(&info->subdevices);
2718
2719         if (dev && dev_is_pci(dev)) {
2720                 struct pci_dev *pdev = to_pci_dev(info->dev);
2721
2722                 if (ecap_dev_iotlb_support(iommu->ecap) &&
2723                     pci_ats_supported(pdev) &&
2724                     dmar_find_matched_atsr_unit(pdev))
2725                         info->ats_supported = 1;
2726
2727                 if (sm_supported(iommu)) {
2728                         if (pasid_supported(iommu)) {
2729                                 int features = pci_pasid_features(pdev);
2730                                 if (features >= 0)
2731                                         info->pasid_supported = features | 1;
2732                         }
2733
2734                         if (info->ats_supported && ecap_prs(iommu->ecap) &&
2735                             pci_pri_supported(pdev))
2736                                 info->pri_supported = 1;
2737                 }
2738         }
2739
2740         spin_lock_irqsave(&device_domain_lock, flags);
2741         if (dev)
2742                 found = find_domain(dev);
2743
2744         if (!found) {
2745                 struct device_domain_info *info2;
2746                 info2 = dmar_search_domain_by_dev_info(info->segment, info->bus,
2747                                                        info->devfn);
2748                 if (info2) {
2749                         found      = info2->domain;
2750                         info2->dev = dev;
2751                 }
2752         }
2753
2754         if (found) {
2755                 spin_unlock_irqrestore(&device_domain_lock, flags);
2756                 free_devinfo_mem(info);
2757                 /* Caller must free the original domain */
2758                 return found;
2759         }
2760
2761         spin_lock(&iommu->lock);
2762         ret = domain_attach_iommu(domain, iommu);
2763         spin_unlock(&iommu->lock);
2764
2765         if (ret) {
2766                 spin_unlock_irqrestore(&device_domain_lock, flags);
2767                 free_devinfo_mem(info);
2768                 return NULL;
2769         }
2770
2771         list_add(&info->link, &domain->devices);
2772         list_add(&info->global, &device_domain_list);
2773         if (dev)
2774                 dev_iommu_priv_set(dev, info);
2775         spin_unlock_irqrestore(&device_domain_lock, flags);
2776
2777         /* PASID table is mandatory for a PCI device in scalable mode. */
2778         if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
2779                 ret = intel_pasid_alloc_table(dev);
2780                 if (ret) {
2781                         dev_err(dev, "PASID table allocation failed\n");
2782                         dmar_remove_one_dev_info(dev);
2783                         return NULL;
2784                 }
2785
2786                 /* Setup the PASID entry for requests without PASID: */
2787                 spin_lock_irqsave(&iommu->lock, flags);
2788                 if (hw_pass_through && domain_type_is_si(domain))
2789                         ret = intel_pasid_setup_pass_through(iommu, domain,
2790                                         dev, PASID_RID2PASID);
2791                 else if (domain_use_first_level(domain))
2792                         ret = domain_setup_first_level(iommu, domain, dev,
2793                                         PASID_RID2PASID);
2794                 else
2795                         ret = intel_pasid_setup_second_level(iommu, domain,
2796                                         dev, PASID_RID2PASID);
2797                 spin_unlock_irqrestore(&iommu->lock, flags);
2798                 if (ret) {
2799                         dev_err(dev, "Setup RID2PASID failed\n");
2800                         dmar_remove_one_dev_info(dev);
2801                         return NULL;
2802                 }
2803         }
2804
2805         if (dev && domain_context_mapping(domain, dev)) {
2806                 dev_err(dev, "Domain context map failed\n");
2807                 dmar_remove_one_dev_info(dev);
2808                 return NULL;
2809         }
2810
2811         return domain;
2812 }
2813
2814 static int iommu_domain_identity_map(struct dmar_domain *domain,
2815                                      unsigned long first_vpfn,
2816                                      unsigned long last_vpfn)
2817 {
2818         /*
2819          * The RMRR range might overlap with the physical memory range;
2820          * clear it first.
2821          */
2822         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2823
2824         return __domain_mapping(domain, first_vpfn,
2825                                 first_vpfn, last_vpfn - first_vpfn + 1,
2826                                 DMA_PTE_READ|DMA_PTE_WRITE);
2827 }
2828
2829 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2830
2831 static int __init si_domain_init(int hw)
2832 {
2833         struct dmar_rmrr_unit *rmrr;
2834         struct device *dev;
2835         int i, nid, ret;
2836
2837         si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY);
2838         if (!si_domain)
2839                 return -EFAULT;
2840
2841         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2842                 domain_exit(si_domain);
2843                 return -EFAULT;
2844         }
2845
2846         if (hw)
2847                 return 0;
2848
2849         for_each_online_node(nid) {
2850                 unsigned long start_pfn, end_pfn;
2851                 int i;
2852
2853                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2854                         ret = iommu_domain_identity_map(si_domain,
2855                                         mm_to_dma_pfn(start_pfn),
2856                                         mm_to_dma_pfn(end_pfn));
2857                         if (ret)
2858                                 return ret;
2859                 }
2860         }
2861
2862         /*
2863          * Identity map the RMRRs so that devices with RMRRs can also use
2864          * the si_domain.
2865          */
2866         for_each_rmrr_units(rmrr) {
2867                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2868                                           i, dev) {
2869                         unsigned long long start = rmrr->base_address;
2870                         unsigned long long end = rmrr->end_address;
2871
2872                         if (WARN_ON(end < start ||
2873                                     end >> agaw_to_width(si_domain->agaw)))
2874                                 continue;
2875
2876                         ret = iommu_domain_identity_map(si_domain,
2877                                         mm_to_dma_pfn(start >> PAGE_SHIFT),
2878                                         mm_to_dma_pfn(end >> PAGE_SHIFT));
2879                         if (ret)
2880                                 return ret;
2881                 }
2882         }
2883
2884         return 0;
2885 }
2886
2887 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2888 {
2889         struct dmar_domain *ndomain;
2890         struct intel_iommu *iommu;
2891         u8 bus, devfn;
2892
2893         iommu = device_to_iommu(dev, &bus, &devfn);
2894         if (!iommu)
2895                 return -ENODEV;
2896
2897         ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2898         if (ndomain != domain)
2899                 return -EBUSY;
2900
2901         return 0;
2902 }
2903
2904 static bool device_has_rmrr(struct device *dev)
2905 {
2906         struct dmar_rmrr_unit *rmrr;
2907         struct device *tmp;
2908         int i;
2909
2910         rcu_read_lock();
2911         for_each_rmrr_units(rmrr) {
2912                 /*
2913                  * Return TRUE if this RMRR contains the device that
2914                  * is passed in.
2915                  */
2916                 for_each_active_dev_scope(rmrr->devices,
2917                                           rmrr->devices_cnt, i, tmp)
2918                         if (tmp == dev ||
2919                             is_downstream_to_pci_bridge(dev, tmp)) {
2920                                 rcu_read_unlock();
2921                                 return true;
2922                         }
2923         }
2924         rcu_read_unlock();
2925         return false;
2926 }
2927
2928 /**
2929  * device_rmrr_is_relaxable - Test whether the RMRR of this device
2930  * is relaxable (i.e. it may be left unenforced under some conditions)
2931  * @dev: device handle
2932  *
2933  * We assume that PCI USB devices with RMRRs have them largely
2934  * for historical reasons and that the RMRR space is not actively used post
2935  * boot.  This exclusion may change if vendors begin to abuse it.
2936  *
2937  * The same exception is made for graphics devices, with the requirement that
2938  * any use of the RMRR regions will be torn down before assigning the device
2939  * to a guest.
2940  *
2941  * Return: true if the RMRR is relaxable, false otherwise
2942  */
2943 static bool device_rmrr_is_relaxable(struct device *dev)
2944 {
2945         struct pci_dev *pdev;
2946
2947         if (!dev_is_pci(dev))
2948                 return false;
2949
2950         pdev = to_pci_dev(dev);
2951         if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2952                 return true;
2953         else
2954                 return false;
2955 }
2956
2957 /*
2958  * There are a couple of cases where we need to restrict the functionality of
2959  * devices associated with RMRRs.  The first is when evaluating a device for
2960  * identity mapping because problems exist when devices are moved in and out
2961  * of domains and their respective RMRR information is lost.  This means that
2962  * a device with associated RMRRs will never be in a "passthrough" domain.
2963  * The second is use of the device through the IOMMU API.  This interface
2964  * expects to have full control of the IOVA space for the device.  We cannot
2965  * satisfy both the requirement that RMRR access is maintained and have an
2966  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2967  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2968  * We therefore prevent devices associated with an RMRR from participating in
2969  * the IOMMU API, which eliminates them from device assignment.
2970  *
2971  * In both cases, devices which have relaxable RMRRs are not affected by this
2972  * restriction. See the device_rmrr_is_relaxable comment.
2973  */
2974 static bool device_is_rmrr_locked(struct device *dev)
2975 {
2976         if (!device_has_rmrr(dev))
2977                 return false;
2978
2979         if (device_rmrr_is_relaxable(dev))
2980                 return false;
2981
2982         return true;
2983 }
2984
2985 /*
2986  * Return the required default domain type for a specific device.
2987  *
2988  * @dev: the device in query
2989  * @startup: true if this is during early boot
2991  * Returns:
2992  *  - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2993  *  - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain
2994  *  - IOMMU_DOMAIN_IDENTITY: device requires an identity mapping domain
2995  */
2996 static int device_def_domain_type(struct device *dev)
2997 {
2998         if (dev_is_pci(dev)) {
2999                 struct pci_dev *pdev = to_pci_dev(dev);
3000
3001                 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
3002                         return IOMMU_DOMAIN_IDENTITY;
3003
3004                 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
3005                         return IOMMU_DOMAIN_IDENTITY;
3006         }
3007
3008         return 0;
3009 }
3010
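/*
 * Select the invalidation method for @iommu: prefer queued
 * invalidation and fall back to register-based invalidation if QI
 * cannot be enabled.
 */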
3011 static void intel_iommu_init_qi(struct intel_iommu *iommu)
3012 {
3013         /*
3014          * Start from a sane iommu hardware state.
3015          * If queued invalidation was already initialized by us
3016          * (for example, while enabling interrupt remapping) then
3017          * things are already rolling from a sane state.
3018          */
3019         if (!iommu->qi) {
3020                 /*
3021                  * Clear any previous faults.
3022                  */
3023                 dmar_fault(-1, iommu);
3024                 /*
3025                  * Disable queued invalidation if supported and already enabled
3026                  * before OS handover.
3027                  */
3028                 dmar_disable_qi(iommu);
3029         }
3030
3031         if (dmar_enable_qi(iommu)) {
3032                 /*
3033                  * Queued Invalidate not enabled, use Register Based Invalidate
3034                  */
3035                 iommu->flush.flush_context = __iommu_flush_context;
3036                 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
3037                 pr_info("%s: Using Register based invalidation\n",
3038                         iommu->name);
3039         } else {
3040                 iommu->flush.flush_context = qi_flush_context;
3041                 iommu->flush.flush_iotlb = qi_flush_iotlb;
3042                 pr_info("%s: Using Queued invalidation\n", iommu->name);
3043         }
3044 }
3045
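/*
 * copy_context_table - copy the context table(s) of one bus from the
 * previous kernel (kexec/kdump handover)
 *
 * Read the old root entry for @bus, map the old context table(s), mark
 * every present entry as copied and reserve its domain ID, then store
 * the freshly allocated table(s) in @tbl for later insertion into the
 * new root entry table. With extended (ECS) root entries a bus has a
 * lower and an upper context table, hence the *2 indexing.
 */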
3046 static int copy_context_table(struct intel_iommu *iommu,
3047                               struct root_entry *old_re,
3048                               struct context_entry **tbl,
3049                               int bus, bool ext)
3050 {
3051         int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
3052         struct context_entry *new_ce = NULL, ce;
3053         struct context_entry *old_ce = NULL;
3054         struct root_entry re;
3055         phys_addr_t old_ce_phys;
3056
3057         tbl_idx = ext ? bus * 2 : bus;
3058         memcpy(&re, old_re, sizeof(re));
3059
3060         for (devfn = 0; devfn < 256; devfn++) {
3061                 /* First calculate the correct index */
3062                 idx = (ext ? devfn * 2 : devfn) % 256;
3063
3064                 if (idx == 0) {
3065                         /* First save what we may have and clean up */
3066                         if (new_ce) {
3067                                 tbl[tbl_idx] = new_ce;
3068                                 __iommu_flush_cache(iommu, new_ce,
3069                                                     VTD_PAGE_SIZE);
3070                                 pos = 1;
3071                         }
3072
3073                         if (old_ce)
3074                                 memunmap(old_ce);
3075
3076                         ret = 0;
3077                         if (devfn < 0x80)
3078                                 old_ce_phys = root_entry_lctp(&re);
3079                         else
3080                                 old_ce_phys = root_entry_uctp(&re);
3081
3082                         if (!old_ce_phys) {
3083                                 if (ext && devfn == 0) {
3084                                         /* No LCTP, try UCTP */
3085                                         devfn = 0x7f;
3086                                         continue;
3087                                 } else {
3088                                         goto out;
3089                                 }
3090                         }
3091
3092                         ret = -ENOMEM;
3093                         old_ce = memremap(old_ce_phys, PAGE_SIZE,
3094                                         MEMREMAP_WB);
3095                         if (!old_ce)
3096                                 goto out;
3097
3098                         new_ce = alloc_pgtable_page(iommu->node);
3099                         if (!new_ce)
3100                                 goto out_unmap;
3101
3102                         ret = 0;
3103                 }
3104
3105                 /* Now copy the context entry */
3106                 memcpy(&ce, old_ce + idx, sizeof(ce));
3107
3108                 if (!__context_present(&ce))
3109                         continue;
3110
3111                 did = context_domain_id(&ce);
3112                 if (did >= 0 && did < cap_ndoms(iommu->cap))
3113                         set_bit(did, iommu->domain_ids);
3114
3115                 /*
3116                  * We need a marker for copied context entries. This
3117                  * marker needs to work for the old format as well as
3118                  * for extended context entries.
3119                  *
3120                  * Bit 67 of the context entry is used. In the old
3121                  * format this bit is available to software, in the
3122                  * extended format it is the PGE bit, but PGE is ignored
3123                  * by HW if PASIDs are disabled (and thus still
3124                  * available).
3125                  *
3126                  * So disable PASIDs first and then mark the entry
3127                  * copied. This means that we don't copy PASID
3128                  * translations from the old kernel, but this is fine as
3129                  * faults there are not fatal.
3130                  */
3131                 context_clear_pasid_enable(&ce);
3132                 context_set_copied(&ce);
3133
3134                 new_ce[idx] = ce;
3135         }
3136
3137         tbl[tbl_idx + pos] = new_ce;
3138
3139         __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3140
3141 out_unmap:
3142         memunmap(old_ce);
3143
3144 out:
3145         return ret;
3146 }
3147
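/*
 * copy_translation_tables - take over the translation structures left
 * behind by the previous kernel
 *
 * Used when translation is already enabled at handover (typically in a
 * kdump kernel): map the old root table, copy each per-bus context
 * table via copy_context_table(), and wire the copies into this
 * kernel's root_entry table under iommu->lock.
 */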
3148 static int copy_translation_tables(struct intel_iommu *iommu)
3149 {
3150         struct context_entry **ctxt_tbls;
3151         struct root_entry *old_rt;
3152         phys_addr_t old_rt_phys;
3153         int ctxt_table_entries;
3154         unsigned long flags;
3155         u64 rtaddr_reg;
3156         int bus, ret;
3157         bool new_ext, ext;
3158
3159         rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3160         ext        = !!(rtaddr_reg & DMA_RTADDR_RTT);
3161         new_ext    = !!ecap_ecs(iommu->ecap);
3162
3163         /*
3164          * The RTT bit can only be changed when translation is disabled,
3165          * but disabling translation would open a window for data
3166          * corruption. So bail out and don't copy anything if we would
3167          * have to change the bit.
3168          */
3169         if (new_ext != ext)
3170                 return -EINVAL;
3171
3172         old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3173         if (!old_rt_phys)
3174                 return -EINVAL;
3175
3176         old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3177         if (!old_rt)
3178                 return -ENOMEM;
3179
3180         /* This is too big for the stack - allocate it from slab */
3181         ctxt_table_entries = ext ? 512 : 256;
3182         ret = -ENOMEM;
3183         ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3184         if (!ctxt_tbls)
3185                 goto out_unmap;
3186
3187         for (bus = 0; bus < 256; bus++) {
3188                 ret = copy_context_table(iommu, &old_rt[bus],
3189                                          ctxt_tbls, bus, ext);
3190                 if (ret) {
3191                         pr_err("%s: Failed to copy context table for bus %d\n",
3192                                 iommu->name, bus);
3193                         continue;
3194                 }
3195         }
3196
3197         spin_lock_irqsave(&iommu->lock, flags);
3198
3199         /* Context tables are copied, now write them to the root_entry table */
3200         for (bus = 0; bus < 256; bus++) {
3201                 int idx = ext ? bus * 2 : bus;
3202                 u64 val;
3203
3204                 if (ctxt_tbls[idx]) {
3205                         val = virt_to_phys(ctxt_tbls[idx]) | 1;
3206                         iommu->root_entry[bus].lo = val;
3207                 }
3208
3209                 if (!ext || !ctxt_tbls[idx + 1])
3210                         continue;
3211
3212                 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3213                 iommu->root_entry[bus].hi = val;
3214         }
3215
3216         spin_unlock_irqrestore(&iommu->lock, flags);
3217
3218         kfree(ctxt_tbls);
3219
3220         __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3221
3222         ret = 0;
3223
3224 out_unmap:
3225         memunmap(old_rt);
3226
3227         return ret;
3228 }
3229
3230 #ifdef CONFIG_INTEL_IOMMU_SVM
3231 static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
3232 {
3233         struct intel_iommu *iommu = data;
3234         ioasid_t ioasid;
3235
3236         if (!iommu)
3237                 return INVALID_IOASID;
3238         /*
3239          * VT-d virtual command interface always uses the full 20 bit
3240          * PASID range. The host can partition the guest PASID range
3241          * based on policies, but that is out of the guest's control.
3242          */
3243         if (min < PASID_MIN || max > intel_pasid_max_id)
3244                 return INVALID_IOASID;
3245
3246         if (vcmd_alloc_pasid(iommu, &ioasid))
3247                 return INVALID_IOASID;
3248
3249         return ioasid;
3250 }
3251
3252 static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
3253 {
3254         struct intel_iommu *iommu = data;
3255
3256         if (!iommu)
3257                 return;
3258         /*
3259          * Sanity checking of the ioasid owner is done at the upper layer,
3260          * e.g. VFIO. We can only free the PASID when all devices are unbound.
3261          */
3262         if (ioasid_find(NULL, ioasid, NULL)) {
3263                 pr_alert("Cannot free active IOASID %d\n", ioasid);
3264                 return;
3265         }
3266         vcmd_free_pasid(iommu, ioasid);
3267 }
3268
3269 static void register_pasid_allocator(struct intel_iommu *iommu)
3270 {
3271         /*
3272          * If we are running in the host, there is no need for a custom
3273          * allocator since PASIDs are allocated from the host system-wide.
3274          */
3275         if (!cap_caching_mode(iommu->cap))
3276                 return;
3277
3278         if (!sm_supported(iommu)) {
3279                 pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
3280                 return;
3281         }
3282
3283         /*
3284          * Register a custom PASID allocator if we are running in a guest,
3285          * where guest PASIDs must be obtained via the virtual command interface.
3286          * There can be multiple vIOMMUs in each guest but only one allocator
3287          * is active. All vIOMMU allocators will eventually be calling the same
3288          * host allocator.
3289          */
3290         if (!vccap_pasid(iommu->vccap))
3291                 return;
3292
3293         pr_info("Register custom PASID allocator\n");
3294         iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
3295         iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
3296         iommu->pasid_allocator.pdata = (void *)iommu;
3297         if (ioasid_register_allocator(&iommu->pasid_allocator)) {
3298                 pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
3299                 /*
3300                  * Disable scalable mode on this IOMMU if there
3301                  * is no custom allocator. Mixing SM capable vIOMMU
3302                  * and non-SM vIOMMU are not supported.
3303                  */
3304                 intel_iommu_sm = 0;
3305         }
3306 }
3307 #endif
3308
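/*
 * init_dmars - one-time setup of all DMA remapping hardware units
 *
 * Allocate the global g_iommus array, then initialize queued
 * invalidation, domain IDs and root entries for every IOMMU (copying
 * translation tables from a previous kernel where applicable), set up
 * the static identity domain, and finally enable fault reporting and,
 * where configured, the SVM page request queue.
 */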
3309 static int __init init_dmars(void)
3310 {
3311         struct dmar_drhd_unit *drhd;
3312         struct intel_iommu *iommu;
3313         int ret;
3314
3315         /*
3316          * for each drhd
3317          *    allocate root
3318          *    initialize and program root entry to not present
3319          * endfor
3320          */
3321         for_each_drhd_unit(drhd) {
3322                 /*
3323                  * lock not needed as this is only incremented in the single-
3324                  * threaded kernel __init code path; all other accesses are
3325                  * read only
3326                  */
3327                 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3328                         g_num_of_iommus++;
3329                         continue;
3330                 }
3331                 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3332         }
3333
3334         /* Preallocate enough resources for IOMMU hot-addition */
3335         if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3336                 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3337
3338         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3339                         GFP_KERNEL);
3340         if (!g_iommus) {
3341                 ret = -ENOMEM;
3342                 goto error;
3343         }
3344
3345         ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL);
3346         if (ret)
3347                 goto free_iommu;
3348
3349         for_each_iommu(iommu, drhd) {
3350                 if (drhd->ignored) {
3351                         iommu_disable_translation(iommu);
3352                         continue;
3353                 }
3354
3355                 /*
3356                  * Find the max pasid size of all IOMMUs in the system.
3357                  * We need to ensure the system pasid table is no bigger
3358                  * than the smallest supported size.
3359                  */
3360                 if (pasid_supported(iommu)) {
3361                         u32 temp = 2 << ecap_pss(iommu->ecap);
3362
3363                         intel_pasid_max_id = min_t(u32, temp,
3364                                                    intel_pasid_max_id);
3365                 }
3366
3367                 g_iommus[iommu->seq_id] = iommu;
3368
3369                 intel_iommu_init_qi(iommu);
3370
3371                 ret = iommu_init_domains(iommu);
3372                 if (ret)
3373                         goto free_iommu;
3374
3375                 init_translation_status(iommu);
3376
3377                 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3378                         iommu_disable_translation(iommu);
3379                         clear_translation_pre_enabled(iommu);
3380                         pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3381                                 iommu->name);
3382                 }
3383
3384                 /*
3385                  * TBD:
3386                  * we could share the same root & context tables
3387                  * among all IOMMUs. Need to split it later.
3388                  */
3389                 ret = iommu_alloc_root_entry(iommu);
3390                 if (ret)
3391                         goto free_iommu;
3392
3393                 if (translation_pre_enabled(iommu)) {
3394                         pr_info("Translation already enabled - trying to copy translation structures\n");
3395
3396                         ret = copy_translation_tables(iommu);
3397                         if (ret) {
3398                                 /*
3399                                  * We found the IOMMU with translation
3400                                  * enabled - but failed to copy over the
3401                                  * old root-entry table. Try to proceed
3402                                  * by disabling translation now and
3403                                  * allocating a clean root-entry table.
3404                                  * This might cause DMAR faults, but
3405                                  * probably the dump will still succeed.
3406                                  */
3407                                 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3408                                        iommu->name);
3409                                 iommu_disable_translation(iommu);
3410                                 clear_translation_pre_enabled(iommu);
3411                         } else {
3412                                 pr_info("Copied translation tables from previous kernel for %s\n",
3413                                         iommu->name);
3414                         }
3415                 }
3416
3417                 if (!ecap_pass_through(iommu->ecap))
3418                         hw_pass_through = 0;
3419                 intel_svm_check(iommu);
3420         }
3421
3422         /*
3423          * Now that qi is enabled on all iommus, set the root entry and flush
3424          * caches. This is required on some Intel X58 chipsets, otherwise the
3425          * flush_context function will loop forever and the boot hangs.
3426          */
3427         for_each_active_iommu(iommu, drhd) {
3428                 iommu_flush_write_buffer(iommu);
3429 #ifdef CONFIG_INTEL_IOMMU_SVM
3430                 register_pasid_allocator(iommu);
3431 #endif
3432                 iommu_set_root_entry(iommu);
3433         }
3434
3435 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3436         dmar_map_gfx = 0;
3437 #endif
3438
3439         if (!dmar_map_gfx)
3440                 iommu_identity_mapping |= IDENTMAP_GFX;
3441
3442         check_tylersburg_isoch();
3443
3444         ret = si_domain_init(hw_pass_through);
3445         if (ret)
3446                 goto free_iommu;
3447
3448         /*
3449          * for each drhd
3450          *   enable fault log
3451          *   global invalidate context cache
3452          *   global invalidate iotlb
3453          *   enable translation
3454          */
3455         for_each_iommu(iommu, drhd) {
3456                 if (drhd->ignored) {
3457                         /*
3458                          * we always have to disable PMRs or DMA may fail on
3459                          * this device
3460                          */
3461                         if (force_on)
3462                                 iommu_disable_protect_mem_regions(iommu);
3463                         continue;
3464                 }
3465
3466                 iommu_flush_write_buffer(iommu);
3467
3468 #ifdef CONFIG_INTEL_IOMMU_SVM
3469                 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3470                         /*
3471                          * Calling dmar_alloc_hwirq() with dmar_global_lock held
3472                          * could cause a lock race, so drop the lock here.
3473                          */
3474                         up_write(&dmar_global_lock);
3475                         ret = intel_svm_enable_prq(iommu);
3476                         down_write(&dmar_global_lock);
3477                         if (ret)
3478                                 goto free_iommu;
3479                 }
3480 #endif
3481                 ret = dmar_set_interrupt(iommu);
3482                 if (ret)
3483                         goto free_iommu;
3484         }
3485
3486         return 0;
3487
3488 free_iommu:
3489         for_each_active_iommu(iommu, drhd) {
3490                 disable_dmar_iommu(iommu);
3491                 free_dmar_iommu(iommu);
3492         }
3493
3494         kfree(g_iommus);
3495
3496 error:
3497         return ret;
3498 }
3499
3500 static inline int iommu_domain_cache_init(void)
3501 {
3502         int ret = 0;
3503
3504         iommu_domain_cache = kmem_cache_create("iommu_domain",
3505                                          sizeof(struct dmar_domain),
3506                                          0,
3507                                          SLAB_HWCACHE_ALIGN,
3509                                          NULL);
3510         if (!iommu_domain_cache) {
3511                 pr_err("Couldn't create iommu_domain cache\n");
3512                 ret = -ENOMEM;
3513         }
3514
3515         return ret;
3516 }
3517
3518 static inline int iommu_devinfo_cache_init(void)
3519 {
3520         int ret = 0;
3521
3522         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3523                                          sizeof(struct device_domain_info),
3524                                          0,
3525                                          SLAB_HWCACHE_ALIGN,
3526                                          NULL);
3527         if (!iommu_devinfo_cache) {
3528                 pr_err("Couldn't create devinfo cache\n");
3529                 ret = -ENOMEM;
3530         }
3531
3532         return ret;
3533 }
3534
3535 static int __init iommu_init_mempool(void)
3536 {
3537         int ret;
3538         ret = iova_cache_get();
3539         if (ret)
3540                 return ret;
3541
3542         ret = iommu_domain_cache_init();
3543         if (ret)
3544                 goto domain_error;
3545
3546         ret = iommu_devinfo_cache_init();
3547         if (!ret)
3548                 return ret;
3549
3550         kmem_cache_destroy(iommu_domain_cache);
3551 domain_error:
3552         iova_cache_put();
3553
3554         return -ENOMEM;
3555 }
3556
3557 static void __init iommu_exit_mempool(void)
3558 {
3559         kmem_cache_destroy(iommu_devinfo_cache);
3560         kmem_cache_destroy(iommu_domain_cache);
3561         iova_cache_put();
3562 }
3563
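/*
 * Mark DRHD units that can be ignored: units whose device scope is
 * empty, and (when graphics mapping is disabled) units that serve only
 * graphics devices.
 */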
3564 static void __init init_no_remapping_devices(void)
3565 {
3566         struct dmar_drhd_unit *drhd;
3567         struct device *dev;
3568         int i;
3569
3570         for_each_drhd_unit(drhd) {
3571                 if (!drhd->include_all) {
3572                         for_each_active_dev_scope(drhd->devices,
3573                                                   drhd->devices_cnt, i, dev)
3574                                 break;
3575                         /* ignore DMAR unit if no devices exist */
3576                         if (i == drhd->devices_cnt)
3577                                 drhd->ignored = 1;
3578                 }
3579         }
3580
3581         for_each_active_drhd_unit(drhd) {
3582                 if (drhd->include_all)
3583                         continue;
3584
3585                 for_each_active_dev_scope(drhd->devices,
3586                                           drhd->devices_cnt, i, dev)
3587                         if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3588                                 break;
3589                 if (i < drhd->devices_cnt)
3590                         continue;
3591
3592                 /* This IOMMU has *only* gfx devices. Either bypass it or
3593                    mark it as gfx-dedicated, as appropriate. */
3594                 drhd->gfx_dedicated = 1;
3595                 if (!dmar_map_gfx)
3596                         drhd->ignored = 1;
3597         }
3598 }
3599
3600 #ifdef CONFIG_SUSPEND
3601 static int init_iommu_hw(void)
3602 {
3603         struct dmar_drhd_unit *drhd;
3604         struct intel_iommu *iommu = NULL;
3605
3606         for_each_active_iommu(iommu, drhd)
3607                 if (iommu->qi)
3608                         dmar_reenable_qi(iommu);
3609
3610         for_each_iommu(iommu, drhd) {
3611                 if (drhd->ignored) {
3612                         /*
3613                          * we always have to disable PMRs or DMA may fail on
3614                          * this device
3615                          */
3616                         if (force_on)
3617                                 iommu_disable_protect_mem_regions(iommu);
3618                         continue;
3619                 }
3620
3621                 iommu_flush_write_buffer(iommu);
3622                 iommu_set_root_entry(iommu);
3623                 iommu_enable_translation(iommu);
3624                 iommu_disable_protect_mem_regions(iommu);
3625         }
3626
3627         return 0;
3628 }
3629
3630 static void iommu_flush_all(void)
3631 {
3632         struct dmar_drhd_unit *drhd;
3633         struct intel_iommu *iommu;
3634
3635         for_each_active_iommu(iommu, drhd) {
3636                 iommu->flush.flush_context(iommu, 0, 0, 0,
3637                                            DMA_CCMD_GLOBAL_INVL);
3638                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3639                                          DMA_TLB_GLOBAL_FLUSH);
3640         }
3641 }
3642
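/*
 * Save the fault-event registers of every active IOMMU and disable
 * translation before suspend; iommu_resume() restores them after
 * init_iommu_hw() has re-enabled translation.
 */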
3643 static int iommu_suspend(void)
3644 {
3645         struct dmar_drhd_unit *drhd;
3646         struct intel_iommu *iommu = NULL;
3647         unsigned long flag;
3648
3649         for_each_active_iommu(iommu, drhd) {
3650                 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
3651                                              GFP_KERNEL);
3652                 if (!iommu->iommu_state)
3653                         goto nomem;
3654         }
3655
3656         iommu_flush_all();
3657
3658         for_each_active_iommu(iommu, drhd) {
3659                 iommu_disable_translation(iommu);
3660
3661                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3662
3663                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3664                         readl(iommu->reg + DMAR_FECTL_REG);
3665                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3666                         readl(iommu->reg + DMAR_FEDATA_REG);
3667                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3668                         readl(iommu->reg + DMAR_FEADDR_REG);
3669                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3670                         readl(iommu->reg + DMAR_FEUADDR_REG);
3671
3672                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3673         }
3674         return 0;
3675
3676 nomem:
3677         for_each_active_iommu(iommu, drhd)
3678                 kfree(iommu->iommu_state);
3679
3680         return -ENOMEM;
3681 }
3682
3683 static void iommu_resume(void)
3684 {
3685         struct dmar_drhd_unit *drhd;
3686         struct intel_iommu *iommu = NULL;
3687         unsigned long flag;
3688
3689         if (init_iommu_hw()) {
3690                 if (force_on)
3691                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3692                 else
3693                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3694                 return;
3695         }
3696
3697         for_each_active_iommu(iommu, drhd) {
3698
3699                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3700
3701                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3702                         iommu->reg + DMAR_FECTL_REG);
3703                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3704                         iommu->reg + DMAR_FEDATA_REG);
3705                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3706                         iommu->reg + DMAR_FEADDR_REG);
3707                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3708                         iommu->reg + DMAR_FEUADDR_REG);
3709
3710                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3711         }
3712
3713         for_each_active_iommu(iommu, drhd)
3714                 kfree(iommu->iommu_state);
3715 }
3716
3717 static struct syscore_ops iommu_syscore_ops = {
3718         .resume         = iommu_resume,
3719         .suspend        = iommu_suspend,
3720 };
3721
3722 static void __init init_iommu_pm_ops(void)
3723 {
3724         register_syscore_ops(&iommu_syscore_ops);
3725 }
3726
3727 #else
3728 static inline void init_iommu_pm_ops(void) {}
3729 #endif  /* CONFIG_SUSPEND */
3730
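/*
 * Basic validation of an RMRR reported by firmware: the region must be
 * page aligned, non-empty and pass the arch-specific check.
 */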
3731 static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
3732 {
3733         if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
3734             !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
3735             rmrr->end_address <= rmrr->base_address ||
3736             arch_rmrr_sanity_check(rmrr))
3737                 return -EINVAL;
3738
3739         return 0;
3740 }
3741
3742 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3743 {
3744         struct acpi_dmar_reserved_memory *rmrr;
3745         struct dmar_rmrr_unit *rmrru;
3746
3747         rmrr = (struct acpi_dmar_reserved_memory *)header;
3748         if (rmrr_sanity_check(rmrr)) {
3749                 pr_warn(FW_BUG
3750                            "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
3751                            "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3752                            rmrr->base_address, rmrr->end_address,
3753                            dmi_get_system_info(DMI_BIOS_VENDOR),
3754                            dmi_get_system_info(DMI_BIOS_VERSION),
3755                            dmi_get_system_info(DMI_PRODUCT_VERSION));
3756                 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
3757         }
3758
3759         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3760         if (!rmrru)
3761                 goto out;
3762
3763         rmrru->hdr = header;
3764
3765         rmrru->base_address = rmrr->base_address;
3766         rmrru->end_address = rmrr->end_address;
3767
3768         rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3769                                 ((void *)rmrr) + rmrr->header.length,
3770                                 &rmrru->devices_cnt);
3771         if (rmrru->devices_cnt && rmrru->devices == NULL)
3772                 goto free_rmrru;
3773
3774         list_add(&rmrru->list, &dmar_rmrr_units);
3775
3776         return 0;
3777 free_rmrru:
3778         kfree(rmrru);
3779 out:
3780         return -ENOMEM;
3781 }
3782
3783 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3784 {
3785         struct dmar_atsr_unit *atsru;
3786         struct acpi_dmar_atsr *tmp;
3787
3788         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
3789                                 dmar_rcu_check()) {
3790                 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3791                 if (atsr->segment != tmp->segment)
3792                         continue;
3793                 if (atsr->header.length != tmp->header.length)
3794                         continue;
3795                 if (memcmp(atsr, tmp, atsr->header.length) == 0)
3796                         return atsru;
3797         }
3798
3799         return NULL;
3800 }
3801
3802 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3803 {
3804         struct acpi_dmar_atsr *atsr;
3805         struct dmar_atsr_unit *atsru;
3806
3807         if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3808                 return 0;
3809
3810         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3811         atsru = dmar_find_atsr(atsr);
3812         if (atsru)
3813                 return 0;
3814
3815         atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
3816         if (!atsru)
3817                 return -ENOMEM;
3818
3819         /*
3820          * If memory is allocated from slab by ACPI _DSM method, we need to
3821          * copy the memory content because the memory buffer will be freed
3822          * on return.
3823          */
3824         atsru->hdr = (void *)(atsru + 1);
3825         memcpy(atsru->hdr, hdr, hdr->length);
3826         atsru->include_all = atsr->flags & 0x1;
3827         if (!atsru->include_all) {
3828                 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3829                                 (void *)atsr + atsr->header.length,
3830                                 &atsru->devices_cnt);
3831                 if (atsru->devices_cnt && atsru->devices == NULL) {
3832                         kfree(atsru);
3833                         return -ENOMEM;
3834                 }
3835         }
3836
3837         list_add_rcu(&atsru->list, &dmar_atsr_units);
3838
3839         return 0;
3840 }
3841
3842 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3843 {
3844         dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3845         kfree(atsru);
3846 }
3847
3848 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3849 {
3850         struct acpi_dmar_atsr *atsr;
3851         struct dmar_atsr_unit *atsru;
3852
3853         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3854         atsru = dmar_find_atsr(atsr);
3855         if (atsru) {
3856                 list_del_rcu(&atsru->list);
3857                 synchronize_rcu();
3858                 intel_iommu_free_atsr(atsru);
3859         }
3860
3861         return 0;
3862 }
3863
3864 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3865 {
3866         int i;
3867         struct device *dev;
3868         struct acpi_dmar_atsr *atsr;
3869         struct dmar_atsr_unit *atsru;
3870
3871         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3872         atsru = dmar_find_atsr(atsr);
3873         if (!atsru)
3874                 return 0;
3875
3876         if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
3877                 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
3878                                           i, dev)
3879                         return -EBUSY;
3880         }
3881
3882         return 0;
3883 }
3884
3885 static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
3886 {
3887         struct dmar_satc_unit *satcu;
3888         struct acpi_dmar_satc *tmp;
3889
3890         list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
3891                                 dmar_rcu_check()) {
3892                 tmp = (struct acpi_dmar_satc *)satcu->hdr;
3893                 if (satc->segment != tmp->segment)
3894                         continue;
3895                 if (satc->header.length != tmp->header.length)
3896                         continue;
3897                 if (memcmp(satc, tmp, satc->header.length) == 0)
3898                         return satcu;
3899         }
3900
3901         return NULL;
3902 }
3903
3904 int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
3905 {
3906         struct acpi_dmar_satc *satc;
3907         struct dmar_satc_unit *satcu;
3908
3909         if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3910                 return 0;
3911
3912         satc = container_of(hdr, struct acpi_dmar_satc, header);
3913         satcu = dmar_find_satc(satc);
3914         if (satcu)
3915                 return 0;
3916
3917         satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
3918         if (!satcu)
3919                 return -ENOMEM;
3920
3921         satcu->hdr = (void *)(satcu + 1);
3922         memcpy(satcu->hdr, hdr, hdr->length);
3923         satcu->atc_required = satc->flags & 0x1;
3924         satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
3925                                               (void *)satc + satc->header.length,
3926                                               &satcu->devices_cnt);
3927         if (satcu->devices_cnt && !satcu->devices) {
3928                 kfree(satcu);
3929                 return -ENOMEM;
3930         }
3931         list_add_rcu(&satcu->list, &dmar_satc_units);
3932
3933         return 0;
3934 }
3935
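/*
 * intel_iommu_add - bring up a hot-added DMA remapping unit
 *
 * Mirrors the per-IOMMU portion of init_dmars() for a single unit:
 * capability audit and compatibility checks against the running
 * configuration, followed by domain-ID/root-entry setup, queued
 * invalidation, interrupts and finally enabling translation.
 */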
3936 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
3937 {
3938         int sp, ret;
3939         struct intel_iommu *iommu = dmaru->iommu;
3940
3941         if (g_iommus[iommu->seq_id])
3942                 return 0;
3943
3944         ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
3945         if (ret)
3946                 goto out;
3947
3948         if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
3949                 pr_warn("%s: Doesn't support hardware pass through.\n",
3950                         iommu->name);
3951                 return -ENXIO;
3952         }
3953         if (!ecap_sc_support(iommu->ecap) &&
3954             domain_update_iommu_snooping(iommu)) {
3955                 pr_warn("%s: Doesn't support snooping.\n",
3956                         iommu->name);
3957                 return -ENXIO;
3958         }
3959         sp = domain_update_iommu_superpage(NULL, iommu) - 1;
3960         if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
3961                 pr_warn("%s: Doesn't support large page.\n",
3962                         iommu->name);
3963                 return -ENXIO;
3964         }
3965
3966         /*
3967          * Disable translation if already enabled prior to OS handover.
3968          */
3969         if (iommu->gcmd & DMA_GCMD_TE)
3970                 iommu_disable_translation(iommu);
3971
3972         g_iommus[iommu->seq_id] = iommu;
3973         ret = iommu_init_domains(iommu);
3974         if (ret == 0)
3975                 ret = iommu_alloc_root_entry(iommu);
3976         if (ret)
3977                 goto out;
3978
3979         intel_svm_check(iommu);
3980
3981         if (dmaru->ignored) {
3982                 /*
3983                  * we always have to disable PMRs or DMA may fail on this device
3984                  */
3985                 if (force_on)
3986                         iommu_disable_protect_mem_regions(iommu);
3987                 return 0;
3988         }
3989
3990         intel_iommu_init_qi(iommu);
3991         iommu_flush_write_buffer(iommu);
3992
3993 #ifdef CONFIG_INTEL_IOMMU_SVM
3994         if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3995                 ret = intel_svm_enable_prq(iommu);
3996                 if (ret)
3997                         goto disable_iommu;
3998         }
3999 #endif
4000         ret = dmar_set_interrupt(iommu);
4001         if (ret)
4002                 goto disable_iommu;
4003
4004         iommu_set_root_entry(iommu);
4005         iommu_enable_translation(iommu);
4006
4007         iommu_disable_protect_mem_regions(iommu);
4008         return 0;
4009
4010 disable_iommu:
4011         disable_dmar_iommu(iommu);
4012 out:
4013         free_dmar_iommu(iommu);
4014         return ret;
4015 }
4016
4017 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4018 {
4019         int ret = 0;
4020         struct intel_iommu *iommu = dmaru->iommu;
4021
4022         if (!intel_iommu_enabled)
4023                 return 0;
4024         if (iommu == NULL)
4025                 return -EINVAL;
4026
4027         if (insert) {
4028                 ret = intel_iommu_add(dmaru);
4029         } else {
4030                 disable_dmar_iommu(iommu);
4031                 free_dmar_iommu(iommu);
4032         }
4033
4034         return ret;
4035 }
4036
4037 static void intel_iommu_free_dmars(void)
4038 {
4039         struct dmar_rmrr_unit *rmrru, *rmrr_n;
4040         struct dmar_atsr_unit *atsru, *atsr_n;
4041         struct dmar_satc_unit *satcu, *satc_n;
4042
4043         list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4044                 list_del(&rmrru->list);
4045                 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4046                 kfree(rmrru);
4047         }
4048
4049         list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4050                 list_del(&atsru->list);
4051                 intel_iommu_free_atsr(atsru);
4052         }
4053         list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) {
4054                 list_del(&satcu->list);
4055                 dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt);
4056                 kfree(satcu);
4057         }
4058 }
4059
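/*
 * dmar_find_matched_atsr_unit - check whether ATS is allowed for @dev
 *
 * Walk up to the PCIe root port above @dev and return non-zero if that
 * port (or an include-all entry) is listed in an ATSR unit for the
 * device's segment; integrated devices without an upstream bridge are
 * always allowed.
 */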
4060 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4061 {
4062         int i, ret = 1;
4063         struct pci_bus *bus;
4064         struct pci_dev *bridge = NULL;
4065         struct device *tmp;
4066         struct acpi_dmar_atsr *atsr;
4067         struct dmar_atsr_unit *atsru;
4068
4069         dev = pci_physfn(dev);
4070         for (bus = dev->bus; bus; bus = bus->parent) {
4071                 bridge = bus->self;
4072                 /* If it's an integrated device, allow ATS */
4073                 if (!bridge)
4074                         return 1;
4075                 /* Connected via non-PCIe: no ATS */
4076                 if (!pci_is_pcie(bridge) ||
4077                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4078                         return 0;
4079                 /* If we found the root port, look it up in the ATSR */
4080                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4081                         break;
4082         }
4083
4084         rcu_read_lock();
4085         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4086                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4087                 if (atsr->segment != pci_domain_nr(dev->bus))
4088                         continue;
4089
4090                 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4091                         if (tmp == &bridge->dev)
4092                                 goto out;
4093
4094                 if (atsru->include_all)
4095                         goto out;
4096         }
4097         ret = 0;
4098 out:
4099         rcu_read_unlock();
4100
4101         return ret;
4102 }
4103
4104 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4105 {
4106         int ret;
4107         struct dmar_rmrr_unit *rmrru;
4108         struct dmar_atsr_unit *atsru;
4109         struct dmar_satc_unit *satcu;
4110         struct acpi_dmar_atsr *atsr;
4111         struct acpi_dmar_reserved_memory *rmrr;
4112         struct acpi_dmar_satc *satc;
4113
4114         if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
4115                 return 0;
4116
4117         list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4118                 rmrr = container_of(rmrru->hdr,
4119                                     struct acpi_dmar_reserved_memory, header);
4120                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4121                         ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4122                                 ((void *)rmrr) + rmrr->header.length,
4123                                 rmrr->segment, rmrru->devices,
4124                                 rmrru->devices_cnt);
4125                         if (ret < 0)
4126                                 return ret;
4127                 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4128                         dmar_remove_dev_scope(info, rmrr->segment,
4129                                 rmrru->devices, rmrru->devices_cnt);
4130                 }
4131         }
4132
4133         list_for_each_entry(atsru, &dmar_atsr_units, list) {
4134                 if (atsru->include_all)
4135                         continue;
4136
4137                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4138                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4139                         ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4140                                         (void *)atsr + atsr->header.length,
4141                                         atsr->segment, atsru->devices,
4142                                         atsru->devices_cnt);
4143                         if (ret > 0)
4144                                 break;
4145                         else if (ret < 0)
4146                                 return ret;
4147                 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4148                         if (dmar_remove_dev_scope(info, atsr->segment,
4149                                         atsru->devices, atsru->devices_cnt))
4150                                 break;
4151                 }
4152         }
4153         list_for_each_entry(satcu, &dmar_satc_units, list) {
4154                 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
4155                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4156                         ret = dmar_insert_dev_scope(info, (void *)(satc + 1),
4157                                         (void *)satc + satc->header.length,
4158                                         satc->segment, satcu->devices,
4159                                         satcu->devices_cnt);
4160                         if (ret > 0)
4161                                 break;
4162                         else if (ret < 0)
4163                                 return ret;
4164                 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4165                         if (dmar_remove_dev_scope(info, satc->segment,
4166                                         satcu->devices, satcu->devices_cnt))
4167                                 break;
4168                 }
4169         }
4170
4171         return 0;
4172 }
4173
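/*
 * Keep the static identity (si) domain in sync with memory hotplug:
 * ranges going online are added to the identity map, and offlined
 * ranges are unmapped and flushed from every active IOMMU.
 */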
4174 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4175                                        unsigned long val, void *v)
4176 {
4177         struct memory_notify *mhp = v;
4178         unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4179         unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
4180                         mhp->nr_pages - 1);
4181
4182         switch (val) {
4183         case MEM_GOING_ONLINE:
4184                 if (iommu_domain_identity_map(si_domain,
4185                                               start_vpfn, last_vpfn)) {
4186                         pr_warn("Failed to build identity map for [%lx-%lx]\n",
4187                                 start_vpfn, last_vpfn);
4188                         return NOTIFY_BAD;
4189                 }
4190                 break;
4191
4192         case MEM_OFFLINE:
4193         case MEM_CANCEL_ONLINE:
4194                 {
4195                         struct dmar_drhd_unit *drhd;
4196                         struct intel_iommu *iommu;
4197                         struct page *freelist;
4198
4199                         freelist = domain_unmap(si_domain,
4200                                                 start_vpfn, last_vpfn,
4201                                                 NULL);
4202
4203                         rcu_read_lock();
4204                         for_each_active_iommu(iommu, drhd)
4205                                 iommu_flush_iotlb_psi(iommu, si_domain,
4206                                         start_vpfn, mhp->nr_pages,
4207                                         !freelist, 0);
4208                         rcu_read_unlock();
4209                         dma_free_pagelist(freelist);
4210                 }
4211                 break;
4212         }
4213
4214         return NOTIFY_OK;
4215 }
4216
4217 static struct notifier_block intel_iommu_memory_nb = {
4218         .notifier_call = intel_iommu_memory_notifier,
4219         .priority = 0
4220 };
4221
4222 static void intel_disable_iommus(void)
4223 {
4224         struct intel_iommu *iommu = NULL;
4225         struct dmar_drhd_unit *drhd;
4226
4227         for_each_iommu(iommu, drhd)
4228                 iommu_disable_translation(iommu);
4229 }
4230
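/*
 * Disable PMRs and turn off translation on every IOMMU; intended for
 * the shutdown/kexec path so the next kernel finds the remapping
 * hardware in a quiescent state.
 */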
4231 void intel_iommu_shutdown(void)
4232 {
4233         struct dmar_drhd_unit *drhd;
4234         struct intel_iommu *iommu = NULL;
4235
4236         if (no_iommu || dmar_disabled)
4237                 return;
4238
4239         down_write(&dmar_global_lock);
4240
4241         /* Disable PMRs explicitly here. */
4242         for_each_iommu(iommu, drhd)
4243                 iommu_disable_protect_mem_regions(iommu);
4244
4245         /* Make sure the IOMMUs are switched off */
4246         intel_disable_iommus();
4247
4248         up_write(&dmar_global_lock);
4249 }
4250
4251 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4252 {
4253         struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4254
4255         return container_of(iommu_dev, struct intel_iommu, iommu);
4256 }
4257
4258 static ssize_t version_show(struct device *dev,
4259                             struct device_attribute *attr, char *buf)
4260 {
4261         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4262         u32 ver = readl(iommu->reg + DMAR_VER_REG);
4263         return sprintf(buf, "%d:%d\n",
4264                        DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4265 }
4266 static DEVICE_ATTR_RO(version);
4267
4268 static ssize_t address_show(struct device *dev,
4269                             struct device_attribute *attr, char *buf)
4270 {
4271         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4272         return sprintf(buf, "%llx\n", iommu->reg_phys);
4273 }
4274 static DEVICE_ATTR_RO(address);
4275
4276 static ssize_t cap_show(struct device *dev,
4277                         struct device_attribute *attr, char *buf)
4278 {
4279         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4280         return sprintf(buf, "%llx\n", iommu->cap);
4281 }
4282 static DEVICE_ATTR_RO(cap);
4283
4284 static ssize_t ecap_show(struct device *dev,
4285                          struct device_attribute *attr, char *buf)
4286 {
4287         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4288         return sprintf(buf, "%llx\n", iommu->ecap);
4289 }
4290 static DEVICE_ATTR_RO(ecap);
4291
4292 static ssize_t domains_supported_show(struct device *dev,
4293                                       struct device_attribute *attr, char *buf)
4294 {
4295         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4296         return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4297 }
4298 static DEVICE_ATTR_RO(domains_supported);
4299
4300 static ssize_t domains_used_show(struct device *dev,
4301                                  struct device_attribute *attr, char *buf)
4302 {
4303         struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4304         return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4305                                                   cap_ndoms(iommu->cap)));
4306 }
4307 static DEVICE_ATTR_RO(domains_used);
4308
4309 static struct attribute *intel_iommu_attrs[] = {
4310         &dev_attr_version.attr,
4311         &dev_attr_address.attr,
4312         &dev_attr_cap.attr,
4313         &dev_attr_ecap.attr,
4314         &dev_attr_domains_supported.attr,
4315         &dev_attr_domains_used.attr,
4316         NULL,
4317 };
4318
4319 static struct attribute_group intel_iommu_group = {
4320         .name = "intel-iommu",
4321         .attrs = intel_iommu_attrs,
4322 };
4323
4324 const struct attribute_group *intel_iommu_groups[] = {
4325         &intel_iommu_group,
4326         NULL,
4327 };
4328
4329 static inline bool has_external_pci(void)
4330 {
4331         struct pci_dev *pdev = NULL;
4332
4333         for_each_pci_dev(pdev)
4334                 if (pdev->external_facing)
4335                         return true;
4336
4337         return false;
4338 }
4339
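/*
 * Honour the DMAR platform opt-in (pre-boot DMA protection): if the
 * firmware requests it and an external-facing PCI device is present,
 * force the IOMMU on even if it was disabled on the command line.
 * Returns 1 when the opt-in takes effect, which the caller folds into
 * force_on.
 */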
4340 static int __init platform_optin_force_iommu(void)
4341 {
4342         if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
4343                 return 0;
4344
4345         if (no_iommu || dmar_disabled)
4346                 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4347
4348         /*
4349          * If Intel-IOMMU is disabled by default, we will apply identity
4350          * map for all devices except those marked as being untrusted.
4351          */
4352         if (dmar_disabled)
4353                 iommu_set_default_passthrough(false);
4354
4355         dmar_disabled = 0;
4356         no_iommu = 0;
4357
4358         return 1;
4359 }
4360
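/*
 * Probe ACPI namespace devices listed in the DRHD device scopes whose
 * physical companion devices are not yet part of an IOMMU group.
 */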
4361 static int __init probe_acpi_namespace_devices(void)
4362 {
4363         struct dmar_drhd_unit *drhd;
4364         /* To avoid a -Wunused-but-set-variable warning. */
4365         struct intel_iommu *iommu __maybe_unused;
4366         struct device *dev;
4367         int i, ret = 0;
4368
4369         for_each_active_iommu(iommu, drhd) {
4370                 for_each_active_dev_scope(drhd->devices,
4371                                           drhd->devices_cnt, i, dev) {
4372                         struct acpi_device_physical_node *pn;
4373                         struct iommu_group *group;
4374                         struct acpi_device *adev;
4375
4376                         if (dev->bus != &acpi_bus_type)
4377                                 continue;
4378
4379                         adev = to_acpi_device(dev);
4380                         mutex_lock(&adev->physical_node_lock);
4381                         list_for_each_entry(pn,
4382                                             &adev->physical_node_list, node) {
4383                                 group = iommu_group_get(pn->dev);
4384                                 if (group) {
4385                                         iommu_group_put(group);
4386                                         continue;
4387                                 }
4388
4389                                 pn->dev->bus->iommu_ops = &intel_iommu_ops;
4390                                 ret = iommu_probe_device(pn->dev);
4391                                 if (ret)
4392                                         break;
4393                         }
4394                         mutex_unlock(&adev->physical_node_lock);
4395
4396                         if (ret)
4397                                 return ret;
4398                 }
4399         }
4400
4401         return 0;
4402 }
4403
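/*
 * intel_iommu_init - entry point for VT-d DMA remapping setup
 *
 * Parse the DMAR table, initialize every remapping unit via
 * init_dmars(), register sysfs attributes, iommu-core devices and
 * notifiers, and finally enable translation on all units that are
 * neither ignored nor already pre-enabled.
 */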
4404 int __init intel_iommu_init(void)
4405 {
4406         int ret = -ENODEV;
4407         struct dmar_drhd_unit *drhd;
4408         struct intel_iommu *iommu;
4409
4410         /*
4411          * Intel IOMMU is required for a TXT/tboot launch or platform
4412          * opt in, so enforce that.
4413          */
4414         force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
4415                     platform_optin_force_iommu();
4416
4417         if (iommu_init_mempool()) {
4418                 if (force_on)
4419                         panic("tboot: Failed to initialize iommu memory\n");
4420                 return -ENOMEM;
4421         }
4422
4423         down_write(&dmar_global_lock);
4424         if (dmar_table_init()) {
4425                 if (force_on)
4426                         panic("tboot: Failed to initialize DMAR table\n");
4427                 goto out_free_dmar;
4428         }
4429
4430         if (dmar_dev_scope_init() < 0) {
4431                 if (force_on)
4432                         panic("tboot: Failed to initialize DMAR device scope\n");
4433                 goto out_free_dmar;
4434         }
4435
4436         up_write(&dmar_global_lock);
4437
4438         /*
4439          * The bus notifier takes the dmar_global_lock, so lockdep will
4440          * complain later when we register it under the lock.
4441          */
4442         dmar_register_bus_notifier();
4443
4444         down_write(&dmar_global_lock);
4445
4446         if (!no_iommu)
4447                 intel_iommu_debugfs_init();
4448
4449         if (no_iommu || dmar_disabled) {
4450                 /*
4451                  * We exit the function here to ensure the IOMMU's remapping and
4452                  * mempool aren't set up, which means that the IOMMU's PMRs
4453                  * won't be disabled via the call to init_dmars(). So disable
4454                  * them explicitly here. The PMRs were set up by tboot prior to
4455                  * calling SENTER, but the kernel is expected to reset/tear
4456                  * down the PMRs.
4457                  */
4458                 if (intel_iommu_tboot_noforce) {
4459                         for_each_iommu(iommu, drhd)
4460                                 iommu_disable_protect_mem_regions(iommu);
4461                 }
4462
4463                 /*
4464                  * Make sure the IOMMUs are switched off, even when we
4465                  * boot into a kexec kernel and the previous kernel left
4466                  * them enabled
4467                  */
4468                 intel_disable_iommus();
4469                 goto out_free_dmar;
4470         }
4471
4472         if (list_empty(&dmar_rmrr_units))
4473                 pr_info("No RMRR found\n");
4474
4475         if (list_empty(&dmar_atsr_units))
4476                 pr_info("No ATSR found\n");
4477
4478         if (list_empty(&dmar_satc_units))
4479                 pr_info("No SATC found\n");
4480
4481         if (dmar_map_gfx)
4482                 intel_iommu_gfx_mapped = 1;
4483
4484         init_no_remapping_devices();
4485
4486         ret = init_dmars();
4487         if (ret) {
4488                 if (force_on)
4489                         panic("tboot: Failed to initialize DMARs\n");
4490                 pr_err("Initialization failed\n");
4491                 goto out_free_dmar;
4492         }
4493         up_write(&dmar_global_lock);
4494
4495         init_iommu_pm_ops();
4496
4497         down_read(&dmar_global_lock);
4498         for_each_active_iommu(iommu, drhd) {
4499                 /*
4500                  * The flush queue implementation does not perform
4501                  * page-selective invalidations that are required for efficient
4502                  * TLB flushes in virtual environments.  The benefit of batching
4503                  * is likely to be much lower than the overhead of synchronizing
4504                  * the virtual and physical IOMMU page-tables.
4505                  */
4506                 if (cap_caching_mode(iommu->cap)) {
4507                         pr_info_once("IOMMU batching disallowed due to virtualization\n");
4508                         iommu_set_dma_strict();
4509                 }
4510                 iommu_device_sysfs_add(&iommu->iommu, NULL,
4511                                        intel_iommu_groups,
4512                                        "%s", iommu->name);
4513                 iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
4514         }
4515         up_read(&dmar_global_lock);
4516
4517         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4518         if (si_domain && !hw_pass_through)
4519                 register_memory_notifier(&intel_iommu_memory_nb);
4520
4521         down_read(&dmar_global_lock);
4522         if (probe_acpi_namespace_devices())
4523                 pr_warn("ACPI name space devices didn't probe correctly\n");
4524
4525         /* Finally, we enable the DMA remapping hardware. */
4526         for_each_iommu(iommu, drhd) {
4527                 if (!drhd->ignored && !translation_pre_enabled(iommu))
4528                         iommu_enable_translation(iommu);
4529
4530                 iommu_disable_protect_mem_regions(iommu);
4531         }
4532         up_read(&dmar_global_lock);
4533
4534         pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4535
4536         intel_iommu_enabled = 1;
4537
4538         return 0;
4539
4540 out_free_dmar:
4541         intel_iommu_free_dmars();
4542         up_write(&dmar_global_lock);
4543         iommu_exit_mempool();
4544         return ret;
4545 }
4546
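/*
 * Callback for pci_for_each_dma_alias(): clear the context entry for one
 * bus/devfn alias of the device whose device_domain_info is passed in
 * @opaque.
 */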
4547 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4548 {
4549         struct device_domain_info *info = opaque;
4550
4551         domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff);
4552         return 0;
4553 }
4554
4555 /*
4556  * NB - intel-iommu lacks any sort of reference counting for the users of
4557  * dependent devices.  If multiple endpoints have intersecting dependent
4558  * devices, unbinding the driver from any one of them will possibly leave
4559  * the others unable to operate.
4560  */
4561 static void domain_context_clear(struct device_domain_info *info)
4562 {
4563         if (!info->iommu || !info->dev || !dev_is_pci(info->dev))
4564                 return;
4565
4566         pci_for_each_dma_alias(to_pci_dev(info->dev),
4567                                &domain_context_clear_one_cb, info);
4568 }
4569
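/*
 * Tear down all translation state for @info: the RID2PASID entry and device
 * IOTLB for scalable-mode PCI devices, the context mapping and PASID table,
 * and finally the domain<->iommu attachment. Caller must hold
 * device_domain_lock.
 */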
4570 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4571 {
4572         struct dmar_domain *domain;
4573         struct intel_iommu *iommu;
4574         unsigned long flags;
4575
4576         assert_spin_locked(&device_domain_lock);
4577
4578         if (WARN_ON(!info))
4579                 return;
4580
4581         iommu = info->iommu;
4582         domain = info->domain;
4583
4584         if (info->dev && !dev_is_real_dma_subdevice(info->dev)) {
4585                 if (dev_is_pci(info->dev) && sm_supported(iommu))
4586                         intel_pasid_tear_down_entry(iommu, info->dev,
4587                                         PASID_RID2PASID, false);
4588
4589                 iommu_disable_dev_iotlb(info);
4590                 domain_context_clear(info);
4591                 intel_pasid_free_table(info->dev);
4592         }
4593
4594         unlink_domain_info(info);
4595
4596         spin_lock_irqsave(&iommu->lock, flags);
4597         domain_detach_iommu(domain, iommu);
4598         spin_unlock_irqrestore(&iommu->lock, flags);
4599
4600         free_devinfo_mem(info);
4601 }
4602
4603 static void dmar_remove_one_dev_info(struct device *dev)
4604 {
4605         struct device_domain_info *info;
4606         unsigned long flags;
4607
4608         spin_lock_irqsave(&device_domain_lock, flags);
4609         info = get_domain_info(dev);
4610         if (info)
4611                 __dmar_remove_one_dev_info(info);
4612         spin_unlock_irqrestore(&device_domain_lock, flags);
4613 }
4614
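/*
 * Initialize a domain allocated through intel_iommu_domain_alloc(): derive
 * the AGAW from the requested guest address width and allocate the
 * top-level page directory.
 */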
4615 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4616 {
4617         int adjust_width;
4618
4619         /* calculate AGAW */
4620         domain->gaw = guest_width;
4621         adjust_width = guestwidth_to_adjustwidth(guest_width);
4622         domain->agaw = width_to_agaw(adjust_width);
4623
4624         domain->iommu_coherency = false;
4625         domain->iommu_snooping = false;
4626         domain->iommu_superpage = 0;
4627         domain->max_addr = 0;
4628
4629         /* always allocate the top pgd */
4630         domain->pgd = alloc_pgtable_page(domain->nid);
4631         if (!domain->pgd)
4632                 return -ENOMEM;
4633         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4634         return 0;
4635 }
4636
4637 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4638 {
4639         struct dmar_domain *dmar_domain;
4640         struct iommu_domain *domain;
4641
4642         switch (type) {
4643         case IOMMU_DOMAIN_DMA:
4644         case IOMMU_DOMAIN_DMA_FQ:
4645         case IOMMU_DOMAIN_UNMANAGED:
4646                 dmar_domain = alloc_domain(type);
4647                 if (!dmar_domain) {
4648                         pr_err("Can't allocate dmar_domain\n");
4649                         return NULL;
4650                 }
4651                 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4652                         pr_err("Domain initialization failed\n");
4653                         domain_exit(dmar_domain);
4654                         return NULL;
4655                 }
4656
4657                 domain = &dmar_domain->domain;
4658                 domain->geometry.aperture_start = 0;
4659                 domain->geometry.aperture_end   =
4660                                 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4661                 domain->geometry.force_aperture = true;
4662
4663                 return domain;
4664         case IOMMU_DOMAIN_IDENTITY:
4665                 return &si_domain->domain;
4666         default:
4667                 return NULL;
4668         }
4669
4670         return NULL;
4671 }
4672
4673 static void intel_iommu_domain_free(struct iommu_domain *domain)
4674 {
4675         if (domain != &si_domain->domain)
4676                 domain_exit(to_dmar_domain(domain));
4677 }
4678
4679 /*
4680  * Check whether a @domain could be attached to the @dev through the
4681  * aux-domain attach/detach APIs.
4682  */
4683 static inline bool
4684 is_aux_domain(struct device *dev, struct iommu_domain *domain)
4685 {
4686         struct device_domain_info *info = get_domain_info(dev);
4687
4688         return info && info->auxd_enabled &&
4689                         domain->type == IOMMU_DOMAIN_UNMANAGED;
4690 }
4691
4692 static inline struct subdev_domain_info *
4693 lookup_subdev_info(struct dmar_domain *domain, struct device *dev)
4694 {
4695         struct subdev_domain_info *sinfo;
4696
4697         if (!list_empty(&domain->subdevices)) {
4698                 list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
4699                         if (sinfo->pdev == dev)
4700                                 return sinfo;
4701                 }
4702         }
4703
4704         return NULL;
4705 }
4706
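/*
 * Record @dev as a subdevice of @domain, allocating the subdev_domain_info
 * on first use and linking it on both the device's and the domain's
 * subdevice lists. Returns the new user count on success; caller must hold
 * device_domain_lock.
 */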
4707 static int auxiliary_link_device(struct dmar_domain *domain,
4708                                  struct device *dev)
4709 {
4710         struct device_domain_info *info = get_domain_info(dev);
4711         struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev);
4712
4713         assert_spin_locked(&device_domain_lock);
4714         if (WARN_ON(!info))
4715                 return -EINVAL;
4716
4717         if (!sinfo) {
4718                 sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC);
4719                 if (!sinfo)
4720                         return -ENOMEM;
4721                 sinfo->domain = domain;
4722                 sinfo->pdev = dev;
4723                 list_add(&sinfo->link_phys, &info->subdevices);
4724                 list_add(&sinfo->link_domain, &domain->subdevices);
4725         }
4726
4727         return ++sinfo->users;
4728 }
4729
4730 static int auxiliary_unlink_device(struct dmar_domain *domain,
4731                                    struct device *dev)
4732 {
4733         struct device_domain_info *info = get_domain_info(dev);
4734         struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev);
4735         int ret;
4736
4737         assert_spin_locked(&device_domain_lock);
4738         if (WARN_ON(!info || !sinfo || sinfo->users <= 0))
4739                 return -EINVAL;
4740
4741         ret = --sinfo->users;
4742         if (!ret) {
4743                 list_del(&sinfo->link_phys);
4744                 list_del(&sinfo->link_domain);
4745                 kfree(sinfo);
4746         }
4747
4748         return ret;
4749 }
4750
4751 static int aux_domain_add_dev(struct dmar_domain *domain,
4752                               struct device *dev)
4753 {
4754         int ret;
4755         unsigned long flags;
4756         struct intel_iommu *iommu;
4757
4758         iommu = device_to_iommu(dev, NULL, NULL);
4759         if (!iommu)
4760                 return -ENODEV;
4761
4762         if (domain->default_pasid <= 0) {
4763                 u32 pasid;
4764
4765                 /* No private data needed for the default pasid */
4766                 pasid = ioasid_alloc(NULL, PASID_MIN,
4767                                      pci_max_pasids(to_pci_dev(dev)) - 1,
4768                                      NULL);
4769                 if (pasid == INVALID_IOASID) {
4770                         pr_err("Can't allocate default pasid\n");
4771                         return -ENODEV;
4772                 }
4773                 domain->default_pasid = pasid;
4774         }
4775
4776         spin_lock_irqsave(&device_domain_lock, flags);
4777         ret = auxiliary_link_device(domain, dev);
4778         if (ret <= 0)
4779                 goto link_failed;
4780
4781         /*
4782          * Subdevices from the same physical device can be attached to the
4783          * same domain. For such cases, only the first subdevice attachment
4784          * needs to go through the full steps in this function. So if ret >
4785          * 1, just goto out.
4786          */
4787         if (ret > 1)
4788                 goto out;
4789
4790         /*
4791          * iommu->lock must be held to attach the domain to the iommu and to
4792          * set up the pasid entry for first- or second-level translation.
4793          */
4794         spin_lock(&iommu->lock);
4795         ret = domain_attach_iommu(domain, iommu);
4796         if (ret)
4797                 goto attach_failed;
4798
4799         /* Set up the PASID entry for mediated devices: */
4800         if (domain_use_first_level(domain))
4801                 ret = domain_setup_first_level(iommu, domain, dev,
4802                                                domain->default_pasid);
4803         else
4804                 ret = intel_pasid_setup_second_level(iommu, domain, dev,
4805                                                      domain->default_pasid);
4806         if (ret)
4807                 goto table_failed;
4808
4809         spin_unlock(&iommu->lock);
4810 out:
4811         spin_unlock_irqrestore(&device_domain_lock, flags);
4812
4813         return 0;
4814
4815 table_failed:
4816         domain_detach_iommu(domain, iommu);
4817 attach_failed:
4818         spin_unlock(&iommu->lock);
4819         auxiliary_unlink_device(domain, dev);
4820 link_failed:
4821         spin_unlock_irqrestore(&device_domain_lock, flags);
4822         if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
4823                 ioasid_put(domain->default_pasid);
4824
4825         return ret;
4826 }
4827
4828 static void aux_domain_remove_dev(struct dmar_domain *domain,
4829                                   struct device *dev)
4830 {
4831         struct device_domain_info *info;
4832         struct intel_iommu *iommu;
4833         unsigned long flags;
4834
4835         if (!is_aux_domain(dev, &domain->domain))
4836                 return;
4837
4838         spin_lock_irqsave(&device_domain_lock, flags);
4839         info = get_domain_info(dev);
4840         iommu = info->iommu;
4841
4842         if (!auxiliary_unlink_device(domain, dev)) {
4843                 spin_lock(&iommu->lock);
4844                 intel_pasid_tear_down_entry(iommu, dev,
4845                                             domain->default_pasid, false);
4846                 domain_detach_iommu(domain, iommu);
4847                 spin_unlock(&iommu->lock);
4848         }
4849
4850         spin_unlock_irqrestore(&device_domain_lock, flags);
4851
4852         if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
4853                 ioasid_put(domain->default_pasid);
4854 }
4855
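/*
 * Validate that @dev's IOMMU can host @domain before an attach: reject
 * nesting if the hardware lacks ecap.NEST, clamp the domain's address width
 * to what the IOMMU supports, and drop unused upper page-table levels so
 * the domain AGAW does not exceed the IOMMU AGAW.
 */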
4856 static int prepare_domain_attach_device(struct iommu_domain *domain,
4857                                         struct device *dev)
4858 {
4859         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4860         struct intel_iommu *iommu;
4861         int addr_width;
4862
4863         iommu = device_to_iommu(dev, NULL, NULL);
4864         if (!iommu)
4865                 return -ENODEV;
4866
4867         if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) &&
4868             !ecap_nest(iommu->ecap)) {
4869                 dev_err(dev, "%s: IOMMU does not support nested translation\n",
4870                         iommu->name);
4871                 return -EINVAL;
4872         }
4873
4874         /* check if this iommu agaw is sufficient for max mapped address */
4875         addr_width = agaw_to_width(iommu->agaw);
4876         if (addr_width > cap_mgaw(iommu->cap))
4877                 addr_width = cap_mgaw(iommu->cap);
4878
4879         if (dmar_domain->max_addr > (1LL << addr_width)) {
4880                 dev_err(dev,
4881                         "%s: iommu width (%d) is not sufficient for the mapped address (%llx)\n",
4882                         __func__, addr_width, dmar_domain->max_addr);
4883                 return -EFAULT;
4884         }
4885         dmar_domain->gaw = addr_width;
4886
4887         /*
4888          * Knock out extra levels of page tables if necessary
4889          */
4890         while (iommu->agaw < dmar_domain->agaw) {
4891                 struct dma_pte *pte;
4892
4893                 pte = dmar_domain->pgd;
4894                 if (dma_pte_present(pte)) {
4895                         dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
4896                         free_pgtable_page(pte);
4897                 }
4898                 dmar_domain->agaw--;
4899         }
4900
4901         return 0;
4902 }
4903
4904 static int intel_iommu_attach_device(struct iommu_domain *domain,
4905                                      struct device *dev)
4906 {
4907         int ret;
4908
4909         if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
4910             device_is_rmrr_locked(dev)) {
4911                 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
4912                 return -EPERM;
4913         }
4914
4915         if (is_aux_domain(dev, domain))
4916                 return -EPERM;
4917
4918         /* normally dev is not mapped */
4919         if (unlikely(domain_context_mapped(dev))) {
4920                 struct dmar_domain *old_domain;
4921
4922                 old_domain = find_domain(dev);
4923                 if (old_domain)
4924                         dmar_remove_one_dev_info(dev);
4925         }
4926
4927         ret = prepare_domain_attach_device(domain, dev);
4928         if (ret)
4929                 return ret;
4930
4931         return domain_add_dev_info(to_dmar_domain(domain), dev);
4932 }
4933
4934 static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
4935                                          struct device *dev)
4936 {
4937         int ret;
4938
4939         if (!is_aux_domain(dev, domain))
4940                 return -EPERM;
4941
4942         ret = prepare_domain_attach_device(domain, dev);
4943         if (ret)
4944                 return ret;
4945
4946         return aux_domain_add_dev(to_dmar_domain(domain), dev);
4947 }
4948
4949 static void intel_iommu_detach_device(struct iommu_domain *domain,
4950                                       struct device *dev)
4951 {
4952         dmar_remove_one_dev_info(dev);
4953 }
4954
4955 static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
4956                                           struct device *dev)
4957 {
4958         aux_domain_remove_dev(to_dmar_domain(domain), dev);
4959 }
4960
4961 #ifdef CONFIG_INTEL_IOMMU_SVM
4962 /*
4963  * 2D array for converting and sanitizing IOMMU generic TLB granularity to
4964  * VT-d granularity. Invalidation is typically included in the unmap operation
4965  * as a result of DMA or VFIO unmap. However, for assigned devices the guest
4966  * owns the first-level page tables. Invalidations of translation caches in the
4967  * guest are trapped and passed down to the host.
4968  *
4969  * The vIOMMU in the guest only exposes first-level page tables, so we do not
4970  * support IOTLB granularity for requests without PASID (i.e. second level).
4971  *
4972  * For example, to find the VT-d granularity encoding for IOTLB
4973  * type and page selective granularity within PASID:
4974  * X: indexed by iommu cache type
4975  * Y: indexed by enum iommu_inv_granularity
4976  * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR]
4977  */
4978
4979 static const int
4980 inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = {
4981         /*
4982          * PASID based IOTLB invalidation: PASID selective (per PASID),
4983          * page selective (address granularity)
4984          */
4985         {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID},
4986         /* PASID based dev TLBs */
4987         {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL},
4988         /* PASID cache */
4989         {-EINVAL, -EINVAL, -EINVAL}
4990 };
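/*
 * Example lookup, assuming the generic granularities are ordered DOMAIN,
 * PASID, ADDR: [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR] yields
 * QI_GRAN_PSI_PASID (page-selective-within-PASID), while
 * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_DOMAIN] yields -EINVAL and
 * is rejected by the caller.
 */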
4991
4992 static inline int to_vtd_granularity(int type, int granu)
4993 {
4994         return inv_type_granu_table[type][granu];
4995 }
4996
4997 static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules)
4998 {
4999         u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT;
5000
5001         /* VT-d encodes the invalidation size as 2^size 4K pages: 0 for 4K,
5002          * 9 for 2MB, and so on. The IOMMU cache invalidate API passes
5003          * granu_size in bytes and the number of contiguous granules of that size.
5004          */
5005         return order_base_2(nr_pages);
5006 }
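/*
 * Example: granu_size = 4KB and nr_granules = 512 describe 2MB, i.e. 512
 * 4K pages, so this returns order_base_2(512) = 9, matching the "9 for
 * 2MB" encoding noted above.
 */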
5007
5008 static int
5009 intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
5010                            struct iommu_cache_invalidate_info *inv_info)
5011 {
5012         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5013         struct device_domain_info *info;
5014         struct intel_iommu *iommu;
5015         unsigned long flags;
5016         int cache_type;
5017         u8 bus, devfn;
5018         u16 did, sid;
5019         int ret = 0;
5020         u64 size = 0;
5021
5022         if (!inv_info || !dmar_domain)
5023                 return -EINVAL;
5024
5025         if (!dev || !dev_is_pci(dev))
5026                 return -ENODEV;
5027
5028         iommu = device_to_iommu(dev, &bus, &devfn);
5029         if (!iommu)
5030                 return -ENODEV;
5031
5032         if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE))
5033                 return -EINVAL;
5034
5035         spin_lock_irqsave(&device_domain_lock, flags);
5036         spin_lock(&iommu->lock);
5037         info = get_domain_info(dev);
5038         if (!info) {
5039                 ret = -EINVAL;
5040                 goto out_unlock;
5041         }
5042         did = dmar_domain->iommu_did[iommu->seq_id];
5043         sid = PCI_DEVID(bus, devfn);
5044
5045         /* Size is only valid in address selective invalidation */
5046         if (inv_info->granularity == IOMMU_INV_GRANU_ADDR)
5047                 size = to_vtd_size(inv_info->granu.addr_info.granule_size,
5048                                    inv_info->granu.addr_info.nb_granules);
5049
5050         for_each_set_bit(cache_type,
5051                          (unsigned long *)&inv_info->cache,
5052                          IOMMU_CACHE_INV_TYPE_NR) {
5053                 int granu = 0;
5054                 u64 pasid = 0;
5055                 u64 addr = 0;
5056
5057                 granu = to_vtd_granularity(cache_type, inv_info->granularity);
5058                 if (granu == -EINVAL) {
5059                         pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n",
5060                                            cache_type, inv_info->granularity);
5061                         break;
5062                 }
5063
5064                 /*
5065                  * PASID is stored in different locations based on the
5066                  * granularity.
5067                  */
5068                 if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
5069                     (inv_info->granu.pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
5070                         pasid = inv_info->granu.pasid_info.pasid;
5071                 else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
5072                          (inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
5073                         pasid = inv_info->granu.addr_info.pasid;
5074
5075                 switch (BIT(cache_type)) {
5076                 case IOMMU_CACHE_INV_TYPE_IOTLB:
5077                         /* HW will ignore LSB bits based on address mask */
5078                         if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
5079                             size &&
5080                             (inv_info->granu.addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
5081                                 pr_err_ratelimited("User address not aligned, 0x%llx, size order %llu\n",
5082                                                    inv_info->granu.addr_info.addr, size);
5083                         }
5084
5085                         /*
5086                          * If granu is PASID-selective, address is ignored.
5087                          * We use npages = -1 to indicate that.
5088                          */
5089                         qi_flush_piotlb(iommu, did, pasid,
5090                                         mm_to_dma_pfn(inv_info->granu.addr_info.addr),
5091                                         (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
5092                                         inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
5093
5094                         if (!info->ats_enabled)
5095                                 break;
5096                         /*
5097                          * Always flush device IOTLB if ATS is enabled. vIOMMU
5098                          * in the guest may assume IOTLB flush is inclusive,
5099                          * which is more efficient.
5100                          */
5101                         fallthrough;
5102                 case IOMMU_CACHE_INV_TYPE_DEV_IOTLB:
5103                         /*
5104                          * PASID-based device TLB invalidation does not support
5105                          * IOMMU_INV_GRANU_PASID granularity, only
5106                          * IOMMU_INV_GRANU_ADDR. To emulate a PASID-selective
5107                          * flush we therefore cover the entire 64-bit range:
5108                          * the user supplies only PASID info and no address,
5109                          * so set addr to 0 and size to the full address width.
5110                          */
5111                         if (inv_info->granularity == IOMMU_INV_GRANU_PASID) {
5112                                 size = 64 - VTD_PAGE_SHIFT;
5113                                 addr = 0;
5114                         } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
5115                                 addr = inv_info->granu.addr_info.addr;
5116                         }
5117
5118                         if (info->ats_enabled)
5119                                 qi_flush_dev_iotlb_pasid(iommu, sid,
5120                                                 info->pfsid, pasid,
5121                                                 info->ats_qdep, addr,
5122                                                 size);
5123                         else
5124                                 pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n");
5125                         break;
5126                 default:
5127                         dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n",
5128                                             cache_type);
5129                         ret = -EINVAL;
5130                 }
5131         }
5132 out_unlock:
5133         spin_unlock(&iommu->lock);
5134         spin_unlock_irqrestore(&device_domain_lock, flags);
5135
5136         return ret;
5137 }
5138 #endif
5139
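/*
 * Map [iova, iova + size) to @hpa in @domain: translate IOMMU_READ/WRITE/
 * CACHE into DMA_PTE_READ/WRITE/SNP, grow the domain's max_addr (bounded
 * by its address width), and hand the range to __domain_mapping() in VT-d
 * page frames.
 */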
5140 static int intel_iommu_map(struct iommu_domain *domain,
5141                            unsigned long iova, phys_addr_t hpa,
5142                            size_t size, int iommu_prot, gfp_t gfp)
5143 {
5144         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5145         u64 max_addr;
5146         int prot = 0;
5147
5148         if (iommu_prot & IOMMU_READ)
5149                 prot |= DMA_PTE_READ;
5150         if (iommu_prot & IOMMU_WRITE)
5151                 prot |= DMA_PTE_WRITE;
5152         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5153                 prot |= DMA_PTE_SNP;
5154
5155         max_addr = iova + size;
5156         if (dmar_domain->max_addr < max_addr) {
5157                 u64 end;
5158
5159                 /* check if minimum agaw is sufficient for mapped address */
5160                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
5161                 if (end < max_addr) {
5162                         pr_err("%s: iommu width (%d) is not sufficient for the mapped address (%llx)\n",
5163                                __func__, dmar_domain->gaw,
5164                                max_addr);
5165                         return -EFAULT;
5166                 }
5167                 dmar_domain->max_addr = max_addr;
5168         }
5169         /* Round up size to next multiple of PAGE_SIZE, if it and
5170            the low bits of hpa would take us onto the next page */
5171         size = aligned_nrpages(hpa, size);
5172         return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5173                                 hpa >> VTD_PAGE_SHIFT, size, prot);
5174 }
5175
5176 static int intel_iommu_map_pages(struct iommu_domain *domain,
5177                                  unsigned long iova, phys_addr_t paddr,
5178                                  size_t pgsize, size_t pgcount,
5179                                  int prot, gfp_t gfp, size_t *mapped)
5180 {
5181         unsigned long pgshift = __ffs(pgsize);
5182         size_t size = pgcount << pgshift;
5183         int ret;
5184
5185         if (pgsize != SZ_4K && pgsize != SZ_2M && pgsize != SZ_1G)
5186                 return -EINVAL;
5187
5188         if (!IS_ALIGNED(iova | paddr, pgsize))
5189                 return -EINVAL;
5190
5191         ret = intel_iommu_map(domain, iova, paddr, size, prot, gfp);
5192         if (!ret && mapped)
5193                 *mapped = size;
5194
5195         return ret;
5196 }
5197
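/*
 * Unmap at least @size bytes at @iova; when the IOVA hits a large-page PTE
 * the whole large page is unmapped. Freed page-table pages are collected
 * on gather->freelist and released in intel_iommu_tlb_sync() after the
 * IOTLB flush.
 */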
5198 static size_t intel_iommu_unmap(struct iommu_domain *domain,
5199                                 unsigned long iova, size_t size,
5200                                 struct iommu_iotlb_gather *gather)
5201 {
5202         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5203         unsigned long start_pfn, last_pfn;
5204         int level = 0;
5205
5206         /* Cope with horrid API which requires us to unmap more than the
5207            size argument if it happens to be a large-page mapping. */
5208         BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5209
5210         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5211                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
5212
5213         start_pfn = iova >> VTD_PAGE_SHIFT;
5214         last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5215
5216         gather->freelist = domain_unmap(dmar_domain, start_pfn,
5217                                         last_pfn, gather->freelist);
5218
5219         if (dmar_domain->max_addr == iova + size)
5220                 dmar_domain->max_addr = iova;
5221
5222         iommu_iotlb_gather_add_page(domain, gather, iova, size);
5223
5224         return size;
5225 }
5226
5227 static size_t intel_iommu_unmap_pages(struct iommu_domain *domain,
5228                                       unsigned long iova,
5229                                       size_t pgsize, size_t pgcount,
5230                                       struct iommu_iotlb_gather *gather)
5231 {
5232         unsigned long pgshift = __ffs(pgsize);
5233         size_t size = pgcount << pgshift;
5234
5235         return intel_iommu_unmap(domain, iova, size, gather);
5236 }
5237
5238 static void intel_iommu_tlb_sync(struct iommu_domain *domain,
5239                                  struct iommu_iotlb_gather *gather)
5240 {
5241         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5242         unsigned long iova_pfn = IOVA_PFN(gather->start);
5243         size_t size = gather->end - gather->start;
5244         unsigned long start_pfn;
5245         unsigned long nrpages;
5246         int iommu_id;
5247
5248         nrpages = aligned_nrpages(gather->start, size);
5249         start_pfn = mm_to_dma_pfn(iova_pfn);
5250
5251         for_each_domain_iommu(iommu_id, dmar_domain)
5252                 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5253                                       start_pfn, nrpages, !gather->freelist, 0);
5254
5255         dma_free_pagelist(gather->freelist);
5256 }
5257
5258 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5259                                             dma_addr_t iova)
5260 {
5261         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5262         struct dma_pte *pte;
5263         int level = 0;
5264         u64 phys = 0;
5265
5266         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
5267         if (pte && dma_pte_present(pte))
5268                 phys = dma_pte_addr(pte) +
5269                         (iova & (BIT_MASK(level_to_offset_bits(level) +
5270                                                 VTD_PAGE_SHIFT) - 1));
5271
5272         return phys;
5273 }
5274
5275 static bool intel_iommu_capable(enum iommu_cap cap)
5276 {
5277         if (cap == IOMMU_CAP_CACHE_COHERENCY)
5278                 return domain_update_iommu_snooping(NULL);
5279         if (cap == IOMMU_CAP_INTR_REMAP)
5280                 return irq_remapping_enabled == 1;
5281
5282         return false;
5283 }
5284
5285 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
5286 {
5287         struct intel_iommu *iommu;
5288
5289         iommu = device_to_iommu(dev, NULL, NULL);
5290         if (!iommu)
5291                 return ERR_PTR(-ENODEV);
5292
5293         if (translation_pre_enabled(iommu))
5294                 dev_iommu_priv_set(dev, DEFER_DEVICE_DOMAIN_INFO);
5295
5296         return &iommu->iommu;
5297 }
5298
5299 static void intel_iommu_release_device(struct device *dev)
5300 {
5301         struct intel_iommu *iommu;
5302
5303         iommu = device_to_iommu(dev, NULL, NULL);
5304         if (!iommu)
5305                 return;
5306
5307         dmar_remove_one_dev_info(dev);
5308
5309         set_dma_ops(dev, NULL);
5310 }
5311
5312 static void intel_iommu_probe_finalize(struct device *dev)
5313 {
5314         set_dma_ops(dev, NULL);
5315         iommu_setup_dma_ops(dev, 0, U64_MAX);
5316 }
5317
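/*
 * Report reserved regions for @device: RMRRs that target it (direct or
 * direct-relaxable), the optional low 16MB ISA range added by the floppy
 * workaround, and the IOAPIC/MSI window.
 */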
5318 static void intel_iommu_get_resv_regions(struct device *device,
5319                                          struct list_head *head)
5320 {
5321         int prot = DMA_PTE_READ | DMA_PTE_WRITE;
5322         struct iommu_resv_region *reg;
5323         struct dmar_rmrr_unit *rmrr;
5324         struct device *i_dev;
5325         int i;
5326
5327         down_read(&dmar_global_lock);
5328         for_each_rmrr_units(rmrr) {
5329                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5330                                           i, i_dev) {
5331                         struct iommu_resv_region *resv;
5332                         enum iommu_resv_type type;
5333                         size_t length;
5334
5335                         if (i_dev != device &&
5336                             !is_downstream_to_pci_bridge(device, i_dev))
5337                                 continue;
5338
5339                         length = rmrr->end_address - rmrr->base_address + 1;
5340
5341                         type = device_rmrr_is_relaxable(device) ?
5342                                 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
5343
5344                         resv = iommu_alloc_resv_region(rmrr->base_address,
5345                                                        length, prot, type);
5346                         if (!resv)
5347                                 break;
5348
5349                         list_add_tail(&resv->list, head);
5350                 }
5351         }
5352         up_read(&dmar_global_lock);
5353
5354 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5355         if (dev_is_pci(device)) {
5356                 struct pci_dev *pdev = to_pci_dev(device);
5357
5358                 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
5359                         reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
5360                                                    IOMMU_RESV_DIRECT_RELAXABLE);
5361                         if (reg)
5362                                 list_add_tail(&reg->list, head);
5363                 }
5364         }
5365 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5366
5367         reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5368                                       IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
5369                                       0, IOMMU_RESV_MSI);
5370         if (!reg)
5371                 return;
5372         list_add_tail(&reg->list, head);
5373 }
5374
5375 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
5376 {
5377         struct device_domain_info *info;
5378         struct context_entry *context;
5379         struct dmar_domain *domain;
5380         unsigned long flags;
5381         u64 ctx_lo;
5382         int ret;
5383
5384         domain = find_domain(dev);
5385         if (!domain)
5386                 return -EINVAL;
5387
5388         spin_lock_irqsave(&device_domain_lock, flags);
5389         spin_lock(&iommu->lock);
5390
5391         ret = -EINVAL;
5392         info = get_domain_info(dev);
5393         if (!info || !info->pasid_supported)
5394                 goto out;
5395
5396         context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5397         if (WARN_ON(!context))
5398                 goto out;
5399
5400         ctx_lo = context[0].lo;
5401
5402         if (!(ctx_lo & CONTEXT_PASIDE)) {
5403                 ctx_lo |= CONTEXT_PASIDE;
5404                 context[0].lo = ctx_lo;
5405                 wmb();
5406                 iommu->flush.flush_context(iommu,
5407                                            domain->iommu_did[iommu->seq_id],
5408                                            PCI_DEVID(info->bus, info->devfn),
5409                                            DMA_CCMD_MASK_NOBIT,
5410                                            DMA_CCMD_DEVICE_INVL);
5411         }
5412
5413         /* Enable PASID support in the device, if it wasn't already */
5414         if (!info->pasid_enabled)
5415                 iommu_enable_dev_iotlb(info);
5416
5417         ret = 0;
5418
5419  out:
5420         spin_unlock(&iommu->lock);
5421         spin_unlock_irqrestore(&device_domain_lock, flags);
5422
5423         return ret;
5424 }
5425
5426 static struct iommu_group *intel_iommu_device_group(struct device *dev)
5427 {
5428         if (dev_is_pci(dev))
5429                 return pci_device_group(dev);
5430         return generic_device_group(dev);
5431 }
5432
5433 static int intel_iommu_enable_auxd(struct device *dev)
5434 {
5435         struct device_domain_info *info;
5436         struct intel_iommu *iommu;
5437         unsigned long flags;
5438         int ret;
5439
5440         iommu = device_to_iommu(dev, NULL, NULL);
5441         if (!iommu || dmar_disabled)
5442                 return -EINVAL;
5443
5444         if (!sm_supported(iommu) || !pasid_supported(iommu))
5445                 return -EINVAL;
5446
5447         ret = intel_iommu_enable_pasid(iommu, dev);
5448         if (ret)
5449                 return -ENODEV;
5450
5451         spin_lock_irqsave(&device_domain_lock, flags);
5452         info = get_domain_info(dev);
5453         info->auxd_enabled = 1;
5454         spin_unlock_irqrestore(&device_domain_lock, flags);
5455
5456         return 0;
5457 }
5458
5459 static int intel_iommu_disable_auxd(struct device *dev)
5460 {
5461         struct device_domain_info *info;
5462         unsigned long flags;
5463
5464         spin_lock_irqsave(&device_domain_lock, flags);
5465         info = get_domain_info(dev);
5466         if (!WARN_ON(!info))
5467                 info->auxd_enabled = 0;
5468         spin_unlock_irqrestore(&device_domain_lock, flags);
5469
5470         return 0;
5471 }
5472
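/*
 * Enable Shared Virtual Addressing for @dev: requires an SVM-capable IOMMU
 * and PASID, PRI and ATS already enabled on the device; the device is then
 * added to the IOMMU's I/O page fault queue and a page fault handler is
 * registered.
 */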
5473 static int intel_iommu_enable_sva(struct device *dev)
5474 {
5475         struct device_domain_info *info = get_domain_info(dev);
5476         struct intel_iommu *iommu;
5477         int ret;
5478
5479         if (!info || dmar_disabled)
5480                 return -EINVAL;
5481
5482         iommu = info->iommu;
5483         if (!iommu)
5484                 return -EINVAL;
5485
5486         if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
5487                 return -ENODEV;
5488
5489         if (intel_iommu_enable_pasid(iommu, dev))
5490                 return -ENODEV;
5491
5492         if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
5493                 return -EINVAL;
5494
5495         ret = iopf_queue_add_device(iommu->iopf_queue, dev);
5496         if (!ret)
5497                 ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
5498
5499         return ret;
5500 }
5501
5502 static int intel_iommu_disable_sva(struct device *dev)
5503 {
5504         struct device_domain_info *info = get_domain_info(dev);
5505         struct intel_iommu *iommu = info->iommu;
5506         int ret;
5507
5508         ret = iommu_unregister_device_fault_handler(dev);
5509         if (!ret)
5510                 ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
5511
5512         return ret;
5513 }
5514
5515 static int intel_iommu_enable_iopf(struct device *dev)
5516 {
5517         struct device_domain_info *info = get_domain_info(dev);
5518
5519         if (info && info->pri_supported)
5520                 return 0;
5521
5522         return -ENODEV;
5523 }
5524
5525 static int
5526 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5527 {
5528         switch (feat) {
5529         case IOMMU_DEV_FEAT_AUX:
5530                 return intel_iommu_enable_auxd(dev);
5531
5532         case IOMMU_DEV_FEAT_IOPF:
5533                 return intel_iommu_enable_iopf(dev);
5534
5535         case IOMMU_DEV_FEAT_SVA:
5536                 return intel_iommu_enable_sva(dev);
5537
5538         default:
5539                 return -ENODEV;
5540         }
5541 }
5542
5543 static int
5544 intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5545 {
5546         switch (feat) {
5547         case IOMMU_DEV_FEAT_AUX:
5548                 return intel_iommu_disable_auxd(dev);
5549
5550         case IOMMU_DEV_FEAT_IOPF:
5551                 return 0;
5552
5553         case IOMMU_DEV_FEAT_SVA:
5554                 return intel_iommu_disable_sva(dev);
5555
5556         default:
5557                 return -ENODEV;
5558         }
5559 }
5560
5561 static bool
5562 intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5563 {
5564         struct device_domain_info *info = get_domain_info(dev);
5565
5566         if (feat == IOMMU_DEV_FEAT_AUX)
5567                 return scalable_mode_support() && info && info->auxd_enabled;
5568
5569         return false;
5570 }
5571
5572 static int
5573 intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5574 {
5575         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5576
5577         return dmar_domain->default_pasid > 0 ?
5578                         dmar_domain->default_pasid : -EINVAL;
5579 }
5580
5581 static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
5582                                            struct device *dev)
5583 {
5584         return attach_deferred(dev);
5585 }
5586
5587 static int
5588 intel_iommu_enable_nesting(struct iommu_domain *domain)
5589 {
5590         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5591         unsigned long flags;
5592         int ret = -ENODEV;
5593
5594         spin_lock_irqsave(&device_domain_lock, flags);
5595         if (list_empty(&dmar_domain->devices)) {
5596                 dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
5597                 dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
5598                 ret = 0;
5599         }
5600         spin_unlock_irqrestore(&device_domain_lock, flags);
5601
5602         return ret;
5603 }
5604
5605 /*
5606  * Check that the device does not live on an external facing PCI port that is
5607  * marked as untrusted. Such devices should not be able to apply quirks and
5608  * thus not be able to bypass the IOMMU restrictions.
5609  */
5610 static bool risky_device(struct pci_dev *pdev)
5611 {
5612         if (pdev->untrusted) {
5613                 pci_info(pdev,
5614                          "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
5615                          pdev->vendor, pdev->device);
5616                 pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
5617                 return true;
5618         }
5619         return false;
5620 }
5621
5622 static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
5623                                        unsigned long iova, size_t size)
5624 {
5625         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5626         unsigned long pages = aligned_nrpages(iova, size);
5627         unsigned long pfn = iova >> VTD_PAGE_SHIFT;
5628         struct intel_iommu *iommu;
5629         int iommu_id;
5630
5631         for_each_domain_iommu(iommu_id, dmar_domain) {
5632                 iommu = g_iommus[iommu_id];
5633                 __mapping_notify_one(iommu, dmar_domain, pfn, pages);
5634         }
5635 }
5636
5637 const struct iommu_ops intel_iommu_ops = {
5638         .capable                = intel_iommu_capable,
5639         .domain_alloc           = intel_iommu_domain_alloc,
5640         .domain_free            = intel_iommu_domain_free,
5641         .enable_nesting         = intel_iommu_enable_nesting,
5642         .attach_dev             = intel_iommu_attach_device,
5643         .detach_dev             = intel_iommu_detach_device,
5644         .aux_attach_dev         = intel_iommu_aux_attach_device,
5645         .aux_detach_dev         = intel_iommu_aux_detach_device,
5646         .aux_get_pasid          = intel_iommu_aux_get_pasid,
5647         .map_pages              = intel_iommu_map_pages,
5648         .unmap_pages            = intel_iommu_unmap_pages,
5649         .iotlb_sync_map         = intel_iommu_iotlb_sync_map,
5650         .flush_iotlb_all        = intel_flush_iotlb_all,
5651         .iotlb_sync             = intel_iommu_tlb_sync,
5652         .iova_to_phys           = intel_iommu_iova_to_phys,
5653         .probe_device           = intel_iommu_probe_device,
5654         .probe_finalize         = intel_iommu_probe_finalize,
5655         .release_device         = intel_iommu_release_device,
5656         .get_resv_regions       = intel_iommu_get_resv_regions,
5657         .put_resv_regions       = generic_iommu_put_resv_regions,
5658         .device_group           = intel_iommu_device_group,
5659         .dev_feat_enabled       = intel_iommu_dev_feat_enabled,
5660         .dev_enable_feat        = intel_iommu_dev_enable_feat,
5661         .dev_disable_feat       = intel_iommu_dev_disable_feat,
5662         .is_attach_deferred     = intel_iommu_is_attach_deferred,
5663         .def_domain_type        = device_def_domain_type,
5664         .pgsize_bitmap          = SZ_4K,
5665 #ifdef CONFIG_INTEL_IOMMU_SVM
5666         .cache_invalidate       = intel_iommu_sva_invalidate,
5667         .sva_bind_gpasid        = intel_svm_bind_gpasid,
5668         .sva_unbind_gpasid      = intel_svm_unbind_gpasid,
5669         .sva_bind               = intel_svm_bind,
5670         .sva_unbind             = intel_svm_unbind,
5671         .sva_get_pasid          = intel_svm_get_pasid,
5672         .page_response          = intel_svm_page_response,
5673 #endif
5674 };
5675
5676 static void quirk_iommu_igfx(struct pci_dev *dev)
5677 {
5678         if (risky_device(dev))
5679                 return;
5680
5681         pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
5682         dmar_map_gfx = 0;
5683 }
5684
5685 /* G4x/GM45 integrated gfx dmar support is totally busted. */
5686 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
5687 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
5688 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
5689 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
5690 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
5691 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
5692 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
5693
5694 /* Broadwell igfx malfunctions with dmar */
5695 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
5696 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
5697 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
5698 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
5699 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
5700 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
5701 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
5702 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
5703 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
5704 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
5705 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
5706 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
5707 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
5708 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
5709 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
5710 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
5711 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
5712 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
5713 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
5714 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
5715 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
5716 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
5717 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
5718 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
5719
5720 static void quirk_iommu_rwbf(struct pci_dev *dev)
5721 {
5722         if (risky_device(dev))
5723                 return;
5724
5725         /*
5726          * Mobile 4 Series Chipset neglects to set RWBF capability,
5727          * but needs it. Same seems to hold for the desktop versions.
5728          */
5729         pci_info(dev, "Forcing write-buffer flush capability\n");
5730         rwbf_quirk = 1;
5731 }
5732
5733 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5734 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5735 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5736 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5737 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5738 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5739 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
5740
5741 #define GGC 0x52
5742 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
5743 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
5744 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
5745 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
5746 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
5747 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
5748 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
5749 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
5750
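/*
 * Calpella/Ironlake integrated graphics: read the GGC register and, if the
 * BIOS allocated no VT-enabled graphics stolen memory (shadow GTT), disable
 * the IOMMU for graphics entirely; otherwise fall back to strict,
 * unbatched IOTLB flushing.
 */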
5751 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5752 {
5753         unsigned short ggc;
5754
5755         if (risky_device(dev))
5756                 return;
5757
5758         if (pci_read_config_word(dev, GGC, &ggc))
5759                 return;
5760
5761         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
5762                 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
5763                 dmar_map_gfx = 0;
5764         } else if (dmar_map_gfx) {
5765                 /* we have to ensure the gfx device is idle before we flush */
5766                 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
5767                 iommu_set_dma_strict();
5768         }
5769 }
5770 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5771 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5772 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5773 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5774
5775 static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
5776 {
5777         unsigned short ver;
5778
5779         if (!IS_GFX_DEVICE(dev))
5780                 return;
5781
5782         ver = (dev->device >> 8) & 0xff;
5783         if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
5784             ver != 0x4e && ver != 0x8a && ver != 0x98 &&
5785             ver != 0x9a)
5786                 return;
5787
5788         if (risky_device(dev))
5789                 return;
5790
5791         pci_info(dev, "Skip IOMMU disabling for graphics\n");
5792         iommu_skip_te_disable = 1;
5793 }
5794 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
5795
5796 /* On Tylersburg chipsets, some BIOSes have been known to enable the
5797    ISOCH DMAR unit for the Azalia sound device, but not give it any
5798    TLB entries, which causes it to deadlock. Check for that.  We do
5799    this in a function called from init_dmars(), instead of in a PCI
5800    quirk, because we don't want to print the obnoxious "BIOS broken"
5801    message if VT-d is actually disabled.
5802 */
5803 static void __init check_tylersburg_isoch(void)
5804 {
5805         struct pci_dev *pdev;
5806         uint32_t vtisochctrl;
5807
5808         /* If there's no Azalia in the system anyway, forget it. */
5809         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5810         if (!pdev)
5811                 return;
5812
5813         if (risky_device(pdev)) {
5814                 pci_dev_put(pdev);
5815                 return;
5816         }
5817
5818         pci_dev_put(pdev);
5819
5820         /* System Management Registers. Might be hidden, in which case
5821            we can't do the sanity check. But that's OK, because the
5822            known-broken BIOSes _don't_ actually hide it, so far. */
5823         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5824         if (!pdev)
5825                 return;
5826
5827         if (risky_device(pdev)) {
5828                 pci_dev_put(pdev);
5829                 return;
5830         }
5831
5832         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5833                 pci_dev_put(pdev);
5834                 return;
5835         }
5836
5837         pci_dev_put(pdev);
5838
5839         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5840         if (vtisochctrl & 1)
5841                 return;
5842
5843         /* Drop all bits other than the number of TLB entries */
5844         vtisochctrl &= 0x1c;
5845
5846         /* If we have the recommended number of TLB entries (16), fine. */
5847         if (vtisochctrl == 0x10)
5848                 return;
5849
5850         /* Zero TLB entries? You get to ride the short bus to school. */
5851         if (!vtisochctrl) {
5852                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5853                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5854                      dmi_get_system_info(DMI_BIOS_VENDOR),
5855                      dmi_get_system_info(DMI_BIOS_VERSION),
5856                      dmi_get_system_info(DMI_PRODUCT_VERSION));
5857                 iommu_identity_mapping |= IDENTMAP_AZALIA;
5858                 return;
5859         }
5860
5861         pr_warn("Recommended number of TLB entries for the ISOCH unit is 16; your BIOS set %d\n",
5862                vtisochctrl);
5863 }