drivers/iommu/intel-iommu.c (git blame)
ba395927 1/*
ea8ea460 2 * Copyright © 2006-2014 Intel Corporation.
ba395927
KA
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
ea8ea460
DW
13 * Authors: David Woodhouse <[email protected]>,
14 * Ashok Raj <[email protected]>,
15 * Shaohua Li <[email protected]>,
16 * Anil S Keshavamurthy <[email protected]>,
17 * Fenghua Yu <[email protected]>
9f10e5bf 18 * Joerg Roedel <[email protected]>
ba395927
KA
19 */
20
9f10e5bf
JR
21#define pr_fmt(fmt) "DMAR: " fmt
22
ba395927
KA
23#include <linux/init.h>
24#include <linux/bitmap.h>
5e0d2a6f 25#include <linux/debugfs.h>
54485c30 26#include <linux/export.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
75f05569 35#include <linux/memory.h>
aa473240 36#include <linux/cpu.h>
5e0d2a6f 37#include <linux/timer.h>
dfddb969 38#include <linux/io.h>
38717946 39#include <linux/iova.h>
5d450806 40#include <linux/iommu.h>
38717946 41#include <linux/intel-iommu.h>
134fac3f 42#include <linux/syscore_ops.h>
69575d38 43#include <linux/tboot.h>
adb2fe02 44#include <linux/dmi.h>
5cdede24 45#include <linux/pci-ats.h>
0ee332c1 46#include <linux/memblock.h>
36746436 47#include <linux/dma-contiguous.h>
fec777c3 48#include <linux/dma-direct.h>
091d42e4 49#include <linux/crash_dump.h>
8a8f422d 50#include <asm/irq_remapping.h>
ba395927 51#include <asm/cacheflush.h>
46a7fa27 52#include <asm/iommu.h>
ba395927 53
078e1ee2 54#include "irq_remapping.h"
56283174 55#include "intel-pasid.h"
078e1ee2 56
5b6985ce
FY
57#define ROOT_SIZE VTD_PAGE_SIZE
58#define CONTEXT_SIZE VTD_PAGE_SIZE
59
ba395927 60#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
18436afd 61#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
ba395927 62#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 63#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
64
65#define IOAPIC_RANGE_START (0xfee00000)
66#define IOAPIC_RANGE_END (0xfeefffff)
67#define IOVA_START_ADDR (0x1000)
68
5e3b4a15 69#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
ba395927 70
4ed0d3e6 71#define MAX_AGAW_WIDTH 64
5c645b35 72#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 73
2ebe3151
DW
74#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
75#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
76
77/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
78 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
79#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
80 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
81#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
ba395927 82
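/*
 * Worked example (illustrative, not from the original file): with a guest
 * address width of 48 bits and VTD_PAGE_SHIFT == 12, __DOMAIN_MAX_PFN(48)
 * is (1ULL << 36) - 1 and DOMAIN_MAX_ADDR(48) is 2^48 - 4096, i.e. the
 * byte address of the start of the last mappable 4KiB page. On 32-bit
 * kernels DOMAIN_MAX_PFN() clamps that PFN to ULONG_MAX.
 */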
1b722500
RM
83/* IO virtual address start page frame number */
84#define IOVA_START_PFN (1)
85
f27be03b 86#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
5e0d2a6f 87
df08cdc7
AM
88/* page table handling */
89#define LEVEL_STRIDE (9)
90#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
91
6d1c56a9
OBC
92/*
 93 * This bitmap is used to advertise the page sizes our hardware supports
94 * to the IOMMU core, which will then use this information to split
95 * physically contiguous memory regions it is mapping into page sizes
96 * that we support.
97 *
98 * Traditionally the IOMMU core just handed us the mappings directly,
99 * after making sure the size is an order of a 4KiB page and that the
100 * mapping has natural alignment.
101 *
102 * To retain this behavior, we currently advertise that we support
103 * all page sizes that are an order of 4KiB.
104 *
105 * If at some point we'd like to utilize the IOMMU core's new behavior,
106 * we could change this to advertise the real page sizes we support.
107 */
108#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
109
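/*
 * Worked example (illustrative): ~0xFFFUL has every bit above bit 11 set,
 * so this bitmap advertises 4KiB (bit 12), 8KiB, ..., 2MiB (bit 21),
 * 1GiB (bit 30) and every other power-of-two size >= 4KiB, while the
 * hardware page tables themselves use 4KiB pages plus optional
 * superpages (2MiB, 1GiB, ...).
 */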
df08cdc7
AM
110static inline int agaw_to_level(int agaw)
111{
112 return agaw + 2;
113}
114
115static inline int agaw_to_width(int agaw)
116{
5c645b35 117 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
118}
119
120static inline int width_to_agaw(int width)
121{
5c645b35 122 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
123}
124
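/*
 * Worked example (illustrative): with LEVEL_STRIDE == 9, an adjusted guest
 * address width (agaw) of 1/2/3 corresponds to 39/48/57 bits of address
 * space and 3/4/5 page-table levels respectively, e.g.
 * width_to_agaw(48) == 2, agaw_to_width(2) == 48, agaw_to_level(2) == 4.
 */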
125static inline unsigned int level_to_offset_bits(int level)
126{
127 return (level - 1) * LEVEL_STRIDE;
128}
129
130static inline int pfn_level_offset(unsigned long pfn, int level)
131{
132 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
133}
134
135static inline unsigned long level_mask(int level)
136{
137 return -1UL << level_to_offset_bits(level);
138}
139
140static inline unsigned long level_size(int level)
141{
142 return 1UL << level_to_offset_bits(level);
143}
144
145static inline unsigned long align_to_level(unsigned long pfn, int level)
146{
147 return (pfn + level_size(level) - 1) & level_mask(level);
148}
fd18de50 149
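/*
 * Worked example (illustrative): at level 2 the index starts at bit 9 of
 * the page frame number, so level_size(2) == 512 pfns (2MiB of IOVA),
 * level_mask(2) == ~0x1ffUL, and pfn_level_offset(pfn, 2) picks the
 * 9-bit table index ((pfn >> 9) & 0x1ff) for that level.
 */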
6dd9a7c7
YS
150static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
151{
5c645b35 152 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
153}
154
dd4e8319
DW
 155/* VT-d pages must never be _larger_ than MM pages. Otherwise these
 156 conversions are never going to work. */
157static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
158{
159 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
160}
161
162static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
163{
164 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
165}
166static inline unsigned long page_to_dma_pfn(struct page *pg)
167{
168 return mm_to_dma_pfn(page_to_pfn(pg));
169}
170static inline unsigned long virt_to_dma_pfn(void *p)
171{
172 return page_to_dma_pfn(virt_to_page(p));
173}
174
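/*
 * Worked example (illustrative): on x86 PAGE_SHIFT and VTD_PAGE_SHIFT are
 * both 12, so mm_to_dma_pfn()/dma_to_mm_pfn() are identity conversions;
 * the shifts only matter on configurations where the CPU page size is
 * larger than the 4KiB VT-d page size.
 */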
d9630fe9
WH
175/* global iommu list, set NULL for ignored DMAR units */
176static struct intel_iommu **g_iommus;
177
e0fc7e0b 178static void __init check_tylersburg_isoch(void);
9af88143
DW
179static int rwbf_quirk;
180
b779260b
JC
181/*
182 * set to 1 to panic kernel if can't successfully enable VT-d
183 * (used when kernel is launched w/ TXT)
184 */
185static int force_on = 0;
bfd20f1c 186int intel_iommu_tboot_noforce;
b779260b 187
46b08e1a 188#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
46b08e1a 189
091d42e4
JR
190/*
191 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
192 * if marked present.
193 */
194static phys_addr_t root_entry_lctp(struct root_entry *re)
195{
196 if (!(re->lo & 1))
197 return 0;
198
199 return re->lo & VTD_PAGE_MASK;
200}
201
202/*
203 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
204 * if marked present.
205 */
206static phys_addr_t root_entry_uctp(struct root_entry *re)
207{
208 if (!(re->hi & 1))
209 return 0;
46b08e1a 210
091d42e4
JR
211 return re->hi & VTD_PAGE_MASK;
212}
c07e7d21 213
cf484d0e
JR
214static inline void context_clear_pasid_enable(struct context_entry *context)
215{
216 context->lo &= ~(1ULL << 11);
217}
218
219static inline bool context_pasid_enabled(struct context_entry *context)
220{
221 return !!(context->lo & (1ULL << 11));
222}
223
224static inline void context_set_copied(struct context_entry *context)
225{
226 context->hi |= (1ull << 3);
227}
228
229static inline bool context_copied(struct context_entry *context)
230{
231 return !!(context->hi & (1ULL << 3));
232}
233
234static inline bool __context_present(struct context_entry *context)
c07e7d21
MM
235{
236 return (context->lo & 1);
237}
cf484d0e 238
26b86092 239bool context_present(struct context_entry *context)
cf484d0e
JR
240{
241 return context_pasid_enabled(context) ?
242 __context_present(context) :
243 __context_present(context) && !context_copied(context);
244}
245
c07e7d21
MM
246static inline void context_set_present(struct context_entry *context)
247{
248 context->lo |= 1;
249}
250
251static inline void context_set_fault_enable(struct context_entry *context)
252{
253 context->lo &= (((u64)-1) << 2) | 1;
254}
255
c07e7d21
MM
256static inline void context_set_translation_type(struct context_entry *context,
257 unsigned long value)
258{
259 context->lo &= (((u64)-1) << 4) | 3;
260 context->lo |= (value & 3) << 2;
261}
262
263static inline void context_set_address_root(struct context_entry *context,
264 unsigned long value)
265{
1a2262f9 266 context->lo &= ~VTD_PAGE_MASK;
c07e7d21
MM
267 context->lo |= value & VTD_PAGE_MASK;
268}
269
270static inline void context_set_address_width(struct context_entry *context,
271 unsigned long value)
272{
273 context->hi |= value & 7;
274}
275
276static inline void context_set_domain_id(struct context_entry *context,
277 unsigned long value)
278{
279 context->hi |= (value & ((1 << 16) - 1)) << 8;
280}
281
dbcd861f
JR
282static inline int context_domain_id(struct context_entry *c)
283{
284 return((c->hi >> 8) & 0xffff);
285}
286
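/*
 * Illustrative summary derived from the setters above (not a restatement
 * of the VT-d spec): bit 0 of the low word is the present bit, bits 3:2
 * select the translation type, and the high word carries the address
 * width in bits 2:0 and the 16-bit domain id in bits 23:8; for example,
 * context_set_domain_id(c, 0x1234) ORs 0x1234 << 8 into c->hi.
 */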
c07e7d21
MM
287static inline void context_clear_entry(struct context_entry *context)
288{
289 context->lo = 0;
290 context->hi = 0;
291}
7a8fc25e 292
622ba12a
MM
293/*
294 * 0: readable
295 * 1: writable
296 * 2-6: reserved
297 * 7: super page
9cf06697
SY
298 * 8-10: available
299 * 11: snoop behavior
622ba12a
MM
 300 * 12-63: Host physical address
301 */
302struct dma_pte {
303 u64 val;
304};
622ba12a 305
19c239ce
MM
306static inline void dma_clear_pte(struct dma_pte *pte)
307{
308 pte->val = 0;
309}
310
19c239ce
MM
311static inline u64 dma_pte_addr(struct dma_pte *pte)
312{
c85994e4
DW
313#ifdef CONFIG_64BIT
314 return pte->val & VTD_PAGE_MASK;
315#else
316 /* Must have a full atomic 64-bit read */
1a8bd481 317 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 318#endif
19c239ce
MM
319}
320
19c239ce
MM
321static inline bool dma_pte_present(struct dma_pte *pte)
322{
323 return (pte->val & 3) != 0;
324}
622ba12a 325
4399c8bf
AK
326static inline bool dma_pte_superpage(struct dma_pte *pte)
327{
c3c75eb7 328 return (pte->val & DMA_PTE_LARGE_PAGE);
4399c8bf
AK
329}
330
75e6bf96
DW
331static inline int first_pte_in_page(struct dma_pte *pte)
332{
333 return !((unsigned long)pte & ~VTD_PAGE_MASK);
334}
335
2c2e2c38
FY
336/*
 337 * This domain is a static identity mapping domain.
 338 * 1. This domain creates a static 1:1 mapping to all usable memory.
 339 * 2. It maps to each iommu if successful.
 340 * 3. Each iommu maps to this domain if successful.
341 */
19943b0e
DW
342static struct dmar_domain *si_domain;
343static int hw_pass_through = 1;
2c2e2c38 344
28ccce0d
JR
345/*
 346 * Domain represents a virtual machine; more than one device
1ce28feb
WH
 347 * across iommus may be owned by one domain, e.g. a KVM guest.
348 */
ab8dfe25 349#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
1ce28feb 350
2c2e2c38 351/* si_domain contains multiple devices */
ab8dfe25 352#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
2c2e2c38 353
29a27719
JR
354#define for_each_domain_iommu(idx, domain) \
355 for (idx = 0; idx < g_num_of_iommus; idx++) \
356 if (domain->iommu_refcnt[idx])
357
b94e4117
JL
358struct dmar_rmrr_unit {
359 struct list_head list; /* list of rmrr units */
360 struct acpi_dmar_header *hdr; /* ACPI header */
361 u64 base_address; /* reserved base address*/
362 u64 end_address; /* reserved end address */
832bd858 363 struct dmar_dev_scope *devices; /* target devices */
b94e4117 364 int devices_cnt; /* target device count */
0659b8dc 365 struct iommu_resv_region *resv; /* reserved region handle */
b94e4117
JL
366};
367
368struct dmar_atsr_unit {
369 struct list_head list; /* list of ATSR units */
370 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 371 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
372 int devices_cnt; /* target device count */
373 u8 include_all:1; /* include all ports */
374};
375
376static LIST_HEAD(dmar_atsr_units);
377static LIST_HEAD(dmar_rmrr_units);
378
379#define for_each_rmrr_units(rmrr) \
380 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
381
5e0d2a6f 382/* bitmap for indexing intel_iommus */
5e0d2a6f 383static int g_num_of_iommus;
384
92d03cc8 385static void domain_exit(struct dmar_domain *domain);
ba395927 386static void domain_remove_dev_info(struct dmar_domain *domain);
e6de0f8d
JR
387static void dmar_remove_one_dev_info(struct dmar_domain *domain,
388 struct device *dev);
127c7615 389static void __dmar_remove_one_dev_info(struct device_domain_info *info);
2452d9db
JR
390static void domain_context_clear(struct intel_iommu *iommu,
391 struct device *dev);
2a46ddf7
JL
392static int domain_detach_iommu(struct dmar_domain *domain,
393 struct intel_iommu *iommu);
ba395927 394
d3f13810 395#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
396int dmar_disabled = 0;
397#else
398int dmar_disabled = 1;
d3f13810 399#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 400
8bc1f85c
ED
401int intel_iommu_enabled = 0;
402EXPORT_SYMBOL_GPL(intel_iommu_enabled);
403
2d9e667e 404static int dmar_map_gfx = 1;
7d3b03ce 405static int dmar_forcedac;
5e0d2a6f 406static int intel_iommu_strict;
6dd9a7c7 407static int intel_iommu_superpage = 1;
c83b2f20 408static int intel_iommu_ecs = 1;
2db1581e 409static int intel_iommu_pasid28;
ae853ddb 410static int iommu_identity_mapping;
c83b2f20 411
ae853ddb
DW
412#define IDENTMAP_ALL 1
413#define IDENTMAP_GFX 2
414#define IDENTMAP_AZALIA 4
c83b2f20 415
2db1581e
LB
416/* Broadwell and Skylake have broken ECS support — normal so-called "second
417 * level" translation of DMA requests-without-PASID doesn't actually happen
418 * unless you also set the NESTE bit in an extended context-entry. Which of
419 * course means that SVM doesn't work because it's trying to do nested
420 * translation of the physical addresses it finds in the process page tables,
421 * through the IOVA->phys mapping found in the "second level" page tables.
422 *
423 * The VT-d specification was retroactively changed to change the definition
424 * of the capability bits and pretend that Broadwell/Skylake never happened...
425 * but unfortunately the wrong bit was changed. It's ECS which is broken, but
426 * for some reason it was the PASID capability bit which was redefined (from
427 * bit 28 on BDW/SKL to bit 40 in future).
428 *
429 * So our test for ECS needs to eschew those implementations which set the old
 430 * PASID capability bit 28, since those are the ones on which ECS is broken.
431 * Unless we are working around the 'pasid28' limitations, that is, by putting
432 * the device into passthrough mode for normal DMA and thus masking the bug.
433 */
434#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
435 (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
436/* PASID support is thus enabled if ECS is enabled and *either* of the old
437 * or new capability bits are set. */
438#define pasid_enabled(iommu) (ecs_enabled(iommu) && \
439 (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
ba395927 440
c0771df8
DW
441int intel_iommu_gfx_mapped;
442EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
443
ba395927
KA
444#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
445static DEFINE_SPINLOCK(device_domain_lock);
446static LIST_HEAD(device_domain_list);
447
85319dcc
LB
448/*
449 * Iterate over elements in device_domain_list and call the specified
450 * callback @fn against each element. This helper should only be used
 451 * in a context where the device_domain_lock is already held.
452 */
453int for_each_device_domain(int (*fn)(struct device_domain_info *info,
454 void *data), void *data)
455{
456 int ret = 0;
457 struct device_domain_info *info;
458
459 assert_spin_locked(&device_domain_lock);
460 list_for_each_entry(info, &device_domain_list, global) {
461 ret = fn(info, data);
462 if (ret)
463 return ret;
464 }
465
466 return 0;
467}
468
b0119e87 469const struct iommu_ops intel_iommu_ops;
a8bcbb0d 470
4158c2ec
JR
471static bool translation_pre_enabled(struct intel_iommu *iommu)
472{
473 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
474}
475
091d42e4
JR
476static void clear_translation_pre_enabled(struct intel_iommu *iommu)
477{
478 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
479}
480
4158c2ec
JR
481static void init_translation_status(struct intel_iommu *iommu)
482{
483 u32 gsts;
484
485 gsts = readl(iommu->reg + DMAR_GSTS_REG);
486 if (gsts & DMA_GSTS_TES)
487 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
488}
489
00a77deb
JR
 490/* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
491static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
492{
493 return container_of(dom, struct dmar_domain, domain);
494}
495
ba395927
KA
496static int __init intel_iommu_setup(char *str)
497{
498 if (!str)
499 return -EINVAL;
500 while (*str) {
0cd5c3c8
KM
501 if (!strncmp(str, "on", 2)) {
502 dmar_disabled = 0;
9f10e5bf 503 pr_info("IOMMU enabled\n");
0cd5c3c8 504 } else if (!strncmp(str, "off", 3)) {
ba395927 505 dmar_disabled = 1;
9f10e5bf 506 pr_info("IOMMU disabled\n");
ba395927
KA
507 } else if (!strncmp(str, "igfx_off", 8)) {
508 dmar_map_gfx = 0;
9f10e5bf 509 pr_info("Disable GFX device mapping\n");
7d3b03ce 510 } else if (!strncmp(str, "forcedac", 8)) {
9f10e5bf 511 pr_info("Forcing DAC for PCI devices\n");
7d3b03ce 512 dmar_forcedac = 1;
5e0d2a6f 513 } else if (!strncmp(str, "strict", 6)) {
9f10e5bf 514 pr_info("Disable batched IOTLB flush\n");
5e0d2a6f 515 intel_iommu_strict = 1;
6dd9a7c7 516 } else if (!strncmp(str, "sp_off", 6)) {
9f10e5bf 517 pr_info("Disable supported super page\n");
6dd9a7c7 518 intel_iommu_superpage = 0;
c83b2f20
DW
519 } else if (!strncmp(str, "ecs_off", 7)) {
520 printk(KERN_INFO
521 "Intel-IOMMU: disable extended context table support\n");
522 intel_iommu_ecs = 0;
2db1581e
LB
523 } else if (!strncmp(str, "pasid28", 7)) {
524 printk(KERN_INFO
525 "Intel-IOMMU: enable pre-production PASID support\n");
526 intel_iommu_pasid28 = 1;
527 iommu_identity_mapping |= IDENTMAP_GFX;
bfd20f1c
SL
528 } else if (!strncmp(str, "tboot_noforce", 13)) {
529 printk(KERN_INFO
530 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
531 intel_iommu_tboot_noforce = 1;
ba395927
KA
532 }
533
534 str += strcspn(str, ",");
535 while (*str == ',')
536 str++;
537 }
538 return 0;
539}
540__setup("intel_iommu=", intel_iommu_setup);
541
542static struct kmem_cache *iommu_domain_cache;
543static struct kmem_cache *iommu_devinfo_cache;
ba395927 544
9452d5bf
JR
545static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
546{
8bf47816
JR
547 struct dmar_domain **domains;
548 int idx = did >> 8;
549
550 domains = iommu->domains[idx];
551 if (!domains)
552 return NULL;
553
554 return domains[did & 0xff];
9452d5bf
JR
555}
556
557static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
558 struct dmar_domain *domain)
559{
8bf47816
JR
560 struct dmar_domain **domains;
561 int idx = did >> 8;
562
563 if (!iommu->domains[idx]) {
564 size_t size = 256 * sizeof(struct dmar_domain *);
565 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
566 }
567
568 domains = iommu->domains[idx];
569 if (WARN_ON(!domains))
570 return;
571 else
572 domains[did & 0xff] = domain;
9452d5bf
JR
573}
574
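/*
 * Illustrative note on the lookup above: iommu->domains is a two-level
 * table of 256-entry pages, indexed by the high byte of the domain id and
 * then the low byte, so e.g. did 0x1234 lands in iommu->domains[0x12][0x34].
 * The second-level page is allocated lazily in set_iommu_domain().
 */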
9ddbfb42 575void *alloc_pgtable_page(int node)
eb3fa7cb 576{
4c923d47
SS
577 struct page *page;
578 void *vaddr = NULL;
eb3fa7cb 579
4c923d47
SS
580 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
581 if (page)
582 vaddr = page_address(page);
eb3fa7cb 583 return vaddr;
ba395927
KA
584}
585
9ddbfb42 586void free_pgtable_page(void *vaddr)
ba395927
KA
587{
588 free_page((unsigned long)vaddr);
589}
590
591static inline void *alloc_domain_mem(void)
592{
354bb65e 593 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
594}
595
38717946 596static void free_domain_mem(void *vaddr)
ba395927
KA
597{
598 kmem_cache_free(iommu_domain_cache, vaddr);
599}
600
601static inline void * alloc_devinfo_mem(void)
602{
354bb65e 603 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
604}
605
606static inline void free_devinfo_mem(void *vaddr)
607{
608 kmem_cache_free(iommu_devinfo_cache, vaddr);
609}
610
ab8dfe25
JL
611static inline int domain_type_is_vm(struct dmar_domain *domain)
612{
613 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
614}
615
28ccce0d
JR
616static inline int domain_type_is_si(struct dmar_domain *domain)
617{
618 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
619}
620
ab8dfe25
JL
621static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
622{
623 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
624 DOMAIN_FLAG_STATIC_IDENTITY);
625}
1b573683 626
162d1b10
JL
627static inline int domain_pfn_supported(struct dmar_domain *domain,
628 unsigned long pfn)
629{
630 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
631
632 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
633}
634
4ed0d3e6 635static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
636{
637 unsigned long sagaw;
638 int agaw = -1;
639
640 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 641 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
642 agaw >= 0; agaw--) {
643 if (test_bit(agaw, &sagaw))
644 break;
645 }
646
647 return agaw;
648}
649
4ed0d3e6
FY
650/*
651 * Calculate max SAGAW for each iommu.
652 */
653int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
654{
655 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
656}
657
658/*
 659 * Calculate agaw for each iommu.
 660 * "SAGAW" may be different across iommus; use a default agaw, and
 661 * get a smaller supported agaw for iommus that don't support the default agaw.
662 */
663int iommu_calculate_agaw(struct intel_iommu *iommu)
664{
665 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
666}
667
2c2e2c38 668/* This function only returns a single iommu in a domain */
9ddbfb42 669struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
8c11e798
WH
670{
671 int iommu_id;
672
2c2e2c38 673 /* si_domain and vm domain should not get here. */
ab8dfe25 674 BUG_ON(domain_type_is_vm_or_si(domain));
29a27719
JR
675 for_each_domain_iommu(iommu_id, domain)
676 break;
677
8c11e798
WH
678 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
679 return NULL;
680
681 return g_iommus[iommu_id];
682}
683
8e604097
WH
684static void domain_update_iommu_coherency(struct dmar_domain *domain)
685{
d0501960
DW
686 struct dmar_drhd_unit *drhd;
687 struct intel_iommu *iommu;
2f119c78
QL
688 bool found = false;
689 int i;
2e12bc29 690
d0501960 691 domain->iommu_coherency = 1;
8e604097 692
29a27719 693 for_each_domain_iommu(i, domain) {
2f119c78 694 found = true;
8e604097
WH
695 if (!ecap_coherent(g_iommus[i]->ecap)) {
696 domain->iommu_coherency = 0;
697 break;
698 }
8e604097 699 }
d0501960
DW
700 if (found)
701 return;
702
703 /* No hardware attached; use lowest common denominator */
704 rcu_read_lock();
705 for_each_active_iommu(iommu, drhd) {
706 if (!ecap_coherent(iommu->ecap)) {
707 domain->iommu_coherency = 0;
708 break;
709 }
710 }
711 rcu_read_unlock();
8e604097
WH
712}
713
161f6934 714static int domain_update_iommu_snooping(struct intel_iommu *skip)
58c610bd 715{
161f6934
JL
716 struct dmar_drhd_unit *drhd;
717 struct intel_iommu *iommu;
718 int ret = 1;
58c610bd 719
161f6934
JL
720 rcu_read_lock();
721 for_each_active_iommu(iommu, drhd) {
722 if (iommu != skip) {
723 if (!ecap_sc_support(iommu->ecap)) {
724 ret = 0;
725 break;
726 }
58c610bd 727 }
58c610bd 728 }
161f6934
JL
729 rcu_read_unlock();
730
731 return ret;
58c610bd
SY
732}
733
161f6934 734static int domain_update_iommu_superpage(struct intel_iommu *skip)
6dd9a7c7 735{
8140a95d 736 struct dmar_drhd_unit *drhd;
161f6934 737 struct intel_iommu *iommu;
8140a95d 738 int mask = 0xf;
6dd9a7c7
YS
739
740 if (!intel_iommu_superpage) {
161f6934 741 return 0;
6dd9a7c7
YS
742 }
743
8140a95d 744 /* set iommu_superpage to the smallest common denominator */
0e242612 745 rcu_read_lock();
8140a95d 746 for_each_active_iommu(iommu, drhd) {
161f6934
JL
747 if (iommu != skip) {
748 mask &= cap_super_page_val(iommu->cap);
749 if (!mask)
750 break;
6dd9a7c7
YS
751 }
752 }
0e242612
JL
753 rcu_read_unlock();
754
161f6934 755 return fls(mask);
6dd9a7c7
YS
756}
757
58c610bd
SY
758/* Some capabilities may be different across iommus */
759static void domain_update_iommu_cap(struct dmar_domain *domain)
760{
761 domain_update_iommu_coherency(domain);
161f6934
JL
762 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
763 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
58c610bd
SY
764}
765
26b86092
SM
766struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
767 u8 devfn, int alloc)
03ecc32c
DW
768{
769 struct root_entry *root = &iommu->root_entry[bus];
770 struct context_entry *context;
771 u64 *entry;
772
4df4eab1 773 entry = &root->lo;
c83b2f20 774 if (ecs_enabled(iommu)) {
03ecc32c
DW
775 if (devfn >= 0x80) {
776 devfn -= 0x80;
777 entry = &root->hi;
778 }
779 devfn *= 2;
780 }
03ecc32c
DW
781 if (*entry & 1)
782 context = phys_to_virt(*entry & VTD_PAGE_MASK);
783 else {
784 unsigned long phy_addr;
785 if (!alloc)
786 return NULL;
787
788 context = alloc_pgtable_page(iommu->node);
789 if (!context)
790 return NULL;
791
792 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
793 phy_addr = virt_to_phys((void *)context);
794 *entry = phy_addr | 1;
795 __iommu_flush_cache(iommu, entry, sizeof(*entry));
796 }
797 return &context[devfn];
798}
799
4ed6a540
DW
800static int iommu_dummy(struct device *dev)
801{
802 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
803}
804
156baca8 805static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
806{
807 struct dmar_drhd_unit *drhd = NULL;
b683b230 808 struct intel_iommu *iommu;
156baca8
DW
809 struct device *tmp;
810 struct pci_dev *ptmp, *pdev = NULL;
aa4d066a 811 u16 segment = 0;
c7151a8d
WH
812 int i;
813
4ed6a540
DW
814 if (iommu_dummy(dev))
815 return NULL;
816
156baca8 817 if (dev_is_pci(dev)) {
1c387188
AR
818 struct pci_dev *pf_pdev;
819
156baca8 820 pdev = to_pci_dev(dev);
5823e330
JD
821
822#ifdef CONFIG_X86
823 /* VMD child devices currently cannot be handled individually */
824 if (is_vmd(pdev->bus))
825 return NULL;
826#endif
827
1c387188
AR
828 /* VFs aren't listed in scope tables; we need to look up
829 * the PF instead to find the IOMMU. */
830 pf_pdev = pci_physfn(pdev);
831 dev = &pf_pdev->dev;
156baca8 832 segment = pci_domain_nr(pdev->bus);
ca5b74d2 833 } else if (has_acpi_companion(dev))
156baca8
DW
834 dev = &ACPI_COMPANION(dev)->dev;
835
0e242612 836 rcu_read_lock();
b683b230 837 for_each_active_iommu(iommu, drhd) {
156baca8 838 if (pdev && segment != drhd->segment)
276dbf99 839 continue;
c7151a8d 840
b683b230 841 for_each_active_dev_scope(drhd->devices,
156baca8
DW
842 drhd->devices_cnt, i, tmp) {
843 if (tmp == dev) {
1c387188
AR
844 /* For a VF use its original BDF# not that of the PF
845 * which we used for the IOMMU lookup. Strictly speaking
846 * we could do this for all PCI devices; we only need to
847 * get the BDF# from the scope table for ACPI matches. */
5003ae1e 848 if (pdev && pdev->is_virtfn)
1c387188
AR
849 goto got_pdev;
850
156baca8
DW
851 *bus = drhd->devices[i].bus;
852 *devfn = drhd->devices[i].devfn;
b683b230 853 goto out;
156baca8
DW
854 }
855
856 if (!pdev || !dev_is_pci(tmp))
857 continue;
858
859 ptmp = to_pci_dev(tmp);
860 if (ptmp->subordinate &&
861 ptmp->subordinate->number <= pdev->bus->number &&
862 ptmp->subordinate->busn_res.end >= pdev->bus->number)
863 goto got_pdev;
924b6231 864 }
c7151a8d 865
156baca8
DW
866 if (pdev && drhd->include_all) {
867 got_pdev:
868 *bus = pdev->bus->number;
869 *devfn = pdev->devfn;
b683b230 870 goto out;
156baca8 871 }
c7151a8d 872 }
b683b230 873 iommu = NULL;
156baca8 874 out:
0e242612 875 rcu_read_unlock();
c7151a8d 876
b683b230 877 return iommu;
c7151a8d
WH
878}
879
5331fe6f
WH
880static void domain_flush_cache(struct dmar_domain *domain,
881 void *addr, int size)
882{
883 if (!domain->iommu_coherency)
884 clflush_cache_range(addr, size);
885}
886
ba395927
KA
887static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
888{
ba395927 889 struct context_entry *context;
03ecc32c 890 int ret = 0;
ba395927
KA
891 unsigned long flags;
892
893 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c
DW
894 context = iommu_context_addr(iommu, bus, devfn, 0);
895 if (context)
896 ret = context_present(context);
ba395927
KA
897 spin_unlock_irqrestore(&iommu->lock, flags);
898 return ret;
899}
900
ba395927
KA
901static void free_context_table(struct intel_iommu *iommu)
902{
ba395927
KA
903 int i;
904 unsigned long flags;
905 struct context_entry *context;
906
907 spin_lock_irqsave(&iommu->lock, flags);
908 if (!iommu->root_entry) {
909 goto out;
910 }
911 for (i = 0; i < ROOT_ENTRY_NR; i++) {
03ecc32c 912 context = iommu_context_addr(iommu, i, 0, 0);
ba395927
KA
913 if (context)
914 free_pgtable_page(context);
03ecc32c 915
c83b2f20 916 if (!ecs_enabled(iommu))
03ecc32c
DW
917 continue;
918
919 context = iommu_context_addr(iommu, i, 0x80, 0);
920 if (context)
921 free_pgtable_page(context);
922
ba395927
KA
923 }
924 free_pgtable_page(iommu->root_entry);
925 iommu->root_entry = NULL;
926out:
927 spin_unlock_irqrestore(&iommu->lock, flags);
928}
929
b026fd28 930static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 931 unsigned long pfn, int *target_level)
ba395927 932{
ba395927
KA
933 struct dma_pte *parent, *pte = NULL;
934 int level = agaw_to_level(domain->agaw);
4399c8bf 935 int offset;
ba395927
KA
936
937 BUG_ON(!domain->pgd);
f9423606 938
162d1b10 939 if (!domain_pfn_supported(domain, pfn))
f9423606
JS
940 /* Address beyond IOMMU's addressing capabilities. */
941 return NULL;
942
ba395927
KA
943 parent = domain->pgd;
944
5cf0a76f 945 while (1) {
ba395927
KA
946 void *tmp_page;
947
b026fd28 948 offset = pfn_level_offset(pfn, level);
ba395927 949 pte = &parent[offset];
5cf0a76f 950 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 951 break;
5cf0a76f 952 if (level == *target_level)
ba395927
KA
953 break;
954
19c239ce 955 if (!dma_pte_present(pte)) {
c85994e4
DW
956 uint64_t pteval;
957
4c923d47 958 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 959
206a73c1 960 if (!tmp_page)
ba395927 961 return NULL;
206a73c1 962
c85994e4 963 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 964 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 965 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
966 /* Someone else set it while we were thinking; use theirs. */
967 free_pgtable_page(tmp_page);
effad4b5 968 else
c85994e4 969 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 970 }
5cf0a76f
DW
971 if (level == 1)
972 break;
973
19c239ce 974 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
975 level--;
976 }
977
5cf0a76f
DW
978 if (!*target_level)
979 *target_level = level;
980
ba395927
KA
981 return pte;
982}
983
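/*
 * Descriptive note on the walk above: the loop descends from domain->pgd,
 * allocating missing intermediate tables and installing them with
 * cmpxchg64() so a concurrent walker's allocation simply wins. It stops
 * at the requested *target_level, or, when *target_level is 0, at the
 * first superpage or non-present entry without allocating anything, and
 * in that case writes the level actually reached back into *target_level.
 */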
6dd9a7c7 984
ba395927 985/* return address's pte at specific level */
90dcfb5e
DW
986static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
987 unsigned long pfn,
6dd9a7c7 988 int level, int *large_page)
ba395927
KA
989{
990 struct dma_pte *parent, *pte = NULL;
991 int total = agaw_to_level(domain->agaw);
992 int offset;
993
994 parent = domain->pgd;
995 while (level <= total) {
90dcfb5e 996 offset = pfn_level_offset(pfn, total);
ba395927
KA
997 pte = &parent[offset];
998 if (level == total)
999 return pte;
1000
6dd9a7c7
YS
1001 if (!dma_pte_present(pte)) {
1002 *large_page = total;
ba395927 1003 break;
6dd9a7c7
YS
1004 }
1005
e16922af 1006 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
1007 *large_page = total;
1008 return pte;
1009 }
1010
19c239ce 1011 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1012 total--;
1013 }
1014 return NULL;
1015}
1016
ba395927 1017/* clear last level pte, a tlb flush should be followed */
5cf0a76f 1018static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
1019 unsigned long start_pfn,
1020 unsigned long last_pfn)
ba395927 1021{
6dd9a7c7 1022 unsigned int large_page = 1;
310a5ab9 1023 struct dma_pte *first_pte, *pte;
66eae846 1024
162d1b10
JL
1025 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1026 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1027 BUG_ON(start_pfn > last_pfn);
ba395927 1028
04b18e65 1029 /* we don't need lock here; nobody else touches the iova range */
59c36286 1030 do {
6dd9a7c7
YS
1031 large_page = 1;
1032 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 1033 if (!pte) {
6dd9a7c7 1034 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
1035 continue;
1036 }
6dd9a7c7 1037 do {
310a5ab9 1038 dma_clear_pte(pte);
6dd9a7c7 1039 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 1040 pte++;
75e6bf96
DW
1041 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1042
310a5ab9
DW
1043 domain_flush_cache(domain, first_pte,
1044 (void *)pte - (void *)first_pte);
59c36286
DW
1045
1046 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
1047}
1048
3269ee0b 1049static void dma_pte_free_level(struct dmar_domain *domain, int level,
bc24c571
DD
1050 int retain_level, struct dma_pte *pte,
1051 unsigned long pfn, unsigned long start_pfn,
1052 unsigned long last_pfn)
3269ee0b
AW
1053{
1054 pfn = max(start_pfn, pfn);
1055 pte = &pte[pfn_level_offset(pfn, level)];
1056
1057 do {
1058 unsigned long level_pfn;
1059 struct dma_pte *level_pte;
1060
1061 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1062 goto next;
1063
f7116e11 1064 level_pfn = pfn & level_mask(level);
3269ee0b
AW
1065 level_pte = phys_to_virt(dma_pte_addr(pte));
1066
bc24c571
DD
1067 if (level > 2) {
1068 dma_pte_free_level(domain, level - 1, retain_level,
1069 level_pte, level_pfn, start_pfn,
1070 last_pfn);
1071 }
3269ee0b 1072
bc24c571
DD
1073 /*
1074 * Free the page table if we're below the level we want to
1075 * retain and the range covers the entire table.
1076 */
1077 if (level < retain_level && !(start_pfn > level_pfn ||
08336fd2 1078 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
1079 dma_clear_pte(pte);
1080 domain_flush_cache(domain, pte, sizeof(*pte));
1081 free_pgtable_page(level_pte);
1082 }
1083next:
1084 pfn += level_size(level);
1085 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1086}
1087
bc24c571
DD
1088/*
1089 * clear last level (leaf) ptes and free page table pages below the
1090 * level we wish to keep intact.
1091 */
ba395927 1092static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b 1093 unsigned long start_pfn,
bc24c571
DD
1094 unsigned long last_pfn,
1095 int retain_level)
ba395927 1096{
162d1b10
JL
1097 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1098 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1099 BUG_ON(start_pfn > last_pfn);
ba395927 1100
d41a4adb
JL
1101 dma_pte_clear_range(domain, start_pfn, last_pfn);
1102
f3a0a52f 1103 /* We don't need lock here; nobody else touches the iova range */
bc24c571 1104 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
3269ee0b 1105 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 1106
ba395927 1107 /* free pgd */
d794dc9b 1108 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1109 free_pgtable_page(domain->pgd);
1110 domain->pgd = NULL;
1111 }
1112}
1113
ea8ea460
DW
1114/* When a page at a given level is being unlinked from its parent, we don't
1115 need to *modify* it at all. All we need to do is make a list of all the
1116 pages which can be freed just as soon as we've flushed the IOTLB and we
1117 know the hardware page-walk will no longer touch them.
1118 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1119 be freed. */
1120static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1121 int level, struct dma_pte *pte,
1122 struct page *freelist)
1123{
1124 struct page *pg;
1125
1126 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1127 pg->freelist = freelist;
1128 freelist = pg;
1129
1130 if (level == 1)
1131 return freelist;
1132
adeb2590
JL
1133 pte = page_address(pg);
1134 do {
ea8ea460
DW
1135 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1136 freelist = dma_pte_list_pagetables(domain, level - 1,
1137 pte, freelist);
adeb2590
JL
1138 pte++;
1139 } while (!first_pte_in_page(pte));
ea8ea460
DW
1140
1141 return freelist;
1142}
1143
1144static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1145 struct dma_pte *pte, unsigned long pfn,
1146 unsigned long start_pfn,
1147 unsigned long last_pfn,
1148 struct page *freelist)
1149{
1150 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1151
1152 pfn = max(start_pfn, pfn);
1153 pte = &pte[pfn_level_offset(pfn, level)];
1154
1155 do {
1156 unsigned long level_pfn;
1157
1158 if (!dma_pte_present(pte))
1159 goto next;
1160
1161 level_pfn = pfn & level_mask(level);
1162
1163 /* If range covers entire pagetable, free it */
1164 if (start_pfn <= level_pfn &&
1165 last_pfn >= level_pfn + level_size(level) - 1) {
 1166 /* These subordinate page tables are going away entirely. Don't
1167 bother to clear them; we're just going to *free* them. */
1168 if (level > 1 && !dma_pte_superpage(pte))
1169 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1170
1171 dma_clear_pte(pte);
1172 if (!first_pte)
1173 first_pte = pte;
1174 last_pte = pte;
1175 } else if (level > 1) {
1176 /* Recurse down into a level that isn't *entirely* obsolete */
1177 freelist = dma_pte_clear_level(domain, level - 1,
1178 phys_to_virt(dma_pte_addr(pte)),
1179 level_pfn, start_pfn, last_pfn,
1180 freelist);
1181 }
1182next:
1183 pfn += level_size(level);
1184 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1185
1186 if (first_pte)
1187 domain_flush_cache(domain, first_pte,
1188 (void *)++last_pte - (void *)first_pte);
1189
1190 return freelist;
1191}
1192
1193/* We can't just free the pages because the IOMMU may still be walking
1194 the page tables, and may have cached the intermediate levels. The
1195 pages can only be freed after the IOTLB flush has been done. */
b690420a
JR
1196static struct page *domain_unmap(struct dmar_domain *domain,
1197 unsigned long start_pfn,
1198 unsigned long last_pfn)
ea8ea460 1199{
ea8ea460
DW
1200 struct page *freelist = NULL;
1201
162d1b10
JL
1202 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1203 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1204 BUG_ON(start_pfn > last_pfn);
1205
1206 /* we don't need lock here; nobody else touches the iova range */
1207 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1208 domain->pgd, 0, start_pfn, last_pfn, NULL);
1209
1210 /* free pgd */
1211 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1212 struct page *pgd_page = virt_to_page(domain->pgd);
1213 pgd_page->freelist = freelist;
1214 freelist = pgd_page;
1215
1216 domain->pgd = NULL;
1217 }
1218
1219 return freelist;
1220}
1221
b690420a 1222static void dma_free_pagelist(struct page *freelist)
ea8ea460
DW
1223{
1224 struct page *pg;
1225
1226 while ((pg = freelist)) {
1227 freelist = pg->freelist;
1228 free_pgtable_page(page_address(pg));
1229 }
1230}
1231
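/*
 * Illustrative usage note: domain_unmap() hands back the chain of
 * page-table pages (linked through pg->freelist), the caller flushes the
 * IOTLB so the hardware can no longer walk them, and only then calls
 * dma_free_pagelist() to return the pages to the allocator.
 */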
13cf0174
JR
1232static void iova_entry_free(unsigned long data)
1233{
1234 struct page *freelist = (struct page *)data;
1235
1236 dma_free_pagelist(freelist);
1237}
1238
ba395927
KA
1239/* iommu handling */
1240static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1241{
1242 struct root_entry *root;
1243 unsigned long flags;
1244
4c923d47 1245 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46 1246 if (!root) {
9f10e5bf 1247 pr_err("Allocating root entry for %s failed\n",
ffebeb46 1248 iommu->name);
ba395927 1249 return -ENOMEM;
ffebeb46 1250 }
ba395927 1251
5b6985ce 1252 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1253
1254 spin_lock_irqsave(&iommu->lock, flags);
1255 iommu->root_entry = root;
1256 spin_unlock_irqrestore(&iommu->lock, flags);
1257
1258 return 0;
1259}
1260
ba395927
KA
1261static void iommu_set_root_entry(struct intel_iommu *iommu)
1262{
03ecc32c 1263 u64 addr;
c416daa9 1264 u32 sts;
ba395927
KA
1265 unsigned long flag;
1266
03ecc32c 1267 addr = virt_to_phys(iommu->root_entry);
c83b2f20 1268 if (ecs_enabled(iommu))
03ecc32c 1269 addr |= DMA_RTADDR_RTT;
ba395927 1270
1f5b3c3f 1271 raw_spin_lock_irqsave(&iommu->register_lock, flag);
03ecc32c 1272 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
ba395927 1273
c416daa9 1274 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1275
1276 /* Make sure hardware complete it */
1277 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1278 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1279
1f5b3c3f 1280 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1281}
1282
1283static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1284{
1285 u32 val;
1286 unsigned long flag;
1287
9af88143 1288 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1289 return;
ba395927 1290
1f5b3c3f 1291 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1292 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1293
1294 /* Make sure hardware complete it */
1295 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1296 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1297
1f5b3c3f 1298 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1299}
1300
 1301/* return value determines if we need a write buffer flush */
4c25a2c1
DW
1302static void __iommu_flush_context(struct intel_iommu *iommu,
1303 u16 did, u16 source_id, u8 function_mask,
1304 u64 type)
ba395927
KA
1305{
1306 u64 val = 0;
1307 unsigned long flag;
1308
ba395927
KA
1309 switch (type) {
1310 case DMA_CCMD_GLOBAL_INVL:
1311 val = DMA_CCMD_GLOBAL_INVL;
1312 break;
1313 case DMA_CCMD_DOMAIN_INVL:
1314 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1315 break;
1316 case DMA_CCMD_DEVICE_INVL:
1317 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1318 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1319 break;
1320 default:
1321 BUG();
1322 }
1323 val |= DMA_CCMD_ICC;
1324
1f5b3c3f 1325 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1326 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1327
1328 /* Make sure hardware complete it */
1329 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1330 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1331
1f5b3c3f 1332 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1333}
1334
ba395927 1335/* return value determines if we need a write buffer flush */
1f0ef2aa
DW
1336static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1337 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1338{
1339 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1340 u64 val = 0, val_iva = 0;
1341 unsigned long flag;
1342
ba395927
KA
1343 switch (type) {
1344 case DMA_TLB_GLOBAL_FLUSH:
 1345 /* global flush doesn't need to set IVA_REG */
1346 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1347 break;
1348 case DMA_TLB_DSI_FLUSH:
1349 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1350 break;
1351 case DMA_TLB_PSI_FLUSH:
1352 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1353 /* IH bit is passed in as part of address */
ba395927
KA
1354 val_iva = size_order | addr;
1355 break;
1356 default:
1357 BUG();
1358 }
1359 /* Note: set drain read/write */
1360#if 0
1361 /*
 1362 * This is probably meant to be extra secure. Looks like we can
1363 * ignore it without any impact.
1364 */
1365 if (cap_read_drain(iommu->cap))
1366 val |= DMA_TLB_READ_DRAIN;
1367#endif
1368 if (cap_write_drain(iommu->cap))
1369 val |= DMA_TLB_WRITE_DRAIN;
1370
1f5b3c3f 1371 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1372 /* Note: Only uses first TLB reg currently */
1373 if (val_iva)
1374 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1375 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1376
1377 /* Make sure hardware complete it */
1378 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1379 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1380
1f5b3c3f 1381 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1382
1383 /* check IOTLB invalidation granularity */
1384 if (DMA_TLB_IAIG(val) == 0)
9f10e5bf 1385 pr_err("Flush IOTLB failed\n");
ba395927 1386 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
9f10e5bf 1387 pr_debug("TLB flush request %Lx, actual %Lx\n",
5b6985ce
FY
1388 (unsigned long long)DMA_TLB_IIRG(type),
1389 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1390}
1391
64ae892b
DW
1392static struct device_domain_info *
1393iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1394 u8 bus, u8 devfn)
93a23a72 1395{
93a23a72 1396 struct device_domain_info *info;
93a23a72 1397
55d94043
JR
1398 assert_spin_locked(&device_domain_lock);
1399
93a23a72
YZ
1400 if (!iommu->qi)
1401 return NULL;
1402
93a23a72 1403 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1404 if (info->iommu == iommu && info->bus == bus &&
1405 info->devfn == devfn) {
b16d0cb9
DW
1406 if (info->ats_supported && info->dev)
1407 return info;
93a23a72
YZ
1408 break;
1409 }
93a23a72 1410
b16d0cb9 1411 return NULL;
93a23a72
YZ
1412}
1413
0824c592
OP
1414static void domain_update_iotlb(struct dmar_domain *domain)
1415{
1416 struct device_domain_info *info;
1417 bool has_iotlb_device = false;
1418
1419 assert_spin_locked(&device_domain_lock);
1420
1421 list_for_each_entry(info, &domain->devices, link) {
1422 struct pci_dev *pdev;
1423
1424 if (!info->dev || !dev_is_pci(info->dev))
1425 continue;
1426
1427 pdev = to_pci_dev(info->dev);
1428 if (pdev->ats_enabled) {
1429 has_iotlb_device = true;
1430 break;
1431 }
1432 }
1433
1434 domain->has_iotlb_device = has_iotlb_device;
1435}
1436
93a23a72 1437static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1438{
fb0cc3aa
BH
1439 struct pci_dev *pdev;
1440
0824c592
OP
1441 assert_spin_locked(&device_domain_lock);
1442
0bcb3e28 1443 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1444 return;
1445
fb0cc3aa 1446 pdev = to_pci_dev(info->dev);
1c48db44
JP
 1447 /* For an IOMMU that supports device IOTLB throttling (DIT), we assign
1448 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1449 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1450 * reserved, which should be set to 0.
1451 */
1452 if (!ecap_dit(info->iommu->ecap))
1453 info->pfsid = 0;
1454 else {
1455 struct pci_dev *pf_pdev;
1456
1457 /* pdev will be returned if device is not a vf */
1458 pf_pdev = pci_physfn(pdev);
1459 info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn);
1460 }
fb0cc3aa 1461
b16d0cb9
DW
1462#ifdef CONFIG_INTEL_IOMMU_SVM
1463 /* The PCIe spec, in its wisdom, declares that the behaviour of
1464 the device if you enable PASID support after ATS support is
1465 undefined. So always enable PASID support on devices which
1466 have it, even if we can't yet know if we're ever going to
1467 use it. */
1468 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1469 info->pasid_enabled = 1;
1470
1471 if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1472 info->pri_enabled = 1;
1473#endif
1474 if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1475 info->ats_enabled = 1;
0824c592 1476 domain_update_iotlb(info->domain);
b16d0cb9
DW
1477 info->ats_qdep = pci_ats_queue_depth(pdev);
1478 }
93a23a72
YZ
1479}
1480
1481static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1482{
b16d0cb9
DW
1483 struct pci_dev *pdev;
1484
0824c592
OP
1485 assert_spin_locked(&device_domain_lock);
1486
da972fb1 1487 if (!dev_is_pci(info->dev))
93a23a72
YZ
1488 return;
1489
b16d0cb9
DW
1490 pdev = to_pci_dev(info->dev);
1491
1492 if (info->ats_enabled) {
1493 pci_disable_ats(pdev);
1494 info->ats_enabled = 0;
0824c592 1495 domain_update_iotlb(info->domain);
b16d0cb9
DW
1496 }
1497#ifdef CONFIG_INTEL_IOMMU_SVM
1498 if (info->pri_enabled) {
1499 pci_disable_pri(pdev);
1500 info->pri_enabled = 0;
1501 }
1502 if (info->pasid_enabled) {
1503 pci_disable_pasid(pdev);
1504 info->pasid_enabled = 0;
1505 }
1506#endif
93a23a72
YZ
1507}
1508
1509static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1510 u64 addr, unsigned mask)
1511{
1512 u16 sid, qdep;
1513 unsigned long flags;
1514 struct device_domain_info *info;
1515
0824c592
OP
1516 if (!domain->has_iotlb_device)
1517 return;
1518
93a23a72
YZ
1519 spin_lock_irqsave(&device_domain_lock, flags);
1520 list_for_each_entry(info, &domain->devices, link) {
b16d0cb9 1521 if (!info->ats_enabled)
93a23a72
YZ
1522 continue;
1523
1524 sid = info->bus << 8 | info->devfn;
b16d0cb9 1525 qdep = info->ats_qdep;
1c48db44
JP
1526 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1527 qdep, addr, mask);
93a23a72
YZ
1528 }
1529 spin_unlock_irqrestore(&device_domain_lock, flags);
1530}
1531
a1ddcbe9
JR
1532static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1533 struct dmar_domain *domain,
1534 unsigned long pfn, unsigned int pages,
1535 int ih, int map)
ba395927 1536{
9dd2fe89 1537 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1538 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
a1ddcbe9 1539 u16 did = domain->iommu_did[iommu->seq_id];
ba395927 1540
ba395927
KA
1541 BUG_ON(pages == 0);
1542
ea8ea460
DW
1543 if (ih)
1544 ih = 1 << 6;
ba395927 1545 /*
9dd2fe89
YZ
1546 * Fallback to domain selective flush if no PSI support or the size is
1547 * too big.
ba395927
KA
1548 * PSI requires page size to be 2 ^ x, and the base address is naturally
1549 * aligned to the size
1550 */
9dd2fe89
YZ
1551 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1552 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1553 DMA_TLB_DSI_FLUSH);
9dd2fe89 1554 else
ea8ea460 1555 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1556 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1557
1558 /*
82653633
NA
1559 * In caching mode, changes of pages from non-present to present require
 1560 * a flush. However, the device IOTLB doesn't need to be flushed in this case.
bf92df30 1561 */
82653633 1562 if (!cap_caching_mode(iommu->cap) || !map)
9d2e6505 1563 iommu_flush_dev_iotlb(domain, addr, mask);
ba395927
KA
1564}
1565
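/*
 * Worked example (illustrative): for a 3-page flush request,
 * __roundup_pow_of_two(3) == 4 and mask == ilog2(4) == 2, so the PSI
 * covers a naturally aligned 4-page (16KiB) region containing the range;
 * if mask exceeds cap_max_amask_val() the code above falls back to a
 * domain-selective flush instead.
 */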
eed91a0b
PX
1566/* Notification for newly created mappings */
1567static inline void __mapping_notify_one(struct intel_iommu *iommu,
1568 struct dmar_domain *domain,
1569 unsigned long pfn, unsigned int pages)
1570{
1571 /* It's a non-present to present mapping. Only flush if caching mode */
1572 if (cap_caching_mode(iommu->cap))
1573 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1574 else
1575 iommu_flush_write_buffer(iommu);
1576}
1577
13cf0174
JR
1578static void iommu_flush_iova(struct iova_domain *iovad)
1579{
1580 struct dmar_domain *domain;
1581 int idx;
1582
1583 domain = container_of(iovad, struct dmar_domain, iovad);
1584
1585 for_each_domain_iommu(idx, domain) {
1586 struct intel_iommu *iommu = g_iommus[idx];
1587 u16 did = domain->iommu_did[iommu->seq_id];
1588
1589 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1590
1591 if (!cap_caching_mode(iommu->cap))
1592 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1593 0, MAX_AGAW_PFN_WIDTH);
1594 }
1595}
1596
f8bab735 1597static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1598{
1599 u32 pmen;
1600 unsigned long flags;
1601
1f5b3c3f 1602 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1603 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1604 pmen &= ~DMA_PMEN_EPM;
1605 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1606
1607 /* wait for the protected region status bit to clear */
1608 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1609 readl, !(pmen & DMA_PMEN_PRS), pmen);
1610
1f5b3c3f 1611 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1612}
1613
2a41ccee 1614static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1615{
1616 u32 sts;
1617 unsigned long flags;
1618
1f5b3c3f 1619 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1620 iommu->gcmd |= DMA_GCMD_TE;
1621 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1622
1623 /* Make sure hardware complete it */
1624 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1625 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1626
1f5b3c3f 1627 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1628}
1629
2a41ccee 1630static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1631{
1632 u32 sts;
1633 unsigned long flag;
1634
1f5b3c3f 1635 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1636 iommu->gcmd &= ~DMA_GCMD_TE;
1637 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1638
1639 /* Make sure hardware complete it */
1640 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1641 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1642
1f5b3c3f 1643 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1644}
1645
3460a6d9 1646
ba395927
KA
1647static int iommu_init_domains(struct intel_iommu *iommu)
1648{
8bf47816
JR
1649 u32 ndomains, nlongs;
1650 size_t size;
ba395927
KA
1651
1652 ndomains = cap_ndoms(iommu->cap);
8bf47816 1653 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1654 iommu->name, ndomains);
ba395927
KA
1655 nlongs = BITS_TO_LONGS(ndomains);
1656
94a91b50
DD
1657 spin_lock_init(&iommu->lock);
1658
ba395927
KA
1659 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1660 if (!iommu->domain_ids) {
9f10e5bf
JR
1661 pr_err("%s: Allocating domain id array failed\n",
1662 iommu->name);
ba395927
KA
1663 return -ENOMEM;
1664 }
8bf47816 1665
86f004c7 1666 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
8bf47816
JR
1667 iommu->domains = kzalloc(size, GFP_KERNEL);
1668
1669 if (iommu->domains) {
1670 size = 256 * sizeof(struct dmar_domain *);
1671 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1672 }
1673
1674 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1675 pr_err("%s: Allocating domain array failed\n",
1676 iommu->name);
852bdb04 1677 kfree(iommu->domain_ids);
8bf47816 1678 kfree(iommu->domains);
852bdb04 1679 iommu->domain_ids = NULL;
8bf47816 1680 iommu->domains = NULL;
ba395927
KA
1681 return -ENOMEM;
1682 }
1683
8bf47816
JR
1684
1685
ba395927 1686 /*
c0e8a6c8
JR
1687 * If Caching mode is set, then invalid translations are tagged
1688 * with domain-id 0, hence we need to pre-allocate it. We also
1689 * use domain-id 0 as a marker for non-allocated domain-id, so
1690 * make sure it is not used for a real domain.
ba395927 1691 */
c0e8a6c8
JR
1692 set_bit(0, iommu->domain_ids);
1693
ba395927
KA
1694 return 0;
1695}
ba395927 1696
ffebeb46 1697static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1698{
29a27719 1699 struct device_domain_info *info, *tmp;
55d94043 1700 unsigned long flags;
ba395927 1701
29a27719
JR
1702 if (!iommu->domains || !iommu->domain_ids)
1703 return;
a4eaa86c 1704
bea64033 1705again:
55d94043 1706 spin_lock_irqsave(&device_domain_lock, flags);
29a27719
JR
1707 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1708 struct dmar_domain *domain;
1709
1710 if (info->iommu != iommu)
1711 continue;
1712
1713 if (!info->dev || !info->domain)
1714 continue;
1715
1716 domain = info->domain;
1717
bea64033 1718 __dmar_remove_one_dev_info(info);
29a27719 1719
bea64033
JR
1720 if (!domain_type_is_vm_or_si(domain)) {
1721 /*
1722 * The domain_exit() function can't be called under
1723 * device_domain_lock, as it takes this lock itself.
1724 * So release the lock here and re-run the loop
1725 * afterwards.
1726 */
1727 spin_unlock_irqrestore(&device_domain_lock, flags);
29a27719 1728 domain_exit(domain);
bea64033
JR
1729 goto again;
1730 }
ba395927 1731 }
55d94043 1732 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1733
1734 if (iommu->gcmd & DMA_GCMD_TE)
1735 iommu_disable_translation(iommu);
ffebeb46 1736}
ba395927 1737
ffebeb46
JL
1738static void free_dmar_iommu(struct intel_iommu *iommu)
1739{
1740 if ((iommu->domains) && (iommu->domain_ids)) {
86f004c7 1741 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
8bf47816
JR
1742 int i;
1743
1744 for (i = 0; i < elems; i++)
1745 kfree(iommu->domains[i]);
ffebeb46
JL
1746 kfree(iommu->domains);
1747 kfree(iommu->domain_ids);
1748 iommu->domains = NULL;
1749 iommu->domain_ids = NULL;
1750 }
ba395927 1751
d9630fe9
WH
1752 g_iommus[iommu->seq_id] = NULL;
1753
ba395927
KA
1754 /* free context mapping */
1755 free_context_table(iommu);
8a94ade4
DW
1756
1757#ifdef CONFIG_INTEL_IOMMU_SVM
a222a7f0
DW
1758 if (pasid_enabled(iommu)) {
1759 if (ecap_prs(iommu->ecap))
1760 intel_svm_finish_prq(iommu);
d9737953 1761 intel_svm_exit(iommu);
a222a7f0 1762 }
8a94ade4 1763#endif
ba395927
KA
1764}
1765
ab8dfe25 1766static struct dmar_domain *alloc_domain(int flags)
ba395927 1767{
ba395927 1768 struct dmar_domain *domain;
ba395927
KA
1769
1770 domain = alloc_domain_mem();
1771 if (!domain)
1772 return NULL;
1773
ab8dfe25 1774 memset(domain, 0, sizeof(*domain));
4c923d47 1775 domain->nid = -1;
ab8dfe25 1776 domain->flags = flags;
0824c592 1777 domain->has_iotlb_device = false;
92d03cc8 1778 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1779
1780 return domain;
1781}
1782
d160aca5
JR
1783/* Must be called with iommu->lock */
1784static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1785 struct intel_iommu *iommu)
1786{
44bde614 1787 unsigned long ndomains;
55d94043 1788 int num;
44bde614 1789
55d94043 1790 assert_spin_locked(&device_domain_lock);
d160aca5 1791 assert_spin_locked(&iommu->lock);
ba395927 1792
29a27719
JR
1793 domain->iommu_refcnt[iommu->seq_id] += 1;
1794 domain->iommu_count += 1;
1795 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1796 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1797 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1798
1799 if (num >= ndomains) {
1800 pr_err("%s: No free domain ids\n", iommu->name);
1801 domain->iommu_refcnt[iommu->seq_id] -= 1;
1802 domain->iommu_count -= 1;
55d94043 1803 return -ENOSPC;
2c2e2c38 1804 }
ba395927 1805
d160aca5
JR
1806 set_bit(num, iommu->domain_ids);
1807 set_iommu_domain(iommu, num, domain);
1808
1809 domain->iommu_did[iommu->seq_id] = num;
1810 domain->nid = iommu->node;
fb170fb4 1811
fb170fb4
JL
1812 domain_update_iommu_cap(domain);
1813 }
d160aca5 1814
55d94043 1815 return 0;
fb170fb4
JL
1816}
1817
1818static int domain_detach_iommu(struct dmar_domain *domain,
1819 struct intel_iommu *iommu)
1820{
d160aca5 1821 int num, count = INT_MAX;
d160aca5 1822
55d94043 1823 assert_spin_locked(&device_domain_lock);
d160aca5 1824 assert_spin_locked(&iommu->lock);
fb170fb4 1825
29a27719
JR
1826 domain->iommu_refcnt[iommu->seq_id] -= 1;
1827 count = --domain->iommu_count;
1828 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1829 num = domain->iommu_did[iommu->seq_id];
1830 clear_bit(num, iommu->domain_ids);
1831 set_iommu_domain(iommu, num, NULL);
fb170fb4 1832
fb170fb4 1833 domain_update_iommu_cap(domain);
c0e8a6c8 1834 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1835 }
fb170fb4
JL
1836
1837 return count;
1838}
1839
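/*
 * Illustrative usage sketch (editorial addition, not part of the
 * original source): both helpers above assume device_domain_lock and
 * iommu->lock are already held, mirroring how dmar_insert_one_dev_info()
 * later in this file calls them.  The first attach for a given IOMMU
 * pulls a free ID from iommu->domain_ids (ID 0 stays reserved); the
 * last detach clears it again.  The wrapper name below is hypothetical.
 */
static inline int example_attach_locked(struct dmar_domain *domain,
					struct intel_iommu *iommu)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);
	ret = domain_attach_iommu(domain, iommu);
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return ret;
}
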
ba395927 1840static struct iova_domain reserved_iova_list;
8a443df4 1841static struct lock_class_key reserved_rbtree_key;
ba395927 1842
51a63e67 1843static int dmar_init_reserved_ranges(void)
ba395927
KA
1844{
1845 struct pci_dev *pdev = NULL;
1846 struct iova *iova;
1847 int i;
ba395927 1848
aa3ac946 1849 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
ba395927 1850
8a443df4
MG
1851 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1852 &reserved_rbtree_key);
1853
ba395927
KA
1854 /* IOAPIC ranges shouldn't be accessed by DMA */
1855 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1856 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1857 if (!iova) {
9f10e5bf 1858 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1859 return -ENODEV;
1860 }
ba395927
KA
1861
1862 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1863 for_each_pci_dev(pdev) {
1864 struct resource *r;
1865
1866 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1867 r = &pdev->resource[i];
1868 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1869 continue;
1a4a4551
DW
1870 iova = reserve_iova(&reserved_iova_list,
1871 IOVA_PFN(r->start),
1872 IOVA_PFN(r->end));
51a63e67 1873 if (!iova) {
9f10e5bf 1874 pr_err("Reserve iova failed\n");
51a63e67
JC
1875 return -ENODEV;
1876 }
ba395927
KA
1877 }
1878 }
51a63e67 1879 return 0;
ba395927
KA
1880}
1881
1882static void domain_reserve_special_ranges(struct dmar_domain *domain)
1883{
1884 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1885}
1886
1887static inline int guestwidth_to_adjustwidth(int gaw)
1888{
1889 int agaw;
1890 int r = (gaw - 12) % 9;
1891
1892 if (r == 0)
1893 agaw = gaw;
1894 else
1895 agaw = gaw + 9 - r;
1896 if (agaw > 64)
1897 agaw = 64;
1898 return agaw;
1899}
1900
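/*
 * Worked examples (editorial addition): VT-d second-level page tables
 * translate 9 address bits per level on top of a 12-bit page offset, so
 * the adjusted width is rounded up to the next value of the form
 * 12 + 9 * n and then capped at 64:
 *
 *	gaw = 39  ->  r = (39 - 12) % 9 = 0  ->  agaw = 39
 *	gaw = 48  ->  r = 0                  ->  agaw = 48
 *	gaw = 36  ->  r = (36 - 12) % 9 = 6  ->  agaw = 36 + 9 - 6 = 39
 *	gaw = 50  ->  r = (50 - 12) % 9 = 2  ->  agaw = 50 + 9 - 2 = 57
 */
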
dc534b25
JR
1901static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1902 int guest_width)
ba395927 1903{
ba395927
KA
1904 int adjust_width, agaw;
1905 unsigned long sagaw;
13cf0174 1906 int err;
ba395927 1907
aa3ac946 1908 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
13cf0174
JR
1909
1910 err = init_iova_flush_queue(&domain->iovad,
1911 iommu_flush_iova, iova_entry_free);
1912 if (err)
1913 return err;
1914
ba395927
KA
1915 domain_reserve_special_ranges(domain);
1916
1917 /* calculate AGAW */
ba395927
KA
1918 if (guest_width > cap_mgaw(iommu->cap))
1919 guest_width = cap_mgaw(iommu->cap);
1920 domain->gaw = guest_width;
1921 adjust_width = guestwidth_to_adjustwidth(guest_width);
1922 agaw = width_to_agaw(adjust_width);
1923 sagaw = cap_sagaw(iommu->cap);
1924 if (!test_bit(agaw, &sagaw)) {
1925 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1926 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1927 agaw = find_next_bit(&sagaw, 5, agaw);
1928 if (agaw >= 5)
1929 return -ENODEV;
1930 }
1931 domain->agaw = agaw;
ba395927 1932
8e604097
WH
1933 if (ecap_coherent(iommu->ecap))
1934 domain->iommu_coherency = 1;
1935 else
1936 domain->iommu_coherency = 0;
1937
58c610bd
SY
1938 if (ecap_sc_support(iommu->ecap))
1939 domain->iommu_snooping = 1;
1940 else
1941 domain->iommu_snooping = 0;
1942
214e39aa
DW
1943 if (intel_iommu_superpage)
1944 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1945 else
1946 domain->iommu_superpage = 0;
1947
4c923d47 1948 domain->nid = iommu->node;
c7151a8d 1949
ba395927 1950 /* always allocate the top pgd */
4c923d47 1951 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1952 if (!domain->pgd)
1953 return -ENOMEM;
5b6985ce 1954 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1955 return 0;
1956}
1957
1958static void domain_exit(struct dmar_domain *domain)
1959{
ea8ea460 1960 struct page *freelist = NULL;
ba395927
KA
1961
 1962 /* Domain 0 is reserved, so don't process it */
1963 if (!domain)
1964 return;
1965
d160aca5
JR
1966 /* Remove associated devices and clear attached or cached domains */
1967 rcu_read_lock();
ba395927 1968 domain_remove_dev_info(domain);
d160aca5 1969 rcu_read_unlock();
92d03cc8 1970
ba395927
KA
1971 /* destroy iovas */
1972 put_iova_domain(&domain->iovad);
ba395927 1973
ea8ea460 1974 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1975
ea8ea460
DW
1976 dma_free_pagelist(freelist);
1977
ba395927
KA
1978 free_domain_mem(domain);
1979}
1980
64ae892b
DW
1981static int domain_context_mapping_one(struct dmar_domain *domain,
1982 struct intel_iommu *iommu,
28ccce0d 1983 u8 bus, u8 devfn)
ba395927 1984{
c6c2cebd 1985 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
1986 int translation = CONTEXT_TT_MULTI_LEVEL;
1987 struct device_domain_info *info = NULL;
ba395927 1988 struct context_entry *context;
ba395927 1989 unsigned long flags;
ea6606b0 1990 struct dma_pte *pgd;
55d94043 1991 int ret, agaw;
28ccce0d 1992
c6c2cebd
JR
1993 WARN_ON(did == 0);
1994
28ccce0d
JR
1995 if (hw_pass_through && domain_type_is_si(domain))
1996 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
1997
1998 pr_debug("Set context mapping for %02x:%02x.%d\n",
1999 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 2000
ba395927 2001 BUG_ON(!domain->pgd);
5331fe6f 2002
55d94043
JR
2003 spin_lock_irqsave(&device_domain_lock, flags);
2004 spin_lock(&iommu->lock);
2005
2006 ret = -ENOMEM;
03ecc32c 2007 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 2008 if (!context)
55d94043 2009 goto out_unlock;
ba395927 2010
55d94043
JR
2011 ret = 0;
2012 if (context_present(context))
2013 goto out_unlock;
cf484d0e 2014
aec0e861
XP
2015 /*
2016 * For kdump cases, old valid entries may be cached due to the
2017 * in-flight DMA and copied pgtable, but there is no unmapping
2018 * behaviour for them, thus we need an explicit cache flush for
2019 * the newly-mapped device. For kdump, at this point, the device
2020 * is supposed to finish reset at its driver probe stage, so no
2021 * in-flight DMA will exist, and we don't need to worry anymore
2022 * hereafter.
2023 */
2024 if (context_copied(context)) {
2025 u16 did_old = context_domain_id(context);
2026
b117e038 2027 if (did_old < cap_ndoms(iommu->cap)) {
aec0e861
XP
2028 iommu->flush.flush_context(iommu, did_old,
2029 (((u16)bus) << 8) | devfn,
2030 DMA_CCMD_MASK_NOBIT,
2031 DMA_CCMD_DEVICE_INVL);
f73a7eee
KA
2032 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2033 DMA_TLB_DSI_FLUSH);
2034 }
aec0e861
XP
2035 }
2036
ea6606b0
WH
2037 pgd = domain->pgd;
2038
de24e553 2039 context_clear_entry(context);
c6c2cebd 2040 context_set_domain_id(context, did);
ea6606b0 2041
de24e553
JR
2042 /*
2043 * Skip top levels of page tables for iommu which has less agaw
2044 * than default. Unnecessary for PT mode.
2045 */
93a23a72 2046 if (translation != CONTEXT_TT_PASS_THROUGH) {
de24e553 2047 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
55d94043 2048 ret = -ENOMEM;
de24e553 2049 pgd = phys_to_virt(dma_pte_addr(pgd));
55d94043
JR
2050 if (!dma_pte_present(pgd))
2051 goto out_unlock;
ea6606b0 2052 }
4ed0d3e6 2053
64ae892b 2054 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
b16d0cb9
DW
2055 if (info && info->ats_supported)
2056 translation = CONTEXT_TT_DEV_IOTLB;
2057 else
2058 translation = CONTEXT_TT_MULTI_LEVEL;
de24e553 2059
93a23a72
YZ
2060 context_set_address_root(context, virt_to_phys(pgd));
2061 context_set_address_width(context, iommu->agaw);
de24e553
JR
2062 } else {
2063 /*
2064 * In pass through mode, AW must be programmed to
2065 * indicate the largest AGAW value supported by
2066 * hardware. And ASR is ignored by hardware.
2067 */
2068 context_set_address_width(context, iommu->msagaw);
93a23a72 2069 }
4ed0d3e6
FY
2070
2071 context_set_translation_type(context, translation);
c07e7d21
MM
2072 context_set_fault_enable(context);
2073 context_set_present(context);
5331fe6f 2074 domain_flush_cache(domain, context, sizeof(*context));
ba395927 2075
4c25a2c1
DW
2076 /*
2077 * It's a non-present to present mapping. If hardware doesn't cache
 2078 * non-present entries, we only need to flush the write-buffer. If it
 2079 * _does_ cache non-present entries, then it does so in the special
2080 * domain #0, which we have to flush:
2081 */
2082 if (cap_caching_mode(iommu->cap)) {
2083 iommu->flush.flush_context(iommu, 0,
2084 (((u16)bus) << 8) | devfn,
2085 DMA_CCMD_MASK_NOBIT,
2086 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2087 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2088 } else {
ba395927 2089 iommu_flush_write_buffer(iommu);
4c25a2c1 2090 }
93a23a72 2091 iommu_enable_dev_iotlb(info);
c7151a8d 2092
55d94043
JR
2093 ret = 0;
2094
2095out_unlock:
2096 spin_unlock(&iommu->lock);
2097 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2098
5c365d18 2099 return ret;
ba395927
KA
2100}
2101
579305f7
AW
2102struct domain_context_mapping_data {
2103 struct dmar_domain *domain;
2104 struct intel_iommu *iommu;
579305f7
AW
2105};
2106
2107static int domain_context_mapping_cb(struct pci_dev *pdev,
2108 u16 alias, void *opaque)
2109{
2110 struct domain_context_mapping_data *data = opaque;
2111
2112 return domain_context_mapping_one(data->domain, data->iommu,
28ccce0d 2113 PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
2114}
2115
ba395927 2116static int
28ccce0d 2117domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2118{
64ae892b 2119 struct intel_iommu *iommu;
156baca8 2120 u8 bus, devfn;
579305f7 2121 struct domain_context_mapping_data data;
64ae892b 2122
e1f167f3 2123 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2124 if (!iommu)
2125 return -ENODEV;
ba395927 2126
579305f7 2127 if (!dev_is_pci(dev))
28ccce0d 2128 return domain_context_mapping_one(domain, iommu, bus, devfn);
579305f7
AW
2129
2130 data.domain = domain;
2131 data.iommu = iommu;
579305f7
AW
2132
2133 return pci_for_each_dma_alias(to_pci_dev(dev),
2134 &domain_context_mapping_cb, &data);
2135}
2136
2137static int domain_context_mapped_cb(struct pci_dev *pdev,
2138 u16 alias, void *opaque)
2139{
2140 struct intel_iommu *iommu = opaque;
2141
2142 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2143}
2144
e1f167f3 2145static int domain_context_mapped(struct device *dev)
ba395927 2146{
5331fe6f 2147 struct intel_iommu *iommu;
156baca8 2148 u8 bus, devfn;
5331fe6f 2149
e1f167f3 2150 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2151 if (!iommu)
2152 return -ENODEV;
ba395927 2153
579305f7
AW
2154 if (!dev_is_pci(dev))
2155 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2156
579305f7
AW
2157 return !pci_for_each_dma_alias(to_pci_dev(dev),
2158 domain_context_mapped_cb, iommu);
ba395927
KA
2159}
2160
f532959b
FY
2161/* Returns a number of VTD pages, but aligned to MM page size */
2162static inline unsigned long aligned_nrpages(unsigned long host_addr,
2163 size_t size)
2164{
2165 host_addr &= ~PAGE_MASK;
2166 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2167}
2168
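/*
 * Worked example (editorial addition), assuming 4KiB MM pages and 4KiB
 * VT-d pages: host_addr = 0x1003, size = 0x2000 leaves an in-page
 * offset of 0x3, PAGE_ALIGN(0x3 + 0x2000) = 0x3000, so the buffer spans
 * 0x3000 >> 12 = 3 VT-d pages even though size alone is only two pages.
 */
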
6dd9a7c7
YS
2169/* Return largest possible superpage level for a given mapping */
2170static inline int hardware_largepage_caps(struct dmar_domain *domain,
2171 unsigned long iov_pfn,
2172 unsigned long phy_pfn,
2173 unsigned long pages)
2174{
2175 int support, level = 1;
2176 unsigned long pfnmerge;
2177
2178 support = domain->iommu_superpage;
2179
2180 /* To use a large page, the virtual *and* physical addresses
2181 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2182 of them will mean we have to use smaller pages. So just
2183 merge them and check both at once. */
2184 pfnmerge = iov_pfn | phy_pfn;
2185
2186 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2187 pages >>= VTD_STRIDE_SHIFT;
2188 if (!pages)
2189 break;
2190 pfnmerge >>= VTD_STRIDE_SHIFT;
2191 level++;
2192 support--;
2193 }
2194 return level;
2195}
2196
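/*
 * Worked example (editorial addition), assuming VTD_STRIDE_SHIFT == 9
 * (512 entries per table level): with domain->iommu_superpage == 2
 * (2MiB and 1GiB pages supported), iov_pfn = 0x200, phy_pfn = 0x40000
 * and pages = 1024:
 *
 *	pass 1: pfnmerge = 0x40200, low 9 bits clear -> pages = 2,
 *		pfnmerge = 0x201, level = 2, support = 1
 *	pass 2: low 9 bits of 0x201 are non-zero -> stop
 *
 * so the caller may build this mapping out of 2MiB (level 2) pages.
 */
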
9051aa02
DW
2197static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2198 struct scatterlist *sg, unsigned long phys_pfn,
2199 unsigned long nr_pages, int prot)
e1605495
DW
2200{
2201 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2202 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2203 unsigned long sg_res = 0;
6dd9a7c7
YS
2204 unsigned int largepage_lvl = 0;
2205 unsigned long lvl_pages = 0;
e1605495 2206
162d1b10 2207 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2208
2209 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2210 return -EINVAL;
2211
2212 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2213
cc4f14aa
JL
2214 if (!sg) {
2215 sg_res = nr_pages;
9051aa02
DW
2216 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2217 }
2218
6dd9a7c7 2219 while (nr_pages > 0) {
c85994e4
DW
2220 uint64_t tmp;
2221
e1605495 2222 if (!sg_res) {
29a90b70
RM
2223 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2224
f532959b 2225 sg_res = aligned_nrpages(sg->offset, sg->length);
29a90b70 2226 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
e1605495 2227 sg->dma_length = sg->length;
29a90b70 2228 pteval = (sg_phys(sg) - pgoff) | prot;
6dd9a7c7 2229 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2230 }
6dd9a7c7 2231
e1605495 2232 if (!pte) {
6dd9a7c7
YS
2233 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2234
5cf0a76f 2235 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2236 if (!pte)
2237 return -ENOMEM;
6dd9a7c7 2238 /* It is large page*/
6491d4d0 2239 if (largepage_lvl > 1) {
ba2374fd
CZ
2240 unsigned long nr_superpages, end_pfn;
2241
6dd9a7c7 2242 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2243 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2244
2245 nr_superpages = sg_res / lvl_pages;
2246 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2247
d41a4adb
JL
2248 /*
2249 * Ensure that old small page tables are
ba2374fd 2250 * removed to make room for superpage(s).
bc24c571
DD
2251 * We're adding new large pages, so make sure
2252 * we don't remove their parent tables.
d41a4adb 2253 */
bc24c571
DD
2254 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2255 largepage_lvl + 1);
6491d4d0 2256 } else {
6dd9a7c7 2257 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2258 }
6dd9a7c7 2259
e1605495
DW
2260 }
 2261 /* We don't need a lock here; nobody else
2262 * touches the iova range
2263 */
7766a3fb 2264 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2265 if (tmp) {
1bf20f0d 2266 static int dumps = 5;
9f10e5bf
JR
2267 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2268 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2269 if (dumps) {
2270 dumps--;
2271 debug_dma_dump_mappings(NULL);
2272 }
2273 WARN_ON(1);
2274 }
6dd9a7c7
YS
2275
2276 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2277
2278 BUG_ON(nr_pages < lvl_pages);
2279 BUG_ON(sg_res < lvl_pages);
2280
2281 nr_pages -= lvl_pages;
2282 iov_pfn += lvl_pages;
2283 phys_pfn += lvl_pages;
2284 pteval += lvl_pages * VTD_PAGE_SIZE;
2285 sg_res -= lvl_pages;
2286
2287 /* If the next PTE would be the first in a new page, then we
2288 need to flush the cache on the entries we've just written.
2289 And then we'll need to recalculate 'pte', so clear it and
2290 let it get set again in the if (!pte) block above.
2291
2292 If we're done (!nr_pages) we need to flush the cache too.
2293
2294 Also if we've been setting superpages, we may need to
2295 recalculate 'pte' and switch back to smaller pages for the
2296 end of the mapping, if the trailing size is not enough to
2297 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2298 pte++;
6dd9a7c7
YS
2299 if (!nr_pages || first_pte_in_page(pte) ||
2300 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2301 domain_flush_cache(domain, first_pte,
2302 (void *)pte - (void *)first_pte);
2303 pte = NULL;
2304 }
6dd9a7c7
YS
2305
2306 if (!sg_res && nr_pages)
e1605495
DW
2307 sg = sg_next(sg);
2308 }
2309 return 0;
2310}
2311
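/*
 * Worked example (editorial addition), assuming the PTE bit layout from
 * intel-iommu.h (DMA_PTE_READ is bit 0, DMA_PTE_WRITE bit 1,
 * DMA_PTE_LARGE_PAGE bit 7): mapping physical frame 0x12345 with
 * DMA_PTE_READ | DMA_PTE_WRITE produces
 *
 *	pteval = (0x12345 << VTD_PAGE_SHIFT) | 0x3 = 0x12345003
 *
 * and a superpage leaf additionally carries DMA_PTE_LARGE_PAGE, as set
 * in the largepage_lvl > 1 branch above.
 */
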
87684fd9
PX
2312static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2313 struct scatterlist *sg, unsigned long phys_pfn,
2314 unsigned long nr_pages, int prot)
2315{
2316 int ret;
2317 struct intel_iommu *iommu;
2318
2319 /* Do the real mapping first */
2320 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2321 if (ret)
2322 return ret;
2323
2324 /* Notify about the new mapping */
2325 if (domain_type_is_vm(domain)) {
2326 /* VM typed domains can have more than one IOMMUs */
2327 int iommu_id;
2328 for_each_domain_iommu(iommu_id, domain) {
2329 iommu = g_iommus[iommu_id];
2330 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2331 }
2332 } else {
2333 /* General domains only have one IOMMU */
2334 iommu = domain_get_iommu(domain);
2335 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2336 }
2337
2338 return 0;
2339}
2340
9051aa02
DW
2341static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2342 struct scatterlist *sg, unsigned long nr_pages,
2343 int prot)
ba395927 2344{
87684fd9 2345 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
9051aa02 2346}
6f6a00e4 2347
9051aa02
DW
2348static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2349 unsigned long phys_pfn, unsigned long nr_pages,
2350 int prot)
2351{
87684fd9 2352 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2353}
2354
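/*
 * Illustrative usage sketch (editorial addition): a caller wanting a
 * read/write identity mapping of nr pages at physical address phys
 * would, roughly as iommu_domain_identity_map() does further down, use
 *
 *	domain_pfn_mapping(domain, phys >> VTD_PAGE_SHIFT,
 *			   phys >> VTD_PAGE_SHIFT, nr,
 *			   DMA_PTE_READ | DMA_PTE_WRITE);
 *
 * while the scatterlist-based DMA-API path goes through
 * domain_sg_mapping().
 */
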
2452d9db 2355static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2356{
5082219b
FS
2357 unsigned long flags;
2358 struct context_entry *context;
2359 u16 did_old;
2360
c7151a8d
WH
2361 if (!iommu)
2362 return;
8c11e798 2363
5082219b
FS
2364 spin_lock_irqsave(&iommu->lock, flags);
2365 context = iommu_context_addr(iommu, bus, devfn, 0);
2366 if (!context) {
2367 spin_unlock_irqrestore(&iommu->lock, flags);
2368 return;
2369 }
2370 did_old = context_domain_id(context);
2371 context_clear_entry(context);
2372 __iommu_flush_cache(iommu, context, sizeof(*context));
2373 spin_unlock_irqrestore(&iommu->lock, flags);
2374 iommu->flush.flush_context(iommu,
2375 did_old,
2376 (((u16)bus) << 8) | devfn,
2377 DMA_CCMD_MASK_NOBIT,
2378 DMA_CCMD_DEVICE_INVL);
2379 iommu->flush.flush_iotlb(iommu,
2380 did_old,
2381 0,
2382 0,
2383 DMA_TLB_DSI_FLUSH);
ba395927
KA
2384}
2385
109b9b04
DW
2386static inline void unlink_domain_info(struct device_domain_info *info)
2387{
2388 assert_spin_locked(&device_domain_lock);
2389 list_del(&info->link);
2390 list_del(&info->global);
2391 if (info->dev)
0bcb3e28 2392 info->dev->archdata.iommu = NULL;
109b9b04
DW
2393}
2394
ba395927
KA
2395static void domain_remove_dev_info(struct dmar_domain *domain)
2396{
3a74ca01 2397 struct device_domain_info *info, *tmp;
fb170fb4 2398 unsigned long flags;
ba395927
KA
2399
2400 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2401 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2402 __dmar_remove_one_dev_info(info);
ba395927
KA
2403 spin_unlock_irqrestore(&device_domain_lock, flags);
2404}
2405
2406/*
2407 * find_domain
1525a29a 2408 * Note: struct device->archdata.iommu stores the info
ba395927 2409 */
1525a29a 2410static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2411{
2412 struct device_domain_info *info;
2413
2414 /* No lock here, assumes no domain exit in normal case */
1525a29a 2415 info = dev->archdata.iommu;
b316d02a 2416 if (likely(info))
ba395927
KA
2417 return info->domain;
2418 return NULL;
2419}
2420
5a8f40e8 2421static inline struct device_domain_info *
745f2586
JL
2422dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2423{
2424 struct device_domain_info *info;
2425
2426 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2427 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2428 info->devfn == devfn)
5a8f40e8 2429 return info;
745f2586
JL
2430
2431 return NULL;
2432}
2433
5db31569
JR
2434static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2435 int bus, int devfn,
2436 struct device *dev,
2437 struct dmar_domain *domain)
745f2586 2438{
5a8f40e8 2439 struct dmar_domain *found = NULL;
745f2586
JL
2440 struct device_domain_info *info;
2441 unsigned long flags;
d160aca5 2442 int ret;
745f2586
JL
2443
2444 info = alloc_devinfo_mem();
2445 if (!info)
b718cd3d 2446 return NULL;
745f2586 2447
745f2586
JL
2448 info->bus = bus;
2449 info->devfn = devfn;
b16d0cb9
DW
2450 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2451 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2452 info->ats_qdep = 0;
745f2586
JL
2453 info->dev = dev;
2454 info->domain = domain;
5a8f40e8 2455 info->iommu = iommu;
cc580e41 2456 info->pasid_table = NULL;
745f2586 2457
b16d0cb9
DW
2458 if (dev && dev_is_pci(dev)) {
2459 struct pci_dev *pdev = to_pci_dev(info->dev);
2460
cef74409
GK
2461 if (!pci_ats_disabled() &&
2462 ecap_dev_iotlb_support(iommu->ecap) &&
b16d0cb9
DW
2463 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2464 dmar_find_matched_atsr_unit(pdev))
2465 info->ats_supported = 1;
2466
2467 if (ecs_enabled(iommu)) {
2468 if (pasid_enabled(iommu)) {
2469 int features = pci_pasid_features(pdev);
2470 if (features >= 0)
2471 info->pasid_supported = features | 1;
2472 }
2473
2474 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2475 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2476 info->pri_supported = 1;
2477 }
2478 }
2479
745f2586
JL
2480 spin_lock_irqsave(&device_domain_lock, flags);
2481 if (dev)
0bcb3e28 2482 found = find_domain(dev);
f303e507
JR
2483
2484 if (!found) {
5a8f40e8 2485 struct device_domain_info *info2;
41e80dca 2486 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2487 if (info2) {
2488 found = info2->domain;
2489 info2->dev = dev;
2490 }
5a8f40e8 2491 }
f303e507 2492
745f2586
JL
2493 if (found) {
2494 spin_unlock_irqrestore(&device_domain_lock, flags);
2495 free_devinfo_mem(info);
b718cd3d
DW
2496 /* Caller must free the original domain */
2497 return found;
745f2586
JL
2498 }
2499
d160aca5
JR
2500 spin_lock(&iommu->lock);
2501 ret = domain_attach_iommu(domain, iommu);
2502 spin_unlock(&iommu->lock);
2503
2504 if (ret) {
c6c2cebd 2505 spin_unlock_irqrestore(&device_domain_lock, flags);
499f3aa4 2506 free_devinfo_mem(info);
c6c2cebd
JR
2507 return NULL;
2508 }
c6c2cebd 2509
b718cd3d
DW
2510 list_add(&info->link, &domain->devices);
2511 list_add(&info->global, &device_domain_list);
2512 if (dev)
2513 dev->archdata.iommu = info;
a7fc93fe
LB
2514
2515 if (dev && dev_is_pci(dev) && info->pasid_supported) {
2516 ret = intel_pasid_alloc_table(dev);
2517 if (ret) {
be9e6598
LB
2518 pr_warn("No pasid table for %s, pasid disabled\n",
2519 dev_name(dev));
2520 info->pasid_supported = 0;
a7fc93fe
LB
2521 }
2522 }
b718cd3d
DW
2523 spin_unlock_irqrestore(&device_domain_lock, flags);
2524
cc4e2575
JR
2525 if (dev && domain_context_mapping(domain, dev)) {
2526 pr_err("Domain context map for %s failed\n", dev_name(dev));
e6de0f8d 2527 dmar_remove_one_dev_info(domain, dev);
cc4e2575
JR
2528 return NULL;
2529 }
2530
b718cd3d 2531 return domain;
745f2586
JL
2532}
2533
579305f7
AW
2534static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2535{
2536 *(u16 *)opaque = alias;
2537 return 0;
2538}
2539
76208356 2540static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
ba395927 2541{
cc4e2575 2542 struct device_domain_info *info = NULL;
76208356 2543 struct dmar_domain *domain = NULL;
579305f7 2544 struct intel_iommu *iommu;
fcc35c63 2545 u16 dma_alias;
ba395927 2546 unsigned long flags;
aa4d066a 2547 u8 bus, devfn;
ba395927 2548
579305f7
AW
2549 iommu = device_to_iommu(dev, &bus, &devfn);
2550 if (!iommu)
2551 return NULL;
2552
146922ec
DW
2553 if (dev_is_pci(dev)) {
2554 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2555
579305f7
AW
2556 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2557
2558 spin_lock_irqsave(&device_domain_lock, flags);
2559 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2560 PCI_BUS_NUM(dma_alias),
2561 dma_alias & 0xff);
2562 if (info) {
2563 iommu = info->iommu;
2564 domain = info->domain;
5a8f40e8 2565 }
579305f7 2566 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2567
76208356 2568 /* DMA alias already has a domain, use it */
579305f7 2569 if (info)
76208356 2570 goto out;
579305f7 2571 }
ba395927 2572
146922ec 2573 /* Allocate and initialize new domain for the device */
ab8dfe25 2574 domain = alloc_domain(0);
745f2586 2575 if (!domain)
579305f7 2576 return NULL;
dc534b25 2577 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2578 domain_exit(domain);
2579 return NULL;
2c2e2c38 2580 }
ba395927 2581
76208356 2582out:
579305f7 2583
76208356
JR
2584 return domain;
2585}
579305f7 2586
76208356
JR
2587static struct dmar_domain *set_domain_for_dev(struct device *dev,
2588 struct dmar_domain *domain)
2589{
2590 struct intel_iommu *iommu;
2591 struct dmar_domain *tmp;
2592 u16 req_id, dma_alias;
2593 u8 bus, devfn;
2594
2595 iommu = device_to_iommu(dev, &bus, &devfn);
2596 if (!iommu)
2597 return NULL;
2598
2599 req_id = ((u16)bus << 8) | devfn;
2600
2601 if (dev_is_pci(dev)) {
2602 struct pci_dev *pdev = to_pci_dev(dev);
2603
2604 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2605
2606 /* register PCI DMA alias device */
2607 if (req_id != dma_alias) {
2608 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2609 dma_alias & 0xff, NULL, domain);
2610
2611 if (!tmp || tmp != domain)
2612 return tmp;
2613 }
ba395927
KA
2614 }
2615
5db31569 2616 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
76208356
JR
2617 if (!tmp || tmp != domain)
2618 return tmp;
2619
2620 return domain;
2621}
579305f7 2622
76208356
JR
2623static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2624{
2625 struct dmar_domain *domain, *tmp;
2626
2627 domain = find_domain(dev);
2628 if (domain)
2629 goto out;
2630
2631 domain = find_or_alloc_domain(dev, gaw);
2632 if (!domain)
2633 goto out;
2634
2635 tmp = set_domain_for_dev(dev, domain);
2636 if (!tmp || domain != tmp) {
579305f7
AW
2637 domain_exit(domain);
2638 domain = tmp;
2639 }
b718cd3d 2640
76208356
JR
2641out:
2642
b718cd3d 2643 return domain;
ba395927
KA
2644}
2645
b213203e
DW
2646static int iommu_domain_identity_map(struct dmar_domain *domain,
2647 unsigned long long start,
2648 unsigned long long end)
ba395927 2649{
c5395d5c
DW
2650 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2651 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2652
2653 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2654 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2655 pr_err("Reserving iova failed\n");
b213203e 2656 return -ENOMEM;
ba395927
KA
2657 }
2658
af1089ce 2659 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2660 /*
2661 * RMRR range might have overlap with physical memory range,
2662 * clear it first
2663 */
c5395d5c 2664 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2665
87684fd9
PX
2666 return __domain_mapping(domain, first_vpfn, NULL,
2667 first_vpfn, last_vpfn - first_vpfn + 1,
2668 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2669}
2670
d66ce54b
JR
2671static int domain_prepare_identity_map(struct device *dev,
2672 struct dmar_domain *domain,
2673 unsigned long long start,
2674 unsigned long long end)
b213203e 2675{
19943b0e
DW
2676 /* For _hardware_ passthrough, don't bother. But for software
2677 passthrough, we do it anyway -- it may indicate a memory
 2678 range which is reserved in E820, and so didn't get set
2679 up to start with in si_domain */
2680 if (domain == si_domain && hw_pass_through) {
9f10e5bf
JR
2681 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2682 dev_name(dev), start, end);
19943b0e
DW
2683 return 0;
2684 }
2685
9f10e5bf
JR
2686 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2687 dev_name(dev), start, end);
2688
5595b528
DW
2689 if (end < start) {
2690 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2691 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2692 dmi_get_system_info(DMI_BIOS_VENDOR),
2693 dmi_get_system_info(DMI_BIOS_VERSION),
2694 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2695 return -EIO;
5595b528
DW
2696 }
2697
2ff729f5
DW
2698 if (end >> agaw_to_width(domain->agaw)) {
2699 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2700 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2701 agaw_to_width(domain->agaw),
2702 dmi_get_system_info(DMI_BIOS_VENDOR),
2703 dmi_get_system_info(DMI_BIOS_VERSION),
2704 dmi_get_system_info(DMI_PRODUCT_VERSION));
d66ce54b 2705 return -EIO;
2ff729f5 2706 }
19943b0e 2707
d66ce54b
JR
2708 return iommu_domain_identity_map(domain, start, end);
2709}
ba395927 2710
d66ce54b
JR
2711static int iommu_prepare_identity_map(struct device *dev,
2712 unsigned long long start,
2713 unsigned long long end)
2714{
2715 struct dmar_domain *domain;
2716 int ret;
2717
2718 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2719 if (!domain)
2720 return -ENOMEM;
2721
2722 ret = domain_prepare_identity_map(dev, domain, start, end);
2723 if (ret)
2724 domain_exit(domain);
b213203e 2725
ba395927 2726 return ret;
ba395927
KA
2727}
2728
2729static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2730 struct device *dev)
ba395927 2731{
0b9d9753 2732 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2733 return 0;
0b9d9753
DW
2734 return iommu_prepare_identity_map(dev, rmrr->base_address,
2735 rmrr->end_address);
ba395927
KA
2736}
2737
d3f13810 2738#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2739static inline void iommu_prepare_isa(void)
2740{
2741 struct pci_dev *pdev;
2742 int ret;
2743
2744 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2745 if (!pdev)
2746 return;
2747
9f10e5bf 2748 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2749 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2750
2751 if (ret)
9f10e5bf 2752 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
49a0429e 2753
9b27e82d 2754 pci_dev_put(pdev);
49a0429e
KA
2755}
2756#else
2757static inline void iommu_prepare_isa(void)
2758{
2759 return;
2760}
d3f13810 2761#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2762
2c2e2c38 2763static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2764
071e1374 2765static int __init si_domain_init(int hw)
2c2e2c38 2766{
c7ab48d2 2767 int nid, ret = 0;
2c2e2c38 2768
ab8dfe25 2769 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2770 if (!si_domain)
2771 return -EFAULT;
2772
2c2e2c38
FY
2773 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2774 domain_exit(si_domain);
2775 return -EFAULT;
2776 }
2777
0dc79715 2778 pr_debug("Identity mapping domain allocated\n");
2c2e2c38 2779
19943b0e
DW
2780 if (hw)
2781 return 0;
2782
c7ab48d2 2783 for_each_online_node(nid) {
5dfe8660
TH
2784 unsigned long start_pfn, end_pfn;
2785 int i;
2786
2787 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2788 ret = iommu_domain_identity_map(si_domain,
2789 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2790 if (ret)
2791 return ret;
2792 }
c7ab48d2
DW
2793 }
2794
2c2e2c38
FY
2795 return 0;
2796}
2797
9b226624 2798static int identity_mapping(struct device *dev)
2c2e2c38
FY
2799{
2800 struct device_domain_info *info;
2801
2802 if (likely(!iommu_identity_mapping))
2803 return 0;
2804
9b226624 2805 info = dev->archdata.iommu;
cb452a40
MT
2806 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2807 return (info->domain == si_domain);
2c2e2c38 2808
2c2e2c38
FY
2809 return 0;
2810}
2811
28ccce0d 2812static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2813{
0ac72664 2814 struct dmar_domain *ndomain;
5a8f40e8 2815 struct intel_iommu *iommu;
156baca8 2816 u8 bus, devfn;
2c2e2c38 2817
5913c9bf 2818 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2819 if (!iommu)
2820 return -ENODEV;
2821
5db31569 2822 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2823 if (ndomain != domain)
2824 return -EBUSY;
2c2e2c38
FY
2825
2826 return 0;
2827}
2828
0b9d9753 2829static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2830{
2831 struct dmar_rmrr_unit *rmrr;
832bd858 2832 struct device *tmp;
ea2447f7
TM
2833 int i;
2834
0e242612 2835 rcu_read_lock();
ea2447f7 2836 for_each_rmrr_units(rmrr) {
b683b230
JL
2837 /*
2838 * Return TRUE if this RMRR contains the device that
2839 * is passed in.
2840 */
2841 for_each_active_dev_scope(rmrr->devices,
2842 rmrr->devices_cnt, i, tmp)
0b9d9753 2843 if (tmp == dev) {
0e242612 2844 rcu_read_unlock();
ea2447f7 2845 return true;
b683b230 2846 }
ea2447f7 2847 }
0e242612 2848 rcu_read_unlock();
ea2447f7
TM
2849 return false;
2850}
2851
c875d2c1
AW
2852/*
2853 * There are a couple cases where we need to restrict the functionality of
2854 * devices associated with RMRRs. The first is when evaluating a device for
2855 * identity mapping because problems exist when devices are moved in and out
2856 * of domains and their respective RMRR information is lost. This means that
2857 * a device with associated RMRRs will never be in a "passthrough" domain.
2858 * The second is use of the device through the IOMMU API. This interface
2859 * expects to have full control of the IOVA space for the device. We cannot
2860 * satisfy both the requirement that RMRR access is maintained and have an
2861 * unencumbered IOVA space. We also have no ability to quiesce the device's
2862 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2863 * We therefore prevent devices associated with an RMRR from participating in
2864 * the IOMMU API, which eliminates them from device assignment.
2865 *
2866 * In both cases we assume that PCI USB devices with RMRRs have them largely
2867 * for historical reasons and that the RMRR space is not actively used post
2868 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2869 *
2870 * The same exception is made for graphics devices, with the requirement that
2871 * any use of the RMRR regions will be torn down before assigning the device
2872 * to a guest.
c875d2c1
AW
2873 */
2874static bool device_is_rmrr_locked(struct device *dev)
2875{
2876 if (!device_has_rmrr(dev))
2877 return false;
2878
2879 if (dev_is_pci(dev)) {
2880 struct pci_dev *pdev = to_pci_dev(dev);
2881
18436afd 2882 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2883 return false;
2884 }
2885
2886 return true;
2887}
2888
3bdb2591 2889static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2890{
ea2447f7 2891
3bdb2591
DW
2892 if (dev_is_pci(dev)) {
2893 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2894
c875d2c1 2895 if (device_is_rmrr_locked(dev))
3bdb2591 2896 return 0;
e0fc7e0b 2897
3bdb2591
DW
2898 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2899 return 1;
e0fc7e0b 2900
3bdb2591
DW
2901 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2902 return 1;
6941af28 2903
3bdb2591 2904 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2905 return 0;
3bdb2591
DW
2906
2907 /*
2908 * We want to start off with all devices in the 1:1 domain, and
2909 * take them out later if we find they can't access all of memory.
2910 *
2911 * However, we can't do this for PCI devices behind bridges,
2912 * because all PCI devices behind the same bridge will end up
2913 * with the same source-id on their transactions.
2914 *
2915 * Practically speaking, we can't change things around for these
2916 * devices at run-time, because we can't be sure there'll be no
2917 * DMA transactions in flight for any of their siblings.
2918 *
2919 * So PCI devices (unless they're on the root bus) as well as
2920 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2921 * the 1:1 domain, just in _case_ one of their siblings turns out
2922 * not to be able to map all of memory.
2923 */
2924 if (!pci_is_pcie(pdev)) {
2925 if (!pci_is_root_bus(pdev->bus))
2926 return 0;
2927 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2928 return 0;
2929 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2930 return 0;
3bdb2591
DW
2931 } else {
2932 if (device_has_rmrr(dev))
2933 return 0;
2934 }
3dfc813d 2935
3bdb2591 2936 /*
3dfc813d 2937 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2938 * Assume that they will -- if they turn out not to be, then we can
3dfc813d
DW
2939 * take them out of the 1:1 domain later.
2940 */
8fcc5372
CW
2941 if (!startup) {
2942 /*
2943 * If the device's dma_mask is less than the system's memory
2944 * size then this is not a candidate for identity mapping.
2945 */
3bdb2591 2946 u64 dma_mask = *dev->dma_mask;
8fcc5372 2947
3bdb2591
DW
2948 if (dev->coherent_dma_mask &&
2949 dev->coherent_dma_mask < dma_mask)
2950 dma_mask = dev->coherent_dma_mask;
8fcc5372 2951
3bdb2591 2952 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2953 }
6941af28
DW
2954
2955 return 1;
2956}
2957
cf04eee8
DW
2958static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2959{
2960 int ret;
2961
2962 if (!iommu_should_identity_map(dev, 1))
2963 return 0;
2964
28ccce0d 2965 ret = domain_add_dev_info(si_domain, dev);
cf04eee8 2966 if (!ret)
9f10e5bf
JR
2967 pr_info("%s identity mapping for device %s\n",
2968 hw ? "Hardware" : "Software", dev_name(dev));
cf04eee8
DW
2969 else if (ret == -ENODEV)
2970 /* device not associated with an iommu */
2971 ret = 0;
2972
2973 return ret;
2974}
2975
2976
071e1374 2977static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2978{
2c2e2c38 2979 struct pci_dev *pdev = NULL;
cf04eee8
DW
2980 struct dmar_drhd_unit *drhd;
2981 struct intel_iommu *iommu;
2982 struct device *dev;
2983 int i;
2984 int ret = 0;
2c2e2c38 2985
2c2e2c38 2986 for_each_pci_dev(pdev) {
cf04eee8
DW
2987 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2988 if (ret)
2989 return ret;
2990 }
2991
2992 for_each_active_iommu(iommu, drhd)
2993 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2994 struct acpi_device_physical_node *pn;
2995 struct acpi_device *adev;
2996
2997 if (dev->bus != &acpi_bus_type)
2998 continue;
86080ccc 2999
cf04eee8
DW
3000 adev= to_acpi_device(dev);
3001 mutex_lock(&adev->physical_node_lock);
3002 list_for_each_entry(pn, &adev->physical_node_list, node) {
3003 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
3004 if (ret)
3005 break;
eae460b6 3006 }
cf04eee8
DW
3007 mutex_unlock(&adev->physical_node_lock);
3008 if (ret)
3009 return ret;
62edf5dc 3010 }
2c2e2c38
FY
3011
3012 return 0;
3013}
3014
ffebeb46
JL
3015static void intel_iommu_init_qi(struct intel_iommu *iommu)
3016{
3017 /*
3018 * Start from the sane iommu hardware state.
3019 * If the queued invalidation is already initialized by us
3020 * (for example, while enabling interrupt-remapping) then
 3021 * things are already rolling from a sane state.
3022 */
3023 if (!iommu->qi) {
3024 /*
3025 * Clear any previous faults.
3026 */
3027 dmar_fault(-1, iommu);
3028 /*
3029 * Disable queued invalidation if supported and already enabled
3030 * before OS handover.
3031 */
3032 dmar_disable_qi(iommu);
3033 }
3034
3035 if (dmar_enable_qi(iommu)) {
3036 /*
3037 * Queued Invalidate not enabled, use Register Based Invalidate
3038 */
3039 iommu->flush.flush_context = __iommu_flush_context;
3040 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 3041 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
3042 iommu->name);
3043 } else {
3044 iommu->flush.flush_context = qi_flush_context;
3045 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 3046 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
3047 }
3048}
3049
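/*
 * Illustrative note (editorial addition): once the flush hooks are set
 * up here, callers no longer care which backend was chosen; e.g.
 * init_dmars() below issues its global invalidations as
 *
 *	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
 *	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
 */
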
091d42e4 3050static int copy_context_table(struct intel_iommu *iommu,
dfddb969 3051 struct root_entry *old_re,
091d42e4
JR
3052 struct context_entry **tbl,
3053 int bus, bool ext)
3054{
dbcd861f 3055 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf 3056 struct context_entry *new_ce = NULL, ce;
dfddb969 3057 struct context_entry *old_ce = NULL;
543c8dcf 3058 struct root_entry re;
091d42e4
JR
3059 phys_addr_t old_ce_phys;
3060
3061 tbl_idx = ext ? bus * 2 : bus;
dfddb969 3062 memcpy(&re, old_re, sizeof(re));
091d42e4
JR
3063
3064 for (devfn = 0; devfn < 256; devfn++) {
3065 /* First calculate the correct index */
3066 idx = (ext ? devfn * 2 : devfn) % 256;
3067
3068 if (idx == 0) {
3069 /* First save what we may have and clean up */
3070 if (new_ce) {
3071 tbl[tbl_idx] = new_ce;
3072 __iommu_flush_cache(iommu, new_ce,
3073 VTD_PAGE_SIZE);
3074 pos = 1;
3075 }
3076
3077 if (old_ce)
829383e1 3078 memunmap(old_ce);
091d42e4
JR
3079
3080 ret = 0;
3081 if (devfn < 0x80)
543c8dcf 3082 old_ce_phys = root_entry_lctp(&re);
091d42e4 3083 else
543c8dcf 3084 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
3085
3086 if (!old_ce_phys) {
3087 if (ext && devfn == 0) {
3088 /* No LCTP, try UCTP */
3089 devfn = 0x7f;
3090 continue;
3091 } else {
3092 goto out;
3093 }
3094 }
3095
3096 ret = -ENOMEM;
dfddb969
DW
3097 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3098 MEMREMAP_WB);
091d42e4
JR
3099 if (!old_ce)
3100 goto out;
3101
3102 new_ce = alloc_pgtable_page(iommu->node);
3103 if (!new_ce)
3104 goto out_unmap;
3105
3106 ret = 0;
3107 }
3108
3109 /* Now copy the context entry */
dfddb969 3110 memcpy(&ce, old_ce + idx, sizeof(ce));
091d42e4 3111
cf484d0e 3112 if (!__context_present(&ce))
091d42e4
JR
3113 continue;
3114
dbcd861f
JR
3115 did = context_domain_id(&ce);
3116 if (did >= 0 && did < cap_ndoms(iommu->cap))
3117 set_bit(did, iommu->domain_ids);
3118
cf484d0e
JR
3119 /*
3120 * We need a marker for copied context entries. This
3121 * marker needs to work for the old format as well as
3122 * for extended context entries.
3123 *
3124 * Bit 67 of the context entry is used. In the old
3125 * format this bit is available to software, in the
3126 * extended format it is the PGE bit, but PGE is ignored
3127 * by HW if PASIDs are disabled (and thus still
3128 * available).
3129 *
3130 * So disable PASIDs first and then mark the entry
3131 * copied. This means that we don't copy PASID
3132 * translations from the old kernel, but this is fine as
3133 * faults there are not fatal.
3134 */
3135 context_clear_pasid_enable(&ce);
3136 context_set_copied(&ce);
3137
091d42e4
JR
3138 new_ce[idx] = ce;
3139 }
3140
3141 tbl[tbl_idx + pos] = new_ce;
3142
3143 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3144
3145out_unmap:
dfddb969 3146 memunmap(old_ce);
091d42e4
JR
3147
3148out:
3149 return ret;
3150}
3151
3152static int copy_translation_tables(struct intel_iommu *iommu)
3153{
3154 struct context_entry **ctxt_tbls;
dfddb969 3155 struct root_entry *old_rt;
091d42e4
JR
3156 phys_addr_t old_rt_phys;
3157 int ctxt_table_entries;
3158 unsigned long flags;
3159 u64 rtaddr_reg;
3160 int bus, ret;
c3361f2f 3161 bool new_ext, ext;
091d42e4
JR
3162
3163 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3164 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
3165 new_ext = !!ecap_ecs(iommu->ecap);
3166
3167 /*
3168 * The RTT bit can only be changed when translation is disabled,
 3169 * but disabling translation would open a window for data
3170 * corruption. So bail out and don't copy anything if we would
3171 * have to change the bit.
3172 */
3173 if (new_ext != ext)
3174 return -EINVAL;
091d42e4
JR
3175
3176 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3177 if (!old_rt_phys)
3178 return -EINVAL;
3179
dfddb969 3180 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
091d42e4
JR
3181 if (!old_rt)
3182 return -ENOMEM;
3183
3184 /* This is too big for the stack - allocate it from slab */
3185 ctxt_table_entries = ext ? 512 : 256;
3186 ret = -ENOMEM;
6396bb22 3187 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
091d42e4
JR
3188 if (!ctxt_tbls)
3189 goto out_unmap;
3190
3191 for (bus = 0; bus < 256; bus++) {
3192 ret = copy_context_table(iommu, &old_rt[bus],
3193 ctxt_tbls, bus, ext);
3194 if (ret) {
3195 pr_err("%s: Failed to copy context table for bus %d\n",
3196 iommu->name, bus);
3197 continue;
3198 }
3199 }
3200
3201 spin_lock_irqsave(&iommu->lock, flags);
3202
3203 /* Context tables are copied, now write them to the root_entry table */
3204 for (bus = 0; bus < 256; bus++) {
3205 int idx = ext ? bus * 2 : bus;
3206 u64 val;
3207
3208 if (ctxt_tbls[idx]) {
3209 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3210 iommu->root_entry[bus].lo = val;
3211 }
3212
3213 if (!ext || !ctxt_tbls[idx + 1])
3214 continue;
3215
3216 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3217 iommu->root_entry[bus].hi = val;
3218 }
3219
3220 spin_unlock_irqrestore(&iommu->lock, flags);
3221
3222 kfree(ctxt_tbls);
3223
3224 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3225
3226 ret = 0;
3227
3228out_unmap:
dfddb969 3229 memunmap(old_rt);
091d42e4
JR
3230
3231 return ret;
3232}
3233
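/*
 * Worked example (editorial addition): bit 0 of a root entry flags the
 * referenced context table as present, so if the copied context table
 * for bus 3 sits at physical address 0x12340000 the loop above programs
 *
 *	iommu->root_entry[3].lo = 0x12340000 | 1 = 0x12340001;
 *
 * and, in extended mode, .hi is filled the same way from
 * ctxt_tbls[3 * 2 + 1].
 */
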
b779260b 3234static int __init init_dmars(void)
ba395927
KA
3235{
3236 struct dmar_drhd_unit *drhd;
3237 struct dmar_rmrr_unit *rmrr;
a87f4918 3238 bool copied_tables = false;
832bd858 3239 struct device *dev;
ba395927 3240 struct intel_iommu *iommu;
13cf0174 3241 int i, ret;
2c2e2c38 3242
ba395927
KA
3243 /*
3244 * for each drhd
3245 * allocate root
3246 * initialize and program root entry to not present
3247 * endfor
3248 */
3249 for_each_drhd_unit(drhd) {
5e0d2a6f 3250 /*
 3251 * lock not needed as this is only incremented in the single
 3252 * threaded kernel __init code path; all other accesses are
 3253 * read only
3254 */
78d8e704 3255 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3256 g_num_of_iommus++;
3257 continue;
3258 }
9f10e5bf 3259 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3260 }
3261
ffebeb46
JL
3262 /* Preallocate enough resources for IOMMU hot-addition */
3263 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3264 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3265
d9630fe9
WH
3266 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3267 GFP_KERNEL);
3268 if (!g_iommus) {
9f10e5bf 3269 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3270 ret = -ENOMEM;
3271 goto error;
3272 }
3273
7c919779 3274 for_each_active_iommu(iommu, drhd) {
56283174
LB
3275 /*
3276 * Find the max pasid size of all IOMMU's in the system.
3277 * We need to ensure the system pasid table is no bigger
3278 * than the smallest supported.
3279 */
3280 if (pasid_enabled(iommu)) {
3281 u32 temp = 2 << ecap_pss(iommu->ecap);
3282
3283 intel_pasid_max_id = min_t(u32, temp,
3284 intel_pasid_max_id);
3285 }
3286
d9630fe9 3287 g_iommus[iommu->seq_id] = iommu;
ba395927 3288
b63d80d1
JR
3289 intel_iommu_init_qi(iommu);
3290
e61d98d8
SS
3291 ret = iommu_init_domains(iommu);
3292 if (ret)
989d51fc 3293 goto free_iommu;
e61d98d8 3294
4158c2ec
JR
3295 init_translation_status(iommu);
3296
091d42e4
JR
3297 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3298 iommu_disable_translation(iommu);
3299 clear_translation_pre_enabled(iommu);
3300 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3301 iommu->name);
3302 }
4158c2ec 3303
ba395927
KA
3304 /*
3305 * TBD:
3306 * we could share the same root & context tables
25985edc 3307 * among all IOMMUs. Need to split it later.
ba395927
KA
3308 */
3309 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3310 if (ret)
989d51fc 3311 goto free_iommu;
5f0a7f76 3312
091d42e4
JR
3313 if (translation_pre_enabled(iommu)) {
3314 pr_info("Translation already enabled - trying to copy translation structures\n");
3315
3316 ret = copy_translation_tables(iommu);
3317 if (ret) {
3318 /*
3319 * We found the IOMMU with translation
3320 * enabled - but failed to copy over the
3321 * old root-entry table. Try to proceed
3322 * by disabling translation now and
3323 * allocating a clean root-entry table.
3324 * This might cause DMAR faults, but
3325 * probably the dump will still succeed.
3326 */
3327 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3328 iommu->name);
3329 iommu_disable_translation(iommu);
3330 clear_translation_pre_enabled(iommu);
3331 } else {
3332 pr_info("Copied translation tables from previous kernel for %s\n",
3333 iommu->name);
a87f4918 3334 copied_tables = true;
091d42e4
JR
3335 }
3336 }
3337
4ed0d3e6 3338 if (!ecap_pass_through(iommu->ecap))
19943b0e 3339 hw_pass_through = 0;
8a94ade4
DW
3340#ifdef CONFIG_INTEL_IOMMU_SVM
3341 if (pasid_enabled(iommu))
d9737953 3342 intel_svm_init(iommu);
8a94ade4 3343#endif
ba395927
KA
3344 }
3345
a4c34ff1
JR
3346 /*
3347 * Now that qi is enabled on all iommus, set the root entry and flush
3348 * caches. This is required on some Intel X58 chipsets, otherwise the
3349 * flush_context function will loop forever and the boot hangs.
3350 */
3351 for_each_active_iommu(iommu, drhd) {
3352 iommu_flush_write_buffer(iommu);
3353 iommu_set_root_entry(iommu);
3354 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3355 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3356 }
3357
19943b0e 3358 if (iommu_pass_through)
e0fc7e0b
DW
3359 iommu_identity_mapping |= IDENTMAP_ALL;
3360
d3f13810 3361#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 3362 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 3363#endif
e0fc7e0b 3364
21e722c4
AR
3365 check_tylersburg_isoch();
3366
86080ccc
JR
3367 if (iommu_identity_mapping) {
3368 ret = si_domain_init(hw_pass_through);
3369 if (ret)
3370 goto free_iommu;
3371 }
3372
e0fc7e0b 3373
a87f4918
JR
3374 /*
3375 * If we copied translations from a previous kernel in the kdump
 3376 * case, we cannot assign the devices to domains now, as that
3377 * would eliminate the old mappings. So skip this part and defer
3378 * the assignment to device driver initialization time.
3379 */
3380 if (copied_tables)
3381 goto domains_done;
3382
ba395927 3383 /*
19943b0e
DW
 3384 * If pass through is not set or not enabled, set up context entries for
3385 * identity mappings for rmrr, gfx, and isa and may fall back to static
3386 * identity mapping if iommu_identity_mapping is set.
ba395927 3387 */
19943b0e
DW
3388 if (iommu_identity_mapping) {
3389 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 3390 if (ret) {
9f10e5bf 3391 pr_crit("Failed to setup IOMMU pass-through\n");
989d51fc 3392 goto free_iommu;
ba395927
KA
3393 }
3394 }
ba395927 3395 /*
19943b0e
DW
3396 * For each rmrr
3397 * for each dev attached to rmrr
3398 * do
3399 * locate drhd for dev, alloc domain for dev
3400 * allocate free domain
3401 * allocate page table entries for rmrr
3402 * if context not allocated for bus
3403 * allocate and init context
3404 * set present in root table for this bus
3405 * init context with domain, translation etc
3406 * endfor
3407 * endfor
ba395927 3408 */
9f10e5bf 3409 pr_info("Setting RMRR:\n");
19943b0e 3410 for_each_rmrr_units(rmrr) {
b683b230
JL
 3411 /* some BIOSes list non-existent devices in the DMAR table. */
3412 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 3413 i, dev) {
0b9d9753 3414 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e 3415 if (ret)
9f10e5bf 3416 pr_err("Mapping reserved region failed\n");
ba395927 3417 }
4ed0d3e6 3418 }
49a0429e 3419
19943b0e
DW
3420 iommu_prepare_isa();
3421
a87f4918
JR
3422domains_done:
3423
ba395927
KA
3424 /*
3425 * for each drhd
3426 * enable fault log
3427 * global invalidate context cache
3428 * global invalidate iotlb
3429 * enable translation
3430 */
7c919779 3431 for_each_iommu(iommu, drhd) {
51a63e67
JC
3432 if (drhd->ignored) {
3433 /*
3434 * we always have to disable PMRs or DMA may fail on
3435 * this device
3436 */
3437 if (force_on)
7c919779 3438 iommu_disable_protect_mem_regions(iommu);
ba395927 3439 continue;
51a63e67 3440 }
ba395927
KA
3441
3442 iommu_flush_write_buffer(iommu);
3443
a222a7f0
DW
3444#ifdef CONFIG_INTEL_IOMMU_SVM
3445 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3446 ret = intel_svm_enable_prq(iommu);
3447 if (ret)
3448 goto free_iommu;
3449 }
3450#endif
3460a6d9
KA
3451 ret = dmar_set_interrupt(iommu);
3452 if (ret)
989d51fc 3453 goto free_iommu;
3460a6d9 3454
8939ddf6
JR
3455 if (!translation_pre_enabled(iommu))
3456 iommu_enable_translation(iommu);
3457
b94996c9 3458 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
3459 }
3460
3461 return 0;
989d51fc
JL
3462
3463free_iommu:
ffebeb46
JL
3464 for_each_active_iommu(iommu, drhd) {
3465 disable_dmar_iommu(iommu);
a868e6b7 3466 free_dmar_iommu(iommu);
ffebeb46 3467 }
13cf0174 3468
d9630fe9 3469 kfree(g_iommus);
13cf0174 3470
989d51fc 3471error:
ba395927
KA
3472 return ret;
3473}
3474
5a5e02a6 3475/* This takes a number of _MM_ pages, not VTD pages */
2aac6304 3476static unsigned long intel_alloc_iova(struct device *dev,
875764de
DW
3477 struct dmar_domain *domain,
3478 unsigned long nrpages, uint64_t dma_mask)
ba395927 3479{
22e2f9fa 3480 unsigned long iova_pfn = 0;
ba395927 3481
875764de
DW
3482 /* Restrict dma_mask to the width that the iommu can handle */
3483 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3484 /* Ensure we reserve the whole size-aligned region */
3485 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3486
3487 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3488 /*
3489 * First try to allocate an io virtual address in
284901a9 3490 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 3491 * from higher range
ba395927 3492 */
22e2f9fa 3493 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
538d5b33 3494 IOVA_PFN(DMA_BIT_MASK(32)), false);
22e2f9fa
OP
3495 if (iova_pfn)
3496 return iova_pfn;
875764de 3497 }
538d5b33
TN
3498 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3499 IOVA_PFN(dma_mask), true);
22e2f9fa 3500 if (unlikely(!iova_pfn)) {
9f10e5bf 3501 pr_err("Allocating %ld-page iova for %s failed",
207e3592 3502 nrpages, dev_name(dev));
2aac6304 3503 return 0;
f76aec76
KA
3504 }
3505
22e2f9fa 3506 return iova_pfn;
f76aec76
KA
3507}
3508
9ddbfb42 3509struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
f76aec76 3510{
1c5ebba9 3511 struct dmar_domain *domain, *tmp;
b1ce5b79 3512 struct dmar_rmrr_unit *rmrr;
b1ce5b79
JR
3513 struct device *i_dev;
3514 int i, ret;
f76aec76 3515
1c5ebba9
JR
3516 domain = find_domain(dev);
3517 if (domain)
3518 goto out;
3519
3520 domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3521 if (!domain)
3522 goto out;
ba395927 3523
b1ce5b79
JR
3524 /* We have a new domain - setup possible RMRRs for the device */
3525 rcu_read_lock();
3526 for_each_rmrr_units(rmrr) {
3527 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3528 i, i_dev) {
3529 if (i_dev != dev)
3530 continue;
3531
3532 ret = domain_prepare_identity_map(dev, domain,
3533 rmrr->base_address,
3534 rmrr->end_address);
3535 if (ret)
3536 dev_err(dev, "Mapping reserved region failed\n");
3537 }
3538 }
3539 rcu_read_unlock();
3540
1c5ebba9
JR
3541 tmp = set_domain_for_dev(dev, domain);
3542 if (!tmp || domain != tmp) {
3543 domain_exit(domain);
3544 domain = tmp;
3545 }
3546
3547out:
3548
3549 if (!domain)
3550 pr_err("Allocating domain for %s failed\n", dev_name(dev));
3551
3552
f76aec76
KA
3553 return domain;
3554}
3555
ecb509ec 3556/* Check if the dev needs to go through non-identity map and unmap process.*/
73676832 3557static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
3558{
3559 int found;
3560
3d89194a 3561 if (iommu_dummy(dev))
1e4c64c4
DW
3562 return 1;
3563
2c2e2c38 3564 if (!iommu_identity_mapping)
1e4c64c4 3565 return 0;
2c2e2c38 3566
9b226624 3567 found = identity_mapping(dev);
2c2e2c38 3568 if (found) {
ecb509ec 3569 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
3570 return 1;
3571 else {
3572 /*
3573 * 32 bit DMA is removed from si_domain and fall back
3574 * to non-identity mapping.
3575 */
e6de0f8d 3576 dmar_remove_one_dev_info(si_domain, dev);
9f10e5bf
JR
3577 pr_info("32bit %s uses non-identity mapping\n",
3578 dev_name(dev));
2c2e2c38
FY
3579 return 0;
3580 }
3581 } else {
3582 /*
3583 * In case of a detached 64 bit DMA device from vm, the device
3584 * is put into si_domain for identity mapping.
3585 */
ecb509ec 3586 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 3587 int ret;
28ccce0d 3588 ret = domain_add_dev_info(si_domain, dev);
2c2e2c38 3589 if (!ret) {
9f10e5bf
JR
3590 pr_info("64bit %s uses identity mapping\n",
3591 dev_name(dev));
2c2e2c38
FY
3592 return 1;
3593 }
3594 }
3595 }
3596
1e4c64c4 3597 return 0;
2c2e2c38
FY
3598}
3599
5040a918 3600static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 3601 size_t size, int dir, u64 dma_mask)
f76aec76 3602{
f76aec76 3603 struct dmar_domain *domain;
5b6985ce 3604 phys_addr_t start_paddr;
2aac6304 3605 unsigned long iova_pfn;
f76aec76 3606 int prot = 0;
6865f0d1 3607 int ret;
8c11e798 3608 struct intel_iommu *iommu;
33041ec0 3609 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3610
3611 BUG_ON(dir == DMA_NONE);
2c2e2c38 3612
5040a918 3613 if (iommu_no_mapping(dev))
6865f0d1 3614 return paddr;
f76aec76 3615
5040a918 3616 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3617 if (!domain)
3618 return 0;
3619
8c11e798 3620 iommu = domain_get_iommu(domain);
88cb6a74 3621 size = aligned_nrpages(paddr, size);
f76aec76 3622
2aac6304
OP
3623 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3624 if (!iova_pfn)
f76aec76
KA
3625 goto error;
3626
ba395927
KA
3627 /*
3628 * Check if DMAR supports zero-length reads on write only
3629 * mappings.
3630 */
3631 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3632 !cap_zlr(iommu->cap))
ba395927
KA
3633 prot |= DMA_PTE_READ;
3634 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3635 prot |= DMA_PTE_WRITE;
3636 /*
6865f0d1 3637 * paddr - (paddr + size) might span a partial page, so we should map
ba395927 3638 * the whole page.  Note: if two parts of one page are mapped separately,
6865f0d1 3639 * we might have two guest addresses mapping to the same host paddr, but
ba395927
KA
3640 * this is not a big problem
3641 */
2aac6304 3642 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
33041ec0 3643 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3644 if (ret)
3645 goto error;
3646
2aac6304 3647 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
03d6a246
DW
3648 start_paddr += paddr & ~PAGE_MASK;
3649 return start_paddr;
ba395927 3650
ba395927 3651error:
2aac6304 3652 if (iova_pfn)
22e2f9fa 3653 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
9f10e5bf 3654 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3655 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3656 return 0;
3657}
3658
ffbbef5c
FT
3659static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3660 unsigned long offset, size_t size,
3661 enum dma_data_direction dir,
00085f1e 3662 unsigned long attrs)
bb9e6d65 3663{
ffbbef5c 3664 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3665 dir, *dev->dma_mask);
bb9e6d65
FT
3666}
3667
769530e4 3668static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
ba395927 3669{
f76aec76 3670 struct dmar_domain *domain;
d794dc9b 3671 unsigned long start_pfn, last_pfn;
769530e4 3672 unsigned long nrpages;
2aac6304 3673 unsigned long iova_pfn;
8c11e798 3674 struct intel_iommu *iommu;
ea8ea460 3675 struct page *freelist;
ba395927 3676
73676832 3677 if (iommu_no_mapping(dev))
f76aec76 3678 return;
2c2e2c38 3679
1525a29a 3680 domain = find_domain(dev);
ba395927
KA
3681 BUG_ON(!domain);
3682
8c11e798
WH
3683 iommu = domain_get_iommu(domain);
3684
2aac6304 3685 iova_pfn = IOVA_PFN(dev_addr);
ba395927 3686
769530e4 3687 nrpages = aligned_nrpages(dev_addr, size);
2aac6304 3688 start_pfn = mm_to_dma_pfn(iova_pfn);
769530e4 3689 last_pfn = start_pfn + nrpages - 1;
ba395927 3690
d794dc9b 3691 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3692 dev_name(dev), start_pfn, last_pfn);
ba395927 3693
ea8ea460 3694 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3695
5e0d2a6f 3696 if (intel_iommu_strict) {
a1ddcbe9 3697 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
769530e4 3698 nrpages, !freelist, 0);
5e0d2a6f 3699 /* free iova */
22e2f9fa 3700 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
ea8ea460 3701 dma_free_pagelist(freelist);
5e0d2a6f 3702 } else {
13cf0174
JR
3703 queue_iova(&domain->iovad, iova_pfn, nrpages,
3704 (unsigned long)freelist);
5e0d2a6f 3705 /*
3706 * Queue up the release of the unmap to avoid the roughly 1/6th of
3707 * a CPU otherwise consumed by a synchronous iotlb flush.
3708 */
5e0d2a6f 3709 }
ba395927
KA
3710}
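/*
 * Design note (summary of intel_unmap() above): with intel_iommu_strict the
 * IOTLB is invalidated and the IOVA freed synchronously on every unmap;
 * otherwise the IOVA and its page freelist are queued via queue_iova() and
 * flushed in batches, trading a short window of stale IOTLB entries for much
 * lower unmap overhead.
 */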
3711
d41a4adb
JL
3712static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3713 size_t size, enum dma_data_direction dir,
00085f1e 3714 unsigned long attrs)
d41a4adb 3715{
769530e4 3716 intel_unmap(dev, dev_addr, size);
d41a4adb
JL
3717}
3718
5040a918 3719static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc 3720 dma_addr_t *dma_handle, gfp_t flags,
00085f1e 3721 unsigned long attrs)
ba395927 3722{
7ec916f8
CH
3723 struct page *page = NULL;
3724 int order;
ba395927 3725
7ec916f8
CH
3726 size = PAGE_ALIGN(size);
3727 order = get_order(size);
36746436 3728
7ec916f8
CH
3729 if (!iommu_no_mapping(dev))
3730 flags &= ~(GFP_DMA | GFP_DMA32);
3731 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3732 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3733 flags |= GFP_DMA;
3734 else
3735 flags |= GFP_DMA32;
3736 }
3737
3738 if (gfpflags_allow_blocking(flags)) {
3739 unsigned int count = size >> PAGE_SHIFT;
3740
d834c5ab
MS
3741 page = dma_alloc_from_contiguous(dev, count, order,
3742 flags & __GFP_NOWARN);
7ec916f8
CH
3743 if (page && iommu_no_mapping(dev) &&
3744 page_to_phys(page) + size > dev->coherent_dma_mask) {
3745 dma_release_from_contiguous(dev, page, count);
3746 page = NULL;
3747 }
3748 }
3749
3750 if (!page)
3751 page = alloc_pages(flags, order);
3752 if (!page)
3753 return NULL;
3754 memset(page_address(page), 0, size);
3755
3756 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3757 DMA_BIDIRECTIONAL,
3758 dev->coherent_dma_mask);
3759 if (*dma_handle)
3760 return page_address(page);
3761 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3762 __free_pages(page, order);
36746436 3763
ba395927
KA
3764 return NULL;
3765}
3766
5040a918 3767static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
00085f1e 3768 dma_addr_t dma_handle, unsigned long attrs)
ba395927 3769{
7ec916f8
CH
3770 int order;
3771 struct page *page = virt_to_page(vaddr);
3772
3773 size = PAGE_ALIGN(size);
3774 order = get_order(size);
3775
3776 intel_unmap(dev, dma_handle, size);
3777 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3778 __free_pages(page, order);
ba395927
KA
3779}
3780
5040a918 3781static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46 3782 int nelems, enum dma_data_direction dir,
00085f1e 3783 unsigned long attrs)
ba395927 3784{
769530e4
OP
3785 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3786 unsigned long nrpages = 0;
3787 struct scatterlist *sg;
3788 int i;
3789
3790 for_each_sg(sglist, sg, nelems, i) {
3791 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3792 }
3793
3794 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
ba395927
KA
3795}
3796
ba395927 3797static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3798 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3799{
3800 int i;
c03ab37c 3801 struct scatterlist *sg;
ba395927 3802
c03ab37c 3803 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3804 BUG_ON(!sg_page(sg));
29a90b70 3805 sg->dma_address = sg_phys(sg);
c03ab37c 3806 sg->dma_length = sg->length;
ba395927
KA
3807 }
3808 return nelems;
3809}
3810
5040a918 3811static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
00085f1e 3812 enum dma_data_direction dir, unsigned long attrs)
ba395927 3813{
ba395927 3814 int i;
ba395927 3815 struct dmar_domain *domain;
f76aec76
KA
3816 size_t size = 0;
3817 int prot = 0;
2aac6304 3818 unsigned long iova_pfn;
f76aec76 3819 int ret;
c03ab37c 3820 struct scatterlist *sg;
b536d24d 3821 unsigned long start_vpfn;
8c11e798 3822 struct intel_iommu *iommu;
ba395927
KA
3823
3824 BUG_ON(dir == DMA_NONE);
5040a918
DW
3825 if (iommu_no_mapping(dev))
3826 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3827
5040a918 3828 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3829 if (!domain)
3830 return 0;
3831
8c11e798
WH
3832 iommu = domain_get_iommu(domain);
3833
b536d24d 3834 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3835 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3836
2aac6304 3837 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
5040a918 3838 *dev->dma_mask);
2aac6304 3839 if (!iova_pfn) {
c03ab37c 3840 sglist->dma_length = 0;
f76aec76
KA
3841 return 0;
3842 }
3843
3844 /*
3845 * Check if DMAR supports zero-length reads on write only
3846 * mappings.
3847 */
3848 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3849 !cap_zlr(iommu->cap))
f76aec76
KA
3850 prot |= DMA_PTE_READ;
3851 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3852 prot |= DMA_PTE_WRITE;
3853
2aac6304 3854 start_vpfn = mm_to_dma_pfn(iova_pfn);
e1605495 3855
f532959b 3856 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3857 if (unlikely(ret)) {
e1605495 3858 dma_pte_free_pagetable(domain, start_vpfn,
bc24c571
DD
3859 start_vpfn + size - 1,
3860 agaw_to_level(domain->agaw) + 1);
22e2f9fa 3861 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
e1605495 3862 return 0;
ba395927
KA
3863 }
3864
ba395927
KA
3865 return nelems;
3866}
3867
dfb805e8
FT
3868static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3869{
3870 return !dma_addr;
3871}
3872
02b4da5f 3873static const struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3874 .alloc = intel_alloc_coherent,
3875 .free = intel_free_coherent,
ba395927
KA
3876 .map_sg = intel_map_sg,
3877 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3878 .map_page = intel_map_page,
3879 .unmap_page = intel_unmap_page,
dfb805e8 3880 .mapping_error = intel_mapping_error,
fec777c3 3881 .dma_supported = dma_direct_supported,
ba395927
KA
3882};
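/*
 * Illustrative sketch (not part of the driver, hence under #if 0): how a
 * typical PCI driver's streaming DMA calls reach the ops above through the
 * generic DMA API once dma_ops points at intel_dma_ops.  The device, buffer
 * and length below are hypothetical.
 */
#if 0
static int example_streaming_dma(struct pci_dev *pdev, void *buf, size_t len)
{
	dma_addr_t handle;

	/* dispatched to intel_map_page() */
	handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
	/* dispatched to intel_mapping_error() */
	if (dma_mapping_error(&pdev->dev, handle))
		return -ENOMEM;

	/* ... program the device to DMA from 'handle' ... */

	/* dispatched to intel_unmap_page() */
	dma_unmap_single(&pdev->dev, handle, len, DMA_TO_DEVICE);
	return 0;
}
#endif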
3883
3884static inline int iommu_domain_cache_init(void)
3885{
3886 int ret = 0;
3887
3888 iommu_domain_cache = kmem_cache_create("iommu_domain",
3889 sizeof(struct dmar_domain),
3890 0,
3891 SLAB_HWCACHE_ALIGN,
3892
3893 NULL);
3894 if (!iommu_domain_cache) {
9f10e5bf 3895 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3896 ret = -ENOMEM;
3897 }
3898
3899 return ret;
3900}
3901
3902static inline int iommu_devinfo_cache_init(void)
3903{
3904 int ret = 0;
3905
3906 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3907 sizeof(struct device_domain_info),
3908 0,
3909 SLAB_HWCACHE_ALIGN,
ba395927
KA
3910 NULL);
3911 if (!iommu_devinfo_cache) {
9f10e5bf 3912 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
3913 ret = -ENOMEM;
3914 }
3915
3916 return ret;
3917}
3918
ba395927
KA
3919static int __init iommu_init_mempool(void)
3920{
3921 int ret;
ae1ff3d6 3922 ret = iova_cache_get();
ba395927
KA
3923 if (ret)
3924 return ret;
3925
3926 ret = iommu_domain_cache_init();
3927 if (ret)
3928 goto domain_error;
3929
3930 ret = iommu_devinfo_cache_init();
3931 if (!ret)
3932 return ret;
3933
3934 kmem_cache_destroy(iommu_domain_cache);
3935domain_error:
ae1ff3d6 3936 iova_cache_put();
ba395927
KA
3937
3938 return -ENOMEM;
3939}
3940
3941static void __init iommu_exit_mempool(void)
3942{
3943 kmem_cache_destroy(iommu_devinfo_cache);
3944 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 3945 iova_cache_put();
ba395927
KA
3946}
3947
556ab45f
DW
3948static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3949{
3950 struct dmar_drhd_unit *drhd;
3951 u32 vtbar;
3952 int rc;
3953
3954 /* We know that this device on this chipset has its own IOMMU.
3955 * If we find it under a different IOMMU, then the BIOS is lying
3956 * to us. Hope that the IOMMU for this device is actually
3957 * disabled, and it needs no translation...
3958 */
3959 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3960 if (rc) {
3961 /* "can't" happen */
3962 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3963 return;
3964 }
3965 vtbar &= 0xffff0000;
3966
3967 /* we know that this IOMMU should be at offset 0xa000 from vtbar */
3968 drhd = dmar_find_matched_drhd_unit(pdev);
3969 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3970 TAINT_FIRMWARE_WORKAROUND,
3971 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3972 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3973}
3974DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3975
ba395927
KA
3976static void __init init_no_remapping_devices(void)
3977{
3978 struct dmar_drhd_unit *drhd;
832bd858 3979 struct device *dev;
b683b230 3980 int i;
ba395927
KA
3981
3982 for_each_drhd_unit(drhd) {
3983 if (!drhd->include_all) {
b683b230
JL
3984 for_each_active_dev_scope(drhd->devices,
3985 drhd->devices_cnt, i, dev)
3986 break;
832bd858 3987 /* ignore DMAR unit if no devices exist */
ba395927
KA
3988 if (i == drhd->devices_cnt)
3989 drhd->ignored = 1;
3990 }
3991 }
3992
7c919779 3993 for_each_active_drhd_unit(drhd) {
7c919779 3994 if (drhd->include_all)
ba395927
KA
3995 continue;
3996
b683b230
JL
3997 for_each_active_dev_scope(drhd->devices,
3998 drhd->devices_cnt, i, dev)
832bd858 3999 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 4000 break;
ba395927
KA
4001 if (i < drhd->devices_cnt)
4002 continue;
4003
c0771df8
DW
4004 /* This IOMMU has *only* gfx devices. Either bypass it or
4005 set the gfx_mapped flag, as appropriate */
4006 if (dmar_map_gfx) {
4007 intel_iommu_gfx_mapped = 1;
4008 } else {
4009 drhd->ignored = 1;
b683b230
JL
4010 for_each_active_dev_scope(drhd->devices,
4011 drhd->devices_cnt, i, dev)
832bd858 4012 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
4013 }
4014 }
4015}
4016
f59c7b69
FY
4017#ifdef CONFIG_SUSPEND
4018static int init_iommu_hw(void)
4019{
4020 struct dmar_drhd_unit *drhd;
4021 struct intel_iommu *iommu = NULL;
4022
4023 for_each_active_iommu(iommu, drhd)
4024 if (iommu->qi)
4025 dmar_reenable_qi(iommu);
4026
b779260b
JC
4027 for_each_iommu(iommu, drhd) {
4028 if (drhd->ignored) {
4029 /*
4030 * we always have to disable PMRs or DMA may fail on
4031 * this device
4032 */
4033 if (force_on)
4034 iommu_disable_protect_mem_regions(iommu);
4035 continue;
4036 }
4037
f59c7b69
FY
4038 iommu_flush_write_buffer(iommu);
4039
4040 iommu_set_root_entry(iommu);
4041
4042 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4043 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
4044 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4045 iommu_enable_translation(iommu);
b94996c9 4046 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
4047 }
4048
4049 return 0;
4050}
4051
4052static void iommu_flush_all(void)
4053{
4054 struct dmar_drhd_unit *drhd;
4055 struct intel_iommu *iommu;
4056
4057 for_each_active_iommu(iommu, drhd) {
4058 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 4059 DMA_CCMD_GLOBAL_INVL);
f59c7b69 4060 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 4061 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
4062 }
4063}
4064
134fac3f 4065static int iommu_suspend(void)
f59c7b69
FY
4066{
4067 struct dmar_drhd_unit *drhd;
4068 struct intel_iommu *iommu = NULL;
4069 unsigned long flag;
4070
4071 for_each_active_iommu(iommu, drhd) {
6396bb22 4072 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
f59c7b69
FY
4073 GFP_ATOMIC);
4074 if (!iommu->iommu_state)
4075 goto nomem;
4076 }
4077
4078 iommu_flush_all();
4079
4080 for_each_active_iommu(iommu, drhd) {
4081 iommu_disable_translation(iommu);
4082
1f5b3c3f 4083 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4084
4085 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4086 readl(iommu->reg + DMAR_FECTL_REG);
4087 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4088 readl(iommu->reg + DMAR_FEDATA_REG);
4089 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4090 readl(iommu->reg + DMAR_FEADDR_REG);
4091 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4092 readl(iommu->reg + DMAR_FEUADDR_REG);
4093
1f5b3c3f 4094 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4095 }
4096 return 0;
4097
4098nomem:
4099 for_each_active_iommu(iommu, drhd)
4100 kfree(iommu->iommu_state);
4101
4102 return -ENOMEM;
4103}
4104
134fac3f 4105static void iommu_resume(void)
f59c7b69
FY
4106{
4107 struct dmar_drhd_unit *drhd;
4108 struct intel_iommu *iommu = NULL;
4109 unsigned long flag;
4110
4111 if (init_iommu_hw()) {
b779260b
JC
4112 if (force_on)
4113 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4114 else
4115 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 4116 return;
f59c7b69
FY
4117 }
4118
4119 for_each_active_iommu(iommu, drhd) {
4120
1f5b3c3f 4121 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
4122
4123 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4124 iommu->reg + DMAR_FECTL_REG);
4125 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4126 iommu->reg + DMAR_FEDATA_REG);
4127 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4128 iommu->reg + DMAR_FEADDR_REG);
4129 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4130 iommu->reg + DMAR_FEUADDR_REG);
4131
1f5b3c3f 4132 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
4133 }
4134
4135 for_each_active_iommu(iommu, drhd)
4136 kfree(iommu->iommu_state);
f59c7b69
FY
4137}
4138
134fac3f 4139static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
4140 .resume = iommu_resume,
4141 .suspend = iommu_suspend,
4142};
4143
134fac3f 4144static void __init init_iommu_pm_ops(void)
f59c7b69 4145{
134fac3f 4146 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4147}
4148
4149#else
99592ba4 4150static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4151#endif /* CONFIG_PM */
4152
318fe7df 4153
c2a0b538 4154int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4155{
4156 struct acpi_dmar_reserved_memory *rmrr;
0659b8dc 4157 int prot = DMA_PTE_READ|DMA_PTE_WRITE;
318fe7df 4158 struct dmar_rmrr_unit *rmrru;
0659b8dc 4159 size_t length;
318fe7df
SS
4160
4161 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4162 if (!rmrru)
0659b8dc 4163 goto out;
318fe7df
SS
4164
4165 rmrru->hdr = header;
4166 rmrr = (struct acpi_dmar_reserved_memory *)header;
4167 rmrru->base_address = rmrr->base_address;
4168 rmrru->end_address = rmrr->end_address;
0659b8dc
EA
4169
4170 length = rmrr->end_address - rmrr->base_address + 1;
4171 rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
4172 IOMMU_RESV_DIRECT);
4173 if (!rmrru->resv)
4174 goto free_rmrru;
4175
2e455289
JL
4176 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4177 ((void *)rmrr) + rmrr->header.length,
4178 &rmrru->devices_cnt);
0659b8dc
EA
4179 if (rmrru->devices_cnt && rmrru->devices == NULL)
4180 goto free_all;
318fe7df 4181
2e455289 4182 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4183
2e455289 4184 return 0;
0659b8dc
EA
4185free_all:
4186 kfree(rmrru->resv);
4187free_rmrru:
4188 kfree(rmrru);
4189out:
4190 return -ENOMEM;
318fe7df
SS
4191}
4192
6b197249
JL
4193static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4194{
4195 struct dmar_atsr_unit *atsru;
4196 struct acpi_dmar_atsr *tmp;
4197
4198 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4199 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4200 if (atsr->segment != tmp->segment)
4201 continue;
4202 if (atsr->header.length != tmp->header.length)
4203 continue;
4204 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4205 return atsru;
4206 }
4207
4208 return NULL;
4209}
4210
4211int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4212{
4213 struct acpi_dmar_atsr *atsr;
4214 struct dmar_atsr_unit *atsru;
4215
b608fe35 4216 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
6b197249
JL
4217 return 0;
4218
318fe7df 4219 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4220 atsru = dmar_find_atsr(atsr);
4221 if (atsru)
4222 return 0;
4223
4224 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4225 if (!atsru)
4226 return -ENOMEM;
4227
6b197249
JL
4228 /*
4229 * If memory is allocated from slab by ACPI _DSM method, we need to
4230 * copy the memory content because the memory buffer will be freed
4231 * on return.
4232 */
4233 atsru->hdr = (void *)(atsru + 1);
4234 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4235 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4236 if (!atsru->include_all) {
4237 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4238 (void *)atsr + atsr->header.length,
4239 &atsru->devices_cnt);
4240 if (atsru->devices_cnt && atsru->devices == NULL) {
4241 kfree(atsru);
4242 return -ENOMEM;
4243 }
4244 }
318fe7df 4245
0e242612 4246 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4247
4248 return 0;
4249}
4250
9bdc531e
JL
4251static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4252{
4253 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4254 kfree(atsru);
4255}
4256
6b197249
JL
4257int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4258{
4259 struct acpi_dmar_atsr *atsr;
4260 struct dmar_atsr_unit *atsru;
4261
4262 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4263 atsru = dmar_find_atsr(atsr);
4264 if (atsru) {
4265 list_del_rcu(&atsru->list);
4266 synchronize_rcu();
4267 intel_iommu_free_atsr(atsru);
4268 }
4269
4270 return 0;
4271}
4272
4273int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4274{
4275 int i;
4276 struct device *dev;
4277 struct acpi_dmar_atsr *atsr;
4278 struct dmar_atsr_unit *atsru;
4279
4280 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4281 atsru = dmar_find_atsr(atsr);
4282 if (!atsru)
4283 return 0;
4284
194dc870 4285 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
6b197249
JL
4286 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4287 i, dev)
4288 return -EBUSY;
194dc870 4289 }
6b197249
JL
4290
4291 return 0;
4292}
4293
ffebeb46
JL
4294static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4295{
4296 int sp, ret = 0;
4297 struct intel_iommu *iommu = dmaru->iommu;
4298
4299 if (g_iommus[iommu->seq_id])
4300 return 0;
4301
4302 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4303 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4304 iommu->name);
4305 return -ENXIO;
4306 }
4307 if (!ecap_sc_support(iommu->ecap) &&
4308 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4309 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4310 iommu->name);
4311 return -ENXIO;
4312 }
4313 sp = domain_update_iommu_superpage(iommu) - 1;
4314 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4315 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4316 iommu->name);
4317 return -ENXIO;
4318 }
4319
4320 /*
4321 * Disable translation if already enabled prior to OS handover.
4322 */
4323 if (iommu->gcmd & DMA_GCMD_TE)
4324 iommu_disable_translation(iommu);
4325
4326 g_iommus[iommu->seq_id] = iommu;
4327 ret = iommu_init_domains(iommu);
4328 if (ret == 0)
4329 ret = iommu_alloc_root_entry(iommu);
4330 if (ret)
4331 goto out;
4332
8a94ade4
DW
4333#ifdef CONFIG_INTEL_IOMMU_SVM
4334 if (pasid_enabled(iommu))
d9737953 4335 intel_svm_init(iommu);
8a94ade4
DW
4336#endif
4337
ffebeb46
JL
4338 if (dmaru->ignored) {
4339 /*
4340 * we always have to disable PMRs or DMA may fail on this device
4341 */
4342 if (force_on)
4343 iommu_disable_protect_mem_regions(iommu);
4344 return 0;
4345 }
4346
4347 intel_iommu_init_qi(iommu);
4348 iommu_flush_write_buffer(iommu);
a222a7f0
DW
4349
4350#ifdef CONFIG_INTEL_IOMMU_SVM
4351 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4352 ret = intel_svm_enable_prq(iommu);
4353 if (ret)
4354 goto disable_iommu;
4355 }
4356#endif
ffebeb46
JL
4357 ret = dmar_set_interrupt(iommu);
4358 if (ret)
4359 goto disable_iommu;
4360
4361 iommu_set_root_entry(iommu);
4362 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4363 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4364 iommu_enable_translation(iommu);
4365
ffebeb46
JL
4366 iommu_disable_protect_mem_regions(iommu);
4367 return 0;
4368
4369disable_iommu:
4370 disable_dmar_iommu(iommu);
4371out:
4372 free_dmar_iommu(iommu);
4373 return ret;
4374}
4375
6b197249
JL
4376int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4377{
ffebeb46
JL
4378 int ret = 0;
4379 struct intel_iommu *iommu = dmaru->iommu;
4380
4381 if (!intel_iommu_enabled)
4382 return 0;
4383 if (iommu == NULL)
4384 return -EINVAL;
4385
4386 if (insert) {
4387 ret = intel_iommu_add(dmaru);
4388 } else {
4389 disable_dmar_iommu(iommu);
4390 free_dmar_iommu(iommu);
4391 }
4392
4393 return ret;
6b197249
JL
4394}
4395
9bdc531e
JL
4396static void intel_iommu_free_dmars(void)
4397{
4398 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4399 struct dmar_atsr_unit *atsru, *atsr_n;
4400
4401 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4402 list_del(&rmrru->list);
4403 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
0659b8dc 4404 kfree(rmrru->resv);
9bdc531e 4405 kfree(rmrru);
318fe7df
SS
4406 }
4407
9bdc531e
JL
4408 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4409 list_del(&atsru->list);
4410 intel_iommu_free_atsr(atsru);
4411 }
318fe7df
SS
4412}
4413
4414int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4415{
b683b230 4416 int i, ret = 1;
318fe7df 4417 struct pci_bus *bus;
832bd858
DW
4418 struct pci_dev *bridge = NULL;
4419 struct device *tmp;
318fe7df
SS
4420 struct acpi_dmar_atsr *atsr;
4421 struct dmar_atsr_unit *atsru;
4422
4423 dev = pci_physfn(dev);
318fe7df 4424 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4425 bridge = bus->self;
d14053b3
DW
4426 /* If it's an integrated device, allow ATS */
4427 if (!bridge)
4428 return 1;
4429 /* Connected via non-PCIe: no ATS */
4430 if (!pci_is_pcie(bridge) ||
62f87c0e 4431 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4432 return 0;
d14053b3 4433 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4434 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4435 break;
318fe7df
SS
4436 }
4437
0e242612 4438 rcu_read_lock();
b5f82ddf
JL
4439 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4440 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4441 if (atsr->segment != pci_domain_nr(dev->bus))
4442 continue;
4443
b683b230 4444 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4445 if (tmp == &bridge->dev)
b683b230 4446 goto out;
b5f82ddf
JL
4447
4448 if (atsru->include_all)
b683b230 4449 goto out;
b5f82ddf 4450 }
b683b230
JL
4451 ret = 0;
4452out:
0e242612 4453 rcu_read_unlock();
318fe7df 4454
b683b230 4455 return ret;
318fe7df
SS
4456}
4457
59ce0515
JL
4458int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4459{
4460 int ret = 0;
4461 struct dmar_rmrr_unit *rmrru;
4462 struct dmar_atsr_unit *atsru;
4463 struct acpi_dmar_atsr *atsr;
4464 struct acpi_dmar_reserved_memory *rmrr;
4465
b608fe35 4466 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
59ce0515
JL
4467 return 0;
4468
4469 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4470 rmrr = container_of(rmrru->hdr,
4471 struct acpi_dmar_reserved_memory, header);
4472 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4473 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4474 ((void *)rmrr) + rmrr->header.length,
4475 rmrr->segment, rmrru->devices,
4476 rmrru->devices_cnt);
27e24950 4477 if (ret < 0)
59ce0515 4478 return ret;
e6a8c9b3 4479 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
27e24950
JL
4480 dmar_remove_dev_scope(info, rmrr->segment,
4481 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4482 }
4483 }
4484
4485 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4486 if (atsru->include_all)
4487 continue;
4488
4489 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4490 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4491 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4492 (void *)atsr + atsr->header.length,
4493 atsr->segment, atsru->devices,
4494 atsru->devices_cnt);
4495 if (ret > 0)
4496 break;
4497 else if (ret < 0)
4498 return ret;
e6a8c9b3 4499 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
59ce0515
JL
4500 if (dmar_remove_dev_scope(info, atsr->segment,
4501 atsru->devices, atsru->devices_cnt))
4502 break;
4503 }
4504 }
4505
4506 return 0;
4507}
4508
99dcaded
FY
4509/*
4510 * Here we only respond to a device being unbound from its driver.
4511 *
4512 * A newly added device is not attached to its DMAR domain here yet; that
4513 * happens when the device is first mapped to an iova.
4514 */
4515static int device_notifier(struct notifier_block *nb,
4516 unsigned long action, void *data)
4517{
4518 struct device *dev = data;
99dcaded
FY
4519 struct dmar_domain *domain;
4520
3d89194a 4521 if (iommu_dummy(dev))
44cd613c
DW
4522 return 0;
4523
1196c2fb 4524 if (action != BUS_NOTIFY_REMOVED_DEVICE)
7e7dfab7
JL
4525 return 0;
4526
1525a29a 4527 domain = find_domain(dev);
99dcaded
FY
4528 if (!domain)
4529 return 0;
4530
e6de0f8d 4531 dmar_remove_one_dev_info(domain, dev);
ab8dfe25 4532 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
7e7dfab7 4533 domain_exit(domain);
a97590e5 4534
99dcaded
FY
4535 return 0;
4536}
4537
4538static struct notifier_block device_nb = {
4539 .notifier_call = device_notifier,
4540};
4541
75f05569
JL
4542static int intel_iommu_memory_notifier(struct notifier_block *nb,
4543 unsigned long val, void *v)
4544{
4545 struct memory_notify *mhp = v;
4546 unsigned long long start, end;
4547 unsigned long start_vpfn, last_vpfn;
4548
4549 switch (val) {
4550 case MEM_GOING_ONLINE:
4551 start = mhp->start_pfn << PAGE_SHIFT;
4552 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4553 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4554 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4555 start, end);
4556 return NOTIFY_BAD;
4557 }
4558 break;
4559
4560 case MEM_OFFLINE:
4561 case MEM_CANCEL_ONLINE:
4562 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4563 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4564 while (start_vpfn <= last_vpfn) {
4565 struct iova *iova;
4566 struct dmar_drhd_unit *drhd;
4567 struct intel_iommu *iommu;
ea8ea460 4568 struct page *freelist;
75f05569
JL
4569
4570 iova = find_iova(&si_domain->iovad, start_vpfn);
4571 if (iova == NULL) {
9f10e5bf 4572 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4573 start_vpfn);
4574 break;
4575 }
4576
4577 iova = split_and_remove_iova(&si_domain->iovad, iova,
4578 start_vpfn, last_vpfn);
4579 if (iova == NULL) {
9f10e5bf 4580 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4581 start_vpfn, last_vpfn);
4582 return NOTIFY_BAD;
4583 }
4584
ea8ea460
DW
4585 freelist = domain_unmap(si_domain, iova->pfn_lo,
4586 iova->pfn_hi);
4587
75f05569
JL
4588 rcu_read_lock();
4589 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4590 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4591 iova->pfn_lo, iova_size(iova),
ea8ea460 4592 !freelist, 0);
75f05569 4593 rcu_read_unlock();
ea8ea460 4594 dma_free_pagelist(freelist);
75f05569
JL
4595
4596 start_vpfn = iova->pfn_hi + 1;
4597 free_iova_mem(iova);
4598 }
4599 break;
4600 }
4601
4602 return NOTIFY_OK;
4603}
4604
4605static struct notifier_block intel_iommu_memory_nb = {
4606 .notifier_call = intel_iommu_memory_notifier,
4607 .priority = 0
4608};
4609
22e2f9fa
OP
4610static void free_all_cpu_cached_iovas(unsigned int cpu)
4611{
4612 int i;
4613
4614 for (i = 0; i < g_num_of_iommus; i++) {
4615 struct intel_iommu *iommu = g_iommus[i];
4616 struct dmar_domain *domain;
0caa7616 4617 int did;
22e2f9fa
OP
4618
4619 if (!iommu)
4620 continue;
4621
3bd4f911 4622 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
0caa7616 4623 domain = get_iommu_domain(iommu, (u16)did);
22e2f9fa
OP
4624
4625 if (!domain)
4626 continue;
4627 free_cpu_cached_iovas(cpu, &domain->iovad);
4628 }
4629 }
4630}
4631
21647615 4632static int intel_iommu_cpu_dead(unsigned int cpu)
aa473240 4633{
21647615 4634 free_all_cpu_cached_iovas(cpu);
21647615 4635 return 0;
aa473240
OP
4636}
4637
161b28aa
JR
4638static void intel_disable_iommus(void)
4639{
4640 struct intel_iommu *iommu = NULL;
4641 struct dmar_drhd_unit *drhd;
4642
4643 for_each_iommu(iommu, drhd)
4644 iommu_disable_translation(iommu);
4645}
4646
a7fdb6e6
JR
4647static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4648{
2926a2aa
JR
4649 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4650
4651 return container_of(iommu_dev, struct intel_iommu, iommu);
a7fdb6e6
JR
4652}
4653
a5459cfe
AW
4654static ssize_t intel_iommu_show_version(struct device *dev,
4655 struct device_attribute *attr,
4656 char *buf)
4657{
a7fdb6e6 4658 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4659 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4660 return sprintf(buf, "%d:%d\n",
4661 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4662}
4663static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4664
4665static ssize_t intel_iommu_show_address(struct device *dev,
4666 struct device_attribute *attr,
4667 char *buf)
4668{
a7fdb6e6 4669 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4670 return sprintf(buf, "%llx\n", iommu->reg_phys);
4671}
4672static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4673
4674static ssize_t intel_iommu_show_cap(struct device *dev,
4675 struct device_attribute *attr,
4676 char *buf)
4677{
a7fdb6e6 4678 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4679 return sprintf(buf, "%llx\n", iommu->cap);
4680}
4681static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4682
4683static ssize_t intel_iommu_show_ecap(struct device *dev,
4684 struct device_attribute *attr,
4685 char *buf)
4686{
a7fdb6e6 4687 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
a5459cfe
AW
4688 return sprintf(buf, "%llx\n", iommu->ecap);
4689}
4690static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4691
2238c082
AW
4692static ssize_t intel_iommu_show_ndoms(struct device *dev,
4693 struct device_attribute *attr,
4694 char *buf)
4695{
a7fdb6e6 4696 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4697 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4698}
4699static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4700
4701static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4702 struct device_attribute *attr,
4703 char *buf)
4704{
a7fdb6e6 4705 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2238c082
AW
4706 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4707 cap_ndoms(iommu->cap)));
4708}
4709static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4710
a5459cfe
AW
4711static struct attribute *intel_iommu_attrs[] = {
4712 &dev_attr_version.attr,
4713 &dev_attr_address.attr,
4714 &dev_attr_cap.attr,
4715 &dev_attr_ecap.attr,
2238c082
AW
4716 &dev_attr_domains_supported.attr,
4717 &dev_attr_domains_used.attr,
a5459cfe
AW
4718 NULL,
4719};
4720
4721static struct attribute_group intel_iommu_group = {
4722 .name = "intel-iommu",
4723 .attrs = intel_iommu_attrs,
4724};
4725
4726const struct attribute_group *intel_iommu_groups[] = {
4727 &intel_iommu_group,
4728 NULL,
4729};
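/*
 * Illustrative note (assumed paths): once intel_iommu_init() registers each
 * unit with iommu_device_sysfs_add(), the attributes above are expected to
 * appear under e.g. /sys/class/iommu/dmar0/intel-iommu/{version,address,
 * cap,ecap,domains_supported,domains_used}.
 */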
4730
ba395927
KA
4731int __init intel_iommu_init(void)
4732{
9bdc531e 4733 int ret = -ENODEV;
3a93c841 4734 struct dmar_drhd_unit *drhd;
7c919779 4735 struct intel_iommu *iommu;
ba395927 4736
a59b50e9
JC
4737 /* VT-d is required for a TXT/tboot launch, so enforce that */
4738 force_on = tboot_force_iommu();
4739
3a5670e8
JL
4740 if (iommu_init_mempool()) {
4741 if (force_on)
4742 panic("tboot: Failed to initialize iommu memory\n");
4743 return -ENOMEM;
4744 }
4745
4746 down_write(&dmar_global_lock);
a59b50e9
JC
4747 if (dmar_table_init()) {
4748 if (force_on)
4749 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4750 goto out_free_dmar;
a59b50e9 4751 }
ba395927 4752
c2c7286a 4753 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4754 if (force_on)
4755 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4756 goto out_free_dmar;
a59b50e9 4757 }
1886e8a9 4758
ec154bf5
JR
4759 up_write(&dmar_global_lock);
4760
4761 /*
4762 * The bus notifier takes the dmar_global_lock, so lockdep will
4763 * complain later when we register it under the lock.
4764 */
4765 dmar_register_bus_notifier();
4766
4767 down_write(&dmar_global_lock);
4768
161b28aa 4769 if (no_iommu || dmar_disabled) {
bfd20f1c
SL
4770 /*
4771 * We exit the function here to ensure IOMMU's remapping and
4772 * mempool aren't setup, which means that the IOMMU's PMRs
4773 * won't be disabled via the call to init_dmars(). So disable
4774 * it explicitly here. The PMRs were setup by tboot prior to
4775 * calling SENTER, but the kernel is expected to reset/tear
4776 * down the PMRs.
4777 */
4778 if (intel_iommu_tboot_noforce) {
4779 for_each_iommu(iommu, drhd)
4780 iommu_disable_protect_mem_regions(iommu);
4781 }
4782
161b28aa
JR
4783 /*
4784 * Make sure the IOMMUs are switched off, even when we
4785 * boot into a kexec kernel and the previous kernel left
4786 * them enabled
4787 */
4788 intel_disable_iommus();
9bdc531e 4789 goto out_free_dmar;
161b28aa 4790 }
2ae21010 4791
318fe7df 4792 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4793 pr_info("No RMRR found\n");
318fe7df
SS
4794
4795 if (list_empty(&dmar_atsr_units))
9f10e5bf 4796 pr_info("No ATSR found\n");
318fe7df 4797
51a63e67
JC
4798 if (dmar_init_reserved_ranges()) {
4799 if (force_on)
4800 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4801 goto out_free_reserved_range;
51a63e67 4802 }
ba395927
KA
4803
4804 init_no_remapping_devices();
4805
b779260b 4806 ret = init_dmars();
ba395927 4807 if (ret) {
a59b50e9
JC
4808 if (force_on)
4809 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4810 pr_err("Initialization failed\n");
9bdc531e 4811 goto out_free_reserved_range;
ba395927 4812 }
3a5670e8 4813 up_write(&dmar_global_lock);
9f10e5bf 4814 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
ba395927 4815
4fac8076 4816#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
75f1cdf1
FT
4817 swiotlb = 0;
4818#endif
19943b0e 4819 dma_ops = &intel_dma_ops;
4ed0d3e6 4820
134fac3f 4821 init_iommu_pm_ops();
a8bcbb0d 4822
39ab9555
JR
4823 for_each_active_iommu(iommu, drhd) {
4824 iommu_device_sysfs_add(&iommu->iommu, NULL,
4825 intel_iommu_groups,
4826 "%s", iommu->name);
4827 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4828 iommu_device_register(&iommu->iommu);
4829 }
a5459cfe 4830
4236d97d 4831 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4832 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4833 if (si_domain && !hw_pass_through)
4834 register_memory_notifier(&intel_iommu_memory_nb);
21647615
AMG
4835 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4836 intel_iommu_cpu_dead);
8bc1f85c 4837 intel_iommu_enabled = 1;
ee2636b8 4838 intel_iommu_debugfs_init();
8bc1f85c 4839
ba395927 4840 return 0;
9bdc531e
JL
4841
4842out_free_reserved_range:
4843 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4844out_free_dmar:
4845 intel_iommu_free_dmars();
3a5670e8
JL
4846 up_write(&dmar_global_lock);
4847 iommu_exit_mempool();
9bdc531e 4848 return ret;
ba395927 4849}
e820482c 4850
2452d9db 4851static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
579305f7
AW
4852{
4853 struct intel_iommu *iommu = opaque;
4854
2452d9db 4855 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
4856 return 0;
4857}
4858
4859/*
4860 * NB - intel-iommu lacks any sort of reference counting for the users of
4861 * dependent devices. If multiple endpoints have intersecting dependent
4862 * devices, unbinding the driver from any one of them will possibly leave
4863 * the others unable to operate.
4864 */
2452d9db 4865static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
3199aa6b 4866{
0bcb3e28 4867 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4868 return;
4869
2452d9db 4870 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
3199aa6b
HW
4871}
4872
127c7615 4873static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 4874{
c7151a8d
WH
4875 struct intel_iommu *iommu;
4876 unsigned long flags;
c7151a8d 4877
55d94043
JR
4878 assert_spin_locked(&device_domain_lock);
4879
127c7615 4880 if (WARN_ON(!info))
c7151a8d
WH
4881 return;
4882
127c7615 4883 iommu = info->iommu;
c7151a8d 4884
127c7615
JR
4885 if (info->dev) {
4886 iommu_disable_dev_iotlb(info);
4887 domain_context_clear(iommu, info->dev);
a7fc93fe 4888 intel_pasid_free_table(info->dev);
127c7615 4889 }
c7151a8d 4890
b608ac3b 4891 unlink_domain_info(info);
c7151a8d 4892
d160aca5 4893 spin_lock_irqsave(&iommu->lock, flags);
127c7615 4894 domain_detach_iommu(info->domain, iommu);
d160aca5 4895 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 4896
127c7615 4897 free_devinfo_mem(info);
c7151a8d 4898}
c7151a8d 4899
55d94043
JR
4900static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4901 struct device *dev)
4902{
127c7615 4903 struct device_domain_info *info;
55d94043 4904 unsigned long flags;
3e7abe25 4905
55d94043 4906 spin_lock_irqsave(&device_domain_lock, flags);
127c7615
JR
4907 info = dev->archdata.iommu;
4908 __dmar_remove_one_dev_info(info);
55d94043 4909 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
4910}
4911
2c2e2c38 4912static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4913{
4914 int adjust_width;
4915
aa3ac946 4916 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5e98c4b1
WH
4917 domain_reserve_special_ranges(domain);
4918
4919 /* calculate AGAW */
4920 domain->gaw = guest_width;
4921 adjust_width = guestwidth_to_adjustwidth(guest_width);
4922 domain->agaw = width_to_agaw(adjust_width);
4923
5e98c4b1 4924 domain->iommu_coherency = 0;
c5b15255 4925 domain->iommu_snooping = 0;
6dd9a7c7 4926 domain->iommu_superpage = 0;
fe40f1e0 4927 domain->max_addr = 0;
5e98c4b1
WH
4928
4929 /* always allocate the top pgd */
4c923d47 4930 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4931 if (!domain->pgd)
4932 return -ENOMEM;
4933 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4934 return 0;
4935}
4936
00a77deb 4937static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 4938{
5d450806 4939 struct dmar_domain *dmar_domain;
00a77deb
JR
4940 struct iommu_domain *domain;
4941
4942 if (type != IOMMU_DOMAIN_UNMANAGED)
4943 return NULL;
38717946 4944
ab8dfe25 4945 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 4946 if (!dmar_domain) {
9f10e5bf 4947 pr_err("Can't allocate dmar_domain\n");
00a77deb 4948 return NULL;
38717946 4949 }
2c2e2c38 4950 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
9f10e5bf 4951 pr_err("Domain initialization failed\n");
92d03cc8 4952 domain_exit(dmar_domain);
00a77deb 4953 return NULL;
38717946 4954 }
8140a95d 4955 domain_update_iommu_cap(dmar_domain);
faa3d6f5 4956
00a77deb 4957 domain = &dmar_domain->domain;
8a0e715b
JR
4958 domain->geometry.aperture_start = 0;
4959 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4960 domain->geometry.force_aperture = true;
4961
00a77deb 4962 return domain;
38717946 4963}
38717946 4964
00a77deb 4965static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 4966{
00a77deb 4967 domain_exit(to_dmar_domain(domain));
38717946 4968}
38717946 4969
4c5478c9
JR
4970static int intel_iommu_attach_device(struct iommu_domain *domain,
4971 struct device *dev)
38717946 4972{
00a77deb 4973 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
4974 struct intel_iommu *iommu;
4975 int addr_width;
156baca8 4976 u8 bus, devfn;
faa3d6f5 4977
c875d2c1
AW
4978 if (device_is_rmrr_locked(dev)) {
4979 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4980 return -EPERM;
4981 }
4982
7207d8f9
DW
4983 /* normally dev is not mapped */
4984 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
4985 struct dmar_domain *old_domain;
4986
1525a29a 4987 old_domain = find_domain(dev);
faa3d6f5 4988 if (old_domain) {
d160aca5 4989 rcu_read_lock();
de7e8886 4990 dmar_remove_one_dev_info(old_domain, dev);
d160aca5 4991 rcu_read_unlock();
62c22167
JR
4992
4993 if (!domain_type_is_vm_or_si(old_domain) &&
4994 list_empty(&old_domain->devices))
4995 domain_exit(old_domain);
faa3d6f5
WH
4996 }
4997 }
4998
156baca8 4999 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
5000 if (!iommu)
5001 return -ENODEV;
5002
5003 /* check if this iommu agaw is sufficient for max mapped address */
5004 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
5005 if (addr_width > cap_mgaw(iommu->cap))
5006 addr_width = cap_mgaw(iommu->cap);
5007
5008 if (dmar_domain->max_addr > (1LL << addr_width)) {
9f10e5bf 5009 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5010 "sufficient for the mapped address (%llx)\n",
a99c47a2 5011 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
5012 return -EFAULT;
5013 }
a99c47a2
TL
5014 dmar_domain->gaw = addr_width;
5015
5016 /*
5017 * Knock out extra levels of page tables if necessary
5018 */
5019 while (iommu->agaw < dmar_domain->agaw) {
5020 struct dma_pte *pte;
5021
5022 pte = dmar_domain->pgd;
5023 if (dma_pte_present(pte)) {
25cbff16
SY
5024 dmar_domain->pgd = (struct dma_pte *)
5025 phys_to_virt(dma_pte_addr(pte));
7a661013 5026 free_pgtable_page(pte);
a99c47a2
TL
5027 }
5028 dmar_domain->agaw--;
5029 }
fe40f1e0 5030
28ccce0d 5031 return domain_add_dev_info(dmar_domain, dev);
38717946 5032}
38717946 5033
4c5478c9
JR
5034static void intel_iommu_detach_device(struct iommu_domain *domain,
5035 struct device *dev)
38717946 5036{
e6de0f8d 5037 dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
faa3d6f5 5038}
c7151a8d 5039
b146a1c9
JR
5040static int intel_iommu_map(struct iommu_domain *domain,
5041 unsigned long iova, phys_addr_t hpa,
5009065d 5042 size_t size, int iommu_prot)
faa3d6f5 5043{
00a77deb 5044 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 5045 u64 max_addr;
dde57a21 5046 int prot = 0;
faa3d6f5 5047 int ret;
fe40f1e0 5048
dde57a21
JR
5049 if (iommu_prot & IOMMU_READ)
5050 prot |= DMA_PTE_READ;
5051 if (iommu_prot & IOMMU_WRITE)
5052 prot |= DMA_PTE_WRITE;
9cf06697
SY
5053 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5054 prot |= DMA_PTE_SNP;
dde57a21 5055
163cc52c 5056 max_addr = iova + size;
dde57a21 5057 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
5058 u64 end;
5059
5060 /* check if minimum agaw is sufficient for mapped address */
8954da1f 5061 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 5062 if (end < max_addr) {
9f10e5bf 5063 pr_err("%s: iommu width (%d) is not "
fe40f1e0 5064 "sufficient for the mapped address (%llx)\n",
8954da1f 5065 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
5066 return -EFAULT;
5067 }
dde57a21 5068 dmar_domain->max_addr = max_addr;
fe40f1e0 5069 }
ad051221
DW
5070 /* Round up size to next multiple of PAGE_SIZE, if it and
5071 the low bits of hpa would take us onto the next page */
88cb6a74 5072 size = aligned_nrpages(hpa, size);
ad051221
DW
5073 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5074 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 5075 return ret;
38717946 5076}
38717946 5077
5009065d 5078static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 5079 unsigned long iova, size_t size)
38717946 5080{
00a77deb 5081 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460 5082 struct page *freelist = NULL;
ea8ea460
DW
5083 unsigned long start_pfn, last_pfn;
5084 unsigned int npages;
42e8c186 5085 int iommu_id, level = 0;
5cf0a76f
DW
5086
5087 /* Cope with horrid API which requires us to unmap more than the
5088 size argument if it happens to be a large-page mapping. */
dc02e46e 5089 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5cf0a76f
DW
5090
5091 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5092 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 5093
ea8ea460
DW
5094 start_pfn = iova >> VTD_PAGE_SHIFT;
5095 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5096
5097 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5098
5099 npages = last_pfn - start_pfn + 1;
5100
f746a025 5101 for_each_domain_iommu(iommu_id, dmar_domain)
42e8c186
JR
5102 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5103 start_pfn, npages, !freelist, 0);
ea8ea460
DW
5104
5105 dma_free_pagelist(freelist);
fe40f1e0 5106
163cc52c
DW
5107 if (dmar_domain->max_addr == iova + size)
5108 dmar_domain->max_addr = iova;
b146a1c9 5109
5cf0a76f 5110 return size;
38717946 5111}
38717946 5112
d14d6577 5113static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 5114 dma_addr_t iova)
38717946 5115{
00a77deb 5116 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 5117 struct dma_pte *pte;
5cf0a76f 5118 int level = 0;
faa3d6f5 5119 u64 phys = 0;
38717946 5120
5cf0a76f 5121 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 5122 if (pte)
faa3d6f5 5123 phys = dma_pte_addr(pte);
38717946 5124
faa3d6f5 5125 return phys;
38717946 5126}
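/*
 * Illustrative sketch (not part of the driver, hence under #if 0): how an
 * external user of the generic IOMMU API (e.g. a VFIO-like consumer) would
 * exercise the domain_alloc/attach/map/unmap callbacks implemented above.
 * The device, addresses and sizes below are hypothetical.
 */
#if 0
static int example_iommu_api_usage(struct device *dev)
{
	struct iommu_domain *domain;
	phys_addr_t paddr = 0x1000000;	/* hypothetical */
	unsigned long iova = 0x100000;	/* hypothetical */
	size_t size = SZ_2M;
	int ret;

	domain = iommu_domain_alloc(&pci_bus_type);	/* -> intel_iommu_domain_alloc() */
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, dev);		/* -> intel_iommu_attach_device() */
	if (ret)
		goto out_free;

	ret = iommu_map(domain, iova, paddr, size,	/* -> intel_iommu_map() */
			IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	WARN_ON(iommu_iova_to_phys(domain, iova) != paddr);

	iommu_unmap(domain, iova, size);		/* -> intel_iommu_unmap() */
out_detach:
	iommu_detach_device(domain, dev);		/* -> intel_iommu_detach_device() */
out_free:
	iommu_domain_free(domain);			/* -> intel_iommu_domain_free() */
	return ret;
}
#endif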
a8bcbb0d 5127
5d587b8d 5128static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 5129{
dbb9fd86 5130 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 5131 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 5132 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 5133 return irq_remapping_enabled == 1;
dbb9fd86 5134
5d587b8d 5135 return false;
dbb9fd86
SY
5136}
5137
abdfdde2
AW
5138static int intel_iommu_add_device(struct device *dev)
5139{
a5459cfe 5140 struct intel_iommu *iommu;
abdfdde2 5141 struct iommu_group *group;
156baca8 5142 u8 bus, devfn;
70ae6f0d 5143
a5459cfe
AW
5144 iommu = device_to_iommu(dev, &bus, &devfn);
5145 if (!iommu)
70ae6f0d
AW
5146 return -ENODEV;
5147
e3d10af1 5148 iommu_device_link(&iommu->iommu, dev);
a4ff1fc2 5149
e17f9ff4 5150 group = iommu_group_get_for_dev(dev);
783f157b 5151
e17f9ff4
AW
5152 if (IS_ERR(group))
5153 return PTR_ERR(group);
bcb71abe 5154
abdfdde2 5155 iommu_group_put(group);
e17f9ff4 5156 return 0;
abdfdde2 5157}
70ae6f0d 5158
abdfdde2
AW
5159static void intel_iommu_remove_device(struct device *dev)
5160{
a5459cfe
AW
5161 struct intel_iommu *iommu;
5162 u8 bus, devfn;
5163
5164 iommu = device_to_iommu(dev, &bus, &devfn);
5165 if (!iommu)
5166 return;
5167
abdfdde2 5168 iommu_group_remove_device(dev);
a5459cfe 5169
e3d10af1 5170 iommu_device_unlink(&iommu->iommu, dev);
70ae6f0d
AW
5171}
5172
0659b8dc
EA
5173static void intel_iommu_get_resv_regions(struct device *device,
5174 struct list_head *head)
5175{
5176 struct iommu_resv_region *reg;
5177 struct dmar_rmrr_unit *rmrr;
5178 struct device *i_dev;
5179 int i;
5180
5181 rcu_read_lock();
5182 for_each_rmrr_units(rmrr) {
5183 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5184 i, i_dev) {
5185 if (i_dev != device)
5186 continue;
5187
5188 list_add_tail(&rmrr->resv->list, head);
5189 }
5190 }
5191 rcu_read_unlock();
5192
5193 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5194 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
9d3a4de4 5195 0, IOMMU_RESV_MSI);
0659b8dc
EA
5196 if (!reg)
5197 return;
5198 list_add_tail(&reg->list, head);
5199}
5200
5201static void intel_iommu_put_resv_regions(struct device *dev,
5202 struct list_head *head)
5203{
5204 struct iommu_resv_region *entry, *next;
5205
5206 list_for_each_entry_safe(entry, next, head, list) {
5207 if (entry->type == IOMMU_RESV_RESERVED)
5208 kfree(entry);
5209 }
70ae6f0d
AW
5210}
5211
2f26e0a9 5212#ifdef CONFIG_INTEL_IOMMU_SVM
65ca7f5f 5213#define MAX_NR_PASID_BITS (20)
4774cc52 5214static inline unsigned long intel_iommu_get_pts(struct device *dev)
65ca7f5f 5215{
4774cc52
LB
5216 int pts, max_pasid;
5217
5218 max_pasid = intel_pasid_get_dev_max_id(dev);
5219 pts = find_first_bit((unsigned long *)&max_pasid, MAX_NR_PASID_BITS);
5220 if (pts < 5)
65ca7f5f
JP
5221 return 0;
5222
4774cc52 5223 return pts - 5;
65ca7f5f
JP
5224}
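/*
 * Worked example (illustrative, assuming the context-entry PASID-table-size
 * field encodes 2^(PTS + 5) entries): a device reporting room for 2^16
 * PASIDs has its first set bit at position 16, so the function returns
 * 16 - 5 = 11, i.e. a table of 2^(11 + 5) = 65536 PASIDs.
 */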
5225
2f26e0a9
DW
5226int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
5227{
5228 struct device_domain_info *info;
5229 struct context_entry *context;
5230 struct dmar_domain *domain;
5231 unsigned long flags;
5232 u64 ctx_lo;
5233 int ret;
5234
5235 domain = get_valid_domain_for_dev(sdev->dev);
5236 if (!domain)
5237 return -EINVAL;
5238
5239 spin_lock_irqsave(&device_domain_lock, flags);
5240 spin_lock(&iommu->lock);
5241
5242 ret = -EINVAL;
5243 info = sdev->dev->archdata.iommu;
5244 if (!info || !info->pasid_supported)
5245 goto out;
5246
5247 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5248 if (WARN_ON(!context))
5249 goto out;
5250
5251 ctx_lo = context[0].lo;
5252
5253 sdev->did = domain->iommu_did[iommu->seq_id];
5254 sdev->sid = PCI_DEVID(info->bus, info->devfn);
5255
5256 if (!(ctx_lo & CONTEXT_PASIDE)) {
11b93ebf
AR
5257 if (iommu->pasid_state_table)
5258 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
4774cc52
LB
5259 context[1].lo = (u64)virt_to_phys(info->pasid_table->table) |
5260 intel_iommu_get_pts(sdev->dev);
65ca7f5f 5261
2f26e0a9
DW
5262 wmb();
5263 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5264 * extended to permit requests-with-PASID if the PASIDE bit
5265 * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH,
5266 * however, the PASIDE bit is ignored and requests-with-PASID
5267 * are unconditionally blocked. Which makes less sense.
5268 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5269 * "guest mode" translation types depending on whether ATS
5270 * is available or not. Annoyingly, we can't use the new
5271 * modes *unless* PASIDE is set. */
5272 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5273 ctx_lo &= ~CONTEXT_TT_MASK;
5274 if (info->ats_supported)
5275 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5276 else
5277 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5278 }
5279 ctx_lo |= CONTEXT_PASIDE;
907fea34
DW
5280 if (iommu->pasid_state_table)
5281 ctx_lo |= CONTEXT_DINVE;
a222a7f0
DW
5282 if (info->pri_supported)
5283 ctx_lo |= CONTEXT_PRS;
2f26e0a9
DW
5284 context[0].lo = ctx_lo;
5285 wmb();
5286 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5287 DMA_CCMD_MASK_NOBIT,
5288 DMA_CCMD_DEVICE_INVL);
5289 }
5290
5291 /* Enable PASID support in the device, if it wasn't already */
5292 if (!info->pasid_enabled)
5293 iommu_enable_dev_iotlb(info);
5294
5295 if (info->ats_enabled) {
5296 sdev->dev_iotlb = 1;
5297 sdev->qdep = info->ats_qdep;
5298 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5299 sdev->qdep = 0;
5300 }
5301 ret = 0;
5302
5303 out:
5304 spin_unlock(&iommu->lock);
5305 spin_unlock_irqrestore(&device_domain_lock, flags);
5306
5307 return ret;
5308}
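/*
 * A small sketch (hypothetical helpers, not in the driver) of the context
 * translation-type handling above: the CONTEXT_TT_* values are stored shifted
 * left by 2 in the low context word and masked with CONTEXT_TT_MASK, which is
 * how the pass-through type is swapped for a PASID-capable one.
 */
static inline u64 example_ctx_set_tt(u64 ctx_lo, unsigned int tt)
{
	ctx_lo &= ~CONTEXT_TT_MASK;
	return ctx_lo | ((u64)tt << 2);
}

static inline unsigned int example_ctx_get_tt(u64 ctx_lo)
{
	return (ctx_lo & CONTEXT_TT_MASK) >> 2;
}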
5309
5310struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5311{
5312 struct intel_iommu *iommu;
5313 u8 bus, devfn;
5314
5315 if (iommu_dummy(dev)) {
5316 dev_warn(dev,
5317 "No IOMMU translation for device; cannot enable SVM\n");
5318 return NULL;
5319 }
5320
5321 iommu = device_to_iommu(dev, &bus, &devfn);
5322 	if (!iommu) {
b9997e38 5323 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
2f26e0a9
DW
5324 return NULL;
5325 }
5326
2f26e0a9
DW
5327 return iommu;
5328}
5329#endif /* CONFIG_INTEL_IOMMU_SVM */
5330
b0119e87 5331const struct iommu_ops intel_iommu_ops = {
0659b8dc
EA
5332 .capable = intel_iommu_capable,
5333 .domain_alloc = intel_iommu_domain_alloc,
5334 .domain_free = intel_iommu_domain_free,
5335 .attach_dev = intel_iommu_attach_device,
5336 .detach_dev = intel_iommu_detach_device,
5337 .map = intel_iommu_map,
5338 .unmap = intel_iommu_unmap,
0659b8dc
EA
5339 .iova_to_phys = intel_iommu_iova_to_phys,
5340 .add_device = intel_iommu_add_device,
5341 .remove_device = intel_iommu_remove_device,
5342 .get_resv_regions = intel_iommu_get_resv_regions,
5343 .put_resv_regions = intel_iommu_put_resv_regions,
5344 .device_group = pci_device_group,
5345 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 5346};
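/*
 * A minimal sketch (not part of the driver) of how the callbacks above are
 * reached through the generic IOMMU API: iommu_domain_alloc() and
 * iommu_attach_device() end up in intel_iommu_domain_alloc() and
 * intel_iommu_attach_device(), while iommu_map()/iommu_unmap() land in
 * intel_iommu_map()/intel_iommu_unmap(). example_map_one_page(), 'pdev',
 * 'paddr' and 'iova' are illustrative only.
 */
static int example_map_one_page(struct pci_dev *pdev, phys_addr_t paddr,
				unsigned long iova)
{
	struct iommu_domain *domain;
	int ret;

	domain = iommu_domain_alloc(&pci_bus_type);
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, &pdev->dev);
	if (ret)
		goto out_free;

	/* Map one page read/write; this lands in intel_iommu_map(). */
	ret = iommu_map(domain, iova, paddr, PAGE_SIZE,
			IOMMU_READ | IOMMU_WRITE);

	iommu_unmap(domain, iova, PAGE_SIZE);
	iommu_detach_device(domain, &pdev->dev);
out_free:
	iommu_domain_free(domain);
	return ret;
}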
9af88143 5347
9452618e
SV
5348static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5349{
5350 /* G4x/GM45 integrated gfx dmar support is totally busted. */
9f10e5bf 5351 pr_info("Disabling IOMMU for graphics on this chipset\n");
9452618e
SV
5352 dmar_map_gfx = 0;
5353}
5354
5355DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5356DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5357DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5358DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5359DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5360DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5361DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5362
d34d6517 5363static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
5364{
5365 /*
5366 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 5367 * but needs it. Same seems to hold for the desktop versions.
9af88143 5368 */
9f10e5bf 5369 pr_info("Forcing write-buffer flush capability\n");
9af88143
DW
5370 rwbf_quirk = 1;
5371}
5372
5373DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
SV
5374DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5375DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5376DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5377DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5378DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5379DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
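/*
 * A minimal sketch of the quirk pattern used throughout this file:
 * DECLARE_PCI_FIXUP_HEADER() registers a hook that runs during early header
 * fixup for the matching vendor/device pair. The 0x1234 device ID and
 * example_quirk() below are made up purely for illustration.
 */
static void example_quirk(struct pci_dev *dev)
{
	pci_info(dev, "example quirk matched\n");
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1234, example_quirk);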
e0fc7e0b 5380
eecfd57f
AJ
5381#define GGC 0x52
5382#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5383#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5384#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5385#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5386#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5387#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5388#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5389#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5390
d34d6517 5391static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
5392{
5393 unsigned short ggc;
5394
eecfd57f 5395 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
5396 return;
5397
eecfd57f 5398 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9f10e5bf 5399 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 5400 dmar_map_gfx = 0;
6fbcfb3e
DW
5401 } else if (dmar_map_gfx) {
5402 /* we have to ensure the gfx device is idle before we flush */
9f10e5bf 5403 pr_info("Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
5404 intel_iommu_strict = 1;
5405 }
9eecabcb
DW
5406}
5407DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5408DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5409DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5410DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5411
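/*
 * A small sketch (hypothetical helper, not in the driver) of how the GGC
 * graphics-memory field tested above decodes, using only the GGC_MEMORY_*
 * values defined in this file; sizes are reported in megabytes and any other
 * encoding returns 0 here.
 */
static unsigned int example_ggc_gtt_size_mb(unsigned short ggc)
{
	switch (ggc & GGC_MEMORY_SIZE_MASK) {
	case GGC_MEMORY_SIZE_1M:	return 1;
	case GGC_MEMORY_SIZE_2M:
	case GGC_MEMORY_SIZE_2M_VT:	return 2;
	case GGC_MEMORY_SIZE_3M_VT:	return 3;
	case GGC_MEMORY_SIZE_4M_VT:	return 4;
	default:			return 0;	/* none or unknown */
	}
}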
e0fc7e0b
DW
5412/* On Tylersburg chipsets, some BIOSes have been known to enable the
5413 ISOCH DMAR unit for the Azalia sound device, but not give it any
5414 TLB entries, which causes it to deadlock. Check for that. We do
5415 this in a function called from init_dmars(), instead of in a PCI
5416 quirk, because we don't want to print the obnoxious "BIOS broken"
5417 message if VT-d is actually disabled.
5418*/
5419static void __init check_tylersburg_isoch(void)
5420{
5421 struct pci_dev *pdev;
5422 uint32_t vtisochctrl;
5423
5424 /* If there's no Azalia in the system anyway, forget it. */
5425 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5426 if (!pdev)
5427 return;
5428 pci_dev_put(pdev);
5429
5430 	/* System Management Registers device. It might be hidden, in which
5431 	   case we can't do the sanity check. But that's OK, because the
5432 	   known-broken BIOSes _don't_ actually hide it, so far. */
5433 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5434 if (!pdev)
5435 return;
5436
5437 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5438 pci_dev_put(pdev);
5439 return;
5440 }
5441
5442 pci_dev_put(pdev);
5443
5444 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5445 if (vtisochctrl & 1)
5446 return;
5447
5448 /* Drop all bits other than the number of TLB entries */
5449 vtisochctrl &= 0x1c;
5450
5451 /* If we have the recommended number of TLB entries (16), fine. */
5452 if (vtisochctrl == 0x10)
5453 return;
5454
5455 	/* Zero TLB entries? That is guaranteed to deadlock; warn and fall back to identity mapping for Azalia. */
5456 if (!vtisochctrl) {
5457 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5458 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5459 dmi_get_system_info(DMI_BIOS_VENDOR),
5460 dmi_get_system_info(DMI_BIOS_VERSION),
5461 dmi_get_system_info(DMI_PRODUCT_VERSION));
5462 iommu_identity_mapping |= IDENTMAP_AZALIA;
5463 return;
5464 }
9f10e5bf
JR
5465
5466 	pr_warn("Recommended number of TLB entries for the ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
5467 vtisochctrl);
5468}
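/*
 * A minimal sketch (hypothetical helper) of the VTISOCHCTRL decoding done in
 * check_tylersburg_isoch() above: bit 0 set means Azalia DMA is routed to the
 * non-isoch DMAR unit (nothing to check), and the field under mask 0x1c holds
 * the number of TLB entries granted to the isoch unit, 0x10 (16) being the
 * recommended value.
 */
static bool example_tylersburg_isoch_ok(u32 vtisochctrl)
{
	if (vtisochctrl & 1)			/* non-isoch routing */
		return true;

	return (vtisochctrl & 0x1c) == 0x10;	/* 16 TLB entries */
}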