debug [KNL] Enable kernel debugging (events log level).
+ debug_boot_weak_hash
+ [KNL] Enable printing [hashed] pointers early in the
+ boot sequence. If enabled, we use a weak hash instead
+ of siphash to hash pointers. Use this option if you are
+ seeing instances of '(___ptrval___)' and need to see a
+ value (hashed pointer) instead. Cryptographically
+ insecure, please do not use on production kernels.
+
debug_locks_verbose=
[KNL] verbose self-tests
Format=<0|1>
Defaults to the default architecture's huge page size
if not specified.
+ deferred_probe_timeout=
+ [KNL] Debugging option to set a timeout in seconds for
+ deferred probe to give up waiting on dependencies to
+ probe. Only specific dependencies (subsystems or
+ drivers) that have opted in will be ignored. A timeout of 0
+ will time out at the end of initcalls. This option will also
+ dump out devices still on the deferred probe list after
+ retrying.
+
dhash_entries= [KNL]
Set number of hash buckets for dentry cache.
disable= [IPV6]
See Documentation/networking/ipv6.txt.
+ hardened_usercopy=
+ [KNL] Under CONFIG_HARDENED_USERCOPY, whether
+ hardening is enabled for this boot. Hardened
+ usercopy checking is used to protect the kernel
+ from reading or writing beyond known memory
+ allocation boundaries as a proactive defense
+ against bounds-checking flaws in the kernel's
+ copy_to_user()/copy_from_user() interface.
+ on Perform hardened usercopy checks (default).
+ off Disable hardened usercopy checks.
+
disable_radix [PPC]
Disable RADIX MMU mode on POWER9
merge
nomerge
soft
- pt [x86, IA-64]
+ pt [x86]
+ nopt [x86]
nobypass [PPC/POWERNV]
Disable IOMMU bypass, using IOMMU for PCI devices.
(virtualized real and unpaged mode) on capable
Intel chips. Default is 1 (enabled)
+ kvm-intel.vmentry_l1d_flush=[KVM,Intel] Mitigation for L1 Terminal Fault
+ CVE-2018-3620.
+
+ Valid arguments: never, cond, always
+
+ always: L1D cache flush on every VMENTER.
+ cond: Flush L1D on VMENTER only when the code between
+ VMEXIT and VMENTER can leak host memory.
+ never: Disables the mitigation
+
+ Default is cond (do L1 cache flush in specific instances)
+
kvm-intel.vpid= [KVM,Intel] Disable Virtual Processor Identification
feature (tagged TLBs) on capable Intel chips.
Default is 1 (enabled)
+ l1tf= [X86] Control mitigation of the L1TF vulnerability on
+ affected CPUs
+
+ The kernel PTE inversion protection is unconditionally
+ enabled and cannot be disabled.
+
+ full
+ Provides all available mitigations for the
+ L1TF vulnerability. Disables SMT and
+ enables all mitigations in the
+ hypervisors, i.e. unconditional L1D flush.
+
+ SMT control and L1D flush control via the
+ sysfs interface is still possible after
+ boot. Hypervisors will issue a warning
+ when the first VM is started in a
+ potentially insecure configuration,
+ i.e. SMT enabled or L1D flush disabled.
+
+ full,force
+ Same as 'full', but disables SMT and L1D
+ flush runtime control. Implies the
+ 'nosmt=force' command line option.
+ (i.e. sysfs control of SMT is disabled.)
+
+ flush
+ Leaves SMT enabled and enables the default
+ hypervisor mitigation, i.e. conditional
+ L1D flush.
+
+ SMT control and L1D flush control via the
+ sysfs interface is still possible after
+ boot. Hypervisors will issue a warning
+ when the first VM is started in a
+ potentially insecure configuration,
+ i.e. SMT enabled or L1D flush disabled.
+
+ flush,nosmt
+
+ Disables SMT and enables the default
+ hypervisor mitigation.
+
+ SMT control and L1D flush control via the
+ sysfs interface is still possible after
+ boot. Hypervisors will issue a warning
+ when the first VM is started in a
+ potentially insecure configuration,
+ i.e. SMT enabled or L1D flush disabled.
+
+ flush,nowarn
+ Same as 'flush', but hypervisors will not
+ warn when a VM is started in a potentially
+ insecure configuration.
+
+ off
+ Disables hypervisor mitigations and doesn't
+ emit any warnings.
+
+ Default is 'flush'.
+
+ For details see: Documentation/admin-guide/l1tf.rst
+
l2cr= [PPC]
l3cr= [PPC]
nosmt [KNL,S390] Disable symmetric multithreading (SMT).
Equivalent to smt=1.
+ [KNL,x86] Disable symmetric multithreading (SMT).
+ nosmt=force: Force disable SMT, cannot be undone
+ via the sysfs control file.
+
+ nospectre_v1 [PPC] Disable mitigations for Spectre Variant 1 (bounds
+ check bypass). With this option data leaks are possible
+ in the system.
+
nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
(indirect branch prediction) vulnerability. System may
allow data leaks with this option, which is equivalent
nosync [HW,M68K] Disables sync negotiation for all devices.
- notsc [BUGS=X86-32] Disable Time Stamp Counter
-
nowatchdog [KNL] Disable both lockup detectors, i.e.
soft-lockup and NMI watchdog (hard-lockup).
on: enable the feature
page_poison= [KNL] Boot-time parameter changing the state of
- poisoning on the buddy allocator.
- off: turn off poisoning
+ poisoning on the buddy allocator, available with
+ CONFIG_PAGE_POISONING=y.
+ off: turn off poisoning (default)
on: turn on poisoning
panic= [KNL] Kernel behaviour on panic: delay <timeout>
See header of drivers/block/paride/pcd.c.
See also Documentation/blockdev/paride.txt.
- pci=option[,option...] [PCI] various PCI subsystem options:
- earlydump [X86] dump PCI config space before the kernel
+ pci=option[,option...] [PCI] various PCI subsystem options.
+
+ Some options herein operate on a specific device
+ or a set of devices (<pci_dev>). These are
+ specified in one of the following formats:
+
+ [<domain>:]<bus>:<dev>.<func>[/<dev>.<func>]*
+ pci:<vendor>:<device>[:<subvendor>:<subdevice>]
+
+ Note: the first format specifies a PCI
+ bus/device/function address which may change
+ if new hardware is inserted, if motherboard
+ firmware changes, or due to changes caused
+ by other kernel parameters. If the
+ domain is left unspecified, it is
+ taken to be zero. Optionally, a path
+ to a device through multiple device/function
+ addresses can be specified after the base
+ address (this is more robust against
+ renumbering issues). The second format
+ selects devices using IDs from the
+ configuration space which may match multiple
+ devices in the system.
+
+ earlydump dump PCI config space before the kernel
changes anything
off [X86] don't probe for the PCI bus
bios [X86-32] force use of PCI BIOS, don't access
window. The default value is 64 megabytes.
resource_alignment=
Format:
- [<order of align>@][<domain>:]<bus>:<slot>.<func>[; ...]
- [<order of align>@]pci:<vendor>:<device>\
- [:<subvendor>:<subdevice>][; ...]
+ [<order of align>@]<pci_dev>[; ...]
Specifies alignment and device to reassign
- aligned memory resources.
+ aligned memory resources. How to
+ specify the device is described above.
If <order of align> is not specified,
PAGE_SIZE is used as alignment.
PCI-PCI bridge can be specified, if resource
Adding the window is slightly risky (it may
conflict with unreported devices), so this
taints the kernel.
+ disable_acs_redir=<pci_dev>[; ...]
+ Specify one or more PCI devices (in the format
+ specified above) separated by semicolons.
+ Each device specified will have the PCI ACS
+ redirect capabilities forced off which will
+ allow P2P traffic between devices through
+ bridges without forcing it upstream. Note:
+ this removes isolation between devices and
+ may put more devices in an IOMMU group.
pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
Management.
Set time (s) after boot for CPU-hotplug testing.
rcutorture.onoff_interval= [KNL]
- Set time (s) between CPU-hotplug operations, or
- zero to disable CPU-hotplug testing.
+ Set time (jiffies) between CPU-hotplug operations,
+ or zero to disable CPU-hotplug testing.
rcutorture.shuffle_interval= [KNL]
Set task-shuffle interval (s). Shuffling tasks
This parameter controls whether the Speculative Store
Bypass optimization is used.
+ On x86 the options are:
+
on - Unconditionally disable Speculative Store Bypass
off - Unconditionally enable Speculative Store Bypass
auto - Kernel detects whether the CPU model contains an
seccomp - Same as "prctl" above, but all seccomp threads
will disable SSB unless they explicitly opt out.
- Not specifying this option is equivalent to
- spec_store_bypass_disable=auto.
-
Default mitigations:
X86: If CONFIG_SECCOMP=y "seccomp", otherwise "prctl"
+ On powerpc the options are:
+
+ on,auto - On Power8 and Power9 insert a store-forwarding
+ barrier on kernel entry and exit. On Power7
+ perform a software flush on kernel entry and
+ exit.
+ off - No action.
+
+ Not specifying this option is equivalent to
+ spec_store_bypass_disable=auto.
+
spia_io_base= [HW,MTD]
spia_fio_base=
spia_pedr=
* devices and allow every device to access to whole physical memory. This is
* useful if a user wants to use an IOMMU only for KVM device assignment to
* guests and not for driver dma translation.
+ * It is also possible to make pass-through the default in the kernel config
+ * (CONFIG_IOMMU_DEFAULT_PASSTHROUGH), and to switch it back off with
+ * iommu=nopt at boot time.
*/
+ #ifdef CONFIG_IOMMU_DEFAULT_PASSTHROUGH
+ int iommu_pass_through __read_mostly = 1;
+ #else
int iommu_pass_through __read_mostly;
+ #endif
extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
#endif
if (!strncmp(p, "pt", 2))
iommu_pass_through = 1;
+ if (!strncmp(p, "nopt", 4))
+ iommu_pass_through = 0;
gart_parse_options(p);
{
struct iommu_table_entry *p;
-#ifdef CONFIG_PCI
- dma_debug_add_bus(&pci_bus_type);
-#endif
x86_init.iommu.iommu_init();
for (p = __iommu_table; p < __iommu_table_end; p++) {
static int via_no_dac_cb(struct pci_dev *pdev, void *data)
{
- pdev->dev.dma_32bit_limit = true;
+ pdev->dev.bus_dma_mask = DMA_BIT_MASK(32);
return 0;
}
int level;
u64 *pte;
+ *page_size = 0;
+
if (address > PM_LEVEL_SIZE(domain->mode))
return NULL;
{
int ret;
- /*
- * Must be called with IRQs disabled. Warn here to detect early
- * when its not.
- */
- WARN_ON(!irqs_disabled());
-
/* lock domain */
spin_lock(&domain->lock);
{
struct protection_domain *domain;
- /*
- * Must be called with IRQs disabled. Warn here to detect early
- * when its not.
- */
- WARN_ON(!irqs_disabled());
-
domain = dev_data->domain;
spin_lock(&domain->lock);
}
if (amd_iommu_unmap_flush) {
- dma_ops_free_iova(dma_dom, dma_addr, pages);
domain_flush_tlb(&dma_dom->domain);
domain_flush_complete(&dma_dom->domain);
+ dma_ops_free_iova(dma_dom, dma_addr, pages);
} else {
pages = __roundup_pow_of_two(pages);
queue_iova(&dma_dom->iovad, dma_addr >> PAGE_SHIFT, pages, 0);
return NULL;
page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
- get_order(size), flag);
+ get_order(size), flag & __GFP_NOWARN);
if (!page)
return NULL;
}
.detach_dev = amd_iommu_detach_device,
.map = amd_iommu_map,
.unmap = amd_iommu_unmap,
- .map_sg = default_iommu_map_sg,
.iova_to_phys = amd_iommu_iova_to_phys,
.add_device = amd_iommu_add_device,
.remove_device = amd_iommu_remove_device,
irte->lo.fields_remap.int_type = delivery_mode;
irte->lo.fields_remap.dm = dest_mode;
irte->hi.fields.vector = vector;
- irte->lo.fields_remap.destination = dest_apicid;
+ irte->lo.fields_remap.destination = APICID_TO_IRTE_DEST_LO(dest_apicid);
+ irte->hi.fields.destination = APICID_TO_IRTE_DEST_HI(dest_apicid);
irte->lo.fields_remap.valid = 1;
}
if (!irte->lo.fields_remap.guest_mode) {
irte->hi.fields.vector = vector;
- irte->lo.fields_remap.destination = dest_apicid;
+ irte->lo.fields_remap.destination =
+ APICID_TO_IRTE_DEST_LO(dest_apicid);
+ irte->hi.fields.destination =
+ APICID_TO_IRTE_DEST_HI(dest_apicid);
modify_irte_ga(devid, index, irte, NULL);
}
}
irte->lo.val = 0;
irte->hi.fields.vector = cfg->vector;
irte->lo.fields_remap.guest_mode = 0;
- irte->lo.fields_remap.destination = cfg->dest_apicid;
+ irte->lo.fields_remap.destination =
+ APICID_TO_IRTE_DEST_LO(cfg->dest_apicid);
+ irte->hi.fields.destination =
+ APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
irte->lo.fields_remap.int_type = apic->irq_delivery_mode;
irte->lo.fields_remap.dm = apic->irq_dest_mode;
raw_spin_lock_irqsave(&table->lock, flags);
if (ref->lo.fields_vapic.guest_mode) {
- if (cpu >= 0)
- ref->lo.fields_vapic.destination = cpu;
+ if (cpu >= 0) {
+ ref->lo.fields_vapic.destination =
+ APICID_TO_IRTE_DEST_LO(cpu);
+ ref->hi.fields.destination =
+ APICID_TO_IRTE_DEST_HI(cpu);
+ }
ref->lo.fields_vapic.is_run = is_run;
barrier();
}
#include <linux/acpi_iort.h>
#include <linux/bitfield.h>
#include <linux/bitops.h>
+ #include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/err.h>
#define MSI_IOVA_BASE 0x8000000
#define MSI_IOVA_LENGTH 0x100000
- static bool disable_bypass;
+ static bool disable_bypass = 1;
module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
/* Sync our overflow flag, as we believe we're up to speed */
q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
+ writel(q->cons, q->cons_reg);
return IRQ_HANDLED;
}
.attach_dev = arm_smmu_attach_dev,
.map = arm_smmu_map,
.unmap = arm_smmu_unmap,
- .map_sg = default_iommu_map_sg,
.flush_iotlb_all = arm_smmu_iotlb_sync,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
reg &= ~clr;
reg |= set;
writel_relaxed(reg | GBPA_UPDATE, gbpa);
- return readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
- 1, ARM_SMMU_POLL_TIMEOUT_US);
+ ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
+ 1, ARM_SMMU_POLL_TIMEOUT_US);
+
+ if (ret)
+ dev_err(smmu->dev, "GBPA not responding to update\n");
+ return ret;
}
static void arm_smmu_free_msis(void *data)
/* Clear CR0 and sync (disables SMMU and queue processing) */
reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
- if (reg & CR0_SMMUEN)
+ if (reg & CR0_SMMUEN) {
+ if (is_kdump_kernel()) {
+ arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
+ arm_smmu_device_disable(smmu);
+ return -EBUSY;
+ }
+
dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
+ }
ret = arm_smmu_device_disable(smmu);
if (ret)
enables |= CR0_SMMUEN;
} else {
ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
- if (ret) {
- dev_err(smmu->dev, "GBPA not responding to update\n");
+ if (ret)
return ret;
- }
}
ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
ARM_SMMU_CR0ACK);
};
module_platform_driver(arm_smmu_driver);
-IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3");
-
MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
MODULE_LICENSE("GPL v2");
.attach_dev = arm_smmu_attach_dev,
.map = arm_smmu_map,
.unmap = arm_smmu_unmap,
- .map_sg = default_iommu_map_sg,
.flush_iotlb_all = arm_smmu_iotlb_sync,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
if (err)
return err;
- if (smmu->version == ARM_SMMU_V2 &&
- smmu->num_context_banks != smmu->num_context_irqs) {
- dev_err(dev,
- "found only %d context interrupt(s) but %d required\n",
- smmu->num_context_irqs, smmu->num_context_banks);
- return -ENODEV;
+ if (smmu->version == ARM_SMMU_V2) {
+ if (smmu->num_context_banks > smmu->num_context_irqs) {
+ dev_err(dev,
+ "found only %d context irq(s) but %d required\n",
+ smmu->num_context_irqs, smmu->num_context_banks);
+ return -ENODEV;
+ }
+
+ /* Ignore superfluous interrupts */
+ smmu->num_context_irqs = smmu->num_context_banks;
}
for (i = 0; i < smmu->num_global_irqs; ++i) {
};
module_platform_driver(arm_smmu_driver);
-IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1");
-IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2");
-IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400");
-IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401");
-IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500");
-IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2");
-
MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
MODULE_LICENSE("GPL v2");
.detach_dev = exynos_iommu_detach_device,
.map = exynos_iommu_map,
.unmap = exynos_iommu_unmap,
- .map_sg = default_iommu_map_sg,
.iova_to_phys = exynos_iommu_iova_to_phys,
.device_group = generic_device_group,
.add_device = exynos_iommu_add_device,
return ret;
}
core_initcall(exynos_iommu_init);
-
-IOMMU_OF_DECLARE(exynos_iommu_of, "samsung,exynos-sysmmu");
#include <asm/iommu.h>
#include "irq_remapping.h"
+ #include "intel-pasid.h"
#define ROOT_SIZE VTD_PAGE_SIZE
#define CONTEXT_SIZE VTD_PAGE_SIZE
for (idx = 0; idx < g_num_of_iommus; idx++) \
if (domain->iommu_refcnt[idx])
- struct dmar_domain {
- int nid; /* node id */
-
- unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED];
- /* Refcount of devices per iommu */
-
-
- u16 iommu_did[DMAR_UNITS_SUPPORTED];
- /* Domain ids per IOMMU. Use u16 since
- * domain ids are 16 bit wide according
- * to VT-d spec, section 9.3 */
-
- bool has_iotlb_device;
- struct list_head devices; /* all devices' list */
- struct iova_domain iovad; /* iova's that belong to this domain */
-
- struct dma_pte *pgd; /* virtual address */
- int gaw; /* max guest address width */
-
- /* adjusted guest address width, 0 is level 2 30-bit */
- int agaw;
-
- int flags; /* flags to find out type of domain */
-
- int iommu_coherency;/* indicate coherency of iommu access */
- int iommu_snooping; /* indicate snooping control feature*/
- int iommu_count; /* reference count of iommu */
- int iommu_superpage;/* Level of superpages supported:
- 0 == 4KiB (no superpages), 1 == 2MiB,
- 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
- u64 max_addr; /* maximum mapped address */
-
- struct iommu_domain domain; /* generic domain data structure for
- iommu core */
- };
-
- /* PCI domain-device relationship */
- struct device_domain_info {
- struct list_head link; /* link to domain siblings */
- struct list_head global; /* link to global list */
- u8 bus; /* PCI bus number */
- u8 devfn; /* PCI devfn number */
- u8 pasid_supported:3;
- u8 pasid_enabled:1;
- u8 pri_supported:1;
- u8 pri_enabled:1;
- u8 ats_supported:1;
- u8 ats_enabled:1;
- u8 ats_qdep;
- struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
- struct intel_iommu *iommu; /* IOMMU used by this device */
- struct dmar_domain *domain; /* pointer to domain */
- };
-
struct dmar_rmrr_unit {
struct list_head list; /* list of rmrr units */
struct acpi_dmar_header *hdr; /* ACPI header */
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
+ /*
+ * Iterate over elements in device_domain_list and call the specified
+ * callback @fn against each element. This helper should only be used
+ * in a context where the device_domain_lock is already held.
+ */
+ int for_each_device_domain(int (*fn)(struct device_domain_info *info,
+ void *data), void *data)
+ {
+ int ret = 0;
+ struct device_domain_info *info;
+
+ assert_spin_locked(&device_domain_lock);
+ list_for_each_entry(info, &device_domain_list, global) {
+ ret = fn(info, data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+ }
+
const struct iommu_ops intel_iommu_ops;
static bool translation_pre_enabled(struct intel_iommu *iommu)
domains[did & 0xff] = domain;
}
- static inline void *alloc_pgtable_page(int node)
+ void *alloc_pgtable_page(int node)
{
struct page *page;
void *vaddr = NULL;
return vaddr;
}
- static inline void free_pgtable_page(void *vaddr)
+ void free_pgtable_page(void *vaddr)
{
free_page((unsigned long)vaddr);
}
}
/* This functionin only returns single iommu in a domain */
- static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
+ struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
int iommu_id;
return;
pdev = to_pci_dev(info->dev);
+ /* For IOMMU that supports device IOTLB throttling (DIT), we assign
+ * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
+ * queue depth at PF level. If DIT is not set, PFSID will be treated as
+ * reserved, which should be set to 0.
+ */
+ if (!ecap_dit(info->iommu->ecap))
+ info->pfsid = 0;
+ else {
+ struct pci_dev *pf_pdev;
+
+ /* pdev will be returned if device is not a vf */
+ pf_pdev = pci_physfn(pdev);
+ info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn);
+ }
#ifdef CONFIG_INTEL_IOMMU_SVM
/* The PCIe spec, in its wisdom, declares that the behaviour of
sid = info->bus << 8 | info->devfn;
qdep = info->ats_qdep;
- qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
+ qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
+ qdep, addr, mask);
}
spin_unlock_irqrestore(&device_domain_lock, flags);
}
if (pasid_enabled(iommu)) {
if (ecap_prs(iommu->ecap))
intel_svm_finish_prq(iommu);
- intel_svm_free_pasid_tables(iommu);
+ intel_svm_exit(iommu);
}
#endif
}
info->dev = dev;
info->domain = domain;
info->iommu = iommu;
+ info->pasid_table = NULL;
if (dev && dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(info->dev);
list_add(&info->global, &device_domain_list);
if (dev)
dev->archdata.iommu = info;
+
+ if (dev && dev_is_pci(dev) && info->pasid_supported) {
+ ret = intel_pasid_alloc_table(dev);
+ if (ret) {
+ __dmar_remove_one_dev_info(info);
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+ return NULL;
+ }
+ }
spin_unlock_irqrestore(&device_domain_lock, flags);
if (dev && domain_context_mapping(domain, dev)) {
}
for_each_active_iommu(iommu, drhd) {
+ /*
+ * Find the largest PASID size supported by every IOMMU in
+ * the system. We need to ensure the system PASID table is
+ * no bigger than the smallest size any IOMMU supports.
+ */
+ if (pasid_enabled(iommu)) {
+ u32 temp = 2 << ecap_pss(iommu->ecap);
+
+ intel_pasid_max_id = min_t(u32, temp,
+ intel_pasid_max_id);
+ }
+
g_iommus[iommu->seq_id] = iommu;
intel_iommu_init_qi(iommu);
hw_pass_through = 0;
#ifdef CONFIG_INTEL_IOMMU_SVM
if (pasid_enabled(iommu))
- intel_svm_alloc_pasid_tables(iommu);
+ intel_svm_init(iommu);
#endif
}
return iova_pfn;
}
- static struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
+ struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
{
struct dmar_domain *domain, *tmp;
struct dmar_rmrr_unit *rmrr;
if (gfpflags_allow_blocking(flags)) {
unsigned int count = size >> PAGE_SHIFT;
- page = dma_alloc_from_contiguous(dev, count, order, flags);
+ page = dma_alloc_from_contiguous(dev, count, order,
+ flags & __GFP_NOWARN);
if (page && iommu_no_mapping(dev) &&
page_to_phys(page) + size > dev->coherent_dma_mask) {
dma_release_from_contiguous(dev, page, count);
#ifdef CONFIG_INTEL_IOMMU_SVM
if (pasid_enabled(iommu))
- intel_svm_alloc_pasid_tables(iommu);
+ intel_svm_init(iommu);
#endif
if (dmaru->ignored) {
if (info->dev) {
iommu_disable_dev_iotlb(info);
domain_context_clear(iommu, info->dev);
+ intel_pasid_free_table(info->dev);
}
unlink_domain_info(info);
#ifdef CONFIG_INTEL_IOMMU_SVM
#define MAX_NR_PASID_BITS (20)
- static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu)
+ static inline unsigned long intel_iommu_get_pts(struct device *dev)
{
- /*
- * Convert ecap_pss to extend context entry pts encoding, also
- * respect the soft pasid_max value set by the iommu.
- * - number of PASID bits = ecap_pss + 1
- * - number of PASID table entries = 2^(pts + 5)
- * Therefore, pts = ecap_pss - 4
- * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15
- */
- if (ecap_pss(iommu->ecap) < 5)
+ int pts, max_pasid;
+
+ max_pasid = intel_pasid_get_dev_max_id(dev);
+ pts = find_first_bit((unsigned long *)&max_pasid, MAX_NR_PASID_BITS);
+ if (pts < 5)
return 0;
- /* pasid_max is encoded as actual number of entries not the bits */
- return find_first_bit((unsigned long *)&iommu->pasid_max,
- MAX_NR_PASID_BITS) - 5;
+ return pts - 5;
}
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
if (!(ctx_lo & CONTEXT_PASIDE)) {
if (iommu->pasid_state_table)
context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
- context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
- intel_iommu_get_pts(iommu);
+ context[1].lo = (u64)virt_to_phys(info->pasid_table->table) |
+ intel_iommu_get_pts(sdev->dev);
wmb();
/* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
return NULL;
}
- if (!iommu->pasid_table) {
- dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
- return NULL;
- }
-
return iommu;
}
#endif /* CONFIG_INTEL_IOMMU_SVM */
.detach_dev = intel_iommu_detach_device,
.map = intel_iommu_map,
.unmap = intel_iommu_unmap,
- .map_sg = default_iommu_map_sg,
.iova_to_phys = intel_iommu_iova_to_phys,
.add_device = intel_iommu_add_device,
.remove_device = intel_iommu_remove_device,
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
+#include <linux/mm_types.h>
#include <asm/page.h>
+ #include "intel-pasid.h"
+
#define PASID_ENTRY_P BIT_ULL(0)
#define PASID_ENTRY_FLPM_5LP BIT_ULL(9)
#define PASID_ENTRY_SRE BIT_ULL(11)
static irqreturn_t prq_event_thread(int irq, void *d);
- struct pasid_entry {
- u64 val;
- };
-
struct pasid_state_entry {
u64 val;
};
- int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
+ int intel_svm_init(struct intel_iommu *iommu)
{
struct page *pages;
int order;
iommu->pasid_max = 0x20000;
order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
- pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
- if (!pages) {
- pr_warn("IOMMU: %s: Failed to allocate PASID table\n",
- iommu->name);
- return -ENOMEM;
- }
- iommu->pasid_table = page_address(pages);
- pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order);
-
if (ecap_dis(iommu->ecap)) {
/* Just making it explicit... */
BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry));
iommu->name);
}
- idr_init(&iommu->pasid_idr);
-
return 0;
}
- int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
+ int intel_svm_exit(struct intel_iommu *iommu)
{
int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
- if (iommu->pasid_table) {
- free_pages((unsigned long)iommu->pasid_table, order);
- iommu->pasid_table = NULL;
- }
if (iommu->pasid_state_table) {
free_pages((unsigned long)iommu->pasid_state_table, order);
iommu->pasid_state_table = NULL;
}
- idr_destroy(&iommu->pasid_idr);
+
return 0;
}
* page) so that we end up taking a fault that the hardware really
* *has* to handle gracefully without affecting other processes.
*/
- svm->iommu->pasid_table[svm->pasid].val = 0;
- wmb();
-
rcu_read_lock();
list_for_each_entry_rcu(sdev, &svm->devs, list) {
+ intel_pasid_clear_entry(sdev->dev, svm->pasid);
intel_flush_pasid_dev(svm, sdev, svm->pasid);
intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
}
};
static DEFINE_MUTEX(pasid_mutex);
+ static LIST_HEAD(global_svm_list);
int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
{
struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+ struct pasid_entry *entry;
struct intel_svm_dev *sdev;
struct intel_svm *svm = NULL;
struct mm_struct *mm = NULL;
int pasid_max;
int ret;
- if (WARN_ON(!iommu || !iommu->pasid_table))
+ if (!iommu)
return -EINVAL;
if (dev_is_pci(dev)) {
mutex_lock(&pasid_mutex);
if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
- int i;
+ struct intel_svm *t;
- idr_for_each_entry(&iommu->pasid_idr, svm, i) {
- if (svm->mm != mm ||
- (svm->flags & SVM_FLAG_PRIVATE_PASID))
+ list_for_each_entry(t, &global_svm_list, list) {
+ if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID))
continue;
+ svm = t;
if (svm->pasid >= pasid_max) {
dev_warn(dev,
"Limited PASID width. Cannot use existing PASID %d\n",
}
svm->iommu = iommu;
- if (pasid_max > iommu->pasid_max)
- pasid_max = iommu->pasid_max;
+ if (pasid_max > intel_pasid_max_id)
+ pasid_max = intel_pasid_max_id;
/* Do not use PASID 0 in caching mode (virtualised IOMMU) */
- ret = idr_alloc(&iommu->pasid_idr, svm,
- !!cap_caching_mode(iommu->cap),
- pasid_max - 1, GFP_KERNEL);
+ ret = intel_pasid_alloc_id(svm,
+ !!cap_caching_mode(iommu->cap),
+ pasid_max - 1, GFP_KERNEL);
if (ret < 0) {
kfree(svm);
kfree(sdev);
svm->mm = mm;
svm->flags = flags;
INIT_LIST_HEAD_RCU(&svm->devs);
+ INIT_LIST_HEAD(&svm->list);
ret = -ENOMEM;
if (mm) {
ret = mmu_notifier_register(&svm->notifier, mm);
if (ret) {
- idr_remove(&svm->iommu->pasid_idr, svm->pasid);
+ intel_pasid_free_id(svm->pasid);
kfree(svm);
kfree(sdev);
goto out;
if (cpu_feature_enabled(X86_FEATURE_LA57))
pasid_entry_val |= PASID_ENTRY_FLPM_5LP;
- iommu->pasid_table[svm->pasid].val = pasid_entry_val;
+ entry = intel_pasid_get_entry(dev, svm->pasid);
+ entry->val = pasid_entry_val;
wmb();
*/
if (cap_caching_mode(iommu->cap))
intel_flush_pasid_dev(svm, sdev, svm->pasid);
+
+ list_add_tail(&svm->list, &global_svm_list);
}
list_add_rcu(&sdev->list, &svm->devs);
mutex_lock(&pasid_mutex);
iommu = intel_svm_device_to_iommu(dev);
- if (!iommu || !iommu->pasid_table)
+ if (!iommu)
goto out;
- svm = idr_find(&iommu->pasid_idr, pasid);
+ svm = intel_pasid_lookup_id(pasid);
if (!svm)
goto out;
intel_flush_pasid_dev(svm, sdev, svm->pasid);
intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
kfree_rcu(sdev, rcu);
+ intel_pasid_clear_entry(dev, svm->pasid);
if (list_empty(&svm->devs)) {
- svm->iommu->pasid_table[svm->pasid].val = 0;
- wmb();
-
- idr_remove(&svm->iommu->pasid_idr, svm->pasid);
+ intel_pasid_free_id(svm->pasid);
if (svm->mm)
mmu_notifier_unregister(&svm->notifier, svm->mm);
+ list_del(&svm->list);
+
/* We mandate that no page faults may be outstanding
* for the PASID when intel_svm_unbind_mm() is called.
* If that is not obeyed, subtle errors will happen.
mutex_lock(&pasid_mutex);
iommu = intel_svm_device_to_iommu(dev);
- if (!iommu || !iommu->pasid_table)
+ if (!iommu)
goto out;
- svm = idr_find(&iommu->pasid_idr, pasid);
+ svm = intel_pasid_lookup_id(pasid);
if (!svm)
goto out;
struct vm_area_struct *vma;
struct page_req_dsc *req;
struct qi_desc resp;
- int ret, result;
+ int result;
+ vm_fault_t ret;
u64 address;
handled = 1;
if (!svm || svm->pasid != req->pasid) {
rcu_read_lock();
- svm = idr_find(&iommu->pasid_idr, req->pasid);
+ svm = intel_pasid_lookup_id(req->pasid);
/* It *can't* go away, because the driver is not permitted
* to unbind the mm while any page faults are outstanding.
* So we only need RCU to protect the internal idr code. */
unsigned int number_of_contexts;
bool setup_imbuscr;
bool twobit_imttbcr_sl0;
+ bool reserved_context;
};
struct ipmmu_vmsa_device {
struct io_pgtable_ops *iop;
unsigned int context_id;
- spinlock_t lock; /* Protects mappings */
+ struct mutex mutex; /* Protects mappings */
};
static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom)
#define IMPMBA(n) (0x0280 + ((n) * 4))
#define IMPMBD(n) (0x02c0 + ((n) * 4))
- #define IMUCTR(n) (0x0300 + ((n) * 16))
+ #define IMUCTR(n) ((n) < 32 ? IMUCTR0(n) : IMUCTR32(n))
+ #define IMUCTR0(n) (0x0300 + ((n) * 16))
+ #define IMUCTR32(n) (0x0600 + (((n) - 32) * 16))
#define IMUCTR_FIXADDEN (1 << 31)
#define IMUCTR_FIXADD_MASK (0xff << 16)
#define IMUCTR_FIXADD_SHIFT 16
#define IMUCTR_FLUSH (1 << 1)
#define IMUCTR_MMUEN (1 << 0)
- #define IMUASID(n) (0x0308 + ((n) * 16))
+ #define IMUASID(n) ((n) < 32 ? IMUASID0(n) : IMUASID32(n))
+ #define IMUASID0(n) (0x0308 + ((n) * 16))
+ #define IMUASID32(n) (0x0608 + (((n) - 32) * 16))
#define IMUASID_ASID8_MASK (0xff << 8)
#define IMUASID_ASID8_SHIFT 8
#define IMUASID_ASID0_MASK (0xff << 0)
if (!domain)
return NULL;
- spin_lock_init(&domain->lock);
+ mutex_init(&domain->mutex);
return &domain->io_domain;
}
struct iommu_fwspec *fwspec = dev->iommu_fwspec;
struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
- unsigned long flags;
unsigned int i;
int ret = 0;
return -ENXIO;
}
- spin_lock_irqsave(&domain->lock, flags);
+ mutex_lock(&domain->mutex);
if (!domain->mmu) {
/* The domain hasn't been used yet, initialize it. */
} else
dev_info(dev, "Reusing IPMMU context %u\n", domain->context_id);
- spin_unlock_irqrestore(&domain->lock, flags);
+ mutex_unlock(&domain->mutex);
if (ret < 0)
return ret;
return false;
}
- static const struct soc_device_attribute soc_r8a7795[] = {
+ static const struct soc_device_attribute soc_rcar_gen3[] = {
{ .soc_id = "r8a7795", },
+ { .soc_id = "r8a7796", },
+ { .soc_id = "r8a77965", },
+ { .soc_id = "r8a77970", },
+ { .soc_id = "r8a77995", },
{ /* sentinel */ }
};
struct of_phandle_args *spec)
{
/* For R-Car Gen3 use a white list to opt-in slave devices */
- if (soc_device_match(soc_r8a7795) && !ipmmu_slave_whitelist(dev))
+ if (soc_device_match(soc_rcar_gen3) && !ipmmu_slave_whitelist(dev))
return -ENODEV;
iommu_fwspec_add_ids(dev, spec->args, 1);
.unmap = ipmmu_unmap,
.flush_iotlb_all = ipmmu_iotlb_sync,
.iotlb_sync = ipmmu_iotlb_sync,
- .map_sg = default_iommu_map_sg,
.iova_to_phys = ipmmu_iova_to_phys,
.add_device = ipmmu_add_device,
.remove_device = ipmmu_remove_device,
.number_of_contexts = 1, /* software only tested with one context */
.setup_imbuscr = true,
.twobit_imttbcr_sl0 = false,
+ .reserved_context = false,
};
- static const struct ipmmu_features ipmmu_features_r8a7795 = {
+ static const struct ipmmu_features ipmmu_features_rcar_gen3 = { /* feature set referenced by every R-Car Gen3 compatible entry in ipmmu_of_ids */
.use_ns_alias_offset = false,
.has_cache_leaf_nodes = true,
.number_of_contexts = 8,
.setup_imbuscr = false,
.twobit_imttbcr_sl0 = true,
+ .reserved_context = true, /* when set, setup logs that context 0 is reserved and sets bit 0 in mmu->ctx */
};
static const struct of_device_id ipmmu_of_ids[] = {
.data = &ipmmu_features_default,
}, {
.compatible = "renesas,ipmmu-r8a7795",
- .data = &ipmmu_features_r8a7795,
+ .data = &ipmmu_features_rcar_gen3,
+ }, {
+ .compatible = "renesas,ipmmu-r8a7796",
+ .data = &ipmmu_features_rcar_gen3,
+ }, {
+ .compatible = "renesas,ipmmu-r8a77965",
+ .data = &ipmmu_features_rcar_gen3,
+ }, {
+ .compatible = "renesas,ipmmu-r8a77970",
+ .data = &ipmmu_features_rcar_gen3,
+ }, {
+ .compatible = "renesas,ipmmu-r8a77995",
+ .data = &ipmmu_features_rcar_gen3,
}, {
/* Terminator */
},
}
mmu->dev = &pdev->dev;
- mmu->num_utlbs = 32;
+ mmu->num_utlbs = 48;
spin_lock_init(&mmu->lock);
bitmap_zero(mmu->ctx, IPMMU_CTX_MAX);
mmu->features = of_device_get_match_data(&pdev->dev);
}
ipmmu_device_reset(mmu);
+
+ if (mmu->features->reserved_context) {
+ dev_info(&pdev->dev, "IPMMU context 0 is reserved\n");
+ set_bit(0, mmu->ctx);
+ }
}
/*
static int __init ipmmu_init(void)
{
+ struct device_node *np;
static bool setup_done;
int ret;
if (setup_done)
return 0;
+ np = of_find_matching_node(NULL, ipmmu_of_ids);
+ if (!np)
+ return 0;
+
+ of_node_put(np);
+
ret = platform_driver_register(&ipmmu_driver);
if (ret < 0)
return ret;
subsys_initcall(ipmmu_init);
module_exit(ipmmu_exit);
-IOMMU_OF_DECLARE(ipmmu_vmsa_iommu_of, "renesas,ipmmu-vmsa");
-IOMMU_OF_DECLARE(ipmmu_r8a7795_iommu_of, "renesas,ipmmu-r8a7795");
-IOMMU_OF_DECLARE(ipmmu_r8a7796_iommu_of, "renesas,ipmmu-r8a7796");
-IOMMU_OF_DECLARE(ipmmu_r8a77965_iommu_of, "renesas,ipmmu-r8a77965");
-IOMMU_OF_DECLARE(ipmmu_r8a77970_iommu_of, "renesas,ipmmu-r8a77970");
-IOMMU_OF_DECLARE(ipmmu_r8a77995_iommu_of, "renesas,ipmmu-r8a77995");
-
MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU");
MODULE_LICENSE("GPL v2");
struct msm_iommu_dev *iommu;
struct iommu_group *group;
unsigned long flags;
- int ret = 0;
spin_lock_irqsave(&msm_iommu_lock, flags);
-
iommu = find_iommu_for_dev(dev);
+ spin_unlock_irqrestore(&msm_iommu_lock, flags);
+
if (iommu)
iommu_device_link(&iommu->iommu, dev);
else
- ret = -ENODEV;
-
- spin_unlock_irqrestore(&msm_iommu_lock, flags);
-
- if (ret)
- return ret;
+ return -ENODEV;
group = iommu_group_get_for_dev(dev);
if (IS_ERR(group))
unsigned long flags;
spin_lock_irqsave(&msm_iommu_lock, flags);
-
iommu = find_iommu_for_dev(dev);
+ spin_unlock_irqrestore(&msm_iommu_lock, flags);
+
if (iommu)
iommu_device_unlink(&iommu->iommu, dev);
- spin_unlock_irqrestore(&msm_iommu_lock, flags);
-
iommu_group_remove_device(dev);
}
.detach_dev = msm_iommu_detach_dev,
.map = msm_iommu_map,
.unmap = msm_iommu_unmap,
- .map_sg = default_iommu_map_sg,
.iova_to_phys = msm_iommu_iova_to_phys,
.add_device = msm_iommu_add_device,
.remove_device = msm_iommu_remove_device,
subsys_initcall(msm_iommu_driver_init);
module_exit(msm_iommu_driver_exit);
-IOMMU_OF_DECLARE(msm_iommu_of, "qcom,apq8064-iommu");
-
MODULE_LICENSE("GPL v2");
.detach_dev = qcom_iommu_detach_dev,
.map = qcom_iommu_map,
.unmap = qcom_iommu_unmap,
- .map_sg = default_iommu_map_sg,
.flush_iotlb_all = qcom_iommu_iotlb_sync,
.iotlb_sync = qcom_iommu_iotlb_sync,
.iova_to_phys = qcom_iommu_iova_to_phys,
module_init(qcom_iommu_init);
module_exit(qcom_iommu_exit);
-IOMMU_OF_DECLARE(qcom_iommu_dev, "qcom,msm-iommu-v1");
-
MODULE_DESCRIPTION("IOMMU API for QCOM IOMMU v1 implementations");
MODULE_LICENSE("GPL v2");
.detach_dev = rk_iommu_detach_device,
.map = rk_iommu_map,
.unmap = rk_iommu_unmap,
- .map_sg = default_iommu_map_sg,
.add_device = rk_iommu_add_device,
.remove_device = rk_iommu_remove_device,
.iova_to_phys = rk_iommu_iova_to_phys,
}
subsys_initcall(rk_iommu_init);
-IOMMU_OF_DECLARE(rk_iommu_of, "rockchip,iommu");
-
MODULE_DESCRIPTION("IOMMU API for Rockchip");
MODULE_ALIAS("platform:rockchip-iommu");