EL0 is indicated by /sys/devices/system/cpu/aarch32_el0
and hot-unplug operations may be restricted.
- See Documentation/arm64/asymmetric-32bit.rst for more
+ See Documentation/arch/arm64/asymmetric-32bit.rst for more
information.
amd_iommu= [HW,X86-64]
option with care.
pgtbl_v1 - Use v1 page table for DMA-API (Default).
pgtbl_v2 - Use v2 page table for DMA-API.
+ irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
amd_iommu_dump= [HW,X86-64]
Enable AMD IOMMU driver option to dump the ACPI table
arm64.nosme [ARM64] Unconditionally disable Scalable Matrix
Extension support
+ arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory
+ Set instructions support
+
ataflop= [HW,M68k]
atarimouse= [HW,MOUSE] Atari Mouse
Format:
<first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
- cpu0_hotplug [X86] Turn on CPU0 hotplug feature when
- CONFIG_BOOTPARAM_HOTPLUG_CPU0 is off.
- Some features depend on CPU0. Known dependencies are:
- 1. Resume from suspend/hibernate depends on CPU0.
- Suspend/hibernate will fail if CPU0 is offline and you
- need to online CPU0 before suspend/hibernate.
- 2. PIC interrupts also depend on CPU0. CPU0 can't be
- removed if a PIC interrupt is detected.
- It's said poweroff/reboot may depend on CPU0 on some
- machines although I haven't seen such issues so far
- after CPU0 is offline on a few tested machines.
- If the dependencies are under your control, you can
- turn on cpu0_hotplug.
-
cpuidle.off=1 [CPU_IDLE]
disable the cpuidle sub-system
on every CPU online, such as boot, and resume from suspend.
Default: 10000
+ cpuhp.parallel=
+ [SMP] Enable/disable parallel bringup of secondary CPUs
+ Format: <bool>
+ Default is enabled if CONFIG_HOTPLUG_PARALLEL=y. Otherwise
+ the parameter has no effect.
+
crash_kexec_post_notifiers
Run kdump after running panic-notifiers and dumping
kmsg. This is only for users who doubt kdump always
disable
Do not enable intel_pstate as the default
scaling driver for the supported processors
+ active
+ Use the intel_pstate driver to bypass the scaling
+ governors layer of cpufreq and provide its own
+ algorithms for P-state selection. There are two
+ P-state selection algorithms provided by
+ intel_pstate in the active mode: powersave and
+ performance. The way they both operate depends
+ on whether or not the hardware managed P-states
+ (HWP) feature has been enabled in the processor
+ and possibly on the processor model.
passive
Use intel_pstate as a scaling driver, but configure it
to work with generic cpufreq governors (instead of
If the value is 0 (the default), KVM will pick a period based
on the ratio, such that a page is zapped after 1 hour on average.
- kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
- Default is 1 (enabled)
+ kvm-amd.nested= [KVM,AMD] Control nested virtualization feature in
+ KVM/SVM. Default is 1 (enabled).
- kvm-amd.npt= [KVM,AMD] Disable nested paging (virtualized MMU)
- for all guests.
- Default is 1 (enabled) if in 64-bit or 32-bit PAE mode.
+ kvm-amd.npt= [KVM,AMD] Control KVM's use of Nested Page Tables,
+ a.k.a. Two-Dimensional Page Tables. Default is 1
+ (enabled). Disabled by KVM if hardware lacks support
+ for NPT.
kvm-arm.mode=
[KVM,ARM] Select one of KVM/arm64's modes of operation.
Format: <integer>
Default: 5
- kvm-intel.ept= [KVM,Intel] Disable extended page tables
- (virtualized MMU) support on capable Intel chips.
- Default is 1 (enabled)
+ kvm-intel.ept= [KVM,Intel] Control KVM's use of Extended Page Tables,
+ a.k.a. Two-Dimensional Page Tables. Default is 1
+ (enabled). Disabled by KVM if hardware lacks support
+ for EPT.
kvm-intel.emulate_invalid_guest_state=
- [KVM,Intel] Disable emulation of invalid guest state.
- Ignored if kvm-intel.enable_unrestricted_guest=1, as
- guest state is never invalid for unrestricted guests.
- This param doesn't apply to nested guests (L2), as KVM
- never emulates invalid L2 guest state.
- Default is 1 (enabled)
+ [KVM,Intel] Control whether to emulate invalid guest
+ state. Ignored if kvm-intel.enable_unrestricted_guest=1,
+ as guest state is never invalid for unrestricted
+ guests. This param doesn't apply to nested guests (L2),
+ as KVM never emulates invalid L2 guest state.
+ Default is 1 (enabled).
kvm-intel.flexpriority=
- [KVM,Intel] Disable FlexPriority feature (TPR shadow).
- Default is 1 (enabled)
+ [KVM,Intel] Control KVM's use of FlexPriority feature
+ (TPR shadow). Default is 1 (enabled). Disabled by KVM if
+ hardware lacks support for it.
kvm-intel.nested=
- [KVM,Intel] Enable VMX nesting (nVMX).
- Default is 0 (disabled)
+ [KVM,Intel] Control nested virtualization feature in
+ KVM/VMX. Default is 1 (enabled).
kvm-intel.unrestricted_guest=
- [KVM,Intel] Disable unrestricted guest feature
- (virtualized real and unpaged mode) on capable
- Intel chips. Default is 1 (enabled)
+ [KVM,Intel] Control KVM's use of unrestricted guest
+ feature (virtualized real and unpaged mode). Default
+ is 1 (enabled). Disabled by KVM if EPT is disabled or
+ hardware lacks support for it.
kvm-intel.vmentry_l1d_flush=[KVM,Intel] Mitigation for L1 Terminal Fault
CVE-2018-3620.
Default is cond (do L1 cache flush in specific instances)
- kvm-intel.vpid= [KVM,Intel] Disable Virtual Processor Identification
- feature (tagged TLBs) on capable Intel chips.
- Default is 1 (enabled)
+ kvm-intel.vpid= [KVM,Intel] Control KVM's use of Virtual Processor
+ Identification feature (tagged TLBs). Default is 1
+ (enabled). Disabled by KVM if hardware lacks support
+ for it.
l1d_flush= [X86,INTEL]
Control mitigation for L1D based snooping vulnerability.
[HW] Make the MicroTouch USB driver use raw coordinates
('y', default) or cooked coordinates ('n')
+ mtrr=debug [X86]
+ Enable printing debug information related to MTRR
+ registers at boot time.
+
mtrr_chunk_size=nn[KMG] [X86]
used for mtrr cleanup. It is largest continuous chunk
that could hold holes aka. UC entries.
nohibernate [HIBERNATION] Disable hibernation and resume.
- nohlt [ARM,ARM64,MICROBLAZE,SH] Forces the kernel to busy wait
- in do_idle() and not use the arch_cpu_idle()
+ nohlt [ARM,ARM64,MICROBLAZE,MIPS,SH] Forces the kernel to
+ busy wait in do_idle() and not use the arch_cpu_idle()
implementation; requires CONFIG_GENERIC_IDLE_POLL_SETUP
to be effective. This is useful on platforms where the
sleep(SH) or wfi(ARM,ARM64) instructions do not work
nosmp [SMP] Tells an SMP kernel to act as a UP kernel,
and disable the IO APIC. legacy for "maxcpus=0".
- nosmt [KNL,S390] Disable symmetric multithreading (SMT).
+ nosmt [KNL,MIPS,S390] Disable symmetric multithreading (SMT).
Equivalent to smt=1.
[KNL,X86] Disable symmetric multithreading (SMT).
the propagation of recent CPU-hotplug changes up
the rcu_node combining tree.
- rcutree.use_softirq= [KNL]
- If set to zero, move all RCU_SOFTIRQ processing to
- per-CPU rcuc kthreads. Defaults to a non-zero
- value, meaning that RCU_SOFTIRQ is used by default.
- Specify rcutree.use_softirq=0 to use rcuc kthreads.
-
- But note that CONFIG_PREEMPT_RT=y kernels disable
- this kernel boot parameter, forcibly setting it
- to zero.
-
- rcutree.rcu_fanout_exact= [KNL]
- Disable autobalancing of the rcu_node combining
- tree. This is used by rcutorture, and might
- possibly be useful for architectures having high
- cache-to-cache transfer latencies.
-
- rcutree.rcu_fanout_leaf= [KNL]
- Change the number of CPUs assigned to each
- leaf rcu_node structure. Useful for very
- large systems, which will choose the value 64,
- and for NUMA systems with large remote-access
- latencies, which will choose a value aligned
- with the appropriate hardware boundaries.
-
- rcutree.rcu_min_cached_objs= [KNL]
- Minimum number of objects which are cached and
- maintained per one CPU. Object size is equal
- to PAGE_SIZE. The cache allows to reduce the
- pressure to page allocator, also it makes the
- whole algorithm to behave better in low memory
- condition.
-
- rcutree.rcu_delay_page_cache_fill_msec= [KNL]
- Set the page-cache refill delay (in milliseconds)
- in response to low-memory conditions. The range
- of permitted values is in the range 0:100000.
-
rcutree.jiffies_till_first_fqs= [KNL]
Set delay from grace-period initialization to
first attempt to force quiescent states.
When RCU_NOCB_CPU is set, also adjust the
priority of NOCB callback kthreads.
- rcutree.rcu_divisor= [KNL]
- Set the shift-right count to use to compute
- the callback-invocation batch limit bl from
- the number of callbacks queued on this CPU.
- The result will be bounded below by the value of
- the rcutree.blimit kernel parameter. Every bl
- callbacks, the softirq handler will exit in
- order to allow the CPU to do other work.
-
- Please note that this callback-invocation batch
- limit applies only to non-offloaded callback
- invocation. Offloaded callbacks are instead
- invoked in the context of an rcuoc kthread, which
- scheduler will preempt as it does any other task.
-
rcutree.nocb_nobypass_lim_per_jiffy= [KNL]
On callback-offloaded (rcu_nocbs) CPUs,
RCU reduces the lock contention that would
the ->nocb_bypass queue. The definition of "too
many" is supplied by this kernel boot parameter.
- rcutree.rcu_nocb_gp_stride= [KNL]
- Set the number of NOCB callback kthreads in
- each group, which defaults to the square root
- of the number of CPUs. Larger numbers reduce
- the wakeup overhead on the global grace-period
- kthread, but increases that same overhead on
- each group's NOCB grace-period kthread.
-
rcutree.qhimark= [KNL]
Set threshold of queued RCU callbacks beyond which
batch limiting is disabled.
on rcutree.qhimark at boot time and to zero to
disable more aggressive help enlistment.
+ rcutree.rcu_delay_page_cache_fill_msec= [KNL]
+ Set the page-cache refill delay (in milliseconds)
+ in response to low-memory conditions. The
+ permitted values are in the range 0:100000.
+
+ rcutree.rcu_divisor= [KNL]
+ Set the shift-right count to use to compute
+ the callback-invocation batch limit bl from
+ the number of callbacks queued on this CPU.
+ The result will be bounded below by the value of
+ the rcutree.blimit kernel parameter. Every bl
+ callbacks, the softirq handler will exit in
+ order to allow the CPU to do other work.
+
+ Please note that this callback-invocation batch
+ limit applies only to non-offloaded callback
+ invocation. Offloaded callbacks are instead
+ invoked in the context of an rcuoc kthread, which the
+ scheduler will preempt as it does any other task.
+
+ rcutree.rcu_fanout_exact= [KNL]
+ Disable autobalancing of the rcu_node combining
+ tree. This is used by rcutorture, and might
+ possibly be useful for architectures having high
+ cache-to-cache transfer latencies.
+
+ rcutree.rcu_fanout_leaf= [KNL]
+ Change the number of CPUs assigned to each
+ leaf rcu_node structure. Useful for very
+ large systems, which will choose the value 64,
+ and for NUMA systems with large remote-access
+ latencies, which will choose a value aligned
+ with the appropriate hardware boundaries.
+
+ rcutree.rcu_min_cached_objs= [KNL]
+ Minimum number of objects which are cached and
+ maintained per CPU. Object size is equal to
+ PAGE_SIZE. The cache reduces the pressure on the
+ page allocator and makes the whole algorithm
+ behave better under low-memory conditions.
+
+ rcutree.rcu_nocb_gp_stride= [KNL]
+ Set the number of NOCB callback kthreads in
+ each group, which defaults to the square root
+ of the number of CPUs. Larger numbers reduce
+ the wakeup overhead on the global grace-period
+ kthread, but increase that same overhead on
+ each group's NOCB grace-period kthread.
+
rcutree.rcu_kick_kthreads= [KNL]
Cause the grace-period kthread to get an extra
wake_up() if it sleeps three times longer than
This wake_up() will be accompanied by a
WARN_ONCE() splat and an ftrace_dump().
+ rcutree.rcu_resched_ns= [KNL]
+ Limit the time spent invoking a batch of RCU
+ callbacks to the specified number of nanoseconds.
+ By default, this limit is checked only once
+ every 32 callbacks in order to limit the pain
+ inflicted by local_clock() overhead.
+
rcutree.rcu_unlock_delay= [KNL]
In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels,
this specifies an rcu_read_unlock()-time delay
rcu_node tree with an eye towards determining
why a new grace period has not yet started.
+ rcutree.use_softirq= [KNL]
+ If set to zero, move all RCU_SOFTIRQ processing to
+ per-CPU rcuc kthreads. Defaults to a non-zero
+ value, meaning that RCU_SOFTIRQ is used by default.
+ Specify rcutree.use_softirq=0 to use rcuc kthreads.
+
+ But note that CONFIG_PREEMPT_RT=y kernels disable
+ this kernel boot parameter, forcibly setting it
+ to zero.
+
rcuscale.gp_async= [KNL]
Measure performance of asynchronous
grace-period primitives such as call_rcu().
rcutorture.stall_cpu_block= [KNL]
Sleep while stalling if set. This will result
- in warnings from preemptible RCU in addition
- to any other stall-related activity.
+ in warnings from preemptible RCU in addition to
+ any other stall-related activity. Note that
+ in kernels built with CONFIG_PREEMPTION=n and
+ CONFIG_PREEMPT_COUNT=y, this parameter will
+ cause the CPU to pass through a quiescent state.
+ Given CONFIG_PREEMPTION=n, this will suppress
+ RCU CPU stall warnings, but will instead result
+ in scheduling-while-atomic splats.
+
+ Use of this module parameter results in splats.
+
rcutorture.stall_cpu_holdoff= [KNL]
Time to wait (s) after boot before inducing stall.
port and the regular usb controller gets disabled.
root= [KNL] Root filesystem
- See name_to_dev_t comment in init/do_mounts.c.
+ Usually this is a block device specifier of some kind,
+ see the early_lookup_bdev comment in
+ block/early-lookup.c for details.
+ Alternatively this can be "ram" for the legacy initial
+ ramdisk, "nfs" and "cifs" for root on a network file
+ system, or "mtd" and "ubi" for mounting from raw flash.
rootdelay= [KNL] Delay (in seconds) to pause before attempting to
mount the root filesystem
1: Fast pin select (default)
2: ATC IRMode
- smt= [KNL,S390] Set the maximum number of threads (logical
+ smt= [KNL,MIPS,S390] Set the maximum number of threads (logical
CPUs) to use per physical CPU on systems capable of
symmetric multithreading (SMT). Will be capped to the
actual hardware limit.
unknown_nmi_panic
[X86] Cause panic on unknown NMI.
+ unwind_debug [X86-64]
+ Enable unwinder debug output. This can be
+ useful for debugging certain unwinder error
+ conditions, including corrupt stacks and
+ bad/missing unwinder metadata.
+
usbcore.authorized_default=
[USB] Default USB device authorization:
(default -1 = authorized except for wireless USB,
it can be updated at runtime by writing to the
corresponding sysfs file.
+ workqueue.cpu_intensive_thresh_us=
+ Per-cpu work items which run for longer than this
+ threshold are automatically considered CPU intensive
+ and excluded from concurrency management to prevent
+ them from noticeably delaying other per-cpu work
+ items. Default is 10000 (10ms).
+
+ If CONFIG_WQ_CPU_INTENSIVE_REPORT is set, the kernel
+ will report the work functions which violate this
+ threshold repeatedly. They are likely good
+ candidates for using WQ_UNBOUND workqueues instead.
+
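	As an editor's illustration (not part of this patch), here is a minimal
	sketch of the remedy suggested above: queueing a long-running, CPU-bound
	work item on a WQ_UNBOUND workqueue so it is no longer subject to
	per-cpu concurrency management. The workqueue name "crunch" and the
	work function are hypothetical.

		#include <linux/module.h>
		#include <linux/workqueue.h>

		static struct workqueue_struct *crunch_wq;	/* hypothetical unbound workqueue */

		static void crunch_fn(struct work_struct *work)
		{
			/* Long-running, CPU-bound processing that would repeatedly
			 * exceed workqueue.cpu_intensive_thresh_us on a per-cpu pool. */
		}

		static DECLARE_WORK(crunch_work, crunch_fn);

		static int __init crunch_init(void)
		{
			crunch_wq = alloc_workqueue("crunch", WQ_UNBOUND, 0);
			if (!crunch_wq)
				return -ENOMEM;
			queue_work(crunch_wq, &crunch_work);	/* runs unbound, unmanaged */
			return 0;
		}

		static void __exit crunch_exit(void)
		{
			destroy_workqueue(crunch_wq);
		}

		module_init(crunch_init);
		module_exit(crunch_exit);
		MODULE_LICENSE("GPL");
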
workqueue.disable_numa
By default, all work items queued to unbound
workqueues are affine to the NUMA nodes they're
--- /dev/null
+=======================================
+Silicon Errata and Software Workarounds
+=======================================
+
+
+Date : 27 November 2015
+
+It is an unfortunate fact of life that hardware is often produced with
+so-called "errata", which can cause it to deviate from the architecture
+under specific circumstances. For hardware produced by ARM, these
+errata are broadly classified into the following categories:
+
+ ========== ========================================================
+ Category A A critical error without a viable workaround.
+ Category B A significant or critical error with an acceptable
+ workaround.
+ Category C A minor error that is not expected to occur under normal
+ operation.
+ ========== ========================================================
+
+For more information, consult one of the "Software Developers Errata
+Notice" documents available on infocenter.arm.com (registration
+required).
+
+As far as Linux is concerned, Category B errata may require some special
+treatment in the operating system. For example, avoiding a particular
+sequence of code, or configuring the processor in a particular way. A
+less common situation may require similar actions in order to declassify
+a Category A erratum into a Category C erratum. These are collectively
+known as "software workarounds" and are only required in the minority of
+cases (e.g. those cases that both require a non-secure workaround *and*
+can be triggered by Linux).
+
+For software workarounds that may adversely impact systems unaffected by
+the erratum in question, a Kconfig entry is added under "Kernel
+Features" -> "ARM errata workarounds via the alternatives framework".
+These are enabled by default and patched in at runtime when an affected
+CPU is detected. For less-intrusive workarounds, a Kconfig option is not
+available and the code is structured (preferably with a comment) in such
+a way that the erratum will not be hit.
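
(Editor's illustration, not part of this document.) A minimal sketch of how C
code typically consumes such a runtime-detected workaround, assuming a
hypothetical capability named ARM64_WORKAROUND_EXAMPLE; real errata use the
capability numbers generated from arch/arm64/tools/cpucaps:

	#include <asm/barrier.h>
	#include <asm/cpufeature.h>

	static inline void flush_widget_with_workaround(void)
	{
		/*
		 * The capability bit is set during CPU feature detection when an
		 * affected CPU is found; once capabilities are finalized the check
		 * resolves to a patched static branch, so it costs nothing on
		 * unaffected systems.
		 */
		if (cpus_have_const_cap(ARM64_WORKAROUND_EXAMPLE))
			dsb(sy);	/* hypothetical extra barrier required by the erratum */

		/* ... normal flush sequence continues here ... */
	}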
+
+This approach can make it slightly onerous to determine exactly which
+errata are worked around in an arbitrary kernel source tree, so this
+file acts as a registry of software workarounds in the Linux Kernel and
+will be updated when new workarounds are committed and backported to
+stable kernels.
+
++----------------+-----------------+-----------------+-----------------------------+
+| Implementor | Component | Erratum ID | Kconfig |
++================+=================+=================+=============================+
+| Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2038923 | ARM64_ERRATUM_2038923 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #1902691 | ARM64_ERRATUM_1902691 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A53 | #824069 | ARM64_ERRATUM_824069 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A53 | #819472 | ARM64_ERRATUM_819472 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A53 | #845719 | ARM64_ERRATUM_845719 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A53 | #843419 | ARM64_ERRATUM_843419 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A55 | #1024718 | ARM64_ERRATUM_1024718 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A55 | #1530923 | ARM64_ERRATUM_1530923 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A55 | #2441007 | ARM64_ERRATUM_2441007 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A57 | #852523 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A57 | #1319537 | ARM64_ERRATUM_1319367 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A57 | #1742098 | ARM64_ERRATUM_1742098 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A72 | #853709 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A72 | #1319367 | ARM64_ERRATUM_1319367 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A72 | #1655431 | ARM64_ERRATUM_1742098 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A76 | #1188873,1418040| ARM64_ERRATUM_1418040 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A76 | #1165522 | ARM64_ERRATUM_1165522 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A76 | #1286807 | ARM64_ERRATUM_1286807 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A76 | #1463225 | ARM64_ERRATUM_1463225 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2077057 | ARM64_ERRATUM_2077057 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2441009 | ARM64_ERRATUM_2441009 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2658417 | ARM64_ERRATUM_2658417 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A715 | #2645198 | ARM64_ERRATUM_2645198 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N1 | #1349291 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N2 | #2253138 | ARM64_ERRATUM_2253138 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | MMU-500 | #841119,826419 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | MMU-600 | #1076982,1209401| N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | MMU-700 | #2268618,2812531| N/A |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 |
++----------------+-----------------+-----------------+-----------------------------+
+| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_843419 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX ITS | #22375,24313 | CAVIUM_ERRATUM_22375 |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX GICv3 | #23154,38545 | CAVIUM_ERRATUM_23154 |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX GICv3 | #38539 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX Core | #30115 | CAVIUM_ERRATUM_30115 |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX SMMUv2 | #27704 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX2 SMMUv3| #74 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX2 SMMUv3| #126 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX2 Core | #219 | CAVIUM_TX2_ERRATUM_219 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Marvell | ARM-MMU-500 | #582743 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| NVIDIA | Carmel Core | N/A | NVIDIA_CARMEL_CNP_ERRATUM |
++----------------+-----------------+-----------------+-----------------------------+
+| NVIDIA | T241 GICv3/4.x | T241-FABRIC-4 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon | Hip0{5,6,7} | #161010101 | HISILICON_ERRATUM_161010101 |
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon | Hip0{6,7} | #161010701 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon | Hip0{6,7} | #161010803 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon | Hip07 | #161600802 | HISILICON_ERRATUM_161600802 |
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo/Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Falkor v{1,2} | E1041 | QCOM_FALKOR_ERRATUM_1041 |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1463225 |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1418040 |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo4xx Silver | N/A | ARM64_ERRATUM_1530923 |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo4xx Silver | N/A | ARM64_ERRATUM_1024718 |
++----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1286807 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Rockchip | RK3588 | #3588001 | ROCKCHIP_ERRATUM_3588001 |
++----------------+-----------------+-----------------+-----------------------------+
+
++----------------+-----------------+-----------------+-----------------------------+
+| Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 |
++----------------+-----------------+-----------------+-----------------------------+
+
++----------------+-----------------+-----------------+-----------------------------+
+| ASR | ASR8601 | #8601001 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
/* Extended Feature Bits */
- #define FEATURE_PREFETCH (1ULL<<0)
- #define FEATURE_PPR (1ULL<<1)
- #define FEATURE_X2APIC (1ULL<<2)
- #define FEATURE_NX (1ULL<<3)
- #define FEATURE_GT (1ULL<<4)
- #define FEATURE_IA (1ULL<<6)
- #define FEATURE_GA (1ULL<<7)
- #define FEATURE_HE (1ULL<<8)
- #define FEATURE_PC (1ULL<<9)
+ #define FEATURE_PREFETCH BIT_ULL(0)
+ #define FEATURE_PPR BIT_ULL(1)
+ #define FEATURE_X2APIC BIT_ULL(2)
+ #define FEATURE_NX BIT_ULL(3)
+ #define FEATURE_GT BIT_ULL(4)
+ #define FEATURE_IA BIT_ULL(6)
+ #define FEATURE_GA BIT_ULL(7)
+ #define FEATURE_HE BIT_ULL(8)
+ #define FEATURE_PC BIT_ULL(9)
#define FEATURE_GATS_SHIFT (12)
#define FEATURE_GATS_MASK (3ULL)
- #define FEATURE_GAM_VAPIC (1ULL<<21)
- #define FEATURE_GIOSUP (1ULL<<48)
- #define FEATURE_EPHSUP (1ULL<<50)
- #define FEATURE_SNP (1ULL<<63)
+ #define FEATURE_GAM_VAPIC BIT_ULL(21)
+ #define FEATURE_GIOSUP BIT_ULL(48)
+ #define FEATURE_EPHSUP BIT_ULL(50)
+ #define FEATURE_SNP BIT_ULL(63)
#define FEATURE_PASID_SHIFT 32
#define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT)
#define PASID_MASK 0x0000ffff
/* MMIO status bits */
- #define MMIO_STATUS_EVT_OVERFLOW_INT_MASK (1 << 0)
- #define MMIO_STATUS_EVT_INT_MASK (1 << 1)
- #define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2)
- #define MMIO_STATUS_PPR_INT_MASK (1 << 6)
- #define MMIO_STATUS_GALOG_RUN_MASK (1 << 8)
- #define MMIO_STATUS_GALOG_OVERFLOW_MASK (1 << 9)
- #define MMIO_STATUS_GALOG_INT_MASK (1 << 10)
+ #define MMIO_STATUS_EVT_OVERFLOW_INT_MASK BIT(0)
+ #define MMIO_STATUS_EVT_INT_MASK BIT(1)
+ #define MMIO_STATUS_COM_WAIT_INT_MASK BIT(2)
+ #define MMIO_STATUS_PPR_INT_MASK BIT(6)
+ #define MMIO_STATUS_GALOG_RUN_MASK BIT(8)
+ #define MMIO_STATUS_GALOG_OVERFLOW_MASK BIT(9)
+ #define MMIO_STATUS_GALOG_INT_MASK BIT(10)
/* event logging constants */
#define EVENT_ENTRY_SIZE 0x10
#define CONTROL_GAINT_EN 29
#define CONTROL_XT_EN 50
#define CONTROL_INTCAPXT_EN 51
+ #define CONTROL_IRTCACHEDIS 59
#define CONTROL_SNPAVIC_EN 61
#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT)
#define AMD_IOMMU_PGSIZES_V2 (PAGE_SIZE | (1ULL << 21) | (1ULL << 30))
/* Bit value definition for dte irq remapping fields*/
- #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
+ #define DTE_IRQ_PHYS_ADDR_MASK GENMASK_ULL(51, 6)
#define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60)
#define DTE_IRQ_REMAP_INTCTL (2ULL << 60)
#define DTE_IRQ_REMAP_ENABLE 1ULL
/*
* Bit value definition for I/O PTE fields
*/
- #define IOMMU_PTE_PR (1ULL << 0)
- #define IOMMU_PTE_U (1ULL << 59)
- #define IOMMU_PTE_FC (1ULL << 60)
- #define IOMMU_PTE_IR (1ULL << 61)
- #define IOMMU_PTE_IW (1ULL << 62)
+ #define IOMMU_PTE_PR BIT_ULL(0)
+ #define IOMMU_PTE_U BIT_ULL(59)
+ #define IOMMU_PTE_FC BIT_ULL(60)
+ #define IOMMU_PTE_IR BIT_ULL(61)
+ #define IOMMU_PTE_IW BIT_ULL(62)
/*
* Bit value definition for DTE fields
*/
- #define DTE_FLAG_V (1ULL << 0)
- #define DTE_FLAG_TV (1ULL << 1)
- #define DTE_FLAG_IR (1ULL << 61)
- #define DTE_FLAG_IW (1ULL << 62)
-
- #define DTE_FLAG_IOTLB (1ULL << 32)
- #define DTE_FLAG_GIOV (1ULL << 54)
- #define DTE_FLAG_GV (1ULL << 55)
+ #define DTE_FLAG_V BIT_ULL(0)
+ #define DTE_FLAG_TV BIT_ULL(1)
+ #define DTE_FLAG_IR BIT_ULL(61)
+ #define DTE_FLAG_IW BIT_ULL(62)
+
+ #define DTE_FLAG_IOTLB BIT_ULL(32)
+ #define DTE_FLAG_GIOV BIT_ULL(54)
+ #define DTE_FLAG_GV BIT_ULL(55)
#define DTE_FLAG_MASK (0x3ffULL << 32)
#define DTE_GLX_SHIFT (56)
#define DTE_GLX_MASK (3)
#define MAX_DOMAIN_ID 65536
/* Protection domain flags */
- #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
- #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
+ #define PD_DMA_OPS_MASK BIT(0) /* domain used for dma_ops */
+ #define PD_DEFAULT_MASK BIT(1) /* domain is a default dma_ops
domain for an IOMMU */
- #define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page
+ #define PD_PASSTHROUGH_MASK BIT(2) /* domain has no page
translation */
- #define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */
- #define PD_GIOV_MASK (1UL << 4) /* domain enable GIOV support */
+ #define PD_IOMMUV2_MASK BIT(3) /* domain has gcr3 table */
+ #define PD_GIOV_MASK BIT(4) /* domain enable GIOV support */
extern bool amd_iommu_dump;
#define DUMP_printk(format, arg...) \
/* if one, we need to send a completion wait command */
bool need_sync;
+ /* true if disable irte caching */
+ bool irtcachedis_enabled;
+
/* Handle for IOMMU core code */
struct iommu_device iommu;
u32 flags;
volatile u64 *cmd_sem;
- u64 cmd_sem_val;
+ atomic64_t cmd_sem_val;
#ifdef CONFIG_AMD_IOMMU_DEBUGFS
/* DebugFS Info */
* This function flushes all internal caches of
* the IOMMU used by this driver.
*/
- extern void iommu_flush_all_caches(struct amd_iommu *iommu);
+ void iommu_flush_all_caches(struct amd_iommu *iommu);
static inline int get_ioapic_devid(int id)
{
};
struct irte_ga {
- union irte_ga_lo lo;
- union irte_ga_hi hi;
+ union {
+ struct {
+ union irte_ga_lo lo;
+ union irte_ga_hi hi;
+ };
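+ /* 128-bit view of the entry, for atomic whole-IRTE updates via try_cmpxchg128() */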
+ u128 irte;
+ };
};
struct irq_2_irte {
struct irq_2_irte irq_2_irte;
struct msi_msg msi_entry;
void *entry; /* Pointer to union irte or struct irte_ga */
- void *ref; /* Pointer to the actual irte */
/**
* Store information for activate/de-activate
if (!iommu->need_sync)
return 0;
- raw_spin_lock_irqsave(&iommu->lock, flags);
-
- data = ++iommu->cmd_sem_val;
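+ /* cmd_sem_val is atomic, so the completion-wait sequence number can be generated without holding iommu->lock. */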
+ data = atomic64_add_return(1, &iommu->cmd_sem_val);
build_completion_wait(&cmd, iommu, data);
+ raw_spin_lock_irqsave(&iommu->lock, flags);
+
ret = __iommu_queue_command_sync(iommu, &cmd, false);
if (ret)
goto out_unlock;
u32 devid;
u16 last_bdf = iommu->pci_seg->last_bdf;
+ if (iommu->irtcachedis_enabled)
+ return;
+
for (devid = 0; devid <= last_bdf; devid++)
iommu_flush_irt(iommu, devid);
return amdr_ivrs_remap_support;
case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
return true;
+ case IOMMU_CAP_DEFERRED_FLUSH:
+ return true;
default:
break;
}
static struct irq_chip amd_ir_chip;
static DEFINE_SPINLOCK(iommu_table_lock);
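+ /*
+  * Flush the interrupt remapping table for @devid and synchronously wait
+  * for the flush to complete: the IRT invalidation command and a
+  * completion-wait are queued back to back under iommu->lock. Nothing is
+  * done when IRTE caching has been disabled, as there is then no cached
+  * table data to invalidate.
+  */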
+ static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid)
+ {
+ int ret;
+ u64 data;
+ unsigned long flags;
+ struct iommu_cmd cmd, cmd2;
+
+ if (iommu->irtcachedis_enabled)
+ return;
+
+ build_inv_irt(&cmd, devid);
+ data = atomic64_add_return(1, &iommu->cmd_sem_val);
+ build_completion_wait(&cmd2, iommu, data);
+
+ raw_spin_lock_irqsave(&iommu->lock, flags);
+ ret = __iommu_queue_command_sync(iommu, &cmd, true);
+ if (ret)
+ goto out;
+ ret = __iommu_queue_command_sync(iommu, &cmd2, false);
+ if (ret)
+ goto out;
+ wait_on_sem(iommu, data);
+ out:
+ raw_spin_unlock_irqrestore(&iommu->lock, flags);
+ }
+
static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid,
struct irq_remap_table *table)
{
}
static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
- struct irte_ga *irte, struct amd_ir_data *data)
+ struct irte_ga *irte)
{
- bool ret;
struct irq_remap_table *table;
- unsigned long flags;
struct irte_ga *entry;
+ unsigned long flags;
+ u128 old;
table = get_irq_table(iommu, devid);
if (!table)
entry = (struct irte_ga *)table->table;
entry = &entry[index];
- ret = cmpxchg_double(&entry->lo.val, &entry->hi.val,
- entry->lo.val, entry->hi.val,
- irte->lo.val, irte->hi.val);
/*
* We use cmpxchg16 to atomically update the 128-bit IRTE,
* and it cannot be updated by the hardware or other processors
* behind us, so the return value of cmpxchg16 should be the
* same as the old value.
*/
- WARN_ON(!ret);
+ old = entry->irte;
+ WARN_ON(!try_cmpxchg128(&entry->irte, &old, irte->irte));
- if (data)
- data->ref = entry;
-
raw_spin_unlock_irqrestore(&table->lock, flags);
- iommu_flush_irt(iommu, devid);
- iommu_completion_wait(iommu);
+ iommu_flush_irt_and_complete(iommu, devid);
return 0;
}
table->table[index] = irte->val;
raw_spin_unlock_irqrestore(&table->lock, flags);
- iommu_flush_irt(iommu, devid);
- iommu_completion_wait(iommu);
+ iommu_flush_irt_and_complete(iommu, devid);
return 0;
}
iommu->irte_ops->clear_allocated(table, index);
raw_spin_unlock_irqrestore(&table->lock, flags);
- iommu_flush_irt(iommu, devid);
- iommu_completion_wait(iommu);
+ iommu_flush_irt_and_complete(iommu, devid);
}
static void irte_prepare(void *entry,
struct irte_ga *irte = (struct irte_ga *) entry;
irte->lo.fields_remap.valid = 1;
- modify_irte_ga(iommu, devid, index, irte, NULL);
+ modify_irte_ga(iommu, devid, index, irte);
}
static void irte_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index)
struct irte_ga *irte = (struct irte_ga *) entry;
irte->lo.fields_remap.valid = 0;
- modify_irte_ga(iommu, devid, index, irte, NULL);
+ modify_irte_ga(iommu, devid, index, irte);
}
static void irte_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index,
APICID_TO_IRTE_DEST_LO(dest_apicid);
irte->hi.fields.destination =
APICID_TO_IRTE_DEST_HI(dest_apicid);
- modify_irte_ga(iommu, devid, index, irte, NULL);
+ modify_irte_ga(iommu, devid, index, irte);
}
}
entry->lo.fields_vapic.ga_tag = ir_data->ga_tag;
return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
- ir_data->irq_2_irte.index, entry, ir_data);
+ ir_data->irq_2_irte.index, entry);
}
EXPORT_SYMBOL(amd_iommu_activate_guest_mode);
APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
- ir_data->irq_2_irte.index, entry, ir_data);
+ ir_data->irq_2_irte.index, entry);
}
EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);
int amd_iommu_update_ga(int cpu, bool is_run, void *data)
{
- unsigned long flags;
- struct amd_iommu *iommu;
- struct irq_remap_table *table;
struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
- int devid = ir_data->irq_2_irte.devid;
struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
- struct irte_ga *ref = (struct irte_ga *) ir_data->ref;
if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
- !ref || !entry || !entry->lo.fields_vapic.guest_mode)
+ !entry || !entry->lo.fields_vapic.guest_mode)
return 0;
- iommu = ir_data->iommu;
- if (!iommu)
+ if (!ir_data->iommu)
return -ENODEV;
- table = get_irq_table(iommu, devid);
- if (!table)
- return -ENODEV;
-
- raw_spin_lock_irqsave(&table->lock, flags);
-
- if (ref->lo.fields_vapic.guest_mode) {
- if (cpu >= 0) {
- ref->lo.fields_vapic.destination =
- APICID_TO_IRTE_DEST_LO(cpu);
- ref->hi.fields.destination =
- APICID_TO_IRTE_DEST_HI(cpu);
- }
- ref->lo.fields_vapic.is_run = is_run;
- barrier();
+ if (cpu >= 0) {
+ entry->lo.fields_vapic.destination =
+ APICID_TO_IRTE_DEST_LO(cpu);
+ entry->hi.fields.destination =
+ APICID_TO_IRTE_DEST_HI(cpu);
}
+ entry->lo.fields_vapic.is_run = is_run;
- raw_spin_unlock_irqrestore(&table->lock, flags);
-
- iommu_flush_irt(iommu, devid);
- iommu_completion_wait(iommu);
- return 0;
+ return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
+ ir_data->irq_2_irte.index, entry);
}
EXPORT_SYMBOL(amd_iommu_update_ga);
#endif
return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
}
-static bool dev_use_swiotlb(struct device *dev)
+static bool dev_use_swiotlb(struct device *dev, size_t size,
+ enum dma_data_direction dir)
{
- return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
+ return IS_ENABLED(CONFIG_SWIOTLB) &&
+ (dev_is_untrusted(dev) ||
+ dma_kmalloc_needs_bounce(dev, size, dir));
+}
+
+static bool dev_use_sg_swiotlb(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir)
+{
+ struct scatterlist *s;
+ int i;
+
+ if (!IS_ENABLED(CONFIG_SWIOTLB))
+ return false;
+
+ if (dev_is_untrusted(dev))
+ return true;
+
+ /*
+ * If kmalloc() buffers are not DMA-safe for this device and
+ * direction, check the individual lengths in the sg list. If any
+ * element is deemed unsafe, use the swiotlb for bouncing.
+ */
+ if (!dma_kmalloc_safe(dev, dir)) {
+ for_each_sg(sg, s, nents, i)
+ if (!dma_kmalloc_size_aligned(s->length))
+ return true;
+ }
+
+ return false;
}
/**
goto done_unlock;
/* If the FQ fails we can simply fall back to strict mode */
- if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
+ if (domain->type == IOMMU_DOMAIN_DMA_FQ &&
+ (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain)))
domain->type = IOMMU_DOMAIN_DMA;
ret = iova_reserve_iommu_regions(dev, domain);
{
phys_addr_t phys;
- if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
+ if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir))
return;
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
{
phys_addr_t phys;
- if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
+ if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir))
return;
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
struct scatterlist *sg;
int i;
- if (dev_use_swiotlb(dev))
+ if (sg_dma_is_swiotlb(sgl))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
sg->length, dir);
struct scatterlist *sg;
int i;
- if (dev_use_swiotlb(dev))
+ if (sg_dma_is_swiotlb(sgl))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_device(dev,
sg_dma_address(sg),
* If both the physical buffer start address and size are
* page aligned, we don't need to use a bounce page.
*/
- if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
+ if (dev_use_swiotlb(dev, size, dir) &&
+ iova_offset(iovad, phys | size)) {
void *padding_start;
size_t padding_size, aligned_size;
sg_dma_address(s) = DMA_MAPPING_ERROR;
sg_dma_len(s) = 0;
- if (sg_is_dma_bus_address(s)) {
+ if (sg_dma_is_bus_address(s)) {
if (i > 0)
cur = sg_next(cur);
int i;
for_each_sg(sg, s, nents, i) {
- if (sg_is_dma_bus_address(s)) {
+ if (sg_dma_is_bus_address(s)) {
sg_dma_unmark_bus_address(s);
} else {
if (sg_dma_address(s) != DMA_MAPPING_ERROR)
struct scatterlist *s;
int i;
+ sg_dma_mark_swiotlb(sg);
+
for_each_sg(sg, s, nents, i) {
sg_dma_address(s) = iommu_dma_map_page(dev, sg_page(s),
s->offset, s->length, dir, attrs);
goto out;
}
- if (dev_use_swiotlb(dev))
+ if (dev_use_sg_swiotlb(dev, sg, nents, dir))
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
struct scatterlist *tmp;
int i;
- if (dev_use_swiotlb(dev)) {
+ if (sg_dma_is_swiotlb(sg)) {
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
return;
}
* just have to be determined.
*/
for_each_sg(sg, tmp, nents, i) {
- if (sg_is_dma_bus_address(tmp)) {
+ if (sg_dma_is_bus_address(tmp)) {
sg_dma_unmark_bus_address(tmp);
continue;
}
nents -= i;
for_each_sg(tmp, tmp, nents, i) {
- if (sg_is_dma_bus_address(tmp)) {
+ if (sg_dma_is_bus_address(tmp)) {
sg_dma_unmark_bus_address(tmp);
continue;
}
char *name;
};
+ /* Iterate over each struct group_device in a struct iommu_group */
+ #define for_each_group_device(group, pos) \
+ list_for_each_entry(pos, &(group)->devices, list)
+
struct iommu_group_attribute {
struct attribute attr;
ssize_t (*show)(struct iommu_group *group, char *buf);
static int iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data);
static void iommu_release_device(struct device *dev);
- static int iommu_alloc_default_domain(struct iommu_group *group,
- struct device *dev);
static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
unsigned type);
static int __iommu_attach_device(struct iommu_domain *domain,
struct device *dev);
static int __iommu_attach_group(struct iommu_domain *domain,
struct iommu_group *group);
+
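+ /*
+  * Failure to attach is not tolerated by the caller, e.g. when the group is
+  * being restored to its default or blocking domain; see
+  * __iommu_group_set_domain_nofail(), which WARNs if it happens.
+  */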
+ enum {
+ IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0,
+ };
+
+ static int __iommu_device_set_domain(struct iommu_group *group,
+ struct device *dev,
+ struct iommu_domain *new_domain,
+ unsigned int flags);
+ static int __iommu_group_set_domain_internal(struct iommu_group *group,
+ struct iommu_domain *new_domain,
+ unsigned int flags);
static int __iommu_group_set_domain(struct iommu_group *group,
- struct iommu_domain *new_domain);
- static int iommu_create_device_direct_mappings(struct iommu_group *group,
+ struct iommu_domain *new_domain)
+ {
+ return __iommu_group_set_domain_internal(group, new_domain, 0);
+ }
+ static void __iommu_group_set_domain_nofail(struct iommu_group *group,
+ struct iommu_domain *new_domain)
+ {
+ WARN_ON(__iommu_group_set_domain_internal(
+ group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED));
+ }
+
+ static int iommu_setup_default_domain(struct iommu_group *group,
+ int target_type);
+ static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
struct device *dev);
static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
static ssize_t iommu_group_store_type(struct iommu_group *group,
if (!iommu_default_passthrough() && !iommu_dma_strict)
iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;
- pr_info("Default domain type: %s %s\n",
+ pr_info("Default domain type: %s%s\n",
iommu_domain_type_str(iommu_def_domain_type),
(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
- "(set via kernel command line)" : "");
+ " (set via kernel command line)" : "");
if (!iommu_default_passthrough())
- pr_info("DMA domain TLB invalidation policy: %s mode %s\n",
+ pr_info("DMA domain TLB invalidation policy: %s mode%s\n",
iommu_dma_strict ? "strict" : "lazy",
(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
- "(set via kernel command line)" : "");
+ " (set via kernel command line)" : "");
nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL);
if (!nb)
dev->iommu->iommu_dev = iommu_dev;
dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
+ if (ops->is_attach_deferred)
+ dev->iommu->attach_deferred = ops->is_attach_deferred(dev);
group = iommu_group_get_for_dev(dev);
if (IS_ERR(group)) {
return ret;
}
- static bool iommu_is_attach_deferred(struct device *dev)
- {
- const struct iommu_ops *ops = dev_iommu_ops(dev);
-
- if (ops->is_attach_deferred)
- return ops->is_attach_deferred(dev);
-
- return false;
- }
-
- static int iommu_group_do_dma_first_attach(struct device *dev, void *data)
- {
- struct iommu_domain *domain = data;
-
- lockdep_assert_held(&dev->iommu_group->mutex);
-
- if (iommu_is_attach_deferred(dev)) {
- dev->iommu->attach_deferred = 1;
- return 0;
- }
-
- return __iommu_attach_device(domain, dev);
- }
-
int iommu_probe_device(struct device *dev)
{
const struct iommu_ops *ops;
goto err_release;
}
- /*
- * Try to allocate a default domain - needs support from the
- * IOMMU driver. There are still some drivers which don't
- * support default domains, so the return value is not yet
- * checked.
- */
mutex_lock(&group->mutex);
- iommu_alloc_default_domain(group, dev);
- /*
- * If device joined an existing group which has been claimed, don't
- * attach the default domain.
- */
- if (group->default_domain && !group->owner) {
- ret = iommu_group_do_dma_first_attach(dev, group->default_domain);
- if (ret) {
- mutex_unlock(&group->mutex);
- iommu_group_put(group);
- goto err_release;
- }
- }
+ if (group->default_domain)
+ iommu_create_device_direct_mappings(group->default_domain, dev);
- iommu_create_device_direct_mappings(group, dev);
+ if (group->domain) {
+ ret = __iommu_device_set_domain(group, dev, group->domain, 0);
+ if (ret)
+ goto err_unlock;
+ } else if (!group->default_domain) {
+ ret = iommu_setup_default_domain(group, 0);
+ if (ret)
+ goto err_unlock;
+ }
mutex_unlock(&group->mutex);
iommu_group_put(group);
return 0;
+ err_unlock:
+ mutex_unlock(&group->mutex);
+ iommu_group_put(group);
err_release:
iommu_release_device(dev);
struct group_device *device;
lockdep_assert_held(&group->mutex);
- list_for_each_entry(device, &group->devices, list) {
+ for_each_group_device(group, device) {
if (device->dev == dev) {
list_del(&device->list);
return device;
int ret = 0;
mutex_lock(&group->mutex);
- list_for_each_entry(device, &group->devices, list) {
+ for_each_group_device(group, device) {
struct list_head dev_resv_regions;
/*
}
EXPORT_SYMBOL_GPL(iommu_group_set_name);
- static int iommu_create_device_direct_mappings(struct iommu_group *group,
+ static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
struct device *dev)
{
- struct iommu_domain *domain = group->default_domain;
struct iommu_resv_region *entry;
struct list_head mappings;
unsigned long pg_size;
int ret = 0;
- if (!domain || !iommu_is_dma_domain(domain))
+ if (!iommu_is_dma_domain(domain))
return 0;
BUG_ON(!domain->pgsize_bitmap);
mutex_lock(&group->mutex);
list_add_tail(&device->list, &group->devices);
- if (group->domain)
- ret = iommu_group_do_dma_first_attach(dev, group->domain);
mutex_unlock(&group->mutex);
- if (ret)
- goto err_put_group;
-
trace_add_device_to_group(group->id, dev);
dev_info(dev, "Adding to iommu group %d\n", group->id);
return 0;
- err_put_group:
- mutex_lock(&group->mutex);
- list_del(&device->list);
- mutex_unlock(&group->mutex);
- dev->iommu_group = NULL;
- kobject_put(group->devices_kobj);
- sysfs_remove_link(group->devices_kobj, device->name);
err_free_name:
kfree(device->name);
err_remove_link:
}
EXPORT_SYMBOL_GPL(iommu_group_remove_device);
- static int iommu_group_device_count(struct iommu_group *group)
- {
- struct group_device *entry;
- int ret = 0;
-
- list_for_each_entry(entry, &group->devices, list)
- ret++;
-
- return ret;
- }
-
- static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
- int (*fn)(struct device *, void *))
- {
- struct group_device *device;
- int ret = 0;
-
- list_for_each_entry(device, &group->devices, list) {
- ret = fn(device->dev, data);
- if (ret)
- break;
- }
- return ret;
- }
-
/**
* iommu_group_for_each_dev - iterate over each device in the group
* @group: the group
int iommu_group_for_each_dev(struct iommu_group *group, void *data,
int (*fn)(struct device *, void *))
{
- int ret;
+ struct group_device *device;
+ int ret = 0;
mutex_lock(&group->mutex);
- ret = __iommu_group_for_each_dev(group, data, fn);
+ for_each_group_device(group, device) {
+ ret = fn(device->dev, data);
+ if (ret)
+ break;
+ }
mutex_unlock(&group->mutex);
return ret;
return 0;
}
- static int iommu_group_alloc_default_domain(const struct bus_type *bus,
- struct iommu_group *group,
- unsigned int type)
+ static struct iommu_domain *
+ __iommu_group_alloc_default_domain(const struct bus_type *bus,
+ struct iommu_group *group, int req_type)
{
- struct iommu_domain *dom;
-
- dom = __iommu_domain_alloc(bus, type);
- if (!dom && type != IOMMU_DOMAIN_DMA) {
- dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA);
- if (dom)
- pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
- type, group->name);
- }
-
- if (!dom)
- return -ENOMEM;
-
- group->default_domain = dom;
- if (!group->domain)
- group->domain = dom;
- return 0;
+ if (group->default_domain && group->default_domain->type == req_type)
+ return group->default_domain;
+ return __iommu_domain_alloc(bus, req_type);
}
- static int iommu_alloc_default_domain(struct iommu_group *group,
- struct device *dev)
+ /*
+ * req_type of 0 means "auto" which means to select a domain based on
+ * iommu_def_domain_type or what the driver actually supports.
+ */
+ static struct iommu_domain *
+ iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
{
- unsigned int type;
+ const struct bus_type *bus =
+ list_first_entry(&group->devices, struct group_device, list)
+ ->dev->bus;
+ struct iommu_domain *dom;
- if (group->default_domain)
- return 0;
+ lockdep_assert_held(&group->mutex);
+
+ if (req_type)
+ return __iommu_group_alloc_default_domain(bus, group, req_type);
- type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type;
+ /* The driver gave no guidance on what type to use, try the default */
+ dom = __iommu_group_alloc_default_domain(bus, group, iommu_def_domain_type);
+ if (dom)
+ return dom;
- return iommu_group_alloc_default_domain(dev->bus, group, type);
+ /* Otherwise IDENTITY and DMA_FQ defaults will try DMA */
+ if (iommu_def_domain_type == IOMMU_DOMAIN_DMA)
+ return NULL;
+ dom = __iommu_group_alloc_default_domain(bus, group, IOMMU_DOMAIN_DMA);
+ if (!dom)
+ return NULL;
+
+ pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
+ iommu_def_domain_type, group->name);
+ return dom;
}
/**
return 0;
}
- struct __group_domain_type {
- struct device *dev;
- unsigned int type;
- };
-
- static int probe_get_default_domain_type(struct device *dev, void *data)
- {
- struct __group_domain_type *gtype = data;
- unsigned int type = iommu_get_def_domain_type(dev);
-
- if (type) {
- if (gtype->type && gtype->type != type) {
- dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
- iommu_domain_type_str(type),
- dev_name(gtype->dev),
- iommu_domain_type_str(gtype->type));
- gtype->type = 0;
- }
-
- if (!gtype->dev) {
- gtype->dev = dev;
- gtype->type = type;
- }
- }
-
- return 0;
- }
-
- static void probe_alloc_default_domain(const struct bus_type *bus,
- struct iommu_group *group)
+ /* A target_type of 0 will select the best domain type and cannot fail */
+ static int iommu_get_default_domain_type(struct iommu_group *group,
+ int target_type)
{
- struct __group_domain_type gtype;
+ int best_type = target_type;
+ struct group_device *gdev;
+ struct device *last_dev;
- memset(&gtype, 0, sizeof(gtype));
-
- /* Ask for default domain requirements of all devices in the group */
- __iommu_group_for_each_dev(group, &gtype,
- probe_get_default_domain_type);
-
- if (!gtype.type)
- gtype.type = iommu_def_domain_type;
+ lockdep_assert_held(&group->mutex);
- iommu_group_alloc_default_domain(bus, group, gtype.type);
+ for_each_group_device(group, gdev) {
+ unsigned int type = iommu_get_def_domain_type(gdev->dev);
- }
+ if (best_type && type && best_type != type) {
+ if (target_type) {
+ dev_err_ratelimited(
+ gdev->dev,
+ "Device cannot be in %s domain\n",
+ iommu_domain_type_str(target_type));
+ return -1;
+ }
- static int __iommu_group_dma_first_attach(struct iommu_group *group)
- {
- return __iommu_group_for_each_dev(group, group->default_domain,
- iommu_group_do_dma_first_attach);
+ dev_warn(
+ gdev->dev,
+ "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
+ iommu_domain_type_str(type), dev_name(last_dev),
+ iommu_domain_type_str(best_type));
+ return 0;
+ }
+ if (!best_type)
+ best_type = type;
+ last_dev = gdev->dev;
+ }
+ return best_type;
}
- static int iommu_group_do_probe_finalize(struct device *dev, void *data)
+ static void iommu_group_do_probe_finalize(struct device *dev)
{
const struct iommu_ops *ops = dev_iommu_ops(dev);
if (ops->probe_finalize)
ops->probe_finalize(dev);
-
- return 0;
- }
-
- static void __iommu_group_dma_finalize(struct iommu_group *group)
- {
- __iommu_group_for_each_dev(group, group->default_domain,
- iommu_group_do_probe_finalize);
- }
-
- static int iommu_do_create_direct_mappings(struct device *dev, void *data)
- {
- struct iommu_group *group = data;
-
- iommu_create_device_direct_mappings(group, dev);
-
- return 0;
- }
-
- static int iommu_group_create_direct_mappings(struct iommu_group *group)
- {
- return __iommu_group_for_each_dev(group, group,
- iommu_do_create_direct_mappings);
}
int bus_iommu_probe(const struct bus_type *bus)
return ret;
list_for_each_entry_safe(group, next, &group_list, entry) {
+ struct group_device *gdev;
+
mutex_lock(&group->mutex);
/* Remove item from the list */
list_del_init(&group->entry);
- /* Try to allocate default domain */
- probe_alloc_default_domain(bus, group);
-
- if (!group->default_domain) {
+ ret = iommu_setup_default_domain(group, 0);
+ if (ret) {
mutex_unlock(&group->mutex);
- continue;
+ return ret;
}
-
- iommu_group_create_direct_mappings(group);
-
- ret = __iommu_group_dma_first_attach(group);
-
mutex_unlock(&group->mutex);
- if (ret)
- break;
-
- __iommu_group_dma_finalize(group);
+ /*
+ * FIXME: Mis-locked because the ops->probe_finalize() call-back
+ * of some IOMMU drivers calls arm_iommu_attach_device() which
+ * in turn might call back into IOMMU core code, where it tries
+ * to take group->mutex, resulting in a deadlock.
+ */
+ for_each_group_device(group, gdev)
+ iommu_group_do_probe_finalize(gdev->dev);
}
- return ret;
+ return 0;
}
bool iommu_present(const struct bus_type *bus)
bool ret = true;
mutex_lock(&group->mutex);
- list_for_each_entry(group_dev, &group->devices, list)
+ for_each_group_device(group, group_dev)
ret &= msi_device_has_isolated_msi(group_dev->dev);
mutex_unlock(&group->mutex);
return ret;
unsigned type)
{
struct iommu_domain *domain;
+ unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS;
if (bus == NULL || bus->iommu_ops == NULL)
return NULL;
- domain = bus->iommu_ops->domain_alloc(type);
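+ /*
+ * Drivers only see the bits covered by IOMMU_DOMAIN_ALLOC_FLAGS;
+ * any core-internal type bits were masked off above.
+ */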
+ domain = bus->iommu_ops->domain_alloc(alloc_type);
if (!domain)
return NULL;
static void __iommu_group_set_core_domain(struct iommu_group *group)
{
struct iommu_domain *new_domain;
- int ret;
if (group->owner)
new_domain = group->blocking_domain;
else
new_domain = group->default_domain;
- ret = __iommu_group_set_domain(group, new_domain);
- WARN(ret, "iommu driver failed to attach the default/blocking domain");
+ __iommu_group_set_domain_nofail(group, new_domain);
}
static int __iommu_attach_device(struct iommu_domain *domain,
*/
mutex_lock(&group->mutex);
ret = -EINVAL;
- if (iommu_group_device_count(group) != 1)
+ if (list_count_nodes(&group->devices) != 1)
goto out_unlock;
ret = __iommu_attach_group(domain, group);
mutex_lock(&group->mutex);
if (WARN_ON(domain != group->domain) ||
- WARN_ON(iommu_group_device_count(group) != 1))
+ WARN_ON(list_count_nodes(&group->devices) != 1))
goto out_unlock;
__iommu_group_set_core_domain(group);
return dev->iommu_group->default_domain;
}
- /*
- * IOMMU groups are really the natural working unit of the IOMMU, but
- * the IOMMU API works on domains and devices. Bridge that gap by
- * iterating over the devices in a group. Ideally we'd have a single
- * device which represents the requestor ID of the group, but we also
- * allow IOMMU drivers to create policy defined minimum sets, where
- * the physical hardware may be able to distiguish members, but we
- * wish to group them at a higher level (ex. untrusted multi-function
- * PCI devices). Thus we attach each device.
- */
- static int iommu_group_do_attach_device(struct device *dev, void *data)
- {
- struct iommu_domain *domain = data;
-
- return __iommu_attach_device(domain, dev);
- }
-
static int __iommu_attach_group(struct iommu_domain *domain,
struct iommu_group *group)
{
- int ret;
-
if (group->domain && group->domain != group->default_domain &&
group->domain != group->blocking_domain)
return -EBUSY;
- ret = __iommu_group_for_each_dev(group, domain,
- iommu_group_do_attach_device);
- if (ret == 0) {
- group->domain = domain;
- } else {
- /*
- * To recover from the case when certain device within the
- * group fails to attach to the new domain, we need force
- * attaching all devices back to the old domain. The old
- * domain is compatible for all devices in the group,
- * hence the iommu driver should always return success.
- */
- struct iommu_domain *old_domain = group->domain;
-
- group->domain = NULL;
- WARN(__iommu_group_set_domain(group, old_domain),
- "iommu driver failed to attach a compatible domain");
- }
-
- return ret;
+ return __iommu_group_set_domain(group, domain);
}
/**
}
EXPORT_SYMBOL_GPL(iommu_attach_group);
- static int iommu_group_do_set_platform_dma(struct device *dev, void *data)
+ static int __iommu_device_set_domain(struct iommu_group *group,
+ struct device *dev,
+ struct iommu_domain *new_domain,
+ unsigned int flags)
{
- const struct iommu_ops *ops = dev_iommu_ops(dev);
+ int ret;
- if (!WARN_ON(!ops->set_platform_dma_ops))
- ops->set_platform_dma_ops(dev);
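+ /*
+ * A deferred attach is resolved lazily: the device stays on the
+ * default domain until a different domain is requested, at which
+ * point the deferral is cleared and a real attach is performed.
+ */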
+ if (dev->iommu->attach_deferred) {
+ if (new_domain == group->default_domain)
+ return 0;
+ dev->iommu->attach_deferred = 0;
+ }
+ ret = __iommu_attach_device(new_domain, dev);
+ if (ret) {
+ /*
+ * If we have a blocking domain then try to attach that in hopes
+ * of avoiding a UAF. Modern drivers should implement blocking
+ * domains as global statics that cannot fail.
+ */
+ if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) &&
+ group->blocking_domain &&
+ group->blocking_domain != new_domain)
+ __iommu_attach_device(group->blocking_domain, dev);
+ return ret;
+ }
return 0;
}
- static int __iommu_group_set_domain(struct iommu_group *group,
- struct iommu_domain *new_domain)
+ /*
+ * If 0 is returned the group's domain is new_domain. If an error is returned
+ * then the group's domain will be set back to the existing domain, unless
+ * IOMMU_SET_DOMAIN_MUST_SUCCEED is given, in which case the error is returned
+ * and the group's domain may be left inconsistent. It is a driver bug to fail
+ * an attach to a previously good domain; we try to avoid a kernel UAF because
+ * of this.
+ *
+ * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU
+ * API works on domains and devices. Bridge that gap by iterating over the
+ * devices in a group. Ideally we'd have a single device which represents the
+ * requestor ID of the group, but we also allow IOMMU drivers to create policy
+ * defined minimum sets, where the physical hardware may be able to distinguish
+ * members, but we wish to group them at a higher level (ex. untrusted
+ * multi-function PCI devices). Thus we attach each device.
+ */
+ static int __iommu_group_set_domain_internal(struct iommu_group *group,
+ struct iommu_domain *new_domain,
+ unsigned int flags)
{
+ struct group_device *last_gdev;
+ struct group_device *gdev;
+ int result;
int ret;
+ lockdep_assert_held(&group->mutex);
+
if (group->domain == new_domain)
return 0;
* platform specific behavior.
*/
if (!new_domain) {
- __iommu_group_for_each_dev(group, NULL,
- iommu_group_do_set_platform_dma);
+ for_each_group_device(group, gdev) {
+ const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
+
+ if (!WARN_ON(!ops->set_platform_dma_ops))
+ ops->set_platform_dma_ops(gdev->dev);
+ }
group->domain = NULL;
return 0;
}
* domain. This switch does not have to be atomic and DMA can be
* discarded during the transition. DMA must only be able to access
* either new_domain or group->domain, never something else.
- *
- * Note that this is called in error unwind paths, attaching to a
- * domain that has already been attached cannot fail.
*/
- ret = __iommu_group_for_each_dev(group, new_domain,
- iommu_group_do_attach_device);
- if (ret)
- return ret;
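+ /*
+ * Attach each device to new_domain. With MUST_SUCCEED we keep going
+ * after a failure and return an error at the end; otherwise we stop
+ * at the first failure and revert the devices already moved.
+ */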
+ result = 0;
+ for_each_group_device(group, gdev) {
+ ret = __iommu_device_set_domain(group, gdev->dev, new_domain,
+ flags);
+ if (ret) {
+ result = ret;
+ /*
+ * Keep trying the other devices in the group. If a
+ * driver fails attach to an otherwise good domain, and
+ * does not support blocking domains, it should at least
+ * drop its reference on the current domain so we don't
+ * UAF.
+ */
+ if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED)
+ continue;
+ goto err_revert;
+ }
+ }
group->domain = new_domain;
- return 0;
+ return result;
+
+ err_revert:
+ /*
+ * This is called in error unwind paths. A well-behaved driver should
+ * always allow us to attach to a domain that was already attached.
+ */
+ last_gdev = gdev;
+ for_each_group_device(group, gdev) {
+ const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
+
+ /*
+ * If set_platform_dma_ops is not present a NULL domain can
+ * happen only for first probe, in which case we leave
+ * group->domain as NULL and let release clean everything up.
+ */
+ if (group->domain)
+ WARN_ON(__iommu_device_set_domain(
+ group, gdev->dev, group->domain,
+ IOMMU_SET_DOMAIN_MUST_SUCCEED));
+ else if (ops->set_platform_dma_ops)
+ ops->set_platform_dma_ops(gdev->dev);
+ if (gdev == last_gdev)
+ break;
+ }
+ return ret;
}
void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
len = 0;
}
- if (sg_is_dma_bus_address(sg))
+ if (sg_dma_is_bus_address(sg))
goto next;
if (len) {
}
EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
- /*
- * Changes the default domain of an iommu group
- *
- * @group: The group for which the default domain should be changed
- * @dev: The first device in the group
- * @type: The type of the new default domain that gets associated with the group
- *
- * Returns 0 on success and error code on failure
+ /**
+ * iommu_setup_default_domain - Set the default_domain for the group
+ * @group: Group to change
+ * @target_type: Domain type to set as the default_domain
*
- * Note:
- * 1. Presently, this function is called only when user requests to change the
- * group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type
- * Please take a closer look if intended to use for other purposes.
+ * Allocate a default domain and set it as the current domain on the group. If
+ * the group already has a default domain it will be changed to the target_type.
+ * When target_type is 0 the default domain is selected based on driver and
+ * system preferences.
*/
- static int iommu_change_dev_def_domain(struct iommu_group *group,
- struct device *dev, int type)
+ static int iommu_setup_default_domain(struct iommu_group *group,
+ int target_type)
{
- struct __group_domain_type gtype = {NULL, 0};
- struct iommu_domain *prev_dom;
+ struct iommu_domain *old_dom = group->default_domain;
+ struct group_device *gdev;
+ struct iommu_domain *dom;
+ bool direct_failed;
+ int req_type;
int ret;
lockdep_assert_held(&group->mutex);
- prev_dom = group->default_domain;
- __iommu_group_for_each_dev(group, &gtype,
- probe_get_default_domain_type);
- if (!type) {
- /*
- * If the user hasn't requested any specific type of domain and
- * if the device supports both the domains, then default to the
- * domain the device was booted with
- */
- type = gtype.type ? : iommu_def_domain_type;
- } else if (gtype.type && type != gtype.type) {
- dev_err_ratelimited(dev, "Device cannot be in %s domain\n",
- iommu_domain_type_str(type));
+ req_type = iommu_get_default_domain_type(group, target_type);
+ if (req_type < 0)
return -EINVAL;
- }
/*
- * Switch to a new domain only if the requested domain type is different
- * from the existing default domain type
+ * There are still some drivers which don't support default domains, so
+ * we ignore the failure and leave group->default_domain NULL.
+ *
+ * We assume that the iommu driver starts up the device in
+ * 'set_platform_dma_ops' mode if it does not support default domains.
*/
- if (prev_dom->type == type)
+ dom = iommu_group_alloc_default_domain(group, req_type);
+ if (!dom) {
+ /* Once in default_domain mode we never leave */
+ if (group->default_domain)
+ return -ENODEV;
+ group->default_domain = NULL;
return 0;
+ }
- group->default_domain = NULL;
- group->domain = NULL;
-
- /* Sets group->default_domain to the newly allocated domain */
- ret = iommu_group_alloc_default_domain(dev->bus, group, type);
- if (ret)
- goto restore_old_domain;
-
- ret = iommu_group_create_direct_mappings(group);
- if (ret)
- goto free_new_domain;
-
- ret = __iommu_attach_group(group->default_domain, group);
- if (ret)
- goto free_new_domain;
+ if (group->default_domain == dom)
+ return 0;
- iommu_domain_free(prev_dom);
+ /*
+ * IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be
+ * mapped before their device is attached, in order to guarantee
+ * continuity with any FW activity
+ */
+ direct_failed = false;
+ for_each_group_device(group, gdev) {
+ if (iommu_create_device_direct_mappings(dom, gdev->dev)) {
+ direct_failed = true;
+ dev_warn_once(
+ gdev->dev->iommu->iommu_dev->dev,
+ "IOMMU driver was not able to establish FW requested direct mapping.");
+ }
+ }
- return 0;
+ /* We must set default_domain early for __iommu_device_set_domain */
+ group->default_domain = dom;
+ if (!group->domain) {
+ /*
+ * Drivers are not allowed to fail the first domain attach.
+ * The only way to recover from this is to fail attaching the
+ * iommu driver and call ops->release_device. Put the domain
+ * in group->default_domain so it is freed after.
+ */
+ ret = __iommu_group_set_domain_internal(
+ group, dom, IOMMU_SET_DOMAIN_MUST_SUCCEED);
+ if (WARN_ON(ret))
+ goto out_free;
+ } else {
+ ret = __iommu_group_set_domain(group, dom);
+ if (ret) {
+ iommu_domain_free(dom);
+ group->default_domain = old_dom;
+ return ret;
+ }
+ }
- free_new_domain:
- iommu_domain_free(group->default_domain);
- restore_old_domain:
- group->default_domain = prev_dom;
- group->domain = prev_dom;
+ /*
+ * Drivers are supposed to allow mappings to be installed in a domain
+ * before device attachment, but some don't. Hack around this defect by
+ * trying again after attaching. If this happens it means the device
+ * will not continuously have the IOMMU_RESV_DIRECT map.
+ */
+ if (direct_failed) {
+ for_each_group_device(group, gdev) {
+ ret = iommu_create_device_direct_mappings(dom, gdev->dev);
+ if (ret)
+ goto err_restore;
+ }
+ }
+
+ return 0;
+
+ err_restore:
+ if (old_dom) {
+ __iommu_group_set_domain_internal(
+ group, old_dom, IOMMU_SET_DOMAIN_MUST_SUCCEED);
+ group->default_domain = old_dom;
+ iommu_domain_free(dom);
+ old_dom = NULL;
+ }
+ out_free:
+ if (old_dom)
+ iommu_domain_free(old_dom);
return ret;
}
static ssize_t iommu_group_store_type(struct iommu_group *group,
const char *buf, size_t count)
{
- struct group_device *grp_dev;
- struct device *dev;
+ struct group_device *gdev;
int ret, req_type;
if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
if (req_type == IOMMU_DOMAIN_DMA_FQ &&
group->default_domain->type == IOMMU_DOMAIN_DMA) {
ret = iommu_dma_init_fq(group->default_domain);
- if (!ret)
- group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
- mutex_unlock(&group->mutex);
+ if (ret)
+ goto out_unlock;
- return ret ?: count;
+ group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
+ ret = count;
+ goto out_unlock;
}
/* Otherwise, ensure that device exists and no driver is bound. */
if (list_empty(&group->devices) || group->owner_cnt) {
- mutex_unlock(&group->mutex);
- return -EPERM;
+ ret = -EPERM;
+ goto out_unlock;
}
- grp_dev = list_first_entry(&group->devices, struct group_device, list);
- dev = grp_dev->dev;
-
- ret = iommu_change_dev_def_domain(group, dev, req_type);
+ ret = iommu_setup_default_domain(group, req_type);
+ if (ret)
+ goto out_unlock;
/*
* Release the mutex here because ops->probe_finalize() call-back of
mutex_unlock(&group->mutex);
/* Make sure dma_ops is appropriatley set */
- if (!ret)
- __iommu_group_dma_finalize(group);
+ for_each_group_device(group, gdev)
+ iommu_group_do_probe_finalize(gdev->dev);
+ return count;
+ out_unlock:
+ mutex_unlock(&group->mutex);
return ret ?: count;
}
static void __iommu_release_dma_ownership(struct iommu_group *group)
{
- int ret;
-
if (WARN_ON(!group->owner_cnt || !group->owner ||
!xa_empty(&group->pasid_array)))
return;
group->owner_cnt = 0;
group->owner = NULL;
- ret = __iommu_group_set_domain(group, group->default_domain);
- WARN(ret, "iommu driver failed to attach the default domain");
+ __iommu_group_set_domain_nofail(group, group->default_domain);
}
/**
struct group_device *device;
int ret = 0;
- list_for_each_entry(device, &group->devices, list) {
+ for_each_group_device(group, device) {
ret = domain->ops->set_dev_pasid(domain, device->dev, pasid);
if (ret)
break;
struct group_device *device;
const struct iommu_ops *ops;
- list_for_each_entry(device, &group->devices, list) {
+ for_each_group_device(group, device) {
ops = dev_iommu_ops(device->dev);
ops->remove_dev_pasid(device->dev, pasid);
}