Git Repo - linux.git/commitdiff
Merge tag 'iommu-updates-v6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/joro...
author: Linus Torvalds <[email protected]>
Sun, 30 Apr 2023 20:00:38 +0000 (13:00 -0700)
committer: Linus Torvalds <[email protected]>
Sun, 30 Apr 2023 20:00:38 +0000 (13:00 -0700)
Pull iommu updates from Joerg Roedel:

 - Convert to platform remove callback returning void

 - Extend changing default domain to normal group

 - Intel VT-d updates:
     - Remove VT-d virtual command interface and IOASID
     - Allow the VT-d driver to support non-PRI IOPF
     - Remove PASID supervisor request support
     - Various small and misc cleanups

 - ARM SMMU updates:
     - Device-tree binding updates:
         * Allow Qualcomm GPU SMMUs to accept relevant clock properties
         * Document Qualcomm 8550 SoC as implementing an MMU-500
         * Favour new "qcom,smmu-500" binding for Adreno SMMUs

     - Fix S2CR quirk detection on non-architectural Qualcomm SMMU
       implementations

     - Acknowledge SMMUv3 PRI queue overflow when consuming events

     - Document (in a comment) why ATS is disabled for bypass streams

 - AMD IOMMU updates:
     - 5-level page-table support
     - NUMA awareness for memory allocations

 - Unisoc driver: Support for reattaching an existing domain

 - Rockchip driver: Add missing set_platform_dma_ops callback

 - Mediatek driver: Adjust the dma-ranges

 - Various other small fixes and cleanups

* tag 'iommu-updates-v6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (82 commits)
  iommu: Remove iommu_group_get_by_id()
  iommu: Make iommu_release_device() static
  iommu/vt-d: Remove BUG_ON in dmar_insert_dev_scope()
  iommu/vt-d: Remove a useless BUG_ON(dev->is_virtfn)
  iommu/vt-d: Remove BUG_ON in map/unmap()
  iommu/vt-d: Remove BUG_ON when domain->pgd is NULL
  iommu/vt-d: Remove BUG_ON in handling iotlb cache invalidation
  iommu/vt-d: Remove BUG_ON on checking valid pfn range
  iommu/vt-d: Make size of operands same in bitwise operations
  iommu/vt-d: Remove PASID supervisor request support
  iommu/vt-d: Use non-privileged mode for all PASIDs
  iommu/vt-d: Remove extern from function prototypes
  iommu/vt-d: Do not use GFP_ATOMIC when not needed
  iommu/vt-d: Remove unnecessary checks in iopf disabling path
  iommu/vt-d: Move PRI handling to IOPF feature path
  iommu/vt-d: Move pfsid and ats_qdep calculation to device probe path
  iommu/vt-d: Move iopf code from SVA to IOPF enabling path
  iommu/vt-d: Allow SVA with device-specific IOPF
  dmaengine: idxd: Add enable/disable device IOPF feature
  arm64: dts: mt8186: Add dma-ranges for the parent "soc" node
  ...

15 files changed:
1  2 
Documentation/arch/x86/sva.rst
arch/arm64/boot/dts/mediatek/mt8186.dtsi
arch/arm64/boot/dts/mediatek/mt8195.dtsi
arch/x86/kernel/process_64.c
arch/x86/kernel/traps.c
drivers/iommu/intel/Kconfig
drivers/iommu/iommu-sva.c
drivers/iommu/iommu.c
drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_drv.c
drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc_drv.c
include/linux/iommu.h
include/linux/sched/mm.h
kernel/fork.c
mm/init-mm.c

index 2e9b8b0f9a0f491c6773db0604f389e6741397c9,0000000000000000000000000000000000000000..33cb05005982028409061ddfcb3c8290e754d410
mode 100644,000000..100644
--- /dev/null
@@@ -1,286 -1,0 +1,286 @@@
- PASID is initialized as INVALID_IOASID (-1) when a process is created.
 +.. SPDX-License-Identifier: GPL-2.0
 +
 +===========================================
 +Shared Virtual Addressing (SVA) with ENQCMD
 +===========================================
 +
 +Background
 +==========
 +
 +Shared Virtual Addressing (SVA) allows the processor and device to use the
 +same virtual addresses avoiding the need for software to translate virtual
 +addresses to physical addresses. SVA is what PCIe calls Shared Virtual
 +Memory (SVM).
 +
 +In addition to the convenience of using application virtual addresses
 +by the device, it also doesn't require pinning pages for DMA.
 +PCIe Address Translation Services (ATS) along with Page Request Interface
 +(PRI) allow devices to function much the same way as the CPU handling
 +application page-faults. For more information please refer to the PCIe
 +specification Chapter 10: ATS Specification.
 +
 +Use of SVA requires IOMMU support in the platform. IOMMU is also
 +required to support the PCIe features ATS and PRI. ATS allows devices
 +to cache translations for virtual addresses. The IOMMU driver uses the
 +mmu_notifier() support to keep the device TLB cache and the CPU cache in
 +sync. When an ATS lookup fails for a virtual address, the device should
 +use the PRI in order to request the virtual address to be paged into the
 +CPU page tables. The device must use ATS again in order to fetch the
 +translation before use.
 +
 +Shared Hardware Workqueues
 +==========================
 +
 +Unlike Single Root I/O Virtualization (SR-IOV), Scalable IOV (SIOV) permits
 +the use of Shared Work Queues (SWQ) by both applications and Virtual
 +Machines (VM's). This allows better hardware utilization vs. hard
 +partitioning resources that could result in under utilization. In order to
 +allow the hardware to distinguish the context for which work is being
 +executed in the hardware by SWQ interface, SIOV uses Process Address Space
 +ID (PASID), which is a 20-bit number defined by the PCIe SIG.
 +
 +PASID value is encoded in all transactions from the device. This allows the
 +IOMMU to track I/O on a per-PASID granularity in addition to using the PCIe
 +Resource Identifier (RID) which is the Bus/Device/Function.
 +
 +
 +ENQCMD
 +======
 +
 +ENQCMD is a new instruction on Intel platforms that atomically submits a
 +work descriptor to a device. The descriptor includes the operation to be
 +performed, virtual addresses of all parameters, virtual address of a completion
 +record, and the PASID (process address space ID) of the current process.
 +
 +ENQCMD works with non-posted semantics and carries a status back if the
 +command was accepted by hardware. This allows the submitter to know if the
 +submission needs to be retried or other device specific mechanisms to
 +implement fairness or ensure forward progress should be provided.
 +
 +ENQCMD is the glue that ensures applications can directly submit commands
 +to the hardware and also permits hardware to be aware of application context
 +to perform I/O operations via use of PASID.
 +
 +Process Address Space Tagging
 +=============================
 +
 +A new thread-scoped MSR (IA32_PASID) provides the connection between
 +user processes and the rest of the hardware. When an application first
 +accesses an SVA-capable device, this MSR is initialized with a newly
 +allocated PASID. The driver for the device calls an IOMMU-specific API
 +that sets up the routing for DMA and page-requests.
 +
 +For example, the Intel Data Streaming Accelerator (DSA) uses
 +iommu_sva_bind_device(), which will do the following:
 +
 +- Allocate the PASID, and program the process page-table (%cr3 register) in the
 +  PASID context entries.
 +- Register for mmu_notifier() to track any page-table invalidations to keep
 +  the device TLB in sync. For example, when a page-table entry is invalidated,
 +  the IOMMU propagates the invalidation to the device TLB. This will force any
 +  future access by the device to this virtual address to participate in
 +  ATS. If the IOMMU responds with proper response that a page is not
 +  present, the device would request the page to be paged in via the PCIe PRI
 +  protocol before performing I/O.
 +
 +This MSR is managed with the XSAVE feature set as "supervisor state" to
 +ensure the MSR is updated during context switch.
 +
 +PASID Management
 +================
 +
 +The kernel must allocate a PASID on behalf of each process which will use
 +ENQCMD and program it into the new MSR to communicate the process identity to
 +platform hardware.  ENQCMD uses the PASID stored in this MSR to tag requests
 +from this process.  When a user submits a work descriptor to a device using the
 +ENQCMD instruction, the PASID field in the descriptor is auto-filled with the
 +value from MSR_IA32_PASID. Requests for DMA from the device are also tagged
 +with the same PASID. The platform IOMMU uses the PASID in the transaction to
 +perform address translation. The IOMMU APIs setup the corresponding PASID
 +entry in IOMMU with the process address used by the CPU (e.g. %cr3 register in
 +x86).
 +
 +The MSR must be configured on each logical CPU before any application
 +thread can interact with a device. Threads that belong to the same
 +process share the same page tables, thus the same MSR value.
 +
 +PASID Life Cycle Management
 +===========================
 +
++PASID is initialized as IOMMU_PASID_INVALID (-1) when a process is created.
 +
 +Only processes that access SVA-capable devices need to have a PASID
 +allocated. This allocation happens when a process opens/binds an SVA-capable
 +device but finds no PASID for this process. Subsequent binds of the same, or
 +other devices will share the same PASID.
 +
 +Although the PASID is allocated to the process by opening a device,
 +it is not active in any of the threads of that process. It's loaded to the
 +IA32_PASID MSR lazily when a thread tries to submit a work descriptor
 +to a device using the ENQCMD.
 +
 +That first access will trigger a #GP fault because the IA32_PASID MSR
 +has not been initialized with the PASID value assigned to the process
 +when the device was opened. The Linux #GP handler notes that a PASID has
 +been allocated for the process, and so initializes the IA32_PASID MSR
 +and returns so that the ENQCMD instruction is re-executed.
 +
 +On fork(2) or exec(2) the PASID is removed from the process as it no
 +longer has the same address space that it had when the device was opened.
 +
 +On clone(2) the new task shares the same address space, so will be
 +able to use the PASID allocated to the process. The IA32_PASID is not
 +preemptively initialized as the PASID value might not be allocated yet or
 +the kernel does not know whether this thread is going to access the device
 +and the cleared IA32_PASID MSR reduces context switch overhead by xstate
 +init optimization. Since #GP faults have to be handled on any threads that
 +were created before the PASID was assigned to the mm of the process, newly
 +created threads might as well be treated in a consistent way.
 +
 +Due to complexity of freeing the PASID and clearing all IA32_PASID MSRs in
 +all threads in unbind, free the PASID lazily only on mm exit.
 +
 +If a process does a close(2) of the device file descriptor and munmap(2)
 +of the device MMIO portal, then the driver will unbind the device. The
 +PASID is still marked VALID in the PASID_MSR for any threads in the
 +process that accessed the device. But this is harmless as without the
 +MMIO portal they cannot submit new work to the device.
 +
 +Relationships
 +=============
 +
 + * Each process has many threads, but only one PASID.
 + * Devices have a limited number (~10's to 1000's) of hardware workqueues.
 +   The device driver manages allocating hardware workqueues.
 + * A single mmap() maps a single hardware workqueue as a "portal" and
 +   each portal maps down to a single workqueue.
 + * For each device with which a process interacts, there must be
 +   one or more mmap()'d portals.
 + * Many threads within a process can share a single portal to access
 +   a single device.
 + * Multiple processes can separately mmap() the same portal, in
 +   which case they still share one device hardware workqueue.
 + * The single process-wide PASID is used by all threads to interact
 +   with all devices.  There is not, for instance, a PASID for each
 +   thread or each thread<->device pair.
 +
 +FAQ
 +===
 +
 +* What is SVA/SVM?
 +
 +Shared Virtual Addressing (SVA) permits I/O hardware and the processor to
 +work in the same address space, i.e., to share it. Some call it Shared
 +Virtual Memory (SVM), but Linux community wanted to avoid confusing it with
 +POSIX Shared Memory and Secure Virtual Machines which were terms already in
 +circulation.
 +
 +* What is a PASID?
 +
 +A Process Address Space ID (PASID) is a PCIe-defined Transaction Layer Packet
 +(TLP) prefix. A PASID is a 20-bit number allocated and managed by the OS.
 +PASID is included in all transactions between the platform and the device.
 +
 +* How are shared workqueues different?
 +
 +Traditionally, in order for userspace applications to interact with hardware,
 +there is a separate hardware instance required per process. For example,
 +consider doorbells as a mechanism of informing hardware about work to process.
 +Each doorbell is required to be spaced 4k (or page-size) apart for process
 +isolation. This requires hardware to provision that space and reserve it in
 +MMIO. This doesn't scale as the number of threads becomes quite large. The
 +hardware also manages the queue depth for Shared Work Queues (SWQ), and
 +consumers don't need to track queue depth. If there is no space to accept
 +a command, the device will return an error indicating retry.
 +
 +A user should check Deferrable Memory Write (DMWr) capability on the device
 +and only submit ENQCMD when the device supports it. In the new DMWr PCIe
 +terminology, devices need to support DMWr completer capability. In addition,
 +it requires all switch ports to support DMWr routing and must be enabled by
 +the PCIe subsystem, much like how PCIe atomic operations are managed for
 +instance.
 +
 +SWQ allows hardware to provision just a single address in the device. When
 +used with ENQCMD to submit work, the device can distinguish the process
 +submitting the work since it will include the PASID assigned to that
 +process. This helps the device scale to a large number of processes.
 +
 +* Is this the same as a user space device driver?
 +
 +Communicating with the device via the shared workqueue is much simpler
 +than a full blown user space driver. The kernel driver does all the
 +initialization of the hardware. User space only needs to worry about
 +submitting work and processing completions.
 +
 +* Is this the same as SR-IOV?
 +
 +Single Root I/O Virtualization (SR-IOV) focuses on providing independent
 +hardware interfaces for virtualizing hardware. Hence, it's required to be
 +almost fully functional interface to software supporting the traditional
 +BARs, space for interrupts via MSI-X, its own register layout.
 +Virtual Functions (VFs) are assisted by the Physical Function (PF)
 +driver.
 +
 +Scalable I/O Virtualization builds on the PASID concept to create device
 +instances for virtualization. SIOV requires host software to assist in
 +creating virtual devices; each virtual device is represented by a PASID
 +along with the bus/device/function of the device.  This allows device
 +hardware to optimize device resource creation and can grow dynamically on
 +demand. SR-IOV creation and management is very static in nature. Consult
 +references below for more details.
 +
 +* Why not just create a virtual function for each app?
 +
 +Creating PCIe SR-IOV type Virtual Functions (VF) is expensive. VFs require
 +duplicated hardware for PCI config space and interrupts such as MSI-X.
 +Resources such as interrupts have to be hard partitioned between VFs at
 +creation time, and cannot scale dynamically on demand. The VFs are not
 +completely independent from the Physical Function (PF). Most VFs require
 +some communication and assistance from the PF driver. SIOV, in contrast,
 +creates a software-defined device where all the configuration and control
 +aspects are mediated via the slow path. The work submission and completion
 +happen without any mediation.
 +
 +* Does this support virtualization?
 +
 +ENQCMD can be used from within a guest VM. In these cases, the VMM helps
 +with setting up a translation table to translate from Guest PASID to Host
 +PASID. Please consult the ENQCMD instruction set reference for more
 +details.
 +
 +* Does memory need to be pinned?
 +
 +When devices support SVA along with platform hardware such as IOMMU
 +supporting such devices, there is no need to pin memory for DMA purposes.
 +Devices that support SVA also support other PCIe features that remove the
 +pinning requirement for memory.
 +
 +Device TLB support - Device requests the IOMMU to lookup an address before
 +use via Address Translation Service (ATS) requests.  If the mapping exists
 +but there is no page allocated by the OS, IOMMU hardware returns that no
 +mapping exists.
 +
 +Device requests the virtual address to be mapped via Page Request
 +Interface (PRI). Once the OS has successfully completed the mapping, it
 +returns the response back to the device. The device requests again for
 +a translation and continues.
 +
 +IOMMU works with the OS in managing consistency of page-tables with the
 +device. When removing pages, it interacts with the device to remove any
 +device TLB entry that might have been cached before removing the mappings from
 +the OS.
 +
 +References
 +==========
 +
 +VT-D:
 +https://01.org/blogs/ashokraj/2018/recent-enhancements-intel-virtualization-technology-directed-i/o-intel-vt-d
 +
 +SIOV:
 +https://01.org/blogs/2019/assignable-interfaces-intel-scalable-i/o-virtualization-linux
 +
 +ENQCMD in ISE:
 +https://software.intel.com/sites/default/files/managed/c5/15/architecture-instruction-set-extensions-programming-reference.pdf
 +
 +DSA spec:
 +https://software.intel.com/sites/default/files/341204-intel-data-streaming-accelerator-spec.pdf
index 78ff8ba5718e685f64d1a2db34e6888260749009,251eace411c02d7de32377c78b0462852e550633..5e83d4e9efa4103ed6e9ccb12571ac4b674c02c3
                #address-cells = <2>;
                #size-cells = <2>;
                compatible = "simple-bus";
+               dma-ranges = <0x0 0x0 0x0 0x0 0x4 0x0>;
                ranges;
  
                gic: interrupt-controller@c000000 {
                        #clock-cells = <1>;
                };
  
 +              gpu: gpu@13040000 {
 +                      compatible = "mediatek,mt8186-mali",
 +                                   "arm,mali-bifrost";
 +                      reg = <0 0x13040000 0 0x4000>;
 +
 +                      clocks = <&mfgsys CLK_MFG_BG3D>;
 +                      interrupts = <GIC_SPI 276 IRQ_TYPE_LEVEL_HIGH 0>,
 +                                   <GIC_SPI 275 IRQ_TYPE_LEVEL_HIGH 0>,
 +                                   <GIC_SPI 274 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      interrupt-names = "job", "mmu", "gpu";
 +                      power-domains = <&spm MT8186_POWER_DOMAIN_MFG2>,
 +                                      <&spm MT8186_POWER_DOMAIN_MFG3>;
 +                      power-domain-names = "core0", "core1";
 +                      #cooling-cells = <2>;
 +                      status = "disabled";
 +              };
 +
                mmsys: syscon@14000000 {
                        compatible = "mediatek,mt8186-mmsys", "syscon";
                        reg = <0 0x14000000 0 0x1000>;
index 8652f41403aee8cf01772fe0bee5f642bda41b0c,7d87cbabc9f192ba70e342341d5bea7fbc63dc8d..a44aae4ab95378dd31b5ec8c8acc79f411294ac9
@@@ -14,8 -14,6 +14,8 @@@
  #include <dt-bindings/pinctrl/mt8195-pinfunc.h>
  #include <dt-bindings/power/mt8195-power.h>
  #include <dt-bindings/reset/mt8195-resets.h>
 +#include <dt-bindings/thermal/thermal.h>
 +#include <dt-bindings/thermal/mediatek,lvts-thermal.h>
  
  / {
        compatible = "mediatek,mt8195";
        aliases {
                gce0 = &gce0;
                gce1 = &gce1;
 +              ethdr0 = &ethdr0;
 +              mutex0 = &mutex;
 +              mutex1 = &mutex1;
 +              merge1 = &merge1;
 +              merge2 = &merge2;
 +              merge3 = &merge3;
 +              merge4 = &merge4;
 +              merge5 = &merge5;
 +              vdo1-rdma0 = &vdo1_rdma0;
 +              vdo1-rdma1 = &vdo1_rdma1;
 +              vdo1-rdma2 = &vdo1_rdma2;
 +              vdo1-rdma3 = &vdo1_rdma3;
 +              vdo1-rdma4 = &vdo1_rdma4;
 +              vdo1-rdma5 = &vdo1_rdma5;
 +              vdo1-rdma6 = &vdo1_rdma6;
 +              vdo1-rdma7 = &vdo1_rdma7;
        };
  
        cpus {
                #performance-domain-cells = <1>;
        };
  
 +      gpu_opp_table: opp-table-gpu {
 +              compatible = "operating-points-v2";
 +              opp-shared;
 +
 +              opp-390000000 {
 +                      opp-hz = /bits/ 64 <390000000>;
 +                      opp-microvolt = <625000>;
 +              };
 +              opp-410000000 {
 +                      opp-hz = /bits/ 64 <410000000>;
 +                      opp-microvolt = <631250>;
 +              };
 +              opp-431000000 {
 +                      opp-hz = /bits/ 64 <431000000>;
 +                      opp-microvolt = <631250>;
 +              };
 +              opp-473000000 {
 +                      opp-hz = /bits/ 64 <473000000>;
 +                      opp-microvolt = <637500>;
 +              };
 +              opp-515000000 {
 +                      opp-hz = /bits/ 64 <515000000>;
 +                      opp-microvolt = <637500>;
 +              };
 +              opp-556000000 {
 +                      opp-hz = /bits/ 64 <556000000>;
 +                      opp-microvolt = <643750>;
 +              };
 +              opp-598000000 {
 +                      opp-hz = /bits/ 64 <598000000>;
 +                      opp-microvolt = <650000>;
 +              };
 +              opp-640000000 {
 +                      opp-hz = /bits/ 64 <640000000>;
 +                      opp-microvolt = <650000>;
 +              };
 +              opp-670000000 {
 +                      opp-hz = /bits/ 64 <670000000>;
 +                      opp-microvolt = <662500>;
 +              };
 +              opp-700000000 {
 +                      opp-hz = /bits/ 64 <700000000>;
 +                      opp-microvolt = <675000>;
 +              };
 +              opp-730000000 {
 +                      opp-hz = /bits/ 64 <730000000>;
 +                      opp-microvolt = <687500>;
 +              };
 +              opp-760000000 {
 +                      opp-hz = /bits/ 64 <760000000>;
 +                      opp-microvolt = <700000>;
 +              };
 +              opp-790000000 {
 +                      opp-hz = /bits/ 64 <790000000>;
 +                      opp-microvolt = <712500>;
 +              };
 +              opp-820000000 {
 +                      opp-hz = /bits/ 64 <820000000>;
 +                      opp-microvolt = <725000>;
 +              };
 +              opp-850000000 {
 +                      opp-hz = /bits/ 64 <850000000>;
 +                      opp-microvolt = <737500>;
 +              };
 +              opp-880000000 {
 +                      opp-hz = /bits/ 64 <880000000>;
 +                      opp-microvolt = <750000>;
 +              };
 +      };
 +
        pmu-a55 {
                compatible = "arm,cortex-a55-pmu";
                interrupt-parent = <&gic>;
                #size-cells = <2>;
                compatible = "simple-bus";
                ranges;
+               dma-ranges = <0x0 0x0 0x0 0x0 0x4 0x0>;
  
                gic: interrupt-controller@c000000 {
                        compatible = "arm,gic-v3";
  
                                        power-domain@MT8195_POWER_DOMAIN_MFG1 {
                                                reg = <MT8195_POWER_DOMAIN_MFG1>;
 -                                              clocks = <&apmixedsys CLK_APMIXED_MFGPLL>;
 -                                              clock-names = "mfg";
 +                                              clocks = <&apmixedsys CLK_APMIXED_MFGPLL>,
 +                                                       <&topckgen CLK_TOP_MFG_CORE_TMP>;
 +                                              clock-names = "mfg", "alt";
                                                mediatek,infracfg = <&infracfg_ao>;
                                                #address-cells = <1>;
                                                #size-cells = <0>;
                        status = "disabled";
                };
  
 +              lvts_ap: thermal-sensor@1100b000 {
 +                      compatible = "mediatek,mt8195-lvts-ap";
 +                      reg = <0 0x1100b000 0 0x1000>;
 +                      interrupts = <GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      clocks = <&infracfg_ao CLK_INFRA_AO_THERM>;
 +                      resets = <&infracfg_ao MT8195_INFRA_RST0_THERM_CTRL_SWRST>;
 +                      nvmem-cells = <&lvts_efuse_data1 &lvts_efuse_data2>;
 +                      nvmem-cell-names = "lvts-calib-data-1", "lvts-calib-data-2";
 +                      #thermal-sensor-cells = <1>;
 +              };
 +
 +              disp_pwm0: pwm@1100e000 {
 +                      compatible = "mediatek,mt8195-disp-pwm", "mediatek,mt8183-disp-pwm";
 +                      reg = <0 0x1100e000 0 0x1000>;
 +                      interrupts = <GIC_SPI 203 IRQ_TYPE_LEVEL_LOW 0>;
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS0>;
 +                      #pwm-cells = <2>;
 +                      clocks = <&topckgen CLK_TOP_DISP_PWM0>,
 +                               <&infracfg_ao CLK_INFRA_AO_DISP_PWM>;
 +                      clock-names = "main", "mm";
 +                      status = "disabled";
 +              };
 +
 +              disp_pwm1: pwm@1100f000 {
 +                      compatible = "mediatek,mt8195-disp-pwm", "mediatek,mt8183-disp-pwm";
 +                      reg = <0 0x1100f000 0 0x1000>;
 +                      interrupts = <GIC_SPI 793 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      #pwm-cells = <2>;
 +                      clocks = <&topckgen CLK_TOP_DISP_PWM1>,
 +                               <&infracfg_ao CLK_INFRA_AO_DISP_PWM1>;
 +                      clock-names = "main", "mm";
 +                      status = "disabled";
 +              };
 +
                spi1: spi@11010000 {
                        compatible = "mediatek,mt8195-spi",
                                     "mediatek,mt6765-spi";
                        status = "disabled";
                };
  
 +              lvts_mcu: thermal-sensor@11278000 {
 +                      compatible = "mediatek,mt8195-lvts-mcu";
 +                      reg = <0 0x11278000 0 0x1000>;
 +                      interrupts = <GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      clocks = <&infracfg_ao CLK_INFRA_AO_THERM>;
 +                      resets = <&infracfg_ao MT8195_INFRA_RST4_THERM_CTRL_MCU_SWRST>;
 +                      nvmem-cells = <&lvts_efuse_data1 &lvts_efuse_data2>;
 +                      nvmem-cell-names = "lvts-calib-data-1", "lvts-calib-data-2";
 +                      #thermal-sensor-cells = <1>;
 +              };
 +
                xhci1: usb@11290000 {
                        compatible = "mediatek,mt8195-xhci",
                                     "mediatek,mtk-xhci";
                        status = "disabled";
                };
  
 +              gpu: gpu@13000000 {
 +                      compatible = "mediatek,mt8195-mali", "mediatek,mt8192-mali",
 +                                   "arm,mali-valhall-jm";
 +                      reg = <0 0x13000000 0 0x4000>;
 +
 +                      clocks = <&mfgcfg CLK_MFG_BG3D>;
 +                      interrupts = <GIC_SPI 397 IRQ_TYPE_LEVEL_HIGH 0>,
 +                                   <GIC_SPI 396 IRQ_TYPE_LEVEL_HIGH 0>,
 +                                   <GIC_SPI 395 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      interrupt-names = "job", "mmu", "gpu";
 +                      operating-points-v2 = <&gpu_opp_table>;
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_MFG2>,
 +                                      <&spm MT8195_POWER_DOMAIN_MFG3>,
 +                                      <&spm MT8195_POWER_DOMAIN_MFG4>,
 +                                      <&spm MT8195_POWER_DOMAIN_MFG5>,
 +                                      <&spm MT8195_POWER_DOMAIN_MFG6>;
 +                      power-domain-names = "core0", "core1", "core2", "core3", "core4";
 +                      status = "disabled";
 +              };
 +
                mfgcfg: clock-controller@13fbf000 {
                        compatible = "mediatek,mt8195-mfgcfg";
                        reg = <0 0x13fbf000 0 0x1000>;
                        #clock-cells = <1>;
                };
  
 -              vppsys0: clock-controller@14000000 {
 -                      compatible = "mediatek,mt8195-vppsys0";
 +              vppsys0: syscon@14000000 {
 +                      compatible = "mediatek,mt8195-vppsys0", "syscon";
                        reg = <0 0x14000000 0 0x1000>;
                        #clock-cells = <1>;
                };
  
 +              mutex@1400f000 {
 +                      compatible = "mediatek,mt8195-vpp-mutex";
 +                      reg = <0 0x1400f000 0 0x1000>;
 +                      interrupts = <GIC_SPI 592 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      mediatek,gce-client-reg = <&gce1 SUBSYS_1400XXXX 0xf000 0x1000>;
 +                      clocks = <&vppsys0 CLK_VPP0_MUTEX>;
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VPPSYS0>;
 +              };
 +
                smi_sub_common_vpp0_vpp1_2x1: smi@14010000 {
                        compatible = "mediatek,mt8195-smi-sub-common";
                        reg = <0 0x14010000 0 0x1000>;
                        power-domains = <&spm MT8195_POWER_DOMAIN_WPESYS>;
                };
  
 -              vppsys1: clock-controller@14f00000 {
 -                      compatible = "mediatek,mt8195-vppsys1";
 +              vppsys1: syscon@14f00000 {
 +                      compatible = "mediatek,mt8195-vppsys1", "syscon";
                        reg = <0 0x14f00000 0 0x1000>;
                        #clock-cells = <1>;
                };
  
 +              mutex@14f01000 {
 +                      compatible = "mediatek,mt8195-vpp-mutex";
 +                      reg = <0 0x14f01000 0 0x1000>;
 +                      interrupts = <GIC_SPI 635 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      mediatek,gce-client-reg = <&gce1 SUBSYS_14f0XXXX 0x1000 0x1000>;
 +                      clocks = <&vppsys1 CLK_VPP1_DISP_MUTEX>;
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VPPSYS1>;
 +              };
 +
                larb5: larb@14f02000 {
                        compatible = "mediatek,mt8195-smi-larb";
                        reg = <0 0x14f02000 0 0x1000>;
                        power-domains = <&spm MT8195_POWER_DOMAIN_VENC>;
                        #address-cells = <2>;
                        #size-cells = <2>;
-                       dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>;
                };
  
                jpgdec-master {
                                 <&iommu_vdo M4U_PORT_L19_JPGDEC_BSDMA1>,
                                 <&iommu_vdo M4U_PORT_L19_JPGDEC_BUFF_OFFSET1>,
                                 <&iommu_vdo M4U_PORT_L19_JPGDEC_BUFF_OFFSET0>;
-                       dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>;
                        #address-cells = <2>;
                        #size-cells = <2>;
                        ranges;
                                        <&iommu_vpp M4U_PORT_L20_JPGENC_C_RDMA>,
                                        <&iommu_vpp M4U_PORT_L20_JPGENC_Q_TABLE>,
                                        <&iommu_vpp M4U_PORT_L20_JPGENC_BSDMA>;
-                       dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>;
                        #address-cells = <2>;
                        #size-cells = <2>;
                        ranges;
                vdosys1: syscon@1c100000 {
                        compatible = "mediatek,mt8195-vdosys1", "syscon";
                        reg = <0 0x1c100000 0 0x1000>;
 +                      mboxes = <&gce0 1 CMDQ_THR_PRIO_4>;
 +                      mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x0000 0x1000>;
                        #clock-cells = <1>;
 +                      #reset-cells = <1>;
                };
  
                smi_common_vdo: smi@1c01b000 {
                        power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS0>;
                };
  
 +              mutex1: mutex@1c101000 {
 +                      compatible = "mediatek,mt8195-disp-mutex";
 +                      reg = <0 0x1c101000 0 0x1000>;
 +                      reg-names = "vdo1_mutex";
 +                      interrupts = <GIC_SPI 494 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
 +                      clocks = <&vdosys1 CLK_VDO1_DISP_MUTEX>;
 +                      clock-names = "vdo1_mutex";
 +                      mediatek,gce-events = <CMDQ_EVENT_VDO1_STREAM_DONE_ENG_0>;
 +              };
 +
                larb2: larb@1c102000 {
                        compatible = "mediatek,mt8195-smi-larb";
                        reg = <0 0x1c102000 0 0x1000>;
                        power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
                };
  
 +              vdo1_rdma0: rdma@1c104000 {
 +                      compatible = "mediatek,mt8195-vdo1-rdma";
 +                      reg = <0 0x1c104000 0 0x1000>;
 +                      interrupts = <GIC_SPI 495 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      clocks = <&vdosys1 CLK_VDO1_MDP_RDMA0>;
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
 +                      iommus = <&iommu_vdo M4U_PORT_L2_MDP_RDMA0>;
 +                      mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x4000 0x1000>;
 +              };
 +
 +              vdo1_rdma1: rdma@1c105000 {
 +                      compatible = "mediatek,mt8195-vdo1-rdma";
 +                      reg = <0 0x1c105000 0 0x1000>;
 +                      interrupts = <GIC_SPI 496 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      clocks = <&vdosys1 CLK_VDO1_MDP_RDMA1>;
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
 +                      iommus = <&iommu_vpp M4U_PORT_L3_MDP_RDMA1>;
 +                      mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x5000 0x1000>;
 +              };
 +
 +              vdo1_rdma2: rdma@1c106000 {
 +                      compatible = "mediatek,mt8195-vdo1-rdma";
 +                      reg = <0 0x1c106000 0 0x1000>;
 +                      interrupts = <GIC_SPI 497 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      clocks = <&vdosys1 CLK_VDO1_MDP_RDMA2>;
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
 +                      iommus = <&iommu_vdo M4U_PORT_L2_MDP_RDMA2>;
 +                      mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x6000 0x1000>;
 +              };
 +
 +              vdo1_rdma3: rdma@1c107000 {
 +                      compatible = "mediatek,mt8195-vdo1-rdma";
 +                      reg = <0 0x1c107000 0 0x1000>;
 +                      interrupts = <GIC_SPI 498 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      clocks = <&vdosys1 CLK_VDO1_MDP_RDMA3>;
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
 +                      iommus = <&iommu_vpp M4U_PORT_L3_MDP_RDMA3>;
 +                      mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x7000 0x1000>;
 +              };
 +
 +              vdo1_rdma4: rdma@1c108000 {
 +                      compatible = "mediatek,mt8195-vdo1-rdma";
 +                      reg = <0 0x1c108000 0 0x1000>;
 +                      interrupts = <GIC_SPI 499 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      clocks = <&vdosys1 CLK_VDO1_MDP_RDMA4>;
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
 +                      iommus = <&iommu_vdo M4U_PORT_L2_MDP_RDMA4>;
 +                      mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x8000 0x1000>;
 +              };
 +
 +              vdo1_rdma5: rdma@1c109000 {
 +                      compatible = "mediatek,mt8195-vdo1-rdma";
 +                      reg = <0 0x1c109000 0 0x1000>;
 +                      interrupts = <GIC_SPI 500 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      clocks = <&vdosys1 CLK_VDO1_MDP_RDMA5>;
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
 +                      iommus = <&iommu_vpp M4U_PORT_L3_MDP_RDMA5>;
 +                      mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x9000 0x1000>;
 +              };
 +
 +              vdo1_rdma6: rdma@1c10a000 {
 +                      compatible = "mediatek,mt8195-vdo1-rdma";
 +                      reg = <0 0x1c10a000 0 0x1000>;
 +                      interrupts = <GIC_SPI 501 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      clocks = <&vdosys1 CLK_VDO1_MDP_RDMA6>;
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
 +                      iommus = <&iommu_vdo M4U_PORT_L2_MDP_RDMA6>;
 +                      mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0xa000 0x1000>;
 +              };
 +
 +              vdo1_rdma7: rdma@1c10b000 {
 +                      compatible = "mediatek,mt8195-vdo1-rdma";
 +                      reg = <0 0x1c10b000 0 0x1000>;
 +                      interrupts = <GIC_SPI 502 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      clocks = <&vdosys1 CLK_VDO1_MDP_RDMA7>;
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
 +                      iommus = <&iommu_vpp M4U_PORT_L3_MDP_RDMA7>;
 +                      mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0xb000 0x1000>;
 +              };
 +
 +              merge1: vpp-merge@1c10c000 {
 +                      compatible = "mediatek,mt8195-disp-merge";
 +                      reg = <0 0x1c10c000 0 0x1000>;
 +                      interrupts = <GIC_SPI 503 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      clocks = <&vdosys1 CLK_VDO1_VPP_MERGE0>,
 +                               <&vdosys1 CLK_VDO1_MERGE0_DL_ASYNC>;
 +                      clock-names = "merge","merge_async";
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
 +                      mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0xc000 0x1000>;
 +                      mediatek,merge-mute = <1>;
 +                      resets = <&vdosys1 MT8195_VDOSYS1_SW0_RST_B_MERGE0_DL_ASYNC>;
 +              };
 +
 +              merge2: vpp-merge@1c10d000 {
 +                      compatible = "mediatek,mt8195-disp-merge";
 +                      reg = <0 0x1c10d000 0 0x1000>;
 +                      interrupts = <GIC_SPI 504 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      clocks = <&vdosys1 CLK_VDO1_VPP_MERGE1>,
 +                               <&vdosys1 CLK_VDO1_MERGE1_DL_ASYNC>;
 +                      clock-names = "merge","merge_async";
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
 +                      mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0xd000 0x1000>;
 +                      mediatek,merge-mute = <1>;
 +                      resets = <&vdosys1 MT8195_VDOSYS1_SW0_RST_B_MERGE1_DL_ASYNC>;
 +              };
 +
 +              merge3: vpp-merge@1c10e000 {
 +                      compatible = "mediatek,mt8195-disp-merge";
 +                      reg = <0 0x1c10e000 0 0x1000>;
 +                      interrupts = <GIC_SPI 505 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      clocks = <&vdosys1 CLK_VDO1_VPP_MERGE2>,
 +                               <&vdosys1 CLK_VDO1_MERGE2_DL_ASYNC>;
 +                      clock-names = "merge","merge_async";
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
 +                      mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0xe000 0x1000>;
 +                      mediatek,merge-mute = <1>;
 +                      resets = <&vdosys1 MT8195_VDOSYS1_SW0_RST_B_MERGE2_DL_ASYNC>;
 +              };
 +
 +              merge4: vpp-merge@1c10f000 {
 +                      compatible = "mediatek,mt8195-disp-merge";
 +                      reg = <0 0x1c10f000 0 0x1000>;
 +                      interrupts = <GIC_SPI 506 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      clocks = <&vdosys1 CLK_VDO1_VPP_MERGE3>,
 +                               <&vdosys1 CLK_VDO1_MERGE3_DL_ASYNC>;
 +                      clock-names = "merge","merge_async";
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
 +                      mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0xf000 0x1000>;
 +                      mediatek,merge-mute = <1>;
 +                      resets = <&vdosys1 MT8195_VDOSYS1_SW0_RST_B_MERGE3_DL_ASYNC>;
 +              };
 +
 +              merge5: vpp-merge@1c110000 {
 +                      compatible = "mediatek,mt8195-disp-merge";
 +                      reg = <0 0x1c110000 0 0x1000>;
 +                      interrupts = <GIC_SPI 507 IRQ_TYPE_LEVEL_HIGH 0>;
 +                      clocks = <&vdosys1 CLK_VDO1_VPP_MERGE4>,
 +                               <&vdosys1 CLK_VDO1_MERGE4_DL_ASYNC>;
 +                      clock-names = "merge","merge_async";
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
 +                      mediatek,gce-client-reg = <&gce0 SUBSYS_1c11XXXX 0x0000 0x1000>;
 +                      mediatek,merge-fifo-en = <1>;
 +                      resets = <&vdosys1 MT8195_VDOSYS1_SW0_RST_B_MERGE4_DL_ASYNC>;
 +              };
 +
                dp_intf1: dp-intf@1c113000 {
                        compatible = "mediatek,mt8195-dp-intf";
                        reg = <0 0x1c113000 0 0x1000>;
                        status = "disabled";
                };
  
 +              ethdr0: hdr-engine@1c114000 {
 +                      compatible = "mediatek,mt8195-disp-ethdr";
 +                      reg = <0 0x1c114000 0 0x1000>,
 +                            <0 0x1c115000 0 0x1000>,
 +                            <0 0x1c117000 0 0x1000>,
 +                            <0 0x1c119000 0 0x1000>,
 +                            <0 0x1c11a000 0 0x1000>,
 +                            <0 0x1c11b000 0 0x1000>,
 +                            <0 0x1c11c000 0 0x1000>;
 +                      reg-names = "mixer", "vdo_fe0", "vdo_fe1", "gfx_fe0", "gfx_fe1",
 +                                  "vdo_be", "adl_ds";
 +                      mediatek,gce-client-reg = <&gce0 SUBSYS_1c11XXXX 0x4000 0x1000>,
 +                                                <&gce0 SUBSYS_1c11XXXX 0x5000 0x1000>,
 +                                                <&gce0 SUBSYS_1c11XXXX 0x7000 0x1000>,
 +                                                <&gce0 SUBSYS_1c11XXXX 0x9000 0x1000>,
 +                                                <&gce0 SUBSYS_1c11XXXX 0xa000 0x1000>,
 +                                                <&gce0 SUBSYS_1c11XXXX 0xb000 0x1000>,
 +                                                <&gce0 SUBSYS_1c11XXXX 0xc000 0x1000>;
 +                      clocks = <&vdosys1 CLK_VDO1_DISP_MIXER>,
 +                               <&vdosys1 CLK_VDO1_HDR_VDO_FE0>,
 +                               <&vdosys1 CLK_VDO1_HDR_VDO_FE1>,
 +                               <&vdosys1 CLK_VDO1_HDR_GFX_FE0>,
 +                               <&vdosys1 CLK_VDO1_HDR_GFX_FE1>,
 +                               <&vdosys1 CLK_VDO1_HDR_VDO_BE>,
 +                               <&vdosys1 CLK_VDO1_26M_SLOW>,
 +                               <&vdosys1 CLK_VDO1_HDR_VDO_FE0_DL_ASYNC>,
 +                               <&vdosys1 CLK_VDO1_HDR_VDO_FE1_DL_ASYNC>,
 +                               <&vdosys1 CLK_VDO1_HDR_GFX_FE0_DL_ASYNC>,
 +                               <&vdosys1 CLK_VDO1_HDR_GFX_FE1_DL_ASYNC>,
 +                               <&vdosys1 CLK_VDO1_HDR_VDO_BE_DL_ASYNC>,
 +                               <&topckgen CLK_TOP_ETHDR>;
 +                      clock-names = "mixer", "vdo_fe0", "vdo_fe1", "gfx_fe0", "gfx_fe1",
 +                                    "vdo_be", "adl_ds", "vdo_fe0_async", "vdo_fe1_async",
 +                                    "gfx_fe0_async", "gfx_fe1_async","vdo_be_async",
 +                                    "ethdr_top";
 +                      power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
 +                      iommus = <&iommu_vpp M4U_PORT_L3_HDR_DS>,
 +                               <&iommu_vpp M4U_PORT_L3_HDR_ADL>;
 +                      interrupts = <GIC_SPI 517 IRQ_TYPE_LEVEL_HIGH 0>; /* disp mixer */
 +                      resets = <&vdosys1 MT8195_VDOSYS1_SW1_RST_B_HDR_VDO_FE0_DL_ASYNC>,
 +                               <&vdosys1 MT8195_VDOSYS1_SW1_RST_B_HDR_VDO_FE1_DL_ASYNC>,
 +                               <&vdosys1 MT8195_VDOSYS1_SW1_RST_B_HDR_GFX_FE0_DL_ASYNC>,
 +                               <&vdosys1 MT8195_VDOSYS1_SW1_RST_B_HDR_GFX_FE1_DL_ASYNC>,
 +                               <&vdosys1 MT8195_VDOSYS1_SW1_RST_B_HDR_VDO_BE_DL_ASYNC>;
 +                      reset-names = "vdo_fe0_async", "vdo_fe1_async", "gfx_fe0_async",
 +                                    "gfx_fe1_async", "vdo_be_async";
 +              };
 +
                edp_tx: edp-tx@1c500000 {
                        compatible = "mediatek,mt8195-edp-tx";
                        reg = <0 0x1c500000 0 0x8000>;
                        status = "disabled";
                };
        };
 +
 +      thermal_zones: thermal-zones {
 +              cpu0-thermal {
 +                      polling-delay = <1000>;
 +                      polling-delay-passive = <250>;
 +                      thermal-sensors = <&lvts_mcu MT8195_MCU_LITTLE_CPU0>;
 +
 +                      trips {
 +                              cpu0_alert: trip-alert {
 +                                      temperature = <85000>;
 +                                      hysteresis = <2000>;
 +                                      type = "passive";
 +                              };
 +
 +                              cpu0_crit: trip-crit {
 +                                      temperature = <100000>;
 +                                      hysteresis = <2000>;
 +                                      type = "critical";
 +                              };
 +                      };
 +
 +                      cooling-maps {
 +                              map0 {
 +                                      trip = <&cpu0_alert>;
 +                                      cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
 +                              };
 +                      };
 +              };
 +
 +              cpu1-thermal {
 +                      polling-delay = <1000>;
 +                      polling-delay-passive = <250>;
 +                      thermal-sensors = <&lvts_mcu MT8195_MCU_LITTLE_CPU1>;
 +
 +                      trips {
 +                              cpu1_alert: trip-alert {
 +                                      temperature = <85000>;
 +                                      hysteresis = <2000>;
 +                                      type = "passive";
 +                              };
 +
 +                              cpu1_crit: trip-crit {
 +                                      temperature = <100000>;
 +                                      hysteresis = <2000>;
 +                                      type = "critical";
 +                              };
 +                      };
 +
 +                      cooling-maps {
 +                              map0 {
 +                                      trip = <&cpu1_alert>;
 +                                      cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
 +                              };
 +                      };
 +              };
 +
 +              cpu2-thermal {
 +                      polling-delay = <1000>;
 +                      polling-delay-passive = <250>;
 +                      thermal-sensors = <&lvts_mcu MT8195_MCU_LITTLE_CPU2>;
 +
 +                      trips {
 +                              cpu2_alert: trip-alert {
 +                                      temperature = <85000>;
 +                                      hysteresis = <2000>;
 +                                      type = "passive";
 +                              };
 +
 +                              cpu2_crit: trip-crit {
 +                                      temperature = <100000>;
 +                                      hysteresis = <2000>;
 +                                      type = "critical";
 +                              };
 +                      };
 +
 +                      cooling-maps {
 +                              map0 {
 +                                      trip = <&cpu2_alert>;
 +                                      cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
 +                              };
 +                      };
 +              };
 +
 +              cpu3-thermal {
 +                      polling-delay = <1000>;
 +                      polling-delay-passive = <250>;
 +                      thermal-sensors = <&lvts_mcu MT8195_MCU_LITTLE_CPU3>;
 +
 +                      trips {
 +                              cpu3_alert: trip-alert {
 +                                      temperature = <85000>;
 +                                      hysteresis = <2000>;
 +                                      type = "passive";
 +                              };
 +
 +                              cpu3_crit: trip-crit {
 +                                      temperature = <100000>;
 +                                      hysteresis = <2000>;
 +                                      type = "critical";
 +                              };
 +                      };
 +
 +                      cooling-maps {
 +                              map0 {
 +                                      trip = <&cpu3_alert>;
 +                                      cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
 +                              };
 +                      };
 +              };
 +
 +              cpu4-thermal {
 +                      polling-delay = <1000>;
 +                      polling-delay-passive = <250>;
 +                      thermal-sensors = <&lvts_mcu MT8195_MCU_BIG_CPU0>;
 +
 +                      trips {
 +                              cpu4_alert: trip-alert {
 +                                      temperature = <85000>;
 +                                      hysteresis = <2000>;
 +                                      type = "passive";
 +                              };
 +
 +                              cpu4_crit: trip-crit {
 +                                      temperature = <100000>;
 +                                      hysteresis = <2000>;
 +                                      type = "critical";
 +                              };
 +                      };
 +
 +                      cooling-maps {
 +                              map0 {
 +                                      trip = <&cpu4_alert>;
 +                                      cooling-device = <&cpu4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
 +                              };
 +                      };
 +              };
 +
 +              cpu5-thermal {
 +                      polling-delay = <1000>;
 +                      polling-delay-passive = <250>;
 +                      thermal-sensors = <&lvts_mcu MT8195_MCU_BIG_CPU1>;
 +
 +                      trips {
 +                              cpu5_alert: trip-alert {
 +                                      temperature = <85000>;
 +                                      hysteresis = <2000>;
 +                                      type = "passive";
 +                              };
 +
 +                              cpu5_crit: trip-crit {
 +                                      temperature = <100000>;
 +                                      hysteresis = <2000>;
 +                                      type = "critical";
 +                              };
 +                      };
 +
 +                      cooling-maps {
 +                              map0 {
 +                                      trip = <&cpu5_alert>;
 +                                      cooling-device = <&cpu4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
 +                              };
 +                      };
 +              };
 +
 +              cpu6-thermal {
 +                      polling-delay = <1000>;
 +                      polling-delay-passive = <250>;
 +                      thermal-sensors = <&lvts_mcu MT8195_MCU_BIG_CPU2>;
 +
 +                      trips {
 +                              cpu6_alert: trip-alert {
 +                                      temperature = <85000>;
 +                                      hysteresis = <2000>;
 +                                      type = "passive";
 +                              };
 +
 +                              cpu6_crit: trip-crit {
 +                                      temperature = <100000>;
 +                                      hysteresis = <2000>;
 +                                      type = "critical";
 +                              };
 +                      };
 +
 +                      cooling-maps {
 +                              map0 {
 +                                      trip = <&cpu6_alert>;
 +                                      cooling-device = <&cpu4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
 +                              };
 +                      };
 +              };
 +
 +              cpu7-thermal {
 +                      polling-delay = <1000>;
 +                      polling-delay-passive = <250>;
 +                      thermal-sensors = <&lvts_mcu MT8195_MCU_BIG_CPU3>;
 +
 +                      trips {
 +                              cpu7_alert: trip-alert {
 +                                      temperature = <85000>;
 +                                      hysteresis = <2000>;
 +                                      type = "passive";
 +                              };
 +
 +                              cpu7_crit: trip-crit {
 +                                      temperature = <100000>;
 +                                      hysteresis = <2000>;
 +                                      type = "critical";
 +                              };
 +                      };
 +
 +                      cooling-maps {
 +                              map0 {
 +                                      trip = <&cpu7_alert>;
 +                                      cooling-device = <&cpu4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
 +                                                              <&cpu7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
 +                              };
 +                      };
 +              };
 +      };
  };
index 223b223f713ff1dac8afbb572f62aa988d1a6747,bb65a68b4b49968c6d33ea9324b37c3a242e1458..3d181c16a2f67fb5f93dc221d7583b145f576578
@@@ -39,6 -39,6 +39,7 @@@
  #include <linux/io.h>
  #include <linux/ftrace.h>
  #include <linux/syscalls.h>
++#include <linux/iommu.h>
  
  #include <asm/processor.h>
  #include <asm/pkru.h>
@@@ -671,7 -671,7 +672,7 @@@ void set_personality_64bit(void
        task_pt_regs(current)->orig_ax = __NR_execve;
        current_thread_info()->status &= ~TS_COMPAT;
        if (current->mm)
 -              current->mm->context.flags = MM_CONTEXT_HAS_VSYSCALL;
 +              __set_bit(MM_CONTEXT_HAS_VSYSCALL, &current->mm->context.flags);
  
        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
@@@ -708,7 -708,7 +709,7 @@@ static void __set_personality_ia32(void
                 * uprobes applied to this MM need to know this and
                 * cannot use user_64bit_mode() at that time.
                 */
 -              current->mm->context.flags = MM_CONTEXT_UPROBE_IA32;
 +              __set_bit(MM_CONTEXT_UPROBE_IA32, &current->mm->context.flags);
        }
  
        current->personality |= force_personality32;
@@@ -743,52 -743,6 +744,52 @@@ static long prctl_map_vdso(const struc
  }
  #endif
  
 +#ifdef CONFIG_ADDRESS_MASKING
 +
 +#define LAM_U57_BITS 6
 +
 +static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
 +{
 +      if (!cpu_feature_enabled(X86_FEATURE_LAM))
 +              return -ENODEV;
 +
 +      /* PTRACE_ARCH_PRCTL */
 +      if (current->mm != mm)
 +              return -EINVAL;
 +
 +      if (mm_valid_pasid(mm) &&
 +          !test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags))
 +              return -EINVAL;
 +
 +      if (mmap_write_lock_killable(mm))
 +              return -EINTR;
 +
 +      if (test_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags)) {
 +              mmap_write_unlock(mm);
 +              return -EBUSY;
 +      }
 +
 +      if (!nr_bits) {
 +              mmap_write_unlock(mm);
 +              return -EINVAL;
 +      } else if (nr_bits <= LAM_U57_BITS) {
 +              mm->context.lam_cr3_mask = X86_CR3_LAM_U57;
 +              mm->context.untag_mask =  ~GENMASK(62, 57);
 +      } else {
 +              mmap_write_unlock(mm);
 +              return -EINVAL;
 +      }
 +
 +      write_cr3(__read_cr3() | mm->context.lam_cr3_mask);
 +      set_tlbstate_lam_mode(mm);
 +      set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags);
 +
 +      mmap_write_unlock(mm);
 +
 +      return 0;
 +}
 +#endif
 +
  long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
  {
        int ret = 0;
        case ARCH_MAP_VDSO_64:
                return prctl_map_vdso(&vdso_image_64, arg2);
  #endif
 -
 +#ifdef CONFIG_ADDRESS_MASKING
 +      case ARCH_GET_UNTAG_MASK:
 +              return put_user(task->mm->context.untag_mask,
 +                              (unsigned long __user *)arg2);
 +      case ARCH_ENABLE_TAGGED_ADDR:
 +              return prctl_enable_tagged_addr(task->mm, arg2);
 +      case ARCH_FORCE_TAGGED_SVA:
 +              if (current != task)
 +                      return -EINVAL;
 +              set_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &task->mm->context.flags);
 +              return 0;
 +      case ARCH_GET_MAX_TAG_BITS:
 +              if (!cpu_feature_enabled(X86_FEATURE_LAM))
 +                      return put_user(0, (unsigned long __user *)arg2);
 +              else
 +                      return put_user(LAM_U57_BITS, (unsigned long __user *)arg2);
 +#endif
        default:
                ret = -EINVAL;
                break;
diff --combined arch/x86/kernel/traps.c
index 8b83d8fbce718024e284d3eb991099f0c6e24638,492a60febb1119dcb80e29d5dd9f4b6d6914e86c..58b1f208eff5186aae3b08825f1d0581b47ba275
@@@ -40,7 -40,7 +40,7 @@@
  #include <linux/io.h>
  #include <linux/hardirq.h>
  #include <linux/atomic.h>
- #include <linux/ioasid.h>
+ #include <linux/iommu.h>
  
  #include <asm/stacktrace.h>
  #include <asm/processor.h>
@@@ -671,15 -671,15 +671,15 @@@ static bool try_fixup_enqcmd_gp(void
        if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
                return false;
  
 -      pasid = current->mm->pasid;
 -
        /*
         * If the mm has not been allocated a
         * PASID, the #GP can not be fixed up.
         */
 -      if (!pasid_valid(pasid))
 +      if (!mm_valid_pasid(current->mm))
                return false;
  
 +      pasid = current->mm->pasid;
 +
        /*
         * Did this thread already have its PASID activated?
         * If so, the #GP must be from something else.
index 12e1e90fdae138647eab7c004e080fc34fa5da5b,12e1e90fdae138647eab7c004e080fc34fa5da5b..2e56bd79f589d30c2787e695b5c93750fcb480e0
@@@ -18,7 -18,7 +18,6 @@@ config INTEL_IOMM
        select NEED_DMA_MAP_STATE
        select DMAR_TABLE
        select SWIOTLB
--      select IOASID
        select PCI_ATS
        select PCI_PRI
        select PCI_PASID
index dd76a1a09cf708ce7f92ed8396531e0d20ec89d2,c434b95dc8ebb1fd65f308df50a8b192331f65f3..9821bc44f5ac1d4d5e85175f57963081734464fe
@@@ -2,7 -2,6 +2,7 @@@
  /*
   * Helpers for IOMMU drivers implementing SVA
   */
 +#include <linux/mmu_context.h>
  #include <linux/mutex.h>
  #include <linux/sched/mm.h>
  #include <linux/iommu.h>
  #include "iommu-sva.h"
  
  static DEFINE_MUTEX(iommu_sva_lock);
- static DECLARE_IOASID_SET(iommu_sva_pasid);
+ static DEFINE_IDA(iommu_global_pasid_ida);
  
- /**
-  * iommu_sva_alloc_pasid - Allocate a PASID for the mm
-  * @mm: the mm
-  * @min: minimum PASID value (inclusive)
-  * @max: maximum PASID value (inclusive)
-  *
-  * Try to allocate a PASID for this mm, or take a reference to the existing one
-  * provided it fits within the [@min, @max] range. On success the PASID is
-  * available in mm->pasid and will be available for the lifetime of the mm.
-  *
-  * Returns 0 on success and < 0 on error.
-  */
- int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
+ /* Allocate a PASID for the mm within range (inclusive) */
+ static int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
  {
        int ret = 0;
-       ioasid_t pasid;
  
-       if (min == INVALID_IOASID || max == INVALID_IOASID ||
 -      if (!pasid_valid(min) || !pasid_valid(max) ||
++      if (min == IOMMU_PASID_INVALID ||
++          max == IOMMU_PASID_INVALID ||
            min == 0 || max < min)
                return -EINVAL;
  
 +      if (!arch_pgtable_dma_compat(mm))
 +              return -EBUSY;
 +
        mutex_lock(&iommu_sva_lock);
        /* Is a PASID already associated with this mm? */
 -      if (pasid_valid(mm->pasid)) {
 +      if (mm_valid_pasid(mm)) {
-               if (mm->pasid < min || mm->pasid >= max)
+               if (mm->pasid < min || mm->pasid > max)
                        ret = -EOVERFLOW;
                goto out;
        }
  
-       pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
-       if (pasid == INVALID_IOASID)
-               ret = -ENOMEM;
-       else
-               mm_pasid_set(mm, pasid);
+       ret = ida_alloc_range(&iommu_global_pasid_ida, min, max, GFP_KERNEL);
+       if (ret < min)
+               goto out;
+       mm->pasid = ret;
+       ret = 0;
  out:
        mutex_unlock(&iommu_sva_lock);
        return ret;
  }
- EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid);
- /* ioasid_find getter() requires a void * argument */
- static bool __mmget_not_zero(void *mm)
- {
-       return mmget_not_zero(mm);
- }
- /**
-  * iommu_sva_find() - Find mm associated to the given PASID
-  * @pasid: Process Address Space ID assigned to the mm
-  *
-  * On success a reference to the mm is taken, and must be released with mmput().
-  *
-  * Returns the mm corresponding to this PASID, or an error if not found.
-  */
- struct mm_struct *iommu_sva_find(ioasid_t pasid)
- {
-       return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero);
- }
- EXPORT_SYMBOL_GPL(iommu_sva_find);
  
  /**
   * iommu_sva_bind_device() - Bind a process address space to a device
@@@ -242,3 -205,11 +210,11 @@@ out_put_mm
  
        return status;
  }
 -      if (likely(!pasid_valid(mm->pasid)))
+ void mm_pasid_drop(struct mm_struct *mm)
+ {
++      if (likely(!mm_valid_pasid(mm)))
+               return;
+       ida_free(&iommu_global_pasid_ida, mm->pasid);
+ }
diff --combined drivers/iommu/iommu.c
index 807c98de40d4f681045ef03af4e0f805642350a3,153a3dab568c63a75bb71f6830b85846a1e59992..f1dcfa3f1a1b48ab8a48bb6976f524e3dc4c29e0
@@@ -28,7 -28,6 +28,7 @@@
  #include <linux/fsl/mc.h>
  #include <linux/module.h>
  #include <linux/cc_platform.h>
 +#include <linux/cdx/cdx_bus.h>
  #include <trace/events/iommu.h>
  #include <linux/sched/mm.h>
  #include <linux/msi.h>
@@@ -88,9 -87,10 +88,10 @@@ static const char * const iommu_group_r
  
  static int iommu_bus_notifier(struct notifier_block *nb,
                              unsigned long action, void *data);
+ static void iommu_release_device(struct device *dev);
  static int iommu_alloc_default_domain(struct iommu_group *group,
                                      struct device *dev);
 -static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
 +static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
                                                 unsigned type);
  static int __iommu_attach_device(struct iommu_domain *domain,
                                 struct device *dev);
@@@ -130,9 -130,6 +131,9 @@@ static struct bus_type * const iommu_bu
  #ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
        &host1x_context_device_bus_type,
  #endif
 +#ifdef CONFIG_CDX_BUS
 +      &cdx_bus_type,
 +#endif
  };
  
  /*
@@@ -457,20 -454,86 +458,86 @@@ err_out
  
  }
  
- void iommu_release_device(struct device *dev)
+ /*
+  * Remove a device from a group's device list and return the group device
+  * if successful.
+  */
+ static struct group_device *
+ __iommu_group_remove_device(struct iommu_group *group, struct device *dev)
  {
+       struct group_device *device;
+       lockdep_assert_held(&group->mutex);
+       list_for_each_entry(device, &group->devices, list) {
+               if (device->dev == dev) {
+                       list_del(&device->list);
+                       return device;
+               }
+       }
+       return NULL;
+ }
+ /*
+  * Release a device from its group and decrements the iommu group reference
+  * count.
+  */
+ static void __iommu_group_release_device(struct iommu_group *group,
+                                        struct group_device *grp_dev)
+ {
+       struct device *dev = grp_dev->dev;
+       sysfs_remove_link(group->devices_kobj, grp_dev->name);
+       sysfs_remove_link(&dev->kobj, "iommu_group");
+       trace_remove_device_from_group(group->id, dev);
+       kfree(grp_dev->name);
+       kfree(grp_dev);
+       dev->iommu_group = NULL;
+       kobject_put(group->devices_kobj);
+ }
+ static void iommu_release_device(struct device *dev)
+ {
+       struct iommu_group *group = dev->iommu_group;
+       struct group_device *device;
        const struct iommu_ops *ops;
  
-       if (!dev->iommu)
+       if (!dev->iommu || !group)
                return;
  
        iommu_device_unlink(dev->iommu->iommu_dev, dev);
  
+       mutex_lock(&group->mutex);
+       device = __iommu_group_remove_device(group, dev);
+       /*
+        * If the group has become empty then ownership must have been released,
+        * and the current domain must be set back to NULL or the default
+        * domain.
+        */
+       if (list_empty(&group->devices))
+               WARN_ON(group->owner_cnt ||
+                       group->domain != group->default_domain);
+       /*
+        * release_device() must stop using any attached domain on the device.
+        * If there are still other devices in the group they are not effected
+        * by this callback.
+        *
+        * The IOMMU driver must set the device to either an identity or
+        * blocking translation and stop using any domain pointer, as it is
+        * going to be freed.
+        */
        ops = dev_iommu_ops(dev);
        if (ops->release_device)
                ops->release_device(dev);
+       mutex_unlock(&group->mutex);
+       if (device)
+               __iommu_group_release_device(group, device);
  
-       iommu_group_remove_device(dev);
        module_put(ops->owner);
        dev_iommu_free(dev);
  }
@@@ -554,7 -617,7 +621,7 @@@ static void iommu_group_remove_file(str
  
  static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
  {
-       return sprintf(buf, "%s\n", group->name);
+       return sysfs_emit(buf, "%s\n", group->name);
  }
  
  /**
@@@ -667,52 -730,51 +734,51 @@@ static ssize_t iommu_group_show_resv_re
  {
        struct iommu_resv_region *region, *next;
        struct list_head group_resv_regions;
-       char *str = buf;
+       int offset = 0;
  
        INIT_LIST_HEAD(&group_resv_regions);
        iommu_get_group_resv_regions(group, &group_resv_regions);
  
        list_for_each_entry_safe(region, next, &group_resv_regions, list) {
-               str += sprintf(str, "0x%016llx 0x%016llx %s\n",
-                              (long long int)region->start,
-                              (long long int)(region->start +
-                                               region->length - 1),
-                              iommu_group_resv_type_string[region->type]);
+               offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n",
+                                       (long long)region->start,
+                                       (long long)(region->start +
+                                                   region->length - 1),
+                                       iommu_group_resv_type_string[region->type]);
                kfree(region);
        }
  
-       return (str - buf);
+       return offset;
  }
  
  static ssize_t iommu_group_show_type(struct iommu_group *group,
                                     char *buf)
  {
-       char *type = "unknown\n";
+       char *type = "unknown";
  
        mutex_lock(&group->mutex);
        if (group->default_domain) {
                switch (group->default_domain->type) {
                case IOMMU_DOMAIN_BLOCKED:
-                       type = "blocked\n";
+                       type = "blocked";
                        break;
                case IOMMU_DOMAIN_IDENTITY:
-                       type = "identity\n";
+                       type = "identity";
                        break;
                case IOMMU_DOMAIN_UNMANAGED:
-                       type = "unmanaged\n";
+                       type = "unmanaged";
                        break;
                case IOMMU_DOMAIN_DMA:
-                       type = "DMA\n";
+                       type = "DMA";
                        break;
                case IOMMU_DOMAIN_DMA_FQ:
-                       type = "DMA-FQ\n";
+                       type = "DMA-FQ";
                        break;
                }
        }
        mutex_unlock(&group->mutex);
-       strcpy(buf, type);
  
-       return strlen(type);
+       return sysfs_emit(buf, "%s\n", type);
  }
  
  static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);
@@@ -743,7 -805,7 +809,7 @@@ static void iommu_group_release(struct 
        kfree(group);
  }
  
- static struct kobj_type iommu_group_ktype = {
+ static const struct kobj_type iommu_group_ktype = {
        .sysfs_ops = &iommu_group_sysfs_ops,
        .release = iommu_group_release,
  };
@@@ -820,35 -882,6 +886,6 @@@ struct iommu_group *iommu_group_alloc(v
  }
  EXPORT_SYMBOL_GPL(iommu_group_alloc);
  
- struct iommu_group *iommu_group_get_by_id(int id)
- {
-       struct kobject *group_kobj;
-       struct iommu_group *group;
-       const char *name;
-       if (!iommu_group_kset)
-               return NULL;
-       name = kasprintf(GFP_KERNEL, "%d", id);
-       if (!name)
-               return NULL;
-       group_kobj = kset_find_obj(iommu_group_kset, name);
-       kfree(name);
-       if (!group_kobj)
-               return NULL;
-       group = container_of(group_kobj, struct iommu_group, kobj);
-       BUG_ON(group->id != id);
-       kobject_get(group->devices_kobj);
-       kobject_put(&group->kobj);
-       return group;
- }
- EXPORT_SYMBOL_GPL(iommu_group_get_by_id);
  /**
   * iommu_group_get_iommudata - retrieve iommu_data registered for a group
   * @group: the group
@@@ -1072,7 -1105,7 +1109,7 @@@ EXPORT_SYMBOL_GPL(iommu_group_add_devic
  void iommu_group_remove_device(struct device *dev)
  {
        struct iommu_group *group = dev->iommu_group;
-       struct group_device *tmp_device, *device = NULL;
+       struct group_device *device;
  
        if (!group)
                return;
        dev_info(dev, "Removing from iommu group %d\n", group->id);
  
        mutex_lock(&group->mutex);
-       list_for_each_entry(tmp_device, &group->devices, list) {
-               if (tmp_device->dev == dev) {
-                       device = tmp_device;
-                       list_del(&device->list);
-                       break;
-               }
-       }
+       device = __iommu_group_remove_device(group, dev);
        mutex_unlock(&group->mutex);
  
-       if (!device)
-               return;
-       sysfs_remove_link(group->devices_kobj, device->name);
-       sysfs_remove_link(&dev->kobj, "iommu_group");
-       trace_remove_device_from_group(group->id, dev);
-       kfree(device->name);
-       kfree(device);
-       dev->iommu_group = NULL;
-       kobject_put(group->devices_kobj);
+       if (device)
+               __iommu_group_release_device(group, device);
  }
  EXPORT_SYMBOL_GPL(iommu_group_remove_device);
  
@@@ -1635,7 -1652,7 +1656,7 @@@ static int iommu_get_def_domain_type(st
        return 0;
  }
  
 -static int iommu_group_alloc_default_domain(struct bus_type *bus,
 +static int iommu_group_alloc_default_domain(const struct bus_type *bus,
                                            struct iommu_group *group,
                                            unsigned int type)
  {
@@@ -1781,7 -1798,7 +1802,7 @@@ static int probe_get_default_domain_typ
        return 0;
  }
  
 -static void probe_alloc_default_domain(struct bus_type *bus,
 +static void probe_alloc_default_domain(const struct bus_type *bus,
                                       struct iommu_group *group)
  {
        struct __group_domain_type gtype;
@@@ -1836,7 -1853,7 +1857,7 @@@ static int iommu_group_create_direct_ma
                                          iommu_do_create_direct_mappings);
  }
  
 -int bus_iommu_probe(struct bus_type *bus)
 +int bus_iommu_probe(const struct bus_type *bus)
  {
        struct iommu_group *group, *next;
        LIST_HEAD(group_list);
        return ret;
  }
  
 -bool iommu_present(struct bus_type *bus)
 +bool iommu_present(const struct bus_type *bus)
  {
        return bus->iommu_ops != NULL;
  }
@@@ -1955,7 -1972,7 +1976,7 @@@ void iommu_set_fault_handler(struct iom
  }
  EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
  
 -static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
 +static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
                                                 unsigned type)
  {
        struct iommu_domain *domain;
                return NULL;
  
        domain->type = type;
-       /* Assume all sizes by default; the driver may override this later */
-       domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap;
+       /*
+        * If not already set, assume all sizes by default; the driver
+        * may override this later
+        */
+       if (!domain->pgsize_bitmap)
+               domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap;
        if (!domain->ops)
                domain->ops = bus->iommu_ops->default_domain_ops;
  
        return domain;
  }
  
 -struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
 +struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus)
  {
        return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED);
  }
@@@ -2821,11 -2843,10 +2847,10 @@@ int iommu_dev_disable_feature(struct de
  EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
  
  /*
-  * Changes the default domain of an iommu group that has *only* one device
+  * Changes the default domain of an iommu group
   *
   * @group: The group for which the default domain should be changed
-  * @prev_dev: The device in the group (this is used to make sure that the device
-  *     hasn't changed after the caller has called this function)
+  * @dev: The first device in the group
   * @type: The type of the new default domain that gets associated with the group
   *
   * Returns 0 on success and error code on failure
   *    Please take a closer look if intended to use for other purposes.
   */
  static int iommu_change_dev_def_domain(struct iommu_group *group,
-                                      struct device *prev_dev, int type)
+                                      struct device *dev, int type)
  {
+       struct __group_domain_type gtype = {NULL, 0};
        struct iommu_domain *prev_dom;
-       struct group_device *grp_dev;
-       int ret, dev_def_dom;
-       struct device *dev;
-       mutex_lock(&group->mutex);
-       if (group->default_domain != group->domain) {
-               dev_err_ratelimited(prev_dev, "Group not assigned to default domain\n");
-               ret = -EBUSY;
-               goto out;
-       }
-       /*
-        * iommu group wasn't locked while acquiring device lock in
-        * iommu_group_store_type(). So, make sure that the device count hasn't
-        * changed while acquiring device lock.
-        *
-        * Changing default domain of an iommu group with two or more devices
-        * isn't supported because there could be a potential deadlock. Consider
-        * the following scenario. T1 is trying to acquire device locks of all
-        * the devices in the group and before it could acquire all of them,
-        * there could be another thread T2 (from different sub-system and use
-        * case) that has already acquired some of the device locks and might be
-        * waiting for T1 to release other device locks.
-        */
-       if (iommu_group_device_count(group) != 1) {
-               dev_err_ratelimited(prev_dev, "Cannot change default domain: Group has more than one device\n");
-               ret = -EINVAL;
-               goto out;
-       }
+       int ret;
  
-       /* Since group has only one device */
-       grp_dev = list_first_entry(&group->devices, struct group_device, list);
-       dev = grp_dev->dev;
-       if (prev_dev != dev) {
-               dev_err_ratelimited(prev_dev, "Cannot change default domain: Device has been changed\n");
-               ret = -EBUSY;
-               goto out;
-       }
+       lockdep_assert_held(&group->mutex);
  
        prev_dom = group->default_domain;
-       if (!prev_dom) {
-               ret = -EINVAL;
-               goto out;
-       }
-       dev_def_dom = iommu_get_def_domain_type(dev);
+       __iommu_group_for_each_dev(group, &gtype,
+                                  probe_get_default_domain_type);
        if (!type) {
                /*
                 * If the user hasn't requested any specific type of domain and
                 * if the device supports both the domains, then default to the
                 * domain the device was booted with
                 */
-               type = dev_def_dom ? : iommu_def_domain_type;
-       } else if (dev_def_dom && type != dev_def_dom) {
-               dev_err_ratelimited(prev_dev, "Device cannot be in %s domain\n",
+               type = gtype.type ? : iommu_def_domain_type;
+       } else if (gtype.type && type != gtype.type) {
+               dev_err_ratelimited(dev, "Device cannot be in %s domain\n",
                                    iommu_domain_type_str(type));
-               ret = -EINVAL;
-               goto out;
+               return -EINVAL;
        }
  
        /*
         * Switch to a new domain only if the requested domain type is different
         * from the existing default domain type
         */
-       if (prev_dom->type == type) {
-               ret = 0;
-               goto out;
-       }
+       if (prev_dom->type == type)
+               return 0;
  
-       /* We can bring up a flush queue without tearing down the domain */
-       if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) {
-               ret = iommu_dma_init_fq(prev_dom);
-               if (!ret)
-                       prev_dom->type = IOMMU_DOMAIN_DMA_FQ;
-               goto out;
-       }
+       group->default_domain = NULL;
+       group->domain = NULL;
  
        /* Sets group->default_domain to the newly allocated domain */
        ret = iommu_group_alloc_default_domain(dev->bus, group, type);
        if (ret)
-               goto out;
+               goto restore_old_domain;
  
-       ret = iommu_create_device_direct_mappings(group, dev);
+       ret = iommu_group_create_direct_mappings(group);
        if (ret)
                goto free_new_domain;
  
-       ret = __iommu_attach_device(group->default_domain, dev);
+       ret = __iommu_attach_group(group->default_domain, group);
        if (ret)
                goto free_new_domain;
  
-       group->domain = group->default_domain;
-       /*
-        * Release the mutex here because ops->probe_finalize() call-back of
-        * some vendor IOMMU drivers calls arm_iommu_attach_device() which
-        * in-turn might call back into IOMMU core code, where it tries to take
-        * group->mutex, resulting in a deadlock.
-        */
-       mutex_unlock(&group->mutex);
-       /* Make sure dma_ops is appropriatley set */
-       iommu_group_do_probe_finalize(dev, group->default_domain);
        iommu_domain_free(prev_dom);
        return 0;
  
  free_new_domain:
        iommu_domain_free(group->default_domain);
+ restore_old_domain:
        group->default_domain = prev_dom;
        group->domain = prev_dom;
  
- out:
-       mutex_unlock(&group->mutex);
        return ret;
  }
  
   * transition. Return failure if this isn't met.
   *
   * We need to consider the race between this and the device release path.
-  * device_lock(dev) is used here to guarantee that the device release path
+  * group->mutex is used here to guarantee that the device release path
   * will not be entered at the same time.
   */
  static ssize_t iommu_group_store_type(struct iommu_group *group,
        else
                return -EINVAL;
  
-       /*
-        * Lock/Unlock the group mutex here before device lock to
-        * 1. Make sure that the iommu group has only one device (this is a
-        *    prerequisite for step 2)
-        * 2. Get struct *dev which is needed to lock device
-        */
        mutex_lock(&group->mutex);
-       if (iommu_group_device_count(group) != 1) {
+       /* We can bring up a flush queue without tearing down the domain. */
+       if (req_type == IOMMU_DOMAIN_DMA_FQ &&
+           group->default_domain->type == IOMMU_DOMAIN_DMA) {
+               ret = iommu_dma_init_fq(group->default_domain);
+               if (!ret)
+                       group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
                mutex_unlock(&group->mutex);
-               pr_err_ratelimited("Cannot change default domain: Group has more than one device\n");
-               return -EINVAL;
+               return ret ?: count;
+       }
+       /* Otherwise, ensure that device exists and no driver is bound. */
+       if (list_empty(&group->devices) || group->owner_cnt) {
+               mutex_unlock(&group->mutex);
+               return -EPERM;
        }
  
-       /* Since group has only one device */
        grp_dev = list_first_entry(&group->devices, struct group_device, list);
        dev = grp_dev->dev;
-       get_device(dev);
+       ret = iommu_change_dev_def_domain(group, dev, req_type);
  
        /*
-        * Don't hold the group mutex because taking group mutex first and then
-        * the device lock could potentially cause a deadlock as below. Assume
-        * two threads T1 and T2. T1 is trying to change default domain of an
-        * iommu group and T2 is trying to hot unplug a device or release [1] VF
-        * of a PCIe device which is in the same iommu group. T1 takes group
-        * mutex and before it could take device lock assume T2 has taken device
-        * lock and is yet to take group mutex. Now, both the threads will be
-        * waiting for the other thread to release lock. Below, lock order was
-        * suggested.
-        * device_lock(dev);
-        *      mutex_lock(&group->mutex);
-        *              iommu_change_dev_def_domain();
-        *      mutex_unlock(&group->mutex);
-        * device_unlock(dev);
-        *
-        * [1] Typical device release path
-        * device_lock() from device/driver core code
-        *  -> bus_notifier()
-        *   -> iommu_bus_notifier()
-        *    -> iommu_release_device()
-        *     -> ops->release_device() vendor driver calls back iommu core code
-        *      -> mutex_lock() from iommu core code
+        * Release the mutex here because ops->probe_finalize() call-back of
+        * some vendor IOMMU drivers calls arm_iommu_attach_device() which
+        * in-turn might call back into IOMMU core code, where it tries to take
+        * group->mutex, resulting in a deadlock.
         */
        mutex_unlock(&group->mutex);
  
-       /* Check if the device in the group still has a driver bound to it */
-       device_lock(dev);
-       if (device_is_bound(dev) && !(req_type == IOMMU_DOMAIN_DMA_FQ &&
-           group->default_domain->type == IOMMU_DOMAIN_DMA)) {
-               pr_err_ratelimited("Device is still bound to driver\n");
-               ret = -EBUSY;
-               goto out;
-       }
-       ret = iommu_change_dev_def_domain(group, dev, req_type);
-       ret = ret ?: count;
- out:
-       device_unlock(dev);
-       put_device(dev);
+       /* Make sure dma_ops is appropriatley set */
+       if (!ret)
+               __iommu_group_dma_finalize(group);
  
-       return ret;
+       return ret ?: count;
  }
  
  static bool iommu_is_default_domain(struct iommu_group *group)
index 6c7c5f3648df81197c3088570c5ada45fa1ee96c,9b96d243631199b8c50e1e56d28c1ff9a30bd4f9..0051f372a66cfea26f11f390bb71a8c90a51430d
@@@ -1004,8 -1004,8 +1004,8 @@@ static void mtk_jpegenc_worker(struct w
  retry_select:
        hw_id = mtk_jpegenc_get_hw(ctx);
        if (hw_id < 0) {
 -              ret = wait_event_interruptible(jpeg->enc_hw_wq,
 -                                             atomic_read(&jpeg->enchw_rdy) > 0);
 +              ret = wait_event_interruptible(jpeg->hw_wq,
 +                                             atomic_read(&jpeg->hw_rdy) > 0);
                if (ret != 0 || (i++ > MTK_JPEG_MAX_RETRY_TIME)) {
                        dev_err(jpeg->dev, "%s : %d, all HW are busy\n",
                                __func__, __LINE__);
                goto retry_select;
        }
  
 -      atomic_dec(&jpeg->enchw_rdy);
 +      atomic_dec(&jpeg->hw_rdy);
        src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
        if (!src_buf)
                goto getbuf_fail;
        if (!dst_buf)
                goto getbuf_fail;
  
 -      v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
 -      v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
 -
        v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, true);
  
        mtk_jpegenc_set_hw_param(ctx, hw_id, src_buf, dst_buf);
                goto enc_end;
        }
  
 +      v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
 +      v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
 +
        schedule_delayed_work(&comp_jpeg[hw_id]->job_timeout_work,
                              msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC));
  
@@@ -1073,7 -1073,7 +1073,7 @@@ enc_end
        v4l2_m2m_buf_done(src_buf, buf_state);
        v4l2_m2m_buf_done(dst_buf, buf_state);
  getbuf_fail:
 -      atomic_inc(&jpeg->enchw_rdy);
 +      atomic_inc(&jpeg->hw_rdy);
        mtk_jpegenc_put_hw(jpeg, hw_id);
        v4l2_m2m_job_finish(jpeg->m2m_dev, ctx->fh.m2m_ctx);
  }
@@@ -1198,8 -1198,8 +1198,8 @@@ static void mtk_jpegdec_worker(struct w
  retry_select:
        hw_id = mtk_jpegdec_get_hw(ctx);
        if (hw_id < 0) {
 -              ret = wait_event_interruptible_timeout(jpeg->dec_hw_wq,
 -                                                     atomic_read(&jpeg->dechw_rdy) > 0,
 +              ret = wait_event_interruptible_timeout(jpeg->hw_wq,
 +                                                     atomic_read(&jpeg->hw_rdy) > 0,
                                                       MTK_JPEG_HW_TIMEOUT_MSEC);
                if (ret != 0 || (i++ > MTK_JPEG_MAX_RETRY_TIME)) {
                        dev_err(jpeg->dev, "%s : %d, all HW are busy\n",
                goto retry_select;
        }
  
 -      atomic_dec(&jpeg->dechw_rdy);
 +      atomic_dec(&jpeg->hw_rdy);
        src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
        if (!src_buf)
                goto getbuf_fail;
        if (!dst_buf)
                goto getbuf_fail;
  
 -      v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
 -      v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
 -
        v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, true);
        jpeg_src_buf = mtk_jpeg_vb2_to_srcbuf(&src_buf->vb2_buf);
        jpeg_dst_buf = mtk_jpeg_vb2_to_srcbuf(&dst_buf->vb2_buf);
                                             &jpeg_src_buf->dec_param)) {
                mtk_jpeg_queue_src_chg_event(ctx);
                ctx->state = MTK_JPEG_SOURCE_CHANGE;
 -              goto dec_end;
 +              goto getbuf_fail;
        }
  
        jpeg_src_buf->curr_ctx = ctx;
                goto clk_end;
        }
  
 +      v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
 +      v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
 +
        schedule_delayed_work(&comp_jpeg[hw_id]->job_timeout_work,
                              msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC));
  
@@@ -1290,7 -1290,7 +1290,7 @@@ dec_end
        v4l2_m2m_buf_done(src_buf, buf_state);
        v4l2_m2m_buf_done(dst_buf, buf_state);
  getbuf_fail:
 -      atomic_inc(&jpeg->dechw_rdy);
 +      atomic_inc(&jpeg->hw_rdy);
        mtk_jpegdec_put_hw(jpeg, hw_id);
        v4l2_m2m_job_finish(jpeg->m2m_dev, ctx->fh.m2m_ctx);
  }
@@@ -1575,7 -1575,12 +1575,7 @@@ static int mtk_jpeg_open(struct file *f
                goto free;
        }
  
 -      if (jpeg->is_jpgenc_multihw)
 -              INIT_WORK(&ctx->jpeg_work, mtk_jpegenc_worker);
 -
 -      if (jpeg->is_jpgdec_multihw)
 -              INIT_WORK(&ctx->jpeg_work, mtk_jpegdec_worker);
 -
 +      INIT_WORK(&ctx->jpeg_work, jpeg->variant->jpeg_worker);
        INIT_LIST_HEAD(&ctx->dst_done_queue);
        spin_lock_init(&ctx->done_queue_lock);
        v4l2_fh_init(&ctx->fh, vfd);
@@@ -1666,52 -1671,10 +1666,52 @@@ static void mtk_jpeg_job_timeout_work(s
        v4l2_m2m_job_finish(jpeg->m2m_dev, ctx->fh.m2m_ctx);
  }
  
 +static int mtk_jpeg_single_core_init(struct platform_device *pdev,
 +                                   struct mtk_jpeg_dev *jpeg_dev)
 +{
 +      struct mtk_jpeg_dev *jpeg = jpeg_dev;
 +      int jpeg_irq, ret;
 +
 +      INIT_DELAYED_WORK(&jpeg->job_timeout_work,
 +                        mtk_jpeg_job_timeout_work);
 +
 +      jpeg->reg_base = devm_platform_ioremap_resource(pdev, 0);
 +      if (IS_ERR(jpeg->reg_base)) {
 +              ret = PTR_ERR(jpeg->reg_base);
 +              return ret;
 +      }
 +
 +      jpeg_irq = platform_get_irq(pdev, 0);
 +      if (jpeg_irq < 0)
 +              return jpeg_irq;
 +
 +      ret = devm_request_irq(&pdev->dev,
 +                             jpeg_irq,
 +                             jpeg->variant->irq_handler,
 +                             0,
 +                             pdev->name, jpeg);
 +      if (ret) {
 +              dev_err(&pdev->dev, "Failed to request jpeg_irq %d (%d)\n",
 +                      jpeg_irq, ret);
 +              return ret;
 +      }
 +
 +      ret = devm_clk_bulk_get(jpeg->dev,
 +                              jpeg->variant->num_clks,
 +                              jpeg->variant->clks);
 +      if (ret) {
 +              dev_err(&pdev->dev, "Failed to init clk\n");
 +              return ret;
 +      }
 +
 +      return 0;
 +}
 +
  static int mtk_jpeg_probe(struct platform_device *pdev)
  {
        struct mtk_jpeg_dev *jpeg;
 -      int jpeg_irq;
 +      struct device_node *child;
 +      int num_child = 0;
        int ret;
  
        jpeg = devm_kzalloc(&pdev->dev, sizeof(*jpeg), GFP_KERNEL);
                return -EINVAL;
        }
  
 -      if (list_empty(&pdev->dev.devres_head)) {
 -              INIT_DELAYED_WORK(&jpeg->job_timeout_work,
 -                                mtk_jpeg_job_timeout_work);
 -
 -              jpeg->reg_base = devm_platform_ioremap_resource(pdev, 0);
 -              if (IS_ERR(jpeg->reg_base)) {
 -                      ret = PTR_ERR(jpeg->reg_base);
 -                      return ret;
 +      if (!jpeg->variant->multi_core) {
 +              ret = mtk_jpeg_single_core_init(pdev, jpeg);
 +              if (ret) {
 +                      v4l2_err(&jpeg->v4l2_dev, "mtk_jpeg_single_core_init failed.");
 +                      return -EINVAL;
                }
 +      } else {
 +              init_waitqueue_head(&jpeg->hw_wq);
  
 -              jpeg_irq = platform_get_irq(pdev, 0);
 -              if (jpeg_irq < 0)
 -                      return jpeg_irq;
 +              for_each_child_of_node(pdev->dev.of_node, child)
 +                      num_child++;
  
 -              ret = devm_request_irq(&pdev->dev,
 -                                     jpeg_irq,
 -                                     jpeg->variant->irq_handler,
 -                                     0,
 -                                     pdev->name, jpeg);
 -              if (ret) {
 -                      dev_err(&pdev->dev, "Failed to request jpeg_irq %d (%d)\n",
 -                              jpeg_irq, ret);
 -                      return ret;
 -              }
 +              atomic_set(&jpeg->hw_rdy, num_child);
 +              atomic_set(&jpeg->hw_index, 0);
  
 -              ret = devm_clk_bulk_get(jpeg->dev,
 -                                      jpeg->variant->num_clks,
 -                                      jpeg->variant->clks);
 -              if (ret) {
 -                      dev_err(&pdev->dev, "Failed to init clk\n");
 -                      return ret;
 -              }
 +              jpeg->workqueue = alloc_ordered_workqueue(MTK_JPEG_NAME,
 +                                                        WQ_MEM_RECLAIM
 +                                                        | WQ_FREEZABLE);
 +              if (!jpeg->workqueue)
 +                      return -EINVAL;
        }
  
        ret = v4l2_device_register(&pdev->dev, &jpeg->v4l2_dev);
        jpeg->vdev->device_caps = V4L2_CAP_STREAMING |
                                  V4L2_CAP_VIDEO_M2M_MPLANE;
  
-       if (of_property_present(pdev->dev.of_node, "dma-ranges"))
-               dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(34));
        ret = video_register_device(jpeg->vdev, VFL_TYPE_VIDEO, -1);
        if (ret) {
                v4l2_err(&jpeg->v4l2_dev, "Failed to register video device\n");
@@@ -1815,7 -1787,7 +1812,7 @@@ err_m2m_init
        return ret;
  }
  
 -static int mtk_jpeg_remove(struct platform_device *pdev)
 +static void mtk_jpeg_remove(struct platform_device *pdev)
  {
        struct mtk_jpeg_dev *jpeg = platform_get_drvdata(pdev);
  
        video_unregister_device(jpeg->vdev);
        v4l2_m2m_release(jpeg->m2m_dev);
        v4l2_device_unregister(&jpeg->v4l2_dev);
 -
 -      return 0;
  }
  
  static __maybe_unused int mtk_jpeg_pm_suspend(struct device *dev)
@@@ -1869,7 -1843,6 +1866,7 @@@ static const struct dev_pm_ops mtk_jpeg
        SET_RUNTIME_PM_OPS(mtk_jpeg_pm_suspend, mtk_jpeg_pm_resume, NULL)
  };
  
 +#if defined(CONFIG_OF)
  static const struct mtk_jpeg_variant mt8173_jpeg_drvdata = {
        .clks = mt8173_jpeg_dec_clocks,
        .num_clks = ARRAY_SIZE(mt8173_jpeg_dec_clocks),
@@@ -1898,7 -1871,6 +1895,7 @@@ static const struct mtk_jpeg_variant mt
        .ioctl_ops = &mtk_jpeg_enc_ioctl_ops,
        .out_q_default_fourcc = V4L2_PIX_FMT_YUYV,
        .cap_q_default_fourcc = V4L2_PIX_FMT_JPEG,
 +      .multi_core = false,
  };
  
  static struct mtk_jpeg_variant mtk8195_jpegenc_drvdata = {
        .ioctl_ops = &mtk_jpeg_enc_ioctl_ops,
        .out_q_default_fourcc = V4L2_PIX_FMT_YUYV,
        .cap_q_default_fourcc = V4L2_PIX_FMT_JPEG,
 +      .multi_core = true,
 +      .jpeg_worker = mtk_jpegenc_worker,
  };
  
  static const struct mtk_jpeg_variant mtk8195_jpegdec_drvdata = {
        .ioctl_ops = &mtk_jpeg_dec_ioctl_ops,
        .out_q_default_fourcc = V4L2_PIX_FMT_JPEG,
        .cap_q_default_fourcc = V4L2_PIX_FMT_YUV420M,
 +      .multi_core = true,
 +      .jpeg_worker = mtk_jpegdec_worker,
  };
  
 -#if defined(CONFIG_OF)
  static const struct of_device_id mtk_jpeg_match[] = {
        {
                .compatible = "mediatek,mt8173-jpgdec",
@@@ -1956,7 -1925,7 +1953,7 @@@ MODULE_DEVICE_TABLE(of, mtk_jpeg_match)
  
  static struct platform_driver mtk_jpeg_driver = {
        .probe = mtk_jpeg_probe,
 -      .remove = mtk_jpeg_remove,
 +      .remove_new = mtk_jpeg_remove,
        .driver = {
                .name           = MTK_JPEG_NAME,
                .of_match_table = of_match_ptr(mtk_jpeg_match),
index 9ba5dc5df648d12abf43fc585caf75d24e5d9599,11583405cf618e0cbc1594e1c8a3f07f86af7e50..9c652beb3f193bc2226ae0678a92eff1df68e2bc
@@@ -321,14 -321,6 +321,6 @@@ static int mtk_vcodec_probe(struct plat
                }
        }
  
-       if (of_property_present(pdev->dev.of_node, "dma-ranges")) {
-               ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(34));
-               if (ret) {
-                       mtk_v4l2_err("Failed to set mask");
-                       goto err_core_workq;
-               }
-       }
        for (i = 0; i < MTK_VDEC_HW_MAX; i++)
                mutex_init(&dev->dec_mutex[i]);
        mutex_init(&dev->dev_mutex);
@@@ -451,8 -443,7 +443,8 @@@ err_core_workq
        if (IS_VDEC_LAT_ARCH(dev->vdec_pdata->hw_arch))
                destroy_workqueue(dev->core_workqueue);
  err_res:
 -      pm_runtime_disable(dev->pm.dev);
 +      if (!dev->vdec_pdata->is_subdev_supported)
 +              pm_runtime_disable(dev->pm.dev);
  err_dec_pm:
        mtk_vcodec_fw_release(dev->fw_handler);
        return ret;
@@@ -488,7 -479,7 +480,7 @@@ static const struct of_device_id mtk_vc
  
  MODULE_DEVICE_TABLE(of, mtk_vcodec_match);
  
 -static int mtk_vcodec_dec_remove(struct platform_device *pdev)
 +static void mtk_vcodec_dec_remove(struct platform_device *pdev)
  {
        struct mtk_vcodec_dev *dev = platform_get_drvdata(pdev);
  
        if (!dev->vdec_pdata->is_subdev_supported)
                pm_runtime_disable(dev->pm.dev);
        mtk_vcodec_fw_release(dev->fw_handler);
 -      return 0;
  }
  
  static struct platform_driver mtk_vcodec_dec_driver = {
        .probe  = mtk_vcodec_probe,
 -      .remove = mtk_vcodec_dec_remove,
 +      .remove_new = mtk_vcodec_dec_remove,
        .driver = {
                .name   = MTK_VCODEC_DEC_NAME,
                .of_match_table = mtk_vcodec_match,
index 755f567b9e54fa50b8a45ca93d1892a1bfa4435c,50e5571608cdfd02874849191550cb8a863d05b9..168004a08888ff12c27cbd8b0c6be9fd526bb085
@@@ -89,24 -89,16 +89,24 @@@ static irqreturn_t mtk_vcodec_enc_irq_h
        struct mtk_vcodec_ctx *ctx;
        unsigned long flags;
        void __iomem *addr;
 +      int core_id;
  
        spin_lock_irqsave(&dev->irqlock, flags);
        ctx = dev->curr_ctx;
        spin_unlock_irqrestore(&dev->irqlock, flags);
  
 -      mtk_v4l2_debug(1, "id=%d coreid:%d", ctx->id, dev->venc_pdata->core_id);
 -      addr = dev->reg_base[dev->venc_pdata->core_id] +
 -                              MTK_VENC_IRQ_ACK_OFFSET;
 +      core_id = dev->venc_pdata->core_id;
 +      if (core_id < 0 || core_id >= NUM_MAX_VCODEC_REG_BASE) {
 +              mtk_v4l2_err("Invalid core id: %d, ctx id: %d",
 +                           core_id, ctx->id);
 +              return IRQ_HANDLED;
 +      }
 +
 +      mtk_v4l2_debug(1, "id: %d, core id: %d", ctx->id, core_id);
  
 -      ctx->irq_status = readl(dev->reg_base[dev->venc_pdata->core_id] +
 +      addr = dev->reg_base[core_id] + MTK_VENC_IRQ_ACK_OFFSET;
 +
 +      ctx->irq_status = readl(dev->reg_base[core_id] +
                                (MTK_VENC_IRQ_STATUS_OFFSET));
  
        clean_irq_status(ctx->irq_status, addr);
@@@ -352,9 -344,6 +352,6 @@@ static int mtk_vcodec_probe(struct plat
                goto err_event_workq;
        }
  
-       if (of_property_present(pdev->dev.of_node, "dma-ranges"))
-               dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(34));
        ret = video_register_device(vfd_enc, VFL_TYPE_VIDEO, -1);
        if (ret) {
                mtk_v4l2_err("Failed to register video device");
@@@ -459,7 -448,7 +456,7 @@@ static const struct of_device_id mtk_vc
  };
  MODULE_DEVICE_TABLE(of, mtk_vcodec_enc_match);
  
 -static int mtk_vcodec_enc_remove(struct platform_device *pdev)
 +static void mtk_vcodec_enc_remove(struct platform_device *pdev)
  {
        struct mtk_vcodec_dev *dev = platform_get_drvdata(pdev);
  
        v4l2_device_unregister(&dev->v4l2_dev);
        pm_runtime_disable(dev->pm.dev);
        mtk_vcodec_fw_release(dev->fw_handler);
 -      return 0;
  }
  
  static struct platform_driver mtk_vcodec_enc_driver = {
        .probe  = mtk_vcodec_probe,
 -      .remove = mtk_vcodec_enc_remove,
 +      .remove_new = mtk_vcodec_enc_remove,
        .driver = {
                .name   = MTK_VCODEC_ENC_NAME,
                .of_match_table = mtk_vcodec_enc_match,
diff --combined include/linux/iommu.h
index 0fd4e6734d5b255b629531a45395f2db0f887ea8,7dbdd13d7ce046eee0af5f2d9bcad26fbbb00411..e8c9a7da1060969a0c9ab1f9311e223bf9df3c96
@@@ -13,7 -13,6 +13,6 @@@
  #include <linux/errno.h>
  #include <linux/err.h>
  #include <linux/of.h>
- #include <linux/ioasid.h>
  #include <uapi/linux/iommu.h>
  
  #define IOMMU_READ    (1 << 0)
@@@ -192,6 -191,7 +191,7 @@@ enum iommu_dev_features 
  };
  
  #define IOMMU_PASID_INVALID   (-1U)
+ typedef unsigned int ioasid_t;
  
  #ifdef CONFIG_IOMMU_API
  
@@@ -455,12 -455,11 +455,11 @@@ static inline const struct iommu_ops *d
        return dev->iommu->iommu_dev->ops;
  }
  
 -extern int bus_iommu_probe(struct bus_type *bus);
 -extern bool iommu_present(struct bus_type *bus);
 +extern int bus_iommu_probe(const struct bus_type *bus);
 +extern bool iommu_present(const struct bus_type *bus);
  extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
  extern bool iommu_group_has_isolated_msi(struct iommu_group *group);
 -extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus);
 +extern struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus);
- extern struct iommu_group *iommu_group_get_by_id(int id);
  extern void iommu_domain_free(struct iommu_domain *domain);
  extern int iommu_attach_device(struct iommu_domain *domain,
                               struct device *dev);
@@@ -699,7 -698,6 +698,6 @@@ static inline void dev_iommu_priv_set(s
  }
  
  int iommu_probe_device(struct device *dev);
- void iommu_release_device(struct device *dev);
  
  int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
  int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
@@@ -732,7 -730,7 +730,7 @@@ struct iommu_device {}
  struct iommu_fault_param {};
  struct iommu_iotlb_gather {};
  
 -static inline bool iommu_present(struct bus_type *bus)
 +static inline bool iommu_present(const struct bus_type *bus)
  {
        return false;
  }
@@@ -742,16 -740,11 +740,11 @@@ static inline bool device_iommu_capable
        return false;
  }
  
 -static inline struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
 +static inline struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus)
  {
        return NULL;
  }
  
- static inline struct iommu_group *iommu_group_get_by_id(int id)
- {
-       return NULL;
- }
  static inline void iommu_domain_free(struct iommu_domain *domain)
  {
  }
@@@ -1172,7 -1165,17 +1165,16 @@@ static inline bool tegra_dev_iommu_get_
        return false;
  }
  
 -static inline bool pasid_valid(ioasid_t ioasid)
 -{
 -      return ioasid != IOMMU_PASID_INVALID;
 -}
 -
  #ifdef CONFIG_IOMMU_SVA
+ static inline void mm_pasid_init(struct mm_struct *mm)
+ {
+       mm->pasid = IOMMU_PASID_INVALID;
+ }
++static inline bool mm_valid_pasid(struct mm_struct *mm)
++{
++      return mm->pasid != IOMMU_PASID_INVALID;
++}
+ void mm_pasid_drop(struct mm_struct *mm);
  struct iommu_sva *iommu_sva_bind_device(struct device *dev,
                                        struct mm_struct *mm);
  void iommu_sva_unbind_device(struct iommu_sva *handle);
@@@ -1192,6 -1195,8 +1194,9 @@@ static inline u32 iommu_sva_get_pasid(s
  {
        return IOMMU_PASID_INVALID;
  }
+ static inline void mm_pasid_init(struct mm_struct *mm) {}
++static inline bool mm_valid_pasid(struct mm_struct *mm) { return false; }
+ static inline void mm_pasid_drop(struct mm_struct *mm) {}
  #endif /* CONFIG_IOMMU_SVA */
  
  #endif /* __LINUX_IOMMU_H */
diff --combined include/linux/sched/mm.h
index b114fbe3a93b51d39174e453ad14906b21373537,da9712a3ba739effccbfa6aa5d9438408737ff1b..8d89c8c4fac1f2db1fc278478486aa71516b52bd
@@@ -8,7 -8,6 +8,6 @@@
  #include <linux/mm_types.h>
  #include <linux/gfp.h>
  #include <linux/sync_core.h>
- #include <linux/ioasid.h>
  
  /*
   * Routines for handling mm_structs
@@@ -37,11 -36,6 +36,11 @@@ static inline void mmgrab(struct mm_str
        atomic_inc(&mm->mm_count);
  }
  
 +static inline void smp_mb__after_mmgrab(void)
 +{
 +      smp_mb__after_atomic();
 +}
 +
  extern void __mmdrop(struct mm_struct *mm);
  
  static inline void mmdrop(struct mm_struct *mm)
@@@ -84,34 -78,6 +83,34 @@@ static inline void mmdrop_sched(struct 
  }
  #endif
  
 +/* Helpers for lazy TLB mm refcounting */
 +static inline void mmgrab_lazy_tlb(struct mm_struct *mm)
 +{
 +      if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT))
 +              mmgrab(mm);
 +}
 +
 +static inline void mmdrop_lazy_tlb(struct mm_struct *mm)
 +{
 +      if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) {
 +              mmdrop(mm);
 +      } else {
 +              /*
 +               * mmdrop_lazy_tlb must provide a full memory barrier, see the
 +               * membarrier comment finish_task_switch which relies on this.
 +               */
 +              smp_mb();
 +      }
 +}
 +
 +static inline void mmdrop_lazy_tlb_sched(struct mm_struct *mm)
 +{
 +      if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT))
 +              mmdrop_sched(mm);
 +      else
 +              smp_mb(); /* see mmdrop_lazy_tlb() above */
 +}
 +
  /**
   * mmget() - Pin the address space associated with a &struct mm_struct.
   * @mm: The address space to pin.
@@@ -484,35 -450,4 +483,4 @@@ static inline void membarrier_update_cu
  }
  #endif
  
- #ifdef CONFIG_IOMMU_SVA
- static inline void mm_pasid_init(struct mm_struct *mm)
- {
-       mm->pasid = INVALID_IOASID;
- }
- static inline bool mm_valid_pasid(struct mm_struct *mm)
- {
-       return mm->pasid != INVALID_IOASID;
- }
- /* Associate a PASID with an mm_struct: */
- static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid)
- {
-       mm->pasid = pasid;
- }
- static inline void mm_pasid_drop(struct mm_struct *mm)
- {
-       if (mm_valid_pasid(mm)) {
-               ioasid_free(mm->pasid);
-               mm->pasid = INVALID_IOASID;
-       }
- }
- #else
- static inline void mm_pasid_init(struct mm_struct *mm) {}
- static inline bool mm_valid_pasid(struct mm_struct *mm) { return false; }
- static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {}
- static inline void mm_pasid_drop(struct mm_struct *mm) {}
- #endif
  #endif /* _LINUX_SCHED_MM_H */
diff --combined kernel/fork.c
index 735d9f4f5acff93d06a21f138fd7bed594344753,e7d10ad98a6912d918abd2530f1b00dfdc5c2f39..ed4e01daccaa008ed0ef98c1a011aa5a4dddab36
@@@ -97,7 -97,7 +97,8 @@@
  #include <linux/io_uring.h>
  #include <linux/bpf.h>
  #include <linux/stackprotector.h>
 +#include <linux/user_events.h>
+ #include <linux/iommu.h>
  
  #include <asm/pgalloc.h>
  #include <linux/uaccess.h>
@@@ -452,49 -452,13 +453,49 @@@ static struct kmem_cache *vm_area_cache
  /* SLAB cache for mm_struct structures (tsk->mm) */
  static struct kmem_cache *mm_cachep;
  
 +#ifdef CONFIG_PER_VMA_LOCK
 +
 +/* SLAB cache for vm_area_struct.lock */
 +static struct kmem_cache *vma_lock_cachep;
 +
 +static bool vma_lock_alloc(struct vm_area_struct *vma)
 +{
 +      vma->vm_lock = kmem_cache_alloc(vma_lock_cachep, GFP_KERNEL);
 +      if (!vma->vm_lock)
 +              return false;
 +
 +      init_rwsem(&vma->vm_lock->lock);
 +      vma->vm_lock_seq = -1;
 +
 +      return true;
 +}
 +
 +static inline void vma_lock_free(struct vm_area_struct *vma)
 +{
 +      kmem_cache_free(vma_lock_cachep, vma->vm_lock);
 +}
 +
 +#else /* CONFIG_PER_VMA_LOCK */
 +
 +static inline bool vma_lock_alloc(struct vm_area_struct *vma) { return true; }
 +static inline void vma_lock_free(struct vm_area_struct *vma) {}
 +
 +#endif /* CONFIG_PER_VMA_LOCK */
 +
  struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
  {
        struct vm_area_struct *vma;
  
        vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 -      if (vma)
 -              vma_init(vma, mm);
 +      if (!vma)
 +              return NULL;
 +
 +      vma_init(vma, mm);
 +      if (!vma_lock_alloc(vma)) {
 +              kmem_cache_free(vm_area_cachep, vma);
 +              return NULL;
 +      }
 +
        return vma;
  }
  
@@@ -502,56 -466,26 +503,56 @@@ struct vm_area_struct *vm_area_dup(stru
  {
        struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
  
 -      if (new) {
 -              ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
 -              ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
 -              /*
 -               * orig->shared.rb may be modified concurrently, but the clone
 -               * will be reinitialized.
 -               */
 -              data_race(memcpy(new, orig, sizeof(*new)));
 -              INIT_LIST_HEAD(&new->anon_vma_chain);
 -              dup_anon_vma_name(orig, new);
 +      if (!new)
 +              return NULL;
 +
 +      ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
 +      ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
 +      /*
 +       * orig->shared.rb may be modified concurrently, but the clone
 +       * will be reinitialized.
 +       */
 +      data_race(memcpy(new, orig, sizeof(*new)));
 +      if (!vma_lock_alloc(new)) {
 +              kmem_cache_free(vm_area_cachep, new);
 +              return NULL;
        }
 +      INIT_LIST_HEAD(&new->anon_vma_chain);
 +      vma_numab_state_init(new);
 +      dup_anon_vma_name(orig, new);
 +
        return new;
  }
  
 -void vm_area_free(struct vm_area_struct *vma)
 +void __vm_area_free(struct vm_area_struct *vma)
  {
 +      vma_numab_state_free(vma);
        free_anon_vma_name(vma);
 +      vma_lock_free(vma);
        kmem_cache_free(vm_area_cachep, vma);
  }
  
 +#ifdef CONFIG_PER_VMA_LOCK
 +static void vm_area_free_rcu_cb(struct rcu_head *head)
 +{
 +      struct vm_area_struct *vma = container_of(head, struct vm_area_struct,
 +                                                vm_rcu);
 +
 +      /* The vma should not be locked while being destroyed. */
 +      VM_BUG_ON_VMA(rwsem_is_locked(&vma->vm_lock->lock), vma);
 +      __vm_area_free(vma);
 +}
 +#endif
 +
 +void vm_area_free(struct vm_area_struct *vma)
 +{
 +#ifdef CONFIG_PER_VMA_LOCK
 +      call_rcu(&vma->vm_rcu, vm_area_free_rcu_cb);
 +#else
 +      __vm_area_free(vma);
 +#endif
 +}
 +
  static void account_kernel_stack(struct task_struct *tsk, int account)
  {
        if (IS_ENABLED(CONFIG_VMAP_STACK)) {
@@@ -842,67 -776,6 +843,67 @@@ static void check_mm(struct mm_struct *
  #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
  #define free_mm(mm)   (kmem_cache_free(mm_cachep, (mm)))
  
 +static void do_check_lazy_tlb(void *arg)
 +{
 +      struct mm_struct *mm = arg;
 +
 +      WARN_ON_ONCE(current->active_mm == mm);
 +}
 +
 +static void do_shoot_lazy_tlb(void *arg)
 +{
 +      struct mm_struct *mm = arg;
 +
 +      if (current->active_mm == mm) {
 +              WARN_ON_ONCE(current->mm);
 +              current->active_mm = &init_mm;
 +              switch_mm(mm, &init_mm, current);
 +      }
 +}
 +
 +static void cleanup_lazy_tlbs(struct mm_struct *mm)
 +{
 +      if (!IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) {
 +              /*
 +               * In this case, lazy tlb mms are refounted and would not reach
 +               * __mmdrop until all CPUs have switched away and mmdrop()ed.
 +               */
 +              return;
 +      }
 +
 +      /*
 +       * Lazy mm shootdown does not refcount "lazy tlb mm" usage, rather it
 +       * requires lazy mm users to switch to another mm when the refcount
 +       * drops to zero, before the mm is freed. This requires IPIs here to
 +       * switch kernel threads to init_mm.
 +       *
 +       * archs that use IPIs to flush TLBs can piggy-back that lazy tlb mm
 +       * switch with the final userspace teardown TLB flush which leaves the
 +       * mm lazy on this CPU but no others, reducing the need for additional
 +       * IPIs here. There are cases where a final IPI is still required here,
 +       * such as the final mmdrop being performed on a different CPU than the
 +       * one exiting, or kernel threads using the mm when userspace exits.
 +       *
 +       * IPI overheads have not found to be expensive, but they could be
 +       * reduced in a number of possible ways, for example (roughly
 +       * increasing order of complexity):
 +       * - The last lazy reference created by exit_mm() could instead switch
 +       *   to init_mm, however it's probable this will run on the same CPU
 +       *   immediately afterwards, so this may not reduce IPIs much.
 +       * - A batch of mms requiring IPIs could be gathered and freed at once.
 +       * - CPUs store active_mm where it can be remotely checked without a
 +       *   lock, to filter out false-positives in the cpumask.
 +       * - After mm_users or mm_count reaches zero, switching away from the
 +       *   mm could clear mm_cpumask to reduce some IPIs, perhaps together
 +       *   with some batching or delaying of the final IPIs.
 +       * - A delayed freeing and RCU-like quiescing sequence based on mm
 +       *   switching to avoid IPIs completely.
 +       */
 +      on_each_cpu_mask(mm_cpumask(mm), do_shoot_lazy_tlb, (void *)mm, 1);
 +      if (IS_ENABLED(CONFIG_DEBUG_VM_SHOOT_LAZIES))
 +              on_each_cpu(do_check_lazy_tlb, (void *)mm, 1);
 +}
 +
  /*
   * Called when the last reference to the mm
   * is dropped: either by a lazy thread or by
@@@ -914,10 -787,6 +915,10 @@@ void __mmdrop(struct mm_struct *mm
  
        BUG_ON(mm == &init_mm);
        WARN_ON_ONCE(mm == current->mm);
 +
 +      /* Ensure no CPUs are using this as their lazy tlb mm */
 +      cleanup_lazy_tlbs(mm);
 +
        WARN_ON_ONCE(mm == current->active_mm);
        mm_free_pgd(mm);
        destroy_context(mm);
        check_mm(mm);
        put_user_ns(mm->user_ns);
        mm_pasid_drop(mm);
 +      mm_destroy_cid(mm);
  
        for (i = 0; i < NR_MM_COUNTERS; i++)
                percpu_counter_destroy(&mm->rss_stat[i]);
@@@ -1190,9 -1058,7 +1191,9 @@@ static struct task_struct *dup_task_str
  
  #ifdef CONFIG_SCHED_MM_CID
        tsk->mm_cid = -1;
 +      tsk->last_mm_cid = -1;
        tsk->mm_cid_active = 0;
 +      tsk->migrate_from_cpu = -1;
  #endif
        return tsk;
  
@@@ -1263,9 -1129,6 +1264,9 @@@ static struct mm_struct *mm_init(struc
        seqcount_init(&mm->write_protect_seq);
        mmap_init_lock(mm);
        INIT_LIST_HEAD(&mm->mmlist);
 +#ifdef CONFIG_PER_VMA_LOCK
 +      mm->mm_lock_seq = 0;
 +#endif
        mm_pgtables_bytes_init(mm);
        mm->map_count = 0;
        mm->locked_vm = 0;
        if (init_new_context(p, mm))
                goto fail_nocontext;
  
 +      if (mm_alloc_cid(mm))
 +              goto fail_cid;
 +
        for (i = 0; i < NR_MM_COUNTERS; i++)
                if (percpu_counter_init(&mm->rss_stat[i], 0, GFP_KERNEL_ACCOUNT))
                        goto fail_pcpu;
  
        mm->user_ns = get_user_ns(user_ns);
        lru_gen_init_mm(mm);
 -      mm_init_cid(mm);
        return mm;
  
  fail_pcpu:
        while (i > 0)
                percpu_counter_destroy(&mm->rss_stat[--i]);
 +      mm_destroy_cid(mm);
 +fail_cid:
 +      destroy_context(mm);
  fail_nocontext:
        mm_free_pgd(mm);
  fail_nopgd:
@@@ -1768,8 -1626,7 +1769,8 @@@ static int copy_fs(unsigned long clone_
        return 0;
  }
  
 -static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
 +static int copy_files(unsigned long clone_flags, struct task_struct *tsk,
 +                    int no_files)
  {
        struct files_struct *oldf, *newf;
        int error = 0;
        if (!oldf)
                goto out;
  
 +      if (no_files) {
 +              tsk->files = NULL;
 +              goto out;
 +      }
 +
        if (clone_flags & CLONE_FILES) {
                atomic_inc(&oldf->count);
                goto out;
@@@ -2103,91 -1955,6 +2104,91 @@@ const struct file_operations pidfd_fop
  #endif
  };
  
 +/**
 + * __pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
 + * @pid:   the struct pid for which to create a pidfd
 + * @flags: flags of the new @pidfd
 + * @pidfd: the pidfd to return
 + *
 + * Allocate a new file that stashes @pid and reserve a new pidfd number in the
 + * caller's file descriptor table. The pidfd is reserved but not installed yet.
 +
 + * The helper doesn't perform checks on @pid which makes it useful for pidfds
 + * created via CLONE_PIDFD where @pid has no task attached when the pidfd and
 + * pidfd file are prepared.
 + *
 + * If this function returns successfully the caller is responsible to either
 + * call fd_install() passing the returned pidfd and pidfd file as arguments in
 + * order to install the pidfd into its file descriptor table or they must use
 + * put_unused_fd() and fput() on the returned pidfd and pidfd file
 + * respectively.
 + *
 + * This function is useful when a pidfd must already be reserved but there
 + * might still be points of failure afterwards and the caller wants to ensure
 + * that no pidfd is leaked into its file descriptor table.
 + *
 + * Return: On success, a reserved pidfd is returned from the function and a new
 + *         pidfd file is returned in the last argument to the function. On
 + *         error, a negative error code is returned from the function and the
 + *         last argument remains unchanged.
 + */
 +static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
 +{
 +      int pidfd;
 +      struct file *pidfd_file;
 +
 +      if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
 +              return -EINVAL;
 +
 +      pidfd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
 +      if (pidfd < 0)
 +              return pidfd;
 +
 +      pidfd_file = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
 +                                      flags | O_RDWR | O_CLOEXEC);
 +      if (IS_ERR(pidfd_file)) {
 +              put_unused_fd(pidfd);
 +              return PTR_ERR(pidfd_file);
 +      }
 +      get_pid(pid); /* held by pidfd_file now */
 +      *ret = pidfd_file;
 +      return pidfd;
 +}
 +
 +/**
 + * pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
 + * @pid:   the struct pid for which to create a pidfd
 + * @flags: flags of the new @pidfd
 + * @pidfd: the pidfd to return
 + *
 + * Allocate a new file that stashes @pid and reserve a new pidfd number in the
 + * caller's file descriptor table. The pidfd is reserved but not installed yet.
 + *
 + * The helper verifies that @pid is used as a thread group leader.
 + *
 + * If this function returns successfully the caller is responsible to either
 + * call fd_install() passing the returned pidfd and pidfd file as arguments in
 + * order to install the pidfd into its file descriptor table or they must use
 + * put_unused_fd() and fput() on the returned pidfd and pidfd file
 + * respectively.
 + *
 + * This function is useful when a pidfd must already be reserved but there
 + * might still be points of failure afterwards and the caller wants to ensure
 + * that no pidfd is leaked into its file descriptor table.
 + *
 + * Return: On success, a reserved pidfd is returned from the function and a new
 + *         pidfd file is returned in the last argument to the function. On
 + *         error, a negative error code is returned from the function and the
 + *         last argument remains unchanged.
 + */
 +int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
 +{
 +      if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
 +              return -EINVAL;
 +
 +      return __pidfd_prepare(pid, flags, ret);
 +}
 +
  static void __delayed_free_task(struct rcu_head *rhp)
  {
        struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
@@@ -2242,7 -2009,7 +2243,7 @@@ static void rv_task_fork(struct task_st
   * parts of the process environment (as per the clone
   * flags). The actual kick-off is left to the caller.
   */
 -static __latent_entropy struct task_struct *copy_process(
 +__latent_entropy struct task_struct *copy_process(
                                        struct pid *pid,
                                        int trace,
                                        int node,
        p->flags &= ~PF_KTHREAD;
        if (args->kthread)
                p->flags |= PF_KTHREAD;
 +      if (args->user_worker)
 +              p->flags |= PF_USER_WORKER;
        if (args->io_thread) {
                /*
                 * Mark us an IO worker, and block any signal that isn't
                siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
        }
  
 +      if (args->name)
 +              strscpy_pad(p->comm, args->name, sizeof(p->comm));
 +
        p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL;
        /*
         * Clear TID on mm_release()?
        retval = copy_semundo(clone_flags, p);
        if (retval)
                goto bad_fork_cleanup_security;
 -      retval = copy_files(clone_flags, p);
 +      retval = copy_files(clone_flags, p, args->no_files);
        if (retval)
                goto bad_fork_cleanup_semundo;
        retval = copy_fs(clone_flags, p);
        if (retval)
                goto bad_fork_cleanup_io;
  
 +      if (args->ignore_signals)
 +              ignore_signals(p);
 +
        stackleak_task_init(p);
  
        if (pid != &init_struct_pid) {
         * if the fd table isn't shared).
         */
        if (clone_flags & CLONE_PIDFD) {
 -              retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
 +              /* Note that no task has been attached to @pid yet. */
 +              retval = __pidfd_prepare(pid, O_RDWR | O_CLOEXEC, &pidfile);
                if (retval < 0)
                        goto bad_fork_free_pid;
 -
                pidfd = retval;
  
 -              pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
 -                                            O_RDWR | O_CLOEXEC);
 -              if (IS_ERR(pidfile)) {
 -                      put_unused_fd(pidfd);
 -                      retval = PTR_ERR(pidfile);
 -                      goto bad_fork_free_pid;
 -              }
 -              get_pid(pid);   /* held by pidfile now */
 -
                retval = put_user(pidfd, args->pidfd);
                if (retval)
                        goto bad_fork_put_pidfd;
  
        trace_task_newtask(p, clone_flags);
        uprobe_copy_process(p, clone_flags);
 +      user_events_fork(p, clone_flags);
  
        copy_oom_score_adj(clone_flags, p);
  
@@@ -2859,7 -2626,6 +2860,7 @@@ struct task_struct *create_io_thread(in
                .fn             = fn,
                .fn_arg         = arg,
                .io_thread      = 1,
 +              .user_worker    = 1,
        };
  
        return copy_process(NULL, 0, node, &args);
@@@ -2963,8 -2729,7 +2964,8 @@@ pid_t kernel_clone(struct kernel_clone_
  /*
   * Create a kernel thread.
   */
 -pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 +pid_t kernel_thread(int (*fn)(void *), void *arg, const char *name,
 +                  unsigned long flags)
  {
        struct kernel_clone_args args = {
                .flags          = ((lower_32_bits(flags) | CLONE_VM |
                .exit_signal    = (lower_32_bits(flags) & CSIGNAL),
                .fn             = fn,
                .fn_arg         = arg,
 +              .name           = name,
                .kthread        = 1,
        };
  
@@@ -3302,9 -3066,6 +3303,9 @@@ void __init proc_caches_init(void
                        NULL);
  
        vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
 +#ifdef CONFIG_PER_VMA_LOCK
 +      vma_lock_cachep = KMEM_CACHE(vma_lock, SLAB_PANIC|SLAB_ACCOUNT);
 +#endif
        mmap_init();
        nsproxy_cache_init();
  }
diff --combined mm/init-mm.c
index 33269314e06017509dd39255ea910118bdc7e0a2,a084039f55d8253c1276b66824a8454e00ef38dd..efa97b57acfd888a0e5db8b1e34cc4dcc20da70c
@@@ -10,7 -10,7 +10,7 @@@
  
  #include <linux/atomic.h>
  #include <linux/user_namespace.h>
- #include <linux/ioasid.h>
+ #include <linux/iommu.h>
  #include <asm/mmu.h>
  
  #ifndef INIT_MM_CONTEXT
@@@ -37,13 -37,10 +37,13 @@@ struct mm_struct init_mm = 
        .page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
        .arg_lock       =  __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
        .mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
 +#ifdef CONFIG_PER_VMA_LOCK
 +      .mm_lock_seq    = 0,
 +#endif
        .user_ns        = &init_user_ns,
        .cpu_bitmap     = CPU_BITS_NONE,
  #ifdef CONFIG_IOMMU_SVA
-       .pasid          = INVALID_IOASID,
+       .pasid          = IOMMU_PASID_INVALID,
  #endif
        INIT_MM_CONTEXT(init_mm)
  };
This page took 0.272876 seconds and 4 git commands to generate.