--- /dev/null
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================================
+Shared Virtual Addressing (SVA) with ENQCMD
+===========================================
+
+Background
+==========
+
+Shared Virtual Addressing (SVA) allows the processor and device to use the
+same virtual addresses, avoiding the need for software to translate virtual
+addresses to physical addresses. SVA is what PCIe calls Shared Virtual
+Memory (SVM).
+
+In addition to the convenience of the device using application virtual
+addresses, SVA also removes the need to pin pages for DMA.
+PCIe Address Translation Services (ATS) along with Page Request Interface
+(PRI) allow devices to function much the same way as the CPU handles
+application page-faults. For more information please refer to the PCIe
+specification Chapter 10: ATS Specification.
+
+Use of SVA requires IOMMU support in the platform. The IOMMU must also
+support the PCIe ATS and PRI features. ATS allows devices
+to cache translations for virtual addresses. The IOMMU driver uses the
+mmu_notifier() support to keep the device TLB cache and the CPU cache in
+sync. When an ATS lookup fails for a virtual address, the device should
+use PRI to request that the virtual address be paged into the
+CPU page tables. The device must use ATS again to fetch the
+translation before use.
+
+Shared Hardware Workqueues
+==========================
+
+Unlike Single Root I/O Virtualization (SR-IOV), Scalable IOV (SIOV) permits
+the use of Shared Work Queues (SWQ) by both applications and Virtual
+Machines (VMs). This allows better hardware utilization than hard
+partitioning of resources, which can result in underutilization. In order to
+allow the hardware to distinguish the context for which work is being
+executed in the hardware via the SWQ interface, SIOV uses Process Address
+Space ID (PASID), which is a 20-bit number defined by the PCIe SIG.
+
+The PASID value is encoded in all transactions from the device. This allows
+the IOMMU to track I/O on a per-PASID granularity in addition to using the
+PCIe Requester ID (RID), which is the Bus/Device/Function.
+
+
+ENQCMD
+======
+
+ENQCMD is a new instruction on Intel platforms that atomically submits a
+work descriptor to a device. The descriptor includes the operation to be
+performed, virtual addresses of all parameters, virtual address of a completion
+record, and the PASID (process address space ID) of the current process.
+
+ENQCMD works with non-posted semantics and carries a status back indicating
+whether the command was accepted by hardware. This lets the submitter know
+whether the submission needs to be retried, or whether other device-specific
+mechanisms to implement fairness or ensure forward progress should be
+provided.
+
+ENQCMD is the glue that lets applications submit commands directly to the
+hardware and also keeps the hardware aware of the application context, via
+the PASID, when performing I/O operations.
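+
+As an illustration (a sketch, not code from a real driver: the helper and
+descriptor names are made up), a user-space submission loop might look like
+this; the .byte sequence encodes ENQCMD, and EFLAGS.ZF=1 reports that the
+device did not accept the command::
+
+  #include <errno.h>
+  #include <stdbool.h>
+
+  /*
+   * Submit a 64-byte, device-specific descriptor to an mmap()'d SWQ
+   * portal. Returns 0 if the device accepted the command, -EAGAIN if
+   * the submitter should retry or back off.
+   */
+  static inline int submit_desc(void *portal, const void *desc)
+  {
+          bool retry;
+
+          /* ENQCMD: portal address in %rax, descriptor at (%rdx) */
+          asm volatile(".byte 0xf2, 0x0f, 0x38, 0xf8, 0x02"
+                       : "=@ccz" (retry)
+                       : "a" (portal), "d" (desc)
+                       : "memory");
+          return retry ? -EAGAIN : 0;
+  }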
+
+Process Address Space Tagging
+=============================
+
+A new thread-scoped MSR (IA32_PASID) provides the connection between
+user processes and the rest of the hardware. When an application first
+accesses an SVA-capable device, this MSR is initialized with a newly
+allocated PASID. The driver for the device calls an IOMMU-specific API
+that sets up the routing for DMA and page-requests.
+
+For example, the Intel Data Streaming Accelerator (DSA) uses
+iommu_sva_bind_device(), which will do the following (a driver-side sketch
+follows the list):
+
+- Allocate the PASID, and program the process page-table (%cr3 register) in the
+ PASID context entries.
+- Register for mmu_notifier() to track any page-table invalidations to keep
+ the device TLB in sync. For example, when a page-table entry is invalidated,
+ the IOMMU propagates the invalidation to the device TLB. This will force any
+ future access by the device to this virtual address to participate in
+ ATS. If the IOMMU responds that the page is not present, the device
+ will request the page to be paged in via the PCIe PRI protocol before
+ performing I/O.
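+
+A minimal sketch of that bind step, assuming the iommu_sva_bind_device(),
+iommu_sva_get_pasid() and iommu_sva_unbind_device() helpers declared in
+include/linux/iommu.h (error handling trimmed)::
+
+  #include <linux/iommu.h>
+
+  /* On open(), bind the current process address space to the device. */
+  struct iommu_sva *handle = iommu_sva_bind_device(dev, current->mm);
+
+  if (IS_ERR(handle))
+          return PTR_ERR(handle);
+
+  /* The PASID the device must carry in its DMA transactions. */
+  u32 pasid = iommu_sva_get_pasid(handle);
+
+  /* ... and on release: iommu_sva_unbind_device(handle); */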
+
+This MSR is managed with the XSAVE feature set as "supervisor state" to
+ensure the MSR is updated during context switch.
+
+PASID Management
+================
+
+The kernel must allocate a PASID on behalf of each process which will use
+ENQCMD and program it into the new MSR to communicate the process identity to
+platform hardware. ENQCMD uses the PASID stored in this MSR to tag requests
+from this process. When a user submits a work descriptor to a device using the
+ENQCMD instruction, the PASID field in the descriptor is auto-filled with the
+value from MSR_IA32_PASID. Requests for DMA from the device are also tagged
+with the same PASID. The platform IOMMU uses the PASID in the transaction to
+perform address translation. The IOMMU APIs set up the corresponding PASID
+entry in the IOMMU with the process page-table root used by the CPU (e.g. the
+%cr3 register on x86).
+
+The MSR must be configured on each logical CPU before any application
+thread can interact with a device. Threads that belong to the same
+process share the same page tables, thus the same MSR value.
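+
+For reference, the IA32_PASID MSR layout (MSR 0xd93; the PASID occupies
+bits 19:0 and bit 31 is the valid bit)::
+
+  [63:32]  reserved
+  [31]     PASID valid
+  [30:20]  reserved
+  [19:0]   PASID value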
+
+PASID Life Cycle Management
+===========================
+
+PASID is initialized as IOMMU_PASID_INVALID (-1) when a process is created.
+
+Only processes that access SVA-capable devices need to have a PASID
+allocated. This allocation happens when a process opens/binds an SVA-capable
+device but no PASID has yet been allocated for it. Subsequent binds of the
+same, or other, devices will share the same PASID.
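+
+A condensed sketch of this allocate-once behaviour, mirroring the IDA-based
+allocator in drivers/iommu/iommu-sva.c (locking shown, error handling
+trimmed)::
+
+  mutex_lock(&iommu_sva_lock);
+  if (!mm_valid_pasid(mm)) {
+          /* First bind for this mm: allocate a global PASID. */
+          mm->pasid = ida_alloc_range(&iommu_global_pasid_ida,
+                                      min, max, GFP_KERNEL);
+  }
+  /* Later binds simply reuse mm->pasid. */
+  mutex_unlock(&iommu_sva_lock);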
+
+Although the PASID is allocated to the process by opening a device,
+it is not active in any of the threads of that process. It is loaded into the
+IA32_PASID MSR lazily, when a thread first tries to submit a work descriptor
+to a device using ENQCMD.
+
+That first access will trigger a #GP fault because the IA32_PASID MSR
+has not been initialized with the PASID value assigned to the process
+when the device was opened. The Linux #GP handler notes that a PASID has
+been allocated for the process, and so initializes the IA32_PASID MSR
+and returns so that the ENQCMD instruction is re-executed.
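+
+A condensed sketch of that fix-up path, modelled on try_fixup_enqcmd_gp()
+in arch/x86/kernel/traps.c::
+
+  if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
+          return false;
+
+  /* If the mm has no PASID, this #GP cannot be fixed up. */
+  if (!mm_valid_pasid(current->mm))
+          return false;
+
+  /* Already activated? Then the #GP came from something else. */
+  if (current->pasid_activated)
+          return false;
+
+  wrmsrl(MSR_IA32_PASID, current->mm->pasid | MSR_IA32_PASID_VALID);
+  current->pasid_activated = 1;
+
+  return true;   /* so the faulting ENQCMD is re-executed */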
+
+On fork(2) or exec(2) the PASID is removed from the process as it no
+longer has the same address space that it had when the device was opened.
+
+On clone(2) the new task shares the same address space, so it will be
+able to use the PASID allocated to the process. The IA32_PASID MSR is not
+preemptively initialized: the PASID might not be allocated yet, the kernel
+does not know whether this thread is going to access the device, and a
+cleared IA32_PASID MSR reduces context switch overhead via the xstate
+init optimization. Since #GP faults have to be handled for any threads that
+were created before the PASID was assigned to the mm of the process, newly
+created threads might as well be treated in the same, consistent way.
+
+Because freeing the PASID and clearing the IA32_PASID MSR in every thread
+at unbind time would be complex, the PASID is instead freed lazily, only on
+mm exit.
+
+If a process does a close(2) of the device file descriptor and munmap(2)
+of the device MMIO portal, then the driver will unbind the device. The
+PASID is still marked valid in the IA32_PASID MSR for any threads in the
+process that accessed the device. But this is harmless: without the
+MMIO portal they cannot submit new work to the device.
+
+Relationships
+=============
+
+ * Each process has many threads, but only one PASID.
+ * Devices have a limited number (tens to thousands) of hardware workqueues.
+ The device driver manages allocating hardware workqueues.
+ * A single mmap() maps a single hardware workqueue as a "portal" and
+ each portal maps down to a single workqueue.
+ * For each device with which a process interacts, there must be
+ one or more mmap()'d portals.
+ * Many threads within a process can share a single portal to access
+ a single device.
+ * Multiple processes can separately mmap() the same portal, in
+ which case they still share one device hardware workqueue.
+ * The single process-wide PASID is used by all threads to interact
+ with all devices. There is not, for instance, a PASID for each
+ thread or each thread<->device pair.
+
+FAQ
+===
+
+* What is SVA/SVM?
+
+Shared Virtual Addressing (SVA) permits I/O hardware and the processor to
+work in the same address space, i.e., to share it. Some call it Shared
+Virtual Memory (SVM), but the Linux community wanted to avoid confusing it
+with POSIX Shared Memory and Secure Virtual Machines, which were terms
+already in circulation.
+
+* What is a PASID?
+
+A Process Address Space ID (PASID) is a 20-bit number allocated and managed
+by the OS and carried in a PCIe-defined Transaction Layer Packet (TLP)
+prefix. The PASID is included in all transactions between the platform and
+the device.
+
+* How are shared workqueues different?
+
+Traditionally, in order for userspace applications to interact with hardware,
+a separate hardware instance is required per process. For example,
+consider doorbells as a mechanism of informing hardware about work to process.
+Each doorbell is required to be spaced 4k (or page-size) apart for process
+isolation. This requires hardware to provision that space and reserve it in
+MMIO, which doesn't scale as the number of threads becomes quite large. The
+hardware also manages the queue depth for Shared Work Queues (SWQ), so
+consumers don't need to track queue depth. If there is no space to accept
+a command, the device returns an error indicating that the submission should
+be retried.
+
+A user should check the Deferrable Memory Write (DMWr) capability on the
+device and only submit ENQCMD when the device supports it. In the new DMWr
+PCIe terminology, devices need to support the DMWr completer capability. In
+addition, all switch ports must support DMWr routing, and it must be enabled
+by the PCIe subsystem, much like how PCIe atomic operations are managed, for
+instance.
+
+SWQ allows hardware to provision just a single address in the device. When
+used with ENQCMD to submit work, the device can distinguish the process
+submitting the work since it will include the PASID assigned to that
+process. This helps the device scale to a large number of processes.
+
+* Is this the same as a user space device driver?
+
+Communicating with the device via the shared workqueue is much simpler
+than a full-blown user space driver. The kernel driver does all the
+initialization of the hardware. User space only needs to worry about
+submitting work and processing completions.
+
+* Is this the same as SR-IOV?
+
+Single Root I/O Virtualization (SR-IOV) focuses on providing independent
+hardware interfaces for virtualizing hardware. Hence, it is required to be an
+almost fully functional interface to software, supporting the traditional
+BARs, space for interrupts via MSI-X, and its own register layout.
+Virtual Functions (VFs) are assisted by the Physical Function (PF)
+driver.
+
+Scalable I/O Virtualization builds on the PASID concept to create device
+instances for virtualization. SIOV requires host software to assist in
+creating virtual devices; each virtual device is represented by a PASID
+along with the bus/device/function of the device. This allows device
+hardware to optimize device resource creation, and resources can grow
+dynamically on demand, whereas SR-IOV creation and management is very
+static in nature. Consult the references below for more details.
+
+* Why not just create a virtual function for each app?
+
+Creating PCIe SR-IOV type Virtual Functions (VF) is expensive. VFs require
+duplicated hardware for PCI config space and interrupts such as MSI-X.
+Resources such as interrupts have to be hard partitioned between VFs at
+creation time, and cannot scale dynamically on demand. The VFs are not
+completely independent from the Physical Function (PF). Most VFs require
+some communication and assistance from the PF driver. SIOV, in contrast,
+creates a software-defined device where all the configuration and control
+aspects are mediated via the slow path. The work submission and completion
+happen without any mediation.
+
+* Does this support virtualization?
+
+ENQCMD can be used from within a guest VM. In these cases, the VMM helps
+with setting up a translation table to translate from Guest PASID to Host
+PASID. Please consult the ENQCMD instruction set reference for more
+details.
+
+* Does memory need to be pinned?
+
+When devices support SVA, and the platform IOMMU supports such devices,
+there is no need to pin memory for DMA purposes.
+Devices that support SVA also support other PCIe features that remove the
+pinning requirement for memory:
+
+Device TLB support - The device requests the IOMMU to look up an address
+before use via Address Translation Service (ATS) requests. If the mapping
+exists but there is no page allocated by the OS, the IOMMU hardware returns
+that no mapping exists.
+
+The device then requests that the virtual address be mapped via the Page
+Request Interface (PRI). Once the OS has successfully completed the mapping,
+it returns the response to the device. The device requests the
+translation again and continues.
+
+The IOMMU works with the OS to manage consistency of the page-tables with
+the device. When removing pages, it interacts with the device to remove any
+device TLB entries that might have been cached, before removing the mappings
+from the OS.
+
+References
+==========
+
+VT-D:
+https://01.org/blogs/ashokraj/2018/recent-enhancements-intel-virtualization-technology-directed-i/o-intel-vt-d
+
+SIOV:
+https://01.org/blogs/2019/assignable-interfaces-intel-scalable-i/o-virtualization-linux
+
+ENQCMD in ISE:
+https://software.intel.com/sites/default/files/managed/c5/15/architecture-instruction-set-extensions-programming-reference.pdf
+
+DSA spec:
+https://software.intel.com/sites/default/files/341204-intel-data-streaming-accelerator-spec.pdf
#address-cells = <2>;
#size-cells = <2>;
compatible = "simple-bus";
+ dma-ranges = <0x0 0x0 0x0 0x0 0x4 0x0>;
ranges;
gic: interrupt-controller@c000000 {
#clock-cells = <1>;
};
+ gpu: gpu@13040000 {
+ compatible = "mediatek,mt8186-mali",
+ "arm,mali-bifrost";
+ reg = <0 0x13040000 0 0x4000>;
+
+ clocks = <&mfgsys CLK_MFG_BG3D>;
+ interrupts = <GIC_SPI 276 IRQ_TYPE_LEVEL_HIGH 0>,
+ <GIC_SPI 275 IRQ_TYPE_LEVEL_HIGH 0>,
+ <GIC_SPI 274 IRQ_TYPE_LEVEL_HIGH 0>;
+ interrupt-names = "job", "mmu", "gpu";
+ power-domains = <&spm MT8186_POWER_DOMAIN_MFG2>,
+ <&spm MT8186_POWER_DOMAIN_MFG3>;
+ power-domain-names = "core0", "core1";
+ #cooling-cells = <2>;
+ status = "disabled";
+ };
+
mmsys: syscon@14000000 {
compatible = "mediatek,mt8186-mmsys", "syscon";
reg = <0 0x14000000 0 0x1000>;
#include <dt-bindings/pinctrl/mt8195-pinfunc.h>
#include <dt-bindings/power/mt8195-power.h>
#include <dt-bindings/reset/mt8195-resets.h>
+#include <dt-bindings/thermal/thermal.h>
+#include <dt-bindings/thermal/mediatek,lvts-thermal.h>
/ {
compatible = "mediatek,mt8195";
aliases {
gce0 = &gce0;
gce1 = &gce1;
+ ethdr0 = &ethdr0;
+ mutex0 = &mutex;
+ mutex1 = &mutex1;
+ merge1 = &merge1;
+ merge2 = &merge2;
+ merge3 = &merge3;
+ merge4 = &merge4;
+ merge5 = &merge5;
+ vdo1-rdma0 = &vdo1_rdma0;
+ vdo1-rdma1 = &vdo1_rdma1;
+ vdo1-rdma2 = &vdo1_rdma2;
+ vdo1-rdma3 = &vdo1_rdma3;
+ vdo1-rdma4 = &vdo1_rdma4;
+ vdo1-rdma5 = &vdo1_rdma5;
+ vdo1-rdma6 = &vdo1_rdma6;
+ vdo1-rdma7 = &vdo1_rdma7;
};
cpus {
#performance-domain-cells = <1>;
};
+ gpu_opp_table: opp-table-gpu {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp-390000000 {
+ opp-hz = /bits/ 64 <390000000>;
+ opp-microvolt = <625000>;
+ };
+ opp-410000000 {
+ opp-hz = /bits/ 64 <410000000>;
+ opp-microvolt = <631250>;
+ };
+ opp-431000000 {
+ opp-hz = /bits/ 64 <431000000>;
+ opp-microvolt = <631250>;
+ };
+ opp-473000000 {
+ opp-hz = /bits/ 64 <473000000>;
+ opp-microvolt = <637500>;
+ };
+ opp-515000000 {
+ opp-hz = /bits/ 64 <515000000>;
+ opp-microvolt = <637500>;
+ };
+ opp-556000000 {
+ opp-hz = /bits/ 64 <556000000>;
+ opp-microvolt = <643750>;
+ };
+ opp-598000000 {
+ opp-hz = /bits/ 64 <598000000>;
+ opp-microvolt = <650000>;
+ };
+ opp-640000000 {
+ opp-hz = /bits/ 64 <640000000>;
+ opp-microvolt = <650000>;
+ };
+ opp-670000000 {
+ opp-hz = /bits/ 64 <670000000>;
+ opp-microvolt = <662500>;
+ };
+ opp-700000000 {
+ opp-hz = /bits/ 64 <700000000>;
+ opp-microvolt = <675000>;
+ };
+ opp-730000000 {
+ opp-hz = /bits/ 64 <730000000>;
+ opp-microvolt = <687500>;
+ };
+ opp-760000000 {
+ opp-hz = /bits/ 64 <760000000>;
+ opp-microvolt = <700000>;
+ };
+ opp-790000000 {
+ opp-hz = /bits/ 64 <790000000>;
+ opp-microvolt = <712500>;
+ };
+ opp-820000000 {
+ opp-hz = /bits/ 64 <820000000>;
+ opp-microvolt = <725000>;
+ };
+ opp-850000000 {
+ opp-hz = /bits/ 64 <850000000>;
+ opp-microvolt = <737500>;
+ };
+ opp-880000000 {
+ opp-hz = /bits/ 64 <880000000>;
+ opp-microvolt = <750000>;
+ };
+ };
+
pmu-a55 {
compatible = "arm,cortex-a55-pmu";
interrupt-parent = <&gic>;
#size-cells = <2>;
compatible = "simple-bus";
ranges;
+ dma-ranges = <0x0 0x0 0x0 0x0 0x4 0x0>;
gic: interrupt-controller@c000000 {
compatible = "arm,gic-v3";
power-domain@MT8195_POWER_DOMAIN_MFG1 {
reg = <MT8195_POWER_DOMAIN_MFG1>;
- clocks = <&apmixedsys CLK_APMIXED_MFGPLL>;
- clock-names = "mfg";
+ clocks = <&apmixedsys CLK_APMIXED_MFGPLL>,
+ <&topckgen CLK_TOP_MFG_CORE_TMP>;
+ clock-names = "mfg", "alt";
mediatek,infracfg = <&infracfg_ao>;
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";
};
+ lvts_ap: thermal-sensor@1100b000 {
+ compatible = "mediatek,mt8195-lvts-ap";
+ reg = <0 0x1100b000 0 0x1000>;
+ interrupts = <GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&infracfg_ao CLK_INFRA_AO_THERM>;
+ resets = <&infracfg_ao MT8195_INFRA_RST0_THERM_CTRL_SWRST>;
+ nvmem-cells = <&lvts_efuse_data1 &lvts_efuse_data2>;
+ nvmem-cell-names = "lvts-calib-data-1", "lvts-calib-data-2";
+ #thermal-sensor-cells = <1>;
+ };
+
+ disp_pwm0: pwm@1100e000 {
+ compatible = "mediatek,mt8195-disp-pwm", "mediatek,mt8183-disp-pwm";
+ reg = <0 0x1100e000 0 0x1000>;
+ interrupts = <GIC_SPI 203 IRQ_TYPE_LEVEL_LOW 0>;
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS0>;
+ #pwm-cells = <2>;
+ clocks = <&topckgen CLK_TOP_DISP_PWM0>,
+ <&infracfg_ao CLK_INFRA_AO_DISP_PWM>;
+ clock-names = "main", "mm";
+ status = "disabled";
+ };
+
+ disp_pwm1: pwm@1100f000 {
+ compatible = "mediatek,mt8195-disp-pwm", "mediatek,mt8183-disp-pwm";
+ reg = <0 0x1100f000 0 0x1000>;
+ interrupts = <GIC_SPI 793 IRQ_TYPE_LEVEL_HIGH 0>;
+ #pwm-cells = <2>;
+ clocks = <&topckgen CLK_TOP_DISP_PWM1>,
+ <&infracfg_ao CLK_INFRA_AO_DISP_PWM1>;
+ clock-names = "main", "mm";
+ status = "disabled";
+ };
+
spi1: spi@11010000 {
compatible = "mediatek,mt8195-spi",
"mediatek,mt6765-spi";
status = "disabled";
};
+ lvts_mcu: thermal-sensor@11278000 {
+ compatible = "mediatek,mt8195-lvts-mcu";
+ reg = <0 0x11278000 0 0x1000>;
+ interrupts = <GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&infracfg_ao CLK_INFRA_AO_THERM>;
+ resets = <&infracfg_ao MT8195_INFRA_RST4_THERM_CTRL_MCU_SWRST>;
+ nvmem-cells = <&lvts_efuse_data1 &lvts_efuse_data2>;
+ nvmem-cell-names = "lvts-calib-data-1", "lvts-calib-data-2";
+ #thermal-sensor-cells = <1>;
+ };
+
xhci1: usb@11290000 {
compatible = "mediatek,mt8195-xhci",
"mediatek,mtk-xhci";
status = "disabled";
};
+ gpu: gpu@13000000 {
+ compatible = "mediatek,mt8195-mali", "mediatek,mt8192-mali",
+ "arm,mali-valhall-jm";
+ reg = <0 0x13000000 0 0x4000>;
+
+ clocks = <&mfgcfg CLK_MFG_BG3D>;
+ interrupts = <GIC_SPI 397 IRQ_TYPE_LEVEL_HIGH 0>,
+ <GIC_SPI 396 IRQ_TYPE_LEVEL_HIGH 0>,
+ <GIC_SPI 395 IRQ_TYPE_LEVEL_HIGH 0>;
+ interrupt-names = "job", "mmu", "gpu";
+ operating-points-v2 = <&gpu_opp_table>;
+ power-domains = <&spm MT8195_POWER_DOMAIN_MFG2>,
+ <&spm MT8195_POWER_DOMAIN_MFG3>,
+ <&spm MT8195_POWER_DOMAIN_MFG4>,
+ <&spm MT8195_POWER_DOMAIN_MFG5>,
+ <&spm MT8195_POWER_DOMAIN_MFG6>;
+ power-domain-names = "core0", "core1", "core2", "core3", "core4";
+ status = "disabled";
+ };
+
mfgcfg: clock-controller@13fbf000 {
compatible = "mediatek,mt8195-mfgcfg";
reg = <0 0x13fbf000 0 0x1000>;
#clock-cells = <1>;
};
- vppsys0: clock-controller@14000000 {
- compatible = "mediatek,mt8195-vppsys0";
+ vppsys0: syscon@14000000 {
+ compatible = "mediatek,mt8195-vppsys0", "syscon";
reg = <0 0x14000000 0 0x1000>;
#clock-cells = <1>;
};
+ mutex@1400f000 {
+ compatible = "mediatek,mt8195-vpp-mutex";
+ reg = <0 0x1400f000 0 0x1000>;
+ interrupts = <GIC_SPI 592 IRQ_TYPE_LEVEL_HIGH 0>;
+ mediatek,gce-client-reg = <&gce1 SUBSYS_1400XXXX 0xf000 0x1000>;
+ clocks = <&vppsys0 CLK_VPP0_MUTEX>;
+ power-domains = <&spm MT8195_POWER_DOMAIN_VPPSYS0>;
+ };
+
smi_sub_common_vpp0_vpp1_2x1: smi@14010000 {
compatible = "mediatek,mt8195-smi-sub-common";
reg = <0 0x14010000 0 0x1000>;
power-domains = <&spm MT8195_POWER_DOMAIN_WPESYS>;
};
- vppsys1: clock-controller@14f00000 {
- compatible = "mediatek,mt8195-vppsys1";
+ vppsys1: syscon@14f00000 {
+ compatible = "mediatek,mt8195-vppsys1", "syscon";
reg = <0 0x14f00000 0 0x1000>;
#clock-cells = <1>;
};
+ mutex@14f01000 {
+ compatible = "mediatek,mt8195-vpp-mutex";
+ reg = <0 0x14f01000 0 0x1000>;
+ interrupts = <GIC_SPI 635 IRQ_TYPE_LEVEL_HIGH 0>;
+ mediatek,gce-client-reg = <&gce1 SUBSYS_14f0XXXX 0x1000 0x1000>;
+ clocks = <&vppsys1 CLK_VPP1_DISP_MUTEX>;
+ power-domains = <&spm MT8195_POWER_DOMAIN_VPPSYS1>;
+ };
+
larb5: larb@14f02000 {
compatible = "mediatek,mt8195-smi-larb";
reg = <0 0x14f02000 0 0x1000>;
power-domains = <&spm MT8195_POWER_DOMAIN_VENC>;
#address-cells = <2>;
#size-cells = <2>;
- dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>;
};
jpgdec-master {
<&iommu_vdo M4U_PORT_L19_JPGDEC_BSDMA1>,
<&iommu_vdo M4U_PORT_L19_JPGDEC_BUFF_OFFSET1>,
<&iommu_vdo M4U_PORT_L19_JPGDEC_BUFF_OFFSET0>;
- dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>;
#address-cells = <2>;
#size-cells = <2>;
ranges;
<&iommu_vpp M4U_PORT_L20_JPGENC_C_RDMA>,
<&iommu_vpp M4U_PORT_L20_JPGENC_Q_TABLE>,
<&iommu_vpp M4U_PORT_L20_JPGENC_BSDMA>;
- dma-ranges = <0x1 0x0 0x0 0x40000000 0x0 0xfff00000>;
#address-cells = <2>;
#size-cells = <2>;
ranges;
vdosys1: syscon@1c100000 {
compatible = "mediatek,mt8195-vdosys1", "syscon";
reg = <0 0x1c100000 0 0x1000>;
+ mboxes = <&gce0 1 CMDQ_THR_PRIO_4>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x0000 0x1000>;
#clock-cells = <1>;
+ #reset-cells = <1>;
};
smi_common_vdo: smi@1c01b000 {
power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS0>;
};
+ mutex1: mutex@1c101000 {
+ compatible = "mediatek,mt8195-disp-mutex";
+ reg = <0 0x1c101000 0 0x1000>;
+ reg-names = "vdo1_mutex";
+ interrupts = <GIC_SPI 494 IRQ_TYPE_LEVEL_HIGH 0>;
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
+ clocks = <&vdosys1 CLK_VDO1_DISP_MUTEX>;
+ clock-names = "vdo1_mutex";
+ mediatek,gce-events = <CMDQ_EVENT_VDO1_STREAM_DONE_ENG_0>;
+ };
+
larb2: larb@1c102000 {
compatible = "mediatek,mt8195-smi-larb";
reg = <0 0x1c102000 0 0x1000>;
power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
};
+ vdo1_rdma0: rdma@1c104000 {
+ compatible = "mediatek,mt8195-vdo1-rdma";
+ reg = <0 0x1c104000 0 0x1000>;
+ interrupts = <GIC_SPI 495 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&vdosys1 CLK_VDO1_MDP_RDMA0>;
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
+ iommus = <&iommu_vdo M4U_PORT_L2_MDP_RDMA0>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x4000 0x1000>;
+ };
+
+ vdo1_rdma1: rdma@1c105000 {
+ compatible = "mediatek,mt8195-vdo1-rdma";
+ reg = <0 0x1c105000 0 0x1000>;
+ interrupts = <GIC_SPI 496 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&vdosys1 CLK_VDO1_MDP_RDMA1>;
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
+ iommus = <&iommu_vpp M4U_PORT_L3_MDP_RDMA1>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x5000 0x1000>;
+ };
+
+ vdo1_rdma2: rdma@1c106000 {
+ compatible = "mediatek,mt8195-vdo1-rdma";
+ reg = <0 0x1c106000 0 0x1000>;
+ interrupts = <GIC_SPI 497 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&vdosys1 CLK_VDO1_MDP_RDMA2>;
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
+ iommus = <&iommu_vdo M4U_PORT_L2_MDP_RDMA2>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x6000 0x1000>;
+ };
+
+ vdo1_rdma3: rdma@1c107000 {
+ compatible = "mediatek,mt8195-vdo1-rdma";
+ reg = <0 0x1c107000 0 0x1000>;
+ interrupts = <GIC_SPI 498 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&vdosys1 CLK_VDO1_MDP_RDMA3>;
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
+ iommus = <&iommu_vpp M4U_PORT_L3_MDP_RDMA3>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x7000 0x1000>;
+ };
+
+ vdo1_rdma4: rdma@1c108000 {
+ compatible = "mediatek,mt8195-vdo1-rdma";
+ reg = <0 0x1c108000 0 0x1000>;
+ interrupts = <GIC_SPI 499 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&vdosys1 CLK_VDO1_MDP_RDMA4>;
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
+ iommus = <&iommu_vdo M4U_PORT_L2_MDP_RDMA4>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x8000 0x1000>;
+ };
+
+ vdo1_rdma5: rdma@1c109000 {
+ compatible = "mediatek,mt8195-vdo1-rdma";
+ reg = <0 0x1c109000 0 0x1000>;
+ interrupts = <GIC_SPI 500 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&vdosys1 CLK_VDO1_MDP_RDMA5>;
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
+ iommus = <&iommu_vpp M4U_PORT_L3_MDP_RDMA5>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x9000 0x1000>;
+ };
+
+ vdo1_rdma6: rdma@1c10a000 {
+ compatible = "mediatek,mt8195-vdo1-rdma";
+ reg = <0 0x1c10a000 0 0x1000>;
+ interrupts = <GIC_SPI 501 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&vdosys1 CLK_VDO1_MDP_RDMA6>;
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
+ iommus = <&iommu_vdo M4U_PORT_L2_MDP_RDMA6>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0xa000 0x1000>;
+ };
+
+ vdo1_rdma7: rdma@1c10b000 {
+ compatible = "mediatek,mt8195-vdo1-rdma";
+ reg = <0 0x1c10b000 0 0x1000>;
+ interrupts = <GIC_SPI 502 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&vdosys1 CLK_VDO1_MDP_RDMA7>;
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
+ iommus = <&iommu_vpp M4U_PORT_L3_MDP_RDMA7>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0xb000 0x1000>;
+ };
+
+ merge1: vpp-merge@1c10c000 {
+ compatible = "mediatek,mt8195-disp-merge";
+ reg = <0 0x1c10c000 0 0x1000>;
+ interrupts = <GIC_SPI 503 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&vdosys1 CLK_VDO1_VPP_MERGE0>,
+ <&vdosys1 CLK_VDO1_MERGE0_DL_ASYNC>;
+ clock-names = "merge","merge_async";
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0xc000 0x1000>;
+ mediatek,merge-mute = <1>;
+ resets = <&vdosys1 MT8195_VDOSYS1_SW0_RST_B_MERGE0_DL_ASYNC>;
+ };
+
+ merge2: vpp-merge@1c10d000 {
+ compatible = "mediatek,mt8195-disp-merge";
+ reg = <0 0x1c10d000 0 0x1000>;
+ interrupts = <GIC_SPI 504 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&vdosys1 CLK_VDO1_VPP_MERGE1>,
+ <&vdosys1 CLK_VDO1_MERGE1_DL_ASYNC>;
+ clock-names = "merge","merge_async";
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0xd000 0x1000>;
+ mediatek,merge-mute = <1>;
+ resets = <&vdosys1 MT8195_VDOSYS1_SW0_RST_B_MERGE1_DL_ASYNC>;
+ };
+
+ merge3: vpp-merge@1c10e000 {
+ compatible = "mediatek,mt8195-disp-merge";
+ reg = <0 0x1c10e000 0 0x1000>;
+ interrupts = <GIC_SPI 505 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&vdosys1 CLK_VDO1_VPP_MERGE2>,
+ <&vdosys1 CLK_VDO1_MERGE2_DL_ASYNC>;
+ clock-names = "merge","merge_async";
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0xe000 0x1000>;
+ mediatek,merge-mute = <1>;
+ resets = <&vdosys1 MT8195_VDOSYS1_SW0_RST_B_MERGE2_DL_ASYNC>;
+ };
+
+ merge4: vpp-merge@1c10f000 {
+ compatible = "mediatek,mt8195-disp-merge";
+ reg = <0 0x1c10f000 0 0x1000>;
+ interrupts = <GIC_SPI 506 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&vdosys1 CLK_VDO1_VPP_MERGE3>,
+ <&vdosys1 CLK_VDO1_MERGE3_DL_ASYNC>;
+ clock-names = "merge","merge_async";
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0xf000 0x1000>;
+ mediatek,merge-mute = <1>;
+ resets = <&vdosys1 MT8195_VDOSYS1_SW0_RST_B_MERGE3_DL_ASYNC>;
+ };
+
+ merge5: vpp-merge@1c110000 {
+ compatible = "mediatek,mt8195-disp-merge";
+ reg = <0 0x1c110000 0 0x1000>;
+ interrupts = <GIC_SPI 507 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&vdosys1 CLK_VDO1_VPP_MERGE4>,
+ <&vdosys1 CLK_VDO1_MERGE4_DL_ASYNC>;
+ clock-names = "merge","merge_async";
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c11XXXX 0x0000 0x1000>;
+ mediatek,merge-fifo-en = <1>;
+ resets = <&vdosys1 MT8195_VDOSYS1_SW0_RST_B_MERGE4_DL_ASYNC>;
+ };
+
dp_intf1: dp-intf@1c113000 {
compatible = "mediatek,mt8195-dp-intf";
reg = <0 0x1c113000 0 0x1000>;
status = "disabled";
};
+ ethdr0: hdr-engine@1c114000 {
+ compatible = "mediatek,mt8195-disp-ethdr";
+ reg = <0 0x1c114000 0 0x1000>,
+ <0 0x1c115000 0 0x1000>,
+ <0 0x1c117000 0 0x1000>,
+ <0 0x1c119000 0 0x1000>,
+ <0 0x1c11a000 0 0x1000>,
+ <0 0x1c11b000 0 0x1000>,
+ <0 0x1c11c000 0 0x1000>;
+ reg-names = "mixer", "vdo_fe0", "vdo_fe1", "gfx_fe0", "gfx_fe1",
+ "vdo_be", "adl_ds";
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c11XXXX 0x4000 0x1000>,
+ <&gce0 SUBSYS_1c11XXXX 0x5000 0x1000>,
+ <&gce0 SUBSYS_1c11XXXX 0x7000 0x1000>,
+ <&gce0 SUBSYS_1c11XXXX 0x9000 0x1000>,
+ <&gce0 SUBSYS_1c11XXXX 0xa000 0x1000>,
+ <&gce0 SUBSYS_1c11XXXX 0xb000 0x1000>,
+ <&gce0 SUBSYS_1c11XXXX 0xc000 0x1000>;
+ clocks = <&vdosys1 CLK_VDO1_DISP_MIXER>,
+ <&vdosys1 CLK_VDO1_HDR_VDO_FE0>,
+ <&vdosys1 CLK_VDO1_HDR_VDO_FE1>,
+ <&vdosys1 CLK_VDO1_HDR_GFX_FE0>,
+ <&vdosys1 CLK_VDO1_HDR_GFX_FE1>,
+ <&vdosys1 CLK_VDO1_HDR_VDO_BE>,
+ <&vdosys1 CLK_VDO1_26M_SLOW>,
+ <&vdosys1 CLK_VDO1_HDR_VDO_FE0_DL_ASYNC>,
+ <&vdosys1 CLK_VDO1_HDR_VDO_FE1_DL_ASYNC>,
+ <&vdosys1 CLK_VDO1_HDR_GFX_FE0_DL_ASYNC>,
+ <&vdosys1 CLK_VDO1_HDR_GFX_FE1_DL_ASYNC>,
+ <&vdosys1 CLK_VDO1_HDR_VDO_BE_DL_ASYNC>,
+ <&topckgen CLK_TOP_ETHDR>;
+ clock-names = "mixer", "vdo_fe0", "vdo_fe1", "gfx_fe0", "gfx_fe1",
+ "vdo_be", "adl_ds", "vdo_fe0_async", "vdo_fe1_async",
+ "gfx_fe0_async", "gfx_fe1_async","vdo_be_async",
+ "ethdr_top";
+ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
+ iommus = <&iommu_vpp M4U_PORT_L3_HDR_DS>,
+ <&iommu_vpp M4U_PORT_L3_HDR_ADL>;
+ interrupts = <GIC_SPI 517 IRQ_TYPE_LEVEL_HIGH 0>; /* disp mixer */
+ resets = <&vdosys1 MT8195_VDOSYS1_SW1_RST_B_HDR_VDO_FE0_DL_ASYNC>,
+ <&vdosys1 MT8195_VDOSYS1_SW1_RST_B_HDR_VDO_FE1_DL_ASYNC>,
+ <&vdosys1 MT8195_VDOSYS1_SW1_RST_B_HDR_GFX_FE0_DL_ASYNC>,
+ <&vdosys1 MT8195_VDOSYS1_SW1_RST_B_HDR_GFX_FE1_DL_ASYNC>,
+ <&vdosys1 MT8195_VDOSYS1_SW1_RST_B_HDR_VDO_BE_DL_ASYNC>;
+ reset-names = "vdo_fe0_async", "vdo_fe1_async", "gfx_fe0_async",
+ "gfx_fe1_async", "vdo_be_async";
+ };
+
edp_tx: edp-tx@1c500000 {
compatible = "mediatek,mt8195-edp-tx";
reg = <0 0x1c500000 0 0x8000>;
status = "disabled";
};
};
+
+ thermal_zones: thermal-zones {
+ cpu0-thermal {
+ polling-delay = <1000>;
+ polling-delay-passive = <250>;
+ thermal-sensors = <&lvts_mcu MT8195_MCU_LITTLE_CPU0>;
+
+ trips {
+ cpu0_alert: trip-alert {
+ temperature = <85000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu0_crit: trip-crit {
+ temperature = <100000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu0_alert>;
+ cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu1-thermal {
+ polling-delay = <1000>;
+ polling-delay-passive = <250>;
+ thermal-sensors = <&lvts_mcu MT8195_MCU_LITTLE_CPU1>;
+
+ trips {
+ cpu1_alert: trip-alert {
+ temperature = <85000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu1_crit: trip-crit {
+ temperature = <100000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu1_alert>;
+ cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu2-thermal {
+ polling-delay = <1000>;
+ polling-delay-passive = <250>;
+ thermal-sensors = <&lvts_mcu MT8195_MCU_LITTLE_CPU2>;
+
+ trips {
+ cpu2_alert: trip-alert {
+ temperature = <85000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu2_crit: trip-crit {
+ temperature = <100000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu2_alert>;
+ cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu3-thermal {
+ polling-delay = <1000>;
+ polling-delay-passive = <250>;
+ thermal-sensors = <&lvts_mcu MT8195_MCU_LITTLE_CPU3>;
+
+ trips {
+ cpu3_alert: trip-alert {
+ temperature = <85000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu3_crit: trip-crit {
+ temperature = <100000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu3_alert>;
+ cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu4-thermal {
+ polling-delay = <1000>;
+ polling-delay-passive = <250>;
+ thermal-sensors = <&lvts_mcu MT8195_MCU_BIG_CPU0>;
+
+ trips {
+ cpu4_alert: trip-alert {
+ temperature = <85000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu4_crit: trip-crit {
+ temperature = <100000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu4_alert>;
+ cooling-device = <&cpu4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu5-thermal {
+ polling-delay = <1000>;
+ polling-delay-passive = <250>;
+ thermal-sensors = <&lvts_mcu MT8195_MCU_BIG_CPU1>;
+
+ trips {
+ cpu5_alert: trip-alert {
+ temperature = <85000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu5_crit: trip-crit {
+ temperature = <100000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu5_alert>;
+ cooling-device = <&cpu4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu6-thermal {
+ polling-delay = <1000>;
+ polling-delay-passive = <250>;
+ thermal-sensors = <&lvts_mcu MT8195_MCU_BIG_CPU2>;
+
+ trips {
+ cpu6_alert: trip-alert {
+ temperature = <85000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu6_crit: trip-crit {
+ temperature = <100000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu6_alert>;
+ cooling-device = <&cpu4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+
+ cpu7-thermal {
+ polling-delay = <1000>;
+ polling-delay-passive = <250>;
+ thermal-sensors = <&lvts_mcu MT8195_MCU_BIG_CPU3>;
+
+ trips {
+ cpu7_alert: trip-alert {
+ temperature = <85000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+
+ cpu7_crit: trip-crit {
+ temperature = <100000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu7_alert>;
+ cooling-device = <&cpu4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+ <&cpu7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+ };
};
#include <linux/io.h>
#include <linux/ftrace.h>
#include <linux/syscalls.h>
++#include <linux/iommu.h>
#include <asm/processor.h>
#include <asm/pkru.h>
task_pt_regs(current)->orig_ax = __NR_execve;
current_thread_info()->status &= ~TS_COMPAT;
if (current->mm)
- current->mm->context.flags = MM_CONTEXT_HAS_VSYSCALL;
+ __set_bit(MM_CONTEXT_HAS_VSYSCALL, &current->mm->context.flags);
/* TBD: overwrites user setup. Should have two bits.
But 64bit processes have always behaved this way,
* uprobes applied to this MM need to know this and
* cannot use user_64bit_mode() at that time.
*/
- current->mm->context.flags = MM_CONTEXT_UPROBE_IA32;
+ __set_bit(MM_CONTEXT_UPROBE_IA32, &current->mm->context.flags);
}
current->personality |= force_personality32;
}
#endif
+#ifdef CONFIG_ADDRESS_MASKING
+
+#define LAM_U57_BITS 6
+
+static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_LAM))
+ return -ENODEV;
+
+ /* PTRACE_ARCH_PRCTL */
+ if (current->mm != mm)
+ return -EINVAL;
+
+ if (mm_valid_pasid(mm) &&
+ !test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags))
+ return -EINVAL;
+
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+
+ if (test_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags)) {
+ mmap_write_unlock(mm);
+ return -EBUSY;
+ }
+
+ if (!nr_bits) {
+ mmap_write_unlock(mm);
+ return -EINVAL;
+ } else if (nr_bits <= LAM_U57_BITS) {
+ mm->context.lam_cr3_mask = X86_CR3_LAM_U57;
+ mm->context.untag_mask = ~GENMASK(62, 57);
+ } else {
+ mmap_write_unlock(mm);
+ return -EINVAL;
+ }
+
+ write_cr3(__read_cr3() | mm->context.lam_cr3_mask);
+ set_tlbstate_lam_mode(mm);
+ set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags);
+
+ mmap_write_unlock(mm);
+
+ return 0;
+}
+#endif
+
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{
int ret = 0;
case ARCH_MAP_VDSO_64:
return prctl_map_vdso(&vdso_image_64, arg2);
#endif
-
+#ifdef CONFIG_ADDRESS_MASKING
+ case ARCH_GET_UNTAG_MASK:
+ return put_user(task->mm->context.untag_mask,
+ (unsigned long __user *)arg2);
+ case ARCH_ENABLE_TAGGED_ADDR:
+ return prctl_enable_tagged_addr(task->mm, arg2);
+ case ARCH_FORCE_TAGGED_SVA:
+ if (current != task)
+ return -EINVAL;
+ set_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &task->mm->context.flags);
+ return 0;
+ case ARCH_GET_MAX_TAG_BITS:
+ if (!cpu_feature_enabled(X86_FEATURE_LAM))
+ return put_user(0, (unsigned long __user *)arg2);
+ else
+ return put_user(LAM_U57_BITS, (unsigned long __user *)arg2);
+#endif
default:
ret = -EINVAL;
break;
#include <linux/io.h>
#include <linux/hardirq.h>
#include <linux/atomic.h>
- #include <linux/ioasid.h>
+ #include <linux/iommu.h>
#include <asm/stacktrace.h>
#include <asm/processor.h>
if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
return false;
- pasid = current->mm->pasid;
-
/*
* If the mm has not been allocated a
* PASID, the #GP can not be fixed up.
*/
- if (!pasid_valid(pasid))
+ if (!mm_valid_pasid(current->mm))
return false;
+ pasid = current->mm->pasid;
+
/*
* Did this thread already have its PASID activated?
* If so, the #GP must be from something else.
select NEED_DMA_MAP_STATE
select DMAR_TABLE
select SWIOTLB
-- select IOASID
select PCI_ATS
select PCI_PRI
select PCI_PASID
/*
* Helpers for IOMMU drivers implementing SVA
*/
+#include <linux/mmu_context.h>
#include <linux/mutex.h>
#include <linux/sched/mm.h>
#include <linux/iommu.h>
#include "iommu-sva.h"
static DEFINE_MUTEX(iommu_sva_lock);
- static DECLARE_IOASID_SET(iommu_sva_pasid);
+ static DEFINE_IDA(iommu_global_pasid_ida);
- /**
- * iommu_sva_alloc_pasid - Allocate a PASID for the mm
- * @mm: the mm
- * @min: minimum PASID value (inclusive)
- * @max: maximum PASID value (inclusive)
- *
- * Try to allocate a PASID for this mm, or take a reference to the existing one
- * provided it fits within the [@min, @max] range. On success the PASID is
- * available in mm->pasid and will be available for the lifetime of the mm.
- *
- * Returns 0 on success and < 0 on error.
- */
- int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
+ /* Allocate a PASID for the mm within range (inclusive) */
+ static int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
{
int ret = 0;
- ioasid_t pasid;
- if (min == INVALID_IOASID || max == INVALID_IOASID ||
- if (!pasid_valid(min) || !pasid_valid(max) ||
++ if (min == IOMMU_PASID_INVALID ||
++ max == IOMMU_PASID_INVALID ||
min == 0 || max < min)
return -EINVAL;
+ if (!arch_pgtable_dma_compat(mm))
+ return -EBUSY;
+
mutex_lock(&iommu_sva_lock);
/* Is a PASID already associated with this mm? */
- if (pasid_valid(mm->pasid)) {
+ if (mm_valid_pasid(mm)) {
- if (mm->pasid < min || mm->pasid >= max)
+ if (mm->pasid < min || mm->pasid > max)
ret = -EOVERFLOW;
goto out;
}
- pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
- if (pasid == INVALID_IOASID)
- ret = -ENOMEM;
- else
- mm_pasid_set(mm, pasid);
+ ret = ida_alloc_range(&iommu_global_pasid_ida, min, max, GFP_KERNEL);
+ if (ret < min)
+ goto out;
+ mm->pasid = ret;
+ ret = 0;
out:
mutex_unlock(&iommu_sva_lock);
return ret;
}
- EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid);
-
- /* ioasid_find getter() requires a void * argument */
- static bool __mmget_not_zero(void *mm)
- {
- return mmget_not_zero(mm);
- }
-
- /**
- * iommu_sva_find() - Find mm associated to the given PASID
- * @pasid: Process Address Space ID assigned to the mm
- *
- * On success a reference to the mm is taken, and must be released with mmput().
- *
- * Returns the mm corresponding to this PASID, or an error if not found.
- */
- struct mm_struct *iommu_sva_find(ioasid_t pasid)
- {
- return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero);
- }
- EXPORT_SYMBOL_GPL(iommu_sva_find);
/**
* iommu_sva_bind_device() - Bind a process address space to a device
return status;
}
- if (likely(!pasid_valid(mm->pasid)))
+
+ void mm_pasid_drop(struct mm_struct *mm)
+ {
++ if (likely(!mm_valid_pasid(mm)))
+ return;
+
+ ida_free(&iommu_global_pasid_ida, mm->pasid);
+ }
#include <linux/fsl/mc.h>
#include <linux/module.h>
#include <linux/cc_platform.h>
+#include <linux/cdx/cdx_bus.h>
#include <trace/events/iommu.h>
#include <linux/sched/mm.h>
#include <linux/msi.h>
static int iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data);
+ static void iommu_release_device(struct device *dev);
static int iommu_alloc_default_domain(struct iommu_group *group,
struct device *dev);
-static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
+static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
unsigned type);
static int __iommu_attach_device(struct iommu_domain *domain,
struct device *dev);
#ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
&host1x_context_device_bus_type,
#endif
+#ifdef CONFIG_CDX_BUS
+ &cdx_bus_type,
+#endif
};
/*
}
- void iommu_release_device(struct device *dev)
+ /*
+ * Remove a device from a group's device list and return the group device
+ * if successful.
+ */
+ static struct group_device *
+ __iommu_group_remove_device(struct iommu_group *group, struct device *dev)
{
+ struct group_device *device;
+
+ lockdep_assert_held(&group->mutex);
+ list_for_each_entry(device, &group->devices, list) {
+ if (device->dev == dev) {
+ list_del(&device->list);
+ return device;
+ }
+ }
+
+ return NULL;
+ }
+
+ /*
+ * Release a device from its group and decrement the iommu group reference
+ * count.
+ */
+ static void __iommu_group_release_device(struct iommu_group *group,
+ struct group_device *grp_dev)
+ {
+ struct device *dev = grp_dev->dev;
+
+ sysfs_remove_link(group->devices_kobj, grp_dev->name);
+ sysfs_remove_link(&dev->kobj, "iommu_group");
+
+ trace_remove_device_from_group(group->id, dev);
+
+ kfree(grp_dev->name);
+ kfree(grp_dev);
+ dev->iommu_group = NULL;
+ kobject_put(group->devices_kobj);
+ }
+
+ static void iommu_release_device(struct device *dev)
+ {
+ struct iommu_group *group = dev->iommu_group;
+ struct group_device *device;
const struct iommu_ops *ops;
- if (!dev->iommu)
+ if (!dev->iommu || !group)
return;
iommu_device_unlink(dev->iommu->iommu_dev, dev);
+ mutex_lock(&group->mutex);
+ device = __iommu_group_remove_device(group, dev);
+
+ /*
+ * If the group has become empty then ownership must have been released,
+ * and the current domain must be set back to NULL or the default
+ * domain.
+ */
+ if (list_empty(&group->devices))
+ WARN_ON(group->owner_cnt ||
+ group->domain != group->default_domain);
+
+ /*
+ * release_device() must stop using any attached domain on the device.
+ * If there are still other devices in the group they are not affected
+ * by this callback.
+ *
+ * The IOMMU driver must set the device to either an identity or
+ * blocking translation and stop using any domain pointer, as it is
+ * going to be freed.
+ */
ops = dev_iommu_ops(dev);
if (ops->release_device)
ops->release_device(dev);
+ mutex_unlock(&group->mutex);
+
+ if (device)
+ __iommu_group_release_device(group, device);
- iommu_group_remove_device(dev);
module_put(ops->owner);
dev_iommu_free(dev);
}
static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
{
- return sprintf(buf, "%s\n", group->name);
+ return sysfs_emit(buf, "%s\n", group->name);
}
/**
{
struct iommu_resv_region *region, *next;
struct list_head group_resv_regions;
- char *str = buf;
+ int offset = 0;
INIT_LIST_HEAD(&group_resv_regions);
iommu_get_group_resv_regions(group, &group_resv_regions);
list_for_each_entry_safe(region, next, &group_resv_regions, list) {
- str += sprintf(str, "0x%016llx 0x%016llx %s\n",
- (long long int)region->start,
- (long long int)(region->start +
- region->length - 1),
- iommu_group_resv_type_string[region->type]);
+ offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n",
+ (long long)region->start,
+ (long long)(region->start +
+ region->length - 1),
+ iommu_group_resv_type_string[region->type]);
kfree(region);
}
- return (str - buf);
+ return offset;
}
static ssize_t iommu_group_show_type(struct iommu_group *group,
char *buf)
{
- char *type = "unknown\n";
+ char *type = "unknown";
mutex_lock(&group->mutex);
if (group->default_domain) {
switch (group->default_domain->type) {
case IOMMU_DOMAIN_BLOCKED:
- type = "blocked\n";
+ type = "blocked";
break;
case IOMMU_DOMAIN_IDENTITY:
- type = "identity\n";
+ type = "identity";
break;
case IOMMU_DOMAIN_UNMANAGED:
- type = "unmanaged\n";
+ type = "unmanaged";
break;
case IOMMU_DOMAIN_DMA:
- type = "DMA\n";
+ type = "DMA";
break;
case IOMMU_DOMAIN_DMA_FQ:
- type = "DMA-FQ\n";
+ type = "DMA-FQ";
break;
}
}
mutex_unlock(&group->mutex);
- strcpy(buf, type);
- return strlen(type);
+ return sysfs_emit(buf, "%s\n", type);
}
static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);
kfree(group);
}
- static struct kobj_type iommu_group_ktype = {
+ static const struct kobj_type iommu_group_ktype = {
.sysfs_ops = &iommu_group_sysfs_ops,
.release = iommu_group_release,
};
}
EXPORT_SYMBOL_GPL(iommu_group_alloc);
- struct iommu_group *iommu_group_get_by_id(int id)
- {
- struct kobject *group_kobj;
- struct iommu_group *group;
- const char *name;
-
- if (!iommu_group_kset)
- return NULL;
-
- name = kasprintf(GFP_KERNEL, "%d", id);
- if (!name)
- return NULL;
-
- group_kobj = kset_find_obj(iommu_group_kset, name);
- kfree(name);
-
- if (!group_kobj)
- return NULL;
-
- group = container_of(group_kobj, struct iommu_group, kobj);
- BUG_ON(group->id != id);
-
- kobject_get(group->devices_kobj);
- kobject_put(&group->kobj);
-
- return group;
- }
- EXPORT_SYMBOL_GPL(iommu_group_get_by_id);
-
/**
* iommu_group_get_iommudata - retrieve iommu_data registered for a group
* @group: the group
void iommu_group_remove_device(struct device *dev)
{
struct iommu_group *group = dev->iommu_group;
- struct group_device *tmp_device, *device = NULL;
+ struct group_device *device;
if (!group)
return;
dev_info(dev, "Removing from iommu group %d\n", group->id);
mutex_lock(&group->mutex);
- list_for_each_entry(tmp_device, &group->devices, list) {
- if (tmp_device->dev == dev) {
- device = tmp_device;
- list_del(&device->list);
- break;
- }
- }
+ device = __iommu_group_remove_device(group, dev);
mutex_unlock(&group->mutex);
- if (!device)
- return;
-
- sysfs_remove_link(group->devices_kobj, device->name);
- sysfs_remove_link(&dev->kobj, "iommu_group");
-
- trace_remove_device_from_group(group->id, dev);
-
- kfree(device->name);
- kfree(device);
- dev->iommu_group = NULL;
- kobject_put(group->devices_kobj);
+ if (device)
+ __iommu_group_release_device(group, device);
}
EXPORT_SYMBOL_GPL(iommu_group_remove_device);
return 0;
}
-static int iommu_group_alloc_default_domain(struct bus_type *bus,
+static int iommu_group_alloc_default_domain(const struct bus_type *bus,
struct iommu_group *group,
unsigned int type)
{
return 0;
}
-static void probe_alloc_default_domain(struct bus_type *bus,
+static void probe_alloc_default_domain(const struct bus_type *bus,
struct iommu_group *group)
{
struct __group_domain_type gtype;
iommu_do_create_direct_mappings);
}
-int bus_iommu_probe(struct bus_type *bus)
+int bus_iommu_probe(const struct bus_type *bus)
{
struct iommu_group *group, *next;
LIST_HEAD(group_list);
return ret;
}
-bool iommu_present(struct bus_type *bus)
+bool iommu_present(const struct bus_type *bus)
{
return bus->iommu_ops != NULL;
}
}
EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
-static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
+static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
unsigned type)
{
struct iommu_domain *domain;
return NULL;
domain->type = type;
- /* Assume all sizes by default; the driver may override this later */
- domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap;
+ /*
+ * If not already set, assume all sizes by default; the driver
+ * may override this later
+ */
+ if (!domain->pgsize_bitmap)
+ domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap;
+
if (!domain->ops)
domain->ops = bus->iommu_ops->default_domain_ops;
return domain;
}
-struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
+struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus)
{
return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED);
}
EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
/*
- * Changes the default domain of an iommu group that has *only* one device
+ * Changes the default domain of an iommu group
*
* @group: The group for which the default domain should be changed
- * @prev_dev: The device in the group (this is used to make sure that the device
- * hasn't changed after the caller has called this function)
+ * @dev: The first device in the group
* @type: The type of the new default domain that gets associated with the group
*
* Returns 0 on success and error code on failure
* Please take a closer look if intended to use for other purposes.
*/
static int iommu_change_dev_def_domain(struct iommu_group *group,
- struct device *prev_dev, int type)
+ struct device *dev, int type)
{
+ struct __group_domain_type gtype = {NULL, 0};
struct iommu_domain *prev_dom;
- struct group_device *grp_dev;
- int ret, dev_def_dom;
- struct device *dev;
-
- mutex_lock(&group->mutex);
-
- if (group->default_domain != group->domain) {
- dev_err_ratelimited(prev_dev, "Group not assigned to default domain\n");
- ret = -EBUSY;
- goto out;
- }
-
- /*
- * iommu group wasn't locked while acquiring device lock in
- * iommu_group_store_type(). So, make sure that the device count hasn't
- * changed while acquiring device lock.
- *
- * Changing default domain of an iommu group with two or more devices
- * isn't supported because there could be a potential deadlock. Consider
- * the following scenario. T1 is trying to acquire device locks of all
- * the devices in the group and before it could acquire all of them,
- * there could be another thread T2 (from different sub-system and use
- * case) that has already acquired some of the device locks and might be
- * waiting for T1 to release other device locks.
- */
- if (iommu_group_device_count(group) != 1) {
- dev_err_ratelimited(prev_dev, "Cannot change default domain: Group has more than one device\n");
- ret = -EINVAL;
- goto out;
- }
+ int ret;
- /* Since group has only one device */
- grp_dev = list_first_entry(&group->devices, struct group_device, list);
- dev = grp_dev->dev;
-
- if (prev_dev != dev) {
- dev_err_ratelimited(prev_dev, "Cannot change default domain: Device has been changed\n");
- ret = -EBUSY;
- goto out;
- }
+ lockdep_assert_held(&group->mutex);
prev_dom = group->default_domain;
- if (!prev_dom) {
- ret = -EINVAL;
- goto out;
- }
-
- dev_def_dom = iommu_get_def_domain_type(dev);
+ __iommu_group_for_each_dev(group, &gtype,
+ probe_get_default_domain_type);
if (!type) {
/*
* If the user hasn't requested any specific type of domain and
* if the device supports both the domains, then default to the
* domain the device was booted with
*/
- type = dev_def_dom ? : iommu_def_domain_type;
- } else if (dev_def_dom && type != dev_def_dom) {
- dev_err_ratelimited(prev_dev, "Device cannot be in %s domain\n",
+ type = gtype.type ? : iommu_def_domain_type;
+ } else if (gtype.type && type != gtype.type) {
+ dev_err_ratelimited(dev, "Device cannot be in %s domain\n",
iommu_domain_type_str(type));
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
/*
* Switch to a new domain only if the requested domain type is different
* from the existing default domain type
*/
- if (prev_dom->type == type) {
- ret = 0;
- goto out;
- }
+ if (prev_dom->type == type)
+ return 0;
- /* We can bring up a flush queue without tearing down the domain */
- if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) {
- ret = iommu_dma_init_fq(prev_dom);
- if (!ret)
- prev_dom->type = IOMMU_DOMAIN_DMA_FQ;
- goto out;
- }
+ group->default_domain = NULL;
+ group->domain = NULL;
/* Sets group->default_domain to the newly allocated domain */
ret = iommu_group_alloc_default_domain(dev->bus, group, type);
if (ret)
- goto out;
+ goto restore_old_domain;
- ret = iommu_create_device_direct_mappings(group, dev);
+ ret = iommu_group_create_direct_mappings(group);
if (ret)
goto free_new_domain;
- ret = __iommu_attach_device(group->default_domain, dev);
+ ret = __iommu_attach_group(group->default_domain, group);
if (ret)
goto free_new_domain;
- group->domain = group->default_domain;
-
- /*
- * Release the mutex here because ops->probe_finalize() call-back of
- * some vendor IOMMU drivers calls arm_iommu_attach_device() which
- * in-turn might call back into IOMMU core code, where it tries to take
- * group->mutex, resulting in a deadlock.
- */
- mutex_unlock(&group->mutex);
-
- /* Make sure dma_ops is appropriatley set */
- iommu_group_do_probe_finalize(dev, group->default_domain);
iommu_domain_free(prev_dom);
+
return 0;
free_new_domain:
iommu_domain_free(group->default_domain);
+ restore_old_domain:
group->default_domain = prev_dom;
group->domain = prev_dom;
- out:
- mutex_unlock(&group->mutex);
-
return ret;
}
* transition. Return failure if this isn't met.
*
* We need to consider the race between this and the device release path.
- * device_lock(dev) is used here to guarantee that the device release path
+ * group->mutex is used here to guarantee that the device release path
* will not be entered at the same time.
*/
static ssize_t iommu_group_store_type(struct iommu_group *group,
else
return -EINVAL;
- /*
- * Lock/Unlock the group mutex here before device lock to
- * 1. Make sure that the iommu group has only one device (this is a
- * prerequisite for step 2)
- * 2. Get struct *dev which is needed to lock device
- */
mutex_lock(&group->mutex);
- if (iommu_group_device_count(group) != 1) {
+ /* We can bring up a flush queue without tearing down the domain. */
+ if (req_type == IOMMU_DOMAIN_DMA_FQ &&
+ group->default_domain->type == IOMMU_DOMAIN_DMA) {
+ ret = iommu_dma_init_fq(group->default_domain);
+ if (!ret)
+ group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
mutex_unlock(&group->mutex);
- pr_err_ratelimited("Cannot change default domain: Group has more than one device\n");
- return -EINVAL;
+
+ return ret ?: count;
+ }
+
+ /* Otherwise, ensure that device exists and no driver is bound. */
+ if (list_empty(&group->devices) || group->owner_cnt) {
+ mutex_unlock(&group->mutex);
+ return -EPERM;
}
- /* Since group has only one device */
grp_dev = list_first_entry(&group->devices, struct group_device, list);
dev = grp_dev->dev;
- get_device(dev);
+
+ ret = iommu_change_dev_def_domain(group, dev, req_type);
/*
- * Don't hold the group mutex because taking group mutex first and then
- * the device lock could potentially cause a deadlock as below. Assume
- * two threads T1 and T2. T1 is trying to change default domain of an
- * iommu group and T2 is trying to hot unplug a device or release [1] VF
- * of a PCIe device which is in the same iommu group. T1 takes group
- * mutex and before it could take device lock assume T2 has taken device
- * lock and is yet to take group mutex. Now, both the threads will be
- * waiting for the other thread to release lock. Below, lock order was
- * suggested.
- * device_lock(dev);
- * mutex_lock(&group->mutex);
- * iommu_change_dev_def_domain();
- * mutex_unlock(&group->mutex);
- * device_unlock(dev);
- *
- * [1] Typical device release path
- * device_lock() from device/driver core code
- * -> bus_notifier()
- * -> iommu_bus_notifier()
- * -> iommu_release_device()
- * -> ops->release_device() vendor driver calls back iommu core code
- * -> mutex_lock() from iommu core code
+ * Release the mutex here because ops->probe_finalize() call-back of
+ * some vendor IOMMU drivers calls arm_iommu_attach_device() which
+ * in-turn might call back into IOMMU core code, where it tries to take
+ * group->mutex, resulting in a deadlock.
*/
mutex_unlock(&group->mutex);
- /* Check if the device in the group still has a driver bound to it */
- device_lock(dev);
- if (device_is_bound(dev) && !(req_type == IOMMU_DOMAIN_DMA_FQ &&
- group->default_domain->type == IOMMU_DOMAIN_DMA)) {
- pr_err_ratelimited("Device is still bound to driver\n");
- ret = -EBUSY;
- goto out;
- }
-
- ret = iommu_change_dev_def_domain(group, dev, req_type);
- ret = ret ?: count;
-
- out:
- device_unlock(dev);
- put_device(dev);
+ /* Make sure dma_ops is appropriately set */
+ if (!ret)
+ __iommu_group_dma_finalize(group);
- return ret;
+ return ret ?: count;
}
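The fast path above upgrades a strict DMA domain to DMA-FQ in place, without
re-attaching anything; every other transition tears down and rebuilds the
default domain. From userspace the whole store is driven through the group's
``type`` attribute. A minimal sketch of such a request, assuming IOMMU group 0
with no bound driver (the helper name and error handling are illustrative
only)::

	/* Hypothetical userspace helper: ask for a flush-queue default domain. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	static int set_group_type(const char *group, const char *type)
	{
		char path[128];
		ssize_t n;
		int fd;

		snprintf(path, sizeof(path),
			 "/sys/kernel/iommu_groups/%s/type", group);
		fd = open(path, O_WRONLY);
		if (fd < 0)
			return -1;
		n = write(fd, type, strlen(type));
		close(fd);
		return n < 0 ? -1 : 0;
	}

	/* e.g. set_group_type("0", "DMA-FQ"); */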
static bool iommu_is_default_domain(struct iommu_group *group)
retry_select:
hw_id = mtk_jpegenc_get_hw(ctx);
if (hw_id < 0) {
- ret = wait_event_interruptible(jpeg->enc_hw_wq,
- atomic_read(&jpeg->enchw_rdy) > 0);
+ ret = wait_event_interruptible(jpeg->hw_wq,
+ atomic_read(&jpeg->hw_rdy) > 0);
if (ret != 0 || (i++ > MTK_JPEG_MAX_RETRY_TIME)) {
dev_err(jpeg->dev, "%s : %d, all HW are busy\n",
__func__, __LINE__);
goto retry_select;
}
- atomic_dec(&jpeg->enchw_rdy);
+ atomic_dec(&jpeg->hw_rdy);
src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
if (!src_buf)
goto getbuf_fail;
if (!dst_buf)
goto getbuf_fail;
- v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
- v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
-
v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, true);
mtk_jpegenc_set_hw_param(ctx, hw_id, src_buf, dst_buf);
goto enc_end;
}
+ v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
+ v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
+
schedule_delayed_work(&comp_jpeg[hw_id]->job_timeout_work,
msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC));
v4l2_m2m_buf_done(src_buf, buf_state);
v4l2_m2m_buf_done(dst_buf, buf_state);
getbuf_fail:
- atomic_inc(&jpeg->enchw_rdy);
+ atomic_inc(&jpeg->hw_rdy);
mtk_jpegenc_put_hw(jpeg, hw_id);
v4l2_m2m_job_finish(jpeg->m2m_dev, ctx->fh.m2m_ctx);
}
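With the encoder- and decoder-specific fields folded into a shared
``hw_wq``/``hw_rdy`` pair, both workers use the same counting-semaphore idiom
over the hardware cores. A minimal sketch of that idiom, with hypothetical
helper names (the drivers open-code it inline as shown above)::

	/* hw_rdy counts idle cores; hw_wq wakes waiters on release. */
	static int acquire_hw(struct mtk_jpeg_dev *jpeg)
	{
		if (wait_event_interruptible(jpeg->hw_wq,
					     atomic_read(&jpeg->hw_rdy) > 0))
			return -ERESTARTSYS;
		atomic_dec(&jpeg->hw_rdy);
		return 0;
	}

	static void release_hw(struct mtk_jpeg_dev *jpeg)
	{
		atomic_inc(&jpeg->hw_rdy);
		wake_up(&jpeg->hw_wq);
	}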
retry_select:
hw_id = mtk_jpegdec_get_hw(ctx);
if (hw_id < 0) {
- ret = wait_event_interruptible_timeout(jpeg->dec_hw_wq,
- atomic_read(&jpeg->dechw_rdy) > 0,
+ ret = wait_event_interruptible_timeout(jpeg->hw_wq,
+ atomic_read(&jpeg->hw_rdy) > 0,
MTK_JPEG_HW_TIMEOUT_MSEC);
if (ret != 0 || (i++ > MTK_JPEG_MAX_RETRY_TIME)) {
dev_err(jpeg->dev, "%s : %d, all HW are busy\n",
goto retry_select;
}
- atomic_dec(&jpeg->dechw_rdy);
+ atomic_dec(&jpeg->hw_rdy);
src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
if (!src_buf)
goto getbuf_fail;
if (!dst_buf)
goto getbuf_fail;
- v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
- v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
-
v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, true);
jpeg_src_buf = mtk_jpeg_vb2_to_srcbuf(&src_buf->vb2_buf);
jpeg_dst_buf = mtk_jpeg_vb2_to_srcbuf(&dst_buf->vb2_buf);
&jpeg_src_buf->dec_param)) {
mtk_jpeg_queue_src_chg_event(ctx);
ctx->state = MTK_JPEG_SOURCE_CHANGE;
- goto dec_end;
+ goto getbuf_fail;
}
jpeg_src_buf->curr_ctx = ctx;
goto clk_end;
}
+ v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
+ v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
+
schedule_delayed_work(&comp_jpeg[hw_id]->job_timeout_work,
msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC));
v4l2_m2m_buf_done(src_buf, buf_state);
v4l2_m2m_buf_done(dst_buf, buf_state);
getbuf_fail:
- atomic_inc(&jpeg->dechw_rdy);
+ atomic_inc(&jpeg->hw_rdy);
mtk_jpegdec_put_hw(jpeg, hw_id);
v4l2_m2m_job_finish(jpeg->m2m_dev, ctx->fh.m2m_ctx);
}
goto free;
}
- if (jpeg->is_jpgenc_multihw)
- INIT_WORK(&ctx->jpeg_work, mtk_jpegenc_worker);
-
- if (jpeg->is_jpgdec_multihw)
- INIT_WORK(&ctx->jpeg_work, mtk_jpegdec_worker);
-
+ INIT_WORK(&ctx->jpeg_work, jpeg->variant->jpeg_worker);
INIT_LIST_HEAD(&ctx->dst_done_queue);
spin_lock_init(&ctx->done_queue_lock);
v4l2_fh_init(&ctx->fh, vfd);
v4l2_m2m_job_finish(jpeg->m2m_dev, ctx->fh.m2m_ctx);
}
+static int mtk_jpeg_single_core_init(struct platform_device *pdev,
+ struct mtk_jpeg_dev *jpeg_dev)
+{
+ struct mtk_jpeg_dev *jpeg = jpeg_dev;
+ int jpeg_irq, ret;
+
+ INIT_DELAYED_WORK(&jpeg->job_timeout_work,
+ mtk_jpeg_job_timeout_work);
+
+ jpeg->reg_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(jpeg->reg_base)) {
+ ret = PTR_ERR(jpeg->reg_base);
+ return ret;
+ }
+
+ jpeg_irq = platform_get_irq(pdev, 0);
+ if (jpeg_irq < 0)
+ return jpeg_irq;
+
+ ret = devm_request_irq(&pdev->dev,
+ jpeg_irq,
+ jpeg->variant->irq_handler,
+ 0,
+ pdev->name, jpeg);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to request jpeg_irq %d (%d)\n",
+ jpeg_irq, ret);
+ return ret;
+ }
+
+ ret = devm_clk_bulk_get(jpeg->dev,
+ jpeg->variant->num_clks,
+ jpeg->variant->clks);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to init clk\n");
+ return ret;
+ }
+
+ return 0;
+}
+
static int mtk_jpeg_probe(struct platform_device *pdev)
{
struct mtk_jpeg_dev *jpeg;
- int jpeg_irq;
+ struct device_node *child;
+ int num_child = 0;
int ret;
jpeg = devm_kzalloc(&pdev->dev, sizeof(*jpeg), GFP_KERNEL);
return -EINVAL;
}
- if (list_empty(&pdev->dev.devres_head)) {
- INIT_DELAYED_WORK(&jpeg->job_timeout_work,
- mtk_jpeg_job_timeout_work);
-
- jpeg->reg_base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(jpeg->reg_base)) {
- ret = PTR_ERR(jpeg->reg_base);
- return ret;
+ if (!jpeg->variant->multi_core) {
+ ret = mtk_jpeg_single_core_init(pdev, jpeg);
+ if (ret) {
+ v4l2_err(&jpeg->v4l2_dev, "mtk_jpeg_single_core_init failed\n");
+ return ret;
}
+ } else {
+ init_waitqueue_head(&jpeg->hw_wq);
- jpeg_irq = platform_get_irq(pdev, 0);
- if (jpeg_irq < 0)
- return jpeg_irq;
+ for_each_child_of_node(pdev->dev.of_node, child)
+ num_child++;
- ret = devm_request_irq(&pdev->dev,
- jpeg_irq,
- jpeg->variant->irq_handler,
- 0,
- pdev->name, jpeg);
- if (ret) {
- dev_err(&pdev->dev, "Failed to request jpeg_irq %d (%d)\n",
- jpeg_irq, ret);
- return ret;
- }
+ atomic_set(&jpeg->hw_rdy, num_child);
+ atomic_set(&jpeg->hw_index, 0);
- ret = devm_clk_bulk_get(jpeg->dev,
- jpeg->variant->num_clks,
- jpeg->variant->clks);
- if (ret) {
- dev_err(&pdev->dev, "Failed to init clk\n");
- return ret;
- }
+ jpeg->workqueue = alloc_ordered_workqueue(MTK_JPEG_NAME,
+ WQ_MEM_RECLAIM
+ | WQ_FREEZABLE);
+ if (!jpeg->workqueue)
+ return -ENOMEM;
}
ret = v4l2_device_register(&pdev->dev, &jpeg->v4l2_dev);
jpeg->vdev->device_caps = V4L2_CAP_STREAMING |
V4L2_CAP_VIDEO_M2M_MPLANE;
- if (of_property_present(pdev->dev.of_node, "dma-ranges"))
- dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(34));
-
ret = video_register_device(jpeg->vdev, VFL_TYPE_VIDEO, -1);
if (ret) {
v4l2_err(&jpeg->v4l2_dev, "Failed to register video device\n");
return ret;
}
-static int mtk_jpeg_remove(struct platform_device *pdev)
+static void mtk_jpeg_remove(struct platform_device *pdev)
{
struct mtk_jpeg_dev *jpeg = platform_get_drvdata(pdev);
video_unregister_device(jpeg->vdev);
v4l2_m2m_release(jpeg->m2m_dev);
v4l2_device_unregister(&jpeg->v4l2_dev);
-
- return 0;
}
static __maybe_unused int mtk_jpeg_pm_suspend(struct device *dev)
SET_RUNTIME_PM_OPS(mtk_jpeg_pm_suspend, mtk_jpeg_pm_resume, NULL)
};
+#if defined(CONFIG_OF)
static const struct mtk_jpeg_variant mt8173_jpeg_drvdata = {
.clks = mt8173_jpeg_dec_clocks,
.num_clks = ARRAY_SIZE(mt8173_jpeg_dec_clocks),
.ioctl_ops = &mtk_jpeg_enc_ioctl_ops,
.out_q_default_fourcc = V4L2_PIX_FMT_YUYV,
.cap_q_default_fourcc = V4L2_PIX_FMT_JPEG,
+ .multi_core = false,
};
static struct mtk_jpeg_variant mtk8195_jpegenc_drvdata = {
.ioctl_ops = &mtk_jpeg_enc_ioctl_ops,
.out_q_default_fourcc = V4L2_PIX_FMT_YUYV,
.cap_q_default_fourcc = V4L2_PIX_FMT_JPEG,
+ .multi_core = true,
+ .jpeg_worker = mtk_jpegenc_worker,
};
static const struct mtk_jpeg_variant mtk8195_jpegdec_drvdata = {
.ioctl_ops = &mtk_jpeg_dec_ioctl_ops,
.out_q_default_fourcc = V4L2_PIX_FMT_JPEG,
.cap_q_default_fourcc = V4L2_PIX_FMT_YUV420M,
+ .multi_core = true,
+ .jpeg_worker = mtk_jpegdec_worker,
};
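Selecting the worker through the variant removes the per-flavour
``INIT_WORK()`` branches in open(). A sketch of the fields
``struct mtk_jpeg_variant`` is assumed to carry for this (names taken from the
hunks above; the rest of the struct is elided)::

	struct mtk_jpeg_variant {
		/* ... clocks, ioctl ops, formats, irq_handler, ... */
		bool multi_core;	/* true on SoCs with multiple HW cores */
		void (*jpeg_worker)(struct work_struct *work);
	};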
-#if defined(CONFIG_OF)
static const struct of_device_id mtk_jpeg_match[] = {
{
.compatible = "mediatek,mt8173-jpgdec",
static struct platform_driver mtk_jpeg_driver = {
.probe = mtk_jpeg_probe,
- .remove = mtk_jpeg_remove,
+ .remove_new = mtk_jpeg_remove,
.driver = {
.name = MTK_JPEG_NAME,
.of_match_table = of_match_ptr(mtk_jpeg_match),
}
}
- if (of_property_present(pdev->dev.of_node, "dma-ranges")) {
- ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(34));
- if (ret) {
- mtk_v4l2_err("Failed to set mask");
- goto err_core_workq;
- }
- }
-
for (i = 0; i < MTK_VDEC_HW_MAX; i++)
mutex_init(&dev->dec_mutex[i]);
mutex_init(&dev->dev_mutex);
if (IS_VDEC_LAT_ARCH(dev->vdec_pdata->hw_arch))
destroy_workqueue(dev->core_workqueue);
err_res:
- pm_runtime_disable(dev->pm.dev);
+ if (!dev->vdec_pdata->is_subdev_supported)
+ pm_runtime_disable(dev->pm.dev);
err_dec_pm:
mtk_vcodec_fw_release(dev->fw_handler);
return ret;
MODULE_DEVICE_TABLE(of, mtk_vcodec_match);
-static int mtk_vcodec_dec_remove(struct platform_device *pdev)
+static void mtk_vcodec_dec_remove(struct platform_device *pdev)
{
struct mtk_vcodec_dev *dev = platform_get_drvdata(pdev);
if (!dev->vdec_pdata->is_subdev_supported)
pm_runtime_disable(dev->pm.dev);
mtk_vcodec_fw_release(dev->fw_handler);
- return 0;
}
static struct platform_driver mtk_vcodec_dec_driver = {
.probe = mtk_vcodec_probe,
- .remove = mtk_vcodec_dec_remove,
+ .remove_new = mtk_vcodec_dec_remove,
.driver = {
.name = MTK_VCODEC_DEC_NAME,
.of_match_table = mtk_vcodec_match,
struct mtk_vcodec_ctx *ctx;
unsigned long flags;
void __iomem *addr;
+ int core_id;
spin_lock_irqsave(&dev->irqlock, flags);
ctx = dev->curr_ctx;
spin_unlock_irqrestore(&dev->irqlock, flags);
- mtk_v4l2_debug(1, "id=%d coreid:%d", ctx->id, dev->venc_pdata->core_id);
- addr = dev->reg_base[dev->venc_pdata->core_id] +
- MTK_VENC_IRQ_ACK_OFFSET;
+ core_id = dev->venc_pdata->core_id;
+ if (core_id < 0 || core_id >= NUM_MAX_VCODEC_REG_BASE) {
+ mtk_v4l2_err("Invalid core id: %d, ctx id: %d",
+ core_id, ctx->id);
+ return IRQ_HANDLED;
+ }
+
+ mtk_v4l2_debug(1, "id: %d, core id: %d", ctx->id, core_id);
- ctx->irq_status = readl(dev->reg_base[dev->venc_pdata->core_id] +
+ addr = dev->reg_base[core_id] + MTK_VENC_IRQ_ACK_OFFSET;
+
+ ctx->irq_status = readl(dev->reg_base[core_id] +
(MTK_VENC_IRQ_STATUS_OFFSET));
clean_irq_status(ctx->irq_status, addr);
goto err_event_workq;
}
- if (of_property_present(pdev->dev.of_node, "dma-ranges"))
- dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(34));
-
ret = video_register_device(vfd_enc, VFL_TYPE_VIDEO, -1);
if (ret) {
mtk_v4l2_err("Failed to register video device");
};
MODULE_DEVICE_TABLE(of, mtk_vcodec_enc_match);
-static int mtk_vcodec_enc_remove(struct platform_device *pdev)
+static void mtk_vcodec_enc_remove(struct platform_device *pdev)
{
struct mtk_vcodec_dev *dev = platform_get_drvdata(pdev);
v4l2_device_unregister(&dev->v4l2_dev);
pm_runtime_disable(dev->pm.dev);
mtk_vcodec_fw_release(dev->fw_handler);
- return 0;
}
static struct platform_driver mtk_vcodec_enc_driver = {
.probe = mtk_vcodec_probe,
- .remove = mtk_vcodec_enc_remove,
+ .remove_new = mtk_vcodec_enc_remove,
.driver = {
.name = MTK_VCODEC_ENC_NAME,
.of_match_table = mtk_vcodec_enc_match,
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/of.h>
- #include <linux/ioasid.h>
#include <uapi/linux/iommu.h>
#define IOMMU_READ (1 << 0)
};
#define IOMMU_PASID_INVALID (-1U)
+ typedef unsigned int ioasid_t;
#ifdef CONFIG_IOMMU_API
return dev->iommu->iommu_dev->ops;
}
-extern int bus_iommu_probe(struct bus_type *bus);
-extern bool iommu_present(struct bus_type *bus);
+extern int bus_iommu_probe(const struct bus_type *bus);
+extern bool iommu_present(const struct bus_type *bus);
extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
extern bool iommu_group_has_isolated_msi(struct iommu_group *group);
-extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus);
+extern struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus);
- extern struct iommu_group *iommu_group_get_by_id(int id);
extern void iommu_domain_free(struct iommu_domain *domain);
extern int iommu_attach_device(struct iommu_domain *domain,
struct device *dev);
}
int iommu_probe_device(struct device *dev);
- void iommu_release_device(struct device *dev);
int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
struct iommu_fault_param {};
struct iommu_iotlb_gather {};
-static inline bool iommu_present(struct bus_type *bus)
+static inline bool iommu_present(const struct bus_type *bus)
{
return false;
}
return false;
}
-static inline struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
+static inline struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus)
{
return NULL;
}
- static inline struct iommu_group *iommu_group_get_by_id(int id)
- {
- return NULL;
- }
-
static inline void iommu_domain_free(struct iommu_domain *domain)
{
}
return false;
}
-static inline bool pasid_valid(ioasid_t ioasid)
-{
- return ioasid != IOMMU_PASID_INVALID;
-}
-
#ifdef CONFIG_IOMMU_SVA
+ static inline void mm_pasid_init(struct mm_struct *mm)
+ {
+ mm->pasid = IOMMU_PASID_INVALID;
+ }
+ static inline bool mm_valid_pasid(struct mm_struct *mm)
+ {
+ return mm->pasid != IOMMU_PASID_INVALID;
+ }
+ void mm_pasid_drop(struct mm_struct *mm);
struct iommu_sva *iommu_sva_bind_device(struct device *dev,
struct mm_struct *mm);
void iommu_sva_unbind_device(struct iommu_sva *handle);
{
return IOMMU_PASID_INVALID;
}
+ static inline void mm_pasid_init(struct mm_struct *mm) {}
+ static inline bool mm_valid_pasid(struct mm_struct *mm) { return false; }
+ static inline void mm_pasid_drop(struct mm_struct *mm) {}
#endif /* CONFIG_IOMMU_SVA */
#endif /* __LINUX_IOMMU_H */
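With ``pasid_valid()`` removed, callers test the mm directly through
``mm_valid_pasid()``. A minimal sketch of the intended pattern, where
``program_device_pasid()`` stands in for whatever device-specific setup a
driver performs (both names below are hypothetical)::

	/* Hypothetical: only touch the device if a PASID is bound to the mm. */
	static int sketch_bind_check(struct mm_struct *mm)
	{
		if (!mm_valid_pasid(mm))
			return -ENODEV;	/* no PASID allocated for this mm */

		return program_device_pasid(mm->pasid);	/* hypothetical */
	}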
#include <linux/mm_types.h>
#include <linux/gfp.h>
#include <linux/sync_core.h>
- #include <linux/ioasid.h>
/*
* Routines for handling mm_structs
atomic_inc(&mm->mm_count);
}
+static inline void smp_mb__after_mmgrab(void)
+{
+ smp_mb__after_atomic();
+}
+
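``mmgrab()`` itself only provides the ordering of an ``atomic_inc()``;
``smp_mb__after_mmgrab()`` upgrades it to a full barrier for callers that must
order the reference grab against later loads. A minimal usage sketch (the
subsequent load is purely illustrative)::

	mmgrab(mm);			/* take a reference: atomic_inc()      */
	smp_mb__after_mmgrab();		/* full barrier after the increment    */
	val = READ_ONCE(*flag);		/* illustrative load that must not be  */
					/* reordered before the grab           */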
extern void __mmdrop(struct mm_struct *mm);
static inline void mmdrop(struct mm_struct *mm)
}
#endif
+/* Helpers for lazy TLB mm refcounting */
+static inline void mmgrab_lazy_tlb(struct mm_struct *mm)
+{
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT))
+ mmgrab(mm);
+}
+
+static inline void mmdrop_lazy_tlb(struct mm_struct *mm)
+{
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) {
+ mmdrop(mm);
+ } else {
+ /*
+ * mmdrop_lazy_tlb must provide a full memory barrier, see the
+ * membarrier comment in finish_task_switch() which relies on this.
+ */
+ smp_mb();
+ }
+}
+
+static inline void mmdrop_lazy_tlb_sched(struct mm_struct *mm)
+{
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT))
+ mmdrop_sched(mm);
+ else
+ smp_mb(); /* see mmdrop_lazy_tlb() above */
+}
+
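These helpers let the "lazy TLB" reference on an mm be compiled away when
CONFIG_MMU_LAZY_TLB_REFCOUNT is disabled, while still providing the memory
barrier the scheduler relies on. A simplified sketch of how a context-switch
path might use them when a kernel thread borrows the previous task's mm (the
real scheduler code is considerably more involved)::

	/* Simplified: kernel thread 'next' keeps running on prev's mm. */
	if (!next->mm) {
		next->active_mm = prev->active_mm;
		mmgrab_lazy_tlb(next->active_mm); /* ref only if refcounting on */
	}
	...
	mmdrop_lazy_tlb(drop_mm);	/* mmdrop(), or just smp_mb() */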
/**
* mmget() - Pin the address space associated with a &struct mm_struct.
* @mm: The address space to pin.
}
#endif
- #ifdef CONFIG_IOMMU_SVA
- static inline void mm_pasid_init(struct mm_struct *mm)
- {
- mm->pasid = INVALID_IOASID;
- }
-
- static inline bool mm_valid_pasid(struct mm_struct *mm)
- {
- return mm->pasid != INVALID_IOASID;
- }
-
- /* Associate a PASID with an mm_struct: */
- static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid)
- {
- mm->pasid = pasid;
- }
-
- static inline void mm_pasid_drop(struct mm_struct *mm)
- {
- if (mm_valid_pasid(mm)) {
- ioasid_free(mm->pasid);
- mm->pasid = INVALID_IOASID;
- }
- }
- #else
- static inline void mm_pasid_init(struct mm_struct *mm) {}
- static inline bool mm_valid_pasid(struct mm_struct *mm) { return false; }
- static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {}
- static inline void mm_pasid_drop(struct mm_struct *mm) {}
- #endif
-
#endif /* _LINUX_SCHED_MM_H */
#include <linux/io_uring.h>
#include <linux/bpf.h>
#include <linux/stackprotector.h>
+#include <linux/user_events.h>
+ #include <linux/iommu.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;
+#ifdef CONFIG_PER_VMA_LOCK
+
+/* SLAB cache for vm_area_struct.lock */
+static struct kmem_cache *vma_lock_cachep;
+
+static bool vma_lock_alloc(struct vm_area_struct *vma)
+{
+ vma->vm_lock = kmem_cache_alloc(vma_lock_cachep, GFP_KERNEL);
+ if (!vma->vm_lock)
+ return false;
+
+ init_rwsem(&vma->vm_lock->lock);
+ vma->vm_lock_seq = -1;
+
+ return true;
+}
+
+static inline void vma_lock_free(struct vm_area_struct *vma)
+{
+ kmem_cache_free(vma_lock_cachep, vma->vm_lock);
+}
+
+#else /* CONFIG_PER_VMA_LOCK */
+
+static inline bool vma_lock_alloc(struct vm_area_struct *vma) { return true; }
+static inline void vma_lock_free(struct vm_area_struct *vma) {}
+
+#endif /* CONFIG_PER_VMA_LOCK */
+
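Each VMA now carries its own separately allocated rw_semaphore. A sketch of
how a fault path could take it for reading instead of ``mmap_lock``, assuming
the ``vma->vm_lock`` allocated above (the caller shown is hypothetical)::

	/* Hypothetical reader: prefer the per-VMA lock over mmap_lock. */
	if (down_read_trylock(&vma->vm_lock->lock)) {
		/* handle the fault under the per-VMA read lock */
		up_read(&vma->vm_lock->lock);
	} else {
		/* contended: fall back to the mmap_lock path */
	}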
struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
{
struct vm_area_struct *vma;
vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
- if (vma)
- vma_init(vma, mm);
+ if (!vma)
+ return NULL;
+
+ vma_init(vma, mm);
+ if (!vma_lock_alloc(vma)) {
+ kmem_cache_free(vm_area_cachep, vma);
+ return NULL;
+ }
+
return vma;
}
{
struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
- if (new) {
- ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
- ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
- /*
- * orig->shared.rb may be modified concurrently, but the clone
- * will be reinitialized.
- */
- data_race(memcpy(new, orig, sizeof(*new)));
- INIT_LIST_HEAD(&new->anon_vma_chain);
- dup_anon_vma_name(orig, new);
+ if (!new)
+ return NULL;
+
+ ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
+ ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
+ /*
+ * orig->shared.rb may be modified concurrently, but the clone
+ * will be reinitialized.
+ */
+ data_race(memcpy(new, orig, sizeof(*new)));
+ if (!vma_lock_alloc(new)) {
+ kmem_cache_free(vm_area_cachep, new);
+ return NULL;
}
+ INIT_LIST_HEAD(&new->anon_vma_chain);
+ vma_numab_state_init(new);
+ dup_anon_vma_name(orig, new);
+
return new;
}
-void vm_area_free(struct vm_area_struct *vma)
+void __vm_area_free(struct vm_area_struct *vma)
{
+ vma_numab_state_free(vma);
free_anon_vma_name(vma);
+ vma_lock_free(vma);
kmem_cache_free(vm_area_cachep, vma);
}
+#ifdef CONFIG_PER_VMA_LOCK
+static void vm_area_free_rcu_cb(struct rcu_head *head)
+{
+ struct vm_area_struct *vma = container_of(head, struct vm_area_struct,
+ vm_rcu);
+
+ /* The vma should not be locked while being destroyed. */
+ VM_BUG_ON_VMA(rwsem_is_locked(&vma->vm_lock->lock), vma);
+ __vm_area_free(vma);
+}
+#endif
+
+void vm_area_free(struct vm_area_struct *vma)
+{
+#ifdef CONFIG_PER_VMA_LOCK
+ call_rcu(&vma->vm_rcu, vm_area_free_rcu_cb);
+#else
+ __vm_area_free(vma);
+#endif
+}
+
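Deferring the free through ``call_rcu()`` is what makes a lock-free lookup
safe: a reader that found the VMA under ``rcu_read_lock()`` may keep
dereferencing it until the grace period ends. A sketch of the reader side this
protects (the lookup helper is hypothetical)::

	rcu_read_lock();
	vma = find_vma_rcu(mm, addr);	/* hypothetical RCU-safe lookup */
	if (vma && down_read_trylock(&vma->vm_lock->lock))
		vma_locked = true;	/* vma stays valid past the RCU section */
	rcu_read_unlock();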
static void account_kernel_stack(struct task_struct *tsk, int account)
{
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
+static void do_check_lazy_tlb(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ WARN_ON_ONCE(current->active_mm == mm);
+}
+
+static void do_shoot_lazy_tlb(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ if (current->active_mm == mm) {
+ WARN_ON_ONCE(current->mm);
+ current->active_mm = &init_mm;
+ switch_mm(mm, &init_mm, current);
+ }
+}
+
+static void cleanup_lazy_tlbs(struct mm_struct *mm)
+{
+ if (!IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) {
+ /*
+ * In this case, lazy tlb mms are refcounted and would not reach
+ * __mmdrop until all CPUs have switched away and mmdrop()ed.
+ */
+ return;
+ }
+
+ /*
+ * Lazy mm shootdown does not refcount "lazy tlb mm" usage; rather, it
+ * requires lazy mm users to switch to another mm when the refcount
+ * drops to zero, before the mm is freed. This requires IPIs here to
+ * switch kernel threads to init_mm.
+ *
+ * archs that use IPIs to flush TLBs can piggy-back that lazy tlb mm
+ * switch with the final userspace teardown TLB flush which leaves the
+ * mm lazy on this CPU but no others, reducing the need for additional
+ * IPIs here. There are cases where a final IPI is still required here,
+ * such as the final mmdrop being performed on a different CPU than the
+ * one exiting, or kernel threads using the mm when userspace exits.
+ *
+ * IPI overheads have not been found to be expensive, but they could be
+ * reduced in a number of possible ways, for example (roughly
+ * increasing order of complexity):
+ * - The last lazy reference created by exit_mm() could instead switch
+ * to init_mm; however, it's probable this will run on the same CPU
+ * immediately afterwards, so this may not reduce IPIs much.
+ * - A batch of mms requiring IPIs could be gathered and freed at once.
+ * - CPUs store active_mm where it can be remotely checked without a
+ * lock, to filter out false-positives in the cpumask.
+ * - After mm_users or mm_count reaches zero, switching away from the
+ * mm could clear mm_cpumask to reduce some IPIs, perhaps together
+ * with some batching or delaying of the final IPIs.
+ * - A delayed freeing and RCU-like quiescing sequence based on mm
+ * switching to avoid IPIs completely.
+ */
+ on_each_cpu_mask(mm_cpumask(mm), do_shoot_lazy_tlb, (void *)mm, 1);
+ if (IS_ENABLED(CONFIG_DEBUG_VM_SHOOT_LAZIES))
+ on_each_cpu(do_check_lazy_tlb, (void *)mm, 1);
+}
+
/*
* Called when the last reference to the mm
* is dropped: either by a lazy thread or by
BUG_ON(mm == &init_mm);
WARN_ON_ONCE(mm == current->mm);
+
+ /* Ensure no CPUs are using this as their lazy tlb mm */
+ cleanup_lazy_tlbs(mm);
+
WARN_ON_ONCE(mm == current->active_mm);
mm_free_pgd(mm);
destroy_context(mm);
check_mm(mm);
put_user_ns(mm->user_ns);
mm_pasid_drop(mm);
+ mm_destroy_cid(mm);
for (i = 0; i < NR_MM_COUNTERS; i++)
percpu_counter_destroy(&mm->rss_stat[i]);
#ifdef CONFIG_SCHED_MM_CID
tsk->mm_cid = -1;
+ tsk->last_mm_cid = -1;
tsk->mm_cid_active = 0;
+ tsk->migrate_from_cpu = -1;
#endif
return tsk;
seqcount_init(&mm->write_protect_seq);
mmap_init_lock(mm);
INIT_LIST_HEAD(&mm->mmlist);
+#ifdef CONFIG_PER_VMA_LOCK
+ mm->mm_lock_seq = 0;
+#endif
mm_pgtables_bytes_init(mm);
mm->map_count = 0;
mm->locked_vm = 0;
if (init_new_context(p, mm))
goto fail_nocontext;
+ if (mm_alloc_cid(mm))
+ goto fail_cid;
+
for (i = 0; i < NR_MM_COUNTERS; i++)
if (percpu_counter_init(&mm->rss_stat[i], 0, GFP_KERNEL_ACCOUNT))
goto fail_pcpu;
mm->user_ns = get_user_ns(user_ns);
lru_gen_init_mm(mm);
- mm_init_cid(mm);
return mm;
fail_pcpu:
while (i > 0)
percpu_counter_destroy(&mm->rss_stat[--i]);
+ mm_destroy_cid(mm);
+fail_cid:
+ destroy_context(mm);
fail_nocontext:
mm_free_pgd(mm);
fail_nopgd:
return 0;
}
-static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
+static int copy_files(unsigned long clone_flags, struct task_struct *tsk,
+ int no_files)
{
struct files_struct *oldf, *newf;
int error = 0;
if (!oldf)
goto out;
+ if (no_files) {
+ tsk->files = NULL;
+ goto out;
+ }
+
if (clone_flags & CLONE_FILES) {
atomic_inc(&oldf->count);
goto out;
#endif
};
+/**
+ * __pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
+ * @pid: the struct pid for which to create a pidfd
+ * @flags: flags of the new @pidfd
+ * @ret: where to return the new pidfd file
+ *
+ * Allocate a new file that stashes @pid and reserve a new pidfd number in the
+ * caller's file descriptor table. The pidfd is reserved but not installed yet.
+ *
+ * The helper doesn't perform checks on @pid, which makes it useful for pidfds
+ * created via CLONE_PIDFD where @pid has no task attached when the pidfd and
+ * pidfd file are prepared.
+ *
+ * If this function returns successfully, the caller is responsible for either
+ * calling fd_install() with the returned pidfd and pidfd file to install the
+ * pidfd into its file descriptor table, or for calling put_unused_fd() and
+ * fput() on the returned pidfd and pidfd file respectively.
+ *
+ * This function is useful when a pidfd must already be reserved but there
+ * might still be points of failure afterwards and the caller wants to ensure
+ * that no pidfd is leaked into its file descriptor table.
+ *
+ * Return: On success, a reserved pidfd is returned from the function and a new
+ * pidfd file is returned in the last argument to the function. On
+ * error, a negative error code is returned from the function and the
+ * last argument remains unchanged.
+ */
+static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
+{
+ int pidfd;
+ struct file *pidfd_file;
+
+ if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
+ return -EINVAL;
+
+ pidfd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+ if (pidfd < 0)
+ return pidfd;
+
+ pidfd_file = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
+ flags | O_RDWR | O_CLOEXEC);
+ if (IS_ERR(pidfd_file)) {
+ put_unused_fd(pidfd);
+ return PTR_ERR(pidfd_file);
+ }
+ get_pid(pid); /* held by pidfd_file now */
+ *ret = pidfd_file;
+ return pidfd;
+}
+
+/**
+ * pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
+ * @pid: the struct pid for which to create a pidfd
+ * @flags: flags of the new @pidfd
+ * @ret: where to return the new pidfd file
+ *
+ * Allocate a new file that stashes @pid and reserve a new pidfd number in the
+ * caller's file descriptor table. The pidfd is reserved but not installed yet.
+ *
+ * The helper verifies that @pid is used as a thread group leader.
+ *
+ * If this function returns successfully, the caller is responsible for either
+ * calling fd_install() with the returned pidfd and pidfd file to install the
+ * pidfd into its file descriptor table, or for calling put_unused_fd() and
+ * fput() on the returned pidfd and pidfd file respectively.
+ *
+ * This function is useful when a pidfd must already be reserved but there
+ * might still be points of failure afterwards and the caller wants to ensure
+ * that no pidfd is leaked into its file descriptor table.
+ *
+ * Return: On success, a reserved pidfd is returned from the function and a new
+ * pidfd file is returned in the last argument to the function. On
+ * error, a negative error code is returned from the function and the
+ * last argument remains unchanged.
+ */
+int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
+{
+ if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
+ return -EINVAL;
+
+ return __pidfd_prepare(pid, flags, ret);
+}
+
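The reserve/install split means a later failure can never leak a pidfd into
the caller's file descriptor table. A sketch of the expected calling pattern
(the failure condition is illustrative)::

	struct file *pidfd_file = NULL;
	int pidfd;

	pidfd = pidfd_prepare(pid, 0, &pidfd_file);
	if (pidfd < 0)
		return pidfd;

	if (later_step_fails()) {		/* hypothetical */
		put_unused_fd(pidfd);		/* release the reservation */
		fput(pidfd_file);		/* drops the pid reference too */
		return -EIO;
	}
	fd_install(pidfd, pidfd_file);		/* publish only on success */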
static void __delayed_free_task(struct rcu_head *rhp)
{
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
* parts of the process environment (as per the clone
* flags). The actual kick-off is left to the caller.
*/
-static __latent_entropy struct task_struct *copy_process(
+__latent_entropy struct task_struct *copy_process(
struct pid *pid,
int trace,
int node,
p->flags &= ~PF_KTHREAD;
if (args->kthread)
p->flags |= PF_KTHREAD;
+ if (args->user_worker)
+ p->flags |= PF_USER_WORKER;
if (args->io_thread) {
/*
* Mark us an IO worker, and block any signal that isn't
siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
}
+ if (args->name)
+ strscpy_pad(p->comm, args->name, sizeof(p->comm));
+
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL;
/*
* Clear TID on mm_release()?
retval = copy_semundo(clone_flags, p);
if (retval)
goto bad_fork_cleanup_security;
- retval = copy_files(clone_flags, p);
+ retval = copy_files(clone_flags, p, args->no_files);
if (retval)
goto bad_fork_cleanup_semundo;
retval = copy_fs(clone_flags, p);
if (retval)
goto bad_fork_cleanup_io;
+ if (args->ignore_signals)
+ ignore_signals(p);
+
stackleak_task_init(p);
if (pid != &init_struct_pid) {
* if the fd table isn't shared).
*/
if (clone_flags & CLONE_PIDFD) {
- retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+ /* Note that no task has been attached to @pid yet. */
+ retval = __pidfd_prepare(pid, O_RDWR | O_CLOEXEC, &pidfile);
if (retval < 0)
goto bad_fork_free_pid;
-
pidfd = retval;
- pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
- O_RDWR | O_CLOEXEC);
- if (IS_ERR(pidfile)) {
- put_unused_fd(pidfd);
- retval = PTR_ERR(pidfile);
- goto bad_fork_free_pid;
- }
- get_pid(pid); /* held by pidfile now */
-
retval = put_user(pidfd, args->pidfd);
if (retval)
goto bad_fork_put_pidfd;
trace_task_newtask(p, clone_flags);
uprobe_copy_process(p, clone_flags);
+ user_events_fork(p, clone_flags);
copy_oom_score_adj(clone_flags, p);
.fn = fn,
.fn_arg = arg,
.io_thread = 1,
+ .user_worker = 1,
};
return copy_process(NULL, 0, node, &args);
/*
* Create a kernel thread.
*/
-pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+pid_t kernel_thread(int (*fn)(void *), void *arg, const char *name,
+ unsigned long flags)
{
struct kernel_clone_args args = {
.flags = ((lower_32_bits(flags) | CLONE_VM |
.exit_signal = (lower_32_bits(flags) & CSIGNAL),
.fn = fn,
.fn_arg = arg,
+ .name = name,
.kthread = 1,
};
NULL);
vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
+#ifdef CONFIG_PER_VMA_LOCK
+ vma_lock_cachep = KMEM_CACHE(vma_lock, SLAB_PANIC|SLAB_ACCOUNT);
+#endif
mmap_init();
nsproxy_cache_init();
}
#include <linux/atomic.h>
#include <linux/user_namespace.h>
- #include <linux/ioasid.h>
+ #include <linux/iommu.h>
#include <asm/mmu.h>
#ifndef INIT_MM_CONTEXT
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
.arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
+#ifdef CONFIG_PER_VMA_LOCK
+ .mm_lock_seq = 0,
+#endif
.user_ns = &init_user_ns,
.cpu_bitmap = CPU_BITS_NONE,
#ifdef CONFIG_IOMMU_SVA
- .pasid = INVALID_IOASID,
+ .pasid = IOMMU_PASID_INVALID,
#endif
INIT_MM_CONTEXT(init_mm)
};