Merge tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git...
author     Linus Torvalds <[email protected]>
           Wed, 14 Dec 2022 03:29:45 +0000 (19:29 -0800)
committer  Linus Torvalds <[email protected]>
           Wed, 14 Dec 2022 03:29:45 +0000 (19:29 -0800)
Pull MM updates from Andrew Morton:

 - More userfaultfd work from Peter Xu

 - Several convert-to-folios series from Sidhartha Kumar and Huang Ying

 - Some filemap cleanups from Vishal Moola

 - David Hildenbrand added the ability to selftest anon memory COW
   handling

 - Some cpuset simplifications from Liu Shixin

 - Addition of vmalloc tracing support by Uladzislau Rezki

 - Some pagecache folioifications and simplifications from Matthew
   Wilcox

 - A pagemap cleanup from Kefeng Wang: we have VM_ACCESS_FLAGS, so use
   it

 - Miguel Ojeda contributed some cleanups for our use of the
   __no_sanitize_thread__ gcc keyword (a short illustration of the
   attribute follows this list).

   This series should have been in the non-MM tree, my bad

 - Naoya Horiguchi improved the interaction between memory poisoning and
   memory section removal for huge pages

 - DAMON cleanups and tuneups from SeongJae Park

 - Tony Luck fixed the handling of COW faults against poisoned pages

 - Peter Xu utilized the PTE marker code for handling swapin errors

 - Hugh Dickins reworked compound page mapcount handling, simplifying it
   and making it more efficient

 - Removal of the autonuma savedwrite infrastructure from Nadav Amit and
   David Hildenbrand

 - zram support for multiple compression streams from Sergey Senozhatsky

 - David Hildenbrand reworked the GUP code's R/O long-term pinning so
   that drivers no longer need to use the FOLL_FORCE workaround which
   didn't work very well anyway

 - Mel Gorman altered the page allocator so that local IRQs can remain
   enabled during per-cpu page allocations

 - Vishal Moola removed the try_to_release_page() wrapper

 - Stefan Roesch added some per-BDI sysfs tunables which are used to
   prevent network block devices from dirtying excessive amounts of
   pagecache

 - David Hildenbrand did some cleanup and repair work on KSM COW
   breaking

 - Nhat Pham and Johannes Weiner have implemented writeback in zswap's
   zsmalloc backend

 - Brian Foster has fixed a longstanding corner-case oddity in
   file[map]_write_and_wait_range()

 - sparse-vmemmap changes for MIPS, LoongArch and NIOS2 from Feiyang
   Chen

 - Shiyang Ruan has done some work on fsdax, to make its reflink mode
   work better under xfstests. Better, but still not perfect

 - Christoph Hellwig has removed the .writepage() method from several
   filesystems. They only need .writepages()

 - Yosry Ahmed wrote a series which fixes the memcg reclaim target
   beancounting

 - David Hildenbrand has fixed some of our MM selftests for 32-bit
   machines

 - Many singleton patches, as usual
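
The __no_sanitize_thread__ cleanup mentioned above concerns a GCC function
attribute.  As a rough, self-contained sketch (not code from the series; the
NO_SANITIZE_THREAD macro below is a local stand-in rather than the kernel's
own wrapper), this is how such an annotation is typically applied so that the
thread sanitizer skips instrumenting a single function:

  /* Local stand-in for a __no_sanitize_thread__-style kernel wrapper. */
  #if defined(__GNUC__) && defined(__SANITIZE_THREAD__)
  #define NO_SANITIZE_THREAD __attribute__((__no_sanitize_thread__))
  #else
  #define NO_SANITIZE_THREAD
  #endif

  /* TSan will not instrument the memory accesses inside this function. */
  static NO_SANITIZE_THREAD int unchecked_read(const int *p)
  {
          return *p;
  }

  int main(void)
  {
          int x = 42;
          return unchecked_read(&x) == 42 ? 0 : 1;
  }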

* tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (313 commits)
  mm/hugetlb: set head flag before setting compound_order in __prep_compound_gigantic_folio
  mm: mmu_gather: allow more than one batch of delayed rmaps
  mm: fix typo in struct pglist_data code comment
  kmsan: fix memcpy tests
  mm: add cond_resched() in swapin_walk_pmd_entry()
  mm: do not show fs mm pc for VM_LOCKONFAULT pages
  selftests/vm: ksm_functional_tests: fixes for 32bit
  selftests/vm: cow: fix compile warning on 32bit
  selftests/vm: madv_populate: fix missing MADV_POPULATE_(READ|WRITE) definitions
  mm/gup_test: fix PIN_LONGTERM_TEST_READ with highmem
  mm,thp,rmap: fix races between updates of subpages_mapcount
  mm: memcg: fix swapcached stat accounting
  mm: add nodes= arg to memory.reclaim
  mm: disable top-tier fallback to reclaim on proactive reclaim
  selftests: cgroup: make sure reclaim target memcg is unprotected
  selftests: cgroup: refactor proactive reclaim code to reclaim_until()
  mm: memcg: fix stale protection of reclaim target memcg
  mm/mmap: properly unaccount memory on mas_preallocate() failure
  omfs: remove ->writepage
  jfs: remove ->writepage
  ...
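
One commit in the shortlog above, "mm: add nodes= arg to memory.reclaim",
extends the cgroup v2 proactive-reclaim file so that reclaim can be
restricted to a set of NUMA nodes.  A minimal usage sketch follows; the
cgroup path is an assumption for illustration, and kernels without this
commit reject the nodes= argument:

  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  int main(void)
  {
          /* Assumed cgroup; adjust to a real cgroup v2 path on your system. */
          const char *path = "/sys/fs/cgroup/workload/memory.reclaim";
          const char *req  = "1G nodes=0";  /* try to reclaim ~1G from node 0 */
          int fd = open(path, O_WRONLY);

          if (fd < 0) {
                  perror("open");
                  return 1;
          }
          /* The write may fail (e.g. EAGAIN) if the kernel cannot reclaim
           * the requested amount. */
          if (write(fd, req, strlen(req)) < 0)
                  perror("write");
          close(fd);
          return 0;
  }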

40 files changed:
Documentation/filesystems/proc.rst
MAINTAINERS
arch/arm/include/asm/pgtable-nommu.h
arch/arm/include/asm/pgtable.h
arch/arm64/include/asm/pgtable.h
arch/arm64/mm/mmu.c
arch/arm64/mm/pageattr.c
arch/loongarch/Kconfig
arch/loongarch/include/asm/pgtable.h
arch/x86/kernel/cpu/sgx/encl.c
drivers/acpi/numa/hmat.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/etnaviv/etnaviv_gem.c
drivers/media/common/videobuf2/frame_vector.c
fs/fuse/dev.c
fs/hfs/inode.c
fs/hfsplus/inode.c
fs/xfs/xfs_iops.c
include/linux/damon.h
include/linux/mm.h
include/linux/mmzone.h
include/linux/sched.h
kernel/cgroup/cpuset.c
kernel/fork.c
kernel/sysctl.c
lib/Kconfig.debug
lib/Kconfig.kasan
mm/Kconfig
mm/gup.c
mm/huge_memory.c
mm/hugetlb.c
mm/kasan/kasan_test.c
mm/kfence/core.c
mm/madvise.c
mm/migrate.c
mm/mmap.c
mm/shmem.c
mm/slub.c
mm/swapfile.c
tools/testing/selftests/vm/.gitignore

diff --combined Documentation/filesystems/proc.rst
index f4ee84d7b351752b8b67bf3d2b0f325307857824,b8f175ae4853b3b3a81b8fc918cac51d77aac4b6..e224b6d5b642309494bf4ce7340980327e93b169
@@@ -47,7 -47,6 +47,7 @@@ fixes/update part 1.1  Stefani Seibold 
    3.10  /proc/<pid>/timerslack_ns - Task timerslack value
    3.11        /proc/<pid>/patch_state - Livepatch patch operation state
    3.12        /proc/<pid>/arch_status - Task architecture specific information
 +  3.13  /proc/<pid>/fd - List of symlinks to open files
  
    4   Configuring procfs
    4.1 Mount options
@@@ -246,8 -245,7 +246,8 @@@ It's slow but very precise
   Ngid                        NUMA group ID (0 if none)
   Pid                         process id
   PPid                        process id of the parent process
 - TracerPid                   PID of process tracing this process (0 if not)
 + TracerPid                   PID of process tracing this process (0 if not, or
 +                             the tracer is outside of the current pid namespace)
   Uid                         Real, effective, saved set, and  file system UIDs
   Gid                         Real, effective, saved set, and  file system GIDs
   FDSize                      number of file descriptor slots currently allocated
@@@ -428,14 -426,16 +428,16 @@@ with the memory region, as the case wou
  The "pathname" shows the name associated file for this mapping.  If the mapping
  is not associated with a file:
  
-  =============              ====================================
+  ===================        ===========================================
   [heap]                     the heap of the program
   [stack]                    the stack of the main process
   [vdso]                     the "virtual dynamic shared object",
                              the kernel system call handler
-  [anon:<name>]              an anonymous mapping that has been
+  [anon:<name>]              a private anonymous mapping that has been
                              named by userspace
-  =============              ====================================
+  [anon_shmem:<name>]        an anonymous shared memory mapping that has
+                             been named by userspace
+  ===================        ===========================================
  
   or if empty, the mapping is anonymous.
  
@@@ -2151,22 -2151,6 +2153,22 @@@ AVX512_elapsed_m
    the task is unlikely an AVX512 user, but depends on the workload and the
    scheduling scenario, it also could be a false negative mentioned above.
  
 +3.13 /proc/<pid>/fd - List of symlinks to open files
 +-------------------------------------------------------
 +This directory contains symbolic links which represent open files
 +the process is maintaining.  Example output::
 +
 +  lr-x------ 1 root root 64 Sep 20 17:53 0 -> /dev/null
 +  l-wx------ 1 root root 64 Sep 20 17:53 1 -> /dev/null
 +  lrwx------ 1 root root 64 Sep 20 17:53 10 -> 'socket:[12539]'
 +  lrwx------ 1 root root 64 Sep 20 17:53 11 -> 'socket:[12540]'
 +  lrwx------ 1 root root 64 Sep 20 17:53 12 -> 'socket:[12542]'
 +
 +The number of open files for the process is stored in 'size' member
 +of stat() output for /proc/<pid>/fd for fast access.
 +-------------------------------------------------------
 +
 +
  Chapter 4: Configuring procfs
  =============================
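
The proc.rst hunk above also notes that the number of open files is reported
in the st_size field returned by stat() on /proc/<pid>/fd.  A small userspace
sketch of reading it (on kernels predating this change the directory's
st_size is typically just 0):

  #include <stdio.h>
  #include <sys/stat.h>

  int main(void)
  {
          struct stat st;

          /* /proc/self/fd is the calling process's own fd directory. */
          if (stat("/proc/self/fd", &st) != 0) {
                  perror("stat");
                  return 1;
          }
          printf("open file descriptors: %lld\n", (long long)st.st_size);
          return 0;
  }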
  
diff --combined MAINTAINERS
index 5a4526a171d6a8b8697caac89741f2125b05cdc3,6044301ee9bdda665723070220fa367d15467328..bb77a3ed9d5423a86920787df03177541da0b5bb
@@@ -775,24 -775,6 +775,24 @@@ T:       git git://linuxtv.org/media_tree.gi
  F:    Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml
  F:    drivers/media/platform/sunxi/sun4i-csi/
  
 +ALLWINNER A31 CSI DRIVER
 +M:    Yong Deng <[email protected]>
 +M:    Paul Kocialkowski <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git git://linuxtv.org/media_tree.git
 +F:    Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml
 +F:    drivers/media/platform/sunxi/sun6i-csi/
 +
 +ALLWINNER A31 ISP DRIVER
 +M:    Paul Kocialkowski <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git git://linuxtv.org/media_tree.git
 +F:    Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml
 +F:    drivers/staging/media/sunxi/sun6i-isp/
 +F:    drivers/staging/media/sunxi/sun6i-isp/uapi/sun6i-isp-config.h
 +
  ALLWINNER A31 MIPI CSI-2 BRIDGE DRIVER
  M:    Paul Kocialkowski <[email protected]>
  L:    [email protected]
@@@ -1111,16 -1093,6 +1111,16 @@@ S:    Maintaine
  F:    Documentation/hid/amd-sfh*
  F:    drivers/hid/amd-sfh-hid/
  
 +AMLOGIC DDR PMU DRIVER
 +M:    Jiucheng Xu <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +W:    http://www.amlogic.com
 +F:    Documentation/admin-guide/perf/meson-ddr-pmu.rst
 +F:    Documentation/devicetree/bindings/perf/amlogic,g12-ddr-pmu.yaml
 +F:    drivers/perf/amlogic/
 +F:    include/soc/amlogic/
 +
  AMPHION VPU CODEC V4L2 DRIVER
  M:    Ming Qian <[email protected]>
  M:    Shijie Qin <[email protected]>
@@@ -1713,7 -1685,7 +1713,7 @@@ M:      Miquel Raynal <miquel.raynal@bootlin
  M:    Naga Sureshkumar Relli <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
  S:    Maintained
 -F:    Documentation/devicetree/bindings/memory-controllers/arm,pl353-smc.yaml
 +F:    Documentation/devicetree/bindings/memory-controllers/arm,pl35x-smc.yaml
  F:    drivers/memory/pl353-smc.c
  
  ARM PRIMECELL CLCD PL110 DRIVER
@@@ -1925,14 -1897,12 +1925,14 @@@ T:   git https://github.com/AsahiLinux/li
  F:    Documentation/devicetree/bindings/arm/apple.yaml
  F:    Documentation/devicetree/bindings/arm/apple/*
  F:    Documentation/devicetree/bindings/clock/apple,nco.yaml
 +F:    Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml
  F:    Documentation/devicetree/bindings/dma/apple,admac.yaml
  F:    Documentation/devicetree/bindings/i2c/apple,i2c.yaml
  F:    Documentation/devicetree/bindings/interrupt-controller/apple,*
  F:    Documentation/devicetree/bindings/iommu/apple,dart.yaml
  F:    Documentation/devicetree/bindings/iommu/apple,sart.yaml
  F:    Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml
 +F:    Documentation/devicetree/bindings/net/bluetooth/brcm,bcm4377-bluetooth.yaml
  F:    Documentation/devicetree/bindings/nvme/apple,nvme-ans.yaml
  F:    Documentation/devicetree/bindings/nvmem/apple,efuses.yaml
  F:    Documentation/devicetree/bindings/pci/apple,pcie.yaml
@@@ -1940,9 -1910,7 +1940,9 @@@ F:      Documentation/devicetree/bindings/pi
  F:    Documentation/devicetree/bindings/power/apple*
  F:    Documentation/devicetree/bindings/watchdog/apple,wdt.yaml
  F:    arch/arm64/boot/dts/apple/
 +F:    drivers/bluetooth/hci_bcm4377.c
  F:    drivers/clk/clk-apple-nco.c
 +F:    drivers/cpufreq/apple-soc-cpufreq.c
  F:    drivers/dma/apple-admac.c
  F:    drivers/i2c/busses/i2c-pasemi-core.c
  F:    drivers/i2c/busses/i2c-pasemi-platform.c
@@@ -2229,7 -2197,7 +2229,7 @@@ M:      Wei Xu <[email protected]
  L:    [email protected] (moderated for non-subscribers)
  S:    Supported
  W:    http://www.hisilicon.com
 -T:    git git://github.com/hisilicon/linux-hisi.git
 +T:    git https://github.com/hisilicon/linux-hisi.git
  F:    arch/arm/boot/dts/hi3*
  F:    arch/arm/boot/dts/hip*
  F:    arch/arm/boot/dts/hisi*
@@@ -2304,6 -2272,8 +2304,6 @@@ F:      drivers/clocksource/timer-ixp4xx.
  F:    drivers/crypto/ixp4xx_crypto.c
  F:    drivers/gpio/gpio-ixp4xx.c
  F:    drivers/irqchip/irq-ixp4xx.c
 -F:    include/linux/irqchip/irq-ixp4xx.h
 -F:    include/linux/platform_data/timer-ixp4xx.h
  
  ARM/INTEL KEEMBAY ARCHITECTURE
  M:    Paul J. Murphy <[email protected]>
@@@ -2371,8 -2341,6 +2371,8 @@@ M:      Gregory Clement <gregory.clement@boo
  L:    [email protected] (moderated for non-subscribers)
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/gclement/mvebu.git
 +F:    Documentation/devicetree/bindings/arm/marvell/marvell,dove.txt
 +F:    Documentation/devicetree/bindings/arm/marvell/marvell,orion5x.txt
  F:    Documentation/devicetree/bindings/soc/dove/
  F:    arch/arm/boot/dts/dove*
  F:    arch/arm/boot/dts/orion5x*
@@@ -2389,7 -2357,6 +2389,7 @@@ M:      Sebastian Hesselbarth <sebastian.hes
  L:    [email protected] (moderated for non-subscribers)
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/gclement/mvebu.git
 +F:    Documentation/devicetree/bindings/arm/marvell/
  F:    arch/arm/boot/dts/armada*
  F:    arch/arm/boot/dts/kirkwood*
  F:    arch/arm/configs/mvebu_*_defconfig
@@@ -2472,7 -2439,6 +2472,7 @@@ L:      [email protected]
  S:    Supported
  T:    git git://github.com/microchip-ung/linux-upstream.git
  F:    arch/arm64/boot/dts/microchip/
 +F:    drivers/net/ethernet/microchip/vcap/
  F:    drivers/pinctrl/pinctrl-microchip-sgpio.c
  N:    sparx5
  
@@@ -2654,7 -2620,7 +2654,7 @@@ W:      http://www.armlinux.org.uk
  ARM/QUALCOMM SUPPORT
  M:    Andy Gross <[email protected]>
  M:    Bjorn Andersson <[email protected]>
 -R:    Konrad Dybcio <konrad.dybcio@somainline.org>
 +R:    Konrad Dybcio <konrad.dybcio@linaro.org>
  L:    [email protected]
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git
@@@ -2725,7 -2691,7 +2725,7 @@@ F:      arch/arm/boot/dts/rtd
  F:    arch/arm/mach-realtek/
  F:    arch/arm64/boot/dts/realtek/
  
 -ARM/RENESAS ARCHITECTURE
 +ARM/RISC-V/RENESAS ARCHITECTURE
  M:    Geert Uytterhoeven <[email protected]>
  M:    Magnus Damm <[email protected]>
  L:    [email protected]
@@@ -2733,6 -2699,7 +2733,6 @@@ S:      Supporte
  Q:    http://patchwork.kernel.org/project/linux-renesas-soc/list/
  C:    irc://irc.libera.chat/renesas-soc
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git next
 -F:    Documentation/devicetree/bindings/arm/renesas.yaml
  F:    Documentation/devicetree/bindings/hwinfo/renesas,prr.yaml
  F:    Documentation/devicetree/bindings/soc/renesas/
  F:    arch/arm/boot/dts/emev2*
@@@ -2746,7 -2713,6 +2746,7 @@@ F:      arch/arm/configs/shmobile_defconfi
  F:    arch/arm/include/debug/renesas-scif.S
  F:    arch/arm/mach-shmobile/
  F:    arch/arm64/boot/dts/renesas/
 +F:    arch/riscv/boot/dts/renesas/
  F:    drivers/soc/renesas/
  F:    include/linux/soc/renesas/
  
@@@ -4843,7 -4809,7 +4843,7 @@@ R:      Jeff Layton <[email protected]
  L:    [email protected]
  S:    Supported
  W:    http://ceph.com/
 -T:    git git://github.com/ceph/ceph-client.git
 +T:    git https://github.com/ceph/ceph-client.git
  F:    include/linux/ceph/
  F:    include/linux/crush/
  F:    net/ceph/
@@@ -4855,7 -4821,7 +4855,7 @@@ R:      Jeff Layton <[email protected]
  L:    [email protected]
  S:    Supported
  W:    http://ceph.com/
 -T:    git git://github.com/ceph/ceph-client.git
 +T:    git https://github.com/ceph/ceph-client.git
  F:    Documentation/filesystems/ceph.rst
  F:    fs/ceph/
  
@@@ -4945,7 -4911,7 +4945,7 @@@ F:      drivers/platform/chrome
  
  CHROMEOS EC CODEC DRIVER
  M:    Cheng-Yi Chiang <[email protected]>
 -M:    Tzung-Bi Shih <tzungbi@google.com>
 +M:    Tzung-Bi Shih <tzungbi@kernel.org>
  R:    Guenter Roeck <[email protected]>
  L:    [email protected]
  S:    Maintained
@@@ -4975,12 -4941,6 +4975,12 @@@ S:    Maintaine
  F:    drivers/platform/chrome/cros_usbpd_notify.c
  F:    include/linux/platform_data/cros_usbpd_notify.h
  
 +CHROMEOS HPS DRIVER
 +M:    Dan Callaghan <[email protected]>
 +R:    Sami Kyöstilä <[email protected]>
 +S:    Maintained
 +F:    drivers/platform/chrome/cros_hps_i2c.c
 +
  CHRONTEL CH7322 CEC DRIVER
  M:    Joe Tessler <[email protected]>
  L:    [email protected]
@@@ -5542,6 -5502,14 +5542,6 @@@ M:     Jaya Kumar <[email protected]
  S:    Maintained
  F:    sound/pci/cs5535audio/
  
 -CSI DRIVERS FOR ALLWINNER V3s
 -M:    Yong Deng <[email protected]>
 -L:    [email protected]
 -S:    Maintained
 -T:    git git://linuxtv.org/media_tree.git
 -F:    Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml
 -F:    drivers/media/platform/sunxi/sun6i-csi/
 -
  CTU CAN FD DRIVER
  M:    Pavel Pisa <[email protected]>
  M:    Ondrej Ille <[email protected]>
@@@ -5617,6 -5585,8 +5617,6 @@@ F:      drivers/scsi/cxgbi/cxgb3
  
  CXGB4 CRYPTO DRIVER (chcr)
  M:    Ayush Sawal <[email protected]>
 -M:    Vinay Kumar Yadav <[email protected]>
 -M:    Rohit Maheshwari <[email protected]>
  L:    [email protected]
  S:    Supported
  W:    http://www.chelsio.com
@@@ -5624,6 -5594,8 +5624,6 @@@ F:      drivers/crypto/chelsi
  
  CXGB4 INLINE CRYPTO DRIVER
  M:    Ayush Sawal <[email protected]>
 -M:    Vinay Kumar Yadav <[email protected]>
 -M:    Rohit Maheshwari <[email protected]>
  L:    [email protected]
  S:    Supported
  W:    http://www.chelsio.com
  S:    Maintained
  F:    drivers/platform/x86/dell/dell-wmi-descriptor.c
  
 +DELL WMI DDV DRIVER
 +M:    Armin Wolf <[email protected]>
 +S:    Maintained
 +F:    Documentation/ABI/testing/debugfs-dell-wmi-ddv
 +F:    Documentation/ABI/testing/sysfs-platform-dell-wmi-ddv
 +F:    drivers/platform/x86/dell/dell-wmi-ddv.c
 +
  DELL WMI SYSMAN DRIVER
  M:    Divya Bharathi <[email protected]>
  M:    Prasanth Ksr <[email protected]>
@@@ -6068,12 -6033,11 +6068,12 @@@ F:   include/net/devlink.
  F:    include/uapi/linux/devlink.h
  F:    net/core/devlink.c
  
 -DH ELECTRONICS IMX6 DHCOM BOARD SUPPORT
 +DH ELECTRONICS IMX6 DHCOM/DHCOR BOARD SUPPORT
  M:    Christoph Niedermaier <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    arch/arm/boot/dts/imx6*-dhcom-*
 +F:    arch/arm/boot/dts/imx6*-dhcor-*
  
  DH ELECTRONICS STM32MP1 DHCOM/DHCOR BOARD SUPPORT
  M:    Marek Vasut <[email protected]>
@@@ -6365,7 -6329,6 +6365,7 @@@ F:      drivers/net/ethernet/freescale/dpaa2
  F:    drivers/net/ethernet/freescale/dpaa2/Makefile
  F:    drivers/net/ethernet/freescale/dpaa2/dpaa2-eth*
  F:    drivers/net/ethernet/freescale/dpaa2/dpaa2-mac*
 +F:    drivers/net/ethernet/freescale/dpaa2/dpaa2-xsk*
  F:    drivers/net/ethernet/freescale/dpaa2/dpkg.h
  F:    drivers/net/ethernet/freescale/dpaa2/dpmac*
  F:    drivers/net/ethernet/freescale/dpaa2/dpni*
@@@ -6543,12 -6506,6 +6543,12 @@@ S:    Orphan / Obsolet
  F:    drivers/gpu/drm/i810/
  F:    include/uapi/drm/i810_drm.h
  
 +DRM DRIVER FOR JADARD JD9365DA-H3 MIPI-DSI LCD PANELS
 +M:    Jagan Teki <[email protected]>
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/display/panel/jadard,jd9365da-h3.yaml
 +F:    drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c
 +
  DRM DRIVER FOR LOGICVC DISPLAY CONTROLLER
  M:    Paul Kocialkowski <[email protected]>
  S:    Supported
  S:    Maintained
  T:    git git://anongit.freedesktop.org/drm/drm-misc
  F:    drivers/gpu/drm/drm_aperture.c
 +F:    drivers/gpu/drm/tiny/ofdrm.c
  F:    drivers/gpu/drm/tiny/simpledrm.c
  F:    drivers/video/aperture.c
 +F:    drivers/video/nomodeset.c
  F:    include/drm/drm_aperture.h
  F:    include/linux/aperture.h
 +F:    include/video/nomodeset.h
  
  DRM DRIVER FOR SIS VIDEO CARDS
  S:    Orphan / Obsolete
@@@ -6873,15 -6827,6 +6873,15 @@@ F:    include/drm/drm
  F:    include/linux/vga*
  F:    include/uapi/drm/drm*
  
 +DRM COMPUTE ACCELERATORS DRIVERS AND FRAMEWORK
 +M:    Oded Gabbay <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +C:    irc://irc.oftc.net/dri-devel
 +T:    git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/accel.git
 +F:    Documentation/accel/
 +F:    drivers/accel/
 +
  DRM DRIVERS FOR ALLWINNER A10
  M:    Maxime Ripard <[email protected]>
  M:    Chen-Yu Tsai <[email protected]>
@@@ -7170,7 -7115,7 +7170,7 @@@ F:      drivers/gpu/drm/ttm
  F:    include/drm/ttm/
  
  DRM GPU SCHEDULER
 -M:    Andrey Grodzovsky <andrey.grodzovsky@amd.com>
 +M:    Luben Tuikov <luben.tuikov@amd.com>
  L:    [email protected]
  S:    Maintained
  T:    git git://anongit.freedesktop.org/drm/drm-misc
@@@ -7418,9 -7363,9 +7418,9 @@@ F:      drivers/edac/thunderx_edac
  
  EDAC-CORE
  M:    Borislav Petkov <[email protected]>
 -M:    Mauro Carvalho Chehab <[email protected]>
  M:    Tony Luck <[email protected]>
  R:    James Morse <[email protected]>
 +R:    Mauro Carvalho Chehab <[email protected]>
  R:    Robert Richter <[email protected]>
  L:    [email protected]
  S:    Supported
@@@ -7537,7 -7482,8 +7537,7 @@@ S:      Maintaine
  F:    drivers/edac/pnd2_edac.[ch]
  
  EDAC-QCOM
 -M:    Channagoud Kadabi <[email protected]>
 -M:    Venkata Narendra Kumar Gutta <[email protected]>
 +M:    Manivannan Sadhasivam <[email protected]>
  L:    [email protected]
  L:    [email protected]
  S:    Maintained
@@@ -7738,7 -7684,6 +7738,7 @@@ ETAS ES58X CAN/USB DRIVE
  M:    Vincent Mailhol <[email protected]>
  L:    [email protected]
  S:    Maintained
 +F:    Documentation/networking/devlink/etas_es58x.rst
  F:    drivers/net/can/usb/etas_es58x/
  
  ETHERNET BRIDGE
@@@ -7844,6 -7789,7 +7844,6 @@@ F:      Documentation/admin-guide/efi-stub.r
  F:    arch/*/include/asm/efi.h
  F:    arch/*/kernel/efi.c
  F:    arch/arm/boot/compressed/efi-header.S
 -F:    arch/arm64/kernel/efi-entry.S
  F:    arch/x86/platform/efi/
  F:    drivers/firmware/efi/
  F:    include/linux/efi*.h
@@@ -8241,10 -8187,7 +8241,10 @@@ S:    Maintaine
  F:    drivers/i2c/busses/i2c-cpm.c
  
  FREESCALE IMX / MXC FEC DRIVER
 -M:    Joakim Zhang <[email protected]>
 +M:    Wei Fang <[email protected]>
 +R:    Shenwei Wang <[email protected]>
 +R:    Clark Wang <[email protected]>
 +R:    NXP Linux Team <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/net/fsl,fec.yaml
@@@ -8659,8 -8602,8 +8659,8 @@@ F:      include/asm-generic
  F:    include/uapi/asm-generic/
  
  GENERIC PHY FRAMEWORK
 -M:    Kishon Vijay Abraham I <[email protected]>
  M:    Vinod Koul <[email protected]>
 +M:    Kishon Vijay Abraham I <[email protected]>
  L:    [email protected]
  S:    Supported
  Q:    https://patchwork.kernel.org/project/linux-phy/list/
@@@ -8803,7 -8746,6 +8803,7 @@@ GPIO IR Transmitte
  M:    Sean Young <[email protected]>
  L:    [email protected]
  S:    Maintained
 +F:    Documentation/devicetree/bindings/leds/irled/gpio-ir-tx.yaml
  F:    drivers/media/rc/gpio-ir-tx.c
  
  GPIO MOCKUP DRIVER
@@@ -9227,13 -9169,6 +9227,13 @@@ W:    http://www.highpoint-tech.co
  F:    Documentation/scsi/hptiop.rst
  F:    drivers/scsi/hptiop.c
  
 +HIMAX HX83112B TOUCHSCREEN SUPPORT
 +M:    Job Noorman <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/input/touchscreen/himax,hx83112b.yaml
 +F:    drivers/input/touchscreen/himax_hx83112b.c
 +
  HIPPI
  M:    Jes Sorensen <[email protected]>
  L:    [email protected]
@@@ -9313,7 -9248,7 +9313,7 @@@ F:      drivers/misc/hisi_hikey_usb.
  
  HISILICON PMU DRIVER
  M:    Shaokun Zhang <[email protected]>
 -M:    Qi Liu <liuqi115@huawei.com>
 +M:    Jonathan Cameron <jonathan.cameron@huawei.com>
  S:    Supported
  W:    http://www.hisilicon.com
  F:    Documentation/admin-guide/perf/hisi-pcie-pmu.rst
@@@ -9406,7 -9341,7 +9406,7 @@@ S:      Maintaine
  F:    drivers/crypto/hisilicon/trng/trng.c
  
  HISILICON V3XX SPI NOR FLASH Controller Driver
 -M:    John Garry <john.garry@huawei.com>
 +M:    Jay Fang <f.fangjian@huawei.com>
  S:    Maintained
  W:    http://www.hisilicon.com
  F:    drivers/spi/spi-hisi-sfc-v3xx.c
@@@ -9431,7 -9366,7 +9431,7 @@@ F:      drivers/net/wireless/intersil/hostap
  HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER
  L:    [email protected]
  S:    Orphan
 -F:    drivers/platform/x86/tc1100-wmi.c
 +F:    drivers/platform/x86/hp/tc1100-wmi.c
  
  HPET: High Precision Event Timers driver
  M:    Clemens Ladisch <[email protected]>
@@@ -9501,9 -9436,8 +9501,9 @@@ F:      Documentation/devicetree/bindings/ii
  F:    drivers/iio/humidity/hts221*
  
  HUAWEI ETHERNET DRIVER
 +M:    Cai Huoqing <[email protected]>
  L:    [email protected]
 -S:    Orphan
 +S:    Maintained
  F:    Documentation/networking/device_drivers/ethernet/huawei/hinic.rst
  F:    drivers/net/ethernet/huawei/hinic/
  
@@@ -9573,6 -9507,7 +9573,6 @@@ F:      drivers/media/i2c/hi847.
  Hyper-V/Azure CORE AND DRIVERS
  M:    "K. Y. Srinivasan" <[email protected]>
  M:    Haiyang Zhang <[email protected]>
 -M:    Stephen Hemminger <[email protected]>
  M:    Wei Liu <[email protected]>
  M:    Dexuan Cui <[email protected]>
  L:    [email protected]
@@@ -9606,7 -9541,6 +9606,7 @@@ F:      include/asm-generic/hyperv-tlfs.
  F:    include/asm-generic/mshyperv.h
  F:    include/clocksource/hyperv_timer.h
  F:    include/linux/hyperv.h
 +F:    include/net/mana
  F:    include/uapi/linux/hyperv.h
  F:    net/vmw_vsock/hyperv_transport.c
  F:    tools/hv/
@@@ -10093,11 -10027,6 +10093,11 @@@ F: Documentation/hwmon/ina2xx.rs
  F:    drivers/hwmon/ina2xx.c
  F:    include/linux/platform_data/ina2xx.h
  
 +INDEX OF FURTHER KERNEL DOCUMENTATION
 +M:    Carlos Bilbao <[email protected]>
 +S:    Maintained
 +F:    Documentation/process/kernel-docs.rst
 +
  INDUSTRY PACK SUBSYSTEM (IPACK)
  M:    Samuel Iglesias Gonsalvez <[email protected]>
  M:    Jens Taprogge <[email protected]>
@@@ -10127,7 -10056,6 +10127,7 @@@ F:   drivers/infiniband
  F:    include/rdma/
  F:    include/trace/events/ib_mad.h
  F:    include/trace/events/ib_umad.h
 +F:    include/trace/misc/rdma.h
  F:    include/uapi/linux/if_infiniband.h
  F:    include/uapi/rdma/
  F:    samples/bpf/ibumad_kern.c
@@@ -10301,7 -10229,6 +10301,7 @@@ Q:   http://patchwork.freedesktop.org/pro
  B:    https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
  C:    irc://irc.oftc.net/intel-gfx
  T:    git git://anongit.freedesktop.org/drm-intel
 +F:    Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
  F:    Documentation/gpu/i915.rst
  F:    drivers/gpu/drm/i915/
  F:    include/drm/i915*
@@@ -10972,13 -10899,6 +10972,13 @@@ F: drivers/isdn/Makefil
  F:    drivers/isdn/hardware/
  F:    drivers/isdn/mISDN/
  
 +ISOFS FILESYSTEM
 +M:    Jan Kara <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/filesystems/isofs.rst
 +F:    fs/isofs/
 +
  IT87 HARDWARE MONITORING DRIVER
  M:    Jean Delvare <[email protected]>
  L:    [email protected]
@@@ -11040,9 -10960,9 +11040,9 @@@ F:   drivers/hwmon/jc42.
  JFS FILESYSTEM
  M:    Dave Kleikamp <[email protected]>
  L:    [email protected]
 -S:    Maintained
 +S:    Odd Fixes
  W:    http://jfs.sourceforge.net/
 -T:    git git://github.com/kleikamp/linux-shaggy.git
 +T:    git https://github.com/kleikamp/linux-shaggy.git
  F:    Documentation/admin-guide/jfs.rst
  F:    fs/jfs/
  
@@@ -11116,7 -11036,6 +11116,7 @@@ KCONFI
  M:    Masahiro Yamada <[email protected]>
  L:    [email protected]
  S:    Maintained
 +Q:    https://patchwork.kernel.org/project/linux-kbuild/list/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git kconfig
  F:    Documentation/kbuild/kconfig*
  F:    scripts/Kconfig.include
@@@ -11174,12 -11093,10 +11174,12 @@@ F:        fs/autofs
  
  KERNEL BUILD + files below scripts/ (unless maintained elsewhere)
  M:    Masahiro Yamada <[email protected]>
 -M:    Michal Marek <[email protected]>
 +R:    Nathan Chancellor <[email protected]>
  R:    Nick Desaulniers <[email protected]>
 +R:    Nicolas Schier <[email protected]>
  L:    [email protected]
  S:    Maintained
 +Q:    https://patchwork.kernel.org/project/linux-kbuild/list/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git
  F:    Documentation/kbuild/
  F:    Makefile
@@@ -11215,18 -11132,11 +11215,18 @@@ L:        [email protected]
  S:    Supported
  W:    http://nfs.sourceforge.net/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git
 +F:    fs/exportfs/
  F:    fs/lockd/
  F:    fs/nfs_common/
  F:    fs/nfsd/
  F:    include/linux/lockd/
  F:    include/linux/sunrpc/
 +F:    include/trace/events/rpcgss.h
 +F:    include/trace/events/rpcrdma.h
 +F:    include/trace/events/sunrpc.h
 +F:    include/trace/misc/fs.h
 +F:    include/trace/misc/nfs.h
 +F:    include/trace/misc/sunrpc.h
  F:    include/uapi/linux/nfsd/
  F:    include/uapi/linux/sunrpc/
  F:    net/sunrpc/
@@@ -11948,7 -11858,7 +11948,7 @@@ M:   Eric Piel <[email protected]
  S:    Maintained
  F:    Documentation/misc-devices/lis3lv02d.rst
  F:    drivers/misc/lis3lv02d/
 -F:    drivers/platform/x86/hp_accel.c
 +F:    drivers/platform/x86/hp/hp_accel.c
  
  LIST KUNIT TEST
  M:    David Gow <[email protected]>
@@@ -12103,21 -12013,6 +12103,21 @@@ F: drivers/*/*loongarch
  F:    Documentation/loongarch/
  F:    Documentation/translations/zh_CN/loongarch/
  
 +LOONGSON-2 SOC SERIES GUTS DRIVER
 +M:    Yinbo Zhu <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/hwinfo/loongson,ls2k-chipid.yaml
 +F:    drivers/soc/loongson/loongson2_guts.c
 +
 +LOONGSON-2 SOC SERIES PINCTRL DRIVER
 +M:    zhanghongchen <[email protected]>
 +M:    Yinbo Zhu <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/pinctrl/loongson,ls2k-pinctrl.yaml
 +F:    drivers/pinctrl/pinctrl-loongson2.c
 +
  LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
  M:    Sathya Prakash <[email protected]>
  M:    Sreekanth Reddy <[email protected]>
@@@ -12195,7 -12090,7 +12195,7 @@@ M:   Alexey Kodanev <alexey.kodanev@oracl
  L:    [email protected] (subscribers-only)
  S:    Maintained
  W:    http://linux-test-project.github.io/
 -T:    git git://github.com/linux-test-project/ltp.git
 +T:    git https://github.com/linux-test-project/ltp.git
  
  LYNX 28G SERDES PHY DRIVER
  M:    Ioana Ciornei <[email protected]>
@@@ -12331,6 -12226,7 +12331,6 @@@ F:   arch/mips/boot/dts/img/pistachio
  
  MARVELL 88E6XXX ETHERNET SWITCH FABRIC DRIVER
  M:    Andrew Lunn <[email protected]>
 -M:    Vivien Didelot <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/net/dsa/marvell.txt
@@@ -12420,7 -12316,7 +12420,7 @@@ M:   Marcin Wojtas <[email protected]
  M:    Russell King <[email protected]>
  L:    [email protected]
  S:    Maintained
 -F:    Documentation/devicetree/bindings/net/marvell-pp2.txt
 +F:    Documentation/devicetree/bindings/net/marvell,pp2.yaml
  F:    drivers/net/ethernet/marvell/mvpp2/
  
  MARVELL MWIFIEX WIRELESS DRIVER
@@@ -12468,7 -12364,7 +12468,7 @@@ F:   Documentation/networking/device_driv
  F:    drivers/net/ethernet/marvell/octeontx2/af/
  
  MARVELL PRESTERA ETHERNET SWITCH DRIVER
 -M:    Taras Chornyi <t[email protected]>
 +M:    Taras Chornyi <t[email protected]>
  S:    Supported
  W:    https://github.com/Marvell-switching/switchdev-prestera
  F:    drivers/net/ethernet/marvell/prestera/
@@@ -12830,7 -12726,7 +12830,7 @@@ F:   Documentation/admin-guide/media/imx7
  F:    Documentation/devicetree/bindings/media/nxp,imx-mipi-csi2.yaml
  F:    Documentation/devicetree/bindings/media/nxp,imx7-csi.yaml
  F:    drivers/media/platform/nxp/imx-mipi-csis.c
 -F:    drivers/staging/media/imx/imx7-media-csi.c
 +F:    drivers/media/platform/nxp/imx7-media-csi.c
  
  MEDIA DRIVERS FOR HELENE
  M:    Abylay Ospan <[email protected]>
@@@ -13027,7 -12923,6 +13027,7 @@@ M:   Felix Fietkau <[email protected]
  M:    John Crispin <[email protected]>
  M:    Sean Wang <[email protected]>
  M:    Mark Lee <[email protected]>
 +M:    Lorenzo Bianconi <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    drivers/net/ethernet/mediatek/
@@@ -13399,10 -13294,20 +13399,20 @@@ F:        include/linux/memory_hotplug.
  F:    include/linux/mm.h
  F:    include/linux/mmzone.h
  F:    include/linux/pagewalk.h
- F:    include/linux/vmalloc.h
  F:    mm/
  F:    tools/testing/selftests/vm/
  
+ VMALLOC
+ M:    Andrew Morton <[email protected]>
+ R:    Uladzislau Rezki <[email protected]>
+ R:    Christoph Hellwig <[email protected]>
+ L:    [email protected]
+ S:    Maintained
+ W:    http://www.linux-mm.org
+ T:    git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
+ F:    include/linux/vmalloc.h
+ F:    mm/vmalloc.c
  MEMORY HOT(UN)PLUG
  M:    David Hildenbrand <[email protected]>
  M:    Oscar Salvador <[email protected]>
@@@ -13490,7 -13395,7 +13500,7 @@@ MESON NAND CONTROLLER DRIVER FOR AMLOGI
  M:    Liang Yang <[email protected]>
  L:    [email protected]
  S:    Maintained
 -F:    Documentation/devicetree/bindings/mtd/amlogic,meson-nand.txt
 +F:    Documentation/devicetree/bindings/mtd/amlogic,meson-nand.yaml
  F:    drivers/mtd/nand/raw/meson_*
  
  MESON VIDEO DECODER DRIVER FOR AMLOGIC SOCS
@@@ -13571,7 -13476,7 +13581,7 @@@ M:   Eugen Hristev <eugen.hristev@microch
  L:    [email protected]
  S:    Supported
  F:    Documentation/devicetree/bindings/media/microchip,csi2dc.yaml
 -F:    drivers/media/platform/atmel/microchip-csi2dc.c
 +F:    drivers/media/platform/microchip/microchip-csi2dc.c
  
  MICROCHIP ECC DRIVER
  M:    Tudor Ambarus <[email protected]>
  S:    Supported
  F:    Documentation/devicetree/bindings/media/atmel,isc.yaml
  F:    Documentation/devicetree/bindings/media/microchip,xisc.yaml
 -F:    drivers/media/platform/atmel/atmel-isc*
 -F:    drivers/media/platform/atmel/atmel-sama*-isc*
 +F:    drivers/staging/media/deprecated/atmel/atmel-isc*
 +F:    drivers/staging/media/deprecated/atmel/atmel-sama*-isc*
 +F:    drivers/media/platform/microchip/microchip-isc*
 +F:    drivers/media/platform/microchip/microchip-sama*-isc*
  F:    include/linux/atmel-isc-media.h
  
  MICROCHIP ISI DRIVER
@@@ -13734,12 -13637,6 +13744,12 @@@ S: Supporte
  F:    drivers/misc/atmel-ssc.c
  F:    include/linux/atmel-ssc.h
  
 +MICROCHIP SOC DRIVERS
 +M:    Conor Dooley <[email protected]>
 +S:    Supported
 +T:    git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/
 +F:    drivers/soc/microchip/
 +
  MICROCHIP USB251XB DRIVER
  M:    Richard Leitner <[email protected]>
  L:    [email protected]
@@@ -14059,7 -13956,6 +14069,7 @@@ F:   include/uapi/linux/meye.
  
  MOTORCOMM PHY DRIVER
  M:    Peter Geis <[email protected]>
 +M:    Frank <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    drivers/net/phy/motorcomm.c
@@@ -14438,6 -14334,7 +14448,6 @@@ F:   drivers/net/wireless
  
  NETWORKING [DSA]
  M:    Andrew Lunn <[email protected]>
 -M:    Vivien Didelot <[email protected]>
  M:    Florian Fainelli <[email protected]>
  M:    Vladimir Oltean <[email protected]>
  S:    Maintained
@@@ -15294,13 -15191,6 +15304,13 @@@ S: Maintaine
  T:    git git://linuxtv.org/media_tree.git
  F:    drivers/media/i2c/ov08d10.c
  
 +OMNIVISION OV08X40 SENSOR DRIVER
 +M:    Jason Chen <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git git://linuxtv.org/media_tree.git
 +F:    drivers/media/i2c/ov08x40.c
 +
  OMNIVISION OV13858 SENSOR DRIVER
  M:    Sakari Ailus <[email protected]>
  L:    [email protected]
@@@ -15339,14 -15229,6 +15349,14 @@@ S: Maintaine
  T:    git git://linuxtv.org/media_tree.git
  F:    drivers/media/i2c/ov2740.c
  
 +OMNIVISION OV4689 SENSOR DRIVER
 +M:    Mikhail Rudenko <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git git://linuxtv.org/media_tree.git
 +F:    Documentation/devicetree/bindings/media/i2c/ovti,ov4689.yaml
 +F:    drivers/media/i2c/ov5647.c
 +
  OMNIVISION OV5640 SENSOR DRIVER
  M:    Steve Longerbeam <[email protected]>
  L:    [email protected]
@@@ -15471,12 -15353,6 +15481,12 @@@ S: Maintaine
  F:    drivers/mtd/nand/onenand/
  F:    include/linux/mtd/onenand*.h
  
 +ONEXPLAYER FAN DRIVER
 +M:    Joaquín Ignacio Aramendía <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    drivers/hwmon/oxp-sensors.c
 +
  ONION OMEGA2+ BOARD
  M:    Harvey Hunt <[email protected]>
  L:    [email protected]
@@@ -16080,7 -15956,6 +16090,7 @@@ Q:   https://patchwork.kernel.org/project
  B:    https://bugzilla.kernel.org
  C:    irc://irc.oftc.net/linux-pci
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git
 +F:    Documentation/devicetree/bindings/pci/
  F:    drivers/pci/controller/
  F:    drivers/pci/pci-bridge-emul.c
  F:    drivers/pci/pci-bridge-emul.h
@@@ -16187,7 -16062,7 +16197,7 @@@ F:   Documentation/devicetree/bindings/pc
  F:    drivers/pci/controller/*microchip*
  
  PCIE DRIVER FOR QUALCOMM MSM
 -M:    Stanimir Varbanov <[email protected]>
 +M:    Manivannan Sadhasivam <[email protected]>
  L:    [email protected]
  L:    [email protected]
  S:    Maintained
@@@ -16277,8 -16152,7 +16287,8 @@@ F:   include/linux/peci-cpu.
  F:    include/linux/peci.h
  
  PENSANDO ETHERNET DRIVERS
 -M:    Shannon Nelson <[email protected]>
 +M:    Shannon Nelson <[email protected]>
 +M:    Brett Creeley <[email protected]>
  M:    [email protected]
  L:    [email protected]
  S:    Supported
@@@ -16436,7 -16310,7 +16446,7 @@@ M:   Sean Wang <[email protected]
  L:    [email protected] (moderated for non-subscribers)
  S:    Maintained
  F:    Documentation/devicetree/bindings/pinctrl/mediatek,mt65xx-pinctrl.yaml
 -F:    Documentation/devicetree/bindings/pinctrl/mediatek,mt6797-pinctrl.yaml
 +F:    Documentation/devicetree/bindings/pinctrl/mediatek,mt6779-pinctrl.yaml
  F:    Documentation/devicetree/bindings/pinctrl/mediatek,mt7622-pinctrl.yaml
  F:    Documentation/devicetree/bindings/pinctrl/mediatek,mt8183-pinctrl.yaml
  F:    drivers/pinctrl/mediatek/
@@@ -16509,6 -16383,13 +16519,6 @@@ S:  Supporte
  F:    Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml
  F:    drivers/input/keyboard/pinephone-keyboard.c
  
 -PKTCDVD DRIVER
 -M:    [email protected]
 -S:    Orphan
 -F:    drivers/block/pktcdvd.c
 -F:    include/linux/pktcdvd.h
 -F:    include/uapi/linux/pktcdvd.h
 -
  PLANTOWER PMS7003 AIR POLLUTION SENSOR DRIVER
  M:    Tomasz Duszynski <[email protected]>
  S:    Maintained
@@@ -16786,10 -16667,10 +16796,10 @@@ F:        net/psampl
  
  PSTORE FILESYSTEM
  M:    Kees Cook <[email protected]>
 -M:    Anton Vorontsov <[email protected]>
 -M:    Colin Cross <ccross@android.com>
 -M:    Tony Luck <[email protected]>
 -S:    Maintained
 +R:    Tony Luck <[email protected]>
 +R:    Guilherme G. Piccoli <gpiccoli@igalia.com>
 +L:    [email protected]
 +S:    Supported
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/pstore
  F:    Documentation/admin-guide/ramoops.rst
  F:    Documentation/admin-guide/pstore-blk.rst
@@@ -16836,6 -16717,7 +16846,6 @@@ M:   Hans Verkuil <[email protected]
  L:    [email protected]
  S:    Maintained
  T:    git git://linuxtv.org/media_tree.git
 -F:    Documentation/admin-guide/media/pulse8-cec.rst
  F:    drivers/media/cec/usb/pulse8/
  
  PURELIFI PLFXLC DRIVER
@@@ -16866,7 -16748,6 +16876,7 @@@ PWM IR Transmitte
  M:    Sean Young <[email protected]>
  L:    [email protected]
  S:    Maintained
 +F:    Documentation/devicetree/bindings/leds/irled/pwm-ir-tx.yaml
  F:    drivers/media/rc/pwm-ir-tx.c
  
  PWM SUBSYSTEM
@@@ -16931,7 -16812,7 +16941,7 @@@ M:   Srinivas Kandagatla <srinivas.kandag
  M:    Banajit Goswami <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
  S:    Supported
 -F:    Documentation/devicetree/bindings/soc/qcom/qcom,apr.yaml
 +F:    Documentation/devicetree/bindings/soc/qcom/qcom,apr*
  F:    Documentation/devicetree/bindings/sound/qcom,*
  F:    drivers/soc/qcom/apr.c
  F:    include/dt-bindings/sound/qcom,wcd9335.h
@@@ -17289,8 -17170,7 +17299,8 @@@ F:   Documentation/devicetree/bindings/th
  F:    drivers/thermal/qcom/
  
  QUALCOMM VENUS VIDEO ACCELERATOR DRIVER
 -M:    Stanimir Varbanov <[email protected]>
 +M:    Stanimir Varbanov <[email protected]>
 +M:    Vikash Garodia <[email protected]>
  L:    [email protected]
  L:    [email protected]
  S:    Maintained
@@@ -17355,7 -17235,7 +17365,7 @@@ R:   Dongsheng Yang <dongsheng.yang@easys
  L:    [email protected]
  S:    Supported
  W:    http://ceph.com/
 -T:    git git://github.com/ceph/ceph-client.git
 +T:    git https://github.com/ceph/ceph-client.git
  F:    Documentation/ABI/testing/sysfs-bus-rbd
  F:    drivers/block/rbd.c
  F:    drivers/block/rbd_types.h
@@@ -17608,8 -17488,10 +17618,8 @@@ S:  Maintaine
  F:    drivers/net/wireless/realtek/rtw89/
  
  REDPINE WIRELESS DRIVER
 -M:    Amitkumar Karwar <[email protected]>
 -M:    Siva Rebbagondla <[email protected]>
  L:    [email protected]
 -S:    Maintained
 +S:    Orphan
  F:    drivers/net/wireless/rsi/
  
  REGISTER MAP ABSTRACTION
@@@ -17854,7 -17736,7 +17864,7 @@@ F:   arch/riscv
  N:    riscv
  K:    riscv
  
 -RISC-V/MICROCHIP POLARFIRE SOC SUPPORT
 +RISC-V MICROCHIP FPGA SUPPORT
  M:    Conor Dooley <[email protected]>
  M:    Daire McNamara <[email protected]>
  L:    [email protected]
@@@ -17872,26 -17754,17 +17882,26 @@@ F:        Documentation/devicetree/bindings/us
  F:    arch/riscv/boot/dts/microchip/
  F:    drivers/char/hw_random/mpfs-rng.c
  F:    drivers/clk/microchip/clk-mpfs.c
 -F:    drivers/i2c/busses/i2c-microchip-core.c
 +F:    drivers/i2c/busses/i2c-microchip-corei2c.c
  F:    drivers/mailbox/mailbox-mpfs.c
  F:    drivers/pci/controller/pcie-microchip-host.c
  F:    drivers/reset/reset-mpfs.c
  F:    drivers/rtc/rtc-mpfs.c
 -F:    drivers/soc/microchip/
 +F:    drivers/soc/microchip/mpfs-sys-controller.c
  F:    drivers/spi/spi-microchip-core-qspi.c
  F:    drivers/spi/spi-microchip-core.c
  F:    drivers/usb/musb/mpfs.c
  F:    include/soc/microchip/mpfs.h
  
 +RISC-V MISC SOC SUPPORT
 +M:    Conor Dooley <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +Q:    https://patchwork.kernel.org/project/linux-riscv/list/
 +T:    git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/
 +F:    Documentation/devicetree/bindings/riscv/
 +F:    arch/riscv/boot/dts/
 +
  RNBD BLOCK DRIVERS
  M:    Md. Haris Iqbal <[email protected]>
  M:    Jack Wang <[email protected]>
  S:    Maintained
  F:    drivers/video/fbdev/savage/
  
 -S390
 +S390 ARCHITECTURE
  M:    Heiko Carstens <[email protected]>
  M:    Vasily Gorbik <[email protected]>
  M:    Alexander Gordeev <[email protected]>
  S:    Supported
  F:    drivers/s390/net/
  
 +S390 MM
 +M:    Alexander Gordeev <[email protected]>
 +M:    Gerald Schaefer <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git
 +F:    arch/s390/include/asm/pgtable.h
 +F:    arch/s390/mm
 +
  S390 PCI SUBSYSTEM
  M:    Niklas Schnelle <[email protected]>
  M:    Gerald Schaefer <[email protected]>
@@@ -18623,7 -18487,6 +18633,7 @@@ K:   \bsecure_computin
  K:    \bTIF_SECCOMP\b
  
  SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) Broadcom BRCMSTB DRIVER
 +M:    Kamal Dasu <[email protected]>
  M:    Al Cooper <[email protected]>
  R:    Broadcom internal kernel review list <[email protected]>
  L:    [email protected]
@@@ -18634,7 -18497,6 +18644,7 @@@ SECURE DIGITAL HOST CONTROLLER INTERFAC
  M:    Adrian Hunter <[email protected]>
  L:    [email protected]
  S:    Supported
 +F:    Documentation/devicetree/bindings/mmc/sdhci-common.yaml
  F:    drivers/mmc/host/sdhci*
  
  SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) MICROCHIP DRIVER
@@@ -18929,6 -18791,7 +18939,6 @@@ M:   Palmer Dabbelt <[email protected]
  M:    Paul Walmsley <[email protected]>
  L:    [email protected]
  S:    Supported
 -T:    git https://github.com/sifive/riscv-linux.git
  N:    sifive
  K:    [^@]sifive
  
@@@ -18947,13 -18810,6 +18957,13 @@@ S: Maintaine
  F:    Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml
  F:    drivers/dma/sf-pdma/
  
 +SIFIVE SOC DRIVERS
 +M:    Conor Dooley <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/
 +F:    drivers/soc/sifive/
 +
  SILEAD TOUCHSCREEN DRIVER
  M:    Hans de Goede <[email protected]>
  L:    [email protected]
@@@ -19044,7 -18900,7 +19054,7 @@@ F:   drivers/video/fbdev/sis
  F:    include/video/sisfb.h
  
  SIS I2C TOUCHSCREEN DRIVER
 -M:    Mika Penttilä <mika.penttila@nextfour.com>
 +M:    Mika Penttilä <mpenttil@redhat.com>
  L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/input/touchscreen/sis_i2c.txt
@@@ -19187,7 -19043,7 +19197,7 @@@ M:   Jassi Brar <[email protected]
  M:    Ilias Apalodimas <[email protected]>
  L:    [email protected]
  S:    Maintained
 -F:    Documentation/devicetree/bindings/net/socionext-netsec.txt
 +F:    Documentation/devicetree/bindings/net/socionext,synquacer-netsec.yaml
  F:    drivers/net/ethernet/socionext/netsec.c
  
  SOCIONEXT (SNI) Synquacer SPI DRIVER
@@@ -19195,7 -19051,7 +19205,7 @@@ M:   Masahisa Kojima <masahisa.kojima@lin
  M:    Jassi Brar <[email protected]>
  L:    [email protected]
  S:    Maintained
 -F:    Documentation/devicetree/bindings/spi/spi-synquacer.txt
 +F:    Documentation/devicetree/bindings/spi/socionext,synquacer-spi.yaml
  F:    drivers/spi/spi-synquacer.c
  
  SOCIONEXT SYNQUACER I2C DRIVER
@@@ -19342,7 -19198,7 +19352,7 @@@ M:   Manivannan Sadhasivam <manivannan.sa
  L:    [email protected]
  S:    Maintained
  T:    git git://linuxtv.org/media_tree.git
 -F:    Documentation/devicetree/bindings/media/i2c/imx290.txt
 +F:    Documentation/devicetree/bindings/media/i2c/sony,imx290.yaml
  F:    drivers/media/i2c/imx290.c
  
  SONY IMX319 SENSOR DRIVER
@@@ -19491,11 -19347,6 +19501,11 @@@ W: https://linuxtv.or
  Q:    http://patchwork.linuxtv.org/project/linux-media/list/
  F:    drivers/media/dvb-frontends/sp2*
  
 +SPANISH DOCUMENTATION
 +M:    Carlos Bilbao <[email protected]>
 +S:    Maintained
 +F:    Documentation/translations/sp_SP/
 +
  SPARC + UltraSPARC (sparc/sparc64)
  M:    "David S. Miller" <[email protected]>
  L:    [email protected]
@@@ -19639,7 -19490,7 +19649,7 @@@ M:   Sylvain Petinot <sylvain.petinot@fos
  L:    [email protected]
  S:    Maintained
  T:    git git://linuxtv.org/media_tree.git
 -F:    Documentation/devicetree/bindings/media/i2c/st,st-mipid02.txt
 +F:    Documentation/devicetree/bindings/media/i2c/st,st-mipid02.yaml
  F:    drivers/media/i2c/st-mipid02.c
  
  ST STM32 I2C/SMBUS DRIVER
@@@ -19662,16 -19513,6 +19672,16 @@@ S: Maintaine
  F:    Documentation/hwmon/stpddc60.rst
  F:    drivers/hwmon/pmbus/stpddc60.c
  
 +ST VGXY61 DRIVER
 +M:    Benjamin Mugnier <[email protected]>
 +M:    Sylvain Petinot <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git git://linuxtv.org/media_tree.git
 +F:    Documentation/devicetree/bindings/media/i2c/st,st-vgxy61.yaml
 +F:    Documentation/userspace-api/media/drivers/st-vgxy61.rst
 +F:    drivers/media/i2c/st-vgxy61.c
 +
  ST VL53L0X ToF RANGER(I2C) IIO DRIVER
  M:    Song Qiang <[email protected]>
  L:    [email protected]
@@@ -19687,7 -19528,6 +19697,7 @@@ S:   Supporte
  F:    Documentation/process/stable-kernel-rules.rst
  
  STAGING - ATOMISP DRIVER
 +M:    Hans de Goede <[email protected]>
  M:    Mauro Carvalho Chehab <[email protected]>
  R:    Sakari Ailus <[email protected]>
  L:    [email protected]
@@@ -19771,11 -19611,6 +19781,11 @@@ M: Ion Badulescu <[email protected]
  S:    Odd Fixes
  F:    drivers/net/ethernet/adaptec/starfire*
  
 +STARFIVE DEVICETREES
 +M:    Emil Renner Berthing <[email protected]>
 +S:    Maintained
 +F:    arch/riscv/boot/dts/starfive/
 +
  STARFIVE JH7100 CLOCK DRIVERS
  M:    Emil Renner Berthing <[email protected]>
  S:    Maintained
@@@ -19897,13 -19732,6 +19907,13 @@@ W: https://sunplus.atlassian.net/wiki/s
  F:    Documentation/devicetree/bindings/net/sunplus,sp7021-emac.yaml
  F:    drivers/net/ethernet/sunplus/
  
 +SUNPLUS MMC DRIVER
 +M:    Tony Huang <[email protected]>
 +M:    Li-hao Kuo <[email protected]>
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/mmc/sunplus,mmc.yaml
 +F:    drivers/mmc/host/sunplus-mmc.c
 +
  SUNPLUS OCOTP DRIVER
  M:    Vincent Shih <[email protected]>
  S:    Maintained
@@@ -20155,7 -19983,6 +20165,7 @@@ F:   drivers/clk/clk-sc[mp]i.
  F:    drivers/cpufreq/sc[mp]i-cpufreq.c
  F:    drivers/firmware/arm_scmi/
  F:    drivers/firmware/arm_scpi.c
 +F:    drivers/powercap/arm_scmi_powercap.c
  F:    drivers/regulator/scmi-regulator.c
  F:    drivers/reset/reset-scmi.c
  F:    include/linux/sc[mp]i_protocol.h
@@@ -20490,7 -20317,7 +20500,7 @@@ M:   Chris Zankel <[email protected]
  M:    Max Filippov <[email protected]>
  L:    [email protected]
  S:    Maintained
 -T:    git git://github.com/czankel/xtensa-linux.git
 +T:    git https://github.com/jcmvbkbc/linux-xtensa.git
  F:    arch/xtensa/
  F:    drivers/irqchip/irq-xtensa-*
  
@@@ -20840,6 -20667,7 +20850,6 @@@ W:   https://wireless.wiki.kernel.org/en/
  W:    https://wireless.wiki.kernel.org/en/users/Drivers/wl1251
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/luca/wl12xx.git
  F:    drivers/net/wireless/ti/
 -F:    include/linux/wl12xx.h
  
  TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER
  M:    John Stultz <[email protected]>
@@@ -21903,12 -21731,6 +21913,12 @@@ F: include/linux/virtio*.
  F:    include/uapi/linux/virtio_*.h
  F:    tools/virtio/
  
 +VISL VIRTUAL STATELESS DECODER DRIVER
 +M:    Daniel Almeida <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +F:    drivers/media/test-drivers/visl
 +
  IFCVF VIRTIO DATA PATH ACCELERATOR
  R:    Zhu Lingshan <[email protected]>
  F:    drivers/vdpa/ifcvf/
diff --combined arch/arm/include/asm/pgtable-nommu.h
index 090011394477f1090abe0c21ab0095b48da73ab9,25d8c7bb07e0cf9efffa120bad74329494329551..61480d096054d32d41dd8f56ec31a4ce62a213a2
@@@ -21,8 -21,6 +21,6 @@@
  #define pgd_none(pgd)         (0)
  #define pgd_bad(pgd)          (0)
  #define pgd_clear(pgdp)
- #define kern_addr_valid(addr) (1)
- /* FIXME */
  /*
   * PMD_SHIFT determines the size of the area a second-level page table can map
   * PGDIR_SHIFT determines what a third-level page table entry can map
  
  typedef pte_t *pte_addr_t;
  
 -/*
 - * ZERO_PAGE is a global shared page that is always zero: used
 - * for zero-mapped memory areas etc..
 - */
 -#define ZERO_PAGE(vaddr)      (virt_to_page(0))
 -
  /*
   * Mark the prot value as uncacheable and unbufferable.
   */
diff --combined arch/arm/include/asm/pgtable.h
index ef48a55e9af83bd56c5f4ab21f684a1876b0ae44,00954ab1a0393ce25d414d76da5a39bb31f2f068..f049072b2e8586089cad1e9c26390d028b59286e
  #include <linux/const.h>
  #include <asm/proc-fns.h>
  
 +#ifndef __ASSEMBLY__
 +/*
 + * ZERO_PAGE is a global shared page that is always zero: used
 + * for zero-mapped memory areas etc..
 + */
 +extern struct page *empty_zero_page;
 +#define ZERO_PAGE(vaddr)      (empty_zero_page)
 +#endif
 +
  #ifndef CONFIG_MMU
  
  #include <asm-generic/pgtable-nopud.h>
@@@ -148,6 -139,13 +148,6 @@@ extern pgprot_t phys_mem_access_prot(st
   */
  
  #ifndef __ASSEMBLY__
 -/*
 - * ZERO_PAGE is a global shared page that is always zero: used
 - * for zero-mapped memory areas etc..
 - */
 -extern struct page *empty_zero_page;
 -#define ZERO_PAGE(vaddr)      (empty_zero_page)
 -
  
  extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
  
@@@ -300,10 -298,6 +300,6 @@@ static inline pte_t pte_modify(pte_t pt
   */
  #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
  
- /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
- /* FIXME: this is not correct */
- #define kern_addr_valid(addr) (1)
  /*
   * We provide our own arch_get_unmapped_area to cope with VIPT caches.
   */
diff --combined arch/arm64/include/asm/pgtable.h
index c36d56dbf940f726247a289c85fbe886316eff9a,4873c1d6e7d0d73aa8a7c0a704548615a57855a7..b3faf7582a53fddbac2ab078731613a151bb9af3
@@@ -77,11 -77,11 +77,11 @@@ extern unsigned long empty_zero_page[PA
  static inline phys_addr_t __pte_to_phys(pte_t pte)
  {
        return (pte_val(pte) & PTE_ADDR_LOW) |
 -              ((pte_val(pte) & PTE_ADDR_HIGH) << 36);
 +              ((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT);
  }
  static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
  {
 -      return (phys | (phys >> 36)) & PTE_ADDR_MASK;
 +      return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PTE_ADDR_MASK;
  }
  #else
  #define __pte_to_phys(pte)    (pte_val(pte) & PTE_ADDR_MASK)
@@@ -609,6 -609,7 +609,6 @@@ extern pgd_t init_pg_dir[PTRS_PER_PGD]
  extern pgd_t init_pg_end[];
  extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
  extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
 -extern pgd_t idmap_pg_end[];
  extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
  extern pgd_t reserved_pg_dir[PTRS_PER_PGD];
  
@@@ -862,12 -863,12 +862,12 @@@ static inline bool pte_user_accessible_
  
  static inline bool pmd_user_accessible_page(pmd_t pmd)
  {
 -      return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
 +      return pmd_leaf(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
  }
  
  static inline bool pud_user_accessible_page(pud_t pud)
  {
 -      return pud_present(pud) && pud_user(pud);
 +      return pud_leaf(pud) && pud_user(pud);
  }
  #endif
  
@@@ -1020,8 -1021,6 +1020,6 @@@ static inline pmd_t pmdp_establish(stru
   */
  #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
  
- extern int kern_addr_valid(unsigned long addr);
  #ifdef CONFIG_ARM64_MTE
  
  #define __HAVE_ARCH_PREPARE_TO_SWAP
@@@ -1095,15 -1094,6 +1093,15 @@@ static inline bool pud_sect_supported(v
  }
  
  
 +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
 +#define ptep_modify_prot_start ptep_modify_prot_start
 +extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
 +                                  unsigned long addr, pte_t *ptep);
 +
 +#define ptep_modify_prot_commit ptep_modify_prot_commit
 +extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
 +                                  unsigned long addr, pte_t *ptep,
 +                                  pte_t old_pte, pte_t new_pte);
  #endif /* !__ASSEMBLY__ */
  
  #endif /* __ASM_PGTABLE_H */
diff --combined arch/arm64/mm/mmu.c
index 2368e4daa23d3e6b82ee1750e038d800f27acc51,27217ba12e5769a7cee4d917eb09245b8e32f860..12915f379c22be7da15313047b20815456e0db10
@@@ -814,53 -814,6 +814,6 @@@ void __init paging_init(void
        create_idmap();
  }
  
- /*
-  * Check whether a kernel address is valid (derived from arch/x86/).
-  */
- int kern_addr_valid(unsigned long addr)
- {
-       pgd_t *pgdp;
-       p4d_t *p4dp;
-       pud_t *pudp, pud;
-       pmd_t *pmdp, pmd;
-       pte_t *ptep, pte;
-       addr = arch_kasan_reset_tag(addr);
-       if ((((long)addr) >> VA_BITS) != -1UL)
-               return 0;
-       pgdp = pgd_offset_k(addr);
-       if (pgd_none(READ_ONCE(*pgdp)))
-               return 0;
-       p4dp = p4d_offset(pgdp, addr);
-       if (p4d_none(READ_ONCE(*p4dp)))
-               return 0;
-       pudp = pud_offset(p4dp, addr);
-       pud = READ_ONCE(*pudp);
-       if (pud_none(pud))
-               return 0;
-       if (pud_sect(pud))
-               return pfn_valid(pud_pfn(pud));
-       pmdp = pmd_offset(pudp, addr);
-       pmd = READ_ONCE(*pmdp);
-       if (pmd_none(pmd))
-               return 0;
-       if (pmd_sect(pmd))
-               return pfn_valid(pmd_pfn(pmd));
-       ptep = pte_offset_kernel(pmdp, addr);
-       pte = READ_ONCE(*ptep);
-       if (pte_none(pte))
-               return 0;
-       return pfn_valid(pte_pfn(pte));
- }
  #ifdef CONFIG_MEMORY_HOTPLUG
  static void free_hotplug_page_range(struct page *page, size_t size,
                                    struct vmem_altmap *altmap)
@@@ -1184,53 -1137,28 +1137,28 @@@ static void free_empty_tables(unsigned 
  }
  #endif
  
+ void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node,
+                              unsigned long addr, unsigned long next)
+ {
+       pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
+ }
+ int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
+                               unsigned long addr, unsigned long next)
+ {
+       vmemmap_verify((pte_t *)pmdp, node, addr, next);
+       return 1;
+ }
  int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
                struct vmem_altmap *altmap)
  {
-       unsigned long addr = start;
-       unsigned long next;
-       pgd_t *pgdp;
-       p4d_t *p4dp;
-       pud_t *pudp;
-       pmd_t *pmdp;
        WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
  
 -      if (!ARM64_KERNEL_USES_PMD_MAPS)
 +      if (!IS_ENABLED(CONFIG_ARM64_4K_PAGES))
                return vmemmap_populate_basepages(start, end, node, altmap);
-       do {
-               next = pmd_addr_end(addr, end);
-               pgdp = vmemmap_pgd_populate(addr, node);
-               if (!pgdp)
-                       return -ENOMEM;
-               p4dp = vmemmap_p4d_populate(pgdp, addr, node);
-               if (!p4dp)
-                       return -ENOMEM;
-               pudp = vmemmap_pud_populate(p4dp, addr, node);
-               if (!pudp)
-                       return -ENOMEM;
-               pmdp = pmd_offset(pudp, addr);
-               if (pmd_none(READ_ONCE(*pmdp))) {
-                       void *p = NULL;
-                       p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
-                       if (!p) {
-                               if (vmemmap_populate_basepages(addr, next, node, altmap))
-                                       return -ENOMEM;
-                               continue;
-                       }
-                       pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
-               } else
-                       vmemmap_verify((pte_t *)pmdp, node, addr, next);
-       } while (addr = next, addr != end);
-       return 0;
+       else
+               return vmemmap_populate_hugepages(start, end, node, altmap);
  }
  
  #ifdef CONFIG_MEMORY_HOTPLUG
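The open-coded PMD walk removed above now lives in the generic vmemmap_populate_hugepages() helper, with the architecture only supplying vmemmap_set_pmd() and vmemmap_check_pmd(). Roughly, and leaving out some altmap error handling, the generic loop looks like the sketch below (simplified, not the exact mm/sparse-vmemmap.c code):

#include <linux/mm.h>
#include <linux/pgtable.h>

int vmemmap_populate_hugepages(unsigned long start, unsigned long end,
			       int node, struct vmem_altmap *altmap)
{
	unsigned long addr, next;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	void *p;

	for (addr = start; addr < end; addr = next) {
		next = pmd_addr_end(addr, end);

		/* Populate the upper levels of the vmemmap page tables. */
		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;
		p4d = vmemmap_p4d_populate(pgd, addr, node);
		if (!p4d)
			return -ENOMEM;
		pud = vmemmap_pud_populate(p4d, addr, node);
		if (!pud)
			return -ENOMEM;

		pmd = pmd_offset(pud, addr);
		if (pmd_none(READ_ONCE(*pmd))) {
			/* Prefer a PMD-sized block for the page array ... */
			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
			if (p) {
				vmemmap_set_pmd(pmd, p, node, addr, next);
				continue;
			}
		} else if (vmemmap_check_pmd(pmd, node, addr, next)) {
			/* ... or reuse an existing huge mapping ... */
			continue;
		}
		/* ... and fall back to base pages otherwise. */
		if (vmemmap_populate_basepages(addr, next, node, altmap))
			return -ENOMEM;
	}
	return 0;
}

With that split, arm64 (above), x86 and LoongArch can share the loop and differ only in the two hooks.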
@@@ -1702,24 -1630,3 +1630,24 @@@ static int __init prevent_bootmem_remov
  }
  early_initcall(prevent_bootmem_remove_init);
  #endif
 +
 +pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
 +{
 +      if (IS_ENABLED(CONFIG_ARM64_WORKAROUND_2645198) &&
 +          cpus_have_const_cap(ARM64_WORKAROUND_2645198)) {
 +              /*
 +               * Break-before-make (BBM) is required for all user space mappings
 +               * when the permission changes from executable to non-executable
 +               * in cases where cpu is affected with errata #2645198.
 +               */
 +              if (pte_user_exec(READ_ONCE(*ptep)))
 +                      return ptep_clear_flush(vma, addr, ptep);
 +      }
 +      return ptep_get_and_clear(vma->vm_mm, addr, ptep);
 +}
 +
 +void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
 +                           pte_t old_pte, pte_t pte)
 +{
 +      set_pte_at(vma->vm_mm, addr, ptep, pte);
 +}
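For context, this is how a caller is expected to drive the start/commit transaction when changing the protection of a single PTE. A minimal sketch of the generic pattern (not the kernel's actual mprotect path):

#include <linux/mm.h>
#include <linux/pgtable.h>

static void change_one_pte_prot(struct vm_area_struct *vma, unsigned long addr,
				pte_t *ptep, pgprot_t newprot)
{
	/* Clears the old PTE; on CPUs hit by erratum 2645198 the arm64 hook
	 * above also flushes the TLB, giving the required break-before-make. */
	pte_t oldpte = ptep_modify_prot_start(vma, addr, ptep);

	/* Build the new PTE from the old contents plus the new protection. */
	pte_t newpte = pte_modify(oldpte, newprot);

	/* Install it; as above, the arm64 commit is simply set_pte_at(). */
	ptep_modify_prot_commit(vma, addr, ptep, oldpte, newpte);
}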
diff --combined arch/arm64/mm/pageattr.c
index 5922178d7a064c1c98af43ad97d72fa4a6b8d79b,0a741a910a6abb909902ab21b5baf6b6995685bb..79dd201c59d8b32e0652476d6961027b90bcd42c
@@@ -26,7 -26,7 +26,7 @@@ bool can_set_direct_map(void
         * mapped at page granularity, so that it is possible to
         * protect/unprotect single pages.
         */
 -      return rodata_full || debug_pagealloc_enabled() ||
 +      return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() ||
                IS_ENABLED(CONFIG_KFENCE);
  }
  
@@@ -102,8 -102,7 +102,8 @@@ static int change_memory_common(unsigne
         * If we are manipulating read-only permissions, apply the same
         * change to the linear mapping of the pages that back this VM area.
         */
 -      if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
 +      if (rodata_enabled &&
 +          rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
                            pgprot_val(clear_mask) == PTE_RDONLY)) {
                for (i = 0; i < area->nr_pages; i++) {
                        __change_memory_common((u64)page_address(area->pages[i]),
@@@ -202,8 -201,7 +202,7 @@@ void __kernel_map_pages(struct page *pa
  
  /*
   * This function is used to determine if a linear map page has been marked as
-  * not-valid. Walk the page table and check the PTE_VALID bit. This is based
-  * on kern_addr_valid(), which almost does what we need.
+  * not-valid. Walk the page table and check the PTE_VALID bit.
   *
   * Because this is only called on the kernel linear map,  p?d_sect() implies
   * p?d_present(). When debug_pagealloc is enabled, sections mappings are
diff --combined arch/loongarch/Kconfig
index 386adde2feffb31f024d4014311baf519dca03bd,0a6ef613124c63f192a39c121d118858b061929d..a508813d5ea299382fc723698d8c661439d1f515
@@@ -10,7 -10,6 +10,7 @@@ config LOONGARC
        select ARCH_ENABLE_MEMORY_HOTPLUG
        select ARCH_ENABLE_MEMORY_HOTREMOVE
        select ARCH_HAS_ACPI_TABLE_UPGRADE      if ACPI
 +      select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
        select ARCH_HAS_PTE_SPECIAL
        select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
        select ARCH_INLINE_READ_LOCK if !PREEMPTION
@@@ -53,6 -52,7 +53,7 @@@
        select ARCH_USE_QUEUED_RWLOCKS
        select ARCH_USE_QUEUED_SPINLOCKS
        select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+       select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
        select ARCH_WANT_LD_ORPHAN_WARN
        select ARCH_WANTS_NO_INSTR
        select BUILDTIME_TABLE_SORT
@@@ -488,6 -488,7 +489,7 @@@ config ARCH_FLATMEM_ENABL
  
  config ARCH_SPARSEMEM_ENABLE
        def_bool y
+       select SPARSEMEM_VMEMMAP_ENABLE
        help
          Say Y to support efficient handling of sparse physical memory,
          for architectures which are either NUMA (Non-Uniform Memory Access)
diff --combined arch/loongarch/include/asm/pgtable.h
index 79d5bfd913e0fb4843b2fc96a109ceb3e7bb6087,022ec6be3602f191ac3d38c7227950fdfbdcae25..7a34e900d8c18a610af721426904438b5eb8c013
@@@ -11,6 -11,7 +11,7 @@@
  
  #include <linux/compiler.h>
  #include <asm/addrspace.h>
+ #include <asm/page.h>
  #include <asm/pgtable-bits.h>
  
  #if CONFIG_PGTABLE_LEVELS == 2
@@@ -59,6 -60,7 +60,7 @@@
  #include <linux/mm_types.h>
  #include <linux/mmzone.h>
  #include <asm/fixmap.h>
+ #include <asm/sparsemem.h>
  
  struct mm_struct;
  struct vm_area_struct;
@@@ -86,7 -88,10 +88,10 @@@ extern unsigned long zero_page_mask
  #define VMALLOC_START MODULES_END
  #define VMALLOC_END   \
        (vm_map_base +  \
-        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE)
+        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
+ #define vmemmap               ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
+ #define VMEMMAP_END   ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
  
  #define pte_ERROR(e) \
        pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
@@@ -237,11 -242,11 +242,11 @@@ extern void set_pmd_at(struct mm_struc
  #define pfn_pmd(pfn, prot)    __pmd(((pfn) << _PFN_SHIFT) | pgprot_val(prot))
  
  /*
-  * Initialize a new pgd / pmd table with invalid pointers.
+  * Initialize a new pgd / pud / pmd table with invalid pointers.
   */
- extern void pgd_init(unsigned long page);
- extern void pud_init(unsigned long page, unsigned long pagetable);
- extern void pmd_init(unsigned long page, unsigned long pagetable);
+ extern void pgd_init(void *addr);
+ extern void pud_init(void *addr);
+ extern void pmd_init(void *addr);
  
  /*
   * Non-present pages:  high 40 bits are offset, next 8 bits type,
@@@ -349,17 -354,13 +354,17 @@@ static inline pte_t pte_mkclean(pte_t p
  
  static inline pte_t pte_mkdirty(pte_t pte)
  {
 -      pte_val(pte) |= (_PAGE_DIRTY | _PAGE_MODIFIED);
 +      pte_val(pte) |= _PAGE_MODIFIED;
 +      if (pte_val(pte) & _PAGE_WRITE)
 +              pte_val(pte) |= _PAGE_DIRTY;
        return pte;
  }
  
  static inline pte_t pte_mkwrite(pte_t pte)
  {
 -      pte_val(pte) |= (_PAGE_WRITE | _PAGE_DIRTY);
 +      pte_val(pte) |= _PAGE_WRITE;
 +      if (pte_val(pte) & _PAGE_MODIFIED)
 +              pte_val(pte) |= _PAGE_DIRTY;
        return pte;
  }
  
@@@ -425,8 -426,6 +430,6 @@@ static inline void update_mmu_cache_pmd
        __update_tlb(vma, address, (pte_t *)pmdp);
  }
  
- #define kern_addr_valid(addr) (1)
  static inline unsigned long pmd_pfn(pmd_t pmd)
  {
        return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT;
@@@ -459,9 -458,7 +462,9 @@@ static inline int pmd_write(pmd_t pmd
  
  static inline pmd_t pmd_mkwrite(pmd_t pmd)
  {
 -      pmd_val(pmd) |= (_PAGE_WRITE | _PAGE_DIRTY);
 +      pmd_val(pmd) |= _PAGE_WRITE;
 +      if (pmd_val(pmd) & _PAGE_MODIFIED)
 +              pmd_val(pmd) |= _PAGE_DIRTY;
        return pmd;
  }
  
@@@ -484,9 -481,7 +487,9 @@@ static inline pmd_t pmd_mkclean(pmd_t p
  
  static inline pmd_t pmd_mkdirty(pmd_t pmd)
  {
 -      pmd_val(pmd) |= (_PAGE_DIRTY | _PAGE_MODIFIED);
 +      pmd_val(pmd) |= _PAGE_MODIFIED;
 +      if (pmd_val(pmd) & _PAGE_WRITE)
 +              pmd_val(pmd) |= _PAGE_DIRTY;
        return pmd;
  }
  
diff --combined arch/x86/kernel/cpu/sgx/encl.c
index 68f8b18d2278d75822bfb0d86156ba5c498d56ed,6225c525372d1f063faed1345beea9a2aa6822b5..2a0e90fe2abce14f049fb3c3eb083e306f4bd94c
@@@ -160,8 -160,8 +160,8 @@@ static int __sgx_encl_eldu(struct sgx_e
                return ret;
  
        pginfo.addr = encl_page->desc & PAGE_MASK;
 -      pginfo.contents = (unsigned long)kmap_atomic(b.contents);
 -      pcmd_page = kmap_atomic(b.pcmd);
 +      pginfo.contents = (unsigned long)kmap_local_page(b.contents);
 +      pcmd_page = kmap_local_page(b.pcmd);
        pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;
  
        if (secs_page)
         */
        pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);
  
 -      kunmap_atomic(pcmd_page);
 -      kunmap_atomic((void *)(unsigned long)pginfo.contents);
 +      kunmap_local(pcmd_page);
 +      kunmap_local((void *)(unsigned long)pginfo.contents);
  
        get_page(b.pcmd);
        sgx_encl_put_backing(&b);
  
        if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) {
                sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
 -              pcmd_page = kmap_atomic(b.pcmd);
 +              pcmd_page = kmap_local_page(b.pcmd);
                if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
                        pr_warn("PCMD page not empty after truncate.\n");
 -              kunmap_atomic(pcmd_page);
 +              kunmap_local(pcmd_page);
        }
  
        put_page(b.pcmd);
@@@ -268,7 -268,7 +268,7 @@@ static struct sgx_encl_page *sgx_encl_l
                                                       unsigned long addr,
                                                       unsigned long vm_flags)
  {
-       unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
+       unsigned long vm_prot_bits = vm_flags & VM_ACCESS_FLAGS;
        struct sgx_encl_page *entry;
  
        entry = xa_load(&encl->page_array, PFN_DOWN(addr));
@@@ -502,7 -502,7 +502,7 @@@ static void sgx_vma_open(struct vm_area
  int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
                     unsigned long end, unsigned long vm_flags)
  {
-       unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
+       unsigned long vm_prot_bits = vm_flags & VM_ACCESS_FLAGS;
        struct sgx_encl_page *page;
        unsigned long count = 0;
        int ret = 0;
@@@ -680,15 -680,11 +680,15 @@@ const struct vm_operations_struct sgx_v
  void sgx_encl_release(struct kref *ref)
  {
        struct sgx_encl *encl = container_of(ref, struct sgx_encl, refcount);
 +      unsigned long max_page_index = PFN_DOWN(encl->base + encl->size - 1);
        struct sgx_va_page *va_page;
        struct sgx_encl_page *entry;
 -      unsigned long index;
 +      unsigned long count = 0;
 +
 +      XA_STATE(xas, &encl->page_array, PFN_DOWN(encl->base));
  
 -      xa_for_each(&encl->page_array, index, entry) {
 +      xas_lock(&xas);
 +      xas_for_each(&xas, entry, max_page_index) {
                if (entry->epc_page) {
                        /*
                         * The page and its radix tree entry cannot be freed
                }
  
                kfree(entry);
 -              /* Invoke scheduler to prevent soft lockups. */
 -              cond_resched();
 +              /*
 +               * Invoke scheduler on every XA_CHECK_SCHED iteration
 +               * to prevent soft lockups.
 +               */
 +              if (!(++count % XA_CHECK_SCHED)) {
 +                      xas_pause(&xas);
 +                      xas_unlock(&xas);
 +
 +                      cond_resched();
 +
 +                      xas_lock(&xas);
 +              }
        }
 +      xas_unlock(&xas);
  
        xa_destroy(&encl->page_array);
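The loop rewritten above follows a generic XArray pattern: walk under the xa lock, but pause the iteration and reschedule every XA_CHECK_SCHED entries so a huge enclave cannot trigger soft lockups. A hedged, stand-alone sketch of the same pattern (release_all_entries() is a placeholder, not SGX code):

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/xarray.h>

static void release_all_entries(struct xarray *xa, unsigned long first,
				unsigned long last)
{
	XA_STATE(xas, xa, first);
	unsigned long count = 0;
	void *entry;

	xas_lock(&xas);
	xas_for_each(&xas, entry, last) {
		kfree(entry);			/* per-entry cleanup */

		/* Give other work a chance every XA_CHECK_SCHED entries. */
		if (!(++count % XA_CHECK_SCHED)) {
			xas_pause(&xas);	/* walk restarts cleanly ... */
			xas_unlock(&xas);	/* ... after the lock is dropped */
			cond_resched();
			xas_lock(&xas);
		}
	}
	xas_unlock(&xas);
}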
  
diff --combined drivers/acpi/numa/hmat.c
index 6cceca64a6bcfdd723d11f741983f82c0aa2f374,139e3b41653e50d68eb0a115a71cff09807f58b4..605a0c7053bea27eb01bc7d91cdf324752bc4deb
@@@ -562,26 -562,17 +562,26 @@@ static int initiator_cmp(void *priv, co
  {
        struct memory_initiator *ia;
        struct memory_initiator *ib;
 -      unsigned long *p_nodes = priv;
  
        ia = list_entry(a, struct memory_initiator, node);
        ib = list_entry(b, struct memory_initiator, node);
  
 -      set_bit(ia->processor_pxm, p_nodes);
 -      set_bit(ib->processor_pxm, p_nodes);
 -
        return ia->processor_pxm - ib->processor_pxm;
  }
  
 +static int initiators_to_nodemask(unsigned long *p_nodes)
 +{
 +      struct memory_initiator *initiator;
 +
 +      if (list_empty(&initiators))
 +              return -ENXIO;
 +
 +      list_for_each_entry(initiator, &initiators, node)
 +              set_bit(initiator->processor_pxm, p_nodes);
 +
 +      return 0;
 +}
 +
  static void hmat_register_target_initiators(struct memory_target *target)
  {
        static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
         * initiators.
         */
        bitmap_zero(p_nodes, MAX_NUMNODES);
 -      list_sort(p_nodes, &initiators, initiator_cmp);
 +      list_sort(NULL, &initiators, initiator_cmp);
 +      if (initiators_to_nodemask(p_nodes) < 0)
 +              return;
 +
        if (!access0done) {
                for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
                        loc = localities_types[i];
  
        /* Access 1 ignores Generic Initiators */
        bitmap_zero(p_nodes, MAX_NUMNODES);
 -      list_sort(p_nodes, &initiators, initiator_cmp);
 -      best = 0;
 +      if (initiators_to_nodemask(p_nodes) < 0)
 +              return;
 +
        for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
                loc = localities_types[i];
                if (!loc)
@@@ -780,11 -767,6 +780,6 @@@ static int hmat_callback(struct notifie
        return NOTIFY_OK;
  }
  
- static struct notifier_block hmat_callback_nb = {
-       .notifier_call = hmat_callback,
-       .priority = 2,
- };
  static __init void hmat_free_structures(void)
  {
        struct memory_target *target, *tnext;
@@@ -867,7 -849,7 +862,7 @@@ static __init int hmat_init(void
        hmat_register_targets();
  
        /* Keep the table and structures if the notifier may use them */
-       if (!register_hotmemory_notifier(&hmat_callback_nb))
+       if (!hotplug_memory_notifier(hmat_callback, HMAT_CALLBACK_PRI))
                return 0;
  out_put:
        hmat_free_structures();
diff --combined drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 62e98f1ad770b1ec0621ef46d8739b4b7188ea35,4728be161828413bb4a312e017543952844ec6fa..bee38c326537de308e0b3bdf47bcbb1884add21b
@@@ -38,7 -38,6 +38,7 @@@
  #include "amdgpu.h"
  #include "amdgpu_display.h"
  #include "amdgpu_dma_buf.h"
 +#include "amdgpu_hmm.h"
  #include "amdgpu_xgmi.h"
  
  static const struct drm_gem_object_funcs amdgpu_gem_object_funcs;
@@@ -88,7 -87,7 +88,7 @@@ static void amdgpu_gem_object_free(stru
        struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj);
  
        if (robj) {
 -              amdgpu_mn_unregister(robj);
 +              amdgpu_hmm_unregister(robj);
                amdgpu_bo_unref(&robj);
        }
  }
@@@ -113,7 -112,7 +113,7 @@@ int amdgpu_gem_object_create(struct amd
        bp.resv = resv;
        bp.preferred_domain = initial_domain;
        bp.flags = flags;
 -      bp.domain = initial_domain;
 +      bp.domain = initial_domain | AMDGPU_GEM_DOMAIN_CPU;
        bp.bo_ptr_size = sizeof(struct amdgpu_bo);
  
        r = amdgpu_bo_create_user(adev, &bp, &ubo);
@@@ -256,7 -255,7 +256,7 @@@ static int amdgpu_gem_object_mmap(struc
         * becoming writable and makes is_cow_mapping(vm_flags) false.
         */
        if (is_cow_mapping(vma->vm_flags) &&
-           !(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
+           !(vma->vm_flags & VM_ACCESS_FLAGS))
                vma->vm_flags &= ~VM_MAYWRITE;
  
        return drm_gem_ttm_mmap(obj, vma);
@@@ -332,10 -331,20 +332,10 @@@ int amdgpu_gem_create_ioctl(struct drm_
        }
  
        initial_domain = (u32)(0xffffffff & args->in.domains);
 -retry:
        r = amdgpu_gem_object_create(adev, size, args->in.alignment,
 -                                   initial_domain,
 -                                   flags, ttm_bo_type_device, resv, &gobj);
 +                                   initial_domain, flags, ttm_bo_type_device,
 +                                   resv, &gobj);
        if (r && r != -ERESTARTSYS) {
 -              if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
 -                      flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 -                      goto retry;
 -              }
 -
 -              if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
 -                      initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
 -                      goto retry;
 -              }
                DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n",
                                size, initial_domain, args->in.alignment, r);
        }
@@@ -369,7 -378,6 +369,7 @@@ int amdgpu_gem_userptr_ioctl(struct drm
        struct amdgpu_device *adev = drm_to_adev(dev);
        struct drm_amdgpu_gem_userptr *args = data;
        struct drm_gem_object *gobj;
 +      struct hmm_range *range;
        struct amdgpu_bo *bo;
        uint32_t handle;
        int r;
        if (r)
                goto release_object;
  
 -      if (args->flags & AMDGPU_GEM_USERPTR_REGISTER) {
 -              r = amdgpu_mn_register(bo, args->addr);
 -              if (r)
 -                      goto release_object;
 -      }
 +      r = amdgpu_hmm_register(bo, args->addr);
 +      if (r)
 +              goto release_object;
  
        if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
 -              r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
 +              r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
 +                                               &range);
                if (r)
                        goto release_object;
  
  
  user_pages_done:
        if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
 -              amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 +              amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
  
  release_object:
        drm_gem_object_put(gobj);
diff --combined drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 68e4446a94ad7c955d8d5ca5c05871d0952c8cc2,efe2240945d07b7aa5539e5f72b9b190620f8e90..c5ae5492e1af066e5ddcdc760fc8b13647c0efa7
@@@ -130,7 -130,7 +130,7 @@@ static int etnaviv_gem_mmap_obj(struct 
  {
        pgprot_t vm_page_prot;
  
 -      vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTDUMP;
 +      vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
  
        vm_page_prot = vm_get_page_prot(vma->vm_flags);
  
@@@ -165,8 -165,7 +165,8 @@@ static vm_fault_t etnaviv_gem_fault(str
        struct vm_area_struct *vma = vmf->vma;
        struct drm_gem_object *obj = vma->vm_private_data;
        struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 -      struct page **pages, *page;
 +      struct page **pages;
 +      unsigned long pfn;
        pgoff_t pgoff;
        int err;
  
        /* We don't use vmf->pgoff since that has the fake offset: */
        pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
  
 -      page = pages[pgoff];
 +      pfn = page_to_pfn(pages[pgoff]);
  
        VERB("Inserting %p pfn %lx, pa %lx", (void *)vmf->address,
 -           page_to_pfn(page), page_to_pfn(page) << PAGE_SHIFT);
 +           pfn, pfn << PAGE_SHIFT);
  
 -      return vmf_insert_page(vma, vmf->address, page);
 +      return vmf_insert_pfn(vma, vmf->address, pfn);
  }
  
  int etnaviv_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset)
@@@ -259,12 -258,7 +259,12 @@@ struct etnaviv_vram_mapping *etnaviv_ge
                if (mapping->use == 0) {
                        mutex_lock(&mmu_context->lock);
                        if (mapping->context == mmu_context)
 -                              mapping->use += 1;
 +                              if (va && mapping->iova != va) {
 +                                      etnaviv_iommu_reap_mapping(mapping);
 +                                      mapping = NULL;
 +                              } else {
 +                                      mapping->use += 1;
 +                              }
                        else
                                mapping = NULL;
                        mutex_unlock(&mmu_context->lock);
@@@ -510,6 -504,7 +510,6 @@@ void etnaviv_gem_free_object(struct drm
                kfree(mapping);
        }
  
 -      drm_gem_free_mmap_offset(obj);
        etnaviv_obj->ops->release(etnaviv_obj);
        drm_gem_object_release(obj);
  
@@@ -643,6 -638,7 +643,7 @@@ static int etnaviv_gem_userptr_get_page
        struct page **pvec = NULL;
        struct etnaviv_gem_userptr *userptr = &etnaviv_obj->userptr;
        int ret, pinned = 0, npages = etnaviv_obj->base.size >> PAGE_SHIFT;
+       unsigned int gup_flags = FOLL_LONGTERM;
  
        might_lock_read(&current->mm->mmap_lock);
  
        if (!pvec)
                return -ENOMEM;
  
+       if (!userptr->ro)
+               gup_flags |= FOLL_WRITE;
        do {
                unsigned num_pages = npages - pinned;
                uint64_t ptr = userptr->ptr + pinned * PAGE_SIZE;
                struct page **pages = pvec + pinned;
  
-               ret = pin_user_pages_fast(ptr, num_pages,
-                                         FOLL_WRITE | FOLL_FORCE | FOLL_LONGTERM,
-                                         pages);
+               ret = pin_user_pages_fast(ptr, num_pages, gup_flags, pages);
                if (ret < 0) {
                        unpin_user_pages(pvec, pinned);
                        kvfree(pvec);
diff --combined drivers/media/common/videobuf2/frame_vector.c
index 89eebc3341b84708b5775fed0e434c9ad1dea9be,062e98148c5356c43344c1f7f881e2d81e96486d..0f430ddc1f6704e96dd42357eaf77f03be19793f
@@@ -14,7 -14,6 +14,7 @@@
   * get_vaddr_frames() - map virtual addresses to pfns
   * @start:    starting user address
   * @nr_frames:        number of pages / pfns from start to map
 + * @write:    the mapped address has write permission
   * @vec:      structure which receives pages / pfns of the addresses mapped.
   *            It should have space for at least nr_frames entries.
   *
   *
   * This function takes care of grabbing mmap_lock as necessary.
   */
 -int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
 +int get_vaddr_frames(unsigned long start, unsigned int nr_frames, bool write,
                     struct frame_vector *vec)
  {
 -      struct mm_struct *mm = current->mm;
 -      struct vm_area_struct *vma;
 -      int ret_pin_user_pages_fast = 0;
 -      int ret = 0;
 -      int err;
 +      int ret;
-       unsigned int gup_flags = FOLL_FORCE | FOLL_LONGTERM;
++      unsigned int gup_flags = FOLL_LONGTERM;
  
        if (nr_frames == 0)
                return 0;
  
        start = untagged_addr(start);
  
 -      ret = pin_user_pages_fast(start, nr_frames,
 -                                FOLL_WRITE | FOLL_LONGTERM,
 -                                (struct page **)(vec->ptrs));
 -      if (ret > 0) {
 -              vec->got_ref = true;
 -              vec->is_pfns = false;
 -              goto out_unlocked;
 -      }
 -      ret_pin_user_pages_fast = ret;
 +      if (write)
 +              gup_flags |= FOLL_WRITE;
  
 -      mmap_read_lock(mm);
 -      vec->got_ref = false;
 -      vec->is_pfns = true;
 -      ret = 0;
 -      do {
 -              unsigned long *nums = frame_vector_pfns(vec);
 +      ret = pin_user_pages_fast(start, nr_frames, gup_flags,
 +                                (struct page **)(vec->ptrs));
 +      vec->got_ref = true;
 +      vec->is_pfns = false;
 +      vec->nr_frames = ret;
  
 -              vma = vma_lookup(mm, start);
 -              if (!vma)
 -                      break;
 +      if (likely(ret > 0))
 +              return ret;
  
 -              while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) {
 -                      err = follow_pfn(vma, start, &nums[ret]);
 -                      if (err) {
 -                              if (ret)
 -                                      goto out;
 -                              // If follow_pfn() returns -EINVAL, then this
 -                              // is not an IO mapping or a raw PFN mapping.
 -                              // In that case, return the original error from
 -                              // pin_user_pages_fast(). Otherwise this
 -                              // function would return -EINVAL when
 -                              // pin_user_pages_fast() returned -ENOMEM,
 -                              // which makes debugging hard.
 -                              if (err == -EINVAL && ret_pin_user_pages_fast)
 -                                      ret = ret_pin_user_pages_fast;
 -                              else
 -                                      ret = err;
 -                              goto out;
 -                      }
 -                      start += PAGE_SIZE;
 -                      ret++;
 -              }
 -              /* Bail out if VMA doesn't completely cover the tail page. */
 -              if (start < vma->vm_end)
 -                      break;
 -      } while (ret < nr_frames);
 -out:
 -      mmap_read_unlock(mm);
 -out_unlocked:
 -      if (!ret)
 -              ret = -EFAULT;
 -      if (ret > 0)
 -              vec->nr_frames = ret;
 -      return ret;
 +      /* This used to (racily) return non-refcounted pfns. Let people know */
 +      WARN_ONCE(1, "get_vaddr_frames() cannot follow VM_IO mapping");
 +      vec->nr_frames = 0;
 +      return ret ? ret : -EFAULT;
  }
  EXPORT_SYMBOL(get_vaddr_frames);
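Both this conversion and the etnaviv one above land on the same pinning convention from the reworked R/O long-term pinning: FOLL_LONGTERM for long-lived pins, FOLL_WRITE only when the caller will actually write to the pages, and no FOLL_FORCE. A minimal sketch of that convention (placeholder helper, not kernel code):

#include <linux/mm.h>

static int pin_user_buffer(unsigned long start, int nr_pages, bool write,
			   struct page **pages)
{
	unsigned int gup_flags = FOLL_LONGTERM;
	int pinned;

	if (write)
		gup_flags |= FOLL_WRITE;

	pinned = pin_user_pages_fast(start, nr_pages, gup_flags, pages);
	if (pinned < 0)
		return pinned;		/* negative errno */

	/* ... use the pinned pages, then drop every pin: */
	unpin_user_pages(pages, pinned);
	return 0;
}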
  
diff --combined fs/fuse/dev.c
index c73d9c4132f67a199dc44e1b3220acceb449beed,204c332cd3433fde7f8b668ace2cb5312596486a..e8b60ce72c9ad1c429b43740686e3c63aceec55a
@@@ -764,11 -764,11 +764,11 @@@ static int fuse_copy_do(struct fuse_cop
        return ncpy;
  }
  
- static int fuse_check_page(struct page *page)
+ static int fuse_check_folio(struct folio *folio)
  {
-       if (page_mapcount(page) ||
-           page->mapping != NULL ||
-           (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
+       if (folio_mapped(folio) ||
+           folio->mapping != NULL ||
+           (folio->flags & PAGE_FLAGS_CHECK_AT_PREP &
             ~(1 << PG_locked |
               1 << PG_referenced |
               1 << PG_uptodate |
               1 << PG_reclaim |
               1 << PG_waiters |
               LRU_GEN_MASK | LRU_REFS_MASK))) {
-               dump_page(page, "fuse: trying to steal weird page");
+               dump_page(&folio->page, "fuse: trying to steal weird page");
                return 1;
        }
        return 0;
  static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
  {
        int err;
-       struct page *oldpage = *pagep;
-       struct page *newpage;
+       struct folio *oldfolio = page_folio(*pagep);
+       struct folio *newfolio;
        struct pipe_buffer *buf = cs->pipebufs;
  
-       get_page(oldpage);
+       folio_get(oldfolio);
        err = unlock_request(cs->req);
        if (err)
                goto out_put_old;
        if (!pipe_buf_try_steal(cs->pipe, buf))
                goto out_fallback;
  
-       newpage = buf->page;
+       newfolio = page_folio(buf->page);
  
-       if (!PageUptodate(newpage))
-               SetPageUptodate(newpage);
+       if (!folio_test_uptodate(newfolio))
+               folio_mark_uptodate(newfolio);
  
-       ClearPageMappedToDisk(newpage);
+       folio_clear_mappedtodisk(newfolio);
  
-       if (fuse_check_page(newpage) != 0)
+       if (fuse_check_folio(newfolio) != 0)
                goto out_fallback_unlock;
  
        /*
         * This is a new and locked page, it shouldn't be mapped or
         * have any special flags on it
         */
-       if (WARN_ON(page_mapped(oldpage)))
+       if (WARN_ON(folio_mapped(oldfolio)))
                goto out_fallback_unlock;
-       if (WARN_ON(page_has_private(oldpage)))
+       if (WARN_ON(folio_has_private(oldfolio)))
                goto out_fallback_unlock;
-       if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
+       if (WARN_ON(folio_test_dirty(oldfolio) ||
+                               folio_test_writeback(oldfolio)))
                goto out_fallback_unlock;
-       if (WARN_ON(PageMlocked(oldpage)))
+       if (WARN_ON(folio_test_mlocked(oldfolio)))
                goto out_fallback_unlock;
  
-       replace_page_cache_page(oldpage, newpage);
+       replace_page_cache_folio(oldfolio, newfolio);
  
-       get_page(newpage);
+       folio_get(newfolio);
  
        if (!(buf->flags & PIPE_BUF_FLAG_LRU))
-               lru_cache_add(newpage);
+               folio_add_lru(newfolio);
  
        /*
         * Release while we have extra ref on stolen page.  Otherwise
        if (test_bit(FR_ABORTED, &cs->req->flags))
                err = -ENOENT;
        else
-               *pagep = newpage;
+               *pagep = &newfolio->page;
        spin_unlock(&cs->req->waitq.lock);
  
        if (err) {
-               unlock_page(newpage);
-               put_page(newpage);
+               folio_unlock(newfolio);
+               folio_put(newfolio);
                goto out_put_old;
        }
  
-       unlock_page(oldpage);
+       folio_unlock(oldfolio);
        /* Drop ref for ap->pages[] array */
-       put_page(oldpage);
+       folio_put(oldfolio);
        cs->len = 0;
  
        err = 0;
  out_put_old:
        /* Drop ref obtained in this function */
-       put_page(oldpage);
+       folio_put(oldfolio);
        return err;
  
  out_fallback_unlock:
-       unlock_page(newpage);
+       folio_unlock(newfolio);
  out_fallback:
        cs->pg = buf->page;
        cs->offset = buf->offset;
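The hunk above is a mechanical page-to-folio conversion of the fuse page-stealing path. The underlying pattern is always the same: look up the folio once with page_folio() and then use folio accessors instead of the old page flag and refcount calls, as in this minimal illustrative helper (not fuse code):

#include <linux/mm.h>
#include <linux/pagemap.h>

static void mark_uptodate_and_release(struct page *page)
{
	struct folio *folio = page_folio(page);	/* the page's containing folio */

	if (!folio_test_uptodate(folio))
		folio_mark_uptodate(folio);	/* was SetPageUptodate(page) */
	folio_put(folio);			/* was put_page(page) */
}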
@@@ -1498,7 -1499,7 +1499,7 @@@ static int fuse_notify_inval_entry(stru
        buf[outarg.namelen] = 0;
  
        down_read(&fc->killsb);
 -      err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name);
 +      err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name, outarg.flags);
        up_read(&fc->killsb);
        kfree(buf);
        return err;
@@@ -1546,7 -1547,7 +1547,7 @@@ static int fuse_notify_delete(struct fu
        buf[outarg.namelen] = 0;
  
        down_read(&fc->killsb);
 -      err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name);
 +      err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name, 0);
        up_read(&fc->killsb);
        kfree(buf);
        return err;
@@@ -2267,7 -2268,8 +2268,7 @@@ static long fuse_dev_ioctl(struct file 
                                 * Check against file->f_op because CUSE
                                 * uses the same ioctl handler.
                                 */
 -                              if (old->f_op == file->f_op &&
 -                                  old->f_cred->user_ns == file->f_cred->user_ns)
 +                              if (old->f_op == file->f_op)
                                        fud = fuse_get_dev(old);
  
                                if (fud) {
diff --combined fs/hfs/inode.c
index a0746be3c1de75d11c44dd0d47bd8d0553881e52,16466a5e88b44bceb579333b9dd58c4e4db0bb1e..9c329a365e7502a1243aa013b72dafb120b6749b
@@@ -173,12 -173,12 +173,12 @@@ const struct address_space_operations h
        .dirty_folio    = block_dirty_folio,
        .invalidate_folio = block_invalidate_folio,
        .read_folio     = hfs_read_folio,
-       .writepage      = hfs_writepage,
        .write_begin    = hfs_write_begin,
        .write_end      = generic_write_end,
        .bmap           = hfs_bmap,
        .direct_IO      = hfs_direct_IO,
        .writepages     = hfs_writepages,
+       .migrate_folio  = buffer_migrate_folio,
  };
  
  /*
@@@ -458,8 -458,6 +458,8 @@@ int hfs_write_inode(struct inode *inode
                /* panic? */
                return -EIO;
  
 +      if (HFS_I(main_inode)->cat_key.CName.len > HFS_NAMELEN)
 +              return -EIO;
        fd.search_key->cat = HFS_I(main_inode)->cat_key;
        if (hfs_brec_find(&fd))
                /* panic? */
diff --combined fs/hfsplus/inode.c
index b675581aa9d0fe0eed8a05b79cc582d1fa0d0782,d6572ad2407a7ca3f2f67b2333858c52d84c681f..840577a0c1e7696de6775a4f88b601f98987e5b8
@@@ -170,12 -170,12 +170,12 @@@ const struct address_space_operations h
        .dirty_folio    = block_dirty_folio,
        .invalidate_folio = block_invalidate_folio,
        .read_folio     = hfsplus_read_folio,
-       .writepage      = hfsplus_writepage,
        .write_begin    = hfsplus_write_begin,
        .write_end      = generic_write_end,
        .bmap           = hfsplus_bmap,
        .direct_IO      = hfsplus_direct_IO,
        .writepages     = hfsplus_writepages,
+       .migrate_folio  = buffer_migrate_folio,
  };
  
  const struct dentry_operations hfsplus_dentry_operations = {
@@@ -192,11 -192,11 +192,11 @@@ static void hfsplus_get_perms(struct in
        mode = be16_to_cpu(perms->mode);
  
        i_uid_write(inode, be32_to_cpu(perms->owner));
 -      if (!i_uid_read(inode) && !mode)
 +      if ((test_bit(HFSPLUS_SB_UID, &sbi->flags)) || (!i_uid_read(inode) && !mode))
                inode->i_uid = sbi->uid;
  
        i_gid_write(inode, be32_to_cpu(perms->group));
 -      if (!i_gid_read(inode) && !mode)
 +      if ((test_bit(HFSPLUS_SB_GID, &sbi->flags)) || (!i_gid_read(inode) && !mode))
                inode->i_gid = sbi->gid;
  
        if (dir) {
diff --combined fs/xfs/xfs_iops.c
index 712238305bc33e866f990d7df6aeb843ad54f86d,bf0495f7a5e1c6406f07664b2bfe8a46985f4637..515318dfbc382d42863e2faa07dcb84491fafe29
@@@ -651,7 -651,6 +651,7 @@@ xfs_vn_change_ok
  static int
  xfs_setattr_nonsize(
        struct user_namespace   *mnt_userns,
 +      struct dentry           *dentry,
        struct xfs_inode        *ip,
        struct iattr            *iattr)
  {
         *           Posix ACL code seems to care about this issue either.
         */
        if (mask & ATTR_MODE) {
 -              error = posix_acl_chmod(mnt_userns, inode, inode->i_mode);
 +              error = posix_acl_chmod(mnt_userns, dentry, inode->i_mode);
                if (error)
                        return error;
        }
@@@ -780,7 -779,6 +780,7 @@@ out_dqrele
  STATIC int
  xfs_setattr_size(
        struct user_namespace   *mnt_userns,
 +      struct dentry           *dentry,
        struct xfs_inode        *ip,
        struct iattr            *iattr)
  {
                 * Use the regular setattr path to update the timestamps.
                 */
                iattr->ia_valid &= ~ATTR_SIZE;
 -              return xfs_setattr_nonsize(mnt_userns, ip, iattr);
 +              return xfs_setattr_nonsize(mnt_userns, dentry, ip, iattr);
        }
  
        /*
@@@ -989,7 -987,7 +989,7 @@@ xfs_vn_setattr_size
        error = xfs_vn_change_ok(mnt_userns, dentry, iattr);
        if (error)
                return error;
 -      return xfs_setattr_size(mnt_userns, ip, iattr);
 +      return xfs_setattr_size(mnt_userns, dentry, ip, iattr);
  }
  
  STATIC int
@@@ -1021,7 -1019,7 +1021,7 @@@ xfs_vn_setattr
  
                error = xfs_vn_change_ok(mnt_userns, dentry, iattr);
                if (!error)
 -                      error = xfs_setattr_nonsize(mnt_userns, ip, iattr);
 +                      error = xfs_setattr_nonsize(mnt_userns, dentry, ip, iattr);
        }
  
        return error;
@@@ -1103,7 -1101,7 +1103,7 @@@ xfs_vn_tmpfile
  }
  
  static const struct inode_operations xfs_inode_operations = {
 -      .get_acl                = xfs_get_acl,
 +      .get_inode_acl          = xfs_get_acl,
        .set_acl                = xfs_set_acl,
        .getattr                = xfs_vn_getattr,
        .setattr                = xfs_vn_setattr,
@@@ -1130,7 -1128,7 +1130,7 @@@ static const struct inode_operations xf
        .rmdir                  = xfs_vn_unlink,
        .mknod                  = xfs_vn_mknod,
        .rename                 = xfs_vn_rename,
 -      .get_acl                = xfs_get_acl,
 +      .get_inode_acl          = xfs_get_acl,
        .set_acl                = xfs_set_acl,
        .getattr                = xfs_vn_getattr,
        .setattr                = xfs_vn_setattr,
@@@ -1157,7 -1155,7 +1157,7 @@@ static const struct inode_operations xf
        .rmdir                  = xfs_vn_unlink,
        .mknod                  = xfs_vn_mknod,
        .rename                 = xfs_vn_rename,
 -      .get_acl                = xfs_get_acl,
 +      .get_inode_acl          = xfs_get_acl,
        .set_acl                = xfs_set_acl,
        .getattr                = xfs_vn_getattr,
        .setattr                = xfs_vn_setattr,
@@@ -1187,10 -1185,6 +1187,6 @@@ xfs_inode_supports_dax
        if (!S_ISREG(VFS_I(ip)->i_mode))
                return false;
  
-       /* Only supported on non-reflinked files. */
-       if (xfs_is_reflink_inode(ip))
-               return false;
        /* Block size must match page size */
        if (mp->m_sb.sb_blocksize != PAGE_SIZE)
                return false;
diff --combined include/linux/damon.h
index 84525b9cdf6eb0989a0c9eb0174dae649dc50bd8,35630634d79049e5cf872bd268553583ca15c866..ad15a5b88e3a499cf0f1c2ffd1a6773a843fad25
@@@ -21,7 -21,7 +21,7 @@@
  /* Get a random number in [l, r) */
  static inline unsigned long damon_rand(unsigned long l, unsigned long r)
  {
 -      return l + prandom_u32_max(r - l);
 +      return l + get_random_u32_below(r - l);
  }
  
  /**
@@@ -357,6 -357,7 +357,7 @@@ struct damon_operations 
   * @after_wmarks_check:       Called after each schemes' watermarks check.
   * @after_sampling:   Called after each sampling.
   * @after_aggregation:        Called after each aggregation.
+  * @before_damos_apply:       Called before applying DAMOS action.
   * @before_terminate: Called before terminating the monitoring.
   * @private:          User private data.
   *
@@@ -385,6 -386,10 +386,10 @@@ struct damon_callback 
        int (*after_wmarks_check)(struct damon_ctx *context);
        int (*after_sampling)(struct damon_ctx *context);
        int (*after_aggregation)(struct damon_ctx *context);
+       int (*before_damos_apply)(struct damon_ctx *context,
+                       struct damon_target *target,
+                       struct damon_region *region,
+                       struct damos *scheme);
        void (*before_terminate)(struct damon_ctx *context);
  };
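The new before_damos_apply hook gives a kernel user a veto point per target region. A hedged sketch of wiring it up (the example policy and the interpretation of the return value, non-zero meaning "skip this region", are assumptions to be checked against mm/damon/core.c):

#include <linux/damon.h>
#include <linux/sizes.h>

static int skip_small_regions(struct damon_ctx *ctx, struct damon_target *t,
			      struct damon_region *r, struct damos *s)
{
	/* Example policy: leave regions smaller than 2MiB untouched. */
	return (r->ar.end - r->ar.start) < SZ_2M;
}

static void install_damos_filter(struct damon_ctx *ctx)
{
	ctx->callback.before_damos_apply = skip_small_regions;
}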
  
diff --combined include/linux/mm.h
index 6a05a3bc0a287270f1f85190a4ac4d4bf187b89c,7dc376052d4005882389bfc8ecb408e1dfca0372..8178fe894e2e1da4a9f6f0bb8e7f2995f5730f18
@@@ -74,6 -74,7 +74,7 @@@ static inline void totalram_pages_add(l
  
  extern void * high_memory;
  extern int page_cluster;
+ extern const int page_cluster_max;
  
  #ifdef CONFIG_SYSCTL
  extern int sysctl_legacy_va_layout;
@@@ -549,7 -550,7 +550,7 @@@ struct vm_operations_struct 
        /*
         * Called by mprotect() to make driver-specific permission
         * checks before mprotect() is finalised.   The VMA must not
-        * be modified.  Returns 0 if eprotect() can proceed.
+        * be modified.  Returns 0 if mprotect() can proceed.
         */
        int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
                        unsigned long end, unsigned long newflags);
@@@ -699,8 -700,10 +700,10 @@@ static inline unsigned long vma_iter_ad
   * paths in userfault.
   */
  bool vma_is_shmem(struct vm_area_struct *vma);
+ bool vma_is_anon_shmem(struct vm_area_struct *vma);
  #else
  static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
+ static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; }
  #endif
  
  int vma_is_stack_for_current(struct vm_area_struct *vma);
@@@ -817,8 -820,8 +820,8 @@@ static inline int is_vmalloc_or_module_
  /*
   * How many times the entire folio is mapped as a single unit (eg by a
   * PMD or PUD entry).  This is probably not what you want, except for
-  * debugging purposes; look at folio_mapcount() or page_mapcount()
-  * instead.
+  * debugging purposes - it does not include PTE-mapped sub-pages; look
+  * at folio_mapcount() or page_mapcount() or total_mapcount() instead.
   */
  static inline int folio_entire_mapcount(struct folio *folio)
  {
  
  /*
   * Mapcount of compound page as a whole, does not include mapped sub-pages.
-  *
-  * Must be called only for compound pages.
+  * Must be called only on head of compound page.
+  */
+ static inline int head_compound_mapcount(struct page *head)
+ {
+       return atomic_read(compound_mapcount_ptr(head)) + 1;
+ }
+ /*
+  * If a 16GB hugetlb page were mapped by PTEs of all of its 4kB sub-pages,
+  * its subpages_mapcount would be 0x400000: choose the COMPOUND_MAPPED bit
+  * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE).  Hugetlb currently
+  * leaves subpages_mapcount at 0, but avoid surprise if it participates later.
   */
- static inline int compound_mapcount(struct page *page)
+ #define COMPOUND_MAPPED       0x800000
+ #define SUBPAGES_MAPPED       (COMPOUND_MAPPED - 1)
+ /*
+  * Number of sub-pages mapped by PTE, does not include compound mapcount.
+  * Must be called only on head of compound page.
+  */
+ static inline int head_subpages_mapcount(struct page *head)
  {
-       return folio_entire_mapcount(page_folio(page));
+       return atomic_read(subpages_mapcount_ptr(head)) & SUBPAGES_MAPPED;
  }
  
  /*
@@@ -846,11 -866,9 +866,9 @@@ static inline void page_mapcount_reset(
        atomic_set(&(page)->_mapcount, -1);
  }
  
- int __page_mapcount(struct page *page);
  /*
   * Mapcount of 0-order page; when compound sub-page, includes
-  * compound_mapcount().
+  * compound_mapcount of compound_head of page.
   *
   * Result is undefined for pages which cannot be mapped into userspace.
   * For example SLAB or special types of pages. See function page_has_type().
   */
  static inline int page_mapcount(struct page *page)
  {
-       if (unlikely(PageCompound(page)))
-               return __page_mapcount(page);
-       return atomic_read(&page->_mapcount) + 1;
+       int mapcount = atomic_read(&page->_mapcount) + 1;
+       if (likely(!PageCompound(page)))
+               return mapcount;
+       page = compound_head(page);
+       return head_compound_mapcount(page) + mapcount;
  }
  
- int folio_mapcount(struct folio *folio);
+ int total_compound_mapcount(struct page *head);
  
- #ifdef CONFIG_TRANSPARENT_HUGEPAGE
- static inline int total_mapcount(struct page *page)
+ /**
+  * folio_mapcount() - Calculate the number of mappings of this folio.
+  * @folio: The folio.
+  *
+  * A large folio tracks both how many times the entire folio is mapped,
+  * and how many times each individual page in the folio is mapped.
+  * This function calculates the total number of times the folio is
+  * mapped.
+  *
+  * Return: The number of times this folio is mapped.
+  */
+ static inline int folio_mapcount(struct folio *folio)
  {
-       return folio_mapcount(page_folio(page));
+       if (likely(!folio_test_large(folio)))
+               return atomic_read(&folio->_mapcount) + 1;
+       return total_compound_mapcount(&folio->page);
  }
  
- #else
  static inline int total_mapcount(struct page *page)
  {
-       return page_mapcount(page);
+       if (likely(!PageCompound(page)))
+               return atomic_read(&page->_mapcount) + 1;
+       return total_compound_mapcount(compound_head(page));
+ }
+ static inline bool folio_large_is_mapped(struct folio *folio)
+ {
+       /*
+        * Reading folio_mapcount_ptr() below could be omitted if hugetlb
+        * participated in incrementing subpages_mapcount when compound mapped.
+        */
+       return atomic_read(folio_subpages_mapcount_ptr(folio)) > 0 ||
+               atomic_read(folio_mapcount_ptr(folio)) >= 0;
+ }
+ /**
+  * folio_mapped - Is this folio mapped into userspace?
+  * @folio: The folio.
+  *
+  * Return: True if any page in this folio is referenced by user page tables.
+  */
+ static inline bool folio_mapped(struct folio *folio)
+ {
+       if (likely(!folio_test_large(folio)))
+               return atomic_read(&folio->_mapcount) >= 0;
+       return folio_large_is_mapped(folio);
+ }
+ /*
+  * Return true if this page is mapped into pagetables.
+  * For compound page it returns true if any sub-page of compound page is mapped,
+  * even if this particular sub-page is not itself mapped by any PTE or PMD.
+  */
+ static inline bool page_mapped(struct page *page)
+ {
+       if (likely(!PageCompound(page)))
+               return atomic_read(&page->_mapcount) >= 0;
+       return folio_large_is_mapped(page_folio(page));
  }
- #endif
  
  static inline struct page *virt_to_head_page(const void *x)
  {
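The COMPOUND_MAPPED encoding above packs two facts into the one subpages_mapcount field: the low bits count PTE-mapped sub-pages, and the 0x800000 bit records that the folio is (also) mapped as a whole. A toy userspace model of just the encoding (simplified; when rmap actually adds COMPOUND_MAPPED is decided in mm/rmap.c):

#include <assert.h>

#define COMPOUND_MAPPED 0x800000
#define SUBPAGES_MAPPED (COMPOUND_MAPPED - 1)

int main(void)
{
	int subpages_mapcount = 0;

	subpages_mapcount += COMPOUND_MAPPED;	/* folio gains a PMD mapping */
	subpages_mapcount += 3;			/* three sub-pages PTE-mapped */

	/* head_subpages_mapcount() masks off the COMPOUND_MAPPED marker. */
	assert((subpages_mapcount & SUBPAGES_MAPPED) == 3);
	/* folio_large_is_mapped() only needs to see a positive value. */
	assert(subpages_mapcount > 0);
	return 0;
}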
@@@ -929,6 -997,13 +997,13 @@@ static inline void set_compound_page_dt
        page[1].compound_dtor = compound_dtor;
  }
  
+ static inline void folio_set_compound_dtor(struct folio *folio,
+               enum compound_dtor_id compound_dtor)
+ {
+       VM_BUG_ON_FOLIO(compound_dtor >= NR_COMPOUND_DTORS, folio);
+       folio->_folio_dtor = compound_dtor;
+ }
  void destroy_large_folio(struct folio *folio);
  
  static inline int head_compound_pincount(struct page *head)
@@@ -944,6 -1019,22 +1019,22 @@@ static inline void set_compound_order(s
  #endif
  }
  
+ /*
+  * folio_set_compound_order is generally passed a non-zero order to
+  * initialize a large folio.  However, hugetlb code abuses this by
+  * passing in zero when 'dissolving' a large folio.
+  */
+ static inline void folio_set_compound_order(struct folio *folio,
+               unsigned int order)
+ {
+       VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
+       folio->_folio_order = order;
+ #ifdef CONFIG_64BIT
+       folio->_folio_nr_pages = order ? 1U << order : 0;
+ #endif
+ }
  /* Returns the number of pages in this potentially compound page. */
  static inline unsigned long compound_nr(struct page *page)
  {
@@@ -1129,7 -1220,7 +1220,7 @@@ static inline void get_page(struct pag
        folio_get(page_folio(page));
  }
  
 -bool __must_check try_grab_page(struct page *page, unsigned int flags);
 +int __must_check try_grab_page(struct page *page, unsigned int flags);
  
  static inline __must_check bool try_get_page(struct page *page)
  {
@@@ -1179,7 -1270,24 +1270,24 @@@ static inline void folio_put_refs(struc
                __folio_put(folio);
  }
  
- void release_pages(struct page **pages, int nr);
+ /**
+  * release_pages - release an array of pages or folios
+  *
+  * This just releases a simple array of multiple pages, and
+  * accepts various different forms of said page array: either
+  * a regular old boring array of pages, an array of folios, or
+  * an array of encoded page pointers.
+  *
+  * The transparent union syntax for this kind of "any of these
+  * argument types" is all kinds of ugly, so look away.
+  */
+ typedef union {
+       struct page **pages;
+       struct folio **folios;
+       struct encoded_page **encoded_pages;
+ } release_pages_arg __attribute__ ((__transparent_union__));
+ void release_pages(release_pages_arg, int nr);
  
  /**
   * folios_put - Decrement the reference count on an array of folios.
   */
  static inline void folios_put(struct folio **folios, unsigned int nr)
  {
-       release_pages((struct page **)folios, nr);
+       release_pages(folios, nr);
  }
  
  static inline void put_page(struct page *page)
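Note that folios_put() above now passes its folio array straight through: release_pages() takes a transparent union, so each of the three array types is accepted without a cast. A minimal usage sketch (the arrays are assumed to already hold references taken elsewhere):

#include <linux/mm.h>

static void drop_all_refs(struct page **pages, struct folio **folios, int nr)
{
	release_pages(pages, nr);	/* struct page ** arm of the union */
	release_pages(folios, nr);	/* struct folio ** arm, no cast needed */
}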
@@@ -1799,9 -1907,6 +1907,6 @@@ static inline pgoff_t page_index(struc
        return page->index;
  }
  
- bool page_mapped(struct page *page);
- bool folio_mapped(struct folio *folio);
  /*
   * Return true only if the page has been allocated with
   * ALLOC_NO_WATERMARKS and the low watermark was not
@@@ -2025,6 -2130,22 +2130,22 @@@ extern unsigned long move_page_tables(s
  #define  MM_CP_UFFD_WP_ALL                 (MM_CP_UFFD_WP | \
                                            MM_CP_UFFD_WP_RESOLVE)
  
+ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
+ static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
+ {
+       /*
+        * We want to check manually if we can change individual PTEs writable
+        * if we can't do that automatically for all PTEs in a mapping. For
+        * private mappings, that's always the case when we have write
+        * permissions as we properly have to handle COW.
+        */
+       if (vma->vm_flags & VM_SHARED)
+               return vma_wants_writenotify(vma, vma->vm_page_prot);
+       return !!(vma->vm_flags & VM_WRITE);
+ }
+ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
+                            pte_t pte);
  extern unsigned long change_protection(struct mmu_gather *tlb,
                              struct vm_area_struct *vma, unsigned long start,
                              unsigned long end, pgprot_t newprot,
@@@ -2051,40 -2172,30 +2172,30 @@@ static inline bool get_user_page_fast_o
   */
  static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
  {
-       long val = atomic_long_read(&mm->rss_stat.count[member]);
- #ifdef SPLIT_RSS_COUNTING
-       /*
-        * counter is updated in asynchronous manner and may go to minus.
-        * But it's never be expected number for users.
-        */
-       if (val < 0)
-               val = 0;
- #endif
-       return (unsigned long)val;
+       return percpu_counter_read_positive(&mm->rss_stat[member]);
  }
  
- void mm_trace_rss_stat(struct mm_struct *mm, int member, long count);
+ void mm_trace_rss_stat(struct mm_struct *mm, int member);
  
  static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
  {
-       long count = atomic_long_add_return(value, &mm->rss_stat.count[member]);
+       percpu_counter_add(&mm->rss_stat[member], value);
  
-       mm_trace_rss_stat(mm, member, count);
+       mm_trace_rss_stat(mm, member);
  }
  
  static inline void inc_mm_counter(struct mm_struct *mm, int member)
  {
-       long count = atomic_long_inc_return(&mm->rss_stat.count[member]);
+       percpu_counter_inc(&mm->rss_stat[member]);
  
-       mm_trace_rss_stat(mm, member, count);
+       mm_trace_rss_stat(mm, member);
  }
  
  static inline void dec_mm_counter(struct mm_struct *mm, int member)
  {
-       long count = atomic_long_dec_return(&mm->rss_stat.count[member]);
+       percpu_counter_dec(&mm->rss_stat[member]);
  
-       mm_trace_rss_stat(mm, member, count);
+       mm_trace_rss_stat(mm, member);
  }
  
  /* Optimized variant when page is already known not to be PageAnon */
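The RSS accounting above moves from a single atomic per counter (plus SPLIT_RSS_COUNTING fixups) to one percpu_counter per member, so updates stay cheap and reads fold the per-CPU deltas. A hedged, generic sketch of that API outside mm_struct (the demo function and values are made up):

#include <linux/gfp.h>
#include <linux/percpu_counter.h>
#include <linux/printk.h>

static int percpu_counter_demo(void)
{
	struct percpu_counter rss;
	int err;

	err = percpu_counter_init(&rss, 0, GFP_KERNEL);
	if (err)
		return err;

	percpu_counter_add(&rss, 16);	/* e.g. sixteen pages mapped */
	percpu_counter_dec(&rss);	/* one page unmapped */

	/* Negative transients are clamped to zero, like get_mm_counter(). */
	pr_info("rss=%lld\n",
		(long long)percpu_counter_read_positive(&rss));

	percpu_counter_destroy(&rss);
	return 0;
}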
@@@ -2174,8 -2285,6 +2285,6 @@@ static inline int pte_devmap(pte_t pte
  }
  #endif
  
- int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
  extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
                               spinlock_t **ptl);
  static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
@@@ -2424,7 -2533,7 +2533,7 @@@ static inline void pgtable_pte_page_dto
  
  #if USE_SPLIT_PMD_PTLOCKS
  
- static struct page *pmd_to_page(pmd_t *pmd)
+ static inline struct page *pmd_pgtable_page(pmd_t *pmd)
  {
        unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
        return virt_to_page((void *)((unsigned long) pmd & mask));
  
  static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
  {
-       return ptlock_ptr(pmd_to_page(pmd));
+       return ptlock_ptr(pmd_pgtable_page(pmd));
  }
  
  static inline bool pmd_ptlock_init(struct page *page)
@@@ -2451,7 -2560,7 +2560,7 @@@ static inline void pmd_ptlock_free(stru
        ptlock_free(page);
  }
  
- #define pmd_huge_pte(mm, pmd) (pmd_to_page(pmd)->pmd_huge_pte)
+ #define pmd_huge_pte(mm, pmd) (pmd_pgtable_page(pmd)->pmd_huge_pte)
  
  #else
  
@@@ -2971,7 -3080,6 +3080,6 @@@ struct page *follow_page(struct vm_area
                                 * and return without waiting upon it */
  #define FOLL_NOFAULT  0x80    /* do not fault in pages */
  #define FOLL_HWPOISON 0x100   /* check page is hwpoisoned */
- #define FOLL_MIGRATION        0x400   /* wait for page to replace migration entry */
  #define FOLL_TRIED    0x800   /* a retry, previous pass started an IO */
  #define FOLL_REMOTE   0x2000  /* we are working on non-current tsk/mm */
  #define FOLL_ANON     0x8000  /* don't do file mappings */
  #define FOLL_SPLIT_PMD        0x20000 /* split huge pmd before returning */
  #define FOLL_PIN      0x40000 /* pages must be released via unpin_user_page */
  #define FOLL_FAST_ONLY        0x80000 /* gup_fast: prevent fall-back to slow gup */
 +#define FOLL_PCI_P2PDMA       0x100000 /* allow returning PCI P2PDMA pages */
  
  /*
   * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
@@@ -3064,8 -3171,12 +3172,12 @@@ static inline int vm_fault_to_errno(vm_
   * Must be called with the (sub)page that's actually referenced via the
   * page table entry, which might not necessarily be the head page for a
   * PTE-mapped THP.
+  *
+  * If the vma is NULL, we're coming from the GUP-fast path and might have
+  * to fallback to the slow path just to lookup the vma.
   */
- static inline bool gup_must_unshare(unsigned int flags, struct page *page)
+ static inline bool gup_must_unshare(struct vm_area_struct *vma,
+                                   unsigned int flags, struct page *page)
  {
        /*
         * FOLL_WRITE is implicitly handled correctly as the page table entry
         * Note: PageAnon(page) is stable until the page is actually getting
         * freed.
         */
-       if (!PageAnon(page))
-               return false;
+       if (!PageAnon(page)) {
+               /*
+                * We only care about R/O long-term pining: R/O short-term
+                * pinning does not have the semantics to observe successive
+                * changes through the process page tables.
+                */
+               if (!(flags & FOLL_LONGTERM))
+                       return false;
+               /* We really need the vma ... */
+               if (!vma)
+                       return true;
+               /*
+                * ... because we only care about writable private ("COW")
+                * mappings where we have to break COW early.
+                */
+               return is_cow_mapping(vma->vm_flags);
+       }
  
        /* Paired with a memory barrier in page_try_share_anon_rmap(). */
        if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
@@@ -3255,6 -3383,8 +3384,8 @@@ void *sparse_buffer_alloc(unsigned lon
  struct page * __populate_section_memmap(unsigned long pfn,
                unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
                struct dev_pagemap *pgmap);
+ void pmd_init(void *addr);
+ void pud_init(void *addr);
  pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
  p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
  pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
@@@ -3266,8 -3396,14 +3397,14 @@@ struct vmem_altmap
  void *vmemmap_alloc_block_buf(unsigned long size, int node,
                              struct vmem_altmap *altmap);
  void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
+ void vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+                    unsigned long addr, unsigned long next);
+ int vmemmap_check_pmd(pmd_t *pmd, int node,
+                     unsigned long addr, unsigned long next);
  int vmemmap_populate_basepages(unsigned long start, unsigned long end,
                               int node, struct vmem_altmap *altmap);
+ int vmemmap_populate_hugepages(unsigned long start, unsigned long end,
+                              int node, struct vmem_altmap *altmap);
  int vmemmap_populate(unsigned long start, unsigned long end, int node,
                struct vmem_altmap *altmap);
  void vmemmap_populate_print_last(void);
@@@ -3290,7 -3426,6 +3427,6 @@@ enum mf_flags 
  int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
                      unsigned long count, int mf_flags);
  extern int memory_failure(unsigned long pfn, int flags);
- extern void memory_failure_queue(unsigned long pfn, int flags);
  extern void memory_failure_queue_kick(int cpu);
  extern int unpoison_memory(unsigned long pfn);
  extern int sysctl_memory_failure_early_kill;
@@@ -3299,12 -3434,42 +3435,42 @@@ extern void shake_page(struct page *p)
  extern atomic_long_t num_poisoned_pages __read_mostly;
  extern int soft_offline_page(unsigned long pfn, int flags);
  #ifdef CONFIG_MEMORY_FAILURE
- extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags);
+ extern void memory_failure_queue(unsigned long pfn, int flags);
+ extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+                                       bool *migratable_cleared);
+ void num_poisoned_pages_inc(unsigned long pfn);
+ void num_poisoned_pages_sub(unsigned long pfn, long i);
  #else
- static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+ static inline void memory_failure_queue(unsigned long pfn, int flags)
+ {
+ }
+ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+                                       bool *migratable_cleared)
  {
        return 0;
  }
+ static inline void num_poisoned_pages_inc(unsigned long pfn)
+ {
+ }
+ static inline void num_poisoned_pages_sub(unsigned long pfn, long i)
+ {
+ }
+ #endif
+ #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
+ extern void memblk_nr_poison_inc(unsigned long pfn);
+ extern void memblk_nr_poison_sub(unsigned long pfn, long i);
+ #else
+ static inline void memblk_nr_poison_inc(unsigned long pfn)
+ {
+ }
+ static inline void memblk_nr_poison_sub(unsigned long pfn, long i)
+ {
+ }
  #endif
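
Thanks to the no-op stubs above, poison-accounting call sites need no #ifdef of their own; a minimal sketch with a hypothetical caller:

        /* Illustrative only: example_account_poison() is not part of this patch. */
        static void example_account_poison(unsigned long pfn)
        {
                num_poisoned_pages_inc(pfn);    /* empty inline without CONFIG_MEMORY_FAILURE */
                memory_failure_queue(pfn, 0);   /* likewise */
        }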
  
  #ifndef arch_memory_failure
diff --combined include/linux/mmzone.h
index 9c49ec5d0e254802018bce581baa5c8f8665a50a,128f3cde800cb5a6f01e525c7aaa41daeb149486..cd28a100d9e4f7fd8cfe16068aab68a914d80729
@@@ -986,25 -986,6 +986,25 @@@ static inline bool is_zone_device_page(
  {
        return page_zonenum(page) == ZONE_DEVICE;
  }
 +
 +/*
 + * Consecutive zone device pages should not be merged into the same sgl
 + * or bvec segment with other types of pages or if they belong to different
 + * pgmaps. Otherwise getting the pgmap of a given segment is not possible
 + * without scanning the entire segment. This helper returns true if both
 + * pages are not zone device pages, or if both are zone device pages with
 + * the same pgmap.
 + */
 +static inline bool zone_device_pages_have_same_pgmap(const struct page *a,
 +                                                   const struct page *b)
 +{
 +      if (is_zone_device_page(a) != is_zone_device_page(b))
 +              return false;
 +      if (!is_zone_device_page(a))
 +              return true;
 +      return a->pgmap == b->pgmap;
 +}
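
A small usage sketch for the helper above, assuming a hypothetical segment-merging path (the function name is illustrative):

        /* Illustrative only: may @next join the bio/sgl segment that ends with @prev? */
        static bool example_pages_mergeable(const struct page *prev,
                                            const struct page *next)
        {
                /* Keep pages from different pgmaps (or mixed types) in separate segments. */
                return zone_device_pages_have_same_pgmap(prev, next);
        }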
 +
  extern void memmap_init_zone_device(struct zone *, unsigned long,
                                    unsigned long, struct dev_pagemap *);
  #else
@@@ -1012,11 -993,6 +1012,11 @@@ static inline bool is_zone_device_page(
  {
        return false;
  }
 +static inline bool zone_device_pages_have_same_pgmap(const struct page *a,
 +                                                   const struct page *b)
 +{
 +      return true;
 +}
  #endif
  
  static inline bool folio_is_zone_device(const struct folio *folio)
@@@ -1224,7 -1200,7 +1224,7 @@@ typedef struct pglist_data 
        /* start time in ms of current promote threshold adjustment period */
        unsigned int nbp_th_start;
        /*
-        * number of promote candidate pages at stat time of current promote
+        * number of promote candidate pages at start time of current promote
         * threshold adjustment period
         */
        unsigned long nbp_th_nr_cand;
diff --combined include/linux/sched.h
index 5affff14993dff327b6a3e79b6bbc74031e5d687,079d299fa4654b682a34b8c70a00d3446dd9d5dc..853d08f7562bdaae9119ff2b9b78522ca561ee48
@@@ -870,9 -870,6 +870,6 @@@ struct task_struct 
        struct mm_struct                *mm;
        struct mm_struct                *active_mm;
  
- #ifdef SPLIT_RSS_COUNTING
-       struct task_rss_stat            rss_stat;
- #endif
        int                             exit_state;
        int                             exit_code;
        int                             exit_signal;
        unsigned                        sched_reset_on_fork:1;
        unsigned                        sched_contributes_to_load:1;
        unsigned                        sched_migrated:1;
 -#ifdef CONFIG_PSI
 -      unsigned                        sched_psi_wake_requeue:1;
 -#endif
  
        /* Force alignment to the next boundary: */
        unsigned                        :0;
        unsigned int                    futex_state;
  #endif
  #ifdef CONFIG_PERF_EVENTS
 -      struct perf_event_context       *perf_event_ctxp[perf_nr_task_contexts];
 +      struct perf_event_context       *perf_event_ctxp;
        struct mutex                    perf_event_mutex;
        struct list_head                perf_event_list;
  #endif
diff --combined kernel/cgroup/cpuset.c
index 589827ccda8b992f03510702b2de92db0140b597,3ea2e836e93eba39541f0f2ea1c3831119d94284..a29c0b13706bb0b8230596f2ef4bf4fbe4e2666c
@@@ -550,15 -550,11 +550,15 @@@ static void guarantee_online_mems(struc
  /*
   * update task's spread flag if cpuset's page/slab spread flag is set
   *
 - * Call with callback_lock or cpuset_rwsem held.
 + * Call with callback_lock or cpuset_rwsem held. The check can be skipped
 + * if on default hierarchy.
   */
 -static void cpuset_update_task_spread_flag(struct cpuset *cs,
 +static void cpuset_update_task_spread_flags(struct cpuset *cs,
                                        struct task_struct *tsk)
  {
 +      if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
 +              return;
 +
        if (is_spread_page(cs))
                task_set_spread_page(tsk);
        else
@@@ -2157,7 -2153,7 +2157,7 @@@ static void update_tasks_flags(struct c
  
        css_task_iter_start(&cs->css, 0, &it);
        while ((task = css_task_iter_next(&it)))
 -              cpuset_update_task_spread_flag(cs, task);
 +              cpuset_update_task_spread_flags(cs, task);
        css_task_iter_end(&it);
  }
  
@@@ -2513,28 -2509,12 +2513,28 @@@ static void cpuset_attach(struct cgroup
        struct cgroup_subsys_state *css;
        struct cpuset *cs;
        struct cpuset *oldcs = cpuset_attach_old_cs;
 +      bool cpus_updated, mems_updated;
  
        cgroup_taskset_first(tset, &css);
        cs = css_cs(css);
  
        lockdep_assert_cpus_held();     /* see cgroup_attach_lock() */
        percpu_down_write(&cpuset_rwsem);
 +      cpus_updated = !cpumask_equal(cs->effective_cpus,
 +                                    oldcs->effective_cpus);
 +      mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems);
 +
 +      /*
 +       * In the default hierarchy, enabling cpuset in the child cgroups
 +       * will trigger a number of cpuset_attach() calls with no change
 +       * in effective cpus and mems. In that case, we can optimize out
 +       * by skipping the task iteration and update.
 +       */
 +      if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
 +          !cpus_updated && !mems_updated) {
 +              cpuset_attach_nodemask_to = cs->effective_mems;
 +              goto out;
 +      }
  
        guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
  
                WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
  
                cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
 -              cpuset_update_task_spread_flag(cs, task);
 +              cpuset_update_task_spread_flags(cs, task);
        }
  
        /*
         * Change mm for all threadgroup leaders. This is expensive and may
 -       * sleep and should be moved outside migration path proper.
 +       * sleep and should be moved outside migration path proper. Skip it
 +       * if there is no change in effective_mems and CS_MEMORY_MIGRATE is
 +       * not set.
         */
        cpuset_attach_nodemask_to = cs->effective_mems;
 +      if (!is_memory_migrate(cs) && !mems_updated)
 +              goto out;
 +
        cgroup_taskset_for_each_leader(leader, css, tset) {
                struct mm_struct *mm = get_task_mm(leader);
  
                }
        }
  
 +out:
        cs->old_mems_allowed = cpuset_attach_nodemask_to;
  
        cs->attach_in_progress--;
@@@ -3072,15 -3046,11 +3072,15 @@@ static struct cftype dfl_files[] = 
  };
  
  
 -/*
 - *    cpuset_css_alloc - allocate a cpuset css
 - *    cgrp:   control group that the new cpuset will be part of
 +/**
 + * cpuset_css_alloc - Allocate a cpuset css
 + * @parent_css: Parent css of the control group that the new cpuset will be
 + *              part of
 + * Return: cpuset css on success, -ENOMEM on failure.
 + *
 + * Allocate and initialize a new cpuset css for a non-NULL @parent_css; return
 + * the top cpuset css otherwise.
   */
 -
  static struct cgroup_subsys_state *
  cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
  {
@@@ -3660,11 -3630,6 +3660,6 @@@ static int cpuset_track_online_nodes(st
        return NOTIFY_OK;
  }
  
- static struct notifier_block cpuset_track_online_nodes_nb = {
-       .notifier_call = cpuset_track_online_nodes,
-       .priority = 10,         /* ??! */
- };
  /**
   * cpuset_init_smp - initialize cpus_allowed
   *
@@@ -3682,7 -3647,7 +3677,7 @@@ void __init cpuset_init_smp(void
        cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask);
        top_cpuset.effective_mems = node_states[N_MEMORY];
  
-       register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
+       hotplug_memory_notifier(cpuset_track_online_nodes, CPUSET_CALLBACK_PRI);
  
        cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
        BUG_ON(!cpuset_migrate_mm_wq);
diff --combined kernel/fork.c
index 89b8b6c085921a605c0633de4e8bd0af589f788a,1be4c4ab7f3e777b6a10bcd70a94c0ab421e3190..7a08025d2c99ce904920151e701cdcf9f0233519
@@@ -75,6 -75,7 +75,6 @@@
  #include <linux/freezer.h>
  #include <linux/delayacct.h>
  #include <linux/taskstats_kern.h>
 -#include <linux/random.h>
  #include <linux/tty.h>
  #include <linux/fs_struct.h>
  #include <linux/magic.h>
@@@ -96,7 -97,6 +96,7 @@@
  #include <linux/scs.h>
  #include <linux/io_uring.h>
  #include <linux/bpf.h>
 +#include <linux/stackprotector.h>
  
  #include <asm/pgalloc.h>
  #include <linux/uaccess.h>
@@@ -535,9 -535,6 +535,9 @@@ void put_task_stack(struct task_struct 
  
  void free_task(struct task_struct *tsk)
  {
 +#ifdef CONFIG_SECCOMP
 +      WARN_ON_ONCE(tsk->seccomp.filter);
 +#endif
        release_user_cpus_ptr(tsk);
        scs_release(tsk);
  
@@@ -756,8 -753,13 +756,13 @@@ static void check_mm(struct mm_struct *
                         "Please make sure 'struct resident_page_types[]' is updated as well");
  
        for (i = 0; i < NR_MM_COUNTERS; i++) {
-               long x = atomic_long_read(&mm->rss_stat.count[i]);
+               long x = percpu_counter_sum(&mm->rss_stat[i]);
  
+               if (likely(!x))
+                       continue;
+               /* Making sure this is not due to race with CPU offlining. */
+               x = percpu_counter_sum_all(&mm->rss_stat[i]);
                if (unlikely(x))
                        pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n",
                                 mm, resident_page_types[i], x);
   */
  void __mmdrop(struct mm_struct *mm)
  {
+       int i;
        BUG_ON(mm == &init_mm);
        WARN_ON_ONCE(mm == current->mm);
        WARN_ON_ONCE(mm == current->active_mm);
        check_mm(mm);
        put_user_ns(mm->user_ns);
        mm_pasid_drop(mm);
+       for (i = 0; i < NR_MM_COUNTERS; i++)
+               percpu_counter_destroy(&mm->rss_stat[i]);
        free_mm(mm);
  }
  EXPORT_SYMBOL_GPL(__mmdrop);
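
With rss_stat converted to an array of per-CPU counters, readers can choose between a cheap approximate read and an exact (but more expensive) sum, as check_mm() above does; a hedged sketch with hypothetical helper names:

        /* Illustrative only. */
        static inline s64 example_get_rss(struct mm_struct *mm, int member)
        {
                /* Cheap, approximate value for reporting paths. */
                return percpu_counter_read_positive(&mm->rss_stat[member]);
        }

        static inline s64 example_get_rss_exact(struct mm_struct *mm, int member)
        {
                /* Exact value; iterates all CPUs. */
                return percpu_counter_sum(&mm->rss_stat[member]);
        }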
@@@ -1110,6 -1117,8 +1120,8 @@@ static void mm_init_uprobes_state(struc
  static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
        struct user_namespace *user_ns)
  {
+       int i;
        mt_init_flags(&mm->mm_mt, MM_MT_FLAGS);
        mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock);
        atomic_set(&mm->mm_users, 1);
        if (init_new_context(p, mm))
                goto fail_nocontext;
  
+       for (i = 0; i < NR_MM_COUNTERS; i++)
+               if (percpu_counter_init(&mm->rss_stat[i], 0, GFP_KERNEL_ACCOUNT))
+                       goto fail_pcpu;
        mm->user_ns = get_user_ns(user_ns);
        lru_gen_init_mm(mm);
        return mm;
  
+ fail_pcpu:
+       while (i > 0)
+               percpu_counter_destroy(&mm->rss_stat[--i]);
  fail_nocontext:
        mm_free_pgd(mm);
  fail_nopgd:
@@@ -2046,6 -2062,15 +2065,6 @@@ static __latent_entropy struct task_str
                        return ERR_PTR(-EINVAL);
        }
  
 -      /*
 -       * If the new process will be in a different time namespace
 -       * do not allow it to share VM or a thread group with the forking task.
 -       */
 -      if (clone_flags & (CLONE_THREAD | CLONE_VM)) {
 -              if (nsp->time_ns != nsp->time_ns_for_children)
 -                      return ERR_PTR(-EINVAL);
 -      }
 -
        if (clone_flags & CLONE_PIDFD) {
                /*
                 * - CLONE_DETACHED is blocked so that we can potentially
  
        spin_lock(&current->sighand->siglock);
  
 -      /*
 -       * Copy seccomp details explicitly here, in case they were changed
 -       * before holding sighand lock.
 -       */
 -      copy_seccomp(p);
 -
        rv_task_fork(p);
  
        rseq_fork(p, clone_flags);
                goto bad_fork_cancel_cgroup;
        }
  
 +      /* No more failure paths after this point. */
 +
 +      /*
 +       * Copy seccomp details explicitly here, in case they were changed
 +       * before holding sighand lock.
 +       */
 +      copy_seccomp(p);
 +
        init_task_pid_links(p);
        if (likely(p->pid)) {
                ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
diff --combined kernel/sysctl.c
index a4d8e19e231a5cf3ff68a4834878e8f70a6ea5b3,71a4350ac601b31a838a686c4f15346f4ffd77a3..137d4abe3eda11a6f891920c59fd46ea3f5614f7
@@@ -267,14 -267,13 +267,14 @@@ int proc_dostring(struct ctl_table *tab
                        ppos);
  }
  
 -static size_t proc_skip_spaces(char **buf)
 +static void proc_skip_spaces(char **buf, size_t *size)
  {
 -      size_t ret;
 -      char *tmp = skip_spaces(*buf);
 -      ret = tmp - *buf;
 -      *buf = tmp;
 -      return ret;
 +      while (*size) {
 +              if (!isspace(**buf))
 +                      break;
 +              (*size)--;
 +              (*buf)++;
 +      }
  }
  
  static void proc_skip_char(char **buf, size_t *size, const char v)
@@@ -343,12 -342,13 +343,12 @@@ static int proc_get_long(char **buf, si
                          unsigned long *val, bool *neg,
                          const char *perm_tr, unsigned perm_tr_len, char *tr)
  {
 -      int len;
        char *p, tmp[TMPBUFLEN];
 +      ssize_t len = *size;
  
 -      if (!*size)
 +      if (len <= 0)
                return -EINVAL;
  
 -      len = *size;
        if (len > TMPBUFLEN - 1)
                len = TMPBUFLEN - 1;
  
@@@ -521,7 -521,7 +521,7 @@@ static int __do_proc_dointvec(void *tbl
                bool neg;
  
                if (write) {
 -                      left -= proc_skip_spaces(&p);
 +                      proc_skip_spaces(&p, &left);
  
                        if (!left)
                                break;
        if (!write && !first && left && !err)
                proc_put_char(&buffer, &left, '\n');
        if (write && !err && left)
 -              left -= proc_skip_spaces(&p);
 +              proc_skip_spaces(&p, &left);
        if (write && first)
                return err ? : -EINVAL;
        *lenp -= left;
@@@ -590,7 -590,7 +590,7 @@@ static int do_proc_douintvec_w(unsigne
        if (left > PAGE_SIZE - 1)
                left = PAGE_SIZE - 1;
  
 -      left -= proc_skip_spaces(&p);
 +      proc_skip_spaces(&p, &left);
        if (!left) {
                err = -EINVAL;
                goto out_free;
        }
  
        if (!err && left)
 -              left -= proc_skip_spaces(&p);
 +              proc_skip_spaces(&p, &left);
  
  out_free:
        if (err)
@@@ -1075,7 -1075,7 +1075,7 @@@ static int __do_proc_doulongvec_minmax(
                if (write) {
                        bool neg;
  
 -                      left -= proc_skip_spaces(&p);
 +                      proc_skip_spaces(&p, &left);
                        if (!left)
                                break;
  
        if (!write && !first && left && !err)
                proc_put_char(&buffer, &left, '\n');
        if (write && !err)
 -              left -= proc_skip_spaces(&p);
 +              proc_skip_spaces(&p, &left);
        if (write && first)
                return err ? : -EINVAL;
        *lenp -= left;
@@@ -1633,6 -1633,25 +1633,6 @@@ int proc_do_static_key(struct ctl_tabl
  }
  
  static struct ctl_table kern_table[] = {
 -#ifdef CONFIG_NUMA_BALANCING
 -      {
 -              .procname       = "numa_balancing",
 -              .data           = NULL, /* filled in by handler */
 -              .maxlen         = sizeof(unsigned int),
 -              .mode           = 0644,
 -              .proc_handler   = sysctl_numa_balancing,
 -              .extra1         = SYSCTL_ZERO,
 -              .extra2         = SYSCTL_FOUR,
 -      },
 -      {
 -              .procname       = "numa_balancing_promote_rate_limit_MBps",
 -              .data           = &sysctl_numa_balancing_promote_rate_limit,
 -              .maxlen         = sizeof(unsigned int),
 -              .mode           = 0644,
 -              .proc_handler   = proc_dointvec_minmax,
 -              .extra1         = SYSCTL_ZERO,
 -      },
 -#endif /* CONFIG_NUMA_BALANCING */
        {
                .procname       = "panic",
                .data           = &panic_timeout,
@@@ -2106,6 -2125,7 +2106,7 @@@ static struct ctl_table vm_table[] = 
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = SYSCTL_ZERO,
+               .extra2         = (void *)&page_cluster_max,
        },
        {
                .procname       = "dirtytime_expire_seconds",
diff --combined lib/Kconfig.debug
index 9dc5066654fd56276862b1686b4d8932d3da8b6d,a0dc28fdc5676928733ba7eaf3da96ac18769301..1b2bdc02abf4734e39be636b8e5dae06b7503838
@@@ -1717,16 -1717,6 +1717,16 @@@ config LATENCYTO
          Enable this option if you want to use the LatencyTOP tool
          to find out which userspace is blocking on what kernel operations.
  
 +config DEBUG_CGROUP_REF
 +      bool "Disable inlining of cgroup css reference count functions"
 +      depends on DEBUG_KERNEL
 +      depends on CGROUPS
 +      depends on KPROBES
 +      default n
 +      help
 +        Force cgroup css reference count functions to not be inlined so
 +        that they can be kprobed for debugging.
 +
  source "kernel/trace/Kconfig"
  
  config PROVIDE_OHCI1394_DMA_INIT
@@@ -1885,14 -1875,8 +1885,14 @@@ config NETDEV_NOTIFIER_ERROR_INJEC
          If unsure, say N.
  
  config FUNCTION_ERROR_INJECTION
 -      def_bool y
 +      bool "Fault-injections of functions"
        depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
 +      help
 +        Add fault injections into various functions that are annotated with
 +        ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return
 +        value of these functions. This is useful to test error paths of code.
 +
 +        If unsure, say N
  
  config FAULT_INJECTION
        bool "Fault-injection framework"
@@@ -2260,6 -2244,7 +2260,7 @@@ config TEST_XARRA
        tristate "Test the XArray code at runtime"
  
  config TEST_MAPLE_TREE
+       depends on DEBUG_KERNEL
        select DEBUG_MAPLE_TREE
        tristate "Test the Maple Tree code at runtime"
  
@@@ -2823,22 -2808,6 +2824,22 @@@ config RUST_OVERFLOW_CHECK
  
          If unsure, say Y.
  
 +config RUST_BUILD_ASSERT_ALLOW
 +      bool "Allow unoptimized build-time assertions"
 +      depends on RUST
 +      help
 +        Controls how `build_error!` and `build_assert!` are handled during build.
 +
 +        If calls to them exist in the binary, it may indicate a violated invariant
 +        or that the optimizer failed to verify the invariant during compilation.
 +
 +        This should not happen, thus by default the build is aborted. However,
 +        as an escape hatch, you can choose Y here to ignore them during build
 +        and let the check be carried out at runtime (with `panic!` being called if
 +        the check fails).
 +
 +        If unsure, say N.
 +
  endmenu # "Rust"
  
  source "Documentation/Kconfig"
diff --combined lib/Kconfig.kasan
index 836f70393e22cea591e593b689f25e5101f3c15e,ba5b27962c348c2a523e06b10df1bff2e5345c0c..be6ee60202908394540f68bf7ff4b25f296cf78d
@@@ -37,7 -37,7 +37,7 @@@ menuconfig KASA
                     (HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \
                    CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \
                   HAVE_ARCH_KASAN_HW_TAGS
 -      depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB)
 +      depends on (SLUB && SYSFS && !SLUB_TINY) || (SLAB && !DEBUG_SLAB)
        select STACKDEPOT_ALWAYS_INIT
        help
          Enables KASAN (Kernel Address Sanitizer) - a dynamic memory safety
@@@ -181,7 -181,7 +181,7 @@@ config KASAN_VMALLO
  
  config KASAN_KUNIT_TEST
        tristate "KUnit-compatible tests of KASAN bug detection capabilities" if !KUNIT_ALL_TESTS
-       depends on KASAN && KUNIT
+       depends on KASAN && KUNIT && TRACEPOINTS
        default KUNIT_ALL_TESTS
        help
          A KUnit-based KASAN test suite. Triggers different kinds of
diff --combined mm/Kconfig
index 623d95659ff91af57e36f22f1a316d3983d94d42,4e8a2697f28d943825899822d7bdf45d5c0b5c17..34d36958b8ac981cd4b8ea5c3cdc415bd67d2d9a
@@@ -219,43 -219,17 +219,43 @@@ config SLU
           and has enhanced diagnostics. SLUB is the default choice for
           a slab allocator.
  
 -config SLOB
 +config SLOB_DEPRECATED
        depends on EXPERT
 -      bool "SLOB (Simple Allocator)"
 +      bool "SLOB (Simple Allocator - DEPRECATED)"
        depends on !PREEMPT_RT
        help
 +         Deprecated and scheduled for removal in a few cycles. SLUB
 +         recommended as replacement. CONFIG_SLUB_TINY can be considered
 +         on systems with 16MB or less RAM.
 +
 +         If you need SLOB to stay, please contact [email protected] and
 +         people listed in the SLAB ALLOCATOR section of MAINTAINERS file,
 +         with your use case.
 +
           SLOB replaces the stock allocator with a drastically simpler
           allocator. SLOB is generally more space efficient but
           does not perform as well on large systems.
  
  endchoice
  
 +config SLOB
 +      bool
 +      default y
 +      depends on SLOB_DEPRECATED
 +
 +config SLUB_TINY
 +      bool "Configure SLUB for minimal memory footprint"
 +      depends on SLUB && EXPERT
 +      select SLAB_MERGE_DEFAULT
 +      help
 +         Configures the SLUB allocator in a way to achieve minimal memory
 +         footprint, sacrificing scalability, debugging and other features.
 +         This is intended only for the smallest systems that previously used
 +         the SLOB allocator, and is not recommended for systems with more than
 +         16MB RAM.
 +
 +         If unsure, say N.
 +
  config SLAB_MERGE_DEFAULT
        bool "Allow slab caches to be merged"
        default y
  
  config SLAB_FREELIST_RANDOM
        bool "Randomize slab freelist"
 -      depends on SLAB || SLUB
 +      depends on SLAB || (SLUB && !SLUB_TINY)
        help
          Randomizes the freelist order used on creating new pages. This
          security feature reduces the predictability of the kernel slab
  
  config SLAB_FREELIST_HARDENED
        bool "Harden slab freelist metadata"
 -      depends on SLAB || SLUB
 +      depends on SLAB || (SLUB && !SLUB_TINY)
        help
          Many kernel heap attacks try to target slab cache metadata and
          other infrastructure. This option makes minor performance
  config SLUB_STATS
        default n
        bool "Enable SLUB performance statistics"
 -      depends on SLUB && SYSFS
 +      depends on SLUB && SYSFS && !SLUB_TINY
        help
          SLUB statistics are useful to debug SLUB's allocation behavior in
          order to find ways to optimize the allocator. This should never be
  
  config SLUB_CPU_PARTIAL
        default y
 -      depends on SLUB && SMP
 +      depends on SLUB && SMP && !SLUB_TINY
        bool "SLUB per cpu partial cache"
        help
          Per cpu partial caches accelerate object allocation and freeing
@@@ -801,7 -775,7 +801,7 @@@ endchoic
  
  config THP_SWAP
        def_bool y
-       depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP
+       depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP && 64BIT
        help
          Swap transparent huge pages in one piece, without splitting.
          XXX: For now, swap cluster backing transparent huge page
@@@ -1100,7 -1074,13 +1100,13 @@@ config IO_MAPPIN
        bool
  
  config SECRETMEM
-       def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED
+       default y
+       bool "Enable memfd_secret() system call" if EXPERT
+       depends on ARCH_HAS_SET_DIRECT_MAP
+       help
+         Enable the memfd_secret() system call with the ability to create
+         memory areas visible only in the context of the owning process and
+         not mapped to other processes and other kernel page tables.
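
For reference, a minimal userspace sketch of the memfd_secret() usage this option enables (error handling trimmed; __NR_memfd_secret needs reasonably recent kernel headers):

        #define _GNU_SOURCE
        #include <sys/mman.h>
        #include <sys/syscall.h>
        #include <unistd.h>

        /* Illustrative only: create and map a secret memory area of @size bytes. */
        static void *example_secret_area(size_t size)
        {
                void *p = MAP_FAILED;
                int fd = syscall(__NR_memfd_secret, 0); /* only FD_CLOEXEC is accepted as a flag */

                if (fd < 0)
                        return MAP_FAILED;
                if (ftruncate(fd, size) == 0)
                        p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
                close(fd);
                return p;
        }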
  
  config ANON_VMA_NAME
        bool "Anonymous VMA name support"
@@@ -1133,17 -1113,10 +1139,10 @@@ config HAVE_ARCH_USERFAULTFD_MINO
        help
          Arch has userfaultfd minor fault support
  
- config PTE_MARKER
-       bool
-       help
-         Allows to create marker PTEs for file-backed memory.
  config PTE_MARKER_UFFD_WP
        bool "Userfaultfd write protection support for shmem/hugetlbfs"
        default y
        depends on HAVE_ARCH_USERFAULTFD_WP
-       select PTE_MARKER
  
        help
          Allows creating marker PTEs for userfaultfd write protection
diff --combined mm/gup.c
index 98aac6201e1bc7e324cf274994f79be126fe1018,82b275bbaad545b303e041da3c30908158679fb6..f212d571b563d88b1d7172fee5d69d5e4ea1e33c
+++ b/mm/gup.c
@@@ -123,9 -123,6 +123,9 @@@ retry
   */
  struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
  {
 +      if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
 +              return NULL;
 +
        if (flags & FOLL_GET)
                return try_get_folio(page, refs);
        else if (flags & FOLL_PIN) {
@@@ -205,22 -202,17 +205,22 @@@ static void gup_put_folio(struct folio 
   * time. Cases: please see the try_grab_folio() documentation, with
   * "refs=1".
   *
 - * Return: true for success, or if no action was required (if neither FOLL_PIN
 - * nor FOLL_GET was set, nothing is done). False for failure: FOLL_GET or
 - * FOLL_PIN was set, but the page could not be grabbed.
 + * Return: 0 for success, or if no action was required (if neither FOLL_PIN
 + * nor FOLL_GET was set, nothing is done). A negative error code for failure:
 + *
 + *   -ENOMEM          FOLL_GET or FOLL_PIN was set, but the page could not
 + *                    be grabbed.
   */
 -bool __must_check try_grab_page(struct page *page, unsigned int flags)
 +int __must_check try_grab_page(struct page *page, unsigned int flags)
  {
        struct folio *folio = page_folio(page);
  
        WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));
        if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))
 -              return false;
 +              return -ENOMEM;
 +
 +      if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
 +              return -EREMOTEIO;
  
        if (flags & FOLL_GET)
                folio_ref_inc(folio);
                node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1);
        }
  
 -      return true;
 +      return 0;
  }
  
  /**
@@@ -545,42 -537,13 +545,13 @@@ static struct page *follow_page_pte(str
        if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
                         (FOLL_PIN | FOLL_GET)))
                return ERR_PTR(-EINVAL);
-       /*
-        * Considering PTE level hugetlb, like continuous-PTE hugetlb on
-        * ARM64 architecture.
-        */
-       if (is_vm_hugetlb_page(vma)) {
-               page = follow_huge_pmd_pte(vma, address, flags);
-               if (page)
-                       return page;
-               return no_page_table(vma, flags);
-       }
- retry:
        if (unlikely(pmd_bad(*pmd)))
                return no_page_table(vma, flags);
  
        ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
        pte = *ptep;
-       if (!pte_present(pte)) {
-               swp_entry_t entry;
-               /*
-                * KSM's break_ksm() relies upon recognizing a ksm page
-                * even while it is being migrated, so for that case we
-                * need migration_entry_wait().
-                */
-               if (likely(!(flags & FOLL_MIGRATION)))
-                       goto no_page;
-               if (pte_none(pte))
-                       goto no_page;
-               entry = pte_to_swp_entry(pte);
-               if (!is_migration_entry(entry))
-                       goto no_page;
-               pte_unmap_unlock(ptep, ptl);
-               migration_entry_wait(mm, pmd, address);
-               goto retry;
-       }
+       if (!pte_present(pte))
+               goto no_page;
        if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
                goto no_page;
  
                }
        }
  
-       if (!pte_write(pte) && gup_must_unshare(flags, page)) {
+       if (!pte_write(pte) && gup_must_unshare(vma, flags, page)) {
                page = ERR_PTR(-EMLINK);
                goto out;
        }
                       !PageAnonExclusive(page), page);
  
        /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */
 -      if (unlikely(!try_grab_page(page, flags))) {
 -              page = ERR_PTR(-ENOMEM);
 +      ret = try_grab_page(page, flags);
 +      if (unlikely(ret)) {
 +              page = ERR_PTR(ret);
                goto out;
        }
 +
        /*
         * We need to make the page accessible if and only if we are going
         * to access its content (the FOLL_PIN case).  Please see
@@@ -690,42 -651,8 +661,8 @@@ static struct page *follow_pmd_mask(str
        pmdval = READ_ONCE(*pmd);
        if (pmd_none(pmdval))
                return no_page_table(vma, flags);
-       if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) {
-               page = follow_huge_pmd_pte(vma, address, flags);
-               if (page)
-                       return page;
+       if (!pmd_present(pmdval))
                return no_page_table(vma, flags);
-       }
-       if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
-               page = follow_huge_pd(vma, address,
-                                     __hugepd(pmd_val(pmdval)), flags,
-                                     PMD_SHIFT);
-               if (page)
-                       return page;
-               return no_page_table(vma, flags);
-       }
- retry:
-       if (!pmd_present(pmdval)) {
-               /*
-                * Should never reach here, if thp migration is not supported;
-                * Otherwise, it must be a thp migration entry.
-                */
-               VM_BUG_ON(!thp_migration_supported() ||
-                                 !is_pmd_migration_entry(pmdval));
-               if (likely(!(flags & FOLL_MIGRATION)))
-                       return no_page_table(vma, flags);
-               pmd_migration_entry_wait(mm, pmd);
-               pmdval = READ_ONCE(*pmd);
-               /*
-                * MADV_DONTNEED may convert the pmd to null because
-                * mmap_lock is held in read mode
-                */
-               if (pmd_none(pmdval))
-                       return no_page_table(vma, flags);
-               goto retry;
-       }
        if (pmd_devmap(pmdval)) {
                ptl = pmd_lock(mm, pmd);
                page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
        if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags))
                return no_page_table(vma, flags);
  
- retry_locked:
        ptl = pmd_lock(mm, pmd);
-       if (unlikely(pmd_none(*pmd))) {
-               spin_unlock(ptl);
-               return no_page_table(vma, flags);
-       }
        if (unlikely(!pmd_present(*pmd))) {
                spin_unlock(ptl);
-               if (likely(!(flags & FOLL_MIGRATION)))
-                       return no_page_table(vma, flags);
-               pmd_migration_entry_wait(mm, pmd);
-               goto retry_locked;
+               return no_page_table(vma, flags);
        }
        if (unlikely(!pmd_trans_huge(*pmd))) {
                spin_unlock(ptl);
@@@ -793,20 -712,6 +722,6 @@@ static struct page *follow_pud_mask(str
        pud = pud_offset(p4dp, address);
        if (pud_none(*pud))
                return no_page_table(vma, flags);
-       if (pud_huge(*pud) && is_vm_hugetlb_page(vma)) {
-               page = follow_huge_pud(mm, address, pud, flags);
-               if (page)
-                       return page;
-               return no_page_table(vma, flags);
-       }
-       if (is_hugepd(__hugepd(pud_val(*pud)))) {
-               page = follow_huge_pd(vma, address,
-                                     __hugepd(pud_val(*pud)), flags,
-                                     PUD_SHIFT);
-               if (page)
-                       return page;
-               return no_page_table(vma, flags);
-       }
        if (pud_devmap(*pud)) {
                ptl = pud_lock(mm, pud);
                page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
@@@ -826,7 -731,6 +741,6 @@@ static struct page *follow_p4d_mask(str
                                    struct follow_page_context *ctx)
  {
        p4d_t *p4d;
-       struct page *page;
  
        p4d = p4d_offset(pgdp, address);
        if (p4d_none(*p4d))
        if (unlikely(p4d_bad(*p4d)))
                return no_page_table(vma, flags);
  
-       if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
-               page = follow_huge_pd(vma, address,
-                                     __hugepd(p4d_val(*p4d)), flags,
-                                     P4D_SHIFT);
-               if (page)
-                       return page;
-               return no_page_table(vma, flags);
-       }
        return follow_pud_mask(vma, address, p4d, flags, ctx);
  }
  
@@@ -880,10 -776,18 +786,18 @@@ static struct page *follow_page_mask(st
  
        ctx->page_mask = 0;
  
-       /* make this handle hugepd */
-       page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
-       if (!IS_ERR(page)) {
-               WARN_ON_ONCE(flags & (FOLL_GET | FOLL_PIN));
+       /*
+        * Call hugetlb_follow_page_mask for hugetlb vmas as it will use
+        * special hugetlb page table walking code.  This eliminates the
+        * need to check for hugetlb entries in the general walking code.
+        *
+        * hugetlb_follow_page_mask is only for follow_page() handling here.
+        * Ordinary GUP uses follow_hugetlb_page for hugetlb processing.
+        */
+       if (is_vm_hugetlb_page(vma)) {
+               page = hugetlb_follow_page_mask(vma, address, flags);
+               if (!page)
+                       page = no_page_table(vma, flags);
                return page;
        }
  
        if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
                return no_page_table(vma, flags);
  
-       if (pgd_huge(*pgd)) {
-               page = follow_huge_pgd(mm, address, pgd, flags);
-               if (page)
-                       return page;
-               return no_page_table(vma, flags);
-       }
-       if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
-               page = follow_huge_pd(vma, address,
-                                     __hugepd(pgd_val(*pgd)), flags,
-                                     PGDIR_SHIFT);
-               if (page)
-                       return page;
-               return no_page_table(vma, flags);
-       }
        return follow_p4d_mask(vma, address, pgd, flags, ctx);
  }
  
@@@ -970,9 -859,10 +869,9 @@@ static int get_gate_page(struct mm_stru
                        goto unmap;
                *page = pte_page(*pte);
        }
 -      if (unlikely(!try_grab_page(*page, gup_flags))) {
 -              ret = -ENOMEM;
 +      ret = try_grab_page(*page, gup_flags);
 +      if (unlikely(ret))
                goto unmap;
 -      }
  out:
        ret = 0;
  unmap:
@@@ -1067,9 -957,6 +966,9 @@@ static int check_vma_flags(struct vm_ar
        if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma))
                return -EOPNOTSUPP;
  
 +      if ((gup_flags & FOLL_LONGTERM) && (gup_flags & FOLL_PCI_P2PDMA))
 +              return -EOPNOTSUPP;
 +
        if (vma_is_secretmem(vma))
                return -EFAULT;
  
                if (!(vm_flags & VM_WRITE)) {
                        if (!(gup_flags & FOLL_FORCE))
                                return -EFAULT;
+                       /* hugetlb does not support FOLL_FORCE|FOLL_WRITE. */
+                       if (is_vm_hugetlb_page(vma))
+                               return -EFAULT;
                        /*
                         * We used to let the write,force case do COW in a
                         * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could
@@@ -2117,14 -2007,19 +2019,19 @@@ static long __gup_longterm_locked(struc
                                  unsigned long nr_pages,
                                  struct page **pages,
                                  struct vm_area_struct **vmas,
+                                 int *locked,
                                  unsigned int gup_flags)
  {
+       bool must_unlock = false;
        unsigned int flags;
        long rc, nr_pinned_pages;
  
+       if (locked && WARN_ON_ONCE(!*locked))
+               return -EINVAL;
        if (!(gup_flags & FOLL_LONGTERM))
                return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
-                                              NULL, gup_flags);
+                                              locked, gup_flags);
  
        /*
         * If we get to this point then FOLL_LONGTERM is set, and FOLL_LONGTERM
                return -EINVAL;
        flags = memalloc_pin_save();
        do {
+               if (locked && !*locked) {
+                       mmap_read_lock(mm);
+                       must_unlock = true;
+                       *locked = 1;
+               }
                nr_pinned_pages = __get_user_pages_locked(mm, start, nr_pages,
-                                                         pages, vmas, NULL,
+                                                         pages, vmas, locked,
                                                          gup_flags);
                if (nr_pinned_pages <= 0) {
                        rc = nr_pinned_pages;
        } while (rc == -EAGAIN);
        memalloc_pin_restore(flags);
  
+       if (locked && *locked && must_unlock) {
+               mmap_read_unlock(mm);
+               *locked = 0;
+       }
        return rc ? rc : nr_pinned_pages;
  }
  
@@@ -2172,35 -2076,6 +2088,6 @@@ static bool is_valid_gup_flags(unsigne
  }
  
  #ifdef CONFIG_MMU
- static long __get_user_pages_remote(struct mm_struct *mm,
-                                   unsigned long start, unsigned long nr_pages,
-                                   unsigned int gup_flags, struct page **pages,
-                                   struct vm_area_struct **vmas, int *locked)
- {
-       /*
-        * Parts of FOLL_LONGTERM behavior are incompatible with
-        * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
-        * vmas. However, this only comes up if locked is set, and there are
-        * callers that do request FOLL_LONGTERM, but do not set locked. So,
-        * allow what we can.
-        */
-       if (gup_flags & FOLL_LONGTERM) {
-               if (WARN_ON_ONCE(locked))
-                       return -EINVAL;
-               /*
-                * This will check the vmas (even if our vmas arg is NULL)
-                * and return -ENOTSUPP if DAX isn't allowed in this case:
-                */
-               return __gup_longterm_locked(mm, start, nr_pages, pages,
-                                            vmas, gup_flags | FOLL_TOUCH |
-                                            FOLL_REMOTE);
-       }
-       return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
-                                      locked,
-                                      gup_flags | FOLL_TOUCH | FOLL_REMOTE);
- }
  /**
   * get_user_pages_remote() - pin user pages in memory
   * @mm:               mm_struct of target mm
@@@ -2269,8 -2144,8 +2156,8 @@@ long get_user_pages_remote(struct mm_st
        if (!is_valid_gup_flags(gup_flags))
                return -EINVAL;
  
-       return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
-                                      pages, vmas, locked);
+       return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked,
+                                    gup_flags | FOLL_TOUCH | FOLL_REMOTE);
  }
  EXPORT_SYMBOL(get_user_pages_remote);
  
@@@ -2282,14 -2157,6 +2169,6 @@@ long get_user_pages_remote(struct mm_st
  {
        return 0;
  }
- static long __get_user_pages_remote(struct mm_struct *mm,
-                                   unsigned long start, unsigned long nr_pages,
-                                   unsigned int gup_flags, struct page **pages,
-                                   struct vm_area_struct **vmas, int *locked)
- {
-       return 0;
- }
  #endif /* !CONFIG_MMU */
  
  /**
@@@ -2316,7 -2183,7 +2195,7 @@@ long get_user_pages(unsigned long start
                return -EINVAL;
  
        return __gup_longterm_locked(current->mm, start, nr_pages,
-                                    pages, vmas, gup_flags | FOLL_TOUCH);
+                                    pages, vmas, NULL, gup_flags | FOLL_TOUCH);
  }
  EXPORT_SYMBOL(get_user_pages);
  
@@@ -2342,18 -2209,9 +2221,9 @@@ long get_user_pages_unlocked(unsigned l
        int locked = 1;
        long ret;
  
-       /*
-        * FIXME: Current FOLL_LONGTERM behavior is incompatible with
-        * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
-        * vmas.  As there are no users of this flag in this call we simply
-        * disallow this option for now.
-        */
-       if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
-               return -EINVAL;
        mmap_read_lock(mm);
-       ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL,
-                                     &locked, gup_flags | FOLL_TOUCH);
+       ret = __gup_longterm_locked(mm, start, nr_pages, pages, NULL, &locked,
+                                   gup_flags | FOLL_TOUCH);
        if (locked)
                mmap_read_unlock(mm);
        return ret;
@@@ -2480,7 -2338,7 +2350,7 @@@ static int gup_pte_range(pmd_t pmd, pmd
                        goto pte_unmap;
                }
  
-               if (!pte_write(pte) && gup_must_unshare(flags, page)) {
+               if (!pte_write(pte) && gup_must_unshare(NULL, flags, page)) {
                        gup_put_folio(folio, 1, flags);
                        goto pte_unmap;
                }
@@@ -2546,15 -2404,9 +2416,15 @@@ static int __gup_device_huge(unsigned l
                        undo_dev_pagemap(nr, nr_start, flags, pages);
                        break;
                }
 +
 +              if (!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)) {
 +                      undo_dev_pagemap(nr, nr_start, flags, pages);
 +                      break;
 +              }
 +
                SetPageReferenced(page);
                pages[*nr] = page;
 -              if (unlikely(!try_grab_page(page, flags))) {
 +              if (unlikely(try_grab_page(page, flags))) {
                        undo_dev_pagemap(nr, nr_start, flags, pages);
                        break;
                }
@@@ -2672,7 -2524,7 +2542,7 @@@ static int gup_hugepte(pte_t *ptep, uns
                return 0;
        }
  
-       if (!pte_write(pte) && gup_must_unshare(flags, &folio->page)) {
+       if (!pte_write(pte) && gup_must_unshare(NULL, flags, &folio->page)) {
                gup_put_folio(folio, refs, flags);
                return 0;
        }
@@@ -2738,7 -2590,7 +2608,7 @@@ static int gup_huge_pmd(pmd_t orig, pmd
                return 0;
        }
  
-       if (!pmd_write(orig) && gup_must_unshare(flags, &folio->page)) {
+       if (!pmd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) {
                gup_put_folio(folio, refs, flags);
                return 0;
        }
@@@ -2778,7 -2630,7 +2648,7 @@@ static int gup_huge_pud(pud_t orig, pud
                return 0;
        }
  
-       if (!pud_write(orig) && gup_must_unshare(flags, &folio->page)) {
+       if (!pud_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) {
                gup_put_folio(folio, refs, flags);
                return 0;
        }
@@@ -2953,29 -2805,6 +2823,6 @@@ static bool gup_fast_permitted(unsigne
  }
  #endif
  
- static int __gup_longterm_unlocked(unsigned long start, int nr_pages,
-                                  unsigned int gup_flags, struct page **pages)
- {
-       int ret;
-       /*
-        * FIXME: FOLL_LONGTERM does not work with
-        * get_user_pages_unlocked() (see comments in that function)
-        */
-       if (gup_flags & FOLL_LONGTERM) {
-               mmap_read_lock(current->mm);
-               ret = __gup_longterm_locked(current->mm,
-                                           start, nr_pages,
-                                           pages, NULL, gup_flags);
-               mmap_read_unlock(current->mm);
-       } else {
-               ret = get_user_pages_unlocked(start, nr_pages,
-                                             pages, gup_flags);
-       }
-       return ret;
- }
  static unsigned long lockless_pages_from_mm(unsigned long start,
                                            unsigned long end,
                                            unsigned int gup_flags,
@@@ -3036,8 -2865,7 +2883,8 @@@ static int internal_get_user_pages_fast
  
        if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
                                       FOLL_FORCE | FOLL_PIN | FOLL_GET |
 -                                     FOLL_FAST_ONLY | FOLL_NOFAULT)))
 +                                     FOLL_FAST_ONLY | FOLL_NOFAULT |
 +                                     FOLL_PCI_P2PDMA)))
                return -EINVAL;
  
        if (gup_flags & FOLL_PIN)
        /* Slow path: try to get the remaining pages with get_user_pages */
        start += nr_pinned << PAGE_SHIFT;
        pages += nr_pinned;
-       ret = __gup_longterm_unlocked(start, nr_pages - nr_pinned, gup_flags,
-                                     pages);
+       ret = get_user_pages_unlocked(start, nr_pages - nr_pinned, pages,
+                                     gup_flags);
        if (ret < 0) {
                /*
                 * The caller has to unpin the pages we already pinned so
@@@ -3260,9 -3088,9 +3107,9 @@@ long pin_user_pages_remote(struct mm_st
        if (WARN_ON_ONCE(!pages))
                return -EINVAL;
  
-       gup_flags |= FOLL_PIN;
-       return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
-                                      pages, vmas, locked);
+       return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked,
+                                    gup_flags | FOLL_PIN | FOLL_TOUCH |
+                                            FOLL_REMOTE);
  }
  EXPORT_SYMBOL(pin_user_pages_remote);
  
@@@ -3296,7 -3124,7 +3143,7 @@@ long pin_user_pages(unsigned long start
  
        gup_flags |= FOLL_PIN;
        return __gup_longterm_locked(current->mm, start, nr_pages,
-                                    pages, vmas, gup_flags);
+                                    pages, vmas, NULL, gup_flags);
  }
  EXPORT_SYMBOL(pin_user_pages);
  
diff --combined mm/huge_memory.c
index ffbea56a8711a654e70a2e413134bd0865e03304,1d9ad909c87c3b3b479c674e99e741c76f9079d1..2546199ab3c0856362d926e2afb56ad70e8876b0
@@@ -1035,7 -1035,6 +1035,7 @@@ struct page *follow_devmap_pmd(struct v
        unsigned long pfn = pmd_pfn(*pmd);
        struct mm_struct *mm = vma->vm_mm;
        struct page *page;
 +      int ret;
  
        assert_spin_locked(pmd_lockptr(mm, pmd));
  
        if (!*pgmap)
                return ERR_PTR(-EFAULT);
        page = pfn_to_page(pfn);
 -      if (!try_grab_page(page, flags))
 -              page = ERR_PTR(-ENOMEM);
 +      ret = try_grab_page(page, flags);
 +      if (ret)
 +              page = ERR_PTR(ret);
  
        return page;
  }
@@@ -1195,7 -1193,6 +1195,7 @@@ struct page *follow_devmap_pud(struct v
        unsigned long pfn = pud_pfn(*pud);
        struct mm_struct *mm = vma->vm_mm;
        struct page *page;
 +      int ret;
  
        assert_spin_locked(pud_lockptr(mm, pud));
  
        if (!*pgmap)
                return ERR_PTR(-EFAULT);
        page = pfn_to_page(pfn);
 -      if (!try_grab_page(page, flags))
 -              page = ERR_PTR(-ENOMEM);
 +
 +      ret = try_grab_page(page, flags);
 +      if (ret)
 +              page = ERR_PTR(ret);
  
        return page;
  }
@@@ -1318,9 -1313,6 +1318,6 @@@ vm_fault_t do_huge_pmd_wp_page(struct v
        vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
        VM_BUG_ON_VMA(!vma->anon_vma, vma);
  
-       VM_BUG_ON(unshare && (vmf->flags & FAULT_FLAG_WRITE));
-       VM_BUG_ON(!unshare && !(vmf->flags & FAULT_FLAG_WRITE));
        if (is_huge_zero_pmd(orig_pmd))
                goto fallback;
  
@@@ -1384,7 -1376,7 +1381,7 @@@ reuse
                if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, 1))
                        update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
                spin_unlock(vmf->ptl);
-               return VM_FAULT_WRITE;
+               return 0;
        }
  
  unlock_fallback:
@@@ -1395,6 -1387,36 +1392,36 @@@ fallback
        return VM_FAULT_FALLBACK;
  }
  
+ static inline bool can_change_pmd_writable(struct vm_area_struct *vma,
+                                          unsigned long addr, pmd_t pmd)
+ {
+       struct page *page;
+       if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE)))
+               return false;
+       /* Don't touch entries that are not even readable (NUMA hinting). */
+       if (pmd_protnone(pmd))
+               return false;
+       /* Do we need write faults for softdirty tracking? */
+       if (vma_soft_dirty_enabled(vma) && !pmd_soft_dirty(pmd))
+               return false;
+       /* Do we need write faults for uffd-wp tracking? */
+       if (userfaultfd_huge_pmd_wp(vma, pmd))
+               return false;
+       if (!(vma->vm_flags & VM_SHARED)) {
+               /* See can_change_pte_writable(). */
+               page = vm_normal_page_pmd(vma, addr, pmd);
+               return page && PageAnon(page) && PageAnonExclusive(page);
+       }
+       /* See can_change_pte_writable(). */
+       return pmd_dirty(pmd);
+ }
  /* FOLL_FORCE can write to even unwritable PMDs in COW mappings. */
  static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
                                        struct vm_area_struct *vma,
@@@ -1440,7 -1462,6 +1467,7 @@@ struct page *follow_trans_huge_pmd(stru
  {
        struct mm_struct *mm = vma->vm_mm;
        struct page *page;
 +      int ret;
  
        assert_spin_locked(pmd_lockptr(mm, pmd));
  
        if (pmd_protnone(*pmd) && !gup_can_follow_protnone(flags))
                return NULL;
  
-       if (!pmd_write(*pmd) && gup_must_unshare(flags, page))
+       if (!pmd_write(*pmd) && gup_must_unshare(vma, flags, page))
                return ERR_PTR(-EMLINK);
  
        VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
                        !PageAnonExclusive(page), page);
  
 -      if (!try_grab_page(page, flags))
 -              return ERR_PTR(-ENOMEM);
 +      ret = try_grab_page(page, flags);
 +      if (ret)
 +              return ERR_PTR(ret);
  
        if (flags & FOLL_TOUCH)
                touch_pmd(vma, addr, pmd, flags & FOLL_WRITE);
@@@ -1488,8 -1508,7 +1515,7 @@@ vm_fault_t do_huge_pmd_numa_page(struc
        unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
        int page_nid = NUMA_NO_NODE;
        int target_nid, last_cpupid = (-1 & LAST_CPUPID_MASK);
-       bool migrated = false;
-       bool was_writable = pmd_savedwrite(oldpmd);
+       bool migrated = false, writable = false;
        int flags = 0;
  
        vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
        }
  
        pmd = pmd_modify(oldpmd, vma->vm_page_prot);
+       /*
+        * Detect now whether the PMD could be writable; this information
+        * is only valid while holding the PT lock.
+        */
+       writable = pmd_write(pmd);
+       if (!writable && vma_wants_manual_pte_write_upgrade(vma) &&
+           can_change_pmd_writable(vma, vmf->address, pmd))
+               writable = true;
        page = vm_normal_page_pmd(vma, haddr, pmd);
        if (!page)
                goto out_map;
  
        /* See similar comment in do_numa_page for explanation */
-       if (!was_writable)
+       if (!writable)
                flags |= TNF_NO_GROUP;
  
        page_nid = page_to_nid(page);
        }
  
        spin_unlock(vmf->ptl);
+       writable = false;
  
        migrated = migrate_misplaced_page(page, vma, target_nid);
        if (migrated) {
@@@ -1549,7 -1579,7 +1586,7 @@@ out_map
        /* Restore the PMD */
        pmd = pmd_modify(oldpmd, vma->vm_page_prot);
        pmd = pmd_mkyoung(pmd);
-       if (was_writable)
+       if (writable)
                pmd = pmd_mkwrite(pmd);
        set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
        update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
@@@ -1790,11 -1820,10 +1827,10 @@@ int change_huge_pmd(struct mmu_gather *
        struct mm_struct *mm = vma->vm_mm;
        spinlock_t *ptl;
        pmd_t oldpmd, entry;
-       bool preserve_write;
-       int ret;
        bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
        bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
        bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
+       int ret = 1;
  
        tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
  
        if (!ptl)
                return 0;
  
-       preserve_write = prot_numa && pmd_write(*pmd);
-       ret = 1;
  #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
        if (is_swap_pmd(*pmd)) {
                swp_entry_t entry = pmd_to_swp_entry(*pmd);
        oldpmd = pmdp_invalidate_ad(vma, addr, pmd);
  
        entry = pmd_modify(oldpmd, newprot);
-       if (preserve_write)
-               entry = pmd_mk_savedwrite(entry);
        if (uffd_wp) {
                entry = pmd_wrprotect(entry);
                entry = pmd_mkuffd_wp(entry);
                 */
                entry = pmd_clear_uffd_wp(entry);
        }
+       /* See change_pte_range(). */
+       if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pmd_write(entry) &&
+           can_change_pmd_writable(vma, addr, entry))
+               entry = pmd_mkwrite(entry);
        ret = HPAGE_PMD_NR;
        set_pmd_at(mm, addr, pmd, entry);
  
        if (huge_pmd_needs_flush(oldpmd, entry))
                tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE);
-       BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry));
  unlock:
        spin_unlock(ptl);
        return ret;
@@@ -2148,7 -2176,6 +2183,6 @@@ static void __split_huge_pmd_locked(str
                uffd_wp = pmd_uffd_wp(old_pmd);
  
                VM_BUG_ON_PAGE(!page_count(page), page);
-               page_ref_add(page, HPAGE_PMD_NR - 1);
  
                /*
                 * Without "freeze", we'll simply split the PMD, propagating the
                anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
                if (freeze && anon_exclusive && page_try_share_anon_rmap(page))
                        freeze = false;
+               if (!freeze)
+                       page_ref_add(page, HPAGE_PMD_NR - 1);
        }
  
        /*
                        entry = maybe_mkwrite(entry, vma);
                        if (anon_exclusive)
                                SetPageAnonExclusive(page + i);
-                       if (!write)
-                               entry = pte_wrprotect(entry);
                        if (!young)
                                entry = pte_mkold(entry);
+                       /* NOTE: this may set soft-dirty too on some archs */
+                       if (dirty)
+                               entry = pte_mkdirty(entry);
                        /*
-                        * NOTE: we don't do pte_mkdirty when dirty==true
-                        * because it breaks sparc64 which can sigsegv
-                        * random process.  Need to revisit when we figure
-                        * out what is special with sparc64.
+                        * NOTE: this needs to happen after pte_mkdirty,
+                        * because some archs (sparc64, loongarch) could
+                        * set hw write bit when mkdirty.
                         */
+                       if (!write)
+                               entry = pte_wrprotect(entry);
                        if (soft_dirty)
                                entry = pte_mksoft_dirty(entry);
                        if (uffd_wp)
                                entry = pte_mkuffd_wp(entry);
+                       page_add_anon_rmap(page + i, vma, addr, false);
                }
                pte = pte_offset_map(&_pmd, addr);
                BUG_ON(!pte_none(*pte));
                set_pte_at(mm, addr, pte, entry);
-               if (!pmd_migration)
-                       atomic_inc(&page[i]._mapcount);
                pte_unmap(pte);
        }
  
-       if (!pmd_migration) {
-               /*
-                * Set PG_double_map before dropping compound_mapcount to avoid
-                * false-negative page_mapped().
-                */
-               if (compound_mapcount(page) > 1 &&
-                   !TestSetPageDoubleMap(page)) {
-                       for (i = 0; i < HPAGE_PMD_NR; i++)
-                               atomic_inc(&page[i]._mapcount);
-               }
-               lock_page_memcg(page);
-               if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
-                       /* Last compound_mapcount is gone. */
-                       __mod_lruvec_page_state(page, NR_ANON_THPS,
-                                               -HPAGE_PMD_NR);
-                       if (TestClearPageDoubleMap(page)) {
-                               /* No need in mapcount reference anymore */
-                               for (i = 0; i < HPAGE_PMD_NR; i++)
-                                       atomic_dec(&page[i]._mapcount);
-                       }
-               }
-               unlock_page_memcg(page);
-               /* Above is effectively page_remove_rmap(page, vma, true) */
-               munlock_vma_page(page, vma, true);
-       }
+       if (!pmd_migration)
+               page_remove_rmap(page, vma, true);
+       if (freeze)
+               put_page(page);
  
        smp_wmb(); /* make pte visible before pmd */
        pmd_populate(mm, pmd, pgtable);
-       if (freeze) {
-               for (i = 0; i < HPAGE_PMD_NR; i++) {
-                       page_remove_rmap(page + i, vma, false);
-                       put_page(page + i);
-               }
-       }
  }
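
The reordered PTE assembly above encodes one subtle rule: the dirty bit has to be applied before write protection, because pte_mkdirty() may also set a hardware write bit on some architectures (sparc64, loongarch). A condensed sketch of just that ordering, with the surrounding kernel context assumed and the mk_pte() line included only for completeness:

        entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
        if (dirty)
                entry = pte_mkdirty(entry);     /* may set a HW write bit as a side effect */
        if (!write)
                entry = pte_wrprotect(entry);   /* therefore must run after pte_mkdirty() */
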
  
  void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
@@@ -2460,7 -2460,7 +2467,7 @@@ static void __split_huge_page_tail(stru
                         (1L << PG_dirty) |
                         LRU_GEN_MASK | LRU_REFS_MASK));
  
-       /* ->mapping in first tail page is compound_mapcount */
+       /* ->mapping in first and second tail page is replaced by other uses */
        VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
                        page_tail);
        page_tail->mapping = head->mapping;
         * page->private should not be set in tail pages with the exception
         * of swap cache pages that store the swp_entry_t in tail pages.
         * Fix up and warn once if private is unexpectedly set.
+        *
+        * What of 32-bit systems, on which head[1].compound_pincount overlays
+        * head[1].private?  No problem: THP_SWAP is not enabled on 32-bit, and
+        * compound_pincount must be 0 for folio_ref_freeze() to have succeeded.
         */
        if (!folio_test_swapcache(page_folio(head))) {
                VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
@@@ -2722,7 -2726,7 +2733,7 @@@ int split_huge_page_to_list(struct pag
         * split PMDs
         */
        if (!can_split_folio(folio, &extra_pins)) {
-               ret = -EBUSY;
+               ret = -EAGAIN;
                goto out_unlock;
        }
  
@@@ -2772,7 -2776,7 +2783,7 @@@ fail
                        xas_unlock(&xas);
                local_irq_enable();
                remap_page(folio, folio_nr_pages(folio));
-               ret = -EBUSY;
+               ret = -EAGAIN;
        }
  
  out_unlock:
@@@ -3076,28 -3080,28 +3087,28 @@@ static int split_huge_pages_in_file(con
        mapping = candidate->f_mapping;
  
        for (index = off_start; index < off_end; index += nr_pages) {
-               struct page *fpage = pagecache_get_page(mapping, index,
-                                               FGP_ENTRY | FGP_HEAD, 0);
+               struct folio *folio = __filemap_get_folio(mapping, index,
+                                               FGP_ENTRY, 0);
  
                nr_pages = 1;
-               if (xa_is_value(fpage) || !fpage)
+               if (xa_is_value(folio) || !folio)
                        continue;
  
-               if (!is_transparent_hugepage(fpage))
+               if (!folio_test_large(folio))
                        goto next;
  
                total++;
-               nr_pages = thp_nr_pages(fpage);
+               nr_pages = folio_nr_pages(folio);
  
-               if (!trylock_page(fpage))
+               if (!folio_trylock(folio))
                        goto next;
  
-               if (!split_huge_page(fpage))
+               if (!split_folio(folio))
                        split++;
  
-               unlock_page(fpage);
+               folio_unlock(folio);
  next:
-               put_page(fpage);
+               folio_put(folio);
                cond_resched();
        }
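
The loop above is a direct folio conversion of the debugfs split interface; the one non-obvious piece is that FGP_ENTRY makes __filemap_get_folio() hand back shadow/swap value entries as well, which carry no reference. A simplified, illustrative sketch of a single iteration:

        struct folio *folio = __filemap_get_folio(mapping, index, FGP_ENTRY, 0);

        if (!folio || xa_is_value(folio))
                continue;                       /* hole or value entry: nothing to put */
        if (folio_test_large(folio) && folio_trylock(folio)) {
                split_folio(folio);             /* 0 on success */
                folio_unlock(folio);
        }
        folio_put(folio);                       /* drop the lookup reference */
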
  
diff --combined mm/hugetlb.c
index 3d9f4abec17c61e6af64d51ef74612e908371aca,7cdbcc22587bf2f2f8fe193c93affd3909f368eb..f5f445c39dbcb6852bf346623c6bb97d123f42c2
@@@ -54,13 -54,13 +54,13 @@@ struct hstate hstates[HUGE_MAX_HSTATE]
  #ifdef CONFIG_CMA
  static struct cma *hugetlb_cma[MAX_NUMNODES];
  static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
- static bool hugetlb_cma_page(struct page *page, unsigned int order)
+ static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
  {
-       return cma_pages_valid(hugetlb_cma[page_to_nid(page)], page,
+       return cma_pages_valid(hugetlb_cma[folio_nid(folio)], &folio->page,
                                1 << order);
  }
  #else
- static bool hugetlb_cma_page(struct page *page, unsigned int order)
+ static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
  {
        return false;
  }
@@@ -1127,17 -1127,17 +1127,17 @@@ static bool vma_has_reserves(struct vm_
        return false;
  }
  
- static void enqueue_huge_page(struct hstate *h, struct page *page)
+ static void enqueue_hugetlb_folio(struct hstate *h, struct folio *folio)
  {
-       int nid = page_to_nid(page);
+       int nid = folio_nid(folio);
  
        lockdep_assert_held(&hugetlb_lock);
-       VM_BUG_ON_PAGE(page_count(page), page);
+       VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
  
-       list_move(&page->lru, &h->hugepage_freelists[nid]);
+       list_move(&folio->lru, &h->hugepage_freelists[nid]);
        h->free_huge_pages++;
        h->free_huge_pages_node[nid]++;
-       SetHPageFreed(page);
+       folio_set_hugetlb_freed(folio);
  }
  
  static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
@@@ -1325,76 -1325,76 +1325,76 @@@ static int hstate_next_node_to_free(str
                nr_nodes--)
  
  /* used to demote non-gigantic_huge pages as well */
- static void __destroy_compound_gigantic_page(struct page *page,
+ static void __destroy_compound_gigantic_folio(struct folio *folio,
                                        unsigned int order, bool demote)
  {
        int i;
        int nr_pages = 1 << order;
        struct page *p;
  
-       atomic_set(compound_mapcount_ptr(page), 0);
-       atomic_set(compound_pincount_ptr(page), 0);
+       atomic_set(folio_mapcount_ptr(folio), 0);
+       atomic_set(folio_subpages_mapcount_ptr(folio), 0);
+       atomic_set(folio_pincount_ptr(folio), 0);
  
        for (i = 1; i < nr_pages; i++) {
-               p = nth_page(page, i);
+               p = folio_page(folio, i);
                p->mapping = NULL;
                clear_compound_head(p);
                if (!demote)
                        set_page_refcounted(p);
        }
  
-       set_compound_order(page, 0);
- #ifdef CONFIG_64BIT
-       page[1].compound_nr = 0;
- #endif
-       __ClearPageHead(page);
+       folio_set_compound_order(folio, 0);
+       __folio_clear_head(folio);
  }
  
- static void destroy_compound_hugetlb_page_for_demote(struct page *page,
+ static void destroy_compound_hugetlb_folio_for_demote(struct folio *folio,
                                        unsigned int order)
  {
-       __destroy_compound_gigantic_page(page, order, true);
+       __destroy_compound_gigantic_folio(folio, order, true);
  }
  
  #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
- static void destroy_compound_gigantic_page(struct page *page,
+ static void destroy_compound_gigantic_folio(struct folio *folio,
                                        unsigned int order)
  {
-       __destroy_compound_gigantic_page(page, order, false);
+       __destroy_compound_gigantic_folio(folio, order, false);
  }
  
- static void free_gigantic_page(struct page *page, unsigned int order)
+ static void free_gigantic_folio(struct folio *folio, unsigned int order)
  {
        /*
         * If the page isn't allocated using the cma allocator,
         * cma_release() returns false.
         */
  #ifdef CONFIG_CMA
-       if (cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order))
+       int nid = folio_nid(folio);
+       if (cma_release(hugetlb_cma[nid], &folio->page, 1 << order))
                return;
  #endif
  
-       free_contig_range(page_to_pfn(page), 1 << order);
+       free_contig_range(folio_pfn(folio), 1 << order);
  }
  
  #ifdef CONFIG_CONTIG_ALLOC
- static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+ static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
                int nid, nodemask_t *nodemask)
  {
+       struct page *page;
        unsigned long nr_pages = pages_per_huge_page(h);
        if (nid == NUMA_NO_NODE)
                nid = numa_mem_id();
  
  #ifdef CONFIG_CMA
        {
-               struct page *page;
                int node;
  
                if (hugetlb_cma[nid]) {
                        page = cma_alloc(hugetlb_cma[nid], nr_pages,
                                        huge_page_order(h), true);
                        if (page)
-                               return page;
+                               return page_folio(page);
                }
  
                if (!(gfp_mask & __GFP_THISNODE)) {
                                page = cma_alloc(hugetlb_cma[node], nr_pages,
                                                huge_page_order(h), true);
                                if (page)
-                                       return page;
+                                       return page_folio(page);
                        }
                }
        }
  #endif
  
-       return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
+       page = alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
+       return page ? page_folio(page) : NULL;
  }
  
  #else /* !CONFIG_CONTIG_ALLOC */
- static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+ static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
                                        int nid, nodemask_t *nodemask)
  {
        return NULL;
  #endif /* CONFIG_CONTIG_ALLOC */
  
  #else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
- static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+ static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
                                        int nid, nodemask_t *nodemask)
  {
        return NULL;
  }
- static inline void free_gigantic_page(struct page *page, unsigned int order) { }
- static inline void destroy_compound_gigantic_page(struct page *page,
+ static inline void free_gigantic_folio(struct folio *folio,
+                                               unsigned int order) { }
+ static inline void destroy_compound_gigantic_folio(struct folio *folio,
                                                unsigned int order) { }
  #endif
  
  /*
-  * Remove hugetlb page from lists, and update dtor so that page appears
+  * Remove hugetlb folio from lists, and update dtor so that the folio appears
   * as just a compound page.
   *
-  * A reference is held on the page, except in the case of demote.
+  * A reference is held on the folio, except in the case of demote.
   *
   * Must be called with hugetlb lock held.
   */
- static void __remove_hugetlb_page(struct hstate *h, struct page *page,
+ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
                                                        bool adjust_surplus,
                                                        bool demote)
  {
-       int nid = page_to_nid(page);
+       int nid = folio_nid(folio);
  
-       VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
-       VM_BUG_ON_PAGE(hugetlb_cgroup_from_page_rsvd(page), page);
+       VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio(folio), folio);
+       VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio_rsvd(folio), folio);
  
        lockdep_assert_held(&hugetlb_lock);
        if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
                return;
  
-       list_del(&page->lru);
+       list_del(&folio->lru);
  
-       if (HPageFreed(page)) {
+       if (folio_test_hugetlb_freed(folio)) {
                h->free_huge_pages--;
                h->free_huge_pages_node[nid]--;
        }
         *
         * For gigantic pages set the destructor to the null dtor.  This
         * destructor will never be called.  Before freeing the gigantic
-        * page destroy_compound_gigantic_page will turn the compound page
-        * into a simple group of pages.  After this the destructor does not
+        * page destroy_compound_gigantic_folio will turn the folio into a
+        * simple group of pages.  After this the destructor does not
         * apply.
         *
         * This handles the case where more than one ref is held when and
-        * after update_and_free_page is called.
+        * after update_and_free_hugetlb_folio is called.
         *
         * In the case of demote we do not ref count the page as it will soon
         * be turned into a page of smaller size.
         */
        if (!demote)
-               set_page_refcounted(page);
+               folio_ref_unfreeze(folio, 1);
        if (hstate_is_gigantic(h))
-               set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
+               folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
        else
-               set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
+               folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
  
        h->nr_huge_pages--;
        h->nr_huge_pages_node[nid]--;
  }
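
Free hugetlb folios are now kept with a frozen (zero) reference count, so handing one out of the pool means unfreezing it to exactly one reference rather than overwriting the count; folio_ref_unfreeze() is expected to complain if the count was not actually zero (an assumption about its internals, not shown in this diff). Sketch of the tail of the helper above:

        if (!demote)
                folio_ref_unfreeze(folio, 1);   /* frozen 0 -> exactly one reference */
        if (hstate_is_gigantic(h))
                folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
        else
                folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
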
  
- static void remove_hugetlb_page(struct hstate *h, struct page *page,
+ static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,
                                                        bool adjust_surplus)
  {
-       __remove_hugetlb_page(h, page, adjust_surplus, false);
+       __remove_hugetlb_folio(h, folio, adjust_surplus, false);
  }
  
- static void remove_hugetlb_page_for_demote(struct hstate *h, struct page *page,
+ static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio,
                                                        bool adjust_surplus)
  {
-       __remove_hugetlb_page(h, page, adjust_surplus, true);
+       __remove_hugetlb_folio(h, folio, adjust_surplus, true);
  }
  
- static void add_hugetlb_page(struct hstate *h, struct page *page,
+ static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
                             bool adjust_surplus)
  {
        int zeroed;
-       int nid = page_to_nid(page);
+       int nid = folio_nid(folio);
  
-       VM_BUG_ON_PAGE(!HPageVmemmapOptimized(page), page);
+       VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio);
  
        lockdep_assert_held(&hugetlb_lock);
  
-       INIT_LIST_HEAD(&page->lru);
+       INIT_LIST_HEAD(&folio->lru);
        h->nr_huge_pages++;
        h->nr_huge_pages_node[nid]++;
  
                h->surplus_huge_pages_node[nid]++;
        }
  
-       set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
-       set_page_private(page, 0);
+       folio_set_compound_dtor(folio, HUGETLB_PAGE_DTOR);
+       folio_change_private(folio, NULL);
        /*
-        * We have to set HPageVmemmapOptimized again as above
-        * set_page_private(page, 0) cleared it.
+        * We have to set hugetlb_vmemmap_optimized again as above
+        * folio_change_private(folio, NULL) cleared it.
         */
-       SetHPageVmemmapOptimized(page);
+       folio_set_hugetlb_vmemmap_optimized(folio);
  
        /*
-        * This page is about to be managed by the hugetlb allocator and
+        * This folio is about to be managed by the hugetlb allocator and
         * should have no users.  Drop our reference, and check for others
         * just in case.
         */
-       zeroed = put_page_testzero(page);
-       if (!zeroed)
+       zeroed = folio_put_testzero(folio);
+       if (unlikely(!zeroed))
                /*
                 * It is VERY unlikely someone else has taken a ref on
                 * the page.  In this case, we simply return as the
                 */
                return;
  
-       arch_clear_hugepage_flags(page);
-       enqueue_huge_page(h, page);
+       arch_clear_hugepage_flags(&folio->page);
+       enqueue_hugetlb_folio(h, folio);
  }
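
add_hugetlb_folio() has to restore the vmemmap-optimized marker by hand because, as its comment notes, the hugetlb-specific state sits behind the head page's private field and folio_change_private() wipes it. A short sketch of the re-initialisation order, condensed from the hunk above (the arch flag clearing is omitted):

        folio_set_compound_dtor(folio, HUGETLB_PAGE_DTOR);   /* make it a hugetlb folio again   */
        folio_change_private(folio, NULL);                    /* also clears hugetlb flag bits   */
        folio_set_hugetlb_vmemmap_optimized(folio);           /* restore the one flag we need    */
        if (folio_put_testzero(folio))                        /* drop our reference, and if last */
                enqueue_hugetlb_folio(h, folio);              /* put it back on the free list    */
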
  
  static void __update_and_free_page(struct hstate *h, struct page *page)
  {
        int i;
+       struct folio *folio = page_folio(page);
        struct page *subpage;
  
        if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
         * If we don't know which subpages are hwpoisoned, we can't free
         * the hugepage, so it's leaked intentionally.
         */
-       if (HPageRawHwpUnreliable(page))
+       if (folio_test_hugetlb_raw_hwp_unreliable(folio))
                return;
  
        if (hugetlb_vmemmap_restore(h, page)) {
                 * page and put the page back on the hugetlb free list and treat
                 * as a surplus page.
                 */
-               add_hugetlb_page(h, page, true);
+               add_hugetlb_folio(h, folio, true);
                spin_unlock_irq(&hugetlb_lock);
                return;
        }
         * Move PageHWPoison flag from head page to the raw error pages,
         * which makes any healthy subpages reusable.
         */
-       if (unlikely(PageHWPoison(page)))
-               hugetlb_clear_page_hwpoison(page);
+       if (unlikely(folio_test_hwpoison(folio)))
+               hugetlb_clear_page_hwpoison(&folio->page);
  
        for (i = 0; i < pages_per_huge_page(h); i++) {
-               subpage = nth_page(page, i);
+               subpage = folio_page(folio, i);
                subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
                                1 << PG_referenced | 1 << PG_dirty |
                                1 << PG_active | 1 << PG_private |
  
        /*
         * Non-gigantic pages demoted from CMA allocated gigantic pages
-        * need to be given back to CMA in free_gigantic_page.
+        * need to be given back to CMA in free_gigantic_folio.
         */
        if (hstate_is_gigantic(h) ||
-           hugetlb_cma_page(page, huge_page_order(h))) {
-               destroy_compound_gigantic_page(page, huge_page_order(h));
-               free_gigantic_page(page, huge_page_order(h));
+           hugetlb_cma_folio(folio, huge_page_order(h))) {
+               destroy_compound_gigantic_folio(folio, huge_page_order(h));
+               free_gigantic_folio(folio, huge_page_order(h));
        } else {
                __free_pages(page, huge_page_order(h));
        }
  }
  
  /*
-  * As update_and_free_page() can be called under any context, so we cannot
+  * As update_and_free_hugetlb_folio() can be called under any context, so we cannot
   * use GFP_KERNEL to allocate vmemmap pages. However, we can defer the
   * actual freeing in a workqueue to prevent from using GFP_ATOMIC to allocate
   * the vmemmap pages.
@@@ -1639,8 -1642,9 +1642,9 @@@ static void free_hpage_workfn(struct wo
                /*
                 * The VM_BUG_ON_PAGE(!PageHuge(page), page) in page_hstate()
                 * is going to trigger because a previous call to
-                * remove_hugetlb_page() will set_compound_page_dtor(page,
-                * NULL_COMPOUND_DTOR), so do not use page_hstate() directly.
+                * remove_hugetlb_folio() will call folio_set_compound_dtor
+                * (folio, NULL_COMPOUND_DTOR), so do not use page_hstate()
+                * directly.
                 */
                h = size_to_hstate(page_size(page));
  
@@@ -1657,11 -1661,11 +1661,11 @@@ static inline void flush_free_hpage_wor
                flush_work(&free_hpage_work);
  }
  
- static void update_and_free_page(struct hstate *h, struct page *page,
+ static void update_and_free_hugetlb_folio(struct hstate *h, struct folio *folio,
                                 bool atomic)
  {
-       if (!HPageVmemmapOptimized(page) || !atomic) {
-               __update_and_free_page(h, page);
+       if (!folio_test_hugetlb_vmemmap_optimized(folio) || !atomic) {
+               __update_and_free_page(h, &folio->page);
                return;
        }
  
         * empty. Otherwise, schedule_work() had been called but the workfn
         * hasn't retrieved the list yet.
         */
-       if (llist_add((struct llist_node *)&page->mapping, &hpage_freelist))
+       if (llist_add((struct llist_node *)&folio->mapping, &hpage_freelist))
                schedule_work(&free_hpage_work);
  }
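
update_and_free_hugetlb_folio() defers the real work for vmemmap-optimized folios because restoring the vmemmap may sleep: the dead folio's ->mapping field doubles as a lock-free llist node, llist_add() reports whether the list was previously empty, and a workqueue drains it later. A simplified rendering of the worker side, reconstructed from the free_hpage_workfn() hunk above rather than copied from the tree, so treat the exact body as an assumption:

        static void free_hpage_workfn(struct work_struct *work)
        {
                struct llist_node *node = llist_del_all(&hpage_freelist);

                while (node) {
                        struct page *page = container_of((struct address_space **)node,
                                                         struct page, mapping);

                        node = node->next;
                        page->mapping = NULL;
                        /* page_hstate() would trip its PageHuge() check here */
                        __update_and_free_page(size_to_hstate(page_size(page)), page);
                        cond_resched();
                }
        }
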
  
  static void update_and_free_pages_bulk(struct hstate *h, struct list_head *list)
  {
        struct page *page, *t_page;
+       struct folio *folio;
  
        list_for_each_entry_safe(page, t_page, list, lru) {
-               update_and_free_page(h, page, false);
+               folio = page_folio(page);
+               update_and_free_hugetlb_folio(h, folio, false);
                cond_resched();
        }
  }
@@@ -1703,21 -1709,22 +1709,22 @@@ void free_huge_page(struct page *page
         * Can't pass hstate in here because it is called from the
         * compound page destructor.
         */
-       struct hstate *h = page_hstate(page);
-       int nid = page_to_nid(page);
-       struct hugepage_subpool *spool = hugetlb_page_subpool(page);
+       struct folio *folio = page_folio(page);
+       struct hstate *h = folio_hstate(folio);
+       int nid = folio_nid(folio);
+       struct hugepage_subpool *spool = hugetlb_folio_subpool(folio);
        bool restore_reserve;
        unsigned long flags;
  
-       VM_BUG_ON_PAGE(page_count(page), page);
-       VM_BUG_ON_PAGE(page_mapcount(page), page);
+       VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
+       VM_BUG_ON_FOLIO(folio_mapcount(folio), folio);
  
-       hugetlb_set_page_subpool(page, NULL);
-       if (PageAnon(page))
-               __ClearPageAnonExclusive(page);
-       page->mapping = NULL;
-       restore_reserve = HPageRestoreReserve(page);
-       ClearHPageRestoreReserve(page);
+       hugetlb_set_folio_subpool(folio, NULL);
+       if (folio_test_anon(folio))
+               __ClearPageAnonExclusive(&folio->page);
+       folio->mapping = NULL;
+       restore_reserve = folio_test_hugetlb_restore_reserve(folio);
+       folio_clear_hugetlb_restore_reserve(folio);
  
        /*
         * If HPageRestoreReserve was set on page, page allocation consumed a
        }
  
        spin_lock_irqsave(&hugetlb_lock, flags);
-       ClearHPageMigratable(page);
-       hugetlb_cgroup_uncharge_page(hstate_index(h),
-                                    pages_per_huge_page(h), page);
-       hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h),
-                                         pages_per_huge_page(h), page);
+       folio_clear_hugetlb_migratable(folio);
+       hugetlb_cgroup_uncharge_folio(hstate_index(h),
+                                    pages_per_huge_page(h), folio);
+       hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
+                                         pages_per_huge_page(h), folio);
        if (restore_reserve)
                h->resv_huge_pages++;
  
-       if (HPageTemporary(page)) {
-               remove_hugetlb_page(h, page, false);
+       if (folio_test_hugetlb_temporary(folio)) {
+               remove_hugetlb_folio(h, folio, false);
                spin_unlock_irqrestore(&hugetlb_lock, flags);
-               update_and_free_page(h, page, true);
+               update_and_free_hugetlb_folio(h, folio, true);
        } else if (h->surplus_huge_pages_node[nid]) {
                /* remove the page from active list */
-               remove_hugetlb_page(h, page, true);
+               remove_hugetlb_folio(h, folio, true);
                spin_unlock_irqrestore(&hugetlb_lock, flags);
-               update_and_free_page(h, page, true);
+               update_and_free_hugetlb_folio(h, folio, true);
        } else {
                arch_clear_hugepage_flags(page);
-               enqueue_huge_page(h, page);
+               enqueue_hugetlb_folio(h, folio);
                spin_unlock_irqrestore(&hugetlb_lock, flags);
        }
  }
@@@ -1773,37 -1780,37 +1780,37 @@@ static void __prep_account_new_huge_pag
        h->nr_huge_pages_node[nid]++;
  }
  
- static void __prep_new_huge_page(struct hstate *h, struct page *page)
+ static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
  {
-       hugetlb_vmemmap_optimize(h, page);
-       INIT_LIST_HEAD(&page->lru);
-       set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
-       hugetlb_set_page_subpool(page, NULL);
-       set_hugetlb_cgroup(page, NULL);
-       set_hugetlb_cgroup_rsvd(page, NULL);
+       hugetlb_vmemmap_optimize(h, &folio->page);
+       INIT_LIST_HEAD(&folio->lru);
+       folio_set_compound_dtor(folio, HUGETLB_PAGE_DTOR);
+       hugetlb_set_folio_subpool(folio, NULL);
+       set_hugetlb_cgroup(folio, NULL);
+       set_hugetlb_cgroup_rsvd(folio, NULL);
  }
  
- static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
+ static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int nid)
  {
-       __prep_new_huge_page(h, page);
+       __prep_new_hugetlb_folio(h, folio);
        spin_lock_irq(&hugetlb_lock);
        __prep_account_new_huge_page(h, nid);
        spin_unlock_irq(&hugetlb_lock);
  }
  
- static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
-                                                               bool demote)
+ static bool __prep_compound_gigantic_folio(struct folio *folio,
+                                       unsigned int order, bool demote)
  {
        int i, j;
        int nr_pages = 1 << order;
        struct page *p;
  
-       /* we rely on prep_new_huge_page to set the destructor */
-       set_compound_order(page, order);
-       __ClearPageReserved(page);
-       __SetPageHead(page);
+       __folio_clear_reserved(folio);
+       __folio_set_head(folio);
+       /* we rely on prep_new_hugetlb_folio to set the destructor */
+       folio_set_compound_order(folio, order);
        for (i = 0; i < nr_pages; i++) {
-               p = nth_page(page, i);
+               p = folio_page(folio, i);
  
                /*
                 * For gigantic hugepages allocated through bootmem at
                        VM_BUG_ON_PAGE(page_count(p), p);
                }
                if (i != 0)
-                       set_compound_head(p, page);
+                       set_compound_head(p, &folio->page);
        }
-       atomic_set(compound_mapcount_ptr(page), -1);
-       atomic_set(compound_pincount_ptr(page), 0);
+       atomic_set(folio_mapcount_ptr(folio), -1);
+       atomic_set(folio_subpages_mapcount_ptr(folio), 0);
+       atomic_set(folio_pincount_ptr(folio), 0);
        return true;
  
  out_error:
        /* undo page modifications made above */
        for (j = 0; j < i; j++) {
-               p = nth_page(page, j);
+               p = folio_page(folio, j);
                if (j != 0)
                        clear_compound_head(p);
                set_page_refcounted(p);
        }
        /* need to clear PG_reserved on remaining tail pages  */
        for (; j < nr_pages; j++) {
-               p = nth_page(page, j);
+               p = folio_page(folio, j);
                __ClearPageReserved(p);
        }
-       set_compound_order(page, 0);
- #ifdef CONFIG_64BIT
-       page[1].compound_nr = 0;
- #endif
-       __ClearPageHead(page);
+       folio_set_compound_order(folio, 0);
+       __folio_clear_head(folio);
        return false;
  }
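
One ordering detail in __prep_compound_gigantic_folio() is easy to miss: __folio_set_head() now runs before folio_set_compound_order(), since the order is kept in the large-folio metadata of the first tail page and, by assumption here, the setter only accepts a folio already marked as a head. Minimal sketch of the required sequence:

        __folio_clear_reserved(folio);
        __folio_set_head(folio);                  /* 1: mark the compound head first */
        folio_set_compound_order(folio, order);   /* 2: only then record the order   */
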
  
- static bool prep_compound_gigantic_page(struct page *page, unsigned int order)
+ static bool prep_compound_gigantic_folio(struct folio *folio,
+                                                       unsigned int order)
  {
-       return __prep_compound_gigantic_page(page, order, false);
+       return __prep_compound_gigantic_folio(folio, order, false);
  }
  
- static bool prep_compound_gigantic_page_for_demote(struct page *page,
+ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
                                                        unsigned int order)
  {
-       return __prep_compound_gigantic_page(page, order, true);
+       return __prep_compound_gigantic_folio(folio, order, true);
  }
  
  /*
@@@ -1945,7 -1951,7 +1951,7 @@@ pgoff_t hugetlb_basepage_index(struct p
        return (index << compound_order(page_head)) + compound_idx;
  }
  
- static struct page *alloc_buddy_huge_page(struct hstate *h,
+ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
                gfp_t gfp_mask, int nid, nodemask_t *nmask,
                nodemask_t *node_alloc_noretry)
  {
@@@ -1983,11 -1989,6 +1989,6 @@@ retry
                page = NULL;
        }
  
-       if (page)
-               __count_vm_event(HTLB_BUDDY_PGALLOC);
-       else
-               __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
        /*
         * If we did not specify __GFP_RETRY_MAYFAIL, but still got a page this
         * indicates an overall state change.  Clear bit so that we resume
        if (node_alloc_noretry && !page && alloc_try_hard)
                node_set(nid, *node_alloc_noretry);
  
-       return page;
+       if (!page) {
+               __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
+               return NULL;
+       }
+       __count_vm_event(HTLB_BUDDY_PGALLOC);
+       return page_folio(page);
  }
  
  /*
   * Note that returned page is 'frozen':  ref count of head page and all tail
   * pages is zero.
   */
- static struct page *alloc_fresh_huge_page(struct hstate *h,
+ static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
                gfp_t gfp_mask, int nid, nodemask_t *nmask,
                nodemask_t *node_alloc_noretry)
  {
-       struct page *page;
+       struct folio *folio;
        bool retry = false;
  
  retry:
        if (hstate_is_gigantic(h))
-               page = alloc_gigantic_page(h, gfp_mask, nid, nmask);
+               folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
        else
-               page = alloc_buddy_huge_page(h, gfp_mask,
+               folio = alloc_buddy_hugetlb_folio(h, gfp_mask,
                                nid, nmask, node_alloc_noretry);
-       if (!page)
+       if (!folio)
                return NULL;
        if (hstate_is_gigantic(h)) {
-               if (!prep_compound_gigantic_page(page, huge_page_order(h))) {
+               if (!prep_compound_gigantic_folio(folio, huge_page_order(h))) {
                        /*
                         * Rare failure to convert pages to compound page.
                         * Free pages and try again - ONCE!
                         */
-                       free_gigantic_page(page, huge_page_order(h));
+                       free_gigantic_folio(folio, huge_page_order(h));
                        if (!retry) {
                                retry = true;
                                goto retry;
                        return NULL;
                }
        }
-       prep_new_huge_page(h, page, page_to_nid(page));
+       prep_new_hugetlb_folio(h, folio, folio_nid(folio));
  
-       return page;
+       return folio;
  }
  
  /*
  static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
                                nodemask_t *node_alloc_noretry)
  {
-       struct page *page;
+       struct folio *folio;
        int nr_nodes, node;
        gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
  
        for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
-               page = alloc_fresh_huge_page(h, gfp_mask, node, nodes_allowed,
-                                               node_alloc_noretry);
-               if (page)
-                       break;
+               folio = alloc_fresh_hugetlb_folio(h, gfp_mask, node,
+                                       nodes_allowed, node_alloc_noretry);
+               if (folio) {
+                       free_huge_page(&folio->page); /* free it into the hugepage allocator */
+                       return 1;
+               }
        }
  
-       if (!page)
-               return 0;
-       free_huge_page(page); /* free it into the hugepage allocator */
-       return 1;
+       return 0;
  }
  
  /*
@@@ -2088,6 -2091,7 +2091,7 @@@ static struct page *remove_pool_huge_pa
  {
        int nr_nodes, node;
        struct page *page = NULL;
+       struct folio *folio;
  
        lockdep_assert_held(&hugetlb_lock);
        for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
                    !list_empty(&h->hugepage_freelists[node])) {
                        page = list_entry(h->hugepage_freelists[node].next,
                                          struct page, lru);
-                       remove_hugetlb_page(h, page, acct_surplus);
+                       folio = page_folio(page);
+                       remove_hugetlb_folio(h, folio, acct_surplus);
                        break;
                }
        }
  int dissolve_free_huge_page(struct page *page)
  {
        int rc = -EBUSY;
+       struct folio *folio = page_folio(page);
  
  retry:
        /* Not to disrupt normal path by vainly holding hugetlb_lock */
-       if (!PageHuge(page))
+       if (!folio_test_hugetlb(folio))
                return 0;
  
        spin_lock_irq(&hugetlb_lock);
-       if (!PageHuge(page)) {
+       if (!folio_test_hugetlb(folio)) {
                rc = 0;
                goto out;
        }
  
-       if (!page_count(page)) {
-               struct page *head = compound_head(page);
-               struct hstate *h = page_hstate(head);
+       if (!folio_ref_count(folio)) {
+               struct hstate *h = folio_hstate(folio);
                if (!available_huge_pages(h))
                        goto out;
  
                 * We should make sure that the page is already on the free list
                 * when it is dissolved.
                 */
-               if (unlikely(!HPageFreed(head))) {
+               if (unlikely(!folio_test_hugetlb_freed(folio))) {
                        spin_unlock_irq(&hugetlb_lock);
                        cond_resched();
  
                        goto retry;
                }
  
-               remove_hugetlb_page(h, head, false);
+               remove_hugetlb_folio(h, folio, false);
                h->max_huge_pages--;
                spin_unlock_irq(&hugetlb_lock);
  
                /*
-                * Normally update_and_free_page will allocate required vmemmmap
-                * before freeing the page.  update_and_free_page will fail to
+                * Normally update_and_free_hugtlb_folio will allocate required vmemmmap
+                * before freeing the page.  update_and_free_hugtlb_folio will fail to
                 * free the page if it can not allocate required vmemmap.  We
                 * need to adjust max_huge_pages if the page is not freed.
                 * Attempt to allocate vmemmmap here so that we can take
                 * appropriate action on failure.
                 */
-               rc = hugetlb_vmemmap_restore(h, head);
+               rc = hugetlb_vmemmap_restore(h, &folio->page);
                if (!rc) {
-                       update_and_free_page(h, head, false);
+                       update_and_free_hugetlb_folio(h, folio, false);
                } else {
                        spin_lock_irq(&hugetlb_lock);
-                       add_hugetlb_page(h, head, false);
+                       add_hugetlb_folio(h, folio, false);
                        h->max_huge_pages++;
                        spin_unlock_irq(&hugetlb_lock);
                }
@@@ -2229,7 -2234,7 +2234,7 @@@ int dissolve_free_huge_pages(unsigned l
  static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
                                                int nid, nodemask_t *nmask)
  {
-       struct page *page = NULL;
+       struct folio *folio = NULL;
  
        if (hstate_is_gigantic(h))
                return NULL;
                goto out_unlock;
        spin_unlock_irq(&hugetlb_lock);
  
-       page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
-       if (!page)
+       folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+       if (!folio)
                return NULL;
  
        spin_lock_irq(&hugetlb_lock);
         * codeflow
         */
        if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
-               SetHPageTemporary(page);
+               folio_set_hugetlb_temporary(folio);
                spin_unlock_irq(&hugetlb_lock);
-               free_huge_page(page);
+               free_huge_page(&folio->page);
                return NULL;
        }
  
        h->surplus_huge_pages++;
-       h->surplus_huge_pages_node[page_to_nid(page)]++;
+       h->surplus_huge_pages_node[folio_nid(folio)]++;
  
  out_unlock:
        spin_unlock_irq(&hugetlb_lock);
  
-       return page;
+       return &folio->page;
  }
  
  static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
                                     int nid, nodemask_t *nmask)
  {
-       struct page *page;
+       struct folio *folio;
  
        if (hstate_is_gigantic(h))
                return NULL;
  
-       page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
-       if (!page)
+       folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+       if (!folio)
                return NULL;
  
        /* fresh huge pages are frozen */
-       set_page_refcounted(page);
+       folio_ref_unfreeze(folio, 1);
        /*
         * We do not account these pages as surplus because they are only
         * temporary and will be released properly on the last reference
         */
-       SetHPageTemporary(page);
+       folio_set_hugetlb_temporary(folio);
  
-       return page;
+       return &folio->page;
  }
  
  /*
@@@ -2430,7 -2434,7 +2434,7 @@@ retry
                if ((--needed) < 0)
                        break;
                /* Add the page to the hugetlb allocator */
-               enqueue_huge_page(h, page);
+               enqueue_hugetlb_folio(h, page_folio(page));
        }
  free:
        spin_unlock_irq(&hugetlb_lock);
@@@ -2737,51 -2741,52 +2741,52 @@@ void restore_reserve_on_error(struct hs
  }
  
  /*
-  * alloc_and_dissolve_huge_page - Allocate a new page and dissolve the old one
+  * alloc_and_dissolve_hugetlb_folio - Allocate a new folio and dissolve
+  * the old one
   * @h: struct hstate old page belongs to
-  * @old_page: Old page to dissolve
+  * @old_folio: Old folio to dissolve
   * @list: List to isolate the page in case we need to
   * Returns 0 on success, otherwise negated error.
   */
- static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
-                                       struct list_head *list)
+ static int alloc_and_dissolve_hugetlb_folio(struct hstate *h,
+                       struct folio *old_folio, struct list_head *list)
  {
        gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
-       int nid = page_to_nid(old_page);
-       struct page *new_page;
+       int nid = folio_nid(old_folio);
+       struct folio *new_folio;
        int ret = 0;
  
        /*
-        * Before dissolving the page, we need to allocate a new one for the
-        * pool to remain stable.  Here, we allocate the page and 'prep' it
+        * Before dissolving the folio, we need to allocate a new one for the
+        * pool to remain stable.  Here, we allocate the folio and 'prep' it
         * by doing everything but actually updating counters and adding to
         * the pool.  This simplifies and lets us do most of the processing
         * under the lock.
         */
-       new_page = alloc_buddy_huge_page(h, gfp_mask, nid, NULL, NULL);
-       if (!new_page)
+       new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, NULL, NULL);
+       if (!new_folio)
                return -ENOMEM;
-       __prep_new_huge_page(h, new_page);
+       __prep_new_hugetlb_folio(h, new_folio);
  
  retry:
        spin_lock_irq(&hugetlb_lock);
-       if (!PageHuge(old_page)) {
+       if (!folio_test_hugetlb(old_folio)) {
                /*
-                * Freed from under us. Drop new_page too.
+                * Freed from under us. Drop new_folio too.
                 */
                goto free_new;
-       } else if (page_count(old_page)) {
+       } else if (folio_ref_count(old_folio)) {
                /*
-                * Someone has grabbed the page, try to isolate it here.
+                * Someone has grabbed the folio, try to isolate it here.
                 * Fail with -EBUSY if not possible.
                 */
                spin_unlock_irq(&hugetlb_lock);
-               ret = isolate_hugetlb(old_page, list);
+               ret = isolate_hugetlb(&old_folio->page, list);
                spin_lock_irq(&hugetlb_lock);
                goto free_new;
-       } else if (!HPageFreed(old_page)) {
+       } else if (!folio_test_hugetlb_freed(old_folio)) {
                /*
-                * Page's refcount is 0 but it has not been enqueued in the
+                * Folio's refcount is 0 but it has not been enqueued in the
                 * freelist yet. Race window is small, so we can succeed here if
                 * we retry.
                 */
                goto retry;
        } else {
                /*
-                * Ok, old_page is still a genuine free hugepage. Remove it from
+                * Ok, old_folio is still a genuine free hugepage. Remove it from
                 * the freelist and decrease the counters. These will be
                 * incremented again when calling __prep_account_new_huge_page()
-                * and enqueue_huge_page() for new_page. The counters will remain
-                * stable since this happens under the lock.
+                * and enqueue_hugetlb_folio() for new_folio. The counters will
+                * remain stable since this happens under the lock.
                 */
-               remove_hugetlb_page(h, old_page, false);
+               remove_hugetlb_folio(h, old_folio, false);
  
                /*
-                * Ref count on new page is already zero as it was dropped
+                * Ref count on new_folio is already zero as it was dropped
                 * earlier.  It can be directly added to the pool free list.
                 */
                __prep_account_new_huge_page(h, nid);
-               enqueue_huge_page(h, new_page);
+               enqueue_hugetlb_folio(h, new_folio);
  
                /*
-                * Pages have been replaced, we can safely free the old one.
+                * Folio has been replaced, we can safely free the old one.
                 */
                spin_unlock_irq(&hugetlb_lock);
-               update_and_free_page(h, old_page, false);
+               update_and_free_hugetlb_folio(h, old_folio, false);
        }
  
        return ret;
  
  free_new:
        spin_unlock_irq(&hugetlb_lock);
-       /* Page has a zero ref count, but needs a ref to be freed */
-       set_page_refcounted(new_page);
-       update_and_free_page(h, new_page, false);
+       /* Folio has a zero ref count, but needs a ref to be freed */
+       folio_ref_unfreeze(new_folio, 1);
+       update_and_free_hugetlb_folio(h, new_folio, false);
  
        return ret;
  }
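
Once hugetlb_lock is held, alloc_and_dissolve_hugetlb_folio() can find old_folio in one of four states; a commented skeleton of that decision, condensed from the branches above for readability (illustrative only):

        spin_lock_irq(&hugetlb_lock);
        if (!folio_test_hugetlb(old_folio)) {
                /* dissolved and reused behind our back: just free new_folio */
        } else if (folio_ref_count(old_folio)) {
                /* still in use: fall back to isolate_hugetlb() onto @list, may -EBUSY */
        } else if (!folio_test_hugetlb_freed(old_folio)) {
                /* refcount already 0 but not yet on the free list: small window, retry */
        } else {
                /* genuinely free: swap old_folio out of the pool and enqueue new_folio */
        }
        spin_unlock_irq(&hugetlb_lock);
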
  int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
  {
        struct hstate *h;
-       struct page *head;
+       struct folio *folio = page_folio(page);
        int ret = -EBUSY;
  
        /*
         * Return success when racing as if we dissolved the page ourselves.
         */
        spin_lock_irq(&hugetlb_lock);
-       if (PageHuge(page)) {
-               head = compound_head(page);
-               h = page_hstate(head);
+       if (folio_test_hugetlb(folio)) {
+               h = folio_hstate(folio);
        } else {
                spin_unlock_irq(&hugetlb_lock);
                return 0;
        if (hstate_is_gigantic(h))
                return -ENOMEM;
  
-       if (page_count(head) && !isolate_hugetlb(head, list))
+       if (folio_ref_count(folio) && !isolate_hugetlb(&folio->page, list))
                ret = 0;
-       else if (!page_count(head))
-               ret = alloc_and_dissolve_huge_page(h, head, list);
+       else if (!folio_ref_count(folio))
+               ret = alloc_and_dissolve_hugetlb_folio(h, folio, list);
  
        return ret;
  }
@@@ -2866,6 -2870,7 +2870,7 @@@ struct page *alloc_huge_page(struct vm_
        struct hugepage_subpool *spool = subpool_vma(vma);
        struct hstate *h = hstate_vma(vma);
        struct page *page;
+       struct folio *folio;
        long map_chg, map_commit;
        long gbl_chg;
        int ret, idx;
                set_page_refcounted(page);
                /* Fall through */
        }
+       folio = page_folio(page);
        hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
        /* If allocation is not consuming a reservation, also store the
         * hugetlb_cgroup pointer on the page.
                rsv_adjust = hugepage_subpool_put_pages(spool, 1);
                hugetlb_acct_memory(h, -rsv_adjust);
                if (deferred_reserve)
-                       hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h),
-                                       pages_per_huge_page(h), page);
+                       hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
+                                       pages_per_huge_page(h), folio);
        }
        return page;
  
@@@ -3038,17 -3044,18 +3044,18 @@@ static void __init gather_bootmem_preal
  
        list_for_each_entry(m, &huge_boot_pages, list) {
                struct page *page = virt_to_page(m);
+               struct folio *folio = page_folio(page);
                struct hstate *h = m->hstate;
  
                VM_BUG_ON(!hstate_is_gigantic(h));
-               WARN_ON(page_count(page) != 1);
-               if (prep_compound_gigantic_page(page, huge_page_order(h))) {
-                       WARN_ON(PageReserved(page));
-                       prep_new_huge_page(h, page, page_to_nid(page));
+               WARN_ON(folio_ref_count(folio) != 1);
+               if (prep_compound_gigantic_folio(folio, huge_page_order(h))) {
+                       WARN_ON(folio_test_reserved(folio));
+                       prep_new_hugetlb_folio(h, folio, folio_nid(folio));
                        free_huge_page(page); /* add to the hugepage allocator */
                } else {
                        /* VERY unlikely inflated ref count on a tail page */
-                       free_gigantic_page(page, huge_page_order(h));
+                       free_gigantic_folio(folio, huge_page_order(h));
                }
  
                /*
@@@ -3070,14 -3077,14 +3077,14 @@@ static void __init hugetlb_hstate_alloc
                        if (!alloc_bootmem_huge_page(h, nid))
                                break;
                } else {
-                       struct page *page;
+                       struct folio *folio;
                        gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
  
-                       page = alloc_fresh_huge_page(h, gfp_mask, nid,
+                       folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid,
                                        &node_states[N_MEMORY], NULL);
-                       if (!page)
+                       if (!folio)
                                break;
-                       free_huge_page(page); /* free it into the hugepage allocator */
+                       free_huge_page(&folio->page); /* free it into the hugepage allocator */
                }
                cond_resched();
        }
@@@ -3222,7 -3229,7 +3229,7 @@@ static void try_to_free_low(struct hsta
                                goto out;
                        if (PageHighMem(page))
                                continue;
-                       remove_hugetlb_page(h, page, false);
+                       remove_hugetlb_folio(h, page_folio(page), false);
                        list_add(&page->lru, &page_list);
                }
        }
@@@ -3427,12 -3434,13 +3434,13 @@@ static int demote_free_huge_page(struc
  {
        int i, nid = page_to_nid(page);
        struct hstate *target_hstate;
+       struct folio *folio = page_folio(page);
        struct page *subpage;
        int rc = 0;
  
        target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
  
-       remove_hugetlb_page_for_demote(h, page, false);
+       remove_hugetlb_folio_for_demote(h, folio, false);
        spin_unlock_irq(&hugetlb_lock);
  
        rc = hugetlb_vmemmap_restore(h, page);
                /* Allocation of vmemmmap failed, we can not demote page */
                spin_lock_irq(&hugetlb_lock);
                set_page_refcounted(page);
-               add_hugetlb_page(h, page, false);
+               add_hugetlb_folio(h, page_folio(page), false);
                return rc;
        }
  
        /*
-        * Use destroy_compound_hugetlb_page_for_demote for all huge page
+        * Use destroy_compound_hugetlb_folio_for_demote for all huge page
         * sizes as it will not ref count pages.
         */
-       destroy_compound_hugetlb_page_for_demote(page, huge_page_order(h));
+       destroy_compound_hugetlb_folio_for_demote(folio, huge_page_order(h));
  
        /*
         * Taking target hstate mutex synchronizes with set_max_huge_pages.
        for (i = 0; i < pages_per_huge_page(h);
                                i += pages_per_huge_page(target_hstate)) {
                subpage = nth_page(page, i);
+               folio = page_folio(subpage);
                if (hstate_is_gigantic(target_hstate))
-                       prep_compound_gigantic_page_for_demote(subpage,
+                       prep_compound_gigantic_folio_for_demote(folio,
                                                        target_hstate->order);
                else
                        prep_compound_page(subpage, target_hstate->order);
                set_page_private(subpage, 0);
-               prep_new_huge_page(target_hstate, subpage, nid);
+               prep_new_hugetlb_folio(target_hstate, folio, nid);
                free_huge_page(subpage);
        }
        mutex_unlock(&target_hstate->resize_lock);
@@@ -4777,7 -4786,6 +4786,6 @@@ hugetlb_install_page(struct vm_area_str
        hugepage_add_new_anon_rmap(new_page, vma, addr);
        set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
        hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
-       ClearHPageRestoreReserve(new_page);
        SetHPageMigratable(new_page);
  }
  
@@@ -5066,7 -5074,6 +5074,6 @@@ static void __unmap_hugepage_range(stru
        struct page *page;
        struct hstate *h = hstate_vma(vma);
        unsigned long sz = huge_page_size(h);
-       struct mmu_notifier_range range;
        unsigned long last_addr_mask;
        bool force_flush = false;
  
        tlb_change_page_size(tlb, sz);
        tlb_start_vma(tlb, vma);
  
-       /*
-        * If sharing possible, alert mmu notifiers of worst case.
-        */
-       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, start,
-                               end);
-       adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
-       mmu_notifier_invalidate_range_start(&range);
        last_addr_mask = hugetlb_mask_last_page(h);
        address = start;
        for (; address < end; address += sz) {
                 * unmapped and its refcount is dropped, so just clear pte here.
                 */
                if (unlikely(!pte_present(pte))) {
- #ifdef CONFIG_PTE_MARKER_UFFD_WP
                        /*
                         * If the pte was wr-protected by uffd-wp in any of the
                         * swap forms, meanwhile the caller does not want to
                                set_huge_pte_at(mm, address, ptep,
                                                make_pte_marker(PTE_MARKER_UFFD_WP));
                        else
- #endif
                                huge_pte_clear(mm, address, ptep, sz);
                        spin_unlock(ptl);
                        continue;
                tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
                if (huge_pte_dirty(pte))
                        set_page_dirty(page);
- #ifdef CONFIG_PTE_MARKER_UFFD_WP
                /* Leave a uffd-wp pte marker if needed */
                if (huge_pte_uffd_wp(pte) &&
                    !(zap_flags & ZAP_FLAG_DROP_MARKER))
                        set_huge_pte_at(mm, address, ptep,
                                        make_pte_marker(PTE_MARKER_UFFD_WP));
- #endif
                hugetlb_count_sub(pages_per_huge_page(h), mm);
                page_remove_rmap(page, vma, true);
  
                if (ref_page)
                        break;
        }
-       mmu_notifier_invalidate_range_end(&range);
        tlb_end_vma(tlb, vma);
  
        /*
@@@ -5204,6 -5199,7 +5199,7 @@@ void __unmap_hugepage_range_final(struc
        hugetlb_vma_lock_write(vma);
        i_mmap_lock_write(vma->vm_file->f_mapping);
  
+       /* mmu notification performed in caller */
        __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
  
        if (zap_flags & ZAP_FLAG_UNMAP) {       /* final unmap */
@@@ -5228,10 -5224,18 +5224,18 @@@ void unmap_hugepage_range(struct vm_are
                          unsigned long end, struct page *ref_page,
                          zap_flags_t zap_flags)
  {
+       struct mmu_notifier_range range;
        struct mmu_gather tlb;
  
+       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+                               start, end);
+       adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
+       mmu_notifier_invalidate_range_start(&range);
        tlb_gather_mmu(&tlb, vma->vm_mm);
        __unmap_hugepage_range(&tlb, vma, start, end, ref_page, zap_flags);
+       mmu_notifier_invalidate_range_end(&range);
        tlb_finish_mmu(&tlb);
  }
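
After this change the MMU-notifier bracketing is split: unmap_hugepage_range() sets up its own MMU_NOTIFY_CLEAR range, widened in case of shared PMDs, while __unmap_hugepage_range_final() now expects the caller to have done so. A compact restatement of the bracketing contract, taken from the hunk above with the zap work itself elided:

        mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, start, end);
        adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
        mmu_notifier_invalidate_range_start(&range);
        tlb_gather_mmu(&tlb, vma->vm_mm);
        __unmap_hugepage_range(&tlb, vma, start, end, ref_page, zap_flags); /* no notifier calls inside */
        mmu_notifier_invalidate_range_end(&range);
        tlb_finish_mmu(&tlb);
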
  
@@@ -5310,9 -5314,6 +5314,6 @@@ static vm_fault_t hugetlb_wp(struct mm_
        unsigned long haddr = address & huge_page_mask(h);
        struct mmu_notifier_range range;
  
-       VM_BUG_ON(unshare && (flags & FOLL_WRITE));
-       VM_BUG_ON(!unshare && !(flags & FOLL_WRITE));
        /*
         * hugetlb does not support FOLL_FORCE-style write faults that keep the
         * PTE mapped R/O such as maybe_mkwrite() would do.
  
        /* Let's take out MAP_SHARED mappings first. */
        if (vma->vm_flags & VM_MAYSHARE) {
-               if (unlikely(unshare))
-                       return 0;
                set_huge_ptep_writable(vma, haddr, ptep);
                return 0;
        }
@@@ -5445,8 -5444,6 +5444,6 @@@ retry_avoidcopy
        spin_lock(ptl);
        ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
        if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
-               ClearHPageRestoreReserve(new_page);
                /* Break COW or unshare */
                huge_ptep_clear_flush(vma, haddr, ptep);
                mmu_notifier_invalidate_range(mm, range.start, range.end);
@@@ -5741,10 -5738,9 +5738,9 @@@ static vm_fault_t hugetlb_no_page(struc
        if (!pte_same(huge_ptep_get(ptep), old_pte))
                goto backout;
  
-       if (anon_rmap) {
-               ClearHPageRestoreReserve(page);
+       if (anon_rmap)
                hugepage_add_new_anon_rmap(page, vma, haddr);
-       else
+       else
                page_dup_file_rmap(page, true);
        new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
                                && (vma->vm_flags & VM_SHARED)));
@@@ -6131,12 -6127,10 +6127,10 @@@ int hugetlb_mcopy_atomic_pte(struct mm_
        if (!huge_pte_none_mostly(huge_ptep_get(dst_pte)))
                goto out_release_unlock;
  
-       if (page_in_pagecache) {
+       if (page_in_pagecache)
                page_dup_file_rmap(page, true);
-       } else {
-               ClearHPageRestoreReserve(page);
+       else
                hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
-       }
  
        /*
         * For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY
@@@ -6201,7 -6195,8 +6195,8 @@@ static void record_subpages_vmas(struc
        }
  }
  
- static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte,
+ static inline bool __follow_hugetlb_must_fault(struct vm_area_struct *vma,
+                                              unsigned int flags, pte_t *pte,
                                               bool *unshare)
  {
        pte_t pteval = huge_ptep_get(pte);
                return false;
        if (flags & FOLL_WRITE)
                return true;
-       if (gup_must_unshare(flags, pte_page(pteval))) {
+       if (gup_must_unshare(vma, flags, pte_page(pteval))) {
                *unshare = true;
                return true;
        }
        return false;
  }
  
 -               * try_grab_page() should always succeed here, because we hold
 -               * the ptl lock and have verified pte_present().
+ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+                               unsigned long address, unsigned int flags)
+ {
+       struct hstate *h = hstate_vma(vma);
+       struct mm_struct *mm = vma->vm_mm;
+       unsigned long haddr = address & huge_page_mask(h);
+       struct page *page = NULL;
+       spinlock_t *ptl;
+       pte_t *pte, entry;
+       /*
+        * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via
+        * follow_hugetlb_page().
+        */
+       if (WARN_ON_ONCE(flags & FOLL_PIN))
+               return NULL;
+ retry:
+       pte = huge_pte_offset(mm, haddr, huge_page_size(h));
+       if (!pte)
+               return NULL;
+       ptl = huge_pte_lock(h, mm, pte);
+       entry = huge_ptep_get(pte);
+       if (pte_present(entry)) {
+               page = pte_page(entry) +
+                               ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
+               /*
+                * Note that page may be a sub-page, and with vmemmap
+                * optimizations the page struct may be read only.
+                * try_grab_page() will increase the ref count on the
+                * head page, so this will be OK.
+                *
 -              if (WARN_ON_ONCE(!try_grab_page(page, flags))) {
++               * try_grab_page() should always be able to get the page here,
++               * because we hold the ptl lock and have verified pte_present().
+                */
++              if (try_grab_page(page, flags)) {
+                       page = NULL;
+                       goto out;
+               }
+       } else {
+               if (is_hugetlb_entry_migration(entry)) {
+                       spin_unlock(ptl);
+                       __migration_entry_wait_huge(pte, ptl);
+                       goto retry;
+               }
+               /*
+                * hwpoisoned entry is treated as no_page_table in
+                * follow_page_mask().
+                */
+       }
+ out:
+       spin_unlock(ptl);
+       return page;
+ }
  long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                         struct page **pages, struct vm_area_struct **vmas,
                         unsigned long *position, unsigned long *nr_pages,
                 * directly from any kind of swap entries.
                 */
                if (absent ||
-                   __follow_hugetlb_must_fault(flags, pte, &unshare)) {
+                   __follow_hugetlb_must_fault(vma, flags, pte, &unshare)) {
                        vm_fault_t ret;
                        unsigned int fault_flags = 0;
  
                         * tables. If the huge page is present, then the tail
                         * pages must also be present. The ptl prevents the
                         * head page and tail pages from being rearranged in
 -                       * any way. So this page must be available at this
 -                       * point, unless the page refcount overflowed:
 +                       * any way. As this is hugetlb, the pages will never
 +                       * be p2pdma or not longterm pinable. So this page
 +                       * must be available at this point, unless the page
 +                       * refcount overflowed:
                         */
                        if (WARN_ON_ONCE(!try_grab_folio(pages[i], refs,
                                                         flags))) {
@@@ -7214,123 -7263,6 +7265,6 @@@ __weak unsigned long hugetlb_mask_last_
   * These functions are overwritable if your architecture needs its own
   * behavior.
   */
- struct page * __weak
- follow_huge_addr(struct mm_struct *mm, unsigned long address,
-                             int write)
- {
-       return ERR_PTR(-EINVAL);
- }
- struct page * __weak
- follow_huge_pd(struct vm_area_struct *vma,
-              unsigned long address, hugepd_t hpd, int flags, int pdshift)
- {
-       WARN(1, "hugepd follow called with no support for hugepage directory format\n");
-       return NULL;
- }
- struct page * __weak
- follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags)
- {
-       struct hstate *h = hstate_vma(vma);
-       struct mm_struct *mm = vma->vm_mm;
-       struct page *page = NULL;
-       spinlock_t *ptl;
-       pte_t *ptep, pte;
-       /*
-        * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via
-        * follow_hugetlb_page().
-        */
-       if (WARN_ON_ONCE(flags & FOLL_PIN))
-               return NULL;
- retry:
-       ptep = huge_pte_offset(mm, address, huge_page_size(h));
-       if (!ptep)
-               return NULL;
-       ptl = huge_pte_lock(h, mm, ptep);
-       pte = huge_ptep_get(ptep);
-       if (pte_present(pte)) {
-               page = pte_page(pte) +
-                       ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
-               /*
-                * try_grab_page() should always be able to get the page here,
-                * because: a) we hold the pmd (ptl) lock, and b) we've just
-                * checked that the huge pmd (head) page is present in the
-                * page tables. The ptl prevents the head page and tail pages
-                * from being rearranged in any way. So this page must be
-                * available at this point, unless the page refcount
-                * overflowed:
-                */
-               if (try_grab_page(page, flags)) {
-                       page = NULL;
-                       goto out;
-               }
-       } else {
-               if (is_hugetlb_entry_migration(pte)) {
-                       spin_unlock(ptl);
-                       __migration_entry_wait_huge(ptep, ptl);
-                       goto retry;
-               }
-               /*
-                * hwpoisoned entry is treated as no_page_table in
-                * follow_page_mask().
-                */
-       }
- out:
-       spin_unlock(ptl);
-       return page;
- }
- struct page * __weak
- follow_huge_pud(struct mm_struct *mm, unsigned long address,
-               pud_t *pud, int flags)
- {
-       struct page *page = NULL;
-       spinlock_t *ptl;
-       pte_t pte;
-       if (WARN_ON_ONCE(flags & FOLL_PIN))
-               return NULL;
- retry:
-       ptl = huge_pte_lock(hstate_sizelog(PUD_SHIFT), mm, (pte_t *)pud);
-       if (!pud_huge(*pud))
-               goto out;
-       pte = huge_ptep_get((pte_t *)pud);
-       if (pte_present(pte)) {
-               page = pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
-               if (try_grab_page(page, flags)) {
-                       page = NULL;
-                       goto out;
-               }
-       } else {
-               if (is_hugetlb_entry_migration(pte)) {
-                       spin_unlock(ptl);
-                       __migration_entry_wait(mm, (pte_t *)pud, ptl);
-                       goto retry;
-               }
-               /*
-                * hwpoisoned entry is treated as no_page_table in
-                * follow_page_mask().
-                */
-       }
- out:
-       spin_unlock(ptl);
-       return page;
- }
- struct page * __weak
- follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int flags)
- {
-       if (flags & (FOLL_GET | FOLL_PIN))
-               return NULL;
-       return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
- }
  int isolate_hugetlb(struct page *page, struct list_head *list)
  {
        int ret = 0;
@@@ -7349,7 -7281,7 +7283,7 @@@ unlock
        return ret;
  }
  
- int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+ int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
  {
        int ret = 0;
  
                *hugetlb = true;
                if (HPageFreed(page))
                        ret = 0;
-               else if (HPageMigratable(page))
+               else if (HPageMigratable(page) || unpoison)
                        ret = get_page_unless_zero(page);
                else
                        ret = -EBUSY;
        return ret;
  }
  
- int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+ int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+                               bool *migratable_cleared)
  {
        int ret;
  
        spin_lock_irq(&hugetlb_lock);
-       ret = __get_huge_page_for_hwpoison(pfn, flags);
+       ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared);
        spin_unlock_irq(&hugetlb_lock);
        return ret;
  }
@@@ -7387,15 -7320,15 +7322,15 @@@ void putback_active_hugepage(struct pag
        put_page(page);
  }
  
- void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
+ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason)
  {
-       struct hstate *h = page_hstate(oldpage);
+       struct hstate *h = folio_hstate(old_folio);
  
-       hugetlb_cgroup_migrate(oldpage, newpage);
-       set_page_owner_migrate_reason(newpage, reason);
+       hugetlb_cgroup_migrate(old_folio, new_folio);
+       set_page_owner_migrate_reason(&new_folio->page, reason);
  
        /*
-        * transfer temporary state of the new huge page. This is
+        * transfer temporary state of the new hugetlb folio. This is
         * reverse to other transitions because the newpage is going to
         * be final while the old one will be freed so it takes over
         * the temporary status.
         * here as well otherwise the global surplus count will not match
         * the per-node's.
         */
-       if (HPageTemporary(newpage)) {
-               int old_nid = page_to_nid(oldpage);
-               int new_nid = page_to_nid(newpage);
+       if (folio_test_hugetlb_temporary(new_folio)) {
+               int old_nid = folio_nid(old_folio);
+               int new_nid = folio_nid(new_folio);
+               folio_set_hugetlb_temporary(old_folio);
+               folio_clear_hugetlb_temporary(new_folio);
  
-               SetHPageTemporary(oldpage);
-               ClearHPageTemporary(newpage);
  
                /*
                 * There is no need to transfer the per-node surplus state
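A note on the mm/hugetlb.c hunks above: the new hugetlb_follow_page_mask() takes over from the per-level follow_huge_*() weak helpers that are deleted later in this same file. The sketch below only illustrates how a generic walker might dispatch hugetlb VMAs to the new helper; walk_one_address() is a hypothetical name, and the real dispatch sits in mm/gup.c, which is not part of this excerpt.

#include <linux/hugetlb.h>
#include <linux/mm.h>

/*
 * Illustration only: route hugetlb VMAs to hugetlb_follow_page_mask()
 * instead of the old follow_huge_{addr,pd,pmd_pte,pud,pgd}() hooks.
 * walk_one_address() is a made-up caller, not upstream code.
 */
static struct page *walk_one_address(struct vm_area_struct *vma,
                                     unsigned long address,
                                     unsigned int flags)
{
        if (is_vm_hugetlb_page(vma)) {
                /* One helper now covers every hugetlb page-table level. */
                return hugetlb_follow_page_mask(vma, address, flags);
        }

        /* ... ordinary pgd/p4d/pud/pmd/pte walk for everything else ... */
        return NULL;
}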
diff --combined mm/kasan/kasan_test.c
index 54181eba3e2420703e70a4d6a765690525565934,9aa892e7b76c3f35fb9a8a66d0e06fdd3e2479f7..d1439669d6bc57856607d2d928423c9804e7ec4f
@@@ -5,8 -5,12 +5,12 @@@
   * Author: Andrey Ryabinin <[email protected]>
   */
  
+ #define pr_fmt(fmt) "kasan_test: " fmt
+ #include <kunit/test.h>
  #include <linux/bitops.h>
  #include <linux/delay.h>
+ #include <linux/io.h>
  #include <linux/kasan.h>
  #include <linux/kernel.h>
  #include <linux/mm.h>
  #include <linux/module.h>
  #include <linux/printk.h>
  #include <linux/random.h>
+ #include <linux/set_memory.h>
  #include <linux/slab.h>
  #include <linux/string.h>
+ #include <linux/tracepoint.h>
  #include <linux/uaccess.h>
- #include <linux/io.h>
  #include <linux/vmalloc.h>
- #include <linux/set_memory.h>
+ #include <trace/events/printk.h>
  
  #include <asm/page.h>
  
- #include <kunit/test.h>
  #include "kasan.h"
  
  #define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_GRANULE_SIZE)
  
+ static bool multishot;
+ /* Fields set based on lines observed in the console. */
+ static struct {
+       bool report_found;
+       bool async_fault;
+ } test_status;
  /*
   * Some tests use these global variables to store return values from function
   * calls that could otherwise be eliminated by the compiler as dead code.
  void *kasan_ptr_result;
  int kasan_int_result;
  
- static struct kunit_resource resource;
- static struct kunit_kasan_status test_status;
- static bool multishot;
+ /* Probe for console output: obtains test_status lines of interest. */
+ static void probe_console(void *ignore, const char *buf, size_t len)
+ {
+       if (strnstr(buf, "BUG: KASAN: ", len))
+               WRITE_ONCE(test_status.report_found, true);
+       else if (strnstr(buf, "Asynchronous fault: ", len))
+               WRITE_ONCE(test_status.async_fault, true);
+ }
  
- /*
-  * Temporarily enable multi-shot mode. Otherwise, KASAN would only report the
-  * first detected bug and panic the kernel if panic_on_warn is enabled. For
-  * hardware tag-based KASAN also allow tag checking to be reenabled for each
-  * test, see the comment for KUNIT_EXPECT_KASAN_FAIL().
-  */
- static int kasan_test_init(struct kunit *test)
+ static void register_tracepoints(struct tracepoint *tp, void *ignore)
+ {
+       check_trace_callback_type_console(probe_console);
+       if (!strcmp(tp->name, "console"))
+               WARN_ON(tracepoint_probe_register(tp, probe_console, NULL));
+ }
+ static void unregister_tracepoints(struct tracepoint *tp, void *ignore)
+ {
+       if (!strcmp(tp->name, "console"))
+               tracepoint_probe_unregister(tp, probe_console, NULL);
+ }
+ static int kasan_suite_init(struct kunit_suite *suite)
  {
        if (!kasan_enabled()) {
-               kunit_err(test, "can't run KASAN tests with KASAN disabled");
+               pr_err("Can't run KASAN tests with KASAN disabled");
                return -1;
        }
  
+       /* Stop failing KUnit tests on KASAN reports. */
+       kasan_kunit_test_suite_start();
+       /*
+        * Temporarily enable multi-shot mode. Otherwise, KASAN would only
+        * report the first detected bug and panic the kernel if panic_on_warn
+        * is enabled.
+        */
        multishot = kasan_save_enable_multi_shot();
-       test_status.report_found = false;
-       test_status.sync_fault = false;
-       kunit_add_named_resource(test, NULL, NULL, &resource,
-                                       "kasan_status", &test_status);
+       /*
+        * Because we want to be able to build the test as a module, we need to
+        * iterate through all known tracepoints, since the static registration
+        * won't work here.
+        */
+       for_each_kernel_tracepoint(register_tracepoints, NULL);
        return 0;
  }
  
- static void kasan_test_exit(struct kunit *test)
+ static void kasan_suite_exit(struct kunit_suite *suite)
  {
+       kasan_kunit_test_suite_end();
        kasan_restore_multi_shot(multishot);
-       KUNIT_EXPECT_FALSE(test, test_status.report_found);
+       for_each_kernel_tracepoint(unregister_tracepoints, NULL);
+       tracepoint_synchronize_unregister();
+ }
+ static void kasan_test_exit(struct kunit *test)
+ {
+       KUNIT_EXPECT_FALSE(test, READ_ONCE(test_status.report_found));
  }
  
  /**
        if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) &&                         \
            kasan_sync_fault_possible()) {                              \
                if (READ_ONCE(test_status.report_found) &&              \
-                   READ_ONCE(test_status.sync_fault))                  \
+                   !READ_ONCE(test_status.async_fault))                \
                        kasan_enable_tagging();                         \
                migrate_enable();                                       \
        }                                                               \
        WRITE_ONCE(test_status.report_found, false);                    \
+       WRITE_ONCE(test_status.async_fault, false);                     \
  } while (0)
  
  #define KASAN_TEST_NEEDS_CONFIG_ON(test, config) do {                 \
@@@ -1103,6 -1145,67 +1145,67 @@@ static void kmalloc_double_kzfree(struc
        KUNIT_EXPECT_KASAN_FAIL(test, kfree_sensitive(ptr));
  }
  
+ /*
+  * The two tests below check that Generic KASAN prints auxiliary stack traces
+  * for RCU callbacks and workqueues. The reports need to be inspected manually.
+  *
+  * These tests are still enabled for other KASAN modes to make sure that all
+  * modes report bad accesses in tested scenarios.
+  */
+ static struct kasan_rcu_info {
+       int i;
+       struct rcu_head rcu;
+ } *global_rcu_ptr;
+ static void rcu_uaf_reclaim(struct rcu_head *rp)
+ {
+       struct kasan_rcu_info *fp =
+               container_of(rp, struct kasan_rcu_info, rcu);
+       kfree(fp);
+       ((volatile struct kasan_rcu_info *)fp)->i;
+ }
+ static void rcu_uaf(struct kunit *test)
+ {
+       struct kasan_rcu_info *ptr;
+       ptr = kmalloc(sizeof(struct kasan_rcu_info), GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+       global_rcu_ptr = rcu_dereference_protected(
+                               (struct kasan_rcu_info __rcu *)ptr, NULL);
+       KUNIT_EXPECT_KASAN_FAIL(test,
+               call_rcu(&global_rcu_ptr->rcu, rcu_uaf_reclaim);
+               rcu_barrier());
+ }
+ static void workqueue_uaf_work(struct work_struct *work)
+ {
+       kfree(work);
+ }
+ static void workqueue_uaf(struct kunit *test)
+ {
+       struct workqueue_struct *workqueue;
+       struct work_struct *work;
+       workqueue = create_workqueue("kasan_workqueue_test");
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, workqueue);
+       work = kmalloc(sizeof(struct work_struct), GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, work);
+       INIT_WORK(work, workqueue_uaf_work);
+       queue_work(workqueue, work);
+       destroy_workqueue(workqueue);
+       KUNIT_EXPECT_KASAN_FAIL(test,
+               ((volatile struct work_struct *)work)->data);
+ }
  static void vmalloc_helpers_tags(struct kunit *test)
  {
        void *ptr;
@@@ -1299,7 -1402,7 +1402,7 @@@ static void match_all_not_assigned(stru
        KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC);
  
        for (i = 0; i < 256; i++) {
 -              size = prandom_u32_max(1024) + 1;
 +              size = get_random_u32_inclusive(1, 1024);
                ptr = kmalloc(size, GFP_KERNEL);
                KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
                KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN);
        }
  
        for (i = 0; i < 256; i++) {
 -              order = prandom_u32_max(4) + 1;
 +              order = get_random_u32_inclusive(1, 4);
                pages = alloc_pages(GFP_KERNEL, order);
                ptr = page_address(pages);
                KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
                return;
  
        for (i = 0; i < 256; i++) {
 -              size = prandom_u32_max(1024) + 1;
 +              size = get_random_u32_inclusive(1, 1024);
                ptr = vmalloc(size);
                KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
                KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN);
@@@ -1434,6 -1537,8 +1537,8 @@@ static struct kunit_case kasan_kunit_te
        KUNIT_CASE(kasan_bitops_generic),
        KUNIT_CASE(kasan_bitops_tags),
        KUNIT_CASE(kmalloc_double_kzfree),
+       KUNIT_CASE(rcu_uaf),
+       KUNIT_CASE(workqueue_uaf),
        KUNIT_CASE(vmalloc_helpers_tags),
        KUNIT_CASE(vmalloc_oob),
        KUNIT_CASE(vmap_tags),
  
  static struct kunit_suite kasan_kunit_test_suite = {
        .name = "kasan",
-       .init = kasan_test_init,
        .test_cases = kasan_kunit_test_cases,
        .exit = kasan_test_exit,
+       .suite_init = kasan_suite_init,
+       .suite_exit = kasan_suite_exit,
  };
  
  kunit_test_suite(kasan_kunit_test_suite);
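The suite_init/suite_exit rework above watches kernel console output through the "console" tracepoint rather than a per-test KUnit resource. As a rough, self-contained illustration of that technique (not part of the diff; my_probe_console and the module boilerplate are invented names), a loadable module can hook the same tracepoint like this:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/tracepoint.h>

/* Called for every line that reaches the console tracepoint. */
static void my_probe_console(void *ignore, const char *text, size_t len)
{
        if (strnstr(text, "BUG: ", len))
                pr_info("console output mentioned a BUG\n");
}

static void my_register(struct tracepoint *tp, void *ignore)
{
        /* Only the "console" tracepoint is of interest here. */
        if (!strcmp(tp->name, "console"))
                WARN_ON(tracepoint_probe_register(tp, my_probe_console, NULL));
}

static void my_unregister(struct tracepoint *tp, void *ignore)
{
        if (!strcmp(tp->name, "console"))
                tracepoint_probe_unregister(tp, my_probe_console, NULL);
}

static int __init my_init(void)
{
        /* Modules cannot use the static tracepoint hooks, so iterate. */
        for_each_kernel_tracepoint(my_register, NULL);
        return 0;
}

static void __exit my_exit(void)
{
        for_each_kernel_tracepoint(my_unregister, NULL);
        tracepoint_synchronize_unregister();
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");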
diff --combined mm/kfence/core.c
index 6cbd93f2007b493aa1bd56777ff2e2abbb10e38e,08f5bd6fc36d6a11d11fe11343e15f591367bf93..5349c37a5dac9fc83f9a1eec0f12a9f6c6ab4b48
@@@ -26,7 -26,6 +26,6 @@@
  #include <linux/random.h>
  #include <linux/rcupdate.h>
  #include <linux/sched/clock.h>
- #include <linux/sched/sysctl.h>
  #include <linux/seq_file.h>
  #include <linux/slab.h>
  #include <linux/spinlock.h>
@@@ -360,9 -359,9 +359,9 @@@ static void *kfence_guarded_alloc(struc
        unsigned long flags;
        struct slab *slab;
        void *addr;
 -      const bool random_right_allocate = prandom_u32_max(2);
 +      const bool random_right_allocate = get_random_u32_below(2);
        const bool random_fault = CONFIG_KFENCE_STRESS_TEST_FAULTS &&
 -                                !prandom_u32_max(CONFIG_KFENCE_STRESS_TEST_FAULTS);
 +                                !get_random_u32_below(CONFIG_KFENCE_STRESS_TEST_FAULTS);
  
        /* Try to obtain a free object. */
        raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
@@@ -799,16 -798,7 +798,7 @@@ static void toggle_allocation_gate(stru
        /* Enable static key, and await allocation to happen. */
        static_branch_enable(&kfence_allocation_key);
  
-       if (sysctl_hung_task_timeout_secs) {
-               /*
-                * During low activity with no allocations we might wait a
-                * while; let's avoid the hung task warning.
-                */
-               wait_event_idle_timeout(allocation_wait, atomic_read(&kfence_allocation_gate),
-                                       sysctl_hung_task_timeout_secs * HZ / 2);
-       } else {
-               wait_event_idle(allocation_wait, atomic_read(&kfence_allocation_gate));
-       }
+       wait_event_idle(allocation_wait, atomic_read(&kfence_allocation_gate));
  
        /* Disable static key and reset timer. */
        static_branch_disable(&kfence_allocation_key);
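Both the kfence hunk above and the earlier KASAN test hunks replace prandom_u32_max() with the newer bounded helpers from <linux/random.h>. A minimal sketch of the two equivalences, assuming a tree where both helpers are available (as in this merge); random_api_examples() is just an illustrative wrapper:

#include <linux/random.h>
#include <linux/types.h>

static void random_api_examples(void)
{
        /* Uniform in [0, 2): replaces prandom_u32_max(2). */
        bool coin = get_random_u32_below(2);

        /* Uniform in [1, 1024]: replaces prandom_u32_max(1024) + 1. */
        u32 size = get_random_u32_inclusive(1, 1024);

        (void)coin;
        (void)size;
}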
diff --combined mm/madvise.c
index d03e149ffe6e86af9aaf6d55d4bbf374d915c026,b7d9b1a1c135c2e16291ff2380e4ce9e0844d9a9..a56a6d17e201e3cecc3f2b8d66361db2ee5e91b0
@@@ -95,9 -95,6 +95,6 @@@ struct anon_vma_name *anon_vma_name(str
  {
        mmap_assert_locked(vma->vm_mm);
  
-       if (vma->vm_file)
-               return NULL;
        return vma->anon_name;
  }
  
@@@ -183,7 -180,7 +180,7 @@@ success
         * vm_flags is protected by the mmap_lock held in write mode.
         */
        vma->vm_flags = new_flags;
-       if (!vma->vm_file) {
+       if (!vma->vm_file || vma_is_anon_shmem(vma)) {
                error = replace_anon_vma_name(vma, anon_name);
                if (error)
                        return error;
@@@ -226,6 -223,7 +223,7 @@@ static int swapin_walk_pmd_entry(pmd_t 
                        put_page(page);
        }
        swap_read_unplug(splug);
+       cond_resched();
  
        return 0;
  }
@@@ -321,6 -319,21 +319,21 @@@ static long madvise_willneed(struct vm_
        return 0;
  }
  
+ static inline bool can_do_file_pageout(struct vm_area_struct *vma)
+ {
+       if (!vma->vm_file)
+               return false;
+       /*
+        * paging out pagecache only for non-anonymous mappings that correspond
+        * to the files the calling process could (if tried) open for writing;
+        * otherwise we'd be including shared non-exclusive mappings, which
+        * opens a side channel.
+        */
+       return inode_owner_or_capable(&init_user_ns,
+                                     file_inode(vma->vm_file)) ||
+              file_permission(vma->vm_file, MAY_WRITE) == 0;
+ }
  static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
                                unsigned long addr, unsigned long end,
                                struct mm_walk *walk)
        spinlock_t *ptl;
        struct page *page = NULL;
        LIST_HEAD(page_list);
+       bool pageout_anon_only_filter;
  
        if (fatal_signal_pending(current))
                return -EINTR;
  
+       pageout_anon_only_filter = pageout && !vma_is_anonymous(vma) &&
+                                       !can_do_file_pageout(vma);
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
        if (pmd_trans_huge(*pmd)) {
                pmd_t orig_pmd;
                if (page_mapcount(page) != 1)
                        goto huge_unlock;
  
+               if (pageout_anon_only_filter && !PageAnon(page))
+                       goto huge_unlock;
                if (next - addr != HPAGE_PMD_SIZE) {
                        int err;
  
@@@ -432,6 -452,8 +452,8 @@@ regular_page
                if (PageTransCompound(page)) {
                        if (page_mapcount(page) != 1)
                                break;
+                       if (pageout_anon_only_filter && !PageAnon(page))
+                               break;
                        get_page(page);
                        if (!trylock_page(page)) {
                                put_page(page);
                if (!PageLRU(page) || page_mapcount(page) != 1)
                        continue;
  
+               if (pageout_anon_only_filter && !PageAnon(page))
+                       continue;
                VM_BUG_ON_PAGE(PageTransCompound(page), page);
  
                if (pte_young(ptent)) {
@@@ -553,23 -578,6 +578,6 @@@ static void madvise_pageout_page_range(
        tlb_end_vma(tlb, vma);
  }
  
- static inline bool can_do_pageout(struct vm_area_struct *vma)
- {
-       if (vma_is_anonymous(vma))
-               return true;
-       if (!vma->vm_file)
-               return false;
-       /*
-        * paging out pagecache only for non-anonymous mappings that correspond
-        * to the files the calling process could (if tried) open for writing;
-        * otherwise we'd be including shared non-exclusive mappings, which
-        * opens a side channel.
-        */
-       return inode_owner_or_capable(&init_user_ns,
-                                     file_inode(vma->vm_file)) ||
-              file_permission(vma->vm_file, MAY_WRITE) == 0;
- }
  static long madvise_pageout(struct vm_area_struct *vma,
                        struct vm_area_struct **prev,
                        unsigned long start_addr, unsigned long end_addr)
        if (!can_madv_lru_vma(vma))
                return -EINVAL;
  
-       if (!can_do_pageout(vma))
+       /*
+        * If the VMA belongs to a private file mapping, there can be private
+        * dirty pages which can be paged out if even this process is neither
+        * owner nor write capable of the file. We allow private file mappings
+        * further to pageout dirty anon pages.
+        */
+       if (!vma_is_anonymous(vma) && (!can_do_file_pageout(vma) &&
+                               (vma->vm_flags & VM_MAYSHARE)))
                return 0;
  
        lru_add_drain();
@@@ -1273,7 -1288,7 +1288,7 @@@ static int madvise_vma_anon_name(struc
        int error;
  
        /* Only anonymous mappings can be named */
-       if (vma->vm_file)
+       if (vma->vm_file && !vma_is_anon_shmem(vma))
                return -EBADF;
  
        error = madvise_update_vma(vma, prev, start, end, vma->vm_flags,
@@@ -1459,7 -1474,7 +1474,7 @@@ SYSCALL_DEFINE5(process_madvise, int, p
                goto out;
        }
  
 -      ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
 +      ret = import_iovec(ITER_DEST, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
        if (ret < 0)
                goto out;
  
diff --combined mm/migrate.c
index 19ba91ba3d05d88d399f244da4b715bff9d37911,3be90351ad1d4928f9e1970318ee375daf229bf3..a4d3fc65085f3fe83a6135205a2c26174749e281
@@@ -74,22 -74,13 +74,22 @@@ int isolate_movable_page(struct page *p
        if (unlikely(!get_page_unless_zero(page)))
                goto out;
  
 +      if (unlikely(PageSlab(page)))
 +              goto out_putpage;
 +      /* Pairs with smp_wmb() in slab freeing, e.g. SLUB's __free_slab() */
 +      smp_rmb();
        /*
 -       * Check PageMovable before holding a PG_lock because page's owner
 -       * assumes anybody doesn't touch PG_lock of newly allocated page
 -       * so unconditionally grabbing the lock ruins page's owner side.
 +       * Check movable flag before taking the page lock because
 +       * we use non-atomic bitops on newly allocated page flags so
 +       * unconditionally grabbing the lock ruins page's owner side.
         */
        if (unlikely(!__PageMovable(page)))
                goto out_putpage;
 +      /* Pairs with smp_wmb() in slab allocation, e.g. SLUB's alloc_slab_page() */
 +      smp_rmb();
 +      if (unlikely(PageSlab(page)))
 +              goto out_putpage;
 +
        /*
         * As movable pages are not isolated from LRU lists, concurrent
         * compaction threads can race against page migration functions
@@@ -829,7 -820,6 +829,7 @@@ int buffer_migrate_folio_norefs(struct 
  {
        return __buffer_migrate_folio(mapping, dst, src, mode, true);
  }
 +EXPORT_SYMBOL_GPL(buffer_migrate_folio_norefs);
  #endif
  
  int filemap_migrate_folio(struct address_space *mapping,
  }
  
  /*
-  * Obtain the lock on page, remove all ptes and migrate the page
-  * to the newly allocated page in newpage.
+  * Obtain the lock on folio, remove all ptes and migrate the folio
+  * to the newly allocated folio in dst.
   */
  static int unmap_and_move(new_page_t get_new_page,
                                   free_page_t put_new_page,
-                                  unsigned long private, struct page *page,
+                                  unsigned long private, struct folio *src,
                                   int force, enum migrate_mode mode,
                                   enum migrate_reason reason,
                                   struct list_head *ret)
  {
-       struct folio *dst, *src = page_folio(page);
+       struct folio *dst;
        int rc = MIGRATEPAGE_SUCCESS;
        struct page *newpage = NULL;
  
-       if (!thp_migration_supported() && PageTransHuge(page))
+       if (!thp_migration_supported() && folio_test_transhuge(src))
                return -ENOSYS;
  
-       if (page_count(page) == 1) {
-               /* Page was freed from under us. So we are done. */
-               ClearPageActive(page);
-               ClearPageUnevictable(page);
+       if (folio_ref_count(src) == 1) {
+               /* Folio was freed from under us. So we are done. */
+               folio_clear_active(src);
+               folio_clear_unevictable(src);
                /* free_pages_prepare() will clear PG_isolated. */
                goto out;
        }
  
-       newpage = get_new_page(page, private);
+       newpage = get_new_page(&src->page, private);
        if (!newpage)
                return -ENOMEM;
        dst = page_folio(newpage);
  
-       newpage->private = 0;
+       dst->private = NULL;
        rc = __unmap_and_move(src, dst, force, mode);
        if (rc == MIGRATEPAGE_SUCCESS)
-               set_page_owner_migrate_reason(newpage, reason);
+               set_page_owner_migrate_reason(&dst->page, reason);
  
  out:
        if (rc != -EAGAIN) {
                /*
-                * A page that has been migrated has all references
-                * removed and will be freed. A page that has not been
+                * A folio that has been migrated has all references
+                * removed and will be freed. A folio that has not been
                 * migrated will have kept its references and be restored.
                 */
-               list_del(&page->lru);
+               list_del(&src->lru);
        }
  
        /*
         * If migration is successful, releases reference grabbed during
-        * isolation. Otherwise, restore the page to right list unless
+        * isolation. Otherwise, restore the folio to right list unless
         * we want to retry.
         */
        if (rc == MIGRATEPAGE_SUCCESS) {
                /*
-                * Compaction can migrate also non-LRU pages which are
+                * Compaction can migrate also non-LRU folios which are
                 * not accounted to NR_ISOLATED_*. They can be recognized
-                * as __PageMovable
+                * as __folio_test_movable
                 */
-               if (likely(!__PageMovable(page)))
-                       mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
-                                       page_is_file_lru(page), -thp_nr_pages(page));
+               if (likely(!__folio_test_movable(src)))
+                       mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON +
+                                       folio_is_file_lru(src), -folio_nr_pages(src));
  
                if (reason != MR_MEMORY_FAILURE)
                        /*
-                        * We release the page in page_handle_poison.
+                        * We release the folio in page_handle_poison.
                         */
-                       put_page(page);
+                       folio_put(src);
        } else {
                if (rc != -EAGAIN)
-                       list_add_tail(&page->lru, ret);
+                       list_add_tail(&src->lru, ret);
  
                if (put_new_page)
-                       put_new_page(newpage, private);
+                       put_new_page(&dst->page, private);
                else
-                       put_page(newpage);
+                       folio_put(dst);
        }
  
        return rc;
@@@ -1308,7 -1298,7 +1308,7 @@@ static int unmap_and_move_huge_page(new
         * folio_mapping() set, hugetlbfs specific move page routine will not
         * be called and we could leak usage counts for subpools.
         */
-       if (hugetlb_page_subpool(hpage) && !folio_mapping(src)) {
+       if (hugetlb_folio_subpool(src) && !folio_mapping(src)) {
                rc = -EBUSY;
                goto out_unlock;
        }
@@@ -1358,7 -1348,7 +1358,7 @@@ put_anon
                put_anon_vma(anon_vma);
  
        if (rc == MIGRATEPAGE_SUCCESS) {
-               move_hugetlb_state(hpage, new_hpage, reason);
+               move_hugetlb_state(src, dst, reason);
                put_new_page = NULL;
        }
  
        return rc;
  }
  
- static inline int try_split_thp(struct page *page, struct list_head *split_pages)
+ static inline int try_split_folio(struct folio *folio, struct list_head *split_folios)
  {
        int rc;
  
-       lock_page(page);
-       rc = split_huge_page_to_list(page, split_pages);
-       unlock_page(page);
+       folio_lock(folio);
+       rc = split_folio_to_list(folio, split_folios);
+       folio_unlock(folio);
        if (!rc)
-               list_move_tail(&page->lru, split_pages);
+               list_move_tail(&folio->lru, split_folios);
  
        return rc;
  }
  
  /*
-  * migrate_pages - migrate the pages specified in a list, to the free pages
+  * migrate_pages - migrate the folios specified in a list, to the free folios
   *               supplied as the target for the page migration
   *
-  * @from:             The list of pages to be migrated.
-  * @get_new_page:     The function used to allocate free pages to be used
-  *                    as the target of the page migration.
-  * @put_new_page:     The function used to free target pages if migration
+  * @from:             The list of folios to be migrated.
+  * @get_new_page:     The function used to allocate free folios to be used
+  *                    as the target of the folio migration.
+  * @put_new_page:     The function used to free target folios if migration
   *                    fails, or NULL if no special handling is necessary.
   * @private:          Private data to be passed on to get_new_page()
   * @mode:             The migration mode that specifies the constraints for
-  *                    page migration, if any.
-  * @reason:           The reason for page migration.
-  * @ret_succeeded:    Set to the number of normal pages migrated successfully if
+  *                    folio migration, if any.
+  * @reason:           The reason for folio migration.
+  * @ret_succeeded:    Set to the number of folios migrated successfully if
   *                    the caller passes a non-NULL pointer.
   *
-  * The function returns after 10 attempts or if no pages are movable any more
-  * because the list has become empty or no retryable pages exist any more.
-  * It is caller's responsibility to call putback_movable_pages() to return pages
+  * The function returns after 10 attempts or if no folios are movable any more
+  * because the list has become empty or no retryable folios exist any more.
+  * It is caller's responsibility to call putback_movable_pages() to return folios
   * to the LRU or free list only if ret != 0.
   *
-  * Returns the number of {normal page, THP, hugetlb} that were not migrated, or
-  * an error code. The number of THP splits will be considered as the number of
-  * non-migrated THP, no matter how many subpages of the THP are migrated successfully.
+  * Returns the number of {normal folio, large folio, hugetlb} that were not
+  * migrated, or an error code. The number of large folio splits will be
+  * considered as the number of non-migrated large folio, no matter how many
+  * split folios of the large folio are migrated successfully.
   */
  int migrate_pages(struct list_head *from, new_page_t get_new_page,
                free_page_t put_new_page, unsigned long private,
                enum migrate_mode mode, int reason, unsigned int *ret_succeeded)
  {
        int retry = 1;
+       int large_retry = 1;
        int thp_retry = 1;
        int nr_failed = 0;
        int nr_failed_pages = 0;
        int nr_retry_pages = 0;
        int nr_succeeded = 0;
        int nr_thp_succeeded = 0;
+       int nr_large_failed = 0;
        int nr_thp_failed = 0;
        int nr_thp_split = 0;
        int pass = 0;
+       bool is_large = false;
        bool is_thp = false;
-       struct page *page;
-       struct page *page2;
-       int rc, nr_subpages;
-       LIST_HEAD(ret_pages);
-       LIST_HEAD(thp_split_pages);
+       struct folio *folio, *folio2;
+       int rc, nr_pages;
+       LIST_HEAD(ret_folios);
+       LIST_HEAD(split_folios);
        bool nosplit = (reason == MR_NUMA_MISPLACED);
-       bool no_subpage_counting = false;
+       bool no_split_folio_counting = false;
  
        trace_mm_migrate_pages_start(mode, reason);
  
- thp_subpage_migration:
-       for (pass = 0; pass < 10 && (retry || thp_retry); pass++) {
+ split_folio_migration:
+       for (pass = 0; pass < 10 && (retry || large_retry); pass++) {
                retry = 0;
+               large_retry = 0;
                thp_retry = 0;
                nr_retry_pages = 0;
  
-               list_for_each_entry_safe(page, page2, from, lru) {
+               list_for_each_entry_safe(folio, folio2, from, lru) {
                        /*
-                        * THP statistics is based on the source huge page.
-                        * Capture required information that might get lost
-                        * during migration.
+                        * Large folio statistics is based on the source large
+                        * folio. Capture required information that might get
+                        * lost during migration.
                         */
-                       is_thp = PageTransHuge(page) && !PageHuge(page);
-                       nr_subpages = compound_nr(page);
+                       is_large = folio_test_large(folio) && !folio_test_hugetlb(folio);
+                       is_thp = is_large && folio_test_pmd_mappable(folio);
+                       nr_pages = folio_nr_pages(folio);
                        cond_resched();
  
-                       if (PageHuge(page))
+                       if (folio_test_hugetlb(folio))
                                rc = unmap_and_move_huge_page(get_new_page,
-                                               put_new_page, private, page,
-                                               pass > 2, mode, reason,
-                                               &ret_pages);
+                                               put_new_page, private,
+                                               &folio->page, pass > 2, mode,
+                                               reason,
+                                               &ret_folios);
                        else
                                rc = unmap_and_move(get_new_page, put_new_page,
-                                               private, page, pass > 2, mode,
-                                               reason, &ret_pages);
+                                               private, folio, pass > 2, mode,
+                                               reason, &ret_folios);
                        /*
                         * The rules are:
-                        *      Success: non hugetlb page will be freed, hugetlb
-                        *               page will be put back
+                        *      Success: non hugetlb folio will be freed, hugetlb
+                        *               folio will be put back
                         *      -EAGAIN: stay on the from list
                         *      -ENOMEM: stay on the from list
                         *      -ENOSYS: stay on the from list
-                        *      Other errno: put on ret_pages list then splice to
+                        *      Other errno: put on ret_folios list then splice to
                         *                   from list
                         */
                        switch(rc) {
                        /*
-                        * THP migration might be unsupported or the
-                        * allocation could've failed so we should
-                        * retry on the same page with the THP split
-                        * to base pages.
+                        * Large folio migration might be unsupported or
+                        * the allocation could've failed so we should retry
+                        * on the same folio with the large folio split
+                        * to normal folios.
                         *
-                        * Sub-pages are put in thp_split_pages, and
+                        * Split folios are put in split_folios, and
                         * we will migrate them after the rest of the
                         * list is processed.
                         */
                        case -ENOSYS:
-                               /* THP migration is unsupported */
-                               if (is_thp) {
-                                       nr_thp_failed++;
-                                       if (!try_split_thp(page, &thp_split_pages)) {
-                                               nr_thp_split++;
+                               /* Large folio migration is unsupported */
+                               if (is_large) {
+                                       nr_large_failed++;
+                                       nr_thp_failed += is_thp;
+                                       if (!try_split_folio(folio, &split_folios)) {
+                                               nr_thp_split += is_thp;
                                                break;
                                        }
                                /* Hugetlb migration is unsupported */
-                               } else if (!no_subpage_counting) {
+                               } else if (!no_split_folio_counting) {
                                        nr_failed++;
                                }
  
-                               nr_failed_pages += nr_subpages;
-                               list_move_tail(&page->lru, &ret_pages);
+                               nr_failed_pages += nr_pages;
+                               list_move_tail(&folio->lru, &ret_folios);
                                break;
                        case -ENOMEM:
                                /*
                                 * When memory is low, don't bother to try to migrate
-                                * other pages, just exit.
+                                * other folios, just exit.
                                 */
-                               if (is_thp) {
-                                       nr_thp_failed++;
-                                       /* THP NUMA faulting doesn't split THP to retry. */
-                                       if (!nosplit && !try_split_thp(page, &thp_split_pages)) {
-                                               nr_thp_split++;
-                                               break;
+                               if (is_large) {
+                                       nr_large_failed++;
+                                       nr_thp_failed += is_thp;
+                                       /* Large folio NUMA faulting doesn't split to retry. */
+                                       if (!nosplit) {
+                                               int ret = try_split_folio(folio, &split_folios);
+                                               if (!ret) {
+                                                       nr_thp_split += is_thp;
+                                                       break;
+                                               } else if (reason == MR_LONGTERM_PIN &&
+                                                          ret == -EAGAIN) {
+                                                       /*
+                                                        * Try again to split large folio to
+                                                        * mitigate the failure of longterm pinning.
+                                                        */
+                                                       large_retry++;
+                                                       thp_retry += is_thp;
+                                                       nr_retry_pages += nr_pages;
+                                                       break;
+                                               }
                                        }
-                               } else if (!no_subpage_counting) {
+                               } else if (!no_split_folio_counting) {
                                        nr_failed++;
                                }
  
-                               nr_failed_pages += nr_subpages + nr_retry_pages;
+                               nr_failed_pages += nr_pages + nr_retry_pages;
                                /*
-                                * There might be some subpages of fail-to-migrate THPs
-                                * left in thp_split_pages list. Move them back to migration
+                                * There might be some split folios of fail-to-migrate large
+                                * folios left in split_folios list. Move them back to migration
                                 * list so that they could be put back to the right list by
-                                * the caller otherwise the page refcnt will be leaked.
+                                * the caller otherwise the folio refcnt will be leaked.
                                 */
-                               list_splice_init(&thp_split_pages, from);
+                               list_splice_init(&split_folios, from);
                                /* nr_failed isn't updated for not used */
+                               nr_large_failed += large_retry;
                                nr_thp_failed += thp_retry;
                                goto out;
                        case -EAGAIN:
-                               if (is_thp)
-                                       thp_retry++;
-                               else if (!no_subpage_counting)
+                               if (is_large) {
+                                       large_retry++;
+                                       thp_retry += is_thp;
+                               } else if (!no_split_folio_counting) {
                                        retry++;
-                               nr_retry_pages += nr_subpages;
+                               }
+                               nr_retry_pages += nr_pages;
                                break;
                        case MIGRATEPAGE_SUCCESS:
-                               nr_succeeded += nr_subpages;
-                               if (is_thp)
-                                       nr_thp_succeeded++;
+                               nr_succeeded += nr_pages;
+                               nr_thp_succeeded += is_thp;
                                break;
                        default:
                                /*
                                 * Permanent failure (-EBUSY, etc.):
-                                * unlike -EAGAIN case, the failed page is
-                                * removed from migration page list and not
+                                * unlike -EAGAIN case, the failed folio is
+                                * removed from migration folio list and not
                                 * retried in the next outer loop.
                                 */
-                               if (is_thp)
-                                       nr_thp_failed++;
-                               else if (!no_subpage_counting)
+                               if (is_large) {
+                                       nr_large_failed++;
+                                       nr_thp_failed += is_thp;
+                               } else if (!no_split_folio_counting) {
                                        nr_failed++;
+                               }
  
-                               nr_failed_pages += nr_subpages;
+                               nr_failed_pages += nr_pages;
                                break;
                        }
                }
        }
        nr_failed += retry;
+       nr_large_failed += large_retry;
        nr_thp_failed += thp_retry;
        nr_failed_pages += nr_retry_pages;
        /*
-        * Try to migrate subpages of fail-to-migrate THPs, no nr_failed
-        * counting in this round, since all subpages of a THP is counted
-        * as 1 failure in the first round.
+        * Try to migrate split folios of fail-to-migrate large folios, no
+        * nr_failed counting in this round, since all split folios of a
+        * large folio is counted as 1 failure in the first round.
         */
-       if (!list_empty(&thp_split_pages)) {
+       if (!list_empty(&split_folios)) {
                /*
-                * Move non-migrated pages (after 10 retries) to ret_pages
+                * Move non-migrated folios (after 10 retries) to ret_folios
                 * to avoid migrating them again.
                 */
-               list_splice_init(from, &ret_pages);
-               list_splice_init(&thp_split_pages, from);
-               no_subpage_counting = true;
+               list_splice_init(from, &ret_folios);
+               list_splice_init(&split_folios, from);
+               no_split_folio_counting = true;
                retry = 1;
-               goto thp_subpage_migration;
+               goto split_folio_migration;
        }
  
-       rc = nr_failed + nr_thp_failed;
+       rc = nr_failed + nr_large_failed;
  out:
        /*
-        * Put the permanent failure page back to migration list, they
+        * Put the permanent failure folio back to migration list, they
         * will be put back to the right list by the caller.
         */
-       list_splice(&ret_pages, from);
+       list_splice(&ret_folios, from);
  
        /*
-        * Return 0 in case all subpages of fail-to-migrate THPs are
-        * migrated successfully.
+        * Return 0 in case all split folios of fail-to-migrate large folios
+        * are migrated successfully.
         */
        if (list_empty(from))
                rc = 0;
@@@ -1630,7 -1647,7 +1657,7 @@@ struct page *alloc_migration_target(str
                nid = folio_nid(folio);
  
        if (folio_test_hugetlb(folio)) {
-               struct hstate *h = page_hstate(&folio->page);
+               struct hstate *h = folio_hstate(folio);
  
                gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
                return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask);
@@@ -1896,7 -1913,6 +1923,6 @@@ static void do_pages_stat_array(struct 
  
        for (i = 0; i < nr_pages; i++) {
                unsigned long addr = (unsigned long)(*pages);
-               unsigned int foll_flags = FOLL_DUMP;
                struct vm_area_struct *vma;
                struct page *page;
                int err = -EFAULT;
                if (!vma)
                        goto set_status;
  
-               /* Not all huge page follow APIs support 'FOLL_GET' */
-               if (!is_vm_hugetlb_page(vma))
-                       foll_flags |= FOLL_GET;
                /* FOLL_DUMP to ignore special (like zero) pages */
-               page = follow_page(vma, addr, foll_flags);
+               page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
  
                err = PTR_ERR(page);
                if (IS_ERR(page))
                if (!is_zone_device_page(page))
                        err = page_to_nid(page);
  
-               if (foll_flags & FOLL_GET)
-                       put_page(page);
+               put_page(page);
  set_status:
                *status = err;
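The do_pages_stat_array() hunk above drops the hugetlb special case and always uses FOLL_GET | FOLL_DUMP; this is the path exercised by move_pages(2) when nodes is NULL (query only, no migration). A small userspace sketch using libnuma's wrapper; it assumes libnuma is installed and the program is linked with -lnuma:

#include <numaif.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        long page_size = sysconf(_SC_PAGESIZE);
        void *pages[1];
        int status[1] = { -1 };

        void *buf = aligned_alloc(page_size, page_size);
        if (!buf)
                return 1;
        memset(buf, 0, page_size);      /* fault the page in */
        pages[0] = buf;

        /* nodes == NULL: only report where each page currently sits. */
        if (move_pages(0 /* self */, 1, pages, NULL, status, 0))
                perror("move_pages");
        else
                printf("page resides on node %d\n", status[0]);

        free(buf);
        return 0;
}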
  
diff --combined mm/mmap.c
index 54abd46e60078aa4a8b63355dbf356b533597aad,7d24fc478ffa59bc86bfe7a2a4d68c434a0c5edd..87d929316d57264197b01cc815209ca3a5d039b7
+++ b/mm/mmap.c
@@@ -1778,6 -1778,9 +1778,6 @@@ get_unmapped_area(struct file *file, un
                 */
                pgoff = 0;
                get_area = shmem_get_unmapped_area;
 -      } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
 -              /* Ensures that larger anonymous mappings are THP aligned. */
 -              get_area = thp_get_unmapped_area;
        }
  
        addr = get_area(file, addr, len, pgoff, flags);
@@@ -2950,7 -2953,7 +2950,7 @@@ static int do_brk_flags(struct ma_stat
                                addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) {
                mas_set_range(mas, vma->vm_start, addr + len - 1);
                if (mas_preallocate(mas, vma, GFP_KERNEL))
-                       return -ENOMEM;
+                       goto unacct_fail;
  
                vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0);
                if (vma->anon_vma) {
        /* create a vma struct for an anonymous mapping */
        vma = vm_area_alloc(mm);
        if (!vma)
-               goto vma_alloc_fail;
+               goto unacct_fail;
  
        vma_set_anonymous(vma);
        vma->vm_start = addr;
@@@ -2997,7 -3000,7 +2997,7 @@@ out
  
  mas_store_fail:
        vm_area_free(vma);
- vma_alloc_fail:
+ unacct_fail:
        vm_unacct_memory(len >> PAGE_SHIFT);
        return -ENOMEM;
  }
@@@ -3744,13 -3747,9 +3744,9 @@@ static int reserve_mem_notifier(struct 
        return NOTIFY_OK;
  }
  
- static struct notifier_block reserve_mem_nb = {
-       .notifier_call = reserve_mem_notifier,
- };
  static int __meminit init_reserve_notifier(void)
  {
-       if (register_hotmemory_notifier(&reserve_mem_nb))
+       if (hotplug_memory_notifier(reserve_mem_notifier, DEFAULT_CALLBACK_PRI))
                pr_err("Failed registering memory add/remove notifier for admin reserve\n");
  
        return 0;
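For context (not part of the patch): after the conversion above, a caller no longer declares its own struct notifier_block; hotplug_memory_notifier() takes the callback and a priority directly, and returns nonzero on registration failure, as the error check in init_reserve_notifier() shows. A minimal sketch under that assumption; the example_* names are illustrative, and MEM_ONLINE/MEM_OFFLINE are the usual <linux/memory.h> actions.

#include <linux/init.h>
#include <linux/memory.h>
#include <linux/notifier.h>
#include <linux/printk.h>

static int example_mem_callback(struct notifier_block *nb,
				unsigned long action, void *data)
{
	if (action == MEM_ONLINE || action == MEM_OFFLINE)
		pr_info("memory hotplug event %lu\n", action);
	return NOTIFY_OK;
}

static int __init example_register(void)
{
	/* nonzero return means the notifier could not be registered */
	if (hotplug_memory_notifier(example_mem_callback, DEFAULT_CALLBACK_PRI))
		pr_err("failed to register memory notifier\n");
	return 0;
}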
diff --combined mm/shmem.c
index 202ec3156d047af91cf012be57232c93712925d1,17adb7f6f6e43f535926af8f2df7a1078b1bb3d9..c301487be5fb405d2f2d12a9c497e951dbecdbe3
@@@ -237,11 -237,17 +237,17 @@@ static const struct inode_operations sh
  static const struct inode_operations shmem_dir_inode_operations;
  static const struct inode_operations shmem_special_inode_operations;
  static const struct vm_operations_struct shmem_vm_ops;
+ static const struct vm_operations_struct shmem_anon_vm_ops;
  static struct file_system_type shmem_fs_type;
  
+ bool vma_is_anon_shmem(struct vm_area_struct *vma)
+ {
+       return vma->vm_ops == &shmem_anon_vm_ops;
+ }
  bool vma_is_shmem(struct vm_area_struct *vma)
  {
-       return vma->vm_ops == &shmem_vm_ops;
+       return vma_is_anon_shmem(vma) || vma->vm_ops == &shmem_vm_ops;
  }
  
  static LIST_HEAD(shmem_swaplist);
@@@ -922,21 -928,18 +928,18 @@@ static void shmem_undo_range(struct ino
  
        folio_batch_init(&fbatch);
        index = start;
-       while (index < end && find_lock_entries(mapping, index, end - 1,
+       while (index < end && find_lock_entries(mapping, &index, end - 1,
                        &fbatch, indices)) {
                for (i = 0; i < folio_batch_count(&fbatch); i++) {
                        folio = fbatch.folios[i];
  
-                       index = indices[i];
                        if (xa_is_value(folio)) {
                                if (unfalloc)
                                        continue;
                                nr_swaps_freed += !shmem_free_swap(mapping,
-                                                               index, folio);
+                                                       indices[i], folio);
                                continue;
                        }
-                       index += folio_nr_pages(folio) - 1;
  
                        if (!unfalloc || !folio_test_uptodate(folio))
                                truncate_inode_folio(mapping, folio);
                folio_batch_remove_exceptionals(&fbatch);
                folio_batch_release(&fbatch);
                cond_resched();
-               index++;
        }
  
        /*
@@@ -988,7 -990,7 +990,7 @@@ whole_folios
        while (index < end) {
                cond_resched();
  
-               if (!find_get_entries(mapping, index, end - 1, &fbatch,
+               if (!find_get_entries(mapping, &index, end - 1, &fbatch,
                                indices)) {
                        /* If all gone or hole-punch or unfalloc, we're done */
                        if (index == start || end != -1)
                for (i = 0; i < folio_batch_count(&fbatch); i++) {
                        folio = fbatch.folios[i];
  
-                       index = indices[i];
                        if (xa_is_value(folio)) {
                                if (unfalloc)
                                        continue;
-                               if (shmem_free_swap(mapping, index, folio)) {
+                               if (shmem_free_swap(mapping, indices[i], folio)) {
                                        /* Swap was replaced by page: retry */
-                                       index--;
+                                       index = indices[i];
                                        break;
                                }
                                nr_swaps_freed++;
                                if (folio_mapping(folio) != mapping) {
                                        /* Page was replaced by swap: retry */
                                        folio_unlock(folio);
-                                       index--;
+                                       index = indices[i];
                                        break;
                                }
                                VM_BUG_ON_FOLIO(folio_test_writeback(folio),
                                                folio);
                                truncate_inode_folio(mapping, folio);
                        }
-                       index = folio->index + folio_nr_pages(folio) - 1;
                        folio_unlock(folio);
                }
                folio_batch_remove_exceptionals(&fbatch);
                folio_batch_release(&fbatch);
-               index++;
        }
  
        spin_lock_irq(&info->lock);
@@@ -1132,7 -1131,7 +1131,7 @@@ static int shmem_setattr(struct user_na
  
        setattr_copy(&init_user_ns, inode, attr);
        if (attr->ia_valid & ATTR_MODE)
 -              error = posix_acl_chmod(&init_user_ns, inode, inode->i_mode);
 +              error = posix_acl_chmod(&init_user_ns, dentry, inode->i_mode);
        if (!error && update_ctime) {
                inode->i_ctime = current_time(inode);
                if (update_mtime)
@@@ -1700,7 -1699,7 +1699,7 @@@ static void shmem_set_folio_swapin_erro
        swp_entry_t swapin_error;
        void *old;
  
-       swapin_error = make_swapin_error_entry(&folio->page);
+       swapin_error = make_swapin_error_entry();
        old = xa_cmpxchg_irq(&mapping->i_pages, index,
                             swp_to_radix_entry(swap),
                             swp_to_radix_entry(swapin_error), 0);
@@@ -1844,7 -1843,7 +1843,7 @@@ static int shmem_get_folio_gfp(struct i
        struct shmem_sb_info *sbinfo;
        struct mm_struct *charge_mm;
        struct folio *folio;
-       pgoff_t hindex = index;
+       pgoff_t hindex;
        gfp_t huge_gfp;
        int error;
        int once = 0;
@@@ -1882,7 -1881,6 +1881,6 @@@ repeat
        }
  
        if (folio) {
-               hindex = folio->index;
                if (sgp == SGP_WRITE)
                        folio_mark_accessed(folio);
                if (folio_test_uptodate(folio))
@@@ -2282,7 -2280,8 +2280,8 @@@ out_nomem
  
  static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
  {
-       struct shmem_inode_info *info = SHMEM_I(file_inode(file));
+       struct inode *inode = file_inode(file);
+       struct shmem_inode_info *info = SHMEM_I(inode);
        int ret;
  
        ret = seal_check_future_write(info->seals, vma);
        vma->vm_flags |= VM_MTE_ALLOWED;
  
        file_accessed(file);
-       vma->vm_ops = &shmem_vm_ops;
+       /* This is anonymous shared memory if it is unlinked at the time of mmap */
+       if (inode->i_nlink)
+               vma->vm_ops = &shmem_vm_ops;
+       else
+               vma->vm_ops = &shmem_anon_vm_ops;
        return 0;
  }
  
@@@ -3283,7 -3286,7 +3286,7 @@@ static int shmem_initxattrs(struct inod
                memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
                       xattr->name, len);
  
 -              simple_xattr_list_add(&info->xattrs, new_xattr);
 +              simple_xattr_add(&info->xattrs, new_xattr);
        }
  
        return 0;
@@@ -3921,6 -3924,7 +3924,7 @@@ EXPORT_SYMBOL(shmem_aops)
  
  static const struct file_operations shmem_file_operations = {
        .mmap           = shmem_mmap,
+       .open           = generic_file_open,
        .get_unmapped_area = shmem_get_unmapped_area,
  #ifdef CONFIG_TMPFS
        .llseek         = shmem_file_llseek,
@@@ -4006,6 -4010,15 +4010,15 @@@ static const struct vm_operations_struc
  #endif
  };
  
+ static const struct vm_operations_struct shmem_anon_vm_ops = {
+       .fault          = shmem_fault,
+       .map_pages      = filemap_map_pages,
+ #ifdef CONFIG_NUMA
+       .set_policy     = shmem_set_policy,
+       .get_policy     = shmem_get_policy,
+ #endif
+ };
  int shmem_init_fs_context(struct fs_context *fc)
  {
        struct shmem_options *ctx;
@@@ -4181,6 -4194,7 +4194,7 @@@ void shmem_truncate_range(struct inode 
  EXPORT_SYMBOL_GPL(shmem_truncate_range);
  
  #define shmem_vm_ops                          generic_file_vm_ops
+ #define shmem_anon_vm_ops                     generic_file_vm_ops
  #define shmem_file_operations                 ramfs_file_operations
  #define shmem_get_inode(sb, dir, mode, dev, flags)    ramfs_get_inode(sb, dir, mode, dev)
  #define shmem_acct_size(flags, size)          0
@@@ -4286,7 -4300,7 +4300,7 @@@ int shmem_zero_setup(struct vm_area_str
        if (vma->vm_file)
                fput(vma->vm_file);
        vma->vm_file = file;
-       vma->vm_ops = &shmem_vm_ops;
+       vma->vm_ops = &shmem_anon_vm_ops;
  
        return 0;
  }
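For context (not part of the patch): the new shmem_anon_vm_ops covers VMAs backed by an unlinked shmem file — most commonly a MAP_SHARED | MAP_ANONYMOUS mapping set up through shmem_zero_setup() as in the hunk above. A minimal userspace sketch of creating such a mapping:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 4096;
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_SHARED | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	/* shmem-backed memory, shared with any children forked later */
	strcpy(p, "anon shared mapping");
	puts(p);
	munmap(p, len);
	return 0;
}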
diff --combined mm/slub.c
index 2248f85e816795541445f9d3e9195a729d2fc8fe,f37e6a51e23335858c9d22851b72bc13876760e8..13459c69095a25b6a1db8e24472654dca3b9d0d0
+++ b/mm/slub.c
@@@ -39,7 -39,6 +39,7 @@@
  #include <linux/memcontrol.h>
  #include <linux/random.h>
  #include <kunit/test.h>
 +#include <kunit/test-bug.h>
  #include <linux/sort.h>
  
  #include <linux/debugfs.h>
@@@ -188,12 -187,6 +188,12 @@@ do {                                     
  #define USE_LOCKLESS_FAST_PATH()      (false)
  #endif
  
 +#ifndef CONFIG_SLUB_TINY
 +#define __fastpath_inline __always_inline
 +#else
 +#define __fastpath_inline
 +#endif
 +
  #ifdef CONFIG_SLUB_DEBUG
  #ifdef CONFIG_SLUB_DEBUG_ON
  DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
@@@ -248,7 -241,6 +248,7 @@@ static inline bool kmem_cache_has_cpu_p
  /* Enable to log cmpxchg failures */
  #undef SLUB_DEBUG_CMPXCHG
  
 +#ifndef CONFIG_SLUB_TINY
  /*
   * Minimum number of partial slabs. These will be left on the partial
   * lists even if they are empty. kmem_cache_shrink may reclaim them.
   * sort the partial list by the number of objects in use.
   */
  #define MAX_PARTIAL 10
 +#else
 +#define MIN_PARTIAL 0
 +#define MAX_PARTIAL 0
 +#endif
  
  #define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
                                SLAB_POISON | SLAB_STORE_USER)
@@@ -310,7 -298,7 +310,7 @@@ struct track 
  
  enum track_item { TRACK_ALLOC, TRACK_FREE };
  
 -#ifdef CONFIG_SYSFS
 +#ifdef SLAB_SUPPORTS_SYSFS
  static int sysfs_slab_add(struct kmem_cache *);
  static int sysfs_slab_alias(struct kmem_cache *, const char *);
  #else
@@@ -344,12 -332,10 +344,12 @@@ static inline void stat(const struct km
   */
  static nodemask_t slab_nodes;
  
 +#ifndef CONFIG_SLUB_TINY
  /*
   * Workqueue used for flush_cpu_slab().
   */
  static struct workqueue_struct *flushwq;
 +#endif
  
  /********************************************************************
   *                    Core slab cache functions
@@@ -395,12 -381,10 +395,12 @@@ static inline void *get_freepointer(str
        return freelist_dereference(s, object + s->offset);
  }
  
 +#ifndef CONFIG_SLUB_TINY
  static void prefetch_freepointer(const struct kmem_cache *s, void *object)
  {
        prefetchw(object + s->offset);
  }
 +#endif
  
  /*
   * When running under KMSAN, get_freepointer_safe() may return an uninitialized
@@@ -619,7 -603,7 +619,7 @@@ static bool slab_add_kunit_errors(void
  {
        struct kunit_resource *resource;
  
 -      if (likely(!current->kunit_test))
 +      if (!kunit_get_current_test())
                return false;
  
        resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
@@@ -845,17 -829,6 +845,17 @@@ static inline void set_orig_size(struc
        if (!slub_debug_orig_size(s))
                return;
  
 +#ifdef CONFIG_KASAN_GENERIC
 +      /*
 +       * KASAN could save its free meta data in object's data area at
 +       * offset 0, if the size is larger than 'orig_size', it will
 +       * overlap the data redzone in [orig_size+1, object_size], and
 +       * the check should be skipped.
 +       */
 +      if (kasan_metadata_size(s, true) > orig_size)
 +              orig_size = s->object_size;
 +#endif
 +
        p += get_info_end(s);
        p += sizeof(struct track) * 2;
  
@@@ -875,11 -848,6 +875,11 @@@ static inline unsigned int get_orig_siz
        return *(unsigned int *)p;
  }
  
 +void skip_orig_size_check(struct kmem_cache *s, const void *object)
 +{
 +      set_orig_size(s, (void *)object, s->object_size);
 +}
 +
  static void slab_bug(struct kmem_cache *s, char *fmt, ...)
  {
        struct va_format vaf;
@@@ -942,7 -910,7 +942,7 @@@ static void print_trailer(struct kmem_c
        if (slub_debug_orig_size(s))
                off += sizeof(unsigned int);
  
 -      off += kasan_metadata_size(s);
 +      off += kasan_metadata_size(s, false);
  
        if (off != size_from_object(s))
                /* Beginning of the filler is the free pointer */
@@@ -998,28 -966,17 +998,28 @@@ static __printf(3, 4) void slab_err(str
  static void init_object(struct kmem_cache *s, void *object, u8 val)
  {
        u8 *p = kasan_reset_tag(object);
 +      unsigned int poison_size = s->object_size;
  
 -      if (s->flags & SLAB_RED_ZONE)
 +      if (s->flags & SLAB_RED_ZONE) {
                memset(p - s->red_left_pad, val, s->red_left_pad);
  
 +              if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
 +                      /*
 +                       * Redzone the extra allocated space by kmalloc than
 +                       * requested, and the poison size will be limited to
 +                       * the original request size accordingly.
 +                       */
 +                      poison_size = get_orig_size(s, object);
 +              }
 +      }
 +
        if (s->flags & __OBJECT_POISON) {
 -              memset(p, POISON_FREE, s->object_size - 1);
 -              p[s->object_size - 1] = POISON_END;
 +              memset(p, POISON_FREE, poison_size - 1);
 +              p[poison_size - 1] = POISON_END;
        }
  
        if (s->flags & SLAB_RED_ZONE)
 -              memset(p + s->object_size, val, s->inuse - s->object_size);
 +              memset(p + poison_size, val, s->inuse - poison_size);
  }
  
  static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
@@@ -1113,7 -1070,7 +1113,7 @@@ static int check_pad_bytes(struct kmem_
                        off += sizeof(unsigned int);
        }
  
 -      off += kasan_metadata_size(s);
 +      off += kasan_metadata_size(s, false);
  
        if (size_from_object(s) == off)
                return 1;
@@@ -1163,7 -1120,6 +1163,7 @@@ static int check_object(struct kmem_cac
  {
        u8 *p = object;
        u8 *endobject = object + s->object_size;
 +      unsigned int orig_size;
  
        if (s->flags & SLAB_RED_ZONE) {
                if (!check_bytes_and_report(s, slab, object, "Left Redzone",
                if (!check_bytes_and_report(s, slab, object, "Right Redzone",
                        endobject, val, s->inuse - s->object_size))
                        return 0;
 +
 +              if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
 +                      orig_size = get_orig_size(s, object);
 +
 +                      if (s->object_size > orig_size  &&
 +                              !check_bytes_and_report(s, slab, object,
 +                                      "kmalloc Redzone", p + orig_size,
 +                                      val, s->object_size - orig_size)) {
 +                              return 0;
 +                      }
 +              }
        } else {
                if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
                        check_bytes_and_report(s, slab, p, "Alignment padding",
@@@ -1418,7 -1363,7 +1418,7 @@@ static inline int alloc_consistency_che
        return 1;
  }
  
 -static noinline int alloc_debug_processing(struct kmem_cache *s,
 +static noinline bool alloc_debug_processing(struct kmem_cache *s,
                        struct slab *slab, void *object, int orig_size)
  {
        if (s->flags & SLAB_CONSISTENCY_CHECKS) {
        trace(s, slab, object, 1);
        set_orig_size(s, object, orig_size);
        init_object(s, object, SLUB_RED_ACTIVE);
 -      return 1;
 +      return true;
  
  bad:
        if (folio_test_slab(slab_folio(slab))) {
                slab->inuse = slab->objects;
                slab->freelist = NULL;
        }
 -      return 0;
 +      return false;
  }
  
  static inline int free_consistency_checks(struct kmem_cache *s,
@@@ -1696,17 -1641,17 +1696,17 @@@ static inline void setup_object_debug(s
  static inline
  void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {}
  
 -static inline int alloc_debug_processing(struct kmem_cache *s,
 -      struct slab *slab, void *object, int orig_size) { return 0; }
 +static inline bool alloc_debug_processing(struct kmem_cache *s,
 +      struct slab *slab, void *object, int orig_size) { return true; }
  
 -static inline void free_debug_processing(
 -      struct kmem_cache *s, struct slab *slab,
 -      void *head, void *tail, int bulk_cnt,
 -      unsigned long addr) {}
 +static inline bool free_debug_processing(struct kmem_cache *s,
 +      struct slab *slab, void *head, void *tail, int *bulk_cnt,
 +      unsigned long addr, depot_stack_handle_t handle) { return true; }
  
  static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {}
  static inline int check_object(struct kmem_cache *s, struct slab *slab,
                        void *object, u8 val) { return 1; }
 +static inline depot_stack_handle_t set_track_prepare(void) { return 0; }
  static inline void set_track(struct kmem_cache *s, void *object,
                             enum track_item alloc, unsigned long addr) {}
  static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
@@@ -1731,13 -1676,11 +1731,13 @@@ static inline void inc_slabs_node(struc
  static inline void dec_slabs_node(struct kmem_cache *s, int node,
                                                        int objects) {}
  
 +#ifndef CONFIG_SLUB_TINY
  static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
                               void **freelist, void *nextfree)
  {
        return false;
  }
 +#endif
  #endif /* CONFIG_SLUB_DEBUG */
  
  /*
@@@ -1857,8 -1800,6 +1857,8 @@@ static inline struct slab *alloc_slab_p
  
        slab = folio_slab(folio);
        __folio_set_slab(folio);
 +      /* Make the flag visible before any changes to folio->mapping */
 +      smp_wmb();
        if (page_is_pfmemalloc(folio_page(folio, 0)))
                slab_set_pfmemalloc(slab);
  
@@@ -1940,7 -1881,7 +1940,7 @@@ static bool shuffle_freelist(struct kme
                return false;
  
        freelist_count = oo_objects(s->oo);
 -      pos = prandom_u32_max(freelist_count);
 +      pos = get_random_u32_below(freelist_count);
  
        page_limit = slab->objects * s->size;
        start = fixup_red_left(s, slab_address(slab));
@@@ -2058,11 -1999,17 +2058,11 @@@ static void __free_slab(struct kmem_cac
        int order = folio_order(folio);
        int pages = 1 << order;
  
 -      if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
 -              void *p;
 -
 -              slab_pad_check(s, slab);
 -              for_each_object(p, s, slab_address(slab), slab->objects)
 -                      check_object(s, slab, p, SLUB_RED_INACTIVE);
 -      }
 -
        __slab_clear_pfmemalloc(slab);
 -      __folio_clear_slab(folio);
        folio->mapping = NULL;
 +      /* Make the mapping reset visible before clearing the flag */
 +      smp_wmb();
 +      __folio_clear_slab(folio);
        if (current->reclaim_state)
                current->reclaim_state->reclaimed_slab += pages;
        unaccount_slab(slab, order, s);
@@@ -2078,17 -2025,9 +2078,17 @@@ static void rcu_free_slab(struct rcu_he
  
  static void free_slab(struct kmem_cache *s, struct slab *slab)
  {
 -      if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
 +      if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
 +              void *p;
 +
 +              slab_pad_check(s, slab);
 +              for_each_object(p, s, slab_address(slab), slab->objects)
 +                      check_object(s, slab, p, SLUB_RED_INACTIVE);
 +      }
 +
 +      if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU))
                call_rcu(&slab->rcu_head, rcu_free_slab);
 -      else
 +      else
                __free_slab(s, slab);
  }
  
@@@ -2275,7 -2214,7 +2275,7 @@@ static void *get_partial_node(struct km
                if (!pfmemalloc_match(slab, pc->flags))
                        continue;
  
 -              if (kmem_cache_debug(s)) {
 +              if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
                        object = alloc_single_from_partial(s, n, slab,
                                                        pc->orig_size);
                        if (object)
@@@ -2390,8 -2329,6 +2390,8 @@@ static void *get_partial(struct kmem_ca
        return get_any_partial(s, pc);
  }
  
 +#ifndef CONFIG_SLUB_TINY
 +
  #ifdef CONFIG_PREEMPTION
  /*
   * Calculate the next globally unique transaction for disambiguation
   * different cpus.
   */
  #define TID_STEP 1
 -#endif
 +#endif /* CONFIG_PREEMPTION */
  
  static inline unsigned long next_tid(unsigned long tid)
  {
@@@ -2474,7 -2411,7 +2474,7 @@@ static void init_kmem_cache_cpus(struc
  static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
                            void *freelist)
  {
 -      enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE, M_FULL_NOLIST };
 +      enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST };
        struct kmem_cache_node *n = get_node(s, slab_nid(slab));
        int free_delta = 0;
        enum slab_modes mode = M_NONE;
@@@ -2550,6 -2487,14 +2550,6 @@@ redo
                 * acquire_slab() will see a slab that is frozen
                 */
                spin_lock_irqsave(&n->list_lock, flags);
 -      } else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
 -              mode = M_FULL;
 -              /*
 -               * This also ensures that the scanning of full
 -               * slabs from diagnostic functions will not see
 -               * any frozen slabs.
 -               */
 -              spin_lock_irqsave(&n->list_lock, flags);
        } else {
                mode = M_FULL_NOLIST;
        }
                                old.freelist, old.counters,
                                new.freelist, new.counters,
                                "unfreezing slab")) {
 -              if (mode == M_PARTIAL || mode == M_FULL)
 +              if (mode == M_PARTIAL)
                        spin_unlock_irqrestore(&n->list_lock, flags);
                goto redo;
        }
                stat(s, DEACTIVATE_EMPTY);
                discard_slab(s, slab);
                stat(s, FREE_SLAB);
 -      } else if (mode == M_FULL) {
 -              add_full(s, n, slab);
 -              spin_unlock_irqrestore(&n->list_lock, flags);
 -              stat(s, DEACTIVATE_FULL);
        } else if (mode == M_FULL_NOLIST) {
                stat(s, DEACTIVATE_FULL);
        }
@@@ -2854,13 -2803,6 +2854,13 @@@ static int slub_cpu_dead(unsigned int c
        return 0;
  }
  
 +#else /* CONFIG_SLUB_TINY */
 +static inline void flush_all_cpus_locked(struct kmem_cache *s) { }
 +static inline void flush_all(struct kmem_cache *s) { }
 +static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { }
 +static inline int slub_cpu_dead(unsigned int cpu) { return 0; }
 +#endif /* CONFIG_SLUB_TINY */
 +
  /*
   * Check if the objects in a per cpu structure fit numa
   * locality expectations.
@@@ -2886,28 -2828,38 +2886,28 @@@ static inline unsigned long node_nr_obj
  }
  
  /* Supports checking bulk free of a constructed freelist */
 -static noinline void free_debug_processing(
 -      struct kmem_cache *s, struct slab *slab,
 -      void *head, void *tail, int bulk_cnt,
 -      unsigned long addr)
 +static inline bool free_debug_processing(struct kmem_cache *s,
 +      struct slab *slab, void *head, void *tail, int *bulk_cnt,
 +      unsigned long addr, depot_stack_handle_t handle)
  {
 -      struct kmem_cache_node *n = get_node(s, slab_nid(slab));
 -      struct slab *slab_free = NULL;
 +      bool checks_ok = false;
        void *object = head;
        int cnt = 0;
 -      unsigned long flags;
 -      bool checks_ok = false;
 -      depot_stack_handle_t handle = 0;
 -
 -      if (s->flags & SLAB_STORE_USER)
 -              handle = set_track_prepare();
 -
 -      spin_lock_irqsave(&n->list_lock, flags);
  
        if (s->flags & SLAB_CONSISTENCY_CHECKS) {
                if (!check_slab(s, slab))
                        goto out;
        }
  
 -      if (slab->inuse < bulk_cnt) {
 +      if (slab->inuse < *bulk_cnt) {
                slab_err(s, slab, "Slab has %d allocated objects but %d are to be freed\n",
 -                       slab->inuse, bulk_cnt);
 +                       slab->inuse, *bulk_cnt);
                goto out;
        }
  
  next_object:
  
 -      if (++cnt > bulk_cnt)
 +      if (++cnt > *bulk_cnt)
                goto out_cnt;
  
        if (s->flags & SLAB_CONSISTENCY_CHECKS) {
        checks_ok = true;
  
  out_cnt:
 -      if (cnt != bulk_cnt)
 +      if (cnt != *bulk_cnt) {
                slab_err(s, slab, "Bulk free expected %d objects but found %d\n",
 -                       bulk_cnt, cnt);
 -
 -out:
 -      if (checks_ok) {
 -              void *prior = slab->freelist;
 -
 -              /* Perform the actual freeing while we still hold the locks */
 -              slab->inuse -= cnt;
 -              set_freepointer(s, tail, prior);
 -              slab->freelist = head;
 -
 -              /*
 -               * If the slab is empty, and node's partial list is full,
 -               * it should be discarded anyway no matter it's on full or
 -               * partial list.
 -               */
 -              if (slab->inuse == 0 && n->nr_partial >= s->min_partial)
 -                      slab_free = slab;
 -
 -              if (!prior) {
 -                      /* was on full list */
 -                      remove_full(s, n, slab);
 -                      if (!slab_free) {
 -                              add_partial(n, slab, DEACTIVATE_TO_TAIL);
 -                              stat(s, FREE_ADD_PARTIAL);
 -                      }
 -              } else if (slab_free) {
 -                      remove_partial(n, slab);
 -                      stat(s, FREE_REMOVE_PARTIAL);
 -              }
 +                       *bulk_cnt, cnt);
 +              *bulk_cnt = cnt;
        }
  
 -      if (slab_free) {
 -              /*
 -               * Update the counters while still holding n->list_lock to
 -               * prevent spurious validation warnings
 -               */
 -              dec_slabs_node(s, slab_nid(slab_free), slab_free->objects);
 -      }
 -
 -      spin_unlock_irqrestore(&n->list_lock, flags);
 +out:
  
        if (!checks_ok)
                slab_fix(s, "Object at 0x%p not freed", object);
  
 -      if (slab_free) {
 -              stat(s, FREE_SLAB);
 -              free_slab(s, slab_free);
 -      }
 +      return checks_ok;
  }
  #endif /* CONFIG_SLUB_DEBUG */
  
 -#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
 +#if defined(CONFIG_SLUB_DEBUG) || defined(SLAB_SUPPORTS_SYSFS)
  static unsigned long count_partial(struct kmem_cache_node *n,
                                        int (*get_count)(struct slab *))
  {
        spin_unlock_irqrestore(&n->list_lock, flags);
        return x;
  }
 -#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
 +#endif /* CONFIG_SLUB_DEBUG || SLAB_SUPPORTS_SYSFS */
  
 +#ifdef CONFIG_SLUB_DEBUG
  static noinline void
  slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
  {
 -#ifdef CONFIG_SLUB_DEBUG
        static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
                                      DEFAULT_RATELIMIT_BURST);
        int node;
                pr_warn("  node %d: slabs: %ld, objs: %ld, free: %ld\n",
                        node, nr_slabs, nr_objs, nr_free);
        }
 -#endif
  }
 +#else /* CONFIG_SLUB_DEBUG */
 +static inline void
 +slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) { }
 +#endif
  
  static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
  {
        return true;
  }
  
 +#ifndef CONFIG_SLUB_TINY
  /*
   * Check the slab->freelist and either transfer the freelist to the
   * per cpu freelist or deactivate the slab.
@@@ -3296,13 -3283,45 +3296,13 @@@ static void *__slab_alloc(struct kmem_c
        return p;
  }
  
 -/*
 - * If the object has been wiped upon free, make sure it's fully initialized by
 - * zeroing out freelist pointer.
 - */
 -static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
 -                                                 void *obj)
 -{
 -      if (unlikely(slab_want_init_on_free(s)) && obj)
 -              memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
 -                      0, sizeof(void *));
 -}
 -
 -/*
 - * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
 - * have the fastpath folded into their functions. So no function call
 - * overhead for requests that can be satisfied on the fastpath.
 - *
 - * The fastpath works by first checking if the lockless freelist can be used.
 - * If not then __slab_alloc is called for slow processing.
 - *
 - * Otherwise we can simply pick the next object from the lockless free list.
 - */
 -static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru,
 +static __always_inline void *__slab_alloc_node(struct kmem_cache *s,
                gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
  {
 -      void *object;
        struct kmem_cache_cpu *c;
        struct slab *slab;
        unsigned long tid;
 -      struct obj_cgroup *objcg = NULL;
 -      bool init = false;
 -
 -      s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
 -      if (!s)
 -              return NULL;
 -
 -      object = kfence_alloc(s, orig_size, gfpflags);
 -      if (unlikely(object))
 -              goto out;
 +      void *object;
  
  redo:
        /*
                stat(s, ALLOC_FASTPATH);
        }
  
 +      return object;
 +}
 +#else /* CONFIG_SLUB_TINY */
 +static void *__slab_alloc_node(struct kmem_cache *s,
 +              gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
 +{
 +      struct partial_context pc;
 +      struct slab *slab;
 +      void *object;
 +
 +      pc.flags = gfpflags;
 +      pc.slab = &slab;
 +      pc.orig_size = orig_size;
 +      object = get_partial(s, node, &pc);
 +
 +      if (object)
 +              return object;
 +
 +      slab = new_slab(s, gfpflags, node);
 +      if (unlikely(!slab)) {
 +              slab_out_of_memory(s, gfpflags, node);
 +              return NULL;
 +      }
 +
 +      object = alloc_single_from_new_slab(s, slab, orig_size);
 +
 +      return object;
 +}
 +#endif /* CONFIG_SLUB_TINY */
 +
 +/*
 + * If the object has been wiped upon free, make sure it's fully initialized by
 + * zeroing out freelist pointer.
 + */
 +static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
 +                                                 void *obj)
 +{
 +      if (unlikely(slab_want_init_on_free(s)) && obj)
 +              memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
 +                      0, sizeof(void *));
 +}
 +
 +/*
 + * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
 + * have the fastpath folded into their functions. So no function call
 + * overhead for requests that can be satisfied on the fastpath.
 + *
 + * The fastpath works by first checking if the lockless freelist can be used.
 + * If not then __slab_alloc is called for slow processing.
 + *
 + * Otherwise we can simply pick the next object from the lockless free list.
 + */
 +static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru,
 +              gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
 +{
 +      void *object;
 +      struct obj_cgroup *objcg = NULL;
 +      bool init = false;
 +
 +      s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
 +      if (!s)
 +              return NULL;
 +
 +      object = kfence_alloc(s, orig_size, gfpflags);
 +      if (unlikely(object))
 +              goto out;
 +
 +      object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
 +
        maybe_wipe_obj_freeptr(s, object);
        init = slab_want_init_on_alloc(gfpflags, s);
  
  out:
 -      slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
 +      /*
 +       * When init equals 'true', like for kzalloc() family, only
 +       * @orig_size bytes might be zeroed instead of s->object_size
 +       */
 +      slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size);
  
        return object;
  }
  
 -static __always_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru,
 +static __fastpath_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru,
                gfp_t gfpflags, unsigned long addr, size_t orig_size)
  {
        return slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, addr, orig_size);
  }
  
 -static __always_inline
 +static __fastpath_inline
  void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
                             gfp_t gfpflags)
  {
@@@ -3502,67 -3448,6 +3502,67 @@@ void *kmem_cache_alloc_node(struct kmem
  }
  EXPORT_SYMBOL(kmem_cache_alloc_node);
  
 +static noinline void free_to_partial_list(
 +      struct kmem_cache *s, struct slab *slab,
 +      void *head, void *tail, int bulk_cnt,
 +      unsigned long addr)
 +{
 +      struct kmem_cache_node *n = get_node(s, slab_nid(slab));
 +      struct slab *slab_free = NULL;
 +      int cnt = bulk_cnt;
 +      unsigned long flags;
 +      depot_stack_handle_t handle = 0;
 +
 +      if (s->flags & SLAB_STORE_USER)
 +              handle = set_track_prepare();
 +
 +      spin_lock_irqsave(&n->list_lock, flags);
 +
 +      if (free_debug_processing(s, slab, head, tail, &cnt, addr, handle)) {
 +              void *prior = slab->freelist;
 +
 +              /* Perform the actual freeing while we still hold the locks */
 +              slab->inuse -= cnt;
 +              set_freepointer(s, tail, prior);
 +              slab->freelist = head;
 +
 +              /*
 +               * If the slab is empty, and node's partial list is full,
 +               * it should be discarded anyway no matter it's on full or
 +               * partial list.
 +               */
 +              if (slab->inuse == 0 && n->nr_partial >= s->min_partial)
 +                      slab_free = slab;
 +
 +              if (!prior) {
 +                      /* was on full list */
 +                      remove_full(s, n, slab);
 +                      if (!slab_free) {
 +                              add_partial(n, slab, DEACTIVATE_TO_TAIL);
 +                              stat(s, FREE_ADD_PARTIAL);
 +                      }
 +              } else if (slab_free) {
 +                      remove_partial(n, slab);
 +                      stat(s, FREE_REMOVE_PARTIAL);
 +              }
 +      }
 +
 +      if (slab_free) {
 +              /*
 +               * Update the counters while still holding n->list_lock to
 +               * prevent spurious validation warnings
 +               */
 +              dec_slabs_node(s, slab_nid(slab_free), slab_free->objects);
 +      }
 +
 +      spin_unlock_irqrestore(&n->list_lock, flags);
 +
 +      if (slab_free) {
 +              stat(s, FREE_SLAB);
 +              free_slab(s, slab_free);
 +      }
 +}
 +
  /*
   * Slow path handling. This may still be called frequently since objects
   * have a longer lifetime than the cpu slabs in most processing loads.
@@@ -3588,8 -3473,8 +3588,8 @@@ static void __slab_free(struct kmem_cac
        if (kfence_free(head))
                return;
  
 -      if (kmem_cache_debug(s)) {
 -              free_debug_processing(s, slab, head, tail, cnt, addr);
 +      if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
 +              free_to_partial_list(s, slab, head, tail, cnt, addr);
                return;
        }
  
@@@ -3689,7 -3574,6 +3689,7 @@@ slab_empty
        discard_slab(s, slab);
  }
  
 +#ifndef CONFIG_SLUB_TINY
  /*
   * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
   * can perform fastpath freeing without additional function calls.
@@@ -3764,18 -3648,8 +3764,18 @@@ redo
        }
        stat(s, FREE_FASTPATH);
  }
 +#else /* CONFIG_SLUB_TINY */
 +static void do_slab_free(struct kmem_cache *s,
 +                              struct slab *slab, void *head, void *tail,
 +                              int cnt, unsigned long addr)
 +{
 +      void *tail_obj = tail ? : head;
 +
 +      __slab_free(s, slab, head, tail_obj, cnt, addr);
 +}
 +#endif /* CONFIG_SLUB_TINY */
  
 -static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab,
 +static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab,
                                      void *head, void *tail, void **p, int cnt,
                                      unsigned long addr)
  {
@@@ -3908,13 -3782,18 +3908,13 @@@ void kmem_cache_free_bulk(struct kmem_c
  }
  EXPORT_SYMBOL(kmem_cache_free_bulk);
  
 -/* Note that interrupts must be enabled when calling this function. */
 -int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 -                        void **p)
 +#ifndef CONFIG_SLUB_TINY
 +static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
 +                      size_t size, void **p, struct obj_cgroup *objcg)
  {
        struct kmem_cache_cpu *c;
        int i;
 -      struct obj_cgroup *objcg = NULL;
  
 -      /* memcg and kmem_cache debug support */
 -      s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
 -      if (unlikely(!s))
 -              return false;
        /*
         * Drain objects in the per cpu slab, while disabling local
         * IRQs, which protects against PREEMPT and interrupts
        local_unlock_irq(&s->cpu_slab->lock);
        slub_put_cpu_ptr(s->cpu_slab);
  
 -      /*
 -       * memcg and kmem_cache debug support and memory initialization.
 -       * Done outside of the IRQ disabled fastpath loop.
 -       */
 -      slab_post_alloc_hook(s, objcg, flags, size, p,
 -                              slab_want_init_on_alloc(flags, s));
        return i;
 +
  error:
        slub_put_cpu_ptr(s->cpu_slab);
 -      slab_post_alloc_hook(s, objcg, flags, i, p, false);
 +      slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
 +      kmem_cache_free_bulk(s, i, p);
 +      return 0;
 +
 +}
 +#else /* CONFIG_SLUB_TINY */
 +static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
 +                      size_t size, void **p, struct obj_cgroup *objcg)
 +{
 +      int i;
 +
 +      for (i = 0; i < size; i++) {
 +              void *object = kfence_alloc(s, s->object_size, flags);
 +
 +              if (unlikely(object)) {
 +                      p[i] = object;
 +                      continue;
 +              }
 +
 +              p[i] = __slab_alloc_node(s, flags, NUMA_NO_NODE,
 +                                       _RET_IP_, s->object_size);
 +              if (unlikely(!p[i]))
 +                      goto error;
 +
 +              maybe_wipe_obj_freeptr(s, p[i]);
 +      }
 +
 +      return i;
 +
 +error:
 +      slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
        kmem_cache_free_bulk(s, i, p);
        return 0;
  }
 +#endif /* CONFIG_SLUB_TINY */
 +
 +/* Note that interrupts must be enabled when calling this function. */
 +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 +                        void **p)
 +{
 +      int i;
 +      struct obj_cgroup *objcg = NULL;
 +
 +      if (!size)
 +              return 0;
 +
 +      /* memcg and kmem_cache debug support */
 +      s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
 +      if (unlikely(!s))
 +              return 0;
 +
 +      i = __kmem_cache_alloc_bulk(s, flags, size, p, objcg);
 +
 +      /*
 +       * memcg and kmem_cache debug support and memory initialization.
 +       * Done outside of the IRQ disabled fastpath loop.
 +       */
 +      if (i != 0)
 +              slab_post_alloc_hook(s, objcg, flags, size, p,
 +                      slab_want_init_on_alloc(flags, s), s->object_size);
 +      return i;
 +}
  EXPORT_SYMBOL(kmem_cache_alloc_bulk);
  
  
   * take the list_lock.
   */
  static unsigned int slub_min_order;
 -static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
 +static unsigned int slub_max_order =
 +      IS_ENABLED(CONFIG_SLUB_TINY) ? 1 : PAGE_ALLOC_COSTLY_ORDER;
  static unsigned int slub_min_objects;
  
  /*
@@@ -4189,12 -4014,10 +4189,12 @@@ init_kmem_cache_node(struct kmem_cache_
  #endif
  }
  
 +#ifndef CONFIG_SLUB_TINY
  static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
  {
        BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
 -                      KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
 +                      NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH *
 +                      sizeof(struct kmem_cache_cpu));
  
        /*
         * Must align to double word boundary for the double cmpxchg
  
        return 1;
  }
 +#else
 +static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
 +{
 +      return 1;
 +}
 +#endif /* CONFIG_SLUB_TINY */
  
  static struct kmem_cache *kmem_cache_node;
  
@@@ -4278,9 -4095,7 +4278,9 @@@ static void free_kmem_cache_nodes(struc
  void __kmem_cache_release(struct kmem_cache *s)
  {
        cache_random_seq_destroy(s);
 +#ifndef CONFIG_SLUB_TINY
        free_percpu(s->cpu_slab);
 +#endif
        free_kmem_cache_nodes(s);
  }
  
@@@ -4387,8 -4202,7 +4387,8 @@@ static int calculate_sizes(struct kmem_
         */
        s->inuse = size;
  
 -      if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
 +      if (slub_debug_orig_size(s) ||
 +          (flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
            ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
            s->ctor) {
                /*
@@@ -4957,11 -4771,6 +4957,6 @@@ static int slab_memory_callback(struct 
        return ret;
  }
  
- static struct notifier_block slab_memory_callback_nb = {
-       .notifier_call = slab_memory_callback,
-       .priority = SLAB_CALLBACK_PRI,
- };
  /********************************************************************
   *                    Basic setup of slabs
   *******************************************************************/
@@@ -5027,7 -4836,7 +5022,7 @@@ void __init kmem_cache_init(void
        create_boot_cache(kmem_cache_node, "kmem_cache_node",
                sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
  
-       register_hotmemory_notifier(&slab_memory_callback_nb);
+       hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
  
        /* Able to allocate the per node structures */
        slab_state = PARTIAL;
  
  void __init kmem_cache_init_late(void)
  {
 +#ifndef CONFIG_SLUB_TINY
        flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM, 0);
        WARN_ON(!flushwq);
 +#endif
  }
  
  struct kmem_cache *
@@@ -5112,7 -4919,7 +5107,7 @@@ int __kmem_cache_create(struct kmem_cac
        return 0;
  }
  
 -#ifdef CONFIG_SYSFS
 +#ifdef SLAB_SUPPORTS_SYSFS
  static int count_inuse(struct slab *slab)
  {
        return slab->inuse;
@@@ -5370,7 -5177,7 +5365,7 @@@ static void process_slab(struct loc_tra
  #endif  /* CONFIG_DEBUG_FS   */
  #endif        /* CONFIG_SLUB_DEBUG */
  
 -#ifdef CONFIG_SYSFS
 +#ifdef SLAB_SUPPORTS_SYSFS
  enum slab_stat_type {
        SL_ALL,                 /* All slabs */
        SL_PARTIAL,             /* Only partially allocated slabs */
@@@ -5690,13 -5497,11 +5685,13 @@@ static ssize_t cache_dma_show(struct km
  SLAB_ATTR_RO(cache_dma);
  #endif
  
 +#ifdef CONFIG_HARDENED_USERCOPY
  static ssize_t usersize_show(struct kmem_cache *s, char *buf)
  {
        return sysfs_emit(buf, "%u\n", s->usersize);
  }
  SLAB_ATTR_RO(usersize);
 +#endif
  
  static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
  {
@@@ -5776,21 -5581,7 +5771,21 @@@ static ssize_t failslab_show(struct kme
  {
        return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
  }
 -SLAB_ATTR_RO(failslab);
 +
 +static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
 +                              size_t length)
 +{
 +      if (s->refcount > 1)
 +              return -EINVAL;
 +
 +      if (buf[0] == '1')
 +              WRITE_ONCE(s->flags, s->flags | SLAB_FAILSLAB);
 +      else
 +              WRITE_ONCE(s->flags, s->flags & ~SLAB_FAILSLAB);
 +
 +      return length;
 +}
 +SLAB_ATTR(failslab);
  #endif
  
  static ssize_t shrink_show(struct kmem_cache *s, char *buf)
@@@ -6007,9 -5798,7 +6002,9 @@@ static struct attribute *slab_attrs[] 
  #ifdef CONFIG_FAILSLAB
        &failslab_attr.attr,
  #endif
 +#ifdef CONFIG_HARDENED_USERCOPY
        &usersize_attr.attr,
 +#endif
  #ifdef CONFIG_KFENCE
        &skip_kfence_attr.attr,
  #endif
@@@ -6126,6 -5915,11 +6121,6 @@@ static int sysfs_slab_add(struct kmem_c
        struct kset *kset = cache_kset(s);
        int unmergeable = slab_unmergeable(s);
  
 -      if (!kset) {
 -              kobject_init(&s->kobj, &slab_ktype);
 -              return 0;
 -      }
 -
        if (!unmergeable && disable_higher_order_debug &&
                        (slub_debug & DEBUG_METADATA_FLAGS))
                unmergeable = 1;
@@@ -6255,8 -6049,9 +6250,8 @@@ static int __init slab_sysfs_init(void
        mutex_unlock(&slab_mutex);
        return 0;
  }
 -
 -__initcall(slab_sysfs_init);
 -#endif /* CONFIG_SYSFS */
 +late_initcall(slab_sysfs_init);
 +#endif /* SLAB_SUPPORTS_SYSFS */
  
  #if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
  static int slab_debugfs_show(struct seq_file *seq, void *v)
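For context (not part of the patch): the bulk paths reworked in mm/slub.c above are what callers of kmem_cache_alloc_bulk() / kmem_cache_free_bulk() hit. A minimal in-kernel caller sketch, assuming only the signatures visible in this diff (the bulk allocator returns the number of objects allocated, or 0 if nothing could be allocated); cache, objs and fill_batch() are illustrative names.

#include <linux/errno.h>
#include <linux/slab.h>

#define BATCH 16

static int fill_batch(struct kmem_cache *cache)
{
	void *objs[BATCH];
	int got;

	/* returns the number of objects allocated, 0 on failure */
	got = kmem_cache_alloc_bulk(cache, GFP_KERNEL, BATCH, objs);
	if (!got)
		return -ENOMEM;

	/* ... use objs[0..got-1] ... */

	kmem_cache_free_bulk(cache, got, objs);
	return 0;
}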
diff --combined mm/swapfile.c
index 3eedf7ae957f6e86674e86701ac2ee18edc051a3,03fe0949f6b2f2cba619b1c76543ce9559c938e6..908a529bca12c9477f460774cae73eba5a31fe02
@@@ -772,7 -772,8 +772,7 @@@ static void set_cluster_next(struct swa
                /* No free swap slots available */
                if (si->highest_bit <= si->lowest_bit)
                        return;
 -              next = si->lowest_bit +
 -                      prandom_u32_max(si->highest_bit - si->lowest_bit + 1);
 +              next = get_random_u32_inclusive(si->lowest_bit, si->highest_bit);
                next = ALIGN_DOWN(next, SWAP_ADDRESS_SPACE_PAGES);
                next = max_t(unsigned int, next, si->lowest_bit);
        }
@@@ -1780,7 -1781,7 +1780,7 @@@ static int unuse_pte(struct vm_area_str
                pte_t pteval;
  
                dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
-               pteval = swp_entry_to_pte(make_swapin_error_entry(page));
+               pteval = swp_entry_to_pte(make_swapin_error_entry());
                set_pte_at(vma->vm_mm, addr, pte, pteval);
                swap_free(entry);
                ret = 0;
@@@ -3088,7 -3089,7 +3088,7 @@@ SYSCALL_DEFINE2(swapon, const char __us
                 */
                for_each_possible_cpu(cpu) {
                        per_cpu(*p->cluster_next_cpu, cpu) =
 -                              1 + prandom_u32_max(p->highest_bit);
 +                              get_random_u32_inclusive(1, p->highest_bit);
                }
                nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
  
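For context (not part of the patch): both mm/slub.c and mm/swapfile.c above switch from prandom_u32_max() to the newer bounded-random helpers. A minimal sketch of the equivalence, assuming the 6.2-era <linux/random.h> interface; pick_swap_slot() is an illustrative name mirroring the set_cluster_next() hunk.

#include <linux/random.h>

/*
 * get_random_u32_below(n)          -> uniform value in [0, n)
 * get_random_u32_inclusive(lo, hi) -> uniform value in [lo, hi]
 */
static u32 pick_swap_slot(u32 lowest_bit, u32 highest_bit)
{
	/* old form: lowest_bit + prandom_u32_max(highest_bit - lowest_bit + 1) */
	return get_random_u32_inclusive(lowest_bit, highest_bit);
}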
index a8fbf8548bc08bce673df7872045c76854eb4138,ee8c41c998e6d454280eb83dda61a563b52a39a0..1f8c36a9fa1083743e1210408a79a5b95597db5b
@@@ -1,4 -1,5 +1,5 @@@
  # SPDX-License-Identifier: GPL-2.0-only
+ cow
  hugepage-mmap
  hugepage-mremap
  hugepage-shm
@@@ -33,5 -34,3 +34,5 @@@ memfd_secre
  soft-dirty
  split_huge_page_test
  ksm_tests
 +local_config.h
 +local_config.mk