Git Repo - linux.git/commitdiff
Merge branch 'akpm' (patches from Andrew)
author Linus Torvalds <[email protected]>
Sat, 15 Jan 2022 18:37:06 +0000 (20:37 +0200)
committer Linus Torvalds <[email protected]>
Sat, 15 Jan 2022 18:37:06 +0000 (20:37 +0200)
Merge misc updates from Andrew Morton:
 "146 patches.

  Subsystems affected by this patch series: kthread, ia64, scripts,
  ntfs, squashfs, ocfs2, vfs, and mm (slab-generic, slab, kmemleak,
  dax, kasan, debug, pagecache, gup, shmem, frontswap, memremap,
  memcg, selftests, pagemap, dma, vmalloc, memory-failure, hugetlb,
  userfaultfd, vmscan, mempolicy, oom-kill, hugetlbfs, migration, thp,
  ksm, page-poison, percpu, rmap, zswap, zram, cleanups, hmm, and
  damon)"

* emailed patches from Andrew Morton <[email protected]>: (146 commits)
  mm/damon: hide kernel pointer from tracepoint event
  mm/damon/vaddr: hide kernel pointer from damon_va_three_regions() failure log
  mm/damon/vaddr: use pr_debug() for damon_va_three_regions() failure logging
  mm/damon/dbgfs: remove an unnecessary variable
  mm/damon: move the implementation of damon_insert_region to damon.h
  mm/damon: add access checking for hugetlb pages
  Docs/admin-guide/mm/damon/usage: update for schemes statistics
  mm/damon/dbgfs: support all DAMOS stats
  Docs/admin-guide/mm/damon/reclaim: document statistics parameters
  mm/damon/reclaim: provide reclamation statistics
  mm/damon/schemes: account how many times quota limit has exceeded
  mm/damon/schemes: account scheme actions that successfully applied
  mm/damon: remove a mistakenly added comment for a future feature
  Docs/admin-guide/mm/damon/usage: update for kdamond_pid and (mk|rm)_contexts
  Docs/admin-guide/mm/damon/usage: mention tracepoint at the beginning
  Docs/admin-guide/mm/damon/usage: remove redundant information
  Docs/admin-guide/mm/damon/usage: update for scheme quotas and watermarks
  mm/damon: convert macro functions to static inline functions
  mm/damon: modify damon_rand() macro to static inline function
  mm/damon: move damon_rand() definition into damon.h
  ...
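
The DAMON cleanups listed above (e.g. "mm/damon: modify damon_rand() macro to static inline function") follow a common kernel pattern: replacing a function-like macro with a static inline helper in a shared header, which keeps the call overhead-free while adding type checking and single evaluation of the arguments. The sketch below is illustrative only; the parameter names and the use of prandom_u32_max() are assumptions, not necessarily the exact upstream code.

#include <linux/prandom.h>	/* for prandom_u32_max() */

/*
 * Before (macro form, roughly):
 *   #define damon_rand(l, h)  ((l) + prandom_u32_max((h) - (l)))
 * Arguments are untyped and textually substituted.
 */

/*
 * After: a static inline in a header (e.g. damon.h) keeps zero call
 * overhead while adding type checking and evaluating each argument
 * exactly once. Sketch under the assumptions noted above.
 */
static inline unsigned long damon_rand(unsigned long l, unsigned long r)
{
	return l + prandom_u32_max(r - l);
}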

42 files changed:
MAINTAINERS
arch/Kconfig
arch/arm/mm/fault.c
arch/arm64/mm/fault.c
arch/parisc/mm/fault.c
arch/powerpc/mm/fault.c
arch/s390/mm/fault.c
arch/um/kernel/trap.c
arch/x86/Kconfig
arch/x86/include/asm/pgtable.h
drivers/block/zram/zram_drv.c
drivers/dax/bus.c
drivers/dax/bus.h
drivers/dax/device.c
drivers/of/fdt.c
fs/ext4/extents.c
fs/xfs/xfs_buf.c
include/linux/fs.h
include/linux/kasan.h
include/linux/memcontrol.h
include/linux/memremap.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/page-flags.h
kernel/fork.c
kernel/rcu/rcutorture.c
kernel/sysctl.c
mm/Makefile
mm/huge_memory.c
mm/internal.h
mm/kasan/quarantine.c
mm/khugepaged.c
mm/memcontrol.c
mm/memory-failure.c
mm/memory.c
mm/memremap.c
mm/migrate.c
mm/shmem.c
mm/slab.h
mm/slab_common.c
mm/swap.c
mm/truncate.c

diff --combined MAINTAINERS
index 5d0cd537803a1f56867f39d0909b3d591b91cc98,fbdb860c0b8b503d6649ee8e69d5ea69794cfe0f..4749663143834439887116a0564bd1427eb23bf3
@@@ -966,7 -966,6 +966,7 @@@ F: drivers/gpu/drm/amd/include/kgd_kfd_
  F:    drivers/gpu/drm/amd/include/v9_structs.h
  F:    drivers/gpu/drm/amd/include/vi_structs.h
  F:    include/uapi/linux/kfd_ioctl.h
 +F:    include/uapi/linux/kfd_sysfs.h
  
  AMD SPI DRIVER
  M:    Sanjay R Mehta <[email protected]>
@@@ -994,13 -993,6 +994,13 @@@ S:       Supporte
  T:    git https://gitlab.freedesktop.org/agd5f/linux.git
  F:    drivers/gpu/drm/amd/pm/
  
 +AMD PSTATE DRIVER
 +M:    Huang Rui <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +F:    Documentation/admin-guide/pm/amd-pstate.rst
 +F:    drivers/cpufreq/amd-pstate*
 +
  AMD PTDMA DRIVER
  M:    Sanjay R Mehta <[email protected]>
  L:    [email protected]
@@@ -1077,15 -1069,6 +1077,15 @@@ W:    http://ez.analog.com/community/linux
  F:    Documentation/devicetree/bindings/iio/adc/adi,ad7780.yaml
  F:    drivers/iio/adc/ad7780.c
  
 +ANALOG DEVICES INC AD74413R DRIVER
 +M:    Cosmin Tanislav <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +W:    http://ez.analog.com/community/linux-device-drivers
 +F:    Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml
 +F:    drivers/iio/addac/ad74413r.c
 +F:    include/dt-bindings/iio/addac/adi,ad74413r.h
 +
  ANALOG DEVICES INC AD9389B DRIVER
  M:    Hans Verkuil <[email protected]>
  L:    [email protected]
@@@ -1156,7 -1139,6 +1156,7 @@@ ANALOG DEVICES INC ADV748X DRIVE
  M:    Kieran Bingham <[email protected]>
  L:    [email protected]
  S:    Maintained
 +F:    Documentation/devicetree/bindings/media/i2c/adv748x.yaml
  F:    drivers/media/i2c/adv748x/*
  
  ANALOG DEVICES INC ADV7511 DRIVER
@@@ -1763,21 -1745,17 +1763,21 @@@ B:   https://github.com/AsahiLinux/linux/
  C:    irc://irc.oftc.net/asahi-dev
  T:    git https://github.com/AsahiLinux/linux.git
  F:    Documentation/devicetree/bindings/arm/apple.yaml
 +F:    Documentation/devicetree/bindings/arm/apple/*
  F:    Documentation/devicetree/bindings/i2c/apple,i2c.yaml
  F:    Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
  F:    Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml
  F:    Documentation/devicetree/bindings/pci/apple,pcie.yaml
  F:    Documentation/devicetree/bindings/pinctrl/apple,pinctrl.yaml
 +F:    Documentation/devicetree/bindings/power/apple*
 +F:    Documentation/devicetree/bindings/watchdog/apple,wdt.yaml
  F:    arch/arm64/boot/dts/apple/
  F:    drivers/i2c/busses/i2c-pasemi-core.c
  F:    drivers/i2c/busses/i2c-pasemi-platform.c
  F:    drivers/irqchip/irq-apple-aic.c
  F:    drivers/mailbox/apple-mailbox.c
  F:    drivers/pinctrl/pinctrl-apple-gpio.c
 +F:    drivers/soc/apple/*
  F:    include/dt-bindings/interrupt-controller/apple-aic.h
  F:    include/dt-bindings/pinctrl/apple.h
  F:    include/linux/apple-mailbox.h
@@@ -1912,7 -1890,6 +1912,7 @@@ F:      Documentation/trace/coresight/
  F:    drivers/hwtracing/coresight/*
  F:    include/dt-bindings/arm/coresight-cti-dt.h
  F:    include/linux/coresight*
 +F:    samples/coresight/*
  F:    tools/perf/arch/arm/util/auxtrace.c
  F:    tools/perf/arch/arm/util/cs-etm.c
  F:    tools/perf/arch/arm/util/cs-etm.h
@@@ -2314,7 -2291,6 +2314,7 @@@ F:      Documentation/devicetree/bindings/gp
  F:    arch/arm/boot/dts/mstar-*
  F:    arch/arm/mach-mstar/
  F:    drivers/clk/mstar/
 +F:    drivers/clocksource/timer-msc313e.c
  F:    drivers/gpio/gpio-msc313.c
  F:    drivers/rtc/rtc-msc313.c
  F:    drivers/watchdog/msc313e_wdt.c
@@@ -2575,7 -2551,6 +2575,7 @@@ Q:      https://patchwork.kernel.org/project
  F:    Documentation/arm/samsung/
  F:    Documentation/devicetree/bindings/arm/samsung/
  F:    Documentation/devicetree/bindings/power/pd-samsung.yaml
 +F:    Documentation/devicetree/bindings/soc/samsung/
  F:    arch/arm/boot/dts/exynos*
  F:    arch/arm/boot/dts/s3c*
  F:    arch/arm/boot/dts/s5p*
@@@ -2602,7 -2577,7 +2602,7 @@@ N:      s3c64x
  N:    s5pv210
  
  ARM/SAMSUNG S5P SERIES 2D GRAPHICS ACCELERATION (G2D) SUPPORT
 -M:    Andrzej Hajda <a.hajda@samsung.com>
 +M:    Ɓukasz Stelmach <l.stelmach@samsung.com>
  L:    [email protected] (moderated for non-subscribers)
  L:    [email protected]
  S:    Maintained
@@@ -2626,8 -2601,7 +2626,8 @@@ S:      Maintaine
  F:    drivers/media/platform/s5p-jpeg/
  
  ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT
 -M:    Andrzej Hajda <[email protected]>
 +M:    Marek Szyprowski <[email protected]>
 +M:    Andrzej Hajda <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
  L:    [email protected]
  S:    Maintained
  S:    Supported
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/iwamatsu/linux-visconti.git
  F:    Documentation/devicetree/bindings/arm/toshiba.yaml
 +F:    Documentation/devicetree/bindings/clock/toshiba,tmpv770x-pipllct.yaml
 +F:    Documentation/devicetree/bindings/clock/toshiba,tmpv770x-pismu.yaml
  F:    Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml
  F:    Documentation/devicetree/bindings/gpio/toshiba,gpio-visconti.yaml
  F:    Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml
  F:    Documentation/devicetree/bindings/pinctrl/toshiba,visconti-pinctrl.yaml
  F:    Documentation/devicetree/bindings/watchdog/toshiba,visconti-wdt.yaml
  F:    arch/arm64/boot/dts/toshiba/
 +F:    drivers/clk/visconti/
  F:    drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
  F:    drivers/gpio/gpio-visconti.c
  F:    drivers/pci/controller/dwc/pcie-visconti.c
@@@ -3027,27 -2998,6 +3027,27 @@@ W:    http://acpi4asus.sf.ne
  F:    drivers/platform/x86/asus*.c
  F:    drivers/platform/x86/eeepc*.c
  
 +ASUS TF103C DOCK DRIVER
 +M:    Hans de Goede <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git
 +F:    drivers/platform/x86/asus-tf103c-dock.c
 +
 +ASUS WMI HARDWARE MONITOR DRIVER
 +M:    Ed Brindley <[email protected]>
 +M:    Denis Pauk <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    drivers/hwmon/asus_wmi_sensors.c
 +
 +ASUS WMI EC HARDWARE MONITOR DRIVER
 +M:    Eugene Shalygin <[email protected]>
 +M:    Denis Pauk <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    drivers/hwmon/asus_wmi_ec_sensors.c
 +
  ASUS WIRELESS RADIO CONTROL DRIVER
  M:    JoĂŁo Paulo Rechi Vita <[email protected]>
  L:    [email protected]
@@@ -3430,8 -3380,6 +3430,8 @@@ M:      Jens Axboe <[email protected]
  L:    [email protected]
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
 +F:    Documentation/ABI/stable/sysfs-block
 +F:    Documentation/block/
  F:    block/
  F:    drivers/block/
  F:    include/linux/blk*
@@@ -3621,7 -3569,7 +3621,7 @@@ R:      Florent Revest <[email protected]
  R:    Brendan Jackman <[email protected]>
  L:    [email protected]
  S:    Maintained
 -F:    Documentation/bpf/bpf_lsm.rst
 +F:    Documentation/bpf/prog_lsm.rst
  F:    include/linux/bpf_lsm.h
  F:    kernel/bpf/bpf_lsm.c
  F:    security/bpf/
@@@ -3688,7 -3636,6 +3688,7 @@@ F:      drivers/net/ethernet/broadcom/bcm490
  F:    drivers/net/ethernet/broadcom/unimac.h
  
  BROADCOM BCM5301X ARM ARCHITECTURE
 +M:    Florian Fainelli <[email protected]>
  M:    Hauke Mehrtens <[email protected]>
  M:    RafaƂ MiƂecki <[email protected]>
  M:    [email protected]
@@@ -3700,7 -3647,6 +3700,7 @@@ F:      arch/arm/boot/dts/bcm953012
  F:    arch/arm/mach-bcm/bcm_5301x.c
  
  BROADCOM BCM53573 ARM ARCHITECTURE
 +M:    Florian Fainelli <[email protected]>
  M:    RafaƂ MiƂecki <[email protected]>
  L:    [email protected]
  L:    [email protected] (moderated for non-subscribers)
@@@ -3744,7 -3690,7 +3744,7 @@@ M:      Al Cooper <[email protected]
  L:    [email protected]
  L:    [email protected]
  S:    Maintained
 -F:    Documentation/devicetree/bindings/usb/brcm,bdc.txt
 +F:    Documentation/devicetree/bindings/usb/brcm,bdc.yaml
  F:    drivers/usb/gadget/udc/bdc/
  
  BROADCOM BMIPS CPUFREQ DRIVER
@@@ -3827,7 -3773,7 +3827,7 @@@ M:      Doug Berger <[email protected]
  M:    Florian Fainelli <[email protected]>
  L:    [email protected]
  S:    Supported
 -F:    Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt
 +F:    Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.yaml
  F:    drivers/gpio/gpio-brcmstb.c
  
  BROADCOM BRCMSTB I2C DRIVER
@@@ -3885,7 -3831,7 +3885,7 @@@ M:      Florian Fainelli <[email protected]
  L:    [email protected]
  L:    [email protected]
  S:    Supported
 -F:    Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
 +F:    Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml
  F:    Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml
  F:    drivers/net/ethernet/broadcom/genet/
  F:    drivers/net/ethernet/broadcom/unimac.h
@@@ -3927,7 -3873,7 +3927,7 @@@ M:      RafaƂ MiƂecki <[email protected]
  M:    [email protected]
  L:    [email protected]
  S:    Maintained
 -F:    Documentation/devicetree/bindings/net/brcm,amac.txt
 +F:    Documentation/devicetree/bindings/net/brcm,amac.yaml
  F:    drivers/net/ethernet/broadcom/bgmac*
  F:    drivers/net/ethernet/broadcom/unimac.h
  
@@@ -4002,7 -3948,7 +4002,7 @@@ M:      Markus Mayer <[email protected]
  M:    [email protected]
  L:    [email protected]
  S:    Maintained
 -F:    Documentation/devicetree/bindings/thermal/brcm,avs-tmon.txt
 +F:    Documentation/devicetree/bindings/thermal/brcm,avs-tmon.yaml
  F:    drivers/thermal/broadcom/brcmstb*
  
  BROADCOM STB DPFE DRIVER
@@@ -4038,7 -3984,6 +4038,7 @@@ L:      [email protected]
  S:    Supported
  F:    drivers/net/ethernet/broadcom/bcmsysport.*
  F:    drivers/net/ethernet/broadcom/unimac.h
 +F:    Documentation/devicetree/bindings/net/brcm,systemport.yaml
  
  BROADCOM TG3 GIGABIT ETHERNET DRIVER
  M:    Siva Reddy Kallam <[email protected]>
@@@ -4578,12 -4523,9 +4578,12 @@@ F:    drivers/media/cec/i2c/ch7322.
  CIRRUS LOGIC AUDIO CODEC DRIVERS
  M:    James Schulman <[email protected]>
  M:    David Rhodes <[email protected]>
 +M:    Lucas Tanure <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
  L:    [email protected]
  S:    Maintained
 +F:    Documentation/devicetree/bindings/sound/cirrus,cs*
 +F:    sound/pci/hda/cs*
  F:    sound/soc/codecs/cs*
  
  CIRRUS LOGIC DSP FIRMWARE DRIVER
@@@ -4783,8 -4725,6 +4783,8 @@@ M:      Ian Abbott <[email protected]
  M:    H Hartley Sweeten <[email protected]>
  S:    Odd Fixes
  F:    drivers/comedi/
 +F:    include/linux/comedi/
 +F:    include/uapi/linux/comedi.h
  
  COMMON CLK FRAMEWORK
  M:    Michael Turquette <[email protected]>
@@@ -5483,12 -5423,6 +5483,12 @@@ W:    https://linuxtv.or
  T:    git git://linuxtv.org/media_tree.git
  F:    drivers/media/platform/sti/delta
  
 +DELTA AHE-50DC FAN CONTROL MODULE DRIVER
 +M:    Zev Weiss <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    drivers/hwmon/pmbus/delta-ahe50dc-fan.c
 +
  DELTA DPS920AB PSU DRIVER
  M:    Robert Marko <[email protected]>
  L:    [email protected]
@@@ -6116,7 -6050,6 +6116,7 @@@ F:      drivers/gpu/drm/tiny/mi0283qt.
  DRM DRIVER FOR MSM ADRENO GPU
  M:    Rob Clark <[email protected]>
  M:    Sean Paul <[email protected]>
 +R:    Abhinav Kumar <[email protected]>
  L:    [email protected]
  L:    [email protected]
  L:    [email protected]
@@@ -6142,17 -6075,10 +6142,17 @@@ F:   drivers/gpu/drm/panel/panel-novatek-
  
  DRM DRIVER FOR NVIDIA GEFORCE/QUADRO GPUS
  M:    Ben Skeggs <[email protected]>
 +M:    Karol Herbst <[email protected]>
 +M:    Lyude Paul <[email protected]>
  L:    [email protected]
  L:    [email protected]
  S:    Supported
 -T:    git git://github.com/skeggsb/linux
 +W:    https://nouveau.freedesktop.org/
 +Q:    https://patchwork.freedesktop.org/project/nouveau/
 +Q:    https://gitlab.freedesktop.org/drm/nouveau/-/merge_requests
 +B:    https://gitlab.freedesktop.org/drm/nouveau/-/issues
 +C:    irc://irc.oftc.net/nouveau
 +T:    git https://gitlab.freedesktop.org/drm/nouveau.git
  F:    drivers/gpu/drm/nouveau/
  F:    include/uapi/drm/nouveau_drm.h
  
@@@ -6385,7 -6311,7 +6385,7 @@@ F:      Documentation/devicetree/bindings/di
  F:    drivers/gpu/drm/atmel-hlcdc/
  
  DRM DRIVERS FOR BRIDGE CHIPS
 -M:    Andrzej Hajda <a.hajda@samsung.com>
 +M:    Andrzej Hajda <andrzej.hajda@intel.com>
  M:    Neil Armstrong <[email protected]>
  M:    Robert Foss <[email protected]>
  R:    Laurent Pinchart <[email protected]>
@@@ -6492,7 -6418,6 +6492,7 @@@ L:      [email protected]
  L:    [email protected]
  S:    Supported
  T:    git git://linuxtv.org/pinchartl/media drm/du/next
 +F:    Documentation/devicetree/bindings/display/bridge/renesas,dsi-csi2-tx.yaml
  F:    Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.yaml
  F:    Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml
  F:    Documentation/devicetree/bindings/display/renesas,du.yaml
@@@ -6611,14 -6536,6 +6611,14 @@@ F:    drivers/gpu/drm/drm_panel.
  F:    drivers/gpu/drm/panel/
  F:    include/drm/drm_panel.h
  
 +DRM PRIVACY-SCREEN CLASS
 +M:    Hans de Goede <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git git://anongit.freedesktop.org/drm/drm-misc
 +F:    drivers/gpu/drm/drm_privacy_screen*
 +F:    include/drm/drm_privacy_screen*
 +
  DRM TTM SUBSYSTEM
  M:    Christian Koenig <[email protected]>
  M:    Huang Rui <[email protected]>
@@@ -7096,7 -7013,9 +7096,7 @@@ S:      Maintaine
  F:    drivers/mmc/host/cqhci*
  
  EMULEX 10Gbps iSCSI - OneConnect DRIVER
 -M:    Subbu Seetharaman <[email protected]>
  M:    Ketan Mukadam <[email protected]>
 -M:    Jitendra Bhivare <[email protected]>
  L:    [email protected]
  S:    Supported
  W:    http://www.broadcom.com
@@@ -7507,6 -7426,12 +7507,6 @@@ F:     Documentation/firmware_class
  F:    drivers/base/firmware_loader/
  F:    include/linux/firmware.h
  
 -FLASH ADAPTER DRIVER (IBM Flash Adapter 900GB Full Height PCI Flash Card)
 -M:    Joshua Morris <[email protected]>
 -M:    Philip Kelleher <[email protected]>
 -S:    Maintained
 -F:    drivers/block/rsxx/
 -
  FLEXTIMER FTM-QUADDEC DRIVER
  M:    Patrick Havelange <[email protected]>
  L:    [email protected]
@@@ -7598,7 -7523,6 +7598,7 @@@ F:      include/video
  FREESCALE CAAM (Cryptographic Acceleration and Assurance Module) DRIVER
  M:    Horia Geantă <[email protected]>
  M:    Pankaj Gupta <[email protected]>
 +M:    Gaurav Jain <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/crypto/fsl-sec4.txt
@@@ -8563,12 -8487,6 +8563,12 @@@ F:    drivers/hid
  F:    include/linux/hid*
  F:    include/uapi/linux/hid*
  
 +HID LOGITECH DRIVERS
 +R:    Filipe LaĂ­ns <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    drivers/hid/hid-logitech-*
 +
  HID PLAYSTATION DRIVER
  M:    Roderick Colenbrander <[email protected]>
  L:    [email protected]
@@@ -8690,10 -8608,8 +8690,10 @@@ F:    drivers/misc/hisi_hikey_usb.
  
  HISILICON PMU DRIVER
  M:    Shaokun Zhang <[email protected]>
 +M:    Qi Liu <[email protected]>
  S:    Supported
  W:    http://www.hisilicon.com
 +F:    Documentation/admin-guide/perf/hisi-pcie-pmu.rst
  F:    Documentation/admin-guide/perf/hisi-pmu.rst
  F:    drivers/perf/hisilicon
  
@@@ -8724,7 -8640,6 +8724,7 @@@ F:      drivers/scsi/hisi_sas
  
  HISILICON SECURITY ENGINE V2 DRIVER (SEC2)
  M:    Zaibo Xu <[email protected]>
 +M:    Kai Ye <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    Documentation/ABI/testing/debugfs-hisi-sec
@@@ -9575,7 -9490,6 +9575,7 @@@ INTEL DRM DRIVERS (excluding Poulsbo, M
  M:    Jani Nikula <[email protected]>
  M:    Joonas Lahtinen <[email protected]>
  M:    Rodrigo Vivi <[email protected]>
 +M:    Tvrtko Ursulin <[email protected]>
  L:    [email protected]
  S:    Supported
  W:    https://01.org/linuxgraphics/
@@@ -9770,6 -9684,7 +9770,6 @@@ F:      Documentation/devicetree/bindings/cr
  F:    drivers/crypto/keembay/Kconfig
  F:    drivers/crypto/keembay/Makefile
  F:    drivers/crypto/keembay/keembay-ocs-ecc.c
 -F:    drivers/crypto/keembay/ocs-ecc-curve-defs.h
  
  INTEL KEEM BAY OCS HCU CRYPTO DRIVER
  M:    Daniele Alessandrelli <[email protected]>
@@@ -9782,13 -9697,6 +9782,13 @@@ F:    drivers/crypto/keembay/keembay-ocs-h
  F:    drivers/crypto/keembay/ocs-hcu.c
  F:    drivers/crypto/keembay/ocs-hcu.h
  
 +INTEL THUNDER BAY EMMC PHY DRIVER
 +M:    Nandhini Srikandan <[email protected]>
 +M:    Rashmi A <[email protected]>
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml
 +F:    drivers/phy/intel/phy-intel-thunderbay-emmc.c
 +
  INTEL MANAGEMENT ENGINE (mei)
  M:    Tomas Winkler <[email protected]>
  L:    [email protected]
@@@ -9844,9 -9752,10 +9844,9 @@@ S:     Maintaine
  F:    drivers/mfd/intel_soc_pmic*
  F:    include/linux/mfd/intel_soc_pmic*
  
 -INTEL PMT DRIVER
 -M:    "David E. Box" <[email protected]>
 -S:    Maintained
 -F:    drivers/mfd/intel_pmt.c
 +INTEL PMT DRIVERS
 +M:    David E. Box <[email protected]>
 +S:    Supported
  F:    drivers/platform/x86/intel/pmt/
  
  INTEL PRO/WIRELESS 2100, 2200BG, 2915ABG NETWORK CONNECTION SUPPORT
  S:    Maintained
  F:    drivers/platform/x86/intel/uncore-frequency.c
  
 +INTEL VENDOR SPECIFIC EXTENDED CAPABILITIES DRIVER
 +M:    David E. Box <[email protected]>
 +S:    Supported
 +F:    drivers/platform/x86/intel/vsec.*
 +
  INTEL VIRTUAL BUTTON DRIVER
  M:    AceLan Kao <[email protected]>
  L:    [email protected]
@@@ -10842,13 -10746,6 +10842,13 @@@ S: Maintaine
  W:    http://legousb.sourceforge.net/
  F:    drivers/usb/misc/legousbtower.c
  
 +LETSKETCH HID TABLET DRIVER
 +M:    Hans de Goede <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git
 +F:    drivers/hid/hid-letsketch.c
 +
  LG LAPTOP EXTRAS
  M:    Matan Ziv-Av <[email protected]>
  L:    [email protected]
@@@ -11637,12 -11534,6 +11637,12 @@@ S: Maintaine
  F:    Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml
  F:    drivers/media/i2c/max9286.c
  
 +MAX96712 QUAD GMSL2 DESERIALIZER DRIVER
 +M:    Niklas Söderlund <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    drivers/staging/media/max96712/max96712.c
 +
  MAX9860 MONO AUDIO VOICE CODEC DRIVER
  M:    Peter Rosin <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
@@@ -11678,13 -11569,6 +11678,13 @@@ S: Maintaine
  F:    Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml
  F:    drivers/power/supply/max17042_battery.c
  
 +MAXIM MAX20086 CAMERA POWER PROTECTOR DRIVER
 +M:    Laurent Pinchart <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/regulator/maxim,max20086.yaml
 +F:    drivers/regulator/max20086-regulator.c
 +
  MAXIM MAX77650 PMIC MFD DRIVER
  M:    Bartosz Golaszewski <[email protected]>
  L:    [email protected]
@@@ -11707,12 -11591,6 +11707,12 @@@ F: Documentation/devicetree/bindings/*/
  F:    drivers/regulator/max77802-regulator.c
  F:    include/dt-bindings/*/*max77802.h
  
 +MAXIM MAX77976 BATTERY CHARGER
 +M:    Luca Ceresoli <[email protected]>
 +S:    Supported
 +F:    Documentation/devicetree/bindings/power/supply/maxim,max77976.yaml
 +F:    drivers/power/supply/max77976_charger.c
 +
  MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
  M:    Krzysztof Kozlowski <[email protected]>
  M:    Bartlomiej Zolnierkiewicz <[email protected]>
@@@ -11727,7 -11605,7 +11727,7 @@@ M:   Krzysztof Kozlowski <krzysztof.kozlo
  M:    Bartlomiej Zolnierkiewicz <[email protected]>
  L:    [email protected]
  S:    Supported
 -F:    Documentation/devicetree/bindings/*/max77686.txt
 +F:    Documentation/devicetree/bindings/*/maxim,max77686.yaml
  F:    Documentation/devicetree/bindings/clock/maxim,max77686.txt
  F:    Documentation/devicetree/bindings/mfd/max14577.txt
  F:    Documentation/devicetree/bindings/mfd/max77693.txt
  S:    Maintained
  F:    drivers/net/ethernet/microchip/lan743x_*
  
 +MICROCHIP LAN966X ETHERNET DRIVER
 +M:    Horatiu Vultur <[email protected]>
 +M:    [email protected]
 +L:    [email protected]
 +S:    Maintained
 +F:    drivers/net/ethernet/microchip/lan966x/*
 +
  MICROCHIP LCDFB DRIVER
  M:    Nicolas Ferre <[email protected]>
  L:    [email protected]
@@@ -13836,24 -13707,12 +13836,24 @@@ F:        Documentation/devicetree/bindings/di
  F:    drivers/gpu/drm/imx/dcss/
  
  NXP i.MX 8QXP ADC DRIVER
 -M:    Cai Huoqing <[email protected]>
 +M:    Cai Huoqing <[email protected]>
 +M:    Haibo Chen <[email protected]>
 +L:    [email protected]
  L:    [email protected]
 -S:    Supported
 +S:    Maintained
  F:    Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml
  F:    drivers/iio/adc/imx8qxp-adc.c
  
 +NXP i.MX 7D/6SX/6UL AND VF610 ADC DRIVER
 +M:    Haibo Chen <[email protected]>
 +L:    [email protected]
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/iio/adc/fsl,imx7d-adc.yaml
 +F:    Documentation/devicetree/bindings/iio/adc/fsl,vf610-adc.yaml
 +F:    drivers/iio/adc/imx7d_adc.c
 +F:    drivers/iio/adc/vf610_adc.c
 +
  NXP PF8100/PF8121A/PF8200 PMIC REGULATOR DEVICE DRIVER
  M:    Jagan Teki <[email protected]>
  S:    Maintained
@@@ -13927,13 -13786,6 +13927,13 @@@ S: Maintaine
  F:    Documentation/hwmon/nzxt-kraken2.rst
  F:    drivers/hwmon/nzxt-kraken2.c
  
 +NZXT-SMART2 HARDWARE MONITORING DRIVER
 +M:    Aleksandr Mezin <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/hwmon/nzxt-smart2.rst
 +F:    drivers/hwmon/nzxt-smart2.c
 +
  OBJAGG
  M:    Jiri Pirko <[email protected]>
  L:    [email protected]
@@@ -14246,6 -14098,7 +14246,6 @@@ F:   drivers/media/i2c/ov5647.
  
  OMNIVISION OV5670 SENSOR DRIVER
  M:    Chiranjeevi Rapolu <[email protected]>
 -M:    Hyungwoo Yang <[email protected]>
  L:    [email protected]
  S:    Maintained
  T:    git git://linuxtv.org/media_tree.git
@@@ -14258,13 -14111,6 +14258,13 @@@ S: Maintaine
  T:    git git://linuxtv.org/media_tree.git
  F:    drivers/media/i2c/ov5675.c
  
 +OMNIVISION OV5693 SENSOR DRIVER
 +M:    Daniel Scally <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git git://linuxtv.org/media_tree.git
 +F:    drivers/media/i2c/ov5693.c
 +
  OMNIVISION OV5695 SENSOR DRIVER
  M:    Shunqian Zheng <[email protected]>
  L:    [email protected]
@@@ -14541,6 -14387,15 +14541,15 @@@ F: include/net/page_pool.
  F:    include/trace/events/page_pool.h
  F:    net/core/page_pool.c
  
+ PAGE TABLE CHECK
+ M:    Pasha Tatashin <[email protected]>
+ M:    Andrew Morton <[email protected]>
+ L:    [email protected]
+ S:    Maintained
+ F:    Documentation/vm/page_table_check.rst
+ F:    include/linux/page_table_check.h
+ F:    mm/page_table_check.c
  PANASONIC LAPTOP ACPI EXTRAS DRIVER
  M:    Kenneth Chan <[email protected]>
  L:    [email protected]
@@@ -15051,7 -14906,7 +15060,7 @@@ F:   drivers/pci/controller/dwc/*spear
  PCMCIA SUBSYSTEM
  M:    Dominik Brodowski <[email protected]>
  S:    Odd Fixes
 -T:    git git://git.kernel.org/pub/scm/linux/kernel/git/brodo/pcmcia.git
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/brodo/linux.git
  F:    Documentation/pcmcia/
  F:    drivers/pcmcia/
  F:    include/pcmcia/
  S:    Maintained
  F:    drivers/pinctrl/pinctrl-single.c
  
 +PIN CONTROLLER - THUNDERBAY
 +M:    Lakshmi Sowjanya D <[email protected]>
 +S:    Supported
 +F:    drivers/pinctrl/pinctrl-thunderbay.c
 +
  PKTCDVD DRIVER
  M:    [email protected]
  S:    Orphan
@@@ -15499,7 -15349,6 +15508,7 @@@ M:   Sergey Senozhatsky <senozhatsky@chro
  R:    Steven Rostedt <[email protected]>
  R:    John Ogness <[email protected]>
  S:    Maintained
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux.git
  F:    include/linux/printk.h
  F:    kernel/printk/
  
@@@ -15887,14 -15736,6 +15896,14 @@@ W: https://wireless.wiki.kernel.org/en/
  F:    Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml
  F:    drivers/net/wireless/ath/ath9k/
  
 +QUALCOMM BAM-DMUX WWAN NETWORK DRIVER
 +M:    Stephan Gerhold <[email protected]>
 +L:    [email protected]
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/net/qcom,bam-dmux.yaml
 +F:    drivers/net/wwan/qcom_bam_dmux.c
 +
  QUALCOMM CAMERA SUBSYSTEM DRIVER
  M:    Robert Foss <[email protected]>
  M:    Todor Tomov <[email protected]>
@@@ -15904,15 -15745,6 +15913,15 @@@ F: Documentation/admin-guide/media/qcom
  F:    Documentation/devicetree/bindings/media/*camss*
  F:    drivers/media/platform/qcom/camss/
  
 +QUALCOMM CLOCK DRIVERS
 +M:    Bjorn Andersson <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git
 +F:    Documentation/devicetree/bindings/clock/qcom,*
 +F:    drivers/clk/qcom/
 +F:    include/dt-bindings/clock/qcom,*
 +
  QUALCOMM CORE POWER REDUCTION (CPR) AVS DRIVER
  M:    Niklas Cassel <[email protected]>
  L:    [email protected]
@@@ -16166,7 -15998,6 +16175,7 @@@ F:   arch/mips/generic/board-ranchu.
  RANDOM NUMBER DRIVER
  M:    "Theodore Ts'o" <[email protected]>
  M:    Jason A. Donenfeld <[email protected]>
 +T:    git https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git
  S:    Maintained
  F:    drivers/char/random.c
  
@@@ -16474,14 -16305,6 +16483,14 @@@ S: Supporte
  F:    Documentation/devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml
  F:    drivers/iio/adc/rzg2l_adc.c
  
 +RENESAS R-CAR GEN3 & RZ/N1 NAND CONTROLLER DRIVER
 +M:    Miquel Raynal <[email protected]>
 +L:    [email protected]
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/mtd/renesas-nandc.yaml
 +F:    drivers/mtd/nand/raw/renesas-nand-controller.c
 +
  RESET CONTROLLER FRAMEWORK
  M:    Philipp Zabel <[email protected]>
  S:    Maintained
@@@ -16652,19 -16475,27 +16661,19 @@@ ROHM POWER MANAGEMENT IC DEVICE DRIVER
  R:    Matti Vaittinen <[email protected]>
  L:    [email protected]
  S:    Supported
 -F:    Documentation/devicetree/bindings/mfd/rohm,bd70528-pmic.txt
 -F:    Documentation/devicetree/bindings/regulator/rohm,bd70528-regulator.txt
  F:    drivers/clk/clk-bd718x7.c
 -F:    drivers/gpio/gpio-bd70528.c
  F:    drivers/gpio/gpio-bd71815.c
  F:    drivers/gpio/gpio-bd71828.c
 -F:    drivers/mfd/rohm-bd70528.c
  F:    drivers/mfd/rohm-bd71828.c
  F:    drivers/mfd/rohm-bd718x7.c
  F:    drivers/mfd/rohm-bd9576.c
 -F:    drivers/power/supply/bd70528-charger.c
 -F:    drivers/regulator/bd70528-regulator.c
  F:    drivers/regulator/bd71815-regulator.c
  F:    drivers/regulator/bd71828-regulator.c
  F:    drivers/regulator/bd718x7-regulator.c
  F:    drivers/regulator/bd9576-regulator.c
  F:    drivers/regulator/rohm-regulator.c
  F:    drivers/rtc/rtc-bd70528.c
 -F:    drivers/watchdog/bd70528_wdt.c
  F:    drivers/watchdog/bd9576_wdt.c
 -F:    include/linux/mfd/rohm-bd70528.h
  F:    include/linux/mfd/rohm-bd71815.h
  F:    include/linux/mfd/rohm-bd71828.h
  F:    include/linux/mfd/rohm-bd718x7.h
@@@ -17015,15 -16846,13 +17024,15 @@@ F:        Documentation/devicetree/bindings/ne
  F:    drivers/nfc/s3fwrn5
  
  SAMSUNG S5C73M3 CAMERA DRIVER
 -M:    Andrzej Hajda <[email protected]>
 +M:    Sylwester Nawrocki <[email protected]>
 +M:    Andrzej Hajda <[email protected]>
  L:    [email protected]
  S:    Supported
  F:    drivers/media/i2c/s5c73m3/*
  
  SAMSUNG S5K5BAF CAMERA DRIVER
 -M:    Andrzej Hajda <[email protected]>
 +M:    Sylwester Nawrocki <[email protected]>
 +M:    Andrzej Hajda <[email protected]>
  L:    [email protected]
  S:    Supported
  F:    drivers/media/i2c/s5k5baf.c
@@@ -17052,8 -16881,10 +17061,8 @@@ M:  Chanwoo Choi <[email protected]
  L:    [email protected]
  S:    Supported
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/snawrocki/clk.git
 -F:    Documentation/devicetree/bindings/clock/exynos*.txt
  F:    Documentation/devicetree/bindings/clock/samsung,*.yaml
  F:    Documentation/devicetree/bindings/clock/samsung,s3c*
 -F:    Documentation/devicetree/bindings/clock/samsung,s5p*
  F:    drivers/clk/samsung/
  F:    include/dt-bindings/clock/exynos*.h
  F:    include/dt-bindings/clock/s3c*.h
  S:    Maintained
  F:    drivers/mmc/host/sdhci-omap.c
  
 +SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) NXP i.MX DRIVER
 +M:    Haibo Chen <[email protected]>
 +L:    [email protected]
 +L:    [email protected]
 +S:    Maintained
 +F:    drivers/mmc/host/sdhci-esdhc-imx.c
 +
  SECURE ENCRYPTING DEVICE (SED) OPAL DRIVER
  M:    Jonathan Derrick <[email protected]>
  M:    Revanth Rajashekar <[email protected]>
@@@ -17852,17 -17676,12 +17861,17 @@@ F:        drivers/firmware/arm_sdei.
  F:    include/linux/arm_sdei.h
  F:    include/uapi/linux/arm_sdei.h
  
 -SOFTWARE NODES
 +SOFTWARE NODES AND DEVICE PROPERTIES
  R:    Andy Shevchenko <[email protected]>
 +R:    Daniel Scally <[email protected]>
  R:    Heikki Krogerus <[email protected]>
 +R:    Sakari Ailus <[email protected]>
  L:    [email protected]
  S:    Maintained
 +F:    drivers/base/property.c
  F:    drivers/base/swnode.c
 +F:    include/linux/fwnode.h
 +F:    include/linux/property.h
  
  SOFTWARE RAID (Multiple Disks) SUPPORT
  M:    Song Liu <[email protected]>
@@@ -18022,7 -17841,6 +18031,7 @@@ F:   Documentation/sound
  F:    include/sound/
  F:    include/uapi/sound/
  F:    sound/
 +F:    tools/testing/selftests/alsa
  
  SOUND - COMPRESSED AUDIO
  M:    Vinod Koul <[email protected]>
@@@ -18042,13 -17860,6 +18051,13 @@@ F: include/sound/dmaengine_pcm.
  F:    sound/core/pcm_dmaengine.c
  F:    sound/soc/soc-generic-dmaengine-pcm.c
  
 +SOUND - ALSA SELFTESTS
 +M:    Mark Brown <[email protected]>
 +L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
 +S:    Supported
 +F:    tools/testing/selftests/alsa
 +
  SOUND - SOC LAYER / DYNAMIC AUDIO POWER MANAGEMENT (ASoC)
  M:    Liam Girdwood <[email protected]>
  M:    Mark Brown <[email protected]>
@@@ -18155,8 -17966,8 +18164,8 @@@ F:   drivers/pinctrl/spear
  
  SPI NOR SUBSYSTEM
  M:    Tudor Ambarus <[email protected]>
 +M:    Pratyush Yadav <[email protected]>
  R:    Michael Walle <[email protected]>
 -R:    Pratyush Yadav <[email protected]>
  L:    [email protected]
  S:    Maintained
  W:    http://www.linux-mtd.infradead.org/
@@@ -18355,28 -18166,6 +18364,28 @@@ M: Ion Badulescu <[email protected]
  S:    Odd Fixes
  F:    drivers/net/ethernet/adaptec/starfire*
  
 +STARFIVE JH7100 CLOCK DRIVER
 +M:    Emil Renner Berthing <[email protected]>
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/clock/starfive,jh7100-clkgen.yaml
 +F:    drivers/clk/starfive/clk-starfive-jh7100.c
 +F:    include/dt-bindings/clock/starfive-jh7100.h
 +
 +STARFIVE JH7100 PINCTRL DRIVER
 +M:    Emil Renner Berthing <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/pinctrl/starfive,jh7100-pinctrl.yaml
 +F:    drivers/pinctrl/pinctrl-starfive.c
 +F:    include/dt-bindings/pinctrl/pinctrl-starfive.h
 +
 +STARFIVE JH7100 RESET CONTROLLER DRIVER
 +M:    Emil Renner Berthing <[email protected]>
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/reset/starfive,jh7100-reset.yaml
 +F:    drivers/reset/reset-starfive-jh7100.c
 +F:    include/dt-bindings/reset/starfive-jh7100.h
 +
  STATIC BRANCH/CALL
  M:    Peter Zijlstra <[email protected]>
  M:    Josh Poimboeuf <[email protected]>
@@@ -18538,7 -18327,6 +18547,7 @@@ M:   Vineet Gupta <[email protected]
  L:    [email protected]
  S:    Supported
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc.git
 +F:    Documentation/arc/
  F:    Documentation/devicetree/bindings/arc/*
  F:    Documentation/devicetree/bindings/interrupt-controller/snps,arc*
  F:    arch/arc/
@@@ -19556,6 -19344,12 +19565,6 @@@ W:  https://github.com/srcres258/linux-d
  T:    git git://github.com/srcres258/linux-doc.git doc-zh-tw
  F:    Documentation/translations/zh_TW/
  
 -TRIVIAL PATCHES
 -M:    Jiri Kosina <[email protected]>
 -S:    Maintained
 -T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial.git
 -K:    ^Subject:.*(?i)trivial
 -
  TTY LAYER
  M:    Greg Kroah-Hartman <[email protected]>
  M:    Jiri Slaby <[email protected]>
@@@ -19660,7 -19454,6 +19669,7 @@@ S:   Supporte
  W:    http://www.linux-mtd.infradead.org/doc/ubifs.html
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git next
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git fixes
 +F:    Documentation/ABI/testing/sysfs-fs-ubifs
  F:    Documentation/filesystems/ubifs-authentication.rst
  F:    Documentation/filesystems/ubifs.rst
  F:    fs/ubifs/
@@@ -20412,8 -20205,6 +20421,8 @@@ F:   include/uapi/linux/virtio_gpio.
  VIRTIO GPU DRIVER
  M:    David Airlie <[email protected]>
  M:    Gerd Hoffmann <[email protected]>
 +R:    Gurchetan Singh <[email protected]>
 +R:    Chia-I Wu <[email protected]>
  L:    [email protected]
  L:    [email protected]
  S:    Maintained
@@@ -20647,7 -20438,7 +20656,7 @@@ M:   Sergey Senozhatsky <senozhatsky@chro
  R:    Andy Shevchenko <[email protected]>
  R:    Rasmus Villemoes <[email protected]>
  S:    Maintained
 -T:    git git://git.kernel.org/pub/scm/linux/kernel/git/pmladek/printk.git
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux.git
  F:    Documentation/core-api/printk-formats.rst
  F:    lib/test_printf.c
  F:    lib/test_scanf.c
@@@ -20915,13 -20706,6 +20924,13 @@@ S: Maintaine
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/mm
  F:    arch/x86/mm/
  
 +X86 PLATFORM ANDROID TABLETS DSDT FIXUP DRIVER
 +M:    Hans de Goede <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git
 +F:    drivers/platform/x86/x86-android-tablets.c
 +
  X86 PLATFORM DRIVERS
  M:    Hans de Goede <[email protected]>
  M:    Mark Gross <[email protected]>
@@@ -21085,14 -20869,6 +21094,14 @@@ F: drivers/scsi/xen-scsifront.
  F:    drivers/xen/xen-scsiback.c
  F:    include/xen/interface/io/vscsiif.h
  
 +XEN PVUSB DRIVER
 +M:    Juergen Gross <[email protected]>
 +L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
 +S:    Supported
 +F:    drivers/usb/host/xen*
 +F:    include/xen/interface/io/usbif.h
 +
  XEN SOUND FRONTEND DRIVER
  M:    Oleksandr Andrushchenko <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
@@@ -21125,13 -20901,6 +21134,13 @@@ F: fs/xfs
  F:    include/uapi/linux/dqblk_xfs.h
  F:    include/uapi/linux/fsmap.h
  
 +XILINX AMS DRIVER
 +M:    Anand Ashok Dumbre <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml
 +F:    drivers/iio/adc/xilinx-ams.c
 +
  XILINX AXI ETHERNET DRIVER
  M:    Radhey Shyam Pandey <[email protected]>
  S:    Maintained
@@@ -21200,12 -20969,6 +21209,12 @@@ T: git https://github.com/Xilinx/linux-
  F:    Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml
  F:    drivers/phy/xilinx/phy-zynqmp.c
  
 +XILINX EVENT MANAGEMENT DRIVER
 +M:    Abhyuday Godhasara <[email protected]>
 +S:    Maintained
 +F:    drivers/soc/xilinx/xlnx_event_manager.c
 +F:    include/linux/firmware/xlnx-event-manager.h
 +
  XILLYBUS DRIVER
  M:    Eli Billauer <[email protected]>
  L:    [email protected]
diff --combined arch/Kconfig
index 847fde3d22cdfd5ba38dd16b2541e5b1b6857770,4568b6b70b5db5bd8d79f6f631e333a39edc8b85..5a1692392a4de9ad9c4e2c5170ebb51c395f4456
@@@ -1297,6 -1297,9 +1297,9 @@@ config HAVE_ARCH_PFN_VALI
  config ARCH_SUPPORTS_DEBUG_PAGEALLOC
        bool
  
+ config ARCH_SUPPORTS_PAGE_TABLE_CHECK
+       bool
  config ARCH_SPLIT_ARG64
        bool
        help
@@@ -1312,10 -1315,6 +1315,10 @@@ config ARCH_HAS_PARANOID_L1D_FLUS
  config DYNAMIC_SIGFRAME
        bool
  
 +# Select, if arch has a named attribute group bound to NUMA device nodes.
 +config HAVE_ARCH_NODE_DEV_GROUP
 +      bool
 +
  source "kernel/gcov/Kconfig"
  
  source "scripts/gcc-plugins/Kconfig"
diff --combined arch/arm/mm/fault.c
index a1cebe363ed5067db04707814c8c8b237d73d3dd,c7326a521a6983a8daeaf047caf29a3c75e6914e..13949510772a861359e105a837f6c083e449269b
@@@ -17,7 -17,6 +17,7 @@@
  #include <linux/sched/debug.h>
  #include <linux/highmem.h>
  #include <linux/perf_event.h>
 +#include <linux/kfence.h>
  
  #include <asm/system_misc.h>
  #include <asm/system_info.h>
@@@ -100,11 -99,6 +100,11 @@@ void show_pte(const char *lvl, struct m
  { }
  #endif                                        /* CONFIG_MMU */
  
 +static inline bool is_write_fault(unsigned int fsr)
 +{
 +      return (fsr & FSR_WRITE) && !(fsr & FSR_CM);
 +}
 +
  static void die_kernel_fault(const char *msg, struct mm_struct *mm,
                             unsigned long addr, unsigned int fsr,
                             struct pt_regs *regs)
@@@ -137,14 -131,10 +137,14 @@@ __do_kernel_fault(struct mm_struct *mm
        /*
         * No handler, we'll have to terminate things with extreme prejudice.
         */
 -      if (addr < PAGE_SIZE)
 +      if (addr < PAGE_SIZE) {
                msg = "NULL pointer dereference";
 -      else
 +      } else {
 +              if (kfence_handle_page_fault(addr, is_write_fault(fsr), regs))
 +                      return;
 +
                msg = "paging request";
 +      }
  
        die_kernel_fault(msg, mm, addr, fsr, regs);
  }
@@@ -201,8 -191,8 +201,8 @@@ void do_bad_area(unsigned long addr, un
  }
  
  #ifdef CONFIG_MMU
 -#define VM_FAULT_BADMAP               0x010000
 -#define VM_FAULT_BADACCESS    0x020000
 +#define VM_FAULT_BADMAP               ((__force vm_fault_t)0x010000)
 +#define VM_FAULT_BADACCESS    ((__force vm_fault_t)0x020000)
  
  static inline bool is_permission_fault(unsigned int fsr)
  {
@@@ -271,7 -261,7 +271,7 @@@ do_page_fault(unsigned long addr, unsig
        if (user_mode(regs))
                flags |= FAULT_FLAG_USER;
  
 -      if ((fsr & FSR_WRITE) && !(fsr & FSR_CM)) {
 +      if (is_write_fault(fsr)) {
                flags |= FAULT_FLAG_WRITE;
                vm_flags = VM_WRITE;
        }
@@@ -322,7 -312,7 +322,7 @@@ retry
                return 0;
        }
  
-       if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
+       if (!(fault & VM_FAULT_ERROR)) {
                if (fault & VM_FAULT_RETRY) {
                        flags |= FAULT_FLAG_TRIED;
                        goto retry;
diff --combined arch/arm64/mm/fault.c
index 9a9e7675b18775f866ca1f9ad4bc1752a1769cee,a8fb54fccde05bbd18d42052703cf4b217402957..11e04cca0f4f4f6ffcefc1fc9b97e86d86cf70c6
@@@ -297,8 -297,6 +297,8 @@@ static void die_kernel_fault(const cha
        pr_alert("Unable to handle kernel %s at virtual address %016lx\n", msg,
                 addr);
  
 +      kasan_non_canonical_hook(addr);
 +
        mem_abort_decode(esr);
  
        show_pte(addr);
@@@ -608,10 -606,8 +608,8 @@@ retry
        }
  
        if (fault & VM_FAULT_RETRY) {
-               if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
-                       mm_flags |= FAULT_FLAG_TRIED;
-                       goto retry;
-               }
+               mm_flags |= FAULT_FLAG_TRIED;
+               goto retry;
        }
        mmap_read_unlock(mm);
  
@@@ -815,8 -811,11 +813,8 @@@ void do_mem_abort(unsigned long far, un
        if (!inf->fn(far, esr, regs))
                return;
  
 -      if (!user_mode(regs)) {
 -              pr_alert("Unhandled fault at 0x%016lx\n", addr);
 -              mem_abort_decode(esr);
 -              show_pte(addr);
 -      }
 +      if (!user_mode(regs))
 +              die_kernel_fault(inf->name, addr, esr, regs);
  
        /*
         * At this point we have an unrecognized fault type whose tag bits may
diff --combined arch/parisc/mm/fault.c
index 147868427b7cd14308a56a726294ed6fcaf22c95,360b627645cca6e2c837b76256d7a09f6e453004..e9eabf8f14d7e6f7ba2f98ac83558685909d84ee
@@@ -148,11 -148,11 +148,11 @@@ int fixup_exception(struct pt_regs *reg
                 * Fix up get_user() and put_user().
                 * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant
                 * bit in the relative address of the fixup routine to indicate
 -               * that %r8 should be loaded with -EFAULT to report a userspace
 -               * access error.
 +               * that gr[ASM_EXCEPTIONTABLE_REG] should be loaded with
 +               * -EFAULT to report a userspace access error.
                 */
                if (fix->fixup & 1) {
 -                      regs->gr[8] = -EFAULT;
 +                      regs->gr[ASM_EXCEPTIONTABLE_REG] = -EFAULT;
  
                        /* zero target register for get_user() */
                        if (parisc_acctyp(0, regs->iir) == VM_READ) {
@@@ -266,14 -266,14 +266,14 @@@ void do_page_fault(struct pt_regs *regs
        unsigned long acc_type;
        vm_fault_t fault = 0;
        unsigned int flags;
 -
 -      if (faulthandler_disabled())
 -              goto no_context;
 +      char *msg;
  
        tsk = current;
        mm = tsk->mm;
 -      if (!mm)
 +      if (!mm) {
 +              msg = "Page fault: no context";
                goto no_context;
 +      }
  
        flags = FAULT_FLAG_DEFAULT;
        if (user_mode(regs))
@@@ -324,16 -324,14 +324,14 @@@ good_area
                        goto bad_area;
                BUG();
        }
-       if (flags & FAULT_FLAG_ALLOW_RETRY) {
-               if (fault & VM_FAULT_RETRY) {
-                       /*
-                        * No need to mmap_read_unlock(mm) as we would
-                        * have already released it in __lock_page_or_retry
-                        * in mm/filemap.c.
-                        */
-                       flags |= FAULT_FLAG_TRIED;
-                       goto retry;
-               }
+       if (fault & VM_FAULT_RETRY) {
+               /*
+                * No need to mmap_read_unlock(mm) as we would
+                * have already released it in __lock_page_or_retry
+                * in mm/filemap.c.
+                */
+               flags |= FAULT_FLAG_TRIED;
+               goto retry;
        }
        mmap_read_unlock(mm);
        return;
@@@ -409,7 -407,6 +407,7 @@@ bad_area
                force_sig_fault(signo, si_code, (void __user *) address);
                return;
        }
 +      msg = "Page fault: bad address";
  
  no_context:
  
                return;
        }
  
 -      parisc_terminate("Bad Address (null pointer deref?)", regs, code, address);
 +      parisc_terminate(msg, regs, code, address);
  
 -  out_of_memory:
 +out_of_memory:
        mmap_read_unlock(mm);
 -      if (!user_mode(regs))
 +      if (!user_mode(regs)) {
 +              msg = "Page fault: out of memory";
                goto no_context;
 +      }
        pagefault_out_of_memory();
  }
diff --combined arch/powerpc/mm/fault.c
index 2d4a411c7c85efb184ae8a6d3b2d3ab8fcba2817,ebcc61e47d62ba6a8bbb0834d6aed662ac14f2dc..eb8ecd7343a99c3ef9a136bc2d8403b0185f98bd
@@@ -35,7 -35,6 +35,7 @@@
  #include <linux/kfence.h>
  #include <linux/pkeys.h>
  
 +#include <asm/asm-prototypes.h>
  #include <asm/firmware.h>
  #include <asm/interrupt.h>
  #include <asm/page.h>
@@@ -517,10 -516,8 +517,8 @@@ retry
         * case.
         */
        if (unlikely(fault & VM_FAULT_RETRY)) {
-               if (flags & FAULT_FLAG_ALLOW_RETRY) {
-                       flags |= FAULT_FLAG_TRIED;
-                       goto retry;
-               }
+               flags |= FAULT_FLAG_TRIED;
+               goto retry;
        }
  
        mmap_read_unlock(current->mm);
@@@ -621,27 -618,4 +619,27 @@@ DEFINE_INTERRUPT_HANDLER(do_bad_page_fa
  {
        bad_page_fault(regs, SIGSEGV);
  }
 +
 +/*
 + * In radix, segment interrupts indicate the EA is not addressable by the
 + * page table geometry, so they are always sent here.
 + *
 + * In hash, this is called if do_slb_fault returns error. Typically it is
 + * because the EA was outside the region allowed by software.
 + */
 +DEFINE_INTERRUPT_HANDLER(do_bad_segment_interrupt)
 +{
 +      int err = regs->result;
 +
 +      if (err == -EFAULT) {
 +              if (user_mode(regs))
 +                      _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
 +              else
 +                      bad_page_fault(regs, SIGSEGV);
 +      } else if (err == -EINVAL) {
 +              unrecoverable_exception(regs);
 +      } else {
 +              BUG();
 +      }
 +}
  #endif
diff --combined arch/s390/mm/fault.c
index 6ed2886fc014b19846ed3d3b804054e0b839bd06,d7d6be283d9425779eec60c8561308de1252a720..ff16ce0d04ee019f73dfc4ebfd3d284727dd45f2
@@@ -115,7 -115,7 +115,7 @@@ static void dump_pagetable(unsigned lon
                pr_cont("R1:%016lx ", *table);
                if (*table & _REGION_ENTRY_INVALID)
                        goto out;
 -              table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 +              table = __va(*table & _REGION_ENTRY_ORIGIN);
                fallthrough;
        case _ASCE_TYPE_REGION2:
                table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
                pr_cont("R2:%016lx ", *table);
                if (*table & _REGION_ENTRY_INVALID)
                        goto out;
 -              table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 +              table = __va(*table & _REGION_ENTRY_ORIGIN);
                fallthrough;
        case _ASCE_TYPE_REGION3:
                table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
                pr_cont("R3:%016lx ", *table);
                if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
                        goto out;
 -              table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
 +              table = __va(*table & _REGION_ENTRY_ORIGIN);
                fallthrough;
        case _ASCE_TYPE_SEGMENT:
                table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
                pr_cont("S:%016lx ", *table);
                if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
                        goto out;
 -              table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
 +              table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
        }
        table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
        if (bad_address(table))
@@@ -452,21 -452,21 +452,21 @@@ retry
        if (unlikely(fault & VM_FAULT_ERROR))
                goto out_up;
  
-       if (flags & FAULT_FLAG_ALLOW_RETRY) {
-               if (fault & VM_FAULT_RETRY) {
-                       if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
-                           (flags & FAULT_FLAG_RETRY_NOWAIT)) {
-                               /* FAULT_FLAG_RETRY_NOWAIT has been set,
-                                * mmap_lock has not been released */
-                               current->thread.gmap_pfault = 1;
-                               fault = VM_FAULT_PFAULT;
-                               goto out_up;
-                       }
-                       flags &= ~FAULT_FLAG_RETRY_NOWAIT;
-                       flags |= FAULT_FLAG_TRIED;
-                       mmap_read_lock(mm);
-                       goto retry;
+       if (fault & VM_FAULT_RETRY) {
+               if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
+                       (flags & FAULT_FLAG_RETRY_NOWAIT)) {
+                       /*
+                        * FAULT_FLAG_RETRY_NOWAIT has been set, mmap_lock has
+                        * not been released
+                        */
+                       current->thread.gmap_pfault = 1;
+                       fault = VM_FAULT_PFAULT;
+                       goto out_up;
                }
+               flags &= ~FAULT_FLAG_RETRY_NOWAIT;
+               flags |= FAULT_FLAG_TRIED;
+               mmap_read_lock(mm);
+               goto retry;
        }
        if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
                address =  __gmap_link(gmap, current->thread.gmap_addr,
diff --combined arch/um/kernel/trap.c
index 561a2b03c3cf9f0b77c0c1e19fd76db09f20fa76,193503484af57dbbf1143ce5de0b07ff7cb41c4b..d1d5d0be0308561d13e5d6a9a0379140a77d9490
@@@ -87,12 -87,10 +87,10 @@@ good_area
                        }
                        BUG();
                }
-               if (flags & FAULT_FLAG_ALLOW_RETRY) {
-                       if (fault & VM_FAULT_RETRY) {
-                               flags |= FAULT_FLAG_TRIED;
+               if (fault & VM_FAULT_RETRY) {
+                       flags |= FAULT_FLAG_TRIED;
  
-                               goto retry;
-                       }
+                       goto retry;
                }
  
                pmd = pmd_off(mm, address);
@@@ -127,6 -125,7 +125,6 @@@ out_of_memory
        pagefault_out_of_memory();
        return 0;
  }
 -EXPORT_SYMBOL(handle_page_fault);
  
  static void show_segv_info(struct uml_pt_regs *regs)
  {
diff --combined arch/x86/Kconfig
index 976dd6b532bffa8157fbbb17b8109c036581617e,d0628415b93e4f83b6dc4c80e5a5c95521037674..407533c835fe586faca2411b53015bfa9b22042e
@@@ -104,6 -104,7 +104,7 @@@ config X8
        select ARCH_SUPPORTS_ACPI
        select ARCH_SUPPORTS_ATOMIC_RMW
        select ARCH_SUPPORTS_DEBUG_PAGEALLOC
+       select ARCH_SUPPORTS_PAGE_TABLE_CHECK   if X86_64
        select ARCH_SUPPORTS_NUMA_BALANCING     if X86_64
        select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP       if NR_CPUS <= 4096
        select ARCH_SUPPORTS_LTO_CLANG
        select HAVE_ARCH_KCSAN                  if X86_64
        select X86_FEATURE_NAMES                if PROC_FS
        select PROC_PID_ARCH_STATUS             if PROC_FS
 +      select HAVE_ARCH_NODE_DEV_GROUP         if X86_SGX
        imply IMA_SECURE_AND_OR_TRUSTED_BOOT    if EFI
  
  config INSTRUCTION_DECODER
@@@ -473,18 -473,6 +474,18 @@@ config RETPOLIN
          branches. Requires a compiler with -mindirect-branch=thunk-extern
          support for full protection. The kernel may run slower.
  
 +config CC_HAS_SLS
 +      def_bool $(cc-option,-mharden-sls=all)
 +
 +config SLS
 +      bool "Mitigate Straight-Line-Speculation"
 +      depends on CC_HAS_SLS && X86_64
 +      default n
 +      help
 +        Compile the kernel with straight-line-speculation options to guard
 +        against straight line speculation. The kernel image might be slightly
 +        larger.
 +
  config X86_CPU_RESCTRL
        bool "x86 CPU resource control support"
        depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
@@@ -1536,20 -1524,16 +1537,20 @@@ config X86_CPA_STATISTIC
          helps to determine the effectiveness of preserving large and huge
          page mappings when mapping protections are changed.
  
 +config X86_MEM_ENCRYPT
 +      select ARCH_HAS_FORCE_DMA_UNENCRYPTED
 +      select DYNAMIC_PHYSICAL_MASK
 +      select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
 +      def_bool n
 +
  config AMD_MEM_ENCRYPT
        bool "AMD Secure Memory Encryption (SME) support"
        depends on X86_64 && CPU_SUP_AMD
        select DMA_COHERENT_POOL
 -      select DYNAMIC_PHYSICAL_MASK
        select ARCH_USE_MEMREMAP_PROT
 -      select ARCH_HAS_FORCE_DMA_UNENCRYPTED
        select INSTRUCTION_DECODER
 -      select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
        select ARCH_HAS_CC_PLATFORM
 +      select X86_MEM_ENCRYPT
        help
          Say yes to enable support for the encryption of system memory.
          This requires an AMD processor that supports Secure Memory
@@@ -1934,7 -1918,6 +1935,7 @@@ config X86_SG
        select SRCU
        select MMU_NOTIFIER
        select NUMA_KEEP_MEMINFO if NUMA
 +      select XARRAY_MULTI
        help
          Intel(R) Software Guard eXtensions (SGX) is a set of CPU instructions
          that can be used by applications to set aside private regions of code
@@@ -1964,7 -1947,7 +1965,7 @@@ config EF
  
  config EFI_STUB
        bool "EFI stub support"
 -      depends on EFI && !X86_USE_3DNOW
 +      depends on EFI
        depends on $(cc-option,-mabi=ms) || X86_32
        select RELOCATABLE
        help
diff --combined arch/x86/include/asm/pgtable.h
index a34430b7af4a3e379114e918cda1689f9a8838fd,d7d287ac1018dbaa0ab57a7fe4cd233995a0936c..8a9432fb3802b3f60e12331b23e43b78f48cddc8
  #define pgprot_decrypted(prot)        __pgprot(__sme_clr(pgprot_val(prot)))
  
  #ifndef __ASSEMBLY__
 +#include <linux/spinlock.h>
  #include <asm/x86_init.h>
  #include <asm/pkru.h>
  #include <asm/fpu/api.h>
  #include <asm-generic/pgtable_uffd.h>
+ #include <linux/page_table_check.h>
  
  extern pgd_t early_top_pgt[PTRS_PER_PGD];
  bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
@@@ -753,7 -753,7 +754,7 @@@ static inline bool pte_accessible(struc
                return true;
  
        if ((pte_flags(a) & _PAGE_PROTNONE) &&
-                       mm_tlb_flush_pending(mm))
+                       atomic_read(&mm->tlb_flush_pending))
                return true;
  
        return false;
@@@ -1007,18 -1007,21 +1008,21 @@@ static inline pud_t native_local_pudp_g
  static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep, pte_t pte)
  {
+       page_table_check_pte_set(mm, addr, ptep, pte);
        set_pte(ptep, pte);
  }
  
  static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
                              pmd_t *pmdp, pmd_t pmd)
  {
+       page_table_check_pmd_set(mm, addr, pmdp, pmd);
        set_pmd(pmdp, pmd);
  }
  
  static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
                              pud_t *pudp, pud_t pud)
  {
+       page_table_check_pud_set(mm, addr, pudp, pud);
        native_set_pud(pudp, pud);
  }
  
@@@ -1049,6 -1052,7 +1053,7 @@@ static inline pte_t ptep_get_and_clear(
                                       pte_t *ptep)
  {
        pte_t pte = native_ptep_get_and_clear(ptep);
+       page_table_check_pte_clear(mm, addr, pte);
        return pte;
  }
  
@@@ -1064,12 -1068,23 +1069,23 @@@ static inline pte_t ptep_get_and_clear_
                 * care about updates and native needs no locking
                 */
                pte = native_local_ptep_get_and_clear(ptep);
+               page_table_check_pte_clear(mm, addr, pte);
        } else {
                pte = ptep_get_and_clear(mm, addr, ptep);
        }
        return pte;
  }
  
+ #define __HAVE_ARCH_PTEP_CLEAR
+ static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
+                             pte_t *ptep)
+ {
+       if (IS_ENABLED(CONFIG_PAGE_TABLE_CHECK))
+               ptep_get_and_clear(mm, addr, ptep);
+       else
+               pte_clear(mm, addr, ptep);
+ }
  #define __HAVE_ARCH_PTEP_SET_WRPROTECT
  static inline void ptep_set_wrprotect(struct mm_struct *mm,
                                      unsigned long addr, pte_t *ptep)
@@@ -1110,14 -1125,22 +1126,22 @@@ static inline int pmd_write(pmd_t pmd
  static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
                                       pmd_t *pmdp)
  {
-       return native_pmdp_get_and_clear(pmdp);
+       pmd_t pmd = native_pmdp_get_and_clear(pmdp);
+       page_table_check_pmd_clear(mm, addr, pmd);
+       return pmd;
  }
  
  #define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
  static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
                                        unsigned long addr, pud_t *pudp)
  {
-       return native_pudp_get_and_clear(pudp);
+       pud_t pud = native_pudp_get_and_clear(pudp);
+       page_table_check_pud_clear(mm, addr, pud);
+       return pud;
  }
  
  #define __HAVE_ARCH_PMDP_SET_WRPROTECT
@@@ -1138,6 -1161,7 +1162,7 @@@ static inline int pud_write(pud_t pud
  static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
                unsigned long address, pmd_t *pmdp, pmd_t pmd)
  {
+       page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
        if (IS_ENABLED(CONFIG_SMP)) {
                return xchg(pmdp, pmd);
        } else {
diff --combined drivers/block/zram/zram_drv.c
index f6da5293b913d89d64816f1b150b90766a5aced3,9a46b2ef6951d94909645afd13d1bc3a63a7b03b..cb253d80d72b9550fbbeeadafe03eb1d34f2ed8f
@@@ -1903,14 -1903,7 +1903,7 @@@ static struct attribute *zram_disk_attr
        NULL,
  };
  
- static const struct attribute_group zram_disk_attr_group = {
-       .attrs = zram_disk_attrs,
- };
- static const struct attribute_group *zram_disk_attr_groups[] = {
-       &zram_disk_attr_group,
-       NULL,
- };
+ ATTRIBUTE_GROUPS(zram_disk);
  
  /*
   * Allocate and initialize a new zram device. The function returns
@@@ -1947,7 -1940,6 +1940,7 @@@ static int zram_add(void
        zram->disk->major = zram_major;
        zram->disk->first_minor = device_id;
        zram->disk->minors = 1;
 +      zram->disk->flags |= GENHD_FL_NO_PART;
        zram->disk->fops = &zram_devops;
        zram->disk->private_data = zram;
        snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
                blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
  
        blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
-       ret = device_add_disk(NULL, zram->disk, zram_disk_attr_groups);
+       ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
        if (ret)
                goto out_cleanup_disk;
  
diff --combined drivers/dax/bus.c
index ee4568ef757c60a1ec2709e2541584a14a7219cc,a22350e822fa3dbfd6fa65acfdc7e8496450e771..1dad813ee4a6907b2370ea2e6e861b3cbd9b6a85
@@@ -10,6 -10,8 +10,6 @@@
  #include "dax-private.h"
  #include "bus.h"
  
 -static struct class *dax_class;
 -
  static DEFINE_MUTEX(dax_bus_lock);
  
  #define DAX_NAME_LEN 30
@@@ -127,11 -129,35 +127,35 @@@ ATTRIBUTE_GROUPS(dax_drv)
  
  static int dax_bus_match(struct device *dev, struct device_driver *drv);
  
+ /*
+  * Static dax regions are regions created by an external subsystem such as
+  * nvdimm, where a single range is assigned. Their boundaries are defined by the external
+  * subsystem and are usually limited to one physical memory range. For example,
+  * for PMEM it is usually defined by NVDIMM Namespace boundaries (i.e. a
+  * single contiguous range)
+  *
+  * On dynamic dax regions, the assigned region can be partitioned by dax core
+  * into multiple subdivisions. A subdivision is represented by one
+  * /dev/daxN.M device composed of one or more potentially discontiguous ranges.
+  *
+  * When allocating a dax region, drivers must set whether it's static
+  * (IORESOURCE_DAX_STATIC).  On static dax devices, the @pgmap is pre-assigned
+  * to dax core when calling devm_create_dev_dax(), whereas in dynamic dax
+  * devices it is NULL but afterwards allocated by dax core on device ->probe().
+  * Care is needed to make sure that dynamic dax devices are torn down with a
+  * cleared @pgmap field (see kill_dev_dax()).
+  */
  static bool is_static(struct dax_region *dax_region)
  {
        return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
  }
  
+ bool static_dev_dax(struct dev_dax *dev_dax)
+ {
+       return is_static(dev_dax->region);
+ }
+ EXPORT_SYMBOL_GPL(static_dev_dax);
  static u64 dev_dax_size(struct dev_dax *dev_dax)
  {
        u64 size = 0;
@@@ -361,6 -387,14 +385,14 @@@ void kill_dev_dax(struct dev_dax *dev_d
  
        kill_dax(dax_dev);
        unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+       /*
+        * Dynamic dax regions have the pgmap allocated via devm_kzalloc()
+        * and thus freed by devm. Clear the pgmap to not have stale pgmap
+        * ranges on probe() from previous reconfigurations of region devices.
+        */
+       if (!static_dev_dax(dev_dax))
+               dev_dax->pgmap = NULL;
  }
  EXPORT_SYMBOL_GPL(kill_dev_dax);
  
@@@ -1321,17 -1355,14 +1353,17 @@@ struct dev_dax *devm_create_dev_dax(str
        }
  
        /*
 -       * No 'host' or dax_operations since there is no access to this
 -       * device outside of mmap of the resulting character device.
 +       * No dax_operations since there is no access to this device outside of
 +       * mmap of the resulting character device.
         */
 -      dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
 +      dax_dev = alloc_dax(dev_dax, NULL);
        if (IS_ERR(dax_dev)) {
                rc = PTR_ERR(dax_dev);
                goto err_alloc_dax;
        }
 +      set_dax_synchronous(dax_dev);
 +      set_dax_nocache(dax_dev);
 +      set_dax_nomc(dax_dev);
  
        /* a device_dax instance is dead while the driver is not attached */
        kill_dax(dax_dev);
  
        inode = dax_inode(dax_dev);
        dev->devt = inode->i_rdev;
 -      if (data->subsys == DEV_DAX_BUS)
 -              dev->bus = &dax_bus_type;
 -      else
 -              dev->class = dax_class;
 +      dev->bus = &dax_bus_type;
        dev->parent = parent;
        dev->type = &dev_dax_type;
  
@@@ -1443,10 -1477,22 +1475,10 @@@ EXPORT_SYMBOL_GPL(dax_driver_unregister
  
  int __init dax_bus_init(void)
  {
 -      int rc;
 -
 -      if (IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)) {
 -              dax_class = class_create(THIS_MODULE, "dax");
 -              if (IS_ERR(dax_class))
 -                      return PTR_ERR(dax_class);
 -      }
 -
 -      rc = bus_register(&dax_bus_type);
 -      if (rc)
 -              class_destroy(dax_class);
 -      return rc;
 +      return bus_register(&dax_bus_type);
  }
  
  void __exit dax_bus_exit(void)
  {
        bus_unregister(&dax_bus_type);
 -      class_destroy(dax_class);
  }
diff --combined drivers/dax/bus.h
index 381cec9ff05c4f92e6be660ccdf685c71322e81a,4acdfee7dd5956b77a67115993d01c26ff5c52c6..fbb940293d6d84384b6fa37eb412790249b4ecf7
@@@ -16,15 -16,24 +16,15 @@@ struct dax_region *alloc_dax_region(str
                struct range *range, int target_node, unsigned int align,
                unsigned long flags);
  
 -enum dev_dax_subsys {
 -      DEV_DAX_BUS = 0, /* zeroed dev_dax_data picks this by default */
 -      DEV_DAX_CLASS,
 -};
 -
  struct dev_dax_data {
        struct dax_region *dax_region;
        struct dev_pagemap *pgmap;
 -      enum dev_dax_subsys subsys;
        resource_size_t size;
        int id;
  };
  
  struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data);
  
 -/* to be deleted when DEV_DAX_CLASS is removed */
 -struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys);
 -
  struct dax_device_driver {
        struct device_driver drv;
        struct list_head ids;
@@@ -39,7 -48,12 +39,8 @@@ int __dax_driver_register(struct dax_de
        __dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
  void dax_driver_unregister(struct dax_device_driver *dax_drv);
  void kill_dev_dax(struct dev_dax *dev_dax);
+ bool static_dev_dax(struct dev_dax *dev_dax);
  
 -#if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
 -int dev_dax_probe(struct dev_dax *dev_dax);
 -#endif
 -
  /*
   * While run_dax() is potentially a generic operation that could be
   * defined in include/linux/dax.h we don't want to grow any users
diff --combined drivers/dax/device.c
index e58d597f0415a049e64149bc9f92552aad04fed2,591f293d326faaef9e20c885b275ebd0403bada2..d33a0613ed0c518843eb2bdce05974309939bb9e
@@@ -73,11 -73,39 +73,39 @@@ __weak phys_addr_t dax_pgoff_to_phys(st
        return -1;
  }
  
+ static void dax_set_mapping(struct vm_fault *vmf, pfn_t pfn,
+                             unsigned long fault_size)
+ {
+       unsigned long i, nr_pages = fault_size / PAGE_SIZE;
+       struct file *filp = vmf->vma->vm_file;
+       struct dev_dax *dev_dax = filp->private_data;
+       pgoff_t pgoff;
+       /* mapping is only set on the head */
+       if (dev_dax->pgmap->vmemmap_shift)
+               nr_pages = 1;
+       pgoff = linear_page_index(vmf->vma,
+                       ALIGN(vmf->address, fault_size));
+       for (i = 0; i < nr_pages; i++) {
+               struct page *page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
+               page = compound_head(page);
+               if (page->mapping)
+                       continue;
+               page->mapping = filp->f_mapping;
+               page->index = pgoff + i;
+       }
+ }
  static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
-                               struct vm_fault *vmf, pfn_t *pfn)
+                               struct vm_fault *vmf)
  {
        struct device *dev = &dev_dax->dev;
        phys_addr_t phys;
+       pfn_t pfn;
        unsigned int fault_size = PAGE_SIZE;
  
        if (check_vma(dev_dax, vmf->vma, __func__))
                return VM_FAULT_SIGBUS;
        }
  
-       *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+       pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
  
-       return vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
+       dax_set_mapping(vmf, pfn, fault_size);
+       return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
  }
  
  static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
-                               struct vm_fault *vmf, pfn_t *pfn)
+                               struct vm_fault *vmf)
  {
        unsigned long pmd_addr = vmf->address & PMD_MASK;
        struct device *dev = &dev_dax->dev;
        phys_addr_t phys;
        pgoff_t pgoff;
+       pfn_t pfn;
        unsigned int fault_size = PMD_SIZE;
  
        if (check_vma(dev_dax, vmf->vma, __func__))
                return VM_FAULT_SIGBUS;
        }
  
-       *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+       pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
  
-       return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
+       dax_set_mapping(vmf, pfn, fault_size);
+       return vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
  }
  
  #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
  static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
-                               struct vm_fault *vmf, pfn_t *pfn)
+                               struct vm_fault *vmf)
  {
        unsigned long pud_addr = vmf->address & PUD_MASK;
        struct device *dev = &dev_dax->dev;
        phys_addr_t phys;
        pgoff_t pgoff;
+       pfn_t pfn;
        unsigned int fault_size = PUD_SIZE;
  
  
                return VM_FAULT_SIGBUS;
        }
  
-       *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+       pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
  
-       return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
+       dax_set_mapping(vmf, pfn, fault_size);
+       return vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
  }
  #else
  static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
-                               struct vm_fault *vmf, pfn_t *pfn)
+                               struct vm_fault *vmf)
  {
        return VM_FAULT_FALLBACK;
  }
@@@ -196,10 -232,8 +232,8 @@@ static vm_fault_t dev_dax_huge_fault(st
                enum page_entry_size pe_size)
  {
        struct file *filp = vmf->vma->vm_file;
-       unsigned long fault_size;
        vm_fault_t rc = VM_FAULT_SIGBUS;
        int id;
-       pfn_t pfn;
        struct dev_dax *dev_dax = filp->private_data;
  
        dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
        id = dax_read_lock();
        switch (pe_size) {
        case PE_SIZE_PTE:
-               fault_size = PAGE_SIZE;
-               rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
+               rc = __dev_dax_pte_fault(dev_dax, vmf);
                break;
        case PE_SIZE_PMD:
-               fault_size = PMD_SIZE;
-               rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
+               rc = __dev_dax_pmd_fault(dev_dax, vmf);
                break;
        case PE_SIZE_PUD:
-               fault_size = PUD_SIZE;
-               rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
+               rc = __dev_dax_pud_fault(dev_dax, vmf);
                break;
        default:
                rc = VM_FAULT_SIGBUS;
        }
  
-       if (rc == VM_FAULT_NOPAGE) {
-               unsigned long i;
-               pgoff_t pgoff;
-               /*
-                * In the device-dax case the only possibility for a
-                * VM_FAULT_NOPAGE result is when device-dax capacity is
-                * mapped. No need to consider the zero page, or racing
-                * conflicting mappings.
-                */
-               pgoff = linear_page_index(vmf->vma, vmf->address
-                               & ~(fault_size - 1));
-               for (i = 0; i < fault_size / PAGE_SIZE; i++) {
-                       struct page *page;
-                       page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
-                       if (page->mapping)
-                               continue;
-                       page->mapping = filp->f_mapping;
-                       page->index = pgoff + i;
-               }
-       }
        dax_read_unlock(id);
  
        return rc;
@@@ -398,17 -407,34 +407,34 @@@ int dev_dax_probe(struct dev_dax *dev_d
        void *addr;
        int rc, i;
  
-       pgmap = dev_dax->pgmap;
-       if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1,
-                       "static pgmap / multi-range device conflict\n"))
-               return -EINVAL;
+       if (static_dev_dax(dev_dax))  {
+               if (dev_dax->nr_range > 1) {
+                       dev_warn(dev,
+                               "static pgmap / multi-range device conflict\n");
+                       return -EINVAL;
+               }
  
-       if (!pgmap) {
-               pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range)
-                               * (dev_dax->nr_range - 1), GFP_KERNEL);
+               pgmap = dev_dax->pgmap;
+       } else {
+               if (dev_dax->pgmap) {
+                       dev_warn(dev,
+                                "dynamic-dax with pre-populated page map\n");
+                       return -EINVAL;
+               }
+               pgmap = devm_kzalloc(dev,
+                        struct_size(pgmap, ranges, dev_dax->nr_range - 1),
+                        GFP_KERNEL);
                if (!pgmap)
                        return -ENOMEM;
                pgmap->nr_range = dev_dax->nr_range;
+               dev_dax->pgmap = pgmap;
+               for (i = 0; i < dev_dax->nr_range; i++) {
+                       struct range *range = &dev_dax->ranges[i].range;
+                       pgmap->ranges[i] = *range;
+               }
        }
  
        for (i = 0; i < dev_dax->nr_range; i++) {
                                        i, range->start, range->end);
                        return -EBUSY;
                }
-               /* don't update the range for static pgmap */
-               if (!dev_dax->pgmap)
-                       pgmap->ranges[i] = *range;
        }
  
        pgmap->type = MEMORY_DEVICE_GENERIC;
+       if (dev_dax->align > PAGE_SIZE)
+               pgmap->vmemmap_shift =
+                       order_base_2(dev_dax->align >> PAGE_SHIFT);
        addr = devm_memremap_pages(dev, pgmap);
        if (IS_ERR(addr))
                return PTR_ERR(addr);
        inode = dax_inode(dax_dev);
        cdev = inode->i_cdev;
        cdev_init(cdev, &dax_fops);
 -      if (dev->class) {
 -              /* for the CONFIG_DEV_DAX_PMEM_COMPAT case */
 -              cdev->owner = dev->parent->driver->owner;
 -      } else
 -              cdev->owner = dev->driver->owner;
 +      cdev->owner = dev->driver->owner;
        cdev_set_parent(cdev, &dev->kobj);
        rc = cdev_add(cdev, dev->devt, 1);
        if (rc)
diff --combined drivers/of/fdt.c
index ca2cfb3012a49400b7279d03688f1b3c3c966780,116c582fea7a5ae9eeb0712d906759a745ba0a4b..ad85ff6474ff1398c3eb93c7396371daca7c15dc
@@@ -26,6 -26,7 +26,7 @@@
  #include <linux/serial_core.h>
  #include <linux/sysfs.h>
  #include <linux/random.h>
+ #include <linux/kmemleak.h>
  
  #include <asm/setup.h>  /* for COMMAND_LINE_SIZE */
  #include <asm/page.h>
@@@ -482,11 -483,9 +483,11 @@@ static int __init early_init_dt_reserve
        if (nomap) {
                /*
                 * If the memory is already reserved (by another region), we
 -               * should not allow it to be marked nomap.
 +               * should not allow it to be marked nomap, but don't worry
 +               * if the region isn't memory as it won't be mapped.
                 */
 -              if (memblock_is_region_reserved(base, size))
 +              if (memblock_overlaps_region(&memblock.memory, base, size) &&
 +                  memblock_is_region_reserved(base, size))
                        return -EBUSY;
  
                return memblock_mark_nomap(base, size);
@@@ -524,9 -523,12 +525,12 @@@ static int __init __reserved_mem_reserv
                size = dt_mem_next_cell(dt_root_size_cells, &prop);
  
                if (size &&
-                   early_init_dt_reserve_memory_arch(base, size, nomap) == 0)
+                   early_init_dt_reserve_memory_arch(base, size, nomap) == 0) {
                        pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n",
                                uname, &base, (unsigned long)(size / SZ_1M));
+                       if (!nomap)
+                               kmemleak_alloc_phys(base, size, 0, 0);
+               }
                else
                        pr_info("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n",
                                uname, &base, (unsigned long)(size / SZ_1M));
@@@ -967,22 -969,18 +971,22 @@@ static void __init early_init_dt_check_
                 elfcorehdr_addr, elfcorehdr_size);
  }
  
 -static phys_addr_t cap_mem_addr;
 -static phys_addr_t cap_mem_size;
 +static unsigned long chosen_node_offset = -FDT_ERR_NOTFOUND;
  
  /**
   * early_init_dt_check_for_usable_mem_range - Decode usable memory range
   * location from flat tree
 - * @node: reference to node containing usable memory range location ('chosen')
   */
 -static void __init early_init_dt_check_for_usable_mem_range(unsigned long node)
 +void __init early_init_dt_check_for_usable_mem_range(void)
  {
        const __be32 *prop;
        int len;
 +      phys_addr_t cap_mem_addr;
 +      phys_addr_t cap_mem_size;
 +      unsigned long node = chosen_node_offset;
 +
 +      if ((long)node < 0)
 +              return;
  
        pr_debug("Looking for usable-memory-range property... ");
  
  
        pr_debug("cap_mem_start=%pa cap_mem_size=%pa\n", &cap_mem_addr,
                 &cap_mem_size);
 +
 +      memblock_cap_memory_range(cap_mem_addr, cap_mem_size);
  }
  
  #ifdef CONFIG_SERIAL_EARLYCON
@@@ -1050,14 -1046,13 +1054,14 @@@ int __init early_init_dt_scan_chosen_st
  /*
   * early_init_dt_scan_root - fetch the top level address and size cells
   */
 -int __init early_init_dt_scan_root(unsigned long node, const char *uname,
 -                                 int depth, void *data)
 +int __init early_init_dt_scan_root(void)
  {
        const __be32 *prop;
 +      const void *fdt = initial_boot_params;
 +      int node = fdt_path_offset(fdt, "/");
  
 -      if (depth != 0)
 -              return 0;
 +      if (node < 0)
 +              return -ENODEV;
  
        dt_root_size_cells = OF_ROOT_NODE_SIZE_CELLS_DEFAULT;
        dt_root_addr_cells = OF_ROOT_NODE_ADDR_CELLS_DEFAULT;
                dt_root_addr_cells = be32_to_cpup(prop);
        pr_debug("dt_root_addr_cells = %x\n", dt_root_addr_cells);
  
 -      /* break now */
 -      return 1;
 +      return 0;
  }
  
  u64 __init dt_mem_next_cell(int s, const __be32 **cellp)
  /*
   * early_init_dt_scan_memory - Look for and parse memory nodes
   */
 -int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
 -                                   int depth, void *data)
 +int __init early_init_dt_scan_memory(void)
  {
 -      const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
 -      const __be32 *reg, *endp;
 -      int l;
 -      bool hotpluggable;
 +      int node;
 +      const void *fdt = initial_boot_params;
  
 -      /* We are scanning "memory" nodes only */
 -      if (type == NULL || strcmp(type, "memory") != 0)
 -              return 0;
 +      fdt_for_each_subnode(node, fdt, 0) {
 +              const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
 +              const __be32 *reg, *endp;
 +              int l;
 +              bool hotpluggable;
  
 -      reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
 -      if (reg == NULL)
 -              reg = of_get_flat_dt_prop(node, "reg", &l);
 -      if (reg == NULL)
 -              return 0;
 +              /* We are scanning "memory" nodes only */
 +              if (type == NULL || strcmp(type, "memory") != 0)
 +                      continue;
  
 -      endp = reg + (l / sizeof(__be32));
 -      hotpluggable = of_get_flat_dt_prop(node, "hotpluggable", NULL);
 +              reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
 +              if (reg == NULL)
 +                      reg = of_get_flat_dt_prop(node, "reg", &l);
 +              if (reg == NULL)
 +                      continue;
  
 -      pr_debug("memory scan node %s, reg size %d,\n", uname, l);
 +              endp = reg + (l / sizeof(__be32));
 +              hotpluggable = of_get_flat_dt_prop(node, "hotpluggable", NULL);
  
 -      while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
 -              u64 base, size;
 +              pr_debug("memory scan node %s, reg size %d,\n",
 +                       fdt_get_name(fdt, node, NULL), l);
  
 -              base = dt_mem_next_cell(dt_root_addr_cells, &reg);
 -              size = dt_mem_next_cell(dt_root_size_cells, &reg);
 +              while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
 +                      u64 base, size;
  
 -              if (size == 0)
 -                      continue;
 -              pr_debug(" - %llx, %llx\n", base, size);
 +                      base = dt_mem_next_cell(dt_root_addr_cells, &reg);
 +                      size = dt_mem_next_cell(dt_root_size_cells, &reg);
  
 -              early_init_dt_add_memory_arch(base, size);
 +                      if (size == 0)
 +                              continue;
 +                      pr_debug(" - %llx, %llx\n", base, size);
  
 -              if (!hotpluggable)
 -                      continue;
 +                      early_init_dt_add_memory_arch(base, size);
  
 -              if (memblock_mark_hotplug(base, size))
 -                      pr_warn("failed to mark hotplug range 0x%llx - 0x%llx\n",
 -                              base, base + size);
 -      }
 +                      if (!hotpluggable)
 +                              continue;
  
 +                      if (memblock_mark_hotplug(base, size))
 +                              pr_warn("failed to mark hotplug range 0x%llx - 0x%llx\n",
 +                                      base, base + size);
 +              }
 +      }
        return 0;
  }
  
 -int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
 -                                   int depth, void *data)
 +int __init early_init_dt_scan_chosen(char *cmdline)
  {
 -      int l;
 +      int l, node;
        const char *p;
        const void *rng_seed;
 +      const void *fdt = initial_boot_params;
  
 -      pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
 +      node = fdt_path_offset(fdt, "/chosen");
 +      if (node < 0)
 +              node = fdt_path_offset(fdt, "/chosen@0");
 +      if (node < 0)
 +              return -ENOENT;
  
 -      if (depth != 1 || !data ||
 -          (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0))
 -              return 0;
 +      chosen_node_offset = node;
  
        early_init_dt_check_for_initrd(node);
        early_init_dt_check_for_elfcorehdr(node);
 -      early_init_dt_check_for_usable_mem_range(node);
  
        /* Retrieve command line */
        p = of_get_flat_dt_prop(node, "bootargs", &l);
        if (p != NULL && l > 0)
 -              strlcpy(data, p, min(l, COMMAND_LINE_SIZE));
 +              strlcpy(cmdline, p, min(l, COMMAND_LINE_SIZE));
  
        /*
         * CONFIG_CMDLINE is meant to be a default in case nothing else
         */
  #ifdef CONFIG_CMDLINE
  #if defined(CONFIG_CMDLINE_EXTEND)
 -      strlcat(data, " ", COMMAND_LINE_SIZE);
 -      strlcat(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
 +      strlcat(cmdline, " ", COMMAND_LINE_SIZE);
 +      strlcat(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
  #elif defined(CONFIG_CMDLINE_FORCE)
 -      strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
 +      strlcpy(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
  #else
        /* No arguments from boot loader, use kernel's cmdline */
 -      if (!((char *)data)[0])
 -              strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
 +      if (!((char *)cmdline)[0])
 +              strlcpy(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
  #endif
  #endif /* CONFIG_CMDLINE */
  
 -      pr_debug("Command line is: %s\n", (char *)data);
 +      pr_debug("Command line is: %s\n", (char *)cmdline);
  
        rng_seed = of_get_flat_dt_prop(node, "rng-seed", &l);
        if (rng_seed && l > 0) {
                                fdt_totalsize(initial_boot_params));
        }
  
 -      /* break now */
 -      return 1;
 +      return 0;
  }
  
  #ifndef MIN_MEMBLOCK_ADDR
@@@ -1273,21 -1265,21 +1277,21 @@@ bool __init early_init_dt_verify(void *
  
  void __init early_init_dt_scan_nodes(void)
  {
 -      int rc = 0;
 +      int rc;
  
        /* Initialize {size,address}-cells info */
 -      of_scan_flat_dt(early_init_dt_scan_root, NULL);
 +      early_init_dt_scan_root();
  
        /* Retrieve various information from the /chosen node */
 -      rc = of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line);
 -      if (!rc)
 +      rc = early_init_dt_scan_chosen(boot_command_line);
 +      if (rc)
                pr_warn("No chosen node found, continuing without\n");
  
        /* Setup memory, calling early_init_dt_add_memory_arch */
 -      of_scan_flat_dt(early_init_dt_scan_memory, NULL);
 +      early_init_dt_scan_memory();
  
        /* Handle linux,usable-memory-range property */
 -      memblock_cap_memory_range(cap_mem_addr, cap_mem_size);
 +      early_init_dt_check_for_usable_mem_range();
  }
  
  bool __init early_init_dt_scan(void *params)
diff --combined fs/ext4/extents.c
index 1077ce7e189fe776fbda6c2fdb882f98f6fed7d8,5582fba36b4461c40066ff446a8f61d97b0f28d7..74c91da585d7f9f7e1e775c78f269dabf71f5997
@@@ -27,8 -27,8 +27,8 @@@
  #include <linux/slab.h>
  #include <linux/uaccess.h>
  #include <linux/fiemap.h>
- #include <linux/backing-dev.h>
  #include <linux/iomap.h>
+ #include <linux/sched/mm.h>
  #include "ext4_jbd2.h"
  #include "ext4_extents.h"
  #include "xattr.h"
@@@ -1496,7 -1496,8 +1496,7 @@@ static int ext4_ext_search_left(struct 
                                EXT4_ERROR_INODE(inode,
                                  "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
                                  ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
 -                                EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
 -              le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
 +                                le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block),
                                  depth);
                                return -EFSCORRUPTED;
                        }
@@@ -2024,6 -2025,7 +2024,6 @@@ int ext4_ext_insert_extent(handle_t *ha
                                        + ext4_ext_get_actual_len(newext));
                        if (unwritten)
                                ext4_ext_mark_unwritten(ex);
 -                      eh = path[depth].p_hdr;
                        nearex = ex;
                        goto merge;
                }
@@@ -2052,6 -2054,7 +2052,6 @@@ prepend
                                        + ext4_ext_get_actual_len(newext));
                        if (unwritten)
                                ext4_ext_mark_unwritten(ex);
 -                      eh = path[depth].p_hdr;
                        nearex = ex;
                        goto merge;
                }
@@@ -4404,8 -4407,7 +4404,7 @@@ retry
        err = ext4_es_remove_extent(inode, last_block,
                                    EXT_MAX_BLOCKS - last_block);
        if (err == -ENOMEM) {
-               cond_resched();
-               congestion_wait(BLK_RW_ASYNC, HZ/50);
+               memalloc_retry_wait(GFP_ATOMIC);
                goto retry;
        }
        if (err)
  retry_remove_space:
        err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
        if (err == -ENOMEM) {
-               cond_resched();
-               congestion_wait(BLK_RW_ASYNC, HZ/50);
+               memalloc_retry_wait(GFP_ATOMIC);
                goto retry_remove_space;
        }
        return err;
@@@ -4644,6 -4645,8 +4642,6 @@@ static long ext4_zero_range(struct fil
        ret = ext4_mark_inode_dirty(handle, inode);
        if (unlikely(ret))
                goto out_handle;
 -      ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits,
 -                      (offset + len - 1) >> inode->i_sb->s_blocksize_bits);
        /* Zero out partial block at the edges of the range */
        ret = ext4_zero_partial_blocks(handle, inode, offset, len);
        if (ret >= 0)
@@@ -4692,6 -4695,8 +4690,6 @@@ long ext4_fallocate(struct file *file, 
                     FALLOC_FL_INSERT_RANGE))
                return -EOPNOTSUPP;
  
 -      ext4_fc_start_update(inode);
 -
        if (mode & FALLOC_FL_PUNCH_HOLE) {
                ret = ext4_punch_hole(inode, offset, len);
                goto exit;
@@@ -4755,6 -4760,7 +4753,6 @@@ out
        inode_unlock(inode);
        trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
  exit:
 -      ext4_fc_stop_update(inode);
        return ret;
  }
  
@@@ -5336,7 -5342,7 +5334,7 @@@ static int ext4_collapse_range(struct i
                ret = PTR_ERR(handle);
                goto out_mmap;
        }
 -      ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
 +      ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
  
        down_write(&EXT4_I(inode)->i_data_sem);
        ext4_discard_preallocations(inode, 0);
  
  out_stop:
        ext4_journal_stop(handle);
 -      ext4_fc_stop_ineligible(sb);
  out_mmap:
        filemap_invalidate_unlock(mapping);
  out_mutex:
@@@ -5476,7 -5483,7 +5474,7 @@@ static int ext4_insert_range(struct ino
                ret = PTR_ERR(handle);
                goto out_mmap;
        }
 -      ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
 +      ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
  
        /* Expand file to avoid data loss if there is error while shifting */
        inode->i_size += len;
  
  out_stop:
        ext4_journal_stop(handle);
 -      ext4_fc_stop_ineligible(sb);
  out_mmap:
        filemap_invalidate_unlock(mapping);
  out_mutex:
diff --combined fs/xfs/xfs_buf.c
index bbb0fbd34e649064ca764131bec26b68cfe3c751,6c45e3fa56f4cd84c7090d38943dd14683e13f85..b45e0d50a4052d95b411d3db02a92a59cc9487df
@@@ -394,7 -394,7 +394,7 @@@ xfs_buf_alloc_pages
                }
  
                XFS_STATS_INC(bp->b_mount, xb_page_retries);
-               congestion_wait(BLK_RW_ASYNC, HZ / 50);
+               memalloc_retry_wait(gfp_mask);
        }
        return 0;
  }
@@@ -1892,7 -1892,6 +1892,7 @@@ xfs_free_buftarg
        list_lru_destroy(&btp->bt_lru);
  
        blkdev_issue_flush(btp->bt_bdev);
 +      fs_put_dax(btp->bt_daxdev);
  
        kmem_free(btp);
  }
@@@ -1933,10 -1932,11 +1933,10 @@@ xfs_setsize_buftarg_early
        return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
  }
  
 -xfs_buftarg_t *
 +struct xfs_buftarg *
  xfs_alloc_buftarg(
        struct xfs_mount        *mp,
 -      struct block_device     *bdev,
 -      struct dax_device       *dax_dev)
 +      struct block_device     *bdev)
  {
        xfs_buftarg_t           *btp;
  
        btp->bt_mount = mp;
        btp->bt_dev =  bdev->bd_dev;
        btp->bt_bdev = bdev;
 -      btp->bt_daxdev = dax_dev;
 +      btp->bt_daxdev = fs_dax_get_by_bdev(bdev, &btp->bt_dax_part_off);
  
        /*
         * Buffer IO error rate limiting. Limit it to no more than 10 messages
diff --combined include/linux/fs.h
index f5d3bf5b69a68b4205a0cab32a8aa4c56078c444,5315fa68f751a71ca56d90cbcde659ce8cbaf4f7..42ab6d71291cf19b844a6150b1b3a63bbc3954f7
@@@ -41,7 -41,6 +41,7 @@@
  #include <linux/stddef.h>
  #include <linux/mount.h>
  #include <linux/cred.h>
 +#include <linux/mnt_idmapping.h>
  
  #include <asm/byteorder.h>
  #include <uapi/linux/fs.h>
@@@ -1600,11 -1599,6 +1600,11 @@@ struct super_block 
        struct list_head        s_inodes_wb;    /* writeback inodes */
  } __randomize_layout;
  
 +static inline struct user_namespace *i_user_ns(const struct inode *inode)
 +{
 +      return inode->i_sb->s_user_ns;
 +}
 +
  /* Helper functions so that in most cases filesystems will
   * not need to deal directly with kuid_t and kgid_t and can
   * instead deal with the raw numeric values that are stored
   */
  static inline uid_t i_uid_read(const struct inode *inode)
  {
 -      return from_kuid(inode->i_sb->s_user_ns, inode->i_uid);
 +      return from_kuid(i_user_ns(inode), inode->i_uid);
  }
  
  static inline gid_t i_gid_read(const struct inode *inode)
  {
 -      return from_kgid(inode->i_sb->s_user_ns, inode->i_gid);
 +      return from_kgid(i_user_ns(inode), inode->i_gid);
  }
  
  static inline void i_uid_write(struct inode *inode, uid_t uid)
  {
 -      inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid);
 +      inode->i_uid = make_kuid(i_user_ns(inode), uid);
  }
  
  static inline void i_gid_write(struct inode *inode, gid_t gid)
  {
 -      inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
 -}
 -
 -/**
 - * kuid_into_mnt - map a kuid down into a mnt_userns
 - * @mnt_userns: user namespace of the relevant mount
 - * @kuid: kuid to be mapped
 - *
 - * Return: @kuid mapped according to @mnt_userns.
 - * If @kuid has no mapping INVALID_UID is returned.
 - */
 -static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns,
 -                                 kuid_t kuid)
 -{
 -      return make_kuid(mnt_userns, __kuid_val(kuid));
 -}
 -
 -/**
 - * kgid_into_mnt - map a kgid down into a mnt_userns
 - * @mnt_userns: user namespace of the relevant mount
 - * @kgid: kgid to be mapped
 - *
 - * Return: @kgid mapped according to @mnt_userns.
 - * If @kgid has no mapping INVALID_GID is returned.
 - */
 -static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns,
 -                                 kgid_t kgid)
 -{
 -      return make_kgid(mnt_userns, __kgid_val(kgid));
 +      inode->i_gid = make_kgid(i_user_ns(inode), gid);
  }
  
  /**
  static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
                                    const struct inode *inode)
  {
 -      return kuid_into_mnt(mnt_userns, inode->i_uid);
 +      return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid);
  }
  
  /**
  static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
                                    const struct inode *inode)
  {
 -      return kgid_into_mnt(mnt_userns, inode->i_gid);
 -}
 -
 -/**
 - * kuid_from_mnt - map a kuid up into a mnt_userns
 - * @mnt_userns: user namespace of the relevant mount
 - * @kuid: kuid to be mapped
 - *
 - * Return: @kuid mapped up according to @mnt_userns.
 - * If @kuid has no mapping INVALID_UID is returned.
 - */
 -static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns,
 -                                 kuid_t kuid)
 -{
 -      return KUIDT_INIT(from_kuid(mnt_userns, kuid));
 -}
 -
 -/**
 - * kgid_from_mnt - map a kgid up into a mnt_userns
 - * @mnt_userns: user namespace of the relevant mount
 - * @kgid: kgid to be mapped
 - *
 - * Return: @kgid mapped up according to @mnt_userns.
 - * If @kgid has no mapping INVALID_GID is returned.
 - */
 -static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns,
 -                                 kgid_t kgid)
 -{
 -      return KGIDT_INIT(from_kgid(mnt_userns, kgid));
 -}
 -
 -/**
 - * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns
 - * @mnt_userns: user namespace of the relevant mount
 - *
 - * Use this helper to initialize a new vfs or filesystem object based on
 - * the caller's fsuid. A common example is initializing the i_uid field of
 - * a newly allocated inode triggered by a creation event such as mkdir or
 - * O_CREAT. Other examples include the allocation of quotas for a specific
 - * user.
 - *
 - * Return: the caller's current fsuid mapped up according to @mnt_userns.
 - */
 -static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns)
 -{
 -      return kuid_from_mnt(mnt_userns, current_fsuid());
 -}
 -
 -/**
 - * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns
 - * @mnt_userns: user namespace of the relevant mount
 - *
 - * Use this helper to initialize a new vfs or filesystem object based on
 - * the caller's fsgid. A common example is initializing the i_gid field of
 - * a newly allocated inode triggered by a creation event such as mkdir or
 - * O_CREAT. Other examples include the allocation of quotas for a specific
 - * user.
 - *
 - * Return: the caller's current fsgid mapped up according to @mnt_userns.
 - */
 -static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns)
 -{
 -      return kgid_from_mnt(mnt_userns, current_fsgid());
 +      return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid);
  }
  
  /**
  static inline void inode_fsuid_set(struct inode *inode,
                                   struct user_namespace *mnt_userns)
  {
 -      inode->i_uid = mapped_fsuid(mnt_userns);
 +      inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode));
  }
  
  /**
  static inline void inode_fsgid_set(struct inode *inode,
                                   struct user_namespace *mnt_userns)
  {
 -      inode->i_gid = mapped_fsgid(mnt_userns);
 +      inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode));
  }
  
  /**
  static inline bool fsuidgid_has_mapping(struct super_block *sb,
                                        struct user_namespace *mnt_userns)
  {
 -      struct user_namespace *s_user_ns = sb->s_user_ns;
 +      struct user_namespace *fs_userns = sb->s_user_ns;
 +      kuid_t kuid;
 +      kgid_t kgid;
  
 -      return kuid_has_mapping(s_user_ns, mapped_fsuid(mnt_userns)) &&
 -             kgid_has_mapping(s_user_ns, mapped_fsgid(mnt_userns));
 +      kuid = mapped_fsuid(mnt_userns, fs_userns);
 +      if (!uid_valid(kuid))
 +              return false;
 +      kgid = mapped_fsgid(mnt_userns, fs_userns);
 +      if (!gid_valid(kgid))
 +              return false;
 +      return kuid_has_mapping(fs_userns, kuid) &&
 +             kgid_has_mapping(fs_userns, kgid);
  }
  
  extern struct timespec64 current_time(struct inode *inode);
@@@ -2173,7 -2249,6 +2173,7 @@@ struct super_operations 
  #define S_ENCRYPTED   (1 << 14) /* Encrypted file (using fs/crypto/) */
  #define S_CASEFOLD    (1 << 15) /* Casefolded file */
  #define S_VERITY      (1 << 16) /* Verity file (using fs/verity/) */
 +#define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */
  
  /*
   * Note that nosuid etc flags are inode-specific: setting some file-system
@@@ -2343,8 -2418,6 +2343,8 @@@ static inline void kiocb_clone(struct k
   *                    Used to detect that mark_inode_dirty() should not move
   *                    inode between dirty lists.
   *
 + * I_PINNING_FSCACHE_WB       Inode is pinning an fscache object for writeback.
 + *
   * Q: What is the difference between I_WILL_FREE and I_FREEING?
   */
  #define I_DIRTY_SYNC          (1 << 0)
  #define I_CREATING            (1 << 15)
  #define I_DONTCACHE           (1 << 16)
  #define I_SYNC_QUEUED         (1 << 17)
 +#define I_PINNING_FSCACHE_WB  (1 << 18)
  
  #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
  #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
@@@ -2652,21 -2724,6 +2652,21 @@@ static inline struct user_namespace *fi
  {
        return mnt_user_ns(file->f_path.mnt);
  }
 +
 +/**
 + * is_idmapped_mnt - check whether a mount is mapped
 + * @mnt: the mount to check
 + *
 + * If @mnt has an idmapping attached different from the
 + * filesystem's idmapping then @mnt is mapped.
 + *
 + * Return: true if mount is mapped, false if not.
 + */
 +static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
 +{
 +      return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns;
 +}
 +
  extern long vfs_truncate(const struct path *, loff_t);
  int do_truncate(struct user_namespace *, struct dentry *, loff_t start,
                unsigned int time_attrs, struct file *filp);
@@@ -2790,6 -2847,8 +2790,6 @@@ static inline int filemap_fdatawait(str
  
  extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
                                  loff_t lend);
 -extern bool filemap_range_needs_writeback(struct address_space *,
 -                                        loff_t lstart, loff_t lend);
  extern int filemap_write_and_wait_range(struct address_space *mapping,
                                        loff_t lstart, loff_t lend);
  extern int __filemap_fdatawrite_range(struct address_space *mapping,
@@@ -3093,6 -3152,7 +3093,7 @@@ extern void unlock_new_inode(struct ino
  extern void discard_new_inode(struct inode *);
  extern unsigned int get_next_ino(void);
  extern void evict_inodes(struct super_block *sb);
+ void dump_mapping(const struct address_space *);
  
  /*
   * Userspace may rely on the inode number being non-zero. For example, glibc
diff --combined include/linux/kasan.h
index fb78108d694e7f0d98c703445ed83d2538467fa0,89c99e5e67de57235b7aebd54a4ab8824d4b9ec7..4a45562d889372f4d36878840c312e9da0db73fe
@@@ -9,7 -9,6 +9,7 @@@
  
  struct kmem_cache;
  struct page;
 +struct slab;
  struct vm_struct;
  struct task_struct;
  
@@@ -194,11 -193,11 +194,11 @@@ static __always_inline size_t kasan_met
        return 0;
  }
  
 -void __kasan_poison_slab(struct page *page);
 -static __always_inline void kasan_poison_slab(struct page *page)
 +void __kasan_poison_slab(struct slab *slab);
 +static __always_inline void kasan_poison_slab(struct slab *slab)
  {
        if (kasan_enabled())
 -              __kasan_poison_slab(page);
 +              __kasan_poison_slab(slab);
  }
  
  void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
@@@ -323,7 -322,7 +323,7 @@@ static inline void kasan_cache_create(s
                                      slab_flags_t *flags) {}
  static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {}
  static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
 -static inline void kasan_poison_slab(struct page *page) {}
 +static inline void kasan_poison_slab(struct slab *slab) {}
  static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
                                        void *object) {}
  static inline void kasan_poison_object_data(struct kmem_cache *cache,
@@@ -475,12 -474,12 +475,12 @@@ static inline void kasan_populate_early
   * allocations with real shadow memory. With KASAN vmalloc, the special
   * case is unnecessary, as the work is handled in the generic case.
   */
- int kasan_module_alloc(void *addr, size_t size);
+ int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask);
  void kasan_free_shadow(const struct vm_struct *vm);
  
  #else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
  
- static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
+ static inline int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) { return 0; }
  static inline void kasan_free_shadow(const struct vm_struct *vm) {}
  
  #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
diff --combined include/linux/memcontrol.h
index e34112f6a36918e77131c562f1ae50315fe42bf3,0131e5574c8868520709795d17b35da35b550fa9..b72d75141e125b50b777df27baebdcd938292d8b
@@@ -33,6 -33,7 +33,7 @@@ enum memcg_stat_item 
        MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
        MEMCG_SOCK,
        MEMCG_PERCPU_B,
+       MEMCG_VMALLOC,
        MEMCG_NR_STAT,
  };
  
@@@ -42,6 -43,7 +43,7 @@@ enum memcg_memory_event 
        MEMCG_MAX,
        MEMCG_OOM,
        MEMCG_OOM_KILL,
+       MEMCG_OOM_GROUP_KILL,
        MEMCG_SWAP_HIGH,
        MEMCG_SWAP_MAX,
        MEMCG_SWAP_FAIL,
@@@ -536,6 -538,45 +538,6 @@@ static inline bool folio_memcg_kmem(str
        return folio->memcg_data & MEMCG_DATA_KMEM;
  }
  
 -/*
 - * page_objcgs - get the object cgroups vector associated with a page
 - * @page: a pointer to the page struct
 - *
 - * Returns a pointer to the object cgroups vector associated with the page,
 - * or NULL. This function assumes that the page is known to have an
 - * associated object cgroups vector. It's not safe to call this function
 - * against pages, which might have an associated memory cgroup: e.g.
 - * kernel stack pages.
 - */
 -static inline struct obj_cgroup **page_objcgs(struct page *page)
 -{
 -      unsigned long memcg_data = READ_ONCE(page->memcg_data);
 -
 -      VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page);
 -      VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
 -
 -      return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
 -}
 -
 -/*
 - * page_objcgs_check - get the object cgroups vector associated with a page
 - * @page: a pointer to the page struct
 - *
 - * Returns a pointer to the object cgroups vector associated with the page,
 - * or NULL. This function is safe to use if the page can be directly associated
 - * with a memory cgroup.
 - */
 -static inline struct obj_cgroup **page_objcgs_check(struct page *page)
 -{
 -      unsigned long memcg_data = READ_ONCE(page->memcg_data);
 -
 -      if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS))
 -              return NULL;
 -
 -      VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
 -
 -      return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
 -}
  
  #else
  static inline bool folio_memcg_kmem(struct folio *folio)
        return false;
  }
  
 -static inline struct obj_cgroup **page_objcgs(struct page *page)
 -{
 -      return NULL;
 -}
 -
 -static inline struct obj_cgroup **page_objcgs_check(struct page *page)
 -{
 -      return NULL;
 -}
  #endif
  
  static inline bool PageMemcgKmem(struct page *page)
@@@ -943,6 -993,21 +945,21 @@@ static inline void mod_memcg_state(stru
        local_irq_restore(flags);
  }
  
+ static inline void mod_memcg_page_state(struct page *page,
+                                       int idx, int val)
+ {
+       struct mem_cgroup *memcg;
+       if (mem_cgroup_disabled())
+               return;
+       rcu_read_lock();
+       memcg = page_memcg(page);
+       if (memcg)
+               mod_memcg_state(memcg, idx, val);
+       rcu_read_unlock();
+ }
  static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
  {
        return READ_ONCE(memcg->vmstats.state[idx]);
@@@ -1398,6 -1463,11 +1415,11 @@@ static inline void mod_memcg_state(stru
  {
  }
  
+ static inline void mod_memcg_page_state(struct page *page,
+                                       int idx, int val)
+ {
+ }
  static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
  {
        return 0;
diff --combined include/linux/memremap.h
index a8bc588fe7aa8b2d0287484430ba9e79916b5971,61a6a0e27359d5307fb90fc9bd2afd1db006097d..1fafcc38acbad66229442a33699d7faa4a73dd58
@@@ -72,6 -72,16 +72,6 @@@ struct dev_pagemap_ops 
         */
        void (*page_free)(struct page *page);
  
 -      /*
 -       * Transition the refcount in struct dev_pagemap to the dead state.
 -       */
 -      void (*kill)(struct dev_pagemap *pgmap);
 -
 -      /*
 -       * Wait for refcount in struct dev_pagemap to be idle and reap it.
 -       */
 -      void (*cleanup)(struct dev_pagemap *pgmap);
 -
        /*
         * Used for private (un-addressable) device memory only.  Must migrate
         * the page back to a CPU accessible page.
   * struct dev_pagemap - metadata for ZONE_DEVICE mappings
   * @altmap: pre-allocated/reserved memory for vmemmap allocations
   * @ref: reference count that pins the devm_memremap_pages() mapping
 - * @internal_ref: internal reference if @ref is not provided by the caller
 - * @done: completion for @internal_ref
 + * @done: completion for @ref
   * @type: memory type: see MEMORY_* in memory_hotplug.h
   * @flags: PGMAP_* flags to specify detailed behavior
+  * @vmemmap_shift: structural definition of how the vmemmap page metadata
+  *      is populated, specifically the metadata page order.
+  *    A zero value (default) uses base pages as the vmemmap metadata
+  *    representation. A bigger value will set up compound struct pages
+  *    of the requested order value.
   * @ops: method table
   * @owner: an opaque pointer identifying the entity that manages this
   *    instance.  Used by various helpers to make sure that no
   */
  struct dev_pagemap {
        struct vmem_altmap altmap;
 -      struct percpu_ref *ref;
 -      struct percpu_ref internal_ref;
 +      struct percpu_ref ref;
        struct completion done;
        enum memory_type type;
        unsigned int flags;
+       unsigned long vmemmap_shift;
        const struct dev_pagemap_ops *ops;
        void *owner;
        int nr_range;
@@@ -118,6 -136,11 +124,11 @@@ static inline struct vmem_altmap *pgmap
        return NULL;
  }
  
+ static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap)
+ {
+       return 1 << pgmap->vmemmap_shift;
+ }
  #ifdef CONFIG_ZONE_DEVICE
  void *memremap_pages(struct dev_pagemap *pgmap, int nid);
  void memunmap_pages(struct dev_pagemap *pgmap);
@@@ -179,7 -202,7 +190,7 @@@ static inline unsigned long memremap_co
  static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
  {
        if (pgmap)
 -              percpu_ref_put(pgmap->ref);
 +              percpu_ref_put(&pgmap->ref);
  }
  
  #endif /* _LINUX_MEMREMAP_H_ */
diff --combined include/linux/mm.h
index c768a7c81b0b15f1318b5c1fb9b2bcbc93af66f1,d4fb49a5d60d8eb2f42f906c26aed3aa5c845a20..aa47705191bcdfa3342b8688a6dbcd6051944470
@@@ -424,51 -424,6 +424,6 @@@ extern unsigned int kobjsize(const voi
   */
  extern pgprot_t protection_map[16];
  
- /**
-  * enum fault_flag - Fault flag definitions.
-  * @FAULT_FLAG_WRITE: Fault was a write fault.
-  * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE.
-  * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked.
-  * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying.
-  * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region.
-  * @FAULT_FLAG_TRIED: The fault has been tried once.
-  * @FAULT_FLAG_USER: The fault originated in userspace.
-  * @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
-  * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
-  * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
-  *
-  * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
-  * whether we would allow page faults to retry by specifying these two
-  * fault flags correctly.  Currently there can be three legal combinations:
-  *
-  * (a) ALLOW_RETRY and !TRIED:  this means the page fault allows retry, and
-  *                              this is the first try
-  *
-  * (b) ALLOW_RETRY and TRIED:   this means the page fault allows retry, and
-  *                              we've already tried at least once
-  *
-  * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry
-  *
-  * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never
-  * be used.  Note that page faults can be allowed to retry for multiple times,
-  * in which case we'll have an initial fault with flags (a) then later on
-  * continuous faults with flags (b).  We should always try to detect pending
-  * signals before a retry to make sure the continuous page faults can still be
-  * interrupted if necessary.
-  */
- enum fault_flag {
-       FAULT_FLAG_WRITE =              1 << 0,
-       FAULT_FLAG_MKWRITE =            1 << 1,
-       FAULT_FLAG_ALLOW_RETRY =        1 << 2,
-       FAULT_FLAG_RETRY_NOWAIT =       1 << 3,
-       FAULT_FLAG_KILLABLE =           1 << 4,
-       FAULT_FLAG_TRIED =              1 << 5,
-       FAULT_FLAG_USER =               1 << 6,
-       FAULT_FLAG_REMOTE =             1 << 7,
-       FAULT_FLAG_INSTRUCTION =        1 << 8,
-       FAULT_FLAG_INTERRUPTIBLE =      1 << 9,
- };
  /*
   * The default fault flags that should be used by most of the
   * arch-specific page fault handlers.
@@@ -577,6 -532,10 +532,10 @@@ enum page_entry_size 
   */
  struct vm_operations_struct {
        void (*open)(struct vm_area_struct * area);
+       /**
+        * @close: Called when the VMA is being removed from the MM.
+        * Context: User context.  May sleep.  Caller holds mmap_lock.
+        */
        void (*close)(struct vm_area_struct * area);
        /* Called any time before splitting to check if it's allowed */
        int (*may_split)(struct vm_area_struct *area, unsigned long addr);
@@@ -714,27 -673,6 +673,27 @@@ int vma_is_stack_for_current(struct vm_
  struct mmu_gather;
  struct inode;
  
 +static inline unsigned int compound_order(struct page *page)
 +{
 +      if (!PageHead(page))
 +              return 0;
 +      return page[1].compound_order;
 +}
 +
 +/**
 + * folio_order - The allocation order of a folio.
 + * @folio: The folio.
 + *
 + * A folio is composed of 2^order pages.  See get_order() for the definition
 + * of order.
 + *
 + * Return: The order of the folio.
 + */
 +static inline unsigned int folio_order(struct folio *folio)
 +{
 +      return compound_order(&folio->page);
 +}
 +
  #include <linux/huge_mm.h>
  
  /*
@@@ -861,19 -799,15 +820,15 @@@ static inline int page_mapcount(struct 
  
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  int total_mapcount(struct page *page);
- int page_trans_huge_mapcount(struct page *page, int *total_mapcount);
+ int page_trans_huge_mapcount(struct page *page);
  #else
  static inline int total_mapcount(struct page *page)
  {
        return page_mapcount(page);
  }
- static inline int page_trans_huge_mapcount(struct page *page,
-                                          int *total_mapcount)
+ static inline int page_trans_huge_mapcount(struct page *page)
  {
-       int mapcount = page_mapcount(page);
-       if (total_mapcount)
-               *total_mapcount = mapcount;
-       return mapcount;
+       return page_mapcount(page);
  }
  #endif
  
@@@ -884,13 -818,6 +839,13 @@@ static inline struct page *virt_to_head
        return compound_head(page);
  }
  
 +static inline struct folio *virt_to_folio(const void *x)
 +{
 +      struct page *page = virt_to_page(x);
 +
 +      return page_folio(page);
 +}
 +
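
A hedged example of the new virt_to_folio() helper, assuming the usual alloc_page()/page_address() pair for obtaining a mapped kernel address; the function name is invented for illustration.

    /* Illustration: round-trip a kernel virtual address back to its folio. */
    static int demo_virt_to_folio(void)
    {
            struct page *page = alloc_page(GFP_KERNEL);
            struct folio *folio;

            if (!page)
                    return -ENOMEM;
            folio = virt_to_folio(page_address(page));
            WARN_ON(folio != page_folio(page));     /* order-0: folio is this page */
            __free_page(page);
            return 0;
    }
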
  void __put_page(struct page *page);
  
  void put_pages_list(struct list_head *pages);
@@@ -934,6 -861,27 +889,6 @@@ static inline void destroy_compound_pag
        compound_page_dtors[page[1].compound_dtor](page);
  }
  
 -static inline unsigned int compound_order(struct page *page)
 -{
 -      if (!PageHead(page))
 -              return 0;
 -      return page[1].compound_order;
 -}
 -
 -/**
 - * folio_order - The allocation order of a folio.
 - * @folio: The folio.
 - *
 - * A folio is composed of 2^order pages.  See get_order() for the definition
 - * of order.
 - *
 - * Return: The order of the folio.
 - */
 -static inline unsigned int folio_order(struct folio *folio)
 -{
 -      return compound_order(&folio->page);
 -}
 -
  static inline bool hpage_pincount_available(struct page *page)
  {
        /*
@@@ -1760,11 -1708,6 +1715,11 @@@ void page_address_init(void)
  #define page_address_init()  do { } while(0)
  #endif
  
 +static inline void *folio_address(const struct folio *folio)
 +{
 +      return page_address(&folio->page);
 +}
 +
  extern void *page_rmapping(struct page *page);
  extern struct anon_vma *page_anon_vma(struct page *page);
  extern pgoff_t __page_file_index(struct page *page);
@@@ -1837,6 -1780,28 +1792,6 @@@ static inline bool can_do_mlock(void) 
  extern int user_shm_lock(size_t, struct ucounts *);
  extern void user_shm_unlock(size_t, struct ucounts *);
  
 -/*
 - * Parameter block passed down to zap_pte_range in exceptional cases.
 - */
 -struct zap_details {
 -      struct address_space *zap_mapping;      /* Check page->mapping if set */
 -      struct page *single_page;               /* Locked page to be unmapped */
 -};
 -
 -/*
 - * We set details->zap_mappings when we want to unmap shared but keep private
 - * pages. Return true if skip zapping this page, false otherwise.
 - */
 -static inline bool
 -zap_skip_check_mapping(struct zap_details *details, struct page *page)
 -{
 -      if (!details || !page)
 -              return false;
 -
 -      return details->zap_mapping &&
 -          (details->zap_mapping != page_rmapping(page));
 -}
 -
  struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
                             pte_t pte);
  struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
@@@ -1871,6 -1836,7 +1826,6 @@@ extern void truncate_pagecache(struct i
  extern void truncate_setsize(struct inode *inode, loff_t newsize);
  void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
  void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
 -int truncate_inode_page(struct address_space *mapping, struct page *page);
  int generic_error_remove_page(struct address_space *mapping, struct page *page);
  int invalidate_inode_page(struct page *page);
  
@@@ -1881,6 -1847,7 +1836,6 @@@ extern vm_fault_t handle_mm_fault(struc
  extern int fixup_user_fault(struct mm_struct *mm,
                            unsigned long address, unsigned int fault_flags,
                            bool *unlocked);
 -void unmap_mapping_page(struct page *page);
  void unmap_mapping_pages(struct address_space *mapping,
                pgoff_t start, pgoff_t nr, bool even_cows);
  void unmap_mapping_range(struct address_space *mapping,
@@@ -1901,6 -1868,7 +1856,6 @@@ static inline int fixup_user_fault(stru
        BUG();
        return -EFAULT;
  }
 -static inline void unmap_mapping_page(struct page *page) { }
  static inline void unmap_mapping_pages(struct address_space *mapping,
                pgoff_t start, pgoff_t nr, bool even_cows) { }
  static inline void unmap_mapping_range(struct address_space *mapping,
@@@ -1957,6 -1925,7 +1912,6 @@@ int get_kernel_pages(const struct kvec 
                        struct page **pages);
  struct page *get_dump_page(unsigned long addr);
  
 -extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
  extern void do_invalidatepage(struct page *page, unsigned int offset,
                              unsigned int length);
  
@@@ -2644,7 -2613,7 +2599,7 @@@ static inline int vma_adjust(struct vm_
  extern struct vm_area_struct *vma_merge(struct mm_struct *,
        struct vm_area_struct *prev, unsigned long addr, unsigned long end,
        unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
-       struct mempolicy *, struct vm_userfaultfd_ctx);
+       struct mempolicy *, struct vm_userfaultfd_ctx, const char *);
  extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
  extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
        unsigned long addr, int new_below);
@@@ -3153,7 -3122,6 +3108,6 @@@ int drop_caches_sysctl_handler(struct c
  #endif
  
  void drop_slab(void);
- void drop_slab_node(int nid);
  
  #ifndef CONFIG_MMU
  #define randomize_va_space 0
@@@ -3206,6 -3174,7 +3160,7 @@@ enum mf_flags 
        MF_ACTION_REQUIRED = 1 << 1,
        MF_MUST_KILL = 1 << 2,
        MF_SOFT_OFFLINE = 1 << 3,
+       MF_UNPOISON = 1 << 4,
  };
  extern int memory_failure(unsigned long pfn, int flags);
  extern void memory_failure_queue(unsigned long pfn, int flags);
@@@ -3217,19 -3186,6 +3172,19 @@@ extern void shake_page(struct page *p)
  extern atomic_long_t num_poisoned_pages __read_mostly;
  extern int soft_offline_page(unsigned long pfn, int flags);
  
 +#ifndef arch_memory_failure
 +static inline int arch_memory_failure(unsigned long pfn, int flags)
 +{
 +      return -ENXIO;
 +}
 +#endif
 +
 +#ifndef arch_is_platform_page
 +static inline bool arch_is_platform_page(u64 paddr)
 +{
 +      return false;
 +}
 +#endif
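
The #ifndef stubs above follow the usual weak-default pattern: an architecture that wants platform-specific poison handling defines the macro guard in one of its headers and supplies its own implementation. A hedged sketch of such an override (header placement and behaviour are assumptions, not taken from this series):

    /* Hypothetical arch header: opt out of the generic stubs above. */
    #define arch_memory_failure arch_memory_failure
    int arch_memory_failure(unsigned long pfn, int flags);  /* arch-specific handler */

    #define arch_is_platform_page arch_is_platform_page
    bool arch_is_platform_page(u64 paddr);                  /* e.g. firmware-owned ranges */
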
  
  /*
   * Error handlers for various types of pages.
@@@ -3246,7 -3202,6 +3201,6 @@@ enum mf_action_page_type 
        MF_MSG_KERNEL_HIGH_ORDER,
        MF_MSG_SLAB,
        MF_MSG_DIFFERENT_COMPOUND,
-       MF_MSG_POISONED_HUGE,
        MF_MSG_HUGE,
        MF_MSG_FREE_HUGE,
        MF_MSG_NON_PMD_HUGE,
        MF_MSG_CLEAN_LRU,
        MF_MSG_TRUNCATED_LRU,
        MF_MSG_BUDDY,
-       MF_MSG_BUDDY_2ND,
        MF_MSG_DAX,
        MF_MSG_UNSPLIT_THP,
        MF_MSG_UNKNOWN,
@@@ -3390,5 -3344,16 +3343,16 @@@ static inline int seal_check_future_wri
        return 0;
  }
  
+ #ifdef CONFIG_ANON_VMA_NAME
+ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
+                         unsigned long len_in, const char *name);
+ #else
+ static inline int
+ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
+                     unsigned long len_in, const char *name) {
+       return 0;
+ }
+ #endif
  #endif /* __KERNEL__ */
  #endif /* _LINUX_MM_H */
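
madvise_set_anon_name() backs the userspace interface this series adds for naming private anonymous mappings. A hedged userspace sketch follows, assuming the PR_SET_VMA / PR_SET_VMA_ANON_NAME prctl constants that accompany it (available from <linux/prctl.h> on a kernel built with CONFIG_ANON_VMA_NAME):

    #include <stdio.h>
    #include <sys/mman.h>
    #include <sys/prctl.h>
    #include <linux/prctl.h>   /* PR_SET_VMA, PR_SET_VMA_ANON_NAME, if the headers have them */

    int main(void)
    {
            size_t len = 16 * 4096;
            void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (p == MAP_FAILED)
                    return 1;
            /* The name is expected to appear as [anon:demo heap] in /proc/self/maps. */
            if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
                      (unsigned long)p, len, (unsigned long)"demo heap"))
                    perror("prctl(PR_SET_VMA)");
            return 0;
    }
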
diff --combined include/linux/mm_types.h
index 1ae3537c792072deecba06b445367fb3773d696f,e3b0476a4fdac82ec8b5a70d88370342ebd2f54b..3764c1b51b02d2b145827d4f190af2a4e5e917ab
@@@ -5,6 -5,7 +5,7 @@@
  #include <linux/mm_types_task.h>
  
  #include <linux/auxvec.h>
+ #include <linux/kref.h>
  #include <linux/list.h>
  #include <linux/spinlock.h>
  #include <linux/rbtree.h>
@@@ -56,11 -57,11 +57,11 @@@ struct mem_cgroup
   * in each subpage, but you may need to restore some of their values
   * afterwards.
   *
 - * SLUB uses cmpxchg_double() to atomically update its freelist and
 - * counters.  That requires that freelist & counters be adjacent and
 - * double-word aligned.  We align all struct pages to double-word
 - * boundaries, and ensure that 'freelist' is aligned within the
 - * struct.
 + * SLUB uses cmpxchg_double() to atomically update its freelist and counters.
 + * That requires that freelist & counters in struct slab be adjacent and
 + * double-word aligned. Because struct slab currently just reinterprets the
 + * bits of struct page, we align all struct pages to double-word boundaries,
 + * and ensure that 'freelist' is aligned within struct slab.
   */
  #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
  #define _struct_page_alignment        __aligned(2 * sizeof(unsigned long))
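
To make the adjacency and alignment requirement concrete, here is a hedged sketch of the kind of compile-time checks involved; the struct and assertions are invented for illustration and are not the real struct slab layout (assumes <linux/build_bug.h> and <linux/stddef.h>).

    /*
     * Illustration: 'freelist' and 'counters' must occupy one naturally
     * aligned double-word so cmpxchg_double() can update both atomically.
     */
    struct demo_slab {
            void *freelist;                 /* first word */
            unsigned long counters;         /* second word, immediately after */
    } __aligned(2 * sizeof(unsigned long));

    static_assert(offsetof(struct demo_slab, counters) ==
                  offsetof(struct demo_slab, freelist) + sizeof(void *),
                  "freelist/counters must be adjacent");
    static_assert(__alignof__(struct demo_slab) == 2 * sizeof(unsigned long),
                  "double-word alignment required");
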
@@@ -386,6 -387,12 +387,12 @@@ struct vm_userfaultfd_ctx 
  struct vm_userfaultfd_ctx {};
  #endif /* CONFIG_USERFAULTFD */
  
+ struct anon_vma_name {
+       struct kref kref;
+       /* The name needs to be at the end because it is dynamically sized. */
+       char name[];
+ };
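
struct anon_vma_name pairs a kref with a flexible-array name so that copies of a VMA can share one allocation. A hedged sketch of the typical alloc/get/put lifecycle for such an object (helper names invented; the series' real helpers live elsewhere in this merge, and the usual kref/slab headers are assumed):

    /* Illustration only: kref-managed, dynamically sized name object. */
    static struct anon_vma_name *demo_anon_name_alloc(const char *name)
    {
            size_t len = strlen(name) + 1;
            struct anon_vma_name *anon_name;

            anon_name = kmalloc(struct_size(anon_name, name, len), GFP_KERNEL);
            if (anon_name) {
                    kref_init(&anon_name->kref);            /* refcount = 1 */
                    memcpy(anon_name->name, name, len);
            }
            return anon_name;
    }

    static void demo_anon_name_release(struct kref *kref)
    {
            kfree(container_of(kref, struct anon_vma_name, kref));
    }

    /* Sharing: a duplicate takes a reference, teardown drops it. */
    static inline void demo_anon_name_get(struct anon_vma_name *anon_name)
    {
            kref_get(&anon_name->kref);
    }

    static inline void demo_anon_name_put(struct anon_vma_name *anon_name)
    {
            kref_put(&anon_name->kref, demo_anon_name_release);
    }
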
  /*
   * This struct describes a virtual memory area. There is one of these
   * per VM-area/task. A VM area is any part of the process virtual memory
@@@ -426,11 -433,19 +433,19 @@@ struct vm_area_struct 
        /*
         * For areas with an address space and backing store,
         * linkage into the address_space->i_mmap interval tree.
+        *
+        * For private anonymous mappings, a pointer to a null terminated string
+        * containing the name given to the vma, or NULL if unnamed.
         */
-       struct {
-               struct rb_node rb;
-               unsigned long rb_subtree_last;
-       } shared;
+       union {
+               struct {
+                       struct rb_node rb;
+                       unsigned long rb_subtree_last;
+               } shared;
+               /* Serialized by mmap_sem. */
+               struct anon_vma_name *anon_name;
+       };
  
        /*
         * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
@@@ -632,7 -647,7 +647,7 @@@ struct mm_struct 
                atomic_t tlb_flush_pending;
  #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
                /* See flush_tlb_batched_pending() */
-               bool tlb_flush_batched;
+               atomic_t tlb_flush_batched;
  #endif
                struct uprobes_state uprobes_state;
  #ifdef CONFIG_PREEMPT_RT
@@@ -677,90 -692,6 +692,6 @@@ extern void tlb_gather_mmu(struct mmu_g
  extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
  extern void tlb_finish_mmu(struct mmu_gather *tlb);
  
- static inline void init_tlb_flush_pending(struct mm_struct *mm)
- {
-       atomic_set(&mm->tlb_flush_pending, 0);
- }
- static inline void inc_tlb_flush_pending(struct mm_struct *mm)
- {
-       atomic_inc(&mm->tlb_flush_pending);
-       /*
-        * The only time this value is relevant is when there are indeed pages
-        * to flush. And we'll only flush pages after changing them, which
-        * requires the PTL.
-        *
-        * So the ordering here is:
-        *
-        *      atomic_inc(&mm->tlb_flush_pending);
-        *      spin_lock(&ptl);
-        *      ...
-        *      set_pte_at();
-        *      spin_unlock(&ptl);
-        *
-        *                              spin_lock(&ptl)
-        *                              mm_tlb_flush_pending();
-        *                              ....
-        *                              spin_unlock(&ptl);
-        *
-        *      flush_tlb_range();
-        *      atomic_dec(&mm->tlb_flush_pending);
-        *
-        * Where the increment if constrained by the PTL unlock, it thus
-        * ensures that the increment is visible if the PTE modification is
-        * visible. After all, if there is no PTE modification, nobody cares
-        * about TLB flushes either.
-        *
-        * This very much relies on users (mm_tlb_flush_pending() and
-        * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and
-        * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc
-        * locks (PPC) the unlock of one doesn't order against the lock of
-        * another PTL.
-        *
-        * The decrement is ordered by the flush_tlb_range(), such that
-        * mm_tlb_flush_pending() will not return false unless all flushes have
-        * completed.
-        */
- }
- static inline void dec_tlb_flush_pending(struct mm_struct *mm)
- {
-       /*
-        * See inc_tlb_flush_pending().
-        *
-        * This cannot be smp_mb__before_atomic() because smp_mb() simply does
-        * not order against TLB invalidate completion, which is what we need.
-        *
-        * Therefore we must rely on tlb_flush_*() to guarantee order.
-        */
-       atomic_dec(&mm->tlb_flush_pending);
- }
- static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
- {
-       /*
-        * Must be called after having acquired the PTL; orders against that
-        * PTLs release and therefore ensures that if we observe the modified
-        * PTE we must also observe the increment from inc_tlb_flush_pending().
-        *
-        * That is, it only guarantees to return true if there is a flush
-        * pending for _this_ PTL.
-        */
-       return atomic_read(&mm->tlb_flush_pending);
- }
- static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
- {
-       /*
-        * Similar to mm_tlb_flush_pending(), we must have acquired the PTL
-        * for which there is a TLB flush pending in order to guarantee
-        * we've seen both that PTE modification and the increment.
-        *
-        * (no requirement on actually still holding the PTL, that is irrelevant)
-        */
-       return atomic_read(&mm->tlb_flush_pending) > 1;
- }
  struct vm_fault;
  
  /**
@@@ -875,4 -806,49 +806,49 @@@ typedef struct 
        unsigned long val;
  } swp_entry_t;
  
+ /**
+  * enum fault_flag - Fault flag definitions.
+  * @FAULT_FLAG_WRITE: Fault was a write fault.
+  * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE.
+  * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked.
+  * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying.
+  * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region.
+  * @FAULT_FLAG_TRIED: The fault has been tried once.
+  * @FAULT_FLAG_USER: The fault originated in userspace.
+  * @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
+  * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
+  * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
+  *
+  * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
+  * whether we would allow page faults to retry by specifying these two
+  * fault flags correctly.  Currently there can be three legal combinations:
+  *
+  * (a) ALLOW_RETRY and !TRIED:  this means the page fault allows retry, and
+  *                              this is the first try
+  *
+  * (b) ALLOW_RETRY and TRIED:   this means the page fault allows retry, and
+  *                              we've already tried at least once
+  *
+  * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry
+  *
+  * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never
+  * be used.  Note that page faults can be allowed to retry for multiple times,
+  * in which case we'll have an initial fault with flags (a) then later on
+  * continuous faults with flags (b).  We should always try to detect pending
+  * signals before a retry to make sure the continuous page faults can still be
+  * interrupted if necessary.
+  */
+ enum fault_flag {
+       FAULT_FLAG_WRITE =              1 << 0,
+       FAULT_FLAG_MKWRITE =            1 << 1,
+       FAULT_FLAG_ALLOW_RETRY =        1 << 2,
+       FAULT_FLAG_RETRY_NOWAIT =       1 << 3,
+       FAULT_FLAG_KILLABLE =           1 << 4,
+       FAULT_FLAG_TRIED =              1 << 5,
+       FAULT_FLAG_USER =               1 << 6,
+       FAULT_FLAG_REMOTE =             1 << 7,
+       FAULT_FLAG_INSTRUCTION =        1 << 8,
+       FAULT_FLAG_INTERRUPTIBLE =      1 << 9,
+ };
  #endif /* _LINUX_MM_TYPES_H */
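
The legal ALLOW_RETRY/TRIED combinations documented above correspond to the retry loop that arch page-fault handlers implement. Here is a hedged, simplified sketch of that pattern, not any particular architecture's handler; a real handler also revalidates the VMA after retaking mmap_lock.

    /* Illustration: the (a) -> (b) flag progression described above. */
    static vm_fault_t demo_fault_retry(struct mm_struct *mm, struct vm_area_struct *vma,
                                       unsigned long address, bool is_write,
                                       struct pt_regs *regs)
    {
            unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
                                 (is_write ? FAULT_FLAG_WRITE : 0);
            vm_fault_t fault;

    retry:
            fault = handle_mm_fault(vma, address, flags, regs);
            if (fault & VM_FAULT_RETRY) {
                    /*
                     * mmap_lock was dropped by the fault path; detect pending
                     * signals before retrying, as the comment above asks.
                     */
                    if (fatal_signal_pending(current))
                            return fault;
                    flags |= FAULT_FLAG_TRIED;      /* combination (b) */
                    mmap_read_lock(mm);
                    goto retry;
            }
            return fault;
    }
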
diff --combined include/linux/page-flags.h
index b3d353d537e2917365de7811d8827f2bb2a43d3e,7e2b90dc7d3fc6f7815e2f2d109705aa64d906ef..1294210024434a6c66ffeb3cf5d34acd9fbfec16
@@@ -68,6 -68,9 +68,6 @@@
   * might lose their PG_swapbacked flag when they simply can be dropped (e.g. as
   * a result of MADV_FREE).
   *
 - * PG_uptodate tells whether the page's contents is valid.  When a read
 - * completes, the page becomes uptodate, unless a disk I/O error happened.
 - *
   * PG_referenced, PG_reclaim are used for page reclaim for anonymous and
   * file-backed pagecache (see mm/vmscan.c).
   *
@@@ -380,7 -383,7 +380,7 @@@ static __always_inline int TestClearPag
        TESTCLEARFLAG(uname, lname, policy)
  
  #define TESTPAGEFLAG_FALSE(uname, lname)                              \
- static inline bool folio_test_##lname(const struct folio *folio) { return 0; } \
+ static inline bool folio_test_##lname(const struct folio *folio) { return false; } \
  static inline int Page##uname(const struct page *page) { return 0; }
  
  #define SETPAGEFLAG_NOOP(uname, lname)                                        \
@@@ -519,7 -522,11 +519,11 @@@ PAGEFLAG_FALSE(Uncached, uncached
  PAGEFLAG(HWPoison, hwpoison, PF_ANY)
  TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
  #define __PG_HWPOISON (1UL << PG_hwpoison)
+ #define MAGIC_HWPOISON        0x48575053U     /* HWPS */
+ extern void SetPageHWPoisonTakenOff(struct page *page);
+ extern void ClearPageHWPoisonTakenOff(struct page *page);
  extern bool take_page_off_buddy(struct page *page);
+ extern bool put_page_back_buddy(struct page *page);
  #else
  PAGEFLAG_FALSE(HWPoison, hwpoison)
  #define __PG_HWPOISON 0
@@@ -612,16 -619,6 +616,16 @@@ TESTPAGEFLAG_FALSE(Ksm, ksm
  
  u64 stable_page_flags(struct page *page);
  
 +/**
 + * folio_test_uptodate - Is this folio up to date?
 + * @folio: The folio.
 + *
 + * The uptodate flag is set on a folio when every byte in the folio is
 + * at least as new as the corresponding bytes on storage.  Anonymous
 + * and CoW folios are always uptodate.  If the folio is not uptodate,
 + * some of the bytes in it may be; see the is_partially_uptodate()
 + * address_space operation.
 + */
  static inline bool folio_test_uptodate(struct folio *folio)
  {
        bool ret = test_bit(PG_uptodate, folio_flags(folio, 0));
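
The new kernel-doc above defines uptodate as every byte being at least as new as storage. A hedged sketch of how a read path typically consults the flag follows; demo_read_folio_from_disk() is an invented I/O helper, and the caller is assumed to hold a reference on the folio.

    int demo_read_folio_from_disk(struct folio *folio);    /* hypothetical I/O helper */

    /* Illustration: only touch folio contents once it is marked uptodate. */
    static int demo_ensure_uptodate(struct folio *folio)
    {
            int err = 0;

            if (folio_test_uptodate(folio))
                    return 0;                       /* fast path: data is valid */

            folio_lock(folio);
            if (!folio_test_uptodate(folio)) {      /* re-check under the lock */
                    err = demo_read_folio_from_disk(folio);
                    if (!err)
                            folio_mark_uptodate(folio);
            }
            folio_unlock(folio);
            return err;
    }
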
diff --combined kernel/fork.c
index 3161d7980155e08ead8bf4c401ae8e5ec4d08a21,75737e566441215f4ac7808d598a7fe6346e6acb..1c989cc4208a3b19a9728c297e4f9f5896fb9168
@@@ -42,6 -42,7 +42,7 @@@
  #include <linux/mmu_notifier.h>
  #include <linux/fs.h>
  #include <linux/mm.h>
+ #include <linux/mm_inline.h>
  #include <linux/vmacache.h>
  #include <linux/nsproxy.h>
  #include <linux/capability.h>
@@@ -365,12 -366,14 +366,14 @@@ struct vm_area_struct *vm_area_dup(stru
                *new = data_race(*orig);
                INIT_LIST_HEAD(&new->anon_vma_chain);
                new->vm_next = new->vm_prev = NULL;
+               dup_vma_anon_name(orig, new);
        }
        return new;
  }
  
  void vm_area_free(struct vm_area_struct *vma)
  {
+       free_vma_anon_name(vma);
        kmem_cache_free(vm_area_cachep, vma);
  }
  
@@@ -1556,6 -1559,32 +1559,6 @@@ out
        return error;
  }
  
 -static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 -{
 -#ifdef CONFIG_BLOCK
 -      struct io_context *ioc = current->io_context;
 -      struct io_context *new_ioc;
 -
 -      if (!ioc)
 -              return 0;
 -      /*
 -       * Share io context with parent, if CLONE_IO is set
 -       */
 -      if (clone_flags & CLONE_IO) {
 -              ioc_task_link(ioc);
 -              tsk->io_context = ioc;
 -      } else if (ioprio_valid(ioc->ioprio)) {
 -              new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
 -              if (unlikely(!new_ioc))
 -                      return -ENOMEM;
 -
 -              new_ioc->ioprio = ioc->ioprio;
 -              put_io_context(new_ioc);
 -      }
 -#endif
 -      return 0;
 -}
 -
  static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
  {
        struct sighand_struct *sig;
diff --combined kernel/rcu/rcutorture.c
index 33ea446101b30e390095e3099a895799b8e4df6a,42bc66a2f170820d976de1fa59d1bd588221d54e..422f7e4cc08de898f711b5e82e331f10aafd6cad
@@@ -46,7 -46,6 +46,7 @@@
  #include <linux/oom.h>
  #include <linux/tick.h>
  #include <linux/rcupdate_trace.h>
 +#include <linux/nmi.h>
  
  #include "rcu.h"
  
@@@ -54,18 -53,15 +54,18 @@@ MODULE_LICENSE("GPL")
  MODULE_AUTHOR("Paul E. McKenney <[email protected]> and Josh Triplett <[email protected]>");
  
  /* Bits for ->extendables field, extendables param, and related definitions. */
 -#define RCUTORTURE_RDR_SHIFT   8      /* Put SRCU index in upper bits. */
 -#define RCUTORTURE_RDR_MASK    ((1 << RCUTORTURE_RDR_SHIFT) - 1)
 +#define RCUTORTURE_RDR_SHIFT_1         8      /* Put SRCU index in upper bits. */
 +#define RCUTORTURE_RDR_MASK_1  (1 << RCUTORTURE_RDR_SHIFT_1)
 +#define RCUTORTURE_RDR_SHIFT_2         9      /* Put SRCU index in upper bits. */
 +#define RCUTORTURE_RDR_MASK_2  (1 << RCUTORTURE_RDR_SHIFT_2)
  #define RCUTORTURE_RDR_BH      0x01   /* Extend readers by disabling bh. */
  #define RCUTORTURE_RDR_IRQ     0x02   /*  ... disabling interrupts. */
  #define RCUTORTURE_RDR_PREEMPT         0x04   /*  ... disabling preemption. */
  #define RCUTORTURE_RDR_RBH     0x08   /*  ... rcu_read_lock_bh(). */
  #define RCUTORTURE_RDR_SCHED   0x10   /*  ... rcu_read_lock_sched(). */
 -#define RCUTORTURE_RDR_RCU     0x20   /*  ... entering another RCU reader. */
 -#define RCUTORTURE_RDR_NBITS   6      /* Number of bits defined above. */
 +#define RCUTORTURE_RDR_RCU_1   0x20   /*  ... entering another RCU reader. */
 +#define RCUTORTURE_RDR_RCU_2   0x40   /*  ... entering another RCU reader. */
 +#define RCUTORTURE_RDR_NBITS   7      /* Number of bits defined above. */
  #define RCUTORTURE_MAX_EXTEND  \
        (RCUTORTURE_RDR_BH | RCUTORTURE_RDR_IRQ | RCUTORTURE_RDR_PREEMPT | \
         RCUTORTURE_RDR_RBH | RCUTORTURE_RDR_SCHED)
@@@ -79,7 -75,7 +79,7 @@@ torture_param(int, fqs_duration, 0
              "Duration of fqs bursts (us), 0 to disable");
  torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)");
  torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)");
 -torture_param(bool, fwd_progress, 1, "Test grace-period forward progress");
 +torture_param(int, fwd_progress, 1, "Test grace-period forward progress");
  torture_param(int, fwd_progress_div, 4, "Fraction of CPU stall to wait");
  torture_param(int, fwd_progress_holdoff, 60,
              "Time between forward-progress tests (s)");
@@@ -113,8 -109,6 +113,8 @@@ torture_param(int, shutdown_secs, 0, "S
  torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable.");
  torture_param(int, stall_cpu_holdoff, 10,
             "Time to wait before starting stall (s).");
 +torture_param(bool, stall_no_softlockup, false,
 +           "Avoid softlockup warning during cpu stall.");
  torture_param(int, stall_cpu_irqsoff, 0, "Disable interrupts while stalling.");
  torture_param(int, stall_cpu_block, 0, "Sleep while stalling.");
  torture_param(int, stall_gp_kthread, 0,
@@@ -146,7 -140,7 +146,7 @@@ static struct task_struct *stats_task
  static struct task_struct *fqs_task;
  static struct task_struct *boost_tasks[NR_CPUS];
  static struct task_struct *stall_task;
 -static struct task_struct *fwd_prog_task;
 +static struct task_struct **fwd_prog_tasks;
  static struct task_struct **barrier_cbs_tasks;
  static struct task_struct *barrier_task;
  static struct task_struct *read_exit_task;
@@@ -348,12 -342,10 +348,12 @@@ struct rcu_torture_ops 
        void (*gp_kthread_dbg)(void);
        bool (*check_boost_failed)(unsigned long gp_state, int *cpup);
        int (*stall_dur)(void);
 +      long cbflood_max;
        int irq_capable;
        int can_boost;
        int extendables;
        int slow_gps;
 +      int no_pi_lock;
        const char *name;
  };
  
@@@ -675,7 -667,6 +675,7 @@@ static struct rcu_torture_ops srcu_ops 
        .cb_barrier     = srcu_torture_barrier,
        .stats          = srcu_torture_stats,
        .irq_capable    = 1,
 +      .no_pi_lock     = IS_ENABLED(CONFIG_TINY_SRCU),
        .name           = "srcu"
  };
  
@@@ -709,7 -700,6 +709,7 @@@ static struct rcu_torture_ops srcud_op
        .cb_barrier     = srcu_torture_barrier,
        .stats          = srcu_torture_stats,
        .irq_capable    = 1,
 +      .no_pi_lock     = IS_ENABLED(CONFIG_TINY_SRCU),
        .name           = "srcud"
  };
  
@@@ -730,7 -720,6 +730,7 @@@ static struct rcu_torture_ops busted_sr
        .cb_barrier     = srcu_torture_barrier,
        .stats          = srcu_torture_stats,
        .irq_capable    = 1,
 +      .no_pi_lock     = IS_ENABLED(CONFIG_TINY_SRCU),
        .extendables    = RCUTORTURE_MAX_EXTEND,
        .name           = "busted_srcud"
  };
@@@ -842,7 -831,6 +842,7 @@@ static struct rcu_torture_ops tasks_rud
        .call           = call_rcu_tasks_rude,
        .cb_barrier     = rcu_barrier_tasks_rude,
        .gp_kthread_dbg = show_rcu_tasks_rude_gp_kthread,
 +      .cbflood_max    = 50000,
        .fqs            = NULL,
        .stats          = NULL,
        .irq_capable    = 1,
@@@ -883,7 -871,6 +883,7 @@@ static struct rcu_torture_ops tasks_tra
        .call           = call_rcu_tasks_trace,
        .cb_barrier     = rcu_barrier_tasks_trace,
        .gp_kthread_dbg = show_rcu_tasks_trace_gp_kthread,
 +      .cbflood_max    = 50000,
        .fqs            = NULL,
        .stats          = NULL,
        .irq_capable    = 1,
@@@ -1433,15 -1420,13 +1433,15 @@@ static void rcutorture_one_extend(int *
                                  struct rt_read_seg *rtrsp)
  {
        unsigned long flags;
 -      int idxnew = -1;
 -      int idxold = *readstate;
 +      int idxnew1 = -1;
 +      int idxnew2 = -1;
 +      int idxold1 = *readstate;
 +      int idxold2 = idxold1;
        int statesnew = ~*readstate & newstate;
        int statesold = *readstate & ~newstate;
  
 -      WARN_ON_ONCE(idxold < 0);
 -      WARN_ON_ONCE((idxold >> RCUTORTURE_RDR_SHIFT) > 1);
 +      WARN_ON_ONCE(idxold2 < 0);
 +      WARN_ON_ONCE((idxold2 >> RCUTORTURE_RDR_SHIFT_2) > 1);
        rtrsp->rt_readstate = newstate;
  
        /* First, put new protection in place to avoid critical-section gap. */
                preempt_disable();
        if (statesnew & RCUTORTURE_RDR_SCHED)
                rcu_read_lock_sched();
 -      if (statesnew & RCUTORTURE_RDR_RCU)
 -              idxnew = cur_ops->readlock() << RCUTORTURE_RDR_SHIFT;
 +      if (statesnew & RCUTORTURE_RDR_RCU_1)
 +              idxnew1 = (cur_ops->readlock() & 0x1) << RCUTORTURE_RDR_SHIFT_1;
 +      if (statesnew & RCUTORTURE_RDR_RCU_2)
 +              idxnew2 = (cur_ops->readlock() & 0x1) << RCUTORTURE_RDR_SHIFT_2;
  
        /*
         * Next, remove old protection, in decreasing order of strength
                local_bh_enable();
        if (statesold & RCUTORTURE_RDR_RBH)
                rcu_read_unlock_bh();
 -      if (statesold & RCUTORTURE_RDR_RCU) {
 -              bool lockit = !statesnew && !(torture_random(trsp) & 0xffff);
 +      if (statesold & RCUTORTURE_RDR_RCU_2) {
 +              cur_ops->readunlock((idxold2 >> RCUTORTURE_RDR_SHIFT_2) & 0x1);
 +              WARN_ON_ONCE(idxnew2 != -1);
 +              idxold2 = 0;
 +      }
 +      if (statesold & RCUTORTURE_RDR_RCU_1) {
 +              bool lockit;
  
 +              lockit = !cur_ops->no_pi_lock && !statesnew && !(torture_random(trsp) & 0xffff);
                if (lockit)
                        raw_spin_lock_irqsave(&current->pi_lock, flags);
 -              cur_ops->readunlock(idxold >> RCUTORTURE_RDR_SHIFT);
 +              cur_ops->readunlock((idxold1 >> RCUTORTURE_RDR_SHIFT_1) & 0x1);
 +              WARN_ON_ONCE(idxnew1 != -1);
 +              idxold1 = 0;
                if (lockit)
                        raw_spin_unlock_irqrestore(&current->pi_lock, flags);
        }
                cur_ops->read_delay(trsp, rtrsp);
  
        /* Update the reader state. */
 -      if (idxnew == -1)
 -              idxnew = idxold & ~RCUTORTURE_RDR_MASK;
 -      WARN_ON_ONCE(idxnew < 0);
 -      WARN_ON_ONCE((idxnew >> RCUTORTURE_RDR_SHIFT) > 1);
 -      *readstate = idxnew | newstate;
 -      WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT) < 0);
 -      WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT) > 1);
 +      if (idxnew1 == -1)
 +              idxnew1 = idxold1 & RCUTORTURE_RDR_MASK_1;
 +      WARN_ON_ONCE(idxnew1 < 0);
 +      if (WARN_ON_ONCE((idxnew1 >> RCUTORTURE_RDR_SHIFT_1) > 1))
 +              pr_info("Unexpected idxnew1 value of %#x\n", idxnew1);
 +      if (idxnew2 == -1)
 +              idxnew2 = idxold2 & RCUTORTURE_RDR_MASK_2;
 +      WARN_ON_ONCE(idxnew2 < 0);
 +      WARN_ON_ONCE((idxnew2 >> RCUTORTURE_RDR_SHIFT_2) > 1);
 +      *readstate = idxnew1 | idxnew2 | newstate;
 +      WARN_ON_ONCE(*readstate < 0);
 +      if (WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT_2) > 1))
 +              pr_info("Unexpected idxnew2 value of %#x\n", idxnew2);
  }
  
  /* Return the biggest extendables mask given current RCU and boot parameters. */
@@@ -1522,7 -1491,7 +1522,7 @@@ static int rcutorture_extend_mask_max(v
  
        WARN_ON_ONCE(extendables & ~RCUTORTURE_MAX_EXTEND);
        mask = extendables & RCUTORTURE_MAX_EXTEND & cur_ops->extendables;
 -      mask = mask | RCUTORTURE_RDR_RCU;
 +      mask = mask | RCUTORTURE_RDR_RCU_1 | RCUTORTURE_RDR_RCU_2;
        return mask;
  }
  
@@@ -1537,21 -1506,13 +1537,21 @@@ rcutorture_extend_mask(int oldmask, str
        unsigned long preempts_irq = preempts | RCUTORTURE_RDR_IRQ;
        unsigned long bhs = RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH;
  
 -      WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT);
 +      WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT_1);
        /* Mostly only one bit (need preemption!), sometimes lots of bits. */
        if (!(randmask1 & 0x7))
                mask = mask & randmask2;
        else
                mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS));
  
 +      // Can't have nested RCU reader without outer RCU reader.
 +      if (!(mask & RCUTORTURE_RDR_RCU_1) && (mask & RCUTORTURE_RDR_RCU_2)) {
 +              if (oldmask & RCUTORTURE_RDR_RCU_1)
 +                      mask &= ~RCUTORTURE_RDR_RCU_2;
 +              else
 +                      mask |= RCUTORTURE_RDR_RCU_1;
 +      }
 +
        /*
         * Can't enable bh w/irq disabled.
         */
                        mask |= oldmask & bhs;
        }
  
 -      return mask ?: RCUTORTURE_RDR_RCU;
 +      return mask ?: RCUTORTURE_RDR_RCU_1;
  }
  
  /*
@@@ -1665,7 -1626,7 +1665,7 @@@ static bool rcu_torture_one_read(struc
                          rcu_torture_writer_state,
                          cookie, cur_ops->get_gp_state());
        rcutorture_one_extend(&readstate, 0, trsp, rtrsp);
 -      WARN_ON_ONCE(readstate & RCUTORTURE_RDR_MASK);
 +      WARN_ON_ONCE(readstate);
        // This next splat is expected behavior if leakpointer, especially
        // for CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels.
        WARN_ON_ONCE(leakpointer && READ_ONCE(p->rtort_pipe_count) > 1);
@@@ -2031,9 -1992,8 +2031,8 @@@ static int rcutorture_booster_init(unsi
        mutex_lock(&boost_mutex);
        rcu_torture_disable_rt_throttle();
        VERBOSE_TOROUT_STRING("Creating rcu_torture_boost task");
-       boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL,
-                                                 cpu_to_node(cpu),
-                                                 "rcu_torture_boost");
+       boost_tasks[cpu] = kthread_run_on_cpu(rcu_torture_boost, NULL,
+                                             cpu, "rcu_torture_boost_%u");
        if (IS_ERR(boost_tasks[cpu])) {
                retval = PTR_ERR(boost_tasks[cpu]);
                VERBOSE_TOROUT_STRING("rcu_torture_boost task create failed");
                mutex_unlock(&boost_mutex);
                return retval;
        }
-       kthread_bind(boost_tasks[cpu], cpu);
-       wake_up_process(boost_tasks[cpu]);
        mutex_unlock(&boost_mutex);
        return 0;
  }
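
The hunk above replaces the create/bind/wake_up_process sequence with the single kthread_run_on_cpu() helper used in this series. A hedged sketch of the same pattern in isolation, with an invented thread function and signature taken from the call site above:

    /* Illustration: create, bind to @cpu and start a kthread in one call. */
    static int demo_thread_fn(void *data)
    {
            while (!kthread_should_stop())
                    schedule_timeout_interruptible(HZ);
            return 0;
    }

    static struct task_struct *demo_start_on_cpu(unsigned int cpu)
    {
            /* The "%u" in the name format is filled in with @cpu. */
            return kthread_run_on_cpu(demo_thread_fn, NULL, cpu, "demo_worker/%u");
    }
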
@@@ -2091,8 -2049,6 +2088,8 @@@ static int rcu_torture_stall(void *args
  #else
                                schedule_timeout_uninterruptible(HZ);
  #endif
 +                      } else if (stall_no_softlockup) {
 +                              touch_softlockup_watchdog();
                        }
                if (stall_cpu_irqsoff)
                        local_irq_enable();
@@@ -2164,13 -2120,10 +2161,13 @@@ struct rcu_fwd 
        unsigned long rcu_fwd_startat;
        struct rcu_launder_hist n_launders_hist[N_LAUNDERS_HIST];
        unsigned long rcu_launder_gp_seq_start;
 +      int rcu_fwd_id;
  };
  
  static DEFINE_MUTEX(rcu_fwd_mutex);
  static struct rcu_fwd *rcu_fwds;
 +static unsigned long rcu_fwd_seq;
 +static atomic_long_t rcu_fwd_max_cbs;
  static bool rcu_fwd_emergency_stop;
  
  static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
        for (i = ARRAY_SIZE(rfp->n_launders_hist) - 1; i > 0; i--)
                if (rfp->n_launders_hist[i].n_launders > 0)
                        break;
 -      pr_alert("%s: Callback-invocation histogram (duration %lu jiffies):",
 -               __func__, jiffies - rfp->rcu_fwd_startat);
 +      mutex_lock(&rcu_fwd_mutex); // Serialize histograms.
 +      pr_alert("%s: Callback-invocation histogram %d (duration %lu jiffies):",
 +               __func__, rfp->rcu_fwd_id, jiffies - rfp->rcu_fwd_startat);
        gps_old = rfp->rcu_launder_gp_seq_start;
        for (j = 0; j <= i; j++) {
                gps = rfp->n_launders_hist[j].launder_gp_seq;
                gps_old = gps;
        }
        pr_cont("\n");
 +      mutex_unlock(&rcu_fwd_mutex);
  }
  
  /* Callback function for continuous-flood RCU callbacks. */
@@@ -2322,8 -2273,7 +2319,8 @@@ static void rcu_torture_fwd_prog_nr(str
                cver = READ_ONCE(rcu_torture_current_version) - cver;
                gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps);
                WARN_ON(!cver && gps < 2);
 -              pr_alert("%s: Duration %ld cver %ld gps %ld\n", __func__, dur, cver, gps);
 +              pr_alert("%s: %d Duration %ld cver %ld gps %ld\n", __func__,
 +                       rfp->rcu_fwd_id, dur, cver, gps);
        }
        if (selfpropcb) {
                WRITE_ONCE(fcs.stop, 1);
@@@ -2391,7 -2341,7 +2388,7 @@@ static void rcu_torture_fwd_prog_cr(str
                        rfp->rcu_fwd_cb_head = rfcpn;
                        n_launders++;
                        n_launders_sa++;
 -              } else {
 +              } else if (!cur_ops->cbflood_max || cur_ops->cbflood_max > n_max_cbs) {
                        rfcp = kmalloc(sizeof(*rfcp), GFP_KERNEL);
                        if (WARN_ON_ONCE(!rfcp)) {
                                schedule_timeout_interruptible(1);
                        n_launders_sa = 0;
                        rfcp->rfc_gps = 0;
                        rfcp->rfc_rfp = rfp;
 +              } else {
 +                      rfcp = NULL;
                }
 -              cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr);
 +              if (rfcp)
 +                      cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr);
                rcu_torture_fwd_prog_cond_resched(n_launders + n_max_cbs);
                if (tick_nohz_full_enabled()) {
                        local_irq_save(flags);
                         n_launders + n_max_cbs - n_launders_cb_snap,
                         n_launders, n_launders_sa,
                         n_max_gps, n_max_cbs, cver, gps);
 +              atomic_long_add(n_max_cbs, &rcu_fwd_max_cbs);
                rcu_torture_fwd_cb_hist(rfp);
        }
        schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */
  static int rcutorture_oom_notify(struct notifier_block *self,
                                 unsigned long notused, void *nfreed)
  {
 +      int i;
 +      long ncbs;
        struct rcu_fwd *rfp;
  
        mutex_lock(&rcu_fwd_mutex);
        }
        WARN(1, "%s invoked upon OOM during forward-progress testing.\n",
             __func__);
 -      rcu_torture_fwd_cb_hist(rfp);
 -      rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rfp->rcu_fwd_startat)) / 2);
 +      for (i = 0; i < fwd_progress; i++) {
 +              rcu_torture_fwd_cb_hist(&rfp[i]);
 +              rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rfp[i].rcu_fwd_startat)) / 2);
 +      }
        WRITE_ONCE(rcu_fwd_emergency_stop, true);
        smp_mb(); /* Emergency stop before free and wait to avoid hangs. */
 -      pr_info("%s: Freed %lu RCU callbacks.\n",
 -              __func__, rcu_torture_fwd_prog_cbfree(rfp));
 +      ncbs = 0;
 +      for (i = 0; i < fwd_progress; i++)
 +              ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]);
 +      pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs);
        rcu_barrier();
 -      pr_info("%s: Freed %lu RCU callbacks.\n",
 -              __func__, rcu_torture_fwd_prog_cbfree(rfp));
 +      ncbs = 0;
 +      for (i = 0; i < fwd_progress; i++)
 +              ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]);
 +      pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs);
        rcu_barrier();
 -      pr_info("%s: Freed %lu RCU callbacks.\n",
 -              __func__, rcu_torture_fwd_prog_cbfree(rfp));
 +      ncbs = 0;
 +      for (i = 0; i < fwd_progress; i++)
 +              ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]);
 +      pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs);
        smp_mb(); /* Frees before return to avoid redoing OOM. */
        (*(unsigned long *)nfreed)++; /* Forward progress CBs freed! */
        pr_info("%s returning after OOM processing.\n", __func__);
@@@ -2491,10 -2427,7 +2488,10 @@@ static struct notifier_block rcutorture
  /* Carry out grace-period forward-progress testing. */
  static int rcu_torture_fwd_prog(void *args)
  {
 +      bool firsttime = true;
 +      long max_cbs;
        int oldnice = task_nice(current);
 +      unsigned long oldseq = READ_ONCE(rcu_fwd_seq);
        struct rcu_fwd *rfp = args;
        int tested = 0;
        int tested_tries = 0;
        if (!IS_ENABLED(CONFIG_SMP) || !IS_ENABLED(CONFIG_RCU_BOOST))
                set_user_nice(current, MAX_NICE);
        do {
 -              schedule_timeout_interruptible(fwd_progress_holdoff * HZ);
 -              WRITE_ONCE(rcu_fwd_emergency_stop, false);
 -              if (!IS_ENABLED(CONFIG_TINY_RCU) ||
 -                  rcu_inkernel_boot_has_ended())
 -                      rcu_torture_fwd_prog_nr(rfp, &tested, &tested_tries);
 -              if (rcu_inkernel_boot_has_ended())
 +              if (!rfp->rcu_fwd_id) {
 +                      schedule_timeout_interruptible(fwd_progress_holdoff * HZ);
 +                      WRITE_ONCE(rcu_fwd_emergency_stop, false);
 +                      if (!firsttime) {
 +                              max_cbs = atomic_long_xchg(&rcu_fwd_max_cbs, 0);
 +                              pr_alert("%s n_max_cbs: %ld\n", __func__, max_cbs);
 +                      }
 +                      firsttime = false;
 +                      WRITE_ONCE(rcu_fwd_seq, rcu_fwd_seq + 1);
 +              } else {
 +                      while (READ_ONCE(rcu_fwd_seq) == oldseq)
 +                              schedule_timeout_interruptible(1);
 +                      oldseq = READ_ONCE(rcu_fwd_seq);
 +              }
 +              pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
 +              if (rcu_inkernel_boot_has_ended() && torture_num_online_cpus() > rfp->rcu_fwd_id)
                        rcu_torture_fwd_prog_cr(rfp);
 +              if ((cur_ops->stall_dur && cur_ops->stall_dur() > 0) &&
 +                  (!IS_ENABLED(CONFIG_TINY_RCU) ||
 +                   (rcu_inkernel_boot_has_ended() &&
 +                    torture_num_online_cpus() > rfp->rcu_fwd_id)))
 +                      rcu_torture_fwd_prog_nr(rfp, &tested, &tested_tries);
  
                /* Avoid slow periods, better to test when busy. */
                if (stutter_wait("rcu_torture_fwd_prog"))
                        sched_set_normal(current, oldnice);
        } while (!torture_must_stop());
        /* Short runs might not contain a valid forward-progress attempt. */
 -      WARN_ON(!tested && tested_tries >= 5);
 -      pr_alert("%s: tested %d tested_tries %d\n", __func__, tested, tested_tries);
 +      if (!rfp->rcu_fwd_id) {
 +              WARN_ON(!tested && tested_tries >= 5);
 +              pr_alert("%s: tested %d tested_tries %d\n", __func__, tested, tested_tries);
 +      }
        torture_kthread_stopping("rcu_torture_fwd_prog");
        return 0;
  }
  /* If forward-progress checking is requested and feasible, spawn the thread. */
  static int __init rcu_torture_fwd_prog_init(void)
  {
 +      int i;
 +      int ret = 0;
        struct rcu_fwd *rfp;
  
        if (!fwd_progress)
                return 0; /* Not requested, so don't do it. */
 +      if (fwd_progress >= nr_cpu_ids) {
 +              VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Limiting fwd_progress to # CPUs.\n");
 +              fwd_progress = nr_cpu_ids;
 +      } else if (fwd_progress < 0) {
 +              fwd_progress = nr_cpu_ids;
 +      }
        if ((!cur_ops->sync && !cur_ops->call) ||
 -          !cur_ops->stall_dur || cur_ops->stall_dur() <= 0 || cur_ops == &rcu_busted_ops) {
 +          (!cur_ops->cbflood_max && (!cur_ops->stall_dur || cur_ops->stall_dur() <= 0)) ||
 +          cur_ops == &rcu_busted_ops) {
                VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, unsupported by RCU flavor under test");
 +              fwd_progress = 0;
                return 0;
        }
        if (stall_cpu > 0) {
                VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, conflicts with CPU-stall testing");
 +              fwd_progress = 0;
                if (IS_MODULE(CONFIG_RCU_TORTURE_TEST))
                        return -EINVAL; /* In module, can fail back to user. */
                WARN_ON(1); /* Make sure rcutorture notices conflict. */
                fwd_progress_holdoff = 1;
        if (fwd_progress_div <= 0)
                fwd_progress_div = 4;
 -      rfp = kzalloc(sizeof(*rfp), GFP_KERNEL);
 -      if (!rfp)
 +      rfp = kcalloc(fwd_progress, sizeof(*rfp), GFP_KERNEL);
 +      fwd_prog_tasks = kcalloc(fwd_progress, sizeof(*fwd_prog_tasks), GFP_KERNEL);
 +      if (!rfp || !fwd_prog_tasks) {
 +              kfree(rfp);
 +              kfree(fwd_prog_tasks);
 +              fwd_prog_tasks = NULL;
 +              fwd_progress = 0;
                return -ENOMEM;
 -      spin_lock_init(&rfp->rcu_fwd_lock);
 -      rfp->rcu_fwd_cb_tail = &rfp->rcu_fwd_cb_head;
 +      }
 +      for (i = 0; i < fwd_progress; i++) {
 +              spin_lock_init(&rfp[i].rcu_fwd_lock);
 +              rfp[i].rcu_fwd_cb_tail = &rfp[i].rcu_fwd_cb_head;
 +              rfp[i].rcu_fwd_id = i;
 +      }
        mutex_lock(&rcu_fwd_mutex);
        rcu_fwds = rfp;
        mutex_unlock(&rcu_fwd_mutex);
        register_oom_notifier(&rcutorture_oom_nb);
 -      return torture_create_kthread(rcu_torture_fwd_prog, rfp, fwd_prog_task);
 +      for (i = 0; i < fwd_progress; i++) {
 +              ret = torture_create_kthread(rcu_torture_fwd_prog, &rcu_fwds[i], fwd_prog_tasks[i]);
 +              if (ret) {
 +                      fwd_progress = i;
 +                      return ret;
 +              }
 +      }
 +      return 0;
  }
  
  static void rcu_torture_fwd_prog_cleanup(void)
  {
 +      int i;
        struct rcu_fwd *rfp;
  
 -      torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_task);
 -      rfp = rcu_fwds;
 +      if (!rcu_fwds || !fwd_prog_tasks)
 +              return;
 +      for (i = 0; i < fwd_progress; i++)
 +              torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_tasks[i]);
 +      unregister_oom_notifier(&rcutorture_oom_nb);
        mutex_lock(&rcu_fwd_mutex);
 +      rfp = rcu_fwds;
        rcu_fwds = NULL;
        mutex_unlock(&rcu_fwd_mutex);
 -      unregister_oom_notifier(&rcutorture_oom_nb);
        kfree(rfp);
 +      kfree(fwd_prog_tasks);
 +      fwd_prog_tasks = NULL;
  }
  
  /* Callback function for RCU barrier testing. */
@@@ -2855,7 -2738,7 +2852,7 @@@ static int rcu_torture_read_exit(void *
                                     &trs, "%s",
                                     "rcu_torture_read_exit_child");
                if (IS_ERR(tsp)) {
 -                      VERBOSE_TOROUT_ERRSTRING("out of memory");
 +                      TOROUT_ERRSTRING("out of memory");
                        errexit = true;
                        tsp = NULL;
                        break;
@@@ -3182,7 -3065,7 +3179,7 @@@ rcu_torture_init(void
                                           sizeof(fakewriter_tasks[0]),
                                           GFP_KERNEL);
                if (fakewriter_tasks == NULL) {
 -                      VERBOSE_TOROUT_ERRSTRING("out of memory");
 +                      TOROUT_ERRSTRING("out of memory");
                        firsterr = -ENOMEM;
                        goto unwind;
                }
        rcu_torture_reader_mbchk = kcalloc(nrealreaders, sizeof(*rcu_torture_reader_mbchk),
                                           GFP_KERNEL);
        if (!reader_tasks || !rcu_torture_reader_mbchk) {
 -              VERBOSE_TOROUT_ERRSTRING("out of memory");
 +              TOROUT_ERRSTRING("out of memory");
                firsterr = -ENOMEM;
                goto unwind;
        }
        if (nrealnocbers > 0) {
                nocb_tasks = kcalloc(nrealnocbers, sizeof(nocb_tasks[0]), GFP_KERNEL);
                if (nocb_tasks == NULL) {
 -                      VERBOSE_TOROUT_ERRSTRING("out of memory");
 +                      TOROUT_ERRSTRING("out of memory");
                        firsterr = -ENOMEM;
                        goto unwind;
                }
diff --combined kernel/sysctl.c
index d7ed1dffa4262615c78fb0ff5c12c8d49cdf1f36,2ab4edb6e45094fe95b71b198281f42e3bdd8d01..ef77be575d8754d27c20c0496ba72d008aaddff6
@@@ -33,7 -33,6 +33,7 @@@
  #include <linux/security.h>
  #include <linux/ctype.h>
  #include <linux/kmemleak.h>
 +#include <linux/filter.h>
  #include <linux/fs.h>
  #include <linux/init.h>
  #include <linux/kernel.h>
@@@ -123,6 -122,7 +123,7 @@@ static unsigned long long_max = LONG_MA
  static int one_hundred = 100;
  static int two_hundred = 200;
  static int one_thousand = 1000;
+ static int three_thousand = 3000;
  #ifdef CONFIG_PRINTK
  static int ten_thousand = 10000;
  #endif
@@@ -2960,7 -2960,7 +2961,7 @@@ static struct ctl_table vm_table[] = 
                .mode           = 0644,
                .proc_handler   = watermark_scale_factor_sysctl_handler,
                .extra1         = SYSCTL_ONE,
-               .extra2         = &one_thousand,
+               .extra2         = &three_thousand,
        },
        {
                .procname       = "percpu_pagelist_high_fraction",
diff --combined mm/Makefile
index 7919cd7f13f2ac1c6b30c07e1875cd13fa780d6a,5c5a3a480fa673d07d2af0a084d83bf5f43eb0ab..588d3113f3b08b5f27b20a3532fca06bdcf40a5f
@@@ -15,8 -15,6 +15,8 @@@ KCSAN_SANITIZE_slab_common.o := 
  KCSAN_SANITIZE_slab.o := n
  KCSAN_SANITIZE_slub.o := n
  KCSAN_SANITIZE_page_alloc.o := n
 +# But enable explicit instrumentation for memory barriers.
 +KCSAN_INSTRUMENT_BARRIERS := y
  
  # These files are disabled because they produce non-interesting and/or
  # flaky coverage that is not a function of syscall inputs. E.g. slab is out of
@@@ -114,6 -112,7 +114,7 @@@ obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += 
  obj-$(CONFIG_CMA)     += cma.o
  obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
  obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
+ obj-$(CONFIG_PAGE_TABLE_CHECK) += page_table_check.o
  obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
  obj-$(CONFIG_SECRETMEM) += secretmem.o
  obj-$(CONFIG_CMA_SYSFS) += cma_sysfs.o
diff --combined mm/huge_memory.c
index f58524394dc13b14df461b125d5332a535b5b154,6ed86a8f6a5becb09e71e0391c76baf92c6d7e97..406a3c28c0266be6e7fb7cca1f79c6daa78de024
@@@ -1322,7 -1322,7 +1322,7 @@@ vm_fault_t do_huge_pmd_wp_page(struct v
         * We can only reuse the page if nobody else maps the huge page or it's
         * part.
         */
-       if (reuse_swap_page(page, NULL)) {
+       if (reuse_swap_page(page)) {
                pmd_t entry;
                entry = pmd_mkyoung(orig_pmd);
                entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
@@@ -2542,38 -2542,28 +2542,28 @@@ int total_mapcount(struct page *page
   * need full accuracy to avoid breaking page pinning, because
   * page_trans_huge_mapcount() is slower than page_mapcount().
   */
- int page_trans_huge_mapcount(struct page *page, int *total_mapcount)
+ int page_trans_huge_mapcount(struct page *page)
  {
-       int i, ret, _total_mapcount, mapcount;
+       int i, ret;
  
        /* hugetlbfs shouldn't call it */
        VM_BUG_ON_PAGE(PageHuge(page), page);
  
-       if (likely(!PageTransCompound(page))) {
-               mapcount = atomic_read(&page->_mapcount) + 1;
-               if (total_mapcount)
-                       *total_mapcount = mapcount;
-               return mapcount;
-       }
+       if (likely(!PageTransCompound(page)))
+               return atomic_read(&page->_mapcount) + 1;
  
        page = compound_head(page);
  
-       _total_mapcount = ret = 0;
+       ret = 0;
        for (i = 0; i < thp_nr_pages(page); i++) {
-               mapcount = atomic_read(&page[i]._mapcount) + 1;
+               int mapcount = atomic_read(&page[i]._mapcount) + 1;
                ret = max(ret, mapcount);
-               _total_mapcount += mapcount;
        }
-       if (PageDoubleMap(page)) {
+       if (PageDoubleMap(page))
                ret -= 1;
-               _total_mapcount -= thp_nr_pages(page);
-       }
-       mapcount = compound_mapcount(page);
-       ret += mapcount;
-       _total_mapcount += mapcount;
-       if (total_mapcount)
-               *total_mapcount = _total_mapcount;
-       return ret;
+       return ret + compound_mapcount(page);
  }
  
  /* Racy check whether the huge page can be split */
@@@ -2614,7 -2604,6 +2604,7 @@@ int split_huge_page_to_list(struct pag
  {
        struct page *head = compound_head(page);
        struct deferred_split *ds_queue = get_deferred_split_queue(head);
 +      XA_STATE(xas, &head->mapping->i_pages, head->index);
        struct anon_vma *anon_vma = NULL;
        struct address_space *mapping = NULL;
        int extra_pins, ret;
                        goto out;
                }
  
 +              xas_split_alloc(&xas, head, compound_order(head),
 +                              mapping_gfp_mask(mapping) & GFP_RECLAIM_MASK);
 +              if (xas_error(&xas)) {
 +                      ret = xas_error(&xas);
 +                      goto out;
 +              }
 +
                anon_vma = NULL;
                i_mmap_lock_read(mapping);
  
        /* block interrupt reentry in xa_lock and spinlock */
        local_irq_disable();
        if (mapping) {
 -              XA_STATE(xas, &mapping->i_pages, page_index(head));
 -
                /*
                 * Check if the head page is present in page cache.
                 * We assume all tail are present too, if head is there.
                 */
 -              xa_lock(&mapping->i_pages);
 +              xas_lock(&xas);
 +              xas_reset(&xas);
                if (xas_load(&xas) != head)
                        goto fail;
        }
                if (mapping) {
                        int nr = thp_nr_pages(head);
  
 +                      xas_split(&xas, head, thp_order(head));
                        if (PageSwapBacked(head)) {
                                __mod_lruvec_page_state(head, NR_SHMEM_THPS,
                                                        -nr);
                spin_unlock(&ds_queue->split_queue_lock);
  fail:
                if (mapping)
 -                      xa_unlock(&mapping->i_pages);
 +                      xas_unlock(&xas);
                local_irq_enable();
                remap_page(head, thp_nr_pages(head));
                ret = -EBUSY;
@@@ -2741,8 -2723,6 +2731,8 @@@ out_unlock
        if (mapping)
                i_mmap_unlock_read(mapping);
  out:
 +      /* Free any memory we didn't use */
 +      xas_nomem(&xas, 0);
        count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
        return ret;
  }
diff --combined mm/internal.h
index 26af8a5a5be349dc3f0a0fc9ea30d5b33bd530eb,c5834cc28a444db37fa362b3b29c7a2ba1f60aee..d80300392a194f20b4864191ab0ecaba831cb1aa
@@@ -12,8 -12,6 +12,8 @@@
  #include <linux/pagemap.h>
  #include <linux/tracepoint-defs.h>
  
 +struct folio_batch;
 +
  /*
   * The set of flags that only affect watermark checking and reclaim
   * behaviour. This is used by the MM to obey the caller constraints
@@@ -23,7 -21,7 +23,7 @@@
  #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
                        __GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\
                        __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\
-                       __GFP_ATOMIC)
+                       __GFP_ATOMIC|__GFP_NOLOCKDEP)
  
  /* The GFP flags allowed during early boot */
  #define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS))
@@@ -76,7 -74,6 +76,7 @@@ static inline bool can_madv_lru_vma(str
        return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP));
  }
  
 +struct zap_details;
  void unmap_page_range(struct mmu_gather *tlb,
                             struct vm_area_struct *vma,
                             unsigned long addr, unsigned long end,
@@@ -93,13 -90,7 +93,13 @@@ static inline void force_page_cache_rea
  }
  
  unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
 -              pgoff_t end, struct pagevec *pvec, pgoff_t *indices);
 +              pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
 +unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
 +              pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
 +void filemap_free_folio(struct address_space *mapping, struct folio *folio);
 +int truncate_inode_folio(struct address_space *mapping, struct folio *folio);
 +bool truncate_inode_partial_folio(struct folio *folio, loff_t start,
 +              loff_t end);
  
  /**
   * folio_evictable - Test whether a folio is evictable.
@@@ -166,11 -157,6 +166,6 @@@ extern void reclaim_throttle(pg_data_t 
   */
  extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
  
- /*
-  * in mm/memcontrol.c:
-  */
- extern bool cgroup_memory_nokmem;
  /*
   * in mm/page_alloc.c
   */
@@@ -397,7 -383,6 +392,7 @@@ void __vma_link_list(struct mm_struct *
  void __vma_unlink_list(struct mm_struct *mm, struct vm_area_struct *vma);
  
  #ifdef CONFIG_MMU
 +void unmap_mapping_folio(struct folio *folio);
  extern long populate_vma_page_range(struct vm_area_struct *vma,
                unsigned long start, unsigned long end, int *locked);
  extern long faultin_vma_page_range(struct vm_area_struct *vma,
@@@ -501,8 -486,8 +496,8 @@@ static inline struct file *maybe_unlock
        }
        return fpin;
  }
 -
  #else /* !CONFIG_MMU */
 +static inline void unmap_mapping_folio(struct folio *folio) { }
  static inline void clear_page_mlock(struct page *page) { }
  static inline void mlock_vma_page(struct page *page) { }
  static inline void vunmap_range_noflush(unsigned long start, unsigned long end)
diff --combined mm/kasan/quarantine.c
index 587da8995f2d9b4a9ade1536411c605b37fba7ac,47ed4fc33a29e094070fcfab9947b2f59434059e..08291ed33e93af757e436b8e2efbe626d50c1f79
@@@ -117,7 -117,7 +117,7 @@@ static unsigned long quarantine_batch_s
  
  static struct kmem_cache *qlink_to_cache(struct qlist_node *qlink)
  {
 -      return virt_to_head_page(qlink)->slab_cache;
 +      return virt_to_slab(qlink)->slab_cache;
  }
  
  static void *qlink_to_object(struct qlist_node *qlink, struct kmem_cache *cache)
  static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
  {
        void *object = qlink_to_object(qlink, cache);
+       struct kasan_free_meta *meta = kasan_get_free_meta(cache, object);
        unsigned long flags;
  
        if (IS_ENABLED(CONFIG_SLAB))
                local_irq_save(flags);
  
+       /*
+        * If init_on_free is enabled and KASAN's free metadata is stored in
+        * the object, zero the metadata. Otherwise, the object's memory will
+        * not be properly zeroed, as KASAN saves the metadata after the slab
+        * allocator zeroes the object.
+        */
+       if (slab_want_init_on_free(cache) &&
+           cache->kasan_info.free_meta_offset == 0)
+               memzero_explicit(meta, sizeof(*meta));
        /*
         * As the object now gets freed from the quarantine, assume that its
         * free track is no longer valid.
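
The quarantine hunk above scrubs KASAN's in-object free metadata with memzero_explicit() rather than memset(), because a memset() of memory that is never read again may legally be optimized out. Below is a loose userspace illustration of the same guarantee; forced_memset(), scrub() and fake_free_meta are invented names, and the volatile function pointer merely stands in for what memzero_explicit() provides in the kernel.

#include <stdio.h>
#include <string.h>

/* Calling memset() through a volatile pointer keeps the compiler from
 * proving the store is dead and deleting it. */
static void *(*const volatile forced_memset)(void *, int, size_t) = memset;

static void scrub(void *meta, size_t len)
{
        forced_memset(meta, 0, len);
}

int main(void)
{
        char fake_free_meta[32] = "stale free-track data";

        scrub(fake_free_meta, sizeof(fake_free_meta));
        printf("first byte after scrub: %d\n", fake_free_meta[0]);
        return 0;
}
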
diff --combined mm/khugepaged.c
index 2e1911cc3466dc66e27e64323e20b15ea5492447,7af84bac6fc241cd116e4b8f8dc5c530bbd0dabc..35f14d0a00a6cdadd20f63c4feeac2d44b3c8e14
@@@ -618,6 -618,7 +618,7 @@@ static int __collapse_huge_page_isolate
                                continue;
                        } else {
                                result = SCAN_EXCEED_NONE_PTE;
+                               count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
                                goto out;
                        }
                }
                if (page_mapcount(page) > 1 &&
                                ++shared > khugepaged_max_ptes_shared) {
                        result = SCAN_EXCEED_SHARED_PTE;
+                       count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
                        goto out;
                }
  
                        goto out;
                }
                if (!pte_write(pteval) && PageSwapCache(page) &&
-                               !reuse_swap_page(page, NULL)) {
+                               !reuse_swap_page(page)) {
                        /*
                         * Page is in the swap cache and cannot be re-used.
                         * It cannot be collapsed into a THP.
@@@ -756,11 -758,7 +758,7 @@@ static void __collapse_huge_page_copy(p
                                 * ptl mostly unnecessary.
                                 */
                                spin_lock(ptl);
-                               /*
-                                * paravirt calls inside pte_clear here are
-                                * superfluous.
-                                */
-                               pte_clear(vma->vm_mm, address, _pte);
+                               ptep_clear(vma->vm_mm, address, _pte);
                                spin_unlock(ptl);
                        }
                } else {
                         * inside page_remove_rmap().
                         */
                        spin_lock(ptl);
-                       /*
-                        * paravirt calls inside pte_clear here are
-                        * superfluous.
-                        */
-                       pte_clear(vma->vm_mm, address, _pte);
+                       ptep_clear(vma->vm_mm, address, _pte);
                        page_remove_rmap(src_page, false);
                        spin_unlock(ptl);
                        free_page_and_swap_cache(src_page);
@@@ -1261,6 -1255,7 +1255,7 @@@ static int khugepaged_scan_pmd(struct m
                                continue;
                        } else {
                                result = SCAN_EXCEED_SWAP_PTE;
+                               count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
                                goto out_unmap;
                        }
                }
                                continue;
                        } else {
                                result = SCAN_EXCEED_NONE_PTE;
+                               count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
                                goto out_unmap;
                        }
                }
                if (page_mapcount(page) > 1 &&
                                ++shared > khugepaged_max_ptes_shared) {
                        result = SCAN_EXCEED_SHARED_PTE;
+                       count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
                        goto out_unmap;
                }
  
                /*
                 * Record which node the original page is from and save this
                 * information to khugepaged_node_load[].
-                * Khupaged will allocate hugepage from the node has the max
+                * Khugepaged will allocate hugepage from the node that has the max
                 * hit record.
                 */
                node = page_to_nid(page);
@@@ -1667,10 -1664,7 +1664,10 @@@ static void collapse_file(struct mm_str
        }
        count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC);
  
 -      /* This will be less messy when we use multi-index entries */
 +      /*
 +       * Ensure we have slots for all the pages in the range.  This is
 +       * almost certainly a no-op because most of the pages must be present
 +       */
        do {
                xas_lock_irq(&xas);
                xas_create_range(&xas);
@@@ -1895,9 -1889,6 +1892,9 @@@ out_unlock
                        __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none);
        }
  
 +      /* Join all the small entries into a single multi-index entry */
 +      xas_set_order(&xas, start, HPAGE_PMD_ORDER);
 +      xas_store(&xas, new_page);
  xa_locked:
        xas_unlock_irq(&xas);
  xa_unlocked:
@@@ -2014,15 -2005,12 +2011,16 @@@ static void khugepaged_scan_file(struc
                if (xa_is_value(page)) {
                        if (++swap > khugepaged_max_ptes_swap) {
                                result = SCAN_EXCEED_SWAP_PTE;
+                               count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
                                break;
                        }
                        continue;
                }
  
 +              /*
 +               * XXX: khugepaged should compact smaller compound pages
 +               * into a PMD sized page
 +               */
                if (PageTransCompound(page)) {
                        result = SCAN_PAGE_COMPOUND;
                        break;
        if (result == SCAN_SUCCEED) {
                if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
                        result = SCAN_EXCEED_NONE_PTE;
+                       count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
                } else {
                        node = khugepaged_find_target_node();
                        collapse_file(mm, file, start, hpage, node);
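
The khugepaged hunks above all follow one pattern: whenever a scan bails out with a SCAN_EXCEED_* result, the matching THP_SCAN_EXCEED_* vmstat counter is bumped via count_vm_event(). The rough standalone model below only sketches that accounting; scan(), vm_events[] and the threshold value are invented for illustration and are not the kernel's data structures.

#include <stdio.h>

enum scan_result { SCAN_SUCCEED, SCAN_EXCEED_NONE_PTE, NR_SCAN_RESULTS };

static unsigned long vm_events[NR_SCAN_RESULTS];   /* stand-in for count_vm_event() */

static enum scan_result scan(const int *ptes, int n, int max_none)
{
        int none = 0;

        for (int i = 0; i < n; i++) {
                if (ptes[i] == 0 && ++none > max_none) {
                        vm_events[SCAN_EXCEED_NONE_PTE]++;  /* count the bail-out */
                        return SCAN_EXCEED_NONE_PTE;
                }
        }
        return SCAN_SUCCEED;
}

int main(void)
{
        int ptes[] = { 1, 0, 0, 0, 1 };

        printf("result=%d, exceed_none events=%lu\n",
               (int)scan(ptes, 5, 2), vm_events[SCAN_EXCEED_NONE_PTE]);
        return 0;
}
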
diff --combined mm/memcontrol.c
index 4a7b3ebf8e48189d5ec445579508721b7f9b3d14,c9ddd02dc5de4c295322b80110aced6d9b7c0429..09d342c7cbd0d9f23b3c916c8dab2132e1d35ffe
@@@ -84,7 -84,7 +84,7 @@@ EXPORT_PER_CPU_SYMBOL_GPL(int_active_me
  static bool cgroup_memory_nosocket __ro_after_init;
  
  /* Kernel memory accounting disabled? */
- bool cgroup_memory_nokmem __ro_after_init;
+ static bool cgroup_memory_nokmem __ro_after_init;
  
  /* Whether the swap controller is active */
  #ifdef CONFIG_MEMCG_SWAP
@@@ -629,11 -629,17 +629,17 @@@ static DEFINE_SPINLOCK(stats_flush_lock
  static DEFINE_PER_CPU(unsigned int, stats_updates);
  static atomic_t stats_flush_threshold = ATOMIC_INIT(0);
  
- static inline void memcg_rstat_updated(struct mem_cgroup *memcg)
+ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
  {
+       unsigned int x;
        cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
-       if (!(__this_cpu_inc_return(stats_updates) % MEMCG_CHARGE_BATCH))
-               atomic_inc(&stats_flush_threshold);
+       x = __this_cpu_add_return(stats_updates, abs(val));
+       if (x > MEMCG_CHARGE_BATCH) {
+               atomic_add(x / MEMCG_CHARGE_BATCH, &stats_flush_threshold);
+               __this_cpu_write(stats_updates, 0);
+       }
  }
  
  static void __mem_cgroup_flush_stats(void)
@@@ -656,7 -662,7 +662,7 @@@ void mem_cgroup_flush_stats(void
  
  static void flush_memcg_stats_dwork(struct work_struct *w)
  {
-       mem_cgroup_flush_stats();
+       __mem_cgroup_flush_stats();
        queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
  }
  
@@@ -672,7 -678,7 +678,7 @@@ void __mod_memcg_state(struct mem_cgrou
                return;
  
        __this_cpu_add(memcg->vmstats_percpu->state[idx], val);
-       memcg_rstat_updated(memcg);
+       memcg_rstat_updated(memcg, val);
  }
  
  /* idx can be of type enum memcg_stat_item or node_stat_item. */
@@@ -705,7 -711,7 +711,7 @@@ void __mod_memcg_lruvec_state(struct lr
        /* Update lruvec */
        __this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
  
-       memcg_rstat_updated(memcg);
+       memcg_rstat_updated(memcg, val);
  }
  
  /**
@@@ -789,7 -795,7 +795,7 @@@ void __count_memcg_events(struct mem_cg
                return;
  
        __this_cpu_add(memcg->vmstats_percpu->events[idx], count);
-       memcg_rstat_updated(memcg);
+       memcg_rstat_updated(memcg, count);
  }
  
  static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
@@@ -1369,6 -1375,7 +1375,7 @@@ static const struct memory_stat memory_
        { "pagetables",                 NR_PAGETABLE                    },
        { "percpu",                     MEMCG_PERCPU_B                  },
        { "sock",                       MEMCG_SOCK                      },
+       { "vmalloc",                    MEMCG_VMALLOC                   },
        { "shmem",                      NR_SHMEM                        },
        { "file_mapped",                NR_FILE_MAPPED                  },
        { "file_dirty",                 NR_FILE_DIRTY                   },
@@@ -2816,31 -2823,31 +2823,31 @@@ static inline void mod_objcg_mlstate(st
        rcu_read_unlock();
  }
  
 -int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
 -                               gfp_t gfp, bool new_page)
 +int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
 +                               gfp_t gfp, bool new_slab)
  {
 -      unsigned int objects = objs_per_slab_page(s, page);
 +      unsigned int objects = objs_per_slab(s, slab);
        unsigned long memcg_data;
        void *vec;
  
        gfp &= ~OBJCGS_CLEAR_MASK;
        vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp,
 -                         page_to_nid(page));
 +                         slab_nid(slab));
        if (!vec)
                return -ENOMEM;
  
        memcg_data = (unsigned long) vec | MEMCG_DATA_OBJCGS;
 -      if (new_page) {
 +      if (new_slab) {
                /*
 -               * If the slab page is brand new and nobody can yet access
 -               * it's memcg_data, no synchronization is required and
 -               * memcg_data can be simply assigned.
 +               * If the slab is brand new and nobody can yet access its
 +               * memcg_data, no synchronization is required and memcg_data can
 +               * be simply assigned.
                 */
 -              page->memcg_data = memcg_data;
 -      } else if (cmpxchg(&page->memcg_data, 0, memcg_data)) {
 +              slab->memcg_data = memcg_data;
 +      } else if (cmpxchg(&slab->memcg_data, 0, memcg_data)) {
                /*
 -               * If the slab page is already in use, somebody can allocate
 -               * and assign obj_cgroups in parallel. In this case the existing
 +               * If the slab is already in use, somebody can allocate and
 +               * assign obj_cgroups in parallel. In this case the existing
                 * objcg vector should be reused.
                 */
                kfree(vec);
   */
  struct mem_cgroup *mem_cgroup_from_obj(void *p)
  {
 -      struct page *page;
 +      struct folio *folio;
  
        if (mem_cgroup_disabled())
                return NULL;
  
 -      page = virt_to_head_page(p);
 +      folio = virt_to_folio(p);
  
        /*
         * Slab objects are accounted individually, not per-page.
         * Memcg membership data for each individual object is saved in
 -       * the page->obj_cgroups.
 +       * slab->memcg_data.
         */
 -      if (page_objcgs_check(page)) {
 -              struct obj_cgroup *objcg;
 +      if (folio_test_slab(folio)) {
 +              struct obj_cgroup **objcgs;
 +              struct slab *slab;
                unsigned int off;
  
 -              off = obj_to_index(page->slab_cache, page, p);
 -              objcg = page_objcgs(page)[off];
 -              if (objcg)
 -                      return obj_cgroup_memcg(objcg);
 +              slab = folio_slab(folio);
 +              objcgs = slab_objcgs(slab);
 +              if (!objcgs)
 +                      return NULL;
 +
 +              off = obj_to_index(slab->slab_cache, slab, p);
 +              if (objcgs[off])
 +                      return obj_cgroup_memcg(objcgs[off]);
  
                return NULL;
        }
  
        /*
 -       * page_memcg_check() is used here, because page_has_obj_cgroups()
 -       * check above could fail because the object cgroups vector wasn't set
 -       * at that moment, but it can be set concurrently.
 +       * page_memcg_check() is used here, because in theory we can encounter
 +       * a folio where the slab flag has been cleared already, but
 +       * slab->memcg_data has not been freed yet.
         * page_memcg_check(page) will guarantee that a proper memory
         * cgroup pointer or NULL will be returned.
         */
 -      return page_memcg_check(page);
 +      return page_memcg_check(folio_page(folio, 0));
  }
  
  __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
@@@ -4850,6 -4852,17 +4857,17 @@@ out_kfree
        return ret;
  }
  
+ #if defined(CONFIG_MEMCG_KMEM) && (defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG))
+ static int mem_cgroup_slab_show(struct seq_file *m, void *p)
+ {
+       /*
+        * Deprecated.
+        * Please, take a look at tools/cgroup/slabinfo.py .
+        */
+       return 0;
+ }
+ #endif
  static struct cftype mem_cgroup_legacy_files[] = {
        {
                .name = "usage_in_bytes",
        (defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG))
        {
                .name = "kmem.slabinfo",
-               .seq_show = memcg_slab_show,
+               .seq_show = mem_cgroup_slab_show,
        },
  #endif
        {
@@@ -5110,15 -5123,11 +5128,11 @@@ static void mem_cgroup_free(struct mem_
  static struct mem_cgroup *mem_cgroup_alloc(void)
  {
        struct mem_cgroup *memcg;
-       unsigned int size;
        int node;
        int __maybe_unused i;
        long error = -ENOMEM;
  
-       size = sizeof(struct mem_cgroup);
-       size += nr_node_ids * sizeof(struct mem_cgroup_per_node *);
-       memcg = kzalloc(size, GFP_KERNEL);
+       memcg = kzalloc(struct_size(memcg, nodeinfo, nr_node_ids), GFP_KERNEL);
        if (!memcg)
                return ERR_PTR(error);
  
@@@ -6312,6 -6321,8 +6326,8 @@@ static void __memory_events_show(struc
        seq_printf(m, "oom %lu\n", atomic_long_read(&events[MEMCG_OOM]));
        seq_printf(m, "oom_kill %lu\n",
                   atomic_long_read(&events[MEMCG_OOM_KILL]));
+       seq_printf(m, "oom_group_kill %lu\n",
+                  atomic_long_read(&events[MEMCG_OOM_GROUP_KILL]));
  }
  
  static int memory_events_show(struct seq_file *m, void *v)
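
One detail worth spelling out from the memcontrol.c hunk above: memcg_rstat_updated() now accumulates the magnitude of each update (abs(val)) and converts it into flush-threshold credits in units of MEMCG_CHARGE_BATCH, instead of counting calls. The single-threaded sketch below is only a model of that arithmetic; rstat_updated(), stats_updates and flush_threshold are simplified stand-ins for the per-CPU and atomic variables used in the kernel.

#include <stdio.h>
#include <stdlib.h>

#define CHARGE_BATCH 64                 /* stand-in for MEMCG_CHARGE_BATCH */

static unsigned int stats_updates;      /* per-CPU counter in the kernel */
static long flush_threshold;            /* atomic_t in the kernel */

static void rstat_updated(int val)
{
        stats_updates += (unsigned int)abs(val);   /* magnitude, not call count */
        if (stats_updates > CHARGE_BATCH) {
                flush_threshold += stats_updates / CHARGE_BATCH;
                stats_updates = 0;
        }
}

int main(void)
{
        rstat_updated(200);     /* one large update is worth several batches */
        rstat_updated(-10);
        printf("flush_threshold=%ld, pending=%u\n", flush_threshold, stats_updates);
        return 0;
}
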
diff --combined mm/memory-failure.c
index f1c389f7e6692d2c45360b6e1cff3faa161db0e2,373837bb94cb3a4a580beba950a8f654912ccdcd..14ae5c18e77668b431d6a1123108e5ddc3ddeaa6
@@@ -58,6 -58,7 +58,7 @@@
  #include <linux/ratelimit.h>
  #include <linux/page-isolation.h>
  #include <linux/pagewalk.h>
+ #include <linux/shmem_fs.h>
  #include "internal.h"
  #include "ras/ras_event.h"
  
@@@ -722,7 -723,6 +723,6 @@@ static const char * const action_page_t
        [MF_MSG_KERNEL_HIGH_ORDER]      = "high-order kernel page",
        [MF_MSG_SLAB]                   = "kernel slab page",
        [MF_MSG_DIFFERENT_COMPOUND]     = "different compound page after locking",
-       [MF_MSG_POISONED_HUGE]          = "huge page already hardware poisoned",
        [MF_MSG_HUGE]                   = "huge page",
        [MF_MSG_FREE_HUGE]              = "free huge page",
        [MF_MSG_NON_PMD_HUGE]           = "non-pmd-sized huge page",
        [MF_MSG_CLEAN_LRU]              = "clean LRU page",
        [MF_MSG_TRUNCATED_LRU]          = "already truncated LRU page",
        [MF_MSG_BUDDY]                  = "free buddy page",
-       [MF_MSG_BUDDY_2ND]              = "free buddy page (2nd try)",
        [MF_MSG_DAX]                    = "dax page",
        [MF_MSG_UNSPLIT_THP]            = "unsplit thp",
        [MF_MSG_UNKNOWN]                = "unknown page",
@@@ -867,6 -866,7 +866,7 @@@ static int me_pagecache_clean(struct pa
  {
        int ret;
        struct address_space *mapping;
+       bool extra_pins;
  
        delete_from_lru_cache(p);
  
                goto out;
        }
  
+       /*
+        * The shmem page is kept in the page cache instead of being
+        * truncated, so it is expected to have an extra refcount after
+        * error-handling.
+        */
+       extra_pins = shmem_mapping(mapping);
        /*
         * Truncation is a bit tricky. Enable it per file system for now.
         *
         * Open: to take i_rwsem or not for this? Right now we don't.
         */
        ret = truncate_error_page(p, page_to_pfn(p), mapping);
+       if (has_extra_refcount(ps, p, extra_pins))
+               ret = MF_FAILED;
  out:
        unlock_page(p);
  
-       if (has_extra_refcount(ps, p, false))
-               ret = MF_FAILED;
        return ret;
  }
  
@@@ -1154,6 -1160,22 +1160,22 @@@ static int page_action(struct page_stat
        return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
  }
  
+ static inline bool PageHWPoisonTakenOff(struct page *page)
+ {
+       return PageHWPoison(page) && page_private(page) == MAGIC_HWPOISON;
+ }
+ void SetPageHWPoisonTakenOff(struct page *page)
+ {
+       set_page_private(page, MAGIC_HWPOISON);
+ }
+ void ClearPageHWPoisonTakenOff(struct page *page)
+ {
+       if (PageHWPoison(page))
+               set_page_private(page, 0);
+ }
  /*
   * Return true if a page type of a given page is supported by hwpoison
   * mechanism (while handling could fail), otherwise false.  This function
        return ret;
  }
  
+ static int __get_unpoison_page(struct page *page)
+ {
+       struct page *head = compound_head(page);
+       int ret = 0;
+       bool hugetlb = false;
+       ret = get_hwpoison_huge_page(head, &hugetlb);
+       if (hugetlb)
+               return ret;
+       /*
+        * PageHWPoisonTakenOff pages are not only marked as PG_hwpoison,
+        * but also isolated from the buddy freelist, so we need to identify
+        * that state and cancel both operations to unpoison.
+        */
+       if (PageHWPoisonTakenOff(page))
+               return -EHWPOISON;
+       return get_page_unless_zero(page) ? 1 : 0;
+ }
  /**
   * get_hwpoison_page() - Get refcount for memory error handling
   * @p:                Raw error page (hit by memory error)
   *
   * get_hwpoison_page() takes a page refcount of an error page to handle memory
   * error on it, after checking that the error page is in a well-defined state
-  * (defined as a page-type we can successfully handle the memor error on it,
+  * (defined as a page-type we can successfully handle the memory error on it,
   * such as LRU page and hugetlb page).
   *
   * Memory error handling could be triggered at any time on any type of page,
   * extra care for the error page's state (as done in __get_hwpoison_page()),
   * and has some retry logic in get_any_page().
   *
+  * When called from unpoison_memory(), the caller should already ensure that
+  * the given page has PG_hwpoison. So it's never reused for other page
+  * allocations, and __get_unpoison_page() never races with them.
+  *
   * Return: 0 on failure,
   *         1 on success for in-use pages in a well-defined state,
   *         -EIO for pages on which we can not handle memory errors,
   *         -EBUSY when get_hwpoison_page() has raced with page lifecycle
-  *         operations like allocation and free.
+  *         operations like allocation and free,
+  *         -EHWPOISON when the page is hwpoisoned and taken off from buddy.
   */
  static int get_hwpoison_page(struct page *p, unsigned long flags)
  {
        int ret;
  
        zone_pcp_disable(page_zone(p));
-       ret = get_any_page(p, flags);
+       if (flags & MF_UNPOISON)
+               ret = __get_unpoison_page(p);
+       else
+               ret = get_any_page(p, flags);
        zone_pcp_enable(page_zone(p));
  
        return ret;
@@@ -1494,14 -1545,6 +1545,6 @@@ static int memory_failure_hugetlb(unsig
        lock_page(head);
        page_flags = head->flags;
  
-       if (!PageHWPoison(head)) {
-               pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
-               num_poisoned_pages_dec();
-               unlock_page(head);
-               put_page(head);
-               return 0;
-       }
        /*
         * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
         * simply disable it. In order to make it work properly, we need
@@@ -1615,6 -1658,8 +1658,8 @@@ out
        return rc;
  }
  
+ static DEFINE_MUTEX(mf_mutex);
  /**
   * memory_failure - Handle memory failure of a page.
   * @pfn: Page Number of the corrupted page
@@@ -1641,33 -1686,25 +1686,32 @@@ int memory_failure(unsigned long pfn, i
        int res = 0;
        unsigned long page_flags;
        bool retry = true;
-       static DEFINE_MUTEX(mf_mutex);
  
        if (!sysctl_memory_failure_recovery)
                panic("Memory failure on page %lx", pfn);
  
 +      mutex_lock(&mf_mutex);
 +
        p = pfn_to_online_page(pfn);
        if (!p) {
 +              res = arch_memory_failure(pfn, flags);
 +              if (res == 0)
 +                      goto unlock_mutex;
 +
                if (pfn_valid(pfn)) {
                        pgmap = get_dev_pagemap(pfn, NULL);
 -                      if (pgmap)
 -                              return memory_failure_dev_pagemap(pfn, flags,
 -                                                                pgmap);
 +                      if (pgmap) {
 +                              res = memory_failure_dev_pagemap(pfn, flags,
 +                                                               pgmap);
 +                              goto unlock_mutex;
 +                      }
                }
                pr_err("Memory failure: %#lx: memory outside kernel control\n",
                        pfn);
 -              return -ENXIO;
 +              res = -ENXIO;
 +              goto unlock_mutex;
        }
  
 -      mutex_lock(&mf_mutex);
 -
  try_again:
        if (PageHuge(p)) {
                res = memory_failure_hugetlb(pfn, flags);
         */
        page_flags = p->flags;
  
-       /*
-        * unpoison always clear PG_hwpoison inside page lock
-        */
-       if (!PageHWPoison(p)) {
-               pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
-               num_poisoned_pages_dec();
-               unlock_page(p);
-               put_page(p);
-               goto unlock_mutex;
-       }
        if (hwpoison_filter(p)) {
                if (TestClearPageHWPoison(p))
                        num_poisoned_pages_dec();
@@@ -1955,6 -1982,28 +1989,28 @@@ core_initcall(memory_failure_init)
                pr_info(fmt, pfn);                      \
  })
  
+ static inline int clear_page_hwpoison(struct ratelimit_state *rs, struct page *p)
+ {
+       if (TestClearPageHWPoison(p)) {
+               unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
+                                page_to_pfn(p), rs);
+               num_poisoned_pages_dec();
+               return 1;
+       }
+       return 0;
+ }
+ static inline int unpoison_taken_off_page(struct ratelimit_state *rs,
+                                         struct page *p)
+ {
+       if (put_page_back_buddy(p)) {
+               unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
+                                page_to_pfn(p), rs);
+               return 0;
+       }
+       return -EBUSY;
+ }
  /**
   * unpoison_memory - Unpoison a previously poisoned page
   * @pfn: Page number of the to be unpoisoned page
@@@ -1971,8 -2020,7 +2027,7 @@@ int unpoison_memory(unsigned long pfn
  {
        struct page *page;
        struct page *p;
-       int freeit = 0;
-       unsigned long flags = 0;
+       int ret = -EBUSY;
        static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
                                        DEFAULT_RATELIMIT_BURST);
  
        p = pfn_to_page(pfn);
        page = compound_head(p);
  
+       mutex_lock(&mf_mutex);
        if (!PageHWPoison(p)) {
                unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
                                 pfn, &unpoison_rs);
-               return 0;
+               goto unlock_mutex;
        }
  
        if (page_count(page) > 1) {
                unpoison_pr_info("Unpoison: Someone grabs the hwpoison page %#lx\n",
                                 pfn, &unpoison_rs);
-               return 0;
+               goto unlock_mutex;
        }
  
        if (page_mapped(page)) {
                unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
                                 pfn, &unpoison_rs);
-               return 0;
+               goto unlock_mutex;
        }
  
        if (page_mapping(page)) {
                unpoison_pr_info("Unpoison: the hwpoison page has non-NULL mapping %#lx\n",
                                 pfn, &unpoison_rs);
-               return 0;
-       }
-       /*
-        * unpoison_memory() can encounter thp only when the thp is being
-        * worked by memory_failure() and the page lock is not held yet.
-        * In such case, we yield to memory_failure() and make unpoison fail.
-        */
-       if (!PageHuge(page) && PageTransHuge(page)) {
-               unpoison_pr_info("Unpoison: Memory failure is now running on %#lx\n",
-                                pfn, &unpoison_rs);
-               return 0;
+               goto unlock_mutex;
        }
  
-       if (!get_hwpoison_page(p, flags)) {
-               if (TestClearPageHWPoison(p))
-                       num_poisoned_pages_dec();
-               unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
-                                pfn, &unpoison_rs);
-               return 0;
-       }
+       if (PageSlab(page) || PageTable(page))
+               goto unlock_mutex;
  
-       lock_page(page);
-       /*
-        * This test is racy because PG_hwpoison is set outside of page lock.
-        * That's acceptable because that won't trigger kernel panic. Instead,
-        * the PG_hwpoison page will be caught and isolated on the entrance to
-        * the free buddy page pool.
-        */
-       if (TestClearPageHWPoison(page)) {
-               unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
-                                pfn, &unpoison_rs);
-               num_poisoned_pages_dec();
-               freeit = 1;
-       }
-       unlock_page(page);
+       ret = get_hwpoison_page(p, MF_UNPOISON);
+       if (!ret) {
+               if (clear_page_hwpoison(&unpoison_rs, page))
+                       ret = 0;
+               else
+                       ret = -EBUSY;
+       } else if (ret < 0) {
+               if (ret == -EHWPOISON) {
+                       ret = unpoison_taken_off_page(&unpoison_rs, p);
+               } else
+                       unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
+                                        pfn, &unpoison_rs);
+       } else {
+               int freeit = clear_page_hwpoison(&unpoison_rs, p);
  
-       put_page(page);
-       if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1))
                put_page(page);
+               if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1)) {
+                       put_page(page);
+                       ret = 0;
+               }
+       }
  
-       return 0;
+ unlock_mutex:
+       mutex_unlock(&mf_mutex);
+       return ret;
  }
  EXPORT_SYMBOL(unpoison_memory);
  
@@@ -2225,9 -2264,12 +2271,12 @@@ int soft_offline_page(unsigned long pfn
                return -EIO;
        }
  
+       mutex_lock(&mf_mutex);
        if (PageHWPoison(page)) {
                pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
                put_ref_page(ref_page);
+               mutex_unlock(&mf_mutex);
                return 0;
        }
  
@@@ -2246,5 -2288,7 +2295,7 @@@ retry
                }
        }
  
+       mutex_unlock(&mf_mutex);
        return ret;
  }
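
For readers of the memory-failure.c hunks above: a "taken off" hwpoison page is recognized by combining the PG_hwpoison flag with a magic value stored in page_private(). The sketch below models only that tagging idea with an ordinary struct; fake_page is invented, and the MAGIC_HWPOISON value used here is an arbitrary placeholder rather than the kernel's actual constant.

#include <stdbool.h>
#include <stdio.h>

#define MAGIC_HWPOISON 0x4857504fUL     /* arbitrary marker for this example */

struct fake_page {
        bool hwpoison;                  /* models the PG_hwpoison flag */
        unsigned long private;          /* models page_private() */
};

static bool PageHWPoisonTakenOff(const struct fake_page *p)
{
        return p->hwpoison && p->private == MAGIC_HWPOISON;
}

static void SetPageHWPoisonTakenOff(struct fake_page *p)
{
        p->private = MAGIC_HWPOISON;
}

static void ClearPageHWPoisonTakenOff(struct fake_page *p)
{
        if (p->hwpoison)
                p->private = 0;
}

int main(void)
{
        struct fake_page p = { .hwpoison = true, .private = 0 };

        SetPageHWPoisonTakenOff(&p);
        printf("taken off after set:   %d\n", PageHWPoisonTakenOff(&p));
        ClearPageHWPoisonTakenOff(&p);
        printf("taken off after clear: %d\n", PageHWPoisonTakenOff(&p));
        return 0;
}
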
diff --combined mm/memory.c
index 23f2f1300d4294f1bb46253d65ee2c1c9eadb29b,571d02f419baa4682515e48e9fcdb270988b8b1f..f306e698a1e3ebf6c0f7c61b9b66b32bff832ddc
@@@ -41,6 -41,7 +41,7 @@@
  
  #include <linux/kernel_stat.h>
  #include <linux/mm.h>
+ #include <linux/mm_inline.h>
  #include <linux/sched/mm.h>
  #include <linux/sched/coredump.h>
  #include <linux/sched/numa_balancing.h>
@@@ -719,8 -720,6 +720,6 @@@ static void restore_exclusive_pte(struc
        else if (is_writable_device_exclusive_entry(entry))
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
  
-       set_pte_at(vma->vm_mm, address, ptep, pte);
        /*
         * No need to take a page reference as one was already
         * created when the swap entry was made.
                 */
                WARN_ON_ONCE(!PageAnon(page));
  
+       set_pte_at(vma->vm_mm, address, ptep, pte);
        if (vma->vm_flags & VM_LOCKED)
                mlock_vma_page(page);
  
@@@ -1304,28 -1305,6 +1305,28 @@@ copy_page_range(struct vm_area_struct *
        return ret;
  }
  
 +/*
 + * Parameter block passed down to zap_pte_range in exceptional cases.
 + */
 +struct zap_details {
 +      struct address_space *zap_mapping;      /* Check page->mapping if set */
 +      struct folio *single_folio;     /* Locked folio to be unmapped */
 +};
 +
 +/*
 + * We set details->zap_mapping when we want to unmap shared but keep private
 + * pages. Return true if we should skip zapping this page, false otherwise.
 + */
 +static inline bool
 +zap_skip_check_mapping(struct zap_details *details, struct page *page)
 +{
 +      if (!details || !page)
 +              return false;
 +
 +      return details->zap_mapping &&
 +              (details->zap_mapping != page_rmapping(page));
 +}
 +
  static unsigned long zap_pte_range(struct mmu_gather *tlb,
                                struct vm_area_struct *vma, pmd_t *pmd,
                                unsigned long addr, unsigned long end,
@@@ -1465,8 -1444,8 +1466,8 @@@ static inline unsigned long zap_pmd_ran
                        else if (zap_huge_pmd(tlb, vma, pmd, addr))
                                goto next;
                        /* fall through */
 -              } else if (details && details->single_page &&
 -                         PageTransCompound(details->single_page) &&
 +              } else if (details && details->single_folio &&
 +                         folio_test_pmd_mappable(details->single_folio) &&
                           next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) {
                        spinlock_t *ptl = pmd_lock(tlb->mm, pmd);
                        /*
@@@ -3354,30 -3333,31 +3355,30 @@@ static inline void unmap_mapping_range_
  }
  
  /**
 - * unmap_mapping_page() - Unmap single page from processes.
 - * @page: The locked page to be unmapped.
 + * unmap_mapping_folio() - Unmap single folio from processes.
 + * @folio: The locked folio to be unmapped.
   *
 - * Unmap this page from any userspace process which still has it mmaped.
 + * Unmap this folio from any userspace process which still has it mmaped.
   * Typically, for efficiency, the range of nearby pages has already been
   * unmapped by unmap_mapping_pages() or unmap_mapping_range().  But once
 - * truncation or invalidation holds the lock on a page, it may find that
 - * the page has been remapped again: and then uses unmap_mapping_page()
 + * truncation or invalidation holds the lock on a folio, it may find that
 + * the page has been remapped again: and then uses unmap_mapping_folio()
   * to unmap it finally.
   */
 -void unmap_mapping_page(struct page *page)
 +void unmap_mapping_folio(struct folio *folio)
  {
 -      struct address_space *mapping = page->mapping;
 +      struct address_space *mapping = folio->mapping;
        struct zap_details details = { };
        pgoff_t first_index;
        pgoff_t last_index;
  
 -      VM_BUG_ON(!PageLocked(page));
 -      VM_BUG_ON(PageTail(page));
 +      VM_BUG_ON(!folio_test_locked(folio));
  
 -      first_index = page->index;
 -      last_index = page->index + thp_nr_pages(page) - 1;
 +      first_index = folio->index;
 +      last_index = folio->index + folio_nr_pages(folio) - 1;
  
        details.zap_mapping = mapping;
 -      details.single_page = page;
 +      details.single_folio = folio;
  
        i_mmap_lock_write(mapping);
        if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
@@@ -3647,7 -3627,7 +3648,7 @@@ vm_fault_t do_swap_page(struct vm_faul
        inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
        dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
        pte = mk_pte(page, vma->vm_page_prot);
-       if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
+       if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
                vmf->flags &= ~FAULT_FLAG_WRITE;
                ret |= VM_FAULT_WRITE;
                pte = pte_mkuffd_wp(pte);
                pte = pte_wrprotect(pte);
        }
-       set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
-       arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
        vmf->orig_pte = pte;
  
        /* ksm created a completely new copy */
                do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
        }
  
+       set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
+       arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
        swap_free(entry);
        if (mem_cgroup_swap_full(page) ||
            (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
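
The memory.c hunk above moves struct zap_details into the .c file and keys the skip decision on details->zap_mapping. The fragment below is a loose model of that predicate using opaque pointers in place of struct address_space and struct page; zap_details_model and the mapping variables are invented for illustration only.

#include <stdbool.h>
#include <stdio.h>

struct zap_details_model {
        const void *zap_mapping;   /* only zap pages that belong to this mapping */
};

/* Return true when this page should be skipped: a target mapping was given
 * and the page belongs to a different one. */
static bool zap_skip_check_mapping(const struct zap_details_model *details,
                                   const void *page_mapping)
{
        if (!details || !page_mapping)
                return false;
        return details->zap_mapping && details->zap_mapping != page_mapping;
}

int main(void)
{
        int shared_mapping, private_mapping;
        struct zap_details_model d = { .zap_mapping = &shared_mapping };

        printf("same mapping  -> skip=%d\n",
               zap_skip_check_mapping(&d, &shared_mapping));
        printf("other mapping -> skip=%d\n",
               zap_skip_check_mapping(&d, &private_mapping));
        return 0;
}
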
diff --combined mm/memremap.c
index 643965da13a60b0d4611efac33a7e17112767f08,a2869d8519a207189c24a72ed068a8816dde4eb3..6aa5f0c2d11fda56628243cbbf38561ae3bc03b9
@@@ -102,16 -102,47 +102,23 @@@ static unsigned long pfn_end(struct dev
        return (range->start + range_len(range)) >> PAGE_SHIFT;
  }
  
- static unsigned long pfn_next(unsigned long pfn)
+ static unsigned long pfn_next(struct dev_pagemap *pgmap, unsigned long pfn)
  {
-       if (pfn % 1024 == 0)
+       if (pfn % (1024 << pgmap->vmemmap_shift))
                cond_resched();
-       return pfn + 1;
+       return pfn + pgmap_vmemmap_nr(pgmap);
+ }
+ static unsigned long pfn_len(struct dev_pagemap *pgmap, unsigned long range_id)
+ {
+       return (pfn_end(pgmap, range_id) -
+               pfn_first(pgmap, range_id)) >> pgmap->vmemmap_shift;
  }
  
  #define for_each_device_pfn(pfn, map, i) \
-       for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(pfn))
+       for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); \
+            pfn = pfn_next(map, pfn))
  
 -static void dev_pagemap_kill(struct dev_pagemap *pgmap)
 -{
 -      if (pgmap->ops && pgmap->ops->kill)
 -              pgmap->ops->kill(pgmap);
 -      else
 -              percpu_ref_kill(pgmap->ref);
 -}
 -
 -static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
 -{
 -      if (pgmap->ops && pgmap->ops->cleanup) {
 -              pgmap->ops->cleanup(pgmap);
 -      } else {
 -              wait_for_completion(&pgmap->done);
 -              percpu_ref_exit(pgmap->ref);
 -      }
 -      /*
 -       * Undo the pgmap ref assignment for the internal case as the
 -       * caller may re-enable the same pgmap.
 -       */
 -      if (pgmap->ref == &pgmap->internal_ref)
 -              pgmap->ref = NULL;
 -}
 -
  static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
  {
        struct range *range = &pgmap->ranges[range_id];
@@@ -143,12 -174,11 +150,12 @@@ void memunmap_pages(struct dev_pagemap 
        unsigned long pfn;
        int i;
  
 -      dev_pagemap_kill(pgmap);
 +      percpu_ref_kill(&pgmap->ref);
        for (i = 0; i < pgmap->nr_range; i++)
                for_each_device_pfn(pfn, pgmap, i)
                        put_page(pfn_to_page(pfn));
 -      dev_pagemap_cleanup(pgmap);
 +      wait_for_completion(&pgmap->done);
 +      percpu_ref_exit(&pgmap->ref);
  
        for (i = 0; i < pgmap->nr_range; i++)
                pageunmap_range(pgmap, i);
@@@ -165,7 -195,8 +172,7 @@@ static void devm_memremap_pages_release
  
  static void dev_pagemap_percpu_release(struct percpu_ref *ref)
  {
 -      struct dev_pagemap *pgmap =
 -              container_of(ref, struct dev_pagemap, internal_ref);
 +      struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref);
  
        complete(&pgmap->done);
  }
@@@ -271,8 -302,7 +278,7 @@@ static int pagemap_range(struct dev_pag
        memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
                                PHYS_PFN(range->start),
                                PHYS_PFN(range_len(range)), pgmap);
-       percpu_ref_get_many(&pgmap->ref,
-               pfn_end(pgmap, range_id) - pfn_first(pgmap, range_id));
 -      percpu_ref_get_many(pgmap->ref, pfn_len(pgmap, range_id));
++      percpu_ref_get_many(&pgmap->ref, pfn_len(pgmap, range_id));
        return 0;
  
  err_add_memory:
@@@ -338,11 -368,22 +344,11 @@@ void *memremap_pages(struct dev_pagema
                break;
        }
  
 -      if (!pgmap->ref) {
 -              if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
 -                      return ERR_PTR(-EINVAL);
 -
 -              init_completion(&pgmap->done);
 -              error = percpu_ref_init(&pgmap->internal_ref,
 -                              dev_pagemap_percpu_release, 0, GFP_KERNEL);
 -              if (error)
 -                      return ERR_PTR(error);
 -              pgmap->ref = &pgmap->internal_ref;
 -      } else {
 -              if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
 -                      WARN(1, "Missing reference count teardown definition\n");
 -                      return ERR_PTR(-EINVAL);
 -              }
 -      }
 +      init_completion(&pgmap->done);
 +      error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0,
 +                              GFP_KERNEL);
 +      if (error)
 +              return ERR_PTR(error);
  
        devmap_managed_enable_get(pgmap);
  
@@@ -451,7 -492,7 +457,7 @@@ struct dev_pagemap *get_dev_pagemap(uns
        /* fall back to slow path lookup */
        rcu_read_lock();
        pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
 -      if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
 +      if (pgmap && !percpu_ref_tryget_live(&pgmap->ref))
                pgmap = NULL;
        rcu_read_unlock();
  
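
The memremap.c hunks above drop the pluggable kill/cleanup hooks and always use a percpu_ref embedded in struct dev_pagemap together with a completion: kill the ref, drop the per-pfn references, wait for the release callback, then unmap. The single-threaded sketch below only mirrors that ordering with a plain counter and a flag; fake_pgmap, pgmap_put() and memunmap_model() are invented names, not the kernel API.

#include <stdbool.h>
#include <stdio.h>

struct fake_pgmap {
        long ref;       /* models the embedded percpu_ref */
        bool done;      /* models the "done" completion */
};

static void pgmap_put(struct fake_pgmap *p)
{
        if (--p->ref == 0)
                p->done = true;         /* release callback fires: complete() */
}

static void memunmap_model(struct fake_pgmap *p, int npfns)
{
        pgmap_put(p);                   /* "kill": drop the initial reference */
        for (int i = 0; i < npfns; i++)
                pgmap_put(p);           /* put_page() for each device pfn */

        while (!p->done)                /* wait_for_completion() */
                ;
        printf("all references gone, safe to tear down the mapping\n");
}

int main(void)
{
        struct fake_pgmap pgmap = { .ref = 1 + 3, .done = false };

        memunmap_model(&pgmap, 3);
        return 0;
}
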
diff --combined mm/migrate.c
index 7079e6b7dbe7d1a5e8f05ed9dbba4828422be5ef,05af2b2336b97afb2bccc67fdbef2925e64e6dcd..18ce840914f0d9b1b5bee825a0a0bd260899e583
@@@ -50,6 -50,7 +50,7 @@@
  #include <linux/ptrace.h>
  #include <linux/oom.h>
  #include <linux/memory.h>
+ #include <linux/random.h>
  
  #include <asm/tlbflush.h>
  
@@@ -236,20 -237,19 +237,19 @@@ static bool remove_migration_pte(struc
  
                        pte = pte_mkhuge(pte);
                        pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
-                       set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
                        if (PageAnon(new))
                                hugepage_add_anon_rmap(new, vma, pvmw.address);
                        else
                                page_dup_rmap(new, true);
+                       set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
                } else
  #endif
                {
-                       set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
                        if (PageAnon(new))
                                page_add_anon_rmap(new, vma, pvmw.address, false);
                        else
                                page_add_file_rmap(new, false);
+                       set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
                }
                if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
                        mlock_vma_page(new);
@@@ -291,7 -291,7 +291,7 @@@ void __migration_entry_wait(struct mm_s
  {
        pte_t pte;
        swp_entry_t entry;
 -      struct page *page;
 +      struct folio *folio;
  
        spin_lock(ptl);
        pte = *ptep;
        if (!is_migration_entry(entry))
                goto out;
  
 -      page = pfn_swap_entry_to_page(entry);
 -      page = compound_head(page);
 +      folio = page_folio(pfn_swap_entry_to_page(entry));
  
        /*
         * Once page cache replacement of page migration started, page_count
 -       * is zero; but we must not call put_and_wait_on_page_locked() without
 -       * a ref. Use get_page_unless_zero(), and just fault again if it fails.
 +       * is zero; but we must not call folio_put_wait_locked() without
 +       * a ref. Use folio_try_get(), and just fault again if it fails.
         */
 -      if (!get_page_unless_zero(page))
 +      if (!folio_try_get(folio))
                goto out;
        pte_unmap_unlock(ptep, ptl);
 -      put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
 +      folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE);
        return;
  out:
        pte_unmap_unlock(ptep, ptl);
@@@ -337,16 -338,16 +337,16 @@@ void migration_entry_wait_huge(struct v
  void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
  {
        spinlock_t *ptl;
 -      struct page *page;
 +      struct folio *folio;
  
        ptl = pmd_lock(mm, pmd);
        if (!is_pmd_migration_entry(*pmd))
                goto unlock;
 -      page = pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd));
 -      if (!get_page_unless_zero(page))
 +      folio = page_folio(pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd)));
 +      if (!folio_try_get(folio))
                goto unlock;
        spin_unlock(ptl);
 -      put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
 +      folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE);
        return;
  unlock:
        spin_unlock(ptl);
@@@ -433,6 -434,14 +433,6 @@@ int folio_migrate_mapping(struct addres
        }
  
        xas_store(&xas, newfolio);
 -      if (nr > 1) {
 -              int i;
 -
 -              for (i = 1; i < nr; i++) {
 -                      xas_next(&xas);
 -                      xas_store(&xas, newfolio);
 -              }
 -      }
  
        /*
         * Drop cache reference from old page by unfreezing
@@@ -1084,80 -1093,6 +1084,6 @@@ out
        return rc;
  }
  
- /*
-  * node_demotion[] example:
-  *
-  * Consider a system with two sockets.  Each socket has
-  * three classes of memory attached: fast, medium and slow.
-  * Each memory class is placed in its own NUMA node.  The
-  * CPUs are placed in the node with the "fast" memory.  The
-  * 6 NUMA nodes (0-5) might be split among the sockets like
-  * this:
-  *
-  *    Socket A: 0, 1, 2
-  *    Socket B: 3, 4, 5
-  *
-  * When Node 0 fills up, its memory should be migrated to
-  * Node 1.  When Node 1 fills up, it should be migrated to
-  * Node 2.  The migration path start on the nodes with the
-  * processors (since allocations default to this node) and
-  * fast memory, progress through medium and end with the
-  * slow memory:
-  *
-  *    0 -> 1 -> 2 -> stop
-  *    3 -> 4 -> 5 -> stop
-  *
-  * This is represented in the node_demotion[] like this:
-  *
-  *    {  1, // Node 0 migrates to 1
-  *       2, // Node 1 migrates to 2
-  *      -1, // Node 2 does not migrate
-  *       4, // Node 3 migrates to 4
-  *       5, // Node 4 migrates to 5
-  *      -1} // Node 5 does not migrate
-  */
- /*
-  * Writes to this array occur without locking.  Cycles are
-  * not allowed: Node X demotes to Y which demotes to X...
-  *
-  * If multiple reads are performed, a single rcu_read_lock()
-  * must be held over all reads to ensure that no cycles are
-  * observed.
-  */
- static int node_demotion[MAX_NUMNODES] __read_mostly =
-       {[0 ...  MAX_NUMNODES - 1] = NUMA_NO_NODE};
- /**
-  * next_demotion_node() - Get the next node in the demotion path
-  * @node: The starting node to lookup the next node
-  *
-  * Return: node id for next memory node in the demotion path hierarchy
-  * from @node; NUMA_NO_NODE if @node is terminal.  This does not keep
-  * @node online or guarantee that it *continues* to be the next demotion
-  * target.
-  */
- int next_demotion_node(int node)
- {
-       int target;
-       /*
-        * node_demotion[] is updated without excluding this
-        * function from running.  RCU doesn't provide any
-        * compiler barriers, so the READ_ONCE() is required
-        * to avoid compiler reordering or read merging.
-        *
-        * Make sure to use RCU over entire code blocks if
-        * node_demotion[] reads need to be consistent.
-        */
-       rcu_read_lock();
-       target = READ_ONCE(node_demotion[node]);
-       rcu_read_unlock();
-       return target;
- }
  /*
   * Obtain the lock on page, remove all ptes and migrate the page
   * to the newly allocated page in newpage.
@@@ -1413,7 -1348,7 +1339,7 @@@ static inline int try_split_thp(struct 
   * @mode:             The migration mode that specifies the constraints for
   *                    page migration, if any.
   * @reason:           The reason for page migration.
-  * @ret_succeeded:    Set to the number of pages migrated successfully if
+  * @ret_succeeded:    Set to the number of normal pages migrated successfully if
   *                    the caller passes a non-NULL pointer.
   *
   * The function returns after 10 attempts or if no pages are movable any more
   * It is caller's responsibility to call putback_movable_pages() to return pages
   * to the LRU or free list only if ret != 0.
   *
-  * Returns the number of pages that were not migrated, or an error code.
+  * Returns the number of {normal page, THP, hugetlb} that were not migrated, or
+  * an error code. The number of THP splits will be considered as the number of
+  * non-migrated THP, no matter how many subpages of the THP are migrated successfully.
   */
  int migrate_pages(struct list_head *from, new_page_t get_new_page,
                free_page_t put_new_page, unsigned long private,
        int retry = 1;
        int thp_retry = 1;
        int nr_failed = 0;
+       int nr_failed_pages = 0;
        int nr_succeeded = 0;
        int nr_thp_succeeded = 0;
        int nr_thp_failed = 0;
        int swapwrite = current->flags & PF_SWAPWRITE;
        int rc, nr_subpages;
        LIST_HEAD(ret_pages);
+       LIST_HEAD(thp_split_pages);
        bool nosplit = (reason == MR_NUMA_MISPLACED);
+       bool no_subpage_counting = false;
  
        trace_mm_migrate_pages_start(mode, reason);
  
        if (!swapwrite)
                current->flags |= PF_SWAPWRITE;
  
+ thp_subpage_migration:
        for (pass = 0; pass < 10 && (retry || thp_retry); pass++) {
                retry = 0;
                thp_retry = 0;
@@@ -1460,7 -1401,7 +1392,7 @@@ retry
                         * during migration.
                         */
                        is_thp = PageTransHuge(page) && !PageHuge(page);
-                       nr_subpages = thp_nr_pages(page);
+                       nr_subpages = compound_nr(page);
                        cond_resched();
  
                        if (PageHuge(page))
                        case -ENOSYS:
                                /* THP migration is unsupported */
                                if (is_thp) {
-                                       if (!try_split_thp(page, &page2, from)) {
+                                       nr_thp_failed++;
+                                       if (!try_split_thp(page, &page2, &thp_split_pages)) {
                                                nr_thp_split++;
                                                goto retry;
                                        }
  
-                                       nr_thp_failed++;
-                                       nr_failed += nr_subpages;
+                                       nr_failed_pages += nr_subpages;
                                        break;
                                }
  
                                /* Hugetlb migration is unsupported */
-                               nr_failed++;
+                               if (!no_subpage_counting)
+                                       nr_failed++;
+                               nr_failed_pages += nr_subpages;
                                break;
                        case -ENOMEM:
                                /*
                                 * THP NUMA faulting doesn't split THP to retry.
                                 */
                                if (is_thp && !nosplit) {
-                                       if (!try_split_thp(page, &page2, from)) {
+                                       nr_thp_failed++;
+                                       if (!try_split_thp(page, &page2, &thp_split_pages)) {
                                                nr_thp_split++;
                                                goto retry;
                                        }
  
-                                       nr_thp_failed++;
-                                       nr_failed += nr_subpages;
+                                       nr_failed_pages += nr_subpages;
                                        goto out;
                                }
-                               nr_failed++;
+                               if (!no_subpage_counting)
+                                       nr_failed++;
+                               nr_failed_pages += nr_subpages;
                                goto out;
                        case -EAGAIN:
                                if (is_thp) {
                                retry++;
                                break;
                        case MIGRATEPAGE_SUCCESS:
+                               nr_succeeded += nr_subpages;
                                if (is_thp) {
                                        nr_thp_succeeded++;
-                                       nr_succeeded += nr_subpages;
                                        break;
                                }
-                               nr_succeeded++;
                                break;
                        default:
                                /*
                                 */
                                if (is_thp) {
                                        nr_thp_failed++;
-                                       nr_failed += nr_subpages;
+                                       nr_failed_pages += nr_subpages;
                                        break;
                                }
-                               nr_failed++;
+                               if (!no_subpage_counting)
+                                       nr_failed++;
+                               nr_failed_pages += nr_subpages;
                                break;
                        }
                }
        }
-       nr_failed += retry + thp_retry;
+       nr_failed += retry;
        nr_thp_failed += thp_retry;
-       rc = nr_failed;
+       /*
+        * Try to migrate subpages of fail-to-migrate THPs, no nr_failed
+        * counting in this round, since all subpages of a THP is counted
+        * as 1 failure in the first round.
+        */
+       if (!list_empty(&thp_split_pages)) {
+               /*
+                * Move non-migrated pages (after 10 retries) to ret_pages
+                * to avoid migrating them again.
+                */
+               list_splice_init(from, &ret_pages);
+               list_splice_init(&thp_split_pages, from);
+               no_subpage_counting = true;
+               retry = 1;
+               goto thp_subpage_migration;
+       }
+       rc = nr_failed + nr_thp_failed;
  out:
        /*
         * Put the permanent failure page back to migration list, they
        list_splice(&ret_pages, from);
  
        count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
-       count_vm_events(PGMIGRATE_FAIL, nr_failed);
+       count_vm_events(PGMIGRATE_FAIL, nr_failed_pages);
        count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
        count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed);
        count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split);
-       trace_mm_migrate_pages(nr_succeeded, nr_failed, nr_thp_succeeded,
+       trace_mm_migrate_pages(nr_succeeded, nr_failed_pages, nr_thp_succeeded,
                               nr_thp_failed, nr_thp_split, mode, reason);
  
        if (!swapwrite)
@@@ -2516,8 -2481,7 +2472,7 @@@ static bool migrate_vma_check_page(stru
  static void migrate_vma_unmap(struct migrate_vma *migrate)
  {
        const unsigned long npages = migrate->npages;
-       const unsigned long start = migrate->start;
-       unsigned long addr, i, restore = 0;
+       unsigned long i, restore = 0;
        bool allow_drain = true;
  
        lru_add_drain();
                }
        }
  
-       for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) {
+       for (i = 0; i < npages && restore; i++) {
                struct page *page = migrate_pfn_to_page(migrate->src[i]);
  
                if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
@@@ -2961,14 -2925,152 +2916,152 @@@ void migrate_vma_finalize(struct migrat
  EXPORT_SYMBOL(migrate_vma_finalize);
  #endif /* CONFIG_DEVICE_PRIVATE */
  
+ /*
+  * node_demotion[] example:
+  *
+  * Consider a system with two sockets.  Each socket has
+  * three classes of memory attached: fast, medium and slow.
+  * Each memory class is placed in its own NUMA node.  The
+  * CPUs are placed in the node with the "fast" memory.  The
+  * 6 NUMA nodes (0-5) might be split among the sockets like
+  * this:
+  *
+  *    Socket A: 0, 1, 2
+  *    Socket B: 3, 4, 5
+  *
+  * When Node 0 fills up, its memory should be migrated to
+  * Node 1.  When Node 1 fills up, it should be migrated to
+  * Node 2.  The migration path starts on the nodes with the
+  * processors (since allocations default to this node) and
+  * fast memory, progresses through medium and ends with the
+  * slow memory:
+  *
+  *    0 -> 1 -> 2 -> stop
+  *    3 -> 4 -> 5 -> stop
+  *
+  * This is represented in the node_demotion[] like this:
+  *
+  *    {  nr=1, nodes[0]=1 }, // Node 0 migrates to 1
+  *    {  nr=1, nodes[0]=2 }, // Node 1 migrates to 2
+  *    {  nr=0, nodes[0]=-1 }, // Node 2 does not migrate
+  *    {  nr=1, nodes[0]=4 }, // Node 3 migrates to 4
+  *    {  nr=1, nodes[0]=5 }, // Node 4 migrates to 5
+  *    {  nr=0, nodes[0]=-1 }, // Node 5 does not migrate
+  *
+  * Moreover, some systems may have multiple slow memory nodes.
+  * Suppose a system has one socket with 3 memory nodes: node 0
+  * is the fast memory type, and nodes 1 and 2 are both the slow
+  * memory type, with the same distance from the fast memory node
+  * to each slow memory node. So the migration path should be:
+  *
+  *    0 -> 1/2 -> stop
+  *
+  * This is represented in the node_demotion[] like this:
+  *    { nr=2, {nodes[0]=1, nodes[1]=2} }, // Node 0 migrates to node 1 and node 2
+  *    { nr=0, nodes[0]=-1, }, // Node 1 does not migrate
+  *    { nr=0, nodes[0]=-1, }, // Node 2 does not migrate
+  */
+ /*
+  * Writes to this array occur without locking.  Cycles are
+  * not allowed: Node X demotes to Y which demotes to X...
+  *
+  * If multiple reads are performed, a single rcu_read_lock()
+  * must be held over all reads to ensure that no cycles are
+  * observed.
+  */
+ #define DEFAULT_DEMOTION_TARGET_NODES 15
+ #if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES
+ #define DEMOTION_TARGET_NODES (MAX_NUMNODES - 1)
+ #else
+ #define DEMOTION_TARGET_NODES DEFAULT_DEMOTION_TARGET_NODES
+ #endif
+ struct demotion_nodes {
+       unsigned short nr;
+       short nodes[DEMOTION_TARGET_NODES];
+ };
+ static struct demotion_nodes *node_demotion __read_mostly;
+ /**
+  * next_demotion_node() - Get the next node in the demotion path
+  * @node: The starting node to lookup the next node
+  *
+  * Return: node id for next memory node in the demotion path hierarchy
+  * from @node; NUMA_NO_NODE if @node is terminal.  This does not keep
+  * @node online or guarantee that it *continues* to be the next demotion
+  * target.
+  */
+ int next_demotion_node(int node)
+ {
+       struct demotion_nodes *nd;
+       unsigned short target_nr, index;
+       int target;
+       if (!node_demotion)
+               return NUMA_NO_NODE;
+       nd = &node_demotion[node];
+       /*
+        * node_demotion[] is updated without excluding this
+        * function from running.  RCU doesn't provide any
+        * compiler barriers, so the READ_ONCE() is required
+        * to avoid compiler reordering or read merging.
+        *
+        * Make sure to use RCU over entire code blocks if
+        * node_demotion[] reads need to be consistent.
+        */
+       rcu_read_lock();
+       target_nr = READ_ONCE(nd->nr);
+       switch (target_nr) {
+       case 0:
+               target = NUMA_NO_NODE;
+               goto out;
+       case 1:
+               index = 0;
+               break;
+       default:
+               /*
+                * If there are multiple target nodes, just select one
+                * target node randomly.
+                *
+                * Round-robin selection would also work, but it would need
+                * an extra field in node_demotion[] to record the last
+                * selected target, and updating that field could cause
+                * cache ping-pong. Per-CPU state would avoid the caching
+                * issue but seems more complicated. For now, random
+                * selection is the simplest reasonable choice.
+                */
+               index = get_random_int() % target_nr;
+               break;
+       }
+       target = READ_ONCE(nd->nodes[index]);
+ out:
+       rcu_read_unlock();
+       return target;
+ }
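A hypothetical caller, for illustration of the API and of the locking note above: holding one rcu_read_lock() over the whole walk keeps the observed path cycle-free (next_demotion_node() nests its own read-side critical section, which is legal).

/* Sketch: walk and report one demotion path starting at @nid. */
static void dump_demotion_path(int nid)
{
	int next;

	rcu_read_lock();
	while ((next = next_demotion_node(nid)) != NUMA_NO_NODE) {
		pr_info("node %d demotes to node %d\n", nid, next);
		nid = next;
	}
	rcu_read_unlock();
}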
  #if defined(CONFIG_HOTPLUG_CPU)
  /* Disable reclaim-based migration. */
  static void __disable_all_migrate_targets(void)
  {
-       int node;
+       int node, i;
+       if (!node_demotion)
+               return;
  
-       for_each_online_node(node)
-               node_demotion[node] = NUMA_NO_NODE;
+       for_each_online_node(node) {
+               node_demotion[node].nr = 0;
+               for (i = 0; i < DEMOTION_TARGET_NODES; i++)
+                       node_demotion[node].nodes[i] = NUMA_NO_NODE;
+       }
  }
  
  static void disable_all_migrate_targets(void)
   * Failing here is OK.  It might just indicate
   * being at the end of a chain.
   */
- static int establish_migrate_target(int node, nodemask_t *used)
+ static int establish_migrate_target(int node, nodemask_t *used,
+                                   int best_distance)
  {
-       int migration_target;
+       int migration_target, index, val;
+       struct demotion_nodes *nd;
  
-       /*
-        * Can not set a migration target on a
-        * node with it already set.
-        *
-        * No need for READ_ONCE() here since this
-        * in the write path for node_demotion[].
-        * This should be the only thread writing.
-        */
-       if (node_demotion[node] != NUMA_NO_NODE)
+       if (!node_demotion)
                return NUMA_NO_NODE;
  
+       nd = &node_demotion[node];
        migration_target = find_next_best_node(node, used);
        if (migration_target == NUMA_NO_NODE)
                return NUMA_NO_NODE;
  
-       node_demotion[node] = migration_target;
+       /*
+        * If a migration target was already set for this node, that
+        * target is at the best distance. Still check whether this
+        * node can also be demoted to other target nodes that share
+        * the same best distance.
+        */
+       if (best_distance != -1) {
+               val = node_distance(node, migration_target);
+               if (val > best_distance)
+                       return NUMA_NO_NODE;
+       }
+       index = nd->nr;
+       if (WARN_ONCE(index >= DEMOTION_TARGET_NODES,
+                     "Exceeds maximum demotion target nodes\n"))
+               return NUMA_NO_NODE;
+       nd->nodes[index] = migration_target;
+       nd->nr++;
  
        return migration_target;
  }
   *
   * The difference here is that cycles must be avoided.  If
   * node0 migrates to node1, then neither node1, nor anything
-  * node1 migrates to can migrate to node0.
+  * node1 migrates to can migrate to node0. Also, one node can
+  * demote to multiple target nodes if those targets are all at
+  * the same best distance from the source node.
   *
   * This function can run simultaneously with readers of
   * node_demotion[].  However, it can not run simultaneously
@@@ -3042,7 -3160,7 +3151,7 @@@ static void __set_migration_target_node
        nodemask_t next_pass    = NODE_MASK_NONE;
        nodemask_t this_pass    = NODE_MASK_NONE;
        nodemask_t used_targets = NODE_MASK_NONE;
-       int node;
+       int node, best_distance;
  
        /*
         * Avoid any oddities like cycles that could occur
@@@ -3071,18 -3189,33 +3180,33 @@@ again
         * multiple source nodes to share a destination.
         */
        nodes_or(used_targets, used_targets, this_pass);
-       for_each_node_mask(node, this_pass) {
-               int target_node = establish_migrate_target(node, &used_targets);
  
-               if (target_node == NUMA_NO_NODE)
-                       continue;
+       for_each_node_mask(node, this_pass) {
+               best_distance = -1;
  
                /*
-                * Visit targets from this pass in the next pass.
-                * Eventually, every node will have been part of
-                * a pass, and will become set in 'used_targets'.
+                * Try to set up the migration path for the node. There can
+                * be multiple target nodes, so loop to find all the targets
+                * that share the best node distance.
                 */
-               node_set(target_node, next_pass);
+               do {
+                       int target_node =
+                               establish_migrate_target(node, &used_targets,
+                                                        best_distance);
+                       if (target_node == NUMA_NO_NODE)
+                               break;
+                       if (best_distance == -1)
+                               best_distance = node_distance(node, target_node);
+                       /*
+                        * Visit targets from this pass in the next pass.
+                        * Eventually, every node will have been part of
+                        * a pass, and will become set in 'used_targets'.
+                        */
+                       node_set(target_node, next_pass);
+               } while (1);
        }
        /*
         * 'next_pass' contains nodes which became migration
@@@ -3183,6 -3316,11 +3307,11 @@@ static int __init migrate_on_reclaim_in
  {
        int ret;
  
+       node_demotion = kmalloc_array(nr_node_ids,
+                                     sizeof(struct demotion_nodes),
+                                     GFP_KERNEL);
+       WARN_ON(!node_demotion);
        ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline",
                                        NULL, migration_offline_cpu);
        /*
diff --combined mm/shmem.c
index 28d627444a243fd9704dbc74feb07bafa43b6858,0700e9acf53bcf9add22e6374430dde4950c2470..66909efd0a1b25bee37b773aecbb550b13c0175c
@@@ -554,7 -554,7 +554,7 @@@ static unsigned long shmem_unused_huge_
        struct shmem_inode_info *info;
        struct page *page;
        unsigned long batch = sc ? sc->nr_to_scan : 128;
-       int removed = 0, split = 0;
+       int split = 0;
  
        if (list_empty(&sbinfo->shrinklist))
                return SHRINK_STOP;
                /* inode is about to be evicted */
                if (!inode) {
                        list_del_init(&info->shrinklist);
-                       removed++;
                        goto next;
                }
  
                if (round_up(inode->i_size, PAGE_SIZE) ==
                                round_up(inode->i_size, HPAGE_PMD_SIZE)) {
                        list_move(&info->shrinklist, &to_remove);
-                       removed++;
                        goto next;
                }
  
                list_move(&info->shrinklist, &list);
  next:
+               sbinfo->shrinklist_len--;
                if (!--batch)
                        break;
        }
                inode = &info->vfs_inode;
  
                if (nr_to_split && split >= nr_to_split)
-                       goto leave;
+                       goto move_back;
  
                page = find_get_page(inode->i_mapping,
                                (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
                }
  
                /*
-                * Leave the inode on the list if we failed to lock
-                * the page at this time.
+                * Move the inode on the list back to shrinklist if we failed
+                * to lock the page at this time.
                 *
                 * Waiting for the lock may lead to deadlock in the
                 * reclaim path.
                 */
                if (!trylock_page(page)) {
                        put_page(page);
-                       goto leave;
+                       goto move_back;
                }
  
                ret = split_huge_page(page);
                unlock_page(page);
                put_page(page);
  
-               /* If split failed leave the inode on the list */
+               /* If split failed move the inode on the list back to shrinklist */
                if (ret)
-                       goto leave;
+                       goto move_back;
  
                split++;
  drop:
                list_del_init(&info->shrinklist);
-               removed++;
- leave:
+               goto put;
+ move_back:
+               /*
+                * Make sure the inode is either on the global list or deleted
+                * from any local list before iput() since it could be deleted
+                * in another thread once we put the inode (then the local list
+                * is corrupted).
+                */
+               spin_lock(&sbinfo->shrinklist_lock);
+               list_move(&info->shrinklist, &sbinfo->shrinklist);
+               sbinfo->shrinklist_len++;
+               spin_unlock(&sbinfo->shrinklist_lock);
+ put:
                iput(inode);
        }
  
-       spin_lock(&sbinfo->shrinklist_lock);
-       list_splice_tail(&list, &sbinfo->shrinklist);
-       sbinfo->shrinklist_len -= removed;
-       spin_unlock(&sbinfo->shrinklist_lock);
        return split;
  }
  
@@@ -694,6 -699,7 +699,6 @@@ static int shmem_add_to_page_cache(stru
                                   struct mm_struct *charge_mm)
  {
        XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page));
 -      unsigned long i = 0;
        unsigned long nr = compound_nr(page);
        int error;
  
        cgroup_throttle_swaprate(page, gfp);
  
        do {
 -              void *entry;
                xas_lock_irq(&xas);
 -              entry = xas_find_conflict(&xas);
 -              if (entry != expected)
 +              if (expected != xas_find_conflict(&xas)) {
                        xas_set_err(&xas, -EEXIST);
 -              xas_create_range(&xas);
 -              if (xas_error(&xas))
                        goto unlock;
 -next:
 -              xas_store(&xas, page);
 -              if (++i < nr) {
 -                      xas_next(&xas);
 -                      goto next;
                }
 +              if (expected && xas_find_conflict(&xas)) {
 +                      xas_set_err(&xas, -EEXIST);
 +                      goto unlock;
 +              }
 +              xas_store(&xas, page);
 +              if (xas_error(&xas))
 +                      goto unlock;
                if (PageTransHuge(page)) {
                        count_vm_event(THP_FILE_ALLOC);
                        __mod_lruvec_page_state(page, NR_SHMEM_THPS, nr);
@@@ -877,26 -885,30 +882,26 @@@ void shmem_unlock_mapping(struct addres
        }
  }
  
 -/*
 - * Check whether a hole-punch or truncation needs to split a huge page,
 - * returning true if no split was required, or the split has been successful.
 - *
 - * Eviction (or truncation to 0 size) should never need to split a huge page;
 - * but in rare cases might do so, if shmem_undo_range() failed to trylock on
 - * head, and then succeeded to trylock on tail.
 - *
 - * A split can only succeed when there are no additional references on the
 - * huge page: so the split below relies upon find_get_entries() having stopped
 - * when it found a subpage of the huge page, without getting further references.
 - */
 -static bool shmem_punch_compound(struct page *page, pgoff_t start, pgoff_t end)
 +static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index)
  {
 -      if (!PageTransCompound(page))
 -              return true;
 -
 -      /* Just proceed to delete a huge page wholly within the range punched */
 -      if (PageHead(page) &&
 -          page->index >= start && page->index + HPAGE_PMD_NR <= end)
 -              return true;
 +      struct folio *folio;
 +      struct page *page;
  
 -      /* Try to split huge page, so we can truly punch the hole or truncate */
 -      return split_huge_page(page) >= 0;
 +      /*
 +       * At first avoid shmem_getpage(,,,SGP_READ): that fails
 +       * beyond i_size, and reports fallocated pages as holes.
 +       */
 +      folio = __filemap_get_folio(inode->i_mapping, index,
 +                                      FGP_ENTRY | FGP_LOCK, 0);
 +      if (!xa_is_value(folio))
 +              return folio;
 +      /*
 +       * But read a page back from swap if any of it is within i_size
 +       * (although in some cases this is just a waste of time).
 +       */
 +      page = NULL;
 +      shmem_getpage(inode, index, &page, SGP_READ);
 +      return page ? page_folio(page) : NULL;
  }
  
  /*
@@@ -910,10 -922,10 +915,10 @@@ static void shmem_undo_range(struct ino
        struct shmem_inode_info *info = SHMEM_I(inode);
        pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
        pgoff_t end = (lend + 1) >> PAGE_SHIFT;
 -      unsigned int partial_start = lstart & (PAGE_SIZE - 1);
 -      unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1);
 -      struct pagevec pvec;
 +      struct folio_batch fbatch;
        pgoff_t indices[PAGEVEC_SIZE];
 +      struct folio *folio;
 +      bool same_folio;
        long nr_swaps_freed = 0;
        pgoff_t index;
        int i;
        if (info->fallocend > start && info->fallocend <= end && !unfalloc)
                info->fallocend = start;
  
 -      pagevec_init(&pvec);
 +      folio_batch_init(&fbatch);
        index = start;
        while (index < end && find_lock_entries(mapping, index, end - 1,
 -                      &pvec, indices)) {
 -              for (i = 0; i < pagevec_count(&pvec); i++) {
 -                      struct page *page = pvec.pages[i];
 +                      &fbatch, indices)) {
 +              for (i = 0; i < folio_batch_count(&fbatch); i++) {
 +                      folio = fbatch.folios[i];
  
                        index = indices[i];
  
 -                      if (xa_is_value(page)) {
 +                      if (xa_is_value(folio)) {
                                if (unfalloc)
                                        continue;
                                nr_swaps_freed += !shmem_free_swap(mapping,
 -                                                              index, page);
 +                                                              index, folio);
                                continue;
                        }
 -                      index += thp_nr_pages(page) - 1;
 +                      index += folio_nr_pages(folio) - 1;
  
 -                      if (!unfalloc || !PageUptodate(page))
 -                              truncate_inode_page(mapping, page);
 -                      unlock_page(page);
 +                      if (!unfalloc || !folio_test_uptodate(folio))
 +                              truncate_inode_folio(mapping, folio);
 +                      folio_unlock(folio);
                }
 -              pagevec_remove_exceptionals(&pvec);
 -              pagevec_release(&pvec);
 +              folio_batch_remove_exceptionals(&fbatch);
 +              folio_batch_release(&fbatch);
                cond_resched();
                index++;
        }
  
 -      if (partial_start) {
 -              struct page *page = NULL;
 -              shmem_getpage(inode, start - 1, &page, SGP_READ);
 -              if (page) {
 -                      unsigned int top = PAGE_SIZE;
 -                      if (start > end) {
 -                              top = partial_end;
 -                              partial_end = 0;
 -                      }
 -                      zero_user_segment(page, partial_start, top);
 -                      set_page_dirty(page);
 -                      unlock_page(page);
 -                      put_page(page);
 +      same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
 +      folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT);
 +      if (folio) {
 +              same_folio = lend < folio_pos(folio) + folio_size(folio);
 +              folio_mark_dirty(folio);
 +              if (!truncate_inode_partial_folio(folio, lstart, lend)) {
 +                      start = folio->index + folio_nr_pages(folio);
 +                      if (same_folio)
 +                              end = folio->index;
                }
 +              folio_unlock(folio);
 +              folio_put(folio);
 +              folio = NULL;
        }
 -      if (partial_end) {
 -              struct page *page = NULL;
 -              shmem_getpage(inode, end, &page, SGP_READ);
 -              if (page) {
 -                      zero_user_segment(page, 0, partial_end);
 -                      set_page_dirty(page);
 -                      unlock_page(page);
 -                      put_page(page);
 -              }
 +
 +      if (!same_folio)
 +              folio = shmem_get_partial_folio(inode, lend >> PAGE_SHIFT);
 +      if (folio) {
 +              folio_mark_dirty(folio);
 +              if (!truncate_inode_partial_folio(folio, lstart, lend))
 +                      end = folio->index;
 +              folio_unlock(folio);
 +              folio_put(folio);
        }
 -      if (start >= end)
 -              return;
  
        index = start;
        while (index < end) {
                cond_resched();
  
 -              if (!find_get_entries(mapping, index, end - 1, &pvec,
 +              if (!find_get_entries(mapping, index, end - 1, &fbatch,
                                indices)) {
                        /* If all gone or hole-punch or unfalloc, we're done */
                        if (index == start || end != -1)
                        index = start;
                        continue;
                }
 -              for (i = 0; i < pagevec_count(&pvec); i++) {
 -                      struct page *page = pvec.pages[i];
 +              for (i = 0; i < folio_batch_count(&fbatch); i++) {
 +                      folio = fbatch.folios[i];
  
                        index = indices[i];
 -                      if (xa_is_value(page)) {
 +                      if (xa_is_value(folio)) {
                                if (unfalloc)
                                        continue;
 -                              if (shmem_free_swap(mapping, index, page)) {
 +                              if (shmem_free_swap(mapping, index, folio)) {
                                        /* Swap was replaced by page: retry */
                                        index--;
                                        break;
                                continue;
                        }
  
 -                      lock_page(page);
 +                      folio_lock(folio);
  
 -                      if (!unfalloc || !PageUptodate(page)) {
 -                              if (page_mapping(page) != mapping) {
 +                      if (!unfalloc || !folio_test_uptodate(folio)) {
 +                              if (folio_mapping(folio) != mapping) {
                                        /* Page was replaced by swap: retry */
 -                                      unlock_page(page);
 +                                      folio_unlock(folio);
                                        index--;
                                        break;
                                }
 -                              VM_BUG_ON_PAGE(PageWriteback(page), page);
 -                              if (shmem_punch_compound(page, start, end))
 -                                      truncate_inode_page(mapping, page);
 -                              else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
 -                                      /* Wipe the page and don't get stuck */
 -                                      clear_highpage(page);
 -                                      flush_dcache_page(page);
 -                                      set_page_dirty(page);
 -                                      if (index <
 -                                          round_up(start, HPAGE_PMD_NR))
 -                                              start = index + 1;
 -                              }
 +                              VM_BUG_ON_FOLIO(folio_test_writeback(folio),
 +                                              folio);
 +                              truncate_inode_folio(mapping, folio);
                        }
 -                      unlock_page(page);
 +                      index = folio->index + folio_nr_pages(folio) - 1;
 +                      folio_unlock(folio);
                }
 -              pagevec_remove_exceptionals(&pvec);
 -              pagevec_release(&pvec);
 +              folio_batch_remove_exceptionals(&fbatch);
 +              folio_batch_release(&fbatch);
                index++;
        }
  
@@@ -1541,8 -1564,7 +1546,7 @@@ static struct page *shmem_alloc_hugepag
                return NULL;
  
        shmem_pseudo_vma_init(&pvma, info, hindex);
-       page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(),
-                              true);
+       page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0, true);
        shmem_pseudo_vma_destroy(&pvma);
        if (page)
                prep_transhuge_page(page);
@@@ -2439,6 -2461,7 +2443,7 @@@ shmem_write_begin(struct file *file, st
        struct inode *inode = mapping->host;
        struct shmem_inode_info *info = SHMEM_I(inode);
        pgoff_t index = pos >> PAGE_SHIFT;
+       int ret = 0;
  
        /* i_rwsem is held by caller */
        if (unlikely(info->seals & (F_SEAL_GROW |
                        return -EPERM;
        }
  
-       return shmem_getpage(inode, index, pagep, SGP_WRITE);
+       ret = shmem_getpage(inode, index, pagep, SGP_WRITE);
+       if (ret)
+               return ret;
+       if (PageHWPoison(*pagep)) {
+               unlock_page(*pagep);
+               put_page(*pagep);
+               *pagep = NULL;
+               return -EIO;
+       }
+       return 0;
  }
  
  static int
@@@ -2536,6 -2571,12 +2553,12 @@@ static ssize_t shmem_file_read_iter(str
                        if (sgp == SGP_CACHE)
                                set_page_dirty(page);
                        unlock_page(page);
+                       if (PageHWPoison(page)) {
+                               put_page(page);
+                               error = -EIO;
+                               break;
+                       }
                }
  
                /*
@@@ -3075,7 -3116,8 +3098,8 @@@ static const char *shmem_get_link(struc
                page = find_get_page(inode->i_mapping, 0);
                if (!page)
                        return ERR_PTR(-ECHILD);
-               if (!PageUptodate(page)) {
+               if (PageHWPoison(page) ||
+                   !PageUptodate(page)) {
                        put_page(page);
                        return ERR_PTR(-ECHILD);
                }
                error = shmem_getpage(inode, 0, &page, SGP_READ);
                if (error)
                        return ERR_PTR(error);
+               if (!page)
+                       return ERR_PTR(-ECHILD);
+               if (PageHWPoison(page)) {
+                       unlock_page(page);
+                       put_page(page);
+                       return ERR_PTR(-ECHILD);
+               }
                unlock_page(page);
        }
        set_delayed_call(done, shmem_put_link, page);
@@@ -3733,6 -3782,13 +3764,13 @@@ static void shmem_destroy_inodecache(vo
        kmem_cache_destroy(shmem_inode_cachep);
  }
  
+ /* Keep the page in page cache instead of truncating it */
+ static int shmem_error_remove_page(struct address_space *mapping,
+                                  struct page *page)
+ {
+       return 0;
+ }
  const struct address_space_operations shmem_aops = {
        .writepage      = shmem_writepage,
        .set_page_dirty = __set_page_dirty_no_writeback,
  #ifdef CONFIG_MIGRATION
        .migratepage    = migrate_page,
  #endif
-       .error_remove_page = generic_error_remove_page,
+       .error_remove_page = shmem_error_remove_page,
  };
  EXPORT_SYMBOL(shmem_aops);
  
@@@ -4151,9 -4207,14 +4189,14 @@@ struct page *shmem_read_mapping_page_gf
        error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
                                  gfp, NULL, NULL, NULL);
        if (error)
-               page = ERR_PTR(error);
-       else
-               unlock_page(page);
+               return ERR_PTR(error);
+       unlock_page(page);
+       if (PageHWPoison(page)) {
+               put_page(page);
+               return ERR_PTR(-EIO);
+       }
        return page;
  #else
        /*
diff --combined mm/slab.h
index 95b9a74a2d515fd83091253b786886be9cc08476,053eefaf6cbd7e23b2643d0c3720658f6b9d4056..7edb7d23f14126e96e1b8c74181a15bc0c932acc
+++ b/mm/slab.h
   * Internal slab definitions
   */
  
 +/* Reuses the bits in struct page */
 +struct slab {
 +      unsigned long __page_flags;
 +
 +#if defined(CONFIG_SLAB)
 +
 +      union {
 +              struct list_head slab_list;
 +              struct rcu_head rcu_head;
 +      };
 +      struct kmem_cache *slab_cache;
 +      void *freelist; /* array of free object indexes */
 +      void *s_mem;    /* first object */
 +      unsigned int active;
 +
 +#elif defined(CONFIG_SLUB)
 +
 +      union {
 +              struct list_head slab_list;
 +              struct rcu_head rcu_head;
 +#ifdef CONFIG_SLUB_CPU_PARTIAL
 +              struct {
 +                      struct slab *next;
 +                      int slabs;      /* Nr of slabs left */
 +              };
 +#endif
 +      };
 +      struct kmem_cache *slab_cache;
 +      /* Double-word boundary */
 +      void *freelist;         /* first free object */
 +      union {
 +              unsigned long counters;
 +              struct {
 +                      unsigned inuse:16;
 +                      unsigned objects:15;
 +                      unsigned frozen:1;
 +              };
 +      };
 +      unsigned int __unused;
 +
 +#elif defined(CONFIG_SLOB)
 +
 +      struct list_head slab_list;
 +      void *__unused_1;
 +      void *freelist;         /* first free block */
 +      long units;
 +      unsigned int __unused_2;
 +
 +#else
 +#error "Unexpected slab allocator configured"
 +#endif
 +
 +      atomic_t __page_refcount;
 +#ifdef CONFIG_MEMCG
 +      unsigned long memcg_data;
 +#endif
 +};
 +
 +#define SLAB_MATCH(pg, sl)                                            \
 +      static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
 +SLAB_MATCH(flags, __page_flags);
 +SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */
 +SLAB_MATCH(slab_list, slab_list);
 +#ifndef CONFIG_SLOB
 +SLAB_MATCH(rcu_head, rcu_head);
 +SLAB_MATCH(slab_cache, slab_cache);
 +#endif
 +#ifdef CONFIG_SLAB
 +SLAB_MATCH(s_mem, s_mem);
 +SLAB_MATCH(active, active);
 +#endif
 +SLAB_MATCH(_refcount, __page_refcount);
 +#ifdef CONFIG_MEMCG
 +SLAB_MATCH(memcg_data, memcg_data);
 +#endif
 +#undef SLAB_MATCH
 +static_assert(sizeof(struct slab) <= sizeof(struct page));
 +
 +/**
 + * folio_slab - Converts from folio to slab.
 + * @folio: The folio.
 + *
 + * Currently struct slab is a different representation of a folio where
 + * folio_test_slab() is true.
 + *
 + * Return: The slab which contains this folio.
 + */
 +#define folio_slab(folio)     (_Generic((folio),                      \
 +      const struct folio *:   (const struct slab *)(folio),           \
 +      struct folio *:         (struct slab *)(folio)))
 +
 +/**
 + * slab_folio - The folio allocated for a slab
 + * @slab: The slab.
 + *
 + * Slabs are allocated as folios that contain the individual objects and are
 + * using some fields in the first struct page of the folio - those fields are
 + * now accessed by struct slab. It is occasionally necessary to convert back to
 + * a folio in order to communicate with the rest of the mm.  Please use this
 + * helper function instead of casting yourself, as the implementation may change
 + * in the future.
 + */
 +#define slab_folio(s)         (_Generic((s),                          \
 +      const struct slab *:    (const struct folio *)s,                \
 +      struct slab *:          (struct folio *)s))
 +
 +/**
 + * page_slab - Converts from first struct page to slab.
 + * @p: The first (either head of compound or single) page of slab.
 + *
 + * A temporary wrapper to convert struct page to struct slab in situations where
 + * we know the page is the compound head, or a single order-0 page.
 + *
 + * Long-term ideally everything would work with struct slab directly or go
 + * through folio to struct slab.
 + *
 + * Return: The slab which contains this page
 + */
 +#define page_slab(p)          (_Generic((p),                          \
 +      const struct page *:    (const struct slab *)(p),               \
 +      struct page *:          (struct slab *)(p)))
 +
 +/**
 + * slab_page - The first struct page allocated for a slab
 + * @slab: The slab.
 + *
 + * A convenience wrapper for converting slab to the first struct page of the
 + * underlying folio, to communicate with code not yet converted to folio or
 + * struct slab.
 + */
 +#define slab_page(s) folio_page(slab_folio(s), 0)
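A small usage sketch for these converters. The helper name is hypothetical, and it assumes a non-SLOB configuration where struct slab has a slab_cache field as defined above.

/* Sketch: report which cache a slab folio belongs to, or NULL. */
static const char *slab_folio_cache_name(struct folio *folio)
{
	struct slab *slab;

	if (!folio_test_slab(folio))
		return NULL;

	slab = folio_slab(folio);	/* folio -> slab */
	/* slab_folio(slab) or slab_page(slab) convert back when needed. */
	return slab->slab_cache->name;
}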
 +
 +/*
 + * If network-based swap is enabled, sl*b must keep track of whether pages
 + * were allocated from pfmemalloc reserves.
 + */
 +static inline bool slab_test_pfmemalloc(const struct slab *slab)
 +{
 +      return folio_test_active((struct folio *)slab_folio(slab));
 +}
 +
 +static inline void slab_set_pfmemalloc(struct slab *slab)
 +{
 +      folio_set_active(slab_folio(slab));
 +}
 +
 +static inline void slab_clear_pfmemalloc(struct slab *slab)
 +{
 +      folio_clear_active(slab_folio(slab));
 +}
 +
 +static inline void __slab_clear_pfmemalloc(struct slab *slab)
 +{
 +      __folio_clear_active(slab_folio(slab));
 +}
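For example, an allocator could propagate pfmemalloc status from the backing page when a slab is created. A minimal sketch with a hypothetical helper name:

/* Sketch: mark a freshly allocated slab whose backing page is pfmemalloc. */
static inline void mark_slab_pfmemalloc(struct slab *slab)
{
	if (page_is_pfmemalloc(slab_page(slab)))
		slab_set_pfmemalloc(slab);
}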
 +
 +static inline void *slab_address(const struct slab *slab)
 +{
 +      return folio_address(slab_folio(slab));
 +}
 +
 +static inline int slab_nid(const struct slab *slab)
 +{
 +      return folio_nid(slab_folio(slab));
 +}
 +
 +static inline pg_data_t *slab_pgdat(const struct slab *slab)
 +{
 +      return folio_pgdat(slab_folio(slab));
 +}
 +
 +static inline struct slab *virt_to_slab(const void *addr)
 +{
 +      struct folio *folio = virt_to_folio(addr);
 +
 +      if (!folio_test_slab(folio))
 +              return NULL;
 +
 +      return folio_slab(folio);
 +}
 +
 +static inline int slab_order(const struct slab *slab)
 +{
 +      return folio_order((struct folio *)slab_folio(slab));
 +}
 +
 +static inline size_t slab_size(const struct slab *slab)
 +{
 +      return PAGE_SIZE << slab_order(slab);
 +}
 +
  #ifdef CONFIG_SLOB
  /*
   * Common fields provided in kmem_cache by all slab allocators
@@@ -436,33 -245,15 +436,33 @@@ static inline bool kmem_cache_debug_fla
  }
  
  #ifdef CONFIG_MEMCG_KMEM
 -int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
 -                               gfp_t gfp, bool new_page);
 +/*
 + * slab_objcgs - get the object cgroups vector associated with a slab
 + * @slab: a pointer to the slab struct
 + *
 + * Returns a pointer to the object cgroups vector associated with the slab,
 + * or NULL if no such vector has been associated yet.
 + */
 +static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
 +{
 +      unsigned long memcg_data = READ_ONCE(slab->memcg_data);
 +
 +      VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS),
 +                                                      slab_page(slab));
 +      VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, slab_page(slab));
 +
 +      return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
 +}
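For illustration, an accessor built on this (hypothetical name, mirroring the lookup done in memcg_slab_free_hook() below) could fetch the obj_cgroup charged for a single object:

/* Sketch: look up the obj_cgroup charged for @objp in cache @s, if any. */
static struct obj_cgroup *obj_cgroup_of(struct kmem_cache *s, void *objp)
{
	struct slab *slab = virt_to_slab(objp);
	struct obj_cgroup **objcgs;

	if (!slab)
		return NULL;	/* e.g. a kmalloc_large() allocation */
	objcgs = slab_objcgs(slab);
	if (!objcgs)
		return NULL;
	return objcgs[obj_to_index(s, slab, objp)];
}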
 +
 +int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
 +                               gfp_t gfp, bool new_slab);
  void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
                     enum node_stat_item idx, int nr);
  
 -static inline void memcg_free_page_obj_cgroups(struct page *page)
 +static inline void memcg_free_slab_cgroups(struct slab *slab)
  {
 -      kfree(page_objcgs(page));
 -      page->memcg_data = 0;
 +      kfree(slab_objcgs(slab));
 +      slab->memcg_data = 0;
  }
  
  static inline size_t obj_full_size(struct kmem_cache *s)
@@@ -507,7 -298,7 +507,7 @@@ static inline void memcg_slab_post_allo
                                              gfp_t flags, size_t size,
                                              void **p)
  {
 -      struct page *page;
 +      struct slab *slab;
        unsigned long off;
        size_t i;
  
  
        for (i = 0; i < size; i++) {
                if (likely(p[i])) {
 -                      page = virt_to_head_page(p[i]);
 +                      slab = virt_to_slab(p[i]);
  
 -                      if (!page_objcgs(page) &&
 -                          memcg_alloc_page_obj_cgroups(page, s, flags,
 +                      if (!slab_objcgs(slab) &&
 +                          memcg_alloc_slab_cgroups(slab, s, flags,
                                                         false)) {
                                obj_cgroup_uncharge(objcg, obj_full_size(s));
                                continue;
                        }
  
 -                      off = obj_to_index(s, page, p[i]);
 +                      off = obj_to_index(s, slab, p[i]);
                        obj_cgroup_get(objcg);
 -                      page_objcgs(page)[off] = objcg;
 -                      mod_objcg_state(objcg, page_pgdat(page),
 +                      slab_objcgs(slab)[off] = objcg;
 +                      mod_objcg_state(objcg, slab_pgdat(slab),
                                        cache_vmstat_idx(s), obj_full_size(s));
                } else {
                        obj_cgroup_uncharge(objcg, obj_full_size(s));
@@@ -543,7 -334,7 +543,7 @@@ static inline void memcg_slab_free_hook
        struct kmem_cache *s;
        struct obj_cgroup **objcgs;
        struct obj_cgroup *objcg;
 -      struct page *page;
 +      struct slab *slab;
        unsigned int off;
        int i;
  
                if (unlikely(!p[i]))
                        continue;
  
 -              page = virt_to_head_page(p[i]);
 -              objcgs = page_objcgs_check(page);
 +              slab = virt_to_slab(p[i]);
 +              /* we could be given a kmalloc_large() object, skip those */
 +              if (!slab)
 +                      continue;
 +
 +              objcgs = slab_objcgs(slab);
                if (!objcgs)
                        continue;
  
                if (!s_orig)
 -                      s = page->slab_cache;
 +                      s = slab->slab_cache;
                else
                        s = s_orig;
  
 -              off = obj_to_index(s, page, p[i]);
 +              off = obj_to_index(s, slab, p[i]);
                objcg = objcgs[off];
                if (!objcg)
                        continue;
  
                objcgs[off] = NULL;
                obj_cgroup_uncharge(objcg, obj_full_size(s));
 -              mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s),
 +              mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
                                -obj_full_size(s));
                obj_cgroup_put(objcg);
        }
  }
  
  #else /* CONFIG_MEMCG_KMEM */
 +static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
 +{
 +      return NULL;
 +}
 +
  static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
  {
        return NULL;
  }
  
 -static inline int memcg_alloc_page_obj_cgroups(struct page *page,
 +static inline int memcg_alloc_slab_cgroups(struct slab *slab,
                                               struct kmem_cache *s, gfp_t gfp,
 -                                             bool new_page)
 +                                             bool new_slab)
  {
        return 0;
  }
  
 -static inline void memcg_free_page_obj_cgroups(struct page *page)
 +static inline void memcg_free_slab_cgroups(struct slab *slab)
  {
  }
  
@@@ -623,35 -405,35 +623,35 @@@ static inline void memcg_slab_free_hook
  }
  #endif /* CONFIG_MEMCG_KMEM */
  
 +#ifndef CONFIG_SLOB
  static inline struct kmem_cache *virt_to_cache(const void *obj)
  {
 -      struct page *page;
 +      struct slab *slab;
  
 -      page = virt_to_head_page(obj);
 -      if (WARN_ONCE(!PageSlab(page), "%s: Object is not a Slab page!\n",
 +      slab = virt_to_slab(obj);
 +      if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n",
                                        __func__))
                return NULL;
 -      return page->slab_cache;
 +      return slab->slab_cache;
  }
  
 -static __always_inline void account_slab_page(struct page *page, int order,
 -                                            struct kmem_cache *s,
 -                                            gfp_t gfp)
 +static __always_inline void account_slab(struct slab *slab, int order,
 +                                       struct kmem_cache *s, gfp_t gfp)
  {
        if (memcg_kmem_enabled() && (s->flags & SLAB_ACCOUNT))
 -              memcg_alloc_page_obj_cgroups(page, s, gfp, true);
 +              memcg_alloc_slab_cgroups(slab, s, gfp, true);
  
 -      mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
 +      mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
                            PAGE_SIZE << order);
  }
  
 -static __always_inline void unaccount_slab_page(struct page *page, int order,
 -                                              struct kmem_cache *s)
 +static __always_inline void unaccount_slab(struct slab *slab, int order,
 +                                         struct kmem_cache *s)
  {
        if (memcg_kmem_enabled())
 -              memcg_free_page_obj_cgroups(page);
 +              memcg_free_slab_cgroups(slab);
  
 -      mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
 +      mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
                            -(PAGE_SIZE << order));
  }
  
@@@ -670,7 -452,6 +670,7 @@@ static inline struct kmem_cache *cache_
                print_tracking(cachep, x);
        return cachep;
  }
 +#endif /* CONFIG_SLOB */
  
  static inline size_t slab_ksize(const struct kmem_cache *s)
  {
@@@ -794,11 -575,6 +794,6 @@@ static inline struct kmem_cache_node *g
  
  #endif
  
- void *slab_start(struct seq_file *m, loff_t *pos);
- void *slab_next(struct seq_file *m, void *p, loff_t *pos);
- void slab_stop(struct seq_file *m, void *p);
- int memcg_slab_show(struct seq_file *m, void *p);
  #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
  void dump_unreclaimable_slab(void);
  #else
@@@ -854,7 -630,7 +849,7 @@@ static inline void debugfs_slab_release
  #define KS_ADDRS_COUNT 16
  struct kmem_obj_info {
        void *kp_ptr;
 -      struct page *kp_page;
 +      struct slab *kp_slab;
        void *kp_objp;
        unsigned long kp_data_offset;
        struct kmem_cache *kp_slab_cache;
        void *kp_stack[KS_ADDRS_COUNT];
        void *kp_free_stack[KS_ADDRS_COUNT];
  };
 -void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page);
 +void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab);
 +#endif
 +
 +#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
 +void __check_heap_object(const void *ptr, unsigned long n,
 +                       const struct slab *slab, bool to_user);
 +#else
 +static inline
 +void __check_heap_object(const void *ptr, unsigned long n,
 +                       const struct slab *slab, bool to_user)
 +{
 +}
  #endif
  
  #endif /* MM_SLAB_H */
diff --combined mm/slab_common.c
index dc15566141d4447bda26128a9a0158c5128ee081,9513244457e6f4653aa056fca914e36f6be9b7fd..23f2ab0713b7748accfb7c1154f1f93b412be67c
@@@ -489,9 -489,7 +489,7 @@@ void slab_kmem_cache_release(struct kme
  
  void kmem_cache_destroy(struct kmem_cache *s)
  {
-       int err;
-       if (unlikely(!s))
+       if (unlikely(!s) || !kasan_check_byte(s))
                return;
  
        cpus_read_lock();
        if (s->refcount)
                goto out_unlock;
  
-       err = shutdown_cache(s);
-       if (err) {
-               pr_err("%s %s: Slab cache still has objects\n",
-                      __func__, s->name);
-               dump_stack();
-       }
+       WARN(shutdown_cache(s),
+            "%s %s: Slab cache still has objects when called from %pS",
+            __func__, s->name, (void *)_RET_IP_);
  out_unlock:
        mutex_unlock(&slab_mutex);
        cpus_read_unlock();
@@@ -550,13 -545,13 +545,13 @@@ bool slab_is_available(void
   */
  bool kmem_valid_obj(void *object)
  {
 -      struct page *page;
 +      struct folio *folio;
  
        /* Some arches consider ZERO_SIZE_PTR to be a valid address. */
        if (object < (void *)PAGE_SIZE || !virt_addr_valid(object))
                return false;
 -      page = virt_to_head_page(object);
 -      return PageSlab(page);
 +      folio = virt_to_folio(object);
 +      return folio_test_slab(folio);
  }
  EXPORT_SYMBOL_GPL(kmem_valid_obj);
  
@@@ -579,18 -574,18 +574,18 @@@ void kmem_dump_obj(void *object
  {
        char *cp = IS_ENABLED(CONFIG_MMU) ? "" : "/vmalloc";
        int i;
 -      struct page *page;
 +      struct slab *slab;
        unsigned long ptroffset;
        struct kmem_obj_info kp = { };
  
        if (WARN_ON_ONCE(!virt_addr_valid(object)))
                return;
 -      page = virt_to_head_page(object);
 -      if (WARN_ON_ONCE(!PageSlab(page))) {
 +      slab = virt_to_slab(object);
 +      if (WARN_ON_ONCE(!slab)) {
                pr_cont(" non-slab memory.\n");
                return;
        }
 -      kmem_obj_info(&kp, object, page);
 +      kmem_obj_info(&kp, object, slab);
        if (kp.kp_slab_cache)
                pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name);
        else
@@@ -824,7 -819,7 +819,7 @@@ void __init setup_kmalloc_cache_index_t
  
        if (KMALLOC_MIN_SIZE >= 64) {
                /*
-                * The 96 byte size cache is not used if the alignment
+                * The 96 byte sized cache is not used if the alignment
                 * is 64 byte.
                 */
                for (i = 64 + 8; i <= 96; i += 8)
@@@ -849,7 -844,7 +844,7 @@@ new_kmalloc_cache(int idx, enum kmalloc
        if (type == KMALLOC_RECLAIM) {
                flags |= SLAB_RECLAIM_ACCOUNT;
        } else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) {
-               if (cgroup_memory_nokmem) {
+               if (mem_cgroup_kmem_disabled()) {
                        kmalloc_caches[type][idx] = kmalloc_caches[KMALLOC_NORMAL][idx];
                        return;
                }
@@@ -1044,18 -1039,18 +1039,18 @@@ static void print_slabinfo_header(struc
        seq_putc(m, '\n');
  }
  
- void *slab_start(struct seq_file *m, loff_t *pos)
+ static void *slab_start(struct seq_file *m, loff_t *pos)
  {
        mutex_lock(&slab_mutex);
        return seq_list_start(&slab_caches, *pos);
  }
  
- void *slab_next(struct seq_file *m, void *p, loff_t *pos)
+ static void *slab_next(struct seq_file *m, void *p, loff_t *pos)
  {
        return seq_list_next(p, &slab_caches, pos);
  }
  
- void slab_stop(struct seq_file *m, void *p)
+ static void slab_stop(struct seq_file *m, void *p)
  {
        mutex_unlock(&slab_mutex);
  }
@@@ -1123,17 -1118,6 +1118,6 @@@ void dump_unreclaimable_slab(void
        mutex_unlock(&slab_mutex);
  }
  
- #if defined(CONFIG_MEMCG_KMEM)
- int memcg_slab_show(struct seq_file *m, void *p)
- {
-       /*
-        * Deprecated.
-        * Please, take a look at tools/cgroup/slabinfo.py .
-        */
-       return 0;
- }
- #endif
  /*
   * slabinfo_op - iterator that generates /proc/slabinfo
   *
diff --combined mm/swap.c
index 74f6b311d7eefea87318e5322b10416cc035994b,b461814ce0cb6363250b343806fa2d2b9ab307b8..bcf3ac288b56d560dbfd82ec8c183898e9a3c586
+++ b/mm/swap.c
@@@ -882,7 -882,7 +882,7 @@@ void lru_cache_disable(void
         * all online CPUs so any calls of lru_cache_disabled wrapped by
         * local_lock or preemption disabled would be ordered by that.
         * The atomic operation doesn't need to have stronger ordering
-        * requirements because that is enforeced by the scheduling
+        * requirements because that is enforced by the scheduling
         * guarantees.
         */
        __lru_add_drain_all(true);
@@@ -1077,24 -1077,24 +1077,24 @@@ void __pagevec_lru_add(struct pagevec *
  }
  
  /**
 - * pagevec_remove_exceptionals - pagevec exceptionals pruning
 - * @pvec:     The pagevec to prune
 + * folio_batch_remove_exceptionals() - Prune non-folios from a batch.
 + * @fbatch: The batch to prune
   *
 - * find_get_entries() fills both pages and XArray value entries (aka
 - * exceptional entries) into the pagevec.  This function prunes all
 - * exceptionals from @pvec without leaving holes, so that it can be
 - * passed on to page-only pagevec operations.
 + * find_get_entries() fills a batch with both folios and shadow/swap/DAX
 + * entries.  This function prunes all the non-folio entries from @fbatch
 + * without leaving holes, so that it can be passed on to folio-only batch
 + * operations.
   */
 -void pagevec_remove_exceptionals(struct pagevec *pvec)
 +void folio_batch_remove_exceptionals(struct folio_batch *fbatch)
  {
 -      int i, j;
 +      unsigned int i, j;
  
 -      for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
 -              struct page *page = pvec->pages[i];
 -              if (!xa_is_value(page))
 -                      pvec->pages[j++] = page;
 +      for (i = 0, j = 0; i < folio_batch_count(fbatch); i++) {
 +              struct folio *folio = fbatch->folios[i];
 +              if (!xa_is_value(folio))
 +                      fbatch->folios[j++] = folio;
        }
 -      pvec->nr = j;
 +      fbatch->nr = j;
  }
  
  /**
diff --combined mm/truncate.c
index 5c87cdc70e7bf5c60614d445910042ac39a23cc2,41b8249b3b4aa0a9194f21b6d254f52585f56ead..5e243d7269c0f9d2e70625b70ecca9c4ae4f439c
@@@ -56,11 -56,11 +56,11 @@@ static void clear_shadow_entry(struct a
  
  /*
   * Unconditionally remove exceptional entries. Usually called from truncate
 - * path. Note that the pagevec may be altered by this function by removing
 - * exceptional entries similar to what pagevec_remove_exceptionals does.
 + * path. Note that the folio_batch may be altered by this function by removing
 + * exceptional entries similar to what folio_batch_remove_exceptionals() does.
   */
 -static void truncate_exceptional_pvec_entries(struct address_space *mapping,
 -                              struct pagevec *pvec, pgoff_t *indices)
 +static void truncate_folio_batch_exceptionals(struct address_space *mapping,
 +                              struct folio_batch *fbatch, pgoff_t *indices)
  {
        int i, j;
        bool dax;
        if (shmem_mapping(mapping))
                return;
  
 -      for (j = 0; j < pagevec_count(pvec); j++)
 -              if (xa_is_value(pvec->pages[j]))
 +      for (j = 0; j < folio_batch_count(fbatch); j++)
 +              if (xa_is_value(fbatch->folios[j]))
                        break;
  
 -      if (j == pagevec_count(pvec))
 +      if (j == folio_batch_count(fbatch))
                return;
  
        dax = dax_mapping(mapping);
                xa_lock_irq(&mapping->i_pages);
        }
  
 -      for (i = j; i < pagevec_count(pvec); i++) {
 -              struct page *page = pvec->pages[i];
 +      for (i = j; i < folio_batch_count(fbatch); i++) {
 +              struct folio *folio = fbatch->folios[i];
                pgoff_t index = indices[i];
  
 -              if (!xa_is_value(page)) {
 -                      pvec->pages[j++] = page;
 +              if (!xa_is_value(folio)) {
 +                      fbatch->folios[j++] = folio;
                        continue;
                }
  
@@@ -96,7 -96,7 +96,7 @@@
                        continue;
                }
  
 -              __clear_shadow_entry(mapping, index, page);
 +              __clear_shadow_entry(mapping, index, folio);
        }
  
        if (!dax) {
                        inode_add_lru(mapping->host);
                spin_unlock(&mapping->host->i_lock);
        }
 -      pvec->nr = j;
 +      fbatch->nr = j;
  }
  
  /*
@@@ -177,21 -177,21 +177,21 @@@ void do_invalidatepage(struct page *pag
   * its lock, b) when a concurrent invalidate_mapping_pages got there first and
   * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
   */
 -static void truncate_cleanup_page(struct page *page)
 +static void truncate_cleanup_folio(struct folio *folio)
  {
 -      if (page_mapped(page))
 -              unmap_mapping_page(page);
 +      if (folio_mapped(folio))
 +              unmap_mapping_folio(folio);
  
 -      if (page_has_private(page))
 -              do_invalidatepage(page, 0, thp_size(page));
 +      if (folio_has_private(folio))
 +              do_invalidatepage(&folio->page, 0, folio_size(folio));
  
        /*
         * Some filesystems seem to re-dirty the page even after
         * the VM has canceled the dirty bit (eg ext3 journaling).
         * Hence dirty accounting check is placed after invalidation.
         */
 -      cancel_dirty_page(page);
 -      ClearPageMappedToDisk(page);
 +      folio_cancel_dirty(folio);
 +      folio_clear_mappedtodisk(folio);
  }
  
  /*
  static int
  invalidate_complete_page(struct address_space *mapping, struct page *page)
  {
-       int ret;
  
        if (page->mapping != mapping)
                return 0;
        if (page_has_private(page) && !try_to_release_page(page, 0))
                return 0;
  
-       ret = remove_mapping(mapping, page);
-       return ret;
+       return remove_mapping(mapping, page);
  }
  
 -int truncate_inode_page(struct address_space *mapping, struct page *page)
 +int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
  {
 -      VM_BUG_ON_PAGE(PageTail(page), page);
 -
 -      if (page->mapping != mapping)
 +      if (folio->mapping != mapping)
                return -EIO;
  
 -      truncate_cleanup_page(page);
 -      delete_from_page_cache(page);
 +      truncate_cleanup_folio(folio);
 +      filemap_remove_folio(folio);
        return 0;
  }
  
 +/*
 + * Handle partial folios.  The folio may be entirely within the
 + * range if a split has raced with us.  If not, we zero the part of the
 + * folio that's within the [start, end] range, and then split the folio if
 +  * it's large.  split_huge_page() will discard pages which now lie beyond
 + * i_size, and we rely on the caller to discard pages which lie within a
 + * newly created hole.
 + *
 + * Returns false if splitting failed so the caller can avoid
 + * discarding the entire folio which is stubbornly unsplit.
 + */
 +bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
 +{
 +      loff_t pos = folio_pos(folio);
 +      unsigned int offset, length;
 +
 +      if (pos < start)
 +              offset = start - pos;
 +      else
 +              offset = 0;
 +      length = folio_size(folio);
 +      if (pos + length <= (u64)end)
 +              length = length - offset;
 +      else
 +              length = end + 1 - pos - offset;
 +
 +      folio_wait_writeback(folio);
 +      if (length == folio_size(folio)) {
 +              truncate_inode_folio(folio->mapping, folio);
 +              return true;
 +      }
 +
 +      /*
 +       * We may be zeroing pages we're about to discard, but it avoids
 +       * doing a complex calculation here, and then doing the zeroing
 +       * anyway if the page split fails.
 +       */
 +      folio_zero_range(folio, offset, length);
 +
 +      cleancache_invalidate_page(folio->mapping, &folio->page);
 +      if (folio_has_private(folio))
 +              do_invalidatepage(&folio->page, offset, length);
 +      if (!folio_test_large(folio))
 +              return true;
 +      if (split_huge_page(&folio->page) == 0)
 +              return true;
 +      if (folio_test_dirty(folio))
 +              return false;
 +      truncate_inode_folio(folio->mapping, folio);
 +      return true;
 +}
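A condensed sketch of the calling pattern described above (hypothetical helper; the real callers, truncate_inode_pages_range() and shmem_undo_range() elsewhere in this diff, do the same with extra bookkeeping such as handling the same-folio case).

/*
 * Sketch: truncate the folio straddling @lstart; if it could not be split,
 * advance *start past it so the caller skips it instead of discarding it.
 */
static void truncate_partial_start(struct address_space *mapping,
				   loff_t lstart, loff_t lend, pgoff_t *start)
{
	struct folio *folio;

	folio = __filemap_get_folio(mapping, lstart >> PAGE_SHIFT, FGP_LOCK, 0);
	if (!folio)
		return;
	if (!truncate_inode_partial_folio(folio, lstart, lend))
		*start = folio->index + folio_nr_pages(folio);
	folio_unlock(folio);
	folio_put(folio);
}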
 +
  /*
   * Used to get rid of pages on hardware memory corruption.
   */
  int generic_error_remove_page(struct address_space *mapping, struct page *page)
  {
 +      VM_BUG_ON_PAGE(PageTail(page), page);
 +
        if (!mapping)
                return -EINVAL;
        /*
         */
        if (!S_ISREG(mapping->host->i_mode))
                return -EIO;
 -      return truncate_inode_page(mapping, page);
 +      return truncate_inode_folio(mapping, page_folio(page));
  }
  EXPORT_SYMBOL(generic_error_remove_page);
  
@@@ -346,16 -291,20 +343,16 @@@ void truncate_inode_pages_range(struct 
  {
        pgoff_t         start;          /* inclusive */
        pgoff_t         end;            /* exclusive */
 -      unsigned int    partial_start;  /* inclusive */
 -      unsigned int    partial_end;    /* exclusive */
 -      struct pagevec  pvec;
 +      struct folio_batch fbatch;
        pgoff_t         indices[PAGEVEC_SIZE];
        pgoff_t         index;
        int             i;
 +      struct folio    *folio;
 +      bool            same_folio;
  
        if (mapping_empty(mapping))
                goto out;
  
 -      /* Offsets within partial pages */
 -      partial_start = lstart & (PAGE_SIZE - 1);
 -      partial_end = (lend + 1) & (PAGE_SIZE - 1);
 -
        /*
         * 'start' and 'end' always covers the range of pages to be fully
         * truncated. Partial pages are covered with 'partial_start' at the
        else
                end = (lend + 1) >> PAGE_SHIFT;
  
 -      pagevec_init(&pvec);
 +      folio_batch_init(&fbatch);
        index = start;
        while (index < end && find_lock_entries(mapping, index, end - 1,
 -                      &pvec, indices)) {
 -              index = indices[pagevec_count(&pvec) - 1] + 1;
 -              truncate_exceptional_pvec_entries(mapping, &pvec, indices);
 -              for (i = 0; i < pagevec_count(&pvec); i++)
 -                      truncate_cleanup_page(pvec.pages[i]);
 -              delete_from_page_cache_batch(mapping, &pvec);
 -              for (i = 0; i < pagevec_count(&pvec); i++)
 -                      unlock_page(pvec.pages[i]);
 -              pagevec_release(&pvec);
 +                      &fbatch, indices)) {
 +              index = indices[folio_batch_count(&fbatch) - 1] + 1;
 +              truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
 +              for (i = 0; i < folio_batch_count(&fbatch); i++)
 +                      truncate_cleanup_folio(fbatch.folios[i]);
 +              delete_from_page_cache_batch(mapping, &fbatch);
 +              for (i = 0; i < folio_batch_count(&fbatch); i++)
 +                      folio_unlock(fbatch.folios[i]);
 +              folio_batch_release(&fbatch);
                cond_resched();
        }
  
 -      if (partial_start) {
 -              struct page *page = find_lock_page(mapping, start - 1);
 -              if (page) {
 -                      unsigned int top = PAGE_SIZE;
 -                      if (start > end) {
 -                              /* Truncation within a single page */
 -                              top = partial_end;
 -                              partial_end = 0;
 -                      }
 -                      wait_on_page_writeback(page);
 -                      zero_user_segment(page, partial_start, top);
 -                      cleancache_invalidate_page(mapping, page);
 -                      if (page_has_private(page))
 -                              do_invalidatepage(page, partial_start,
 -                                                top - partial_start);
 -                      unlock_page(page);
 -                      put_page(page);
 +      same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
 +      folio = __filemap_get_folio(mapping, lstart >> PAGE_SHIFT, FGP_LOCK, 0);
 +      if (folio) {
 +              same_folio = lend < folio_pos(folio) + folio_size(folio);
 +              if (!truncate_inode_partial_folio(folio, lstart, lend)) {
 +                      start = folio->index + folio_nr_pages(folio);
 +                      if (same_folio)
 +                              end = folio->index;
                }
 +              folio_unlock(folio);
 +              folio_put(folio);
 +              folio = NULL;
        }
 -      if (partial_end) {
 -              struct page *page = find_lock_page(mapping, end);
 -              if (page) {
 -                      wait_on_page_writeback(page);
 -                      zero_user_segment(page, 0, partial_end);
 -                      cleancache_invalidate_page(mapping, page);
 -                      if (page_has_private(page))
 -                              do_invalidatepage(page, 0,
 -                                                partial_end);
 -                      unlock_page(page);
 -                      put_page(page);
 -              }
 +
 +      if (!same_folio)
 +              folio = __filemap_get_folio(mapping, lend >> PAGE_SHIFT,
 +                                              FGP_LOCK, 0);
 +      if (folio) {
 +              if (!truncate_inode_partial_folio(folio, lstart, lend))
 +                      end = folio->index;
 +              folio_unlock(folio);
 +              folio_put(folio);
        }
 -      /*
 -       * If the truncation happened within a single page no pages
 -       * will be released, just zeroed, so we can bail out now.
 -       */
 -      if (start >= end)
 -              goto out;
  
        index = start;
 -      for ( ; ; ) {
 +      while (index < end) {
                cond_resched();
 -              if (!find_get_entries(mapping, index, end - 1, &pvec,
 +              if (!find_get_entries(mapping, index, end - 1, &fbatch,
                                indices)) {
                        /* If all gone from start onwards, we're done */
                        if (index == start)
                        continue;
                }
  
 -              for (i = 0; i < pagevec_count(&pvec); i++) {
 -                      struct page *page = pvec.pages[i];
 +              for (i = 0; i < folio_batch_count(&fbatch); i++) {
 +                      struct folio *folio = fbatch.folios[i];
  
                        /* We rely upon deletion not changing page->index */
                        index = indices[i];
  
 -                      if (xa_is_value(page))
 +                      if (xa_is_value(folio))
                                continue;
  
 -                      lock_page(page);
 -                      WARN_ON(page_to_index(page) != index);
 -                      wait_on_page_writeback(page);
 -                      truncate_inode_page(mapping, page);
 -                      unlock_page(page);
 +                      folio_lock(folio);
 +                      VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
 +                      folio_wait_writeback(folio);
 +                      truncate_inode_folio(mapping, folio);
 +                      folio_unlock(folio);
 +                      index = folio_index(folio) + folio_nr_pages(folio) - 1;
                }
 -              truncate_exceptional_pvec_entries(mapping, &pvec, indices);
 -              pagevec_release(&pvec);
 +              truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
 +              folio_batch_release(&fbatch);
                index++;
        }
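
For readers following the truncate_inode_pages_range() hunk above, here is a minimal sketch of the new batch loop on the folio side, written as if it sat inside mm/truncate.c where struct folio_batch and the mm-internal lookup helpers are visible; the _sketch name is illustrative and not part of the commit.

/* Sketch: bulk-truncate the fully covered range [start, end) of a mapping. */
static void truncate_range_batch_sketch(struct address_space *mapping,
					pgoff_t start, pgoff_t end)
{
	struct folio_batch fbatch;
	pgoff_t indices[PAGEVEC_SIZE];
	pgoff_t index = start;
	int i;

	folio_batch_init(&fbatch);
	while (index < end && find_lock_entries(mapping, index, end - 1,
				&fbatch, indices)) {
		/* Skip past the last entry this lookup returned. */
		index = indices[folio_batch_count(&fbatch) - 1] + 1;
		/* Drop shadow/DAX value entries before touching real folios. */
		truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
		for (i = 0; i < folio_batch_count(&fbatch); i++)
			truncate_cleanup_folio(fbatch.folios[i]);
		/* Remove the whole locked batch from the page cache in one pass. */
		delete_from_page_cache_batch(mapping, &fbatch);
		for (i = 0; i < folio_batch_count(&fbatch); i++)
			folio_unlock(fbatch.folios[i]);
		folio_batch_release(&fbatch);
		cond_resched();
	}
}

The ordering matches the removed pagevec version: exceptional entries are stripped first, each folio is cleaned up, the batch is deleted from the cache at once, and only then are the folios unlocked and released.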
  
@@@ -513,16 -476,16 +510,16 @@@ static unsigned long __invalidate_mappi
                pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
  {
        pgoff_t indices[PAGEVEC_SIZE];
 -      struct pagevec pvec;
 +      struct folio_batch fbatch;
        pgoff_t index = start;
        unsigned long ret;
        unsigned long count = 0;
        int i;
  
 -      pagevec_init(&pvec);
 -      while (find_lock_entries(mapping, index, end, &pvec, indices)) {
 -              for (i = 0; i < pagevec_count(&pvec); i++) {
 -                      struct page *page = pvec.pages[i];
 +      folio_batch_init(&fbatch);
 +      while (find_lock_entries(mapping, index, end, &fbatch, indices)) {
 +              for (i = 0; i < folio_batch_count(&fbatch); i++) {
 +                      struct page *page = &fbatch.folios[i]->page;
  
                        /* We rely upon deletion not changing page->index */
                        index = indices[i];
                        }
                        count += ret;
                }
 -              pagevec_remove_exceptionals(&pvec);
 -              pagevec_release(&pvec);
 +              folio_batch_remove_exceptionals(&fbatch);
 +              folio_batch_release(&fbatch);
                cond_resched();
                index++;
        }
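
The __invalidate_mapping_pages() hunk above only converts the batch type; the downstream invalidation work is still page-based, so the loop recovers the head page from each batch slot. A tiny illustrative helper showing that bridge, again assuming it lives in mm/truncate.c; fbatch_head_page() is a made-up name, not a kernel function.

/* Sketch: return the head page backing slot i of a folio batch. */
static inline struct page *fbatch_head_page(struct folio_batch *fbatch, int i)
{
	/* A folio embeds its head struct page, so no lookup or cast is needed. */
	return &fbatch->folios[i]->page;
}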
@@@ -602,29 -565,31 +599,29 @@@ void invalidate_mapping_pagevec(struct 
   * shrink_page_list() has a temp ref on them, or because they're transiently
   * sitting in the lru_cache_add() pagevecs.
   */
 -static int
 -invalidate_complete_page2(struct address_space *mapping, struct page *page)
 +static int invalidate_complete_folio2(struct address_space *mapping,
 +                                      struct folio *folio)
  {
 -      if (page->mapping != mapping)
 +      if (folio->mapping != mapping)
                return 0;
  
 -      if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
 +      if (folio_has_private(folio) &&
 +          !filemap_release_folio(folio, GFP_KERNEL))
                return 0;
  
        spin_lock(&mapping->host->i_lock);
        xa_lock_irq(&mapping->i_pages);
 -      if (PageDirty(page))
 +      if (folio_test_dirty(folio))
                goto failed;
  
 -      BUG_ON(page_has_private(page));
 -      __delete_from_page_cache(page, NULL);
 +      BUG_ON(folio_has_private(folio));
 +      __filemap_remove_folio(folio, NULL);
        xa_unlock_irq(&mapping->i_pages);
        if (mapping_shrinkable(mapping))
                inode_add_lru(mapping->host);
        spin_unlock(&mapping->host->i_lock);
  
 -      if (mapping->a_ops->freepage)
 -              mapping->a_ops->freepage(page);
 -
 -      put_page(page); /* pagecache ref */
 +      filemap_free_folio(mapping, folio);
        return 1;
  failed:
        xa_unlock_irq(&mapping->i_pages);
        return 0;
  }
  
 -static int do_launder_page(struct address_space *mapping, struct page *page)
 +static int do_launder_folio(struct address_space *mapping, struct folio *folio)
  {
 -      if (!PageDirty(page))
 +      if (!folio_test_dirty(folio))
                return 0;
 -      if (page->mapping != mapping || mapping->a_ops->launder_page == NULL)
 +      if (folio->mapping != mapping || mapping->a_ops->launder_page == NULL)
                return 0;
 -      return mapping->a_ops->launder_page(page);
 +      return mapping->a_ops->launder_page(&folio->page);
  }
  
  /**
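
The two conversions above follow the same pattern: tests move to the folio API (folio_test_dirty(), folio_has_private()), while address_space operations that are still declared on struct page, such as ->launder_page, are reached through the folio's embedded head page. A condensed sketch of that bridge as it reads in the new do_launder_folio(), assuming the usual mm/truncate.c context; the _sketch suffix is illustrative.

/* Sketch: launder a dirty folio through the still page-based ->launder_page op. */
static int do_launder_folio_sketch(struct address_space *mapping,
				   struct folio *folio)
{
	if (!folio_test_dirty(folio))
		return 0;
	if (folio->mapping != mapping || mapping->a_ops->launder_page == NULL)
		return 0;
	/* ->launder_page still takes a struct page, so hand it the head page. */
	return mapping->a_ops->launder_page(&folio->page);
}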
@@@ -656,7 -621,7 +653,7 @@@ int invalidate_inode_pages2_range(struc
                                  pgoff_t start, pgoff_t end)
  {
        pgoff_t indices[PAGEVEC_SIZE];
 -      struct pagevec pvec;
 +      struct folio_batch fbatch;
        pgoff_t index;
        int i;
        int ret = 0;
        if (mapping_empty(mapping))
                goto out;
  
 -      pagevec_init(&pvec);
 +      folio_batch_init(&fbatch);
        index = start;
 -      while (find_get_entries(mapping, index, end, &pvec, indices)) {
 -              for (i = 0; i < pagevec_count(&pvec); i++) {
 -                      struct page *page = pvec.pages[i];
 +      while (find_get_entries(mapping, index, end, &fbatch, indices)) {
 +              for (i = 0; i < folio_batch_count(&fbatch); i++) {
 +                      struct folio *folio = fbatch.folios[i];
  
 -                      /* We rely upon deletion not changing page->index */
 +                      /* We rely upon deletion not changing folio->index */
                        index = indices[i];
  
 -                      if (xa_is_value(page)) {
 +                      if (xa_is_value(folio)) {
                                if (!invalidate_exceptional_entry2(mapping,
 -                                                                 index, page))
 +                                              index, folio))
                                        ret = -EBUSY;
                                continue;
                        }
  
 -                      if (!did_range_unmap && page_mapped(page)) {
 +                      if (!did_range_unmap && folio_mapped(folio)) {
                                /*
 -                               * If page is mapped, before taking its lock,
 +                               * If folio is mapped, before taking its lock,
                                 * zap the rest of the file in one hit.
                                 */
                                unmap_mapping_pages(mapping, index,
                                did_range_unmap = 1;
                        }
  
 -                      lock_page(page);
 -                      WARN_ON(page_to_index(page) != index);
 -                      if (page->mapping != mapping) {
 -                              unlock_page(page);
 +                      folio_lock(folio);
 +                      VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
 +                      if (folio->mapping != mapping) {
 +                              folio_unlock(folio);
                                continue;
                        }
 -                      wait_on_page_writeback(page);
 +                      folio_wait_writeback(folio);
  
 -                      if (page_mapped(page))
 -                              unmap_mapping_page(page);
 -                      BUG_ON(page_mapped(page));
 +                      if (folio_mapped(folio))
 +                              unmap_mapping_folio(folio);
 +                      BUG_ON(folio_mapped(folio));
  
 -                      ret2 = do_launder_page(mapping, page);
 +                      ret2 = do_launder_folio(mapping, folio);
                        if (ret2 == 0) {
 -                              if (!invalidate_complete_page2(mapping, page))
 +                              if (!invalidate_complete_folio2(mapping, folio))
                                        ret2 = -EBUSY;
                        }
                        if (ret2 < 0)
                                ret = ret2;
 -                      unlock_page(page);
 +                      folio_unlock(folio);
                }
 -              pagevec_remove_exceptionals(&pvec);
 -              pagevec_release(&pvec);
 +              folio_batch_remove_exceptionals(&fbatch);
 +              folio_batch_release(&fbatch);
                cond_resched();
                index++;
        }
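
In the invalidate_inode_pages2_range() hunk above, the detail worth calling out is that the lookups return a mix of value entries (shadow, swap or DAX slots) and real folios, so every batch slot is type-checked with xa_is_value() before any folio operation runs. A condensed sketch of one pass over a batch, assuming it sits in mm/truncate.c next to the code above; the _sketch name and the elided per-folio work are illustrative.

/* Sketch: walk one batch, separating value entries from real folios. */
static int invalidate2_batch_sketch(struct address_space *mapping,
				    struct folio_batch *fbatch,
				    pgoff_t *indices)
{
	int ret = 0;
	int i;

	for (i = 0; i < folio_batch_count(fbatch); i++) {
		struct folio *folio = fbatch->folios[i];
		pgoff_t index = indices[i];

		/* Shadow/swap/DAX slots are value entries, not folios. */
		if (xa_is_value(folio)) {
			if (!invalidate_exceptional_entry2(mapping, index, folio))
				ret = -EBUSY;
			continue;
		}

		folio_lock(folio);
		/* The folio may have been truncated since the unlocked lookup. */
		if (folio->mapping != mapping) {
			folio_unlock(folio);
			continue;
		}
		folio_wait_writeback(folio);
		/* ... unmap, launder and invalidate the folio here ... */
		folio_unlock(folio);
	}
	return ret;
}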