F: drivers/gpu/drm/amd/include/v9_structs.h
F: drivers/gpu/drm/amd/include/vi_structs.h
F: include/uapi/linux/kfd_ioctl.h
+F: include/uapi/linux/kfd_sysfs.h
AMD SPI DRIVER
T: git https://gitlab.freedesktop.org/agd5f/linux.git
F: drivers/gpu/drm/amd/pm/
+AMD PSTATE DRIVER
+S: Supported
+F: Documentation/admin-guide/pm/amd-pstate.rst
+F: drivers/cpufreq/amd-pstate*
+
AMD PTDMA DRIVER
F: Documentation/devicetree/bindings/iio/adc/adi,ad7780.yaml
F: drivers/iio/adc/ad7780.c
+ANALOG DEVICES INC AD74413R DRIVER
+S: Supported
+W: http://ez.analog.com/community/linux-device-drivers
+F: Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml
+F: drivers/iio/addac/ad74413r.c
+F: include/dt-bindings/iio/addac/adi,ad74413r.h
+
ANALOG DEVICES INC AD9389B DRIVER
S: Maintained
+F: Documentation/devicetree/bindings/media/i2c/adv748x.yaml
F: drivers/media/i2c/adv748x/*
ANALOG DEVICES INC ADV7511 DRIVER
C: irc://irc.oftc.net/asahi-dev
T: git https://github.com/AsahiLinux/linux.git
F: Documentation/devicetree/bindings/arm/apple.yaml
+F: Documentation/devicetree/bindings/arm/apple/*
F: Documentation/devicetree/bindings/i2c/apple,i2c.yaml
F: Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
F: Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml
F: Documentation/devicetree/bindings/pci/apple,pcie.yaml
F: Documentation/devicetree/bindings/pinctrl/apple,pinctrl.yaml
+F: Documentation/devicetree/bindings/power/apple*
+F: Documentation/devicetree/bindings/watchdog/apple,wdt.yaml
F: arch/arm64/boot/dts/apple/
F: drivers/i2c/busses/i2c-pasemi-core.c
F: drivers/i2c/busses/i2c-pasemi-platform.c
F: drivers/irqchip/irq-apple-aic.c
F: drivers/mailbox/apple-mailbox.c
F: drivers/pinctrl/pinctrl-apple-gpio.c
+F: drivers/soc/apple/*
F: include/dt-bindings/interrupt-controller/apple-aic.h
F: include/dt-bindings/pinctrl/apple.h
F: include/linux/apple-mailbox.h
F: drivers/hwtracing/coresight/*
F: include/dt-bindings/arm/coresight-cti-dt.h
F: include/linux/coresight*
+F: samples/coresight/*
F: tools/perf/arch/arm/util/auxtrace.c
F: tools/perf/arch/arm/util/cs-etm.c
F: tools/perf/arch/arm/util/cs-etm.h
F: arch/arm/boot/dts/mstar-*
F: arch/arm/mach-mstar/
F: drivers/clk/mstar/
+F: drivers/clocksource/timer-msc313e.c
F: drivers/gpio/gpio-msc313.c
F: drivers/rtc/rtc-msc313.c
F: drivers/watchdog/msc313e_wdt.c
F: Documentation/arm/samsung/
F: Documentation/devicetree/bindings/arm/samsung/
F: Documentation/devicetree/bindings/power/pd-samsung.yaml
+F: Documentation/devicetree/bindings/soc/samsung/
F: arch/arm/boot/dts/exynos*
F: arch/arm/boot/dts/s3c*
F: arch/arm/boot/dts/s5p*
N: s5pv210
ARM/SAMSUNG S5P SERIES 2D GRAPHICS ACCELERATION (G2D) SUPPORT
-M: Andrzej Hajda <a.hajda@samsung.com>
+M: Łukasz Stelmach <l.stelmach@samsung.com>
S: Maintained
F: drivers/media/platform/s5p-jpeg/
ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT
S: Maintained
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/iwamatsu/linux-visconti.git
F: Documentation/devicetree/bindings/arm/toshiba.yaml
+F: Documentation/devicetree/bindings/clock/toshiba,tmpv770x-pipllct.yaml
+F: Documentation/devicetree/bindings/clock/toshiba,tmpv770x-pismu.yaml
F: Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml
F: Documentation/devicetree/bindings/gpio/toshiba,gpio-visconti.yaml
F: Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml
F: Documentation/devicetree/bindings/pinctrl/toshiba,visconti-pinctrl.yaml
F: Documentation/devicetree/bindings/watchdog/toshiba,visconti-wdt.yaml
F: arch/arm64/boot/dts/toshiba/
+F: drivers/clk/visconti/
F: drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
F: drivers/gpio/gpio-visconti.c
F: drivers/pci/controller/dwc/pcie-visconti.c
F: drivers/platform/x86/asus*.c
F: drivers/platform/x86/eeepc*.c
+ASUS TF103C DOCK DRIVER
+S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git
+F: drivers/platform/x86/asus-tf103c-dock.c
+
+ASUS WMI HARDWARE MONITOR DRIVER
+S: Maintained
+F: drivers/hwmon/asus_wmi_sensors.c
+
+ASUS WMI EC HARDWARE MONITOR DRIVER
+S: Maintained
+F: drivers/hwmon/asus_wmi_ec_sensors.c
+
ASUS WIRELESS RADIO CONTROL DRIVER
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
+F: Documentation/ABI/stable/sysfs-block
+F: Documentation/block/
F: block/
F: drivers/block/
F: include/linux/blk*
S: Maintained
-F: Documentation/bpf/bpf_lsm.rst
+F: Documentation/bpf/prog_lsm.rst
F: include/linux/bpf_lsm.h
F: kernel/bpf/bpf_lsm.c
F: security/bpf/
F: drivers/net/ethernet/broadcom/unimac.h
BROADCOM BCM5301X ARM ARCHITECTURE
F: arch/arm/mach-bcm/bcm_5301x.c
BROADCOM BCM53573 ARM ARCHITECTURE
S: Maintained
-F: Documentation/devicetree/bindings/usb/brcm,bdc.txt
+F: Documentation/devicetree/bindings/usb/brcm,bdc.yaml
F: drivers/usb/gadget/udc/bdc/
BROADCOM BMIPS CPUFREQ DRIVER
S: Supported
-F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt
+F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.yaml
F: drivers/gpio/gpio-brcmstb.c
BROADCOM BRCMSTB I2C DRIVER
S: Supported
-F: Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
+F: Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml
F: Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml
F: drivers/net/ethernet/broadcom/genet/
F: drivers/net/ethernet/broadcom/unimac.h
S: Maintained
-F: Documentation/devicetree/bindings/net/brcm,amac.txt
+F: Documentation/devicetree/bindings/net/brcm,amac.yaml
F: drivers/net/ethernet/broadcom/bgmac*
F: drivers/net/ethernet/broadcom/unimac.h
S: Maintained
-F: Documentation/devicetree/bindings/thermal/brcm,avs-tmon.txt
+F: Documentation/devicetree/bindings/thermal/brcm,avs-tmon.yaml
F: drivers/thermal/broadcom/brcmstb*
BROADCOM STB DPFE DRIVER
S: Supported
F: drivers/net/ethernet/broadcom/bcmsysport.*
F: drivers/net/ethernet/broadcom/unimac.h
+F: Documentation/devicetree/bindings/net/brcm,systemport.yaml
BROADCOM TG3 GIGABIT ETHERNET DRIVER
CIRRUS LOGIC AUDIO CODEC DRIVERS
S: Maintained
+F: Documentation/devicetree/bindings/sound/cirrus,cs*
+F: sound/pci/hda/cs*
F: sound/soc/codecs/cs*
CIRRUS LOGIC DSP FIRMWARE DRIVER
S: Odd Fixes
F: drivers/comedi/
+F: include/linux/comedi/
+F: include/uapi/linux/comedi.h
COMMON CLK FRAMEWORK
T: git git://linuxtv.org/media_tree.git
F: drivers/media/platform/sti/delta
+DELTA AHE-50DC FAN CONTROL MODULE DRIVER
+S: Maintained
+F: drivers/hwmon/pmbus/delta-ahe50dc-fan.c
+
DELTA DPS920AB PSU DRIVER
DRM DRIVER FOR MSM ADRENO GPU
DRM DRIVER FOR NVIDIA GEFORCE/QUADRO GPUS
S: Supported
-T: git git://github.com/skeggsb/linux
+W: https://nouveau.freedesktop.org/
+Q: https://patchwork.freedesktop.org/project/nouveau/
+Q: https://gitlab.freedesktop.org/drm/nouveau/-/merge_requests
+B: https://gitlab.freedesktop.org/drm/nouveau/-/issues
+C: irc://irc.oftc.net/nouveau
+T: git https://gitlab.freedesktop.org/drm/nouveau.git
F: drivers/gpu/drm/nouveau/
F: include/uapi/drm/nouveau_drm.h
F: drivers/gpu/drm/atmel-hlcdc/
DRM DRIVERS FOR BRIDGE CHIPS
-M: Andrzej Hajda <a.hajda@samsung.com>
+M: Andrzej Hajda <andrzej.hajda@intel.com>
S: Supported
T: git git://linuxtv.org/pinchartl/media drm/du/next
+F: Documentation/devicetree/bindings/display/bridge/renesas,dsi-csi2-tx.yaml
F: Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.yaml
F: Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml
F: Documentation/devicetree/bindings/display/renesas,du.yaml
F: drivers/gpu/drm/panel/
F: include/drm/drm_panel.h
+DRM PRIVACY-SCREEN CLASS
+S: Maintained
+T: git git://anongit.freedesktop.org/drm/drm-misc
+F: drivers/gpu/drm/drm_privacy_screen*
+F: include/drm/drm_privacy_screen*
+
DRM TTM SUBSYSTEM
F: drivers/mmc/host/cqhci*
EMULEX 10Gbps iSCSI - OneConnect DRIVER
S: Supported
W: http://www.broadcom.com
F: drivers/base/firmware_loader/
F: include/linux/firmware.h
-FLASH ADAPTER DRIVER (IBM Flash Adapter 900GB Full Height PCI Flash Card)
-S: Maintained
-F: drivers/block/rsxx/
-
FLEXTIMER FTM-QUADDEC DRIVER
FREESCALE CAAM (Cryptographic Acceleration and Assurance Module) DRIVER
S: Maintained
F: Documentation/devicetree/bindings/crypto/fsl-sec4.txt
F: include/linux/hid*
F: include/uapi/linux/hid*
+HID LOGITECH DRIVERS
+S: Maintained
+F: drivers/hid/hid-logitech-*
+
HID PLAYSTATION DRIVER
HISILICON PMU DRIVER
S: Supported
W: http://www.hisilicon.com
+F: Documentation/admin-guide/perf/hisi-pcie-pmu.rst
F: Documentation/admin-guide/perf/hisi-pmu.rst
F: drivers/perf/hisilicon
HISILICON SECURITY ENGINE V2 DRIVER (SEC2)
S: Maintained
F: Documentation/ABI/testing/debugfs-hisi-sec
S: Supported
W: https://01.org/linuxgraphics/
F: drivers/crypto/keembay/Kconfig
F: drivers/crypto/keembay/Makefile
F: drivers/crypto/keembay/keembay-ocs-ecc.c
-F: drivers/crypto/keembay/ocs-ecc-curve-defs.h
INTEL KEEM BAY OCS HCU CRYPTO DRIVER
F: drivers/crypto/keembay/ocs-hcu.c
F: drivers/crypto/keembay/ocs-hcu.h
+INTEL THUNDER BAY EMMC PHY DRIVER
+S: Maintained
+F: Documentation/devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml
+F: drivers/phy/intel/phy-intel-thunderbay-emmc.c
+
INTEL MANAGEMENT ENGINE (mei)
F: drivers/mfd/intel_soc_pmic*
F: include/linux/mfd/intel_soc_pmic*
-INTEL PMT DRIVER
-S: Maintained
-F: drivers/mfd/intel_pmt.c
+INTEL PMT DRIVERS
+S: Supported
F: drivers/platform/x86/intel/pmt/
INTEL PRO/WIRELESS 2100, 2200BG, 2915ABG NETWORK CONNECTION SUPPORT
S: Maintained
F: drivers/platform/x86/intel/uncore-frequency.c
+INTEL VENDOR SPECIFIC EXTENDED CAPABILITIES DRIVER
+S: Supported
+F: drivers/platform/x86/intel/vsec.*
+
INTEL VIRTUAL BUTTON DRIVER
W: http://legousb.sourceforge.net/
F: drivers/usb/misc/legousbtower.c
+LETSKETCH HID TABLET DRIVER
+S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git
+F: drivers/hid/hid-letsketch.c
+
LG LAPTOP EXTRAS
F: Documentation/devicetree/bindings/media/i2c/maxim,max9286.yaml
F: drivers/media/i2c/max9286.c
+MAX96712 QUAD GMSL2 DESERIALIZER DRIVER
+S: Maintained
+F: drivers/staging/media/max96712/max96712.c
+
MAX9860 MONO AUDIO VOICE CODEC DRIVER
F: Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml
F: drivers/power/supply/max17042_battery.c
+MAXIM MAX20086 CAMERA POWER PROTECTOR DRIVER
+S: Maintained
+F: Documentation/devicetree/bindings/regulator/maxim,max20086.yaml
+F: drivers/regulator/max20086-regulator.c
+
MAXIM MAX77650 PMIC MFD DRIVER
F: drivers/regulator/max77802-regulator.c
F: include/dt-bindings/*/*max77802.h
+MAXIM MAX77976 BATTERY CHARGER
+S: Supported
+F: Documentation/devicetree/bindings/power/supply/maxim,max77976.yaml
+F: drivers/power/supply/max77976_charger.c
+
MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
S: Supported
-F: Documentation/devicetree/bindings/*/max77686.txt
+F: Documentation/devicetree/bindings/*/maxim,max77686.yaml
F: Documentation/devicetree/bindings/clock/maxim,max77686.txt
F: Documentation/devicetree/bindings/mfd/max14577.txt
F: Documentation/devicetree/bindings/mfd/max77693.txt
S: Maintained
F: drivers/net/ethernet/microchip/lan743x_*
+MICROCHIP LAN966X ETHERNET DRIVER
+S: Maintained
+F: drivers/net/ethernet/microchip/lan966x/*
+
MICROCHIP LCDFB DRIVER
F: drivers/gpu/drm/imx/dcss/
NXP i.MX 8QXP ADC DRIVER
-S: Supported
+S: Maintained
F: Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml
F: drivers/iio/adc/imx8qxp-adc.c
+NXP i.MX 7D/6SX/6UL AND VF610 ADC DRIVER
+S: Maintained
+F: Documentation/devicetree/bindings/iio/adc/fsl,imx7d-adc.yaml
+F: Documentation/devicetree/bindings/iio/adc/fsl,vf610-adc.yaml
+F: drivers/iio/adc/imx7d_adc.c
+F: drivers/iio/adc/vf610_adc.c
+
NXP PF8100/PF8121A/PF8200 PMIC REGULATOR DEVICE DRIVER
S: Maintained
F: Documentation/hwmon/nzxt-kraken2.rst
F: drivers/hwmon/nzxt-kraken2.c
+NZXT-SMART2 HARDWARE MONITORING DRIVER
+S: Maintained
+F: Documentation/hwmon/nzxt-smart2.rst
+F: drivers/hwmon/nzxt-smart2.c
+
OBJAGG
OMNIVISION OV5670 SENSOR DRIVER
S: Maintained
T: git git://linuxtv.org/media_tree.git
T: git git://linuxtv.org/media_tree.git
F: drivers/media/i2c/ov5675.c
+OMNIVISION OV5693 SENSOR DRIVER
+S: Maintained
+T: git git://linuxtv.org/media_tree.git
+F: drivers/media/i2c/ov5693.c
+
OMNIVISION OV5695 SENSOR DRIVER
F: include/trace/events/page_pool.h
F: net/core/page_pool.c
+PAGE TABLE CHECK
+S: Maintained
+F: Documentation/vm/page_table_check.rst
+F: include/linux/page_table_check.h
+F: mm/page_table_check.c
+
PANASONIC LAPTOP ACPI EXTRAS DRIVER
PCMCIA SUBSYSTEM
S: Odd Fixes
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/brodo/pcmcia.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/brodo/linux.git
F: Documentation/pcmcia/
F: drivers/pcmcia/
F: include/pcmcia/
S: Maintained
F: drivers/pinctrl/pinctrl-single.c
+PIN CONTROLLER - THUNDERBAY
+S: Supported
+F: drivers/pinctrl/pinctrl-thunderbay.c
+
PKTCDVD DRIVER
S: Orphan
S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux.git
F: include/linux/printk.h
F: kernel/printk/
F: Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml
F: drivers/net/wireless/ath/ath9k/
+QUALCOMM BAM-DMUX WWAN NETWORK DRIVER
+S: Maintained
+F: Documentation/devicetree/bindings/net/qcom,bam-dmux.yaml
+F: drivers/net/wwan/qcom_bam_dmux.c
+
QUALCOMM CAMERA SUBSYSTEM DRIVER
F: Documentation/devicetree/bindings/media/*camss*
F: drivers/media/platform/qcom/camss/
+QUALCOMM CLOCK DRIVERS
+S: Supported
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git
+F: Documentation/devicetree/bindings/clock/qcom,*
+F: drivers/clk/qcom/
+F: include/dt-bindings/clock/qcom,*
+
QUALCOMM CORE POWER REDUCTION (CPR) AVS DRIVER
RANDOM NUMBER DRIVER
+T: git https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git
S: Maintained
F: drivers/char/random.c
F: Documentation/devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml
F: drivers/iio/adc/rzg2l_adc.c
+RENESAS R-CAR GEN3 & RZ/N1 NAND CONTROLLER DRIVER
+S: Maintained
+F: Documentation/devicetree/bindings/mtd/renesas-nandc.yaml
+F: drivers/mtd/nand/raw/renesas-nand-controller.c
+
RESET CONTROLLER FRAMEWORK
S: Maintained
S: Supported
-F: Documentation/devicetree/bindings/mfd/rohm,bd70528-pmic.txt
-F: Documentation/devicetree/bindings/regulator/rohm,bd70528-regulator.txt
F: drivers/clk/clk-bd718x7.c
-F: drivers/gpio/gpio-bd70528.c
F: drivers/gpio/gpio-bd71815.c
F: drivers/gpio/gpio-bd71828.c
-F: drivers/mfd/rohm-bd70528.c
F: drivers/mfd/rohm-bd71828.c
F: drivers/mfd/rohm-bd718x7.c
F: drivers/mfd/rohm-bd9576.c
-F: drivers/power/supply/bd70528-charger.c
-F: drivers/regulator/bd70528-regulator.c
F: drivers/regulator/bd71815-regulator.c
F: drivers/regulator/bd71828-regulator.c
F: drivers/regulator/bd718x7-regulator.c
F: drivers/regulator/bd9576-regulator.c
F: drivers/regulator/rohm-regulator.c
F: drivers/rtc/rtc-bd70528.c
-F: drivers/watchdog/bd70528_wdt.c
F: drivers/watchdog/bd9576_wdt.c
-F: include/linux/mfd/rohm-bd70528.h
F: include/linux/mfd/rohm-bd71815.h
F: include/linux/mfd/rohm-bd71828.h
F: include/linux/mfd/rohm-bd718x7.h
F: drivers/nfc/s3fwrn5
SAMSUNG S5C73M3 CAMERA DRIVER
S: Supported
F: drivers/media/i2c/s5c73m3/*
SAMSUNG S5K5BAF CAMERA DRIVER
S: Supported
F: drivers/media/i2c/s5k5baf.c
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/snawrocki/clk.git
-F: Documentation/devicetree/bindings/clock/exynos*.txt
F: Documentation/devicetree/bindings/clock/samsung,*.yaml
F: Documentation/devicetree/bindings/clock/samsung,s3c*
-F: Documentation/devicetree/bindings/clock/samsung,s5p*
F: drivers/clk/samsung/
F: include/dt-bindings/clock/exynos*.h
F: include/dt-bindings/clock/s3c*.h
S: Maintained
F: drivers/mmc/host/sdhci-omap.c
+SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) NXP i.MX DRIVER
+S: Maintained
+F: drivers/mmc/host/sdhci-esdhc-imx.c
+
SECURE ENCRYPTING DEVICE (SED) OPAL DRIVER
F: include/linux/arm_sdei.h
F: include/uapi/linux/arm_sdei.h
-SOFTWARE NODES
+SOFTWARE NODES AND DEVICE PROPERTIES
S: Maintained
+F: drivers/base/property.c
F: drivers/base/swnode.c
+F: include/linux/fwnode.h
+F: include/linux/property.h
SOFTWARE RAID (Multiple Disks) SUPPORT
F: include/sound/
F: include/uapi/sound/
F: sound/
+F: tools/testing/selftests/alsa
SOUND - COMPRESSED AUDIO
F: sound/core/pcm_dmaengine.c
F: sound/soc/soc-generic-dmaengine-pcm.c
+SOUND - ALSA SELFTESTS
+S: Supported
+F: tools/testing/selftests/alsa
+
SOUND - SOC LAYER / DYNAMIC AUDIO POWER MANAGEMENT (ASoC)
SPI NOR SUBSYSTEM
S: Maintained
W: http://www.linux-mtd.infradead.org/
S: Odd Fixes
F: drivers/net/ethernet/adaptec/starfire*
+STARFIVE JH7100 CLOCK DRIVER
+S: Maintained
+F: Documentation/devicetree/bindings/clock/starfive,jh7100-clkgen.yaml
+F: drivers/clk/starfive/clk-starfive-jh7100.c
+F: include/dt-bindings/clock/starfive-jh7100.h
+
+STARFIVE JH7100 PINCTRL DRIVER
+S: Maintained
+F: Documentation/devicetree/bindings/pinctrl/starfive,jh7100-pinctrl.yaml
+F: drivers/pinctrl/pinctrl-starfive.c
+F: include/dt-bindings/pinctrl/pinctrl-starfive.h
+
+STARFIVE JH7100 RESET CONTROLLER DRIVER
+S: Maintained
+F: Documentation/devicetree/bindings/reset/starfive,jh7100-reset.yaml
+F: drivers/reset/reset-starfive-jh7100.c
+F: include/dt-bindings/reset/starfive-jh7100.h
+
STATIC BRANCH/CALL
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc.git
+F: Documentation/arc/
F: Documentation/devicetree/bindings/arc/*
F: Documentation/devicetree/bindings/interrupt-controller/snps,arc*
F: arch/arc/
T: git git://github.com/srcres258/linux-doc.git doc-zh-tw
F: Documentation/translations/zh_TW/
-TRIVIAL PATCHES
-S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial.git
-K: ^Subject:.*(?i)trivial
-
TTY LAYER
W: http://www.linux-mtd.infradead.org/doc/ubifs.html
T: git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git next
T: git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git fixes
+F: Documentation/ABI/testing/sysfs-fs-ubifs
F: Documentation/filesystems/ubifs-authentication.rst
F: Documentation/filesystems/ubifs.rst
F: fs/ubifs/
VIRTIO GPU DRIVER
S: Maintained
S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/pmladek/printk.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux.git
F: Documentation/core-api/printk-formats.rst
F: lib/test_printf.c
F: lib/test_scanf.c
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/mm
F: arch/x86/mm/
+X86 PLATFORM ANDROID TABLETS DSDT FIXUP DRIVER
+S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git
+F: drivers/platform/x86/x86-android-tablets.c
+
X86 PLATFORM DRIVERS
F: drivers/xen/xen-scsiback.c
F: include/xen/interface/io/vscsiif.h
+XEN PVUSB DRIVER
+S: Supported
+F: drivers/usb/host/xen*
+F: include/xen/interface/io/usbif.h
+
XEN SOUND FRONTEND DRIVER
F: include/uapi/linux/dqblk_xfs.h
F: include/uapi/linux/fsmap.h
+XILINX AMS DRIVER
+S: Maintained
+F: Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml
+F: drivers/iio/adc/xilinx-ams.c
+
XILINX AXI ETHERNET DRIVER
S: Maintained
F: Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml
F: drivers/phy/xilinx/phy-zynqmp.c
+XILINX EVENT MANAGEMENT DRIVER
+S: Maintained
+F: drivers/soc/xilinx/xlnx_event_manager.c
+F: include/linux/firmware/xlnx-event-manager.h
+
XILLYBUS DRIVER
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
bool
+ config ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ bool
+
config ARCH_SPLIT_ARG64
bool
help
config DYNAMIC_SIGFRAME
bool
+# Select if the arch has a named attribute group bound to NUMA device nodes.
+config HAVE_ARCH_NODE_DEV_GROUP
+ bool
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
#include <linux/sched/debug.h>
#include <linux/highmem.h>
#include <linux/perf_event.h>
+#include <linux/kfence.h>
#include <asm/system_misc.h>
#include <asm/system_info.h>
{ }
#endif /* CONFIG_MMU */
+static inline bool is_write_fault(unsigned int fsr)
+{
+ return (fsr & FSR_WRITE) && !(fsr & FSR_CM);
+}
+
static void die_kernel_fault(const char *msg, struct mm_struct *mm,
unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
/*
* No handler, we'll have to terminate things with extreme prejudice.
*/
- if (addr < PAGE_SIZE)
+ if (addr < PAGE_SIZE) {
msg = "NULL pointer dereference";
- else
+ } else {
+ if (kfence_handle_page_fault(addr, is_write_fault(fsr), regs))
+ return;
+
msg = "paging request";
+ }
die_kernel_fault(msg, mm, addr, fsr, regs);
}
}
#ifdef CONFIG_MMU
-#define VM_FAULT_BADMAP 0x010000
-#define VM_FAULT_BADACCESS 0x020000
+#define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000)
+#define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000)
static inline bool is_permission_fault(unsigned int fsr)
{
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
- if ((fsr & FSR_WRITE) && !(fsr & FSR_CM)) {
+ if (is_write_fault(fsr)) {
flags |= FAULT_FLAG_WRITE;
vm_flags = VM_WRITE;
}
return 0;
}
- if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
+ if (!(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;
goto retry;
pr_alert("Unable to handle kernel %s at virtual address %016lx\n", msg,
addr);
+ kasan_non_canonical_hook(addr);
+
mem_abort_decode(esr);
show_pte(addr);
}
if (fault & VM_FAULT_RETRY) {
- if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
- mm_flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ mm_flags |= FAULT_FLAG_TRIED;
+ goto retry;
}
mmap_read_unlock(mm);
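The fault-handler hunks above drop the FAULT_FLAG_ALLOW_RETRY test: a VM_FAULT_RETRY result is now simply retried once more with FAULT_FLAG_TRIED set. Below is a minimal userspace sketch of that retry shape; the flag values and the fake fault handler are invented for illustration and are not kernel definitions.

/* Minimal userspace model of the simplified fault-retry loop: any RETRY
 * result is retried exactly once more with a TRIED flag set.  All names
 * and flag values below are local stand-ins, not kernel definitions. */
#include <stdio.h>

#define FAULT_FLAG_TRIED  0x1
#define VM_FAULT_RETRY    0x2
#define VM_FAULT_ERROR    0x4

/* Fake handler: asks for one retry, then succeeds. */
static unsigned int fake_handle_mm_fault(unsigned int flags)
{
	return (flags & FAULT_FLAG_TRIED) ? 0 : VM_FAULT_RETRY;
}

int main(void)
{
	unsigned int flags = 0, fault;

retry:
	fault = fake_handle_mm_fault(flags);
	if (!(fault & VM_FAULT_ERROR) && (fault & VM_FAULT_RETRY)) {
		flags |= FAULT_FLAG_TRIED;
		goto retry;
	}
	printf("fault resolved with flags=%#x\n", flags);
	return 0;
}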
if (!inf->fn(far, esr, regs))
return;
- if (!user_mode(regs)) {
- pr_alert("Unhandled fault at 0x%016lx\n", addr);
- mem_abort_decode(esr);
- show_pte(addr);
- }
+ if (!user_mode(regs))
+ die_kernel_fault(inf->name, addr, esr, regs);
/*
* At this point we have an unrecognized fault type whose tag bits may
* Fix up get_user() and put_user().
* ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant
* bit in the relative address of the fixup routine to indicate
- * that %r8 should be loaded with -EFAULT to report a userspace
- * access error.
+ * that gr[ASM_EXCEPTIONTABLE_REG] should be loaded with
+ * -EFAULT to report a userspace access error.
*/
if (fix->fixup & 1) {
- regs->gr[8] = -EFAULT;
+ regs->gr[ASM_EXCEPTIONTABLE_REG] = -EFAULT;
/* zero target register for get_user() */
if (parisc_acctyp(0, regs->iir) == VM_READ) {
unsigned long acc_type;
vm_fault_t fault = 0;
unsigned int flags;
-
- if (faulthandler_disabled())
- goto no_context;
+ char *msg;
tsk = current;
mm = tsk->mm;
- if (!mm)
+ if (!mm) {
+ msg = "Page fault: no context";
goto no_context;
+ }
flags = FAULT_FLAG_DEFAULT;
if (user_mode(regs))
goto bad_area;
BUG();
}
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- /*
- * No need to mmap_read_unlock(mm) as we would
- * have already released it in __lock_page_or_retry
- * in mm/filemap.c.
- */
- flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ if (fault & VM_FAULT_RETRY) {
+ /*
+ * No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
+ flags |= FAULT_FLAG_TRIED;
+ goto retry;
}
mmap_read_unlock(mm);
return;
force_sig_fault(signo, si_code, (void __user *) address);
return;
}
+ msg = "Page fault: bad address";
no_context:
return;
}
- parisc_terminate("Bad Address (null pointer deref?)", regs, code, address);
+ parisc_terminate(msg, regs, code, address);
- out_of_memory:
+out_of_memory:
mmap_read_unlock(mm);
- if (!user_mode(regs))
+ if (!user_mode(regs)) {
+ msg = "Page fault: out of memory";
goto no_context;
+ }
pagefault_out_of_memory();
}
#include <linux/kfence.h>
#include <linux/pkeys.h>
+#include <asm/asm-prototypes.h>
#include <asm/firmware.h>
#include <asm/interrupt.h>
#include <asm/page.h>
* case.
*/
if (unlikely(fault & VM_FAULT_RETRY)) {
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ flags |= FAULT_FLAG_TRIED;
+ goto retry;
}
mmap_read_unlock(current->mm);
{
bad_page_fault(regs, SIGSEGV);
}
+
+/*
+ * In radix, segment interrupts indicate the EA is not addressable by the
+ * page table geometry, so they are always sent here.
+ *
+ * In hash, this is called if do_slb_fault returns error. Typically it is
+ * because the EA was outside the region allowed by software.
+ */
+DEFINE_INTERRUPT_HANDLER(do_bad_segment_interrupt)
+{
+ int err = regs->result;
+
+ if (err == -EFAULT) {
+ if (user_mode(regs))
+ _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
+ else
+ bad_page_fault(regs, SIGSEGV);
+ } else if (err == -EINVAL) {
+ unrecoverable_exception(regs);
+ } else {
+ BUG();
+ }
+}
#endif
pr_cont("R1:%016lx ", *table);
if (*table & _REGION_ENTRY_INVALID)
goto out;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = __va(*table & _REGION_ENTRY_ORIGIN);
fallthrough;
case _ASCE_TYPE_REGION2:
table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
pr_cont("R2:%016lx ", *table);
if (*table & _REGION_ENTRY_INVALID)
goto out;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = __va(*table & _REGION_ENTRY_ORIGIN);
fallthrough;
case _ASCE_TYPE_REGION3:
table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
pr_cont("R3:%016lx ", *table);
if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
goto out;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = __va(*table & _REGION_ENTRY_ORIGIN);
fallthrough;
case _ASCE_TYPE_SEGMENT:
table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
pr_cont("S:%016lx ", *table);
if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
goto out;
- table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+ table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
}
table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
if (bad_address(table))
if (unlikely(fault & VM_FAULT_ERROR))
goto out_up;
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
- (flags & FAULT_FLAG_RETRY_NOWAIT)) {
- /* FAULT_FLAG_RETRY_NOWAIT has been set,
- * mmap_lock has not been released */
- current->thread.gmap_pfault = 1;
- fault = VM_FAULT_PFAULT;
- goto out_up;
- }
- flags &= ~FAULT_FLAG_RETRY_NOWAIT;
- flags |= FAULT_FLAG_TRIED;
- mmap_read_lock(mm);
- goto retry;
+ if (fault & VM_FAULT_RETRY) {
+ if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
+ (flags & FAULT_FLAG_RETRY_NOWAIT)) {
+ /*
+ * FAULT_FLAG_RETRY_NOWAIT has been set, mmap_lock has
+ * not been released
+ */
+ current->thread.gmap_pfault = 1;
+ fault = VM_FAULT_PFAULT;
+ goto out_up;
}
+ flags &= ~FAULT_FLAG_RETRY_NOWAIT;
+ flags |= FAULT_FLAG_TRIED;
+ mmap_read_lock(mm);
+ goto retry;
}
if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
address = __gmap_link(gmap, current->thread.gmap_addr,
}
BUG();
}
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- flags |= FAULT_FLAG_TRIED;
+ if (fault & VM_FAULT_RETRY) {
+ flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ goto retry;
}
pmd = pmd_off(mm, address);
pagefault_out_of_memory();
return 0;
}
-EXPORT_SYMBOL(handle_page_fault);
static void show_segv_info(struct uml_pt_regs *regs)
{
select ARCH_SUPPORTS_ACPI
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC
+ select ARCH_SUPPORTS_PAGE_TABLE_CHECK if X86_64
select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096
select ARCH_SUPPORTS_LTO_CLANG
select HAVE_ARCH_KCSAN if X86_64
select X86_FEATURE_NAMES if PROC_FS
select PROC_PID_ARCH_STATUS if PROC_FS
+ select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX
imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI
config INSTRUCTION_DECODER
branches. Requires a compiler with -mindirect-branch=thunk-extern
support for full protection. The kernel may run slower.
+config CC_HAS_SLS
+ def_bool $(cc-option,-mharden-sls=all)
+
+config SLS
+ bool "Mitigate Straight-Line-Speculation"
+ depends on CC_HAS_SLS && X86_64
+ default n
+ help
+ Compile the kernel with straight-line-speculation options to guard
+ against straight line speculation. The kernel image might be slightly
+ larger.
+
config X86_CPU_RESCTRL
bool "x86 CPU resource control support"
depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
helps to determine the effectiveness of preserving large and huge
page mappings when mapping protections are changed.
+config X86_MEM_ENCRYPT
+ select ARCH_HAS_FORCE_DMA_UNENCRYPTED
+ select DYNAMIC_PHYSICAL_MASK
+ select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
+ def_bool n
+
config AMD_MEM_ENCRYPT
bool "AMD Secure Memory Encryption (SME) support"
depends on X86_64 && CPU_SUP_AMD
select DMA_COHERENT_POOL
- select DYNAMIC_PHYSICAL_MASK
select ARCH_USE_MEMREMAP_PROT
- select ARCH_HAS_FORCE_DMA_UNENCRYPTED
select INSTRUCTION_DECODER
- select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
select ARCH_HAS_CC_PLATFORM
+ select X86_MEM_ENCRYPT
help
Say yes to enable support for the encryption of system memory.
This requires an AMD processor that supports Secure Memory
select SRCU
select MMU_NOTIFIER
select NUMA_KEEP_MEMINFO if NUMA
+ select XARRAY_MULTI
help
Intel(R) Software Guard eXtensions (SGX) is a set of CPU instructions
that can be used by applications to set aside private regions of code
config EFI_STUB
bool "EFI stub support"
- depends on EFI && !X86_USE_3DNOW
+ depends on EFI
depends on $(cc-option,-mabi=ms) || X86_32
select RELOCATABLE
help
#define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot)))
#ifndef __ASSEMBLY__
+#include <linux/spinlock.h>
#include <asm/x86_init.h>
#include <asm/pkru.h>
#include <asm/fpu/api.h>
#include <asm-generic/pgtable_uffd.h>
+ #include <linux/page_table_check.h>
extern pgd_t early_top_pgt[PTRS_PER_PGD];
bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
return true;
if ((pte_flags(a) & _PAGE_PROTNONE) &&
- mm_tlb_flush_pending(mm))
+ atomic_read(&mm->tlb_flush_pending))
return true;
return false;
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
+ page_table_check_pte_set(mm, addr, ptep, pte);
set_pte(ptep, pte);
}
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
+ page_table_check_pmd_set(mm, addr, pmdp, pmd);
set_pmd(pmdp, pmd);
}
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
+ page_table_check_pud_set(mm, addr, pudp, pud);
native_set_pud(pudp, pud);
}
pte_t *ptep)
{
pte_t pte = native_ptep_get_and_clear(ptep);
+ page_table_check_pte_clear(mm, addr, pte);
return pte;
}
* care about updates and native needs no locking
*/
pte = native_local_ptep_get_and_clear(ptep);
+ page_table_check_pte_clear(mm, addr, pte);
} else {
pte = ptep_get_and_clear(mm, addr, ptep);
}
return pte;
}
+ #define __HAVE_ARCH_PTEP_CLEAR
+ static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+ {
+ if (IS_ENABLED(CONFIG_PAGE_TABLE_CHECK))
+ ptep_get_and_clear(mm, addr, ptep);
+ else
+ pte_clear(mm, addr, ptep);
+ }
+
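The ptep_clear() helper added above routes clears through ptep_get_and_clear() when CONFIG_PAGE_TABLE_CHECK is enabled, so the value being cleared is still available to the checker. The sketch below models that read-then-clear-then-audit pattern in plain userspace C; every name in it is a local stand-in, not kernel API.

/* Userspace analogue of clearing an entry while still being able to audit
 * the value that was cleared.  Everything here is an illustrative stand-in,
 * not kernel code. */
#include <stdio.h>

static int check_enabled = 1;	/* models IS_ENABLED(CONFIG_PAGE_TABLE_CHECK) */

static void audit_cleared(unsigned long old)
{
	printf("cleared entry that held %#lx\n", old);
}

static unsigned long get_and_clear(unsigned long *entry)
{
	unsigned long old = *entry;
	*entry = 0;
	return old;
}

static void entry_clear(unsigned long *entry)
{
	if (check_enabled)
		audit_cleared(get_and_clear(entry));	/* old value stays visible */
	else
		*entry = 0;				/* plain clear */
}

int main(void)
{
	unsigned long pte = 0xdeadb000;

	entry_clear(&pte);
	return 0;
}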
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp)
{
- return native_pmdp_get_and_clear(pmdp);
+ pmd_t pmd = native_pmdp_get_and_clear(pmdp);
+
+ page_table_check_pmd_clear(mm, addr, pmd);
+
+ return pmd;
}
#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
unsigned long addr, pud_t *pudp)
{
- return native_pudp_get_and_clear(pudp);
+ pud_t pud = native_pudp_get_and_clear(pudp);
+
+ page_table_check_pud_clear(mm, addr, pud);
+
+ return pud;
}
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
+ page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
if (IS_ENABLED(CONFIG_SMP)) {
return xchg(pmdp, pmd);
} else {
NULL,
};
- static const struct attribute_group zram_disk_attr_group = {
- .attrs = zram_disk_attrs,
- };
-
- static const struct attribute_group *zram_disk_attr_groups[] = {
- &zram_disk_attr_group,
- NULL,
- };
+ ATTRIBUTE_GROUPS(zram_disk);
/*
* Allocate and initialize new zram device. the function returns
zram->disk->major = zram_major;
zram->disk->first_minor = device_id;
zram->disk->minors = 1;
+ zram->disk->flags |= GENHD_FL_NO_PART;
zram->disk->fops = &zram_devops;
zram->disk->private_data = zram;
snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
- ret = device_add_disk(NULL, zram->disk, zram_disk_attr_groups);
+ ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
if (ret)
goto out_cleanup_disk;
#include "dax-private.h"
#include "bus.h"
-static struct class *dax_class;
-
static DEFINE_MUTEX(dax_bus_lock);
#define DAX_NAME_LEN 30
static int dax_bus_match(struct device *dev, struct device_driver *drv);
+ /*
+ * Static dax regions are regions created by an external subsystem such as
+ * nvdimm, where a single range is assigned. Their boundaries are established
+ * by that external subsystem and are usually limited to one physical memory
+ * range. For PMEM, for example, the region is usually defined by NVDIMM
+ * namespace boundaries (i.e. a single contiguous range).
+ *
+ * On dynamic dax regions, the assigned region can be partitioned by dax core
+ * into multiple subdivisions. A subdivision is represented by one
+ * /dev/daxN.M device composed of one or more potentially discontiguous ranges.
+ *
+ * When allocating a dax region, drivers must set whether it's static
+ * (IORESOURCE_DAX_STATIC). On static dax devices, the @pgmap is pre-assigned
+ * to dax core when calling devm_create_dev_dax(), whereas in dynamic dax
+ * devices it is NULL but afterwards allocated by dax core on device ->probe().
+ * Care is needed to make sure that dynamic dax devices are torn down with a
+ * cleared @pgmap field (see kill_dev_dax()).
+ */
static bool is_static(struct dax_region *dax_region)
{
return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
}
+ bool static_dev_dax(struct dev_dax *dev_dax)
+ {
+ return is_static(dev_dax->region);
+ }
+ EXPORT_SYMBOL_GPL(static_dev_dax);
+
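The comment above separates static regions (pgmap supplied by the creating subsystem) from dynamic ones (pgmap allocated at probe and cleared at teardown, see kill_dev_dax()). The following is a compressed userspace model of that lifecycle under those assumptions; the struct, flag value and helpers are invented for illustration.

/* Compressed model of the static/dynamic region rules described above.
 * The types, flag and helpers are local stand-ins for illustration only. */
#include <stdio.h>
#include <stdlib.h>

#define IORESOURCE_DAX_STATIC 0x1

struct region { unsigned long flags; };
struct dev_dax_model { struct region *region; void *pgmap; };

static int is_static(const struct region *r)
{
	return (r->flags & IORESOURCE_DAX_STATIC) != 0;
}

static void probe(struct dev_dax_model *d)
{
	/* dynamic devices get their pgmap allocated only here */
	if (!is_static(d->region) && !d->pgmap)
		d->pgmap = malloc(64);	/* arbitrary size, placeholder */
}

static void teardown(struct dev_dax_model *d)
{
	/* dynamic devices must not keep a stale pgmap for the next probe */
	if (!is_static(d->region)) {
		free(d->pgmap);
		d->pgmap = NULL;
	}
}

int main(void)
{
	struct region dyn = { .flags = 0 };
	struct dev_dax_model d = { .region = &dyn, .pgmap = NULL };

	probe(&d);
	teardown(&d);
	printf("pgmap after teardown: %p\n", d.pgmap);
	return 0;
}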
static u64 dev_dax_size(struct dev_dax *dev_dax)
{
u64 size = 0;
kill_dax(dax_dev);
unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+
+ /*
+ * Dynamic dax regions have their pgmap allocated via devm_kzalloc() and
+ * thus freed by devm. Clear the pgmap so that probe() does not see stale
+ * pgmap ranges left over from previous reconfigurations of region devices.
+ */
+ if (!static_dev_dax(dev_dax))
+ dev_dax->pgmap = NULL;
}
EXPORT_SYMBOL_GPL(kill_dev_dax);
}
/*
- * No 'host' or dax_operations since there is no access to this
- * device outside of mmap of the resulting character device.
+ * No dax_operations since there is no access to this device outside of
+ * mmap of the resulting character device.
*/
- dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
+ dax_dev = alloc_dax(dev_dax, NULL);
if (IS_ERR(dax_dev)) {
rc = PTR_ERR(dax_dev);
goto err_alloc_dax;
}
+ set_dax_synchronous(dax_dev);
+ set_dax_nocache(dax_dev);
+ set_dax_nomc(dax_dev);
/* a device_dax instance is dead while the driver is not attached */
kill_dax(dax_dev);
inode = dax_inode(dax_dev);
dev->devt = inode->i_rdev;
- if (data->subsys == DEV_DAX_BUS)
- dev->bus = &dax_bus_type;
- else
- dev->class = dax_class;
+ dev->bus = &dax_bus_type;
dev->parent = parent;
dev->type = &dev_dax_type;
int __init dax_bus_init(void)
{
- int rc;
-
- if (IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)) {
- dax_class = class_create(THIS_MODULE, "dax");
- if (IS_ERR(dax_class))
- return PTR_ERR(dax_class);
- }
-
- rc = bus_register(&dax_bus_type);
- if (rc)
- class_destroy(dax_class);
- return rc;
+ return bus_register(&dax_bus_type);
}
void __exit dax_bus_exit(void)
{
bus_unregister(&dax_bus_type);
- class_destroy(dax_class);
}
struct range *range, int target_node, unsigned int align,
unsigned long flags);
-enum dev_dax_subsys {
- DEV_DAX_BUS = 0, /* zeroed dev_dax_data picks this by default */
- DEV_DAX_CLASS,
-};
-
struct dev_dax_data {
struct dax_region *dax_region;
struct dev_pagemap *pgmap;
- enum dev_dax_subsys subsys;
resource_size_t size;
int id;
};
struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data);
-/* to be deleted when DEV_DAX_CLASS is removed */
-struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys);
-
struct dax_device_driver {
struct device_driver drv;
struct list_head ids;
__dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
void dax_driver_unregister(struct dax_device_driver *dax_drv);
void kill_dev_dax(struct dev_dax *dev_dax);
+ bool static_dev_dax(struct dev_dax *dev_dax);
-#if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
-int dev_dax_probe(struct dev_dax *dev_dax);
-#endif
-
/*
* While run_dax() is potentially a generic operation that could be
* defined in include/linux/dax.h we don't want to grow any users
return -1;
}
+ static void dax_set_mapping(struct vm_fault *vmf, pfn_t pfn,
+ unsigned long fault_size)
+ {
+ unsigned long i, nr_pages = fault_size / PAGE_SIZE;
+ struct file *filp = vmf->vma->vm_file;
+ struct dev_dax *dev_dax = filp->private_data;
+ pgoff_t pgoff;
+
+ /* mapping is only set on the head */
+ if (dev_dax->pgmap->vmemmap_shift)
+ nr_pages = 1;
+
+ pgoff = linear_page_index(vmf->vma,
+ ALIGN(vmf->address, fault_size));
+
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
+
+ page = compound_head(page);
+ if (page->mapping)
+ continue;
+
+ page->mapping = filp->f_mapping;
+ page->index = pgoff + i;
+ }
+ }
+
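dax_set_mapping() above maps fault_size / PAGE_SIZE base pages per fault, but collapses that to the single head page when the pgmap uses compound pages (vmemmap_shift non-zero). A standalone arithmetic sketch, assuming the usual 4 KiB page size and a 2 MiB PMD-sized fault:

/* Worked example of the sizing logic in dax_set_mapping(): a PMD-sized
 * fault covers fault_size / PAGE_SIZE base pages, but when the pgmap uses
 * compound pages (vmemmap_shift set) only the head page gets a mapping.
 * The sizes below are the usual x86-64 values, assumed for illustration. */
#include <stdio.h>

int main(void)
{
	const unsigned long page_size  = 4096;		/* 4 KiB */
	const unsigned long fault_size = 2UL << 20;	/* PMD fault: 2 MiB */
	unsigned long vmemmap_shift;

	for (vmemmap_shift = 0; vmemmap_shift <= 9; vmemmap_shift += 9) {
		unsigned long nr_pages = fault_size / page_size;	/* 512 */

		if (vmemmap_shift)	/* compound: only the head is mapped */
			nr_pages = 1;

		printf("vmemmap_shift=%lu -> pages to map: %lu\n",
		       vmemmap_shift, nr_pages);
	}
	return 0;
}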
static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
- struct vm_fault *vmf, pfn_t *pfn)
+ struct vm_fault *vmf)
{
struct device *dev = &dev_dax->dev;
phys_addr_t phys;
+ pfn_t pfn;
unsigned int fault_size = PAGE_SIZE;
if (check_vma(dev_dax, vmf->vma, __func__))
return VM_FAULT_SIGBUS;
}
- *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+ pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
- return vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
+ dax_set_mapping(vmf, pfn, fault_size);
+
+ return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
}
static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
- struct vm_fault *vmf, pfn_t *pfn)
+ struct vm_fault *vmf)
{
unsigned long pmd_addr = vmf->address & PMD_MASK;
struct device *dev = &dev_dax->dev;
phys_addr_t phys;
pgoff_t pgoff;
+ pfn_t pfn;
unsigned int fault_size = PMD_SIZE;
if (check_vma(dev_dax, vmf->vma, __func__))
return VM_FAULT_SIGBUS;
}
- *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+ pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
- return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
+ dax_set_mapping(vmf, pfn, fault_size);
+
+ return vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
}
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
- struct vm_fault *vmf, pfn_t *pfn)
+ struct vm_fault *vmf)
{
unsigned long pud_addr = vmf->address & PUD_MASK;
struct device *dev = &dev_dax->dev;
phys_addr_t phys;
pgoff_t pgoff;
+ pfn_t pfn;
unsigned int fault_size = PUD_SIZE;
return VM_FAULT_SIGBUS;
}
- *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+ pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
- return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
+ dax_set_mapping(vmf, pfn, fault_size);
+
+ return vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
}
#else
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
- struct vm_fault *vmf, pfn_t *pfn)
+ struct vm_fault *vmf)
{
return VM_FAULT_FALLBACK;
}
enum page_entry_size pe_size)
{
struct file *filp = vmf->vma->vm_file;
- unsigned long fault_size;
vm_fault_t rc = VM_FAULT_SIGBUS;
int id;
- pfn_t pfn;
struct dev_dax *dev_dax = filp->private_data;
dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
id = dax_read_lock();
switch (pe_size) {
case PE_SIZE_PTE:
- fault_size = PAGE_SIZE;
- rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
+ rc = __dev_dax_pte_fault(dev_dax, vmf);
break;
case PE_SIZE_PMD:
- fault_size = PMD_SIZE;
- rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
+ rc = __dev_dax_pmd_fault(dev_dax, vmf);
break;
case PE_SIZE_PUD:
- fault_size = PUD_SIZE;
- rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
+ rc = __dev_dax_pud_fault(dev_dax, vmf);
break;
default:
rc = VM_FAULT_SIGBUS;
}
- if (rc == VM_FAULT_NOPAGE) {
- unsigned long i;
- pgoff_t pgoff;
-
- /*
- * In the device-dax case the only possibility for a
- * VM_FAULT_NOPAGE result is when device-dax capacity is
- * mapped. No need to consider the zero page, or racing
- * conflicting mappings.
- */
- pgoff = linear_page_index(vmf->vma, vmf->address
- & ~(fault_size - 1));
- for (i = 0; i < fault_size / PAGE_SIZE; i++) {
- struct page *page;
-
- page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
- if (page->mapping)
- continue;
- page->mapping = filp->f_mapping;
- page->index = pgoff + i;
- }
- }
dax_read_unlock(id);
return rc;
void *addr;
int rc, i;
- pgmap = dev_dax->pgmap;
- if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1,
- "static pgmap / multi-range device conflict\n"))
- return -EINVAL;
+ if (static_dev_dax(dev_dax)) {
+ if (dev_dax->nr_range > 1) {
+ dev_warn(dev,
+ "static pgmap / multi-range device conflict\n");
+ return -EINVAL;
+ }
- if (!pgmap) {
- pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range)
- * (dev_dax->nr_range - 1), GFP_KERNEL);
+ pgmap = dev_dax->pgmap;
+ } else {
+ if (dev_dax->pgmap) {
+ dev_warn(dev,
+ "dynamic-dax with pre-populated page map\n");
+ return -EINVAL;
+ }
+
+ pgmap = devm_kzalloc(dev,
+ struct_size(pgmap, ranges, dev_dax->nr_range - 1),
+ GFP_KERNEL);
if (!pgmap)
return -ENOMEM;
+
pgmap->nr_range = dev_dax->nr_range;
+ dev_dax->pgmap = pgmap;
+
+ for (i = 0; i < dev_dax->nr_range; i++) {
+ struct range *range = &dev_dax->ranges[i].range;
+ pgmap->ranges[i] = *range;
+ }
}
for (i = 0; i < dev_dax->nr_range; i++) {
i, range->start, range->end);
return -EBUSY;
}
- /* don't update the range for static pgmap */
- if (!dev_dax->pgmap)
- pgmap->ranges[i] = *range;
}
pgmap->type = MEMORY_DEVICE_GENERIC;
+ if (dev_dax->align > PAGE_SIZE)
+ pgmap->vmemmap_shift =
+ order_base_2(dev_dax->align >> PAGE_SHIFT);
addr = devm_memremap_pages(dev, pgmap);
if (IS_ERR(addr))
return PTR_ERR(addr);
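The vmemmap_shift assignment above is order_base_2(align >> PAGE_SHIFT); for a 2 MiB aligned device with 4 KiB pages that is order_base_2(512) = 9. The sketch below redoes that arithmetic with a local ceil-log2 helper standing in for order_base_2(); the alignments chosen are illustrative assumptions.

/* Same arithmetic as pgmap->vmemmap_shift = order_base_2(align >> PAGE_SHIFT),
 * done in plain C.  order_base_2() is modeled by a local helper returning the
 * smallest order with 2^order >= n. */
#include <stdio.h>

static unsigned int order_base_2_model(unsigned long n)
{
	unsigned int order = 0;

	while ((1UL << order) < n)
		order++;
	return order;
}

int main(void)
{
	const unsigned long page_shift = 12;			/* 4 KiB pages */
	const unsigned long aligns[] = { 2UL << 20, 1UL << 30 };	/* 2 MiB, 1 GiB */

	for (int i = 0; i < 2; i++) {
		unsigned long align = aligns[i];
		unsigned int shift = order_base_2_model(align >> page_shift);

		printf("align %lu KiB -> vmemmap_shift %u\n", align >> 10, shift);
	}
	return 0;	/* prints shift 9 for 2 MiB, 18 for 1 GiB */
}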
inode = dax_inode(dax_dev);
cdev = inode->i_cdev;
cdev_init(cdev, &dax_fops);
- if (dev->class) {
- /* for the CONFIG_DEV_DAX_PMEM_COMPAT case */
- cdev->owner = dev->parent->driver->owner;
- } else
- cdev->owner = dev->driver->owner;
+ cdev->owner = dev->driver->owner;
cdev_set_parent(cdev, &dev->kobj);
rc = cdev_add(cdev, dev->devt, 1);
if (rc)
#include <linux/serial_core.h>
#include <linux/sysfs.h>
#include <linux/random.h>
+ #include <linux/kmemleak.h>
#include <asm/setup.h> /* for COMMAND_LINE_SIZE */
#include <asm/page.h>
if (nomap) {
/*
* If the memory is already reserved (by another region), we
- * should not allow it to be marked nomap.
+ * should not allow it to be marked nomap, but don't worry
+ * if the region isn't memory as it won't be mapped.
*/
- if (memblock_is_region_reserved(base, size))
+ if (memblock_overlaps_region(&memblock.memory, base, size) &&
+ memblock_is_region_reserved(base, size))
return -EBUSY;
return memblock_mark_nomap(base, size);
size = dt_mem_next_cell(dt_root_size_cells, &prop);
if (size &&
- early_init_dt_reserve_memory_arch(base, size, nomap) == 0)
+ early_init_dt_reserve_memory_arch(base, size, nomap) == 0) {
pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n",
uname, &base, (unsigned long)(size / SZ_1M));
+ if (!nomap)
+ kmemleak_alloc_phys(base, size, 0, 0);
+ }
else
pr_info("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n",
uname, &base, (unsigned long)(size / SZ_1M));
elfcorehdr_addr, elfcorehdr_size);
}
-static phys_addr_t cap_mem_addr;
-static phys_addr_t cap_mem_size;
+static unsigned long chosen_node_offset = -FDT_ERR_NOTFOUND;
/**
* early_init_dt_check_for_usable_mem_range - Decode usable memory range
* location from flat tree
- * @node: reference to node containing usable memory range location ('chosen')
*/
-static void __init early_init_dt_check_for_usable_mem_range(unsigned long node)
+void __init early_init_dt_check_for_usable_mem_range(void)
{
const __be32 *prop;
int len;
+ phys_addr_t cap_mem_addr;
+ phys_addr_t cap_mem_size;
+ unsigned long node = chosen_node_offset;
+
+ if ((long)node < 0)
+ return;
pr_debug("Looking for usable-memory-range property... ");
pr_debug("cap_mem_start=%pa cap_mem_size=%pa\n", &cap_mem_addr,
&cap_mem_size);
+
+ memblock_cap_memory_range(cap_mem_addr, cap_mem_size);
}
#ifdef CONFIG_SERIAL_EARLYCON
/*
* early_init_dt_scan_root - fetch the top level address and size cells
*/
-int __init early_init_dt_scan_root(unsigned long node, const char *uname,
- int depth, void *data)
+int __init early_init_dt_scan_root(void)
{
const __be32 *prop;
+ const void *fdt = initial_boot_params;
+ int node = fdt_path_offset(fdt, "/");
- if (depth != 0)
- return 0;
+ if (node < 0)
+ return -ENODEV;
dt_root_size_cells = OF_ROOT_NODE_SIZE_CELLS_DEFAULT;
dt_root_addr_cells = OF_ROOT_NODE_ADDR_CELLS_DEFAULT;
dt_root_addr_cells = be32_to_cpup(prop);
pr_debug("dt_root_addr_cells = %x\n", dt_root_addr_cells);
- /* break now */
- return 1;
+ return 0;
}
u64 __init dt_mem_next_cell(int s, const __be32 **cellp)
/*
* early_init_dt_scan_memory - Look for and parse memory nodes
*/
-int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
- int depth, void *data)
+int __init early_init_dt_scan_memory(void)
{
- const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
- const __be32 *reg, *endp;
- int l;
- bool hotpluggable;
+ int node;
+ const void *fdt = initial_boot_params;
- /* We are scanning "memory" nodes only */
- if (type == NULL || strcmp(type, "memory") != 0)
- return 0;
+ fdt_for_each_subnode(node, fdt, 0) {
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const __be32 *reg, *endp;
+ int l;
+ bool hotpluggable;
- reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
- if (reg == NULL)
- reg = of_get_flat_dt_prop(node, "reg", &l);
- if (reg == NULL)
- return 0;
+ /* We are scanning "memory" nodes only */
+ if (type == NULL || strcmp(type, "memory") != 0)
+ continue;
- endp = reg + (l / sizeof(__be32));
- hotpluggable = of_get_flat_dt_prop(node, "hotpluggable", NULL);
+ reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
+ if (reg == NULL)
+ reg = of_get_flat_dt_prop(node, "reg", &l);
+ if (reg == NULL)
+ continue;
- pr_debug("memory scan node %s, reg size %d,\n", uname, l);
+ endp = reg + (l / sizeof(__be32));
+ hotpluggable = of_get_flat_dt_prop(node, "hotpluggable", NULL);
- while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
- u64 base, size;
+ pr_debug("memory scan node %s, reg size %d,\n",
+ fdt_get_name(fdt, node, NULL), l);
- base = dt_mem_next_cell(dt_root_addr_cells, &reg);
- size = dt_mem_next_cell(dt_root_size_cells, &reg);
+ while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
+ u64 base, size;
- if (size == 0)
- continue;
- pr_debug(" - %llx, %llx\n", base, size);
+ base = dt_mem_next_cell(dt_root_addr_cells, &reg);
+ size = dt_mem_next_cell(dt_root_size_cells, &reg);
- early_init_dt_add_memory_arch(base, size);
+ if (size == 0)
+ continue;
+ pr_debug(" - %llx, %llx\n", base, size);
- if (!hotpluggable)
- continue;
+ early_init_dt_add_memory_arch(base, size);
- if (memblock_mark_hotplug(base, size))
- pr_warn("failed to mark hotplug range 0x%llx - 0x%llx\n",
- base, base + size);
- }
+ if (!hotpluggable)
+ continue;
+ if (memblock_mark_hotplug(base, size))
+ pr_warn("failed to mark hotplug range 0x%llx - 0x%llx\n",
+ base, base + size);
+ }
+ }
return 0;
}
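The rewritten scan above walks the flattened tree directly with fdt_for_each_subnode() instead of the of_scan_flat_dt() callback. The same iteration can be reproduced in userspace with libfdt, as sketched below; the 1 MiB blob buffer and the dtb path taken from argv are assumptions of the sketch, and error handling is trimmed.

/* Userspace walk of a .dtb's top-level nodes with libfdt, mirroring the
 * fdt_for_each_subnode() style used above.  Build with -lfdt. */
#include <stdio.h>
#include <string.h>
#include <libfdt.h>

int main(int argc, char **argv)
{
	static char blob[1 << 20];	/* assume the dtb fits in 1 MiB */
	FILE *f;
	int node;

	if (argc < 2)
		return 1;
	f = fopen(argv[1], "rb");
	if (!f)
		return 1;
	if (fread(blob, 1, sizeof(blob), f) == 0)
		return 1;
	fclose(f);

	if (fdt_check_header(blob))
		return 1;

	fdt_for_each_subnode(node, blob, 0) {
		int len;
		const char *type = fdt_getprop(blob, node, "device_type", &len);

		/* report only "memory" nodes, as the kernel scan does */
		if (type && !strcmp(type, "memory"))
			printf("memory node: %s\n", fdt_get_name(blob, node, NULL));
	}
	return 0;
}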
-int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
- int depth, void *data)
+int __init early_init_dt_scan_chosen(char *cmdline)
{
- int l;
+ int l, node;
const char *p;
const void *rng_seed;
+ const void *fdt = initial_boot_params;
- pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ node = fdt_path_offset(fdt, "/chosen@0");
+ if (node < 0)
+ return -ENOENT;
- if (depth != 1 || !data ||
- (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0))
- return 0;
+ chosen_node_offset = node;
early_init_dt_check_for_initrd(node);
early_init_dt_check_for_elfcorehdr(node);
- early_init_dt_check_for_usable_mem_range(node);
/* Retrieve command line */
p = of_get_flat_dt_prop(node, "bootargs", &l);
if (p != NULL && l > 0)
- strlcpy(data, p, min(l, COMMAND_LINE_SIZE));
+ strlcpy(cmdline, p, min(l, COMMAND_LINE_SIZE));
/*
* CONFIG_CMDLINE is meant to be a default in case nothing else
*/
#ifdef CONFIG_CMDLINE
#if defined(CONFIG_CMDLINE_EXTEND)
- strlcat(data, " ", COMMAND_LINE_SIZE);
- strlcat(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+ strlcat(cmdline, " ", COMMAND_LINE_SIZE);
+ strlcat(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
#elif defined(CONFIG_CMDLINE_FORCE)
- strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+ strlcpy(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
#else
/* No arguments from boot loader, use kernel's cmdline */
- if (!((char *)data)[0])
- strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+ if (!((char *)cmdline)[0])
+ strlcpy(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
#endif
#endif /* CONFIG_CMDLINE */
- pr_debug("Command line is: %s\n", (char *)data);
+ pr_debug("Command line is: %s\n", (char *)cmdline);
rng_seed = of_get_flat_dt_prop(node, "rng-seed", &l);
if (rng_seed && l > 0) {
fdt_totalsize(initial_boot_params));
}
- /* break now */
- return 1;
+ return 0;
}
#ifndef MIN_MEMBLOCK_ADDR
void __init early_init_dt_scan_nodes(void)
{
- int rc = 0;
+ int rc;
/* Initialize {size,address}-cells info */
- of_scan_flat_dt(early_init_dt_scan_root, NULL);
+ early_init_dt_scan_root();
/* Retrieve various information from the /chosen node */
- rc = of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line);
- if (!rc)
+ rc = early_init_dt_scan_chosen(boot_command_line);
+ if (rc)
pr_warn("No chosen node found, continuing without\n");
/* Setup memory, calling early_init_dt_add_memory_arch */
- of_scan_flat_dt(early_init_dt_scan_memory, NULL);
+ early_init_dt_scan_memory();
/* Handle linux,usable-memory-range property */
- memblock_cap_memory_range(cap_mem_addr, cap_mem_size);
+ early_init_dt_check_for_usable_mem_range();
}
bool __init early_init_dt_scan(void *params)
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/fiemap.h>
- #include <linux/backing-dev.h>
#include <linux/iomap.h>
+ #include <linux/sched/mm.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"
#include "xattr.h"
EXT4_ERROR_INODE(inode,
"ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
- EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
- le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
+ le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block),
depth);
return -EFSCORRUPTED;
}
+ ext4_ext_get_actual_len(newext));
if (unwritten)
ext4_ext_mark_unwritten(ex);
- eh = path[depth].p_hdr;
nearex = ex;
goto merge;
}
+ ext4_ext_get_actual_len(newext));
if (unwritten)
ext4_ext_mark_unwritten(ex);
- eh = path[depth].p_hdr;
nearex = ex;
goto merge;
}
err = ext4_es_remove_extent(inode, last_block,
EXT_MAX_BLOCKS - last_block);
if (err == -ENOMEM) {
- cond_resched();
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ memalloc_retry_wait(GFP_ATOMIC);
goto retry;
}
if (err)
retry_remove_space:
err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
if (err == -ENOMEM) {
- cond_resched();
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ memalloc_retry_wait(GFP_ATOMIC);
goto retry_remove_space;
}
return err;
ret = ext4_mark_inode_dirty(handle, inode);
if (unlikely(ret))
goto out_handle;
- ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits,
- (offset + len - 1) >> inode->i_sb->s_blocksize_bits);
/* Zero out partial block at the edges of the range */
ret = ext4_zero_partial_blocks(handle, inode, offset, len);
if (ret >= 0)
FALLOC_FL_INSERT_RANGE))
return -EOPNOTSUPP;
- ext4_fc_start_update(inode);
-
if (mode & FALLOC_FL_PUNCH_HOLE) {
ret = ext4_punch_hole(inode, offset, len);
goto exit;
inode_unlock(inode);
trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
exit:
- ext4_fc_stop_update(inode);
return ret;
}
ret = PTR_ERR(handle);
goto out_mmap;
}
- ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
+ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode, 0);
out_stop:
ext4_journal_stop(handle);
- ext4_fc_stop_ineligible(sb);
out_mmap:
filemap_invalidate_unlock(mapping);
out_mutex:
ret = PTR_ERR(handle);
goto out_mmap;
}
- ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
+ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
/* Expand file to avoid data loss if there is error while shifting */
inode->i_size += len;
out_stop:
ext4_journal_stop(handle);
- ext4_fc_stop_ineligible(sb);
out_mmap:
filemap_invalidate_unlock(mapping);
out_mutex:
}
XFS_STATS_INC(bp->b_mount, xb_page_retries);
- congestion_wait(BLK_RW_ASYNC, HZ / 50);
+ memalloc_retry_wait(gfp_mask);
}
return 0;
}
list_lru_destroy(&btp->bt_lru);
blkdev_issue_flush(btp->bt_bdev);
+ fs_put_dax(btp->bt_daxdev);
kmem_free(btp);
}
return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
}
-xfs_buftarg_t *
+struct xfs_buftarg *
xfs_alloc_buftarg(
struct xfs_mount *mp,
- struct block_device *bdev,
- struct dax_device *dax_dev)
+ struct block_device *bdev)
{
xfs_buftarg_t *btp;
btp->bt_mount = mp;
btp->bt_dev = bdev->bd_dev;
btp->bt_bdev = bdev;
- btp->bt_daxdev = dax_dev;
+ btp->bt_daxdev = fs_dax_get_by_bdev(bdev, &btp->bt_dax_part_off);
/*
* Buffer IO error rate limiting. Limit it to no more than 10 messages
#include <linux/stddef.h>
#include <linux/mount.h>
#include <linux/cred.h>
+#include <linux/mnt_idmapping.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
struct list_head s_inodes_wb; /* writeback inodes */
} __randomize_layout;
+static inline struct user_namespace *i_user_ns(const struct inode *inode)
+{
+ return inode->i_sb->s_user_ns;
+}
+
/* Helper functions so that in most cases filesystems will
* not need to deal directly with kuid_t and kgid_t and can
* instead deal with the raw numeric values that are stored
*/
static inline uid_t i_uid_read(const struct inode *inode)
{
- return from_kuid(inode->i_sb->s_user_ns, inode->i_uid);
+ return from_kuid(i_user_ns(inode), inode->i_uid);
}
static inline gid_t i_gid_read(const struct inode *inode)
{
- return from_kgid(inode->i_sb->s_user_ns, inode->i_gid);
+ return from_kgid(i_user_ns(inode), inode->i_gid);
}
static inline void i_uid_write(struct inode *inode, uid_t uid)
{
- inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid);
+ inode->i_uid = make_kuid(i_user_ns(inode), uid);
}
static inline void i_gid_write(struct inode *inode, gid_t gid)
{
- inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
-}
-
-/**
- * kuid_into_mnt - map a kuid down into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- * @kuid: kuid to be mapped
- *
- * Return: @kuid mapped according to @mnt_userns.
- * If @kuid has no mapping INVALID_UID is returned.
- */
-static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns,
- kuid_t kuid)
-{
- return make_kuid(mnt_userns, __kuid_val(kuid));
-}
-
-/**
- * kgid_into_mnt - map a kgid down into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- * @kgid: kgid to be mapped
- *
- * Return: @kgid mapped according to @mnt_userns.
- * If @kgid has no mapping INVALID_GID is returned.
- */
-static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns,
- kgid_t kgid)
-{
- return make_kgid(mnt_userns, __kgid_val(kgid));
+ inode->i_gid = make_kgid(i_user_ns(inode), gid);
}
/**
static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
const struct inode *inode)
{
- return kuid_into_mnt(mnt_userns, inode->i_uid);
+ return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid);
}
/**
static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
const struct inode *inode)
{
- return kgid_into_mnt(mnt_userns, inode->i_gid);
-}
-
-/**
- * kuid_from_mnt - map a kuid up into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- * @kuid: kuid to be mapped
- *
- * Return: @kuid mapped up according to @mnt_userns.
- * If @kuid has no mapping INVALID_UID is returned.
- */
-static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns,
- kuid_t kuid)
-{
- return KUIDT_INIT(from_kuid(mnt_userns, kuid));
-}
-
-/**
- * kgid_from_mnt - map a kgid up into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- * @kgid: kgid to be mapped
- *
- * Return: @kgid mapped up according to @mnt_userns.
- * If @kgid has no mapping INVALID_GID is returned.
- */
-static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns,
- kgid_t kgid)
-{
- return KGIDT_INIT(from_kgid(mnt_userns, kgid));
-}
-
-/**
- * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- *
- * Use this helper to initialize a new vfs or filesystem object based on
- * the caller's fsuid. A common example is initializing the i_uid field of
- * a newly allocated inode triggered by a creation event such as mkdir or
- * O_CREAT. Other examples include the allocation of quotas for a specific
- * user.
- *
- * Return: the caller's current fsuid mapped up according to @mnt_userns.
- */
-static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns)
-{
- return kuid_from_mnt(mnt_userns, current_fsuid());
-}
-
-/**
- * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- *
- * Use this helper to initialize a new vfs or filesystem object based on
- * the caller's fsgid. A common example is initializing the i_gid field of
- * a newly allocated inode triggered by a creation event such as mkdir or
- * O_CREAT. Other examples include the allocation of quotas for a specific
- * user.
- *
- * Return: the caller's current fsgid mapped up according to @mnt_userns.
- */
-static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns)
-{
- return kgid_from_mnt(mnt_userns, current_fsgid());
+ return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid);
}
/**
static inline void inode_fsuid_set(struct inode *inode,
struct user_namespace *mnt_userns)
{
- inode->i_uid = mapped_fsuid(mnt_userns);
+ inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode));
}
/**
static inline void inode_fsgid_set(struct inode *inode,
struct user_namespace *mnt_userns)
{
- inode->i_gid = mapped_fsgid(mnt_userns);
+ inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode));
}
/**
static inline bool fsuidgid_has_mapping(struct super_block *sb,
struct user_namespace *mnt_userns)
{
- struct user_namespace *s_user_ns = sb->s_user_ns;
+ struct user_namespace *fs_userns = sb->s_user_ns;
+ kuid_t kuid;
+ kgid_t kgid;
- return kuid_has_mapping(s_user_ns, mapped_fsuid(mnt_userns)) &&
- kgid_has_mapping(s_user_ns, mapped_fsgid(mnt_userns));
+ kuid = mapped_fsuid(mnt_userns, fs_userns);
+ if (!uid_valid(kuid))
+ return false;
+ kgid = mapped_fsgid(mnt_userns, fs_userns);
+ if (!gid_valid(kgid))
+ return false;
+ return kuid_has_mapping(fs_userns, kuid) &&
+ kgid_has_mapping(fs_userns, kgid);
}
extern struct timespec64 current_time(struct inode *inode);
#define S_ENCRYPTED (1 << 14) /* Encrypted file (using fs/crypto/) */
#define S_CASEFOLD (1 << 15) /* Casefolded file */
#define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */
+#define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */
/*
* Note that nosuid etc flags are inode-specific: setting some file-system
* Used to detect that mark_inode_dirty() should not move
* inode between dirty lists.
*
+ * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback.
+ *
* Q: What is the difference between I_WILL_FREE and I_FREEING?
*/
#define I_DIRTY_SYNC (1 << 0)
#define I_CREATING (1 << 15)
#define I_DONTCACHE (1 << 16)
#define I_SYNC_QUEUED (1 << 17)
+#define I_PINNING_FSCACHE_WB (1 << 18)
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
{
return mnt_user_ns(file->f_path.mnt);
}
+
+/**
+ * is_idmapped_mnt - check whether a mount is mapped
+ * @mnt: the mount to check
+ *
+ * If @mnt has an idmapping attached different from the
+ * filesystem's idmapping then @mnt is mapped.
+ *
+ * Return: true if mount is mapped, false if not.
+ */
+static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
+{
+ return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns;
+}
+
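A small usage sketch for the new helper, assuming a caller that already holds a struct path; foo_wants_idmapped_handling() is hypothetical, only is_idmapped_mnt() and mnt_user_ns() are taken from the surrounding hunks.

static bool foo_wants_idmapped_handling(const struct path *path)
{
	/*
	 * If the mount carries the filesystem's own idmapping there is
	 * nothing extra to do beyond the usual i_uid/i_gid handling.
	 */
	if (!is_idmapped_mnt(path->mnt))
		return false;

	/* Otherwise callers need to pass mnt_user_ns(path->mnt) around. */
	return true;
}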
extern long vfs_truncate(const struct path *, loff_t);
int do_truncate(struct user_namespace *, struct dentry *, loff_t start,
unsigned int time_attrs, struct file *filp);
extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
loff_t lend);
-extern bool filemap_range_needs_writeback(struct address_space *,
- loff_t lstart, loff_t lend);
extern int filemap_write_and_wait_range(struct address_space *mapping,
loff_t lstart, loff_t lend);
extern int __filemap_fdatawrite_range(struct address_space *mapping,
extern void discard_new_inode(struct inode *);
extern unsigned int get_next_ino(void);
extern void evict_inodes(struct super_block *sb);
+ void dump_mapping(const struct address_space *);
/*
 * Userspace may rely on the inode number being non-zero. For example, glibc
struct kmem_cache;
struct page;
+struct slab;
struct vm_struct;
struct task_struct;
return 0;
}
-void __kasan_poison_slab(struct page *page);
-static __always_inline void kasan_poison_slab(struct page *page)
+void __kasan_poison_slab(struct slab *slab);
+static __always_inline void kasan_poison_slab(struct slab *slab)
{
if (kasan_enabled())
- __kasan_poison_slab(page);
+ __kasan_poison_slab(slab);
}
void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
slab_flags_t *flags) {}
static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {}
static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
-static inline void kasan_poison_slab(struct page *page) {}
+static inline void kasan_poison_slab(struct slab *slab) {}
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
void *object) {}
static inline void kasan_poison_object_data(struct kmem_cache *cache,
* allocations with real shadow memory. With KASAN vmalloc, the special
* case is unnecessary, as the work is handled in the generic case.
*/
- int kasan_module_alloc(void *addr, size_t size);
+ int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask);
void kasan_free_shadow(const struct vm_struct *vm);
#else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
- static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
+ static inline int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) { return 0; }
static inline void kasan_free_shadow(const struct vm_struct *vm) {}
#endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
MEMCG_SOCK,
MEMCG_PERCPU_B,
+ MEMCG_VMALLOC,
MEMCG_NR_STAT,
};
MEMCG_MAX,
MEMCG_OOM,
MEMCG_OOM_KILL,
+ MEMCG_OOM_GROUP_KILL,
MEMCG_SWAP_HIGH,
MEMCG_SWAP_MAX,
MEMCG_SWAP_FAIL,
return folio->memcg_data & MEMCG_DATA_KMEM;
}
-/*
- * page_objcgs - get the object cgroups vector associated with a page
- * @page: a pointer to the page struct
- *
- * Returns a pointer to the object cgroups vector associated with the page,
- * or NULL. This function assumes that the page is known to have an
- * associated object cgroups vector. It's not safe to call this function
- * against pages, which might have an associated memory cgroup: e.g.
- * kernel stack pages.
- */
-static inline struct obj_cgroup **page_objcgs(struct page *page)
-{
- unsigned long memcg_data = READ_ONCE(page->memcg_data);
-
- VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page);
- VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
-
- return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
-}
-
-/*
- * page_objcgs_check - get the object cgroups vector associated with a page
- * @page: a pointer to the page struct
- *
- * Returns a pointer to the object cgroups vector associated with the page,
- * or NULL. This function is safe to use if the page can be directly associated
- * with a memory cgroup.
- */
-static inline struct obj_cgroup **page_objcgs_check(struct page *page)
-{
- unsigned long memcg_data = READ_ONCE(page->memcg_data);
-
- if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS))
- return NULL;
-
- VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
-
- return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
-}
#else
static inline bool folio_memcg_kmem(struct folio *folio)
return false;
}
-static inline struct obj_cgroup **page_objcgs(struct page *page)
-{
- return NULL;
-}
-
-static inline struct obj_cgroup **page_objcgs_check(struct page *page)
-{
- return NULL;
-}
#endif
static inline bool PageMemcgKmem(struct page *page)
local_irq_restore(flags);
}
+ static inline void mod_memcg_page_state(struct page *page,
+ int idx, int val)
+ {
+ struct mem_cgroup *memcg;
+
+ if (mem_cgroup_disabled())
+ return;
+
+ rcu_read_lock();
+ memcg = page_memcg(page);
+ if (memcg)
+ mod_memcg_state(memcg, idx, val);
+ rcu_read_unlock();
+ }
+
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
{
return READ_ONCE(memcg->vmstats.state[idx]);
{
}
+ static inline void mod_memcg_page_state(struct page *page,
+ int idx, int val)
+ {
+ }
+
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
{
return 0;
*/
void (*page_free)(struct page *page);
- /*
- * Transition the refcount in struct dev_pagemap to the dead state.
- */
- void (*kill)(struct dev_pagemap *pgmap);
-
- /*
- * Wait for refcount in struct dev_pagemap to be idle and reap it.
- */
- void (*cleanup)(struct dev_pagemap *pgmap);
-
/*
* Used for private (un-addressable) device memory only. Must migrate
* the page back to a CPU accessible page.
* struct dev_pagemap - metadata for ZONE_DEVICE mappings
* @altmap: pre-allocated/reserved memory for vmemmap allocations
* @ref: reference count that pins the devm_memremap_pages() mapping
- * @internal_ref: internal reference if @ref is not provided by the caller
- * @done: completion for @internal_ref
+ * @done: completion for @ref
* @type: memory type: see MEMORY_* in memory_hotplug.h
 * @flags: PGMAP_* flags to specify detailed behavior
+ * @vmemmap_shift: structural definition of how the vmemmap page metadata
+ * is populated, specifically the metadata page order.
+ * A zero value (default) uses base pages as the vmemmap metadata
+ * representation. A bigger value will set up compound struct pages
+ * of the requested order value.
* @ops: method table
* @owner: an opaque pointer identifying the entity that manages this
* instance. Used by various helpers to make sure that no
*/
struct dev_pagemap {
struct vmem_altmap altmap;
- struct percpu_ref *ref;
- struct percpu_ref internal_ref;
+ struct percpu_ref ref;
struct completion done;
enum memory_type type;
unsigned int flags;
+ unsigned long vmemmap_shift;
const struct dev_pagemap_ops *ops;
void *owner;
int nr_range;
return NULL;
}
+ static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap)
+ {
+ return 1 << pgmap->vmemmap_shift;
+ }
+
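A sketch of how a ZONE_DEVICE user could ask for compound vmemmap metadata via the new @vmemmap_shift field; the device, resource and function name are hypothetical, and only the dev_pagemap fields plus pgmap_vmemmap_nr() shown above (together with the long-standing devm_memremap_pages() API) are assumed.

/* Hypothetical driver setup: res describes the device memory range. */
static void *foo_map_device_memory(struct device *dev, struct resource *res,
				   struct dev_pagemap *pgmap)
{
	pgmap->type = MEMORY_DEVICE_GENERIC;
	pgmap->range.start = res->start;
	pgmap->range.end = res->end;
	pgmap->nr_range = 1;
	/*
	 * Request compound struct pages of order 9 (2MB with 4KB base pages);
	 * pgmap_vmemmap_nr(pgmap) then reports 512 pages per metadata unit.
	 */
	pgmap->vmemmap_shift = 9;

	return devm_memremap_pages(dev, pgmap);
}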
#ifdef CONFIG_ZONE_DEVICE
void *memremap_pages(struct dev_pagemap *pgmap, int nid);
void memunmap_pages(struct dev_pagemap *pgmap);
static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
{
if (pgmap)
- percpu_ref_put(pgmap->ref);
+ percpu_ref_put(&pgmap->ref);
}
#endif /* _LINUX_MEMREMAP_H_ */
*/
extern pgprot_t protection_map[16];
- /**
- * enum fault_flag - Fault flag definitions.
- * @FAULT_FLAG_WRITE: Fault was a write fault.
- * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE.
- * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked.
- * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying.
- * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region.
- * @FAULT_FLAG_TRIED: The fault has been tried once.
- * @FAULT_FLAG_USER: The fault originated in userspace.
- * @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
- * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
- * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
- *
- * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
- * whether we would allow page faults to retry by specifying these two
- * fault flags correctly. Currently there can be three legal combinations:
- *
- * (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and
- * this is the first try
- *
- * (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and
- * we've already tried at least once
- *
- * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry
- *
- * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never
- * be used. Note that page faults can be allowed to retry for multiple times,
- * in which case we'll have an initial fault with flags (a) then later on
- * continuous faults with flags (b). We should always try to detect pending
- * signals before a retry to make sure the continuous page faults can still be
- * interrupted if necessary.
- */
- enum fault_flag {
- FAULT_FLAG_WRITE = 1 << 0,
- FAULT_FLAG_MKWRITE = 1 << 1,
- FAULT_FLAG_ALLOW_RETRY = 1 << 2,
- FAULT_FLAG_RETRY_NOWAIT = 1 << 3,
- FAULT_FLAG_KILLABLE = 1 << 4,
- FAULT_FLAG_TRIED = 1 << 5,
- FAULT_FLAG_USER = 1 << 6,
- FAULT_FLAG_REMOTE = 1 << 7,
- FAULT_FLAG_INSTRUCTION = 1 << 8,
- FAULT_FLAG_INTERRUPTIBLE = 1 << 9,
- };
-
/*
* The default fault flags that should be used by most of the
* arch-specific page fault handlers.
*/
struct vm_operations_struct {
void (*open)(struct vm_area_struct * area);
+ /**
+ * @close: Called when the VMA is being removed from the MM.
+ * Context: User context. May sleep. Caller holds mmap_lock.
+ */
void (*close)(struct vm_area_struct * area);
/* Called any time before splitting to check if it's allowed */
int (*may_split)(struct vm_area_struct *area, unsigned long addr);
struct mmu_gather;
struct inode;
+static inline unsigned int compound_order(struct page *page)
+{
+ if (!PageHead(page))
+ return 0;
+ return page[1].compound_order;
+}
+
+/**
+ * folio_order - The allocation order of a folio.
+ * @folio: The folio.
+ *
+ * A folio is composed of 2^order pages. See get_order() for the definition
+ * of order.
+ *
+ * Return: The order of the folio.
+ */
+static inline unsigned int folio_order(struct folio *folio)
+{
+ return compound_order(&folio->page);
+}
+
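To make the 2^order relationship concrete, a trivial illustrative helper built only on folio_order() above (the kernel already provides folio_nr_pages() for this, so this is purely an example):

/* order 0 -> 1 page, order 9 -> 512 pages, and so on. */
static inline unsigned long example_folio_pages(struct folio *folio)
{
	return 1UL << folio_order(folio);
}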
#include <linux/huge_mm.h>
/*
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int total_mapcount(struct page *page);
- int page_trans_huge_mapcount(struct page *page, int *total_mapcount);
+ int page_trans_huge_mapcount(struct page *page);
#else
static inline int total_mapcount(struct page *page)
{
return page_mapcount(page);
}
- static inline int page_trans_huge_mapcount(struct page *page,
- int *total_mapcount)
+ static inline int page_trans_huge_mapcount(struct page *page)
{
- int mapcount = page_mapcount(page);
- if (total_mapcount)
- *total_mapcount = mapcount;
- return mapcount;
+ return page_mapcount(page);
}
#endif
return compound_head(page);
}
+static inline struct folio *virt_to_folio(const void *x)
+{
+ struct page *page = virt_to_page(x);
+
+ return page_folio(page);
+}
+
void __put_page(struct page *page);
void put_pages_list(struct list_head *pages);
compound_page_dtors[page[1].compound_dtor](page);
}
-static inline unsigned int compound_order(struct page *page)
-{
- if (!PageHead(page))
- return 0;
- return page[1].compound_order;
-}
-
-/**
- * folio_order - The allocation order of a folio.
- * @folio: The folio.
- *
- * A folio is composed of 2^order pages. See get_order() for the definition
- * of order.
- *
- * Return: The order of the folio.
- */
-static inline unsigned int folio_order(struct folio *folio)
-{
- return compound_order(&folio->page);
-}
-
static inline bool hpage_pincount_available(struct page *page)
{
/*
#define page_address_init() do { } while(0)
#endif
+static inline void *folio_address(const struct folio *folio)
+{
+ return page_address(&folio->page);
+}
+
extern void *page_rmapping(struct page *page);
extern struct anon_vma *page_anon_vma(struct page *page);
extern pgoff_t __page_file_index(struct page *page);
extern int user_shm_lock(size_t, struct ucounts *);
extern void user_shm_unlock(size_t, struct ucounts *);
-/*
- * Parameter block passed down to zap_pte_range in exceptional cases.
- */
-struct zap_details {
- struct address_space *zap_mapping; /* Check page->mapping if set */
- struct page *single_page; /* Locked page to be unmapped */
-};
-
-/*
- * We set details->zap_mappings when we want to unmap shared but keep private
- * pages. Return true if skip zapping this page, false otherwise.
- */
-static inline bool
-zap_skip_check_mapping(struct zap_details *details, struct page *page)
-{
- if (!details || !page)
- return false;
-
- return details->zap_mapping &&
- (details->zap_mapping != page_rmapping(page));
-}
-
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
pte_t pte);
struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
extern void truncate_setsize(struct inode *inode, loff_t newsize);
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
-int truncate_inode_page(struct address_space *mapping, struct page *page);
int generic_error_remove_page(struct address_space *mapping, struct page *page);
int invalidate_inode_page(struct page *page);
extern int fixup_user_fault(struct mm_struct *mm,
unsigned long address, unsigned int fault_flags,
bool *unlocked);
-void unmap_mapping_page(struct page *page);
void unmap_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t nr, bool even_cows);
void unmap_mapping_range(struct address_space *mapping,
BUG();
return -EFAULT;
}
-static inline void unmap_mapping_page(struct page *page) { }
static inline void unmap_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t nr, bool even_cows) { }
static inline void unmap_mapping_range(struct address_space *mapping,
struct page **pages);
struct page *get_dump_page(unsigned long addr);
-extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
extern void do_invalidatepage(struct page *page, unsigned int offset,
unsigned int length);
extern struct vm_area_struct *vma_merge(struct mm_struct *,
struct vm_area_struct *prev, unsigned long addr, unsigned long end,
unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
- struct mempolicy *, struct vm_userfaultfd_ctx);
+ struct mempolicy *, struct vm_userfaultfd_ctx, const char *);
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
unsigned long addr, int new_below);
#endif
void drop_slab(void);
- void drop_slab_node(int nid);
#ifndef CONFIG_MMU
#define randomize_va_space 0
MF_ACTION_REQUIRED = 1 << 1,
MF_MUST_KILL = 1 << 2,
MF_SOFT_OFFLINE = 1 << 3,
+ MF_UNPOISON = 1 << 4,
};
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
extern atomic_long_t num_poisoned_pages __read_mostly;
extern int soft_offline_page(unsigned long pfn, int flags);
+#ifndef arch_memory_failure
+static inline int arch_memory_failure(unsigned long pfn, int flags)
+{
+ return -ENXIO;
+}
+#endif
+
+#ifndef arch_is_platform_page
+static inline bool arch_is_platform_page(u64 paddr)
+{
+ return false;
+}
+#endif
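
These are override-able defaults: an architecture that wants its own behaviour defines a macro of the same name in its asm headers before this header is pulled in. A sketch of what such an override might look like, with a made-up arch and the FOO_FW_MEM_* bounds purely hypothetical:

/* In a hypothetical arch/foo/include/asm/page.h */
#define arch_is_platform_page arch_is_platform_page
static inline bool arch_is_platform_page(u64 paddr)
{
	/* e.g. memory owned by platform firmware that must not be poisoned */
	return paddr >= FOO_FW_MEM_START && paddr < FOO_FW_MEM_END;
}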
/*
* Error handlers for various types of pages.
MF_MSG_KERNEL_HIGH_ORDER,
MF_MSG_SLAB,
MF_MSG_DIFFERENT_COMPOUND,
- MF_MSG_POISONED_HUGE,
MF_MSG_HUGE,
MF_MSG_FREE_HUGE,
MF_MSG_NON_PMD_HUGE,
MF_MSG_CLEAN_LRU,
MF_MSG_TRUNCATED_LRU,
MF_MSG_BUDDY,
- MF_MSG_BUDDY_2ND,
MF_MSG_DAX,
MF_MSG_UNSPLIT_THP,
MF_MSG_UNKNOWN,
return 0;
}
+ #ifdef CONFIG_ANON_VMA_NAME
+ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
+ unsigned long len_in, const char *name);
+ #else
+ static inline int
+ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
+ unsigned long len_in, const char *name) {
+ return 0;
+ }
+ #endif
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
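
The anon-VMA-name machinery added here is driven from userspace through prctl(). A minimal userspace sketch of naming a private anonymous mapping; the PR_SET_VMA/PR_SET_VMA_ANON_NAME constants come from the matching uapi update, and the fallback defines are only needed where the installed prctl.h predates it.

#include <stdio.h>
#include <sys/mman.h>
#include <sys/prctl.h>

#ifndef PR_SET_VMA
#define PR_SET_VMA		0x53564d41
#define PR_SET_VMA_ANON_NAME	0
#endif

int main(void)
{
	size_t len = 1 << 20;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	/* On success the region shows up as [anon:my heap] in /proc/self/maps. */
	if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
		  (unsigned long)p, len, "my heap"))
		perror("prctl");
	return 0;
}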
#include <linux/mm_types_task.h>
#include <linux/auxvec.h>
+ #include <linux/kref.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/rbtree.h>
* in each subpage, but you may need to restore some of their values
* afterwards.
*
- * SLUB uses cmpxchg_double() to atomically update its freelist and
- * counters. That requires that freelist & counters be adjacent and
- * double-word aligned. We align all struct pages to double-word
- * boundaries, and ensure that 'freelist' is aligned within the
- * struct.
+ * SLUB uses cmpxchg_double() to atomically update its freelist and counters.
+ * That requires that freelist & counters in struct slab be adjacent and
+ * double-word aligned. Because struct slab currently just reinterprets the
+ * bits of struct page, we align all struct pages to double-word boundaries,
+ * and ensure that 'freelist' is aligned within struct slab.
*/
#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
#define _struct_page_alignment __aligned(2 * sizeof(unsigned long))
struct vm_userfaultfd_ctx {};
#endif /* CONFIG_USERFAULTFD */
+ struct anon_vma_name {
+ struct kref kref;
+ /* The name needs to be at the end because it is dynamically sized. */
+ char name[];
+ };
+
/*
* This struct describes a virtual memory area. There is one of these
* per VM-area/task. A VM area is any part of the process virtual memory
/*
* For areas with an address space and backing store,
* linkage into the address_space->i_mmap interval tree.
+ *
+ * For private anonymous mappings, a pointer to a null terminated string
+ * containing the name given to the vma, or NULL if unnamed.
*/
- struct {
- struct rb_node rb;
- unsigned long rb_subtree_last;
- } shared;
+
+ union {
+ struct {
+ struct rb_node rb;
+ unsigned long rb_subtree_last;
+ } shared;
+ /* Serialized by mmap_lock. */
+ struct anon_vma_name *anon_name;
+ };
/*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
atomic_t tlb_flush_pending;
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
/* See flush_tlb_batched_pending() */
- bool tlb_flush_batched;
+ atomic_t tlb_flush_batched;
#endif
struct uprobes_state uprobes_state;
#ifdef CONFIG_PREEMPT_RT
extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
extern void tlb_finish_mmu(struct mmu_gather *tlb);
- static inline void init_tlb_flush_pending(struct mm_struct *mm)
- {
- atomic_set(&mm->tlb_flush_pending, 0);
- }
-
- static inline void inc_tlb_flush_pending(struct mm_struct *mm)
- {
- atomic_inc(&mm->tlb_flush_pending);
- /*
- * The only time this value is relevant is when there are indeed pages
- * to flush. And we'll only flush pages after changing them, which
- * requires the PTL.
- *
- * So the ordering here is:
- *
- * atomic_inc(&mm->tlb_flush_pending);
- * spin_lock(&ptl);
- * ...
- * set_pte_at();
- * spin_unlock(&ptl);
- *
- * spin_lock(&ptl)
- * mm_tlb_flush_pending();
- * ....
- * spin_unlock(&ptl);
- *
- * flush_tlb_range();
- * atomic_dec(&mm->tlb_flush_pending);
- *
- * Where the increment if constrained by the PTL unlock, it thus
- * ensures that the increment is visible if the PTE modification is
- * visible. After all, if there is no PTE modification, nobody cares
- * about TLB flushes either.
- *
- * This very much relies on users (mm_tlb_flush_pending() and
- * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and
- * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc
- * locks (PPC) the unlock of one doesn't order against the lock of
- * another PTL.
- *
- * The decrement is ordered by the flush_tlb_range(), such that
- * mm_tlb_flush_pending() will not return false unless all flushes have
- * completed.
- */
- }
-
- static inline void dec_tlb_flush_pending(struct mm_struct *mm)
- {
- /*
- * See inc_tlb_flush_pending().
- *
- * This cannot be smp_mb__before_atomic() because smp_mb() simply does
- * not order against TLB invalidate completion, which is what we need.
- *
- * Therefore we must rely on tlb_flush_*() to guarantee order.
- */
- atomic_dec(&mm->tlb_flush_pending);
- }
-
- static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
- {
- /*
- * Must be called after having acquired the PTL; orders against that
- * PTLs release and therefore ensures that if we observe the modified
- * PTE we must also observe the increment from inc_tlb_flush_pending().
- *
- * That is, it only guarantees to return true if there is a flush
- * pending for _this_ PTL.
- */
- return atomic_read(&mm->tlb_flush_pending);
- }
-
- static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
- {
- /*
- * Similar to mm_tlb_flush_pending(), we must have acquired the PTL
- * for which there is a TLB flush pending in order to guarantee
- * we've seen both that PTE modification and the increment.
- *
- * (no requirement on actually still holding the PTL, that is irrelevant)
- */
- return atomic_read(&mm->tlb_flush_pending) > 1;
- }
-
struct vm_fault;
/**
unsigned long val;
} swp_entry_t;
+ /**
+ * enum fault_flag - Fault flag definitions.
+ * @FAULT_FLAG_WRITE: Fault was a write fault.
+ * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE.
+ * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked.
+ * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying.
+ * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region.
+ * @FAULT_FLAG_TRIED: The fault has been tried once.
+ * @FAULT_FLAG_USER: The fault originated in userspace.
+ * @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
+ * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
+ * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
+ *
+ * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
+ * whether we would allow page faults to retry by specifying these two
+ * fault flags correctly. Currently there can be three legal combinations:
+ *
+ * (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and
+ * this is the first try
+ *
+ * (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and
+ * we've already tried at least once
+ *
+ * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry
+ *
+ * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never
+ * be used. Note that page faults can be allowed to retry multiple times,
+ * in which case we'll have an initial fault with flags (a) then later on
+ * continuous faults with flags (b). We should always try to detect pending
+ * signals before a retry to make sure the continuous page faults can still be
+ * interrupted if necessary.
+ */
+ enum fault_flag {
+ FAULT_FLAG_WRITE = 1 << 0,
+ FAULT_FLAG_MKWRITE = 1 << 1,
+ FAULT_FLAG_ALLOW_RETRY = 1 << 2,
+ FAULT_FLAG_RETRY_NOWAIT = 1 << 3,
+ FAULT_FLAG_KILLABLE = 1 << 4,
+ FAULT_FLAG_TRIED = 1 << 5,
+ FAULT_FLAG_USER = 1 << 6,
+ FAULT_FLAG_REMOTE = 1 << 7,
+ FAULT_FLAG_INSTRUCTION = 1 << 8,
+ FAULT_FLAG_INTERRUPTIBLE = 1 << 9,
+ };
+
#endif /* _LINUX_MM_TYPES_H */
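
The ALLOW_RETRY/TRIED rules spelled out in the comment above reduce to a single predicate; this helper is purely illustrative and not part of the patch:

/* The only forbidden combination is TRIED without ALLOW_RETRY. */
static inline bool fault_retry_flags_valid(enum fault_flag flags)
{
	return (flags & FAULT_FLAG_ALLOW_RETRY) || !(flags & FAULT_FLAG_TRIED);
}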
* might lose their PG_swapbacked flag when they simply can be dropped (e.g. as
* a result of MADV_FREE).
*
- * PG_uptodate tells whether the page's contents is valid. When a read
- * completes, the page becomes uptodate, unless a disk I/O error happened.
- *
* PG_referenced, PG_reclaim are used for page reclaim for anonymous and
* file-backed pagecache (see mm/vmscan.c).
*
TESTCLEARFLAG(uname, lname, policy)
#define TESTPAGEFLAG_FALSE(uname, lname) \
- static inline bool folio_test_##lname(const struct folio *folio) { return 0; } \
+ static inline bool folio_test_##lname(const struct folio *folio) { return false; } \
static inline int Page##uname(const struct page *page) { return 0; }
#define SETPAGEFLAG_NOOP(uname, lname) \
PAGEFLAG(HWPoison, hwpoison, PF_ANY)
TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
#define __PG_HWPOISON (1UL << PG_hwpoison)
+ #define MAGIC_HWPOISON 0x48575053U /* HWPS */
+ extern void SetPageHWPoisonTakenOff(struct page *page);
+ extern void ClearPageHWPoisonTakenOff(struct page *page);
extern bool take_page_off_buddy(struct page *page);
+ extern bool put_page_back_buddy(struct page *page);
#else
PAGEFLAG_FALSE(HWPoison, hwpoison)
#define __PG_HWPOISON 0
u64 stable_page_flags(struct page *page);
+/**
+ * folio_test_uptodate - Is this folio up to date?
+ * @folio: The folio.
+ *
+ * The uptodate flag is set on a folio when every byte in the folio is
+ * at least as new as the corresponding bytes on storage. Anonymous
+ * and CoW folios are always uptodate. If the folio is not uptodate,
+ * some of the bytes in it may be; see the is_partially_uptodate()
+ * address_space operation.
+ */
static inline bool folio_test_uptodate(struct folio *folio)
{
bool ret = test_bit(PG_uptodate, folio_flags(folio, 0));
#include <linux/mmu_notifier.h>
#include <linux/fs.h>
#include <linux/mm.h>
+ #include <linux/mm_inline.h>
#include <linux/vmacache.h>
#include <linux/nsproxy.h>
#include <linux/capability.h>
*new = data_race(*orig);
INIT_LIST_HEAD(&new->anon_vma_chain);
new->vm_next = new->vm_prev = NULL;
+ dup_vma_anon_name(orig, new);
}
return new;
}
void vm_area_free(struct vm_area_struct *vma)
{
+ free_vma_anon_name(vma);
kmem_cache_free(vm_area_cachep, vma);
}
return error;
}
-static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
-{
-#ifdef CONFIG_BLOCK
- struct io_context *ioc = current->io_context;
- struct io_context *new_ioc;
-
- if (!ioc)
- return 0;
- /*
- * Share io context with parent, if CLONE_IO is set
- */
- if (clone_flags & CLONE_IO) {
- ioc_task_link(ioc);
- tsk->io_context = ioc;
- } else if (ioprio_valid(ioc->ioprio)) {
- new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
- if (unlikely(!new_ioc))
- return -ENOMEM;
-
- new_ioc->ioprio = ioc->ioprio;
- put_io_context(new_ioc);
- }
-#endif
- return 0;
-}
-
static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
{
struct sighand_struct *sig;
#include <linux/oom.h>
#include <linux/tick.h>
#include <linux/rcupdate_trace.h>
+#include <linux/nmi.h>
#include "rcu.h"
/* Bits for ->extendables field, extendables param, and related definitions. */
-#define RCUTORTURE_RDR_SHIFT 8 /* Put SRCU index in upper bits. */
-#define RCUTORTURE_RDR_MASK ((1 << RCUTORTURE_RDR_SHIFT) - 1)
+#define RCUTORTURE_RDR_SHIFT_1 8 /* Put SRCU index in upper bits. */
+#define RCUTORTURE_RDR_MASK_1 (1 << RCUTORTURE_RDR_SHIFT_1)
+#define RCUTORTURE_RDR_SHIFT_2 9 /* Put SRCU index in upper bits. */
+#define RCUTORTURE_RDR_MASK_2 (1 << RCUTORTURE_RDR_SHIFT_2)
#define RCUTORTURE_RDR_BH 0x01 /* Extend readers by disabling bh. */
#define RCUTORTURE_RDR_IRQ 0x02 /* ... disabling interrupts. */
#define RCUTORTURE_RDR_PREEMPT 0x04 /* ... disabling preemption. */
#define RCUTORTURE_RDR_RBH 0x08 /* ... rcu_read_lock_bh(). */
#define RCUTORTURE_RDR_SCHED 0x10 /* ... rcu_read_lock_sched(). */
-#define RCUTORTURE_RDR_RCU 0x20 /* ... entering another RCU reader. */
-#define RCUTORTURE_RDR_NBITS 6 /* Number of bits defined above. */
+#define RCUTORTURE_RDR_RCU_1 0x20 /* ... entering another RCU reader. */
+#define RCUTORTURE_RDR_RCU_2 0x40 /* ... entering another RCU reader. */
+#define RCUTORTURE_RDR_NBITS 7 /* Number of bits defined above. */
#define RCUTORTURE_MAX_EXTEND \
(RCUTORTURE_RDR_BH | RCUTORTURE_RDR_IRQ | RCUTORTURE_RDR_PREEMPT | \
RCUTORTURE_RDR_RBH | RCUTORTURE_RDR_SCHED)
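
With the second nested reader, a torture reader's ->readstate now packs two reader indexes above the flag bits. An illustrative encoder using only the macros defined above; the function itself is not part of rcutorture.

/*
 * Example: a reader holding rcu_read_lock_bh() plus two nested SRCU-style
 * readers whose readlock() calls returned idx1 and idx2 (each 0 or 1).
 */
static int example_readstate(int idx1, int idx2)
{
	return RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RCU_1 | RCUTORTURE_RDR_RCU_2 |
	       ((idx1 & 0x1) << RCUTORTURE_RDR_SHIFT_1) |
	       ((idx2 & 0x1) << RCUTORTURE_RDR_SHIFT_2);
}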
"Duration of fqs bursts (us), 0 to disable");
torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)");
torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)");
-torture_param(bool, fwd_progress, 1, "Test grace-period forward progress");
+torture_param(int, fwd_progress, 1, "Test grace-period forward progress");
torture_param(int, fwd_progress_div, 4, "Fraction of CPU stall to wait");
torture_param(int, fwd_progress_holdoff, 60,
"Time between forward-progress tests (s)");
torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable.");
torture_param(int, stall_cpu_holdoff, 10,
"Time to wait before starting stall (s).");
+torture_param(bool, stall_no_softlockup, false,
+ "Avoid softlockup warning during cpu stall.");
torture_param(int, stall_cpu_irqsoff, 0, "Disable interrupts while stalling.");
torture_param(int, stall_cpu_block, 0, "Sleep while stalling.");
torture_param(int, stall_gp_kthread, 0,
static struct task_struct *fqs_task;
static struct task_struct *boost_tasks[NR_CPUS];
static struct task_struct *stall_task;
-static struct task_struct *fwd_prog_task;
+static struct task_struct **fwd_prog_tasks;
static struct task_struct **barrier_cbs_tasks;
static struct task_struct *barrier_task;
static struct task_struct *read_exit_task;
void (*gp_kthread_dbg)(void);
bool (*check_boost_failed)(unsigned long gp_state, int *cpup);
int (*stall_dur)(void);
+ long cbflood_max;
int irq_capable;
int can_boost;
int extendables;
int slow_gps;
+ int no_pi_lock;
const char *name;
};
.cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
.irq_capable = 1,
+ .no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
.name = "srcu"
};
.cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
.irq_capable = 1,
+ .no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
.name = "srcud"
};
.cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
.irq_capable = 1,
+ .no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
.extendables = RCUTORTURE_MAX_EXTEND,
.name = "busted_srcud"
};
.call = call_rcu_tasks_rude,
.cb_barrier = rcu_barrier_tasks_rude,
.gp_kthread_dbg = show_rcu_tasks_rude_gp_kthread,
+ .cbflood_max = 50000,
.fqs = NULL,
.stats = NULL,
.irq_capable = 1,
.call = call_rcu_tasks_trace,
.cb_barrier = rcu_barrier_tasks_trace,
.gp_kthread_dbg = show_rcu_tasks_trace_gp_kthread,
+ .cbflood_max = 50000,
.fqs = NULL,
.stats = NULL,
.irq_capable = 1,
struct rt_read_seg *rtrsp)
{
unsigned long flags;
- int idxnew = -1;
- int idxold = *readstate;
+ int idxnew1 = -1;
+ int idxnew2 = -1;
+ int idxold1 = *readstate;
+ int idxold2 = idxold1;
int statesnew = ~*readstate & newstate;
int statesold = *readstate & ~newstate;
- WARN_ON_ONCE(idxold < 0);
- WARN_ON_ONCE((idxold >> RCUTORTURE_RDR_SHIFT) > 1);
+ WARN_ON_ONCE(idxold2 < 0);
+ WARN_ON_ONCE((idxold2 >> RCUTORTURE_RDR_SHIFT_2) > 1);
rtrsp->rt_readstate = newstate;
/* First, put new protection in place to avoid critical-section gap. */
preempt_disable();
if (statesnew & RCUTORTURE_RDR_SCHED)
rcu_read_lock_sched();
- if (statesnew & RCUTORTURE_RDR_RCU)
- idxnew = cur_ops->readlock() << RCUTORTURE_RDR_SHIFT;
+ if (statesnew & RCUTORTURE_RDR_RCU_1)
+ idxnew1 = (cur_ops->readlock() & 0x1) << RCUTORTURE_RDR_SHIFT_1;
+ if (statesnew & RCUTORTURE_RDR_RCU_2)
+ idxnew2 = (cur_ops->readlock() & 0x1) << RCUTORTURE_RDR_SHIFT_2;
/*
* Next, remove old protection, in decreasing order of strength
local_bh_enable();
if (statesold & RCUTORTURE_RDR_RBH)
rcu_read_unlock_bh();
- if (statesold & RCUTORTURE_RDR_RCU) {
- bool lockit = !statesnew && !(torture_random(trsp) & 0xffff);
+ if (statesold & RCUTORTURE_RDR_RCU_2) {
+ cur_ops->readunlock((idxold2 >> RCUTORTURE_RDR_SHIFT_2) & 0x1);
+ WARN_ON_ONCE(idxnew2 != -1);
+ idxold2 = 0;
+ }
+ if (statesold & RCUTORTURE_RDR_RCU_1) {
+ bool lockit;
+ lockit = !cur_ops->no_pi_lock && !statesnew && !(torture_random(trsp) & 0xffff);
if (lockit)
raw_spin_lock_irqsave(¤t->pi_lock, flags);
- cur_ops->readunlock(idxold >> RCUTORTURE_RDR_SHIFT);
+ cur_ops->readunlock((idxold1 >> RCUTORTURE_RDR_SHIFT_1) & 0x1);
+ WARN_ON_ONCE(idxnew1 != -1);
+ idxold1 = 0;
if (lockit)
raw_spin_unlock_irqrestore(¤t->pi_lock, flags);
}
cur_ops->read_delay(trsp, rtrsp);
/* Update the reader state. */
- if (idxnew == -1)
- idxnew = idxold & ~RCUTORTURE_RDR_MASK;
- WARN_ON_ONCE(idxnew < 0);
- WARN_ON_ONCE((idxnew >> RCUTORTURE_RDR_SHIFT) > 1);
- *readstate = idxnew | newstate;
- WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT) < 0);
- WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT) > 1);
+ if (idxnew1 == -1)
+ idxnew1 = idxold1 & RCUTORTURE_RDR_MASK_1;
+ WARN_ON_ONCE(idxnew1 < 0);
+ if (WARN_ON_ONCE((idxnew1 >> RCUTORTURE_RDR_SHIFT_1) > 1))
+ pr_info("Unexpected idxnew1 value of %#x\n", idxnew1);
+ if (idxnew2 == -1)
+ idxnew2 = idxold2 & RCUTORTURE_RDR_MASK_2;
+ WARN_ON_ONCE(idxnew2 < 0);
+ WARN_ON_ONCE((idxnew2 >> RCUTORTURE_RDR_SHIFT_2) > 1);
+ *readstate = idxnew1 | idxnew2 | newstate;
+ WARN_ON_ONCE(*readstate < 0);
+ if (WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT_2) > 1))
+ pr_info("Unexpected idxnew2 value of %#x\n", idxnew2);
}
/* Return the biggest extendables mask given current RCU and boot parameters. */
WARN_ON_ONCE(extendables & ~RCUTORTURE_MAX_EXTEND);
mask = extendables & RCUTORTURE_MAX_EXTEND & cur_ops->extendables;
- mask = mask | RCUTORTURE_RDR_RCU;
+ mask = mask | RCUTORTURE_RDR_RCU_1 | RCUTORTURE_RDR_RCU_2;
return mask;
}
unsigned long preempts_irq = preempts | RCUTORTURE_RDR_IRQ;
unsigned long bhs = RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH;
- WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT);
+ WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT_1);
/* Mostly only one bit (need preemption!), sometimes lots of bits. */
if (!(randmask1 & 0x7))
mask = mask & randmask2;
else
mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS));
+ // Can't have nested RCU reader without outer RCU reader.
+ if (!(mask & RCUTORTURE_RDR_RCU_1) && (mask & RCUTORTURE_RDR_RCU_2)) {
+ if (oldmask & RCUTORTURE_RDR_RCU_1)
+ mask &= ~RCUTORTURE_RDR_RCU_2;
+ else
+ mask |= RCUTORTURE_RDR_RCU_1;
+ }
+
/*
* Can't enable bh w/irq disabled.
*/
mask |= oldmask & bhs;
}
- return mask ?: RCUTORTURE_RDR_RCU;
+ return mask ?: RCUTORTURE_RDR_RCU_1;
}
/*
rcu_torture_writer_state,
cookie, cur_ops->get_gp_state());
rcutorture_one_extend(&readstate, 0, trsp, rtrsp);
- WARN_ON_ONCE(readstate & RCUTORTURE_RDR_MASK);
+ WARN_ON_ONCE(readstate);
// This next splat is expected behavior if leakpointer, especially
// for CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels.
WARN_ON_ONCE(leakpointer && READ_ONCE(p->rtort_pipe_count) > 1);
mutex_lock(&boost_mutex);
rcu_torture_disable_rt_throttle();
VERBOSE_TOROUT_STRING("Creating rcu_torture_boost task");
- boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL,
- cpu_to_node(cpu),
- "rcu_torture_boost");
+ boost_tasks[cpu] = kthread_run_on_cpu(rcu_torture_boost, NULL,
+ cpu, "rcu_torture_boost_%u");
if (IS_ERR(boost_tasks[cpu])) {
retval = PTR_ERR(boost_tasks[cpu]);
VERBOSE_TOROUT_STRING("rcu_torture_boost task create failed");
mutex_unlock(&boost_mutex);
return retval;
}
- kthread_bind(boost_tasks[cpu], cpu);
- wake_up_process(boost_tasks[cpu]);
mutex_unlock(&boost_mutex);
return 0;
}
#else
schedule_timeout_uninterruptible(HZ);
#endif
+ } else if (stall_no_softlockup) {
+ touch_softlockup_watchdog();
}
if (stall_cpu_irqsoff)
local_irq_enable();
unsigned long rcu_fwd_startat;
struct rcu_launder_hist n_launders_hist[N_LAUNDERS_HIST];
unsigned long rcu_launder_gp_seq_start;
+ int rcu_fwd_id;
};
static DEFINE_MUTEX(rcu_fwd_mutex);
static struct rcu_fwd *rcu_fwds;
+static unsigned long rcu_fwd_seq;
+static atomic_long_t rcu_fwd_max_cbs;
static bool rcu_fwd_emergency_stop;
static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
for (i = ARRAY_SIZE(rfp->n_launders_hist) - 1; i > 0; i--)
if (rfp->n_launders_hist[i].n_launders > 0)
break;
- pr_alert("%s: Callback-invocation histogram (duration %lu jiffies):",
- __func__, jiffies - rfp->rcu_fwd_startat);
+ mutex_lock(&rcu_fwd_mutex); // Serialize histograms.
+ pr_alert("%s: Callback-invocation histogram %d (duration %lu jiffies):",
+ __func__, rfp->rcu_fwd_id, jiffies - rfp->rcu_fwd_startat);
gps_old = rfp->rcu_launder_gp_seq_start;
for (j = 0; j <= i; j++) {
gps = rfp->n_launders_hist[j].launder_gp_seq;
gps_old = gps;
}
pr_cont("\n");
+ mutex_unlock(&rcu_fwd_mutex);
}
/* Callback function for continuous-flood RCU callbacks. */
cver = READ_ONCE(rcu_torture_current_version) - cver;
gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps);
WARN_ON(!cver && gps < 2);
- pr_alert("%s: Duration %ld cver %ld gps %ld\n", __func__, dur, cver, gps);
+ pr_alert("%s: %d Duration %ld cver %ld gps %ld\n", __func__,
+ rfp->rcu_fwd_id, dur, cver, gps);
}
if (selfpropcb) {
WRITE_ONCE(fcs.stop, 1);
rfp->rcu_fwd_cb_head = rfcpn;
n_launders++;
n_launders_sa++;
- } else {
+ } else if (!cur_ops->cbflood_max || cur_ops->cbflood_max > n_max_cbs) {
rfcp = kmalloc(sizeof(*rfcp), GFP_KERNEL);
if (WARN_ON_ONCE(!rfcp)) {
schedule_timeout_interruptible(1);
n_launders_sa = 0;
rfcp->rfc_gps = 0;
rfcp->rfc_rfp = rfp;
+ } else {
+ rfcp = NULL;
}
- cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr);
+ if (rfcp)
+ cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr);
rcu_torture_fwd_prog_cond_resched(n_launders + n_max_cbs);
if (tick_nohz_full_enabled()) {
local_irq_save(flags);
n_launders + n_max_cbs - n_launders_cb_snap,
n_launders, n_launders_sa,
n_max_gps, n_max_cbs, cver, gps);
+ atomic_long_add(n_max_cbs, &rcu_fwd_max_cbs);
rcu_torture_fwd_cb_hist(rfp);
}
schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */
static int rcutorture_oom_notify(struct notifier_block *self,
unsigned long notused, void *nfreed)
{
+ int i;
+ long ncbs;
struct rcu_fwd *rfp;
mutex_lock(&rcu_fwd_mutex);
}
WARN(1, "%s invoked upon OOM during forward-progress testing.\n",
__func__);
- rcu_torture_fwd_cb_hist(rfp);
- rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rfp->rcu_fwd_startat)) / 2);
+ for (i = 0; i < fwd_progress; i++) {
+ rcu_torture_fwd_cb_hist(&rfp[i]);
+ rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rfp[i].rcu_fwd_startat)) / 2);
+ }
WRITE_ONCE(rcu_fwd_emergency_stop, true);
smp_mb(); /* Emergency stop before free and wait to avoid hangs. */
- pr_info("%s: Freed %lu RCU callbacks.\n",
- __func__, rcu_torture_fwd_prog_cbfree(rfp));
+ ncbs = 0;
+ for (i = 0; i < fwd_progress; i++)
+ ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]);
+ pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs);
rcu_barrier();
- pr_info("%s: Freed %lu RCU callbacks.\n",
- __func__, rcu_torture_fwd_prog_cbfree(rfp));
+ ncbs = 0;
+ for (i = 0; i < fwd_progress; i++)
+ ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]);
+ pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs);
rcu_barrier();
- pr_info("%s: Freed %lu RCU callbacks.\n",
- __func__, rcu_torture_fwd_prog_cbfree(rfp));
+ ncbs = 0;
+ for (i = 0; i < fwd_progress; i++)
+ ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]);
+ pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs);
smp_mb(); /* Frees before return to avoid redoing OOM. */
(*(unsigned long *)nfreed)++; /* Forward progress CBs freed! */
pr_info("%s returning after OOM processing.\n", __func__);
/* Carry out grace-period forward-progress testing. */
static int rcu_torture_fwd_prog(void *args)
{
+ bool firsttime = true;
+ long max_cbs;
int oldnice = task_nice(current);
+ unsigned long oldseq = READ_ONCE(rcu_fwd_seq);
struct rcu_fwd *rfp = args;
int tested = 0;
int tested_tries = 0;
if (!IS_ENABLED(CONFIG_SMP) || !IS_ENABLED(CONFIG_RCU_BOOST))
set_user_nice(current, MAX_NICE);
do {
- schedule_timeout_interruptible(fwd_progress_holdoff * HZ);
- WRITE_ONCE(rcu_fwd_emergency_stop, false);
- if (!IS_ENABLED(CONFIG_TINY_RCU) ||
- rcu_inkernel_boot_has_ended())
- rcu_torture_fwd_prog_nr(rfp, &tested, &tested_tries);
- if (rcu_inkernel_boot_has_ended())
+ if (!rfp->rcu_fwd_id) {
+ schedule_timeout_interruptible(fwd_progress_holdoff * HZ);
+ WRITE_ONCE(rcu_fwd_emergency_stop, false);
+ if (!firsttime) {
+ max_cbs = atomic_long_xchg(&rcu_fwd_max_cbs, 0);
+ pr_alert("%s n_max_cbs: %ld\n", __func__, max_cbs);
+ }
+ firsttime = false;
+ WRITE_ONCE(rcu_fwd_seq, rcu_fwd_seq + 1);
+ } else {
+ while (READ_ONCE(rcu_fwd_seq) == oldseq)
+ schedule_timeout_interruptible(1);
+ oldseq = READ_ONCE(rcu_fwd_seq);
+ }
+ pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id);
+ if (rcu_inkernel_boot_has_ended() && torture_num_online_cpus() > rfp->rcu_fwd_id)
rcu_torture_fwd_prog_cr(rfp);
+ if ((cur_ops->stall_dur && cur_ops->stall_dur() > 0) &&
+ (!IS_ENABLED(CONFIG_TINY_RCU) ||
+ (rcu_inkernel_boot_has_ended() &&
+ torture_num_online_cpus() > rfp->rcu_fwd_id)))
+ rcu_torture_fwd_prog_nr(rfp, &tested, &tested_tries);
/* Avoid slow periods, better to test when busy. */
if (stutter_wait("rcu_torture_fwd_prog"))
sched_set_normal(current, oldnice);
} while (!torture_must_stop());
/* Short runs might not contain a valid forward-progress attempt. */
- WARN_ON(!tested && tested_tries >= 5);
- pr_alert("%s: tested %d tested_tries %d\n", __func__, tested, tested_tries);
+ if (!rfp->rcu_fwd_id) {
+ WARN_ON(!tested && tested_tries >= 5);
+ pr_alert("%s: tested %d tested_tries %d\n", __func__, tested, tested_tries);
+ }
torture_kthread_stopping("rcu_torture_fwd_prog");
return 0;
}
/* If forward-progress checking is requested and feasible, spawn the thread. */
static int __init rcu_torture_fwd_prog_init(void)
{
+ int i;
+ int ret = 0;
struct rcu_fwd *rfp;
if (!fwd_progress)
return 0; /* Not requested, so don't do it. */
+ if (fwd_progress >= nr_cpu_ids) {
+ VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Limiting fwd_progress to # CPUs.");
+ fwd_progress = nr_cpu_ids;
+ } else if (fwd_progress < 0) {
+ fwd_progress = nr_cpu_ids;
+ }
if ((!cur_ops->sync && !cur_ops->call) ||
- !cur_ops->stall_dur || cur_ops->stall_dur() <= 0 || cur_ops == &rcu_busted_ops) {
+ (!cur_ops->cbflood_max && (!cur_ops->stall_dur || cur_ops->stall_dur() <= 0)) ||
+ cur_ops == &rcu_busted_ops) {
VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, unsupported by RCU flavor under test");
+ fwd_progress = 0;
return 0;
}
if (stall_cpu > 0) {
VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, conflicts with CPU-stall testing");
+ fwd_progress = 0;
if (IS_MODULE(CONFIG_RCU_TORTURE_TEST))
return -EINVAL; /* In module, can fail back to user. */
WARN_ON(1); /* Make sure rcutorture notices conflict. */
fwd_progress_holdoff = 1;
if (fwd_progress_div <= 0)
fwd_progress_div = 4;
- rfp = kzalloc(sizeof(*rfp), GFP_KERNEL);
- if (!rfp)
+ rfp = kcalloc(fwd_progress, sizeof(*rfp), GFP_KERNEL);
+ fwd_prog_tasks = kcalloc(fwd_progress, sizeof(*fwd_prog_tasks), GFP_KERNEL);
+ if (!rfp || !fwd_prog_tasks) {
+ kfree(rfp);
+ kfree(fwd_prog_tasks);
+ fwd_prog_tasks = NULL;
+ fwd_progress = 0;
return -ENOMEM;
- spin_lock_init(&rfp->rcu_fwd_lock);
- rfp->rcu_fwd_cb_tail = &rfp->rcu_fwd_cb_head;
+ }
+ for (i = 0; i < fwd_progress; i++) {
+ spin_lock_init(&rfp[i].rcu_fwd_lock);
+ rfp[i].rcu_fwd_cb_tail = &rfp[i].rcu_fwd_cb_head;
+ rfp[i].rcu_fwd_id = i;
+ }
mutex_lock(&rcu_fwd_mutex);
rcu_fwds = rfp;
mutex_unlock(&rcu_fwd_mutex);
register_oom_notifier(&rcutorture_oom_nb);
- return torture_create_kthread(rcu_torture_fwd_prog, rfp, fwd_prog_task);
+ for (i = 0; i < fwd_progress; i++) {
+ ret = torture_create_kthread(rcu_torture_fwd_prog, &rcu_fwds[i], fwd_prog_tasks[i]);
+ if (ret) {
+ fwd_progress = i;
+ return ret;
+ }
+ }
+ return 0;
}
static void rcu_torture_fwd_prog_cleanup(void)
{
+ int i;
struct rcu_fwd *rfp;
- torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_task);
- rfp = rcu_fwds;
+ if (!rcu_fwds || !fwd_prog_tasks)
+ return;
+ for (i = 0; i < fwd_progress; i++)
+ torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_tasks[i]);
+ unregister_oom_notifier(&rcutorture_oom_nb);
mutex_lock(&rcu_fwd_mutex);
+ rfp = rcu_fwds;
rcu_fwds = NULL;
mutex_unlock(&rcu_fwd_mutex);
- unregister_oom_notifier(&rcutorture_oom_nb);
kfree(rfp);
+ kfree(fwd_prog_tasks);
+ fwd_prog_tasks = NULL;
}
/* Callback function for RCU barrier testing. */
&trs, "%s",
"rcu_torture_read_exit_child");
if (IS_ERR(tsp)) {
- VERBOSE_TOROUT_ERRSTRING("out of memory");
+ TOROUT_ERRSTRING("out of memory");
errexit = true;
tsp = NULL;
break;
sizeof(fakewriter_tasks[0]),
GFP_KERNEL);
if (fakewriter_tasks == NULL) {
- VERBOSE_TOROUT_ERRSTRING("out of memory");
+ TOROUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
}
rcu_torture_reader_mbchk = kcalloc(nrealreaders, sizeof(*rcu_torture_reader_mbchk),
GFP_KERNEL);
if (!reader_tasks || !rcu_torture_reader_mbchk) {
- VERBOSE_TOROUT_ERRSTRING("out of memory");
+ TOROUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
}
if (nrealnocbers > 0) {
nocb_tasks = kcalloc(nrealnocbers, sizeof(nocb_tasks[0]), GFP_KERNEL);
if (nocb_tasks == NULL) {
- VERBOSE_TOROUT_ERRSTRING("out of memory");
+ TOROUT_ERRSTRING("out of memory");
firsterr = -ENOMEM;
goto unwind;
}
#include <linux/security.h>
#include <linux/ctype.h>
#include <linux/kmemleak.h>
+#include <linux/filter.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
static int one_hundred = 100;
static int two_hundred = 200;
static int one_thousand = 1000;
+ static int three_thousand = 3000;
#ifdef CONFIG_PRINTK
static int ten_thousand = 10000;
#endif
.mode = 0644,
.proc_handler = watermark_scale_factor_sysctl_handler,
.extra1 = SYSCTL_ONE,
- .extra2 = &one_thousand,
+ .extra2 = &three_thousand,
},
{
.procname = "percpu_pagelist_high_fraction",
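
The change above raises the admissible range of vm.watermark_scale_factor from 1..1000 to 1..3000. A small userspace sketch of setting it through procfs; the value 2000 is an arbitrary example and the write requires CAP_SYS_ADMIN.

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/vm/watermark_scale_factor", "w");

	if (!f) {
		perror("watermark_scale_factor");
		return 1;
	}
	/* Units of 1/10000 of zone size: 2000 spaces the watermarks by ~20%. */
	fprintf(f, "2000\n");
	return fclose(f) ? 1 : 0;
}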
KCSAN_SANITIZE_slab.o := n
KCSAN_SANITIZE_slub.o := n
KCSAN_SANITIZE_page_alloc.o := n
+# But enable explicit instrumentation for memory barriers.
+KCSAN_INSTRUMENT_BARRIERS := y
# These files are disabled because they produce non-interesting and/or
# flaky coverage that is not a function of syscall inputs. E.g. slab is out of
obj-$(CONFIG_CMA) += cma.o
obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
+ obj-$(CONFIG_PAGE_TABLE_CHECK) += page_table_check.o
obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
obj-$(CONFIG_SECRETMEM) += secretmem.o
obj-$(CONFIG_CMA_SYSFS) += cma_sysfs.o
* We can only reuse the page if nobody else maps the huge page or it's
* part.
*/
- if (reuse_swap_page(page, NULL)) {
+ if (reuse_swap_page(page)) {
pmd_t entry;
entry = pmd_mkyoung(orig_pmd);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
* need full accuracy to avoid breaking page pinning, because
* page_trans_huge_mapcount() is slower than page_mapcount().
*/
- int page_trans_huge_mapcount(struct page *page, int *total_mapcount)
+ int page_trans_huge_mapcount(struct page *page)
{
- int i, ret, _total_mapcount, mapcount;
+ int i, ret;
/* hugetlbfs shouldn't call it */
VM_BUG_ON_PAGE(PageHuge(page), page);
- if (likely(!PageTransCompound(page))) {
- mapcount = atomic_read(&page->_mapcount) + 1;
- if (total_mapcount)
- *total_mapcount = mapcount;
- return mapcount;
- }
+ if (likely(!PageTransCompound(page)))
+ return atomic_read(&page->_mapcount) + 1;
page = compound_head(page);
- _total_mapcount = ret = 0;
+ ret = 0;
for (i = 0; i < thp_nr_pages(page); i++) {
- mapcount = atomic_read(&page[i]._mapcount) + 1;
+ int mapcount = atomic_read(&page[i]._mapcount) + 1;
ret = max(ret, mapcount);
- _total_mapcount += mapcount;
}
- if (PageDoubleMap(page)) {
+
+ if (PageDoubleMap(page))
ret -= 1;
- _total_mapcount -= thp_nr_pages(page);
- }
- mapcount = compound_mapcount(page);
- ret += mapcount;
- _total_mapcount += mapcount;
- if (total_mapcount)
- *total_mapcount = _total_mapcount;
- return ret;
+
+ return ret + compound_mapcount(page);
}
/* Racy check whether the huge page can be split */
{
struct page *head = compound_head(page);
struct deferred_split *ds_queue = get_deferred_split_queue(head);
+ XA_STATE(xas, &head->mapping->i_pages, head->index);
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
int extra_pins, ret;
goto out;
}
+ xas_split_alloc(&xas, head, compound_order(head),
+ mapping_gfp_mask(mapping) & GFP_RECLAIM_MASK);
+ if (xas_error(&xas)) {
+ ret = xas_error(&xas);
+ goto out;
+ }
+
anon_vma = NULL;
i_mmap_lock_read(mapping);
/* block interrupt reentry in xa_lock and spinlock */
local_irq_disable();
if (mapping) {
- XA_STATE(xas, &mapping->i_pages, page_index(head));
-
/*
* Check if the head page is present in page cache.
* We assume all tail are present too, if head is there.
*/
- xa_lock(&mapping->i_pages);
+ xas_lock(&xas);
+ xas_reset(&xas);
if (xas_load(&xas) != head)
goto fail;
}
if (mapping) {
int nr = thp_nr_pages(head);
+ xas_split(&xas, head, thp_order(head));
if (PageSwapBacked(head)) {
__mod_lruvec_page_state(head, NR_SHMEM_THPS,
-nr);
spin_unlock(&ds_queue->split_queue_lock);
fail:
if (mapping)
- xa_unlock(&mapping->i_pages);
+ xas_unlock(&xas);
local_irq_enable();
remap_page(head, thp_nr_pages(head));
ret = -EBUSY;
if (mapping)
i_mmap_unlock_read(mapping);
out:
+ /* Free any memory we didn't use */
+ xas_nomem(&xas, 0);
count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
return ret;
}
#include <linux/pagemap.h>
#include <linux/tracepoint-defs.h>
+struct folio_batch;
+
/*
* The set of flags that only affect watermark checking and reclaim
* behaviour. This is used by the MM to obey the caller constraints
#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
__GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\
__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\
- __GFP_ATOMIC)
+ __GFP_ATOMIC|__GFP_NOLOCKDEP)
/* The GFP flags allowed during early boot */
#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS))
return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP));
}
+struct zap_details;
void unmap_page_range(struct mmu_gather *tlb,
struct vm_area_struct *vma,
unsigned long addr, unsigned long end,
}
unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
- pgoff_t end, struct pagevec *pvec, pgoff_t *indices);
+ pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
+unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
+ pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
+void filemap_free_folio(struct address_space *mapping, struct folio *folio);
+int truncate_inode_folio(struct address_space *mapping, struct folio *folio);
+bool truncate_inode_partial_folio(struct folio *folio, loff_t start,
+ loff_t end);
/**
* folio_evictable - Test whether a folio is evictable.
*/
extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
- /*
- * in mm/memcontrol.c:
- */
- extern bool cgroup_memory_nokmem;
-
/*
* in mm/page_alloc.c
*/
void __vma_unlink_list(struct mm_struct *mm, struct vm_area_struct *vma);
#ifdef CONFIG_MMU
+void unmap_mapping_folio(struct folio *folio);
extern long populate_vma_page_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end, int *locked);
extern long faultin_vma_page_range(struct vm_area_struct *vma,
}
return fpin;
}
-
#else /* !CONFIG_MMU */
+static inline void unmap_mapping_folio(struct folio *folio) { }
static inline void clear_page_mlock(struct page *page) { }
static inline void mlock_vma_page(struct page *page) { }
static inline void vunmap_range_noflush(unsigned long start, unsigned long end)
static struct kmem_cache *qlink_to_cache(struct qlist_node *qlink)
{
- return virt_to_head_page(qlink)->slab_cache;
+ return virt_to_slab(qlink)->slab_cache;
}
static void *qlink_to_object(struct qlist_node *qlink, struct kmem_cache *cache)
static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
{
void *object = qlink_to_object(qlink, cache);
+ struct kasan_free_meta *meta = kasan_get_free_meta(cache, object);
unsigned long flags;
if (IS_ENABLED(CONFIG_SLAB))
local_irq_save(flags);
+ /*
+ * If init_on_free is enabled and KASAN's free metadata is stored in
+ * the object, zero the metadata. Otherwise, the object's memory will
+ * not be properly zeroed, as KASAN saves the metadata after the slab
+ * allocator zeroes the object.
+ */
+ if (slab_want_init_on_free(cache) &&
+ cache->kasan_info.free_meta_offset == 0)
+ memzero_explicit(meta, sizeof(*meta));
+
/*
* As the object now gets freed from the quarantine, assume that its
* free track is no longer valid.
continue;
} else {
result = SCAN_EXCEED_NONE_PTE;
+ count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
goto out;
}
}
if (page_mapcount(page) > 1 &&
++shared > khugepaged_max_ptes_shared) {
result = SCAN_EXCEED_SHARED_PTE;
+ count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
goto out;
}
goto out;
}
if (!pte_write(pteval) && PageSwapCache(page) &&
- !reuse_swap_page(page, NULL)) {
+ !reuse_swap_page(page)) {
/*
* Page is in the swap cache and cannot be re-used.
* It cannot be collapsed into a THP.
* ptl mostly unnecessary.
*/
spin_lock(ptl);
- /*
- * paravirt calls inside pte_clear here are
- * superfluous.
- */
- pte_clear(vma->vm_mm, address, _pte);
+ ptep_clear(vma->vm_mm, address, _pte);
spin_unlock(ptl);
}
} else {
* inside page_remove_rmap().
*/
spin_lock(ptl);
- /*
- * paravirt calls inside pte_clear here are
- * superfluous.
- */
- pte_clear(vma->vm_mm, address, _pte);
+ ptep_clear(vma->vm_mm, address, _pte);
page_remove_rmap(src_page, false);
spin_unlock(ptl);
free_page_and_swap_cache(src_page);
continue;
} else {
result = SCAN_EXCEED_SWAP_PTE;
+ count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
goto out_unmap;
}
}
continue;
} else {
result = SCAN_EXCEED_NONE_PTE;
+ count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
goto out_unmap;
}
}
if (page_mapcount(page) > 1 &&
++shared > khugepaged_max_ptes_shared) {
result = SCAN_EXCEED_SHARED_PTE;
+ count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
goto out_unmap;
}
/*
* Record which node the original page is from and save this
* information to khugepaged_node_load[].
- * Khupaged will allocate hugepage from the node has the max
+ * Khugepaged will allocate hugepage from the node that has the max
* hit record.
*/
node = page_to_nid(page);
}
count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC);
- /* This will be less messy when we use multi-index entries */
+ /*
+ * Ensure we have slots for all the pages in the range. This is
+ * almost certainly a no-op because most of the pages must be present
+ */
do {
xas_lock_irq(&xas);
xas_create_range(&xas);
__mod_lruvec_page_state(new_page, NR_SHMEM, nr_none);
}
+ /* Join all the small entries into a single multi-index entry */
+ xas_set_order(&xas, start, HPAGE_PMD_ORDER);
+ xas_store(&xas, new_page);
xa_locked:
xas_unlock_irq(&xas);
xa_unlocked:
if (xa_is_value(page)) {
if (++swap > khugepaged_max_ptes_swap) {
result = SCAN_EXCEED_SWAP_PTE;
+ count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
break;
}
continue;
}
+ /*
+ * XXX: khugepaged should compact smaller compound pages
+ * into a PMD sized page
+ */
if (PageTransCompound(page)) {
result = SCAN_PAGE_COMPOUND;
break;
if (result == SCAN_SUCCEED) {
if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
result = SCAN_EXCEED_NONE_PTE;
+ count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
} else {
node = khugepaged_find_target_node();
collapse_file(mm, file, start, hpage, node);
static bool cgroup_memory_nosocket __ro_after_init;
/* Kernel memory accounting disabled? */
- bool cgroup_memory_nokmem __ro_after_init;
+ static bool cgroup_memory_nokmem __ro_after_init;
/* Whether the swap controller is active */
#ifdef CONFIG_MEMCG_SWAP
static DEFINE_PER_CPU(unsigned int, stats_updates);
static atomic_t stats_flush_threshold = ATOMIC_INIT(0);
- static inline void memcg_rstat_updated(struct mem_cgroup *memcg)
+ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
{
+ unsigned int x;
+
cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
- if (!(__this_cpu_inc_return(stats_updates) % MEMCG_CHARGE_BATCH))
- atomic_inc(&stats_flush_threshold);
+
+ x = __this_cpu_add_return(stats_updates, abs(val));
+ if (x > MEMCG_CHARGE_BATCH) {
+ atomic_add(x / MEMCG_CHARGE_BATCH, &stats_flush_threshold);
+ __this_cpu_write(stats_updates, 0);
+ }
}
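A single-threaded model of the new batching arithmetic, with an assumed MEMCG_CHARGE_BATCH of 64: each update accumulates its absolute magnitude into a per-CPU counter, and every full batch bumps the global flush threshold before the per-CPU counter is reset.

#include <stdio.h>
#include <stdlib.h>

#define MEMCG_CHARGE_BATCH 64		/* assumed value for illustration */

static unsigned int stats_updates;	/* models the per-CPU counter */
static long stats_flush_threshold;	/* models the global atomic */

static void rstat_updated(int val)
{
	stats_updates += (unsigned int)abs(val);
	if (stats_updates > MEMCG_CHARGE_BATCH) {
		stats_flush_threshold += stats_updates / MEMCG_CHARGE_BATCH;
		stats_updates = 0;
	}
}

int main(void)
{
	int deltas[] = { 10, -30, 50, -100, 7 };

	for (unsigned int i = 0; i < sizeof(deltas) / sizeof(deltas[0]); i++)
		rstat_updated(deltas[i]);

	printf("pending=%u flush_threshold=%ld\n",
	       stats_updates, stats_flush_threshold);
	return 0;
}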
static void __mem_cgroup_flush_stats(void)
static void flush_memcg_stats_dwork(struct work_struct *w)
{
- mem_cgroup_flush_stats();
+ __mem_cgroup_flush_stats();
queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
}
return;
__this_cpu_add(memcg->vmstats_percpu->state[idx], val);
- memcg_rstat_updated(memcg);
+ memcg_rstat_updated(memcg, val);
}
/* idx can be of type enum memcg_stat_item or node_stat_item. */
/* Update lruvec */
__this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
- memcg_rstat_updated(memcg);
+ memcg_rstat_updated(memcg, val);
}
/**
return;
__this_cpu_add(memcg->vmstats_percpu->events[idx], count);
- memcg_rstat_updated(memcg);
+ memcg_rstat_updated(memcg, count);
}
static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
{ "pagetables", NR_PAGETABLE },
{ "percpu", MEMCG_PERCPU_B },
{ "sock", MEMCG_SOCK },
+ { "vmalloc", MEMCG_VMALLOC },
{ "shmem", NR_SHMEM },
{ "file_mapped", NR_FILE_MAPPED },
{ "file_dirty", NR_FILE_DIRTY },
rcu_read_unlock();
}
-int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
- gfp_t gfp, bool new_page)
+int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
+ gfp_t gfp, bool new_slab)
{
- unsigned int objects = objs_per_slab_page(s, page);
+ unsigned int objects = objs_per_slab(s, slab);
unsigned long memcg_data;
void *vec;
gfp &= ~OBJCGS_CLEAR_MASK;
vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp,
- page_to_nid(page));
+ slab_nid(slab));
if (!vec)
return -ENOMEM;
memcg_data = (unsigned long) vec | MEMCG_DATA_OBJCGS;
- if (new_page) {
+ if (new_slab) {
/*
- * If the slab page is brand new and nobody can yet access
- * it's memcg_data, no synchronization is required and
- * memcg_data can be simply assigned.
+ * If the slab is brand new and nobody can yet access its
+ * memcg_data, no synchronization is required and memcg_data can
+ * be simply assigned.
*/
- page->memcg_data = memcg_data;
- } else if (cmpxchg(&page->memcg_data, 0, memcg_data)) {
+ slab->memcg_data = memcg_data;
+ } else if (cmpxchg(&slab->memcg_data, 0, memcg_data)) {
/*
- * If the slab page is already in use, somebody can allocate
- * and assign obj_cgroups in parallel. In this case the existing
+ * If the slab is already in use, somebody can allocate and
+ * assign obj_cgroups in parallel. In this case the existing
* objcg vector should be reused.
*/
kfree(vec);
*/
struct mem_cgroup *mem_cgroup_from_obj(void *p)
{
- struct page *page;
+ struct folio *folio;
if (mem_cgroup_disabled())
return NULL;
- page = virt_to_head_page(p);
+ folio = virt_to_folio(p);
/*
* Slab objects are accounted individually, not per-page.
* Memcg membership data for each individual object is saved in
- * the page->obj_cgroups.
+ * slab->memcg_data.
*/
- if (page_objcgs_check(page)) {
- struct obj_cgroup *objcg;
+ if (folio_test_slab(folio)) {
+ struct obj_cgroup **objcgs;
+ struct slab *slab;
unsigned int off;
- off = obj_to_index(page->slab_cache, page, p);
- objcg = page_objcgs(page)[off];
- if (objcg)
- return obj_cgroup_memcg(objcg);
+ slab = folio_slab(folio);
+ objcgs = slab_objcgs(slab);
+ if (!objcgs)
+ return NULL;
+
+ off = obj_to_index(slab->slab_cache, slab, p);
+ if (objcgs[off])
+ return obj_cgroup_memcg(objcgs[off]);
return NULL;
}
/*
- * page_memcg_check() is used here, because page_has_obj_cgroups()
- * check above could fail because the object cgroups vector wasn't set
- * at that moment, but it can be set concurrently.
+ * page_memcg_check() is used here, because in theory we can encounter
+ * a folio where the slab flag has been cleared already, but
+ * slab->memcg_data has not been freed yet.
* page_memcg_check(page) will guarantee that a proper memory
* cgroup pointer or NULL will be returned.
*/
- return page_memcg_check(page);
+ return page_memcg_check(folio_page(folio, 0));
}
__always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
return ret;
}
+ #if defined(CONFIG_MEMCG_KMEM) && (defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG))
+ static int mem_cgroup_slab_show(struct seq_file *m, void *p)
+ {
+ /*
+ * Deprecated.
+ * Please, take a look at tools/cgroup/slabinfo.py .
+ */
+ return 0;
+ }
+ #endif
+
static struct cftype mem_cgroup_legacy_files[] = {
{
.name = "usage_in_bytes",
(defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG))
{
.name = "kmem.slabinfo",
- .seq_show = memcg_slab_show,
+ .seq_show = mem_cgroup_slab_show,
},
#endif
{
static struct mem_cgroup *mem_cgroup_alloc(void)
{
struct mem_cgroup *memcg;
- unsigned int size;
int node;
int __maybe_unused i;
long error = -ENOMEM;
- size = sizeof(struct mem_cgroup);
- size += nr_node_ids * sizeof(struct mem_cgroup_per_node *);
-
- memcg = kzalloc(size, GFP_KERNEL);
+ memcg = kzalloc(struct_size(memcg, nodeinfo, nr_node_ids), GFP_KERNEL);
if (!memcg)
return ERR_PTR(error);
seq_printf(m, "oom %lu\n", atomic_long_read(&events[MEMCG_OOM]));
seq_printf(m, "oom_kill %lu\n",
atomic_long_read(&events[MEMCG_OOM_KILL]));
+ seq_printf(m, "oom_group_kill %lu\n",
+ atomic_long_read(&events[MEMCG_OOM_GROUP_KILL]));
}
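From userspace the new field appears as another line in memory.events. A sketch that reads it; the cgroup path is only an example and depends on where the memcg of interest lives in the v2 hierarchy.

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Example path for a hypothetical cgroup named "mygroup". */
	const char *path = "/sys/fs/cgroup/mygroup/memory.events";
	char name[64];
	unsigned long long val;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	while (fscanf(f, "%63s %llu", name, &val) == 2) {
		if (!strcmp(name, "oom_group_kill"))
			printf("oom_group_kill: %llu\n", val);
	}
	fclose(f);
	return 0;
}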
static int memory_events_show(struct seq_file *m, void *v)
#include <linux/ratelimit.h>
#include <linux/page-isolation.h>
#include <linux/pagewalk.h>
+ #include <linux/shmem_fs.h>
#include "internal.h"
#include "ras/ras_event.h"
[MF_MSG_KERNEL_HIGH_ORDER] = "high-order kernel page",
[MF_MSG_SLAB] = "kernel slab page",
[MF_MSG_DIFFERENT_COMPOUND] = "different compound page after locking",
- [MF_MSG_POISONED_HUGE] = "huge page already hardware poisoned",
[MF_MSG_HUGE] = "huge page",
[MF_MSG_FREE_HUGE] = "free huge page",
[MF_MSG_NON_PMD_HUGE] = "non-pmd-sized huge page",
[MF_MSG_CLEAN_LRU] = "clean LRU page",
[MF_MSG_TRUNCATED_LRU] = "already truncated LRU page",
[MF_MSG_BUDDY] = "free buddy page",
- [MF_MSG_BUDDY_2ND] = "free buddy page (2nd try)",
[MF_MSG_DAX] = "dax page",
[MF_MSG_UNSPLIT_THP] = "unsplit thp",
[MF_MSG_UNKNOWN] = "unknown page",
{
int ret;
struct address_space *mapping;
+ bool extra_pins;
delete_from_lru_cache(p);
goto out;
}
+ /*
+ * The shmem page is kept in the page cache instead of being
+ * truncated, so it is expected to have an extra refcount after
+ * error handling.
+ */
+ extra_pins = shmem_mapping(mapping);
+
/*
* Truncation is a bit tricky. Enable it per file system for now.
*
* Open: to take i_rwsem or not for this? Right now we don't.
*/
ret = truncate_error_page(p, page_to_pfn(p), mapping);
+ if (has_extra_refcount(ps, p, extra_pins))
+ ret = MF_FAILED;
+
out:
unlock_page(p);
- if (has_extra_refcount(ps, p, false))
- ret = MF_FAILED;
-
return ret;
}
return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
}
+ static inline bool PageHWPoisonTakenOff(struct page *page)
+ {
+ return PageHWPoison(page) && page_private(page) == MAGIC_HWPOISON;
+ }
+
+ void SetPageHWPoisonTakenOff(struct page *page)
+ {
+ set_page_private(page, MAGIC_HWPOISON);
+ }
+
+ void ClearPageHWPoisonTakenOff(struct page *page)
+ {
+ if (PageHWPoison(page))
+ set_page_private(page, 0);
+ }
+
/*
* Return true if a page type of a given page is supported by hwpoison
* mechanism (while handling could fail), otherwise false. This function
return ret;
}
+ static int __get_unpoison_page(struct page *page)
+ {
+ struct page *head = compound_head(page);
+ int ret = 0;
+ bool hugetlb = false;
+
+ ret = get_hwpoison_huge_page(head, &hugetlb);
+ if (hugetlb)
+ return ret;
+
+ /*
+ * PageHWPoisonTakenOff pages are not only marked as PG_hwpoison
+ * but also isolated from the buddy freelist, so we need to detect
+ * that state and cancel both operations to unpoison.
+ */
+ if (PageHWPoisonTakenOff(page))
+ return -EHWPOISON;
+
+ return get_page_unless_zero(page) ? 1 : 0;
+ }
+
/**
* get_hwpoison_page() - Get refcount for memory error handling
* @p: Raw error page (hit by memory error)
*
* get_hwpoison_page() takes a page refcount of an error page to handle memory
* error on it, after checking that the error page is in a well-defined state
- * (defined as a page-type we can successfully handle the memor error on it,
+ * (defined as a page-type we can successfully handle the memory error on it,
* such as LRU page and hugetlb page).
*
* Memory error handling could be triggered at any time on any type of page,
* extra care for the error page's state (as done in __get_hwpoison_page()),
* and has some retry logic in get_any_page().
*
+ * When called from unpoison_memory(), the caller should already ensure that
+ * the given page has PG_hwpoison. So it's never reused for other page
+ * allocations, and __get_unpoison_page() never races with them.
+ *
* Return: 0 on failure,
* 1 on success for in-use pages in a well-defined state,
* -EIO for pages on which we can not handle memory errors,
* -EBUSY when get_hwpoison_page() has raced with page lifecycle
- * operations like allocation and free.
+ * operations like allocation and free,
+ * -EHWPOISON when the page is hwpoisoned and taken off from buddy.
*/
static int get_hwpoison_page(struct page *p, unsigned long flags)
{
int ret;
zone_pcp_disable(page_zone(p));
- ret = get_any_page(p, flags);
+ if (flags & MF_UNPOISON)
+ ret = __get_unpoison_page(p);
+ else
+ ret = get_any_page(p, flags);
zone_pcp_enable(page_zone(p));
return ret;
lock_page(head);
page_flags = head->flags;
- if (!PageHWPoison(head)) {
- pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
- num_poisoned_pages_dec();
- unlock_page(head);
- put_page(head);
- return 0;
- }
-
/*
* TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
* simply disable it. In order to make it work properly, we need
return rc;
}
+ static DEFINE_MUTEX(mf_mutex);
+
/**
* memory_failure - Handle memory failure of a page.
* @pfn: Page Number of the corrupted page
int res = 0;
unsigned long page_flags;
bool retry = true;
- static DEFINE_MUTEX(mf_mutex);
if (!sysctl_memory_failure_recovery)
panic("Memory failure on page %lx", pfn);
+ mutex_lock(&mf_mutex);
+
p = pfn_to_online_page(pfn);
if (!p) {
+ res = arch_memory_failure(pfn, flags);
+ if (res == 0)
+ goto unlock_mutex;
+
if (pfn_valid(pfn)) {
pgmap = get_dev_pagemap(pfn, NULL);
- if (pgmap)
- return memory_failure_dev_pagemap(pfn, flags,
- pgmap);
+ if (pgmap) {
+ res = memory_failure_dev_pagemap(pfn, flags,
+ pgmap);
+ goto unlock_mutex;
+ }
}
pr_err("Memory failure: %#lx: memory outside kernel control\n",
pfn);
- return -ENXIO;
+ res = -ENXIO;
+ goto unlock_mutex;
}
- mutex_lock(&mf_mutex);
-
try_again:
if (PageHuge(p)) {
res = memory_failure_hugetlb(pfn, flags);
*/
page_flags = p->flags;
- /*
- * unpoison always clear PG_hwpoison inside page lock
- */
- if (!PageHWPoison(p)) {
- pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
- num_poisoned_pages_dec();
- unlock_page(p);
- put_page(p);
- goto unlock_mutex;
- }
if (hwpoison_filter(p)) {
if (TestClearPageHWPoison(p))
num_poisoned_pages_dec();
pr_info(fmt, pfn); \
})
+ static inline int clear_page_hwpoison(struct ratelimit_state *rs, struct page *p)
+ {
+ if (TestClearPageHWPoison(p)) {
+ unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
+ page_to_pfn(p), rs);
+ num_poisoned_pages_dec();
+ return 1;
+ }
+ return 0;
+ }
+
+ static inline int unpoison_taken_off_page(struct ratelimit_state *rs,
+ struct page *p)
+ {
+ if (put_page_back_buddy(p)) {
+ unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
+ page_to_pfn(p), rs);
+ return 0;
+ }
+ return -EBUSY;
+ }
+
/**
* unpoison_memory - Unpoison a previously poisoned page
* @pfn: Page number of the to be unpoisoned page
{
struct page *page;
struct page *p;
- int freeit = 0;
- unsigned long flags = 0;
+ int ret = -EBUSY;
static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
p = pfn_to_page(pfn);
page = compound_head(p);
+ mutex_lock(&mf_mutex);
+
if (!PageHWPoison(p)) {
unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
pfn, &unpoison_rs);
- return 0;
+ goto unlock_mutex;
}
if (page_count(page) > 1) {
unpoison_pr_info("Unpoison: Someone grabs the hwpoison page %#lx\n",
pfn, &unpoison_rs);
- return 0;
+ goto unlock_mutex;
}
if (page_mapped(page)) {
unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
pfn, &unpoison_rs);
- return 0;
+ goto unlock_mutex;
}
if (page_mapping(page)) {
unpoison_pr_info("Unpoison: the hwpoison page has non-NULL mapping %#lx\n",
pfn, &unpoison_rs);
- return 0;
- }
-
- /*
- * unpoison_memory() can encounter thp only when the thp is being
- * worked by memory_failure() and the page lock is not held yet.
- * In such case, we yield to memory_failure() and make unpoison fail.
- */
- if (!PageHuge(page) && PageTransHuge(page)) {
- unpoison_pr_info("Unpoison: Memory failure is now running on %#lx\n",
- pfn, &unpoison_rs);
- return 0;
+ goto unlock_mutex;
}
- if (!get_hwpoison_page(p, flags)) {
- if (TestClearPageHWPoison(p))
- num_poisoned_pages_dec();
- unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
- pfn, &unpoison_rs);
- return 0;
- }
+ if (PageSlab(page) || PageTable(page))
+ goto unlock_mutex;
- lock_page(page);
- /*
- * This test is racy because PG_hwpoison is set outside of page lock.
- * That's acceptable because that won't trigger kernel panic. Instead,
- * the PG_hwpoison page will be caught and isolated on the entrance to
- * the free buddy page pool.
- */
- if (TestClearPageHWPoison(page)) {
- unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
- pfn, &unpoison_rs);
- num_poisoned_pages_dec();
- freeit = 1;
- }
- unlock_page(page);
+ ret = get_hwpoison_page(p, MF_UNPOISON);
+ if (!ret) {
+ if (clear_page_hwpoison(&unpoison_rs, page))
+ ret = 0;
+ else
+ ret = -EBUSY;
+ } else if (ret < 0) {
+ if (ret == -EHWPOISON) {
+ ret = unpoison_taken_off_page(&unpoison_rs, p);
+ } else
+ unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
+ pfn, &unpoison_rs);
+ } else {
+ int freeit = clear_page_hwpoison(&unpoison_rs, p);
- put_page(page);
- if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1))
put_page(page);
+ if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1)) {
+ put_page(page);
+ ret = 0;
+ }
+ }
- return 0;
+ unlock_mutex:
+ mutex_unlock(&mf_mutex);
+ return ret;
}
EXPORT_SYMBOL(unpoison_memory);
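For manual testing, poisoning and unpoisoning are usually driven through the hwpoison-inject debugfs files; a hedged sketch, assuming debugfs is mounted at /sys/kernel/debug and CONFIG_HWPOISON_INJECT provides corrupt-pfn/unpoison-pfn there.

#include <stdio.h>

/* Write a pfn (decimal) to one of the hwpoison debugfs controls. */
static int write_pfn(const char *file, unsigned long pfn)
{
	FILE *f = fopen(file, "w");

	if (!f) {
		perror(file);
		return -1;
	}
	fprintf(f, "%lu\n", pfn);
	return fclose(f);
}

int main(void)
{
	unsigned long pfn = 0x12345;	/* purely illustrative pfn */

	/* Inject a software memory error, then try to unpoison it again. */
	write_pfn("/sys/kernel/debug/hwpoison/corrupt-pfn", pfn);
	write_pfn("/sys/kernel/debug/hwpoison/unpoison-pfn", pfn);
	return 0;
}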
return -EIO;
}
+ mutex_lock(&mf_mutex);
+
if (PageHWPoison(page)) {
pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
put_ref_page(ref_page);
+ mutex_unlock(&mf_mutex);
return 0;
}
}
}
+ mutex_unlock(&mf_mutex);
+
return ret;
}
#include <linux/kernel_stat.h>
#include <linux/mm.h>
+ #include <linux/mm_inline.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/sched/numa_balancing.h>
else if (is_writable_device_exclusive_entry(entry))
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
- set_pte_at(vma->vm_mm, address, ptep, pte);
-
/*
* No need to take a page reference as one was already
* created when the swap entry was made.
*/
WARN_ON_ONCE(!PageAnon(page));
+ set_pte_at(vma->vm_mm, address, ptep, pte);
+
if (vma->vm_flags & VM_LOCKED)
mlock_vma_page(page);
return ret;
}
+/*
+ * Parameter block passed down to zap_pte_range in exceptional cases.
+ */
+struct zap_details {
+ struct address_space *zap_mapping; /* Check page->mapping if set */
+ struct folio *single_folio; /* Locked folio to be unmapped */
+};
+
+/*
+ * We set details->zap_mapping when we want to unmap shared but keep private
+ * pages. Return true if skip zapping this page, false otherwise.
+ */
+static inline bool
+zap_skip_check_mapping(struct zap_details *details, struct page *page)
+{
+ if (!details || !page)
+ return false;
+
+ return details->zap_mapping &&
+ (details->zap_mapping != page_rmapping(page));
+}
+
static unsigned long zap_pte_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
else if (zap_huge_pmd(tlb, vma, pmd, addr))
goto next;
/* fall through */
- } else if (details && details->single_page &&
- PageTransCompound(details->single_page) &&
+ } else if (details && details->single_folio &&
+ folio_test_pmd_mappable(details->single_folio) &&
next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) {
spinlock_t *ptl = pmd_lock(tlb->mm, pmd);
/*
}
/**
- * unmap_mapping_page() - Unmap single page from processes.
- * @page: The locked page to be unmapped.
+ * unmap_mapping_folio() - Unmap single folio from processes.
+ * @folio: The locked folio to be unmapped.
*
- * Unmap this page from any userspace process which still has it mmaped.
+ * Unmap this folio from any userspace process which still has it mmaped.
* Typically, for efficiency, the range of nearby pages has already been
* unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once
- * truncation or invalidation holds the lock on a page, it may find that
- * the page has been remapped again: and then uses unmap_mapping_page()
+ * truncation or invalidation holds the lock on a folio, it may find that
+ * the page has been remapped again: and then uses unmap_mapping_folio()
* to unmap it finally.
*/
-void unmap_mapping_page(struct page *page)
+void unmap_mapping_folio(struct folio *folio)
{
- struct address_space *mapping = page->mapping;
+ struct address_space *mapping = folio->mapping;
struct zap_details details = { };
pgoff_t first_index;
pgoff_t last_index;
- VM_BUG_ON(!PageLocked(page));
- VM_BUG_ON(PageTail(page));
+ VM_BUG_ON(!folio_test_locked(folio));
- first_index = page->index;
- last_index = page->index + thp_nr_pages(page) - 1;
+ first_index = folio->index;
+ last_index = folio->index + folio_nr_pages(folio) - 1;
details.zap_mapping = mapping;
- details.single_page = page;
+ details.single_folio = folio;
i_mmap_lock_write(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
pte = mk_pte(page, vma->vm_page_prot);
- if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
+ if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
vmf->flags &= ~FAULT_FLAG_WRITE;
ret |= VM_FAULT_WRITE;
pte = pte_mkuffd_wp(pte);
pte = pte_wrprotect(pte);
}
- set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
- arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
vmf->orig_pte = pte;
/* ksm created a completely new copy */
do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
}
+ set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
+ arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
+
swap_free(entry);
if (mem_cgroup_swap_full(page) ||
(vma->vm_flags & VM_LOCKED) || PageMlocked(page))
return (range->start + range_len(range)) >> PAGE_SHIFT;
}
- static unsigned long pfn_next(unsigned long pfn)
+ static unsigned long pfn_next(struct dev_pagemap *pgmap, unsigned long pfn)
{
- if (pfn % 1024 == 0)
+ if (pfn % (1024 << pgmap->vmemmap_shift))
cond_resched();
- return pfn + 1;
+ return pfn + pgmap_vmemmap_nr(pgmap);
+ }
+
+ static unsigned long pfn_len(struct dev_pagemap *pgmap, unsigned long range_id)
+ {
+ return (pfn_end(pgmap, range_id) -
+ pfn_first(pgmap, range_id)) >> pgmap->vmemmap_shift;
}
#define for_each_device_pfn(pfn, map, i) \
- for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(pfn))
+ for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); \
+ pfn = pfn_next(map, pfn))
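A standalone arithmetic sketch of the new stride, with an assumed vmemmap_shift of 9 (512 base pages per compound device page): iteration now advances one compound page per step, and pfn_len() counts compound pages rather than base pfns.

#include <stdio.h>

int main(void)
{
	unsigned long vmemmap_shift = 9;	/* assumed: 2MB compound pages */
	unsigned long vmemmap_nr = 1UL << vmemmap_shift;
	unsigned long pfn_first = 0x100000;
	unsigned long pfn_end = pfn_first + 4096;
	unsigned long pfn, steps = 0;

	for (pfn = pfn_first; pfn < pfn_end; pfn += vmemmap_nr)
		steps++;			/* one put_page() per head page */

	printf("compound pages walked: %lu, pfn_len: %lu\n",
	       steps, (pfn_end - pfn_first) >> vmemmap_shift);
	return 0;
}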
-static void dev_pagemap_kill(struct dev_pagemap *pgmap)
-{
- if (pgmap->ops && pgmap->ops->kill)
- pgmap->ops->kill(pgmap);
- else
- percpu_ref_kill(pgmap->ref);
-}
-
-static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
-{
- if (pgmap->ops && pgmap->ops->cleanup) {
- pgmap->ops->cleanup(pgmap);
- } else {
- wait_for_completion(&pgmap->done);
- percpu_ref_exit(pgmap->ref);
- }
- /*
- * Undo the pgmap ref assignment for the internal case as the
- * caller may re-enable the same pgmap.
- */
- if (pgmap->ref == &pgmap->internal_ref)
- pgmap->ref = NULL;
-}
-
static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
{
struct range *range = &pgmap->ranges[range_id];
unsigned long pfn;
int i;
- dev_pagemap_kill(pgmap);
+ percpu_ref_kill(&pgmap->ref);
for (i = 0; i < pgmap->nr_range; i++)
for_each_device_pfn(pfn, pgmap, i)
put_page(pfn_to_page(pfn));
- dev_pagemap_cleanup(pgmap);
+ wait_for_completion(&pgmap->done);
+ percpu_ref_exit(&pgmap->ref);
for (i = 0; i < pgmap->nr_range; i++)
pageunmap_range(pgmap, i);
static void dev_pagemap_percpu_release(struct percpu_ref *ref)
{
- struct dev_pagemap *pgmap =
- container_of(ref, struct dev_pagemap, internal_ref);
+ struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref);
complete(&pgmap->done);
}
memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
PHYS_PFN(range->start),
PHYS_PFN(range_len(range)), pgmap);
- percpu_ref_get_many(&pgmap->ref,
- pfn_end(pgmap, range_id) - pfn_first(pgmap, range_id));
- percpu_ref_get_many(pgmap->ref, pfn_len(pgmap, range_id));
+ percpu_ref_get_many(&pgmap->ref, pfn_len(pgmap, range_id));
return 0;
err_add_memory:
break;
}
- if (!pgmap->ref) {
- if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
- return ERR_PTR(-EINVAL);
-
- init_completion(&pgmap->done);
- error = percpu_ref_init(&pgmap->internal_ref,
- dev_pagemap_percpu_release, 0, GFP_KERNEL);
- if (error)
- return ERR_PTR(error);
- pgmap->ref = &pgmap->internal_ref;
- } else {
- if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
- WARN(1, "Missing reference count teardown definition\n");
- return ERR_PTR(-EINVAL);
- }
- }
+ init_completion(&pgmap->done);
+ error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0,
+ GFP_KERNEL);
+ if (error)
+ return ERR_PTR(error);
devmap_managed_enable_get(pgmap);
/* fall back to slow path lookup */
rcu_read_lock();
pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
- if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
+ if (pgmap && !percpu_ref_tryget_live(&pgmap->ref))
pgmap = NULL;
rcu_read_unlock();
#include <linux/ptrace.h>
#include <linux/oom.h>
#include <linux/memory.h>
+ #include <linux/random.h>
#include <asm/tlbflush.h>
pte = pte_mkhuge(pte);
pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
- set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
if (PageAnon(new))
hugepage_add_anon_rmap(new, vma, pvmw.address);
else
page_dup_rmap(new, true);
+ set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
} else
#endif
{
- set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
-
if (PageAnon(new))
page_add_anon_rmap(new, vma, pvmw.address, false);
else
page_add_file_rmap(new, false);
+ set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
}
if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
mlock_vma_page(new);
{
pte_t pte;
swp_entry_t entry;
- struct page *page;
+ struct folio *folio;
spin_lock(ptl);
pte = *ptep;
if (!is_migration_entry(entry))
goto out;
- page = pfn_swap_entry_to_page(entry);
- page = compound_head(page);
+ folio = page_folio(pfn_swap_entry_to_page(entry));
/*
* Once page cache replacement of page migration started, page_count
- * is zero; but we must not call put_and_wait_on_page_locked() without
- * a ref. Use get_page_unless_zero(), and just fault again if it fails.
+ * is zero; but we must not call folio_put_wait_locked() without
+ * a ref. Use folio_try_get(), and just fault again if it fails.
*/
- if (!get_page_unless_zero(page))
+ if (!folio_try_get(folio))
goto out;
pte_unmap_unlock(ptep, ptl);
- put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
+ folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE);
return;
out:
pte_unmap_unlock(ptep, ptl);
void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
{
spinlock_t *ptl;
- struct page *page;
+ struct folio *folio;
ptl = pmd_lock(mm, pmd);
if (!is_pmd_migration_entry(*pmd))
goto unlock;
- page = pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd));
- if (!get_page_unless_zero(page))
+ folio = page_folio(pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd)));
+ if (!folio_try_get(folio))
goto unlock;
spin_unlock(ptl);
- put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
+ folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE);
return;
unlock:
spin_unlock(ptl);
}
xas_store(&xas, newfolio);
- if (nr > 1) {
- int i;
-
- for (i = 1; i < nr; i++) {
- xas_next(&xas);
- xas_store(&xas, newfolio);
- }
- }
/*
* Drop cache reference from old page by unfreezing
return rc;
}
-
- /*
- * node_demotion[] example:
- *
- * Consider a system with two sockets. Each socket has
- * three classes of memory attached: fast, medium and slow.
- * Each memory class is placed in its own NUMA node. The
- * CPUs are placed in the node with the "fast" memory. The
- * 6 NUMA nodes (0-5) might be split among the sockets like
- * this:
- *
- * Socket A: 0, 1, 2
- * Socket B: 3, 4, 5
- *
- * When Node 0 fills up, its memory should be migrated to
- * Node 1. When Node 1 fills up, it should be migrated to
- * Node 2. The migration path start on the nodes with the
- * processors (since allocations default to this node) and
- * fast memory, progress through medium and end with the
- * slow memory:
- *
- * 0 -> 1 -> 2 -> stop
- * 3 -> 4 -> 5 -> stop
- *
- * This is represented in the node_demotion[] like this:
- *
- * { 1, // Node 0 migrates to 1
- * 2, // Node 1 migrates to 2
- * -1, // Node 2 does not migrate
- * 4, // Node 3 migrates to 4
- * 5, // Node 4 migrates to 5
- * -1} // Node 5 does not migrate
- */
-
- /*
- * Writes to this array occur without locking. Cycles are
- * not allowed: Node X demotes to Y which demotes to X...
- *
- * If multiple reads are performed, a single rcu_read_lock()
- * must be held over all reads to ensure that no cycles are
- * observed.
- */
- static int node_demotion[MAX_NUMNODES] __read_mostly =
- {[0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE};
-
- /**
- * next_demotion_node() - Get the next node in the demotion path
- * @node: The starting node to lookup the next node
- *
- * Return: node id for next memory node in the demotion path hierarchy
- * from @node; NUMA_NO_NODE if @node is terminal. This does not keep
- * @node online or guarantee that it *continues* to be the next demotion
- * target.
- */
- int next_demotion_node(int node)
- {
- int target;
-
- /*
- * node_demotion[] is updated without excluding this
- * function from running. RCU doesn't provide any
- * compiler barriers, so the READ_ONCE() is required
- * to avoid compiler reordering or read merging.
- *
- * Make sure to use RCU over entire code blocks if
- * node_demotion[] reads need to be consistent.
- */
- rcu_read_lock();
- target = READ_ONCE(node_demotion[node]);
- rcu_read_unlock();
-
- return target;
- }
-
/*
* Obtain the lock on page, remove all ptes and migrate the page
* to the newly allocated page in newpage.
* @mode: The migration mode that specifies the constraints for
* page migration, if any.
* @reason: The reason for page migration.
- * @ret_succeeded: Set to the number of pages migrated successfully if
+ * @ret_succeeded: Set to the number of normal pages migrated successfully if
* the caller passes a non-NULL pointer.
*
* The function returns after 10 attempts or if no pages are movable any more
* It is caller's responsibility to call putback_movable_pages() to return pages
* to the LRU or free list only if ret != 0.
*
- * Returns the number of pages that were not migrated, or an error code.
+ * Returns the number of {normal pages, THPs, hugetlb pages} that were not
+ * migrated, or an error code. The number of THP splits is counted as the number
+ * of non-migrated THPs, no matter how many subpages of those THPs were migrated
+ * successfully.
*/
int migrate_pages(struct list_head *from, new_page_t get_new_page,
free_page_t put_new_page, unsigned long private,
int retry = 1;
int thp_retry = 1;
int nr_failed = 0;
+ int nr_failed_pages = 0;
int nr_succeeded = 0;
int nr_thp_succeeded = 0;
int nr_thp_failed = 0;
int swapwrite = current->flags & PF_SWAPWRITE;
int rc, nr_subpages;
LIST_HEAD(ret_pages);
+ LIST_HEAD(thp_split_pages);
bool nosplit = (reason == MR_NUMA_MISPLACED);
+ bool no_subpage_counting = false;
trace_mm_migrate_pages_start(mode, reason);
if (!swapwrite)
current->flags |= PF_SWAPWRITE;
+ thp_subpage_migration:
for (pass = 0; pass < 10 && (retry || thp_retry); pass++) {
retry = 0;
thp_retry = 0;
* during migration.
*/
is_thp = PageTransHuge(page) && !PageHuge(page);
- nr_subpages = thp_nr_pages(page);
+ nr_subpages = compound_nr(page);
cond_resched();
if (PageHuge(page))
case -ENOSYS:
/* THP migration is unsupported */
if (is_thp) {
- if (!try_split_thp(page, &page2, from)) {
+ nr_thp_failed++;
+ if (!try_split_thp(page, &page2, &thp_split_pages)) {
nr_thp_split++;
goto retry;
}
- nr_thp_failed++;
- nr_failed += nr_subpages;
+ nr_failed_pages += nr_subpages;
break;
}
/* Hugetlb migration is unsupported */
- nr_failed++;
+ if (!no_subpage_counting)
+ nr_failed++;
+ nr_failed_pages += nr_subpages;
break;
case -ENOMEM:
/*
* THP NUMA faulting doesn't split THP to retry.
*/
if (is_thp && !nosplit) {
- if (!try_split_thp(page, &page2, from)) {
+ nr_thp_failed++;
+ if (!try_split_thp(page, &page2, &thp_split_pages)) {
nr_thp_split++;
goto retry;
}
- nr_thp_failed++;
- nr_failed += nr_subpages;
+ nr_failed_pages += nr_subpages;
goto out;
}
- nr_failed++;
+
+ if (!no_subpage_counting)
+ nr_failed++;
+ nr_failed_pages += nr_subpages;
goto out;
case -EAGAIN:
if (is_thp) {
retry++;
break;
case MIGRATEPAGE_SUCCESS:
+ nr_succeeded += nr_subpages;
if (is_thp) {
nr_thp_succeeded++;
- nr_succeeded += nr_subpages;
break;
}
- nr_succeeded++;
break;
default:
/*
*/
if (is_thp) {
nr_thp_failed++;
- nr_failed += nr_subpages;
+ nr_failed_pages += nr_subpages;
break;
}
- nr_failed++;
+
+ if (!no_subpage_counting)
+ nr_failed++;
+ nr_failed_pages += nr_subpages;
break;
}
}
}
- nr_failed += retry + thp_retry;
+ nr_failed += retry;
nr_thp_failed += thp_retry;
- rc = nr_failed;
+ /*
+ * Try to migrate the subpages of THPs that failed to migrate. Do not
+ * count them in nr_failed in this round, since all subpages of a THP
+ * were already counted as one failure in the first round.
+ */
+ if (!list_empty(&thp_split_pages)) {
+ /*
+ * Move non-migrated pages (after 10 retries) to ret_pages
+ * to avoid migrating them again.
+ */
+ list_splice_init(from, &ret_pages);
+ list_splice_init(&thp_split_pages, from);
+ no_subpage_counting = true;
+ retry = 1;
+ goto thp_subpage_migration;
+ }
+
+ rc = nr_failed + nr_thp_failed;
out:
/*
* Put the permanent failure page back to migration list, they
list_splice(&ret_pages, from);
count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
- count_vm_events(PGMIGRATE_FAIL, nr_failed);
+ count_vm_events(PGMIGRATE_FAIL, nr_failed_pages);
count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed);
count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split);
- trace_mm_migrate_pages(nr_succeeded, nr_failed, nr_thp_succeeded,
+ trace_mm_migrate_pages(nr_succeeded, nr_failed_pages, nr_thp_succeeded,
nr_thp_failed, nr_thp_split, mode, reason);
if (!swapwrite)
static void migrate_vma_unmap(struct migrate_vma *migrate)
{
const unsigned long npages = migrate->npages;
- const unsigned long start = migrate->start;
- unsigned long addr, i, restore = 0;
+ unsigned long i, restore = 0;
bool allow_drain = true;
lru_add_drain();
}
}
- for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) {
+ for (i = 0; i < npages && restore; i++) {
struct page *page = migrate_pfn_to_page(migrate->src[i]);
if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
EXPORT_SYMBOL(migrate_vma_finalize);
#endif /* CONFIG_DEVICE_PRIVATE */
+ /*
+ * node_demotion[] example:
+ *
+ * Consider a system with two sockets. Each socket has
+ * three classes of memory attached: fast, medium and slow.
+ * Each memory class is placed in its own NUMA node. The
+ * CPUs are placed in the node with the "fast" memory. The
+ * 6 NUMA nodes (0-5) might be split among the sockets like
+ * this:
+ *
+ * Socket A: 0, 1, 2
+ * Socket B: 3, 4, 5
+ *
+ * When Node 0 fills up, its memory should be migrated to
+ * Node 1. When Node 1 fills up, it should be migrated to
+ * Node 2. The migration path start on the nodes with the
+ * processors (since allocations default to this node) and
+ * fast memory, progress through medium and end with the
+ * slow memory:
+ *
+ * 0 -> 1 -> 2 -> stop
+ * 3 -> 4 -> 5 -> stop
+ *
+ * This is represented in the node_demotion[] like this:
+ *
+ * { nr=1, nodes[0]=1 }, // Node 0 migrates to 1
+ * { nr=1, nodes[0]=2 }, // Node 1 migrates to 2
+ * { nr=0, nodes[0]=-1 }, // Node 2 does not migrate
+ * { nr=1, nodes[0]=4 }, // Node 3 migrates to 4
+ * { nr=1, nodes[0]=5 }, // Node 4 migrates to 5
+ * { nr=0, nodes[0]=-1 }, // Node 5 does not migrate
+ *
+ * Moreover, some systems may have multiple slow memory nodes.
+ * Suppose a system has one socket with 3 memory nodes: node 0
+ * is fast memory, nodes 1 and 2 are both slow memory, and the
+ * distance from the fast memory node to each slow memory node
+ * is the same. Then the migration path should be:
+ *
+ * 0 -> 1/2 -> stop
+ *
+ * This is represented in the node_demotion[] like this:
+ * { nr=2, {nodes[0]=1, nodes[1]=2} }, // Node 0 migrates to node 1 and node 2
+ * { nr=0, nodes[0]=-1, }, // Node 1 does not migrate
+ * { nr=0, nodes[0]=-1, }, // Node 2 does not migrate
+ */
+
+ /*
+ * Writes to this array occur without locking. Cycles are
+ * not allowed: Node X demotes to Y which demotes to X...
+ *
+ * If multiple reads are performed, a single rcu_read_lock()
+ * must be held over all reads to ensure that no cycles are
+ * observed.
+ */
+ #define DEFAULT_DEMOTION_TARGET_NODES 15
+
+ #if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES
+ #define DEMOTION_TARGET_NODES (MAX_NUMNODES - 1)
+ #else
+ #define DEMOTION_TARGET_NODES DEFAULT_DEMOTION_TARGET_NODES
+ #endif
+
+ struct demotion_nodes {
+ unsigned short nr;
+ short nodes[DEMOTION_TARGET_NODES];
+ };
+
+ static struct demotion_nodes *node_demotion __read_mostly;
+
+ /**
+ * next_demotion_node() - Get the next node in the demotion path
+ * @node: The starting node to lookup the next node
+ *
+ * Return: node id for next memory node in the demotion path hierarchy
+ * from @node; NUMA_NO_NODE if @node is terminal. This does not keep
+ * @node online or guarantee that it *continues* to be the next demotion
+ * target.
+ */
+ int next_demotion_node(int node)
+ {
+ struct demotion_nodes *nd;
+ unsigned short target_nr, index;
+ int target;
+
+ if (!node_demotion)
+ return NUMA_NO_NODE;
+
+ nd = &node_demotion[node];
+
+ /*
+ * node_demotion[] is updated without excluding this
+ * function from running. RCU doesn't provide any
+ * compiler barriers, so the READ_ONCE() is required
+ * to avoid compiler reordering or read merging.
+ *
+ * Make sure to use RCU over entire code blocks if
+ * node_demotion[] reads need to be consistent.
+ */
+ rcu_read_lock();
+ target_nr = READ_ONCE(nd->nr);
+
+ switch (target_nr) {
+ case 0:
+ target = NUMA_NO_NODE;
+ goto out;
+ case 1:
+ index = 0;
+ break;
+ default:
+ /*
+ * If there are multiple target nodes, just select one
+ * target node randomly.
+ *
+ * In addition, round-robin selection would also work, but it
+ * would need an extra field in node_demotion[] to record the
+ * last selected target node, and updating that field may cause
+ * cache ping-pong. Per-CPU data could avoid the caching issue
+ * but seems more complicated. So, for now, selecting the target
+ * node randomly seems better.
+ */
+ index = get_random_int() % target_nr;
+ break;
+ }
+
+ target = READ_ONCE(nd->nodes[index]);
+
+ out:
+ rcu_read_unlock();
+ return target;
+ }
+
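A userspace model of the selection logic above, with hypothetical node IDs; rand() stands in for get_random_int() to spread demotions across several equally good targets.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define DEMOTION_TARGET_NODES 15
#define NUMA_NO_NODE (-1)

struct demotion_nodes {
	unsigned short nr;
	short nodes[DEMOTION_TARGET_NODES];
};

static int next_demotion_node_model(const struct demotion_nodes *nd)
{
	if (nd->nr == 0)
		return NUMA_NO_NODE;
	if (nd->nr == 1)
		return nd->nodes[0];
	/* Several equally good targets: pick one at random. */
	return nd->nodes[rand() % nd->nr];
}

int main(void)
{
	/* Node 0 can demote to nodes 1 and 2 (same best distance). */
	struct demotion_nodes node0 = { .nr = 2, .nodes = { 1, 2 } };
	int i;

	srand((unsigned int)time(NULL));
	for (i = 0; i < 5; i++)
		printf("demotion target: %d\n", next_demotion_node_model(&node0));
	return 0;
}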
#if defined(CONFIG_HOTPLUG_CPU)
/* Disable reclaim-based migration. */
static void __disable_all_migrate_targets(void)
{
- int node;
+ int node, i;
+
+ if (!node_demotion)
+ return;
- for_each_online_node(node)
- node_demotion[node] = NUMA_NO_NODE;
+ for_each_online_node(node) {
+ node_demotion[node].nr = 0;
+ for (i = 0; i < DEMOTION_TARGET_NODES; i++)
+ node_demotion[node].nodes[i] = NUMA_NO_NODE;
+ }
}
static void disable_all_migrate_targets(void)
* Failing here is OK. It might just indicate
* being at the end of a chain.
*/
- static int establish_migrate_target(int node, nodemask_t *used)
+ static int establish_migrate_target(int node, nodemask_t *used,
+ int best_distance)
{
- int migration_target;
+ int migration_target, index, val;
+ struct demotion_nodes *nd;
- /*
- * Can not set a migration target on a
- * node with it already set.
- *
- * No need for READ_ONCE() here since this
- * in the write path for node_demotion[].
- * This should be the only thread writing.
- */
- if (node_demotion[node] != NUMA_NO_NODE)
+ if (!node_demotion)
return NUMA_NO_NODE;
+ nd = &node_demotion[node];
+
migration_target = find_next_best_node(node, used);
if (migration_target == NUMA_NO_NODE)
return NUMA_NO_NODE;
- node_demotion[node] = migration_target;
+ /*
+ * If a migration target has already been set for this node, that
+ * target is at the best distance. Still check whether this node
+ * can also be demoted to other target nodes that share the same
+ * best distance.
+ */
+ if (best_distance != -1) {
+ val = node_distance(node, migration_target);
+ if (val > best_distance)
+ return NUMA_NO_NODE;
+ }
+
+ index = nd->nr;
+ if (WARN_ONCE(index >= DEMOTION_TARGET_NODES,
+ "Exceeds maximum demotion target nodes\n"))
+ return NUMA_NO_NODE;
+
+ nd->nodes[index] = migration_target;
+ nd->nr++;
return migration_target;
}
*
* The difference here is that cycles must be avoided. If
* node0 migrates to node1, then neither node1, nor anything
- * node1 migrates to can migrate to node0.
+ * node1 migrates to can migrate to node0. Also, one node can
+ * be migrated to multiple nodes if those target nodes all have
+ * the same best distance to the source node.
*
* This function can run simultaneously with readers of
* node_demotion[]. However, it can not run simultaneously
nodemask_t next_pass = NODE_MASK_NONE;
nodemask_t this_pass = NODE_MASK_NONE;
nodemask_t used_targets = NODE_MASK_NONE;
- int node;
+ int node, best_distance;
/*
* Avoid any oddities like cycles that could occur
* multiple source nodes to share a destination.
*/
nodes_or(used_targets, used_targets, this_pass);
- for_each_node_mask(node, this_pass) {
- int target_node = establish_migrate_target(node, &used_targets);
- if (target_node == NUMA_NO_NODE)
- continue;
+ for_each_node_mask(node, this_pass) {
+ best_distance = -1;
/*
- * Visit targets from this pass in the next pass.
- * Eventually, every node will have been part of
- * a pass, and will become set in 'used_targets'.
+ * Try to set up the migration path for the node. There can be
+ * multiple target nodes, so loop to collect every target that
+ * shares the best node distance.
*/
- node_set(target_node, next_pass);
+ do {
+ int target_node =
+ establish_migrate_target(node, &used_targets,
+ best_distance);
+
+ if (target_node == NUMA_NO_NODE)
+ break;
+
+ if (best_distance == -1)
+ best_distance = node_distance(node, target_node);
+
+ /*
+ * Visit targets from this pass in the next pass.
+ * Eventually, every node will have been part of
+ * a pass, and will become set in 'used_targets'.
+ */
+ node_set(target_node, next_pass);
+ } while (1);
}
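The inner loop above keeps accepting targets only while they sit at the first (best) distance found. A small model with a made-up node_distance() table that collects every target matching the best distance:

#include <stdio.h>

#define NR_NODES 3

/* Hypothetical distance table: node 0 is fast, nodes 1 and 2 are slow. */
static const int distance[NR_NODES][NR_NODES] = {
	{ 10, 20, 20 },
	{ 20, 10, 20 },
	{ 20, 20, 10 },
};

int main(void)
{
	int node = 0, best_distance = -1;
	int used[NR_NODES] = { 1, 0, 0 };	/* the source node is already used */
	int candidate;

	for (candidate = 0; candidate < NR_NODES; candidate++) {
		if (used[candidate])
			continue;
		if (best_distance == -1)
			best_distance = distance[node][candidate];
		else if (distance[node][candidate] > best_distance)
			break;	/* worse than the best distance: stop, like the kernel loop */
		printf("node %d demotes to node %d (distance %d)\n",
		       node, candidate, distance[node][candidate]);
		used[candidate] = 1;
	}
	return 0;
}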
/*
* 'next_pass' contains nodes which became migration
{
int ret;
+ node_demotion = kmalloc_array(nr_node_ids,
+ sizeof(struct demotion_nodes),
+ GFP_KERNEL);
+ WARN_ON(!node_demotion);
+
ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline",
NULL, migration_offline_cpu);
/*
struct shmem_inode_info *info;
struct page *page;
unsigned long batch = sc ? sc->nr_to_scan : 128;
- int removed = 0, split = 0;
+ int split = 0;
if (list_empty(&sbinfo->shrinklist))
return SHRINK_STOP;
/* inode is about to be evicted */
if (!inode) {
list_del_init(&info->shrinklist);
- removed++;
goto next;
}
if (round_up(inode->i_size, PAGE_SIZE) ==
round_up(inode->i_size, HPAGE_PMD_SIZE)) {
list_move(&info->shrinklist, &to_remove);
- removed++;
goto next;
}
list_move(&info->shrinklist, &list);
next:
+ sbinfo->shrinklist_len--;
if (!--batch)
break;
}
inode = &info->vfs_inode;
if (nr_to_split && split >= nr_to_split)
- goto leave;
+ goto move_back;
page = find_get_page(inode->i_mapping,
(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
}
/*
- * Leave the inode on the list if we failed to lock
- * the page at this time.
+ * Move the inode from the local list back to the shrinklist if
+ * we failed to lock the page at this time.
*
* Waiting for the lock may lead to deadlock in the
* reclaim path.
*/
if (!trylock_page(page)) {
put_page(page);
- goto leave;
+ goto move_back;
}
ret = split_huge_page(page);
unlock_page(page);
put_page(page);
- /* If split failed leave the inode on the list */
+ /* If split failed move the inode on the list back to shrinklist */
if (ret)
- goto leave;
+ goto move_back;
split++;
drop:
list_del_init(&info->shrinklist);
- removed++;
- leave:
+ goto put;
+ move_back:
+ /*
+ * Make sure the inode is either on the global list or deleted
+ * from any local list before iput() since it could be deleted
+ * in another thread once we put the inode (then the local list
+ * is corrupted).
+ */
+ spin_lock(&sbinfo->shrinklist_lock);
+ list_move(&info->shrinklist, &sbinfo->shrinklist);
+ sbinfo->shrinklist_len++;
+ spin_unlock(&sbinfo->shrinklist_lock);
+ put:
iput(inode);
}
- spin_lock(&sbinfo->shrinklist_lock);
- list_splice_tail(&list, &sbinfo->shrinklist);
- sbinfo->shrinklist_len -= removed;
- spin_unlock(&sbinfo->shrinklist_lock);
-
return split;
}
struct mm_struct *charge_mm)
{
XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page));
- unsigned long i = 0;
unsigned long nr = compound_nr(page);
int error;
cgroup_throttle_swaprate(page, gfp);
do {
- void *entry;
xas_lock_irq(&xas);
- entry = xas_find_conflict(&xas);
- if (entry != expected)
+ if (expected != xas_find_conflict(&xas)) {
xas_set_err(&xas, -EEXIST);
- xas_create_range(&xas);
- if (xas_error(&xas))
goto unlock;
-next:
- xas_store(&xas, page);
- if (++i < nr) {
- xas_next(&xas);
- goto next;
}
+ if (expected && xas_find_conflict(&xas)) {
+ xas_set_err(&xas, -EEXIST);
+ goto unlock;
+ }
+ xas_store(&xas, page);
+ if (xas_error(&xas))
+ goto unlock;
if (PageTransHuge(page)) {
count_vm_event(THP_FILE_ALLOC);
__mod_lruvec_page_state(page, NR_SHMEM_THPS, nr);
}
}
-/*
- * Check whether a hole-punch or truncation needs to split a huge page,
- * returning true if no split was required, or the split has been successful.
- *
- * Eviction (or truncation to 0 size) should never need to split a huge page;
- * but in rare cases might do so, if shmem_undo_range() failed to trylock on
- * head, and then succeeded to trylock on tail.
- *
- * A split can only succeed when there are no additional references on the
- * huge page: so the split below relies upon find_get_entries() having stopped
- * when it found a subpage of the huge page, without getting further references.
- */
-static bool shmem_punch_compound(struct page *page, pgoff_t start, pgoff_t end)
+static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index)
{
- if (!PageTransCompound(page))
- return true;
-
- /* Just proceed to delete a huge page wholly within the range punched */
- if (PageHead(page) &&
- page->index >= start && page->index + HPAGE_PMD_NR <= end)
- return true;
+ struct folio *folio;
+ struct page *page;
- /* Try to split huge page, so we can truly punch the hole or truncate */
- return split_huge_page(page) >= 0;
+ /*
+ * At first avoid shmem_getpage(,,,SGP_READ): that fails
+ * beyond i_size, and reports fallocated pages as holes.
+ */
+ folio = __filemap_get_folio(inode->i_mapping, index,
+ FGP_ENTRY | FGP_LOCK, 0);
+ if (!xa_is_value(folio))
+ return folio;
+ /*
+ * But read a page back from swap if any of it is within i_size
+ * (although in some cases this is just a waste of time).
+ */
+ page = NULL;
+ shmem_getpage(inode, index, &page, SGP_READ);
+ return page ? page_folio(page) : NULL;
}
/*
struct shmem_inode_info *info = SHMEM_I(inode);
pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
pgoff_t end = (lend + 1) >> PAGE_SHIFT;
- unsigned int partial_start = lstart & (PAGE_SIZE - 1);
- unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1);
- struct pagevec pvec;
+ struct folio_batch fbatch;
pgoff_t indices[PAGEVEC_SIZE];
+ struct folio *folio;
+ bool same_folio;
long nr_swaps_freed = 0;
pgoff_t index;
int i;
if (info->fallocend > start && info->fallocend <= end && !unfalloc)
info->fallocend = start;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
index = start;
while (index < end && find_lock_entries(mapping, index, end - 1,
- &pvec, indices)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
+ &fbatch, indices)) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ folio = fbatch.folios[i];
index = indices[i];
- if (xa_is_value(page)) {
+ if (xa_is_value(folio)) {
if (unfalloc)
continue;
nr_swaps_freed += !shmem_free_swap(mapping,
- index, page);
+ index, folio);
continue;
}
- index += thp_nr_pages(page) - 1;
+ index += folio_nr_pages(folio) - 1;
- if (!unfalloc || !PageUptodate(page))
- truncate_inode_page(mapping, page);
- unlock_page(page);
+ if (!unfalloc || !folio_test_uptodate(folio))
+ truncate_inode_folio(mapping, folio);
+ folio_unlock(folio);
}
- pagevec_remove_exceptionals(&pvec);
- pagevec_release(&pvec);
+ folio_batch_remove_exceptionals(&fbatch);
+ folio_batch_release(&fbatch);
cond_resched();
index++;
}
- if (partial_start) {
- struct page *page = NULL;
- shmem_getpage(inode, start - 1, &page, SGP_READ);
- if (page) {
- unsigned int top = PAGE_SIZE;
- if (start > end) {
- top = partial_end;
- partial_end = 0;
- }
- zero_user_segment(page, partial_start, top);
- set_page_dirty(page);
- unlock_page(page);
- put_page(page);
+ same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
+ folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT);
+ if (folio) {
+ same_folio = lend < folio_pos(folio) + folio_size(folio);
+ folio_mark_dirty(folio);
+ if (!truncate_inode_partial_folio(folio, lstart, lend)) {
+ start = folio->index + folio_nr_pages(folio);
+ if (same_folio)
+ end = folio->index;
}
+ folio_unlock(folio);
+ folio_put(folio);
+ folio = NULL;
}
- if (partial_end) {
- struct page *page = NULL;
- shmem_getpage(inode, end, &page, SGP_READ);
- if (page) {
- zero_user_segment(page, 0, partial_end);
- set_page_dirty(page);
- unlock_page(page);
- put_page(page);
- }
+
+ if (!same_folio)
+ folio = shmem_get_partial_folio(inode, lend >> PAGE_SHIFT);
+ if (folio) {
+ folio_mark_dirty(folio);
+ if (!truncate_inode_partial_folio(folio, lstart, lend))
+ end = folio->index;
+ folio_unlock(folio);
+ folio_put(folio);
}
- if (start >= end)
- return;
index = start;
while (index < end) {
cond_resched();
- if (!find_get_entries(mapping, index, end - 1, &pvec,
+ if (!find_get_entries(mapping, index, end - 1, &fbatch,
indices)) {
/* If all gone or hole-punch or unfalloc, we're done */
if (index == start || end != -1)
index = start;
continue;
}
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ folio = fbatch.folios[i];
index = indices[i];
- if (xa_is_value(page)) {
+ if (xa_is_value(folio)) {
if (unfalloc)
continue;
- if (shmem_free_swap(mapping, index, page)) {
+ if (shmem_free_swap(mapping, index, folio)) {
/* Swap was replaced by page: retry */
index--;
break;
continue;
}
- lock_page(page);
+ folio_lock(folio);
- if (!unfalloc || !PageUptodate(page)) {
- if (page_mapping(page) != mapping) {
+ if (!unfalloc || !folio_test_uptodate(folio)) {
+ if (folio_mapping(folio) != mapping) {
/* Page was replaced by swap: retry */
- unlock_page(page);
+ folio_unlock(folio);
index--;
break;
}
- VM_BUG_ON_PAGE(PageWriteback(page), page);
- if (shmem_punch_compound(page, start, end))
- truncate_inode_page(mapping, page);
- else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
- /* Wipe the page and don't get stuck */
- clear_highpage(page);
- flush_dcache_page(page);
- set_page_dirty(page);
- if (index <
- round_up(start, HPAGE_PMD_NR))
- start = index + 1;
- }
+ VM_BUG_ON_FOLIO(folio_test_writeback(folio),
+ folio);
+ truncate_inode_folio(mapping, folio);
}
- unlock_page(page);
+ index = folio->index + folio_nr_pages(folio) - 1;
+ folio_unlock(folio);
}
- pagevec_remove_exceptionals(&pvec);
- pagevec_release(&pvec);
+ folio_batch_remove_exceptionals(&fbatch);
+ folio_batch_release(&fbatch);
index++;
}
return NULL;
shmem_pseudo_vma_init(&pvma, info, hindex);
- page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(),
- true);
+ page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0, true);
shmem_pseudo_vma_destroy(&pvma);
if (page)
prep_transhuge_page(page);
struct inode *inode = mapping->host;
struct shmem_inode_info *info = SHMEM_I(inode);
pgoff_t index = pos >> PAGE_SHIFT;
+ int ret = 0;
/* i_rwsem is held by caller */
if (unlikely(info->seals & (F_SEAL_GROW |
return -EPERM;
}
- return shmem_getpage(inode, index, pagep, SGP_WRITE);
+ ret = shmem_getpage(inode, index, pagep, SGP_WRITE);
+
+ if (ret)
+ return ret;
+
+ if (PageHWPoison(*pagep)) {
+ unlock_page(*pagep);
+ put_page(*pagep);
+ *pagep = NULL;
+ return -EIO;
+ }
+
+ return 0;
}
static int
if (sgp == SGP_CACHE)
set_page_dirty(page);
unlock_page(page);
+
+ if (PageHWPoison(page)) {
+ put_page(page);
+ error = -EIO;
+ break;
+ }
}
/*
page = find_get_page(inode->i_mapping, 0);
if (!page)
return ERR_PTR(-ECHILD);
- if (!PageUptodate(page)) {
+ if (PageHWPoison(page) ||
+ !PageUptodate(page)) {
put_page(page);
return ERR_PTR(-ECHILD);
}
error = shmem_getpage(inode, 0, &page, SGP_READ);
if (error)
return ERR_PTR(error);
+ if (!page)
+ return ERR_PTR(-ECHILD);
+ if (PageHWPoison(page)) {
+ unlock_page(page);
+ put_page(page);
+ return ERR_PTR(-ECHILD);
+ }
unlock_page(page);
}
set_delayed_call(done, shmem_put_link, page);
kmem_cache_destroy(shmem_inode_cachep);
}
+ /* Keep the page in page cache instead of truncating it */
+ static int shmem_error_remove_page(struct address_space *mapping,
+ struct page *page)
+ {
+ return 0;
+ }
+
const struct address_space_operations shmem_aops = {
.writepage = shmem_writepage,
.set_page_dirty = __set_page_dirty_no_writeback,
#ifdef CONFIG_MIGRATION
.migratepage = migrate_page,
#endif
- .error_remove_page = generic_error_remove_page,
+ .error_remove_page = shmem_error_remove_page,
};
EXPORT_SYMBOL(shmem_aops);
error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
gfp, NULL, NULL, NULL);
if (error)
- page = ERR_PTR(error);
- else
- unlock_page(page);
+ return ERR_PTR(error);
+
+ unlock_page(page);
+ if (PageHWPoison(page)) {
+ put_page(page);
+ return ERR_PTR(-EIO);
+ }
+
return page;
#else
/*
* Internal slab definitions
*/
+/* Reuses the bits in struct page */
+struct slab {
+ unsigned long __page_flags;
+
+#if defined(CONFIG_SLAB)
+
+ union {
+ struct list_head slab_list;
+ struct rcu_head rcu_head;
+ };
+ struct kmem_cache *slab_cache;
+ void *freelist; /* array of free object indexes */
+ void *s_mem; /* first object */
+ unsigned int active;
+
+#elif defined(CONFIG_SLUB)
+
+ union {
+ struct list_head slab_list;
+ struct rcu_head rcu_head;
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+ struct {
+ struct slab *next;
+ int slabs; /* Nr of slabs left */
+ };
+#endif
+ };
+ struct kmem_cache *slab_cache;
+ /* Double-word boundary */
+ void *freelist; /* first free object */
+ union {
+ unsigned long counters;
+ struct {
+ unsigned inuse:16;
+ unsigned objects:15;
+ unsigned frozen:1;
+ };
+ };
+ unsigned int __unused;
+
+#elif defined(CONFIG_SLOB)
+
+ struct list_head slab_list;
+ void *__unused_1;
+ void *freelist; /* first free block */
+ long units;
+ unsigned int __unused_2;
+
+#else
+#error "Unexpected slab allocator configured"
+#endif
+
+ atomic_t __page_refcount;
+#ifdef CONFIG_MEMCG
+ unsigned long memcg_data;
+#endif
+};
+
+#define SLAB_MATCH(pg, sl) \
+ static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
+SLAB_MATCH(flags, __page_flags);
+SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */
+SLAB_MATCH(slab_list, slab_list);
+#ifndef CONFIG_SLOB
+SLAB_MATCH(rcu_head, rcu_head);
+SLAB_MATCH(slab_cache, slab_cache);
+#endif
+#ifdef CONFIG_SLAB
+SLAB_MATCH(s_mem, s_mem);
+SLAB_MATCH(active, active);
+#endif
+SLAB_MATCH(_refcount, __page_refcount);
+#ifdef CONFIG_MEMCG
+SLAB_MATCH(memcg_data, memcg_data);
+#endif
+#undef SLAB_MATCH
+static_assert(sizeof(struct slab) <= sizeof(struct page));
+
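The SLAB_MATCH pattern is plain C11: offsetof() plus static_assert() pins two independently declared structs to the same layout at compile time. A standalone sketch with made-up structs:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/* Two views of the same memory, kept in sync at compile time. */
struct generic_desc {
	unsigned long flags;
	void *payload;
	int refcount;
};

struct typed_desc {
	unsigned long __flags;
	void *first_object;
	int __refcount;
};

#define DESC_MATCH(g, t) \
	static_assert(offsetof(struct generic_desc, g) == \
		      offsetof(struct typed_desc, t), #g " offset mismatch")
DESC_MATCH(flags, __flags);
DESC_MATCH(payload, first_object);
DESC_MATCH(refcount, __refcount);
#undef DESC_MATCH
static_assert(sizeof(struct typed_desc) <= sizeof(struct generic_desc),
	      "typed view must not outgrow the generic one");

int main(void)
{
	puts("layout checks passed at compile time");
	return 0;
}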
+/**
+ * folio_slab - Converts from folio to slab.
+ * @folio: The folio.
+ *
+ * Currently struct slab is a different representation of a folio where
+ * folio_test_slab() is true.
+ *
+ * Return: The slab which contains this folio.
+ */
+#define folio_slab(folio) (_Generic((folio), \
+ const struct folio *: (const struct slab *)(folio), \
+ struct folio *: (struct slab *)(folio)))
+
+/**
+ * slab_folio - The folio allocated for a slab
+ * @slab: The slab.
+ *
+ * Slabs are allocated as folios that contain the individual objects and are
+ * using some fields in the first struct page of the folio - those fields are
+ * now accessed by struct slab. It is occasionally necessary to convert back to
+ * a folio in order to communicate with the rest of the mm. Please use this
+ * helper function instead of casting yourself, as the implementation may change
+ * in the future.
+ */
+#define slab_folio(s) (_Generic((s), \
+ const struct slab *: (const struct folio *)s, \
+ struct slab *: (struct folio *)s))
+
+/**
+ * page_slab - Converts from first struct page to slab.
+ * @p: The first (either head of compound or single) page of slab.
+ *
+ * A temporary wrapper to convert struct page to struct slab in situations where
+ * we know the page is the compound head, or single order-0 page.
+ *
+ * Long-term ideally everything would work with struct slab directly or go
+ * through folio to struct slab.
+ *
+ * Return: The slab which contains this page
+ */
+#define page_slab(p) (_Generic((p), \
+ const struct page *: (const struct slab *)(p), \
+ struct page *: (struct slab *)(p)))
+
+/**
+ * slab_page - The first struct page allocated for a slab
+ * @slab: The slab.
+ *
+ * A convenience wrapper for converting slab to the first struct page of the
+ * underlying folio, to communicate with code not yet converted to folio or
+ * struct slab.
+ */
+#define slab_page(s) folio_page(slab_folio(s), 0)
+
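folio_slab() and friends lean on C11 _Generic to make the cast preserve const-ness. The same trick in a standalone form, with hypothetical types:

#include <stdio.h>

struct outer { int x; };
struct inner { int x; };	/* same layout, different static type */

/* Cast outer* to inner* while keeping any const qualifier intact. */
#define outer_to_inner(p) (_Generic((p),			\
	const struct outer *: (const struct inner *)(p),	\
	struct outer *: (struct inner *)(p)))

int main(void)
{
	struct outer o = { .x = 42 };
	const struct outer *co = &o;

	struct inner *i = outer_to_inner(&o);		/* mutable in, mutable out */
	const struct inner *ci = outer_to_inner(co);	/* const in, const out */

	printf("%d %d\n", i->x, ci->x);
	return 0;
}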
+/*
+ * If network-based swap is enabled, sl*b must keep track of whether pages
+ * were allocated from pfmemalloc reserves.
+ */
+static inline bool slab_test_pfmemalloc(const struct slab *slab)
+{
+ return folio_test_active((struct folio *)slab_folio(slab));
+}
+
+static inline void slab_set_pfmemalloc(struct slab *slab)
+{
+ folio_set_active(slab_folio(slab));
+}
+
+static inline void slab_clear_pfmemalloc(struct slab *slab)
+{
+ folio_clear_active(slab_folio(slab));
+}
+
+static inline void __slab_clear_pfmemalloc(struct slab *slab)
+{
+ __folio_clear_active(slab_folio(slab));
+}
+
+static inline void *slab_address(const struct slab *slab)
+{
+ return folio_address(slab_folio(slab));
+}
+
+static inline int slab_nid(const struct slab *slab)
+{
+ return folio_nid(slab_folio(slab));
+}
+
+static inline pg_data_t *slab_pgdat(const struct slab *slab)
+{
+ return folio_pgdat(slab_folio(slab));
+}
+
+static inline struct slab *virt_to_slab(const void *addr)
+{
+ struct folio *folio = virt_to_folio(addr);
+
+ if (!folio_test_slab(folio))
+ return NULL;
+
+ return folio_slab(folio);
+}
+
+static inline int slab_order(const struct slab *slab)
+{
+ return folio_order((struct folio *)slab_folio(slab));
+}
+
+static inline size_t slab_size(const struct slab *slab)
+{
+ return PAGE_SIZE << slab_order(slab);
+}
+
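
A hedged sketch of the lookup path these accessors enable (the function name is invented for illustration): virt_to_slab() resolves an arbitrary object pointer to its backing slab, returning NULL for non-slab memory, after which the slab's base address, size and NUMA node can be read directly:

static void describe_slab_object(const void *obj)
{
	struct slab *slab = virt_to_slab(obj);

	if (!slab)
		return;	/* not slab-backed, e.g. a large kmalloc() page */

	pr_info("object %p: slab base %p, %zu bytes, node %d\n",
		obj, slab_address(slab), slab_size(slab), slab_nid(slab));
}
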
#ifdef CONFIG_SLOB
/*
* Common fields provided in kmem_cache by all slab allocators
}
#ifdef CONFIG_MEMCG_KMEM
-int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
- gfp_t gfp, bool new_page);
+/*
+ * slab_objcgs - get the object cgroups vector associated with a slab
+ * @slab: a pointer to the slab struct
+ *
+ * Returns a pointer to the object cgroups vector associated with the slab,
+ * or NULL if no such vector has been associated yet.
+ */
+static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
+{
+ unsigned long memcg_data = READ_ONCE(slab->memcg_data);
+
+ VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS),
+ slab_page(slab));
+ VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, slab_page(slab));
+
+ return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+}
+
+int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
+ gfp_t gfp, bool new_slab);
void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
enum node_stat_item idx, int nr);
-static inline void memcg_free_page_obj_cgroups(struct page *page)
+static inline void memcg_free_slab_cgroups(struct slab *slab)
{
- kfree(page_objcgs(page));
- page->memcg_data = 0;
+ kfree(slab_objcgs(slab));
+ slab->memcg_data = 0;
}
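
For orientation, a sketch of how slab_objcgs() is meant to be consumed (the helper name is invented; it assumes obj_to_index() already takes a struct slab, as converted elsewhere in this series): look up the slab, fetch its objcg vector if one exists, and index it by the object's position, mirroring what the alloc/free hooks in the hunks below do per element.

static struct obj_cgroup *objcg_of(struct kmem_cache *s, void *p)
{
	struct slab *slab = virt_to_slab(p);
	struct obj_cgroup **objcgs;

	if (!slab)		/* e.g. a kmalloc_large() object */
		return NULL;
	objcgs = slab_objcgs(slab);
	if (!objcgs)		/* no obj_cgroup vector associated yet */
		return NULL;
	return objcgs[obj_to_index(s, slab, p)];
}
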
static inline size_t obj_full_size(struct kmem_cache *s)
gfp_t flags, size_t size,
void **p)
{
- struct page *page;
+ struct slab *slab;
unsigned long off;
size_t i;
for (i = 0; i < size; i++) {
if (likely(p[i])) {
- page = virt_to_head_page(p[i]);
+ slab = virt_to_slab(p[i]);
- if (!page_objcgs(page) &&
- memcg_alloc_page_obj_cgroups(page, s, flags,
+ if (!slab_objcgs(slab) &&
+ memcg_alloc_slab_cgroups(slab, s, flags,
false)) {
obj_cgroup_uncharge(objcg, obj_full_size(s));
continue;
}
- off = obj_to_index(s, page, p[i]);
+ off = obj_to_index(s, slab, p[i]);
obj_cgroup_get(objcg);
- page_objcgs(page)[off] = objcg;
- mod_objcg_state(objcg, page_pgdat(page),
+ slab_objcgs(slab)[off] = objcg;
+ mod_objcg_state(objcg, slab_pgdat(slab),
cache_vmstat_idx(s), obj_full_size(s));
} else {
obj_cgroup_uncharge(objcg, obj_full_size(s));
struct kmem_cache *s;
struct obj_cgroup **objcgs;
struct obj_cgroup *objcg;
- struct page *page;
+ struct slab *slab;
unsigned int off;
int i;
if (unlikely(!p[i]))
continue;
- page = virt_to_head_page(p[i]);
- objcgs = page_objcgs_check(page);
+ slab = virt_to_slab(p[i]);
+ /* we could be given a kmalloc_large() object, skip those */
+ if (!slab)
+ continue;
+
+ objcgs = slab_objcgs(slab);
if (!objcgs)
continue;
if (!s_orig)
- s = page->slab_cache;
+ s = slab->slab_cache;
else
s = s_orig;
- off = obj_to_index(s, page, p[i]);
+ off = obj_to_index(s, slab, p[i]);
objcg = objcgs[off];
if (!objcg)
continue;
objcgs[off] = NULL;
obj_cgroup_uncharge(objcg, obj_full_size(s));
- mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s),
+ mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
-obj_full_size(s));
obj_cgroup_put(objcg);
}
}
#else /* CONFIG_MEMCG_KMEM */
+static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
+{
+ return NULL;
+}
+
static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
{
return NULL;
}
-static inline int memcg_alloc_page_obj_cgroups(struct page *page,
+static inline int memcg_alloc_slab_cgroups(struct slab *slab,
struct kmem_cache *s, gfp_t gfp,
- bool new_page)
+ bool new_slab)
{
return 0;
}
-static inline void memcg_free_page_obj_cgroups(struct page *page)
+static inline void memcg_free_slab_cgroups(struct slab *slab)
{
}
}
#endif /* CONFIG_MEMCG_KMEM */
+#ifndef CONFIG_SLOB
static inline struct kmem_cache *virt_to_cache(const void *obj)
{
- struct page *page;
+ struct slab *slab;
- page = virt_to_head_page(obj);
- if (WARN_ONCE(!PageSlab(page), "%s: Object is not a Slab page!\n",
+ slab = virt_to_slab(obj);
+ if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n",
__func__))
return NULL;
- return page->slab_cache;
+ return slab->slab_cache;
}
-static __always_inline void account_slab_page(struct page *page, int order,
- struct kmem_cache *s,
- gfp_t gfp)
+static __always_inline void account_slab(struct slab *slab, int order,
+ struct kmem_cache *s, gfp_t gfp)
{
if (memcg_kmem_enabled() && (s->flags & SLAB_ACCOUNT))
- memcg_alloc_page_obj_cgroups(page, s, gfp, true);
+ memcg_alloc_slab_cgroups(slab, s, gfp, true);
- mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
+ mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
PAGE_SIZE << order);
}
-static __always_inline void unaccount_slab_page(struct page *page, int order,
- struct kmem_cache *s)
+static __always_inline void unaccount_slab(struct slab *slab, int order,
+ struct kmem_cache *s)
{
if (memcg_kmem_enabled())
- memcg_free_page_obj_cgroups(page);
+ memcg_free_slab_cgroups(slab);
- mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
+ mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
-(PAGE_SIZE << order));
}
print_tracking(cachep, x);
return cachep;
}
+#endif /* CONFIG_SLOB */
static inline size_t slab_ksize(const struct kmem_cache *s)
{
#endif
- void *slab_start(struct seq_file *m, loff_t *pos);
- void *slab_next(struct seq_file *m, void *p, loff_t *pos);
- void slab_stop(struct seq_file *m, void *p);
- int memcg_slab_show(struct seq_file *m, void *p);
-
#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
void dump_unreclaimable_slab(void);
#else
#define KS_ADDRS_COUNT 16
struct kmem_obj_info {
void *kp_ptr;
- struct page *kp_page;
+ struct slab *kp_slab;
void *kp_objp;
unsigned long kp_data_offset;
struct kmem_cache *kp_slab_cache;
void *kp_stack[KS_ADDRS_COUNT];
void *kp_free_stack[KS_ADDRS_COUNT];
};
-void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page);
+void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab);
+#endif
+
+#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
+void __check_heap_object(const void *ptr, unsigned long n,
+ const struct slab *slab, bool to_user);
+#else
+static inline
+void __check_heap_object(const void *ptr, unsigned long n,
+ const struct slab *slab, bool to_user)
+{
+}
#endif
#endif /* MM_SLAB_H */
void kmem_cache_destroy(struct kmem_cache *s)
{
- int err;
-
- if (unlikely(!s))
+ if (unlikely(!s) || !kasan_check_byte(s))
return;
cpus_read_lock();
if (s->refcount)
goto out_unlock;
- err = shutdown_cache(s);
- if (err) {
- pr_err("%s %s: Slab cache still has objects\n",
- __func__, s->name);
- dump_stack();
- }
+ WARN(shutdown_cache(s),
+ "%s %s: Slab cache still has objects when called from %pS",
+ __func__, s->name, (void *)_RET_IP_);
out_unlock:
mutex_unlock(&slab_mutex);
cpus_read_unlock();
*/
bool kmem_valid_obj(void *object)
{
- struct page *page;
+ struct folio *folio;
/* Some arches consider ZERO_SIZE_PTR to be a valid address. */
if (object < (void *)PAGE_SIZE || !virt_addr_valid(object))
return false;
- page = virt_to_head_page(object);
- return PageSlab(page);
+ folio = virt_to_folio(object);
+ return folio_test_slab(folio);
}
EXPORT_SYMBOL_GPL(kmem_valid_obj);
{
char *cp = IS_ENABLED(CONFIG_MMU) ? "" : "/vmalloc";
int i;
- struct page *page;
+ struct slab *slab;
unsigned long ptroffset;
struct kmem_obj_info kp = { };
if (WARN_ON_ONCE(!virt_addr_valid(object)))
return;
- page = virt_to_head_page(object);
- if (WARN_ON_ONCE(!PageSlab(page))) {
+ slab = virt_to_slab(object);
+ if (WARN_ON_ONCE(!slab)) {
pr_cont(" non-slab memory.\n");
return;
}
- kmem_obj_info(&kp, object, page);
+ kmem_obj_info(&kp, object, slab);
if (kp.kp_slab_cache)
pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name);
else
if (KMALLOC_MIN_SIZE >= 64) {
/*
- * The 96 byte size cache is not used if the alignment
+ * The 96 byte sized cache is not used if the alignment
* is 64 byte.
*/
for (i = 64 + 8; i <= 96; i += 8)
if (type == KMALLOC_RECLAIM) {
flags |= SLAB_RECLAIM_ACCOUNT;
} else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) {
- if (cgroup_memory_nokmem) {
+ if (mem_cgroup_kmem_disabled()) {
kmalloc_caches[type][idx] = kmalloc_caches[KMALLOC_NORMAL][idx];
return;
}
seq_putc(m, '\n');
}
- void *slab_start(struct seq_file *m, loff_t *pos)
+ static void *slab_start(struct seq_file *m, loff_t *pos)
{
mutex_lock(&slab_mutex);
return seq_list_start(&slab_caches, *pos);
}
- void *slab_next(struct seq_file *m, void *p, loff_t *pos)
+ static void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
return seq_list_next(p, &slab_caches, pos);
}
- void slab_stop(struct seq_file *m, void *p)
+ static void slab_stop(struct seq_file *m, void *p)
{
mutex_unlock(&slab_mutex);
}
mutex_unlock(&slab_mutex);
}
- #if defined(CONFIG_MEMCG_KMEM)
- int memcg_slab_show(struct seq_file *m, void *p)
- {
- /*
- * Deprecated.
- * Please, take a look at tools/cgroup/slabinfo.py .
- */
- return 0;
- }
- #endif
-
/*
* slabinfo_op - iterator that generates /proc/slabinfo
*
* all online CPUs so any calls of lru_cache_disabled wrapped by
* local_lock or preemption disabled would be ordered by that.
* The atomic operation doesn't need to have stronger ordering
- * requirements because that is enforeced by the scheduling
+ * requirements because that is enforced by the scheduling
* guarantees.
*/
__lru_add_drain_all(true);
}
/**
- * pagevec_remove_exceptionals - pagevec exceptionals pruning
- * @pvec: The pagevec to prune
+ * folio_batch_remove_exceptionals() - Prune non-folios from a batch.
+ * @fbatch: The batch to prune
*
- * find_get_entries() fills both pages and XArray value entries (aka
- * exceptional entries) into the pagevec. This function prunes all
- * exceptionals from @pvec without leaving holes, so that it can be
- * passed on to page-only pagevec operations.
+ * find_get_entries() fills a batch with both folios and shadow/swap/DAX
+ * entries. This function prunes all the non-folio entries from @fbatch
+ * without leaving holes, so that it can be passed on to folio-only batch
+ * operations.
*/
-void pagevec_remove_exceptionals(struct pagevec *pvec)
+void folio_batch_remove_exceptionals(struct folio_batch *fbatch)
{
- int i, j;
+ unsigned int i, j;
- for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
- struct page *page = pvec->pages[i];
- if (!xa_is_value(page))
- pvec->pages[j++] = page;
+ for (i = 0, j = 0; i < folio_batch_count(fbatch); i++) {
+ struct folio *folio = fbatch->folios[i];
+ if (!xa_is_value(folio))
+ fbatch->folios[j++] = folio;
}
- pvec->nr = j;
+ fbatch->nr = j;
}
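
The expected calling pattern, as an illustrative sketch (the function name is invented, and it assumes the folio_batch-based find_get_entries() from this same series): fill a batch, prune the value entries, then work on real folios only.

static void report_cached_folios(struct address_space *mapping,
				 pgoff_t start, pgoff_t end)
{
	struct folio_batch fbatch;
	pgoff_t indices[PAGEVEC_SIZE];
	unsigned int i;

	folio_batch_init(&fbatch);
	if (!find_get_entries(mapping, start, end, &fbatch, indices))
		return;

	/* Drop shadow/swap/DAX value entries; only real folios remain. */
	folio_batch_remove_exceptionals(&fbatch);
	for (i = 0; i < folio_batch_count(&fbatch); i++)
		pr_info("index %lu: %zu-byte folio\n",
			fbatch.folios[i]->index, folio_size(fbatch.folios[i]));
	folio_batch_release(&fbatch);	/* drops the batch's references */
}
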
/**
/*
* Unconditionally remove exceptional entries. Usually called from truncate
- * path. Note that the pagevec may be altered by this function by removing
- * exceptional entries similar to what pagevec_remove_exceptionals does.
+ * path. Note that the folio_batch may be altered by this function by removing
+ * exceptional entries, similarly to what folio_batch_remove_exceptionals() does.
*/
-static void truncate_exceptional_pvec_entries(struct address_space *mapping,
- struct pagevec *pvec, pgoff_t *indices)
+static void truncate_folio_batch_exceptionals(struct address_space *mapping,
+ struct folio_batch *fbatch, pgoff_t *indices)
{
int i, j;
bool dax;
if (shmem_mapping(mapping))
return;
- for (j = 0; j < pagevec_count(pvec); j++)
- if (xa_is_value(pvec->pages[j]))
+ for (j = 0; j < folio_batch_count(fbatch); j++)
+ if (xa_is_value(fbatch->folios[j]))
break;
- if (j == pagevec_count(pvec))
+ if (j == folio_batch_count(fbatch))
return;
dax = dax_mapping(mapping);
xa_lock_irq(&mapping->i_pages);
}
- for (i = j; i < pagevec_count(pvec); i++) {
- struct page *page = pvec->pages[i];
+ for (i = j; i < folio_batch_count(fbatch); i++) {
+ struct folio *folio = fbatch->folios[i];
pgoff_t index = indices[i];
- if (!xa_is_value(page)) {
- pvec->pages[j++] = page;
+ if (!xa_is_value(folio)) {
+ fbatch->folios[j++] = folio;
continue;
}
continue;
}
- __clear_shadow_entry(mapping, index, page);
+ __clear_shadow_entry(mapping, index, folio);
}
if (!dax) {
inode_add_lru(mapping->host);
spin_unlock(&mapping->host->i_lock);
}
- pvec->nr = j;
+ fbatch->nr = j;
}
/*
* its lock, b) when a concurrent invalidate_mapping_pages got there first and
* c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
*/
-static void truncate_cleanup_page(struct page *page)
+static void truncate_cleanup_folio(struct folio *folio)
{
- if (page_mapped(page))
- unmap_mapping_page(page);
+ if (folio_mapped(folio))
+ unmap_mapping_folio(folio);
- if (page_has_private(page))
- do_invalidatepage(page, 0, thp_size(page));
+ if (folio_has_private(folio))
+ do_invalidatepage(&folio->page, 0, folio_size(folio));
/*
* Some filesystems seem to re-dirty the page even after
* the VM has canceled the dirty bit (eg ext3 journaling).
* Hence dirty accounting check is placed after invalidation.
*/
- cancel_dirty_page(page);
- ClearPageMappedToDisk(page);
+ folio_cancel_dirty(folio);
+ folio_clear_mappedtodisk(folio);
}
/*
static int
invalidate_complete_page(struct address_space *mapping, struct page *page)
{
- int ret;
if (page->mapping != mapping)
return 0;
if (page_has_private(page) && !try_to_release_page(page, 0))
return 0;
- ret = remove_mapping(mapping, page);
-
- return ret;
+ return remove_mapping(mapping, page);
}
-int truncate_inode_page(struct address_space *mapping, struct page *page)
+int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
{
- VM_BUG_ON_PAGE(PageTail(page), page);
-
- if (page->mapping != mapping)
+ if (folio->mapping != mapping)
return -EIO;
- truncate_cleanup_page(page);
- delete_from_page_cache(page);
+ truncate_cleanup_folio(folio);
+ filemap_remove_folio(folio);
return 0;
}
+/*
+ * Handle partial folios. The folio may be entirely within the
+ * range if a split has raced with us. If not, we zero the part of the
+ * folio that's within the [start, end] range, and then split the folio if
+ * it's large. split_huge_page() will discard pages which now lie beyond
+ * i_size, and we rely on the caller to discard pages which lie within a
+ * newly created hole.
+ *
+ * Returns false if splitting failed so the caller can avoid
+ * discarding the entire folio which is stubbornly unsplit.
+ */
+bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
+{
+ loff_t pos = folio_pos(folio);
+ unsigned int offset, length;
+
+ if (pos < start)
+ offset = start - pos;
+ else
+ offset = 0;
+ length = folio_size(folio);
+ if (pos + length <= (u64)end)
+ length = length - offset;
+ else
+ length = end + 1 - pos - offset;
+
+ folio_wait_writeback(folio);
+ if (length == folio_size(folio)) {
+ truncate_inode_folio(folio->mapping, folio);
+ return true;
+ }
+
+ /*
+ * We may be zeroing pages we're about to discard, but it avoids
+ * doing a complex calculation here, and then doing the zeroing
+ * anyway if the page split fails.
+ */
+ folio_zero_range(folio, offset, length);
+
+ cleancache_invalidate_page(folio->mapping, &folio->page);
+ if (folio_has_private(folio))
+ do_invalidatepage(&folio->page, offset, length);
+ if (!folio_test_large(folio))
+ return true;
+ if (split_huge_page(&folio->page) == 0)
+ return true;
+ if (folio_test_dirty(folio))
+ return false;
+ truncate_inode_folio(folio->mapping, folio);
+ return true;
+}
+
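To make the offset/length arithmetic above concrete (the numbers are hypothetical): for a 64 KiB folio at pos 0, a call with start = 4096 and end = 40959 gives offset = 4096; since pos + folio_size(folio) (65536) exceeds end, length = end + 1 - pos - offset = 36864, so bytes [4096, 40960) are zeroed before the split is attempted. Had end been 131071, beyond the folio, the first branch would apply and length = 65536 - 4096 = 61440, zeroing from offset through the end of the folio.
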
/*
* Used to get rid of pages on hardware memory corruption.
*/
int generic_error_remove_page(struct address_space *mapping, struct page *page)
{
+ VM_BUG_ON_PAGE(PageTail(page), page);
+
if (!mapping)
return -EINVAL;
/*
*/
if (!S_ISREG(mapping->host->i_mode))
return -EIO;
- return truncate_inode_page(mapping, page);
+ return truncate_inode_folio(mapping, page_folio(page));
}
EXPORT_SYMBOL(generic_error_remove_page);
{
pgoff_t start; /* inclusive */
pgoff_t end; /* exclusive */
- unsigned int partial_start; /* inclusive */
- unsigned int partial_end; /* exclusive */
- struct pagevec pvec;
+ struct folio_batch fbatch;
pgoff_t indices[PAGEVEC_SIZE];
pgoff_t index;
int i;
+ struct folio *folio;
+ bool same_folio;
if (mapping_empty(mapping))
goto out;
- /* Offsets within partial pages */
- partial_start = lstart & (PAGE_SIZE - 1);
- partial_end = (lend + 1) & (PAGE_SIZE - 1);
-
/*
* 'start' and 'end' always covers the range of pages to be fully
* truncated. Partial pages are covered with 'partial_start' at the
else
end = (lend + 1) >> PAGE_SHIFT;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
index = start;
while (index < end && find_lock_entries(mapping, index, end - 1,
- &pvec, indices)) {
- index = indices[pagevec_count(&pvec) - 1] + 1;
- truncate_exceptional_pvec_entries(mapping, &pvec, indices);
- for (i = 0; i < pagevec_count(&pvec); i++)
- truncate_cleanup_page(pvec.pages[i]);
- delete_from_page_cache_batch(mapping, &pvec);
- for (i = 0; i < pagevec_count(&pvec); i++)
- unlock_page(pvec.pages[i]);
- pagevec_release(&pvec);
+ &fbatch, indices)) {
+ index = indices[folio_batch_count(&fbatch) - 1] + 1;
+ truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
+ for (i = 0; i < folio_batch_count(&fbatch); i++)
+ truncate_cleanup_folio(fbatch.folios[i]);
+ delete_from_page_cache_batch(mapping, &fbatch);
+ for (i = 0; i < folio_batch_count(&fbatch); i++)
+ folio_unlock(fbatch.folios[i]);
+ folio_batch_release(&fbatch);
cond_resched();
}
- if (partial_start) {
- struct page *page = find_lock_page(mapping, start - 1);
- if (page) {
- unsigned int top = PAGE_SIZE;
- if (start > end) {
- /* Truncation within a single page */
- top = partial_end;
- partial_end = 0;
- }
- wait_on_page_writeback(page);
- zero_user_segment(page, partial_start, top);
- cleancache_invalidate_page(mapping, page);
- if (page_has_private(page))
- do_invalidatepage(page, partial_start,
- top - partial_start);
- unlock_page(page);
- put_page(page);
+ same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
+ folio = __filemap_get_folio(mapping, lstart >> PAGE_SHIFT, FGP_LOCK, 0);
+ if (folio) {
+ same_folio = lend < folio_pos(folio) + folio_size(folio);
+ if (!truncate_inode_partial_folio(folio, lstart, lend)) {
+ start = folio->index + folio_nr_pages(folio);
+ if (same_folio)
+ end = folio->index;
}
+ folio_unlock(folio);
+ folio_put(folio);
+ folio = NULL;
}
- if (partial_end) {
- struct page *page = find_lock_page(mapping, end);
- if (page) {
- wait_on_page_writeback(page);
- zero_user_segment(page, 0, partial_end);
- cleancache_invalidate_page(mapping, page);
- if (page_has_private(page))
- do_invalidatepage(page, 0,
- partial_end);
- unlock_page(page);
- put_page(page);
- }
+
+ if (!same_folio)
+ folio = __filemap_get_folio(mapping, lend >> PAGE_SHIFT,
+ FGP_LOCK, 0);
+ if (folio) {
+ if (!truncate_inode_partial_folio(folio, lstart, lend))
+ end = folio->index;
+ folio_unlock(folio);
+ folio_put(folio);
}
- /*
- * If the truncation happened within a single page no pages
- * will be released, just zeroed, so we can bail out now.
- */
- if (start >= end)
- goto out;
index = start;
- for ( ; ; ) {
+ while (index < end) {
cond_resched();
- if (!find_get_entries(mapping, index, end - 1, &pvec,
+ if (!find_get_entries(mapping, index, end - 1, &fbatch,
indices)) {
/* If all gone from start onwards, we're done */
if (index == start)
continue;
}
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i];
/* We rely upon deletion not changing page->index */
index = indices[i];
- if (xa_is_value(page))
+ if (xa_is_value(folio))
continue;
- lock_page(page);
- WARN_ON(page_to_index(page) != index);
- wait_on_page_writeback(page);
- truncate_inode_page(mapping, page);
- unlock_page(page);
+ folio_lock(folio);
+ VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
+ folio_wait_writeback(folio);
+ truncate_inode_folio(mapping, folio);
+ folio_unlock(folio);
+ index = folio_index(folio) + folio_nr_pages(folio) - 1;
}
- truncate_exceptional_pvec_entries(mapping, &pvec, indices);
- pagevec_release(&pvec);
+ truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
+ folio_batch_release(&fbatch);
index++;
}
pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
{
pgoff_t indices[PAGEVEC_SIZE];
- struct pagevec pvec;
+ struct folio_batch fbatch;
pgoff_t index = start;
unsigned long ret;
unsigned long count = 0;
int i;
- pagevec_init(&pvec);
- while (find_lock_entries(mapping, index, end, &pvec, indices)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
+ folio_batch_init(&fbatch);
+ while (find_lock_entries(mapping, index, end, &fbatch, indices)) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct page *page = &fbatch.folios[i]->page;
/* We rely upon deletion not changing page->index */
index = indices[i];
}
count += ret;
}
- pagevec_remove_exceptionals(&pvec);
- pagevec_release(&pvec);
+ folio_batch_remove_exceptionals(&fbatch);
+ folio_batch_release(&fbatch);
cond_resched();
index++;
}
* shrink_page_list() has a temp ref on them, or because they're transiently
* sitting in the lru_cache_add() pagevecs.
*/
-static int
-invalidate_complete_page2(struct address_space *mapping, struct page *page)
+static int invalidate_complete_folio2(struct address_space *mapping,
+ struct folio *folio)
{
- if (page->mapping != mapping)
+ if (folio->mapping != mapping)
return 0;
- if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
+ if (folio_has_private(folio) &&
+ !filemap_release_folio(folio, GFP_KERNEL))
return 0;
spin_lock(&mapping->host->i_lock);
xa_lock_irq(&mapping->i_pages);
- if (PageDirty(page))
+ if (folio_test_dirty(folio))
goto failed;
- BUG_ON(page_has_private(page));
- __delete_from_page_cache(page, NULL);
+ BUG_ON(folio_has_private(folio));
+ __filemap_remove_folio(folio, NULL);
xa_unlock_irq(&mapping->i_pages);
if (mapping_shrinkable(mapping))
inode_add_lru(mapping->host);
spin_unlock(&mapping->host->i_lock);
- if (mapping->a_ops->freepage)
- mapping->a_ops->freepage(page);
-
- put_page(page); /* pagecache ref */
+ filemap_free_folio(mapping, folio);
return 1;
failed:
xa_unlock_irq(&mapping->i_pages);
return 0;
}
-static int do_launder_page(struct address_space *mapping, struct page *page)
+static int do_launder_folio(struct address_space *mapping, struct folio *folio)
{
- if (!PageDirty(page))
+ if (!folio_test_dirty(folio))
return 0;
- if (page->mapping != mapping || mapping->a_ops->launder_page == NULL)
+ if (folio->mapping != mapping || mapping->a_ops->launder_page == NULL)
return 0;
- return mapping->a_ops->launder_page(page);
+ return mapping->a_ops->launder_page(&folio->page);
}
/**
pgoff_t start, pgoff_t end)
{
pgoff_t indices[PAGEVEC_SIZE];
- struct pagevec pvec;
+ struct folio_batch fbatch;
pgoff_t index;
int i;
int ret = 0;
if (mapping_empty(mapping))
goto out;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
index = start;
- while (find_get_entries(mapping, index, end, &pvec, indices)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
+ while (find_get_entries(mapping, index, end, &fbatch, indices)) {
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i];
- /* We rely upon deletion not changing page->index */
+ /* We rely upon deletion not changing folio->index */
index = indices[i];
- if (xa_is_value(page)) {
+ if (xa_is_value(folio)) {
if (!invalidate_exceptional_entry2(mapping,
- index, page))
+ index, folio))
ret = -EBUSY;
continue;
}
- if (!did_range_unmap && page_mapped(page)) {
+ if (!did_range_unmap && folio_mapped(folio)) {
/*
- * If page is mapped, before taking its lock,
+ * If folio is mapped, before taking its lock,
* zap the rest of the file in one hit.
*/
unmap_mapping_pages(mapping, index,
did_range_unmap = 1;
}
- lock_page(page);
- WARN_ON(page_to_index(page) != index);
- if (page->mapping != mapping) {
- unlock_page(page);
+ folio_lock(folio);
+ VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
+ if (folio->mapping != mapping) {
+ folio_unlock(folio);
continue;
}
- wait_on_page_writeback(page);
+ folio_wait_writeback(folio);
- if (page_mapped(page))
- unmap_mapping_page(page);
- BUG_ON(page_mapped(page));
+ if (folio_mapped(folio))
+ unmap_mapping_folio(folio);
+ BUG_ON(folio_mapped(folio));
- ret2 = do_launder_page(mapping, page);
+ ret2 = do_launder_folio(mapping, folio);
if (ret2 == 0) {
- if (!invalidate_complete_page2(mapping, page))
+ if (!invalidate_complete_folio2(mapping, folio))
ret2 = -EBUSY;
}
if (ret2 < 0)
ret = ret2;
- unlock_page(page);
+ folio_unlock(folio);
}
- pagevec_remove_exceptionals(&pvec);
- pagevec_release(&pvec);
+ folio_batch_remove_exceptionals(&fbatch);
+ folio_batch_release(&fbatch);
cond_resched();
index++;
}