local_ops
workqueue
genericirq
+ xarray
flexible-arrays
librs
genalloc
errseq
printk-formats
circular-buffers
+ memory-allocation
mm-api
gfp_mask-from-fs-io
timekeeping
boot-time-mm
+ memory-hotplug
+
Interfaces for kernel debugging
===============================
F: Documentation/ABI/testing/configfs-acpi
F: drivers/pci/*acpi*
F: drivers/pci/*/*acpi*
-F: drivers/pci/*/*/*acpi*
F: tools/power/acpi/
ACPI APEI
F: drivers/hwmon/adt7475.c
ADVANSYS SCSI DRIVER
S: Maintained
S: Supported
F: drivers/mux/adgs1408.c
-F: Documentation/devicetree/bindings/mux/adgs1408.txt
+F: Documentation/devicetree/bindings/mux/adi,adgs1408.txt
ANALOG DEVICES INC ADP5061 DRIVER
T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git
S: Supported
F: arch/arm/mach-actions/
F: arch/arm/boot/dts/owl-*
F: arch/arm64/boot/dts/actions/
-F: drivers/clocksource/owl-*
+F: drivers/clocksource/timer-owl*
F: drivers/pinctrl/actions/*
F: drivers/soc/actions/
F: include/dt-bindings/power/owl-*
ARM/Annapurna Labs ALPINE ARCHITECTURE
-M: Antoine Tenart <antoine.tenart@free-electrons.com>
+M: Antoine Tenart <antoine.tenart@bootlin.com>
S: Maintained
F: arch/arm/mach-alpine/
S: Maintained
F: arch/arm/boot/dts/lpc43*
F: drivers/clk/nxp/clk-lpc18xx*
-F: drivers/clocksource/time-lpc32xx.c
+F: drivers/clocksource/timer-lpc32xx.c
F: drivers/i2c/busses/i2c-lpc2k.c
F: drivers/memory/pl172.c
F: drivers/mtd/spi-nor/nxp-spifi.c
F: drivers/gpio/gpio-uniphier.c
F: drivers/i2c/busses/i2c-uniphier*
F: drivers/irqchip/irq-uniphier-aidet.c
+F: drivers/mmc/host/uniphier-sd.c
F: drivers/pinctrl/uniphier/
F: drivers/reset/reset-uniphier.c
F: drivers/tty/serial/8250/8250_uniphier.c
F: */*/vexpress*
F: */*/*/vexpress*
F: drivers/clk/versatile/clk-vexpress-osc.c
-F: drivers/clocksource/versatile.c
+F: drivers/clocksource/timer-versatile.c
N: mps2
ARM/VFP SUPPORT
S: Maintained
F: arch/arm/mach-vt8500/
-F: drivers/clocksource/vt8500_timer.c
+F: drivers/clocksource/timer-vt8500.c
F: drivers/i2c/busses/i2c-wmt.c
F: drivers/mmc/host/wmt-sdmmc.c
F: drivers/pwm/pwm-vt8500.c
F: drivers/block/xsysace.c
N: zynq
N: xilinx
-F: drivers/clocksource/cadence_ttc_timer.c
+F: drivers/clocksource/timer-cadence-ttc.c
F: drivers/i2c/busses/i2c-cadence.c
F: drivers/mmc/host/sdhci-of-arasan.c
F: drivers/edac/synopsys_edac.c
BROADCOM BNX2 GIGABIT ETHERNET DRIVER
S: Supported
BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER
S: Supported
F: drivers/gpio/gpio-brcmstb.c
F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt
+BROADCOM BRCMSTB I2C DRIVER
+S: Supported
+F: drivers/i2c/busses/i2c-brcmstb.c
+F: Documentation/devicetree/bindings/i2c/i2c-brcmstb.txt
+
BROADCOM BRCMSTB USB2 and USB3 PHY DRIVER
F: Documentation/devicetree/bindings/memory-controllers/brcm,dpfe-cpu.txt
F: drivers/memory/brcmstb_dpfe.c
+BROADCOM SPI DRIVER
+S: Maintained
+F: Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt
+F: drivers/spi/spi-bcm-qspi.*
+F: drivers/spi/spi-brcmstb-qspi.c
+F: drivers/spi/spi-iproc-qspi.c
+
BROADCOM SYSTEMPORT ETHERNET DRIVER
F: Documentation/devicetree/bindings/media/coda.txt
F: drivers/media/platform/coda/
+CODE OF CONDUCT
+S: Supported
+F: Documentation/process/code-of-conduct.rst
+F: Documentation/process/code-of-conduct-interpretation.rst
+
COMMON CLK FRAMEWORK
S: Supported
F: drivers/scsi/cxlflash/
-F: include/uapi/scsi/cxlflash_ioctls.h
+F: include/uapi/scsi/cxlflash_ioctl.h
F: Documentation/powerpc/cxlflash.txt
CYBERPRO FB DRIVER
S: Supported
-F: drivers/input/dlink-dir685-touchkeys.c
+F: drivers/input/keyboard/dlink-dir685-touchkeys.c
DALLAS/MAXIM DS1685-FAMILY REAL TIME CLOCK
F: drivers/platform/x86/dell-smbios-wmi.c
F: tools/wmi/dell-smbios-example.c
+DEFZA FDDI NETWORK DRIVER
+S: Maintained
+F: drivers/net/fddi/defza.*
+
DELL LAPTOP DRIVER
F: drivers/i2c/busses/i2c-diolan-u2c.c
FILESYSTEM DIRECT ACCESS (DAX)
F: Documentation/
F: scripts/kernel-doc
X: Documentation/ABI/
+X: Documentation/acpi/
X: Documentation/devicetree/
-X: Documentation/acpi
-X: Documentation/power
-X: Documentation/spi
-X: Documentation/media
+X: Documentation/i2c/
+X: Documentation/media/
+X: Documentation/power/
+X: Documentation/spi/
T: git git://git.lwn.net/linux.git docs-next
DOCUMENTATION/ITALIAN
DPAA2 ETHERNET DRIVER
-L: linux-kernel@vger.kernel.org
+L: netdev@vger.kernel.org
S: Maintained
-F: drivers/staging/fsl-dpaa2/ethernet
+F: drivers/net/ethernet/freescale/dpaa2/dpaa2-eth*
+F: drivers/net/ethernet/freescale/dpaa2/dpni*
+F: drivers/net/ethernet/freescale/dpaa2/dpkg.h
+F: drivers/net/ethernet/freescale/dpaa2/Makefile
+F: drivers/net/ethernet/freescale/dpaa2/Kconfig
DPAA2 ETHERNET SWITCH DRIVER
DPAA2 PTP CLOCK DRIVER
-L: linux-kernel@vger.kernel.org
+L: netdev@vger.kernel.org
S: Maintained
-F: drivers/staging/fsl-dpaa2/rtc
+F: drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp*
+F: drivers/net/ethernet/freescale/dpaa2/dprtc*
DPT_I2O SCSI RAID DRIVER
F: drivers/edac/r82600_edac.c
EDAC-SBRIDGE
S: Maintained
F: drivers/edac/sb_edac.c
F: drivers/net/ethernet/agere/
ETHERNET BRIDGE
W: http://www.linuxfoundation.org/en/Net:Bridge
Q: http://patchwork.ozlabs.org/project/linux-ext4/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git
S: Maintained
-F: Documentation/filesystems/ext4.txt
+F: Documentation/filesystems/ext4/ext4.rst
F: fs/ext4/
Extended Verification Module (EVM)
F: Documentation/hwmon/
F: drivers/hwmon/
F: include/linux/hwmon*.h
+F: include/trace/events/hwmon*.h
HARDWARE RANDOM NUMBER GENERATOR CORE
F: mm/memory-failure.c
F: mm/hwpoison-inject.c
+HYGON PROCESSOR SUPPORT
+S: Maintained
+F: arch/x86/kernel/cpu/hygon.c
+
Hyper-V CORE AND DRIVERS
S: Supported
F: Documentation/networking/e100.rst
F: Documentation/networking/e1000.rst
-F: Documentation/networking/e1000e.txt
-F: Documentation/networking/igb.txt
-F: Documentation/networking/igbvf.txt
-F: Documentation/networking/ixgb.txt
-F: Documentation/networking/ixgbe.txt
-F: Documentation/networking/ixgbevf.txt
-F: Documentation/networking/i40e.txt
-F: Documentation/networking/i40evf.txt
-F: Documentation/networking/ice.txt
+F: Documentation/networking/e1000e.rst
+F: Documentation/networking/fm10k.rst
+F: Documentation/networking/igb.rst
+F: Documentation/networking/igbvf.rst
+F: Documentation/networking/ixgb.rst
+F: Documentation/networking/ixgbe.rst
+F: Documentation/networking/ixgbevf.rst
+F: Documentation/networking/i40e.rst
+F: Documentation/networking/iavf.rst
+F: Documentation/networking/ice.rst
F: drivers/net/ethernet/intel/
F: drivers/net/ethernet/intel/*/
F: include/linux/avf/virtchnl.h
S: Supported
F: drivers/gpu/drm/i915/gvt/
+INTEL PMIC GPIO DRIVER
+S: Maintained
+F: drivers/gpio/gpio-*cove.c
+F: drivers/gpio/gpio-msic.c
+
INTEL HID EVENT DRIVER
F: arch/x86/include/asm/intel_pmc_ipc.h
F: arch/x86/include/asm/intel_punit_ipc.h
+INTEL MULTIFUNCTION PMIC DEVICE DRIVERS
+S: Maintained
+F: drivers/mfd/intel_msic.c
+F: drivers/mfd/intel_soc_pmic*
+F: include/linux/mfd/intel_msic.h
+F: include/linux/mfd/intel_soc_pmic*
+
INTEL PRO/WIRELESS 2100, 2200BG, 2915ABG NETWORK CONNECTION SUPPORT
F: drivers/infiniband/hw/i40iw/
F: include/uapi/rdma/i40iw-abi.h
-INTEL SHA MULTIBUFFER DRIVER
-S: Supported
-F: arch/x86/crypto/sha*-mb/
-F: crypto/mcryptd.c
-
INTEL TELEMETRY DRIVER
W: http://openipmi.sourceforge.net/
S: Supported
+F: Documentation/devicetree/bindings/ipmi/
F: Documentation/IPMI.txt
F: drivers/char/ipmi/
F: include/linux/ipmi*
KEYS-TRUSTED
F: net/l3mdev
F: include/net/l3mdev.h
+L7 BPF FRAMEWORK
+S: Maintained
+F: include/linux/skmsg.h
+F: net/core/skmsg.c
+F: net/core/sock_map.c
+F: net/ipv4/tcp_bpf.c
+
+LANTIQ / INTEL Ethernet drivers
+S: Maintained
+F: net/dsa/tag_gswip.c
+F: drivers/net/ethernet/lantiq_xrx200.c
+F: drivers/net/dsa/lantiq_pce.h
+F: drivers/net/dsa/lantiq_gswip.c
+
LANTIQ MIPS ARCHITECTURE
F: arch/*/include/asm/spinlock*.h
F: include/linux/rwlock*.h
F: include/linux/mutex*.h
-F: arch/*/include/asm/mutex*.h
F: include/linux/rwsem*.h
F: arch/*/include/asm/rwsem.h
F: include/linux/seqlock.h
F: drivers/scsi/mpt3sas/
LSILOGIC/SYMBIOS/NCR 53C8XX and 53C1010 PCI-SCSI drivers
S: Maintained
F: drivers/scsi/sym53c8xx_2/
S: Maintained
F: drivers/net/dsa/mv88e6xxx/
-F: linux/platform_data/mv88e6xxx.h
+F: include/linux/platform_data/mv88e6xxx.h
F: Documentation/devicetree/bindings/net/dsa/marvell.txt
MARVELL ARMADA DRM SUPPORT
F: drivers/mmc/host/sdhci-xenon*
F: Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt
+MARVELL OCTEONTX2 RVU ADMIN FUNCTION DRIVER
+S: Supported
+F: drivers/net/ethernet/marvell/octeontx2/af/
+
MATROX FRAMEBUFFER DRIVER
S: Orphan
F: Documentation/hwmon/max16065
F: drivers/hwmon/max16065.c
-MAX20751 HARDWARE MONITOR DRIVER
-S: Maintained
-F: Documentation/hwmon/max20751
-F: drivers/hwmon/max20751.c
-
MAX2175 SDR TUNER DRIVER
S: Maintained
-F: drivers/watchdog/menz069_wdt.c
+F: drivers/watchdog/menz69_wdt.c
MESON AO CEC DRIVER FOR AMLOGIC SOCS
S: Maintained
F: drivers/tty/serial/atmel_serial.c
F: drivers/tty/serial/atmel_serial.h
+F: Documentation/devicetree/bindings/mfd/atmel-usart.txt
MICROCHIP / ATMEL DMA DRIVER
F: drivers/mtd/nand/raw/atmel/*
F: Documentation/devicetree/bindings/mtd/atmel-nand.txt
+MICROCHIP AT91 USART MFD DRIVER
+S: Supported
+F: drivers/mfd/at91-usart.c
+F: include/dt-bindings/mfd/at91-usart.h
+F: Documentation/devicetree/bindings/mfd/atmel-usart.txt
+
+MICROCHIP AT91 USART SPI DRIVER
+S: Supported
+F: drivers/spi/spi-at91-usart.c
+F: Documentation/devicetree/bindings/mfd/atmel-usart.txt
+
MICROCHIP KSZ SERIES ETHERNET SWITCH DRIVER
S: Maintained
-F: arch/mips/loongson64/*{2e/2f}*
+F: arch/mips/loongson64/fuloong-2e/
+F: arch/mips/loongson64/lemote-2f/
F: arch/mips/include/asm/mach-loongson64/
F: drivers/*/*loongson2*
F: drivers/*/*/*loongson2*
F: arch/arm/boot/dts/mmp*
F: arch/arm/mach-mmp/
+MMU GATHER AND TLB INVALIDATION
+S: Maintained
+F: arch/*/include/asm/tlb.h
+F: include/asm-generic/tlb.h
+F: mm/mmu_gather.c
+
MN88472 MEDIA DRIVER
S: Maintained
F: Documentation/ABI/testing/sysfs-class-mux*
F: Documentation/devicetree/bindings/mux/
-F: include/linux/dt-bindings/mux/
+F: include/dt-bindings/mux/
F: include/linux/mux/
F: drivers/mux/
F: drivers/gpu/drm/mxsfb/
F: Documentation/devicetree/bindings/display/mxsfb.txt
+MYLEX DAC960 PCI RAID Controller
+S: Supported
+F: drivers/scsi/myrb.*
+F: drivers/scsi/myrs.*
+
MYRICOM MYRI-10G 10GbE DRIVER (MYRI10GE)
T: git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git
S: Maintained
-F: net/core/flow.c
F: net/xfrm/
F: net/key/
F: net/ipv4/xfrm*
S: Maintained
F: net/tls/*
S: Odd Fixes
F: Documentation/auxdisplay/lcd-panel-cgram.txt
-F: drivers/misc/panel.c
+F: drivers/auxdisplay/panel.c
PARALLEL PORT SUBSYSTEM
S: Maintained
-F: drivers/pci/controller/dwc/*keystone*
+F: drivers/pci/controller/dwc/pci-keystone.c
PCI ENDPOINT SUBSYSTEM
F: drivers/pinctrl/intel/
PIN CONTROLLER - MEDIATEK
-M: Sean Wang <sean.wang@mediatek.com>
+M: Sean Wang <sean.wang@kernel.org>
S: Maintained
F: Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt
F: Documentation/devicetree/bindings/pinctrl/pinctrl-mt7622.txt
-F: drivers/pinctrl/mediatek/mtk-eint.*
-F: drivers/pinctrl/mediatek/pinctrl-mtk-common.*
-F: drivers/pinctrl/mediatek/pinctrl-mt2701.c
-F: drivers/pinctrl/mediatek/pinctrl-mt7622.c
+F: drivers/pinctrl/mediatek/
PIN CONTROLLER - QUALCOMM
W: http://www.roeck-us.net/linux/drivers/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git
S: Maintained
+F: Documentation/devicetree/bindings/hwmon/ibm,cffps1.txt
+F: Documentation/devicetree/bindings/hwmon/max31785.txt
+F: Documentation/devicetree/bindings/hwmon/ltc2978.txt
+F: Documentation/hwmon/adm1275
+F: Documentation/hwmon/ibm-cffps
+F: Documentation/hwmon/ir35221
+F: Documentation/hwmon/lm25066
+F: Documentation/hwmon/ltc2978
+F: Documentation/hwmon/ltc3815
+F: Documentation/hwmon/max16064
+F: Documentation/hwmon/max20751
+F: Documentation/hwmon/max31785
+F: Documentation/hwmon/max34440
+F: Documentation/hwmon/max8688
F: Documentation/hwmon/pmbus
+F: Documentation/hwmon/pmbus-core
+F: Documentation/hwmon/tps40422
+F: Documentation/hwmon/ucd9000
+F: Documentation/hwmon/ucd9200
+F: Documentation/hwmon/zl6100
F: drivers/hwmon/pmbus/
F: include/linux/pmbus.h
F: drivers/scsi/qla4xxx/
QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER
-M: Harish Patil <harish.patil@cavium.com>
+M: Shahed Shaikh <Shahed.Shaikh@cavium.com>
F: drivers/net/ethernet/qlogic/qlcnic/
QLOGIC QLGE 10Gb ETHERNET DRIVER
S: Supported
F: drivers/s390/crypto/
+S390 VFIO AP DRIVER
+W: http://www.ibm.com/developerworks/linux/linux390/
+S: Supported
+F: drivers/s390/crypto/vfio_ap_drv.c
+F: drivers/s390/crypto/vfio_ap_private.h
+F: drivers/s390/crypto/vfio_ap_ops.c
+F: Documentation/s390/vfio-ap.txt
+
S390 ZFCP DRIVER
-L: selinux@tycho.nsa.gov (moderated for non-subscribers)
+L: selinux@vger.kernel.org
W: https://selinuxproject.org
W: https://github.com/SELinuxProject
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git
S: Supported
F: drivers/siox/*
+F: drivers/gpio/gpio-siox.c
F: include/trace/events/siox.h
SIS 190 ETHERNET DRIVER
S: Maintained
F: Documentation/devicetree/bindings/arm/firmware/sdei.txt
F: drivers/firmware/arm_sdei.c
-F: include/linux/sdei.h
-F: include/uapi/linux/sdei.h
+F: include/linux/arm_sdei.h
+F: include/uapi/linux/arm_sdei.h
SOFTWARE RAID (Multiple Disks) SUPPORT
F: include/sound/soc*
SOUNDWIRE SUBSYSTEM
F: drivers/reset/reset-axs10x.c
F: Documentation/devicetree/bindings/reset/snps,axs10x-reset.txt
+SYNOPSYS CREG GPIO DRIVER
+S: Maintained
+F: drivers/gpio/gpio-creg-snps.c
+F: Documentation/devicetree/bindings/gpio/snps,creg-gpio.txt
+
SYNOPSYS DESIGNWARE 8250 UART DRIVER
S: Maintained
S: Maintained
F: drivers/net/ethernet/ti/netcp*
+TI PCM3060 ASoC CODEC DRIVER
+S: Maintained
+F: Documentation/devicetree/bindings/sound/pcm3060.txt
+F: sound/soc/codecs/pcm3060*
+
TI TAS571X FAMILY ASoC CODEC DRIVER
F: drivers/usb/typec/altmodes/
F: include/linux/usb/typec_altmode.h
+USB TYPEC PORT CONTROLLER DRIVERS
+S: Maintained
+F: drivers/usb/typec/tcpm/
+
USB UHCI DRIVER
F: fs/hostfs/
F: fs/hppfs/
+USERSPACE COPYIN/COPYOUT (UIOVEC)
+S: Maintained
+F: lib/iov_iter.c
+F: include/linux/uio.h
+
USERSPACE I/O (UIO)
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
F: Documentation/driver-api/uio-howto.rst
F: drivers/uio/
-F: include/linux/uio*.h
+F: include/linux/uio_driver.h
UTIL-LINUX PACKAGE
UVESAFB DRIVER
-W: http://dev.gentoo.org/~spock/projects/uvesafb/
+W: https://github.com/mjanusz/v86d
S: Maintained
F: Documentation/fb/uvesafb.txt
F: drivers/video/fbdev/uvesafb.*
VRF
-M: Shrijeet Mukherjee <shm@cumulusnetworks.com>
+M: Shrijeet Mukherjee <shrijeet@gmail.com>
S: Maintained
F: drivers/net/vrf.c
S: Maintained
F: arch/x86/entry/vdso/
+XARRAY
+S: Supported
+F: Documentation/core-api/xarray.rst
+F: lib/idr.c
+F: lib/xarray.c
+F: include/linux/idr.h
+F: include/linux/xarray.h
+F: tools/testing/radix-tree
+
XC2028/3028 TUNER DRIVER
* Linux/PA-RISC Project (http://www.parisc-linux.org/)
*
* System call entry code / Linux gateway page
- * Copyright (c) Matthew Wilcox 1999 <willy@bofh.ai>
+ * Copyright (c) Matthew Wilcox 1999 <willy@infradead.org>
* Licensed under the GNU GPL.
* thanks to Philipp Rumpf, Mike Shaver and various others
* sorry about the wall, puffin..
sub,<> %r28, %r25, %r0
2: stw %r24, 0(%r26)
/* Free lock */
- sync
- stw %r20, 0(%sr2,%r20)
+ stw,ma %r20, 0(%sr2,%r20)
#if ENABLE_LWS_DEBUG
/* Clear thread register indicator */
stw %r0, 4(%sr2,%r20)
3:
/* Error occurred on load or store */
/* Free lock */
- sync
- stw %r20, 0(%sr2,%r20)
+ stw,ma %r20, 0(%sr2,%r20)
#if ENABLE_LWS_DEBUG
stw %r0, 4(%sr2,%r20)
#endif
cas2_end:
/* Free lock */
- sync
- stw %r20, 0(%sr2,%r20)
+ stw,ma %r20, 0(%sr2,%r20)
/* Enable interrupts */
ssm PSW_SM_I, %r0
/* Return to userspace, set no error */
22:
/* Error occurred on load or store */
/* Free lock */
- sync
- stw %r20, 0(%sr2,%r20)
+ stw,ma %r20, 0(%sr2,%r20)
ssm PSW_SM_I, %r0
ldo 1(%r0),%r28
b lws_exit
*/
#define _PAGE_BIT_SWAP_TYPE 0
-#define _PAGE_NA 0
-#define _PAGE_RO 0
-#define _PAGE_USER 0
-
#define _PAGE_EXEC 0x00001 /* execute permission */
#define _PAGE_WRITE 0x00002 /* write access allowed */
#define _PAGE_READ 0x00004 /* read access allowed */
*/
#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
_PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \
- _PAGE_SOFT_DIRTY)
+ _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
/*
* user access blocked by key
*/
#define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_READ)
#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \
_PAGE_RW | _PAGE_EXEC)
-/*
- * No page size encoding in the linux PTE
- */
-#define _PAGE_PSIZE 0
/*
* _PAGE_CHG_MASK masks of bits that are to be preserved across
* pgprot changes
*/
#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
_PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \
- _PAGE_SOFT_DIRTY)
+ _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \
H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4)
-/*
- * Mask of bits returned by pte_pgprot()
- */
-#define PAGE_PROT_BITS (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
- H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
- _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \
- _PAGE_SOFT_DIRTY | H_PTE_PKEY)
/*
* We define 2 sets of base prot bits, one for basic pages (ie,
* cacheable kernel and user pages) and one for non cacheable
* pages. We always set _PAGE_COHERENT when SMP is enabled or
* the processor might need it for DMA coherency.
*/
-#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
#define _PAGE_BASE (_PAGE_BASE_NC)
/* Permission masks used to generate the __P and __S table,
* Write permissions imply read permissions for now (we could make write-only
* pages on BookE but we don't bother for now). Execute permission control is
* possible on platforms that define _PAGE_EXEC
- *
- * Note due to the way vm flags are laid out, the bits are XWR
*/
#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW)
#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_READ)
#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_X
-#define __P101 PAGE_READONLY_X
-#define __P110 PAGE_COPY_X
-#define __P111 PAGE_COPY_X
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_X
-#define __S101 PAGE_READONLY_X
-#define __S110 PAGE_SHARED_X
-#define __S111 PAGE_SHARED_X
-
/* Permission masks used for kernel mappings */
#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0);
}
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SPECIAL));
}
-static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
+static inline bool pte_exec(pte_t pte)
+{
+ return !!(pte_raw(pte) & cpu_to_be64(_PAGE_EXEC));
+}
+
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline bool pte_soft_dirty(pte_t pte)
static inline pte_t pte_mksoft_dirty(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY);
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SOFT_DIRTY));
}
static inline pte_t pte_clear_soft_dirty(pte_t pte)
{
- return __pte(pte_val(pte) & ~_PAGE_SOFT_DIRTY);
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SOFT_DIRTY));
}
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
*/
VM_BUG_ON((pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_RWX | _PAGE_PRIVILEGED)) !=
cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED));
- return __pte(pte_val(pte) & ~_PAGE_PRIVILEGED);
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED));
}
#define pte_clear_savedwrite pte_clear_savedwrite
* Used by KSM subsystem to make a protnone pte readonly.
*/
VM_BUG_ON(!pte_protnone(pte));
- return __pte(pte_val(pte) | _PAGE_PRIVILEGED);
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED));
}
#else
#define pte_clear_savedwrite pte_clear_savedwrite
static inline pte_t pte_clear_savedwrite(pte_t pte)
{
VM_WARN_ON(1);
- return __pte(pte_val(pte) & ~_PAGE_WRITE);
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
}
#endif /* CONFIG_NUMA_BALANCING */
return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID));
}
+static inline bool pte_hw_valid(pte_t pte)
+{
+ return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT));
+}
+
#ifdef CONFIG_PPC_MEM_KEYS
extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute);
#else
}
#endif /* CONFIG_PPC_MEM_KEYS */
+static inline bool pte_user(pte_t pte)
+{
+ return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
#define pte_access_permitted pte_access_permitted
static inline bool pte_access_permitted(pte_t pte, bool write)
{
- unsigned long pteval = pte_val(pte);
- /* Also check for pte_user */
- unsigned long clear_pte_bits = _PAGE_PRIVILEGED;
/*
* _PAGE_READ is needed for any access and will be
* cleared for PROT_NONE
*/
- unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_READ;
-
- if (write)
- need_pte_bits |= _PAGE_WRITE;
-
- if ((pteval & need_pte_bits) != need_pte_bits)
+ if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte))
return false;
- if ((pteval & clear_pte_bits) == clear_pte_bits)
+ if (write && !pte_write(pte))
return false;
return arch_pte_access_permitted(pte_val(pte), write, 0);
{
if (unlikely(pte_savedwrite(pte)))
return pte_clear_savedwrite(pte);
- return __pte(pte_val(pte) & ~_PAGE_WRITE);
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
+}
+
+static inline pte_t pte_exprotect(pte_t pte)
+{
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_EXEC));
}
static inline pte_t pte_mkclean(pte_t pte)
{
- return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_DIRTY));
}
static inline pte_t pte_mkold(pte_t pte)
{
- return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_ACCESSED));
+}
+
+static inline pte_t pte_mkexec(pte_t pte)
+{
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_EXEC));
+}
+
+static inline pte_t pte_mkpte(pte_t pte)
+{
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PTE));
}
static inline pte_t pte_mkwrite(pte_t pte)
/*
* write implies read, hence set both
*/
- return __pte(pte_val(pte) | _PAGE_RW);
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_RW));
}
static inline pte_t pte_mkdirty(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_DIRTY | _PAGE_SOFT_DIRTY));
}
static inline pte_t pte_mkyoung(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_ACCESSED);
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_ACCESSED));
}
static inline pte_t pte_mkspecial(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_SPECIAL);
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL));
}
static inline pte_t pte_mkhuge(pte_t pte)
static inline pte_t pte_mkdevmap(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP);
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL | _PAGE_DEVMAP));
+}
+
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
+static inline pte_t pte_mkuser(pte_t pte)
+{
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED));
}
/*
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
/* FIXME!! check whether this need to be a conditional */
- return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
-}
-
-static inline bool pte_user(pte_t pte)
-{
- return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
+ return __pte_raw((pte_raw(pte) & cpu_to_be64(_PAGE_CHG_MASK)) |
+ cpu_to_be64(pgprot_val(newprot)));
}
/* Encode and de-code a swap entry */
BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY); \
} while (0)
- /*
- * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
- */
+
#define SWP_TYPE_BITS 5
#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \
& ((1UL << SWP_TYPE_BITS) - 1))
*/
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
#define __swp_entry_to_pte(x) __pte((x).val | _PAGE_PTE)
+#define __pmd_to_swp_entry(pmd) (__pte_to_swp_entry(pmd_pte(pmd)))
+#define __swp_entry_to_pmd(x) (pte_pmd(__swp_entry_to_pte(x)))
#ifdef CONFIG_MEM_SOFT_DIRTY
#define _PAGE_SWP_SOFT_DIRTY (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_SOFT_DIRTY));
}
static inline bool pte_swp_soft_dirty(pte_t pte)
static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
- return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY);
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SWP_SOFT_DIRTY));
}
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
*/
static inline bool pte_ci(pte_t pte)
{
- unsigned long pte_v = pte_val(pte);
+ __be64 pte_v = pte_raw(pte);
- if (((pte_v & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) ||
- ((pte_v & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT))
+ if (((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_TOLERANT)) ||
+ ((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_NON_IDEMPOTENT)))
return true;
return false;
}
static inline int pmd_present(pmd_t pmd)
{
+ /*
+ * A pmd is considered present if _PAGE_PRESENT is set.
+ * We also need to treat a pmd that is marked invalid during
+ * a THP split as present. Hence we look for _PAGE_INVALID
+ * if we find _PAGE_PRESENT cleared.
+ */
+ if (pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID))
+ return true;
- return !pmd_none(pmd);
+ return false;
}
static inline int pmd_bad(pmd_t pmd)
static inline int pud_present(pud_t pud)
{
- return !pud_none(pud);
+ return (pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT));
}
extern struct page *pud_page(pud_t pud);
static inline int pgd_present(pgd_t pgd)
{
- return !pgd_none(pgd);
+ return (pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT));
}
static inline pte_t pgd_pte(pgd_t pgd)
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
-static inline int map_kernel_page(unsigned long ea, unsigned long pa,
- unsigned long flags)
+static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
{
if (radix_enabled()) {
#if defined(CONFIG_PPC_RADIX_MMU) && defined(DEBUG_VM)
unsigned long page_size = 1 << mmu_psize_defs[mmu_io_psize].shift;
WARN((page_size != PAGE_SIZE), "I/O page size != PAGE_SIZE");
#endif
- return radix__map_kernel_page(ea, pa, __pgprot(flags), PAGE_SIZE);
+ return radix__map_kernel_page(ea, pa, prot, PAGE_SIZE);
}
- return hash__map_kernel_page(ea, pa, flags);
+ return hash__map_kernel_page(ea, pa, prot);
}
static inline int __meminit vmemmap_create_mapping(unsigned long start,
#define pmd_soft_dirty(pmd) pte_soft_dirty(pmd_pte(pmd))
#define pmd_mksoft_dirty(pmd) pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
#define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)))
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#define pmd_swp_mksoft_dirty(pmd) pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd)))
+#define pmd_swp_soft_dirty(pmd) pte_swp_soft_dirty(pmd_pte(pmd))
+#define pmd_swp_clear_soft_dirty(pmd) pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd)))
+#endif
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
#ifdef CONFIG_NUMA_BALANCING
return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
}
+/*
+ * Returns true for pmd migration entries, THP, devmap and hugetlb,
+ * but is compile-time dependent on the THP config.
+ */
static inline int pmd_large(pmd_t pmd)
{
return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED);
}
+/*
+ * Only returns true for a THP; false for a pmd migration entry.
+ * We also need to return true when we come across a pmd that is
+ * in the middle of a THP split. While splitting a THP, we mark the
+ * pmd invalid (pmdp_invalidate()) before we set it to the pte page
+ * address. A pmd_trans_huge() check against a pmd entry during that
+ * time should return true.
+ * We should not call this on a hugetlb entry; check for a HugeTLB
+ * entry using vma->vm_flags instead.
+ * The page table walk rule is explained in Documentation/vm/transhuge.rst
+ */
static inline int pmd_trans_huge(pmd_t pmd)
{
+ if (!pmd_present(pmd))
+ return false;
+
if (radix_enabled())
return radix__pmd_trans_huge(pmd);
return hash__pmd_trans_huge(pmd);
* Include the PTE bits definitions
*/
#include <asm/nohash/pte-book3e.h>
-#include <asm/pte-common.h>
+
+#define _PAGE_SAO 0
+
+#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1))
+
+/*
+ * _PAGE_CHG_MASK masks of bits that are to be preserved across
+ * pgprot changes.
+ */
+#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL)
+
+#define H_PAGE_4K_PFN 0
#ifndef __ASSEMBLY__
/* pte_clear moved to later in this file */
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_RW);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_RW);
+}
+
+static inline pte_t pte_mkexec(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_EXEC);
+}
+
#define PMD_BAD_BITS (PTE_TABLE_SIZE-1)
#define PUD_BAD_BITS (PMD_TABLE_SIZE-1)
pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
}
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
#define MAX_SWAPFILES_CHECK() do { \
BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
} while (0)
- /*
- * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
- */
+
#define SWP_TYPE_BITS 5
#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \
& ((1UL << SWP_TYPE_BITS) - 1))
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
#define __swp_entry_to_pte(x) __pte((x).val)
-extern int map_kernel_page(unsigned long ea, unsigned long pa,
- unsigned long flags);
+int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot);
extern int __meminit vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys);
* Copyright (C) 2002,2003 NEC Corporation
* Copyright (C) 2003-2005 Hewlett Packard
*
* All rights reserved.
* struct slot - slot information for each *physical* slot
*/
struct slot {
- struct hotplug_slot *hotplug_slot;
+ struct hotplug_slot hotplug_slot;
struct acpiphp_slot *acpi_slot;
- struct hotplug_slot_info info;
unsigned int sun; /* ACPI _SUN (Slot User Number) value */
};
static inline const char *slot_name(struct slot *slot)
{
- return hotplug_slot_name(slot->hotplug_slot);
+ return hotplug_slot_name(&slot->hotplug_slot);
+}
+
+static inline struct slot *to_slot(struct hotplug_slot *hotplug_slot)
+{
+ return container_of(hotplug_slot, struct slot, hotplug_slot);
}
/*
* Copyright (C) 2002,2003 NEC Corporation
* Copyright (C) 2003-2005 Hewlett Packard
*
* All rights reserved.
static struct acpiphp_attention_info *attention_info;
#define DRIVER_VERSION "0.5"
#define DRIVER_DESC "ACPI Hot Plug PCI Controller Driver"
MODULE_AUTHOR(DRIVER_AUTHOR);
static int get_latch_status(struct hotplug_slot *slot, u8 *value);
static int get_adapter_status(struct hotplug_slot *slot, u8 *value);
-static struct hotplug_slot_ops acpi_hotplug_slot_ops = {
+static const struct hotplug_slot_ops acpi_hotplug_slot_ops = {
.enable_slot = enable_slot,
.disable_slot = disable_slot,
.set_attention_status = set_attention_status,
*/
static int enable_slot(struct hotplug_slot *hotplug_slot)
{
- struct slot *slot = hotplug_slot->private;
+ struct slot *slot = to_slot(hotplug_slot);
pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
*/
static int disable_slot(struct hotplug_slot *hotplug_slot)
{
- struct slot *slot = hotplug_slot->private;
+ struct slot *slot = to_slot(hotplug_slot);
pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
*/
static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
{
- struct slot *slot = hotplug_slot->private;
+ struct slot *slot = to_slot(hotplug_slot);
pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
*/
static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
{
- struct slot *slot = hotplug_slot->private;
+ struct slot *slot = to_slot(hotplug_slot);
pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
*/
static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
{
- struct slot *slot = hotplug_slot->private;
+ struct slot *slot = to_slot(hotplug_slot);
pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
if (!slot)
goto error;
- slot->hotplug_slot = kzalloc(sizeof(*slot->hotplug_slot), GFP_KERNEL);
- if (!slot->hotplug_slot)
- goto error_slot;
-
- slot->hotplug_slot->info = &slot->info;
-
- slot->hotplug_slot->private = slot;
- slot->hotplug_slot->ops = &acpi_hotplug_slot_ops;
+ slot->hotplug_slot.ops = &acpi_hotplug_slot_ops;
slot->acpi_slot = acpiphp_slot;
- slot->hotplug_slot->info->power_status = acpiphp_get_power_status(slot->acpi_slot);
- slot->hotplug_slot->info->attention_status = 0;
- slot->hotplug_slot->info->latch_status = acpiphp_get_latch_status(slot->acpi_slot);
- slot->hotplug_slot->info->adapter_status = acpiphp_get_adapter_status(slot->acpi_slot);
acpiphp_slot->slot = slot;
slot->sun = sun;
snprintf(name, SLOT_NAME_SIZE, "%u", sun);
- retval = pci_hp_register(slot->hotplug_slot, acpiphp_slot->bus,
+ retval = pci_hp_register(&slot->hotplug_slot, acpiphp_slot->bus,
acpiphp_slot->device, name);
if (retval == -EBUSY)
- goto error_hpslot;
+ goto error_slot;
if (retval) {
pr_err("pci_hp_register failed with error %d\n", retval);
- goto error_hpslot;
+ goto error_slot;
}
pr_info("Slot [%s] registered\n", slot_name(slot));
return 0;
-error_hpslot:
- kfree(slot->hotplug_slot);
error_slot:
kfree(slot);
error:
pr_info("Slot [%s] unregistered\n", slot_name(slot));
- pci_hp_deregister(slot->hotplug_slot);
- kfree(slot->hotplug_slot);
+ pci_hp_deregister(&slot->hotplug_slot);
kfree(slot);
}
if (pg_index > end_index)
break;
- rcu_read_lock();
- page = radix_tree_lookup(&mapping->i_pages, pg_index);
- rcu_read_unlock();
- if (page && !radix_tree_exceptional_entry(page)) {
+ page = xa_load(&mapping->i_pages, pg_index);
+ if (page && !xa_is_value(page)) {
misses++;
if (misses > 4)
break;
int mirror_num, unsigned long bio_flags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct extent_io_tree *tree;
struct extent_map_tree *em_tree;
struct compressed_bio *cb;
unsigned long compressed_len;
int faili = 0;
u32 *sums;
- tree = &BTRFS_I(inode)->io_tree;
em_tree = &BTRFS_I(inode)->extent_tree;
/* we need the actual starting offset of this extent in the file */
struct extent_state **cached_state)
{
struct extent_state *state;
- struct rb_node *n;
int ret = 1;
spin_lock(&tree->lock);
if (cached_state && *cached_state) {
state = *cached_state;
if (state->end == start - 1 && extent_state_in_tree(state)) {
- n = rb_next(&state->rb_node);
- while (n) {
- state = rb_entry(n, struct extent_state,
- rb_node);
+ while ((state = next_state(state)) != NULL) {
if (state->state & bits)
goto got_it;
- n = rb_next(n);
}
free_extent_state(*cached_state);
*cached_state = NULL;
*
* 1 is returned if we find something, 0 if nothing was in the tree
*/
-STATIC u64 find_lock_delalloc_range(struct inode *inode,
+static noinline_for_stack u64 find_lock_delalloc_range(struct inode *inode,
struct extent_io_tree *tree,
struct page *locked_page, u64 *start,
u64 *end, u64 max_bytes)
return found;
}
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+u64 btrfs_find_lock_delalloc_range(struct inode *inode,
+ struct extent_io_tree *tree,
+ struct page *locked_page, u64 *start,
+ u64 *end, u64 max_bytes)
+{
+ return find_lock_delalloc_range(inode, tree, locked_page, start, end,
+ max_bytes);
+}
+#endif
+
static int __process_pages_contig(struct address_space *mapping,
struct page *locked_page,
pgoff_t start_index, pgoff_t end_index,
pgoff_t index;
pgoff_t end; /* Inclusive */
int scanned = 0;
- int tag;
+ xa_mark_t tag;
pagevec_init(&pvec);
if (wbc->range_cyclic) {
pgoff_t done_index;
int range_whole = 0;
int scanned = 0;
- int tag;
+ xa_mark_t tag;
/*
* We have to hold onto the inode so that ordered extents can do their
clear_page_dirty_for_io(page);
xa_lock_irq(&page->mapping->i_pages);
- if (!PageDirty(page)) {
- radix_tree_tag_clear(&page->mapping->i_pages,
- page_index(page),
- PAGECACHE_TAG_DIRTY);
- }
+ if (!PageDirty(page))
+ __xa_clear_mark(&page->mapping->i_pages,
+ page_index(page), PAGECACHE_TAG_DIRTY);
xa_unlock_irq(&page->mapping->i_pages);
ClearPageError(page);
unlock_page(page);
WARN_ON(atomic_read(&eb->refs) == 0);
}
-int set_extent_buffer_dirty(struct extent_buffer *eb)
+bool set_extent_buffer_dirty(struct extent_buffer *eb)
{
int i;
int num_pages;
- int was_dirty = 0;
+ bool was_dirty;
check_buffer_tree_ref(eb);
WARN_ON(atomic_read(&eb->refs) == 0);
WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
+ if (!was_dirty)
+ for (i = 0; i < num_pages; i++)
+ set_page_dirty(eb->pages[i]);
+
+#ifdef CONFIG_BTRFS_DEBUG
for (i = 0; i < num_pages; i++)
- set_page_dirty(eb->pages[i]);
+ ASSERT(PageDirty(eb->pages[i]));
+#endif
+
return was_dirty;
}
EXPORT_SYMBOL(mark_buffer_dirty_inode);
/*
- * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
+ * Mark the page dirty, and set it dirty in the page cache, and mark the inode
* dirty.
*
* If warn is true, then emit a warning if the page is not uptodate and has
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
account_page_dirtied(page, mapping);
- radix_tree_tag_set(&mapping->i_pages,
- page_index(page), PAGECACHE_TAG_DIRTY);
+ __xa_set_mark(&mapping->i_pages, page_index(page),
+ PAGECACHE_TAG_DIRTY);
}
xa_unlock_irqrestore(&mapping->i_pages, flags);
}
* The relationship between dirty buffers and dirty pages:
*
* Whenever a page has any dirty buffers, the page's dirty bit is set, and
- * the page is tagged dirty in its radix tree.
+ * the page is tagged dirty in the page cache.
*
* At all times, the dirtiness of the buffers represents the dirtiness of
* subsections of the page. If the page has buffers, the page dirty bit is
* mark_buffer_dirty - mark a buffer_head as needing writeout
* @bh: the buffer_head to mark dirty
*
- * mark_buffer_dirty() will set the dirty bit against the buffer, then set its
- * backing page dirty, then tag the page as dirty in its address_space's radix
- * tree and then attach the address_space's inode to its superblock's dirty
+ * mark_buffer_dirty() will set the dirty bit against the buffer, then set
+ * its backing page dirty, then tag the page as dirty in the page cache
+ * and then attach the address_space's inode to its superblock's dirty
* inode list.
*
* mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
*/
bio = bio_alloc(GFP_NOIO, 1);
- if (wbc) {
- wbc_init_bio(wbc, bio);
- wbc_account_io(wbc, bh->b_page, bh->b_size);
- }
-
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio_set_dev(bio, bh->b_bdev);
bio->bi_write_hint = write_hint;
op_flags |= REQ_PRIO;
bio_set_op_attrs(bio, op, op_flags);
+ if (wbc) {
+ wbc_init_bio(wbc, bio);
+ wbc_account_io(wbc, bh->b_page, bh->b_size);
+ }
+
submit_bio(bio);
return 0;
}
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
!(status & EXTENT_STATUS_WRITTEN) &&
- ext4_find_delalloc_range(inode, map->m_lblk,
- map->m_lblk + map->m_len - 1))
+ ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
+ map->m_lblk + map->m_len - 1))
status |= EXTENT_STATUS_DELAYED;
ret = ext4_es_insert_extent(inode, map->m_lblk,
map->m_len, map->m_pblk, status);
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
!(status & EXTENT_STATUS_WRITTEN) &&
- ext4_find_delalloc_range(inode, map->m_lblk,
- map->m_lblk + map->m_len - 1))
+ ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
+ map->m_lblk + map->m_len - 1))
status |= EXTENT_STATUS_DELAYED;
ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
map->m_pblk, status);
return 0; /* success */
}
-static void ext4_da_release_space(struct inode *inode, int to_free)
+void ext4_da_release_space(struct inode *inode, int to_free)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int offset,
unsigned int length)
{
- int to_release = 0, contiguous_blks = 0;
+ int contiguous_blks = 0;
struct buffer_head *head, *bh;
unsigned int curr_off = 0;
struct inode *inode = page->mapping->host;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
unsigned int stop = offset + length;
- int num_clusters;
ext4_fsblk_t lblk;
BUG_ON(stop > PAGE_SIZE || stop < length);
break;
if ((offset <= curr_off) && (buffer_delay(bh))) {
- to_release++;
contiguous_blks++;
clear_buffer_delay(bh);
} else if (contiguous_blks) {
(PAGE_SHIFT - inode->i_blkbits);
lblk += (curr_off >> inode->i_blkbits) -
contiguous_blks;
- ext4_es_remove_extent(inode, lblk, contiguous_blks);
+ ext4_es_remove_blks(inode, lblk, contiguous_blks);
contiguous_blks = 0;
}
curr_off = next_off;
if (contiguous_blks) {
lblk = page->index << (PAGE_SHIFT - inode->i_blkbits);
lblk += (curr_off >> inode->i_blkbits) - contiguous_blks;
- ext4_es_remove_extent(inode, lblk, contiguous_blks);
+ ext4_es_remove_blks(inode, lblk, contiguous_blks);
}
- /* If we have released all the blocks belonging to a cluster, then we
- * need to release the reserved space for that cluster. */
- num_clusters = EXT4_NUM_B2C(sbi, to_release);
- while (num_clusters > 0) {
- lblk = (page->index << (PAGE_SHIFT - inode->i_blkbits)) +
- ((num_clusters - 1) << sbi->s_cluster_bits);
- if (sbi->s_cluster_ratio == 1 ||
- !ext4_find_delalloc_cluster(inode, lblk))
- ext4_da_release_space(inode, 1);
-
- num_clusters--;
- }
}
/*
return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
}
+/*
+ * ext4_insert_delayed_block - adds a delayed block to the extents status
+ * tree, incrementing the reserved cluster/block
+ * count or making a pending reservation
+ * where needed
+ *
+ * @inode - file containing the newly added block
+ * @lblk - logical block to be added
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int ret;
+ bool allocated = false;
+
+ /*
+ * If the cluster containing lblk is shared with a delayed,
+ * written, or unwritten extent in a bigalloc file system, it's
+ * already been accounted for and does not need to be reserved.
+ * A pending reservation must be made for the cluster if it's
+ * shared with a written or unwritten extent and doesn't already
+ * have one. Written and unwritten extents can be purged from the
+ * extents status tree if the system is under memory pressure, so
+ * it's necessary to examine the extent tree if a search of the
+ * extents status tree doesn't get a match.
+ */
+ if (sbi->s_cluster_ratio == 1) {
+ ret = ext4_da_reserve_space(inode);
+ if (ret != 0) /* ENOSPC */
+ goto errout;
+ } else { /* bigalloc */
+ if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) {
+ if (!ext4_es_scan_clu(inode,
+ &ext4_es_is_mapped, lblk)) {
+ ret = ext4_clu_mapped(inode,
+ EXT4_B2C(sbi, lblk));
+ if (ret < 0)
+ goto errout;
+ if (ret == 0) {
+ ret = ext4_da_reserve_space(inode);
+ if (ret != 0) /* ENOSPC */
+ goto errout;
+ } else {
+ allocated = true;
+ }
+ } else {
+ allocated = true;
+ }
+ }
+ }
+
+ ret = ext4_es_insert_delayed_block(inode, lblk, allocated);
+
+errout:
+ return ret;
+}
+
/*
* This function grabs code from the very beginning of
* ext4_map_blocks, but assumes that the caller is from delayed write
add_delayed:
if (retval == 0) {
int ret;
+
/*
* XXX: __block_prepare_write() unmaps passed block,
* is it OK?
*/
- /*
- * If the block was allocated from previously allocated cluster,
- * then we don't need to reserve it again. However we still need
- * to reserve metadata for every block we're going to write.
- */
- if (EXT4_SB(inode->i_sb)->s_cluster_ratio == 1 ||
- !ext4_find_delalloc_cluster(inode, map->m_lblk)) {
- ret = ext4_da_reserve_space(inode);
- if (ret) {
- /* not enough space to reserve */
- retval = ret;
- goto out_unlock;
- }
- }
- ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
- ~0, EXTENT_STATUS_DELAYED);
- if (ret) {
+ ret = ext4_insert_delayed_block(inode, map->m_lblk);
+ if (ret != 0) {
retval = ret;
goto out_unlock;
}
long left = mpd->wbc->nr_to_write;
pgoff_t index = mpd->first_page;
pgoff_t end = mpd->last_page;
- int tag;
+ xa_mark_t tag;
int i, err = 0;
int blkbits = mpd->inode->i_blkbits;
ext4_lblk_t lblk;
ext4_lblk_t end = map.m_lblk + map.m_len - 1;
struct extent_status es;
- ext4_es_find_delayed_extent_range(inode, map.m_lblk, end, &es);
+ ext4_es_find_extent_range(inode, &ext4_es_is_delayed,
+ map.m_lblk, end, &es);
if (!es.es_len || es.es_lblk > end) {
/* entire range is a hole */
return !buffer_mapped(bh);
}
-int ext4_page_mkwrite(struct vm_fault *vmf)
+vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct page *page = vmf->page;
loff_t size;
unsigned long len;
- int ret;
+ int err;
+ vm_fault_t ret;
struct file *file = vma->vm_file;
struct inode *inode = file_inode(file);
struct address_space *mapping = inode->i_mapping;
down_read(&EXT4_I(inode)->i_mmap_sem);
- ret = ext4_convert_inline_data(inode);
- if (ret)
+ err = ext4_convert_inline_data(inode);
+ if (err)
goto out_ret;
/* Delalloc case is easy... */
!ext4_should_journal_data(inode) &&
!ext4_nonda_switch(inode->i_sb)) {
do {
- ret = block_page_mkwrite(vma, vmf,
+ err = block_page_mkwrite(vma, vmf,
ext4_da_get_block_prep);
- } while (ret == -ENOSPC &&
+ } while (err == -ENOSPC &&
ext4_should_retry_alloc(inode->i_sb, &retries));
goto out_ret;
}
ret = VM_FAULT_SIGBUS;
goto out;
}
- ret = block_page_mkwrite(vma, vmf, get_block);
- if (!ret && ext4_should_journal_data(inode)) {
+ err = block_page_mkwrite(vma, vmf, get_block);
+ if (!err && ext4_should_journal_data(inode)) {
if (ext4_walk_page_buffers(handle, page_buffers(page), 0,
PAGE_SIZE, NULL, do_journal_get_write_access)) {
unlock_page(page);
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
}
ext4_journal_stop(handle);
- if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+ if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry_alloc;
out_ret:
- ret = block_page_mkwrite_return(ret);
+ ret = block_page_mkwrite_return(err);
out:
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(inode->i_sb);
return ret;
}
-int ext4_filemap_fault(struct vm_fault *vmf)
+vm_fault_t ext4_filemap_fault(struct vm_fault *vmf)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
- int err;
+ vm_fault_t ret;
down_read(&EXT4_I(inode)->i_mmap_sem);
- err = filemap_fault(vmf);
+ ret = filemap_fault(vmf);
up_read(&EXT4_I(inode)->i_mmap_sem);
- return err;
+ return ret;
}
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/data.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
inode->i_ino == F2FS_NODE_INO(sbi) ||
S_ISDIR(inode->i_mode) ||
(S_ISREG(inode->i_mode) &&
- is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
+ (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
is_cold_data(page))
return true;
return false;
}
+static enum count_type __read_io_type(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+
+ if (mapping) {
+ struct inode *inode = mapping->host;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ if (inode->i_ino == F2FS_META_INO(sbi))
+ return F2FS_RD_META;
+
+ if (inode->i_ino == F2FS_NODE_INO(sbi))
+ return F2FS_RD_NODE;
+ }
+ return F2FS_RD_DATA;
+}
+
/* postprocessing steps for read bios */
enum bio_post_read_step {
STEP_INITIAL = 0,
/* PG_error was set if any post_read step failed */
if (bio->bi_status || PageError(page)) {
ClearPageUptodate(page);
- SetPageError(page);
+ /* will re-read again later */
+ ClearPageError(page);
} else {
SetPageUptodate(page);
}
+ dec_page_count(F2FS_P_SB(page), __read_io_type(page));
unlock_page(page);
}
if (bio->bi_private)
static void f2fs_read_end_io(struct bio *bio)
{
- if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), FAULT_IO)) {
- f2fs_show_injection_info(FAULT_IO);
+ if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)),
+ FAULT_READ_IO)) {
+ f2fs_show_injection_info(FAULT_READ_IO);
bio->bi_status = BLK_STS_IOERR;
}
struct bio_vec *bvec;
int i;
+ if (time_to_inject(sbi, FAULT_WRITE_IO)) {
+ f2fs_show_injection_info(FAULT_WRITE_IO);
+ bio->bi_status = BLK_STS_IOERR;
+ }
+
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
enum count_type type = WB_DATA_TYPE(page);
io->bio = NULL;
}
-static bool __has_merged_page(struct f2fs_bio_info *io,
- struct inode *inode, nid_t ino, pgoff_t idx)
+static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
+ struct page *page, nid_t ino)
{
struct bio_vec *bvec;
struct page *target;
if (!io->bio)
return false;
- if (!inode && !ino)
+ if (!inode && !page && !ino)
return true;
bio_for_each_segment_all(bvec, io->bio, i) {
else
target = fscrypt_control_page(bvec->bv_page);
- if (idx != target->index)
- continue;
-
if (inode && inode == target->mapping->host)
return true;
+ if (page && page == target)
+ return true;
if (ino && ino == ino_of_node(target))
return true;
}
}
static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
- nid_t ino, pgoff_t idx, enum page_type type)
+ struct page *page, nid_t ino,
+ enum page_type type)
{
enum page_type btype = PAGE_TYPE_OF_BIO(type);
enum temp_type temp;
io = sbi->write_io[btype] + temp;
down_read(&io->io_rwsem);
- ret = __has_merged_page(io, inode, ino, idx);
+ ret = __has_merged_page(io, inode, page, ino);
up_read(&io->io_rwsem);
/* TODO: use HOT temp only for meta pages now. */
}
static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, nid_t ino, pgoff_t idx,
- enum page_type type, bool force)
+ struct inode *inode, struct page *page,
+ nid_t ino, enum page_type type, bool force)
{
enum temp_type temp;
- if (!force && !has_merged_page(sbi, inode, ino, idx, type))
+ if (!force && !has_merged_page(sbi, inode, page, ino, type))
return;
for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
}
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, nid_t ino, pgoff_t idx,
- enum page_type type)
+ struct inode *inode, struct page *page,
+ nid_t ino, enum page_type type)
{
- __submit_merged_write_cond(sbi, inode, ino, idx, type, false);
+ __submit_merged_write_cond(sbi, inode, page, ino, type, false);
}
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
bio_put(bio);
return -EFAULT;
}
+
+ if (fio->io_wbc && !is_read_io(fio->op))
+ wbc_account_io(fio->io_wbc, page, PAGE_SIZE);
+
bio_set_op_attrs(bio, fio->op, fio->op_flags);
- __submit_bio(fio->sbi, bio, fio->type);
+ inc_page_count(fio->sbi, is_read_io(fio->op) ?
+ __read_io_type(page): WB_DATA_TYPE(fio->page));
- if (!is_read_io(fio->op))
- inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page));
+ __submit_bio(fio->sbi, bio, fio->type);
return 0;
}
if (fio->in_list)
goto next;
out:
+ if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
+ f2fs_is_checkpoint_ready(sbi))
+ __submit_merged_bio(io);
up_write(&io->io_rwsem);
}
ctx->bio = bio;
ctx->enabled_steps = post_read_steps;
bio->bi_private = ctx;
-
- /* wait the page to be moved by cleaning */
- f2fs_wait_on_block_writeback(sbi, blkaddr);
}
return bio;
if (IS_ERR(bio))
return PTR_ERR(bio);
+ /* wait for GCed page writeback via META_MAPPING */
+ f2fs_wait_on_block_writeback(inode, blkaddr);
+
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
bio_put(bio);
return -EFAULT;
}
+ ClearPageError(page);
+ inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
__submit_bio(F2FS_I_SB(inode), bio, DATA);
return 0;
}
struct f2fs_summary sum;
struct node_info ni;
block_t old_blkaddr;
- pgoff_t fofs;
blkcnt_t count = 1;
int err;
dn->data_blkaddr = datablock_addr(dn->inode,
dn->node_page, dn->ofs_in_node);
- if (dn->data_blkaddr == NEW_ADDR)
+ if (dn->data_blkaddr != NULL_ADDR)
goto alloc;
if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
old_blkaddr, old_blkaddr);
f2fs_set_data_blkaddr(dn);
- /* update i_size */
- fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
- dn->ofs_in_node;
- if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT))
- f2fs_i_size_write(dn->inode,
- ((loff_t)(fofs + 1) << PAGE_SHIFT));
+ /*
+ * i_size will be updated by direct_IO. Otherwise, we would read
+ * stale data from the unwritten block via dio_read.
+ */
return 0;
}
if (direct_io) {
map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
- flag = f2fs_force_buffered_io(inode, WRITE) ?
+ flag = f2fs_force_buffered_io(inode, iocb, from) ?
F2FS_GET_BLOCK_PRE_AIO :
F2FS_GET_BLOCK_PRE_DIO;
goto map_blocks;
return err;
}
-static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
+void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
if (lock)
map->m_flags = F2FS_MAP_MAPPED;
if (map->m_next_extent)
*map->m_next_extent = pgofs + map->m_len;
+
+ /* for hardware encryption, but to avoid potential issue in future */
+ if (flag == F2FS_GET_BLOCK_DIO)
+ f2fs_wait_on_block_writeback_range(inode,
+ map->m_pblk, map->m_len);
goto out;
}
goto sync_out;
}
- if (!is_valid_data_blkaddr(sbi, blkaddr)) {
+ if (is_valid_data_blkaddr(sbi, blkaddr)) {
+ /* use out-place-update for direct IO under LFS mode */
+ if (test_opt(sbi, LFS) && create &&
+ flag == F2FS_GET_BLOCK_DIO) {
+ err = __allocate_data_block(&dn, map->m_seg_type);
+ if (!err)
+ set_inode_flag(inode, FI_APPEND_WRITE);
+ }
+ } else {
if (create) {
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
last_ofs_in_node = dn.ofs_in_node;
}
} else {
+ WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
+ flag != F2FS_GET_BLOCK_DIO);
err = __allocate_data_block(&dn,
map->m_seg_type);
if (!err)
goto next_dnode;
sync_out:
+
+ /* for hardware encryption, but to avoid potential issue in future */
+ if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
+ f2fs_wait_on_block_writeback_range(inode,
+ map->m_pblk, map->m_len);
+
if (flag == F2FS_GET_BLOCK_PRECACHE) {
if (map->m_flags & F2FS_MAP_MAPPED) {
unsigned int ofs = start_pgofs - map->m_lblk;
struct buffer_head *bh_result, int create)
{
return __get_data_block(inode, iblock, bh_result, create,
- F2FS_GET_BLOCK_DEFAULT, NULL,
+ F2FS_GET_BLOCK_DIO, NULL,
f2fs_rw_hint_to_seg_type(
inode->i_write_hint));
}
}
}
+ /*
+ * If the page is under writeback, we need to wait for
+ * its completion to see the correct decrypted data.
+ */
+ f2fs_wait_on_block_writeback(inode, block_nr);
+
if (bio_add_page(bio, page, blocksize, 0) < blocksize)
goto submit_and_realloc;
+ inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
+ ClearPageError(page);
last_block_in_bio = block_nr;
goto next_page;
set_error_page:
return 0;
/* wait for GCed page writeback via META_MAPPING */
- f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr);
+ f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
retry_encrypt:
fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
is_inode_flag_set(inode, FI_NEED_IPU))
return true;
+ if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+ !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+ return true;
+
return false;
}
return true;
if (S_ISDIR(inode->i_mode))
return true;
+ if (IS_NOQUOTA(inode))
+ return true;
if (f2fs_is_atomic_file(inode))
return true;
if (fio) {
return true;
if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
return true;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+ f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+ return true;
}
return false;
}
/* This page is already truncated */
if (fio->old_blkaddr == NULL_ADDR) {
ClearPageUptodate(page);
+ clear_cold_data(page);
goto out_writepage;
}
got_it:
out:
inode_dec_dirty_pages(inode);
- if (err)
+ if (err) {
ClearPageUptodate(page);
+ clear_cold_data(page);
+ }
if (wbc->for_reclaim) {
- f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA);
+ f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
clear_inode_flag(inode, FI_HOT_DATA);
f2fs_remove_dirty_inode(inode);
submitted = NULL;
}
unlock_page(page);
- if (!S_ISDIR(inode->i_mode))
+ if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
f2fs_balance_fs(sbi, need_balance_fs);
if (unlikely(f2fs_cp_error(sbi))) {
pgoff_t index;
pgoff_t end; /* Inclusive */
pgoff_t done_index;
- pgoff_t last_idx = ULONG_MAX;
int cycled;
int range_whole = 0;
- int tag;
+ xa_mark_t tag;
+ int nwritten = 0;
pagevec_init(&pvec);
done = 1;
break;
} else if (submitted) {
- last_idx = page->index;
+ nwritten++;
}
if (--wbc->nr_to_write <= 0 &&
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = done_index;
- if (last_idx != ULONG_MAX)
+ if (nwritten)
f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
- 0, last_idx, DATA);
+ NULL, 0, DATA);
return ret;
}
{
if (!S_ISREG(inode->i_mode))
return false;
+ if (IS_NOQUOTA(inode))
+ return false;
if (wbc->sync_mode != WB_SYNC_ALL)
return true;
if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto skip_write;
- if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
+ if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
+ wbc->sync_mode == WB_SYNC_NONE &&
get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
f2fs_available_free_memory(sbi, DIRTY_DENTS))
goto skip_write;
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_pagecache(inode, i_size);
- f2fs_truncate_blocks(inode, i_size, true);
+ f2fs_truncate_blocks(inode, i_size, true, true);
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
trace_f2fs_write_begin(inode, pos, len, flags);
+ err = f2fs_is_checkpoint_ready(sbi);
+ if (err)
+ goto fail;
+
if ((f2fs_is_atomic_file(inode) &&
!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
if (err)
goto fail;
- if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
+ if (need_balance && !IS_NOQUOTA(inode) &&
+ has_not_enough_free_secs(sbi, 0, 0)) {
unlock_page(page);
f2fs_balance_fs(sbi, true);
lock_page(page);
f2fs_wait_on_page_writeback(page, DATA, false);
- /* wait for GCed page writeback via META_MAPPING */
- if (f2fs_post_read_required(inode))
- f2fs_wait_on_block_writeback(sbi, blkaddr);
-
if (len == PAGE_SIZE || PageUptodate(page))
return 0;
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
size_t count = iov_iter_count(iter);
loff_t offset = iocb->ki_pos;
int rw = iov_iter_rw(iter);
int err;
enum rw_hint hint = iocb->ki_hint;
int whint_mode = F2FS_OPTION(sbi).whint_mode;
+ bool do_opu;
err = check_direct_IO(inode, iter, offset);
if (err)
return err < 0 ? err : 0;
- if (f2fs_force_buffered_io(inode, rw))
+ if (f2fs_force_buffered_io(inode, iocb, iter))
return 0;
+ do_opu = allow_outplace_dio(inode, iocb, iter);
+
trace_f2fs_direct_IO_enter(inode, offset, count, rw);
if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
iocb->ki_hint = WRITE_LIFE_NOT_SET;
- if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) {
- if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
iocb->ki_hint = hint;
err = -EAGAIN;
goto out;
}
- down_read(&F2FS_I(inode)->i_gc_rwsem[rw]);
+ if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
+ up_read(&fi->i_gc_rwsem[rw]);
+ iocb->ki_hint = hint;
+ err = -EAGAIN;
+ goto out;
+ }
+ } else {
+ down_read(&fi->i_gc_rwsem[rw]);
+ if (do_opu)
+ down_read(&fi->i_gc_rwsem[READ]);
}
err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);
- up_read(&F2FS_I(inode)->i_gc_rwsem[rw]);
+
+ if (do_opu)
+ up_read(&fi->i_gc_rwsem[READ]);
+
+ up_read(&fi->i_gc_rwsem[rw]);
if (rw == WRITE) {
if (whint_mode == WHINT_MODE_OFF)
if (err > 0) {
f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
err);
- set_inode_flag(inode, FI_UPDATE_WRITE);
+ if (!do_opu)
+ set_inode_flag(inode, FI_UPDATE_WRITE);
} else if (err < 0) {
f2fs_write_failed(mapping, offset + count);
}
}
}
+ clear_cold_data(page);
+
/* This is atomic written page, keep Private */
if (IS_ATOMIC_WRITTEN_PAGE(page))
return f2fs_drop_inmem_page(inode, page);
if (IS_ATOMIC_WRITTEN_PAGE(page))
return 0;
+ clear_cold_data(page);
set_page_private(page, 0);
ClearPagePrivate(page);
return 1;
if (!PageUptodate(page))
SetPageUptodate(page);
- /* don't remain PG_checked flag which was set during GC */
- if (is_cold_data(page))
- clear_cold_data(page);
-
if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
f2fs_register_inmem_page(inode, page);
#endif
};
- void f2fs_clear_radix_tree_dirty_tag(struct page *page)
+ void f2fs_clear_page_cache_dirty_tag(struct page *page)
{
struct address_space *mapping = page_mapping(page);
unsigned long flags;
xa_lock_irqsave(&mapping->i_pages, flags);
- radix_tree_tag_clear(&mapping->i_pages, page_index(page),
+ __xa_clear_mark(&mapping->i_pages, page_index(page),
PAGECACHE_TAG_DIRTY);
xa_unlock_irqrestore(&mapping->i_pages, flags);
}
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/dir.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
f2fs_put_page(page, 1);
clear_inode_flag(inode, FI_NEW_INODE);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
fail:
up_write(&F2FS_I(inode)->i_sem);
- f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return err;
}
if (bit_pos == NR_DENTRY_IN_BLOCK &&
!f2fs_truncate_hole(dir, page->index, page->index + 1)) {
- f2fs_clear_radix_tree_dirty_tag(page);
+ f2fs_clear_page_cache_dirty_tag(page);
clear_page_dirty_for_io(page);
ClearPagePrivate(page);
ClearPageUptodate(page);
+ clear_cold_data(page);
inode_dec_dirty_pages(dir);
f2fs_remove_dirty_inode(dir);
}
struct f2fs_dir_entry *de = NULL;
struct fscrypt_str de_name = FSTR_INIT(NULL, 0);
struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode);
+ struct blk_plug plug;
+ bool readdir_ra = sbi->readdir_ra == 1;
+ int err = 0;
bit_pos = ((unsigned long)ctx->pos % d->max);
+ if (readdir_ra)
+ blk_start_plug(&plug);
+
while (bit_pos < d->max) {
bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos);
if (bit_pos >= d->max)
if (f2fs_encrypted_inode(d->inode)) {
int save_len = fstr->len;
- int err;
err = fscrypt_fname_disk_to_usr(d->inode,
(u32)de->hash_code, 0,
&de_name, fstr);
if (err)
- return err;
+ goto out;
de_name = *fstr;
fstr->len = save_len;
}
if (!dir_emit(ctx, de_name.name, de_name.len,
- le32_to_cpu(de->ino), d_type))
- return 1;
+ le32_to_cpu(de->ino), d_type)) {
+ err = 1;
+ goto out;
+ }
- if (sbi->readdir_ra == 1)
+ if (readdir_ra)
f2fs_ra_node_page(sbi, le32_to_cpu(de->ino));
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
ctx->pos = start_pos + bit_pos;
}
- return 0;
+out:
+ if (readdir_ra)
+ blk_finish_plug(&plug);
+ return err;
}
static int f2fs_readdir(struct file *file, struct dir_context *ctx)
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/f2fs.h
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef _LINUX_F2FS_H
#define _LINUX_F2FS_H
+#include <linux/uio.h>
#include <linux/types.h>
#include <linux/page-flags.h>
#include <linux/buffer_head.h>
FAULT_DIR_DEPTH,
FAULT_EVICT_INODE,
FAULT_TRUNCATE,
- FAULT_IO,
+ FAULT_READ_IO,
FAULT_CHECKPOINT,
FAULT_DISCARD,
+ FAULT_WRITE_IO,
FAULT_MAX,
};
#define F2FS_MOUNT_QUOTA 0x00400000
#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000
#define F2FS_MOUNT_RESERVE_ROOT 0x01000000
+#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000
#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
#define F2FS_FEATURE_INODE_CRTIME 0x0100
#define F2FS_FEATURE_LOST_FOUND 0x0200
#define F2FS_FEATURE_VERITY 0x0400 /* reserved */
+#define F2FS_FEATURE_SB_CHKSUM 0x0800
#define F2FS_HAS_FEATURE(sb, mask) \
((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
#define CP_RECOVERY 0x00000008
#define CP_DISCARD 0x00000010
#define CP_TRIMMED 0x00000020
+#define CP_PAUSE 0x00000040
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
#define DEF_DISCARD_URGENT_UTIL 80 /* do more discard over 80% */
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
+#define DEF_DISABLE_INTERVAL 5 /* 5 secs */
struct cp_control {
int reason;
META_NAT,
META_SIT,
META_SSA,
+ META_MAX,
META_POR,
DATA_GENERIC,
META_GENERIC,
atomic_t issued_discard; /* # of issued discard */
atomic_t issing_discard; /* # of issuing discards */
atomic_t discard_cmd_cnt; /* # of cached cmd count */
- struct rb_root root; /* root of discard rb-tree */
+ struct rb_root_cached root; /* root of discard rb-tree */
bool rbtree_check; /* config for consistency check */
};
#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */
+/* maximum retry quota flush count */
+#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8
+
#define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */
#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
struct extent_tree {
nid_t ino; /* inode number */
- struct rb_root root; /* root of extent info rb-tree */
+ struct rb_root_cached root; /* root of extent info rb-tree */
struct extent_node *cached_en; /* recently accessed extent node */
struct extent_info largest; /* largest extent info */
struct list_head list; /* to be used by sbi->zombie_list */
rwlock_t lock; /* protect extent info rb-tree */
atomic_t node_cnt; /* # of extent node in rb-tree*/
+ bool largest_updated; /* largest extent updated */
};
/*
F2FS_GET_BLOCK_DEFAULT,
F2FS_GET_BLOCK_FIEMAP,
F2FS_GET_BLOCK_BMAP,
+ F2FS_GET_BLOCK_DIO,
F2FS_GET_BLOCK_PRE_DIO,
F2FS_GET_BLOCK_PRE_AIO,
F2FS_GET_BLOCK_PRECACHE,
}
extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync);
-static inline void __try_update_largest_extent(struct inode *inode,
- struct extent_tree *et, struct extent_node *en)
+static inline void __try_update_largest_extent(struct extent_tree *et,
+ struct extent_node *en)
{
if (en->ei.len > et->largest.len) {
et->largest = en->ei;
- f2fs_mark_inode_dirty_sync(inode, true);
+ et->largest_updated = true;
}
}
F2FS_DIRTY_IMETA,
F2FS_WB_CP_DATA,
F2FS_WB_DATA,
+ F2FS_RD_DATA,
+ F2FS_RD_NODE,
+ F2FS_RD_META,
NR_COUNT_TYPE,
};
SBI_NEED_SB_WRITE, /* need to recover superblock */
SBI_NEED_CP, /* need to checkpoint */
SBI_IS_SHUTDOWN, /* shutdown by ioctl */
+ SBI_IS_RECOVERED, /* recovered orphan/data */
+ SBI_CP_DISABLED, /* CP was disabled last mount */
+ SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */
+ SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */
+ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
};
enum {
CP_TIME,
REQ_TIME,
+ DISCARD_TIME,
+ GC_TIME,
+ DISABLE_TIME,
MAX_TIME,
};
unsigned int total_valid_node_count; /* valid node block count */
loff_t max_file_blocks; /* max block index of file */
int dir_level; /* directory level */
- unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */
int readdir_ra; /* readahead inode in readdir */
block_t user_block_count; /* # of user blocks */
block_t reserved_blocks; /* configurable reserved blocks */
block_t current_reserved_blocks; /* current reserved blocks */
+ /* Additional tracking for no checkpoint mode */
+ block_t unusable_block_count; /* # of blocks saved by last cp */
+
unsigned int nquota_files; /* # of quota sysfile */
u32 s_next_generation; /* for NFS support */
*/
#ifdef CONFIG_F2FS_STAT_FS
struct f2fs_stat_info *stat_info; /* FS status information */
+ atomic_t meta_count[META_MAX]; /* # of meta blocks */
unsigned int segment_count[2]; /* # of allocated segments */
unsigned int block_count[2]; /* # of allocated blocks */
atomic_t inplace_count; /* # of inplace update */
atomic_t max_aw_cnt; /* max # of atomic writes */
atomic_t max_vw_cnt; /* max # of volatile writes */
int bg_gc; /* background gc calls */
+ unsigned int io_skip_bggc; /* skip background gc for in-flight IO */
+ unsigned int other_skip_bggc; /* skip background gc for other reasons */
unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */
#endif
spinlock_t stat_lock; /* lock for stat operations */
};
#ifdef CONFIG_F2FS_FAULT_INJECTION
-#define f2fs_show_injection_info(type) \
- printk("%sF2FS-fs : inject %s in %s of %pF\n", \
- KERN_INFO, f2fs_fault_name[type], \
+#define f2fs_show_injection_info(type) \
+ printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \
+ KERN_INFO, f2fs_fault_name[type], \
__func__, __builtin_return_address(0))
static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
{
static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
{
- sbi->last_time[type] = jiffies;
+ unsigned long now = jiffies;
+
+ sbi->last_time[type] = now;
+
+ /* DISCARD_TIME and GC_TIME are based on REQ_TIME */
+ if (type == REQ_TIME) {
+ sbi->last_time[DISCARD_TIME] = now;
+ sbi->last_time[GC_TIME] = now;
+ }
}
static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
return time_after(jiffies, sbi->last_time[type] + interval);
}
-static inline bool is_idle(struct f2fs_sb_info *sbi)
+static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi,
+ int type)
{
- struct block_device *bdev = sbi->sb->s_bdev;
- struct request_queue *q = bdev_get_queue(bdev);
- struct request_list *rl = &q->root_rl;
+ unsigned long interval = sbi->interval_time[type] * HZ;
+ unsigned int wait_ms = 0;
+ long delta;
- if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
- return false;
+ delta = (sbi->last_time[type] + interval) - jiffies;
+ if (delta > 0)
+ wait_ms = jiffies_to_msecs(delta);
- return f2fs_time_over(sbi, REQ_TIME);
+ return wait_ms;
}
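
The helper above only reports how long a caller should still wait; a hypothetical background worker (not part of this patch) could turn that into a bounded sleep. The arithmetic is simple enough to show as a standalone userspace sketch, with made-up names (time_to_wait_ms, last_time_ms) standing in for the jiffies-based fields:

/*
 * Userspace analogue of the f2fs_time_to_wait() arithmetic above:
 * given the last time an event ran and its minimum interval, return
 * how many milliseconds a worker should still wait before running it.
 */
#include <stdio.h>

static unsigned int time_to_wait_ms(long now_ms, long last_time_ms,
                                    long interval_ms)
{
        long delta = (last_time_ms + interval_ms) - now_ms;

        return delta > 0 ? (unsigned int)delta : 0;
}

int main(void)
{
        /* Last discard ran at t=1000ms; policy allows one every 5000ms. */
        printf("wait %u ms\n", time_to_wait_ms(3000, 1000, 5000)); /* 3000 */
        printf("wait %u ms\n", time_to_wait_ms(9000, 1000, 5000)); /* 0 */
        return 0;
}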
/*
if (!__allow_reserved_blocks(sbi, inode, true))
avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
-
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ avail_user_block_count -= sbi->unusable_block_count;
if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
diff = sbi->total_valid_block_count - avail_user_block_count;
if (diff > *count)
atomic_inc(&sbi->nr_pages[count_type]);
if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES ||
- count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA)
+ count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA ||
+ count_type == F2FS_RD_DATA || count_type == F2FS_RD_NODE ||
+ count_type == F2FS_RD_META)
return;
set_sbi_flag(sbi, SBI_IS_DIRTY);
{
block_t valid_block_count;
unsigned int valid_node_count;
- bool quota = inode && !is_inode;
+ int err;
- if (quota) {
- int ret = dquot_reserve_block(inode, 1);
- if (ret)
- return ret;
+ if (is_inode) {
+ if (inode) {
+ err = dquot_alloc_inode(inode);
+ if (err)
+ return err;
+ }
+ } else {
+ err = dquot_reserve_block(inode, 1);
+ if (err)
+ return err;
}
if (time_to_inject(sbi, FAULT_BLOCK)) {
if (!__allow_reserved_blocks(sbi, inode, false))
valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+ valid_block_count += sbi->unusable_block_count;
if (unlikely(valid_block_count > sbi->user_block_count)) {
spin_unlock(&sbi->stat_lock);
return 0;
enospc:
- if (quota)
+ if (is_inode) {
+ if (inode)
+ dquot_free_inode(inode);
+ } else {
dquot_release_reservation_block(inode, 1);
+ }
return -ENOSPC;
}
spin_unlock(&sbi->stat_lock);
- if (!is_inode)
+ if (is_inode)
+ dquot_free_inode(inode);
+ else
f2fs_i_blocks_write(inode, 1, false, true);
}
return bio_alloc(GFP_KERNEL, npages);
}
+static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
+{
+ if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) ||
+ get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) ||
+ get_pages(sbi, F2FS_WB_CP_DATA))
+ return false;
+ return f2fs_time_over(sbi, type);
+}
+
static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
unsigned long index, void *item)
{
*/
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
-int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
+int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
+ bool buf_write);
int f2fs_truncate(struct inode *inode);
int f2fs_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags);
int f2fs_precache_extents(struct inode *inode);
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid);
int f2fs_pin_file_control(struct inode *inode, bool inc);
/*
int f2fs_inode_dirtied(struct inode *inode, bool sync);
void f2fs_inode_synced(struct inode *inode);
int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
+int f2fs_quota_sync(struct super_block *sb, int type);
void f2fs_quota_off_umount(struct super_block *sb);
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
int f2fs_sync_fs(struct super_block *sb, int sync);
void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
struct page *f2fs_get_node_page_ra(struct page *parent, int start);
-void f2fs_move_node_page(struct page *node_page, int gc_type);
+int f2fs_move_node_page(struct page *node_page, int gc_type);
int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
struct writeback_control *wbc, bool atomic,
unsigned int *seq_id);
int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum);
-void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
int f2fs_build_node_manager(struct f2fs_sb_info *sbi);
void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi);
int __init f2fs_create_node_manager_caches(void);
bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
struct cp_control *cpc);
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi);
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
struct f2fs_io_info *fio, bool add_list);
void f2fs_wait_on_page_writeback(struct page *page,
enum page_type type, bool ordered);
-void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr);
+void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
+void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
+ block_t len);
void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
void f2fs_destroy_post_read_processing(void);
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, nid_t ino, pgoff_t idx,
- enum page_type type);
+ struct inode *inode, struct page *page,
+ nid_t ino, enum page_type type);
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi);
int f2fs_submit_page_bio(struct f2fs_io_info *fio);
void f2fs_submit_page_write(struct f2fs_io_info *fio);
struct page *f2fs_get_new_data_page(struct inode *inode,
struct page *ipage, pgoff_t index, bool new_i_size);
int f2fs_do_write_data_page(struct f2fs_io_info *fio);
+void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int create, int flag);
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
struct page *page, enum migrate_mode mode);
#endif
bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
- void f2fs_clear_radix_tree_dirty_tag(struct page *page);
+ void f2fs_clear_page_cache_dirty_tag(struct page *page);
/*
* gc.c
int free_nids, avail_nids, alloc_nids;
int total_count, utilization;
int bg_gc, nr_wb_cp_data, nr_wb_data;
+ int nr_rd_data, nr_rd_node, nr_rd_meta;
+ unsigned int io_skip_bggc, other_skip_bggc;
int nr_flushing, nr_flushed, flush_list_empty;
int nr_discarding, nr_discarded;
int nr_discard_cmd;
int cursec[NR_CURSEG_TYPE];
int curzone[NR_CURSEG_TYPE];
+ unsigned int meta_count[META_MAX];
unsigned int segment_count[2];
unsigned int block_count[2];
unsigned int inplace_count;
#define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++)
#define stat_inc_call_count(si) ((si)->call_count++)
#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
+#define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++)
+#define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++)
#define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++)
#define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--)
#define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext))
if (f2fs_has_inline_dentry(inode)) \
(atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \
} while (0)
+#define stat_inc_meta_count(sbi, blkaddr) \
+ do { \
+ if (blkaddr < SIT_I(sbi)->sit_base_addr) \
+ atomic_inc(&(sbi)->meta_count[META_CP]); \
+ else if (blkaddr < NM_I(sbi)->nat_blkaddr) \
+ atomic_inc(&(sbi)->meta_count[META_SIT]); \
+ else if (blkaddr < SM_I(sbi)->ssa_blkaddr) \
+ atomic_inc(&(sbi)->meta_count[META_NAT]); \
+ else if (blkaddr < SM_I(sbi)->main_blkaddr) \
+ atomic_inc(&(sbi)->meta_count[META_SSA]); \
+ } while (0)
#define stat_inc_seg_type(sbi, curseg) \
((sbi)->segment_count[(curseg)->alloc_type]++)
#define stat_inc_block_count(sbi, curseg) \
#define stat_inc_bg_cp_count(si) do { } while (0)
#define stat_inc_call_count(si) do { } while (0)
#define stat_inc_bggc_count(si) do { } while (0)
+#define stat_io_skip_bggc_count(sbi) do { } while (0)
+#define stat_other_skip_bggc_count(sbi) do { } while (0)
#define stat_inc_dirty_inode(sbi, type) do { } while (0)
#define stat_dec_dirty_inode(sbi, type) do { } while (0)
#define stat_inc_total_hit(sb) do { } while (0)
#define stat_inc_volatile_write(inode) do { } while (0)
#define stat_dec_volatile_write(inode) do { } while (0)
#define stat_update_max_volatile_write(inode) do { } while (0)
+#define stat_inc_meta_count(sbi, blkaddr) do { } while (0)
#define stat_inc_seg_type(sbi, curseg) do { } while (0)
#define stat_inc_block_count(sbi, curseg) do { } while (0)
#define stat_inc_inplace_blocks(sbi) do { } while (0)
/*
* extent_cache.c
*/
-struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
struct rb_entry *cached_re, unsigned int ofs);
struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
- struct rb_root *root, struct rb_node **parent,
- unsigned int ofs);
-struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root,
+ struct rb_root_cached *root,
+ struct rb_node **parent,
+ unsigned int ofs, bool *leftmost);
+struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
struct rb_entry *cached_re, unsigned int ofs,
struct rb_entry **prev_entry, struct rb_entry **next_entry,
struct rb_node ***insert_p, struct rb_node **insert_parent,
- bool force);
+ bool force, bool *leftmost);
bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
- struct rb_root *root);
+ struct rb_root_cached *root);
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
void f2fs_drop_extent_tree(struct inode *inode);
{
#ifdef CONFIG_F2FS_FS_ENCRYPTION
file_set_encrypt(inode);
- inode->i_flags |= S_ENCRYPTED;
+ f2fs_set_inode_flags(inode);
#endif
}
F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO);
F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME);
F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
+F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
#ifdef CONFIG_BLK_DEV_ZONED
static inline int get_blkz_type(struct f2fs_sb_info *sbi,
}
#endif
-static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi)
+static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi)
{
- struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
+ return f2fs_sb_has_blkzoned(sbi->sb);
+}
- return blk_queue_discard(q) || f2fs_sb_has_blkzoned(sbi->sb);
+static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi)
+{
+ return blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev));
+}
+
+static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi)
+{
+ return (test_opt(sbi, DISCARD) && f2fs_hw_support_discard(sbi)) ||
+ f2fs_hw_should_discard(sbi);
}
static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt)
#endif
}
-static inline bool f2fs_force_buffered_io(struct inode *inode, int rw)
+static inline int block_unaligned_IO(struct inode *inode,
+ struct kiocb *iocb, struct iov_iter *iter)
{
- return (f2fs_post_read_required(inode) ||
- (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
- F2FS_I_SB(inode)->s_ndevs);
+ unsigned int i_blkbits = READ_ONCE(inode->i_blkbits);
+ unsigned int blocksize_mask = (1 << i_blkbits) - 1;
+ loff_t offset = iocb->ki_pos;
+ unsigned long align = offset | iov_iter_alignment(iter);
+
+ return align & blocksize_mask;
+}
+
+static inline int allow_outplace_dio(struct inode *inode,
+ struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ int rw = iov_iter_rw(iter);
+
+ return (test_opt(sbi, LFS) && (rw == WRITE) &&
+ !block_unaligned_IO(inode, iocb, iter));
+}
+
+static inline bool f2fs_force_buffered_io(struct inode *inode,
+ struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ int rw = iov_iter_rw(iter);
+
+ if (f2fs_post_read_required(inode))
+ return true;
+ if (sbi->s_ndevs)
+ return true;
+ /*
+ * for zoned block devices, fall back from direct IO to buffered IO,
+ * so that all IO can be serialized by the log-structured write path.
+ */
+ if (f2fs_sb_has_blkzoned(sbi->sb))
+ return true;
+ if (test_opt(sbi, LFS) && (rw == WRITE) &&
+ block_unaligned_IO(inode, iocb, iter))
+ return true;
+ if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED))
+ return true;
+
+ return false;
}
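
The unaligned-IO test in block_unaligned_IO() above relies on a small bit trick: OR the file offset with the iterator's address/length alignment and mask with (blocksize - 1); the result is non-zero if any of them is not a multiple of the block size. A minimal, runnable userspace sketch with invented sample values (the 4KB block size and the helper name are illustrative only):

/*
 * Standalone illustration of the alignment test used by
 * block_unaligned_IO() above. Returns 1 when the IO must be treated
 * as unaligned, 0 when offset and buffer are both block-aligned.
 */
#include <stdio.h>

static int unaligned(unsigned long offset, unsigned long buf_align,
                     unsigned int blkbits)
{
        unsigned long blocksize_mask = (1UL << blkbits) - 1;

        return (offset | buf_align) & blocksize_mask ? 1 : 0;
}

int main(void)
{
        /* 4KB blocks (blkbits = 12) */
        printf("%d\n", unaligned(8192, 4096, 12)); /* 0: fully aligned */
        printf("%d\n", unaligned(8192, 512, 12));  /* 1: buffer unaligned */
        printf("%d\n", unaligned(100, 4096, 12));  /* 1: offset unaligned */
        return 0;
}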
#ifdef CONFIG_F2FS_FAULT_INJECTION
#endif
#endif
+
+static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
+{
+#ifdef CONFIG_QUOTA
+ if (f2fs_sb_has_quota_ino(sbi->sb))
+ return true;
+ if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
+ F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
+ F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
+ return true;
+#endif
+ return false;
+}
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/inline.c
* Copyright (c) 2013, Intel Corporation
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
kunmap_atomic(src_addr);
set_page_dirty(dn.inode_page);
- f2fs_clear_radix_tree_dirty_tag(page);
+ f2fs_clear_page_cache_dirty_tag(page);
set_inode_flag(inode, FI_APPEND_WRITE);
set_inode_flag(inode, FI_DATA_EXIST);
clear_inode_flag(inode, FI_INLINE_DATA);
f2fs_put_page(ipage, 1);
} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
- if (f2fs_truncate_blocks(inode, 0, false))
+ if (f2fs_truncate_blocks(inode, 0, false, false))
return false;
goto process_inline;
}
return 0;
punch_dentry_pages:
truncate_inode_pages(&dir->i_data, 0);
- f2fs_truncate_blocks(dir, 0, false);
+ f2fs_truncate_blocks(dir, 0, false, false);
f2fs_remove_dirty_inode(dir);
return err;
}
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/f2fs/node.c
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
static void clear_node_page_dirty(struct page *page)
{
if (PageDirty(page)) {
- f2fs_clear_radix_tree_dirty_tag(page);
+ f2fs_clear_page_cache_dirty_tag(page);
clear_page_dirty_for_io(page);
dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
}
/* get current nat block page with lock */
src_page = get_current_nat_page(sbi, nid);
+ if (IS_ERR(src_page))
+ return src_page;
dst_page = f2fs_grab_meta_page(sbi, dst_off);
f2fs_bug_on(sbi, PageDirty(src_page));
if (f2fs_check_nid_range(sbi, nid))
return;
- rcu_read_lock();
- apage = radix_tree_lookup(&NODE_MAPPING(sbi)->i_pages, nid);
- rcu_read_unlock();
+ apage = xa_load(&NODE_MAPPING(sbi)->i_pages, nid);
if (apage)
return;
}
if (__is_valid_data_blkaddr(ni.blk_addr) &&
- !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC))
+ !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) {
+ up_read(&sbi->node_write);
goto redirty_out;
+ }
if (atomic && !test_opt(sbi, NOBARRIER))
fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
up_read(&sbi->node_write);
if (wbc->for_reclaim) {
- f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0,
- page->index, NODE);
+ f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE);
submitted = NULL;
}
return AOP_WRITEPAGE_ACTIVATE;
}
-void f2fs_move_node_page(struct page *node_page, int gc_type)
+int f2fs_move_node_page(struct page *node_page, int gc_type)
{
+ int err = 0;
+
if (gc_type == FG_GC) {
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
f2fs_wait_on_page_writeback(node_page, NODE, true);
f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page));
- if (!clear_page_dirty_for_io(node_page))
+ if (!clear_page_dirty_for_io(node_page)) {
+ err = -EAGAIN;
goto out_page;
+ }
if (__write_node_page(node_page, false, NULL,
- &wbc, false, FS_GC_NODE_IO, NULL))
+ &wbc, false, FS_GC_NODE_IO, NULL)) {
+ err = -EAGAIN;
unlock_page(node_page);
+ }
goto release_page;
} else {
/* set page dirty and write it */
unlock_page(node_page);
release_page:
f2fs_put_page(node_page, 0);
+ return err;
}
static int f2fs_write_node_page(struct page *page,
unsigned int *seq_id)
{
pgoff_t index;
- pgoff_t last_idx = ULONG_MAX;
struct pagevec pvec;
int ret = 0;
struct page *last_page = NULL;
bool marked = false;
nid_t ino = inode->i_ino;
int nr_pages;
+ int nwritten = 0;
if (atomic) {
last_page = last_fsync_dnode(sbi, ino);
f2fs_put_page(last_page, 0);
break;
} else if (submitted) {
- last_idx = page->index;
+ nwritten++;
}
if (page == last_page) {
goto retry;
}
out:
- if (last_idx != ULONG_MAX)
- f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE);
+ if (nwritten)
+ f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE);
return ret ? -EIO: 0;
}
nm_i->nat_block_bitmap)) {
struct page *page = get_current_nat_page(sbi, nid);
- ret = scan_nat_page(sbi, page, nid);
- f2fs_put_page(page, 1);
+ if (IS_ERR(page)) {
+ ret = PTR_ERR(page);
+ } else {
+ ret = scan_nat_page(sbi, page, nid);
+ f2fs_put_page(page, 1);
+ }
if (ret) {
up_read(&nm_i->nat_tree_lock);
f2fs_bug_on(sbi, !mount);
f2fs_msg(sbi->sb, KERN_ERR,
"NAT is corrupt, run fsck to fix it");
- return -EINVAL;
+ return ret;
}
}
spin_unlock(&nm_i->nid_list_lock);
/* Let's scan nat pages and its caches to get free nids */
- f2fs_build_free_nids(sbi, true, false);
- goto retry;
+ if (!f2fs_build_free_nids(sbi, true, false))
+ goto retry;
+ return false;
}
/*
if (!PageUptodate(ipage))
SetPageUptodate(ipage);
fill_node_footer(ipage, ino, ino, 0, true);
- set_cold_node(page, false);
+ set_cold_node(ipage, false);
src = F2FS_INODE(page);
dst = F2FS_INODE(ipage);
F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
i_projid))
dst->i_projid = src->i_projid;
+
+ if (f2fs_sb_has_inode_crtime(sbi->sb) &&
+ F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
+ i_crtime_nsec)) {
+ dst->i_crtime = src->i_crtime;
+ dst->i_crtime_nsec = src->i_crtime_nsec;
+ }
}
new_ni = old_ni;
__clear_bit_le(nat_index, nm_i->full_nat_bits);
}
-static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
+static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
struct nat_entry_set *set, struct cp_control *cpc)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
down_write(&curseg->journal_rwsem);
} else {
page = get_next_nat_page(sbi, start_nid);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
nat_blk = page_address(page);
f2fs_bug_on(sbi, !nat_blk);
}
radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
kmem_cache_free(nat_entry_set_slab, set);
}
+ return 0;
}
/*
* This function is called during the checkpointing process.
*/
-void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
unsigned int found;
nid_t set_idx = 0;
LIST_HEAD(sets);
+ int err = 0;
/* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
if (enabled_nat_bits(sbi, cpc)) {
}
if (!nm_i->dirty_nat_cnt)
- return;
+ return 0;
down_write(&nm_i->nat_tree_lock);
}
/* flush dirty nats in nat entry set */
- list_for_each_entry_safe(set, tmp, &sets, set_list)
- __flush_nat_entry_set(sbi, set, cpc);
+ list_for_each_entry_safe(set, tmp, &sets, set_list) {
+ err = __flush_nat_entry_set(sbi, set, cpc);
+ if (err)
+ break;
+ }
up_write(&nm_i->nat_tree_lock);
/* Allow dirty nats by node block allocation in write_begin */
+
+ return err;
}
static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
struct page *page;
page = f2fs_get_meta_page(sbi, nat_bits_addr++);
- if (IS_ERR(page)) {
- disable_nat_bits(sbi, true);
+ if (IS_ERR(page))
return PTR_ERR(page);
- }
memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
page_address(page), F2FS_BLKSIZE);
if (!page)
return;
- if (radix_tree_exceptional_entry(page))
+ if (xa_is_value(page))
mss->swap += PAGE_SIZE;
else
put_page(page);
smaps_walk.private = mss;
#ifdef CONFIG_SHMEM
+ /* In case of smaps_rollup, reset the value from the previous vma */
+ mss->check_shmem_swap = false;
if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
/*
* For shared or readonly shmem mappings we know that all
if (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
!(vma->vm_flags & VM_WRITE)) {
- mss->swap = shmem_swapped;
+ mss->swap += shmem_swapped;
} else {
mss->check_shmem_swap = true;
smaps_walk.pte_hole = smaps_pte_hole;
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata);
+ /**
+ * struct address_space - Contents of a cacheable, mappable object.
+ * @host: Owner, either the inode or the block_device.
+ * @i_pages: Cached pages.
+ * @gfp_mask: Memory allocation flags to use for allocating pages.
+ * @i_mmap_writable: Number of VM_SHARED mappings.
+ * @i_mmap: Tree of private and shared mappings.
+ * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
+ * @nrpages: Number of page entries, protected by the i_pages lock.
+ * @nrexceptional: Shadow or DAX entries, protected by the i_pages lock.
+ * @writeback_index: Writeback starts here.
+ * @a_ops: Methods.
+ * @flags: Error bits and flags (AS_*).
+ * @wb_err: The most recent error which has occurred.
+ * @private_lock: For use by the owner of the address_space.
+ * @private_list: For use by the owner of the address_space.
+ * @private_data: For use by the owner of the address_space.
+ */
struct address_space {
- struct inode *host; /* owner: inode, block_device */
- struct radix_tree_root i_pages; /* cached pages */
- atomic_t i_mmap_writable;/* count VM_SHARED mappings */
- struct rb_root_cached i_mmap; /* tree of private and shared mappings */
- struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */
- /* Protected by the i_pages lock */
- unsigned long nrpages; /* number of total pages */
- /* number of shadow or DAX exceptional entries */
+ struct inode *host;
+ struct xarray i_pages;
+ gfp_t gfp_mask;
+ atomic_t i_mmap_writable;
+ struct rb_root_cached i_mmap;
+ struct rw_semaphore i_mmap_rwsem;
+ unsigned long nrpages;
unsigned long nrexceptional;
- pgoff_t writeback_index;/* writeback starts here */
- const struct address_space_operations *a_ops; /* methods */
- unsigned long flags; /* error bits */
- spinlock_t private_lock; /* for use by the address_space */
- gfp_t gfp_mask; /* implicit gfp mask for allocations */
- struct list_head private_list; /* for use by the address_space */
- void *private_data; /* ditto */
+ pgoff_t writeback_index;
+ const struct address_space_operations *a_ops;
+ unsigned long flags;
errseq_t wb_err;
+ spinlock_t private_lock;
+ struct list_head private_list;
+ void *private_data;
} __attribute__((aligned(sizeof(long)))) __randomize_layout;
/*
* On most architectures that alignment is already the case; but
struct mutex bd_fsfreeze_mutex;
} __randomize_layout;
+ /* XArray tags, for tagging dirty and writeback pages in the pagecache. */
+ #define PAGECACHE_TAG_DIRTY XA_MARK_0
+ #define PAGECACHE_TAG_WRITEBACK XA_MARK_1
+ #define PAGECACHE_TAG_TOWRITE XA_MARK_2
+
/*
- * Radix-tree tags, for tagging dirty and writeback pages within the pagecache
- * radix trees
+ * Returns true if any of the pages in the mapping are marked with the tag.
*/
- #define PAGECACHE_TAG_DIRTY 0
- #define PAGECACHE_TAG_WRITEBACK 1
- #define PAGECACHE_TAG_TOWRITE 2
-
- int mapping_tagged(struct address_space *mapping, int tag);
+ static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag)
+ {
+ return xa_marked(&mapping->i_pages, tag);
+ }
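
The PAGECACHE_TAG_* values are now plain XA_MARK_* aliases, and mapping_tagged() is just xa_marked() on i_pages. A toy-module sketch of the underlying XArray mark API (not derived from this patch; the array name and stored pointer value are invented, and error handling is omitted):

/*
 * Toy kernel module sketching the XArray mark API that the
 * PAGECACHE_TAG_* aliases above map onto.
 */
#include <linux/module.h>
#include <linux/xarray.h>

static DEFINE_XARRAY(demo_marks);

static int __init xa_mark_demo_init(void)
{
        void *entry = (void *)0x1234;   /* any non-NULL, 4-byte-aligned pointer */

        /* Store an entry at index 5 and mark it, like tagging a dirty page. */
        xa_store(&demo_marks, 5, entry, GFP_KERNEL);
        xa_set_mark(&demo_marks, 5, XA_MARK_0);

        /* Same question the new mapping_tagged() asks of i_pages. */
        pr_info("any marked entries? %d\n", xa_marked(&demo_marks, XA_MARK_0));

        return 0;
}

static void __exit xa_mark_demo_exit(void)
{
        xa_destroy(&demo_marks);
}

module_init(xa_mark_demo_init);
module_exit(xa_mark_demo_exit);
MODULE_LICENSE("GPL");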
static inline void i_mmap_lock_write(struct address_space *mapping)
{
extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
struct inode *inode_out, loff_t pos_out,
u64 *len, bool is_dedupe);
+extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out, u64 len);
extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out, u64 len);
+ struct file *file_out, loff_t pos_out, u64 len);
extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
struct inode *dest, loff_t destoff,
loff_t len, bool *is_same);
return;
__sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE);
}
-
-static inline int do_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- u64 len)
-{
- int ret;
-
- file_start_write(file_out);
- ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len);
- file_end_write(file_out);
-
- return ret;
-}
/*
* get_write_access() gets write permission for a file.
SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */
SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */
SWP_BLKDEV = (1 << 6), /* its a block device */
- SWP_FILE = (1 << 7), /* set after swap_activate success */
- SWP_AREA_DISCARD = (1 << 8), /* single-time swap area discards */
- SWP_PAGE_DISCARD = (1 << 9), /* freed swap page-cluster discards */
- SWP_STABLE_WRITES = (1 << 10), /* no overwrite PG_writeback pages */
- SWP_SYNCHRONOUS_IO = (1 << 11), /* synchronous IO is efficient */
+ SWP_ACTIVATED = (1 << 7), /* set after swap_activate success */
+ SWP_FS = (1 << 8), /* swap file goes through fs */
+ SWP_AREA_DISCARD = (1 << 9), /* single-time swap area discards */
+ SWP_PAGE_DISCARD = (1 << 10), /* freed swap page-cluster discards */
+ SWP_STABLE_WRITES = (1 << 11), /* no overwrite PG_writeback pages */
+ SWP_SYNCHRONOUS_IO = (1 << 12), /* synchronous IO is efficient */
/* add others here before... */
- SWP_SCANNING = (1 << 12), /* refcount in scan_swap_map */
+ SWP_SCANNING = (1 << 13), /* refcount in scan_swap_map */
};
#define SWAP_CLUSTER_MAX 32UL
/* linux/mm/workingset.c */
void *workingset_eviction(struct address_space *mapping, struct page *page);
-bool workingset_refault(void *shadow);
+void workingset_refault(struct page *page, void *shadow);
void workingset_activation(struct page *page);
- /* Do not use directly, use workingset_lookup_update */
- void workingset_update_node(struct radix_tree_node *node);
-
- /* Returns workingset_update_node() if the mapping has shadow entries. */
- #define workingset_lookup_update(mapping) \
- ({ \
- radix_tree_update_node_t __helper = workingset_update_node; \
- if (dax_mapping(mapping) || shmem_mapping(mapping)) \
- __helper = NULL; \
- __helper; \
- })
+ /* Only track the nodes of mappings with shadow entries */
+ void workingset_update_node(struct xa_node *node);
+ #define mapping_set_update(xas, mapping) do { \
+ if (!dax_mapping(mapping) && !shmem_mapping(mapping)) \
+ xas_set_update(xas, workingset_update_node); \
+ } while (0)
/* linux/mm/page_alloc.c */
extern unsigned long totalram_pages;
extern int add_to_swap(struct page *page);
extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
extern int __add_to_swap_cache(struct page *page, swp_entry_t entry);
- extern void __delete_from_swap_cache(struct page *);
+ extern void __delete_from_swap_cache(struct page *, swp_entry_t entry);
extern void delete_from_swap_cache(struct page *);
extern void free_page_and_swap_cache(struct page *);
extern void free_pages_and_swap_cache(struct page **, int);
return -1;
}
- static inline void __delete_from_swap_cache(struct page *page)
+ static inline void __delete_from_swap_cache(struct page *page,
+ swp_entry_t entry)
{
}
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright(c) 2015 Intel Corporation. All rights reserved. */
- #include <linux/radix-tree.h>
#include <linux/device.h>
- #include <linux/types.h>
- #include <linux/pfn_t.h>
#include <linux/io.h>
#include <linux/kasan.h>
- #include <linux/mm.h>
#include <linux/memory_hotplug.h>
+ #include <linux/mm.h>
+ #include <linux/pfn_t.h>
#include <linux/swap.h>
#include <linux/swapops.h>
+ #include <linux/types.h>
#include <linux/wait_bit.h>
+ #include <linux/xarray.h>
- static DEFINE_MUTEX(pgmap_lock);
- static RADIX_TREE(pgmap_radix, GFP_KERNEL);
+ static DEFINE_XARRAY(pgmap_array);
#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
- static unsigned long order_at(struct resource *res, unsigned long pgoff)
- {
- unsigned long phys_pgoff = PHYS_PFN(res->start) + pgoff;
- unsigned long nr_pages, mask;
-
- nr_pages = PHYS_PFN(resource_size(res));
- if (nr_pages == pgoff)
- return ULONG_MAX;
-
- /*
- * What is the largest aligned power-of-2 range available from
- * this resource pgoff to the end of the resource range,
- * considering the alignment of the current pgoff?
- */
- mask = phys_pgoff | rounddown_pow_of_two(nr_pages - pgoff);
- if (!mask)
- return ULONG_MAX;
-
- return find_first_bit(&mask, BITS_PER_LONG);
- }
-
- #define foreach_order_pgoff(res, order, pgoff) \
- for (pgoff = 0, order = order_at((res), pgoff); order < ULONG_MAX; \
- pgoff += 1UL << order, order = order_at((res), pgoff))
-
#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
unsigned long addr,
EXPORT_SYMBOL(device_private_entry_fault);
#endif /* CONFIG_DEVICE_PRIVATE */
- static void pgmap_radix_release(struct resource *res, unsigned long end_pgoff)
+ static void pgmap_array_delete(struct resource *res)
{
- unsigned long pgoff, order;
-
- mutex_lock(&pgmap_lock);
- foreach_order_pgoff(res, order, pgoff) {
- if (pgoff >= end_pgoff)
- break;
- radix_tree_delete(&pgmap_radix, PHYS_PFN(res->start) + pgoff);
- }
- mutex_unlock(&pgmap_lock);
-
+ xa_store_range(&pgmap_array, PHYS_PFN(res->start), PHYS_PFN(res->end),
+ NULL, GFP_KERNEL);
synchronize_rcu();
}
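
pgmap_array_delete() leans on xa_store_range(): a single entry can cover a whole range of indices, and storing NULL over the same range removes it again. A toy-module sketch of those semantics, assuming CONFIG_XARRAY_MULTI is enabled (names and values are illustrative, error handling omitted):

/*
 * Toy kernel module showing the multi-index store/delete pattern used
 * by devm_memremap_pages() and pgmap_array_delete() above.
 */
#include <linux/module.h>
#include <linux/xarray.h>

static DEFINE_XARRAY(pgmap_demo);

static int __init xa_range_demo_init(void)
{
        void *pgmap_like = (void *)0x1000;      /* stands in for a dev_pagemap */

        /* Cover "PFNs" 100..199 with one entry. */
        xa_store_range(&pgmap_demo, 100, 199, pgmap_like, GFP_KERNEL);
        pr_info("lookup at 150: %p\n", xa_load(&pgmap_demo, 150));

        /* Storing NULL over the same range removes it. */
        xa_store_range(&pgmap_demo, 100, 199, NULL, GFP_KERNEL);
        pr_info("lookup at 150 after delete: %p\n", xa_load(&pgmap_demo, 150));
        return 0;
}

static void __exit xa_range_demo_exit(void)
{
        xa_destroy(&pgmap_demo);
}

module_init(xa_range_demo_init);
module_exit(xa_range_demo_exit);
MODULE_LICENSE("GPL");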
mem_hotplug_done();
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
- pgmap_radix_release(res, -1);
+ pgmap_array_delete(res);
dev_WARN_ONCE(dev, pgmap->altmap.alloc,
"%s: failed to free all reserved pages\n", __func__);
}
struct vmem_altmap *altmap = pgmap->altmap_valid ?
&pgmap->altmap : NULL;
struct resource *res = &pgmap->res;
- unsigned long pfn;
+ struct dev_pagemap *conflict_pgmap;
pgprot_t pgprot = PAGE_KERNEL;
- unsigned long pgoff, order;
int error, nid, is_ram;
- struct dev_pagemap *conflict_pgmap;
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
pgmap->dev = dev;
- mutex_lock(&pgmap_lock);
- error = 0;
-
- foreach_order_pgoff(res, order, pgoff) {
- error = __radix_tree_insert(&pgmap_radix,
- PHYS_PFN(res->start) + pgoff, order, pgmap);
- if (error) {
- dev_err(dev, "%s: failed: %d\n", __func__, error);
- break;
- }
- }
- mutex_unlock(&pgmap_lock);
+ error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start),
+ PHYS_PFN(res->end), pgmap, GFP_KERNEL));
if (error)
- goto err_radix;
+ goto err_array;
nid = dev_to_node(dev);
if (nid < 0)
if (error)
goto err_add_memory;
- for_each_device_pfn(pfn, pgmap) {
- struct page *page = pfn_to_page(pfn);
-
- /*
- * ZONE_DEVICE pages union ->lru with a ->pgmap back
- * pointer. It is a bug if a ZONE_DEVICE page is ever
- * freed or placed on a driver-private list. Seed the
- * storage with LIST_POISON* values.
- */
- list_del(&page->lru);
- page->pgmap = pgmap;
- percpu_ref_get(pgmap->ref);
- }
+ /*
+ * Initialization of the pages has been deferred until now in order
+ * to allow us to do the work while not holding the hotplug lock.
+ */
+ memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
+ align_start >> PAGE_SHIFT,
+ align_size >> PAGE_SHIFT, pgmap);
+ percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));
devm_add_action(dev, devm_memremap_pages_release, pgmap);
err_kasan:
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
err_pfn_remap:
- err_radix:
- pgmap_radix_release(res, pgoff);
+ pgmap_array_delete(res);
+ err_array:
return ERR_PTR(error);
}
EXPORT_SYMBOL(devm_memremap_pages);
/* fall back to slow path lookup */
rcu_read_lock();
- pgmap = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
+ pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
pgmap = NULL;
rcu_read_unlock();
for more information.
- config RADIX_TREE_MULTIORDER
+ config XARRAY_MULTI
bool
+ help
+ Support entries which occupy multiple consecutive indices in the
+ XArray.
config ASSOCIATIVE_ARRAY
bool
config GENERIC_LIB_UCMPDI2
bool
+
+config GENERIC_LIB_UMODDI3
+ bool
bool
depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
select STACKTRACE
- select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !X86
+ select FRAME_POINTER if !MIPS && !PPC && !ARM && !S390 && !MICROBLAZE && !ARC && !X86
select KALLSYMS
select KALLSYMS_ALL
depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
depends on !X86_64
select STACKTRACE
- select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86
+ select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM && !ARC && !X86
help
Provide stacktrace filter for fault-injection capabilities
depends on DEBUG_KERNEL
depends on STACKTRACE_SUPPORT
depends on PROC_FS
- select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86
+ select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM && !ARC && !X86
select KALLSYMS
select KALLSYMS_ALL
select STACKTRACE
config TEST_UUID
tristate "Test functions located in the uuid module at runtime"
+ config TEST_XARRAY
+ tristate "Test the XArray code at runtime"
+
config TEST_OVERFLOW
tristate "Test check_*_overflow() functions at runtime"
If unsure, say N.
+config TEST_MEMCAT_P
+ tristate "Test memcat_p() helper function"
+ help
+ Test the memcat_p() helper for correctly merging two
+ pointer arrays together.
+
+ If unsure, say N.
+
endif # RUNTIME_TESTING_MENU
config MEMTEST
KCOV_INSTRUMENT_dynamic_debug.o := n
lib-y := ctype.o string.o vsprintf.o cmdline.o \
- rbtree.o radix-tree.o timerqueue.o\
+ rbtree.o radix-tree.o timerqueue.o xarray.o \
idr.o int_sqrt.o extable.o \
sha1.o chacha20.o irq_regs.o argv_split.o \
flex_proportions.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
- nmi_backtrace.o nodemask.o win_minmax.o
+ nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o
lib-$(CONFIG_PRINTK) += dump_stack.o
lib-$(CONFIG_MMU) += ioremap.o
obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o
obj-$(CONFIG_TEST_UUID) += test_uuid.o
+ obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
obj-$(CONFIG_TEST_PARMAN) += test_parman.o
obj-$(CONFIG_TEST_KMOD) += test_kmod.o
obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o
+obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o
ifeq ($(CONFIG_DEBUG_KOBJECT),y)
CFLAGS_kobject.o += -DDEBUG
obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
obj-$(CONFIG_BCH) += bch.o
-CFLAGS_bch.o := $(call cc-option,-Wframe-larger-than=4500)
obj-$(CONFIG_LZO_COMPRESS) += lzo/
obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
obj-$(CONFIG_LZ4_COMPRESS) += lz4/
obj-$(CONFIG_GENERIC_LIB_MULDI3) += muldi3.o
obj-$(CONFIG_GENERIC_LIB_CMPDI2) += cmpdi2.o
obj-$(CONFIG_GENERIC_LIB_UCMPDI2) += ucmpdi2.o
+obj-$(CONFIG_GENERIC_LIB_UMODDI3) += umoddi3.o udivmoddi4.o
#include <linux/cleancache.h>
#include <linux/shmem_fs.h>
#include <linux/rmap.h>
+#include <linux/delayacct.h>
+#include <linux/psi.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
* ->tasklist_lock (memory_failure, collect_procs_ao)
*/
- static int page_cache_tree_insert(struct address_space *mapping,
- struct page *page, void **shadowp)
- {
- struct radix_tree_node *node;
- void **slot;
- int error;
-
- error = __radix_tree_create(&mapping->i_pages, page->index, 0,
- &node, &slot);
- if (error)
- return error;
- if (*slot) {
- void *p;
-
- p = radix_tree_deref_slot_protected(slot,
- &mapping->i_pages.xa_lock);
- if (!radix_tree_exceptional_entry(p))
- return -EEXIST;
-
- mapping->nrexceptional--;
- if (shadowp)
- *shadowp = p;
- }
- __radix_tree_replace(&mapping->i_pages, node, slot, page,
- workingset_lookup_update(mapping));
- mapping->nrpages++;
- return 0;
- }
-
- static void page_cache_tree_delete(struct address_space *mapping,
+ static void page_cache_delete(struct address_space *mapping,
struct page *page, void *shadow)
{
- int i, nr;
+ XA_STATE(xas, &mapping->i_pages, page->index);
+ unsigned int nr = 1;
+
+ mapping_set_update(&xas, mapping);
- /* hugetlb pages are represented by one entry in the radix tree */
- nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
+ /* hugetlb pages are represented by a single entry in the xarray */
+ if (!PageHuge(page)) {
+ xas_set_order(&xas, page->index, compound_order(page));
+ nr = 1U << compound_order(page);
+ }
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageTail(page), page);
VM_BUG_ON_PAGE(nr != 1 && shadow, page);
- for (i = 0; i < nr; i++) {
- struct radix_tree_node *node;
- void **slot;
-
- __radix_tree_lookup(&mapping->i_pages, page->index + i,
- &node, &slot);
-
- VM_BUG_ON_PAGE(!node && nr != 1, page);
-
- radix_tree_clear_tags(&mapping->i_pages, node, slot);
- __radix_tree_replace(&mapping->i_pages, node, slot, shadow,
- workingset_lookup_update(mapping));
- }
+ xas_store(&xas, shadow);
+ xas_init_marks(&xas);
page->mapping = NULL;
/* Leave page->index set: truncation lookup relies upon it */
trace_mm_filemap_delete_from_page_cache(page);
unaccount_page_cache_page(mapping, page);
- page_cache_tree_delete(mapping, page, shadow);
+ page_cache_delete(mapping, page, shadow);
}
static void page_cache_free_page(struct address_space *mapping,
EXPORT_SYMBOL(delete_from_page_cache);
/*
- * page_cache_tree_delete_batch - delete several pages from page cache
+ * page_cache_delete_batch - delete several pages from page cache
* @mapping: the mapping to which pages belong
* @pvec: pagevec with pages to delete
*
*
* The function expects the i_pages lock to be held.
*/
- static void
- page_cache_tree_delete_batch(struct address_space *mapping,
+ static void page_cache_delete_batch(struct address_space *mapping,
struct pagevec *pvec)
{
- struct radix_tree_iter iter;
- void **slot;
+ XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
int total_pages = 0;
int i = 0, tail_pages = 0;
struct page *page;
- pgoff_t start;
- start = pvec->pages[0]->index;
- radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
+ mapping_set_update(&xas, mapping);
+ xas_for_each(&xas, page, ULONG_MAX) {
if (i >= pagevec_count(pvec) && !tail_pages)
break;
- page = radix_tree_deref_slot_protected(slot,
- &mapping->i_pages.xa_lock);
- if (radix_tree_exceptional_entry(page))
+ if (xa_is_value(page))
continue;
if (!tail_pages) {
/*
* have our pages locked so they are protected from
* being removed.
*/
- if (page != pvec->pages[i])
+ if (page != pvec->pages[i]) {
+ VM_BUG_ON_PAGE(page->index >
+ pvec->pages[i]->index, page);
continue;
+ }
WARN_ON_ONCE(!PageLocked(page));
if (PageTransHuge(page) && !PageHuge(page))
tail_pages = HPAGE_PMD_NR - 1;
*/
i++;
} else {
+ VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
+ != pvec->pages[i]->index, page);
tail_pages--;
}
- radix_tree_clear_tags(&mapping->i_pages, iter.node, slot);
- __radix_tree_replace(&mapping->i_pages, iter.node, slot, NULL,
- workingset_lookup_update(mapping));
+ xas_store(&xas, NULL);
total_pages++;
}
mapping->nrpages -= total_pages;
unaccount_page_cache_page(mapping, pvec->pages[i]);
}
- page_cache_tree_delete_batch(mapping, pvec);
+ page_cache_delete_batch(mapping, pvec);
xa_unlock_irqrestore(&mapping->i_pages, flags);
for (i = 0; i < pagevec_count(pvec); i++)
bool filemap_range_has_page(struct address_space *mapping,
loff_t start_byte, loff_t end_byte)
{
- pgoff_t index = start_byte >> PAGE_SHIFT;
- pgoff_t end = end_byte >> PAGE_SHIFT;
struct page *page;
+ XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
+ pgoff_t max = end_byte >> PAGE_SHIFT;
if (end_byte < start_byte)
return false;
- if (mapping->nrpages == 0)
- return false;
+ rcu_read_lock();
+ for (;;) {
+ page = xas_find(&xas, max);
+ if (xas_retry(&xas, page))
+ continue;
+ /* Shadow entries don't count */
+ if (xa_is_value(page))
+ continue;
+ /*
+ * We don't need to try to pin this page; we're about to
+ * release the RCU lock anyway. It is enough to know that
+ * there was a page here recently.
+ */
+ break;
+ }
+ rcu_read_unlock();
- if (!find_get_pages_range(mapping, &index, end, 1, &page))
- return false;
- put_page(page);
- return true;
+ return page != NULL;
}
EXPORT_SYMBOL(filemap_range_has_page);
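
filemap_range_has_page() now walks i_pages with xas_find() under rcu_read_lock(), retrying on xas_retry() and skipping value (shadow) entries. A self-contained toy-module sketch of the same walk on a private XArray (array name, indices and stored values are invented):

/*
 * Toy kernel module mirroring the xas_find() walk above on a private
 * XArray instead of a real page cache mapping.
 */
#include <linux/module.h>
#include <linux/xarray.h>

static DEFINE_XARRAY(scan_demo);

static bool range_has_entry(unsigned long start, unsigned long max)
{
        XA_STATE(xas, &scan_demo, start);
        void *entry;

        rcu_read_lock();
        for (;;) {
                entry = xas_find(&xas, max);
                if (xas_retry(&xas, entry))     /* raced with a tree change */
                        continue;
                if (xa_is_value(entry))         /* skip value ("shadow") entries */
                        continue;
                break;  /* NULL (nothing left) or a real pointer entry */
        }
        rcu_read_unlock();

        return entry != NULL;
}

static int __init scan_demo_init(void)
{
        xa_store(&scan_demo, 3, xa_mk_value(42), GFP_KERNEL);   /* value entry */
        xa_store(&scan_demo, 7, (void *)0x1000, GFP_KERNEL);    /* pointer entry */

        pr_info("0..5 has entry? %d\n", range_has_entry(0, 5));   /* expect 0 */
        pr_info("0..10 has entry? %d\n", range_has_entry(0, 10)); /* expect 1 */
        return 0;
}

static void __exit scan_demo_exit(void)
{
        xa_destroy(&scan_demo);
}

module_init(scan_demo_init);
module_exit(scan_demo_exit);
MODULE_LICENSE("GPL");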
* locked. This function does not add the new page to the LRU, the
* caller must do that.
*
- * The remove + add is atomic. The only way this function can fail is
- * memory allocation failure.
+ * The remove + add is atomic. This function cannot fail.
*/
int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
{
- int error;
+ struct address_space *mapping = old->mapping;
+ void (*freepage)(struct page *) = mapping->a_ops->freepage;
+ pgoff_t offset = old->index;
+ XA_STATE(xas, &mapping->i_pages, offset);
+ unsigned long flags;
VM_BUG_ON_PAGE(!PageLocked(old), old);
VM_BUG_ON_PAGE(!PageLocked(new), new);
VM_BUG_ON_PAGE(new->mapping, new);
- error = radix_tree_preload(gfp_mask & GFP_RECLAIM_MASK);
- if (!error) {
- struct address_space *mapping = old->mapping;
- void (*freepage)(struct page *);
- unsigned long flags;
-
- pgoff_t offset = old->index;
- freepage = mapping->a_ops->freepage;
-
- get_page(new);
- new->mapping = mapping;
- new->index = offset;
+ get_page(new);
+ new->mapping = mapping;
+ new->index = offset;
- xa_lock_irqsave(&mapping->i_pages, flags);
- __delete_from_page_cache(old, NULL);
- error = page_cache_tree_insert(mapping, new, NULL);
- BUG_ON(error);
+ xas_lock_irqsave(&xas, flags);
+ xas_store(&xas, new);
- /*
- * hugetlb pages do not participate in page cache accounting.
- */
- if (!PageHuge(new))
- __inc_node_page_state(new, NR_FILE_PAGES);
- if (PageSwapBacked(new))
- __inc_node_page_state(new, NR_SHMEM);
- xa_unlock_irqrestore(&mapping->i_pages, flags);
- mem_cgroup_migrate(old, new);
- radix_tree_preload_end();
- if (freepage)
- freepage(old);
- put_page(old);
- }
+ old->mapping = NULL;
+ /* hugetlb pages do not participate in page cache accounting. */
+ if (!PageHuge(old))
+ __dec_node_page_state(new, NR_FILE_PAGES);
+ if (!PageHuge(new))
+ __inc_node_page_state(new, NR_FILE_PAGES);
+ if (PageSwapBacked(old))
+ __dec_node_page_state(new, NR_SHMEM);
+ if (PageSwapBacked(new))
+ __inc_node_page_state(new, NR_SHMEM);
+ xas_unlock_irqrestore(&xas, flags);
+ mem_cgroup_migrate(old, new);
+ if (freepage)
+ freepage(old);
+ put_page(old);
- return error;
+ return 0;
}
EXPORT_SYMBOL_GPL(replace_page_cache_page);
pgoff_t offset, gfp_t gfp_mask,
void **shadowp)
{
+ XA_STATE(xas, &mapping->i_pages, offset);
int huge = PageHuge(page);
struct mem_cgroup *memcg;
int error;
+ void *old;
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageSwapBacked(page), page);
+ mapping_set_update(&xas, mapping);
if (!huge) {
error = mem_cgroup_try_charge(page, current->mm,
return error;
}
- error = radix_tree_maybe_preload(gfp_mask & GFP_RECLAIM_MASK);
- if (error) {
- if (!huge)
- mem_cgroup_cancel_charge(page, memcg, false);
- return error;
- }
-
get_page(page);
page->mapping = mapping;
page->index = offset;
- xa_lock_irq(&mapping->i_pages);
- error = page_cache_tree_insert(mapping, page, shadowp);
- radix_tree_preload_end();
- if (unlikely(error))
- goto err_insert;
+ do {
+ xas_lock_irq(&xas);
+ old = xas_load(&xas);
+ if (old && !xa_is_value(old))
+ xas_set_err(&xas, -EEXIST);
+ xas_store(&xas, page);
+ if (xas_error(&xas))
+ goto unlock;
+
+ if (xa_is_value(old)) {
+ mapping->nrexceptional--;
+ if (shadowp)
+ *shadowp = old;
+ }
+ mapping->nrpages++;
+
+ /* hugetlb pages do not participate in page cache accounting */
+ if (!huge)
+ __inc_node_page_state(page, NR_FILE_PAGES);
+ unlock:
+ xas_unlock_irq(&xas);
+ } while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK));
+
+ if (xas_error(&xas))
+ goto error;
- /* hugetlb pages do not participate in page cache accounting. */
- if (!huge)
- __inc_node_page_state(page, NR_FILE_PAGES);
- xa_unlock_irq(&mapping->i_pages);
if (!huge)
mem_cgroup_commit_charge(page, memcg, false, false);
trace_mm_filemap_add_to_page_cache(page);
return 0;
- err_insert:
+ error:
page->mapping = NULL;
/* Leave page->index set: truncation relies upon it */
- xa_unlock_irq(&mapping->i_pages);
if (!huge)
mem_cgroup_cancel_charge(page, memcg, false);
put_page(page);
- return error;
+ return xas_error(&xas);
}
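The do { } while (xas_nomem()) loop above is what replaces the old radix_tree_maybe_preload()/radix_tree_preload_end() pair: the store is attempted under the lock, and if the XArray needed a node, xas_nomem() allocates one with the caller's gfp mask after the lock has been dropped and the loop retries. A hedged, generic sketch of that idiom; my_store() and its xarray are illustrative only.

static int my_store(struct xarray *xa, unsigned long index, void *item,
		    gfp_t gfp)
{
	XA_STATE(xas, xa, index);

	do {
		xas_lock_irq(&xas);
		if (xas_load(&xas))
			xas_set_err(&xas, -EEXIST);	/* slot already in use */
		else
			xas_store(&xas, item);		/* may set -ENOMEM */
		xas_unlock_irq(&xas);
	} while (xas_nomem(&xas, gfp));	/* allocate and retry on -ENOMEM */

	return xas_error(&xas);
}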
/**
* data from the working set, only to cache data that will
* get overwritten with something else, is a waste of memory.
*/
- if (!(gfp_mask & __GFP_WRITE) &&
- shadow && workingset_refault(shadow)) {
- SetPageActive(page);
- workingset_activation(page);
- } else
- ClearPageActive(page);
+ WARN_ON_ONCE(PageActive(page));
+ if (!(gfp_mask & __GFP_WRITE) && shadow)
+ workingset_refault(page, shadow);
lru_cache_add(page);
}
return ret;
{
struct wait_page_queue wait_page;
wait_queue_entry_t *wait = &wait_page.wait;
+ bool thrashing = false;
+ unsigned long pflags;
int ret = 0;
+ if (bit_nr == PG_locked &&
+ !PageUptodate(page) && PageWorkingset(page)) {
+ if (!PageSwapBacked(page))
+ delayacct_thrashing_start();
+ psi_memstall_enter(&pflags);
+ thrashing = true;
+ }
+
init_wait(wait);
wait->flags = lock ? WQ_FLAG_EXCLUSIVE : 0;
wait->func = wake_page_function;
finish_wait(q, wait);
+ if (thrashing) {
+ if (!PageSwapBacked(page))
+ delayacct_thrashing_end();
+ psi_memstall_leave(&pflags);
+ }
+
/*
* A signal could leave PageWaiters set. Clearing it here if
* !waitqueue_active would be possible (by open-coding finish_wait),
}
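The thrashing bracket added above feeds both delay accounting and the new pressure stall information (PSI) instrumentation. As a hedged illustration, any block that can stall on memory can be wrapped the same way; wait_with_memstall() and do_slow_thing() are made-up names.

static void wait_with_memstall(void (*do_slow_thing)(void))
{
	unsigned long pflags;

	psi_memstall_enter(&pflags);	/* time from here counts as memory stall */
	do_slow_thing();
	psi_memstall_leave(&pflags);
}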
/**
- * page_cache_next_hole - find the next hole (not-present entry)
- * @mapping: mapping
- * @index: index
- * @max_scan: maximum range to search
- *
- * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the
- * lowest indexed hole.
- *
- * Returns: the index of the hole if found, otherwise returns an index
- * outside of the set specified (in which case 'return - index >=
- * max_scan' will be true). In rare cases of index wrap-around, 0 will
- * be returned.
- *
- * page_cache_next_hole may be called under rcu_read_lock. However,
- * like radix_tree_gang_lookup, this will not atomically search a
- * snapshot of the tree at a single point in time. For example, if a
- * hole is created at index 5, then subsequently a hole is created at
- * index 10, page_cache_next_hole covering both indexes may return 10
- * if called under rcu_read_lock.
+ * page_cache_next_miss() - Find the next gap in the page cache.
+ * @mapping: Mapping.
+ * @index: Index.
+ * @max_scan: Maximum range to search.
+ *
+ * Search the range [index, min(index + max_scan - 1, ULONG_MAX)] for the
+ * gap with the lowest index.
+ *
+ * This function may be called under the rcu_read_lock. However, this will
+ * not atomically search a snapshot of the cache at a single point in time.
+ * For example, if a gap is created at index 5, then subsequently a gap is
+ * created at index 10, page_cache_next_miss covering both indices may
+ * return 10 if called under the rcu_read_lock.
+ *
+ * Return: The index of the gap if found, otherwise an index outside the
+ * range specified (in which case 'return - index >= max_scan' will be true).
+ * In the rare case of index wrap-around, 0 will be returned.
*/
- pgoff_t page_cache_next_hole(struct address_space *mapping,
+ pgoff_t page_cache_next_miss(struct address_space *mapping,
pgoff_t index, unsigned long max_scan)
{
- unsigned long i;
+ XA_STATE(xas, &mapping->i_pages, index);
- for (i = 0; i < max_scan; i++) {
- struct page *page;
-
- page = radix_tree_lookup(&mapping->i_pages, index);
- if (!page || radix_tree_exceptional_entry(page))
+ while (max_scan--) {
+ void *entry = xas_next(&xas);
+ if (!entry || xa_is_value(entry))
break;
- index++;
- if (index == 0)
+ if (xas.xa_index == 0)
break;
}
- return index;
+ return xas.xa_index;
}
- EXPORT_SYMBOL(page_cache_next_hole);
+ EXPORT_SYMBOL(page_cache_next_miss);
/**
- * page_cache_prev_hole - find the prev hole (not-present entry)
- * @mapping: mapping
- * @index: index
- * @max_scan: maximum range to search
- *
- * Search backwards in the range [max(index-max_scan+1, 0), index] for
- * the first hole.
- *
- * Returns: the index of the hole if found, otherwise returns an index
- * outside of the set specified (in which case 'index - return >=
- * max_scan' will be true). In rare cases of wrap-around, ULONG_MAX
- * will be returned.
- *
- * page_cache_prev_hole may be called under rcu_read_lock. However,
- * like radix_tree_gang_lookup, this will not atomically search a
- * snapshot of the tree at a single point in time. For example, if a
- * hole is created at index 10, then subsequently a hole is created at
- * index 5, page_cache_prev_hole covering both indexes may return 5 if
- * called under rcu_read_lock.
+ * page_cache_prev_miss() - Find the previous gap in the page cache.
+ * @mapping: Mapping.
+ * @index: Index.
+ * @max_scan: Maximum range to search.
+ *
+ * Search the range [max(index - max_scan + 1, 0), index] for the
+ * gap with the highest index.
+ *
+ * This function may be called under the rcu_read_lock. However, this will
+ * not atomically search a snapshot of the cache at a single point in time.
+ * For example, if a gap is created at index 10, then subsequently a gap is
+ * created at index 5, page_cache_prev_miss() covering both indices may
+ * return 5 if called under the rcu_read_lock.
+ *
+ * Return: The index of the gap if found, otherwise an index outside the
+ * range specified (in which case 'index - return >= max_scan' will be true).
+ * In the rare case of wrap-around, ULONG_MAX will be returned.
*/
- pgoff_t page_cache_prev_hole(struct address_space *mapping,
+ pgoff_t page_cache_prev_miss(struct address_space *mapping,
pgoff_t index, unsigned long max_scan)
{
- unsigned long i;
+ XA_STATE(xas, &mapping->i_pages, index);
- for (i = 0; i < max_scan; i++) {
- struct page *page;
-
- page = radix_tree_lookup(&mapping->i_pages, index);
- if (!page || radix_tree_exceptional_entry(page))
+ while (max_scan--) {
+ void *entry = xas_prev(&xas);
+ if (!entry || xa_is_value(entry))
break;
- index--;
- if (index == ULONG_MAX)
+ if (xas.xa_index == ULONG_MAX)
break;
}
- return index;
+ return xas.xa_index;
}
- EXPORT_SYMBOL(page_cache_prev_hole);
+ EXPORT_SYMBOL(page_cache_prev_miss);
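As a usage illustration only, the miss functions make it easy to measure how far a run of cached pages extends; cached_run_length() below is hypothetical and not part of the kernel.

/*
 * How many consecutive pages starting at @index are present in the
 * page cache?  Per the contract documented above, the result is at
 * least @max_scan when no gap is found within the searched range.
 */
static unsigned long cached_run_length(struct address_space *mapping,
				       pgoff_t index, unsigned long max_scan)
{
	return page_cache_next_miss(mapping, index, max_scan) - index;
}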
/**
* find_get_entry - find and get a page cache entry
*/
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
{
- void **pagep;
+ XA_STATE(xas, &mapping->i_pages, offset);
struct page *head, *page;
rcu_read_lock();
repeat:
- page = NULL;
- pagep = radix_tree_lookup_slot(&mapping->i_pages, offset);
- if (pagep) {
- page = radix_tree_deref_slot(pagep);
- if (unlikely(!page))
- goto out;
- if (radix_tree_exception(page)) {
- if (radix_tree_deref_retry(page))
- goto repeat;
- /*
- * A shadow entry of a recently evicted page,
- * or a swap entry from shmem/tmpfs. Return
- * it without attempting to raise page count.
- */
- goto out;
- }
+ xas_reset(&xas);
+ page = xas_load(&xas);
+ if (xas_retry(&xas, page))
+ goto repeat;
+ /*
+ * A shadow entry of a recently evicted page, or a swap entry from
+ * shmem/tmpfs. Return it without attempting to raise page count.
+ */
+ if (!page || xa_is_value(page))
+ goto out;
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
- goto repeat;
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
+ goto repeat;
- /* The page was split under us? */
- if (compound_head(page) != head) {
- put_page(head);
- goto repeat;
- }
+ /* The page was split under us? */
+ if (compound_head(page) != head) {
+ put_page(head);
+ goto repeat;
+ }
- /*
- * Has the page moved?
- * This is part of the lockless pagecache protocol. See
- * include/linux/pagemap.h for details.
- */
- if (unlikely(page != *pagep)) {
- put_page(head);
- goto repeat;
- }
+ /*
+ * Has the page moved?
+ * This is part of the lockless pagecache protocol. See
+ * include/linux/pagemap.h for details.
+ */
+ if (unlikely(page != xas_reload(&xas))) {
+ put_page(head);
+ goto repeat;
}
out:
rcu_read_unlock();
repeat:
page = find_get_entry(mapping, offset);
- if (page && !radix_tree_exception(page)) {
+ if (page && !xa_is_value(page)) {
lock_page(page);
/* Has the page been truncated? */
if (unlikely(page_mapping(page) != mapping)) {
repeat:
page = find_get_entry(mapping, offset);
- if (radix_tree_exceptional_entry(page))
+ if (xa_is_value(page))
page = NULL;
if (!page)
goto no_page;
pgoff_t start, unsigned int nr_entries,
struct page **entries, pgoff_t *indices)
{
- void **slot;
+ XA_STATE(xas, &mapping->i_pages, start);
+ struct page *page;
unsigned int ret = 0;
- struct radix_tree_iter iter;
if (!nr_entries)
return 0;
rcu_read_lock();
- radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
- struct page *head, *page;
- repeat:
- page = radix_tree_deref_slot(slot);
- if (unlikely(!page))
+ xas_for_each(&xas, page, ULONG_MAX) {
+ struct page *head;
+ if (xas_retry(&xas, page))
continue;
- if (radix_tree_exception(page)) {
- if (radix_tree_deref_retry(page)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- /*
- * A shadow entry of a recently evicted page, a swap
- * entry from shmem/tmpfs or a DAX entry. Return it
- * without attempting to raise page count.
- */
+ /*
+ * A shadow entry of a recently evicted page, a swap
+ * entry from shmem/tmpfs or a DAX entry. Return it
+ * without attempting to raise page count.
+ */
+ if (xa_is_value(page))
goto export;
- }
head = compound_head(page);
if (!page_cache_get_speculative(head))
- goto repeat;
+ goto retry;
/* The page was split under us? */
- if (compound_head(page) != head) {
- put_page(head);
- goto repeat;
- }
+ if (compound_head(page) != head)
+ goto put_page;
/* Has the page moved? */
- if (unlikely(page != *slot)) {
- put_page(head);
- goto repeat;
- }
+ if (unlikely(page != xas_reload(&xas)))
+ goto put_page;
+
export:
- indices[ret] = iter.index;
+ indices[ret] = xas.xa_index;
entries[ret] = page;
if (++ret == nr_entries)
break;
+ continue;
+ put_page:
+ put_page(head);
+ retry:
+ xas_reset(&xas);
}
rcu_read_unlock();
return ret;
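Each of the gang-lookup conversions that follow repeats the same lockless pagecache protocol: take a speculative reference on the head page, re-check that the compound page was not split, then confirm with xas_reload() that the slot still points at the same page, resetting the walk otherwise. A hedged sketch of that skeleton; walk_pages() and process() are illustrative names.

static void walk_pages(struct address_space *mapping, pgoff_t start,
		       pgoff_t end, void (*process)(struct page *))
{
	XA_STATE(xas, &mapping->i_pages, start);
	struct page *page;

	rcu_read_lock();
	xas_for_each(&xas, page, end) {
		struct page *head;

		if (xas_retry(&xas, page))
			continue;
		if (xa_is_value(page))
			continue;		/* shadow/swap/DAX entry */

		head = compound_head(page);
		if (!page_cache_get_speculative(head))
			goto reset;
		if (compound_head(page) != head)
			goto put;		/* split under us */
		if (unlikely(page != xas_reload(&xas)))
			goto put;		/* moved under us */

		process(page);
		put_page(head);
		continue;
put:
		put_page(head);
reset:
		xas_reset(&xas);
	}
	rcu_read_unlock();
}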
pgoff_t end, unsigned int nr_pages,
struct page **pages)
{
- struct radix_tree_iter iter;
- void **slot;
+ XA_STATE(xas, &mapping->i_pages, *start);
+ struct page *page;
unsigned ret = 0;
if (unlikely(!nr_pages))
return 0;
rcu_read_lock();
- radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, *start) {
- struct page *head, *page;
-
- if (iter.index > end)
- break;
- repeat:
- page = radix_tree_deref_slot(slot);
- if (unlikely(!page))
+ xas_for_each(&xas, page, end) {
+ struct page *head;
+ if (xas_retry(&xas, page))
continue;
-
- if (radix_tree_exception(page)) {
- if (radix_tree_deref_retry(page)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- /*
- * A shadow entry of a recently evicted page,
- * or a swap entry from shmem/tmpfs. Skip
- * over it.
- */
+ /* Skip over shadow, swap and DAX entries */
+ if (xa_is_value(page))
continue;
- }
head = compound_head(page);
if (!page_cache_get_speculative(head))
- goto repeat;
+ goto retry;
/* The page was split under us? */
- if (compound_head(page) != head) {
- put_page(head);
- goto repeat;
- }
+ if (compound_head(page) != head)
+ goto put_page;
/* Has the page moved? */
- if (unlikely(page != *slot)) {
- put_page(head);
- goto repeat;
- }
+ if (unlikely(page != xas_reload(&xas)))
+ goto put_page;
pages[ret] = page;
if (++ret == nr_pages) {
- *start = pages[ret - 1]->index + 1;
+ *start = page->index + 1;
goto out;
}
+ continue;
+ put_page:
+ put_page(head);
+ retry:
+ xas_reset(&xas);
}
/*
* We come here when there is no page beyond @end. We take care to not
* overflow the index @start as it confuses some of the callers. This
- * breaks the iteration when there is page at index -1 but that is
+ * breaks the iteration when there is a page at index -1 but that is
* already broken anyway.
*/
if (end == (pgoff_t)-1)
unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
unsigned int nr_pages, struct page **pages)
{
- struct radix_tree_iter iter;
- void **slot;
+ XA_STATE(xas, &mapping->i_pages, index);
+ struct page *page;
unsigned int ret = 0;
if (unlikely(!nr_pages))
return 0;
rcu_read_lock();
- radix_tree_for_each_contig(slot, &mapping->i_pages, &iter, index) {
- struct page *head, *page;
- repeat:
- page = radix_tree_deref_slot(slot);
- /* The hole, there no reason to continue */
- if (unlikely(!page))
- break;
-
- if (radix_tree_exception(page)) {
- if (radix_tree_deref_retry(page)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- /*
- * A shadow entry of a recently evicted page,
- * or a swap entry from shmem/tmpfs. Stop
- * looking for contiguous pages.
- */
+ for (page = xas_load(&xas); page; page = xas_next(&xas)) {
+ struct page *head;
+ if (xas_retry(&xas, page))
+ continue;
+ /*
+ * If the entry has been swapped out, we can stop looking.
+ * No current caller is looking for DAX entries.
+ */
+ if (xa_is_value(page))
break;
- }
head = compound_head(page);
if (!page_cache_get_speculative(head))
- goto repeat;
+ goto retry;
/* The page was split under us? */
- if (compound_head(page) != head) {
- put_page(head);
- goto repeat;
- }
+ if (compound_head(page) != head)
+ goto put_page;
/* Has the page moved? */
- if (unlikely(page != *slot)) {
- put_page(head);
- goto repeat;
- }
+ if (unlikely(page != xas_reload(&xas)))
+ goto put_page;
/*
* must check mapping and index after taking the ref.
* otherwise we can get both false positives and false
* negatives, which is just confusing to the caller.
*/
- if (page->mapping == NULL || page_to_pgoff(page) != iter.index) {
+ if (!page->mapping || page_to_pgoff(page) != xas.xa_index) {
put_page(page);
break;
}
pages[ret] = page;
if (++ret == nr_pages)
break;
+ continue;
+ put_page:
+ put_page(head);
+ retry:
+ xas_reset(&xas);
}
rcu_read_unlock();
return ret;
* @tag. We update @index to index the next page for the traversal.
*/
unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
- pgoff_t end, int tag, unsigned int nr_pages,
+ pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
struct page **pages)
{
- struct radix_tree_iter iter;
- void **slot;
+ XA_STATE(xas, &mapping->i_pages, *index);
+ struct page *page;
unsigned ret = 0;
if (unlikely(!nr_pages))
return 0;
rcu_read_lock();
- radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, *index, tag) {
- struct page *head, *page;
-
- if (iter.index > end)
- break;
- repeat:
- page = radix_tree_deref_slot(slot);
- if (unlikely(!page))
+ xas_for_each_marked(&xas, page, end, tag) {
+ struct page *head;
+ if (xas_retry(&xas, page))
continue;
-
- if (radix_tree_exception(page)) {
- if (radix_tree_deref_retry(page)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- /*
- * A shadow entry of a recently evicted page.
- *
- * Those entries should never be tagged, but
- * this tree walk is lockless and the tags are
- * looked up in bulk, one radix tree node at a
- * time, so there is a sizable window for page
- * reclaim to evict a page we saw tagged.
- *
- * Skip over it.
- */
+ /*
+ * Shadow entries should never be tagged, but this iteration
+ * is lockless so there is a window for page reclaim to evict
+ * a page we saw tagged. Skip over it.
+ */
+ if (xa_is_value(page))
continue;
- }
head = compound_head(page);
if (!page_cache_get_speculative(head))
- goto repeat;
+ goto retry;
/* The page was split under us? */
- if (compound_head(page) != head) {
- put_page(head);
- goto repeat;
- }
+ if (compound_head(page) != head)
+ goto put_page;
/* Has the page moved? */
- if (unlikely(page != *slot)) {
- put_page(head);
- goto repeat;
- }
+ if (unlikely(page != xas_reload(&xas)))
+ goto put_page;
pages[ret] = page;
if (++ret == nr_pages) {
- *index = pages[ret - 1]->index + 1;
+ *index = page->index + 1;
goto out;
}
+ continue;
+ put_page:
+ put_page(head);
+ retry:
+ xas_reset(&xas);
}
/*
- * We come here when we got at @end. We take care to not overflow the
+ * We come here when we got to @end. We take care to not overflow the
* index @index as it confuses some of the callers. This breaks the
- * iteration when there is page at index -1 but that is already broken
- * anyway.
+ * iteration when there is a page at index -1 but that is already
+ * broken anyway.
*/
if (end == (pgoff_t)-1)
*index = (pgoff_t)-1;
* @tag.
*/
unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
- int tag, unsigned int nr_entries,
+ xa_mark_t tag, unsigned int nr_entries,
struct page **entries, pgoff_t *indices)
{
- void **slot;
+ XA_STATE(xas, &mapping->i_pages, start);
+ struct page *page;
unsigned int ret = 0;
- struct radix_tree_iter iter;
if (!nr_entries)
return 0;
rcu_read_lock();
- radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start, tag) {
- struct page *head, *page;
- repeat:
- page = radix_tree_deref_slot(slot);
- if (unlikely(!page))
+ xas_for_each_marked(&xas, page, ULONG_MAX, tag) {
+ struct page *head;
+ if (xas_retry(&xas, page))
continue;
- if (radix_tree_exception(page)) {
- if (radix_tree_deref_retry(page)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
-
- /*
- * A shadow entry of a recently evicted page, a swap
- * entry from shmem/tmpfs or a DAX entry. Return it
- * without attempting to raise page count.
- */
+ /*
+ * A shadow entry of a recently evicted page, a swap
+ * entry from shmem/tmpfs or a DAX entry. Return it
+ * without attempting to raise page count.
+ */
+ if (xa_is_value(page))
goto export;
- }
head = compound_head(page);
if (!page_cache_get_speculative(head))
- goto repeat;
+ goto retry;
/* The page was split under us? */
- if (compound_head(page) != head) {
- put_page(head);
- goto repeat;
- }
+ if (compound_head(page) != head)
+ goto put_page;
/* Has the page moved? */
- if (unlikely(page != *slot)) {
- put_page(head);
- goto repeat;
- }
+ if (unlikely(page != xas_reload(&xas)))
+ goto put_page;
+
export:
- indices[ret] = iter.index;
+ indices[ret] = xas.xa_index;
entries[ret] = page;
if (++ret == nr_entries)
break;
+ continue;
+ put_page:
+ put_page(head);
+ retry:
+ xas_reset(&xas);
}
rcu_read_unlock();
return ret;
* system is low on memory, or a problem occurs while trying
* to schedule I/O.
*/
- if (error == -ENOMEM)
- return VM_FAULT_OOM;
- return VM_FAULT_SIGBUS;
+ return vmf_error(error);
page_not_uptodate:
/*
void filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff)
{
- struct radix_tree_iter iter;
- void **slot;
struct file *file = vmf->vma->vm_file;
struct address_space *mapping = file->f_mapping;
pgoff_t last_pgoff = start_pgoff;
unsigned long max_idx;
+ XA_STATE(xas, &mapping->i_pages, start_pgoff);
struct page *head, *page;
rcu_read_lock();
- radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start_pgoff) {
- if (iter.index > end_pgoff)
- break;
- repeat:
- page = radix_tree_deref_slot(slot);
- if (unlikely(!page))
- goto next;
- if (radix_tree_exception(page)) {
- if (radix_tree_deref_retry(page)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
+ xas_for_each(&xas, page, end_pgoff) {
+ if (xas_retry(&xas, page))
+ continue;
+ if (xa_is_value(page))
goto next;
- }
head = compound_head(page);
if (!page_cache_get_speculative(head))
- goto repeat;
+ goto next;
/* The page was split under us? */
- if (compound_head(page) != head) {
- put_page(head);
- goto repeat;
- }
+ if (compound_head(page) != head)
+ goto skip;
/* Has the page moved? */
- if (unlikely(page != *slot)) {
- put_page(head);
- goto repeat;
- }
+ if (unlikely(page != xas_reload(&xas)))
+ goto skip;
if (!PageUptodate(page) ||
PageReadahead(page) ||
if (file->f_ra.mmap_miss > 0)
file->f_ra.mmap_miss--;
- vmf->address += (iter.index - last_pgoff) << PAGE_SHIFT;
+ vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
if (vmf->pte)
- vmf->pte += iter.index - last_pgoff;
- last_pgoff = iter.index;
+ vmf->pte += xas.xa_index - last_pgoff;
+ last_pgoff = xas.xa_index;
if (alloc_set_pte(vmf, NULL, page))
goto unlock;
unlock_page(page);
/* Huge page is mapped? No need to proceed. */
if (pmd_trans_huge(*vmf->pmd))
break;
- if (iter.index == end_pgoff)
- break;
}
rcu_read_unlock();
}
return generic_file_mmap(file, vma);
}
#else
-int filemap_page_mkwrite(struct vm_fault *vmf)
+vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
{
- return -ENOSYS;
+ return VM_FAULT_SIGBUS;
}
int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
{
put_page(page);
if (err == -EEXIST)
goto repeat;
- /* Presumably ENOMEM for radix tree node */
+ /* Presumably ENOMEM for xarray node */
return ERR_PTR(err);
}
if (iocb->ki_flags & IOCB_NOWAIT) {
/* If there are pages to writeback, return */
if (filemap_range_has_page(inode->i_mapping, pos,
- pos + iov_iter_count(from)))
+ pos + write_len))
return -EAGAIN;
} else {
written = filemap_write_and_wait_range(mapping, pos,
}
struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd, int flags)
+ pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
{
unsigned long pfn = pmd_pfn(*pmd);
struct mm_struct *mm = vma->vm_mm;
- struct dev_pagemap *pgmap;
struct page *page;
assert_spin_locked(pmd_lockptr(mm, pmd));
return ERR_PTR(-EEXIST);
pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
- pgmap = get_dev_pagemap(pfn, NULL);
- if (!pgmap)
+ *pgmap = get_dev_pagemap(pfn, *pgmap);
+ if (!*pgmap)
return ERR_PTR(-EFAULT);
page = pfn_to_page(pfn);
get_page(page);
- put_dev_pagemap(pgmap);
return page;
}
}
struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
- pud_t *pud, int flags)
+ pud_t *pud, int flags, struct dev_pagemap **pgmap)
{
unsigned long pfn = pud_pfn(*pud);
struct mm_struct *mm = vma->vm_mm;
- struct dev_pagemap *pgmap;
struct page *page;
assert_spin_locked(pud_lockptr(mm, pud));
return ERR_PTR(-EEXIST);
pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT;
- pgmap = get_dev_pagemap(pfn, NULL);
- if (!pgmap)
+ *pgmap = get_dev_pagemap(pfn, *pgmap);
+ if (!*pgmap)
return ERR_PTR(-EFAULT);
page = pfn_to_page(pfn);
get_page(page);
- put_dev_pagemap(pgmap);
return page;
}
* We are not sure a pending tlb flush here is for a huge page
* mapping or not. Hence use the tlb range variant
*/
- if (mm_tlb_flush_pending(vma->vm_mm))
+ if (mm_tlb_flush_pending(vma->vm_mm)) {
flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+ /*
+ * change_huge_pmd() released the pmd lock before
+ * invalidating the secondary MMUs sharing the primary
+ * MMU pagetables (with ->invalidate_range()). The
+ * mmu_notifier_invalidate_range_end() (which
+ * internally calls ->invalidate_range()) in
+ * change_pmd_range() will run after us, so we can't
+ * rely on it here and we need an explicit invalidate.
+ */
+ mmu_notifier_invalidate_range(vma->vm_mm, haddr,
+ haddr + HPAGE_PMD_SIZE);
+ }
/*
* Migrate the THP to the requested node, returns with page unlocked
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, unsigned long old_end,
- pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+ pmd_t *old_pmd, pmd_t *new_pmd)
{
spinlock_t *old_ptl, *new_ptl;
pmd_t pmd;
if (new_ptl != old_ptl)
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
- if (pmd_present(pmd) && pmd_dirty(pmd))
+ if (pmd_present(pmd))
force_flush = true;
VM_BUG_ON(!pmd_none(*new_pmd));
}
pmd = move_soft_dirty_pmd(pmd);
set_pmd_at(mm, new_addr, new_pmd, pmd);
- if (new_ptl != old_ptl)
- spin_unlock(new_ptl);
if (force_flush)
flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
- else
- *need_flush = true;
+ if (new_ptl != old_ptl)
+ spin_unlock(new_ptl);
spin_unlock(old_ptl);
return true;
}
(1L << PG_mlocked) |
(1L << PG_uptodate) |
(1L << PG_active) |
+ (1L << PG_workingset) |
(1L << PG_locked) |
(1L << PG_unevictable) |
(1L << PG_dirty)));
ClearPageCompound(head);
/* See comment in __split_huge_page_tail() */
if (PageAnon(head)) {
- /* Additional pin to radix tree of swap cache */
+ /* Additional pin to swap cache */
if (PageSwapCache(head))
page_ref_add(head, 2);
else
page_ref_inc(head);
} else {
- /* Additional pin to radix tree */
+ /* Additional pin to page cache */
page_ref_add(head, 2);
xa_unlock(&head->mapping->i_pages);
}
{
int extra_pins;
- /* Additional pins from radix tree */
+ /* Additional pins from page cache */
if (PageAnon(page))
extra_pins = PageSwapCache(page) ? HPAGE_PMD_NR : 0;
else
spin_lock_irqsave(zone_lru_lock(page_zone(head)), flags);
if (mapping) {
- void **pslot;
+ XA_STATE(xas, &mapping->i_pages, page_index(head));
- xa_lock(&mapping->i_pages);
- pslot = radix_tree_lookup_slot(&mapping->i_pages,
- page_index(head));
/*
- * Check if the head page is present in radix tree.
+ * Check if the head page is present in page cache.
* We assume all tail are present too, if head is there.
*/
- if (radix_tree_deref_slot_protected(pslot,
- &mapping->i_pages.xa_lock) != head)
+ xa_lock(&mapping->i_pages);
+ if (xas_load(&xas) != head)
goto fail;
}
if (!(pvmw->pmd && !pvmw->pte))
return;
- mmu_notifier_invalidate_range_start(mm, address,
- address + HPAGE_PMD_SIZE);
-
flush_cache_range(vma, address, address + HPAGE_PMD_SIZE);
pmdval = *pvmw->pmd;
pmdp_invalidate(vma, address, pvmw->pmd);
set_pmd_at(mm, address, pvmw->pmd, pmdswp);
page_remove_rmap(page, true);
put_page(page);
-
- mmu_notifier_invalidate_range_end(mm, address,
- address + HPAGE_PMD_SIZE);
}
void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
else
page_add_file_rmap(new, true);
set_pmd_at(mm, mmun_start, pvmw->pmd, pmde);
- if (vma->vm_flags & VM_LOCKED)
+ if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new))
mlock_vma_page(new);
update_mmu_cache_pmd(vma, address, pvmw->pmd);
}
new_flags |= VM_DONTDUMP;
break;
case MADV_DODUMP:
- if (new_flags & VM_SPECIAL) {
+ if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) {
error = -EINVAL;
goto out;
}
index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
page = find_get_entry(mapping, index);
- if (!radix_tree_exceptional_entry(page)) {
+ if (!xa_is_value(page)) {
if (page)
put_page(page);
continue;
if (order > PAGE_ALLOC_COSTLY_ORDER)
return OOM_SKIPPED;
+ memcg_memory_event(memcg, MEMCG_OOM);
+
/*
* We are in the middle of the charge context here, so we
* don't want to block when potentially sitting on a callstack
if (fatal_signal_pending(current))
goto force;
- memcg_memory_event(mem_over_limit, MEMCG_OOM);
-
/*
* keep retrying as long as the memcg oom killer is able to make
* a forward progress or bypass the charge if the oom killer
/*
* Enqueue the creation of a per-memcg kmem_cache.
*/
-static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
+static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
struct kmem_cache *cachep)
{
struct memcg_kmem_cache_create_work *cw;
queue_work(memcg_kmem_cache_wq, &cw->work);
}
-static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
- struct kmem_cache *cachep)
-{
- /*
- * We need to stop accounting when we kmalloc, because if the
- * corresponding kmalloc cache is not yet created, the first allocation
- * in __memcg_schedule_kmem_cache_create will recurse.
- *
- * However, it is better to enclose the whole function. Depending on
- * the debugging options enabled, INIT_WORK(), for instance, can
- * trigger an allocation. This too, will make us recurse. Because at
- * this point we can't allow ourselves back into memcg_kmem_get_cache,
- * the safest choice is to do it like this, wrapping the whole function.
- */
- current->memcg_kmem_skip_account = 1;
- __memcg_schedule_kmem_cache_create(memcg, cachep);
- current->memcg_kmem_skip_account = 0;
-}
-
static inline bool memcg_kmem_bypass(void)
{
if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
if (memcg_kmem_bypass())
return cachep;
- if (current->memcg_kmem_skip_account)
- return cachep;
-
memcg = get_mem_cgroup_from_current();
kmemcg_id = READ_ONCE(memcg->kmemcg_id);
if (kmemcg_id < 0)
static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
{
- VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0);
- atomic_add(n, &memcg->id.ref);
+ refcount_add(n, &memcg->id.ref);
}
static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
{
- VM_BUG_ON(atomic_read(&memcg->id.ref) < n);
- if (atomic_sub_and_test(n, &memcg->id.ref)) {
+ if (refcount_sub_and_test(n, &memcg->id.ref)) {
mem_cgroup_id_remove(memcg);
/* Memcg ID pins CSS */
}
/* Online state pins memcg ID, memcg ID pins CSS */
- atomic_set(&memcg->id.ref, 1);
+ refcount_set(&memcg->id.ref, 1);
css_get(css);
return 0;
}
memcg_offline_kmem(memcg);
wb_memcg_offline(memcg);
+ drain_all_stock(memcg);
+
mem_cgroup_id_put(memcg);
}
/* shmem/tmpfs may report page out on swap: account for that too. */
if (shmem_mapping(mapping)) {
page = find_get_entry(mapping, pgoff);
- if (radix_tree_exceptional_entry(page)) {
+ if (xa_is_value(page)) {
swp_entry_t swp = radix_to_swp_entry(page);
if (do_memsw_account())
*entry = swp;
seq_printf(m, "pgfault %lu\n", acc.events[PGFAULT]);
seq_printf(m, "pgmajfault %lu\n", acc.events[PGMAJFAULT]);
+ seq_printf(m, "workingset_refault %lu\n",
+ acc.stat[WORKINGSET_REFAULT]);
+ seq_printf(m, "workingset_activate %lu\n",
+ acc.stat[WORKINGSET_ACTIVATE]);
+ seq_printf(m, "workingset_nodereclaim %lu\n",
+ acc.stat[WORKINGSET_NODERECLAIM]);
+
seq_printf(m, "pgrefill %lu\n", acc.events[PGREFILL]);
seq_printf(m, "pgscan %lu\n", acc.events[PGSCAN_KSWAPD] +
acc.events[PGSCAN_DIRECT]);
seq_printf(m, "pglazyfree %lu\n", acc.events[PGLAZYFREE]);
seq_printf(m, "pglazyfreed %lu\n", acc.events[PGLAZYFREED]);
- seq_printf(m, "workingset_refault %lu\n",
- acc.stat[WORKINGSET_REFAULT]);
- seq_printf(m, "workingset_activate %lu\n",
- acc.stat[WORKINGSET_ACTIVATE]);
- seq_printf(m, "workingset_nodereclaim %lu\n",
- acc.stat[WORKINGSET_NODERECLAIM]);
-
return 0;
}
#ifdef CONFIG_MEMCG_SWAP
static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
{
- while (!atomic_inc_not_zero(&memcg->id.ref)) {
+ while (!refcount_inc_not_zero(&memcg->id.ref)) {
/*
* The root cgroup cannot be destroyed, so its refcount must
* always be >= 1.
if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
mlock_vma_page(new);
+ if (PageTransHuge(page) && PageMlocked(page))
+ clear_page_mlock(page);
+
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, pvmw.address, pvmw.pte);
}
page = migration_entry_to_page(entry);
/*
- * Once radix-tree replacement of page migration started, page_count
+ * Once page cache replacement of page migration started, page_count
* *must* be zero. And, we don't want to call wait_on_page_locked()
* against a page without get_page().
* So, we use get_page_unless_zero(), here. Even failed, page fault
struct buffer_head *head, enum migrate_mode mode,
int extra_count)
{
+ XA_STATE(xas, &mapping->i_pages, page_index(page));
struct zone *oldzone, *newzone;
int dirty;
int expected_count = 1 + extra_count;
- void **pslot;
/*
* Device public or private pages have an extra refcount as they are
oldzone = page_zone(page);
newzone = page_zone(newpage);
- xa_lock_irq(&mapping->i_pages);
-
- pslot = radix_tree_lookup_slot(&mapping->i_pages,
- page_index(page));
+ xas_lock_irq(&xas);
expected_count += hpage_nr_pages(page) + page_has_private(page);
- if (page_count(page) != expected_count ||
- radix_tree_deref_slot_protected(pslot,
- &mapping->i_pages.xa_lock) != page) {
- xa_unlock_irq(&mapping->i_pages);
+ if (page_count(page) != expected_count || xas_load(&xas) != page) {
+ xas_unlock_irq(&xas);
return -EAGAIN;
}
if (!page_ref_freeze(page, expected_count)) {
- xa_unlock_irq(&mapping->i_pages);
+ xas_unlock_irq(&xas);
return -EAGAIN;
}
if (mode == MIGRATE_ASYNC && head &&
!buffer_migrate_lock_buffers(head, mode)) {
page_ref_unfreeze(page, expected_count);
- xa_unlock_irq(&mapping->i_pages);
+ xas_unlock_irq(&xas);
return -EAGAIN;
}
SetPageDirty(newpage);
}
- radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
+ xas_store(&xas, newpage);
if (PageTransHuge(page)) {
int i;
- int index = page_index(page);
for (i = 1; i < HPAGE_PMD_NR; i++) {
- pslot = radix_tree_lookup_slot(&mapping->i_pages,
- index + i);
- radix_tree_replace_slot(&mapping->i_pages, pslot,
- newpage + i);
+ xas_next(&xas);
+ xas_store(&xas, newpage + i);
}
}
*/
page_ref_unfreeze(page, expected_count - hpage_nr_pages(page));
- xa_unlock(&mapping->i_pages);
+ xas_unlock(&xas);
/* Leave irq disabled to prevent preemption while updating stats */
/*
int migrate_huge_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page)
{
+ XA_STATE(xas, &mapping->i_pages, page_index(page));
int expected_count;
- void **pslot;
-
- xa_lock_irq(&mapping->i_pages);
-
- pslot = radix_tree_lookup_slot(&mapping->i_pages, page_index(page));
+ xas_lock_irq(&xas);
expected_count = 2 + page_has_private(page);
- if (page_count(page) != expected_count ||
- radix_tree_deref_slot_protected(pslot, &mapping->i_pages.xa_lock) != page) {
- xa_unlock_irq(&mapping->i_pages);
+ if (page_count(page) != expected_count || xas_load(&xas) != page) {
+ xas_unlock_irq(&xas);
return -EAGAIN;
}
if (!page_ref_freeze(page, expected_count)) {
- xa_unlock_irq(&mapping->i_pages);
+ xas_unlock_irq(&xas);
return -EAGAIN;
}
get_page(newpage);
- radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
+ xas_store(&xas, newpage);
page_ref_unfreeze(page, expected_count - 1);
- xa_unlock_irq(&mapping->i_pages);
+ xas_unlock_irq(&xas);
return MIGRATEPAGE_SUCCESS;
}
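migrate_page_move_mapping() above also shows the multi-slot replacement idiom used for THP: with the cursor positioned on the head index, store the new head, then xas_next()/xas_store() for each tail slot while the lock is held. A hedged generic sketch; replace_run() is illustrative and assumes every slot is already populated, so no allocation can occur and the stores cannot fail.

static void replace_run(struct xarray *xa, unsigned long index,
			void **new, unsigned int nr)
{
	XA_STATE(xas, xa, index);
	unsigned int i;

	xas_lock_irq(&xas);
	xas_store(&xas, new[0]);
	for (i = 1; i < nr; i++) {
		xas_next(&xas);			/* move to the next slot */
		xas_store(&xas, new[i]);	/* overwrite, never allocates */
	}
	xas_unlock_irq(&xas);
}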
SetPageActive(newpage);
} else if (TestClearPageUnevictable(page))
SetPageUnevictable(newpage);
+ if (PageWorkingset(page))
+ SetPageWorkingset(newpage);
if (PageChecked(page))
SetPageChecked(newpage);
if (PageMappedToDisk(page))
* we encounter them after the rest of the list
* is processed.
*/
- if (PageTransHuge(page)) {
+ if (PageTransHuge(page) && !PageHuge(page)) {
lock_page(page);
rc = split_huge_page_to_list(page, from);
unlock_page(page);
return newpage;
}
-/*
- * page migration rate limiting control.
- * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs
- * window of time. Default here says do not migrate more than 1280M per second.
- */
-static unsigned int migrate_interval_millisecs __read_mostly = 100;
-static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
-
-/* Returns true if the node is migrate rate-limited after the update */
-static bool numamigrate_update_ratelimit(pg_data_t *pgdat,
- unsigned long nr_pages)
-{
- /*
- * Rate-limit the amount of data that is being migrated to a node.
- * Optimal placement is no good if the memory bus is saturated and
- * all the time is being spent migrating!
- */
- if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
- spin_lock(&pgdat->numabalancing_migrate_lock);
- pgdat->numabalancing_migrate_nr_pages = 0;
- pgdat->numabalancing_migrate_next_window = jiffies +
- msecs_to_jiffies(migrate_interval_millisecs);
- spin_unlock(&pgdat->numabalancing_migrate_lock);
- }
- if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
- trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,
- nr_pages);
- return true;
- }
-
- /*
- * This is an unlocked non-atomic update so errors are possible.
- * The consequences are failing to migrate when we potentiall should
- * have which is not severe enough to warrant locking. If it is ever
- * a problem, it can be converted to a per-cpu counter.
- */
- pgdat->numabalancing_migrate_nr_pages += nr_pages;
- return false;
-}
-
static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
{
int page_lru;
if (page_is_file_cache(page) && PageDirty(page))
goto out;
- /*
- * Rate-limit the amount of data that is being migrated to a node.
- * Optimal placement is no good if the memory bus is saturated and
- * all the time is being spent migrating!
- */
- if (numamigrate_update_ratelimit(pgdat, 1))
- goto out;
-
isolated = numamigrate_isolate_page(pgdat, page);
if (!isolated)
goto out;
int isolated = 0;
struct page *new_page = NULL;
int page_lru = page_is_file_cache(page);
- unsigned long mmun_start = address & HPAGE_PMD_MASK;
- unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
-
- /*
- * Rate-limit the amount of data that is being migrated to a node.
- * Optimal placement is no good if the memory bus is saturated and
- * all the time is being spent migrating!
- */
- if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR))
- goto out_dropref;
+ unsigned long start = address & HPAGE_PMD_MASK;
new_page = alloc_pages_node(node,
(GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
/* anon mapping, we can simply copy page->mapping to the new page: */
new_page->mapping = page->mapping;
new_page->index = page->index;
+ /* flush the cache before copying using the kernel virtual address */
+ flush_cache_range(vma, start, start + HPAGE_PMD_SIZE);
migrate_page_copy(new_page, page);
WARN_ON(PageLRU(new_page));
/* Recheck the target PMD */
- mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
ptl = pmd_lock(mm, pmd);
if (unlikely(!pmd_same(*pmd, entry) || !page_ref_freeze(page, 2))) {
spin_unlock(ptl);
- mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
/* Reverse changes made by migrate_page_copy() */
if (TestClearPageActive(new_page))
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
/*
- * Clear the old entry under pagetable lock and establish the new PTE.
- * Any parallel GUP will either observe the old page blocking on the
- * page lock, block on the page table lock or observe the new page.
- * The SetPageUptodate on the new page and page_add_new_anon_rmap
- * guarantee the copy is visible before the pagetable update.
+ * Overwrite the old entry under pagetable lock and establish
+ * the new PTE. Any parallel GUP will either observe the old
+ * page blocking on the page lock, block on the page table
+ * lock or observe the new page. The SetPageUptodate on the
+ * new page and page_add_new_anon_rmap guarantee the copy is
+ * visible before the pagetable update.
+ */
+ page_add_anon_rmap(new_page, vma, start, true);
+ /*
+ * At this point the pmd is numa/protnone (i.e. non present) and the TLB
+ * has already been flushed globally. So no TLB can be currently
+ * caching this non present pmd mapping. There's no need to clear the
+ * pmd before doing set_pmd_at(), nor to flush the TLB after
+ * set_pmd_at(). Clearing the pmd here would introduce a race
+ * condition against MADV_DONTNEED, because MADV_DONTNEED only holds the
+ * mmap_sem for reading. If the pmd is set to NULL at any given time,
+ * MADV_DONTNEED won't wait on the pmd lock and it'll skip clearing this
+ * pmd.
*/
- flush_cache_range(vma, mmun_start, mmun_end);
- page_add_anon_rmap(new_page, vma, mmun_start, true);
- pmdp_huge_clear_flush_notify(vma, mmun_start, pmd);
- set_pmd_at(mm, mmun_start, pmd, entry);
+ set_pmd_at(mm, start, pmd, entry);
update_mmu_cache_pmd(vma, address, &entry);
page_ref_unfreeze(page, 2);
set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED);
spin_unlock(ptl);
- /*
- * No need to double call mmu_notifier->invalidate_range() callback as
- * the above pmdp_huge_clear_flush_notify() did already call it.
- */
- mmu_notifier_invalidate_range_only_end(mm, mmun_start, mmun_end);
/* Take an "isolate" reference and put new page on the LRU. */
get_page(new_page);
out_fail:
count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
-out_dropref:
ptl = pmd_lock(mm, pmd);
if (pmd_same(*pmd, entry)) {
entry = pmd_modify(entry, vma->vm_page_prot);
- set_pmd_at(mm, mmun_start, pmd, entry);
+ set_pmd_at(mm, start, pmd, entry);
update_mmu_cache_pmd(vma, address, &entry);
}
spin_unlock(ptl);
* dirty pages in the file (thus it is important for this function to be quick
* so that it can tag pages faster than a dirtying process can create them).
*/
- /*
- * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce the i_pages lock
- * latency.
- */
void tag_pages_for_writeback(struct address_space *mapping,
pgoff_t start, pgoff_t end)
{
- #define WRITEBACK_TAG_BATCH 4096
- unsigned long tagged = 0;
- struct radix_tree_iter iter;
- void **slot;
+ XA_STATE(xas, &mapping->i_pages, start);
+ unsigned int tagged = 0;
+ void *page;
- xa_lock_irq(&mapping->i_pages);
- radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start,
- PAGECACHE_TAG_DIRTY) {
- if (iter.index > end)
- break;
- radix_tree_iter_tag_set(&mapping->i_pages, &iter,
- PAGECACHE_TAG_TOWRITE);
- tagged++;
- if ((tagged % WRITEBACK_TAG_BATCH) != 0)
+ xas_lock_irq(&xas);
+ xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
+ xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
+ if (++tagged % XA_CHECK_SCHED)
continue;
- slot = radix_tree_iter_resume(slot, &iter);
- xa_unlock_irq(&mapping->i_pages);
+
+ xas_pause(&xas);
+ xas_unlock_irq(&xas);
cond_resched();
- xa_lock_irq(&mapping->i_pages);
+ xas_lock_irq(&xas);
}
- xa_unlock_irq(&mapping->i_pages);
+ xas_unlock_irq(&xas);
}
EXPORT_SYMBOL(tag_pages_for_writeback);
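tag_pages_for_writeback() above demonstrates the xas_pause() batching idiom that replaces the old WRITEBACK_TAG_BATCH/radix_tree_iter_resume() dance: walk the marked entries under the lock and, every XA_CHECK_SCHED entries, pause the iterator so the lock can be dropped and the CPU rescheduled. A hedged sketch against generic marks; XA_MARK_0/XA_MARK_1 stand in for the pagecache tags and mark_all_towrite() is an illustrative name.

static unsigned int mark_all_towrite(struct xarray *xa, unsigned long start,
				     unsigned long end)
{
	XA_STATE(xas, xa, start);
	unsigned int tagged = 0;
	void *entry;

	xas_lock_irq(&xas);
	xas_for_each_marked(&xas, entry, end, XA_MARK_0) {
		xas_set_mark(&xas, XA_MARK_1);
		if (++tagged % XA_CHECK_SCHED)
			continue;
		xas_pause(&xas);	/* make the state safe to drop the lock */
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);

	return tagged;
}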
* not miss some pages (e.g., because some other process has cleared TOWRITE
* tag we set). The rule we follow is that TOWRITE tag can be cleared only
* by the process clearing the DIRTY tag (and submitting the page for IO).
+ *
+ * To avoid deadlocks between range_cyclic writeback and callers that hold
+ * pages in PageWriteback to aggregate IO until write_cache_pages() returns,
+ * we do not loop back to the start of the file. Doing so causes a page
+ * lock/page writeback access order inversion - we should only ever lock
+ * multiple pages in ascending page->index order, and looping back to the start
+ * of the file violates that rule and causes deadlocks.
*/
int write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc, writepage_t writepage,
pgoff_t index;
pgoff_t end; /* Inclusive */
pgoff_t done_index;
- int cycled;
int range_whole = 0;
- int tag;
+ xa_mark_t tag;
pagevec_init(&pvec);
if (wbc->range_cyclic) {
writeback_index = mapping->writeback_index; /* prev offset */
index = writeback_index;
- if (index == 0)
- cycled = 1;
- else
- cycled = 0;
end = -1;
} else {
index = wbc->range_start >> PAGE_SHIFT;
end = wbc->range_end >> PAGE_SHIFT;
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
- cycled = 1; /* ignore range_cyclic tests */
}
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag = PAGECACHE_TAG_TOWRITE;
else
tag = PAGECACHE_TAG_DIRTY;
-retry:
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag_pages_for_writeback(mapping, index, end);
done_index = index;
pagevec_release(&pvec);
cond_resched();
}
- if (!cycled && !done) {
- /*
- * range_cyclic:
- * We hit the last page and there is more work to be done: wrap
- * back to the start of the file
- */
- cycled = 1;
- index = 0;
- end = writeback_index - 1;
- goto retry;
- }
+
+ /*
+ * If we hit the last page and there is more work to be done: wrap
+ * the index back to the start of the file for the next time we
+ * are called.
+ */
+ if (wbc->range_cyclic && !done)
+ done_index = 0;
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = done_index;
/*
* For address_spaces which do not use buffers. Just tag the page as dirty in
- * its radix tree.
+ * the xarray.
*
* This is also used when a single buffer is being dirtied: we want to set the
* page dirty in that case, but not all the buffers. This is a "bottom-up"
BUG_ON(page_mapping(page) != mapping);
WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
account_page_dirtied(page, mapping);
- radix_tree_tag_set(&mapping->i_pages, page_index(page),
+ __xa_set_mark(&mapping->i_pages, page_index(page),
PAGECACHE_TAG_DIRTY);
xa_unlock_irqrestore(&mapping->i_pages, flags);
unlock_page_memcg(page);
* Returns true if the page was previously dirty.
*
* This is for preparing to put the page under writeout. We leave the page
- * tagged as dirty in the radix tree so that a concurrent write-for-sync
+ * tagged as dirty in the xarray so that a concurrent write-for-sync
* can discover it via a PAGECACHE_TAG_DIRTY walk. The ->writepage
* implementation will run either set_page_writeback() or set_page_dirty(),
- * at which stage we bring the page's dirty flag and radix-tree dirty tag
+ * at which stage we bring the page's dirty flag and xarray dirty tag
* back into sync.
*
- * This incoherency between the page's dirty flag and radix-tree tag is
+ * This incoherency between the page's dirty flag and xarray tag is
* unfortunate, but it only exists while the page is locked.
*/
int clear_page_dirty_for_io(struct page *page)
xa_lock_irqsave(&mapping->i_pages, flags);
ret = TestClearPageWriteback(page);
if (ret) {
- radix_tree_tag_clear(&mapping->i_pages, page_index(page),
+ __xa_clear_mark(&mapping->i_pages, page_index(page),
PAGECACHE_TAG_WRITEBACK);
if (bdi_cap_account_writeback(bdi)) {
struct bdi_writeback *wb = inode_to_wb(inode);
lock_page_memcg(page);
if (mapping && mapping_use_writeback_tags(mapping)) {
+ XA_STATE(xas, &mapping->i_pages, page_index(page));
struct inode *inode = mapping->host;
struct backing_dev_info *bdi = inode_to_bdi(inode);
unsigned long flags;
- xa_lock_irqsave(&mapping->i_pages, flags);
+ xas_lock_irqsave(&xas, flags);
+ xas_load(&xas);
ret = TestSetPageWriteback(page);
if (!ret) {
bool on_wblist;
on_wblist = mapping_tagged(mapping,
PAGECACHE_TAG_WRITEBACK);
- radix_tree_tag_set(&mapping->i_pages, page_index(page),
- PAGECACHE_TAG_WRITEBACK);
+ xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
if (bdi_cap_account_writeback(bdi))
inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
sb_mark_inode_writeback(mapping->host);
}
if (!PageDirty(page))
- radix_tree_tag_clear(&mapping->i_pages, page_index(page),
- PAGECACHE_TAG_DIRTY);
+ xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
if (!keep_write)
- radix_tree_tag_clear(&mapping->i_pages, page_index(page),
- PAGECACHE_TAG_TOWRITE);
- xa_unlock_irqrestore(&mapping->i_pages, flags);
+ xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
+ xas_unlock_irqrestore(&xas, flags);
} else {
ret = TestSetPageWriteback(page);
}
}
EXPORT_SYMBOL(__test_set_page_writeback);
- /*
- * Return true if any of the pages in the mapping are marked with the
- * passed tag.
- */
- int mapping_tagged(struct address_space *mapping, int tag)
- {
- return radix_tree_tagged(&mapping->i_pages, tag);
- }
- EXPORT_SYMBOL(mapping_tagged);
-
/**
* wait_for_stable_page() - wait for writeback to finish, if necessary.
* @page: The page to wait on.
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/backing-dev.h>
-#include <linux/memremap.h>
#include <linux/memcontrol.h>
#include <linux/gfp.h>
#include <linux/uio.h>
for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
struct page *page = pvec->pages[i];
- if (!radix_tree_exceptional_entry(page))
+ if (!xa_is_value(page))
pvec->pages[j++] = page;
}
pvec->nr = j;
unsigned pagevec_lookup_range_tag(struct pagevec *pvec,
struct address_space *mapping, pgoff_t *index, pgoff_t end,
- int tag)
+ xa_mark_t tag)
{
pvec->nr = find_get_pages_range_tag(mapping, index, end, tag,
PAGEVEC_SIZE, pvec->pages);
unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec,
struct address_space *mapping, pgoff_t *index, pgoff_t end,
- int tag, unsigned max_pages)
+ xa_mark_t tag, unsigned max_pages)
{
pvec->nr = find_get_pages_range_tag(mapping, index, end, tag,
min_t(unsigned int, max_pages, PAGEVEC_SIZE), pvec->pages);
}
/*
- * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
+ * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
* but sets SwapCache flag and private instead of mapping and index.
*/
- int __add_to_swap_cache(struct page *page, swp_entry_t entry)
+ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp)
{
- int error, i, nr = hpage_nr_pages(page);
- struct address_space *address_space;
+ struct address_space *address_space = swap_address_space(entry);
pgoff_t idx = swp_offset(entry);
+ XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page));
+ unsigned long i, nr = 1UL << compound_order(page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageSwapCache(page), page);
page_ref_add(page, nr);
SetPageSwapCache(page);
- address_space = swap_address_space(entry);
- xa_lock_irq(&address_space->i_pages);
- for (i = 0; i < nr; i++) {
- set_page_private(page + i, entry.val + i);
- error = radix_tree_insert(&address_space->i_pages,
- idx + i, page + i);
- if (unlikely(error))
- break;
- }
- if (likely(!error)) {
+ do {
+ xas_lock_irq(&xas);
+ xas_create_range(&xas);
+ if (xas_error(&xas))
+ goto unlock;
+ for (i = 0; i < nr; i++) {
+ VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
+ set_page_private(page + i, entry.val + i);
+ xas_store(&xas, page + i);
+ xas_next(&xas);
+ }
address_space->nrpages += nr;
__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
ADD_CACHE_INFO(add_total, nr);
- } else {
- /*
- * Only the context which have set SWAP_HAS_CACHE flag
- * would call add_to_swap_cache().
- * So add_to_swap_cache() doesn't returns -EEXIST.
- */
- VM_BUG_ON(error == -EEXIST);
- set_page_private(page + i, 0UL);
- while (i--) {
- radix_tree_delete(&address_space->i_pages, idx + i);
- set_page_private(page + i, 0UL);
- }
- ClearPageSwapCache(page);
- page_ref_sub(page, nr);
- }
- xa_unlock_irq(&address_space->i_pages);
+ unlock:
+ xas_unlock_irq(&xas);
+ } while (xas_nomem(&xas, gfp));
- return error;
- }
-
-
- int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
- {
- int error;
+ if (!xas_error(&xas))
+ return 0;
- error = radix_tree_maybe_preload_order(gfp_mask, compound_order(page));
- if (!error) {
- error = __add_to_swap_cache(page, entry);
- radix_tree_preload_end();
- }
- return error;
+ ClearPageSwapCache(page);
+ page_ref_sub(page, nr);
+ return xas_error(&xas);
}
/*
* This must be called only on pages that have
* been verified to be in the swap cache.
*/
- void __delete_from_swap_cache(struct page *page)
+ void __delete_from_swap_cache(struct page *page, swp_entry_t entry)
{
- struct address_space *address_space;
+ struct address_space *address_space = swap_address_space(entry);
int i, nr = hpage_nr_pages(page);
- swp_entry_t entry;
- pgoff_t idx;
+ pgoff_t idx = swp_offset(entry);
+ XA_STATE(xas, &address_space->i_pages, idx);
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
VM_BUG_ON_PAGE(PageWriteback(page), page);
- entry.val = page_private(page);
- address_space = swap_address_space(entry);
- idx = swp_offset(entry);
for (i = 0; i < nr; i++) {
- radix_tree_delete(&address_space->i_pages, idx + i);
+ void *entry = xas_store(&xas, NULL);
+ VM_BUG_ON_PAGE(entry != page + i, entry);
set_page_private(page + i, 0);
+ xas_next(&xas);
}
ClearPageSwapCache(page);
address_space->nrpages -= nr;
return 0;
/*
- * Radix-tree node allocations from PF_MEMALLOC contexts could
+ * XArray node allocations from PF_MEMALLOC contexts could
* completely exhaust the page allocator. __GFP_NOMEMALLOC
* stops emergency reserves from being allocated.
*
*/
err = add_to_swap_cache(page, entry,
__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
- /* -ENOMEM radix-tree allocation failure */
if (err)
/*
* add_to_swap_cache() doesn't return -EEXIST, so we can safely
*/
void delete_from_swap_cache(struct page *page)
{
- swp_entry_t entry;
- struct address_space *address_space;
+ swp_entry_t entry = { .val = page_private(page) };
+ struct address_space *address_space = swap_address_space(entry);
- entry.val = page_private(page);
-
- address_space = swap_address_space(entry);
xa_lock_irq(&address_space->i_pages);
- __delete_from_swap_cache(page);
+ __delete_from_swap_cache(page, entry);
xa_unlock_irq(&address_space->i_pages);
put_swap_page(page, entry);
break; /* Out of memory */
}
- /*
- * call radix_tree_preload() while we can wait.
- */
- err = radix_tree_maybe_preload(gfp_mask & GFP_KERNEL);
- if (err)
- break;
-
/*
* Swap entry may have been freed since our caller observed it.
*/
err = swapcache_prepare(entry);
if (err == -EEXIST) {
- radix_tree_preload_end();
/*
* We might race against get_swap_page() and stumble
* across a SWAP_HAS_CACHE swap_map entry whose page
*/
cond_resched();
continue;
- }
- if (err) { /* swp entry is obsolete ? */
- radix_tree_preload_end();
+ } else if (err) /* swp entry is obsolete ? */
break;
- }
- /* May fail (-ENOMEM) if radix-tree node allocation failed. */
+ /* May fail (-ENOMEM) if XArray node allocation failed. */
__SetPageLocked(new_page);
__SetPageSwapBacked(new_page);
- err = __add_to_swap_cache(new_page, entry);
+ err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
if (likely(!err)) {
- radix_tree_preload_end();
- /*
- * Initiate read into locked page and return.
- */
+ /* Initiate read into locked page */
+ SetPageWorkingset(new_page);
lru_cache_add_anon(new_page);
*new_page_allocated = true;
return new_page;
}
- radix_tree_preload_end();
__ClearPageLocked(new_page);
/*
* add_to_swap_cache() doesn't return -EEXIST, so we can safely
return -ENOMEM;
for (i = 0; i < nr; i++) {
space = spaces + i;
- INIT_RADIX_TREE(&space->i_pages, GFP_ATOMIC|__GFP_NOWARN);
+ xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
atomic_set(&space->i_mmap_writable, 0);
space->a_ops = &swap_aops;
/* swap cache doesn't use writeback related tags */
#include <linux/prefetch.h>
#include <linux/printk.h>
#include <linux/dax.h>
+#include <linux/psi.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
total_scan = nr;
- delta = freeable >> priority;
- delta *= 4;
- do_div(delta, shrinker->seeks);
+ if (shrinker->seeks) {
+ delta = freeable >> priority;
+ delta *= 4;
+ do_div(delta, shrinker->seeks);
+ } else {
+ /*
+ * These objects don't require any IO to create. Trim
+ * them aggressively under memory pressure to keep
+ * them from causing refetches in the IO caches.
+ */
+ delta = freeable / 2;
+ }
/*
* Make sure we apply some minimal pressure on default priority
struct mem_cgroup *memcg, int priority)
{
struct memcg_shrinker_map *map;
- unsigned long freed = 0;
- int ret, i;
+ unsigned long ret, freed = 0;
+ int i;
if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg))
return 0;
struct mem_cgroup *memcg,
int priority)
{
+ unsigned long ret, freed = 0;
struct shrinker *shrinker;
- unsigned long freed = 0;
- int ret;
if (!mem_cgroup_is_root(memcg))
return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
{
/*
* A freeable page cache page is referenced only by the caller
- * that isolated the page, the page cache radix tree and
- * optional buffer heads at page->private.
+ * that isolated the page, the page cache and optional buffer
+ * heads at page->private.
*/
- int radix_pins = PageTransHuge(page) && PageSwapCache(page) ?
+ int page_cache_pins = PageTransHuge(page) && PageSwapCache(page) ?
HPAGE_PMD_NR : 1;
- return page_count(page) - page_has_private(page) == 1 + radix_pins;
+ return page_count(page) - page_has_private(page) == 1 + page_cache_pins;
}
static int may_write_to_inode(struct inode *inode, struct scan_control *sc)
if (PageSwapCache(page)) {
swp_entry_t swap = { .val = page_private(page) };
mem_cgroup_swapout(page, swap);
- __delete_from_swap_cache(page);
+ __delete_from_swap_cache(page, swap);
xa_unlock_irqrestore(&mapping->i_pages, flags);
put_swap_page(page, swap);
} else {
}
ClearPageActive(page); /* we are de-activating */
+ SetPageWorkingset(page);
list_add(&page->lru, &l_inactive);
}
/*
* Scan types proportional to swappiness and
* their relative recent reclaim efficiency.
+ * Make sure we don't miss the last page
+ * because of a round-off error.
*/
- scan = div64_u64(scan * fraction[file],
- denominator);
+ scan = DIV64_U64_ROUND_UP(scan * fraction[file],
+ denominator);
break;
case SCAN_FILE:
case SCAN_ANON:
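The switch from div64_u64() to DIV64_U64_ROUND_UP() only matters when the proportional split does not divide evenly; without rounding up, the truncated quotient can leave the final page of an LRU unscanned. A tiny stand-alone illustration with made-up numbers:

#include <stdio.h>

int main(void)
{
	/*
	 * Illustrative values: 7 pages left to scan, this LRU's share is
	 * 1/3.  Truncation scans 2 pages and drops the remainder forever;
	 * rounding up scans 3 and cannot miss the last page.
	 */
	unsigned long long scan = 7, fraction = 1, denominator = 3;

	printf("truncated: %llu\n", scan * fraction / denominator);
	printf("round up:  %llu\n",
	       (scan * fraction + denominator - 1) / denominator);
	return 0;
}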
{
struct zonelist *zonelist;
unsigned long nr_reclaimed;
+ unsigned long pflags;
int nid;
unsigned int noreclaim_flag;
struct scan_control sc = {
sc.gfp_mask,
sc.reclaim_idx);
+ psi_memstall_enter(&pflags);
noreclaim_flag = memalloc_noreclaim_save();
+
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+
memalloc_noreclaim_restore(noreclaim_flag);
+ psi_memstall_leave(&pflags);
trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
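The PSI annotations in this and the following kswapd hunk use one simple bracket: declare a pflags cookie on the stack, call psi_memstall_enter() before work that may stall on memory, and psi_memstall_leave() when it is done. A minimal sketch of the pattern; demo_reclaim() is a placeholder, not a kernel function.

#include <linux/psi.h>

static void demo_reclaim(void)
{
	unsigned long pflags;

	psi_memstall_enter(&pflags);	/* task is now accounted as stalled */

	/* ... reclaim work that may block on memory goes here ... */

	psi_memstall_leave(&pflags);	/* end of the memory-stall interval */
}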
int i;
unsigned long nr_soft_reclaimed;
unsigned long nr_soft_scanned;
+ unsigned long pflags;
struct zone *zone;
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.may_swap = 1,
};
+ psi_memstall_enter(&pflags);
__fs_reclaim_acquire();
count_vm_event(PAGEOUTRUN);
out:
snapshot_refaults(NULL, pgdat);
__fs_reclaim_release();
+ psi_memstall_leave(&pflags);
/*
* Return the order kswapd stopped reclaiming at as
* prepare_kswapd_sleep() takes it into account. If another caller
* the only thing eating into inactive list space is active pages.
*
*
- * Activating refaulting pages
+ * Refaulting inactive pages
*
* All that is known about the active list is that the pages have been
* accessed more than once in the past. This means that at any given
* used less frequently than the refaulting page - or even not used at
* all anymore.
*
+ * That means if inactive cache is refaulting with a suitable refault
+ * distance, we assume the cache workingset is transitioning and put
+ * pressure on the current active list.
+ *
* If this is wrong and demotion kicks in, the pages which are truly
* used more frequently will be reactivated while the less frequently
 * used ones will be evicted from memory.
* But if this is right, the stale pages will be pushed out of memory
* and the used pages get to stay in cache.
*
+ * Refaulting active pages
+ *
+ * If on the other hand the refaulting pages have recently been
+ * deactivated, it means that the active list is no longer protecting
+ * actively used cache from reclaim. The cache is NOT transitioning to
+ * a different workingset; the existing workingset is thrashing in the
+ * space allocated to the page cache.
+ *
*
* Implementation
*
* and activations is maintained (node->inactive_age).
*
* On eviction, a snapshot of this counter (along with some bits to
- * identify the node) is stored in the now empty page cache radix tree
+ * identify the node) is stored in the now empty page cache
* slot of the evicted page. This is called a shadow entry.
*
* On cache misses for which there are shadow entries, an eligible
* refault distance will immediately activate the refaulting page.
*/
- #define EVICTION_SHIFT (RADIX_TREE_EXCEPTIONAL_ENTRY + \
- NODES_SHIFT + \
- MEM_CGROUP_ID_SHIFT)
+ #define EVICTION_SHIFT ((BITS_PER_LONG - BITS_PER_XA_VALUE) + \
+ 1 + NODES_SHIFT + MEM_CGROUP_ID_SHIFT)
#define EVICTION_MASK (~0UL >> EVICTION_SHIFT)
/*
* Eviction timestamps need to be able to cover the full range of
- * actionable refaults. However, bits are tight in the radix tree
+ * actionable refaults. However, bits are tight in the xarray
* entry, and after storing the identifier for the lruvec there might
* not be enough left to represent every single actionable refault. In
* that case, we have to sacrifice granularity for distance, and group
*/
static unsigned int bucket_order __read_mostly;
-static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction)
+static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction,
+ bool workingset)
{
eviction >>= bucket_order;
+ eviction &= EVICTION_MASK;
eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
- eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT);
+ eviction = (eviction << 1) | workingset;
- return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY);
+ return xa_mk_value(eviction);
}
static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
- unsigned long *evictionp)
+ unsigned long *evictionp, bool *workingsetp)
{
- unsigned long entry = (unsigned long)shadow;
+ unsigned long entry = xa_to_value(shadow);
int memcgid, nid;
+ bool workingset;
- entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT;
+ workingset = entry & 1;
+ entry >>= 1;
nid = entry & ((1UL << NODES_SHIFT) - 1);
entry >>= NODES_SHIFT;
memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1);
*memcgidp = memcgid;
*pgdat = NODE_DATA(nid);
*evictionp = entry << bucket_order;
+ *workingsetp = workingset;
}
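To see what the reworked pack_shadow()/unpack_shadow() pair encodes, the following stand-alone round-trip model may help. It is userspace C with illustrative shift widths standing in for MEM_CGROUP_ID_SHIFT and NODES_SHIFT, bucket_order taken as zero, and the xa_mk_value()/xa_to_value() wrapping omitted; it is a sketch of the bit layout, not the kernel functions themselves.

#include <assert.h>
#include <stdio.h>

/* Illustrative widths; the kernel derives the real ones from Kconfig. */
#define DEMO_MEMCG_SHIFT 16
#define DEMO_NODES_SHIFT 6

static unsigned long pack(unsigned long eviction, int memcgid, int nid,
			  int workingset)
{
	eviction = (eviction << DEMO_MEMCG_SHIFT) | memcgid;
	eviction = (eviction << DEMO_NODES_SHIFT) | nid;
	eviction = (eviction << 1) | workingset;	/* new low bit */
	return eviction;	/* the kernel wraps this in xa_mk_value() */
}

static void unpack(unsigned long entry, unsigned long *eviction,
		   int *memcgid, int *nid, int *workingset)
{
	*workingset = entry & 1;
	entry >>= 1;
	*nid = entry & ((1UL << DEMO_NODES_SHIFT) - 1);
	entry >>= DEMO_NODES_SHIFT;
	*memcgid = entry & ((1UL << DEMO_MEMCG_SHIFT) - 1);
	*eviction = entry >> DEMO_MEMCG_SHIFT;
}

int main(void)	/* assumes a 64-bit unsigned long for the chosen widths */
{
	unsigned long eviction;
	int memcgid, nid, ws;

	unpack(pack(123456, 42, 3, 1), &eviction, &memcgid, &nid, &ws);
	assert(eviction == 123456 && memcgid == 42 && nid == 3 && ws == 1);
	printf("eviction=%lu memcg=%d nid=%d workingset=%d\n",
	       eviction, memcgid, nid, ws);
	return 0;
}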
/**
*/
void *workingset_eviction(struct address_space *mapping, struct page *page)
{
- struct mem_cgroup *memcg = page_memcg(page);
struct pglist_data *pgdat = page_pgdat(page);
+ struct mem_cgroup *memcg = page_memcg(page);
int memcgid = mem_cgroup_id(memcg);
unsigned long eviction;
struct lruvec *lruvec;
lruvec = mem_cgroup_lruvec(pgdat, memcg);
eviction = atomic_long_inc_return(&lruvec->inactive_age);
- return pack_shadow(memcgid, pgdat, eviction);
+ return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
}
/**
* workingset_refault - evaluate the refault of a previously evicted page
+ * @page: the freshly allocated replacement page
* @shadow: shadow entry of the evicted page
*
* Calculates and evaluates the refault distance of the previously
* evicted page in the context of the node it was allocated in.
- *
- * Returns %true if the page should be activated, %false otherwise.
*/
-bool workingset_refault(void *shadow)
+void workingset_refault(struct page *page, void *shadow)
{
unsigned long refault_distance;
+ struct pglist_data *pgdat;
unsigned long active_file;
struct mem_cgroup *memcg;
unsigned long eviction;
struct lruvec *lruvec;
unsigned long refault;
- struct pglist_data *pgdat;
+ bool workingset;
int memcgid;
- unpack_shadow(shadow, &memcgid, &pgdat, &eviction);
+ unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset);
rcu_read_lock();
/*
* configurations instead.
*/
memcg = mem_cgroup_from_id(memcgid);
- if (!mem_cgroup_disabled() && !memcg) {
- rcu_read_unlock();
- return false;
- }
+ if (!mem_cgroup_disabled() && !memcg)
+ goto out;
lruvec = mem_cgroup_lruvec(pgdat, memcg);
refault = atomic_long_read(&lruvec->inactive_age);
active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES);
/*
- * The unsigned subtraction here gives an accurate distance
- * across inactive_age overflows in most cases.
+ * Calculate the refault distance
*
- * There is a special case: usually, shadow entries have a
- * short lifetime and are either refaulted or reclaimed along
- * with the inode before they get too old. But it is not
- * impossible for the inactive_age to lap a shadow entry in
- * the field, which can then can result in a false small
- * refault distance, leading to a false activation should this
- * old entry actually refault again. However, earlier kernels
- * used to deactivate unconditionally with *every* reclaim
- * invocation for the longest time, so the occasional
- * inappropriate activation leading to pressure on the active
- * list is not a problem.
+ * The unsigned subtraction here gives an accurate distance
+ * across inactive_age overflows in most cases. There is a
+ * special case: usually, shadow entries have a short lifetime
+ * and are either refaulted or reclaimed along with the inode
+ * before they get too old. But it is not impossible for the
+ * inactive_age to lap a shadow entry in the field, which can
+ * then result in a false small refault distance, leading to a
+ * false activation should this old entry actually refault
+ * again. However, earlier kernels used to deactivate
+ * unconditionally with *every* reclaim invocation for the
+ * longest time, so the occasional inappropriate activation
+ * leading to pressure on the active list is not a problem.
*/
refault_distance = (refault - eviction) & EVICTION_MASK;
inc_lruvec_state(lruvec, WORKINGSET_REFAULT);
- if (refault_distance <= active_file) {
- inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);
- rcu_read_unlock();
- return true;
+ /*
+ * Compare the distance to the existing workingset size. We
+ * don't act on pages that couldn't stay resident even if all
+ * the memory was available to the page cache.
+ */
+ if (refault_distance > active_file)
+ goto out;
+
+ SetPageActive(page);
+ atomic_long_inc(&lruvec->inactive_age);
+ inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);
+
+ /* Page was active prior to eviction */
+ if (workingset) {
+ SetPageWorkingset(page);
+ inc_lruvec_state(lruvec, WORKINGSET_RESTORE);
}
+out:
rcu_read_unlock();
- return false;
}
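Pulled out of kernel context, the decision workingset_refault() now makes can be modelled in a few lines: compute the refault distance from the eviction-time snapshot, activate if it fits within the active file list, and restore PG_workingset when the shadow entry says the page was active before eviction. A stand-alone sketch with made-up numbers; the kernel additionally masks the distance with EVICTION_MASK and bumps the lruvec counters, which is not modelled here.

#include <stdbool.h>
#include <stdio.h>

/*
 * Stand-alone model of the activation test in workingset_refault().
 * The counter values and list size are invented; the kernel reads them
 * from the lruvec.
 */
static bool should_activate(unsigned long eviction_snapshot,
			    unsigned long inactive_age_now,
			    unsigned long active_file_pages)
{
	/* unsigned subtraction stays correct across counter wrap-around */
	unsigned long refault_distance = inactive_age_now - eviction_snapshot;

	return refault_distance <= active_file_pages;
}

int main(void)
{
	/* evicted at age 1000, refaults at 1500, 800 active file pages:
	 * distance 500 <= 800 -> activate (and restore PG_workingset if
	 * the shadow entry's low bit was set) */
	printf("%d\n", should_activate(1000, 1500, 800));

	/* refaults only at age 2500: distance 1500 > 800 -> the page
	 * could not have stayed resident anyway, leave it inactive */
	printf("%d\n", should_activate(1000, 2500, 800));
	return 0;
}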
/**
static struct list_lru shadow_nodes;
- void workingset_update_node(struct radix_tree_node *node)
+ void workingset_update_node(struct xa_node *node)
{
/*
* Track non-empty nodes that contain only shadow entries;
* already where they should be. The list_empty() test is safe
* as node->private_list is protected by the i_pages lock.
*/
- if (node->count && node->count == node->exceptional) {
- if (list_empty(&node->private_list))
+ VM_WARN_ON_ONCE(!irqs_disabled()); /* For __inc_lruvec_page_state */
+
+ if (node->count && node->count == node->nr_values) {
+ if (list_empty(&node->private_list)) {
list_lru_add(&shadow_nodes, &node->private_list);
+ __inc_lruvec_page_state(virt_to_page(node),
+ WORKINGSET_NODES);
+ }
} else {
- if (!list_empty(&node->private_list))
+ if (!list_empty(&node->private_list)) {
list_lru_del(&shadow_nodes, &node->private_list);
+ __dec_lruvec_page_state(virt_to_page(node),
+ WORKINGSET_NODES);
+ }
}
}
{
unsigned long max_nodes;
unsigned long nodes;
- unsigned long cache;
+ unsigned long pages;
nodes = list_lru_shrink_count(&shadow_nodes, sc);
/*
- * Approximate a reasonable limit for the radix tree nodes
+ * Approximate a reasonable limit for the nodes
* containing shadow entries. We don't need to keep more
* shadow entries than possible pages on the active list,
* since refault distances bigger than that are dismissed.
* worst-case density of 1/8th. Below that, not all eligible
* refaults can be detected anymore.
*
- * On 64-bit with 7 radix_tree_nodes per page and 64 slots
+ * On 64-bit with 7 xa_nodes per page and 64 slots
* each, this will reclaim shadow entries when they consume
* ~1.8% of available memory:
*
- * PAGE_SIZE / radix_tree_nodes / node_entries * 8 / PAGE_SIZE
+ * PAGE_SIZE / xa_nodes / node_entries * 8 / PAGE_SIZE
*/
+#ifdef CONFIG_MEMCG
if (sc->memcg) {
- cache = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid,
- LRU_ALL_FILE);
- } else {
- cache = node_page_state(NODE_DATA(sc->nid), NR_ACTIVE_FILE) +
- node_page_state(NODE_DATA(sc->nid), NR_INACTIVE_FILE);
- }
- max_nodes = cache >> (XA_CHUNK_SHIFT - 3);
+ struct lruvec *lruvec;
+
+ pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid,
+ LRU_ALL);
+ lruvec = mem_cgroup_lruvec(NODE_DATA(sc->nid), sc->memcg);
+ pages += lruvec_page_state(lruvec, NR_SLAB_RECLAIMABLE);
+ pages += lruvec_page_state(lruvec, NR_SLAB_UNRECLAIMABLE);
+ } else
+#endif
+ pages = node_present_pages(sc->nid);
+
- max_nodes = pages >> (RADIX_TREE_MAP_SHIFT - 3);
+ max_nodes = pages >> (XA_CHUNK_SHIFT - 3);
if (!nodes)
return SHRINK_EMPTY;
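The "~1.8% of available memory" figure in the comment above follows directly from the shift: max_nodes is pages >> (XA_CHUNK_SHIFT - 3), i.e. pages/8 nodes, and at 7 nodes per page those occupy pages/56 of memory. A stand-alone check with the numbers from the comment (64 slots per node, so XA_CHUNK_SHIFT is 6):

#include <stdio.h>

int main(void)
{
	/* Numbers from the comment above: 64-bit, 64 slots per node. */
	unsigned long pages = 1UL << 20;		/* any size works     */
	unsigned long nodes_per_page = 7;		/* xa_nodes per page  */
	unsigned long max_nodes = pages >> (6 - 3);	/* XA_CHUNK_SHIFT - 3 */
	unsigned long node_pages = max_nodes / nodes_per_page;

	/* pages/8 nodes at 7 per page ~= pages/56 ~= 1.8% of memory */
	printf("shadow nodes may occupy %.2f%% of pages\n",
	       100.0 * node_pages / pages);
	return 0;
}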
static enum lru_status shadow_lru_isolate(struct list_head *item,
struct list_lru_one *lru,
spinlock_t *lru_lock,
- void *arg)
+ void *arg) __must_hold(lru_lock)
{
+ struct xa_node *node = container_of(item, struct xa_node, private_list);
+ XA_STATE(xas, node->array, 0);
struct address_space *mapping;
- struct radix_tree_node *node;
- unsigned int i;
int ret;
/*
* the shadow node LRU under the i_pages lock and the
* lru_lock. Because the page cache tree is emptied before
* the inode can be destroyed, holding the lru_lock pins any
- * address_space that has radix tree nodes on the LRU.
+ * address_space that has nodes on the LRU.
*
* We can then safely transition to the i_pages lock to
* pin only the address_space of the particular node we want
* to reclaim, take the node off-LRU, and drop the lru_lock.
*/
- node = container_of(item, struct radix_tree_node, private_list);
- mapping = container_of(node->root, struct address_space, i_pages);
+ mapping = container_of(node->array, struct address_space, i_pages);
/* Coming from the list, invert the lock order */
if (!xa_trylock(&mapping->i_pages)) {
}
list_lru_isolate(lru, item);
+ __dec_lruvec_page_state(virt_to_page(node), WORKINGSET_NODES);
+
spin_unlock(lru_lock);
/*
* no pages, so we expect to be able to remove them all and
* delete and free the empty node afterwards.
*/
- if (WARN_ON_ONCE(!node->exceptional))
+ if (WARN_ON_ONCE(!node->nr_values))
goto out_invalid;
- if (WARN_ON_ONCE(node->count != node->exceptional))
- goto out_invalid;
- for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
- if (node->slots[i]) {
- if (WARN_ON_ONCE(!radix_tree_exceptional_entry(node->slots[i])))
- goto out_invalid;
- if (WARN_ON_ONCE(!node->exceptional))
- goto out_invalid;
- if (WARN_ON_ONCE(!mapping->nrexceptional))
- goto out_invalid;
- node->slots[i] = NULL;
- node->exceptional--;
- node->count--;
- mapping->nrexceptional--;
- }
- }
- if (WARN_ON_ONCE(node->exceptional))
+ if (WARN_ON_ONCE(node->count != node->nr_values))
goto out_invalid;
- inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
+ mapping->nrexceptional -= node->nr_values;
+ xas.xa_node = xa_parent_locked(&mapping->i_pages, node);
+ xas.xa_offset = node->offset;
+ xas.xa_shift = node->shift + XA_CHUNK_SHIFT;
+ xas_set_update(&xas, workingset_update_node);
+ /*
+ * We could store a shadow entry here which was the minimum of the
+ * shadow entries we were tracking ...
+ */
+ xas_store(&xas, NULL);
- __radix_tree_delete_node(&mapping->i_pages, node,
- workingset_lookup_update(mapping));
+ __inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
out_invalid:
xa_unlock_irq(&mapping->i_pages);
static struct shrinker workingset_shadow_shrinker = {
.count_objects = count_shadow_nodes,
.scan_objects = scan_shadow_nodes,
- .seeks = DEFAULT_SEEKS,
+ .seeks = 0, /* ->count reports only fully expendable nodes */
.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
};