Merge branch 'for-4.15/block' of git://git.kernel.dk/linux-block
author    Linus Torvalds <[email protected]>
Tue, 14 Nov 2017 23:32:19 +0000 (15:32 -0800)
committer Linus Torvalds <[email protected]>
Tue, 14 Nov 2017 23:32:19 +0000 (15:32 -0800)
Pull core block layer updates from Jens Axboe:
 "This is the main pull request for block storage for 4.15-rc1.

  Nothing out of the ordinary in here, and no API changes or anything
  like that. Just various new features for drivers, core changes, etc.
  In particular, this pull request contains:

   - A patch series from Bart, closing the hole on blk/scsi-mq queue
     quiescing.

   - A series from Christoph, building towards hidden gendisks (for
     multipath) and ability to move bio chains around.

   - NVMe
        - Support for native multipath for NVMe (Christoph).
        - Userspace notifications for AENs (Keith).
        - Command side-effects support (Keith).
        - SGL support (Chaitanya Kulkarni)
        - FC fixes and improvements (James Smart)
        - Lots of fixes and tweaks (Various)

   - bcache
        - New maintainer (Michael Lyle)
        - Writeback control improvements (Michael)
        - Various fixes (Coly, Elena, Eric, Liang, et al)

   - lightnvm updates, mostly centered around the pblk interface
     (Javier, Hans, and Rakesh).

   - Removal of unused bio/bvec kmap atomic interfaces (me, Christoph)

   - Writeback series that fix the much discussed hundreds of millions
     of sync-all units. This goes all the way, as discussed previously
     (me).

   - Fix for missing wakeup on writeback timer adjustments (Yafang
     Shao).

   - Fix laptop mode on blk-mq (me).

   - {mq,name} tuple lookup for IO schedulers, allowing us to have
     alias names. This means you can use 'deadline' on both !mq and mq
     (where it's called mq-deadline); see the lookup sketch after this
     message (me).

   - blktrace race fix, oopsing on sg load (me).

   - blk-mq optimizations (me).

   - Obscure waitqueue race fix for kyber (Omar).

   - NBD fixes (Josef).

   - Disable writeback throttling by default on bfq, like we do on cfq
     (Luca Miccio).

   - Series from Ming that enable us to treat flush requests on blk-mq
     like any other request. This is a really nice cleanup.

   - Series from Ming that improves merging on blk-mq with schedulers,
     getting us closer to flipping the switch on scsi-mq again.

   - BFQ updates (Paolo).

   - blk-mq atomic flags memory ordering fixes (Peter Z).

   - Loop cgroup support (Shaohua).

   - Lots of minor fixes from lots of different folks, both for core and
     driver code"

* 'for-4.15/block' of git://git.kernel.dk/linux-block: (294 commits)
  nvme: fix visibility of "uuid" ns attribute
  blk-mq: fixup some comment typos and lengths
  ide: ide-atapi: fix compile error with defining macro DEBUG
  blk-mq: improve tag waiting setup for non-shared tags
  brd: remove unused brd_mutex
  blk-mq: only run the hardware queue if IO is pending
  block: avoid null pointer dereference on null disk
  fs: guard_bio_eod() needs to consider partitions
  xtensa/simdisk: fix compile error
  nvme: expose subsys attribute to sysfs
  nvme: create 'slaves' and 'holders' entries for hidden controllers
  block: create 'slaves' and 'holders' entries for hidden gendisks
  nvme: also expose the namespace identification sysfs files for mpath nodes
  nvme: implement multipath access to nvme subsystems
  nvme: track shared namespaces
  nvme: introduce a nvme_ns_ids structure
  nvme: track subsystems
  block, nvme: Introduce blk_mq_req_flags_t
  block, scsi: Make SCSI quiesce and resume work reliably
  block: Add the QUEUE_FLAG_PREEMPT_ONLY request queue flag
  ...

60 files changed:
MAINTAINERS
block/bio.c
block/blk-lib.c
block/blk-mq-debugfs.c
block/blk-mq-tag.h
block/blk-mq.h
block/blk-throttle.c
block/blk-wbt.c
block/blk.h
block/genhd.c
drivers/block/Kconfig
drivers/block/nbd.c
drivers/block/null_blk.c
drivers/block/paride/Kconfig
drivers/block/skd_main.c
drivers/cdrom/Makefile
drivers/ide/ide-pm.c
drivers/md/bcache/alloc.c
drivers/md/bcache/bcache.h
drivers/md/bcache/btree.c
drivers/md/bcache/btree.h
drivers/md/bcache/closure.h
drivers/md/bcache/request.c
drivers/md/bcache/sysfs.c
drivers/md/bcache/util.h
drivers/md/bcache/writeback.c
drivers/md/bcache/writeback.h
drivers/md/dm.c
drivers/nvme/host/Makefile
drivers/nvme/host/core.c
drivers/nvme/host/fc.c
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/target/core.c
drivers/nvme/target/nvmet.h
drivers/scsi/scsi_lib.c
drivers/scsi/sg.c
fs/block_dev.c
fs/buffer.c
fs/direct-io.c
fs/iomap.c
fs/sync.c
include/linux/backing-dev-defs.h
include/linux/backing-dev.h
include/linux/blk-cgroup.h
include/linux/blk-mq.h
include/linux/blk_types.h
include/linux/blkdev.h
include/linux/buffer_head.h
include/linux/elevator.h
include/linux/genhd.h
include/linux/kthread.h
include/linux/lightnvm.h
include/linux/writeback.h
include/scsi/scsi_device.h
include/trace/events/writeback.h
kernel/kthread.c
kernel/sysctl.c
mm/page_io.c
mm/vmscan.c

diff --combined MAINTAINERS
index e372994747b7ea2340344ea46ca013cc7f3e5562,6e9343af6bbfa5088d555c74e69fa9dd84846133..ba3d8c197d92b8c4973d9a7952a419f03a488558
@@@ -873,7 -873,7 +873,7 @@@ F: drivers/android
  F:    drivers/staging/android/
  
  ANDROID GOLDFISH RTC DRIVER
 -M:    Miodrag Dinic <miodrag.dinic@imgtec.com>
 +M:    Miodrag Dinic <miodrag.dinic@mips.com>
  S:    Supported
  F:    Documentation/devicetree/bindings/rtc/google,goldfish-rtc.txt
  F:    drivers/rtc/rtc-goldfish.c
@@@ -2562,10 -2562,12 +2562,12 @@@ S:   Maintaine
  F:    drivers/net/hamradio/baycom*
  
  BCACHE (BLOCK LAYER CACHE)
+ M:    Michael Lyle <[email protected]>
  M:    Kent Overstreet <[email protected]>
  L:    [email protected]
  W:    http://bcache.evilpiepirate.org
- S:    Orphan
+ C:    irc://irc.oftc.net/bcache
+ S:    Maintained
  F:    drivers/md/bcache/
  
  BDISP ST MEDIA DRIVER
@@@ -2896,13 -2898,6 +2898,13 @@@ S:    Supporte
  F:    drivers/gpio/gpio-brcmstb.c
  F:    Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt
  
 +BROADCOM BRCMSTB USB2 and USB3 PHY DRIVER
 +M:    Al Cooper <[email protected]>
 +L:    [email protected]
 +L:    [email protected]
 +S:    Maintained
 +F:    drivers/phy/broadcom/phy-brcm-usb*
 +
  BROADCOM GENET ETHERNET DRIVER
  M:    Florian Fainelli <[email protected]>
  L:    [email protected]
@@@ -3451,8 -3446,7 +3453,8 @@@ M:      Thomas Gleixner <[email protected]
  L:    [email protected]
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
  S:    Supported
 -F:    drivers/clocksource
 +F:    drivers/clocksource/
 +F:    Documentation/devicetree/bindings/timer/
  
  CMPC ACPI DRIVER
  M:    Thadeu Lima de Souza Cascardo <[email protected]>
@@@ -3644,8 -3638,6 +3646,8 @@@ F:      drivers/cpufreq/arm_big_little_dt.
  
  CPU POWER MONITORING SUBSYSTEM
  M:    Thomas Renninger <[email protected]>
 +M:    Shuah Khan <[email protected]>
 +M:    Shuah Khan <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    tools/power/cpupower/
@@@ -4244,7 -4236,7 +4246,7 @@@ S:      Maintaine
  F:    drivers/dma/
  F:    include/linux/dmaengine.h
  F:    Documentation/devicetree/bindings/dma/
 -F:    Documentation/dmaengine/
 +F:    Documentation/driver-api/dmaengine/
  T:    git git://git.infradead.org/users/vkoul/slave-dma.git
  
  DMA MAPPING HELPERS
  S:    Maintained
  F:    drivers/edac/highbank*
  
 -EDAC-CAVIUM
 +EDAC-CAVIUM OCTEON
  M:    Ralf Baechle <[email protected]>
  M:    David Daney <[email protected]>
  L:    [email protected]
  L:    [email protected]
  S:    Supported
  F:    drivers/edac/octeon_edac*
 +
 +EDAC-CAVIUM THUNDERX
 +M:    David Daney <[email protected]>
 +M:    Jan Glauber <[email protected]>
 +L:    [email protected]
 +S:    Supported
  F:    drivers/edac/thunderx_edac*
  
  EDAC-CORE
@@@ -5229,7 -5215,8 +5231,7 @@@ F:      fs/ext4
  
  Extended Verification Module (EVM)
  M:    Mimi Zohar <[email protected]>
 -L:    [email protected]
 -L:    [email protected]
 +L:    [email protected]
  S:    Supported
  F:    security/integrity/evm/
  
@@@ -5274,8 -5261,7 +5276,8 @@@ S:      Maintaine
  F:    drivers/iommu/exynos-iommu.c
  
  EZchip NPS platform support
 -M:    Noam Camus <[email protected]>
 +M:    Elad Kanfi <[email protected]>
 +M:    Vineet Gupta <[email protected]>
  S:    Supported
  F:    arch/arc/plat-eznps
  F:    arch/arc/boot/dts/eznps.dts
@@@ -5361,7 -5347,9 +5363,7 @@@ M:      "J. Bruce Fields" <bfields@fieldses.
  L:    [email protected]
  S:    Maintained
  F:    include/linux/fcntl.h
 -F:    include/linux/fs.h
  F:    include/uapi/linux/fcntl.h
 -F:    include/uapi/linux/fs.h
  F:    fs/fcntl.c
  F:    fs/locks.c
  
@@@ -5370,8 -5358,6 +5372,8 @@@ M:      Alexander Viro <[email protected]
  L:    [email protected]
  S:    Maintained
  F:    fs/*
 +F:    include/linux/fs.h
 +F:    include/uapi/linux/fs.h
  
  FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER
  M:    Riku Voipio <[email protected]>
@@@ -5484,7 -5470,7 +5486,7 @@@ F:      include/uapi/linux/fb.
  
  FREESCALE CAAM (Cryptographic Acceleration and Assurance Module) DRIVER
  M:    Horia Geantă <[email protected]>
 -M:    Dan Douglass <dan.douglass@nxp.com>
 +M:    Aymen Sghaier <aymen.sghaier@nxp.com>
  L:    [email protected]
  S:    Maintained
  F:    drivers/crypto/caam/
@@@ -5664,7 -5650,6 +5666,7 @@@ T:      git git://git.kernel.org/pub/scm/lin
  S:    Supported
  F:    fs/crypto/
  F:    include/linux/fscrypt*.h
 +F:    Documentation/filesystems/fscrypt.rst
  
  FUJITSU FR-V (FRV) PORT
  S:    Orphan
@@@ -6687,7 -6672,7 +6689,7 @@@ F:      include/net/ieee802154_netdev.
  F:    Documentation/networking/ieee802154.txt
  
  IFE PROTOCOL
 -M:    Yotam Gigi <yotamg@mellanox.com>
 +M:    Yotam Gigi <yotam.gi@gmail.com>
  M:    Jamal Hadi Salim <[email protected]>
  F:    net/ife
  F:    include/net/ife.h
@@@ -6749,13 -6734,13 +6751,13 @@@ S:   Maintaine
  F:    drivers/usb/atm/ueagle-atm.c
  
  IMGTEC ASCII LCD DRIVER
 -M:    Paul Burton <paul.burton@imgtec.com>
 +M:    Paul Burton <paul.burton@mips.com>
  S:    Maintained
  F:    Documentation/devicetree/bindings/auxdisplay/img-ascii-lcd.txt
  F:    drivers/auxdisplay/img-ascii-lcd.c
  
  IMGTEC IR DECODER DRIVER
 -M:    James Hogan <j[email protected]>
 +M:    James Hogan <j[email protected]>
  S:    Maintained
  F:    drivers/media/rc/img-ir/
  
@@@ -6857,7 -6842,9 +6859,7 @@@ L:      [email protected]
  INTEGRITY MEASUREMENT ARCHITECTURE (IMA)
  M:    Mimi Zohar <[email protected]>
  M:    Dmitry Kasatkin <[email protected]>
 -L:    [email protected]
 -L:    [email protected]
 -L:    [email protected]
 +L:    [email protected]
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git
  S:    Supported
  F:    security/integrity/ima/
@@@ -7577,7 -7564,7 +7579,7 @@@ F:      arch/arm64/include/asm/kvm
  F:    arch/arm64/kvm/
  
  KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
 -M:    James Hogan <j[email protected]>
 +M:    James Hogan <j[email protected]>
  L:    [email protected]
  S:    Supported
  F:    arch/mips/include/uapi/asm/kvm*
@@@ -7585,7 -7572,7 +7587,7 @@@ F:      arch/mips/include/asm/kvm
  F:    arch/mips/kvm/
  
  KERNEL VIRTUAL MACHINE FOR POWERPC (KVM/powerpc)
 -M:    Alexander Graf <[email protected]>
 +M:    Paul Mackerras <[email protected]>
  L:    [email protected]
  W:    http://www.linux-kvm.org/
  T:    git git://github.com/agraf/linux-2.6.git
@@@ -7640,7 -7627,8 +7642,7 @@@ F:      kernel/kexec
  
  KEYS-ENCRYPTED
  M:    Mimi Zohar <[email protected]>
 -M:    David Safford <[email protected]>
 -L:    [email protected]
 +L:    [email protected]
  L:    [email protected]
  S:    Supported
  F:    Documentation/security/keys/trusted-encrypted.rst
@@@ -7648,8 -7636,9 +7650,8 @@@ F:      include/keys/encrypted-type.
  F:    security/keys/encrypted-keys/
  
  KEYS-TRUSTED
 -M:    David Safford <[email protected]>
  M:    Mimi Zohar <[email protected]>
 -L:    linux-security-module@vger.kernel.org
 +L:    linux-integrity@vger.kernel.org
  L:    [email protected]
  S:    Supported
  F:    Documentation/security/keys/trusted-encrypted.rst
@@@ -7757,11 -7746,6 +7759,11 @@@ S:    Maintaine
  F:    Documentation/scsi/53c700.txt
  F:    drivers/scsi/53c700*
  
 +LEAKING_ADDRESSES
 +M:    Tobin C. Harding <[email protected]>
 +S:    Maintained
 +F:    scripts/leaking_addresses.pl
 +
  LED SUBSYSTEM
  M:    Richard Purdie <[email protected]>
  M:    Jacek Anaszewski <[email protected]>
  S:    Orphan
  F:    drivers/net/wireless/marvell/libertas/
  
 +MARVELL MACCHIATOBIN SUPPORT
 +M:    Russell King <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts
 +
  MARVELL MV643XX ETHERNET DRIVER
  M:    Sebastian Hesselbarth <[email protected]>
  L:    [email protected]
@@@ -8621,12 -8599,6 +8623,12 @@@ M:    Sean Wang <[email protected]
  S:    Maintained
  F:    drivers/media/rc/mtk-cir.c
  
 +MEDIATEK PMIC LED DRIVER
 +M:    Sean Wang <[email protected]>
 +S:    Maintained
 +F:    drivers/leds/leds-mt6323.c
 +F:    Documentation/devicetree/bindings/leds/leds-mt6323.txt
 +
  MEDIATEK ETHERNET DRIVER
  M:    Felix Fietkau <[email protected]>
  M:    John Crispin <[email protected]>
@@@ -8760,7 -8732,7 +8762,7 @@@ Q:      http://patchwork.ozlabs.org/project/
  F:    drivers/net/ethernet/mellanox/mlxsw/
  
  MELLANOX FIRMWARE FLASH LIBRARY (mlxfw)
 -M:    Yotam Gigi <[email protected]>
 +M:    [email protected]
  L:    [email protected]
  S:    Supported
  W:    http://www.mellanox.com
@@@ -8909,7 -8881,7 +8911,7 @@@ F:      Documentation/devicetree/bindings/me
  T:    git git://linuxtv.org/media_tree.git
  
  METAG ARCHITECTURE
 -M:    James Hogan <j[email protected]>
 +M:    James Hogan <j[email protected]>
  L:    [email protected]
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/metag.git
  S:    Odd Fixes
@@@ -9010,7 -8982,7 +9012,7 @@@ F:      Documentation/mips
  F:    arch/mips/
  
  MIPS BOSTON DEVELOPMENT BOARD
 -M:    Paul Burton <paul.burton@imgtec.com>
 +M:    Paul Burton <paul.burton@mips.com>
  L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/clock/img,boston-clock.txt
@@@ -9020,7 -8992,7 +9022,7 @@@ F:      drivers/clk/imgtec/clk-boston.
  F:    include/dt-bindings/clock/boston-clock.h
  
  MIPS GENERIC PLATFORM
 -M:    Paul Burton <paul.burton@imgtec.com>
 +M:    Paul Burton <paul.burton@mips.com>
  L:    [email protected]
  S:    Supported
  F:    arch/mips/generic/
@@@ -9036,7 -9008,7 +9038,7 @@@ F:      drivers/*/*loongson1
  F:    drivers/*/*/*loongson1*
  
  MIPS RINT INSTRUCTION EMULATION
 -M:    Aleksandar Markovic <aleksandar.markovic@imgtec.com>
 +M:    Aleksandar Markovic <aleksandar.markovic@mips.com>
  L:    [email protected]
  S:    Supported
  F:    arch/mips/math-emu/sp_rint.c
@@@ -9230,6 -9202,7 +9232,6 @@@ F:      include/linux/isicom.
  MUSB MULTIPOINT HIGH SPEED DUAL-ROLE CONTROLLER
  M:    Bin Liu <[email protected]>
  L:    [email protected]
 -T:    git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
  S:    Maintained
  F:    drivers/usb/musb/
  
@@@ -9377,7 -9350,7 +9379,7 @@@ NETWORK BLOCK DEVICE (NBD
  M:    Josef Bacik <[email protected]>
  S:    Maintained
  L:    [email protected]
 -L:    nbd[email protected]
 +L:    nbd@other.debian.org
  F:    Documentation/blockdev/nbd.txt
  F:    drivers/block/nbd.c
  F:    include/uapi/linux/nbd.h
@@@ -10048,11 -10021,7 +10050,11 @@@ T: git git://github.com/openrisc/linux.
  L:    [email protected]
  W:    http://openrisc.io
  S:    Maintained
 +F:    Documentation/devicetree/bindings/openrisc/
 +F:    Documentation/openrisc/
  F:    arch/openrisc/
 +F:    drivers/irqchip/irq-ompic.c
 +F:    drivers/irqchip/irq-or1k-*
  
  OPENVSWITCH
  M:    Pravin Shelar <[email protected]>
@@@ -10070,7 -10039,7 +10072,7 @@@ M:   Stephen Boyd <[email protected]
  L:    [email protected]
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git
 -F:    drivers/base/power/opp/
 +F:    drivers/opp/
  F:    include/linux/pm_opp.h
  F:    Documentation/power/opp.txt
  F:    Documentation/devicetree/bindings/opp/
@@@ -10200,6 -10169,7 +10202,6 @@@ F:   Documentation/parport*.tx
  
  PARAVIRT_OPS INTERFACE
  M:    Juergen Gross <[email protected]>
 -M:    Chris Wright <[email protected]>
  M:    Alok Kataria <[email protected]>
  M:    Rusty Russell <[email protected]>
  L:    [email protected]
@@@ -10357,6 -10327,7 +10359,6 @@@ F:   drivers/pci/host/vmd.
  
  PCI DRIVER FOR MICROSEMI SWITCHTEC
  M:    Kurt Schwemmer <[email protected]>
 -M:    Stephen Bates <[email protected]>
  M:    Logan Gunthorpe <[email protected]>
  L:    [email protected]
  S:    Maintained
@@@ -10421,7 -10392,6 +10423,7 @@@ F:   drivers/pci/dwc/*keystone
  
  PCI ENDPOINT SUBSYSTEM
  M:    Kishon Vijay Abraham I <[email protected]>
 +M:    Lorenzo Pieralisi <[email protected]>
  L:    [email protected]
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/kishon/pci-endpoint.git
  S:    Supported
@@@ -10473,15 -10443,6 +10475,15 @@@ F: include/linux/pci
  F:    arch/x86/pci/
  F:    arch/x86/kernel/quirks.c
  
 +PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS
 +M:    Lorenzo Pieralisi <[email protected]>
 +L:    [email protected]
 +Q:    http://patchwork.ozlabs.org/project/linux-pci/list/
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git/
 +S:    Supported
 +F:    drivers/pci/host/
 +F:    drivers/pci/dwc/
 +
  PCIE DRIVER FOR AXIS ARTPEC
  M:    Niklas Cassel <[email protected]>
  M:    Jesper Nilsson <[email protected]>
@@@ -10501,6 -10462,7 +10503,6 @@@ F:   drivers/pci/host/pci-thunder-
  
  PCIE DRIVER FOR HISILICON
  M:    Zhou Wang <[email protected]>
 -M:    Gabriele Paoloni <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
@@@ -10587,8 -10549,6 +10589,8 @@@ M:   Peter Zijlstra <[email protected]
  M:    Ingo Molnar <[email protected]>
  M:    Arnaldo Carvalho de Melo <[email protected]>
  R:    Alexander Shishkin <[email protected]>
 +R:    Jiri Olsa <[email protected]>
 +R:    Namhyung Kim <[email protected]>
  L:    [email protected]
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
  S:    Supported
@@@ -10712,9 -10672,10 +10714,9 @@@ S:  Maintaine
  F:    drivers/pinctrl/spear/
  
  PISTACHIO SOC SUPPORT
 -M:    James Hartley <[email protected]>
 -M:    Ionela Voinescu <[email protected]>
 +M:    James Hartley <[email protected]>
  L:    [email protected]
 -S:    Maintained
 +S:    Odd Fixes
  F:    arch/mips/pistachio/
  F:    arch/mips/include/asm/mach-pistachio/
  F:    arch/mips/boot/dts/img/pistachio*
@@@ -10918,7 -10879,7 +10920,7 @@@ S:   Maintaine
  F:    drivers/block/ps3vram.c
  
  PSAMPLE PACKET SAMPLING SUPPORT:
 -M:    Yotam Gigi <yotamg@mellanox.com>
 +M:    Yotam Gigi <yotam.gi@gmail.com>
  S:    Maintained
  F:    net/psample
  F:    include/net/psample.h
@@@ -11061,6 -11022,7 +11063,6 @@@ F:   drivers/mtd/nand/pxa3xx_nand.
  
  QAT DRIVER
  M:    Giovanni Cabiddu <[email protected]>
 -M:    Salvatore Benedetto <[email protected]>
  L:    [email protected]
  S:    Supported
  F:    drivers/crypto/qat/
  L:    [email protected]
  S:    Maintained
  F:    drivers/crypto/exynos-rng.c
 -F:    Documentation/devicetree/bindings/rng/samsung,exynos-rng4.txt
 +F:    Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
  
  SAMSUNG FRAMEBUFFER DRIVER
  M:    Jingoo Han <[email protected]>
  S:    Maintained
  F:    drivers/mmc/host/sdhci-spear.c
  
 +SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) TI OMAP DRIVER
 +M:    Kishon Vijay Abraham I <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    drivers/mmc/host/sdhci-omap.c
 +
  SECURE ENCRYPTING DEVICE (SED) OPAL DRIVER
  M:    Scott Bauer <[email protected]>
  M:    Jonathan Derrick <[email protected]>
- M:    Rafael Antognolli <[email protected]>
  L:    [email protected]
  S:    Supported
  F:    block/sed*
@@@ -12971,9 -12926,9 +12972,9 @@@ F:   drivers/mmc/host/dw_mmc
  SYNOPSYS HSDK RESET CONTROLLER DRIVER
  M:    Eugeniy Paltsev <[email protected]>
  S:    Supported
 -F:    drivers/reset/reset-hsdk-v1.c
 -F:    include/dt-bindings/reset/snps,hsdk-v1-reset.h
 -F:    Documentation/devicetree/bindings/reset/snps,hsdk-v1-reset.txt
 +F:    drivers/reset/reset-hsdk.c
 +F:    include/dt-bindings/reset/snps,hsdk-reset.h
 +F:    Documentation/devicetree/bindings/reset/snps,hsdk-reset.txt
  
  SYSTEM CONFIGURATION (SYSCON)
  M:    Lee Jones <[email protected]>
@@@ -13632,14 -13587,23 +13633,14 @@@ F:        drivers/platform/x86/toshiba-wmi.
  
  TPM DEVICE DRIVER
  M:    Peter Huewe <[email protected]>
 -M:    Marcel Selhorst <[email protected]>
  M:    Jarkko Sakkinen <[email protected]>
  R:    Jason Gunthorpe <[email protected]>
 -W:    http://tpmdd.sourceforge.net
 -L:    [email protected] (moderated for non-subscribers)
 -Q:    https://patchwork.kernel.org/project/tpmdd-devel/list/
 +L:    [email protected]
 +Q:    https://patchwork.kernel.org/project/linux-integrity/list/
  T:    git git://git.infradead.org/users/jjs/linux-tpmdd.git
  S:    Maintained
  F:    drivers/char/tpm/
  
 -TPM IBM_VTPM DEVICE DRIVER
 -M:    Ashley Lai <[email protected]>
 -W:    http://tpmdd.sourceforge.net
 -L:    [email protected] (moderated for non-subscribers)
 -S:    Maintained
 -F:    drivers/char/tpm/tpm_ibmvtpm*
 -
  TRACING
  M:    Steven Rostedt <[email protected]>
  M:    Ingo Molnar <[email protected]>
  L:    [email protected]
  S:    Supported
  F:    drivers/s390/virtio/
 +F:    arch/s390/include/uapi/asm/virtio-ccw.h
  
  VIRTIO GPU DRIVER
  M:    David Airlie <[email protected]>
diff --combined block/bio.c
index cc60213e56d8695d121cec7e7402341175e24f46,ae9ad34e6a71f206cdcc717f0ac0993c8034cc99..b94a802f8ba341894d6c4eafb0c04f87eacbe1c6
@@@ -400,7 -400,7 +400,7 @@@ static void punt_bios_to_rescuer(struc
  
  /**
   * bio_alloc_bioset - allocate a bio for I/O
-  * @gfp_mask:   the GFP_ mask given to the slab allocator
+  * @gfp_mask:   the GFP_* mask given to the slab allocator
   * @nr_iovecs:        number of iovecs to pre-allocate
   * @bs:               the bio_set to allocate from.
   *
@@@ -917,9 -917,17 +917,9 @@@ int bio_iov_iter_get_pages(struct bio *
  }
  EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
  
 -struct submit_bio_ret {
 -      struct completion event;
 -      int error;
 -};
 -
  static void submit_bio_wait_endio(struct bio *bio)
  {
 -      struct submit_bio_ret *ret = bio->bi_private;
 -
 -      ret->error = blk_status_to_errno(bio->bi_status);
 -      complete(&ret->event);
 +      complete(bio->bi_private);
  }
  
  /**
   */
  int submit_bio_wait(struct bio *bio)
  {
 -      struct submit_bio_ret ret;
 +      DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map);
  
 -      init_completion(&ret.event);
 -      bio->bi_private = &ret;
 +      bio->bi_private = &done;
        bio->bi_end_io = submit_bio_wait_endio;
        bio->bi_opf |= REQ_SYNC;
        submit_bio(bio);
 -      wait_for_completion_io(&ret.event);
 +      wait_for_completion_io(&done);
  
 -      return ret.error;
 +      return blk_status_to_errno(bio->bi_status);
  }
  EXPORT_SYMBOL(submit_bio_wait);
  
@@@ -1230,8 -1239,8 +1230,8 @@@ struct bio *bio_copy_user_iov(struct re
         */
        bmd->is_our_pages = map_data ? 0 : 1;
        memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs);
 -      iov_iter_init(&bmd->iter, iter->type, bmd->iov,
 -                      iter->nr_segs, iter->count);
 +      bmd->iter = *iter;
 +      bmd->iter.iov = bmd->iov;
  
        ret = -ENOMEM;
        bio = bio_kmalloc(gfp_mask, nr_pages);
@@@ -1322,7 -1331,6 +1322,7 @@@ struct bio *bio_map_user_iov(struct req
        int ret, offset;
        struct iov_iter i;
        struct iovec iov;
 +      struct bio_vec *bvec;
  
        iov_for_each(iov, i, *iter) {
                unsigned long uaddr = (unsigned long) iov.iov_base;
                ret = get_user_pages_fast(uaddr, local_nr_pages,
                                (iter->type & WRITE) != WRITE,
                                &pages[cur_page]);
 -              if (ret < local_nr_pages) {
 +              if (unlikely(ret < local_nr_pages)) {
 +                      for (j = cur_page; j < page_limit; j++) {
 +                              if (!pages[j])
 +                                      break;
 +                              put_page(pages[j]);
 +                      }
                        ret = -EFAULT;
                        goto out_unmap;
                }
                offset = offset_in_page(uaddr);
                for (j = cur_page; j < page_limit; j++) {
                        unsigned int bytes = PAGE_SIZE - offset;
 +                      unsigned short prev_bi_vcnt = bio->bi_vcnt;
  
                        if (len <= 0)
                                break;
                                            bytes)
                                break;
  
 +                      /*
 +                       * check if vector was merged with previous
 +                       * drop page reference if needed
 +                       */
 +                      if (bio->bi_vcnt == prev_bi_vcnt)
 +                              put_page(pages[j]);
 +
                        len -= bytes;
                        offset = 0;
                }
        return bio;
  
   out_unmap:
 -      for (j = 0; j < nr_pages; j++) {
 -              if (!pages[j])
 -                      break;
 -              put_page(pages[j]);
 +      bio_for_each_segment_all(bvec, bio, j) {
 +              put_page(bvec->bv_page);
        }
   out:
        kfree(pages);
@@@ -1931,11 -1928,8 +1931,8 @@@ void bioset_free(struct bio_set *bs
        if (bs->rescue_workqueue)
                destroy_workqueue(bs->rescue_workqueue);
  
-       if (bs->bio_pool)
-               mempool_destroy(bs->bio_pool);
-       if (bs->bvec_pool)
-               mempool_destroy(bs->bvec_pool);
+       mempool_destroy(bs->bio_pool);
+       mempool_destroy(bs->bvec_pool);
  
        bioset_integrity_free(bs);
        bio_put_slab(bs);
@@@ -2035,37 -2029,6 +2032,6 @@@ int bio_associate_blkcg(struct bio *bio
  }
  EXPORT_SYMBOL_GPL(bio_associate_blkcg);
  
- /**
-  * bio_associate_current - associate a bio with %current
-  * @bio: target bio
-  *
-  * Associate @bio with %current if it hasn't been associated yet.  Block
-  * layer will treat @bio as if it were issued by %current no matter which
-  * task actually issues it.
-  *
-  * This function takes an extra reference of @task's io_context and blkcg
-  * which will be put when @bio is released.  The caller must own @bio,
-  * ensure %current->io_context exists, and is responsible for synchronizing
-  * calls to this function.
-  */
- int bio_associate_current(struct bio *bio)
- {
-       struct io_context *ioc;
-       if (bio->bi_css)
-               return -EBUSY;
-       ioc = current->io_context;
-       if (!ioc)
-               return -ENOENT;
-       get_io_context_active(ioc);
-       bio->bi_ioc = ioc;
-       bio->bi_css = task_get_css(current, io_cgrp_id);
-       return 0;
- }
- EXPORT_SYMBOL_GPL(bio_associate_current);
  /**
   * bio_disassociate_task - undo bio_associate_current()
   * @bio: target bio
diff --combined block/blk-lib.c
index 63fb971d65745ac0621c69b6bc22ad5b0b76dd84,f625fda5f0955a42ae6aa6f5379f15e417ddc66f..2bc544ce3d2e5a89c2c216c85d72e00f9172bb09
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   * Functions related to generic helpers functions
   */
@@@ -275,6 -274,40 +275,40 @@@ static unsigned int __blkdev_sectors_to
        return min(pages, (sector_t)BIO_MAX_PAGES);
  }
  
+ static int __blkdev_issue_zero_pages(struct block_device *bdev,
+               sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+               struct bio **biop)
+ {
+       struct request_queue *q = bdev_get_queue(bdev);
+       struct bio *bio = *biop;
+       int bi_size = 0;
+       unsigned int sz;
+       if (!q)
+               return -ENXIO;
+       while (nr_sects != 0) {
+               bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
+                              gfp_mask);
+               bio->bi_iter.bi_sector = sector;
+               bio_set_dev(bio, bdev);
+               bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+               while (nr_sects != 0) {
+                       sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
+                       bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
+                       nr_sects -= bi_size >> 9;
+                       sector += bi_size >> 9;
+                       if (bi_size < sz)
+                               break;
+               }
+               cond_resched();
+       }
+       *biop = bio;
+       return 0;
+ }
  /**
   * __blkdev_issue_zeroout - generate number of zero filed write bios
   * @bdev:     blockdev to issue
   *  Zero-fill a block range, either using hardware offload or by explicitly
   *  writing zeroes to the device.
   *
-  *  Note that this function may fail with -EOPNOTSUPP if the driver signals
-  *  zeroing offload support, but the device fails to process the command (for
-  *  some devices there is no non-destructive way to verify whether this
-  *  operation is actually supported).  In this case the caller should call
-  *  retry the call to blkdev_issue_zeroout() and the fallback path will be used.
-  *
   *  If a device is using logical block provisioning, the underlying space will
   *  not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
   *
@@@ -305,9 -332,6 +333,6 @@@ int __blkdev_issue_zeroout(struct block
                unsigned flags)
  {
        int ret;
-       int bi_size = 0;
-       struct bio *bio = *biop;
-       unsigned int sz;
        sector_t bs_mask;
  
        bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
        ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
                        biop, flags);
        if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
-               goto out;
-       ret = 0;
-       while (nr_sects != 0) {
-               bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
-                              gfp_mask);
-               bio->bi_iter.bi_sector = sector;
-               bio_set_dev(bio, bdev);
-               bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-               while (nr_sects != 0) {
-                       sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
-                       bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
-                       nr_sects -= bi_size >> 9;
-                       sector += bi_size >> 9;
-                       if (bi_size < sz)
-                               break;
-               }
-               cond_resched();
-       }
+               return ret;
  
-       *biop = bio;
- out:
-       return ret;
+       return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
+                                        biop);
  }
  EXPORT_SYMBOL(__blkdev_issue_zeroout);
  
  int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
                sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
  {
-       int ret;
-       struct bio *bio = NULL;
+       int ret = 0;
+       sector_t bs_mask;
+       struct bio *bio;
        struct blk_plug plug;
+       bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev);
  
+       bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+       if ((sector | nr_sects) & bs_mask)
+               return -EINVAL;
+ retry:
+       bio = NULL;
        blk_start_plug(&plug);
-       ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
-                       &bio, flags);
+       if (try_write_zeroes) {
+               ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+                                                 gfp_mask, &bio, flags);
+       } else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
+               ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects,
+                                               gfp_mask, &bio);
+       } else {
+               /* No zeroing offload support */
+               ret = -EOPNOTSUPP;
+       }
        if (ret == 0 && bio) {
                ret = submit_bio_wait(bio);
                bio_put(bio);
        }
        blk_finish_plug(&plug);
+       if (ret && try_write_zeroes) {
+               if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
+                       try_write_zeroes = false;
+                       goto retry;
+               }
+               if (!bdev_write_zeroes_sectors(bdev)) {
+                       /*
+                        * Zeroing offload support was indicated, but the
+                        * device reported ILLEGAL REQUEST (for some devices
+                        * there is no non-destructive way to verify whether
+                        * WRITE ZEROES is actually supported).
+                        */
+                       ret = -EOPNOTSUPP;
+               }
+       }
  
        return ret;
  }
diff --combined block/blk-mq-debugfs.c
index de294d775acfa413854c109eeaa08b9b6bdfd354,e4f2bb936e6630c6f529d9be97ccd27cc828b682..b56a4f35720d8a46e8a5daf3f2c63a51475308a3
@@@ -54,7 -54,6 +54,6 @@@ static const char *const blk_queue_flag
        QUEUE_FLAG_NAME(NOMERGES),
        QUEUE_FLAG_NAME(SAME_COMP),
        QUEUE_FLAG_NAME(FAIL_IO),
-       QUEUE_FLAG_NAME(STACKABLE),
        QUEUE_FLAG_NAME(NONROT),
        QUEUE_FLAG_NAME(IO_STAT),
        QUEUE_FLAG_NAME(DISCARD),
@@@ -75,6 -74,7 +74,7 @@@
        QUEUE_FLAG_NAME(REGISTERED),
        QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
        QUEUE_FLAG_NAME(QUIESCED),
+       QUEUE_FLAG_NAME(PREEMPT_ONLY),
  };
  #undef QUEUE_FLAG_NAME
  
@@@ -180,7 -180,6 +180,6 @@@ static const char *const hctx_state_nam
        HCTX_STATE_NAME(STOPPED),
        HCTX_STATE_NAME(TAG_ACTIVE),
        HCTX_STATE_NAME(SCHED_RESTART),
-       HCTX_STATE_NAME(TAG_WAITING),
        HCTX_STATE_NAME(START_ON_RUN),
  };
  #undef HCTX_STATE_NAME
@@@ -815,14 -814,10 +814,14 @@@ int blk_mq_debugfs_register(struct requ
                goto err;
  
        /*
 -       * blk_mq_init_hctx() attempted to do this already, but q->debugfs_dir
 +       * blk_mq_init_sched() attempted to do this already, but q->debugfs_dir
         * didn't exist yet (because we don't know what to name the directory
         * until the queue is registered to a gendisk).
         */
 +      if (q->elevator && !q->sched_debugfs_dir)
 +              blk_mq_debugfs_register_sched(q);
 +
 +      /* Similarly, blk_mq_init_hctx() couldn't do this previously. */
        queue_for_each_hw_ctx(q, hctx, i) {
                if (!hctx->debugfs_dir && blk_mq_debugfs_register_hctx(q, hctx))
                        goto err;
diff --combined block/blk-mq-tag.h
index c190165d92ea3fda4a24efca107be007f9787051,5932a7ac7fc4e9f98d417d303e80de18ae8c17e5..61deab0b5a5a565c1214ad305cac52f0dde7fb3d
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef INT_BLK_MQ_TAG_H
  #define INT_BLK_MQ_TAG_H
  
@@@ -44,14 -43,9 +44,9 @@@ static inline struct sbq_wait_state *bt
        return sbq_wait_ptr(bt, &hctx->wait_index);
  }
  
- enum {
-       BLK_MQ_TAG_CACHE_MIN    = 1,
-       BLK_MQ_TAG_CACHE_MAX    = 64,
- };
  enum {
        BLK_MQ_TAG_FAIL         = -1U,
-       BLK_MQ_TAG_MIN          = BLK_MQ_TAG_CACHE_MIN,
+       BLK_MQ_TAG_MIN          = 1,
        BLK_MQ_TAG_MAX          = BLK_MQ_TAG_FAIL - 1,
  };
  
diff --combined block/blk-mq.h
index 4933af9d61f736ed1b99630231b6988071b1e6d2,dcf379a892dda3c349884c4041a213a4bd1d8bd4..6c7c3ff5bf627d3e36a8e1bf1feca66ff00ac74d
@@@ -1,8 -1,8 +1,9 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef INT_BLK_MQ_H
  #define INT_BLK_MQ_H
  
  #include "blk-stat.h"
+ #include "blk-mq-tag.h"
  
  struct blk_mq_tag_set;
  
@@@ -26,16 -26,16 +27,16 @@@ struct blk_mq_ctx 
        struct kobject          kobj;
  } ____cacheline_aligned_in_smp;
  
- void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
  void blk_mq_freeze_queue(struct request_queue *q);
  void blk_mq_free_queue(struct request_queue *q);
  int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
  void blk_mq_wake_waiters(struct request_queue *q);
- bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *);
+ bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool);
  void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
- bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
  bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
                                bool wait);
+ struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
+                                       struct blk_mq_ctx *start);
  
  /*
   * Internal helpers for allocating/freeing the request map
@@@ -55,7 -55,7 +56,7 @@@ int blk_mq_alloc_rqs(struct blk_mq_tag_
   */
  void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
                                bool at_head);
- void blk_mq_request_bypass_insert(struct request *rq);
+ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue);
  void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
                                struct list_head *list);
  
@@@ -109,7 -109,7 +110,7 @@@ static inline void blk_mq_put_ctx(struc
  struct blk_mq_alloc_data {
        /* input parameter */
        struct request_queue *q;
-       unsigned int flags;
+       blk_mq_req_flags_t flags;
        unsigned int shallow_depth;
  
        /* input & output parameter */
@@@ -138,4 -138,53 +139,53 @@@ static inline bool blk_mq_hw_queue_mapp
  void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
                        unsigned int inflight[2]);
  
+ static inline void blk_mq_put_dispatch_budget(struct blk_mq_hw_ctx *hctx)
+ {
+       struct request_queue *q = hctx->queue;
+       if (q->mq_ops->put_budget)
+               q->mq_ops->put_budget(hctx);
+ }
+ static inline bool blk_mq_get_dispatch_budget(struct blk_mq_hw_ctx *hctx)
+ {
+       struct request_queue *q = hctx->queue;
+       if (q->mq_ops->get_budget)
+               return q->mq_ops->get_budget(hctx);
+       return true;
+ }
+ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
+                                          struct request *rq)
+ {
+       blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag);
+       rq->tag = -1;
+       if (rq->rq_flags & RQF_MQ_INFLIGHT) {
+               rq->rq_flags &= ~RQF_MQ_INFLIGHT;
+               atomic_dec(&hctx->nr_active);
+       }
+ }
+ static inline void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx,
+                                      struct request *rq)
+ {
+       if (rq->tag == -1 || rq->internal_tag == -1)
+               return;
+       __blk_mq_put_driver_tag(hctx, rq);
+ }
+ static inline void blk_mq_put_driver_tag(struct request *rq)
+ {
+       struct blk_mq_hw_ctx *hctx;
+       if (rq->tag == -1 || rq->internal_tag == -1)
+               return;
+       hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu);
+       __blk_mq_put_driver_tag(hctx, rq);
+ }
  #endif
diff --combined block/blk-throttle.c
index 8631763866c6d973225cd643f3d422c9c6a21845,fe49c465ec8604baf0da4ec9ecf078fb8fbd26f3..96ad32623427d4794ad7563369bc9f89bb85fd26
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   * Interface for controlling IO bandwidth on a request queue
   *
@@@ -1912,11 -1911,11 +1912,11 @@@ static void throtl_upgrade_state(struc
  
                tg->disptime = jiffies - 1;
                throtl_select_dispatch(sq);
 -              throtl_schedule_next_dispatch(sq, false);
 +              throtl_schedule_next_dispatch(sq, true);
        }
        rcu_read_unlock();
        throtl_select_dispatch(&td->service_queue);
 -      throtl_schedule_next_dispatch(&td->service_queue, false);
 +      throtl_schedule_next_dispatch(&td->service_queue, true);
        queue_work(kthrotld_workqueue, &td->dispatch_work);
  }
  
@@@ -2113,8 -2112,12 +2113,12 @@@ static inline void throtl_update_latenc
  static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
  {
  #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-       if (bio->bi_css)
+       if (bio->bi_css) {
+               if (bio->bi_cg_private)
+                       blkg_put(tg_to_blkg(bio->bi_cg_private));
                bio->bi_cg_private = tg;
+               blkg_get(tg_to_blkg(tg));
+       }
        blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
  #endif
  }
@@@ -2284,8 -2287,10 +2288,10 @@@ void blk_throtl_bio_endio(struct bio *b
  
        start_time = blk_stat_time(&bio->bi_issue_stat) >> 10;
        finish_time = __blk_stat_time(finish_time_ns) >> 10;
-       if (!start_time || finish_time <= start_time)
+       if (!start_time || finish_time <= start_time) {
+               blkg_put(tg_to_blkg(tg));
                return;
+       }
  
        lat = finish_time - start_time;
        /* this is only for bio based driver */
                tg->bio_cnt /= 2;
                tg->bad_bio_cnt /= 2;
        }
+       blkg_put(tg_to_blkg(tg));
  }
  #endif
  
diff --combined block/blk-wbt.c
index d822530e6aeade81a7c9b2b3d9c6a6cccc0bb351,e59d59c11ebbb308fe2cc41562fedfd90eecfe6c..b252da0e4c11051f7c78be797122448e231a0bde
@@@ -261,7 -261,7 +261,7 @@@ static inline bool stat_sample_valid(st
  
  static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
  {
 -      u64 now, issue = ACCESS_ONCE(rwb->sync_issue);
 +      u64 now, issue = READ_ONCE(rwb->sync_issue);
  
        if (!issue || !rwb->sync_cookie)
                return 0;
@@@ -654,7 -654,7 +654,7 @@@ void wbt_set_write_cache(struct rq_wb *
  }
  
  /*
-  * Disable wbt, if enabled by default. Only called from CFQ.
+  * Disable wbt, if enabled by default.
   */
  void wbt_disable_default(struct request_queue *q)
  {
diff --combined block/blk.h
index 85be8b232b373b3f69ac829f374e550f06725460,6ac43dfd68a7d489b295b5416fa1723b6e1cd7e8..3f1446937aece26f38ceb66cf4e3d159a23df871
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef BLK_INTERNAL_H
  #define BLK_INTERNAL_H
  
@@@ -123,8 -122,15 +123,15 @@@ void blk_account_io_done(struct reques
   * Internal atomic flags for request handling
   */
  enum rq_atomic_flags {
+       /*
+        * Keep these two bits first - not because we depend on the
+        * value of them, but we do depend on them being in the same
+        * byte of storage to ensure ordering on writes. Keeping them
+        * first will achieve that nicely.
+        */
        REQ_ATOM_COMPLETE = 0,
        REQ_ATOM_STARTED,
        REQ_ATOM_POLL_SLEPT,
  };
  
@@@ -149,45 -155,6 +156,6 @@@ static inline void blk_clear_rq_complet
  
  void blk_insert_flush(struct request *rq);
  
- static inline struct request *__elv_next_request(struct request_queue *q)
- {
-       struct request *rq;
-       struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
-       WARN_ON_ONCE(q->mq_ops);
-       while (1) {
-               if (!list_empty(&q->queue_head)) {
-                       rq = list_entry_rq(q->queue_head.next);
-                       return rq;
-               }
-               /*
-                * Flush request is running and flush request isn't queueable
-                * in the drive, we can hold the queue till flush request is
-                * finished. Even we don't do this, driver can't dispatch next
-                * requests and will requeue them. And this can improve
-                * throughput too. For example, we have request flush1, write1,
-                * flush 2. flush1 is dispatched, then queue is hold, write1
-                * isn't inserted to queue. After flush1 is finished, flush2
-                * will be dispatched. Since disk cache is already clean,
-                * flush2 will be finished very soon, so looks like flush2 is
-                * folded to flush1.
-                * Since the queue is hold, a flag is set to indicate the queue
-                * should be restarted later. Please see flush_end_io() for
-                * details.
-                */
-               if (fq->flush_pending_idx != fq->flush_running_idx &&
-                               !queue_flush_queueable(q)) {
-                       fq->flush_queue_delayed = 1;
-                       return NULL;
-               }
-               if (unlikely(blk_queue_bypass(q)) ||
-                   !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0))
-                       return NULL;
-       }
- }
  static inline void elv_activate_rq(struct request_queue *q, struct request *rq)
  {
        struct elevator_queue *e = q->elevator;
diff --combined block/genhd.c
index 630c0da6cfcf2633bf8340616b6ba1d013f5f408,997e598f3b86e92864fbb1694a01c6bda66e80e5..c2223f12a8051411d4e89a0bc2850e03d7d0427e
@@@ -588,6 -588,11 +588,11 @@@ static void register_disk(struct devic
        disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
        disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
  
+       if (disk->flags & GENHD_FL_HIDDEN) {
+               dev_set_uevent_suppress(ddev, 0);
+               return;
+       }
        /* No minors to use for partitions */
        if (!disk_part_scan_enabled(disk))
                goto exit;
@@@ -616,6 -621,11 +621,11 @@@ exit
        while ((part = disk_part_iter_next(&piter)))
                kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
        disk_part_iter_exit(&piter);
+       err = sysfs_create_link(&ddev->kobj,
+                               &disk->queue->backing_dev_info->dev->kobj,
+                               "bdi");
+       WARN_ON(err);
  }
  
  /**
   */
  void device_add_disk(struct device *parent, struct gendisk *disk)
  {
-       struct backing_dev_info *bdi;
        dev_t devt;
        int retval;
  
         * parameters make sense.
         */
        WARN_ON(disk->minors && !(disk->major || disk->first_minor));
-       WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));
+       WARN_ON(!disk->minors &&
+               !(disk->flags & (GENHD_FL_EXT_DEVT | GENHD_FL_HIDDEN)));
  
        disk->flags |= GENHD_FL_UP;
  
                WARN_ON(1);
                return;
        }
-       disk_to_dev(disk)->devt = devt;
-       /* ->major and ->first_minor aren't supposed to be
-        * dereferenced from here on, but set them just in case.
-        */
        disk->major = MAJOR(devt);
        disk->first_minor = MINOR(devt);
  
        disk_alloc_events(disk);
  
-       /* Register BDI before referencing it from bdev */
-       bdi = disk->queue->backing_dev_info;
-       bdi_register_owner(bdi, disk_to_dev(disk));
-       blk_register_region(disk_devt(disk), disk->minors, NULL,
-                           exact_match, exact_lock, disk);
+       if (disk->flags & GENHD_FL_HIDDEN) {
+               /*
+                * Don't let hidden disks show up in /proc/partitions,
+                * and don't bother scanning for partitions either.
+                */
+               disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
+               disk->flags |= GENHD_FL_NO_PART_SCAN;
+       } else {
+               /* Register BDI before referencing it from bdev */
+               disk_to_dev(disk)->devt = devt;
+               bdi_register_owner(disk->queue->backing_dev_info,
+                               disk_to_dev(disk));
+               blk_register_region(disk_devt(disk), disk->minors, NULL,
+                                   exact_match, exact_lock, disk);
+       }
        register_disk(parent, disk);
        blk_register_queue(disk);
  
         */
        WARN_ON_ONCE(!blk_get_queue(disk->queue));
  
-       retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
-                                  "bdi");
-       WARN_ON(retval);
        disk_add_events(disk);
        blk_integrity_add(disk);
  }
@@@ -705,7 -715,8 +715,8 @@@ void del_gendisk(struct gendisk *disk
        set_capacity(disk, 0);
        disk->flags &= ~GENHD_FL_UP;
  
-       sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
+       if (!(disk->flags & GENHD_FL_HIDDEN))
+               sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
        if (disk->queue) {
                /*
                 * Unregister bdi before releasing device numbers (as they can
        } else {
                WARN_ON(1);
        }
-       blk_unregister_region(disk_devt(disk), disk->minors);
  
-       part_stat_set_all(&disk->part0, 0);
-       disk->part0.stamp = 0;
+       if (!(disk->flags & GENHD_FL_HIDDEN))
+               blk_unregister_region(disk_devt(disk), disk->minors);
  
        kobject_put(disk->part0.holder_dir);
        kobject_put(disk->slave_dir);
+       part_stat_set_all(&disk->part0, 0);
+       disk->part0.stamp = 0;
        if (!sysfs_deprecated)
                sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
        pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
@@@ -785,6 -798,10 +798,10 @@@ struct gendisk *get_gendisk(dev_t devt
                spin_unlock_bh(&ext_devt_lock);
        }
  
+       if (disk && unlikely(disk->flags & GENHD_FL_HIDDEN)) {
+               put_disk(disk);
+               disk = NULL;
+       }
        return disk;
  }
  EXPORT_SYMBOL(get_gendisk);
@@@ -1028,6 -1045,15 +1045,15 @@@ static ssize_t disk_removable_show(stru
                       (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
  }
  
+ static ssize_t disk_hidden_show(struct device *dev,
+                                  struct device_attribute *attr, char *buf)
+ {
+       struct gendisk *disk = dev_to_disk(dev);
+       return sprintf(buf, "%d\n",
+                      (disk->flags & GENHD_FL_HIDDEN ? 1 : 0));
+ }
  static ssize_t disk_ro_show(struct device *dev,
                                   struct device_attribute *attr, char *buf)
  {
@@@ -1065,6 -1091,7 +1091,7 @@@ static ssize_t disk_discard_alignment_s
  static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
  static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
  static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
+ static DEVICE_ATTR(hidden, S_IRUGO, disk_hidden_show, NULL);
  static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
  static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
  static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
@@@ -1089,6 -1116,7 +1116,7 @@@ static struct attribute *disk_attrs[] 
        &dev_attr_range.attr,
        &dev_attr_ext_range.attr,
        &dev_attr_removable.attr,
+       &dev_attr_hidden.attr,
        &dev_attr_ro.attr,
        &dev_attr_size.attr,
        &dev_attr_alignment_offset.attr,
@@@ -1354,7 -1382,13 +1382,7 @@@ dev_t blk_lookup_devt(const char *name
  }
  EXPORT_SYMBOL(blk_lookup_devt);
  
 -struct gendisk *alloc_disk(int minors)
 -{
 -      return alloc_disk_node(minors, NUMA_NO_NODE);
 -}
 -EXPORT_SYMBOL(alloc_disk);
 -
 -struct gendisk *alloc_disk_node(int minors, int node_id)
 +struct gendisk *__alloc_disk_node(int minors, int node_id)
  {
        struct gendisk *disk;
        struct disk_part_tbl *ptbl;
        }
        return disk;
  }
 -EXPORT_SYMBOL(alloc_disk_node);
 +EXPORT_SYMBOL(__alloc_disk_node);
  
  struct kobject *get_disk(struct gendisk *disk)
  {
diff --combined drivers/block/Kconfig
index 7b2df7a54d8759f45e5467bb31a31a94500f7651,95e678716b5e9e80b04fd7eedc409a1fe7f5cb0e..923b417eaf4c9939aec7cc48681bcc797ec10128
@@@ -1,4 -1,3 +1,4 @@@
 +# SPDX-License-Identifier: GPL-2.0
  #
  # Block device driver configuration
  #
@@@ -18,7 -17,7 +18,7 @@@ if BLK_DE
  
  config BLK_DEV_NULL_BLK
        tristate "Null test block driver"
 -      depends on CONFIGFS_FS
 +      select CONFIGFS_FS
  
  config BLK_DEV_FD
        tristate "Normal floppy disk support"
@@@ -68,9 -67,13 +68,13 @@@ config AMIGA_Z2RA
          To compile this driver as a module, choose M here: the
          module will be called z2ram.
  
+ config CDROM
+       tristate
  config GDROM
        tristate "SEGA Dreamcast GD-ROM drive"
        depends on SH_DREAMCAST
+       select CDROM
        select BLK_SCSI_REQUEST # only for the generic cdrom code
        help
          A standard SEGA Dreamcast comes with a modified CD ROM drive called a
@@@ -348,6 -351,7 +352,7 @@@ config BLK_DEV_RAM_DA
  config CDROM_PKTCDVD
        tristate "Packet writing on CD/DVD media (DEPRECATED)"
        depends on !UML
+       select CDROM
        select BLK_SCSI_REQUEST
        help
          Note: This driver is deprecated and will be removed from the
diff --combined drivers/block/nbd.c
index 9adfb5445f8dca5a88a4ffe59d3573ed5b854e02,95cab69d9c8be14a27206a96dc451f41add31cda..5f2a4240a204d54fc6fe87e569dc6165d5190530
@@@ -243,6 -243,7 +243,6 @@@ static void nbd_size_set(struct nbd_dev
        struct nbd_config *config = nbd->config;
        config->blksize = blocksize;
        config->bytesize = blocksize * nr_blocks;
 -      nbd_size_update(nbd);
  }
  
  static void nbd_complete_rq(struct request *req)
@@@ -288,15 -289,6 +288,6 @@@ static enum blk_eh_timer_return nbd_xmi
                cmd->status = BLK_STS_TIMEOUT;
                return BLK_EH_HANDLED;
        }
-       /* If we are waiting on our dead timer then we could get timeout
-        * callbacks for our request.  For this we just want to reset the timer
-        * and let the queue side take care of everything.
-        */
-       if (!completion_done(&cmd->send_complete)) {
-               nbd_config_put(nbd);
-               return BLK_EH_RESET_TIMER;
-       }
        config = nbd->config;
  
        if (config->num_connections > 1) {
@@@ -386,15 -378,6 +377,15 @@@ static int sock_xmit(struct nbd_device 
        return result;
  }
  
 +/*
 + * Different settings for sk->sk_sndtimeo can result in different return values
 + * if there is a signal pending when we enter sendmsg, because reasons?
 + */
 +static inline int was_interrupted(int result)
 +{
 +      return result == -ERESTARTSYS || result == -EINTR;
 +}
 +
  /* always call with the tx_lock held */
  static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
  {
        result = sock_xmit(nbd, index, 1, &from,
                        (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
        if (result <= 0) {
 -              if (result == -ERESTARTSYS) {
 +              if (was_interrupted(result)) {
                        /* If we havne't sent anything we can just return BUSY,
                         * however if we have sent something we need to make
                         * sure we only allow this req to be sent until we are
@@@ -511,7 -494,7 +502,7 @@@ send_pages
                        }
                        result = sock_xmit(nbd, index, 1, &from, flags, &sent);
                        if (result <= 0) {
 -                              if (result == -ERESTARTSYS) {
 +                              if (was_interrupted(result)) {
                                        /* We've already sent the header, we
                                         * have no choice but to set pending and
                                         * return BUSY.
@@@ -723,9 -706,9 +714,9 @@@ static int wait_for_reconnect(struct nb
                return 0;
        if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
                return 0;
-       wait_event_interruptible_timeout(config->conn_wait,
-                                        atomic_read(&config->live_connections),
-                                        config->dead_conn_timeout);
+       wait_event_timeout(config->conn_wait,
+                          atomic_read(&config->live_connections),
+                          config->dead_conn_timeout);
        return atomic_read(&config->live_connections);
  }
  
@@@ -740,6 -723,7 +731,7 @@@ static int nbd_handle_cmd(struct nbd_cm
        if (!refcount_inc_not_zero(&nbd->config_refs)) {
                dev_err_ratelimited(disk_to_dev(nbd->disk),
                                    "Socks array is empty\n");
+               blk_mq_start_request(req);
                return -EINVAL;
        }
        config = nbd->config;
                dev_err_ratelimited(disk_to_dev(nbd->disk),
                                    "Attempted send on invalid socket\n");
                nbd_config_put(nbd);
+               blk_mq_start_request(req);
                return -EINVAL;
        }
        cmd->status = BLK_STS_OK;
@@@ -771,6 -756,7 +764,7 @@@ again
                         */
                        sock_shutdown(nbd);
                        nbd_config_put(nbd);
+                       blk_mq_start_request(req);
                        return -EIO;
                }
                goto again;
         * here so that it gets put _after_ the request that is already on the
         * dispatch list.
         */
+       blk_mq_start_request(req);
        if (unlikely(nsock->pending && nsock->pending != req)) {
                blk_mq_requeue_request(req, true);
                ret = 0;
        ret = nbd_send_cmd(nbd, cmd, index);
        if (ret == -EAGAIN) {
                dev_err_ratelimited(disk_to_dev(nbd->disk),
-                                   "Request send failed trying another connection\n");
+                                   "Request send failed, requeueing\n");
                nbd_mark_nsock_dead(nbd, nsock, 1);
-               mutex_unlock(&nsock->tx_lock);
-               goto again;
+               blk_mq_requeue_request(req, true);
+               ret = 0;
        }
  out:
        mutex_unlock(&nsock->tx_lock);
@@@ -820,7 -807,6 +815,6 @@@ static blk_status_t nbd_queue_rq(struc
         * done sending everything over the wire.
         */
        init_completion(&cmd->send_complete);
-       blk_mq_start_request(bd->rq);
  
        /* We can be called directly from the user space process, which means we
         * could possibly have signals pending so our sendmsg will fail.  In
         * appropriate.
         */
        ret = nbd_handle_cmd(cmd, hctx->queue_num);
 +      if (ret < 0)
 +              ret = BLK_STS_IOERR;
 +      else if (!ret)
 +              ret = BLK_STS_OK;
        complete(&cmd->send_complete);
  
 -      return ret < 0 ? BLK_STS_IOERR : BLK_STS_OK;
 +      return ret;
  }
  
  static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
@@@ -1102,7 -1084,6 +1096,7 @@@ static int nbd_start_device(struct nbd_
                args->index = i;
                queue_work(recv_workqueue, &args->work);
        }
 +      nbd_size_update(nbd);
        return error;
  }
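The nbd hunks above move blk_mq_start_request() out of nbd_queue_rq() and into nbd_handle_cmd(), so a request is marked started right before the driver can complete, requeue, or fail it, and every early-error return starts it first. A minimal sketch of that ordering against the stock blk-mq API follows; the demo_* names are hypothetical stand-ins, not nbd code.

#include <linux/blk-mq.h>

/* Hypothetical stand-ins for driver-specific state and transport. */
static bool demo_resource_unavailable(struct blk_mq_hw_ctx *hctx)
{
        return false;
}

static int demo_send(struct request *req)
{
        return 0;
}

static blk_status_t demo_queue_rq(struct blk_mq_hw_ctx *hctx,
                                  const struct blk_mq_queue_data *bd)
{
        struct request *req = bd->rq;

        /* Start the request before any path that completes, requeues, or
         * errors it; otherwise the timeout machinery never sees it. */
        blk_mq_start_request(req);

        if (demo_resource_unavailable(hctx)) {
                blk_mq_requeue_request(req, true);      /* try again later */
                return BLK_STS_OK;
        }

        if (demo_send(req) < 0)
                return BLK_STS_IOERR;

        return BLK_STS_OK;
}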
  
diff --combined drivers/block/null_blk.c
index cda69dbefe3ba52b78b61538773191cae4a1e8b5,50c83c4b2ea02ea57b11f931dc2088b771805250..c61960deb74aac4277d7994f8020fc8f7a65d3bd
@@@ -154,6 -154,10 +154,10 @@@ enum 
        NULL_Q_MQ               = 2,
  };
  
+ static int g_no_sched;
+ module_param_named(no_sched, g_no_sched, int, S_IRUGO);
+ MODULE_PARM_DESC(no_sched, "No io scheduler");
  static int g_submit_queues = 1;
  module_param_named(submit_queues, g_submit_queues, int, S_IRUGO);
  MODULE_PARM_DESC(submit_queues, "Number of submission queues");
@@@ -476,7 -480,7 +480,7 @@@ static struct configfs_item_operations 
        .release        = nullb_device_release,
  };
  
 -static struct config_item_type nullb_device_type = {
 +static const struct config_item_type nullb_device_type = {
        .ct_item_ops    = &nullb_device_ops,
        .ct_attrs       = nullb_device_attrs,
        .ct_owner       = THIS_MODULE,
@@@ -528,7 -532,7 +532,7 @@@ static struct configfs_group_operation
        .drop_item      = nullb_group_drop_item,
  };
  
 -static struct config_item_type nullb_group_type = {
 +static const struct config_item_type nullb_group_type = {
        .ct_group_ops   = &nullb_group_ops,
        .ct_attrs       = nullb_group_attrs,
        .ct_owner       = THIS_MODULE,
@@@ -1754,6 -1758,8 +1758,8 @@@ static int null_init_tag_set(struct nul
        set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
        set->cmd_size   = sizeof(struct nullb_cmd);
        set->flags = BLK_MQ_F_SHOULD_MERGE;
+       if (g_no_sched)
+               set->flags |= BLK_MQ_F_NO_SCHED;
        set->driver_data = NULL;
  
        if ((nullb && nullb->dev->blocking) || g_blocking)
@@@ -1985,8 -1991,10 +1991,10 @@@ static int __init null_init(void
  
        for (i = 0; i < nr_devices; i++) {
                dev = null_alloc_dev();
-               if (!dev)
+               if (!dev) {
+                       ret = -ENOMEM;
                        goto err_dev;
+               }
                ret = null_add_dev(dev);
                if (ret) {
                        null_free_dev(dev);
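The no_sched parameter added above maps straight onto BLK_MQ_F_NO_SCHED on the tag set, so the nullb devices come up without an I/O scheduler attached. A minimal sketch of a tag set configured the same way; demo_mq_ops, the queue depth, and demo_init_tag_set() are placeholders rather than null_blk's actual values.

#include <linux/blk-mq.h>

static struct blk_mq_ops demo_mq_ops;   /* a real driver must supply .queue_rq */

static int demo_init_tag_set(struct blk_mq_tag_set *set, bool no_sched)
{
        set->ops = &demo_mq_ops;
        set->nr_hw_queues = 1;
        set->queue_depth = 64;
        set->flags = BLK_MQ_F_SHOULD_MERGE;
        if (no_sched)
                set->flags |= BLK_MQ_F_NO_SCHED;  /* no elevator for these queues */
        return blk_mq_alloc_tag_set(set);
}

With the module parameter above, loading the driver as "modprobe null_blk no_sched=1" should leave /sys/block/nullb0/queue/scheduler reporting none.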
index b226835a909a3106f1adf2f6a5e6614596caae51,1d5057f5080ba124bac9f04330abdfde573b45a3..f8bd6ef3605a5be238ab486318b11df3c8b41d7c
@@@ -1,4 -1,3 +1,4 @@@
 +# SPDX-License-Identifier: GPL-2.0
  #
  # PARIDE configuration
  #
@@@ -26,6 -25,7 +26,7 @@@ config PARIDE_P
  config PARIDE_PCD
        tristate "Parallel port ATAPI CD-ROMs"
        depends on PARIDE
+       select CDROM
        select BLK_SCSI_REQUEST # only for the generic cdrom code
        ---help---
          This option enables the high-level driver for ATAPI CD-ROM devices
diff --combined drivers/block/skd_main.c
index 64d0fc17c1742ab74aa232da503d08e344b594b2,802ab9f7a8c11f5a50a9d192ffb5e17838475286..2819f23e8bf2fe8f18cac9ccae63a8e298adc73a
@@@ -1967,7 -1967,8 +1967,8 @@@ static void skd_isr_msg_from_dev(struc
                break;
  
        case FIT_MTD_CMD_LOG_HOST_ID:
-               skdev->connect_time_stamp = get_seconds();
+               /* hardware interface overflows in y2106 */
+               skdev->connect_time_stamp = (u32)ktime_get_real_seconds();
                data = skdev->connect_time_stamp & 0xFFFF;
                mtd = FIT_MXD_CONS(FIT_MTD_CMD_LOG_TIME_STAMP_LO, 0, data);
                SKD_WRITEL(skdev, mtd, FIT_MSG_TO_DEVICE);
@@@ -2604,7 -2605,7 +2605,7 @@@ static void *skd_alloc_dma(struct skd_d
                return NULL;
        *dma_handle = dma_map_single(dev, buf, s->size, dir);
        if (dma_mapping_error(dev, *dma_handle)) {
 -              kfree(buf);
 +              kmem_cache_free(s, buf);
                buf = NULL;
        }
        return buf;
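The skd_alloc_dma() fix above pairs the free with the allocator that was actually used: the buffer comes from a kmem_cache, and handing it to kfree() is not guaranteed to be valid. The pairing in isolation, with hypothetical names:

#include <linux/slab.h>

/* demo_cache would be created elsewhere with kmem_cache_create(). */
static struct kmem_cache *demo_cache;

static void *demo_alloc(void)
{
        return kmem_cache_alloc(demo_cache, GFP_KERNEL);
}

static void demo_free(void *buf)
{
        kmem_cache_free(demo_cache, buf);       /* not kfree(buf) */
}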
diff --combined drivers/cdrom/Makefile
index a95566ff47d30043994642901b6341cc8bb78ec1,7f3f43cc22574f516f6b962c8fa281fdb9c6d46e..0f3664b45f485821bdf60dfe34d27d15d4fa4e5c
@@@ -1,14 -1,2 +1,3 @@@
- # Makefile for the kernel cdrom device drivers.
- #
- # 30 Jan 1998, Michael Elizabeth Chastain, <mailto:[email protected]>
- # Rewritten to use lists instead of if-statements.
- # Each configuration option enables a list of files.
- obj-$(CONFIG_BLK_DEV_IDECD)   +=              cdrom.o
- obj-$(CONFIG_BLK_DEV_SR)      +=              cdrom.o
- obj-$(CONFIG_PARIDE_PCD)      +=              cdrom.o
- obj-$(CONFIG_CDROM_PKTCDVD)   +=              cdrom.o
- obj-$(CONFIG_GDROM)           += gdrom.o      cdrom.o
 +# SPDX-License-Identifier: GPL-2.0
+ obj-$(CONFIG_CDROM)   += cdrom.o
+ obj-$(CONFIG_GDROM)   += gdrom.o
diff --combined drivers/ide/ide-pm.c
index dccdca9eda38692fcbd0764908d9e532a26a7ff0,f56d742908df65a45816a9bb9dfde0f380742ec9..ad8a125defdd51133bada4a30d16f890e5223e31
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  #include <linux/kernel.h>
  #include <linux/gfp.h>
  #include <linux/ide.h>
@@@ -90,9 -89,9 +90,9 @@@ int generic_ide_resume(struct device *d
        }
  
        memset(&rqpm, 0, sizeof(rqpm));
-       rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+       rq = blk_get_request_flags(drive->queue, REQ_OP_DRV_IN,
+                                  BLK_MQ_REQ_PREEMPT);
        ide_req(rq)->type = ATA_PRIV_PM_RESUME;
-       rq->rq_flags |= RQF_PREEMPT;
        rq->special = &rqpm;
        rqpm.pm_step = IDE_PM_START_RESUME;
        rqpm.pm_state = PM_EVENT_ON;
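The generic_ide_resume() change above passes the preempt intent at allocation time instead of flagging the request afterwards; as far as this series goes, that is what allows the allocation to succeed while the queue only admits preempt requests around suspend/resume. A sketch of the call pattern (demo_alloc_pm_request() is hypothetical):

#include <linux/blkdev.h>
#include <linux/blk-mq.h>

static struct request *demo_alloc_pm_request(struct request_queue *q)
{
        /* BLK_MQ_REQ_PREEMPT at allocation time replaces setting
         * RQF_PREEMPT after blk_get_request() has already returned. */
        return blk_get_request_flags(q, REQ_OP_DRV_IN, BLK_MQ_REQ_PREEMPT);
}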
index 08035634795c12d26730375abc4ac8b7c399c5ad,8c5a626343d4da98f26ae503fcf4af9bd7777d61..a27d85232ce1343ce802576ab3584ed67be6f35d
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   * Primary bucket allocation code
   *
@@@ -407,7 -406,8 +407,8 @@@ long bch_bucket_alloc(struct cache *ca
  
        finish_wait(&ca->set->bucket_wait, &w);
  out:
-       wake_up_process(ca->alloc_thread);
+       if (ca->alloc_thread)
+               wake_up_process(ca->alloc_thread);
  
        trace_bcache_alloc(ca, reserve);
  
                b->prio = INITIAL_PRIO;
        }
  
+       if (ca->set->avail_nbuckets > 0) {
+               ca->set->avail_nbuckets--;
+               bch_update_bucket_in_use(ca->set, &ca->set->gc_stats);
+       }
        return r;
  }
  
@@@ -449,6 -454,11 +455,11 @@@ void __bch_bucket_free(struct cache *ca
  {
        SET_GC_MARK(b, 0);
        SET_GC_SECTORS_USED(b, 0);
+       if (ca->set->avail_nbuckets < ca->set->nbuckets) {
+               ca->set->avail_nbuckets++;
+               bch_update_bucket_in_use(ca->set, &ca->set->gc_stats);
+       }
  }
  
  void bch_bucket_free(struct cache_set *c, struct bkey *k)
@@@ -601,7 -611,7 +612,7 @@@ bool bch_alloc_sectors(struct cache_se
  
        /*
         * If we had to allocate, we might race and not need to allocate the
-        * second time we call find_data_bucket(). If we allocated a bucket but
+        * second time we call pick_data_bucket(). If we allocated a bucket but
         * didn't use it, drop the refcount bch_bucket_alloc_set() took:
         */
        if (KEY_PTRS(&alloc.key))
index abd31e847f967203e9f702cb4ca6e1772f6a7d46,e274082330dcd8b1e564576c2bedc581c830d650..843877e017e1afa51138ede3601ec0c55daf9e8e
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef _BCACHE_H
  #define _BCACHE_H
  
  #include <linux/mutex.h>
  #include <linux/rbtree.h>
  #include <linux/rwsem.h>
+ #include <linux/refcount.h>
  #include <linux/types.h>
  #include <linux/workqueue.h>
  
@@@ -266,9 -266,6 +267,6 @@@ struct bcache_device 
        atomic_t                *stripe_sectors_dirty;
        unsigned long           *full_dirty_stripes;
  
-       unsigned long           sectors_dirty_last;
-       long                    sectors_dirty_derivative;
        struct bio_set          *bio_split;
  
        unsigned                data_csum:1;
@@@ -300,7 -297,7 +298,7 @@@ struct cached_dev 
        struct semaphore        sb_write_mutex;
  
        /* Refcount on the cache set. Always nonzero when we're caching. */
-       atomic_t                count;
+       refcount_t              count;
        struct work_struct      detach;
  
        /*
  
        uint64_t                writeback_rate_target;
        int64_t                 writeback_rate_proportional;
-       int64_t                 writeback_rate_derivative;
-       int64_t                 writeback_rate_change;
+       int64_t                 writeback_rate_integral;
+       int64_t                 writeback_rate_integral_scaled;
+       int32_t                 writeback_rate_change;
  
        unsigned                writeback_rate_update_seconds;
-       unsigned                writeback_rate_d_term;
+       unsigned                writeback_rate_i_term_inverse;
        unsigned                writeback_rate_p_term_inverse;
+       unsigned                writeback_rate_minimum;
  };
  
  enum alloc_reserve {
@@@ -582,6 -581,7 +582,7 @@@ struct cache_set 
        uint8_t                 need_gc;
        struct gc_stat          gc_stats;
        size_t                  nbuckets;
+       size_t                  avail_nbuckets;
  
        struct task_struct      *gc_thread;
        /* Where in the btree gc currently is */
@@@ -807,13 -807,13 +808,13 @@@ do {                                                                    
  
  static inline void cached_dev_put(struct cached_dev *dc)
  {
-       if (atomic_dec_and_test(&dc->count))
+       if (refcount_dec_and_test(&dc->count))
                schedule_work(&dc->detach);
  }
  
  static inline bool cached_dev_get(struct cached_dev *dc)
  {
-       if (!atomic_inc_not_zero(&dc->count))
+       if (!refcount_inc_not_zero(&dc->count))
                return false;
  
        /* Paired with the mb in cached_dev_attach */
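The cached_dev reference count switches from atomic_t to refcount_t above; the gain is that refcount_t saturates and warns on overflow or underflow instead of silently wrapping into a use-after-free. The same get/put shape on a hypothetical object:

#include <linux/refcount.h>
#include <linux/slab.h>

struct demo_obj {
        refcount_t ref;
        /* ... payload ... */
};

static bool demo_get(struct demo_obj *o)
{
        /* Fails once the count has already dropped to zero. */
        return refcount_inc_not_zero(&o->ref);
}

static void demo_put(struct demo_obj *o)
{
        /* The last put frees; an extra put warns instead of wrapping. */
        if (refcount_dec_and_test(&o->ref))
                kfree(o);
}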
index 658c54b3b07a96e8f53b73b1e30eba117e2a2274,d8865e6ead37046b6d9a0b9c56baa1306a9ee089..11c5503d31dc3029df2cde14f8f9e9fc48514bc2
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   * Copyright (C) 2010 Kent Overstreet <[email protected]>
   *
@@@ -1241,6 -1240,11 +1241,11 @@@ void bch_initial_mark_key(struct cache_
        __bch_btree_mark_key(c, level, k);
  }
  
+ void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats)
+ {
+       stats->in_use = (c->nbuckets - c->avail_nbuckets) * 100 / c->nbuckets;
+ }
  static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
  {
        uint8_t stale = 0;
@@@ -1652,9 -1656,8 +1657,8 @@@ static void btree_gc_start(struct cache
        mutex_unlock(&c->bucket_lock);
  }
  
- static size_t bch_btree_gc_finish(struct cache_set *c)
+ static void bch_btree_gc_finish(struct cache_set *c)
  {
-       size_t available = 0;
        struct bucket *b;
        struct cache *ca;
        unsigned i;
        }
        rcu_read_unlock();
  
+       c->avail_nbuckets = 0;
        for_each_cache(ca, c, i) {
                uint64_t *i;
  
                        BUG_ON(!GC_MARK(b) && GC_SECTORS_USED(b));
  
                        if (!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE)
-                               available++;
+                               c->avail_nbuckets++;
                }
        }
  
        mutex_unlock(&c->bucket_lock);
-       return available;
  }
  
  static void bch_btree_gc(struct cache_set *c)
  {
        int ret;
-       unsigned long available;
        struct gc_stat stats;
        struct closure writes;
        struct btree_op op;
                        pr_warn("gc failed!");
        } while (ret);
  
-       available = bch_btree_gc_finish(c);
+       bch_btree_gc_finish(c);
        wake_up_allocators(c);
  
        bch_time_stats_update(&c->btree_gc_time, start_time);
  
        stats.key_bytes *= sizeof(uint64_t);
        stats.data      <<= 9;
-       stats.in_use    = (c->nbuckets - available) * 100 / c->nbuckets;
+       bch_update_bucket_in_use(c, &stats);
        memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
  
        trace_bcache_gc_end(c);
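With avail_nbuckets maintained at alloc/free time, bch_update_bucket_in_use() can derive occupancy directly instead of recounting buckets after every GC pass. A quick worked example of the formula above, with invented numbers: for nbuckets = 1000 and avail_nbuckets = 250, in_use = (1000 - 250) * 100 / 1000 = 75 percent.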
index 42204d61bc9544d29f1d9b53f8eb0b6211c909f7,4073aca09a4982af8647dc5bae9cabf24f82e540..d211e2c25b6bce30591e131663b77123c099b3df
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef _BCACHE_BTREE_H
  #define _BCACHE_BTREE_H
  
@@@ -306,5 -305,5 +306,5 @@@ void bch_keybuf_del(struct keybuf *, st
  struct keybuf_key *bch_keybuf_next(struct keybuf *);
  struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *,
                                          struct bkey *, keybuf_pred_fn *);
+ void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats);
  #endif
index 965907ce1e2097672d260f43fb8f7ddc5343b414,00fb314cce57c4e910601eb87085f3d58bce321b..ccfbea6f9f6ba93fc0e5dec6103bf5bef960f64b
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef _LINUX_CLOSURE_H
  #define _LINUX_CLOSURE_H
  
@@@ -252,6 -251,12 +252,12 @@@ static inline void set_closure_fn(struc
  static inline void closure_queue(struct closure *cl)
  {
        struct workqueue_struct *wq = cl->wq;
+       /**
+        * Changes made to closure, work_struct, or a couple of other structs
+        * may leave work.func pointing at the wrong location.
+        */
+       BUILD_BUG_ON(offsetof(struct closure, fn)
+                    != offsetof(struct work_struct, func));
        if (wq) {
                INIT_WORK(&cl->work, cl->work.func);
                BUG_ON(!queue_work(wq, &cl->work));
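The closure_queue() hunk above turns a layout assumption into a compile-time check: reusing cl->work.func as the closure's fn is only safe if the two fields overlay each other. The same BUILD_BUG_ON()/offsetof() idiom in isolation, with throwaway struct names:

#include <linux/bug.h>
#include <linux/stddef.h>

struct demo_a {
        void *first;
        void (*fn)(struct demo_a *);
};

struct demo_b {
        void *first;
        void (*func)(struct demo_b *);
};

static inline void demo_check_layout(void)
{
        /* The build fails here if either struct is ever reordered. */
        BUILD_BUG_ON(offsetof(struct demo_a, fn) !=
                     offsetof(struct demo_b, func));
}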
index 3475d6628e219f6eecda80e1bf98c56ef67c3bc4,597dd1e87beab2504c40475b996058efdafdbe58..3a7aed7282b2a0227e9f04cec6d82404bad55f23
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   * Main bcache entry point - handle a read or a write request and decide what to
   * do with it; the make_request functions are called by the block layer.
@@@ -27,12 -26,12 +27,12 @@@ struct kmem_cache *bch_search_cache
  
  static void bch_data_insert_start(struct closure *);
  
- static unsigned cache_mode(struct cached_dev *dc, struct bio *bio)
+ static unsigned cache_mode(struct cached_dev *dc)
  {
        return BDEV_CACHE_MODE(&dc->sb);
  }
  
- static bool verify(struct cached_dev *dc, struct bio *bio)
+ static bool verify(struct cached_dev *dc)
  {
        return dc->verify;
  }
@@@ -370,7 -369,7 +370,7 @@@ static struct hlist_head *iohash(struc
  static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
  {
        struct cache_set *c = dc->disk.c;
-       unsigned mode = cache_mode(dc, bio);
+       unsigned mode = cache_mode(dc);
        unsigned sectors, congested = bch_get_congested(c);
        struct task_struct *task = current;
        struct io *i;
             op_is_write(bio_op(bio))))
                goto skip;
  
+       /*
+        * Flag for bypass if the IO is for read-ahead or background,
+        * unless the read-ahead request is for metadata (e.g., for gfs2).
+        */
+       if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) &&
+           !(bio->bi_opf & REQ_META))
+               goto skip;
        if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
            bio_sectors(bio) & (c->sb.block_size - 1)) {
                pr_debug("skipping unaligned io");
@@@ -463,6 -470,7 +471,7 @@@ struct search 
        unsigned                recoverable:1;
        unsigned                write:1;
        unsigned                read_dirty_data:1;
+       unsigned                cache_missed:1;
  
        unsigned long           start_time;
  
@@@ -649,6 -657,7 +658,7 @@@ static inline struct search *search_all
  
        s->orig_bio             = bio;
        s->cache_miss           = NULL;
+       s->cache_missed         = 0;
        s->d                    = d;
        s->recoverable          = 1;
        s->write                = op_is_write(bio_op(bio));
@@@ -698,8 -707,16 +708,16 @@@ static void cached_dev_read_error(struc
  {
        struct search *s = container_of(cl, struct search, cl);
        struct bio *bio = &s->bio.bio;
+       struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
  
-       if (s->recoverable) {
+       /*
+       * If the cache device is dirty (dc->has_dirty is non-zero), then
+       * recovering a failed read request from the cached device may return
+       * stale data. So read failure recovery is only permitted when the
+       * cache device is clean.
+        */
+       if (s->recoverable &&
+           (dc && !atomic_read(&dc->has_dirty))) {
                /* Retry from the backing device: */
                trace_bcache_read_retry(s->orig_bio);
  
@@@ -740,7 -757,7 +758,7 @@@ static void cached_dev_read_done(struc
                s->cache_miss = NULL;
        }
  
-       if (verify(dc, &s->bio.bio) && s->recoverable && !s->read_dirty_data)
+       if (verify(dc) && s->recoverable && !s->read_dirty_data)
                bch_data_verify(dc, s->orig_bio);
  
        bio_complete(s);
@@@ -760,12 -777,12 +778,12 @@@ static void cached_dev_read_done_bh(str
        struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
  
        bch_mark_cache_accounting(s->iop.c, s->d,
-                                 !s->cache_miss, s->iop.bypass);
+                                 !s->cache_missed, s->iop.bypass);
        trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass);
  
        if (s->iop.status)
                continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
-       else if (s->iop.bio || verify(dc, &s->bio.bio))
+       else if (s->iop.bio || verify(dc))
                continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq);
        else
                continue_at_nobarrier(cl, cached_dev_bio_complete, NULL);
@@@ -779,6 -796,8 +797,8 @@@ static int cached_dev_cache_miss(struc
        struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
        struct bio *miss, *cache_bio;
  
+       s->cache_missed = 1;
        if (s->cache_miss || s->iop.bypass) {
                miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
                ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
@@@ -892,7 -911,7 +912,7 @@@ static void cached_dev_write(struct cac
                s->iop.bypass = true;
  
        if (should_writeback(dc, s->orig_bio,
-                            cache_mode(dc, bio),
+                            cache_mode(dc),
                             s->iop.bypass)) {
                s->iop.bypass = false;
                s->iop.writeback = true;
index 234b2f5b286df209984ed1223ae51cedfbb9e314,2290bffd49228e8ac85f7bd999caf9524e7d9036..b4184092c7279fa2fe1246f6a3e70650c850d6a1
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   * bcache sysfs interfaces
   *
@@@ -82,8 -81,9 +82,9 @@@ rw_attribute(writeback_delay)
  rw_attribute(writeback_rate);
  
  rw_attribute(writeback_rate_update_seconds);
- rw_attribute(writeback_rate_d_term);
+ rw_attribute(writeback_rate_i_term_inverse);
  rw_attribute(writeback_rate_p_term_inverse);
+ rw_attribute(writeback_rate_minimum);
  read_attribute(writeback_rate_debug);
  
  read_attribute(stripe_size);
@@@ -131,15 -131,16 +132,16 @@@ SHOW(__bch_cached_dev
        sysfs_hprint(writeback_rate,    dc->writeback_rate.rate << 9);
  
        var_print(writeback_rate_update_seconds);
-       var_print(writeback_rate_d_term);
+       var_print(writeback_rate_i_term_inverse);
        var_print(writeback_rate_p_term_inverse);
+       var_print(writeback_rate_minimum);
  
        if (attr == &sysfs_writeback_rate_debug) {
                char rate[20];
                char dirty[20];
                char target[20];
                char proportional[20];
-               char derivative[20];
+               char integral[20];
                char change[20];
                s64 next_io;
  
                bch_hprint(dirty,       bcache_dev_sectors_dirty(&dc->disk) << 9);
                bch_hprint(target,      dc->writeback_rate_target << 9);
                bch_hprint(proportional,dc->writeback_rate_proportional << 9);
-               bch_hprint(derivative,  dc->writeback_rate_derivative << 9);
+               bch_hprint(integral,    dc->writeback_rate_integral_scaled << 9);
                bch_hprint(change,      dc->writeback_rate_change << 9);
  
                next_io = div64_s64(dc->writeback_rate.next - local_clock(),
                               "dirty:\t\t%s\n"
                               "target:\t\t%s\n"
                               "proportional:\t%s\n"
-                              "derivative:\t%s\n"
+                              "integral:\t%s\n"
                               "change:\t\t%s/sec\n"
                               "next io:\t%llims\n",
                               rate, dirty, target, proportional,
-                              derivative, change, next_io);
+                              integral, change, next_io);
        }
  
        sysfs_hprint(dirty_data,
@@@ -214,7 -215,7 +216,7 @@@ STORE(__cached_dev
                            dc->writeback_rate.rate, 1, INT_MAX);
  
        d_strtoul_nonzero(writeback_rate_update_seconds);
-       d_strtoul(writeback_rate_d_term);
+       d_strtoul(writeback_rate_i_term_inverse);
        d_strtoul_nonzero(writeback_rate_p_term_inverse);
  
        d_strtoi_h(sequential_cutoff);
@@@ -320,7 -321,7 +322,7 @@@ static struct attribute *bch_cached_dev
        &sysfs_writeback_percent,
        &sysfs_writeback_rate,
        &sysfs_writeback_rate_update_seconds,
-       &sysfs_writeback_rate_d_term,
+       &sysfs_writeback_rate_i_term_inverse,
        &sysfs_writeback_rate_p_term_inverse,
        &sysfs_writeback_rate_debug,
        &sysfs_dirty_data,
@@@ -746,6 -747,11 +748,11 @@@ static struct attribute *bch_cache_set_
  };
  KTYPE(bch_cache_set_internal);
  
+ static int __bch_cache_cmp(const void *l, const void *r)
+ {
+       return *((uint16_t *)r) - *((uint16_t *)l);
+ }
  SHOW(__bch_cache)
  {
        struct cache *ca = container_of(kobj, struct cache, kobj);
                                               CACHE_REPLACEMENT(&ca->sb));
  
        if (attr == &sysfs_priority_stats) {
-               int cmp(const void *l, const void *r)
-               {       return *((uint16_t *) r) - *((uint16_t *) l); }
                struct bucket *b;
                size_t n = ca->sb.nbuckets, i;
                size_t unused = 0, available = 0, dirty = 0, meta = 0;
                        p[i] = ca->buckets[i].prio;
                mutex_unlock(&ca->set->bucket_lock);
  
-               sort(p, n, sizeof(uint16_t), cmp, NULL);
+               sort(p, n, sizeof(uint16_t), __bch_cache_cmp, NULL);
  
                while (n &&
                       !cached[n - 1])
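Hoisting the comparator out of the SHOW() body above replaces a GCC nested function with the ordinary file-scope function that sort() expects. The same descending u16 sort, standalone (demo_* names are placeholders):

#include <linux/sort.h>
#include <linux/types.h>

/* Descending order: larger priorities first, as in the sysfs dump above. */
static int demo_u16_cmp_desc(const void *l, const void *r)
{
        return *(const uint16_t *)r - *(const uint16_t *)l;
}

static void demo_sort_prios(uint16_t *p, size_t n)
{
        sort(p, n, sizeof(uint16_t), demo_u16_cmp_desc, NULL);
}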
diff --combined drivers/md/bcache/util.h
index f54b58282f7756acd13600206714a1e4064728b4,8f509290bb02ec1b77c9306e983adada04aa3351..ed5e8a412eb8e3582251495e137741fa6a665cf6
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  
  #ifndef _BCACHE_UTIL_H
  #define _BCACHE_UTIL_H
@@@ -442,10 -441,10 +442,10 @@@ struct bch_ratelimit 
        uint64_t                next;
  
        /*
-        * Rate at which we want to do work, in units per nanosecond
+        * Rate at which we want to do work, in units per second
         * The units here correspond to the units passed to bch_next_delay()
         */
-       unsigned                rate;
+       uint32_t                rate;
  };
  
  static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
index 70454f2ad2faacde608f40638dd55955eed4e586,9b770b13bdf62058f372cb799575c0f9b399d065..56a37884ca8b44725794c59fbe9197beb679314c
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   * background writeback - scan btree for dirty data and write it to the backing
   * device
@@@ -26,48 -25,63 +26,63 @@@ static void __update_writeback_rate(str
                                bcache_flash_devs_sectors_dirty(c);
        uint64_t cache_dirty_target =
                div_u64(cache_sectors * dc->writeback_percent, 100);
        int64_t target = div64_u64(cache_dirty_target * bdev_sectors(dc->bdev),
                                   c->cached_dev_sectors);
  
-       /* PD controller */
+       /*
+        * PI controller:
+        * Figures out the amount that should be written per second.
+        *
+        * First, the error (number of sectors that are dirty beyond our
+        * target) is calculated.  The error is accumulated (numerically
+        * integrated).
+        *
+        * Then, the proportional value and integral value are scaled
+        * based on configured values.  These are stored as inverses to
+        * avoid fixed point math and to make configuration easy-- e.g.
+        * the default value of 40 for writeback_rate_p_term_inverse
+        * attempts to write at a rate that would retire all the dirty
+        * blocks in 40 seconds.
+        *
+        * The writeback_rate_i_term_inverse value of 10000 means that 1/10000th
+        * of the error is accumulated in the integral term per second.
+        * This acts as a slow, long-term average that is not subject to
+        * variations in usage like the p term.
+        */
        int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
-       int64_t derivative = dirty - dc->disk.sectors_dirty_last;
-       int64_t proportional = dirty - target;
-       int64_t change;
-       dc->disk.sectors_dirty_last = dirty;
-       /* Scale to sectors per second */
-       proportional *= dc->writeback_rate_update_seconds;
-       proportional = div_s64(proportional, dc->writeback_rate_p_term_inverse);
-       derivative = div_s64(derivative, dc->writeback_rate_update_seconds);
-       derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative,
-                             (dc->writeback_rate_d_term /
-                              dc->writeback_rate_update_seconds) ?: 1, 0);
-       derivative *= dc->writeback_rate_d_term;
-       derivative = div_s64(derivative, dc->writeback_rate_p_term_inverse);
-       change = proportional + derivative;
+       int64_t error = dirty - target;
+       int64_t proportional_scaled =
+               div_s64(error, dc->writeback_rate_p_term_inverse);
+       int64_t integral_scaled;
+       uint32_t new_rate;
+       if ((error < 0 && dc->writeback_rate_integral > 0) ||
+           (error > 0 && time_before64(local_clock(),
+                        dc->writeback_rate.next + NSEC_PER_MSEC))) {
+               /*
+                * Only decrease the integral term if it's more than
+                * zero.  Only increase the integral term if the device
+                * is keeping up.  (Don't wind up the integral
+                * ineffectively in either case).
+                *
+                * It's necessary to scale this by
+                * writeback_rate_update_seconds to keep the integral
+                * term dimensioned properly.
+                */
+               dc->writeback_rate_integral += error *
+                       dc->writeback_rate_update_seconds;
+       }
  
-       /* Don't increase writeback rate if the device isn't keeping up */
-       if (change > 0 &&
-           time_after64(local_clock(),
-                        dc->writeback_rate.next + NSEC_PER_MSEC))
-               change = 0;
+       integral_scaled = div_s64(dc->writeback_rate_integral,
+                       dc->writeback_rate_i_term_inverse);
  
-       dc->writeback_rate.rate =
-               clamp_t(int64_t, (int64_t) dc->writeback_rate.rate + change,
-                       1, NSEC_PER_MSEC);
+       new_rate = clamp_t(int32_t, (proportional_scaled + integral_scaled),
+                       dc->writeback_rate_minimum, NSEC_PER_SEC);
  
-       dc->writeback_rate_proportional = proportional;
-       dc->writeback_rate_derivative = derivative;
-       dc->writeback_rate_change = change;
+       dc->writeback_rate_proportional = proportional_scaled;
+       dc->writeback_rate_integral_scaled = integral_scaled;
+       dc->writeback_rate_change = new_rate - dc->writeback_rate.rate;
+       dc->writeback_rate.rate = new_rate;
        dc->writeback_rate_target = target;
  }
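To make the controller above concrete, here is one update step using the defaults set later in this file (writeback_rate_p_term_inverse = 40, writeback_rate_i_term_inverse = 10000, writeback_rate_update_seconds = 5); the error value is invented. If the device is 80,000 sectors over target, the proportional term asks for 80,000 / 40 = 2,000 sectors/s. As long as the device is keeping up, each 5-second update also adds 80,000 * 5 = 400,000 to the integral, so after the first update the integral term contributes 400,000 / 10,000 = 40 sectors/s and keeps accumulating until the error unwinds. The sum is then clamped between writeback_rate_minimum and NSEC_PER_SEC before it becomes the new rate.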
  
@@@ -180,13 -194,21 +195,21 @@@ static void write_dirty(struct closure 
        struct dirty_io *io = container_of(cl, struct dirty_io, cl);
        struct keybuf_key *w = io->bio.bi_private;
  
-       dirty_init(w);
-       bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0);
-       io->bio.bi_iter.bi_sector = KEY_START(&w->key);
-       bio_set_dev(&io->bio, io->dc->bdev);
-       io->bio.bi_end_io       = dirty_endio;
+       /*
+        * IO errors are signalled using the dirty bit on the key.
+        * If we failed to read, we should not attempt to write to the
+        * backing device.  Instead, immediately go to write_dirty_finish
+        * to clean up.
+        */
+       if (KEY_DIRTY(&w->key)) {
+               dirty_init(w);
+               bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0);
+               io->bio.bi_iter.bi_sector = KEY_START(&w->key);
+               bio_set_dev(&io->bio, io->dc->bdev);
+               io->bio.bi_end_io       = dirty_endio;
  
-       closure_bio_submit(&io->bio, cl);
+               closure_bio_submit(&io->bio, cl);
+       }
  
        continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq);
  }
@@@ -418,6 -440,8 +441,8 @@@ static int bch_writeback_thread(void *a
        struct cached_dev *dc = arg;
        bool searched_full_index;
  
+       bch_ratelimit_reset(&dc->writeback_rate);
        while (!kthread_should_stop()) {
                down_write(&dc->writeback_lock);
                if (!atomic_read(&dc->has_dirty) ||
  
                up_write(&dc->writeback_lock);
  
-               bch_ratelimit_reset(&dc->writeback_rate);
                read_dirty(dc);
  
                if (searched_full_index) {
                               !kthread_should_stop() &&
                               !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
                                delay = schedule_timeout_interruptible(delay);
+                       bch_ratelimit_reset(&dc->writeback_rate);
                }
        }
  
@@@ -492,8 -517,6 +518,6 @@@ void bch_sectors_dirty_init(struct bcac
  
        bch_btree_map_keys(&op.op, d->c, &KEY(op.inode, 0, 0),
                           sectors_dirty_init_fn, 0);
-       d->sectors_dirty_last = bcache_dev_sectors_dirty(d);
  }
  
  void bch_cached_dev_writeback_init(struct cached_dev *dc)
        dc->writeback_percent           = 10;
        dc->writeback_delay             = 30;
        dc->writeback_rate.rate         = 1024;
+       dc->writeback_rate_minimum      = 8;
  
        dc->writeback_rate_update_seconds = 5;
-       dc->writeback_rate_d_term       = 30;
-       dc->writeback_rate_p_term_inverse = 6000;
+       dc->writeback_rate_p_term_inverse = 40;
+       dc->writeback_rate_i_term_inverse = 10000;
  
        INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
  }
index 151544740148618ef8b22407618e42af5855ea28,7d25bff37a9bf95ada7bfdeeb3b8e3fb17aaa469..a9e3ffb4b03c72bcc713b0147583b72072383a94
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef _BCACHE_WRITEBACK_H
  #define _BCACHE_WRITEBACK_H
  
@@@ -77,7 -76,9 +77,9 @@@ static inline bool should_writeback(str
        if (would_skip)
                return false;
  
-       return op_is_sync(bio->bi_opf) || in_use <= CUTOFF_WRITEBACK;
+       return (op_is_sync(bio->bi_opf) ||
+               bio->bi_opf & (REQ_META|REQ_PRIO) ||
+               in_use <= CUTOFF_WRITEBACK);
  }
  
  static inline void bch_writeback_queue(struct cached_dev *dc)
@@@ -90,7 -91,7 +92,7 @@@ static inline void bch_writeback_add(st
  {
        if (!atomic_read(&dc->has_dirty) &&
            !atomic_xchg(&dc->has_dirty, 1)) {
-               atomic_inc(&dc->count);
+               refcount_inc(&dc->count);
  
                if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
                        SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY);
diff --combined drivers/md/dm.c
index 8aaffa19b29af44301542a87e7e660589cc4ec30,8d07ad61221c64ee7352e3f02f7ae8b40a700967..a3f8cbb98dd5b7e77d62e93160eb4469743ec063
@@@ -52,12 -52,6 +52,12 @@@ static struct workqueue_struct *deferre
  atomic_t dm_global_event_nr = ATOMIC_INIT(0);
  DECLARE_WAIT_QUEUE_HEAD(dm_global_eventq);
  
 +void dm_issue_global_event(void)
 +{
 +      atomic_inc(&dm_global_event_nr);
 +      wake_up(&dm_global_eventq);
 +}
 +
  /*
   * One of these is allocated per bio.
   */
@@@ -114,7 -108,7 +114,7 @@@ static unsigned reserved_bio_based_ios 
  
  static int __dm_get_module_param_int(int *module_param, int min, int max)
  {
 -      int param = ACCESS_ONCE(*module_param);
 +      int param = READ_ONCE(*module_param);
        int modified_param = 0;
        bool modified = true;
  
  unsigned __dm_get_module_param(unsigned *module_param,
                               unsigned def, unsigned max)
  {
 -      unsigned param = ACCESS_ONCE(*module_param);
 +      unsigned param = READ_ONCE(*module_param);
        unsigned modified_param = 0;
  
        if (!param)
@@@ -1618,17 -1612,6 +1618,6 @@@ static void dm_wq_work(struct work_stru
  
  void dm_init_md_queue(struct mapped_device *md)
  {
-       /*
-        * Request-based dm devices cannot be stacked on top of bio-based dm
-        * devices.  The type of this dm device may not have been decided yet.
-        * The type is decided at the first table loading time.
-        * To prevent problematic device stacking, clear the queue flag
-        * for request stacking support until then.
-        *
-        * This queue is new, so no concurrency on the queue_flags.
-        */
-       queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
        /*
         * Initialize data that will only be used by a non-blk-mq DM queue
         * - must do so here (in alloc_dev callchain) before queue is used
@@@ -1871,8 -1854,9 +1860,8 @@@ static void event_callback(void *contex
        dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
  
        atomic_inc(&md->event_nr);
 -      atomic_inc(&dm_global_event_nr);
        wake_up(&md->eventq);
 -      wake_up(&dm_global_eventq);
 +      dm_issue_global_event();
  }
  
  /*
@@@ -2288,7 -2272,6 +2277,7 @@@ struct dm_table *dm_swap_table(struct m
        }
  
        map = __bind(md, table, &limits);
 +      dm_issue_global_event();
  
  out:
        mutex_unlock(&md->suspend_lock);
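dm_issue_global_event() above factors out the event-counter-plus-waitqueue pattern now shared by the uevent path and the table swap. The bare idiom, with hypothetical names (this is not the device-mapper ioctl interface, just the kernel-side shape):

#include <linux/atomic.h>
#include <linux/wait.h>

static atomic_t demo_event_nr = ATOMIC_INIT(0);
static DECLARE_WAIT_QUEUE_HEAD(demo_eventq);

static void demo_issue_event(void)
{
        atomic_inc(&demo_event_nr);
        wake_up(&demo_eventq);
}

/* A waiter sleeps until the counter moves past the value it last saw. */
static int demo_wait_for_event(int last_seen)
{
        return wait_event_interruptible(demo_eventq,
                        atomic_read(&demo_event_nr) != last_seen);
}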
index 7b96e4588a128bd71953be919bd3df7585220805,b856f2f549cdaf28c8efdd8b59c94d168dfb499f..a25fd43650ad6b7df8ffbe3bcd2eff859594b6bc
@@@ -1,4 -1,3 +1,4 @@@
 +# SPDX-License-Identifier: GPL-2.0
  obj-$(CONFIG_NVME_CORE)                       += nvme-core.o
  obj-$(CONFIG_BLK_DEV_NVME)            += nvme.o
  obj-$(CONFIG_NVME_FABRICS)            += nvme-fabrics.o
@@@ -6,6 -5,7 +6,7 @@@ obj-$(CONFIG_NVME_RDMA)                  += nvme-rdma.
  obj-$(CONFIG_NVME_FC)                 += nvme-fc.o
  
  nvme-core-y                           := core.o
+ nvme-core-$(CONFIG_NVME_MULTIPATH)    += multipath.o
  nvme-core-$(CONFIG_NVM)                       += lightnvm.o
  
  nvme-y                                        += pci.o
diff --combined drivers/nvme/host/core.c
index 37f9039bb9cab29892f783ddb45a459809c3805c,993813ccdc0b679f9357a85cb1389611fcd1d21a..25da74d310d1bbd5e7c62f9a35de94f6279fbc25
  
  #define NVME_MINORS           (1U << MINORBITS)
  
- unsigned char admin_timeout = 60;
- module_param(admin_timeout, byte, 0644);
+ unsigned int admin_timeout = 60;
+ module_param(admin_timeout, uint, 0644);
  MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
  EXPORT_SYMBOL_GPL(admin_timeout);
  
- unsigned char nvme_io_timeout = 30;
- module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
+ unsigned int nvme_io_timeout = 30;
+ module_param_named(io_timeout, nvme_io_timeout, uint, 0644);
  MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
  EXPORT_SYMBOL_GPL(nvme_io_timeout);
  
@@@ -52,9 -52,6 +52,6 @@@ static u8 nvme_max_retries = 5
  module_param_named(max_retries, nvme_max_retries, byte, 0644);
  MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
  
- static int nvme_char_major;
- module_param(nvme_char_major, int, 0);
  static unsigned long default_ps_max_latency_us = 100000;
  module_param(default_ps_max_latency_us, ulong, 0644);
  MODULE_PARM_DESC(default_ps_max_latency_us,
@@@ -71,10 -68,17 +68,17 @@@ MODULE_PARM_DESC(streams, "turn on supp
  struct workqueue_struct *nvme_wq;
  EXPORT_SYMBOL_GPL(nvme_wq);
  
- static LIST_HEAD(nvme_ctrl_list);
- static DEFINE_SPINLOCK(dev_list_lock);
+ static DEFINE_IDA(nvme_subsystems_ida);
+ static LIST_HEAD(nvme_subsystems);
+ static DEFINE_MUTEX(nvme_subsystems_lock);
  
+ static DEFINE_IDA(nvme_instance_ida);
+ static dev_t nvme_chr_devt;
  static struct class *nvme_class;
+ static struct class *nvme_subsys_class;
+ static void nvme_ns_remove(struct nvme_ns *ns);
+ static int nvme_revalidate_disk(struct gendisk *disk);
  
  static __le32 nvme_get_log_dw10(u8 lid, size_t size)
  {
@@@ -101,6 -105,51 +105,51 @@@ static int nvme_reset_ctrl_sync(struct 
        return ret;
  }
  
+ static void nvme_delete_ctrl_work(struct work_struct *work)
+ {
+       struct nvme_ctrl *ctrl =
+               container_of(work, struct nvme_ctrl, delete_work);
+       flush_work(&ctrl->reset_work);
+       nvme_stop_ctrl(ctrl);
+       nvme_remove_namespaces(ctrl);
+       ctrl->ops->delete_ctrl(ctrl);
+       nvme_uninit_ctrl(ctrl);
+       nvme_put_ctrl(ctrl);
+ }
+ int nvme_delete_ctrl(struct nvme_ctrl *ctrl)
+ {
+       if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
+               return -EBUSY;
+       if (!queue_work(nvme_wq, &ctrl->delete_work))
+               return -EBUSY;
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(nvme_delete_ctrl);
+ int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
+ {
+       int ret = 0;
+       /*
+        * Keep a reference until the work is flushed since ->delete_ctrl
+        * can free the controller.
+        */
+       nvme_get_ctrl(ctrl);
+       ret = nvme_delete_ctrl(ctrl);
+       if (!ret)
+               flush_work(&ctrl->delete_work);
+       nvme_put_ctrl(ctrl);
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(nvme_delete_ctrl_sync);
+ static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
+ {
+       return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
+ }
  static blk_status_t nvme_error_status(struct request *req)
  {
        switch (nvme_req(req)->status & 0x7ff) {
@@@ -142,9 -191,16 +191,16 @@@ static inline bool nvme_req_needs_retry
  void nvme_complete_rq(struct request *req)
  {
        if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
-               nvme_req(req)->retries++;
-               blk_mq_requeue_request(req, true);
-               return;
+               if (nvme_req_needs_failover(req)) {
+                       nvme_failover_req(req);
+                       return;
+               }
+               if (!blk_queue_dying(req->q)) {
+                       nvme_req(req)->retries++;
+                       blk_mq_requeue_request(req, true);
+                       return;
+               }
        }
  
        blk_mq_end_request(req, nvme_error_status(req));
@@@ -153,18 -209,13 +209,13 @@@ EXPORT_SYMBOL_GPL(nvme_complete_rq)
  
  void nvme_cancel_request(struct request *req, void *data, bool reserved)
  {
-       int status;
        if (!blk_mq_request_started(req))
                return;
  
        dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
                                "Cancelling I/O %d", req->tag);
  
-       status = NVME_SC_ABORT_REQ;
-       if (blk_queue_dying(req->q))
-               status |= NVME_SC_DNR;
-       nvme_req(req)->status = status;
+       nvme_req(req)->status = NVME_SC_ABORT_REQ;
        blk_mq_complete_request(req);
  
  }
@@@ -205,6 -256,7 +256,7 @@@ bool nvme_change_ctrl_state(struct nvme
        case NVME_CTRL_RECONNECTING:
                switch (old_state) {
                case NVME_CTRL_LIVE:
+               case NVME_CTRL_RESETTING:
                        changed = true;
                        /* FALLTHRU */
                default:
                ctrl->state = new_state;
  
        spin_unlock_irqrestore(&ctrl->lock, flags);
+       if (changed && ctrl->state == NVME_CTRL_LIVE)
+               nvme_kick_requeue_lists(ctrl);
        return changed;
  }
  EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
  
+ static void nvme_free_ns_head(struct kref *ref)
+ {
+       struct nvme_ns_head *head =
+               container_of(ref, struct nvme_ns_head, ref);
+       nvme_mpath_remove_disk(head);
+       ida_simple_remove(&head->subsys->ns_ida, head->instance);
+       list_del_init(&head->entry);
+       cleanup_srcu_struct(&head->srcu);
+       kfree(head);
+ }
+ static void nvme_put_ns_head(struct nvme_ns_head *head)
+ {
+       kref_put(&head->ref, nvme_free_ns_head);
+ }
  static void nvme_free_ns(struct kref *kref)
  {
        struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
        if (ns->ndev)
                nvme_nvm_unregister(ns);
  
-       if (ns->disk) {
-               spin_lock(&dev_list_lock);
-               ns->disk->private_data = NULL;
-               spin_unlock(&dev_list_lock);
-       }
        put_disk(ns->disk);
-       ida_simple_remove(&ns->ctrl->ns_ida, ns->instance);
+       nvme_put_ns_head(ns->head);
        nvme_put_ctrl(ns->ctrl);
        kfree(ns);
  }
@@@ -268,31 -332,8 +332,8 @@@ static void nvme_put_ns(struct nvme_ns 
        kref_put(&ns->kref, nvme_free_ns);
  }
  
- static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
- {
-       struct nvme_ns *ns;
-       spin_lock(&dev_list_lock);
-       ns = disk->private_data;
-       if (ns) {
-               if (!kref_get_unless_zero(&ns->kref))
-                       goto fail;
-               if (!try_module_get(ns->ctrl->ops->module))
-                       goto fail_put_ns;
-       }
-       spin_unlock(&dev_list_lock);
-       return ns;
- fail_put_ns:
-       kref_put(&ns->kref, nvme_free_ns);
- fail:
-       spin_unlock(&dev_list_lock);
-       return NULL;
- }
  struct request *nvme_alloc_request(struct request_queue *q,
-               struct nvme_command *cmd, unsigned int flags, int qid)
+               struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid)
  {
        unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
        struct request *req;
@@@ -417,7 -458,7 +458,7 @@@ static inline void nvme_setup_flush(str
  {
        memset(cmnd, 0, sizeof(*cmnd));
        cmnd->common.opcode = nvme_cmd_flush;
-       cmnd->common.nsid = cpu_to_le32(ns->ns_id);
+       cmnd->common.nsid = cpu_to_le32(ns->head->ns_id);
  }
  
  static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
  
        memset(cmnd, 0, sizeof(*cmnd));
        cmnd->dsm.opcode = nvme_cmd_dsm;
-       cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
+       cmnd->dsm.nsid = cpu_to_le32(ns->head->ns_id);
        cmnd->dsm.nr = cpu_to_le32(segments - 1);
        cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
  
@@@ -467,16 -508,6 +508,6 @@@ static inline blk_status_t nvme_setup_r
        u16 control = 0;
        u32 dsmgmt = 0;
  
-       /*
-        * If formated with metadata, require the block layer provide a buffer
-        * unless this namespace is formated such that the metadata can be
-        * stripped/generated by the controller with PRACT=1.
-        */
-       if (ns && ns->ms &&
-           (!ns->pi_type || ns->ms != sizeof(struct t10_pi_tuple)) &&
-           !blk_integrity_rq(req) && !blk_rq_is_passthrough(req))
-               return BLK_STS_NOTSUPP;
        if (req->cmd_flags & REQ_FUA)
                control |= NVME_RW_FUA;
        if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
  
        memset(cmnd, 0, sizeof(*cmnd));
        cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
-       cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
+       cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
        cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
        cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
  
                nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
  
        if (ns->ms) {
+               /*
+                * If formatted with metadata, the block layer always provides a
+                * metadata buffer if CONFIG_BLK_DEV_INTEGRITY is enabled.  Else
+                * we enable the PRACT bit for protection information or set the
+                * namespace capacity to zero to prevent any I/O.
+                */
+               if (!blk_integrity_rq(req)) {
+                       if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
+                               return BLK_STS_NOTSUPP;
+                       control |= NVME_RW_PRINFO_PRACT;
+               }
                switch (ns->pi_type) {
                case NVME_NS_DPS_PI_TYPE3:
                        control |= NVME_RW_PRINFO_PRCHK_GUARD;
                                        nvme_block_nr(ns, blk_rq_pos(req)));
                        break;
                }
-               if (!blk_integrity_rq(req))
-                       control |= NVME_RW_PRINFO_PRACT;
        }
  
        cmnd->rw.control = cpu_to_le16(control);
@@@ -560,7 -601,8 +601,8 @@@ EXPORT_SYMBOL_GPL(nvme_setup_cmd)
   */
  int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                union nvme_result *result, void *buffer, unsigned bufflen,
-               unsigned timeout, int qid, int at_head, int flags)
+               unsigned timeout, int qid, int at_head,
+               blk_mq_req_flags_t flags)
  {
        struct request *req;
        int ret;
@@@ -778,7 -820,7 +820,7 @@@ static int nvme_identify_ctrl(struct nv
  }
  
  static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
-               u8 *eui64, u8 *nguid, uuid_t *uuid)
+               struct nvme_ns_ids *ids)
  {
        struct nvme_command c = { };
        int status;
                                goto free_data;
                        }
                        len = NVME_NIDT_EUI64_LEN;
-                       memcpy(eui64, data + pos + sizeof(*cur), len);
+                       memcpy(ids->eui64, data + pos + sizeof(*cur), len);
                        break;
                case NVME_NIDT_NGUID:
                        if (cur->nidl != NVME_NIDT_NGUID_LEN) {
                                goto free_data;
                        }
                        len = NVME_NIDT_NGUID_LEN;
-                       memcpy(nguid, data + pos + sizeof(*cur), len);
+                       memcpy(ids->nguid, data + pos + sizeof(*cur), len);
                        break;
                case NVME_NIDT_UUID:
                        if (cur->nidl != NVME_NIDT_UUID_LEN) {
                                goto free_data;
                        }
                        len = NVME_NIDT_UUID_LEN;
-                       uuid_copy(uuid, data + pos + sizeof(*cur));
+                       uuid_copy(&ids->uuid, data + pos + sizeof(*cur));
                        break;
                default:
                        /* Skip unknown types */
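The parser above now fills one struct nvme_ns_ids instead of three separate out-parameters. Judging only from the copies it performs, the structure carries the EUI-64, NGUID, and UUID side by side; the sketch below is inferred from those lengths rather than quoted from nvme.h, hence the demo_ name.

#include <linux/types.h>
#include <linux/uuid.h>

struct demo_ns_ids {
        u8      eui64[8];       /* NVME_NIDT_EUI64_LEN */
        u8      nguid[16];      /* NVME_NIDT_NGUID_LEN */
        uuid_t  uuid;           /* NVME_NIDT_UUID_LEN  */
};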
@@@ -968,7 -1010,7 +1010,7 @@@ static int nvme_submit_io(struct nvme_n
        memset(&c, 0, sizeof(c));
        c.rw.opcode = io.opcode;
        c.rw.flags = io.flags;
-       c.rw.nsid = cpu_to_le32(ns->ns_id);
+       c.rw.nsid = cpu_to_le32(ns->head->ns_id);
        c.rw.slba = cpu_to_le64(io.slba);
        c.rw.length = cpu_to_le16(io.nblocks);
        c.rw.control = cpu_to_le16(io.control);
                        metadata, meta_len, io.slba, NULL, 0);
  }
  
+ static u32 nvme_known_admin_effects(u8 opcode)
+ {
+       switch (opcode) {
+       case nvme_admin_format_nvm:
+               return NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
+                                       NVME_CMD_EFFECTS_CSE_MASK;
+       case nvme_admin_sanitize_nvm:
+               return NVME_CMD_EFFECTS_CSE_MASK;
+       default:
+               break;
+       }
+       return 0;
+ }
+ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+                                                               u8 opcode)
+ {
+       u32 effects = 0;
+       if (ns) {
+               if (ctrl->effects)
+                       effects = le32_to_cpu(ctrl->effects->iocs[opcode]);
+               if (effects & ~NVME_CMD_EFFECTS_CSUPP)
+                       dev_warn(ctrl->device,
+                                "IO command:%02x has unhandled effects:%08x\n",
+                                opcode, effects);
+               return 0;
+       }
+       if (ctrl->effects)
+               effects = le32_to_cpu(ctrl->effects->iocs[opcode]);
+       else
+               effects = nvme_known_admin_effects(opcode);
+       /*
+        * For simplicity, IO to all namespaces is quiesced even if the command
+        * effects say only one namespace is affected.
+        */
+       if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
+               nvme_start_freeze(ctrl);
+               nvme_wait_freeze(ctrl);
+       }
+       return effects;
+ }
+ static void nvme_update_formats(struct nvme_ctrl *ctrl)
+ {
+       struct nvme_ns *ns;
+       mutex_lock(&ctrl->namespaces_mutex);
+       list_for_each_entry(ns, &ctrl->namespaces, list) {
+               if (ns->disk && nvme_revalidate_disk(ns->disk))
+                       nvme_ns_remove(ns);
+       }
+       mutex_unlock(&ctrl->namespaces_mutex);
+ }
+ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
+ {
+       /*
+        * Revalidate LBA changes prior to unfreezing. This is necessary to
+        * prevent memory corruption if a logical block size was changed by
+        * this command.
+        */
+       if (effects & NVME_CMD_EFFECTS_LBCC)
+               nvme_update_formats(ctrl);
+       if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK))
+               nvme_unfreeze(ctrl);
+       if (effects & NVME_CMD_EFFECTS_CCC)
+               nvme_init_identify(ctrl);
+       if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC))
+               nvme_queue_scan(ctrl);
+ }
  static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                        struct nvme_passthru_cmd __user *ucmd)
  {
        struct nvme_passthru_cmd cmd;
        struct nvme_command c;
        unsigned timeout = 0;
+       u32 effects;
        int status;
  
        if (!capable(CAP_SYS_ADMIN))
        if (cmd.timeout_ms)
                timeout = msecs_to_jiffies(cmd.timeout_ms);
  
+       effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
        status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
                        (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
                        (void __user *)(uintptr_t)cmd.metadata, cmd.metadata,
                        0, &cmd.result, timeout);
+       nvme_passthru_end(ctrl, effects);
        if (status >= 0) {
                if (put_user(cmd.result, &ucmd->result))
                        return -EFAULT;
        return status;
  }
  
- static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
-               unsigned int cmd, unsigned long arg)
+ /*
+  * Issue ioctl requests on the first available path.  Note that unlike normal
+  * block layer requests we will not retry a failed request on another controller.
+  */
+ static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
+               struct nvme_ns_head **head, int *srcu_idx)
  {
-       struct nvme_ns *ns = bdev->bd_disk->private_data;
+ #ifdef CONFIG_NVME_MULTIPATH
+       if (disk->fops == &nvme_ns_head_ops) {
+               *head = disk->private_data;
+               *srcu_idx = srcu_read_lock(&(*head)->srcu);
+               return nvme_find_path(*head);
+       }
+ #endif
+       *head = NULL;
+       *srcu_idx = -1;
+       return disk->private_data;
+ }
  
+ static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
+ {
+       if (head)
+               srcu_read_unlock(&head->srcu, idx);
+ }
+ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned cmd, unsigned long arg)
+ {
        switch (cmd) {
        case NVME_IOCTL_ID:
                force_successful_syscall_return();
-               return ns->ns_id;
+               return ns->head->ns_id;
        case NVME_IOCTL_ADMIN_CMD:
                return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
        case NVME_IOCTL_IO_CMD:
        }
  }
  
- #ifdef CONFIG_COMPAT
- static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
-                       unsigned int cmd, unsigned long arg)
+ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+               unsigned int cmd, unsigned long arg)
  {
-       return nvme_ioctl(bdev, mode, cmd, arg);
+       struct nvme_ns_head *head = NULL;
+       struct nvme_ns *ns;
+       int srcu_idx, ret;
+       ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
+       if (unlikely(!ns))
+               ret = -EWOULDBLOCK;
+       else
+               ret = nvme_ns_ioctl(ns, cmd, arg);
+       nvme_put_ns_from_disk(head, srcu_idx);
+       return ret;
  }
- #else
- #define nvme_compat_ioctl     NULL
- #endif
  
  static int nvme_open(struct block_device *bdev, fmode_t mode)
  {
-       return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO;
+       struct nvme_ns *ns = bdev->bd_disk->private_data;
+ #ifdef CONFIG_NVME_MULTIPATH
+       /* should never be called due to GENHD_FL_HIDDEN */
+       if (WARN_ON_ONCE(ns->head->disk))
+               return -ENXIO;
+ #endif
+       if (!kref_get_unless_zero(&ns->kref))
+               return -ENXIO;
+       return 0;
  }
  
  static void nvme_release(struct gendisk *disk, fmode_t mode)
  {
-       struct nvme_ns *ns = disk->private_data;
-       module_put(ns->ctrl->ops->module);
-       nvme_put_ns(ns);
+       nvme_put_ns(disk->private_data);
  }
  
  static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
  }
  
  #ifdef CONFIG_BLK_DEV_INTEGRITY
- static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
-               u16 bs)
- {
-       struct nvme_ns *ns = disk->private_data;
-       u16 old_ms = ns->ms;
-       u8 pi_type = 0;
-       ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
-       ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
-       /* PI implementation requires metadata equal t10 pi tuple size */
-       if (ns->ms == sizeof(struct t10_pi_tuple))
-               pi_type = id->dps & NVME_NS_DPS_PI_MASK;
-       if (blk_get_integrity(disk) &&
-           (ns->pi_type != pi_type || ns->ms != old_ms ||
-            bs != queue_logical_block_size(disk->queue) ||
-            (ns->ms && ns->ext)))
-               blk_integrity_unregister(disk);
-       ns->pi_type = pi_type;
- }
- static void nvme_init_integrity(struct nvme_ns *ns)
+ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
  {
        struct blk_integrity integrity;
  
        memset(&integrity, 0, sizeof(integrity));
-       switch (ns->pi_type) {
+       switch (pi_type) {
        case NVME_NS_DPS_PI_TYPE3:
                integrity.profile = &t10_pi_type3_crc;
                integrity.tag_size = sizeof(u16) + sizeof(u32);
                integrity.profile = NULL;
                break;
        }
-       integrity.tuple_size = ns->ms;
-       blk_integrity_register(ns->disk, &integrity);
-       blk_queue_max_integrity_segments(ns->queue, 1);
+       integrity.tuple_size = ms;
+       blk_integrity_register(disk, &integrity);
+       blk_queue_max_integrity_segments(disk->queue, 1);
  }
  #else
- static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
-               u16 bs)
- {
- }
- static void nvme_init_integrity(struct nvme_ns *ns)
+ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
  {
  }
  #endif /* CONFIG_BLK_DEV_INTEGRITY */
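As a usage note on the reworked ioctl path above: NVME_IOCTL_ID still reports the namespace ID directly as the ioctl return value, now sourced from ns->head->ns_id. A minimal userspace sketch; the device path /dev/nvme0n1 is an assumption, and NVME_IOCTL_ID comes from the kernel's uapi header <linux/nvme_ioctl.h>:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/nvme_ioctl.h>

    int main(void)
    {
            int fd, nsid;

            /* the namespace block node to query; the path is an assumption */
            fd = open("/dev/nvme0n1", O_RDONLY);
            if (fd < 0)
                    return 1;
            /* the driver hands back the nsid as the syscall return value */
            nsid = ioctl(fd, NVME_IOCTL_ID);
            if (nsid < 0)
                    perror("NVME_IOCTL_ID");
            else
                    printf("nsid = %d\n", nsid);
            close(fd);
            return 0;
    }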
@@@ -1149,53 -1276,89 +1276,89 @@@ static void nvme_set_chunk_size(struct 
        blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
  }
  
- static void nvme_config_discard(struct nvme_ns *ns)
+ static void nvme_config_discard(struct nvme_ctrl *ctrl,
+               unsigned stream_alignment, struct request_queue *queue)
  {
-       struct nvme_ctrl *ctrl = ns->ctrl;
-       u32 logical_block_size = queue_logical_block_size(ns->queue);
+       u32 size = queue_logical_block_size(queue);
+       if (stream_alignment)
+               size *= stream_alignment;
  
        BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
                        NVME_DSM_MAX_RANGES);
  
-       if (ctrl->nr_streams && ns->sws && ns->sgs) {
-               unsigned int sz = logical_block_size * ns->sws * ns->sgs;
+       queue->limits.discard_alignment = size;
+       queue->limits.discard_granularity = size;
  
-               ns->queue->limits.discard_alignment = sz;
-               ns->queue->limits.discard_granularity = sz;
-       } else {
-               ns->queue->limits.discard_alignment = logical_block_size;
-               ns->queue->limits.discard_granularity = logical_block_size;
-       }
-       blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
-       blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
-       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
+       blk_queue_max_discard_sectors(queue, UINT_MAX);
+       blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
+       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, queue);
  
        if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
-               blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX);
+               blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
  }
  
  static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
-               struct nvme_id_ns *id, u8 *eui64, u8 *nguid, uuid_t *uuid)
+               struct nvme_id_ns *id, struct nvme_ns_ids *ids)
  {
+       memset(ids, 0, sizeof(*ids));
        if (ctrl->vs >= NVME_VS(1, 1, 0))
-               memcpy(eui64, id->eui64, sizeof(id->eui64));
+               memcpy(ids->eui64, id->eui64, sizeof(id->eui64));
        if (ctrl->vs >= NVME_VS(1, 2, 0))
-               memcpy(nguid, id->nguid, sizeof(id->nguid));
+               memcpy(ids->nguid, id->nguid, sizeof(id->nguid));
        if (ctrl->vs >= NVME_VS(1, 3, 0)) {
                 /* Don't treat error as fatal we potentially
                  * already have a NGUID or EUI-64
                  */
-               if (nvme_identify_ns_descs(ctrl, nsid, eui64, nguid, uuid))
+               if (nvme_identify_ns_descs(ctrl, nsid, ids))
                        dev_warn(ctrl->device,
                                 "%s: Identify Descriptors failed\n", __func__);
        }
  }
  
+ static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids)
+ {
+       return !uuid_is_null(&ids->uuid) ||
+               memchr_inv(ids->nguid, 0, sizeof(ids->nguid)) ||
+               memchr_inv(ids->eui64, 0, sizeof(ids->eui64));
+ }
+ 
+ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
+ {
+       return uuid_equal(&a->uuid, &b->uuid) &&
+               memcmp(&a->nguid, &b->nguid, sizeof(a->nguid)) == 0 &&
+               memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0;
+ }
+ 
+ static void nvme_update_disk_info(struct gendisk *disk,
+               struct nvme_ns *ns, struct nvme_id_ns *id)
+ {
+       sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9);
+       unsigned stream_alignment = 0;
+       if (ns->ctrl->nr_streams && ns->sws && ns->sgs)
+               stream_alignment = ns->sws * ns->sgs;
+       blk_mq_freeze_queue(disk->queue);
+       blk_integrity_unregister(disk);
+       blk_queue_logical_block_size(disk->queue, 1 << ns->lba_shift);
+       if (ns->ms && !ns->ext &&
+           (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
+               nvme_init_integrity(disk, ns->ms, ns->pi_type);
+       if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk))
+               capacity = 0;
+       set_capacity(disk, capacity);
+       if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
+               nvme_config_discard(ns->ctrl, stream_alignment, disk->queue);
+       blk_mq_unfreeze_queue(disk->queue);
+ }
+ 
  static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
  {
        struct nvme_ns *ns = disk->private_data;
-       struct nvme_ctrl *ctrl = ns->ctrl;
-       u16 bs;
  
        /*
         * If identify namespace failed, use default 512 byte block size so
        ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
        if (ns->lba_shift == 0)
                ns->lba_shift = 9;
-       bs = 1 << ns->lba_shift;
        ns->noiob = le16_to_cpu(id->noiob);
+       ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
+       ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
+       /* the PI implementation requires metadata equal t10 pi tuple size */
+       if (ns->ms == sizeof(struct t10_pi_tuple))
+               ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
+       else
+               ns->pi_type = 0;
  
-       blk_mq_freeze_queue(disk->queue);
-       if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
-               nvme_prep_integrity(disk, id, bs);
-       blk_queue_logical_block_size(ns->queue, bs);
        if (ns->noiob)
                nvme_set_chunk_size(ns);
-       if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
-               nvme_init_integrity(ns);
-       if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
-               set_capacity(disk, 0);
-       else
-               set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
-       if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
-               nvme_config_discard(ns);
-       blk_mq_unfreeze_queue(disk->queue);
+       nvme_update_disk_info(disk, ns, id);
+ #ifdef CONFIG_NVME_MULTIPATH
+       if (ns->head->disk)
+               nvme_update_disk_info(ns->head->disk, ns, id);
+ #endif
  }
  
  static int nvme_revalidate_disk(struct gendisk *disk)
        struct nvme_ns *ns = disk->private_data;
        struct nvme_ctrl *ctrl = ns->ctrl;
        struct nvme_id_ns *id;
-       u8 eui64[8] = { 0 }, nguid[16] = { 0 };
-       uuid_t uuid = uuid_null;
+       struct nvme_ns_ids ids;
        int ret = 0;
  
        if (test_bit(NVME_NS_DEAD, &ns->flags)) {
                return -ENODEV;
        }
  
-       id = nvme_identify_ns(ctrl, ns->ns_id);
+       id = nvme_identify_ns(ctrl, ns->head->ns_id);
        if (!id)
                return -ENODEV;
  
                goto out;
        }
  
-       nvme_report_ns_ids(ctrl, ns->ns_id, id, eui64, nguid, &uuid);
-       if (!uuid_equal(&ns->uuid, &uuid) ||
-           memcmp(&ns->nguid, &nguid, sizeof(ns->nguid)) ||
-           memcmp(&ns->eui, &eui64, sizeof(ns->eui))) {
 +      __nvme_revalidate_disk(disk, id);
+       nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids);
+       if (!nvme_ns_ids_equal(&ns->head->ids, &ids)) {
                dev_err(ctrl->device,
-                       "identifiers changed for nsid %d\n", ns->ns_id);
+                       "identifiers changed for nsid %d\n", ns->head->ns_id);
                ret = -ENODEV;
        }
  
@@@ -1287,8 -1442,10 +1443,10 @@@ static char nvme_pr_type(enum pr_type t
  static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
                                u64 key, u64 sa_key, u8 op)
  {
-       struct nvme_ns *ns = bdev->bd_disk->private_data;
+       struct nvme_ns_head *head = NULL;
+       struct nvme_ns *ns;
        struct nvme_command c;
+       int srcu_idx, ret;
        u8 data[16] = { 0, };
  
        put_unaligned_le64(key, &data[0]);
  
        memset(&c, 0, sizeof(c));
        c.common.opcode = op;
-       c.common.nsid = cpu_to_le32(ns->ns_id);
+       c.common.nsid = cpu_to_le32(head->ns_id);
        c.common.cdw10[0] = cpu_to_le32(cdw10);
  
-       return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
+       ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
+       if (unlikely(!ns))
+               ret = -EWOULDBLOCK;
+       else
+               ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16);
+       nvme_put_ns_from_disk(head, srcu_idx);
+       return ret;
  }
  
  static int nvme_pr_register(struct block_device *bdev, u64 old,
@@@ -1381,7 -1544,7 +1545,7 @@@ EXPORT_SYMBOL_GPL(nvme_sec_submit)
  static const struct block_device_operations nvme_fops = {
        .owner          = THIS_MODULE,
        .ioctl          = nvme_ioctl,
-       .compat_ioctl   = nvme_compat_ioctl,
+       .compat_ioctl   = nvme_ioctl,
        .open           = nvme_open,
        .release        = nvme_release,
        .getgeo         = nvme_getgeo,
        .pr_ops         = &nvme_pr_ops,
  };
  
+ #ifdef CONFIG_NVME_MULTIPATH
+ static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode)
+ {
+       struct nvme_ns_head *head = bdev->bd_disk->private_data;
+       if (!kref_get_unless_zero(&head->ref))
+               return -ENXIO;
+       return 0;
+ }
+ 
+ static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode)
+ {
+       nvme_put_ns_head(disk->private_data);
+ }
+ 
+ const struct block_device_operations nvme_ns_head_ops = {
+       .owner          = THIS_MODULE,
+       .open           = nvme_ns_head_open,
+       .release        = nvme_ns_head_release,
+       .ioctl          = nvme_ioctl,
+       .compat_ioctl   = nvme_ioctl,
+       .getgeo         = nvme_getgeo,
+       .pr_ops         = &nvme_pr_ops,
+ };
+ #endif /* CONFIG_NVME_MULTIPATH */
+ 
  static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
  {
        unsigned long timeout =
@@@ -1737,14 -1926,15 +1927,15 @@@ static bool quirk_matches(const struct 
                string_matches(id->fr, q->fr, sizeof(id->fr));
  }
  
- static void nvme_init_subnqn(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ctrl,
+               struct nvme_id_ctrl *id)
  {
        size_t nqnlen;
        int off;
  
        nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
        if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
-               strcpy(ctrl->subnqn, id->subnqn);
+               strncpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE);
                return;
        }
  
                dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
  
        /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */
-       off = snprintf(ctrl->subnqn, NVMF_NQN_SIZE,
+       off = snprintf(subsys->subnqn, NVMF_NQN_SIZE,
                        "nqn.2014.08.org.nvmexpress:%4x%4x",
                        le16_to_cpu(id->vid), le16_to_cpu(id->ssvid));
-       memcpy(ctrl->subnqn + off, id->sn, sizeof(id->sn));
+       memcpy(subsys->subnqn + off, id->sn, sizeof(id->sn));
        off += sizeof(id->sn);
-       memcpy(ctrl->subnqn + off, id->mn, sizeof(id->mn));
+       memcpy(subsys->subnqn + off, id->mn, sizeof(id->mn));
        off += sizeof(id->mn);
-       memset(ctrl->subnqn + off, 0, sizeof(ctrl->subnqn) - off);
+       memset(subsys->subnqn + off, 0, sizeof(subsys->subnqn) - off);
+ }
+ 
+ static void __nvme_release_subsystem(struct nvme_subsystem *subsys)
+ {
+       ida_simple_remove(&nvme_subsystems_ida, subsys->instance);
+       kfree(subsys);
+ }
+ 
+ static void nvme_release_subsystem(struct device *dev)
+ {
+       __nvme_release_subsystem(container_of(dev, struct nvme_subsystem, dev));
+ }
+ 
+ static void nvme_destroy_subsystem(struct kref *ref)
+ {
+       struct nvme_subsystem *subsys =
+                       container_of(ref, struct nvme_subsystem, ref);
+       mutex_lock(&nvme_subsystems_lock);
+       list_del(&subsys->entry);
+       mutex_unlock(&nvme_subsystems_lock);
+       ida_destroy(&subsys->ns_ida);
+       device_del(&subsys->dev);
+       put_device(&subsys->dev);
+ }
+ 
+ static void nvme_put_subsystem(struct nvme_subsystem *subsys)
+ {
+       kref_put(&subsys->ref, nvme_destroy_subsystem);
+ }
+ 
+ static struct nvme_subsystem *__nvme_find_get_subsystem(const char *subsysnqn)
+ {
+       struct nvme_subsystem *subsys;
+       lockdep_assert_held(&nvme_subsystems_lock);
+       list_for_each_entry(subsys, &nvme_subsystems, entry) {
+               if (strcmp(subsys->subnqn, subsysnqn))
+                       continue;
+               if (!kref_get_unless_zero(&subsys->ref))
+                       continue;
+               return subsys;
+       }
+       return NULL;
+ }
+ 
+ #define SUBSYS_ATTR_RO(_name, _mode, _show)                   \
+       struct device_attribute subsys_attr_##_name = \
+               __ATTR(_name, _mode, _show, NULL)
+ 
+ static ssize_t nvme_subsys_show_nqn(struct device *dev,
+                                   struct device_attribute *attr,
+                                   char *buf)
+ {
+       struct nvme_subsystem *subsys =
+               container_of(dev, struct nvme_subsystem, dev);
+       return snprintf(buf, PAGE_SIZE, "%s\n", subsys->subnqn);
+ }
+ static SUBSYS_ATTR_RO(subsysnqn, S_IRUGO, nvme_subsys_show_nqn);
+ 
+ #define nvme_subsys_show_str_function(field)                          \
+ static ssize_t subsys_##field##_show(struct device *dev,              \
+                           struct device_attribute *attr, char *buf)   \
+ {                                                                     \
+       struct nvme_subsystem *subsys =                                 \
+               container_of(dev, struct nvme_subsystem, dev);          \
+       return sprintf(buf, "%.*s\n",                                   \
+                      (int)sizeof(subsys->field), subsys->field);      \
+ }                                                                     \
+ static SUBSYS_ATTR_RO(field, S_IRUGO, subsys_##field##_show);
+ 
+ nvme_subsys_show_str_function(model);
+ nvme_subsys_show_str_function(serial);
+ nvme_subsys_show_str_function(firmware_rev);
+ 
+ static struct attribute *nvme_subsys_attrs[] = {
+       &subsys_attr_model.attr,
+       &subsys_attr_serial.attr,
+       &subsys_attr_firmware_rev.attr,
+       &subsys_attr_subsysnqn.attr,
+       NULL,
+ };
+ 
+ static struct attribute_group nvme_subsys_attrs_group = {
+       .attrs = nvme_subsys_attrs,
+ };
+ 
+ static const struct attribute_group *nvme_subsys_attrs_groups[] = {
+       &nvme_subsys_attrs_group,
+       NULL,
+ };
+ 
+ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+ {
+       struct nvme_subsystem *subsys, *found;
+       int ret;
+       subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
+       if (!subsys)
+               return -ENOMEM;
+       ret = ida_simple_get(&nvme_subsystems_ida, 0, 0, GFP_KERNEL);
+       if (ret < 0) {
+               kfree(subsys);
+               return ret;
+       }
+       subsys->instance = ret;
+       mutex_init(&subsys->lock);
+       kref_init(&subsys->ref);
+       INIT_LIST_HEAD(&subsys->ctrls);
+       INIT_LIST_HEAD(&subsys->nsheads);
+       nvme_init_subnqn(subsys, ctrl, id);
+       memcpy(subsys->serial, id->sn, sizeof(subsys->serial));
+       memcpy(subsys->model, id->mn, sizeof(subsys->model));
+       memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
+       subsys->vendor_id = le16_to_cpu(id->vid);
+       subsys->cmic = id->cmic;
+       subsys->dev.class = nvme_subsys_class;
+       subsys->dev.release = nvme_release_subsystem;
+       subsys->dev.groups = nvme_subsys_attrs_groups;
+       dev_set_name(&subsys->dev, "nvme-subsys%d", subsys->instance);
+       device_initialize(&subsys->dev);
+       mutex_lock(&nvme_subsystems_lock);
+       found = __nvme_find_get_subsystem(subsys->subnqn);
+       if (found) {
+               /*
+                * Verify that the subsystem actually supports multiple
+                * controllers, else bail out.
+                */
+               if (!(id->cmic & (1 << 1))) {
+                       dev_err(ctrl->device,
+                               "ignoring ctrl due to duplicate subnqn (%s).\n",
+                               found->subnqn);
+                       nvme_put_subsystem(found);
+                       ret = -EINVAL;
+                       goto out_unlock;
+               }
+               __nvme_release_subsystem(subsys);
+               subsys = found;
+       } else {
+               ret = device_add(&subsys->dev);
+               if (ret) {
+                       dev_err(ctrl->device,
+                               "failed to register subsystem device.\n");
+                       goto out_unlock;
+               }
+               ida_init(&subsys->ns_ida);
+               list_add_tail(&subsys->entry, &nvme_subsystems);
+       }
+       ctrl->subsys = subsys;
+       mutex_unlock(&nvme_subsystems_lock);
+       if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
+                       dev_name(ctrl->device))) {
+               dev_err(ctrl->device,
+                       "failed to create sysfs link from subsystem.\n");
+               /* the transport driver will eventually put the subsystem */
+               return -EINVAL;
+       }
+       mutex_lock(&subsys->lock);
+       list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
+       mutex_unlock(&subsys->lock);
+       return 0;
+ out_unlock:
+       mutex_unlock(&nvme_subsystems_lock);
+       put_device(&subsys->dev);
+       return ret;
+ }
+ 
+ static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log,
+                       size_t size)
+ {
+       struct nvme_command c = { };
+       c.common.opcode = nvme_admin_get_log_page;
+       c.common.nsid = cpu_to_le32(NVME_NSID_ALL);
+       c.common.cdw10[0] = nvme_get_log_dw10(log_page, size);
+       return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
+ }
+ 
+ static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
+ {
+       int ret;
+       if (!ctrl->effects)
+               ctrl->effects = kzalloc(sizeof(*ctrl->effects), GFP_KERNEL);
+       if (!ctrl->effects)
+               return 0;
+       ret = nvme_get_log(ctrl, NVME_LOG_CMD_EFFECTS, ctrl->effects,
+                                       sizeof(*ctrl->effects));
+       if (ret) {
+               kfree(ctrl->effects);
+               ctrl->effects = NULL;
+       }
+       return ret;
  }
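The subsystem objects created by nvme_init_subsystem() above end up in the new "nvme-subsystem" device class (registered in nvme_core_init() further down) as nvme-subsys<instance>, carrying the subsysnqn, model, serial and firmware_rev attributes defined above. A minimal userspace sketch that dumps them; the concrete name nvme-subsys0 assumes at least one subsystem has been registered:

    #include <stdio.h>

    static void print_attr(const char *name)
    {
            char path[128], buf[256];
            FILE *f;

            /* directory name follows the "nvme-subsys%d" pattern above */
            snprintf(path, sizeof(path),
                     "/sys/class/nvme-subsystem/nvme-subsys0/%s", name);
            f = fopen(path, "r");
            if (!f)
                    return;
            if (fgets(buf, sizeof(buf), f))
                    printf("%s: %s", name, buf);
            fclose(f);
    }

    int main(void)
    {
            print_attr("subsysnqn");
            print_attr("model");
            print_attr("serial");
            print_attr("firmware_rev");
            return 0;
    }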
  
  /*
@@@ -1797,9 -2195,19 +2196,19 @@@ int nvme_init_identify(struct nvme_ctr
                return -EIO;
        }
  
-       nvme_init_subnqn(ctrl, id);
+       if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) {
+               ret = nvme_get_effects_log(ctrl);
+               if (ret < 0)
+                       return ret;
+       }
  
        if (!ctrl->identified) {
+               int i;
+               ret = nvme_init_subsystem(ctrl, id);
+               if (ret)
+                       goto out_free;
                /*
                 * Check for quirks.  Quirk can depend on firmware version,
                 * so, in principle, the set of quirks present can change
                 * the device, but we'd have to make sure that the driver
                 * behaves intelligently if the quirks change.
                 */
-               int i;
                for (i = 0; i < ARRAY_SIZE(core_quirks); i++) {
                        if (quirk_matches(id, &core_quirks[i]))
                                ctrl->quirks |= core_quirks[i].quirks;
        }
  
        ctrl->oacs = le16_to_cpu(id->oacs);
-       ctrl->vid = le16_to_cpu(id->vid);
        ctrl->oncs = le16_to_cpup(&id->oncs);
        atomic_set(&ctrl->abort_limit, id->acl + 1);
        ctrl->vwc = id->vwc;
        ctrl->cntlid = le16_to_cpup(&id->cntlid);
-       memcpy(ctrl->serial, id->sn, sizeof(id->sn));
-       memcpy(ctrl->model, id->mn, sizeof(id->mn));
-       memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr));
        if (id->mdts)
                max_hw_sectors = 1 << (id->mdts + page_shift - 9);
        else
@@@ -1931,33 -2332,12 +2333,12 @@@ EXPORT_SYMBOL_GPL(nvme_init_identify)
  
  static int nvme_dev_open(struct inode *inode, struct file *file)
  {
-       struct nvme_ctrl *ctrl;
-       int instance = iminor(inode);
-       int ret = -ENODEV;
-       spin_lock(&dev_list_lock);
-       list_for_each_entry(ctrl, &nvme_ctrl_list, node) {
-               if (ctrl->instance != instance)
-                       continue;
-               if (!ctrl->admin_q) {
-                       ret = -EWOULDBLOCK;
-                       break;
-               }
-               if (!kref_get_unless_zero(&ctrl->kref))
-                       break;
-               file->private_data = ctrl;
-               ret = 0;
-               break;
-       }
-       spin_unlock(&dev_list_lock);
-       return ret;
- }
+       struct nvme_ctrl *ctrl =
+               container_of(inode->i_cdev, struct nvme_ctrl, cdev);
  
- static int nvme_dev_release(struct inode *inode, struct file *file)
- {
-       nvme_put_ctrl(file->private_data);
+       if (ctrl->state != NVME_CTRL_LIVE)
+               return -EWOULDBLOCK;
+       file->private_data = ctrl;
        return 0;
  }
  
@@@ -2021,7 -2401,6 +2402,6 @@@ static long nvme_dev_ioctl(struct file 
  static const struct file_operations nvme_dev_fops = {
        .owner          = THIS_MODULE,
        .open           = nvme_dev_open,
-       .release        = nvme_dev_release,
        .unlocked_ioctl = nvme_dev_ioctl,
        .compat_ioctl   = nvme_dev_ioctl,
  };
@@@ -2051,77 -2430,86 +2431,86 @@@ static ssize_t nvme_sysfs_rescan(struc
  }
  static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan);
  
+ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
+ {
+       struct gendisk *disk = dev_to_disk(dev);
+       if (disk->fops == &nvme_fops)
+               return nvme_get_ns_from_dev(dev)->head;
+       else
+               return disk->private_data;
+ }
+ 
  static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
-                                                               char *buf)
+               char *buf)
  {
-       struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
-       struct nvme_ctrl *ctrl = ns->ctrl;
-       int serial_len = sizeof(ctrl->serial);
-       int model_len = sizeof(ctrl->model);
+       struct nvme_ns_head *head = dev_to_ns_head(dev);
+       struct nvme_ns_ids *ids = &head->ids;
+       struct nvme_subsystem *subsys = head->subsys;
+       int serial_len = sizeof(subsys->serial);
+       int model_len = sizeof(subsys->model);
  
-       if (!uuid_is_null(&ns->uuid))
-               return sprintf(buf, "uuid.%pU\n", &ns->uuid);
+       if (!uuid_is_null(&ids->uuid))
+               return sprintf(buf, "uuid.%pU\n", &ids->uuid);
  
-       if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
-               return sprintf(buf, "eui.%16phN\n", ns->nguid);
+       if (memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
+               return sprintf(buf, "eui.%16phN\n", ids->nguid);
  
-       if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
-               return sprintf(buf, "eui.%8phN\n", ns->eui);
+       if (memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
+               return sprintf(buf, "eui.%8phN\n", ids->eui64);
  
-       while (serial_len > 0 && (ctrl->serial[serial_len - 1] == ' ' ||
-                                 ctrl->serial[serial_len - 1] == '\0'))
+       while (serial_len > 0 && (subsys->serial[serial_len - 1] == ' ' ||
+                                 subsys->serial[serial_len - 1] == '\0'))
                serial_len--;
-       while (model_len > 0 && (ctrl->model[model_len - 1] == ' ' ||
-                                ctrl->model[model_len - 1] == '\0'))
+       while (model_len > 0 && (subsys->model[model_len - 1] == ' ' ||
+                                subsys->model[model_len - 1] == '\0'))
                model_len--;
  
-       return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", ctrl->vid,
-               serial_len, ctrl->serial, model_len, ctrl->model, ns->ns_id);
+       return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id,
+               serial_len, subsys->serial, model_len, subsys->model,
+               head->ns_id);
  }
  static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL);
  
  static ssize_t nguid_show(struct device *dev, struct device_attribute *attr,
-                         char *buf)
+               char *buf)
  {
-       struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
-       return sprintf(buf, "%pU\n", ns->nguid);
+       return sprintf(buf, "%pU\n", dev_to_ns_head(dev)->ids.nguid);
  }
  static DEVICE_ATTR(nguid, S_IRUGO, nguid_show, NULL);
  
  static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
-                                                               char *buf)
+               char *buf)
  {
-       struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+       struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids;
  
        /* For backward compatibility expose the NGUID to userspace if
         * we have no UUID set
         */
-       if (uuid_is_null(&ns->uuid)) {
+       if (uuid_is_null(&ids->uuid)) {
                printk_ratelimited(KERN_WARNING
                                   "No UUID available providing old NGUID\n");
-               return sprintf(buf, "%pU\n", ns->nguid);
+               return sprintf(buf, "%pU\n", ids->nguid);
        }
-       return sprintf(buf, "%pU\n", &ns->uuid);
+       return sprintf(buf, "%pU\n", &ids->uuid);
  }
  static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
  
  static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
-                                                               char *buf)
+               char *buf)
  {
-       struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
-       return sprintf(buf, "%8phd\n", ns->eui);
+       return sprintf(buf, "%8ph\n", dev_to_ns_head(dev)->ids.eui64);
  }
  static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL);
  
  static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
-                                                               char *buf)
+               char *buf)
  {
-       struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
-       return sprintf(buf, "%d\n", ns->ns_id);
+       return sprintf(buf, "%d\n", dev_to_ns_head(dev)->ns_id);
  }
  static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
  
- static struct attribute *nvme_ns_attrs[] = {
+ static struct attribute *nvme_ns_id_attrs[] = {
        &dev_attr_wwid.attr,
        &dev_attr_uuid.attr,
        &dev_attr_nguid.attr,
        NULL,
  };
  
- static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
+ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
                struct attribute *a, int n)
  {
        struct device *dev = container_of(kobj, struct device, kobj);
-       struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+       struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids;
  
        if (a == &dev_attr_uuid.attr) {
-               if (uuid_is_null(&ns->uuid) &&
-                   !memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+               if (uuid_is_null(&ids->uuid) &&
+                   !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
                        return 0;
        }
        if (a == &dev_attr_nguid.attr) {
-               if (!memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+               if (!memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
                        return 0;
        }
        if (a == &dev_attr_eui.attr) {
-               if (!memchr_inv(ns->eui, 0, sizeof(ns->eui)))
+               if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
                        return 0;
        }
        return a->mode;
  }
  
- static const struct attribute_group nvme_ns_attr_group = {
-       .attrs          = nvme_ns_attrs,
-       .is_visible     = nvme_ns_attrs_are_visible,
+ const struct attribute_group nvme_ns_id_attr_group = {
+       .attrs          = nvme_ns_id_attrs,
+       .is_visible     = nvme_ns_id_attrs_are_visible,
  };
  
  #define nvme_show_str_function(field)                                         \
@@@ -2162,10 -2550,15 +2551,15 @@@ static ssize_t  field##_show(struct dev
                            struct device_attribute *attr, char *buf)           \
  {                                                                             \
          struct nvme_ctrl *ctrl = dev_get_drvdata(dev);                                \
-         return sprintf(buf, "%.*s\n", (int)sizeof(ctrl->field), ctrl->field); \
+         return sprintf(buf, "%.*s\n",                                         \
+               (int)sizeof(ctrl->subsys->field), ctrl->subsys->field);         \
  }                                                                             \
  static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
  
+ nvme_show_str_function(model);
+ nvme_show_str_function(serial);
+ nvme_show_str_function(firmware_rev);
  #define nvme_show_int_function(field)                                         \
  static ssize_t  field##_show(struct device *dev,                              \
                            struct device_attribute *attr, char *buf)           \
  }                                                                             \
  static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
  
- nvme_show_str_function(model);
- nvme_show_str_function(serial);
- nvme_show_str_function(firmware_rev);
  nvme_show_int_function(cntlid);
  
  static ssize_t nvme_sysfs_delete(struct device *dev,
        struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
  
        if (device_remove_file_self(dev, attr))
-               ctrl->ops->delete_ctrl(ctrl);
+               nvme_delete_ctrl_sync(ctrl);
        return count;
  }
  static DEVICE_ATTR(delete_controller, S_IWUSR, NULL, nvme_sysfs_delete);
@@@ -2231,7 -2621,7 +2622,7 @@@ static ssize_t nvme_sysfs_show_subsysnq
  {
        struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
  
-       return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->subnqn);
+       return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->subsys->subnqn);
  }
  static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL);
  
@@@ -2284,12 -2674,128 +2675,128 @@@ static const struct attribute_group *nv
        NULL,
  };
  
+ static struct nvme_ns_head *__nvme_find_ns_head(struct nvme_subsystem *subsys,
+               unsigned nsid)
+ {
+       struct nvme_ns_head *h;
+       lockdep_assert_held(&subsys->lock);
+       list_for_each_entry(h, &subsys->nsheads, entry) {
+               if (h->ns_id == nsid && kref_get_unless_zero(&h->ref))
+                       return h;
+       }
+       return NULL;
+ }
+ 
+ static int __nvme_check_ids(struct nvme_subsystem *subsys,
+               struct nvme_ns_head *new)
+ {
+       struct nvme_ns_head *h;
+       lockdep_assert_held(&subsys->lock);
+       list_for_each_entry(h, &subsys->nsheads, entry) {
+               if (nvme_ns_ids_valid(&new->ids) &&
+                   nvme_ns_ids_equal(&new->ids, &h->ids))
+                       return -EINVAL;
+       }
+       return 0;
+ }
+ 
+ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
+               unsigned nsid, struct nvme_id_ns *id)
+ {
+       struct nvme_ns_head *head;
+       int ret = -ENOMEM;
+       head = kzalloc(sizeof(*head), GFP_KERNEL);
+       if (!head)
+               goto out;
+       ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL);
+       if (ret < 0)
+               goto out_free_head;
+       head->instance = ret;
+       INIT_LIST_HEAD(&head->list);
+       init_srcu_struct(&head->srcu);
+       head->subsys = ctrl->subsys;
+       head->ns_id = nsid;
+       kref_init(&head->ref);
+       nvme_report_ns_ids(ctrl, nsid, id, &head->ids);
+       ret = __nvme_check_ids(ctrl->subsys, head);
+       if (ret) {
+               dev_err(ctrl->device,
+                       "duplicate IDs for nsid %d\n", nsid);
+               goto out_cleanup_srcu;
+       }
+       ret = nvme_mpath_alloc_disk(ctrl, head);
+       if (ret)
+               goto out_cleanup_srcu;
+       list_add_tail(&head->entry, &ctrl->subsys->nsheads);
+       return head;
+ out_cleanup_srcu:
+       cleanup_srcu_struct(&head->srcu);
+       ida_simple_remove(&ctrl->subsys->ns_ida, head->instance);
+ out_free_head:
+       kfree(head);
+ out:
+       return ERR_PTR(ret);
+ }
+ 
+ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
+               struct nvme_id_ns *id, bool *new)
+ {
+       struct nvme_ctrl *ctrl = ns->ctrl;
+       bool is_shared = id->nmic & (1 << 0);
+       struct nvme_ns_head *head = NULL;
+       int ret = 0;
+       mutex_lock(&ctrl->subsys->lock);
+       if (is_shared)
+               head = __nvme_find_ns_head(ctrl->subsys, nsid);
+       if (!head) {
+               head = nvme_alloc_ns_head(ctrl, nsid, id);
+               if (IS_ERR(head)) {
+                       ret = PTR_ERR(head);
+                       goto out_unlock;
+               }
+               *new = true;
+       } else {
+               struct nvme_ns_ids ids;
+               nvme_report_ns_ids(ctrl, nsid, id, &ids);
+               if (!nvme_ns_ids_equal(&head->ids, &ids)) {
+                       dev_err(ctrl->device,
+                               "IDs don't match for shared namespace %d\n",
+                                       nsid);
+                       ret = -EINVAL;
+                       goto out_unlock;
+               }
+               *new = false;
+       }
+       list_add_tail(&ns->siblings, &head->list);
+       ns->head = head;
+ out_unlock:
+       mutex_unlock(&ctrl->subsys->lock);
+       return ret;
+ }
+ 
  static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
  {
        struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
        struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
  
-       return nsa->ns_id - nsb->ns_id;
+       return nsa->head->ns_id - nsb->head->ns_id;
  }
  
  static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
  
        mutex_lock(&ctrl->namespaces_mutex);
        list_for_each_entry(ns, &ctrl->namespaces, list) {
-               if (ns->ns_id == nsid) {
-                       kref_get(&ns->kref);
+               if (ns->head->ns_id == nsid) {
+                       if (!kref_get_unless_zero(&ns->kref))
+                               continue;
                        ret = ns;
                        break;
                }
-               if (ns->ns_id > nsid)
+               if (ns->head->ns_id > nsid)
                        break;
        }
        mutex_unlock(&ctrl->namespaces_mutex);
@@@ -2318,7 -2825,7 +2826,7 @@@ static int nvme_setup_streams_ns(struc
        if (!ctrl->nr_streams)
                return 0;
  
-       ret = nvme_get_stream_params(ctrl, &s, ns->ns_id);
+       ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id);
        if (ret)
                return ret;
  
@@@ -2342,33 -2849,27 +2850,27 @@@ static void nvme_alloc_ns(struct nvme_c
        struct gendisk *disk;
        struct nvme_id_ns *id;
        char disk_name[DISK_NAME_LEN];
-       int node = dev_to_node(ctrl->dev);
+       int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT;
+       bool new = true;
  
        ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
        if (!ns)
                return;
  
-       ns->instance = ida_simple_get(&ctrl->ns_ida, 1, 0, GFP_KERNEL);
-       if (ns->instance < 0)
-               goto out_free_ns;
        ns->queue = blk_mq_init_queue(ctrl->tagset);
        if (IS_ERR(ns->queue))
-               goto out_release_instance;
+               goto out_free_ns;
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
        ns->queue->queuedata = ns;
        ns->ctrl = ctrl;
  
        kref_init(&ns->kref);
-       ns->ns_id = nsid;
        ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
  
        blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
        nvme_set_queue_limits(ctrl, ns->queue);
        nvme_setup_streams_ns(ctrl, ns);
  
-       sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
        id = nvme_identify_ns(ctrl, nsid);
        if (!id)
                goto out_free_queue;
        if (id->ncap == 0)
                goto out_free_id;
  
-       nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid);
+       if (nvme_init_ns_head(ns, nsid, id, &new))
+               goto out_free_id;
+       
+ #ifdef CONFIG_NVME_MULTIPATH
+       /*
+        * If multipathing is enabled we need to always use the subsystem
+        * instance number for numbering our devices to avoid conflicts
+        * between subsystems that have multiple controllers and thus use
+        * the multipath-aware subsystem node and those that have a single
+        * controller and use the controller node directly.
+        */
+       if (ns->head->disk) {
+               sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
+                               ctrl->cntlid, ns->head->instance);
+               flags = GENHD_FL_HIDDEN;
+       } else {
+               sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
+                               ns->head->instance);
+       }
+ #else
+       /*
+        * But without the multipath code enabled, multiple controller per
+        * subsystems are visible as devices and thus we cannot use the
+        * subsystem instance.
+        */
+       sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
+ #endif
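For illustration of the format strings above (the concrete numbers are hypothetical): with CONFIG_NVME_MULTIPATH, a namespace shared through subsystem instance 0 is exposed once as the multipath node nvme0n1, while each controller's private path becomes a hidden gendisk named nvme0c<cntlid>n1; without multipath the traditional nvme<ctrl instance>n<head instance> names are kept.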
  
        if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
                if (nvme_nvm_register(ns, disk_name, node)) {
                        dev_warn(ctrl->device, "LightNVM init failure\n");
-                       goto out_free_id;
+                       goto out_unlink_ns;
                }
        }
  
        disk = alloc_disk_node(0, node);
        if (!disk)
-               goto out_free_id;
+               goto out_unlink_ns;
  
        disk->fops = &nvme_fops;
        disk->private_data = ns;
        disk->queue = ns->queue;
-       disk->flags = GENHD_FL_EXT_DEVT;
+       disk->flags = flags;
        memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
        ns->disk = disk;
  
        list_add_tail(&ns->list, &ctrl->namespaces);
        mutex_unlock(&ctrl->namespaces_mutex);
  
-       kref_get(&ctrl->kref);
+       nvme_get_ctrl(ctrl);
  
        kfree(id);
  
        device_add_disk(ctrl->device, ns->disk);
        if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
-                                       &nvme_ns_attr_group))
+                                       &nvme_ns_id_attr_group))
                pr_warn("%s: failed to create sysfs group for identification\n",
                        ns->disk->disk_name);
        if (ns->ndev && nvme_nvm_register_sysfs(ns))
                pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
                        ns->disk->disk_name);
+       if (new)
+               nvme_mpath_add_disk(ns->head);
+       nvme_mpath_add_disk_links(ns);
        return;
+  out_unlink_ns:
+       mutex_lock(&ctrl->subsys->lock);
+       list_del_rcu(&ns->siblings);
+       mutex_unlock(&ctrl->subsys->lock);
   out_free_id:
        kfree(id);
   out_free_queue:
        blk_cleanup_queue(ns->queue);
-  out_release_instance:
-       ida_simple_remove(&ctrl->ns_ida, ns->instance);
   out_free_ns:
        kfree(ns);
  }
  
  static void nvme_ns_remove(struct nvme_ns *ns)
  {
+       struct nvme_ns_head *head = ns->head;
        if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
                return;
  
        if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
                if (blk_get_integrity(ns->disk))
                        blk_integrity_unregister(ns->disk);
+               nvme_mpath_remove_disk_links(ns);
                sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
-                                       &nvme_ns_attr_group);
+                                       &nvme_ns_id_attr_group);
                if (ns->ndev)
                        nvme_nvm_unregister_sysfs(ns);
                del_gendisk(ns->disk);
                blk_cleanup_queue(ns->queue);
        }
  
+       mutex_lock(&ns->ctrl->subsys->lock);
+       nvme_mpath_clear_current_path(ns);
+       if (head)
+               list_del_rcu(&ns->siblings);
+       mutex_unlock(&ns->ctrl->subsys->lock);
        mutex_lock(&ns->ctrl->namespaces_mutex);
        list_del_init(&ns->list);
        mutex_unlock(&ns->ctrl->namespaces_mutex);
  
+       synchronize_srcu(&head->srcu);
        nvme_put_ns(ns);
  }
  
@@@ -2467,7 -3010,7 +3011,7 @@@ static void nvme_remove_invalid_namespa
        struct nvme_ns *ns, *next;
  
        list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
-               if (ns->ns_id > nsid)
+               if (ns->head->ns_id > nsid)
                        nvme_ns_remove(ns);
        }
  }
@@@ -2583,20 -3126,29 +3127,29 @@@ void nvme_remove_namespaces(struct nvme
  }
  EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
  
+ static void nvme_aen_uevent(struct nvme_ctrl *ctrl)
+ {
+       char *envp[2] = { NULL, NULL };
+       u32 aen_result = ctrl->aen_result;
+       ctrl->aen_result = 0;
+       if (!aen_result)
+               return;
+       envp[0] = kasprintf(GFP_KERNEL, "NVME_AEN=%#08x", aen_result);
+       if (!envp[0])
+               return;
+       kobject_uevent_env(&ctrl->device->kobj, KOBJ_CHANGE, envp);
+       kfree(envp[0]);
+ }
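nvme_aen_uevent() above hands AEN results to userspace as a KOBJ_CHANGE uevent on the controller device (class "nvme") carrying an NVME_AEN=<result> property. A minimal listener sketch using libudev (an external library, link with -ludev); only the "nvme" subsystem name and the NVME_AEN key come from the code above, the rest is generic monitor boilerplate:

    #include <stdio.h>
    #include <poll.h>
    #include <libudev.h>

    int main(void)
    {
            struct udev *udev = udev_new();
            struct udev_monitor *mon;
            struct pollfd pfd;

            if (!udev)
                    return 1;
            mon = udev_monitor_new_from_netlink(udev, "udev");
            if (!mon)
                    return 1;

            /* only watch events coming from nvme class devices */
            udev_monitor_filter_add_match_subsystem_devtype(mon, "nvme", NULL);
            udev_monitor_enable_receiving(mon);
            pfd.fd = udev_monitor_get_fd(mon);
            pfd.events = POLLIN;

            while (poll(&pfd, 1, -1) > 0) {
                    struct udev_device *dev = udev_monitor_receive_device(mon);
                    const char *aen;

                    if (!dev)
                            continue;
                    aen = udev_device_get_property_value(dev, "NVME_AEN");
                    if (aen)
                            printf("%s: NVME_AEN=%s\n",
                                   udev_device_get_sysname(dev), aen);
                    udev_device_unref(dev);
            }

            udev_monitor_unref(mon);
            udev_unref(udev);
            return 0;
    }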
  static void nvme_async_event_work(struct work_struct *work)
  {
        struct nvme_ctrl *ctrl =
                container_of(work, struct nvme_ctrl, async_event_work);
  
-       spin_lock_irq(&ctrl->lock);
-       while (ctrl->state == NVME_CTRL_LIVE && ctrl->event_limit > 0) {
-               int aer_idx = --ctrl->event_limit;
-               spin_unlock_irq(&ctrl->lock);
-               ctrl->ops->submit_async_event(ctrl, aer_idx);
-               spin_lock_irq(&ctrl->lock);
-       }
-       spin_unlock_irq(&ctrl->lock);
+       nvme_aen_uevent(ctrl);
+       ctrl->ops->submit_async_event(ctrl);
  }
  
  static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
  
  static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
  {
-       struct nvme_command c = { };
        struct nvme_fw_slot_info_log *log;
  
        log = kmalloc(sizeof(*log), GFP_KERNEL);
        if (!log)
                return;
  
-       c.common.opcode = nvme_admin_get_log_page;
-       c.common.nsid = cpu_to_le32(NVME_NSID_ALL);
-       c.common.cdw10[0] = nvme_get_log_dw10(NVME_LOG_FW_SLOT, sizeof(*log));
-       if (!nvme_submit_sync_cmd(ctrl->admin_q, &c, log, sizeof(*log)))
+       if (nvme_get_log(ctrl, NVME_LOG_FW_SLOT, log, sizeof(*log)))
                dev_warn(ctrl->device,
                                "Get FW SLOT INFO log error\n");
        kfree(log);
@@@ -2660,7 -3207,7 +3208,7 @@@ static void nvme_fw_act_work(struct wor
                return;
  
        nvme_start_queues(ctrl);
-       /* read FW slot informationi to clear the AER*/
+       /* read FW slot information to clear the AER */
        nvme_get_fw_slot_info(ctrl);
  }
  
@@@ -2668,24 -3215,21 +3216,21 @@@ void nvme_complete_async_event(struct n
                union nvme_result *res)
  {
        u32 result = le32_to_cpu(res->u32);
-       bool done = true;
  
-       switch (le16_to_cpu(status) >> 1) {
-       case NVME_SC_SUCCESS:
-               done = false;
-               /*FALLTHRU*/
-       case NVME_SC_ABORT_REQ:
-               ++ctrl->event_limit;
-               if (ctrl->state == NVME_CTRL_LIVE)
-                       queue_work(nvme_wq, &ctrl->async_event_work);
+       if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
+               return;
+       switch (result & 0x7) {
+       case NVME_AER_ERROR:
+       case NVME_AER_SMART:
+       case NVME_AER_CSS:
+       case NVME_AER_VS:
+               ctrl->aen_result = result;
                break;
        default:
                break;
        }
  
-       if (done)
-               return;
        switch (result & 0xff07) {
        case NVME_AER_NOTICE_NS_CHANGED:
                dev_info(ctrl->device, "rescanning\n");
        default:
                dev_warn(ctrl->device, "async event result %08x\n", result);
        }
- }
- EXPORT_SYMBOL_GPL(nvme_complete_async_event);
- void nvme_queue_async_events(struct nvme_ctrl *ctrl)
- {
-       ctrl->event_limit = NVME_NR_AERS;
        queue_work(nvme_wq, &ctrl->async_event_work);
  }
- EXPORT_SYMBOL_GPL(nvme_queue_async_events);
- static DEFINE_IDA(nvme_instance_ida);
- static int nvme_set_instance(struct nvme_ctrl *ctrl)
- {
-       int instance, error;
-       do {
-               if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
-                       return -ENODEV;
-               spin_lock(&dev_list_lock);
-               error = ida_get_new(&nvme_instance_ida, &instance);
-               spin_unlock(&dev_list_lock);
-       } while (error == -EAGAIN);
-       if (error)
-               return -ENODEV;
-       ctrl->instance = instance;
-       return 0;
- }
- static void nvme_release_instance(struct nvme_ctrl *ctrl)
- {
-       spin_lock(&dev_list_lock);
-       ida_remove(&nvme_instance_ida, ctrl->instance);
-       spin_unlock(&dev_list_lock);
- }
+ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
  
  void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
  {
@@@ -2752,7 -3261,7 +3262,7 @@@ void nvme_start_ctrl(struct nvme_ctrl *
  
        if (ctrl->queue_count > 1) {
                nvme_queue_scan(ctrl);
-               nvme_queue_async_events(ctrl);
+               queue_work(nvme_wq, &ctrl->async_event_work);
                nvme_start_queues(ctrl);
        }
  }
@@@ -2760,30 -3269,31 +3270,31 @@@ EXPORT_SYMBOL_GPL(nvme_start_ctrl)
  
  void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
  {
-       device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
-       spin_lock(&dev_list_lock);
-       list_del(&ctrl->node);
-       spin_unlock(&dev_list_lock);
+       cdev_device_del(&ctrl->cdev, ctrl->device);
  }
  EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
  
- static void nvme_free_ctrl(struct kref *kref)
+ static void nvme_free_ctrl(struct device *dev)
  {
-       struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);
+       struct nvme_ctrl *ctrl =
+               container_of(dev, struct nvme_ctrl, ctrl_device);
+       struct nvme_subsystem *subsys = ctrl->subsys;
  
-       put_device(ctrl->device);
-       nvme_release_instance(ctrl);
-       ida_destroy(&ctrl->ns_ida);
+       ida_simple_remove(&nvme_instance_ida, ctrl->instance);
+       kfree(ctrl->effects);
+       if (subsys) {
+               mutex_lock(&subsys->lock);
+               list_del(&ctrl->subsys_entry);
+               mutex_unlock(&subsys->lock);
+               sysfs_remove_link(&subsys->dev.kobj, dev_name(ctrl->device));
+       }
  
        ctrl->ops->free_ctrl(ctrl);
- }
  
- void nvme_put_ctrl(struct nvme_ctrl *ctrl)
- {
-       kref_put(&ctrl->kref, nvme_free_ctrl);
+       if (subsys)
+               nvme_put_subsystem(subsys);
  }
- EXPORT_SYMBOL_GPL(nvme_put_ctrl);
  
  /*
   * Initialize a NVMe controller structures.  This needs to be called during
@@@ -2799,32 -3309,36 +3310,36 @@@ int nvme_init_ctrl(struct nvme_ctrl *ct
        spin_lock_init(&ctrl->lock);
        INIT_LIST_HEAD(&ctrl->namespaces);
        mutex_init(&ctrl->namespaces_mutex);
-       kref_init(&ctrl->kref);
        ctrl->dev = dev;
        ctrl->ops = ops;
        ctrl->quirks = quirks;
        INIT_WORK(&ctrl->scan_work, nvme_scan_work);
        INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
        INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
+       INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
  
-       ret = nvme_set_instance(ctrl);
-       if (ret)
+       ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL);
+       if (ret < 0)
                goto out;
-       ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
-                               MKDEV(nvme_char_major, ctrl->instance),
-                               ctrl, nvme_dev_attr_groups,
-                               "nvme%d", ctrl->instance);
-       if (IS_ERR(ctrl->device)) {
-               ret = PTR_ERR(ctrl->device);
+       ctrl->instance = ret;
+       device_initialize(&ctrl->ctrl_device);
+       ctrl->device = &ctrl->ctrl_device;
+       ctrl->device->devt = MKDEV(MAJOR(nvme_chr_devt), ctrl->instance);
+       ctrl->device->class = nvme_class;
+       ctrl->device->parent = ctrl->dev;
+       ctrl->device->groups = nvme_dev_attr_groups;
+       ctrl->device->release = nvme_free_ctrl;
+       dev_set_drvdata(ctrl->device, ctrl);
+       ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
+       if (ret)
                goto out_release_instance;
-       }
-       get_device(ctrl->device);
-       ida_init(&ctrl->ns_ida);
  
-       spin_lock(&dev_list_lock);
-       list_add_tail(&ctrl->node, &nvme_ctrl_list);
-       spin_unlock(&dev_list_lock);
+       cdev_init(&ctrl->cdev, &nvme_dev_fops);
+       ctrl->cdev.owner = ops->module;
+       ret = cdev_device_add(&ctrl->cdev, ctrl->device);
+       if (ret)
+               goto out_free_name;
  
        /*
         * Initialize latency tolerance controls.  The sysfs files won't
                min(default_ps_max_latency_us, (unsigned long)S32_MAX));
  
        return 0;
+ out_free_name:
+       kfree_const(dev->kobj.name);
  out_release_instance:
-       nvme_release_instance(ctrl);
+       ida_simple_remove(&nvme_instance_ida, ctrl->instance);
  out:
        return ret;
  }
@@@ -2945,6 -3461,16 +3462,16 @@@ void nvme_start_queues(struct nvme_ctr
  }
  EXPORT_SYMBOL_GPL(nvme_start_queues);
  
+ int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
+ {
+       if (!ctrl->ops->reinit_request)
+               return 0;
+       return blk_mq_tagset_iter(set, set->driver_data,
+                       ctrl->ops->reinit_request);
+ }
+ EXPORT_SYMBOL_GPL(nvme_reinit_tagset);
+ 
  int __init nvme_core_init(void)
  {
        int result;
        if (!nvme_wq)
                return -ENOMEM;
  
-       result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
-                                                       &nvme_dev_fops);
+       result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme");
        if (result < 0)
                goto destroy_wq;
-       else if (result > 0)
-               nvme_char_major = result;
  
        nvme_class = class_create(THIS_MODULE, "nvme");
        if (IS_ERR(nvme_class)) {
                goto unregister_chrdev;
        }
  
+       nvme_subsys_class = class_create(THIS_MODULE, "nvme-subsystem");
+       if (IS_ERR(nvme_subsys_class)) {
+               result = PTR_ERR(nvme_subsys_class);
+               goto destroy_class;
+       }
        return 0;
  
+ destroy_class:
+       class_destroy(nvme_class);
  unregister_chrdev:
-       __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+       unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
  destroy_wq:
        destroy_workqueue(nvme_wq);
        return result;
  
  void nvme_core_exit(void)
  {
+       ida_destroy(&nvme_subsystems_ida);
+       class_destroy(nvme_subsys_class);
        class_destroy(nvme_class);
-       __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+       unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
        destroy_workqueue(nvme_wq);
  }
  
diff --combined drivers/nvme/host/fc.c
index be49d0f793816cae0a9629665230acf248ab85ec,6eb460b117d6faf5d753319782a29a4f266f0599..7ab0be55c7d063b31f1a9525a2961308b0d8a274
  /* *************************** Data Structures/Defines ****************** */
  
  
- /*
-  * We handle AEN commands ourselves and don't even let the
-  * block layer know about them.
-  */
- #define NVME_FC_NR_AEN_COMMANDS       1
- #define NVME_FC_AQ_BLKMQ_DEPTH        \
-       (NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
- #define AEN_CMDID_BASE                (NVME_FC_AQ_BLKMQ_DEPTH + 1)
  enum nvme_fc_queue_flags {
        NVME_FC_Q_CONNECTED = (1 << 0),
  };
  
  #define NVMEFC_QUEUE_DELAY    3               /* ms units */
  
+ #define NVME_FC_DEFAULT_DEV_LOSS_TMO  60      /* seconds */
+ 
  struct nvme_fc_queue {
        struct nvme_fc_ctrl     *ctrl;
        struct device           *dev;
        struct blk_mq_hw_ctx    *hctx;
        void                    *lldd_handle;
-       int                     queue_size;
        size_t                  cmnd_capsule_len;
        u32                     qnum;
        u32                     rqcnt;
@@@ -124,6 -116,7 +116,7 @@@ struct nvme_fc_lport 
        struct device                   *dev;   /* physical device for dma */
        struct nvme_fc_port_template    *ops;
        struct kref                     ref;
+       atomic_t                        act_rport_cnt;
  } __aligned(sizeof(u64));     /* alignment for other things alloc'd with */
  
  struct nvme_fc_rport {
        struct nvme_fc_lport            *lport;
        spinlock_t                      lock;
        struct kref                     ref;
+       atomic_t                        act_ctrl_cnt;
+       unsigned long                   dev_loss_end;
  } __aligned(sizeof(u64));     /* alignment for other things alloc'd with */
  
  enum nvme_fcctrl_flags {
@@@ -150,6 -145,7 +145,7 @@@ struct nvme_fc_ctrl 
        struct nvme_fc_rport    *rport;
        u32                     cnum;
  
+       bool                    assoc_active;
        u64                     association_id;
  
        struct list_head        ctrl_list;      /* rport->ctrl_list */
        struct blk_mq_tag_set   admin_tag_set;
        struct blk_mq_tag_set   tag_set;
  
-       struct work_struct      delete_work;
        struct delayed_work     connect_work;
  
        struct kref             ref;
        u32                     iocnt;
        wait_queue_head_t       ioabort_wait;
  
-       struct nvme_fc_fcp_op   aen_ops[NVME_FC_NR_AEN_COMMANDS];
+       struct nvme_fc_fcp_op   aen_ops[NVME_NR_AEN_COMMANDS];
  
        struct nvme_ctrl        ctrl;
  };
@@@ -213,10 -208,16 +208,16 @@@ static DEFINE_IDA(nvme_fc_ctrl_cnt)
  
  
  
+ /*
+  * These items are short-term. They will eventually be moved into
+  * a generic FC class. See comments in module init.
+  */
+ static struct class *fc_class;
+ static struct device *fc_udev_device;
  
  /* *********************** FC-NVME Port Management ************************ */
  
- static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *);
  static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
                        struct nvme_fc_queue *, unsigned int);
  
@@@ -235,9 -236,6 +236,6 @@@ nvme_fc_free_lport(struct kref *ref
        list_del(&lport->port_list);
        spin_unlock_irqrestore(&nvme_fc_lock, flags);
  
-       /* let the LLDD know we've finished tearing it down */
-       lport->ops->localport_delete(&lport->localport);
        ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
        ida_destroy(&lport->endp_cnt);
  
@@@ -260,7 -258,9 +258,9 @@@ nvme_fc_lport_get(struct nvme_fc_lport 
  
  
  static struct nvme_fc_lport *
- nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo)
+ nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo,
+                       struct nvme_fc_port_template *ops,
+                       struct device *dev)
  {
        struct nvme_fc_lport *lport;
        unsigned long flags;
                    lport->localport.port_name != pinfo->port_name)
                        continue;
  
+               if (lport->dev != dev) {
+                       lport = ERR_PTR(-EXDEV);
+                       goto out_done;
+               }
                if (lport->localport.port_state != FC_OBJSTATE_DELETED) {
                        lport = ERR_PTR(-EEXIST);
                        goto out_done;
  
                /* resume the lport */
  
+               lport->ops = ops;
                lport->localport.port_role = pinfo->port_role;
                lport->localport.port_id = pinfo->port_id;
                lport->localport.port_state = FC_OBJSTATE_ONLINE;
@@@ -348,7 -354,7 +354,7 @@@ nvme_fc_register_localport(struct nvme_
         * expired, we can simply re-enable the localport. Remoteports
         * and controller reconnections should resume naturally.
         */
-       newrec = nvme_fc_attach_to_unreg_lport(pinfo);
+       newrec = nvme_fc_attach_to_unreg_lport(pinfo, template, dev);
  
        /* found an lport, but something about its state is bad */
        if (IS_ERR(newrec)) {
        INIT_LIST_HEAD(&newrec->port_list);
        INIT_LIST_HEAD(&newrec->endp_list);
        kref_init(&newrec->ref);
+       atomic_set(&newrec->act_rport_cnt, 0);
        newrec->ops = template;
        newrec->dev = dev;
        ida_init(&newrec->endp_cnt);
@@@ -446,12 -453,177 +453,177 @@@ nvme_fc_unregister_localport(struct nvm
  
        spin_unlock_irqrestore(&nvme_fc_lock, flags);
  
+       if (atomic_read(&lport->act_rport_cnt) == 0)
+               lport->ops->localport_delete(&lport->localport);
        nvme_fc_lport_put(lport);
  
        return 0;
  }
  EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport);
  
+ /*
+  * TRADDR strings, per FC-NVME, are fixed format:
+  *   "nn-0x<16hexdigits>:pn-0x<16hexdigits>" - 43 characters
+  * The udev event strings differ only by the prefix naming the
+  * field being specified:
+  *    "NVMEFC_HOST_TRADDR=" or "NVMEFC_TRADDR=" - 19 max characters
+  *  19 + 43 + null_fudge = 64 characters
+  */
+ #define FCNVME_TRADDR_LENGTH          64
+ static void
+ nvme_fc_signal_discovery_scan(struct nvme_fc_lport *lport,
+               struct nvme_fc_rport *rport)
+ {
+       char hostaddr[FCNVME_TRADDR_LENGTH];    /* NVMEFC_HOST_TRADDR=...*/
+       char tgtaddr[FCNVME_TRADDR_LENGTH];     /* NVMEFC_TRADDR=...*/
+       char *envp[4] = { "FC_EVENT=nvmediscovery", hostaddr, tgtaddr, NULL };
+       if (!(rport->remoteport.port_role & FC_PORT_ROLE_NVME_DISCOVERY))
+               return;
+       snprintf(hostaddr, sizeof(hostaddr),
+               "NVMEFC_HOST_TRADDR=nn-0x%016llx:pn-0x%016llx",
+               lport->localport.node_name, lport->localport.port_name);
+       snprintf(tgtaddr, sizeof(tgtaddr),
+               "NVMEFC_TRADDR=nn-0x%016llx:pn-0x%016llx",
+               rport->remoteport.node_name, rport->remoteport.port_name);
+       kobject_uevent_env(&fc_udev_device->kobj, KOBJ_CHANGE, envp);
+ }
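For illustration, the uevent environment built above would look roughly like the following (the WWN values are hypothetical):

	/*
	 *   FC_EVENT=nvmediscovery
	 *   NVMEFC_HOST_TRADDR=nn-0x20000090fa945678:pn-0x10000090fa945678
	 *   NVMEFC_TRADDR=nn-0x200000109b1234ff:pn-0x100000109b1234ff
	 *
	 * Each string is at most 19 (prefix) + 43 (fixed-format address)
	 * characters plus the terminating NUL, which is why
	 * FCNVME_TRADDR_LENGTH is rounded up to 64.
	 */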
+ static void
+ nvme_fc_free_rport(struct kref *ref)
+ {
+       struct nvme_fc_rport *rport =
+               container_of(ref, struct nvme_fc_rport, ref);
+       struct nvme_fc_lport *lport =
+                       localport_to_lport(rport->remoteport.localport);
+       unsigned long flags;
+       WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED);
+       WARN_ON(!list_empty(&rport->ctrl_list));
+       /* remove from lport list */
+       spin_lock_irqsave(&nvme_fc_lock, flags);
+       list_del(&rport->endp_list);
+       spin_unlock_irqrestore(&nvme_fc_lock, flags);
+       ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);
+       kfree(rport);
+       nvme_fc_lport_put(lport);
+ }
+ static void
+ nvme_fc_rport_put(struct nvme_fc_rport *rport)
+ {
+       kref_put(&rport->ref, nvme_fc_free_rport);
+ }
+ static int
+ nvme_fc_rport_get(struct nvme_fc_rport *rport)
+ {
+       return kref_get_unless_zero(&rport->ref);
+ }
+ static void
+ nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
+ {
+       switch (ctrl->ctrl.state) {
+       case NVME_CTRL_NEW:
+       case NVME_CTRL_RECONNECTING:
+               /*
+                * As all reconnects were suppressed, schedule a
+                * connect.
+                */
+               dev_info(ctrl->ctrl.device,
+                       "NVME-FC{%d}: connectivity re-established. "
+                       "Attempting reconnect\n", ctrl->cnum);
+               queue_delayed_work(nvme_wq, &ctrl->connect_work, 0);
+               break;
+       case NVME_CTRL_RESETTING:
+               /*
+                * Controller is already in the process of terminating the
+                * association. No need to do anything further. The reconnect
+                * step will naturally occur after the reset completes.
+                */
+               break;
+       default:
+               /* no action to take - let it delete */
+               break;
+       }
+ }
+ static struct nvme_fc_rport *
+ nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport,
+                               struct nvme_fc_port_info *pinfo)
+ {
+       struct nvme_fc_rport *rport;
+       struct nvme_fc_ctrl *ctrl;
+       unsigned long flags;
+       spin_lock_irqsave(&nvme_fc_lock, flags);
+       list_for_each_entry(rport, &lport->endp_list, endp_list) {
+               if (rport->remoteport.node_name != pinfo->node_name ||
+                   rport->remoteport.port_name != pinfo->port_name)
+                       continue;
+               if (!nvme_fc_rport_get(rport)) {
+                       rport = ERR_PTR(-ENOLCK);
+                       goto out_done;
+               }
+               spin_unlock_irqrestore(&nvme_fc_lock, flags);
+               spin_lock_irqsave(&rport->lock, flags);
+               /* has it been unregistered */
+               if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) {
+                       /* means lldd called us twice */
+                       spin_unlock_irqrestore(&rport->lock, flags);
+                       nvme_fc_rport_put(rport);
+                       return ERR_PTR(-ESTALE);
+               }
+               rport->remoteport.port_state = FC_OBJSTATE_ONLINE;
+               rport->dev_loss_end = 0;
+               /*
+                * kick off a reconnect attempt on all associations to the
+                * remote port. A successful reconnect will resume i/o.
+                */
+               list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
+                       nvme_fc_resume_controller(ctrl);
+               spin_unlock_irqrestore(&rport->lock, flags);
+               return rport;
+       }
+       rport = NULL;
+ out_done:
+       spin_unlock_irqrestore(&nvme_fc_lock, flags);
+       return rport;
+ }
+ static inline void
+ __nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport,
+                       struct nvme_fc_port_info *pinfo)
+ {
+       if (pinfo->dev_loss_tmo)
+               rport->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo;
+       else
+               rport->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO;
+ }
  /**
   * nvme_fc_register_remoteport - transport entry point called by an
   *                              LLDD to register the existence of a NVME
@@@ -478,28 -650,52 +650,52 @@@ nvme_fc_register_remoteport(struct nvme
        unsigned long flags;
        int ret, idx;
  
+       if (!nvme_fc_lport_get(lport)) {
+               ret = -ESHUTDOWN;
+               goto out_reghost_failed;
+       }
+       /*
+        * look to see if there is already a remoteport that is waiting
+        * for a reconnect (within dev_loss_tmo) with the same WWNs.
+        * If so, transition to it and reconnect.
+        */
+       newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo);
+       /* found an rport, but something about its state is bad */
+       if (IS_ERR(newrec)) {
+               ret = PTR_ERR(newrec);
+               goto out_lport_put;
+       /* found existing rport, which was resumed */
+       } else if (newrec) {
+               nvme_fc_lport_put(lport);
+               __nvme_fc_set_dev_loss_tmo(newrec, pinfo);
+               nvme_fc_signal_discovery_scan(lport, newrec);
+               *portptr = &newrec->remoteport;
+               return 0;
+       }
+       /* nothing found - allocate a new remoteport struct */
        newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz),
                         GFP_KERNEL);
        if (!newrec) {
                ret = -ENOMEM;
-               goto out_reghost_failed;
-       }
-       if (!nvme_fc_lport_get(lport)) {
-               ret = -ESHUTDOWN;
-               goto out_kfree_rport;
+               goto out_lport_put;
        }
  
        idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL);
        if (idx < 0) {
                ret = -ENOSPC;
-               goto out_lport_put;
+               goto out_kfree_rport;
        }
  
        INIT_LIST_HEAD(&newrec->endp_list);
        INIT_LIST_HEAD(&newrec->ctrl_list);
        INIT_LIST_HEAD(&newrec->ls_req_list);
        kref_init(&newrec->ref);
+       atomic_set(&newrec->act_ctrl_cnt, 0);
        spin_lock_init(&newrec->lock);
        newrec->remoteport.localport = &lport->localport;
        newrec->dev = lport->dev;
        newrec->remoteport.port_id = pinfo->port_id;
        newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
        newrec->remoteport.port_num = idx;
+       __nvme_fc_set_dev_loss_tmo(newrec, pinfo);
  
        spin_lock_irqsave(&nvme_fc_lock, flags);
        list_add_tail(&newrec->endp_list, &lport->endp_list);
        spin_unlock_irqrestore(&nvme_fc_lock, flags);
  
+       nvme_fc_signal_discovery_scan(lport, newrec);
        *portptr = &newrec->remoteport;
        return 0;
  
- out_lport_put:
-       nvme_fc_lport_put(lport);
  out_kfree_rport:
        kfree(newrec);
+ out_lport_put:
+       nvme_fc_lport_put(lport);
  out_reghost_failed:
        *portptr = NULL;
        return ret;
  }
  EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport);
  
- static void
- nvme_fc_free_rport(struct kref *ref)
- {
-       struct nvme_fc_rport *rport =
-               container_of(ref, struct nvme_fc_rport, ref);
-       struct nvme_fc_lport *lport =
-                       localport_to_lport(rport->remoteport.localport);
-       unsigned long flags;
-       WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED);
-       WARN_ON(!list_empty(&rport->ctrl_list));
-       /* remove from lport list */
-       spin_lock_irqsave(&nvme_fc_lock, flags);
-       list_del(&rport->endp_list);
-       spin_unlock_irqrestore(&nvme_fc_lock, flags);
-       /* let the LLDD know we've finished tearing it down */
-       lport->ops->remoteport_delete(&rport->remoteport);
-       ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);
-       kfree(rport);
-       nvme_fc_lport_put(lport);
- }
- static void
- nvme_fc_rport_put(struct nvme_fc_rport *rport)
- {
-       kref_put(&rport->ref, nvme_fc_free_rport);
- }
- static int
- nvme_fc_rport_get(struct nvme_fc_rport *rport)
- {
-       return kref_get_unless_zero(&rport->ref);
- }
  static int
  nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
  {
@@@ -592,6 -752,58 +752,58 @@@ restart
        return 0;
  }
  
+ static void
+ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
+ {
+       dev_info(ctrl->ctrl.device,
+               "NVME-FC{%d}: controller connectivity lost. Awaiting "
+               "Reconnect", ctrl->cnum);
+       switch (ctrl->ctrl.state) {
+       case NVME_CTRL_NEW:
+       case NVME_CTRL_LIVE:
+               /*
+                * Schedule a controller reset. The reset will terminate the
+                * association and schedule the reconnect timer.  Reconnects
+                * will be attempted until either the ctlr_loss_tmo
+                * (max_retries * connect_delay) expires or the remoteport's
+                * dev_loss_tmo expires.
+                */
+               if (nvme_reset_ctrl(&ctrl->ctrl)) {
+                       dev_warn(ctrl->ctrl.device,
+                               "NVME-FC{%d}: Couldn't schedule reset. "
+                               "Deleting controller.\n",
+                               ctrl->cnum);
+                       nvme_delete_ctrl(&ctrl->ctrl);
+               }
+               break;
+       case NVME_CTRL_RECONNECTING:
+               /*
+                * The association has already been terminated and the
+                * controller is attempting reconnects.  No need to do anything
+                * further.  Reconnects will be attempted until either the
+                * ctlr_loss_tmo (max_retries * connect_delay) expires or the
+                * remoteport's dev_loss_tmo expires.
+                */
+               break;
+       case NVME_CTRL_RESETTING:
+               /*
+                * Controller is already in the process of terminating the
+                * association.  No need to do anything further. The reconnect
+                * step will kick in naturally after the association is
+                * terminated.
+                */
+               break;
+       case NVME_CTRL_DELETING:
+       default:
+               /* no action to take - let it delete */
+               break;
+       }
+ }
  /**
   * nvme_fc_unregister_remoteport - transport entry point called by an
   *                              LLDD to deregister/remove a previously
@@@ -621,19 -833,78 +833,78 @@@ nvme_fc_unregister_remoteport(struct nv
        }
        portptr->port_state = FC_OBJSTATE_DELETED;
  
-       /* tear down all associations to the remote port */
-       list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
-               __nvme_fc_del_ctrl(ctrl);
+       rport->dev_loss_end = jiffies + (portptr->dev_loss_tmo * HZ);
+       list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
+               /* if dev_loss_tmo==0, dev loss is immediate */
+               if (!portptr->dev_loss_tmo) {
+                       dev_warn(ctrl->ctrl.device,
+                               "NVME-FC{%d}: controller connectivity lost. "
+                               "Deleting controller.\n",
+                               ctrl->cnum);
+                       nvme_delete_ctrl(&ctrl->ctrl);
+               } else
+                       nvme_fc_ctrl_connectivity_loss(ctrl);
+       }
  
        spin_unlock_irqrestore(&rport->lock, flags);
  
        nvme_fc_abort_lsops(rport);
  
+       if (atomic_read(&rport->act_ctrl_cnt) == 0)
+               rport->lport->ops->remoteport_delete(portptr);
+       /*
+        * Release the reference. Once all controllers go away, which
+        * should only occur after dev_loss_tmo expires, the rport can
+        * be torn down.
+        */
        nvme_fc_rport_put(rport);
        return 0;
  }
  EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport);
  
+ /**
+  * nvme_fc_rescan_remoteport - transport entry point called by an
+  *                              LLDD to request an nvme device rescan.
+  * @remoteport: pointer to the (registered) remote port that is to be
+  *              rescanned.
+  *
+  * Returns: N/A
+  */
+ void
+ nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport)
+ {
+       struct nvme_fc_rport *rport = remoteport_to_rport(remoteport);
+       nvme_fc_signal_discovery_scan(rport->lport, rport);
+ }
+ EXPORT_SYMBOL_GPL(nvme_fc_rescan_remoteport);
+ int
+ nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr,
+                       u32 dev_loss_tmo)
+ {
+       struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
+       unsigned long flags;
+       spin_lock_irqsave(&rport->lock, flags);
+       if (portptr->port_state != FC_OBJSTATE_ONLINE) {
+               spin_unlock_irqrestore(&rport->lock, flags);
+               return -EINVAL;
+       }
+       /* a dev_loss_tmo of 0 (immediate) is allowed to be set */
+       rport->remoteport.dev_loss_tmo = dev_loss_tmo;
+       spin_unlock_irqrestore(&rport->lock, flags);
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(nvme_fc_set_remoteport_devloss);
  
  /* *********************** FC-NVME DMA Handling **************************** */
  
@@@ -723,7 -994,6 +994,6 @@@ fc_dma_unmap_sg(struct device *dev, str
                dma_unmap_sg(dev, sg, nents, dir);
  }
  
  /* *********************** FC-NVME LS Handling **************************** */
  
  static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *);
@@@ -1266,7 -1536,7 +1536,7 @@@ nvme_fc_abort_aen_ops(struct nvme_fc_ct
        unsigned long flags;
        int i, ret;
  
-       for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
+       for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
                if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE)
                        continue;
  
@@@ -1331,7 -1601,7 +1601,7 @@@ nvme_fc_fcpio_done(struct nvmefc_fcp_re
        struct nvme_command *sqe = &op->cmd_iu.sqe;
        __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
        union nvme_result result;
-       bool complete_rq, terminate_assoc = true;
+       bool terminate_assoc = true;
  
        /*
         * WARNING:
        fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
                                sizeof(op->rsp_iu), DMA_FROM_DEVICE);
  
-       if (atomic_read(&op->state) == FCPOP_STATE_ABORTED)
-               status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
+       if (atomic_read(&op->state) == FCPOP_STATE_ABORTED ||
+                       op->flags & FCOP_FLAGS_TERMIO)
+               status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
        else if (freq->status)
                status = cpu_to_le16(NVME_SC_INTERNAL << 1);
  
  done:
        if (op->flags & FCOP_FLAGS_AEN) {
                nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
-               complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
+               __nvme_fc_fcpop_chk_teardowns(ctrl, op);
                atomic_set(&op->state, FCPOP_STATE_IDLE);
                op->flags = FCOP_FLAGS_AEN;     /* clear other flags */
                nvme_fc_ctrl_put(ctrl);
                goto check_error;
        }
  
-       complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
-       if (!complete_rq) {
-               if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
-                       status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
-                       if (blk_queue_dying(rq->q))
-                               status |= cpu_to_le16(NVME_SC_DNR << 1);
-               }
-               nvme_end_request(rq, status, result);
-       } else
+       /*
+        * Force failures of commands if we're killing the controller
+        * or have an error on a command used to create a new association.
+        */
+       if (status &&
+           (blk_queue_dying(rq->q) ||
+            ctrl->ctrl.state == NVME_CTRL_NEW ||
+            ctrl->ctrl.state == NVME_CTRL_RECONNECTING))
+               status |= cpu_to_le16(NVME_SC_DNR << 1);
+       if (__nvme_fc_fcpop_chk_teardowns(ctrl, op))
                __nvme_fc_final_op_cleanup(rq);
+       else
+               nvme_end_request(rq, status, result);
  
  check_error:
        if (terminate_assoc)
@@@ -1531,7 -1806,7 +1806,7 @@@ nvme_fc_init_aen_ops(struct nvme_fc_ctr
        int i, ret;
  
        aen_op = ctrl->aen_ops;
-       for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
+       for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
                private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
                                                GFP_KERNEL);
                if (!private)
                sqe = &cmdiu->sqe;
                ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0],
                                aen_op, (struct request *)NULL,
-                               (AEN_CMDID_BASE + i));
+                               (NVME_AQ_BLK_MQ_DEPTH + i));
                if (ret) {
                        kfree(private);
                        return ret;
                memset(sqe, 0, sizeof(*sqe));
                sqe->common.opcode = nvme_admin_async_event;
                /* Note: core layer may overwrite the sqe.command_id value */
-               sqe->common.command_id = AEN_CMDID_BASE + i;
+               sqe->common.command_id = NVME_AQ_BLK_MQ_DEPTH + i;
        }
        return 0;
  }
@@@ -1566,7 -1841,7 +1841,7 @@@ nvme_fc_term_aen_ops(struct nvme_fc_ctr
        int i;
  
        aen_op = ctrl->aen_ops;
-       for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
+       for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
                if (!aen_op->fcp_req.private)
                        continue;
  
@@@ -1610,7 -1885,7 +1885,7 @@@ nvme_fc_init_admin_hctx(struct blk_mq_h
  }
  
  static void
- nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx, size_t queue_size)
+ nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx)
  {
        struct nvme_fc_queue *queue;
  
        else
                queue->cmnd_capsule_len = sizeof(struct nvme_command);
  
-       queue->queue_size = queue_size;
        /*
         * Considered whether we should allocate buffers for all SQEs
         * and CQEs and dma map them - mapping their respective entries
@@@ -1751,7 -2024,7 +2024,7 @@@ nvme_fc_init_io_queues(struct nvme_fc_c
        int i;
  
        for (i = 1; i < ctrl->ctrl.queue_count; i++)
-               nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize);
+               nvme_fc_init_queue(ctrl, i);
  }
  
  static void
@@@ -1825,13 -2098,6 +2098,6 @@@ nvme_fc_error_recovery(struct nvme_fc_c
        dev_warn(ctrl->ctrl.device,
                "NVME-FC{%d}: resetting controller\n", ctrl->cnum);
  
-       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
-               dev_err(ctrl->ctrl.device,
-                       "NVME-FC{%d}: error_recovery: Couldn't change state "
-                       "to RECONNECTING\n", ctrl->cnum);
-               return;
-       }
        nvme_reset_ctrl(&ctrl->ctrl);
  }
  
@@@ -1842,13 -2108,14 +2108,14 @@@ nvme_fc_timeout(struct request *rq, boo
        struct nvme_fc_ctrl *ctrl = op->ctrl;
        int ret;
  
-       if (reserved)
+       if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
+                       atomic_read(&op->state) == FCPOP_STATE_ABORTED)
                return BLK_EH_RESET_TIMER;
  
        ret = __nvme_fc_abort_op(ctrl, op);
        if (ret)
-               /* io wasn't active to abort consider it done */
-               return BLK_EH_HANDLED;
+               /* io wasn't active to abort */
+               return BLK_EH_NOT_HANDLED;
  
        /*
         * we can't individually ABTS an io without affecting the queue,
         */
        nvme_fc_error_recovery(ctrl, "io timeout error");
  
-       return BLK_EH_HANDLED;
+       /*
+        * The io abort has been initiated. Restart the reset timer;
+        * the abort completion will complete the io shortly. This
+        * avoids a synchronous wait while the abort finishes.
+        */
+       return BLK_EH_RESET_TIMER;
  }
  
  static int
@@@ -2110,7 -2382,7 +2382,7 @@@ nvme_fc_poll(struct blk_mq_hw_ctx *hctx
  }
  
  static void
- nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
+ nvme_fc_submit_async_event(struct nvme_ctrl *arg)
  {
        struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
        struct nvme_fc_fcp_op *aen_op;
        bool terminating = false;
        blk_status_t ret;
  
-       if (aer_idx > NVME_FC_NR_AEN_COMMANDS)
-               return;
        spin_lock_irqsave(&ctrl->lock, flags);
        if (ctrl->flags & FCCTRL_TERMIO)
                terminating = true;
        if (terminating)
                return;
  
-       aen_op = &ctrl->aen_ops[aer_idx];
+       aen_op = &ctrl->aen_ops[0];
  
        ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0,
                                        NVMEFC_FCP_NODATA);
        if (ret)
                dev_err(ctrl->ctrl.device,
-                       "failed async event work [%d]\n", aer_idx);
+                       "failed async event work\n");
  }
  
  static void
@@@ -2337,7 -2606,7 +2606,7 @@@ nvme_fc_reinit_io_queues(struct nvme_fc
  
        nvme_fc_init_io_queues(ctrl);
  
-       ret = blk_mq_reinit_tagset(&ctrl->tag_set, nvme_fc_reinit_request);
+       ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
        if (ret)
                goto out_free_io_queues;
  
@@@ -2360,6 -2629,61 +2629,61 @@@ out_free_io_queues
        return ret;
  }
  
+ static void
+ nvme_fc_rport_active_on_lport(struct nvme_fc_rport *rport)
+ {
+       struct nvme_fc_lport *lport = rport->lport;
+       atomic_inc(&lport->act_rport_cnt);
+ }
+ static void
+ nvme_fc_rport_inactive_on_lport(struct nvme_fc_rport *rport)
+ {
+       struct nvme_fc_lport *lport = rport->lport;
+       u32 cnt;
+       cnt = atomic_dec_return(&lport->act_rport_cnt);
+       if (cnt == 0 && lport->localport.port_state == FC_OBJSTATE_DELETED)
+               lport->ops->localport_delete(&lport->localport);
+ }
+ static int
+ nvme_fc_ctlr_active_on_rport(struct nvme_fc_ctrl *ctrl)
+ {
+       struct nvme_fc_rport *rport = ctrl->rport;
+       u32 cnt;
+       if (ctrl->assoc_active)
+               return 1;
+       ctrl->assoc_active = true;
+       cnt = atomic_inc_return(&rport->act_ctrl_cnt);
+       if (cnt == 1)
+               nvme_fc_rport_active_on_lport(rport);
+       return 0;
+ }
+ static int
+ nvme_fc_ctlr_inactive_on_rport(struct nvme_fc_ctrl *ctrl)
+ {
+       struct nvme_fc_rport *rport = ctrl->rport;
+       struct nvme_fc_lport *lport = rport->lport;
+       u32 cnt;
+       /* ctrl->assoc_active=false will be set independently */
+       cnt = atomic_dec_return(&rport->act_ctrl_cnt);
+       if (cnt == 0) {
+               if (rport->remoteport.port_state == FC_OBJSTATE_DELETED)
+                       lport->ops->remoteport_delete(&rport->remoteport);
+               nvme_fc_rport_inactive_on_lport(rport);
+       }
+       return 0;
+ }
  /*
   * This routine restarts the controller on the host side, and
   * on the link side, recreates the controller association.
@@@ -2368,26 -2692,31 +2692,31 @@@ static in
  nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
  {
        struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
-       u32 segs;
        int ret;
        bool changed;
  
        ++ctrl->ctrl.nr_reconnects;
  
+       if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
+               return -ENODEV;
+       if (nvme_fc_ctlr_active_on_rport(ctrl))
+               return -ENOTUNIQ;
        /*
         * Create the admin queue
         */
  
-       nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH);
+       nvme_fc_init_queue(ctrl, 0);
  
        ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
-                               NVME_FC_AQ_BLKMQ_DEPTH);
+                               NVME_AQ_BLK_MQ_DEPTH);
        if (ret)
                goto out_free_queue;
  
        ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
-                               NVME_FC_AQ_BLKMQ_DEPTH,
-                               (NVME_FC_AQ_BLKMQ_DEPTH / 4));
+                               NVME_AQ_BLK_MQ_DEPTH,
+                               (NVME_AQ_BLK_MQ_DEPTH / 4));
        if (ret)
                goto out_delete_hw_queue;
  
        if (ret)
                goto out_disconnect_admin_queue;
  
-       segs = min_t(u32, NVME_FC_MAX_SEGMENTS,
-                       ctrl->lport->ops->max_sgl_segments);
-       ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9);
+       ctrl->ctrl.max_hw_sectors =
+               (ctrl->lport->ops->max_sgl_segments - 1) << (PAGE_SHIFT - 9);
  
        ret = nvme_init_identify(&ctrl->ctrl);
        if (ret)
        }
  
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-       WARN_ON_ONCE(!changed);
  
        ctrl->ctrl.nr_reconnects = 0;
  
-       nvme_start_ctrl(&ctrl->ctrl);
+       if (changed)
+               nvme_start_ctrl(&ctrl->ctrl);
  
        return 0;       /* Success */
  
@@@ -2482,6 -2810,8 +2810,8 @@@ out_delete_hw_queue
        __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
  out_free_queue:
        nvme_fc_free_queue(&ctrl->queues[0]);
+       ctrl->assoc_active = false;
+       nvme_fc_ctlr_inactive_on_rport(ctrl);
  
        return ret;
  }
@@@ -2497,6 -2827,10 +2827,10 @@@ nvme_fc_delete_association(struct nvme_
  {
        unsigned long flags;
  
+       if (!ctrl->assoc_active)
+               return;
+       ctrl->assoc_active = false;
        spin_lock_irqsave(&ctrl->lock, flags);
        ctrl->flags |= FCCTRL_TERMIO;
        ctrl->iocnt = 0;
         * use blk_mq_tagset_busy_iter() and the transport routine to
         * terminate the exchanges.
         */
-       blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+       if (ctrl->ctrl.state != NVME_CTRL_NEW)
+               blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_fc_terminate_exchange, &ctrl->ctrl);
  
        nvme_fc_abort_aen_ops(ctrl);
  
        /* wait for all io that had to be aborted */
 -      spin_lock_irqsave(&ctrl->lock, flags);
 +      spin_lock_irq(&ctrl->lock);
        wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
        ctrl->flags &= ~FCCTRL_TERMIO;
 -      spin_unlock_irqrestore(&ctrl->lock, flags);
 +      spin_unlock_irq(&ctrl->lock);
  
        nvme_fc_term_aen_ops(ctrl);
  
  
        __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
        nvme_fc_free_queue(&ctrl->queues[0]);
+       nvme_fc_ctlr_inactive_on_rport(ctrl);
  }
  
  static void
- nvme_fc_delete_ctrl_work(struct work_struct *work)
+ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
  {
-       struct nvme_fc_ctrl *ctrl =
-               container_of(work, struct nvme_fc_ctrl, delete_work);
+       struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
  
-       cancel_work_sync(&ctrl->ctrl.reset_work);
        cancel_delayed_work_sync(&ctrl->connect_work);
-       nvme_stop_ctrl(&ctrl->ctrl);
-       nvme_remove_namespaces(&ctrl->ctrl);
        /*
         * kill the association on the link side.  this will block
         * waiting for io to terminate
         */
        nvme_fc_delete_association(ctrl);
-       /*
-        * tear down the controller
-        * After the last reference on the nvme ctrl is removed,
-        * the transport nvme_fc_nvme_ctrl_freed() callback will be
-        * invoked. From there, the transport will tear down it's
-        * logical queues and association.
-        */
-       nvme_uninit_ctrl(&ctrl->ctrl);
-       nvme_put_ctrl(&ctrl->ctrl);
- }
- static bool
- __nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl)
- {
-       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
-               return true;
-       if (!queue_work(nvme_wq, &ctrl->delete_work))
-               return true;
-       return false;
- }
- static int
- __nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl)
- {
-       return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0;
- }
- /*
-  * Request from nvme core layer to delete the controller
-  */
- static int
- nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
- {
-       struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
-       int ret;
-       if (!kref_get_unless_zero(&ctrl->ctrl.kref))
-               return -EBUSY;
-       ret = __nvme_fc_del_ctrl(ctrl);
-       if (!ret)
-               flush_workqueue(nvme_wq);
-       nvme_put_ctrl(&ctrl->ctrl);
-       return ret;
  }
  
  static void
  nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
  {
-       /* If we are resetting/deleting then do nothing */
-       if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
-               WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
-                       ctrl->ctrl.state == NVME_CTRL_LIVE);
-               return;
-       }
+       struct nvme_fc_rport *rport = ctrl->rport;
+       struct nvme_fc_remote_port *portptr = &rport->remoteport;
+       unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
+       bool recon = true;
  
-       dev_info(ctrl->ctrl.device,
-               "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
-               ctrl->cnum, status);
+       if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING)
+               return;
  
-       if (nvmf_should_reconnect(&ctrl->ctrl)) {
+       if (portptr->port_state == FC_OBJSTATE_ONLINE)
                dev_info(ctrl->ctrl.device,
-                       "NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
-                       ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
-               queue_delayed_work(nvme_wq, &ctrl->connect_work,
-                               ctrl->ctrl.opts->reconnect_delay * HZ);
+                       "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
+                       ctrl->cnum, status);
+       else if (time_after_eq(jiffies, rport->dev_loss_end))
+               recon = false;
+       if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
+               if (portptr->port_state == FC_OBJSTATE_ONLINE)
+                       dev_info(ctrl->ctrl.device,
+                               "NVME-FC{%d}: Reconnect attempt in %ld "
+                               "seconds\n",
+                               ctrl->cnum, recon_delay / HZ);
+               else if (time_after(jiffies + recon_delay, rport->dev_loss_end))
+                       recon_delay = rport->dev_loss_end - jiffies;
+               queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay);
        } else {
-               dev_warn(ctrl->ctrl.device,
+               if (portptr->port_state == FC_OBJSTATE_ONLINE)
+                       dev_warn(ctrl->ctrl.device,
                                "NVME-FC{%d}: Max reconnect attempts (%d) "
                                "reached. Removing controller\n",
                                ctrl->cnum, ctrl->ctrl.nr_reconnects);
-               WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
+               else
+                       dev_warn(ctrl->ctrl.device,
+                               "NVME-FC{%d}: dev_loss_tmo (%d) expired "
+                               "while waiting for remoteport connectivity. "
+                               "Removing controller\n", ctrl->cnum,
+                               portptr->dev_loss_tmo);
+               WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
        }
  }
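A worked example of the reconnect window handled above, with hypothetical timings:

	/*
	 * Assume reconnect_delay = 10 s (hypothetical fabric option) and the
	 * default dev_loss_tmo of 60 s. When the remoteport is unregistered
	 * at t = 0, dev_loss_end = jiffies + 60 * HZ and reconnect attempts
	 * run at roughly t = 10, 20, ... If an attempt fails at t = 56 s,
	 * the usual 10 s delay would overshoot dev_loss_end, so recon_delay
	 * is clamped to the remaining 4 s. Once jiffies reaches dev_loss_end,
	 * recon is false and the controller is deleted instead.
	 */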
  
@@@ -2675,15 -2972,28 +2972,28 @@@ nvme_fc_reset_ctrl_work(struct work_str
        int ret;
  
        nvme_stop_ctrl(&ctrl->ctrl);
        /* will block while waiting for io to terminate */
        nvme_fc_delete_association(ctrl);
  
-       ret = nvme_fc_create_association(ctrl);
+       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+               dev_err(ctrl->ctrl.device,
+                       "NVME-FC{%d}: error_recovery: Couldn't change state "
+                       "to RECONNECTING\n", ctrl->cnum);
+               return;
+       }
+       if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE)
+               ret = nvme_fc_create_association(ctrl);
+       else
+               ret = -ENOTCONN;
        if (ret)
                nvme_fc_reconnect_or_delete(ctrl, ret);
        else
                dev_info(ctrl->ctrl.device,
-                       "NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
+                       "NVME-FC{%d}: controller reset complete\n",
+                       ctrl->cnum);
  }
  
  static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
        .reg_write32            = nvmf_reg_write32,
        .free_ctrl              = nvme_fc_nvme_ctrl_freed,
        .submit_async_event     = nvme_fc_submit_async_event,
-       .delete_ctrl            = nvme_fc_del_nvme_ctrl,
+       .delete_ctrl            = nvme_fc_delete_ctrl,
        .get_address            = nvmf_get_address,
+       .reinit_request         = nvme_fc_reinit_request,
  };
  
  static void
@@@ -2728,13 -3039,40 +3039,40 @@@ static const struct blk_mq_ops nvme_fc_
  };
  
  
+ /*
+  * Fails a controller request if it matches an existing controller
+  * (association) with the same tuple:
+  * <Host NQN, Host ID, local FC port, remote FC port, SUBSYS NQN>
+  *
+  * The ports don't need to be compared as they are intrinsically
+  * already matched by the port pointers supplied.
+  */
+ static bool
+ nvme_fc_existing_controller(struct nvme_fc_rport *rport,
+               struct nvmf_ctrl_options *opts)
+ {
+       struct nvme_fc_ctrl *ctrl;
+       unsigned long flags;
+       bool found = false;
+       spin_lock_irqsave(&rport->lock, flags);
+       list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
+               found = nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts);
+               if (found)
+                       break;
+       }
+       spin_unlock_irqrestore(&rport->lock, flags);
+       return found;
+ }
  static struct nvme_ctrl *
  nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
        struct nvme_fc_lport *lport, struct nvme_fc_rport *rport)
  {
        struct nvme_fc_ctrl *ctrl;
        unsigned long flags;
 -      int ret, idx;
 +      int ret, idx, retry;
  
        if (!(rport->remoteport.port_role &
            (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
                goto out_fail;
        }
  
+       if (!opts->duplicate_connect &&
+           nvme_fc_existing_controller(rport, opts)) {
+               ret = -EALREADY;
+               goto out_fail;
+       }
        ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
        if (!ctrl) {
                ret = -ENOMEM;
        ctrl->rport = rport;
        ctrl->dev = lport->dev;
        ctrl->cnum = idx;
+       ctrl->assoc_active = false;
 +      init_waitqueue_head(&ctrl->ioabort_wait);
  
        get_device(ctrl->dev);
        kref_init(&ctrl->ref);
  
-       INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
        INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
        INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
        spin_lock_init(&ctrl->lock);
  
        memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
        ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
-       ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH;
+       ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
        ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
        ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
        ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
        ctrl->admin_tag_set.driver_data = ctrl;
        ctrl->admin_tag_set.nr_hw_queues = 1;
        ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
+       ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED;
  
        ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
        if (ret)
        list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
        spin_unlock_irqrestore(&rport->lock, flags);
  
 -      ret = nvme_fc_create_association(ctrl);
 +      /*
 +       * It's possible that transactions used to create the association
 +       * may fail. Examples: CreateAssociation LS or CreateIOConnection
 +       * LS gets dropped/corrupted/fails; or a frame gets dropped or a
 +       * command times out for one of the actions to init the controller
 +       * (Connect, Get/Set_Property, Set_Features, etc). Many of these
 +       * transport errors (frame drop, LS failure) inherently must kill
 +       * the association. The transport is coded so that any command used
 +       * to create the association (prior to a LIVE state transition
 +       * while NEW or RECONNECTING) will fail if it completes in error or
 +       * times out.
 +       *
 +       * As such: since the connect request was most likely due to a
 +       * udev event that discovered the remote port, meaning there is
 +       * not an admin or script there to restart if the connect
 +       * request fails, retry the initial connection creation up to
 +       * three times before giving up and declaring failure.
 +       */
 +      for (retry = 0; retry < 3; retry++) {
 +              ret = nvme_fc_create_association(ctrl);
 +              if (!ret)
 +                      break;
 +      }
 +
        if (ret) {
 +              /* couldn't schedule retry - fail out */
 +              dev_err(ctrl->ctrl.device,
 +                      "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum);
 +
                ctrl->ctrl.opts = NULL;
 +
                /* initiate nvme ctrl ref counting teardown */
                nvme_uninit_ctrl(&ctrl->ctrl);
                nvme_put_ctrl(&ctrl->ctrl);
                return ERR_PTR(ret);
        }
  
-       kref_get(&ctrl->ctrl.kref);
+       nvme_get_ctrl(&ctrl->ctrl);
  
        dev_info(ctrl->ctrl.device,
                "NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
@@@ -3026,7 -3342,50 +3371,50 @@@ static struct nvmf_transport_ops nvme_f
  
  static int __init nvme_fc_init_module(void)
  {
-       return nvmf_register_transport(&nvme_fc_transport);
+       int ret;
+       /*
+        * NOTE:
+        * It is expected that in the future the kernel will combine
+        * the FC-isms that currently live under scsi, and are now being
+        * added to by NVME, into a new standalone FC class. The SCSI
+        * and NVME protocols and their devices would be under this
+        * new FC class.
+        *
+        * As we need something to post FC-specific udev events to,
+        * specifically for nvme probe events, start by creating the
+        * new device class.  When the new standalone FC class is
+        * put in place, this code will move to a more generic
+        * location for the class.
+        */
+       fc_class = class_create(THIS_MODULE, "fc");
+       if (IS_ERR(fc_class)) {
+               pr_err("couldn't register class fc\n");
+               return PTR_ERR(fc_class);
+       }
+       /*
+        * Create a device for the FC-centric udev events
+        */
+       fc_udev_device = device_create(fc_class, NULL, MKDEV(0, 0), NULL,
+                               "fc_udev_device");
+       if (IS_ERR(fc_udev_device)) {
+               pr_err("couldn't create fc_udev device!\n");
+               ret = PTR_ERR(fc_udev_device);
+               goto out_destroy_class;
+       }
+       ret = nvmf_register_transport(&nvme_fc_transport);
+       if (ret)
+               goto out_destroy_device;
+       return 0;
+ out_destroy_device:
+       device_destroy(fc_class, MKDEV(0, 0));
+ out_destroy_class:
+       class_destroy(fc_class);
+       return ret;
  }
  
  static void __exit nvme_fc_exit_module(void)
  
        ida_destroy(&nvme_fc_local_port_cnt);
        ida_destroy(&nvme_fc_ctrl_cnt);
+       device_destroy(fc_class, MKDEV(0, 0));
+       class_destroy(fc_class);
  }
  
  module_init(nvme_fc_init_module);
diff --combined drivers/nvme/host/pci.c
index 3f5a04c586cefdc8096469ba38d325004963b42d,762b8402e04c591df728e6e5e5bffa16ccf0d907..a11cfd470089226cffd01c9c6104afdc876c341a
@@@ -13,7 -13,6 +13,6 @@@
   */
  
  #include <linux/aer.h>
- #include <linux/bitops.h>
  #include <linux/blkdev.h>
  #include <linux/blk-mq.h>
  #include <linux/blk-mq-pci.h>
  #include <linux/mutex.h>
  #include <linux/once.h>
  #include <linux/pci.h>
- #include <linux/poison.h>
  #include <linux/t10-pi.h>
- #include <linux/timer.h>
  #include <linux/types.h>
  #include <linux/io-64-nonatomic-lo-hi.h>
- #include <asm/unaligned.h>
  #include <linux/sed-opal.h>
  
  #include "nvme.h"
  #define SQ_SIZE(depth)                (depth * sizeof(struct nvme_command))
  #define CQ_SIZE(depth)                (depth * sizeof(struct nvme_completion))
  
- /*
-  * We handle AEN commands ourselves and don't even let the
-  * block layer know about them.
-  */
- #define NVME_AQ_BLKMQ_DEPTH   (NVME_AQ_DEPTH - NVME_NR_AERS)
+ #define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc))
  
  static int use_threaded_interrupts;
  module_param(use_threaded_interrupts, int, 0);
@@@ -57,6 -49,12 +49,12 @@@ module_param(max_host_mem_size_mb, uint
  MODULE_PARM_DESC(max_host_mem_size_mb,
        "Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
  
+ static unsigned int sgl_threshold = SZ_32K;
+ module_param(sgl_threshold, uint, 0644);
+ MODULE_PARM_DESC(sgl_threshold,
+               "Use SGLs when average request segment size is larger than or equal to "
+               "this size. Use 0 to disable SGLs.");
  static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
  static const struct kernel_param_ops io_queue_depth_ops = {
        .set = io_queue_depth_set,
@@@ -94,7 -92,7 +92,7 @@@ struct nvme_dev 
        struct mutex shutdown_lock;
        bool subsystem;
        void __iomem *cmb;
 -      dma_addr_t cmb_dma_addr;
 +      pci_bus_addr_t cmb_bus_addr;
        u64 cmb_size;
        u32 cmbsz;
        u32 cmbloc;
@@@ -178,6 -176,7 +176,7 @@@ struct nvme_queue 
  struct nvme_iod {
        struct nvme_request req;
        struct nvme_queue *nvmeq;
+       bool use_sgl;
        int aborted;
        int npages;             /* In the PRP list. 0 means small pool in use */
        int nents;              /* Used in scatterlist */
@@@ -331,17 -330,35 +330,35 @@@ static int nvme_npages(unsigned size, s
        return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
  }
  
- static unsigned int nvme_iod_alloc_size(struct nvme_dev *dev,
-               unsigned int size, unsigned int nseg)
+ /*
+  * Calculates the number of pages needed for the SGL segments. For example a 4k
+  * page can accommodate 256 SGL descriptors.
+  */
+ static int nvme_pci_npages_sgl(unsigned int num_seg)
  {
-       return sizeof(__le64 *) * nvme_npages(size, dev) +
-                       sizeof(struct scatterlist) * nseg;
+       return DIV_ROUND_UP(num_seg * sizeof(struct nvme_sgl_desc), PAGE_SIZE);
  }
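As a quick check of the arithmetic, assuming the usual 16-byte SGL descriptor and 4 KiB pages:

	/*
	 * With sizeof(struct nvme_sgl_desc) == 16 and PAGE_SIZE == 4096:
	 *   nvme_pci_npages_sgl(256) == DIV_ROUND_UP(256 * 16, 4096) == 1
	 *   nvme_pci_npages_sgl(257) == DIV_ROUND_UP(257 * 16, 4096) == 2
	 * i.e. one additional descriptor page per 256 segments.
	 */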
  
- static unsigned int nvme_cmd_size(struct nvme_dev *dev)
+ static unsigned int nvme_pci_iod_alloc_size(struct nvme_dev *dev,
+               unsigned int size, unsigned int nseg, bool use_sgl)
  {
-       return sizeof(struct nvme_iod) +
-               nvme_iod_alloc_size(dev, NVME_INT_BYTES(dev), NVME_INT_PAGES);
+       size_t alloc_size;
+       if (use_sgl)
+               alloc_size = sizeof(__le64 *) * nvme_pci_npages_sgl(nseg);
+       else
+               alloc_size = sizeof(__le64 *) * nvme_npages(size, dev);
+       return alloc_size + sizeof(struct scatterlist) * nseg;
+ }
+ static unsigned int nvme_pci_cmd_size(struct nvme_dev *dev, bool use_sgl)
+ {
+       unsigned int alloc_size = nvme_pci_iod_alloc_size(dev,
+                                   NVME_INT_BYTES(dev), NVME_INT_PAGES,
+                                   use_sgl);
+       return sizeof(struct nvme_iod) + alloc_size;
  }
  
  static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@@ -425,10 -442,10 +442,10 @@@ static void __nvme_submit_cmd(struct nv
        nvmeq->sq_tail = tail;
  }
  
- static __le64 **iod_list(struct request *req)
+ static void **nvme_pci_iod_list(struct request *req)
  {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-       return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
+       return (void **)(iod->sg + blk_rq_nr_phys_segments(req));
  }
  
  static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
        unsigned int size = blk_rq_payload_bytes(rq);
  
        if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
-               iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
+               size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg,
+                               iod->use_sgl);
+               iod->sg = kmalloc(alloc_size, GFP_ATOMIC);
                if (!iod->sg)
                        return BLK_STS_RESOURCE;
        } else {
  static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
  {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-       const int last_prp = dev->ctrl.page_size / 8 - 1;
+       const int last_prp = dev->ctrl.page_size / sizeof(__le64) - 1;
+       dma_addr_t dma_addr = iod->first_dma, next_dma_addr;
        int i;
-       __le64 **list = iod_list(req);
-       dma_addr_t prp_dma = iod->first_dma;
  
        if (iod->npages == 0)
-               dma_pool_free(dev->prp_small_pool, list[0], prp_dma);
+               dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
+                       dma_addr);
        for (i = 0; i < iod->npages; i++) {
-               __le64 *prp_list = list[i];
-               dma_addr_t next_prp_dma = le64_to_cpu(prp_list[last_prp]);
-               dma_pool_free(dev->prp_page_pool, prp_list, prp_dma);
-               prp_dma = next_prp_dma;
+               void *addr = nvme_pci_iod_list(req)[i];
+               if (iod->use_sgl) {
+                       struct nvme_sgl_desc *sg_list = addr;
+                       next_dma_addr =
+                           le64_to_cpu((sg_list[SGES_PER_PAGE - 1]).addr);
+               } else {
+                       __le64 *prp_list = addr;
+                       next_dma_addr = le64_to_cpu(prp_list[last_prp]);
+               }
+               dma_pool_free(dev->prp_page_pool, addr, dma_addr);
+               dma_addr = next_dma_addr;
        }
  
        if (iod->sg != iod->inline_sg)
@@@ -555,7 -588,8 +588,8 @@@ static void nvme_print_sgl(struct scatt
        }
  }
  
- static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
+ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
+               struct request *req, struct nvme_rw_command *cmnd)
  {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct dma_pool *pool;
        u32 page_size = dev->ctrl.page_size;
        int offset = dma_addr & (page_size - 1);
        __le64 *prp_list;
-       __le64 **list = iod_list(req);
+       void **list = nvme_pci_iod_list(req);
        dma_addr_t prp_dma;
        int nprps, i;
  
+       iod->use_sgl = false;
        length -= (page_size - offset);
        if (length <= 0) {
                iod->first_dma = 0;
-               return BLK_STS_OK;
+               goto done;
        }
  
        dma_len -= (page_size - offset);
  
        if (length <= page_size) {
                iod->first_dma = dma_addr;
-               return BLK_STS_OK;
+               goto done;
        }
  
        nprps = DIV_ROUND_UP(length, page_size);
                dma_len = sg_dma_len(sg);
        }
  
+ done:
+       cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+       cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma);
        return BLK_STS_OK;
  
   bad_sgl:
        return BLK_STS_IOERR;
  }
  
+ static void nvme_pci_sgl_set_data(struct nvme_sgl_desc *sge,
+               struct scatterlist *sg)
+ {
+       sge->addr = cpu_to_le64(sg_dma_address(sg));
+       sge->length = cpu_to_le32(sg_dma_len(sg));
+       sge->type = NVME_SGL_FMT_DATA_DESC << 4;
+ }
+ static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge,
+               dma_addr_t dma_addr, int entries)
+ {
+       sge->addr = cpu_to_le64(dma_addr);
+       if (entries < SGES_PER_PAGE) {
+               sge->length = cpu_to_le32(entries * sizeof(*sge));
+               sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4;
+       } else {
+               sge->length = cpu_to_le32(PAGE_SIZE);
+               sge->type = NVME_SGL_FMT_SEG_DESC << 4;
+       }
+ }
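For example, with hypothetical entry counts and the same 16-byte descriptor / 4 KiB page assumptions (SGES_PER_PAGE == 256):

	/*
	 *   entries == 3   -> length = 3 * 16 = 48, type = NVME_SGL_FMT_LAST_SEG_DESC
	 *   entries >= 256 -> length = PAGE_SIZE,   type = NVME_SGL_FMT_SEG_DESC
	 * so only the final descriptor page in the chain is marked as the
	 * last segment.
	 */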
+ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
+               struct request *req, struct nvme_rw_command *cmd)
+ {
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+       int length = blk_rq_payload_bytes(req);
+       struct dma_pool *pool;
+       struct nvme_sgl_desc *sg_list;
+       struct scatterlist *sg = iod->sg;
+       int entries = iod->nents, i = 0;
+       dma_addr_t sgl_dma;
+       iod->use_sgl = true;
+       /* setting the transfer type as SGL */
+       cmd->flags = NVME_CMD_SGL_METABUF;
+       if (length == sg_dma_len(sg)) {
+               nvme_pci_sgl_set_data(&cmd->dptr.sgl, sg);
+               return BLK_STS_OK;
+       }
+       if (entries <= (256 / sizeof(struct nvme_sgl_desc))) {
+               pool = dev->prp_small_pool;
+               iod->npages = 0;
+       } else {
+               pool = dev->prp_page_pool;
+               iod->npages = 1;
+       }
+       sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
+       if (!sg_list) {
+               iod->npages = -1;
+               return BLK_STS_RESOURCE;
+       }
+       nvme_pci_iod_list(req)[0] = sg_list;
+       iod->first_dma = sgl_dma;
+       nvme_pci_sgl_set_seg(&cmd->dptr.sgl, sgl_dma, entries);
+       do {
+               if (i == SGES_PER_PAGE) {
+                       struct nvme_sgl_desc *old_sg_desc = sg_list;
+                       struct nvme_sgl_desc *link = &old_sg_desc[i - 1];
+                       sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
+                       if (!sg_list)
+                               return BLK_STS_RESOURCE;
+                       i = 0;
+                       nvme_pci_iod_list(req)[iod->npages++] = sg_list;
+                       sg_list[i++] = *link;
+                       nvme_pci_sgl_set_seg(link, sgl_dma, entries);
+               }
+               nvme_pci_sgl_set_data(&sg_list[i++], sg);
+               length -= sg_dma_len(sg);
+               sg = sg_next(sg);
+               entries--;
+       } while (length > 0);
+       WARN_ON(entries > 0);
+       return BLK_STS_OK;
+ }
+ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
+ {
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+       unsigned int avg_seg_size;
+       avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req),
+                       blk_rq_nr_phys_segments(req));
+       if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
+               return false;
+       if (!iod->nvmeq->qid)
+               return false;
+       if (!sgl_threshold || avg_seg_size < sgl_threshold)
+               return false;
+       return true;
+ }
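A worked example of the threshold check, for a hypothetical request:

	/*
	 * A 128 KiB read split into four 32 KiB physical segments gives
	 *   avg_seg_size = DIV_ROUND_UP(131072, 4) = 32768
	 * which is not below the default sgl_threshold (SZ_32K), so SGLs
	 * are used, provided the controller reports SGL support
	 * (ctrl.sgls bits 0/1) and the request is on an I/O queue
	 * (qid != 0); admin commands always fall back to PRPs.
	 */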
  static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
                struct nvme_command *cmnd)
  {
                                DMA_ATTR_NO_WARN))
                goto out;
  
-       ret = nvme_setup_prps(dev, req);
+       if (nvme_pci_use_sgls(dev, req))
+               ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
+       else
+               ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
        if (ret != BLK_STS_OK)
                goto out_unmap;
  
                        goto out_unmap;
        }
  
-       cmnd->rw.dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-       cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma);
        if (blk_integrity_rq(req))
                cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
        return BLK_STS_OK;
@@@ -804,7 -950,7 +950,7 @@@ static inline void nvme_handle_cqe(stru
         * for them but rather special case them here.
         */
        if (unlikely(nvmeq->qid == 0 &&
-                       cqe->command_id >= NVME_AQ_BLKMQ_DEPTH)) {
+                       cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) {
                nvme_complete_async_event(&nvmeq->dev->ctrl,
                                cqe->status, &cqe->result);
                return;
@@@ -897,7 -1043,7 +1043,7 @@@ static int nvme_poll(struct blk_mq_hw_c
        return __nvme_poll(nvmeq, tag);
  }
  
- static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx)
+ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
  {
        struct nvme_dev *dev = to_nvme_dev(ctrl);
        struct nvme_queue *nvmeq = dev->queues[0];
  
        memset(&c, 0, sizeof(c));
        c.common.opcode = nvme_admin_async_event;
-       c.common.command_id = NVME_AQ_BLKMQ_DEPTH + aer_idx;
+       c.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
  
        spin_lock_irq(&nvmeq->q_lock);
        __nvme_submit_cmd(nvmeq, &c);
@@@ -930,7 -1076,7 +1076,7 @@@ static int adapter_alloc_cq(struct nvme
        int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
  
        /*
-        * Note: we (ab)use the fact the the prp fields survive if no data
+        * Note: we (ab)use the fact that the prp fields survive if no data
         * is attached to the request.
         */
        memset(&c, 0, sizeof(c));
@@@ -951,7 -1097,7 +1097,7 @@@ static int adapter_alloc_sq(struct nvme
        int flags = NVME_QUEUE_PHYS_CONTIG;
  
        /*
-        * Note: we (ab)use the fact the the prp fields survive if no data
+        * Note: we (ab)use the fact that the prp fields survive if no data
         * is attached to the request.
         */
        memset(&c, 0, sizeof(c));
@@@ -1226,7 -1372,7 +1372,7 @@@ static int nvme_alloc_sq_cmds(struct nv
        if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
                unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
                                                      dev->ctrl.page_size);
 -              nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
 +              nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
                nvmeq->sq_cmds_io = dev->cmb + offset;
        } else {
                nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
@@@ -1372,14 -1518,10 +1518,10 @@@ static int nvme_alloc_admin_tags(struc
                dev->admin_tagset.ops = &nvme_mq_admin_ops;
                dev->admin_tagset.nr_hw_queues = 1;
  
-               /*
-                * Subtract one to leave an empty queue entry for 'Full Queue'
-                * condition. See NVM-Express 1.2 specification, section 4.1.2.
-                */
-               dev->admin_tagset.queue_depth = NVME_AQ_BLKMQ_DEPTH - 1;
+               dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
                dev->admin_tagset.timeout = ADMIN_TIMEOUT;
                dev->admin_tagset.numa_node = dev_to_node(dev->dev);
-               dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
+               dev->admin_tagset.cmd_size = nvme_pci_cmd_size(dev, false);
                dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
                dev->admin_tagset.driver_data = dev;
  
@@@ -1527,7 -1669,7 +1669,7 @@@ static void __iomem *nvme_map_cmb(struc
        resource_size_t bar_size;
        struct pci_dev *pdev = to_pci_dev(dev->dev);
        void __iomem *cmb;
 -      dma_addr_t dma_addr;
 +      int bar;
  
        dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
        if (!(NVME_CMB_SZ(dev->cmbsz)))
        szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
        size = szu * NVME_CMB_SZ(dev->cmbsz);
        offset = szu * NVME_CMB_OFST(dev->cmbloc);
 -      bar_size = pci_resource_len(pdev, NVME_CMB_BIR(dev->cmbloc));
 +      bar = NVME_CMB_BIR(dev->cmbloc);
 +      bar_size = pci_resource_len(pdev, bar);
  
        if (offset > bar_size)
                return NULL;
        if (size > bar_size - offset)
                size = bar_size - offset;
  
 -      dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(dev->cmbloc)) + offset;
 -      cmb = ioremap_wc(dma_addr, size);
 +      cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size);
        if (!cmb)
                return NULL;
  
 -      dev->cmb_dma_addr = dma_addr;
 +      dev->cmb_bus_addr = pci_bus_address(pdev, bar) + offset;
        dev->cmb_size = size;
        return cmb;
  }
@@@ -1906,7 -2048,11 +2048,11 @@@ static int nvme_dev_add(struct nvme_de
                dev->tagset.numa_node = dev_to_node(dev->dev);
                dev->tagset.queue_depth =
                                min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
-               dev->tagset.cmd_size = nvme_cmd_size(dev);
+               dev->tagset.cmd_size = nvme_pci_cmd_size(dev, false);
+               if ((dev->ctrl.sgls & ((1 << 0) | (1 << 1))) && sgl_threshold) {
+                       dev->tagset.cmd_size = max(dev->tagset.cmd_size,
+                                       nvme_pci_cmd_size(dev, true));
+               }
                dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
                dev->tagset.driver_data = dev;
  
@@@ -2132,9 -2278,9 +2278,9 @@@ static void nvme_remove_dead_ctrl(struc
  {
        dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status);
  
-       kref_get(&dev->ctrl.kref);
+       nvme_get_ctrl(&dev->ctrl);
        nvme_dev_disable(dev, false);
-       if (!schedule_work(&dev->remove_work))
+       if (!queue_work(nvme_wq, &dev->remove_work))
                nvme_put_ctrl(&dev->ctrl);
  }
  
@@@ -2557,6 -2703,7 +2703,7 @@@ static int __init nvme_init(void
  static void __exit nvme_exit(void)
  {
        pci_unregister_driver(&nvme_driver);
+       flush_workqueue(nvme_wq);
        _nvme_check_size();
  }
  
diff --combined drivers/nvme/host/rdma.c
index 0ebb539f3bd3a7d7a6e18753bc4fefa402a1b626,c8d854474a5b6ce50b37d4e63ef248ff219cce0a..4f9bf2f815c399f3f7f39d5b6d485dbe75a2466f
  
  #define NVME_RDMA_MAX_INLINE_SEGMENTS 1
  
- /*
-  * We handle AEN commands ourselves and don't even let the
-  * block layer know about them.
-  */
- #define NVME_RDMA_NR_AEN_COMMANDS      1
- #define NVME_RDMA_AQ_BLKMQ_DEPTH       \
-       (NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
  struct nvme_rdma_device {
-       struct ib_device       *dev;
-       struct ib_pd           *pd;
+       struct ib_device        *dev;
+       struct ib_pd            *pd;
        struct kref             ref;
        struct list_head        entry;
  };
@@@ -79,8 -71,8 +71,8 @@@ struct nvme_rdma_request 
  };
  
  enum nvme_rdma_queue_flags {
-       NVME_RDMA_Q_LIVE                = 0,
-       NVME_RDMA_Q_DELETING            = 1,
+       NVME_RDMA_Q_ALLOCATED           = 0,
+       NVME_RDMA_Q_LIVE                = 1,
  };
  
  struct nvme_rdma_queue {
@@@ -105,7 -97,6 +97,6 @@@ struct nvme_rdma_ctrl 
  
        /* other member variables */
        struct blk_mq_tag_set   tag_set;
-       struct work_struct      delete_work;
        struct work_struct      err_work;
  
        struct nvme_rdma_qe     async_event_sqe;
@@@ -274,6 -265,9 +265,9 @@@ static int nvme_rdma_reinit_request(voi
        struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
        int ret = 0;
  
+       if (WARN_ON_ONCE(!req->mr))
+               return 0;
        ib_dereg_mr(req->mr);
  
        req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
@@@ -434,11 -428,9 +428,9 @@@ out_err
  
  static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
  {
-       struct nvme_rdma_device *dev;
-       struct ib_device *ibdev;
+       struct nvme_rdma_device *dev = queue->device;
+       struct ib_device *ibdev = dev->dev;
  
-       dev = queue->device;
-       ibdev = dev->dev;
        rdma_destroy_qp(queue->cm_id);
        ib_free_cq(queue->ib_cq);
  
@@@ -493,7 -485,7 +485,7 @@@ static int nvme_rdma_create_queue_ib(st
        return 0;
  
  out_destroy_qp:
-       ib_destroy_qp(queue->qp);
+       rdma_destroy_qp(queue->cm_id);
  out_destroy_ib_cq:
        ib_free_cq(queue->ib_cq);
  out_put_dev:
@@@ -544,11 -536,11 +536,11 @@@ static int nvme_rdma_alloc_queue(struc
        ret = nvme_rdma_wait_for_cm(queue);
        if (ret) {
                dev_info(ctrl->ctrl.device,
-                       "rdma_resolve_addr wait failed (%d).\n", ret);
+                       "rdma connection establishment failed (%d)\n", ret);
                goto out_destroy_cm_id;
        }
  
-       clear_bit(NVME_RDMA_Q_DELETING, &queue->flags);
+       set_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags);
  
        return 0;
  
@@@ -568,15 -560,9 +560,15 @@@ static void nvme_rdma_stop_queue(struc
  
  static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
  {
-       if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags))
+       if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
                return;
  
 +      if (nvme_rdma_queue_idx(queue) == 0) {
 +              nvme_rdma_free_qe(queue->device->dev,
 +                      &queue->ctrl->async_event_sqe,
 +                      sizeof(struct nvme_command), DMA_TO_DEVICE);
 +      }
 +
        nvme_rdma_destroy_queue_ib(queue);
        rdma_destroy_id(queue->cm_id);
  }
@@@ -676,11 -662,10 +668,10 @@@ out_free_queues
        return ret;
  }
  
- static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl, bool admin)
+ static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl,
+               struct blk_mq_tag_set *set)
  {
        struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-       struct blk_mq_tag_set *set = admin ?
-                       &ctrl->admin_tag_set : &ctrl->tag_set;
  
        blk_mq_free_tag_set(set);
        nvme_rdma_dev_put(ctrl->device);
@@@ -697,7 -682,7 +688,7 @@@ static struct blk_mq_tag_set *nvme_rdma
                set = &ctrl->admin_tag_set;
                memset(set, 0, sizeof(*set));
                set->ops = &nvme_rdma_admin_mq_ops;
-               set->queue_depth = NVME_RDMA_AQ_BLKMQ_DEPTH;
+               set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
                set->reserved_tags = 2; /* connect + keep-alive */
                set->numa_node = NUMA_NO_NODE;
                set->cmd_size = sizeof(struct nvme_rdma_request) +
                set->driver_data = ctrl;
                set->nr_hw_queues = 1;
                set->timeout = ADMIN_TIMEOUT;
+               set->flags = BLK_MQ_F_NO_SCHED;
        } else {
                set = &ctrl->tag_set;
                memset(set, 0, sizeof(*set));
@@@ -745,10 -731,12 +737,10 @@@ out
  static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
                bool remove)
  {
 -      nvme_rdma_free_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe,
 -                      sizeof(struct nvme_command), DMA_TO_DEVICE);
        nvme_rdma_stop_queue(&ctrl->queues[0]);
        if (remove) {
                blk_cleanup_queue(ctrl->ctrl.admin_q);
-               nvme_rdma_free_tagset(&ctrl->ctrl, true);
+               nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
        }
        nvme_rdma_free_queue(&ctrl->queues[0]);
  }
@@@ -769,10 -757,8 +761,10 @@@ static int nvme_rdma_configure_admin_qu
  
        if (new) {
                ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
 -              if (IS_ERR(ctrl->ctrl.admin_tagset))
 +              if (IS_ERR(ctrl->ctrl.admin_tagset)) {
 +                      error = PTR_ERR(ctrl->ctrl.admin_tagset);
                        goto out_free_queue;
 +              }
  
                ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
                if (IS_ERR(ctrl->ctrl.admin_q)) {
                        goto out_free_tagset;
                }
        } else {
-               error = blk_mq_reinit_tagset(&ctrl->admin_tag_set,
-                                            nvme_rdma_reinit_request);
+               error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
                if (error)
                        goto out_free_queue;
        }
@@@ -825,7 -810,7 +816,7 @@@ out_cleanup_queue
                blk_cleanup_queue(ctrl->ctrl.admin_q);
  out_free_tagset:
        if (new)
-               nvme_rdma_free_tagset(&ctrl->ctrl, true);
+               nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
  out_free_queue:
        nvme_rdma_free_queue(&ctrl->queues[0]);
        return error;
@@@ -837,7 -822,7 +828,7 @@@ static void nvme_rdma_destroy_io_queues
        nvme_rdma_stop_io_queues(ctrl);
        if (remove) {
                blk_cleanup_queue(ctrl->ctrl.connect_q);
-               nvme_rdma_free_tagset(&ctrl->ctrl, false);
+               nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
        }
        nvme_rdma_free_io_queues(ctrl);
  }
@@@ -852,10 -837,8 +843,10 @@@ static int nvme_rdma_configure_io_queue
  
        if (new) {
                ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, false);
 -              if (IS_ERR(ctrl->ctrl.tagset))
 +              if (IS_ERR(ctrl->ctrl.tagset)) {
 +                      ret = PTR_ERR(ctrl->ctrl.tagset);
                        goto out_free_io_queues;
 +              }
  
                ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
                if (IS_ERR(ctrl->ctrl.connect_q)) {
                        goto out_free_tag_set;
                }
        } else {
-               ret = blk_mq_reinit_tagset(&ctrl->tag_set,
-                                          nvme_rdma_reinit_request);
+               ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
                if (ret)
                        goto out_free_io_queues;
  
@@@ -883,7 -865,7 +873,7 @@@ out_cleanup_connect_q
                blk_cleanup_queue(ctrl->ctrl.connect_q);
  out_free_tag_set:
        if (new)
-               nvme_rdma_free_tagset(&ctrl->ctrl, false);
+               nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
  out_free_io_queues:
        nvme_rdma_free_io_queues(ctrl);
        return ret;
@@@ -922,7 -904,7 +912,7 @@@ static void nvme_rdma_reconnect_or_remo
                                ctrl->ctrl.opts->reconnect_delay * HZ);
        } else {
                dev_info(ctrl->ctrl.device, "Removing controller...\n");
-               queue_work(nvme_wq, &ctrl->delete_work);
+               nvme_delete_ctrl(&ctrl->ctrl);
        }
  }
  
@@@ -935,10 -917,6 +925,6 @@@ static void nvme_rdma_reconnect_ctrl_wo
  
        ++ctrl->ctrl.nr_reconnects;
  
-       if (ctrl->ctrl.queue_count > 1)
-               nvme_rdma_destroy_io_queues(ctrl, false);
-       nvme_rdma_destroy_admin_queue(ctrl, false);
        ret = nvme_rdma_configure_admin_queue(ctrl, false);
        if (ret)
                goto requeue;
        if (ctrl->ctrl.queue_count > 1) {
                ret = nvme_rdma_configure_io_queues(ctrl, false);
                if (ret)
-                       goto requeue;
+                       goto destroy_admin;
        }
  
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
                return;
        }
  
-       ctrl->ctrl.nr_reconnects = 0;
        nvme_start_ctrl(&ctrl->ctrl);
  
-       dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
+       dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
+                       ctrl->ctrl.nr_reconnects);
+       ctrl->ctrl.nr_reconnects = 0;
  
        return;
  
+ destroy_admin:
+       nvme_rdma_destroy_admin_queue(ctrl, false);
  requeue:
        dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
                        ctrl->ctrl.nr_reconnects);
@@@ -979,17 -960,15 +968,15 @@@ static void nvme_rdma_error_recovery_wo
  
        if (ctrl->ctrl.queue_count > 1) {
                nvme_stop_queues(&ctrl->ctrl);
-               nvme_rdma_stop_io_queues(ctrl);
-       }
-       blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
-       nvme_rdma_stop_queue(&ctrl->queues[0]);
-       /* We must take care of fastfail/requeue all our inflight requests */
-       if (ctrl->ctrl.queue_count > 1)
                blk_mq_tagset_busy_iter(&ctrl->tag_set,
                                        nvme_cancel_request, &ctrl->ctrl);
+               nvme_rdma_destroy_io_queues(ctrl, false);
+       }
+       blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_cancel_request, &ctrl->ctrl);
+       nvme_rdma_destroy_admin_queue(ctrl, false);
  
        /*
         * queues are not live anymore, so restart the queues to fail fast
@@@ -1065,7 -1044,7 +1052,7 @@@ static void nvme_rdma_unmap_data(struc
        if (!blk_rq_bytes(rq))
                return;
  
-       if (req->mr->need_inval) {
+       if (req->mr->need_inval && test_bit(NVME_RDMA_Q_LIVE, &req->queue->flags)) {
                res = nvme_rdma_inv_rkey(queue, req);
                if (unlikely(res < 0)) {
                        dev_err(ctrl->ctrl.device,
@@@ -1314,7 -1293,7 +1301,7 @@@ static struct blk_mq_tags *nvme_rdma_ta
        return queue->ctrl->tag_set.tags[queue_idx - 1];
  }
  
- static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
+ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
  {
        struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg);
        struct nvme_rdma_queue *queue = &ctrl->queues[0];
        struct ib_sge sge;
        int ret;
  
-       if (WARN_ON_ONCE(aer_idx != 0))
-               return;
        ib_dma_sync_single_for_cpu(dev, sqe->dma, sizeof(*cmd), DMA_TO_DEVICE);
  
        memset(cmd, 0, sizeof(*cmd));
        cmd->common.opcode = nvme_admin_async_event;
-       cmd->common.command_id = NVME_RDMA_AQ_BLKMQ_DEPTH;
+       cmd->common.command_id = NVME_AQ_BLK_MQ_DEPTH;
        cmd->common.flags |= NVME_CMD_SGL_METABUF;
        nvme_rdma_set_sg_null(cmd);
  
@@@ -1393,7 -1369,7 +1377,7 @@@ static int __nvme_rdma_recv_done(struc
         * for them but rather special case them here.
         */
        if (unlikely(nvme_rdma_queue_idx(queue) == 0 &&
-                       cqe->command_id >= NVME_RDMA_AQ_BLKMQ_DEPTH))
+                       cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH))
                nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
                                &cqe->result);
        else
@@@ -1590,6 -1566,10 +1574,10 @@@ nvme_rdma_timeout(struct request *rq, b
  {
        struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
  
+       dev_warn(req->queue->ctrl->ctrl.device,
+                "I/O %d QID %d timeout, reset controller\n",
+                rq->tag, nvme_rdma_queue_idx(req->queue));
        /* queue error recovery */
        nvme_rdma_error_recovery(req->queue->ctrl);
  
@@@ -1614,15 -1594,12 +1602,15 @@@ nvme_rdma_queue_is_ready(struct nvme_rd
                        /*
                         * reconnecting state means transport disruption, which
                         * can take a long time and even might fail permanently,
 -                       * so we can't let incoming I/O be requeued forever.
 -                       * fail it fast to allow upper layers a chance to
 -                       * failover.
 +                       * fail fast to give upper layers a chance to failover.
 +                       * deleting state means that the ctrl will never accept
 +                       * commands again, fail it permanently.
                         */
 -                      if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
 +                      if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING ||
 +                          queue->ctrl->ctrl.state == NVME_CTRL_DELETING) {
 +                              nvme_req(rq)->status = NVME_SC_ABORT_REQ;
                                return BLK_STS_IOERR;
 +                      }
                        return BLK_STS_RESOURCE; /* try again later */
                }
        }
@@@ -1767,50 -1744,9 +1755,9 @@@ static void nvme_rdma_shutdown_ctrl(str
        nvme_rdma_destroy_admin_queue(ctrl, shutdown);
  }
  
- static void nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl)
+ static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl)
  {
-       nvme_remove_namespaces(&ctrl->ctrl);
-       nvme_rdma_shutdown_ctrl(ctrl, true);
-       nvme_uninit_ctrl(&ctrl->ctrl);
-       nvme_put_ctrl(&ctrl->ctrl);
- }
- static void nvme_rdma_del_ctrl_work(struct work_struct *work)
- {
-       struct nvme_rdma_ctrl *ctrl = container_of(work,
-                               struct nvme_rdma_ctrl, delete_work);
-       nvme_stop_ctrl(&ctrl->ctrl);
-       nvme_rdma_remove_ctrl(ctrl);
- }
- static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
- {
-       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
-               return -EBUSY;
-       if (!queue_work(nvme_wq, &ctrl->delete_work))
-               return -EBUSY;
-       return 0;
- }
- static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl)
- {
-       struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-       int ret = 0;
-       /*
-        * Keep a reference until all work is flushed since
-        * __nvme_rdma_del_ctrl can free the ctrl mem
-        */
-       if (!kref_get_unless_zero(&ctrl->ctrl.kref))
-               return -EBUSY;
-       ret = __nvme_rdma_del_ctrl(ctrl);
-       if (!ret)
-               flush_work(&ctrl->delete_work);
-       nvme_put_ctrl(&ctrl->ctrl);
-       return ret;
+       nvme_rdma_shutdown_ctrl(to_rdma_ctrl(ctrl), true);
  }
  
  static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
        }
  
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-       WARN_ON_ONCE(!changed);
+       if (!changed) {
+               /* state change failure is ok if we're in DELETING state */
+               WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
+               return;
+       }
  
        nvme_start_ctrl(&ctrl->ctrl);
  
  
  out_fail:
        dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
-       nvme_rdma_remove_ctrl(ctrl);
+       nvme_remove_namespaces(&ctrl->ctrl);
+       nvme_rdma_shutdown_ctrl(ctrl, true);
+       nvme_uninit_ctrl(&ctrl->ctrl);
+       nvme_put_ctrl(&ctrl->ctrl);
  }
  
  static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
        .reg_write32            = nvmf_reg_write32,
        .free_ctrl              = nvme_rdma_free_ctrl,
        .submit_async_event     = nvme_rdma_submit_async_event,
-       .delete_ctrl            = nvme_rdma_del_ctrl,
+       .delete_ctrl            = nvme_rdma_delete_ctrl,
        .get_address            = nvmf_get_address,
+       .reinit_request         = nvme_rdma_reinit_request,
  };
  
+ static inline bool
+ __nvme_rdma_options_match(struct nvme_rdma_ctrl *ctrl,
+       struct nvmf_ctrl_options *opts)
+ {
+       char *stdport = __stringify(NVME_RDMA_IP_PORT);
+       if (!nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts) ||
+           strcmp(opts->traddr, ctrl->ctrl.opts->traddr))
+               return false;
+       if (opts->mask & NVMF_OPT_TRSVCID &&
+           ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) {
+               if (strcmp(opts->trsvcid, ctrl->ctrl.opts->trsvcid))
+                       return false;
+       } else if (opts->mask & NVMF_OPT_TRSVCID) {
+               if (strcmp(opts->trsvcid, stdport))
+                       return false;
+       } else if (ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) {
+               if (strcmp(stdport, ctrl->ctrl.opts->trsvcid))
+                       return false;
+       }
+       /* else, it's a match as both have stdport. Fall to next checks */
+       /*
+        * checking the local address is rough. In most cases, one
+        * is not specified and the host port is selected by the stack.
+        *
+        * Assume no match if:
+        *  local address is specified and address is not the same
+        *  local address is not specified but remote is, or vice versa
+        *    (admin using specific host_traddr when it matters).
+        */
+       if (opts->mask & NVMF_OPT_HOST_TRADDR &&
+           ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) {
+               if (strcmp(opts->host_traddr, ctrl->ctrl.opts->host_traddr))
+                       return false;
+       } else if (opts->mask & NVMF_OPT_HOST_TRADDR ||
+                  ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
+               return false;
+       /*
+        * if neither controller had a host port specified, assume it's
+        * a match as everything else matched.
+        */
+       return true;
+ }
+ /*
+  * Fails a connection request if it matches an existing controller
+  * (association) with the same tuple:
+  * <Host NQN, Host ID, local address, remote address, remote port, SUBSYS NQN>
+  *
+  * if local address is not specified in the request, it will match an
+  * existing controller with all the other parameters the same and no
+  * local port address specified as well.
+  *
+  * The ports don't need to be compared as they are intrinsically
+  * already matched by the port pointers supplied.
+  */
+ static bool
+ nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
+ {
+       struct nvme_rdma_ctrl *ctrl;
+       bool found = false;
+       mutex_lock(&nvme_rdma_ctrl_mutex);
+       list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
+               found = __nvme_rdma_options_match(ctrl, opts);
+               if (found)
+                       break;
+       }
+       mutex_unlock(&nvme_rdma_ctrl_mutex);
+       return found;
+ }
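One detail worth calling out in __nvme_rdma_options_match() above: a side that does not specify a transport service ID is treated as if it had specified the default NVMe/RDMA port, so the three trsvcid branches are equivalent to comparing both sides with the default filled in. A rough, compilable userspace sketch of that idea; trsvcid_match is a made-up name and "4420" merely stands in for __stringify(NVME_RDMA_IP_PORT):

#include <stdbool.h>
#include <string.h>

static bool trsvcid_match(const char *requested, const char *existing)
{
        const char *def = "4420";       /* assumed default NVMe/RDMA service id */

        return strcmp(requested ? requested : def,
                      existing ? existing : def) == 0;
}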
  static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
                struct nvmf_ctrl_options *opts)
  {
                }
        }
  
+       if (!opts->duplicate_connect && nvme_rdma_existing_controller(opts)) {
+               ret = -EALREADY;
+               goto out_free_ctrl;
+       }
        ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
                                0 /* no quirks, we're perfect! */);
        if (ret)
        INIT_DELAYED_WORK(&ctrl->reconnect_work,
                        nvme_rdma_reconnect_ctrl_work);
        INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
-       INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
        INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
  
        ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
        dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
                ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
  
-       kref_get(&ctrl->ctrl.kref);
+       nvme_get_ctrl(&ctrl->ctrl);
  
        mutex_lock(&nvme_rdma_ctrl_mutex);
        list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
@@@ -2006,7 -2031,7 +2042,7 @@@ static void nvme_rdma_remove_one(struc
                dev_info(ctrl->ctrl.device,
                        "Removing ctrl: NQN \"%s\", addr %pISp\n",
                        ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
-               __nvme_rdma_del_ctrl(ctrl);
+               nvme_delete_ctrl(&ctrl->ctrl);
        }
        mutex_unlock(&nvme_rdma_ctrl_mutex);
  
diff --combined drivers/nvme/target/core.c
index 645ba7eee35db7a66a0249d39c7adba514173229,22a2a2bb40f9e58a04902e8f5d617be39af15332..b54748ad5f4800cdda6ee83372fd16f2eb574fd3
@@@ -57,6 -57,17 +57,17 @@@ u16 nvmet_copy_from_sgl(struct nvmet_re
        return 0;
  }
  
+ static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
+ {
+       struct nvmet_ns *ns;
+       if (list_empty(&subsys->namespaces))
+               return 0;
+       ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
+       return ns->nsid;
+ }
  static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
  {
        return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
@@@ -334,6 -345,8 +345,8 @@@ void nvmet_ns_disable(struct nvmet_ns *
  
        ns->enabled = false;
        list_del_rcu(&ns->dev_link);
+       if (ns->nsid == subsys->max_nsid)
+               subsys->max_nsid = nvmet_max_nsid(subsys);
        mutex_unlock(&subsys->lock);
  
        /*
@@@ -387,21 -400,12 +400,21 @@@ struct nvmet_ns *nvmet_ns_alloc(struct 
  
  static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
  {
 +      u32 old_sqhd, new_sqhd;
 +      u16 sqhd;
 +
        if (status)
                nvmet_set_status(req, status);
  
 -      if (req->sq->size)
 -              req->sq->sqhd = (req->sq->sqhd + 1) % req->sq->size;
 -      req->rsp->sq_head = cpu_to_le16(req->sq->sqhd);
 +      if (req->sq->size) {
 +              do {
 +                      old_sqhd = req->sq->sqhd;
 +                      new_sqhd = (old_sqhd + 1) % req->sq->size;
 +              } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
 +                                      old_sqhd);
 +      }
 +      sqhd = req->sq->sqhd & 0x0000FFFF;
 +      req->rsp->sq_head = cpu_to_le16(sqhd);
        req->rsp->sq_id = cpu_to_le16(req->sq->qid);
        req->rsp->command_id = req->cmd->common.command_id;
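The new submission-queue-head update in __nvmet_req_complete() above is a lock-free compare-and-swap retry loop on a wrapping counter: sqhd is widened to u32 (see the nvmet.h hunk below) so cmpxchg() can be used, and only the low 16 bits are placed in the completion entry. A self-contained userspace analogue using the GCC/Clang __atomic builtins instead of the kernel's cmpxchg(); sqhd_advance is a made-up name:

#include <stdint.h>

static uint32_t sqhd_advance(uint32_t *sqhd, uint32_t size)
{
        uint32_t old = __atomic_load_n(sqhd, __ATOMIC_RELAXED);
        uint32_t next;

        do {
                next = (old + 1) % size;        /* wrap at the queue size */
        } while (!__atomic_compare_exchange_n(sqhd, &old, next, 0,
                                              __ATOMIC_RELAXED,
                                              __ATOMIC_RELAXED));

        return next & 0xffff;                   /* low 16 bits go on the wire */
}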
  
@@@ -497,6 -501,7 +510,7 @@@ bool nvmet_req_init(struct nvmet_req *r
        req->ops = ops;
        req->sg = NULL;
        req->sg_cnt = 0;
+       req->transfer_len = 0;
        req->rsp->status = 0;
  
        /* no support for fused commands yet */
@@@ -546,6 -551,15 +560,15 @@@ void nvmet_req_uninit(struct nvmet_req 
  }
  EXPORT_SYMBOL_GPL(nvmet_req_uninit);
  
+ void nvmet_req_execute(struct nvmet_req *req)
+ {
+       if (unlikely(req->data_len != req->transfer_len))
+               nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
+       else
+               req->execute(req);
+ }
+ EXPORT_SYMBOL_GPL(nvmet_req_execute);
  static inline bool nvmet_cc_en(u32 cc)
  {
        return (cc >> NVME_CC_EN_SHIFT) & 0x1;
diff --combined drivers/nvme/target/nvmet.h
index 87e429bfcd8a0c918f2aae018c247bb0014d3d0b,194ebffc688c3b38793d59d1afaa9bb383dad82c..417f6c0331cc9c45311ddd85b118b327cce16f77
@@@ -74,7 -74,7 +74,7 @@@ struct nvmet_sq 
        struct percpu_ref       ref;
        u16                     qid;
        u16                     size;
 -      u16                     sqhd;
 +      u32                     sqhd;
        struct completion       free_done;
        struct completion       confirm_done;
  };
@@@ -223,7 -223,10 +223,10 @@@ struct nvmet_req 
        struct bio              inline_bio;
        struct bio_vec          inline_bvec[NVMET_MAX_INLINE_BIOVEC];
        int                     sg_cnt;
+       /* data length as parsed from the command: */
        size_t                  data_len;
+       /* data length as parsed from the SGL descriptor: */
+       size_t                  transfer_len;
  
        struct nvmet_port       *port;
  
@@@ -266,6 -269,7 +269,7 @@@ u16 nvmet_parse_fabrics_cmd(struct nvme
  bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
                struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops);
  void nvmet_req_uninit(struct nvmet_req *req);
+ void nvmet_req_execute(struct nvmet_req *req);
  void nvmet_req_complete(struct nvmet_req *req, u16 status);
  
  void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid,
@@@ -314,7 -318,7 +318,7 @@@ u16 nvmet_copy_from_sgl(struct nvmet_re
  u32 nvmet_get_log_page_len(struct nvme_command *cmd);
  
  #define NVMET_QUEUE_SIZE      1024
- #define NVMET_NR_QUEUES               64
+ #define NVMET_NR_QUEUES               128
  #define NVMET_MAX_CMD         NVMET_QUEUE_SIZE
  #define NVMET_KAS             10
  #define NVMET_DISC_KATO               120
diff --combined drivers/scsi/scsi_lib.c
index bcc1694cebcd3e184f40bba43f3a2200ea56c6e8,f907e2f8c1ddb83fdff588cec9fc0daedd5dbc72..54de24c785dd9573593845d53ba1a4bf77500de9
@@@ -252,9 -252,9 +252,9 @@@ int scsi_execute(struct scsi_device *sd
        struct scsi_request *rq;
        int ret = DRIVER_ERROR << 24;
  
-       req = blk_get_request(sdev->request_queue,
+       req = blk_get_request_flags(sdev->request_queue,
                        data_direction == DMA_TO_DEVICE ?
-                       REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
+                       REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, BLK_MQ_REQ_PREEMPT);
        if (IS_ERR(req))
                return ret;
        rq = scsi_req(req);
        rq->retries = retries;
        req->timeout = timeout;
        req->cmd_flags |= flags;
-       req->rq_flags |= rq_flags | RQF_QUIET | RQF_PREEMPT;
+       req->rq_flags |= rq_flags | RQF_QUIET;
  
        /*
         * head injection *required* here otherwise quiesce won't work
@@@ -1301,7 -1301,7 +1301,7 @@@ scsi_prep_state_check(struct scsi_devic
                        /*
                         * If the devices is blocked we defer normal commands.
                         */
-                       if (!(req->rq_flags & RQF_PREEMPT))
+                       if (req && !(req->rq_flags & RQF_PREEMPT))
                                ret = BLKPREP_DEFER;
                        break;
                default:
                         * special commands.  In particular any user initiated
                         * command is not allowed.
                         */
-                       if (!(req->rq_flags & RQF_PREEMPT))
+                       if (req && !(req->rq_flags & RQF_PREEMPT))
                                ret = BLKPREP_KILL;
                        break;
                }
@@@ -1379,6 -1379,8 +1379,6 @@@ static int scsi_prep_fn(struct request_
  
        ret = scsi_setup_cmnd(sdev, req);
  out:
 -      if (ret != BLKPREP_OK)
 -              cmd->flags &= ~SCMD_INITIALIZED;
        return scsi_prep_return(q, req, ret);
  }
  
@@@ -1898,6 -1900,7 +1898,6 @@@ static int scsi_mq_prep_fn(struct reque
        struct scsi_device *sdev = req->q->queuedata;
        struct Scsi_Host *shost = sdev->host;
        struct scatterlist *sg;
 -      int ret;
  
        scsi_init_command(sdev, cmd);
  
  
        blk_mq_start_request(req);
  
 -      ret = scsi_setup_cmnd(sdev, req);
 -      if (ret != BLK_STS_OK)
 -              cmd->flags &= ~SCMD_INITIALIZED;
 -      return ret;
 +      return scsi_setup_cmnd(sdev, req);
  }
  
  static void scsi_mq_done(struct scsi_cmnd *cmd)
        blk_mq_complete_request(cmd->request);
  }
  
+ static void scsi_mq_put_budget(struct blk_mq_hw_ctx *hctx)
+ {
+       struct request_queue *q = hctx->queue;
+       struct scsi_device *sdev = q->queuedata;
+       atomic_dec(&sdev->device_busy);
+       put_device(&sdev->sdev_gendev);
+ }
+ static bool scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx)
+ {
+       struct request_queue *q = hctx->queue;
+       struct scsi_device *sdev = q->queuedata;
+       if (!get_device(&sdev->sdev_gendev))
+               goto out;
+       if (!scsi_dev_queue_ready(q, sdev))
+               goto out_put_device;
+       return true;
+ out_put_device:
+       put_device(&sdev->sdev_gendev);
+ out:
+       return false;
+ }
  static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
                         const struct blk_mq_queue_data *bd)
  {
  
        ret = prep_to_mq(scsi_prep_state_check(sdev, req));
        if (ret != BLK_STS_OK)
-               goto out;
+               goto out_put_budget;
  
        ret = BLK_STS_RESOURCE;
-       if (!get_device(&sdev->sdev_gendev))
-               goto out;
-       if (!scsi_dev_queue_ready(q, sdev))
-               goto out_put_device;
        if (!scsi_target_queue_ready(shost, sdev))
-               goto out_dec_device_busy;
+               goto out_put_budget;
        if (!scsi_host_queue_ready(q, shost, sdev))
                goto out_dec_target_busy;
  
        return BLK_STS_OK;
  
  out_dec_host_busy:
-       atomic_dec(&shost->host_busy);
+        atomic_dec(&shost->host_busy);
  out_dec_target_busy:
        if (scsi_target(sdev)->can_queue > 0)
                atomic_dec(&scsi_target(sdev)->target_busy);
- out_dec_device_busy:
-       atomic_dec(&sdev->device_busy);
- out_put_device:
-       put_device(&sdev->sdev_gendev);
- out:
+ out_put_budget:
+       scsi_mq_put_budget(hctx);
        switch (ret) {
        case BLK_STS_OK:
                break;
@@@ -2205,6 -2230,8 +2224,8 @@@ struct request_queue *scsi_old_alloc_qu
  }
  
  static const struct blk_mq_ops scsi_mq_ops = {
+       .get_budget     = scsi_mq_get_budget,
+       .put_budget     = scsi_mq_put_budget,
        .queue_rq       = scsi_queue_rq,
        .complete       = scsi_softirq_done,
        .timeout        = scsi_timeout,
@@@ -2685,6 -2712,7 +2706,6 @@@ scsi_device_set_state(struct scsi_devic
  
        }
        sdev->sdev_state = state;
 -      sysfs_notify(&sdev->sdev_gendev.kobj, NULL, "state");
        return 0;
  
   illegal:
@@@ -2919,21 -2947,37 +2940,37 @@@ static void scsi_wait_for_queuecommand(
  int
  scsi_device_quiesce(struct scsi_device *sdev)
  {
+       struct request_queue *q = sdev->request_queue;
        int err;
  
+       /*
+        * It is allowed to call scsi_device_quiesce() multiple times from
+        * the same context but concurrent scsi_device_quiesce() calls are
+        * not allowed.
+        */
+       WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current);
+       blk_set_preempt_only(q);
+       blk_mq_freeze_queue(q);
+       /*
+        * Ensure that the effect of blk_set_preempt_only() will be visible
+        * for percpu_ref_tryget() callers that occur after the queue
+        * unfreeze even if the queue was already frozen before this function
+        * was called. See also https://lwn.net/Articles/573497/.
+        */
+       synchronize_rcu();
+       blk_mq_unfreeze_queue(q);
        mutex_lock(&sdev->state_mutex);
        err = scsi_device_set_state(sdev, SDEV_QUIESCE);
+       if (err == 0)
+               sdev->quiesced_by = current;
+       else
+               blk_clear_preempt_only(q);
        mutex_unlock(&sdev->state_mutex);
  
-       if (err)
-               return err;
-       scsi_run_queue(sdev->request_queue);
-       while (atomic_read(&sdev->device_busy)) {
-               msleep_interruptible(200);
-               scsi_run_queue(sdev->request_queue);
-       }
-       return 0;
+       return err;
  }
  EXPORT_SYMBOL(scsi_device_quiesce);
  
@@@ -2953,9 -2997,11 +2990,11 @@@ void scsi_device_resume(struct scsi_dev
         * device deleted during suspend)
         */
        mutex_lock(&sdev->state_mutex);
-       if (sdev->sdev_state == SDEV_QUIESCE &&
-           scsi_device_set_state(sdev, SDEV_RUNNING) == 0)
-               scsi_run_queue(sdev->request_queue);
+       WARN_ON_ONCE(!sdev->quiesced_by);
+       sdev->quiesced_by = NULL;
+       blk_clear_preempt_only(sdev->request_queue);
+       if (sdev->sdev_state == SDEV_QUIESCE)
+               scsi_device_set_state(sdev, SDEV_RUNNING);
        mutex_unlock(&sdev->state_mutex);
  }
  EXPORT_SYMBOL(scsi_device_resume);
@@@ -3108,6 -3154,7 +3147,6 @@@ int scsi_internal_device_unblock_nowait
        case SDEV_BLOCK:
        case SDEV_TRANSPORT_OFFLINE:
                sdev->sdev_state = new_state;
 -              sysfs_notify(&sdev->sdev_gendev.kobj, NULL, "state");
                break;
        case SDEV_CREATED_BLOCK:
                if (new_state == SDEV_TRANSPORT_OFFLINE ||
                        sdev->sdev_state = new_state;
                else
                        sdev->sdev_state = SDEV_CREATED;
 -              sysfs_notify(&sdev->sdev_gendev.kobj, NULL, "state");
                break;
        case SDEV_CANCEL:
        case SDEV_OFFLINE:
diff --combined drivers/scsi/sg.c
index aa28874e8fb92f5090d64c9ceb9523fce224eabe,92fd870e13156bb46fb42ac4da77000c11dc00b4..f098877eed4aa83fb4c1245e49b378a7699389d5
@@@ -217,7 -217,7 +217,7 @@@ static int sg_allow_access(struct file 
        if (sfp->parentdp->device->type == TYPE_SCANNER)
                return 0;
  
-       return blk_verify_command(cmd, filp->f_mode & FMODE_WRITE);
+       return blk_verify_command(cmd, filp->f_mode);
  }
  
  static int
@@@ -837,7 -837,7 +837,7 @@@ sg_fill_request_table(Sg_fd *sfp, sg_re
  
        val = 0;
        list_for_each_entry(srp, &sfp->rq_list, entry) {
 -              if (val > SG_MAX_QUEUE)
 +              if (val >= SG_MAX_QUEUE)
                        break;
                rinfo[val].req_state = srp->done + 1;
                rinfo[val].problem =
diff --combined fs/block_dev.c
index 789f55e851aeffb6b1212403188638d12a1d2540,04973f48442243a441364b2f4cf75b1c8d50257c..4a181fcb51751dc2cbc8fda10930a47bc883380e
@@@ -54,18 -54,6 +54,6 @@@ struct block_device *I_BDEV(struct inod
  }
  EXPORT_SYMBOL(I_BDEV);
  
- void __vfs_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
- {
-       struct va_format vaf;
-       va_list args;
-       va_start(args, fmt);
-       vaf.fmt = fmt;
-       vaf.va = &args;
-       printk_ratelimited("%sVFS (%s): %pV\n", prefix, sb->s_id, &vaf);
-       va_end(args);
- }
  static void bdev_write_inode(struct block_device *bdev)
  {
        struct inode *inode = bdev->bd_inode;
@@@ -249,7 -237,7 +237,7 @@@ __blkdev_direct_IO_simple(struct kiocb 
                if (!READ_ONCE(bio.bi_private))
                        break;
                if (!(iocb->ki_flags & IOCB_HIPRI) ||
-                   !blk_mq_poll(bdev_get_queue(bdev), qc))
+                   !blk_poll(bdev_get_queue(bdev), qc))
                        io_schedule();
        }
        __set_current_state(TASK_RUNNING);
@@@ -414,7 -402,7 +402,7 @@@ __blkdev_direct_IO(struct kiocb *iocb, 
                        break;
  
                if (!(iocb->ki_flags & IOCB_HIPRI) ||
-                   !blk_mq_poll(bdev_get_queue(bdev), qc))
+                   !blk_poll(bdev_get_queue(bdev), qc))
                        io_schedule();
        }
        __set_current_state(TASK_RUNNING);
@@@ -674,7 -662,7 +662,7 @@@ int bdev_read_page(struct block_device 
        if (!ops->rw_page || bdev_get_integrity(bdev))
                return result;
  
-       result = blk_queue_enter(bdev->bd_queue, false);
+       result = blk_queue_enter(bdev->bd_queue, 0);
        if (result)
                return result;
        result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, false);
@@@ -710,18 -698,16 +698,18 @@@ int bdev_write_page(struct block_devic
  
        if (!ops->rw_page || bdev_get_integrity(bdev))
                return -EOPNOTSUPP;
-       result = blk_queue_enter(bdev->bd_queue, false);
+       result = blk_queue_enter(bdev->bd_queue, 0);
        if (result)
                return result;
  
        set_page_writeback(page);
        result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true);
 -      if (result)
 +      if (result) {
                end_page_writeback(page);
 -      else
 +      } else {
 +              clean_page_buffers(page);
                unlock_page(page);
 +      }
        blk_queue_exit(bdev->bd_queue);
        return result;
  }
diff --combined fs/buffer.c
index 49b7e9bdcd1d34b0815142991e956f3d6fb3d6b2,bcabb69e7462e23caf2eab6cdb67c3bbb25b2410..1c18a22a6013b2c0ff3b4c0b31ac416f31973ca4
@@@ -252,27 -252,6 +252,6 @@@ out
        return ret;
  }
  
- /*
-  * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
-  */
- static void free_more_memory(void)
- {
-       struct zoneref *z;
-       int nid;
-       wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
-       yield();
-       for_each_online_node(nid) {
-               z = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
-                                               gfp_zone(GFP_NOFS), NULL);
-               if (z->zone)
-                       try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
-                                               GFP_NOFS, NULL);
-       }
- }
  /*
   * I/O completion handler for block_read_full_page() - pages
   * which come unlocked at the end of I/O.
@@@ -861,16 -840,19 +840,19 @@@ int remove_inode_buffers(struct inode *
   * which may not fail from ordinary buffer allocations.
   */
  struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
-               int retry)
+               bool retry)
  {
        struct buffer_head *bh, *head;
+       gfp_t gfp = GFP_NOFS;
        long offset;
  
- try_again:
+       if (retry)
+               gfp |= __GFP_NOFAIL;
        head = NULL;
        offset = PAGE_SIZE;
        while ((offset -= size) >= 0) {
-               bh = alloc_buffer_head(GFP_NOFS);
+               bh = alloc_buffer_head(gfp);
                if (!bh)
                        goto no_grow;
  
@@@ -896,23 -878,7 +878,7 @@@ no_grow
                } while (head);
        }
  
-       /*
-        * Return failure for non-async IO requests.  Async IO requests
-        * are not allowed to fail, so we have to wait until buffer heads
-        * become available.  But we don't want tasks sleeping with 
-        * partially complete buffers, so all were released above.
-        */
-       if (!retry)
-               return NULL;
-       /* We're _really_ low on memory. Now we just
-        * wait for old buffer heads to become free due to
-        * finishing IO.  Since this is an async request and
-        * the reserve list is empty, we're sure there are 
-        * async buffer heads in use.
-        */
-       free_more_memory();
-       goto try_again;
+       return NULL;
  }
  EXPORT_SYMBOL_GPL(alloc_page_buffers);
  
@@@ -1001,8 -967,6 +967,6 @@@ grow_dev_page(struct block_device *bdev
        gfp_mask |= __GFP_NOFAIL;
  
        page = find_or_create_page(inode->i_mapping, index, gfp_mask);
-       if (!page)
-               return ret;
  
        BUG_ON(!PageLocked(page));
  
        /*
         * Allocate some buffers for this page
         */
-       bh = alloc_page_buffers(page, size, 0);
-       if (!bh)
-               goto failed;
+       bh = alloc_page_buffers(page, size, true);
  
        /*
         * Link the page to the buffers and initialise them.  Take the
@@@ -1103,8 -1065,6 +1065,6 @@@ __getblk_slow(struct block_device *bdev
                ret = grow_buffers(bdev, block, size, gfp);
                if (ret < 0)
                        return NULL;
-               if (ret == 0)
-                       free_more_memory();
        }
  }
  
@@@ -1575,7 -1535,7 +1535,7 @@@ void create_empty_buffers(struct page *
  {
        struct buffer_head *bh, *head, *tail;
  
-       head = alloc_page_buffers(page, blocksize, 1);
+       head = alloc_page_buffers(page, blocksize, true);
        bh = head;
        do {
                bh->b_state |= b_state;
@@@ -1692,8 -1652,7 +1652,8 @@@ static struct buffer_head *create_page_
        BUG_ON(!PageLocked(page));
  
        if (!page_has_buffers(page))
 -              create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
 +              create_empty_buffers(page, 1 << READ_ONCE(inode->i_blkbits),
 +                                   b_state);
        return page_buffers(page);
  }
  
@@@ -1979,8 -1938,8 +1939,8 @@@ iomap_to_bh(struct inode *inode, sector
        case IOMAP_MAPPED:
                if (offset >= i_size_read(inode))
                        set_buffer_new(bh);
 -              bh->b_blocknr = (iomap->blkno >> (inode->i_blkbits - 9)) +
 -                              ((offset - iomap->offset) >> inode->i_blkbits);
 +              bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
 +                              inode->i_blkbits;
                set_buffer_mapped(bh);
                break;
        }
@@@ -2639,7 -2598,7 +2599,7 @@@ int nobh_write_begin(struct address_spa
         * Be careful: the buffer linked list is a NULL terminated one, rather
         * than the circular one we're used to.
         */
-       head = alloc_page_buffers(page, blocksize, 0);
+       head = alloc_page_buffers(page, blocksize, false);
        if (!head) {
                ret = -ENOMEM;
                goto out_release;
@@@ -3056,8 -3015,16 +3016,16 @@@ void guard_bio_eod(int op, struct bio *
        sector_t maxsector;
        struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
        unsigned truncated_bytes;
+       struct hd_struct *part;
+       rcu_read_lock();
+       part = __disk_get_part(bio->bi_disk, bio->bi_partno);
+       if (part)
+               maxsector = part_nr_sects_read(part);
+       else
+               maxsector = get_capacity(bio->bi_disk);
+       rcu_read_unlock();
  
-       maxsector = get_capacity(bio->bi_disk);
        if (!maxsector)
                return;
  
diff --combined fs/direct-io.c
index 98fe1325da9d07e52135eb728c3d0dd381d35649,d2bc339cb1e98cb8c29808758db912c0752580d4..3aafb3343a65c76fb66211228564d73ce9097485
   */
  #define DIO_PAGES     64
  
 +/*
 + * Flags for dio_complete()
 + */
 +#define DIO_COMPLETE_ASYNC            0x01    /* This is async IO */
 +#define DIO_COMPLETE_INVALIDATE               0x02    /* Can invalidate pages */
 +
  /*
   * This code generally works in units of "dio_blocks".  A dio_block is
   * somewhere between the hard sector size and the filesystem block size.  it
@@@ -231,7 -225,7 +231,7 @@@ static inline struct page *dio_get_page
   * filesystems can use it to hold additional state between get_block calls and
   * dio_complete.
   */
 -static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
 +static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
  {
        loff_t offset = dio->iocb->ki_pos;
        ssize_t transferred = 0;
        if (ret == 0)
                ret = transferred;
  
 +      if (dio->end_io) {
 +              // XXX: ki_pos??
 +              err = dio->end_io(dio->iocb, offset, ret, dio->private);
 +              if (err)
 +                      ret = err;
 +      }
 +
        /*
         * Try again to invalidate clean pages which might have been cached by
         * non-direct readahead, or faulted in by get_user_pages() if the source
         * of the write was an mmap'ed region of the file we're writing.  Either
         * one is a pretty crazy thing to do, so we don't support it 100%.  If
         * this invalidation fails, tough, the write still worked...
 +       *
 +       * And this page cache invalidation has to be after dio->end_io(), as
 +       * some filesystems convert unwritten extents to real allocations in
 +       * end_io() when necessary, otherwise a racing buffer read would cache
 +       * zeros from unwritten extents.
         */
 -      if (ret > 0 && dio->op == REQ_OP_WRITE &&
 +      if (flags & DIO_COMPLETE_INVALIDATE &&
 +          ret > 0 && dio->op == REQ_OP_WRITE &&
            dio->inode->i_mapping->nrpages) {
                err = invalidate_inode_pages2_range(dio->inode->i_mapping,
                                        offset >> PAGE_SHIFT,
                WARN_ON_ONCE(err);
        }
  
 -      if (dio->end_io) {
 -
 -              // XXX: ki_pos??
 -              err = dio->end_io(dio->iocb, offset, ret, dio->private);
 -              if (err)
 -                      ret = err;
 -      }
 -
        if (!(dio->flags & DIO_SKIP_DIO_COUNT))
                inode_dio_end(dio->inode);
  
 -      if (is_async) {
 +      if (flags & DIO_COMPLETE_ASYNC) {
                /*
                 * generic_write_sync expects ki_pos to have been updated
                 * already, but the submission path only does this for
@@@ -317,7 -306,7 +317,7 @@@ static void dio_aio_complete_work(struc
  {
        struct dio *dio = container_of(work, struct dio, complete_work);
  
 -      dio_complete(dio, 0, true);
 +      dio_complete(dio, 0, DIO_COMPLETE_ASYNC | DIO_COMPLETE_INVALIDATE);
  }
  
  static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio);
@@@ -359,7 -348,7 +359,7 @@@ static void dio_bio_end_aio(struct bio 
                        queue_work(dio->inode->i_sb->s_dio_done_wq,
                                   &dio->complete_work);
                } else {
 -                      dio_complete(dio, 0, true);
 +                      dio_complete(dio, 0, DIO_COMPLETE_ASYNC);
                }
        }
  }
@@@ -497,7 -486,7 +497,7 @@@ static struct bio *dio_await_one(struc
                dio->waiter = current;
                spin_unlock_irqrestore(&dio->bio_lock, flags);
                if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
-                   !blk_mq_poll(dio->bio_disk->queue, dio->bio_cookie))
+                   !blk_poll(dio->bio_disk->queue, dio->bio_cookie))
                        io_schedule();
                /* wake up sets us TASK_RUNNING */
                spin_lock_irqsave(&dio->bio_lock, flags);
@@@ -877,8 -866,7 +877,8 @@@ out
         */
        if (sdio->boundary) {
                ret = dio_send_cur_page(dio, sdio, map_bh);
 -              dio_bio_submit(dio, sdio);
 +              if (sdio->bio)
 +                      dio_bio_submit(dio, sdio);
                put_page(sdio->cur_page);
                sdio->cur_page = NULL;
        }
@@@ -1152,7 -1140,7 +1152,7 @@@ do_blockdev_direct_IO(struct kiocb *ioc
                      get_block_t get_block, dio_iodone_t end_io,
                      dio_submit_t submit_io, int flags)
  {
 -      unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits);
 +      unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
        unsigned blkbits = i_blkbits;
        unsigned blocksize_mask = (1 << blkbits) - 1;
        ssize_t retval = -EINVAL;
                dio_await_completion(dio);
  
        if (drop_refcount(dio) == 0) {
 -              retval = dio_complete(dio, retval, false);
 +              retval = dio_complete(dio, retval, DIO_COMPLETE_INVALIDATE);
        } else
                BUG_ON(retval != -EIOCBQUEUED);
  
diff --combined fs/iomap.c
index 5011a964a5501ece5a42185d666ae9c6a69d9624,4241bac905b19bea8b66c17cd684ab28906743a7..b9f74803e56c08ce9655c96c5c32260e0e925efd
@@@ -350,8 -350,8 +350,8 @@@ static int iomap_zero(struct inode *ino
  static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
                struct iomap *iomap)
  {
 -      sector_t sector = iomap->blkno +
 -              (((pos & ~(PAGE_SIZE - 1)) - iomap->offset) >> 9);
 +      sector_t sector = (iomap->addr +
 +                         (pos & PAGE_MASK) - iomap->offset) >> 9;
  
        return __dax_zero_page_range(iomap->bdev, iomap->dax_dev, sector,
                        offset, bytes);
@@@ -510,12 -510,11 +510,12 @@@ static int iomap_to_fiemap(struct fiema
                flags |= FIEMAP_EXTENT_MERGED;
        if (iomap->flags & IOMAP_F_SHARED)
                flags |= FIEMAP_EXTENT_SHARED;
 +      if (iomap->flags & IOMAP_F_DATA_INLINE)
 +              flags |= FIEMAP_EXTENT_DATA_INLINE;
  
        return fiemap_fill_next_extent(fi, iomap->offset,
 -                      iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0,
 +                      iomap->addr != IOMAP_NULL_ADDR ? iomap->addr : 0,
                        iomap->length, flags);
 -
  }
  
  static loff_t
@@@ -715,9 -714,23 +715,9 @@@ static ssize_t iomap_dio_complete(struc
  {
        struct kiocb *iocb = dio->iocb;
        struct inode *inode = file_inode(iocb->ki_filp);
 +      loff_t offset = iocb->ki_pos;
        ssize_t ret;
  
 -      /*
 -       * Try again to invalidate clean pages which might have been cached by
 -       * non-direct readahead, or faulted in by get_user_pages() if the source
 -       * of the write was an mmap'ed region of the file we're writing.  Either
 -       * one is a pretty crazy thing to do, so we don't support it 100%.  If
 -       * this invalidation fails, tough, the write still worked...
 -       */
 -      if (!dio->error &&
 -          (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
 -              ret = invalidate_inode_pages2_range(inode->i_mapping,
 -                              iocb->ki_pos >> PAGE_SHIFT,
 -                              (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
 -              WARN_ON_ONCE(ret);
 -      }
 -
        if (dio->end_io) {
                ret = dio->end_io(iocb,
                                dio->error ? dio->error : dio->size,
        if (likely(!ret)) {
                ret = dio->size;
                /* check for short read */
 -              if (iocb->ki_pos + ret > dio->i_size &&
 +              if (offset + ret > dio->i_size &&
                    !(dio->flags & IOMAP_DIO_WRITE))
 -                      ret = dio->i_size - iocb->ki_pos;
 +                      ret = dio->i_size - offset;
                iocb->ki_pos += ret;
        }
  
 +      /*
 +       * Try again to invalidate clean pages which might have been cached by
 +       * non-direct readahead, or faulted in by get_user_pages() if the source
 +       * of the write was an mmap'ed region of the file we're writing.  Either
 +       * one is a pretty crazy thing to do, so we don't support it 100%.  If
 +       * this invalidation fails, tough, the write still worked...
 +       *
 +       * And this page cache invalidation has to be after dio->end_io(), as
 +       * some filesystems convert unwritten extents to real allocations in
 +       * end_io() when necessary, otherwise a racing buffer read would cache
 +       * zeros from unwritten extents.
 +       */
 +      if (!dio->error &&
 +          (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
 +              int err;
 +              err = invalidate_inode_pages2_range(inode->i_mapping,
 +                              offset >> PAGE_SHIFT,
 +                              (offset + dio->size - 1) >> PAGE_SHIFT);
 +              WARN_ON_ONCE(err);
 +      }
 +
        inode_dio_end(file_inode(iocb->ki_filp));
        kfree(dio);
  
@@@ -831,7 -823,7 +831,7 @@@ iomap_dio_zero(struct iomap_dio *dio, s
        bio = bio_alloc(GFP_KERNEL, 1);
        bio_set_dev(bio, iomap->bdev);
        bio->bi_iter.bi_sector =
 -              iomap->blkno + ((pos - iomap->offset) >> 9);
 +              (iomap->addr + pos - iomap->offset) >> 9;
        bio->bi_private = dio;
        bio->bi_end_io = iomap_dio_bio_end_io;
  
@@@ -910,7 -902,7 +910,7 @@@ iomap_dio_actor(struct inode *inode, lo
                bio = bio_alloc(GFP_KERNEL, nr_pages);
                bio_set_dev(bio, iomap->bdev);
                bio->bi_iter.bi_sector =
 -                      iomap->blkno + ((pos - iomap->offset) >> 9);
 +                      (iomap->addr + pos - iomap->offset) >> 9;
                bio->bi_write_hint = dio->iocb->ki_hint;
                bio->bi_private = dio;
                bio->bi_end_io = iomap_dio_bio_end_io;
@@@ -1017,13 -1009,6 +1017,13 @@@ iomap_dio_rw(struct kiocb *iocb, struc
        WARN_ON_ONCE(ret);
        ret = 0;
  
 +      if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
 +          !inode->i_sb->s_dio_done_wq) {
 +              ret = sb_init_dio_done_wq(inode->i_sb);
 +              if (ret < 0)
 +                      goto out_free_dio;
 +      }
 +
        inode_dio_begin(inode);
  
        blk_start_plug(&plug);
        if (ret < 0)
                iomap_dio_set_error(dio, ret);
  
 -      if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
 -                      !inode->i_sb->s_dio_done_wq) {
 -              ret = sb_init_dio_done_wq(inode->i_sb);
 -              if (ret < 0)
 -                      iomap_dio_set_error(dio, ret);
 -      }
 -
        if (!atomic_dec_and_test(&dio->ref)) {
                if (!is_sync_kiocb(iocb))
                        return -EIOCBQUEUED;
  
                        if (!(iocb->ki_flags & IOCB_HIPRI) ||
                            !dio->submit.last_queue ||
-                           !blk_mq_poll(dio->submit.last_queue,
+                           !blk_poll(dio->submit.last_queue,
                                         dio->submit.cookie))
                                io_schedule();
                }
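
The ordering comment added above is worth a concrete illustration. Below is a minimal sketch of the same completion ordering, written against made-up names (my_dio, my_dio_complete) rather than the real fs/iomap.c structures: the filesystem's end_io hook runs first, and only then is the written range kicked out of the page cache.

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/bug.h>

/* Illustrative only, not the in-tree iomap code. */
struct my_dio {
        struct kiocb    *iocb;
        loff_t          offset;         /* starting file offset of the write */
        size_t          size;           /* bytes transferred */
        int             error;
        int             (*end_io)(struct kiocb *iocb, ssize_t ret, unsigned flags);
};

static ssize_t my_dio_complete(struct my_dio *dio)
{
        struct inode *inode = file_inode(dio->iocb->ki_filp);
        ssize_t ret = dio->error ? dio->error : dio->size;

        /* 1. Let the filesystem convert unwritten extents first. */
        if (dio->end_io)
                dio->end_io(dio->iocb, ret, 0);

        /*
         * 2. Only now drop clean pages covering the written range, so a
         *    racing buffered read cannot cache zeros from extents that
         *    were still unwritten when the write completed.
         */
        if (!dio->error && inode->i_mapping->nrpages) {
                int err = invalidate_inode_pages2_range(inode->i_mapping,
                                dio->offset >> PAGE_SHIFT,
                                (dio->offset + dio->size - 1) >> PAGE_SHIFT);
                WARN_ON_ONCE(err);
        }
        return ret;
}
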
diff --combined fs/sync.c
index 83ac79a960dd1aea9aa79932bbb08de662e7abab,09f96a18dd930b48a8cc7ec9760c52f97c2f1c6a..6e0a2cbaf6dedb495b91e16f16ab1d44eee3ea46
+++ b/fs/sync.c
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   * High-level sync()-related operations
   */
@@@ -109,7 -108,7 +109,7 @@@ SYSCALL_DEFINE0(sync
  {
        int nowait = 0, wait = 1;
  
-       wakeup_flusher_threads(0, WB_REASON_SYNC);
+       wakeup_flusher_threads(WB_REASON_SYNC);
        iterate_supers(sync_inodes_one_sb, NULL);
        iterate_supers(sync_fs_one_sb, &nowait);
        iterate_supers(sync_fs_one_sb, &wait);
diff --combined include/linux/backing-dev-defs.h
index fff4cfa0c21df2117f99d93f7d6c7130d02c9d87,b7c7be6f5986ac7a3adba7f23c21a92ecd34a270..bfe86b54f6c149a6c8718f13417ecf7a796b7193
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef __LINUX_BACKING_DEV_DEFS_H
  #define __LINUX_BACKING_DEV_DEFS_H
  
@@@ -25,6 -24,7 +25,7 @@@ enum wb_state 
        WB_shutting_down,       /* wb_shutdown() in progress */
        WB_writeback_running,   /* Writeback is in progress */
        WB_has_dirty_io,        /* Dirty inodes on ->b_{dirty|io|more_io} */
+       WB_start_all,           /* nr_pages == 0 (all) work pending */
  };
  
  enum wb_congested_state {
@@@ -44,6 -44,28 +45,28 @@@ enum wb_stat_item 
  
  #define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
  
+ /*
+  * why some writeback work was initiated
+  */
+ enum wb_reason {
+       WB_REASON_BACKGROUND,
+       WB_REASON_VMSCAN,
+       WB_REASON_SYNC,
+       WB_REASON_PERIODIC,
+       WB_REASON_LAPTOP_TIMER,
+       WB_REASON_FREE_MORE_MEM,
+       WB_REASON_FS_FREE_SPACE,
+       /*
+        * There is no bdi forker thread any more and works are done
+        * by emergency worker, however, this is TPs userland visible
+        * and we'll be exposing exactly the same information,
+        * so it has a mismatch name.
+        */
+       WB_REASON_FORKER_THREAD,
+       WB_REASON_MAX,
+ };
  /*
   * For cgroup writeback, multiple wb's may map to the same blkcg.  Those
   * wb's can operate mostly independently but should share the congested
@@@ -116,6 -138,7 +139,7 @@@ struct bdi_writeback 
  
        struct fprop_local_percpu completions;
        int dirty_exceeded;
+       enum wb_reason start_all_reason;
  
        spinlock_t work_lock;           /* protects work_list & dwork scheduling */
        struct list_head work_list;
diff --combined include/linux/backing-dev.h
index 16621579a3db313bf4a5f315a732ae6121e8b8c2,872afa41abc2d78b4d01b279ae0d92bf887adfaa..f41ca8486e0272c71a59f68d33394e72ac0407e3
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * include/linux/backing-dev.h
   *
@@@ -39,8 -38,6 +39,6 @@@ static inline struct backing_dev_info *
        return bdi_alloc_node(gfp_mask, NUMA_NO_NODE);
  }
  
- void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
-                       bool range_cyclic, enum wb_reason reason);
  void wb_start_background_writeback(struct bdi_writeback *wb);
  void wb_workfn(struct work_struct *work);
  void wb_wakeup_delayed(struct bdi_writeback *wb);
@@@ -175,8 -172,6 +173,6 @@@ static inline int wb_congested(struct b
  
  long congestion_wait(int sync, long timeout);
  long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout);
- int pdflush_proc_obsolete(struct ctl_table *table, int write,
-               void __user *buffer, size_t *lenp, loff_t *ppos);
  
  static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi)
  {
diff --combined include/linux/blk-cgroup.h
index 8bbc3716507ac254c38aebeda44fa69a300aa47d,f57e54d645297f3421a260f79a956b3866c83756..e9825ff57b155d75153a7217b2354fcbfda914a6
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef _BLK_CGROUP_H
  #define _BLK_CGROUP_H
  /*
@@@ -20,6 -19,7 +20,7 @@@
  #include <linux/radix-tree.h>
  #include <linux/blkdev.h>
  #include <linux/atomic.h>
+ #include <linux/kthread.h>
  
  /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
  #define BLKG_STAT_CPU_BATCH   (INT_MAX / 2)
@@@ -224,22 -224,16 +225,16 @@@ static inline struct blkcg *css_to_blkc
        return css ? container_of(css, struct blkcg, css) : NULL;
  }
  
- static inline struct blkcg *task_blkcg(struct task_struct *tsk)
- {
-       return css_to_blkcg(task_css(tsk, io_cgrp_id));
- }
  static inline struct blkcg *bio_blkcg(struct bio *bio)
  {
+       struct cgroup_subsys_state *css;
        if (bio && bio->bi_css)
                return css_to_blkcg(bio->bi_css);
-       return task_blkcg(current);
- }
- static inline struct cgroup_subsys_state *
- task_get_blkcg_css(struct task_struct *task)
- {
-       return task_get_css(task, io_cgrp_id);
+       css = kthread_blkcg();
+       if (css)
+               return css_to_blkcg(css);
+       return css_to_blkcg(task_css(current, io_cgrp_id));
  }
  
  /**
@@@ -736,12 -730,6 +731,6 @@@ struct blkcg_policy 
  
  #define blkcg_root_css        ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
  
- static inline struct cgroup_subsys_state *
- task_get_blkcg_css(struct task_struct *task)
- {
-       return NULL;
- }
  #ifdef CONFIG_BLOCK
  
  static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
diff --combined include/linux/blk-mq.h
index 994cbb0f7ffca5a38771c4dd24eb46eccda6abde,eb1e2cdffb317a0ae7383c000e51b6b721de1417..95c9a5c862e2545b26922b3cbb2103200a29a888
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef BLK_MQ_H
  #define BLK_MQ_H
  
@@@ -31,10 -30,12 +31,12 @@@ struct blk_mq_hw_ctx 
  
        struct sbitmap          ctx_map;
  
+       struct blk_mq_ctx       *dispatch_from;
        struct blk_mq_ctx       **ctxs;
        unsigned int            nr_ctx;
  
-       wait_queue_entry_t              dispatch_wait;
+       wait_queue_entry_t      dispatch_wait;
        atomic_t                wait_index;
  
        struct blk_mq_tags      *tags;
@@@ -91,6 -92,8 +93,8 @@@ struct blk_mq_queue_data 
  
  typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
                const struct blk_mq_queue_data *);
+ typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *);
+ typedef void (put_budget_fn)(struct blk_mq_hw_ctx *);
  typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
  typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
  typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
@@@ -112,6 -115,15 +116,15 @@@ struct blk_mq_ops 
         */
        queue_rq_fn             *queue_rq;
  
+       /*
+        * Reserve budget before queue request, once .queue_rq is
+        * run, it is driver's responsibility to release the
+        * reserved budget. Also we have to handle failure case
+        * of .get_budget for avoiding I/O deadlock.
+        */
+       get_budget_fn           *get_budget;
+       put_budget_fn           *put_budget;
        /*
         * Called on request timeout
         */
@@@ -169,8 -181,7 +182,7 @@@ enum 
        BLK_MQ_S_STOPPED        = 0,
        BLK_MQ_S_TAG_ACTIVE     = 1,
        BLK_MQ_S_SCHED_RESTART  = 2,
-       BLK_MQ_S_TAG_WAITING    = 3,
-       BLK_MQ_S_START_ON_RUN   = 4,
+       BLK_MQ_S_START_ON_RUN   = 3,
  
        BLK_MQ_MAX_DEPTH        = 10240,
  
@@@ -198,15 -209,21 +210,21 @@@ void blk_mq_free_request(struct reques
  bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
  
  enum {
-       BLK_MQ_REQ_NOWAIT       = (1 << 0), /* return when out of requests */
-       BLK_MQ_REQ_RESERVED     = (1 << 1), /* allocate from reserved pool */
-       BLK_MQ_REQ_INTERNAL     = (1 << 2), /* allocate internal/sched tag */
+       /* return when out of requests */
+       BLK_MQ_REQ_NOWAIT       = (__force blk_mq_req_flags_t)(1 << 0),
+       /* allocate from reserved pool */
+       BLK_MQ_REQ_RESERVED     = (__force blk_mq_req_flags_t)(1 << 1),
+       /* allocate internal/sched tag */
+       BLK_MQ_REQ_INTERNAL     = (__force blk_mq_req_flags_t)(1 << 2),
+       /* set RQF_PREEMPT */
+       BLK_MQ_REQ_PREEMPT      = (__force blk_mq_req_flags_t)(1 << 3),
  };
  
  struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
-               unsigned int flags);
+               blk_mq_req_flags_t flags);
  struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
-               unsigned int op, unsigned int flags, unsigned int hctx_idx);
+               unsigned int op, blk_mq_req_flags_t flags,
+               unsigned int hctx_idx);
  struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
  
  enum {
@@@ -249,7 -266,7 +267,7 @@@ void blk_mq_start_stopped_hw_queues(str
  void blk_mq_quiesce_queue(struct request_queue *q);
  void blk_mq_unquiesce_queue(struct request_queue *q);
  void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
- void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
+ bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
  void blk_mq_run_hw_queues(struct request_queue *q, bool async);
  void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
  void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
@@@ -260,8 -277,8 +278,8 @@@ void blk_freeze_queue_start(struct requ
  void blk_mq_freeze_queue_wait(struct request_queue *q);
  int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
                                     unsigned long timeout);
- int blk_mq_reinit_tagset(struct blk_mq_tag_set *set,
-                        int (reinit_request)(void *, struct request *));
+ int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data,
+               int (reinit_request)(void *, struct request *));
  
  int blk_mq_map_queues(struct blk_mq_tag_set *set);
  void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
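
A rough illustration of the new .get_budget/.put_budget contract described in the comment above. The toy_* names are invented for this sketch and do not correspond to any in-tree driver; the point is that a driver which can only keep a bounded number of commands in flight returns false from .get_budget so blk-mq stops dispatching instead of deadlocking, and releases the budget itself once .queue_rq has run.

#include <linux/blk-mq.h>
#include <linux/atomic.h>

struct toy_hba {
        atomic_t        inflight;
        int             max_inflight;
};

static bool toy_get_budget(struct blk_mq_hw_ctx *hctx)
{
        struct toy_hba *hba = hctx->queue->queuedata;

        if (atomic_inc_return(&hba->inflight) > hba->max_inflight) {
                atomic_dec(&hba->inflight);
                return false;           /* no budget, don't dispatch */
        }
        return true;
}

static void toy_put_budget(struct blk_mq_hw_ctx *hctx)
{
        struct toy_hba *hba = hctx->queue->queuedata;

        atomic_dec(&hba->inflight);
}

static blk_status_t toy_queue_rq(struct blk_mq_hw_ctx *hctx,
                                 const struct blk_mq_queue_data *bd)
{
        /*
         * ... submit bd->rq to hardware here.  Per the comment above,
         * once .queue_rq has run, releasing the reserved budget (by
         * calling toy_put_budget()) is the driver's responsibility,
         * including on every failure path.
         */
        return BLK_STS_OK;
}

static const struct blk_mq_ops toy_mq_ops = {
        .queue_rq       = toy_queue_rq,
        .get_budget     = toy_get_budget,
        .put_budget     = toy_put_budget,
};
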
diff --combined include/linux/blk_types.h
index 96ac3815542c1484b6c9300dafdef53bacdf4903,13ccfc9b210ac78a2c99cecce68e1dce4eaf82fc..a1e628e032dad75bf1837a25e45b55a7f54ca2df
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * Block data types and constants.  Directly include this file only to
   * break include dependency loop.
@@@ -163,6 -162,8 +163,8 @@@ struct bio 
   */
  #define BIO_RESET_BITS        BVEC_POOL_OFFSET
  
+ typedef __u32 __bitwise blk_mq_req_flags_t;
  /*
   * Operations and flags common to the bio and request structures.
   * We use 8 bits for encoding the operation, and the remaining 24 for flags.
@@@ -225,11 -226,14 +227,14 @@@ enum req_flag_bits 
        __REQ_PREFLUSH,         /* request for cache flush */
        __REQ_RAHEAD,           /* read ahead, can fail anytime */
        __REQ_BACKGROUND,       /* background IO */
+       __REQ_NOWAIT,           /* Don't wait if request will block */
  
        /* command specific flags for REQ_OP_WRITE_ZEROES: */
        __REQ_NOUNMAP,          /* do not free blocks when zeroing */
  
-       __REQ_NOWAIT,           /* Don't wait if request will block */
+       /* for driver use */
+       __REQ_DRV,
        __REQ_NR_BITS,          /* stops here */
  };
  
  #define REQ_PREFLUSH          (1ULL << __REQ_PREFLUSH)
  #define REQ_RAHEAD            (1ULL << __REQ_RAHEAD)
  #define REQ_BACKGROUND                (1ULL << __REQ_BACKGROUND)
+ #define REQ_NOWAIT            (1ULL << __REQ_NOWAIT)
  
  #define REQ_NOUNMAP           (1ULL << __REQ_NOUNMAP)
- #define REQ_NOWAIT            (1ULL << __REQ_NOWAIT)
+ #define REQ_DRV                       (1ULL << __REQ_DRV)
  
  #define REQ_FAILFAST_MASK \
        (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
@@@ -330,11 -336,10 +337,10 @@@ static inline bool blk_qc_t_is_internal
  }
  
  struct blk_rq_stat {
-       s64 mean;
+       u64 mean;
        u64 min;
        u64 max;
-       s32 nr_samples;
-       s32 nr_batch;
+       u32 nr_samples;
        u64 batch;
  };
  
diff --combined include/linux/blkdev.h
index 8da66379f7ea7afceb9af2f32f998d7ca71c25a7,e80ea1d31343da36b808dfe9f547ad8466d8d091..8089ca17db9ac65998ec9cf82f65743bb5c5abb9
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef _LINUX_BLKDEV_H
  #define _LINUX_BLKDEV_H
  
@@@ -267,6 -266,7 +267,7 @@@ struct blk_queue_ctx
  
  typedef void (request_fn_proc) (struct request_queue *q);
  typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio);
+ typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t);
  typedef int (prep_rq_fn) (struct request_queue *, struct request *);
  typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
  
@@@ -409,6 -409,7 +410,7 @@@ struct request_queue 
  
        request_fn_proc         *request_fn;
        make_request_fn         *make_request_fn;
+       poll_q_fn               *poll_fn;
        prep_rq_fn              *prep_rq_fn;
        unprep_rq_fn            *unprep_rq_fn;
        softirq_done_fn         *softirq_done_fn;
  #define QUEUE_FLAG_NOMERGES     5     /* disable merge attempts */
  #define QUEUE_FLAG_SAME_COMP  6       /* complete on same CPU-group */
  #define QUEUE_FLAG_FAIL_IO    7       /* fake timeout */
- #define QUEUE_FLAG_STACKABLE  8       /* supports request stacking */
  #define QUEUE_FLAG_NONROT     9       /* non-rotational device (SSD) */
  #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
  #define QUEUE_FLAG_IO_STAT     10     /* do IO stats */
  #define QUEUE_FLAG_REGISTERED  26     /* queue has been registered to a disk */
  #define QUEUE_FLAG_SCSI_PASSTHROUGH 27        /* queue supports SCSI commands */
  #define QUEUE_FLAG_QUIESCED    28     /* queue has been quiesced */
+ #define QUEUE_FLAG_PREEMPT_ONLY       29      /* only process REQ_PREEMPT requests */
  
  #define QUEUE_FLAG_DEFAULT    ((1 << QUEUE_FLAG_IO_STAT) |            \
-                                (1 << QUEUE_FLAG_STACKABLE)    |       \
                                 (1 << QUEUE_FLAG_SAME_COMP)    |       \
                                 (1 << QUEUE_FLAG_ADD_RANDOM))
  
  #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) |            \
-                                (1 << QUEUE_FLAG_STACKABLE)    |       \
                                 (1 << QUEUE_FLAG_SAME_COMP)    |       \
                                 (1 << QUEUE_FLAG_POLL))
  
@@@ -723,8 -722,6 +723,6 @@@ static inline void queue_flag_clear(uns
  #define blk_queue_nonrot(q)   test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
  #define blk_queue_io_stat(q)  test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
  #define blk_queue_add_random(q)       test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
- #define blk_queue_stackable(q)        \
-       test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
  #define blk_queue_discard(q)  test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
  #define blk_queue_secure_erase(q) \
        (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
        ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
                             REQ_FAILFAST_DRIVER))
  #define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
+ #define blk_queue_preempt_only(q)                             \
+       test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags)
+ extern int blk_set_preempt_only(struct request_queue *q);
+ extern void blk_clear_preempt_only(struct request_queue *q);
  
  static inline bool blk_account_rq(struct request *rq)
  {
@@@ -923,24 -925,17 +926,17 @@@ static inline void rq_flush_dcache_page
  }
  #endif
  
- #ifdef CONFIG_PRINTK
- #define vfs_msg(sb, level, fmt, ...)                          \
-       __vfs_msg(sb, level, fmt, ##__VA_ARGS__)
- #else
- #define vfs_msg(sb, level, fmt, ...)                          \
- do {                                                          \
-       no_printk(fmt, ##__VA_ARGS__);                          \
-       __vfs_msg(sb, "", " ");                                 \
- } while (0)
- #endif
  extern int blk_register_queue(struct gendisk *disk);
  extern void blk_unregister_queue(struct gendisk *disk);
  extern blk_qc_t generic_make_request(struct bio *bio);
+ extern blk_qc_t direct_make_request(struct bio *bio);
  extern void blk_rq_init(struct request_queue *q, struct request *rq);
  extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
  extern void blk_put_request(struct request *);
  extern void __blk_put_request(struct request_queue *, struct request *);
+ extern struct request *blk_get_request_flags(struct request_queue *,
+                                            unsigned int op,
+                                            blk_mq_req_flags_t flags);
  extern struct request *blk_get_request(struct request_queue *, unsigned int op,
                                       gfp_t gfp_mask);
  extern void blk_requeue_request(struct request_queue *, struct request *);
@@@ -964,7 -959,7 +960,7 @@@ extern int scsi_cmd_ioctl(struct reques
  extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
                         struct scsi_ioctl_command __user *);
  
- extern int blk_queue_enter(struct request_queue *q, bool nowait);
+ extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags);
  extern void blk_queue_exit(struct request_queue *q);
  extern void blk_start_queue(struct request_queue *q);
  extern void blk_start_queue_async(struct request_queue *q);
@@@ -991,7 -986,7 +987,7 @@@ extern void blk_execute_rq_nowait(struc
  int blk_status_to_errno(blk_status_t status);
  blk_status_t errno_to_blk_status(int errno);
  
- bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie);
+ bool blk_poll(struct request_queue *q, blk_qc_t cookie);
  
  static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
  {
@@@ -1110,6 -1105,8 +1106,8 @@@ extern struct request *blk_peek_request
  extern void blk_start_request(struct request *rq);
  extern struct request *blk_fetch_request(struct request_queue *q);
  
+ void blk_steal_bios(struct bio_list *list, struct request *rq);
  /*
   * Request completion related functions.
   *
@@@ -1372,7 -1369,7 +1370,7 @@@ static inline int sb_issue_zeroout(stru
                                    gfp_mask, 0);
  }
  
- extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
+ extern int blk_verify_command(unsigned char *cmd, fmode_t mode);
  
  enum blk_default_limits {
        BLK_MAX_SEGMENTS        = 128,
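
For readers unfamiliar with the polled-completion path that blk_poll() (renamed from blk_mq_poll() in this series) serves, here is a sketch of how a caller such as fs/iomap.c or mm/page_io.c waits for a bio. The demo_* names are invented, and a real caller would also check IOCB_HIPRI and whether the queue supports polling before spinning.

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/sched.h>
#include <linux/compiler.h>

static void demo_bio_end_io(struct bio *bio)
{
        WRITE_ONCE(*(bool *)bio->bi_private, true);
}

/* Submit 'bio' and busy-poll the queue for its completion. */
static void demo_submit_and_poll(struct request_queue *q, struct bio *bio)
{
        bool done = false;
        blk_qc_t qc;

        bio->bi_private = &done;
        bio->bi_end_io = demo_bio_end_io;
        qc = submit_bio(bio);

        for (;;) {
                set_current_state(TASK_UNINTERRUPTIBLE);
                if (READ_ONCE(done))
                        break;
                /* Poll the completion queue; sleep if polling is not
                 * supported or found nothing for this cookie. */
                if (!blk_poll(q, qc))
                        io_schedule();
        }
        __set_current_state(TASK_RUNNING);
}
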
diff --combined include/linux/buffer_head.h
index afa37f807f12c1065a44ba35ffe6c88423756fd2,ae2d25f01b98e66b4c35e29a8c5a1365f6c807ec..8b1bf8d3d4a202944969a1ae70b7659cd94484e1
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * include/linux/buffer_head.h
   *
@@@ -157,7 -156,7 +157,7 @@@ void set_bh_page(struct buffer_head *bh
                struct page *page, unsigned long offset);
  int try_to_free_buffers(struct page *);
  struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
-               int retry);
+               bool retry);
  void create_empty_buffers(struct page *, unsigned long,
                        unsigned long b_state);
  void end_buffer_read_sync(struct buffer_head *bh, int uptodate);
@@@ -233,7 -232,6 +233,7 @@@ int generic_write_end(struct file *, st
                                loff_t, unsigned, unsigned,
                                struct page *, void *);
  void page_zero_new_buffers(struct page *page, unsigned from, unsigned to);
 +void clean_page_buffers(struct page *page);
  int cont_write_begin(struct file *, struct address_space *, loff_t,
                        unsigned, unsigned, struct page **, void **,
                        get_block_t *, loff_t *);
diff --combined include/linux/elevator.h
index ddb7632d73b9532df238f6d727bafd19ec39ec60,6df8b14f1f6a04ea984ce3500ab9e0a200187006..3d794b3dc53236a9bff9b0f2ae69f085209ee385
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef _LINUX_ELEVATOR_H
  #define _LINUX_ELEVATOR_H
  
@@@ -145,6 -144,7 +145,7 @@@ struct elevator_typ
        size_t icq_align;       /* ditto */
        struct elv_fs_entry *elevator_attrs;
        char elevator_name[ELV_NAME_MAX];
+       const char *elevator_alias;
        struct module *elevator_owner;
        bool uses_mq;
  #ifdef CONFIG_BLK_DEBUG_FS
diff --combined include/linux/genhd.h
index eaefb7a62f83707a9493f664c232f3514d2db880,ca10cc292187dcdb59f4974dda214fe21a0494da..5144ebe046c97aefed613bbdcea8dceeb2442ed3
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef _LINUX_GENHD_H
  #define _LINUX_GENHD_H
  
@@@ -141,6 -140,7 +141,7 @@@ struct hd_struct 
  #define GENHD_FL_NATIVE_CAPACITY              128
  #define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE   256
  #define GENHD_FL_NO_PART_SCAN                 512
+ #define GENHD_FL_HIDDEN                               1024
  
  enum {
        DISK_EVENT_MEDIA_CHANGE                 = 1 << 0, /* media changed */
@@@ -207,7 -207,6 +208,7 @@@ struct gendisk 
  #endif        /* CONFIG_BLK_DEV_INTEGRITY */
        int node_id;
        struct badblocks *bb;
 +      struct lockdep_map lockdep_map;
  };
  
  static inline struct gendisk *part_to_disk(struct hd_struct *part)
@@@ -236,7 -235,7 +237,7 @@@ static inline bool disk_part_scan_enabl
  
  static inline dev_t disk_devt(struct gendisk *disk)
  {
-       return disk_to_dev(disk)->devt;
+       return MKDEV(disk->major, disk->first_minor);
  }
  
  static inline dev_t part_devt(struct hd_struct *part)
        return part_to_dev(part)->devt;
  }
  
+ extern struct hd_struct *__disk_get_part(struct gendisk *disk, int partno);
  extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno);
  
  static inline void disk_put_part(struct hd_struct *part)
@@@ -592,7 -592,8 +594,7 @@@ extern void __delete_partition(struct p
  extern void delete_partition(struct gendisk *, int);
  extern void printk_all_partitions(void);
  
 -extern struct gendisk *alloc_disk_node(int minors, int node_id);
 -extern struct gendisk *alloc_disk(int minors);
 +extern struct gendisk *__alloc_disk_node(int minors, int node_id);
  extern struct kobject *get_disk(struct gendisk *disk);
  extern void put_disk(struct gendisk *disk);
  extern void blk_register_region(dev_t devt, unsigned long range,
@@@ -616,24 -617,6 +618,24 @@@ extern ssize_t part_fail_store(struct d
                               const char *buf, size_t count);
  #endif /* CONFIG_FAIL_MAKE_REQUEST */
  
 +#define alloc_disk_node(minors, node_id)                              \
 +({                                                                    \
 +      static struct lock_class_key __key;                             \
 +      const char *__name;                                             \
 +      struct gendisk *__disk;                                         \
 +                                                                      \
 +      __name = "(gendisk_completion)"#minors"("#node_id")";           \
 +                                                                      \
 +      __disk = __alloc_disk_node(minors, node_id);                    \
 +                                                                      \
 +      if (__disk)                                                     \
 +              lockdep_init_map(&__disk->lockdep_map, __name, &__key, 0); \
 +                                                                      \
 +      __disk;                                                         \
 +})
 +
 +#define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE)
 +
  static inline int hd_ref_init(struct hd_struct *part)
  {
        if (percpu_ref_init(&part->ref, __delete_partition, 0,
diff --combined include/linux/kthread.h
index 86d53a3cb497ff0d587952f4b9073da2f71e98a2,fb201842c635d0497f4a24a8db81ecb91875071a..3203e36b2ee81f746b6d87c16701cdc567274ebd
@@@ -1,9 -1,9 +1,10 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef _LINUX_KTHREAD_H
  #define _LINUX_KTHREAD_H
  /* Simple interface for creating and stopping kernel threads without mess. */
  #include <linux/err.h>
  #include <linux/sched.h>
+ #include <linux/cgroup.h>
  
  __printf(4, 5)
  struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
@@@ -76,7 -76,7 +77,7 @@@ extern int tsk_fork_get_node(struct tas
   */
  struct kthread_work;
  typedef void (*kthread_work_func_t)(struct kthread_work *work);
 -void kthread_delayed_work_timer_fn(unsigned long __data);
 +void kthread_delayed_work_timer_fn(struct timer_list *t);
  
  enum {
        KTW_FREEZABLE           = 1 << 0,       /* freeze during suspend */
@@@ -117,8 -117,8 +118,8 @@@ struct kthread_delayed_work 
  
  #define KTHREAD_DELAYED_WORK_INIT(dwork, fn) {                                \
        .work = KTHREAD_WORK_INIT((dwork).work, (fn)),                  \
 -      .timer = __TIMER_INITIALIZER(kthread_delayed_work_timer_fn,     \
 -                                   0, (unsigned long)&(dwork),        \
 +      .timer = __TIMER_INITIALIZER((TIMER_FUNC_TYPE)kthread_delayed_work_timer_fn,\
 +                                   (TIMER_DATA_TYPE)&(dwork.timer),   \
                                     TIMER_IRQSAFE),                    \
        }
  
@@@ -165,8 -165,8 +166,8 @@@ extern void __kthread_init_worker(struc
        do {                                                            \
                kthread_init_work(&(dwork)->work, (fn));                \
                __setup_timer(&(dwork)->timer,                          \
 -                            kthread_delayed_work_timer_fn,            \
 -                            (unsigned long)(dwork),                   \
 +                            (TIMER_FUNC_TYPE)kthread_delayed_work_timer_fn,\
 +                            (TIMER_DATA_TYPE)&(dwork)->timer,         \
                              TIMER_IRQSAFE);                           \
        } while (0)
  
@@@ -199,4 -199,14 +200,14 @@@ bool kthread_cancel_delayed_work_sync(s
  
  void kthread_destroy_worker(struct kthread_worker *worker);
  
+ #ifdef CONFIG_BLK_CGROUP
+ void kthread_associate_blkcg(struct cgroup_subsys_state *css);
+ struct cgroup_subsys_state *kthread_blkcg(void);
+ #else
+ static inline void kthread_associate_blkcg(struct cgroup_subsys_state *css) { }
+ static inline struct cgroup_subsys_state *kthread_blkcg(void)
+ {
+       return NULL;
+ }
+ #endif
  #endif /* _LINUX_KTHREAD_H */
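
The kthread_delayed_work_timer_fn() change above is part of the tree-wide timer conversion: callbacks now receive the timer_list itself and recover their container with from_timer(). A minimal sketch with an invented demo_delayed structure; note that kthread.h still initializes the timer through __setup_timer() and a TIMER_FUNC_TYPE cast because the generic conversion was still in flight at this point.

#include <linux/timer.h>
#include <linux/workqueue.h>

struct demo_delayed {
        struct timer_list       timer;
        struct work_struct      work;
};

/* New-style callback: gets the timer, not an unsigned long cookie. */
static void demo_timer_fn(struct timer_list *t)
{
        /* recover the structure that embeds 't' */
        struct demo_delayed *dd = from_timer(dd, t, timer);

        schedule_work(&dd->work);
}
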
diff --combined include/linux/lightnvm.h
index a29a8db5cc2fcd3865ee99fb8e35520389aa8975,b7f111ff4d3b3ace306abc853f387f8dfd370926..2d1d9de06728d619d98b9b718a8e9cdb432d56de
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef NVM_H
  #define NVM_H
  
@@@ -57,6 -56,7 +57,7 @@@ typedef int (nvm_get_l2p_tbl_fn)(struc
  typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *);
  typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int);
  typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
+ typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *);
  typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
  typedef void (nvm_destroy_dma_pool_fn)(void *);
  typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
@@@ -70,6 -70,7 +71,7 @@@ struct nvm_dev_ops 
        nvm_op_set_bb_fn        *set_bb_tbl;
  
        nvm_submit_io_fn        *submit_io;
+       nvm_submit_io_sync_fn   *submit_io_sync;
  
        nvm_create_dma_pool_fn  *create_dma_pool;
        nvm_destroy_dma_pool_fn *destroy_dma_pool;
@@@ -461,10 -462,9 +463,9 @@@ struct nvm_tgt_type 
  
        /* For internal use */
        struct list_head list;
+       struct module *owner;
  };
  
- extern struct nvm_tgt_type *nvm_find_target_type(const char *, int);
  extern int nvm_register_tgt_type(struct nvm_tgt_type *);
  extern void nvm_unregister_tgt_type(struct nvm_tgt_type *);
  
@@@ -479,10 -479,8 +480,8 @@@ extern int nvm_set_tgt_bb_tbl(struct nv
                              int, int);
  extern int nvm_max_phys_sects(struct nvm_tgt_dev *);
  extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
+ extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *);
  extern int nvm_erase_sync(struct nvm_tgt_dev *, struct ppa_addr *, int);
- extern int nvm_set_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *,
-                                       const struct ppa_addr *, int, int);
- extern void nvm_free_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *);
  extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *,
                           void *);
  extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t);
@@@ -491,8 -489,6 +490,6 @@@ extern void nvm_end_io(struct nvm_rq *)
  extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int);
  extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *);
  
- extern int nvm_dev_factory(struct nvm_dev *, int flags);
  extern void nvm_part_to_tgt(struct nvm_dev *, sector_t *, int);
  
  #else /* CONFIG_NVM */
diff --combined include/linux/writeback.h
index e12d92808e983c4b6e129c4304ac50f307d20f6d,e15ec14085ade47f366d6ec822daac637407ea3c..f42d85631d1711fd0085141fc1e61b0c31cd1ddd
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * include/linux/writeback.h
   */
@@@ -42,28 -41,6 +42,6 @@@ enum writeback_sync_modes 
        WB_SYNC_ALL,    /* Wait on every mapping */
  };
  
- /*
-  * why some writeback work was initiated
-  */
- enum wb_reason {
-       WB_REASON_BACKGROUND,
-       WB_REASON_VMSCAN,
-       WB_REASON_SYNC,
-       WB_REASON_PERIODIC,
-       WB_REASON_LAPTOP_TIMER,
-       WB_REASON_FREE_MORE_MEM,
-       WB_REASON_FS_FREE_SPACE,
-       /*
-        * There is no bdi forker thread any more and works are done
-        * by emergency worker, however, this is TPs userland visible
-        * and we'll be exposing exactly the same information,
-        * so it has a mismatch name.
-        */
-       WB_REASON_FORKER_THREAD,
-       WB_REASON_MAX,
- };
  /*
   * A control structure which tells the writeback code what to do.  These are
   * always on the stack, and hence need no locking.  They are always initialised
@@@ -186,11 -163,11 +164,11 @@@ struct bdi_writeback
  void writeback_inodes_sb(struct super_block *, enum wb_reason reason);
  void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
                                                        enum wb_reason reason);
- bool try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason);
- bool try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
-                                  enum wb_reason reason);
+ void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason);
  void sync_inodes_sb(struct super_block *);
- void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
+ void wakeup_flusher_threads(enum wb_reason reason);
+ void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
+                               enum wb_reason reason);
  void inode_wait_for_writeback(struct inode *inode);
  
  /* writeback.h requires fs.h; it, too, is not included from here. */
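
The writeback API change above removes the nr_pages argument: a flusher wakeup now always means "write back everything", tracked per-wb through WB_start_all, and callers only state a reason. A tiny sketch of the two entry points, with an invented caller name:

#include <linux/writeback.h>
#include <linux/backing-dev.h>

static void demo_low_on_memory(struct backing_dev_info *bdi)
{
        /* kick the flusher of every registered bdi ... */
        wakeup_flusher_threads(WB_REASON_VMSCAN);

        /* ... or only the one backing the device we care about */
        wakeup_flusher_threads_bdi(bdi, WB_REASON_VMSCAN);
}
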
diff --combined include/scsi/scsi_device.h
index 571ddb49b92693ef6f5f4eebc11985f51765a8ff,6f0f1e242e236da26c716887eb7d64519c721fc7..73af87dfbff8d6915b79777d5e483ec4af25a337
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #ifndef _SCSI_SCSI_DEVICE_H
  #define _SCSI_SCSI_DEVICE_H
  
@@@ -193,7 -192,6 +193,7 @@@ struct scsi_device 
        unsigned no_dif:1;      /* T10 PI (DIF) should be disabled */
        unsigned broken_fua:1;          /* Don't set FUA bit */
        unsigned lun_in_cdb:1;          /* Store LUN bits in CDB[1] */
 +      unsigned unmap_limit_for_ws:1;  /* Use the UNMAP limit for WRITE SAME */
  
        atomic_t disk_events_disable_depth; /* disable depth for disk events */
  
        unsigned char           access_state;
        struct mutex            state_mutex;
        enum scsi_device_state sdev_state;
+       struct task_struct      *quiesced_by;
        unsigned long           sdev_data[0];
  } __attribute__((aligned(sizeof(unsigned long))));
  
diff --combined include/trace/events/writeback.h
index 2e1fa7910306d794abfabf23223d37206cb2e6b3,19a0ea08e098984c25b87707a24e37efcc76f82b..32db72c7c055fa2f566f70d89cce51402f213b28
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0 */
  #undef TRACE_SYSTEM
  #define TRACE_SYSTEM writeback
  
@@@ -287,7 -286,6 +287,6 @@@ DEFINE_EVENT(writeback_class, name, 
        TP_PROTO(struct bdi_writeback *wb), \
        TP_ARGS(wb))
  
- DEFINE_WRITEBACK_EVENT(writeback_nowork);
  DEFINE_WRITEBACK_EVENT(writeback_wake_background);
  
  TRACE_EVENT(writeback_bdi_register,
diff --combined kernel/kthread.c
index ba3992c8c3753bcc0785ecf998e457d21c013873,8dbe2454cb1deed1447450ea301e2b7ac481ea9d..8af313081b0d9a7f626f6b3b496119737e9e89a6
@@@ -20,7 -20,6 +20,6 @@@
  #include <linux/freezer.h>
  #include <linux/ptrace.h>
  #include <linux/uaccess.h>
- #include <linux/cgroup.h>
  #include <trace/events/sched.h>
  
  static DEFINE_SPINLOCK(kthread_create_lock);
@@@ -47,6 -46,9 +46,9 @@@ struct kthread 
        void *data;
        struct completion parked;
        struct completion exited;
+ #ifdef CONFIG_BLK_CGROUP
+       struct cgroup_subsys_state *blkcg_css;
+ #endif
  };
  
  enum KTHREAD_BITS {
@@@ -74,11 -76,17 +76,17 @@@ static inline struct kthread *to_kthrea
  
  void free_kthread_struct(struct task_struct *k)
  {
+       struct kthread *kthread;
        /*
         * Can be NULL if this kthread was created by kernel_thread()
         * or if kmalloc() in kthread() failed.
         */
-       kfree(to_kthread(k));
+       kthread = to_kthread(k);
+ #ifdef CONFIG_BLK_CGROUP
+       WARN_ON_ONCE(kthread && kthread->blkcg_css);
+ #endif
+       kfree(kthread);
  }
  
  /**
@@@ -196,7 -204,7 +204,7 @@@ static int kthread(void *_create
        struct kthread *self;
        int ret;
  
-       self = kmalloc(sizeof(*self), GFP_KERNEL);
+       self = kzalloc(sizeof(*self), GFP_KERNEL);
        set_kthread_struct(self);
  
        /* If user was SIGKILLed, I release the structure. */
                do_exit(-ENOMEM);
        }
  
-       self->flags = 0;
        self->data = data;
        init_completion(&self->exited);
        init_completion(&self->parked);
@@@ -798,14 -805,15 +805,14 @@@ EXPORT_SYMBOL_GPL(kthread_queue_work)
  /**
   * kthread_delayed_work_timer_fn - callback that queues the associated kthread
   *    delayed work when the timer expires.
 - * @__data: pointer to the data associated with the timer
 + * @t: pointer to the expired timer
   *
   * The format of the function is defined by struct timer_list.
   * It should have been called from irqsafe timer with irq already off.
   */
 -void kthread_delayed_work_timer_fn(unsigned long __data)
 +void kthread_delayed_work_timer_fn(struct timer_list *t)
  {
 -      struct kthread_delayed_work *dwork =
 -              (struct kthread_delayed_work *)__data;
 +      struct kthread_delayed_work *dwork = from_timer(dwork, t, timer);
        struct kthread_work *work = &dwork->work;
        struct kthread_worker *worker = work->worker;
  
@@@ -836,7 -844,8 +843,7 @@@ void __kthread_queue_delayed_work(struc
        struct timer_list *timer = &dwork->timer;
        struct kthread_work *work = &dwork->work;
  
 -      WARN_ON_ONCE(timer->function != kthread_delayed_work_timer_fn ||
 -                   timer->data != (unsigned long)dwork);
 +      WARN_ON_ONCE(timer->function != (TIMER_FUNC_TYPE)kthread_delayed_work_timer_fn);
  
        /*
         * If @delay is 0, queue @dwork->work immediately.  This is for
@@@ -1152,3 -1161,54 +1159,54 @@@ void kthread_destroy_worker(struct kthr
        kfree(worker);
  }
  EXPORT_SYMBOL(kthread_destroy_worker);
+ #ifdef CONFIG_BLK_CGROUP
+ /**
+  * kthread_associate_blkcg - associate blkcg to current kthread
+  * @css: the cgroup info
+  *
+  * Current thread must be a kthread. The thread is running jobs on behalf of
+  * other threads. In some cases, we expect the jobs attach cgroup info of
+  * original threads instead of that of current thread. This function stores
+  * original thread's cgroup info in current kthread context for later
+  * retrieval.
+  */
+ void kthread_associate_blkcg(struct cgroup_subsys_state *css)
+ {
+       struct kthread *kthread;
+       if (!(current->flags & PF_KTHREAD))
+               return;
+       kthread = to_kthread(current);
+       if (!kthread)
+               return;
+       if (kthread->blkcg_css) {
+               css_put(kthread->blkcg_css);
+               kthread->blkcg_css = NULL;
+       }
+       if (css) {
+               css_get(css);
+               kthread->blkcg_css = css;
+       }
+ }
+ EXPORT_SYMBOL(kthread_associate_blkcg);
+ /**
+  * kthread_blkcg - get associated blkcg css of current kthread
+  *
+  * Current thread must be a kthread.
+  */
+ struct cgroup_subsys_state *kthread_blkcg(void)
+ {
+       struct kthread *kthread;
+       if (current->flags & PF_KTHREAD) {
+               kthread = to_kthread(current);
+               if (kthread)
+                       return kthread->blkcg_css;
+       }
+       return NULL;
+ }
+ EXPORT_SYMBOL(kthread_blkcg);
+ #endif
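
kthread_associate_blkcg()/kthread_blkcg() exist so a helper kthread (the loop worker, for instance) can charge the I/O it issues to the cgroup of the thread that handed it the work, since bio_blkcg() now falls back to the kthread's associated css. A sketch of the intended usage, with an invented demo_issue_for() helper:

#include <linux/kthread.h>
#include <linux/blk-cgroup.h>
#include <linux/bio.h>

/*
 * Runs on a helper kthread: charge the I/O it issues to the cgroup of
 * the thread that originated the work ('css'), then drop the
 * association again.
 */
static void demo_issue_for(struct cgroup_subsys_state *css, struct bio *bio)
{
        kthread_associate_blkcg(css);   /* takes its own css reference */

        /* bio_blkcg(), and thus the block cgroup controllers, now
         * resolve to 'css' instead of the kthread's own cgroup. */
        submit_bio(bio);

        kthread_associate_blkcg(NULL);  /* drop the association */
}
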
diff --combined kernel/sysctl.c
index d9c31bc2eaea2c95a7a7be5a5321700b84d8f640,a5dd8d82c25385f2de8a4753ce49cb24777351dd..9576bd582d4a870f657d974e1e3beaca6ea635fa
@@@ -367,8 -367,7 +367,8 @@@ static struct ctl_table kern_table[] = 
                .data           = &sysctl_sched_time_avg,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
 -              .proc_handler   = proc_dointvec,
 +              .proc_handler   = proc_dointvec_minmax,
 +              .extra1         = &one,
        },
  #ifdef CONFIG_SCHEDSTATS
        {
  #if defined(CONFIG_LOCKUP_DETECTOR)
        {
                .procname       = "watchdog",
 -              .data           = &watchdog_user_enabled,
 -              .maxlen         = sizeof (int),
 -              .mode           = 0644,
 +              .data           = &watchdog_user_enabled,
 +              .maxlen         = sizeof(int),
 +              .mode           = 0644,
                .proc_handler   = proc_watchdog,
                .extra1         = &zero,
                .extra2         = &one,
        },
        {
                .procname       = "nmi_watchdog",
 -              .data           = &nmi_watchdog_enabled,
 -              .maxlen         = sizeof (int),
 -              .mode           = 0644,
 +              .data           = &nmi_watchdog_user_enabled,
 +              .maxlen         = sizeof(int),
 +              .mode           = NMI_WATCHDOG_SYSCTL_PERM,
                .proc_handler   = proc_nmi_watchdog,
                .extra1         = &zero,
 -#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
                .extra2         = &one,
 -#else
 -              .extra2         = &zero,
 -#endif
        },
        {
                .procname       = "watchdog_cpumask",
  #ifdef CONFIG_SOFTLOCKUP_DETECTOR
        {
                .procname       = "soft_watchdog",
 -              .data           = &soft_watchdog_enabled,
 -              .maxlen         = sizeof (int),
 -              .mode           = 0644,
 +              .data           = &soft_watchdog_user_enabled,
 +              .maxlen         = sizeof(int),
 +              .mode           = 0644,
                .proc_handler   = proc_soft_watchdog,
                .extra1         = &zero,
                .extra2         = &one,
@@@ -1341,11 -1344,6 +1341,6 @@@ static struct ctl_table vm_table[] = 
                .proc_handler   = dirtytime_interval_handler,
                .extra1         = &zero,
        },
-       {
-               .procname       = "nr_pdflush_threads",
-               .mode           = 0444 /* read-only */,
-               .proc_handler   = pdflush_proc_obsolete,
-       },
        {
                .procname       = "swappiness",
                .data           = &vm_swappiness,
@@@ -2182,6 -2180,8 +2177,6 @@@ static int do_proc_douintvec_conv(unsig
                                  int write, void *data)
  {
        if (write) {
 -              if (*lvalp > UINT_MAX)
 -                      return -EINVAL;
                if (*lvalp > UINT_MAX)
                        return -EINVAL;
                *valp = *lvalp;
diff --combined mm/page_io.c
index 5d882de3fbfd2bf0f94a35d004205ceddc87b817,ff04de630c465538b65bfd0f14dcf4ec1ea5548b..cd52b9cc169bc3cf4821778af2bc159400ae9bb3
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   *  linux/mm/page_io.c
   *
@@@ -408,7 -407,7 +408,7 @@@ int swap_readpage(struct page *page, bo
                if (!READ_ONCE(bio->bi_private))
                        break;
  
-               if (!blk_mq_poll(disk->queue, qc))
+               if (!blk_poll(disk->queue, qc))
                        break;
        }
        __set_current_state(TASK_RUNNING);
diff --combined mm/vmscan.c
index eb2f0315b8c0e9549b98e51c2d5e5cd45b9920e1,42a7fdd52d8778e1025197b29151bcb36a4499f9..15b483ef6440d3a45ba69816159f7bf7ecc9915b
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0
  /*
   *  linux/mm/vmscan.c
   *
@@@ -1868,7 -1867,7 +1868,7 @@@ shrink_inactive_list(unsigned long nr_t
                 * also allow kswapd to start writing pages during reclaim.
                 */
                if (stat.nr_unqueued_dirty == nr_taken) {
-                       wakeup_flusher_threads(0, WB_REASON_VMSCAN);
+                       wakeup_flusher_threads(WB_REASON_VMSCAN);
                        set_bit(PGDAT_DIRTY, &pgdat->flags);
                }
  