Git Repo - J-linux.git/commitdiff
Merge tag 'vfio-v6.2-rc1' of https://github.com/awilliam/linux-vfio
author: Linus Torvalds <[email protected]>
Thu, 15 Dec 2022 21:12:15 +0000 (13:12 -0800)
committer: Linus Torvalds <[email protected]>
Thu, 15 Dec 2022 21:12:15 +0000 (13:12 -0800)
Pull VFIO updates from Alex Williamson:

 - Replace deprecated git://github.com link in MAINTAINERS (Palmer
   Dabbelt)

 - Simplify vfio/mlx5 with module_pci_driver() helper (Shang XiaoJing)

 - Drop unnecessary buffer from ACPI call (Rafael Mendonca)

 - Correct latent missing include issue in iova-bitmap and fix support
   for unaligned bitmaps. Follow-up with better fix through refactor
   (Joao Martins)

 - Rework ccw mdev driver to split private data from parent structure,
   better aligning with the mdev lifecycle and allowing us to remove a
   temporary workaround (Eric Farman)

 - Add an interface to get an estimated migration data size for a
   device, allowing userspace to make informed decisions, e.g. more
   accurately predicting VM downtime (Yishai Hadas)

 - Fix minor typo in vfio/mlx5 array declaration (Yishai Hadas)

 - Simplify module and Kconfig through consolidating SPAPR/EEH code and
   config options and folding virqfd module into main vfio module (Jason
   Gunthorpe)

 - Fix error path from device_register() across all vfio mdev and sample
   drivers (Alex Williamson)

 - Define migration pre-copy interface and implement for vfio/mlx5
   devices, allowing portions of the device state to be saved while the
   device continues operation, towards reducing the stop-copy state size
   (Jason Gunthorpe, Yishai Hadas, Shay Drory)

 - Implement pre-copy for hisi_acc devices (Shameer Kolothum)

 - Fixes to mdpy mdev driver remove path and error path on probe (Shang
   XiaoJing)

 - vfio/mlx5 fixes for incorrect return after copy_to_user() fault and
   incorrect buffer freeing (Dan Carpenter)

* tag 'vfio-v6.2-rc1' of https://github.com/awilliam/linux-vfio: (42 commits)
  vfio/mlx5: error pointer dereference in error handling
  vfio/mlx5: fix error code in mlx5vf_precopy_ioctl()
  samples: vfio-mdev: Fix missing pci_disable_device() in mdpy_fb_probe()
  hisi_acc_vfio_pci: Enable PRE_COPY flag
  hisi_acc_vfio_pci: Move the dev compatibility tests for early check
  hisi_acc_vfio_pci: Introduce support for PRE_COPY state transitions
  hisi_acc_vfio_pci: Add support for precopy IOCTL
  vfio/mlx5: Enable MIGRATION_PRE_COPY flag
  vfio/mlx5: Fallback to STOP_COPY upon specific PRE_COPY error
  vfio/mlx5: Introduce multiple loads
  vfio/mlx5: Consider temporary end of stream as part of PRE_COPY
  vfio/mlx5: Introduce vfio precopy ioctl implementation
  vfio/mlx5: Introduce SW headers for migration states
  vfio/mlx5: Introduce device transitions of PRE_COPY
  vfio/mlx5: Refactor to use queue based data chunks
  vfio/mlx5: Refactor migration file state
  vfio/mlx5: Refactor MKEY usage
  vfio/mlx5: Refactor PD usage
  vfio/mlx5: Enforce a single SAVE command at a time
  vfio: Extend the device migration protocol with PRE_COPY
  ...

17 files changed:
1  2 
MAINTAINERS
drivers/gpu/drm/i915/gvt/kvmgt.c
drivers/s390/cio/vfio_ccw_fsm.c
drivers/s390/cio/vfio_ccw_ops.c
drivers/s390/crypto/vfio_ap_ops.c
drivers/vfio/Kconfig
drivers/vfio/Makefile
drivers/vfio/fsl-mc/vfio_fsl_mc.c
drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
drivers/vfio/pci/mlx5/main.c
drivers/vfio/pci/vfio_pci_core.c
drivers/vfio/platform/vfio_amba.c
drivers/vfio/platform/vfio_platform.c
drivers/vfio/vfio.h
drivers/vfio/vfio_main.c
include/linux/mlx5/mlx5_ifc.h
include/linux/vfio.h

diff --combined MAINTAINERS
index 096ae475e21cd95018290cb9c83e17898a8b5ba2,daa6a7a755ecefef221371d14af9450cc11957eb..4d75ffe9affaa6de590b595965fb8196274e5882
@@@ -312,13 -312,6 +312,13 @@@ L:       [email protected]
  S:    Maintained
  F:    drivers/counter/104-quad-8.c
  
 +ACCES IDIO-16 GPIO LIBRARY
 +M:    William Breathitt Gray <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    drivers/gpio/gpio-idio-16.c
 +F:    drivers/gpio/gpio-idio-16.h
 +
  ACCES PCI-IDIO-16 GPIO DRIVER
  M:    William Breathitt Gray <[email protected]>
  L:    [email protected]
@@@ -782,24 -775,6 +782,24 @@@ T:       git git://linuxtv.org/media_tree.gi
  F:    Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml
  F:    drivers/media/platform/sunxi/sun4i-csi/
  
 +ALLWINNER A31 CSI DRIVER
 +M:    Yong Deng <[email protected]>
 +M:    Paul Kocialkowski <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git git://linuxtv.org/media_tree.git
 +F:    Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml
 +F:    drivers/media/platform/sunxi/sun6i-csi/
 +
 +ALLWINNER A31 ISP DRIVER
 +M:    Paul Kocialkowski <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git git://linuxtv.org/media_tree.git
 +F:    Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml
 +F:    drivers/staging/media/sunxi/sun6i-isp/
 +F:    drivers/staging/media/sunxi/sun6i-isp/uapi/sun6i-isp-config.h
 +
  ALLWINNER A31 MIPI CSI-2 BRIDGE DRIVER
  M:    Paul Kocialkowski <[email protected]>
  L:    [email protected]
@@@ -1118,16 -1093,6 +1118,16 @@@ S:    Maintaine
  F:    Documentation/hid/amd-sfh*
  F:    drivers/hid/amd-sfh-hid/
  
 +AMLOGIC DDR PMU DRIVER
 +M:    Jiucheng Xu <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +W:    http://www.amlogic.com
 +F:    Documentation/admin-guide/perf/meson-ddr-pmu.rst
 +F:    Documentation/devicetree/bindings/perf/amlogic,g12-ddr-pmu.yaml
 +F:    drivers/perf/amlogic/
 +F:    include/soc/amlogic/
 +
  AMPHION VPU CODEC V4L2 DRIVER
  M:    Ming Qian <[email protected]>
  M:    Shijie Qin <[email protected]>
@@@ -1720,7 -1685,7 +1720,7 @@@ M:      Miquel Raynal <miquel.raynal@bootlin
  M:    Naga Sureshkumar Relli <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
  S:    Maintained
 -F:    Documentation/devicetree/bindings/memory-controllers/arm,pl353-smc.yaml
 +F:    Documentation/devicetree/bindings/memory-controllers/arm,pl35x-smc.yaml
  F:    drivers/memory/pl353-smc.c
  
  ARM PRIMECELL CLCD PL110 DRIVER
@@@ -1932,14 -1897,12 +1932,14 @@@ T:   git https://github.com/AsahiLinux/li
  F:    Documentation/devicetree/bindings/arm/apple.yaml
  F:    Documentation/devicetree/bindings/arm/apple/*
  F:    Documentation/devicetree/bindings/clock/apple,nco.yaml
 +F:    Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml
  F:    Documentation/devicetree/bindings/dma/apple,admac.yaml
  F:    Documentation/devicetree/bindings/i2c/apple,i2c.yaml
  F:    Documentation/devicetree/bindings/interrupt-controller/apple,*
  F:    Documentation/devicetree/bindings/iommu/apple,dart.yaml
  F:    Documentation/devicetree/bindings/iommu/apple,sart.yaml
  F:    Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml
 +F:    Documentation/devicetree/bindings/net/bluetooth/brcm,bcm4377-bluetooth.yaml
  F:    Documentation/devicetree/bindings/nvme/apple,nvme-ans.yaml
  F:    Documentation/devicetree/bindings/nvmem/apple,efuses.yaml
  F:    Documentation/devicetree/bindings/pci/apple,pcie.yaml
@@@ -1947,9 -1910,7 +1947,9 @@@ F:      Documentation/devicetree/bindings/pi
  F:    Documentation/devicetree/bindings/power/apple*
  F:    Documentation/devicetree/bindings/watchdog/apple,wdt.yaml
  F:    arch/arm64/boot/dts/apple/
 +F:    drivers/bluetooth/hci_bcm4377.c
  F:    drivers/clk/clk-apple-nco.c
 +F:    drivers/cpufreq/apple-soc-cpufreq.c
  F:    drivers/dma/apple-admac.c
  F:    drivers/i2c/busses/i2c-pasemi-core.c
  F:    drivers/i2c/busses/i2c-pasemi-platform.c
@@@ -2236,7 -2197,7 +2236,7 @@@ M:      Wei Xu <[email protected]
  L:    [email protected] (moderated for non-subscribers)
  S:    Supported
  W:    http://www.hisilicon.com
 -T:    git git://github.com/hisilicon/linux-hisi.git
 +T:    git https://github.com/hisilicon/linux-hisi.git
  F:    arch/arm/boot/dts/hi3*
  F:    arch/arm/boot/dts/hip*
  F:    arch/arm/boot/dts/hisi*
@@@ -2311,6 -2272,8 +2311,6 @@@ F:      drivers/clocksource/timer-ixp4xx.
  F:    drivers/crypto/ixp4xx_crypto.c
  F:    drivers/gpio/gpio-ixp4xx.c
  F:    drivers/irqchip/irq-ixp4xx.c
 -F:    include/linux/irqchip/irq-ixp4xx.h
 -F:    include/linux/platform_data/timer-ixp4xx.h
  
  ARM/INTEL KEEMBAY ARCHITECTURE
  M:    Paul J. Murphy <[email protected]>
@@@ -2378,8 -2341,6 +2378,8 @@@ M:      Gregory Clement <gregory.clement@boo
  L:    [email protected] (moderated for non-subscribers)
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/gclement/mvebu.git
 +F:    Documentation/devicetree/bindings/arm/marvell/marvell,dove.txt
 +F:    Documentation/devicetree/bindings/arm/marvell/marvell,orion5x.txt
  F:    Documentation/devicetree/bindings/soc/dove/
  F:    arch/arm/boot/dts/dove*
  F:    arch/arm/boot/dts/orion5x*
@@@ -2396,7 -2357,6 +2396,7 @@@ M:      Sebastian Hesselbarth <sebastian.hes
  L:    [email protected] (moderated for non-subscribers)
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/gclement/mvebu.git
 +F:    Documentation/devicetree/bindings/arm/marvell/
  F:    arch/arm/boot/dts/armada*
  F:    arch/arm/boot/dts/kirkwood*
  F:    arch/arm/configs/mvebu_*_defconfig
@@@ -2479,7 -2439,6 +2479,7 @@@ L:      [email protected]
  S:    Supported
  T:    git git://github.com/microchip-ung/linux-upstream.git
  F:    arch/arm64/boot/dts/microchip/
 +F:    drivers/net/ethernet/microchip/vcap/
  F:    drivers/pinctrl/pinctrl-microchip-sgpio.c
  N:    sparx5
  
@@@ -2661,7 -2620,7 +2661,7 @@@ W:      http://www.armlinux.org.uk
  ARM/QUALCOMM SUPPORT
  M:    Andy Gross <[email protected]>
  M:    Bjorn Andersson <[email protected]>
 -R:    Konrad Dybcio <konrad.dybcio@somainline.org>
 +R:    Konrad Dybcio <konrad.dybcio@linaro.org>
  L:    [email protected]
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git
@@@ -2732,7 -2691,7 +2732,7 @@@ F:      arch/arm/boot/dts/rtd
  F:    arch/arm/mach-realtek/
  F:    arch/arm64/boot/dts/realtek/
  
 -ARM/RENESAS ARCHITECTURE
 +ARM/RISC-V/RENESAS ARCHITECTURE
  M:    Geert Uytterhoeven <[email protected]>
  M:    Magnus Damm <[email protected]>
  L:    [email protected]
@@@ -2740,6 -2699,7 +2740,6 @@@ S:      Supporte
  Q:    http://patchwork.kernel.org/project/linux-renesas-soc/list/
  C:    irc://irc.libera.chat/renesas-soc
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git next
 -F:    Documentation/devicetree/bindings/arm/renesas.yaml
  F:    Documentation/devicetree/bindings/hwinfo/renesas,prr.yaml
  F:    Documentation/devicetree/bindings/soc/renesas/
  F:    arch/arm/boot/dts/emev2*
@@@ -2753,7 -2713,6 +2753,7 @@@ F:      arch/arm/configs/shmobile_defconfi
  F:    arch/arm/include/debug/renesas-scif.S
  F:    arch/arm/mach-shmobile/
  F:    arch/arm64/boot/dts/renesas/
 +F:    arch/riscv/boot/dts/renesas/
  F:    drivers/soc/renesas/
  F:    include/linux/soc/renesas/
  
@@@ -4850,7 -4809,7 +4850,7 @@@ R:      Jeff Layton <[email protected]
  L:    [email protected]
  S:    Supported
  W:    http://ceph.com/
 -T:    git git://github.com/ceph/ceph-client.git
 +T:    git https://github.com/ceph/ceph-client.git
  F:    include/linux/ceph/
  F:    include/linux/crush/
  F:    net/ceph/
@@@ -4862,7 -4821,7 +4862,7 @@@ R:      Jeff Layton <[email protected]
  L:    [email protected]
  S:    Supported
  W:    http://ceph.com/
 -T:    git git://github.com/ceph/ceph-client.git
 +T:    git https://github.com/ceph/ceph-client.git
  F:    Documentation/filesystems/ceph.rst
  F:    fs/ceph/
  
@@@ -4952,7 -4911,7 +4952,7 @@@ F:      drivers/platform/chrome
  
  CHROMEOS EC CODEC DRIVER
  M:    Cheng-Yi Chiang <[email protected]>
 -M:    Tzung-Bi Shih <tzungbi@google.com>
 +M:    Tzung-Bi Shih <tzungbi@kernel.org>
  R:    Guenter Roeck <[email protected]>
  L:    [email protected]
  S:    Maintained
@@@ -4982,12 -4941,6 +4982,12 @@@ S:    Maintaine
  F:    drivers/platform/chrome/cros_usbpd_notify.c
  F:    include/linux/platform_data/cros_usbpd_notify.h
  
 +CHROMEOS HPS DRIVER
 +M:    Dan Callaghan <[email protected]>
 +R:    Sami Kyöstilä <[email protected]>
 +S:    Maintained
 +F:    drivers/platform/chrome/cros_hps_i2c.c
 +
  CHRONTEL CH7322 CEC DRIVER
  M:    Joe Tessler <[email protected]>
  L:    [email protected]
@@@ -5346,7 -5299,7 +5346,7 @@@ M:      Johannes Weiner <[email protected]
  M:    Michal Hocko <[email protected]>
  M:    Roman Gushchin <[email protected]>
  M:    Shakeel Butt <[email protected]>
 -R:    Muchun Song <[email protected]>
 +R:    Muchun Song <[email protected]>
  L:    [email protected]
  L:    [email protected]
  S:    Maintained
@@@ -5549,6 -5502,14 +5549,6 @@@ M:     Jaya Kumar <[email protected]
  S:    Maintained
  F:    sound/pci/cs5535audio/
  
 -CSI DRIVERS FOR ALLWINNER V3s
 -M:    Yong Deng <[email protected]>
 -L:    [email protected]
 -S:    Maintained
 -T:    git git://linuxtv.org/media_tree.git
 -F:    Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml
 -F:    drivers/media/platform/sunxi/sun6i-csi/
 -
  CTU CAN FD DRIVER
  M:    Pavel Pisa <[email protected]>
  M:    Ondrej Ille <[email protected]>
@@@ -5624,6 -5585,8 +5624,6 @@@ F:      drivers/scsi/cxgbi/cxgb3
  
  CXGB4 CRYPTO DRIVER (chcr)
  M:    Ayush Sawal <[email protected]>
 -M:    Vinay Kumar Yadav <[email protected]>
 -M:    Rohit Maheshwari <[email protected]>
  L:    [email protected]
  S:    Supported
  W:    http://www.chelsio.com
@@@ -5631,6 -5594,8 +5631,6 @@@ F:      drivers/crypto/chelsi
  
  CXGB4 INLINE CRYPTO DRIVER
  M:    Ayush Sawal <[email protected]>
 -M:    Vinay Kumar Yadav <[email protected]>
 -M:    Rohit Maheshwari <[email protected]>
  L:    [email protected]
  S:    Supported
  W:    http://www.chelsio.com
  S:    Maintained
  F:    drivers/platform/x86/dell/dell-wmi-descriptor.c
  
 +DELL WMI DDV DRIVER
 +M:    Armin Wolf <[email protected]>
 +S:    Maintained
 +F:    Documentation/ABI/testing/debugfs-dell-wmi-ddv
 +F:    Documentation/ABI/testing/sysfs-platform-dell-wmi-ddv
 +F:    drivers/platform/x86/dell/dell-wmi-ddv.c
 +
  DELL WMI SYSMAN DRIVER
  M:    Divya Bharathi <[email protected]>
  M:    Prasanth Ksr <[email protected]>
@@@ -6075,12 -6033,11 +6075,12 @@@ F:   include/net/devlink.
  F:    include/uapi/linux/devlink.h
  F:    net/core/devlink.c
  
 -DH ELECTRONICS IMX6 DHCOM BOARD SUPPORT
 +DH ELECTRONICS IMX6 DHCOM/DHCOR BOARD SUPPORT
  M:    Christoph Niedermaier <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    arch/arm/boot/dts/imx6*-dhcom-*
 +F:    arch/arm/boot/dts/imx6*-dhcor-*
  
  DH ELECTRONICS STM32MP1 DHCOM/DHCOR BOARD SUPPORT
  M:    Marek Vasut <[email protected]>
@@@ -6372,7 -6329,6 +6372,7 @@@ F:      drivers/net/ethernet/freescale/dpaa2
  F:    drivers/net/ethernet/freescale/dpaa2/Makefile
  F:    drivers/net/ethernet/freescale/dpaa2/dpaa2-eth*
  F:    drivers/net/ethernet/freescale/dpaa2/dpaa2-mac*
 +F:    drivers/net/ethernet/freescale/dpaa2/dpaa2-xsk*
  F:    drivers/net/ethernet/freescale/dpaa2/dpkg.h
  F:    drivers/net/ethernet/freescale/dpaa2/dpmac*
  F:    drivers/net/ethernet/freescale/dpaa2/dpni*
@@@ -6550,12 -6506,6 +6550,12 @@@ S:    Orphan / Obsolet
  F:    drivers/gpu/drm/i810/
  F:    include/uapi/drm/i810_drm.h
  
 +DRM DRIVER FOR JADARD JD9365DA-H3 MIPI-DSI LCD PANELS
 +M:    Jagan Teki <[email protected]>
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/display/panel/jadard,jd9365da-h3.yaml
 +F:    drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c
 +
  DRM DRIVER FOR LOGICVC DISPLAY CONTROLLER
  M:    Paul Kocialkowski <[email protected]>
  S:    Supported
  S:    Maintained
  T:    git git://anongit.freedesktop.org/drm/drm-misc
  F:    drivers/gpu/drm/drm_aperture.c
 +F:    drivers/gpu/drm/tiny/ofdrm.c
  F:    drivers/gpu/drm/tiny/simpledrm.c
  F:    drivers/video/aperture.c
 +F:    drivers/video/nomodeset.c
  F:    include/drm/drm_aperture.h
  F:    include/linux/aperture.h
 +F:    include/video/nomodeset.h
  
  DRM DRIVER FOR SIS VIDEO CARDS
  S:    Orphan / Obsolete
@@@ -6880,15 -6827,6 +6880,15 @@@ F:    include/drm/drm
  F:    include/linux/vga*
  F:    include/uapi/drm/drm*
  
 +DRM COMPUTE ACCELERATORS DRIVERS AND FRAMEWORK
 +M:    Oded Gabbay <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +C:    irc://irc.oftc.net/dri-devel
 +T:    git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/accel.git
 +F:    Documentation/accel/
 +F:    drivers/accel/
 +
  DRM DRIVERS FOR ALLWINNER A10
  M:    Maxime Ripard <[email protected]>
  M:    Chen-Yu Tsai <[email protected]>
@@@ -7177,7 -7115,7 +7177,7 @@@ F:      drivers/gpu/drm/ttm
  F:    include/drm/ttm/
  
  DRM GPU SCHEDULER
 -M:    Andrey Grodzovsky <andrey.grodzovsky@amd.com>
 +M:    Luben Tuikov <luben.tuikov@amd.com>
  L:    [email protected]
  S:    Maintained
  T:    git git://anongit.freedesktop.org/drm/drm-misc
@@@ -7425,9 -7363,9 +7425,9 @@@ F:      drivers/edac/thunderx_edac
  
  EDAC-CORE
  M:    Borislav Petkov <[email protected]>
 -M:    Mauro Carvalho Chehab <[email protected]>
  M:    Tony Luck <[email protected]>
  R:    James Morse <[email protected]>
 +R:    Mauro Carvalho Chehab <[email protected]>
  R:    Robert Richter <[email protected]>
  L:    [email protected]
  S:    Supported
@@@ -7544,7 -7482,8 +7544,7 @@@ S:      Maintaine
  F:    drivers/edac/pnd2_edac.[ch]
  
  EDAC-QCOM
 -M:    Channagoud Kadabi <[email protected]>
 -M:    Venkata Narendra Kumar Gutta <[email protected]>
 +M:    Manivannan Sadhasivam <[email protected]>
  L:    [email protected]
  L:    [email protected]
  S:    Maintained
@@@ -7745,7 -7684,6 +7745,7 @@@ ETAS ES58X CAN/USB DRIVE
  M:    Vincent Mailhol <[email protected]>
  L:    [email protected]
  S:    Maintained
 +F:    Documentation/networking/devlink/etas_es58x.rst
  F:    drivers/net/can/usb/etas_es58x/
  
  ETHERNET BRIDGE
@@@ -7851,6 -7789,7 +7851,6 @@@ F:      Documentation/admin-guide/efi-stub.r
  F:    arch/*/include/asm/efi.h
  F:    arch/*/kernel/efi.c
  F:    arch/arm/boot/compressed/efi-header.S
 -F:    arch/arm64/kernel/efi-entry.S
  F:    arch/x86/platform/efi/
  F:    drivers/firmware/efi/
  F:    include/linux/efi*.h
@@@ -7896,7 -7835,6 +7896,7 @@@ M:      Chao Yu <[email protected]
  L:    [email protected]
  S:    Maintained
  W:    https://f2fs.wiki.kernel.org/
 +B:    https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=f2fs
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
  F:    Documentation/ABI/testing/sysfs-fs-f2fs
  F:    Documentation/filesystems/f2fs.rst
@@@ -8113,8 -8051,6 +8113,8 @@@ S:      Supporte
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
  F:    include/linux/fortify-string.h
  F:    lib/fortify_kunit.c
 +F:    lib/memcpy_kunit.c
 +F:    lib/strscpy_kunit.c
  F:    lib/test_fortify/*
  F:    scripts/test_fortify.sh
  K:    \b__NO_FORTIFY\b
@@@ -8251,10 -8187,7 +8251,10 @@@ S:    Maintaine
  F:    drivers/i2c/busses/i2c-cpm.c
  
  FREESCALE IMX / MXC FEC DRIVER
 -M:    Joakim Zhang <[email protected]>
 +M:    Wei Fang <[email protected]>
 +R:    Shenwei Wang <[email protected]>
 +R:    Clark Wang <[email protected]>
 +R:    NXP Linux Team <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/net/fsl,fec.yaml
@@@ -8669,8 -8602,8 +8669,8 @@@ F:      include/asm-generic
  F:    include/uapi/asm-generic/
  
  GENERIC PHY FRAMEWORK
 -M:    Kishon Vijay Abraham I <[email protected]>
  M:    Vinod Koul <[email protected]>
 +M:    Kishon Vijay Abraham I <[email protected]>
  L:    [email protected]
  S:    Supported
  Q:    https://patchwork.kernel.org/project/linux-phy/list/
@@@ -8813,7 -8746,6 +8813,7 @@@ GPIO IR Transmitte
  M:    Sean Young <[email protected]>
  L:    [email protected]
  S:    Maintained
 +F:    Documentation/devicetree/bindings/leds/irled/gpio-ir-tx.yaml
  F:    drivers/media/rc/gpio-ir-tx.c
  
  GPIO MOCKUP DRIVER
@@@ -9237,13 -9169,6 +9237,13 @@@ W:    http://www.highpoint-tech.co
  F:    Documentation/scsi/hptiop.rst
  F:    drivers/scsi/hptiop.c
  
 +HIMAX HX83112B TOUCHSCREEN SUPPORT
 +M:    Job Noorman <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/input/touchscreen/himax,hx83112b.yaml
 +F:    drivers/input/touchscreen/himax_hx83112b.c
 +
  HIPPI
  M:    Jes Sorensen <[email protected]>
  L:    [email protected]
@@@ -9273,7 -9198,6 +9273,7 @@@ HISILICON GPIO DRIVE
  M:    Jay Fang <[email protected]>
  L:    [email protected]
  S:    Maintained
 +F:    Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml
  F:    drivers/gpio/gpio-hisi.c
  
  HISILICON HIGH PERFORMANCE RSA ENGINE DRIVER (HPRE)
@@@ -9324,7 -9248,7 +9324,7 @@@ F:      drivers/misc/hisi_hikey_usb.
  
  HISILICON PMU DRIVER
  M:    Shaokun Zhang <[email protected]>
 -M:    Qi Liu <liuqi115@huawei.com>
 +M:    Jonathan Cameron <jonathan.cameron@huawei.com>
  S:    Supported
  W:    http://www.hisilicon.com
  F:    Documentation/admin-guide/perf/hisi-pcie-pmu.rst
@@@ -9373,7 -9297,7 +9373,7 @@@ F:      Documentation/devicetree/bindings/in
  F:    drivers/infiniband/hw/hns/
  
  HISILICON SAS Controller
 -M:    John Garry <john.garry@huawei.com>
 +M:    Xiang Chen <chenxiang66@hisilicon.com>
  S:    Supported
  W:    http://www.hisilicon.com
  F:    Documentation/devicetree/bindings/scsi/hisilicon-sas.txt
@@@ -9417,7 -9341,7 +9417,7 @@@ S:      Maintaine
  F:    drivers/crypto/hisilicon/trng/trng.c
  
  HISILICON V3XX SPI NOR FLASH Controller Driver
 -M:    John Garry <john.garry@huawei.com>
 +M:    Jay Fang <f.fangjian@huawei.com>
  S:    Maintained
  W:    http://www.hisilicon.com
  F:    drivers/spi/spi-hisi-sfc-v3xx.c
@@@ -9442,7 -9366,7 +9442,7 @@@ F:      drivers/net/wireless/intersil/hostap
  HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER
  L:    [email protected]
  S:    Orphan
 -F:    drivers/platform/x86/tc1100-wmi.c
 +F:    drivers/platform/x86/hp/tc1100-wmi.c
  
  HPET: High Precision Event Timers driver
  M:    Clemens Ladisch <[email protected]>
@@@ -9512,15 -9436,14 +9512,15 @@@ F:   Documentation/devicetree/bindings/ii
  F:    drivers/iio/humidity/hts221*
  
  HUAWEI ETHERNET DRIVER
 +M:    Cai Huoqing <[email protected]>
  L:    [email protected]
 -S:    Orphan
 +S:    Maintained
  F:    Documentation/networking/device_drivers/ethernet/huawei/hinic.rst
  F:    drivers/net/ethernet/huawei/hinic/
  
  HUGETLB SUBSYSTEM
  M:    Mike Kravetz <[email protected]>
 -M:    Muchun Song <[email protected]>
 +M:    Muchun Song <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    Documentation/ABI/testing/sysfs-kernel-mm-hugepages
@@@ -9584,6 -9507,7 +9584,6 @@@ F:      drivers/media/i2c/hi847.
  Hyper-V/Azure CORE AND DRIVERS
  M:    "K. Y. Srinivasan" <[email protected]>
  M:    Haiyang Zhang <[email protected]>
 -M:    Stephen Hemminger <[email protected]>
  M:    Wei Liu <[email protected]>
  M:    Dexuan Cui <[email protected]>
  L:    [email protected]
@@@ -9617,7 -9541,6 +9617,7 @@@ F:      include/asm-generic/hyperv-tlfs.
  F:    include/asm-generic/mshyperv.h
  F:    include/clocksource/hyperv_timer.h
  F:    include/linux/hyperv.h
 +F:    include/net/mana
  F:    include/uapi/linux/hyperv.h
  F:    net/vmw_vsock/hyperv_transport.c
  F:    tools/hv/
@@@ -9781,7 -9704,8 +9781,7 @@@ F:      Documentation/devicetree/bindings/i3
  F:    drivers/i3c/master/i3c-master-cdns.c
  
  I3C DRIVER FOR SYNOPSYS DESIGNWARE
 -M:    Vitor Soares <[email protected]>
 -S:    Maintained
 +S:    Orphan
  F:    Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml
  F:    drivers/i3c/master/dw*
  
@@@ -10103,11 -10027,6 +10103,11 @@@ F: Documentation/hwmon/ina2xx.rs
  F:    drivers/hwmon/ina2xx.c
  F:    include/linux/platform_data/ina2xx.h
  
 +INDEX OF FURTHER KERNEL DOCUMENTATION
 +M:    Carlos Bilbao <[email protected]>
 +S:    Maintained
 +F:    Documentation/process/kernel-docs.rst
 +
  INDUSTRY PACK SUBSYSTEM (IPACK)
  M:    Samuel Iglesias Gonsalvez <[email protected]>
  M:    Jens Taprogge <[email protected]>
@@@ -10137,7 -10056,6 +10137,7 @@@ F:   drivers/infiniband
  F:    include/rdma/
  F:    include/trace/events/ib_mad.h
  F:    include/trace/events/ib_umad.h
 +F:    include/trace/misc/rdma.h
  F:    include/uapi/linux/if_infiniband.h
  F:    include/uapi/rdma/
  F:    samples/bpf/ibumad_kern.c
@@@ -10311,7 -10229,6 +10311,7 @@@ Q:   http://patchwork.freedesktop.org/pro
  B:    https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
  C:    irc://irc.oftc.net/intel-gfx
  T:    git git://anongit.freedesktop.org/drm-intel
 +F:    Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
  F:    Documentation/gpu/i915.rst
  F:    drivers/gpu/drm/i915/
  F:    include/drm/i915*
@@@ -10371,7 -10288,7 +10371,7 @@@ T:   git https://github.com/intel/gvt-lin
  F:    drivers/gpu/drm/i915/gvt/
  
  INTEL HID EVENT DRIVER
 -M:    Alex Hung <alex.hung@canonical.com>
 +M:    Alex Hung <alexhung@gmail.com>
  L:    [email protected]
  S:    Maintained
  F:    drivers/platform/x86/intel/hid.c
@@@ -10803,18 -10720,6 +10803,18 @@@ F: drivers/iommu/dma-iommu.
  F:    drivers/iommu/iova.c
  F:    include/linux/iova.h
  
 +IOMMUFD
 +M:    Jason Gunthorpe <[email protected]>
 +M:    Kevin Tian <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd.git
 +F:    Documentation/userspace-api/iommufd.rst
 +F:    drivers/iommu/iommufd/
 +F:    include/linux/iommufd.h
 +F:    include/uapi/linux/iommufd.h
 +F:    tools/testing/selftests/iommu/
 +
  IOMMU SUBSYSTEM
  M:    Joerg Roedel <[email protected]>
  M:    Will Deacon <[email protected]>
@@@ -10994,13 -10899,6 +10994,13 @@@ F: drivers/isdn/Makefil
  F:    drivers/isdn/hardware/
  F:    drivers/isdn/mISDN/
  
 +ISOFS FILESYSTEM
 +M:    Jan Kara <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/filesystems/isofs.rst
 +F:    fs/isofs/
 +
  IT87 HARDWARE MONITORING DRIVER
  M:    Jean Delvare <[email protected]>
  L:    [email protected]
@@@ -11062,9 -10960,9 +11062,9 @@@ F:   drivers/hwmon/jc42.
  JFS FILESYSTEM
  M:    Dave Kleikamp <[email protected]>
  L:    [email protected]
 -S:    Maintained
 +S:    Odd Fixes
  W:    http://jfs.sourceforge.net/
 -T:    git git://github.com/kleikamp/linux-shaggy.git
 +T:    git https://github.com/kleikamp/linux-shaggy.git
  F:    Documentation/admin-guide/jfs.rst
  F:    fs/jfs/
  
@@@ -11138,7 -11036,6 +11138,7 @@@ KCONFI
  M:    Masahiro Yamada <[email protected]>
  L:    [email protected]
  S:    Maintained
 +Q:    https://patchwork.kernel.org/project/linux-kbuild/list/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git kconfig
  F:    Documentation/kbuild/kconfig*
  F:    scripts/Kconfig.include
@@@ -11196,12 -11093,10 +11196,12 @@@ F:        fs/autofs
  
  KERNEL BUILD + files below scripts/ (unless maintained elsewhere)
  M:    Masahiro Yamada <[email protected]>
 -M:    Michal Marek <[email protected]>
 +R:    Nathan Chancellor <[email protected]>
  R:    Nick Desaulniers <[email protected]>
 +R:    Nicolas Schier <[email protected]>
  L:    [email protected]
  S:    Maintained
 +Q:    https://patchwork.kernel.org/project/linux-kbuild/list/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git
  F:    Documentation/kbuild/
  F:    Makefile
@@@ -11219,8 -11114,6 +11219,8 @@@ M:   Kees Cook <[email protected]
  L:    [email protected]
  S:    Supported
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
 +F:    Documentation/ABI/testing/sysfs-kernel-oops_count
 +F:    Documentation/ABI/testing/sysfs-kernel-warn_count
  F:    include/linux/overflow.h
  F:    include/linux/randomize_kstack.h
  F:    mm/usercopy.c
@@@ -11239,18 -11132,11 +11239,18 @@@ L:        [email protected]
  S:    Supported
  W:    http://nfs.sourceforge.net/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git
 +F:    fs/exportfs/
  F:    fs/lockd/
  F:    fs/nfs_common/
  F:    fs/nfsd/
  F:    include/linux/lockd/
  F:    include/linux/sunrpc/
 +F:    include/trace/events/rpcgss.h
 +F:    include/trace/events/rpcrdma.h
 +F:    include/trace/events/sunrpc.h
 +F:    include/trace/misc/fs.h
 +F:    include/trace/misc/nfs.h
 +F:    include/trace/misc/sunrpc.h
  F:    include/uapi/linux/nfsd/
  F:    include/uapi/linux/sunrpc/
  F:    net/sunrpc/
@@@ -11438,16 -11324,6 +11438,16 @@@ F: arch/x86/kvm/svm/hyperv.
  F:    arch/x86/kvm/svm/svm_onhyperv.*
  F:    arch/x86/kvm/vmx/evmcs.*
  
 +KVM X86 Xen (KVM/Xen)
 +M:    David Woodhouse <[email protected]>
 +M:    Paul Durrant <[email protected]>
 +M:    Sean Christopherson <[email protected]>
 +M:    Paolo Bonzini <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +T:    git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
 +F:    arch/x86/kvm/xen.*
 +
  KERNFS
  M:    Greg Kroah-Hartman <[email protected]>
  M:    Tejun Heo <[email protected]>
@@@ -11982,7 -11858,7 +11982,7 @@@ M:   Eric Piel <[email protected]
  S:    Maintained
  F:    Documentation/misc-devices/lis3lv02d.rst
  F:    drivers/misc/lis3lv02d/
 -F:    drivers/platform/x86/hp_accel.c
 +F:    drivers/platform/x86/hp/hp_accel.c
  
  LIST KUNIT TEST
  M:    David Gow <[email protected]>
@@@ -12137,21 -12013,6 +12137,21 @@@ F: drivers/*/*loongarch
  F:    Documentation/loongarch/
  F:    Documentation/translations/zh_CN/loongarch/
  
 +LOONGSON-2 SOC SERIES GUTS DRIVER
 +M:    Yinbo Zhu <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/hwinfo/loongson,ls2k-chipid.yaml
 +F:    drivers/soc/loongson/loongson2_guts.c
 +
 +LOONGSON-2 SOC SERIES PINCTRL DRIVER
 +M:    zhanghongchen <[email protected]>
 +M:    Yinbo Zhu <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/pinctrl/loongson,ls2k-pinctrl.yaml
 +F:    drivers/pinctrl/pinctrl-loongson2.c
 +
  LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
  M:    Sathya Prakash <[email protected]>
  M:    Sreekanth Reddy <[email protected]>
@@@ -12229,7 -12090,7 +12229,7 @@@ M:   Alexey Kodanev <alexey.kodanev@oracl
  L:    [email protected] (subscribers-only)
  S:    Maintained
  W:    http://linux-test-project.github.io/
 -T:    git git://github.com/linux-test-project/ltp.git
 +T:    git https://github.com/linux-test-project/ltp.git
  
  LYNX 28G SERDES PHY DRIVER
  M:    Ioana Ciornei <[email protected]>
@@@ -12365,6 -12226,7 +12365,6 @@@ F:   arch/mips/boot/dts/img/pistachio
  
  MARVELL 88E6XXX ETHERNET SWITCH FABRIC DRIVER
  M:    Andrew Lunn <[email protected]>
 -M:    Vivien Didelot <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/net/dsa/marvell.txt
@@@ -12454,7 -12316,7 +12454,7 @@@ M:   Marcin Wojtas <[email protected]
  M:    Russell King <[email protected]>
  L:    [email protected]
  S:    Maintained
 -F:    Documentation/devicetree/bindings/net/marvell-pp2.txt
 +F:    Documentation/devicetree/bindings/net/marvell,pp2.yaml
  F:    drivers/net/ethernet/marvell/mvpp2/
  
  MARVELL MWIFIEX WIRELESS DRIVER
@@@ -12502,7 -12364,7 +12502,7 @@@ F:   Documentation/networking/device_driv
  F:    drivers/net/ethernet/marvell/octeontx2/af/
  
  MARVELL PRESTERA ETHERNET SWITCH DRIVER
 -M:    Taras Chornyi <t[email protected]>
 +M:    Taras Chornyi <t[email protected]>
  S:    Supported
  W:    https://github.com/Marvell-switching/switchdev-prestera
  F:    drivers/net/ethernet/marvell/prestera/
@@@ -12864,7 -12726,7 +12864,7 @@@ F:   Documentation/admin-guide/media/imx7
  F:    Documentation/devicetree/bindings/media/nxp,imx-mipi-csi2.yaml
  F:    Documentation/devicetree/bindings/media/nxp,imx7-csi.yaml
  F:    drivers/media/platform/nxp/imx-mipi-csis.c
 -F:    drivers/staging/media/imx/imx7-media-csi.c
 +F:    drivers/media/platform/nxp/imx7-media-csi.c
  
  MEDIA DRIVERS FOR HELENE
  M:    Abylay Ospan <[email protected]>
@@@ -13061,7 -12923,6 +13061,7 @@@ M:   Felix Fietkau <[email protected]
  M:    John Crispin <[email protected]>
  M:    Sean Wang <[email protected]>
  M:    Mark Lee <[email protected]>
 +M:    Lorenzo Bianconi <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    drivers/net/ethernet/mediatek/
@@@ -13433,20 -13294,10 +13433,20 @@@ F:        include/linux/memory_hotplug.
  F:    include/linux/mm.h
  F:    include/linux/mmzone.h
  F:    include/linux/pagewalk.h
 -F:    include/linux/vmalloc.h
  F:    mm/
  F:    tools/testing/selftests/vm/
  
 +VMALLOC
 +M:    Andrew Morton <[email protected]>
 +R:    Uladzislau Rezki <[email protected]>
 +R:    Christoph Hellwig <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +W:    http://www.linux-mm.org
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
 +F:    include/linux/vmalloc.h
 +F:    mm/vmalloc.c
 +
  MEMORY HOT(UN)PLUG
  M:    David Hildenbrand <[email protected]>
  M:    Oscar Salvador <[email protected]>
@@@ -13534,7 -13385,7 +13534,7 @@@ MESON NAND CONTROLLER DRIVER FOR AMLOGI
  M:    Liang Yang <[email protected]>
  L:    [email protected]
  S:    Maintained
 -F:    Documentation/devicetree/bindings/mtd/amlogic,meson-nand.txt
 +F:    Documentation/devicetree/bindings/mtd/amlogic,meson-nand.yaml
  F:    drivers/mtd/nand/raw/meson_*
  
  MESON VIDEO DECODER DRIVER FOR AMLOGIC SOCS
@@@ -13615,7 -13466,7 +13615,7 @@@ M:   Eugen Hristev <eugen.hristev@microch
  L:    [email protected]
  S:    Supported
  F:    Documentation/devicetree/bindings/media/microchip,csi2dc.yaml
 -F:    drivers/media/platform/atmel/microchip-csi2dc.c
 +F:    drivers/media/platform/microchip/microchip-csi2dc.c
  
  MICROCHIP ECC DRIVER
  M:    Tudor Ambarus <[email protected]>
  S:    Supported
  F:    Documentation/devicetree/bindings/media/atmel,isc.yaml
  F:    Documentation/devicetree/bindings/media/microchip,xisc.yaml
 -F:    drivers/media/platform/atmel/atmel-isc*
 -F:    drivers/media/platform/atmel/atmel-sama*-isc*
 +F:    drivers/staging/media/deprecated/atmel/atmel-isc*
 +F:    drivers/staging/media/deprecated/atmel/atmel-sama*-isc*
 +F:    drivers/media/platform/microchip/microchip-isc*
 +F:    drivers/media/platform/microchip/microchip-sama*-isc*
  F:    include/linux/atmel-isc-media.h
  
  MICROCHIP ISI DRIVER
@@@ -13778,12 -13627,6 +13778,12 @@@ S: Supporte
  F:    drivers/misc/atmel-ssc.c
  F:    include/linux/atmel-ssc.h
  
 +MICROCHIP SOC DRIVERS
 +M:    Conor Dooley <[email protected]>
 +S:    Supported
 +T:    git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/
 +F:    drivers/soc/microchip/
 +
  MICROCHIP USB251XB DRIVER
  M:    Richard Leitner <[email protected]>
  L:    [email protected]
@@@ -13828,15 -13671,6 +13828,15 @@@ F: drivers/scsi/smartpqi/smartpqi*.[ch
  F:    include/linux/cciss*.h
  F:    include/uapi/linux/cciss*.h
  
 +MICROSOFT MANA RDMA DRIVER
 +M:    Long Li <[email protected]>
 +M:    Ajay Sharma <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +F:    drivers/infiniband/hw/mana/
 +F:    include/net/mana
 +F:    include/uapi/rdma/mana-abi.h
 +
  MICROSOFT SURFACE AGGREGATOR TABLET-MODE SWITCH
  M:    Maximilian Luz <[email protected]>
  L:    [email protected]
@@@ -14112,7 -13946,6 +14112,7 @@@ F:   include/uapi/linux/meye.
  
  MOTORCOMM PHY DRIVER
  M:    Peter Geis <[email protected]>
 +M:    Frank <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    drivers/net/phy/motorcomm.c
@@@ -14491,6 -14324,7 +14491,6 @@@ F:   drivers/net/wireless
  
  NETWORKING [DSA]
  M:    Andrew Lunn <[email protected]>
 -M:    Vivien Didelot <[email protected]>
  M:    Florian Fainelli <[email protected]>
  M:    Vladimir Oltean <[email protected]>
  S:    Maintained
@@@ -15347,13 -15181,6 +15347,13 @@@ S: Maintaine
  T:    git git://linuxtv.org/media_tree.git
  F:    drivers/media/i2c/ov08d10.c
  
 +OMNIVISION OV08X40 SENSOR DRIVER
 +M:    Jason Chen <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git git://linuxtv.org/media_tree.git
 +F:    drivers/media/i2c/ov08x40.c
 +
  OMNIVISION OV13858 SENSOR DRIVER
  M:    Sakari Ailus <[email protected]>
  L:    [email protected]
@@@ -15392,14 -15219,6 +15392,14 @@@ S: Maintaine
  T:    git git://linuxtv.org/media_tree.git
  F:    drivers/media/i2c/ov2740.c
  
 +OMNIVISION OV4689 SENSOR DRIVER
 +M:    Mikhail Rudenko <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git git://linuxtv.org/media_tree.git
 +F:    Documentation/devicetree/bindings/media/i2c/ovti,ov4689.yaml
 +F:    drivers/media/i2c/ov4689.c
 +
  OMNIVISION OV5640 SENSOR DRIVER
  M:    Steve Longerbeam <[email protected]>
  L:    [email protected]
@@@ -15524,12 -15343,6 +15524,12 @@@ S: Maintaine
  F:    drivers/mtd/nand/onenand/
  F:    include/linux/mtd/onenand*.h
  
 +ONEXPLAYER FAN DRIVER
 +M:    Joaquín Ignacio Aramendía <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    drivers/hwmon/oxp-sensors.c
 +
  ONION OMEGA2+ BOARD
  M:    Harvey Hunt <[email protected]>
  L:    [email protected]
@@@ -16133,7 -15946,6 +16133,7 @@@ Q:   https://patchwork.kernel.org/project
  B:    https://bugzilla.kernel.org
  C:    irc://irc.oftc.net/linux-pci
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git
 +F:    Documentation/devicetree/bindings/pci/
  F:    drivers/pci/controller/
  F:    drivers/pci/pci-bridge-emul.c
  F:    drivers/pci/pci-bridge-emul.h
@@@ -16240,7 -16052,7 +16240,7 @@@ F:   Documentation/devicetree/bindings/pc
  F:    drivers/pci/controller/*microchip*
  
  PCIE DRIVER FOR QUALCOMM MSM
 -M:    Stanimir Varbanov <[email protected]>
 +M:    Manivannan Sadhasivam <[email protected]>
  L:    [email protected]
  L:    [email protected]
  S:    Maintained
@@@ -16330,8 -16142,7 +16330,8 @@@ F:   include/linux/peci-cpu.
  F:    include/linux/peci.h
  
  PENSANDO ETHERNET DRIVERS
 -M:    Shannon Nelson <[email protected]>
 +M:    Shannon Nelson <[email protected]>
 +M:    Brett Creeley <[email protected]>
  M:    [email protected]
  L:    [email protected]
  S:    Supported
@@@ -16489,7 -16300,7 +16489,7 @@@ M:   Sean Wang <[email protected]
  L:    [email protected] (moderated for non-subscribers)
  S:    Maintained
  F:    Documentation/devicetree/bindings/pinctrl/mediatek,mt65xx-pinctrl.yaml
 -F:    Documentation/devicetree/bindings/pinctrl/mediatek,mt6797-pinctrl.yaml
 +F:    Documentation/devicetree/bindings/pinctrl/mediatek,mt6779-pinctrl.yaml
  F:    Documentation/devicetree/bindings/pinctrl/mediatek,mt7622-pinctrl.yaml
  F:    Documentation/devicetree/bindings/pinctrl/mediatek,mt8183-pinctrl.yaml
  F:    drivers/pinctrl/mediatek/
@@@ -16562,6 -16373,13 +16562,6 @@@ S:  Supporte
  F:    Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml
  F:    drivers/input/keyboard/pinephone-keyboard.c
  
 -PKTCDVD DRIVER
 -M:    [email protected]
 -S:    Orphan
 -F:    drivers/block/pktcdvd.c
 -F:    include/linux/pktcdvd.h
 -F:    include/uapi/linux/pktcdvd.h
 -
  PLANTOWER PMS7003 AIR POLLUTION SENSOR DRIVER
  M:    Tomasz Duszynski <[email protected]>
  S:    Maintained
@@@ -16839,10 -16657,10 +16839,10 @@@ F:        net/psampl
  
  PSTORE FILESYSTEM
  M:    Kees Cook <[email protected]>
 -M:    Anton Vorontsov <[email protected]>
 -M:    Colin Cross <ccross@android.com>
 -M:    Tony Luck <[email protected]>
 -S:    Maintained
 +R:    Tony Luck <[email protected]>
 +R:    Guilherme G. Piccoli <gpiccoli@igalia.com>
 +L:    [email protected]
 +S:    Supported
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/pstore
  F:    Documentation/admin-guide/ramoops.rst
  F:    Documentation/admin-guide/pstore-blk.rst
@@@ -16889,6 -16707,7 +16889,6 @@@ M:   Hans Verkuil <[email protected]
  L:    [email protected]
  S:    Maintained
  T:    git git://linuxtv.org/media_tree.git
 -F:    Documentation/admin-guide/media/pulse8-cec.rst
  F:    drivers/media/cec/usb/pulse8/
  
  PURELIFI PLFXLC DRIVER
@@@ -16919,7 -16738,6 +16919,7 @@@ PWM IR Transmitte
  M:    Sean Young <[email protected]>
  L:    [email protected]
  S:    Maintained
 +F:    Documentation/devicetree/bindings/leds/irled/pwm-ir-tx.yaml
  F:    drivers/media/rc/pwm-ir-tx.c
  
  PWM SUBSYSTEM
@@@ -16984,7 -16802,7 +16984,7 @@@ M:   Srinivas Kandagatla <srinivas.kandag
  M:    Banajit Goswami <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
  S:    Supported
 -F:    Documentation/devicetree/bindings/soc/qcom/qcom,apr.yaml
 +F:    Documentation/devicetree/bindings/soc/qcom/qcom,apr*
  F:    Documentation/devicetree/bindings/sound/qcom,*
  F:    drivers/soc/qcom/apr.c
  F:    include/dt-bindings/sound/qcom,wcd9335.h
@@@ -17342,8 -17160,7 +17342,8 @@@ F:   Documentation/devicetree/bindings/th
  F:    drivers/thermal/qcom/
  
  QUALCOMM VENUS VIDEO ACCELERATOR DRIVER
 -M:    Stanimir Varbanov <[email protected]>
 +M:    Stanimir Varbanov <[email protected]>
 +M:    Vikash Garodia <[email protected]>
  L:    [email protected]
  L:    [email protected]
  S:    Maintained
@@@ -17408,7 -17225,7 +17408,7 @@@ R:   Dongsheng Yang <dongsheng.yang@easys
  L:    [email protected]
  S:    Supported
  W:    http://ceph.com/
 -T:    git git://github.com/ceph/ceph-client.git
 +T:    git https://github.com/ceph/ceph-client.git
  F:    Documentation/ABI/testing/sysfs-bus-rbd
  F:    drivers/block/rbd.c
  F:    drivers/block/rbd_types.h
@@@ -17661,8 -17478,10 +17661,8 @@@ S:  Maintaine
  F:    drivers/net/wireless/realtek/rtw89/
  
  REDPINE WIRELESS DRIVER
 -M:    Amitkumar Karwar <[email protected]>
 -M:    Siva Rebbagondla <[email protected]>
  L:    [email protected]
 -S:    Maintained
 +S:    Orphan
  F:    drivers/net/wireless/rsi/
  
  REGISTER MAP ABSTRACTION
@@@ -17907,7 -17726,7 +17907,7 @@@ F:   arch/riscv
  N:    riscv
  K:    riscv
  
 -RISC-V/MICROCHIP POLARFIRE SOC SUPPORT
 +RISC-V MICROCHIP FPGA SUPPORT
  M:    Conor Dooley <[email protected]>
  M:    Daire McNamara <[email protected]>
  L:    [email protected]
@@@ -17925,26 -17744,17 +17925,26 @@@ F:        Documentation/devicetree/bindings/us
  F:    arch/riscv/boot/dts/microchip/
  F:    drivers/char/hw_random/mpfs-rng.c
  F:    drivers/clk/microchip/clk-mpfs.c
 -F:    drivers/i2c/busses/i2c-microchip-core.c
 +F:    drivers/i2c/busses/i2c-microchip-corei2c.c
  F:    drivers/mailbox/mailbox-mpfs.c
  F:    drivers/pci/controller/pcie-microchip-host.c
  F:    drivers/reset/reset-mpfs.c
  F:    drivers/rtc/rtc-mpfs.c
 -F:    drivers/soc/microchip/
 +F:    drivers/soc/microchip/mpfs-sys-controller.c
  F:    drivers/spi/spi-microchip-core-qspi.c
  F:    drivers/spi/spi-microchip-core.c
  F:    drivers/usb/musb/mpfs.c
  F:    include/soc/microchip/mpfs.h
  
 +RISC-V MISC SOC SUPPORT
 +M:    Conor Dooley <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +Q:    https://patchwork.kernel.org/project/linux-riscv/list/
 +T:    git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/
 +F:    Documentation/devicetree/bindings/riscv/
 +F:    arch/riscv/boot/dts/
 +
  RNBD BLOCK DRIVERS
  M:    Md. Haris Iqbal <[email protected]>
  M:    Jack Wang <[email protected]>
@@@ -17960,13 -17770,6 +17960,13 @@@ F: Documentation/ABI/*/sysfs-driver-hid
  F:    drivers/hid/hid-roccat*
  F:    include/linux/hid-roccat*
  
 +ROCKCHIP CRYPTO DRIVERS
 +M:    Corentin Labbe <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml
 +F:    drivers/crypto/rockchip/
 +
  ROCKCHIP I2S TDM DRIVER
  M:    Nicolas Frattaroli <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    drivers/video/fbdev/savage/
  
 -S390
 +S390 ARCHITECTURE
  M:    Heiko Carstens <[email protected]>
  M:    Vasily Gorbik <[email protected]>
  M:    Alexander Gordeev <[email protected]>
  S:    Supported
  F:    drivers/s390/net/
  
 +S390 MM
 +M:    Alexander Gordeev <[email protected]>
 +M:    Gerald Schaefer <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git
 +F:    arch/s390/include/asm/pgtable.h
 +F:    arch/s390/mm
 +
  S390 PCI SUBSYSTEM
  M:    Niklas Schnelle <[email protected]>
  M:    Gerald Schaefer <[email protected]>
@@@ -18683,7 -18477,6 +18683,7 @@@ K:   \bsecure_computin
  K:    \bTIF_SECCOMP\b
  
  SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) Broadcom BRCMSTB DRIVER
 +M:    Kamal Dasu <[email protected]>
  M:    Al Cooper <[email protected]>
  R:    Broadcom internal kernel review list <[email protected]>
  L:    [email protected]
@@@ -18694,7 -18487,6 +18694,7 @@@ SECURE DIGITAL HOST CONTROLLER INTERFAC
  M:    Adrian Hunter <[email protected]>
  L:    [email protected]
  S:    Supported
 +F:    Documentation/devicetree/bindings/mmc/sdhci-common.yaml
  F:    drivers/mmc/host/sdhci*
  
  SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) MICROCHIP DRIVER
@@@ -18989,6 -18781,7 +18989,6 @@@ M:   Palmer Dabbelt <[email protected]
  M:    Paul Walmsley <[email protected]>
  L:    [email protected]
  S:    Supported
 -T:    git https://github.com/sifive/riscv-linux.git
  N:    sifive
  K:    [^@]sifive
  
@@@ -19007,13 -18800,6 +19007,13 @@@ S: Maintaine
  F:    Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml
  F:    drivers/dma/sf-pdma/
  
 +SIFIVE SOC DRIVERS
 +M:    Conor Dooley <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/
 +F:    drivers/soc/sifive/
 +
  SILEAD TOUCHSCREEN DRIVER
  M:    Hans de Goede <[email protected]>
  L:    [email protected]
@@@ -19080,7 -18866,7 +19080,7 @@@ M:   Jason A. Donenfeld <[email protected]
  S:    Maintained
  F:    include/linux/siphash.h
  F:    lib/siphash.c
 -F:    lib/test_siphash.c
 +F:    lib/siphash_kunit.c
  
  SIS 190 ETHERNET DRIVER
  M:    Francois Romieu <[email protected]>
@@@ -19104,7 -18890,7 +19104,7 @@@ F:   drivers/video/fbdev/sis
  F:    include/video/sisfb.h
  
  SIS I2C TOUCHSCREEN DRIVER
 -M:    Mika Penttilä <mika.penttila@nextfour.com>
 +M:    Mika Penttilä <mpenttil@redhat.com>
  L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/input/touchscreen/sis_i2c.txt
@@@ -19247,7 -19033,7 +19247,7 @@@ M:   Jassi Brar <[email protected]
  M:    Ilias Apalodimas <[email protected]>
  L:    [email protected]
  S:    Maintained
 -F:    Documentation/devicetree/bindings/net/socionext-netsec.txt
 +F:    Documentation/devicetree/bindings/net/socionext,synquacer-netsec.yaml
  F:    drivers/net/ethernet/socionext/netsec.c
  
  SOCIONEXT (SNI) Synquacer SPI DRIVER
@@@ -19255,7 -19041,7 +19255,7 @@@ M:   Masahisa Kojima <masahisa.kojima@lin
  M:    Jassi Brar <[email protected]>
  L:    [email protected]
  S:    Maintained
 -F:    Documentation/devicetree/bindings/spi/spi-synquacer.txt
 +F:    Documentation/devicetree/bindings/spi/socionext,synquacer-spi.yaml
  F:    drivers/spi/spi-synquacer.c
  
  SOCIONEXT SYNQUACER I2C DRIVER
@@@ -19402,7 -19188,7 +19402,7 @@@ M:   Manivannan Sadhasivam <manivannan.sa
  L:    [email protected]
  S:    Maintained
  T:    git git://linuxtv.org/media_tree.git
 -F:    Documentation/devicetree/bindings/media/i2c/imx290.txt
 +F:    Documentation/devicetree/bindings/media/i2c/sony,imx290.yaml
  F:    drivers/media/i2c/imx290.c
  
  SONY IMX319 SENSOR DRIVER
@@@ -19551,11 -19337,6 +19551,11 @@@ W: https://linuxtv.or
  Q:    http://patchwork.linuxtv.org/project/linux-media/list/
  F:    drivers/media/dvb-frontends/sp2*
  
 +SPANISH DOCUMENTATION
 +M:    Carlos Bilbao <[email protected]>
 +S:    Maintained
 +F:    Documentation/translations/sp_SP/
 +
  SPARC + UltraSPARC (sparc/sparc64)
  M:    "David S. Miller" <[email protected]>
  L:    [email protected]
@@@ -19699,7 -19480,7 +19699,7 @@@ M:   Sylvain Petinot <sylvain.petinot@fos
  L:    [email protected]
  S:    Maintained
  T:    git git://linuxtv.org/media_tree.git
 -F:    Documentation/devicetree/bindings/media/i2c/st,st-mipid02.txt
 +F:    Documentation/devicetree/bindings/media/i2c/st,st-mipid02.yaml
  F:    drivers/media/i2c/st-mipid02.c
  
  ST STM32 I2C/SMBUS DRIVER
@@@ -19722,16 -19503,6 +19722,16 @@@ S: Maintaine
  F:    Documentation/hwmon/stpddc60.rst
  F:    drivers/hwmon/pmbus/stpddc60.c
  
 +ST VGXY61 DRIVER
 +M:    Benjamin Mugnier <[email protected]>
 +M:    Sylvain Petinot <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +T:    git git://linuxtv.org/media_tree.git
 +F:    Documentation/devicetree/bindings/media/i2c/st,st-vgxy61.yaml
 +F:    Documentation/userspace-api/media/drivers/st-vgxy61.rst
 +F:    drivers/media/i2c/st-vgxy61.c
 +
  ST VL53L0X ToF RANGER(I2C) IIO DRIVER
  M:    Song Qiang <[email protected]>
  L:    [email protected]
@@@ -19747,7 -19518,6 +19747,7 @@@ S:   Supporte
  F:    Documentation/process/stable-kernel-rules.rst
  
  STAGING - ATOMISP DRIVER
 +M:    Hans de Goede <[email protected]>
  M:    Mauro Carvalho Chehab <[email protected]>
  R:    Sakari Ailus <[email protected]>
  L:    [email protected]
@@@ -19831,11 -19601,6 +19831,11 @@@ M: Ion Badulescu <[email protected]
  S:    Odd Fixes
  F:    drivers/net/ethernet/adaptec/starfire*
  
 +STARFIVE DEVICETREES
 +M:    Emil Renner Berthing <[email protected]>
 +S:    Maintained
 +F:    arch/riscv/boot/dts/starfive/
 +
  STARFIVE JH7100 CLOCK DRIVERS
  M:    Emil Renner Berthing <[email protected]>
  S:    Maintained
@@@ -19957,13 -19722,6 +19957,13 @@@ W: https://sunplus.atlassian.net/wiki/s
  F:    Documentation/devicetree/bindings/net/sunplus,sp7021-emac.yaml
  F:    drivers/net/ethernet/sunplus/
  
 +SUNPLUS MMC DRIVER
 +M:    Tony Huang <[email protected]>
 +M:    Li-hao Kuo <[email protected]>
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/mmc/sunplus,mmc.yaml
 +F:    drivers/mmc/host/sunplus-mmc.c
 +
  SUNPLUS OCOTP DRIVER
  M:    Vincent Shih <[email protected]>
  S:    Maintained
@@@ -20215,7 -19973,6 +20215,7 @@@ F:   drivers/clk/clk-sc[mp]i.
  F:    drivers/cpufreq/sc[mp]i-cpufreq.c
  F:    drivers/firmware/arm_scmi/
  F:    drivers/firmware/arm_scpi.c
 +F:    drivers/powercap/arm_scmi_powercap.c
  F:    drivers/regulator/scmi-regulator.c
  F:    drivers/reset/reset-scmi.c
  F:    include/linux/sc[mp]i_protocol.h
@@@ -20550,7 -20307,7 +20550,7 @@@ M:   Chris Zankel <[email protected]
  M:    Max Filippov <[email protected]>
  L:    [email protected]
  S:    Maintained
 -T:    git git://github.com/czankel/xtensa-linux.git
 +T:    git https://github.com/jcmvbkbc/linux-xtensa.git
  F:    arch/xtensa/
  F:    drivers/irqchip/irq-xtensa-*
  
@@@ -20900,6 -20657,7 +20900,6 @@@ W:   https://wireless.wiki.kernel.org/en/
  W:    https://wireless.wiki.kernel.org/en/users/Drivers/wl1251
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/luca/wl12xx.git
  F:    drivers/net/wireless/ti/
 -F:    include/linux/wl12xx.h
  
  TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER
  M:    John Stultz <[email protected]>
@@@ -21781,7 -21539,7 +21781,7 @@@ M:   Alex Williamson <alex.williamson@red
  R:    Cornelia Huck <[email protected]>
  L:    [email protected]
  S:    Maintained
- T:    git git://github.com/awilliam/linux-vfio.git
+ T:    git https://github.com/awilliam/linux-vfio.git
  F:    Documentation/ABI/testing/sysfs-devices-vfio-dev
  F:    Documentation/driver-api/vfio.rst
  F:    drivers/vfio/
@@@ -21963,12 -21721,6 +21963,12 @@@ F: include/linux/virtio*.
  F:    include/uapi/linux/virtio_*.h
  F:    tools/virtio/
  
 +VISL VIRTUAL STATELESS DECODER DRIVER
 +M:    Daniel Almeida <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +F:    drivers/media/test-drivers/visl
 +
  IFCVF VIRTIO DATA PATH ACCELERATOR
  R:    Zhu Lingshan <[email protected]>
  F:    drivers/vdpa/ifcvf/
index 539447eda665f4952369abaf669bc909087dce17,eee6805e67ded2e5947015708b2ded42786a9887..f5451adcd4890c08cb121f1e1d00d58ce64d010f
@@@ -664,14 -664,18 +664,14 @@@ static int intel_vgpu_open_device(struc
                return -ESRCH;
        }
  
 -      kvm_get_kvm(vgpu->vfio_device.kvm);
 -
        if (__kvmgt_vgpu_exist(vgpu))
                return -EEXIST;
  
        vgpu->attached = true;
  
 -      kvmgt_protect_table_init(vgpu);
 -      gvt_cache_init(vgpu);
 -
        vgpu->track_node.track_write = kvmgt_page_track_write;
        vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
 +      kvm_get_kvm(vgpu->vfio_device.kvm);
        kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
                                         &vgpu->track_node);
  
  
        intel_gvt_activate_vgpu(vgpu);
  
 -      atomic_set(&vgpu->released, 0);
        return 0;
  }
  
@@@ -701,25 -706,24 +701,25 @@@ static void intel_vgpu_close_device(str
        if (!vgpu->attached)
                return;
  
 -      if (atomic_cmpxchg(&vgpu->released, 0, 1))
 -              return;
 -
        intel_gvt_release_vgpu(vgpu);
  
        debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
  
        kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
                                           &vgpu->track_node);
 +      kvm_put_kvm(vgpu->vfio_device.kvm);
 +
        kvmgt_protect_table_destroy(vgpu);
        gvt_cache_destroy(vgpu);
  
 +      WARN_ON(vgpu->nr_cache_entries);
 +
 +      vgpu->gfn_cache = RB_ROOT;
 +      vgpu->dma_addr_cache = RB_ROOT;
 +
        intel_vgpu_release_msi_eventfd_ctx(vgpu);
  
        vgpu->attached = false;
 -
 -      if (vgpu->vfio_device.kvm)
 -              kvm_put_kvm(vgpu->vfio_device.kvm);
  }
  
  static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
@@@ -1447,17 -1451,9 +1447,17 @@@ static int intel_vgpu_init_dev(struct v
        struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
        struct intel_vgpu_type *type =
                container_of(mdev->type, struct intel_vgpu_type, type);
 +      int ret;
  
        vgpu->gvt = kdev_to_i915(mdev->type->parent->dev)->gvt;
 -      return intel_gvt_create_vgpu(vgpu, type->conf);
 +      ret = intel_gvt_create_vgpu(vgpu, type->conf);
 +      if (ret)
 +              return ret;
 +
 +      kvmgt_protect_table_init(vgpu);
 +      gvt_cache_init(vgpu);
 +
 +      return 0;
  }
  
  static void intel_vgpu_release_dev(struct vfio_device *vfio_dev)
        struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
  
        intel_gvt_destroy_vgpu(vgpu);
-       vfio_free_device(vfio_dev);
  }
  
  static const struct vfio_device_ops intel_vgpu_dev_ops = {
        .mmap           = intel_vgpu_mmap,
        .ioctl          = intel_vgpu_ioctl,
        .dma_unmap      = intel_vgpu_dma_unmap,
 +      .bind_iommufd   = vfio_iommufd_emulated_bind,
 +      .unbind_iommufd = vfio_iommufd_emulated_unbind,
 +      .attach_ioas    = vfio_iommufd_emulated_attach_ioas,
  };
  
  static int intel_vgpu_probe(struct mdev_device *mdev)
index 0a5e8b4a674342b31d784b0ed73c777d5d40c51b,e67fad897af3a2fb387674a7b120f9b8fa34a223..2784a4e4d2bef3c9383ee10eb66f8ded33cb00f9
  
  static int fsm_io_helper(struct vfio_ccw_private *private)
  {
-       struct subchannel *sch;
+       struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
        union orb *orb;
        int ccode;
        __u8 lpm;
        unsigned long flags;
        int ret;
  
-       sch = private->sch;
        spin_lock_irqsave(sch->lock, flags);
  
 -      orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm);
 +      orb = cp_get_orb(&private->cp, (u32)virt_to_phys(sch), sch->lpm);
        if (!orb) {
                ret = -EIO;
                goto out;
@@@ -80,13 -78,11 +78,11 @@@ out
  
  static int fsm_do_halt(struct vfio_ccw_private *private)
  {
-       struct subchannel *sch;
+       struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
        unsigned long flags;
        int ccode;
        int ret;
  
-       sch = private->sch;
        spin_lock_irqsave(sch->lock, flags);
  
        VFIO_CCW_TRACE_EVENT(2, "haltIO");
  
  static int fsm_do_clear(struct vfio_ccw_private *private)
  {
-       struct subchannel *sch;
+       struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
        unsigned long flags;
        int ccode;
        int ret;
  
-       sch = private->sch;
        spin_lock_irqsave(sch->lock, flags);
  
        VFIO_CCW_TRACE_EVENT(2, "clearIO");
  static void fsm_notoper(struct vfio_ccw_private *private,
                        enum vfio_ccw_event event)
  {
-       struct subchannel *sch = private->sch;
+       struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
  
        VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: notoper event %x state %x\n",
                           sch->schid.cssid,
@@@ -228,7 -222,7 +222,7 @@@ static void fsm_async_retry(struct vfio
  static void fsm_disabled_irq(struct vfio_ccw_private *private,
                             enum vfio_ccw_event event)
  {
-       struct subchannel *sch = private->sch;
+       struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
  
        /*
         * An interrupt in a disabled state means a previous disable was not
  }
  inline struct subchannel_id get_schid(struct vfio_ccw_private *p)
  {
-       return p->sch->schid;
+       struct subchannel *sch = to_subchannel(p->vdev.dev->parent);
+       return sch->schid;
  }
  
  /*
@@@ -360,10 -356,11 +356,11 @@@ static void fsm_async_request(struct vf
  static void fsm_irq(struct vfio_ccw_private *private,
                    enum vfio_ccw_event event)
  {
+       struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
        struct irb *irb = this_cpu_ptr(&cio_irb);
  
        VFIO_CCW_TRACE_EVENT(6, "IRQ");
-       VFIO_CCW_TRACE_EVENT(6, dev_name(&private->sch->dev));
+       VFIO_CCW_TRACE_EVENT(6, dev_name(&sch->dev));
  
        memcpy(&private->irb, irb, sizeof(*irb));
  
  static void fsm_open(struct vfio_ccw_private *private,
                     enum vfio_ccw_event event)
  {
-       struct subchannel *sch = private->sch;
+       struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
        int ret;
  
        spin_lock_irq(sch->lock);
@@@ -397,7 -394,7 +394,7 @@@ err_unlock
  static void fsm_close(struct vfio_ccw_private *private,
                      enum vfio_ccw_event event)
  {
-       struct subchannel *sch = private->sch;
+       struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
        int ret;
  
        spin_lock_irq(sch->lock);
index 560453d99c24fc9624fc3bc1a4d7778e5e6cc2f9,598a3814d4282b742c45f9f3af1b27d4c404e7d3..5b53b94f13c741cc089c98373eaecc9a30e74f34
@@@ -49,26 -49,70 +49,70 @@@ static int vfio_ccw_mdev_init_dev(struc
        struct vfio_ccw_private *private =
                container_of(vdev, struct vfio_ccw_private, vdev);
  
-       init_completion(&private->release_comp);
+       mutex_init(&private->io_mutex);
+       private->state = VFIO_CCW_STATE_STANDBY;
+       INIT_LIST_HEAD(&private->crw);
+       INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
+       INIT_WORK(&private->crw_work, vfio_ccw_crw_todo);
+       private->cp.guest_cp = kcalloc(CCWCHAIN_LEN_MAX, sizeof(struct ccw1),
+                                      GFP_KERNEL);
+       if (!private->cp.guest_cp)
+               goto out_free_private;
+       private->io_region = kmem_cache_zalloc(vfio_ccw_io_region,
+                                              GFP_KERNEL | GFP_DMA);
+       if (!private->io_region)
+               goto out_free_cp;
+       private->cmd_region = kmem_cache_zalloc(vfio_ccw_cmd_region,
+                                               GFP_KERNEL | GFP_DMA);
+       if (!private->cmd_region)
+               goto out_free_io;
+       private->schib_region = kmem_cache_zalloc(vfio_ccw_schib_region,
+                                                 GFP_KERNEL | GFP_DMA);
+       if (!private->schib_region)
+               goto out_free_cmd;
+       private->crw_region = kmem_cache_zalloc(vfio_ccw_crw_region,
+                                               GFP_KERNEL | GFP_DMA);
+       if (!private->crw_region)
+               goto out_free_schib;
        return 0;
+ out_free_schib:
+       kmem_cache_free(vfio_ccw_schib_region, private->schib_region);
+ out_free_cmd:
+       kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
+ out_free_io:
+       kmem_cache_free(vfio_ccw_io_region, private->io_region);
+ out_free_cp:
+       kfree(private->cp.guest_cp);
+ out_free_private:
+       mutex_destroy(&private->io_mutex);
+       return -ENOMEM;
  }
  
  static int vfio_ccw_mdev_probe(struct mdev_device *mdev)
  {
-       struct vfio_ccw_private *private = dev_get_drvdata(mdev->dev.parent);
+       struct subchannel *sch = to_subchannel(mdev->dev.parent);
+       struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
+       struct vfio_ccw_private *private;
        int ret;
  
-       if (private->state == VFIO_CCW_STATE_NOT_OPER)
-               return -ENODEV;
+       private = vfio_alloc_device(vfio_ccw_private, vdev, &mdev->dev,
+                                   &vfio_ccw_dev_ops);
+       if (IS_ERR(private))
+               return PTR_ERR(private);
  
-       ret = vfio_init_device(&private->vdev, &mdev->dev, &vfio_ccw_dev_ops);
-       if (ret)
-               return ret;
+       dev_set_drvdata(&parent->dev, private);
  
        VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: create\n",
-                          private->sch->schid.cssid,
-                          private->sch->schid.ssid,
-                          private->sch->schid.sch_no);
+                          sch->schid.cssid,
+                          sch->schid.ssid,
+                          sch->schid.sch_no);
  
        ret = vfio_register_emulated_iommu_dev(&private->vdev);
        if (ret)
        return 0;
  
  err_put_vdev:
+       dev_set_drvdata(&parent->dev, NULL);
        vfio_put_device(&private->vdev);
        return ret;
  }
@@@ -85,40 -130,36 +130,36 @@@ static void vfio_ccw_mdev_release_dev(s
  {
        struct vfio_ccw_private *private =
                container_of(vdev, struct vfio_ccw_private, vdev);
-       /*
-        * We cannot free vfio_ccw_private here because it includes
-        * parent info which must be free'ed by css driver.
-        *
-        * Use a workaround by memset'ing the core device part and
-        * then notifying the remove path that all active references
-        * to this device have been released.
-        */
-       memset(vdev, 0, sizeof(*vdev));
-       complete(&private->release_comp);
+       struct vfio_ccw_crw *crw, *temp;
+       list_for_each_entry_safe(crw, temp, &private->crw, next) {
+               list_del(&crw->next);
+               kfree(crw);
+       }
+       kmem_cache_free(vfio_ccw_crw_region, private->crw_region);
+       kmem_cache_free(vfio_ccw_schib_region, private->schib_region);
+       kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
+       kmem_cache_free(vfio_ccw_io_region, private->io_region);
+       kfree(private->cp.guest_cp);
+       mutex_destroy(&private->io_mutex);
  }
  
  static void vfio_ccw_mdev_remove(struct mdev_device *mdev)
  {
-       struct vfio_ccw_private *private = dev_get_drvdata(mdev->dev.parent);
+       struct subchannel *sch = to_subchannel(mdev->dev.parent);
+       struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
+       struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
  
        VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: remove\n",
-                          private->sch->schid.cssid,
-                          private->sch->schid.ssid,
-                          private->sch->schid.sch_no);
+                          sch->schid.cssid,
+                          sch->schid.ssid,
+                          sch->schid.sch_no);
  
        vfio_unregister_group_dev(&private->vdev);
  
+       dev_set_drvdata(&parent->dev, NULL);
        vfio_put_device(&private->vdev);
-       /*
-        * Wait for all active references on mdev are released so it
-        * is safe to defer kfree() to a later point.
-        *
-        * TODO: the clean fix is to split parent/mdev info from ccw
-        * private structure so each can be managed in its own life
-        * cycle.
-        */
-       wait_for_completion(&private->release_comp);
  }
  
  static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
@@@ -588,9 -629,6 +629,9 @@@ static const struct vfio_device_ops vfi
        .ioctl = vfio_ccw_mdev_ioctl,
        .request = vfio_ccw_mdev_request,
        .dma_unmap = vfio_ccw_dma_unmap,
 +      .bind_iommufd = vfio_iommufd_emulated_bind,
 +      .unbind_iommufd = vfio_iommufd_emulated_unbind,
 +      .attach_ioas = vfio_iommufd_emulated_attach_ioas,
  };
  
  struct mdev_driver vfio_ccw_mdev_driver = {
index 9720aed2ac27978b1b5795379b9b33ea319c433b,f108c0f1471252e3247e48ef92f6b45792e25df4..9c01957e56b3f17f494dcb8766646a404eb35280
@@@ -429,7 -429,7 +429,7 @@@ static struct ap_queue_status vfio_ap_i
  
        aqic_gisa.isc = nisc;
        aqic_gisa.ir = 1;
 -      aqic_gisa.gisa = (uint64_t)gisa >> 4;
 +      aqic_gisa.gisa = virt_to_phys(gisa) >> 4;
  
        status = ap_aqic(q->apqn, aqic_gisa, h_nib);
        switch (status.response_code) {
@@@ -765,11 -765,6 +765,6 @@@ static void vfio_ap_mdev_unlink_fr_queu
        }
  }
  
- static void vfio_ap_mdev_release_dev(struct vfio_device *vdev)
- {
-       vfio_free_device(vdev);
- }
  static void vfio_ap_mdev_remove(struct mdev_device *mdev)
  {
        struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev);
@@@ -1535,29 -1530,13 +1530,29 @@@ static int vfio_ap_mdev_set_kvm(struct 
        return 0;
  }
  
 +static void unmap_iova(struct ap_matrix_mdev *matrix_mdev, u64 iova, u64 length)
 +{
 +      struct ap_queue_table *qtable = &matrix_mdev->qtable;
 +      struct vfio_ap_queue *q;
 +      int loop_cursor;
 +
 +      hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
 +              if (q->saved_iova >= iova && q->saved_iova < iova + length)
 +                      vfio_ap_irq_disable(q);
 +      }
 +}
 +
  static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova,
                                   u64 length)
  {
        struct ap_matrix_mdev *matrix_mdev =
                container_of(vdev, struct ap_matrix_mdev, vdev);
  
 -      vfio_unpin_pages(&matrix_mdev->vdev, iova, 1);
 +      mutex_lock(&matrix_dev->mdevs_lock);
 +
 +      unmap_iova(matrix_mdev, iova, length);
 +
 +      mutex_unlock(&matrix_dev->mdevs_lock);
  }
  
  /**
@@@ -1800,14 -1779,10 +1795,13 @@@ static const struct attribute_group vfi
  
  static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
        .init = vfio_ap_mdev_init_dev,
-       .release = vfio_ap_mdev_release_dev,
        .open_device = vfio_ap_mdev_open_device,
        .close_device = vfio_ap_mdev_close_device,
        .ioctl = vfio_ap_mdev_ioctl,
        .dma_unmap = vfio_ap_mdev_dma_unmap,
 +      .bind_iommufd = vfio_iommufd_emulated_bind,
 +      .unbind_iommufd = vfio_iommufd_emulated_unbind,
 +      .attach_ioas = vfio_iommufd_emulated_attach_ioas,
  };
  
  static struct mdev_driver vfio_ap_matrix_driver = {
diff --combined drivers/vfio/Kconfig
index 286c1663bd75646d42f01324f953f2bbdebff1a4,0b8d53f63c7e5caea7436da0b5bdb03ecd0089b4..a8f54462946742661820ed9dd42560a54aa0ceb4
@@@ -2,9 -2,8 +2,9 @@@
  menuconfig VFIO
        tristate "VFIO Non-Privileged userspace driver framework"
        select IOMMU_API
 -      select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
 +      depends on IOMMUFD || !IOMMUFD
        select INTERVAL_TREE
 +      select VFIO_CONTAINER if IOMMUFD=n
        help
          VFIO provides a framework for secure userspace device drivers.
          See Documentation/driver-api/vfio.rst for more details.
          If you don't know what to do here, say N.
  
  if VFIO
 +config VFIO_CONTAINER
 +      bool "Support for the VFIO container /dev/vfio/vfio"
 +      select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
 +      default y
 +      help
 +        The VFIO container is the classic interface to VFIO for establishing
 +        IOMMU mappings. If N is selected here then IOMMUFD must be used to
 +        manage the mappings.
 +
 +        Unless testing IOMMUFD say Y here.
 +
 +if VFIO_CONTAINER
  config VFIO_IOMMU_TYPE1
        tristate
        default n
@@@ -33,6 -20,11 +33,6 @@@ config VFIO_IOMMU_SPAPR_TC
        depends on SPAPR_TCE_IOMMU
        default VFIO
  
 -config VFIO_VIRQFD
 -      bool
 -      select EVENTFD
 -      default n
 -
  config VFIO_NOIOMMU
        bool "VFIO No-IOMMU support"
        help
          this mode since there is no IOMMU to provide DMA translation.
  
          If you don't know what to do here, say N.
- config VFIO_SPAPR_EEH
-       tristate
-       depends on EEH && VFIO_IOMMU_SPAPR_TCE
-       default VFIO
 +endif
 +
-       tristate
 +config VFIO_VIRQFD
++      bool
 +      select EVENTFD
 +      default n
  
  source "drivers/vfio/pci/Kconfig"
  source "drivers/vfio/platform/Kconfig"
diff --combined drivers/vfio/Makefile
index 3783db7e8082c8a1e4059f888f26f1b746d152d4,0721ed4831c92f0b260ceb8081bad84c337ab4a8..70e7dcb302efd20ddc2d4cb31fbffc1ea5e6b49f
@@@ -1,18 -1,13 +1,15 @@@
  # SPDX-License-Identifier: GPL-2.0
- vfio_virqfd-y := virqfd.o
  obj-$(CONFIG_VFIO) += vfio.o
  
  vfio-y += vfio_main.o \
 -        iova_bitmap.o \
 -        container.o
 +        group.o \
 +        iova_bitmap.o
 +vfio-$(CONFIG_IOMMUFD) += iommufd.o
 +vfio-$(CONFIG_VFIO_CONTAINER) += container.o
+ vfio-$(CONFIG_VFIO_VIRQFD) += virqfd.o
  
- obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o
  obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
  obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
- obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
  obj-$(CONFIG_VFIO_PCI) += pci/
  obj-$(CONFIG_VFIO_PLATFORM) += platform/
  obj-$(CONFIG_VFIO_MDEV) += mdev/
index 5cd4bb47644039105138d3939861356f4bd5c4e9,7b8889f550076195d73f7602f830d305198d7d90..defeb8510ace52dd64921193cef5ef6aeca49866
@@@ -568,7 -568,6 +568,6 @@@ static void vfio_fsl_mc_release_dev(str
  
        vfio_fsl_uninit_device(vdev);
        mutex_destroy(&vdev->igate);
-       vfio_free_device(core_vdev);
  }
  
  static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev)
@@@ -592,9 -591,6 +591,9 @@@ static const struct vfio_device_ops vfi
        .read           = vfio_fsl_mc_read,
        .write          = vfio_fsl_mc_write,
        .mmap           = vfio_fsl_mc_mmap,
 +      .bind_iommufd   = vfio_iommufd_physical_bind,
 +      .unbind_iommufd = vfio_iommufd_physical_unbind,
 +      .attach_ioas    = vfio_iommufd_physical_attach_ioas,
  };
  
  static struct fsl_mc_driver vfio_fsl_mc_driver = {
index 40019b11c5a969e9e3b3ef95046691549b97ed66,51941bb4f31f002fb5a27ec8f500b91a6eeadd21..0bba3b05c6c780aa081f2031b5ae38b05d9aefc0
@@@ -360,8 -360,8 +360,8 @@@ static int vf_qm_check_match(struct his
        u32 que_iso_state;
        int ret;
  
-       if (migf->total_length < QM_MATCH_SIZE)
-               return -EINVAL;
+       if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev->match_done)
+               return 0;
  
        if (vf_data->acc_magic != ACC_DEV_MAGIC) {
                dev_err(dev, "failed to match ACC_DEV_MAGIC\n");
        }
  
        hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
+       hisi_acc_vdev->match_done = true;
        return 0;
  }
  
@@@ -493,10 -494,6 +494,6 @@@ static int vf_qm_state_save(struct hisi
        struct device *dev = &vf_qm->pdev->dev;
        int ret;
  
-       ret = vf_qm_get_match_data(hisi_acc_vdev, vf_data);
-       if (ret)
-               return ret;
        if (unlikely(qm_wait_dev_not_ready(vf_qm))) {
                /* Update state and return with match data */
                vf_data->vf_qm_state = QM_NOT_READY;
@@@ -673,12 -670,6 +670,6 @@@ static int hisi_acc_vf_load_state(struc
        struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev->resuming_migf;
        int ret;
  
-       /* Check dev compatibility */
-       ret = vf_qm_check_match(hisi_acc_vdev, migf);
-       if (ret) {
-               dev_err(dev, "failed to match the VF!\n");
-               return ret;
-       }
        /* Recover data to VF */
        ret = vf_qm_load_data(hisi_acc_vdev, migf);
        if (ret) {
@@@ -732,6 -723,10 +723,10 @@@ static ssize_t hisi_acc_vf_resume_write
        *pos += len;
        done = len;
        migf->total_length += len;
+       ret = vf_qm_check_match(migf->hisi_acc_vdev, migf);
+       if (ret)
+               done = -EFAULT;
  out_unlock:
        mutex_unlock(&migf->lock);
        return done;
@@@ -764,9 -759,58 +759,58 @@@ hisi_acc_vf_pci_resume(struct hisi_acc_
  
        stream_open(migf->filp->f_inode, migf->filp);
        mutex_init(&migf->lock);
+       migf->hisi_acc_vdev = hisi_acc_vdev;
        return migf;
  }
  
+ static long hisi_acc_vf_precopy_ioctl(struct file *filp,
+                                     unsigned int cmd, unsigned long arg)
+ {
+       struct hisi_acc_vf_migration_file *migf = filp->private_data;
+       struct hisi_acc_vf_core_device *hisi_acc_vdev = migf->hisi_acc_vdev;
+       loff_t *pos = &filp->f_pos;
+       struct vfio_precopy_info info;
+       unsigned long minsz;
+       int ret;
+       if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
+               return -ENOTTY;
+       minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
+       if (copy_from_user(&info, (void __user *)arg, minsz))
+               return -EFAULT;
+       if (info.argsz < minsz)
+               return -EINVAL;
+       mutex_lock(&hisi_acc_vdev->state_mutex);
+       if (hisi_acc_vdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY) {
+               mutex_unlock(&hisi_acc_vdev->state_mutex);
+               return -EINVAL;
+       }
+       mutex_lock(&migf->lock);
+       if (migf->disabled) {
+               ret = -ENODEV;
+               goto out;
+       }
+       if (*pos > migf->total_length) {
+               ret = -EINVAL;
+               goto out;
+       }
+       info.dirty_bytes = 0;
+       info.initial_bytes = migf->total_length - *pos;
+       ret = copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
+ out:
+       mutex_unlock(&migf->lock);
+       mutex_unlock(&hisi_acc_vdev->state_mutex);
+       return ret;
+ }
  static ssize_t hisi_acc_vf_save_read(struct file *filp, char __user *buf, size_t len,
                                     loff_t *pos)
  {
@@@ -807,12 -851,14 +851,14 @@@ out_unlock
  static const struct file_operations hisi_acc_vf_save_fops = {
        .owner = THIS_MODULE,
        .read = hisi_acc_vf_save_read,
+       .unlocked_ioctl = hisi_acc_vf_precopy_ioctl,
+       .compat_ioctl = compat_ptr_ioctl,
        .release = hisi_acc_vf_release_file,
        .llseek = no_llseek,
  };
  
  static struct hisi_acc_vf_migration_file *
- hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+ hisi_acc_open_saving_migf(struct hisi_acc_vf_core_device *hisi_acc_vdev)
  {
        struct hisi_acc_vf_migration_file *migf;
        int ret;
  
        stream_open(migf->filp->f_inode, migf->filp);
        mutex_init(&migf->lock);
+       migf->hisi_acc_vdev = hisi_acc_vdev;
  
-       ret = vf_qm_state_save(hisi_acc_vdev, migf);
+       ret = vf_qm_get_match_data(hisi_acc_vdev, &migf->vf_data);
        if (ret) {
                fput(migf->filp);
                return ERR_PTR(ret);
        return migf;
  }
  
+ static struct hisi_acc_vf_migration_file *
+ hisi_acc_vf_pre_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+ {
+       struct hisi_acc_vf_migration_file *migf;
+       migf = hisi_acc_open_saving_migf(hisi_acc_vdev);
+       if (IS_ERR(migf))
+               return migf;
+       migf->total_length = QM_MATCH_SIZE;
+       return migf;
+ }
+ static struct hisi_acc_vf_migration_file *
+ hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev, bool open)
+ {
+       int ret;
+       struct hisi_acc_vf_migration_file *migf = NULL;
+       if (open) {
+               /*
+                * Userspace didn't use PRECOPY support. Hence saving_migf
+                * is not opened yet.
+                */
+               migf = hisi_acc_open_saving_migf(hisi_acc_vdev);
+               if (IS_ERR(migf))
+                       return migf;
+       } else {
+               migf = hisi_acc_vdev->saving_migf;
+       }
+       ret = vf_qm_state_save(hisi_acc_vdev, migf);
+       if (ret)
+               return ERR_PTR(ret);
+       return open ? migf : NULL;
+ }
  static int hisi_acc_vf_stop_device(struct hisi_acc_vf_core_device *hisi_acc_vdev)
  {
        struct device *dev = &hisi_acc_vdev->vf_dev->dev;
@@@ -869,6 -954,31 +954,31 @@@ hisi_acc_vf_set_device_state(struct his
        u32 cur = hisi_acc_vdev->mig_state;
        int ret;
  
+       if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) {
+               struct hisi_acc_vf_migration_file *migf;
+               migf = hisi_acc_vf_pre_copy(hisi_acc_vdev);
+               if (IS_ERR(migf))
+                       return ERR_CAST(migf);
+               get_file(migf->filp);
+               hisi_acc_vdev->saving_migf = migf;
+               return migf->filp;
+       }
+       if (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_STOP_COPY) {
+               struct hisi_acc_vf_migration_file *migf;
+               ret = hisi_acc_vf_stop_device(hisi_acc_vdev);
+               if (ret)
+                       return ERR_PTR(ret);
+               migf = hisi_acc_vf_stop_copy(hisi_acc_vdev, false);
+               if (IS_ERR(migf))
+                       return ERR_CAST(migf);
+               return NULL;
+       }
        if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_STOP) {
                ret = hisi_acc_vf_stop_device(hisi_acc_vdev);
                if (ret)
        if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
                struct hisi_acc_vf_migration_file *migf;
  
-               migf = hisi_acc_vf_stop_copy(hisi_acc_vdev);
+               migf = hisi_acc_vf_stop_copy(hisi_acc_vdev, true);
                if (IS_ERR(migf))
                        return ERR_CAST(migf);
                get_file(migf->filp);
                return NULL;
        }
  
+       if (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) {
+               hisi_acc_vf_disable_fds(hisi_acc_vdev);
+               return NULL;
+       }
        if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING) {
                hisi_acc_vf_start_device(hisi_acc_vdev);
                return NULL;
@@@ -957,6 -1072,14 +1072,14 @@@ hisi_acc_vfio_pci_set_device_state(stru
        return res;
  }
  
+ static int
+ hisi_acc_vfio_pci_get_data_size(struct vfio_device *vdev,
+                               unsigned long *stop_copy_length)
+ {
+       *stop_copy_length = sizeof(struct acc_vf_data);
+       return 0;
+ }
  static int
  hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev,
                                   enum vfio_device_mig_state *curr_state)
@@@ -1213,6 -1336,7 +1336,7 @@@ static void hisi_acc_vfio_pci_close_dev
  static const struct vfio_migration_ops hisi_acc_vfio_pci_migrn_state_ops = {
        .migration_set_state = hisi_acc_vfio_pci_set_device_state,
        .migration_get_state = hisi_acc_vfio_pci_get_device_state,
+       .migration_get_data_size = hisi_acc_vfio_pci_get_data_size,
  };
  
  static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev)
        hisi_acc_vdev->vf_dev = pdev;
        mutex_init(&hisi_acc_vdev->state_mutex);
  
-       core_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY;
+       core_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY;
        core_vdev->mig_ops = &hisi_acc_vfio_pci_migrn_state_ops;
  
        return vfio_pci_core_init_dev(core_vdev);
@@@ -1246,9 -1370,6 +1370,9 @@@ static const struct vfio_device_ops his
        .mmap = hisi_acc_vfio_pci_mmap,
        .request = vfio_pci_core_request,
        .match = vfio_pci_core_match,
 +      .bind_iommufd = vfio_iommufd_physical_bind,
 +      .unbind_iommufd = vfio_iommufd_physical_unbind,
 +      .attach_ioas = vfio_iommufd_physical_attach_ioas,
  };
  
  static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
        .mmap = vfio_pci_core_mmap,
        .request = vfio_pci_core_request,
        .match = vfio_pci_core_match,
 +      .bind_iommufd = vfio_iommufd_physical_bind,
 +      .unbind_iommufd = vfio_iommufd_physical_unbind,
 +      .attach_ioas = vfio_iommufd_physical_attach_ioas,
  };
  
  static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
index 32d1f38d351e7e00625f6b8e6cd825f7afb47073,031ac8cc215d4202bbb21812ca638158d857ecf9..9feb89c6d939daccbaabfcb76a147d15603db4f0
@@@ -32,8 -32,8 +32,8 @@@ static struct mlx5vf_pci_core_device *m
                            core_device);
  }
  
- static struct page *
- mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf,
+ struct page *
+ mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
                          unsigned long offset)
  {
        unsigned long cur_offset = 0;
        unsigned int i;
  
        /* All accesses are sequential */
-       if (offset < migf->last_offset || !migf->last_offset_sg) {
-               migf->last_offset = 0;
-               migf->last_offset_sg = migf->table.sgt.sgl;
-               migf->sg_last_entry = 0;
+       if (offset < buf->last_offset || !buf->last_offset_sg) {
+               buf->last_offset = 0;
+               buf->last_offset_sg = buf->table.sgt.sgl;
+               buf->sg_last_entry = 0;
        }
  
-       cur_offset = migf->last_offset;
+       cur_offset = buf->last_offset;
  
-       for_each_sg(migf->last_offset_sg, sg,
-                       migf->table.sgt.orig_nents - migf->sg_last_entry, i) {
+       for_each_sg(buf->last_offset_sg, sg,
+                       buf->table.sgt.orig_nents - buf->sg_last_entry, i) {
                if (offset < sg->length + cur_offset) {
-                       migf->last_offset_sg = sg;
-                       migf->sg_last_entry += i;
-                       migf->last_offset = cur_offset;
+                       buf->last_offset_sg = sg;
+                       buf->sg_last_entry += i;
+                       buf->last_offset = cur_offset;
                        return nth_page(sg_page(sg),
                                        (offset - cur_offset) / PAGE_SIZE);
                }
@@@ -63,8 -63,8 +63,8 @@@
        return NULL;
  }
  
static int mlx5vf_add_migration_pages(struct mlx5_vf_migration_file *migf,
-                                     unsigned int npages)
int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
+                              unsigned int npages)
  {
        unsigned int to_alloc = npages;
        struct page **page_list;
                }
                to_alloc -= filled;
                ret = sg_alloc_append_table_from_pages(
-                       &migf->table, page_list, filled, 0,
+                       &buf->table, page_list, filled, 0,
                        filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
                        GFP_KERNEL);
  
                if (ret)
                        goto err;
-               migf->allocated_length += filled * PAGE_SIZE;
+               buf->allocated_length += filled * PAGE_SIZE;
                /* clean input for another bulk allocation */
                memset(page_list, 0, filled * sizeof(*page_list));
                to_fill = min_t(unsigned int, to_alloc,
@@@ -108,16 -108,8 +108,8 @@@ err
  
  static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
  {
-       struct sg_page_iter sg_iter;
        mutex_lock(&migf->lock);
-       /* Undo alloc_pages_bulk_array() */
-       for_each_sgtable_page(&migf->table.sgt, &sg_iter, 0)
-               __free_page(sg_page_iter_page(&sg_iter));
-       sg_free_append_table(&migf->table);
-       migf->disabled = true;
-       migf->total_length = 0;
-       migf->allocated_length = 0;
+       migf->state = MLX5_MIGF_STATE_ERROR;
        migf->filp->f_pos = 0;
        mutex_unlock(&migf->lock);
  }
@@@ -132,10 -124,91 +124,91 @@@ static int mlx5vf_release_file(struct i
        return 0;
  }
  
+ static struct mlx5_vhca_data_buffer *
+ mlx5vf_get_data_buff_from_pos(struct mlx5_vf_migration_file *migf, loff_t pos,
+                             bool *end_of_data)
+ {
+       struct mlx5_vhca_data_buffer *buf;
+       bool found = false;
+       *end_of_data = false;
+       spin_lock_irq(&migf->list_lock);
+       if (list_empty(&migf->buf_list)) {
+               *end_of_data = true;
+               goto end;
+       }
+       buf = list_first_entry(&migf->buf_list, struct mlx5_vhca_data_buffer,
+                              buf_elm);
+       if (pos >= buf->start_pos &&
+           pos < buf->start_pos + buf->length) {
+               found = true;
+               goto end;
+       }
+       /*
+        * As we use a stream based FD we may expect having the data always
+        * on first chunk
+        */
+       migf->state = MLX5_MIGF_STATE_ERROR;
+ end:
+       spin_unlock_irq(&migf->list_lock);
+       return found ? buf : NULL;
+ }
+ static ssize_t mlx5vf_buf_read(struct mlx5_vhca_data_buffer *vhca_buf,
+                              char __user **buf, size_t *len, loff_t *pos)
+ {
+       unsigned long offset;
+       ssize_t done = 0;
+       size_t copy_len;
+       copy_len = min_t(size_t,
+                        vhca_buf->start_pos + vhca_buf->length - *pos, *len);
+       while (copy_len) {
+               size_t page_offset;
+               struct page *page;
+               size_t page_len;
+               u8 *from_buff;
+               int ret;
+               offset = *pos - vhca_buf->start_pos;
+               page_offset = offset % PAGE_SIZE;
+               offset -= page_offset;
+               page = mlx5vf_get_migration_page(vhca_buf, offset);
+               if (!page)
+                       return -EINVAL;
+               page_len = min_t(size_t, copy_len, PAGE_SIZE - page_offset);
+               from_buff = kmap_local_page(page);
+               ret = copy_to_user(*buf, from_buff + page_offset, page_len);
+               kunmap_local(from_buff);
+               if (ret)
+                       return -EFAULT;
+               *pos += page_len;
+               *len -= page_len;
+               *buf += page_len;
+               done += page_len;
+               copy_len -= page_len;
+       }
+       if (*pos >= vhca_buf->start_pos + vhca_buf->length) {
+               spin_lock_irq(&vhca_buf->migf->list_lock);
+               list_del_init(&vhca_buf->buf_elm);
+               list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list);
+               spin_unlock_irq(&vhca_buf->migf->list_lock);
+       }
+       return done;
+ }
  static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
                               loff_t *pos)
  {
        struct mlx5_vf_migration_file *migf = filp->private_data;
+       struct mlx5_vhca_data_buffer *vhca_buf;
+       bool first_loop_call = true;
+       bool end_of_data;
        ssize_t done = 0;
  
        if (pos)
  
        if (!(filp->f_flags & O_NONBLOCK)) {
                if (wait_event_interruptible(migf->poll_wait,
-                            READ_ONCE(migf->total_length) || migf->is_err))
+                               !list_empty(&migf->buf_list) ||
+                               migf->state == MLX5_MIGF_STATE_ERROR ||
+                               migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR ||
+                               migf->state == MLX5_MIGF_STATE_PRE_COPY ||
+                               migf->state == MLX5_MIGF_STATE_COMPLETE))
                        return -ERESTARTSYS;
        }
  
        mutex_lock(&migf->lock);
-       if ((filp->f_flags & O_NONBLOCK) && !READ_ONCE(migf->total_length)) {
-               done = -EAGAIN;
-               goto out_unlock;
-       }
-       if (*pos > migf->total_length) {
-               done = -EINVAL;
-               goto out_unlock;
-       }
-       if (migf->disabled || migf->is_err) {
+       if (migf->state == MLX5_MIGF_STATE_ERROR) {
                done = -ENODEV;
                goto out_unlock;
        }
  
-       len = min_t(size_t, migf->total_length - *pos, len);
        while (len) {
-               size_t page_offset;
-               struct page *page;
-               size_t page_len;
-               u8 *from_buff;
-               int ret;
+               ssize_t count;
+               vhca_buf = mlx5vf_get_data_buff_from_pos(migf, *pos,
+                                                        &end_of_data);
+               if (first_loop_call) {
+                       first_loop_call = false;
+                       /* Temporary end of file as part of PRE_COPY */
+                       if (end_of_data && (migf->state == MLX5_MIGF_STATE_PRE_COPY ||
+                               migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR)) {
+                               done = -ENOMSG;
+                               goto out_unlock;
+                       }
+                       if (end_of_data && migf->state != MLX5_MIGF_STATE_COMPLETE) {
+                               if (filp->f_flags & O_NONBLOCK) {
+                                       done = -EAGAIN;
+                                       goto out_unlock;
+                               }
+                       }
+               }
+               if (end_of_data)
+                       goto out_unlock;
  
-               page_offset = (*pos) % PAGE_SIZE;
-               page = mlx5vf_get_migration_page(migf, *pos - page_offset);
-               if (!page) {
-                       if (done == 0)
-                               done = -EINVAL;
+               if (!vhca_buf) {
+                       done = -EINVAL;
                        goto out_unlock;
                }
  
-               page_len = min_t(size_t, len, PAGE_SIZE - page_offset);
-               from_buff = kmap_local_page(page);
-               ret = copy_to_user(buf, from_buff + page_offset, page_len);
-               kunmap_local(from_buff);
-               if (ret) {
-                       done = -EFAULT;
+               count = mlx5vf_buf_read(vhca_buf, &buf, &len, pos);
+               if (count < 0) {
+                       done = count;
                        goto out_unlock;
                }
-               *pos += page_len;
-               len -= page_len;
-               done += page_len;
-               buf += page_len;
+               done += count;
        }
  
  out_unlock:
@@@ -206,27 -283,188 +283,188 @@@ static __poll_t mlx5vf_save_poll(struc
        poll_wait(filp, &migf->poll_wait, wait);
  
        mutex_lock(&migf->lock);
-       if (migf->disabled || migf->is_err)
+       if (migf->state == MLX5_MIGF_STATE_ERROR)
                pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
-       else if (READ_ONCE(migf->total_length))
+       else if (!list_empty(&migf->buf_list) ||
+                migf->state == MLX5_MIGF_STATE_COMPLETE)
                pollflags = EPOLLIN | EPOLLRDNORM;
        mutex_unlock(&migf->lock);
  
        return pollflags;
  }
  
+ /*
+  * FD is exposed and user can use it after receiving an error.
+  * Mark migf in error, and wake the user.
+  */
+ static void mlx5vf_mark_err(struct mlx5_vf_migration_file *migf)
+ {
+       migf->state = MLX5_MIGF_STATE_ERROR;
+       wake_up_interruptible(&migf->poll_wait);
+ }
+ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
+                                unsigned long arg)
+ {
+       struct mlx5_vf_migration_file *migf = filp->private_data;
+       struct mlx5vf_pci_core_device *mvdev = migf->mvdev;
+       struct mlx5_vhca_data_buffer *buf;
+       struct vfio_precopy_info info = {};
+       loff_t *pos = &filp->f_pos;
+       unsigned long minsz;
+       size_t inc_length = 0;
+       bool end_of_data;
+       int ret;
+       if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
+               return -ENOTTY;
+       minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
+       if (copy_from_user(&info, (void __user *)arg, minsz))
+               return -EFAULT;
+       if (info.argsz < minsz)
+               return -EINVAL;
+       mutex_lock(&mvdev->state_mutex);
+       if (mvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
+           mvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) {
+               ret = -EINVAL;
+               goto err_state_unlock;
+       }
+       /*
+        * We can't issue a SAVE command when the device is suspended, so as
+        * part of VFIO_DEVICE_STATE_PRE_COPY_P2P no reason to query for extra
+        * bytes that can't be read.
+        */
+       if (mvdev->mig_state == VFIO_DEVICE_STATE_PRE_COPY) {
+               /*
+                * Once the query returns it's guaranteed that there is no
+                * active SAVE command.
+                * As so, the other code below is safe with the proper locks.
+                */
+               ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &inc_length,
+                                                           MLX5VF_QUERY_INC);
+               if (ret)
+                       goto err_state_unlock;
+       }
+       mutex_lock(&migf->lock);
+       if (migf->state == MLX5_MIGF_STATE_ERROR) {
+               ret = -ENODEV;
+               goto err_migf_unlock;
+       }
+       buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data);
+       if (buf) {
+               if (buf->start_pos == 0) {
+                       info.initial_bytes = buf->header_image_size - *pos;
+               } else if (buf->start_pos ==
+                               sizeof(struct mlx5_vf_migration_header)) {
+                       /* First data buffer following the header */
+                       info.initial_bytes = buf->start_pos +
+                                               buf->length - *pos;
+               } else {
+                       info.dirty_bytes = buf->start_pos + buf->length - *pos;
+               }
+       } else {
+               if (!end_of_data) {
+                       ret = -EINVAL;
+                       goto err_migf_unlock;
+               }
+               info.dirty_bytes = inc_length;
+       }
+       if (!end_of_data || !inc_length) {
+               mutex_unlock(&migf->lock);
+               goto done;
+       }
+       mutex_unlock(&migf->lock);
+       /*
+        * We finished transferring the current state and the device has a
+        * dirty state, save a new state to be ready for.
+        */
+       buf = mlx5vf_get_data_buffer(migf, inc_length, DMA_FROM_DEVICE);
+       if (IS_ERR(buf)) {
+               ret = PTR_ERR(buf);
+               mlx5vf_mark_err(migf);
+               goto err_state_unlock;
+       }
+       ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, true);
+       if (ret) {
+               mlx5vf_mark_err(migf);
+               mlx5vf_put_data_buffer(buf);
+               goto err_state_unlock;
+       }
+ done:
+       mlx5vf_state_mutex_unlock(mvdev);
+       if (copy_to_user((void __user *)arg, &info, minsz))
+               return -EFAULT;
+       return 0;
+ err_migf_unlock:
+       mutex_unlock(&migf->lock);
+ err_state_unlock:
+       mlx5vf_state_mutex_unlock(mvdev);
+       return ret;
+ }
  static const struct file_operations mlx5vf_save_fops = {
        .owner = THIS_MODULE,
        .read = mlx5vf_save_read,
        .poll = mlx5vf_save_poll,
+       .unlocked_ioctl = mlx5vf_precopy_ioctl,
+       .compat_ioctl = compat_ptr_ioctl,
        .release = mlx5vf_release_file,
        .llseek = no_llseek,
  };
  
+ static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev)
+ {
+       struct mlx5_vf_migration_file *migf = mvdev->saving_migf;
+       struct mlx5_vhca_data_buffer *buf;
+       size_t length;
+       int ret;
+       if (migf->state == MLX5_MIGF_STATE_ERROR)
+               return -ENODEV;
+       ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length,
+                               MLX5VF_QUERY_INC | MLX5VF_QUERY_FINAL);
+       if (ret)
+               goto err;
+       buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE);
+       if (IS_ERR(buf)) {
+               ret = PTR_ERR(buf);
+               goto err;
+       }
+       ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false);
+       if (ret)
+               goto err_save;
+       return 0;
+ err_save:
+       mlx5vf_put_data_buffer(buf);
+ err:
+       mlx5vf_mark_err(migf);
+       return ret;
+ }
  static struct mlx5_vf_migration_file *
- mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
+ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
  {
        struct mlx5_vf_migration_file *migf;
+       struct mlx5_vhca_data_buffer *buf;
+       size_t length;
        int ret;
  
        migf = kzalloc(sizeof(*migf), GFP_KERNEL);
        migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_save_fops, migf,
                                        O_RDONLY);
        if (IS_ERR(migf->filp)) {
-               int err = PTR_ERR(migf->filp);
-               kfree(migf);
-               return ERR_PTR(err);
+               ret = PTR_ERR(migf->filp);
+               goto end;
        }
  
+       migf->mvdev = mvdev;
+       ret = mlx5vf_cmd_alloc_pd(migf);
+       if (ret)
+               goto out_free;
        stream_open(migf->filp->f_inode, migf->filp);
        mutex_init(&migf->lock);
        init_waitqueue_head(&migf->poll_wait);
+       init_completion(&migf->save_comp);
+       /*
+        * save_comp is being used as a binary semaphore built from
+        * a completion. A normal mutex cannot be used because the lock is
+        * passed between kernel threads and lockdep can't model this.
+        */
+       complete(&migf->save_comp);
        mlx5_cmd_init_async_ctx(mvdev->mdev, &migf->async_ctx);
        INIT_WORK(&migf->async_data.work, mlx5vf_mig_file_cleanup_cb);
-       ret = mlx5vf_cmd_query_vhca_migration_state(mvdev,
-                                                   &migf->total_length);
+       INIT_LIST_HEAD(&migf->buf_list);
+       INIT_LIST_HEAD(&migf->avail_list);
+       spin_lock_init(&migf->list_lock);
+       ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, 0);
        if (ret)
-               goto out_free;
+               goto out_pd;
  
-       ret = mlx5vf_add_migration_pages(
-               migf, DIV_ROUND_UP_ULL(migf->total_length, PAGE_SIZE));
-       if (ret)
-               goto out_free;
+       buf = mlx5vf_alloc_data_buffer(migf, length, DMA_FROM_DEVICE);
+       if (IS_ERR(buf)) {
+               ret = PTR_ERR(buf);
+               goto out_pd;
+       }
  
-       migf->mvdev = mvdev;
-       ret = mlx5vf_cmd_save_vhca_state(mvdev, migf);
+       ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, false, track);
        if (ret)
-               goto out_free;
+               goto out_save;
        return migf;
+ out_save:
+       mlx5vf_free_data_buffer(buf);
+ out_pd:
+       mlx5vf_cmd_dealloc_pd(migf);
  out_free:
        fput(migf->filp);
+ end:
+       kfree(migf);
        return ERR_PTR(ret);
  }
  
+ static int
+ mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf,
+                             const char __user **buf, size_t *len,
+                             loff_t *pos, ssize_t *done)
+ {
+       unsigned long offset;
+       size_t page_offset;
+       struct page *page;
+       size_t page_len;
+       u8 *to_buff;
+       int ret;
+       offset = *pos - vhca_buf->start_pos;
+       page_offset = offset % PAGE_SIZE;
+       page = mlx5vf_get_migration_page(vhca_buf, offset - page_offset);
+       if (!page)
+               return -EINVAL;
+       page_len = min_t(size_t, *len, PAGE_SIZE - page_offset);
+       to_buff = kmap_local_page(page);
+       ret = copy_from_user(to_buff + page_offset, *buf, page_len);
+       kunmap_local(to_buff);
+       if (ret)
+               return -EFAULT;
+       *pos += page_len;
+       *done += page_len;
+       *buf += page_len;
+       *len -= page_len;
+       vhca_buf->length += page_len;
+       return 0;
+ }
+ static int
+ mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf,
+                                  loff_t requested_length,
+                                  const char __user **buf, size_t *len,
+                                  loff_t *pos, ssize_t *done)
+ {
+       int ret;
+       if (requested_length > MAX_MIGRATION_SIZE)
+               return -ENOMEM;
+       if (vhca_buf->allocated_length < requested_length) {
+               ret = mlx5vf_add_migration_pages(
+                       vhca_buf,
+                       DIV_ROUND_UP(requested_length - vhca_buf->allocated_length,
+                                    PAGE_SIZE));
+               if (ret)
+                       return ret;
+       }
+       while (*len) {
+               ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos,
+                                                   done);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+ }
+ static ssize_t
+ mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf,
+                        struct mlx5_vhca_data_buffer *vhca_buf,
+                        size_t image_size, const char __user **buf,
+                        size_t *len, loff_t *pos, ssize_t *done,
+                        bool *has_work)
+ {
+       size_t copy_len, to_copy;
+       int ret;
+       to_copy = min_t(size_t, *len, image_size - vhca_buf->length);
+       copy_len = to_copy;
+       while (to_copy) {
+               ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, &to_copy, pos,
+                                                   done);
+               if (ret)
+                       return ret;
+       }
+       *len -= copy_len;
+       if (vhca_buf->length == image_size) {
+               migf->load_state = MLX5_VF_LOAD_STATE_LOAD_IMAGE;
+               migf->max_pos += image_size;
+               *has_work = true;
+       }
+       return 0;
+ }
+ static int
+ mlx5vf_resume_read_header(struct mlx5_vf_migration_file *migf,
+                         struct mlx5_vhca_data_buffer *vhca_buf,
+                         const char __user **buf,
+                         size_t *len, loff_t *pos,
+                         ssize_t *done, bool *has_work)
+ {
+       struct page *page;
+       size_t copy_len;
+       u8 *to_buff;
+       int ret;
+       copy_len = min_t(size_t, *len,
+               sizeof(struct mlx5_vf_migration_header) - vhca_buf->length);
+       page = mlx5vf_get_migration_page(vhca_buf, 0);
+       if (!page)
+               return -EINVAL;
+       to_buff = kmap_local_page(page);
+       ret = copy_from_user(to_buff + vhca_buf->length, *buf, copy_len);
+       if (ret) {
+               ret = -EFAULT;
+               goto end;
+       }
+       *buf += copy_len;
+       *pos += copy_len;
+       *done += copy_len;
+       *len -= copy_len;
+       vhca_buf->length += copy_len;
+       if (vhca_buf->length == sizeof(struct mlx5_vf_migration_header)) {
+               u64 flags;
+               vhca_buf->header_image_size = le64_to_cpup((__le64 *)to_buff);
+               if (vhca_buf->header_image_size > MAX_MIGRATION_SIZE) {
+                       ret = -ENOMEM;
+                       goto end;
+               }
+               flags = le64_to_cpup((__le64 *)(to_buff +
+                           offsetof(struct mlx5_vf_migration_header, flags)));
+               if (flags) {
+                       ret = -EOPNOTSUPP;
+                       goto end;
+               }
+               migf->load_state = MLX5_VF_LOAD_STATE_PREP_IMAGE;
+               migf->max_pos += vhca_buf->length;
+               *has_work = true;
+       }
+ end:
+       kunmap_local(to_buff);
+       return ret;
+ }
  static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
                                   size_t len, loff_t *pos)
  {
        struct mlx5_vf_migration_file *migf = filp->private_data;
+       struct mlx5_vhca_data_buffer *vhca_buf = migf->buf;
+       struct mlx5_vhca_data_buffer *vhca_buf_header = migf->buf_header;
        loff_t requested_length;
+       bool has_work = false;
        ssize_t done = 0;
+       int ret = 0;
  
        if (pos)
                return -ESPIPE;
            check_add_overflow((loff_t)len, *pos, &requested_length))
                return -EINVAL;
  
-       if (requested_length > MAX_MIGRATION_SIZE)
-               return -ENOMEM;
+       mutex_lock(&migf->mvdev->state_mutex);
        mutex_lock(&migf->lock);
-       if (migf->disabled) {
-               done = -ENODEV;
+       if (migf->state == MLX5_MIGF_STATE_ERROR) {
+               ret = -ENODEV;
                goto out_unlock;
        }
  
-       if (migf->allocated_length < requested_length) {
-               done = mlx5vf_add_migration_pages(
-                       migf,
-                       DIV_ROUND_UP(requested_length - migf->allocated_length,
-                                    PAGE_SIZE));
-               if (done)
-                       goto out_unlock;
-       }
-       while (len) {
-               size_t page_offset;
-               struct page *page;
-               size_t page_len;
-               u8 *to_buff;
-               int ret;
-               page_offset = (*pos) % PAGE_SIZE;
-               page = mlx5vf_get_migration_page(migf, *pos - page_offset);
-               if (!page) {
-                       if (done == 0)
-                               done = -EINVAL;
-                       goto out_unlock;
+       while (len || has_work) {
+               has_work = false;
+               switch (migf->load_state) {
+               case MLX5_VF_LOAD_STATE_READ_HEADER:
+                       ret = mlx5vf_resume_read_header(migf, vhca_buf_header,
+                                                       &buf, &len, pos,
+                                                       &done, &has_work);
+                       if (ret)
+                               goto out_unlock;
+                       break;
+               case MLX5_VF_LOAD_STATE_PREP_IMAGE:
+               {
+                       u64 size = vhca_buf_header->header_image_size;
+                       if (vhca_buf->allocated_length < size) {
+                               mlx5vf_free_data_buffer(vhca_buf);
+                               migf->buf = mlx5vf_alloc_data_buffer(migf,
+                                                       size, DMA_TO_DEVICE);
+                               if (IS_ERR(migf->buf)) {
+                                       ret = PTR_ERR(migf->buf);
+                                       migf->buf = NULL;
+                                       goto out_unlock;
+                               }
+                               vhca_buf = migf->buf;
+                       }
+                       vhca_buf->start_pos = migf->max_pos;
+                       migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE;
+                       break;
                }
+               case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER:
+                       ret = mlx5vf_resume_read_image_no_header(vhca_buf,
+                                               requested_length,
+                                               &buf, &len, pos, &done);
+                       if (ret)
+                               goto out_unlock;
+                       break;
+               case MLX5_VF_LOAD_STATE_READ_IMAGE:
+                       ret = mlx5vf_resume_read_image(migf, vhca_buf,
+                                               vhca_buf_header->header_image_size,
+                                               &buf, &len, pos, &done, &has_work);
+                       if (ret)
+                               goto out_unlock;
+                       break;
+               case MLX5_VF_LOAD_STATE_LOAD_IMAGE:
+                       ret = mlx5vf_cmd_load_vhca_state(migf->mvdev, migf, vhca_buf);
+                       if (ret)
+                               goto out_unlock;
+                       migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
+                       /* prep header buf for next image */
+                       vhca_buf_header->length = 0;
+                       vhca_buf_header->header_image_size = 0;
+                       /* prep data buf for next image */
+                       vhca_buf->length = 0;
  
-               page_len = min_t(size_t, len, PAGE_SIZE - page_offset);
-               to_buff = kmap_local_page(page);
-               ret = copy_from_user(to_buff + page_offset, buf, page_len);
-               kunmap_local(to_buff);
-               if (ret) {
-                       done = -EFAULT;
-                       goto out_unlock;
+                       break;
+               default:
+                       break;
                }
-               *pos += page_len;
-               len -= page_len;
-               done += page_len;
-               buf += page_len;
-               migf->total_length += page_len;
        }
  out_unlock:
+       if (ret)
+               migf->state = MLX5_MIGF_STATE_ERROR;
        mutex_unlock(&migf->lock);
-       return done;
+       mlx5vf_state_mutex_unlock(migf->mvdev);
+       return ret ? ret : done;
  }
  
  static const struct file_operations mlx5vf_resume_fops = {
@@@ -345,6 -778,8 +778,8 @@@ static struct mlx5_vf_migration_file 
  mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
  {
        struct mlx5_vf_migration_file *migf;
+       struct mlx5_vhca_data_buffer *buf;
+       int ret;
  
        migf = kzalloc(sizeof(*migf), GFP_KERNEL);
        if (!migf)
        migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_resume_fops, migf,
                                        O_WRONLY);
        if (IS_ERR(migf->filp)) {
-               int err = PTR_ERR(migf->filp);
+               ret = PTR_ERR(migf->filp);
+               goto end;
+       }
  
-               kfree(migf);
-               return ERR_PTR(err);
+       migf->mvdev = mvdev;
+       ret = mlx5vf_cmd_alloc_pd(migf);
+       if (ret)
+               goto out_free;
+       buf = mlx5vf_alloc_data_buffer(migf, 0, DMA_TO_DEVICE);
+       if (IS_ERR(buf)) {
+               ret = PTR_ERR(buf);
+               goto out_pd;
+       }
+       migf->buf = buf;
+       if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
+               buf = mlx5vf_alloc_data_buffer(migf,
+                       sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+               if (IS_ERR(buf)) {
+                       ret = PTR_ERR(buf);
+                       goto out_buf;
+               }
+               migf->buf_header = buf;
+               migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
+       } else {
+               /* Initial state will be to read the image */
+               migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER;
        }
        stream_open(migf->filp->f_inode, migf->filp);
        mutex_init(&migf->lock);
+       INIT_LIST_HEAD(&migf->buf_list);
+       INIT_LIST_HEAD(&migf->avail_list);
+       spin_lock_init(&migf->list_lock);
        return migf;
+ out_buf:
+       mlx5vf_free_data_buffer(migf->buf);
+ out_pd:
+       mlx5vf_cmd_dealloc_pd(migf);
+ out_free:
+       fput(migf->filp);
+ end:
+       kfree(migf);
+       return ERR_PTR(ret);
  }
  
  void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
  {
        if (mvdev->resuming_migf) {
                mlx5vf_disable_fd(mvdev->resuming_migf);
+               mlx5fv_cmd_clean_migf_resources(mvdev->resuming_migf);
                fput(mvdev->resuming_migf->filp);
                mvdev->resuming_migf = NULL;
        }
                mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx);
                cancel_work_sync(&mvdev->saving_migf->async_data.work);
                mlx5vf_disable_fd(mvdev->saving_migf);
+               mlx5fv_cmd_clean_migf_resources(mvdev->saving_migf);
                fput(mvdev->saving_migf->filp);
                mvdev->saving_migf = NULL;
        }
@@@ -402,7 -877,8 +877,8 @@@ mlx5vf_pci_step_device_state_locked(str
                return NULL;
        }
  
-       if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
+       if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) ||
+           (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
                ret = mlx5vf_cmd_suspend_vhca(mvdev,
                        MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR);
                if (ret)
                return NULL;
        }
  
-       if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) {
+       if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) ||
+           (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) {
                ret = mlx5vf_cmd_resume_vhca(mvdev,
                        MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR);
                if (ret)
        if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
                struct mlx5_vf_migration_file *migf;
  
-               migf = mlx5vf_pci_save_device_data(mvdev);
+               migf = mlx5vf_pci_save_device_data(mvdev, false);
                if (IS_ERR(migf))
                        return ERR_CAST(migf);
                get_file(migf->filp);
                return migf->filp;
        }
  
-       if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP)) {
+       if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) ||
+           (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) ||
+           (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P &&
+            new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
                mlx5vf_disable_fds(mvdev);
                return NULL;
        }
        }
  
        if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
-               ret = mlx5vf_cmd_load_vhca_state(mvdev,
-                                                mvdev->resuming_migf);
-               if (ret)
-                       return ERR_PTR(ret);
+               if (!MLX5VF_PRE_COPY_SUPP(mvdev)) {
+                       ret = mlx5vf_cmd_load_vhca_state(mvdev,
+                                                        mvdev->resuming_migf,
+                                                        mvdev->resuming_migf->buf);
+                       if (ret)
+                               return ERR_PTR(ret);
+               }
                mlx5vf_disable_fds(mvdev);
                return NULL;
        }
  
+       if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) ||
+           (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
+            new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
+               struct mlx5_vf_migration_file *migf;
+               migf = mlx5vf_pci_save_device_data(mvdev, true);
+               if (IS_ERR(migf))
+                       return ERR_CAST(migf);
+               get_file(migf->filp);
+               mvdev->saving_migf = migf;
+               return migf->filp;
+       }
+       if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) {
+               ret = mlx5vf_cmd_suspend_vhca(mvdev,
+                       MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER);
+               if (ret)
+                       return ERR_PTR(ret);
+               ret = mlx5vf_pci_save_device_inc_data(mvdev);
+               return ret ? ERR_PTR(ret) : NULL;
+       }
        /*
         * vfio_mig_get_next_state() does not use arcs other than the above
         */
@@@ -512,6 -1017,23 +1017,23 @@@ mlx5vf_pci_set_device_state(struct vfio
        return res;
  }
  
+ static int mlx5vf_pci_get_data_size(struct vfio_device *vdev,
+                                   unsigned long *stop_copy_length)
+ {
+       struct mlx5vf_pci_core_device *mvdev = container_of(
+               vdev, struct mlx5vf_pci_core_device, core_device.vdev);
+       size_t state_size;
+       int ret;
+       mutex_lock(&mvdev->state_mutex);
+       ret = mlx5vf_cmd_query_vhca_migration_state(mvdev,
+                                                   &state_size, 0);
+       if (!ret)
+               *stop_copy_length = state_size;
+       mlx5vf_state_mutex_unlock(mvdev);
+       return ret;
+ }
  static int mlx5vf_pci_get_device_state(struct vfio_device *vdev,
                                       enum vfio_device_mig_state *curr_state)
  {
@@@ -577,6 -1099,7 +1099,7 @@@ static void mlx5vf_pci_close_device(str
  static const struct vfio_migration_ops mlx5vf_pci_mig_ops = {
        .migration_set_state = mlx5vf_pci_set_device_state,
        .migration_get_state = mlx5vf_pci_get_device_state,
+       .migration_get_data_size = mlx5vf_pci_get_data_size,
  };
  
  static const struct vfio_log_ops mlx5vf_pci_log_ops = {
@@@ -623,9 -1146,6 +1146,9 @@@ static const struct vfio_device_ops mlx
        .mmap = vfio_pci_core_mmap,
        .request = vfio_pci_core_request,
        .match = vfio_pci_core_match,
 +      .bind_iommufd = vfio_iommufd_physical_bind,
 +      .unbind_iommufd = vfio_iommufd_physical_unbind,
 +      .attach_ioas = vfio_iommufd_physical_attach_ioas,
  };
  
  static int mlx5vf_pci_probe(struct pci_dev *pdev,
@@@ -679,18 -1199,7 +1202,7 @@@ static struct pci_driver mlx5vf_pci_dri
        .driver_managed_dma = true,
  };
  
- static void __exit mlx5vf_pci_cleanup(void)
- {
-       pci_unregister_driver(&mlx5vf_pci_driver);
- }
- static int __init mlx5vf_pci_init(void)
- {
-       return pci_register_driver(&mlx5vf_pci_driver);
- }
- module_init(mlx5vf_pci_init);
- module_exit(mlx5vf_pci_cleanup);
+ module_pci_driver(mlx5vf_pci_driver);
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Max Gurtovoy <[email protected]>");
index e030c2120183ef5c6bd4019ad50e5e6ac4f40a6c,f9365a5bc9612a337782ffde649f9529268d3cc2..26a541cc64d114a22e4f9688c71f3a163683ebdc
@@@ -27,6 -27,9 +27,9 @@@
  #include <linux/vgaarb.h>
  #include <linux/nospec.h>
  #include <linux/sched/mm.h>
+ #if IS_ENABLED(CONFIG_EEH)
+ #include <asm/eeh.h>
+ #endif
  
  #include "vfio_pci_priv.h"
  
@@@ -686,7 -689,9 +689,9 @@@ void vfio_pci_core_close_device(struct 
                vdev->sriov_pf_core_dev->vf_token->users--;
                mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock);
        }
-       vfio_spapr_pci_eeh_release(vdev->pdev);
+ #if IS_ENABLED(CONFIG_EEH)
+       eeh_dev_release(vdev->pdev);
+ #endif
        vfio_pci_core_disable(vdev);
  
        mutex_lock(&vdev->igate);
@@@ -705,7 -710,9 +710,9 @@@ EXPORT_SYMBOL_GPL(vfio_pci_core_close_d
  void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev)
  {
        vfio_pci_probe_mmaps(vdev);
-       vfio_spapr_pci_eeh_open(vdev->pdev);
+ #if IS_ENABLED(CONFIG_EEH)
+       eeh_dev_open(vdev->pdev);
+ #endif
  
        if (vdev->sriov_pf_core_dev) {
                mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock);
@@@ -2109,7 -2116,6 +2116,6 @@@ void vfio_pci_core_release_dev(struct v
        mutex_destroy(&vdev->vma_lock);
        kfree(vdev->region);
        kfree(vdev->pm_save);
-       vfio_free_device(core_vdev);
  }
  EXPORT_SYMBOL_GPL(vfio_pci_core_release_dev);
  
@@@ -2128,7 -2134,8 +2134,8 @@@ int vfio_pci_core_register_device(struc
  
        if (vdev->vdev.mig_ops) {
                if (!(vdev->vdev.mig_ops->migration_get_state &&
-                     vdev->vdev.mig_ops->migration_set_state) ||
+                     vdev->vdev.mig_ops->migration_set_state &&
+                     vdev->vdev.mig_ops->migration_get_data_size) ||
                    !(vdev->vdev.migration_flags & VFIO_MIGRATION_STOP_COPY))
                        return -EINVAL;
        }
@@@ -2488,12 -2495,12 +2495,12 @@@ static bool vfio_pci_dev_set_needs_rese
        struct vfio_pci_core_device *cur;
        bool needs_reset = false;
  
 -      list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
 -              /* No VFIO device in the set can have an open device FD */
 -              if (cur->vdev.open_count)
 -                      return false;
 +      /* No other VFIO device in the set can be open. */
 +      if (vfio_device_set_open_count(dev_set) > 1)
 +              return false;
 +
 +      list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list)
                needs_reset |= cur->needs_reset;
 -      }
        return needs_reset;
  }
  
index 5a046098d0bdf4e3324931aa5d7d21929f14dd7f,18faf2678b9984f635065cddd2e103b5a91e41f5..83fe5401559585d241146258b9630d1670ed8600
@@@ -95,7 -95,6 +95,6 @@@ static void vfio_amba_release_dev(struc
  
        vfio_platform_release_common(vdev);
        kfree(vdev->name);
-       vfio_free_device(core_vdev);
  }
  
  static void vfio_amba_remove(struct amba_device *adev)
@@@ -117,9 -116,6 +116,9 @@@ static const struct vfio_device_ops vfi
        .read           = vfio_platform_read,
        .write          = vfio_platform_write,
        .mmap           = vfio_platform_mmap,
 +      .bind_iommufd   = vfio_iommufd_physical_bind,
 +      .unbind_iommufd = vfio_iommufd_physical_unbind,
 +      .attach_ioas    = vfio_iommufd_physical_attach_ioas,
  };
  
  static const struct amba_id pl330_ids[] = {
index b87c3b70878341b51ac148be29e1b84af0c5c37c,9910451dc3415ff9b61d759e1254f89a65d6cf16..22a1efca32a8a86ae24ae683d2035d7cece8e016
@@@ -83,7 -83,6 +83,6 @@@ static void vfio_platform_release_dev(s
                container_of(core_vdev, struct vfio_platform_device, vdev);
  
        vfio_platform_release_common(vdev);
-       vfio_free_device(core_vdev);
  }
  
  static int vfio_platform_remove(struct platform_device *pdev)
@@@ -106,9 -105,6 +105,9 @@@ static const struct vfio_device_ops vfi
        .read           = vfio_platform_read,
        .write          = vfio_platform_write,
        .mmap           = vfio_platform_mmap,
 +      .bind_iommufd   = vfio_iommufd_physical_bind,
 +      .unbind_iommufd = vfio_iommufd_physical_unbind,
 +      .attach_ioas    = vfio_iommufd_physical_attach_ioas,
  };
  
  static struct platform_driver vfio_platform_driver = {
diff --combined drivers/vfio/vfio.h
index 2e05418fd18df0ba63c54d44b657f30259d1395f,a7113b4baaa24228d284be20b83427b2cd4d62b6..f8219a438bfbf58d3f7c08e907a3dc4f660dfed5
@@@ -6,25 -6,14 +6,25 @@@
  #ifndef __VFIO_VFIO_H__
  #define __VFIO_VFIO_H__
  
 +#include <linux/file.h>
  #include <linux/device.h>
  #include <linux/cdev.h>
  #include <linux/module.h>
  
 +struct iommufd_ctx;
  struct iommu_group;
  struct vfio_device;
  struct vfio_container;
  
 +void vfio_device_put_registration(struct vfio_device *device);
 +bool vfio_device_try_get_registration(struct vfio_device *device);
 +int vfio_device_open(struct vfio_device *device,
 +                   struct iommufd_ctx *iommufd, struct kvm *kvm);
 +void vfio_device_close(struct vfio_device *device,
 +                     struct iommufd_ctx *iommufd);
 +
 +extern const struct file_operations vfio_device_fops;
 +
  enum vfio_group_type {
        /*
         * Physical device with IOMMU backing.
@@@ -65,30 -54,14 +65,30 @@@ struct vfio_group 
        struct list_head                device_list;
        struct mutex                    device_lock;
        struct list_head                vfio_next;
 +#if IS_ENABLED(CONFIG_VFIO_CONTAINER)
        struct list_head                container_next;
 +#endif
        enum vfio_group_type            type;
        struct mutex                    group_lock;
        struct kvm                      *kvm;
        struct file                     *opened_file;
        struct blocking_notifier_head   notifier;
 +      struct iommufd_ctx              *iommufd;
  };
  
 +int vfio_device_set_group(struct vfio_device *device,
 +                        enum vfio_group_type type);
 +void vfio_device_remove_group(struct vfio_device *device);
 +void vfio_device_group_register(struct vfio_device *device);
 +void vfio_device_group_unregister(struct vfio_device *device);
 +int vfio_device_group_use_iommu(struct vfio_device *device);
 +void vfio_device_group_unuse_iommu(struct vfio_device *device);
 +void vfio_device_group_close(struct vfio_device *device);
 +bool vfio_device_has_container(struct vfio_device *device);
 +int __init vfio_group_init(void);
 +void vfio_group_cleanup(void);
 +
 +#if IS_ENABLED(CONFIG_VFIO_CONTAINER)
  /* events for the backend driver notify callback */
  enum vfio_iommu_notify_type {
        VFIO_IOMMU_CONTAINER_CLOSE = 0,
@@@ -136,102 -109,34 +136,115 @@@ struct vfio_iommu_driver 
  int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
  void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops);
  
 -bool vfio_assert_device_open(struct vfio_device *device);
 -
  struct vfio_container *vfio_container_from_file(struct file *filep);
 -int vfio_device_assign_container(struct vfio_device *device);
 -void vfio_device_unassign_container(struct vfio_device *device);
 +int vfio_group_use_container(struct vfio_group *group);
 +void vfio_group_unuse_container(struct vfio_group *group);
  int vfio_container_attach_group(struct vfio_container *container,
                                struct vfio_group *group);
  void vfio_group_detach_container(struct vfio_group *group);
  void vfio_device_container_register(struct vfio_device *device);
  void vfio_device_container_unregister(struct vfio_device *device);
 -long vfio_container_ioctl_check_extension(struct vfio_container *container,
 -                                        unsigned long arg);
 +int vfio_device_container_pin_pages(struct vfio_device *device,
 +                                  dma_addr_t iova, int npage,
 +                                  int prot, struct page **pages);
 +void vfio_device_container_unpin_pages(struct vfio_device *device,
 +                                     dma_addr_t iova, int npage);
 +int vfio_device_container_dma_rw(struct vfio_device *device,
 +                               dma_addr_t iova, void *data,
 +                               size_t len, bool write);
 +
  int __init vfio_container_init(void);
  void vfio_container_cleanup(void);
 +#else
 +static inline struct vfio_container *
 +vfio_container_from_file(struct file *filep)
 +{
 +      return NULL;
 +}
 +
 +static inline int vfio_group_use_container(struct vfio_group *group)
 +{
 +      return -EOPNOTSUPP;
 +}
 +
 +static inline void vfio_group_unuse_container(struct vfio_group *group)
 +{
 +}
 +
 +static inline int vfio_container_attach_group(struct vfio_container *container,
 +                                            struct vfio_group *group)
 +{
 +      return -EOPNOTSUPP;
 +}
 +
 +static inline void vfio_group_detach_container(struct vfio_group *group)
 +{
 +}
 +
 +static inline void vfio_device_container_register(struct vfio_device *device)
 +{
 +}
 +
 +static inline void vfio_device_container_unregister(struct vfio_device *device)
 +{
 +}
 +
 +static inline int vfio_device_container_pin_pages(struct vfio_device *device,
 +                                                dma_addr_t iova, int npage,
 +                                                int prot, struct page **pages)
 +{
 +      return -EOPNOTSUPP;
 +}
 +
 +static inline void vfio_device_container_unpin_pages(struct vfio_device *device,
 +                                                   dma_addr_t iova, int npage)
 +{
 +}
 +
 +static inline int vfio_device_container_dma_rw(struct vfio_device *device,
 +                                             dma_addr_t iova, void *data,
 +                                             size_t len, bool write)
 +{
 +      return -EOPNOTSUPP;
 +}
 +
 +static inline int vfio_container_init(void)
 +{
 +      return 0;
 +}
 +static inline void vfio_container_cleanup(void)
 +{
 +}
 +#endif
 +
 +#if IS_ENABLED(CONFIG_IOMMUFD)
 +int vfio_iommufd_bind(struct vfio_device *device, struct iommufd_ctx *ictx);
 +void vfio_iommufd_unbind(struct vfio_device *device);
 +#else
 +static inline int vfio_iommufd_bind(struct vfio_device *device,
 +                                  struct iommufd_ctx *ictx)
 +{
 +      return -EOPNOTSUPP;
 +}
 +
 +static inline void vfio_iommufd_unbind(struct vfio_device *device)
 +{
 +}
 +#endif
  
+ #if IS_ENABLED(CONFIG_VFIO_VIRQFD)
+ int __init vfio_virqfd_init(void);
+ void vfio_virqfd_exit(void);
+ #else
+ static inline int __init vfio_virqfd_init(void)
+ {
+       return 0;
+ }
+ static inline void vfio_virqfd_exit(void)
+ {
+ }
+ #endif
  #ifdef CONFIG_VFIO_NOIOMMU
  extern bool vfio_noiommu __read_mostly;
  #else
diff --combined drivers/vfio/vfio_main.c
index e21ff965141e693bbf89f7d6916f5b33adf65744,03dbcd3d96f0e55ae9c0844f07e8c7365146daaf..5177bb061b17b51b9e8e9030d9bdf13c0b6a2b4a
@@@ -13,6 -13,8 +13,6 @@@
  #include <linux/cdev.h>
  #include <linux/compat.h>
  #include <linux/device.h>
 -#include <linux/file.h>
 -#include <linux/anon_inodes.h>
  #include <linux/fs.h>
  #include <linux/idr.h>
  #include <linux/iommu.h>
@@@ -33,7 -35,6 +33,7 @@@
  #include <linux/pm_runtime.h>
  #include <linux/interval_tree.h>
  #include <linux/iova_bitmap.h>
 +#include <linux/iommufd.h>
  #include "vfio.h"
  
  #define DRIVER_VERSION        "0.3"
  #define DRIVER_DESC   "VFIO - User Level meta-driver"
  
  static struct vfio {
 -      struct class                    *class;
 -      struct list_head                group_list;
 -      struct mutex                    group_lock; /* locks group_list */
 -      struct ida                      group_ida;
 -      dev_t                           group_devt;
        struct class                    *device_class;
        struct ida                      device_ida;
  } vfio;
  
  static DEFINE_XARRAY(vfio_device_set_xa);
 -static const struct file_operations vfio_group_fops;
  
  int vfio_assign_device_set(struct vfio_device *device, void *set_id)
  {
@@@ -118,34 -125,208 +118,34 @@@ static void vfio_release_device_set(str
        xa_unlock(&vfio_device_set_xa);
  }
  
 -/*
 - * Group objects - create, release, get, put, search
 - */
 -static struct vfio_group *
 -__vfio_group_get_from_iommu(struct iommu_group *iommu_group)
 -{
 -      struct vfio_group *group;
 -
 -      /*
 -       * group->iommu_group from the vfio.group_list cannot be NULL
 -       * under the vfio.group_lock.
 -       */
 -      list_for_each_entry(group, &vfio.group_list, vfio_next) {
 -              if (group->iommu_group == iommu_group) {
 -                      refcount_inc(&group->drivers);
 -                      return group;
 -              }
 -      }
 -      return NULL;
 -}
 -
 -static struct vfio_group *
 -vfio_group_get_from_iommu(struct iommu_group *iommu_group)
 -{
 -      struct vfio_group *group;
 -
 -      mutex_lock(&vfio.group_lock);
 -      group = __vfio_group_get_from_iommu(iommu_group);
 -      mutex_unlock(&vfio.group_lock);
 -      return group;
 -}
 -
 -static void vfio_group_release(struct device *dev)
 -{
 -      struct vfio_group *group = container_of(dev, struct vfio_group, dev);
 -
 -      mutex_destroy(&group->device_lock);
 -      mutex_destroy(&group->group_lock);
 -      WARN_ON(group->iommu_group);
 -      ida_free(&vfio.group_ida, MINOR(group->dev.devt));
 -      kfree(group);
 -}
 -
 -static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
 -                                         enum vfio_group_type type)
 -{
 -      struct vfio_group *group;
 -      int minor;
 -
 -      group = kzalloc(sizeof(*group), GFP_KERNEL);
 -      if (!group)
 -              return ERR_PTR(-ENOMEM);
 -
 -      minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
 -      if (minor < 0) {
 -              kfree(group);
 -              return ERR_PTR(minor);
 -      }
 -
 -      device_initialize(&group->dev);
 -      group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
 -      group->dev.class = vfio.class;
 -      group->dev.release = vfio_group_release;
 -      cdev_init(&group->cdev, &vfio_group_fops);
 -      group->cdev.owner = THIS_MODULE;
 -
 -      refcount_set(&group->drivers, 1);
 -      mutex_init(&group->group_lock);
 -      INIT_LIST_HEAD(&group->device_list);
 -      mutex_init(&group->device_lock);
 -      group->iommu_group = iommu_group;
 -      /* put in vfio_group_release() */
 -      iommu_group_ref_get(iommu_group);
 -      group->type = type;
 -      BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
 -
 -      return group;
 -}
 -
 -static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
 -              enum vfio_group_type type)
 +unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set)
  {
 -      struct vfio_group *group;
 -      struct vfio_group *ret;
 -      int err;
 -
 -      group = vfio_group_alloc(iommu_group, type);
 -      if (IS_ERR(group))
 -              return group;
 -
 -      err = dev_set_name(&group->dev, "%s%d",
 -                         group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
 -                         iommu_group_id(iommu_group));
 -      if (err) {
 -              ret = ERR_PTR(err);
 -              goto err_put;
 -      }
 -
 -      mutex_lock(&vfio.group_lock);
 -
 -      /* Did we race creating this group? */
 -      ret = __vfio_group_get_from_iommu(iommu_group);
 -      if (ret)
 -              goto err_unlock;
 -
 -      err = cdev_device_add(&group->cdev, &group->dev);
 -      if (err) {
 -              ret = ERR_PTR(err);
 -              goto err_unlock;
 -      }
 +      struct vfio_device *cur;
 +      unsigned int open_count = 0;
  
 -      list_add(&group->vfio_next, &vfio.group_list);
 +      lockdep_assert_held(&dev_set->lock);
  
 -      mutex_unlock(&vfio.group_lock);
 -      return group;
 -
 -err_unlock:
 -      mutex_unlock(&vfio.group_lock);
 -err_put:
 -      put_device(&group->dev);
 -      return ret;
 -}
 -
 -static void vfio_device_remove_group(struct vfio_device *device)
 -{
 -      struct vfio_group *group = device->group;
 -      struct iommu_group *iommu_group;
 -
 -      if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
 -              iommu_group_remove_device(device->dev);
 -
 -      /* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */
 -      if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock))
 -              return;
 -      list_del(&group->vfio_next);
 -
 -      /*
 -       * We could concurrently probe another driver in the group that might
 -       * race vfio_device_remove_group() with vfio_get_group(), so we have to
 -       * ensure that the sysfs is all cleaned up under lock otherwise the
 -       * cdev_device_add() will fail due to the name aready existing.
 -       */
 -      cdev_device_del(&group->cdev, &group->dev);
 -
 -      mutex_lock(&group->group_lock);
 -      /*
 -       * These data structures all have paired operations that can only be
 -       * undone when the caller holds a live reference on the device. Since
 -       * all pairs must be undone these WARN_ON's indicate some caller did not
 -       * properly hold the group reference.
 -       */
 -      WARN_ON(!list_empty(&group->device_list));
 -      WARN_ON(group->notifier.head);
 -
 -      /*
 -       * Revoke all users of group->iommu_group. At this point we know there
 -       * are no devices active because we are unplugging the last one. Setting
 -       * iommu_group to NULL blocks all new users.
 -       */
 -      if (group->container)
 -              vfio_group_detach_container(group);
 -      iommu_group = group->iommu_group;
 -      group->iommu_group = NULL;
 -      mutex_unlock(&group->group_lock);
 -      mutex_unlock(&vfio.group_lock);
 -
 -      iommu_group_put(iommu_group);
 -      put_device(&group->dev);
 +      list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
 +              open_count += cur->open_count;
 +      return open_count;
  }
 +EXPORT_SYMBOL_GPL(vfio_device_set_open_count);
  
  /*
   * Device objects - create, release, get, put, search
   */
  /* Device reference always implies a group reference */
 -static void vfio_device_put_registration(struct vfio_device *device)
 +void vfio_device_put_registration(struct vfio_device *device)
  {
        if (refcount_dec_and_test(&device->refcount))
                complete(&device->comp);
  }
  
 -static bool vfio_device_try_get_registration(struct vfio_device *device)
 +bool vfio_device_try_get_registration(struct vfio_device *device)
  {
        return refcount_inc_not_zero(&device->refcount);
  }
  
 -static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
 -                                               struct device *dev)
 -{
 -      struct vfio_device *device;
 -
 -      mutex_lock(&group->device_lock);
 -      list_for_each_entry(device, &group->device_list, group_next) {
 -              if (device->dev == dev &&
 -                  vfio_device_try_get_registration(device)) {
 -                      mutex_unlock(&group->device_lock);
 -                      return device;
 -              }
 -      }
 -      mutex_unlock(&group->device_lock);
 -      return NULL;
 -}
 -
  /*
   * VFIO driver API
   */
@@@ -158,15 -339,15 +158,15 @@@ static void vfio_device_release(struct 
        vfio_release_device_set(device);
        ida_free(&vfio.device_ida, device->index);
  
-       /*
-        * kvfree() cannot be done here due to a life cycle mess in
-        * vfio-ccw. Before the ccw part is fixed all drivers are
-        * required to support @release and call vfio_free_device()
-        * from there.
-        */
-       device->ops->release(device);
+       if (device->ops->release)
+               device->ops->release(device);
+       kvfree(device);
  }
  
+ static int vfio_init_device(struct vfio_device *device, struct device *dev,
+                           const struct vfio_device_ops *ops);
  /*
   * Allocate and initialize vfio_device so it can be registered to vfio
   * core.
@@@ -205,11 -386,9 +205,9 @@@ EXPORT_SYMBOL_GPL(_vfio_alloc_device)
  
  /*
   * Initialize a vfio_device so it can be registered to vfio core.
-  *
-  * Only vfio-ccw driver should call this interface.
   */
- int vfio_init_device(struct vfio_device *device, struct device *dev,
-                    const struct vfio_device_ops *ops)
+ static int vfio_init_device(struct vfio_device *device, struct device *dev,
+                           const struct vfio_device_ops *ops)
  {
        int ret;
  
@@@ -241,28 -420,95 +239,16 @@@ out_uninit
        ida_free(&vfio.device_ida, device->index);
        return ret;
  }
- EXPORT_SYMBOL_GPL(vfio_init_device);
- /*
-  * The helper called by driver @release callback to free the device
-  * structure. Drivers which don't have private data to clean can
-  * simply use this helper as its @release.
-  */
- void vfio_free_device(struct vfio_device *device)
- {
-       kvfree(device);
- }
- EXPORT_SYMBOL_GPL(vfio_free_device);
  
 -static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
 -              enum vfio_group_type type)
 -{
 -      struct iommu_group *iommu_group;
 -      struct vfio_group *group;
 -      int ret;
 -
 -      iommu_group = iommu_group_alloc();
 -      if (IS_ERR(iommu_group))
 -              return ERR_CAST(iommu_group);
 -
 -      ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
 -      if (ret)
 -              goto out_put_group;
 -      ret = iommu_group_add_device(iommu_group, dev);
 -      if (ret)
 -              goto out_put_group;
 -
 -      group = vfio_create_group(iommu_group, type);
 -      if (IS_ERR(group)) {
 -              ret = PTR_ERR(group);
 -              goto out_remove_device;
 -      }
 -      iommu_group_put(iommu_group);
 -      return group;
 -
 -out_remove_device:
 -      iommu_group_remove_device(dev);
 -out_put_group:
 -      iommu_group_put(iommu_group);
 -      return ERR_PTR(ret);
 -}
 -
 -static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 -{
 -      struct iommu_group *iommu_group;
 -      struct vfio_group *group;
 -
 -      iommu_group = iommu_group_get(dev);
 -      if (!iommu_group && vfio_noiommu) {
 -              /*
 -               * With noiommu enabled, create an IOMMU group for devices that
 -               * don't already have one, implying no IOMMU hardware/driver
 -               * exists.  Taint the kernel because we're about to give a DMA
 -               * capable device to a user without IOMMU protection.
 -               */
 -              group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
 -              if (!IS_ERR(group)) {
 -                      add_taint(TAINT_USER, LOCKDEP_STILL_OK);
 -                      dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
 -              }
 -              return group;
 -      }
 -
 -      if (!iommu_group)
 -              return ERR_PTR(-EINVAL);
 -
 -      /*
 -       * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
 -       * restore cache coherency. It has to be checked here because it is only
 -       * valid for cases where we are using iommu groups.
 -       */
 -      if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
 -              iommu_group_put(iommu_group);
 -              return ERR_PTR(-EINVAL);
 -      }
 -
 -      group = vfio_group_get_from_iommu(iommu_group);
 -      if (!group)
 -              group = vfio_create_group(iommu_group, VFIO_IOMMU);
 -
 -      /* The vfio_group holds a reference to the iommu_group */
 -      iommu_group_put(iommu_group);
 -      return group;
 -}
 -
  static int __vfio_register_dev(struct vfio_device *device,
 -              struct vfio_group *group)
 +                             enum vfio_group_type type)
  {
 -      struct vfio_device *existing_device;
        int ret;
  
 -      /*
 -       * In all cases group is the output of one of the group allocation
 -       * functions and we have group->drivers incremented for us.
 -       */
 -      if (IS_ERR(group))
 -              return PTR_ERR(group);
 +      if (WARN_ON(device->ops->bind_iommufd &&
 +                  (!device->ops->unbind_iommufd ||
 +                   !device->ops->attach_ioas)))
 +              return -EINVAL;
  
        /*
         * If the driver doesn't specify a set then the device is added to a
        if (!device->dev_set)
                vfio_assign_device_set(device, device);
  
 -      existing_device = vfio_group_get_device(group, device->dev);
 -      if (existing_device) {
 -              /*
 -               * group->iommu_group is non-NULL because we hold the drivers
 -               * refcount.
 -               */
 -              dev_WARN(device->dev, "Device already exists on group %d\n",
 -                       iommu_group_id(group->iommu_group));
 -              vfio_device_put_registration(existing_device);
 -              ret = -EBUSY;
 -              goto err_out;
 -      }
 -
 -      /* Our reference on group is moved to the device */
 -      device->group = group;
 -
        ret = dev_set_name(&device->device, "vfio%d", device->index);
        if (ret)
 -              goto err_out;
 +              return ret;
 +
 +      ret = vfio_device_set_group(device, type);
 +      if (ret)
 +              return ret;
  
        ret = device_add(&device->device);
        if (ret)
        /* Refcounting can't start until the driver calls register */
        refcount_set(&device->refcount, 1);
  
 -      mutex_lock(&group->device_lock);
 -      list_add(&device->group_next, &group->device_list);
 -      mutex_unlock(&group->device_lock);
 +      vfio_device_group_register(device);
  
        return 0;
  err_out:
  
  int vfio_register_group_dev(struct vfio_device *device)
  {
 -      return __vfio_register_dev(device,
 -              vfio_group_find_or_alloc(device->dev));
 +      return __vfio_register_dev(device, VFIO_IOMMU);
  }
  EXPORT_SYMBOL_GPL(vfio_register_group_dev);
  
   */
  int vfio_register_emulated_iommu_dev(struct vfio_device *device)
  {
 -      return __vfio_register_dev(device,
 -              vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
 +      return __vfio_register_dev(device, VFIO_EMULATED_IOMMU);
  }
  EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
  
 -static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
 -                                                   char *buf)
 -{
 -      struct vfio_device *it, *device = ERR_PTR(-ENODEV);
 -
 -      mutex_lock(&group->device_lock);
 -      list_for_each_entry(it, &group->device_list, group_next) {
 -              int ret;
 -
 -              if (it->ops->match) {
 -                      ret = it->ops->match(it, buf);
 -                      if (ret < 0) {
 -                              device = ERR_PTR(ret);
 -                              break;
 -                      }
 -              } else {
 -                      ret = !strcmp(dev_name(it->dev), buf);
 -              }
 -
 -              if (ret && vfio_device_try_get_registration(it)) {
 -                      device = it;
 -                      break;
 -              }
 -      }
 -      mutex_unlock(&group->device_lock);
 -
 -      return device;
 -}
 -
  /*
   * Decrement the device reference count and wait for the device to be
   * removed.  Open file descriptors for the device... */
  void vfio_unregister_group_dev(struct vfio_device *device)
  {
 -      struct vfio_group *group = device->group;
        unsigned int i = 0;
        bool interrupted = false;
        long rc;
                }
        }
  
 -      mutex_lock(&group->device_lock);
 -      list_del(&device->group_next);
 -      mutex_unlock(&group->device_lock);
 +      vfio_device_group_unregister(device);
  
        /* Balances device_add in register path */
        device_del(&device->device);
  
 +      /* Balances vfio_device_set_group in register path */
        vfio_device_remove_group(device);
  }
  EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
  
 -/*
 - * VFIO Group fd, /dev/vfio/$GROUP
 - */
 -/*
 - * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 - * if there was no container to unset.  Since the ioctl is called on
 - * the group, we know that still exists, therefore the only valid
 - * transition here is 1->0.
 - */
 -static int vfio_group_ioctl_unset_container(struct vfio_group *group)
 -{
 -      int ret = 0;
 -
 -      mutex_lock(&group->group_lock);
 -      if (!group->container) {
 -              ret = -EINVAL;
 -              goto out_unlock;
 -      }
 -      if (group->container_users != 1) {
 -              ret = -EBUSY;
 -              goto out_unlock;
 -      }
 -      vfio_group_detach_container(group);
 -
 -out_unlock:
 -      mutex_unlock(&group->group_lock);
 -      return ret;
 -}
 -
 -static int vfio_group_ioctl_set_container(struct vfio_group *group,
 -                                        int __user *arg)
 -{
 -      struct vfio_container *container;
 -      struct fd f;
 -      int ret;
 -      int fd;
 -
 -      if (get_user(fd, arg))
 -              return -EFAULT;
 -
 -      f = fdget(fd);
 -      if (!f.file)
 -              return -EBADF;
 -
 -      mutex_lock(&group->group_lock);
 -      if (group->container || WARN_ON(group->container_users)) {
 -              ret = -EINVAL;
 -              goto out_unlock;
 -      }
 -      if (!group->iommu_group) {
 -              ret = -ENODEV;
 -              goto out_unlock;
 -      }
 -
 -      container = vfio_container_from_file(f.file);
 -      ret = -EINVAL;
 -      if (container) {
 -              ret = vfio_container_attach_group(container, group);
 -              goto out_unlock;
 -      }
 -
 -out_unlock:
 -      mutex_unlock(&group->group_lock);
 -      fdput(f);
 -      return ret;
 -}
 -
 -static const struct file_operations vfio_device_fops;
 -
  /* true if the vfio_device has open_device() called but not close_device() */
 -bool vfio_assert_device_open(struct vfio_device *device)
 +static bool vfio_assert_device_open(struct vfio_device *device)
  {
        return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
  }
  
 -static struct file *vfio_device_open(struct vfio_device *device)
 +static int vfio_device_first_open(struct vfio_device *device,
 +                                struct iommufd_ctx *iommufd, struct kvm *kvm)
  {
 -      struct file *filep;
        int ret;
  
 -      mutex_lock(&device->group->group_lock);
 -      ret = vfio_device_assign_container(device);
 -      mutex_unlock(&device->group->group_lock);
 -      if (ret)
 -              return ERR_PTR(ret);
 +      lockdep_assert_held(&device->dev_set->lock);
  
 -      if (!try_module_get(device->dev->driver->owner)) {
 -              ret = -ENODEV;
 -              goto err_unassign_container;
 -      }
 -
 -      mutex_lock(&device->dev_set->lock);
 -      device->open_count++;
 -      if (device->open_count == 1) {
 -              /*
 -               * Here we pass the KVM pointer with the group under the read
 -               * lock.  If the device driver will use it, it must obtain a
 -               * reference and release it during close_device.
 -               */
 -              mutex_lock(&device->group->group_lock);
 -              device->kvm = device->group->kvm;
 +      if (!try_module_get(device->dev->driver->owner))
 +              return -ENODEV;
  
 -              if (device->ops->open_device) {
 -                      ret = device->ops->open_device(device);
 -                      if (ret)
 -                              goto err_undo_count;
 -              }
 -              vfio_device_container_register(device);
 -              mutex_unlock(&device->group->group_lock);
 -      }
 -      mutex_unlock(&device->dev_set->lock);
 +      if (iommufd)
 +              ret = vfio_iommufd_bind(device, iommufd);
 +      else
 +              ret = vfio_device_group_use_iommu(device);
 +      if (ret)
 +              goto err_module_put;
  
 -      /*
 -       * We can't use anon_inode_getfd() because we need to modify
 -       * the f_mode flags directly to allow more than just ioctls
 -       */
 -      filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
 -                                 device, O_RDWR);
 -      if (IS_ERR(filep)) {
 -              ret = PTR_ERR(filep);
 -              goto err_close_device;
 +      device->kvm = kvm;
 +      if (device->ops->open_device) {
 +              ret = device->ops->open_device(device);
 +              if (ret)
 +                      goto err_unuse_iommu;
        }
 +      return 0;
  
 -      /*
 -       * TODO: add an anon_inode interface to do this.
 -       * Appears to be missing by lack of need rather than
 -       * explicitly prevented.  Now there's need.
 -       */
 -      filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
 -
 -      if (device->group->type == VFIO_NO_IOMMU)
 -              dev_warn(device->dev, "vfio-noiommu device opened by user "
 -                       "(%s:%d)\n", current->comm, task_pid_nr(current));
 -      /*
 -       * On success the ref of device is moved to the file and
 -       * put in vfio_device_fops_release()
 -       */
 -      return filep;
 -
 -err_close_device:
 -      mutex_lock(&device->dev_set->lock);
 -      mutex_lock(&device->group->group_lock);
 -      if (device->open_count == 1 && device->ops->close_device) {
 -              device->ops->close_device(device);
 -
 -              vfio_device_container_unregister(device);
 -      }
 -err_undo_count:
 -      mutex_unlock(&device->group->group_lock);
 -      device->open_count--;
 -      if (device->open_count == 0 && device->kvm)
 -              device->kvm = NULL;
 -      mutex_unlock(&device->dev_set->lock);
 +err_unuse_iommu:
 +      device->kvm = NULL;
 +      if (iommufd)
 +              vfio_iommufd_unbind(device);
 +      else
 +              vfio_device_group_unuse_iommu(device);
 +err_module_put:
        module_put(device->dev->driver->owner);
 -err_unassign_container:
 -      vfio_device_unassign_container(device);
 -      return ERR_PTR(ret);
 -}
 -
 -static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
 -                                        char __user *arg)
 -{
 -      struct vfio_device *device;
 -      struct file *filep;
 -      char *buf;
 -      int fdno;
 -      int ret;
 -
 -      buf = strndup_user(arg, PAGE_SIZE);
 -      if (IS_ERR(buf))
 -              return PTR_ERR(buf);
 -
 -      device = vfio_device_get_from_name(group, buf);
 -      kfree(buf);
 -      if (IS_ERR(device))
 -              return PTR_ERR(device);
 -
 -      fdno = get_unused_fd_flags(O_CLOEXEC);
 -      if (fdno < 0) {
 -              ret = fdno;
 -              goto err_put_device;
 -      }
 -
 -      filep = vfio_device_open(device);
 -      if (IS_ERR(filep)) {
 -              ret = PTR_ERR(filep);
 -              goto err_put_fdno;
 -      }
 -
 -      fd_install(fdno, filep);
 -      return fdno;
 -
 -err_put_fdno:
 -      put_unused_fd(fdno);
 -err_put_device:
 -      vfio_device_put_registration(device);
        return ret;
  }
  
 -static int vfio_group_ioctl_get_status(struct vfio_group *group,
 -                                     struct vfio_group_status __user *arg)
 -{
 -      unsigned long minsz = offsetofend(struct vfio_group_status, flags);
 -      struct vfio_group_status status;
 -
 -      if (copy_from_user(&status, arg, minsz))
 -              return -EFAULT;
 -
 -      if (status.argsz < minsz)
 -              return -EINVAL;
 -
 -      status.flags = 0;
 -
 -      mutex_lock(&group->group_lock);
 -      if (!group->iommu_group) {
 -              mutex_unlock(&group->group_lock);
 -              return -ENODEV;
 -      }
 -
 -      if (group->container)
 -              status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
 -                              VFIO_GROUP_FLAGS_VIABLE;
 -      else if (!iommu_group_dma_owner_claimed(group->iommu_group))
 -              status.flags |= VFIO_GROUP_FLAGS_VIABLE;
 -      mutex_unlock(&group->group_lock);
 -
 -      if (copy_to_user(arg, &status, minsz))
 -              return -EFAULT;
 -      return 0;
 -}
 -
 -static long vfio_group_fops_unl_ioctl(struct file *filep,
 -                                    unsigned int cmd, unsigned long arg)
 +static void vfio_device_last_close(struct vfio_device *device,
 +                                 struct iommufd_ctx *iommufd)
  {
 -      struct vfio_group *group = filep->private_data;
 -      void __user *uarg = (void __user *)arg;
 +      lockdep_assert_held(&device->dev_set->lock);
  
 -      switch (cmd) {
 -      case VFIO_GROUP_GET_DEVICE_FD:
 -              return vfio_group_ioctl_get_device_fd(group, uarg);
 -      case VFIO_GROUP_GET_STATUS:
 -              return vfio_group_ioctl_get_status(group, uarg);
 -      case VFIO_GROUP_SET_CONTAINER:
 -              return vfio_group_ioctl_set_container(group, uarg);
 -      case VFIO_GROUP_UNSET_CONTAINER:
 -              return vfio_group_ioctl_unset_container(group);
 -      default:
 -              return -ENOTTY;
 -      }
 +      if (device->ops->close_device)
 +              device->ops->close_device(device);
 +      device->kvm = NULL;
 +      if (iommufd)
 +              vfio_iommufd_unbind(device);
 +      else
 +              vfio_device_group_unuse_iommu(device);
 +      module_put(device->dev->driver->owner);
  }
  
 -static int vfio_group_fops_open(struct inode *inode, struct file *filep)
 +int vfio_device_open(struct vfio_device *device,
 +                   struct iommufd_ctx *iommufd, struct kvm *kvm)
  {
 -      struct vfio_group *group =
 -              container_of(inode->i_cdev, struct vfio_group, cdev);
 -      int ret;
 -
 -      mutex_lock(&group->group_lock);
 -
 -      /*
 -       * drivers can be zero if this races with vfio_device_remove_group(), it
 -       * will be stable at 0 under the group rwsem
 -       */
 -      if (refcount_read(&group->drivers) == 0) {
 -              ret = -ENODEV;
 -              goto out_unlock;
 -      }
 +      int ret = 0;
  
 -      if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
 -              ret = -EPERM;
 -              goto out_unlock;
 +      mutex_lock(&device->dev_set->lock);
 +      device->open_count++;
 +      if (device->open_count == 1) {
 +              ret = vfio_device_first_open(device, iommufd, kvm);
 +              if (ret)
 +                      device->open_count--;
        }
 +      mutex_unlock(&device->dev_set->lock);
  
 -      /*
 -       * Do we need multiple instances of the group open?  Seems not.
 -       */
 -      if (group->opened_file) {
 -              ret = -EBUSY;
 -              goto out_unlock;
 -      }
 -      group->opened_file = filep;
 -      filep->private_data = group;
 -      ret = 0;
 -out_unlock:
 -      mutex_unlock(&group->group_lock);
        return ret;
  }
  
 -static int vfio_group_fops_release(struct inode *inode, struct file *filep)
 +void vfio_device_close(struct vfio_device *device,
 +                     struct iommufd_ctx *iommufd)
  {
 -      struct vfio_group *group = filep->private_data;
 -
 -      filep->private_data = NULL;
 -
 -      mutex_lock(&group->group_lock);
 -      /*
 -       * Device FDs hold a group file reference, therefore the group release
 -       * is only called when there are no open devices.
 -       */
 -      WARN_ON(group->notifier.head);
 -      if (group->container)
 -              vfio_group_detach_container(group);
 -      group->opened_file = NULL;
 -      mutex_unlock(&group->group_lock);
 -      return 0;
 +      mutex_lock(&device->dev_set->lock);
 +      vfio_assert_device_open(device);
 +      if (device->open_count == 1)
 +              vfio_device_last_close(device, iommufd);
 +      device->open_count--;
 +      mutex_unlock(&device->dev_set->lock);
  }
  
 -static const struct file_operations vfio_group_fops = {
 -      .owner          = THIS_MODULE,
 -      .unlocked_ioctl = vfio_group_fops_unl_ioctl,
 -      .compat_ioctl   = compat_ptr_ioctl,
 -      .open           = vfio_group_fops_open,
 -      .release        = vfio_group_fops_release,
 -};
 -
  /*
   * Wrapper around pm_runtime_resume_and_get().
   * Return error code on failure or 0 on success.
@@@ -477,7 -1000,22 +463,7 @@@ static int vfio_device_fops_release(str
  {
        struct vfio_device *device = filep->private_data;
  
 -      mutex_lock(&device->dev_set->lock);
 -      vfio_assert_device_open(device);
 -      mutex_lock(&device->group->group_lock);
 -      if (device->open_count == 1 && device->ops->close_device)
 -              device->ops->close_device(device);
 -
 -      vfio_device_container_unregister(device);
 -      mutex_unlock(&device->group->group_lock);
 -      device->open_count--;
 -      if (device->open_count == 0)
 -              device->kvm = NULL;
 -      mutex_unlock(&device->dev_set->lock);
 -
 -      module_put(device->dev->driver->owner);
 -
 -      vfio_device_unassign_container(device);
 +      vfio_device_group_close(device);
  
        vfio_device_put_registration(device);
  
@@@ -504,7 -1042,7 +490,7 @@@ int vfio_mig_get_next_state(struct vfio
                            enum vfio_device_mig_state new_fsm,
                            enum vfio_device_mig_state *next_fsm)
  {
-       enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 };
+       enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_PRE_COPY_P2P + 1 };
        /*
         * The coding in this table requires the driver to implement the
         * following FSM arcs:
         *         RUNNING_P2P -> RUNNING
         *         RUNNING_P2P -> STOP
         *         STOP -> RUNNING_P2P
-        * Without P2P the driver must implement:
+        *
+        * If precopy is supported then the driver must support these additional
+        * FSM arcs:
+        *         RUNNING -> PRE_COPY
+        *         PRE_COPY -> RUNNING
+        *         PRE_COPY -> STOP_COPY
+        * However, if precopy and P2P are supported together then the driver
+        * must support these additional arcs beyond the P2P arcs above:
+        *         PRE_COPY -> RUNNING
+        *         PRE_COPY -> PRE_COPY_P2P
+        *         PRE_COPY_P2P -> PRE_COPY
+        *         PRE_COPY_P2P -> RUNNING_P2P
+        *         PRE_COPY_P2P -> STOP_COPY
+        *         RUNNING -> PRE_COPY
+        *         RUNNING_P2P -> PRE_COPY_P2P
+        *
+        * Without P2P and precopy the driver must implement:
         *         RUNNING -> STOP
         *         STOP -> RUNNING
         *
         * The coding will step through multiple states for some combination
         * transitions; if all optional features are supported, this means the
         * following ones:
+        *         PRE_COPY -> PRE_COPY_P2P -> STOP_COPY
+        *         PRE_COPY -> RUNNING -> RUNNING_P2P
+        *         PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP
+        *         PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP -> RESUMING
+        *         PRE_COPY_P2P -> RUNNING_P2P -> RUNNING
+        *         PRE_COPY_P2P -> RUNNING_P2P -> STOP
+        *         PRE_COPY_P2P -> RUNNING_P2P -> STOP -> RESUMING
         *         RESUMING -> STOP -> RUNNING_P2P
+        *         RESUMING -> STOP -> RUNNING_P2P -> PRE_COPY_P2P
         *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING
+        *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY
         *         RESUMING -> STOP -> STOP_COPY
+        *         RUNNING -> RUNNING_P2P -> PRE_COPY_P2P
         *         RUNNING -> RUNNING_P2P -> STOP
         *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
         *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
+        *         RUNNING_P2P -> RUNNING -> PRE_COPY
         *         RUNNING_P2P -> STOP -> RESUMING
         *         RUNNING_P2P -> STOP -> STOP_COPY
+        *         STOP -> RUNNING_P2P -> PRE_COPY_P2P
         *         STOP -> RUNNING_P2P -> RUNNING
+        *         STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY
         *         STOP_COPY -> STOP -> RESUMING
         *         STOP_COPY -> STOP -> RUNNING_P2P
         *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
+        *
+        *  The following transitions are blocked:
+        *         STOP_COPY -> PRE_COPY
+        *         STOP_COPY -> PRE_COPY_P2P
         */
        static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
                [VFIO_DEVICE_STATE_STOP] = {
                        [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
                        [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
+                       [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
+                       [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
                        [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
                        [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
                        [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
                [VFIO_DEVICE_STATE_RUNNING] = {
                        [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
                        [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
+                       [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
+                       [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
                        [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
                        [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
                        [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
                        [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
                },
+               [VFIO_DEVICE_STATE_PRE_COPY] = {
+                       [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING,
+                       [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
+                       [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
+                       [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
+                       [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
+                       [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING,
+                       [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING,
+                       [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
+               },
+               [VFIO_DEVICE_STATE_PRE_COPY_P2P] = {
+                       [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
+                       [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
+                       [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
+                       [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
+                       [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
+                       [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
+                       [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
+                       [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
+               },
                [VFIO_DEVICE_STATE_STOP_COPY] = {
                        [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
                        [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
+                       [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR,
+                       [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR,
                        [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
                        [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
                        [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
                [VFIO_DEVICE_STATE_RESUMING] = {
                        [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
                        [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
+                       [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_STOP,
+                       [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_STOP,
                        [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
                        [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
                        [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
                [VFIO_DEVICE_STATE_RUNNING_P2P] = {
                        [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
                        [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
+                       [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING,
+                       [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
                        [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
                        [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
                        [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
                [VFIO_DEVICE_STATE_ERROR] = {
                        [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
                        [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
+                       [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR,
+                       [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR,
                        [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
                        [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
                        [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
        static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
                [VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
                [VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
+               [VFIO_DEVICE_STATE_PRE_COPY] =
+                       VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY,
+               [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_MIGRATION_STOP_COPY |
+                                                  VFIO_MIGRATION_P2P |
+                                                  VFIO_MIGRATION_PRE_COPY,
                [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
                [VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
                [VFIO_DEVICE_STATE_RUNNING_P2P] =
@@@ -704,6 -1312,34 +760,34 @@@ out_copy
        return 0;
  }
  
+ static int
+ vfio_ioctl_device_feature_migration_data_size(struct vfio_device *device,
+                                             u32 flags, void __user *arg,
+                                             size_t argsz)
+ {
+       struct vfio_device_feature_mig_data_size data_size = {};
+       unsigned long stop_copy_length;
+       int ret;
+       if (!device->mig_ops)
+               return -ENOTTY;
+       ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
+                                sizeof(data_size));
+       if (ret != 1)
+               return ret;
+       ret = device->mig_ops->migration_get_data_size(device, &stop_copy_length);
+       if (ret)
+               return ret;
+       data_size.stop_copy_length = stop_copy_length;
+       if (copy_to_user(arg, &data_size, sizeof(data_size)))
+               return -EFAULT;
+       return 0;
+ }
  static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
                                               u32 flags, void __user *arg,
                                               size_t argsz)
@@@ -931,6 -1567,10 +1015,10 @@@ static int vfio_ioctl_device_feature(st
                return vfio_ioctl_device_feature_logging_report(
                        device, feature.flags, arg->data,
                        feature.argsz - minsz);
+       case VFIO_DEVICE_FEATURE_MIG_DATA_SIZE:
+               return vfio_ioctl_device_feature_migration_data_size(
+                       device, feature.flags, arg->data,
+                       feature.argsz - minsz);
        default:
                if (unlikely(!device->ops->device_feature))
                        return -EINVAL;
@@@ -1000,7 -1640,7 +1088,7 @@@ static int vfio_device_fops_mmap(struc
        return device->ops->mmap(device, vma);
  }
  
 -static const struct file_operations vfio_device_fops = {
 +const struct file_operations vfio_device_fops = {
        .owner          = THIS_MODULE,
        .release        = vfio_device_fops_release,
        .read           = vfio_device_fops_read,
        .mmap           = vfio_device_fops_mmap,
  };
  
 -/**
 - * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
 - * @file: VFIO group file
 - *
 - * The returned iommu_group is valid as long as a ref is held on the file. This
 - * returns a reference on the group. This function is deprecated, only the SPAPR
 - * path in kvm should call it.
 - */
 -struct iommu_group *vfio_file_iommu_group(struct file *file)
 -{
 -      struct vfio_group *group = file->private_data;
 -      struct iommu_group *iommu_group = NULL;
 -
 -      if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
 -              return NULL;
 -
 -      if (!vfio_file_is_group(file))
 -              return NULL;
 -
 -      mutex_lock(&group->group_lock);
 -      if (group->iommu_group) {
 -              iommu_group = group->iommu_group;
 -              iommu_group_ref_get(iommu_group);
 -      }
 -      mutex_unlock(&group->group_lock);
 -      return iommu_group;
 -}
 -EXPORT_SYMBOL_GPL(vfio_file_iommu_group);
 -
 -/**
 - * vfio_file_is_group - True if the file is usable with VFIO aPIS
 - * @file: VFIO group file
 - */
 -bool vfio_file_is_group(struct file *file)
 -{
 -      return file->f_op == &vfio_group_fops;
 -}
 -EXPORT_SYMBOL_GPL(vfio_file_is_group);
 -
 -/**
 - * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
 - *        is always CPU cache coherent
 - * @file: VFIO group file
 - *
 - * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
 - * bit in DMA transactions. A return of false indicates that the user has
 - * rights to access additional instructions such as wbinvd on x86.
 - */
 -bool vfio_file_enforced_coherent(struct file *file)
 -{
 -      struct vfio_group *group = file->private_data;
 -      bool ret;
 -
 -      if (!vfio_file_is_group(file))
 -              return true;
 -
 -      mutex_lock(&group->group_lock);
 -      if (group->container) {
 -              ret = vfio_container_ioctl_check_extension(group->container,
 -                                                         VFIO_DMA_CC_IOMMU);
 -      } else {
 -              /*
 -               * Since the coherency state is determined only once a container
 -               * is attached the user must do so before they can prove they
 -               * have permission.
 -               */
 -              ret = true;
 -      }
 -      mutex_unlock(&group->group_lock);
 -      return ret;
 -}
 -EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
 -
 -/**
 - * vfio_file_set_kvm - Link a kvm with VFIO drivers
 - * @file: VFIO group file
 - * @kvm: KVM to link
 - *
 - * When a VFIO device is first opened the KVM will be available in
 - * device->kvm if one was associated with the group.
 - */
 -void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
 -{
 -      struct vfio_group *group = file->private_data;
 -
 -      if (!vfio_file_is_group(file))
 -              return;
 -
 -      mutex_lock(&group->group_lock);
 -      group->kvm = kvm;
 -      mutex_unlock(&group->group_lock);
 -}
 -EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
 -
 -/**
 - * vfio_file_has_dev - True if the VFIO file is a handle for device
 - * @file: VFIO file to check
 - * @device: Device that must be part of the file
 - *
 - * Returns true if given file has permission to manipulate the given device.
 - */
 -bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
 -{
 -      struct vfio_group *group = file->private_data;
 -
 -      if (!vfio_file_is_group(file))
 -              return false;
 -
 -      return group == device->group;
 -}
 -EXPORT_SYMBOL_GPL(vfio_file_has_dev);
 -
  /*
   * Sub-module support
   */
@@@ -1130,136 -1882,39 +1218,140 @@@ int vfio_set_irqs_validate_and_prepare(
  EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
  
  /*
 - * Module/class support
 + * Pin contiguous user pages and return their associated host pages for local
 + * domain only.
 + * @device [in]  : device
 + * @iova [in]    : starting IOVA of user pages to be pinned.
 + * @npage [in]   : count of pages to be pinned.  This count should not
 + *               be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 + * @prot [in]    : protection flags
 + * @pages[out]   : array of host pages
 + * Return error or number of pages pinned.
 + *
 + * A driver may only call this function if the vfio_device was created
 + * by vfio_register_emulated_iommu_dev() due to vfio_device_container_pin_pages().
   */
 -static char *vfio_devnode(struct device *dev, umode_t *mode)
 +int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
 +                 int npage, int prot, struct page **pages)
  {
 -      return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
 +      /* group->container cannot change while a vfio device is open */
 +      if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device)))
 +              return -EINVAL;
 +      if (vfio_device_has_container(device))
 +              return vfio_device_container_pin_pages(device, iova,
 +                                                     npage, prot, pages);
 +      if (device->iommufd_access) {
 +              int ret;
 +
 +              if (iova > ULONG_MAX)
 +                      return -EINVAL;
 +              /*
 +               * VFIO ignores the sub page offset, npages is from the start of
 +               * a PAGE_SIZE chunk of IOVA. The caller is expected to recover
 +               * the sub page offset by doing:
 +               *     pages[0] + (iova % PAGE_SIZE)
 +               */
 +              ret = iommufd_access_pin_pages(
 +                      device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE),
 +                      npage * PAGE_SIZE, pages,
 +                      (prot & IOMMU_WRITE) ? IOMMUFD_ACCESS_RW_WRITE : 0);
 +              if (ret)
 +                      return ret;
 +              return npage;
 +      }
 +      return -EINVAL;
  }
 +EXPORT_SYMBOL(vfio_pin_pages);
  
 +/*
 + * Unpin contiguous host pages for local domain only.
 + * @device [in]  : device
 + * @iova [in]    : starting address of user pages to be unpinned.
 + * @npage [in]   : count of pages to be unpinned.  This count should not
 + *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 + */
 +void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
 +{
 +      if (WARN_ON(!vfio_assert_device_open(device)))
 +              return;
 +
 +      if (vfio_device_has_container(device)) {
 +              vfio_device_container_unpin_pages(device, iova, npage);
 +              return;
 +      }
 +      if (device->iommufd_access) {
 +              if (WARN_ON(iova > ULONG_MAX))
 +                      return;
 +              iommufd_access_unpin_pages(device->iommufd_access,
 +                                         ALIGN_DOWN(iova, PAGE_SIZE),
 +                                         npage * PAGE_SIZE);
 +              return;
 +      }
 +}
 +EXPORT_SYMBOL(vfio_unpin_pages);
 +
 +/*
 + * This interface allows the CPUs to perform some sort of virtual DMA on
 + * behalf of the device.
 + *
 + * CPUs read/write from/into a range of IOVAs pointing to user space memory
 + * into/from a kernel buffer.
 + *
 + * As the read/write of user space memory is conducted via the CPUs and is
 + * not a real device DMA, it is not necessary to pin the user space memory.
 + *
 + * @device [in]               : VFIO device
 + * @iova [in]         : base IOVA of a user space buffer
 + * @data [in]         : pointer to kernel buffer
 + * @len [in]          : kernel buffer length
 + * @write             : indicate read or write
 + * Return error code on failure or 0 on success.
 + */
 +int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
 +              size_t len, bool write)
 +{
 +      if (!data || len <= 0 || !vfio_assert_device_open(device))
 +              return -EINVAL;
 +
 +      if (vfio_device_has_container(device))
 +              return vfio_device_container_dma_rw(device, iova,
 +                                                  data, len, write);
 +
 +      if (device->iommufd_access) {
 +              unsigned int flags = 0;
 +
 +              if (iova > ULONG_MAX)
 +                      return -EINVAL;
 +
 +              /* VFIO historically tries to auto-detect a kthread */
 +              if (!current->mm)
 +                      flags |= IOMMUFD_ACCESS_RW_KTHREAD;
 +              if (write)
 +                      flags |= IOMMUFD_ACCESS_RW_WRITE;
 +              return iommufd_access_rw(device->iommufd_access, iova, data,
 +                                       len, flags);
 +      }
 +      return -EINVAL;
 +}
 +EXPORT_SYMBOL(vfio_dma_rw);
 +
 +/*
 + * Module/class support
 + */
  static int __init vfio_init(void)
  {
        int ret;
  
 -      ida_init(&vfio.group_ida);
        ida_init(&vfio.device_ida);
 -      mutex_init(&vfio.group_lock);
 -      INIT_LIST_HEAD(&vfio.group_list);
  
 -      ret = vfio_container_init();
 +      ret = vfio_group_init();
        if (ret)
                return ret;
  
 -      /* /dev/vfio/$GROUP */
 -      vfio.class = class_create(THIS_MODULE, "vfio");
 -      if (IS_ERR(vfio.class)) {
 -              ret = PTR_ERR(vfio.class);
 -              goto err_group_class;
 -      }
 -
 -      vfio.class->devnode = vfio_devnode;
 -
+       ret = vfio_virqfd_init();
+       if (ret)
+               goto err_virqfd;
        /* /sys/class/vfio-dev/vfioX */
        vfio.device_class = class_create(THIS_MODULE, "vfio-dev");
        if (IS_ERR(vfio.device_class)) {
                goto err_dev_class;
        }
  
 -      ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
 -      if (ret)
 -              goto err_alloc_chrdev;
 -
        pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
        return 0;
  
 -err_alloc_chrdev:
 -      class_destroy(vfio.device_class);
 -      vfio.device_class = NULL;
  err_dev_class:
 -      class_destroy(vfio.class);
 -      vfio.class = NULL;
 -err_group_class:
+       vfio_virqfd_exit();
+ err_virqfd:
 -      vfio_container_cleanup();
 +      vfio_group_cleanup();
        return ret;
  }
  
  static void __exit vfio_cleanup(void)
  {
 -      WARN_ON(!list_empty(&vfio.group_list));
 -
        ida_destroy(&vfio.device_ida);
 -      ida_destroy(&vfio.group_ida);
 -      unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
        class_destroy(vfio.device_class);
        vfio.device_class = NULL;
 -      class_destroy(vfio.class);
+       vfio_virqfd_exit();
 -      vfio_container_cleanup();
 -      vfio.class = NULL;
 +      vfio_group_cleanup();
        xa_destroy(&vfio_device_set_xa);
  }
  
@@@ -1291,4 -1965,6 +1386,4 @@@ MODULE_VERSION(DRIVER_VERSION)
  MODULE_LICENSE("GPL v2");
  MODULE_AUTHOR(DRIVER_AUTHOR);
  MODULE_DESCRIPTION(DRIVER_DESC);
 -MODULE_ALIAS_MISCDEV(VFIO_MINOR);
 -MODULE_ALIAS("devname:vfio/vfio");
  MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");
index 152d2d7f87432f6fe267c210890fe3de3e6e4643,230a96626a5f34a08f4e3ba89a61dde6b9f3dfe5..f3d1c62c98ddc4d6d34e7818dd0284d1e4c3a621
@@@ -68,7 -68,6 +68,7 @@@ enum 
        MLX5_SET_HCA_CAP_OP_MOD_ODP                   = 0x2,
        MLX5_SET_HCA_CAP_OP_MOD_ATOMIC                = 0x3,
        MLX5_SET_HCA_CAP_OP_MOD_ROCE                  = 0x4,
 +      MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2       = 0x20,
        MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION       = 0x25,
  };
  
@@@ -446,10 -445,7 +446,10 @@@ struct mlx5_ifc_flow_table_prop_layout_
        u8         max_modify_header_actions[0x8];
        u8         max_ft_level[0x8];
  
 -      u8         reserved_at_40[0x6];
 +      u8         reformat_add_esp_trasport[0x1];
 +      u8         reserved_at_41[0x2];
 +      u8         reformat_del_esp_trasport[0x1];
 +      u8         reserved_at_44[0x2];
        u8         execute_aso[0x1];
        u8         reserved_at_47[0x19];
  
@@@ -642,10 -638,8 +642,10 @@@ struct mlx5_ifc_fte_match_set_misc2_bit
        u8         reserved_at_1a0[0x8];
  
        u8         macsec_syndrome[0x8];
 +      u8         ipsec_syndrome[0x8];
 +      u8         reserved_at_1b8[0x8];
  
 -      u8         reserved_at_1b0[0x50];
 +      u8         reserved_at_1c0[0x40];
  };
  
  struct mlx5_ifc_fte_match_set_misc3_bits {
@@@ -1881,17 -1875,19 +1881,22 @@@ struct mlx5_ifc_cmd_hca_cap_bits 
  };
  
  struct mlx5_ifc_cmd_hca_cap_2_bits {
 -      u8         reserved_at_0[0xa0];
 +      u8         reserved_at_0[0x80];
 +
 +      u8         migratable[0x1];
 +      u8         reserved_at_81[0x1f];
  
        u8         max_reformat_insert_size[0x8];
        u8         max_reformat_insert_offset[0x8];
        u8         max_reformat_remove_size[0x8];
        u8         max_reformat_remove_offset[0x8];
  
-       u8         reserved_at_c0[0xe0];
+       u8         reserved_at_c0[0x8];
+       u8         migration_multi_load[0x1];
+       u8         migration_tracking_state[0x1];
+       u8         reserved_at_ca[0x16];
+       u8         reserved_at_e0[0xc0];
  
        u8         reserved_at_1a0[0xb];
        u8         log_min_mkey_entity_size[0x5];
@@@ -6113,38 -6109,6 +6118,38 @@@ struct mlx5_ifc_match_definer_format_32
        u8         inner_dmac_15_0[0x10];
  };
  
 +enum {
 +      MLX5_IFC_DEFINER_FORMAT_ID_SELECT = 61,
 +};
 +
 +#define MLX5_IFC_DEFINER_FORMAT_OFFSET_UNUSED 0x0
 +#define MLX5_IFC_DEFINER_FORMAT_OFFSET_OUTER_ETH_PKT_LEN 0x48
 +#define MLX5_IFC_DEFINER_DW_SELECTORS_NUM 9
 +#define MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM 8
 +
 +struct mlx5_ifc_match_definer_match_mask_bits {
 +      u8         reserved_at_1c0[5][0x20];
 +      u8         match_dw_8[0x20];
 +      u8         match_dw_7[0x20];
 +      u8         match_dw_6[0x20];
 +      u8         match_dw_5[0x20];
 +      u8         match_dw_4[0x20];
 +      u8         match_dw_3[0x20];
 +      u8         match_dw_2[0x20];
 +      u8         match_dw_1[0x20];
 +      u8         match_dw_0[0x20];
 +
 +      u8         match_byte_7[0x8];
 +      u8         match_byte_6[0x8];
 +      u8         match_byte_5[0x8];
 +      u8         match_byte_4[0x8];
 +
 +      u8         match_byte_3[0x8];
 +      u8         match_byte_2[0x8];
 +      u8         match_byte_1[0x8];
 +      u8         match_byte_0[0x8];
 +};
 +
  struct mlx5_ifc_match_definer_bits {
        u8         modify_field_select[0x40];
  
        u8         reserved_at_80[0x10];
        u8         format_id[0x10];
  
 -      u8         reserved_at_a0[0x160];
 +      u8         reserved_at_a0[0x60];
 +
 +      u8         format_select_dw3[0x8];
 +      u8         format_select_dw2[0x8];
 +      u8         format_select_dw1[0x8];
 +      u8         format_select_dw0[0x8];
 +
 +      u8         format_select_dw7[0x8];
 +      u8         format_select_dw6[0x8];
 +      u8         format_select_dw5[0x8];
 +      u8         format_select_dw4[0x8];
 +
 +      u8         reserved_at_100[0x18];
 +      u8         format_select_dw8[0x8];
 +
 +      u8         reserved_at_120[0x20];
 +
 +      u8         format_select_byte3[0x8];
 +      u8         format_select_byte2[0x8];
 +      u8         format_select_byte1[0x8];
 +      u8         format_select_byte0[0x8];
 +
 +      u8         format_select_byte7[0x8];
 +      u8         format_select_byte6[0x8];
 +      u8         format_select_byte5[0x8];
 +      u8         format_select_byte4[0x8];
 +
 +      u8         reserved_at_180[0x40];
  
 -      u8         match_mask[16][0x20];
 +      union {
 +              struct {
 +                      u8         match_mask[16][0x20];
 +              };
 +              struct mlx5_ifc_match_definer_match_mask_bits match_mask_format;
 +      };
  };
  
  struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
@@@ -6457,9 -6389,6 +6462,9 @@@ enum mlx5_reformat_ctx_type 
        MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2,
        MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3,
        MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4,
 +      MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4 = 0x5,
 +      MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT = 0x8,
 +      MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb,
        MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf,
        MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10,
        MLX5_REFORMAT_TYPE_ADD_MACSEC = 0x11,
@@@ -11639,41 -11568,6 +11644,41 @@@ enum 
        MLX5_IPSEC_OBJECT_ICV_LEN_16B,
  };
  
 +enum {
 +      MLX5_IPSEC_ASO_REG_C_0_1 = 0x0,
 +      MLX5_IPSEC_ASO_REG_C_2_3 = 0x1,
 +      MLX5_IPSEC_ASO_REG_C_4_5 = 0x2,
 +      MLX5_IPSEC_ASO_REG_C_6_7 = 0x3,
 +};
 +
 +enum {
 +      MLX5_IPSEC_ASO_MODE              = 0x0,
 +      MLX5_IPSEC_ASO_REPLAY_PROTECTION = 0x1,
 +      MLX5_IPSEC_ASO_INC_SN            = 0x2,
 +};
 +
 +struct mlx5_ifc_ipsec_aso_bits {
 +      u8         valid[0x1];
 +      u8         reserved_at_201[0x1];
 +      u8         mode[0x2];
 +      u8         window_sz[0x2];
 +      u8         soft_lft_arm[0x1];
 +      u8         hard_lft_arm[0x1];
 +      u8         remove_flow_enable[0x1];
 +      u8         esn_event_arm[0x1];
 +      u8         reserved_at_20a[0x16];
 +
 +      u8         remove_flow_pkt_cnt[0x20];
 +
 +      u8         remove_flow_soft_lft[0x20];
 +
 +      u8         reserved_at_260[0x80];
 +
 +      u8         mode_parameter[0x20];
 +
 +      u8         replay_protection_window[0x100];
 +};
 +
  struct mlx5_ifc_ipsec_obj_bits {
        u8         modify_field_select[0x40];
        u8         full_offload[0x1];
  
        u8         implicit_iv[0x40];
  
 -      u8         reserved_at_100[0x700];
 +      u8         reserved_at_100[0x8];
 +      u8         ipsec_aso_access_pd[0x18];
 +      u8         reserved_at_120[0xe0];
 +
 +      struct mlx5_ifc_ipsec_aso_bits ipsec_aso;
  };
  
  struct mlx5_ifc_create_ipsec_obj_in_bits {
@@@ -12033,7 -11923,8 +12038,8 @@@ struct mlx5_ifc_query_vhca_migration_st
        u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
  
-       u8         reserved_at_40[0x10];
+       u8         incremental[0x1];
+       u8         reserved_at_41[0xf];
        u8         vhca_id[0x10];
  
        u8         reserved_at_60[0x20];
@@@ -12059,7 -11950,9 +12065,9 @@@ struct mlx5_ifc_save_vhca_state_in_bit
        u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
  
-       u8         reserved_at_40[0x10];
+       u8         incremental[0x1];
+       u8         set_track[0x1];
+       u8         reserved_at_42[0xe];
        u8         vhca_id[0x10];
  
        u8         reserved_at_60[0x20];
diff --combined include/linux/vfio.h
index a615542df1e04f3a7aa25a2bbec3c910dd13bbc8,b4d5d4ca3d7d5e930042343a00b92fe1887c5898..35be78e9ae57d9576d7ccdee6296778fb2c9d9f3
@@@ -17,9 -17,6 +17,9 @@@
  #include <linux/iova_bitmap.h>
  
  struct kvm;
 +struct iommufd_ctx;
 +struct iommufd_device;
 +struct iommufd_access;
  
  /*
   * VFIO devices can be placed in a set, this allows all devices to share this
@@@ -57,12 -54,6 +57,12 @@@ struct vfio_device 
        struct completion comp;
        struct list_head group_next;
        struct list_head iommu_entry;
 +      struct iommufd_access *iommufd_access;
 +#if IS_ENABLED(CONFIG_IOMMUFD)
 +      struct iommufd_device *iommufd_device;
 +      struct iommufd_ctx *iommufd_ictx;
 +      bool iommufd_attached;
 +#endif
  };
  
  /**
@@@ -89,10 -80,6 +89,10 @@@ struct vfio_device_ops 
        char    *name;
        int     (*init)(struct vfio_device *vdev);
        void    (*release)(struct vfio_device *vdev);
 +      int     (*bind_iommufd)(struct vfio_device *vdev,
 +                              struct iommufd_ctx *ictx, u32 *out_device_id);
 +      void    (*unbind_iommufd)(struct vfio_device *vdev);
 +      int     (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id);
        int     (*open_device)(struct vfio_device *vdev);
        void    (*close_device)(struct vfio_device *vdev);
        ssize_t (*read)(struct vfio_device *vdev, char __user *buf,
                                  void __user *arg, size_t argsz);
  };
  
 +#if IS_ENABLED(CONFIG_IOMMUFD)
 +int vfio_iommufd_physical_bind(struct vfio_device *vdev,
 +                             struct iommufd_ctx *ictx, u32 *out_device_id);
 +void vfio_iommufd_physical_unbind(struct vfio_device *vdev);
 +int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
 +int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
 +                             struct iommufd_ctx *ictx, u32 *out_device_id);
 +void vfio_iommufd_emulated_unbind(struct vfio_device *vdev);
 +int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
 +#else
 +#define vfio_iommufd_physical_bind                                      \
 +      ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx,   \
 +                u32 *out_device_id)) NULL)
 +#define vfio_iommufd_physical_unbind \
 +      ((void (*)(struct vfio_device *vdev)) NULL)
 +#define vfio_iommufd_physical_attach_ioas \
 +      ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
 +#define vfio_iommufd_emulated_bind                                      \
 +      ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx,   \
 +                u32 *out_device_id)) NULL)
 +#define vfio_iommufd_emulated_unbind \
 +      ((void (*)(struct vfio_device *vdev)) NULL)
 +#define vfio_iommufd_emulated_attach_ioas \
 +      ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
 +#endif
 +
  /**
   * @migration_set_state: Optional callback to change the migration state for
   *         devices that support migration. It's mandatory for
   * @migration_get_state: Optional callback to get the migration state for
   *         devices that support migration. It's mandatory for
   *         VFIO_DEVICE_FEATURE_MIGRATION migration support.
+  * @migration_get_data_size: Optional callback to get the estimated data
+  *          length that will be required to complete stop copy. It's mandatory for
+  *          VFIO_DEVICE_FEATURE_MIGRATION migration support.
   */
  struct vfio_migration_ops {
        struct file *(*migration_set_state)(
                enum vfio_device_mig_state new_state);
        int (*migration_get_state)(struct vfio_device *device,
                                   enum vfio_device_mig_state *curr_state);
+       int (*migration_get_data_size)(struct vfio_device *device,
+                                      unsigned long *stop_copy_length);
  };
  
  /**
@@@ -215,9 -181,6 +220,6 @@@ struct vfio_device *_vfio_alloc_device(
                                        dev, ops),                              \
                     struct dev_struct, member)
  
- int vfio_init_device(struct vfio_device *device, struct device *dev,
-                    const struct vfio_device_ops *ops);
- void vfio_free_device(struct vfio_device *device);
  static inline void vfio_put_device(struct vfio_device *device)
  {
        put_device(&device->device);
@@@ -228,7 -191,6 +230,7 @@@ int vfio_register_emulated_iommu_dev(st
  void vfio_unregister_group_dev(struct vfio_device *device);
  
  int vfio_assign_device_set(struct vfio_device *device, void *set_id);
 +unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set);
  
  int vfio_mig_get_next_state(struct vfio_device *device,
                            enum vfio_device_mig_state cur_fsm,
@@@ -271,29 -233,6 +273,6 @@@ int vfio_set_irqs_validate_and_prepare(
                                       int num_irqs, int max_irq_type,
                                       size_t *data_size);
  
- struct pci_dev;
- #if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH)
- void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
- void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
- long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd,
-                               unsigned long arg);
- #else
- static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
- {
- }
- static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
- {
- }
- static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
-                                             unsigned int cmd,
-                                             unsigned long arg)
- {
-       return -ENOTTY;
- }
- #endif /* CONFIG_VFIO_SPAPR_EEH */
  /*
   * IRQfd - generic
   */
This page took 0.324229 seconds and 4 git commands to generate.