Git Repo - linux.git/commitdiff
Merge tag 'timers-core-2024-11-18' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Linus Torvalds <[email protected]>
Wed, 20 Nov 2024 00:35:06 +0000 (16:35 -0800)
committer Linus Torvalds <[email protected]>
Wed, 20 Nov 2024 00:35:06 +0000 (16:35 -0800)
Pull timer updates from Thomas Gleixner:
 "A rather large update for timekeeping and timers:

   - The final step to get rid of auto-rearming posix-timers

     posix-timers are currently auto-rearmed by the kernel when the
     signal of the timer is ignored so that the timer signal can be
     delivered once the corresponding signal is unignored.

     This requires throttling the timer to prevent a DoS via small
     intervals, and it keeps the system pointlessly out of low power
     states for no value. This is a long-standing, non-trivial problem
     due to the lock order of the posix-timer lock and the sighand lock,
     along with lifetime issues as the timer and the sigqueue have
     different lifetime rules.

     Cure this by:

       - Embedding the sigqueue into the timer struct so both have the
         same lifetime rules. Aside from that, this also avoids the
         lookup of the timer in the signal delivery and rearm path, as
         it's now always a valid container_of().

       - Queuing ignored timer signals onto a separate ignored list.

       - Moving queued timer signals onto the ignored list when the
         signal is switched to SIG_IGN before it could be delivered.

       - Walking the ignored list when SIG_IGN is lifted and requeuing
         the signals to the actual signal lists. This allows the signal
         delivery code to rearm the timer.

     This also required consolidating the signal delivery rules so they
     are consistent across all situations. With that, all self-test
     scenarios finally succeed.
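
     As a minimal sketch (user-space compilable stand-ins, not the real
     structs from include/linux/posix-timers.h), embedding the sigqueue
     turns the reverse lookup into a plain container_of():

         #include <stddef.h>

         /* Illustrative stand-ins; the real kernel layout differs.
          * Embedding gives the timer and its sigqueue one lifetime. */
         struct sigqueue_s {
                 int si_signo;
         };

         struct k_itimer_s {
                 int it_id;
                 struct sigqueue_s sigq;  /* embedded, not a pointer */
         };

         #define container_of(ptr, type, member) \
                 ((type *)((char *)(ptr) - offsetof(type, member)))

         /* Signal delivery/rearm path: recover the timer from its
          * queued sigqueue without any lookup. */
         static struct k_itimer_s *timer_from_sigq(struct sigqueue_s *q)
         {
                 return container_of(q, struct k_itimer_s, sigq);
         }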

   - Core infrastructure for VFS multigrain timestamping

     This is required to allow the kernel to use coarse-grained
     timestamps by default and switch to fine-grained timestamps when
     inode attributes are actively observed via getattr().

     These changes have been provided to the VFS tree as well, so that
     the VFS specific infrastructure could be built on top.
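
     A hedged sketch of the idea, with user-space POSIX clocks standing
     in for the kernel's timestamp sources (the in-tree code differs):

         #include <stdbool.h>
         #include <time.h>

         struct mg_inode {
                 struct timespec mtime;
                 bool attrs_observed;   /* set by the getattr() path */
         };

         /* Pay for a fine-grained clock read only when the previous
          * stamp could actually have been observed. */
         static void mg_update_mtime(struct mg_inode *inode)
         {
                 clockid_t clk = inode->attrs_observed
                                 ? CLOCK_REALTIME
                                 : CLOCK_REALTIME_COARSE;

                 clock_gettime(clk, &inode->mtime);
                 inode->attrs_observed = false;
         }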

   - Cleanup and consolidation of the sleep() infrastructure

       - Move all sleep and timeout functions into one file

       - Rework udelay() and ndelay() into properly documented inline
         functions and replace the hardcoded magic numbers with proper
         defines (sketched after this list).

       - Rework the fsleep() implementation to take the reality of
         timer wheel granularity on different HZ values into account
         (see the sketch after this list). Right now the boundaries are
         hard-coded time ranges which fail to provide the requested
         accuracy on different HZ settings.

       - Update documentation for all sleep/timeout related functions
         and fix up stale documentation links all over the place

       - Fixup a few usage sites
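
     A hedged sketch of both reworks mentioned above; the defines,
     thresholds and helper names are illustrative, not the kernel's
     actual values:

         #define NSEC_PER_USEC      1000UL
         #define USEC_PER_SEC       1000000UL
         #define HZ                 250UL   /* example configuration */
         #define USEC_PER_TICK      (USEC_PER_SEC / HZ)
         #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

         /* Assumed low-level primitives standing in for the kernel's. */
         extern void __delay_ns(unsigned long nsecs);
         extern void usleep_range(unsigned long min, unsigned long max);
         extern void msleep(unsigned int msecs);

         /* Documented inline wrappers instead of macros built around
          * magic scaling constants. */
         static inline void udelay(unsigned long usecs)
         {
                 __delay_ns(usecs * NSEC_PER_USEC);
         }

         static inline void ndelay(unsigned long nsecs)
         {
                 __delay_ns(nsecs);
         }

         /* fsleep(): pick the cheapest mechanism that still meets the
          * accuracy request, deriving the cutoff from the tick period
          * instead of hard-coding time ranges. */
         static void fsleep(unsigned long usecs)
         {
                 if (usecs <= 10)                /* too short to schedule */
                         udelay(usecs);
                 else if (usecs < USEC_PER_TICK) /* below wheel granularity */
                         usleep_range(usecs, 2 * usecs);
                 else                            /* wheel is accurate enough */
                         msleep(DIV_ROUND_UP(usecs, 1000UL));
         }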

   - Rework of timekeeping and adjtimex(2) to prepare for multiple PTP
     clocks

     A system can have multiple PTP clocks which participate in
     separate and independent PTP clock domains. So far the kernel only
     considers the PTP clock which is based on CLOCK TAI relevant, as
     that's the clock which drives the timekeeping adjustments via the
     various user space daemons through adjtimex(2).

     The non-TAI based clock domains are accessible via the file
     descriptor based posix clocks, but their usability is very limited.
     They can't be accessed quickly, as they always go all the way out
     to the hardware, and they cannot be utilized in the kernel itself.

     As Time Sensitive Networking (TSN) gains traction it is required to
     provide fast user and kernel space access to these clocks.

     The approach taken is to utilize the timekeeping and adjtimex(2)
     infrastructure to provide this access in a way similar to how the
     kernel provides access to clock MONOTONIC, REALTIME, etc.

     Instead of creating duplicated infrastructure, this rework converts
     timekeeping and adjtimex(2) into generic functionality which
     operates on pointers to data structures instead of using static
     variables.

     This allows time accessors and adjtimex(2) functionality to be
     provided for the independent PTP clocks in a subsequent step.
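
     The conversion pattern, as a hedged sketch (names are illustrative,
     not the actual kernel symbols):

         struct timekeeper_sk {
                 long long offs_boot;
                 /* ... clock state, adjustment factors ... */
         };

         /* Before: every helper is implicitly bound to the one global
          * timekeeper. */
         static struct timekeeper_sk tk_core;

         static long long get_offs_boot_old(void)
         {
                 return tk_core.offs_boot;   /* static variable baked in */
         }

         /* After: generic code that operates on whichever timekeeper it
          * is handed, so per-PTP-clock instances can reuse it later. */
         static long long get_offs_boot(const struct timekeeper_sk *tk)
         {
                 return tk->offs_boot;
         }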

   - Consolidate hrtimer initialization

     hrtimers are, for historical reasons, set up by initializing the
     data structure and then separately setting the callback function.

     That's an extra, unnecessary step and makes Rust support less
     straightforward than it should be.

     Provide a new set of hrtimer_setup*() functions and convert the
     core code and a few usage sites of the less frequently used
     interfaces over.

     The bulk of the hrtimer_init() to hrtimer_setup() conversion is
     already prepared and scheduled for the next merge window.
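
     The before/after shape of the conversion, as a hedged fragment (the
     argument order matches the hrtimer_setup() introduced by this
     series, but verify against include/linux/hrtimer.h):

         /* Before: two-step setup, callback assigned by hand. */
         hrtimer_init(&ctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
         ctx->timer.function = my_timer_fn;

         /* After: one call wires up data structure and callback. */
         hrtimer_setup(&ctx->timer, my_timer_fn, CLOCK_MONOTONIC,
                       HRTIMER_MODE_REL);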

   - Drivers:

       - Ensure that the global timekeeping clocksource utilizes the
         cluster 0 timer on MIPS multi-cluster systems.

         Otherwise, CPUs on different clusters use their cluster-specific
         clocksource, which is not guaranteed to be synchronized with
         other clusters.

       - Mostly boring cleanups, fixes, improvements and code movement"

* tag 'timers-core-2024-11-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (140 commits)
  posix-timers: Fix spurious warning on double enqueue versus do_exit()
  clocksource/drivers/arm_arch_timer: Use of_property_present() for non-boolean properties
  clocksource/drivers/gpx: Remove redundant casts
  clocksource/drivers/timer-ti-dm: Fix child node refcount handling
  dt-bindings: timer: actions,owl-timer: convert to YAML
  clocksource/drivers/ralink: Add Ralink System Tick Counter driver
  clocksource/drivers/mips-gic-timer: Always use cluster 0 counter as clocksource
  clocksource/drivers/timer-ti-dm: Don't fail probe if int not found
  clocksource/drivers:sp804: Make user selectable
  clocksource/drivers/dw_apb: Remove unused dw_apb_clockevent functions
  hrtimers: Delete hrtimer_init_on_stack()
  alarmtimer: Switch to use hrtimer_setup() and hrtimer_setup_on_stack()
  io_uring: Switch to use hrtimer_setup_on_stack()
  sched/idle: Switch to use hrtimer_setup_on_stack()
  hrtimers: Delete hrtimer_init_sleeper_on_stack()
  wait: Switch to use hrtimer_setup_sleeper_on_stack()
  timers: Switch to use hrtimer_setup_sleeper_on_stack()
  net: pktgen: Switch to use hrtimer_setup_sleeper_on_stack()
  futex: Switch to use hrtimer_setup_sleeper_on_stack()
  fs/aio: Switch to use hrtimer_setup_sleeper_on_stack()
  ...

25 files changed:
MAINTAINERS
arch/riscv/configs/defconfig
arch/x86/Kconfig
fs/aio.c
fs/proc/base.c
fs/timerfd.c
include/linux/tick.h
include/linux/wait.h
io_uring/io_uring.c
io_uring/rw.c
io_uring/timeout.c
kernel/cpu.c
kernel/fork.c
kernel/futex/core.c
kernel/sched/idle.c
kernel/signal.c
kernel/time/hrtimer.c
kernel/time/tick-sched.c
kernel/time/timekeeping.c
kernel/time/timer.c
kernel/time/vsyscall.c
lib/Kconfig.debug
mm/damon/core.c
net/bluetooth/hci_event.c
net/netfilter/xt_IDLETIMER.c

diff --combined MAINTAINERS
index b23f8e9f23b306fc692d6d296dac45bc16770259,3a24287712f190ac26731596f158a773358da838..a4628d566e9881d7077b4aaf60c8156cb8451fcd
@@@ -258,6 -258,12 +258,6 @@@ L:        [email protected]
  S:    Maintained
  F:    drivers/net/ethernet/alteon/acenic*
  
 -ACER ASPIRE 1 EMBEDDED CONTROLLER DRIVER
 -M:    Nikita Travkin <[email protected]>
 -S:    Maintained
 -F:    Documentation/devicetree/bindings/platform/acer,aspire1-ec.yaml
 -F:    drivers/platform/arm64/acer-aspire1-ec.c
 -
  ACER ASPIRE ONE TEMPERATURE AND FAN DRIVER
  M:    Peter Kaestle <[email protected]>
  L:    [email protected]
@@@ -854,7 -860,7 +854,7 @@@ F: drivers/crypto/allwinner
  
  ALLWINNER DMIC DRIVERS
  M:    Ban Tao <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/allwinner,sun50i-h6-dmic.yaml
  F:    sound/soc/sunxi/sun50i-dmic.c
@@@ -882,6 -888,7 +882,6 @@@ F: drivers/staging/media/sunxi/cedrus
  
  ALPHA PORT
  M:    Richard Henderson <[email protected]>
 -M:    Ivan Kokshaysky <[email protected]>
  M:    Matt Turner <[email protected]>
  L:    [email protected]
  S:    Odd Fixes
@@@ -1174,9 -1181,8 +1174,9 @@@ F:      Documentation/hid/amd-sfh
  F:    drivers/hid/amd-sfh-hid/
  
  AMD SPI DRIVER
 -M:    Sanjay R Mehta <[email protected]>
 -S:    Maintained
 +M:    Raju Rangoju <[email protected]>
 +L:    [email protected]
 +S:    Supported
  F:    drivers/spi/spi-amd.c
  
  AMD XGBE DRIVER
@@@ -1511,7 -1517,7 +1511,7 @@@ F:      drivers/iio/gyro/adxrs290.
  ANALOG DEVICES INC ASOC CODEC DRIVERS
  M:    Lars-Peter Clausen <[email protected]>
  M:    Nuno Sá <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Supported
  W:    http://wiki.analog.com/
  W:    https://ez.analog.com/linux-software-drivers
@@@ -1588,7 -1594,7 +1588,7 @@@ F:      drivers/rtc/rtc-goldfish.
  AOA (Apple Onboard Audio) ALSA DRIVER
  M:    Johannes Berg <[email protected]>
  L:    [email protected]
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    sound/aoa/
  
@@@ -1755,8 -1761,8 +1755,8 @@@ F:      include/uapi/linux/if_arcnet.
  ARM AND ARM64 SoC SUB-ARCHITECTURES (COMMON PARTS)
  M:    Arnd Bergmann <[email protected]>
  M:    Olof Johansson <[email protected]>
 -M:    [email protected]
  L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  P:    Documentation/process/maintainer-soc.rst
  C:    irc://irc.libera.chat/armlinux
@@@ -1998,7 -2004,7 +1998,7 @@@ F:      Documentation/devicetree/bindings/mm
  F:    Documentation/devicetree/bindings/net/actions,owl-emac.yaml
  F:    Documentation/devicetree/bindings/pinctrl/actions,*
  F:    Documentation/devicetree/bindings/power/actions,owl-sps.txt
- F:    Documentation/devicetree/bindings/timer/actions,owl-timer.txt
+ F:    Documentation/devicetree/bindings/timer/actions,owl-timer.yaml
  F:    arch/arm/boot/dts/actions/
  F:    arch/arm/mach-actions/
  F:    arch/arm64/boot/dts/actions/
@@@ -2085,7 -2091,7 +2085,7 @@@ F:      drivers/crypto/amlogic
  
  ARM/Amlogic Meson SoC Sound Drivers
  M:    Jerome Brunet <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/amlogic*
  F:    sound/soc/meson/
@@@ -2123,7 -2129,7 +2123,7 @@@ F:      drivers/*/*alpine
  ARM/APPLE MACHINE SOUND DRIVERS
  M:    Martin Povišer <[email protected]>
  L:    [email protected]
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/adi,ssm3515.yaml
  F:    Documentation/devicetree/bindings/sound/apple,*
  S:    Maintained
  F:    arch/arm/mach-ep93xx/ts72xx.c
  
 -ARM/CIRRUS LOGIC CLPS711X ARM ARCHITECTURE
 -M:    Alexander Shiyan <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 -S:    Odd Fixes
 -N:    clps711x
 -
  ARM/CIRRUS LOGIC EP93XX ARM ARCHITECTURE
  M:    Hartley Sweeten <[email protected]>
  M:    Alexander Sverdlin <[email protected]>
@@@ -2853,7 -2865,7 +2853,7 @@@ F:      Documentation/devicetree/bindings/ar
  F:    Documentation/devicetree/bindings/bus/qcom*
  F:    Documentation/devicetree/bindings/cache/qcom,llcc.yaml
  F:    Documentation/devicetree/bindings/firmware/qcom,scm.yaml
 -F:    Documentation/devicetree/bindings/reserved-memory/qcom
 +F:    Documentation/devicetree/bindings/reserved-memory/qcom*
  F:    Documentation/devicetree/bindings/soc/qcom/
  F:    arch/arm/boot/dts/qcom/
  F:    arch/arm/configs/qcom_defconfig
@@@ -3720,7 -3732,7 +3720,7 @@@ F:      arch/arm/boot/dts/microchip/at91-tse
  
  AXENTIA ASOC DRIVERS
  M:    Peter Rosin <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/axentia,*
  F:    sound/soc/atmel/tse850-pcm5142.c
@@@ -3746,7 -3758,6 +3746,7 @@@ F:      drivers/spi/spi-axi-spi-engine.
  AXI PWM GENERATOR
  M:    Michael Hennerich <[email protected]>
  M:    Nuno Sá <[email protected]>
 +R:    Trevor Gamblin <[email protected]>
  L:    [email protected]
  S:    Supported
  W:    https://ez.analog.com/linux-software-drivers
@@@ -3804,6 -3815,14 +3804,6 @@@ F:     drivers/video/backlight
  F:    include/linux/backlight.h
  F:    include/linux/pwm_backlight.h
  
 -BAIKAL-T1 PVT HARDWARE MONITOR DRIVER
 -M:    Serge Semin <[email protected]>
 -L:    [email protected]
 -S:    Supported
 -F:    Documentation/devicetree/bindings/hwmon/baikal,bt1-pvt.yaml
 -F:    Documentation/hwmon/bt1-pvt.rst
 -F:    drivers/hwmon/bt1-pvt.[ch]
 -
  BARCO P50 GPIO DRIVER
  M:    Santosh Kumar Yadav <[email protected]>
  M:    Peter Korsgaard <[email protected]>
@@@ -4832,7 -4851,7 +4832,7 @@@ F:      include/uapi/linux/bsg.
  
  BT87X AUDIO DRIVER
  M:    Clemens Ladisch <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git
  F:    Documentation/sound/cards/bt87x.rst
@@@ -4894,7 -4913,7 +4894,7 @@@ F:      drivers/net/can/bxcan.
  
  C-MEDIA CMI8788 DRIVER
  M:    Clemens Ladisch <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git
  F:    sound/pci/oxygen/
@@@ -6301,6 -6320,7 +6301,6 @@@ DECSTATION PLATFORM SUPPOR
  M:    "Maciej W. Rozycki" <[email protected]>
  L:    [email protected]
  S:    Maintained
 -W:    http://www.linux-mips.org/wiki/DECstation
  F:    arch/mips/dec/
  F:    arch/mips/include/asm/dec/
  F:    arch/mips/include/asm/mach-dec/
@@@ -6456,6 -6476,7 +6456,6 @@@ F:      drivers/mtd/nand/raw/denali
  
  DESIGNWARE EDMA CORE IP DRIVER
  M:    Manivannan Sadhasivam <[email protected]>
 -R:    Serge Semin <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    drivers/dma/dw-edma/
@@@ -7811,8 -7832,6 +7811,8 @@@ F:      drivers/gpu/drm/xlnx
  DRM GPU SCHEDULER
  M:    Luben Tuikov <[email protected]>
  M:    Matthew Brost <[email protected]>
 +M:    Danilo Krummrich <[email protected]>
 +M:    Philipp Stanner <[email protected]>
  L:    [email protected]
  S:    Maintained
  T:    git https://gitlab.freedesktop.org/drm/misc/kernel.git
@@@ -8060,10 -8079,10 +8060,10 @@@ S:   Maintaine
  F:    drivers/edac/highbank*
  
  EDAC-CAVIUM OCTEON
 -M:    Ralf Baechle <[email protected]>
 +M:    Thomas Bogendoerfer <[email protected]>
  L:    [email protected]
  L:    [email protected]
 -S:    Supported
 +S:    Maintained
  F:    drivers/edac/octeon_edac*
  
  EDAC-CAVIUM THUNDERX
@@@ -8103,8 -8122,7 +8103,8 @@@ S:      Maintaine
  F:    drivers/edac/e7xxx_edac.c
  
  EDAC-FSL_DDR
 -M:    York Sun <[email protected]>
 +R:    Frank Li <[email protected]>
 +L:    [email protected]
  L:    [email protected]
  S:    Maintained
  F:    drivers/edac/fsl_ddr_edac.*
@@@ -8234,7 -8252,7 +8234,7 @@@ F:      drivers/edac/ti_edac.
  
  EDIROL UA-101/UA-1000 DRIVER
  M:    Clemens Ladisch <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git
  F:    sound/usb/misc/ua101.c
@@@ -8796,7 -8814,7 +8796,7 @@@ F:      drivers/net/can/usb/f81604.
  FIREWIRE AUDIO DRIVERS and IEC 61883-1/6 PACKET STREAMING ENGINE
  M:    Clemens Ladisch <[email protected]>
  M:    Takashi Sakamoto <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git
  F:    include/uapi/sound/firewire.h
@@@ -8870,7 -8888,7 +8870,7 @@@ F:      drivers/input/joystick/fsia6b.
  
  FOCUSRITE SCARLETT2 MIXER DRIVER (Scarlett Gen 2+ and Clarett)
  M:    Geoffrey D. Bennett <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  W:    https://github.com/geoffreybennett/scarlett-gen2
  B:    https://github.com/geoffreybennett/scarlett-gen2/issues
@@@ -8894,7 -8912,6 +8894,7 @@@ F:      include/linux/fortify-string.
  F:    lib/fortify_kunit.c
  F:    lib/memcpy_kunit.c
  F:    lib/test_fortify/*
 +K:    \bunsafe_memcpy\b
  K:    \b__NO_FORTIFY\b
  
  FPGA DFL DRIVERS
@@@ -9192,7 -9209,7 +9192,7 @@@ M:      Shengjiu Wang <[email protected]
  M:    Xiubo Li <[email protected]>
  R:    Fabio Estevam <[email protected]>
  R:    Nicolin Chen <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  L:    [email protected]
  S:    Maintained
  F:    sound/soc/fsl/fsl*
@@@ -9202,7 -9219,7 +9202,7 @@@ FREESCALE SOC LPC32XX SOUND DRIVER
  M:    J.M.B. Downing <[email protected]>
  M:    Piotr Wojtaszczyk <[email protected]>
  R:    Vladimir Zapolskiy <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/nxp,lpc3220-i2s.yaml
@@@ -9210,7 -9227,7 +9210,7 @@@ F:      sound/soc/fsl/lpc3xxx-
  
  FREESCALE SOC SOUND QMC DRIVER
  M:    Herve Codina <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/fsl,qmc-audio.yaml
@@@ -9725,7 -9742,6 +9725,7 @@@ F:      include/dt-bindings/gpio
  F:    include/linux/gpio.h
  F:    include/linux/gpio/
  F:    include/linux/of_gpio.h
 +K:    (devm_)?gpio_(request|free|direction|get|set)
  
  GPIO UAPI
  M:    Bartosz Golaszewski <[email protected]>
@@@ -9740,6 -9756,14 +9740,6 @@@ F:     drivers/gpio/gpiolib-cdev.
  F:    include/uapi/linux/gpio.h
  F:    tools/gpio/
  
 -GRE DEMULTIPLEXER DRIVER
 -M:    Dmitry Kozlov <[email protected]>
 -L:    [email protected]
 -S:    Maintained
 -F:    include/net/gre.h
 -F:    net/ipv4/gre_demux.c
 -F:    net/ipv4/gre_offload.c
 -
  GRETH 10/100/1G Ethernet MAC device driver
  M:    Andreas Larsson <[email protected]>
  L:    [email protected]
@@@ -10138,10 -10162,12 +10138,12 @@@ S:        Maintaine
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
  F:    Documentation/timers/
  F:    include/linux/clockchips.h
+ F:    include/linux/delay.h
  F:    include/linux/hrtimer.h
  F:    include/linux/timer.h
  F:    kernel/time/clockevents.c
  F:    kernel/time/hrtimer.c
+ F:    kernel/time/sleep_timeout.c
  F:    kernel/time/timer.c
  F:    kernel/time/timer_list.c
  F:    kernel/time/timer_migration.*
@@@ -10243,7 -10269,7 +10245,7 @@@ F:   Documentation/devicetree/bindings/ar
  F:    drivers/bus/hisi_lpc.c
  
  HISILICON NETWORK SUBSYSTEM 3 DRIVER (HNS3)
 -M:    Yisen Zhuang <yisen.zhuang@huawei.com>
 +M:    Jian Shen <shenjian15@huawei.com>
  M:    Salil Mehta <[email protected]>
  M:    Jijie Shao <[email protected]>
  L:    [email protected]
@@@ -10252,7 -10278,7 +10254,7 @@@ W:   http://www.hisilicon.co
  F:    drivers/net/ethernet/hisilicon/hns3/
  
  HISILICON NETWORK SUBSYSTEM DRIVER
 -M:    Yisen Zhuang <yisen.zhuang@huawei.com>
 +M:    Jian Shen <shenjian15@huawei.com>
  M:    Salil Mehta <[email protected]>
  L:    [email protected]
  S:    Maintained
@@@ -10493,7 -10519,6 +10495,7 @@@ F:   Documentation/mm/hugetlbfs_reserv.rs
  F:    Documentation/mm/vmemmap_dedup.rst
  F:    fs/hugetlbfs/
  F:    include/linux/hugetlb.h
 +F:    include/trace/events/hugetlbfs.h
  F:    mm/hugetlb.c
  F:    mm/hugetlb_vmemmap.c
  F:    mm/hugetlb_vmemmap.h
@@@ -11131,7 -11156,7 +11133,7 @@@ F:   drivers/iio/pressure/dps310.
  
  INFINEON PEB2466 ASoC CODEC
  M:    Herve Codina <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/infineon,peb2466.yaml
  F:    sound/soc/codecs/peb2466.c
@@@ -11257,10 -11282,10 +11259,10 @@@ F:        security/integrity
  F:    security/integrity/ima/
  
  INTEGRITY POLICY ENFORCEMENT (IPE)
 -M:    Fan Wu <wufan@linux.microsoft.com>
 +M:    Fan Wu <wufan@kernel.org>
  L:    [email protected]
  S:    Supported
 -T:    git https://github.com/microsoft/ipe.git
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/wufan/ipe.git
  F:    Documentation/admin-guide/LSM/ipe.rst
  F:    Documentation/security/ipe.rst
  F:    scripts/ipe/
@@@ -11294,7 -11319,7 +11296,7 @@@ M:   Bard Liao <[email protected]
  M:    Ranjani Sridharan <[email protected]>
  M:    Kai Vehmanen <[email protected]>
  R:    Pierre-Louis Bossart <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Supported
  F:    sound/soc/intel/
  
@@@ -11448,7 -11473,7 +11450,7 @@@ Q:   https://patchwork.kernel.org/project
  F:    drivers/dma/ioat*
  
  INTEL IAA CRYPTO DRIVER
 -M:    Tom Zanussi <tom.zanussi@linux.intel.com>
 +M:    Kristen Accardi <kristen.c.accardi@intel.com>
  L:    [email protected]
  S:    Supported
  F:    Documentation/driver-api/crypto/iaa/iaa-crypto.rst
@@@ -11473,7 -11498,7 +11475,7 @@@ F:   include/uapi/linux/idxd.
  
  INTEL IN FIELD SCAN (IFS) DEVICE
  M:    Jithu Joseph <[email protected]>
 -R:    Ashok Raj <ashok.raj@intel.com>
 +R:    Ashok Raj <ashok.raj.linux@gmail.com>
  R:    Tony Luck <[email protected]>
  S:    Maintained
  F:    drivers/platform/x86/intel/ifs
@@@ -11578,16 -11603,6 +11580,16 @@@ F: drivers/crypto/intel/keembay/keembay
  F:    drivers/crypto/intel/keembay/ocs-hcu.c
  F:    drivers/crypto/intel/keembay/ocs-hcu.h
  
 +INTEL LA JOLLA COVE ADAPTER (LJCA) USB I/O EXPANDER DRIVERS
 +M:    Wentong Wu <[email protected]>
 +M:    Sakari Ailus <[email protected]>
 +S:    Maintained
 +F:    drivers/gpio/gpio-ljca.c
 +F:    drivers/i2c/busses/i2c-ljca.c
 +F:    drivers/spi/spi-ljca.c
 +F:    drivers/usb/misc/usb-ljca.c
 +F:    include/linux/usb/ljca.h
 +
  INTEL MANAGEMENT ENGINE (mei)
  M:    Tomas Winkler <[email protected]>
  L:    [email protected]
@@@ -11886,7 -11901,7 +11888,7 @@@ F:   Documentation/devicetree/bindings/ii
  F:    drivers/iio/gyro/mpu3050*
  
  IOC3 ETHERNET DRIVER
 -M:    Ralf Baechle <[email protected]>
 +M:    Thomas Bogendoerfer <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    drivers/net/ethernet/sgi/ioc3-eth.c
@@@ -11988,7 -12003,7 +11990,7 @@@ F:   drivers/tty/ipwireless
  
  IRON DEVICE AUDIO CODEC DRIVERS
  M:    Kiseok Jo <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/irondevice,*
  F:    sound/soc/codecs/sma*
@@@ -12105,14 -12120,6 +12107,14 @@@ F: drivers/isdn/Makefil
  F:    drivers/isdn/hardware/
  F:    drivers/isdn/mISDN/
  
 +ISL28022 HARDWARE MONITORING DRIVER
 +M:    Carsten Spieß <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/hwmon/renesas,isl28022.yaml
 +F:    Documentation/hwmon/isl28022.rst
 +F:    drivers/hwmon/isl28022.c
 +
  ISOFS FILESYSTEM
  M:    Jan Kara <[email protected]>
  L:    [email protected]
@@@ -12234,7 -12241,6 +12236,7 @@@ R:   Dmitry Vyukov <[email protected]
  R:    Vincenzo Frascino <[email protected]>
  L:    [email protected]
  S:    Maintained
 +B:    https://bugzilla.kernel.org/buglist.cgi?component=Sanitizers&product=Memory%20Management
  F:    Documentation/dev-tools/kasan.rst
  F:    arch/*/include/asm/*kasan.h
  F:    arch/*/mm/kasan_init*
@@@ -12258,7 -12264,6 +12260,7 @@@ R:   Dmitry Vyukov <[email protected]
  R:    Andrey Konovalov <[email protected]>
  L:    [email protected]
  S:    Maintained
 +B:    https://bugzilla.kernel.org/buglist.cgi?component=Sanitizers&product=Memory%20Management
  F:    Documentation/dev-tools/kcov.rst
  F:    include/linux/kcov.h
  F:    include/uapi/linux/kcov.h
@@@ -12340,7 -12345,6 +12342,7 @@@ F:   include/linux/randomize_kstack.
  F:    kernel/configs/hardening.config
  F:    lib/usercopy_kunit.c
  F:    mm/usercopy.c
 +F:    security/Kconfig.hardening
  K:    \b(add|choose)_random_kstack_offset\b
  K:    \b__check_(object_size|heap_object)\b
  K:    \b__counted_by\b
@@@ -12457,7 -12461,7 +12459,7 @@@ F:   virt/kvm/
  KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
  M:    Marc Zyngier <[email protected]>
  M:    Oliver Upton <[email protected]>
 -R:    James Morse <james.morse@arm.com>
 +R:    Joey Gouly <joey.gouly@arm.com>
  R:    Suzuki K Poulose <[email protected]>
  R:    Zenghui Yu <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
@@@ -12938,29 -12942,49 +12940,29 @@@ LIBATA PATA ARASAN COMPACT FLASH CONTRO
  M:    Viresh Kumar <[email protected]>
  L:    [email protected]
  S:    Maintained
 -T:    git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
  F:    drivers/ata/pata_arasan_cf.c
  F:    include/linux/pata_arasan_cf_data.h
  
 -LIBATA PATA DRIVERS
 -R:    Sergey Shtylyov <[email protected]>
 -L:    [email protected]
 -F:    drivers/ata/ata_*.c
 -F:    drivers/ata/pata_*.c
 -
  LIBATA PATA FARADAY FTIDE010 AND GEMINI SATA BRIDGE DRIVERS
  M:    Linus Walleij <[email protected]>
  L:    [email protected]
  S:    Maintained
 -T:    git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
  F:    drivers/ata/pata_ftide010.c
  F:    drivers/ata/sata_gemini.c
  F:    drivers/ata/sata_gemini.h
  
  LIBATA SATA AHCI PLATFORM devices support
  M:    Hans de Goede <[email protected]>
 -M:    Jens Axboe <[email protected]>
  L:    [email protected]
  S:    Maintained
 -T:    git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
  F:    drivers/ata/ahci_platform.c
  F:    drivers/ata/libahci_platform.c
  F:    include/linux/ahci_platform.h
  
 -LIBATA SATA AHCI SYNOPSYS DWC CONTROLLER DRIVER
 -M:    Serge Semin <[email protected]>
 -L:    [email protected]
 -S:    Maintained
 -T:    git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/libata.git
 -F:    Documentation/devicetree/bindings/ata/baikal,bt1-ahci.yaml
 -F:    Documentation/devicetree/bindings/ata/snps,dwc-ahci.yaml
 -F:    drivers/ata/ahci_dwc.c
 -
  LIBATA SATA PROMISE TX2/TX4 CONTROLLER DRIVER
  M:    Mikael Pettersson <[email protected]>
  L:    [email protected]
  S:    Maintained
 -T:    git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
  F:    drivers/ata/sata_promise.*
  
  LIBATA SUBSYSTEM (Serial and Parallel ATA drivers)
@@@ -13825,12 -13849,6 +13827,12 @@@ S: Supporte
  F:    Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
  F:    drivers/net/ethernet/marvell/octeontx2/af/
  
 +MARVELL PEM PMU DRIVER
 +M:    Linu Cherian <[email protected]>
 +M:    Gowthami Thiagarajan <[email protected]>
 +S:    Supported
 +F:    drivers/perf/marvell_pem_pmu.c
 +
  MARVELL PRESTERA ETHERNET SWITCH DRIVER
  M:    Taras Chornyi <[email protected]>
  S:    Supported
@@@ -13936,7 -13954,7 +13938,7 @@@ F:   drivers/media/i2c/max96717.
  
  MAX9860 MONO AUDIO VOICE CODEC DRIVER
  M:    Peter Rosin <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/max9860.txt
  F:    sound/soc/codecs/max9860.*
@@@ -14159,7 -14177,8 +14161,7 @@@ T:   git git://linuxtv.org/media_tree.gi
  F:    drivers/media/platform/nxp/imx-pxp.[ch]
  
  MEDIA DRIVERS FOR ASCOT2E
 -M:    Sergey Kozlov <[email protected]>
 -M:    Abylay Ospan <[email protected]>
 +M:    Abylay Ospan <[email protected]>
  L:    [email protected]
  S:    Supported
  W:    https://linuxtv.org
@@@ -14176,7 -14195,8 +14178,7 @@@ T:   git git://linuxtv.org/media_tree.gi
  F:    drivers/media/dvb-frontends/cxd2099*
  
  MEDIA DRIVERS FOR CXD2841ER
 -M:    Sergey Kozlov <[email protected]>
 -M:    Abylay Ospan <[email protected]>
 +M:    Abylay Ospan <[email protected]>
  L:    [email protected]
  S:    Supported
  W:    https://linuxtv.org
@@@ -14229,7 -14249,7 +14231,7 @@@ F:   drivers/media/platform/nxp/imx7-medi
  F:    drivers/media/platform/nxp/imx8mq-mipi-csi2.c
  
  MEDIA DRIVERS FOR HELENE
 -M:    Abylay Ospan <aospan@netup.ru>
 +M:    Abylay Ospan <aospan@amazon.com>
  L:    [email protected]
  S:    Supported
  W:    https://linuxtv.org
@@@ -14238,7 -14258,8 +14240,7 @@@ T:   git git://linuxtv.org/media_tree.gi
  F:    drivers/media/dvb-frontends/helene*
  
  MEDIA DRIVERS FOR HORUS3A
 -M:    Sergey Kozlov <[email protected]>
 -M:    Abylay Ospan <[email protected]>
 +M:    Abylay Ospan <[email protected]>
  L:    [email protected]
  S:    Supported
  W:    https://linuxtv.org
@@@ -14247,7 -14268,8 +14249,7 @@@ T:   git git://linuxtv.org/media_tree.gi
  F:    drivers/media/dvb-frontends/horus3a*
  
  MEDIA DRIVERS FOR LNBH25
 -M:    Sergey Kozlov <[email protected]>
 -M:    Abylay Ospan <[email protected]>
 +M:    Abylay Ospan <[email protected]>
  L:    [email protected]
  S:    Supported
  W:    https://linuxtv.org
@@@ -14263,7 -14285,8 +14265,7 @@@ T:   git git://linuxtv.org/media_tree.gi
  F:    drivers/media/dvb-frontends/mxl5xx*
  
  MEDIA DRIVERS FOR NETUP PCI UNIVERSAL DVB devices
 -M:    Sergey Kozlov <[email protected]>
 -M:    Abylay Ospan <[email protected]>
 +M:    Abylay Ospan <[email protected]>
  L:    [email protected]
  S:    Supported
  W:    https://linuxtv.org
@@@ -14888,10 -14911,9 +14890,10 @@@ N: include/linux/page[-_]
  
  MEMORY MAPPING
  M:    Andrew Morton <[email protected]>
 -R:    Liam R. Howlett <[email protected]>
 +M:    Liam R. Howlett <[email protected]>
 +M:    Lorenzo Stoakes <[email protected]>
  R:    Vlastimil Babka <[email protected]>
 -R:    Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
 +R:    Jann Horn <jannh@google.com>
  L:    [email protected]
  S:    Maintained
  W:    http://www.linux-mm.org
@@@ -14914,6 -14936,13 +14916,6 @@@ F:  drivers/mtd
  F:    include/linux/mtd/
  F:    include/uapi/mtd/
  
 -MEMSENSING MICROSYSTEMS MSA311 DRIVER
 -M:    Dmitry Rokosov <[email protected]>
 -L:    [email protected]
 -S:    Maintained
 -F:    Documentation/devicetree/bindings/iio/accel/memsensing,msa311.yaml
 -F:    drivers/iio/accel/msa311.c
 -
  MEN A21 WATCHDOG DRIVER
  M:    Johannes Thumshirn <[email protected]>
  L:    [email protected]
@@@ -15058,8 -15087,7 +15060,8 @@@ F:   drivers/spi/spi-at91-usart.
  
  MICROCHIP AUDIO ASOC DRIVERS
  M:    Claudiu Beznea <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +M:    Andrei Simion <[email protected]>
 +L:    [email protected]
  S:    Supported
  F:    Documentation/devicetree/bindings/sound/atmel*
  F:    Documentation/devicetree/bindings/sound/axentia,tse850-pcm5142.txt
@@@ -15167,7 -15195,6 +15169,7 @@@ F:   include/video/atmel_lcdc.
  
  MICROCHIP MCP16502 PMIC DRIVER
  M:    Claudiu Beznea <[email protected]>
 +M:    Andrei Simion <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
  S:    Supported
  F:    Documentation/devicetree/bindings/regulator/microchip,mcp16502.yaml
@@@ -15249,6 -15276,7 +15251,6 @@@ F:   drivers/tty/serial/8250/8250_pci1xxx
  
  MICROCHIP POLARFIRE FPGA DRIVERS
  M:    Conor Dooley <[email protected]>
 -R:    Vladimir Georgiev <[email protected]>
  L:    [email protected]
  S:    Supported
  F:    Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml
@@@ -15298,7 -15326,6 +15300,7 @@@ F:   drivers/spi/spi-atmel.
  
  MICROCHIP SSC DRIVER
  M:    Claudiu Beznea <[email protected]>
 +M:    Andrei Simion <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
  S:    Supported
  F:    Documentation/devicetree/bindings/misc/atmel-ssc.txt
  M:    Thomas Bogendoerfer <[email protected]>
  L:    [email protected]
  S:    Maintained
 -W:    http://www.linux-mips.org/
  Q:    https://patchwork.kernel.org/project/linux-mips/list/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux.git
  F:    Documentation/devicetree/bindings/mips/
@@@ -15503,6 -15531,17 +15505,6 @@@ F:  arch/mips
  F:    drivers/platform/mips/
  F:    include/dt-bindings/mips/
  
 -MIPS BAIKAL-T1 PLATFORM
 -M:    Serge Semin <[email protected]>
 -L:    [email protected]
 -S:    Supported
 -F:    Documentation/devicetree/bindings/bus/baikal,bt1-*.yaml
 -F:    Documentation/devicetree/bindings/clock/baikal,bt1-*.yaml
 -F:    drivers/bus/bt1-*.c
 -F:    drivers/clk/baikal-t1/
 -F:    drivers/memory/bt1-l2-ctl.c
 -F:    drivers/mtd/maps/physmap-bt1-rom.[ch]
 -
  MIPS BOSTON DEVELOPMENT BOARD
  M:    Paul Burton <[email protected]>
  L:    [email protected]
@@@ -15515,6 -15554,7 +15517,6 @@@ F:   include/dt-bindings/clock/boston-clo
  
  MIPS CORE DRIVERS
  M:    Thomas Bogendoerfer <[email protected]>
 -M:    Serge Semin <[email protected]>
  L:    [email protected]
  S:    Supported
  F:    drivers/bus/mips_cdmm.c
@@@ -15919,7 -15959,7 +15921,7 @@@ F:   include/linux/mtd/*nand*.
  
  NATIVE INSTRUMENTS USB SOUND INTERFACE DRIVER
  M:    Daniel Mack <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  W:    http://www.native-instruments.com
  F:    sound/usb/caiaq/
@@@ -15956,14 -15996,6 +15958,14 @@@ S: Maintaine
  F:    Documentation/devicetree/bindings/hwmon/nuvoton,nct6775.yaml
  F:    drivers/hwmon/nct6775-i2c.c
  
 +NCT7363 HARDWARE MONITOR DRIVER
 +M:    Ban Feng <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/hwmon/nuvoton,nct7363.yaml
 +F:    Documentation/hwmon/nct7363.rst
 +F:    drivers/hwmon/nct7363.c
 +
  NETCONSOLE
  M:    Breno Leitao <[email protected]>
  S:    Maintained
@@@ -16015,8 -16047,9 +16017,8 @@@ F:   net/netfilter
  F:    tools/testing/selftests/net/netfilter/
  
  NETROM NETWORK LAYER
 -M:    Ralf Baechle <[email protected]>
  L:    [email protected]
 -S:    Maintained
 +S:    Orphan
  W:    https://linux-ax25.in-berlin.de
  F:    include/net/netrom.h
  F:    include/uapi/linux/netrom.h
@@@ -16057,7 -16090,6 +16059,7 @@@ F:   include/uapi/linux/net_dropmon.
  F:    net/core/drop_monitor.c
  
  NETWORKING DRIVERS
 +M:    Andrew Lunn <[email protected]>
  M:    "David S. Miller" <[email protected]>
  M:    Eric Dumazet <[email protected]>
  M:    Jakub Kicinski <[email protected]>
@@@ -16105,6 -16137,7 +16107,6 @@@ F:   drivers/net/wireless
  
  NETWORKING [DSA]
  M:    Andrew Lunn <[email protected]>
 -M:    Florian Fainelli <[email protected]>
  M:    Vladimir Oltean <[email protected]>
  S:    Maintained
  F:    Documentation/devicetree/bindings/net/dsa/
@@@ -16122,7 -16155,6 +16124,7 @@@ M:   "David S. Miller" <[email protected]
  M:    Eric Dumazet <[email protected]>
  M:    Jakub Kicinski <[email protected]>
  M:    Paolo Abeni <[email protected]>
 +R:    Simon Horman <[email protected]>
  L:    [email protected]
  S:    Maintained
  P:    Documentation/process/maintainer-netdev.rst
@@@ -16165,22 -16197,10 +16167,22 @@@ F:        include/uapi/linux/rtnetlink.
  F:    lib/net_utils.c
  F:    lib/random32.c
  F:    net/
 +F:    samples/pktgen/
  F:    tools/net/
  F:    tools/testing/selftests/net/
 +X:    Documentation/networking/mac80211-injection.rst
 +X:    Documentation/networking/mac80211_hwsim/
 +X:    Documentation/networking/regulatory.rst
 +X:    include/net/cfg80211.h
 +X:    include/net/ieee80211_radiotap.h
 +X:    include/net/iw_handler.h
 +X:    include/net/mac80211.h
 +X:    include/net/wext.h
  X:    net/9p/
  X:    net/bluetooth/
 +X:    net/mac80211/
 +X:    net/rfkill/
 +X:    net/wireless/
  
  NETWORKING [IPSEC]
  M:    Steffen Klassert <[email protected]>
@@@ -16490,6 -16510,12 +16492,6 @@@ F:  include/linux/ntb.
  F:    include/linux/ntb_transport.h
  F:    tools/testing/selftests/ntb/
  
 -NTB IDT DRIVER
 -M:    Serge Semin <[email protected]>
 -L:    [email protected]
 -S:    Supported
 -F:    drivers/ntb/hw/idt/
 -
  NTB INTEL DRIVER
  M:    Dave Jiang <[email protected]>
  L:    [email protected]
@@@ -16704,7 -16730,7 +16706,7 @@@ F:   drivers/extcon/extcon-ptn5150.
  
  NXP SGTL5000 DRIVER
  M:    Fabio Estevam <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/fsl,sgtl5000.yaml
  F:    sound/soc/codecs/sgtl5000*
@@@ -16728,7 -16754,7 +16730,7 @@@ K:   "nxp,tda998x
  
  NXP TFA9879 DRIVER
  M:    Peter Rosin <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/nxp,tfa9879.yaml
  F:    sound/soc/codecs/tfa9879*
@@@ -16740,7 -16766,7 +16742,7 @@@ F:   drivers/nfc/nxp-nc
  
  NXP/Goodix TFA989X (TFA1) DRIVER
  M:    Stephan Gerhold <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/nxp,tfa989x.yaml
  F:    sound/soc/codecs/tfa989x.c
@@@ -16760,6 -16786,13 +16762,6 @@@ S:  Maintaine
  F:    Documentation/hwmon/nzxt-kraken3.rst
  F:    drivers/hwmon/nzxt-kraken3.c
  
 -NZXT-SMART2 HARDWARE MONITORING DRIVER
 -M:    Aleksandr Mezin <[email protected]>
 -L:    [email protected]
 -S:    Maintained
 -F:    Documentation/hwmon/nzxt-smart2.rst
 -F:    drivers/hwmon/nzxt-smart2.c
 -
  OBJAGG
  M:    Jiri Pirko <[email protected]>
  L:    [email protected]
@@@ -16819,7 -16852,7 +16821,7 @@@ F:   include/uapi/misc/ocxl.
  OMAP AUDIO SUPPORT
  M:    Peter Ujfalusi <[email protected]>
  M:    Jarkko Nikula <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  L:    [email protected]
  S:    Maintained
  F:    sound/soc/ti/n810.c
@@@ -17376,7 -17409,7 +17378,7 @@@ F:   include/linux/pm_opp.
  
  OPL4 DRIVER
  M:    Clemens Ladisch <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git
  F:    sound/drivers/opl4/
@@@ -18503,6 -18536,13 +18505,6 @@@ F:  drivers/pps
  F:    include/linux/pps*.h
  F:    include/uapi/linux/pps.h
  
 -PPTP DRIVER
 -M:    Dmitry Kozlov <[email protected]>
 -L:    [email protected]
 -S:    Maintained
 -W:    http://sourceforge.net/projects/accel-pptp
 -F:    drivers/net/ppp/pptp.c
 -
  PRESSURE STALL INFORMATION (PSI)
  M:    Johannes Weiner <[email protected]>
  M:    Suren Baghdasaryan <[email protected]>
@@@ -18752,7 -18792,7 +18754,7 @@@ F:   drivers/crypto/intel/qat
  
  QCOM AUDIO (ASoC) DRIVERS
  M:    Srinivas Kandagatla <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  L:    [email protected]
  S:    Supported
  F:    Documentation/devicetree/bindings/soc/qcom/qcom,apr*
@@@ -19476,14 -19516,6 +19478,14 @@@ S: Maintaine
  F:    Documentation/tools/rtla/
  F:    tools/tracing/rtla/
  
 +Real-time Linux (PREEMPT_RT)
 +M:    Sebastian Andrzej Siewior <[email protected]>
 +M:    Clark Williams <[email protected]>
 +M:    Steven Rostedt <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +K:    PREEMPT_RT
 +
  REALTEK AUDIO CODECS
  M:    Oder Chiou <[email protected]>
  S:    Maintained
@@@ -19594,11 -19626,9 +19596,11 @@@ F: Documentation/devicetree/bindings/i2
  F:    drivers/i2c/busses/i2c-emev2.c
  
  RENESAS ETHERNET AVB DRIVER
 -R:    Sergey Shtylyov <[email protected]>
 +M:    Paul Barker <[email protected]>
 +M:    Niklas Söderlund <[email protected]>
  L:    [email protected]
  L:    [email protected]
 +S:    Supported
  F:    Documentation/devicetree/bindings/net/renesas,etheravb.yaml
  F:    drivers/net/ethernet/renesas/Kconfig
  F:    drivers/net/ethernet/renesas/Makefile
@@@ -19624,7 -19654,7 +19626,7 @@@ F:   drivers/net/ethernet/renesas/rtsn.
  
  RENESAS IDT821034 ASoC CODEC
  M:    Herve Codina <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/renesas,idt821034.yaml
  F:    sound/soc/codecs/idt821034.c
@@@ -19654,7 -19684,7 +19656,7 @@@ F:   drivers/i2c/busses/i2c-rcar.
  F:    drivers/i2c/busses/i2c-sh_mobile.c
  
  RENESAS R-CAR SATA DRIVER
 -R:    Sergey Shtylyov <[email protected]>
 +M:    Geert Uytterhoeven <[email protected]>
  L:    [email protected]
  L:    [email protected]
  S:    Supported
@@@ -19737,10 -19767,9 +19739,10 @@@ F: Documentation/devicetree/bindings/i2
  F:    drivers/i2c/busses/i2c-rzv2m.c
  
  RENESAS SUPERH ETHERNET DRIVER
 -R:    Sergey Shtylyov <[email protected]>
 +M:    Niklas Söderlund <[email protected]>
  L:    [email protected]
  L:    [email protected]
 +S:    Supported
  F:    Documentation/devicetree/bindings/net/renesas,ether.yaml
  F:    drivers/net/ethernet/renesas/Kconfig
  F:    drivers/net/ethernet/renesas/Makefile
@@@ -19891,10 -19920,12 +19893,10 @@@ L:        [email protected]
  S:    Maintained
  Q:    https://patchwork.kernel.org/project/linux-riscv/list/
  T:    git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/
 -F:    Documentation/devicetree/bindings/riscv/
 -F:    arch/riscv/boot/dts/
 -X:    arch/riscv/boot/dts/allwinner/
 -X:    arch/riscv/boot/dts/renesas/
 -X:    arch/riscv/boot/dts/sophgo/
 -X:    arch/riscv/boot/dts/thead/
 +F:    arch/riscv/boot/dts/canaan/
 +F:    arch/riscv/boot/dts/microchip/
 +F:    arch/riscv/boot/dts/sifive/
 +F:    arch/riscv/boot/dts/starfive/
  
  RISC-V PMU DRIVERS
  M:    Atish Patra <[email protected]>
@@@ -20079,8 -20110,9 +20081,8 @@@ F:   include/linux/mfd/rohm-generic.
  F:    include/linux/mfd/rohm-shared.h
  
  ROSE NETWORK LAYER
 -M:    Ralf Baechle <[email protected]>
  L:    [email protected]
 -S:    Maintained
 +S:    Orphan
  W:    https://linux-ax25.in-berlin.de
  F:    include/net/rose.h
  F:    include/uapi/linux/rose.h
  S:    Supported
  F:    drivers/s390/cio/
  
 +S390 CRYPTO MODULES, PRNG DRIVER, ARCH RANDOM
 +M:    Harald Freudenberger <[email protected]>
 +M:    Holger Dengler <[email protected]>
 +L:    [email protected]
 +L:    [email protected]
 +S:    Supported
 +F:    arch/s390/crypto/
 +F:    arch/s390/include/asm/archrandom.h
 +F:    arch/s390/include/asm/cpacf.h
 +
  S390 DASD DRIVER
  M:    Stefan Haberland <[email protected]>
  M:    Jan Hoeppner <[email protected]>
@@@ -20255,14 -20277,6 +20257,14 @@@ F: block/partitions/ibm.
  F:    drivers/s390/block/dasd*
  F:    include/linux/dasd_mod.h
  
 +S390 HWRANDOM TRNG DRIVER
 +M:    Harald Freudenberger <[email protected]>
 +M:    Holger Dengler <[email protected]>
 +L:    [email protected]
 +L:    [email protected]
 +S:    Supported
 +F:    drivers/char/hw_random/s390-trng.c
 +
  S390 IOMMU (PCI)
  M:    Niklas Schnelle <[email protected]>
  M:    Matthew Rosato <[email protected]>
@@@ -20344,16 -20358,10 +20346,16 @@@ F:        arch/s390/kvm/pci
  F:    drivers/vfio/pci/vfio_pci_zdev.c
  F:    include/uapi/linux/vfio_zdev.h
  
 -S390 ZCRYPT DRIVER
 +S390 ZCRYPT AND PKEY DRIVER AND AP BUS
  M:    Harald Freudenberger <[email protected]>
 +M:    Holger Dengler <[email protected]>
  L:    [email protected]
  S:    Supported
 +F:    arch/s390/include/asm/ap.h
 +F:    arch/s390/include/asm/pkey.h
 +F:    arch/s390/include/asm/trace/zcrypt.h
 +F:    arch/s390/include/uapi/asm/pkey.h
 +F:    arch/s390/include/uapi/asm/zcrypt.h
  F:    drivers/s390/crypto/
  
  S390 ZFCP DRIVER
@@@ -20397,7 -20405,7 +20399,7 @@@ F:   security/safesetid
  
  SAMSUNG AUDIO (ASoC) DRIVERS
  M:    Sylwester Nawrocki <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  B:    mailto:[email protected]
  F:    Documentation/devicetree/bindings/sound/samsung*
@@@ -20840,7 -20848,6 +20842,7 @@@ Q:   https://patchwork.kernel.org/project
  B:    mailto:[email protected]
  P:    https://github.com/LinuxSecurityModule/kernel/blob/main/README.md
  T:    git https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/lsm.git
 +F:    include/linux/lsm/
  F:    include/linux/lsm_audit.h
  F:    include/linux/lsm_hook_defs.h
  F:    include/linux/lsm_hooks.h
@@@ -20934,7 -20941,7 +20936,7 @@@ F:   drivers/media/rc/serial_ir.
  
  SERIAL LOW-POWER INTER-CHIP MEDIA BUS (SLIMbus)
  M:    Srinivas Kandagatla <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/slimbus/
  F:    drivers/slimbus/
@@@ -21368,7 -21375,7 +21370,7 @@@ F:   Documentation/devicetree/bindings/i2
  F:    drivers/i2c/busses/i2c-synquacer.c
  
  SOCIONEXT UNIPHIER SOUND DRIVER
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Orphan
  F:    sound/soc/uniphier/
  
@@@ -21430,11 -21437,11 +21432,11 @@@ F:        include/linux/property.
  
  SOFTWARE RAID (Multiple Disks) SUPPORT
  M:    Song Liu <[email protected]>
 -R:    Yu Kuai <[email protected]>
 +M:    Yu Kuai <[email protected]>
  L:    [email protected]
  S:    Supported
  Q:    https://patchwork.kernel.org/project/linux-raid/list/
 -T:    git git://git.kernel.org/pub/scm/linux/kernel/git/song/md.git
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mdraid/linux.git
  F:    drivers/md/Kconfig
  F:    drivers/md/Makefile
  F:    drivers/md/md*
@@@ -21627,7 -21634,7 +21629,7 @@@ F:   tools/testing/selftests/als
  
  SOUND - COMPRESSED AUDIO
  M:    Vinod Koul <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Supported
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git
  F:    Documentation/sound/designs/compress-offload.rst
@@@ -21685,21 -21692,12 +21687,21 @@@ S:        Supporte
  W:    https://github.com/thesofproject/linux/
  F:    sound/soc/sof/
  
 +SOUND - GENERIC SOUND CARD (Simple-Audio-Card, Audio-Graph-Card)
 +M:    Kuninori Morimoto <[email protected]>
 +S:    Supported
 +L:    [email protected]
 +F:    sound/soc/generic/
 +F:    include/sound/simple_card*
 +F:    Documentation/devicetree/bindings/sound/simple-card.yaml
 +F:    Documentation/devicetree/bindings/sound/audio-graph*.yaml
 +
  SOUNDWIRE SUBSYSTEM
  M:    Vinod Koul <[email protected]>
  M:    Bard Liao <[email protected]>
  R:    Pierre-Louis Bossart <[email protected]>
  R:    Sanyog Kale <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Supported
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/soundwire.git
  F:    Documentation/driver-api/soundwire/
@@@ -21772,8 -21770,8 +21774,8 @@@ F:   drivers/accessibility/speakup
  SPEAR PLATFORM/CLOCK/PINCTRL SUPPORT
  M:    Viresh Kumar <[email protected]>
  M:    Shiraz Hashim <[email protected]>
 -M:    [email protected]
  L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  W:    http://www.st.com/spear
  F:    arch/arm/boot/dts/st/spear*
@@@ -22172,7 -22170,7 +22174,7 @@@ F:   kernel/static_call.
  
  STI AUDIO (ASoC) DRIVERS
  M:    Arnaud Pouliquen <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt
  F:    sound/soc/sti/
@@@ -22193,7 -22191,7 +22195,7 @@@ F:   drivers/media/usb/stk1160
  STM32 AUDIO (ASoC) DRIVERS
  M:    Olivier Moysan <[email protected]>
  M:    Arnaud Pouliquen <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml
  F:    Documentation/devicetree/bindings/sound/st,stm32-*.yaml
@@@ -22431,11 -22429,19 +22433,11 @@@ F:        drivers/tty/serial/8250/8250_lpss.
  
  SYNOPSYS DESIGNWARE APB GPIO DRIVER
  M:    Hoan Tran <[email protected]>
 -M:    Serge Semin <[email protected]>
  L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml
  F:    drivers/gpio/gpio-dwapb.c
  
 -SYNOPSYS DESIGNWARE APB SSI DRIVER
 -M:    Serge Semin <[email protected]>
 -L:    [email protected]
 -S:    Supported
 -F:    Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
 -F:    drivers/spi/spi-dw*
 -
  SYNOPSYS DESIGNWARE AXI DMAC DRIVER
  M:    Eugeniy Paltsev <[email protected]>
  S:    Maintained
@@@ -22888,7 -22894,7 +22890,7 @@@ F:   drivers/irqchip/irq-xtensa-
  
  TEXAS INSTRUMENTS ASoC DRIVERS
  M:    Peter Ujfalusi <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/davinci-mcasp-audio.yaml
  F:    sound/soc/ti/
@@@ -22897,7 -22903,7 +22899,7 @@@ TEXAS INSTRUMENTS AUDIO (ASoC/HDA) DRIV
  M:    Shenghao Ding <[email protected]>
  M:    Kevin Lu <[email protected]>
  M:    Baojun Xu <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/tas2552.txt
  F:    Documentation/devicetree/bindings/sound/ti,tas2562.yaml
@@@ -22938,12 -22944,6 +22940,12 @@@ F: include/linux/dma/k3-udma-glue.
  F:    include/linux/dma/ti-cppi5.h
  X:    drivers/dma/ti/cppi41.c
  
 +TEXAS INSTRUMENTS TPS25990 HARDWARE MONITOR DRIVER
 +M:    Jerome Brunet <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/hwmon/pmbus/ti,tps25990.yaml
 +
  TEXAS INSTRUMENTS TPS23861 PoE PSE DRIVER
  M:    Robert Marko <[email protected]>
  M:    Luka Perkov <[email protected]>
@@@ -23271,7 -23271,7 +23273,7 @@@ F:   drivers/soc/ti/
  TI LM49xxx FAMILY ASoC CODEC DRIVERS
  M:    M R Swami Reddy <[email protected]>
  M:    Vishwas A Deshpande <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    sound/soc/codecs/isabelle*
  F:    sound/soc/codecs/lm49453*
@@@ -23285,15 -23285,15 +23287,15 @@@ F:        Documentation/devicetree/bindings/ii
  F:    drivers/iio/adc/ti-lmp92064.c
  
  TI PCM3060 ASoC CODEC DRIVER
 -M:    Kirill Marinushkin <kmarinushkin@birdec.com>
 -L:    [email protected] (moderated for non-subscribers)
 +M:    Kirill Marinushkin <k.marinushkin@gmail.com>
 +L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/sound/pcm3060.txt
  F:    sound/soc/codecs/pcm3060*
  
  TI TAS571X FAMILY ASoC CODEC DRIVER
  M:    Kevin Cernekee <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Odd Fixes
  F:    sound/soc/codecs/tas571x*
  
@@@ -23321,7 -23321,7 +23323,7 @@@ F:   drivers/iio/adc/ti-tsc2046.
  
  TI TWL4030 SERIES SOC CODEC DRIVER
  M:    Peter Ujfalusi <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    sound/soc/codecs/twl4030*
  
@@@ -23629,9 -23629,10 +23631,9 @@@ F:  drivers/net/tun.
  
  TURBOCHANNEL SUBSYSTEM
  M:    "Maciej W. Rozycki" <[email protected]>
 -M:    Ralf Baechle <[email protected]>
  L:    [email protected]
  S:    Maintained
 -Q:    http://patchwork.linux-mips.org/project/linux-mips/list/
 +Q:    https://patchwork.kernel.org/project/linux-mips/list/
  F:    drivers/tc/
  F:    include/linux/tc.h
  
  S:    Maintained
  F:    drivers/hid/hid-udraw-ps3.c
  
 -UFS FILESYSTEM
 -M:    Evgeniy Dushistov <[email protected]>
 -S:    Maintained
 -F:    Documentation/admin-guide/ufs.rst
 -F:    fs/ufs/
 -
  UHID USERSPACE HID IO DRIVER
  M:    David Rheinsberg <[email protected]>
  L:    [email protected]
@@@ -23990,7 -23997,7 +23992,7 @@@ F:   drivers/usb/storage
  
  USB MIDI DRIVER
  M:    Clemens Ladisch <[email protected]>
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git
  F:    sound/usb/midi.*
@@@ -24052,7 -24059,6 +24054,7 @@@ USB RAW GADGET DRIVE
  R:    Andrey Konovalov <[email protected]>
  L:    [email protected]
  S:    Maintained
 +B:    https://github.com/xairy/raw-gadget/issues
  F:    Documentation/usb/raw-gadget.rst
  F:    drivers/usb/gadget/legacy/raw_gadget.c
  F:    include/uapi/linux/usb/raw_gadget.h
@@@ -24169,12 -24175,8 +24171,12 @@@ F: drivers/usb/host/xhci
  
  USER DATAGRAM PROTOCOL (UDP)
  M:    Willem de Bruijn <[email protected]>
 +L:    [email protected]
  S:    Maintained
  F:    include/linux/udp.h
 +F:    include/net/udp.h
 +F:    include/trace/events/udp.h
 +F:    include/uapi/linux/udp.h
  F:    net/ipv4/udp.c
  F:    net/ipv6/udp.c
  
@@@ -24655,7 -24657,7 +24657,7 @@@ VIRTIO SOUND DRIVE
  M:    Anton Yakovlev <[email protected]>
  M:    "Michael S. Tsirkin" <[email protected]>
  L:    [email protected]
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Maintained
  F:    include/uapi/linux/virtio_snd.h
  F:    sound/virtio/*
@@@ -24724,10 -24726,9 +24726,10 @@@ F: tools/testing/vsock
  
  VMA
  M:    Andrew Morton <[email protected]>
 -R:    Liam R. Howlett <[email protected]>
 +M:    Liam R. Howlett <[email protected]>
 +M:    Lorenzo Stoakes <[email protected]>
  R:    Vlastimil Babka <[email protected]>
 -R:    Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
 +R:    Jann Horn <jannh@google.com>
  L:    [email protected]
  S:    Maintained
  W:    https://www.linux-mm.org
@@@ -25385,7 -25386,7 +25387,7 @@@ F:   include/xen/interface/io/usbif.
  XEN SOUND FRONTEND DRIVER
  M:    Oleksandr Andrushchenko <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
 -L:    [email protected] (moderated for non-subscribers)
 +L:    [email protected]
  S:    Supported
  F:    sound/xen/*
  
@@@ -25401,7 -25402,7 +25403,7 @@@ F:   include/xen/arm/swiotlb-xen.
  F:    include/xen/swiotlb-xen.h
  
  XFS FILESYSTEM
 -M:    Chandan Babu R <[email protected]>
 +M:    Carlos Maiolino <[email protected]>
  R:    Darrick J. Wong <[email protected]>
  L:    [email protected]
  S:    Supported
index 5b1d6325df85b2607b4ae451aa81bebf908ffee4,26c01b9e3434c4f6d548b328ff894cbc0eaa8b96..1d5e13b148f21482bc396dab23797dc0d2e17139
@@@ -256,7 -256,6 +256,7 @@@ CONFIG_RPMSG_CTRL=
  CONFIG_RPMSG_VIRTIO=y
  CONFIG_PM_DEVFREQ=y
  CONFIG_IIO=y
 +CONFIG_THEAD_C900_ACLINT_SSWI=y
  CONFIG_PHY_SUN4I_USB=m
  CONFIG_PHY_STARFIVE_JH7110_DPHY_RX=m
  CONFIG_PHY_STARFIVE_JH7110_PCIE=m
@@@ -302,7 -301,6 +302,6 @@@ CONFIG_DEBUG_MEMORY_INIT=
  CONFIG_DEBUG_PER_CPU_MAPS=y
  CONFIG_SOFTLOCKUP_DETECTOR=y
  CONFIG_WQ_WATCHDOG=y
- CONFIG_DEBUG_TIMEKEEPING=y
  CONFIG_DEBUG_RT_MUTEXES=y
  CONFIG_DEBUG_SPINLOCK=y
  CONFIG_DEBUG_MUTEXES=y
diff --combined arch/x86/Kconfig
index 8f5362a2498fc4e7513133ad370ccb8ab7103f03,53a5eda8219c2e7dbddfda42856601904387fb6c..a3c31b784edc92962106ba7f20d222092dbba7ed
@@@ -93,7 -93,6 +93,7 @@@ config X8
        select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
        select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
        select ARCH_HAS_PMEM_API                if X86_64
 +      select ARCH_HAS_PREEMPT_LAZY
        select ARCH_HAS_PTE_DEVMAP              if X86_64
        select ARCH_HAS_PTE_SPECIAL
        select ARCH_HAS_HW_PTE_YOUNG
        select ARCH_HAS_PARANOID_L1D_FLUSH
        select BUILDTIME_TABLE_SORT
        select CLKEVT_I8253
-       select CLOCKSOURCE_VALIDATE_LAST_CYCLE
        select CLOCKSOURCE_WATCHDOG
        # Word-size accesses may read uninitialized data past the trailing \0
        # in strings and cause false KMSAN reports.
@@@ -1955,7 -1953,6 +1954,7 @@@ config X86_USER_SHADOW_STAC
        depends on AS_WRUSS
        depends on X86_64
        select ARCH_USES_HIGH_VMA_FLAGS
 +      select ARCH_HAS_USER_SHADOW_STACK
        select X86_CET
        help
          Shadow stack protection is a hardware feature that detects function
@@@ -2086,9 -2083,6 +2085,9 @@@ config ARCH_SUPPORTS_KEXEC_JUM
  config ARCH_SUPPORTS_CRASH_DUMP
        def_bool X86_64 || (X86_32 && HIGHMEM)
  
 +config ARCH_DEFAULT_CRASH_DUMP
 +      def_bool y
 +
  config ARCH_SUPPORTS_CRASH_HOTPLUG
        def_bool y
  
@@@ -2262,7 -2256,6 +2261,7 @@@ config RANDOMIZE_MEMORY_PHYSICAL_PADDIN
  config ADDRESS_MASKING
        bool "Linear Address Masking support"
        depends on X86_64
 +      depends on COMPILE_TEST || !CPU_MITIGATIONS # wait for LASS
        help
          Linear Address Masking (LAM) modifies the checking that is applied
          to 64-bit linear addresses, allowing software to use the
@@@ -2429,14 -2422,6 +2428,14 @@@ config CFI_AUTO_DEFAUL
  
  source "kernel/livepatch/Kconfig"
  
 +config X86_BUS_LOCK_DETECT
 +      bool "Split Lock Detect and Bus Lock Detect support"
 +      depends on CPU_SUP_INTEL || CPU_SUP_AMD
 +      default y
 +      help
 +        Enable Split Lock Detect and Bus Lock Detect functionalities.
 +        See <file:Documentation/arch/x86/buslock.rst> for more information.
 +
  endmenu
  
  config CC_HAS_NAMED_AS
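
The ADDRESS_MASKING entry above gates Linear Address Masking for userspace. As a hedged illustration only (the arch_prctl() opcode names come from <asm/prctl.h>, and availability depends on hardware plus this config), a program could opt into LAM_U57 and then carry a software tag in bits 62:57 of a pointer:

#include <asm/prctl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	/* ask for 6 tag bits (LAM_U57); fails if unsupported or disabled */
	if (syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, 6))
		return 1;

	uint64_t *p = malloc(sizeof(*p));
	if (!p)
		return 1;
	*p = 42;
	/* hardware masks bits 62:57 on dereference, so both aliases work */
	uint64_t *tagged = (uint64_t *)((uintptr_t)p | (0x2aULL << 57));
	printf("%llu\n", (unsigned long long)*tagged);
	free(p);
	return 0;
}
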
diff --combined fs/aio.c
index 72e3970f42256b2c6ca2117f3e54ebfa64af1c8b,a5d331f29943226a17916fbff00948eb0f49a07a..50671640b5883f5d20f652e23c4ea3fe04c989f2
+++ b/fs/aio.c
@@@ -1335,7 -1335,7 +1335,7 @@@ static long read_events(struct kioctx *
        if (until == 0 || ret < 0 || ret >= min_nr)
                return ret;
  
-       hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       hrtimer_setup_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        if (until != KTIME_MAX) {
                hrtimer_set_expires_range_ns(&t.timer, until, current->timer_slack_ns);
                hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL);
@@@ -2191,6 -2191,7 +2191,6 @@@ SYSCALL_DEFINE3(io_cancel, aio_context_
                return -EINVAL;
  
        spin_lock_irq(&ctx->ctx_lock);
 -      /* TODO: use a hash or array, this sucks. */
        list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
                if (kiocb->ki_res.obj == obj) {
                        ret = kiocb->ki_cancel(&kiocb->rw);
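
The read_events() hunk above is one instance of a rename that recurs throughout this merge: hrtimer_init_sleeper_on_stack() becomes hrtimer_setup_sleeper_on_stack(), with the sleeper's wakeup function installed by the hrtimer core instead of by each caller. A minimal sketch of the resulting call pattern (helper name and return convention are illustrative):

#include <linux/errno.h>
#include <linux/hrtimer.h>
#include <linux/sched.h>

/* sleep until @until on CLOCK_MONOTONIC, interruptibly */
static int sleep_until(ktime_t until)
{
	struct hrtimer_sleeper t;

	hrtimer_setup_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	hrtimer_set_expires(&t.timer, until);

	set_current_state(TASK_INTERRUPTIBLE);
	hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_ABS);
	if (t.task)
		schedule();
	hrtimer_cancel(&t.timer);
	__set_current_state(TASK_RUNNING);
	destroy_hrtimer_on_stack(&t.timer);

	/* t.task is cleared when the timer fired; otherwise we were woken */
	return t.task ? -EINTR : 0;
}
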
diff --combined fs/proc/base.c
index e9d7ddc52f69cbd7e804346e07ae1eee7ac3ac1b,6a37a43241e4a1969c868c461e3031adea404d28..3e31a4805427471da340a9f736dab92f07f1f66a
@@@ -58,6 -58,7 +58,6 @@@
  #include <linux/init.h>
  #include <linux/capability.h>
  #include <linux/file.h>
 -#include <linux/fdtable.h>
  #include <linux/generic-radix-tree.h>
  #include <linux/string.h>
  #include <linux/seq_file.h>
@@@ -2552,8 -2553,8 +2552,8 @@@ static int show_timer(struct seq_file *
  
        seq_printf(m, "ID: %d\n", timer->it_id);
        seq_printf(m, "signal: %d/%px\n",
-                  timer->sigq->info.si_signo,
-                  timer->sigq->info.si_value.sival_ptr);
+                  timer->sigq.info.si_signo,
+                  timer->sigq.info.si_value.sival_ptr);
        seq_printf(m, "notify: %s/%s.%d\n",
                   nstr[notify & ~SIGEV_THREAD_ID],
                   (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
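
The show_timer() change reflects the sigqueue now being embedded in struct k_itimer rather than allocated separately, so "timer->sigq->" becomes "timer->sigq.". The payoff, sketched below under the layout this series establishes, is that signal delivery can map a queued sigqueue back to its timer with a plain container_of() instead of a timer-ID lookup:

#include <linux/container_of.h>
#include <linux/posix-timers.h>

static struct k_itimer *timer_from_sigqueue(struct sigqueue *q)
{
	/* valid for the whole timer lifetime, since q is embedded */
	return container_of(q, struct k_itimer, sigq);
}
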
diff --combined fs/timerfd.c
index 4c32244b05083ded00892da3fcd4334120c12fff,f10c99ad5c6009a0f158ee913483a99ae33dcc92..9f7eb451a60f611aa156db6beb75de0004efbbfb
@@@ -79,13 -79,11 +79,11 @@@ static enum hrtimer_restart timerfd_tmr
        return HRTIMER_NORESTART;
  }
  
- static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm,
-       ktime_t now)
+ static void timerfd_alarmproc(struct alarm *alarm, ktime_t now)
  {
        struct timerfd_ctx *ctx = container_of(alarm, struct timerfd_ctx,
                                               t.alarm);
        timerfd_triggered(ctx);
-       return ALARMTIMER_NORESTART;
  }
  
  /*
@@@ -394,6 -392,19 +392,6 @@@ static const struct file_operations tim
        .unlocked_ioctl = timerfd_ioctl,
  };
  
 -static int timerfd_fget(int fd, struct fd *p)
 -{
 -      struct fd f = fdget(fd);
 -      if (!fd_file(f))
 -              return -EBADF;
 -      if (fd_file(f)->f_op != &timerfd_fops) {
 -              fdput(f);
 -              return -EINVAL;
 -      }
 -      *p = f;
 -      return 0;
 -}
 -
  SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
  {
        int ufd;
@@@ -458,6 -469,7 +456,6 @@@ static int do_timerfd_settime(int ufd, 
                const struct itimerspec64 *new,
                struct itimerspec64 *old)
  {
 -      struct fd f;
        struct timerfd_ctx *ctx;
        int ret;
  
                 !itimerspec64_valid(new))
                return -EINVAL;
  
 -      ret = timerfd_fget(ufd, &f);
 -      if (ret)
 -              return ret;
 +      CLASS(fd, f)(ufd);
 +      if (fd_empty(f))
 +              return -EBADF;
 +
 +      if (fd_file(f)->f_op != &timerfd_fops)
 +              return -EINVAL;
 +
        ctx = fd_file(f)->private_data;
  
 -      if (isalarm(ctx) && !capable(CAP_WAKE_ALARM)) {
 -              fdput(f);
 +      if (isalarm(ctx) && !capable(CAP_WAKE_ALARM))
                return -EPERM;
 -      }
  
        timerfd_setup_cancel(ctx, flags);
  
        ret = timerfd_setup(ctx, flags, new);
  
        spin_unlock_irq(&ctx->wqh.lock);
 -      fdput(f);
        return ret;
  }
  
  static int do_timerfd_gettime(int ufd, struct itimerspec64 *t)
  {
 -      struct fd f;
        struct timerfd_ctx *ctx;
 -      int ret = timerfd_fget(ufd, &f);
 -      if (ret)
 -              return ret;
 +      CLASS(fd, f)(ufd);
 +
 +      if (fd_empty(f))
 +              return -EBADF;
 +      if (fd_file(f)->f_op != &timerfd_fops)
 +              return -EINVAL;
        ctx = fd_file(f)->private_data;
  
        spin_lock_irq(&ctx->wqh.lock);
        t->it_value = ktime_to_timespec64(timerfd_get_remaining(ctx));
        t->it_interval = ktime_to_timespec64(ctx->tintv);
        spin_unlock_irq(&ctx->wqh.lock);
 -      fdput(f);
        return 0;
  }
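
Both timerfd syscall helpers above drop the timerfd_fget()/fdput() pairing in favour of the scope-based CLASS(fd) guard from <linux/cleanup.h>, which releases the file reference automatically on every return path. (The same file also converts timerfd_alarmproc() to the new void-returning alarmtimer callback signature.) A condensed sketch of the converted shape, with the validation logic copied from the hunks and the surrounding body elided:

#include <linux/cleanup.h>
#include <linux/file.h>

static int timerfd_op(int ufd)
{
	CLASS(fd, f)(ufd);	/* fdput() runs when f leaves scope */

	if (fd_empty(f))
		return -EBADF;
	if (fd_file(f)->f_op != &timerfd_fops)
		return -EINVAL;

	/* ... operate on fd_file(f)->private_data ... */
	return 0;
}
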
  
diff --combined include/linux/tick.h
index 99c9c5a7252aabd4808d3e5df15dab54f1d2ef75,b0c74bfe0600f8ea0f9ea4421aecef3f2cebc8eb..b8ddc8e631a3ccc4bba993aa27a13a2391383f98
@@@ -20,12 -20,10 +20,10 @@@ extern void __init tick_init(void)
  extern void tick_suspend_local(void);
  /* Should be core only, but XEN resume magic and ARM BL switcher require it */
  extern void tick_resume_local(void);
- extern void tick_cleanup_dead_cpu(int cpu);
  #else /* CONFIG_GENERIC_CLOCKEVENTS */
  static inline void tick_init(void) { }
  static inline void tick_suspend_local(void) { }
  static inline void tick_resume_local(void) { }
- static inline void tick_cleanup_dead_cpu(int cpu) { }
  #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
  
  #if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU)
@@@ -251,19 -249,12 +249,19 @@@ static inline void tick_dep_set_task(st
        if (tick_nohz_full_enabled())
                tick_nohz_dep_set_task(tsk, bit);
  }
 +
  static inline void tick_dep_clear_task(struct task_struct *tsk,
                                       enum tick_dep_bits bit)
  {
        if (tick_nohz_full_enabled())
                tick_nohz_dep_clear_task(tsk, bit);
  }
 +
 +static inline void tick_dep_init_task(struct task_struct *tsk)
 +{
 +      atomic_set(&tsk->tick_dep_mask, 0);
 +}
 +
  static inline void tick_dep_set_signal(struct task_struct *tsk,
                                       enum tick_dep_bits bit)
  {
@@@ -297,7 -288,6 +295,7 @@@ static inline void tick_dep_set_task(st
                                     enum tick_dep_bits bit) { }
  static inline void tick_dep_clear_task(struct task_struct *tsk,
                                       enum tick_dep_bits bit) { }
 +static inline void tick_dep_init_task(struct task_struct *tsk) { }
  static inline void tick_dep_set_signal(struct task_struct *tsk,
                                       enum tick_dep_bits bit) { }
  static inline void tick_dep_clear_signal(struct signal_struct *signal,
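
The new tick_dep_init_task() inline gives a freshly created task a defined starting point for its NOHZ-full tick dependency mask, with a no-op stub keeping !CONFIG_NO_HZ_FULL builds clean. The call site is not visible in this hunk; a hedged sketch of the presumed fork-path usage:

/* hedged sketch: presumed call site in the fork path */
static void init_new_task_tick_state(struct task_struct *tsk)
{
	/* child must not inherit the parent's tick dependencies */
	tick_dep_init_task(tsk);
}
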
diff --combined include/linux/wait.h
index 2b322a9b88a2bd122d30e70a3d6eaa12c5cec244,643b7c7bf376a42b546f1abfab20068b72cf9c81..6d90ad97440876082512beeb0a59d2e09d082f23
@@@ -221,7 -221,6 +221,7 @@@ void __wake_up_pollfree(struct wait_que
  #define wake_up_all(x)                        __wake_up(x, TASK_NORMAL, 0, NULL)
  #define wake_up_locked(x)             __wake_up_locked((x), TASK_NORMAL, 1)
  #define wake_up_all_locked(x)         __wake_up_locked((x), TASK_NORMAL, 0)
 +#define wake_up_sync(x)                       __wake_up_sync(x, TASK_NORMAL)
  
  #define wake_up_interruptible(x)      __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
  #define wake_up_interruptible_nr(x, nr)       __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
@@@ -542,8 -541,8 +542,8 @@@ do {                                                                               
        int __ret = 0;                                                          \
        struct hrtimer_sleeper __t;                                             \
                                                                                \
-       hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC,                    \
-                                     HRTIMER_MODE_REL);                        \
+       hrtimer_setup_sleeper_on_stack(&__t, CLOCK_MONOTONIC,                   \
+                                      HRTIMER_MODE_REL);                       \
        if ((timeout) != KTIME_MAX) {                                           \
                hrtimer_set_expires_range_ns(&__t.timer, timeout,               \
                                        current->timer_slack_ns);               \
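
Alongside the hrtimer sleeper rename inside the wait_event timeout machinery, this hunk adds a wake_up_sync() wrapper mirroring the existing locked and interruptible variants. A hedged usage sketch: the _sync hint tells the scheduler the waker will block imminently, so the woken task need not preempt it on this CPU.

#include <linux/types.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(handoff_wq);
static bool handoff_ready;

static void hand_off_and_block(void)
{
	handoff_ready = true;
	wake_up_sync(&handoff_wq);	/* the waker is about to sleep itself */
	/* ... a wait_event(...) on another queue follows shortly ... */
}
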
diff --combined io_uring/io_uring.c
index 73af598633002566ead45fa7b79e6095f60b4639,0842aa3f60e7a53e682bb61392b1ab23a9b275b0..8012933998837ddcef45c14f1dfe543947a9eaec
@@@ -51,6 -51,7 +51,6 @@@
  #include <linux/sched/signal.h>
  #include <linux/fs.h>
  #include <linux/file.h>
 -#include <linux/fdtable.h>
  #include <linux/mm.h>
  #include <linux/mman.h>
  #include <linux/percpu.h>
@@@ -69,7 -70,6 +69,7 @@@
  #include <linux/io_uring/cmd.h>
  #include <linux/audit.h>
  #include <linux/security.h>
 +#include <linux/jump_label.h>
  #include <asm/shmparam.h>
  
  #define CREATE_TRACE_POINTS
  #include "alloc_cache.h"
  #include "eventfd.h"
  
 -#define IORING_MAX_ENTRIES    32768
 -#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES)
 -
  #define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \
                          IOSQE_IO_HARDLINK | IOSQE_ASYNC)
  
@@@ -141,13 -144,11 +141,13 @@@ struct io_defer_entry 
  #define IO_CQ_WAKE_FORCE      (IO_CQ_WAKE_INIT >> 1)
  
  static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 -                                       struct task_struct *task,
 +                                       struct io_uring_task *tctx,
                                         bool cancel_all);
  
  static void io_queue_sqe(struct io_kiocb *req);
  
 +static __read_mostly DEFINE_STATIC_KEY_FALSE(io_key_has_sqarray);
 +
  struct kmem_cache *req_cachep;
  static struct workqueue_struct *iou_wq __ro_after_init;
  
@@@ -200,12 -201,12 +200,12 @@@ static bool io_match_linked(struct io_k
   * As io_match_task() but protected against racing with linked timeouts.
   * User must not hold timeout_lock.
   */
 -bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
 +bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
                        bool cancel_all)
  {
        bool matched;
  
 -      if (task && head->task != task)
 +      if (tctx && head->tctx != tctx)
                return false;
        if (cancel_all)
                return true;
@@@ -260,23 -261,15 +260,23 @@@ static __cold void io_fallback_req_func
  
  static int io_alloc_hash_table(struct io_hash_table *table, unsigned bits)
  {
 -      unsigned hash_buckets = 1U << bits;
 -      size_t hash_size = hash_buckets * sizeof(table->hbs[0]);
 +      unsigned int hash_buckets;
 +      int i;
  
 -      table->hbs = kmalloc(hash_size, GFP_KERNEL);
 -      if (!table->hbs)
 -              return -ENOMEM;
 +      do {
 +              hash_buckets = 1U << bits;
 +              table->hbs = kvmalloc_array(hash_buckets, sizeof(table->hbs[0]),
 +                                              GFP_KERNEL_ACCOUNT);
 +              if (table->hbs)
 +                      break;
 +              if (bits == 1)
 +                      return -ENOMEM;
 +              bits--;
 +      } while (1);
  
        table->hash_bits = bits;
 -      init_hash_table(table, hash_buckets);
 +      for (i = 0; i < hash_buckets; i++)
 +              INIT_HLIST_HEAD(&table->hbs[i].list);
        return 0;
  }
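
io_alloc_hash_table() above now retries with ever smaller tables instead of failing ring creation outright: kvmalloc_array() (which can fall back to vmalloc for larger orders) is attempted at the requested size, then bits is decremented, halving the table, down to a floor of two buckets. The same pattern in isolation, as a hedged generic sketch:

#include <linux/list.h>
#include <linux/mm.h>

static int alloc_shrinking_table(struct hlist_head **tbl,
				 unsigned int *table_bits, unsigned int bits)
{
	struct hlist_head *hbs;
	unsigned int i, nr;

	do {
		nr = 1U << bits;
		hbs = kvmalloc_array(nr, sizeof(*hbs), GFP_KERNEL_ACCOUNT);
		if (hbs)
			break;
		if (bits == 1)
			return -ENOMEM;	/* even two buckets failed */
		bits--;
	} while (1);

	for (i = 0; i < nr; i++)
		INIT_HLIST_HEAD(&hbs[i]);
	*tbl = hbs;
	*table_bits = bits;
	return 0;
}
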
  
@@@ -301,18 -294,21 +301,18 @@@ static __cold struct io_ring_ctx *io_ri
        hash_bits = clamp(hash_bits, 1, 8);
        if (io_alloc_hash_table(&ctx->cancel_table, hash_bits))
                goto err;
 -      if (io_alloc_hash_table(&ctx->cancel_table_locked, hash_bits))
 -              goto err;
        if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
                            0, GFP_KERNEL))
                goto err;
  
        ctx->flags = p->flags;
 +      ctx->hybrid_poll_time = LLONG_MAX;
        atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
        init_waitqueue_head(&ctx->sqo_sq_wait);
        INIT_LIST_HEAD(&ctx->sqd_list);
        INIT_LIST_HEAD(&ctx->cq_overflow_list);
        INIT_LIST_HEAD(&ctx->io_buffers_cache);
 -      ret = io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX,
 -                          sizeof(struct io_rsrc_node));
 -      ret |= io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX,
 +      ret = io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX,
                            sizeof(struct async_poll));
        ret |= io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX,
                            sizeof(struct io_async_msghdr));
                            sizeof(struct io_kiocb));
        ret |= io_futex_cache_init(ctx);
        if (ret)
 -              goto err;
 +              goto free_ref;
        init_completion(&ctx->ref_comp);
        xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
        mutex_init(&ctx->uring_lock);
        init_waitqueue_head(&ctx->cq_wait);
        init_waitqueue_head(&ctx->poll_wq);
 -      init_waitqueue_head(&ctx->rsrc_quiesce_wq);
        spin_lock_init(&ctx->completion_lock);
        spin_lock_init(&ctx->timeout_lock);
        INIT_WQ_LIST(&ctx->iopoll_list);
        INIT_LIST_HEAD(&ctx->defer_list);
        INIT_LIST_HEAD(&ctx->timeout_list);
        INIT_LIST_HEAD(&ctx->ltimeout_list);
 -      INIT_LIST_HEAD(&ctx->rsrc_ref_list);
        init_llist_head(&ctx->work_llist);
        INIT_LIST_HEAD(&ctx->tctx_list);
        ctx->submit_state.free_list.next = NULL;
        INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
        INIT_HLIST_HEAD(&ctx->cancelable_uring_cmd);
        io_napi_init(ctx);
 +      mutex_init(&ctx->resize_lock);
  
        return ctx;
 +
 +free_ref:
 +      percpu_ref_exit(&ctx->refs);
  err:
 -      io_alloc_cache_free(&ctx->rsrc_node_cache, kfree);
        io_alloc_cache_free(&ctx->apoll_cache, kfree);
        io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
        io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free);
        io_alloc_cache_free(&ctx->uring_cache, kfree);
        io_alloc_cache_free(&ctx->msg_cache, io_msg_cache_free);
        io_futex_cache_free(ctx);
 -      kfree(ctx->cancel_table.hbs);
 -      kfree(ctx->cancel_table_locked.hbs);
 +      kvfree(ctx->cancel_table.hbs);
        xa_destroy(&ctx->io_bl_xa);
        kfree(ctx);
        return NULL;
@@@ -406,8 -402,11 +406,8 @@@ static void io_clean_op(struct io_kioc
                kfree(req->apoll);
                req->apoll = NULL;
        }
 -      if (req->flags & REQ_F_INFLIGHT) {
 -              struct io_uring_task *tctx = req->task->io_uring;
 -
 -              atomic_dec(&tctx->inflight_tracked);
 -      }
 +      if (req->flags & REQ_F_INFLIGHT)
 +              atomic_dec(&req->tctx->inflight_tracked);
        if (req->flags & REQ_F_CREDS)
                put_cred(req->creds);
        if (req->flags & REQ_F_ASYNC_DATA) {
@@@ -421,7 -420,7 +421,7 @@@ static inline void io_req_track_infligh
  {
        if (!(req->flags & REQ_F_INFLIGHT)) {
                req->flags |= REQ_F_INFLIGHT;
 -              atomic_inc(&req->task->io_uring->inflight_tracked);
 +              atomic_inc(&req->tctx->inflight_tracked);
        }
  }
  
@@@ -510,7 -509,7 +510,7 @@@ static void io_prep_async_link(struct i
  static void io_queue_iowq(struct io_kiocb *req)
  {
        struct io_kiocb *link = io_prep_linked_timeout(req);
 -      struct io_uring_task *tctx = req->task->io_uring;
 +      struct io_uring_task *tctx = req->tctx;
  
        BUG_ON(!tctx);
        BUG_ON(!tctx->io_wq);
         * procedure rather than attempt to run this request (or create a new
         * worker for it).
         */
 -      if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
 +      if (WARN_ON_ONCE(!same_thread_group(tctx->task, current)))
                atomic_or(IO_WQ_WORK_CANCEL, &req->work.flags);
  
        trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work));
@@@ -673,19 -672,30 +673,19 @@@ static void io_cqring_do_overflow_flush
        mutex_unlock(&ctx->uring_lock);
  }
  
 -/* can be called by any task */
 -static void io_put_task_remote(struct task_struct *task)
 -{
 -      struct io_uring_task *tctx = task->io_uring;
 -
 -      percpu_counter_sub(&tctx->inflight, 1);
 -      if (unlikely(atomic_read(&tctx->in_cancel)))
 -              wake_up(&tctx->wait);
 -      put_task_struct(task);
 -}
 -
 -/* used by a task to put its own references */
 -static void io_put_task_local(struct task_struct *task)
 -{
 -      task->io_uring->cached_refs++;
 -}
 -
  /* must be called shortly after putting a request */
 -static inline void io_put_task(struct task_struct *task)
 +static inline void io_put_task(struct io_kiocb *req)
  {
 -      if (likely(task == current))
 -              io_put_task_local(task);
 -      else
 -              io_put_task_remote(task);
 +      struct io_uring_task *tctx = req->tctx;
 +
 +      if (likely(tctx->task == current)) {
 +              tctx->cached_refs++;
 +      } else {
 +              percpu_counter_sub(&tctx->inflight, 1);
 +              if (unlikely(atomic_read(&tctx->in_cancel)))
 +                      wake_up(&tctx->wait);
 +              put_task_struct(tctx->task);
 +      }
  }
  
  void io_task_refs_refill(struct io_uring_task *tctx)
@@@ -807,6 -817,8 +807,6 @@@ static bool io_fill_cqe_aux(struct io_r
         * the ring.
         */
        if (likely(io_get_cqe(ctx, &cqe))) {
 -              trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
 -
                WRITE_ONCE(cqe->user_data, user_data);
                WRITE_ONCE(cqe->res, res);
                WRITE_ONCE(cqe->flags, cflags);
                        WRITE_ONCE(cqe->big_cqe[0], 0);
                        WRITE_ONCE(cqe->big_cqe[1], 0);
                }
 +
 +              trace_io_uring_complete(ctx, NULL, cqe);
                return true;
        }
        return false;
@@@ -933,8 -943,6 +933,8 @@@ void io_req_defer_failed(struct io_kioc
  static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx)
  {
        req->ctx = ctx;
 +      req->buf_node = NULL;
 +      req->file_node = NULL;
        req->link = NULL;
        req->async_data = NULL;
        /* not necessary, but safer to zero */
@@@ -1065,8 -1073,23 +1065,8 @@@ struct llist_node *io_handle_tw_list(st
        return node;
  }
  
 -/**
 - * io_llist_xchg - swap all entries in a lock-less list
 - * @head:     the head of lock-less list to delete all entries
 - * @new:      new entry as the head of the list
 - *
 - * If list is empty, return NULL, otherwise, return the pointer to the first entry.
 - * The order of entries returned is from the newest to the oldest added one.
 - */
 -static inline struct llist_node *io_llist_xchg(struct llist_head *head,
 -                                             struct llist_node *new)
 -{
 -      return xchg(&head->first, new);
 -}
 -
 -static __cold void io_fallback_tw(struct io_uring_task *tctx, bool sync)
 +static __cold void __io_fallback_tw(struct llist_node *node, bool sync)
  {
 -      struct llist_node *node = llist_del_all(&tctx->task_list);
        struct io_ring_ctx *last_ctx = NULL;
        struct io_kiocb *req;
  
        }
  }
  
 +static void io_fallback_tw(struct io_uring_task *tctx, bool sync)
 +{
 +      struct llist_node *node = llist_del_all(&tctx->task_list);
 +
 +      __io_fallback_tw(node, sync);
 +}
 +
  struct llist_node *tctx_task_work_run(struct io_uring_task *tctx,
                                      unsigned int max_entries,
                                      unsigned int *count)
@@@ -1209,7 -1225,7 +1209,7 @@@ static inline void io_req_local_work_ad
  
  static void io_req_normal_work_add(struct io_kiocb *req)
  {
 -      struct io_uring_task *tctx = req->task->io_uring;
 +      struct io_uring_task *tctx = req->tctx;
        struct io_ring_ctx *ctx = req->ctx;
  
        /* task_work already pending, we're done */
                return;
        }
  
 -      if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
 +      if (likely(!task_work_add(tctx->task, &tctx->task_work, ctx->notify_method)))
                return;
  
        io_fallback_tw(tctx, false);
@@@ -1252,9 -1268,16 +1252,9 @@@ void io_req_task_work_add_remote(struc
  
  static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
  {
 -      struct llist_node *node;
 -
 -      node = llist_del_all(&ctx->work_llist);
 -      while (node) {
 -              struct io_kiocb *req = container_of(node, struct io_kiocb,
 -                                                  io_task_work.node);
 +      struct llist_node *node = llist_del_all(&ctx->work_llist);
  
 -              node = node->next;
 -              io_req_normal_work_add(req);
 -      }
 +      __io_fallback_tw(node, false);
  }
  
  static bool io_run_local_work_continue(struct io_ring_ctx *ctx, int events,
@@@ -1285,7 -1308,7 +1285,7 @@@ again
         * llists are in reverse order, flip it back the right way before
         * running the pending items.
         */
 -      node = llist_reverse_order(io_llist_xchg(&ctx->work_llist, NULL));
 +      node = llist_reverse_order(llist_del_all(&ctx->work_llist));
        while (node) {
                struct llist_node *next = node->next;
                struct io_kiocb *req = container_of(node, struct io_kiocb,
@@@ -1338,7 -1361,8 +1338,7 @@@ static void io_req_task_cancel(struct i
  void io_req_task_submit(struct io_kiocb *req, struct io_tw_state *ts)
  {
        io_tw_lock(req->ctx, ts);
 -      /* req->task == current here, checking PF_EXITING is safe */
 -      if (unlikely(req->task->flags & PF_EXITING))
 +      if (unlikely(io_should_terminate_tw()))
                io_req_defer_failed(req, -EFAULT);
        else if (req->flags & REQ_F_FORCE_ASYNC)
                io_queue_iowq(req);
@@@ -1396,8 -1420,8 +1396,8 @@@ static void io_free_batch_list(struct i
                                io_clean_op(req);
                }
                io_put_file(req);
 -              io_put_rsrc_node(ctx, req->rsrc_node);
 -              io_put_task(req->task);
 +              io_req_put_rsrc_nodes(req);
 +              io_put_task(req);
  
                node = req->comp_list.next;
                io_req_add_to_cache(req, ctx);
@@@ -1859,16 -1883,20 +1859,16 @@@ inline struct file *io_file_get_fixed(s
                                      unsigned int issue_flags)
  {
        struct io_ring_ctx *ctx = req->ctx;
 -      struct io_fixed_file *slot;
 +      struct io_rsrc_node *node;
        struct file *file = NULL;
  
        io_ring_submit_lock(ctx, issue_flags);
 -
 -      if (unlikely((unsigned int)fd >= ctx->nr_user_files))
 -              goto out;
 -      fd = array_index_nospec(fd, ctx->nr_user_files);
 -      slot = io_fixed_file_slot(&ctx->file_table, fd);
 -      if (!req->rsrc_node)
 -              __io_req_set_rsrc_node(req, ctx);
 -      req->flags |= io_slot_flags(slot);
 -      file = io_slot_file(slot);
 -out:
 +      node = io_rsrc_node_lookup(&ctx->file_table.data, fd);
 +      if (node) {
 +              io_req_assign_rsrc_node(&req->file_node, node);
 +              req->flags |= io_slot_flags(node);
 +              file = io_slot_file(node);
 +      }
        io_ring_submit_unlock(ctx, issue_flags);
        return file;
  }
@@@ -2010,10 -2038,11 +2010,10 @@@ static int io_init_req(struct io_ring_c
        req->opcode = opcode = READ_ONCE(sqe->opcode);
        /* same numerical values with corresponding REQ_F_*, safe to copy */
        sqe_flags = READ_ONCE(sqe->flags);
 -      req->flags = (io_req_flags_t) sqe_flags;
 +      req->flags = (__force io_req_flags_t) sqe_flags;
        req->cqe.user_data = READ_ONCE(sqe->user_data);
        req->file = NULL;
 -      req->rsrc_node = NULL;
 -      req->task = current;
 +      req->tctx = current->io_uring;
        req->cancel_seq_set = false;
  
        if (unlikely(opcode >= IORING_OP_LAST)) {
@@@ -2231,8 -2260,7 +2231,8 @@@ static bool io_get_sqe(struct io_ring_c
        unsigned mask = ctx->sq_entries - 1;
        unsigned head = ctx->cached_sq_head++ & mask;
  
 -      if (!(ctx->flags & IORING_SETUP_NO_SQARRAY)) {
 +      if (static_branch_unlikely(&io_key_has_sqarray) &&
 +          (!(ctx->flags & IORING_SETUP_NO_SQARRAY))) {
                head = READ_ONCE(ctx->sq_array[head]);
                if (unlikely(head >= ctx->sq_entries)) {
                        /* drop invalid entries */
                                   READ_ONCE(ctx->rings->sq_dropped) + 1);
                        return false;
                }
 +              head = array_index_nospec(head, ctx->sq_entries);
        }
  
        /*
@@@ -2408,13 -2435,14 +2408,14 @@@ static int io_cqring_schedule_timeout(s
  {
        ktime_t timeout;
  
-       hrtimer_init_on_stack(&iowq->t, clock_id, HRTIMER_MODE_ABS);
        if (iowq->min_timeout) {
                timeout = ktime_add_ns(iowq->min_timeout, start_time);
-               iowq->t.function = io_cqring_min_timer_wakeup;
+               hrtimer_setup_on_stack(&iowq->t, io_cqring_min_timer_wakeup, clock_id,
+                                      HRTIMER_MODE_ABS);
        } else {
                timeout = iowq->timeout;
-               iowq->t.function = io_cqring_timer_wakeup;
+               hrtimer_setup_on_stack(&iowq->t, io_cqring_timer_wakeup, clock_id,
+                                      HRTIMER_MODE_ABS);
        }
  
        hrtimer_set_expires_range_ns(&iowq->t, timeout, 0);
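
This hunk shows the non-sleeper side of the same timers-core conversion: rather than hrtimer_init_on_stack() followed by a raw assignment to iowq->t.function, the callback is handed to hrtimer_setup_on_stack() up front, which lets the core validate it. The pattern in isolation (names illustrative):

#include <linux/hrtimer.h>

static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
{
	/* ... wake up whoever is waiting on this deadline ... */
	return HRTIMER_NORESTART;
}

static void arm_deadline(struct hrtimer *t, ktime_t deadline)
{
	hrtimer_setup_on_stack(t, my_timer_fn, CLOCK_MONOTONIC,
			       HRTIMER_MODE_ABS);
	hrtimer_set_expires(t, deadline);
	hrtimer_start_expires(t, HRTIMER_MODE_ABS);
}
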
@@@ -2472,10 -2500,9 +2473,10 @@@ static inline int io_cqring_wait_schedu
  
  struct ext_arg {
        size_t argsz;
 -      struct __kernel_timespec __user *ts;
 +      struct timespec64 ts;
        const sigset_t __user *sig;
        ktime_t min_time;
 +      bool ts_set;
  };
  
  /*
@@@ -2513,8 -2540,13 +2514,8 @@@ static int io_cqring_wait(struct io_rin
        iowq.timeout = KTIME_MAX;
        start_time = io_get_time(ctx);
  
 -      if (ext_arg->ts) {
 -              struct timespec64 ts;
 -
 -              if (get_timespec64(&ts, ext_arg->ts))
 -                      return -EFAULT;
 -
 -              iowq.timeout = timespec64_to_ktime(ts);
 +      if (ext_arg->ts_set) {
 +              iowq.timeout = timespec64_to_ktime(ext_arg->ts);
                if (!(flags & IORING_ENTER_ABS_TIMER))
                        iowq.timeout = ktime_add(iowq.timeout, start_time);
        }
@@@ -2638,8 -2670,8 +2639,8 @@@ static void io_rings_free(struct io_rin
        ctx->sq_sqes = NULL;
  }
  
 -static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries,
 -                              unsigned int cq_entries, size_t *sq_offset)
 +unsigned long rings_size(unsigned int flags, unsigned int sq_entries,
 +                       unsigned int cq_entries, size_t *sq_offset)
  {
        struct io_rings *rings;
        size_t off, sq_array_size;
        off = struct_size(rings, cqes, cq_entries);
        if (off == SIZE_MAX)
                return SIZE_MAX;
 -      if (ctx->flags & IORING_SETUP_CQE32) {
 +      if (flags & IORING_SETUP_CQE32) {
                if (check_shl_overflow(off, 1, &off))
                        return SIZE_MAX;
        }
                return SIZE_MAX;
  #endif
  
 -      if (ctx->flags & IORING_SETUP_NO_SQARRAY) {
 +      if (flags & IORING_SETUP_NO_SQARRAY) {
                *sq_offset = SIZE_MAX;
                return off;
        }
@@@ -2695,10 -2727,15 +2696,10 @@@ static void io_req_caches_free(struct i
  static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
  {
        io_sq_thread_finish(ctx);
 -      /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */
 -      if (WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list)))
 -              return;
  
        mutex_lock(&ctx->uring_lock);
 -      if (ctx->buf_data)
 -              __io_sqe_buffers_unregister(ctx);
 -      if (ctx->file_data)
 -              __io_sqe_files_unregister(ctx);
 +      io_sqe_buffers_unregister(ctx);
 +      io_sqe_files_unregister(ctx);
        io_cqring_overflow_kill(ctx);
        io_eventfd_unregister(ctx);
        io_alloc_cache_free(&ctx->apoll_cache, kfree);
        io_alloc_cache_free(&ctx->msg_cache, io_msg_cache_free);
        io_futex_cache_free(ctx);
        io_destroy_buffers(ctx);
 +      io_free_region(ctx, &ctx->param_region);
        mutex_unlock(&ctx->uring_lock);
        if (ctx->sq_creds)
                put_cred(ctx->sq_creds);
        if (ctx->submitter_task)
                put_task_struct(ctx->submitter_task);
  
 -      /* there are no registered resources left, nobody uses it */
 -      if (ctx->rsrc_node)
 -              io_rsrc_node_destroy(ctx, ctx->rsrc_node);
 -
 -      WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list));
        WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
  
 -      io_alloc_cache_free(&ctx->rsrc_node_cache, kfree);
        if (ctx->mm_account) {
                mmdrop(ctx->mm_account);
                ctx->mm_account = NULL;
        }
        io_rings_free(ctx);
  
 +      if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
 +              static_branch_dec(&io_key_has_sqarray);
 +
        percpu_ref_exit(&ctx->refs);
        free_uid(ctx->user);
        io_req_caches_free(ctx);
        if (ctx->hash_map)
                io_wq_put_hash(ctx->hash_map);
        io_napi_free(ctx);
 -      kfree(ctx->cancel_table.hbs);
 -      kfree(ctx->cancel_table_locked.hbs);
 +      kvfree(ctx->cancel_table.hbs);
        xa_destroy(&ctx->io_bl_xa);
        kfree(ctx);
  }
@@@ -2971,7 -3011,7 +2972,7 @@@ static int io_uring_release(struct inod
  }
  
  struct io_task_cancel {
 -      struct task_struct *task;
 +      struct io_uring_task *tctx;
        bool all;
  };
  
@@@ -2980,11 -3020,11 +2981,11 @@@ static bool io_cancel_task_cb(struct io
        struct io_kiocb *req = container_of(work, struct io_kiocb, work);
        struct io_task_cancel *cancel = data;
  
 -      return io_match_task_safe(req, cancel->task, cancel->all);
 +      return io_match_task_safe(req, cancel->tctx, cancel->all);
  }
  
  static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
 -                                       struct task_struct *task,
 +                                       struct io_uring_task *tctx,
                                         bool cancel_all)
  {
        struct io_defer_entry *de;
  
        spin_lock(&ctx->completion_lock);
        list_for_each_entry_reverse(de, &ctx->defer_list, list) {
 -              if (io_match_task_safe(de->req, task, cancel_all)) {
 +              if (io_match_task_safe(de->req, tctx, cancel_all)) {
                        list_cut_position(&list, &ctx->defer_list, &de->list);
                        break;
                }
@@@ -3035,10 -3075,11 +3036,10 @@@ static __cold bool io_uring_try_cancel_
  }
  
  static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 -                                              struct task_struct *task,
 +                                              struct io_uring_task *tctx,
                                                bool cancel_all)
  {
 -      struct io_task_cancel cancel = { .task = task, .all = cancel_all, };
 -      struct io_uring_task *tctx = task ? task->io_uring : NULL;
 +      struct io_task_cancel cancel = { .tctx = tctx, .all = cancel_all, };
        enum io_wq_cancel cret;
        bool ret = false;
  
        if (!ctx->rings)
                return false;
  
 -      if (!task) {
 +      if (!tctx) {
                ret |= io_uring_try_cancel_iowq(ctx);
 -      } else if (tctx && tctx->io_wq) {
 +      } else if (tctx->io_wq) {
                /*
                 * Cancels requests of all rings, not only @ctx, but
                 * it's fine as the task is in exit/exec.
        if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
            io_allowed_defer_tw_run(ctx))
                ret |= io_run_local_work(ctx, INT_MAX) > 0;
 -      ret |= io_cancel_defer_files(ctx, task, cancel_all);
 +      ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
        mutex_lock(&ctx->uring_lock);
 -      ret |= io_poll_remove_all(ctx, task, cancel_all);
 -      ret |= io_waitid_remove_all(ctx, task, cancel_all);
 -      ret |= io_futex_remove_all(ctx, task, cancel_all);
 -      ret |= io_uring_try_cancel_uring_cmd(ctx, task, cancel_all);
 +      ret |= io_poll_remove_all(ctx, tctx, cancel_all);
 +      ret |= io_waitid_remove_all(ctx, tctx, cancel_all);
 +      ret |= io_futex_remove_all(ctx, tctx, cancel_all);
 +      ret |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all);
        mutex_unlock(&ctx->uring_lock);
 -      ret |= io_kill_timeouts(ctx, task, cancel_all);
 -      if (task)
 +      ret |= io_kill_timeouts(ctx, tctx, cancel_all);
 +      if (tctx)
                ret |= io_run_task_work() > 0;
        else
                ret |= flush_delayed_work(&ctx->fallback_work);
@@@ -3138,13 -3179,12 +3139,13 @@@ __cold void io_uring_cancel_generic(boo
                                if (node->ctx->sq_data)
                                        continue;
                                loop |= io_uring_try_cancel_requests(node->ctx,
 -                                                      current, cancel_all);
 +                                                      current->io_uring,
 +                                                      cancel_all);
                        }
                } else {
                        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
                                loop |= io_uring_try_cancel_requests(ctx,
 -                                                                   current,
 +                                                                   current->io_uring,
                                                                     cancel_all);
                }
  
@@@ -3191,44 -3231,22 +3192,44 @@@ void __io_uring_cancel(bool cancel_all
        io_uring_cancel_generic(cancel_all, NULL);
  }
  
 -static int io_validate_ext_arg(unsigned flags, const void __user *argp, size_t argsz)
 +static struct io_uring_reg_wait *io_get_ext_arg_reg(struct io_ring_ctx *ctx,
 +                      const struct io_uring_getevents_arg __user *uarg)
  {
 -      if (flags & IORING_ENTER_EXT_ARG) {
 -              struct io_uring_getevents_arg arg;
 +      unsigned long size = sizeof(struct io_uring_reg_wait);
 +      unsigned long offset = (uintptr_t)uarg;
 +      unsigned long end;
  
 -              if (argsz != sizeof(arg))
 -                      return -EINVAL;
 -              if (copy_from_user(&arg, argp, sizeof(arg)))
 -                      return -EFAULT;
 -      }
 +      if (unlikely(offset % sizeof(long)))
 +              return ERR_PTR(-EFAULT);
 +
 +      /* also protects from NULL ->cq_wait_arg as the size would be 0 */
 +      if (unlikely(check_add_overflow(offset, size, &end) ||
 +                   end > ctx->cq_wait_size))
 +              return ERR_PTR(-EFAULT);
 +
 +      return ctx->cq_wait_arg + offset;
 +}
 +
 +static int io_validate_ext_arg(struct io_ring_ctx *ctx, unsigned flags,
 +                             const void __user *argp, size_t argsz)
 +{
 +      struct io_uring_getevents_arg arg;
 +
 +      if (!(flags & IORING_ENTER_EXT_ARG))
 +              return 0;
 +      if (flags & IORING_ENTER_EXT_ARG_REG)
 +              return -EINVAL;
 +      if (argsz != sizeof(arg))
 +              return -EINVAL;
 +      if (copy_from_user(&arg, argp, sizeof(arg)))
 +              return -EFAULT;
        return 0;
  }
  
 -static int io_get_ext_arg(unsigned flags, const void __user *argp,
 -                        struct ext_arg *ext_arg)
 +static int io_get_ext_arg(struct io_ring_ctx *ctx, unsigned flags,
 +                        const void __user *argp, struct ext_arg *ext_arg)
  {
 +      const struct io_uring_getevents_arg __user *uarg = argp;
        struct io_uring_getevents_arg arg;
  
        /*
         */
        if (!(flags & IORING_ENTER_EXT_ARG)) {
                ext_arg->sig = (const sigset_t __user *) argp;
 -              ext_arg->ts = NULL;
 +              return 0;
 +      }
 +
 +      if (flags & IORING_ENTER_EXT_ARG_REG) {
 +              struct io_uring_reg_wait *w;
 +
 +              if (ext_arg->argsz != sizeof(struct io_uring_reg_wait))
 +                      return -EINVAL;
 +              w = io_get_ext_arg_reg(ctx, argp);
 +              if (IS_ERR(w))
 +                      return PTR_ERR(w);
 +
 +              if (w->flags & ~IORING_REG_WAIT_TS)
 +                      return -EINVAL;
 +              ext_arg->min_time = READ_ONCE(w->min_wait_usec) * NSEC_PER_USEC;
 +              ext_arg->sig = u64_to_user_ptr(READ_ONCE(w->sigmask));
 +              ext_arg->argsz = READ_ONCE(w->sigmask_sz);
 +              if (w->flags & IORING_REG_WAIT_TS) {
 +                      ext_arg->ts.tv_sec = READ_ONCE(w->ts.tv_sec);
 +                      ext_arg->ts.tv_nsec = READ_ONCE(w->ts.tv_nsec);
 +                      ext_arg->ts_set = true;
 +              }
                return 0;
        }
  
         */
        if (ext_arg->argsz != sizeof(arg))
                return -EINVAL;
 -      if (copy_from_user(&arg, argp, sizeof(arg)))
 +#ifdef CONFIG_64BIT
 +      if (!user_access_begin(uarg, sizeof(*uarg)))
 +              return -EFAULT;
 +      unsafe_get_user(arg.sigmask, &uarg->sigmask, uaccess_end);
 +      unsafe_get_user(arg.sigmask_sz, &uarg->sigmask_sz, uaccess_end);
 +      unsafe_get_user(arg.min_wait_usec, &uarg->min_wait_usec, uaccess_end);
 +      unsafe_get_user(arg.ts, &uarg->ts, uaccess_end);
 +      user_access_end();
 +#else
 +      if (copy_from_user(&arg, uarg, sizeof(arg)))
                return -EFAULT;
 +#endif
        ext_arg->min_time = arg.min_wait_usec * NSEC_PER_USEC;
        ext_arg->sig = u64_to_user_ptr(arg.sigmask);
        ext_arg->argsz = arg.sigmask_sz;
 -      ext_arg->ts = u64_to_user_ptr(arg.ts);
 +      if (arg.ts) {
 +              if (get_timespec64(&ext_arg->ts, u64_to_user_ptr(arg.ts)))
 +                      return -EFAULT;
 +              ext_arg->ts_set = true;
 +      }
        return 0;
 +#ifdef CONFIG_64BIT
 +uaccess_end:
 +      user_access_end();
 +      return -EFAULT;
 +#endif
  }
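
On 64-bit, io_get_ext_arg() now pulls the four getevents fields under a single user_access_begin() window, paying the access_ok() check and SMAP toggle once rather than once per copy_from_user(). The general shape of that pattern, sketched with a hypothetical struct:

#include <linux/types.h>
#include <linux/uaccess.h>

struct my_uarg {
	u64 token;
	u32 count;
	u32 flags;
};

static int read_my_uarg(const struct my_uarg __user *uarg,
			struct my_uarg *out)
{
	if (!user_access_begin(uarg, sizeof(*uarg)))
		return -EFAULT;
	unsafe_get_user(out->token, &uarg->token, efault);
	unsafe_get_user(out->count, &uarg->count, efault);
	unsafe_get_user(out->flags, &uarg->flags, efault);
	user_access_end();
	return 0;
efault:
	user_access_end();
	return -EFAULT;
}
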
  
  SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
        if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
                               IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG |
                               IORING_ENTER_REGISTERED_RING |
 -                             IORING_ENTER_ABS_TIMER)))
 +                             IORING_ENTER_ABS_TIMER |
 +                             IORING_ENTER_EXT_ARG_REG)))
                return -EINVAL;
  
        /*
                         */
                        mutex_lock(&ctx->uring_lock);
  iopoll_locked:
 -                      ret2 = io_validate_ext_arg(flags, argp, argsz);
 +                      ret2 = io_validate_ext_arg(ctx, flags, argp, argsz);
                        if (likely(!ret2)) {
                                min_complete = min(min_complete,
                                                   ctx->cq_entries);
                } else {
                        struct ext_arg ext_arg = { .argsz = argsz };
  
 -                      ret2 = io_get_ext_arg(flags, argp, &ext_arg);
 +                      ret2 = io_get_ext_arg(ctx, flags, argp, &ext_arg);
                        if (likely(!ret2)) {
                                min_complete = min(min_complete,
                                                   ctx->cq_entries);
@@@ -3458,8 -3435,7 +3459,8 @@@ static __cold int io_allocate_scq_uring
        ctx->sq_entries = p->sq_entries;
        ctx->cq_entries = p->cq_entries;
  
 -      size = rings_size(ctx, p->sq_entries, p->cq_entries, &sq_array_offset);
 +      size = rings_size(ctx->flags, p->sq_entries, p->cq_entries,
 +                        &sq_array_offset);
        if (size == SIZE_MAX)
                return -EOVERFLOW;
  
@@@ -3525,8 -3501,14 +3526,8 @@@ static struct file *io_uring_get_file(s
                                         O_RDWR | O_CLOEXEC, NULL);
  }
  
 -static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
 -                                struct io_uring_params __user *params)
 +int io_uring_fill_params(unsigned entries, struct io_uring_params *p)
  {
 -      struct io_ring_ctx *ctx;
 -      struct io_uring_task *tctx;
 -      struct file *file;
 -      int ret;
 -
        if (!entries)
                return -EINVAL;
        if (entries > IORING_MAX_ENTRIES) {
                p->cq_entries = 2 * p->sq_entries;
        }
  
 +      p->sq_off.head = offsetof(struct io_rings, sq.head);
 +      p->sq_off.tail = offsetof(struct io_rings, sq.tail);
 +      p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask);
 +      p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries);
 +      p->sq_off.flags = offsetof(struct io_rings, sq_flags);
 +      p->sq_off.dropped = offsetof(struct io_rings, sq_dropped);
 +      p->sq_off.resv1 = 0;
 +      if (!(p->flags & IORING_SETUP_NO_MMAP))
 +              p->sq_off.user_addr = 0;
 +
 +      p->cq_off.head = offsetof(struct io_rings, cq.head);
 +      p->cq_off.tail = offsetof(struct io_rings, cq.tail);
 +      p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask);
 +      p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries);
 +      p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
 +      p->cq_off.cqes = offsetof(struct io_rings, cqes);
 +      p->cq_off.flags = offsetof(struct io_rings, cq_flags);
 +      p->cq_off.resv1 = 0;
 +      if (!(p->flags & IORING_SETUP_NO_MMAP))
 +              p->cq_off.user_addr = 0;
 +
 +      return 0;
 +}
 +
 +static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
 +                                struct io_uring_params __user *params)
 +{
 +      struct io_ring_ctx *ctx;
 +      struct io_uring_task *tctx;
 +      struct file *file;
 +      int ret;
 +
 +      ret = io_uring_fill_params(entries, p);
 +      if (unlikely(ret))
 +              return ret;
 +
        ctx = io_ring_ctx_alloc(p);
        if (!ctx)
                return -ENOMEM;
        ctx->clockid = CLOCK_MONOTONIC;
        ctx->clock_offset = 0;
  
 +      if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
 +              static_branch_inc(&io_key_has_sqarray);
 +
        if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
            !(ctx->flags & IORING_SETUP_IOPOLL) &&
            !(ctx->flags & IORING_SETUP_SQPOLL))
                ctx->notify_method = TWA_SIGNAL;
        }
  
 +      /* HYBRID_IOPOLL only valid with IOPOLL */
 +      if ((ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_HYBRID_IOPOLL)) ==
 +                      IORING_SETUP_HYBRID_IOPOLL)
 +              goto err;
 +
        /*
         * For DEFER_TASKRUN we require the completion task to be the same as the
         * submission task. This implies that there is only one submitter, so enforce
        if (ret)
                goto err;
  
 -      ret = io_sq_offload_create(ctx, p);
 -      if (ret)
 -              goto err;
 +      if (!(p->flags & IORING_SETUP_NO_SQARRAY))
 +              p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
  
 -      ret = io_rsrc_init(ctx);
 +      ret = io_sq_offload_create(ctx, p);
        if (ret)
                goto err;
  
 -      p->sq_off.head = offsetof(struct io_rings, sq.head);
 -      p->sq_off.tail = offsetof(struct io_rings, sq.tail);
 -      p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask);
 -      p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries);
 -      p->sq_off.flags = offsetof(struct io_rings, sq_flags);
 -      p->sq_off.dropped = offsetof(struct io_rings, sq_dropped);
 -      if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
 -              p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
 -      p->sq_off.resv1 = 0;
 -      if (!(ctx->flags & IORING_SETUP_NO_MMAP))
 -              p->sq_off.user_addr = 0;
 -
 -      p->cq_off.head = offsetof(struct io_rings, cq.head);
 -      p->cq_off.tail = offsetof(struct io_rings, cq.tail);
 -      p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask);
 -      p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries);
 -      p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
 -      p->cq_off.cqes = offsetof(struct io_rings, cqes);
 -      p->cq_off.flags = offsetof(struct io_rings, cq_flags);
 -      p->cq_off.resv1 = 0;
 -      if (!(ctx->flags & IORING_SETUP_NO_MMAP))
 -              p->cq_off.user_addr = 0;
 -
        p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
                        IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
                        IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
@@@ -3774,7 -3736,7 +3775,7 @@@ static long io_uring_setup(u32 entries
                        IORING_SETUP_SQE128 | IORING_SETUP_CQE32 |
                        IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN |
                        IORING_SETUP_NO_MMAP | IORING_SETUP_REGISTERED_FD_ONLY |
 -                      IORING_SETUP_NO_SQARRAY))
 +                      IORING_SETUP_NO_SQARRAY | IORING_SETUP_HYBRID_IOPOLL))
                return -EINVAL;
  
        return io_uring_create(entries, &p, params);
@@@ -3812,8 -3774,6 +3813,8 @@@ static int __init io_uring_init(void
        struct kmem_cache_args kmem_args = {
                .useroffset = offsetof(struct io_kiocb, cmd.data),
                .usersize = sizeof_field(struct io_kiocb, cmd.data),
 +              .freeptr_offset = offsetof(struct io_kiocb, work),
 +              .use_freeptr_offset = true,
        };
  
  #define __BUILD_BUG_VERIFY_OFFSET_SIZE(stype, eoffset, esize, ename) do { \
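
Threaded through the io_uring.c diff above is a global static key, io_key_has_sqarray: ring creation bumps it for rings set up without IORING_SETUP_NO_SQARRAY, io_ring_ctx_free() drops it, and the hot path in io_get_sqe() only takes the SQ-array indirection when the patched-in branch says such a ring exists. Reduced to its essentials (a sketch, not the full logic):

#include <linux/jump_label.h>
#include <uapi/linux/io_uring.h>

static DEFINE_STATIC_KEY_FALSE(has_sqarray);

static void ring_created(unsigned int flags)
{
	if (!(flags & IORING_SETUP_NO_SQARRAY))
		static_branch_inc(&has_sqarray);
}

static void ring_freed(unsigned int flags)
{
	if (!(flags & IORING_SETUP_NO_SQARRAY))
		static_branch_dec(&has_sqarray);
}

static bool uses_sq_array(unsigned int flags)
{
	/* nearly free while no ring on the system has an SQ array */
	return static_branch_unlikely(&has_sqarray) &&
	       !(flags & IORING_SETUP_NO_SQARRAY);
}
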
diff --combined io_uring/rw.c
index cce8bc2ecd3fa493e7ccbd48170bb1ea90311f26,f023ff49c6883cbe00aab4979e7c2fd115e5cad2..0bcb83e4ce3cf9b0d02da3f6cd504a377931ccde
@@@ -31,19 -31,9 +31,19 @@@ struct io_rw 
        rwf_t                           flags;
  };
  
 -static inline bool io_file_supports_nowait(struct io_kiocb *req)
 +static bool io_file_supports_nowait(struct io_kiocb *req, __poll_t mask)
  {
 -      return req->flags & REQ_F_SUPPORT_NOWAIT;
 +      /* If FMODE_NOWAIT is set for a file, we're golden */
 +      if (req->flags & REQ_F_SUPPORT_NOWAIT)
 +              return true;
 +      /* No FMODE_NOWAIT, if we can poll, check the status */
 +      if (io_file_can_poll(req)) {
 +              struct poll_table_struct pt = { ._key = mask };
 +
 +              return vfs_poll(req->file, &pt) & mask;
 +      }
 +      /* No FMODE_NOWAIT support, and file isn't pollable. Tough luck. */
 +      return false;
  }
  
  #ifdef CONFIG_COMPAT
@@@ -330,21 -320,22 +330,21 @@@ static int io_prep_rw_fixed(struct io_k
  {
        struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
        struct io_ring_ctx *ctx = req->ctx;
 +      struct io_rsrc_node *node;
        struct io_async_rw *io;
 -      u16 index;
        int ret;
  
        ret = io_prep_rw(req, sqe, ddir, false);
        if (unlikely(ret))
                return ret;
  
 -      if (unlikely(req->buf_index >= ctx->nr_user_bufs))
 +      node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index);
 +      if (!node)
                return -EFAULT;
 -      index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
 -      req->imu = ctx->user_bufs[index];
 -      io_req_set_rsrc_node(req, ctx, 0);
 +      io_req_assign_buf_node(req, node);
  
        io = req->async_data;
 -      ret = io_import_fixed(ddir, &io->iter, req->imu, rw->addr, rw->len);
 +      ret = io_import_fixed(ddir, &io->iter, node->buf, rw->addr, rw->len);
        iov_iter_save_state(&io->iter, &io->iter_state);
        return ret;
  }
@@@ -434,7 -425,7 +434,7 @@@ static bool io_rw_should_reissue(struc
         * Play it safe and assume not safe to re-import and reissue if we're
         * not in the original thread group (or not in task context).
         */
 -      if (!same_thread_group(req->task, current) || !in_task())
 +      if (!same_thread_group(req->tctx->task, current) || !in_task())
                return false;
        return true;
  }
@@@ -805,8 -796,8 +805,8 @@@ static int io_rw_init_file(struct io_ki
         * supports async. Otherwise it's impossible to use O_NONBLOCK files
         * reliably. If not, or if IOCB_NOWAIT is set, don't retry.
         */
 -      if ((kiocb->ki_flags & IOCB_NOWAIT) ||
 -          ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req)))
 +      if (kiocb->ki_flags & IOCB_NOWAIT ||
 +          ((file->f_flags & O_NONBLOCK && !(req->flags & REQ_F_SUPPORT_NOWAIT))))
                req->flags |= REQ_F_NOWAIT;
  
        if (ctx->flags & IORING_SETUP_IOPOLL) {
                kiocb->ki_flags |= IOCB_HIPRI;
                kiocb->ki_complete = io_complete_rw_iopoll;
                req->iopoll_completed = 0;
 +              if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL) {
 +                      /* make sure every req only blocks once */
 +                      req->flags &= ~REQ_F_IOPOLL_STATE;
 +                      req->iopoll_start = ktime_get_ns();
 +              }
        } else {
                if (kiocb->ki_flags & IOCB_HIPRI)
                        return -EINVAL;
@@@ -852,7 -838,7 +852,7 @@@ static int __io_read(struct io_kiocb *r
  
        if (force_nonblock) {
                /* If the file doesn't support async, just async punt */
 -              if (unlikely(!io_file_supports_nowait(req)))
 +              if (unlikely(!io_file_supports_nowait(req, EPOLLIN)))
                        return -EAGAIN;
                kiocb->ki_flags |= IOCB_NOWAIT;
        } else {
@@@ -965,6 -951,13 +965,6 @@@ int io_read_mshot(struct io_kiocb *req
  
        ret = __io_read(req, issue_flags);
  
 -      /*
 -       * If the file doesn't support proper NOWAIT, then disable multishot
 -       * and stay in single shot mode.
 -       */
 -      if (!io_file_supports_nowait(req))
 -              req->flags &= ~REQ_F_APOLL_MULTISHOT;
 -
        /*
         * If we get -EAGAIN, recycle our buffer and just let normal poll
         * handling arm it.
                if (issue_flags & IO_URING_F_MULTISHOT)
                        return IOU_ISSUE_SKIP_COMPLETE;
                return -EAGAIN;
 -      }
 -
 -      /*
 -       * Any successful return value will keep the multishot read armed.
 -       */
 -      if (ret > 0 && req->flags & REQ_F_APOLL_MULTISHOT) {
 +      } else if (ret <= 0) {
 +              io_kbuf_recycle(req, issue_flags);
 +              if (ret < 0)
 +                      req_set_fail(req);
 +      } else {
                /*
 -               * Put our buffer and post a CQE. If we fail to post a CQE, then
 +               * Any successful return value will keep the multishot read
 +               * armed, if it's still set. Put our buffer and post a CQE. If
 +               * we fail to post a CQE, or multishot is no longer set, then
                 * jump to the termination path. This request is then done.
                 */
                cflags = io_put_kbuf(req, ret, issue_flags);
        return IOU_OK;
  }
  
 +static bool io_kiocb_start_write(struct io_kiocb *req, struct kiocb *kiocb)
 +{
 +      struct inode *inode;
 +      bool ret;
 +
 +      if (!(req->flags & REQ_F_ISREG))
 +              return true;
 +      if (!(kiocb->ki_flags & IOCB_NOWAIT)) {
 +              kiocb_start_write(kiocb);
 +              return true;
 +      }
 +
 +      inode = file_inode(kiocb->ki_filp);
 +      ret = sb_start_write_trylock(inode->i_sb);
 +      if (ret)
 +              __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
 +      return ret;
 +}
 +
  int io_write(struct io_kiocb *req, unsigned int issue_flags)
  {
        bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
  
        if (force_nonblock) {
                /* If the file doesn't support async, just async punt */
 -              if (unlikely(!io_file_supports_nowait(req)))
 +              if (unlikely(!io_file_supports_nowait(req, EPOLLOUT)))
                        goto ret_eagain;
  
                /* Check if we can support NOWAIT. */
        if (unlikely(ret))
                return ret;
  
 -      if (req->flags & REQ_F_ISREG)
 -              kiocb_start_write(kiocb);
 +      if (unlikely(!io_kiocb_start_write(req, kiocb)))
 +              return -EAGAIN;
        kiocb->ki_flags |= IOCB_WRITE;
  
        if (likely(req->file->f_op->write_iter))
@@@ -1139,78 -1112,6 +1139,78 @@@ void io_rw_fail(struct io_kiocb *req
        io_req_set_res(req, res, req->cqe.flags);
  }
  
-       hrtimer_init_sleeper_on_stack(&timer, CLOCK_MONOTONIC, mode);
 +static int io_uring_classic_poll(struct io_kiocb *req, struct io_comp_batch *iob,
 +                              unsigned int poll_flags)
 +{
 +      struct file *file = req->file;
 +
 +      if (req->opcode == IORING_OP_URING_CMD) {
 +              struct io_uring_cmd *ioucmd;
 +
 +              ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
 +              return file->f_op->uring_cmd_iopoll(ioucmd, iob, poll_flags);
 +      } else {
 +              struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
 +
 +              return file->f_op->iopoll(&rw->kiocb, iob, poll_flags);
 +      }
 +}
 +
 +static u64 io_hybrid_iopoll_delay(struct io_ring_ctx *ctx, struct io_kiocb *req)
 +{
 +      struct hrtimer_sleeper timer;
 +      enum hrtimer_mode mode;
 +      ktime_t kt;
 +      u64 sleep_time;
 +
 +      if (req->flags & REQ_F_IOPOLL_STATE)
 +              return 0;
 +
 +      if (ctx->hybrid_poll_time == LLONG_MAX)
 +              return 0;
 +
 +      /* Sleep for half of the expected running time */
 +      sleep_time = ctx->hybrid_poll_time / 2;
 +
 +      kt = ktime_set(0, sleep_time);
 +      req->flags |= REQ_F_IOPOLL_STATE;
 +
 +      mode = HRTIMER_MODE_REL;
++      hrtimer_setup_sleeper_on_stack(&timer, CLOCK_MONOTONIC, mode);
 +      hrtimer_set_expires(&timer.timer, kt);
 +      set_current_state(TASK_INTERRUPTIBLE);
 +      hrtimer_sleeper_start_expires(&timer, mode);
 +
 +      if (timer.task)
 +              io_schedule();
 +
 +      hrtimer_cancel(&timer.timer);
 +      __set_current_state(TASK_RUNNING);
 +      destroy_hrtimer_on_stack(&timer.timer);
 +      return sleep_time;
 +}
 +
 +static int io_uring_hybrid_poll(struct io_kiocb *req,
 +                              struct io_comp_batch *iob, unsigned int poll_flags)
 +{
 +      struct io_ring_ctx *ctx = req->ctx;
 +      u64 runtime, sleep_time;
 +      int ret;
 +
 +      sleep_time = io_hybrid_iopoll_delay(ctx, req);
 +      ret = io_uring_classic_poll(req, iob, poll_flags);
 +      runtime = ktime_get_ns() - req->iopoll_start - sleep_time;
 +
 +      /*
 +       * Use minimum sleep time if we're polling devices with different
 +       * latencies. We could get more completions from the faster ones.
 +       */
 +      if (ctx->hybrid_poll_time > runtime)
 +              ctx->hybrid_poll_time = runtime;
 +
 +      return ret;
 +}
 +
  int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
  {
        struct io_wq_work_node *pos, *start, *prev;
  
        wq_list_for_each(pos, start, &ctx->iopoll_list) {
                struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);
 -              struct file *file = req->file;
                int ret;
  
                /*
                if (READ_ONCE(req->iopoll_completed))
                        break;
  
 -              if (req->opcode == IORING_OP_URING_CMD) {
 -                      struct io_uring_cmd *ioucmd;
 -
 -                      ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
 -                      ret = file->f_op->uring_cmd_iopoll(ioucmd, &iob,
 -                                                              poll_flags);
 -              } else {
 -                      struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
 +              if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL)
 +                      ret = io_uring_hybrid_poll(req, &iob, poll_flags);
 +              else
 +                      ret = io_uring_classic_poll(req, &iob, poll_flags);
  
 -                      ret = file->f_op->iopoll(&rw->kiocb, &iob, poll_flags);
 -              }
                if (unlikely(ret < 0))
                        return ret;
                else if (ret)
                        poll_flags |= BLK_POLL_ONESHOT;
  
                /* iopoll may have completed current req */
 -              if (!rq_list_empty(iob.req_list) ||
 +              if (!rq_list_empty(&iob.req_list) ||
                    READ_ONCE(req->iopoll_completed))
                        break;
        }
  
 -      if (!rq_list_empty(iob.req_list))
 +      if (!rq_list_empty(&iob.req_list))
                iob.complete(&iob);
        else if (!pos)
                return 0;
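
The hybrid path above sleeps for roughly half of the last observed completion time before busy-polling, and io_uring_hybrid_poll() keeps ctx->hybrid_poll_time as a running minimum of observed runtimes. A minimal userspace sketch of opting in, assuming a liburing and kernel that expose the new IORING_SETUP_HYBRID_IOPOLL flag (the queue-init calls are standard liburing; everything else here is illustrative):

/* Illustrative only: request hybrid polling on an iopoll ring. */
#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_params p = { 0 };

	/* Hybrid iopoll extends IORING_SETUP_IOPOLL rather than replacing it */
	p.flags = IORING_SETUP_IOPOLL | IORING_SETUP_HYBRID_IOPOLL;
	if (io_uring_queue_init_params(64, &ring, &p) < 0) {
		/* Kernels without this series reject unknown setup flags */
		fprintf(stderr, "hybrid iopoll not supported\n");
		return 1;
	}
	/* Submit O_DIRECT I/O as usual; completion polling now starts
	 * with a sleep of half the observed completion time. */
	io_uring_queue_exit(&ring);
	return 0;
}
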
diff --combined io_uring/timeout.c
index 5b12bd6a804c80d596b1fd46ed15f5b8b480b114,2ffe5e1dc68aeb92aa7bc5c4dc3ba28cafe03c09..f3d502717aebbcf991f02ee1857b6cda5f9d14d7
@@@ -76,7 -76,6 +76,6 @@@ static void io_timeout_complete(struct 
                        /* re-arm timer */
                        spin_lock_irq(&ctx->timeout_lock);
                        list_add(&timeout->list, ctx->timeout_list.prev);
-                       data->timer.function = io_timeout_fn;
                        hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
                        spin_unlock_irq(&ctx->timeout_lock);
                        return;
@@@ -300,18 -299,16 +299,18 @@@ static void io_req_task_link_timeout(st
  {
        struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
        struct io_kiocb *prev = timeout->prev;
 -      int ret = -ENOENT;
 +      int ret;
  
        if (prev) {
 -              if (!(req->task->flags & PF_EXITING)) {
 +              if (!io_should_terminate_tw()) {
                        struct io_cancel_data cd = {
                                .ctx            = req->ctx,
                                .data           = prev->cqe.user_data,
                        };
  
 -                      ret = io_try_cancel(req->task->io_uring, &cd, 0);
 +                      ret = io_try_cancel(req->tctx, &cd, 0);
 +              } else {
 +                      ret = -ECANCELED;
                }
                io_req_set_res(req, ret ?: -ETIME, 0);
                io_req_task_complete(req, ts);
@@@ -639,13 -636,13 +638,13 @@@ void io_queue_linked_timeout(struct io_
        io_put_req(req);
  }
  
 -static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
 +static bool io_match_task(struct io_kiocb *head, struct io_uring_task *tctx,
                          bool cancel_all)
        __must_hold(&head->ctx->timeout_lock)
  {
        struct io_kiocb *req;
  
 -      if (task && head->task != task)
 +      if (tctx && head->tctx != tctx)
                return false;
        if (cancel_all)
                return true;
  }
  
  /* Returns true if we found and killed one or more timeouts */
 -__cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
 +__cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
                             bool cancel_all)
  {
        struct io_timeout *timeout, *tmp;
        list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
                struct io_kiocb *req = cmd_to_io_kiocb(timeout);
  
 -              if (io_match_task(req, tsk, cancel_all) &&
 +              if (io_match_task(req, tctx, cancel_all) &&
                    io_kill_timeout(req, -ECANCELED))
                        canceled++;
        }
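
The dropped data->timer.function assignment in io_timeout_complete() above follows the new convention in this merge: the callback is bound once when the hrtimer is set up, so a re-arm is nothing more than hrtimer_start(). A sketch of that pattern, with my_timeout_fn/my_rearm as made-up names:

/* Illustrative kernel-style sketch; names are placeholders. */
static enum hrtimer_restart my_timeout_fn(struct hrtimer *t)
{
	return HRTIMER_NORESTART;
}

static void my_rearm(struct hrtimer *timer, ktime_t expires, enum hrtimer_mode mode)
{
	/* The callback was bound at hrtimer_setup() time, so a re-arm
	 * is just a restart; no ->function assignment needed. */
	hrtimer_start(timer, expires, mode);
}
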
diff --combined kernel/cpu.c
index c4aaf73dec9e606e031273a9f80583512c9c6a4c,895f3287e3f3fb2c72d15ef919edb5baee642208..6e34b52cb5ce714571bb5778648b9720df39ddb1
@@@ -1338,9 -1338,6 +1338,8 @@@ static int takedown_cpu(unsigned int cp
  
        cpuhp_bp_sync_dead(cpu);
  
-       tick_cleanup_dead_cpu(cpu);
 +      lockdep_cleanup_dead_cpu(cpu, idle_thread_get(cpu));
 +
        /*
         * Callbacks must be re-integrated right away to the RCU state machine.
         * Otherwise an RCU callback could block a further teardown function
diff --combined kernel/fork.c
index f6752b0dca97123f721b9ac4c2e3b9166b45570d,c2bd8367a850ec55f78b46877152768f90e76fd3..e58d27c057889bbcd52f63d10e8c880835e8066c
  #include <linux/rseq.h>
  #include <uapi/linux/pidfd.h>
  #include <linux/pidfs.h>
 +#include <linux/tick.h>
  
  #include <asm/pgalloc.h>
  #include <linux/uaccess.h>
@@@ -654,6 -653,11 +654,6 @@@ static __latent_entropy int dup_mmap(st
        mm->exec_vm = oldmm->exec_vm;
        mm->stack_vm = oldmm->stack_vm;
  
 -      retval = ksm_fork(mm, oldmm);
 -      if (retval)
 -              goto out;
 -      khugepaged_fork(mm, oldmm);
 -
        /* Use __mt_dup() to efficiently build an identical maple tree. */
        retval = __mt_dup(&oldmm->mm_mt, &mm->mm_mt, GFP_KERNEL);
        if (unlikely(retval))
@@@ -756,8 -760,6 +756,8 @@@ loop_out
        vma_iter_free(&vmi);
        if (!retval) {
                mt_set_in_rcu(vmi.mas.tree);
 +              ksm_fork(mm, oldmm);
 +              khugepaged_fork(mm, oldmm);
        } else if (mpnt) {
                /*
                 * The entire maple tree has already been duplicated. If the
@@@ -773,10 -775,7 +773,10 @@@ out
        mmap_write_unlock(mm);
        flush_tlb_mm(oldmm);
        mmap_write_unlock(oldmm);
 -      dup_userfaultfd_complete(&uf);
 +      if (!retval)
 +              dup_userfaultfd_complete(&uf);
 +      else
 +              dup_userfaultfd_fail(&uf);
  fail_uprobe_end:
        uprobe_end_dup_mmap();
        return retval;
@@@ -1185,7 -1184,7 +1185,7 @@@ static struct task_struct *dup_task_str
        tsk->active_memcg = NULL;
  #endif
  
 -#ifdef CONFIG_CPU_SUP_INTEL
 +#ifdef CONFIG_X86_BUS_LOCK_DETECT
        tsk->reported_split_lock = 0;
  #endif
  
@@@ -1299,7 -1298,7 +1299,7 @@@ static struct mm_struct *mm_init(struc
        if (init_new_context(p, mm))
                goto fail_nocontext;
  
 -      if (mm_alloc_cid(mm))
 +      if (mm_alloc_cid(mm, p))
                goto fail_cid;
  
        if (percpu_counter_init_many(mm->rss_stat, 0, GFP_KERNEL_ACCOUNT,
@@@ -1757,30 -1756,33 +1757,30 @@@ static int copy_files(unsigned long clo
                      int no_files)
  {
        struct files_struct *oldf, *newf;
 -      int error = 0;
  
        /*
         * A background process may not have any files ...
         */
        oldf = current->files;
        if (!oldf)
 -              goto out;
 +              return 0;
  
        if (no_files) {
                tsk->files = NULL;
 -              goto out;
 +              return 0;
        }
  
        if (clone_flags & CLONE_FILES) {
                atomic_inc(&oldf->count);
 -              goto out;
 +              return 0;
        }
  
 -      newf = dup_fd(oldf, NR_OPEN_MAX, &error);
 -      if (!newf)
 -              goto out;
 +      newf = dup_fd(oldf, NULL);
 +      if (IS_ERR(newf))
 +              return PTR_ERR(newf);
  
        tsk->files = newf;
 -      error = 0;
 -out:
 -      return error;
 +      return 0;
  }
  
  static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
@@@ -1862,6 -1864,7 +1862,7 @@@ static int copy_signal(unsigned long cl
  
  #ifdef CONFIG_POSIX_TIMERS
        INIT_HLIST_HEAD(&sig->posix_timers);
+       INIT_HLIST_HEAD(&sig->ignored_posix_timers);
        hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        sig->real_timer.function = it_real_fn;
  #endif
@@@ -2293,7 -2296,6 +2294,7 @@@ __latent_entropy struct task_struct *co
        acct_clear_integrals(p);
  
        posix_cputimers_init(&p->posix_cputimers);
 +      tick_dep_init_task(p);
  
        p->io_context = NULL;
        audit_set_context(p, NULL);
@@@ -3237,16 -3239,17 +3238,16 @@@ static int unshare_fs(unsigned long uns
  /*
   * Unshare file descriptor table if it is being shared
   */
 -int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
 -             struct files_struct **new_fdp)
 +static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
  {
        struct files_struct *fd = current->files;
 -      int error = 0;
  
        if ((unshare_flags & CLONE_FILES) &&
            (fd && atomic_read(&fd->count) > 1)) {
 -              *new_fdp = dup_fd(fd, max_fds, &error);
 -              if (!*new_fdp)
 -                      return error;
 +              fd = dup_fd(fd, NULL);
 +              if (IS_ERR(fd))
 +                      return PTR_ERR(fd);
 +              *new_fdp = fd;
        }
  
        return 0;
@@@ -3304,7 -3307,7 +3305,7 @@@ int ksys_unshare(unsigned long unshare_
        err = unshare_fs(unshare_flags, &new_fs);
        if (err)
                goto bad_unshare_out;
 -      err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd);
 +      err = unshare_fd(unshare_flags, &new_fd);
        if (err)
                goto bad_unshare_cleanup_fs;
        err = unshare_userns(unshare_flags, &new_cred);
@@@ -3396,7 -3399,7 +3397,7 @@@ int unshare_files(void
        struct files_struct *old, *copy = NULL;
        int error;
  
 -      error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, &copy);
 +      error = unshare_fd(CLONE_FILES, &copy);
        if (error || !copy)
                return error;
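
dup_fd() now reports failure through the returned pointer itself rather than an out-parameter, which is what lets copy_files() and unshare_fd() above shed their error/goto plumbing. The ERR_PTR convention in miniature, with my_ctx/my_dup as hypothetical stand-ins:

/* Illustrative kernel-style sketch of the ERR_PTR return convention. */
#include <linux/err.h>
#include <linux/slab.h>

struct my_ctx { int id; };	/* stand-in for files_struct */

static struct my_ctx *my_dup(void)
{
	struct my_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

	if (!ctx)
		return ERR_PTR(-ENOMEM);	/* errno travels in the pointer */
	return ctx;
}

static int my_caller(void)
{
	struct my_ctx *ctx = my_dup();

	if (IS_ERR(ctx))
		return PTR_ERR(ctx);		/* unpack it, no out-parameter */
	kfree(ctx);
	return 0;
}
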
  
diff --combined kernel/futex/core.c
index 692912bf1252c43a115c39489fb0ad1af748f54c,fb7214c7a36f7377b7fd2dc8a232bc96b1d0d953..326bfe6549d75a6a4bed6e83281805314ecbbf77
@@@ -140,9 -140,9 +140,9 @@@ futex_setup_timer(ktime_t *time, struc
        if (!time)
                return NULL;
  
-       hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
-                                     CLOCK_REALTIME : CLOCK_MONOTONIC,
-                                     HRTIMER_MODE_ABS);
+       hrtimer_setup_sleeper_on_stack(timeout,
+                                      (flags & FLAGS_CLOCKRT) ? CLOCK_REALTIME : CLOCK_MONOTONIC,
+                                      HRTIMER_MODE_ABS);
        /*
         * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
         * effectively the same as calling hrtimer_set_expires().
@@@ -181,12 -181,12 +181,12 @@@ static u64 get_inode_sequence_number(st
                return old;
  
        for (;;) {
 -              u64 new = atomic64_add_return(1, &i_seq);
 +              u64 new = atomic64_inc_return(&i_seq);
                if (WARN_ON_ONCE(!new))
                        continue;
  
 -              old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
 -              if (old)
 +              old = 0;
 +              if (!atomic64_try_cmpxchg_relaxed(&inode->i_sequence, &old, new))
                        return old;
                return new;
        }
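
The atomic64_try_cmpxchg_relaxed() conversion above leans on a property of try_cmpxchg(): on failure it writes the current value back into 'old', so the loser of the race directly returns the sequence number the winner installed. The same shape in portable C11 atomics, as a runnable sketch:

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long long i_sequence;

static unsigned long long get_seq(unsigned long long new)
{
	unsigned long long old = 0;

	/* On failure, 'old' is overwritten with the current value,
	 * i.e. the number another thread won the race with. */
	if (!atomic_compare_exchange_strong(&i_sequence, &old, new))
		return old;
	return new;
}

int main(void)
{
	printf("%llu\n", get_seq(42));	/* first caller installs 42 */
	printf("%llu\n", get_seq(99));	/* loser sees 42, not 99 */
	return 0;
}
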
diff --combined kernel/sched/idle.c
index ab911d1335ba2042294456665e1863b9fb1e6e6a,631e428029259a6f5094a7c05442c22689883687..621696269584b6bfc5b3439f52cd1f360da964b3
@@@ -271,6 -271,7 +271,6 @@@ static void do_idle(void
        tick_nohz_idle_enter();
  
        while (!need_resched()) {
 -              rmb();
  
                /*
                 * Interrupts shouldn't be re-enabled from that point on until
@@@ -398,8 -399,8 +398,8 @@@ void play_idle_precise(u64 duration_ns
        cpuidle_use_deepest_state(latency_ns);
  
        it.done = 0;
-       hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
-       it.timer.function = idle_inject_timer_fn;
+       hrtimer_setup_on_stack(&it.timer, idle_inject_timer_fn, CLOCK_MONOTONIC,
+                              HRTIMER_MODE_REL_HARD);
        hrtimer_start(&it.timer, ns_to_ktime(duration_ns),
                      HRTIMER_MODE_REL_PINNED_HARD);
  
diff --combined kernel/signal.c
index 65fd233f6f23ca513bf926d7e0536948482aacd3,10b464b9d91f6f68b335978e6e132279a61d3184..98b65cb358306836525a66ea83d77c3369c672f6
@@@ -59,6 -59,8 +59,8 @@@
  #include <asm/cacheflush.h>
  #include <asm/syscall.h>      /* for syscall_get_* */
  
+ #include "time/posix-timers.h"
  /*
   * SLAB caches for signal bits.
   */
@@@ -396,16 -398,9 +398,9 @@@ void task_join_group_stop(struct task_s
        task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING);
  }
  
- /*
-  * allocate a new signal queue record
-  * - this may be called without locks if and only if t == current, otherwise an
-  *   appropriate lock must be held to stop the target task from exiting
-  */
- static struct sigqueue *
- __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
-                int override_rlimit, const unsigned int sigqueue_flags)
+ static struct ucounts *sig_get_ucounts(struct task_struct *t, int sig,
+                                      int override_rlimit)
  {
-       struct sigqueue *q = NULL;
        struct ucounts *ucounts;
        long sigpending;
  
         */
        rcu_read_lock();
        ucounts = task_ucounts(t);
 -      sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
 +      sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING,
 +                                          override_rlimit);
        rcu_read_unlock();
        if (!sigpending)
                return NULL;
  
-       if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
-               q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
-       } else {
+       if (unlikely(!override_rlimit && sigpending > task_rlimit(t, RLIMIT_SIGPENDING))) {
+               dec_rlimit_put_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
                print_dropped_signal(sig);
+               return NULL;
        }
  
-       if (unlikely(q == NULL)) {
+       return ucounts;
+ }
+ 
+ static void __sigqueue_init(struct sigqueue *q, struct ucounts *ucounts,
+                           const unsigned int sigqueue_flags)
+ {
+       INIT_LIST_HEAD(&q->list);
+       q->flags = sigqueue_flags;
+       q->ucounts = ucounts;
+ }
+ 
+ /*
+  * allocate a new signal queue record
+  * - this may be called without locks if and only if t == current, otherwise an
+  *   appropriate lock must be held to stop the target task from exiting
+  */
+ static struct sigqueue *sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
+                                      int override_rlimit)
+ {
+       struct ucounts *ucounts = sig_get_ucounts(t, sig, override_rlimit);
+       struct sigqueue *q;
+       if (!ucounts)
+               return NULL;
+       q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
+       if (!q) {
                dec_rlimit_put_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
-       } else {
-               INIT_LIST_HEAD(&q->list);
-               q->flags = sigqueue_flags;
-               q->ucounts = ucounts;
+               return NULL;
        }
+       __sigqueue_init(q, ucounts, 0);
        return q;
  }
  
  static void __sigqueue_free(struct sigqueue *q)
  {
-       if (q->flags & SIGQUEUE_PREALLOC)
+       if (q->flags & SIGQUEUE_PREALLOC) {
+               posixtimer_sigqueue_putref(q);
                return;
+       }
        if (q->ucounts) {
                dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING);
                q->ucounts = NULL;
@@@ -479,42 -500,6 +501,6 @@@ void flush_signals(struct task_struct *
  }
  EXPORT_SYMBOL(flush_signals);
  
- #ifdef CONFIG_POSIX_TIMERS
- static void __flush_itimer_signals(struct sigpending *pending)
- {
-       sigset_t signal, retain;
-       struct sigqueue *q, *n;
-       signal = pending->signal;
-       sigemptyset(&retain);
-       list_for_each_entry_safe(q, n, &pending->list, list) {
-               int sig = q->info.si_signo;
-               if (likely(q->info.si_code != SI_TIMER)) {
-                       sigaddset(&retain, sig);
-               } else {
-                       sigdelset(&signal, sig);
-                       list_del_init(&q->list);
-                       __sigqueue_free(q);
-               }
-       }
-       sigorsets(&pending->signal, &signal, &retain);
- }
- void flush_itimer_signals(void)
- {
-       struct task_struct *tsk = current;
-       unsigned long flags;
-       spin_lock_irqsave(&tsk->sighand->siglock, flags);
-       __flush_itimer_signals(&tsk->pending);
-       __flush_itimer_signals(&tsk->signal->shared_pending);
-       spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
- }
- #endif
  void ignore_signals(struct task_struct *t)
  {
        int i;
@@@ -564,7 -549,7 +550,7 @@@ bool unhandled_signal(struct task_struc
  }
  
  static void collect_signal(int sig, struct sigpending *list, kernel_siginfo_t *info,
-                          bool *resched_timer)
+                          struct sigqueue **timer_sigq)
  {
        struct sigqueue *q, *first = NULL;
  
@@@ -587,12 -572,17 +573,17 @@@ still_pending
                list_del_init(&first->list);
                copy_siginfo(info, &first->info);
  
-               *resched_timer =
-                       (first->flags & SIGQUEUE_PREALLOC) &&
-                       (info->si_code == SI_TIMER) &&
-                       (info->si_sys_private);
-               __sigqueue_free(first);
+               /*
+                * posix-timer signals are preallocated and freed when the last
+                * reference count is dropped in posixtimer_deliver_signal() or
+                * immediately on timer deletion when the signal is not pending.
+                * Spare the extra round through __sigqueue_free() which is
+                * ignoring preallocated signals.
+                */
+               if (unlikely((first->flags & SIGQUEUE_PREALLOC) && (info->si_code == SI_TIMER)))
+                       *timer_sigq = first;
+               else
+                       __sigqueue_free(first);
        } else {
                /*
                 * Ok, it wasn't in the queue.  This must be
  }
  
  static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
-                       kernel_siginfo_t *info, bool *resched_timer)
+                           kernel_siginfo_t *info, struct sigqueue **timer_sigq)
  {
        int sig = next_signal(pending, mask);
  
        if (sig)
-               collect_signal(sig, pending, info, resched_timer);
+               collect_signal(sig, pending, info, timer_sigq);
        return sig;
  }
  
  int dequeue_signal(sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type)
  {
        struct task_struct *tsk = current;
-       bool resched_timer = false;
+       struct sigqueue *timer_sigq;
        int signr;
  
        lockdep_assert_held(&tsk->sighand->siglock);
  
+ again:
        *type = PIDTYPE_PID;
-       signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer);
+       timer_sigq = NULL;
+       signr = __dequeue_signal(&tsk->pending, mask, info, &timer_sigq);
        if (!signr) {
                *type = PIDTYPE_TGID;
                signr = __dequeue_signal(&tsk->signal->shared_pending,
-                                        mask, info, &resched_timer);
- #ifdef CONFIG_POSIX_TIMERS
-               /*
-                * itimer signal ?
-                *
-                * itimers are process shared and we restart periodic
-                * itimers in the signal delivery path to prevent DoS
-                * attacks in the high resolution timer case. This is
-                * compliant with the old way of self-restarting
-                * itimers, as the SIGALRM is a legacy signal and only
-                * queued once. Changing the restart behaviour to
-                * restart the timer in the signal dequeue path is
-                * reducing the timer noise on heavy loaded !highres
-                * systems too.
-                */
-               if (unlikely(signr == SIGALRM)) {
-                       struct hrtimer *tmr = &tsk->signal->real_timer;
-                       if (!hrtimer_is_queued(tmr) &&
-                           tsk->signal->it_real_incr != 0) {
-                               hrtimer_forward(tmr, tmr->base->get_time(),
-                                               tsk->signal->it_real_incr);
-                               hrtimer_restart(tmr);
-                       }
-               }
- #endif
+                                        mask, info, &timer_sigq);
+               if (unlikely(signr == SIGALRM))
+                       posixtimer_rearm_itimer(tsk);
        }
  
        recalc_sigpending();
                 */
                current->jobctl |= JOBCTL_STOP_DEQUEUED;
        }
- #ifdef CONFIG_POSIX_TIMERS
-       if (resched_timer) {
-               /*
-                * Release the siglock to ensure proper locking order
-                * of timer locks outside of siglocks.  Note, we leave
-                * irqs disabled here, since the posix-timers code is
-                * about to disable them again anyway.
-                */
-               spin_unlock(&tsk->sighand->siglock);
-               posixtimer_rearm(info);
-               spin_lock(&tsk->sighand->siglock);
  
-               /* Don't expose the si_sys_private value to userspace */
-               info->si_sys_private = 0;
+       if (IS_ENABLED(CONFIG_POSIX_TIMERS) && unlikely(timer_sigq)) {
+               if (!posixtimer_deliver_signal(info, timer_sigq))
+                       goto again;
        }
- #endif
        return signr;
  }
  EXPORT_SYMBOL_GPL(dequeue_signal);
@@@ -773,17 -733,24 +734,24 @@@ void signal_wake_up_state(struct task_s
                kick_process(t);
  }
  
- /*
-  * Remove signals in mask from the pending set and queue.
-  * Returns 1 if any signals were found.
-  *
-  * All callers must be holding the siglock.
-  */
- static void flush_sigqueue_mask(sigset_t *mask, struct sigpending *s)
+ static inline void posixtimer_sig_ignore(struct task_struct *tsk, struct sigqueue *q);
+ 
+ static void sigqueue_free_ignored(struct task_struct *tsk, struct sigqueue *q)
+ {
+       if (likely(!(q->flags & SIGQUEUE_PREALLOC) || q->info.si_code != SI_TIMER))
+               __sigqueue_free(q);
+       else
+               posixtimer_sig_ignore(tsk, q);
+ }
+ 
+ /* Remove signals in mask from the pending set and queue. */
+ static void flush_sigqueue_mask(struct task_struct *p, sigset_t *mask, struct sigpending *s)
  {
        struct sigqueue *q, *n;
        sigset_t m;
  
+       lockdep_assert_held(&p->sighand->siglock);
        sigandsets(&m, mask, &s->signal);
        if (sigisemptyset(&m))
                return;
        list_for_each_entry_safe(q, n, &s->list, list) {
                if (sigismember(mask, q->info.si_signo)) {
                        list_del_init(&q->list);
-                       __sigqueue_free(q);
+                       sigqueue_free_ignored(p, q);
                }
        }
  }
@@@ -917,18 -884,18 +885,18 @@@ static bool prepare_signal(int sig, str
                 * This is a stop signal.  Remove SIGCONT from all queues.
                 */
                siginitset(&flush, sigmask(SIGCONT));
-               flush_sigqueue_mask(&flush, &signal->shared_pending);
+               flush_sigqueue_mask(p, &flush, &signal->shared_pending);
                for_each_thread(p, t)
-                       flush_sigqueue_mask(&flush, &t->pending);
+                       flush_sigqueue_mask(p, &flush, &t->pending);
        } else if (sig == SIGCONT) {
                unsigned int why;
                /*
                 * Remove all stop signals from all queues, wake all threads.
                 */
                siginitset(&flush, SIG_KERNEL_STOP_MASK);
-               flush_sigqueue_mask(&flush, &signal->shared_pending);
+               flush_sigqueue_mask(p, &flush, &signal->shared_pending);
                for_each_thread(p, t) {
-                       flush_sigqueue_mask(&flush, &t->pending);
+                       flush_sigqueue_mask(p, &flush, &t->pending);
                        task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING);
                        if (likely(!(t->ptrace & PT_SEIZED))) {
                                t->jobctl &= ~JOBCTL_STOPPED;
@@@ -1115,7 -1082,7 +1083,7 @@@ static int __send_signal_locked(int sig
        else
                override_rlimit = 0;
  
-       q = __sigqueue_alloc(sig, t, GFP_ATOMIC, override_rlimit, 0);
+       q = sigqueue_alloc(sig, t, GFP_ATOMIC, override_rlimit);
  
        if (q) {
                list_add_tail(&q->list, &pending->list);
@@@ -1923,112 -1890,242 +1891,242 @@@ int kill_pid(struct pid *pid, int sig, 
  }
  EXPORT_SYMBOL(kill_pid);
  
+ #ifdef CONFIG_POSIX_TIMERS
  /*
-  * These functions support sending signals using preallocated sigqueue
-  * structures.  This is needed "because realtime applications cannot
-  * afford to lose notifications of asynchronous events, like timer
-  * expirations or I/O completions".  In the case of POSIX Timers
-  * we allocate the sigqueue structure from the timer_create.  If this
-  * allocation fails we are able to report the failure to the application
-  * with an EAGAIN error.
+  * These functions handle POSIX timer signals. POSIX timers use
+  * preallocated sigqueue structs for sending signals.
   */
- struct sigqueue *sigqueue_alloc(void)
+ static void __flush_itimer_signals(struct sigpending *pending)
  {
-       return __sigqueue_alloc(-1, current, GFP_KERNEL, 0, SIGQUEUE_PREALLOC);
+       sigset_t signal, retain;
+       struct sigqueue *q, *n;
+       signal = pending->signal;
+       sigemptyset(&retain);
+       list_for_each_entry_safe(q, n, &pending->list, list) {
+               int sig = q->info.si_signo;
+               if (likely(q->info.si_code != SI_TIMER)) {
+                       sigaddset(&retain, sig);
+               } else {
+                       sigdelset(&signal, sig);
+                       list_del_init(&q->list);
+                       __sigqueue_free(q);
+               }
+       }
+       sigorsets(&pending->signal, &signal, &retain);
  }
  
- void sigqueue_free(struct sigqueue *q)
+ void flush_itimer_signals(void)
  {
-       spinlock_t *lock = &current->sighand->siglock;
-       unsigned long flags;
+       struct task_struct *tsk = current;
  
-       if (WARN_ON_ONCE(!(q->flags & SIGQUEUE_PREALLOC)))
-               return;
-       /*
-        * We must hold ->siglock while testing q->list
-        * to serialize with collect_signal() or with
-        * __exit_signal()->flush_sigqueue().
-        */
-       spin_lock_irqsave(lock, flags);
-       q->flags &= ~SIGQUEUE_PREALLOC;
-       /*
-        * If it is queued it will be freed when dequeued,
-        * like the "regular" sigqueue.
-        */
-       if (!list_empty(&q->list))
-               q = NULL;
-       spin_unlock_irqrestore(lock, flags);
+       guard(spinlock_irqsave)(&tsk->sighand->siglock);
+       __flush_itimer_signals(&tsk->pending);
+       __flush_itimer_signals(&tsk->signal->shared_pending);
+ }
  
-       if (q)
-               __sigqueue_free(q);
+ bool posixtimer_init_sigqueue(struct sigqueue *q)
+ {
+       struct ucounts *ucounts = sig_get_ucounts(current, -1, 0);
+       if (!ucounts)
+               return false;
+       clear_siginfo(&q->info);
+       __sigqueue_init(q, ucounts, SIGQUEUE_PREALLOC);
+       return true;
  }
  
- int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
+ static void posixtimer_queue_sigqueue(struct sigqueue *q, struct task_struct *t, enum pid_type type)
  {
-       int sig = q->info.si_signo;
        struct sigpending *pending;
+       int sig = q->info.si_signo;
+       signalfd_notify(t, sig);
+       pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending;
+       list_add_tail(&q->list, &pending->list);
+       sigaddset(&pending->signal, sig);
+       complete_signal(sig, t, type);
+ }
+ 
+ /*
+  * This function is used by POSIX timers to deliver a timer signal.
+  * Where type is PIDTYPE_PID (such as for timers with SIGEV_THREAD_ID
+  * set), the signal must be delivered to the specific thread (queues
+  * into t->pending).
+  *
+  * Where type is not PIDTYPE_PID, signals must be delivered to the
+  * process. In this case, prefer to deliver to current if it is in
+  * the same thread group as the target process, which avoids
+  * unnecessarily waking up a potentially idle task.
+  */
+ static inline struct task_struct *posixtimer_get_target(struct k_itimer *tmr)
+ {
+       struct task_struct *t = pid_task(tmr->it_pid, tmr->it_pid_type);
+       if (t && tmr->it_pid_type != PIDTYPE_PID && same_thread_group(t, current))
+               t = current;
+       return t;
+ }
+ 
+ void posixtimer_send_sigqueue(struct k_itimer *tmr)
+ {
+       struct sigqueue *q = &tmr->sigq;
+       int sig = q->info.si_signo;
        struct task_struct *t;
        unsigned long flags;
-       int ret, result;
+       int result;
  
-       if (WARN_ON_ONCE(!(q->flags & SIGQUEUE_PREALLOC)))
-               return 0;
-       if (WARN_ON_ONCE(q->info.si_code != SI_TIMER))
-               return 0;
+       guard(rcu)();
  
-       ret = -1;
-       rcu_read_lock();
+       t = posixtimer_get_target(tmr);
+       if (!t)
+               return;
+       if (!likely(lock_task_sighand(t, &flags)))
+               return;
  
        /*
-        * This function is used by POSIX timers to deliver a timer signal.
-        * Where type is PIDTYPE_PID (such as for timers with SIGEV_THREAD_ID
-        * set), the signal must be delivered to the specific thread (queues
-        * into t->pending).
-        *
-        * Where type is not PIDTYPE_PID, signals must be delivered to the
-        * process. In this case, prefer to deliver to current if it is in
-        * the same thread group as the target process, which avoids
-        * unnecessarily waking up a potentially idle task.
+        * Update @tmr::sigqueue_seq for posix timer signals with sighand
+        * locked to prevent a race against dequeue_signal().
         */
-       t = pid_task(pid, type);
-       if (!t)
-               goto ret;
-       if (type != PIDTYPE_PID && same_thread_group(t, current))
-               t = current;
-       if (!likely(lock_task_sighand(t, &flags)))
-               goto ret;
+       tmr->it_sigqueue_seq = tmr->it_signal_seq;
  
-       ret = 1; /* the signal is ignored */
-       result = TRACE_SIGNAL_IGNORED;
-       if (!prepare_signal(sig, t, false))
+       /*
+        * Set the signal delivery status under sighand lock, so that the
+        * ignored signal handling can distinguish between a periodic and a
+        * non-periodic timer.
+        */
+       tmr->it_sig_periodic = tmr->it_status == POSIX_TIMER_REQUEUE_PENDING;
+       if (!prepare_signal(sig, t, false)) {
+               result = TRACE_SIGNAL_IGNORED;
+               if (!list_empty(&q->list)) {
+                       /*
+                        * If task group is exiting with the signal already pending,
+                        * wait for __exit_signal() to do its job. Otherwise if
+                        * ignored, it's not supposed to be queued. Try to survive.
+                        */
+                       WARN_ON_ONCE(!(t->signal->flags & SIGNAL_GROUP_EXIT));
+                       goto out;
+               }
+               /* Periodic timers with SIG_IGN are queued on the ignored list */
+               if (tmr->it_sig_periodic) {
+                       /*
+                        * Already queued means the timer was rearmed after
+                        * the previous expiry got it on the ignore list.
+                        * Nothing to do for that case.
+                        */
+                       if (hlist_unhashed(&tmr->ignored_list)) {
+                               /*
+                                * Take a signal reference and queue it on
+                                * the ignored list.
+                                */
+                               posixtimer_sigqueue_getref(q);
+                               posixtimer_sig_ignore(t, q);
+                       }
+               } else if (!hlist_unhashed(&tmr->ignored_list)) {
+                       /*
+                        * Covers the case where a timer was periodic and
+                        * then the signal was ignored. Later it was rearmed
+                        * as oneshot timer. The previous signal is invalid
+                        * now, and this oneshot signal has to be dropped.
+                        * Remove it from the ignored list and drop the
+                        * reference count as the signal is no longer
+                        * queued.
+                        */
+                       hlist_del_init(&tmr->ignored_list);
+                       posixtimer_putref(tmr);
+               }
                goto out;
+       }
+       /* This should never happen and leaks a reference count */
+       if (WARN_ON_ONCE(!hlist_unhashed(&tmr->ignored_list)))
+               hlist_del_init(&tmr->ignored_list);
  
-       ret = 0;
        if (unlikely(!list_empty(&q->list))) {
-               /*
-                * If an SI_TIMER entry is already queue just increment
-                * the overrun count.
-                */
-               q->info.si_overrun++;
+               /* This holds a reference count already */
                result = TRACE_SIGNAL_ALREADY_PENDING;
                goto out;
        }
-       q->info.si_overrun = 0;
  
-       signalfd_notify(t, sig);
-       pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending;
-       list_add_tail(&q->list, &pending->list);
-       sigaddset(&pending->signal, sig);
-       complete_signal(sig, t, type);
+       posixtimer_sigqueue_getref(q);
+       posixtimer_queue_sigqueue(q, t, tmr->it_pid_type);
        result = TRACE_SIGNAL_DELIVERED;
  out:
-       trace_signal_generate(sig, &q->info, t, type != PIDTYPE_PID, result);
+       trace_signal_generate(sig, &q->info, t, tmr->it_pid_type != PIDTYPE_PID, result);
        unlock_task_sighand(t, &flags);
- ret:
-       rcu_read_unlock();
-       return ret;
  }
  
+ static inline void posixtimer_sig_ignore(struct task_struct *tsk, struct sigqueue *q)
+ {
+       struct k_itimer *tmr = container_of(q, struct k_itimer, sigq);
+       /*
+        * If the timer is marked deleted already or the signal originates
+        * from a non-periodic timer, then just drop the reference
+        * count. Otherwise queue it on the ignored list.
+        */
+       if (tmr->it_signal && tmr->it_sig_periodic)
+               hlist_add_head(&tmr->ignored_list, &tsk->signal->ignored_posix_timers);
+       else
+               posixtimer_putref(tmr);
+ }
+ 
+ static void posixtimer_sig_unignore(struct task_struct *tsk, int sig)
+ {
+       struct hlist_head *head = &tsk->signal->ignored_posix_timers;
+       struct hlist_node *tmp;
+       struct k_itimer *tmr;
+       if (likely(hlist_empty(head)))
+               return;
+       /*
+        * Rearming a timer with sighand lock held is not possible due to
+        * lock ordering vs. tmr::it_lock. Just stick the sigqueue back and
+        * let the signal delivery path deal with it whether it needs to be
+        * rearmed or not. This cannot be decided here w/o dropping sighand
+        * lock and creating a loop retry horror show.
+        */
+       hlist_for_each_entry_safe(tmr, tmp, head, ignored_list) {
+               struct task_struct *target;
+               /*
+                * tmr::sigq.info.si_signo is immutable, so accessing it
+                * without holding tmr::it_lock is safe.
+                */
+               if (tmr->sigq.info.si_signo != sig)
+                       continue;
+               hlist_del_init(&tmr->ignored_list);
+               /* This should never happen and leaks a reference count */
+               if (WARN_ON_ONCE(!list_empty(&tmr->sigq.list)))
+                       continue;
+               /*
+                * Get the target for the signal. If target is a thread and
+                * has exited by now, drop the reference count.
+                */
+               guard(rcu)();
+               target = posixtimer_get_target(tmr);
+               if (target)
+                       posixtimer_queue_sigqueue(&tmr->sigq, target, tmr->it_pid_type);
+               else
+                       posixtimer_putref(tmr);
+       }
+ }
+ #else /* CONFIG_POSIX_TIMERS */
+ static inline void posixtimer_sig_ignore(struct task_struct *tsk, struct sigqueue *q) { }
+ static inline void posixtimer_sig_unignore(struct task_struct *tsk, int sig) { }
+ #endif /* !CONFIG_POSIX_TIMERS */

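The ignored-list handling above is what makes the following userspace sequence work without the kernel self-rearming the timer behind SIG_IGN: the periodic timer's signal is parked while ignored and requeued once a handler is installed. A small illustrative test (error handling elided; link with -lrt on older glibc):

#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static volatile sig_atomic_t fired;

static void handler(int sig)
{
	(void)sig;
	fired = 1;
}

int main(void)
{
	struct sigevent sev = {
		.sigev_notify = SIGEV_SIGNAL,
		.sigev_signo  = SIGALRM,
	};
	struct itimerspec its = {
		.it_value.tv_nsec    = 50 * 1000 * 1000,
		.it_interval.tv_nsec = 50 * 1000 * 1000,
	};
	timer_t tid;

	signal(SIGALRM, SIG_IGN);	/* timer signal starts out ignored */
	timer_create(CLOCK_MONOTONIC, &sev, &tid);
	timer_settime(tid, 0, &its, NULL);

	sleep(1);			/* expiries occur, signal stays ignored */
	signal(SIGALRM, handler);	/* lift SIG_IGN */
	sleep(1);			/* a parked/new expiry is delivered */

	printf("fired=%d\n", fired);	/* expected: 1 */
	return 0;
}
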
  void do_notify_pidfd(struct task_struct *task)
  {
        struct pid *pid = task_pid(task);
@@@ -3909,6 -4006,7 +4007,6 @@@ SYSCALL_DEFINE4(pidfd_send_signal, int
                siginfo_t __user *, info, unsigned int, flags)
  {
        int ret;
 -      struct fd f;
        struct pid *pid;
        kernel_siginfo_t kinfo;
        enum pid_type type;
        if (hweight32(flags & PIDFD_SEND_SIGNAL_FLAGS) > 1)
                return -EINVAL;
  
 -      f = fdget(pidfd);
 -      if (!fd_file(f))
 +      CLASS(fd, f)(pidfd);
 +      if (fd_empty(f))
                return -EBADF;
  
        /* Is this a pidfd? */
        pid = pidfd_to_pid(fd_file(f));
 -      if (IS_ERR(pid)) {
 -              ret = PTR_ERR(pid);
 -              goto err;
 -      }
 +      if (IS_ERR(pid))
 +              return PTR_ERR(pid);
  
 -      ret = -EINVAL;
        if (!access_pidfd_pidns(pid))
 -              goto err;
 +              return -EINVAL;
  
        switch (flags) {
        case 0:
        if (info) {
                ret = copy_siginfo_from_user_any(&kinfo, info);
                if (unlikely(ret))
 -                      goto err;
 +                      return ret;
  
 -              ret = -EINVAL;
                if (unlikely(sig != kinfo.si_signo))
 -                      goto err;
 +                      return -EINVAL;
  
                /* Only allow sending arbitrary signals to yourself. */
 -              ret = -EPERM;
                if ((task_pid(current) != pid || type > PIDTYPE_TGID) &&
                    (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
 -                      goto err;
 +                      return -EPERM;
        } else {
                prepare_kill_siginfo(sig, &kinfo, type);
        }
  
        if (type == PIDTYPE_PGID)
 -              ret = kill_pgrp_info(sig, &kinfo, pid);
 +              return kill_pgrp_info(sig, &kinfo, pid);
        else
 -              ret = kill_pid_info_type(sig, &kinfo, pid, type);
 -err:
 -      fdput(f);
 -      return ret;
 +              return kill_pid_info_type(sig, &kinfo, pid, type);
  }
  
  static int
@@@ -4145,8 -4251,8 +4243,8 @@@ void kernel_sigaction(int sig, __sighan
                sigemptyset(&mask);
                sigaddset(&mask, sig);
  
-               flush_sigqueue_mask(&mask, &current->signal->shared_pending);
-               flush_sigqueue_mask(&mask, &current->pending);
+               flush_sigqueue_mask(current, &mask, &current->signal->shared_pending);
+               flush_sigqueue_mask(current, &mask, &current->pending);
                recalc_sigpending();
        }
        spin_unlock_irq(&current->sighand->siglock);
@@@ -4196,6 -4302,8 +4294,8 @@@ int do_sigaction(int sig, struct k_siga
        sigaction_compat_abi(act, oact);
  
        if (act) {
+               bool was_ignored = k->sa.sa_handler == SIG_IGN;
                sigdelsetmask(&act->sa.sa_mask,
                              sigmask(SIGKILL) | sigmask(SIGSTOP));
                *k = *act;
                if (sig_handler_ignored(sig_handler(p, sig), sig)) {
                        sigemptyset(&mask);
                        sigaddset(&mask, sig);
-                       flush_sigqueue_mask(&mask, &p->signal->shared_pending);
+                       flush_sigqueue_mask(p, &mask, &p->signal->shared_pending);
                        for_each_thread(p, t)
-                               flush_sigqueue_mask(&mask, &t->pending);
+                               flush_sigqueue_mask(p, &mask, &t->pending);
+               } else if (was_ignored) {
+                       posixtimer_sig_unignore(p, sig);
                }
        }
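
The pidfd_send_signal() rework above (CLASS(fd, f) replacing fdget()/fdput(), plus the guard(rcu)() and guard(spinlock_irqsave)() uses earlier in this file) relies on the scope-based cleanup helpers from <linux/cleanup.h>: the resource is released automatically when the variable leaves scope, so the unwinding labels disappear. A sketch of the shape, with my_lookup() as a placeholder:

/* Illustrative kernel-style sketch; my_lookup() is hypothetical. */
#include <linux/cleanup.h>
#include <linux/file.h>

static int my_lookup(int pidfd)
{
	CLASS(fd, f)(pidfd);		/* fdput() runs at scope exit */

	if (fd_empty(f))
		return -EBADF;

	guard(rcu)();			/* rcu_read_unlock() at scope exit */
	/* ... every early return below is now leak-free ... */
	return 0;
}
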
  
diff --combined kernel/time/hrtimer.c
index d9911516e7431ff414a3c9a7ef04ee53deebf81d,55e9ffbcd49ad466c0f02279e6e624e15f58613d..80fe3749d2db1053e7d87d0bfd5e9067273f71f2
@@@ -417,6 -417,11 +417,11 @@@ static inline void debug_hrtimer_init(s
        debug_object_init(timer, &hrtimer_debug_descr);
  }
  
+ static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer)
+ {
+       debug_object_init_on_stack(timer, &hrtimer_debug_descr);
+ }
+ 
  static inline void debug_hrtimer_activate(struct hrtimer *timer,
                                          enum hrtimer_mode mode)
  {
@@@ -428,28 -433,6 +433,6 @@@ static inline void debug_hrtimer_deacti
        debug_object_deactivate(timer, &hrtimer_debug_descr);
  }
  
- static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
-                          enum hrtimer_mode mode);
- void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
-                          enum hrtimer_mode mode)
- {
-       debug_object_init_on_stack(timer, &hrtimer_debug_descr);
-       __hrtimer_init(timer, clock_id, mode);
- }
- EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
- static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
-                                  clockid_t clock_id, enum hrtimer_mode mode);
- void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
-                                  clockid_t clock_id, enum hrtimer_mode mode)
- {
-       debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
-       __hrtimer_init_sleeper(sl, clock_id, mode);
- }
- EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
  void destroy_hrtimer_on_stack(struct hrtimer *timer)
  {
        debug_object_free(timer, &hrtimer_debug_descr);
@@@ -459,6 -442,7 +442,7 @@@ EXPORT_SYMBOL_GPL(destroy_hrtimer_on_st
  #else
  
  static inline void debug_hrtimer_init(struct hrtimer *timer) { }
+ static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer) { }
  static inline void debug_hrtimer_activate(struct hrtimer *timer,
                                          enum hrtimer_mode mode) { }
  static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
@@@ -472,6 -456,13 +456,13 @@@ debug_init(struct hrtimer *timer, clock
        trace_hrtimer_init(timer, clockid, mode);
  }
  
+ static inline void debug_init_on_stack(struct hrtimer *timer, clockid_t clockid,
+                                      enum hrtimer_mode mode)
+ {
+       debug_hrtimer_init_on_stack(timer);
+       trace_hrtimer_init(timer, clockid, mode);
+ }
+ 
  static inline void debug_activate(struct hrtimer *timer,
                                  enum hrtimer_mode mode)
  {
@@@ -1544,6 -1535,11 +1535,11 @@@ static inline int hrtimer_clockid_to_ba
        return HRTIMER_BASE_MONOTONIC;
  }
  
+ static enum hrtimer_restart hrtimer_dummy_timeout(struct hrtimer *unused)
+ {
+       return HRTIMER_NORESTART;
+ }
+ 
  static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
                           enum hrtimer_mode mode)
  {
        timerqueue_init(&timer->node);
  }
  
+ static void __hrtimer_setup(struct hrtimer *timer,
+                           enum hrtimer_restart (*function)(struct hrtimer *),
+                           clockid_t clock_id, enum hrtimer_mode mode)
+ {
+       __hrtimer_init(timer, clock_id, mode);
+       if (WARN_ON_ONCE(!function))
+               timer->function = hrtimer_dummy_timeout;
+       else
+               timer->function = function;
+ }
+ 
  /**
   * hrtimer_init - initialize a timer to the given clock
   * @timer:    the timer to be initialized
@@@ -1600,6 -1608,46 +1608,46 @@@ void hrtimer_init(struct hrtimer *timer
  }
  EXPORT_SYMBOL_GPL(hrtimer_init);
  
+ /**
+  * hrtimer_setup - initialize a timer to the given clock
+  * @timer:    the timer to be initialized
+  * @function: the callback function
+  * @clock_id: the clock to be used
+  * @mode:       The modes which are relevant for initialization:
+  *              HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT,
+  *              HRTIMER_MODE_REL_SOFT
+  *
+  *              The PINNED variants of the above can be handed in,
+  *              but the PINNED bit is ignored as pinning happens
+  *              when the hrtimer is started
+  */
+ void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *),
+                  clockid_t clock_id, enum hrtimer_mode mode)
+ {
+       debug_init(timer, clock_id, mode);
+       __hrtimer_setup(timer, function, clock_id, mode);
+ }
+ EXPORT_SYMBOL_GPL(hrtimer_setup);
+ 
+ /**
+  * hrtimer_setup_on_stack - initialize a timer on stack memory
+  * @timer:    The timer to be initialized
+  * @function: the callback function
+  * @clock_id: The clock to be used
+  * @mode:       The timer mode
+  *
+  * Similar to hrtimer_setup(), except that this one must be used if struct hrtimer is in stack
+  * memory.
+  */
+ void hrtimer_setup_on_stack(struct hrtimer *timer,
+                           enum hrtimer_restart (*function)(struct hrtimer *),
+                           clockid_t clock_id, enum hrtimer_mode mode)
+ {
+       debug_init_on_stack(timer, clock_id, mode);
+       __hrtimer_setup(timer, function, clock_id, mode);
+ }
+ EXPORT_SYMBOL_GPL(hrtimer_setup_on_stack);
+ 
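hrtimer_setup() and hrtimer_setup_on_stack() fold the callback assignment into initialization, which is the conversion applied to the futex, idle-loop and io_uring call sites elsewhere in this diff. A before/after sketch with placeholder names:

/* Illustrative conversion; my_timer_fn and my_start are placeholders. */
static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
{
	return HRTIMER_NORESTART;
}

static void my_start(struct hrtimer *timer)
{
	/* Old two-step pattern:
	 *   hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	 *   timer->function = my_timer_fn;
	 */
	hrtimer_setup(timer, my_timer_fn, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer_start(timer, ms_to_ktime(10), HRTIMER_MODE_REL);
}
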
  /*
   * A timer is active, when it is enqueued into the rbtree or the
   * callback function is running or it's in the state of being migrated
@@@ -1811,7 -1859,7 +1859,7 @@@ retry
        if (!ktime_before(now, cpu_base->softirq_expires_next)) {
                cpu_base->softirq_expires_next = KTIME_MAX;
                cpu_base->softirq_activated = 1;
 -              raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 +              raise_timer_softirq(HRTIMER_SOFTIRQ);
        }
  
        __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
@@@ -1906,7 -1954,7 +1954,7 @@@ void hrtimer_run_queues(void
        if (!ktime_before(now, cpu_base->softirq_expires_next)) {
                cpu_base->softirq_expires_next = KTIME_MAX;
                cpu_base->softirq_activated = 1;
 -              raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 +              raise_timer_softirq(HRTIMER_SOFTIRQ);
        }
  
        __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
@@@ -1944,7 -1992,7 +1992,7 @@@ void hrtimer_sleeper_start_expires(stru
         * Make the enqueue delivery mode check work on RT. If the sleeper
         * was initialized for hard interrupt delivery, force the mode bit.
         * This is a special case for hrtimer_sleepers because
-        * hrtimer_init_sleeper() determines the delivery mode on RT so the
+        * __hrtimer_init_sleeper() determines the delivery mode on RT so the
         * fiddling with this decision is avoided at the call sites.
         */
        if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
@@@ -1987,19 -2035,18 +2035,18 @@@ static void __hrtimer_init_sleeper(stru
  }
  
  /**
-  * hrtimer_init_sleeper - initialize sleeper to the given clock
+  * hrtimer_setup_sleeper_on_stack - initialize a sleeper in stack memory
   * @sl:               sleeper to be initialized
   * @clock_id: the clock to be used
   * @mode:     timer mode abs/rel
   */
- void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
-                         enum hrtimer_mode mode)
+ void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl,
+                                   clockid_t clock_id, enum hrtimer_mode mode)
  {
-       debug_init(&sl->timer, clock_id, mode);
+       debug_init_on_stack(&sl->timer, clock_id, mode);
        __hrtimer_init_sleeper(sl, clock_id, mode);
  }
- EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
+ EXPORT_SYMBOL_GPL(hrtimer_setup_sleeper_on_stack);
  
  int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
  {
@@@ -2060,8 -2107,7 +2107,7 @@@ static long __sched hrtimer_nanosleep_r
        struct hrtimer_sleeper t;
        int ret;
  
-       hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
-                                     HRTIMER_MODE_ABS);
+       hrtimer_setup_sleeper_on_stack(&t, restart->nanosleep.clockid, HRTIMER_MODE_ABS);
        hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
        ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
        destroy_hrtimer_on_stack(&t.timer);
@@@ -2075,7 -2121,7 +2121,7 @@@ long hrtimer_nanosleep(ktime_t rqtp, co
        struct hrtimer_sleeper t;
        int ret = 0;
  
-       hrtimer_init_sleeper_on_stack(&t, clockid, mode);
+       hrtimer_setup_sleeper_on_stack(&t, clockid, mode);
        hrtimer_set_expires_range_ns(&t.timer, rqtp, current->timer_slack_ns);
        ret = do_nanosleep(&t, mode);
        if (ret != -ERESTART_RESTARTBLOCK)
@@@ -2242,123 -2288,3 +2288,3 @@@ void __init hrtimers_init(void
        hrtimers_prepare_cpu(smp_processor_id());
        open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq);
  }
- 
- /**
-  * schedule_hrtimeout_range_clock - sleep until timeout
-  * @expires:  timeout value (ktime_t)
-  * @delta:    slack in expires timeout (ktime_t)
-  * @mode:     timer mode
-  * @clock_id: timer clock to be used
-  */
- int __sched
- schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
-                              const enum hrtimer_mode mode, clockid_t clock_id)
- {
-       struct hrtimer_sleeper t;
-       /*
-        * Optimize when a zero timeout value is given. It does not
-        * matter whether this is an absolute or a relative time.
-        */
-       if (expires && *expires == 0) {
-               __set_current_state(TASK_RUNNING);
-               return 0;
-       }
-       /*
-        * A NULL parameter means "infinite"
-        */
-       if (!expires) {
-               schedule();
-               return -EINTR;
-       }
-       hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
-       hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
-       hrtimer_sleeper_start_expires(&t, mode);
-       if (likely(t.task))
-               schedule();
-       hrtimer_cancel(&t.timer);
-       destroy_hrtimer_on_stack(&t.timer);
-       __set_current_state(TASK_RUNNING);
-       return !t.task ? 0 : -EINTR;
- }
- EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock);
- 
- /**
-  * schedule_hrtimeout_range - sleep until timeout
-  * @expires:  timeout value (ktime_t)
-  * @delta:    slack in expires timeout (ktime_t)
-  * @mode:     timer mode
-  *
-  * Make the current task sleep until the given expiry time has
-  * elapsed. The routine will return immediately unless
-  * the current task state has been set (see set_current_state()).
-  *
-  * The @delta argument gives the kernel the freedom to schedule the
-  * actual wakeup to a time that is both power and performance friendly
-  * for regular (non RT/DL) tasks.
-  * The kernel gives the normal best-effort behavior for "@expires+@delta",
-  * but may decide to fire the timer earlier, but no earlier than @expires.
-  *
-  * You can set the task state as follows -
-  *
-  * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
-  * pass before the routine returns unless the current task is explicitly
-  * woken up, (e.g. by wake_up_process()).
-  *
-  * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
-  * delivered to the current task or the current task is explicitly woken
-  * up.
-  *
-  * The current task state is guaranteed to be TASK_RUNNING when this
-  * routine returns.
-  *
-  * Returns 0 when the timer has expired. If the task was woken before the
-  * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
-  * by an explicit wakeup, it returns -EINTR.
-  */
- int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
-                                    const enum hrtimer_mode mode)
- {
-       return schedule_hrtimeout_range_clock(expires, delta, mode,
-                                             CLOCK_MONOTONIC);
- }
- EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
- 
- /**
-  * schedule_hrtimeout - sleep until timeout
-  * @expires:  timeout value (ktime_t)
-  * @mode:     timer mode
-  *
-  * Make the current task sleep until the given expiry time has
-  * elapsed. The routine will return immediately unless
-  * the current task state has been set (see set_current_state()).
-  *
-  * You can set the task state as follows -
-  *
-  * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
-  * pass before the routine returns unless the current task is explicitly
-  * woken up, (e.g. by wake_up_process()).
-  *
-  * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
-  * delivered to the current task or the current task is explicitly woken
-  * up.
-  *
-  * The current task state is guaranteed to be TASK_RUNNING when this
-  * routine returns.
-  *
-  * Returns 0 when the timer has expired. If the task was woken before the
-  * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
-  * by an explicit wakeup, it returns -EINTR.
-  */
- int __sched schedule_hrtimeout(ktime_t *expires,
-                              const enum hrtimer_mode mode)
- {
-       return schedule_hrtimeout_range(expires, 0, mode);
- }
- EXPORT_SYMBOL_GPL(schedule_hrtimeout);
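
The schedule_hrtimeout*() family is deleted from this file here; per the kerneldoc above, caller-visible semantics are unchanged. A typical caller, sketched from that documentation (my_wait is illustrative):

/* Kernel-style sketch: sleep ~10ms with 1ms of slack for coalescing. */
static int my_wait(void)
{
	ktime_t timeout = ktime_add_ns(ktime_get(), 10 * NSEC_PER_MSEC);

	set_current_state(TASK_INTERRUPTIBLE);
	/* returns 0 on timer expiry, -EINTR on early wakeup */
	return schedule_hrtimeout_range(&timeout, NSEC_PER_MSEC,
					HRTIMER_MODE_ABS);
}
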
diff --combined kernel/time/tick-sched.c
index e0c47259e91a7b54323ed5c2efc877efa3f5de9d,9f90c7333b1dcb10ad95fc311498826ff366e9a0..fa058510af9c1a17d75833a5b7317dce59cd2754
@@@ -311,14 -311,6 +311,6 @@@ static enum hrtimer_restart tick_nohz_h
        return HRTIMER_RESTART;
  }
  
- static void tick_sched_timer_cancel(struct tick_sched *ts)
- {
-       if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
-               hrtimer_cancel(&ts->sched_timer);
-       else if (tick_sched_flag_test(ts, TS_FLAG_NOHZ))
-               tick_program_event(KTIME_MAX, 1);
- }

  #ifdef CONFIG_NO_HZ_FULL
  cpumask_var_t tick_nohz_full_mask;
  EXPORT_SYMBOL_GPL(tick_nohz_full_mask);
@@@ -434,12 -426,6 +426,12 @@@ static void tick_nohz_kick_task(struct 
         *   smp_mb__after_spin_lock()
         *   tick_nohz_task_switch()
         *     LOAD p->tick_dep_mask
 +       *
 +       * XXX given a task picks up the dependency on schedule(), should we
 +       * only care about tasks that are currently on the CPU instead of all
 +       * that are on the runqueue?
 +       *
 +       * That is, does this want to be: task_on_cpu() / task_curr()?
         */
        if (!sched_task_on_rq(tsk))
                return;
@@@ -865,7 -851,7 +857,7 @@@ static void tick_nohz_restart(struct ti
  
  static inline bool local_timer_softirq_pending(void)
  {
 -      return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
 +      return local_timers_pending() & BIT(TIMER_SOFTIRQ);
  }
  
  /*
@@@ -1061,7 -1047,10 +1053,10 @@@ static void tick_nohz_stop_tick(struct 
         * the tick timer.
         */
        if (unlikely(expires == KTIME_MAX)) {
-               tick_sched_timer_cancel(ts);
+               if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
+                       hrtimer_cancel(&ts->sched_timer);
+               else
+                       tick_program_event(KTIME_MAX, 1);
                return;
        }
  
@@@ -1610,21 -1599,13 +1605,13 @@@ void tick_setup_sched_timer(bool hrtime
   */
  void tick_sched_timer_dying(int cpu)
  {
-       struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
-       struct clock_event_device *dev = td->evtdev;
        ktime_t idle_sleeptime, iowait_sleeptime;
        unsigned long idle_calls, idle_sleeps;
  
        /* This must happen before hrtimers are migrated! */
-       tick_sched_timer_cancel(ts);
-       /*
-        * If the clockevents doesn't support CLOCK_EVT_STATE_ONESHOT_STOPPED,
-        * make sure not to call low-res tick handler.
-        */
-       if (tick_sched_flag_test(ts, TS_FLAG_NOHZ))
-               dev->event_handler = clockevents_handle_noop;
+       if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
+               hrtimer_cancel(&ts->sched_timer);
  
        idle_sleeptime = ts->idle_sleeptime;
        iowait_sleeptime = ts->iowait_sleeptime;
diff --combined kernel/time/timekeeping.c
index cdd61990cd14ad0430783f00160cfdec759e5cdb,d115adebc418c77837a6096a784e75906a080fab..0ca85ff4fbb4aeff74b310d188dc78426fc5004d
@@@ -30,8 -30,9 +30,9 @@@
  #include "timekeeping_internal.h"
  
  #define TK_CLEAR_NTP          (1 << 0)
- #define TK_MIRROR             (1 << 1)
- #define TK_CLOCK_WAS_SET      (1 << 2)
+ #define TK_CLOCK_WAS_SET      (1 << 1)
+ #define TK_UPDATE_ALL         (TK_CLEAR_NTP | TK_CLOCK_WAS_SET)
  
  enum timekeeping_adv_mode {
        /* Update timekeeper when a tick has passed */
        TK_ADV_FREQ
  };
  
- DEFINE_RAW_SPINLOCK(timekeeper_lock);
  /*
   * The most important data for readout fits into a single 64 byte
   * cache line.
   */
- static struct {
+ struct tk_data {
        seqcount_raw_spinlock_t seq;
        struct timekeeper       timekeeper;
- } tk_core ____cacheline_aligned = {
-       .seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_core.seq, &timekeeper_lock),
- };
+       struct timekeeper       shadow_timekeeper;
+       raw_spinlock_t          lock;
+ } ____cacheline_aligned;
  
- static struct timekeeper shadow_timekeeper;
+ static struct tk_data tk_core;
  
  /* flag for if timekeeping is suspended */
  int __read_mostly timekeeping_suspended;
@@@ -114,6 -113,19 +113,19 @@@ static struct tk_fast tk_fast_raw  ____
        .base[1] = FAST_TK_INIT,
  };
  
+ unsigned long timekeeper_lock_irqsave(void)
+ {
+       unsigned long flags;
+       raw_spin_lock_irqsave(&tk_core.lock, flags);
+       return flags;
+ }
+ void timekeeper_unlock_irqrestore(unsigned long flags)
+ {
+       raw_spin_unlock_irqrestore(&tk_core.lock, flags);
+ }
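These wrappers exist because tk_core and its lock are now static to this file; external update paths such as the vDSO code (see kernel/time/vsyscall.c further down) bracket their critical section with them, roughly:

	unsigned long flags = timekeeper_lock_irqsave();

	/* ... update state that must stay consistent with the timekeeper ... */

	timekeeper_unlock_irqrestore(flags);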
  /*
   * Multigrain timestamps require tracking the latest fine-grained timestamp
   * that has been issued, and never returning a coarse-grained timestamp that is
@@@ -178,13 -190,15 +190,15 @@@ static void tk_set_wall_to_mono(struct 
        WARN_ON_ONCE(tk->offs_real != timespec64_to_ktime(tmp));
        tk->wall_to_monotonic = wtm;
        set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
-       tk->offs_real = timespec64_to_ktime(tmp);
-       tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0));
+       /* Paired with READ_ONCE() in ktime_mono_to_any() */
+       WRITE_ONCE(tk->offs_real, timespec64_to_ktime(tmp));
+       WRITE_ONCE(tk->offs_tai, ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0)));
  }
  
  static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
  {
-       tk->offs_boot = ktime_add(tk->offs_boot, delta);
+       /* Paired with READ_ONCE() in ktime_mono_to_any() */
+       WRITE_ONCE(tk->offs_boot, ktime_add(tk->offs_boot, delta));
        /*
         * Timespec representation for VDSO update to avoid 64bit division
         * on every update.
   * the tkr's clocksource may change between the read reference, and the
   * clock reference passed to the read function.  This can cause crashes if
   * the wrong clocksource is passed to the wrong read function.
-  * This isn't necessary to use when holding the timekeeper_lock or doing
+  * This isn't necessary to use when holding the tk_core.lock or doing
   * a read of the fast-timekeeper tkrs (which is protected by its own locking
   * and update logic).
   */
@@@ -212,97 -226,6 +226,6 @@@ static inline u64 tk_clock_read(const s
        return clock->read(clock);
  }
  
- #ifdef CONFIG_DEBUG_TIMEKEEPING
- #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
- static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
- {
-       u64 max_cycles = tk->tkr_mono.clock->max_cycles;
-       const char *name = tk->tkr_mono.clock->name;
-       if (offset > max_cycles) {
-               printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
-                               offset, name, max_cycles);
-               printk_deferred("         timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
-       } else {
-               if (offset > (max_cycles >> 1)) {
-                       printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
-                                       offset, name, max_cycles >> 1);
-                       printk_deferred("      timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
-               }
-       }
-       if (tk->underflow_seen) {
-               if (jiffies - tk->last_warning > WARNING_FREQ) {
-                       printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
-                       printk_deferred("         Please report this, consider using a different clocksource, if possible.\n");
-                       printk_deferred("         Your kernel is probably still fine.\n");
-                       tk->last_warning = jiffies;
-               }
-               tk->underflow_seen = 0;
-       }
-       if (tk->overflow_seen) {
-               if (jiffies - tk->last_warning > WARNING_FREQ) {
-                       printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
-                       printk_deferred("         Please report this, consider using a different clocksource, if possible.\n");
-                       printk_deferred("         Your kernel is probably still fine.\n");
-                       tk->last_warning = jiffies;
-               }
-               tk->overflow_seen = 0;
-       }
- }
- static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles);
- static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr)
- {
-       struct timekeeper *tk = &tk_core.timekeeper;
-       u64 now, last, mask, max, delta;
-       unsigned int seq;
-       /*
-        * Since we're called holding a seqcount, the data may shift
-        * under us while we're doing the calculation. This can cause
-        * false positives, since we'd note a problem but throw the
-        * results away. So nest another seqcount here to atomically
-        * grab the points we are checking with.
-        */
-       do {
-               seq = read_seqcount_begin(&tk_core.seq);
-               now = tk_clock_read(tkr);
-               last = tkr->cycle_last;
-               mask = tkr->mask;
-               max = tkr->clock->max_cycles;
-       } while (read_seqcount_retry(&tk_core.seq, seq));
-       delta = clocksource_delta(now, last, mask);
-       /*
-        * Try to catch underflows by checking if we are seeing small
-        * mask-relative negative values.
-        */
-       if (unlikely((~delta & mask) < (mask >> 3)))
-               tk->underflow_seen = 1;
-       /* Check for multiplication overflows */
-       if (unlikely(delta > max))
-               tk->overflow_seen = 1;
-       /* timekeeping_cycles_to_ns() handles both under and overflow */
-       return timekeeping_cycles_to_ns(tkr, now);
- }
- #else
- static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset)
- {
- }
- static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr)
- {
-       BUG();
- }
- #endif
  /**
   * tk_setup_internals - Set up internals to use clocksource clock.
   *
@@@ -407,19 -330,11 +330,11 @@@ static inline u64 timekeeping_cycles_to
        return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift;
  }
  
- static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr)
+ static __always_inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
  {
        return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr));
  }
  
- static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
- {
-       if (IS_ENABLED(CONFIG_DEBUG_TIMEKEEPING))
-               return timekeeping_debug_get_ns(tkr);
-       return __timekeeping_get_ns(tkr);
- }
  /**
   * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
   * @tkr: Timekeeping readout base from which we take the update
   * We want to use this from any context including NMI and tracing /
   * instrumenting the timekeeping code itself.
   *
 - * Employ the latch technique; see @raw_write_seqcount_latch.
 + * Employ the latch technique; see @write_seqcount_latch.
   *
   * So if a NMI hits the update of base[0] then it will use base[1]
   * which is still consistent. In the worst case this can result is a
@@@ -441,18 -356,16 +356,18 @@@ static void update_fast_timekeeper(cons
        struct tk_read_base *base = tkf->base;
  
        /* Force readers off to base[1] */
 -      raw_write_seqcount_latch(&tkf->seq);
 +      write_seqcount_latch_begin(&tkf->seq);
  
        /* Update base[0] */
        memcpy(base, tkr, sizeof(*base));
  
        /* Force readers back to base[0] */
 -      raw_write_seqcount_latch(&tkf->seq);
 +      write_seqcount_latch(&tkf->seq);
  
        /* Update base[1] */
        memcpy(base + 1, base, sizeof(*base));
 +
 +      write_seqcount_latch_end(&tkf->seq);
  }
  
  static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
        u64 now;
  
        do {
 -              seq = raw_read_seqcount_latch(&tkf->seq);
 +              seq = read_seqcount_latch(&tkf->seq);
                tkr = tkf->base + (seq & 0x01);
                now = ktime_to_ns(tkr->base);
-               now += __timekeeping_get_ns(tkr);
+               now += timekeeping_get_ns(tkr);
 -      } while (raw_read_seqcount_latch_retry(&tkf->seq, seq));
 +      } while (read_seqcount_latch_retry(&tkf->seq, seq));
  
        return now;
  }
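In isolation, the latch technique the comment refers to amounts to two copies of the data selected by the low bit of a sequence count. A minimal self-contained sketch (names and barrier placement are illustrative, simplified from write_seqcount_latch()):

struct latch_sketch {
	unsigned int	seq;		/* LSB selects the copy readers use */
	u64		data[2];
};

static void latch_sketch_write(struct latch_sketch *l, u64 val)
{
	WRITE_ONCE(l->seq, l->seq + 1);	/* force readers off to data[1] */
	smp_wmb();
	l->data[0] = val;		/* update data[0] */
	smp_wmb();
	WRITE_ONCE(l->seq, l->seq + 1);	/* force readers back to data[0] */
	smp_wmb();
	l->data[1] = val;		/* update data[1] */
}

static u64 latch_sketch_read(struct latch_sketch *l)
{
	unsigned int seq;
	u64 val;

	do {
		seq = READ_ONCE(l->seq);
		smp_rmb();
		val = l->data[seq & 0x01];
		smp_rmb();
	} while (seq != READ_ONCE(l->seq));

	return val;
}

An NMI interrupting the middle of the write path observes an odd sequence count and is steered to the copy which is not currently being updated, which is exactly the base[0]/base[1] dance above.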
@@@ -536,7 -449,7 +451,7 @@@ EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns
   *    timekeeping_inject_sleeptime64()
   *    __timekeeping_inject_sleeptime(tk, delta);
   *                                                 timestamp();
-  *    timekeeping_update(tk, TK_CLEAR_NTP...);
+  *    timekeeping_update_staged(tkd, TK_CLEAR_NTP...);
   *
   * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
   * partially updated.  Since the tk->offs_boot update is a rare event, this
@@@ -581,7 -494,7 +496,7 @@@ static __always_inline u64 __ktime_get_
                tkr = tkf->base + (seq & 0x01);
                basem = ktime_to_ns(tkr->base);
                baser = ktime_to_ns(tkr->base_real);
-               delta = __timekeeping_get_ns(tkr);
+               delta = timekeeping_get_ns(tkr);
        } while (raw_read_seqcount_latch_retry(&tkf->seq, seq));
  
        if (mono)
@@@ -695,13 -608,11 +610,11 @@@ static void update_pvclock_gtod(struct 
  int pvclock_gtod_register_notifier(struct notifier_block *nb)
  {
        struct timekeeper *tk = &tk_core.timekeeper;
-       unsigned long flags;
        int ret;
  
-       raw_spin_lock_irqsave(&timekeeper_lock, flags);
+       guard(raw_spinlock_irqsave)(&tk_core.lock);
        ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
        update_pvclock_gtod(tk, true);
-       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  
        return ret;
  }
@@@ -714,14 -625,8 +627,8 @@@ EXPORT_SYMBOL_GPL(pvclock_gtod_register
   */
  int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
  {
-       unsigned long flags;
-       int ret;
-       raw_spin_lock_irqsave(&timekeeper_lock, flags);
-       ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
-       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
-       return ret;
+       guard(raw_spinlock_irqsave)(&tk_core.lock);
+       return raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
  }
  EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
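Both conversions rely on the scope-based cleanup helpers from <linux/cleanup.h>; a brief sketch of their semantics:

static void guard_sketch(void)
{
	/* Critical section bounded by the braces */
	scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
		/* tk_core.lock held here, dropped at the closing brace */
	}

	/* Held from acquisition to end of scope, on every return path */
	guard(raw_spinlock_irqsave)(&tk_core.lock);
}

This removes the flags bookkeeping and the unlock-on-every-error-path boilerplate visible in the deleted lines.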
  
@@@ -736,6 -641,18 +643,18 @@@ static inline void tk_update_leap_state
                tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real);
  }
  
+ /*
+  * Leap state update for both the shadow and the real timekeeper.
+  * Separate to spare a full memcpy() of the timekeeper.
+  */
+ static void tk_update_leap_state_all(struct tk_data *tkd)
+ {
+       write_seqcount_begin(&tkd->seq);
+       tk_update_leap_state(&tkd->shadow_timekeeper);
+       tkd->timekeeper.next_leap_ktime = tkd->shadow_timekeeper.next_leap_ktime;
+       write_seqcount_end(&tkd->seq);
+ }
  /*
   * Update the ktime_t based scalar nsec members of the timekeeper
   */
@@@ -769,9 -686,30 +688,30 @@@ static inline void tk_update_ktime_data
        tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
  }
  
- /* must hold timekeeper_lock */
- static void timekeeping_update(struct timekeeper *tk, unsigned int action)
+ /*
+  * Restore the shadow timekeeper from the real timekeeper.
+  */
+ static void timekeeping_restore_shadow(struct tk_data *tkd)
+ {
+       lockdep_assert_held(&tkd->lock);
+       memcpy(&tkd->shadow_timekeeper, &tkd->timekeeper, sizeof(tkd->timekeeper));
+ }
+ static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int action)
  {
+       struct timekeeper *tk = &tk_core.shadow_timekeeper;
+       lockdep_assert_held(&tkd->lock);
+       /*
+        * Block out readers before running the updates below because that
+        * updates VDSO and other time related infrastructure. Not blocking
+        * the readers might let a reader see time going backwards when
+        * reading from the VDSO after the VDSO update and then reading in
+        * the kernel from the timekeeper before that got updated.
+        */
+       write_seqcount_begin(&tkd->seq);
        if (action & TK_CLEAR_NTP) {
                tk->ntp_error = 0;
               ntp_clear();
       }
  
        if (action & TK_CLOCK_WAS_SET)
                tk->clock_was_set_seq++;
        /*
-        * The mirroring of the data to the shadow-timekeeper needs
-        * to happen last here to ensure we don't over-write the
-        * timekeeper structure on the next update with stale data
+        * Update the real timekeeper.
+        *
+        * We could avoid this memcpy() by switching pointers, but that has
+        * the downside that the reader side would no longer benefit from
+        * the cacheline-optimized data layout of the timekeeper and would
+        * require another indirection.
         */
-       if (action & TK_MIRROR)
-               memcpy(&shadow_timekeeper, &tk_core.timekeeper,
-                      sizeof(tk_core.timekeeper));
+       memcpy(&tkd->timekeeper, tk, sizeof(*tk));
+       write_seqcount_end(&tkd->seq);
  }
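All setters converted below (do_settimeofday64(), timekeeping_inject_offset(), change_clocksource(), ...) follow the same shape; condensed into one hypothetical helper, set_time_sketch(), built only from the primitives above:

static int set_time_sketch(const struct timespec64 *ts)
{
	struct timekeeper *tks = &tk_core.shadow_timekeeper;

	guard(raw_spinlock_irqsave)(&tk_core.lock);

	/* All modifications happen on the shadow copy */
	timekeeping_forward_now(tks);

	if (!timespec64_valid_settod(ts)) {
		/* Discard the partial update by re-syncing the shadow */
		timekeeping_restore_shadow(&tk_core);
		return -EINVAL;
	}

	tk_set_xtime(tks, ts);
	/* Publish: seqcount-protected memcpy() of shadow -> real timekeeper */
	timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
	return 0;
}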
  
  /**
@@@ -949,6 -890,14 +892,14 @@@ ktime_t ktime_mono_to_any(ktime_t tmono
        unsigned int seq;
        ktime_t tconv;
  
+       if (IS_ENABLED(CONFIG_64BIT)) {
+               /*
+                * Paired with WRITE_ONCE()s in tk_set_wall_to_mono() and
+                * tk_update_sleep_time().
+                */
+               return ktime_add(tmono, READ_ONCE(*offset));
+       }
        do {
                seq = read_seqcount_begin(&tk_core.seq);
                tconv = ktime_add(tmono, *offset);
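The CONFIG_64BIT shortcut is safe because an aligned 64-bit load cannot tear in hardware; the READ_ONCE()/WRITE_ONCE() pairing only has to stop the compiler from splitting or re-reading the access. On 32-bit the two halves of the ktime_t could come from different updates, so the seqcount retry loop remains. A sketch of the hazard (both function names are made up):

/* BROKEN on 32-bit: may observe half old, half new offset */
static ktime_t read_offset_plain(const ktime_t *offset)
{
	return *offset;	/* compiler may emit two 32-bit loads */
}

/* 32-bit safe: retry until both halves belong to the same update */
static ktime_t read_offset_seq(const ktime_t *offset)
{
	unsigned int seq;
	ktime_t val;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		val = *offset;
	} while (read_seqcount_retry(&tk_core.seq, seq));

	return val;
}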
@@@ -1079,6 -1028,7 +1030,7 @@@ void ktime_get_snapshot(struct system_t
        unsigned int seq;
        ktime_t base_raw;
        ktime_t base_real;
+       ktime_t base_boot;
        u64 nsec_raw;
        u64 nsec_real;
        u64 now;
                systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
                base_real = ktime_add(tk->tkr_mono.base,
                                      tk_core.timekeeper.offs_real);
+               base_boot = ktime_add(tk->tkr_mono.base,
+                                     tk_core.timekeeper.offs_boot);
                base_raw = tk->tkr_raw.base;
                nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now);
                nsec_raw  = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
  
        systime_snapshot->cycles = now;
        systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
+       systime_snapshot->boot = ktime_add_ns(base_boot, nsec_real);
        systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
  }
  EXPORT_SYMBOL_GPL(ktime_get_snapshot);
@@@ -1459,45 -1412,35 +1414,35 @@@ EXPORT_SYMBOL_GPL(timekeeping_clocksour
   */
  int do_settimeofday64(const struct timespec64 *ts)
  {
-       struct timekeeper *tk = &tk_core.timekeeper;
        struct timespec64 ts_delta, xt;
-       unsigned long flags;
-       int ret = 0;
  
        if (!timespec64_valid_settod(ts))
                return -EINVAL;
  
-       raw_spin_lock_irqsave(&timekeeper_lock, flags);
-       write_seqcount_begin(&tk_core.seq);
-       timekeeping_forward_now(tk);
+       scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
+               struct timekeeper *tks = &tk_core.shadow_timekeeper;
  
-       xt = tk_xtime(tk);
-       ts_delta = timespec64_sub(*ts, xt);
+               timekeeping_forward_now(tks);
  
-       if (timespec64_compare(&tk->wall_to_monotonic, &ts_delta) > 0) {
-               ret = -EINVAL;
-               goto out;
-       }
-       tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts_delta));
+               xt = tk_xtime(tks);
+               ts_delta = timespec64_sub(*ts, xt);
  
-       tk_set_xtime(tk, ts);
- out:
-       timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
+               if (timespec64_compare(&tks->wall_to_monotonic, &ts_delta) > 0) {
+                       timekeeping_restore_shadow(&tk_core);
+                       return -EINVAL;
+               }
  
-       write_seqcount_end(&tk_core.seq);
-       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+               tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, ts_delta));
+               tk_set_xtime(tks, ts);
+               timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
+       }
  
        /* Signal hrtimers about time change */
        clock_was_set(CLOCK_SET_WALL);
  
-       if (!ret) {
-               audit_tk_injoffset(ts_delta);
-               add_device_randomness(ts, sizeof(*ts));
-       }
-       return ret;
+       audit_tk_injoffset(ts_delta);
+       add_device_randomness(ts, sizeof(*ts));
+       return 0;
  }
  EXPORT_SYMBOL(do_settimeofday64);
  
   */
  static int timekeeping_inject_offset(const struct timespec64 *ts)
  {
-       struct timekeeper *tk = &tk_core.timekeeper;
-       unsigned long flags;
-       struct timespec64 tmp;
-       int ret = 0;
        if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC)
                return -EINVAL;
  
-       raw_spin_lock_irqsave(&timekeeper_lock, flags);
-       write_seqcount_begin(&tk_core.seq);
-       timekeeping_forward_now(tk);
-       /* Make sure the proposed value is valid */
-       tmp = timespec64_add(tk_xtime(tk), *ts);
-       if (timespec64_compare(&tk->wall_to_monotonic, ts) > 0 ||
-           !timespec64_valid_settod(&tmp)) {
-               ret = -EINVAL;
-               goto error;
-       }
+       scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
+               struct timekeeper *tks = &tk_core.shadow_timekeeper;
+               struct timespec64 tmp;
  
-       tk_xtime_add(tk, ts);
-       tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *ts));
+               timekeeping_forward_now(tks);
  
- error: /* even if we error out, we forwarded the time, so call update */
-       timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
+               /* Make sure the proposed value is valid */
+               tmp = timespec64_add(tk_xtime(tks), *ts);
+               if (timespec64_compare(&tks->wall_to_monotonic, ts) > 0 ||
+                   !timespec64_valid_settod(&tmp)) {
+                       timekeeping_restore_shadow(&tk_core);
+                       return -EINVAL;
+               }
  
-       write_seqcount_end(&tk_core.seq);
-       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+               tk_xtime_add(tks, ts);
+               tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, *ts));
+               timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
+       }
  
        /* Signal hrtimers about time change */
        clock_was_set(CLOCK_SET_WALL);
-       return ret;
+       return 0;
  }
  
  /*
@@@ -1595,43 -1529,34 +1531,34 @@@ static void __timekeeping_set_tai_offse
   */
  static int change_clocksource(void *data)
  {
-       struct timekeeper *tk = &tk_core.timekeeper;
-       struct clocksource *new, *old = NULL;
-       unsigned long flags;
-       bool change = false;
-       new = (struct clocksource *) data;
+       struct clocksource *new = data, *old = NULL;
  
        /*
-        * If the cs is in module, get a module reference. Succeeds
-        * for built-in code (owner == NULL) as well.
+        * If the clocksource is in a module, get a module reference.
+        * Succeeds for built-in code (owner == NULL) as well. Abort if the
+        * reference can't be acquired.
         */
-       if (try_module_get(new->owner)) {
-               if (!new->enable || new->enable(new) == 0)
-                       change = true;
-               else
-                       module_put(new->owner);
-       }
-       raw_spin_lock_irqsave(&timekeeper_lock, flags);
-       write_seqcount_begin(&tk_core.seq);
-       timekeeping_forward_now(tk);
+       if (!try_module_get(new->owner))
+               return 0;
  
-       if (change) {
-               old = tk->tkr_mono.clock;
-               tk_setup_internals(tk, new);
+       /* Abort if the device can't be enabled */
+       if (new->enable && new->enable(new) != 0) {
+               module_put(new->owner);
+               return 0;
        }
  
-       timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
+       scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
+               struct timekeeper *tks = &tk_core.shadow_timekeeper;
  
-       write_seqcount_end(&tk_core.seq);
-       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+               timekeeping_forward_now(tks);
+               old = tks->tkr_mono.clock;
+               tk_setup_internals(tks, new);
+               timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
+       }
  
        if (old) {
                if (old->disable)
                        old->disable(old);
                module_put(old->owner);
        }
  
@@@ -1756,6 -1681,12 +1683,12 @@@ read_persistent_wall_and_boot_offset(st
        *boot_offset = ns_to_timespec64(local_clock());
  }
  
+ static __init void tkd_basic_setup(struct tk_data *tkd)
+ {
+       raw_spin_lock_init(&tkd->lock);
+       seqcount_raw_spinlock_init(&tkd->seq, &tkd->lock);
+ }
  /*
   * Flag reflecting whether timekeeping_resume() has injected sleeptime.
   *
@@@ -1780,9 -1711,10 +1713,10 @@@ static bool persistent_clock_exists
  void __init timekeeping_init(void)
  {
        struct timespec64 wall_time, boot_offset, wall_to_mono;
-       struct timekeeper *tk = &tk_core.timekeeper;
+       struct timekeeper *tks = &tk_core.shadow_timekeeper;
        struct clocksource *clock;
-       unsigned long flags;
+       tkd_basic_setup(&tk_core);
  
        read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);
        if (timespec64_valid_settod(&wall_time) &&
         */
        wall_to_mono = timespec64_sub(boot_offset, wall_time);
  
-       raw_spin_lock_irqsave(&timekeeper_lock, flags);
-       write_seqcount_begin(&tk_core.seq);
+       guard(raw_spinlock_irqsave)(&tk_core.lock);
        ntp_init();
  
        clock = clocksource_default_clock();
        if (clock->enable)
                clock->enable(clock);
-       tk_setup_internals(tk, clock);
-       tk_set_xtime(tk, &wall_time);
-       tk->raw_sec = 0;
+       tk_setup_internals(tks, clock);
  
-       tk_set_wall_to_mono(tk, wall_to_mono);
+       tk_set_xtime(tks, &wall_time);
+       tks->raw_sec = 0;
  
-       timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
+       tk_set_wall_to_mono(tks, wall_to_mono);
  
-       write_seqcount_end(&tk_core.seq);
-       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+       timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET);
  }
  
  /* time in seconds when suspend began for persistent clock */
@@@ -1897,22 -1826,14 +1828,14 @@@ bool timekeeping_rtc_skipsuspend(void
   */
  void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
  {
-       struct timekeeper *tk = &tk_core.timekeeper;
-       unsigned long flags;
+       scoped_guard(raw_spinlock_irqsave, &tk_core.lock) {
+               struct timekeeper *tks = &tk_core.shadow_timekeeper;
  
-       raw_spin_lock_irqsave(&timekeeper_lock, flags);
-       write_seqcount_begin(&tk_core.seq);
-       suspend_timing_needed = false;
-       timekeeping_forward_now(tk);
-       __timekeeping_inject_sleeptime(tk, delta);
-       timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
-       write_seqcount_end(&tk_core.seq);
-       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+               suspend_timing_needed = false;
+               timekeeping_forward_now(tks);
+               __timekeeping_inject_sleeptime(tks, delta);
+               timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
+       }
  
        /* Signal hrtimers about time change */
        clock_was_set(CLOCK_SET_WALL | CLOCK_SET_BOOT);
   */
  void timekeeping_resume(void)
  {
-       struct timekeeper *tk = &tk_core.timekeeper;
-       struct clocksource *clock = tk->tkr_mono.clock;
-       unsigned long flags;
+       struct timekeeper *tks = &tk_core.shadow_timekeeper;
+       struct clocksource *clock = tks->tkr_mono.clock;
        struct timespec64 ts_new, ts_delta;
-       u64 cycle_now, nsec;
        bool inject_sleeptime = false;
+       u64 cycle_now, nsec;
+       unsigned long flags;
  
        read_persistent_clock64(&ts_new);
  
        clockevents_resume();
        clocksource_resume();
  
-       raw_spin_lock_irqsave(&timekeeper_lock, flags);
-       write_seqcount_begin(&tk_core.seq);
+       raw_spin_lock_irqsave(&tk_core.lock, flags);
  
        /*
         * After system resumes, we need to calculate the suspended time and
         * The less preferred source will only be tried if there is no better
         * usable source. The rtc part is handled separately in rtc core code.
         */
-       cycle_now = tk_clock_read(&tk->tkr_mono);
+       cycle_now = tk_clock_read(&tks->tkr_mono);
        nsec = clocksource_stop_suspend_timing(clock, cycle_now);
        if (nsec > 0) {
                ts_delta = ns_to_timespec64(nsec);
  
        if (inject_sleeptime) {
                suspend_timing_needed = false;
-               __timekeeping_inject_sleeptime(tk, &ts_delta);
+               __timekeeping_inject_sleeptime(tks, &ts_delta);
        }
  
        /* Re-base the last cycle value */
-       tk->tkr_mono.cycle_last = cycle_now;
-       tk->tkr_raw.cycle_last  = cycle_now;
+       tks->tkr_mono.cycle_last = cycle_now;
+       tks->tkr_raw.cycle_last  = cycle_now;
  
-       tk->ntp_error = 0;
+       tks->ntp_error = 0;
        timekeeping_suspended = 0;
-       timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
-       write_seqcount_end(&tk_core.seq);
-       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+       timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET);
+       raw_spin_unlock_irqrestore(&tk_core.lock, flags);
  
        touch_softlockup_watchdog();
  
  
  int timekeeping_suspend(void)
  {
-       struct timekeeper *tk = &tk_core.timekeeper;
-       unsigned long flags;
-       struct timespec64               delta, delta_delta;
-       static struct timespec64        old_delta;
+       struct timekeeper *tks = &tk_core.shadow_timekeeper;
+       struct timespec64 delta, delta_delta;
+       static struct timespec64 old_delta;
        struct clocksource *curr_clock;
+       unsigned long flags;
        u64 cycle_now;
  
        read_persistent_clock64(&timekeeping_suspend_time);
  
        suspend_timing_needed = true;
  
-       raw_spin_lock_irqsave(&timekeeper_lock, flags);
-       write_seqcount_begin(&tk_core.seq);
-       timekeeping_forward_now(tk);
+       raw_spin_lock_irqsave(&tk_core.lock, flags);
+       timekeeping_forward_now(tks);
        timekeeping_suspended = 1;
  
        /*
         * just read from the current clocksource. Save this to potentially
         * use in suspend timing.
         */
-       curr_clock = tk->tkr_mono.clock;
-       cycle_now = tk->tkr_mono.cycle_last;
+       curr_clock = tks->tkr_mono.clock;
+       cycle_now = tks->tkr_mono.cycle_last;
        clocksource_start_suspend_timing(curr_clock, cycle_now);
  
        if (persistent_clock_exists) {
                 * try to compensate so the difference in system time
                 * and persistent_clock time stays close to constant.
                 */
-               delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time);
+               delta = timespec64_sub(tk_xtime(tks), timekeeping_suspend_time);
                delta_delta = timespec64_sub(delta, old_delta);
                if (abs(delta_delta.tv_sec) >= 2) {
                        /*
                }
        }
  
-       timekeeping_update(tk, TK_MIRROR);
-       halt_fast_timekeeper(tk);
-       write_seqcount_end(&tk_core.seq);
-       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+       timekeeping_update_from_shadow(&tk_core, 0);
+       halt_fast_timekeeper(tks);
+       raw_spin_unlock_irqrestore(&tk_core.lock, flags);
  
        tick_suspend();
        clocksource_suspend();
@@@ -2149,16 -2066,17 +2068,17 @@@ static __always_inline void timekeeping
   */
  static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
  {
+       u64 ntp_tl = ntp_tick_length();
        u32 mult;
  
        /*
         * Determine the multiplier from the current NTP tick length.
         * Avoid expensive division when the tick length doesn't change.
         */
-       if (likely(tk->ntp_tick == ntp_tick_length())) {
+       if (likely(tk->ntp_tick == ntp_tl)) {
                mult = tk->tkr_mono.mult - tk->ntp_err_mult;
        } else {
-               tk->ntp_tick = ntp_tick_length();
+               tk->ntp_tick = ntp_tl;
                mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) -
                                 tk->xtime_remainder, tk->cycle_interval);
        }
@@@ -2297,28 -2215,24 +2217,24 @@@ static u64 logarithmic_accumulation(str
   */
  static bool timekeeping_advance(enum timekeeping_adv_mode mode)
  {
+       struct timekeeper *tk = &tk_core.shadow_timekeeper;
        struct timekeeper *real_tk = &tk_core.timekeeper;
-       struct timekeeper *tk = &shadow_timekeeper;
-       u64 offset;
-       int shift = 0, maxshift;
        unsigned int clock_set = 0;
-       unsigned long flags;
+       int shift = 0, maxshift;
+       u64 offset;
  
-       raw_spin_lock_irqsave(&timekeeper_lock, flags);
+       guard(raw_spinlock_irqsave)(&tk_core.lock);
  
        /* Make sure we're fully resumed: */
        if (unlikely(timekeeping_suspended))
-               goto out;
+               return false;
  
        offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
                                   tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
  
        /* Check if there's really nothing to do */
        if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
-               goto out;
-       /* Do some additional sanity checking */
-       timekeeping_check_update(tk, offset);
+               return false;
  
        /*
         * With NO_HZ we may have to accumulate many cycle_intervals
        maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
        shift = min(shift, maxshift);
        while (offset >= tk->cycle_interval) {
-               offset = logarithmic_accumulation(tk, offset, shift,
-                                                       &clock_set);
+               offset = logarithmic_accumulation(tk, offset, shift, &clock_set);
                if (offset < tk->cycle_interval<<shift)
                        shift--;
        }
         */
        clock_set |= accumulate_nsecs_to_secs(tk);
  
-       write_seqcount_begin(&tk_core.seq);
-       /*
-        * Update the real timekeeper.
-        *
-        * We could avoid this memcpy by switching pointers, but that
-        * requires changes to all other timekeeper usage sites as
-        * well, i.e. move the timekeeper pointer getter into the
-        * spinlocked/seqcount protected sections. And we trade this
-        * memcpy under the tk_core.seq against one before we start
-        * updating.
-        */
-       timekeeping_update(tk, clock_set);
-       memcpy(real_tk, tk, sizeof(*tk));
-       /* The memcpy must come last. Do not put anything here! */
-       write_seqcount_end(&tk_core.seq);
- out:
-       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+       timekeeping_update_from_shadow(&tk_core, clock_set);
  
        return !!clock_set;
  }
@@@ -2658,13 -2555,10 +2557,10 @@@ EXPORT_SYMBOL_GPL(random_get_entropy_fa
   */
  int do_adjtimex(struct __kernel_timex *txc)
  {
-       struct timekeeper *tk = &tk_core.timekeeper;
        struct audit_ntp_data ad;
        bool offset_set = false;
        bool clock_set = false;
        struct timespec64 ts;
-       unsigned long flags;
-       s32 orig_tai, tai;
        int ret;
  
        /* Validate the data before disabling interrupts */
  
        if (txc->modes & ADJ_SETOFFSET) {
                struct timespec64 delta;
                delta.tv_sec  = txc->time.tv_sec;
                delta.tv_nsec = txc->time.tv_usec;
                if (!(txc->modes & ADJ_NANO))
        ktime_get_real_ts64(&ts);
        add_device_randomness(&ts, sizeof(ts));
  
-       raw_spin_lock_irqsave(&timekeeper_lock, flags);
-       write_seqcount_begin(&tk_core.seq);
+       scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
+               struct timekeeper *tks = &tk_core.shadow_timekeeper;
+               s32 orig_tai, tai;
  
-       orig_tai = tai = tk->tai_offset;
-       ret = __do_adjtimex(txc, &ts, &tai, &ad);
+               orig_tai = tai = tks->tai_offset;
+               ret = __do_adjtimex(txc, &ts, &tai, &ad);
  
-       if (tai != orig_tai) {
-               __timekeeping_set_tai_offset(tk, tai);
-               timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
-               clock_set = true;
+               if (tai != orig_tai) {
+                       __timekeeping_set_tai_offset(tks, tai);
+                       timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET);
+                       clock_set = true;
+               } else {
+                       tk_update_leap_state_all(&tk_core);
+               }
        }
-       tk_update_leap_state(tk);
-       write_seqcount_end(&tk_core.seq);
-       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  
        audit_ntp_log(&ad);
  
   */
  void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
  {
-       unsigned long flags;
-       raw_spin_lock_irqsave(&timekeeper_lock, flags);
-       write_seqcount_begin(&tk_core.seq);
+       guard(raw_spinlock_irqsave)(&tk_core.lock);
        __hardpps(phase_ts, raw_ts);
-       write_seqcount_end(&tk_core.seq);
-       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  }
  EXPORT_SYMBOL(hardpps);
  #endif /* CONFIG_NTP_PPS */
diff --combined kernel/time/timer.c
index 06f0bc1db6d9a294cbf43e530a9e583ae7854fab,a283e524835dfbb1b17214af6f45e65f946ec72d..a5860bf6d16f97f5457a89c6e481ec8963c20c3f
@@@ -37,7 -37,6 +37,6 @@@
  #include <linux/tick.h>
  #include <linux/kallsyms.h>
  #include <linux/irq_work.h>
- #include <linux/sched/signal.h>
  #include <linux/sched/sysctl.h>
  #include <linux/sched/nohz.h>
  #include <linux/sched/debug.h>
@@@ -2422,7 -2421,8 +2421,8 @@@ static inline void __run_timers(struct 
  
  static void __run_timer_base(struct timer_base *base)
  {
-       if (time_before(jiffies, base->next_expiry))
+       /* Can race against a remote CPU updating next_expiry under the lock */
+       if (time_before(jiffies, READ_ONCE(base->next_expiry)))
                return;
  
        timer_base_lock_expiry(base);
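This is the common racy-fast-path shape: the READ_ONCE() guarantees a single untorn load of next_expiry, and a stale value is tolerated because the authoritative state is always re-checked under base->lock before any timer is expired. Roughly (run_if_due_sketch() is a made-up name, and the locking is simplified):

static void run_if_due_sketch(struct timer_base *base)
{
	/* Racy fast path: a stale value at worst costs a lock round trip */
	if (time_before(jiffies, READ_ONCE(base->next_expiry)))
		return;

	raw_spin_lock_irq(&base->lock);
	/* Authoritative recheck under the lock before expiring timers */
	if (!time_before(jiffies, base->next_expiry))
		__run_timers(base);
	raw_spin_unlock_irq(&base->lock);
}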
@@@ -2499,7 -2499,7 +2499,7 @@@ static void run_local_timers(void
                 */
                if (time_after_eq(jiffies, READ_ONCE(base->next_expiry)) ||
                    (i == BASE_DEF && tmigr_requires_handle_remote())) {
 -                      raise_softirq(TIMER_SOFTIRQ);
 +                      raise_timer_softirq(TIMER_SOFTIRQ);
                        return;
                }
        }
@@@ -2526,141 -2526,6 +2526,6 @@@ void update_process_times(int user_tick
                run_posix_cpu_timers();
  }
  
- /*
-  * Since schedule_timeout()'s timer is defined on the stack, it must store
-  * the target task on the stack as well.
-  */
- struct process_timer {
-       struct timer_list timer;
-       struct task_struct *task;
- };
- static void process_timeout(struct timer_list *t)
- {
-       struct process_timer *timeout = from_timer(timeout, t, timer);
-       wake_up_process(timeout->task);
- }
- /**
-  * schedule_timeout - sleep until timeout
-  * @timeout: timeout value in jiffies
-  *
-  * Make the current task sleep until @timeout jiffies have elapsed.
-  * The function behavior depends on the current task state
-  * (see also set_current_state() description):
-  *
-  * %TASK_RUNNING - the scheduler is called, but the task does not sleep
-  * at all. That happens because sched_submit_work() does nothing for
-  * tasks in %TASK_RUNNING state.
-  *
-  * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
-  * pass before the routine returns unless the current task is explicitly
-  * woken up, (e.g. by wake_up_process()).
-  *
-  * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
-  * delivered to the current task or the current task is explicitly woken
-  * up.
-  *
-  * The current task state is guaranteed to be %TASK_RUNNING when this
-  * routine returns.
-  *
-  * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
-  * the CPU away without a bound on the timeout. In this case the return
-  * value will be %MAX_SCHEDULE_TIMEOUT.
-  *
-  * Returns 0 when the timer has expired otherwise the remaining time in
-  * jiffies will be returned. In all cases the return value is guaranteed
-  * to be non-negative.
-  */
- signed long __sched schedule_timeout(signed long timeout)
- {
-       struct process_timer timer;
-       unsigned long expire;
-       switch (timeout)
-       {
-       case MAX_SCHEDULE_TIMEOUT:
-               /*
-                * These two special cases are useful to be comfortable
-                * in the caller. Nothing more. We could take
-                * MAX_SCHEDULE_TIMEOUT from one of the negative value
-                * but I' d like to return a valid offset (>=0) to allow
-                * the caller to do everything it want with the retval.
-                */
-               schedule();
-               goto out;
-       default:
-               /*
-                * Another bit of PARANOID. Note that the retval will be
-                * 0 since no piece of kernel is supposed to do a check
-                * for a negative retval of schedule_timeout() (since it
-                * should never happens anyway). You just have the printk()
-                * that will tell you if something is gone wrong and where.
-                */
-               if (timeout < 0) {
-                       printk(KERN_ERR "schedule_timeout: wrong timeout "
-                               "value %lx\n", timeout);
-                       dump_stack();
-                       __set_current_state(TASK_RUNNING);
-                       goto out;
-               }
-       }
-       expire = timeout + jiffies;
-       timer.task = current;
-       timer_setup_on_stack(&timer.timer, process_timeout, 0);
-       __mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING);
-       schedule();
-       del_timer_sync(&timer.timer);
-       /* Remove the timer from the object tracker */
-       destroy_timer_on_stack(&timer.timer);
-       timeout = expire - jiffies;
-  out:
-       return timeout < 0 ? 0 : timeout;
- }
- EXPORT_SYMBOL(schedule_timeout);
- /*
-  * We can use __set_current_state() here because schedule_timeout() calls
-  * schedule() unconditionally.
-  */
- signed long __sched schedule_timeout_interruptible(signed long timeout)
- {
-       __set_current_state(TASK_INTERRUPTIBLE);
-       return schedule_timeout(timeout);
- }
- EXPORT_SYMBOL(schedule_timeout_interruptible);
- signed long __sched schedule_timeout_killable(signed long timeout)
- {
-       __set_current_state(TASK_KILLABLE);
-       return schedule_timeout(timeout);
- }
- EXPORT_SYMBOL(schedule_timeout_killable);
- signed long __sched schedule_timeout_uninterruptible(signed long timeout)
- {
-       __set_current_state(TASK_UNINTERRUPTIBLE);
-       return schedule_timeout(timeout);
- }
- EXPORT_SYMBOL(schedule_timeout_uninterruptible);
- /*
-  * Like schedule_timeout_uninterruptible(), except this task will not contribute
-  * to load average.
-  */
- signed long __sched schedule_timeout_idle(signed long timeout)
- {
-       __set_current_state(TASK_IDLE);
-       return schedule_timeout(timeout);
- }
- EXPORT_SYMBOL(schedule_timeout_idle);
  #ifdef CONFIG_HOTPLUG_CPU
  static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *head)
  {
@@@ -2757,59 -2622,3 +2622,3 @@@ void __init init_timers(void
        posix_cputimers_init_work();
        open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
  }
- /**
-  * msleep - sleep safely even with waitqueue interruptions
-  * @msecs: Time in milliseconds to sleep for
-  */
- void msleep(unsigned int msecs)
- {
-       unsigned long timeout = msecs_to_jiffies(msecs);
-       while (timeout)
-               timeout = schedule_timeout_uninterruptible(timeout);
- }
- EXPORT_SYMBOL(msleep);
- /**
-  * msleep_interruptible - sleep waiting for signals
-  * @msecs: Time in milliseconds to sleep for
-  */
- unsigned long msleep_interruptible(unsigned int msecs)
- {
-       unsigned long timeout = msecs_to_jiffies(msecs);
-       while (timeout && !signal_pending(current))
-               timeout = schedule_timeout_interruptible(timeout);
-       return jiffies_to_msecs(timeout);
- }
- EXPORT_SYMBOL(msleep_interruptible);
- /**
-  * usleep_range_state - Sleep for an approximate time in a given state
-  * @min:      Minimum time in usecs to sleep
-  * @max:      Maximum time in usecs to sleep
-  * @state:    State of the current task that will be while sleeping
-  *
-  * In non-atomic context where the exact wakeup time is flexible, use
-  * usleep_range_state() instead of udelay().  The sleep improves responsiveness
-  * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces
-  * power usage by allowing hrtimers to take advantage of an already-
-  * scheduled interrupt instead of scheduling a new one just for this sleep.
-  */
- void __sched usleep_range_state(unsigned long min, unsigned long max,
-                               unsigned int state)
- {
-       ktime_t exp = ktime_add_us(ktime_get(), min);
-       u64 delta = (u64)(max - min) * NSEC_PER_USEC;
-       for (;;) {
-               __set_current_state(state);
-               /* Do not return before the requested sleep time has elapsed */
-               if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
-                       break;
-       }
- }
- EXPORT_SYMBOL(usleep_range_state);
diff --combined kernel/time/vsyscall.c
index 28706a13c222df29635fd22d23f21eacb068855c,98488b20b594e6ad6b3a89a728b435de9654141a..05d3831431658227c080a89202f45e7a0af88895
@@@ -119,7 -119,7 +119,7 @@@ void update_vsyscall(struct timekeeper 
        if (clock_mode != VDSO_CLOCKMODE_NONE)
                update_vdso_data(vdata, tk);
  
 -      __arch_update_vsyscall(vdata, tk);
 +      __arch_update_vsyscall(vdata);
  
        vdso_write_end(vdata);
  
@@@ -151,9 -151,8 +151,8 @@@ void update_vsyscall_tz(void
  unsigned long vdso_update_begin(void)
  {
        struct vdso_data *vdata = __arch_get_k_vdso_data();
-       unsigned long flags;
+       unsigned long flags = timekeeper_lock_irqsave();
  
-       raw_spin_lock_irqsave(&timekeeper_lock, flags);
        vdso_write_begin(vdata);
        return flags;
  }
@@@ -172,5 -171,5 +171,5 @@@ void vdso_update_end(unsigned long flag
  
        vdso_write_end(vdata);
        __arch_sync_vdso_data(vdata);
-       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+       timekeeper_unlock_irqrestore(flags);
  }
diff --combined lib/Kconfig.debug
index 188f3fd6bdd39469bd5814f0522c23df319141d6,14977b9fc254efde25f5bb7872eb1c6a31cc3e21..652ec5f5fdfb8f584a122236ae3d5d4556451d31
@@@ -1328,19 -1328,6 +1328,6 @@@ config SCHEDSTAT
  
  endmenu
  
- config DEBUG_TIMEKEEPING
-       bool "Enable extra timekeeping sanity checking"
-       help
-         This option will enable additional timekeeping sanity checks
-         which may be helpful when diagnosing issues where timekeeping
-         problems are suspected.
-         This may include checks in the timekeeping hotpaths, so this
-         option may have a (very small) performance impact to some
-         workloads.
-         If unsure, say N.
  config DEBUG_PREEMPT
        bool "Debug preemptible kernel"
        depends on DEBUG_KERNEL && PREEMPTION && TRACE_IRQFLAGS_SUPPORT
@@@ -1409,14 -1396,22 +1396,14 @@@ config PROVE_LOCKIN
         For more details, see Documentation/locking/lockdep-design.rst.
  
  config PROVE_RAW_LOCK_NESTING
 -      bool "Enable raw_spinlock - spinlock nesting checks"
 +      bool
        depends on PROVE_LOCKING
 -      default n
 +      default y
        help
         Enable the raw_spinlock vs. spinlock nesting checks which ensure
         that the lock nesting rules for PREEMPT_RT enabled kernels are
         not violated.
  
 -       NOTE: There are known nesting problems. So if you enable this
 -       option expect lockdep splats until these problems have been fully
 -       addressed which is work in progress. This config switch allows to
 -       identify and analyze these problems. It will be removed and the
 -       check permanently enabled once the main issues have been fixed.
 -
 -       If unsure, select N.
 -
  config LOCK_STAT
        bool "Lock usage statistics"
        depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
@@@ -1897,7 -1892,7 +1884,7 @@@ config STRICT_DEVME
        bool "Filter access to /dev/mem"
        depends on MMU && DEVMEM
        depends on ARCH_HAS_DEVMEM_IS_ALLOWED || GENERIC_LIB_DEVMEM_IS_ALLOWED
 -      default y if PPC || X86 || ARM64
 +      default y if PPC || X86 || ARM64 || S390
        help
          If this option is disabled, you allow userspace (root) access to all
          of memory, including kernel and userspace memory. Accidental
@@@ -3052,7 -3047,7 +3039,7 @@@ config RUST_BUILD_ASSERT_ALLO
        bool "Allow unoptimized build-time assertions"
        depends on RUST
        help
 -        Controls how are `build_error!` and `build_assert!` handled during build.
 +        Controls how `build_error!` and `build_assert!` are handled during the build.
  
          If calls to them exist in the binary, it may indicate a violated invariant
          or that the optimizer failed to verify the invariant during compilation.
diff --combined mm/damon/core.c
index 511c3f61ab44c4cd5b8fa9751d2818279c6fd554,79efd8089d6ca36c8e1080bca265f6db705b43f3..8b8e2933dcd4a8f6c2b5ed592d1d48092fbc844e
@@@ -1412,7 -1412,7 +1412,7 @@@ static void damon_do_apply_schemes(stru
        damon_for_each_scheme(s, c) {
                struct damos_quota *quota = &s->quota;
  
 -              if (c->passed_sample_intervals != s->next_apply_sis)
 +              if (c->passed_sample_intervals < s->next_apply_sis)
                        continue;
  
                if (!s->wmarks.activated)
@@@ -1456,31 -1456,17 +1456,31 @@@ static unsigned long damon_feed_loop_ne
                unsigned long score)
  {
        const unsigned long goal = 10000;
 -      unsigned long score_goal_diff = max(goal, score) - min(goal, score);
 -      unsigned long score_goal_diff_bp = score_goal_diff * 10000 / goal;
 -      unsigned long compensation = last_input * score_goal_diff_bp / 10000;
        /* Set the minimum input to 10000 to avoid the compensation becoming zero */
        const unsigned long min_input = 10000;
 +      unsigned long score_goal_diff, compensation;
 +      bool over_achieving = score > goal;
  
 -      if (goal > score)
 +      if (score == goal)
 +              return last_input;
 +      if (score >= goal * 2)
 +              return min_input;
 +
 +      if (over_achieving)
 +              score_goal_diff = score - goal;
 +      else
 +              score_goal_diff = goal - score;
 +
 +      if (last_input < ULONG_MAX / score_goal_diff)
 +              compensation = last_input * score_goal_diff / goal;
 +      else
 +              compensation = last_input / goal * score_goal_diff;
 +
 +      if (over_achieving)
 +              return max(last_input - compensation, min_input);
 +      if (last_input < ULONG_MAX - compensation)
                return last_input + compensation;
 -      if (last_input > compensation + min_input)
 -              return last_input - compensation;
 -      return min_input;
 +      return ULONG_MAX;
  }
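Worked example of the rewritten feedback logic above, with hypothetical numbers: for goal = 10000, last_input = 1000000 and score = 12000 the loop is over-achieving, so score_goal_diff = 2000 and compensation = 1000000 * 2000 / 10000 = 200000, giving a next input of max(1000000 - 200000, 10000) = 800000. A score at or beyond 2 * goal clamps straight to min_input, and the ULONG_MAX checks keep both the compensation multiply and the final addition from overflowing.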
  
  #ifdef CONFIG_PSI
@@@ -1636,7 -1622,7 +1636,7 @@@ static void kdamond_apply_schemes(struc
        bool has_schemes_to_apply = false;
  
        damon_for_each_scheme(s, c) {
 -              if (c->passed_sample_intervals != s->next_apply_sis)
 +              if (c->passed_sample_intervals < s->next_apply_sis)
                        continue;
  
                if (!s->wmarks.activated)
        }
  
        damon_for_each_scheme(s, c) {
 -              if (c->passed_sample_intervals != s->next_apply_sis)
 +              if (c->passed_sample_intervals < s->next_apply_sis)
                        continue;
 -              s->next_apply_sis +=
 +              s->next_apply_sis = c->passed_sample_intervals +
                        (s->apply_interval_us ? s->apply_interval_us :
                         c->attrs.aggr_interval) / sample_interval;
        }
@@@ -1906,11 -1892,10 +1906,10 @@@ static unsigned long damos_wmark_wait_u
  
  static void kdamond_usleep(unsigned long usecs)
  {
-       /* See Documentation/timers/timers-howto.rst for the thresholds */
-       if (usecs > 20 * USEC_PER_MSEC)
+       if (usecs >= USLEEP_RANGE_UPPER_BOUND)
                schedule_timeout_idle(usecs_to_jiffies(usecs));
        else
-               usleep_idle_range(usecs, usecs + 1);
+               usleep_range_idle(usecs, usecs + 1);
  }
  
  /* Returns negative error code if it's not activated but should return */
@@@ -2014,7 -1999,7 +2013,7 @@@ static int kdamond_fn(void *data
                if (ctx->ops.check_accesses)
                        max_nr_accesses = ctx->ops.check_accesses(ctx);
  
 -              if (ctx->passed_sample_intervals == next_aggregation_sis) {
 +              if (ctx->passed_sample_intervals >= next_aggregation_sis) {
                        kdamond_merge_regions(ctx,
                                        max_nr_accesses / 10,
                                        sz_limit);
  
                sample_interval = ctx->attrs.sample_interval ?
                        ctx->attrs.sample_interval : 1;
 -              if (ctx->passed_sample_intervals == next_aggregation_sis) {
 +              if (ctx->passed_sample_intervals >= next_aggregation_sis) {
                        ctx->next_aggregation_sis = next_aggregation_sis +
                                ctx->attrs.aggr_interval / sample_interval;
  
                                ctx->ops.reset_aggregated(ctx);
                }
  
 -              if (ctx->passed_sample_intervals == next_ops_update_sis) {
 +              if (ctx->passed_sample_intervals >= next_ops_update_sis) {
                        ctx->next_ops_update_sis = next_ops_update_sis +
                                ctx->attrs.ops_update_interval /
                                sample_interval;
diff --combined net/bluetooth/hci_event.c
index 0bbad90ddd6f87e87c03859bae48a7901d39b634,4bd94d432bcf01d2b982e6df38b3ab4703e1b26b..7b35c58bbbeb79f2b50a02212771fb283ba5643d
@@@ -25,7 -25,7 +25,7 @@@
  
  /* Bluetooth HCI event handling. */
  
 -#include <asm/unaligned.h>
 +#include <linux/unaligned.h>
  #include <linux/crypto.h>
  #include <crypto/algapi.h>
  
@@@ -42,8 -42,6 +42,6 @@@
  #define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \
                 "\x00\x00\x00\x00\x00\x00\x00\x00"
  
- #define secs_to_jiffies(_secs) msecs_to_jiffies((_secs) * 1000)
  /* Handle HCI Event packets */
  
  static void *hci_ev_skb_pull(struct hci_dev *hdev, struct sk_buff *skb,
@@@ -3706,7 -3704,7 +3704,7 @@@ static void hci_remote_features_evt(str
                goto unlock;
        }
  
 -      if (!ev->status && !test_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) {
 +      if (!ev->status) {
                struct hci_cp_remote_name_req cp;
                memset(&cp, 0, sizeof(cp));
                bacpy(&cp.bdaddr, &conn->dst);
@@@ -5324,16 -5322,19 +5322,16 @@@ static void hci_user_confirm_request_ev
                goto unlock;
        }
  
 -      /* If no side requires MITM protection; auto-accept */
 +      /* If no side requires MITM protection; use JUST_CFM method */
        if ((!loc_mitm || conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) &&
            (!rem_mitm || conn->io_capability == HCI_IO_NO_INPUT_OUTPUT)) {
  
 -              /* If we're not the initiators request authorization to
 -               * proceed from user space (mgmt_user_confirm with
 -               * confirm_hint set to 1). The exception is if neither
 -               * side had MITM or if the local IO capability is
 -               * NoInputNoOutput, in which case we do auto-accept
 +              /* If we're not the initiator of request authorization and the
 +               * local IO capability is not NoInputNoOutput, use JUST_WORKS
 +               * method (mgmt_user_confirm with confirm_hint set to 1).
                 */
                if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) &&
 -                  conn->io_capability != HCI_IO_NO_INPUT_OUTPUT &&
 -                  (loc_mitm || rem_mitm)) {
 +                  conn->io_capability != HCI_IO_NO_INPUT_OUTPUT) {
                        bt_dev_dbg(hdev, "Confirming auto-accept as acceptor");
                        confirm_hint = 1;
                        goto confirm;
diff --combined net/netfilter/xt_IDLETIMER.c
index f8b25b6f5da7367d93b44a2fce4e9b32e1eeb1b5,5514600586a91d35c4c4aa6484ff119261f9895e..85f017e37cfcc085bc05594125a3b2c2380e0363
@@@ -107,14 -107,12 +107,12 @@@ static void idletimer_tg_expired(struc
        schedule_work(&timer->work);
  }
  
- static enum alarmtimer_restart idletimer_tg_alarmproc(struct alarm *alarm,
-                                                         ktime_t now)
+ static void idletimer_tg_alarmproc(struct alarm *alarm, ktime_t now)
  {
        struct idletimer_tg *timer = alarm->data;
  
        pr_debug("alarm %s expired\n", timer->attr.attr.name);
        schedule_work(&timer->work);
-       return ALARMTIMER_NORESTART;
  }
  
  static int idletimer_check_sysfs_name(const char *name, unsigned int size)
@@@ -458,49 -456,28 +456,49 @@@ static void idletimer_tg_destroy_v1(con
  
  static struct xt_target idletimer_tg[] __read_mostly = {
        {
 -      .name           = "IDLETIMER",
 -      .family         = NFPROTO_UNSPEC,
 -      .target         = idletimer_tg_target,
 -      .targetsize     = sizeof(struct idletimer_tg_info),
 -      .usersize       = offsetof(struct idletimer_tg_info, timer),
 -      .checkentry     = idletimer_tg_checkentry,
 -      .destroy        = idletimer_tg_destroy,
 -      .me             = THIS_MODULE,
 +              .name           = "IDLETIMER",
 +              .family         = NFPROTO_IPV4,
 +              .target         = idletimer_tg_target,
 +              .targetsize     = sizeof(struct idletimer_tg_info),
 +              .usersize       = offsetof(struct idletimer_tg_info, timer),
 +              .checkentry     = idletimer_tg_checkentry,
 +              .destroy        = idletimer_tg_destroy,
 +              .me             = THIS_MODULE,
        },
        {
 -      .name           = "IDLETIMER",
 -      .family         = NFPROTO_UNSPEC,
 -      .revision       = 1,
 -      .target         = idletimer_tg_target_v1,
 -      .targetsize     = sizeof(struct idletimer_tg_info_v1),
 -      .usersize       = offsetof(struct idletimer_tg_info_v1, timer),
 -      .checkentry     = idletimer_tg_checkentry_v1,
 -      .destroy        = idletimer_tg_destroy_v1,
 -      .me             = THIS_MODULE,
 +              .name           = "IDLETIMER",
 +              .family         = NFPROTO_IPV4,
 +              .revision       = 1,
 +              .target         = idletimer_tg_target_v1,
 +              .targetsize     = sizeof(struct idletimer_tg_info_v1),
 +              .usersize       = offsetof(struct idletimer_tg_info_v1, timer),
 +              .checkentry     = idletimer_tg_checkentry_v1,
 +              .destroy        = idletimer_tg_destroy_v1,
 +              .me             = THIS_MODULE,
        },
 -
 -
 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 +      {
 +              .name           = "IDLETIMER",
 +              .family         = NFPROTO_IPV6,
 +              .target         = idletimer_tg_target,
 +              .targetsize     = sizeof(struct idletimer_tg_info),
 +              .usersize       = offsetof(struct idletimer_tg_info, timer),
 +              .checkentry     = idletimer_tg_checkentry,
 +              .destroy        = idletimer_tg_destroy,
 +              .me             = THIS_MODULE,
 +      },
 +      {
 +              .name           = "IDLETIMER",
 +              .family         = NFPROTO_IPV6,
 +              .revision       = 1,
 +              .target         = idletimer_tg_target_v1,
 +              .targetsize     = sizeof(struct idletimer_tg_info_v1),
 +              .usersize       = offsetof(struct idletimer_tg_info_v1, timer),
 +              .checkentry     = idletimer_tg_checkentry_v1,
 +              .destroy        = idletimer_tg_destroy_v1,
 +              .me             = THIS_MODULE,
 +      },
 +#endif
  };
  
  static struct class *idletimer_tg_class;