Merge tag 'locking-urgent-2020-08-10' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <[email protected]>

Tue, 11 Aug 2020 02:07:44 +0000 (19:07 -0700)

committer Linus Torvalds <[email protected]>

Tue, 11 Aug 2020 02:07:44 +0000 (19:07 -0700)
author Linus Torvalds <[email protected]>
Tue, 11 Aug 2020 02:07:44 +0000 (19:07 -0700)
committer Linus Torvalds <[email protected]>
Tue, 11 Aug 2020 02:07:44 +0000 (19:07 -0700)
diff --combined arch/ia64/kernel/process.c

index 36eba4ba6543986a91fa8f47da9163ceda4292d1,4562a1aed454a220b338d7de1758e9b2b2b29d95..f19cb97c009878672b2706dd0dd243eaad6a7d94
--- 1/arch/ia64/kernel/process.c
--- 2/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@@ -40,6 -40,7 +40,6 @@@
   #include <asm/elf.h>
   #include <asm/irq.h>
   #include <asm/kexec.h>
- -#include <asm/pgalloc.h>
   #include <asm/processor.h>
   #include <asm/sal.h>
   #include <asm/switch_to.h>
@@@ -47,6 -48,7 +47,7 @@@
   #include <linux/uaccess.h>
   #include <asm/unwind.h>
   #include <asm/user.h>
+ #include <asm/xtp.h>
   
   #include "entry.h"
   
@@@ -295,7 -297,7 +296,7 @@@ ia64_load_extra (struct task_struct *ta
                 pfm_load_regs(task);
   
         info = __this_cpu_read(pfm_syst_info);
- -      if (info & PFM_CPUINFO_SYST_WIDE) 
+ +      if (info & PFM_CPUINFO_SYST_WIDE)
                 pfm_syst_wide_update_task(task, info, 1);
   #endif
   }
@@@ -309,7 -311,7 +310,7 @@@
    *
    *    <clone syscall>         <some kernel call frames>
    *    sys_clone                  :
- - *    do_fork                 do_fork
+ + *    _do_fork                _do_fork
    *    copy_thread             copy_thread
    *
    * This means that the stack layout is as follows:
@@@ -332,8 -334,9 +333,8 @@@
    * so there is nothing to worry about.
    */
   int
- -copy_thread(unsigned long clone_flags,
- -           unsigned long user_stack_base, unsigned long user_stack_size,
- -           struct task_struct *p)
+ +copy_thread(unsigned long clone_flags, unsigned long user_stack_base,
+ +          unsigned long user_stack_size, struct task_struct *p, unsigned long tls)
   {
         extern char ia64_ret_from_clone;
         struct switch_stack *child_stack, *stack;
@@@ -414,7 -417,7 +415,7 @@@
         rbs_size = stack->ar_bspstore - rbs;
         memcpy((void *) child_rbs, (void *) rbs, rbs_size);
         if (clone_flags & CLONE_SETTLS)
- -              child_ptregs->r13 = regs->r16;  /* see sys_clone2() in entry.S */
+ +              child_ptregs->r13 = tls;
         if (user_stack_base) {
                 child_ptregs->r12 = user_stack_base + user_stack_size - 16;
                 child_ptregs->ar_bspstore = user_stack_base;
@@@ -439,29 -442,11 +440,29 @@@
         return retval;
   }
   
+ +asmlinkage long ia64_clone(unsigned long clone_flags, unsigned long stack_start,
+ +                         unsigned long stack_size, unsigned long parent_tidptr,
+ +                         unsigned long child_tidptr, unsigned long tls)
+ +{
+ +      struct kernel_clone_args args = {
+ +              .flags          = (lower_32_bits(clone_flags) & ~CSIGNAL),
+ +              .pidfd          = (int __user *)parent_tidptr,
+ +              .child_tid      = (int __user *)child_tidptr,
+ +              .parent_tid     = (int __user *)parent_tidptr,
+ +              .exit_signal    = (lower_32_bits(clone_flags) & CSIGNAL),
+ +              .stack          = stack_start,
+ +              .stack_size     = stack_size,
+ +              .tls            = tls,
+ +      };
+ +
+ +      return _do_fork(&args);
+ +}
+ +
   static void
   do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg)
   {
         unsigned long mask, sp, nat_bits = 0, ar_rnat, urbs_end, cfm;
- -      unsigned long uninitialized_var(ip);    /* GCC be quiet */
+ +      unsigned long ip;
         elf_greg_t *dst = arg;
         struct pt_regs *pt;
         char nat;
@@@ -530,18 -515,52 +531,18 @@@
         unw_get_ar(info, UNW_AR_SSD, &dst[56]);
   }
   
- -void
- -do_dump_task_fpu (struct task_struct *task, struct unw_frame_info *info, void *arg)
- -{
- -      elf_fpreg_t *dst = arg;
- -      int i;
- -
- -      memset(dst, 0, sizeof(elf_fpregset_t)); /* don't leak any "random" bits */
- -
- -      if (unw_unwind_to_user(info) < 0)
- -              return;
- -
- -      /* f0 is 0.0, f1 is 1.0 */
- -
- -      for (i = 2; i < 32; ++i)
- -              unw_get_fr(info, i, dst + i);
- -
- -      ia64_flush_fph(task);
- -      if ((task->thread.flags & IA64_THREAD_FPH_VALID) != 0)
- -              memcpy(dst + 32, task->thread.fph, 96*16);
- -}
- -
   void
   do_copy_regs (struct unw_frame_info *info, void *arg)
   {
         do_copy_task_regs(current, info, arg);
   }
   
- -void
- -do_dump_fpu (struct unw_frame_info *info, void *arg)
- -{
- -      do_dump_task_fpu(current, info, arg);
- -}
- -
   void
   ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
   {
         unw_init_running(do_copy_regs, dst);
   }
   
- -int
- -dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
- -{
- -      unw_init_running(do_dump_fpu, dst);
- -      return 1;       /* f0-f31 are always valid so we always return 1 */
- -}
- -
   /*
    * Flush thread state.  This is called when a thread does an execve().
    */
diff --combined arch/ia64/kernel/smp.c

index 0e27420031219afe141b20ae0714050c7870ca1e,1cf7b9b3c1e21544643da969c28e186d2376f14c..7b7b64eb312975c6e677683f68a5abbbd3206f87
--- 1/arch/ia64/kernel/smp.c
--- 2/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@@ -39,12 -39,14 +39,13 @@@
   #include <asm/io.h>
   #include <asm/irq.h>
   #include <asm/page.h>
- -#include <asm/pgalloc.h>
   #include <asm/processor.h>
   #include <asm/ptrace.h>
   #include <asm/sal.h>
   #include <asm/tlbflush.h>
   #include <asm/unistd.h>
   #include <asm/mca.h>
+ #include <asm/xtp.h>
   
   /*
    * Note: alignment of 4 entries/cacheline was empirically determined
diff --combined arch/x86/include/asm/tsc.h

index b7b2624fba8666a70adc55f358d7d6edf18bb840,db5977174ce7442007d2c70bcafd8519075e49e4..01a300a9700b9bb4ac352fcd9c65593a8b762567
--- 1/arch/x86/include/asm/tsc.h
--- 2/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@@ -6,7 -6,11 +6,8 @@@
   #define _ASM_X86_TSC_H
   
   #include <asm/processor.h>
+ #include <asm/cpufeature.h>
   
- -#define NS_SCALE      10 /* 2^10, carefully chosen */
- -#define US_SCALE      32 /* 2^32, arbitralrily chosen */
- -
   /*
    * Standard way to access the cycle counter.
    */
diff --combined arch/x86/kernel/apic/apic.c

index ccf726cc87b77fc5701a6cee166611aff3eab94b,0c89003e7f50f8fe58e30926006d4cb537efa037..5f943b93816759fdc2588d4f5bb5af978851384a
--- 1/arch/x86/kernel/apic/apic.c
--- 2/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@@ -40,12 -40,14 +40,13 @@@
   #include <asm/irq_remapping.h>
   #include <asm/perf_event.h>
   #include <asm/x86_init.h>
- -#include <asm/pgalloc.h>
   #include <linux/atomic.h>
   #include <asm/mpspec.h>
   #include <asm/i8259.h>
   #include <asm/proto.h>
   #include <asm/traps.h>
   #include <asm/apic.h>
+ #include <asm/acpi.h>
   #include <asm/io_apic.h>
   #include <asm/desc.h>
   #include <asm/hpet.h>
diff --combined arch/x86/kernel/cpu/common.c

index 965474d78cef46ff98fe82d6f0cab4ec2370ebe7,52b565016eb1f3abe897597b730ebaf94b9888ad..c5d6f17d9b9d38d213524cd2842ee737c94881a7
--- 1/arch/x86/kernel/cpu/common.c
--- 2/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@@ -45,6 -45,7 +45,7 @@@
   #include <asm/mtrr.h>
   #include <asm/hwcap2.h>
   #include <linux/numa.h>
+ #include <asm/numa.h>
   #include <asm/asm.h>
   #include <asm/bugs.h>
   #include <asm/cpu.h>
@@@ -441,22 -442,6 +442,22 @@@ static void __init setup_cr_pinning(voi
         static_key_enable(&cr_pinning.key);
   }
   
+ +static __init int x86_nofsgsbase_setup(char *arg)
+ +{
+ +      /* Require an exact match without trailing characters. */
+ +      if (strlen(arg))
+ +              return 0;
+ +
+ +      /* Do not emit a message if the feature is not present. */
+ +      if (!boot_cpu_has(X86_FEATURE_FSGSBASE))
+ +              return 1;
+ +
+ +      setup_clear_cpu_cap(X86_FEATURE_FSGSBASE);
+ +      pr_info("FSGSBASE disabled via kernel command line\n");
+ +      return 1;
+ +}
+ +__setup("nofsgsbase", x86_nofsgsbase_setup);
+ +
   /*
    * Protection Keys are not available in 32-bit mode.
    */
@@@ -1511,12 -1496,6 +1512,12 @@@ static void identify_cpu(struct cpuinfo
         setup_smap(c);
         setup_umip(c);
   
+ +      /* Enable FSGSBASE instructions if available. */
+ +      if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
+ +              cr4_set_bits(X86_CR4_FSGSBASE);
+ +              elf_hwcap2 |= HWCAP2_FSGSBASE;
+ +      }
+ +
         /*
          * The vendor-specific functions might have changed features.
          * Now we do "generic changes."
diff --combined arch/x86/kernel/cpu/intel.c

index b6b7b38dff5fd73bc7a9e9266f56c7d7ca457698,6eb42d7a3dfdfc6118fa4baf497a2ae9332f8031..59a1e3ce3f145cc7b7115431dd7c025cf6d55498
--- 1/arch/x86/kernel/cpu/intel.c
--- 2/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@@ -23,6 -23,7 +23,7 @@@
   #include <asm/cmdline.h>
   #include <asm/traps.h>
   #include <asm/resctrl.h>
+ #include <asm/numa.h>
   
   #ifdef CONFIG_X86_64
   #include <linux/topology.h>
@@@ -1156,8 -1157,6 +1157,8 @@@ static const struct x86_cpu_id split_lo
         X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L,      1),
         X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,         1),
         X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,           1),
+ +      X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    1),
+ +      X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           1),
         {}
   };
   
diff --combined arch/x86/kernel/mpparse.c

index c27b82b62c8bda9734f1ad54752f8ac687ad0d69,db509e1134ceaaa7e28c561a0eed998a761f5ebf..411af4aa7b51f3de393c9efa04be95697838734b
--- 1/arch/x86/kernel/mpparse.c
--- 2/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@@ -19,9 -19,12 +19,11 @@@
   #include <linux/smp.h>
   #include <linux/pci.h>
   
+ #include <asm/io_apic.h>
+ #include <asm/acpi.h>
   #include <asm/irqdomain.h>
   #include <asm/mtrr.h>
   #include <asm/mpspec.h>
- -#include <asm/pgalloc.h>
   #include <asm/io_apic.h>
   #include <asm/proto.h>
   #include <asm/bios_ebda.h>
diff --combined arch/x86/kernel/setup.c

index b9a68d8e06d8d1964cc54db97933eaa2b22ed130,f7671980b5251a8c44458bd18cfb0488210f8150..3511736fbc747e3b606daf8bee7ae896ea91980b
--- 1/arch/x86/kernel/setup.c
--- 2/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@@ -25,6 -25,7 +25,7 @@@
   #include <xen/xen.h>
   
   #include <asm/apic.h>
+ #include <asm/numa.h>
   #include <asm/bios_ebda.h>
   #include <asm/bugs.h>
   #include <asm/cpu.h>
@@@ -870,6 -871,8 +871,6 @@@ void __init setup_arch(char **cmdline_p
   
   #ifdef CONFIG_BLK_DEV_RAM
         rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
- -      rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
- -      rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
   #endif
   #ifdef CONFIG_EFI
         if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
diff --combined arch/x86/mm/init_32.c

index 4cb958419fb0f2ce11055520449961556ebe8b00,d46a5cf6ccb0ff13b1e5de3522a603952dc719c0..7c055259de3a66d4685395942c06348f45b14241
--- 1/arch/x86/mm/init_32.c
--- 2/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@@ -52,6 -52,7 +52,7 @@@
   #include <asm/cpu_entry_area.h>
   #include <asm/init.h>
   #include <asm/pgtable_areas.h>
+ #include <asm/numa.h>
   
   #include "mm_internal.h"
   
@@@ -678,6 -679,7 +679,6 @@@ void __init initmem_init(void
   #endif
   
         memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
- -      sparse_memory_present_with_active_regions(0);
   
   #ifdef CONFIG_FLATMEM
         max_mapnr = IS_ENABLED(CONFIG_HIGHMEM) ? highend_pfn : max_low_pfn;
@@@ -717,6 -719,7 +718,6 @@@ void __init paging_init(void
          * NOTE: at this point the bootmem allocator is fully available.
          */
         olpc_dt_build_devicetree();
- -      sparse_memory_present_with_active_regions(MAX_NUMNODES);
         sparse_init();
         zone_sizes_init();
   }
diff --combined arch/x86/xen/smp_pv.c

index 9ea598dcc132fc4c7ef02cd23f918892082d65b6,8b04c0ddee37429f0f71d7abf785e4021798d635..47c8f4b444c9a8ffc5cdb6fd0163784442d2709c
--- 1/arch/x86/xen/smp_pv.c
--- 2/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@@ -29,6 -29,7 +29,7 @@@
   #include <asm/idtentry.h>
   #include <asm/desc.h>
   #include <asm/cpu.h>
+ #include <asm/io_apic.h>
   
   #include <xen/interface/xen.h>
   #include <xen/interface/vcpu.h>
@@@ -92,7 -93,9 +93,7 @@@ static void cpu_bringup(void
   asmlinkage __visible void cpu_bringup_and_idle(void)
   {
         cpu_bringup();
- -      boot_init_stack_canary();
         cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
- -      prevent_tail_call_optimization();
   }
   
   void xen_smp_intr_free_pv(unsigned int cpu)
diff --combined block/blk-iocost.c

index 521c29b8ae297f9477c6a95a4b47695f41898fa7,8e940c27c27cfd7938f7cd8a26fed53c53e52393..413e0b5c8e6b08cb9e76af181b267bbed84e8ead
--- 1/block/blk-iocost.c
--- 2/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@@ -406,7 -406,7 +406,7 @@@ struct ioc 
         enum ioc_running                running;
         atomic64_t                      vtime_rate;
   
-       seqcount_t                      period_seqcount;
+       seqcount_spinlock_t             period_seqcount;
         u32                             period_at;      /* wallclock starttime */
         u64                             period_at_vtime; /* vtime starttime */
   
@@@ -873,7 -873,6 +873,6 @@@ static void ioc_now(struct ioc *ioc, st
   
   static void ioc_start_period(struct ioc *ioc, struct ioc_now *now)
   {
-       lockdep_assert_held(&ioc->lock);
         WARN_ON_ONCE(ioc->running != IOC_RUNNING);
   
         write_seqcount_begin(&ioc->period_seqcount);
@@@ -1370,7 -1369,7 +1369,7 @@@ static void ioc_timer_fn(struct timer_l
          * should have woken up in the last period and expire idle iocgs.
          */
         list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) {
- -              if (!waitqueue_active(&iocg->waitq) && iocg->abs_vdebt &&
+ +              if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt &&
                     !iocg_is_idle(iocg))
                         continue;
   
@@@ -2001,7 -2000,7 +2000,7 @@@ static int blk_iocost_init(struct reque
   
         ioc->running = IOC_IDLE;
         atomic64_set(&ioc->vtime_rate, VTIME_PER_USEC);
-       seqcount_init(&ioc->period_seqcount);
+       seqcount_spinlock_init(&ioc->period_seqcount, &ioc->lock);
         ioc->period_at = ktime_to_us(ktime_get());
         atomic64_set(&ioc->cur_period, 0);
         atomic_set(&ioc->hweight_gen, 0);
@@@ -2045,7 -2044,8 +2044,7 @@@ static struct blkg_policy_data *ioc_pd_
         int levels = blkcg->css.cgroup->level + 1;
         struct ioc_gq *iocg;
   
- -      iocg = kzalloc_node(sizeof(*iocg) + levels * sizeof(iocg->ancestors[0]),
- -                          gfp, q->node);
+ +      iocg = kzalloc_node(struct_size(iocg, ancestors, levels), gfp, q->node);
         if (!iocg)
                 return NULL;
   
diff --combined drivers/dma-buf/dma-resv.c

index 07f5273207e7006d0c69c0cb0cb5d52b24998bb2,a7631352a486749ee7ecbeb6ed397e479c3402c2..434a3314fb0ed3913a8f816743ef01ac6bfae6ae
--- 1/drivers/dma-buf/dma-resv.c
--- 2/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@@ -36,7 -36,6 +36,7 @@@
   #include <linux/export.h>
   #include <linux/mm.h>
   #include <linux/sched/mm.h>
+ +#include <linux/mmu_notifier.h>
   
   /**
    * DOC: Reservation Object Overview
@@@ -52,12 -51,6 +52,6 @@@
   DEFINE_WD_CLASS(reservation_ww_class);
   EXPORT_SYMBOL(reservation_ww_class);
   
- struct lock_class_key reservation_seqcount_class;
- EXPORT_SYMBOL(reservation_seqcount_class);
- 
- const char reservation_seqcount_string[] = "reservation_seqcount";
- EXPORT_SYMBOL(reservation_seqcount_string);
- 
   /**
    * dma_resv_list_alloc - allocate fence list
    * @shared_max: number of fences we need space for
@@@ -117,13 -110,6 +111,13 @@@ static int __init dma_resv_lockdep(void
         if (ret == -EDEADLK)
                 dma_resv_lock_slow(&obj, &ctx);
         fs_reclaim_acquire(GFP_KERNEL);
+ +#ifdef CONFIG_MMU_NOTIFIER
+ +      lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
+ +      __dma_fence_might_wait();
+ +      lock_map_release(&__mmu_notifier_invalidate_range_start_map);
+ +#else
+ +      __dma_fence_might_wait();
+ +#endif
         fs_reclaim_release(GFP_KERNEL);
         ww_mutex_unlock(&obj.lock);
         ww_acquire_fini(&ctx);
@@@ -143,9 -129,8 +137,8 @@@ subsys_initcall(dma_resv_lockdep)
   void dma_resv_init(struct dma_resv *obj)
   {
         ww_mutex_init(&obj->lock, &reservation_ww_class);
+       seqcount_ww_mutex_init(&obj->seq, &obj->lock);
   
-       __seqcount_init(&obj->seq, reservation_seqcount_string,
-                       &reservation_seqcount_class);
         RCU_INIT_POINTER(obj->fence, NULL);
         RCU_INIT_POINTER(obj->fence_excl, NULL);
   }
@@@ -275,7 -260,6 +268,6 @@@ void dma_resv_add_shared_fence(struct d
         fobj = dma_resv_get_list(obj);
         count = fobj->shared_count;
   
-       preempt_disable();
         write_seqcount_begin(&obj->seq);
   
         for (i = 0; i < count; ++i) {
@@@ -297,7 -281,6 +289,6 @@@ replace
         smp_store_mb(fobj->shared_count, count);
   
         write_seqcount_end(&obj->seq);
-       preempt_enable();
         dma_fence_put(old);
   }
   EXPORT_SYMBOL(dma_resv_add_shared_fence);
@@@ -324,14 -307,12 +315,12 @@@ void dma_resv_add_excl_fence(struct dma
         if (fence)
                 dma_fence_get(fence);
   
-       preempt_disable();
         write_seqcount_begin(&obj->seq);
         /* write_seqcount_begin provides the necessary memory barrier */
         RCU_INIT_POINTER(obj->fence_excl, fence);
         if (old)
                 old->shared_count = 0;
         write_seqcount_end(&obj->seq);
-       preempt_enable();
   
         /* inplace update, no shared fences */
         while (i--)
@@@ -409,13 -390,11 +398,11 @@@ retry
         src_list = dma_resv_get_list(dst);
         old = dma_resv_get_excl(dst);
   
-       preempt_disable();
         write_seqcount_begin(&dst->seq);
         /* write_seqcount_begin provides the necessary memory barrier */
         RCU_INIT_POINTER(dst->fence_excl, new);
         RCU_INIT_POINTER(dst->fence, dst_list);
         write_seqcount_end(&dst->seq);
-       preempt_enable();
   
         dma_resv_list_free(src_list);
         dma_fence_put(old);
diff --combined drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index e5a5ba869eb4a25bb0b9ced934222da73b866e08,ff4b583cb96acda8ab31d5c63679223ffec42fad..a58af513c952628e545f306790e5aa7f936f5537
--- 1/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
--- 2/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@@ -258,11 -258,9 +258,9 @@@ static int amdgpu_amdkfd_remove_evictio
         new->shared_count = k;
   
         /* Install the new fence list, seqcount provides the barriers */
-       preempt_disable();
         write_seqcount_begin(&resv->seq);
         RCU_INIT_POINTER(resv->fence, new);
         write_seqcount_end(&resv->seq);
-       preempt_enable();
   
         /* Drop the references to the removed fences or move them to ef_list */
         for (i = j, k = 0; i < old->shared_count; ++i) {
@@@ -395,7 -393,7 +393,7 @@@ static int vm_update_pds(struct amdgpu_
         if (ret)
                 return ret;
   
- -      return amdgpu_sync_fence(sync, vm->last_update, false);
+ +      return amdgpu_sync_fence(sync, vm->last_update);
   }
   
   static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
@@@ -785,7 -783,7 +783,7 @@@ static int unmap_bo_from_gpuvm(struct a
   
         amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
   
- -      amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
+ +      amdgpu_sync_fence(sync, bo_va->last_pt_update);
   
         return 0;
   }
@@@ -804,7 -802,7 +802,7 @@@ static int update_gpuvm_pte(struct amdg
                 return ret;
         }
   
- -      return amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
+ +      return amdgpu_sync_fence(sync, bo_va->last_pt_update);
   }
   
   static int map_bo_to_gpuvm(struct amdgpu_device *adev,
@@@ -1354,7 -1352,7 +1352,7 @@@ int amdgpu_amdkfd_gpuvm_free_memory_of_
         }
   
         /* Free the BO*/
- -      drm_gem_object_put_unlocked(&mem->bo->tbo.base);
+ +      drm_gem_object_put(&mem->bo->tbo.base);
         mutex_destroy(&mem->lock);
         kfree(mem);
   
@@@ -2102,7 -2100,7 +2100,7 @@@ int amdgpu_amdkfd_gpuvm_restore_process
                         pr_debug("Memory eviction: Validate BOs failed. Try again\n");
                         goto validate_map_fail;
                 }
- -              ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false);
+ +              ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
                 if (ret) {
                         pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
                         goto validate_map_fail;
diff --combined drivers/iommu/intel/irq_remapping.c

index aa096b333a9914300eb3c6fe04c38f1bd6b41261,3cf9d570607617f7f8ba06470af705a8bd3cb49e..23583b0e66a5e12dc18cbb79dbc1b8ffa6dc7552
--- 1/drivers/iommu/intel/irq_remapping.c
--- 2/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@@ -15,6 -15,7 +15,7 @@@
   #include <linux/irqdomain.h>
   #include <linux/crash_dump.h>
   #include <asm/io_apic.h>
+ #include <asm/apic.h>
   #include <asm/smp.h>
   #include <asm/cpu.h>
   #include <asm/irq_remapping.h>
@@@ -628,21 -629,13 +629,21 @@@ out_free_table
   
   static void intel_teardown_irq_remapping(struct intel_iommu *iommu)
   {
+ +      struct fwnode_handle *fn;
+ +
         if (iommu && iommu->ir_table) {
                 if (iommu->ir_msi_domain) {
+ +                      fn = iommu->ir_msi_domain->fwnode;
+ +
                         irq_domain_remove(iommu->ir_msi_domain);
+ +                      irq_domain_free_fwnode(fn);
                         iommu->ir_msi_domain = NULL;
                 }
                 if (iommu->ir_domain) {
+ +                      fn = iommu->ir_domain->fwnode;
+ +
                         irq_domain_remove(iommu->ir_domain);
+ +                      irq_domain_free_fwnode(fn);
                         iommu->ir_domain = NULL;
                 }
                 free_pages((unsigned long)iommu->ir_table->base,
diff --combined drivers/md/raid5.c

index fb8d1fb1408876675e1d0094bdae8acf1bdf7eb5,892aefe88fa7ca55ca52f3a3143d45bacfdd49e4..ef0fd4830803f00c1142ee7feebb55bf91840b26
--- 1/drivers/md/raid5.c
--- 2/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@@ -69,13 -69,13 +69,13 @@@ static struct workqueue_struct *raid5_w
   
   static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
   {
- -      int hash = (sect >> STRIPE_SHIFT) & HASH_MASK;
+ +      int hash = (sect >> RAID5_STRIPE_SHIFT(conf)) & HASH_MASK;
         return &conf->stripe_hashtbl[hash];
   }
   
- -static inline int stripe_hash_locks_hash(sector_t sect)
+ +static inline int stripe_hash_locks_hash(struct r5conf *conf, sector_t sect)
   {
- -      return (sect >> STRIPE_SHIFT) & STRIPE_HASH_LOCKS_MASK;
+ +      return (sect >> RAID5_STRIPE_SHIFT(conf)) & STRIPE_HASH_LOCKS_MASK;
   }
   
   static inline void lock_device_hash_lock(struct r5conf *conf, int hash)
@@@ -627,7 -627,7 +627,7 @@@ raid5_get_active_stripe(struct r5conf *
                         int previous, int noblock, int noquiesce)
   {
         struct stripe_head *sh;
- -      int hash = stripe_hash_locks_hash(sector);
+ +      int hash = stripe_hash_locks_hash(conf, sector);
         int inc_empty_inactive_list_flag;
   
         pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
@@@ -748,9 -748,9 +748,9 @@@ static void stripe_add_to_batch_list(st
         tmp_sec = sh->sector;
         if (!sector_div(tmp_sec, conf->chunk_sectors))
                 return;
- -      head_sector = sh->sector - STRIPE_SECTORS;
+ +      head_sector = sh->sector - RAID5_STRIPE_SECTORS(conf);
   
- -      hash = stripe_hash_locks_hash(head_sector);
+ +      hash = stripe_hash_locks_hash(conf, head_sector);
         spin_lock_irq(conf->hash_locks + hash);
         head = __find_stripe(conf, head_sector, conf->generation);
         if (head && !atomic_inc_not_zero(&head->count)) {
@@@ -873,7 -873,7 +873,7 @@@ static void dispatch_bio_list(struct bi
         struct bio *bio;
   
         while ((bio = bio_list_pop(tmp)))
- -              generic_make_request(bio);
+ +              submit_bio_noacct(bio);
   }
   
   static int cmp_stripe(void *priv, struct list_head *a, struct list_head *b)
@@@ -1057,7 -1057,7 +1057,7 @@@ again
                        test_bit(WriteErrorSeen, &rdev->flags)) {
                         sector_t first_bad;
                         int bad_sectors;
- -                      int bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+ +                      int bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
                                               &first_bad, &bad_sectors);
                         if (!bad)
                                 break;
@@@ -1089,7 -1089,7 +1089,7 @@@
                 if (rdev) {
                         if (s->syncing || s->expanding || s->expanded
                             || s->replacing)
- -                              md_sync_acct(rdev->bdev, STRIPE_SECTORS);
+ +                              md_sync_acct(rdev->bdev, RAID5_STRIPE_SECTORS(conf));
   
                         set_bit(STRIPE_IO_STARTED, &sh->state);
   
@@@ -1129,9 -1129,9 +1129,9 @@@
                         else
                                 sh->dev[i].vec.bv_page = sh->dev[i].page;
                         bi->bi_vcnt = 1;
- -                      bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
+ +                      bi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf);
                         bi->bi_io_vec[0].bv_offset = 0;
- -                      bi->bi_iter.bi_size = STRIPE_SIZE;
+ +                      bi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf);
                         bi->bi_write_hint = sh->dev[i].write_hint;
                         if (!rrdev)
                                 sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET;
@@@ -1151,12 -1151,12 +1151,12 @@@
                         if (should_defer && op_is_write(op))
                                 bio_list_add(&pending_bios, bi);
                         else
- -                              generic_make_request(bi);
+ +                              submit_bio_noacct(bi);
                 }
                 if (rrdev) {
                         if (s->syncing || s->expanding || s->expanded
                             || s->replacing)
- -                              md_sync_acct(rrdev->bdev, STRIPE_SECTORS);
+ +                              md_sync_acct(rrdev->bdev, RAID5_STRIPE_SECTORS(conf));
   
                         set_bit(STRIPE_IO_STARTED, &sh->state);
   
@@@ -1183,9 -1183,9 +1183,9 @@@
                                 WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
                         sh->dev[i].rvec.bv_page = sh->dev[i].page;
                         rbi->bi_vcnt = 1;
- -                      rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
+ +                      rbi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf);
                         rbi->bi_io_vec[0].bv_offset = 0;
- -                      rbi->bi_iter.bi_size = STRIPE_SIZE;
+ +                      rbi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf);
                         rbi->bi_write_hint = sh->dev[i].write_hint;
                         sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET;
                         /*
@@@ -1201,7 -1201,7 +1201,7 @@@
                         if (should_defer && op_is_write(op))
                                 bio_list_add(&pending_bios, rbi);
                         else
- -                              generic_make_request(rbi);
+ +                              submit_bio_noacct(rbi);
                 }
                 if (!rdev && !rrdev) {
                         if (op_is_write(op))
@@@ -1235,7 -1235,6 +1235,7 @@@ async_copy_data(int frombio, struct bi
         int page_offset;
         struct async_submit_ctl submit;
         enum async_tx_flags flags = 0;
+ +      struct r5conf *conf = sh->raid_conf;
   
         if (bio->bi_iter.bi_sector >= sector)
                 page_offset = (signed)(bio->bi_iter.bi_sector - sector) * 512;
@@@ -1257,8 -1256,8 +1257,8 @@@
                         len -= b_offset;
                 }
   
- -              if (len > 0 && page_offset + len > STRIPE_SIZE)
- -                      clen = STRIPE_SIZE - page_offset;
+ +              if (len > 0 && page_offset + len > RAID5_STRIPE_SIZE(conf))
+ +                      clen = RAID5_STRIPE_SIZE(conf) - page_offset;
                 else
                         clen = len;
   
@@@ -1266,9 -1265,9 +1266,9 @@@
                         b_offset += bvl.bv_offset;
                         bio_page = bvl.bv_page;
                         if (frombio) {
- -                              if (sh->raid_conf->skip_copy &&
+ +                              if (conf->skip_copy &&
                                     b_offset == 0 && page_offset == 0 &&
- -                                  clen == STRIPE_SIZE &&
+ +                                  clen == RAID5_STRIPE_SIZE(conf) &&
                                     !no_skipcopy)
                                         *page = bio_page;
                                 else
@@@ -1293,7 -1292,6 +1293,7 @@@ static void ops_complete_biofill(void *
   {
         struct stripe_head *sh = stripe_head_ref;
         int i;
+ +      struct r5conf *conf = sh->raid_conf;
   
         pr_debug("%s: stripe %llu\n", __func__,
                 (unsigned long long)sh->sector);
@@@ -1314,8 -1312,8 +1314,8 @@@
                         rbi = dev->read;
                         dev->read = NULL;
                         while (rbi && rbi->bi_iter.bi_sector <
- -                              dev->sector + STRIPE_SECTORS) {
- -                              rbi2 = r5_next_bio(rbi, dev->sector);
+ +                              dev->sector + RAID5_STRIPE_SECTORS(conf)) {
+ +                              rbi2 = r5_next_bio(conf, rbi, dev->sector);
                                 bio_endio(rbi);
                                 rbi = rbi2;
                         }
@@@ -1332,7 -1330,6 +1332,7 @@@ static void ops_run_biofill(struct stri
         struct dma_async_tx_descriptor *tx = NULL;
         struct async_submit_ctl submit;
         int i;
+ +      struct r5conf *conf = sh->raid_conf;
   
         BUG_ON(sh->batch_head);
         pr_debug("%s: stripe %llu\n", __func__,
@@@ -1347,10 -1344,10 +1347,10 @@@
                         dev->toread = NULL;
                         spin_unlock_irq(&sh->stripe_lock);
                         while (rbi && rbi->bi_iter.bi_sector <
- -                              dev->sector + STRIPE_SECTORS) {
+ +                              dev->sector + RAID5_STRIPE_SECTORS(conf)) {
                                 tx = async_copy_data(0, rbi, &dev->page,
                                                      dev->sector, tx, sh, 0);
- -                              rbi = r5_next_bio(rbi, dev->sector);
+ +                              rbi = r5_next_bio(conf, rbi, dev->sector);
                         }
                 }
         }
@@@ -1432,11 -1429,9 +1432,11 @@@ ops_run_compute5(struct stripe_head *sh
         init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
                           ops_complete_compute, sh, to_addr_conv(sh, percpu, 0));
         if (unlikely(count == 1))
- -              tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
+ +              tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0,
+ +                              RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
         else
- -              tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
+ +              tx = async_xor(xor_dest, xor_srcs, 0, count,
+ +                              RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
   
         return tx;
   }
@@@ -1527,8 -1522,7 +1527,8 @@@ ops_run_compute6_1(struct stripe_head *
                 init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
                                   ops_complete_compute, sh,
                                   to_addr_conv(sh, percpu, 0));
- -              tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
+ +              tx = async_gen_syndrome(blocks, 0, count+2,
+ +                              RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
         } else {
                 /* Compute any data- or p-drive using XOR */
                 count = 0;
@@@ -1541,8 -1535,7 +1541,8 @@@
                 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
                                   NULL, ops_complete_compute, sh,
                                   to_addr_conv(sh, percpu, 0));
- -              tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
+ +              tx = async_xor(dest, blocks, 0, count,
+ +                              RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
         }
   
         return tx;
@@@ -1605,8 -1598,7 +1605,8 @@@ ops_run_compute6_2(struct stripe_head *
                                           ops_complete_compute, sh,
                                           to_addr_conv(sh, percpu, 0));
                         return async_gen_syndrome(blocks, 0, syndrome_disks+2,
- -                                                STRIPE_SIZE, &submit);
+ +                                                RAID5_STRIPE_SIZE(sh->raid_conf),
+ +                                                &submit);
                 } else {
                         struct page *dest;
                         int data_target;
@@@ -1629,8 -1621,7 +1629,8 @@@
                                           ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
                                           NULL, NULL, NULL,
                                           to_addr_conv(sh, percpu, 0));
- -                      tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
+ +                      tx = async_xor(dest, blocks, 0, count,
+ +                                     RAID5_STRIPE_SIZE(sh->raid_conf),
                                        &submit);
   
                         count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL);
@@@ -1638,8 -1629,7 +1638,8 @@@
                                           ops_complete_compute, sh,
                                           to_addr_conv(sh, percpu, 0));
                         return async_gen_syndrome(blocks, 0, count+2,
- -                                                STRIPE_SIZE, &submit);
+ +                                                RAID5_STRIPE_SIZE(sh->raid_conf),
+ +                                                &submit);
                 }
         } else {
                 init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
@@@ -1648,15 -1638,13 +1648,15 @@@
                 if (failb == syndrome_disks) {
                         /* We're missing D+P. */
                         return async_raid6_datap_recov(syndrome_disks+2,
- -                                                     STRIPE_SIZE, faila,
- -                                                     blocks, &submit);
+ +                                              RAID5_STRIPE_SIZE(sh->raid_conf),
+ +                                              faila,
+ +                                              blocks, &submit);
                 } else {
                         /* We're missing D+D. */
                         return async_raid6_2data_recov(syndrome_disks+2,
- -                                                     STRIPE_SIZE, faila, failb,
- -                                                     blocks, &submit);
+ +                                              RAID5_STRIPE_SIZE(sh->raid_conf),
+ +                                              faila, failb,
+ +                                              blocks, &submit);
                 }
         }
   }
@@@ -1703,8 -1691,7 +1703,8 @@@ ops_run_prexor5(struct stripe_head *sh
   
         init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
                           ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0));
- -      tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
+ +      tx = async_xor(xor_dest, xor_srcs, 0, count,
+ +                      RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
   
         return tx;
   }
@@@ -1724,8 -1711,7 +1724,8 @@@ ops_run_prexor6(struct stripe_head *sh
   
         init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_PQ_XOR_DST, tx,
                           ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0));
- -      tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE,  &submit);
+ +      tx = async_gen_syndrome(blocks, 0, count+2,
+ +                      RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
   
         return tx;
   }
@@@ -1766,7 -1752,7 +1766,7 @@@ again
                         WARN_ON(dev->page != dev->orig_page);
   
                         while (wbi && wbi->bi_iter.bi_sector <
- -                              dev->sector + STRIPE_SECTORS) {
+ +                              dev->sector + RAID5_STRIPE_SECTORS(conf)) {
                                 if (wbi->bi_opf & REQ_FUA)
                                         set_bit(R5_WantFUA, &dev->flags);
                                 if (wbi->bi_opf & REQ_SYNC)
@@@ -1784,7 -1770,7 +1784,7 @@@
                                                 clear_bit(R5_OVERWRITE, &dev->flags);
                                         }
                                 }
- -                              wbi = r5_next_bio(wbi, dev->sector);
+ +                              wbi = r5_next_bio(conf, wbi, dev->sector);
                         }
   
                         if (head_sh->batch_head) {
@@@ -1924,11 -1910,9 +1924,11 @@@ again
         }
   
         if (unlikely(count == 1))
- -              tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
+ +              tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0,
+ +                              RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
         else
- -              tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
+ +              tx = async_xor(xor_dest, xor_srcs, 0, count,
+ +                              RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
         if (!last_stripe) {
                 j++;
                 sh = list_first_entry(&sh->batch_list, struct stripe_head,
@@@ -1988,8 -1972,7 +1988,8 @@@ again
         } else
                 init_async_submit(&submit, 0, tx, NULL, NULL,
                                   to_addr_conv(sh, percpu, j));
- -      tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE,  &submit);
+ +      tx = async_gen_syndrome(blocks, 0, count+2,
+ +                      RAID5_STRIPE_SIZE(sh->raid_conf),  &submit);
         if (!last_stripe) {
                 j++;
                 sh = list_first_entry(&sh->batch_list, struct stripe_head,
@@@ -2037,8 -2020,7 +2037,8 @@@ static void ops_run_check_p(struct stri
   
         init_async_submit(&submit, 0, NULL, NULL, NULL,
                           to_addr_conv(sh, percpu, 0));
- -      tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
+ +      tx = async_xor_val(xor_dest, xor_srcs, 0, count,
+ +                         RAID5_STRIPE_SIZE(sh->raid_conf),
                            &sh->ops.zero_sum_result, &submit);
   
         atomic_inc(&sh->count);
@@@ -2063,8 -2045,7 +2063,8 @@@ static void ops_run_check_pq(struct str
         atomic_inc(&sh->count);
         init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
                           sh, to_addr_conv(sh, percpu, 0));
- -      async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
+ +      async_syndrome_val(srcs, 0, count+2,
+ +                         RAID5_STRIPE_SIZE(sh->raid_conf),
                            &sh->ops.zero_sum_result, percpu->spare_page, &submit);
   }
   
@@@ -2236,9 -2217,9 +2236,9 @@@ static int grow_stripes(struct r5conf *
   /**
    * scribble_alloc - allocate percpu scribble buffer for required size
    *                of the scribble region
- - * @percpu - from for_each_present_cpu() of the caller
- - * @num - total number of disks in the array
- - * @cnt - scribble objs count for required size of the scribble region
+ + * @percpu: from for_each_present_cpu() of the caller
+ + * @num: total number of disks in the array
+ + * @cnt: scribble objs count for required size of the scribble region
    *
    * The scribble buffer size must be enough to contain:
    * 1/ a struct page pointer for each device in the array +2
@@@ -2294,7 -2275,7 +2294,7 @@@ static int resize_chunks(struct r5conf 
   
                 percpu = per_cpu_ptr(conf->percpu, cpu);
                 err = scribble_alloc(percpu, new_disks,
- -                                   new_sectors / STRIPE_SECTORS);
+ +                                   new_sectors / RAID5_STRIPE_SECTORS(conf));
                 if (err)
                         break;
         }
@@@ -2528,10 -2509,10 +2528,10 @@@ static void raid5_end_read_request(stru
                          */
                         pr_info_ratelimited(
                                 "md/raid:%s: read error corrected (%lu sectors at %llu on %s)\n",
- -                              mdname(conf->mddev), STRIPE_SECTORS,
+ +                              mdname(conf->mddev), RAID5_STRIPE_SECTORS(conf),
                                 (unsigned long long)s,
                                 bdevname(rdev->bdev, b));
- -                      atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
+ +                      atomic_add(RAID5_STRIPE_SECTORS(conf), &rdev->corrected_errors);
                         clear_bit(R5_ReadError, &sh->dev[i].flags);
                         clear_bit(R5_ReWrite, &sh->dev[i].flags);
                 } else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
@@@ -2604,7 -2585,7 +2604,7 @@@
                         if (!(set_bad
                               && test_bit(In_sync, &rdev->flags)
                               && rdev_set_badblocks(
- -                                    rdev, sh->sector, STRIPE_SECTORS, 0)))
+ +                                    rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), 0)))
                                 md_error(conf->mddev, rdev);
                 }
         }
@@@ -2620,7 -2601,7 +2620,7 @@@ static void raid5_end_write_request(str
         struct stripe_head *sh = bi->bi_private;
         struct r5conf *conf = sh->raid_conf;
         int disks = sh->disks, i;
- -      struct md_rdev *uninitialized_var(rdev);
+ +      struct md_rdev *rdev;
         sector_t first_bad;
         int bad_sectors;
         int replacement = 0;
@@@ -2656,7 -2637,7 +2656,7 @@@
                 if (bi->bi_status)
                         md_error(conf->mddev, rdev);
                 else if (is_badblock(rdev, sh->sector,
- -                                   STRIPE_SECTORS,
+ +                                   RAID5_STRIPE_SECTORS(conf),
                                      &first_bad, &bad_sectors))
                         set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
         } else {
@@@ -2668,7 -2649,7 +2668,7 @@@
                                 set_bit(MD_RECOVERY_NEEDED,
                                         &rdev->mddev->recovery);
                 } else if (is_badblock(rdev, sh->sector,
- -                                     STRIPE_SECTORS,
+ +                                     RAID5_STRIPE_SECTORS(conf),
                                        &first_bad, &bad_sectors)) {
                         set_bit(R5_MadeGood, &sh->dev[i].flags);
                         if (test_bit(R5_ReadError, &sh->dev[i].flags))
@@@ -3302,13 -3283,13 +3302,13 @@@ static int add_stripe_bio(struct stripe
                 /* check if page is covered */
                 sector_t sector = sh->dev[dd_idx].sector;
                 for (bi=sh->dev[dd_idx].towrite;
- -                   sector < sh->dev[dd_idx].sector + STRIPE_SECTORS &&
+ +                   sector < sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf) &&
                              bi && bi->bi_iter.bi_sector <= sector;
- -                   bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) {
+ +                   bi = r5_next_bio(conf, bi, sh->dev[dd_idx].sector)) {
                         if (bio_end_sector(bi) >= sector)
                                 sector = bio_end_sector(bi);
                 }
- -              if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
+ +              if (sector >= sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf))
                         if (!test_and_set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags))
                                 sh->overwrite_disks++;
         }
@@@ -3333,7 -3314,7 +3333,7 @@@
                 set_bit(STRIPE_BITMAP_PENDING, &sh->state);
                 spin_unlock_irq(&sh->stripe_lock);
                 md_bitmap_startwrite(conf->mddev->bitmap, sh->sector,
- -                                   STRIPE_SECTORS, 0);
+ +                                   RAID5_STRIPE_SECTORS(conf), 0);
                 spin_lock_irq(&sh->stripe_lock);
                 clear_bit(STRIPE_BITMAP_PENDING, &sh->state);
                 if (!sh->batch_head) {
@@@ -3395,7 -3376,7 +3395,7 @@@ handle_failed_stripe(struct r5conf *con
                                 if (!rdev_set_badblocks(
                                             rdev,
                                             sh->sector,
- -                                          STRIPE_SECTORS, 0))
+ +                                          RAID5_STRIPE_SECTORS(conf), 0))
                                         md_error(conf->mddev, rdev);
                                 rdev_dec_pending(rdev, conf->mddev);
                         }
@@@ -3415,8 -3396,8 +3415,8 @@@
                         wake_up(&conf->wait_for_overlap);
   
                 while (bi && bi->bi_iter.bi_sector <
- -                      sh->dev[i].sector + STRIPE_SECTORS) {
- -                      struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
+ +                      sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
+ +                      struct bio *nextbi = r5_next_bio(conf, bi, sh->dev[i].sector);
   
                         md_write_end(conf->mddev);
                         bio_io_error(bi);
@@@ -3424,7 -3405,7 +3424,7 @@@
                 }
                 if (bitmap_end)
                         md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
- -                                         STRIPE_SECTORS, 0, 0);
+ +                                         RAID5_STRIPE_SECTORS(conf), 0, 0);
                 bitmap_end = 0;
                 /* and fail all 'written' */
                 bi = sh->dev[i].written;
@@@ -3436,8 -3417,8 +3436,8 @@@
   
                 if (bi) bitmap_end = 1;
                 while (bi && bi->bi_iter.bi_sector <
- -                     sh->dev[i].sector + STRIPE_SECTORS) {
- -                      struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
+ +                     sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
+ +                      struct bio *bi2 = r5_next_bio(conf, bi, sh->dev[i].sector);
   
                         md_write_end(conf->mddev);
                         bio_io_error(bi);
@@@ -3460,9 -3441,9 +3460,9 @@@
                         if (bi)
                                 s->to_read--;
                         while (bi && bi->bi_iter.bi_sector <
- -                             sh->dev[i].sector + STRIPE_SECTORS) {
+ +                             sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
                                 struct bio *nextbi =
- -                                      r5_next_bio(bi, sh->dev[i].sector);
+ +                                      r5_next_bio(conf, bi, sh->dev[i].sector);
   
                                 bio_io_error(bi);
                                 bi = nextbi;
@@@ -3470,7 -3451,7 +3470,7 @@@
                 }
                 if (bitmap_end)
                         md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
- -                                         STRIPE_SECTORS, 0, 0);
+ +                                         RAID5_STRIPE_SECTORS(conf), 0, 0);
                 /* If we were in the middle of a write the parity block might
                  * still be locked - so just clear all R5_LOCKED flags
                  */
@@@ -3515,14 -3496,14 +3515,14 @@@ handle_failed_sync(struct r5conf *conf
                             && !test_bit(Faulty, &rdev->flags)
                             && !test_bit(In_sync, &rdev->flags)
                             && !rdev_set_badblocks(rdev, sh->sector,
- -                                                 STRIPE_SECTORS, 0))
+ +                                                 RAID5_STRIPE_SECTORS(conf), 0))
                                 abort = 1;
                         rdev = rcu_dereference(conf->disks[i].replacement);
                         if (rdev
                             && !test_bit(Faulty, &rdev->flags)
                             && !test_bit(In_sync, &rdev->flags)
                             && !rdev_set_badblocks(rdev, sh->sector,
- -                                                 STRIPE_SECTORS, 0))
+ +                                                 RAID5_STRIPE_SECTORS(conf), 0))
                                 abort = 1;
                 }
                 rcu_read_unlock();
@@@ -3530,7 -3511,7 +3530,7 @@@
                         conf->recovery_disabled =
                                 conf->mddev->recovery_disabled;
         }
- -      md_done_sync(conf->mddev, STRIPE_SECTORS, !abort);
+ +      md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), !abort);
   }
   
   static int want_replace(struct stripe_head *sh, int disk_idx)
@@@ -3557,7 -3538,6 +3557,7 @@@ static int need_this_block(struct strip
         struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]],
                                   &sh->dev[s->failed_num[1]] };
         int i;
+ +      bool force_rcw = (sh->raid_conf->rmw_level == PARITY_DISABLE_RMW);
   
   
         if (test_bit(R5_LOCKED, &dev->flags) ||
@@@ -3616,27 -3596,17 +3616,27 @@@
                          * devices must be read.
                          */
                         return 1;
+ +
+ +              if (s->failed >= 2 &&
+ +                  (fdev[i]->towrite ||
+ +                   s->failed_num[i] == sh->pd_idx ||
+ +                   s->failed_num[i] == sh->qd_idx) &&
+ +                  !test_bit(R5_UPTODATE, &fdev[i]->flags))
+ +                      /* In max degraded raid6, if the failed disk is P or Q,
+ +                       * or we want to read the failed disk, we need to do
+ +                       * a reconstruct-write.
+ +                       */
+ +                      force_rcw = true;
         }
   
- -      /* If we are forced to do a reconstruct-write, either because
- -       * the current RAID6 implementation only supports that, or
- -       * because parity cannot be trusted and we are currently
- -       * recovering it, there is extra need to be careful.
+ +      /* If we are forced to do a reconstruct-write, because parity
+ +       * cannot be trusted and we are currently recovering it, there
+ +       * is extra need to be careful.
          * If one of the devices that we would need to read, because
          * it is not being overwritten (and maybe not written at all)
          * is missing/faulty, then we need to read everything we can.
          */
- -      if (sh->raid_conf->level != 6 &&
+ +      if (!force_rcw &&
             sh->sector < sh->raid_conf->mddev->recovery_cp)
                 /* reconstruct-write isn't being forced */
                 return 0;
@@@ -3740,7 -3710,7 +3740,7 @@@ static int fetch_block(struct stripe_he
         return 0;
   }
   
- -/**
+ +/*
    * handle_stripe_fill - read or compute data to satisfy pending requests.
    */
   static void handle_stripe_fill(struct stripe_head *sh,
@@@ -3815,14 -3785,14 +3815,14 @@@ returnbi
                                 wbi = dev->written;
                                 dev->written = NULL;
                                 while (wbi && wbi->bi_iter.bi_sector <
- -                                      dev->sector + STRIPE_SECTORS) {
- -                                      wbi2 = r5_next_bio(wbi, dev->sector);
+ +                                      dev->sector + RAID5_STRIPE_SECTORS(conf)) {
+ +                                      wbi2 = r5_next_bio(conf, wbi, dev->sector);
                                         md_write_end(conf->mddev);
                                         bio_endio(wbi);
                                         wbi = wbi2;
                                 }
                                 md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
- -                                                 STRIPE_SECTORS,
+ +                                                 RAID5_STRIPE_SECTORS(conf),
                                                    !test_bit(STRIPE_DEGRADED, &sh->state),
                                                    0);
                                 if (head_sh->batch_head) {
@@@ -4006,8 -3976,10 +4006,8 @@@ static int handle_stripe_dirtying(struc
                                         set_bit(R5_LOCKED, &dev->flags);
                                         set_bit(R5_Wantread, &dev->flags);
                                         s->locked++;
- -                              } else {
+ +                              } else
                                         set_bit(STRIPE_DELAYED, &sh->state);
- -                                      set_bit(STRIPE_HANDLE, &sh->state);
- -                              }
                         }
                 }
         }
@@@ -4032,8 -4004,10 +4032,8 @@@
                                         set_bit(R5_Wantread, &dev->flags);
                                         s->locked++;
                                         qread++;
- -                              } else {
+ +                              } else
                                         set_bit(STRIPE_DELAYED, &sh->state);
- -                                      set_bit(STRIPE_HANDLE, &sh->state);
- -                              }
                         }
                 }
                 if (rcw && conf->mddev->queue)
@@@ -4125,7 -4099,7 +4125,7 @@@ static void handle_parity_checks5(struc
                          */
                         set_bit(STRIPE_INSYNC, &sh->state);
                 else {
- -                      atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches);
+ +                      atomic64_add(RAID5_STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches);
                         if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) {
                                 /* don't try to repair!! */
                                 set_bit(STRIPE_INSYNC, &sh->state);
@@@ -4133,7 -4107,7 +4133,7 @@@
                                                     "%llu-%llu\n", mdname(conf->mddev),
                                                     (unsigned long long) sh->sector,
                                                     (unsigned long long) sh->sector +
- -                                                  STRIPE_SECTORS);
+ +                                                  RAID5_STRIPE_SECTORS(conf));
                         } else {
                                 sh->check_state = check_state_compute_run;
                                 set_bit(STRIPE_COMPUTE_RUN, &sh->state);
@@@ -4290,7 -4264,7 +4290,7 @@@ static void handle_parity_checks6(struc
                                  */
                         }
                 } else {
- -                      atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches);
+ +                      atomic64_add(RAID5_STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches);
                         if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) {
                                 /* don't try to repair!! */
                                 set_bit(STRIPE_INSYNC, &sh->state);
@@@ -4298,7 -4272,7 +4298,7 @@@
                                                     "%llu-%llu\n", mdname(conf->mddev),
                                                     (unsigned long long) sh->sector,
                                                     (unsigned long long) sh->sector +
- -                                                  STRIPE_SECTORS);
+ +                                                  RAID5_STRIPE_SECTORS(conf));
                         } else {
                                 int *target = &sh->ops.target;
   
@@@ -4369,7 -4343,7 +4369,7 @@@ static void handle_stripe_expansion(str
                         /* place all the copies on one channel */
                         init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
                         tx = async_memcpy(sh2->dev[dd_idx].page,
- -                                        sh->dev[i].page, 0, 0, STRIPE_SIZE,
+ +                                        sh->dev[i].page, 0, 0, RAID5_STRIPE_SIZE(conf),
                                           &submit);
   
                         set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
@@@ -4468,8 -4442,8 +4468,8 @@@ static void analyse_stripe(struct strip
                  */
                 rdev = rcu_dereference(conf->disks[i].replacement);
                 if (rdev && !test_bit(Faulty, &rdev->flags) &&
- -                  rdev->recovery_offset >= sh->sector + STRIPE_SECTORS &&
- -                  !is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+ +                  rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) &&
+ +                  !is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
                                  &first_bad, &bad_sectors))
                         set_bit(R5_ReadRepl, &dev->flags);
                 else {
@@@ -4483,7 -4457,7 +4483,7 @@@
                 if (rdev && test_bit(Faulty, &rdev->flags))
                         rdev = NULL;
                 if (rdev) {
- -                      is_bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+ +                      is_bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
                                              &first_bad, &bad_sectors);
                         if (s->blocked_rdev == NULL
                             && (test_bit(Blocked, &rdev->flags)
@@@ -4510,7 -4484,7 +4510,7 @@@
                         }
                 } else if (test_bit(In_sync, &rdev->flags))
                         set_bit(R5_Insync, &dev->flags);
- -              else if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
+ +              else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <= rdev->recovery_offset)
                         /* in sync if before recovery_offset */
                         set_bit(R5_Insync, &dev->flags);
                 else if (test_bit(R5_UPTODATE, &dev->flags) &&
@@@ -4599,12 -4573,12 +4599,12 @@@
         rcu_read_unlock();
   }
   
+ +/*
+ + * Return '1' if this is a member of batch, or '0' if it is a lone stripe or
+ + * a head which can now be handled.
+ + */
   static int clear_batch_ready(struct stripe_head *sh)
   {
- -      /* Return '1' if this is a member of batch, or
- -       * '0' if it is a lone stripe or a head which can now be
- -       * handled.
- -       */
         struct stripe_head *tmp;
         if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state))
                 return (sh->batch_head && sh->batch_head != sh);
@@@ -4708,16 -4682,6 +4708,16 @@@ static void handle_stripe(struct stripe
         struct r5dev *pdev, *qdev;
   
         clear_bit(STRIPE_HANDLE, &sh->state);
+ +
+ +      /*
+ +       * handle_stripe should not continue handling a batched stripe; only
+ +       * the head of a batch list or a lone stripe may continue. Otherwise
+ +       * break_stripe_batch_list could warn that STRIPE_ACTIVE is set for
+ +       * the batched stripe.
+ +       */
+ +      if (clear_batch_ready(sh))
+ +              return;
+ +
         if (test_and_set_bit_lock(STRIPE_ACTIVE, &sh->state)) {
                 /* already being handled, ensure it gets handled
                  * again when current action finishes */
@@@ -4725,6 -4689,11 +4725,6 @@@
                 return;
         }
   
- -      if (clear_batch_ready(sh) ) {
- -              clear_bit_unlock(STRIPE_ACTIVE, &sh->state);
- -              return;
- -      }
- -
         if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state))
                 break_stripe_batch_list(sh, 0);
   
@@@ -4873,7 -4842,7 +4873,7 @@@
          * or to load a block that is being partially written.
          */
         if (s.to_read || s.non_overwrite
- -          || (conf->level == 6 && s.to_write && s.failed)
+ +          || (s.to_write && s.failed)
             || (s.syncing && (s.uptodate + s.compute < disks))
             || s.replacing
             || s.expanding)
@@@ -4958,7 -4927,7 +4958,7 @@@
         if ((s.syncing || s.replacing) && s.locked == 0 &&
             !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
             test_bit(STRIPE_INSYNC, &sh->state)) {
- -              md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
+ +              md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1);
                 clear_bit(STRIPE_SYNCING, &sh->state);
                 if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
                         wake_up(&conf->wait_for_overlap);
@@@ -4977,11 -4946,14 +4977,11 @@@
                                 if (!test_bit(R5_ReWrite, &dev->flags)) {
                                         set_bit(R5_Wantwrite, &dev->flags);
                                         set_bit(R5_ReWrite, &dev->flags);
- -                                      set_bit(R5_LOCKED, &dev->flags);
- -                                      s.locked++;
- -                              } else {
+ +                              } else
                                         /* let's read it back */
                                         set_bit(R5_Wantread, &dev->flags);
- -                                      set_bit(R5_LOCKED, &dev->flags);
- -                                      s.locked++;
- -                              }
+ +                              set_bit(R5_LOCKED, &dev->flags);
+ +                              s.locked++;
                         }
                 }
   
@@@ -5023,7 -4995,7 +5023,7 @@@
                 clear_bit(STRIPE_EXPAND_READY, &sh->state);
                 atomic_dec(&conf->reshape_stripes);
                 wake_up(&conf->wait_for_overlap);
- -              md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
+ +              md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1);
         }
   
         if (s.expanding && s.locked == 0 &&
@@@ -5053,14 -5025,14 +5053,14 @@@ finish
                                 /* We own a safe reference to the rdev */
                                 rdev = conf->disks[i].rdev;
                                 if (!rdev_set_badblocks(rdev, sh->sector,
- -                                                      STRIPE_SECTORS, 0))
+ +                                                      RAID5_STRIPE_SECTORS(conf), 0))
                                         md_error(conf->mddev, rdev);
                                 rdev_dec_pending(rdev, conf->mddev);
                         }
                         if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
                                 rdev = conf->disks[i].rdev;
                                 rdev_clear_badblocks(rdev, sh->sector,
- -                                                   STRIPE_SECTORS, 0);
+ +                                                   RAID5_STRIPE_SECTORS(conf), 0);
                                 rdev_dec_pending(rdev, conf->mddev);
                         }
                         if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
@@@ -5069,7 -5041,7 +5069,7 @@@
                                         /* rdev have been moved down */
                                         rdev = conf->disks[i].rdev;
                                 rdev_clear_badblocks(rdev, sh->sector,
- -                                                   STRIPE_SECTORS, 0);
+ +                                                   RAID5_STRIPE_SECTORS(conf), 0);
                                 rdev_dec_pending(rdev, conf->mddev);
                         }
                 }
@@@ -5127,6 -5099,28 +5127,6 @@@ static void activate_bit_delay(struct r
         }
   }
   
- -static int raid5_congested(struct mddev *mddev, int bits)
- -{
- -      struct r5conf *conf = mddev->private;
- -
- -      /* No difference between reads and writes.  Just check
- -       * how busy the stripe_cache is
- -       */
- -
- -      if (test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
- -              return 1;
- -
- -      /* Also checks whether there is pressure on r5cache log space */
- -      if (test_bit(R5C_LOG_TIGHT, &conf->cache_state))
- -              return 1;
- -      if (conf->quiesce)
- -              return 1;
- -      if (atomic_read(&conf->empty_inactive_list_nr))
- -              return 1;
- -
- -      return 0;
- -}
- -
   static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
   {
         struct r5conf *conf = mddev->private;
@@@ -5295,7 -5289,7 +5295,7 @@@ static int raid5_read_one_chunk(struct 
                         trace_block_bio_remap(align_bi->bi_disk->queue,
                                               align_bi, disk_devt(mddev->gendisk),
                                               raid_bio->bi_iter.bi_sector);
- -              generic_make_request(align_bi);
+ +              submit_bio_noacct(align_bi);
                 return 1;
         } else {
                 rcu_read_unlock();
@@@ -5315,7 -5309,7 +5315,7 @@@ static struct bio *chunk_aligned_read(s
                 struct r5conf *conf = mddev->private;
                 split = bio_split(raid_bio, sectors, GFP_NOIO, &conf->bio_split);
                 bio_chain(split, raid_bio);
- -              generic_make_request(raid_bio);
+ +              submit_bio_noacct(raid_bio);
                 raid_bio = split;
         }
   
@@@ -5511,7 -5505,7 +5511,7 @@@ static void make_discard_request(struc
                 /* Skip discard while reshape is happening */
                 return;
   
- -      logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+ +      logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
         last_sector = bio_end_sector(bi);
   
         bi->bi_next = NULL;
@@@ -5526,7 -5520,7 +5526,7 @@@
         last_sector *= conf->chunk_sectors;
   
         for (; logical_sector < last_sector;
- -           logical_sector += STRIPE_SECTORS) {
+ +           logical_sector += RAID5_STRIPE_SECTORS(conf)) {
                 DEFINE_WAIT(w);
                 int d;
         again:
@@@ -5571,7 -5565,7 +5571,7 @@@
                              d++)
                                 md_bitmap_startwrite(mddev->bitmap,
                                                      sh->sector,
- -                                                   STRIPE_SECTORS,
+ +                                                   RAID5_STRIPE_SECTORS(conf),
                                                      0);
                         sh->bm_seq = conf->seq_flush + 1;
                         set_bit(STRIPE_BIT_DELAY, &sh->state);
@@@ -5636,12 -5630,12 +5636,12 @@@ static bool raid5_make_request(struct m
                 return true;
         }
   
- -      logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+ +      logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
         last_sector = bio_end_sector(bi);
         bi->bi_next = NULL;
   
         prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
- -      for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
+ +      for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) {
                 int previous;
                 int seq;
   
@@@ -5739,7 -5733,8 +5739,7 @@@
                                 do_flush = false;
                         }
   
- -                      if (!sh->batch_head || sh == sh->batch_head)
- -                              set_bit(STRIPE_HANDLE, &sh->state);
+ +                      set_bit(STRIPE_HANDLE, &sh->state);
                         clear_bit(STRIPE_DELAYED, &sh->state);
                         if ((!sh->batch_head || sh == sh->batch_head) &&
                             (bi->bi_opf & REQ_SYNC) &&
@@@ -5804,7 -5799,7 +5804,7 @@@ static sector_t reshape_request(struct 
                 sector_div(sector_nr, new_data_disks);
                 if (sector_nr) {
                         mddev->curr_resync_completed = sector_nr;
- -                      sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+ +                      sysfs_notify_dirent_safe(mddev->sysfs_completed);
                         *skipped = 1;
                         retn = sector_nr;
                         goto finish;
@@@ -5918,11 -5913,11 +5918,11 @@@
                 conf->reshape_safe = mddev->reshape_position;
                 spin_unlock_irq(&conf->device_lock);
                 wake_up(&conf->wait_for_overlap);
- -              sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+ +              sysfs_notify_dirent_safe(mddev->sysfs_completed);
         }
   
         INIT_LIST_HEAD(&stripes);
- -      for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
+ +      for (i = 0; i < reshape_sectors; i += RAID5_STRIPE_SECTORS(conf)) {
                 int j;
                 int skipped_disk = 0;
                 sh = raid5_get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
@@@ -5943,7 -5938,7 +5943,7 @@@
                                 skipped_disk = 1;
                                 continue;
                         }
- -                      memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE);
+ +                      memset(page_address(sh->dev[j].page), 0, RAID5_STRIPE_SIZE(conf));
                         set_bit(R5_Expanded, &sh->dev[j].flags);
                         set_bit(R5_UPTODATE, &sh->dev[j].flags);
                 }
@@@ -5978,7 -5973,7 +5978,7 @@@
                 set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
                 set_bit(STRIPE_HANDLE, &sh->state);
                 raid5_release_stripe(sh);
- -              first_sector += STRIPE_SECTORS;
+ +              first_sector += RAID5_STRIPE_SECTORS(conf);
         }
         /* Now that the sources are clearly marked, we can release
          * the destination stripes
@@@ -6025,7 -6020,7 +6025,7 @@@ finish
                 conf->reshape_safe = mddev->reshape_position;
                 spin_unlock_irq(&conf->device_lock);
                 wake_up(&conf->wait_for_overlap);
- -              sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+ +              sysfs_notify_dirent_safe(mddev->sysfs_completed);
         }
   ret:
         return retn;
@@@ -6084,12 -6079,11 +6084,12 @@@ static inline sector_t raid5_sync_reque
         if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
             !conf->fullsync &&
             !md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
- -          sync_blocks >= STRIPE_SECTORS) {
+ +          sync_blocks >= RAID5_STRIPE_SECTORS(conf)) {
                 /* we can skip this block, and probably more */
- -              sync_blocks /= STRIPE_SECTORS;
+ +              do_div(sync_blocks, RAID5_STRIPE_SECTORS(conf));
                 *skipped = 1;
- -              return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
+ +              /* keep things rounded to whole stripes */
+ +              return sync_blocks * RAID5_STRIPE_SECTORS(conf);
         }
   
         md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, false);
@@@ -6122,7 -6116,7 +6122,7 @@@
   
         raid5_release_stripe(sh);
   
- -      return STRIPE_SECTORS;
+ +      return RAID5_STRIPE_SECTORS(conf);
   }
   
   static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio,
@@@ -6145,14 -6139,14 +6145,14 @@@
         int handled = 0;
   
         logical_sector = raid_bio->bi_iter.bi_sector &
- -              ~((sector_t)STRIPE_SECTORS-1);
+ +              ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
         sector = raid5_compute_sector(conf, logical_sector,
                                       0, &dd_idx, NULL);
         last_sector = bio_end_sector(raid_bio);
   
         for (; logical_sector < last_sector;
- -           logical_sector += STRIPE_SECTORS,
- -                   sector += STRIPE_SECTORS,
+ +           logical_sector += RAID5_STRIPE_SECTORS(conf),
+ +                   sector += RAID5_STRIPE_SECTORS(conf),
                      scnt++) {
   
                 if (scnt < offset)
@@@ -6485,77 -6479,6 +6485,77 @@@ raid5_rmw_level = __ATTR(rmw_level, S_I
                          raid5_show_rmw_level,
                          raid5_store_rmw_level);
   
+ +static ssize_t
+ +raid5_show_stripe_size(struct mddev  *mddev, char *page)
+ +{
+ +      struct r5conf *conf;
+ +      int ret = 0;
+ +
+ +      spin_lock(&mddev->lock);
+ +      conf = mddev->private;
+ +      if (conf)
+ +              ret = sprintf(page, "%lu\n", RAID5_STRIPE_SIZE(conf));
+ +      spin_unlock(&mddev->lock);
+ +      return ret;
+ +}
+ +
+ +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE
+ +static ssize_t
+ +raid5_store_stripe_size(struct mddev  *mddev, const char *page, size_t len)
+ +{
+ +      struct r5conf *conf;
+ +      unsigned long new;
+ +      int err;
+ +
+ +      if (len >= PAGE_SIZE)
+ +              return -EINVAL;
+ +      if (kstrtoul(page, 10, &new))
+ +              return -EINVAL;
+ +
+ +      /*
+ +       * The value should not be bigger than PAGE_SIZE. It requires to
+ +       * be multiple of DEFAULT_STRIPE_SIZE.
+ +       */
+ +      if (new % DEFAULT_STRIPE_SIZE != 0 || new > PAGE_SIZE || new == 0)
+ +              return -EINVAL;
+ +
+ +      err = mddev_lock(mddev);
+ +      if (err)
+ +              return err;
+ +
+ +      conf = mddev->private;
+ +      if (!conf) {
+ +              err = -ENODEV;
+ +              goto out_unlock;
+ +      }
+ +
+ +      if (new == conf->stripe_size)
+ +              goto out_unlock;
+ +
+ +      pr_debug("md/raid: change stripe_size from %lu to %lu\n",
+ +                      conf->stripe_size, new);
+ +
+ +      mddev_suspend(mddev);
+ +      conf->stripe_size = new;
+ +      conf->stripe_shift = ilog2(new) - 9;
+ +      conf->stripe_sectors = new >> 9;
+ +      mddev_resume(mddev);
+ +
+ +out_unlock:
+ +      mddev_unlock(mddev);
+ +      return err ?: len;
+ +}
+ +
+ +static struct md_sysfs_entry
+ +raid5_stripe_size = __ATTR(stripe_size, 0644,
+ +                       raid5_show_stripe_size,
+ +                       raid5_store_stripe_size);
+ +#else
+ +static struct md_sysfs_entry
+ +raid5_stripe_size = __ATTR(stripe_size, 0444,
+ +                       raid5_show_stripe_size,
+ +                       NULL);
+ +#endif
   
   static ssize_t
   raid5_show_preread_threshold(struct mddev *mddev, char *page)
@@@ -6744,7 -6667,6 +6744,7 @@@ static struct attribute *raid5_attrs[] 
         &raid5_group_thread_cnt.attr,
         &raid5_skip_copy.attr,
         &raid5_rmw_level.attr,
+ +      &raid5_stripe_size.attr,
         &r5c_journal_mode.attr,
         &ppl_write_hint.attr,
         NULL,
@@@ -6844,7 -6766,7 +6844,7 @@@ static int alloc_scratch_buffer(struct 
                                conf->previous_raid_disks),
                            max(conf->chunk_sectors,
                                conf->prev_chunk_sectors)
- -                         / STRIPE_SECTORS)) {
+ +                         / RAID5_STRIPE_SECTORS(conf))) {
                 free_scratch_buffer(conf, percpu);
                 return -ENOMEM;
         }
@@@ -6996,12 -6918,6 +6996,12 @@@ static struct r5conf *setup_conf(struc
         conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL);
         if (conf == NULL)
                 goto abort;
+ +
+ +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE
+ +      conf->stripe_size = DEFAULT_STRIPE_SIZE;
+ +      conf->stripe_shift = ilog2(DEFAULT_STRIPE_SIZE) - 9;
+ +      conf->stripe_sectors = DEFAULT_STRIPE_SIZE >> 9;
+ +#endif
         INIT_LIST_HEAD(&conf->free_list);
         INIT_LIST_HEAD(&conf->pending_list);
         conf->pending_data = kcalloc(PENDING_IO_MAX,
@@@ -7019,7 -6935,7 +7019,7 @@@
         } else
                 goto abort;
         spin_lock_init(&conf->device_lock);
-       seqcount_init(&conf->gen_lock);
+       seqcount_spinlock_init(&conf->gen_lock, &conf->device_lock);
         mutex_init(&conf->cache_size_mutex);
         init_waitqueue_head(&conf->wait_for_quiescent);
         init_waitqueue_head(&conf->wait_for_stripe);
@@@ -7153,8 -7069,8 +7153,8 @@@
         conf->min_nr_stripes = NR_STRIPES;
         if (mddev->reshape_position != MaxSector) {
                 int stripes = max_t(int,
- -                      ((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4,
- -                      ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4);
+ +                      ((mddev->chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4,
+ +                      ((mddev->new_chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4);
                 conf->min_nr_stripes = max(NR_STRIPES, stripes);
                 if (conf->min_nr_stripes != NR_STRIPES)
                         pr_info("md/raid:%s: force stripe size %d for reshape\n",
@@@ -7885,14 -7801,14 +7885,14 @@@ static int check_stripe_cache(struct md
          * stripe_heads first.
          */
         struct r5conf *conf = mddev->private;
- -      if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4
+ +      if (((mddev->chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4
             > conf->min_nr_stripes ||
- -          ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4
+ +          ((mddev->new_chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4
             > conf->min_nr_stripes) {
                 pr_warn("md/raid:%s: reshape: not enough stripes.  Needed %lu\n",
                         mdname(mddev),
                         ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9)
- -                       / STRIPE_SIZE)*4);
+ +                       / RAID5_STRIPE_SIZE(conf))*4);
                 return 0;
         }
         return 1;
@@@ -8028,8 -7944,8 +8028,8 @@@ static int raid5_start_reshape(struct m
                                         else
                                                 rdev->recovery_offset = 0;
   
- -                                      if (sysfs_link_rdev(mddev, rdev))
- -                                              /* Failure here is OK */;
+ +                                      /* Failure here is OK */
+ +                                      sysfs_link_rdev(mddev, rdev);
                                 }
                         } else if (rdev->raid_disk >= conf->previous_raid_disks
                                    && !test_bit(Faulty, &rdev->flags)) {
@@@ -8224,7 -8140,7 +8224,7 @@@ static void *raid5_takeover_raid1(struc
         while (chunksect && (mddev->array_sectors & (chunksect-1)))
                 chunksect >>= 1;
   
- -      if ((chunksect<<9) < STRIPE_SIZE)
+ +      if ((chunksect<<9) < RAID5_STRIPE_SIZE((struct r5conf *)mddev->private))
                 /* array size does not allow a suitable chunk size */
                 return ERR_PTR(-EINVAL);
   
@@@ -8511,6 -8427,7 +8511,6 @@@ static struct md_personality raid6_pers
         .finish_reshape = raid5_finish_reshape,
         .quiesce        = raid5_quiesce,
         .takeover       = raid6_takeover,
- -      .congested      = raid5_congested,
         .change_consistency_policy = raid5_change_consistency_policy,
   };
   static struct md_personality raid5_personality =
@@@ -8535,6 -8452,7 +8535,6 @@@
         .finish_reshape = raid5_finish_reshape,
         .quiesce        = raid5_quiesce,
         .takeover       = raid5_takeover,
- -      .congested      = raid5_congested,
         .change_consistency_policy = raid5_change_consistency_policy,
   };
   
@@@ -8560,6 -8478,7 +8560,6 @@@ static struct md_personality raid4_pers
         .finish_reshape = raid5_finish_reshape,
         .quiesce        = raid5_quiesce,
         .takeover       = raid4_takeover,
- -      .congested      = raid5_congested,
         .change_consistency_policy = raid5_change_consistency_policy,
   };
   
diff --combined drivers/md/raid5.h

index 7fb3b26a181a9f9442fcbfdfa79bd050cc04192e,a2c9e9e9f5ac8346c07a7d72ddf3960fa82e0238..16fc29472f5c0a6ef7b25005cee2dd566ed258d9
--- 1/drivers/md/raid5.h
--- 2/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@@ -472,20 -472,32 +472,20 @@@ struct disk_info 
    */
   
   #define NR_STRIPES            256
+ +#define DEFAULT_STRIPE_SIZE   4096
+ +
+ +#if PAGE_SIZE == DEFAULT_STRIPE_SIZE
   #define STRIPE_SIZE           PAGE_SIZE
   #define STRIPE_SHIFT          (PAGE_SHIFT - 9)
   #define STRIPE_SECTORS                (STRIPE_SIZE>>9)
+ +#endif
+ +
   #define       IO_THRESHOLD            1
   #define BYPASS_THRESHOLD      1
   #define NR_HASH                       (PAGE_SIZE / sizeof(struct hlist_head))
   #define HASH_MASK             (NR_HASH - 1)
   #define MAX_STRIPE_BATCH      8
   
- -/* bio's attached to a stripe+device for I/O are linked together in bi_sector
- - * order without overlap.  There may be several bio's per stripe+device, and
- - * a bio could span several devices.
- - * When walking this list for a particular stripe+device, we must never proceed
- - * beyond a bio that extends past this device, as the next bio might no longer
- - * be valid.
- - * This function is used to determine the 'next' bio in the list, given the
- - * sector of the current stripe+device
- - */
- -static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
- -{
- -      if (bio_end_sector(bio) < sector + STRIPE_SECTORS)
- -              return bio->bi_next;
- -      else
- -              return NULL;
- -}
- -
   /* NOTE NR_STRIPE_HASH_LOCKS must remain below 64.
    * This is because we sometimes take all the spinlocks
    * and creating that much locking depth can cause
@@@ -562,11 -574,6 +562,11 @@@ struct r5conf 
         int                     raid_disks;
         int                     max_nr_stripes;
         int                     min_nr_stripes;
+ +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE
+ +      unsigned long   stripe_size;
+ +      unsigned int    stripe_shift;
+ +      unsigned long   stripe_sectors;
+ +#endif
   
         /* reshape_progress is the leading edge of a 'reshape'
          * It has value MaxSector when no reshape is happening
@@@ -582,7 -589,7 +582,7 @@@
         int                     prev_chunk_sectors;
         int                     prev_algo;
         short                   generation; /* increments with every reshape */
-       seqcount_t              gen_lock;       /* lock against generation changes */
+       seqcount_spinlock_t     gen_lock;       /* lock against generation changes */
         unsigned long           reshape_checkpoint; /* Time we last updated
                                                      * metadata */
         long long               min_offset_diff; /* minimum difference between
@@@ -683,32 -690,6 +683,32 @@@
         struct r5pending_data   *next_pending_data;
   };
   
+ +#if PAGE_SIZE == DEFAULT_STRIPE_SIZE
+ +#define RAID5_STRIPE_SIZE(conf)       STRIPE_SIZE
+ +#define RAID5_STRIPE_SHIFT(conf)      STRIPE_SHIFT
+ +#define RAID5_STRIPE_SECTORS(conf)    STRIPE_SECTORS
+ +#else
+ +#define RAID5_STRIPE_SIZE(conf)       ((conf)->stripe_size)
+ +#define RAID5_STRIPE_SHIFT(conf)      ((conf)->stripe_shift)
+ +#define RAID5_STRIPE_SECTORS(conf)    ((conf)->stripe_sectors)
+ +#endif
+ +
+ +/* bio's attached to a stripe+device for I/O are linked together in bi_sector
+ + * order without overlap.  There may be several bio's per stripe+device, and
+ + * a bio could span several devices.
+ + * When walking this list for a particular stripe+device, we must never proceed
+ + * beyond a bio that extends past this device, as the next bio might no longer
+ + * be valid.
+ + * This function is used to determine the 'next' bio in the list, given the
+ + * sector of the current stripe+device
+ + */
+ +static inline struct bio *r5_next_bio(struct r5conf *conf, struct bio *bio, sector_t sector)
+ +{
+ +      if (bio_end_sector(bio) < sector + RAID5_STRIPE_SECTORS(conf))
+ +              return bio->bi_next;
+ +      else
+ +              return NULL;
+ +}
   
   /*
    * Our supported algorithms
diff --combined fs/userfaultfd.c

index 6e264dded46e4f51928a06d3c3e96d9358def764,26e8b23594fb3d14a078e8f38b47a645229a7804..0e4a3837da52e64134f8de280690485b8dbcb4a1
--- 1/fs/userfaultfd.c
--- 2/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@@ -61,7 -61,7 +61,7 @@@ struct userfaultfd_ctx 
         /* waitqueue head for events */
         wait_queue_head_t event_wqh;
         /* a refile sequence protected by fault_pending_wqh lock */
-       struct seqcount refile_seq;
+       seqcount_spinlock_t refile_seq;
         /* pseudo fd refcounting */
         refcount_t refcount;
         /* userfaultfd syscall flags */
@@@ -339,6 -339,7 +339,6 @@@ out
         return ret;
   }
   
- -/* Should pair with userfaultfd_signal_pending() */
   static inline long userfaultfd_get_blocking_state(unsigned int flags)
   {
         if (flags & FAULT_FLAG_INTERRUPTIBLE)
@@@ -350,6 -351,18 +350,6 @@@
         return TASK_UNINTERRUPTIBLE;
   }
   
- -/* Should pair with userfaultfd_get_blocking_state() */
- -static inline bool userfaultfd_signal_pending(unsigned int flags)
- -{
- -      if (flags & FAULT_FLAG_INTERRUPTIBLE)
- -              return signal_pending(current);
- -
- -      if (flags & FAULT_FLAG_KILLABLE)
- -              return fatal_signal_pending(current);
- -
- -      return false;
- -}
- -
   /*
    * The locking rules involved in returning VM_FAULT_RETRY depending on
    * FAULT_FLAG_ALLOW_RETRY, FAULT_FLAG_RETRY_NOWAIT and
@@@ -503,9 -516,33 +503,9 @@@ vm_fault_t handle_userfault(struct vm_f
                                                        vmf->flags, reason);
         mmap_read_unlock(mm);
   
- -      if (likely(must_wait && !READ_ONCE(ctx->released) &&
- -                 !userfaultfd_signal_pending(vmf->flags))) {
+ +      if (likely(must_wait && !READ_ONCE(ctx->released))) {
                 wake_up_poll(&ctx->fd_wqh, EPOLLIN);
                 schedule();
- -              ret |= VM_FAULT_MAJOR;
- -
- -              /*
- -               * False wakeups can orginate even from rwsem before
- -               * up_read() however userfaults will wait either for a
- -               * targeted wakeup on the specific uwq waitqueue from
- -               * wake_userfault() or for signals or for uffd
- -               * release.
- -               */
- -              while (!READ_ONCE(uwq.waken)) {
- -                      /*
- -                       * This needs the full smp_store_mb()
- -                       * guarantee as the state write must be
- -                       * visible to other CPUs before reading
- -                       * uwq.waken from other CPUs.
- -                       */
- -                      set_current_state(blocking_state);
- -                      if (READ_ONCE(uwq.waken) ||
- -                          READ_ONCE(ctx->released) ||
- -                          userfaultfd_signal_pending(vmf->flags))
- -                              break;
- -                      schedule();
- -              }
         }
   
         __set_current_state(TASK_RUNNING);
@@@ -1961,7 -1998,7 +1961,7 @@@ static void init_once_userfaultfd_ctx(v
         init_waitqueue_head(&ctx->fault_wqh);
         init_waitqueue_head(&ctx->event_wqh);
         init_waitqueue_head(&ctx->fd_wqh);
-       seqcount_init(&ctx->refile_seq);
+       seqcount_spinlock_init(&ctx->refile_seq, &ctx->fault_pending_wqh.lock);
   }
   
   SYSCALL_DEFINE1(userfaultfd, int, flags)
diff --combined include/linux/sched.h

index 52bcc9f48e176e46c15d1a840d2feaf3ea13d78a,7c7a9499d7bc7151f581bfe9983790da23039bfb..53ddc02e2e7922a7c505fed554437c0b5e98beb3
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -18,7 -18,6 +18,7 @@@
   #include <linux/mutex.h>
   #include <linux/plist.h>
   #include <linux/hrtimer.h>
+ +#include <linux/irqflags.h>
   #include <linux/seccomp.h>
   #include <linux/nodemask.h>
   #include <linux/rcupdate.h>
@@@ -32,6 -31,7 +32,7 @@@
   #include <linux/task_io_accounting.h>
   #include <linux/posix-timers.h>
   #include <linux/rseq.h>
+ #include <linux/seqlock.h>
   #include <linux/kcsan.h>
   
   /* task_struct member predeclarations (sorted alphabetically): */
@@@ -155,24 -155,24 +156,24 @@@ struct task_group
    *
    *   for (;;) {
    *    set_current_state(TASK_UNINTERRUPTIBLE);
- - *    if (!need_sleep)
- - *            break;
+ + *    if (CONDITION)
+ + *       break;
    *
    *    schedule();
    *   }
    *   __set_current_state(TASK_RUNNING);
    *
    * If the caller does not need such serialisation (because, for instance, the
- - * condition test and condition change and wakeup are under the same lock) then
+ + * CONDITION test and condition change and wakeup are under the same lock) then
    * use __set_current_state().
    *
    * The above is typically ordered against the wakeup, which does:
    *
- - *   need_sleep = false;
+ + *   CONDITION = 1;
    *   wake_up_state(p, TASK_UNINTERRUPTIBLE);
    *
- - * where wake_up_state() executes a full memory barrier before accessing the
- - * task state.
+ + * where wake_up_state()/try_to_wake_up() executes a full memory barrier before
+ + * accessing p->state.
    *
    * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
    * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
@@@ -375,7 -375,7 +376,7 @@@ struct util_est 
    * For cfs_rq, they are the aggregated values of all runnable and blocked
    * sched_entities.
    *
- - * The load/runnable/util_avg doesn't direcly factor frequency scaling and CPU
+ + * The load/runnable/util_avg doesn't directly factor frequency scaling and CPU
    * capacity scaling. The scaling is done through the rq_clock_pelt that is used
    * for computing those signals (see update_rq_clock_pelt())
    *
@@@ -687,15 -687,9 +688,15 @@@ struct task_struct 
         struct sched_dl_entity          dl;
   
   #ifdef CONFIG_UCLAMP_TASK
- -      /* Clamp values requested for a scheduling entity */
+ +      /*
+ +       * Clamp values requested for a scheduling entity.
+ +       * Must be updated with task_rq_lock() held.
+ +       */
         struct uclamp_se                uclamp_req[UCLAMP_CNT];
- -      /* Effective clamp values used for a scheduling entity */
+ +      /*
+ +       * Effective clamp values used for a scheduling entity.
+ +       * Must be updated with task_rq_lock() held.
+ +       */
         struct uclamp_se                uclamp[UCLAMP_CNT];
   #endif
   
@@@ -987,9 -981,17 +988,9 @@@
   #endif
   
   #ifdef CONFIG_TRACE_IRQFLAGS
- -      unsigned int                    irq_events;
+ +      struct irqtrace_events          irqtrace;
         unsigned int                    hardirq_threaded;
- -      unsigned long                   hardirq_enable_ip;
- -      unsigned long                   hardirq_disable_ip;
- -      unsigned int                    hardirq_enable_event;
- -      unsigned int                    hardirq_disable_event;
         u64                             hardirq_chain_key;
- -      unsigned long                   softirq_disable_ip;
- -      unsigned long                   softirq_enable_ip;
- -      unsigned int                    softirq_disable_event;
- -      unsigned int                    softirq_enable_event;
         int                             softirqs_enabled;
         int                             softirq_context;
         int                             irq_config;
@@@ -1049,7 -1051,7 +1050,7 @@@
         /* Protected by ->alloc_lock: */
         nodemask_t                      mems_allowed;
         /* Seqence number to catch updates: */
-       seqcount_t                      mems_allowed_seq;
+       seqcount_spinlock_t             mems_allowed_seq;
         int                             cpuset_mem_spread_rotor;
         int                             cpuset_slab_spread_rotor;
   #endif
@@@ -1190,12 -1192,8 +1191,12 @@@
   #ifdef CONFIG_KASAN
         unsigned int                    kasan_depth;
   #endif
+ +
   #ifdef CONFIG_KCSAN
         struct kcsan_ctx                kcsan_ctx;
+ +#ifdef CONFIG_TRACE_IRQFLAGS
+ +      struct irqtrace_events          kcsan_save_irqtrace;
+ +#endif
   #endif
   
   #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@@ -1507,6 -1505,7 +1508,6 @@@ extern struct pid *cad_pid
   #define PF_KTHREAD            0x00200000      /* I am a kernel thread */
   #define PF_RANDOMIZE          0x00400000      /* Randomize virtual address space */
   #define PF_SWAPWRITE          0x00800000      /* Allowed to write to swap */
- -#define PF_UMH                        0x02000000      /* I'm an Usermodehelper process */
   #define PF_NO_SETAFFINITY     0x04000000      /* Userland is not allowed to meddle with cpus_mask */
   #define PF_MCE_EARLY          0x08000000      /* Early kill for mce process policy */
   #define PF_MEMALLOC_NOCMA     0x10000000      /* All allocation request will have _GFP_MOVABLE cleared */
@@@ -1648,9 -1647,6 +1649,9 @@@ extern int idle_cpu(int cpu)
   extern int available_idle_cpu(int cpu);
   extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
   extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
+ +extern void sched_set_fifo(struct task_struct *p);
+ +extern void sched_set_fifo_low(struct task_struct *p);
+ +extern void sched_set_normal(struct task_struct *p, int nice);
   extern int sched_setattr(struct task_struct *, const struct sched_attr *);
   extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *);
   extern struct task_struct *idle_task(int cpu);
@@@ -2018,6 -2014,14 +2019,6 @@@ static inline void rseq_execve(struct t
   
   #endif
   
- -void __exit_umh(struct task_struct *tsk);
- -
- -static inline void exit_umh(struct task_struct *tsk)
- -{
- -      if (unlikely(tsk->flags & PF_UMH))
- -              __exit_umh(tsk);
- -}
- -
   #ifdef CONFIG_DEBUG_RSEQ
   
   void rseq_syscall(struct pt_regs *regs);
@@@ -2039,7 -2043,6 +2040,7 @@@ const struct sched_avg *sched_trace_rq_
   const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq);
   
   int sched_trace_rq_cpu(struct rq *rq);
+ +int sched_trace_rq_nr_running(struct rq *rq);
   
   const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
   
diff --combined include/net/netfilter/nf_conntrack.h

index c7bfddfc65b02e4eaf22aac0409e56404455db69,ea4e2010b2465bd58511904dfdf63717df64de64..439379ca9fface9171d47e5bc6361e34e9cf4360
--- 1/include/net/netfilter/nf_conntrack.h
--- 2/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@@ -279,18 -279,6 +279,18 @@@ static inline bool nf_ct_should_gc(cons
                !nf_ct_is_dying(ct);
   }
   
+ +#define       NF_CT_DAY       (86400 * HZ)
+ +
+ +/* Set an arbitrary timeout large enough not to ever expire, this save
+ + * us a check for the IPS_OFFLOAD_BIT from the packet path via
+ + * nf_ct_is_expired().
+ + */
+ +static inline void nf_ct_offload_timeout(struct nf_conn *ct)
+ +{
+ +      if (nf_ct_expires(ct) < NF_CT_DAY / 2)
+ +              ct->timeout = nfct_time_stamp + NF_CT_DAY;
+ +}
+ +
   struct kernel_param;
   
   int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
@@@ -298,7 -286,7 +298,7 @@@ int nf_conntrack_hash_resize(unsigned i
   
   extern struct hlist_nulls_head *nf_conntrack_hash;
   extern unsigned int nf_conntrack_htable_size;
- extern seqcount_t nf_conntrack_generation;
+ extern seqcount_spinlock_t nf_conntrack_generation;
   extern unsigned int nf_conntrack_max;
   
   /* must be called with rcu read lock held */
diff --combined init/init_task.c

index a3eb3847e1f48d5c8c9df2f6e50520e35af4b9c8,94fe3ba1bb600cf40d07ea7af04f327fb24a034b..89024e8c4e95e56eb6abf6526c8bf7c52db68b56
--- 1/init/init_task.c
--- 2/init/init_task.c
+++ b/init/init_task.c
@@@ -154,7 -154,8 +154,8 @@@ struct task_struct init_tas
         .trc_holdout_list = LIST_HEAD_INIT(init_task.trc_holdout_list),
   #endif
   #ifdef CONFIG_CPUSETS
-       .mems_allowed_seq = SEQCNT_ZERO(init_task.mems_allowed_seq),
+       .mems_allowed_seq = SEQCNT_SPINLOCK_ZERO(init_task.mems_allowed_seq,
+                                                &init_task.alloc_lock),
   #endif
   #ifdef CONFIG_RT_MUTEXES
         .pi_waiters     = RB_ROOT_CACHED,
@@@ -204,9 -205,6 +205,9 @@@
   #ifdef CONFIG_SECURITY
         .security       = NULL,
   #endif
+ +#ifdef CONFIG_SECCOMP
+ +      .seccomp        = { .filter_count = ATOMIC_INIT(0) },
+ +#endif
   };
   EXPORT_SYMBOL(init_task);
   
diff --combined kernel/fork.c

index 35e9894d394c2941a2ebf873494e6e1546834421,fc72f09a61b2b7d5c2bda8f46f01a77c93cebde3..4d32190861bdc6500730bf296b2f79f7a51ceacf
--- 1/kernel/fork.c
--- 2/kernel/fork.c
+++ b/kernel/fork.c
@@@ -261,7 -261,7 +261,7 @@@ static unsigned long *alloc_thread_stac
                                              THREAD_SIZE_ORDER);
   
         if (likely(page)) {
- -              tsk->stack = page_address(page);
+ +              tsk->stack = kasan_reset_tag(page_address(page));
                 return tsk->stack;
         }
         return NULL;
@@@ -276,8 -276,13 +276,8 @@@ static inline void free_thread_stack(st
         if (vm) {
                 int i;
   
- -              for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
- -                      mod_memcg_page_state(vm->pages[i],
- -                                           MEMCG_KERNEL_STACK_KB,
- -                                           -(int)(PAGE_SIZE / 1024));
- -
+ +              for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
                         memcg_kmem_uncharge_page(vm->pages[i], 0);
- -              }
   
                 for (i = 0; i < NR_CACHED_STACKS; i++) {
                         if (this_cpu_cmpxchg(cached_stacks[i],
@@@ -302,7 -307,6 +302,7 @@@ static unsigned long *alloc_thread_stac
   {
         unsigned long *stack;
         stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
+ +      stack = kasan_reset_tag(stack);
         tsk->stack = stack;
         return stack;
   }
@@@ -355,13 -359,7 +355,13 @@@ struct vm_area_struct *vm_area_dup(stru
         struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
   
         if (new) {
- -              *new = *orig;
+ +              ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
+ +              ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
+ +              /*
+ +               * orig->shared.rb may be modified concurrently, but the clone
+ +               * will be reinitialized.
+ +               */
+ +              *new = data_race(*orig);
                 INIT_LIST_HEAD(&new->anon_vma_chain);
                 new->vm_next = new->vm_prev = NULL;
         }
@@@ -378,14 -376,31 +378,14 @@@ static void account_kernel_stack(struc
         void *stack = task_stack_page(tsk);
         struct vm_struct *vm = task_stack_vm_area(tsk);
   
- -      BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
- -
- -      if (vm) {
- -              int i;
- -
- -              BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
- -
- -              for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
- -                      mod_zone_page_state(page_zone(vm->pages[i]),
- -                                          NR_KERNEL_STACK_KB,
- -                                          PAGE_SIZE / 1024 * account);
- -              }
- -      } else {
- -              /*
- -               * All stack pages are in the same zone and belong to the
- -               * same memcg.
- -               */
- -              struct page *first_page = virt_to_page(stack);
- -
- -              mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
- -                                  THREAD_SIZE / 1024 * account);
   
- -              mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB,
- -                                  account * (THREAD_SIZE / 1024));
- -      }
+ +      /* All stack pages are in the same node. */
+ +      if (vm)
+ +              mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB,
+ +                                    account * (THREAD_SIZE / 1024));
+ +      else
+ +              mod_lruvec_slab_state(stack, NR_KERNEL_STACK_KB,
+ +                                    account * (THREAD_SIZE / 1024));
   }
   
   static int memcg_charge_kernel_stack(struct task_struct *tsk)
@@@ -394,23 -409,24 +394,23 @@@
         struct vm_struct *vm = task_stack_vm_area(tsk);
         int ret;
   
+ +      BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
+ +
         if (vm) {
                 int i;
   
+ +              BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+ +
                 for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
                         /*
                          * If memcg_kmem_charge_page() fails, page->mem_cgroup
- -                       * pointer is NULL, and both memcg_kmem_uncharge_page()
- -                       * and mod_memcg_page_state() in free_thread_stack()
- -                       * will ignore this page. So it's safe.
+ +                       * pointer is NULL, and memcg_kmem_uncharge_page() in
+ +                       * free_thread_stack() will ignore this page.
                          */
                         ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL,
                                                      0);
                         if (ret)
                                 return ret;
- -
- -                      mod_memcg_page_state(vm->pages[i],
- -                                           MEMCG_KERNEL_STACK_KB,
- -                                           PAGE_SIZE / 1024);
                 }
         }
   #endif
@@@ -457,6 -473,7 +457,6 @@@ void free_task(struct task_struct *tsk
   #endif
         rt_mutex_debug_task_free(tsk);
         ftrace_graph_exit_task(tsk);
- -      put_seccomp_filter(tsk);
         arch_release_task_struct(tsk);
         if (tsk->flags & PF_KTHREAD)
                 free_kthread_struct(tsk);
@@@ -1457,7 -1474,7 +1457,7 @@@ static int copy_files(unsigned long clo
                 goto out;
         }
   
- -      newf = dup_fd(oldf, &error);
+ +      newf = dup_fd(oldf, NR_OPEN_MAX, &error);
         if (!newf)
                 goto out;
   
@@@ -1770,18 -1787,22 +1770,18 @@@ static void pidfd_show_fdinfo(struct se
    */
   static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
   {
- -      struct task_struct *task;
         struct pid *pid = file->private_data;
         __poll_t poll_flags = 0;
   
         poll_wait(file, &pid->wait_pidfd, pts);
   
- -      rcu_read_lock();
- -      task = pid_task(pid, PIDTYPE_PID);
         /*
          * Inform pollers only when the whole thread group exits.
          * If the thread group leader exits before all other threads in the
          * group, then poll(2) should block, similar to the wait(2) family.
          */
- -      if (!task || (task->exit_state && thread_group_empty(task)))
+ +      if (thread_group_exited(pid))
                 poll_flags = EPOLLIN | EPOLLRDNORM;
- -      rcu_read_unlock();
   
         return poll_flags;
   }
@@@ -2011,14 -2032,20 +2011,14 @@@ static __latent_entropy struct task_str
   #ifdef CONFIG_CPUSETS
         p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
         p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
-       seqcount_init(&p->mems_allowed_seq);
+       seqcount_spinlock_init(&p->mems_allowed_seq, &p->alloc_lock);
   #endif
   #ifdef CONFIG_TRACE_IRQFLAGS
- -      p->irq_events = 0;
- -      p->hardirq_enable_ip = 0;
- -      p->hardirq_enable_event = 0;
- -      p->hardirq_disable_ip = _THIS_IP_;
- -      p->hardirq_disable_event = 0;
- -      p->softirqs_enabled = 1;
- -      p->softirq_enable_ip = _THIS_IP_;
- -      p->softirq_enable_event = 0;
- -      p->softirq_disable_ip = 0;
- -      p->softirq_disable_event = 0;
- -      p->softirq_context = 0;
+ +      memset(&p->irqtrace, 0, sizeof(p->irqtrace));
+ +      p->irqtrace.hardirq_disable_ip  = _THIS_IP_;
+ +      p->irqtrace.softirq_enable_ip   = _THIS_IP_;
+ +      p->softirqs_enabled             = 1;
+ +      p->softirq_context              = 0;
   #endif
   
         p->pagefault_disabled = 0;
@@@ -2075,7 -2102,8 +2075,7 @@@
         retval = copy_io(clone_flags, p);
         if (retval)
                 goto bad_fork_cleanup_namespaces;
- -      retval = copy_thread_tls(clone_flags, args->stack, args->stack_size, p,
- -                               args->tls);
+ +      retval = copy_thread(clone_flags, args->stack, args->stack_size, p, args->tls);
         if (retval)
                 goto bad_fork_cleanup_io;
   
@@@ -2274,7 -2302,6 +2274,7 @@@
         write_unlock_irq(&tasklist_lock);
   
         proc_fork_connector(p);
+ +      sched_post_fork(p);
         cgroup_post_fork(p, args);
         perf_event_fork(p);
   
@@@ -2393,20 -2420,6 +2393,20 @@@ long _do_fork(struct kernel_clone_args 
         int trace = 0;
         long nr;
   
+ +      /*
+ +       * For legacy clone() calls, CLONE_PIDFD uses the parent_tid argument
+ +       * to return the pidfd. Hence, CLONE_PIDFD and CLONE_PARENT_SETTID are
+ +       * mutually exclusive. With clone3() CLONE_PIDFD has grown a separate
+ +       * field in struct clone_args and it still doesn't make sense to have
+ +       * them both point at the same memory location. Performing this check
+ +       * here has the advantage that we don't need to have a separate helper
+ +       * to check for legacy clone().
+ +       */
+ +      if ((args->flags & CLONE_PIDFD) &&
+ +          (args->flags & CLONE_PARENT_SETTID) &&
+ +          (args->pidfd == args->parent_tid))
+ +              return -EINVAL;
+ +
         /*
          * Determine whether and which event to report to ptracer.  When
          * called from kernel_thread or CLONE_UNTRACED is explicitly
@@@ -2464,6 -2477,42 +2464,6 @@@
         return nr;
   }
   
- -bool legacy_clone_args_valid(const struct kernel_clone_args *kargs)
- -{
- -      /* clone(CLONE_PIDFD) uses parent_tidptr to return a pidfd */
- -      if ((kargs->flags & CLONE_PIDFD) &&
- -          (kargs->flags & CLONE_PARENT_SETTID))
- -              return false;
- -
- -      return true;
- -}
- -
- -#ifndef CONFIG_HAVE_COPY_THREAD_TLS
- -/* For compatibility with architectures that call do_fork directly rather than
- - * using the syscall entry points below. */
- -long do_fork(unsigned long clone_flags,
- -            unsigned long stack_start,
- -            unsigned long stack_size,
- -            int __user *parent_tidptr,
- -            int __user *child_tidptr)
- -{
- -      struct kernel_clone_args args = {
- -              .flags          = (lower_32_bits(clone_flags) & ~CSIGNAL),
- -              .pidfd          = parent_tidptr,
- -              .child_tid      = child_tidptr,
- -              .parent_tid     = parent_tidptr,
- -              .exit_signal    = (lower_32_bits(clone_flags) & CSIGNAL),
- -              .stack          = stack_start,
- -              .stack_size     = stack_size,
- -      };
- -
- -      if (!legacy_clone_args_valid(&args))
- -              return -EINVAL;
- -
- -      return _do_fork(&args);
- -}
- -#endif
- -
   /*
    * Create a kernel thread.
    */
@@@ -2542,12 -2591,24 +2542,12 @@@ SYSCALL_DEFINE5(clone, unsigned long, c
                 .tls            = tls,
         };
   
- -      if (!legacy_clone_args_valid(&args))
- -              return -EINVAL;
- -
         return _do_fork(&args);
   }
   #endif
   
   #ifdef __ARCH_WANT_SYS_CLONE3
   
- -/*
- - * copy_thread implementations handle CLONE_SETTLS by reading the TLS value from
- - * the registers containing the syscall arguments for clone. This doesn't work
- - * with clone3 since the TLS value is passed in clone_args instead.
- - */
- -#ifndef CONFIG_HAVE_COPY_THREAD_TLS
- -#error clone3 requires copy_thread_tls support in arch
- -#endif
- -
   noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
                                               struct clone_args __user *uargs,
                                               size_t usize)
@@@ -2844,15 -2905,14 +2844,15 @@@ static int unshare_fs(unsigned long uns
   /*
    * Unshare file descriptor table if it is being shared
    */
- -static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
+ +int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
+ +             struct files_struct **new_fdp)
   {
         struct files_struct *fd = current->files;
         int error = 0;
   
         if ((unshare_flags & CLONE_FILES) &&
             (fd && atomic_read(&fd->count) > 1)) {
- -              *new_fdp = dup_fd(fd, &error);
+ +              *new_fdp = dup_fd(fd, max_fds, &error);
                 if (!*new_fdp)
                         return error;
         }
@@@ -2863,7 -2923,7 +2863,7 @@@
   /*
    * unshare allows a process to 'unshare' part of the process
    * context which was originally shared using clone.  copy_*
- - * functions used by do_fork() cannot be used here directly
+ + * functions used by _do_fork() cannot be used here directly
    * because they modify an inactive task_struct that is being
    * constructed. Here we are modifying the current, active,
    * task_struct.
@@@ -2912,7 -2972,7 +2912,7 @@@ int ksys_unshare(unsigned long unshare_
         err = unshare_fs(unshare_flags, &new_fs);
         if (err)
                 goto bad_unshare_out;
- -      err = unshare_fd(unshare_flags, &new_fd);
+ +      err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd);
         if (err)
                 goto bad_unshare_cleanup_fs;
         err = unshare_userns(unshare_flags, &new_cred);
@@@ -3001,7 -3061,7 +3001,7 @@@ int unshare_files(struct files_struct *
         struct files_struct *copy = NULL;
         int error;
   
- -      error = unshare_fd(CLONE_FILES, &copy);
+ +      error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, &copy);
         if (error || !copy) {
                 *displaced = NULL;
                 return error;
diff --combined kernel/time/timekeeping.c

index 63a632f9896c38133e4877dc828390af052105f1,05ecfd8a3314037e2f84fbdfb7b557efaebb72c9..406306b3345232298f4b7dbb3b0a58e77536a568
--- 1/kernel/time/timekeeping.c
--- 2/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@@ -39,18 -39,19 +39,19 @@@ enum timekeeping_adv_mode 
         TK_ADV_FREQ
   };
   
+ static DEFINE_RAW_SPINLOCK(timekeeper_lock);
+ 
   /*
    * The most important data for readout fits into a single 64 byte
    * cache line.
    */
   static struct {
-       seqcount_t              seq;
+       seqcount_raw_spinlock_t seq;
         struct timekeeper       timekeeper;
   } tk_core ____cacheline_aligned = {
-       .seq = SEQCNT_ZERO(tk_core.seq),
+       .seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_core.seq, &timekeeper_lock),
   };
   
- static DEFINE_RAW_SPINLOCK(timekeeper_lock);
   static struct timekeeper shadow_timekeeper;
   
   /**
@@@ -63,7 -64,7 +64,7 @@@
    * See @update_fast_timekeeper() below.
    */
   struct tk_fast {
-       seqcount_t              seq;
+       seqcount_raw_spinlock_t seq;
         struct tk_read_base     base[2];
   };
   
@@@ -80,11 -81,13 +81,13 @@@ static struct clocksource dummy_clock 
   };
   
   static struct tk_fast tk_fast_mono ____cacheline_aligned = {
+       .seq     = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_mono.seq, &timekeeper_lock),
         .base[0] = { .clock = &dummy_clock, },
         .base[1] = { .clock = &dummy_clock, },
   };
   
   static struct tk_fast tk_fast_raw  ____cacheline_aligned = {
+       .seq     = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_raw.seq, &timekeeper_lock),
         .base[0] = { .clock = &dummy_clock, },
         .base[1] = { .clock = &dummy_clock, },
   };
@@@ -157,7 -160,7 +160,7 @@@ static inline void tk_update_sleep_time
    * tk_clock_read - atomic clocksource read() helper
    *
    * This helper is necessary to use in the read paths because, while the
-  * seqlock ensures we don't return a bad value while structures are updated,
+  * seqcount ensures we don't return a bad value while structures are updated,
    * it doesn't protect from potential crashes. There is the possibility that
    * the tkr's clocksource may change between the read reference, and the
    * clock reference passed to the read function.  This can cause crashes if
@@@ -222,10 -225,10 +225,10 @@@ static inline u64 timekeeping_get_delta
         unsigned int seq;
   
         /*
-        * Since we're called holding a seqlock, the data may shift
+        * Since we're called holding a seqcount, the data may shift
          * under us while we're doing the calculation. This can cause
          * false positives, since we'd note a problem but throw the
-        * results away. So nest another seqlock here to atomically
+        * results away. So nest another seqcount here to atomically
          * grab the points we are checking with.
          */
         do {
@@@ -486,7 -489,7 +489,7 @@@ EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns
    *
    * To keep it NMI safe since we're accessing from tracing, we're not using a
    * separate timekeeper with updates to monotonic clock and boot offset
-  * protected with seqlocks. This has the following minor side effects:
+  * protected with seqcounts. This has the following minor side effects:
    *
    * (1) Its possible that a timestamp be taken after the boot offset is updated
    * but before the timekeeper is updated. If this happens, the new boot offset
@@@ -2193,7 -2196,7 +2196,7 @@@ EXPORT_SYMBOL(ktime_get_coarse_ts64)
   void do_timer(unsigned long ticks)
   {
         jiffies_64 += ticks;
- -      calc_global_load(ticks);
+ +      calc_global_load();
   }
   
   /**
diff --combined net/netfilter/nf_conntrack_core.c

index e38b60fc183e364003e148eec5478670fdc99326,b597b5b16ba1919785468ddf84b96c316fa00d58..5b97d233f89ba5d8477311a2c2d82f9faf22dc5e
--- 1/net/netfilter/nf_conntrack_core.c
--- 2/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@@ -180,7 -180,7 +180,7 @@@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_s
   
   unsigned int nf_conntrack_max __read_mostly;
   EXPORT_SYMBOL_GPL(nf_conntrack_max);
- seqcount_t nf_conntrack_generation __read_mostly;
+ seqcount_spinlock_t nf_conntrack_generation __read_mostly;
   static unsigned int nf_conntrack_hash_rnd __read_mostly;
   
   static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
@@@ -1006,7 -1006,7 +1006,7 @@@ static int nf_ct_resolve_clash_harder(s
    *
    * @skb: skb that causes the clash
    * @h: tuplehash of the clashing entry already in table
- - * @hash_reply: hash slot for reply direction
+ + * @reply_hash: hash slot for reply direction
    *
    * A conntrack entry can be inserted to the connection tracking table
    * if there is no existing entry with an identical tuple.
@@@ -1344,6 -1344,18 +1344,6 @@@ static bool gc_worker_can_early_drop(co
         return false;
   }
   
- -#define       DAY     (86400 * HZ)
- -
- -/* Set an arbitrary timeout large enough not to ever expire, this save
- - * us a check for the IPS_OFFLOAD_BIT from the packet path via
- - * nf_ct_is_expired().
- - */
- -static void nf_ct_offload_timeout(struct nf_conn *ct)
- -{
- -      if (nf_ct_expires(ct) < DAY / 2)
- -              ct->timeout = nfct_time_stamp + DAY;
- -}
- -
   static void gc_worker(struct work_struct *work)
   {
         unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
@@@ -2588,7 -2600,8 +2588,8 @@@ int nf_conntrack_init_start(void
         /* struct nf_ct_ext uses u8 to store offsets/size */
         BUILD_BUG_ON(total_extension_size() > 255u);
   
-       seqcount_init(&nf_conntrack_generation);
+       seqcount_spinlock_init(&nf_conntrack_generation,
+                              &nf_conntrack_locks_all_lock);
   
         for (i = 0; i < CONNTRACK_LOCKS; i++)
                 spin_lock_init(&nf_conntrack_locks[i]);
diff --combined net/xfrm/xfrm_policy.c

index 042ea9b40c7b4183b6a0a4f9322c8361d4f59f94,732a940468b075fabf44df3e891a875cee593986..d5280fd6f9c127b6099f07108a74f76005923e98
--- 1/net/xfrm/xfrm_policy.c
--- 2/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@@ -39,7 -39,7 +39,7 @@@
   #ifdef CONFIG_XFRM_STATISTICS
   #include <net/snmp.h>
   #endif
- -#ifdef CONFIG_INET_ESPINTCP
+ +#ifdef CONFIG_XFRM_ESPINTCP
   #include <net/espintcp.h>
   #endif
   
@@@ -122,7 -122,7 +122,7 @@@ struct xfrm_pol_inexact_bin 
         /* list containing '*:*' policies */
         struct hlist_head hhead;
   
-       seqcount_t count;
+       seqcount_spinlock_t count;
         /* tree sorted by daddr/prefix */
         struct rb_root root_d;
   
@@@ -155,7 -155,7 +155,7 @@@ static struct xfrm_policy_afinfo const 
                                                 __read_mostly;
   
   static struct kmem_cache *xfrm_dst_cache __ro_after_init;
- static __read_mostly seqcount_t xfrm_policy_hash_generation;
+ static __read_mostly seqcount_mutex_t xfrm_policy_hash_generation;
   
   static struct rhashtable xfrm_policy_inexact_table;
   static const struct rhashtable_params xfrm_pol_inexact_params;
@@@ -719,7 -719,7 +719,7 @@@ xfrm_policy_inexact_alloc_bin(const str
         INIT_HLIST_HEAD(&bin->hhead);
         bin->root_d = RB_ROOT;
         bin->root_s = RB_ROOT;
-       seqcount_init(&bin->count);
+       seqcount_spinlock_init(&bin->count, &net->xfrm.xfrm_policy_lock);
   
         prev = rhashtable_lookup_get_insert_key(&xfrm_policy_inexact_table,
                                                 &bin->k, &bin->head,
@@@ -1433,10 -1433,14 +1433,10 @@@ static void xfrm_policy_requeue(struct 
         spin_unlock_bh(&pq->hold_queue.lock);
   }
   
- -static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
- -                                 struct xfrm_policy *pol)
+ +static inline bool xfrm_policy_mark_match(const struct xfrm_mark *mark,
+ +                                        struct xfrm_policy *pol)
   {
- -      if (policy->mark.v == pol->mark.v &&
- -          policy->priority == pol->priority)
- -              return true;
- -
- -      return false;
+ +      return mark->v == pol->mark.v && mark->m == pol->mark.m;
   }
   
   static u32 xfrm_pol_bin_key(const void *data, u32 len, u32 seed)
@@@ -1499,7 -1503,7 +1499,7 @@@ static void xfrm_policy_insert_inexact_
                 if (pol->type == policy->type &&
                     pol->if_id == policy->if_id &&
                     !selector_cmp(&pol->selector, &policy->selector) &&
- -                  xfrm_policy_mark_match(policy, pol) &&
+ +                  xfrm_policy_mark_match(&policy->mark, pol) &&
                     xfrm_sec_ctx_match(pol->security, policy->security) &&
                     !WARN_ON(delpol)) {
                         delpol = pol;
@@@ -1534,7 -1538,7 +1534,7 @@@ static struct xfrm_policy *xfrm_policy_
                 if (pol->type == policy->type &&
                     pol->if_id == policy->if_id &&
                     !selector_cmp(&pol->selector, &policy->selector) &&
- -                  xfrm_policy_mark_match(policy, pol) &&
+ +                  xfrm_policy_mark_match(&policy->mark, pol) &&
                     xfrm_sec_ctx_match(pol->security, policy->security) &&
                     !WARN_ON(delpol)) {
                         if (excl)
@@@ -1606,8 -1610,9 +1606,8 @@@ int xfrm_policy_insert(int dir, struct 
   EXPORT_SYMBOL(xfrm_policy_insert);
   
   static struct xfrm_policy *
- -__xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
- -                      u8 type, int dir,
- -                      struct xfrm_selector *sel,
+ +__xfrm_policy_bysel_ctx(struct hlist_head *chain, const struct xfrm_mark *mark,
+ +                      u32 if_id, u8 type, int dir, struct xfrm_selector *sel,
                         struct xfrm_sec_ctx *ctx)
   {
         struct xfrm_policy *pol;
@@@ -1618,7 -1623,7 +1618,7 @@@
         hlist_for_each_entry(pol, chain, bydst) {
                 if (pol->type == type &&
                     pol->if_id == if_id &&
- -                  (mark & pol->mark.m) == pol->mark.v &&
+ +                  xfrm_policy_mark_match(mark, pol) &&
                     !selector_cmp(sel, &pol->selector) &&
                     xfrm_sec_ctx_match(ctx, pol->security))
                         return pol;
@@@ -1627,10 -1632,11 +1627,10 @@@
         return NULL;
   }
   
- -struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
- -                                        u8 type, int dir,
- -                                        struct xfrm_selector *sel,
- -                                        struct xfrm_sec_ctx *ctx, int delete,
- -                                        int *err)
+ +struct xfrm_policy *
+ +xfrm_policy_bysel_ctx(struct net *net, const struct xfrm_mark *mark, u32 if_id,
+ +                    u8 type, int dir, struct xfrm_selector *sel,
+ +                    struct xfrm_sec_ctx *ctx, int delete, int *err)
   {
         struct xfrm_pol_inexact_bin *bin = NULL;
         struct xfrm_policy *pol, *ret = NULL;
@@@ -1697,9 -1703,9 +1697,9 @@@
   }
   EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
   
- -struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id,
- -                                   u8 type, int dir, u32 id, int delete,
- -                                   int *err)
+ +struct xfrm_policy *
+ +xfrm_policy_byid(struct net *net, const struct xfrm_mark *mark, u32 if_id,
+ +               u8 type, int dir, u32 id, int delete, int *err)
   {
         struct xfrm_policy *pol, *ret;
         struct hlist_head *chain;
@@@ -1714,7 -1720,8 +1714,7 @@@
         ret = NULL;
         hlist_for_each_entry(pol, chain, byidx) {
                 if (pol->type == type && pol->index == id &&
- -                  pol->if_id == if_id &&
- -                  (mark & pol->mark.m) == pol->mark.v) {
+ +                  pol->if_id == if_id && xfrm_policy_mark_match(mark, pol)) {
                         xfrm_pol_hold(pol);
                         if (delete) {
                                 *err = security_xfrm_policy_delete(
@@@ -1899,7 -1906,7 +1899,7 @@@ static int xfrm_policy_match(const stru
   
   static struct xfrm_pol_inexact_node *
   xfrm_policy_lookup_inexact_addr(const struct rb_root *r,
-                               seqcount_t *count,
+                               seqcount_spinlock_t *count,
                                 const xfrm_address_t *addr, u16 family)
   {
         const struct rb_node *parent;
@@@ -2751,7 -2758,6 +2751,7 @@@ static void xfrm_policy_queue_process(s
         struct xfrm_policy_queue *pq = &pol->polq;
         struct flowi fl;
         struct sk_buff_head list;
+ +      __u32 skb_mark;
   
         spin_lock(&pq->hold_queue.lock);
         skb = skb_peek(&pq->hold_queue);
@@@ -2761,12 -2767,7 +2761,12 @@@
         }
         dst = skb_dst(skb);
         sk = skb->sk;
+ +
+ +      /* Fixup the mark to support VTI. */
+ +      skb_mark = skb->mark;
+ +      skb->mark = pol->mark.v;
         xfrm_decode_session(skb, &fl, dst->ops->family);
+ +      skb->mark = skb_mark;
         spin_unlock(&pq->hold_queue.lock);
   
         dst_hold(xfrm_dst_path(dst));
@@@ -2798,12 -2799,7 +2798,12 @@@
         while (!skb_queue_empty(&list)) {
                 skb = __skb_dequeue(&list);
   
+ +              /* Fixup the mark to support VTI. */
+ +              skb_mark = skb->mark;
+ +              skb->mark = pol->mark.v;
                 xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
+ +              skb->mark = skb_mark;
+ +
                 dst_hold(xfrm_dst_path(skb_dst(skb)));
                 dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0);
                 if (IS_ERR(dst)) {
@@@ -4157,10 -4153,10 +4157,10 @@@ void __init xfrm_init(void
   {
         register_pernet_subsys(&xfrm_net_ops);
         xfrm_dev_init();
-       seqcount_init(&xfrm_policy_hash_generation);
+       seqcount_mutex_init(&xfrm_policy_hash_generation, &hash_resize_mutex);
         xfrm_input_init();
   
- -#ifdef CONFIG_INET_ESPINTCP
+ +#ifdef CONFIG_XFRM_ESPINTCP
         espintcp_init();
   #endif
author	Linus Torvalds <[email protected]>
	Tue, 11 Aug 2020 02:07:44 +0000 (19:07 -0700)
committer	Linus Torvalds <[email protected]>
	Tue, 11 Aug 2020 02:07:44 +0000 (19:07 -0700)
		1	2
arch/ia64/kernel/process.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/ia64/kernel/smp.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/tsc.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/apic/apic.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/common.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/intel.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/mpparse.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/setup.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/mm/init_32.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/xen/smp_pv.c	patch \|	diff1 \|	diff2 \|	blob \| history
block/blk-iocost.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/dma-buf/dma-resv.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/iommu/intel/irq_remapping.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/raid5.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/raid5.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/userfaultfd.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/net/netfilter/nf_conntrack.h	patch \|	diff1 \|	diff2 \|	blob \| history
init/init_task.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/fork.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/time/timekeeping.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/netfilter/nf_conntrack_core.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/xfrm/xfrm_policy.c	patch \|	diff1 \|	diff2 \|	blob \| history