Git Repo - linux.git/commitdiff
Merge tag 'locking_core_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <[email protected]>
Wed, 12 Jan 2022 01:24:45 +0000 (17:24 -0800)
committer Linus Torvalds <[email protected]>
Wed, 12 Jan 2022 01:24:45 +0000 (17:24 -0800)
Pull locking updates from Borislav Petkov:
 "Lots of cleanups and preparation. Highlights:

   - futex: Cleanup and remove runtime futex_cmpxchg detection

   - rtmutex: Some fixes for the PREEMPT_RT locking infrastructure

   - kcsan: Share owner_on_cpu() between mutex, rtmutex and rwsem and
     annotate the racy owner->on_cpu access *once*.

   - atomic64: Dead-Code-Elimination"

[ Description above by Peter Zijlstra ]
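
The kcsan item in the highlights lands as a shared owner_on_cpu() helper in <linux/sched.h> (visible in the include/linux/sched.h hunk further down). As a rough illustration of how a lock's optimistic-spin path would use it; the wrapper name and loop condition below are invented for the example, only owner_on_cpu() and need_resched() are real kernel APIs:

/*
 * Illustrative only, not part of this merge.  A spin-wait check in the
 * style of mutex_spin_on_owner(), using the shared helper instead of an
 * open-coded, unannotated owner->on_cpu read.
 */
#include <linux/sched.h>

static bool keep_spinning(struct task_struct *owner)
{
	/*
	 * owner_on_cpu() does the READ_ONCE() on owner->on_cpu and the
	 * vcpu_is_preempted() check in one place, so mutex, rtmutex and
	 * rwsem no longer each carry their own copy.
	 */
	return owner_on_cpu(owner) && !need_resched();
}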

* tag 'locking_core_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  locking/atomic: atomic64: Remove unusable atomic ops
  futex: Fix additional regressions
  locking: Allow to include asm/spinlock_types.h from linux/spinlock_types_raw.h
  x86/mm: Include spinlock_t definition in pgtable.
  locking: Mark racy reads of owner->on_cpu
  locking: Make owner_on_cpu() into <linux/sched.h>
  lockdep/selftests: Adapt ww-tests for PREEMPT_RT
  lockdep/selftests: Skip the softirq related tests on PREEMPT_RT
  lockdep/selftests: Unbalanced migrate_disable() & rcu_read_lock().
  lockdep/selftests: Avoid using local_lock_{acquire|release}().
  lockdep: Remove softirq accounting on PREEMPT_RT.
  locking/rtmutex: Add rt_mutex_lock_nest_lock() and rt_mutex_lock_killable().
  locking/rtmutex: Squash self-deadlock check for ww_rt_mutex.
  locking: Remove rt_rwlock_is_contended().
  sched: Trigger warning if ->migration_disabled counter underflows.
  futex: Fix sparc32/m68k/nds32 build regression
  futex: Remove futex_cmpxchg detection
  futex: Ensure futex_atomic_cmpxchg_inatomic() is present
  kernel/locking: Use a pointer in ww_mutex_trylock().

arch/arm/Kconfig
arch/arm64/Kconfig
arch/m68k/Kconfig
arch/um/Kconfig
arch/um/kernel/skas/uaccess.c
include/linux/sched.h
init/Kconfig
kernel/sched/core.c

diff --combined arch/arm/Kconfig
index 78d04735896bc23c743091f950a4de7637d78303,f9aa0860f83e7110dda8541331d53ee036bb5db2..fabe39169b120ca6c11fb3037b2c134f0faa7b5c
@@@ -69,7 -69,6 +69,7 @@@ config AR
        select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
        select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
        select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
 +      select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL
        select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
        select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
        select HAVE_ARCH_MMAP_RND_BITS if MMU
@@@ -93,7 -92,6 +93,6 @@@
        select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
        select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
        select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !(THUMB2_KERNEL && CC_IS_CLANG)
-       select HAVE_FUTEX_CMPXCHG if FUTEX
        select HAVE_GCC_PLUGINS
        select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)
        select HAVE_IRQ_TIME_ACCOUNTING
@@@ -480,6 -478,7 +479,6 @@@ config ARCH_S3C24X
        select GPIO_SAMSUNG
        select GPIOLIB
        select GENERIC_IRQ_MULTI_HANDLER
 -      select HAVE_S3C2410_I2C if I2C
        select NEED_MACH_IO_H
        select S3C2410_WATCHDOG
        select SAMSUNG_ATAGS
diff --combined arch/arm64/Kconfig
index ef3b5cb40d166f4f7f051140342c3b0200553792,5e2dfef78956ac974facac542862b1536d5e1f61..f6e333b59314148afa4361b202e3b0ca8c869d80
@@@ -150,8 -150,6 +150,8 @@@ config ARM6
        select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
        select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN
        select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE)
 +      # Some instrumentation may be unsound, hence EXPERT
 +      select HAVE_ARCH_KCSAN if EXPERT
        select HAVE_ARCH_KFENCE
        select HAVE_ARCH_KGDB
        select HAVE_ARCH_MMAP_RND_BITS
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_POSIX_CPU_TIMERS_TASK_WORK
        select HAVE_FUNCTION_ARG_ACCESS_API
-       select HAVE_FUTEX_CMPXCHG if FUTEX
        select MMU_GATHER_RCU_TABLE_FREE
        select HAVE_RSEQ
        select HAVE_STACKPROTECTOR
@@@ -1547,12 -1544,6 +1546,12 @@@ endmen
  
  menu "ARMv8.2 architectural features"
  
 +config AS_HAS_ARMV8_2
 +       def_bool $(cc-option,-Wa$(comma)-march=armv8.2-a)
 +
 +config AS_HAS_SHA3
 +       def_bool $(as-instr,.arch armv8.2-a+sha3)
 +
  config ARM64_PMEM
        bool "Enable support for persistent memory"
        select ARCH_HAS_PMEM_API
diff --combined arch/m68k/Kconfig
index 4cae3fbe7f974eef745ea627672d110f924db685,15a793c5b2dc428d2e7d945189f36366a441f2b7..936e1803c7c7d3cb278e33b912fc7009e6b551c4
@@@ -9,7 -9,6 +9,7 @@@ config M68
        select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
        select ARCH_MIGHT_HAVE_PC_PARPORT if ISA
        select ARCH_NO_PREEMPT if !COLDFIRE
 +      select ARCH_USE_MEMTEST if MMU_MOTOROLA
        select ARCH_WANT_IPC_PARSE_VERSION
        select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
        select DMA_DIRECT_REMAP if HAS_DMA && MMU && !COLDFIRE
@@@ -21,7 -20,6 +21,6 @@@
        select HAVE_ASM_MODVERSIONS
        select HAVE_DEBUG_BUGVERBOSE
        select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_HAS_NO_UNALIGNED
-       select HAVE_FUTEX_CMPXCHG if MMU && FUTEX
        select HAVE_MOD_ARCH_SPECIFIC
        select HAVE_UID16
        select MMU_GATHER_NO_RANGE if MMU
diff --combined arch/um/Kconfig
index b233db4f42b2c8c74dd8ad01435c097fad657168,c906250d49706c77879189f6e507d4c40d4e036d..4d398b80aea8c2a82d5d161f85eece291fac428f
@@@ -14,14 -14,13 +14,13 @@@ config UM
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ASM_MODVERSIONS
        select HAVE_UID16
-       select HAVE_FUTEX_CMPXCHG if FUTEX
        select HAVE_DEBUG_KMEMLEAK
        select HAVE_DEBUG_BUGVERBOSE
        select NO_DMA if !UML_DMA_EMULATION
 +      select OF_EARLY_FLATTREE if OF
        select GENERIC_IRQ_SHOW
        select GENERIC_CPU_DEVICES
        select HAVE_GCC_PLUGINS
 -      select SET_FS
        select TRACE_IRQFLAGS_SUPPORT
        select TTY # Needed for line.c
        select HAVE_ARCH_VMAP_STACK
diff --combined arch/um/kernel/skas/uaccess.c
index 23775d01a2a61bd3c6337e9f31e8818f599afa4a,9e37a7c05990d3207f66938236d30eea1d9757f4..aaee96f07172da74017c2d2d35cdac0ab853a0c8
@@@ -146,6 -146,11 +146,6 @@@ static int copy_chunk_from_user(unsigne
  
  unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
  {
 -      if (uaccess_kernel()) {
 -              memcpy(to, (__force void*)from, n);
 -              return 0;
 -      }
 -
        return buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to);
  }
  EXPORT_SYMBOL(raw_copy_from_user);
@@@ -161,6 -166,11 +161,6 @@@ static int copy_chunk_to_user(unsigned 
  
  unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
  {
 -      if (uaccess_kernel()) {
 -              memcpy((__force void *) to, from, n);
 -              return 0;
 -      }
 -
        return buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from);
  }
  EXPORT_SYMBOL(raw_copy_to_user);
@@@ -186,6 -196,12 +186,6 @@@ long strncpy_from_user(char *dst, cons
  
        if (!access_ok(src, 1))
                return -EFAULT;
 -
 -      if (uaccess_kernel()) {
 -              strncpy(dst, (__force void *) src, count);
 -              return strnlen(dst, count);
 -      }
 -
        n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user,
                      &ptr);
        if (n != 0)
@@@ -202,6 -218,11 +202,6 @@@ static int clear_chunk(unsigned long ad
  
  unsigned long __clear_user(void __user *mem, unsigned long len)
  {
 -      if (uaccess_kernel()) {
 -              memset((__force void*)mem, 0, len);
 -              return 0;
 -      }
 -
        return buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL);
  }
  EXPORT_SYMBOL(__clear_user);
@@@ -224,6 -245,10 +224,6 @@@ long strnlen_user(const char __user *st
  
        if (!access_ok(str, 1))
                return -EFAULT;
 -
 -      if (uaccess_kernel())
 -              return strnlen((__force char*)str, len) + 1;
 -
        n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count);
        if (n == 0)
                return count + 1;
@@@ -323,7 -348,6 +323,6 @@@ EXPORT_SYMBOL(arch_futex_atomic_op_inus
   * 0 - On success
   * -EFAULT - User access resulted in a page fault
   * -EAGAIN - Atomic operation was unable to complete due to contention
-  * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG)
   */
  
  int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
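
With the runtime detection gone, futex code can assume futex_atomic_cmpxchg_inatomic() is implemented everywhere, which is why the -ENOSYS case is dropped from the comment above. A hedged sketch of the usual calling pattern follows; set_futex_owner() and its error handling are invented for illustration, while the futex_atomic_cmpxchg_inatomic() signature matches the one in the hunk:

/* Illustrative sketch, not taken from this diff. */
#include <linux/futex.h>
#include <asm/futex.h>

static int set_futex_owner(u32 __user *uaddr, u32 old, u32 new)
{
	u32 cur;
	int ret;

	ret = futex_atomic_cmpxchg_inatomic(&cur, uaddr, old, new);
	if (ret)
		return ret;	/* -EFAULT or -EAGAIN; -ENOSYS is no longer possible */
	if (cur != old)
		return -EAGAIN;	/* lost the race, caller retries */
	return 0;
}
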
diff --combined include/linux/sched.h
index 4d0a12618e3c09a7c5a157c6f0f96464244d05aa,0b9b0e3f4791efa74f934ff48b9d142a0ae7440f..7a1f16df66e3d41fb63fe12f8ff760040885d323
@@@ -523,11 -523,7 +523,11 @@@ struct sched_statistics 
        u64                             nr_wakeups_affine_attempts;
        u64                             nr_wakeups_passive;
        u64                             nr_wakeups_idle;
 +
 +#ifdef CONFIG_SCHED_CORE
 +      u64                             core_forceidle_sum;
  #endif
 +#endif /* CONFIG_SCHEDSTATS */
  } ____cacheline_aligned;
  
  struct sched_entity {
@@@ -1343,9 -1339,6 +1343,9 @@@ struct task_struct 
  #ifdef CONFIG_TRACE_IRQFLAGS
        struct irqtrace_events          kcsan_save_irqtrace;
  #endif
 +#ifdef CONFIG_KCSAN_WEAK_MEMORY
 +      int                             kcsan_stack_depth;
 +#endif
  #endif
  
  #if IS_ENABLED(CONFIG_KUNIT)
@@@ -2178,6 -2171,15 +2178,15 @@@ extern long sched_getaffinity(pid_t pid
  #endif
  
  #ifdef CONFIG_SMP
+ static inline bool owner_on_cpu(struct task_struct *owner)
+ {
+       /*
+        * As lock holder preemption issue, we both skip spinning if
+        * task is not on cpu or its cpu is preempted
+        */
+       return READ_ONCE(owner->on_cpu) && !vcpu_is_preempted(task_cpu(owner));
+ }
  /* Returns effective CPU energy utilization, as seen by the scheduler */
  unsigned long sched_cpu_util(int cpu, unsigned long max);
  #endif /* CONFIG_SMP */
diff --combined init/Kconfig
index ec589e5c1b8ce577c9213ff822c0830ed176c465,ce077d51bb624fa73f651e863e59c71376df4a8d..f2ae41e6717f1bfb4e12b8e0e061819654d9d081
@@@ -550,7 -550,7 +550,7 @@@ config SCHED_THERMAL_PRESSUR
          i.e. put less load on throttled CPUs than on non/less throttled ones.
  
          This requires the architecture to implement
 -        arch_set_thermal_pressure() and arch_scale_thermal_pressure().
 +        arch_update_thermal_pressure() and arch_scale_thermal_pressure().
  
  config BSD_PROCESS_ACCT
        bool "BSD Process Accounting"
@@@ -1579,6 -1579,7 +1579,7 @@@ config BASE_FUL
  
  config FUTEX
        bool "Enable futex support" if EXPERT
+       depends on !(SPARC32 && SMP)
        default y
        imply RT_MUTEXES
        help
@@@ -1591,14 -1592,6 +1592,6 @@@ config FUTEX_P
        depends on FUTEX && RT_MUTEXES
        default y
  
- config HAVE_FUTEX_CMPXCHG
-       bool
-       depends on FUTEX
-       help
-         Architectures should select this if futex_atomic_cmpxchg_inatomic()
-         is implemented and always working. This removes a couple of runtime
-         checks.
  config EPOLL
        bool "Enable eventpoll support" if EXPERT
        default y
@@@ -1933,7 -1926,6 +1926,7 @@@ endchoic
  config SLAB_MERGE_DEFAULT
        bool "Allow slab caches to be merged"
        default y
 +      depends on SLAB || SLUB
        help
          For reduced kernel memory fragmentation, slab caches can be
          merged when they share the same size and other characteristics.
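
For background on the futex entries in this pull: before this series, architectures that did not select CONFIG_HAVE_FUTEX_CMPXCHG relied on a boot-time probe to set futex_cmpxchg_enabled. The sketch below is a rough paraphrase of that removed probe (from pre-5.17 futex code, reproduced from memory rather than from this diff) to give the deleted Kconfig symbol above some context; the series instead guarantees futex_atomic_cmpxchg_inatomic() is always present, so both the symbol and the probe go away.

/*
 * Rough paraphrase of the removed boot-time probe, kept here only to
 * show what the deleted HAVE_FUTEX_CMPXCHG option used to short-circuit.
 */
static int futex_cmpxchg_enabled __read_mostly;

static void __init futex_detect_cmpxchg(void)
{
#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
	u32 curval;

	/*
	 * Probing a NULL user pointer must fault; an architecture whose
	 * atomic cmpxchg is unusable returns -ENOSYS here instead.
	 */
	if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
		futex_cmpxchg_enabled = 1;
#endif
}
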
diff --combined kernel/sched/core.c
index 6f488072e2fd69ffaa6585b43fcc3155fba9e18a,9a02820fc10b95968c6a6e85fb7382ac1da6c6df..83872f95a1ea2af92378c6055f3221feed797eb3
@@@ -144,7 -144,7 +144,7 @@@ static inline bool __sched_core_less(st
                return false;
  
        /* flip prio, so high prio is leftmost */
 -      if (prio_less(b, a, task_rq(a)->core->core_forceidle))
 +      if (prio_less(b, a, !!task_rq(a)->core->core_forceidle_count))
                return true;
  
        return false;
@@@ -181,23 -181,15 +181,23 @@@ void sched_core_enqueue(struct rq *rq, 
        rb_add(&p->core_node, &rq->core_tree, rb_sched_core_less);
  }
  
 -void sched_core_dequeue(struct rq *rq, struct task_struct *p)
 +void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags)
  {
        rq->core->core_task_seq++;
  
 -      if (!sched_core_enqueued(p))
 -              return;
 +      if (sched_core_enqueued(p)) {
 +              rb_erase(&p->core_node, &rq->core_tree);
 +              RB_CLEAR_NODE(&p->core_node);
 +      }
  
 -      rb_erase(&p->core_node, &rq->core_tree);
 -      RB_CLEAR_NODE(&p->core_node);
 +      /*
 +       * Migrating the last task off the cpu, with the cpu in forced idle
 +       * state. Reschedule to create an accounting edge for forced idle,
 +       * and re-examine whether the core is still in forced idle state.
 +       */
 +      if (!(flags & DEQUEUE_SAVE) && rq->nr_running == 1 &&
 +          rq->core->core_forceidle_count && rq->curr == rq->idle)
 +              resched_curr(rq);
  }
  
  /*
@@@ -288,8 -280,6 +288,8 @@@ static void __sched_core_flip(bool enab
                for_each_cpu(t, smt_mask)
                        cpu_rq(t)->core_enabled = enabled;
  
 +              cpu_rq(cpu)->core->core_forceidle_start = 0;
 +
                sched_core_unlock(cpu, &flags);
  
                cpumask_andnot(&sched_core_mask, &sched_core_mask, smt_mask);
@@@ -374,8 -364,7 +374,8 @@@ void sched_core_put(void
  #else /* !CONFIG_SCHED_CORE */
  
  static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
 -static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { }
 +static inline void
 +sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { }
  
  #endif /* CONFIG_SCHED_CORE */
  
@@@ -2016,7 -2005,7 +2016,7 @@@ static inline void enqueue_task(struct 
  static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
  {
        if (sched_core_enabled(rq))
 -              sched_core_dequeue(rq, p);
 +              sched_core_dequeue(rq, p, flags);
  
        if (!(flags & DEQUEUE_NOCLOCK))
                update_rq_clock(rq);
@@@ -2184,6 -2173,9 +2184,9 @@@ void migrate_enable(void
                return;
        }
  
+       if (WARN_ON_ONCE(!p->migration_disabled))
+               return;
        /*
         * Ensure stop_task runs either before or after this, and that
         * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
@@@ -5255,7 -5247,6 +5258,7 @@@ void scheduler_tick(void
        if (sched_feat(LATENCY_WARN))
                resched_latency = cpu_resched_latency(rq);
        calc_global_load_tick(rq);
 +      sched_core_tick(rq);
  
        rq_unlock(rq, &rf);
  
@@@ -5668,7 -5659,6 +5671,7 @@@ pick_next_task(struct rq *rq, struct ta
        struct task_struct *next, *p, *max = NULL;
        const struct cpumask *smt_mask;
        bool fi_before = false;
 +      bool core_clock_updated = (rq == rq->core);
        unsigned long cookie;
        int i, cpu, occ = 0;
        struct rq *rq_i;
  
        /* reset state */
        rq->core->core_cookie = 0UL;
 -      if (rq->core->core_forceidle) {
 +      if (rq->core->core_forceidle_count) {
 +              if (!core_clock_updated) {
 +                      update_rq_clock(rq->core);
 +                      core_clock_updated = true;
 +              }
 +              sched_core_account_forceidle(rq);
 +              /* reset after accounting force idle */
 +              rq->core->core_forceidle_start = 0;
 +              rq->core->core_forceidle_count = 0;
 +              rq->core->core_forceidle_occupation = 0;
                need_sync = true;
                fi_before = true;
 -              rq->core->core_forceidle = false;
        }
  
        /*
        for_each_cpu_wrap(i, smt_mask, cpu) {
                rq_i = cpu_rq(i);
  
 -              if (i != cpu)
 +              /*
 +               * Current cpu always has its clock updated on entrance to
 +               * pick_next_task(). If the current cpu is not the core,
 +               * the core may also have been updated above.
 +               */
 +              if (i != cpu && (rq_i != rq->core || !core_clock_updated))
                        update_rq_clock(rq_i);
  
                p = rq_i->core_pick = pick_task(rq_i);
  
                if (p == rq_i->idle) {
                        if (rq_i->nr_running) {
 -                              rq->core->core_forceidle = true;
 +                              rq->core->core_forceidle_count++;
                                if (!fi_before)
                                        rq->core->core_forceidle_seq++;
                        }
                }
        }
  
 +      if (schedstat_enabled() && rq->core->core_forceidle_count) {
 +              if (cookie)
 +                      rq->core->core_forceidle_start = rq_clock(rq->core);
 +              rq->core->core_forceidle_occupation = occ;
 +      }
 +
        rq->core->core_pick_seq = rq->core->core_task_seq;
        next = rq->core_pick;
        rq->core_sched_seq = rq->core->core_pick_seq;
                 *  1            0       1
                 *  1            1       0
                 */
 -              if (!(fi_before && rq->core->core_forceidle))
 -                      task_vruntime_update(rq_i, rq_i->core_pick, rq->core->core_forceidle);
 +              if (!(fi_before && rq->core->core_forceidle_count))
 +                      task_vruntime_update(rq_i, rq_i->core_pick, !!rq->core->core_forceidle_count);
  
                rq_i->core_pick->core_occupation = occ;
  
@@@ -6065,19 -6036,11 +6068,19 @@@ static void sched_core_cpu_deactivate(u
                goto unlock;
  
        /* copy the shared state to the new leader */
 -      core_rq->core_task_seq      = rq->core_task_seq;
 -      core_rq->core_pick_seq      = rq->core_pick_seq;
 -      core_rq->core_cookie        = rq->core_cookie;
 -      core_rq->core_forceidle     = rq->core_forceidle;
 -      core_rq->core_forceidle_seq = rq->core_forceidle_seq;
 +      core_rq->core_task_seq             = rq->core_task_seq;
 +      core_rq->core_pick_seq             = rq->core_pick_seq;
 +      core_rq->core_cookie               = rq->core_cookie;
 +      core_rq->core_forceidle_count      = rq->core_forceidle_count;
 +      core_rq->core_forceidle_seq        = rq->core_forceidle_seq;
 +      core_rq->core_forceidle_occupation = rq->core_forceidle_occupation;
 +
 +      /*
 +       * Accounting edge for forced idle is handled in pick_next_task().
 +       * Don't need another one here, since the hotplug thread shouldn't
 +       * have a cookie.
 +       */
 +      core_rq->core_forceidle_start = 0;
  
        /* install new leader */
        for_each_cpu(t, smt_mask) {
@@@ -7166,7 -7129,7 +7169,7 @@@ unsigned long effective_cpu_util(int cp
  
  unsigned long sched_cpu_util(int cpu, unsigned long max)
  {
 -      return effective_cpu_util(cpu, cpu_util_cfs(cpu_rq(cpu)), max,
 +      return effective_cpu_util(cpu, cpu_util_cfs(cpu), max,
                                  ENERGY_UTIL, NULL);
  }
  #endif /* CONFIG_SMP */
@@@ -8560,7 -8523,7 +8563,7 @@@ void sched_show_task(struct task_struc
        rcu_read_unlock();
        pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n",
                free, task_pid_nr(p), ppid,
 -              (unsigned long)task_thread_info(p)->flags);
 +              read_task_thread_flags(p));
  
        print_worker_info(KERN_INFO, p);
        print_stop_info(KERN_INFO, p);
@@@ -9449,9 -9412,7 +9452,9 @@@ void __init sched_init(void
                rq->core_pick = NULL;
                rq->core_enabled = 0;
                rq->core_tree = RB_ROOT;
 -              rq->core_forceidle = false;
 +              rq->core_forceidle_count = 0;
 +              rq->core_forceidle_occupation = 0;
 +              rq->core_forceidle_start = 0;
  
                rq->core_cookie = 0UL;
  #endif
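
The new core_forceidle_count/start/occupation fields drive forced-idle accounting: pick_next_task() opens a window by stamping core_forceidle_start when a cookied task forces SMT siblings idle, and the accounting edge closes it and charges the elapsed time. The standalone toy below models only that open/close bookkeeping; it is deliberately simplified and is not the kernel's sched_core_account_forceidle() implementation.

#include <stdint.h>
#include <stdio.h>

/* Simplified model: one "core" with a forced-idle window being tracked. */
struct core_state {
	uint64_t forceidle_start;	/* timestamp when forced idle began, 0 = inactive */
	uint64_t forceidle_sum;		/* accumulated forced-idle time */
	unsigned int forceidle_occupation;	/* CPUs occupied when the window opened */
};

/* Close the current window and charge the elapsed time (the "accounting edge"). */
static void account_forceidle(struct core_state *core, uint64_t now)
{
	if (core->forceidle_start) {
		core->forceidle_sum += now - core->forceidle_start;
		core->forceidle_start = 0;
	}
}

int main(void)
{
	struct core_state core = { .forceidle_start = 100, .forceidle_occupation = 1 };

	account_forceidle(&core, 175);
	printf("forced idle accumulated: %llu\n",
	       (unsigned long long)core.forceidle_sum);	/* prints 75 */
	return 0;
}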