rcupdate.rcu_cpu_stall_timeout to be used (after
conversion from seconds to milliseconds).
+ rcupdate.rcu_cpu_stall_cputime= [KNL]
+ Provide statistics on the cputime and count of
+ interrupts and tasks during the sampling period. For
+ multiple continuous RCU stalls, all sampling periods
+ begin at half of the first RCU stall timeout.
+
+ rcupdate.rcu_exp_stall_task_details= [KNL]
+ Print stack dumps of any tasks blocking the
+ current expedited RCU grace period during an
+ expedited RCU CPU stall warning.
+
rcupdate.rcu_expedited= [KNL]
Use expedited grace-period primitives, for
example, synchronize_rcu_expedited() instead
rdt= [HW,X86,RDT]
Turn on/off individual RDT features. List is:
cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp,
- mba.
+ mba, smba, bmec.
E.g. to turn on cmt and turn off mba use:
rdt=cmt,!mba
in situations with strict latency requirements (where
interruptions from clocksource watchdog are not
acceptable).
+ [x86] recalibrate: force recalibration against a HW timer
+ (HPET or PM timer) on systems whose TSC frequency was
+ obtained from HW or FW using either an MSR or CPUID(0x15).
+ Warn if the difference is more than 500 ppm.
+ [x86] watchdog: Use TSC as the watchdog clocksource with
+ which to check other HW timers (HPET or PM timer), but
+ only on systems where TSC has been deemed trustworthy.
+ This will be suppressed by an earlier tsc=nowatchdog and
+ can be overridden by a later tsc=nowatchdog. A console
+ message will flag any such suppression or overriding.
tsc_early_khz= [X86] Skip early TSC calibration and use the given
value instead. Useful when the early TSC frequency discovery
#include <linux/capability.h>
#include <linux/file.h>
#include <linux/fdtable.h>
+#include <linux/filelock.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/security.h>
char *list_type)
{
struct file_lock *fl;
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
list_for_each_entry(fl, list, fl_list)
if (fl->fl_file == filp)
{
struct file_lock *cfl;
struct file_lock_context *ctx;
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
void *owner;
void (*func)(void);
int posix_lock_file(struct file *filp, struct file_lock *fl,
struct file_lock *conflock)
{
- return posix_lock_inode(locks_inode(filp), fl, conflock);
+ return posix_lock_inode(file_inode(filp), fl, conflock);
}
EXPORT_SYMBOL(posix_lock_file);
int fcntl_getlease(struct file *filp)
{
struct file_lock *fl;
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
struct file_lock_context *ctx;
int type = F_UNLCK;
LIST_HEAD(dispose);
static int
check_conflicting_open(struct file *filp, const long arg, int flags)
{
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
int self_wcount = 0, self_rcount = 0;
if (flags & FL_LAYOUT)
generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
{
struct file_lock *fl, *my_fl = NULL, *lease;
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
struct file_lock_context *ctx;
bool is_deleg = (*flp)->fl_flags & FL_DELEG;
int error;
{
int error = -EAGAIN;
struct file_lock *fl, *victim = NULL;
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
struct file_lock_context *ctx;
LIST_HEAD(dispose);
int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
void **priv)
{
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
int error;
if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE))
}
EXPORT_SYMBOL(generic_setlease);
- #if IS_ENABLED(CONFIG_SRCU)
/*
* Kernel subsystems can register to be notified on any attempt to set
* a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
}
EXPORT_SYMBOL_GPL(lease_unregister_notifier);
- #else /* !IS_ENABLED(CONFIG_SRCU) */
- static inline void
- lease_notifier_chain_init(void)
- {
- }
-
- static inline void
- setlease_notifier(long arg, struct file_lock *lease)
- {
- }
-
- int lease_register_notifier(struct notifier_block *nb)
- {
- return 0;
- }
- EXPORT_SYMBOL_GPL(lease_register_notifier);
-
- void lease_unregister_notifier(struct notifier_block *nb)
- {
- }
- EXPORT_SYMBOL_GPL(lease_unregister_notifier);
-
- #endif /* IS_ENABLED(CONFIG_SRCU) */
-
/**
* vfs_setlease - sets a lease on an open file
* @filp: file pointer
struct flock *flock)
{
struct file_lock *file_lock = locks_alloc_lock();
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
struct file *f;
int error;
void locks_remove_posix(struct file *filp, fl_owner_t owner)
{
int error;
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
struct file_lock lock;
struct file_lock_context *ctx;
locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
{
struct file_lock fl;
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
if (list_empty(&flctx->flc_flock))
return;
{
struct file_lock_context *ctx;
- ctx = locks_inode_context(locks_inode(filp));
+ ctx = locks_inode_context(file_inode(filp));
if (!ctx)
return;
*/
if (fl->fl_file != NULL)
- inode = locks_inode(fl->fl_file);
+ inode = file_inode(fl->fl_file);
seq_printf(f, "%lld: ", id);
void show_fd_locks(struct seq_file *f,
struct file *filp, struct files_struct *files)
{
- struct inode *inode = locks_inode(filp);
+ struct inode *inode = file_inode(filp);
struct file_lock_context *ctx;
int id = 0;
DEFINE_STATIC_KEY_FALSE(__sched_core_enabled);
/* kernel prio, less is more */
-static inline int __task_prio(struct task_struct *p)
+static inline int __task_prio(const struct task_struct *p)
{
if (p->sched_class == &stop_sched_class) /* trumps deadline */
return -2;
*/
/* real prio, less is less */
-static inline bool prio_less(struct task_struct *a, struct task_struct *b, bool in_fi)
+static inline bool prio_less(const struct task_struct *a,
+ const struct task_struct *b, bool in_fi)
{
int pa = __task_prio(a), pb = __task_prio(b);
return false;
}
-static inline bool __sched_core_less(struct task_struct *a, struct task_struct *b)
+static inline bool __sched_core_less(const struct task_struct *a,
+ const struct task_struct *b)
{
if (a->core_cookie < b->core_cookie)
return true;
.user_mask = NULL,
.flags = SCA_USER, /* clear the user requested mask */
};
+ union cpumask_rcuhead {
+ cpumask_t cpumask;
+ struct rcu_head rcu;
+ };
__do_set_cpus_allowed(p, &ac);
- kfree(ac.user_mask);
+
+ /*
+ * Because this is called with p->pi_lock held, it is not possible
+ * to use kfree() here (when PREEMPT_RT=y), therefore punt to using
+ * kfree_rcu().
+ */
+ kfree_rcu((union cpumask_rcuhead *)ac.user_mask, rcu);
+}
+
+static cpumask_t *alloc_user_cpus_ptr(int node)
+{
+ /*
+ * See do_set_cpus_allowed() above for the rcu_head usage.
+ */
+ int size = max_t(int, cpumask_size(), sizeof(struct rcu_head));
+
+ return kmalloc_node(size, GFP_KERNEL, node);
}
int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src,
int node)
{
+ cpumask_t *user_mask;
unsigned long flags;
- if (!src->user_cpus_ptr)
+ /*
+ * Always clear dst->user_cpus_ptr first as their user_cpus_ptr's
+ * may differ by now due to racing.
+ */
+ dst->user_cpus_ptr = NULL;
+
+ /*
+ * This check is racy and losing the race is a valid situation.
+ * It is not worth the extra overhead of taking the pi_lock on
+ * every fork/clone.
+ */
+ if (data_race(!src->user_cpus_ptr))
return 0;
- dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node);
- if (!dst->user_cpus_ptr)
+ user_mask = alloc_user_cpus_ptr(node);
+ if (!user_mask)
return -ENOMEM;
- /* Use pi_lock to protect content of user_cpus_ptr */
+ /*
+ * Use pi_lock to protect content of user_cpus_ptr
+ *
+ * Though unlikely, user_cpus_ptr can be reset to NULL by a concurrent
+ * do_set_cpus_allowed().
+ */
raw_spin_lock_irqsave(&src->pi_lock, flags);
- cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr);
+ if (src->user_cpus_ptr) {
+ swap(dst->user_cpus_ptr, user_mask);
+ cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr);
+ }
raw_spin_unlock_irqrestore(&src->pi_lock, flags);
+
+ if (unlikely(user_mask))
+ kfree(user_mask);
+
return 0;
}
}
if (!(ctx->flags & SCA_MIGRATE_ENABLE)) {
- if (cpumask_equal(&p->cpus_mask, ctx->new_mask))
+ if (cpumask_equal(&p->cpus_mask, ctx->new_mask)) {
+ if (ctx->flags & SCA_USER)
+ swap(p->user_cpus_ptr, ctx->user_mask);
goto out;
+ }
if (WARN_ON_ONCE(p == current &&
is_migration_disabled(p) &&
return false;
}
+static inline cpumask_t *alloc_user_cpus_ptr(int node)
+{
+ return NULL;
+}
+
#endif /* !CONFIG_SMP */
static void
}
/*
- * Mark the task runnable and perform wakeup-preemption.
+ * Mark the task runnable.
*/
-static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
- struct rq_flags *rf)
+static inline void ttwu_do_wakeup(struct task_struct *p)
{
- check_preempt_curr(rq, p, wake_flags);
WRITE_ONCE(p->__state, TASK_RUNNING);
trace_sched_wakeup(p);
+}
+
+static void
+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
+ struct rq_flags *rf)
+{
+ int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
+
+ lockdep_assert_rq_held(rq);
+
+ if (p->sched_contributes_to_load)
+ rq->nr_uninterruptible--;
+
+#ifdef CONFIG_SMP
+ if (wake_flags & WF_MIGRATED)
+ en_flags |= ENQUEUE_MIGRATED;
+ else
+#endif
+ if (p->in_iowait) {
+ delayacct_blkio_end(p);
+ atomic_dec(&task_rq(p)->nr_iowait);
+ }
+
+ activate_task(rq, p, en_flags);
+ check_preempt_curr(rq, p, wake_flags);
+
+ ttwu_do_wakeup(p);
#ifdef CONFIG_SMP
if (p->sched_class->task_woken) {
#endif
}
-static void
-ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
- struct rq_flags *rf)
-{
- int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
-
- lockdep_assert_rq_held(rq);
-
- if (p->sched_contributes_to_load)
- rq->nr_uninterruptible--;
-
-#ifdef CONFIG_SMP
- if (wake_flags & WF_MIGRATED)
- en_flags |= ENQUEUE_MIGRATED;
- else
-#endif
- if (p->in_iowait) {
- delayacct_blkio_end(p);
- atomic_dec(&task_rq(p)->nr_iowait);
- }
-
- activate_task(rq, p, en_flags);
- ttwu_do_wakeup(rq, p, wake_flags, rf);
-}
-
/*
* Consider @p being inside a wait loop:
*
rq = __task_rq_lock(p, &rf);
if (task_on_rq_queued(p)) {
- /* check_preempt_curr() may use rq clock */
- update_rq_clock(rq);
- ttwu_do_wakeup(rq, p, wake_flags, &rf);
+ if (!task_on_cpu(rq, p)) {
+ /*
+ * When on_rq && !on_cpu the task is preempted, see if
+ * it should preempt the task that is current now.
+ */
+ update_rq_clock(rq);
+ check_preempt_curr(rq, p, wake_flags);
+ }
+ ttwu_do_wakeup(p);
ret = 1;
}
__task_rq_unlock(rq, &rf);
goto out;
trace_sched_waking(p);
- WRITE_ONCE(p->__state, TASK_RUNNING);
- trace_sched_wakeup(p);
+ ttwu_do_wakeup(p);
goto out;
}
sched_info_switch(rq, prev, next);
perf_event_task_sched_out(prev, next);
rseq_preempt(prev);
+ switch_mm_cid(prev, next);
fire_sched_out_preempt_notifiers(prev, next);
kmap_local_sched_out();
prepare_task(next);
}
EXPORT_SYMBOL(single_task_running);
+ unsigned long long nr_context_switches_cpu(int cpu)
+ {
+ return cpu_rq(cpu)->nr_switches;
+ }
+
unsigned long long nr_context_switches(void)
{
int i;
unsigned long thermal_pressure;
u64 resched_latency;
- arch_scale_freq_tick();
+ if (housekeeping_cpu(cpu, HK_TYPE_TICK))
+ arch_scale_freq_tick();
+
sched_clock_tick();
rq_lock(rq, &rf);
{
int i;
- for_each_cpu_wrap(i, sched_domain_span(sd), cpu) {
+ for_each_cpu_wrap(i, sched_domain_span(sd), cpu + 1) {
if (i == cpu)
continue;
if (retval)
goto out_put_task;
- user_mask = kmalloc(cpumask_size(), GFP_KERNEL);
- if (!user_mask) {
+ /*
+ * With non-SMP configs, user_cpus_ptr/user_mask isn't used and
+ * alloc_user_cpus_ptr() returns NULL.
+ */
+ user_mask = alloc_user_cpus_ptr(NUMA_NO_NODE);
+ if (user_mask) {
+ cpumask_copy(user_mask, in_mask);
+ } else if (IS_ENABLED(CONFIG_SMP)) {
retval = -ENOMEM;
goto out_put_task;
}
- cpumask_copy(user_mask, in_mask);
+
ac = (struct affinity_context){
.new_mask = in_mask,
.user_mask = user_mask,
{
trace_sched_update_nr_running_tp(rq, count);
}
+
+#ifdef CONFIG_SCHED_MM_CID
+void sched_mm_cid_exit_signals(struct task_struct *t)
+{
+ struct mm_struct *mm = t->mm;
+ unsigned long flags;
+
+ if (!mm)
+ return;
+ local_irq_save(flags);
+ mm_cid_put(mm, t->mm_cid);
+ t->mm_cid = -1;
+ t->mm_cid_active = 0;
+ local_irq_restore(flags);
+}
+
+void sched_mm_cid_before_execve(struct task_struct *t)
+{
+ struct mm_struct *mm = t->mm;
+ unsigned long flags;
+
+ if (!mm)
+ return;
+ local_irq_save(flags);
+ mm_cid_put(mm, t->mm_cid);
+ t->mm_cid = -1;
+ t->mm_cid_active = 0;
+ local_irq_restore(flags);
+}
+
+void sched_mm_cid_after_execve(struct task_struct *t)
+{
+ struct mm_struct *mm = t->mm;
+ unsigned long flags;
+
+ if (!mm)
+ return;
+ local_irq_save(flags);
+ t->mm_cid = mm_cid_get(mm);
+ t->mm_cid_active = 1;
+ local_irq_restore(flags);
+ rseq_set_notify_resume(t);
+}
+
+void sched_mm_cid_fork(struct task_struct *t)
+{
+ WARN_ON_ONCE(!t->mm || t->mm_cid != -1);
+ t->mm_cid_active = 1;
+}
+#endif