#include <linux/llist.h>
+ /*
+  * An entry can be in one of four states:
+  *
+  * free      NULL, 0 -> {claimed}       : free to be used
+  * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
+  * pending   next, 3 -> {busy}          : queued, pending callback
+  * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
+  */
+
+ #define IRQ_WORK_PENDING 1UL
+ #define IRQ_WORK_BUSY 2UL
+ #define IRQ_WORK_FLAGS 3UL
+ #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */
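To make the state table concrete: claiming is a cmpxchg() transition on work->flags. A minimal sketch of such a transition follows (illustrative only; the series' actual irq_work_claim() lives in kernel/irq_work.c and example_claim is a made-up name):

	/* Sketch: atomically move an entry from free/busy to claimed. */
	static bool example_claim(struct irq_work *work)
	{
		unsigned long flags, oflags, nflags;

		flags = work->flags & ~IRQ_WORK_PENDING;
		for (;;) {
			nflags = flags | IRQ_WORK_FLAGS;	/* set PENDING | BUSY */
			oflags = cmpxchg(&work->flags, flags, nflags);
			if (oflags == flags)
				return true;		/* we claimed it */
			if (oflags & IRQ_WORK_PENDING)
				return false;		/* already pending elsewhere */
			flags = oflags;			/* lost the race, retry */
		}
	}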
+
struct irq_work {
unsigned long flags;
struct llist_node llnode;
work->func = func;
}
-bool irq_work_queue(struct irq_work *work);
+void irq_work_queue(struct irq_work *work);
void irq_work_run(void);
void irq_work_sync(struct irq_work *work);
+ #ifdef CONFIG_IRQ_WORK
+ bool irq_work_needs_cpu(void);
+ #else
+ static inline bool irq_work_needs_cpu(void) { return false; }
+ #endif
+
#endif /* _LINUX_IRQ_WORK_H */
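A hypothetical user of this header (my_work, my_func, and the call site are made-up names for illustration):

	#include <linux/irq_work.h>

	static void my_func(struct irq_work *w)
	{
		pr_info("deferred work ran\n");
	}

	/*
	 * IRQ_WORK_LAZY: skip the self-IPI and run from the next tick,
	 * unless the tick is stopped.
	 */
	static struct irq_work my_work = {
		.flags = IRQ_WORK_LAZY,
		.func  = my_func,
	};

	/* Safe to call from NMI or hard-IRQ context: */
	irq_work_queue(&my_work);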
#include <linux/clockchips.h>
#include <linux/irqflags.h>
+ #include <linux/percpu.h>
+ #include <linux/hrtimer.h>
#ifdef CONFIG_GENERIC_CLOCKEVENTS
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
# ifdef CONFIG_NO_HZ
+ DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched);
+
+ static inline int tick_nohz_tick_stopped(void)
+ {
+ return __this_cpu_read(tick_cpu_sched.tick_stopped);
+ }
+
extern void tick_nohz_idle_enter(void);
extern void tick_nohz_idle_exit(void);
extern void tick_nohz_irq_exit(void);
extern ktime_t tick_nohz_get_sleep_length(void);
extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
- # else
+
+ # else /* !CONFIG_NO_HZ */
+ static inline int tick_nohz_tick_stopped(void)
+ {
+ return 0;
+ }
+
static inline void tick_nohz_idle_enter(void) { }
static inline void tick_nohz_idle_exit(void) { }
static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
# endif /* !NO_HZ */
+# ifdef CONFIG_CPU_IDLE_GOV_MENU
+extern void menu_hrtimer_cancel(void);
+# else
+static inline void menu_hrtimer_cancel(void) {}
+# endif /* CONFIG_CPU_IDLE_GOV_MENU */
+
#endif
This option enables preemptible-RCU code that is common between
the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
+config CONTEXT_TRACKING
+ bool
+
config RCU_USER_QS
bool "Consider userspace as in RCU extended quiescent state"
- depends on HAVE_RCU_USER_QS && SMP
+ depends on HAVE_CONTEXT_TRACKING && SMP
+ select CONTEXT_TRACKING
help
This option sets hooks on kernel / userspace boundaries and
puts RCU in extended quiescent state when the CPU runs in
userspace. It means that when a CPU runs in userspace, it is
excluded from the global RCU state machine and thus doesn't
- to keep the timer tick on for RCU.
+ try to keep the timer tick on for RCU.
Unless you want to hack and help the development of the full
- tickless feature, you shouldn't enable this option. It adds
- unnecessary overhead.
+ dynticks mode, you shouldn't enable this option. It also
+ adds unnecessary overhead.
If unsure say N
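Schematically, the hooks this option sets are calls placed on the user/kernel boundary; assuming the user_enter()/user_exit() pair that context tracking exposes, the call sites look roughly like this (a sketch, not part of this hunk):

	/* On return to userspace: enter RCU extended quiescent state. */
	user_enter();

	/* On syscall/exception entry from userspace: RCU watches again. */
	user_exit();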
-config RCU_USER_QS_FORCE
- bool "Force userspace extended QS by default"
- depends on RCU_USER_QS
+config CONTEXT_TRACKING_FORCE
+ bool "Force context tracking"
+ depends on CONTEXT_TRACKING
help
- Set the hooks in user/kernel boundaries by default in order to
- test this feature that treats userspace as an extended quiescent
- state until we have a real user like a full adaptive nohz option.
-
- Unless you want to hack and help the development of the full
- tickless feature, you shouldn't enable this option. It adds
- unnecessary overhead.
-
- If unsure say N
+ Probe on user/kernel boundaries by default in order to
+ test the features that rely on it such as userspace RCU extended
+ quiescent states.
+ This test is there for debugging until we have a real user like the
+ full dynticks mode.
config RCU_FANOUT
int "Tree-based hierarchical RCU fanout value"
depends on NO_HZ && SMP
default n
help
- This option causes RCU to attempt to accelerate grace periods
- in order to allow CPUs to enter dynticks-idle state more
- quickly. On the other hand, this option increases the overhead
- of the dynticks-idle checking, particularly on systems with
- large numbers of CPUs.
+ This option causes RCU to attempt to accelerate grace periods in
+ order to allow CPUs to enter dynticks-idle state more quickly.
+ On the other hand, this option increases the overhead of the
+ dynticks-idle checking, thus degrading scheduling latency.
- Say Y if energy efficiency is critically important, particularly
- if you have relatively few CPUs.
+ Say Y if energy efficiency is critically important, and you don't
+ care about real-time response.
Say N if you are unsure.
Accept the default if unsure.
+config RCU_NOCB_CPU
+ bool "Offload RCU callback processing from boot-selected CPUs"
+ depends on TREE_RCU || TREE_PREEMPT_RCU
+ default n
+ help
+ Use this option to reduce OS jitter for aggressive HPC or
+ real-time workloads. It can also be used to offload RCU
+ callback invocation to energy-efficient CPUs in battery-powered
+ asymmetric multiprocessors.
+
+ This option offloads callback invocation from the set of
+ CPUs specified at boot time by the rcu_nocbs parameter.
+ For each such CPU, a kthread ("rcuoN") will be created to
+ invoke callbacks, where the "N" is the CPU being offloaded.
+ Nothing prevents this kthread from running on the specified
+ CPUs, but (1) the kthreads may be preempted between each
+ callback, and (2) affinity or cgroups can be used to force
+ the kthreads to run on whatever set of CPUs is desired.
+
+ Say Y here if you want reduced OS jitter on selected CPUs.
+ Say N here if you are unsure.
+
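For example, offloading callbacks from CPUs 1-7 (standard cpulist syntax on the kernel command line) would create kthreads rcuo1 through rcuo7:

	rcu_nocbs=1-7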
endmenu # "RCU Subsystem"
config IKCONFIG
config HAVE_UNSTABLE_SCHED_CLOCK
bool
+#
+# For architectures that want to enable the support for NUMA-affine scheduler
+# balancing logic:
+#
+config ARCH_SUPPORTS_NUMA_BALANCING
+ bool
+
+# For architectures that (ab)use NUMA to represent different memory regions
+# all cpu-local but of different latencies, such as SuperH.
+#
+config ARCH_WANT_NUMA_VARIABLE_LOCALITY
+ bool
+
+#
+# For architectures that are willing to define _PAGE_NUMA as _PAGE_PROTNONE
+config ARCH_WANTS_PROT_NUMA_PROT_NONE
+ bool
+
+config ARCH_USES_NUMA_PROT_NONE
+ bool
+ default y
+ depends on ARCH_WANTS_PROT_NUMA_PROT_NONE
+ depends on NUMA_BALANCING
+
+config NUMA_BALANCING_DEFAULT_ENABLED
+ bool "Automatically enable NUMA aware memory/task placement"
+ default y
+ depends on NUMA_BALANCING
+ help
+	  If set, automatic NUMA balancing will be enabled if running on a NUMA
+ machine.
+
+config NUMA_BALANCING
+ bool "Memory placement aware NUMA scheduler"
+ depends on ARCH_SUPPORTS_NUMA_BALANCING
+ depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
+ depends on SMP && NUMA && MIGRATION
+ help
+ This option adds support for automatic NUMA aware memory/task placement.
+ The mechanism is quite primitive and is based on migrating memory when
+	  it is referenced, to the node the task is running on.
+
+ This system will be inactive on UMA systems.
+
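When NUMA_BALANCING_DEFAULT_ENABLED is set, the feature starts enabled on NUMA machines; assuming the runtime sysctl knob that accompanies it, it can be toggled without rebooting:

	# disable automatic NUMA balancing at runtime
	echo 0 > /proc/sys/kernel/numa_balancing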
menuconfig CGROUPS
boolean "Control Group support"
depends on EVENTFD
config MEMCG_KMEM
bool "Memory Resource Controller Kernel Memory accounting (EXPERIMENTAL)"
depends on MEMCG && EXPERIMENTAL
- default n
+ depends on SLUB || SLAB
help
The Kernel Memory extension for Memory Resource Controller can limit
the amount of memory used by kernel objects in the system. Those are
# Filesystems
depends on 9P_FS = n
depends on AFS_FS = n
- depends on AUTOFS4_FS = n
depends on CEPH_FS = n
depends on CIFS = n
depends on CODA_FS = n
- depends on FUSE_FS = n
depends on GFS2_FS = n
depends on NCP_FS = n
depends on NFSD = n
Enabling this option will pass "-Os" instead of "-O2" to gcc
resulting in a smaller kernel.
- If unsure, say Y.
+ If unsure, say N.
config SYSCTL
bool
config PRINTK
default y
bool "Enable support for printk" if EXPERT
+ select IRQ_WORK
help
This option enables normal printk support. Removing it
eliminates most of the message strings from the kernel image
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
+ #include <linux/sched.h>
+ #include <linux/tick.h>
+ #include <linux/cpu.h>
+ #include <linux/notifier.h>
#include <asm/processor.h>
- /*
- * An entry can be in one of four states:
- *
- * free NULL, 0 -> {claimed} : free to be used
- * claimed NULL, 3 -> {pending} : claimed to be enqueued
- * pending next, 3 -> {busy} : queued, pending callback
- * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
- */
-
- #define IRQ_WORK_PENDING 1UL
- #define IRQ_WORK_BUSY 2UL
- #define IRQ_WORK_FLAGS 3UL
static DEFINE_PER_CPU(struct llist_head, irq_work_list);
+ static DEFINE_PER_CPU(int, irq_work_raised);
/*
* Claim the entry so that no one else will poke at it.
}
/*
- * Queue the entry and raise the IPI if needed.
+ * Enqueue the irq_work @entry unless it's already pending
+ * somewhere.
+ *
+ * Can be re-enqueued while the callback is still in progress.
*/
-static void __irq_work_queue(struct irq_work *work)
+void irq_work_queue(struct irq_work *work)
{
- bool empty;
-
+ /* Only queue if not already pending */
+ if (!irq_work_claim(work))
+ return;
+
+ /* Queue the entry and raise the IPI if needed. */
preempt_disable();
- empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
- /* The list was empty, raise self-interrupt to start processing. */
- if (empty)
- arch_irq_work_raise();
+ llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
+
+ /*
+ * If the work is not "lazy" or the tick is stopped, raise the irq
+ * work interrupt (if supported by the arch), otherwise, just wait
+ * for the next tick.
+ */
+ if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
+ if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
+ arch_irq_work_raise();
+ }
preempt_enable();
}
-
-/*
- * Enqueue the irq_work @entry, returns true on success, failure when the
- * @entry was already enqueued by someone else.
- *
- * Can be re-enqueued while the callback is still in progress.
- */
-bool irq_work_queue(struct irq_work *work)
-{
- if (!irq_work_claim(work)) {
- /*
- * Already enqueued, can't do!
- */
- return false;
- }
-
- __irq_work_queue(work);
- return true;
-}
EXPORT_SYMBOL_GPL(irq_work_queue);
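Because a busy entry can be re-claimed, a callback may re-enqueue its own work item; a hypothetical self-rearming user (done_processing() is a made-up predicate):

	static void rearm_func(struct irq_work *work)
	{
		if (!done_processing())
			irq_work_queue(work);	/* re-claims the BUSY entry */
	}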
- /*
- * Run the irq_work entries on this cpu. Requires to be ran from hardirq
- * context with local IRQs disabled.
- */
- void irq_work_run(void)
+ bool irq_work_needs_cpu(void)
+ {
+ struct llist_head *this_list;
+
+ this_list = &__get_cpu_var(irq_work_list);
+ if (llist_empty(this_list))
+ return false;
+
+ /* All work should have been flushed before going offline */
+ WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
+
+ return true;
+ }
+
+ static void __irq_work_run(void)
{
+ unsigned long flags;
struct irq_work *work;
struct llist_head *this_list;
struct llist_node *llnode;
+
+ /*
+ * Reset the "raised" state right before we check the list because
+ * an NMI may enqueue after we find the list empty from the runner.
+ */
+ __this_cpu_write(irq_work_raised, 0);
+ barrier();
+
this_list = &__get_cpu_var(irq_work_list);
if (llist_empty(this_list))
return;
- BUG_ON(!in_irq());
BUG_ON(!irqs_disabled());
llnode = llist_del_all(this_list);
* to claim that work don't rely on us to handle their data
* while we are in the middle of the func.
*/
- xchg(&work->flags, IRQ_WORK_BUSY);
+ flags = work->flags & ~IRQ_WORK_PENDING;
+ xchg(&work->flags, flags);
+
work->func(work);
/*
* Clear the BUSY bit and return to the free state if
* no-one else claimed it meanwhile.
*/
- (void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0);
+ (void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
}
}
+
+ /*
+ * Run the irq_work entries on this cpu. Must be run from hardirq
+ * context with local IRQs disabled.
+ */
+ void irq_work_run(void)
+ {
+ BUG_ON(!in_irq());
+ __irq_work_run();
+ }
EXPORT_SYMBOL_GPL(irq_work_run);
/*
cpu_relax();
}
EXPORT_SYMBOL_GPL(irq_work_sync);
+
+ #ifdef CONFIG_HOTPLUG_CPU
+ static int irq_work_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+ {
+ long cpu = (long)hcpu;
+
+ switch (action) {
+ case CPU_DYING:
+ /* Called from stop_machine */
+ if (WARN_ON_ONCE(cpu != smp_processor_id()))
+ break;
+ __irq_work_run();
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+ }
+
+ static struct notifier_block cpu_notify;
+
+ static __init int irq_work_init_cpu_notifier(void)
+ {
+ cpu_notify.notifier_call = irq_work_cpu_notify;
+ cpu_notify.priority = 0;
+ register_cpu_notifier(&cpu_notify);
+ return 0;
+ }
+ device_initcall(irq_work_init_cpu_notifier);
+
+ #endif /* CONFIG_HOTPLUG_CPU */
#include <linux/notifier.h>
#include <linux/rculist.h>
#include <linux/poll.h>
+ #include <linux/irq_work.h>
#include <asm/uaccess.h>
struct console *console_drivers;
EXPORT_SYMBOL_GPL(console_drivers);
+#ifdef CONFIG_LOCKDEP
+static struct lockdep_map console_lock_dep_map = {
+ .name = "console_lock"
+};
+#endif
+
/*
* This is used for debugging the mess that is the VT code by
* keeping track if we have the console semaphore held. It's
free, (free * 100) / __LOG_BUF_LEN);
}
+static bool __read_mostly ignore_loglevel;
+
+static int __init ignore_loglevel_setup(char *str)
+{
+ ignore_loglevel = 1;
+ printk(KERN_INFO "debug: ignoring loglevel setting.\n");
+
+ return 0;
+}
+
+early_param("ignore_loglevel", ignore_loglevel_setup);
+module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to "
+ "print all kernel messages to the console.");
+
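The knob can be set on the kernel command line or, thanks to the module_param() above (S_IWUSR), flipped at runtime:

	# at boot
	ignore_loglevel

	# at runtime, as root
	echo Y > /sys/module/printk/parameters/ignore_loglevel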
#ifdef CONFIG_BOOT_PRINTK_DELAY
static int boot_delay; /* msecs delay after each printk during bootup */
}
__setup("boot_delay=", boot_delay_setup);
-static void boot_delay_msec(void)
+static void boot_delay_msec(int level)
{
unsigned long long k;
unsigned long timeout;
- if (boot_delay == 0 || system_state != SYSTEM_BOOTING)
+ if ((boot_delay == 0 || system_state != SYSTEM_BOOTING)
+ || (level >= console_loglevel && !ignore_loglevel)) {
return;
+ }
k = (unsigned long long)loops_per_msec * boot_delay;
}
}
#else
-static inline void boot_delay_msec(void)
+static inline void boot_delay_msec(int level)
{
}
#endif
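With the new level check, boot_delay now throttles only messages that will actually reach the console (level below console_loglevel, or any level when ignore_loglevel is set). For example:

	# delay each console-bound boot message by 100 ms
	boot_delay=100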
if (!printk_time)
return 0;
+ rem_nsec = do_div(ts, 1000000000);
+
if (!buf)
- return 15;
+ return snprintf(NULL, 0, "[%5lu.000000] ", (unsigned long)ts);
- rem_nsec = do_div(ts, 1000000000);
return sprintf(buf, "[%5lu.%06lu] ",
(unsigned long)ts, rem_nsec / 1000);
}
return do_syslog(type, buf, len, SYSLOG_FROM_CALL);
}
-static bool __read_mostly ignore_loglevel;
-
-static int __init ignore_loglevel_setup(char *str)
-{
- ignore_loglevel = 1;
- printk(KERN_INFO "debug: ignoring loglevel setting.\n");
-
- return 0;
-}
-
-early_param("ignore_loglevel", ignore_loglevel_setup);
-module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to"
- "print all kernel messages to the console.");
-
/*
* Call the console drivers, asking them to write out
* log_buf[start] to log_buf[end - 1].
int this_cpu;
int printed_len = 0;
- boot_delay_msec();
+ boot_delay_msec(level);
printk_delay();
/* This stops the holder of console_sem just where we want him */
*/
void console_lock(void)
{
- BUG_ON(in_interrupt());
+ might_sleep();
+
down(&console_sem);
if (console_suspended)
return;
console_locked = 1;
console_may_schedule = 1;
+ mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);
}
EXPORT_SYMBOL(console_lock);
}
console_locked = 1;
console_may_schedule = 0;
+ mutex_acquire(&console_lock_dep_map, 0, 1, _RET_IP_);
return 1;
}
EXPORT_SYMBOL(console_trylock);
static DEFINE_PER_CPU(int, printk_pending);
static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
- void printk_tick(void)
+ static void wake_up_klogd_work_func(struct irq_work *irq_work)
{
- if (__this_cpu_read(printk_pending)) {
- int pending = __this_cpu_xchg(printk_pending, 0);
- if (pending & PRINTK_PENDING_SCHED) {
- char *buf = __get_cpu_var(printk_sched_buf);
- printk(KERN_WARNING "[sched_delayed] %s", buf);
- }
- if (pending & PRINTK_PENDING_WAKEUP)
- wake_up_interruptible(&log_wait);
+ int pending = __this_cpu_xchg(printk_pending, 0);
+
+ if (pending & PRINTK_PENDING_SCHED) {
+ char *buf = __get_cpu_var(printk_sched_buf);
+ printk(KERN_WARNING "[sched_delayed] %s", buf);
}
- }
- int printk_needs_cpu(int cpu)
- {
- if (cpu_is_offline(cpu))
- printk_tick();
- return __this_cpu_read(printk_pending);
+ if (pending & PRINTK_PENDING_WAKEUP)
+ wake_up_interruptible(&log_wait);
}
+ static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
+ .func = wake_up_klogd_work_func,
+ .flags = IRQ_WORK_LAZY,
+ };
+
void wake_up_klogd(void)
{
- if (waitqueue_active(&log_wait))
+ preempt_disable();
+ if (waitqueue_active(&log_wait)) {
this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
+ irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
+ }
+ preempt_enable();
}
static void console_cont_flush(char *text, size_t size)
local_irq_restore(flags);
}
console_locked = 0;
+ mutex_release(&console_lock_dep_map, 1, _RET_IP_);
/* Release the exclusive_console once it is used */
if (unlikely(exclusive_console))
va_end(args);
__this_cpu_or(printk_pending, PRINTK_PENDING_SCHED);
+ irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
local_irq_restore(flags);
return r;
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/module.h>
+ #include <linux/irq_work.h>
#include <asm/irq_regs.h>
/*
* Per cpu nohz control structure
*/
- static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
+ DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
/*
- * The time, when the last jiffy update happened. Protected by xtime_lock.
+ * The time, when the last jiffy update happened. Protected by jiffies_lock.
*/
static ktime_t last_jiffies_update;
ktime_t delta;
/*
- * Do a quick check without holding xtime_lock:
+ * Do a quick check without holding jiffies_lock:
*/
delta = ktime_sub(now, last_jiffies_update);
if (delta.tv64 < tick_period.tv64)
return;
- /* Reevalute with xtime_lock held */
- write_seqlock(&xtime_lock);
+	/* Reevaluate with jiffies_lock held */
+ write_seqlock(&jiffies_lock);
delta = ktime_sub(now, last_jiffies_update);
if (delta.tv64 >= tick_period.tv64) {
/* Keep the tick_next_period variable up to date */
tick_next_period = ktime_add(last_jiffies_update, tick_period);
}
- write_sequnlock(&xtime_lock);
+ write_sequnlock(&jiffies_lock);
}
/*
{
ktime_t period;
- write_seqlock(&xtime_lock);
+ write_seqlock(&jiffies_lock);
/* Did we start the jiffies update yet ? */
if (last_jiffies_update.tv64 == 0)
last_jiffies_update = tick_next_period;
period = last_jiffies_update;
- write_sequnlock(&xtime_lock);
+ write_sequnlock(&jiffies_lock);
return period;
}
+
+static void tick_sched_do_timer(ktime_t now)
+{
+ int cpu = smp_processor_id();
+
+#ifdef CONFIG_NO_HZ
+ /*
+ * Check if the do_timer duty was dropped. We don't care about
+ * concurrency: This happens only when the cpu in charge went
+ * into a long sleep. If two cpus happen to assign themself to
+ * this duty, then the jiffies update is still serialized by
+ * jiffies_lock.
+ */
+ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
+ tick_do_timer_cpu = cpu;
+#endif
+
+ /* Check, if the jiffies need an update */
+ if (tick_do_timer_cpu == cpu)
+ tick_do_update_jiffies64(now);
+}
+
+static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
+{
+#ifdef CONFIG_NO_HZ
+ /*
+ * When we are idle and the tick is stopped, we have to touch
+ * the watchdog as we might not schedule for a really long
+ * time. This happens on complete idle SMP systems while
+ * waiting on the login prompt. We also increment the "start of
+ * idle" jiffy stamp so the idle accounting adjustment we do
+	 * when we go busy again does not account too many ticks.
+ */
+ if (ts->tick_stopped) {
+ touch_softlockup_watchdog();
+ if (is_idle_task(current))
+ ts->idle_jiffies++;
+ }
+#endif
+ update_process_times(user_mode(regs));
+ profile_tick(CPU_PROFILING);
+}
+
/*
* NOHZ - aka dynamic tick functionality
*/
/* Read jiffies and the time when jiffies were updated last */
do {
- seq = read_seqbegin(&xtime_lock);
+ seq = read_seqbegin(&jiffies_lock);
last_update = last_jiffies_update;
last_jiffies = jiffies;
time_delta = timekeeping_max_deferment();
- } while (read_seqretry(&xtime_lock, seq));
+ } while (read_seqretry(&jiffies_lock, seq));
- if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
- arch_needs_cpu(cpu)) {
+ if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) ||
+ arch_needs_cpu(cpu) || irq_work_needs_cpu()) {
next_jiffies = last_jiffies + 1;
delta_jiffies = 1;
} else {
if (!ts->inidle)
return;
+	/* Cancel the timer because the CPU has already woken up from the C-states */
+ menu_hrtimer_cancel();
__tick_nohz_idle_enter(ts);
}
ts->inidle = 0;
+	/* Cancel the timer because the CPU has already woken up from the C-states */
+ menu_hrtimer_cancel();
if (ts->idle_active || ts->tick_stopped)
now = ktime_get();
{
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
struct pt_regs *regs = get_irq_regs();
- int cpu = smp_processor_id();
ktime_t now = ktime_get();
dev->next_event.tv64 = KTIME_MAX;
- /*
- * Check if the do_timer duty was dropped. We don't care about
- * concurrency: This happens only when the cpu in charge went
- * into a long sleep. If two cpus happen to assign themself to
- * this duty, then the jiffies update is still serialized by
- * xtime_lock.
- */
- if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
- tick_do_timer_cpu = cpu;
-
- /* Check, if the jiffies need an update */
- if (tick_do_timer_cpu == cpu)
- tick_do_update_jiffies64(now);
-
- /*
- * When we are idle and the tick is stopped, we have to touch
- * the watchdog as we might not schedule for a really long
- * time. This happens on complete idle SMP systems while
- * waiting on the login prompt. We also increment the "start
- * of idle" jiffy stamp so the idle accounting adjustment we
- * do when we go busy again does not account too much ticks.
- */
- if (ts->tick_stopped) {
- touch_softlockup_watchdog();
- ts->idle_jiffies++;
- }
-
- update_process_times(user_mode(regs));
- profile_tick(CPU_PROFILING);
+ tick_sched_do_timer(now);
+ tick_sched_handle(ts, regs);
while (tick_nohz_reprogram(ts, now)) {
now = ktime_get();
#ifdef CONFIG_HIGH_RES_TIMERS
/*
* We rearm the timer until we get disabled by the idle code.
- * Called with interrupts disabled and timer->base->cpu_base->lock held.
+ * Called with interrupts disabled.
*/
static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
{
container_of(timer, struct tick_sched, sched_timer);
struct pt_regs *regs = get_irq_regs();
ktime_t now = ktime_get();
- int cpu = smp_processor_id();
-#ifdef CONFIG_NO_HZ
- /*
- * Check if the do_timer duty was dropped. We don't care about
- * concurrency: This happens only when the cpu in charge went
- * into a long sleep. If two cpus happen to assign themself to
- * this duty, then the jiffies update is still serialized by
- * xtime_lock.
- */
- if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
- tick_do_timer_cpu = cpu;
-#endif
-
- /* Check, if the jiffies need an update */
- if (tick_do_timer_cpu == cpu)
- tick_do_update_jiffies64(now);
+ tick_sched_do_timer(now);
/*
* Do not call, when we are not in irq context and have
* no valid regs pointer
*/
- if (regs) {
- /*
- * When we are idle and the tick is stopped, we have to touch
- * the watchdog as we might not schedule for a really long
- * time. This happens on complete idle SMP systems while
- * waiting on the login prompt. We also increment the "start of
- * idle" jiffy stamp so the idle accounting adjustment we do
- * when we go busy again does not account too much ticks.
- */
- if (ts->tick_stopped) {
- touch_softlockup_watchdog();
- if (is_idle_task(current))
- ts->idle_jiffies++;
- }
- update_process_times(user_mode(regs));
- profile_tick(CPU_PROFILING);
- }
+ if (regs)
+ tick_sched_handle(ts, regs);
hrtimer_forward(timer, now, tick_period);
/* Get the next period (per cpu) */
hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
- /* Offset the tick to avert xtime_lock contention. */
+ /* Offset the tick to avert jiffies_lock contention. */
if (sched_skew_tick) {
u64 offset = ktime_to_ns(tick_period) >> 1;
do_div(offset, num_possible_cpus());