/*
 * linux/kernel/time/tick-broadcast.c
 *
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 *
 * This code is licenced under the GPL version 2. For details see
 * kernel-base/COPYING.
 */
14 #include <linux/cpu.h>
15 #include <linux/err.h>
16 #include <linux/hrtimer.h>
17 #include <linux/interrupt.h>
18 #include <linux/percpu.h>
19 #include <linux/profile.h>
20 #include <linux/sched.h>
21 #include <linux/smp.h>
23 #include "tick-internal.h"
26 * Broadcast support for broken x86 hardware, where the local apic
27 * timer stops in C3 state.
30 static struct tick_device tick_broadcast_device;
31 static cpumask_var_t tick_broadcast_mask;
32 static cpumask_var_t tmpmask;
33 static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
34 static int tick_broadcast_force;
36 #ifdef CONFIG_TICK_ONESHOT
37 static void tick_broadcast_clear_oneshot(int cpu);
39 static inline void tick_broadcast_clear_oneshot(int cpu) { }
43 * Debugging: see timer_list.c
45 struct tick_device *tick_get_broadcast_device(void)
47 return &tick_broadcast_device;
50 struct cpumask *tick_get_broadcast_mask(void)
52 return tick_broadcast_mask;
/*
 * Start the device in periodic mode
 *
 * @bc may be NULL: tick_device_uses_broadcast() passes
 * tick_broadcast_device.evtdev without checking it, so guard here.
 *
 * NOTE(review): the NULL guard line was dropped by the extraction and
 * has been restored; verify.
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (bc)
		tick_setup_periodic(bc, 1);
}
65 * Check, if the device can be utilized as broadcast device:
67 int tick_check_broadcast_device(struct clock_event_device *dev)
69 struct clock_event_device *cur = tick_broadcast_device.evtdev;
71 if ((dev->features & CLOCK_EVT_FEAT_DUMMY) ||
72 (tick_broadcast_device.evtdev &&
73 tick_broadcast_device.evtdev->rating >= dev->rating) ||
74 (dev->features & CLOCK_EVT_FEAT_C3STOP))
77 clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
79 cur->event_handler = clockevents_handle_noop;
80 tick_broadcast_device.evtdev = dev;
81 if (!cpumask_empty(tick_broadcast_mask))
82 tick_broadcast_start_periodic(dev);
84 * Inform all cpus about this. We might be in a situation
85 * where we did not switch to oneshot mode because the per cpu
86 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
87 * of a oneshot capable broadcast device. Without that
88 * notification the systems stays stuck in periodic mode
91 if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
97 * Check, if the device is the broadcast device
99 int tick_is_broadcast_device(struct clock_event_device *dev)
101 return (dev && tick_broadcast_device.evtdev == dev);
/*
 * Fallback broadcast function, installed when a device depends on
 * broadcast but no real broadcast function is available. It only
 * reports the failure (once); @mask is not used.
 */
static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}
109 static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
112 dev->broadcast = tick_broadcast;
113 if (!dev->broadcast) {
114 pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
116 dev->broadcast = err_broadcast;
121 * Check, if the device is disfunctional and a place holder, which
122 * needs to be handled by the broadcast device.
124 int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
129 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
132 * Devices might be registered with both periodic and oneshot
133 * mode disabled. This signals, that the device needs to be
134 * operated from the broadcast device and is a placeholder for
135 * the cpu local device.
137 if (!tick_device_is_functional(dev)) {
138 dev->event_handler = tick_handle_periodic;
139 tick_device_setup_broadcast_func(dev);
140 cpumask_set_cpu(cpu, tick_broadcast_mask);
141 tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
145 * When the new device is not affected by the stop
146 * feature and the cpu is marked in the broadcast mask
147 * then clear the broadcast bit.
149 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
150 int cpu = smp_processor_id();
151 cpumask_clear_cpu(cpu, tick_broadcast_mask);
152 tick_broadcast_clear_oneshot(cpu);
154 tick_device_setup_broadcast_func(dev);
157 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
/*
 * Run the event handler of the current CPU's tick device in response
 * to a received broadcast.
 *
 * Returns 0 on success, -ENODEV when this CPU has no tick device and
 * -EINVAL when the device has no event handler.
 *
 * NOTE(review): the NULL check on evt and the return statements were
 * dropped by the extraction and have been restored; verify the error
 * codes.
 */
int tick_receive_broadcast(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	struct clock_event_device *evt = td->evtdev;

	if (!evt)
		return -ENODEV;

	if (!evt->event_handler)
		return -EINVAL;

	evt->event_handler(evt);
	return 0;
}
#endif
179 * Broadcast the event to the cpus, which are set in the mask (mangled).
181 static void tick_do_broadcast(struct cpumask *mask)
183 int cpu = smp_processor_id();
184 struct tick_device *td;
187 * Check, if the current cpu is in the mask
189 if (cpumask_test_cpu(cpu, mask)) {
190 cpumask_clear_cpu(cpu, mask);
191 td = &per_cpu(tick_cpu_device, cpu);
192 td->evtdev->event_handler(td->evtdev);
195 if (!cpumask_empty(mask)) {
197 * It might be necessary to actually check whether the devices
198 * have different broadcast functions. For now, just use the
199 * one of the first device. This works as long as we have this
200 * misfeature only on x86 (lapic)
202 td = &per_cpu(tick_cpu_device, cpumask_first(mask));
203 td->evtdev->broadcast(mask);
208 * Periodic broadcast:
209 * - invoke the broadcast handlers
211 static void tick_do_periodic_broadcast(void)
213 raw_spin_lock(&tick_broadcast_lock);
215 cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
216 tick_do_broadcast(tmpmask);
218 raw_spin_unlock(&tick_broadcast_lock);
222 * Event handler for periodic broadcast ticks
224 static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
228 tick_do_periodic_broadcast();
231 * The device is in periodic mode. No reprogramming necessary:
233 if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
237 * Setup the next period for devices, which do not have
238 * periodic mode. We read dev->next_event first and add to it
239 * when the event already expired. clockevents_program_event()
240 * sets dev->next_event only when the event is really
241 * programmed to the device.
243 for (next = dev->next_event; ;) {
244 next = ktime_add(next, tick_period);
246 if (!clockevents_program_event(dev, next, false))
248 tick_do_periodic_broadcast();
253 * Powerstate information: The system enters/leaves a state, where
254 * affected devices might stop
256 static void tick_do_broadcast_on_off(unsigned long *reason)
258 struct clock_event_device *bc, *dev;
259 struct tick_device *td;
263 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
265 cpu = smp_processor_id();
266 td = &per_cpu(tick_cpu_device, cpu);
268 bc = tick_broadcast_device.evtdev;
271 * Is the device not affected by the powerstate ?
273 if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
276 if (!tick_device_is_functional(dev))
279 bc_stopped = cpumask_empty(tick_broadcast_mask);
282 case CLOCK_EVT_NOTIFY_BROADCAST_ON:
283 case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
284 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
285 if (tick_broadcast_device.mode ==
286 TICKDEV_MODE_PERIODIC)
287 clockevents_shutdown(dev);
289 if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
290 tick_broadcast_force = 1;
292 case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
293 if (!tick_broadcast_force &&
294 cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
295 if (tick_broadcast_device.mode ==
296 TICKDEV_MODE_PERIODIC)
297 tick_setup_periodic(dev, 0);
302 if (cpumask_empty(tick_broadcast_mask)) {
304 clockevents_shutdown(bc);
305 } else if (bc_stopped) {
306 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
307 tick_broadcast_start_periodic(bc);
309 tick_broadcast_setup_oneshot(bc);
312 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
316 * Powerstate information: The system enters/leaves a state, where
317 * affected devices might stop.
319 void tick_broadcast_on_off(unsigned long reason, int *oncpu)
321 if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
322 printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
323 "offline CPU #%d\n", *oncpu);
325 tick_do_broadcast_on_off(&reason);
329 * Set the periodic handler depending on broadcast on/off
331 void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
334 dev->event_handler = tick_handle_periodic;
336 dev->event_handler = tick_handle_periodic_broadcast;
340 * Remove a CPU from broadcasting
342 void tick_shutdown_broadcast(unsigned int *cpup)
344 struct clock_event_device *bc;
346 unsigned int cpu = *cpup;
348 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
350 bc = tick_broadcast_device.evtdev;
351 cpumask_clear_cpu(cpu, tick_broadcast_mask);
353 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
354 if (bc && cpumask_empty(tick_broadcast_mask))
355 clockevents_shutdown(bc);
358 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
361 void tick_suspend_broadcast(void)
363 struct clock_event_device *bc;
366 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
368 bc = tick_broadcast_device.evtdev;
370 clockevents_shutdown(bc);
372 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
375 int tick_resume_broadcast(void)
377 struct clock_event_device *bc;
381 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
383 bc = tick_broadcast_device.evtdev;
386 clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
388 switch (tick_broadcast_device.mode) {
389 case TICKDEV_MODE_PERIODIC:
390 if (!cpumask_empty(tick_broadcast_mask))
391 tick_broadcast_start_periodic(bc);
392 broadcast = cpumask_test_cpu(smp_processor_id(),
393 tick_broadcast_mask);
395 case TICKDEV_MODE_ONESHOT:
396 if (!cpumask_empty(tick_broadcast_mask))
397 broadcast = tick_resume_broadcast_oneshot(bc);
401 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
407 #ifdef CONFIG_TICK_ONESHOT
409 static cpumask_var_t tick_broadcast_oneshot_mask;
410 static cpumask_var_t tick_broadcast_pending_mask;
411 static cpumask_var_t tick_broadcast_force_mask;
414 * Exposed for debugging: see timer_list.c
416 struct cpumask *tick_get_broadcast_oneshot_mask(void)
418 return tick_broadcast_oneshot_mask;
422 * Called before going idle with interrupts disabled. Checks whether a
423 * broadcast event from the other core is about to happen. We detected
424 * that in tick_broadcast_oneshot_control(). The callsite can use this
425 * to avoid a deep idle transition as we are about to get the
426 * broadcast IPI right away.
428 int tick_check_broadcast_expired(void)
430 return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
434 * Set broadcast interrupt affinity
436 static void tick_broadcast_set_affinity(struct clock_event_device *bc,
437 const struct cpumask *cpumask)
439 if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
442 if (cpumask_equal(bc->cpumask, cpumask))
445 bc->cpumask = cpumask;
446 irq_set_affinity(bc->irq, bc->cpumask);
449 static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
450 ktime_t expires, int force)
454 if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
455 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
457 ret = clockevents_program_event(bc, expires, force);
459 tick_broadcast_set_affinity(bc, cpumask_of(cpu));
463 int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
465 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
470 * Called from irq_enter() when idle was interrupted to reenable the
473 void tick_check_oneshot_broadcast(int cpu)
475 if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
476 struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
478 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
483 * Handle oneshot mode broadcasting
485 static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
487 struct tick_device *td;
488 ktime_t now, next_event;
489 int cpu, next_cpu = 0;
491 raw_spin_lock(&tick_broadcast_lock);
493 dev->next_event.tv64 = KTIME_MAX;
494 next_event.tv64 = KTIME_MAX;
495 cpumask_clear(tmpmask);
497 /* Find all expired events */
498 for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
499 td = &per_cpu(tick_cpu_device, cpu);
500 if (td->evtdev->next_event.tv64 <= now.tv64) {
501 cpumask_set_cpu(cpu, tmpmask);
503 * Mark the remote cpu in the pending mask, so
504 * it can avoid reprogramming the cpu local
505 * timer in tick_broadcast_oneshot_control().
507 cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
508 } else if (td->evtdev->next_event.tv64 < next_event.tv64) {
509 next_event.tv64 = td->evtdev->next_event.tv64;
514 /* Take care of enforced broadcast requests */
515 cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
516 cpumask_clear(tick_broadcast_force_mask);
519 * Wakeup the cpus which have an expired event.
521 tick_do_broadcast(tmpmask);
524 * Two reasons for reprogram:
526 * - The global event did not expire any CPU local
527 * events. This happens in dyntick mode, as the maximum PIT
528 * delta is quite small.
530 * - There are pending events on sleeping CPUs which were not
533 if (next_event.tv64 != KTIME_MAX) {
535 * Rearm the broadcast device. If event expired,
538 if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
541 raw_spin_unlock(&tick_broadcast_lock);
545 * Powerstate information: The system enters/leaves a state, where
546 * affected devices might stop
548 void tick_broadcast_oneshot_control(unsigned long reason)
550 struct clock_event_device *bc, *dev;
551 struct tick_device *td;
557 * Periodic mode does not care about the enter/exit of power
560 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
564 * We are called with preemtion disabled from the depth of the
565 * idle code, so we can't be moved away.
567 cpu = smp_processor_id();
568 td = &per_cpu(tick_cpu_device, cpu);
571 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
574 bc = tick_broadcast_device.evtdev;
576 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
577 if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
578 WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
579 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
580 clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
582 * We only reprogram the broadcast timer if we
583 * did not mark ourself in the force mask and
584 * if the cpu local event is earlier than the
585 * broadcast event. If the current CPU is in
586 * the force mask, then we are going to be
587 * woken by the IPI right away.
589 if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
590 dev->next_event.tv64 < bc->next_event.tv64)
591 tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
594 if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
595 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
596 if (dev->next_event.tv64 == KTIME_MAX)
599 * The cpu which was handling the broadcast
600 * timer marked this cpu in the broadcast
601 * pending mask and fired the broadcast
602 * IPI. So we are going to handle the expired
603 * event anyway via the broadcast IPI
604 * handler. No need to reprogram the timer
605 * with an already expired event.
607 if (cpumask_test_and_clear_cpu(cpu,
608 tick_broadcast_pending_mask))
612 * If the pending bit is not set, then we are
613 * either the CPU handling the broadcast
614 * interrupt or we got woken by something else.
616 * We are not longer in the broadcast mask, so
617 * if the cpu local expiry time is already
618 * reached, we would reprogram the cpu local
619 * timer with an already expired event.
621 * This can lead to a ping-pong when we return
622 * to idle and therefor rearm the broadcast
623 * timer before the cpu local timer was able
624 * to fire. This happens because the forced
625 * reprogramming makes sure that the event
626 * will happen in the future and depending on
627 * the min_delta setting this might be far
628 * enough out that the ping-pong starts.
630 * If the cpu local next_event has expired
631 * then we know that the broadcast timer
632 * next_event has expired as well and
633 * broadcast is about to be handled. So we
634 * avoid reprogramming and enforce that the
635 * broadcast handler, which did not run yet,
636 * will invoke the cpu local handler.
638 * We cannot call the handler directly from
639 * here, because we might be in a NOHZ phase
640 * and we did not go through the irq_enter()
644 if (dev->next_event.tv64 <= now.tv64) {
645 cpumask_set_cpu(cpu, tick_broadcast_force_mask);
649 * We got woken by something else. Reprogram
650 * the cpu local timer device.
652 tick_program_event(dev->next_event, 1);
656 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
660 * Reset the one shot broadcast for a cpu
662 * Called with tick_broadcast_lock held
664 static void tick_broadcast_clear_oneshot(int cpu)
666 cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
669 static void tick_broadcast_init_next_event(struct cpumask *mask,
672 struct tick_device *td;
675 for_each_cpu(cpu, mask) {
676 td = &per_cpu(tick_cpu_device, cpu);
678 td->evtdev->next_event = expires;
683 * tick_broadcast_setup_oneshot - setup the broadcast device
685 void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
687 int cpu = smp_processor_id();
689 /* Set it up only once ! */
690 if (bc->event_handler != tick_handle_oneshot_broadcast) {
691 int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
693 bc->event_handler = tick_handle_oneshot_broadcast;
695 /* Take the do_timer update */
696 if (!tick_nohz_full_cpu(cpu))
697 tick_do_timer_cpu = cpu;
700 * We must be careful here. There might be other CPUs
701 * waiting for periodic broadcast. We need to set the
702 * oneshot_mask bits for those and program the
703 * broadcast device to fire.
705 cpumask_copy(tmpmask, tick_broadcast_mask);
706 cpumask_clear_cpu(cpu, tmpmask);
707 cpumask_or(tick_broadcast_oneshot_mask,
708 tick_broadcast_oneshot_mask, tmpmask);
710 if (was_periodic && !cpumask_empty(tmpmask)) {
711 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
712 tick_broadcast_init_next_event(tmpmask,
714 tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
716 bc->next_event.tv64 = KTIME_MAX;
719 * The first cpu which switches to oneshot mode sets
720 * the bit for all other cpus which are in the general
721 * (periodic) broadcast mask. So the bit is set and
722 * would prevent the first broadcast enter after this
723 * to program the bc device.
725 tick_broadcast_clear_oneshot(cpu);
730 * Select oneshot operating mode for the broadcast device
732 void tick_broadcast_switch_to_oneshot(void)
734 struct clock_event_device *bc;
737 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
739 tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
740 bc = tick_broadcast_device.evtdev;
742 tick_broadcast_setup_oneshot(bc);
744 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
749 * Remove a dead CPU from broadcasting
751 void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
754 unsigned int cpu = *cpup;
756 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
759 * Clear the broadcast mask flag for the dead cpu, but do not
760 * stop the broadcast device!
762 cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
764 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
768 * Check, whether the broadcast device is in one shot mode
770 int tick_broadcast_oneshot_active(void)
772 return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
776 * Check whether the broadcast device supports oneshot.
778 bool tick_broadcast_oneshot_available(void)
780 struct clock_event_device *bc = tick_broadcast_device.evtdev;
782 return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
787 void __init tick_broadcast_init(void)
789 zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
790 zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
791 #ifdef CONFIG_TICK_ONESHOT
792 zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
793 zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
794 zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);