watchdog/core: Split out cpumask write function
[linux.git] / kernel / watchdog.c
1 /*
2  * Detect hard and soft lockups on a system
3  *
4  * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
5  *
6  * Note: Most of this code is borrowed heavily from the original softlockup
7  * detector, so thanks to Ingo for the initial implementation.
8  * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
9  * to those contributors as well.
10  */
11
12 #define pr_fmt(fmt) "watchdog: " fmt
13
14 #include <linux/mm.h>
15 #include <linux/cpu.h>
16 #include <linux/nmi.h>
17 #include <linux/init.h>
18 #include <linux/module.h>
19 #include <linux/sysctl.h>
20 #include <linux/smpboot.h>
21 #include <linux/sched/rt.h>
22 #include <uapi/linux/sched/types.h>
23 #include <linux/tick.h>
24 #include <linux/workqueue.h>
25 #include <linux/sched/clock.h>
26 #include <linux/sched/debug.h>
27
28 #include <asm/irq_regs.h>
29 #include <linux/kvm_para.h>
30 #include <linux/kthread.h>
31
32 static DEFINE_MUTEX(watchdog_mutex);
33
34 int __read_mostly nmi_watchdog_enabled;
35
36 #if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
37 unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED |
38                                                 NMI_WATCHDOG_ENABLED;
39 #else
40 unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
41 #endif
42
43 #ifdef CONFIG_HARDLOCKUP_DETECTOR
44 /*
45  * Should we panic when a hard-lockup occurs:
46  */
47 unsigned int __read_mostly hardlockup_panic =
48                         CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
49 /*
50  * We may not want to enable hard lockup detection by default in all cases,
51  * for example when running the kernel as a guest on a hypervisor. In these
52  * cases this function can be called to disable hard lockup detection. This
53  * function should only be executed once by the boot processor before the
54  * kernel command line parameters are parsed, because otherwise it is not
55  * possible to override this in hardlockup_panic_setup().
56  */
57 void __init hardlockup_detector_disable(void)
58 {
59         watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
60 }
61
62 static int __init hardlockup_panic_setup(char *str)
63 {
64         if (!strncmp(str, "panic", 5))
65                 hardlockup_panic = 1;
66         else if (!strncmp(str, "nopanic", 7))
67                 hardlockup_panic = 0;
68         else if (!strncmp(str, "0", 1))
69                 watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
70         else if (!strncmp(str, "1", 1))
71                 watchdog_enabled |= NMI_WATCHDOG_ENABLED;
72         return 1;
73 }
74 __setup("nmi_watchdog=", hardlockup_panic_setup);
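
/*
 * For example, the handler above accepts the following values on the
 * kernel command line:
 *
 *   nmi_watchdog=panic    panic when a hard lockup is detected
 *   nmi_watchdog=nopanic  warn, but do not panic
 *   nmi_watchdog=0        disable the NMI watchdog
 *   nmi_watchdog=1        enable the NMI watchdog
 */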
75
76 # ifdef CONFIG_SMP
77 int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
78
79 static int __init hardlockup_all_cpu_backtrace_setup(char *str)
80 {
81         sysctl_hardlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
82         return 1;
83 }
84 __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
85 # endif /* CONFIG_SMP */
86 #endif /* CONFIG_HARDLOCKUP_DETECTOR */
87
88 int __read_mostly watchdog_user_enabled;
89 int __read_mostly watchdog_thresh = 10;
90
91 struct cpumask watchdog_cpumask __read_mostly;
92 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
93
94 /*
95  * The 'watchdog_running' variable is set to 1 when the watchdog threads
96  * are registered/started and is set to 0 when the watchdog threads are
97  * unregistered/stopped, so it is an indicator whether the threads exist.
98  */
99 static int __read_mostly watchdog_running;
100
101 /*
102  * These functions can be overridden if an architecture implements its
103  * own hardlockup detector.
104  *
105  * watchdog_nmi_enable/disable can be implemented by the arch to start and
106  * stop its hardlockup detector when the softlockup watchdog threads start
107  * and stop. The arch must select the SOFTLOCKUP_DETECTOR Kconfig.
108  */
109 int __weak watchdog_nmi_enable(unsigned int cpu)
110 {
111         return 0;
112 }
113
114 void __weak watchdog_nmi_disable(unsigned int cpu)
115 {
116         hardlockup_detector_perf_disable();
117 }
118
119 /*
120  * watchdog_nmi_reconfigure can be implemented to be notified after any
121  * watchdog configuration change. The arch hardlockup watchdog should
122  * respond to the following variables:
123  * - nmi_watchdog_enabled
124  * - watchdog_thresh
125  * - watchdog_cpumask
126  * - sysctl_hardlockup_all_cpu_backtrace
127  * - hardlockup_panic
128  */
129 void __weak watchdog_nmi_reconfigure(void) { }
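
/*
 * Illustrative sketch, not part of this file: an architecture with its own
 * hardlockup detector could override the weak hooks above roughly as below.
 * The arch_*_nmi_watchdog() helpers are hypothetical placeholders.
 *
 *        int watchdog_nmi_enable(unsigned int cpu)
 *        {
 *                return arch_start_nmi_watchdog(cpu);    // hypothetical helper
 *        }
 *
 *        void watchdog_nmi_disable(unsigned int cpu)
 *        {
 *                arch_stop_nmi_watchdog(cpu);            // hypothetical helper
 *        }
 *
 *        void watchdog_nmi_reconfigure(void)
 *        {
 *                // Re-read nmi_watchdog_enabled, watchdog_thresh,
 *                // watchdog_cpumask, hardlockup_panic, ... and apply them.
 *        }
 */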
130
131 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
132
133 /* Helper for online, unparked cpus. */
134 #define for_each_watchdog_cpu(cpu) \
135         for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
136
137 /* Global variables, exported for sysctl */
138 unsigned int __read_mostly softlockup_panic =
139                         CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
140 int __read_mostly soft_watchdog_enabled;
141
142 static u64 __read_mostly sample_period;
143
144 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
145 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
146 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
147 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
148 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
149 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
150 static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
151 static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
152 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
153 static unsigned long soft_lockup_nmi_warn;
154
155 static int __init softlockup_panic_setup(char *str)
156 {
157         softlockup_panic = simple_strtoul(str, NULL, 0);
158         return 1;
159 }
160 __setup("softlockup_panic=", softlockup_panic_setup);
161
162 static int __init nowatchdog_setup(char *str)
163 {
164         watchdog_enabled = 0;
165         return 1;
166 }
167 __setup("nowatchdog", nowatchdog_setup);
168
169 static int __init nosoftlockup_setup(char *str)
170 {
171         watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED;
172         return 1;
173 }
174 __setup("nosoftlockup", nosoftlockup_setup);
175
176 #ifdef CONFIG_SMP
177 int __read_mostly sysctl_softlockup_all_cpu_backtrace;
178
179 static int __init softlockup_all_cpu_backtrace_setup(char *str)
180 {
181         sysctl_softlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
182         return 1;
183 }
184 __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
185 #endif
186
187 static void __lockup_detector_cleanup(void);
188
189 /*
190  * Hard-lockup warnings should be triggered after just a few seconds. Soft-
191  * lockups can have false positives under extreme conditions, so we generally
192  * want a higher threshold for soft lockups than for hard lockups. We couple
193  * the two thresholds with a fixed factor: the soft-lockup threshold is twice
194  * the hard-lockup threshold.
195  */
196 static int get_softlockup_thresh(void)
197 {
198         return watchdog_thresh * 2;
199 }
200
201 /*
202  * Returns seconds, approximately.  We don't need nanosecond
203  * resolution, and we don't need to waste time with a big divide when
204  * 2^30ns == 1.074s.
205  */
206 static unsigned long get_timestamp(void)
207 {
208         return running_clock() >> 30LL;  /* 2^30 ~= 10^9 */
209 }
210
211 static void set_sample_period(void)
212 {
213         /*
214          * Convert watchdog_thresh from seconds to ns.
215          * The divide by 5 gives the hrtimer several chances (two
216          * or three with the current relation between the soft
217          * and hard thresholds) to increment before the
218          * hardlockup detector generates a warning.
219          */
220         sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
221         watchdog_update_hrtimer_threshold(sample_period);
222 }
223
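/*
 * Worked example: with the default watchdog_thresh of 10 seconds,
 * get_softlockup_thresh() returns 20 seconds and
 *
 *   sample_period = 20 * (NSEC_PER_SEC / 5) = 4 * NSEC_PER_SEC
 *
 * i.e. the hrtimer fires every 4 seconds, giving it two or three chances
 * to run within the 10s hard-lockup window and five within the 20s
 * soft-lockup window.
 */
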
224 /* Commands for resetting the watchdog */
225 static void __touch_watchdog(void)
226 {
227         __this_cpu_write(watchdog_touch_ts, get_timestamp());
228 }
229
230 /**
231  * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
232  *
233  * Call when the scheduler may have stalled for legitimate reasons
234  * preventing the watchdog task from executing - e.g. the scheduler
235  * entering idle state.  This should only be used for scheduler events.
236  * Use touch_softlockup_watchdog() for everything else.
237  */
238 void touch_softlockup_watchdog_sched(void)
239 {
240         /*
241          * Preemption can be enabled.  It doesn't matter which CPU's timestamp
242          * gets zeroed here, so use the raw_ operation.
243          */
244         raw_cpu_write(watchdog_touch_ts, 0);
245 }
246
247 void touch_softlockup_watchdog(void)
248 {
249         touch_softlockup_watchdog_sched();
250         wq_watchdog_touch(raw_smp_processor_id());
251 }
252 EXPORT_SYMBOL(touch_softlockup_watchdog);
253
254 void touch_all_softlockup_watchdogs(void)
255 {
256         int cpu;
257
258         /*
259          * This is done locklessly. Do we care if a 0 races with a
260          * timestamp? All it means is that the softlockup check starts
261          * one cycle later.
262          */
263         for_each_watchdog_cpu(cpu)
264                 per_cpu(watchdog_touch_ts, cpu) = 0;
265         wq_watchdog_touch(-1);
266 }
267
268 void touch_softlockup_watchdog_sync(void)
269 {
270         __this_cpu_write(softlockup_touch_sync, true);
271         __this_cpu_write(watchdog_touch_ts, 0);
272 }
273
274 static int is_softlockup(unsigned long touch_ts)
275 {
276         unsigned long now = get_timestamp();
277
278         if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh) {
279                 /* Warn about unreasonable delays. */
280                 if (time_after(now, touch_ts + get_softlockup_thresh()))
281                         return now - touch_ts;
282         }
283         return 0;
284 }
285
286 /* watchdog detector functions */
287 bool is_hardlockup(void)
288 {
289         unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
290
291         if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
292                 return true;
293
294         __this_cpu_write(hrtimer_interrupts_saved, hrint);
295         return false;
296 }
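
/*
 * How the check above fits together: hrtimer_interrupts is incremented by
 * watchdog_timer_fn() every sample_period (4s by default), while
 * is_hardlockup() is called from the hardlockup detector's NMI callback
 * (kernel/watchdog_hld.c for the perf based detector), which typically
 * fires about once per watchdog_thresh seconds. If two consecutive NMIs
 * observe the same hrtimer_interrupts value, the hrtimer has not run for a
 * full hard-lockup window, so the CPU is reported as hard locked up.
 */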
297
298 static void watchdog_interrupt_count(void)
299 {
300         __this_cpu_inc(hrtimer_interrupts);
301 }
302
303 static int watchdog_enable_all_cpus(void);
304 static void watchdog_disable_all_cpus(void);
305
306 /* watchdog kicker functions */
307 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
308 {
309         unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
310         struct pt_regs *regs = get_irq_regs();
311         int duration;
312         int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
313
314         if (!watchdog_enabled)
315                 return HRTIMER_NORESTART;
316
317         /* kick the hardlockup detector */
318         watchdog_interrupt_count();
319
320         /* kick the softlockup detector */
321         wake_up_process(__this_cpu_read(softlockup_watchdog));
322
323         /* .. and repeat */
324         hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
325
326         if (touch_ts == 0) {
327                 if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
328                         /*
329                          * If the time stamp was touched atomically
330                          * make sure the scheduler tick is up to date.
331                          */
332                         __this_cpu_write(softlockup_touch_sync, false);
333                         sched_clock_tick();
334                 }
335
336                 /* Clear the guest paused flag on watchdog reset */
337                 kvm_check_and_clear_guest_paused();
338                 __touch_watchdog();
339                 return HRTIMER_RESTART;
340         }
341
342         /* Check for a softlockup.
343          * This is done by making sure a high priority task is
344          * being scheduled.  The task touches the watchdog to
345          * indicate it is getting cpu time.  If it hasn't, that is
346          * a good indication some task is hogging the cpu.
347          */
348         duration = is_softlockup(touch_ts);
349         if (unlikely(duration)) {
350                 /*
351          * If a virtual machine is stopped by the host it can look to
352          * the watchdog like a soft lockup. Check to see if the host
353          * stopped the VM before we issue the warning.
354                  */
355                 if (kvm_check_and_clear_guest_paused())
356                         return HRTIMER_RESTART;
357
358                 /* only warn once */
359                 if (__this_cpu_read(soft_watchdog_warn) == true) {
360                         /*
361                          * When multiple processes are causing softlockups the
362                          * softlockup detector only warns on the first one
363                          * because the code relies on a full quiet cycle to
364                          * re-arm.  The second process prevents the quiet cycle
365                          * and never gets reported.  Use task pointers to detect
366                          * this.
367                          */
368                         if (__this_cpu_read(softlockup_task_ptr_saved) !=
369                             current) {
370                                 __this_cpu_write(soft_watchdog_warn, false);
371                                 __touch_watchdog();
372                         }
373                         return HRTIMER_RESTART;
374                 }
375
376                 if (softlockup_all_cpu_backtrace) {
377                         /* Prevent multiple soft-lockup reports if one cpu is already
378                          * engaged in dumping cpu back traces
379                          */
380                         if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
381                                 /* Someone else will report us. Let's give up */
382                                 __this_cpu_write(soft_watchdog_warn, true);
383                                 return HRTIMER_RESTART;
384                         }
385                 }
386
387                 pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
388                         smp_processor_id(), duration,
389                         current->comm, task_pid_nr(current));
390                 __this_cpu_write(softlockup_task_ptr_saved, current);
391                 print_modules();
392                 print_irqtrace_events(current);
393                 if (regs)
394                         show_regs(regs);
395                 else
396                         dump_stack();
397
398                 if (softlockup_all_cpu_backtrace) {
399                         /* Avoid generating two back traces for current
400                          * given that one is already made above
401                          */
402                         trigger_allbutself_cpu_backtrace();
403
404                         clear_bit(0, &soft_lockup_nmi_warn);
405                         /* Barrier to sync with other cpus */
406                         smp_mb__after_atomic();
407                 }
408
409                 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
410                 if (softlockup_panic)
411                         panic("softlockup: hung tasks");
412                 __this_cpu_write(soft_watchdog_warn, true);
413         } else
414                 __this_cpu_write(soft_watchdog_warn, false);
415
416         return HRTIMER_RESTART;
417 }
418
419 static void watchdog_set_prio(unsigned int policy, unsigned int prio)
420 {
421         struct sched_param param = { .sched_priority = prio };
422
423         sched_setscheduler(current, policy, &param);
424 }
425
426 static void watchdog_enable(unsigned int cpu)
427 {
428         struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
429
430         /*
431          * Start the timer first to prevent the NMI watchdog triggering
432          * before the timer has a chance to fire.
433          */
434         hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
435         hrtimer->function = watchdog_timer_fn;
436         hrtimer_start(hrtimer, ns_to_ktime(sample_period),
437                       HRTIMER_MODE_REL_PINNED);
438
439         /* Initialize timestamp */
440         __touch_watchdog();
441         /* Enable the perf event */
442         watchdog_nmi_enable(cpu);
443
444         watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
445 }
446
447 static void watchdog_disable(unsigned int cpu)
448 {
449         struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
450
451         watchdog_set_prio(SCHED_NORMAL, 0);
452         /*
453          * Disable the perf event first. That prevents a large delay
454          * between disabling the timer and disabling the perf event from
455          * causing the perf NMI to detect a false positive.
456          */
457         watchdog_nmi_disable(cpu);
458         hrtimer_cancel(hrtimer);
459 }
460
461 static void watchdog_cleanup(unsigned int cpu, bool online)
462 {
463         watchdog_disable(cpu);
464 }
465
466 static int watchdog_should_run(unsigned int cpu)
467 {
468         return __this_cpu_read(hrtimer_interrupts) !=
469                 __this_cpu_read(soft_lockup_hrtimer_cnt);
470 }
471
472 /*
473  * The watchdog thread function - touches the timestamp.
474  *
475  * It only runs once every sample_period (4 seconds by default) to
476  * reset the softlockup timestamp. If this gets delayed for more than
477  * 2*watchdog_thresh seconds then the debug-printout triggers in
478  * watchdog_timer_fn().
479  */
480 static void watchdog(unsigned int cpu)
481 {
482         __this_cpu_write(soft_lockup_hrtimer_cnt,
483                          __this_cpu_read(hrtimer_interrupts));
484         __touch_watchdog();
485 }
486
487 static struct smp_hotplug_thread watchdog_threads = {
488         .store                  = &softlockup_watchdog,
489         .thread_should_run      = watchdog_should_run,
490         .thread_fn              = watchdog,
491         .thread_comm            = "watchdog/%u",
492         .setup                  = watchdog_enable,
493         .cleanup                = watchdog_cleanup,
494         .park                   = watchdog_disable,
495         .unpark                 = watchdog_enable,
496 };
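
/*
 * Note: smpboot_register_percpu_thread_cpumask() (called from
 * watchdog_enable_all_cpus() below) registers these callbacks with the
 * smpboot core, which creates a "watchdog/%u" kthread per CPU and calls
 * .setup when the thread starts on its CPU, .park/.unpark across CPU
 * hotplug and cpumask updates, and .thread_fn whenever .thread_should_run
 * returns true.
 */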
497
498 /*
499  * park all watchdog threads that are specified in 'watchdog_cpumask'
500  *
501  * This function returns an error if kthread_park() of a watchdog thread
502  * fails. In this situation, the watchdog threads of some CPUs can already
503  * be parked and the watchdog threads of other CPUs can still be runnable.
504  * Callers are expected to handle this special condition as appropriate in
505  * their context.
506  *
507  * This function may only be called in a context that is protected against
508  * races with CPU hotplug - for example, via get_online_cpus().
509  */
510 static int watchdog_park_threads(void)
511 {
512         int cpu, ret = 0;
513
514         for_each_watchdog_cpu(cpu) {
515                 ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
516                 if (ret)
517                         break;
518         }
519         return ret;
520 }
521
522 /*
523  * unpark all watchdog threads that are specified in 'watchdog_cpumask'
524  *
525  * This function may only be called in a context that is protected against
526  * races with CPU hotplug - for example, via get_online_cpus().
527  */
528 static void watchdog_unpark_threads(void)
529 {
530         int cpu;
531
532         for_each_watchdog_cpu(cpu)
533                 kthread_unpark(per_cpu(softlockup_watchdog, cpu));
534 }
535
536 static int update_watchdog_all_cpus(void)
537 {
538         int ret;
539
540         ret = watchdog_park_threads();
541         if (ret)
542                 return ret;
543
544         watchdog_unpark_threads();
545
546         return 0;
547 }
548
549 static int watchdog_enable_all_cpus(void)
550 {
551         int err = 0;
552
553         if (!watchdog_running) {
554                 err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
555                                                              &watchdog_cpumask);
556                 if (err)
557                         pr_err("Failed to create watchdog threads, disabled\n");
558                 else
559                         watchdog_running = 1;
560         } else {
561                 /*
562                  * Enable/disable the lockup detectors or
563                  * change the sample period 'on the fly'.
564                  */
565                 err = update_watchdog_all_cpus();
566
567                 if (err) {
568                         watchdog_disable_all_cpus();
569                         pr_err("Failed to update lockup detectors, disabled\n");
570                 }
571         }
572
573         if (err)
574                 watchdog_enabled = 0;
575
576         return err;
577 }
578
579 static void watchdog_disable_all_cpus(void)
580 {
581         if (watchdog_running) {
582                 watchdog_running = 0;
583                 smpboot_unregister_percpu_thread(&watchdog_threads);
584         }
585 }
586
587 #else /* CONFIG_SOFTLOCKUP_DETECTOR */
588 static inline int watchdog_park_threads(void) { return 0; }
589 static inline void watchdog_unpark_threads(void) { }
590 static inline int watchdog_enable_all_cpus(void) { return 0; }
591 static inline void watchdog_disable_all_cpus(void) { }
592 static inline void set_sample_period(void) { }
593 #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */
594
595 static void __lockup_detector_cleanup(void)
596 {
597         lockdep_assert_held(&watchdog_mutex);
598         hardlockup_detector_perf_cleanup();
599 }
600
601 /**
602  * lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes
603  *
604  * Caller must not hold the cpu hotplug rwsem.
605  */
606 void lockup_detector_cleanup(void)
607 {
608         mutex_lock(&watchdog_mutex);
609         __lockup_detector_cleanup();
610         mutex_unlock(&watchdog_mutex);
611 }
612
613 /**
614  * lockup_detector_soft_poweroff - Interface to stop lockup detector(s)
615  *
616  * Special interface for parisc. It prevents lockup detector warnings from
617  * the default pm_poweroff() function which busy loops forever.
618  */
619 void lockup_detector_soft_poweroff(void)
620 {
621         watchdog_enabled = 0;
622 }
623
624 #ifdef CONFIG_SYSCTL
625
626 /*
627  * Update the run state of the lockup detectors.
628  */
629 static int proc_watchdog_update(void)
630 {
631         int err = 0;
632
633         /*
634          * Watchdog threads won't be started if they are already active.
635          * The 'watchdog_running' variable in watchdog_*_all_cpus() takes
636          * care of this. If those threads are already active, the sample
637          * period will be updated and the lockup detectors will be enabled
638          * or disabled 'on the fly'.
639          */
640         if (watchdog_enabled && watchdog_thresh)
641                 err = watchdog_enable_all_cpus();
642         else
643                 watchdog_disable_all_cpus();
644
645         watchdog_nmi_reconfigure();
646
647         __lockup_detector_cleanup();
648
649         return err;
651 }
652
653 /*
654  * common function for watchdog, nmi_watchdog and soft_watchdog parameter
655  *
656  * caller             | table->data points to | 'which' contains the flag(s)
657  * -------------------|-----------------------|-----------------------------
658  * proc_watchdog      | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed
659  *                    |                       | with SOFT_WATCHDOG_ENABLED
660  * -------------------|-----------------------|-----------------------------
661  * proc_nmi_watchdog  | nmi_watchdog_enabled  | NMI_WATCHDOG_ENABLED
662  * -------------------|-----------------------|-----------------------------
663  * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED
664  */
665 static int proc_watchdog_common(int which, struct ctl_table *table, int write,
666                                 void __user *buffer, size_t *lenp, loff_t *ppos)
667 {
668         int err, old, new;
669         int *watchdog_param = (int *)table->data;
670
671         cpu_hotplug_disable();
672         mutex_lock(&watchdog_mutex);
673
674         /*
675          * If the parameter is being read return the state of the corresponding
676          * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
677          * run state of the lockup detectors.
678          */
679         if (!write) {
680                 *watchdog_param = (watchdog_enabled & which) != 0;
681                 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
682         } else {
683                 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
684                 if (err)
685                         goto out;
686
687                 /*
688                  * There is a race window between fetching the current value
689                  * from 'watchdog_enabled' and storing the new value. During
690                  * this race window, watchdog_nmi_enable() can sneak in and
691                  * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'.
692                  * The 'cmpxchg' detects this race and the loop retries.
693                  */
694                 do {
695                         old = watchdog_enabled;
696                         /*
697                          * If the parameter value is not zero set the
698                          * corresponding bit(s), else clear it(them).
699                          */
700                         if (*watchdog_param)
701                                 new = old | which;
702                         else
703                                 new = old & ~which;
704                 } while (cmpxchg(&watchdog_enabled, old, new) != old);
705
706                 /*
707                  * Update the run state of the lockup detectors. There is _no_
708                  * need to check the value returned by proc_watchdog_update()
709                  * and to restore the previous value of 'watchdog_enabled' as
710                  * both lockup detectors are disabled if proc_watchdog_update()
711                  * returns an error.
712                  */
713                 if (old == new)
714                         goto out;
715
716                 err = proc_watchdog_update();
717         }
718 out:
719         mutex_unlock(&watchdog_mutex);
720         cpu_hotplug_enable();
721         return err;
722 }
723
724 /*
725  * /proc/sys/kernel/watchdog
726  */
727 int proc_watchdog(struct ctl_table *table, int write,
728                   void __user *buffer, size_t *lenp, loff_t *ppos)
729 {
730         return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
731                                     table, write, buffer, lenp, ppos);
732 }
733
734 /*
735  * /proc/sys/kernel/nmi_watchdog
736  */
737 int proc_nmi_watchdog(struct ctl_table *table, int write,
738                       void __user *buffer, size_t *lenp, loff_t *ppos)
739 {
740         return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
741                                     table, write, buffer, lenp, ppos);
742 }
743
744 /*
745  * /proc/sys/kernel/soft_watchdog
746  */
747 int proc_soft_watchdog(struct ctl_table *table, int write,
748                         void __user *buffer, size_t *lenp, loff_t *ppos)
749 {
750         return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
751                                     table, write, buffer, lenp, ppos);
752 }
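
/*
 * Usage example: the three knobs above are plain integer sysctls, e.g.
 *
 *   echo 0 > /proc/sys/kernel/nmi_watchdog    # disable hard-lockup detection
 *   echo 1 > /proc/sys/kernel/soft_watchdog   # enable soft-lockup detection
 *   echo 0 > /proc/sys/kernel/watchdog        # disable both detectors
 *
 * Each write funnels into proc_watchdog_common(), which updates the
 * corresponding bit(s) in 'watchdog_enabled' and then calls
 * proc_watchdog_update() to apply the change.
 */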
753
754 /*
755  * /proc/sys/kernel/watchdog_thresh
756  */
757 int proc_watchdog_thresh(struct ctl_table *table, int write,
758                          void __user *buffer, size_t *lenp, loff_t *ppos)
759 {
760         int err, old, new;
761
762         cpu_hotplug_disable();
763         mutex_lock(&watchdog_mutex);
764
765         old = ACCESS_ONCE(watchdog_thresh);
766         err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
767
768         if (err || !write)
769                 goto out;
770
771         /*
772          * Update the sample period. Restore on failure.
773          */
774         new = ACCESS_ONCE(watchdog_thresh);
775         if (old == new)
776                 goto out;
777
778         set_sample_period();
779         err = proc_watchdog_update();
780         if (err) {
781                 watchdog_thresh = old;
782                 set_sample_period();
783         }
784 out:
785         mutex_unlock(&watchdog_mutex);
786         cpu_hotplug_enable();
787         return err;
788 }
789
790 static int watchdog_update_cpus(void)
791 {
792         if (IS_ENABLED(CONFIG_SOFTLOCKUP_DETECTOR)) {
793                 int ret = smpboot_update_cpumask_percpu_thread(&watchdog_threads, &watchdog_cpumask);
794                 __lockup_detector_cleanup();
795                 return ret;
796         }
797         return 0;
798 }
799
800 static void proc_watchdog_cpumask_update(void)
801 {
802         /* Remove impossible cpus to keep sysctl output clean. */
803         cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
804
805         if (watchdog_running) {
806                 /*
807                  * Failure would be due to being unable to allocate a
808                  * temporary cpumask, so we are likely not in a position to
809                  * do much else to make things better.
810                  */
811                 if (watchdog_update_cpus() != 0)
812                         pr_err("cpumask update failed\n");
813         }
814
815         watchdog_nmi_reconfigure();
816 }
817
818 /*
819  * The cpumask is the mask of possible cpus that the watchdog can run
820  * on, not the mask of cpus it is actually running on.  This allows the
821  * user to specify a mask that will include cpus that have not yet
822  * been brought online, if desired.
823  */
824 int proc_watchdog_cpumask(struct ctl_table *table, int write,
825                           void __user *buffer, size_t *lenp, loff_t *ppos)
826 {
827         int err;
828
829         cpu_hotplug_disable();
830         mutex_lock(&watchdog_mutex);
831
832         err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
833         if (!err && write)
834                 proc_watchdog_cpumask_update();
835
836         mutex_unlock(&watchdog_mutex);
837         cpu_hotplug_enable();
838         return err;
839 }
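
/*
 * Usage example: the mask is written as a CPU range list, as parsed by
 * proc_do_large_bitmap(), e.g.
 *
 *   echo 0-3,8 > /proc/sys/kernel/watchdog_cpumask
 *
 * restricts the watchdog to CPUs 0-3 and 8; threads on CPUs removed from
 * the mask are parked via smpboot_update_cpumask_percpu_thread().
 */
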
840 #endif /* CONFIG_SYSCTL */
841
842 void __init lockup_detector_init(void)
843 {
844         set_sample_period();
845
846 #ifdef CONFIG_NO_HZ_FULL
847         if (tick_nohz_full_enabled()) {
848                 pr_info("Disabling watchdog on nohz_full cores by default\n");
849                 cpumask_copy(&watchdog_cpumask, housekeeping_mask);
850         } else
851                 cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
852 #else
853         cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
854 #endif
855
856         if (watchdog_enabled)
857                 watchdog_enable_all_cpus();
858 }