// SPDX-License-Identifier: GPL-2.0
/*
 * Watchdog support on powerpc systems.
 *
 * Copyright 2017, IBM Corporation.
 *
 * This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c
 */

#define pr_fmt(fmt) "watchdog: " fmt

#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/module.h>
#include <linux/export.h>
#include <linux/kprobes.h>
#include <linux/hardirq.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/sched/debug.h>
#include <linux/delay.h>
#include <linux/processor.h>
#include <linux/smp.h>

#include <asm/interrupt.h>
#include <asm/paca.h>
#include <asm/nmi.h>

/*
 * The powerpc watchdog ensures that each CPU is able to service timers.
 * The watchdog sets up a simple timer on each CPU to run once per timer
 * period, and updates a per-cpu timestamp and a "pending" cpumask. This is
 * the heartbeat.
 *
 * Then there are two systems to check that the heartbeat is still running:
 * the local soft-NMI, and the SMP checker.
 *
 * The soft-NMI checker can detect lockups on the local CPU. When interrupts
 * are disabled with local_irq_disable(), platforms that use soft-masking
 * can leave hardware interrupts enabled and handle them with a masked
 * interrupt handler. The masked handler can send the timer interrupt to the
 * watchdog's soft_nmi_interrupt(), which appears to Linux as an NMI
 * interrupt, and can be used to detect CPUs stuck with IRQs disabled.
 *
 * The soft-NMI checker will compare the heartbeat timestamp for this CPU
 * with the current time, and take action if the difference exceeds the
 * watchdog threshold.
 *
 * The limitation of the soft-NMI watchdog is that it does not work when
 * interrupts are hard disabled or otherwise not being serviced. This is
 * solved by also having an SMP watchdog where all CPUs check all other
 * CPUs' heartbeats.
 *
 * The SMP checker can detect lockups on other CPUs. A global "pending"
 * cpumask is kept, containing all CPUs which enable the watchdog. Each
 * CPU clears its pending bit in its heartbeat timer. When the bitmask
 * becomes empty, the last CPU to clear its pending bit updates a global
 * timestamp and refills the pending bitmask.
 *
 * In the heartbeat timer, if any CPU notices that the global timestamp has
 * not been updated for a period exceeding the watchdog threshold, then it
 * means the CPU(s) with their bit still set in the pending mask have had
 * their heartbeats stop, and action is taken.
 *
 * Some platforms implement true NMI IPIs, which can be used by the SMP
 * watchdog to detect an unresponsive CPU and pull it out of its stuck
 * state with the NMI IPI, to get crash/debug data from it. This way the
 * SMP watchdog can detect lockups with hardware interrupts disabled.
 */
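/*
 * Illustrative walk-through of the SMP checker: with CPUs {0,1,2} enabled,
 * the pending mask starts as {0,1,2}. Each heartbeat clears that CPU's bit;
 * the last CPU to clear its bit refills the mask and updates
 * wd_smp_last_reset_tb. If CPU 1 stops servicing its heartbeat, the mask
 * sticks at {1}, wd_smp_last_reset_tb goes stale, and another CPU's
 * heartbeat eventually calls watchdog_smp_panic() against CPU 1.
 */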

static cpumask_t wd_cpus_enabled __read_mostly;

static u64 wd_panic_timeout_tb __read_mostly; /* timebase ticks until panic */
static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */

static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeats */

static DEFINE_PER_CPU(struct hrtimer, wd_hrtimer);
static DEFINE_PER_CPU(u64, wd_timer_tb);

/* SMP checker bits */
static unsigned long __wd_smp_lock;
static cpumask_t wd_smp_cpus_pending;
static cpumask_t wd_smp_cpus_stuck;
static u64 wd_smp_last_reset_tb;

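/*
 * wd_smp_lock()/wd_smp_unlock() protect the SMP checker state above (the
 * pending/stuck masks and the last reset timestamp). A raw bit-lock is used
 * instead of a spinlock so it is safe to take from soft-NMI context.
 * Typical usage:
 *
 *	unsigned long flags;
 *
 *	wd_smp_lock(&flags);
 *	... read or modify wd_smp_* state ...
 *	wd_smp_unlock(&flags);
 */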
static inline void wd_smp_lock(unsigned long *flags)
{
	/*
	 * Avoid locking layers if possible.
	 * This may be called from low level interrupt handlers at some
	 * point in future.
	 */
	raw_local_irq_save(*flags);
	hard_irq_disable(); /* Make it soft-NMI safe */
	while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) {
		raw_local_irq_restore(*flags);
		spin_until_cond(!test_bit(0, &__wd_smp_lock));
		raw_local_irq_save(*flags);
		hard_irq_disable();
	}
}

static inline void wd_smp_unlock(unsigned long *flags)
{
	clear_bit_unlock(0, &__wd_smp_lock);
	raw_local_irq_restore(*flags);
}

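/*
 * NMI IPI handler executed on a CPU that another CPU has flagged as locked
 * up: dump the victim CPU's registers and stack so crash/debug data is
 * captured. It must not panic here, as that could recurse into the NMI IPI
 * layer.
 */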
static void wd_lockup_ipi(struct pt_regs *regs)
{
	int cpu = raw_smp_processor_id();
	u64 tb = get_tb();

	pr_emerg("CPU %d Hard LOCKUP\n", cpu);
	pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n",
		 cpu, tb, per_cpu(wd_timer_tb, cpu),
		 tb_to_ns(tb - per_cpu(wd_timer_tb, cpu)) / 1000000);
	print_modules();
	print_irqtrace_events(current);
	if (regs)
		show_regs(regs);
	else
		dump_stack();

	/* Do not panic from here because that can recurse into NMI IPI layer */
}

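/*
 * Move the given CPUs from the pending mask into the stuck mask. If this
 * empties the pending mask, start a new check interval: reset the global
 * timestamp and repopulate pending with all enabled CPUs that are not
 * marked stuck. Callers hold the wd_smp lock.
 */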
static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
{
	cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
	cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
	if (cpumask_empty(&wd_smp_cpus_pending)) {
		wd_smp_last_reset_tb = tb;
		cpumask_andnot(&wd_smp_cpus_pending,
			       &wd_cpus_enabled,
			       &wd_smp_cpus_stuck);
	}
}
static void set_cpu_stuck(int cpu, u64 tb)
{
	set_cpumask_stuck(cpumask_of(cpu), tb);
}

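/*
 * Called from the heartbeat path when the global SMP timestamp has gone
 * stale: re-check under the lock, report the CPUs still pending, optionally
 * NMI-IPI them for backtraces, mark them stuck so they are taken out of the
 * watch group, and panic if hardlockup_panic is set.
 */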
static void watchdog_smp_panic(int cpu, u64 tb)
{
	unsigned long flags;
	int c;

	wd_smp_lock(&flags);
	/* Double check some things under lock */
	if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb)
		goto out;
	if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending))
		goto out;
	if (cpumask_weight(&wd_smp_cpus_pending) == 0)
		goto out;

	pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n",
		 cpu, cpumask_pr_args(&wd_smp_cpus_pending));
	pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n",
		 cpu, tb, wd_smp_last_reset_tb,
		 tb_to_ns(tb - wd_smp_last_reset_tb) / 1000000);

	if (!sysctl_hardlockup_all_cpu_backtrace) {
		/*
		 * Try to trigger the stuck CPUs, unless we are going to
		 * get a backtrace on all of them anyway.
		 */
		for_each_cpu(c, &wd_smp_cpus_pending) {
			if (c == cpu)
				continue;
			smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
		}
	}

	/* Take the stuck CPUs out of the watch group */
	set_cpumask_stuck(&wd_smp_cpus_pending, tb);

	wd_smp_unlock(&flags);

	printk_safe_flush();
	/*
	 * printk_safe_flush() seems to require another print
	 * before anything actually goes out to console.
	 */
	if (sysctl_hardlockup_all_cpu_backtrace)
		trigger_allbutself_cpu_backtrace();

	if (hardlockup_panic)
		nmi_panic(NULL, "Hard LOCKUP");

	return;

out:
	wd_smp_unlock(&flags);
}

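/*
 * Clear this CPU's bit from the pending mask. If the CPU had been marked
 * stuck, report that it became unstuck instead. The last CPU to clear its
 * bit refills the pending mask and resets the global timestamp, under the
 * lock.
 */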
static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
{
	if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
		if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
			struct pt_regs *regs = get_irq_regs();
			unsigned long flags;

			wd_smp_lock(&flags);

			pr_emerg("CPU %d became unstuck TB:%lld\n",
				 cpu, tb);
			print_irqtrace_events(current);
			if (regs)
				show_regs(regs);
			else
				dump_stack();

			cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
			wd_smp_unlock(&flags);
		}
		return;
	}
	cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
	if (cpumask_empty(&wd_smp_cpus_pending)) {
		unsigned long flags;

		wd_smp_lock(&flags);
		if (cpumask_empty(&wd_smp_cpus_pending)) {
			wd_smp_last_reset_tb = tb;
			cpumask_andnot(&wd_smp_cpus_pending,
				       &wd_cpus_enabled,
				       &wd_smp_cpus_stuck);
		}
		wd_smp_unlock(&flags);
	}
}

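/*
 * The per-CPU heartbeat, run from the hrtimer callback: record the local
 * timebase, clear our pending bit, and run the SMP check if the global
 * timestamp looks stale.
 */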
static void watchdog_timer_interrupt(int cpu)
{
	u64 tb = get_tb();

	per_cpu(wd_timer_tb, cpu) = tb;

	wd_smp_clear_cpu_pending(cpu, tb);

	if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb)
		watchdog_smp_panic(cpu, tb);
}

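/*
 * The local soft-NMI checker. The soft-masked interrupt path can deliver a
 * timer interrupt here while the CPU has interrupts soft-disabled, so this
 * runs even when regular timers are not being serviced. If the local
 * heartbeat is older than the panic timeout, report a self-detected hard
 * lockup. The decrementer is reprogrammed at the end so the next check is
 * a full timeout away.
 */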
DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
{
	unsigned long flags;
	int cpu = raw_smp_processor_id();
	u64 tb;

	/* should only arrive from kernel, with irqs disabled */
	WARN_ON_ONCE(!arch_irq_disabled_regs(regs));

	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
		return 0;

	__this_cpu_inc(irq_stat.soft_nmi_irqs);

	tb = get_tb();
	if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
		wd_smp_lock(&flags);
		if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
			wd_smp_unlock(&flags);
			return 0;
		}
		set_cpu_stuck(cpu, tb);

		pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n",
			 cpu, (void *)regs->nip);
		pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n",
			 cpu, tb, per_cpu(wd_timer_tb, cpu),
			 tb_to_ns(tb - per_cpu(wd_timer_tb, cpu)) / 1000000);
		print_modules();
		print_irqtrace_events(current);
		show_regs(regs);

		wd_smp_unlock(&flags);

		if (sysctl_hardlockup_all_cpu_backtrace)
			trigger_allbutself_cpu_backtrace();

		if (hardlockup_panic)
			nmi_panic(regs, "Hard LOCKUP");
	}
	if (wd_panic_timeout_tb < 0x7fffffff)
		mtspr(SPRN_DEC, wd_panic_timeout_tb);

	return 0;
}

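/*
 * hrtimer callback driving the heartbeat. It stops rearming itself if the
 * NMI watchdog is disabled or this CPU is excluded from watchdog_cpumask;
 * otherwise it runs the heartbeat and fires again after wd_timer_period_ms.
 */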
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
	int cpu = smp_processor_id();

	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
		return HRTIMER_NORESTART;

	if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
		return HRTIMER_NORESTART;

	watchdog_timer_interrupt(cpu);

	hrtimer_forward_now(hrtimer, ms_to_ktime(wd_timer_period_ms));

	return HRTIMER_RESTART;
}

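/*
 * Called (via touch_nmi_watchdog()) by code that legitimately runs with
 * interrupts off or hogs the CPU for a long time. Only refresh the
 * heartbeat if roughly a timer period has elapsed, so frequent calls from
 * tight loops stay cheap.
 */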
void arch_touch_nmi_watchdog(void)
{
	unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
	int cpu = smp_processor_id();
	u64 tb = get_tb();

	if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
		per_cpu(wd_timer_tb, cpu) = tb;
		wd_smp_clear_cpu_pending(cpu, tb);
	}
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);

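/*
 * Enable the watchdog on the calling CPU: mark it enabled, seed the pending
 * mask and global timestamp if it is the first CPU, record an initial local
 * heartbeat, and start the per-CPU hrtimer.
 */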
static void start_watchdog(void *arg)
{
	struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
	int cpu = smp_processor_id();
	unsigned long flags;

	if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
		WARN_ON(1);
		return;
	}

	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
		return;

	if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
		return;

	wd_smp_lock(&flags);
	cpumask_set_cpu(cpu, &wd_cpus_enabled);
	if (cpumask_weight(&wd_cpus_enabled) == 1) {
		cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
		wd_smp_last_reset_tb = get_tb();
	}
	wd_smp_unlock(&flags);

	*this_cpu_ptr(&wd_timer_tb) = get_tb();

	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = watchdog_timer_fn;
	hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms),
		      HRTIMER_MODE_REL_PINNED);
}

static int start_watchdog_on_cpu(unsigned int cpu)
{
	return smp_call_function_single(cpu, start_watchdog, NULL, true);
}

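/*
 * Disable the watchdog on the calling CPU (CPU unplug or reconfiguration):
 * cancel its hrtimer, drop it from the enabled mask, and clear any pending
 * bit so the remaining CPUs do not keep waiting on it.
 */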
static void stop_watchdog(void *arg)
{
	struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
	int cpu = smp_processor_id();
	unsigned long flags;

	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
		return; /* Can happen in CPU unplug case */

	hrtimer_cancel(hrtimer);

	wd_smp_lock(&flags);
	cpumask_clear_cpu(cpu, &wd_cpus_enabled);
	wd_smp_unlock(&flags);

	wd_smp_clear_cpu_pending(cpu, get_tb());
}

static int stop_watchdog_on_cpu(unsigned int cpu)
{
	return smp_call_function_single(cpu, stop_watchdog, NULL, true);
}

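/*
 * Derive the timeouts from watchdog_thresh. For example, with
 * watchdog_thresh = 10 (seconds), the local panic timeout is
 * 10 * ppc_tb_freq timebase ticks, the SMP panic timeout is 1.5x that,
 * and the heartbeat timer period is 4000ms.
 */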
static void watchdog_calc_timeouts(void)
{
	wd_panic_timeout_tb = watchdog_thresh * ppc_tb_freq;

	/* Have the SMP detector trigger a bit later */
	wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2;

	/* 2/5 is the factor that the perf based detector uses */
	wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
}

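/*
 * watchdog_nmi_stop()/watchdog_nmi_start() are invoked by the core lockup
 * detector when its configuration changes: stop the timers on all currently
 * enabled CPUs, then recalculate the timeouts and restart on the allowed
 * online CPUs.
 */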
void watchdog_nmi_stop(void)
{
	int cpu;

	for_each_cpu(cpu, &wd_cpus_enabled)
		stop_watchdog_on_cpu(cpu);
}

void watchdog_nmi_start(void)
{
	int cpu;

	watchdog_calc_timeouts();
	for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
		start_watchdog_on_cpu(cpu);
}

/*
 * Invoked from core watchdog init.
 */
int __init watchdog_nmi_probe(void)
{
	int err;

	err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					"powerpc/watchdog:online",
					start_watchdog_on_cpu,
					stop_watchdog_on_cpu);
	if (err < 0) {
		pr_warn("could not be initialized");
		return err;
	}
	return 0;
}