// SPDX-License-Identifier: GPL-2.0
/*
 * Detect hard lockups on a system using perf
 *
 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
 *
 * Note: Most of this code is borrowed heavily from the original softlockup
 * detector, so thanks to Ingo for the initial implementation.
 * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
 * to those contributors as well.
 */

#define pr_fmt(fmt) "NMI watchdog: " fmt

#include <linux/nmi.h>
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/sched/debug.h>

#include <asm/irq_regs.h>
#include <linux/perf_event.h>

static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
static DEFINE_PER_CPU(struct perf_event *, dead_event);
static struct cpumask dead_events_mask;

static atomic_t watchdog_cpus = ATOMIC_INIT(0);
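
/*
 * Per-CPU bookkeeping, in short: watchdog_ev holds the live perf event of the
 * local CPU. When the watchdog is torn down on a CPU, the event is moved to
 * dead_event and the CPU is marked in dead_events_mask so that
 * hardlockup_detector_perf_cleanup() can release it later. watchdog_cpus
 * counts CPUs with an active event and is only used to print the
 * "consumes one hw-PMU counter" message once.
 */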

#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
static DEFINE_PER_CPU(ktime_t, last_timestamp);
static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
static ktime_t watchdog_hrtimer_sample_threshold __read_mostly;

void watchdog_update_hrtimer_threshold(u64 period)
{
	/*
	 * The hrtimer runs with a period of (watchdog_threshold * 2) / 5
	 *
	 * So it runs effectively with 2.5 times the rate of the NMI
	 * watchdog. That means the hrtimer should fire 2-3 times before
	 * the NMI watchdog expires. The NMI watchdog on x86 is based on
	 * unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles
	 * might run way faster than expected and the NMI fires in a
	 * smaller period than the one deduced from the nominal CPU
	 * frequency. Depending on the Turbo-Mode factor this might be fast
	 * enough to get the NMI period smaller than the hrtimer watchdog
	 * period and trigger false positives.
	 *
	 * The sample threshold is used to check in the NMI handler whether
	 * the minimum time between two NMI samples has elapsed. That
	 * prevents false positives.
	 *
	 * Set this to 4/5 of the actual watchdog threshold period so the
	 * hrtimer is guaranteed to fire at least once within the real
	 * watchdog threshold.
	 */
	watchdog_hrtimer_sample_threshold = period * 2;
}
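
/*
 * Worked example, with the default watchdog_thresh of 10 seconds: the hrtimer
 * period is 10 * 2 / 5 = 4s and the perf NMI period is 10s, so this function
 * receives period = 4s and sets the sample threshold to 8s, i.e. 4/5 of the
 * 10s NMI period. An NMI arriving less than 8s after the previously accepted
 * sample (e.g. because Turbo-Mode makes the cycle counter overflow early) is
 * then filtered out by watchdog_check_timestamp() below.
 */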

static bool watchdog_check_timestamp(void)
{
	ktime_t delta, now = ktime_get_mono_fast_ns();

	delta = now - __this_cpu_read(last_timestamp);
	if (delta < watchdog_hrtimer_sample_threshold) {
		/*
		 * If ktime is jiffies based, a stalled timer would prevent
		 * jiffies from being incremented and the filter would look
		 * at a stale timestamp and never trigger.
		 */
		if (__this_cpu_inc_return(nmi_rearmed) < 10)
			return false;
	}
	__this_cpu_write(nmi_rearmed, 0);
	__this_cpu_write(last_timestamp, now);
	return true;
}
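
/*
 * Example of the filter above: with an 8s sample threshold, an NMI that
 * arrives only 5s after the last accepted sample is rejected, avoiding a
 * false positive. The nmi_rearmed counter is the escape hatch for the
 * jiffies-based ktime case described in the comment: if the timestamp never
 * advances because the timer is stalled, the check gives up after 10
 * consecutive rejections and lets the hardlockup check run anyway.
 */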

static void watchdog_init_timestamp(void)
{
	__this_cpu_write(nmi_rearmed, 0);
	__this_cpu_write(last_timestamp, ktime_get_mono_fast_ns());
}
#else
static inline bool watchdog_check_timestamp(void) { return true; }
static inline void watchdog_init_timestamp(void) { }
#endif

static struct perf_event_attr wd_hw_attr = {
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES,
	.size		= sizeof(struct perf_event_attr),
	.pinned		= 1,
	.disabled	= 1,
};

static struct perf_event_attr fallback_wd_hw_attr = {
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES,
	.size		= sizeof(struct perf_event_attr),
	.pinned		= 1,
	.disabled	= 1,
};
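
/*
 * The two attrs start out identical. wd_hw_attr is the one actually requested
 * first; its type/config may be rewritten to a raw PMU event by
 * hardlockup_config_perf_event() below. fallback_wd_hw_attr always keeps the
 * default CPU-cycles configuration, so an unsupported or mistyped
 * user-supplied raw event falls back to the stock hard lockup event instead
 * of silently disabling the detector.
 */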

/* Callback function for perf event subsystem */
static void watchdog_overflow_callback(struct perf_event *event,
				       struct perf_sample_data *data,
				       struct pt_regs *regs)
{
	/* Ensure the watchdog never gets throttled */
	event->hw.interrupts = 0;

	if (!watchdog_check_timestamp())
		return;

	watchdog_hardlockup_check(smp_processor_id(), regs);
}
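
/*
 * The callback above runs from the PMU overflow interrupt (an NMI on x86),
 * once per hard lockup period. watchdog_hardlockup_check() in
 * kernel/watchdog.c then decides, roughly by checking whether the per-CPU
 * hrtimer interrupt count has advanced since the previous sample, whether
 * interrupts have been stuck for a whole period and a hard lockup should be
 * reported.
 */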

static int hardlockup_detector_event_create(void)
{
	unsigned int cpu;
	struct perf_event_attr *wd_attr;
	struct perf_event *evt;

	/*
	 * Preemption is not disabled because memory will be allocated.
	 * Ensure CPU-locality by calling this from a per-CPU kthread.
	 */
	WARN_ON(!is_percpu_thread());
	cpu = raw_smp_processor_id();
	wd_attr = &wd_hw_attr;
	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);

	/* Try to register using hardware perf events */
	evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
					       watchdog_overflow_callback, NULL);
	if (IS_ERR(evt)) {
		/* Retry with the unmodified default CPU-cycles event */
		wd_attr = &fallback_wd_hw_attr;
		wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
		evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
						       watchdog_overflow_callback, NULL);
	}

	if (IS_ERR(evt)) {
		pr_debug("Perf event create on CPU %d failed with %ld\n", cpu,
			 PTR_ERR(evt));
		return PTR_ERR(evt);
	}
	this_cpu_write(watchdog_ev, evt);
	return 0;
}
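
/*
 * Both attrs are requested as pinned (the counter must stay scheduled on the
 * PMU rather than being multiplexed) and disabled, so the event created here
 * does not start counting yet; counting begins only when
 * watchdog_hardlockup_enable() calls perf_event_enable() on it.
 */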

/**
 * watchdog_hardlockup_enable - Enable the local event
 * @cpu: The CPU to enable hard lockup detection on.
 */
void watchdog_hardlockup_enable(unsigned int cpu)
{
	WARN_ON_ONCE(cpu != smp_processor_id());

	if (hardlockup_detector_event_create())
		return;

	/* atomic_fetch_inc() returns the old count: only the first CPU prints */
	if (!atomic_fetch_inc(&watchdog_cpus))
		pr_info("Enabled. Permanently consumes one hw-PMU counter.\n");

	watchdog_init_timestamp();
	perf_event_enable(this_cpu_read(watchdog_ev));
}

/**
 * watchdog_hardlockup_disable - Disable the local event
 * @cpu: The CPU to disable hard lockup detection on.
 */
void watchdog_hardlockup_disable(unsigned int cpu)
{
	struct perf_event *event = this_cpu_read(watchdog_ev);

	WARN_ON_ONCE(cpu != smp_processor_id());

	if (event) {
		perf_event_disable(event);
		this_cpu_write(watchdog_ev, NULL);
		this_cpu_write(dead_event, event);
		cpumask_set_cpu(smp_processor_id(), &dead_events_mask);
		atomic_dec(&watchdog_cpus);
	}
}
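
/*
 * Note that the event is only disabled and parked above, not freed: the
 * actual perf_event_release_kernel() happens later from
 * hardlockup_detector_perf_cleanup(), whose caller serializes it, so the
 * release does not have to be done in the context that tears the watchdog
 * down on a CPU.
 */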

/**
 * hardlockup_detector_perf_cleanup - Cleanup disabled events and destroy them
 *
 * Called from lockup_detector_cleanup(). Serialized by the caller.
 */
void hardlockup_detector_perf_cleanup(void)
{
	int cpu;

	for_each_cpu(cpu, &dead_events_mask) {
		struct perf_event *event = per_cpu(dead_event, cpu);

		/*
		 * Required because for_each_cpu() unconditionally reports
		 * CPU0 as set on UP kernels. Sigh.
		 */
		if (event)
			perf_event_release_kernel(event);
		per_cpu(dead_event, cpu) = NULL;
	}
	cpumask_clear(&dead_events_mask);
}

/**
 * hardlockup_detector_perf_stop - Globally stop watchdog events
 *
 * Special interface for x86 to handle the perf HT bug.
 */
void __init hardlockup_detector_perf_stop(void)
{
	int cpu;

	lockdep_assert_cpus_held();

	for_each_online_cpu(cpu) {
		struct perf_event *event = per_cpu(watchdog_ev, cpu);

		if (event)
			perf_event_disable(event);
	}
}

/**
 * hardlockup_detector_perf_restart - Globally restart watchdog events
 *
 * Special interface for x86 to handle the perf HT bug.
 */
void __init hardlockup_detector_perf_restart(void)
{
	int cpu;

	lockdep_assert_cpus_held();

	if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED))
		return;

	for_each_online_cpu(cpu) {
		struct perf_event *event = per_cpu(watchdog_ev, cpu);

		if (event)
			perf_event_enable(event);
	}
}

bool __weak __init arch_perf_nmi_is_available(void)
{
	return true;
}

/**
 * watchdog_hardlockup_probe - Probe whether NMI event is available at all
 */
int __init watchdog_hardlockup_probe(void)
{
	int ret;

	if (!arch_perf_nmi_is_available())
		return -ENODEV;

	ret = hardlockup_detector_event_create();

	if (ret) {
		pr_info("Perf NMI watchdog permanently disabled\n");
	} else {
		perf_event_release_kernel(this_cpu_read(watchdog_ev));
		this_cpu_write(watchdog_ev, NULL);
	}
	return ret;
}
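
/*
 * The probe only answers "can a hard lockup event be created at all?". On
 * success the freshly created event is released again right away, because the
 * real per-CPU events are created later, one per CPU, from
 * watchdog_hardlockup_enable().
 */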

/**
 * hardlockup_config_perf_event - Overwrite config of wd_hw_attr.
 * @str: number which identifies the raw perf event to use
 */
void __init hardlockup_config_perf_event(const char *str)
{
	u64 config;
	char buf[24];
	char *comma = strchr(str, ',');

	if (!comma) {
		if (kstrtoull(str, 16, &config))
			return;
	} else {
		unsigned int len = comma - str;

		if (len >= sizeof(buf))
			return;

		if (strscpy(buf, str, sizeof(buf)) < 0)
			return;
		buf[len] = 0;
		if (kstrtoull(buf, 16, &config))
			return;
	}

	wd_hw_attr.type = PERF_TYPE_RAW;
	wd_hw_attr.config = config;
}
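
/*
 * Example (assuming the "r" option of the nmi_watchdog= boot parameter is
 * what forwards its argument here, as the setup code in kernel/watchdog.c
 * does): booting with
 *
 *	nmi_watchdog=r1a,panic
 *
 * parses "1a" as hex, switches wd_hw_attr to PERF_TYPE_RAW with
 * .config = 0x1a and leaves the trailing ",panic" to the nmi_watchdog=
 * parser. The strchr()/strscpy() handling above exists precisely because such
 * comma-separated options may follow the raw event number.
 */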