Linux Audio

Check our new training course

Loading...
Note: File does not exist in v4.6.
  1// SPDX-License-Identifier: GPL-2.0
  2
  3#include <linux/debug_locks.h>
  4#include <linux/delay.h>
  5#include <linux/jiffies.h>
  6#include <linux/kernel.h>
  7#include <linux/lockdep.h>
  8#include <linux/preempt.h>
  9#include <linux/printk.h>
 10#include <linux/sched.h>
 11#include <linux/spinlock.h>
 12#include <linux/stacktrace.h>
 13
 14#include "kcsan.h"
 15#include "encoding.h"
 16
 17/*
 18 * Max. number of stack entries to show in the report.
 19 */
 20#define NUM_STACK_ENTRIES 64
 21
 22/* Common access info. */
 23struct access_info {
 24	const volatile void	*ptr;
 25	size_t			size;
 26	int			access_type;
 27	int			task_pid;
 28	int			cpu_id;
 29};
 30
 31/*
 32 * Other thread info: communicated from other racing thread to thread that set
 33 * up the watchpoint, which then prints the complete report atomically.
 34 */
 35struct other_info {
 36	struct access_info	ai;
 37	unsigned long		stack_entries[NUM_STACK_ENTRIES];
 38	int			num_stack_entries;
 39
 40	/*
 41	 * Optionally pass @current. Typically we do not need to pass @current
 42	 * via @other_info since just @task_pid is sufficient. Passing @current
 43	 * has additional overhead.
 44	 *
 45	 * To safely pass @current, we must either use get_task_struct/
 46	 * put_task_struct, or stall the thread that populated @other_info.
 47	 *
 48	 * We cannot rely on get_task_struct/put_task_struct in case
 49	 * release_report() races with a task being released, and would have to
 50	 * free it in release_report(). This may result in deadlock if we want
 51	 * to use KCSAN on the allocators.
 52	 *
 53	 * Since we also want to reliably print held locks for
 54	 * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread
 55	 * that populated @other_info until it has been consumed.
 56	 */
 57	struct task_struct	*task;
 58};
 59
 60/*
 61 * To never block any producers of struct other_info, we need as many elements
 62 * as we have watchpoints (upper bound on concurrent races to report).
 63 */
 64static struct other_info other_infos[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
 65
 66/*
 67 * Information about reported races; used to rate limit reporting.
 68 */
 69struct report_time {
 70	/*
 71	 * The last time the race was reported.
 72	 */
 73	unsigned long time;
 74
 75	/*
 76	 * The frames of the 2 threads; if only 1 thread is known, one frame
 77	 * will be 0.
 78	 */
 79	unsigned long frame1;
 80	unsigned long frame2;
 81};
 82
 83/*
 84 * Since we also want to be able to debug allocators with KCSAN, to avoid
 85 * deadlock, report_times cannot be dynamically resized with krealloc in
 86 * rate_limit_report.
 87 *
 88 * Therefore, we use a fixed-size array, which at most will occupy a page. This
 89 * still adequately rate limits reports, assuming that a) number of unique data
 90 * races is not excessive, and b) occurrence of unique races within the
 91 * same time window is limited.
 92 */
 93#define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time))
 94#define REPORT_TIMES_SIZE                                                      \
 95	(CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ?                   \
 96		 REPORT_TIMES_MAX :                                            \
 97		 CONFIG_KCSAN_REPORT_ONCE_IN_MS)
 98static struct report_time report_times[REPORT_TIMES_SIZE];
 99
100/*
101 * Spinlock serializing report generation, and access to @other_infos. Although
102 * it could make sense to have a finer-grained locking story for @other_infos,
103 * report generation needs to be serialized either way, so not much is gained.
104 */
105static DEFINE_RAW_SPINLOCK(report_lock);
106
107/*
108 * Checks if the race identified by thread frames frame1 and frame2 has
109 * been reported since (now - KCSAN_REPORT_ONCE_IN_MS).
110 */
111static bool rate_limit_report(unsigned long frame1, unsigned long frame2)
112{
113	struct report_time *use_entry = &report_times[0];
114	unsigned long invalid_before;
115	int i;
116
117	BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0);
118
119	if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0)
120		return false;
121
122	invalid_before = jiffies - msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS);
123
124	/* Check if a matching race report exists. */
125	for (i = 0; i < REPORT_TIMES_SIZE; ++i) {
126		struct report_time *rt = &report_times[i];
127
128		/*
129		 * Must always select an entry for use to store info as we
130		 * cannot resize report_times; at the end of the scan, use_entry
131		 * will be the oldest entry, which ideally also happened before
132		 * KCSAN_REPORT_ONCE_IN_MS ago.
133		 */
134		if (time_before(rt->time, use_entry->time))
135			use_entry = rt;
136
137		/*
138		 * Initially, no need to check any further as this entry as well
139		 * as following entries have never been used.
140		 */
141		if (rt->time == 0)
142			break;
143
144		/* Check if entry expired. */
145		if (time_before(rt->time, invalid_before))
146			continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */
147
148		/* Reported recently, check if race matches. */
149		if ((rt->frame1 == frame1 && rt->frame2 == frame2) ||
150		    (rt->frame1 == frame2 && rt->frame2 == frame1))
151			return true;
152	}
153
154	use_entry->time = jiffies;
155	use_entry->frame1 = frame1;
156	use_entry->frame2 = frame2;
157	return false;
158}
159
160/*
161 * Special rules to skip reporting.
162 */
163static bool
164skip_report(enum kcsan_value_change value_change, unsigned long top_frame)
165{
166	/* Should never get here if value_change==FALSE. */
167	WARN_ON_ONCE(value_change == KCSAN_VALUE_CHANGE_FALSE);
168
169	/*
170	 * The first call to skip_report always has value_change==TRUE, since we
171	 * cannot know the value written of an instrumented access. For the 2nd
172	 * call there are 6 cases with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY:
173	 *
174	 * 1. read watchpoint, conflicting write (value_change==TRUE): report;
175	 * 2. read watchpoint, conflicting write (value_change==MAYBE): skip;
176	 * 3. write watchpoint, conflicting write (value_change==TRUE): report;
177	 * 4. write watchpoint, conflicting write (value_change==MAYBE): skip;
178	 * 5. write watchpoint, conflicting read (value_change==MAYBE): skip;
179	 * 6. write watchpoint, conflicting read (value_change==TRUE): report;
180	 *
181	 * Cases 1-4 are intuitive and expected; case 5 ensures we do not report
182	 * data races where the write may have rewritten the same value; case 6
183	 * is possible either if the size is larger than what we check value
184	 * changes for or the access type is KCSAN_ACCESS_ASSERT.
185	 */
186	if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) &&
187	    value_change == KCSAN_VALUE_CHANGE_MAYBE) {
188		/*
189		 * The access is a write, but the data value did not change.
190		 *
191		 * We opt-out of this filter for certain functions at request of
192		 * maintainers.
193		 */
194		char buf[64];
195		int len = scnprintf(buf, sizeof(buf), "%ps", (void *)top_frame);
196
197		if (!strnstr(buf, "rcu_", len) &&
198		    !strnstr(buf, "_rcu", len) &&
199		    !strnstr(buf, "_srcu", len))
200			return true;
201	}
202
203	return kcsan_skip_report_debugfs(top_frame);
204}
205
206static const char *get_access_type(int type)
207{
208	if (type & KCSAN_ACCESS_ASSERT) {
209		if (type & KCSAN_ACCESS_SCOPED) {
210			if (type & KCSAN_ACCESS_WRITE)
211				return "assert no accesses (scoped)";
212			else
213				return "assert no writes (scoped)";
214		} else {
215			if (type & KCSAN_ACCESS_WRITE)
216				return "assert no accesses";
217			else
218				return "assert no writes";
219		}
220	}
221
222	switch (type) {
223	case 0:
224		return "read";
225	case KCSAN_ACCESS_ATOMIC:
226		return "read (marked)";
227	case KCSAN_ACCESS_WRITE:
228		return "write";
229	case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
230		return "write (marked)";
231	case KCSAN_ACCESS_SCOPED:
232		return "read (scoped)";
233	case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC:
234		return "read (marked, scoped)";
235	case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE:
236		return "write (scoped)";
237	case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
238		return "write (marked, scoped)";
239	default:
240		BUG();
241	}
242}
243
244static const char *get_bug_type(int type)
245{
246	return (type & KCSAN_ACCESS_ASSERT) != 0 ? "assert: race" : "data-race";
247}
248
249/* Return thread description: in task or interrupt. */
250static const char *get_thread_desc(int task_id)
251{
252	if (task_id != -1) {
253		static char buf[32]; /* safe: protected by report_lock */
254
255		snprintf(buf, sizeof(buf), "task %i", task_id);
256		return buf;
257	}
258	return "interrupt";
259}
260
261/* Helper to skip KCSAN-related functions in stack-trace. */
262static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries)
263{
264	char buf[64];
265	char *cur;
266	int len, skip;
267
268	for (skip = 0; skip < num_entries; ++skip) {
269		len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]);
270
271		/* Never show tsan_* or {read,write}_once_size. */
272		if (strnstr(buf, "tsan_", len) ||
273		    strnstr(buf, "_once_size", len))
274			continue;
275
276		cur = strnstr(buf, "kcsan_", len);
277		if (cur) {
278			cur += sizeof("kcsan_") - 1;
279			if (strncmp(cur, "test", sizeof("test") - 1))
280				continue; /* KCSAN runtime function. */
281			/* KCSAN related test. */
282		}
283
284		/*
285		 * No match for runtime functions -- @skip entries to skip to
286		 * get to first frame of interest.
287		 */
288		break;
289	}
290
291	return skip;
292}
293
294/* Compares symbolized strings of addr1 and addr2. */
295static int sym_strcmp(void *addr1, void *addr2)
296{
297	char buf1[64];
298	char buf2[64];
299
300	snprintf(buf1, sizeof(buf1), "%pS", addr1);
301	snprintf(buf2, sizeof(buf2), "%pS", addr2);
302
303	return strncmp(buf1, buf2, sizeof(buf1));
304}
305
306static void print_verbose_info(struct task_struct *task)
307{
308	if (!task)
309		return;
310
311	/* Restore IRQ state trace for printing. */
312	kcsan_restore_irqtrace(task);
313
314	pr_err("\n");
315	debug_show_held_locks(task);
316	print_irqtrace_events(task);
317}
318
319/*
320 * Returns true if a report was generated, false otherwise.
321 */
322static bool print_report(enum kcsan_value_change value_change,
323			 enum kcsan_report_type type,
324			 const struct access_info *ai,
325			 const struct other_info *other_info)
326{
327	unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 };
328	int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1);
329	int skipnr = get_stack_skipnr(stack_entries, num_stack_entries);
330	unsigned long this_frame = stack_entries[skipnr];
331	unsigned long other_frame = 0;
332	int other_skipnr = 0; /* silence uninit warnings */
333
334	/*
335	 * Must check report filter rules before starting to print.
336	 */
337	if (skip_report(KCSAN_VALUE_CHANGE_TRUE, stack_entries[skipnr]))
338		return false;
339
340	if (type == KCSAN_REPORT_RACE_SIGNAL) {
341		other_skipnr = get_stack_skipnr(other_info->stack_entries,
342						other_info->num_stack_entries);
343		other_frame = other_info->stack_entries[other_skipnr];
344
345		/* @value_change is only known for the other thread */
346		if (skip_report(value_change, other_frame))
347			return false;
348	}
349
350	if (rate_limit_report(this_frame, other_frame))
351		return false;
352
353	/* Print report header. */
354	pr_err("==================================================================\n");
355	switch (type) {
356	case KCSAN_REPORT_RACE_SIGNAL: {
357		int cmp;
358
359		/*
360		 * Order functions lexographically for consistent bug titles.
361		 * Do not print offset of functions to keep title short.
362		 */
363		cmp = sym_strcmp((void *)other_frame, (void *)this_frame);
364		pr_err("BUG: KCSAN: %s in %ps / %ps\n",
365		       get_bug_type(ai->access_type | other_info->ai.access_type),
366		       (void *)(cmp < 0 ? other_frame : this_frame),
367		       (void *)(cmp < 0 ? this_frame : other_frame));
368	} break;
369
370	case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
371		pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(ai->access_type),
372		       (void *)this_frame);
373		break;
374
375	default:
376		BUG();
377	}
378
379	pr_err("\n");
380
381	/* Print information about the racing accesses. */
382	switch (type) {
383	case KCSAN_REPORT_RACE_SIGNAL:
384		pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
385		       get_access_type(other_info->ai.access_type), other_info->ai.ptr,
386		       other_info->ai.size, get_thread_desc(other_info->ai.task_pid),
387		       other_info->ai.cpu_id);
388
389		/* Print the other thread's stack trace. */
390		stack_trace_print(other_info->stack_entries + other_skipnr,
391				  other_info->num_stack_entries - other_skipnr,
392				  0);
393
394		if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
395			print_verbose_info(other_info->task);
396
397		pr_err("\n");
398		pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
399		       get_access_type(ai->access_type), ai->ptr, ai->size,
400		       get_thread_desc(ai->task_pid), ai->cpu_id);
401		break;
402
403	case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
404		pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n",
405		       get_access_type(ai->access_type), ai->ptr, ai->size,
406		       get_thread_desc(ai->task_pid), ai->cpu_id);
407		break;
408
409	default:
410		BUG();
411	}
412	/* Print stack trace of this thread. */
413	stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr,
414			  0);
415
416	if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
417		print_verbose_info(current);
418
419	/* Print report footer. */
420	pr_err("\n");
421	pr_err("Reported by Kernel Concurrency Sanitizer on:\n");
422	dump_stack_print_info(KERN_DEFAULT);
423	pr_err("==================================================================\n");
424
425	return true;
426}
427
428static void release_report(unsigned long *flags, struct other_info *other_info)
429{
430	if (other_info)
431		/*
432		 * Use size to denote valid/invalid, since KCSAN entirely
433		 * ignores 0-sized accesses.
434		 */
435		other_info->ai.size = 0;
436
437	raw_spin_unlock_irqrestore(&report_lock, *flags);
438}
439
440/*
441 * Sets @other_info->task and awaits consumption of @other_info.
442 *
443 * Precondition: report_lock is held.
444 * Postcondition: report_lock is held.
445 */
446static void set_other_info_task_blocking(unsigned long *flags,
447					 const struct access_info *ai,
448					 struct other_info *other_info)
449{
450	/*
451	 * We may be instrumenting a code-path where current->state is already
452	 * something other than TASK_RUNNING.
453	 */
454	const bool is_running = current->state == TASK_RUNNING;
455	/*
456	 * To avoid deadlock in case we are in an interrupt here and this is a
457	 * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a
458	 * timeout to ensure this works in all contexts.
459	 *
460	 * Await approximately the worst case delay of the reporting thread (if
461	 * we are not interrupted).
462	 */
463	int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt);
464
465	other_info->task = current;
466	do {
467		if (is_running) {
468			/*
469			 * Let lockdep know the real task is sleeping, to print
470			 * the held locks (recall we turned lockdep off, so
471			 * locking/unlocking @report_lock won't be recorded).
472			 */
473			set_current_state(TASK_UNINTERRUPTIBLE);
474		}
475		raw_spin_unlock_irqrestore(&report_lock, *flags);
476		/*
477		 * We cannot call schedule() since we also cannot reliably
478		 * determine if sleeping here is permitted -- see in_atomic().
479		 */
480
481		udelay(1);
482		raw_spin_lock_irqsave(&report_lock, *flags);
483		if (timeout-- < 0) {
484			/*
485			 * Abort. Reset @other_info->task to NULL, since it
486			 * appears the other thread is still going to consume
487			 * it. It will result in no verbose info printed for
488			 * this task.
489			 */
490			other_info->task = NULL;
491			break;
492		}
493		/*
494		 * If invalid, or @ptr nor @current matches, then @other_info
495		 * has been consumed and we may continue. If not, retry.
496		 */
497	} while (other_info->ai.size && other_info->ai.ptr == ai->ptr &&
498		 other_info->task == current);
499	if (is_running)
500		set_current_state(TASK_RUNNING);
501}
502
503/* Populate @other_info; requires that the provided @other_info not in use. */
504static void prepare_report_producer(unsigned long *flags,
505				    const struct access_info *ai,
506				    struct other_info *other_info)
507{
508	raw_spin_lock_irqsave(&report_lock, *flags);
509
510	/*
511	 * The same @other_infos entry cannot be used concurrently, because
512	 * there is a one-to-one mapping to watchpoint slots (@watchpoints in
513	 * core.c), and a watchpoint is only released for reuse after reporting
514	 * is done by the consumer of @other_info. Therefore, it is impossible
515	 * for another concurrent prepare_report_producer() to set the same
516	 * @other_info, and are guaranteed exclusivity for the @other_infos
517	 * entry pointed to by @other_info.
518	 *
519	 * To check this property holds, size should never be non-zero here,
520	 * because every consumer of struct other_info resets size to 0 in
521	 * release_report().
522	 */
523	WARN_ON(other_info->ai.size);
524
525	other_info->ai = *ai;
526	other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 2);
527
528	if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
529		set_other_info_task_blocking(flags, ai, other_info);
530
531	raw_spin_unlock_irqrestore(&report_lock, *flags);
532}
533
534/* Awaits producer to fill @other_info and then returns. */
535static bool prepare_report_consumer(unsigned long *flags,
536				    const struct access_info *ai,
537				    struct other_info *other_info)
538{
539
540	raw_spin_lock_irqsave(&report_lock, *flags);
541	while (!other_info->ai.size) { /* Await valid @other_info. */
542		raw_spin_unlock_irqrestore(&report_lock, *flags);
543		cpu_relax();
544		raw_spin_lock_irqsave(&report_lock, *flags);
545	}
546
547	/* Should always have a matching access based on watchpoint encoding. */
548	if (WARN_ON(!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size,
549				     (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size)))
550		goto discard;
551
552	if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size,
553			     (unsigned long)ai->ptr, ai->size)) {
554		/*
555		 * If the actual accesses to not match, this was a false
556		 * positive due to watchpoint encoding.
557		 */
558		kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES);
559		goto discard;
560	}
561
562	return true;
563
564discard:
565	release_report(flags, other_info);
566	return false;
567}
568
569/*
570 * Depending on the report type either sets @other_info and returns false, or
571 * awaits @other_info and returns true. If @other_info is not required for the
572 * report type, simply acquires @report_lock and returns true.
573 */
574static noinline bool prepare_report(unsigned long *flags,
575				    enum kcsan_report_type type,
576				    const struct access_info *ai,
577				    struct other_info *other_info)
578{
579	switch (type) {
580	case KCSAN_REPORT_CONSUMED_WATCHPOINT:
581		prepare_report_producer(flags, ai, other_info);
582		return false;
583	case KCSAN_REPORT_RACE_SIGNAL:
584		return prepare_report_consumer(flags, ai, other_info);
585	default:
586		/* @other_info not required; just acquire @report_lock. */
587		raw_spin_lock_irqsave(&report_lock, *flags);
588		return true;
589	}
590}
591
592void kcsan_report(const volatile void *ptr, size_t size, int access_type,
593		  enum kcsan_value_change value_change,
594		  enum kcsan_report_type type, int watchpoint_idx)
595{
596	unsigned long flags = 0;
597	const struct access_info ai = {
598		.ptr		= ptr,
599		.size		= size,
600		.access_type	= access_type,
601		.task_pid	= in_task() ? task_pid_nr(current) : -1,
602		.cpu_id		= raw_smp_processor_id()
603	};
604	struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
605					? NULL : &other_infos[watchpoint_idx];
606
607	kcsan_disable_current();
608	if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos)))
609		goto out;
610
611	/*
612	 * Because we may generate reports when we're in scheduler code, the use
613	 * of printk() could deadlock. Until such time that all printing code
614	 * called in print_report() is scheduler-safe, accept the risk, and just
615	 * get our message out. As such, also disable lockdep to hide the
616	 * warning, and avoid disabling lockdep for the rest of the kernel.
617	 */
618	lockdep_off();
619
620	if (prepare_report(&flags, type, &ai, other_info)) {
621		/*
622		 * Never report if value_change is FALSE, only if we it is
623		 * either TRUE or MAYBE. In case of MAYBE, further filtering may
624		 * be done once we know the full stack trace in print_report().
625		 */
626		bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE &&
627				print_report(value_change, type, &ai, other_info);
628
629		if (reported && panic_on_warn)
630			panic("panic_on_warn set ...\n");
631
632		release_report(&flags, other_info);
633	}
634
635	lockdep_on();
636out:
637	kcsan_enable_current();
638}