v5.4
  1// SPDX-License-Identifier: GPL-2.0+
  2/*
  3 * Copyright (C) 2007 Alan Stern
  4 * Copyright (C) IBM Corporation, 2009
  5 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
  6 *
  7 * Thanks to Ingo Molnar for his many suggestions.
  8 *
  9 * Authors: Alan Stern <stern@rowland.harvard.edu>
 10 *          K.Prasad <prasad@linux.vnet.ibm.com>
 11 *          Frederic Weisbecker <fweisbec@gmail.com>
 12 */
 13
 14/*
 15 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 16 * using the CPU's debug registers.
 17 * This file contains the arch-independent routines.
 18 */
 19
 20#include <linux/irqflags.h>
 21#include <linux/kallsyms.h>
 22#include <linux/notifier.h>
 23#include <linux/kprobes.h>
 24#include <linux/kdebug.h>
 25#include <linux/kernel.h>
 26#include <linux/module.h>
 27#include <linux/percpu.h>
 28#include <linux/sched.h>
 29#include <linux/init.h>
 30#include <linux/slab.h>
 31#include <linux/list.h>
 32#include <linux/cpu.h>
 33#include <linux/smp.h>
 34#include <linux/bug.h>
 35
 36#include <linux/hw_breakpoint.h>
 37/*
 38 * Constraints data
 39 */
 40struct bp_cpuinfo {
 41	/* Number of pinned cpu breakpoints in a cpu */
 42	unsigned int	cpu_pinned;
 43	/* tsk_pinned[n] is the number of tasks having n+1 breakpoints */
 44	unsigned int	*tsk_pinned;
 45	/* Number of non-pinned cpu/task breakpoints in a cpu */
 46	unsigned int	flexible; /* XXX: placeholder, see fetch_this_slot() */
 47};
 48
 49static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
 50static int nr_slots[TYPE_MAX];
 51
 52static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
 53{
 54	return per_cpu_ptr(bp_cpuinfo + type, cpu);
 55}
 56
 57/* Keep track of the breakpoints attached to tasks */
 58static LIST_HEAD(bp_task_head);
 59
 60static int constraints_initialized;
 61
 62/* Gather the total number of pinned and un-pinned bp in a cpuset */
 63struct bp_busy_slots {
 64	unsigned int pinned;
 65	unsigned int flexible;
 66};
 67
 68/* Serialize accesses to the above constraints */
 69static DEFINE_MUTEX(nr_bp_mutex);
 70
 71__weak int hw_breakpoint_weight(struct perf_event *bp)
 72{
 73	return 1;
 74}
 75
 76static inline enum bp_type_idx find_slot_idx(u64 bp_type)
 77{
 78	if (bp_type & HW_BREAKPOINT_RW)
 79		return TYPE_DATA;
 80
 81	return TYPE_INST;
 82}
 83
 84/*
 85 * Report the maximum number of pinned breakpoints a task
 86 * has in this cpu
 87 */
 88static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 89{
 90	unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
 91	int i;
 92
 93	for (i = nr_slots[type] - 1; i >= 0; i--) {
 94		if (tsk_pinned[i] > 0)
 95			return i + 1;
 96	}
 97
 98	return 0;
 99}
100
101/*
102 * Count the number of breakpoints of the same type and same task.
103 * The given event must not be on the list.
104 */
105static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
106{
107	struct task_struct *tsk = bp->hw.target;
108	struct perf_event *iter;
109	int count = 0;
110
111	list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
112		if (iter->hw.target == tsk &&
113		    find_slot_idx(iter->attr.bp_type) == type &&
114		    (iter->cpu < 0 || cpu == iter->cpu))
115			count += hw_breakpoint_weight(iter);
116	}
117
118	return count;
119}
120
121static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
122{
123	if (bp->cpu >= 0)
124		return cpumask_of(bp->cpu);
125	return cpu_possible_mask;
126}
127
128/*
129 * Report the number of pinned/un-pinned breakpoints we have in
130 * a given cpu (cpu > -1) or in all of them (cpu = -1).
131 */
132static void
133fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
134		    enum bp_type_idx type)
135{
136	const struct cpumask *cpumask = cpumask_of_bp(bp);
137	int cpu;
138
139	for_each_cpu(cpu, cpumask) {
140		struct bp_cpuinfo *info = get_bp_info(cpu, type);
141		int nr;
142
143		nr = info->cpu_pinned;
144		if (!bp->hw.target)
145			nr += max_task_bp_pinned(cpu, type);
146		else
147			nr += task_bp_pinned(cpu, bp, type);
148
149		if (nr > slots->pinned)
150			slots->pinned = nr;
151
152		nr = info->flexible;
153		if (nr > slots->flexible)
154			slots->flexible = nr;
155	}
156}
157
158/*
159 * For now, continue to consider flexible as pinned, until we can
160 * ensure no flexible event can ever be scheduled before a pinned event
161 * on the same cpu.
162 */
163static void
164fetch_this_slot(struct bp_busy_slots *slots, int weight)
165{
166	slots->pinned += weight;
167}
168
169/*
170 * Add a pinned breakpoint for the given task in our constraint table
171 */
172static void toggle_bp_task_slot(struct perf_event *bp, int cpu,
173				enum bp_type_idx type, int weight)
174{
175	unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
176	int old_idx, new_idx;
177
178	old_idx = task_bp_pinned(cpu, bp, type) - 1;
179	new_idx = old_idx + weight;
180
181	if (old_idx >= 0)
182		tsk_pinned[old_idx]--;
183	if (new_idx >= 0)
184		tsk_pinned[new_idx]++;
185}
186
187/*
188 * Add/remove the given breakpoint in our constraint table
189 */
190static void
191toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
192	       int weight)
193{
194	const struct cpumask *cpumask = cpumask_of_bp(bp);
195	int cpu;
196
197	if (!enable)
198		weight = -weight;
199
200	/* Pinned counter cpu profiling */
201	if (!bp->hw.target) {
202		get_bp_info(bp->cpu, type)->cpu_pinned += weight;
203		return;
204	}
205
206	/* Pinned counter task profiling */
207	for_each_cpu(cpu, cpumask)
208		toggle_bp_task_slot(bp, cpu, type, weight);
209
210	if (enable)
211		list_add_tail(&bp->hw.bp_list, &bp_task_head);
212	else
213		list_del(&bp->hw.bp_list);
214}
215
216/*
217 * Function to perform processor-specific cleanup during unregistration
218 */
219__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
220{
221	/*
222	 * A weak stub function here for those archs that don't define
223	 * it inside arch/.../kernel/hw_breakpoint.c
224	 */
225}
226
227/*
228 * Constraints to check before allowing this new breakpoint counter:
229 *
230 *  == Non-pinned counter == (Considered as pinned for now)
231 *
232 *   - If attached to a single cpu, check:
233 *
234 *       (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
235 *           + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
236 *
237 *       -> If there are already non-pinned counters in this cpu, it means
238 *          there is already a free slot for them.
239 *          Otherwise, we check that the maximum number of per task
240 *          breakpoints (for this cpu) plus the number of per cpu breakpoints
241 *          (for this cpu) doesn't cover every register.
242 *
243 *   - If attached to every cpu, check:
244 *
245 *       (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
246 *           + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
247 *
248 *       -> This is roughly the same, except we check the number of per-cpu
249 *          bp for every cpu and keep the max one. Same for the per-task
250 *          breakpoints.
251 *
252 *
253 * == Pinned counter ==
254 *
255 *   - If attached to a single cpu, check:
256 *
257 *       ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
258 *            + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
259 *
260 *       -> Same checks as before. But now the info->flexible, if any, must keep
261 *          at least one register (or they will never be fed).
262 *
263 *   - If attached to every cpus, check:
264 *
265 *       ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
266 *            + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
267 */
268static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
269{
270	struct bp_busy_slots slots = {0};
271	enum bp_type_idx type;
272	int weight;
273
274	/* We couldn't initialize breakpoint constraints on boot */
275	if (!constraints_initialized)
276		return -ENOMEM;
277
278	/* Basic checks */
279	if (bp_type == HW_BREAKPOINT_EMPTY ||
280	    bp_type == HW_BREAKPOINT_INVALID)
281		return -EINVAL;
282
283	type = find_slot_idx(bp_type);
284	weight = hw_breakpoint_weight(bp);
285
286	fetch_bp_busy_slots(&slots, bp, type);
287	/*
288	 * Simulate the addition of this breakpoint to the constraints
289	 * and see the result.
290	 */
291	fetch_this_slot(&slots, weight);
292
293	/* Flexible counters need to keep at least one slot */
294	if (slots.pinned + (!!slots.flexible) > nr_slots[type])
295		return -ENOSPC;
296
297	toggle_bp_slot(bp, true, type, weight);
298
299	return 0;
300}
301
302int reserve_bp_slot(struct perf_event *bp)
303{
304	int ret;
305
306	mutex_lock(&nr_bp_mutex);
307
308	ret = __reserve_bp_slot(bp, bp->attr.bp_type);
309
310	mutex_unlock(&nr_bp_mutex);
311
312	return ret;
313}
314
315static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
316{
317	enum bp_type_idx type;
318	int weight;
319
320	type = find_slot_idx(bp_type);
321	weight = hw_breakpoint_weight(bp);
322	toggle_bp_slot(bp, false, type, weight);
323}
324
325void release_bp_slot(struct perf_event *bp)
326{
327	mutex_lock(&nr_bp_mutex);
328
329	arch_unregister_hw_breakpoint(bp);
330	__release_bp_slot(bp, bp->attr.bp_type);
331
332	mutex_unlock(&nr_bp_mutex);
333}
334
335static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
336{
337	int err;
338
339	__release_bp_slot(bp, old_type);
340
341	err = __reserve_bp_slot(bp, new_type);
342	if (err) {
343		/*
344		 * Reserve the old_type slot back in case
345		 * there's no space for the new type.
346		 *
347		 * This must succeed, because we just released
348		 * the old_type slot in the __release_bp_slot
349		 * call above. If not, something is broken.
350		 */
351		WARN_ON(__reserve_bp_slot(bp, old_type));
352	}
353
354	return err;
355}
356
357static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
358{
359	int ret;
360
361	mutex_lock(&nr_bp_mutex);
362	ret = __modify_bp_slot(bp, old_type, new_type);
363	mutex_unlock(&nr_bp_mutex);
364	return ret;
365}
366
367/*
368 * Allow the kernel debugger to reserve breakpoint slots without
369 * taking a lock, using the dbg_* variants of the reserve and
370 * release breakpoint slot functions.
371 */
372int dbg_reserve_bp_slot(struct perf_event *bp)
373{
374	if (mutex_is_locked(&nr_bp_mutex))
375		return -1;
376
377	return __reserve_bp_slot(bp, bp->attr.bp_type);
378}
379
380int dbg_release_bp_slot(struct perf_event *bp)
381{
382	if (mutex_is_locked(&nr_bp_mutex))
383		return -1;
384
385	__release_bp_slot(bp, bp->attr.bp_type);
386
387	return 0;
388}
389
390static int hw_breakpoint_parse(struct perf_event *bp,
391			       const struct perf_event_attr *attr,
392			       struct arch_hw_breakpoint *hw)
393{
394	int err;
395
396	err = hw_breakpoint_arch_parse(bp, attr, hw);
397	if (err)
398		return err;
399
400	if (arch_check_bp_in_kernelspace(hw)) {
401		if (attr->exclude_kernel)
402			return -EINVAL;
403		/*
404		 * Don't let unprivileged users set a breakpoint in the trap
405		 * path to avoid trap recursion attacks.
406		 */
407		if (!capable(CAP_SYS_ADMIN))
408			return -EPERM;
409	}
410
411	return 0;
412}
413
414int register_perf_hw_breakpoint(struct perf_event *bp)
415{
416	struct arch_hw_breakpoint hw = { };
417	int err;
418
419	err = reserve_bp_slot(bp);
420	if (err)
421		return err;
422
423	err = hw_breakpoint_parse(bp, &bp->attr, &hw);
424	if (err) {
425		release_bp_slot(bp);
426		return err;
427	}
428
429	bp->hw.info = hw;
430
431	return 0;
432}
433
434/**
435 * register_user_hw_breakpoint - register a hardware breakpoint for user space
436 * @attr: breakpoint attributes
437 * @triggered: callback to trigger when we hit the breakpoint
438 * @tsk: pointer to 'task_struct' of the process to which the address belongs
439 */
440struct perf_event *
441register_user_hw_breakpoint(struct perf_event_attr *attr,
442			    perf_overflow_handler_t triggered,
443			    void *context,
444			    struct task_struct *tsk)
445{
446	return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
447						context);
448}
449EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
450
451static void hw_breakpoint_copy_attr(struct perf_event_attr *to,
452				    struct perf_event_attr *from)
453{
454	to->bp_addr = from->bp_addr;
455	to->bp_type = from->bp_type;
456	to->bp_len  = from->bp_len;
457	to->disabled = from->disabled;
458}
459
460int
461modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr,
462			        bool check)
463{
464	struct arch_hw_breakpoint hw = { };
465	int err;
466
467	err = hw_breakpoint_parse(bp, attr, &hw);
468	if (err)
469		return err;
470
471	if (check) {
472		struct perf_event_attr old_attr;
473
474		old_attr = bp->attr;
475		hw_breakpoint_copy_attr(&old_attr, attr);
476		if (memcmp(&old_attr, attr, sizeof(*attr)))
477			return -EINVAL;
478	}
479
480	if (bp->attr.bp_type != attr->bp_type) {
481		err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type);
482		if (err)
483			return err;
484	}
485
486	hw_breakpoint_copy_attr(&bp->attr, attr);
487	bp->hw.info = hw;
488
489	return 0;
490}
491
492/**
493 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
494 * @bp: the breakpoint structure to modify
495 * @attr: new breakpoint attributes
496 */
497int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
498{
499	int err;
500
501	/*
502	 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
503	 * will not be possible to raise IPIs that invoke __perf_event_disable.
504	 * So call the function directly after making sure we are targeting the
505	 * current task.
506	 */
507	if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
508		perf_event_disable_local(bp);
509	else
510		perf_event_disable(bp);
511
512	err = modify_user_hw_breakpoint_check(bp, attr, false);
513
514	if (!bp->attr.disabled)
515		perf_event_enable(bp);
516
517	return err;
518}
519EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
520
521/**
522 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
523 * @bp: the breakpoint structure to unregister
524 */
525void unregister_hw_breakpoint(struct perf_event *bp)
526{
527	if (!bp)
528		return;
529	perf_event_release_kernel(bp);
530}
531EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
532
533/**
534 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
535 * @attr: breakpoint attributes
536 * @triggered: callback to trigger when we hit the breakpoint
537 *
538 * @return a set of per_cpu pointers to perf events
539 */
540struct perf_event * __percpu *
541register_wide_hw_breakpoint(struct perf_event_attr *attr,
542			    perf_overflow_handler_t triggered,
543			    void *context)
544{
545	struct perf_event * __percpu *cpu_events, *bp;
546	long err = 0;
547	int cpu;
548
549	cpu_events = alloc_percpu(typeof(*cpu_events));
550	if (!cpu_events)
551		return (void __percpu __force *)ERR_PTR(-ENOMEM);
552
553	get_online_cpus();
554	for_each_online_cpu(cpu) {
555		bp = perf_event_create_kernel_counter(attr, cpu, NULL,
556						      triggered, context);
557		if (IS_ERR(bp)) {
558			err = PTR_ERR(bp);
559			break;
560		}
561
562		per_cpu(*cpu_events, cpu) = bp;
563	}
564	put_online_cpus();
565
566	if (likely(!err))
567		return cpu_events;
568
569	unregister_wide_hw_breakpoint(cpu_events);
570	return (void __percpu __force *)ERR_PTR(err);
571}
572EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
573
574/**
575 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
576 * @cpu_events: the per cpu set of events to unregister
577 */
578void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
579{
580	int cpu;
581
582	for_each_possible_cpu(cpu)
583		unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));
584
585	free_percpu(cpu_events);
586}
587EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
588
589static struct notifier_block hw_breakpoint_exceptions_nb = {
590	.notifier_call = hw_breakpoint_exceptions_notify,
591	/* we need to be notified first */
592	.priority = 0x7fffffff
593};
594
595static void bp_perf_event_destroy(struct perf_event *event)
596{
597	release_bp_slot(event);
598}
599
600static int hw_breakpoint_event_init(struct perf_event *bp)
601{
602	int err;
603
604	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
605		return -ENOENT;
606
607	/*
608	 * no branch sampling for breakpoint events
609	 */
610	if (has_branch_stack(bp))
611		return -EOPNOTSUPP;
612
613	err = register_perf_hw_breakpoint(bp);
614	if (err)
615		return err;
616
617	bp->destroy = bp_perf_event_destroy;
618
619	return 0;
620}
621
622static int hw_breakpoint_add(struct perf_event *bp, int flags)
623{
624	if (!(flags & PERF_EF_START))
625		bp->hw.state = PERF_HES_STOPPED;
626
627	if (is_sampling_event(bp)) {
628		bp->hw.last_period = bp->hw.sample_period;
629		perf_swevent_set_period(bp);
630	}
631
632	return arch_install_hw_breakpoint(bp);
633}
634
635static void hw_breakpoint_del(struct perf_event *bp, int flags)
636{
637	arch_uninstall_hw_breakpoint(bp);
638}
639
640static void hw_breakpoint_start(struct perf_event *bp, int flags)
641{
642	bp->hw.state = 0;
643}
644
645static void hw_breakpoint_stop(struct perf_event *bp, int flags)
646{
647	bp->hw.state = PERF_HES_STOPPED;
648}
649
650static struct pmu perf_breakpoint = {
651	.task_ctx_nr	= perf_sw_context, /* could eventually get its own */
652
653	.event_init	= hw_breakpoint_event_init,
654	.add		= hw_breakpoint_add,
655	.del		= hw_breakpoint_del,
656	.start		= hw_breakpoint_start,
657	.stop		= hw_breakpoint_stop,
658	.read		= hw_breakpoint_pmu_read,
659};
660
661int __init init_hw_breakpoint(void)
662{
663	int cpu, err_cpu;
664	int i;
665
666	for (i = 0; i < TYPE_MAX; i++)
667		nr_slots[i] = hw_breakpoint_slots(i);
668
669	for_each_possible_cpu(cpu) {
670		for (i = 0; i < TYPE_MAX; i++) {
671			struct bp_cpuinfo *info = get_bp_info(cpu, i);
672
673			info->tsk_pinned = kcalloc(nr_slots[i], sizeof(int),
674							GFP_KERNEL);
675			if (!info->tsk_pinned)
676				goto err_alloc;
677		}
678	}
679
680	constraints_initialized = 1;
681
682	perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
683
684	return register_die_notifier(&hw_breakpoint_exceptions_nb);
685
686 err_alloc:
687	for_each_possible_cpu(err_cpu) {
688		for (i = 0; i < TYPE_MAX; i++)
689			kfree(get_bp_info(err_cpu, i)->tsk_pinned);
690		if (err_cpu == cpu)
691			break;
692	}
693
694	return -ENOMEM;
695}
696
697
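A minimal usage sketch (separate from the listing above): the wide-breakpoint API declared near the end of this file is what the in-tree sample samples/hw_breakpoint/data_breakpoint.c builds on. The module below arms a write watchpoint on one of its own variables on every online CPU via register_wide_hw_breakpoint(); the variable name, handler name, and messages are made up for illustration, and the error check follows the older (void __force *) cast convention rather than the ERR_PTR_PCPU() style seen in the v6.13.7 listing below.

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static int watched_value;			/* hypothetical watched data */
static struct perf_event * __percpu *wp_events;

/* Runs from the breakpoint exception path on the CPU that hit it. */
static void wp_handler(struct perf_event *bp, struct perf_sample_data *data,
		       struct pt_regs *regs)
{
	pr_info("watched_value written, now %d\n", watched_value);
	dump_stack();
}

static int __init wp_init(void)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);		/* PERF_TYPE_BREAKPOINT, pinned */
	attr.bp_addr = (unsigned long)&watched_value;
	attr.bp_len  = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W;

	/* One pinned counter per online CPU; each one reserves a slot. */
	wp_events = register_wide_hw_breakpoint(&attr, wp_handler, NULL);
	if (IS_ERR((void __force *)wp_events))
		return PTR_ERR((void __force *)wp_events);

	watched_value = 42;			/* fires wp_handler on this CPU */
	return 0;
}

static void __exit wp_exit(void)
{
	unregister_wide_hw_breakpoint(wp_events);
}

module_init(wp_init);
module_exit(wp_exit);
MODULE_LICENSE("GPL");

Unregistering tears down the per-CPU perf events, and each event's destroy callback (bp_perf_event_destroy() above) releases its constraint slot.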
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * Copyright (C) 2007 Alan Stern
   4 * Copyright (C) IBM Corporation, 2009
   5 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
   6 *
   7 * Thanks to Ingo Molnar for his many suggestions.
   8 *
   9 * Authors: Alan Stern <stern@rowland.harvard.edu>
  10 *          K.Prasad <prasad@linux.vnet.ibm.com>
  11 *          Frederic Weisbecker <fweisbec@gmail.com>
  12 */
  13
  14/*
  15 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
  16 * using the CPU's debug registers.
  17 * This file contains the arch-independent routines.
  18 */
  19
  20#include <linux/hw_breakpoint.h>
  21
  22#include <linux/atomic.h>
  23#include <linux/bug.h>
  24#include <linux/cpu.h>
  25#include <linux/export.h>
  26#include <linux/init.h>
  27#include <linux/irqflags.h>
  28#include <linux/kdebug.h>
  29#include <linux/kernel.h>
  30#include <linux/mutex.h>
  31#include <linux/notifier.h>
  32#include <linux/percpu-rwsem.h>
  33#include <linux/percpu.h>
  34#include <linux/rhashtable.h>
  35#include <linux/sched.h>
  36#include <linux/slab.h>
  37
  38/*
  39 * Datastructure to track the total uses of N slots across tasks or CPUs;
  40 * bp_slots_histogram::count[N] is the number of assigned N+1 breakpoint slots.
  41 */
  42struct bp_slots_histogram {
  43#ifdef hw_breakpoint_slots
  44	atomic_t count[hw_breakpoint_slots(0)];
  45#else
  46	atomic_t *count;
  47#endif
  48};
  49
  50/*
  51 * Per-CPU constraints data.
  52 */
  53struct bp_cpuinfo {
  54	/* Number of pinned CPU breakpoints in a CPU. */
  55	unsigned int			cpu_pinned;
  56	/* Histogram of pinned task breakpoints in a CPU. */
  57	struct bp_slots_histogram	tsk_pinned;
  58};
  59
  60static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
  61
  62static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
  63{
  64	return per_cpu_ptr(bp_cpuinfo + type, cpu);
  65}
  66
  67/* Number of pinned CPU breakpoints globally. */
  68static struct bp_slots_histogram cpu_pinned[TYPE_MAX];
  69/* Number of pinned CPU-independent task breakpoints. */
  70static struct bp_slots_histogram tsk_pinned_all[TYPE_MAX];
  71
  72/* Keep track of the breakpoints attached to tasks */
  73static struct rhltable task_bps_ht;
  74static const struct rhashtable_params task_bps_ht_params = {
  75	.head_offset = offsetof(struct hw_perf_event, bp_list),
  76	.key_offset = offsetof(struct hw_perf_event, target),
  77	.key_len = sizeof_field(struct hw_perf_event, target),
  78	.automatic_shrinking = true,
  79};
  80
  81static bool constraints_initialized __ro_after_init;
  82
  83/*
  84 * Synchronizes accesses to the per-CPU constraints; the locking rules are:
  85 *
  86 *  1. Atomic updates to bp_cpuinfo::tsk_pinned only require a held read-lock
  87 *     (due to bp_slots_histogram::count being atomic, no updates are lost).
  88 *
  89 *  2. Holding a write-lock is required for computations that require a
  90 *     stable snapshot of all bp_cpuinfo::tsk_pinned.
  91 *
  92 *  3. In all other cases, non-atomic accesses require the appropriately held
  93 *     lock (read-lock for read-only accesses; write-lock for reads/writes).
  94 */
  95DEFINE_STATIC_PERCPU_RWSEM(bp_cpuinfo_sem);
  96
  97/*
  98 * Return mutex to serialize accesses to per-task lists in task_bps_ht. Since
  99 * rhltable synchronizes concurrent insertions/deletions, independent tasks may
 100 * insert/delete concurrently; therefore, a mutex per task is sufficient.
 101 *
 102 * Uses task_struct::perf_event_mutex, to avoid extending task_struct with a
 103 * hw_breakpoint-only mutex, which may be infrequently used. The caveat here is
 104 * that hw_breakpoint may contend with per-task perf event list management. The
 105 * assumption is that perf usecases involving hw_breakpoints are very unlikely
 106 * to result in unnecessary contention.
 107 */
 108static inline struct mutex *get_task_bps_mutex(struct perf_event *bp)
 109{
 110	struct task_struct *tsk = bp->hw.target;
 111
 112	return tsk ? &tsk->perf_event_mutex : NULL;
 113}
 114
 115static struct mutex *bp_constraints_lock(struct perf_event *bp)
 116{
 117	struct mutex *tsk_mtx = get_task_bps_mutex(bp);
 118
 119	if (tsk_mtx) {
 120		/*
 121		 * Fully analogous to the perf_try_init_event() nesting
 122		 * argument in the comment near perf_event_ctx_lock_nested();
 123		 * this child->perf_event_mutex cannot ever deadlock against
 124		 * the parent->perf_event_mutex usage from
 125		 * perf_event_task_{en,dis}able().
 126		 *
 127		 * Specifically, inherited events will never occur on
 128		 * ->perf_event_list.
 129		 */
 130		mutex_lock_nested(tsk_mtx, SINGLE_DEPTH_NESTING);
 131		percpu_down_read(&bp_cpuinfo_sem);
 132	} else {
 133		percpu_down_write(&bp_cpuinfo_sem);
 134	}
 135
 136	return tsk_mtx;
 137}
 138
 139static void bp_constraints_unlock(struct mutex *tsk_mtx)
 140{
 141	if (tsk_mtx) {
 142		percpu_up_read(&bp_cpuinfo_sem);
 143		mutex_unlock(tsk_mtx);
 144	} else {
 145		percpu_up_write(&bp_cpuinfo_sem);
 146	}
 147}
 148
 149static bool bp_constraints_is_locked(struct perf_event *bp)
 150{
 151	struct mutex *tsk_mtx = get_task_bps_mutex(bp);
 152
 153	return percpu_is_write_locked(&bp_cpuinfo_sem) ||
 154	       (tsk_mtx ? mutex_is_locked(tsk_mtx) :
 155			  percpu_is_read_locked(&bp_cpuinfo_sem));
 156}
 157
 158static inline void assert_bp_constraints_lock_held(struct perf_event *bp)
 159{
 160	struct mutex *tsk_mtx = get_task_bps_mutex(bp);
 161
 162	if (tsk_mtx)
 163		lockdep_assert_held(tsk_mtx);
 164	lockdep_assert_held(&bp_cpuinfo_sem);
 165}
 166
 167#ifdef hw_breakpoint_slots
 168/*
 169 * Number of breakpoint slots is constant, and the same for all types.
 170 */
 171static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA));
 172static inline int hw_breakpoint_slots_cached(int type)	{ return hw_breakpoint_slots(type); }
 173static inline int init_breakpoint_slots(void)		{ return 0; }
 174#else
 175/*
 176 * Dynamic number of breakpoint slots.
 177 */
 178static int __nr_bp_slots[TYPE_MAX] __ro_after_init;
 179
 180static inline int hw_breakpoint_slots_cached(int type)
 181{
 182	return __nr_bp_slots[type];
 183}
 184
 185static __init bool
 186bp_slots_histogram_alloc(struct bp_slots_histogram *hist, enum bp_type_idx type)
 187{
 188	hist->count = kcalloc(hw_breakpoint_slots_cached(type), sizeof(*hist->count), GFP_KERNEL);
 189	return hist->count;
 190}
 191
 192static __init void bp_slots_histogram_free(struct bp_slots_histogram *hist)
 193{
 194	kfree(hist->count);
 195}
 196
 197static __init int init_breakpoint_slots(void)
 198{
 199	int i, cpu, err_cpu;
 200
 201	for (i = 0; i < TYPE_MAX; i++)
 202		__nr_bp_slots[i] = hw_breakpoint_slots(i);
 203
 204	for_each_possible_cpu(cpu) {
 205		for (i = 0; i < TYPE_MAX; i++) {
 206			struct bp_cpuinfo *info = get_bp_info(cpu, i);
 207
 208			if (!bp_slots_histogram_alloc(&info->tsk_pinned, i))
 209				goto err;
 210		}
 211	}
 212	for (i = 0; i < TYPE_MAX; i++) {
 213		if (!bp_slots_histogram_alloc(&cpu_pinned[i], i))
 214			goto err;
 215		if (!bp_slots_histogram_alloc(&tsk_pinned_all[i], i))
 216			goto err;
 217	}
 218
 219	return 0;
 220err:
 221	for_each_possible_cpu(err_cpu) {
 222		for (i = 0; i < TYPE_MAX; i++)
 223			bp_slots_histogram_free(&get_bp_info(err_cpu, i)->tsk_pinned);
 224		if (err_cpu == cpu)
 225			break;
 226	}
 227	for (i = 0; i < TYPE_MAX; i++) {
 228		bp_slots_histogram_free(&cpu_pinned[i]);
 229		bp_slots_histogram_free(&tsk_pinned_all[i]);
 230	}
 231
 232	return -ENOMEM;
 233}
 234#endif
 235
 236static inline void
 237bp_slots_histogram_add(struct bp_slots_histogram *hist, int old, int val)
 238{
 239	const int old_idx = old - 1;
 240	const int new_idx = old_idx + val;
 241
 242	if (old_idx >= 0)
 243		WARN_ON(atomic_dec_return_relaxed(&hist->count[old_idx]) < 0);
 244	if (new_idx >= 0)
 245		WARN_ON(atomic_inc_return_relaxed(&hist->count[new_idx]) < 0);
 246}
 247
 248static int
 249bp_slots_histogram_max(struct bp_slots_histogram *hist, enum bp_type_idx type)
 250{
 251	for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
 252		const int count = atomic_read(&hist->count[i]);
 253
 254		/* Catch unexpected writers; we want a stable snapshot. */
 255		ASSERT_EXCLUSIVE_WRITER(hist->count[i]);
 256		if (count > 0)
 257			return i + 1;
 258		WARN(count < 0, "inconsistent breakpoint slots histogram");
 259	}
 260
 261	return 0;
 262}
 263
 264static int
 265bp_slots_histogram_max_merge(struct bp_slots_histogram *hist1, struct bp_slots_histogram *hist2,
 266			     enum bp_type_idx type)
 267{
 268	for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
 269		const int count1 = atomic_read(&hist1->count[i]);
 270		const int count2 = atomic_read(&hist2->count[i]);
 271
 272		/* Catch unexpected writers; we want a stable snapshot. */
 273		ASSERT_EXCLUSIVE_WRITER(hist1->count[i]);
 274		ASSERT_EXCLUSIVE_WRITER(hist2->count[i]);
 275		if (count1 + count2 > 0)
 276			return i + 1;
 277		WARN(count1 < 0, "inconsistent breakpoint slots histogram");
 278		WARN(count2 < 0, "inconsistent breakpoint slots histogram");
 279	}
 280
 281	return 0;
 282}
 283
 284#ifndef hw_breakpoint_weight
 285static inline int hw_breakpoint_weight(struct perf_event *bp)
 286{
 287	return 1;
 288}
 289#endif
 290
 291static inline enum bp_type_idx find_slot_idx(u64 bp_type)
 292{
 293	if (bp_type & HW_BREAKPOINT_RW)
 294		return TYPE_DATA;
 295
 296	return TYPE_INST;
 297}
 298
 299/*
 300 * Return the maximum number of pinned breakpoints a task has in this CPU.
 301 */
 302static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 303{
 304	struct bp_slots_histogram *tsk_pinned = &get_bp_info(cpu, type)->tsk_pinned;
 305
 306	/*
 307	 * At this point we want to have acquired the bp_cpuinfo_sem as a
 308	 * writer to ensure that there are no concurrent writers in
 309	 * toggle_bp_task_slot() to tsk_pinned, and we get a stable snapshot.
 310	 */
 311	lockdep_assert_held_write(&bp_cpuinfo_sem);
 312	return bp_slots_histogram_max_merge(tsk_pinned, &tsk_pinned_all[type], type);
 313}
 314
 315/*
 316 * Count the number of breakpoints of the same type and same task.
 317 * The given event must not be on the list.
 318 *
 319 * If @cpu is -1, but the result of task_bp_pinned() is not CPU-independent,
 320 * returns a negative value.
 321 */
 322static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
 323{
 324	struct rhlist_head *head, *pos;
 325	struct perf_event *iter;
 326	int count = 0;
 327
 328	/*
 329	 * We need a stable snapshot of the per-task breakpoint list.
 330	 */
 331	assert_bp_constraints_lock_held(bp);
 332
 333	rcu_read_lock();
 334	head = rhltable_lookup(&task_bps_ht, &bp->hw.target, task_bps_ht_params);
 335	if (!head)
 336		goto out;
 337
 338	rhl_for_each_entry_rcu(iter, pos, head, hw.bp_list) {
 339		if (find_slot_idx(iter->attr.bp_type) != type)
 340			continue;
 341
 342		if (iter->cpu >= 0) {
 343			if (cpu == -1) {
 344				count = -1;
 345				goto out;
 346			} else if (cpu != iter->cpu)
 347				continue;
 348		}
 349
 350		count += hw_breakpoint_weight(iter);
 351	}
 352
 353out:
 354	rcu_read_unlock();
 355	return count;
 356}
 357
 358static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
 359{
 360	if (bp->cpu >= 0)
 361		return cpumask_of(bp->cpu);
 362	return cpu_possible_mask;
 363}
 364
 365/*
 366 * Returns the max pinned breakpoint slots in a given
 367 * CPU (cpu > -1) or across all of them (cpu = -1).
 368 */
 369static int
 370max_bp_pinned_slots(struct perf_event *bp, enum bp_type_idx type)
 371{
 372	const struct cpumask *cpumask = cpumask_of_bp(bp);
 373	int pinned_slots = 0;
 374	int cpu;
 375
 376	if (bp->hw.target && bp->cpu < 0) {
 377		int max_pinned = task_bp_pinned(-1, bp, type);
 378
 379		if (max_pinned >= 0) {
 380			/*
 381			 * Fast path: task_bp_pinned() is CPU-independent and
 382			 * returns the same value for any CPU.
 383			 */
 384			max_pinned += bp_slots_histogram_max(&cpu_pinned[type], type);
 385			return max_pinned;
 386		}
 387	}
 388
 389	for_each_cpu(cpu, cpumask) {
 390		struct bp_cpuinfo *info = get_bp_info(cpu, type);
 391		int nr;
 392
 393		nr = info->cpu_pinned;
 394		if (!bp->hw.target)
 395			nr += max_task_bp_pinned(cpu, type);
 396		else
 397			nr += task_bp_pinned(cpu, bp, type);
 398
 399		pinned_slots = max(nr, pinned_slots);
 400	}
 401
 402	return pinned_slots;
 403}
 404
 405/*
 406 * Add/remove the given breakpoint in our constraint table
 407 */
 408static int
 409toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int weight)
 410{
 411	int cpu, next_tsk_pinned;
 412
 413	if (!enable)
 414		weight = -weight;
 415
 416	if (!bp->hw.target) {
 417		/*
 418		 * Update the pinned CPU slots, in per-CPU bp_cpuinfo and in the
 419		 * global histogram.
 420		 */
 421		struct bp_cpuinfo *info = get_bp_info(bp->cpu, type);
 422
 423		lockdep_assert_held_write(&bp_cpuinfo_sem);
 424		bp_slots_histogram_add(&cpu_pinned[type], info->cpu_pinned, weight);
 425		info->cpu_pinned += weight;
 426		return 0;
 427	}
 428
 429	/*
 430	 * If bp->hw.target, tsk_pinned is only modified, but not used
 431	 * otherwise. We can permit concurrent updates as long as there are no
 432	 * other uses: having acquired bp_cpuinfo_sem as a reader allows
 433	 * concurrent updates here. Uses of tsk_pinned will require acquiring
 434	 * bp_cpuinfo_sem as a writer to stabilize tsk_pinned's value.
 435	 */
 436	lockdep_assert_held_read(&bp_cpuinfo_sem);
 437
 438	/*
 439	 * Update the pinned task slots, in per-CPU bp_cpuinfo and in the global
 440	 * histogram. We need to take care of 4 cases:
 441	 *
 442	 *  1. This breakpoint targets all CPUs (cpu < 0), and there may only
 443	 *     exist other task breakpoints targeting all CPUs. In this case we
 444	 *     can simply update the global slots histogram.
 445	 *
 446	 *  2. This breakpoint targets a specific CPU (cpu >= 0), but there may
 447	 *     only exist other task breakpoints targeting all CPUs.
 448	 *
 449	 *     a. On enable: remove the existing breakpoints from the global
 450	 *        slots histogram and use the per-CPU histogram.
 451	 *
 452	 *     b. On disable: re-insert the existing breakpoints into the global
 453	 *        slots histogram and remove from per-CPU histogram.
 454	 *
 455	 *  3. Some other existing task breakpoints target specific CPUs. Only
 456	 *     update the per-CPU slots histogram.
 457	 */
 458
 459	if (!enable) {
 460		/*
 461		 * Remove before updating histograms so we can determine if this
 462		 * was the last task breakpoint for a specific CPU.
 463		 */
 464		int ret = rhltable_remove(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);
 465
 466		if (ret)
 467			return ret;
 468	}
 469	/*
 470	 * Note: If !enable, next_tsk_pinned will not count the to-be-removed breakpoint.
 471	 */
 472	next_tsk_pinned = task_bp_pinned(-1, bp, type);
 473
 474	if (next_tsk_pinned >= 0) {
 475		if (bp->cpu < 0) { /* Case 1: fast path */
 476			if (!enable)
 477				next_tsk_pinned += hw_breakpoint_weight(bp);
 478			bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned, weight);
 479		} else if (enable) { /* Case 2.a: slow path */
 480			/* Add existing to per-CPU histograms. */
 481			for_each_possible_cpu(cpu) {
 482				bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
 483						       0, next_tsk_pinned);
 484			}
 485			/* Add this first CPU-pinned task breakpoint. */
 486			bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
 487					       next_tsk_pinned, weight);
 488			/* Rebalance global task pinned histogram. */
 489			bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned,
 490					       -next_tsk_pinned);
 491		} else { /* Case 2.b: slow path */
 492			/* Remove this last CPU-pinned task breakpoint. */
 493			bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
 494					       next_tsk_pinned + hw_breakpoint_weight(bp), weight);
 495			/* Remove all from per-CPU histograms. */
 496			for_each_possible_cpu(cpu) {
 497				bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
 498						       next_tsk_pinned, -next_tsk_pinned);
 499			}
 500			/* Rebalance global task pinned histogram. */
 501			bp_slots_histogram_add(&tsk_pinned_all[type], 0, next_tsk_pinned);
 502		}
 503	} else { /* Case 3: slow path */
 504		const struct cpumask *cpumask = cpumask_of_bp(bp);
 505
 506		for_each_cpu(cpu, cpumask) {
 507			next_tsk_pinned = task_bp_pinned(cpu, bp, type);
 508			if (!enable)
 509				next_tsk_pinned += hw_breakpoint_weight(bp);
 510			bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
 511					       next_tsk_pinned, weight);
 512		}
 513	}
 514
 515	/*
 516	 * Readers want a stable snapshot of the per-task breakpoint list.
 517	 */
 518	assert_bp_constraints_lock_held(bp);
 519
 520	if (enable)
 521		return rhltable_insert(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);
 522
 523	return 0;
 524}
 525
 526/*
 527 * Constraints to check before allowing this new breakpoint counter.
 528 *
 529 * Note: Flexible breakpoints are currently unimplemented, but outlined in the
 530 * below algorithm for completeness.  The implementation treats flexible as
 531 * pinned due to no guarantee that we currently always schedule flexible events
 532 * before a pinned event on the same CPU.
 533 *
 534 *  == Non-pinned counter == (Considered as pinned for now)
 535 *
 536 *   - If attached to a single cpu, check:
 537 *
 538 *       (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
 539 *           + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
 540 *
 541 *       -> If there are already non-pinned counters in this cpu, it means
 542 *          there is already a free slot for them.
 543 *          Otherwise, we check that the maximum number of per task
 544 *          breakpoints (for this cpu) plus the number of per cpu breakpoints
 545 *          (for this cpu) doesn't cover every register.
 546 *
 547 *   - If attached to every cpu, check:
 548 *
 549 *       (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
 550 *           + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
 551 *
 552 *       -> This is roughly the same, except we check the number of per-cpu
 553 *          bp for every cpu and keep the max one. Same for the per-task
 554 *          breakpoints.
 555 *
 556 *
 557 * == Pinned counter ==
 558 *
 559 *   - If attached to a single cpu, check:
 560 *
 561 *       ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
 562 *            + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
 563 *
 564 *       -> Same checks as before. But now the info->flexible, if any, must keep
 565 *          at least one register (or they will never be fed).
 566 *
 567 *   - If attached to every cpu, check:
 568 *
 569 *       ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
 570 *            + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
 571 */
 572static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
 573{
 574	enum bp_type_idx type;
 575	int max_pinned_slots;
 576	int weight;
 577
 578	/* We couldn't initialize breakpoint constraints on boot */
 579	if (!constraints_initialized)
 580		return -ENOMEM;
 581
 582	/* Basic checks */
 583	if (bp_type == HW_BREAKPOINT_EMPTY ||
 584	    bp_type == HW_BREAKPOINT_INVALID)
 585		return -EINVAL;
 586
 587	type = find_slot_idx(bp_type);
 588	weight = hw_breakpoint_weight(bp);
 589
 590	/* Check if this new breakpoint can be satisfied across all CPUs. */
 591	max_pinned_slots = max_bp_pinned_slots(bp, type) + weight;
 592	if (max_pinned_slots > hw_breakpoint_slots_cached(type))
 593		return -ENOSPC;
 594
 595	return toggle_bp_slot(bp, true, type, weight);
 596}
 597
 598int reserve_bp_slot(struct perf_event *bp)
 599{
 600	struct mutex *mtx = bp_constraints_lock(bp);
 601	int ret = __reserve_bp_slot(bp, bp->attr.bp_type);
 602
 603	bp_constraints_unlock(mtx);
 604	return ret;
 605}
 606
 607static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
 608{
 609	enum bp_type_idx type;
 610	int weight;
 611
 612	type = find_slot_idx(bp_type);
 613	weight = hw_breakpoint_weight(bp);
 614	WARN_ON(toggle_bp_slot(bp, false, type, weight));
 615}
 616
 617void release_bp_slot(struct perf_event *bp)
 618{
 619	struct mutex *mtx = bp_constraints_lock(bp);
 620
 621	__release_bp_slot(bp, bp->attr.bp_type);
 622	bp_constraints_unlock(mtx);
 623}
 624
 625static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
 626{
 627	int err;
 628
 629	__release_bp_slot(bp, old_type);
 630
 631	err = __reserve_bp_slot(bp, new_type);
 632	if (err) {
 633		/*
 634		 * Reserve the old_type slot back in case
 635		 * there's no space for the new type.
 636		 *
 637		 * This must succeed, because we just released
 638		 * the old_type slot in the __release_bp_slot
 639		 * call above. If not, something is broken.
 640		 */
 641		WARN_ON(__reserve_bp_slot(bp, old_type));
 642	}
 643
 644	return err;
 645}
 646
 647static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
 648{
 649	struct mutex *mtx = bp_constraints_lock(bp);
 650	int ret = __modify_bp_slot(bp, old_type, new_type);
 651
 652	bp_constraints_unlock(mtx);
 653	return ret;
 654}
 655
 656/*
 657 * Allow the kernel debugger to reserve breakpoint slots without
 658 * taking a lock, using the dbg_* variants of the reserve and
 659 * release breakpoint slot functions.
 660 */
 661int dbg_reserve_bp_slot(struct perf_event *bp)
 662{
 663	int ret;
 664
 665	if (bp_constraints_is_locked(bp))
 666		return -1;
 667
 668	/* Locks aren't held; disable lockdep assert checking. */
 669	lockdep_off();
 670	ret = __reserve_bp_slot(bp, bp->attr.bp_type);
 671	lockdep_on();
 672
 673	return ret;
 674}
 675
 676int dbg_release_bp_slot(struct perf_event *bp)
 677{
 678	if (bp_constraints_is_locked(bp))
 679		return -1;
 680
 681	/* Locks aren't held; disable lockdep assert checking. */
 682	lockdep_off();
 683	__release_bp_slot(bp, bp->attr.bp_type);
 684	lockdep_on();
 685
 686	return 0;
 687}
 688
 689static int hw_breakpoint_parse(struct perf_event *bp,
 690			       const struct perf_event_attr *attr,
 691			       struct arch_hw_breakpoint *hw)
 692{
 693	int err;
 694
 695	err = hw_breakpoint_arch_parse(bp, attr, hw);
 696	if (err)
 697		return err;
 698
 699	if (arch_check_bp_in_kernelspace(hw)) {
 700		if (attr->exclude_kernel)
 701			return -EINVAL;
 702		/*
 703		 * Don't let unprivileged users set a breakpoint in the trap
 704		 * path to avoid trap recursion attacks.
 705		 */
 706		if (!capable(CAP_SYS_ADMIN))
 707			return -EPERM;
 708	}
 709
 710	return 0;
 711}
 712
 713int register_perf_hw_breakpoint(struct perf_event *bp)
 714{
 715	struct arch_hw_breakpoint hw = { };
 716	int err;
 717
 718	err = reserve_bp_slot(bp);
 719	if (err)
 720		return err;
 721
 722	err = hw_breakpoint_parse(bp, &bp->attr, &hw);
 723	if (err) {
 724		release_bp_slot(bp);
 725		return err;
 726	}
 727
 728	bp->hw.info = hw;
 729
 730	return 0;
 731}
 732
 733/**
 734 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 735 * @attr: breakpoint attributes
 736 * @triggered: callback to trigger when we hit the breakpoint
 737 * @context: context data that could be used in the triggered callback
 738 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 739 */
 740struct perf_event *
 741register_user_hw_breakpoint(struct perf_event_attr *attr,
 742			    perf_overflow_handler_t triggered,
 743			    void *context,
 744			    struct task_struct *tsk)
 745{
 746	return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
 747						context);
 748}
 749EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 750
 751static void hw_breakpoint_copy_attr(struct perf_event_attr *to,
 752				    struct perf_event_attr *from)
 753{
 754	to->bp_addr = from->bp_addr;
 755	to->bp_type = from->bp_type;
 756	to->bp_len  = from->bp_len;
 757	to->disabled = from->disabled;
 758}
 759
 760int
 761modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr,
 762			        bool check)
 763{
 764	struct arch_hw_breakpoint hw = { };
 765	int err;
 766
 767	err = hw_breakpoint_parse(bp, attr, &hw);
 768	if (err)
 769		return err;
 770
 771	if (check) {
 772		struct perf_event_attr old_attr;
 773
 774		old_attr = bp->attr;
 775		hw_breakpoint_copy_attr(&old_attr, attr);
 776		if (memcmp(&old_attr, attr, sizeof(*attr)))
 777			return -EINVAL;
 778	}
 779
 780	if (bp->attr.bp_type != attr->bp_type) {
 781		err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type);
 782		if (err)
 783			return err;
 784	}
 785
 786	hw_breakpoint_copy_attr(&bp->attr, attr);
 787	bp->hw.info = hw;
 788
 789	return 0;
 790}
 791
 792/**
 793 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
 794 * @bp: the breakpoint structure to modify
 795 * @attr: new breakpoint attributes
 796 */
 797int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 798{
 799	int err;
 800
 801	/*
 802	 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
 803	 * will not be possible to raise IPIs that invoke __perf_event_disable.
 804	 * So call the function directly after making sure we are targeting the
 805	 * current task.
 806	 */
 807	if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
 808		perf_event_disable_local(bp);
 809	else
 810		perf_event_disable(bp);
 811
 812	err = modify_user_hw_breakpoint_check(bp, attr, false);
 813
 814	if (!bp->attr.disabled)
 815		perf_event_enable(bp);
 816
 817	return err;
 818}
 819EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
 820
 821/**
 822 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
 823 * @bp: the breakpoint structure to unregister
 824 */
 825void unregister_hw_breakpoint(struct perf_event *bp)
 826{
 827	if (!bp)
 828		return;
 829	perf_event_release_kernel(bp);
 830}
 831EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
 832
 833/**
 834 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
 835 * @attr: breakpoint attributes
 836 * @triggered: callback to trigger when we hit the breakpoint
 837 * @context: context data that could be used in the triggered callback
 838 *
 839 * @return a set of per_cpu pointers to perf events
 840 */
 841struct perf_event * __percpu *
 842register_wide_hw_breakpoint(struct perf_event_attr *attr,
 843			    perf_overflow_handler_t triggered,
 844			    void *context)
 845{
 846	struct perf_event * __percpu *cpu_events, *bp;
 847	long err = 0;
 848	int cpu;
 849
 850	cpu_events = alloc_percpu(typeof(*cpu_events));
 851	if (!cpu_events)
 852		return ERR_PTR_PCPU(-ENOMEM);
 853
 854	cpus_read_lock();
 855	for_each_online_cpu(cpu) {
 856		bp = perf_event_create_kernel_counter(attr, cpu, NULL,
 857						      triggered, context);
 858		if (IS_ERR(bp)) {
 859			err = PTR_ERR(bp);
 860			break;
 861		}
 862
 863		per_cpu(*cpu_events, cpu) = bp;
 864	}
 865	cpus_read_unlock();
 866
 867	if (likely(!err))
 868		return cpu_events;
 869
 870	unregister_wide_hw_breakpoint(cpu_events);
 871	return ERR_PTR_PCPU(err);
 872}
 873EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
 874
 875/**
 876 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
 877 * @cpu_events: the per cpu set of events to unregister
 878 */
 879void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
 880{
 881	int cpu;
 882
 883	for_each_possible_cpu(cpu)
 884		unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));
 885
 886	free_percpu(cpu_events);
 887}
 888EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
 889
 890/**
 891 * hw_breakpoint_is_used - check if breakpoints are currently used
 892 *
 893 * Returns: true if breakpoints are used, false otherwise.
 894 */
 895bool hw_breakpoint_is_used(void)
 896{
 897	int cpu;
 898
 899	if (!constraints_initialized)
 900		return false;
 901
 902	for_each_possible_cpu(cpu) {
 903		for (int type = 0; type < TYPE_MAX; ++type) {
 904			struct bp_cpuinfo *info = get_bp_info(cpu, type);
 905
 906			if (info->cpu_pinned)
 907				return true;
 908
 909			for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
 910				if (atomic_read(&info->tsk_pinned.count[slot]))
 911					return true;
 912			}
 913		}
 914	}
 915
 916	for (int type = 0; type < TYPE_MAX; ++type) {
 917		for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
 918			/*
 919			 * Warn, because if there are CPU pinned counters,
 920			 * should never get here; bp_cpuinfo::cpu_pinned should
 921			 * be consistent with the global cpu_pinned histogram.
 922			 */
 923			if (WARN_ON(atomic_read(&cpu_pinned[type].count[slot])))
 924				return true;
 925
 926			if (atomic_read(&tsk_pinned_all[type].count[slot]))
 927				return true;
 928		}
 929	}
 930
 931	return false;
 932}
 933
 934static struct notifier_block hw_breakpoint_exceptions_nb = {
 935	.notifier_call = hw_breakpoint_exceptions_notify,
 936	/* we need to be notified first */
 937	.priority = 0x7fffffff
 938};
 939
 940static void bp_perf_event_destroy(struct perf_event *event)
 941{
 942	release_bp_slot(event);
 943}
 944
 945static int hw_breakpoint_event_init(struct perf_event *bp)
 946{
 947	int err;
 948
 949	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
 950		return -ENOENT;
 951
 952	/*
 953	 * no branch sampling for breakpoint events
 954	 */
 955	if (has_branch_stack(bp))
 956		return -EOPNOTSUPP;
 957
 958	err = register_perf_hw_breakpoint(bp);
 959	if (err)
 960		return err;
 961
 962	bp->destroy = bp_perf_event_destroy;
 963
 964	return 0;
 965}
 966
 967static int hw_breakpoint_add(struct perf_event *bp, int flags)
 968{
 969	if (!(flags & PERF_EF_START))
 970		bp->hw.state = PERF_HES_STOPPED;
 971
 972	if (is_sampling_event(bp)) {
 973		bp->hw.last_period = bp->hw.sample_period;
 974		perf_swevent_set_period(bp);
 975	}
 976
 977	return arch_install_hw_breakpoint(bp);
 978}
 979
 980static void hw_breakpoint_del(struct perf_event *bp, int flags)
 981{
 982	arch_uninstall_hw_breakpoint(bp);
 983}
 984
 985static void hw_breakpoint_start(struct perf_event *bp, int flags)
 986{
 987	bp->hw.state = 0;
 988}
 989
 990static void hw_breakpoint_stop(struct perf_event *bp, int flags)
 991{
 992	bp->hw.state = PERF_HES_STOPPED;
 993}
 994
 995static struct pmu perf_breakpoint = {
 996	.task_ctx_nr	= perf_sw_context, /* could eventually get its own */
 997
 998	.event_init	= hw_breakpoint_event_init,
 999	.add		= hw_breakpoint_add,
1000	.del		= hw_breakpoint_del,
1001	.start		= hw_breakpoint_start,
1002	.stop		= hw_breakpoint_stop,
1003	.read		= hw_breakpoint_pmu_read,
1004};
1005
1006int __init init_hw_breakpoint(void)
1007{
1008	int ret;
1009
1010	ret = rhltable_init(&task_bps_ht, &task_bps_ht_params);
1011	if (ret)
1012		return ret;
1013
1014	ret = init_breakpoint_slots();
1015	if (ret)
1016		return ret;
1017
1018	constraints_initialized = true;
1019
1020	perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
1021
1022	return register_die_notifier(&hw_breakpoint_exceptions_nb);
1023}
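A minimal usage sketch (separate from the listings above): the task-bound half of this API, register_user_hw_breakpoint(), modify_user_hw_breakpoint() and unregister_hw_breakpoint(), is what arch ptrace code builds on. The helper names, the watched address, and the choice of a 4-byte write watchpoint below are made up for illustration; a real caller would also handle the ERR_PTR() return.

#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/sched.h>

/* Runs when the target task touches the watched address. */
static void dbg_triggered(struct perf_event *bp,
			  struct perf_sample_data *data,
			  struct pt_regs *regs)
{
	/* An arch would typically forward this to the debugger, e.g. as SIGTRAP. */
}

/* Arm a 4-byte write watchpoint at @addr for task @tsk. */
static struct perf_event *dbg_set_watchpoint(struct task_struct *tsk,
					     unsigned long addr)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = addr;
	attr.bp_len  = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W;

	/* Takes a constraint slot via reserve_bp_slot() internally. */
	return register_user_hw_breakpoint(&attr, dbg_triggered, NULL, tsk);
}

/* Retarget an existing watchpoint; a type change goes through modify_bp_slot(). */
static int dbg_move_watchpoint(struct perf_event *bp, unsigned long new_addr)
{
	struct perf_event_attr attr = bp->attr;

	attr.bp_addr = new_addr;
	attr.bp_type = HW_BREAKPOINT_RW;
	return modify_user_hw_breakpoint(bp, &attr);
}

Tearing the watchpoint down is just unregister_hw_breakpoint(bp), which releases the slot through the event's destroy callback (bp_perf_event_destroy() above).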