v4.6 (kernel/events/hw_breakpoint.c)
 
  1/*
  2 * This program is free software; you can redistribute it and/or modify
  3 * it under the terms of the GNU General Public License as published by
  4 * the Free Software Foundation; either version 2 of the License, or
  5 * (at your option) any later version.
  6 *
  7 * This program is distributed in the hope that it will be useful,
  8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 * GNU General Public License for more details.
 11 *
 12 * You should have received a copy of the GNU General Public License
 13 * along with this program; if not, write to the Free Software
 14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 15 *
 16 * Copyright (C) 2007 Alan Stern
 17 * Copyright (C) IBM Corporation, 2009
 18 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
 19 *
 20 * Thanks to Ingo Molnar for his many suggestions.
 21 *
 22 * Authors: Alan Stern <stern@rowland.harvard.edu>
 23 *          K.Prasad <prasad@linux.vnet.ibm.com>
 24 *          Frederic Weisbecker <fweisbec@gmail.com>
 25 */
 26
 27/*
 28 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 29 * using the CPU's debug registers.
 30 * This file contains the arch-independent routines.
 31 */
 32
 33#include <linux/irqflags.h>
 34#include <linux/kallsyms.h>
 35#include <linux/notifier.h>
 36#include <linux/kprobes.h>
 37#include <linux/kdebug.h>
 38#include <linux/kernel.h>
 39#include <linux/module.h>
 40#include <linux/percpu.h>
 41#include <linux/sched.h>
 42#include <linux/init.h>
 43#include <linux/slab.h>
 44#include <linux/list.h>
 45#include <linux/cpu.h>
 46#include <linux/smp.h>
 47
 48#include <linux/hw_breakpoint.h>
 49/*
 50 * Constraints data
 51 */
 52struct bp_cpuinfo {
 53	/* Number of pinned cpu breakpoints in a cpu */
 54	unsigned int	cpu_pinned;
 55	/* tsk_pinned[n] is the number of tasks having n+1 breakpoints */
 56	unsigned int	*tsk_pinned;
 57	/* Number of non-pinned cpu/task breakpoints in a cpu */
 58	unsigned int	flexible; /* XXX: placeholder, see fetch_this_slot() */
 59};
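The tsk_pinned array is easy to misread: index n counts how many tasks currently hold exactly n+1 pinned breakpoints of this type on the CPU. A hypothetical snapshot (not part of the source) makes the encoding concrete:

/*
 * Illustration only: two tasks with one breakpoint each and one task with
 * three breakpoints on this CPU give:
 *
 *	tsk_pinned[0] == 2	// tasks holding exactly 1 breakpoint
 *	tsk_pinned[1] == 0
 *	tsk_pinned[2] == 1	// the task holding exactly 3 breakpoints
 *
 * max_task_bp_pinned() scans this array from the top and returns 3 here.
 */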
 60
 61static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
 62static int nr_slots[TYPE_MAX];
 63
 64static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
 65{
 66	return per_cpu_ptr(bp_cpuinfo + type, cpu);
 67}
 68
 69/* Keep track of the breakpoints attached to tasks */
 70static LIST_HEAD(bp_task_head);
 71
 72static int constraints_initialized;
 73
 74/* Gather the number of total pinned and un-pinned bp in a cpuset */
 75struct bp_busy_slots {
 76	unsigned int pinned;
 77	unsigned int flexible;
 78};
 79
 80/* Serialize accesses to the above constraints */
 81static DEFINE_MUTEX(nr_bp_mutex);
 82
 83__weak int hw_breakpoint_weight(struct perf_event *bp)
 84{
 85	return 1;
 86}
 87
 88static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
 89{
 90	if (bp->attr.bp_type & HW_BREAKPOINT_RW)
 91		return TYPE_DATA;
 92
 93	return TYPE_INST;
 94}
 95
 96/*
 97 * Report the maximum number of pinned breakpoints a task
 98 * has in this cpu
 99 */
100static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
101{
102	unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
103	int i;
104
105	for (i = nr_slots[type] - 1; i >= 0; i--) {
106		if (tsk_pinned[i] > 0)
107			return i + 1;
108	}
109
110	return 0;
111}
112
113/*
114 * Count the number of breakpoints of the same type and same task.
115 * The given event must not be on the list.
116 */
117static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
118{
119	struct task_struct *tsk = bp->hw.target;
120	struct perf_event *iter;
121	int count = 0;
122
123	list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
124		if (iter->hw.target == tsk &&
125		    find_slot_idx(iter) == type &&
126		    (iter->cpu < 0 || cpu == iter->cpu))
127			count += hw_breakpoint_weight(iter);
128	}
129
130	return count;
131}
132
133static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
134{
135	if (bp->cpu >= 0)
136		return cpumask_of(bp->cpu);
137	return cpu_possible_mask;
138}
139
140/*
141 * Report the number of pinned/un-pinned breakpoints we have in
142 * a given cpu (cpu > -1) or in all of them (cpu = -1).
143 */
144static void
145fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
146		    enum bp_type_idx type)
147{
148	const struct cpumask *cpumask = cpumask_of_bp(bp);
149	int cpu;
150
151	for_each_cpu(cpu, cpumask) {
152		struct bp_cpuinfo *info = get_bp_info(cpu, type);
153		int nr;
154
155		nr = info->cpu_pinned;
156		if (!bp->hw.target)
157			nr += max_task_bp_pinned(cpu, type);
158		else
159			nr += task_bp_pinned(cpu, bp, type);
160
161		if (nr > slots->pinned)
162			slots->pinned = nr;
163
164		nr = info->flexible;
165		if (nr > slots->flexible)
166			slots->flexible = nr;
167	}
168}
169
170/*
171 * For now, continue to consider flexible as pinned, until we can
172 * ensure no flexible event can ever be scheduled before a pinned event
173 * on the same cpu.
174 */
175static void
176fetch_this_slot(struct bp_busy_slots *slots, int weight)
177{
178	slots->pinned += weight;
179}
180
181/*
182 * Add a pinned breakpoint for the given task in our constraint table
183 */
184static void toggle_bp_task_slot(struct perf_event *bp, int cpu,
185				enum bp_type_idx type, int weight)
186{
187	unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
188	int old_idx, new_idx;
189
190	old_idx = task_bp_pinned(cpu, bp, type) - 1;
191	new_idx = old_idx + weight;
192
193	if (old_idx >= 0)
194		tsk_pinned[old_idx]--;
195	if (new_idx >= 0)
196		tsk_pinned[new_idx]++;
197}
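To make the bucket arithmetic above concrete, here is a hypothetical trace (not from the source) for a task that already owns two pinned breakpoints on this CPU when a new breakpoint of weight 1 is added:

/*
 * Illustration only:
 *
 *	old_idx = task_bp_pinned(cpu, bp, type) - 1;	// 2 - 1 = 1
 *	new_idx = old_idx + weight;			// 1 + 1 = 2
 *	tsk_pinned[1]--;	// no longer counted as a "2 breakpoints" task
 *	tsk_pinned[2]++;	// now counted as a "3 breakpoints" task
 *
 * On removal, toggle_bp_slot() passes a negated weight, which moves the
 * task back down one bucket.
 */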
198
199/*
200 * Add/remove the given breakpoint in our constraint table
201 */
202static void
203toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
204	       int weight)
205{
206	const struct cpumask *cpumask = cpumask_of_bp(bp);
207	int cpu;
208
209	if (!enable)
210		weight = -weight;
211
212	/* Pinned counter cpu profiling */
213	if (!bp->hw.target) {
214		get_bp_info(bp->cpu, type)->cpu_pinned += weight;
215		return;
216	}
217
218	/* Pinned counter task profiling */
219	for_each_cpu(cpu, cpumask)
220		toggle_bp_task_slot(bp, cpu, type, weight);
221
222	if (enable)
223		list_add_tail(&bp->hw.bp_list, &bp_task_head);
224	else
225		list_del(&bp->hw.bp_list);
226}
227
228/*
229 * Function to perform processor-specific cleanup during unregistration
230 */
231__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
232{
233	/*
234	 * A weak stub function here for those archs that don't define
235	 * it inside arch/.../kernel/hw_breakpoint.c
236	 */
237}
238
239/*
240 * Constraints to check before allowing this new breakpoint counter:
241 *
242 *  == Non-pinned counter == (Considered as pinned for now)
243 *
244 *   - If attached to a single cpu, check:
245 *
246 *       (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
247 *           + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
248 *
249 *       -> If there are already non-pinned counters in this cpu, it means
250 *          there is already a free slot for them.
251 *          Otherwise, we check that the maximum number of per task
252 *          breakpoints (for this cpu) plus the number of per cpu breakpoint
253 *          (for this cpu) doesn't cover every register.
254 *
255 *   - If attached to every cpu, check:
256 *
257 *       (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
258 *           + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
259 *
260 *       -> This is roughly the same, except we check the number of per cpu
261 *          bp for every cpu and we keep the max one. Same for the per tasks
262 *          breakpoints.
263 *
264 *
265 * == Pinned counter ==
266 *
267 *   - If attached to a single cpu, check:
268 *
269 *       ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
270 *            + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
271 *
272 *       -> Same checks as before. But now the info->flexible, if any, must keep
273 *          one register at least (or they will never be fed).
274 *
275 *   - If attached to every cpu, check:
276 *
277 *       ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
278 *            + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
279 */
280static int __reserve_bp_slot(struct perf_event *bp)
281{
282	struct bp_busy_slots slots = {0};
283	enum bp_type_idx type;
284	int weight;
285
286	/* We couldn't initialize breakpoint constraints on boot */
287	if (!constraints_initialized)
288		return -ENOMEM;
289
290	/* Basic checks */
291	if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
292	    bp->attr.bp_type == HW_BREAKPOINT_INVALID)
293		return -EINVAL;
294
295	type = find_slot_idx(bp);
296	weight = hw_breakpoint_weight(bp);
297
298	fetch_bp_busy_slots(&slots, bp, type);
299	/*
300	 * Simulate the addition of this breakpoint to the constraints
301	 * and see the result.
302	 */
303	fetch_this_slot(&slots, weight);
304
305	/* Flexible counters need to keep at least one slot */
306	if (slots.pinned + (!!slots.flexible) > nr_slots[type])
307		return -ENOSPC;
308
309	toggle_bp_slot(bp, true, type, weight);
310
311	return 0;
312}
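A worked example of the final check, assuming the x86 case of four data-breakpoint slots (HBP_NUM == 4); the numbers are hypothetical:

/*
 * Illustration only, with nr_slots[TYPE_DATA] == 4:
 *
 *	fetch_bp_busy_slots():		slots.pinned = 3, slots.flexible = 1
 *	fetch_this_slot(&slots, 1):	slots.pinned = 4
 *	4 + !!1 = 5 > 4			-> __reserve_bp_slot() returns -ENOSPC
 *
 * Without the flexible counter (slots.flexible == 0) the same request
 * fits exactly: 4 + 0 <= 4.
 */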
313
314int reserve_bp_slot(struct perf_event *bp)
315{
316	int ret;
317
318	mutex_lock(&nr_bp_mutex);
319
320	ret = __reserve_bp_slot(bp);
321
322	mutex_unlock(&nr_bp_mutex);
323
324	return ret;
325}
326
327static void __release_bp_slot(struct perf_event *bp)
328{
329	enum bp_type_idx type;
330	int weight;
331
332	type = find_slot_idx(bp);
333	weight = hw_breakpoint_weight(bp);
334	toggle_bp_slot(bp, false, type, weight);
335}
336
337void release_bp_slot(struct perf_event *bp)
338{
339	mutex_lock(&nr_bp_mutex);
340
341	arch_unregister_hw_breakpoint(bp);
342	__release_bp_slot(bp);
343
344	mutex_unlock(&nr_bp_mutex);
345}
346
347/*
348 * Allow the kernel debugger to reserve breakpoint slots without
349 * taking a lock using the dbg_* variant of the reserve and
350 * release breakpoint slots.
351 */
352int dbg_reserve_bp_slot(struct perf_event *bp)
353{
354	if (mutex_is_locked(&nr_bp_mutex))
355		return -1;
356
357	return __reserve_bp_slot(bp);
358}
359
360int dbg_release_bp_slot(struct perf_event *bp)
361{
362	if (mutex_is_locked(&nr_bp_mutex))
363		return -1;
364
365	__release_bp_slot(bp);
366
367	return 0;
368}
369
370static int validate_hw_breakpoint(struct perf_event *bp)
371{
372	int ret;
373
374	ret = arch_validate_hwbkpt_settings(bp);
375	if (ret)
376		return ret;
377
378	if (arch_check_bp_in_kernelspace(bp)) {
379		if (bp->attr.exclude_kernel)
380			return -EINVAL;
381		/*
382		 * Don't let unprivileged users set a breakpoint in the trap
383		 * path to avoid trap recursion attacks.
384		 */
385		if (!capable(CAP_SYS_ADMIN))
386			return -EPERM;
387	}
388
389	return 0;
390}
391
392int register_perf_hw_breakpoint(struct perf_event *bp)
393{
394	int ret;
395
396	ret = reserve_bp_slot(bp);
397	if (ret)
398		return ret;
399
400	ret = validate_hw_breakpoint(bp);
401
402	/* if arch_validate_hwbkpt_settings() fails then release bp slot */
403	if (ret)
404		release_bp_slot(bp);
405
406	return ret;
407}
408
409/**
410 * register_user_hw_breakpoint - register a hardware breakpoint for user space
411 * @attr: breakpoint attributes
412 * @triggered: callback to trigger when we hit the breakpoint
413 * @tsk: pointer to 'task_struct' of the process to which the address belongs
414 */
415struct perf_event *
416register_user_hw_breakpoint(struct perf_event_attr *attr,
417			    perf_overflow_handler_t triggered,
418			    void *context,
419			    struct task_struct *tsk)
420{
421	return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
422						context);
423}
424EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
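A hedged usage sketch of this API, not part of the file: a kernel-side caller fills a perf_event_attr via hw_breakpoint_init() from <linux/hw_breakpoint.h> and gets back a perf_event (or an ERR_PTR value). The handler and helper names below are made up for the example.

/* Illustrative caller, not part of hw_breakpoint.c. */
static void example_wp_handler(struct perf_event *bp,
			       struct perf_sample_data *data,
			       struct pt_regs *regs)
{
	pr_info("breakpoint at 0x%llx hit\n", bp->attr.bp_addr);
}

static struct perf_event *example_attach_watchpoint(struct task_struct *tsk,
						    unsigned long addr)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = addr;
	attr.bp_len  = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W;

	/* Returns a valid event or an ERR_PTR() value on failure. */
	return register_user_hw_breakpoint(&attr, example_wp_handler, NULL, tsk);
}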
425
426/**
427 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
428 * @bp: the breakpoint structure to modify
429 * @attr: new breakpoint attributes
430 * @triggered: callback to trigger when we hit the breakpoint
431 * @tsk: pointer to 'task_struct' of the process to which the address belongs
432 */
433int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
434{
435	u64 old_addr = bp->attr.bp_addr;
436	u64 old_len = bp->attr.bp_len;
437	int old_type = bp->attr.bp_type;
438	int err = 0;
439
440	/*
441	 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
442	 * will not be possible to raise IPIs that invoke __perf_event_disable.
443	 * So call the function directly after making sure we are targeting the
444	 * current task.
445	 */
446	if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
447		perf_event_disable_local(bp);
448	else
449		perf_event_disable(bp);
450
451	bp->attr.bp_addr = attr->bp_addr;
452	bp->attr.bp_type = attr->bp_type;
453	bp->attr.bp_len = attr->bp_len;
454
455	if (attr->disabled)
456		goto end;
457
458	err = validate_hw_breakpoint(bp);
459	if (!err)
460		perf_event_enable(bp);
461
462	if (err) {
463		bp->attr.bp_addr = old_addr;
464		bp->attr.bp_type = old_type;
465		bp->attr.bp_len = old_len;
466		if (!bp->attr.disabled)
467			perf_event_enable(bp);
468
469		return err;
470	}
471
472end:
473	bp->attr.disabled = attr->disabled;
474
475	return 0;
476}
477EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
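For the modify path, a similarly hedged sketch (the helper name is hypothetical): a caller can rebuild the attributes from the live event and move it to a new address; in this v4.6 variant the old settings are restored if validation fails.

/* Illustrative caller, not part of hw_breakpoint.c. */
static int example_move_watchpoint(struct perf_event *bp, unsigned long new_addr)
{
	struct perf_event_attr attr = bp->attr;	/* start from the current settings */

	attr.bp_addr = new_addr;
	attr.disabled = 0;

	return modify_user_hw_breakpoint(bp, &attr);
}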
478
479/**
480 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
481 * @bp: the breakpoint structure to unregister
482 */
483void unregister_hw_breakpoint(struct perf_event *bp)
484{
485	if (!bp)
486		return;
487	perf_event_release_kernel(bp);
488}
489EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
490
491/**
492 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
493 * @attr: breakpoint attributes
494 * @triggered: callback to trigger when we hit the breakpoint
495 *
496 * @return a set of per_cpu pointers to perf events
497 */
498struct perf_event * __percpu *
499register_wide_hw_breakpoint(struct perf_event_attr *attr,
500			    perf_overflow_handler_t triggered,
501			    void *context)
502{
503	struct perf_event * __percpu *cpu_events, *bp;
504	long err = 0;
505	int cpu;
506
507	cpu_events = alloc_percpu(typeof(*cpu_events));
508	if (!cpu_events)
509		return (void __percpu __force *)ERR_PTR(-ENOMEM);
510
511	get_online_cpus();
512	for_each_online_cpu(cpu) {
513		bp = perf_event_create_kernel_counter(attr, cpu, NULL,
514						      triggered, context);
515		if (IS_ERR(bp)) {
516			err = PTR_ERR(bp);
517			break;
518		}
519
520		per_cpu(*cpu_events, cpu) = bp;
521	}
522	put_online_cpus();
523
524	if (likely(!err))
525		return cpu_events;
526
527	unregister_wide_hw_breakpoint(cpu_events);
528	return (void __percpu __force *)ERR_PTR(err);
529}
530EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
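The in-tree sample module samples/hw_breakpoint/data_breakpoint.c uses this interface in essentially the following way; the sketch below is hedged, reuses the hypothetical example_wp_handler from the earlier sketch, and ksym_addr stands in for a resolved symbol address.

/* Illustrative kernel-wide watchpoint, not part of hw_breakpoint.c. */
static struct perf_event * __percpu *example_wide_bp;

static int example_watch_address(unsigned long ksym_addr)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = ksym_addr;
	attr.bp_len  = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;

	example_wide_bp = register_wide_hw_breakpoint(&attr, example_wp_handler, NULL);
	if (IS_ERR((void __force *)example_wide_bp))
		return PTR_ERR((void __force *)example_wide_bp);

	return 0;
}

/* Teardown mirrors the setup: unregister_wide_hw_breakpoint(example_wide_bp); */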
531
532/**
533 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
534 * @cpu_events: the per cpu set of events to unregister
535 */
536void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
537{
538	int cpu;
539
540	for_each_possible_cpu(cpu)
541		unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));
542
543	free_percpu(cpu_events);
544}
545EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
546
547static struct notifier_block hw_breakpoint_exceptions_nb = {
548	.notifier_call = hw_breakpoint_exceptions_notify,
549	/* we need to be notified first */
550	.priority = 0x7fffffff
551};
552
553static void bp_perf_event_destroy(struct perf_event *event)
554{
555	release_bp_slot(event);
556}
557
558static int hw_breakpoint_event_init(struct perf_event *bp)
559{
560	int err;
561
562	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
563		return -ENOENT;
564
565	/*
566	 * no branch sampling for breakpoint events
567	 */
568	if (has_branch_stack(bp))
569		return -EOPNOTSUPP;
570
571	err = register_perf_hw_breakpoint(bp);
572	if (err)
573		return err;
574
575	bp->destroy = bp_perf_event_destroy;
576
577	return 0;
578}
579
580static int hw_breakpoint_add(struct perf_event *bp, int flags)
581{
582	if (!(flags & PERF_EF_START))
583		bp->hw.state = PERF_HES_STOPPED;
584
585	if (is_sampling_event(bp)) {
586		bp->hw.last_period = bp->hw.sample_period;
587		perf_swevent_set_period(bp);
588	}
589
590	return arch_install_hw_breakpoint(bp);
591}
592
593static void hw_breakpoint_del(struct perf_event *bp, int flags)
594{
595	arch_uninstall_hw_breakpoint(bp);
596}
597
598static void hw_breakpoint_start(struct perf_event *bp, int flags)
599{
600	bp->hw.state = 0;
601}
602
603static void hw_breakpoint_stop(struct perf_event *bp, int flags)
604{
605	bp->hw.state = PERF_HES_STOPPED;
606}
607
608static struct pmu perf_breakpoint = {
609	.task_ctx_nr	= perf_sw_context, /* could eventually get its own */
610
611	.event_init	= hw_breakpoint_event_init,
612	.add		= hw_breakpoint_add,
613	.del		= hw_breakpoint_del,
614	.start		= hw_breakpoint_start,
615	.stop		= hw_breakpoint_stop,
616	.read		= hw_breakpoint_pmu_read,
617};
618
619int __init init_hw_breakpoint(void)
620{
621	int cpu, err_cpu;
622	int i;
623
624	for (i = 0; i < TYPE_MAX; i++)
625		nr_slots[i] = hw_breakpoint_slots(i);
626
627	for_each_possible_cpu(cpu) {
628		for (i = 0; i < TYPE_MAX; i++) {
629			struct bp_cpuinfo *info = get_bp_info(cpu, i);
630
631			info->tsk_pinned = kcalloc(nr_slots[i], sizeof(int),
632							GFP_KERNEL);
633			if (!info->tsk_pinned)
634				goto err_alloc;
635		}
636	}
637
638	constraints_initialized = 1;
639
640	perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
641
642	return register_die_notifier(&hw_breakpoint_exceptions_nb);
643
644 err_alloc:
645	for_each_possible_cpu(err_cpu) {
646		for (i = 0; i < TYPE_MAX; i++)
647			kfree(get_bp_info(err_cpu, i)->tsk_pinned);
648		if (err_cpu == cpu)
649			break;
650	}
651
652	return -ENOMEM;
653}
654
655
v6.2 (kernel/events/hw_breakpoint.c)
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * Copyright (C) 2007 Alan Stern
   4 * Copyright (C) IBM Corporation, 2009
   5 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
   6 *
   7 * Thanks to Ingo Molnar for his many suggestions.
   8 *
   9 * Authors: Alan Stern <stern@rowland.harvard.edu>
  10 *          K.Prasad <prasad@linux.vnet.ibm.com>
  11 *          Frederic Weisbecker <fweisbec@gmail.com>
  12 */
  13
  14/*
  15 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
  16 * using the CPU's debug registers.
  17 * This file contains the arch-independent routines.
  18 */
  19
  20#include <linux/hw_breakpoint.h>
  21
  22#include <linux/atomic.h>
  23#include <linux/bug.h>
  24#include <linux/cpu.h>
  25#include <linux/export.h>
  26#include <linux/init.h>
  27#include <linux/irqflags.h>
  28#include <linux/kdebug.h>
  29#include <linux/kernel.h>
  30#include <linux/mutex.h>
  31#include <linux/notifier.h>
  32#include <linux/percpu-rwsem.h>
  33#include <linux/percpu.h>
  34#include <linux/rhashtable.h>
  35#include <linux/sched.h>
  36#include <linux/slab.h>
  37
  38/*
  39 * Datastructure to track the total uses of N slots across tasks or CPUs;
  40 * bp_slots_histogram::count[N] is the number of assigned N+1 breakpoint slots.
  41 */
  42struct bp_slots_histogram {
  43#ifdef hw_breakpoint_slots
  44	atomic_t count[hw_breakpoint_slots(0)];
  45#else
  46	atomic_t *count;
  47#endif
  48};
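The #ifdef selects one of two layouts: when the architecture provides hw_breakpoint_slots() as a compile-time constant, count becomes a fixed-size array and needs no allocation; otherwise it is allocated per type in init_breakpoint_slots() further down. A hedged illustration of the constant case (the value 4 is an assumption, not taken from any particular arch header):

/*
 * Illustration only: with
 *
 *	#define hw_breakpoint_slots(type)	(4)
 *
 * the member is simply "atomic_t count[4]", hw_breakpoint_slots_cached()
 * returns the constant directly, and init_breakpoint_slots() becomes a
 * no-op that returns 0 (see the #ifdef branch below).
 */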
  49
  50/*
  51 * Per-CPU constraints data.
  52 */
  53struct bp_cpuinfo {
  54	/* Number of pinned CPU breakpoints in a CPU. */
  55	unsigned int			cpu_pinned;
  56	/* Histogram of pinned task breakpoints in a CPU. */
  57	struct bp_slots_histogram	tsk_pinned;
  58};
  59
  60static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
  61
  62static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
  63{
  64	return per_cpu_ptr(bp_cpuinfo + type, cpu);
  65}
  66
  67/* Number of pinned CPU breakpoints globally. */
  68static struct bp_slots_histogram cpu_pinned[TYPE_MAX];
  69/* Number of pinned CPU-independent task breakpoints. */
  70static struct bp_slots_histogram tsk_pinned_all[TYPE_MAX];
  71
  72/* Keep track of the breakpoints attached to tasks */
  73static struct rhltable task_bps_ht;
  74static const struct rhashtable_params task_bps_ht_params = {
  75	.head_offset = offsetof(struct hw_perf_event, bp_list),
  76	.key_offset = offsetof(struct hw_perf_event, target),
  77	.key_len = sizeof_field(struct hw_perf_event, target),
  78	.automatic_shrinking = true,
  79};
  80
  81static bool constraints_initialized __ro_after_init;
  82
  83/*
  84 * Synchronizes accesses to the per-CPU constraints; the locking rules are:
  85 *
  86 *  1. Atomic updates to bp_cpuinfo::tsk_pinned only require a held read-lock
  87 *     (due to bp_slots_histogram::count being atomic, no updates are lost).
  88 *
  89 *  2. Holding a write-lock is required for computations that require a
  90 *     stable snapshot of all bp_cpuinfo::tsk_pinned.
  91 *
  92 *  3. In all other cases, non-atomic accesses require the appropriately held
  93 *     lock (read-lock for read-only accesses; write-lock for reads/writes).
  94 */
  95DEFINE_STATIC_PERCPU_RWSEM(bp_cpuinfo_sem);
  96
  97/*
  98 * Return mutex to serialize accesses to per-task lists in task_bps_ht. Since
  99 * rhltable synchronizes concurrent insertions/deletions, independent tasks may
 100 * insert/delete concurrently; therefore, a mutex per task is sufficient.
 101 *
 102 * Uses task_struct::perf_event_mutex, to avoid extending task_struct with a
 103 * hw_breakpoint-only mutex, which may be infrequently used. The caveat here is
 104 * that hw_breakpoint may contend with per-task perf event list management. The
 105 * assumption is that perf usecases involving hw_breakpoints are very unlikely
 106 * to result in unnecessary contention.
 107 */
 108static inline struct mutex *get_task_bps_mutex(struct perf_event *bp)
 109{
 110	struct task_struct *tsk = bp->hw.target;
 111
 112	return tsk ? &tsk->perf_event_mutex : NULL;
 113}
 114
 115static struct mutex *bp_constraints_lock(struct perf_event *bp)
 116{
 117	struct mutex *tsk_mtx = get_task_bps_mutex(bp);
 118
 119	if (tsk_mtx) {
 120		/*
 121		 * Fully analogous to the perf_try_init_event() nesting
 122		 * argument in the comment near perf_event_ctx_lock_nested();
 123		 * this child->perf_event_mutex cannot ever deadlock against
 124		 * the parent->perf_event_mutex usage from
 125		 * perf_event_task_{en,dis}able().
 126		 *
 127		 * Specifically, inherited events will never occur on
 128		 * ->perf_event_list.
 129		 */
 130		mutex_lock_nested(tsk_mtx, SINGLE_DEPTH_NESTING);
 131		percpu_down_read(&bp_cpuinfo_sem);
 132	} else {
 133		percpu_down_write(&bp_cpuinfo_sem);
 134	}
 135
 136	return tsk_mtx;
 137}
 138
 139static void bp_constraints_unlock(struct mutex *tsk_mtx)
 140{
 141	if (tsk_mtx) {
 142		percpu_up_read(&bp_cpuinfo_sem);
 143		mutex_unlock(tsk_mtx);
 144	} else {
 145		percpu_up_write(&bp_cpuinfo_sem);
 146	}
 147}
 148
 149static bool bp_constraints_is_locked(struct perf_event *bp)
 150{
 151	struct mutex *tsk_mtx = get_task_bps_mutex(bp);
 152
 153	return percpu_is_write_locked(&bp_cpuinfo_sem) ||
 154	       (tsk_mtx ? mutex_is_locked(tsk_mtx) :
 155			  percpu_is_read_locked(&bp_cpuinfo_sem));
 156}
 157
 158static inline void assert_bp_constraints_lock_held(struct perf_event *bp)
 159{
 160	struct mutex *tsk_mtx = get_task_bps_mutex(bp);
 161
 162	if (tsk_mtx)
 163		lockdep_assert_held(tsk_mtx);
 164	lockdep_assert_held(&bp_cpuinfo_sem);
 165}
 166
 167#ifdef hw_breakpoint_slots
 168/*
 169 * Number of breakpoint slots is constant, and the same for all types.
 170 */
 171static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA));
 172static inline int hw_breakpoint_slots_cached(int type)	{ return hw_breakpoint_slots(type); }
 173static inline int init_breakpoint_slots(void)		{ return 0; }
 174#else
 175/*
 176 * Dynamic number of breakpoint slots.
 177 */
 178static int __nr_bp_slots[TYPE_MAX] __ro_after_init;
 179
 180static inline int hw_breakpoint_slots_cached(int type)
 181{
 182	return __nr_bp_slots[type];
 183}
 184
 185static __init bool
 186bp_slots_histogram_alloc(struct bp_slots_histogram *hist, enum bp_type_idx type)
 187{
 188	hist->count = kcalloc(hw_breakpoint_slots_cached(type), sizeof(*hist->count), GFP_KERNEL);
 189	return hist->count;
 190}
 191
 192static __init void bp_slots_histogram_free(struct bp_slots_histogram *hist)
 193{
 194	kfree(hist->count);
 195}
 196
 197static __init int init_breakpoint_slots(void)
 198{
 199	int i, cpu, err_cpu;
 200
 201	for (i = 0; i < TYPE_MAX; i++)
 202		__nr_bp_slots[i] = hw_breakpoint_slots(i);
 203
 204	for_each_possible_cpu(cpu) {
 205		for (i = 0; i < TYPE_MAX; i++) {
 206			struct bp_cpuinfo *info = get_bp_info(cpu, i);
 207
 208			if (!bp_slots_histogram_alloc(&info->tsk_pinned, i))
 209				goto err;
 210		}
 211	}
 212	for (i = 0; i < TYPE_MAX; i++) {
 213		if (!bp_slots_histogram_alloc(&cpu_pinned[i], i))
 214			goto err;
 215		if (!bp_slots_histogram_alloc(&tsk_pinned_all[i], i))
 216			goto err;
 217	}
 218
 219	return 0;
 220err:
 221	for_each_possible_cpu(err_cpu) {
 222		for (i = 0; i < TYPE_MAX; i++)
 223			bp_slots_histogram_free(&get_bp_info(err_cpu, i)->tsk_pinned);
 224		if (err_cpu == cpu)
 225			break;
 226	}
 227	for (i = 0; i < TYPE_MAX; i++) {
 228		bp_slots_histogram_free(&cpu_pinned[i]);
 229		bp_slots_histogram_free(&tsk_pinned_all[i]);
 230	}
 231
 232	return -ENOMEM;
 233}
 234#endif
 235
 236static inline void
 237bp_slots_histogram_add(struct bp_slots_histogram *hist, int old, int val)
 238{
 239	const int old_idx = old - 1;
 240	const int new_idx = old_idx + val;
 241
 242	if (old_idx >= 0)
 243		WARN_ON(atomic_dec_return_relaxed(&hist->count[old_idx]) < 0);
 244	if (new_idx >= 0)
 245		WARN_ON(atomic_inc_return_relaxed(&hist->count[new_idx]) < 0);
 246}
 247
 248static int
 249bp_slots_histogram_max(struct bp_slots_histogram *hist, enum bp_type_idx type)
 250{
 251	for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
 252		const int count = atomic_read(&hist->count[i]);
 253
 254		/* Catch unexpected writers; we want a stable snapshot. */
 255		ASSERT_EXCLUSIVE_WRITER(hist->count[i]);
 256		if (count > 0)
 257			return i + 1;
 258		WARN(count < 0, "inconsistent breakpoint slots histogram");
 259	}
 260
 261	return 0;
 262}
 263
 264static int
 265bp_slots_histogram_max_merge(struct bp_slots_histogram *hist1, struct bp_slots_histogram *hist2,
 266			     enum bp_type_idx type)
 267{
 268	for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
 269		const int count1 = atomic_read(&hist1->count[i]);
 270		const int count2 = atomic_read(&hist2->count[i]);
 271
 272		/* Catch unexpected writers; we want a stable snapshot. */
 273		ASSERT_EXCLUSIVE_WRITER(hist1->count[i]);
 274		ASSERT_EXCLUSIVE_WRITER(hist2->count[i]);
 275		if (count1 + count2 > 0)
 276			return i + 1;
 277		WARN(count1 < 0, "inconsistent breakpoint slots histogram");
 278		WARN(count2 < 0, "inconsistent breakpoint slots histogram");
 279	}
 280
 281	return 0;
 282}
 283
 284#ifndef hw_breakpoint_weight
 285static inline int hw_breakpoint_weight(struct perf_event *bp)
 286{
 287	return 1;
 288}
 289#endif
 290
 291static inline enum bp_type_idx find_slot_idx(u64 bp_type)
 292{
 293	if (bp_type & HW_BREAKPOINT_RW)
 294		return TYPE_DATA;
 295
 296	return TYPE_INST;
 297}
 298
 299/*
 300 * Return the maximum number of pinned breakpoints a task has in this CPU.
 301 */
 302static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 303{
 304	struct bp_slots_histogram *tsk_pinned = &get_bp_info(cpu, type)->tsk_pinned;
 305
 306	/*
 307	 * At this point we want to have acquired the bp_cpuinfo_sem as a
 308	 * writer to ensure that there are no concurrent writers in
 309	 * toggle_bp_task_slot() to tsk_pinned, and we get a stable snapshot.
 310	 */
 311	lockdep_assert_held_write(&bp_cpuinfo_sem);
 312	return bp_slots_histogram_max_merge(tsk_pinned, &tsk_pinned_all[type], type);
 313}
 314
 315/*
 316 * Count the number of breakpoints of the same type and same task.
 317 * The given event must not be on the list.
 318 *
 319 * If @cpu is -1, but the result of task_bp_pinned() is not CPU-independent,
 320 * returns a negative value.
 321 */
 322static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
 323{
 324	struct rhlist_head *head, *pos;
 325	struct perf_event *iter;
 326	int count = 0;
 327
 328	/*
 329	 * We need a stable snapshot of the per-task breakpoint list.
 330	 */
 331	assert_bp_constraints_lock_held(bp);
 332
 333	rcu_read_lock();
 334	head = rhltable_lookup(&task_bps_ht, &bp->hw.target, task_bps_ht_params);
 335	if (!head)
 336		goto out;
 337
 338	rhl_for_each_entry_rcu(iter, pos, head, hw.bp_list) {
 339		if (find_slot_idx(iter->attr.bp_type) != type)
 340			continue;
 341
 342		if (iter->cpu >= 0) {
 343			if (cpu == -1) {
 344				count = -1;
 345				goto out;
 346			} else if (cpu != iter->cpu)
 347				continue;
 348		}
 349
 350		count += hw_breakpoint_weight(iter);
 351	}
 352
 353out:
 354	rcu_read_unlock();
 355	return count;
 356}
 357
 358static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
 359{
 360	if (bp->cpu >= 0)
 361		return cpumask_of(bp->cpu);
 362	return cpu_possible_mask;
 363}
 364
 365/*
 366 * Returns the max pinned breakpoint slots in a given
 367 * CPU (cpu > -1) or across all of them (cpu = -1).
 368 */
 369static int
 370max_bp_pinned_slots(struct perf_event *bp, enum bp_type_idx type)
 371{
 372	const struct cpumask *cpumask = cpumask_of_bp(bp);
 373	int pinned_slots = 0;
 374	int cpu;
 375
 376	if (bp->hw.target && bp->cpu < 0) {
 377		int max_pinned = task_bp_pinned(-1, bp, type);
 378
 379		if (max_pinned >= 0) {
 380			/*
 381			 * Fast path: task_bp_pinned() is CPU-independent and
 382			 * returns the same value for any CPU.
 383			 */
 384			max_pinned += bp_slots_histogram_max(&cpu_pinned[type], type);
 385			return max_pinned;
 386		}
 387	}
 388
 389	for_each_cpu(cpu, cpumask) {
 390		struct bp_cpuinfo *info = get_bp_info(cpu, type);
 391		int nr;
 392
 393		nr = info->cpu_pinned;
 394		if (!bp->hw.target)
 395			nr += max_task_bp_pinned(cpu, type);
 396		else
 397			nr += task_bp_pinned(cpu, bp, type);
 398
 399		pinned_slots = max(nr, pinned_slots);
 400	}
 401
 402	return pinned_slots;
 403}
 404
 405/*
 406 * Add/remove the given breakpoint in our constraint table
 407 */
 408static int
 409toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int weight)
 410{
 411	int cpu, next_tsk_pinned;
 412
 413	if (!enable)
 414		weight = -weight;
 415
 416	if (!bp->hw.target) {
 417		/*
 418		 * Update the pinned CPU slots, in per-CPU bp_cpuinfo and in the
 419		 * global histogram.
 420		 */
 421		struct bp_cpuinfo *info = get_bp_info(bp->cpu, type);
 422
 423		lockdep_assert_held_write(&bp_cpuinfo_sem);
 424		bp_slots_histogram_add(&cpu_pinned[type], info->cpu_pinned, weight);
 425		info->cpu_pinned += weight;
 426		return 0;
 427	}
 428
 429	/*
 430	 * If bp->hw.target, tsk_pinned is only modified, but not used
 431	 * otherwise. We can permit concurrent updates as long as there are no
 432	 * other uses: having acquired bp_cpuinfo_sem as a reader allows
 433	 * concurrent updates here. Uses of tsk_pinned will require acquiring
 434	 * bp_cpuinfo_sem as a writer to stabilize tsk_pinned's value.
 435	 */
 436	lockdep_assert_held_read(&bp_cpuinfo_sem);
 437
 438	/*
 439	 * Update the pinned task slots, in per-CPU bp_cpuinfo and in the global
 440	 * histogram. We need to take care of 4 cases:
 441	 *
 442	 *  1. This breakpoint targets all CPUs (cpu < 0), and there may only
 443	 *     exist other task breakpoints targeting all CPUs. In this case we
 444	 *     can simply update the global slots histogram.
 445	 *
 446	 *  2. This breakpoint targets a specific CPU (cpu >= 0), but there may
 447	 *     only exist other task breakpoints targeting all CPUs.
 448	 *
 449	 *     a. On enable: remove the existing breakpoints from the global
 450	 *        slots histogram and use the per-CPU histogram.
 451	 *
 452	 *     b. On disable: re-insert the existing breakpoints into the global
 453	 *        slots histogram and remove from per-CPU histogram.
 454	 *
 455	 *  3. Some other existing task breakpoints target specific CPUs. Only
 456	 *     update the per-CPU slots histogram.
 457	 */
 458
 459	if (!enable) {
 460		/*
 461		 * Remove before updating histograms so we can determine if this
 462		 * was the last task breakpoint for a specific CPU.
 463		 */
 464		int ret = rhltable_remove(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);
 465
 466		if (ret)
 467			return ret;
 468	}
 469	/*
 470	 * Note: If !enable, next_tsk_pinned will not count the to-be-removed breakpoint.
 471	 */
 472	next_tsk_pinned = task_bp_pinned(-1, bp, type);
 473
 474	if (next_tsk_pinned >= 0) {
 475		if (bp->cpu < 0) { /* Case 1: fast path */
 476			if (!enable)
 477				next_tsk_pinned += hw_breakpoint_weight(bp);
 478			bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned, weight);
 479		} else if (enable) { /* Case 2.a: slow path */
 480			/* Add existing to per-CPU histograms. */
 481			for_each_possible_cpu(cpu) {
 482				bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
 483						       0, next_tsk_pinned);
 484			}
 485			/* Add this first CPU-pinned task breakpoint. */
 486			bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
 487					       next_tsk_pinned, weight);
 488			/* Rebalance global task pinned histogram. */
 489			bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned,
 490					       -next_tsk_pinned);
 491		} else { /* Case 2.b: slow path */
 492			/* Remove this last CPU-pinned task breakpoint. */
 493			bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
 494					       next_tsk_pinned + hw_breakpoint_weight(bp), weight);
 495			/* Remove all from per-CPU histograms. */
 496			for_each_possible_cpu(cpu) {
 497				bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
 498						       next_tsk_pinned, -next_tsk_pinned);
 499			}
 500			/* Rebalance global task pinned histogram. */
 501			bp_slots_histogram_add(&tsk_pinned_all[type], 0, next_tsk_pinned);
 502		}
 503	} else { /* Case 3: slow path */
 504		const struct cpumask *cpumask = cpumask_of_bp(bp);
 505
 506		for_each_cpu(cpu, cpumask) {
 507			next_tsk_pinned = task_bp_pinned(cpu, bp, type);
 508			if (!enable)
 509				next_tsk_pinned += hw_breakpoint_weight(bp);
 510			bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
 511					       next_tsk_pinned, weight);
 512		}
 513	}
 514
 515	/*
 516	 * Readers want a stable snapshot of the per-task breakpoint list.
 517	 */
 518	assert_bp_constraints_lock_held(bp);
 519
 520	if (enable)
 521		return rhltable_insert(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);
 522
 523	return 0;
 524}
 525
 526__weak int arch_reserve_bp_slot(struct perf_event *bp)
 527{
 528	return 0;
 529}
 530
 531__weak void arch_release_bp_slot(struct perf_event *bp)
 532{
 533}
 534
 535/*
 536 * Function to perform processor-specific cleanup during unregistration
 537 */
 538__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
 539{
 540	/*
 541	 * A weak stub function here for those archs that don't define
 542	 * it inside arch/.../kernel/hw_breakpoint.c
 543	 */
 544}
 545
 546/*
 547 * Constraints to check before allowing this new breakpoint counter.
 548 *
 549 * Note: Flexible breakpoints are currently unimplemented, but outlined in the
 550 * below algorithm for completeness.  The implementation treats flexible as
 551 * pinned due to no guarantee that we currently always schedule flexible events
 552 * before a pinned event on the same CPU.
 553 *
 554 *  == Non-pinned counter == (Considered as pinned for now)
 555 *
 556 *   - If attached to a single cpu, check:
 557 *
 558 *       (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
 559 *           + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
 560 *
 561 *       -> If there are already non-pinned counters in this cpu, it means
 562 *          there is already a free slot for them.
 563 *          Otherwise, we check that the maximum number of per task
 564 *          breakpoints (for this cpu) plus the number of per cpu breakpoint
 565 *          (for this cpu) doesn't cover every register.
 566 *
 567 *   - If attached to every cpu, check:
 568 *
 569 *       (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
 570 *           + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
 571 *
 572 *       -> This is roughly the same, except we check the number of per cpu
 573 *          bp for every cpu and we keep the max one. Same for the per tasks
 574 *          breakpoints.
 575 *
 576 *
 577 * == Pinned counter ==
 578 *
 579 *   - If attached to a single cpu, check:
 580 *
 581 *       ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
 582 *            + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
 583 *
 584 *       -> Same checks as before. But now the info->flexible, if any, must keep
 585 *          one register at least (or they will never be fed).
 586 *
 587 *   - If attached to every cpu, check:
 588 *
 589 *       ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
 590 *            + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
 591 */
 592static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
 593{
 594	enum bp_type_idx type;
 595	int max_pinned_slots;
 596	int weight;
 597	int ret;
 598
 599	/* We couldn't initialize breakpoint constraints on boot */
 600	if (!constraints_initialized)
 601		return -ENOMEM;
 602
 603	/* Basic checks */
 604	if (bp_type == HW_BREAKPOINT_EMPTY ||
 605	    bp_type == HW_BREAKPOINT_INVALID)
 606		return -EINVAL;
 607
 608	type = find_slot_idx(bp_type);
 609	weight = hw_breakpoint_weight(bp);
 610
 611	/* Check if this new breakpoint can be satisfied across all CPUs. */
 612	max_pinned_slots = max_bp_pinned_slots(bp, type) + weight;
 613	if (max_pinned_slots > hw_breakpoint_slots_cached(type))
 614		return -ENOSPC;
 615
 616	ret = arch_reserve_bp_slot(bp);
 617	if (ret)
 618		return ret;
 619
 620	return toggle_bp_slot(bp, true, type, weight);
 621}
 622
 623int reserve_bp_slot(struct perf_event *bp)
 624{
 625	struct mutex *mtx = bp_constraints_lock(bp);
 626	int ret = __reserve_bp_slot(bp, bp->attr.bp_type);
 627
 628	bp_constraints_unlock(mtx);
 629	return ret;
 630}
 631
 632static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
 633{
 634	enum bp_type_idx type;
 635	int weight;
 636
 637	arch_release_bp_slot(bp);
 638
 639	type = find_slot_idx(bp_type);
 640	weight = hw_breakpoint_weight(bp);
 641	WARN_ON(toggle_bp_slot(bp, false, type, weight));
 642}
 643
 644void release_bp_slot(struct perf_event *bp)
 645{
 646	struct mutex *mtx = bp_constraints_lock(bp);
 647
 648	arch_unregister_hw_breakpoint(bp);
 649	__release_bp_slot(bp, bp->attr.bp_type);
 650	bp_constraints_unlock(mtx);
 651}
 652
 653static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
 654{
 655	int err;
 656
 657	__release_bp_slot(bp, old_type);
 658
 659	err = __reserve_bp_slot(bp, new_type);
 660	if (err) {
 661		/*
 662		 * Reserve the old_type slot back in case
 663		 * there's no space for the new type.
 664		 *
 665		 * This must succeed, because we just released
 666		 * the old_type slot in the __release_bp_slot
 667		 * call above. If not, something is broken.
 668		 */
 669		WARN_ON(__reserve_bp_slot(bp, old_type));
 670	}
 671
 672	return err;
 673}
 674
 675static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
 676{
 677	struct mutex *mtx = bp_constraints_lock(bp);
 678	int ret = __modify_bp_slot(bp, old_type, new_type);
 679
 680	bp_constraints_unlock(mtx);
 681	return ret;
 682}
 683
 684/*
 685 * Allow the kernel debugger to reserve breakpoint slots without
 686 * taking a lock using the dbg_* variant of the reserve and
 687 * release breakpoint slots.
 688 */
 689int dbg_reserve_bp_slot(struct perf_event *bp)
 690{
 691	int ret;
 692
 693	if (bp_constraints_is_locked(bp))
 694		return -1;
 695
 696	/* Locks aren't held; disable lockdep assert checking. */
 697	lockdep_off();
 698	ret = __reserve_bp_slot(bp, bp->attr.bp_type);
 699	lockdep_on();
 700
 701	return ret;
 702}
 703
 704int dbg_release_bp_slot(struct perf_event *bp)
 705{
 706	if (bp_constraints_is_locked(bp))
 707		return -1;
 708
 709	/* Locks aren't held; disable lockdep assert checking. */
 710	lockdep_off();
 711	__release_bp_slot(bp, bp->attr.bp_type);
 712	lockdep_on();
 713
 714	return 0;
 715}
 716
 717static int hw_breakpoint_parse(struct perf_event *bp,
 718			       const struct perf_event_attr *attr,
 719			       struct arch_hw_breakpoint *hw)
 720{
 721	int err;
 722
 723	err = hw_breakpoint_arch_parse(bp, attr, hw);
 724	if (err)
 725		return err;
 726
 727	if (arch_check_bp_in_kernelspace(hw)) {
 728		if (attr->exclude_kernel)
 729			return -EINVAL;
 730		/*
 731		 * Don't let unprivileged users set a breakpoint in the trap
 732		 * path to avoid trap recursion attacks.
 733		 */
 734		if (!capable(CAP_SYS_ADMIN))
 735			return -EPERM;
 736	}
 737
 738	return 0;
 739}
 740
 741int register_perf_hw_breakpoint(struct perf_event *bp)
 742{
 743	struct arch_hw_breakpoint hw = { };
 744	int err;
 745
 746	err = reserve_bp_slot(bp);
 747	if (err)
 748		return err;
 749
 750	err = hw_breakpoint_parse(bp, &bp->attr, &hw);
 751	if (err) {
 752		release_bp_slot(bp);
 753		return err;
 754	}
 755
 756	bp->hw.info = hw;
 757
 758	return 0;
 759}
 760
 761/**
 762 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 763 * @attr: breakpoint attributes
 764 * @triggered: callback to trigger when we hit the breakpoint
 765 * @context: context data could be used in the triggered callback
 766 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 767 */
 768struct perf_event *
 769register_user_hw_breakpoint(struct perf_event_attr *attr,
 770			    perf_overflow_handler_t triggered,
 771			    void *context,
 772			    struct task_struct *tsk)
 773{
 774	return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
 775						context);
 776}
 777EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 778
 779static void hw_breakpoint_copy_attr(struct perf_event_attr *to,
 780				    struct perf_event_attr *from)
 781{
 782	to->bp_addr = from->bp_addr;
 783	to->bp_type = from->bp_type;
 784	to->bp_len  = from->bp_len;
 785	to->disabled = from->disabled;
 786}
 787
 788int
 789modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr,
 790			        bool check)
 791{
 792	struct arch_hw_breakpoint hw = { };
 793	int err;
 794
 795	err = hw_breakpoint_parse(bp, attr, &hw);
 796	if (err)
 797		return err;
 798
 799	if (check) {
 800		struct perf_event_attr old_attr;
 801
 802		old_attr = bp->attr;
 803		hw_breakpoint_copy_attr(&old_attr, attr);
 804		if (memcmp(&old_attr, attr, sizeof(*attr)))
 805			return -EINVAL;
 806	}
 807
 808	if (bp->attr.bp_type != attr->bp_type) {
 809		err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type);
 810		if (err)
 811			return err;
 812	}
 813
 814	hw_breakpoint_copy_attr(&bp->attr, attr);
 815	bp->hw.info = hw;
 816
 817	return 0;
 818}
 819
 820/**
 821 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
 822 * @bp: the breakpoint structure to modify
 823 * @attr: new breakpoint attributes
 824 */
 825int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 826{
 827	int err;
 828
 829	/*
 830	 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
 831	 * will not be possible to raise IPIs that invoke __perf_event_disable.
 832	 * So call the function directly after making sure we are targeting the
 833	 * current task.
 834	 */
 835	if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
 836		perf_event_disable_local(bp);
 837	else
 838		perf_event_disable(bp);
 839
 840	err = modify_user_hw_breakpoint_check(bp, attr, false);
 841
 842	if (!bp->attr.disabled)
 843		perf_event_enable(bp);
 844
 845	return err;
 846}
 847EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
 848
 849/**
 850 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
 851 * @bp: the breakpoint structure to unregister
 852 */
 853void unregister_hw_breakpoint(struct perf_event *bp)
 854{
 855	if (!bp)
 856		return;
 857	perf_event_release_kernel(bp);
 858}
 859EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
 860
 861/**
 862 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
 863 * @attr: breakpoint attributes
 864 * @triggered: callback to trigger when we hit the breakpoint
 865 * @context: context data could be used in the triggered callback
 866 *
 867 * @return a set of per_cpu pointers to perf events
 868 */
 869struct perf_event * __percpu *
 870register_wide_hw_breakpoint(struct perf_event_attr *attr,
 871			    perf_overflow_handler_t triggered,
 872			    void *context)
 873{
 874	struct perf_event * __percpu *cpu_events, *bp;
 875	long err = 0;
 876	int cpu;
 877
 878	cpu_events = alloc_percpu(typeof(*cpu_events));
 879	if (!cpu_events)
 880		return (void __percpu __force *)ERR_PTR(-ENOMEM);
 881
 882	cpus_read_lock();
 883	for_each_online_cpu(cpu) {
 884		bp = perf_event_create_kernel_counter(attr, cpu, NULL,
 885						      triggered, context);
 886		if (IS_ERR(bp)) {
 887			err = PTR_ERR(bp);
 888			break;
 889		}
 890
 891		per_cpu(*cpu_events, cpu) = bp;
 892	}
 893	cpus_read_unlock();
 894
 895	if (likely(!err))
 896		return cpu_events;
 897
 898	unregister_wide_hw_breakpoint(cpu_events);
 899	return (void __percpu __force *)ERR_PTR(err);
 900}
 901EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
 902
 903/**
 904 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
 905 * @cpu_events: the per cpu set of events to unregister
 906 */
 907void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
 908{
 909	int cpu;
 910
 911	for_each_possible_cpu(cpu)
 912		unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));
 913
 914	free_percpu(cpu_events);
 915}
 916EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
 917
 918/**
 919 * hw_breakpoint_is_used - check if breakpoints are currently used
 920 *
 921 * Returns: true if breakpoints are used, false otherwise.
 922 */
 923bool hw_breakpoint_is_used(void)
 924{
 925	int cpu;
 926
 927	if (!constraints_initialized)
 928		return false;
 929
 930	for_each_possible_cpu(cpu) {
 931		for (int type = 0; type < TYPE_MAX; ++type) {
 932			struct bp_cpuinfo *info = get_bp_info(cpu, type);
 933
 934			if (info->cpu_pinned)
 935				return true;
 936
 937			for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
 938				if (atomic_read(&info->tsk_pinned.count[slot]))
 939					return true;
 940			}
 941		}
 942	}
 943
 944	for (int type = 0; type < TYPE_MAX; ++type) {
 945		for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
 946			/*
 947			 * Warn, because if there are CPU pinned counters,
 948			 * should never get here; bp_cpuinfo::cpu_pinned should
 949			 * be consistent with the global cpu_pinned histogram.
 950			 */
 951			if (WARN_ON(atomic_read(&cpu_pinned[type].count[slot])))
 952				return true;
 953
 954			if (atomic_read(&tsk_pinned_all[type].count[slot]))
 955				return true;
 956		}
 957	}
 958
 959	return false;
 960}
 961
 962static struct notifier_block hw_breakpoint_exceptions_nb = {
 963	.notifier_call = hw_breakpoint_exceptions_notify,
 964	/* we need to be notified first */
 965	.priority = 0x7fffffff
 966};
 967
 968static void bp_perf_event_destroy(struct perf_event *event)
 969{
 970	release_bp_slot(event);
 971}
 972
 973static int hw_breakpoint_event_init(struct perf_event *bp)
 974{
 975	int err;
 976
 977	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
 978		return -ENOENT;
 979
 980	/*
 981	 * no branch sampling for breakpoint events
 982	 */
 983	if (has_branch_stack(bp))
 984		return -EOPNOTSUPP;
 985
 986	err = register_perf_hw_breakpoint(bp);
 987	if (err)
 988		return err;
 989
 990	bp->destroy = bp_perf_event_destroy;
 991
 992	return 0;
 993}
 994
 995static int hw_breakpoint_add(struct perf_event *bp, int flags)
 996{
 997	if (!(flags & PERF_EF_START))
 998		bp->hw.state = PERF_HES_STOPPED;
 999
1000	if (is_sampling_event(bp)) {
1001		bp->hw.last_period = bp->hw.sample_period;
1002		perf_swevent_set_period(bp);
1003	}
1004
1005	return arch_install_hw_breakpoint(bp);
1006}
1007
1008static void hw_breakpoint_del(struct perf_event *bp, int flags)
1009{
1010	arch_uninstall_hw_breakpoint(bp);
1011}
1012
1013static void hw_breakpoint_start(struct perf_event *bp, int flags)
1014{
1015	bp->hw.state = 0;
1016}
1017
1018static void hw_breakpoint_stop(struct perf_event *bp, int flags)
1019{
1020	bp->hw.state = PERF_HES_STOPPED;
1021}
1022
1023static struct pmu perf_breakpoint = {
1024	.task_ctx_nr	= perf_sw_context, /* could eventually get its own */
1025
1026	.event_init	= hw_breakpoint_event_init,
1027	.add		= hw_breakpoint_add,
1028	.del		= hw_breakpoint_del,
1029	.start		= hw_breakpoint_start,
1030	.stop		= hw_breakpoint_stop,
1031	.read		= hw_breakpoint_pmu_read,
1032};
1033
1034int __init init_hw_breakpoint(void)
1035{
1036	int ret;
1037
1038	ret = rhltable_init(&task_bps_ht, &task_bps_ht_params);
1039	if (ret)
1040		return ret;
1041
1042	ret = init_breakpoint_slots();
1043	if (ret)
1044		return ret;
1045
1046	constraints_initialized = true;
1047
1048	perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
1049
1050	return register_die_notifier(&hw_breakpoint_exceptions_nb);
1051}