v6.2
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * Copyright (C) 2007 Alan Stern
   4 * Copyright (C) IBM Corporation, 2009
   5 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
   6 *
   7 * Thanks to Ingo Molnar for his many suggestions.
   8 *
   9 * Authors: Alan Stern <stern@rowland.harvard.edu>
  10 *          K.Prasad <prasad@linux.vnet.ibm.com>
  11 *          Frederic Weisbecker <fweisbec@gmail.com>
  12 */
  13
  14/*
  15 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
  16 * using the CPU's debug registers.
  17 * This file contains the arch-independent routines.
  18 */
  19
  20#include <linux/hw_breakpoint.h>
  21
  22#include <linux/atomic.h>
  23#include <linux/bug.h>
  24#include <linux/cpu.h>
  25#include <linux/export.h>
  26#include <linux/init.h>
  27#include <linux/irqflags.h>
  28#include <linux/kdebug.h>
  29#include <linux/kernel.h>
  30#include <linux/mutex.h>
  31#include <linux/notifier.h>
  32#include <linux/percpu-rwsem.h>
  33#include <linux/percpu.h>
  34#include <linux/rhashtable.h>
  35#include <linux/sched.h>
  36#include <linux/slab.h>
  37
  38/*
  39 * Datastructure to track the total uses of N slots across tasks or CPUs;
  40 * bp_slots_histogram::count[N] is the number of assigned N+1 breakpoint slots.
  41 */
  42struct bp_slots_histogram {
  43#ifdef hw_breakpoint_slots
  44	atomic_t count[hw_breakpoint_slots(0)];
  45#else
  46	atomic_t *count;
  47#endif
  48};
  49
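The count[] encoding is easy to misread: count[i] is the number of owners (tasks or CPUs) that currently hold exactly i+1 slots of the given type, so the highest non-zero index, plus one, gives the worst-case consumer. Below is a minimal stand-alone sketch of that bookkeeping, using plain ints instead of atomic_t and an assumed fixed slot count of 4; it mirrors bp_slots_histogram_add() and bp_slots_histogram_max() further down in this file.

#include <assert.h>
#include <stdio.h>

#define NR_SLOTS 4	/* assumed slot count for the sketch */

static int count[NR_SLOTS];	/* count[i]: owners holding exactly i+1 slots */

/* Owner goes from holding "old" slots to "old + val" slots. */
static void hist_add(int old, int val)
{
	if (old - 1 >= 0)
		count[old - 1]--;
	if (old - 1 + val >= 0)
		count[old - 1 + val]++;
}

/* Worst-case number of slots held by any single owner. */
static int hist_max(void)
{
	for (int i = NR_SLOTS - 1; i >= 0; i--)
		if (count[i] > 0)
			return i + 1;
	return 0;
}

int main(void)
{
	hist_add(0, 1);			/* task A takes its 1st slot */
	hist_add(1, 1);			/* task A takes its 2nd slot */
	hist_add(0, 1);			/* task B takes its 1st slot */
	assert(hist_max() == 2);	/* busiest owner holds two slots */

	hist_add(2, -1);		/* task A releases one slot */
	assert(hist_max() == 1);
	printf("max slots in use by one owner: %d\n", hist_max());
	return 0;
}
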
  50/*
  51 * Per-CPU constraints data.
  52 */
  53struct bp_cpuinfo {
  54	/* Number of pinned CPU breakpoints in a CPU. */
  55	unsigned int			cpu_pinned;
  56	/* Histogram of pinned task breakpoints in a CPU. */
  57	struct bp_slots_histogram	tsk_pinned;
  58};
  59
  60static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
  61
  62static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
  63{
  64	return per_cpu_ptr(bp_cpuinfo + type, cpu);
  65}
  66
  67/* Number of pinned CPU breakpoints globally. */
  68static struct bp_slots_histogram cpu_pinned[TYPE_MAX];
  69/* Number of pinned CPU-independent task breakpoints. */
  70static struct bp_slots_histogram tsk_pinned_all[TYPE_MAX];
  71
  72/* Keep track of the breakpoints attached to tasks */
  73static struct rhltable task_bps_ht;
  74static const struct rhashtable_params task_bps_ht_params = {
  75	.head_offset = offsetof(struct hw_perf_event, bp_list),
  76	.key_offset = offsetof(struct hw_perf_event, target),
  77	.key_len = sizeof_field(struct hw_perf_event, target),
  78	.automatic_shrinking = true,
  79};
  80
  81static bool constraints_initialized __ro_after_init;
  82
  83/*
  84 * Synchronizes accesses to the per-CPU constraints; the locking rules are:
  85 *
  86 *  1. Atomic updates to bp_cpuinfo::tsk_pinned only require a held read-lock
  87 *     (due to bp_slots_histogram::count being atomic, no updates are lost).
  88 *
  89 *  2. Holding a write-lock is required for computations that require a
  90 *     stable snapshot of all bp_cpuinfo::tsk_pinned.
  91 *
  92 *  3. In all other cases, non-atomic accesses require the appropriately held
  93 *     lock (read-lock for read-only accesses; write-lock for reads/writes).
  94 */
  95DEFINE_STATIC_PERCPU_RWSEM(bp_cpuinfo_sem);
  96
  97/*
  98 * Return mutex to serialize accesses to per-task lists in task_bps_ht. Since
  99 * rhltable synchronizes concurrent insertions/deletions, independent tasks may
 100 * insert/delete concurrently; therefore, a mutex per task is sufficient.
 101 *
 102 * Uses task_struct::perf_event_mutex, to avoid extending task_struct with a
 103 * hw_breakpoint-only mutex, which may be infrequently used. The caveat here is
 104 * that hw_breakpoint may contend with per-task perf event list management. The
 105 * assumption is that perf usecases involving hw_breakpoints are very unlikely
 106 * to result in unnecessary contention.
 107 */
 108static inline struct mutex *get_task_bps_mutex(struct perf_event *bp)
 109{
 110	struct task_struct *tsk = bp->hw.target;
 111
 112	return tsk ? &tsk->perf_event_mutex : NULL;
 113}
 114
 115static struct mutex *bp_constraints_lock(struct perf_event *bp)
 116{
 117	struct mutex *tsk_mtx = get_task_bps_mutex(bp);
 118
 119	if (tsk_mtx) {
 120		/*
 121		 * Fully analogous to the perf_try_init_event() nesting
 122		 * argument in the comment near perf_event_ctx_lock_nested();
 123		 * this child->perf_event_mutex cannot ever deadlock against
 124		 * the parent->perf_event_mutex usage from
 125		 * perf_event_task_{en,dis}able().
 126		 *
 127		 * Specifically, inherited events will never occur on
 128		 * ->perf_event_list.
 129		 */
 130		mutex_lock_nested(tsk_mtx, SINGLE_DEPTH_NESTING);
 131		percpu_down_read(&bp_cpuinfo_sem);
 132	} else {
 133		percpu_down_write(&bp_cpuinfo_sem);
 134	}
 135
 136	return tsk_mtx;
 137}
 138
 139static void bp_constraints_unlock(struct mutex *tsk_mtx)
 140{
 141	if (tsk_mtx) {
 142		percpu_up_read(&bp_cpuinfo_sem);
 143		mutex_unlock(tsk_mtx);
 144	} else {
 145		percpu_up_write(&bp_cpuinfo_sem);
 146	}
 147}
 148
 149static bool bp_constraints_is_locked(struct perf_event *bp)
 150{
 151	struct mutex *tsk_mtx = get_task_bps_mutex(bp);
 152
 153	return percpu_is_write_locked(&bp_cpuinfo_sem) ||
 154	       (tsk_mtx ? mutex_is_locked(tsk_mtx) :
 155			  percpu_is_read_locked(&bp_cpuinfo_sem));
 156}
 157
 158static inline void assert_bp_constraints_lock_held(struct perf_event *bp)
 159{
 160	struct mutex *tsk_mtx = get_task_bps_mutex(bp);
 161
 162	if (tsk_mtx)
 163		lockdep_assert_held(tsk_mtx);
 164	lockdep_assert_held(&bp_cpuinfo_sem);
 165}
 166
 167#ifdef hw_breakpoint_slots
 168/*
 169 * Number of breakpoint slots is constant, and the same for all types.
 170 */
 171static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA));
 172static inline int hw_breakpoint_slots_cached(int type)	{ return hw_breakpoint_slots(type); }
 173static inline int init_breakpoint_slots(void)		{ return 0; }
 174#else
 175/*
 176 * Dynamic number of breakpoint slots.
 177 */
 178static int __nr_bp_slots[TYPE_MAX] __ro_after_init;
 179
 180static inline int hw_breakpoint_slots_cached(int type)
 181{
 182	return __nr_bp_slots[type];
 183}
 184
 185static __init bool
 186bp_slots_histogram_alloc(struct bp_slots_histogram *hist, enum bp_type_idx type)
 187{
 188	hist->count = kcalloc(hw_breakpoint_slots_cached(type), sizeof(*hist->count), GFP_KERNEL);
 189	return hist->count;
 190}
 191
 192static __init void bp_slots_histogram_free(struct bp_slots_histogram *hist)
 193{
 194	kfree(hist->count);
 195}
 196
 197static __init int init_breakpoint_slots(void)
 198{
 199	int i, cpu, err_cpu;
 200
 201	for (i = 0; i < TYPE_MAX; i++)
 202		__nr_bp_slots[i] = hw_breakpoint_slots(i);
 203
 204	for_each_possible_cpu(cpu) {
 205		for (i = 0; i < TYPE_MAX; i++) {
 206			struct bp_cpuinfo *info = get_bp_info(cpu, i);
 207
 208			if (!bp_slots_histogram_alloc(&info->tsk_pinned, i))
 209				goto err;
 210		}
 211	}
 212	for (i = 0; i < TYPE_MAX; i++) {
 213		if (!bp_slots_histogram_alloc(&cpu_pinned[i], i))
 214			goto err;
 215		if (!bp_slots_histogram_alloc(&tsk_pinned_all[i], i))
 216			goto err;
 217	}
 218
 219	return 0;
 220err:
 221	for_each_possible_cpu(err_cpu) {
 222		for (i = 0; i < TYPE_MAX; i++)
 223			bp_slots_histogram_free(&get_bp_info(err_cpu, i)->tsk_pinned);
 224		if (err_cpu == cpu)
 225			break;
 226	}
 227	for (i = 0; i < TYPE_MAX; i++) {
 228		bp_slots_histogram_free(&cpu_pinned[i]);
 229		bp_slots_histogram_free(&tsk_pinned_all[i]);
 230	}
 231
 232	return -ENOMEM;
 233}
 234#endif
 235
 236static inline void
 237bp_slots_histogram_add(struct bp_slots_histogram *hist, int old, int val)
 238{
 239	const int old_idx = old - 1;
 240	const int new_idx = old_idx + val;
 241
 242	if (old_idx >= 0)
 243		WARN_ON(atomic_dec_return_relaxed(&hist->count[old_idx]) < 0);
 244	if (new_idx >= 0)
 245		WARN_ON(atomic_inc_return_relaxed(&hist->count[new_idx]) < 0);
 246}
 247
 248static int
 249bp_slots_histogram_max(struct bp_slots_histogram *hist, enum bp_type_idx type)
 250{
 251	for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
 252		const int count = atomic_read(&hist->count[i]);
 253
 254		/* Catch unexpected writers; we want a stable snapshot. */
 255		ASSERT_EXCLUSIVE_WRITER(hist->count[i]);
 256		if (count > 0)
 257			return i + 1;
 258		WARN(count < 0, "inconsistent breakpoint slots histogram");
 259	}
 260
 261	return 0;
 262}
 263
 264static int
 265bp_slots_histogram_max_merge(struct bp_slots_histogram *hist1, struct bp_slots_histogram *hist2,
 266			     enum bp_type_idx type)
 267{
 268	for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
 269		const int count1 = atomic_read(&hist1->count[i]);
 270		const int count2 = atomic_read(&hist2->count[i]);
 271
 272		/* Catch unexpected writers; we want a stable snapshot. */
 273		ASSERT_EXCLUSIVE_WRITER(hist1->count[i]);
 274		ASSERT_EXCLUSIVE_WRITER(hist2->count[i]);
 275		if (count1 + count2 > 0)
 276			return i + 1;
 277		WARN(count1 < 0, "inconsistent breakpoint slots histogram");
 278		WARN(count2 < 0, "inconsistent breakpoint slots histogram");
 279	}
 280
 281	return 0;
 282}
 283
 284#ifndef hw_breakpoint_weight
 285static inline int hw_breakpoint_weight(struct perf_event *bp)
 286{
 287	return 1;
 288}
 289#endif
 290
 291static inline enum bp_type_idx find_slot_idx(u64 bp_type)
 292{
 293	if (bp_type & HW_BREAKPOINT_RW)
 294		return TYPE_DATA;
 295
 296	return TYPE_INST;
 297}
 298
 299/*
 300 * Return the maximum number of pinned breakpoints a task has in this CPU.
 301 */
 302static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 303{
 304	struct bp_slots_histogram *tsk_pinned = &get_bp_info(cpu, type)->tsk_pinned;
 305
 306	/*
 307	 * At this point we want to have acquired the bp_cpuinfo_sem as a
 308	 * writer to ensure that there are no concurrent writers in
 309	 * toggle_bp_task_slot() to tsk_pinned, and we get a stable snapshot.
 310	 */
 311	lockdep_assert_held_write(&bp_cpuinfo_sem);
 312	return bp_slots_histogram_max_merge(tsk_pinned, &tsk_pinned_all[type], type);
 313}
 314
 315/*
 316 * Count the number of breakpoints of the same type and same task.
 317 * The given event must not be on the list.
 318 *
 319 * If @cpu is -1, but the result of task_bp_pinned() is not CPU-independent,
 320 * returns a negative value.
 321 */
 322static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
 323{
 324	struct rhlist_head *head, *pos;
 325	struct perf_event *iter;
 326	int count = 0;
 327
 328	/*
 329	 * We need a stable snapshot of the per-task breakpoint list.
 330	 */
 331	assert_bp_constraints_lock_held(bp);
 332
 333	rcu_read_lock();
 334	head = rhltable_lookup(&task_bps_ht, &bp->hw.target, task_bps_ht_params);
 335	if (!head)
 336		goto out;
 337
 338	rhl_for_each_entry_rcu(iter, pos, head, hw.bp_list) {
 339		if (find_slot_idx(iter->attr.bp_type) != type)
 340			continue;
 341
 342		if (iter->cpu >= 0) {
 343			if (cpu == -1) {
 344				count = -1;
 345				goto out;
 346			} else if (cpu != iter->cpu)
 347				continue;
 348		}
 349
 350		count += hw_breakpoint_weight(iter);
 351	}
 352
 353out:
 354	rcu_read_unlock();
 355	return count;
 356}
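For example, a task whose only breakpoint is pinned to CPU 2 gives task_bp_pinned(2, bp, type) == that breakpoint's weight, task_bp_pinned(0, bp, type) == 0, and task_bp_pinned(-1, bp, type) == -1, because no single CPU-independent answer exists; callers such as max_bp_pinned_slots() and toggle_bp_slot() then fall back to their per-CPU paths.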
 357
 358static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
 359{
 360	if (bp->cpu >= 0)
 361		return cpumask_of(bp->cpu);
 362	return cpu_possible_mask;
 363}
 364
 365/*
 366 * Returns the max pinned breakpoint slots in a given
 367 * CPU (cpu > -1) or across all of them (cpu = -1).
 368 */
 369static int
 370max_bp_pinned_slots(struct perf_event *bp, enum bp_type_idx type)
 371{
 372	const struct cpumask *cpumask = cpumask_of_bp(bp);
 373	int pinned_slots = 0;
 374	int cpu;
 375
 376	if (bp->hw.target && bp->cpu < 0) {
 377		int max_pinned = task_bp_pinned(-1, bp, type);
 378
 379		if (max_pinned >= 0) {
 380			/*
 381			 * Fast path: task_bp_pinned() is CPU-independent and
 382			 * returns the same value for any CPU.
 383			 */
 384			max_pinned += bp_slots_histogram_max(&cpu_pinned[type], type);
 385			return max_pinned;
 386		}
 387	}
 388
 389	for_each_cpu(cpu, cpumask) {
 390		struct bp_cpuinfo *info = get_bp_info(cpu, type);
 391		int nr;
 392
 393		nr = info->cpu_pinned;
 394		if (!bp->hw.target)
 395			nr += max_task_bp_pinned(cpu, type);
 396		else
 397			nr += task_bp_pinned(cpu, bp, type);
 398
 399		pinned_slots = max(nr, pinned_slots);
 400	}
 401
 402	return pinned_slots;
 403}
 404
 405/*
 406 * Add/remove the given breakpoint in our constraint table
 407 */
 408static int
 409toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int weight)
 410{
 411	int cpu, next_tsk_pinned;
 412
 413	if (!enable)
 414		weight = -weight;
 415
 416	if (!bp->hw.target) {
 417		/*
 418		 * Update the pinned CPU slots, in per-CPU bp_cpuinfo and in the
 419		 * global histogram.
 420		 */
 421		struct bp_cpuinfo *info = get_bp_info(bp->cpu, type);
 422
 423		lockdep_assert_held_write(&bp_cpuinfo_sem);
 424		bp_slots_histogram_add(&cpu_pinned[type], info->cpu_pinned, weight);
 425		info->cpu_pinned += weight;
 426		return 0;
 427	}
 428
 429	/*
 430	 * If bp->hw.target, tsk_pinned is only modified, but not used
 431	 * otherwise. We can permit concurrent updates as long as there are no
 432	 * other uses: having acquired bp_cpuinfo_sem as a reader allows
 433	 * concurrent updates here. Uses of tsk_pinned will require acquiring
 434	 * bp_cpuinfo_sem as a writer to stabilize tsk_pinned's value.
 435	 */
 436	lockdep_assert_held_read(&bp_cpuinfo_sem);
 437
 438	/*
 439	 * Update the pinned task slots, in per-CPU bp_cpuinfo and in the global
 440	 * histogram. We need to take care of 4 cases:
 441	 *
 442	 *  1. This breakpoint targets all CPUs (cpu < 0), and there may only
 443	 *     exist other task breakpoints targeting all CPUs. In this case we
 444	 *     can simply update the global slots histogram.
 445	 *
 446	 *  2. This breakpoint targets a specific CPU (cpu >= 0), but there may
 447	 *     only exist other task breakpoints targeting all CPUs.
 448	 *
 449	 *     a. On enable: remove the existing breakpoints from the global
 450	 *        slots histogram and use the per-CPU histogram.
 451	 *
 452	 *     b. On disable: re-insert the existing breakpoints into the global
 453	 *        slots histogram and remove from per-CPU histogram.
 454	 *
 455	 *  3. Some other existing task breakpoints target specific CPUs. Only
 456	 *     update the per-CPU slots histogram.
 457	 */
 458
 459	if (!enable) {
 460		/*
 461		 * Remove before updating histograms so we can determine if this
 462		 * was the last task breakpoint for a specific CPU.
 463		 */
 464		int ret = rhltable_remove(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);
 465
 466		if (ret)
 467			return ret;
 468	}
 469	/*
 470	 * Note: If !enable, next_tsk_pinned will not count the to-be-removed breakpoint.
 471	 */
 472	next_tsk_pinned = task_bp_pinned(-1, bp, type);
 473
 474	if (next_tsk_pinned >= 0) {
 475		if (bp->cpu < 0) { /* Case 1: fast path */
 476			if (!enable)
 477				next_tsk_pinned += hw_breakpoint_weight(bp);
 478			bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned, weight);
 479		} else if (enable) { /* Case 2.a: slow path */
 480			/* Add existing to per-CPU histograms. */
 481			for_each_possible_cpu(cpu) {
 482				bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
 483						       0, next_tsk_pinned);
 484			}
 485			/* Add this first CPU-pinned task breakpoint. */
 486			bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
 487					       next_tsk_pinned, weight);
 488			/* Rebalance global task pinned histogram. */
 489			bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned,
 490					       -next_tsk_pinned);
 491		} else { /* Case 2.b: slow path */
 492			/* Remove this last CPU-pinned task breakpoint. */
 493			bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
 494					       next_tsk_pinned + hw_breakpoint_weight(bp), weight);
 495			/* Remove all from per-CPU histograms. */
 496			for_each_possible_cpu(cpu) {
 497				bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
 498						       next_tsk_pinned, -next_tsk_pinned);
 499			}
 500			/* Rebalance global task pinned histogram. */
 501			bp_slots_histogram_add(&tsk_pinned_all[type], 0, next_tsk_pinned);
 502		}
 503	} else { /* Case 3: slow path */
 504		const struct cpumask *cpumask = cpumask_of_bp(bp);
 505
 506		for_each_cpu(cpu, cpumask) {
 507			next_tsk_pinned = task_bp_pinned(cpu, bp, type);
 508			if (!enable)
 509				next_tsk_pinned += hw_breakpoint_weight(bp);
 510			bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
 511					       next_tsk_pinned, weight);
 512		}
 513	}
 514
 515	/*
 516	 * Readers want a stable snapshot of the per-task breakpoint list.
 517	 */
 518	assert_bp_constraints_lock_held(bp);
 519
 520	if (enable)
 521		return rhltable_insert(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);
 522
 523	return 0;
 524}
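As a concrete walk-through of case 2.a: a task that already owns two CPU-independent data breakpoints (one owner at index 1 of tsk_pinned_all[TYPE_DATA]) now pins a third one to CPU 3. task_bp_pinned(-1, ...) returns 2, so the loop credits those two slots to every CPU's tsk_pinned histogram, the new breakpoint then moves CPU 3's entry from two to three slots, and the final bp_slots_histogram_add() removes the owner from tsk_pinned_all; disabling that breakpoint later takes the mirror-image path in case 2.b.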
 525
 526__weak int arch_reserve_bp_slot(struct perf_event *bp)
 527{
 528	return 0;
 529}
 530
 531__weak void arch_release_bp_slot(struct perf_event *bp)
 532{
 533}
 534
 535/*
 536 * Function to perform processor-specific cleanup during unregistration
 537 */
 538__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
 539{
 540	/*
 541	 * A weak stub function here for those archs that don't define
 542	 * it inside arch/.../kernel/hw_breakpoint.c
 543	 */
 544}
 545
 546/*
 547 * Constraints to check before allowing this new breakpoint counter.
 548 *
 549 * Note: Flexible breakpoints are currently unimplemented, but outlined in the
 550 * algorithm below for completeness. The implementation treats flexible as
 551 * pinned, because there is no guarantee that flexible events are always
 552 * scheduled before pinned events on the same CPU.
 553 *
 554 *  == Non-pinned counter == (Considered as pinned for now)
 555 *
 556 *   - If attached to a single cpu, check:
 557 *
 558 *       (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
 559 *           + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
 560 *
 561 *       -> If there are already non-pinned counters in this cpu, it means
 562 *          there is already a free slot for them.
 563 *          Otherwise, we check that the maximum number of per task
 564 *          Otherwise, we check that the maximum number of per-task
 565 *          breakpoints (for this cpu) plus the number of per-cpu breakpoints
 566 *          (for this cpu) doesn't cover all the registers.
 567 *   - If attached to every cpus, check:
 568 *   - If attached to every cpu, check:
 569 *       (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
 570 *           + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
 571 *
 572 *       -> This is roughly the same, except we check the number of per-cpu
 573 *          breakpoints for every cpu and keep the maximum. Same for the
 574 *          per-task breakpoints.
 575 *
 576 *
 577 * == Pinned counter ==
 578 *
 579 *   - If attached to a single cpu, check:
 580 *
 581 *       ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
 582 *            + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
 583 *
 584 *       -> Same checks as before. But now the info->flexible, if any, must
 585 *          keep at least one register (or they will never be fed).
 586 *
 587 *   - If attached to every cpu, check:
 588 *
 589 *       ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
 590 *            + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
 591 */
 592static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
 593{
 594	enum bp_type_idx type;
 595	int max_pinned_slots;
 596	int weight;
 597	int ret;
 598
 599	/* We couldn't initialize breakpoint constraints on boot */
 600	if (!constraints_initialized)
 601		return -ENOMEM;
 602
 603	/* Basic checks */
 604	if (bp_type == HW_BREAKPOINT_EMPTY ||
 605	    bp_type == HW_BREAKPOINT_INVALID)
 606		return -EINVAL;
 607
 608	type = find_slot_idx(bp_type);
 609	weight = hw_breakpoint_weight(bp);
 610
 611	/* Check if this new breakpoint can be satisfied across all CPUs. */
 612	max_pinned_slots = max_bp_pinned_slots(bp, type) + weight;
 613	if (max_pinned_slots > hw_breakpoint_slots_cached(type))
 614		return -ENOSPC;
 615
 616	ret = arch_reserve_bp_slot(bp);
 617	if (ret)
 618		return ret;
 619
 620	return toggle_bp_slot(bp, true, type, weight);
 621}
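A worked example of the check above, assuming an x86-like configuration with four data-breakpoint registers (hw_breakpoint_slots_cached(TYPE_DATA) == 4): if the busiest CPU already carries one pinned CPU breakpoint and its busiest task holds two pinned data breakpoints, max_bp_pinned_slots() reports 3; a new weight-1 breakpoint then computes 3 + 1 = 4 <= 4 and is accepted, while a further request on that CPU would compute 5 > 4 and fail with -ENOSPC.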
 622
 623int reserve_bp_slot(struct perf_event *bp)
 624{
 625	struct mutex *mtx = bp_constraints_lock(bp);
 626	int ret = __reserve_bp_slot(bp, bp->attr.bp_type);
 627
 628	bp_constraints_unlock(mtx);
 629	return ret;
 630}
 631
 632static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
 633{
 634	enum bp_type_idx type;
 635	int weight;
 636
 637	arch_release_bp_slot(bp);
 638
 639	type = find_slot_idx(bp_type);
 640	weight = hw_breakpoint_weight(bp);
 641	WARN_ON(toggle_bp_slot(bp, false, type, weight));
 642}
 643
 644void release_bp_slot(struct perf_event *bp)
 645{
 646	struct mutex *mtx = bp_constraints_lock(bp);
 647
 648	arch_unregister_hw_breakpoint(bp);
 649	__release_bp_slot(bp, bp->attr.bp_type);
 650	bp_constraints_unlock(mtx);
 651}
 652
 653static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
 654{
 655	int err;
 656
 657	__release_bp_slot(bp, old_type);
 658
 659	err = __reserve_bp_slot(bp, new_type);
 660	if (err) {
 661		/*
 662		 * Reserve the old_type slot back in case
 663		 * there's no space for the new type.
 664		 *
 665		 * This must succeed, because we just released
 666		 * the old_type slot in the __release_bp_slot
 667		 * call above. If not, something is broken.
 668		 */
 669		WARN_ON(__reserve_bp_slot(bp, old_type));
 670	}
 671
 672	return err;
 673}
 674
 675static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
 676{
 677	struct mutex *mtx = bp_constraints_lock(bp);
 678	int ret = __modify_bp_slot(bp, old_type, new_type);
 679
 680	bp_constraints_unlock(mtx);
 681	return ret;
 682}
 683
 684/*
 685 * Allow the kernel debugger to reserve breakpoint slots without
 686 * taking a lock, using the dbg_* variants of the reserve and
 687 * release breakpoint slot functions.
 688 */
 689int dbg_reserve_bp_slot(struct perf_event *bp)
 690{
 691	int ret;
 692
 693	if (bp_constraints_is_locked(bp))
 694		return -1;
 695
 696	/* Locks aren't held; disable lockdep assert checking. */
 697	lockdep_off();
 698	ret = __reserve_bp_slot(bp, bp->attr.bp_type);
 699	lockdep_on();
 700
 701	return ret;
 702}
 703
 704int dbg_release_bp_slot(struct perf_event *bp)
 705{
 706	if (bp_constraints_is_locked(bp))
 707		return -1;
 708
 709	/* Locks aren't held; disable lockdep assert checking. */
 710	lockdep_off();
 711	__release_bp_slot(bp, bp->attr.bp_type);
 712	lockdep_on();
 713
 714	return 0;
 715}
 716
 717static int hw_breakpoint_parse(struct perf_event *bp,
 718			       const struct perf_event_attr *attr,
 719			       struct arch_hw_breakpoint *hw)
 720{
 721	int err;
 722
 723	err = hw_breakpoint_arch_parse(bp, attr, hw);
 724	if (err)
 725		return err;
 726
 727	if (arch_check_bp_in_kernelspace(hw)) {
 728		if (attr->exclude_kernel)
 729			return -EINVAL;
 730		/*
 731		 * Don't let unprivileged users set a breakpoint in the trap
 732		 * path to avoid trap recursion attacks.
 733		 */
 734		if (!capable(CAP_SYS_ADMIN))
 735			return -EPERM;
 736	}
 737
 738	return 0;
 739}
 740
 741int register_perf_hw_breakpoint(struct perf_event *bp)
 742{
 743	struct arch_hw_breakpoint hw = { };
 744	int err;
 745
 746	err = reserve_bp_slot(bp);
 747	if (err)
 748		return err;
 749
 750	err = hw_breakpoint_parse(bp, &bp->attr, &hw);
 751	if (err) {
 752		release_bp_slot(bp);
 753		return err;
 754	}
 755
 756	bp->hw.info = hw;
 757
 758	return 0;
 759}
 760
 761/**
 762 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 763 * @attr: breakpoint attributes
 764 * @triggered: callback to trigger when we hit the breakpoint
 765 * @context: context data that can be used in the @triggered callback
 766 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 767 */
 768struct perf_event *
 769register_user_hw_breakpoint(struct perf_event_attr *attr,
 770			    perf_overflow_handler_t triggered,
 771			    void *context,
 772			    struct task_struct *tsk)
 773{
 774	return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
 775						context);
 776}
 777EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
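A minimal usage sketch from a hypothetical kernel module that watches a 4-byte address in the current task. hw_breakpoint_init(), the HW_BREAKPOINT_* constants and the handler signature come from <linux/hw_breakpoint.h> and <linux/perf_event.h>; my_wp, my_wp_handler() and watch_user_address() are names invented for the example.

#include <linux/err.h>
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <linux/printk.h>
#include <linux/sched.h>

static struct perf_event *my_wp;

static void my_wp_handler(struct perf_event *bp,
			  struct perf_sample_data *data,
			  struct pt_regs *regs)
{
	pr_info("watched address 0x%llx was accessed\n", bp->attr.bp_addr);
}

static int watch_user_address(unsigned long watch_addr)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);	/* PERF_TYPE_BREAKPOINT, pinned, period 1 */
	attr.bp_addr = watch_addr;
	attr.bp_len  = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;

	my_wp = register_user_hw_breakpoint(&attr, my_wp_handler, NULL, current);
	if (IS_ERR(my_wp)) {
		int err = PTR_ERR(my_wp);

		my_wp = NULL;
		return err;
	}
	return 0;
}

static void unwatch_user_address(void)
{
	unregister_hw_breakpoint(my_wp);	/* NULL-safe, see above */
	my_wp = NULL;
}
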
 778
 779static void hw_breakpoint_copy_attr(struct perf_event_attr *to,
 780				    struct perf_event_attr *from)
 781{
 782	to->bp_addr = from->bp_addr;
 783	to->bp_type = from->bp_type;
 784	to->bp_len  = from->bp_len;
 785	to->disabled = from->disabled;
 786}
 787
 788int
 789modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr,
 790			        bool check)
 791{
 792	struct arch_hw_breakpoint hw = { };
 793	int err;
 794
 795	err = hw_breakpoint_parse(bp, attr, &hw);
 796	if (err)
 797		return err;
 798
 799	if (check) {
 800		struct perf_event_attr old_attr;
 801
 802		old_attr = bp->attr;
 803		hw_breakpoint_copy_attr(&old_attr, attr);
 804		if (memcmp(&old_attr, attr, sizeof(*attr)))
 805			return -EINVAL;
 806	}
 807
 808	if (bp->attr.bp_type != attr->bp_type) {
 809		err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type);
 810		if (err)
 811			return err;
 812	}
 813
 814	hw_breakpoint_copy_attr(&bp->attr, attr);
 815	bp->hw.info = hw;
 816
 817	return 0;
 818}
 819
 820/**
 821 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
 822 * @bp: the breakpoint structure to modify
 823 * @attr: new breakpoint attributes
 824 */
 825int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 826{
 827	int err;
 828
 829	/*
 830	 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
 831	 * will not be possible to raise IPIs that invoke __perf_event_disable.
 832	 * So call the function directly after making sure we are targeting the
 833	 * current task.
 834	 */
 835	if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
 836		perf_event_disable_local(bp);
 837	else
 838		perf_event_disable(bp);
 839
 840	err = modify_user_hw_breakpoint_check(bp, attr, false);
 841
 842	if (!bp->attr.disabled)
 843		perf_event_enable(bp);
 844
 845	return err;
 846}
 847EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
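For instance, re-pointing an already registered user breakpoint at a different address only requires copying its current attributes and changing bp_addr; a small sketch (move_user_breakpoint() is a made-up helper name):

/* Re-arm an existing user breakpoint at new_addr, keeping type and length. */
static int move_user_breakpoint(struct perf_event *bp, unsigned long new_addr)
{
	struct perf_event_attr attr = bp->attr;

	attr.bp_addr = new_addr;
	return modify_user_hw_breakpoint(bp, &attr);
}
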
 848
 849/**
 850 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
 851 * @bp: the breakpoint structure to unregister
 852 */
 853void unregister_hw_breakpoint(struct perf_event *bp)
 854{
 855	if (!bp)
 856		return;
 857	perf_event_release_kernel(bp);
 858}
 859EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
 860
 861/**
 862 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
 863 * @attr: breakpoint attributes
 864 * @triggered: callback to trigger when we hit the breakpoint
 865 * @context: context data that can be used in the @triggered callback
 866 *
 867 * @return a set of per_cpu pointers to perf events
 868 */
 869struct perf_event * __percpu *
 870register_wide_hw_breakpoint(struct perf_event_attr *attr,
 871			    perf_overflow_handler_t triggered,
 872			    void *context)
 873{
 874	struct perf_event * __percpu *cpu_events, *bp;
 875	long err = 0;
 876	int cpu;
 877
 878	cpu_events = alloc_percpu(typeof(*cpu_events));
 879	if (!cpu_events)
 880		return (void __percpu __force *)ERR_PTR(-ENOMEM);
 881
 882	cpus_read_lock();
 883	for_each_online_cpu(cpu) {
 884		bp = perf_event_create_kernel_counter(attr, cpu, NULL,
 885						      triggered, context);
 886		if (IS_ERR(bp)) {
 887			err = PTR_ERR(bp);
 888			break;
 889		}
 890
 891		per_cpu(*cpu_events, cpu) = bp;
 892	}
 893	cpus_read_unlock();
 894
 895	if (likely(!err))
 896		return cpu_events;
 897
 898	unregister_wide_hw_breakpoint(cpu_events);
 899	return (void __percpu __force *)ERR_PTR(err);
 900}
 901EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
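A sketch of a system-wide kernel watchpoint in the spirit of samples/hw_breakpoint/data_breakpoint.c, but watching a module-local variable so no symbol lookup is needed; watched_var, wide_wp, wide_wp_handler() and watch_kernel_var() are illustrative names.

#include <linux/err.h>
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <linux/printk.h>
#include <linux/smp.h>

static int watched_var;
static struct perf_event * __percpu *wide_wp;

static void wide_wp_handler(struct perf_event *bp,
			    struct perf_sample_data *data,
			    struct pt_regs *regs)
{
	pr_info("watched_var written on CPU %d\n", smp_processor_id());
}

static int watch_kernel_var(void)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = (unsigned long)&watched_var;
	attr.bp_len  = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W;

	wide_wp = register_wide_hw_breakpoint(&attr, wide_wp_handler, NULL);
	if (IS_ERR((void __force *)wide_wp)) {
		int err = PTR_ERR((void __force *)wide_wp);

		wide_wp = NULL;
		return err;
	}
	return 0;
}

static void unwatch_kernel_var(void)
{
	if (wide_wp)
		unregister_wide_hw_breakpoint(wide_wp);
	wide_wp = NULL;
}
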
 902
 903/**
 904 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
 905 * @cpu_events: the per cpu set of events to unregister
 906 */
 907void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
 908{
 909	int cpu;
 910
 911	for_each_possible_cpu(cpu)
 912		unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));
 913
 914	free_percpu(cpu_events);
 915}
 916EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
 917
 918/**
 919 * hw_breakpoint_is_used - check if breakpoints are currently used
 920 *
 921 * Returns: true if breakpoints are used, false otherwise.
 922 */
 923bool hw_breakpoint_is_used(void)
 924{
 925	int cpu;
 926
 927	if (!constraints_initialized)
 928		return false;
 929
 930	for_each_possible_cpu(cpu) {
 931		for (int type = 0; type < TYPE_MAX; ++type) {
 932			struct bp_cpuinfo *info = get_bp_info(cpu, type);
 933
 934			if (info->cpu_pinned)
 935				return true;
 936
 937			for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
 938				if (atomic_read(&info->tsk_pinned.count[slot]))
 939					return true;
 940			}
 941		}
 942	}
 943
 944	for (int type = 0; type < TYPE_MAX; ++type) {
 945		for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
 946			/*
 947			 * Warn, because if there are CPU pinned counters,
 948			 * should never get here; bp_cpuinfo::cpu_pinned should
 949			 * be consistent with the global cpu_pinned histogram.
 950			 */
 951			if (WARN_ON(atomic_read(&cpu_pinned[type].count[slot])))
 952				return true;
 953
 954			if (atomic_read(&tsk_pinned_all[type].count[slot]))
 955				return true;
 956		}
 957	}
 958
 959	return false;
 960}
 961
 962static struct notifier_block hw_breakpoint_exceptions_nb = {
 963	.notifier_call = hw_breakpoint_exceptions_notify,
 964	/* we need to be notified first */
 965	.priority = 0x7fffffff
 966};
 967
 968static void bp_perf_event_destroy(struct perf_event *event)
 969{
 970	release_bp_slot(event);
 971}
 972
 973static int hw_breakpoint_event_init(struct perf_event *bp)
 974{
 975	int err;
 976
 977	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
 978		return -ENOENT;
 979
 980	/*
 981	 * no branch sampling for breakpoint events
 982	 */
 983	if (has_branch_stack(bp))
 984		return -EOPNOTSUPP;
 985
 986	err = register_perf_hw_breakpoint(bp);
 987	if (err)
 988		return err;
 989
 990	bp->destroy = bp_perf_event_destroy;
 991
 992	return 0;
 993}
 994
 995static int hw_breakpoint_add(struct perf_event *bp, int flags)
 996{
 997	if (!(flags & PERF_EF_START))
 998		bp->hw.state = PERF_HES_STOPPED;
 999
1000	if (is_sampling_event(bp)) {
1001		bp->hw.last_period = bp->hw.sample_period;
1002		perf_swevent_set_period(bp);
1003	}
1004
1005	return arch_install_hw_breakpoint(bp);
1006}
1007
1008static void hw_breakpoint_del(struct perf_event *bp, int flags)
1009{
1010	arch_uninstall_hw_breakpoint(bp);
1011}
1012
1013static void hw_breakpoint_start(struct perf_event *bp, int flags)
1014{
1015	bp->hw.state = 0;
1016}
1017
1018static void hw_breakpoint_stop(struct perf_event *bp, int flags)
1019{
1020	bp->hw.state = PERF_HES_STOPPED;
1021}
1022
1023static struct pmu perf_breakpoint = {
1024	.task_ctx_nr	= perf_sw_context, /* could eventually get its own */
1025
1026	.event_init	= hw_breakpoint_event_init,
1027	.add		= hw_breakpoint_add,
1028	.del		= hw_breakpoint_del,
1029	.start		= hw_breakpoint_start,
1030	.stop		= hw_breakpoint_stop,
1031	.read		= hw_breakpoint_pmu_read,
1032};
1033
1034int __init init_hw_breakpoint(void)
1035{
1036	int ret;
1037
1038	ret = rhltable_init(&task_bps_ht, &task_bps_ht_params);
1039	if (ret)
1040		return ret;
1041
1042	ret = init_breakpoint_slots();
1043	if (ret)
1044		return ret;
1045
1046	constraints_initialized = true;
1047
1048	perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
1049
1050	return register_die_notifier(&hw_breakpoint_exceptions_nb);
1051}
v5.9
  1// SPDX-License-Identifier: GPL-2.0+
  2/*
  3 * Copyright (C) 2007 Alan Stern
  4 * Copyright (C) IBM Corporation, 2009
  5 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
  6 *
  7 * Thanks to Ingo Molnar for his many suggestions.
  8 *
  9 * Authors: Alan Stern <stern@rowland.harvard.edu>
 10 *          K.Prasad <prasad@linux.vnet.ibm.com>
 11 *          Frederic Weisbecker <fweisbec@gmail.com>
 12 */
 13
 14/*
 15 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 16 * using the CPU's debug registers.
 17 * This file contains the arch-independent routines.
 18 */
 19
 20#include <linux/irqflags.h>
 21#include <linux/kallsyms.h>
 22#include <linux/notifier.h>
 23#include <linux/kprobes.h>
 24#include <linux/kdebug.h>
 25#include <linux/kernel.h>
 26#include <linux/module.h>
 27#include <linux/percpu.h>
 28#include <linux/sched.h>
 29#include <linux/init.h>
 30#include <linux/slab.h>
 31#include <linux/list.h>
 32#include <linux/cpu.h>
 33#include <linux/smp.h>
 34#include <linux/bug.h>
 35
 36#include <linux/hw_breakpoint.h>
 37/*
 38 * Constraints data
 39 */
 40struct bp_cpuinfo {
 41	/* Number of pinned cpu breakpoints in a cpu */
 42	unsigned int	cpu_pinned;
 43	/* tsk_pinned[n] is the number of tasks having n+1 breakpoints */
 44	unsigned int	*tsk_pinned;
 45	/* Number of non-pinned cpu/task breakpoints in a cpu */
 46	unsigned int	flexible; /* XXX: placeholder, see fetch_this_slot() */
 47};
 48
 49static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
 50static int nr_slots[TYPE_MAX];
 51
 52static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
 53{
 54	return per_cpu_ptr(bp_cpuinfo + type, cpu);
 55}
 56
 57/* Keep track of the breakpoints attached to tasks */
 58static LIST_HEAD(bp_task_head);
 59
 60static int constraints_initialized;
 61
 62/* Gather the total number of pinned and un-pinned bp in a cpuset */
 63struct bp_busy_slots {
 64	unsigned int pinned;
 65	unsigned int flexible;
 66};
 67
 68/* Serialize accesses to the above constraints */
 69static DEFINE_MUTEX(nr_bp_mutex);
 70
 71__weak int hw_breakpoint_weight(struct perf_event *bp)
 72{
 73	return 1;
 74}
 75
 76static inline enum bp_type_idx find_slot_idx(u64 bp_type)
 77{
 78	if (bp_type & HW_BREAKPOINT_RW)
 79		return TYPE_DATA;
 80
 81	return TYPE_INST;
 82}
 83
 84/*
 85 * Report the maximum number of pinned breakpoints a task
 86 * has in this cpu
 87 */
 88static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 89{
 90	unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
 91	int i;
 92
 93	for (i = nr_slots[type] - 1; i >= 0; i--) {
 94		if (tsk_pinned[i] > 0)
 95			return i + 1;
 96	}
 97
 98	return 0;
 99}
100
101/*
102 * Count the number of breakpoints of the same type and same task.
103 * The given event must not be on the list.
104 */
105static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
106{
107	struct task_struct *tsk = bp->hw.target;
108	struct perf_event *iter;
109	int count = 0;
110
111	list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
112		if (iter->hw.target == tsk &&
113		    find_slot_idx(iter->attr.bp_type) == type &&
114		    (iter->cpu < 0 || cpu == iter->cpu))
115			count += hw_breakpoint_weight(iter);
116	}
117
118	return count;
119}
120
121static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
122{
123	if (bp->cpu >= 0)
124		return cpumask_of(bp->cpu);
125	return cpu_possible_mask;
126}
127
128/*
129 * Report the number of pinned/un-pinned breakpoints we have in
130 * a given cpu (cpu > -1) or in all of them (cpu = -1).
131 */
132static void
133fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
134		    enum bp_type_idx type)
135{
136	const struct cpumask *cpumask = cpumask_of_bp(bp);
137	int cpu;
138
139	for_each_cpu(cpu, cpumask) {
140		struct bp_cpuinfo *info = get_bp_info(cpu, type);
141		int nr;
142
143		nr = info->cpu_pinned;
144		if (!bp->hw.target)
145			nr += max_task_bp_pinned(cpu, type);
146		else
147			nr += task_bp_pinned(cpu, bp, type);
148
149		if (nr > slots->pinned)
150			slots->pinned = nr;
151
152		nr = info->flexible;
153		if (nr > slots->flexible)
154			slots->flexible = nr;
155	}
156}
157
158/*
159 * For now, continue to consider flexible as pinned, until we can
160 * ensure no flexible event can ever be scheduled before a pinned event
161 * on the same cpu.
162 */
163static void
164fetch_this_slot(struct bp_busy_slots *slots, int weight)
165{
166	slots->pinned += weight;
167}
168
169/*
170 * Add a pinned breakpoint for the given task in our constraint table
171 */
172static void toggle_bp_task_slot(struct perf_event *bp, int cpu,
173				enum bp_type_idx type, int weight)
174{
175	unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
176	int old_idx, new_idx;
177
178	old_idx = task_bp_pinned(cpu, bp, type) - 1;
179	new_idx = old_idx + weight;
180
181	if (old_idx >= 0)
182		tsk_pinned[old_idx]--;
183	if (new_idx >= 0)
184		tsk_pinned[new_idx]++;
185}
186
187/*
188 * Add/remove the given breakpoint in our constraint table
189 */
190static void
191toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
192	       int weight)
193{
194	const struct cpumask *cpumask = cpumask_of_bp(bp);
195	int cpu;
196
197	if (!enable)
198		weight = -weight;
199
200	/* Pinned counter cpu profiling */
201	if (!bp->hw.target) {
202		get_bp_info(bp->cpu, type)->cpu_pinned += weight;
203		return;
204	}
205
206	/* Pinned counter task profiling */
207	for_each_cpu(cpu, cpumask)
208		toggle_bp_task_slot(bp, cpu, type, weight);
209
210	if (enable)
211		list_add_tail(&bp->hw.bp_list, &bp_task_head);
212	else
213		list_del(&bp->hw.bp_list);
214}
215
216__weak int arch_reserve_bp_slot(struct perf_event *bp)
217{
218	return 0;
219}
220
221__weak void arch_release_bp_slot(struct perf_event *bp)
222{
223}
224
225/*
226 * Function to perform processor-specific cleanup during unregistration
227 */
228__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
229{
230	/*
231	 * A weak stub function here for those archs that don't define
232	 * it inside arch/.../kernel/hw_breakpoint.c
233	 */
234}
235
236/*
237 * Constraints to check before allowing this new breakpoint counter:
238 *
239 *  == Non-pinned counter == (Considered as pinned for now)
240 *
241 *   - If attached to a single cpu, check:
242 *
243 *       (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
244 *           + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
245 *
246 *       -> If there are already non-pinned counters in this cpu, it means
247 *          there is already a free slot for them.
248 *          Otherwise, we check that the maximum number of per-task
249 *          breakpoints (for this cpu) plus the number of per-cpu breakpoints
250 *          (for this cpu) doesn't cover all the registers.
251 *
252 *   - If attached to every cpu, check:
253 *
254 *       (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
255 *           + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
256 *
257 *       -> This is roughly the same, except we check the number of per cpu
258 *          bp for every cpu and we keep the max one. Same for the per tasks
259 *          breakpoints.
260 *
261 *
262 * == Pinned counter ==
263 *
264 *   - If attached to a single cpu, check:
265 *
266 *       ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
267 *            + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
268 *
269 *       -> Same checks as before. But now the info->flexible, if any, must
270 *          keep at least one register (or they will never be fed).
271 *
272 *   - If attached to every cpu, check:
273 *
274 *       ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
275 *            + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
276 */
277static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
278{
279	struct bp_busy_slots slots = {0};
280	enum bp_type_idx type;
281	int weight;
282	int ret;
283
284	/* We couldn't initialize breakpoint constraints on boot */
285	if (!constraints_initialized)
286		return -ENOMEM;
287
288	/* Basic checks */
289	if (bp_type == HW_BREAKPOINT_EMPTY ||
290	    bp_type == HW_BREAKPOINT_INVALID)
291		return -EINVAL;
292
293	type = find_slot_idx(bp_type);
294	weight = hw_breakpoint_weight(bp);
295
296	fetch_bp_busy_slots(&slots, bp, type);
297	/*
298	 * Simulate the addition of this breakpoint to the constraints
299	 * and see the result.
300	 */
301	fetch_this_slot(&slots, weight);
302
303	/* Flexible counters need to keep at least one slot */
304	if (slots.pinned + (!!slots.flexible) > nr_slots[type])
305		return -ENOSPC;
306
307	ret = arch_reserve_bp_slot(bp);
308	if (ret)
309		return ret;
310
311	toggle_bp_slot(bp, true, type, weight);
312
313	return 0;
314}
315
316int reserve_bp_slot(struct perf_event *bp)
317{
318	int ret;
319
320	mutex_lock(&nr_bp_mutex);
321
322	ret = __reserve_bp_slot(bp, bp->attr.bp_type);
323
324	mutex_unlock(&nr_bp_mutex);
325
326	return ret;
327}
328
329static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
330{
331	enum bp_type_idx type;
332	int weight;
333
334	arch_release_bp_slot(bp);
335
336	type = find_slot_idx(bp_type);
337	weight = hw_breakpoint_weight(bp);
338	toggle_bp_slot(bp, false, type, weight);
339}
340
341void release_bp_slot(struct perf_event *bp)
342{
343	mutex_lock(&nr_bp_mutex);
344
345	arch_unregister_hw_breakpoint(bp);
346	__release_bp_slot(bp, bp->attr.bp_type);
347
348	mutex_unlock(&nr_bp_mutex);
349}
350
351static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
352{
353	int err;
354
355	__release_bp_slot(bp, old_type);
356
357	err = __reserve_bp_slot(bp, new_type);
358	if (err) {
359		/*
360		 * Reserve the old_type slot back in case
361		 * there's no space for the new type.
362		 *
363		 * This must succeed, because we just released
364		 * the old_type slot in the __release_bp_slot
365		 * call above. If not, something is broken.
366		 */
367		WARN_ON(__reserve_bp_slot(bp, old_type));
368	}
369
370	return err;
371}
372
373static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
374{
375	int ret;
376
377	mutex_lock(&nr_bp_mutex);
378	ret = __modify_bp_slot(bp, old_type, new_type);
379	mutex_unlock(&nr_bp_mutex);
380	return ret;
381}
382
383/*
384 * Allow the kernel debugger to reserve breakpoint slots without
385 * taking a lock, using the dbg_* variants of the reserve and
386 * release breakpoint slot functions.
387 */
388int dbg_reserve_bp_slot(struct perf_event *bp)
389{
390	if (mutex_is_locked(&nr_bp_mutex))
391		return -1;
392
393	return __reserve_bp_slot(bp, bp->attr.bp_type);
394}
395
396int dbg_release_bp_slot(struct perf_event *bp)
397{
398	if (mutex_is_locked(&nr_bp_mutex))
399		return -1;
400
401	__release_bp_slot(bp, bp->attr.bp_type);
402
403	return 0;
404}
405
406static int hw_breakpoint_parse(struct perf_event *bp,
407			       const struct perf_event_attr *attr,
408			       struct arch_hw_breakpoint *hw)
409{
410	int err;
411
412	err = hw_breakpoint_arch_parse(bp, attr, hw);
413	if (err)
414		return err;
415
416	if (arch_check_bp_in_kernelspace(hw)) {
417		if (attr->exclude_kernel)
418			return -EINVAL;
419		/*
420		 * Don't let unprivileged users set a breakpoint in the trap
421		 * path to avoid trap recursion attacks.
422		 */
423		if (!capable(CAP_SYS_ADMIN))
424			return -EPERM;
425	}
426
427	return 0;
428}
429
430int register_perf_hw_breakpoint(struct perf_event *bp)
431{
432	struct arch_hw_breakpoint hw = { };
433	int err;
434
435	err = reserve_bp_slot(bp);
436	if (err)
437		return err;
438
439	err = hw_breakpoint_parse(bp, &bp->attr, &hw);
440	if (err) {
441		release_bp_slot(bp);
442		return err;
443	}
444
445	bp->hw.info = hw;
446
447	return 0;
448}
449
450/**
451 * register_user_hw_breakpoint - register a hardware breakpoint for user space
452 * @attr: breakpoint attributes
453 * @triggered: callback to trigger when we hit the breakpoint
454 * @tsk: pointer to 'task_struct' of the process to which the address belongs
455 */
456struct perf_event *
457register_user_hw_breakpoint(struct perf_event_attr *attr,
458			    perf_overflow_handler_t triggered,
459			    void *context,
460			    struct task_struct *tsk)
461{
462	return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
463						context);
464}
465EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
466
467static void hw_breakpoint_copy_attr(struct perf_event_attr *to,
468				    struct perf_event_attr *from)
469{
470	to->bp_addr = from->bp_addr;
471	to->bp_type = from->bp_type;
472	to->bp_len  = from->bp_len;
473	to->disabled = from->disabled;
474}
475
476int
477modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr,
478			        bool check)
479{
480	struct arch_hw_breakpoint hw = { };
481	int err;
482
483	err = hw_breakpoint_parse(bp, attr, &hw);
484	if (err)
485		return err;
486
487	if (check) {
488		struct perf_event_attr old_attr;
489
490		old_attr = bp->attr;
491		hw_breakpoint_copy_attr(&old_attr, attr);
492		if (memcmp(&old_attr, attr, sizeof(*attr)))
493			return -EINVAL;
494	}
495
496	if (bp->attr.bp_type != attr->bp_type) {
497		err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type);
498		if (err)
499			return err;
500	}
501
502	hw_breakpoint_copy_attr(&bp->attr, attr);
503	bp->hw.info = hw;
504
505	return 0;
506}
507
508/**
509 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
510 * @bp: the breakpoint structure to modify
511 * @attr: new breakpoint attributes
512 */
513int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
514{
515	int err;
516
517	/*
518	 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
519	 * will not be possible to raise IPIs that invoke __perf_event_disable.
520	 * So call the function directly after making sure we are targeting the
521	 * current task.
522	 */
523	if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
524		perf_event_disable_local(bp);
525	else
526		perf_event_disable(bp);
527
528	err = modify_user_hw_breakpoint_check(bp, attr, false);
529
530	if (!bp->attr.disabled)
531		perf_event_enable(bp);
532
533	return err;
534}
535EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
536
537/**
538 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
539 * @bp: the breakpoint structure to unregister
540 */
541void unregister_hw_breakpoint(struct perf_event *bp)
542{
543	if (!bp)
544		return;
545	perf_event_release_kernel(bp);
546}
547EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
548
549/**
550 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
551 * @attr: breakpoint attributes
552 * @triggered: callback to trigger when we hit the breakpoint
553 *
554 * @return a set of per_cpu pointers to perf events
555 */
556struct perf_event * __percpu *
557register_wide_hw_breakpoint(struct perf_event_attr *attr,
558			    perf_overflow_handler_t triggered,
559			    void *context)
560{
561	struct perf_event * __percpu *cpu_events, *bp;
562	long err = 0;
563	int cpu;
564
565	cpu_events = alloc_percpu(typeof(*cpu_events));
566	if (!cpu_events)
567		return (void __percpu __force *)ERR_PTR(-ENOMEM);
568
569	get_online_cpus();
570	for_each_online_cpu(cpu) {
571		bp = perf_event_create_kernel_counter(attr, cpu, NULL,
572						      triggered, context);
573		if (IS_ERR(bp)) {
574			err = PTR_ERR(bp);
575			break;
576		}
577
578		per_cpu(*cpu_events, cpu) = bp;
579	}
580	put_online_cpus();
581
582	if (likely(!err))
583		return cpu_events;
584
585	unregister_wide_hw_breakpoint(cpu_events);
586	return (void __percpu __force *)ERR_PTR(err);
587}
588EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
589
590/**
591 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
592 * @cpu_events: the per cpu set of events to unregister
593 */
594void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
595{
596	int cpu;
597
598	for_each_possible_cpu(cpu)
599		unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));
600
601	free_percpu(cpu_events);
602}
603EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
604
605static struct notifier_block hw_breakpoint_exceptions_nb = {
606	.notifier_call = hw_breakpoint_exceptions_notify,
607	/* we need to be notified first */
608	.priority = 0x7fffffff
609};
610
611static void bp_perf_event_destroy(struct perf_event *event)
612{
613	release_bp_slot(event);
614}
615
616static int hw_breakpoint_event_init(struct perf_event *bp)
617{
618	int err;
619
620	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
621		return -ENOENT;
622
623	/*
624	 * no branch sampling for breakpoint events
625	 */
626	if (has_branch_stack(bp))
627		return -EOPNOTSUPP;
628
629	err = register_perf_hw_breakpoint(bp);
630	if (err)
631		return err;
632
633	bp->destroy = bp_perf_event_destroy;
634
635	return 0;
636}
637
638static int hw_breakpoint_add(struct perf_event *bp, int flags)
639{
640	if (!(flags & PERF_EF_START))
641		bp->hw.state = PERF_HES_STOPPED;
642
643	if (is_sampling_event(bp)) {
644		bp->hw.last_period = bp->hw.sample_period;
645		perf_swevent_set_period(bp);
646	}
647
648	return arch_install_hw_breakpoint(bp);
649}
650
651static void hw_breakpoint_del(struct perf_event *bp, int flags)
652{
653	arch_uninstall_hw_breakpoint(bp);
654}
655
656static void hw_breakpoint_start(struct perf_event *bp, int flags)
657{
658	bp->hw.state = 0;
659}
660
661static void hw_breakpoint_stop(struct perf_event *bp, int flags)
662{
663	bp->hw.state = PERF_HES_STOPPED;
664}
665
666static struct pmu perf_breakpoint = {
667	.task_ctx_nr	= perf_sw_context, /* could eventually get its own */
668
669	.event_init	= hw_breakpoint_event_init,
670	.add		= hw_breakpoint_add,
671	.del		= hw_breakpoint_del,
672	.start		= hw_breakpoint_start,
673	.stop		= hw_breakpoint_stop,
674	.read		= hw_breakpoint_pmu_read,
675};
676
677int __init init_hw_breakpoint(void)
678{
679	int cpu, err_cpu;
680	int i;
681
682	for (i = 0; i < TYPE_MAX; i++)
683		nr_slots[i] = hw_breakpoint_slots(i);
684
685	for_each_possible_cpu(cpu) {
686		for (i = 0; i < TYPE_MAX; i++) {
687			struct bp_cpuinfo *info = get_bp_info(cpu, i);
688
689			info->tsk_pinned = kcalloc(nr_slots[i], sizeof(int),
690							GFP_KERNEL);
691			if (!info->tsk_pinned)
692				goto err_alloc;
693		}
694	}
695
696	constraints_initialized = 1;
697
698	perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
699
700	return register_die_notifier(&hw_breakpoint_exceptions_nb);
701
702 err_alloc:
703	for_each_possible_cpu(err_cpu) {
704		for (i = 0; i < TYPE_MAX; i++)
705			kfree(get_bp_info(err_cpu, i)->tsk_pinned);
706		if (err_cpu == cpu)
707			break;
708	}
709
710	return -ENOMEM;
711}
712
713