v6.2
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * Copyright (C) 2007 Alan Stern
   4 * Copyright (C) IBM Corporation, 2009
   5 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
   6 *
   7 * Thanks to Ingo Molnar for his many suggestions.
   8 *
   9 * Authors: Alan Stern <stern@rowland.harvard.edu>
  10 *          K.Prasad <prasad@linux.vnet.ibm.com>
  11 *          Frederic Weisbecker <fweisbec@gmail.com>
  12 */
  13
  14/*
  15 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
  16 * using the CPU's debug registers.
  17 * This file contains the arch-independent routines.
  18 */
  19
  20#include <linux/hw_breakpoint.h>
  21
  22#include <linux/atomic.h>
  23#include <linux/bug.h>
  24#include <linux/cpu.h>
  25#include <linux/export.h>
  26#include <linux/init.h>
  27#include <linux/irqflags.h>
  28#include <linux/kdebug.h>
  29#include <linux/kernel.h>
  30#include <linux/mutex.h>
  31#include <linux/notifier.h>
  32#include <linux/percpu-rwsem.h>
  33#include <linux/percpu.h>
  34#include <linux/rhashtable.h>
  35#include <linux/sched.h>
  36#include <linux/slab.h>
  37
  38/*
  39 * Datastructure to track the total uses of N slots across tasks or CPUs;
  40 * bp_slots_histogram::count[N] is the number of assigned N+1 breakpoint slots.
  41 */
  42struct bp_slots_histogram {
  43#ifdef hw_breakpoint_slots
  44	atomic_t count[hw_breakpoint_slots(0)];
  45#else
  46	atomic_t *count;
  47#endif
  48};
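/*
 * Editor's note -- an illustrative example, not part of the original source:
 * with the semantics above, count[N] answers "how many tasks (or CPUs, for
 * the cpu_pinned histogram below) currently have exactly N+1 slots of this
 * type assigned". For instance, if task A holds 1 data breakpoint and task B
 * holds 3, the relevant TYPE_DATA task histogram reads:
 *
 *	count[0] == 1	(one task using exactly 1 slot: A)
 *	count[1] == 0
 *	count[2] == 1	(one task using exactly 3 slots: B)
 *
 * and the highest non-zero bucket (index 2, i.e. 3 slots) is what
 * bp_slots_histogram_max() further down reports.
 */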
  49
  50/*
  51 * Per-CPU constraints data.
  52 */
  53struct bp_cpuinfo {
  54	/* Number of pinned CPU breakpoints in a CPU. */
  55	unsigned int			cpu_pinned;
  56	/* Histogram of pinned task breakpoints in a CPU. */
  57	struct bp_slots_histogram	tsk_pinned;
  58};
  59
  60static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
  61
  62static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
  63{
  64	return per_cpu_ptr(bp_cpuinfo + type, cpu);
  65}
  66
  67/* Number of pinned CPU breakpoints globally. */
  68static struct bp_slots_histogram cpu_pinned[TYPE_MAX];
  69/* Number of pinned CPU-independent task breakpoints. */
  70static struct bp_slots_histogram tsk_pinned_all[TYPE_MAX];
  71
  72/* Keep track of the breakpoints attached to tasks */
  73static struct rhltable task_bps_ht;
  74static const struct rhashtable_params task_bps_ht_params = {
  75	.head_offset = offsetof(struct hw_perf_event, bp_list),
  76	.key_offset = offsetof(struct hw_perf_event, target),
  77	.key_len = sizeof_field(struct hw_perf_event, target),
  78	.automatic_shrinking = true,
  79};
  80
  81static bool constraints_initialized __ro_after_init;
  82
  83/*
  84 * Synchronizes accesses to the per-CPU constraints; the locking rules are:
  85 *
  86 *  1. Atomic updates to bp_cpuinfo::tsk_pinned only require a held read-lock
   87 *     (due to bp_slots_histogram::count being atomic, no updates are lost).
  88 *
  89 *  2. Holding a write-lock is required for computations that require a
  90 *     stable snapshot of all bp_cpuinfo::tsk_pinned.
  91 *
  92 *  3. In all other cases, non-atomic accesses require the appropriately held
  93 *     lock (read-lock for read-only accesses; write-lock for reads/writes).
  94 */
  95DEFINE_STATIC_PERCPU_RWSEM(bp_cpuinfo_sem);
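/*
 * Editor's sketch -- illustration only, not in the original file: how the
 * three rules above map onto lock calls at the call sites further down.
 * The helper is never called; it only shows the pairing of histogram
 * accesses with the per-CPU rwsem.
 */
static inline void bp_cpuinfo_locking_example(void)
{
	/* Rule 1: atomic updates to a histogram only need the read side. */
	percpu_down_read(&bp_cpuinfo_sem);
	atomic_inc(&tsk_pinned_all[TYPE_DATA].count[0]);
	atomic_dec(&tsk_pinned_all[TYPE_DATA].count[0]);
	percpu_up_read(&bp_cpuinfo_sem);

	/* Rule 2: a stable snapshot across all CPUs needs the write side. */
	percpu_down_write(&bp_cpuinfo_sem);
	/* ... e.g. max_task_bp_pinned() reads every tsk_pinned here ... */
	percpu_up_write(&bp_cpuinfo_sem);
}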
  96
  97/*
  98 * Return mutex to serialize accesses to per-task lists in task_bps_ht. Since
  99 * rhltable synchronizes concurrent insertions/deletions, independent tasks may
 100 * insert/delete concurrently; therefore, a mutex per task is sufficient.
 101 *
 102 * Uses task_struct::perf_event_mutex, to avoid extending task_struct with a
 103 * hw_breakpoint-only mutex, which may be infrequently used. The caveat here is
 104 * that hw_breakpoint may contend with per-task perf event list management. The
  105 * assumption is that perf use cases involving hw_breakpoints are very unlikely
 106 * to result in unnecessary contention.
 107 */
 108static inline struct mutex *get_task_bps_mutex(struct perf_event *bp)
 109{
 110	struct task_struct *tsk = bp->hw.target;
 111
 112	return tsk ? &tsk->perf_event_mutex : NULL;
 113}
 114
 115static struct mutex *bp_constraints_lock(struct perf_event *bp)
 116{
 117	struct mutex *tsk_mtx = get_task_bps_mutex(bp);
 118
 119	if (tsk_mtx) {
 120		/*
 121		 * Fully analogous to the perf_try_init_event() nesting
 122		 * argument in the comment near perf_event_ctx_lock_nested();
 123		 * this child->perf_event_mutex cannot ever deadlock against
 124		 * the parent->perf_event_mutex usage from
 125		 * perf_event_task_{en,dis}able().
 126		 *
 127		 * Specifically, inherited events will never occur on
 128		 * ->perf_event_list.
 129		 */
 130		mutex_lock_nested(tsk_mtx, SINGLE_DEPTH_NESTING);
 131		percpu_down_read(&bp_cpuinfo_sem);
 132	} else {
 133		percpu_down_write(&bp_cpuinfo_sem);
 134	}
 135
 136	return tsk_mtx;
 137}
 138
 139static void bp_constraints_unlock(struct mutex *tsk_mtx)
 140{
 141	if (tsk_mtx) {
 142		percpu_up_read(&bp_cpuinfo_sem);
 143		mutex_unlock(tsk_mtx);
 144	} else {
 145		percpu_up_write(&bp_cpuinfo_sem);
 146	}
 147}
 148
 149static bool bp_constraints_is_locked(struct perf_event *bp)
 150{
 151	struct mutex *tsk_mtx = get_task_bps_mutex(bp);
 152
 153	return percpu_is_write_locked(&bp_cpuinfo_sem) ||
 154	       (tsk_mtx ? mutex_is_locked(tsk_mtx) :
 155			  percpu_is_read_locked(&bp_cpuinfo_sem));
 156}
 157
 158static inline void assert_bp_constraints_lock_held(struct perf_event *bp)
 159{
 160	struct mutex *tsk_mtx = get_task_bps_mutex(bp);
 161
 162	if (tsk_mtx)
 163		lockdep_assert_held(tsk_mtx);
 164	lockdep_assert_held(&bp_cpuinfo_sem);
 165}
 166
 167#ifdef hw_breakpoint_slots
 168/*
 169 * Number of breakpoint slots is constant, and the same for all types.
 170 */
 171static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA));
 172static inline int hw_breakpoint_slots_cached(int type)	{ return hw_breakpoint_slots(type); }
 173static inline int init_breakpoint_slots(void)		{ return 0; }
 174#else
 175/*
 176 * Dynamic number of breakpoint slots.
 177 */
 178static int __nr_bp_slots[TYPE_MAX] __ro_after_init;
 179
 180static inline int hw_breakpoint_slots_cached(int type)
 181{
 182	return __nr_bp_slots[type];
 183}
 184
 185static __init bool
 186bp_slots_histogram_alloc(struct bp_slots_histogram *hist, enum bp_type_idx type)
 187{
 188	hist->count = kcalloc(hw_breakpoint_slots_cached(type), sizeof(*hist->count), GFP_KERNEL);
 189	return hist->count;
 190}
 191
 192static __init void bp_slots_histogram_free(struct bp_slots_histogram *hist)
 193{
 194	kfree(hist->count);
 195}
 196
 197static __init int init_breakpoint_slots(void)
 198{
 199	int i, cpu, err_cpu;
 200
 201	for (i = 0; i < TYPE_MAX; i++)
 202		__nr_bp_slots[i] = hw_breakpoint_slots(i);
 203
 204	for_each_possible_cpu(cpu) {
 205		for (i = 0; i < TYPE_MAX; i++) {
 206			struct bp_cpuinfo *info = get_bp_info(cpu, i);
 207
 208			if (!bp_slots_histogram_alloc(&info->tsk_pinned, i))
 209				goto err;
 210		}
 211	}
 212	for (i = 0; i < TYPE_MAX; i++) {
 213		if (!bp_slots_histogram_alloc(&cpu_pinned[i], i))
 214			goto err;
 215		if (!bp_slots_histogram_alloc(&tsk_pinned_all[i], i))
 216			goto err;
 217	}
 218
 219	return 0;
 220err:
 221	for_each_possible_cpu(err_cpu) {
 222		for (i = 0; i < TYPE_MAX; i++)
 223			bp_slots_histogram_free(&get_bp_info(err_cpu, i)->tsk_pinned);
 224		if (err_cpu == cpu)
 225			break;
 226	}
 227	for (i = 0; i < TYPE_MAX; i++) {
 228		bp_slots_histogram_free(&cpu_pinned[i]);
 229		bp_slots_histogram_free(&tsk_pinned_all[i]);
 230	}
 231
 232	return -ENOMEM;
 233}
 234#endif
 235
 236static inline void
 237bp_slots_histogram_add(struct bp_slots_histogram *hist, int old, int val)
 238{
 239	const int old_idx = old - 1;
 240	const int new_idx = old_idx + val;
 241
 242	if (old_idx >= 0)
 243		WARN_ON(atomic_dec_return_relaxed(&hist->count[old_idx]) < 0);
 244	if (new_idx >= 0)
 245		WARN_ON(atomic_inc_return_relaxed(&hist->count[new_idx]) < 0);
 246}
 247
 248static int
 249bp_slots_histogram_max(struct bp_slots_histogram *hist, enum bp_type_idx type)
 250{
 251	for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
 252		const int count = atomic_read(&hist->count[i]);
 253
 254		/* Catch unexpected writers; we want a stable snapshot. */
 255		ASSERT_EXCLUSIVE_WRITER(hist->count[i]);
 256		if (count > 0)
 257			return i + 1;
 258		WARN(count < 0, "inconsistent breakpoint slots histogram");
 259	}
 260
 261	return 0;
 262}
 263
 264static int
 265bp_slots_histogram_max_merge(struct bp_slots_histogram *hist1, struct bp_slots_histogram *hist2,
 266			     enum bp_type_idx type)
 267{
 268	for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
 269		const int count1 = atomic_read(&hist1->count[i]);
 270		const int count2 = atomic_read(&hist2->count[i]);
 271
 272		/* Catch unexpected writers; we want a stable snapshot. */
 273		ASSERT_EXCLUSIVE_WRITER(hist1->count[i]);
 274		ASSERT_EXCLUSIVE_WRITER(hist2->count[i]);
 275		if (count1 + count2 > 0)
 276			return i + 1;
 277		WARN(count1 < 0, "inconsistent breakpoint slots histogram");
 278		WARN(count2 < 0, "inconsistent breakpoint slots histogram");
 279	}
 280
 281	return 0;
 282}
 283
 284#ifndef hw_breakpoint_weight
 285static inline int hw_breakpoint_weight(struct perf_event *bp)
 286{
 287	return 1;
 288}
 289#endif
 290
 291static inline enum bp_type_idx find_slot_idx(u64 bp_type)
 292{
 293	if (bp_type & HW_BREAKPOINT_RW)
 294		return TYPE_DATA;
 295
 296	return TYPE_INST;
 297}
 298
 299/*
 300 * Return the maximum number of pinned breakpoints a task has in this CPU.
 301 */
 302static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 303{
 304	struct bp_slots_histogram *tsk_pinned = &get_bp_info(cpu, type)->tsk_pinned;
 305
 306	/*
 307	 * At this point we want to have acquired the bp_cpuinfo_sem as a
 308	 * writer to ensure that there are no concurrent writers in
 309	 * toggle_bp_task_slot() to tsk_pinned, and we get a stable snapshot.
 310	 */
 311	lockdep_assert_held_write(&bp_cpuinfo_sem);
 312	return bp_slots_histogram_max_merge(tsk_pinned, &tsk_pinned_all[type], type);
 313}
 314
 315/*
 316 * Count the number of breakpoints of the same type and same task.
 317 * The given event must not be on the list.
 318 *
 319 * If @cpu is -1, but the result of task_bp_pinned() is not CPU-independent,
 320 * returns a negative value.
 321 */
 322static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
 323{
 324	struct rhlist_head *head, *pos;
 325	struct perf_event *iter;
 326	int count = 0;
 327
 328	/*
 329	 * We need a stable snapshot of the per-task breakpoint list.
 330	 */
 331	assert_bp_constraints_lock_held(bp);
 332
 333	rcu_read_lock();
 334	head = rhltable_lookup(&task_bps_ht, &bp->hw.target, task_bps_ht_params);
 335	if (!head)
 336		goto out;
 337
 338	rhl_for_each_entry_rcu(iter, pos, head, hw.bp_list) {
 339		if (find_slot_idx(iter->attr.bp_type) != type)
 340			continue;
 341
 342		if (iter->cpu >= 0) {
 343			if (cpu == -1) {
 344				count = -1;
 345				goto out;
 346			} else if (cpu != iter->cpu)
 347				continue;
 348		}
 349
 350		count += hw_breakpoint_weight(iter);
 351	}
 352
 353out:
 354	rcu_read_unlock();
 355	return count;
 356}
 357
 358static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
 359{
 360	if (bp->cpu >= 0)
 361		return cpumask_of(bp->cpu);
 362	return cpu_possible_mask;
 363}
 364
 365/*
 366 * Returns the max pinned breakpoint slots in a given
 367 * CPU (cpu > -1) or across all of them (cpu = -1).
 368 */
 369static int
 370max_bp_pinned_slots(struct perf_event *bp, enum bp_type_idx type)
 371{
 372	const struct cpumask *cpumask = cpumask_of_bp(bp);
 373	int pinned_slots = 0;
 374	int cpu;
 375
 376	if (bp->hw.target && bp->cpu < 0) {
 377		int max_pinned = task_bp_pinned(-1, bp, type);
 378
 379		if (max_pinned >= 0) {
 380			/*
 381			 * Fast path: task_bp_pinned() is CPU-independent and
 382			 * returns the same value for any CPU.
 383			 */
 384			max_pinned += bp_slots_histogram_max(&cpu_pinned[type], type);
 385			return max_pinned;
 386		}
 387	}
 388
 389	for_each_cpu(cpu, cpumask) {
 390		struct bp_cpuinfo *info = get_bp_info(cpu, type);
 391		int nr;
 392
 393		nr = info->cpu_pinned;
 394		if (!bp->hw.target)
 395			nr += max_task_bp_pinned(cpu, type);
 396		else
 397			nr += task_bp_pinned(cpu, bp, type);
 398
 399		pinned_slots = max(nr, pinned_slots);
 400	}
 401
 402	return pinned_slots;
 403}
 404
 405/*
 406 * Add/remove the given breakpoint in our constraint table
 407 */
 408static int
 409toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int weight)
 410{
 411	int cpu, next_tsk_pinned;
 412
 413	if (!enable)
 414		weight = -weight;
 415
 416	if (!bp->hw.target) {
 417		/*
 418		 * Update the pinned CPU slots, in per-CPU bp_cpuinfo and in the
 419		 * global histogram.
 420		 */
 421		struct bp_cpuinfo *info = get_bp_info(bp->cpu, type);
 422
 423		lockdep_assert_held_write(&bp_cpuinfo_sem);
 424		bp_slots_histogram_add(&cpu_pinned[type], info->cpu_pinned, weight);
 425		info->cpu_pinned += weight;
 426		return 0;
 427	}
 428
 429	/*
 430	 * If bp->hw.target, tsk_pinned is only modified, but not used
 431	 * otherwise. We can permit concurrent updates as long as there are no
 432	 * other uses: having acquired bp_cpuinfo_sem as a reader allows
 433	 * concurrent updates here. Uses of tsk_pinned will require acquiring
 434	 * bp_cpuinfo_sem as a writer to stabilize tsk_pinned's value.
 435	 */
 436	lockdep_assert_held_read(&bp_cpuinfo_sem);
 437
 438	/*
 439	 * Update the pinned task slots, in per-CPU bp_cpuinfo and in the global
 440	 * histogram. We need to take care of 4 cases:
 441	 *
 442	 *  1. This breakpoint targets all CPUs (cpu < 0), and there may only
 443	 *     exist other task breakpoints targeting all CPUs. In this case we
 444	 *     can simply update the global slots histogram.
 445	 *
 446	 *  2. This breakpoint targets a specific CPU (cpu >= 0), but there may
 447	 *     only exist other task breakpoints targeting all CPUs.
 448	 *
 449	 *     a. On enable: remove the existing breakpoints from the global
 450	 *        slots histogram and use the per-CPU histogram.
 451	 *
 452	 *     b. On disable: re-insert the existing breakpoints into the global
 453	 *        slots histogram and remove from per-CPU histogram.
 454	 *
 455	 *  3. Some other existing task breakpoints target specific CPUs. Only
 456	 *     update the per-CPU slots histogram.
 457	 */
 458
 459	if (!enable) {
 460		/*
 461		 * Remove before updating histograms so we can determine if this
 462		 * was the last task breakpoint for a specific CPU.
 463		 */
 464		int ret = rhltable_remove(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);
 465
 466		if (ret)
 467			return ret;
 468	}
 469	/*
 470	 * Note: If !enable, next_tsk_pinned will not count the to-be-removed breakpoint.
 471	 */
 472	next_tsk_pinned = task_bp_pinned(-1, bp, type);
 473
 474	if (next_tsk_pinned >= 0) {
 475		if (bp->cpu < 0) { /* Case 1: fast path */
 476			if (!enable)
 477				next_tsk_pinned += hw_breakpoint_weight(bp);
 478			bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned, weight);
 479		} else if (enable) { /* Case 2.a: slow path */
 480			/* Add existing to per-CPU histograms. */
 481			for_each_possible_cpu(cpu) {
 482				bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
 483						       0, next_tsk_pinned);
 484			}
 485			/* Add this first CPU-pinned task breakpoint. */
 486			bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
 487					       next_tsk_pinned, weight);
 488			/* Rebalance global task pinned histogram. */
 489			bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned,
 490					       -next_tsk_pinned);
 491		} else { /* Case 2.b: slow path */
 492			/* Remove this last CPU-pinned task breakpoint. */
 493			bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
 494					       next_tsk_pinned + hw_breakpoint_weight(bp), weight);
 495			/* Remove all from per-CPU histograms. */
 496			for_each_possible_cpu(cpu) {
 497				bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
 498						       next_tsk_pinned, -next_tsk_pinned);
 499			}
 500			/* Rebalance global task pinned histogram. */
 501			bp_slots_histogram_add(&tsk_pinned_all[type], 0, next_tsk_pinned);
 502		}
 503	} else { /* Case 3: slow path */
 504		const struct cpumask *cpumask = cpumask_of_bp(bp);
 505
 506		for_each_cpu(cpu, cpumask) {
 507			next_tsk_pinned = task_bp_pinned(cpu, bp, type);
 508			if (!enable)
 509				next_tsk_pinned += hw_breakpoint_weight(bp);
 510			bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
 511					       next_tsk_pinned, weight);
 512		}
 513	}
 514
 515	/*
 516	 * Readers want a stable snapshot of the per-task breakpoint list.
 517	 */
 518	assert_bp_constraints_lock_held(bp);
 519
 520	if (enable)
 521		return rhltable_insert(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);
 522
 523	return 0;
 524}
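/*
 * Editor's worked example (illustration, not part of the original source),
 * assuming hw_breakpoint_weight() == 1: task T already has two CPU-independent
 * data breakpoints, so tsk_pinned_all[TYPE_DATA] has one entry in the
 * "2 slots" bucket. Enabling a third breakpoint for T pinned to CPU 3 takes
 * case 2.a above:
 *
 *   - every CPU's tsk_pinned gains T in the "2 slots" bucket
 *     (bp_slots_histogram_add(..., 0, 2)),
 *   - CPU 3's tsk_pinned then moves T from "2 slots" to "3 slots"
 *     (bp_slots_histogram_add(..., 2, 1)),
 *   - tsk_pinned_all drops T from the "2 slots" bucket
 *     (bp_slots_histogram_add(..., 2, -2)).
 *
 * Disabling that breakpoint again follows case 2.b and undoes the three steps.
 */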
 525
 526__weak int arch_reserve_bp_slot(struct perf_event *bp)
 527{
 528	return 0;
 529}
 530
 531__weak void arch_release_bp_slot(struct perf_event *bp)
 532{
 533}
 534
 535/*
 536 * Function to perform processor-specific cleanup during unregistration
 537 */
 538__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
 539{
 540	/*
 541	 * A weak stub function here for those archs that don't define
 542	 * it inside arch/.../kernel/hw_breakpoint.c
 543	 */
 544}
 545
 546/*
 547 * Constraints to check before allowing this new breakpoint counter.
 548 *
 549 * Note: Flexible breakpoints are currently unimplemented, but outlined in the
 550 * below algorithm for completeness.  The implementation treats flexible as
 551 * pinned due to no guarantee that we currently always schedule flexible events
 553 * before a pinned event on the same CPU.
 553 *
 554 *  == Non-pinned counter == (Considered as pinned for now)
 555 *
 556 *   - If attached to a single cpu, check:
 557 *
 558 *       (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
 559 *           + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
 560 *
 561 *       -> If there are already non-pinned counters in this cpu, it means
 562 *          there is already a free slot for them.
 563 *          Otherwise, we check that the maximum number of per-task
 564 *          breakpoints (for this cpu) plus the number of per-cpu breakpoints
 565 *          (for this cpu) doesn't cover every register.
 566 *
 567 *   - If attached to every cpu, check:
 568 *
 569 *       (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
 570 *           + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
 571 *
 572 *       -> This is roughly the same, except we check the number of per-cpu
 573 *          breakpoints for every cpu and we keep the max one. Same for the
 574 *          per-task breakpoints.
 575 *
 576 *
 577 * == Pinned counter ==
 578 *
 579 *   - If attached to a single cpu, check:
 580 *
 581 *       ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
 582 *            + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
 583 *
 584 *       -> Same checks as before. But now the info->flexible, if any, must keep
 585 *          at least one register free (or flexible events will never be fed).
 586 *
 587 *   - If attached to every cpu, check:
 588 *
 589 *       ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
 590 *            + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
 591 */
 592static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
 593{
 594	enum bp_type_idx type;
 595	int max_pinned_slots;
 596	int weight;
 597	int ret;
 598
 599	/* We couldn't initialize breakpoint constraints on boot */
 600	if (!constraints_initialized)
 601		return -ENOMEM;
 602
 603	/* Basic checks */
 604	if (bp_type == HW_BREAKPOINT_EMPTY ||
 605	    bp_type == HW_BREAKPOINT_INVALID)
 606		return -EINVAL;
 607
 608	type = find_slot_idx(bp_type);
 609	weight = hw_breakpoint_weight(bp);
 610
 611	/* Check if this new breakpoint can be satisfied across all CPUs. */
 612	max_pinned_slots = max_bp_pinned_slots(bp, type) + weight;
 613	if (max_pinned_slots > hw_breakpoint_slots_cached(type))
 614		return -ENOSPC;
 615
 616	ret = arch_reserve_bp_slot(bp);
 617	if (ret)
 618		return ret;
 619
 620	return toggle_bp_slot(bp, true, type, weight);
 621}
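/*
 * Editor's worked example (illustration only), assuming an arch with 4 data
 * breakpoint slots (e.g. HBP_NUM == 4 on x86) and hw_breakpoint_weight() == 1:
 * if a CPU already has one pinned CPU breakpoint and the target task already
 * has two pinned breakpoints eligible on that CPU, max_bp_pinned_slots()
 * returns 3, and 3 + 1 <= 4, so the reservation succeeds. One more breakpoint
 * on the same task would need 5 > 4 slots and fails with -ENOSPC.
 */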
 622
 623int reserve_bp_slot(struct perf_event *bp)
 624{
 625	struct mutex *mtx = bp_constraints_lock(bp);
 626	int ret = __reserve_bp_slot(bp, bp->attr.bp_type);
 627
 628	bp_constraints_unlock(mtx);
 629	return ret;
 630}
 631
 632static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
 633{
 634	enum bp_type_idx type;
 635	int weight;
 636
 637	arch_release_bp_slot(bp);
 638
 639	type = find_slot_idx(bp_type);
 640	weight = hw_breakpoint_weight(bp);
 641	WARN_ON(toggle_bp_slot(bp, false, type, weight));
 642}
 643
 644void release_bp_slot(struct perf_event *bp)
 645{
 646	struct mutex *mtx = bp_constraints_lock(bp);
 647
 648	arch_unregister_hw_breakpoint(bp);
 649	__release_bp_slot(bp, bp->attr.bp_type);
 650	bp_constraints_unlock(mtx);
 651}
 652
 653static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
 654{
 655	int err;
 656
 657	__release_bp_slot(bp, old_type);
 658
 659	err = __reserve_bp_slot(bp, new_type);
 660	if (err) {
 661		/*
 662		 * Reserve the old_type slot back in case
 663		 * there's no space for the new type.
 664		 *
 665		 * This must succeed, because we just released
 666		 * the old_type slot in the __release_bp_slot
 667		 * call above. If not, something is broken.
 668		 */
 669		WARN_ON(__reserve_bp_slot(bp, old_type));
 670	}
 671
 672	return err;
 673}
 674
 675static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
 676{
 677	struct mutex *mtx = bp_constraints_lock(bp);
 678	int ret = __modify_bp_slot(bp, old_type, new_type);
 679
 680	bp_constraints_unlock(mtx);
 681	return ret;
 682}
 683
 684/*
 685 * Allow the kernel debugger to reserve breakpoint slots without
 686 * taking a lock, using the dbg_* variants of the reserve and
 687 * release breakpoint slot functions.
 688 */
 689int dbg_reserve_bp_slot(struct perf_event *bp)
 690{
 691	int ret;
 692
 693	if (bp_constraints_is_locked(bp))
 694		return -1;
 695
 696	/* Locks aren't held; disable lockdep assert checking. */
 697	lockdep_off();
 698	ret = __reserve_bp_slot(bp, bp->attr.bp_type);
 699	lockdep_on();
 700
 701	return ret;
 702}
 703
 704int dbg_release_bp_slot(struct perf_event *bp)
 705{
 706	if (bp_constraints_is_locked(bp))
 707		return -1;
 708
 709	/* Locks aren't held; disable lockdep assert checking. */
 710	lockdep_off();
 711	__release_bp_slot(bp, bp->attr.bp_type);
 712	lockdep_on();
 713
 714	return 0;
 715}
 716
 717static int hw_breakpoint_parse(struct perf_event *bp,
 718			       const struct perf_event_attr *attr,
 719			       struct arch_hw_breakpoint *hw)
 720{
 721	int err;
 722
 723	err = hw_breakpoint_arch_parse(bp, attr, hw);
 724	if (err)
 725		return err;
 726
 727	if (arch_check_bp_in_kernelspace(hw)) {
 728		if (attr->exclude_kernel)
 729			return -EINVAL;
 730		/*
 731		 * Don't let unprivileged users set a breakpoint in the trap
 732		 * path to avoid trap recursion attacks.
 733		 */
 734		if (!capable(CAP_SYS_ADMIN))
 735			return -EPERM;
 736	}
 737
 738	return 0;
 739}
 740
 741int register_perf_hw_breakpoint(struct perf_event *bp)
 742{
 743	struct arch_hw_breakpoint hw = { };
 744	int err;
 745
 746	err = reserve_bp_slot(bp);
 747	if (err)
 748		return err;
 749
 750	err = hw_breakpoint_parse(bp, &bp->attr, &hw);
 751	if (err) {
 752		release_bp_slot(bp);
 753		return err;
 754	}
 755
 756	bp->hw.info = hw;
 757
 758	return 0;
 759}
 760
 761/**
 762 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 763 * @attr: breakpoint attributes
 764 * @triggered: callback to trigger when we hit the breakpoint
 765 * @context: context data that could be used in the triggered callback
 766 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 767 */
 768struct perf_event *
 769register_user_hw_breakpoint(struct perf_event_attr *attr,
 770			    perf_overflow_handler_t triggered,
 771			    void *context,
 772			    struct task_struct *tsk)
 773{
 774	return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
 775						context);
 776}
 777EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
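/*
 * Editor's usage sketch -- illustration only, not part of the original file,
 * loosely modeled on samples/hw_breakpoint. The handler name, the helper and
 * the watched address are placeholders.
 */
#if 0
static void sample_hbp_handler(struct perf_event *bp,
			       struct perf_sample_data *data,
			       struct pt_regs *regs)
{
	pr_info("breakpoint at 0x%llx hit\n", bp->attr.bp_addr);
}

static struct perf_event *sample_hbp;

static int sample_hbp_setup(struct task_struct *tsk, unsigned long addr)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = addr;
	attr.bp_len  = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W;

	sample_hbp = register_user_hw_breakpoint(&attr, sample_hbp_handler,
						 NULL, tsk);
	return PTR_ERR_OR_ZERO(sample_hbp);	/* needs <linux/err.h> */
}
#endif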
 778
 779static void hw_breakpoint_copy_attr(struct perf_event_attr *to,
 780				    struct perf_event_attr *from)
 781{
 782	to->bp_addr = from->bp_addr;
 783	to->bp_type = from->bp_type;
 784	to->bp_len  = from->bp_len;
 785	to->disabled = from->disabled;
 786}
 787
 788int
 789modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr,
 790			        bool check)
 791{
 792	struct arch_hw_breakpoint hw = { };
 793	int err;
 794
 795	err = hw_breakpoint_parse(bp, attr, &hw);
 796	if (err)
 797		return err;
 798
 799	if (check) {
 800		struct perf_event_attr old_attr;
 801
 802		old_attr = bp->attr;
 803		hw_breakpoint_copy_attr(&old_attr, attr);
 804		if (memcmp(&old_attr, attr, sizeof(*attr)))
 805			return -EINVAL;
 806	}
 807
 808	if (bp->attr.bp_type != attr->bp_type) {
 809		err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type);
 810		if (err)
 811			return err;
 812	}
 813
 814	hw_breakpoint_copy_attr(&bp->attr, attr);
 815	bp->hw.info = hw;
 816
 817	return 0;
 818}
 819
 820/**
 821 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
 822 * @bp: the breakpoint structure to modify
 823 * @attr: new breakpoint attributes
 824 */
 825int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 826{
 827	int err;
 828
 829	/*
 830	 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
 831	 * will not be possible to raise IPIs that invoke __perf_event_disable.
 832	 * So call the function directly after making sure we are targeting the
 833	 * current task.
 834	 */
 835	if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
 836		perf_event_disable_local(bp);
 837	else
 838		perf_event_disable(bp);
 839
 840	err = modify_user_hw_breakpoint_check(bp, attr, false);
 841
 842	if (!bp->attr.disabled)
 843		perf_event_enable(bp);
 844
 845	return err;
 846}
 847EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
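/*
 * Editor's usage sketch (illustration only): moving an existing user-space
 * breakpoint to a new address. Only the bp_* fields and 'disabled' are taken
 * over from @attr, as hw_breakpoint_copy_attr() above shows; the helper name
 * is hypothetical.
 */
#if 0
static int sample_hbp_move(struct perf_event *bp, u64 new_addr)
{
	struct perf_event_attr attr = bp->attr;

	attr.bp_addr = new_addr;
	return modify_user_hw_breakpoint(bp, &attr);
}
#endif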
 848
 849/**
 850 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
 851 * @bp: the breakpoint structure to unregister
 852 */
 853void unregister_hw_breakpoint(struct perf_event *bp)
 854{
 855	if (!bp)
 856		return;
 857	perf_event_release_kernel(bp);
 858}
 859EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
 860
 861/**
 862 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
 863 * @attr: breakpoint attributes
 864 * @triggered: callback to trigger when we hit the breakpoint
 865 * @context: context data that could be used in the triggered callback
 866 *
 867 * @return a set of per_cpu pointers to perf events
 868 */
 869struct perf_event * __percpu *
 870register_wide_hw_breakpoint(struct perf_event_attr *attr,
 871			    perf_overflow_handler_t triggered,
 872			    void *context)
 873{
 874	struct perf_event * __percpu *cpu_events, *bp;
 875	long err = 0;
 876	int cpu;
 877
 878	cpu_events = alloc_percpu(typeof(*cpu_events));
 879	if (!cpu_events)
 880		return (void __percpu __force *)ERR_PTR(-ENOMEM);
 881
 882	cpus_read_lock();
 883	for_each_online_cpu(cpu) {
 884		bp = perf_event_create_kernel_counter(attr, cpu, NULL,
 885						      triggered, context);
 886		if (IS_ERR(bp)) {
 887			err = PTR_ERR(bp);
 888			break;
 889		}
 890
 891		per_cpu(*cpu_events, cpu) = bp;
 892	}
 893	cpus_read_unlock();
 894
 895	if (likely(!err))
 896		return cpu_events;
 897
 898	unregister_wide_hw_breakpoint(cpu_events);
 899	return (void __percpu __force *)ERR_PTR(err);
 900}
 901EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
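/*
 * Editor's usage sketch -- illustration only, modeled on
 * samples/hw_breakpoint/data_breakpoint.c: a kernel-wide write watchpoint.
 * 'ksym_addr' and the function names are placeholders.
 */
#if 0
static struct perf_event * __percpu *sample_wide_hbp;

static int sample_wide_hbp_setup(unsigned long ksym_addr,
				 perf_overflow_handler_t handler)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = ksym_addr;
	attr.bp_len  = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W;

	sample_wide_hbp = register_wide_hw_breakpoint(&attr, handler, NULL);
	if (IS_ERR((void __force *)sample_wide_hbp))
		return PTR_ERR((void __force *)sample_wide_hbp);
	return 0;
}

static void sample_wide_hbp_teardown(void)
{
	unregister_wide_hw_breakpoint(sample_wide_hbp);
}
#endif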
 902
 903/**
 904 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
 905 * @cpu_events: the per cpu set of events to unregister
 906 */
 907void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
 908{
 909	int cpu;
 910
 911	for_each_possible_cpu(cpu)
 912		unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));
 913
 914	free_percpu(cpu_events);
 915}
 916EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
 917
 918/**
 919 * hw_breakpoint_is_used - check if breakpoints are currently used
 920 *
 921 * Returns: true if breakpoints are used, false otherwise.
 922 */
 923bool hw_breakpoint_is_used(void)
 924{
 925	int cpu;
 926
 927	if (!constraints_initialized)
 928		return false;
 929
 930	for_each_possible_cpu(cpu) {
 931		for (int type = 0; type < TYPE_MAX; ++type) {
 932			struct bp_cpuinfo *info = get_bp_info(cpu, type);
 933
 934			if (info->cpu_pinned)
 935				return true;
 936
 937			for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
 938				if (atomic_read(&info->tsk_pinned.count[slot]))
 939					return true;
 940			}
 941		}
 942	}
 943
 944	for (int type = 0; type < TYPE_MAX; ++type) {
 945		for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
 946			/*
 947			 * Warn, because if there are CPU pinned counters, we
 948			 * should never get here; bp_cpuinfo::cpu_pinned should
 949			 * be consistent with the global cpu_pinned histogram.
 950			 */
 951			if (WARN_ON(atomic_read(&cpu_pinned[type].count[slot])))
 952				return true;
 953
 954			if (atomic_read(&tsk_pinned_all[type].count[slot]))
 955				return true;
 956		}
 957	}
 958
 959	return false;
 960}
 961
 962static struct notifier_block hw_breakpoint_exceptions_nb = {
 963	.notifier_call = hw_breakpoint_exceptions_notify,
 964	/* we need to be notified first */
 965	.priority = 0x7fffffff
 966};
 967
 968static void bp_perf_event_destroy(struct perf_event *event)
 969{
 970	release_bp_slot(event);
 971}
 972
 973static int hw_breakpoint_event_init(struct perf_event *bp)
 974{
 975	int err;
 976
 977	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
 978		return -ENOENT;
 979
 980	/*
 981	 * no branch sampling for breakpoint events
 982	 */
 983	if (has_branch_stack(bp))
 984		return -EOPNOTSUPP;
 985
 986	err = register_perf_hw_breakpoint(bp);
 987	if (err)
 988		return err;
 989
 990	bp->destroy = bp_perf_event_destroy;
 991
 992	return 0;
 993}
 994
 995static int hw_breakpoint_add(struct perf_event *bp, int flags)
 996{
 997	if (!(flags & PERF_EF_START))
 998		bp->hw.state = PERF_HES_STOPPED;
 999
1000	if (is_sampling_event(bp)) {
1001		bp->hw.last_period = bp->hw.sample_period;
1002		perf_swevent_set_period(bp);
1003	}
1004
1005	return arch_install_hw_breakpoint(bp);
1006}
1007
1008static void hw_breakpoint_del(struct perf_event *bp, int flags)
1009{
1010	arch_uninstall_hw_breakpoint(bp);
1011}
1012
1013static void hw_breakpoint_start(struct perf_event *bp, int flags)
1014{
1015	bp->hw.state = 0;
1016}
1017
1018static void hw_breakpoint_stop(struct perf_event *bp, int flags)
1019{
1020	bp->hw.state = PERF_HES_STOPPED;
1021}
1022
1023static struct pmu perf_breakpoint = {
1024	.task_ctx_nr	= perf_sw_context, /* could eventually get its own */
1025
1026	.event_init	= hw_breakpoint_event_init,
1027	.add		= hw_breakpoint_add,
1028	.del		= hw_breakpoint_del,
1029	.start		= hw_breakpoint_start,
1030	.stop		= hw_breakpoint_stop,
1031	.read		= hw_breakpoint_pmu_read,
1032};
1033
1034int __init init_hw_breakpoint(void)
1035{
1036	int ret;
1037
1038	ret = rhltable_init(&task_bps_ht, &task_bps_ht_params);
1039	if (ret)
1040		return ret;
1041
1042	ret = init_breakpoint_slots();
1043	if (ret)
1044		return ret;
1045
1046	constraints_initialized = true;
1047
1048	perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
1049
1050	return register_die_notifier(&hw_breakpoint_exceptions_nb);
1051}
v3.1
 
  1/*
  2 * This program is free software; you can redistribute it and/or modify
  3 * it under the terms of the GNU General Public License as published by
  4 * the Free Software Foundation; either version 2 of the License, or
  5 * (at your option) any later version.
  6 *
  7 * This program is distributed in the hope that it will be useful,
  8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 * GNU General Public License for more details.
 11 *
 12 * You should have received a copy of the GNU General Public License
 13 * along with this program; if not, write to the Free Software
 14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 15 *
 16 * Copyright (C) 2007 Alan Stern
 17 * Copyright (C) IBM Corporation, 2009
 18 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
 19 *
 20 * Thanks to Ingo Molnar for his many suggestions.
 21 *
 22 * Authors: Alan Stern <stern@rowland.harvard.edu>
 23 *          K.Prasad <prasad@linux.vnet.ibm.com>
 24 *          Frederic Weisbecker <fweisbec@gmail.com>
 25 */
 26
 27/*
 28 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 29 * using the CPU's debug registers.
 30 * This file contains the arch-independent routines.
 31 */
 32
 33#include <linux/irqflags.h>
 34#include <linux/kallsyms.h>
 35#include <linux/notifier.h>
 36#include <linux/kprobes.h>
 37#include <linux/kdebug.h>
 38#include <linux/kernel.h>
 39#include <linux/module.h>
 40#include <linux/percpu.h>
 41#include <linux/sched.h>
 42#include <linux/init.h>
 43#include <linux/slab.h>
 44#include <linux/list.h>
 45#include <linux/cpu.h>
 46#include <linux/smp.h>
 47
 48#include <linux/hw_breakpoint.h>
 49
 50
 51/*
 52 * Constraints data
 53 */
 54
 55/* Number of pinned cpu breakpoints in a cpu */
 56static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);
 57
 58/* Number of pinned task breakpoints in a cpu */
 59static DEFINE_PER_CPU(unsigned int *, nr_task_bp_pinned[TYPE_MAX]);
 60
 61/* Number of non-pinned cpu/task breakpoints in a cpu */
 62static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
 63
 64static int nr_slots[TYPE_MAX];
 65
 66/* Keep track of the breakpoints attached to tasks */
 67static LIST_HEAD(bp_task_head);
 68
 69static int constraints_initialized;
 70
 71/* Gather the number of total pinned and un-pinned bp in a cpuset */
 72struct bp_busy_slots {
 73	unsigned int pinned;
 74	unsigned int flexible;
 75};
 76
 77/* Serialize accesses to the above constraints */
 78static DEFINE_MUTEX(nr_bp_mutex);
 79
 80__weak int hw_breakpoint_weight(struct perf_event *bp)
 81{
 82	return 1;
 83}
 84
 85static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
 86{
 87	if (bp->attr.bp_type & HW_BREAKPOINT_RW)
 88		return TYPE_DATA;
 89
 90	return TYPE_INST;
 91}
 92
 93/*
 94 * Report the maximum number of pinned breakpoints a task
  95 * has in this cpu
 96 */
 97static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 98{
 99	int i;
100	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
101
102	for (i = nr_slots[type] - 1; i >= 0; i--) {
103		if (tsk_pinned[i] > 0)
104			return i + 1;
105	}
106
107	return 0;
108}
109
110/*
111 * Count the number of breakpoints of the same type and same task.
 112 * The given event must not be on the list.
113 */
114static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type)
115{
116	struct task_struct *tsk = bp->hw.bp_target;
117	struct perf_event *iter;
118	int count = 0;
119
120	list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
121		if (iter->hw.bp_target == tsk && find_slot_idx(iter) == type)
122			count += hw_breakpoint_weight(iter);
123	}
124
125	return count;
126}
127
128/*
129 * Report the number of pinned/un-pinned breakpoints we have in
130 * a given cpu (cpu > -1) or in all of them (cpu = -1).
131 */
132static void
133fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
134		    enum bp_type_idx type)
135{
136	int cpu = bp->cpu;
137	struct task_struct *tsk = bp->hw.bp_target;
138
139	if (cpu >= 0) {
140		slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
141		if (!tsk)
142			slots->pinned += max_task_bp_pinned(cpu, type);
143		else
144			slots->pinned += task_bp_pinned(bp, type);
145		slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
146
147		return;
148	}
149
150	for_each_online_cpu(cpu) {
151		unsigned int nr;
152
153		nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
154		if (!tsk)
155			nr += max_task_bp_pinned(cpu, type);
156		else
157			nr += task_bp_pinned(bp, type);
158
159		if (nr > slots->pinned)
160			slots->pinned = nr;
161
162		nr = per_cpu(nr_bp_flexible[type], cpu);
163
164		if (nr > slots->flexible)
165			slots->flexible = nr;
166	}
167}
168
169/*
170 * For now, continue to consider flexible as pinned, until we can
171 * ensure no flexible event can ever be scheduled before a pinned event
172 * in a same cpu.
173 */
174static void
175fetch_this_slot(struct bp_busy_slots *slots, int weight)
176{
177	slots->pinned += weight;
178}
179
180/*
181 * Add a pinned breakpoint for the given task in our constraint table
182 */
183static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable,
184				enum bp_type_idx type, int weight)
185{
186	unsigned int *tsk_pinned;
187	int old_count = 0;
188	int old_idx = 0;
189	int idx = 0;
190
191	old_count = task_bp_pinned(bp, type);
192	old_idx = old_count - 1;
193	idx = old_idx + weight;
194
195	/* tsk_pinned[n] is the number of tasks having n breakpoints */
196	tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
197	if (enable) {
198		tsk_pinned[idx]++;
199		if (old_count > 0)
200			tsk_pinned[old_idx]--;
201	} else {
202		tsk_pinned[idx]--;
203		if (old_count > 0)
204			tsk_pinned[old_idx]++;
205	}
206}
207
208/*
209 * Add/remove the given breakpoint in our constraint table
210 */
211static void
212toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
213	       int weight)
214{
215	int cpu = bp->cpu;
216	struct task_struct *tsk = bp->hw.bp_target;
217
218	/* Pinned counter cpu profiling */
219	if (!tsk) {
220
221		if (enable)
222			per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
223		else
224			per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
225		return;
226	}
227
228	/* Pinned counter task profiling */
229
230	if (!enable)
231		list_del(&bp->hw.bp_list);
232
233	if (cpu >= 0) {
234		toggle_bp_task_slot(bp, cpu, enable, type, weight);
235	} else {
236		for_each_online_cpu(cpu)
237			toggle_bp_task_slot(bp, cpu, enable, type, weight);
238	}
239
240	if (enable)
241		list_add_tail(&bp->hw.bp_list, &bp_task_head);
242}
243
244/*
245 * Function to perform processor-specific cleanup during unregistration
246 */
247__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
248{
249	/*
250	 * A weak stub function here for those archs that don't define
251	 * it inside arch/.../kernel/hw_breakpoint.c
252	 */
253}
254
255/*
 256 * Constraints to check before allowing this new breakpoint counter:
257 *
258 *  == Non-pinned counter == (Considered as pinned for now)
259 *
260 *   - If attached to a single cpu, check:
261 *
262 *       (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
263 *           + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM
264 *
265 *       -> If there are already non-pinned counters in this cpu, it means
266 *          there is already a free slot for them.
 267 *          Otherwise, we check that the maximum number of per-task
 268 *          breakpoints (for this cpu) plus the number of per-cpu breakpoints
 269 *          (for this cpu) doesn't cover every register.
270 *
 271 *   - If attached to every cpu, check:
272 *
273 *       (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
274 *           + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM
275 *
 276 *       -> This is roughly the same, except we check the number of per-cpu
 277 *          breakpoints for every cpu and we keep the max one. Same for the
 278 *          per-task breakpoints.
279 *
280 *
281 * == Pinned counter ==
282 *
283 *   - If attached to a single cpu, check:
284 *
285 *       ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
286 *            + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM
287 *
 288 *       -> Same checks as before. But now the nr_bp_flexible, if any, must keep
 289 *          at least one register free (or flexible events will never be fed).
290 *
 291 *   - If attached to every cpu, check:
292 *
293 *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
294 *            + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
295 */
296static int __reserve_bp_slot(struct perf_event *bp)
297{
298	struct bp_busy_slots slots = {0};
299	enum bp_type_idx type;
300	int weight;
301
302	/* We couldn't initialize breakpoint constraints on boot */
303	if (!constraints_initialized)
304		return -ENOMEM;
305
306	/* Basic checks */
307	if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
308	    bp->attr.bp_type == HW_BREAKPOINT_INVALID)
309		return -EINVAL;
310
311	type = find_slot_idx(bp);
312	weight = hw_breakpoint_weight(bp);
313
314	fetch_bp_busy_slots(&slots, bp, type);
315	/*
316	 * Simulate the addition of this breakpoint to the constraints
317	 * and see the result.
318	 */
319	fetch_this_slot(&slots, weight);
320
321	/* Flexible counters need to keep at least one slot */
322	if (slots.pinned + (!!slots.flexible) > nr_slots[type])
323		return -ENOSPC;
324
325	toggle_bp_slot(bp, true, type, weight);
326
327	return 0;
328}
329
330int reserve_bp_slot(struct perf_event *bp)
331{
332	int ret;
333
334	mutex_lock(&nr_bp_mutex);
335
336	ret = __reserve_bp_slot(bp);
337
338	mutex_unlock(&nr_bp_mutex);
339
340	return ret;
341}
342
343static void __release_bp_slot(struct perf_event *bp)
344{
345	enum bp_type_idx type;
346	int weight;
347
348	type = find_slot_idx(bp);
349	weight = hw_breakpoint_weight(bp);
350	toggle_bp_slot(bp, false, type, weight);
351}
352
353void release_bp_slot(struct perf_event *bp)
354{
355	mutex_lock(&nr_bp_mutex);
356
357	arch_unregister_hw_breakpoint(bp);
358	__release_bp_slot(bp);
359
360	mutex_unlock(&nr_bp_mutex);
361}
362
363/*
364 * Allow the kernel debugger to reserve breakpoint slots without
 365 * taking a lock, using the dbg_* variants of the reserve and
 366 * release breakpoint slot functions.
367 */
368int dbg_reserve_bp_slot(struct perf_event *bp)
369{
370	if (mutex_is_locked(&nr_bp_mutex))
371		return -1;
372
373	return __reserve_bp_slot(bp);
374}
375
376int dbg_release_bp_slot(struct perf_event *bp)
377{
378	if (mutex_is_locked(&nr_bp_mutex))
379		return -1;
380
381	__release_bp_slot(bp);
382
383	return 0;
384}
385
386static int validate_hw_breakpoint(struct perf_event *bp)
387{
388	int ret;
389
390	ret = arch_validate_hwbkpt_settings(bp);
391	if (ret)
392		return ret;
393
394	if (arch_check_bp_in_kernelspace(bp)) {
395		if (bp->attr.exclude_kernel)
396			return -EINVAL;
397		/*
398		 * Don't let unprivileged users set a breakpoint in the trap
399		 * path to avoid trap recursion attacks.
400		 */
401		if (!capable(CAP_SYS_ADMIN))
402			return -EPERM;
403	}
404
405	return 0;
406}
407
408int register_perf_hw_breakpoint(struct perf_event *bp)
409{
410	int ret;
411
412	ret = reserve_bp_slot(bp);
413	if (ret)
414		return ret;
415
416	ret = validate_hw_breakpoint(bp);
417
418	/* if arch_validate_hwbkpt_settings() fails then release bp slot */
419	if (ret)
420		release_bp_slot(bp);
421
422	return ret;
423}
424
425/**
426 * register_user_hw_breakpoint - register a hardware breakpoint for user space
427 * @attr: breakpoint attributes
428 * @triggered: callback to trigger when we hit the breakpoint
429 * @tsk: pointer to 'task_struct' of the process to which the address belongs
430 */
431struct perf_event *
432register_user_hw_breakpoint(struct perf_event_attr *attr,
433			    perf_overflow_handler_t triggered,
434			    void *context,
435			    struct task_struct *tsk)
436{
437	return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
438						context);
439}
440EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
441
442/**
443 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
444 * @bp: the breakpoint structure to modify
445 * @attr: new breakpoint attributes
446 * @triggered: callback to trigger when we hit the breakpoint
447 * @tsk: pointer to 'task_struct' of the process to which the address belongs
448 */
449int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
450{
451	u64 old_addr = bp->attr.bp_addr;
452	u64 old_len = bp->attr.bp_len;
453	int old_type = bp->attr.bp_type;
454	int err = 0;
455
456	perf_event_disable(bp);
457
458	bp->attr.bp_addr = attr->bp_addr;
459	bp->attr.bp_type = attr->bp_type;
460	bp->attr.bp_len = attr->bp_len;
461
462	if (attr->disabled)
463		goto end;
464
465	err = validate_hw_breakpoint(bp);
466	if (!err)
467		perf_event_enable(bp);
468
469	if (err) {
470		bp->attr.bp_addr = old_addr;
471		bp->attr.bp_type = old_type;
472		bp->attr.bp_len = old_len;
473		if (!bp->attr.disabled)
474			perf_event_enable(bp);
475
476		return err;
477	}
478
479end:
480	bp->attr.disabled = attr->disabled;
481
482	return 0;
483}
484EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
485
486/**
487 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
488 * @bp: the breakpoint structure to unregister
489 */
490void unregister_hw_breakpoint(struct perf_event *bp)
491{
492	if (!bp)
493		return;
494	perf_event_release_kernel(bp);
495}
496EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
497
498/**
499 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
500 * @attr: breakpoint attributes
501 * @triggered: callback to trigger when we hit the breakpoint
502 *
503 * @return a set of per_cpu pointers to perf events
504 */
505struct perf_event * __percpu *
506register_wide_hw_breakpoint(struct perf_event_attr *attr,
507			    perf_overflow_handler_t triggered,
508			    void *context)
509{
510	struct perf_event * __percpu *cpu_events, **pevent, *bp;
511	long err;
512	int cpu;
513
514	cpu_events = alloc_percpu(typeof(*cpu_events));
515	if (!cpu_events)
516		return (void __percpu __force *)ERR_PTR(-ENOMEM);
517
518	get_online_cpus();
519	for_each_online_cpu(cpu) {
520		pevent = per_cpu_ptr(cpu_events, cpu);
521		bp = perf_event_create_kernel_counter(attr, cpu, NULL,
522						      triggered, context);
523
524		*pevent = bp;
525
526		if (IS_ERR(bp)) {
527			err = PTR_ERR(bp);
528			goto fail;
529		}
530	}
531	put_online_cpus();
532
533	return cpu_events;
534
535fail:
536	for_each_online_cpu(cpu) {
537		pevent = per_cpu_ptr(cpu_events, cpu);
538		if (IS_ERR(*pevent))
539			break;
540		unregister_hw_breakpoint(*pevent);
541	}
542	put_online_cpus();
543
544	free_percpu(cpu_events);
545	return (void __percpu __force *)ERR_PTR(err);
546}
547EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
548
549/**
550 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
551 * @cpu_events: the per cpu set of events to unregister
552 */
553void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
554{
555	int cpu;
556	struct perf_event **pevent;
557
558	for_each_possible_cpu(cpu) {
559		pevent = per_cpu_ptr(cpu_events, cpu);
560		unregister_hw_breakpoint(*pevent);
561	}
562	free_percpu(cpu_events);
563}
564EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
565
566static struct notifier_block hw_breakpoint_exceptions_nb = {
567	.notifier_call = hw_breakpoint_exceptions_notify,
568	/* we need to be notified first */
569	.priority = 0x7fffffff
570};
571
572static void bp_perf_event_destroy(struct perf_event *event)
573{
574	release_bp_slot(event);
575}
576
577static int hw_breakpoint_event_init(struct perf_event *bp)
578{
579	int err;
580
581	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
582		return -ENOENT;
583
584	err = register_perf_hw_breakpoint(bp);
585	if (err)
586		return err;
587
588	bp->destroy = bp_perf_event_destroy;
589
590	return 0;
591}
592
593static int hw_breakpoint_add(struct perf_event *bp, int flags)
594{
595	if (!(flags & PERF_EF_START))
596		bp->hw.state = PERF_HES_STOPPED;
597
598	return arch_install_hw_breakpoint(bp);
599}
600
601static void hw_breakpoint_del(struct perf_event *bp, int flags)
602{
603	arch_uninstall_hw_breakpoint(bp);
604}
605
606static void hw_breakpoint_start(struct perf_event *bp, int flags)
607{
608	bp->hw.state = 0;
609}
610
611static void hw_breakpoint_stop(struct perf_event *bp, int flags)
612{
613	bp->hw.state = PERF_HES_STOPPED;
614}
615
616static struct pmu perf_breakpoint = {
617	.task_ctx_nr	= perf_sw_context, /* could eventually get its own */
618
619	.event_init	= hw_breakpoint_event_init,
620	.add		= hw_breakpoint_add,
621	.del		= hw_breakpoint_del,
622	.start		= hw_breakpoint_start,
623	.stop		= hw_breakpoint_stop,
624	.read		= hw_breakpoint_pmu_read,
625};
626
627int __init init_hw_breakpoint(void)
628{
629	unsigned int **task_bp_pinned;
630	int cpu, err_cpu;
631	int i;
632
633	for (i = 0; i < TYPE_MAX; i++)
634		nr_slots[i] = hw_breakpoint_slots(i);
635
636	for_each_possible_cpu(cpu) {
637		for (i = 0; i < TYPE_MAX; i++) {
638			task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu);
639			*task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i],
640						  GFP_KERNEL);
641			if (!*task_bp_pinned)
642				goto err_alloc;
643		}
644	}
645
646	constraints_initialized = 1;
647
648	perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
649
650	return register_die_notifier(&hw_breakpoint_exceptions_nb);
651
652 err_alloc:
653	for_each_possible_cpu(err_cpu) {
654		if (err_cpu == cpu)
655			break;
656		for (i = 0; i < TYPE_MAX; i++)
657			kfree(per_cpu(nr_task_bp_pinned[i], cpu));
658	}
659
660	return -ENOMEM;
661}
662
663