kernel/bpf/trampoline.c, v6.2
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Copyright (c) 2019 Facebook */
   3#include <linux/hash.h>
   4#include <linux/bpf.h>
   5#include <linux/filter.h>
   6#include <linux/ftrace.h>
   7#include <linux/rbtree_latch.h>
   8#include <linux/perf_event.h>
   9#include <linux/btf.h>
  10#include <linux/rcupdate_trace.h>
  11#include <linux/rcupdate_wait.h>
  12#include <linux/module.h>
  13#include <linux/static_call.h>
  14#include <linux/bpf_verifier.h>
  15#include <linux/bpf_lsm.h>
  16#include <linux/delay.h>
  17
  18/* dummy _ops. The verifier will operate on target program's ops. */
  19const struct bpf_verifier_ops bpf_extension_verifier_ops = {
  20};
  21const struct bpf_prog_ops bpf_extension_prog_ops = {
  22};
  23
  24/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
  25#define TRAMPOLINE_HASH_BITS 10
  26#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
  27
  28static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
  29
  30/* serializes access to trampoline_table */
  31static DEFINE_MUTEX(trampoline_mutex);
  32
  33#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
  34static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
  35
  36static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd)
  37{
  38	struct bpf_trampoline *tr = ops->private;
  39	int ret = 0;
  40
  41	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
  42		/* This is called inside register_ftrace_direct_multi(), so
  43		 * tr->mutex is already locked.
  44		 */
  45		lockdep_assert_held_once(&tr->mutex);
  46
  47		/* Instead of updating the trampoline here, we propagate
  48		 * -EAGAIN to register_ftrace_direct_multi(). Then we can
  49		 * retry register_ftrace_direct_multi() after updating the
  50		 * trampoline.
  51		 */
  52		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
  53		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) {
  54			if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY))
  55				return -EBUSY;
  56
  57			tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
  58			return -EAGAIN;
  59		}
  60
  61		return 0;
  62	}
  63
  64	/* The normal locking order is
  65	 *    tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
  66	 *
  67	 * The following two commands are called from
  68	 *
  69	 *   prepare_direct_functions_for_ipmodify
  70	 *   cleanup_direct_functions_after_ipmodify
  71	 *
  72	 * In both cases, direct_mutex is already locked. Use
  73	 * mutex_trylock(&tr->mutex) to avoid deadlock in race condition
  74	 * (something else is making changes to this same trampoline).
  75	 */
  76	if (!mutex_trylock(&tr->mutex)) {
  77		/* sleep 1 ms to make sure whatever holding tr->mutex makes
  78		 * some progress.
  79		 */
  80		msleep(1);
  81		return -EAGAIN;
  82	}
  83
  84	switch (cmd) {
  85	case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER:
  86		tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
  87
  88		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
  89		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK))
  90			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
  91		break;
  92	case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
  93		tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;
  94
  95		if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
  96			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
  97		break;
  98	default:
  99		ret = -EINVAL;
 100		break;
 101	}
 102
 103	mutex_unlock(&tr->mutex);
 104	return ret;
 105}
 106#endif
 107
 108bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
 109{
 110	enum bpf_attach_type eatype = prog->expected_attach_type;
 111	enum bpf_prog_type ptype = prog->type;
 112
 113	return (ptype == BPF_PROG_TYPE_TRACING &&
 114		(eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
 115		 eatype == BPF_MODIFY_RETURN)) ||
 116		(ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
 117}
 118
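The attach types tested above are exactly the ones served by a BPF trampoline: fentry/fexit/fmod_ret tracing programs and BPF_LSM_MAC programs. For orientation, a minimal BPF-side sketch of such programs (illustrative only; it assumes a BTF-generated vmlinux.h, and do_unlinkat is just an example target, any BTF-visible kernel function works):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* runs from the trampoline before do_unlinkat() */
SEC("fentry/do_unlinkat")
int BPF_PROG(unlink_enter, int dfd, struct filename *name)
{
	return 0;
}

/* runs from the trampoline after do_unlinkat(); ret is its return value */
SEC("fexit/do_unlinkat")
int BPF_PROG(unlink_exit, int dfd, struct filename *name, long ret)
{
	return 0;
}

char LICENSE[] SEC("license") = "GPL";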
 119void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
 120{
 121	ksym->start = (unsigned long) data;
 122	ksym->end = ksym->start + PAGE_SIZE;
 123	bpf_ksym_add(ksym);
 124	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
 125			   PAGE_SIZE, false, ksym->name);
 126}
 127
 128void bpf_image_ksym_del(struct bpf_ksym *ksym)
 129{
 130	bpf_ksym_del(ksym);
 131	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
 132			   PAGE_SIZE, true, ksym->name);
 133}
 134
 135static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 136{
 137	struct bpf_trampoline *tr;
 138	struct hlist_head *head;
 139	int i;
 140
 141	mutex_lock(&trampoline_mutex);
 142	head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
 143	hlist_for_each_entry(tr, head, hlist) {
 144		if (tr->key == key) {
 145			refcount_inc(&tr->refcnt);
 146			goto out;
 147		}
 148	}
 149	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
 150	if (!tr)
 151		goto out;
 152#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 153	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
 154	if (!tr->fops) {
 155		kfree(tr);
 156		tr = NULL;
 157		goto out;
 158	}
 159	tr->fops->private = tr;
 160	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
 161#endif
 162
 163	tr->key = key;
 164	INIT_HLIST_NODE(&tr->hlist);
 165	hlist_add_head(&tr->hlist, head);
 166	refcount_set(&tr->refcnt, 1);
 167	mutex_init(&tr->mutex);
 168	for (i = 0; i < BPF_TRAMP_MAX; i++)
 169		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
 170out:
 171	mutex_unlock(&trampoline_mutex);
 172	return tr;
 173}
 174
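/* Note: each attach target has at most one trampoline. The lookup above either
 * takes a reference on an existing entry or allocates a new one under the
 * 64-bit key. That key comes from bpf_trampoline_compute_key() in
 * include/linux/bpf_verifier.h, which (roughly) packs the attach-target
 * identity, the target prog id for freplace or otherwise the BTF object id,
 * into the upper 32 bits and the BTF type id of the traced function into the
 * lower bits.
 */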
 175static int bpf_trampoline_module_get(struct bpf_trampoline *tr)
 176{
 177	struct module *mod;
 178	int err = 0;
 179
 180	preempt_disable();
 181	mod = __module_text_address((unsigned long) tr->func.addr);
 182	if (mod && !try_module_get(mod))
 183		err = -ENOENT;
 184	preempt_enable();
 185	tr->mod = mod;
 186	return err;
 187}
 188
 189static void bpf_trampoline_module_put(struct bpf_trampoline *tr)
 190{
 191	module_put(tr->mod);
 192	tr->mod = NULL;
 193}
 194
 195static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
 196{
 197	void *ip = tr->func.addr;
 198	int ret;
 199
 200	if (tr->func.ftrace_managed)
 201		ret = unregister_ftrace_direct_multi(tr->fops, (long)old_addr);
 202	else
 203		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
 204
 205	if (!ret)
 206		bpf_trampoline_module_put(tr);
 207	return ret;
 208}
 209
 210static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr,
 211			 bool lock_direct_mutex)
 212{
 213	void *ip = tr->func.addr;
 214	int ret;
 215
 216	if (tr->func.ftrace_managed) {
 217		if (lock_direct_mutex)
 218			ret = modify_ftrace_direct_multi(tr->fops, (long)new_addr);
 219		else
 220			ret = modify_ftrace_direct_multi_nolock(tr->fops, (long)new_addr);
 221	} else {
 222		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
 223	}
 224	return ret;
 225}
 226
 227/* first time registering */
 228static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
 229{
 230	void *ip = tr->func.addr;
 231	unsigned long faddr;
 232	int ret;
 233
 234	faddr = ftrace_location((unsigned long)ip);
 235	if (faddr) {
 236		if (!tr->fops)
 237			return -ENOTSUPP;
 238		tr->func.ftrace_managed = true;
 239	}
 240
 241	if (bpf_trampoline_module_get(tr))
 242		return -ENOENT;
 243
 244	if (tr->func.ftrace_managed) {
 245		ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
 246		ret = register_ftrace_direct_multi(tr->fops, (long)new_addr);
 247	} else {
 248		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
 249	}
 250
 251	if (ret)
 252		bpf_trampoline_module_put(tr);
 253	return ret;
 254}
 255
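register_fentry() picks between two mechanisms: if ftrace_location() finds a patch site for the target, attachment goes through the ftrace direct-call API; otherwise bpf_arch_text_poke() patches the function entry directly. A rough user-space way to tell which path a symbol will take is to look it up in available_filter_functions (sketch below assumes tracefs is mounted at /sys/kernel/tracing):

#include <stdio.h>
#include <string.h>

/* 1: "sym" has an ftrace patch site (ftrace-managed attach), 0: not listed,
 * -1: tracefs not available. Illustrative only.
 */
static int has_ftrace_site(const char *sym)
{
	FILE *f = fopen("/sys/kernel/tracing/available_filter_functions", "r");
	char line[256];
	int found = 0;

	if (!f)
		return -1;
	while (fgets(line, sizeof(line), f)) {
		line[strcspn(line, " \t\n")] = '\0';	/* drop trailing " [module]" / newline */
		if (!strcmp(line, sym)) {
			found = 1;
			break;
		}
	}
	fclose(f);
	return found;
}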
 256static struct bpf_tramp_links *
 257bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
 258{
 259	struct bpf_tramp_link *link;
 260	struct bpf_tramp_links *tlinks;
 261	struct bpf_tramp_link **links;
 262	int kind;
 263
 264	*total = 0;
 265	tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
 266	if (!tlinks)
 267		return ERR_PTR(-ENOMEM);
 268
 269	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
 270		tlinks[kind].nr_links = tr->progs_cnt[kind];
 271		*total += tr->progs_cnt[kind];
 272		links = tlinks[kind].links;
 273
 274		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
 275			*ip_arg |= link->link.prog->call_get_func_ip;
 276			*links++ = link;
 277		}
 278	}
 279	return tlinks;
 280}
 281
 282static void __bpf_tramp_image_put_deferred(struct work_struct *work)
 283{
 284	struct bpf_tramp_image *im;
 285
 286	im = container_of(work, struct bpf_tramp_image, work);
 287	bpf_image_ksym_del(&im->ksym);
 288	bpf_jit_free_exec(im->image);
 289	bpf_jit_uncharge_modmem(PAGE_SIZE);
 290	percpu_ref_exit(&im->pcref);
 291	kfree_rcu(im, rcu);
 292}
 293
 294/* callback, fexit step 3 or fentry step 2 */
 295static void __bpf_tramp_image_put_rcu(struct rcu_head *rcu)
 296{
 297	struct bpf_tramp_image *im;
 298
 299	im = container_of(rcu, struct bpf_tramp_image, rcu);
 300	INIT_WORK(&im->work, __bpf_tramp_image_put_deferred);
 301	schedule_work(&im->work);
 302}
 303
 304/* callback, fexit step 2. Called after percpu_ref_kill confirms. */
 305static void __bpf_tramp_image_release(struct percpu_ref *pcref)
 306{
 307	struct bpf_tramp_image *im;
 308
 309	im = container_of(pcref, struct bpf_tramp_image, pcref);
 310	call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
 311}
 312
 313/* callback, fexit or fentry step 1 */
 314static void __bpf_tramp_image_put_rcu_tasks(struct rcu_head *rcu)
 315{
 316	struct bpf_tramp_image *im;
 317
 318	im = container_of(rcu, struct bpf_tramp_image, rcu);
 319	if (im->ip_after_call)
 320		/* the case of fmod_ret/fexit trampoline and CONFIG_PREEMPTION=y */
 321		percpu_ref_kill(&im->pcref);
 322	else
 323		/* the case of fentry trampoline */
 324		call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
 325}
 326
 327static void bpf_tramp_image_put(struct bpf_tramp_image *im)
 328{
 329	/* The trampoline image that calls original function is using:
 330	 * rcu_read_lock_trace to protect sleepable bpf progs
 331	 * rcu_read_lock to protect normal bpf progs
 332	 * percpu_ref to protect trampoline itself
 333	 * rcu tasks to protect trampoline asm not covered by percpu_ref
 334	 * (which are few asm insns before __bpf_tramp_enter and
 335	 *  after __bpf_tramp_exit)
 336	 *
 337	 * The trampoline is unreachable before bpf_tramp_image_put().
 338	 *
 339	 * First, patch the trampoline to avoid calling into fexit progs.
 340	 * The progs will be freed even if the original function is still
 341	 * executing or sleeping.
 342	 * In case of CONFIG_PREEMPT=y use call_rcu_tasks() to wait on
 343	 * first few asm instructions to execute and call into
 344	 * __bpf_tramp_enter->percpu_ref_get.
 345	 * Then use percpu_ref_kill to wait for the trampoline and the original
 346	 * function to finish.
 347	 * Then use call_rcu_tasks() to make sure few asm insns in
 348	 * the trampoline epilogue are done as well.
 349	 *
 350	 * In !PREEMPT case the task that got interrupted in the first asm
 351	 * insns won't go through an RCU quiescent state which the
 352	 * percpu_ref_kill will be waiting for. Hence the first
 353	 * call_rcu_tasks() is not necessary.
 354	 */
 355	if (im->ip_after_call) {
 356		int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP,
 357					     NULL, im->ip_epilogue);
 358		WARN_ON(err);
 359		if (IS_ENABLED(CONFIG_PREEMPTION))
 360			call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
 361		else
 362			percpu_ref_kill(&im->pcref);
 363		return;
 364	}
 365
 366	/* The trampoline without fexit and fmod_ret progs doesn't call original
 367	 * function and doesn't use percpu_ref.
 368	 * Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
 369	 * Then use call_rcu_tasks() to wait for the rest of trampoline asm
 370	 * and normal progs.
 371	 */
 372	call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
 373}
 374
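/* Teardown sequencing, tying the callbacks above together:
 *
 *   image with fexit/fmod_ret progs (CALL_ORIG), CONFIG_PREEMPTION=y:
 *     bpf_tramp_image_put()
 *       -> call_rcu_tasks(__bpf_tramp_image_put_rcu_tasks)        step 1
 *       -> percpu_ref_kill -> __bpf_tramp_image_release           step 2
 *       -> call_rcu_tasks(__bpf_tramp_image_put_rcu)              step 3
 *       -> schedule_work(__bpf_tramp_image_put_deferred) -> free
 *     (with CONFIG_PREEMPTION=n, step 1 is skipped and percpu_ref_kill
 *      is called directly)
 *
 *   fentry-only image:
 *     bpf_tramp_image_put()
 *       -> call_rcu_tasks_trace(__bpf_tramp_image_put_rcu_tasks)  step 1
 *       -> call_rcu_tasks(__bpf_tramp_image_put_rcu)              step 2
 *       -> schedule_work(__bpf_tramp_image_put_deferred) -> free
 */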
 375static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx)
 376{
 377	struct bpf_tramp_image *im;
 378	struct bpf_ksym *ksym;
 379	void *image;
 380	int err = -ENOMEM;
 381
 382	im = kzalloc(sizeof(*im), GFP_KERNEL);
 383	if (!im)
 384		goto out;
 385
 386	err = bpf_jit_charge_modmem(PAGE_SIZE);
 387	if (err)
 388		goto out_free_im;
 389
 390	err = -ENOMEM;
 391	im->image = image = bpf_jit_alloc_exec(PAGE_SIZE);
 392	if (!image)
 393		goto out_uncharge;
 394	set_vm_flush_reset_perms(image);
 395
 396	err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL);
 397	if (err)
 398		goto out_free_image;
 399
 400	ksym = &im->ksym;
 401	INIT_LIST_HEAD_RCU(&ksym->lnode);
 402	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu_%u", key, idx);
 403	bpf_image_ksym_add(image, ksym);
 404	return im;
 405
 406out_free_image:
 407	bpf_jit_free_exec(im->image);
 408out_uncharge:
 409	bpf_jit_uncharge_modmem(PAGE_SIZE);
 410out_free_im:
 411	kfree(im);
 412out:
 413	return ERR_PTR(err);
 414}
 415
 416static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
 417{
 418	struct bpf_tramp_image *im;
 419	struct bpf_tramp_links *tlinks;
 420	u32 orig_flags = tr->flags;
 421	bool ip_arg = false;
 422	int err, total;
 423
 424	tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg);
 425	if (IS_ERR(tlinks))
 426		return PTR_ERR(tlinks);
 427
 428	if (total == 0) {
 429		err = unregister_fentry(tr, tr->cur_image->image);
 430		bpf_tramp_image_put(tr->cur_image);
 431		tr->cur_image = NULL;
 432		tr->selector = 0;
 433		goto out;
 434	}
 435
 436	im = bpf_tramp_image_alloc(tr->key, tr->selector);
 437	if (IS_ERR(im)) {
 438		err = PTR_ERR(im);
 439		goto out;
 440	}
 441
 442	/* clear all bits except SHARE_IPMODIFY */
 443	tr->flags &= BPF_TRAMP_F_SHARE_IPMODIFY;
 444
 445	if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
 446	    tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
 447		/* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME
 448		 * should not be set together.
 449		 */
 450		tr->flags |= BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
 451	} else {
 452		tr->flags |= BPF_TRAMP_F_RESTORE_REGS;
 453	}
 454
 455	if (ip_arg)
 456		tr->flags |= BPF_TRAMP_F_IP_ARG;
 457
 458#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 459again:
 460	if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) &&
 461	    (tr->flags & BPF_TRAMP_F_CALL_ORIG))
 462		tr->flags |= BPF_TRAMP_F_ORIG_STACK;
 463#endif
 464
 465	err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE,
 466					  &tr->func.model, tr->flags, tlinks,
 467					  tr->func.addr);
 468	if (err < 0)
 469		goto out;
 470
 471	set_memory_rox((long)im->image, 1);
 472
 473	WARN_ON(tr->cur_image && tr->selector == 0);
 474	WARN_ON(!tr->cur_image && tr->selector);
 475	if (tr->cur_image)
 476		/* progs already running at this address */
 477		err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex);
 478	else
 479		/* first time registering */
 480		err = register_fentry(tr, im->image);
 481
 482#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 483	if (err == -EAGAIN) {
 484		/* -EAGAIN from bpf_tramp_ftrace_ops_func. Now
 485		 * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the
 486		 * trampoline again, and retry register.
 487		 */
 488		/* reset fops->func and fops->trampoline for re-register */
 489		tr->fops->func = NULL;
 490		tr->fops->trampoline = 0;
 491
 492		/* reset im->image memory attr for arch_prepare_bpf_trampoline */
 493		set_memory_nx((long)im->image, 1);
 494		set_memory_rw((long)im->image, 1);
 495		goto again;
 496	}
 497#endif
 498	if (err)
 499		goto out;
 500
 501	if (tr->cur_image)
 502		bpf_tramp_image_put(tr->cur_image);
 503	tr->cur_image = im;
 504	tr->selector++;
 505out:
 506	/* If any error happens, restore previous flags */
 507	if (err)
 508		tr->flags = orig_flags;
 509	kfree(tlinks);
 510	return err;
 511}
 512
 513static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
 514{
 515	switch (prog->expected_attach_type) {
 516	case BPF_TRACE_FENTRY:
 517		return BPF_TRAMP_FENTRY;
 518	case BPF_MODIFY_RETURN:
 519		return BPF_TRAMP_MODIFY_RETURN;
 520	case BPF_TRACE_FEXIT:
 521		return BPF_TRAMP_FEXIT;
 522	case BPF_LSM_MAC:
 523		if (!prog->aux->attach_func_proto->type)
 524			/* The function returns void, we cannot modify its
 525			 * return value.
 526			 */
 527			return BPF_TRAMP_FEXIT;
 528		else
 529			return BPF_TRAMP_MODIFY_RETURN;
 530	default:
 531		return BPF_TRAMP_REPLACE;
 532	}
 533}
 534
 535static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
 536{
 537	enum bpf_tramp_prog_type kind;
 538	struct bpf_tramp_link *link_exiting;
 539	int err = 0;
 540	int cnt = 0, i;
 541
 542	kind = bpf_attach_type_to_tramp(link->link.prog);
 543	if (tr->extension_prog)
 544		/* cannot attach fentry/fexit if extension prog is attached.
 545		 * cannot overwrite extension prog either.
 546		 */
 547		return -EBUSY;
 548
 549	for (i = 0; i < BPF_TRAMP_MAX; i++)
 550		cnt += tr->progs_cnt[i];
 551
 552	if (kind == BPF_TRAMP_REPLACE) {
 553		/* Cannot attach extension if fentry/fexit are in use. */
 554		if (cnt)
 555			return -EBUSY;
 556		tr->extension_prog = link->link.prog;
 557		return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
 558					  link->link.prog->bpf_func);
 559	}
 560	if (cnt >= BPF_MAX_TRAMP_LINKS)
 561		return -E2BIG;
 562	if (!hlist_unhashed(&link->tramp_hlist))
 563		/* prog already linked */
 564		return -EBUSY;
 565	hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
 566		if (link_exiting->link.prog != link->link.prog)
 567			continue;
 568		/* prog already linked */
 569		return -EBUSY;
 570	}
 571
 572	hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
 573	tr->progs_cnt[kind]++;
 574	err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
 575	if (err) {
 576		hlist_del_init(&link->tramp_hlist);
 577		tr->progs_cnt[kind]--;
 578	}
 579	return err;
 580}
 581
 582int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
 583{
 584	int err;
 585
 586	mutex_lock(&tr->mutex);
 587	err = __bpf_trampoline_link_prog(link, tr);
 588	mutex_unlock(&tr->mutex);
 589	return err;
 590}
 591
 592static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
 593{
 594	enum bpf_tramp_prog_type kind;
 595	int err;
 596
 597	kind = bpf_attach_type_to_tramp(link->link.prog);
 598	if (kind == BPF_TRAMP_REPLACE) {
 599		WARN_ON_ONCE(!tr->extension_prog);
 600		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
 601					 tr->extension_prog->bpf_func, NULL);
 602		tr->extension_prog = NULL;
 603		return err;
 604	}
 605	hlist_del_init(&link->tramp_hlist);
 606	tr->progs_cnt[kind]--;
 607	return bpf_trampoline_update(tr, true /* lock_direct_mutex */);
 608}
 609
 610/* bpf_trampoline_unlink_prog() should never fail. */
 611int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
 612{
 613	int err;
 614
 615	mutex_lock(&tr->mutex);
 616	err = __bpf_trampoline_unlink_prog(link, tr);
 617	mutex_unlock(&tr->mutex);
 618	return err;
 619}
 620
 621#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
 622static void bpf_shim_tramp_link_release(struct bpf_link *link)
 623{
 624	struct bpf_shim_tramp_link *shim_link =
 625		container_of(link, struct bpf_shim_tramp_link, link.link);
 626
 627	/* paired with 'shim_link->trampoline = tr' in bpf_trampoline_link_cgroup_shim */
 628	if (!shim_link->trampoline)
 629		return;
 630
 631	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline));
 632	bpf_trampoline_put(shim_link->trampoline);
 633}
 634
 635static void bpf_shim_tramp_link_dealloc(struct bpf_link *link)
 636{
 637	struct bpf_shim_tramp_link *shim_link =
 638		container_of(link, struct bpf_shim_tramp_link, link.link);
 639
 640	kfree(shim_link);
 641}
 642
 643static const struct bpf_link_ops bpf_shim_tramp_link_lops = {
 644	.release = bpf_shim_tramp_link_release,
 645	.dealloc = bpf_shim_tramp_link_dealloc,
 646};
 647
 648static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
 649						     bpf_func_t bpf_func,
 650						     int cgroup_atype)
 651{
 652	struct bpf_shim_tramp_link *shim_link = NULL;
 653	struct bpf_prog *p;
 654
 655	shim_link = kzalloc(sizeof(*shim_link), GFP_USER);
 656	if (!shim_link)
 657		return NULL;
 658
 659	p = bpf_prog_alloc(1, 0);
 660	if (!p) {
 661		kfree(shim_link);
 662		return NULL;
 663	}
 664
 665	p->jited = false;
 666	p->bpf_func = bpf_func;
 667
 668	p->aux->cgroup_atype = cgroup_atype;
 669	p->aux->attach_func_proto = prog->aux->attach_func_proto;
 670	p->aux->attach_btf_id = prog->aux->attach_btf_id;
 671	p->aux->attach_btf = prog->aux->attach_btf;
 672	btf_get(p->aux->attach_btf);
 673	p->type = BPF_PROG_TYPE_LSM;
 674	p->expected_attach_type = BPF_LSM_MAC;
 675	bpf_prog_inc(p);
 676	bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC,
 677		      &bpf_shim_tramp_link_lops, p);
 678	bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype);
 679
 680	return shim_link;
 681}
 682
 683static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
 684						    bpf_func_t bpf_func)
 685{
 686	struct bpf_tramp_link *link;
 687	int kind;
 688
 689	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
 690		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
 691			struct bpf_prog *p = link->link.prog;
 692
 693			if (p->bpf_func == bpf_func)
 694				return container_of(link, struct bpf_shim_tramp_link, link);
 695		}
 696	}
 697
 698	return NULL;
 699}
 700
 701int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
 702				    int cgroup_atype)
 703{
 704	struct bpf_shim_tramp_link *shim_link = NULL;
 705	struct bpf_attach_target_info tgt_info = {};
 706	struct bpf_trampoline *tr;
 707	bpf_func_t bpf_func;
 708	u64 key;
 709	int err;
 710
 711	err = bpf_check_attach_target(NULL, prog, NULL,
 712				      prog->aux->attach_btf_id,
 713				      &tgt_info);
 714	if (err)
 715		return err;
 716
 717	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
 718					 prog->aux->attach_btf_id);
 719
 720	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
 721	tr = bpf_trampoline_get(key, &tgt_info);
 722	if (!tr)
 723		return  -ENOMEM;
 724
 725	mutex_lock(&tr->mutex);
 726
 727	shim_link = cgroup_shim_find(tr, bpf_func);
 728	if (shim_link) {
 729		/* Reusing existing shim attached by the other program. */
 730		bpf_link_inc(&shim_link->link.link);
 731
 732		mutex_unlock(&tr->mutex);
 733		bpf_trampoline_put(tr); /* bpf_trampoline_get above */
 734		return 0;
 735	}
 736
 737	/* Allocate and install new shim. */
 738
 739	shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype);
 740	if (!shim_link) {
 741		err = -ENOMEM;
 742		goto err;
 743	}
 744
 745	err = __bpf_trampoline_link_prog(&shim_link->link, tr);
 746	if (err)
 747		goto err;
 748
 749	shim_link->trampoline = tr;
 750	/* note, we're still holding tr refcnt from above */
 751
 752	mutex_unlock(&tr->mutex);
 753
 754	return 0;
 755err:
 756	mutex_unlock(&tr->mutex);
 757
 758	if (shim_link)
 759		bpf_link_put(&shim_link->link.link);
 760
 761	/* have to release tr while _not_ holding its mutex */
 762	bpf_trampoline_put(tr); /* bpf_trampoline_get above */
 763
 764	return err;
 765}
 766
 767void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
 768{
 769	struct bpf_shim_tramp_link *shim_link = NULL;
 770	struct bpf_trampoline *tr;
 771	bpf_func_t bpf_func;
 772	u64 key;
 773
 774	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
 775					 prog->aux->attach_btf_id);
 776
 777	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
 778	tr = bpf_trampoline_lookup(key);
 779	if (WARN_ON_ONCE(!tr))
 780		return;
 781
 782	mutex_lock(&tr->mutex);
 783	shim_link = cgroup_shim_find(tr, bpf_func);
 784	mutex_unlock(&tr->mutex);
 785
 786	if (shim_link)
 787		bpf_link_put(&shim_link->link.link);
 788
 789	bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */
 790}
 791#endif
 792
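The shim machinery above is what backs BPF_LSM_CGROUP attachments: one trampoline-attached shim per LSM hook dispatches to the per-cgroup programs. A minimal sketch of the user-visible side (hook name and calls mirror the lsm_cgroup selftests; treat the attach call and the return convention as assumptions to check against current libbpf and kernel docs):

/* BPF side */
SEC("lsm_cgroup/socket_post_create")
int BPF_PROG(sock_create_hook, struct socket *sock, int family, int type,
	     int protocol, int kern)
{
	return 1;	/* allow */
}

/* user space (libbpf); skel is a generated skeleton, cgroup_fd an open cgroup dir */
struct bpf_link *link = bpf_program__attach_cgroup(skel->progs.sock_create_hook,
						   cgroup_fd);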
 793struct bpf_trampoline *bpf_trampoline_get(u64 key,
 794					  struct bpf_attach_target_info *tgt_info)
 795{
 796	struct bpf_trampoline *tr;
 797
 798	tr = bpf_trampoline_lookup(key);
 799	if (!tr)
 800		return NULL;
 801
 802	mutex_lock(&tr->mutex);
 803	if (tr->func.addr)
 804		goto out;
 805
 806	memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
 807	tr->func.addr = (void *)tgt_info->tgt_addr;
 808out:
 809	mutex_unlock(&tr->mutex);
 810	return tr;
 811}
 812
 813void bpf_trampoline_put(struct bpf_trampoline *tr)
 814{
 815	int i;
 816
 817	if (!tr)
 818		return;
 819	mutex_lock(&trampoline_mutex);
 820	if (!refcount_dec_and_test(&tr->refcnt))
 821		goto out;
 822	WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
 823
 824	for (i = 0; i < BPF_TRAMP_MAX; i++)
 825		if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i])))
 826			goto out;
 827
 828	/* This code will be executed even when the last bpf_tramp_image
 829	 * is alive. All progs are detached from the trampoline and the
 830	 * trampoline image is patched with jmp into epilogue to skip
 831	 * fexit progs. The fentry-only trampoline will be freed via
 832	 * multiple rcu callbacks.
 833	 */
 834	hlist_del(&tr->hlist);
 835	if (tr->fops) {
 836		ftrace_free_filter(tr->fops);
 837		kfree(tr->fops);
 838	}
 839	kfree(tr);
 840out:
 841	mutex_unlock(&trampoline_mutex);
 842}
 843
 844#define NO_START_TIME 1
 845static __always_inline u64 notrace bpf_prog_start_time(void)
 846{
 847	u64 start = NO_START_TIME;
 848
 849	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
 850		start = sched_clock();
 851		if (unlikely(!start))
 852			start = NO_START_TIME;
 853	}
 854	return start;
 855}
 856
 857/* The logic is similar to bpf_prog_run(), but with an explicit
 858 * rcu_read_lock() and migrate_disable() which are required
 859 * for the trampoline. The macro is split into
 860 * call __bpf_prog_enter
 861 * call prog->bpf_func
 862 * call __bpf_prog_exit
 863 *
 864 * __bpf_prog_enter returns:
 865 * 0 - skip execution of the bpf prog
 866 * 1 - execute bpf prog
 867 * [2..MAX_U64] - execute bpf prog and record execution time.
 868 *     This is start time.
 869 */
 870static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
 871	__acquires(RCU)
 872{
 873	rcu_read_lock();
 874	migrate_disable();
 875
 876	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
 877
 878	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
 879		bpf_prog_inc_misses_counter(prog);
 880		return 0;
 881	}
 882	return bpf_prog_start_time();
 883}
 884
 885static void notrace update_prog_stats(struct bpf_prog *prog,
 886				      u64 start)
 887{
 888	struct bpf_prog_stats *stats;
 889
 890	if (static_branch_unlikely(&bpf_stats_enabled_key) &&
 891	    /* static_key could be enabled in __bpf_prog_enter*
 892	     * and disabled in __bpf_prog_exit*.
 893	     * And vice versa.
 894	     * Hence check that 'start' is valid.
 895	     */
 896	    start > NO_START_TIME) {
 897		unsigned long flags;
 898
 899		stats = this_cpu_ptr(prog->stats);
 900		flags = u64_stats_update_begin_irqsave(&stats->syncp);
 901		u64_stats_inc(&stats->cnt);
 902		u64_stats_add(&stats->nsecs, sched_clock() - start);
 903		u64_stats_update_end_irqrestore(&stats->syncp, flags);
 904	}
 905}
 906
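The counters updated here are what user space sees as run_cnt and run_time_ns in bpf_prog_info (and in bpftool prog show) once stats collection is enabled via the kernel.bpf_stats_enabled sysctl or the BPF_ENABLE_STATS command. A hedged libbpf sketch for reading them back (prog_fd refers to an already-loaded program):

#include <stdio.h>
#include <unistd.h>
#include <bpf/bpf.h>

static void print_prog_stats(int prog_fd)
{
	struct bpf_prog_info info = {};
	__u32 len = sizeof(info);
	/* stats are only accumulated while enabled; keep the fd open meanwhile */
	int stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);

	if (!bpf_obj_get_info_by_fd(prog_fd, &info, &len))
		printf("run_cnt=%llu run_time_ns=%llu\n",
		       (unsigned long long)info.run_cnt,
		       (unsigned long long)info.run_time_ns);
	if (stats_fd >= 0)
		close(stats_fd);
}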
 907static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
 908					  struct bpf_tramp_run_ctx *run_ctx)
 909	__releases(RCU)
 910{
 911	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
 912
 913	update_prog_stats(prog, start);
 914	this_cpu_dec(*(prog->active));
 915	migrate_enable();
 916	rcu_read_unlock();
 917}
 918
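Inside the generated trampoline these helpers bracket every program invocation. What arch_prepare_bpf_trampoline() emits is machine code, but for a recursion-protected prog it is roughly equivalent to the following C (args_on_stack stands for the argument area the trampoline builds on its stack; illustrative only):

	struct bpf_tramp_run_ctx run_ctx;
	u64 start;

	start = __bpf_prog_enter_recur(prog, &run_ctx);
	if (start)	/* 0 means recursion was detected: skip the prog */
		prog->bpf_func(args_on_stack, prog->insnsi);
	/* always runs: drops prog->active and the RCU/migrate protection */
	__bpf_prog_exit_recur(prog, start, &run_ctx);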
 919static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
 920					       struct bpf_tramp_run_ctx *run_ctx)
 921	__acquires(RCU)
 922{
 923	/* Runtime stats are exported via actual BPF_LSM_CGROUP
 924	 * programs, not the shims.
 925	 */
 926	rcu_read_lock();
 927	migrate_disable();
 928
 929	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
 930
 931	return NO_START_TIME;
 932}
 933
 934static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
 935					       struct bpf_tramp_run_ctx *run_ctx)
 936	__releases(RCU)
 937{
 938	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
 939
 940	migrate_enable();
 941	rcu_read_unlock();
 942}
 943
 944u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
 945					     struct bpf_tramp_run_ctx *run_ctx)
 946{
 947	rcu_read_lock_trace();
 948	migrate_disable();
 949	might_fault();
 950
 951	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
 952		bpf_prog_inc_misses_counter(prog);
 953		return 0;
 954	}
 955
 956	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
 957
 958	return bpf_prog_start_time();
 959}
 960
 961void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
 962					     struct bpf_tramp_run_ctx *run_ctx)
 963{
 964	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
 965
 966	update_prog_stats(prog, start);
 967	this_cpu_dec(*(prog->active));
 968	migrate_enable();
 969	rcu_read_unlock_trace();
 970}
 971
 972static u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog,
 973					      struct bpf_tramp_run_ctx *run_ctx)
 974{
 975	rcu_read_lock_trace();
 976	migrate_disable();
 977	might_fault();
 978
 979	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
 980
 981	return bpf_prog_start_time();
 982}
 983
 984static void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
 985					      struct bpf_tramp_run_ctx *run_ctx)
 986{
 987	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
 988
 989	update_prog_stats(prog, start);
 990	migrate_enable();
 991	rcu_read_unlock_trace();
 992}
 993
 994static u64 notrace __bpf_prog_enter(struct bpf_prog *prog,
 995				    struct bpf_tramp_run_ctx *run_ctx)
 996	__acquires(RCU)
 997{
 998	rcu_read_lock();
 999	migrate_disable();
1000
1001	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
1002
1003	return bpf_prog_start_time();
1004}
1005
1006static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start,
1007				    struct bpf_tramp_run_ctx *run_ctx)
1008	__releases(RCU)
1009{
1010	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
1011
1012	update_prog_stats(prog, start);
1013	migrate_enable();
1014	rcu_read_unlock();
1015}
1016
1017void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)
1018{
1019	percpu_ref_get(&tr->pcref);
1020}
1021
1022void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
1023{
1024	percpu_ref_put(&tr->pcref);
1025}
1026
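/* __bpf_tramp_enter()/__bpf_tramp_exit() are emitted into the generated
 * trampoline only when BPF_TRAMP_F_CALL_ORIG is set and bracket its body,
 * including the call back into the original function. The percpu_ref they
 * get/put is what bpf_tramp_image_put() kills and waits on, so the image is
 * not freed while the original function is still running or sleeping.
 */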
1027bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog)
1028{
1029	bool sleepable = prog->aux->sleepable;
1030
1031	if (bpf_prog_check_recur(prog))
1032		return sleepable ? __bpf_prog_enter_sleepable_recur :
1033			__bpf_prog_enter_recur;
1034
1035	if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
1036	    prog->expected_attach_type == BPF_LSM_CGROUP)
1037		return __bpf_prog_enter_lsm_cgroup;
1038
1039	return sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter;
1040}
1041
1042bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
1043{
1044	bool sleepable = prog->aux->sleepable;
1045
1046	if (bpf_prog_check_recur(prog))
1047		return sleepable ? __bpf_prog_exit_sleepable_recur :
1048			__bpf_prog_exit_recur;
1049
1050	if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
1051	    prog->expected_attach_type == BPF_LSM_CGROUP)
1052		return __bpf_prog_exit_lsm_cgroup;
1053
1054	return sleepable ? __bpf_prog_exit_sleepable : __bpf_prog_exit;
1055}
1056
1057int __weak
1058arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
1059			    const struct btf_func_model *m, u32 flags,
1060			    struct bpf_tramp_links *tlinks,
1061			    void *orig_call)
1062{
1063	return -ENOTSUPP;
1064}
1065
1066static int __init init_trampolines(void)
1067{
1068	int i;
1069
1070	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
1071		INIT_HLIST_HEAD(&trampoline_table[i]);
1072	return 0;
1073}
1074late_initcall(init_trampolines);
kernel/bpf/trampoline.c, v6.13.7
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Copyright (c) 2019 Facebook */
   3#include <linux/hash.h>
   4#include <linux/bpf.h>
   5#include <linux/filter.h>
   6#include <linux/ftrace.h>
   7#include <linux/rbtree_latch.h>
   8#include <linux/perf_event.h>
   9#include <linux/btf.h>
  10#include <linux/rcupdate_trace.h>
  11#include <linux/rcupdate_wait.h>
  12#include <linux/static_call.h>
  13#include <linux/bpf_verifier.h>
  14#include <linux/bpf_lsm.h>
  15#include <linux/delay.h>
  16
  17/* dummy _ops. The verifier will operate on target program's ops. */
  18const struct bpf_verifier_ops bpf_extension_verifier_ops = {
  19};
  20const struct bpf_prog_ops bpf_extension_prog_ops = {
  21};
  22
  23/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
  24#define TRAMPOLINE_HASH_BITS 10
  25#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
  26
  27static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
  28
  29/* serializes access to trampoline_table */
  30static DEFINE_MUTEX(trampoline_mutex);
  31
  32#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
  33static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
  34
  35static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd)
  36{
  37	struct bpf_trampoline *tr = ops->private;
  38	int ret = 0;
  39
  40	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
  41		/* This is called inside register_ftrace_direct_multi(), so
  42		 * tr->mutex is already locked.
  43		 */
  44		lockdep_assert_held_once(&tr->mutex);
  45
  46		/* Instead of updating the trampoline here, we propagate
  47		 * -EAGAIN to register_ftrace_direct(). Then we can
  48		 * retry register_ftrace_direct() after updating the
  49		 * trampoline.
  50		 */
  51		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
  52		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) {
  53			if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY))
  54				return -EBUSY;
  55
  56			tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
  57			return -EAGAIN;
  58		}
  59
  60		return 0;
  61	}
  62
  63	/* The normal locking order is
  64	 *    tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
  65	 *
  66	 * The following two commands are called from
  67	 *
  68	 *   prepare_direct_functions_for_ipmodify
  69	 *   cleanup_direct_functions_after_ipmodify
  70	 *
  71	 * In both cases, direct_mutex is already locked. Use
  72	 * mutex_trylock(&tr->mutex) to avoid deadlock in race condition
  73	 * (something else is making changes to this same trampoline).
  74	 */
  75	if (!mutex_trylock(&tr->mutex)) {
  76		/* sleep 1 ms to make sure whatever holding tr->mutex makes
  77		 * some progress.
  78		 */
  79		msleep(1);
  80		return -EAGAIN;
  81	}
  82
  83	switch (cmd) {
  84	case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER:
  85		tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
  86
  87		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
  88		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK))
  89			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
  90		break;
  91	case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
  92		tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;
  93
  94		if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
  95			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
  96		break;
  97	default:
  98		ret = -EINVAL;
  99		break;
 100	}
 101
 102	mutex_unlock(&tr->mutex);
 103	return ret;
 104}
 105#endif
 106
 107bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
 108{
 109	enum bpf_attach_type eatype = prog->expected_attach_type;
 110	enum bpf_prog_type ptype = prog->type;
 111
 112	return (ptype == BPF_PROG_TYPE_TRACING &&
 113		(eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
 114		 eatype == BPF_MODIFY_RETURN)) ||
 115		(ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
 116}
 117
 118void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym)
 119{
 120	ksym->start = (unsigned long) data;
 121	ksym->end = ksym->start + size;
 122}
 123
 124void bpf_image_ksym_add(struct bpf_ksym *ksym)
 125{
 126	bpf_ksym_add(ksym);
 127	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
 128			   PAGE_SIZE, false, ksym->name);
 129}
 130
 131void bpf_image_ksym_del(struct bpf_ksym *ksym)
 132{
 133	bpf_ksym_del(ksym);
 134	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
 135			   PAGE_SIZE, true, ksym->name);
 136}
 137
 138static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 139{
 140	struct bpf_trampoline *tr;
 141	struct hlist_head *head;
 142	int i;
 143
 144	mutex_lock(&trampoline_mutex);
 145	head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
 146	hlist_for_each_entry(tr, head, hlist) {
 147		if (tr->key == key) {
 148			refcount_inc(&tr->refcnt);
 149			goto out;
 150		}
 151	}
 152	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
 153	if (!tr)
 154		goto out;
 155#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 156	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
 157	if (!tr->fops) {
 158		kfree(tr);
 159		tr = NULL;
 160		goto out;
 161	}
 162	tr->fops->private = tr;
 163	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
 164#endif
 165
 166	tr->key = key;
 167	INIT_HLIST_NODE(&tr->hlist);
 168	hlist_add_head(&tr->hlist, head);
 169	refcount_set(&tr->refcnt, 1);
 170	mutex_init(&tr->mutex);
 171	for (i = 0; i < BPF_TRAMP_MAX; i++)
 172		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
 173out:
 174	mutex_unlock(&trampoline_mutex);
 175	return tr;
 176}
 177
 178static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
 179{
 180	void *ip = tr->func.addr;
 181	int ret;
 182
 183	if (tr->func.ftrace_managed)
 184		ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false);
 185	else
 186		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
 187
 188	return ret;
 189}
 190
 191static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr,
 192			 bool lock_direct_mutex)
 193{
 194	void *ip = tr->func.addr;
 195	int ret;
 196
 197	if (tr->func.ftrace_managed) {
 198		if (lock_direct_mutex)
 199			ret = modify_ftrace_direct(tr->fops, (long)new_addr);
 200		else
 201			ret = modify_ftrace_direct_nolock(tr->fops, (long)new_addr);
 202	} else {
 203		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
 204	}
 205	return ret;
 206}
 207
 208/* first time registering */
 209static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
 210{
 211	void *ip = tr->func.addr;
 212	unsigned long faddr;
 213	int ret;
 214
 215	faddr = ftrace_location((unsigned long)ip);
 216	if (faddr) {
 217		if (!tr->fops)
 218			return -ENOTSUPP;
 219		tr->func.ftrace_managed = true;
 220	}
 221
 222	if (tr->func.ftrace_managed) {
 223		ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
 224		ret = register_ftrace_direct(tr->fops, (long)new_addr);
 225	} else {
 226		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
 227	}
 228
 229	return ret;
 230}
 231
 232static struct bpf_tramp_links *
 233bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
 234{
 235	struct bpf_tramp_link *link;
 236	struct bpf_tramp_links *tlinks;
 237	struct bpf_tramp_link **links;
 238	int kind;
 239
 240	*total = 0;
 241	tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
 242	if (!tlinks)
 243		return ERR_PTR(-ENOMEM);
 244
 245	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
 246		tlinks[kind].nr_links = tr->progs_cnt[kind];
 247		*total += tr->progs_cnt[kind];
 248		links = tlinks[kind].links;
 249
 250		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
 251			*ip_arg |= link->link.prog->call_get_func_ip;
 252			*links++ = link;
 253		}
 254	}
 255	return tlinks;
 256}
 257
 258static void bpf_tramp_image_free(struct bpf_tramp_image *im)
 259{
 260	bpf_image_ksym_del(&im->ksym);
 261	arch_free_bpf_trampoline(im->image, im->size);
 262	bpf_jit_uncharge_modmem(im->size);
 263	percpu_ref_exit(&im->pcref);
 264	kfree_rcu(im, rcu);
 265}
 266
 267static void __bpf_tramp_image_put_deferred(struct work_struct *work)
 268{
 269	struct bpf_tramp_image *im;
 270
 271	im = container_of(work, struct bpf_tramp_image, work);
 272	bpf_tramp_image_free(im);
 273}
 274
 275/* callback, fexit step 3 or fentry step 2 */
 276static void __bpf_tramp_image_put_rcu(struct rcu_head *rcu)
 277{
 278	struct bpf_tramp_image *im;
 279
 280	im = container_of(rcu, struct bpf_tramp_image, rcu);
 281	INIT_WORK(&im->work, __bpf_tramp_image_put_deferred);
 282	schedule_work(&im->work);
 283}
 284
 285/* callback, fexit step 2. Called after percpu_ref_kill confirms. */
 286static void __bpf_tramp_image_release(struct percpu_ref *pcref)
 287{
 288	struct bpf_tramp_image *im;
 289
 290	im = container_of(pcref, struct bpf_tramp_image, pcref);
 291	call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
 292}
 293
 294/* callback, fexit or fentry step 1 */
 295static void __bpf_tramp_image_put_rcu_tasks(struct rcu_head *rcu)
 296{
 297	struct bpf_tramp_image *im;
 298
 299	im = container_of(rcu, struct bpf_tramp_image, rcu);
 300	if (im->ip_after_call)
 301		/* the case of fmod_ret/fexit trampoline and CONFIG_PREEMPTION=y */
 302		percpu_ref_kill(&im->pcref);
 303	else
 304		/* the case of fentry trampoline */
 305		call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
 306}
 307
 308static void bpf_tramp_image_put(struct bpf_tramp_image *im)
 309{
 310	/* The trampoline image that calls original function is using:
 311	 * rcu_read_lock_trace to protect sleepable bpf progs
 312	 * rcu_read_lock to protect normal bpf progs
 313	 * percpu_ref to protect trampoline itself
 314	 * rcu tasks to protect trampoline asm not covered by percpu_ref
 315	 * (which are few asm insns before __bpf_tramp_enter and
 316	 *  after __bpf_tramp_exit)
 317	 *
 318	 * The trampoline is unreachable before bpf_tramp_image_put().
 319	 *
 320	 * First, patch the trampoline to avoid calling into fexit progs.
 321	 * The progs will be freed even if the original function is still
 322	 * executing or sleeping.
 323	 * In case of CONFIG_PREEMPT=y use call_rcu_tasks() to wait on
 324	 * first few asm instructions to execute and call into
 325	 * __bpf_tramp_enter->percpu_ref_get.
 326	 * Then use percpu_ref_kill to wait for the trampoline and the original
 327	 * function to finish.
 328	 * Then use call_rcu_tasks() to make sure few asm insns in
 329	 * the trampoline epilogue are done as well.
 330	 *
 331	 * In !PREEMPT case the task that got interrupted in the first asm
 332	 * insns won't go through an RCU quiescent state which the
 333	 * percpu_ref_kill will be waiting for. Hence the first
 334	 * call_rcu_tasks() is not necessary.
 335	 */
 336	if (im->ip_after_call) {
 337		int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP,
 338					     NULL, im->ip_epilogue);
 339		WARN_ON(err);
 340		if (IS_ENABLED(CONFIG_TASKS_RCU))
 341			call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
 342		else
 343			percpu_ref_kill(&im->pcref);
 344		return;
 345	}
 346
 347	/* The trampoline without fexit and fmod_ret progs doesn't call original
 348	 * function and doesn't use percpu_ref.
 349	 * Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
 350	 * Then use call_rcu_tasks() to wait for the rest of trampoline asm
 351	 * and normal progs.
 352	 */
 353	call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
 354}
 355
 356static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
 357{
 358	struct bpf_tramp_image *im;
 359	struct bpf_ksym *ksym;
 360	void *image;
 361	int err = -ENOMEM;
 362
 363	im = kzalloc(sizeof(*im), GFP_KERNEL);
 364	if (!im)
 365		goto out;
 366
 367	err = bpf_jit_charge_modmem(size);
 368	if (err)
 369		goto out_free_im;
 370	im->size = size;
 371
 372	err = -ENOMEM;
 373	im->image = image = arch_alloc_bpf_trampoline(size);
 374	if (!image)
 375		goto out_uncharge;
 376
 377	err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL);
 378	if (err)
 379		goto out_free_image;
 380
 381	ksym = &im->ksym;
 382	INIT_LIST_HEAD_RCU(&ksym->lnode);
 383	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key);
 384	bpf_image_ksym_init(image, size, ksym);
 385	bpf_image_ksym_add(ksym);
 386	return im;
 387
 388out_free_image:
 389	arch_free_bpf_trampoline(im->image, im->size);
 390out_uncharge:
 391	bpf_jit_uncharge_modmem(size);
 392out_free_im:
 393	kfree(im);
 394out:
 395	return ERR_PTR(err);
 396}
 397
 398static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
 399{
 400	struct bpf_tramp_image *im;
 401	struct bpf_tramp_links *tlinks;
 402	u32 orig_flags = tr->flags;
 403	bool ip_arg = false;
 404	int err, total, size;
 405
 406	tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg);
 407	if (IS_ERR(tlinks))
 408		return PTR_ERR(tlinks);
 409
 410	if (total == 0) {
 411		err = unregister_fentry(tr, tr->cur_image->image);
 412		bpf_tramp_image_put(tr->cur_image);
 413		tr->cur_image = NULL;
 414		goto out;
 415	}
 416
 417	/* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */
 418	tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);
 419
 420	if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
 421	    tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
 422		/* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME
 423		 * should not be set together.
 424		 */
 425		tr->flags |= BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
 426	} else {
 427		tr->flags |= BPF_TRAMP_F_RESTORE_REGS;
 428	}
 429
 430	if (ip_arg)
 431		tr->flags |= BPF_TRAMP_F_IP_ARG;
 432
 433#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 434again:
 435	if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) &&
 436	    (tr->flags & BPF_TRAMP_F_CALL_ORIG))
 437		tr->flags |= BPF_TRAMP_F_ORIG_STACK;
 438#endif
 439
 440	size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
 441					tlinks, tr->func.addr);
 442	if (size < 0) {
 443		err = size;
 444		goto out;
 445	}
 446
 447	if (size > PAGE_SIZE) {
 448		err = -E2BIG;
 449		goto out;
 450	}
 451
 452	im = bpf_tramp_image_alloc(tr->key, size);
 453	if (IS_ERR(im)) {
 454		err = PTR_ERR(im);
 455		goto out;
 456	}
 457
 458	err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
 459					  &tr->func.model, tr->flags, tlinks,
 460					  tr->func.addr);
 461	if (err < 0)
 462		goto out_free;
 463
 464	err = arch_protect_bpf_trampoline(im->image, im->size);
 465	if (err)
 466		goto out_free;
 467
 468	WARN_ON(tr->cur_image && total == 0);
 469	if (tr->cur_image)
 470		/* progs already running at this address */
 471		err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex);
 472	else
 473		/* first time registering */
 474		err = register_fentry(tr, im->image);
 475
 476#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 477	if (err == -EAGAIN) {
 478		/* -EAGAIN from bpf_tramp_ftrace_ops_func. Now
 479		 * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the
 480		 * trampoline again, and retry register.
 481		 */
 482		/* reset fops->func and fops->trampoline for re-register */
 483		tr->fops->func = NULL;
 484		tr->fops->trampoline = 0;
 485
 486		/* free im memory and reallocate later */
 487		bpf_tramp_image_free(im);
 488		goto again;
 489	}
 490#endif
 491	if (err)
 492		goto out_free;
 493
 494	if (tr->cur_image)
 495		bpf_tramp_image_put(tr->cur_image);
 496	tr->cur_image = im;
 497out:
 498	/* If any error happens, restore previous flags */
 499	if (err)
 500		tr->flags = orig_flags;
 501	kfree(tlinks);
 502	return err;
 503
 504out_free:
 505	bpf_tramp_image_free(im);
 506	goto out;
 507}
 508
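/* Compared with the v6.2 listing above, the update path now asks the arch how
 * large the image must be (arch_bpf_trampoline_size()), allocates exactly that
 * much in bpf_tramp_image_alloc() via arch_alloc_bpf_trampoline(), and seals
 * it with arch_protect_bpf_trampoline(). On the -EAGAIN retry for
 * SHARE_IPMODIFY the image is freed and rebuilt rather than flipped back to
 * writable, and the old tr->selector bookkeeping is gone.
 */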
 509static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
 510{
 511	switch (prog->expected_attach_type) {
 512	case BPF_TRACE_FENTRY:
 513		return BPF_TRAMP_FENTRY;
 514	case BPF_MODIFY_RETURN:
 515		return BPF_TRAMP_MODIFY_RETURN;
 516	case BPF_TRACE_FEXIT:
 517		return BPF_TRAMP_FEXIT;
 518	case BPF_LSM_MAC:
 519		if (!prog->aux->attach_func_proto->type)
 520			/* The function returns void, we cannot modify its
 521			 * return value.
 522			 */
 523			return BPF_TRAMP_FEXIT;
 524		else
 525			return BPF_TRAMP_MODIFY_RETURN;
 526	default:
 527		return BPF_TRAMP_REPLACE;
 528	}
 529}
 530
 531static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog)
 532{
 533	struct bpf_prog_aux *aux = tgt_prog->aux;
 534
 535	guard(mutex)(&aux->ext_mutex);
 536	if (aux->prog_array_member_cnt)
 537		/* Program extensions can not extend target prog when the target
 538		 * prog has been updated to any prog_array map as tail callee.
 539		 * It's to prevent a potential infinite loop like:
 540		 * tgt prog entry -> tgt prog subprog -> freplace prog entry
 541		 * --tailcall-> tgt prog entry.
 542		 */
 543		return -EBUSY;
 544
 545	aux->is_extended = true;
 546	return 0;
 547}
 548
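bpf_freplace_check_tgt_prog() is new guard logic for BPF_TRAMP_REPLACE (freplace/extension) attachments used below: the attach now fails with -EBUSY if the target program has already been inserted into a prog_array as a tail-call callee. For context, a hedged libbpf sketch of an extension attachment (names are placeholders; error handling omitted):

/* BPF side: the extension body, matching the target subprog's prototype */
SEC("freplace/global_subprog")
int new_global_subprog(void *ctx)
{
	return 0;
}

/* user space; tgt_fd is the fd of the already-loaded target program */
struct bpf_object *obj = bpf_object__open_file("freplace.bpf.o", NULL);
struct bpf_program *prog = bpf_object__find_program_by_name(obj, "new_global_subprog");

bpf_program__set_attach_target(prog, tgt_fd, "global_subprog"); /* before load */
bpf_object__load(obj);
struct bpf_link *link = bpf_program__attach_freplace(prog, tgt_fd, "global_subprog");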
 549static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
 550				      struct bpf_trampoline *tr,
 551				      struct bpf_prog *tgt_prog)
 552{
 553	enum bpf_tramp_prog_type kind;
 554	struct bpf_tramp_link *link_exiting;
 555	int err = 0;
 556	int cnt = 0, i;
 557
 558	kind = bpf_attach_type_to_tramp(link->link.prog);
 559	if (tr->extension_prog)
 560		/* cannot attach fentry/fexit if extension prog is attached.
 561		 * cannot overwrite extension prog either.
 562		 */
 563		return -EBUSY;
 564
 565	for (i = 0; i < BPF_TRAMP_MAX; i++)
 566		cnt += tr->progs_cnt[i];
 567
 568	if (kind == BPF_TRAMP_REPLACE) {
 569		/* Cannot attach extension if fentry/fexit are in use. */
 570		if (cnt)
 571			return -EBUSY;
 572		err = bpf_freplace_check_tgt_prog(tgt_prog);
 573		if (err)
 574			return err;
 575		tr->extension_prog = link->link.prog;
 576		return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
 577					  link->link.prog->bpf_func);
 578	}
 579	if (cnt >= BPF_MAX_TRAMP_LINKS)
 580		return -E2BIG;
 581	if (!hlist_unhashed(&link->tramp_hlist))
 582		/* prog already linked */
 583		return -EBUSY;
 584	hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
 585		if (link_exiting->link.prog != link->link.prog)
 586			continue;
 587		/* prog already linked */
 588		return -EBUSY;
 589	}
 590
 591	hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
 592	tr->progs_cnt[kind]++;
 593	err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
 594	if (err) {
 595		hlist_del_init(&link->tramp_hlist);
 596		tr->progs_cnt[kind]--;
 597	}
 598	return err;
 599}
 600
 601int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
 602			     struct bpf_trampoline *tr,
 603			     struct bpf_prog *tgt_prog)
 604{
 605	int err;
 606
 607	mutex_lock(&tr->mutex);
 608	err = __bpf_trampoline_link_prog(link, tr, tgt_prog);
 609	mutex_unlock(&tr->mutex);
 610	return err;
 611}
 612
 613static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
 614					struct bpf_trampoline *tr,
 615					struct bpf_prog *tgt_prog)
 616{
 617	enum bpf_tramp_prog_type kind;
 618	int err;
 619
 620	kind = bpf_attach_type_to_tramp(link->link.prog);
 621	if (kind == BPF_TRAMP_REPLACE) {
 622		WARN_ON_ONCE(!tr->extension_prog);
 623		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
 624					 tr->extension_prog->bpf_func, NULL);
 625		tr->extension_prog = NULL;
 626		guard(mutex)(&tgt_prog->aux->ext_mutex);
 627		tgt_prog->aux->is_extended = false;
 628		return err;
 629	}
 630	hlist_del_init(&link->tramp_hlist);
 631	tr->progs_cnt[kind]--;
 632	return bpf_trampoline_update(tr, true /* lock_direct_mutex */);
 633}
 634
 635/* bpf_trampoline_unlink_prog() should never fail. */
 636int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
 637			       struct bpf_trampoline *tr,
 638			       struct bpf_prog *tgt_prog)
 639{
 640	int err;
 641
 642	mutex_lock(&tr->mutex);
 643	err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog);
 644	mutex_unlock(&tr->mutex);
 645	return err;
 646}
 647
 648#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
 649static void bpf_shim_tramp_link_release(struct bpf_link *link)
 650{
 651	struct bpf_shim_tramp_link *shim_link =
 652		container_of(link, struct bpf_shim_tramp_link, link.link);
 653
 654	/* paired with 'shim_link->trampoline = tr' in bpf_trampoline_link_cgroup_shim */
 655	if (!shim_link->trampoline)
 656		return;
 657
 658	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline, NULL));
 659	bpf_trampoline_put(shim_link->trampoline);
 660}
 661
 662static void bpf_shim_tramp_link_dealloc(struct bpf_link *link)
 663{
 664	struct bpf_shim_tramp_link *shim_link =
 665		container_of(link, struct bpf_shim_tramp_link, link.link);
 666
 667	kfree(shim_link);
 668}
 669
 670static const struct bpf_link_ops bpf_shim_tramp_link_lops = {
 671	.release = bpf_shim_tramp_link_release,
 672	.dealloc = bpf_shim_tramp_link_dealloc,
 673};
 674
 675static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
 676						     bpf_func_t bpf_func,
 677						     int cgroup_atype)
 678{
 679	struct bpf_shim_tramp_link *shim_link = NULL;
 680	struct bpf_prog *p;
 681
 682	shim_link = kzalloc(sizeof(*shim_link), GFP_USER);
 683	if (!shim_link)
 684		return NULL;
 685
 686	p = bpf_prog_alloc(1, 0);
 687	if (!p) {
 688		kfree(shim_link);
 689		return NULL;
 690	}
 691
 692	p->jited = false;
 693	p->bpf_func = bpf_func;
 694
 695	p->aux->cgroup_atype = cgroup_atype;
 696	p->aux->attach_func_proto = prog->aux->attach_func_proto;
 697	p->aux->attach_btf_id = prog->aux->attach_btf_id;
 698	p->aux->attach_btf = prog->aux->attach_btf;
 699	btf_get(p->aux->attach_btf);
 700	p->type = BPF_PROG_TYPE_LSM;
 701	p->expected_attach_type = BPF_LSM_MAC;
 702	bpf_prog_inc(p);
 703	bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC,
 704		      &bpf_shim_tramp_link_lops, p);
 705	bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype);
 706
 707	return shim_link;
 708}
 709
 710static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
 711						    bpf_func_t bpf_func)
 712{
 713	struct bpf_tramp_link *link;
 714	int kind;
 715
 716	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
 717		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
 718			struct bpf_prog *p = link->link.prog;
 719
 720			if (p->bpf_func == bpf_func)
 721				return container_of(link, struct bpf_shim_tramp_link, link);
 722		}
 723	}
 724
 725	return NULL;
 726}
 727
 728int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
 729				    int cgroup_atype)
 730{
 731	struct bpf_shim_tramp_link *shim_link = NULL;
 732	struct bpf_attach_target_info tgt_info = {};
 733	struct bpf_trampoline *tr;
 734	bpf_func_t bpf_func;
 735	u64 key;
 736	int err;
 737
 738	err = bpf_check_attach_target(NULL, prog, NULL,
 739				      prog->aux->attach_btf_id,
 740				      &tgt_info);
 741	if (err)
 742		return err;
 743
 744	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
 745					 prog->aux->attach_btf_id);
 746
 747	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
 748	tr = bpf_trampoline_get(key, &tgt_info);
 749	if (!tr)
 750		return  -ENOMEM;
 751
 752	mutex_lock(&tr->mutex);
 753
 754	shim_link = cgroup_shim_find(tr, bpf_func);
 755	if (shim_link) {
 756		/* Reusing existing shim attached by the other program. */
 757		bpf_link_inc(&shim_link->link.link);
 758
 759		mutex_unlock(&tr->mutex);
 760		bpf_trampoline_put(tr); /* bpf_trampoline_get above */
 761		return 0;
 762	}
 763
 764	/* Allocate and install new shim. */
 765
 766	shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype);
 767	if (!shim_link) {
 768		err = -ENOMEM;
 769		goto err;
 770	}
 771
 772	err = __bpf_trampoline_link_prog(&shim_link->link, tr, NULL);
 773	if (err)
 774		goto err;
 775
 776	shim_link->trampoline = tr;
 777	/* note, we're still holding tr refcnt from above */
 778
 779	mutex_unlock(&tr->mutex);
 780
 781	return 0;
 782err:
 783	mutex_unlock(&tr->mutex);
 784
 785	if (shim_link)
 786		bpf_link_put(&shim_link->link.link);
 787
 788	/* have to release tr while _not_ holding its mutex */
 789	bpf_trampoline_put(tr); /* bpf_trampoline_get above */
 790
 791	return err;
 792}
 793
 794void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
 795{
 796	struct bpf_shim_tramp_link *shim_link = NULL;
 797	struct bpf_trampoline *tr;
 798	bpf_func_t bpf_func;
 799	u64 key;
 800
 801	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
 802					 prog->aux->attach_btf_id);
 803
 804	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
 805	tr = bpf_trampoline_lookup(key);
 806	if (WARN_ON_ONCE(!tr))
 807		return;
 808
 809	mutex_lock(&tr->mutex);
 810	shim_link = cgroup_shim_find(tr, bpf_func);
 811	mutex_unlock(&tr->mutex);
 812
 813	if (shim_link)
 814		bpf_link_put(&shim_link->link.link);
 815
 816	bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */
 817}
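/* Lifetime sketch for the shim link (the real call sites live in the
 * cgroup attach/detach paths outside this file; shown here only for
 * illustration):
 *
 *	err = bpf_trampoline_link_cgroup_shim(prog, atype);
 *	if (err)
 *		return err;
 *	...
 *	bpf_trampoline_unlink_cgroup_shim(prog);
 *
 * The shim itself goes away when the last bpf_link_put() drops it, which
 * detaches it from the trampoline via bpf_shim_tramp_link_release().
 */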
 818#endif
 819
 820struct bpf_trampoline *bpf_trampoline_get(u64 key,
 821					  struct bpf_attach_target_info *tgt_info)
 822{
 823	struct bpf_trampoline *tr;
 824
 825	tr = bpf_trampoline_lookup(key);
 826	if (!tr)
 827		return NULL;
 828
 829	mutex_lock(&tr->mutex);
 830	if (tr->func.addr)
 831		goto out;
 832
 833	memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
 834	tr->func.addr = (void *)tgt_info->tgt_addr;
 835out:
 836	mutex_unlock(&tr->mutex);
 837	return tr;
 838}
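/* Typical get/put pairing, mirroring bpf_trampoline_link_cgroup_shim()
 * above (sketch; assumes the caller resolves the attach target first):
 *
 *	err = bpf_check_attach_target(NULL, prog, NULL,
 *				      prog->aux->attach_btf_id, &tgt_info);
 *	if (err)
 *		return err;
 *	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
 *					 prog->aux->attach_btf_id);
 *	tr = bpf_trampoline_get(key, &tgt_info);
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	bpf_trampoline_put(tr);
 */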
 839
 840void bpf_trampoline_put(struct bpf_trampoline *tr)
 841{
 842	int i;
 843
 844	if (!tr)
 845		return;
 846	mutex_lock(&trampoline_mutex);
 847	if (!refcount_dec_and_test(&tr->refcnt))
 848		goto out;
 849	WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
 850
 851	for (i = 0; i < BPF_TRAMP_MAX; i++)
 852		if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i])))
 853			goto out;
 854
 855	/* This code will be executed even when the last bpf_tramp_image
 856	 * is alive. All progs are detached from the trampoline and the
 857	 * trampoline image is patched with jmp into epilogue to skip
 858	 * fexit progs. The fentry-only trampoline will be freed via
 859	 * multiple rcu callbacks.
 860	 */
 861	hlist_del(&tr->hlist);
 862	if (tr->fops) {
 863		ftrace_free_filter(tr->fops);
 864		kfree(tr->fops);
 865	}
 866	kfree(tr);
 867out:
 868	mutex_unlock(&trampoline_mutex);
 869}
 870
 871#define NO_START_TIME 1
 872static __always_inline u64 notrace bpf_prog_start_time(void)
 873{
 874	u64 start = NO_START_TIME;
 875
 876	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
 877		start = sched_clock();
 878		if (unlikely(!start))
 879			start = NO_START_TIME;
 880	}
 881	return start;
 882}
 883
 884/* The logic is similar to bpf_prog_run(), but with an explicit
 885 * rcu_read_lock() and migrate_disable() which are required
 886 * for the trampoline. The macro is split into
 887 * call __bpf_prog_enter
 888 * call prog->bpf_func
 889 * call __bpf_prog_exit
 890 *
 891 * __bpf_prog_enter returns:
 892 * 0 - skip execution of the bpf prog
 893 * 1 - execute bpf prog
 894 * [2..MAX_U64] - execute bpf prog and record execution time.
 895 *     This is start time.
 896 */
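/* In C terms, the sequence emitted for a single prog looks roughly like
 * this (illustrative only; the real code is generated as machine code by
 * arch_prepare_bpf_trampoline(), and the recur/sleepable/lsm_cgroup
 * variant is selected via bpf_trampoline_enter()/bpf_trampoline_exit()):
 *
 *	start = __bpf_prog_enter_recur(prog, &run_ctx);
 *	if (start)
 *		prog->bpf_func(args, prog->insnsi);
 *	__bpf_prog_exit_recur(prog, start, &run_ctx);
 */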
 897static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
 898	__acquires(RCU)
 899{
 900	rcu_read_lock();
 901	migrate_disable();
 902
 903	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
 904
 905	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
 906		bpf_prog_inc_misses_counter(prog);
 907		if (prog->aux->recursion_detected)
 908			prog->aux->recursion_detected(prog);
 909		return 0;
 910	}
 911	return bpf_prog_start_time();
 912}
 913
 914static void notrace update_prog_stats(struct bpf_prog *prog,
 915				      u64 start)
 916{
 917	struct bpf_prog_stats *stats;
 918
 919	if (static_branch_unlikely(&bpf_stats_enabled_key) &&
 920	    /* static_key could be enabled in __bpf_prog_enter*
 921	     * and disabled in __bpf_prog_exit*.
 922	     * And vice versa.
 923	     * Hence check that 'start' is valid.
 924	     */
 925	    start > NO_START_TIME) {
 926		u64 duration = sched_clock() - start;
 927		unsigned long flags;
 928
 929		stats = this_cpu_ptr(prog->stats);
 930		flags = u64_stats_update_begin_irqsave(&stats->syncp);
 931		u64_stats_inc(&stats->cnt);
 932		u64_stats_add(&stats->nsecs, duration);
 933		u64_stats_update_end_irqrestore(&stats->syncp, flags);
 934	}
 935}
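/* The writer side above pairs with readers such as bpf_prog_get_stats()
 * in kernel/bpf/syscall.c, which sample the per-CPU counters with the
 * usual u64_stats retry loop, roughly (sketch):
 *
 *	do {
 *		start = u64_stats_fetch_begin(&st->syncp);
 *		nsecs = u64_stats_read(&st->nsecs);
 *		cnt   = u64_stats_read(&st->cnt);
 *	} while (u64_stats_fetch_retry(&st->syncp, start));
 */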
 936
 937static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
 938					  struct bpf_tramp_run_ctx *run_ctx)
 939	__releases(RCU)
 940{
 941	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
 942
 943	update_prog_stats(prog, start);
 944	this_cpu_dec(*(prog->active));
 945	migrate_enable();
 946	rcu_read_unlock();
 947}
 948
 949static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
 950					       struct bpf_tramp_run_ctx *run_ctx)
 951	__acquires(RCU)
 952{
 953	/* Runtime stats are exported via actual BPF_LSM_CGROUP
 954	 * programs, not the shims.
 955	 */
 956	rcu_read_lock();
 957	migrate_disable();
 958
 959	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
 960
 961	return NO_START_TIME;
 962}
 963
 964static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
 965					       struct bpf_tramp_run_ctx *run_ctx)
 966	__releases(RCU)
 967{
 968	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
 969
 970	migrate_enable();
 971	rcu_read_unlock();
 972}
 973
 974u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
 975					     struct bpf_tramp_run_ctx *run_ctx)
 976{
 977	rcu_read_lock_trace();
 978	migrate_disable();
 979	might_fault();
 980
 981	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
 982
 983	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
 984		bpf_prog_inc_misses_counter(prog);
 985		if (prog->aux->recursion_detected)
 986			prog->aux->recursion_detected(prog);
 987		return 0;
 988	}
 989	return bpf_prog_start_time();
 990}
 991
 992void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
 993					     struct bpf_tramp_run_ctx *run_ctx)
 994{
 995	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
 996
 997	update_prog_stats(prog, start);
 998	this_cpu_dec(*(prog->active));
 999	migrate_enable();
1000	rcu_read_unlock_trace();
1001}
1002
1003static u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog,
1004					      struct bpf_tramp_run_ctx *run_ctx)
1005{
1006	rcu_read_lock_trace();
1007	migrate_disable();
1008	might_fault();
1009
1010	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
1011
1012	return bpf_prog_start_time();
1013}
1014
1015static void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
1016					      struct bpf_tramp_run_ctx *run_ctx)
1017{
1018	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
1019
1020	update_prog_stats(prog, start);
1021	migrate_enable();
1022	rcu_read_unlock_trace();
1023}
1024
1025static u64 notrace __bpf_prog_enter(struct bpf_prog *prog,
1026				    struct bpf_tramp_run_ctx *run_ctx)
1027	__acquires(RCU)
1028{
1029	rcu_read_lock();
1030	migrate_disable();
1031
1032	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
1033
1034	return bpf_prog_start_time();
1035}
1036
1037static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start,
1038				    struct bpf_tramp_run_ctx *run_ctx)
1039	__releases(RCU)
1040{
1041	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
1042
1043	update_prog_stats(prog, start);
1044	migrate_enable();
1045	rcu_read_unlock();
1046}
1047
1048void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)
1049{
1050	percpu_ref_get(&tr->pcref);
1051}
1052
1053void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
1054{
1055	percpu_ref_put(&tr->pcref);
1056}
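/* These keep the bpf_tramp_image pinned while a task may still be
 * executing inside it. For trampolines that call the original function,
 * the generated code brackets its body roughly like this (illustrative;
 * emitted by the arch JIT, not written in C):
 *
 *	__bpf_tramp_enter(im);
 *	... run fentry/fmod_ret progs, call the original, run fexit progs ...
 *	__bpf_tramp_exit(im);
 */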
1057
1058bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog)
1059{
1060	bool sleepable = prog->sleepable;
1061
1062	if (bpf_prog_check_recur(prog))
1063		return sleepable ? __bpf_prog_enter_sleepable_recur :
1064			__bpf_prog_enter_recur;
1065
1066	if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
1067	    prog->expected_attach_type == BPF_LSM_CGROUP)
1068		return __bpf_prog_enter_lsm_cgroup;
1069
1070	return sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter;
1071}
1072
1073bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
1074{
1075	bool sleepable = prog->sleepable;
1076
1077	if (bpf_prog_check_recur(prog))
1078		return sleepable ? __bpf_prog_exit_sleepable_recur :
1079			__bpf_prog_exit_recur;
1080
1081	if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
1082	    prog->expected_attach_type == BPF_LSM_CGROUP)
1083		return __bpf_prog_exit_lsm_cgroup;
1084
1085	return sleepable ? __bpf_prog_exit_sleepable : __bpf_prog_exit;
1086}
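/* The arch JIT is expected to use these selectors when generating the
 * trampoline, e.g. (sketch; emit_call() is only a placeholder for the
 * architecture's call-emission helper):
 *
 *	emit_call(&prog_buf, bpf_trampoline_enter(p));
 *	...
 *	emit_call(&prog_buf, bpf_trampoline_exit(p));
 *
 * so recursion-protected, sleepable and BPF_LSM_CGROUP programs all get
 * the matching enter/exit pair chosen above.
 */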
1087
1088int __weak
1089arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
1090			    const struct btf_func_model *m, u32 flags,
1091			    struct bpf_tramp_links *tlinks,
1092			    void *func_addr)
1093{
1094	return -ENOTSUPP;
1095}
1096
1097void * __weak arch_alloc_bpf_trampoline(unsigned int size)
1098{
1099	void *image;
1100
1101	if (WARN_ON_ONCE(size > PAGE_SIZE))
1102		return NULL;
1103	image = bpf_jit_alloc_exec(PAGE_SIZE);
1104	if (image)
1105		set_vm_flush_reset_perms(image);
1106	return image;
1107}
1108
1109void __weak arch_free_bpf_trampoline(void *image, unsigned int size)
1110{
1111	WARN_ON_ONCE(size > PAGE_SIZE);
1112	/* bpf_jit_free_exec doesn't need "size", but
1113	 * bpf_prog_pack_free() needs it.
1114	 */
1115	bpf_jit_free_exec(image);
1116}
1117
1118int __weak arch_protect_bpf_trampoline(void *image, unsigned int size)
1119{
1120	WARN_ON_ONCE(size > PAGE_SIZE);
1121	return set_memory_rox((long)image, 1);
1122}
1123
1124int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
1125				    struct bpf_tramp_links *tlinks, void *func_addr)
1126{
1127	return -ENOTSUPP;
1128}
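/* The generic code is expected to drive these arch hooks roughly in this
 * order (sketch; the actual sequence lives in bpf_trampoline_update()
 * earlier in this file):
 *
 *	size = arch_bpf_trampoline_size(m, flags, tlinks, func_addr);
 *	image = arch_alloc_bpf_trampoline(size);
 *	err = arch_prepare_bpf_trampoline(im, image, image + size,
 *					  m, flags, tlinks, func_addr);
 *	err = arch_protect_bpf_trampoline(image, size);
 *	...
 *	arch_free_bpf_trampoline(image, size);
 */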
1129
1130static int __init init_trampolines(void)
1131{
1132	int i;
1133
1134	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
1135		INIT_HLIST_HEAD(&trampoline_table[i]);
1136	return 0;
1137}
1138late_initcall(init_trampolines);