    1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
    2
    3/*
    4 * Common eBPF ELF object loading operations.
    5 *
    6 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
    7 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
    8 * Copyright (C) 2015 Huawei Inc.
    9 * Copyright (C) 2017 Nicira, Inc.
   10 * Copyright (C) 2019 Isovalent, Inc.
   11 */
   12
   13#ifndef _GNU_SOURCE
   14#define _GNU_SOURCE
   15#endif
   16#include <stdlib.h>
   17#include <stdio.h>
   18#include <stdarg.h>
   19#include <libgen.h>
   20#include <inttypes.h>
   21#include <limits.h>
   22#include <string.h>
   23#include <unistd.h>
   24#include <endian.h>
   25#include <fcntl.h>
   26#include <errno.h>
   27#include <ctype.h>
   28#include <asm/unistd.h>
   29#include <linux/err.h>
   30#include <linux/kernel.h>
   31#include <linux/bpf.h>
   32#include <linux/btf.h>
   33#include <linux/filter.h>
   34#include <linux/limits.h>
   35#include <linux/perf_event.h>
   36#include <linux/ring_buffer.h>
   37#include <sys/epoll.h>
   38#include <sys/ioctl.h>
   39#include <sys/mman.h>
   40#include <sys/stat.h>
   41#include <sys/types.h>
   42#include <sys/vfs.h>
   43#include <sys/utsname.h>
   44#include <sys/resource.h>
   45#include <libelf.h>
   46#include <gelf.h>
   47#include <zlib.h>
   48
   49#include "libbpf.h"
   50#include "bpf.h"
   51#include "btf.h"
   52#include "str_error.h"
   53#include "libbpf_internal.h"
   54#include "hashmap.h"
   55#include "bpf_gen_internal.h"
   56#include "zip.h"
   57
   58#ifndef BPF_FS_MAGIC
   59#define BPF_FS_MAGIC		0xcafe4a11
   60#endif
   61
   62#define BPF_INSN_SZ (sizeof(struct bpf_insn))
   63
    64/* vfprintf() in __base_pr() uses a nonliteral format string. It may break
    65 * compilation if the user enables the corresponding warning. Disable it explicitly.
   66 */
   67#pragma GCC diagnostic ignored "-Wformat-nonliteral"
   68
   69#define __printf(a, b)	__attribute__((format(printf, a, b)))
   70
   71static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
   72static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
   73
   74static const char * const attach_type_name[] = {
   75	[BPF_CGROUP_INET_INGRESS]	= "cgroup_inet_ingress",
   76	[BPF_CGROUP_INET_EGRESS]	= "cgroup_inet_egress",
   77	[BPF_CGROUP_INET_SOCK_CREATE]	= "cgroup_inet_sock_create",
   78	[BPF_CGROUP_INET_SOCK_RELEASE]	= "cgroup_inet_sock_release",
   79	[BPF_CGROUP_SOCK_OPS]		= "cgroup_sock_ops",
   80	[BPF_CGROUP_DEVICE]		= "cgroup_device",
   81	[BPF_CGROUP_INET4_BIND]		= "cgroup_inet4_bind",
   82	[BPF_CGROUP_INET6_BIND]		= "cgroup_inet6_bind",
   83	[BPF_CGROUP_INET4_CONNECT]	= "cgroup_inet4_connect",
   84	[BPF_CGROUP_INET6_CONNECT]	= "cgroup_inet6_connect",
   85	[BPF_CGROUP_UNIX_CONNECT]       = "cgroup_unix_connect",
   86	[BPF_CGROUP_INET4_POST_BIND]	= "cgroup_inet4_post_bind",
   87	[BPF_CGROUP_INET6_POST_BIND]	= "cgroup_inet6_post_bind",
   88	[BPF_CGROUP_INET4_GETPEERNAME]	= "cgroup_inet4_getpeername",
   89	[BPF_CGROUP_INET6_GETPEERNAME]	= "cgroup_inet6_getpeername",
   90	[BPF_CGROUP_UNIX_GETPEERNAME]	= "cgroup_unix_getpeername",
   91	[BPF_CGROUP_INET4_GETSOCKNAME]	= "cgroup_inet4_getsockname",
   92	[BPF_CGROUP_INET6_GETSOCKNAME]	= "cgroup_inet6_getsockname",
   93	[BPF_CGROUP_UNIX_GETSOCKNAME]	= "cgroup_unix_getsockname",
   94	[BPF_CGROUP_UDP4_SENDMSG]	= "cgroup_udp4_sendmsg",
   95	[BPF_CGROUP_UDP6_SENDMSG]	= "cgroup_udp6_sendmsg",
   96	[BPF_CGROUP_UNIX_SENDMSG]	= "cgroup_unix_sendmsg",
   97	[BPF_CGROUP_SYSCTL]		= "cgroup_sysctl",
   98	[BPF_CGROUP_UDP4_RECVMSG]	= "cgroup_udp4_recvmsg",
   99	[BPF_CGROUP_UDP6_RECVMSG]	= "cgroup_udp6_recvmsg",
  100	[BPF_CGROUP_UNIX_RECVMSG]	= "cgroup_unix_recvmsg",
  101	[BPF_CGROUP_GETSOCKOPT]		= "cgroup_getsockopt",
  102	[BPF_CGROUP_SETSOCKOPT]		= "cgroup_setsockopt",
  103	[BPF_SK_SKB_STREAM_PARSER]	= "sk_skb_stream_parser",
  104	[BPF_SK_SKB_STREAM_VERDICT]	= "sk_skb_stream_verdict",
  105	[BPF_SK_SKB_VERDICT]		= "sk_skb_verdict",
  106	[BPF_SK_MSG_VERDICT]		= "sk_msg_verdict",
  107	[BPF_LIRC_MODE2]		= "lirc_mode2",
  108	[BPF_FLOW_DISSECTOR]		= "flow_dissector",
  109	[BPF_TRACE_RAW_TP]		= "trace_raw_tp",
  110	[BPF_TRACE_FENTRY]		= "trace_fentry",
  111	[BPF_TRACE_FEXIT]		= "trace_fexit",
  112	[BPF_MODIFY_RETURN]		= "modify_return",
  113	[BPF_LSM_MAC]			= "lsm_mac",
  114	[BPF_LSM_CGROUP]		= "lsm_cgroup",
  115	[BPF_SK_LOOKUP]			= "sk_lookup",
  116	[BPF_TRACE_ITER]		= "trace_iter",
  117	[BPF_XDP_DEVMAP]		= "xdp_devmap",
  118	[BPF_XDP_CPUMAP]		= "xdp_cpumap",
  119	[BPF_XDP]			= "xdp",
  120	[BPF_SK_REUSEPORT_SELECT]	= "sk_reuseport_select",
  121	[BPF_SK_REUSEPORT_SELECT_OR_MIGRATE]	= "sk_reuseport_select_or_migrate",
  122	[BPF_PERF_EVENT]		= "perf_event",
  123	[BPF_TRACE_KPROBE_MULTI]	= "trace_kprobe_multi",
  124	[BPF_STRUCT_OPS]		= "struct_ops",
  125	[BPF_NETFILTER]			= "netfilter",
  126	[BPF_TCX_INGRESS]		= "tcx_ingress",
  127	[BPF_TCX_EGRESS]		= "tcx_egress",
  128	[BPF_TRACE_UPROBE_MULTI]	= "trace_uprobe_multi",
  129	[BPF_NETKIT_PRIMARY]		= "netkit_primary",
  130	[BPF_NETKIT_PEER]		= "netkit_peer",
  131};
  132
  133static const char * const link_type_name[] = {
  134	[BPF_LINK_TYPE_UNSPEC]			= "unspec",
  135	[BPF_LINK_TYPE_RAW_TRACEPOINT]		= "raw_tracepoint",
  136	[BPF_LINK_TYPE_TRACING]			= "tracing",
  137	[BPF_LINK_TYPE_CGROUP]			= "cgroup",
  138	[BPF_LINK_TYPE_ITER]			= "iter",
  139	[BPF_LINK_TYPE_NETNS]			= "netns",
  140	[BPF_LINK_TYPE_XDP]			= "xdp",
  141	[BPF_LINK_TYPE_PERF_EVENT]		= "perf_event",
  142	[BPF_LINK_TYPE_KPROBE_MULTI]		= "kprobe_multi",
  143	[BPF_LINK_TYPE_STRUCT_OPS]		= "struct_ops",
  144	[BPF_LINK_TYPE_NETFILTER]		= "netfilter",
  145	[BPF_LINK_TYPE_TCX]			= "tcx",
  146	[BPF_LINK_TYPE_UPROBE_MULTI]		= "uprobe_multi",
  147	[BPF_LINK_TYPE_NETKIT]			= "netkit",
  148};
  149
  150static const char * const map_type_name[] = {
  151	[BPF_MAP_TYPE_UNSPEC]			= "unspec",
  152	[BPF_MAP_TYPE_HASH]			= "hash",
  153	[BPF_MAP_TYPE_ARRAY]			= "array",
  154	[BPF_MAP_TYPE_PROG_ARRAY]		= "prog_array",
  155	[BPF_MAP_TYPE_PERF_EVENT_ARRAY]		= "perf_event_array",
  156	[BPF_MAP_TYPE_PERCPU_HASH]		= "percpu_hash",
  157	[BPF_MAP_TYPE_PERCPU_ARRAY]		= "percpu_array",
  158	[BPF_MAP_TYPE_STACK_TRACE]		= "stack_trace",
  159	[BPF_MAP_TYPE_CGROUP_ARRAY]		= "cgroup_array",
  160	[BPF_MAP_TYPE_LRU_HASH]			= "lru_hash",
  161	[BPF_MAP_TYPE_LRU_PERCPU_HASH]		= "lru_percpu_hash",
  162	[BPF_MAP_TYPE_LPM_TRIE]			= "lpm_trie",
  163	[BPF_MAP_TYPE_ARRAY_OF_MAPS]		= "array_of_maps",
  164	[BPF_MAP_TYPE_HASH_OF_MAPS]		= "hash_of_maps",
  165	[BPF_MAP_TYPE_DEVMAP]			= "devmap",
  166	[BPF_MAP_TYPE_DEVMAP_HASH]		= "devmap_hash",
  167	[BPF_MAP_TYPE_SOCKMAP]			= "sockmap",
  168	[BPF_MAP_TYPE_CPUMAP]			= "cpumap",
  169	[BPF_MAP_TYPE_XSKMAP]			= "xskmap",
  170	[BPF_MAP_TYPE_SOCKHASH]			= "sockhash",
  171	[BPF_MAP_TYPE_CGROUP_STORAGE]		= "cgroup_storage",
  172	[BPF_MAP_TYPE_REUSEPORT_SOCKARRAY]	= "reuseport_sockarray",
  173	[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]	= "percpu_cgroup_storage",
  174	[BPF_MAP_TYPE_QUEUE]			= "queue",
  175	[BPF_MAP_TYPE_STACK]			= "stack",
  176	[BPF_MAP_TYPE_SK_STORAGE]		= "sk_storage",
  177	[BPF_MAP_TYPE_STRUCT_OPS]		= "struct_ops",
  178	[BPF_MAP_TYPE_RINGBUF]			= "ringbuf",
  179	[BPF_MAP_TYPE_INODE_STORAGE]		= "inode_storage",
  180	[BPF_MAP_TYPE_TASK_STORAGE]		= "task_storage",
  181	[BPF_MAP_TYPE_BLOOM_FILTER]		= "bloom_filter",
  182	[BPF_MAP_TYPE_USER_RINGBUF]             = "user_ringbuf",
  183	[BPF_MAP_TYPE_CGRP_STORAGE]		= "cgrp_storage",
  184};
  185
  186static const char * const prog_type_name[] = {
  187	[BPF_PROG_TYPE_UNSPEC]			= "unspec",
  188	[BPF_PROG_TYPE_SOCKET_FILTER]		= "socket_filter",
  189	[BPF_PROG_TYPE_KPROBE]			= "kprobe",
  190	[BPF_PROG_TYPE_SCHED_CLS]		= "sched_cls",
  191	[BPF_PROG_TYPE_SCHED_ACT]		= "sched_act",
  192	[BPF_PROG_TYPE_TRACEPOINT]		= "tracepoint",
  193	[BPF_PROG_TYPE_XDP]			= "xdp",
  194	[BPF_PROG_TYPE_PERF_EVENT]		= "perf_event",
  195	[BPF_PROG_TYPE_CGROUP_SKB]		= "cgroup_skb",
  196	[BPF_PROG_TYPE_CGROUP_SOCK]		= "cgroup_sock",
  197	[BPF_PROG_TYPE_LWT_IN]			= "lwt_in",
  198	[BPF_PROG_TYPE_LWT_OUT]			= "lwt_out",
  199	[BPF_PROG_TYPE_LWT_XMIT]		= "lwt_xmit",
  200	[BPF_PROG_TYPE_SOCK_OPS]		= "sock_ops",
  201	[BPF_PROG_TYPE_SK_SKB]			= "sk_skb",
  202	[BPF_PROG_TYPE_CGROUP_DEVICE]		= "cgroup_device",
  203	[BPF_PROG_TYPE_SK_MSG]			= "sk_msg",
  204	[BPF_PROG_TYPE_RAW_TRACEPOINT]		= "raw_tracepoint",
  205	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR]	= "cgroup_sock_addr",
  206	[BPF_PROG_TYPE_LWT_SEG6LOCAL]		= "lwt_seg6local",
  207	[BPF_PROG_TYPE_LIRC_MODE2]		= "lirc_mode2",
  208	[BPF_PROG_TYPE_SK_REUSEPORT]		= "sk_reuseport",
  209	[BPF_PROG_TYPE_FLOW_DISSECTOR]		= "flow_dissector",
  210	[BPF_PROG_TYPE_CGROUP_SYSCTL]		= "cgroup_sysctl",
  211	[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE]	= "raw_tracepoint_writable",
  212	[BPF_PROG_TYPE_CGROUP_SOCKOPT]		= "cgroup_sockopt",
  213	[BPF_PROG_TYPE_TRACING]			= "tracing",
  214	[BPF_PROG_TYPE_STRUCT_OPS]		= "struct_ops",
  215	[BPF_PROG_TYPE_EXT]			= "ext",
  216	[BPF_PROG_TYPE_LSM]			= "lsm",
  217	[BPF_PROG_TYPE_SK_LOOKUP]		= "sk_lookup",
  218	[BPF_PROG_TYPE_SYSCALL]			= "syscall",
  219	[BPF_PROG_TYPE_NETFILTER]		= "netfilter",
  220};
  221
  222static int __base_pr(enum libbpf_print_level level, const char *format,
  223		     va_list args)
  224{
  225	if (level == LIBBPF_DEBUG)
  226		return 0;
  227
  228	return vfprintf(stderr, format, args);
  229}
  230
  231static libbpf_print_fn_t __libbpf_pr = __base_pr;
  232
  233libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
  234{
  235	libbpf_print_fn_t old_print_fn;
  236
  237	old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);
  238
  239	return old_print_fn;
  240}
  241
  242__printf(2, 3)
  243void libbpf_print(enum libbpf_print_level level, const char *format, ...)
  244{
  245	va_list args;
  246	int old_errno;
  247	libbpf_print_fn_t print_fn;
  248
  249	print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
  250	if (!print_fn)
  251		return;
  252
  253	old_errno = errno;
  254
  255	va_start(args, format);
  256	__libbpf_pr(level, format, args);
  257	va_end(args);
  258
  259	errno = old_errno;
  260}
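
/* Illustrative sketch (not part of upstream libbpf.c): an application can
 * redirect libbpf's logging through libbpf_set_print(). The callback has the
 * same signature as __base_pr() above; "my_print" is a hypothetical name.
 *
 *	static int my_print(enum libbpf_print_level level,
 *			    const char *format, va_list args)
 *	{
 *		if (level == LIBBPF_DEBUG)
 *			return 0;
 *		return vfprintf(stderr, format, args);
 *	}
 *
 *	libbpf_print_fn_t old_fn = libbpf_set_print(my_print);
 */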
  261
  262static void pr_perm_msg(int err)
  263{
  264	struct rlimit limit;
  265	char buf[100];
  266
  267	if (err != -EPERM || geteuid() != 0)
  268		return;
  269
  270	err = getrlimit(RLIMIT_MEMLOCK, &limit);
  271	if (err)
  272		return;
  273
  274	if (limit.rlim_cur == RLIM_INFINITY)
  275		return;
  276
  277	if (limit.rlim_cur < 1024)
  278		snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
  279	else if (limit.rlim_cur < 1024*1024)
  280		snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
  281	else
  282		snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
  283
  284	pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
  285		buf);
  286}
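
/* Illustrative note (not part of libbpf.c): the hint above matters mostly on
 * kernels that still charge BPF memory against RLIMIT_MEMLOCK (roughly
 * pre-5.11; newer kernels use memcg-based accounting). A common
 * application-side workaround, sketched here, is to raise the limit before
 * loading any BPF objects:
 *
 *	struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };
 *
 *	if (setrlimit(RLIMIT_MEMLOCK, &r))
 *		fprintf(stderr, "setrlimit: %s\n", strerror(errno));
 */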
  287
  288#define STRERR_BUFSIZE  128
  289
  290/* Copied from tools/perf/util/util.h */
  291#ifndef zfree
  292# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
  293#endif
  294
  295#ifndef zclose
  296# define zclose(fd) ({			\
  297	int ___err = 0;			\
  298	if ((fd) >= 0)			\
  299		___err = close((fd));	\
  300	fd = -1;			\
  301	___err; })
  302#endif
  303
  304static inline __u64 ptr_to_u64(const void *ptr)
  305{
  306	return (__u64) (unsigned long) ptr;
  307}
  308
  309int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
  310{
  311	/* as of v1.0 libbpf_set_strict_mode() is a no-op */
  312	return 0;
  313}
  314
  315__u32 libbpf_major_version(void)
  316{
  317	return LIBBPF_MAJOR_VERSION;
  318}
  319
  320__u32 libbpf_minor_version(void)
  321{
  322	return LIBBPF_MINOR_VERSION;
  323}
  324
  325const char *libbpf_version_string(void)
  326{
  327#define __S(X) #X
  328#define _S(X) __S(X)
  329	return  "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
  330#undef _S
  331#undef __S
  332}
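
/* Illustrative example (not part of libbpf.c): the accessors above allow a
 * runtime version check, e.g. requiring at least libbpf 1.3:
 *
 *	if (libbpf_major_version() > 1 ||
 *	    (libbpf_major_version() == 1 && libbpf_minor_version() >= 3))
 *		printf("using %s\n", libbpf_version_string());
 */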
  333
  334enum reloc_type {
  335	RELO_LD64,
  336	RELO_CALL,
  337	RELO_DATA,
  338	RELO_EXTERN_LD64,
  339	RELO_EXTERN_CALL,
  340	RELO_SUBPROG_ADDR,
  341	RELO_CORE,
  342};
  343
  344struct reloc_desc {
  345	enum reloc_type type;
  346	int insn_idx;
  347	union {
  348		const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
  349		struct {
  350			int map_idx;
  351			int sym_off;
  352			int ext_idx;
  353		};
  354	};
  355};
  356
  357/* stored as sec_def->cookie for all libbpf-supported SEC()s */
  358enum sec_def_flags {
  359	SEC_NONE = 0,
  360	/* expected_attach_type is optional, if kernel doesn't support that */
  361	SEC_EXP_ATTACH_OPT = 1,
  362	/* legacy, only used by libbpf_get_type_names() and
  363	 * libbpf_attach_type_by_name(), not used by libbpf itself at all.
  364	 * This used to be associated with cgroup (and few other) BPF programs
  365	 * that were attachable through BPF_PROG_ATTACH command. Pretty
  366	 * meaningless nowadays, though.
  367	 */
  368	SEC_ATTACHABLE = 2,
  369	SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
  370	/* attachment target is specified through BTF ID in either kernel or
  371	 * other BPF program's BTF object
  372	 */
  373	SEC_ATTACH_BTF = 4,
  374	/* BPF program type allows sleeping/blocking in kernel */
  375	SEC_SLEEPABLE = 8,
   376	/* BPF program supports non-linear XDP buffers */
  377	SEC_XDP_FRAGS = 16,
  378	/* Setup proper attach type for usdt probes. */
  379	SEC_USDT = 32,
  380};
  381
  382struct bpf_sec_def {
  383	char *sec;
  384	enum bpf_prog_type prog_type;
  385	enum bpf_attach_type expected_attach_type;
  386	long cookie;
  387	int handler_id;
  388
  389	libbpf_prog_setup_fn_t prog_setup_fn;
  390	libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
  391	libbpf_prog_attach_fn_t prog_attach_fn;
  392};
  393
  394/*
  395 * bpf_prog should be a better name but it has been used in
  396 * linux/filter.h.
  397 */
  398struct bpf_program {
  399	char *name;
  400	char *sec_name;
  401	size_t sec_idx;
  402	const struct bpf_sec_def *sec_def;
  403	/* this program's instruction offset (in number of instructions)
  404	 * within its containing ELF section
  405	 */
  406	size_t sec_insn_off;
   407	/* number of original instructions in the ELF section belonging to this
   408	 * program, not taking into account subprogram instructions possibly
   409	 * appended later during relocation
  410	 */
  411	size_t sec_insn_cnt;
   412	/* Offset (in number of instructions) of the start of instructions
   413	 * belonging to this BPF program within its containing main BPF
   414	 * program. For the entry-point (main) BPF program, this is always
   415	 * zero. For a sub-program, this gets reset before each main BPF
   416	 * program is processed and relocated, and is used to determine
   417	 * whether the sub-program was already appended to the main program
   418	 * and, if yes, at which instruction offset.
  419	 */
  420	size_t sub_insn_off;
  421
  422	/* instructions that belong to BPF program; insns[0] is located at
  423	 * sec_insn_off instruction within its ELF section in ELF file, so
  424	 * when mapping ELF file instruction index to the local instruction,
  425	 * one needs to subtract sec_insn_off; and vice versa.
  426	 */
  427	struct bpf_insn *insns;
   428	/* actual number of instructions in this BPF program's image; for
  429	 * entry-point BPF programs this includes the size of main program
  430	 * itself plus all the used sub-programs, appended at the end
  431	 */
  432	size_t insns_cnt;
  433
  434	struct reloc_desc *reloc_desc;
  435	int nr_reloc;
  436
  437	/* BPF verifier log settings */
  438	char *log_buf;
  439	size_t log_size;
  440	__u32 log_level;
  441
  442	struct bpf_object *obj;
  443
  444	int fd;
  445	bool autoload;
  446	bool autoattach;
  447	bool sym_global;
  448	bool mark_btf_static;
  449	enum bpf_prog_type type;
  450	enum bpf_attach_type expected_attach_type;
  451	int exception_cb_idx;
  452
  453	int prog_ifindex;
  454	__u32 attach_btf_obj_fd;
  455	__u32 attach_btf_id;
  456	__u32 attach_prog_fd;
  457
  458	void *func_info;
  459	__u32 func_info_rec_size;
  460	__u32 func_info_cnt;
  461
  462	void *line_info;
  463	__u32 line_info_rec_size;
  464	__u32 line_info_cnt;
  465	__u32 prog_flags;
  466};
  467
  468struct bpf_struct_ops {
  469	const char *tname;
  470	const struct btf_type *type;
  471	struct bpf_program **progs;
  472	__u32 *kern_func_off;
  473	/* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
  474	void *data;
  475	/* e.g. struct bpf_struct_ops_tcp_congestion_ops in
  476	 *      btf_vmlinux's format.
  477	 * struct bpf_struct_ops_tcp_congestion_ops {
  478	 *	[... some other kernel fields ...]
  479	 *	struct tcp_congestion_ops data;
  480	 * }
  481	 * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
  482	 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
  483	 * from "data".
  484	 */
  485	void *kern_vdata;
  486	__u32 type_id;
  487};
  488
  489#define DATA_SEC ".data"
  490#define BSS_SEC ".bss"
  491#define RODATA_SEC ".rodata"
  492#define KCONFIG_SEC ".kconfig"
  493#define KSYMS_SEC ".ksyms"
  494#define STRUCT_OPS_SEC ".struct_ops"
  495#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
  496
  497enum libbpf_map_type {
  498	LIBBPF_MAP_UNSPEC,
  499	LIBBPF_MAP_DATA,
  500	LIBBPF_MAP_BSS,
  501	LIBBPF_MAP_RODATA,
  502	LIBBPF_MAP_KCONFIG,
  503};
  504
  505struct bpf_map_def {
  506	unsigned int type;
  507	unsigned int key_size;
  508	unsigned int value_size;
  509	unsigned int max_entries;
  510	unsigned int map_flags;
  511};
  512
  513struct bpf_map {
  514	struct bpf_object *obj;
  515	char *name;
  516	/* real_name is defined for special internal maps (.rodata*,
  517	 * .data*, .bss, .kconfig) and preserves their original ELF section
  518	 * name. This is important to be able to find corresponding BTF
  519	 * DATASEC information.
  520	 */
  521	char *real_name;
  522	int fd;
  523	int sec_idx;
  524	size_t sec_offset;
  525	int map_ifindex;
  526	int inner_map_fd;
  527	struct bpf_map_def def;
  528	__u32 numa_node;
  529	__u32 btf_var_idx;
  530	__u32 btf_key_type_id;
  531	__u32 btf_value_type_id;
  532	__u32 btf_vmlinux_value_type_id;
  533	enum libbpf_map_type libbpf_type;
  534	void *mmaped;
  535	struct bpf_struct_ops *st_ops;
  536	struct bpf_map *inner_map;
  537	void **init_slots;
  538	int init_slots_sz;
  539	char *pin_path;
  540	bool pinned;
  541	bool reused;
  542	bool autocreate;
  543	__u64 map_extra;
  544};
  545
  546enum extern_type {
  547	EXT_UNKNOWN,
  548	EXT_KCFG,
  549	EXT_KSYM,
  550};
  551
  552enum kcfg_type {
  553	KCFG_UNKNOWN,
  554	KCFG_CHAR,
  555	KCFG_BOOL,
  556	KCFG_INT,
  557	KCFG_TRISTATE,
  558	KCFG_CHAR_ARR,
  559};
  560
  561struct extern_desc {
  562	enum extern_type type;
  563	int sym_idx;
  564	int btf_id;
  565	int sec_btf_id;
  566	const char *name;
  567	char *essent_name;
  568	bool is_set;
  569	bool is_weak;
  570	union {
  571		struct {
  572			enum kcfg_type type;
  573			int sz;
  574			int align;
  575			int data_off;
  576			bool is_signed;
  577		} kcfg;
  578		struct {
  579			unsigned long long addr;
  580
  581			/* target btf_id of the corresponding kernel var. */
  582			int kernel_btf_obj_fd;
  583			int kernel_btf_id;
  584
  585			/* local btf_id of the ksym extern's type. */
  586			__u32 type_id;
  587			/* BTF fd index to be patched in for insn->off, this is
  588			 * 0 for vmlinux BTF, index in obj->fd_array for module
  589			 * BTF
  590			 */
  591			__s16 btf_fd_idx;
  592		} ksym;
  593	};
  594};
  595
  596struct module_btf {
  597	struct btf *btf;
  598	char *name;
  599	__u32 id;
  600	int fd;
  601	int fd_array_idx;
  602};
  603
  604enum sec_type {
  605	SEC_UNUSED = 0,
  606	SEC_RELO,
  607	SEC_BSS,
  608	SEC_DATA,
  609	SEC_RODATA,
  610};
  611
  612struct elf_sec_desc {
  613	enum sec_type sec_type;
  614	Elf64_Shdr *shdr;
  615	Elf_Data *data;
  616};
  617
  618struct elf_state {
  619	int fd;
  620	const void *obj_buf;
  621	size_t obj_buf_sz;
  622	Elf *elf;
  623	Elf64_Ehdr *ehdr;
  624	Elf_Data *symbols;
  625	Elf_Data *st_ops_data;
  626	Elf_Data *st_ops_link_data;
  627	size_t shstrndx; /* section index for section name strings */
  628	size_t strtabidx;
  629	struct elf_sec_desc *secs;
  630	size_t sec_cnt;
  631	int btf_maps_shndx;
  632	__u32 btf_maps_sec_btf_id;
  633	int text_shndx;
  634	int symbols_shndx;
  635	int st_ops_shndx;
  636	int st_ops_link_shndx;
  637};
  638
  639struct usdt_manager;
  640
  641struct bpf_object {
  642	char name[BPF_OBJ_NAME_LEN];
  643	char license[64];
  644	__u32 kern_version;
  645
  646	struct bpf_program *programs;
  647	size_t nr_programs;
  648	struct bpf_map *maps;
  649	size_t nr_maps;
  650	size_t maps_cap;
  651
  652	char *kconfig;
  653	struct extern_desc *externs;
  654	int nr_extern;
  655	int kconfig_map_idx;
  656
  657	bool loaded;
  658	bool has_subcalls;
  659	bool has_rodata;
  660
  661	struct bpf_gen *gen_loader;
  662
  663	/* Information when doing ELF related work. Only valid if efile.elf is not NULL */
  664	struct elf_state efile;
  665
  666	struct btf *btf;
  667	struct btf_ext *btf_ext;
  668
  669	/* Parse and load BTF vmlinux if any of the programs in the object need
  670	 * it at load time.
  671	 */
  672	struct btf *btf_vmlinux;
  673	/* Path to the custom BTF to be used for BPF CO-RE relocations as an
  674	 * override for vmlinux BTF.
  675	 */
  676	char *btf_custom_path;
  677	/* vmlinux BTF override for CO-RE relocations */
  678	struct btf *btf_vmlinux_override;
  679	/* Lazily initialized kernel module BTFs */
  680	struct module_btf *btf_modules;
  681	bool btf_modules_loaded;
  682	size_t btf_module_cnt;
  683	size_t btf_module_cap;
  684
  685	/* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
  686	char *log_buf;
  687	size_t log_size;
  688	__u32 log_level;
  689
  690	int *fd_array;
  691	size_t fd_array_cap;
  692	size_t fd_array_cnt;
  693
  694	struct usdt_manager *usdt_man;
  695
  696	char path[];
  697};
  698
  699static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
  700static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
  701static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
  702static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
  703static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
  704static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
  705static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
  706static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
  707static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);
  708
  709void bpf_program__unload(struct bpf_program *prog)
  710{
  711	if (!prog)
  712		return;
  713
  714	zclose(prog->fd);
  715
  716	zfree(&prog->func_info);
  717	zfree(&prog->line_info);
  718}
  719
  720static void bpf_program__exit(struct bpf_program *prog)
  721{
  722	if (!prog)
  723		return;
  724
  725	bpf_program__unload(prog);
  726	zfree(&prog->name);
  727	zfree(&prog->sec_name);
  728	zfree(&prog->insns);
  729	zfree(&prog->reloc_desc);
  730
  731	prog->nr_reloc = 0;
  732	prog->insns_cnt = 0;
  733	prog->sec_idx = -1;
  734}
  735
  736static bool insn_is_subprog_call(const struct bpf_insn *insn)
  737{
  738	return BPF_CLASS(insn->code) == BPF_JMP &&
  739	       BPF_OP(insn->code) == BPF_CALL &&
  740	       BPF_SRC(insn->code) == BPF_K &&
  741	       insn->src_reg == BPF_PSEUDO_CALL &&
  742	       insn->dst_reg == 0 &&
  743	       insn->off == 0;
  744}
  745
  746static bool is_call_insn(const struct bpf_insn *insn)
  747{
  748	return insn->code == (BPF_JMP | BPF_CALL);
  749}
  750
  751static bool insn_is_pseudo_func(struct bpf_insn *insn)
  752{
  753	return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
  754}
  755
  756static int
  757bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
  758		      const char *name, size_t sec_idx, const char *sec_name,
  759		      size_t sec_off, void *insn_data, size_t insn_data_sz)
  760{
  761	if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
  762		pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
  763			sec_name, name, sec_off, insn_data_sz);
  764		return -EINVAL;
  765	}
  766
  767	memset(prog, 0, sizeof(*prog));
  768	prog->obj = obj;
  769
  770	prog->sec_idx = sec_idx;
  771	prog->sec_insn_off = sec_off / BPF_INSN_SZ;
  772	prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
  773	/* insns_cnt can later be increased by appending used subprograms */
  774	prog->insns_cnt = prog->sec_insn_cnt;
  775
  776	prog->type = BPF_PROG_TYPE_UNSPEC;
  777	prog->fd = -1;
  778	prog->exception_cb_idx = -1;
  779
  780	/* libbpf's convention for SEC("?abc...") is that it's just like
  781	 * SEC("abc...") but the corresponding bpf_program starts out with
  782	 * autoload set to false.
  783	 */
  784	if (sec_name[0] == '?') {
  785		prog->autoload = false;
  786		/* from now on forget there was ? in section name */
  787		sec_name++;
  788	} else {
  789		prog->autoload = true;
  790	}
  791
  792	prog->autoattach = true;
  793
  794	/* inherit object's log_level */
  795	prog->log_level = obj->log_level;
  796
  797	prog->sec_name = strdup(sec_name);
  798	if (!prog->sec_name)
  799		goto errout;
  800
  801	prog->name = strdup(name);
  802	if (!prog->name)
  803		goto errout;
  804
  805	prog->insns = malloc(insn_data_sz);
  806	if (!prog->insns)
  807		goto errout;
  808	memcpy(prog->insns, insn_data, insn_data_sz);
  809
  810	return 0;
  811errout:
  812	pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
  813	bpf_program__exit(prog);
  814	return -ENOMEM;
  815}
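
/* Illustrative example (not part of libbpf.c): the '?' handling above means a
 * program declared on the BPF side as, e.g.,
 *
 *	SEC("?kprobe/do_sys_openat2")
 *	int BPF_KPROBE(my_probe)
 *	{
 *		return 0;
 *	}
 *
 * starts out with autoload disabled. User space can opt it back in between
 * bpf_object__open() and bpf_object__load() via
 * bpf_program__set_autoload(prog, true); the names above are hypothetical.
 */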
  816
  817static int
  818bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
  819			 const char *sec_name, int sec_idx)
  820{
  821	Elf_Data *symbols = obj->efile.symbols;
  822	struct bpf_program *prog, *progs;
  823	void *data = sec_data->d_buf;
  824	size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
  825	int nr_progs, err, i;
  826	const char *name;
  827	Elf64_Sym *sym;
  828
  829	progs = obj->programs;
  830	nr_progs = obj->nr_programs;
  831	nr_syms = symbols->d_size / sizeof(Elf64_Sym);
  832
  833	for (i = 0; i < nr_syms; i++) {
  834		sym = elf_sym_by_idx(obj, i);
  835
  836		if (sym->st_shndx != sec_idx)
  837			continue;
  838		if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
  839			continue;
  840
  841		prog_sz = sym->st_size;
  842		sec_off = sym->st_value;
  843
  844		name = elf_sym_str(obj, sym->st_name);
  845		if (!name) {
  846			pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
  847				sec_name, sec_off);
  848			return -LIBBPF_ERRNO__FORMAT;
  849		}
  850
  851		if (sec_off + prog_sz > sec_sz) {
  852			pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
  853				sec_name, sec_off);
  854			return -LIBBPF_ERRNO__FORMAT;
  855		}
  856
  857		if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
  858			pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
  859			return -ENOTSUP;
  860		}
  861
  862		pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
  863			 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
  864
  865		progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
  866		if (!progs) {
  867			/*
  868			 * In this case the original obj->programs
  869			 * is still valid, so don't need special treat for
  870			 * bpf_close_object().
  871			 */
  872			pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
  873				sec_name, name);
  874			return -ENOMEM;
  875		}
  876		obj->programs = progs;
  877
  878		prog = &progs[nr_progs];
  879
  880		err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
  881					    sec_off, data + sec_off, prog_sz);
  882		if (err)
  883			return err;
  884
  885		if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL)
  886			prog->sym_global = true;
  887
  888		/* if function is a global/weak symbol, but has restricted
  889		 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
  890		 * as static to enable more permissive BPF verification mode
  891		 * with more outside context available to BPF verifier
  892		 */
  893		if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
  894		    || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
  895			prog->mark_btf_static = true;
  896
  897		nr_progs++;
  898		obj->nr_programs = nr_progs;
  899	}
  900
  901	return 0;
  902}
  903
  904static const struct btf_member *
  905find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
  906{
  907	struct btf_member *m;
  908	int i;
  909
  910	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
  911		if (btf_member_bit_offset(t, i) == bit_offset)
  912			return m;
  913	}
  914
  915	return NULL;
  916}
  917
  918static const struct btf_member *
  919find_member_by_name(const struct btf *btf, const struct btf_type *t,
  920		    const char *name)
  921{
  922	struct btf_member *m;
  923	int i;
  924
  925	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
  926		if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
  927			return m;
  928	}
  929
  930	return NULL;
  931}
  932
  933#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
  934static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
  935				   const char *name, __u32 kind);
  936
  937static int
  938find_struct_ops_kern_types(const struct btf *btf, const char *tname,
  939			   const struct btf_type **type, __u32 *type_id,
  940			   const struct btf_type **vtype, __u32 *vtype_id,
  941			   const struct btf_member **data_member)
  942{
  943	const struct btf_type *kern_type, *kern_vtype;
  944	const struct btf_member *kern_data_member;
  945	__s32 kern_vtype_id, kern_type_id;
  946	__u32 i;
  947
  948	kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
  949	if (kern_type_id < 0) {
  950		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
  951			tname);
  952		return kern_type_id;
  953	}
  954	kern_type = btf__type_by_id(btf, kern_type_id);
  955
  956	/* Find the corresponding "map_value" type that will be used
  957	 * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
  958	 * find "struct bpf_struct_ops_tcp_congestion_ops" from the
  959	 * btf_vmlinux.
  960	 */
  961	kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
  962						tname, BTF_KIND_STRUCT);
  963	if (kern_vtype_id < 0) {
  964		pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
  965			STRUCT_OPS_VALUE_PREFIX, tname);
  966		return kern_vtype_id;
  967	}
  968	kern_vtype = btf__type_by_id(btf, kern_vtype_id);
  969
  970	/* Find "struct tcp_congestion_ops" from
  971	 * struct bpf_struct_ops_tcp_congestion_ops {
  972	 *	[ ... ]
  973	 *	struct tcp_congestion_ops data;
  974	 * }
  975	 */
  976	kern_data_member = btf_members(kern_vtype);
  977	for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
  978		if (kern_data_member->type == kern_type_id)
  979			break;
  980	}
  981	if (i == btf_vlen(kern_vtype)) {
  982		pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
  983			tname, STRUCT_OPS_VALUE_PREFIX, tname);
  984		return -EINVAL;
  985	}
  986
  987	*type = kern_type;
  988	*type_id = kern_type_id;
  989	*vtype = kern_vtype;
  990	*vtype_id = kern_vtype_id;
  991	*data_member = kern_data_member;
  992
  993	return 0;
  994}
  995
  996static bool bpf_map__is_struct_ops(const struct bpf_map *map)
  997{
  998	return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
  999}
 1000
 1001/* Init the map's fields that depend on kern_btf */
 1002static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
 1003					 const struct btf *btf,
 1004					 const struct btf *kern_btf)
 1005{
 1006	const struct btf_member *member, *kern_member, *kern_data_member;
 1007	const struct btf_type *type, *kern_type, *kern_vtype;
 1008	__u32 i, kern_type_id, kern_vtype_id, kern_data_off;
 1009	struct bpf_struct_ops *st_ops;
 1010	void *data, *kern_data;
 1011	const char *tname;
 1012	int err;
 1013
 1014	st_ops = map->st_ops;
 1015	type = st_ops->type;
 1016	tname = st_ops->tname;
 1017	err = find_struct_ops_kern_types(kern_btf, tname,
 1018					 &kern_type, &kern_type_id,
 1019					 &kern_vtype, &kern_vtype_id,
 1020					 &kern_data_member);
 1021	if (err)
 1022		return err;
 1023
 1024	pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
 1025		 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);
 1026
 1027	map->def.value_size = kern_vtype->size;
 1028	map->btf_vmlinux_value_type_id = kern_vtype_id;
 1029
 1030	st_ops->kern_vdata = calloc(1, kern_vtype->size);
 1031	if (!st_ops->kern_vdata)
 1032		return -ENOMEM;
 1033
 1034	data = st_ops->data;
 1035	kern_data_off = kern_data_member->offset / 8;
 1036	kern_data = st_ops->kern_vdata + kern_data_off;
 1037
 1038	member = btf_members(type);
 1039	for (i = 0; i < btf_vlen(type); i++, member++) {
 1040		const struct btf_type *mtype, *kern_mtype;
 1041		__u32 mtype_id, kern_mtype_id;
 1042		void *mdata, *kern_mdata;
 1043		__s64 msize, kern_msize;
 1044		__u32 moff, kern_moff;
 1045		__u32 kern_member_idx;
 1046		const char *mname;
 1047
 1048		mname = btf__name_by_offset(btf, member->name_off);
 1049		kern_member = find_member_by_name(kern_btf, kern_type, mname);
 1050		if (!kern_member) {
 1051			pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
 1052				map->name, mname);
 1053			return -ENOTSUP;
 1054		}
 1055
 1056		kern_member_idx = kern_member - btf_members(kern_type);
 1057		if (btf_member_bitfield_size(type, i) ||
 1058		    btf_member_bitfield_size(kern_type, kern_member_idx)) {
 1059			pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
 1060				map->name, mname);
 1061			return -ENOTSUP;
 1062		}
 1063
 1064		moff = member->offset / 8;
 1065		kern_moff = kern_member->offset / 8;
 1066
 1067		mdata = data + moff;
 1068		kern_mdata = kern_data + kern_moff;
 1069
 1070		mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
 1071		kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
 1072						    &kern_mtype_id);
 1073		if (BTF_INFO_KIND(mtype->info) !=
 1074		    BTF_INFO_KIND(kern_mtype->info)) {
 1075			pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
 1076				map->name, mname, BTF_INFO_KIND(mtype->info),
 1077				BTF_INFO_KIND(kern_mtype->info));
 1078			return -ENOTSUP;
 1079		}
 1080
 1081		if (btf_is_ptr(mtype)) {
 1082			struct bpf_program *prog;
 1083
 1084			prog = st_ops->progs[i];
 1085			if (!prog)
 1086				continue;
 1087
 1088			kern_mtype = skip_mods_and_typedefs(kern_btf,
 1089							    kern_mtype->type,
 1090							    &kern_mtype_id);
 1091
 1092			/* mtype->type must be a func_proto which was
 1093			 * guaranteed in bpf_object__collect_st_ops_relos(),
 1094			 * so only check kern_mtype for func_proto here.
 1095			 */
 1096			if (!btf_is_func_proto(kern_mtype)) {
 1097				pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
 1098					map->name, mname);
 1099				return -ENOTSUP;
 1100			}
 1101
 1102			prog->attach_btf_id = kern_type_id;
 1103			prog->expected_attach_type = kern_member_idx;
 1104
 1105			st_ops->kern_func_off[i] = kern_data_off + kern_moff;
 1106
 1107			pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
 1108				 map->name, mname, prog->name, moff,
 1109				 kern_moff);
 1110
 1111			continue;
 1112		}
 1113
 1114		msize = btf__resolve_size(btf, mtype_id);
 1115		kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
 1116		if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
 1117			pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
 1118				map->name, mname, (ssize_t)msize,
 1119				(ssize_t)kern_msize);
 1120			return -ENOTSUP;
 1121		}
 1122
 1123		pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
 1124			 map->name, mname, (unsigned int)msize,
 1125			 moff, kern_moff);
 1126		memcpy(kern_mdata, mdata, msize);
 1127	}
 1128
 1129	return 0;
 1130}
 1131
 1132static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
 1133{
 1134	struct bpf_map *map;
 1135	size_t i;
 1136	int err;
 1137
 1138	for (i = 0; i < obj->nr_maps; i++) {
 1139		map = &obj->maps[i];
 1140
 1141		if (!bpf_map__is_struct_ops(map))
 1142			continue;
 1143
 1144		err = bpf_map__init_kern_struct_ops(map, obj->btf,
 1145						    obj->btf_vmlinux);
 1146		if (err)
 1147			return err;
 1148	}
 1149
 1150	return 0;
 1151}
 1152
 1153static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
 1154				int shndx, Elf_Data *data, __u32 map_flags)
 1155{
 1156	const struct btf_type *type, *datasec;
 1157	const struct btf_var_secinfo *vsi;
 1158	struct bpf_struct_ops *st_ops;
 1159	const char *tname, *var_name;
 1160	__s32 type_id, datasec_id;
 1161	const struct btf *btf;
 1162	struct bpf_map *map;
 1163	__u32 i;
 1164
 1165	if (shndx == -1)
 1166		return 0;
 1167
 1168	btf = obj->btf;
 1169	datasec_id = btf__find_by_name_kind(btf, sec_name,
 1170					    BTF_KIND_DATASEC);
 1171	if (datasec_id < 0) {
 1172		pr_warn("struct_ops init: DATASEC %s not found\n",
 1173			sec_name);
 1174		return -EINVAL;
 1175	}
 1176
 1177	datasec = btf__type_by_id(btf, datasec_id);
 1178	vsi = btf_var_secinfos(datasec);
 1179	for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
 1180		type = btf__type_by_id(obj->btf, vsi->type);
 1181		var_name = btf__name_by_offset(obj->btf, type->name_off);
 1182
 1183		type_id = btf__resolve_type(obj->btf, vsi->type);
 1184		if (type_id < 0) {
 1185			pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
 1186				vsi->type, sec_name);
 1187			return -EINVAL;
 1188		}
 1189
 1190		type = btf__type_by_id(obj->btf, type_id);
 1191		tname = btf__name_by_offset(obj->btf, type->name_off);
 1192		if (!tname[0]) {
 1193			pr_warn("struct_ops init: anonymous type is not supported\n");
 1194			return -ENOTSUP;
 1195		}
 1196		if (!btf_is_struct(type)) {
 1197			pr_warn("struct_ops init: %s is not a struct\n", tname);
 1198			return -EINVAL;
 1199		}
 1200
 1201		map = bpf_object__add_map(obj);
 1202		if (IS_ERR(map))
 1203			return PTR_ERR(map);
 1204
 1205		map->sec_idx = shndx;
 1206		map->sec_offset = vsi->offset;
 1207		map->name = strdup(var_name);
 1208		if (!map->name)
 1209			return -ENOMEM;
 1210
 1211		map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
 1212		map->def.key_size = sizeof(int);
 1213		map->def.value_size = type->size;
 1214		map->def.max_entries = 1;
 1215		map->def.map_flags = map_flags;
 1216
 1217		map->st_ops = calloc(1, sizeof(*map->st_ops));
 1218		if (!map->st_ops)
 1219			return -ENOMEM;
 1220		st_ops = map->st_ops;
 1221		st_ops->data = malloc(type->size);
 1222		st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
 1223		st_ops->kern_func_off = malloc(btf_vlen(type) *
 1224					       sizeof(*st_ops->kern_func_off));
 1225		if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
 1226			return -ENOMEM;
 1227
 1228		if (vsi->offset + type->size > data->d_size) {
 1229			pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
 1230				var_name, sec_name);
 1231			return -EINVAL;
 1232		}
 1233
 1234		memcpy(st_ops->data,
 1235		       data->d_buf + vsi->offset,
 1236		       type->size);
 1237		st_ops->tname = tname;
 1238		st_ops->type = type;
 1239		st_ops->type_id = type_id;
 1240
 1241		pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
 1242			 tname, type_id, var_name, vsi->offset);
 1243	}
 1244
 1245	return 0;
 1246}
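
/* Illustrative example (not part of libbpf.c): the variables collected above
 * come from BPF-side declarations placed in the ".struct_ops" (or
 * ".struct_ops.link") ELF section, e.g. a congestion-control implementation
 * along the lines of the kernel selftests; function names are hypothetical:
 *
 *	SEC(".struct_ops")
 *	struct tcp_congestion_ops dctcp = {
 *		.init		= (void *)dctcp_init,
 *		.ssthresh	= (void *)dctcp_ssthresh,
 *		.undo_cwnd	= (void *)dctcp_undo_cwnd,
 *		.name		= "bpf_dctcp",
 *	};
 */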
 1247
 1248static int bpf_object_init_struct_ops(struct bpf_object *obj)
 1249{
 1250	int err;
 1251
 1252	err = init_struct_ops_maps(obj, STRUCT_OPS_SEC, obj->efile.st_ops_shndx,
 1253				   obj->efile.st_ops_data, 0);
 1254	err = err ?: init_struct_ops_maps(obj, STRUCT_OPS_LINK_SEC,
 1255					  obj->efile.st_ops_link_shndx,
 1256					  obj->efile.st_ops_link_data,
 1257					  BPF_F_LINK);
 1258	return err;
 1259}
 1260
 1261static struct bpf_object *bpf_object__new(const char *path,
 1262					  const void *obj_buf,
 1263					  size_t obj_buf_sz,
 1264					  const char *obj_name)
 1265{
 1266	struct bpf_object *obj;
 1267	char *end;
 1268
 1269	obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
 1270	if (!obj) {
 1271		pr_warn("alloc memory failed for %s\n", path);
 1272		return ERR_PTR(-ENOMEM);
 1273	}
 1274
 1275	strcpy(obj->path, path);
 1276	if (obj_name) {
 1277		libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
 1278	} else {
 1279		/* Using basename() GNU version which doesn't modify arg. */
 1280		libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
 1281		end = strchr(obj->name, '.');
 1282		if (end)
 1283			*end = 0;
 1284	}
 1285
 1286	obj->efile.fd = -1;
 1287	/*
 1288	 * Caller of this function should also call
 1289	 * bpf_object__elf_finish() after data collection to return
  1290	 * obj_buf to the user. If not, we should duplicate the buffer to
  1291	 * avoid the user freeing it before ELF processing finishes.
 1292	 */
 1293	obj->efile.obj_buf = obj_buf;
 1294	obj->efile.obj_buf_sz = obj_buf_sz;
 1295	obj->efile.btf_maps_shndx = -1;
 1296	obj->efile.st_ops_shndx = -1;
 1297	obj->efile.st_ops_link_shndx = -1;
 1298	obj->kconfig_map_idx = -1;
 1299
 1300	obj->kern_version = get_kernel_version();
 1301	obj->loaded = false;
 1302
 1303	return obj;
 1304}
 1305
 1306static void bpf_object__elf_finish(struct bpf_object *obj)
 1307{
 1308	if (!obj->efile.elf)
 1309		return;
 1310
 1311	elf_end(obj->efile.elf);
 1312	obj->efile.elf = NULL;
 1313	obj->efile.symbols = NULL;
 1314	obj->efile.st_ops_data = NULL;
 1315	obj->efile.st_ops_link_data = NULL;
 1316
 1317	zfree(&obj->efile.secs);
 1318	obj->efile.sec_cnt = 0;
 1319	zclose(obj->efile.fd);
 1320	obj->efile.obj_buf = NULL;
 1321	obj->efile.obj_buf_sz = 0;
 1322}
 1323
 1324static int bpf_object__elf_init(struct bpf_object *obj)
 1325{
 1326	Elf64_Ehdr *ehdr;
 1327	int err = 0;
 1328	Elf *elf;
 1329
 1330	if (obj->efile.elf) {
 1331		pr_warn("elf: init internal error\n");
 1332		return -LIBBPF_ERRNO__LIBELF;
 1333	}
 1334
 1335	if (obj->efile.obj_buf_sz > 0) {
 1336		/* obj_buf should have been validated by bpf_object__open_mem(). */
 1337		elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
 1338	} else {
 1339		obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
 1340		if (obj->efile.fd < 0) {
 1341			char errmsg[STRERR_BUFSIZE], *cp;
 1342
 1343			err = -errno;
 1344			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 1345			pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
 1346			return err;
 1347		}
 1348
 1349		elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
 1350	}
 1351
 1352	if (!elf) {
 1353		pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
 1354		err = -LIBBPF_ERRNO__LIBELF;
 1355		goto errout;
 1356	}
 1357
 1358	obj->efile.elf = elf;
 1359
 1360	if (elf_kind(elf) != ELF_K_ELF) {
 1361		err = -LIBBPF_ERRNO__FORMAT;
 1362		pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
 1363		goto errout;
 1364	}
 1365
 1366	if (gelf_getclass(elf) != ELFCLASS64) {
 1367		err = -LIBBPF_ERRNO__FORMAT;
 1368		pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
 1369		goto errout;
 1370	}
 1371
 1372	obj->efile.ehdr = ehdr = elf64_getehdr(elf);
 1373	if (!obj->efile.ehdr) {
 1374		pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
 1375		err = -LIBBPF_ERRNO__FORMAT;
 1376		goto errout;
 1377	}
 1378
 1379	if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
 1380		pr_warn("elf: failed to get section names section index for %s: %s\n",
 1381			obj->path, elf_errmsg(-1));
 1382		err = -LIBBPF_ERRNO__FORMAT;
 1383		goto errout;
 1384	}
 1385
 1386	/* ELF is corrupted/truncated, avoid calling elf_strptr. */
 1387	if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
 1388		pr_warn("elf: failed to get section names strings from %s: %s\n",
 1389			obj->path, elf_errmsg(-1));
 1390		err = -LIBBPF_ERRNO__FORMAT;
 1391		goto errout;
 1392	}
 1393
 1394	/* Old LLVM set e_machine to EM_NONE */
 1395	if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
 1396		pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
 1397		err = -LIBBPF_ERRNO__FORMAT;
 1398		goto errout;
 1399	}
 1400
 1401	return 0;
 1402errout:
 1403	bpf_object__elf_finish(obj);
 1404	return err;
 1405}
 1406
 1407static int bpf_object__check_endianness(struct bpf_object *obj)
 1408{
 1409#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 1410	if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
 1411		return 0;
 1412#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 1413	if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
 1414		return 0;
 1415#else
 1416# error "Unrecognized __BYTE_ORDER__"
 1417#endif
 1418	pr_warn("elf: endianness mismatch in %s.\n", obj->path);
 1419	return -LIBBPF_ERRNO__ENDIAN;
 1420}
 1421
 1422static int
 1423bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
 1424{
 1425	if (!data) {
 1426		pr_warn("invalid license section in %s\n", obj->path);
 1427		return -LIBBPF_ERRNO__FORMAT;
 1428	}
 1429	/* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
 1430	 * go over allowed ELF data section buffer
 1431	 */
 1432	libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
 1433	pr_debug("license of %s is %s\n", obj->path, obj->license);
 1434	return 0;
 1435}
 1436
 1437static int
 1438bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
 1439{
 1440	__u32 kver;
 1441
 1442	if (!data || size != sizeof(kver)) {
 1443		pr_warn("invalid kver section in %s\n", obj->path);
 1444		return -LIBBPF_ERRNO__FORMAT;
 1445	}
 1446	memcpy(&kver, data, sizeof(kver));
 1447	obj->kern_version = kver;
 1448	pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
 1449	return 0;
 1450}
 1451
 1452static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
 1453{
 1454	if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
 1455	    type == BPF_MAP_TYPE_HASH_OF_MAPS)
 1456		return true;
 1457	return false;
 1458}
 1459
 1460static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
 1461{
 1462	Elf_Data *data;
 1463	Elf_Scn *scn;
 1464
 1465	if (!name)
 1466		return -EINVAL;
 1467
 1468	scn = elf_sec_by_name(obj, name);
 1469	data = elf_sec_data(obj, scn);
 1470	if (data) {
 1471		*size = data->d_size;
 1472		return 0; /* found it */
 1473	}
 1474
 1475	return -ENOENT;
 1476}
 1477
 1478static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
 1479{
 1480	Elf_Data *symbols = obj->efile.symbols;
 1481	const char *sname;
 1482	size_t si;
 1483
 1484	for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
 1485		Elf64_Sym *sym = elf_sym_by_idx(obj, si);
 1486
 1487		if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
 1488			continue;
 1489
 1490		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
 1491		    ELF64_ST_BIND(sym->st_info) != STB_WEAK)
 1492			continue;
 1493
 1494		sname = elf_sym_str(obj, sym->st_name);
 1495		if (!sname) {
 1496			pr_warn("failed to get sym name string for var %s\n", name);
 1497			return ERR_PTR(-EIO);
 1498		}
 1499		if (strcmp(name, sname) == 0)
 1500			return sym;
 1501	}
 1502
 1503	return ERR_PTR(-ENOENT);
 1504}
 1505
 1506static int create_placeholder_fd(void)
 1507{
 1508	int fd;
 1509
 1510	fd = ensure_good_fd(memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC));
 1511	if (fd < 0)
 1512		return -errno;
 1513	return fd;
 1514}
 1515
 1516static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
 1517{
 1518	struct bpf_map *map;
 1519	int err;
 1520
 1521	err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
 1522				sizeof(*obj->maps), obj->nr_maps + 1);
 1523	if (err)
 1524		return ERR_PTR(err);
 1525
 1526	map = &obj->maps[obj->nr_maps++];
 1527	map->obj = obj;
 1528	/* Preallocate map FD without actually creating BPF map just yet.
 1529	 * These map FD "placeholders" will be reused later without changing
 1530	 * FD value when map is actually created in the kernel.
 1531	 *
 1532	 * This is useful to be able to perform BPF program relocations
 1533	 * without having to create BPF maps before that step. This allows us
 1534	 * to finalize and load BTF very late in BPF object's loading phase,
 1535	 * right before BPF maps have to be created and BPF programs have to
 1536	 * be loaded. By having these map FD placeholders we can perform all
 1537	 * the sanitizations, relocations, and any other adjustments before we
 1538	 * start creating actual BPF kernel objects (BTF, maps, progs).
 1539	 */
 1540	map->fd = create_placeholder_fd();
 1541	if (map->fd < 0)
 1542		return ERR_PTR(map->fd);
 1543	map->inner_map_fd = -1;
 1544	map->autocreate = true;
 1545
 1546	return map;
 1547}
 1548
 1549static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
 1550{
 1551	const long page_sz = sysconf(_SC_PAGE_SIZE);
 1552	size_t map_sz;
 1553
 1554	map_sz = (size_t)roundup(value_sz, 8) * max_entries;
 1555	map_sz = roundup(map_sz, page_sz);
 1556	return map_sz;
 1557}
 1558
 1559static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
 1560{
 1561	void *mmaped;
 1562
 1563	if (!map->mmaped)
 1564		return -EINVAL;
 1565
 1566	if (old_sz == new_sz)
 1567		return 0;
 1568
 1569	mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 1570	if (mmaped == MAP_FAILED)
 1571		return -errno;
 1572
 1573	memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
 1574	munmap(map->mmaped, old_sz);
 1575	map->mmaped = mmaped;
 1576	return 0;
 1577}
 1578
 1579static char *internal_map_name(struct bpf_object *obj, const char *real_name)
 1580{
 1581	char map_name[BPF_OBJ_NAME_LEN], *p;
 1582	int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
 1583
 1584	/* This is one of the more confusing parts of libbpf for various
 1585	 * reasons, some of which are historical. The original idea for naming
 1586	 * internal names was to include as much of BPF object name prefix as
 1587	 * possible, so that it can be distinguished from similar internal
 1588	 * maps of a different BPF object.
 1589	 * As an example, let's say we have bpf_object named 'my_object_name'
 1590	 * and internal map corresponding to '.rodata' ELF section. The final
 1591	 * map name advertised to user and to the kernel will be
 1592	 * 'my_objec.rodata', taking first 8 characters of object name and
 1593	 * entire 7 characters of '.rodata'.
 1594	 * Somewhat confusingly, if internal map ELF section name is shorter
 1595	 * than 7 characters, e.g., '.bss', we still reserve 7 characters
 1596	 * for the suffix, even though we only have 4 actual characters, and
 1597	 * resulting map will be called 'my_objec.bss', not even using all 15
 1598	 * characters allowed by the kernel. Oh well, at least the truncated
 1599	 * object name is somewhat consistent in this case. But if the map
 1600	 * name is '.kconfig', we'll still have entirety of '.kconfig' added
 1601	 * (8 chars) and thus will be left with only first 7 characters of the
 1602	 * object name ('my_obje'). Happy guessing, user, that the final map
 1603	 * name will be "my_obje.kconfig".
 1604	 * Now, with libbpf starting to support arbitrarily named .rodata.*
 1605	 * and .data.* data sections, it's possible that ELF section name is
 1606	 * longer than allowed 15 chars, so we now need to be careful to take
 1607	 * only up to 15 first characters of ELF name, taking no BPF object
 1608	 * name characters at all. So '.rodata.abracadabra' will result in
 1609	 * '.rodata.abracad' kernel and user-visible name.
 1610	 * We need to keep this convoluted logic intact for .data, .bss and
 1611	 * .rodata maps, but for new custom .data.custom and .rodata.custom
 1612	 * maps we use their ELF names as is, not prepending bpf_object name
 1613	 * in front. We still need to truncate them to 15 characters for the
 1614	 * kernel. Full name can be recovered for such maps by using DATASEC
 1615	 * BTF type associated with such map's value type, though.
 1616	 */
 1617	if (sfx_len >= BPF_OBJ_NAME_LEN)
 1618		sfx_len = BPF_OBJ_NAME_LEN - 1;
 1619
 1620	/* if there are two or more dots in map name, it's a custom dot map */
 1621	if (strchr(real_name + 1, '.') != NULL)
 1622		pfx_len = 0;
 1623	else
 1624		pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
 1625
 1626	snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
 1627		 sfx_len, real_name);
 1628
 1629	/* sanitise map name to characters allowed by kernel */
 1630	for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
 1631		if (!isalnum(*p) && *p != '_' && *p != '.')
 1632			*p = '_';
 1633
 1634	return strdup(map_name);
 1635}
 1636
 1637static int
 1638map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);
 1639
 1640/* Internal BPF map is mmap()'able only if at least one of corresponding
 1641 * DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL
 1642 * variable and it's not marked as __hidden (which turns it into, effectively,
 1643 * a STATIC variable).
 1644 */
 1645static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
 1646{
 1647	const struct btf_type *t, *vt;
 1648	struct btf_var_secinfo *vsi;
 1649	int i, n;
 1650
 1651	if (!map->btf_value_type_id)
 1652		return false;
 1653
 1654	t = btf__type_by_id(obj->btf, map->btf_value_type_id);
 1655	if (!btf_is_datasec(t))
 1656		return false;
 1657
 1658	vsi = btf_var_secinfos(t);
 1659	for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) {
 1660		vt = btf__type_by_id(obj->btf, vsi->type);
 1661		if (!btf_is_var(vt))
 1662			continue;
 1663
 1664		if (btf_var(vt)->linkage != BTF_VAR_STATIC)
 1665			return true;
 1666	}
 1667
 1668	return false;
 1669}
 1670
 1671static int
 1672bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 1673			      const char *real_name, int sec_idx, void *data, size_t data_sz)
 1674{
 1675	struct bpf_map_def *def;
 1676	struct bpf_map *map;
 1677	size_t mmap_sz;
 1678	int err;
 1679
 1680	map = bpf_object__add_map(obj);
 1681	if (IS_ERR(map))
 1682		return PTR_ERR(map);
 1683
 1684	map->libbpf_type = type;
 1685	map->sec_idx = sec_idx;
 1686	map->sec_offset = 0;
 1687	map->real_name = strdup(real_name);
 1688	map->name = internal_map_name(obj, real_name);
 1689	if (!map->real_name || !map->name) {
 1690		zfree(&map->real_name);
 1691		zfree(&map->name);
 1692		return -ENOMEM;
 1693	}
 1694
 1695	def = &map->def;
 1696	def->type = BPF_MAP_TYPE_ARRAY;
 1697	def->key_size = sizeof(int);
 1698	def->value_size = data_sz;
 1699	def->max_entries = 1;
 1700	def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
 1701			 ? BPF_F_RDONLY_PROG : 0;
 1702
 1703	/* failures are fine because of maps like .rodata.str1.1 */
 1704	(void) map_fill_btf_type_info(obj, map);
 1705
 1706	if (map_is_mmapable(obj, map))
 1707		def->map_flags |= BPF_F_MMAPABLE;
 1708
 1709	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
 1710		 map->name, map->sec_idx, map->sec_offset, def->map_flags);
 1711
 1712	mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
 1713	map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
 1714			   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 1715	if (map->mmaped == MAP_FAILED) {
 1716		err = -errno;
 1717		map->mmaped = NULL;
 1718		pr_warn("failed to alloc map '%s' content buffer: %d\n",
 1719			map->name, err);
 1720		zfree(&map->real_name);
 1721		zfree(&map->name);
 1722		return err;
 1723	}
 1724
 1725	if (data)
 1726		memcpy(map->mmaped, data, data_sz);
 1727
 1728	pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
 1729	return 0;
 1730}
 1731
 1732static int bpf_object__init_global_data_maps(struct bpf_object *obj)
 1733{
 1734	struct elf_sec_desc *sec_desc;
 1735	const char *sec_name;
 1736	int err = 0, sec_idx;
 1737
 1738	/*
 1739	 * Populate obj->maps with libbpf internal maps.
 1740	 */
 1741	for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
 1742		sec_desc = &obj->efile.secs[sec_idx];
 1743
 1744		/* Skip recognized sections with size 0. */
 1745		if (!sec_desc->data || sec_desc->data->d_size == 0)
 1746			continue;
 1747
 1748		switch (sec_desc->sec_type) {
 1749		case SEC_DATA:
 1750			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
 1751			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
 1752							    sec_name, sec_idx,
 1753							    sec_desc->data->d_buf,
 1754							    sec_desc->data->d_size);
 1755			break;
 1756		case SEC_RODATA:
 1757			obj->has_rodata = true;
 1758			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
 1759			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
 1760							    sec_name, sec_idx,
 1761							    sec_desc->data->d_buf,
 1762							    sec_desc->data->d_size);
 1763			break;
 1764		case SEC_BSS:
 1765			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
 1766			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
 1767							    sec_name, sec_idx,
 1768							    NULL,
 1769							    sec_desc->data->d_size);
 1770			break;
 1771		default:
 1772			/* skip */
 1773			break;
 1774		}
 1775		if (err)
 1776			return err;
 1777	}
 1778	return 0;
 1779}
 1780
 1781
 1782static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
 1783					       const void *name)
 1784{
 1785	int i;
 1786
 1787	for (i = 0; i < obj->nr_extern; i++) {
 1788		if (strcmp(obj->externs[i].name, name) == 0)
 1789			return &obj->externs[i];
 1790	}
 1791	return NULL;
 1792}
 1793
 1794static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
 1795			      char value)
 1796{
 1797	switch (ext->kcfg.type) {
 1798	case KCFG_BOOL:
 1799		if (value == 'm') {
 1800			pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
 1801				ext->name, value);
 1802			return -EINVAL;
 1803		}
 1804		*(bool *)ext_val = value == 'y' ? true : false;
 1805		break;
 1806	case KCFG_TRISTATE:
 1807		if (value == 'y')
 1808			*(enum libbpf_tristate *)ext_val = TRI_YES;
 1809		else if (value == 'm')
 1810			*(enum libbpf_tristate *)ext_val = TRI_MODULE;
 1811		else /* value == 'n' */
 1812			*(enum libbpf_tristate *)ext_val = TRI_NO;
 1813		break;
 1814	case KCFG_CHAR:
 1815		*(char *)ext_val = value;
 1816		break;
 1817	case KCFG_UNKNOWN:
 1818	case KCFG_INT:
 1819	case KCFG_CHAR_ARR:
 1820	default:
 1821		pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
 1822			ext->name, value);
 1823		return -EINVAL;
 1824	}
 1825	ext->is_set = true;
 1826	return 0;
 1827}
 1828
 1829static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
 1830			      const char *value)
 1831{
 1832	size_t len;
 1833
 1834	if (ext->kcfg.type != KCFG_CHAR_ARR) {
 1835		pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
 1836			ext->name, value);
 1837		return -EINVAL;
 1838	}
 1839
 1840	len = strlen(value);
 1841	if (value[len - 1] != '"') {
 1842		pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
 1843			ext->name, value);
 1844		return -EINVAL;
 1845	}
 1846
 1847	/* strip quotes */
 1848	len -= 2;
 1849	if (len >= ext->kcfg.sz) {
 1850		pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n",
 1851			ext->name, value, len, ext->kcfg.sz - 1);
 1852		len = ext->kcfg.sz - 1;
 1853	}
 1854	memcpy(ext_val, value + 1, len);
 1855	ext_val[len] = '\0';
 1856	ext->is_set = true;
 1857	return 0;
 1858}
 1859
 1860static int parse_u64(const char *value, __u64 *res)
 1861{
 1862	char *value_end;
 1863	int err;
 1864
 1865	errno = 0;
 1866	*res = strtoull(value, &value_end, 0);
 1867	if (errno) {
 1868		err = -errno;
 1869		pr_warn("failed to parse '%s' as integer: %d\n", value, err);
 1870		return err;
 1871	}
 1872	if (*value_end) {
 1873		pr_warn("failed to parse '%s' as integer completely\n", value);
 1874		return -EINVAL;
 1875	}
 1876	return 0;
 1877}
 1878
 1879static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
 1880{
 1881	int bit_sz = ext->kcfg.sz * 8;
 1882
 1883	if (ext->kcfg.sz == 8)
 1884		return true;
 1885
 1886	/* Validate that the value stored in u64 fits in an integer of `ext->sz`
 1887	 * bytes without any loss of information. If the target integer
 1888	 * is signed, we rely on the following limits of integer type of
 1889	 * Y bits and subsequent transformation:
 1890	 *
 1891	 *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
 1892	 *            0 <= X + 2^(Y-1) <= 2^Y - 1
 1893	 *            0 <= X + 2^(Y-1) <  2^Y
 1894	 *
 1895	 *  For unsigned target integer, check that all the (64 - Y) bits are
 1896	 *  zero.
 1897	 */
 1898	if (ext->kcfg.is_signed)
 1899		return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
 1900	else
 1901		return (v >> bit_sz) == 0;
 1902}
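/* Editor's note: worked example of the signed check above for a 1-byte
 * signed kcfg extern (bit_sz = 8, valid range -128..127):
 *
 *	v = (__u64)-128  ->  v + 128 wraps to 0,   0 < 256  -> in range
 *	v = 127          ->  127 + 128 = 255,    255 < 256  -> in range
 *	v = 128          ->  128 + 128 = 256, !(256 < 256)  -> rejected
 */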
 1903
 1904static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
 1905			      __u64 value)
 1906{
 1907	if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
 1908	    ext->kcfg.type != KCFG_BOOL) {
 1909		pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
 1910			ext->name, (unsigned long long)value);
 1911		return -EINVAL;
 1912	}
 1913	if (ext->kcfg.type == KCFG_BOOL && value > 1) {
 1914		pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
 1915			ext->name, (unsigned long long)value);
 1916		return -EINVAL;
 1917
 1918	}
 1919	if (!is_kcfg_value_in_range(ext, value)) {
 1920		pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
 1921			ext->name, (unsigned long long)value, ext->kcfg.sz);
 1922		return -ERANGE;
 1923	}
 1924	switch (ext->kcfg.sz) {
 1925	case 1:
 1926		*(__u8 *)ext_val = value;
 1927		break;
 1928	case 2:
 1929		*(__u16 *)ext_val = value;
 1930		break;
 1931	case 4:
 1932		*(__u32 *)ext_val = value;
 1933		break;
 1934	case 8:
 1935		*(__u64 *)ext_val = value;
 1936		break;
 1937	default:
 1938		return -EINVAL;
 1939	}
 1940	ext->is_set = true;
 1941	return 0;
 1942}
 1943
 1944static int bpf_object__process_kconfig_line(struct bpf_object *obj,
 1945					    char *buf, void *data)
 1946{
 1947	struct extern_desc *ext;
 1948	char *sep, *value;
 1949	int len, err = 0;
 1950	void *ext_val;
 1951	__u64 num;
 1952
 1953	if (!str_has_pfx(buf, "CONFIG_"))
 1954		return 0;
 1955
 1956	sep = strchr(buf, '=');
 1957	if (!sep) {
 1958		pr_warn("failed to parse '%s': no separator\n", buf);
 1959		return -EINVAL;
 1960	}
 1961
 1962	/* Trim ending '\n' */
 1963	len = strlen(buf);
 1964	if (buf[len - 1] == '\n')
 1965		buf[len - 1] = '\0';
 1966	/* Split on '=' and ensure that a value is present. */
 1967	*sep = '\0';
 1968	if (!sep[1]) {
 1969		*sep = '=';
 1970		pr_warn("failed to parse '%s': no value\n", buf);
 1971		return -EINVAL;
 1972	}
 1973
 1974	ext = find_extern_by_name(obj, buf);
 1975	if (!ext || ext->is_set)
 1976		return 0;
 1977
 1978	ext_val = data + ext->kcfg.data_off;
 1979	value = sep + 1;
 1980
 1981	switch (*value) {
 1982	case 'y': case 'n': case 'm':
 1983		err = set_kcfg_value_tri(ext, ext_val, *value);
 1984		break;
 1985	case '"':
 1986		err = set_kcfg_value_str(ext, ext_val, value);
 1987		break;
 1988	default:
 1989		/* assume integer */
 1990		err = parse_u64(value, &num);
 1991		if (err) {
 1992			pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
 1993			return err;
 1994		}
 1995		if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
 1996			pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
 1997			return -EINVAL;
 1998		}
 1999		err = set_kcfg_value_num(ext, ext_val, num);
 2000		break;
 2001	}
 2002	if (err)
 2003		return err;
 2004	pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
 2005	return 0;
 2006}
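/* Editor's note: illustrative sketch, not part of libbpf. The parser above
 * feeds externs declared in BPF C with __kconfig (from bpf/bpf_helpers.h),
 * e.g. (the extern names below are real kconfig options, used as examples):
 *
 *	extern int CONFIG_HZ __kconfig;
 *	extern enum libbpf_tristate CONFIG_IPV6 __kconfig __weak;
 *
 * Given Kconfig lines such as:
 *
 *	CONFIG_HZ=250
 *	CONFIG_IPV6=m
 *
 * set_kcfg_value_num() stores 250 into CONFIG_HZ and set_kcfg_value_tri()
 * stores TRI_MODULE into CONFIG_IPV6.
 */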
 2007
 2008static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
 2009{
 2010	char buf[PATH_MAX];
 2011	struct utsname uts;
 2012	int len, err = 0;
 2013	gzFile file;
 2014
 2015	uname(&uts);
 2016	len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
 2017	if (len < 0)
 2018		return -EINVAL;
 2019	else if (len >= PATH_MAX)
 2020		return -ENAMETOOLONG;
 2021
 2022	/* gzopen also accepts uncompressed files. */
 2023	file = gzopen(buf, "re");
 2024	if (!file)
 2025		file = gzopen("/proc/config.gz", "re");
 2026
 2027	if (!file) {
 2028		pr_warn("failed to open system Kconfig\n");
 2029		return -ENOENT;
 2030	}
 2031
 2032	while (gzgets(file, buf, sizeof(buf))) {
 2033		err = bpf_object__process_kconfig_line(obj, buf, data);
 2034		if (err) {
 2035			pr_warn("error parsing system Kconfig line '%s': %d\n",
 2036				buf, err);
 2037			goto out;
 2038		}
 2039	}
 2040
 2041out:
 2042	gzclose(file);
 2043	return err;
 2044}
 2045
 2046static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
 2047					const char *config, void *data)
 2048{
 2049	char buf[PATH_MAX];
 2050	int err = 0;
 2051	FILE *file;
 2052
 2053	file = fmemopen((void *)config, strlen(config), "r");
 2054	if (!file) {
 2055		err = -errno;
 2056		pr_warn("failed to open in-memory Kconfig: %d\n", err);
 2057		return err;
 2058	}
 2059
 2060	while (fgets(buf, sizeof(buf), file)) {
 2061		err = bpf_object__process_kconfig_line(obj, buf, data);
 2062		if (err) {
 2063			pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
 2064				buf, err);
 2065			break;
 2066		}
 2067	}
 2068
 2069	fclose(file);
 2070	return err;
 2071}
 2072
 2073static int bpf_object__init_kconfig_map(struct bpf_object *obj)
 2074{
 2075	struct extern_desc *last_ext = NULL, *ext;
 2076	size_t map_sz;
 2077	int i, err;
 2078
 2079	for (i = 0; i < obj->nr_extern; i++) {
 2080		ext = &obj->externs[i];
 2081		if (ext->type == EXT_KCFG)
 2082			last_ext = ext;
 2083	}
 2084
 2085	if (!last_ext)
 2086		return 0;
 2087
 2088	map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
 2089	err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
 2090					    ".kconfig", obj->efile.symbols_shndx,
 2091					    NULL, map_sz);
 2092	if (err)
 2093		return err;
 2094
 2095	obj->kconfig_map_idx = obj->nr_maps - 1;
 2096
 2097	return 0;
 2098}
 2099
 2100const struct btf_type *
 2101skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
 2102{
 2103	const struct btf_type *t = btf__type_by_id(btf, id);
 2104
 2105	if (res_id)
 2106		*res_id = id;
 2107
 2108	while (btf_is_mod(t) || btf_is_typedef(t)) {
 2109		if (res_id)
 2110			*res_id = t->type;
 2111		t = btf__type_by_id(btf, t->type);
 2112	}
 2113
 2114	return t;
 2115}
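/* Editor's note: illustrative example. For BTF generated from
 *
 *	typedef const volatile unsigned int cvu32;
 *	cvu32 var;
 *
 * the VAR's type chain is TYPEDEF -> CONST -> VOLATILE -> INT (modifier
 * order may vary); skip_mods_and_typedefs() returns the final INT type and,
 * via res_id, its type ID.
 */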
 2116
 2117static const struct btf_type *
 2118resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
 2119{
 2120	const struct btf_type *t;
 2121
 2122	t = skip_mods_and_typedefs(btf, id, NULL);
 2123	if (!btf_is_ptr(t))
 2124		return NULL;
 2125
 2126	t = skip_mods_and_typedefs(btf, t->type, res_id);
 2127
 2128	return btf_is_func_proto(t) ? t : NULL;
 2129}
 2130
 2131static const char *__btf_kind_str(__u16 kind)
 2132{
 2133	switch (kind) {
 2134	case BTF_KIND_UNKN: return "void";
 2135	case BTF_KIND_INT: return "int";
 2136	case BTF_KIND_PTR: return "ptr";
 2137	case BTF_KIND_ARRAY: return "array";
 2138	case BTF_KIND_STRUCT: return "struct";
 2139	case BTF_KIND_UNION: return "union";
 2140	case BTF_KIND_ENUM: return "enum";
 2141	case BTF_KIND_FWD: return "fwd";
 2142	case BTF_KIND_TYPEDEF: return "typedef";
 2143	case BTF_KIND_VOLATILE: return "volatile";
 2144	case BTF_KIND_CONST: return "const";
 2145	case BTF_KIND_RESTRICT: return "restrict";
 2146	case BTF_KIND_FUNC: return "func";
 2147	case BTF_KIND_FUNC_PROTO: return "func_proto";
 2148	case BTF_KIND_VAR: return "var";
 2149	case BTF_KIND_DATASEC: return "datasec";
 2150	case BTF_KIND_FLOAT: return "float";
 2151	case BTF_KIND_DECL_TAG: return "decl_tag";
 2152	case BTF_KIND_TYPE_TAG: return "type_tag";
 2153	case BTF_KIND_ENUM64: return "enum64";
 2154	default: return "unknown";
 2155	}
 2156}
 2157
 2158const char *btf_kind_str(const struct btf_type *t)
 2159{
 2160	return __btf_kind_str(btf_kind(t));
 2161}
 2162
 2163/*
 2164 * Fetch integer attribute of BTF map definition. Such attributes are
 2165 * represented using a pointer to an array, in which the dimensionality of the
 2166 * array encodes the specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
 2167 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
 2168 * type definition, while using only sizeof(void *) space in ELF data section.
 2169 */
 2170static bool get_map_field_int(const char *map_name, const struct btf *btf,
 2171			      const struct btf_member *m, __u32 *res)
 2172{
 2173	const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
 2174	const char *name = btf__name_by_offset(btf, m->name_off);
 2175	const struct btf_array *arr_info;
 2176	const struct btf_type *arr_t;
 2177
 2178	if (!btf_is_ptr(t)) {
 2179		pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
 2180			map_name, name, btf_kind_str(t));
 2181		return false;
 2182	}
 2183
 2184	arr_t = btf__type_by_id(btf, t->type);
 2185	if (!arr_t) {
 2186		pr_warn("map '%s': attr '%s': type [%u] not found.\n",
 2187			map_name, name, t->type);
 2188		return false;
 2189	}
 2190	if (!btf_is_array(arr_t)) {
 2191		pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
 2192			map_name, name, btf_kind_str(arr_t));
 2193		return false;
 2194	}
 2195	arr_info = btf_array(arr_t);
 2196	*res = arr_info->nelems;
 2197	return true;
 2198}
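/* Editor's note: illustrative sketch. This encoding is what the __uint()
 * macro from bpf/bpf_helpers.h produces (it expands to `int (*name)[val]`),
 * so a BTF-defined map such as
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 256);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *	} counters SEC(".maps");
 *
 * (map name hypothetical) carries `type` and `max_entries` purely in the
 * pointed-to array dimensions, which get_map_field_int() reads back via
 * btf_array()->nelems.
 */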
 2199
 2200static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
 2201{
 2202	int len;
 2203
 2204	len = snprintf(buf, buf_sz, "%s/%s", path, name);
 2205	if (len < 0)
 2206		return -EINVAL;
 2207	if (len >= buf_sz)
 2208		return -ENAMETOOLONG;
 2209
 2210	return 0;
 2211}
 2212
 2213static int build_map_pin_path(struct bpf_map *map, const char *path)
 2214{
 2215	char buf[PATH_MAX];
 2216	int err;
 2217
 2218	if (!path)
 2219		path = "/sys/fs/bpf";
 2220
 2221	err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
 2222	if (err)
 2223		return err;
 2224
 2225	return bpf_map__set_pin_path(map, buf);
 2226}
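/* Editor's note: illustrative example, names hypothetical. With
 * pinning = LIBBPF_PIN_BY_NAME and the default root, a map named "netstats"
 * is pinned at "/sys/fs/bpf/netstats"; the root can be overridden at open
 * time:
 *
 *	LIBBPF_OPTS(bpf_object_open_opts, opts,
 *		    .pin_root_path = "/sys/fs/bpf/myapp");
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);
 *
 * which would pin the same map at "/sys/fs/bpf/myapp/netstats".
 */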
 2227
 2228/* should match definition in bpf_helpers.h */
 2229enum libbpf_pin_type {
 2230	LIBBPF_PIN_NONE,
 2231	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
 2232	LIBBPF_PIN_BY_NAME,
 2233};
 2234
 2235int parse_btf_map_def(const char *map_name, struct btf *btf,
 2236		      const struct btf_type *def_t, bool strict,
 2237		      struct btf_map_def *map_def, struct btf_map_def *inner_def)
 2238{
 2239	const struct btf_type *t;
 2240	const struct btf_member *m;
 2241	bool is_inner = inner_def == NULL;
 2242	int vlen, i;
 2243
 2244	vlen = btf_vlen(def_t);
 2245	m = btf_members(def_t);
 2246	for (i = 0; i < vlen; i++, m++) {
 2247		const char *name = btf__name_by_offset(btf, m->name_off);
 2248
 2249		if (!name) {
 2250			pr_warn("map '%s': invalid field #%d.\n", map_name, i);
 2251			return -EINVAL;
 2252		}
 2253		if (strcmp(name, "type") == 0) {
 2254			if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
 2255				return -EINVAL;
 2256			map_def->parts |= MAP_DEF_MAP_TYPE;
 2257		} else if (strcmp(name, "max_entries") == 0) {
 2258			if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
 2259				return -EINVAL;
 2260			map_def->parts |= MAP_DEF_MAX_ENTRIES;
 2261		} else if (strcmp(name, "map_flags") == 0) {
 2262			if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
 2263				return -EINVAL;
 2264			map_def->parts |= MAP_DEF_MAP_FLAGS;
 2265		} else if (strcmp(name, "numa_node") == 0) {
 2266			if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
 2267				return -EINVAL;
 2268			map_def->parts |= MAP_DEF_NUMA_NODE;
 2269		} else if (strcmp(name, "key_size") == 0) {
 2270			__u32 sz;
 2271
 2272			if (!get_map_field_int(map_name, btf, m, &sz))
 2273				return -EINVAL;
 2274			if (map_def->key_size && map_def->key_size != sz) {
 2275				pr_warn("map '%s': conflicting key size %u != %u.\n",
 2276					map_name, map_def->key_size, sz);
 2277				return -EINVAL;
 2278			}
 2279			map_def->key_size = sz;
 2280			map_def->parts |= MAP_DEF_KEY_SIZE;
 2281		} else if (strcmp(name, "key") == 0) {
 2282			__s64 sz;
 2283
 2284			t = btf__type_by_id(btf, m->type);
 2285			if (!t) {
 2286				pr_warn("map '%s': key type [%d] not found.\n",
 2287					map_name, m->type);
 2288				return -EINVAL;
 2289			}
 2290			if (!btf_is_ptr(t)) {
 2291				pr_warn("map '%s': key spec is not PTR: %s.\n",
 2292					map_name, btf_kind_str(t));
 2293				return -EINVAL;
 2294			}
 2295			sz = btf__resolve_size(btf, t->type);
 2296			if (sz < 0) {
 2297				pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
 2298					map_name, t->type, (ssize_t)sz);
 2299				return sz;
 2300			}
 2301			if (map_def->key_size && map_def->key_size != sz) {
 2302				pr_warn("map '%s': conflicting key size %u != %zd.\n",
 2303					map_name, map_def->key_size, (ssize_t)sz);
 2304				return -EINVAL;
 2305			}
 2306			map_def->key_size = sz;
 2307			map_def->key_type_id = t->type;
 2308			map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
 2309		} else if (strcmp(name, "value_size") == 0) {
 2310			__u32 sz;
 2311
 2312			if (!get_map_field_int(map_name, btf, m, &sz))
 2313				return -EINVAL;
 2314			if (map_def->value_size && map_def->value_size != sz) {
 2315				pr_warn("map '%s': conflicting value size %u != %u.\n",
 2316					map_name, map_def->value_size, sz);
 2317				return -EINVAL;
 2318			}
 2319			map_def->value_size = sz;
 2320			map_def->parts |= MAP_DEF_VALUE_SIZE;
 2321		} else if (strcmp(name, "value") == 0) {
 2322			__s64 sz;
 2323
 2324			t = btf__type_by_id(btf, m->type);
 2325			if (!t) {
 2326				pr_warn("map '%s': value type [%d] not found.\n",
 2327					map_name, m->type);
 2328				return -EINVAL;
 2329			}
 2330			if (!btf_is_ptr(t)) {
 2331				pr_warn("map '%s': value spec is not PTR: %s.\n",
 2332					map_name, btf_kind_str(t));
 2333				return -EINVAL;
 2334			}
 2335			sz = btf__resolve_size(btf, t->type);
 2336			if (sz < 0) {
 2337				pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
 2338					map_name, t->type, (ssize_t)sz);
 2339				return sz;
 2340			}
 2341			if (map_def->value_size && map_def->value_size != sz) {
 2342				pr_warn("map '%s': conflicting value size %u != %zd.\n",
 2343					map_name, map_def->value_size, (ssize_t)sz);
 2344				return -EINVAL;
 2345			}
 2346			map_def->value_size = sz;
 2347			map_def->value_type_id = t->type;
 2348			map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
 2349		}
 2350		else if (strcmp(name, "values") == 0) {
 2351			bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
 2352			bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
 2353			const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
 2354			char inner_map_name[128];
 2355			int err;
 2356
 2357			if (is_inner) {
 2358				pr_warn("map '%s': multi-level inner maps not supported.\n",
 2359					map_name);
 2360				return -ENOTSUP;
 2361			}
 2362			if (i != vlen - 1) {
 2363				pr_warn("map '%s': '%s' member should be last.\n",
 2364					map_name, name);
 2365				return -EINVAL;
 2366			}
 2367			if (!is_map_in_map && !is_prog_array) {
 2368				pr_warn("map '%s': should be map-in-map or prog-array.\n",
 2369					map_name);
 2370				return -ENOTSUP;
 2371			}
 2372			if (map_def->value_size && map_def->value_size != 4) {
 2373				pr_warn("map '%s': conflicting value size %u != 4.\n",
 2374					map_name, map_def->value_size);
 2375				return -EINVAL;
 2376			}
 2377			map_def->value_size = 4;
 2378			t = btf__type_by_id(btf, m->type);
 2379			if (!t) {
 2380				pr_warn("map '%s': %s type [%d] not found.\n",
 2381					map_name, desc, m->type);
 2382				return -EINVAL;
 2383			}
 2384			if (!btf_is_array(t) || btf_array(t)->nelems) {
 2385				pr_warn("map '%s': %s spec is not a zero-sized array.\n",
 2386					map_name, desc);
 2387				return -EINVAL;
 2388			}
 2389			t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
 2390			if (!btf_is_ptr(t)) {
 2391				pr_warn("map '%s': %s def is of unexpected kind %s.\n",
 2392					map_name, desc, btf_kind_str(t));
 2393				return -EINVAL;
 2394			}
 2395			t = skip_mods_and_typedefs(btf, t->type, NULL);
 2396			if (is_prog_array) {
 2397				if (!btf_is_func_proto(t)) {
 2398					pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
 2399						map_name, btf_kind_str(t));
 2400					return -EINVAL;
 2401				}
 2402				continue;
 2403			}
 2404			if (!btf_is_struct(t)) {
 2405				pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
 2406					map_name, btf_kind_str(t));
 2407				return -EINVAL;
 2408			}
 2409
 2410			snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
 2411			err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
 2412			if (err)
 2413				return err;
 2414
 2415			map_def->parts |= MAP_DEF_INNER_MAP;
 2416		} else if (strcmp(name, "pinning") == 0) {
 2417			__u32 val;
 2418
 2419			if (is_inner) {
 2420				pr_warn("map '%s': inner def can't be pinned.\n", map_name);
 2421				return -EINVAL;
 2422			}
 2423			if (!get_map_field_int(map_name, btf, m, &val))
 2424				return -EINVAL;
 2425			if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
 2426				pr_warn("map '%s': invalid pinning value %u.\n",
 2427					map_name, val);
 2428				return -EINVAL;
 2429			}
 2430			map_def->pinning = val;
 2431			map_def->parts |= MAP_DEF_PINNING;
 2432		} else if (strcmp(name, "map_extra") == 0) {
 2433			__u32 map_extra;
 2434
 2435			if (!get_map_field_int(map_name, btf, m, &map_extra))
 2436				return -EINVAL;
 2437			map_def->map_extra = map_extra;
 2438			map_def->parts |= MAP_DEF_MAP_EXTRA;
 2439		} else {
 2440			if (strict) {
 2441				pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
 2442				return -ENOTSUP;
 2443			}
 2444			pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
 2445		}
 2446	}
 2447
 2448	if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
 2449		pr_warn("map '%s': map type isn't specified.\n", map_name);
 2450		return -EINVAL;
 2451	}
 2452
 2453	return 0;
 2454}
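/* Editor's note: illustrative sketch of the declarative syntax handled by
 * parse_btf_map_def() above, including a map-in-map "values" member (uses
 * __uint/__type/__array from bpf/bpf_helpers.h; map names hypothetical):
 *
 *	struct inner {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *	};
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *		__uint(max_entries, 8);
 *		__uint(pinning, LIBBPF_PIN_BY_NAME);
 *		__type(key, __u32);
 *		__array(values, struct inner);
 *	} outer SEC(".maps");
 *
 * Here "values" is a zero-sized array of pointers to the inner definition,
 * which is exactly the shape the checks above expect.
 */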
 2455
 2456static size_t adjust_ringbuf_sz(size_t sz)
 2457{
 2458	__u32 page_sz = sysconf(_SC_PAGE_SIZE);
 2459	__u32 mul;
 2460
 2461	/* if user forgot to set any size, make sure they see error */
 2462	if (sz == 0)
 2463		return 0;
 2464	/* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
 2465	 * a power-of-2 multiple of kernel's page size. If user diligently
 2466	 * satisfied these conditions, pass the size through.
 2467	 */
 2468	if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
 2469		return sz;
 2470
 2471	/* Otherwise find closest (page_sz * power_of_2) product bigger than
 2472	 * user-set size to satisfy both user size request and kernel
 2473	 * requirements and substitute correct max_entries for map creation.
 2474	 */
 2475	for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
 2476		if (mul * page_sz > sz)
 2477			return mul * page_sz;
 2478	}
 2479
 2480	/* if it's impossible to satisfy the conditions (i.e., user size is
 2481	 * very close to UINT_MAX but is not a power-of-2 multiple of
 2482	 * page_size) then just return original size and let kernel reject it
 2483	 */
 2484	return sz;
 2485}
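/* Editor's note: worked example, assuming a 4096-byte page size:
 *
 *	sz = 16384  (4 pages, power of 2)  -> returned unchanged
 *	sz = 5000                          -> rounded up to 8192
 *	sz = 0                             -> returned as-is so the kernel
 *	                                      reports the missing size
 */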
 2486
 2487static bool map_is_ringbuf(const struct bpf_map *map)
 2488{
 2489	return map->def.type == BPF_MAP_TYPE_RINGBUF ||
 2490	       map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
 2491}
 2492
 2493static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
 2494{
 2495	map->def.type = def->map_type;
 2496	map->def.key_size = def->key_size;
 2497	map->def.value_size = def->value_size;
 2498	map->def.max_entries = def->max_entries;
 2499	map->def.map_flags = def->map_flags;
 2500	map->map_extra = def->map_extra;
 2501
 2502	map->numa_node = def->numa_node;
 2503	map->btf_key_type_id = def->key_type_id;
 2504	map->btf_value_type_id = def->value_type_id;
 2505
 2506	/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
 2507	if (map_is_ringbuf(map))
 2508		map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
 2509
 2510	if (def->parts & MAP_DEF_MAP_TYPE)
 2511		pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
 2512
 2513	if (def->parts & MAP_DEF_KEY_TYPE)
 2514		pr_debug("map '%s': found key [%u], sz = %u.\n",
 2515			 map->name, def->key_type_id, def->key_size);
 2516	else if (def->parts & MAP_DEF_KEY_SIZE)
 2517		pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
 2518
 2519	if (def->parts & MAP_DEF_VALUE_TYPE)
 2520		pr_debug("map '%s': found value [%u], sz = %u.\n",
 2521			 map->name, def->value_type_id, def->value_size);
 2522	else if (def->parts & MAP_DEF_VALUE_SIZE)
 2523		pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
 2524
 2525	if (def->parts & MAP_DEF_MAX_ENTRIES)
 2526		pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
 2527	if (def->parts & MAP_DEF_MAP_FLAGS)
 2528		pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
 2529	if (def->parts & MAP_DEF_MAP_EXTRA)
 2530		pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
 2531			 (unsigned long long)def->map_extra);
 2532	if (def->parts & MAP_DEF_PINNING)
 2533		pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
 2534	if (def->parts & MAP_DEF_NUMA_NODE)
 2535		pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
 2536
 2537	if (def->parts & MAP_DEF_INNER_MAP)
 2538		pr_debug("map '%s': found inner map definition.\n", map->name);
 2539}
 2540
 2541static const char *btf_var_linkage_str(__u32 linkage)
 2542{
 2543	switch (linkage) {
 2544	case BTF_VAR_STATIC: return "static";
 2545	case BTF_VAR_GLOBAL_ALLOCATED: return "global";
 2546	case BTF_VAR_GLOBAL_EXTERN: return "extern";
 2547	default: return "unknown";
 2548	}
 2549}
 2550
 2551static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 2552					 const struct btf_type *sec,
 2553					 int var_idx, int sec_idx,
 2554					 const Elf_Data *data, bool strict,
 2555					 const char *pin_root_path)
 2556{
 2557	struct btf_map_def map_def = {}, inner_def = {};
 2558	const struct btf_type *var, *def;
 2559	const struct btf_var_secinfo *vi;
 2560	const struct btf_var *var_extra;
 2561	const char *map_name;
 2562	struct bpf_map *map;
 2563	int err;
 2564
 2565	vi = btf_var_secinfos(sec) + var_idx;
 2566	var = btf__type_by_id(obj->btf, vi->type);
 2567	var_extra = btf_var(var);
 2568	map_name = btf__name_by_offset(obj->btf, var->name_off);
 2569
 2570	if (map_name == NULL || map_name[0] == '\0') {
 2571		pr_warn("map #%d: empty name.\n", var_idx);
 2572		return -EINVAL;
 2573	}
 2574	if ((__u64)vi->offset + vi->size > data->d_size) {
 2575		pr_warn("map '%s' BTF data is corrupted.\n", map_name);
 2576		return -EINVAL;
 2577	}
 2578	if (!btf_is_var(var)) {
 2579		pr_warn("map '%s': unexpected var kind %s.\n",
 2580			map_name, btf_kind_str(var));
 2581		return -EINVAL;
 2582	}
 2583	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
 2584		pr_warn("map '%s': unsupported map linkage %s.\n",
 2585			map_name, btf_var_linkage_str(var_extra->linkage));
 2586		return -EOPNOTSUPP;
 2587	}
 2588
 2589	def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
 2590	if (!btf_is_struct(def)) {
 2591		pr_warn("map '%s': unexpected def kind %s.\n",
 2592			map_name, btf_kind_str(var));
 2593		return -EINVAL;
 2594	}
 2595	if (def->size > vi->size) {
 2596		pr_warn("map '%s': invalid def size.\n", map_name);
 2597		return -EINVAL;
 2598	}
 2599
 2600	map = bpf_object__add_map(obj);
 2601	if (IS_ERR(map))
 2602		return PTR_ERR(map);
 2603	map->name = strdup(map_name);
 2604	if (!map->name) {
 2605		pr_warn("map '%s': failed to alloc map name.\n", map_name);
 2606		return -ENOMEM;
 2607	}
 2608	map->libbpf_type = LIBBPF_MAP_UNSPEC;
 2609	map->def.type = BPF_MAP_TYPE_UNSPEC;
 2610	map->sec_idx = sec_idx;
 2611	map->sec_offset = vi->offset;
 2612	map->btf_var_idx = var_idx;
 2613	pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
 2614		 map_name, map->sec_idx, map->sec_offset);
 2615
 2616	err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
 2617	if (err)
 2618		return err;
 2619
 2620	fill_map_from_def(map, &map_def);
 2621
 2622	if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
 2623		err = build_map_pin_path(map, pin_root_path);
 2624		if (err) {
 2625			pr_warn("map '%s': couldn't build pin path.\n", map->name);
 2626			return err;
 2627		}
 2628	}
 2629
 2630	if (map_def.parts & MAP_DEF_INNER_MAP) {
 2631		map->inner_map = calloc(1, sizeof(*map->inner_map));
 2632		if (!map->inner_map)
 2633			return -ENOMEM;
 2634		map->inner_map->fd = create_placeholder_fd();
 2635		if (map->inner_map->fd < 0)
 2636			return map->inner_map->fd;
 2637		map->inner_map->sec_idx = sec_idx;
 2638		map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
 2639		if (!map->inner_map->name)
 2640			return -ENOMEM;
 2641		sprintf(map->inner_map->name, "%s.inner", map_name);
 2642
 2643		fill_map_from_def(map->inner_map, &inner_def);
 2644	}
 2645
 2646	err = map_fill_btf_type_info(obj, map);
 2647	if (err)
 2648		return err;
 2649
 2650	return 0;
 2651}
 2652
 2653static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
 2654					  const char *pin_root_path)
 2655{
 2656	const struct btf_type *sec = NULL;
 2657	int nr_types, i, vlen, err;
 2658	const struct btf_type *t;
 2659	const char *name;
 2660	Elf_Data *data;
 2661	Elf_Scn *scn;
 2662
 2663	if (obj->efile.btf_maps_shndx < 0)
 2664		return 0;
 2665
 2666	scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
 2667	data = elf_sec_data(obj, scn);
 2668	if (!scn || !data) {
 2669		pr_warn("elf: failed to get %s map definitions for %s\n",
 2670			MAPS_ELF_SEC, obj->path);
 2671		return -EINVAL;
 2672	}
 2673
 2674	nr_types = btf__type_cnt(obj->btf);
 2675	for (i = 1; i < nr_types; i++) {
 2676		t = btf__type_by_id(obj->btf, i);
 2677		if (!btf_is_datasec(t))
 2678			continue;
 2679		name = btf__name_by_offset(obj->btf, t->name_off);
 2680		if (strcmp(name, MAPS_ELF_SEC) == 0) {
 2681			sec = t;
 2682			obj->efile.btf_maps_sec_btf_id = i;
 2683			break;
 2684		}
 2685	}
 2686
 2687	if (!sec) {
 2688		pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
 2689		return -ENOENT;
 2690	}
 2691
 2692	vlen = btf_vlen(sec);
 2693	for (i = 0; i < vlen; i++) {
 2694		err = bpf_object__init_user_btf_map(obj, sec, i,
 2695						    obj->efile.btf_maps_shndx,
 2696						    data, strict,
 2697						    pin_root_path);
 2698		if (err)
 2699			return err;
 2700	}
 2701
 2702	return 0;
 2703}
 2704
 2705static int bpf_object__init_maps(struct bpf_object *obj,
 2706				 const struct bpf_object_open_opts *opts)
 2707{
 2708	const char *pin_root_path;
 2709	bool strict;
 2710	int err = 0;
 2711
 2712	strict = !OPTS_GET(opts, relaxed_maps, false);
 2713	pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
 2714
 2715	err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
 2716	err = err ?: bpf_object__init_global_data_maps(obj);
 2717	err = err ?: bpf_object__init_kconfig_map(obj);
 2718	err = err ?: bpf_object_init_struct_ops(obj);
 2719
 2720	return err;
 2721}
 2722
 2723static bool section_have_execinstr(struct bpf_object *obj, int idx)
 2724{
 2725	Elf64_Shdr *sh;
 2726
 2727	sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
 2728	if (!sh)
 2729		return false;
 2730
 2731	return sh->sh_flags & SHF_EXECINSTR;
 2732}
 2733
 2734static bool btf_needs_sanitization(struct bpf_object *obj)
 2735{
 2736	bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
 2737	bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
 2738	bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
 2739	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
 2740	bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
 2741	bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
 2742	bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
 2743
 2744	return !has_func || !has_datasec || !has_func_global || !has_float ||
 2745	       !has_decl_tag || !has_type_tag || !has_enum64;
 2746}
 2747
 2748static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
 2749{
 2750	bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
 2751	bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
 2752	bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
 2753	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
 2754	bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
 2755	bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
 2756	bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
 2757	int enum64_placeholder_id = 0;
 2758	struct btf_type *t;
 2759	int i, j, vlen;
 2760
 2761	for (i = 1; i < btf__type_cnt(btf); i++) {
 2762		t = (struct btf_type *)btf__type_by_id(btf, i);
 2763
 2764		if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
 2765			/* replace VAR/DECL_TAG with INT */
 2766			t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
 2767			/*
 2768			 * using size = 1 is the safest choice, 4 will be too
 2769			 * big and cause kernel BTF validation failure if
 2770			 * original variable took less than 4 bytes
 2771			 */
 2772			t->size = 1;
 2773			*(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
 2774		} else if (!has_datasec && btf_is_datasec(t)) {
 2775			/* replace DATASEC with STRUCT */
 2776			const struct btf_var_secinfo *v = btf_var_secinfos(t);
 2777			struct btf_member *m = btf_members(t);
 2778			struct btf_type *vt;
 2779			char *name;
 2780
 2781			name = (char *)btf__name_by_offset(btf, t->name_off);
 2782			while (*name) {
 2783				if (*name == '.')
 2784					*name = '_';
 2785				name++;
 2786			}
 2787
 2788			vlen = btf_vlen(t);
 2789			t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
 2790			for (j = 0; j < vlen; j++, v++, m++) {
 2791				/* order of field assignments is important */
 2792				m->offset = v->offset * 8;
 2793				m->type = v->type;
 2794				/* preserve variable name as member name */
 2795				vt = (void *)btf__type_by_id(btf, v->type);
 2796				m->name_off = vt->name_off;
 2797			}
 2798		} else if (!has_func && btf_is_func_proto(t)) {
 2799			/* replace FUNC_PROTO with ENUM */
 2800			vlen = btf_vlen(t);
 2801			t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
 2802			t->size = sizeof(__u32); /* kernel enforced */
 2803		} else if (!has_func && btf_is_func(t)) {
 2804			/* replace FUNC with TYPEDEF */
 2805			t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
 2806		} else if (!has_func_global && btf_is_func(t)) {
 2807			/* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
 2808			t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
 2809		} else if (!has_float && btf_is_float(t)) {
 2810			/* replace FLOAT with an equally-sized empty STRUCT;
 2811			 * since C compilers do not accept e.g. "float" as a
 2812			 * valid struct name, make it anonymous
 2813			 */
 2814			t->name_off = 0;
 2815			t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
 2816		} else if (!has_type_tag && btf_is_type_tag(t)) {
 2817			/* replace TYPE_TAG with a CONST */
 2818			t->name_off = 0;
 2819			t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
 2820		} else if (!has_enum64 && btf_is_enum(t)) {
 2821			/* clear the kflag */
 2822			t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
 2823		} else if (!has_enum64 && btf_is_enum64(t)) {
 2824			/* replace ENUM64 with a union */
 2825			struct btf_member *m;
 2826
 2827			if (enum64_placeholder_id == 0) {
 2828				enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
 2829				if (enum64_placeholder_id < 0)
 2830					return enum64_placeholder_id;
 2831
 2832				t = (struct btf_type *)btf__type_by_id(btf, i);
 2833			}
 2834
 2835			m = btf_members(t);
 2836			vlen = btf_vlen(t);
 2837			t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
 2838			for (j = 0; j < vlen; j++, m++) {
 2839				m->type = enum64_placeholder_id;
 2840				m->offset = 0;
 2841			}
 2842		}
 2843	}
 2844
 2845	return 0;
 2846}
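/* Editor's note: illustrative example of the sanitization above, in roughly
 * bpftool-style BTF dump notation. On a kernel without BTF_KIND_DATASEC
 * support, a ".bss" section described as
 *
 *	DATASEC '.bss' size=8 vlen=1
 *		type_id=5 offset=0 size=8	(VAR 'counter')
 *
 * is rewritten in the kernel-bound BTF copy to
 *
 *	STRUCT '_bss' size=8 vlen=1
 *		'counter' type_id=5 bits_offset=0
 *
 * with the VAR 'counter' itself replaced by a 1-byte INT, keeping type IDs
 * stable and the overall layout acceptable to older verifiers. The variable
 * name is hypothetical.
 */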
 2847
 2848static bool libbpf_needs_btf(const struct bpf_object *obj)
 2849{
 2850	return obj->efile.btf_maps_shndx >= 0 ||
 2851	       obj->efile.st_ops_shndx >= 0 ||
 2852	       obj->efile.st_ops_link_shndx >= 0 ||
 2853	       obj->nr_extern > 0;
 2854}
 2855
 2856static bool kernel_needs_btf(const struct bpf_object *obj)
 2857{
 2858	return obj->efile.st_ops_shndx >= 0 || obj->efile.st_ops_link_shndx >= 0;
 2859}
 2860
 2861static int bpf_object__init_btf(struct bpf_object *obj,
 2862				Elf_Data *btf_data,
 2863				Elf_Data *btf_ext_data)
 2864{
 2865	int err = -ENOENT;
 2866
 2867	if (btf_data) {
 2868		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
 2869		err = libbpf_get_error(obj->btf);
 2870		if (err) {
 2871			obj->btf = NULL;
 2872			pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
 2873			goto out;
 2874		}
 2875		/* enforce 8-byte pointers for BPF-targeted BTFs */
 2876		btf__set_pointer_size(obj->btf, 8);
 2877	}
 2878	if (btf_ext_data) {
 2879		struct btf_ext_info *ext_segs[3];
 2880		int seg_num, sec_num;
 2881
 2882		if (!obj->btf) {
 2883			pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
 2884				 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
 2885			goto out;
 2886		}
 2887		obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
 2888		err = libbpf_get_error(obj->btf_ext);
 2889		if (err) {
 2890			pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n",
 2891				BTF_EXT_ELF_SEC, err);
 2892			obj->btf_ext = NULL;
 2893			goto out;
 2894		}
 2895
 2896		/* setup .BTF.ext to ELF section mapping */
 2897		ext_segs[0] = &obj->btf_ext->func_info;
 2898		ext_segs[1] = &obj->btf_ext->line_info;
 2899		ext_segs[2] = &obj->btf_ext->core_relo_info;
 2900		for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
 2901			struct btf_ext_info *seg = ext_segs[seg_num];
 2902			const struct btf_ext_info_sec *sec;
 2903			const char *sec_name;
 2904			Elf_Scn *scn;
 2905
 2906			if (seg->sec_cnt == 0)
 2907				continue;
 2908
 2909			seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
 2910			if (!seg->sec_idxs) {
 2911				err = -ENOMEM;
 2912				goto out;
 2913			}
 2914
 2915			sec_num = 0;
 2916			for_each_btf_ext_sec(seg, sec) {
 2917				/* preventively increment index to avoid doing
 2918				 * this before every continue below
 2919				 */
 2920				sec_num++;
 2921
 2922				sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
 2923				if (str_is_empty(sec_name))
 2924					continue;
 2925				scn = elf_sec_by_name(obj, sec_name);
 2926				if (!scn)
 2927					continue;
 2928
 2929				seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
 2930			}
 2931		}
 2932	}
 2933out:
 2934	if (err && libbpf_needs_btf(obj)) {
 2935		pr_warn("BTF is required, but is missing or corrupted.\n");
 2936		return err;
 2937	}
 2938	return 0;
 2939}
 2940
 2941static int compare_vsi_off(const void *_a, const void *_b)
 2942{
 2943	const struct btf_var_secinfo *a = _a;
 2944	const struct btf_var_secinfo *b = _b;
 2945
 2946	return a->offset - b->offset;
 2947}
 2948
 2949static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
 2950			     struct btf_type *t)
 2951{
 2952	__u32 size = 0, i, vars = btf_vlen(t);
 2953	const char *sec_name = btf__name_by_offset(btf, t->name_off);
 2954	struct btf_var_secinfo *vsi;
 2955	bool fixup_offsets = false;
 2956	int err;
 2957
 2958	if (!sec_name) {
 2959		pr_debug("No name found in string section for DATASEC kind.\n");
 2960		return -ENOENT;
 2961	}
 2962
 2963	/* Extern-backing datasecs (.ksyms, .kconfig) have their size and
 2964	 * variable offsets set at the previous step. Further, not every
 2965	 * extern BTF VAR has corresponding ELF symbol preserved, so we skip
 2966	 * all fixups altogether for such sections and go straight to sorting
 2967	 * VARs within their DATASEC.
 2968	 */
 2969	if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0)
 2970		goto sort_vars;
 2971
 2972	/* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to
 2973	 * fix this up. But BPF static linker already fixes this up and fills
 2974	 * all the sizes and offsets during static linking. So this step has
 2975	 * to be optional. But the STV_HIDDEN handling is non-optional for any
 2976	 * non-extern DATASEC, so the variable fixup loop below handles both
 2977	 * functions at the same time, paying the cost of BTF VAR <-> ELF
 2978	 * symbol matching just once.
 2979	 */
 2980	if (t->size == 0) {
 2981		err = find_elf_sec_sz(obj, sec_name, &size);
 2982		if (err || !size) {
 2983			pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n",
 2984				 sec_name, size, err);
 2985			return -ENOENT;
 2986		}
 2987
 2988		t->size = size;
 2989		fixup_offsets = true;
 2990	}
 2991
 2992	for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
 2993		const struct btf_type *t_var;
 2994		struct btf_var *var;
 2995		const char *var_name;
 2996		Elf64_Sym *sym;
 2997
 2998		t_var = btf__type_by_id(btf, vsi->type);
 2999		if (!t_var || !btf_is_var(t_var)) {
 3000			pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name);
 3001			return -EINVAL;
 3002		}
 3003
 3004		var = btf_var(t_var);
 3005		if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN)
 3006			continue;
 3007
 3008		var_name = btf__name_by_offset(btf, t_var->name_off);
 3009		if (!var_name) {
 3010			pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n",
 3011				 sec_name, i);
 3012			return -ENOENT;
 3013		}
 3014
 3015		sym = find_elf_var_sym(obj, var_name);
 3016		if (IS_ERR(sym)) {
 3017			pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n",
 3018				 sec_name, var_name);
 3019			return -ENOENT;
 3020		}
 3021
 3022		if (fixup_offsets)
 3023			vsi->offset = sym->st_value;
 3024
 3025		/* if variable is a global/weak symbol, but has restricted
 3026		 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR
 3027		 * as static. This follows similar logic for functions (BPF
 3028		 * subprogs) and influences libbpf's further decisions about
 3029		 * whether to make global data BPF array maps as
 3030		 * BPF_F_MMAPABLE.
 3031		 */
 3032		if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
 3033		    || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)
 3034			var->linkage = BTF_VAR_STATIC;
 3035	}
 3036
 3037sort_vars:
 3038	qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
 3039	return 0;
 3040}
 3041
 3042static int bpf_object_fixup_btf(struct bpf_object *obj)
 3043{
 3044	int i, n, err = 0;
 3045
 3046	if (!obj->btf)
 3047		return 0;
 3048
 3049	n = btf__type_cnt(obj->btf);
 3050	for (i = 1; i < n; i++) {
 3051		struct btf_type *t = btf_type_by_id(obj->btf, i);
 3052
 3053		/* Loader needs to fix up some of the things compiler
 3054		 * couldn't get its hands on while emitting BTF. This
 3055		 * is section size and global variable offset. We use
 3056		 * the info from the ELF itself for this purpose.
 3057		 */
 3058		if (btf_is_datasec(t)) {
 3059			err = btf_fixup_datasec(obj, obj->btf, t);
 3060			if (err)
 3061				return err;
 3062		}
 3063	}
 3064
 3065	return 0;
 3066}
 3067
 3068static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
 3069{
 3070	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
 3071	    prog->type == BPF_PROG_TYPE_LSM)
 3072		return true;
 3073
 3074	/* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
 3075	 * also need vmlinux BTF
 3076	 */
 3077	if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
 3078		return true;
 3079
 3080	return false;
 3081}
 3082
 3083static bool map_needs_vmlinux_btf(struct bpf_map *map)
 3084{
 3085	return bpf_map__is_struct_ops(map);
 3086}
 3087
 3088static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
 3089{
 3090	struct bpf_program *prog;
 3091	struct bpf_map *map;
 3092	int i;
 3093
 3094	/* CO-RE relocations need kernel BTF, only when btf_custom_path
 3095	 * is not specified
 3096	 */
 3097	if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
 3098		return true;
 3099
 3100	/* Support for typed ksyms needs kernel BTF */
 3101	for (i = 0; i < obj->nr_extern; i++) {
 3102		const struct extern_desc *ext;
 3103
 3104		ext = &obj->externs[i];
 3105		if (ext->type == EXT_KSYM && ext->ksym.type_id)
 3106			return true;
 3107	}
 3108
 3109	bpf_object__for_each_program(prog, obj) {
 3110		if (!prog->autoload)
 3111			continue;
 3112		if (prog_needs_vmlinux_btf(prog))
 3113			return true;
 3114	}
 3115
 3116	bpf_object__for_each_map(map, obj) {
 3117		if (map_needs_vmlinux_btf(map))
 3118			return true;
 3119	}
 3120
 3121	return false;
 3122}
 3123
 3124static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
 3125{
 3126	int err;
 3127
 3128	/* btf_vmlinux could be loaded earlier */
 3129	if (obj->btf_vmlinux || obj->gen_loader)
 3130		return 0;
 3131
 3132	if (!force && !obj_needs_vmlinux_btf(obj))
 3133		return 0;
 3134
 3135	obj->btf_vmlinux = btf__load_vmlinux_btf();
 3136	err = libbpf_get_error(obj->btf_vmlinux);
 3137	if (err) {
 3138		pr_warn("Error loading vmlinux BTF: %d\n", err);
 3139		obj->btf_vmlinux = NULL;
 3140		return err;
 3141	}
 3142	return 0;
 3143}
 3144
 3145static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 3146{
 3147	struct btf *kern_btf = obj->btf;
 3148	bool btf_mandatory, sanitize;
 3149	int i, err = 0;
 3150
 3151	if (!obj->btf)
 3152		return 0;
 3153
 3154	if (!kernel_supports(obj, FEAT_BTF)) {
 3155		if (kernel_needs_btf(obj)) {
 3156			err = -EOPNOTSUPP;
 3157			goto report;
 3158		}
 3159		pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
 3160		return 0;
 3161	}
 3162
 3163	/* Even though some subprogs are global/weak, user might prefer more
 3164	 * permissive BPF verification process that BPF verifier performs for
 3165	 * static functions, taking into account more context from the caller
 3166	 * functions. In such case, they need to mark such subprogs with
 3167	 * __attribute__((visibility("hidden"))) and libbpf will adjust
 3168	 * corresponding FUNC BTF type to be marked as static and trigger more
 3169	 * involved BPF verification process.
 3170	 */
 3171	for (i = 0; i < obj->nr_programs; i++) {
 3172		struct bpf_program *prog = &obj->programs[i];
 3173		struct btf_type *t;
 3174		const char *name;
 3175		int j, n;
 3176
 3177		if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
 3178			continue;
 3179
 3180		n = btf__type_cnt(obj->btf);
 3181		for (j = 1; j < n; j++) {
 3182			t = btf_type_by_id(obj->btf, j);
 3183			if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
 3184				continue;
 3185
 3186			name = btf__str_by_offset(obj->btf, t->name_off);
 3187			if (strcmp(name, prog->name) != 0)
 3188				continue;
 3189
 3190			t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
 3191			break;
 3192		}
 3193	}
 3194
 3195	sanitize = btf_needs_sanitization(obj);
 3196	if (sanitize) {
 3197		const void *raw_data;
 3198		__u32 sz;
 3199
 3200		/* clone BTF to sanitize a copy and leave the original intact */
 3201		raw_data = btf__raw_data(obj->btf, &sz);
 3202		kern_btf = btf__new(raw_data, sz);
 3203		err = libbpf_get_error(kern_btf);
 3204		if (err)
 3205			return err;
 3206
 3207		/* enforce 8-byte pointers for BPF-targeted BTFs */
 3208		btf__set_pointer_size(obj->btf, 8);
 3209		err = bpf_object__sanitize_btf(obj, kern_btf);
 3210		if (err)
 3211			return err;
 3212	}
 3213
 3214	if (obj->gen_loader) {
 3215		__u32 raw_size = 0;
 3216		const void *raw_data = btf__raw_data(kern_btf, &raw_size);
 3217
 3218		if (!raw_data)
 3219			return -ENOMEM;
 3220		bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
 3221		/* Pretend to have valid FD to pass various fd >= 0 checks.
 3222		 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
 3223		 */
 3224		btf__set_fd(kern_btf, 0);
 3225	} else {
 3226		/* currently BPF_BTF_LOAD only supports log_level 1 */
 3227		err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
 3228					   obj->log_level ? 1 : 0);
 3229	}
 3230	if (sanitize) {
 3231		if (!err) {
 3232			/* move fd to libbpf's BTF */
 3233			btf__set_fd(obj->btf, btf__fd(kern_btf));
 3234			btf__set_fd(kern_btf, -1);
 3235		}
 3236		btf__free(kern_btf);
 3237	}
 3238report:
 3239	if (err) {
 3240		btf_mandatory = kernel_needs_btf(obj);
 3241		pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
 3242			btf_mandatory ? "BTF is mandatory, can't proceed."
 3243				      : "BTF is optional, ignoring.");
 3244		if (!btf_mandatory)
 3245			err = 0;
 3246	}
 3247	return err;
 3248}
 3249
 3250static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
 3251{
 3252	const char *name;
 3253
 3254	name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
 3255	if (!name) {
 3256		pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
 3257			off, obj->path, elf_errmsg(-1));
 3258		return NULL;
 3259	}
 3260
 3261	return name;
 3262}
 3263
 3264static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
 3265{
 3266	const char *name;
 3267
 3268	name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
 3269	if (!name) {
 3270		pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
 3271			off, obj->path, elf_errmsg(-1));
 3272		return NULL;
 3273	}
 3274
 3275	return name;
 3276}
 3277
 3278static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
 3279{
 3280	Elf_Scn *scn;
 3281
 3282	scn = elf_getscn(obj->efile.elf, idx);
 3283	if (!scn) {
 3284		pr_warn("elf: failed to get section(%zu) from %s: %s\n",
 3285			idx, obj->path, elf_errmsg(-1));
 3286		return NULL;
 3287	}
 3288	return scn;
 3289}
 3290
 3291static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
 3292{
 3293	Elf_Scn *scn = NULL;
 3294	Elf *elf = obj->efile.elf;
 3295	const char *sec_name;
 3296
 3297	while ((scn = elf_nextscn(elf, scn)) != NULL) {
 3298		sec_name = elf_sec_name(obj, scn);
 3299		if (!sec_name)
 3300			return NULL;
 3301
 3302		if (strcmp(sec_name, name) != 0)
 3303			continue;
 3304
 3305		return scn;
 3306	}
 3307	return NULL;
 3308}
 3309
 3310static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
 3311{
 3312	Elf64_Shdr *shdr;
 3313
 3314	if (!scn)
 3315		return NULL;
 3316
 3317	shdr = elf64_getshdr(scn);
 3318	if (!shdr) {
 3319		pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
 3320			elf_ndxscn(scn), obj->path, elf_errmsg(-1));
 3321		return NULL;
 3322	}
 3323
 3324	return shdr;
 3325}
 3326
 3327static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
 3328{
 3329	const char *name;
 3330	Elf64_Shdr *sh;
 3331
 3332	if (!scn)
 3333		return NULL;
 3334
 3335	sh = elf_sec_hdr(obj, scn);
 3336	if (!sh)
 3337		return NULL;
 3338
 3339	name = elf_sec_str(obj, sh->sh_name);
 3340	if (!name) {
 3341		pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
 3342			elf_ndxscn(scn), obj->path, elf_errmsg(-1));
 3343		return NULL;
 3344	}
 3345
 3346	return name;
 3347}
 3348
 3349static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
 3350{
 3351	Elf_Data *data;
 3352
 3353	if (!scn)
 3354		return NULL;
 3355
 3356	data = elf_getdata(scn, 0);
 3357	if (!data) {
 3358		pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
 3359			elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
 3360			obj->path, elf_errmsg(-1));
 3361		return NULL;
 3362	}
 3363
 3364	return data;
 3365}
 3366
 3367static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
 3368{
 3369	if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
 3370		return NULL;
 3371
 3372	return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
 3373}
 3374
 3375static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
 3376{
 3377	if (idx >= data->d_size / sizeof(Elf64_Rel))
 3378		return NULL;
 3379
 3380	return (Elf64_Rel *)data->d_buf + idx;
 3381}
 3382
 3383static bool is_sec_name_dwarf(const char *name)
 3384{
 3385	/* approximation, but the actual list is too long */
 3386	return str_has_pfx(name, ".debug_");
 3387}
 3388
 3389static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
 3390{
 3391	/* no special handling of .strtab */
 3392	if (hdr->sh_type == SHT_STRTAB)
 3393		return true;
 3394
 3395	/* ignore .llvm_addrsig section as well */
 3396	if (hdr->sh_type == SHT_LLVM_ADDRSIG)
 3397		return true;
 3398
 3399	/* no subprograms will lead to an empty .text section, ignore it */
 3400	if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
 3401	    strcmp(name, ".text") == 0)
 3402		return true;
 3403
 3404	/* DWARF sections */
 3405	if (is_sec_name_dwarf(name))
 3406		return true;
 3407
 3408	if (str_has_pfx(name, ".rel")) {
 3409		name += sizeof(".rel") - 1;
 3410		/* DWARF section relocations */
 3411		if (is_sec_name_dwarf(name))
 3412			return true;
 3413
 3414		/* .BTF and .BTF.ext don't need relocations */
 3415		if (strcmp(name, BTF_ELF_SEC) == 0 ||
 3416		    strcmp(name, BTF_EXT_ELF_SEC) == 0)
 3417			return true;
 3418	}
 3419
 3420	return false;
 3421}
 3422
 3423static int cmp_progs(const void *_a, const void *_b)
 3424{
 3425	const struct bpf_program *a = _a;
 3426	const struct bpf_program *b = _b;
 3427
 3428	if (a->sec_idx != b->sec_idx)
 3429		return a->sec_idx < b->sec_idx ? -1 : 1;
 3430
 3431	/* sec_insn_off can't be the same within the section */
 3432	return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
 3433}
 3434
 3435static int bpf_object__elf_collect(struct bpf_object *obj)
 3436{
 3437	struct elf_sec_desc *sec_desc;
 3438	Elf *elf = obj->efile.elf;
 3439	Elf_Data *btf_ext_data = NULL;
 3440	Elf_Data *btf_data = NULL;
 3441	int idx = 0, err = 0;
 3442	const char *name;
 3443	Elf_Data *data;
 3444	Elf_Scn *scn;
 3445	Elf64_Shdr *sh;
 3446
 3447	/* ELF section indices are 0-based, but sec #0 is special "invalid"
 3448	 * section. Since section count retrieved by elf_getshdrnum() does
 3449	 * include sec #0, it is already the necessary size of an array to keep
 3450	 * all the sections.
 3451	 */
 3452	if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) {
 3453		pr_warn("elf: failed to get the number of sections for %s: %s\n",
 3454			obj->path, elf_errmsg(-1));
 3455		return -LIBBPF_ERRNO__FORMAT;
 3456	}
 3457	obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
 3458	if (!obj->efile.secs)
 3459		return -ENOMEM;
 3460
 3461	/* a bunch of ELF parsing functionality depends on processing symbols,
 3462	 * so do the first pass and find the symbol table
 3463	 */
 3464	scn = NULL;
 3465	while ((scn = elf_nextscn(elf, scn)) != NULL) {
 3466		sh = elf_sec_hdr(obj, scn);
 3467		if (!sh)
 3468			return -LIBBPF_ERRNO__FORMAT;
 3469
 3470		if (sh->sh_type == SHT_SYMTAB) {
 3471			if (obj->efile.symbols) {
 3472				pr_warn("elf: multiple symbol tables in %s\n", obj->path);
 3473				return -LIBBPF_ERRNO__FORMAT;
 3474			}
 3475
 3476			data = elf_sec_data(obj, scn);
 3477			if (!data)
 3478				return -LIBBPF_ERRNO__FORMAT;
 3479
 3480			idx = elf_ndxscn(scn);
 3481
 3482			obj->efile.symbols = data;
 3483			obj->efile.symbols_shndx = idx;
 3484			obj->efile.strtabidx = sh->sh_link;
 3485		}
 3486	}
 3487
 3488	if (!obj->efile.symbols) {
 3489		pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
 3490			obj->path);
 3491		return -ENOENT;
 3492	}
 3493
 3494	scn = NULL;
 3495	while ((scn = elf_nextscn(elf, scn)) != NULL) {
 3496		idx = elf_ndxscn(scn);
 3497		sec_desc = &obj->efile.secs[idx];
 3498
 3499		sh = elf_sec_hdr(obj, scn);
 3500		if (!sh)
 3501			return -LIBBPF_ERRNO__FORMAT;
 3502
 3503		name = elf_sec_str(obj, sh->sh_name);
 3504		if (!name)
 3505			return -LIBBPF_ERRNO__FORMAT;
 3506
 3507		if (ignore_elf_section(sh, name))
 3508			continue;
 3509
 3510		data = elf_sec_data(obj, scn);
 3511		if (!data)
 3512			return -LIBBPF_ERRNO__FORMAT;
 3513
 3514		pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
 3515			 idx, name, (unsigned long)data->d_size,
 3516			 (int)sh->sh_link, (unsigned long)sh->sh_flags,
 3517			 (int)sh->sh_type);
 3518
 3519		if (strcmp(name, "license") == 0) {
 3520			err = bpf_object__init_license(obj, data->d_buf, data->d_size);
 3521			if (err)
 3522				return err;
 3523		} else if (strcmp(name, "version") == 0) {
 3524			err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
 3525			if (err)
 3526				return err;
 3527		} else if (strcmp(name, "maps") == 0) {
 3528			pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n");
 3529			return -ENOTSUP;
 3530		} else if (strcmp(name, MAPS_ELF_SEC) == 0) {
 3531			obj->efile.btf_maps_shndx = idx;
 3532		} else if (strcmp(name, BTF_ELF_SEC) == 0) {
 3533			if (sh->sh_type != SHT_PROGBITS)
 3534				return -LIBBPF_ERRNO__FORMAT;
 3535			btf_data = data;
 3536		} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
 3537			if (sh->sh_type != SHT_PROGBITS)
 3538				return -LIBBPF_ERRNO__FORMAT;
 3539			btf_ext_data = data;
 3540		} else if (sh->sh_type == SHT_SYMTAB) {
 3541			/* already processed during the first pass above */
 3542		} else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
 3543			if (sh->sh_flags & SHF_EXECINSTR) {
 3544				if (strcmp(name, ".text") == 0)
 3545					obj->efile.text_shndx = idx;
 3546				err = bpf_object__add_programs(obj, data, name, idx);
 3547				if (err)
 3548					return err;
 3549			} else if (strcmp(name, DATA_SEC) == 0 ||
 3550				   str_has_pfx(name, DATA_SEC ".")) {
 3551				sec_desc->sec_type = SEC_DATA;
 3552				sec_desc->shdr = sh;
 3553				sec_desc->data = data;
 3554			} else if (strcmp(name, RODATA_SEC) == 0 ||
 3555				   str_has_pfx(name, RODATA_SEC ".")) {
 3556				sec_desc->sec_type = SEC_RODATA;
 3557				sec_desc->shdr = sh;
 3558				sec_desc->data = data;
 3559			} else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
 3560				obj->efile.st_ops_data = data;
 3561				obj->efile.st_ops_shndx = idx;
 3562			} else if (strcmp(name, STRUCT_OPS_LINK_SEC) == 0) {
 3563				obj->efile.st_ops_link_data = data;
 3564				obj->efile.st_ops_link_shndx = idx;
 3565			} else {
 3566				pr_info("elf: skipping unrecognized data section(%d) %s\n",
 3567					idx, name);
 3568			}
 3569		} else if (sh->sh_type == SHT_REL) {
 3570			int targ_sec_idx = sh->sh_info; /* points to other section */
 3571
 3572			if (sh->sh_entsize != sizeof(Elf64_Rel) ||
 3573			    targ_sec_idx >= obj->efile.sec_cnt)
 3574				return -LIBBPF_ERRNO__FORMAT;
 3575
 3576			/* Only do relo for section with exec instructions */
 3577			if (!section_have_execinstr(obj, targ_sec_idx) &&
 3578			    strcmp(name, ".rel" STRUCT_OPS_SEC) &&
 3579			    strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
 3580			    strcmp(name, ".rel" MAPS_ELF_SEC)) {
 3581				pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
 3582					idx, name, targ_sec_idx,
 3583					elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
 3584				continue;
 3585			}
 3586
 3587			sec_desc->sec_type = SEC_RELO;
 3588			sec_desc->shdr = sh;
 3589			sec_desc->data = data;
 3590		} else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 ||
 3591							 str_has_pfx(name, BSS_SEC "."))) {
 3592			sec_desc->sec_type = SEC_BSS;
 3593			sec_desc->shdr = sh;
 3594			sec_desc->data = data;
 3595		} else {
 3596			pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
 3597				(size_t)sh->sh_size);
 3598		}
 3599	}
 3600
 3601	if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
 3602		pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
 3603		return -LIBBPF_ERRNO__FORMAT;
 3604	}
 3605
  3606	/* sort BPF programs by section index and in-section instruction offset
  3607	 * for faster search
  3608	 */
 3609	if (obj->nr_programs)
 3610		qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
 3611
 3612	return bpf_object__init_btf(obj, btf_data, btf_ext_data);
 3613}
 3614
 3615static bool sym_is_extern(const Elf64_Sym *sym)
 3616{
 3617	int bind = ELF64_ST_BIND(sym->st_info);
 3618	/* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
 3619	return sym->st_shndx == SHN_UNDEF &&
 3620	       (bind == STB_GLOBAL || bind == STB_WEAK) &&
 3621	       ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
 3622}
 3623
 3624static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
 3625{
 3626	int bind = ELF64_ST_BIND(sym->st_info);
 3627	int type = ELF64_ST_TYPE(sym->st_info);
 3628
 3629	/* in .text section */
 3630	if (sym->st_shndx != text_shndx)
 3631		return false;
 3632
 3633	/* local function */
 3634	if (bind == STB_LOCAL && type == STT_SECTION)
 3635		return true;
 3636
 3637	/* global function */
 3638	return bind == STB_GLOBAL && type == STT_FUNC;
 3639}
 3640
 3641static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
 3642{
 3643	const struct btf_type *t;
 3644	const char *tname;
 3645	int i, n;
 3646
 3647	if (!btf)
 3648		return -ESRCH;
 3649
 3650	n = btf__type_cnt(btf);
 3651	for (i = 1; i < n; i++) {
 3652		t = btf__type_by_id(btf, i);
 3653
 3654		if (!btf_is_var(t) && !btf_is_func(t))
 3655			continue;
 3656
 3657		tname = btf__name_by_offset(btf, t->name_off);
 3658		if (strcmp(tname, ext_name))
 3659			continue;
 3660
 3661		if (btf_is_var(t) &&
 3662		    btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
 3663			return -EINVAL;
 3664
 3665		if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
 3666			return -EINVAL;
 3667
 3668		return i;
 3669	}
 3670
 3671	return -ENOENT;
 3672}
 3673
  3674static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id)
{
 3675	const struct btf_var_secinfo *vs;
 3676	const struct btf_type *t;
 3677	int i, j, n;
 3678
 3679	if (!btf)
 3680		return -ESRCH;
 3681
 3682	n = btf__type_cnt(btf);
 3683	for (i = 1; i < n; i++) {
 3684		t = btf__type_by_id(btf, i);
 3685
 3686		if (!btf_is_datasec(t))
 3687			continue;
 3688
 3689		vs = btf_var_secinfos(t);
 3690		for (j = 0; j < btf_vlen(t); j++, vs++) {
 3691			if (vs->type == ext_btf_id)
 3692				return i;
 3693		}
 3694	}
 3695
 3696	return -ENOENT;
 3697}
 3698
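/* Map the BTF type of a Kconfig extern to its kcfg representation:
 * a 1-byte bool -> KCFG_BOOL, a 1-byte int -> KCFG_CHAR, other
 * power-of-2 sized ints up to 8 bytes -> KCFG_INT, enum/enum64 named
 * 'libbpf_tristate' -> KCFG_TRISTATE, arrays of chars -> KCFG_CHAR_ARR;
 * everything else is KCFG_UNKNOWN.
 */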
 3699static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
 3700				     bool *is_signed)
 3701{
 3702	const struct btf_type *t;
 3703	const char *name;
 3704
 3705	t = skip_mods_and_typedefs(btf, id, NULL);
 3706	name = btf__name_by_offset(btf, t->name_off);
 3707
 3708	if (is_signed)
 3709		*is_signed = false;
 3710	switch (btf_kind(t)) {
 3711	case BTF_KIND_INT: {
 3712		int enc = btf_int_encoding(t);
 3713
 3714		if (enc & BTF_INT_BOOL)
 3715			return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
 3716		if (is_signed)
 3717			*is_signed = enc & BTF_INT_SIGNED;
 3718		if (t->size == 1)
 3719			return KCFG_CHAR;
 3720		if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
 3721			return KCFG_UNKNOWN;
 3722		return KCFG_INT;
 3723	}
 3724	case BTF_KIND_ENUM:
 3725		if (t->size != 4)
 3726			return KCFG_UNKNOWN;
 3727		if (strcmp(name, "libbpf_tristate"))
 3728			return KCFG_UNKNOWN;
 3729		return KCFG_TRISTATE;
 3730	case BTF_KIND_ENUM64:
 3731		if (strcmp(name, "libbpf_tristate"))
 3732			return KCFG_UNKNOWN;
 3733		return KCFG_TRISTATE;
 3734	case BTF_KIND_ARRAY:
 3735		if (btf_array(t)->nelems == 0)
 3736			return KCFG_UNKNOWN;
 3737		if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
 3738			return KCFG_UNKNOWN;
 3739		return KCFG_CHAR_ARR;
 3740	default:
 3741		return KCFG_UNKNOWN;
 3742	}
 3743}
 3744
 3745static int cmp_externs(const void *_a, const void *_b)
 3746{
 3747	const struct extern_desc *a = _a;
 3748	const struct extern_desc *b = _b;
 3749
 3750	if (a->type != b->type)
 3751		return a->type < b->type ? -1 : 1;
 3752
 3753	if (a->type == EXT_KCFG) {
 3754		/* descending order by alignment requirements */
 3755		if (a->kcfg.align != b->kcfg.align)
 3756			return a->kcfg.align > b->kcfg.align ? -1 : 1;
 3757		/* ascending order by size, within same alignment class */
 3758		if (a->kcfg.sz != b->kcfg.sz)
 3759			return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
 3760	}
 3761
 3762	/* resolve ties by name */
 3763	return strcmp(a->name, b->name);
 3764}
 3765
 3766static int find_int_btf_id(const struct btf *btf)
 3767{
 3768	const struct btf_type *t;
 3769	int i, n;
 3770
 3771	n = btf__type_cnt(btf);
 3772	for (i = 1; i < n; i++) {
 3773		t = btf__type_by_id(btf, i);
 3774
 3775		if (btf_is_int(t) && btf_int_bits(t) == 32)
 3776			return i;
 3777	}
 3778
 3779	return 0;
 3780}
 3781
 3782static int add_dummy_ksym_var(struct btf *btf)
 3783{
 3784	int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
 3785	const struct btf_var_secinfo *vs;
 3786	const struct btf_type *sec;
 3787
 3788	if (!btf)
 3789		return 0;
 3790
 3791	sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
 3792					    BTF_KIND_DATASEC);
 3793	if (sec_btf_id < 0)
 3794		return 0;
 3795
 3796	sec = btf__type_by_id(btf, sec_btf_id);
 3797	vs = btf_var_secinfos(sec);
 3798	for (i = 0; i < btf_vlen(sec); i++, vs++) {
 3799		const struct btf_type *vt;
 3800
 3801		vt = btf__type_by_id(btf, vs->type);
 3802		if (btf_is_func(vt))
 3803			break;
 3804	}
 3805
 3806	/* No func in ksyms sec.  No need to add dummy var. */
 3807	if (i == btf_vlen(sec))
 3808		return 0;
 3809
 3810	int_btf_id = find_int_btf_id(btf);
 3811	dummy_var_btf_id = btf__add_var(btf,
 3812					"dummy_ksym",
 3813					BTF_VAR_GLOBAL_ALLOCATED,
 3814					int_btf_id);
 3815	if (dummy_var_btf_id < 0)
 3816		pr_warn("cannot create a dummy_ksym var\n");
 3817
 3818	return dummy_var_btf_id;
 3819}
 3820
 3821static int bpf_object__collect_externs(struct bpf_object *obj)
 3822{
 3823	struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
 3824	const struct btf_type *t;
 3825	struct extern_desc *ext;
 3826	int i, n, off, dummy_var_btf_id;
 3827	const char *ext_name, *sec_name;
 3828	size_t ext_essent_len;
 3829	Elf_Scn *scn;
 3830	Elf64_Shdr *sh;
 3831
 3832	if (!obj->efile.symbols)
 3833		return 0;
 3834
 3835	scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
 3836	sh = elf_sec_hdr(obj, scn);
 3837	if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
 3838		return -LIBBPF_ERRNO__FORMAT;
 3839
 3840	dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
 3841	if (dummy_var_btf_id < 0)
 3842		return dummy_var_btf_id;
 3843
 3844	n = sh->sh_size / sh->sh_entsize;
 3845	pr_debug("looking for externs among %d symbols...\n", n);
 3846
 3847	for (i = 0; i < n; i++) {
 3848		Elf64_Sym *sym = elf_sym_by_idx(obj, i);
 3849
 3850		if (!sym)
 3851			return -LIBBPF_ERRNO__FORMAT;
 3852		if (!sym_is_extern(sym))
 3853			continue;
 3854		ext_name = elf_sym_str(obj, sym->st_name);
 3855		if (!ext_name || !ext_name[0])
 3856			continue;
 3857
 3858		ext = obj->externs;
 3859		ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
 3860		if (!ext)
 3861			return -ENOMEM;
 3862		obj->externs = ext;
 3863		ext = &ext[obj->nr_extern];
 3864		memset(ext, 0, sizeof(*ext));
 3865		obj->nr_extern++;
 3866
 3867		ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
 3868		if (ext->btf_id <= 0) {
 3869			pr_warn("failed to find BTF for extern '%s': %d\n",
 3870				ext_name, ext->btf_id);
 3871			return ext->btf_id;
 3872		}
 3873		t = btf__type_by_id(obj->btf, ext->btf_id);
 3874		ext->name = btf__name_by_offset(obj->btf, t->name_off);
 3875		ext->sym_idx = i;
 3876		ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
 3877
 3878		ext_essent_len = bpf_core_essential_name_len(ext->name);
 3879		ext->essent_name = NULL;
 3880		if (ext_essent_len != strlen(ext->name)) {
 3881			ext->essent_name = strndup(ext->name, ext_essent_len);
 3882			if (!ext->essent_name)
 3883				return -ENOMEM;
 3884		}
 3885
 3886		ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
 3887		if (ext->sec_btf_id <= 0) {
 3888			pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
 3889				ext_name, ext->btf_id, ext->sec_btf_id);
 3890			return ext->sec_btf_id;
 3891		}
 3892		sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
 3893		sec_name = btf__name_by_offset(obj->btf, sec->name_off);
 3894
 3895		if (strcmp(sec_name, KCONFIG_SEC) == 0) {
 3896			if (btf_is_func(t)) {
 3897				pr_warn("extern function %s is unsupported under %s section\n",
 3898					ext->name, KCONFIG_SEC);
 3899				return -ENOTSUP;
 3900			}
 3901			kcfg_sec = sec;
 3902			ext->type = EXT_KCFG;
 3903			ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
 3904			if (ext->kcfg.sz <= 0) {
 3905				pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
 3906					ext_name, ext->kcfg.sz);
 3907				return ext->kcfg.sz;
 3908			}
 3909			ext->kcfg.align = btf__align_of(obj->btf, t->type);
 3910			if (ext->kcfg.align <= 0) {
 3911				pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
 3912					ext_name, ext->kcfg.align);
 3913				return -EINVAL;
 3914			}
 3915			ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
 3916							&ext->kcfg.is_signed);
 3917			if (ext->kcfg.type == KCFG_UNKNOWN) {
 3918				pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
 3919				return -ENOTSUP;
 3920			}
 3921		} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
 3922			ksym_sec = sec;
 3923			ext->type = EXT_KSYM;
 3924			skip_mods_and_typedefs(obj->btf, t->type,
 3925					       &ext->ksym.type_id);
 3926		} else {
 3927			pr_warn("unrecognized extern section '%s'\n", sec_name);
 3928			return -ENOTSUP;
 3929		}
 3930	}
 3931	pr_debug("collected %d externs total\n", obj->nr_extern);
 3932
 3933	if (!obj->nr_extern)
 3934		return 0;
 3935
 3936	/* sort externs by type, for kcfg ones also by (align, size, name) */
 3937	qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
 3938
 3939	/* for .ksyms section, we need to turn all externs into allocated
 3940	 * variables in BTF to pass kernel verification; we do this by
  3941	 * pretending that each extern is a regular int-sized variable
 3942	 */
 3943	if (ksym_sec) {
 3944		/* find existing 4-byte integer type in BTF to use for fake
 3945		 * extern variables in DATASEC
 3946		 */
 3947		int int_btf_id = find_int_btf_id(obj->btf);
  3948		/* For extern functions, the dummy_var added earlier
  3949		 * will be used to replace vs->type, and its name
  3950		 * string will be reused to fill in any missing
  3951		 * parameter names.
  3952		 */
 3953		const struct btf_type *dummy_var;
 3954
 3955		dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
 3956		for (i = 0; i < obj->nr_extern; i++) {
 3957			ext = &obj->externs[i];
 3958			if (ext->type != EXT_KSYM)
 3959				continue;
 3960			pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
 3961				 i, ext->sym_idx, ext->name);
 3962		}
 3963
 3964		sec = ksym_sec;
 3965		n = btf_vlen(sec);
 3966		for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
 3967			struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
 3968			struct btf_type *vt;
 3969
 3970			vt = (void *)btf__type_by_id(obj->btf, vs->type);
 3971			ext_name = btf__name_by_offset(obj->btf, vt->name_off);
 3972			ext = find_extern_by_name(obj, ext_name);
 3973			if (!ext) {
 3974				pr_warn("failed to find extern definition for BTF %s '%s'\n",
 3975					btf_kind_str(vt), ext_name);
 3976				return -ESRCH;
 3977			}
 3978			if (btf_is_func(vt)) {
 3979				const struct btf_type *func_proto;
 3980				struct btf_param *param;
 3981				int j;
 3982
 3983				func_proto = btf__type_by_id(obj->btf,
 3984							     vt->type);
 3985				param = btf_params(func_proto);
  3986				/* Reuse the dummy_var name string if the
  3987				 * func proto is missing a param name.
  3988				 */
 3989				for (j = 0; j < btf_vlen(func_proto); j++)
 3990					if (param[j].type && !param[j].name_off)
 3991						param[j].name_off =
 3992							dummy_var->name_off;
 3993				vs->type = dummy_var_btf_id;
 3994				vt->info &= ~0xffff;
 3995				vt->info |= BTF_FUNC_GLOBAL;
 3996			} else {
 3997				btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
 3998				vt->type = int_btf_id;
 3999			}
 4000			vs->offset = off;
 4001			vs->size = sizeof(int);
 4002		}
 4003		sec->size = off;
 4004	}
 4005
 4006	if (kcfg_sec) {
 4007		sec = kcfg_sec;
 4008		/* for kcfg externs calculate their offsets within a .kconfig map */
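		/* Illustrative example: with externs sorted by cmp_externs()
		 * (descending align, then ascending size), entries of
		 * (sz=8, align=8), (sz=4, align=4) and (sz=1, align=1) land
		 * at offsets 0, 8 and 12, for a total section size of 13.
		 */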
 4009		off = 0;
 4010		for (i = 0; i < obj->nr_extern; i++) {
 4011			ext = &obj->externs[i];
 4012			if (ext->type != EXT_KCFG)
 4013				continue;
 4014
 4015			ext->kcfg.data_off = roundup(off, ext->kcfg.align);
 4016			off = ext->kcfg.data_off + ext->kcfg.sz;
 4017			pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
 4018				 i, ext->sym_idx, ext->kcfg.data_off, ext->name);
 4019		}
 4020		sec->size = off;
 4021		n = btf_vlen(sec);
 4022		for (i = 0; i < n; i++) {
 4023			struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
 4024
 4025			t = btf__type_by_id(obj->btf, vs->type);
 4026			ext_name = btf__name_by_offset(obj->btf, t->name_off);
 4027			ext = find_extern_by_name(obj, ext_name);
 4028			if (!ext) {
 4029				pr_warn("failed to find extern definition for BTF var '%s'\n",
 4030					ext_name);
 4031				return -ESRCH;
 4032			}
 4033			btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
 4034			vs->offset = ext->kcfg.data_off;
 4035		}
 4036	}
 4037	return 0;
 4038}
 4039
 4040static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
 4041{
 4042	return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
 4043}
 4044
 4045struct bpf_program *
 4046bpf_object__find_program_by_name(const struct bpf_object *obj,
 4047				 const char *name)
 4048{
 4049	struct bpf_program *prog;
 4050
 4051	bpf_object__for_each_program(prog, obj) {
 4052		if (prog_is_subprog(obj, prog))
 4053			continue;
 4054		if (!strcmp(prog->name, name))
 4055			return prog;
 4056	}
 4057	return errno = ENOENT, NULL;
 4058}
 4059
 4060static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
 4061				      int shndx)
 4062{
 4063	switch (obj->efile.secs[shndx].sec_type) {
 4064	case SEC_BSS:
 4065	case SEC_DATA:
 4066	case SEC_RODATA:
 4067		return true;
 4068	default:
 4069		return false;
 4070	}
 4071}
 4072
 4073static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
 4074				      int shndx)
 4075{
 4076	return shndx == obj->efile.btf_maps_shndx;
 4077}
 4078
 4079static enum libbpf_map_type
 4080bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
 4081{
 4082	if (shndx == obj->efile.symbols_shndx)
 4083		return LIBBPF_MAP_KCONFIG;
 4084
 4085	switch (obj->efile.secs[shndx].sec_type) {
 4086	case SEC_BSS:
 4087		return LIBBPF_MAP_BSS;
 4088	case SEC_DATA:
 4089		return LIBBPF_MAP_DATA;
 4090	case SEC_RODATA:
 4091		return LIBBPF_MAP_RODATA;
 4092	default:
 4093		return LIBBPF_MAP_UNSPEC;
 4094	}
 4095}
 4096
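/* Classify a single ELF relocation against a program instruction and fill
 * out reloc_desc accordingly: extern ksym/kconfig references become
 * RELO_EXTERN_CALL/RELO_EXTERN_LD64, subprogram calls become RELO_CALL,
 * subprogram address loads become RELO_SUBPROG_ADDR, and map/global data
 * references become RELO_LD64/RELO_DATA.
 */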
 4097static int bpf_program__record_reloc(struct bpf_program *prog,
 4098				     struct reloc_desc *reloc_desc,
 4099				     __u32 insn_idx, const char *sym_name,
 4100				     const Elf64_Sym *sym, const Elf64_Rel *rel)
 4101{
 4102	struct bpf_insn *insn = &prog->insns[insn_idx];
 4103	size_t map_idx, nr_maps = prog->obj->nr_maps;
 4104	struct bpf_object *obj = prog->obj;
 4105	__u32 shdr_idx = sym->st_shndx;
 4106	enum libbpf_map_type type;
 4107	const char *sym_sec_name;
 4108	struct bpf_map *map;
 4109
 4110	if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
 4111		pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
 4112			prog->name, sym_name, insn_idx, insn->code);
 4113		return -LIBBPF_ERRNO__RELOC;
 4114	}
 4115
 4116	if (sym_is_extern(sym)) {
 4117		int sym_idx = ELF64_R_SYM(rel->r_info);
 4118		int i, n = obj->nr_extern;
 4119		struct extern_desc *ext;
 4120
 4121		for (i = 0; i < n; i++) {
 4122			ext = &obj->externs[i];
 4123			if (ext->sym_idx == sym_idx)
 4124				break;
 4125		}
 4126		if (i >= n) {
 4127			pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
 4128				prog->name, sym_name, sym_idx);
 4129			return -LIBBPF_ERRNO__RELOC;
 4130		}
 4131		pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
 4132			 prog->name, i, ext->name, ext->sym_idx, insn_idx);
 4133		if (insn->code == (BPF_JMP | BPF_CALL))
 4134			reloc_desc->type = RELO_EXTERN_CALL;
 4135		else
 4136			reloc_desc->type = RELO_EXTERN_LD64;
 4137		reloc_desc->insn_idx = insn_idx;
 4138		reloc_desc->ext_idx = i;
 4139		return 0;
 4140	}
 4141
 4142	/* sub-program call relocation */
 4143	if (is_call_insn(insn)) {
 4144		if (insn->src_reg != BPF_PSEUDO_CALL) {
 4145			pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
 4146			return -LIBBPF_ERRNO__RELOC;
 4147		}
 4148		/* text_shndx can be 0, if no default "main" program exists */
 4149		if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
 4150			sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
 4151			pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
 4152				prog->name, sym_name, sym_sec_name);
 4153			return -LIBBPF_ERRNO__RELOC;
 4154		}
 4155		if (sym->st_value % BPF_INSN_SZ) {
 4156			pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
 4157				prog->name, sym_name, (size_t)sym->st_value);
 4158			return -LIBBPF_ERRNO__RELOC;
 4159		}
 4160		reloc_desc->type = RELO_CALL;
 4161		reloc_desc->insn_idx = insn_idx;
 4162		reloc_desc->sym_off = sym->st_value;
 4163		return 0;
 4164	}
 4165
 4166	if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
 4167		pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
 4168			prog->name, sym_name, shdr_idx);
 4169		return -LIBBPF_ERRNO__RELOC;
 4170	}
 4171
 4172	/* loading subprog addresses */
 4173	if (sym_is_subprog(sym, obj->efile.text_shndx)) {
 4174		/* global_func: sym->st_value = offset in the section, insn->imm = 0.
 4175		 * local_func: sym->st_value = 0, insn->imm = offset in the section.
 4176		 */
 4177		if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
 4178			pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
 4179				prog->name, sym_name, (size_t)sym->st_value, insn->imm);
 4180			return -LIBBPF_ERRNO__RELOC;
 4181		}
 4182
 4183		reloc_desc->type = RELO_SUBPROG_ADDR;
 4184		reloc_desc->insn_idx = insn_idx;
 4185		reloc_desc->sym_off = sym->st_value;
 4186		return 0;
 4187	}
 4188
 4189	type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
 4190	sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
 4191
 4192	/* generic map reference relocation */
 4193	if (type == LIBBPF_MAP_UNSPEC) {
 4194		if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
 4195			pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
 4196				prog->name, sym_name, sym_sec_name);
 4197			return -LIBBPF_ERRNO__RELOC;
 4198		}
 4199		for (map_idx = 0; map_idx < nr_maps; map_idx++) {
 4200			map = &obj->maps[map_idx];
 4201			if (map->libbpf_type != type ||
 4202			    map->sec_idx != sym->st_shndx ||
 4203			    map->sec_offset != sym->st_value)
 4204				continue;
 4205			pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
 4206				 prog->name, map_idx, map->name, map->sec_idx,
 4207				 map->sec_offset, insn_idx);
 4208			break;
 4209		}
 4210		if (map_idx >= nr_maps) {
 4211			pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
 4212				prog->name, sym_sec_name, (size_t)sym->st_value);
 4213			return -LIBBPF_ERRNO__RELOC;
 4214		}
 4215		reloc_desc->type = RELO_LD64;
 4216		reloc_desc->insn_idx = insn_idx;
 4217		reloc_desc->map_idx = map_idx;
 4218		reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
 4219		return 0;
 4220	}
 4221
 4222	/* global data map relocation */
 4223	if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
 4224		pr_warn("prog '%s': bad data relo against section '%s'\n",
 4225			prog->name, sym_sec_name);
 4226		return -LIBBPF_ERRNO__RELOC;
 4227	}
 4228	for (map_idx = 0; map_idx < nr_maps; map_idx++) {
 4229		map = &obj->maps[map_idx];
 4230		if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
 4231			continue;
 4232		pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
 4233			 prog->name, map_idx, map->name, map->sec_idx,
 4234			 map->sec_offset, insn_idx);
 4235		break;
 4236	}
 4237	if (map_idx >= nr_maps) {
 4238		pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
 4239			prog->name, sym_sec_name);
 4240		return -LIBBPF_ERRNO__RELOC;
 4241	}
 4242
 4243	reloc_desc->type = RELO_DATA;
 4244	reloc_desc->insn_idx = insn_idx;
 4245	reloc_desc->map_idx = map_idx;
 4246	reloc_desc->sym_off = sym->st_value;
 4247	return 0;
 4248}
 4249
 4250static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
 4251{
 4252	return insn_idx >= prog->sec_insn_off &&
 4253	       insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
 4254}
 4255
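/* Binary search over programs sorted by (sec_idx, sec_insn_off) (see
 * cmp_progs()): find the right-most program starting at or before
 * insn_idx, then verify that it actually covers that instruction.
 */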
 4256static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
 4257						 size_t sec_idx, size_t insn_idx)
 4258{
 4259	int l = 0, r = obj->nr_programs - 1, m;
 4260	struct bpf_program *prog;
 4261
 4262	if (!obj->nr_programs)
 4263		return NULL;
 4264
 4265	while (l < r) {
 4266		m = l + (r - l + 1) / 2;
 4267		prog = &obj->programs[m];
 4268
 4269		if (prog->sec_idx < sec_idx ||
 4270		    (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
 4271			l = m;
 4272		else
 4273			r = m - 1;
 4274	}
  4275	/* the matching program could be at index l, but it still might be the
  4276	 * wrong one, so double-check the conditions one last time
  4277	 */
 4278	prog = &obj->programs[l];
 4279	if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
 4280		return prog;
 4281	return NULL;
 4282}
 4283
 4284static int
 4285bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
 4286{
 4287	const char *relo_sec_name, *sec_name;
 4288	size_t sec_idx = shdr->sh_info, sym_idx;
 4289	struct bpf_program *prog;
 4290	struct reloc_desc *relos;
 4291	int err, i, nrels;
 4292	const char *sym_name;
 4293	__u32 insn_idx;
 4294	Elf_Scn *scn;
 4295	Elf_Data *scn_data;
 4296	Elf64_Sym *sym;
 4297	Elf64_Rel *rel;
 4298
 4299	if (sec_idx >= obj->efile.sec_cnt)
 4300		return -EINVAL;
 4301
 4302	scn = elf_sec_by_idx(obj, sec_idx);
 4303	scn_data = elf_sec_data(obj, scn);
 4304	if (!scn_data)
 4305		return -LIBBPF_ERRNO__FORMAT;
 4306
 4307	relo_sec_name = elf_sec_str(obj, shdr->sh_name);
 4308	sec_name = elf_sec_name(obj, scn);
 4309	if (!relo_sec_name || !sec_name)
 4310		return -EINVAL;
 4311
 4312	pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
 4313		 relo_sec_name, sec_idx, sec_name);
 4314	nrels = shdr->sh_size / shdr->sh_entsize;
 4315
 4316	for (i = 0; i < nrels; i++) {
 4317		rel = elf_rel_by_idx(data, i);
 4318		if (!rel) {
 4319			pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
 4320			return -LIBBPF_ERRNO__FORMAT;
 4321		}
 4322
 4323		sym_idx = ELF64_R_SYM(rel->r_info);
 4324		sym = elf_sym_by_idx(obj, sym_idx);
 4325		if (!sym) {
 4326			pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
 4327				relo_sec_name, sym_idx, i);
 4328			return -LIBBPF_ERRNO__FORMAT;
 4329		}
 4330
 4331		if (sym->st_shndx >= obj->efile.sec_cnt) {
 4332			pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
 4333				relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
 4334			return -LIBBPF_ERRNO__FORMAT;
 4335		}
 4336
 4337		if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
 4338			pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
 4339				relo_sec_name, (size_t)rel->r_offset, i);
 4340			return -LIBBPF_ERRNO__FORMAT;
 4341		}
 4342
 4343		insn_idx = rel->r_offset / BPF_INSN_SZ;
  4344		/* relocations against static functions are recorded as
  4345		 * relocations against the section that contains the function;
  4346		 * in such a case, the symbol will be STT_SECTION and sym.st_name
  4347		 * will point to the empty string (0), so fetch the section name
  4348		 * instead
  4349		 */
 4350		if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
 4351			sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
 4352		else
 4353			sym_name = elf_sym_str(obj, sym->st_name);
 4354		sym_name = sym_name ?: "<?";
 4355
 4356		pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
 4357			 relo_sec_name, i, insn_idx, sym_name);
 4358
 4359		prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
 4360		if (!prog) {
 4361			pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
 4362				relo_sec_name, i, sec_name, insn_idx);
 4363			continue;
 4364		}
 4365
 4366		relos = libbpf_reallocarray(prog->reloc_desc,
 4367					    prog->nr_reloc + 1, sizeof(*relos));
 4368		if (!relos)
 4369			return -ENOMEM;
 4370		prog->reloc_desc = relos;
 4371
 4372		/* adjust insn_idx to local BPF program frame of reference */
 4373		insn_idx -= prog->sec_insn_off;
 4374		err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
 4375						insn_idx, sym_name, sym, rel);
 4376		if (err)
 4377			return err;
 4378
 4379		prog->nr_reloc++;
 4380	}
 4381	return 0;
 4382}
 4383
 4384static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
 4385{
 4386	int id;
 4387
 4388	if (!obj->btf)
 4389		return -ENOENT;
 4390
  4391	/* if it's a BTF-defined map, we don't need to search for type IDs.
  4392	 * A struct_ops map doesn't need btf_key_type_id and
  4393	 * btf_value_type_id either.
  4394	 */
 4395	if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
 4396		return 0;
 4397
 4398	/*
 4399	 * LLVM annotates global data differently in BTF, that is,
 4400	 * only as '.data', '.bss' or '.rodata'.
 4401	 */
 4402	if (!bpf_map__is_internal(map))
 4403		return -ENOENT;
 4404
 4405	id = btf__find_by_name(obj->btf, map->real_name);
 4406	if (id < 0)
 4407		return id;
 4408
 4409	map->btf_key_type_id = 0;
 4410	map->btf_value_type_id = id;
 4411	return 0;
 4412}
 4413
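/* Fallback for kernels without BPF_OBJ_GET_INFO_BY_FD: parse basic map
 * parameters from /proc/<pid>/fdinfo/<fd>, which contains lines like
 * (values illustrative):
 *	map_type:	1
 *	key_size:	4
 *	value_size:	8
 *	max_entries:	64
 *	map_flags:	0x0
 */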
 4414static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
 4415{
 4416	char file[PATH_MAX], buff[4096];
 4417	FILE *fp;
 4418	__u32 val;
 4419	int err;
 4420
 4421	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
 4422	memset(info, 0, sizeof(*info));
 4423
 4424	fp = fopen(file, "re");
 4425	if (!fp) {
 4426		err = -errno;
 4427		pr_warn("failed to open %s: %d. No procfs support?\n", file,
 4428			err);
 4429		return err;
 4430	}
 4431
 4432	while (fgets(buff, sizeof(buff), fp)) {
 4433		if (sscanf(buff, "map_type:\t%u", &val) == 1)
 4434			info->type = val;
 4435		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
 4436			info->key_size = val;
 4437		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
 4438			info->value_size = val;
 4439		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
 4440			info->max_entries = val;
 4441		else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
 4442			info->map_flags = val;
 4443	}
 4444
 4445	fclose(fp);
 4446
 4447	return 0;
 4448}
 4449
 4450bool bpf_map__autocreate(const struct bpf_map *map)
 4451{
 4452	return map->autocreate;
 4453}
 4454
 4455int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
 4456{
 4457	if (map->obj->loaded)
 4458		return libbpf_err(-EBUSY);
 4459
 4460	map->autocreate = autocreate;
 4461	return 0;
 4462}
 4463
 4464int bpf_map__reuse_fd(struct bpf_map *map, int fd)
 4465{
 4466	struct bpf_map_info info;
 4467	__u32 len = sizeof(info), name_len;
 4468	int new_fd, err;
 4469	char *new_name;
 4470
 4471	memset(&info, 0, len);
 4472	err = bpf_map_get_info_by_fd(fd, &info, &len);
 4473	if (err && errno == EINVAL)
 4474		err = bpf_get_map_info_from_fdinfo(fd, &info);
 4475	if (err)
 4476		return libbpf_err(err);
 4477
 4478	name_len = strlen(info.name);
 4479	if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
 4480		new_name = strdup(map->name);
 4481	else
 4482		new_name = strdup(info.name);
 4483
 4484	if (!new_name)
 4485		return libbpf_err(-errno);
 4486
 4487	/*
 4488	 * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set.
 4489	 * This is similar to what we do in ensure_good_fd(), but without
 4490	 * closing original FD.
 4491	 */
 4492	new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
 4493	if (new_fd < 0) {
 4494		err = -errno;
 4495		goto err_free_new_name;
 4496	}
 4497
 4498	err = reuse_fd(map->fd, new_fd);
 4499	if (err)
 4500		goto err_free_new_name;
 4501
 4502	free(map->name);
 4503
 4504	map->name = new_name;
 4505	map->def.type = info.type;
 4506	map->def.key_size = info.key_size;
 4507	map->def.value_size = info.value_size;
 4508	map->def.max_entries = info.max_entries;
 4509	map->def.map_flags = info.map_flags;
 4510	map->btf_key_type_id = info.btf_key_type_id;
 4511	map->btf_value_type_id = info.btf_value_type_id;
 4512	map->reused = true;
 4513	map->map_extra = info.map_extra;
 4514
 4515	return 0;
 4516
 4517err_free_new_name:
 4518	free(new_name);
 4519	return libbpf_err(err);
 4520}
 4521
 4522__u32 bpf_map__max_entries(const struct bpf_map *map)
 4523{
 4524	return map->def.max_entries;
 4525}
 4526
 4527struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
 4528{
 4529	if (!bpf_map_type__is_map_in_map(map->def.type))
 4530		return errno = EINVAL, NULL;
 4531
 4532	return map->inner_map;
 4533}
 4534
 4535int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
 4536{
 4537	if (map->obj->loaded)
 4538		return libbpf_err(-EBUSY);
 4539
 4540	map->def.max_entries = max_entries;
 4541
 4542	/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
 4543	if (map_is_ringbuf(map))
 4544		map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
 4545
 4546	return 0;
 4547}
 4548
 4549static int
 4550bpf_object__probe_loading(struct bpf_object *obj)
 4551{
 4552	char *cp, errmsg[STRERR_BUFSIZE];
 4553	struct bpf_insn insns[] = {
 4554		BPF_MOV64_IMM(BPF_REG_0, 0),
 4555		BPF_EXIT_INSN(),
 4556	};
 4557	int ret, insn_cnt = ARRAY_SIZE(insns);
 4558
 4559	if (obj->gen_loader)
 4560		return 0;
 4561
 4562	ret = bump_rlimit_memlock();
 4563	if (ret)
 4564		pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
 4565
 4566	/* make sure basic loading works */
 4567	ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
 4568	if (ret < 0)
 4569		ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
 4570	if (ret < 0) {
 4571		ret = errno;
 4572		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
 4573		pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
 4574			"program. Make sure your kernel supports BPF "
 4575			"(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
  4576			"set to a big enough value.\n", __func__, cp, ret);
 4577		return -ret;
 4578	}
 4579	close(ret);
 4580
 4581	return 0;
 4582}
 4583
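/* Convert a feature-probing result into a boolean: a valid FD means the
 * kernel accepted the probe (close it and report success), a negative
 * value means the feature is missing.
 */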
 4584static int probe_fd(int fd)
 4585{
 4586	if (fd >= 0)
 4587		close(fd);
 4588	return fd >= 0;
 4589}
 4590
 4591static int probe_kern_prog_name(void)
 4592{
 4593	const size_t attr_sz = offsetofend(union bpf_attr, prog_name);
 4594	struct bpf_insn insns[] = {
 4595		BPF_MOV64_IMM(BPF_REG_0, 0),
 4596		BPF_EXIT_INSN(),
 4597	};
 4598	union bpf_attr attr;
 4599	int ret;
 4600
 4601	memset(&attr, 0, attr_sz);
 4602	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 4603	attr.license = ptr_to_u64("GPL");
 4604	attr.insns = ptr_to_u64(insns);
 4605	attr.insn_cnt = (__u32)ARRAY_SIZE(insns);
 4606	libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name));
 4607
 4608	/* make sure loading with name works */
 4609	ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS);
 4610	return probe_fd(ret);
 4611}
 4612
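/* Probe for global data (direct map value access) support: create a small
 * ARRAY map and load a program that stores into its value through a
 * BPF_PSEUDO_MAP_VALUE ld_imm64; if the verifier accepts it, global
 * variables are usable.
 */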
 4613static int probe_kern_global_data(void)
 4614{
 4615	char *cp, errmsg[STRERR_BUFSIZE];
 4616	struct bpf_insn insns[] = {
 4617		BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
 4618		BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
 4619		BPF_MOV64_IMM(BPF_REG_0, 0),
 4620		BPF_EXIT_INSN(),
 4621	};
 4622	int ret, map, insn_cnt = ARRAY_SIZE(insns);
 4623
 4624	map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL);
 4625	if (map < 0) {
 4626		ret = -errno;
 4627		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
 4628		pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
 4629			__func__, cp, -ret);
 4630		return ret;
 4631	}
 4632
 4633	insns[0].imm = map;
 4634
 4635	ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
 4636	close(map);
 4637	return probe_fd(ret);
 4638}
 4639
 4640static int probe_kern_btf(void)
 4641{
 4642	static const char strs[] = "\0int";
 4643	__u32 types[] = {
 4644		/* int */
 4645		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
 4646	};
 4647
 4648	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4649					     strs, sizeof(strs)));
 4650}
 4651
 4652static int probe_kern_btf_func(void)
 4653{
 4654	static const char strs[] = "\0int\0x\0a";
 4655	/* void x(int a) {} */
 4656	__u32 types[] = {
 4657		/* int */
 4658		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
 4659		/* FUNC_PROTO */                                /* [2] */
 4660		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
 4661		BTF_PARAM_ENC(7, 1),
 4662		/* FUNC x */                                    /* [3] */
 4663		BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
 4664	};
 4665
 4666	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4667					     strs, sizeof(strs)));
 4668}
 4669
 4670static int probe_kern_btf_func_global(void)
 4671{
 4672	static const char strs[] = "\0int\0x\0a";
 4673	/* static void x(int a) {} */
 4674	__u32 types[] = {
 4675		/* int */
 4676		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
 4677		/* FUNC_PROTO */                                /* [2] */
 4678		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
 4679		BTF_PARAM_ENC(7, 1),
 4680		/* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
 4681		BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
 4682	};
 4683
 4684	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4685					     strs, sizeof(strs)));
 4686}
 4687
 4688static int probe_kern_btf_datasec(void)
 4689{
 4690	static const char strs[] = "\0x\0.data";
 4691	/* static int a; */
 4692	__u32 types[] = {
 4693		/* int */
 4694		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
 4695		/* VAR x */                                     /* [2] */
 4696		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
 4697		BTF_VAR_STATIC,
 4698		/* DATASEC val */                               /* [3] */
 4699		BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
 4700		BTF_VAR_SECINFO_ENC(2, 0, 4),
 4701	};
 4702
 4703	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4704					     strs, sizeof(strs)));
 4705}
 4706
 4707static int probe_kern_btf_float(void)
 4708{
 4709	static const char strs[] = "\0float";
 4710	__u32 types[] = {
 4711		/* float */
 4712		BTF_TYPE_FLOAT_ENC(1, 4),
 4713	};
 4714
 4715	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4716					     strs, sizeof(strs)));
 4717}
 4718
 4719static int probe_kern_btf_decl_tag(void)
 4720{
 4721	static const char strs[] = "\0tag";
 4722	__u32 types[] = {
 4723		/* int */
 4724		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
 4725		/* VAR x */                                     /* [2] */
 4726		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
 4727		BTF_VAR_STATIC,
 4728		/* attr */
 4729		BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
 4730	};
 4731
 4732	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4733					     strs, sizeof(strs)));
 4734}
 4735
 4736static int probe_kern_btf_type_tag(void)
 4737{
 4738	static const char strs[] = "\0tag";
 4739	__u32 types[] = {
 4740		/* int */
 4741		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
 4742		/* attr */
 4743		BTF_TYPE_TYPE_TAG_ENC(1, 1),				/* [2] */
 4744		/* ptr */
 4745		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),	/* [3] */
 4746	};
 4747
 4748	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4749					     strs, sizeof(strs)));
 4750}
 4751
 4752static int probe_kern_array_mmap(void)
 4753{
 4754	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
 4755	int fd;
 4756
 4757	fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts);
 4758	return probe_fd(fd);
 4759}
 4760
 4761static int probe_kern_exp_attach_type(void)
 4762{
 4763	LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE);
 4764	struct bpf_insn insns[] = {
 4765		BPF_MOV64_IMM(BPF_REG_0, 0),
 4766		BPF_EXIT_INSN(),
 4767	};
 4768	int fd, insn_cnt = ARRAY_SIZE(insns);
 4769
 4770	/* use any valid combination of program type and (optional)
  4771	 * non-zero expected attach type (i.e., not BPF_CGROUP_INET_INGRESS, which is 0)
 4772	 * to see if kernel supports expected_attach_type field for
 4773	 * BPF_PROG_LOAD command
 4774	 */
 4775	fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
 4776	return probe_fd(fd);
 4777}
 4778
 4779static int probe_kern_probe_read_kernel(void)
 4780{
 4781	struct bpf_insn insns[] = {
 4782		BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),	/* r1 = r10 (fp) */
 4783		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),	/* r1 += -8 */
 4784		BPF_MOV64_IMM(BPF_REG_2, 8),		/* r2 = 8 */
 4785		BPF_MOV64_IMM(BPF_REG_3, 0),		/* r3 = 0 */
 4786		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
 4787		BPF_EXIT_INSN(),
 4788	};
 4789	int fd, insn_cnt = ARRAY_SIZE(insns);
 4790
 4791	fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
 4792	return probe_fd(fd);
 4793}
 4794
 4795static int probe_prog_bind_map(void)
 4796{
 4797	char *cp, errmsg[STRERR_BUFSIZE];
 4798	struct bpf_insn insns[] = {
 4799		BPF_MOV64_IMM(BPF_REG_0, 0),
 4800		BPF_EXIT_INSN(),
 4801	};
 4802	int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
 4803
 4804	map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL);
 4805	if (map < 0) {
 4806		ret = -errno;
 4807		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
 4808		pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
 4809			__func__, cp, -ret);
 4810		return ret;
 4811	}
 4812
 4813	prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
 4814	if (prog < 0) {
 4815		close(map);
 4816		return 0;
 4817	}
 4818
 4819	ret = bpf_prog_bind_map(prog, map, NULL);
 4820
 4821	close(map);
 4822	close(prog);
 4823
 4824	return ret >= 0;
 4825}
 4826
 4827static int probe_module_btf(void)
 4828{
 4829	static const char strs[] = "\0int";
 4830	__u32 types[] = {
 4831		/* int */
 4832		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
 4833	};
 4834	struct bpf_btf_info info;
 4835	__u32 len = sizeof(info);
 4836	char name[16];
 4837	int fd, err;
 4838
 4839	fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
 4840	if (fd < 0)
 4841		return 0; /* BTF not supported at all */
 4842
 4843	memset(&info, 0, sizeof(info));
 4844	info.name = ptr_to_u64(name);
 4845	info.name_len = sizeof(name);
 4846
 4847	/* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
 4848	 * kernel's module BTF support coincides with support for
 4849	 * name/name_len fields in struct bpf_btf_info.
 4850	 */
 4851	err = bpf_btf_get_info_by_fd(fd, &info, &len);
 4852	close(fd);
 4853	return !err;
 4854}
 4855
 4856static int probe_perf_link(void)
 4857{
 4858	struct bpf_insn insns[] = {
 4859		BPF_MOV64_IMM(BPF_REG_0, 0),
 4860		BPF_EXIT_INSN(),
 4861	};
 4862	int prog_fd, link_fd, err;
 4863
 4864	prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
 4865				insns, ARRAY_SIZE(insns), NULL);
 4866	if (prog_fd < 0)
 4867		return -errno;
 4868
 4869	/* use invalid perf_event FD to get EBADF, if link is supported;
 4870	 * otherwise EINVAL should be returned
 4871	 */
 4872	link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
 4873	err = -errno; /* close() can clobber errno */
 4874
 4875	if (link_fd >= 0)
 4876		close(link_fd);
 4877	close(prog_fd);
 4878
 4879	return link_fd < 0 && err == -EBADF;
 4880}
 4881
 4882static int probe_uprobe_multi_link(void)
 4883{
 4884	LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
 4885		.expected_attach_type = BPF_TRACE_UPROBE_MULTI,
 4886	);
 4887	LIBBPF_OPTS(bpf_link_create_opts, link_opts);
 4888	struct bpf_insn insns[] = {
 4889		BPF_MOV64_IMM(BPF_REG_0, 0),
 4890		BPF_EXIT_INSN(),
 4891	};
 4892	int prog_fd, link_fd, err;
 4893	unsigned long offset = 0;
 4894
 4895	prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL",
 4896				insns, ARRAY_SIZE(insns), &load_opts);
 4897	if (prog_fd < 0)
 4898		return -errno;
 4899
 4900	/* Creating uprobe in '/' binary should fail with -EBADF. */
 4901	link_opts.uprobe_multi.path = "/";
 4902	link_opts.uprobe_multi.offsets = &offset;
 4903	link_opts.uprobe_multi.cnt = 1;
 4904
 4905	link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
 4906	err = -errno; /* close() can clobber errno */
 4907
 4908	if (link_fd >= 0)
 4909		close(link_fd);
 4910	close(prog_fd);
 4911
 4912	return link_fd < 0 && err == -EBADF;
 4913}
 4914
 4915static int probe_kern_bpf_cookie(void)
 4916{
 4917	struct bpf_insn insns[] = {
 4918		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
 4919		BPF_EXIT_INSN(),
 4920	};
 4921	int ret, insn_cnt = ARRAY_SIZE(insns);
 4922
 4923	ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
 4924	return probe_fd(ret);
 4925}
 4926
 4927static int probe_kern_btf_enum64(void)
 4928{
 4929	static const char strs[] = "\0enum64";
 4930	__u32 types[] = {
 4931		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
 4932	};
 4933
 4934	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4935					     strs, sizeof(strs)));
 4936}
 4937
 4938static int probe_kern_syscall_wrapper(void);
 4939
 4940enum kern_feature_result {
 4941	FEAT_UNKNOWN = 0,
 4942	FEAT_SUPPORTED = 1,
 4943	FEAT_MISSING = 2,
 4944};
 4945
 4946typedef int (*feature_probe_fn)(void);
 4947
 4948static struct kern_feature_desc {
 4949	const char *desc;
 4950	feature_probe_fn probe;
 4951	enum kern_feature_result res;
 4952} feature_probes[__FEAT_CNT] = {
 4953	[FEAT_PROG_NAME] = {
 4954		"BPF program name", probe_kern_prog_name,
 4955	},
 4956	[FEAT_GLOBAL_DATA] = {
 4957		"global variables", probe_kern_global_data,
 4958	},
 4959	[FEAT_BTF] = {
 4960		"minimal BTF", probe_kern_btf,
 4961	},
 4962	[FEAT_BTF_FUNC] = {
 4963		"BTF functions", probe_kern_btf_func,
 4964	},
 4965	[FEAT_BTF_GLOBAL_FUNC] = {
 4966		"BTF global function", probe_kern_btf_func_global,
 4967	},
 4968	[FEAT_BTF_DATASEC] = {
 4969		"BTF data section and variable", probe_kern_btf_datasec,
 4970	},
 4971	[FEAT_ARRAY_MMAP] = {
 4972		"ARRAY map mmap()", probe_kern_array_mmap,
 4973	},
 4974	[FEAT_EXP_ATTACH_TYPE] = {
 4975		"BPF_PROG_LOAD expected_attach_type attribute",
 4976		probe_kern_exp_attach_type,
 4977	},
 4978	[FEAT_PROBE_READ_KERN] = {
 4979		"bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
 4980	},
 4981	[FEAT_PROG_BIND_MAP] = {
 4982		"BPF_PROG_BIND_MAP support", probe_prog_bind_map,
 4983	},
 4984	[FEAT_MODULE_BTF] = {
 4985		"module BTF support", probe_module_btf,
 4986	},
 4987	[FEAT_BTF_FLOAT] = {
 4988		"BTF_KIND_FLOAT support", probe_kern_btf_float,
 4989	},
 4990	[FEAT_PERF_LINK] = {
 4991		"BPF perf link support", probe_perf_link,
 4992	},
 4993	[FEAT_BTF_DECL_TAG] = {
 4994		"BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
 4995	},
 4996	[FEAT_BTF_TYPE_TAG] = {
 4997		"BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag,
 4998	},
 4999	[FEAT_MEMCG_ACCOUNT] = {
 5000		"memcg-based memory accounting", probe_memcg_account,
 5001	},
 5002	[FEAT_BPF_COOKIE] = {
 5003		"BPF cookie support", probe_kern_bpf_cookie,
 5004	},
 5005	[FEAT_BTF_ENUM64] = {
 5006		"BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
 5007	},
 5008	[FEAT_SYSCALL_WRAPPER] = {
 5009		"Kernel using syscall wrapper", probe_kern_syscall_wrapper,
 5010	},
 5011	[FEAT_UPROBE_MULTI_LINK] = {
 5012		"BPF multi-uprobe link support", probe_uprobe_multi_link,
 5013	},
 5014};
 5015
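/* Lazily run a feature probe the first time that feature is queried and
 * cache the result in feature_probes[]; READ_ONCE()/WRITE_ONCE() keep the
 * cached result consistent if multiple threads race here. Illustrative
 * usage by callers:
 *	if (kernel_supports(obj, FEAT_BTF_FUNC))
 *		... emit BTF func info ...
 */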
 5016bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
 5017{
 5018	struct kern_feature_desc *feat = &feature_probes[feat_id];
 5019	int ret;
 5020
 5021	if (obj && obj->gen_loader)
  5022		/* When generating a loader program, assume the latest kernel
  5023		 * to avoid doing extra prog_load and map_create syscalls.
  5024		 */
 5025		return true;
 5026
 5027	if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
 5028		ret = feat->probe();
 5029		if (ret > 0) {
 5030			WRITE_ONCE(feat->res, FEAT_SUPPORTED);
 5031		} else if (ret == 0) {
 5032			WRITE_ONCE(feat->res, FEAT_MISSING);
 5033		} else {
 5034			pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
 5035			WRITE_ONCE(feat->res, FEAT_MISSING);
 5036		}
 5037	}
 5038
 5039	return READ_ONCE(feat->res) == FEAT_SUPPORTED;
 5040}
 5041
 5042static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
 5043{
 5044	struct bpf_map_info map_info;
 5045	char msg[STRERR_BUFSIZE];
 5046	__u32 map_info_len = sizeof(map_info);
 5047	int err;
 5048
 5049	memset(&map_info, 0, map_info_len);
 5050	err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len);
 5051	if (err && errno == EINVAL)
 5052		err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
 5053	if (err) {
 5054		pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
 5055			libbpf_strerror_r(errno, msg, sizeof(msg)));
 5056		return false;
 5057	}
 5058
 5059	return (map_info.type == map->def.type &&
 5060		map_info.key_size == map->def.key_size &&
 5061		map_info.value_size == map->def.value_size &&
 5062		map_info.max_entries == map->def.max_entries &&
 5063		map_info.map_flags == map->def.map_flags &&
 5064		map_info.map_extra == map->map_extra);
 5065}
 5066
 5067static int
 5068bpf_object__reuse_map(struct bpf_map *map)
 5069{
 5070	char *cp, errmsg[STRERR_BUFSIZE];
 5071	int err, pin_fd;
 5072
 5073	pin_fd = bpf_obj_get(map->pin_path);
 5074	if (pin_fd < 0) {
 5075		err = -errno;
 5076		if (err == -ENOENT) {
 5077			pr_debug("found no pinned map to reuse at '%s'\n",
 5078				 map->pin_path);
 5079			return 0;
 5080		}
 5081
 5082		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
 5083		pr_warn("couldn't retrieve pinned map '%s': %s\n",
 5084			map->pin_path, cp);
 5085		return err;
 5086	}
 5087
 5088	if (!map_is_reuse_compat(map, pin_fd)) {
 5089		pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
 5090			map->pin_path);
 5091		close(pin_fd);
 5092		return -EINVAL;
 5093	}
 5094
 5095	err = bpf_map__reuse_fd(map, pin_fd);
 5096	close(pin_fd);
 5097	if (err)
 5098		return err;
 5099
 5100	map->pinned = true;
 5101	pr_debug("reused pinned map at '%s'\n", map->pin_path);
 5102
 5103	return 0;
 5104}
 5105
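/* Write the initial contents of an internal (.data/.rodata/.kconfig/.bss)
 * map: the whole section image lives in map->mmaped and is stored as the
 * single value at key 0; read-only sections are then frozen.
 */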
 5106static int
 5107bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 5108{
 5109	enum libbpf_map_type map_type = map->libbpf_type;
 5110	char *cp, errmsg[STRERR_BUFSIZE];
 5111	int err, zero = 0;
 5112
 5113	if (obj->gen_loader) {
 5114		bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
 5115					 map->mmaped, map->def.value_size);
 5116		if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
 5117			bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
 5118		return 0;
 5119	}
 5120	err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
 5121	if (err) {
 5122		err = -errno;
 5123		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 5124		pr_warn("Error setting initial map(%s) contents: %s\n",
 5125			map->name, cp);
 5126		return err;
 5127	}
 5128
 5129	/* Freeze .rodata and .kconfig map as read-only from syscall side. */
 5130	if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
 5131		err = bpf_map_freeze(map->fd);
 5132		if (err) {
 5133			err = -errno;
 5134			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 5135			pr_warn("Error freezing map(%s) as read-only: %s\n",
 5136				map->name, cp);
 5137			return err;
 5138		}
 5139	}
 5140	return 0;
 5141}
 5142
 5143static void bpf_map__destroy(struct bpf_map *map);
 5144
 5145static bool map_is_created(const struct bpf_map *map)
 5146{
 5147	return map->obj->loaded || map->reused;
 5148}
 5149
 5150static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
 5151{
 5152	LIBBPF_OPTS(bpf_map_create_opts, create_attr);
 5153	struct bpf_map_def *def = &map->def;
 5154	const char *map_name = NULL;
 5155	int err = 0, map_fd;
 5156
 5157	if (kernel_supports(obj, FEAT_PROG_NAME))
 5158		map_name = map->name;
 5159	create_attr.map_ifindex = map->map_ifindex;
 5160	create_attr.map_flags = def->map_flags;
 5161	create_attr.numa_node = map->numa_node;
 5162	create_attr.map_extra = map->map_extra;
 5163
 5164	if (bpf_map__is_struct_ops(map))
 5165		create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
 5166
 5167	if (obj->btf && btf__fd(obj->btf) >= 0) {
 5168		create_attr.btf_fd = btf__fd(obj->btf);
 5169		create_attr.btf_key_type_id = map->btf_key_type_id;
 5170		create_attr.btf_value_type_id = map->btf_value_type_id;
 5171	}
 5172
 5173	if (bpf_map_type__is_map_in_map(def->type)) {
 5174		if (map->inner_map) {
 5175			err = bpf_object__create_map(obj, map->inner_map, true);
 5176			if (err) {
 5177				pr_warn("map '%s': failed to create inner map: %d\n",
 5178					map->name, err);
 5179				return err;
 5180			}
 5181			map->inner_map_fd = map->inner_map->fd;
 5182		}
 5183		if (map->inner_map_fd >= 0)
 5184			create_attr.inner_map_fd = map->inner_map_fd;
 5185	}
 5186
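	/* These map types don't support BTF key/value type info, so drop it
	 * to avoid map creation failures on kernels that reject it.
	 */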
 5187	switch (def->type) {
 5188	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
 5189	case BPF_MAP_TYPE_CGROUP_ARRAY:
 5190	case BPF_MAP_TYPE_STACK_TRACE:
 5191	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
 5192	case BPF_MAP_TYPE_HASH_OF_MAPS:
 5193	case BPF_MAP_TYPE_DEVMAP:
 5194	case BPF_MAP_TYPE_DEVMAP_HASH:
 5195	case BPF_MAP_TYPE_CPUMAP:
 5196	case BPF_MAP_TYPE_XSKMAP:
 5197	case BPF_MAP_TYPE_SOCKMAP:
 5198	case BPF_MAP_TYPE_SOCKHASH:
 5199	case BPF_MAP_TYPE_QUEUE:
 5200	case BPF_MAP_TYPE_STACK:
 5201		create_attr.btf_fd = 0;
 5202		create_attr.btf_key_type_id = 0;
 5203		create_attr.btf_value_type_id = 0;
 5204		map->btf_key_type_id = 0;
 5205		map->btf_value_type_id = 0;
 5206	default:
 5207		break;
 5208	}
 5209
 5210	if (obj->gen_loader) {
 5211		bpf_gen__map_create(obj->gen_loader, def->type, map_name,
 5212				    def->key_size, def->value_size, def->max_entries,
 5213				    &create_attr, is_inner ? -1 : map - obj->maps);
  5214		/* We keep pretending we have a valid FD to pass various fd >= 0
  5215		 * checks by just keeping the original placeholder FDs in place.
  5216		 * See the bpf_object__add_map() comment.
  5217		 * This placeholder fd will not be used with any syscall and
  5218		 * will be reset to -1 eventually.
  5219		 */
 5220		map_fd = map->fd;
 5221	} else {
 5222		map_fd = bpf_map_create(def->type, map_name,
 5223					def->key_size, def->value_size,
 5224					def->max_entries, &create_attr);
 5225	}
 5226	if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) {
 5227		char *cp, errmsg[STRERR_BUFSIZE];
 5228
 5229		err = -errno;
 5230		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 5231		pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
 5232			map->name, cp, err);
 5233		create_attr.btf_fd = 0;
 5234		create_attr.btf_key_type_id = 0;
 5235		create_attr.btf_value_type_id = 0;
 5236		map->btf_key_type_id = 0;
 5237		map->btf_value_type_id = 0;
 5238		map_fd = bpf_map_create(def->type, map_name,
 5239					def->key_size, def->value_size,
 5240					def->max_entries, &create_attr);
 5241	}
 5242
 5243	if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
 5244		if (obj->gen_loader)
 5245			map->inner_map->fd = -1;
 5246		bpf_map__destroy(map->inner_map);
 5247		zfree(&map->inner_map);
 5248	}
 5249
 5250	if (map_fd < 0)
 5251		return map_fd;
 5252
 5253	/* obj->gen_loader case, prevent reuse_fd() from closing map_fd */
 5254	if (map->fd == map_fd)
 5255		return 0;
 5256
 5257	/* Keep placeholder FD value but now point it to the BPF map object.
 5258	 * This way everything that relied on this map's FD (e.g., relocated
 5259	 * ldimm64 instructions) will stay valid and won't need adjustments.
  5260	 * map->fd stays valid but now points to what map_fd points to.
 5261	 */
 5262	return reuse_fd(map->fd, map_fd);
 5263}
 5264
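/* Populate declaratively initialized slots of an outer (map-in-map) map
 * with the FDs of the already-created inner maps listed in
 * map->init_slots[].
 */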
 5265static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
 5266{
 5267	const struct bpf_map *targ_map;
 5268	unsigned int i;
 5269	int fd, err = 0;
 5270
 5271	for (i = 0; i < map->init_slots_sz; i++) {
 5272		if (!map->init_slots[i])
 5273			continue;
 5274
 5275		targ_map = map->init_slots[i];
 5276		fd = targ_map->fd;
 5277
 5278		if (obj->gen_loader) {
 5279			bpf_gen__populate_outer_map(obj->gen_loader,
 5280						    map - obj->maps, i,
 5281						    targ_map - obj->maps);
 5282		} else {
 5283			err = bpf_map_update_elem(map->fd, &i, &fd, 0);
 5284		}
 5285		if (err) {
 5286			err = -errno;
 5287			pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
 5288				map->name, i, targ_map->name, fd, err);
 5289			return err;
 5290		}
 5291		pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
 5292			 map->name, i, targ_map->name, fd);
 5293	}
 5294
 5295	zfree(&map->init_slots);
 5296	map->init_slots_sz = 0;
 5297
 5298	return 0;
 5299}
 5300
 5301static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
 5302{
 5303	const struct bpf_program *targ_prog;
 5304	unsigned int i;
 5305	int fd, err;
 5306
 5307	if (obj->gen_loader)
 5308		return -ENOTSUP;
 5309
 5310	for (i = 0; i < map->init_slots_sz; i++) {
 5311		if (!map->init_slots[i])
 5312			continue;
 5313
 5314		targ_prog = map->init_slots[i];
 5315		fd = bpf_program__fd(targ_prog);
 5316
 5317		err = bpf_map_update_elem(map->fd, &i, &fd, 0);
 5318		if (err) {
 5319			err = -errno;
 5320			pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n",
 5321				map->name, i, targ_prog->name, fd, err);
 5322			return err;
 5323		}
 5324		pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
 5325			 map->name, i, targ_prog->name, fd);
 5326	}
 5327
 5328	zfree(&map->init_slots);
 5329	map->init_slots_sz = 0;
 5330
 5331	return 0;
 5332}
 5333
 5334static int bpf_object_init_prog_arrays(struct bpf_object *obj)
 5335{
 5336	struct bpf_map *map;
 5337	int i, err;
 5338
 5339	for (i = 0; i < obj->nr_maps; i++) {
 5340		map = &obj->maps[i];
 5341
 5342		if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
 5343			continue;
 5344
 5345		err = init_prog_array_slots(obj, map);
 5346		if (err < 0)
 5347			return err;
 5348	}
 5349	return 0;
 5350}
 5351
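     /* For BPF_MAP_TYPE_PERF_EVENT_ARRAY maps with unspecified max_entries,
      * default to the number of possible CPUs (one slot per CPU).
      */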
 5352static int map_set_def_max_entries(struct bpf_map *map)
 5353{
 5354	if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
 5355		int nr_cpus;
 5356
 5357		nr_cpus = libbpf_num_possible_cpus();
 5358		if (nr_cpus < 0) {
 5359			pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
 5360				map->name, nr_cpus);
 5361			return nr_cpus;
 5362		}
 5363		pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
 5364		map->def.max_entries = nr_cpus;
 5365	}
 5366
 5367	return 0;
 5368}
 5369
 5370static int
 5371bpf_object__create_maps(struct bpf_object *obj)
 5372{
 5373	struct bpf_map *map;
 5374	char *cp, errmsg[STRERR_BUFSIZE];
 5375	unsigned int i, j;
 5376	int err;
 5377	bool retried;
 5378
 5379	for (i = 0; i < obj->nr_maps; i++) {
 5380		map = &obj->maps[i];
 5381
 5382		/* To support old kernels, we skip creating global data maps
 5383		 * (.rodata, .data, .kconfig, etc); later on, during program
 5384		 * loading, if we detect that at least one of the to-be-loaded
 5385		 * programs is referencing any global data map, we'll error
 5386		 * out with program name and relocation index logged.
 5387		 * This approach allows accommodating Clang emitting
 5388		 * unnecessary .rodata.str1.1 sections for string literals,
 5389		 * but also allows CO-RE applications to use global
 5390		 * variables in some BPF programs, but not in others.
 5391		 * If those global variable-using programs are not loaded at
 5392		 * runtime due to bpf_program__set_autoload(prog, false),
 5393		 * bpf_object loading will succeed just fine even on old
 5394		 * kernels.
 5395		 */
 5396		if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
 5397			map->autocreate = false;
 5398
 5399		if (!map->autocreate) {
 5400			pr_debug("map '%s': skipped auto-creating...\n", map->name);
 5401			continue;
 5402		}
 5403
 5404		err = map_set_def_max_entries(map);
 5405		if (err)
 5406			goto err_out;
 5407
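     		/* If auto-pinning at pin_path below fails with -EEXIST (e.g.,
     		 * another process pinned a map at the same path concurrently),
     		 * retry once and attempt to reuse that pinned map instead.
     		 */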
 5408		retried = false;
 5409retry:
 5410		if (map->pin_path) {
 5411			err = bpf_object__reuse_map(map);
 5412			if (err) {
 5413				pr_warn("map '%s': error reusing pinned map\n",
 5414					map->name);
 5415				goto err_out;
 5416			}
 5417			if (retried && map->fd < 0) {
 5418				pr_warn("map '%s': cannot find pinned map\n",
 5419					map->name);
 5420				err = -ENOENT;
 5421				goto err_out;
 5422			}
 5423		}
 5424
 5425		if (map->reused) {
 5426			pr_debug("map '%s': skipping creation (preset fd=%d)\n",
 5427				 map->name, map->fd);
 5428		} else {
 5429			err = bpf_object__create_map(obj, map, false);
 5430			if (err)
 5431				goto err_out;
 5432
 5433			pr_debug("map '%s': created successfully, fd=%d\n",
 5434				 map->name, map->fd);
 5435
 5436			if (bpf_map__is_internal(map)) {
 5437				err = bpf_object__populate_internal_map(obj, map);
 5438				if (err < 0)
 5439					goto err_out;
 5440			}
 5441
 5442			if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
 5443				err = init_map_in_map_slots(obj, map);
 5444				if (err < 0)
 5445					goto err_out;
 5446			}
 5447		}
 5448
 5449		if (map->pin_path && !map->pinned) {
 5450			err = bpf_map__pin(map, NULL);
 5451			if (err) {
 5452				if (!retried && err == -EEXIST) {
 5453					retried = true;
 5454					goto retry;
 5455				}
 5456				pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
 5457					map->name, map->pin_path, err);
 5458				goto err_out;
 5459			}
 5460		}
 5461	}
 5462
 5463	return 0;
 5464
 5465err_out:
 5466	cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 5467	pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
 5468	pr_perm_msg(err);
 5469	for (j = 0; j < i; j++)
 5470		zclose(obj->maps[j].fd);
 5471	return err;
 5472}
 5473
 5474static bool bpf_core_is_flavor_sep(const char *s)
 5475{
 5476	/* check X___Y name pattern, where X and Y are not underscores */
 5477	return s[0] != '_' &&				      /* X */
 5478	       s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
 5479	       s[4] != '_';				      /* Y */
 5480}
 5481
 5482/* Given 'some_struct_name___with_flavor', return the length of the name prefix
 5483 * before the last triple underscore. The struct name part after the last triple
 5484 * underscore is ignored by BPF CO-RE during relocation matching.
 5485 */
 5486size_t bpf_core_essential_name_len(const char *name)
 5487{
 5488	size_t n = strlen(name);
 5489	int i;
 5490
 5491	for (i = n - 5; i >= 0; i--) {
 5492		if (bpf_core_is_flavor_sep(name + i))
 5493			return i + 1;
 5494	}
 5495	return n;
 5496}
 5497
 5498void bpf_core_free_cands(struct bpf_core_cand_list *cands)
 5499{
 5500	if (!cands)
 5501		return;
 5502
 5503	free(cands->cands);
 5504	free(cands);
 5505}
 5506
 5507int bpf_core_add_cands(struct bpf_core_cand *local_cand,
 5508		       size_t local_essent_len,
 5509		       const struct btf *targ_btf,
 5510		       const char *targ_btf_name,
 5511		       int targ_start_id,
 5512		       struct bpf_core_cand_list *cands)
 5513{
 5514	struct bpf_core_cand *new_cands, *cand;
 5515	const struct btf_type *t, *local_t;
 5516	const char *targ_name, *local_name;
 5517	size_t targ_essent_len;
 5518	int n, i;
 5519
 5520	local_t = btf__type_by_id(local_cand->btf, local_cand->id);
 5521	local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);
 5522
 5523	n = btf__type_cnt(targ_btf);
 5524	for (i = targ_start_id; i < n; i++) {
 5525		t = btf__type_by_id(targ_btf, i);
 5526		if (!btf_kind_core_compat(t, local_t))
 5527			continue;
 5528
 5529		targ_name = btf__name_by_offset(targ_btf, t->name_off);
 5530		if (str_is_empty(targ_name))
 5531			continue;
 5532
 5533		targ_essent_len = bpf_core_essential_name_len(targ_name);
 5534		if (targ_essent_len != local_essent_len)
 5535			continue;
 5536
 5537		if (strncmp(local_name, targ_name, local_essent_len) != 0)
 5538			continue;
 5539
 5540		pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
 5541			 local_cand->id, btf_kind_str(local_t),
 5542			 local_name, i, btf_kind_str(t), targ_name,
 5543			 targ_btf_name);
 5544		new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
 5545					      sizeof(*cands->cands));
 5546		if (!new_cands)
 5547			return -ENOMEM;
 5548
 5549		cand = &new_cands[cands->len];
 5550		cand->btf = targ_btf;
 5551		cand->id = i;
 5552
 5553		cands->cands = new_cands;
 5554		cands->len++;
 5555	}
 5556	return 0;
 5557}
 5558
 5559static int load_module_btfs(struct bpf_object *obj)
 5560{
 5561	struct bpf_btf_info info;
 5562	struct module_btf *mod_btf;
 5563	struct btf *btf;
 5564	char name[64];
 5565	__u32 id = 0, len;
 5566	int err, fd;
 5567
 5568	if (obj->btf_modules_loaded)
 5569		return 0;
 5570
 5571	if (obj->gen_loader)
 5572		return 0;
 5573
 5574	/* don't do this again, even if we find no module BTFs */
 5575	obj->btf_modules_loaded = true;
 5576
 5577	/* kernel too old to support module BTFs */
 5578	if (!kernel_supports(obj, FEAT_MODULE_BTF))
 5579		return 0;
 5580
 5581	while (true) {
 5582		err = bpf_btf_get_next_id(id, &id);
 5583		if (err && errno == ENOENT)
 5584			return 0;
 5585		if (err && errno == EPERM) {
 5586			pr_debug("skipping module BTFs loading, missing privileges\n");
 5587			return 0;
 5588		}
 5589		if (err) {
 5590			err = -errno;
 5591			pr_warn("failed to iterate BTF objects: %d\n", err);
 5592			return err;
 5593		}
 5594
 5595		fd = bpf_btf_get_fd_by_id(id);
 5596		if (fd < 0) {
 5597			if (errno == ENOENT)
 5598				continue; /* expected race: BTF was unloaded */
 5599			err = -errno;
 5600			pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
 5601			return err;
 5602		}
 5603
 5604		len = sizeof(info);
 5605		memset(&info, 0, sizeof(info));
 5606		info.name = ptr_to_u64(name);
 5607		info.name_len = sizeof(name);
 5608
 5609		err = bpf_btf_get_info_by_fd(fd, &info, &len);
 5610		if (err) {
 5611			err = -errno;
 5612			pr_warn("failed to get BTF object #%d info: %d\n", id, err);
 5613			goto err_out;
 5614		}
 5615
 5616		/* ignore non-module BTFs */
 5617		if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
 5618			close(fd);
 5619			continue;
 5620		}
 5621
 5622		btf = btf_get_from_fd(fd, obj->btf_vmlinux);
 5623		err = libbpf_get_error(btf);
 5624		if (err) {
 5625			pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
 5626				name, id, err);
 5627			goto err_out;
 5628		}
 5629
 5630		err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
 5631					sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
 5632		if (err)
 5633			goto err_out;
 5634
 5635		mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
 5636
 5637		mod_btf->btf = btf;
 5638		mod_btf->id = id;
 5639		mod_btf->fd = fd;
 5640		mod_btf->name = strdup(name);
 5641		if (!mod_btf->name) {
 5642			err = -ENOMEM;
 5643			goto err_out;
 5644		}
 5645		continue;
 5646
 5647err_out:
 5648		close(fd);
 5649		return err;
 5650	}
 5651
 5652	return 0;
 5653}
 5654
 5655static struct bpf_core_cand_list *
 5656bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
 5657{
 5658	struct bpf_core_cand local_cand = {};
 5659	struct bpf_core_cand_list *cands;
 5660	const struct btf *main_btf;
 5661	const struct btf_type *local_t;
 5662	const char *local_name;
 5663	size_t local_essent_len;
 5664	int err, i;
 5665
 5666	local_cand.btf = local_btf;
 5667	local_cand.id = local_type_id;
 5668	local_t = btf__type_by_id(local_btf, local_type_id);
 5669	if (!local_t)
 5670		return ERR_PTR(-EINVAL);
 5671
 5672	local_name = btf__name_by_offset(local_btf, local_t->name_off);
 5673	if (str_is_empty(local_name))
 5674		return ERR_PTR(-EINVAL);
 5675	local_essent_len = bpf_core_essential_name_len(local_name);
 5676
 5677	cands = calloc(1, sizeof(*cands));
 5678	if (!cands)
 5679		return ERR_PTR(-ENOMEM);
 5680
 5681	/* Attempt to find target candidates in vmlinux BTF first */
 5682	main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
 5683	err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
 5684	if (err)
 5685		goto err_out;
 5686
 5687	/* if vmlinux BTF has any candidate, don't go for module BTFs */
 5688	if (cands->len)
 5689		return cands;
 5690
 5691	/* if vmlinux BTF was overridden, don't attempt to load module BTFs */
 5692	if (obj->btf_vmlinux_override)
 5693		return cands;
 5694
 5695	/* now look through module BTFs, trying to still find candidates */
 5696	err = load_module_btfs(obj);
 5697	if (err)
 5698		goto err_out;
 5699
 5700	for (i = 0; i < obj->btf_module_cnt; i++) {
 5701		err = bpf_core_add_cands(&local_cand, local_essent_len,
 5702					 obj->btf_modules[i].btf,
 5703					 obj->btf_modules[i].name,
 5704					 btf__type_cnt(obj->btf_vmlinux),
 5705					 cands);
 5706		if (err)
 5707			goto err_out;
 5708	}
 5709
 5710	return cands;
 5711err_out:
 5712	bpf_core_free_cands(cands);
 5713	return ERR_PTR(err);
 5714}
 5715
 5716/* Check local and target types for compatibility. This check is used for
 5717 * type-based CO-RE relocations and follows slightly different rules from
 5718 * field-based relocations. This function assumes that root types were already
 5719 * checked for name match. Beyond that initial root-level name check, names
 5720 * are completely ignored. Compatibility rules are as follows:
 5721 *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
 5722 *     kind should match for local and target types (i.e., STRUCT is not
 5723 *     compatible with UNION);
 5724 *   - for ENUMs, the size is ignored;
 5725 *   - for INT, size and signedness are ignored;
 5726 *   - for ARRAY, dimensionality is ignored, element types are checked for
 5727 *     compatibility recursively;
 5728 *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
 5729 *   - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
 5730 *   - FUNC_PROTOs are compatible if they have compatible signature: same
 5731 *     number of input args and compatible return and argument types.
 5732 * These rules are not set in stone and probably will be adjusted as we get
 5733 * more experience with using BPF CO-RE relocations.
 5734 */
 5735int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
 5736			      const struct btf *targ_btf, __u32 targ_id)
 5737{
 5738	return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
 5739}
 5740
 5741int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
 5742			 const struct btf *targ_btf, __u32 targ_id)
 5743{
 5744	return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
 5745}
 5746
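     /* CO-RE candidate cache (see bpf_object__relocate_core() below) is keyed
      * by the local BTF type ID, a small integer, so an identity hash and plain
      * equality comparison are sufficient.
      */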
 5747static size_t bpf_core_hash_fn(const long key, void *ctx)
 5748{
 5749	return key;
 5750}
 5751
 5752static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx)
 5753{
 5754	return k1 == k2;
 5755}
 5756
 5757static int record_relo_core(struct bpf_program *prog,
 5758			    const struct bpf_core_relo *core_relo, int insn_idx)
 5759{
 5760	struct reloc_desc *relos, *relo;
 5761
 5762	relos = libbpf_reallocarray(prog->reloc_desc,
 5763				    prog->nr_reloc + 1, sizeof(*relos));
 5764	if (!relos)
 5765		return -ENOMEM;
 5766	relo = &relos[prog->nr_reloc];
 5767	relo->type = RELO_CORE;
 5768	relo->insn_idx = insn_idx;
 5769	relo->core_relo = core_relo;
 5770	prog->reloc_desc = relos;
 5771	prog->nr_reloc++;
 5772	return 0;
 5773}
 5774
 5775static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
 5776{
 5777	struct reloc_desc *relo;
 5778	int i;
 5779
 5780	for (i = 0; i < prog->nr_reloc; i++) {
 5781		relo = &prog->reloc_desc[i];
 5782		if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
 5783			continue;
 5784
 5785		return relo->core_relo;
 5786	}
 5787
 5788	return NULL;
 5789}
 5790
 5791static int bpf_core_resolve_relo(struct bpf_program *prog,
 5792				 const struct bpf_core_relo *relo,
 5793				 int relo_idx,
 5794				 const struct btf *local_btf,
 5795				 struct hashmap *cand_cache,
 5796				 struct bpf_core_relo_res *targ_res)
 5797{
 5798	struct bpf_core_spec specs_scratch[3] = {};
 5799	struct bpf_core_cand_list *cands = NULL;
 5800	const char *prog_name = prog->name;
 5801	const struct btf_type *local_type;
 5802	const char *local_name;
 5803	__u32 local_id = relo->type_id;
 5804	int err;
 5805
 5806	local_type = btf__type_by_id(local_btf, local_id);
 5807	if (!local_type)
 5808		return -EINVAL;
 5809
 5810	local_name = btf__name_by_offset(local_btf, local_type->name_off);
 5811	if (!local_name)
 5812		return -EINVAL;
 5813
 5814	if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
 5815	    !hashmap__find(cand_cache, local_id, &cands)) {
 5816		cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
 5817		if (IS_ERR(cands)) {
 5818			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
 5819				prog_name, relo_idx, local_id, btf_kind_str(local_type),
 5820				local_name, PTR_ERR(cands));
 5821			return PTR_ERR(cands);
 5822		}
 5823		err = hashmap__set(cand_cache, local_id, cands, NULL, NULL);
 5824		if (err) {
 5825			bpf_core_free_cands(cands);
 5826			return err;
 5827		}
 5828	}
 5829
 5830	return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
 5831				       targ_res);
 5832}
 5833
 5834static int
 5835bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 5836{
 5837	const struct btf_ext_info_sec *sec;
 5838	struct bpf_core_relo_res targ_res;
 5839	const struct bpf_core_relo *rec;
 5840	const struct btf_ext_info *seg;
 5841	struct hashmap_entry *entry;
 5842	struct hashmap *cand_cache = NULL;
 5843	struct bpf_program *prog;
 5844	struct bpf_insn *insn;
 5845	const char *sec_name;
 5846	int i, err = 0, insn_idx, sec_idx, sec_num;
 5847
 5848	if (obj->btf_ext->core_relo_info.len == 0)
 5849		return 0;
 5850
 5851	if (targ_btf_path) {
 5852		obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
 5853		err = libbpf_get_error(obj->btf_vmlinux_override);
 5854		if (err) {
 5855			pr_warn("failed to parse target BTF: %d\n", err);
 5856			return err;
 5857		}
 5858	}
 5859
 5860	cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
 5861	if (IS_ERR(cand_cache)) {
 5862		err = PTR_ERR(cand_cache);
 5863		goto out;
 5864	}
 5865
 5866	seg = &obj->btf_ext->core_relo_info;
 5867	sec_num = 0;
 5868	for_each_btf_ext_sec(seg, sec) {
 5869		sec_idx = seg->sec_idxs[sec_num];
 5870		sec_num++;
 5871
 5872		sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
 5873		if (str_is_empty(sec_name)) {
 5874			err = -EINVAL;
 5875			goto out;
 5876		}
 5877
 5878		pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
 5879
 5880		for_each_btf_ext_rec(seg, sec, i, rec) {
 5881			if (rec->insn_off % BPF_INSN_SZ)
 5882				return -EINVAL;
 5883			insn_idx = rec->insn_off / BPF_INSN_SZ;
 5884			prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
 5885			if (!prog) {
 5886				/* When __weak subprog is "overridden" by another instance
 5887				 * of the subprog from a different object file, linker still
 5888				 * appends all the .BTF.ext info that used to belong to that
 5889				 * eliminated subprogram.
 5890				 * This is similar to what x86-64 linker does for relocations.
 5891			 * So ignore such relocations, just like we ignore
 5892				 * subprog instructions when discovering subprograms.
 5893				 */
 5894				pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
 5895					 sec_name, i, insn_idx);
 5896				continue;
 5897			}
 5898			/* no need to apply CO-RE relocation if the program is
 5899			 * not going to be loaded
 5900			 */
 5901			if (!prog->autoload)
 5902				continue;
 5903
 5904			/* adjust insn_idx from section frame of reference to the local
 5905			 * program's frame of reference; (sub-)program code is not yet
 5906			 * relocated, so it's enough to just subtract in-section offset
 5907			 */
 5908			insn_idx = insn_idx - prog->sec_insn_off;
 5909			if (insn_idx >= prog->insns_cnt)
 5910				return -EINVAL;
 5911			insn = &prog->insns[insn_idx];
 5912
 5913			err = record_relo_core(prog, rec, insn_idx);
 5914			if (err) {
 5915				pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
 5916					prog->name, i, err);
 5917				goto out;
 5918			}
 5919
 5920			if (prog->obj->gen_loader)
 5921				continue;
 5922
 5923			err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
 5924			if (err) {
 5925				pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
 5926					prog->name, i, err);
 5927				goto out;
 5928			}
 5929
 5930			err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
 5931			if (err) {
 5932				pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
 5933					prog->name, i, insn_idx, err);
 5934				goto out;
 5935			}
 5936		}
 5937	}
 5938
 5939out:
 5940	/* obj->btf_vmlinux and module BTFs are freed after object load */
 5941	btf__free(obj->btf_vmlinux_override);
 5942	obj->btf_vmlinux_override = NULL;
 5943
 5944	if (!IS_ERR_OR_NULL(cand_cache)) {
 5945		hashmap__for_each_entry(cand_cache, entry, i) {
 5946			bpf_core_free_cands(entry->pvalue);
 5947		}
 5948		hashmap__free(cand_cache);
 5949	}
 5950	return err;
 5951}
 5952
 5953/* base map load ldimm64 special constant, used also for log fixup logic */
 5954#define POISON_LDIMM64_MAP_BASE 2001000000
 5955#define POISON_LDIMM64_MAP_PFX "200100"
 5956
 5957static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
 5958			       int insn_idx, struct bpf_insn *insn,
 5959			       int map_idx, const struct bpf_map *map)
 5960{
 5961	int i;
 5962
 5963	pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
 5964		 prog->name, relo_idx, insn_idx, map_idx, map->name);
 5965
 5966	/* we turn single ldimm64 into two identical invalid calls */
 5967	for (i = 0; i < 2; i++) {
 5968		insn->code = BPF_JMP | BPF_CALL;
 5969		insn->dst_reg = 0;
 5970		insn->src_reg = 0;
 5971		insn->off = 0;
 5972		/* if this instruction is reachable (not dead code), the
 5973		 * verifier will complain with something like:
 5974		 * invalid func unknown#2001000123
 5975		 * where the lower 123 is the map index into obj->maps[] array
 5976		 */
 5977		insn->imm = POISON_LDIMM64_MAP_BASE + map_idx;
 5978
 5979		insn++;
 5980	}
 5981}
 5982
 5983/* unresolved kfunc call special constant, used also for log fixup logic */
 5984#define POISON_CALL_KFUNC_BASE 2002000000
 5985#define POISON_CALL_KFUNC_PFX "2002"
 5986
 5987static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
 5988			      int insn_idx, struct bpf_insn *insn,
 5989			      int ext_idx, const struct extern_desc *ext)
 5990{
 5991	pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n",
 5992		 prog->name, relo_idx, insn_idx, ext->name);
 5993
 5994	/* we turn kfunc call into invalid helper call with identifiable constant */
 5995	insn->code = BPF_JMP | BPF_CALL;
 5996	insn->dst_reg = 0;
 5997	insn->src_reg = 0;
 5998	insn->off = 0;
 5999	/* if this instruction is reachable (not dead code), the
 6000	 * verifier will complain with something like:
 6001	 * invalid func unknown#2002000123
 6002	 * where the lower 123 is the extern index into obj->externs[] array
 6003	 */
 6004	insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
 6005}
 6006
 6007/* Relocate data references within program code:
 6008 *  - map references;
 6009 *  - global variable references;
 6010 *  - extern references.
 6011 */
 6012static int
 6013bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
 6014{
 6015	int i;
 6016
 6017	for (i = 0; i < prog->nr_reloc; i++) {
 6018		struct reloc_desc *relo = &prog->reloc_desc[i];
 6019		struct bpf_insn *insn = &prog->insns[relo->insn_idx];
 6020		const struct bpf_map *map;
 6021		struct extern_desc *ext;
 6022
 6023		switch (relo->type) {
 6024		case RELO_LD64:
 6025			map = &obj->maps[relo->map_idx];
 6026			if (obj->gen_loader) {
 6027				insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
 6028				insn[0].imm = relo->map_idx;
 6029			} else if (map->autocreate) {
 6030				insn[0].src_reg = BPF_PSEUDO_MAP_FD;
 6031				insn[0].imm = map->fd;
 6032			} else {
 6033				poison_map_ldimm64(prog, i, relo->insn_idx, insn,
 6034						   relo->map_idx, map);
 6035			}
 6036			break;
 6037		case RELO_DATA:
 6038			map = &obj->maps[relo->map_idx];
 6039			insn[1].imm = insn[0].imm + relo->sym_off;
 6040			if (obj->gen_loader) {
 6041				insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
 6042				insn[0].imm = relo->map_idx;
 6043			} else if (map->autocreate) {
 6044				insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
 6045				insn[0].imm = map->fd;
 6046			} else {
 6047				poison_map_ldimm64(prog, i, relo->insn_idx, insn,
 6048						   relo->map_idx, map);
 6049			}
 6050			break;
 6051		case RELO_EXTERN_LD64:
 6052			ext = &obj->externs[relo->ext_idx];
 6053			if (ext->type == EXT_KCFG) {
 6054				if (obj->gen_loader) {
 6055					insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
 6056					insn[0].imm = obj->kconfig_map_idx;
 6057				} else {
 6058					insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
 6059					insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
 6060				}
 6061				insn[1].imm = ext->kcfg.data_off;
 6062			} else /* EXT_KSYM */ {
 6063				if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
 6064					insn[0].src_reg = BPF_PSEUDO_BTF_ID;
 6065					insn[0].imm = ext->ksym.kernel_btf_id;
 6066					insn[1].imm = ext->ksym.kernel_btf_obj_fd;
 6067				} else { /* typeless ksyms or unresolved typed ksyms */
 6068					insn[0].imm = (__u32)ext->ksym.addr;
 6069					insn[1].imm = ext->ksym.addr >> 32;
 6070				}
 6071			}
 6072			break;
 6073		case RELO_EXTERN_CALL:
 6074			ext = &obj->externs[relo->ext_idx];
 6075			insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
 6076			if (ext->is_set) {
 6077				insn[0].imm = ext->ksym.kernel_btf_id;
 6078				insn[0].off = ext->ksym.btf_fd_idx;
 6079			} else { /* unresolved weak kfunc call */
 6080				poison_kfunc_call(prog, i, relo->insn_idx, insn,
 6081						  relo->ext_idx, ext);
 6082			}
 6083			break;
 6084		case RELO_SUBPROG_ADDR:
 6085			if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
 6086				pr_warn("prog '%s': relo #%d: bad insn\n",
 6087					prog->name, i);
 6088				return -EINVAL;
 6089			}
 6090			/* handled already */
 6091			break;
 6092		case RELO_CALL:
 6093			/* handled already */
 6094			break;
 6095		case RELO_CORE:
 6096			/* will be handled by bpf_program_record_relos() */
 6097			break;
 6098		default:
 6099			pr_warn("prog '%s': relo #%d: bad relo type %d\n",
 6100				prog->name, i, relo->type);
 6101			return -EINVAL;
 6102		}
 6103	}
 6104
 6105	return 0;
 6106}
 6107
 6108static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
 6109				    const struct bpf_program *prog,
 6110				    const struct btf_ext_info *ext_info,
 6111				    void **prog_info, __u32 *prog_rec_cnt,
 6112				    __u32 *prog_rec_sz)
 6113{
 6114	void *copy_start = NULL, *copy_end = NULL;
 6115	void *rec, *rec_end, *new_prog_info;
 6116	const struct btf_ext_info_sec *sec;
 6117	size_t old_sz, new_sz;
 6118	int i, sec_num, sec_idx, off_adj;
 6119
 6120	sec_num = 0;
 6121	for_each_btf_ext_sec(ext_info, sec) {
 6122		sec_idx = ext_info->sec_idxs[sec_num];
 6123		sec_num++;
 6124		if (prog->sec_idx != sec_idx)
 6125			continue;
 6126
 6127		for_each_btf_ext_rec(ext_info, sec, i, rec) {
 6128			__u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
 6129
 6130			if (insn_off < prog->sec_insn_off)
 6131				continue;
 6132			if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
 6133				break;
 6134
 6135			if (!copy_start)
 6136				copy_start = rec;
 6137			copy_end = rec + ext_info->rec_size;
 6138		}
 6139
 6140		if (!copy_start)
 6141			return -ENOENT;
 6142
 6143		/* append func/line info of a given (sub-)program to the main
 6144		 * program func/line info
 6145		 */
 6146		old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
 6147		new_sz = old_sz + (copy_end - copy_start);
 6148		new_prog_info = realloc(*prog_info, new_sz);
 6149		if (!new_prog_info)
 6150			return -ENOMEM;
 6151		*prog_info = new_prog_info;
 6152		*prog_rec_cnt = new_sz / ext_info->rec_size;
 6153		memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
 6154
 6155		/* Kernel instruction offsets are in units of 8-byte
 6156		 * instructions, while .BTF.ext instruction offsets generated
 6157		 * by Clang are in units of bytes. So convert Clang offsets
 6158		 * into kernel offsets and adjust offset according to program
 6159		 * relocated position.
 6160		 */
 6161		off_adj = prog->sub_insn_off - prog->sec_insn_off;
 6162		rec = new_prog_info + old_sz;
 6163		rec_end = new_prog_info + new_sz;
 6164		for (; rec < rec_end; rec += ext_info->rec_size) {
 6165			__u32 *insn_off = rec;
 6166
 6167			*insn_off = *insn_off / BPF_INSN_SZ + off_adj;
 6168		}
 6169		*prog_rec_sz = ext_info->rec_size;
 6170		return 0;
 6171	}
 6172
 6173	return -ENOENT;
 6174}
 6175
 6176static int
 6177reloc_prog_func_and_line_info(const struct bpf_object *obj,
 6178			      struct bpf_program *main_prog,
 6179			      const struct bpf_program *prog)
 6180{
 6181	int err;
 6182
 6183	/* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
 6184	 * support func/line info
 6185	 */
 6186	if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
 6187		return 0;
 6188
 6189	/* only attempt func info relocation if main program's func_info
 6190	 * relocation was successful
 6191	 */
 6192	if (main_prog != prog && !main_prog->func_info)
 6193		goto line_info;
 6194
 6195	err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
 6196				       &main_prog->func_info,
 6197				       &main_prog->func_info_cnt,
 6198				       &main_prog->func_info_rec_size);
 6199	if (err) {
 6200		if (err != -ENOENT) {
 6201			pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
 6202				prog->name, err);
 6203			return err;
 6204		}
 6205		if (main_prog->func_info) {
 6206			/*
 6207			 * Some info has already been found, but the last
 6208			 * .BTF.ext relocation failed, so we have to error out.
 6209			 */
 6210			pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
 6211			return err;
 6212		}
 6213		/* Failed to load the very first info. Ignore the rest. */
 6214		pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
 6215			prog->name);
 6216	}
 6217
 6218line_info:
 6219	/* don't relocate line info if main program's relocation failed */
 6220	if (main_prog != prog && !main_prog->line_info)
 6221		return 0;
 6222
 6223	err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
 6224				       &main_prog->line_info,
 6225				       &main_prog->line_info_cnt,
 6226				       &main_prog->line_info_rec_size);
 6227	if (err) {
 6228		if (err != -ENOENT) {
 6229			pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
 6230				prog->name, err);
 6231			return err;
 6232		}
 6233		if (main_prog->line_info) {
 6234			/*
 6235			 * Some info has already been found, but the last
 6236			 * .BTF.ext relocation failed, so we have to error out.
 6237			 */
 6238			pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
 6239			return err;
 6240		}
 6241		/* Failed to load the very first info. Ignore the rest. */
 6242		pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
 6243			prog->name);
 6244	}
 6245	return 0;
 6246}
 6247
 6248static int cmp_relo_by_insn_idx(const void *key, const void *elem)
 6249{
 6250	size_t insn_idx = *(const size_t *)key;
 6251	const struct reloc_desc *relo = elem;
 6252
 6253	if (insn_idx == relo->insn_idx)
 6254		return 0;
 6255	return insn_idx < relo->insn_idx ? -1 : 1;
 6256}
 6257
 6258static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
 6259{
 6260	if (!prog->nr_reloc)
 6261		return NULL;
 6262	return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
 6263		       sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
 6264}
 6265
 6266static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
 6267{
 6268	int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
 6269	struct reloc_desc *relos;
 6270	int i;
 6271
 6272	if (main_prog == subprog)
 6273		return 0;
 6274	relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
 6275	/* if new count is zero, reallocarray can return a valid NULL result;
 6276	 * in this case the previous pointer will be freed, so we *have to*
 6277	 * reassign old pointer to the new value (even if it's NULL)
 6278	 */
 6279	if (!relos && new_cnt)
 6280		return -ENOMEM;
 6281	if (subprog->nr_reloc)
 6282		memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
 6283		       sizeof(*relos) * subprog->nr_reloc);
 6284
 6285	for (i = main_prog->nr_reloc; i < new_cnt; i++)
 6286		relos[i].insn_idx += subprog->sub_insn_off;
 6287	/* After insn_idx adjustment the 'relos' array is still sorted
 6288	 * by insn_idx and doesn't break bsearch.
 6289	 */
 6290	main_prog->reloc_desc = relos;
 6291	main_prog->nr_reloc = new_cnt;
 6292	return 0;
 6293}
 6294
 6295static int
 6296bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
 6297				struct bpf_program *subprog)
 6298{
 6299	struct bpf_insn *insns;
 6300	size_t new_cnt;
 6301	int err;
 6302
 6303	subprog->sub_insn_off = main_prog->insns_cnt;
 6304
 6305	new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
 6306	insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
 6307	if (!insns) {
 6308		pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
 6309		return -ENOMEM;
 6310	}
 6311	main_prog->insns = insns;
 6312	main_prog->insns_cnt = new_cnt;
 6313
 6314	memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
 6315	       subprog->insns_cnt * sizeof(*insns));
 6316
 6317	pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
 6318		 main_prog->name, subprog->insns_cnt, subprog->name);
 6319
 6320	/* The subprog insns are now appended. Append its relos too. */
 6321	err = append_subprog_relos(main_prog, subprog);
 6322	if (err)
 6323		return err;
 6324	return 0;
 6325}
 6326
 6327static int
 6328bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
 6329		       struct bpf_program *prog)
 6330{
 6331	size_t sub_insn_idx, insn_idx;
 6332	struct bpf_program *subprog;
 6333	struct reloc_desc *relo;
 6334	struct bpf_insn *insn;
 6335	int err;
 6336
 6337	err = reloc_prog_func_and_line_info(obj, main_prog, prog);
 6338	if (err)
 6339		return err;
 6340
 6341	for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
 6342		insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
 6343		if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
 6344			continue;
 6345
 6346		relo = find_prog_insn_relo(prog, insn_idx);
 6347		if (relo && relo->type == RELO_EXTERN_CALL)
 6348			/* kfunc relocations will be handled later
 6349			 * in bpf_object__relocate_data()
 6350			 */
 6351			continue;
 6352		if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
 6353			pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
 6354				prog->name, insn_idx, relo->type);
 6355			return -LIBBPF_ERRNO__RELOC;
 6356		}
 6357		if (relo) {
 6358			/* sub-program instruction index is a combination of
 6359			 * an offset of a symbol pointed to by relocation and
 6360			 * call instruction's imm field; for global functions,
 6361			 * call always has imm = -1, but for static functions
 6362			 * relocation is against STT_SECTION and insn->imm
 6363			 * points to a start of a static function
 6364			 *
 6365			 * for subprog addr relocation, the relo->sym_off + insn->imm is
 6366			 * the byte offset in the corresponding section.
 6367			 */
 6368			if (relo->type == RELO_CALL)
 6369				sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
 6370			else
 6371				sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
 6372		} else if (insn_is_pseudo_func(insn)) {
 6373			/*
 6374			 * RELO_SUBPROG_ADDR relo is always emitted even if both
 6375			 * functions are in the same section, so it shouldn't reach here.
 6376			 */
 6377			pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
 6378				prog->name, insn_idx);
 6379			return -LIBBPF_ERRNO__RELOC;
 6380		} else {
 6381			/* if subprogram call is to a static function within
 6382			 * the same ELF section, there won't be any relocation
 6383			 * emitted, but it also means there is no additional
 6384			 * offset necessary, insns->imm is relative to
 6385			 * instruction's original position within the section
 6386			 */
 6387			sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
 6388		}
 6389
 6390		/* we enforce that sub-programs should be in .text section */
 6391		subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
 6392		if (!subprog) {
 6393			pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
 6394				prog->name);
 6395			return -LIBBPF_ERRNO__RELOC;
 6396		}
 6397
 6398		/* if it's the first call instruction calling into this
 6399		 * subprogram (meaning this subprog hasn't been processed
 6400		 * yet) within the context of current main program:
 6401		 *   - append it at the end of main program's instructions block;
 6402		 *   - process it recursively, while current program is put on hold;
 6403		 *   - if that subprogram calls some other not-yet-processed
 6404		 *   subprogram, the same thing will happen recursively until
 6405		 *   there are no more unprocessed subprograms left to append
 6406		 *   and relocate.
 6407		 */
 6408		if (subprog->sub_insn_off == 0) {
 6409			err = bpf_object__append_subprog_code(obj, main_prog, subprog);
 6410			if (err)
 6411				return err;
 6412			err = bpf_object__reloc_code(obj, main_prog, subprog);
 6413			if (err)
 6414				return err;
 6415		}
 6416
 6417		/* main_prog->insns memory could have been re-allocated, so
 6418		 * calculate pointer again
 6419		 */
 6420		insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
 6421		/* calculate correct instruction position within current main
 6422		 * prog; each main prog can have a different set of
 6423		 * subprograms appended (potentially in different order as
 6424		 * well), so position of any subprog can be different for
 6425		 * different main programs
 6426		 */
 6427		insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
 6428
 6429		pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
 6430			 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
 6431	}
 6432
 6433	return 0;
 6434}
 6435
 6436/*
 6437 * Relocate sub-program calls.
 6438 *
 6439 * Algorithm operates as follows. Each entry-point BPF program (referred to as
 6440 * main prog) is processed separately. Each subprog (a non-entry function
 6441 * that can be called from either entry progs or other subprogs) gets its
 6442 * sub_insn_off reset to zero. This serves as an indicator that this subprogram
 6443 * hasn't yet been appended and relocated within the current main prog. Once it's
 6444 * relocated, sub_insn_off will point at the position within the current main prog
 6445 * where the given subprog was appended. This will further be used to relocate all
 6446 * the call instructions jumping into this subprog.
 6447 *
 6448 * We start with main program and process all call instructions. If the call
 6449 * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
 6450 * is zero), subprog instructions are appended at the end of main program's
 6451 * instruction array. Then main program is "put on hold" while we recursively
 6452 * process newly appended subprogram. If that subprogram calls into another
 6453 * subprogram that hasn't been appended, new subprogram is appended again to
 6454 * the *main* prog's instructions (subprog's instructions are always left
 6455 * untouched, as they need to be in unmodified state for subsequent main progs
 6456 * and subprog instructions are always sent only as part of a main prog) and
 6457 * the process continues recursively. Once all the subprogs called from a main
 6458 * prog or any of its subprogs are appended (and relocated), all their
 6459 * positions within finalized instructions array are known, so it's easy to
 6460 * rewrite call instructions with correct relative offsets, corresponding to
 6461 * desired target subprog.
 6462 *
 6463 * It's important to realize that some subprogs might not be called from a
 6464 * main prog or any of its called/used subprogs. Those will keep their
 6465 * subprog->sub_insn_off as zero at all times and won't be appended to current
 6466 * main prog and won't be relocated within the context of current main prog.
 6467 * They might still be used from other main progs later.
 6468 *
 6469 * Visually this process can be shown as below. Suppose we have two main
 6470 * programs mainA and mainB and BPF object contains three subprogs: subA,
 6471 * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
 6472 * subC both call subB:
 6473 *
 6474 *        +--------+ +-------+
 6475 *        |        v v       |
 6476 *     +--+---+ +--+-+-+ +---+--+
 6477 *     | subA | | subB | | subC |
 6478 *     +--+---+ +------+ +---+--+
 6479 *        ^                  ^
 6480 *        |                  |
 6481 *    +---+-------+   +------+----+
 6482 *    |   mainA   |   |   mainB   |
 6483 *    +-----------+   +-----------+
 6484 *
 6485 * We'll start relocating mainA, will find subA, append it and start
 6486 * processing sub A recursively:
 6487 *
 6488 *    +-----------+------+
 6489 *    |   mainA   | subA |
 6490 *    +-----------+------+
 6491 *
 6492 * At this point we notice that subB is used from subA, so we append it and
 6493 * relocate (there are no further subcalls from subB):
 6494 *
 6495 *    +-----------+------+------+
 6496 *    |   mainA   | subA | subB |
 6497 *    +-----------+------+------+
 6498 *
 6499 * At this point, we relocate subA calls, then go one level up and finish with
 6500 * relocating mainA calls. mainA is done.
 6501 *
 6502 * For mainB the process is similar but results in a different order. We start with
 6503 * mainB and skip subA and subB, as mainB never calls them (at least
 6504 * directly), but we see subC is needed, so we append and start processing it:
 6505 *
 6506 *    +-----------+------+
 6507 *    |   mainB   | subC |
 6508 *    +-----------+------+
 6509 * Now we see subC needs subB, so we go back to it, append and relocate it:
 6510 *
 6511 *    +-----------+------+------+
 6512 *    |   mainB   | subC | subB |
 6513 *    +-----------+------+------+
 6514 *
 6515 * At this point we unwind recursion, relocate calls in subC, then in mainB.
 6516 */
 6517static int
 6518bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
 6519{
 6520	struct bpf_program *subprog;
 6521	int i, err;
 6522
 6523	/* mark all subprogs as not relocated (yet) within the context of
 6524	 * current main program
 6525	 */
 6526	for (i = 0; i < obj->nr_programs; i++) {
 6527		subprog = &obj->programs[i];
 6528		if (!prog_is_subprog(obj, subprog))
 6529			continue;
 6530
 6531		subprog->sub_insn_off = 0;
 6532	}
 6533
 6534	err = bpf_object__reloc_code(obj, prog, prog);
 6535	if (err)
 6536		return err;
 6537
 6538	return 0;
 6539}
 6540
 6541static void
 6542bpf_object__free_relocs(struct bpf_object *obj)
 6543{
 6544	struct bpf_program *prog;
 6545	int i;
 6546
 6547	/* free up relocation descriptors */
 6548	for (i = 0; i < obj->nr_programs; i++) {
 6549		prog = &obj->programs[i];
 6550		zfree(&prog->reloc_desc);
 6551		prog->nr_reloc = 0;
 6552	}
 6553}
 6554
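     /* Relocation descriptors are kept sorted by insn_idx (see
      * bpf_object__sort_relos() below) so that find_prog_insn_relo() can look
      * them up with bsearch().
      */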
 6555static int cmp_relocs(const void *_a, const void *_b)
 6556{
 6557	const struct reloc_desc *a = _a;
 6558	const struct reloc_desc *b = _b;
 6559
 6560	if (a->insn_idx != b->insn_idx)
 6561		return a->insn_idx < b->insn_idx ? -1 : 1;
 6562
 6563	/* no two relocations should have the same insn_idx, but ... */
 6564	if (a->type != b->type)
 6565		return a->type < b->type ? -1 : 1;
 6566
 6567	return 0;
 6568}
 6569
 6570static void bpf_object__sort_relos(struct bpf_object *obj)
 6571{
 6572	int i;
 6573
 6574	for (i = 0; i < obj->nr_programs; i++) {
 6575		struct bpf_program *p = &obj->programs[i];
 6576
 6577		if (!p->nr_reloc)
 6578			continue;
 6579
 6580		qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
 6581	}
 6582}
 6583
 6584static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog)
 6585{
 6586	const char *str = "exception_callback:";
 6587	size_t pfx_len = strlen(str);
 6588	int i, j, n;
 6589
 6590	if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG))
 6591		return 0;
 6592
 6593	n = btf__type_cnt(obj->btf);
 6594	for (i = 1; i < n; i++) {
 6595		const char *name;
 6596		struct btf_type *t;
 6597
 6598		t = btf_type_by_id(obj->btf, i);
 6599		if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1)
 6600			continue;
 6601
 6602		name = btf__str_by_offset(obj->btf, t->name_off);
 6603		if (strncmp(name, str, pfx_len) != 0)
 6604			continue;
 6605
 6606		t = btf_type_by_id(obj->btf, t->type);
 6607		if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
 6608			pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n",
 6609				prog->name);
 6610			return -EINVAL;
 6611		}
 6612		if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0)
 6613			continue;
 6614		/* If multiple callbacks are specified for the same prog,
 6615		 * the verifier will eventually return an error for this
 6616		 * case, so simply skip appending a subprog.
 6617		 */
 6618		if (prog->exception_cb_idx >= 0) {
 6619			prog->exception_cb_idx = -1;
 6620			break;
 6621		}
 6622
 6623		name += pfx_len;
 6624		if (str_is_empty(name)) {
 6625			pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n",
 6626				prog->name);
 6627			return -EINVAL;
 6628		}
 6629
 6630		for (j = 0; j < obj->nr_programs; j++) {
 6631			struct bpf_program *subprog = &obj->programs[j];
 6632
 6633			if (!prog_is_subprog(obj, subprog))
 6634				continue;
 6635			if (strcmp(name, subprog->name) != 0)
 6636				continue;
 6637			/* Enforce non-hidden, as from the verifier's point of
 6638			 * view it expects global functions, whereas
 6639			 * mark_btf_static fixes up the linkage to static.
 6640			 */
 6641			if (!subprog->sym_global || subprog->mark_btf_static) {
 6642				pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n",
 6643					prog->name, subprog->name);
 6644				return -EINVAL;
 6645			}
 6646			/* Let's see if we already saw a static exception callback with the same name */
 6647			if (prog->exception_cb_idx >= 0) {
 6648				pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n",
 6649					prog->name, subprog->name);
 6650				return -EINVAL;
 6651			}
 6652			prog->exception_cb_idx = j;
 6653			break;
 6654		}
 6655
 6656		if (prog->exception_cb_idx >= 0)
 6657			continue;
 6658
 6659		pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name);
 6660		return -ENOENT;
 6661	}
 6662
 6663	return 0;
 6664}
 6665
 6666static struct {
 6667	enum bpf_prog_type prog_type;
 6668	const char *ctx_name;
 6669} global_ctx_map[] = {
 6670	{ BPF_PROG_TYPE_CGROUP_DEVICE,           "bpf_cgroup_dev_ctx" },
 6671	{ BPF_PROG_TYPE_CGROUP_SKB,              "__sk_buff" },
 6672	{ BPF_PROG_TYPE_CGROUP_SOCK,             "bpf_sock" },
 6673	{ BPF_PROG_TYPE_CGROUP_SOCK_ADDR,        "bpf_sock_addr" },
 6674	{ BPF_PROG_TYPE_CGROUP_SOCKOPT,          "bpf_sockopt" },
 6675	{ BPF_PROG_TYPE_CGROUP_SYSCTL,           "bpf_sysctl" },
 6676	{ BPF_PROG_TYPE_FLOW_DISSECTOR,          "__sk_buff" },
 6677	{ BPF_PROG_TYPE_KPROBE,                  "bpf_user_pt_regs_t" },
 6678	{ BPF_PROG_TYPE_LWT_IN,                  "__sk_buff" },
 6679	{ BPF_PROG_TYPE_LWT_OUT,                 "__sk_buff" },
 6680	{ BPF_PROG_TYPE_LWT_SEG6LOCAL,           "__sk_buff" },
 6681	{ BPF_PROG_TYPE_LWT_XMIT,                "__sk_buff" },
 6682	{ BPF_PROG_TYPE_NETFILTER,               "bpf_nf_ctx" },
 6683	{ BPF_PROG_TYPE_PERF_EVENT,              "bpf_perf_event_data" },
 6684	{ BPF_PROG_TYPE_RAW_TRACEPOINT,          "bpf_raw_tracepoint_args" },
 6685	{ BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" },
 6686	{ BPF_PROG_TYPE_SCHED_ACT,               "__sk_buff" },
 6687	{ BPF_PROG_TYPE_SCHED_CLS,               "__sk_buff" },
 6688	{ BPF_PROG_TYPE_SK_LOOKUP,               "bpf_sk_lookup" },
 6689	{ BPF_PROG_TYPE_SK_MSG,                  "sk_msg_md" },
 6690	{ BPF_PROG_TYPE_SK_REUSEPORT,            "sk_reuseport_md" },
 6691	{ BPF_PROG_TYPE_SK_SKB,                  "__sk_buff" },
 6692	{ BPF_PROG_TYPE_SOCK_OPS,                "bpf_sock_ops" },
 6693	{ BPF_PROG_TYPE_SOCKET_FILTER,           "__sk_buff" },
 6694	{ BPF_PROG_TYPE_XDP,                     "xdp_md" },
 6695	/* all other program types don't have "named" context structs */
 6696};
 6697
 6698static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog,
 6699				     const char *subprog_name, int arg_idx,
 6700				     int arg_type_id, const char *ctx_name)
 6701{
 6702	const struct btf_type *t;
 6703	const char *tname;
 6704
 6705	/* check if existing parameter already matches verifier expectations */
 6706	t = skip_mods_and_typedefs(btf, arg_type_id, NULL);
 6707	if (!btf_is_ptr(t))
 6708		goto out_warn;
 6709
 6710	/* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe
 6711	 * and perf_event programs, so check this case early on and forget
 6712	 * about it for subsequent checks
 6713	 */
 6714	while (btf_is_mod(t))
 6715		t = btf__type_by_id(btf, t->type);
 6716	if (btf_is_typedef(t) &&
 6717	    (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) {
 6718		tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
 6719		if (strcmp(tname, "bpf_user_pt_regs_t") == 0)
 6720			return false; /* canonical type for kprobe/perf_event */
 6721	}
 6722
 6723	/* now we can ignore typedefs moving forward */
 6724	t = skip_mods_and_typedefs(btf, t->type, NULL);
 6725
 6726	/* if it's `void *`, definitely fix up BTF info */
 6727	if (btf_is_void(t))
 6728		return true;
 6729
 6730	/* if it's already proper canonical type, no need to fix up */
 6731	tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
 6732	if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0)
 6733		return false;
 6734
 6735	/* special cases */
 6736	switch (prog->type) {
 6737	case BPF_PROG_TYPE_KPROBE:
 6738	case BPF_PROG_TYPE_PERF_EVENT:
 6739		/* `struct pt_regs *` is expected, but we need to fix up */
 6740		if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
 6741			return true;
 6742		break;
 6743	case BPF_PROG_TYPE_RAW_TRACEPOINT:
 6744	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
 6745		/* allow u64* as ctx */
 6746		if (btf_is_int(t) && t->size == 8)
 6747			return true;
 6748		break;
 6749	default:
 6750		break;
 6751	}
 6752
 6753out_warn:
 6754	pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n",
 6755		prog->name, subprog_name, arg_idx, ctx_name);
 6756	return false;
 6757}
 6758
 6759static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog)
 6760{
 6761	int fn_id, fn_proto_id, ret_type_id, orig_proto_id;
 6762	int i, err, arg_cnt, fn_name_off, linkage;
 6763	struct btf_type *fn_t, *fn_proto_t, *t;
 6764	struct btf_param *p;
 6765
 6766	/* caller already validated FUNC -> FUNC_PROTO validity */
 6767	fn_t = btf_type_by_id(btf, orig_fn_id);
 6768	fn_proto_t = btf_type_by_id(btf, fn_t->type);
 6769
 6770	/* Note that each btf__add_xxx() operation invalidates
 6771	 * all btf_type and string pointers, so we need to be
 6772	 * very careful when cloning BTF types. BTF type
 6773	 * pointers have to be always refetched. And to avoid
 6774	 * problems with invalidated string pointers, we
 6775	 * add empty strings initially, then just fix up
 6776	 * name_off offsets in place. Offsets are stable for
 6777	 * existing strings, so that works out.
 6778	 */
 6779	fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */
 6780	linkage = btf_func_linkage(fn_t);
 6781	orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */
 6782	ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */
 6783	arg_cnt = btf_vlen(fn_proto_t);
 6784
 6785	/* clone FUNC_PROTO and its params */
 6786	fn_proto_id = btf__add_func_proto(btf, ret_type_id);
 6787	if (fn_proto_id < 0)
 6788		return -EINVAL;
 6789
 6790	for (i = 0; i < arg_cnt; i++) {
 6791		int name_off;
 6792
 6793		/* copy original parameter data */
 6794		t = btf_type_by_id(btf, orig_proto_id);
 6795		p = &btf_params(t)[i];
 6796		name_off = p->name_off;
 6797
 6798		err = btf__add_func_param(btf, "", p->type);
 6799		if (err)
 6800			return err;
 6801
 6802		fn_proto_t = btf_type_by_id(btf, fn_proto_id);
 6803		p = &btf_params(fn_proto_t)[i];
 6804		p->name_off = name_off; /* use remembered str offset */
 6805	}
 6806
 6807	/* clone FUNC now, btf__add_func() enforces non-empty name, so use
 6808	 * entry program's name as a placeholder, which we replace immediately
 6809	 * with original name_off
 6810	 */
 6811	fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id);
 6812	if (fn_id < 0)
 6813		return -EINVAL;
 6814
 6815	fn_t = btf_type_by_id(btf, fn_id);
 6816	fn_t->name_off = fn_name_off; /* reuse original string */
 6817
 6818	return fn_id;
 6819}
 6820
 6821static int probe_kern_arg_ctx_tag(void)
 6822{
 6823	/* To minimize merge conflicts with BPF token series that refactors
 6824	 * feature detection code a lot, we don't integrate
 6825	 * probe_kern_arg_ctx_tag() into kernel_supports() feature-detection
 6826	 * framework yet, doing our own caching internally.
 6827	 * This will be cleaned up a bit later when bpf/bpf-next trees settle.
 6828	 */
 6829	static int cached_result = -1;
 6830	static const char strs[] = "\0a\0b\0arg:ctx\0";
 6831	const __u32 types[] = {
 6832		/* [1] INT */
 6833		BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4),
 6834		/* [2] PTR -> VOID */
 6835		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
 6836		/* [3] FUNC_PROTO `int(void *a)` */
 6837		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1),
 6838		BTF_PARAM_ENC(1 /* "a" */, 2),
 6839		/* [4] FUNC 'a' -> FUNC_PROTO (main prog) */
 6840		BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3),
 6841		/* [5] FUNC_PROTO `int(void *b __arg_ctx)` */
 6842		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1),
 6843		BTF_PARAM_ENC(3 /* "b" */, 2),
 6844		/* [6] FUNC 'b' -> FUNC_PROTO (subprog) */
 6845		BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5),
 6846		/* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */
 6847		BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0),
 6848	};
 6849	const struct bpf_insn insns[] = {
 6850		/* main prog */
 6851		BPF_CALL_REL(+1),
 6852		BPF_EXIT_INSN(),
 6853		/* global subprog */
 6854		BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */
 6855		BPF_EXIT_INSN(),
 6856	};
 6857	const struct bpf_func_info_min func_infos[] = {
 6858		{ 0, 4 }, /* main prog -> FUNC 'a' */
 6859		{ 2, 6 }, /* subprog -> FUNC 'b' */
 6860	};
 6861	LIBBPF_OPTS(bpf_prog_load_opts, opts);
 6862	int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns);
 6863
 6864	if (cached_result >= 0)
 6865		return cached_result;
 6866
 6867	btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
 6868	if (btf_fd < 0)
 6869		return 0;
 6870
 6871	opts.prog_btf_fd = btf_fd;
 6872	opts.func_info = &func_infos;
 6873	opts.func_info_cnt = ARRAY_SIZE(func_infos);
 6874	opts.func_info_rec_size = sizeof(func_infos[0]);
 6875
 6876	prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx",
 6877				"GPL", insns, insn_cnt, &opts);
 6878	close(btf_fd);
 6879
 6880	cached_result = probe_fd(prog_fd);
 6881	return cached_result;
 6882}
 6883
 6884/* Check if main program or global subprog's function prototype has `arg:ctx`
 6885 * argument tags, and, if necessary, substitute the correct type to match what
 6886 * the BPF verifier would expect, taking into account the specific program type.
 6887 * This allows supporting the __arg_ctx tag transparently on old kernels that
 6888 * don't yet have native support for it in the verifier, making users' lives
 6889 * much easier.
 6890 */
 6891static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog)
 6892{
 6893	const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name;
 6894	struct bpf_func_info_min *func_rec;
 6895	struct btf_type *fn_t, *fn_proto_t;
 6896	struct btf *btf = obj->btf;
 6897	const struct btf_type *t;
 6898	struct btf_param *p;
 6899	int ptr_id = 0, struct_id, tag_id, orig_fn_id;
 6900	int i, n, arg_idx, arg_cnt, err, rec_idx;
 6901	int *orig_ids;
 6902
 6903	/* no .BTF.ext, no problem */
 6904	if (!obj->btf_ext || !prog->func_info)
 6905		return 0;
 6906
 6907	/* don't do any fix ups if kernel natively supports __arg_ctx */
 6908	if (probe_kern_arg_ctx_tag() > 0)
 6909		return 0;
 6910
 6911	/* some BPF program types just don't have named context structs, so
 6912	 * this fallback mechanism doesn't work for them
 6913	 */
 6914	for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) {
 6915		if (global_ctx_map[i].prog_type != prog->type)
 6916			continue;
 6917		ctx_name = global_ctx_map[i].ctx_name;
 6918		break;
 6919	}
 6920	if (!ctx_name)
 6921		return 0;
 6922
 6923	/* remember original func BTF IDs to detect if we already cloned them */
 6924	orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids));
 6925	if (!orig_ids)
 6926		return -ENOMEM;
 6927	for (i = 0; i < prog->func_info_cnt; i++) {
 6928		func_rec = prog->func_info + prog->func_info_rec_size * i;
 6929		orig_ids[i] = func_rec->type_id;
 6930	}
 6931
 6932	/* go through each DECL_TAG with "arg:ctx" and see if it points to one
 6933	 * of our subprogs; if yes and subprog is global and needs adjustment,
 6934	 * clone and adjust FUNC -> FUNC_PROTO combo
 6935	 */
 6936	for (i = 1, n = btf__type_cnt(btf); i < n; i++) {
 6937		/* only DECL_TAG with "arg:ctx" value are interesting */
 6938		t = btf__type_by_id(btf, i);
 6939		if (!btf_is_decl_tag(t))
 6940			continue;
 6941		if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0)
 6942			continue;
 6943
 6944		/* only global funcs need adjustment, if at all */
 6945		orig_fn_id = t->type;
 6946		fn_t = btf_type_by_id(btf, orig_fn_id);
 6947		if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL)
 6948			continue;
 6949
 6950		/* sanity check FUNC -> FUNC_PROTO chain, just in case */
 6951		fn_proto_t = btf_type_by_id(btf, fn_t->type);
 6952		if (!fn_proto_t || !btf_is_func_proto(fn_proto_t))
 6953			continue;
 6954
 6955		/* find corresponding func_info record */
 6956		func_rec = NULL;
 6957		for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) {
 6958			if (orig_ids[rec_idx] == t->type) {
 6959				func_rec = prog->func_info + prog->func_info_rec_size * rec_idx;
 6960				break;
 6961			}
 6962		}
 6963		/* current main program doesn't call into this subprog */
 6964		if (!func_rec)
 6965			continue;
 6966
 6967		/* some more sanity checking of DECL_TAG */
 6968		arg_cnt = btf_vlen(fn_proto_t);
 6969		arg_idx = btf_decl_tag(t)->component_idx;
 6970		if (arg_idx < 0 || arg_idx >= arg_cnt)
 6971			continue;
 6972
 6973		/* check if we should fix up argument type */
 6974		p = &btf_params(fn_proto_t)[arg_idx];
 6975		fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>";
 6976		if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name))
 6977			continue;
 6978
 6979		/* clone fn/fn_proto, unless we already did it for another arg */
 6980		if (func_rec->type_id == orig_fn_id) {
 6981			int fn_id;
 6982
 6983			fn_id = clone_func_btf_info(btf, orig_fn_id, prog);
 6984			if (fn_id < 0) {
 6985				err = fn_id;
 6986				goto err_out;
 6987			}
 6988
 6989			/* point func_info record to a cloned FUNC type */
 6990			func_rec->type_id = fn_id;
 6991		}
 6992
 6993		/* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument;
 6994		 * we do it just once per main BPF program, as all global
 6995		 * funcs share the same program type, so a single PTR ->
 6996		 * STRUCT type chain is enough
 6997		 */
 6998		if (ptr_id == 0) {
 6999			struct_id = btf__add_struct(btf, ctx_name, 0);
 7000			ptr_id = btf__add_ptr(btf, struct_id);
 7001			if (ptr_id < 0 || struct_id < 0) {
 7002				err = -EINVAL;
 7003				goto err_out;
 7004			}
 7005		}
 7006
 7007		/* for completeness, clone DECL_TAG and point it to cloned param */
 7008		tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx);
 7009		if (tag_id < 0) {
 7010			err = -EINVAL;
 7011			goto err_out;
 7012		}
 7013
 7014		/* all the BTF manipulations invalidated pointers, refetch them */
 7015		fn_t = btf_type_by_id(btf, func_rec->type_id);
 7016		fn_proto_t = btf_type_by_id(btf, fn_t->type);
 7017
 7018		/* fix up type ID pointed to by param */
 7019		p = &btf_params(fn_proto_t)[arg_idx];
 7020		p->type = ptr_id;
 7021	}
 7022
 7023	free(orig_ids);
 7024	return 0;
 7025err_out:
 7026	free(orig_ids);
 7027	return err;
 7028}
 7029
 7030static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
 7031{
 7032	struct bpf_program *prog;
 7033	size_t i, j;
 7034	int err;
 7035
 7036	if (obj->btf_ext) {
 7037		err = bpf_object__relocate_core(obj, targ_btf_path);
 7038		if (err) {
 7039			pr_warn("failed to perform CO-RE relocations: %d\n",
 7040				err);
 7041			return err;
 7042		}
 7043		bpf_object__sort_relos(obj);
 7044	}
 7045
 7046	/* Before relocating calls, pre-process relocations and mark the
 7047	 * few ld_imm64 instructions that point to subprogs.
 7048	 * Otherwise bpf_object__reloc_code() would later have to consider
 7049	 * all ld_imm64 insns as relocation candidates. That would
 7050	 * slow down relocation, since the number of find_prog_insn_relo()
 7051	 * calls would increase and most of them would fail to find a relo.
 7052	 */
 7053	for (i = 0; i < obj->nr_programs; i++) {
 7054		prog = &obj->programs[i];
 7055		for (j = 0; j < prog->nr_reloc; j++) {
 7056			struct reloc_desc *relo = &prog->reloc_desc[j];
 7057			struct bpf_insn *insn = &prog->insns[relo->insn_idx];
 7058
 7059			/* mark the insn, so it's recognized by insn_is_pseudo_func() */
 7060			if (relo->type == RELO_SUBPROG_ADDR)
 7061				insn[0].src_reg = BPF_PSEUDO_FUNC;
 7062		}
 7063	}
 7064
 7065	/* relocate subprogram calls and append used subprograms to main
 7066	 * programs; each copy of subprogram code needs to be relocated
 7067	 * differently for each main program, because its code location might
 7068	 * have changed.
 7069	 * Append subprog relos to main programs to allow data relos to be
 7070	 * processed after text is completely relocated.
 7071	 */
 7072	for (i = 0; i < obj->nr_programs; i++) {
 7073		prog = &obj->programs[i];
 7074		/* sub-program's sub-calls are relocated within the context of
 7075		 * its main program only
 7076		 */
 7077		if (prog_is_subprog(obj, prog))
 7078			continue;
 7079		if (!prog->autoload)
 7080			continue;
 7081
 7082		err = bpf_object__relocate_calls(obj, prog);
 7083		if (err) {
 7084			pr_warn("prog '%s': failed to relocate calls: %d\n",
 7085				prog->name, err);
 7086			return err;
 7087		}
 7088
 7089		err = bpf_prog_assign_exc_cb(obj, prog);
 7090		if (err)
 7091			return err;
 7092		/* Now, also append exception callback if it has not been done already. */
 7093		if (prog->exception_cb_idx >= 0) {
 7094			struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx];
 7095
 7096			/* Calling the exception callback directly is disallowed; the
 7097			 * verifier will reject it later. If it was already processed,
 7098			 * we can skip this step; otherwise, for all other valid cases we
 7099			 * have to append the exception callback now.
 7100			 */
 7101			if (subprog->sub_insn_off == 0) {
 7102				err = bpf_object__append_subprog_code(obj, prog, subprog);
 7103				if (err)
 7104					return err;
 7105				err = bpf_object__reloc_code(obj, prog, subprog);
 7106				if (err)
 7107					return err;
 7108			}
 7109		}
 7110	}
 7111	for (i = 0; i < obj->nr_programs; i++) {
 7112		prog = &obj->programs[i];
 7113		if (prog_is_subprog(obj, prog))
 7114			continue;
 7115		if (!prog->autoload)
 7116			continue;
 7117
 7118		/* Process data relos for main programs */
 7119		err = bpf_object__relocate_data(obj, prog);
 7120		if (err) {
 7121			pr_warn("prog '%s': failed to relocate data references: %d\n",
 7122				prog->name, err);
 7123			return err;
 7124		}
 7125
 7126		/* Fix up .BTF.ext information, if necessary */
 7127		err = bpf_program_fixup_func_info(obj, prog);
 7128		if (err) {
 7129			pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n",
 7130				prog->name, err);
 7131			return err;
 7132		}
 7133	}
 7134
 7135	return 0;
 7136}
 7137
 7138static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
 7139					    Elf64_Shdr *shdr, Elf_Data *data);
 7140
 7141static int bpf_object__collect_map_relos(struct bpf_object *obj,
 7142					 Elf64_Shdr *shdr, Elf_Data *data)
 7143{
 7144	const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
 7145	int i, j, nrels, new_sz;
 7146	const struct btf_var_secinfo *vi = NULL;
 7147	const struct btf_type *sec, *var, *def;
 7148	struct bpf_map *map = NULL, *targ_map = NULL;
 7149	struct bpf_program *targ_prog = NULL;
 7150	bool is_prog_array, is_map_in_map;
 7151	const struct btf_member *member;
 7152	const char *name, *mname, *type;
 7153	unsigned int moff;
 7154	Elf64_Sym *sym;
 7155	Elf64_Rel *rel;
 7156	void *tmp;
 7157
 7158	if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
 7159		return -EINVAL;
 7160	sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
 7161	if (!sec)
 7162		return -EINVAL;
 7163
 7164	nrels = shdr->sh_size / shdr->sh_entsize;
 7165	for (i = 0; i < nrels; i++) {
 7166		rel = elf_rel_by_idx(data, i);
 7167		if (!rel) {
 7168			pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
 7169			return -LIBBPF_ERRNO__FORMAT;
 7170		}
 7171
 7172		sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
 7173		if (!sym) {
 7174			pr_warn(".maps relo #%d: symbol %zx not found\n",
 7175				i, (size_t)ELF64_R_SYM(rel->r_info));
 7176			return -LIBBPF_ERRNO__FORMAT;
 7177		}
 7178		name = elf_sym_str(obj, sym->st_name) ?: "<?>";
 7179
 7180		pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
 7181			 i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
 7182			 (size_t)rel->r_offset, sym->st_name, name);
 7183
 7184		for (j = 0; j < obj->nr_maps; j++) {
 7185			map = &obj->maps[j];
 7186			if (map->sec_idx != obj->efile.btf_maps_shndx)
 7187				continue;
 7188
 7189			vi = btf_var_secinfos(sec) + map->btf_var_idx;
 7190			if (vi->offset <= rel->r_offset &&
 7191			    rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
 7192				break;
 7193		}
 7194		if (j == obj->nr_maps) {
 7195			pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
 7196				i, name, (size_t)rel->r_offset);
 7197			return -EINVAL;
 7198		}
 7199
 7200		is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
 7201		is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
 7202		type = is_map_in_map ? "map" : "prog";
 7203		if (is_map_in_map) {
 7204			if (sym->st_shndx != obj->efile.btf_maps_shndx) {
 7205				pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
 7206					i, name);
 7207				return -LIBBPF_ERRNO__RELOC;
 7208			}
 7209			if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
 7210			    map->def.key_size != sizeof(int)) {
 7211				pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
 7212					i, map->name, sizeof(int));
 7213				return -EINVAL;
 7214			}
 7215			targ_map = bpf_object__find_map_by_name(obj, name);
 7216			if (!targ_map) {
 7217				pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
 7218					i, name);
 7219				return -ESRCH;
 7220			}
 7221		} else if (is_prog_array) {
 7222			targ_prog = bpf_object__find_program_by_name(obj, name);
 7223			if (!targ_prog) {
 7224				pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
 7225					i, name);
 7226				return -ESRCH;
 7227			}
 7228			if (targ_prog->sec_idx != sym->st_shndx ||
 7229			    targ_prog->sec_insn_off * 8 != sym->st_value ||
 7230			    prog_is_subprog(obj, targ_prog)) {
 7231				pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
 7232					i, name);
 7233				return -LIBBPF_ERRNO__RELOC;
 7234			}
 7235		} else {
 7236			return -EINVAL;
 7237		}
 7238
 7239		var = btf__type_by_id(obj->btf, vi->type);
 7240		def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
 7241		if (btf_vlen(def) == 0)
 7242			return -EINVAL;
 7243		member = btf_members(def) + btf_vlen(def) - 1;
 7244		mname = btf__name_by_offset(obj->btf, member->name_off);
 7245		if (strcmp(mname, "values"))
 7246			return -EINVAL;
 7247
 7248		moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
 7249		if (rel->r_offset - vi->offset < moff)
 7250			return -EINVAL;
 7251
 7252		moff = rel->r_offset - vi->offset - moff;
 7253		/* here we use the BPF pointer size, which is always 64-bit, as we
 7254		 * are parsing an ELF that was built for the BPF target
 7255		 */
 7256		if (moff % bpf_ptr_sz)
 7257			return -EINVAL;
 7258		moff /= bpf_ptr_sz;
 7259		if (moff >= map->init_slots_sz) {
 7260			new_sz = moff + 1;
 7261			tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
 7262			if (!tmp)
 7263				return -ENOMEM;
 7264			map->init_slots = tmp;
 7265			memset(map->init_slots + map->init_slots_sz, 0,
 7266			       (new_sz - map->init_slots_sz) * host_ptr_sz);
 7267			map->init_slots_sz = new_sz;
 7268		}
 7269		map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
 7270
 7271		pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
 7272			 i, map->name, moff, type, name);
 7273	}
 7274
 7275	return 0;
 7276}
 7277
 7278static int bpf_object__collect_relos(struct bpf_object *obj)
 7279{
 7280	int i, err;
 7281
 7282	for (i = 0; i < obj->efile.sec_cnt; i++) {
 7283		struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
 7284		Elf64_Shdr *shdr;
 7285		Elf_Data *data;
 7286		int idx;
 7287
 7288		if (sec_desc->sec_type != SEC_RELO)
 7289			continue;
 7290
 7291		shdr = sec_desc->shdr;
 7292		data = sec_desc->data;
 7293		idx = shdr->sh_info;
 7294
 7295		if (shdr->sh_type != SHT_REL) {
 7296			pr_warn("internal error at %d\n", __LINE__);
 7297			return -LIBBPF_ERRNO__INTERNAL;
 7298		}
 7299
 7300		if (idx == obj->efile.st_ops_shndx || idx == obj->efile.st_ops_link_shndx)
 7301			err = bpf_object__collect_st_ops_relos(obj, shdr, data);
 7302		else if (idx == obj->efile.btf_maps_shndx)
 7303			err = bpf_object__collect_map_relos(obj, shdr, data);
 7304		else
 7305			err = bpf_object__collect_prog_relos(obj, shdr, data);
 7306		if (err)
 7307			return err;
 7308	}
 7309
 7310	bpf_object__sort_relos(obj);
 7311	return 0;
 7312}
 7313
 7314static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
 7315{
 7316	if (BPF_CLASS(insn->code) == BPF_JMP &&
 7317	    BPF_OP(insn->code) == BPF_CALL &&
 7318	    BPF_SRC(insn->code) == BPF_K &&
 7319	    insn->src_reg == 0 &&
 7320	    insn->dst_reg == 0) {
 7321		*func_id = insn->imm;
 7322		return true;
 7323	}
 7324	return false;
 7325}
 7326
 7327static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
 7328{
 7329	struct bpf_insn *insn = prog->insns;
 7330	enum bpf_func_id func_id;
 7331	int i;
 7332
 7333	if (obj->gen_loader)
 7334		return 0;
 7335
 7336	for (i = 0; i < prog->insns_cnt; i++, insn++) {
 7337		if (!insn_is_helper_call(insn, &func_id))
 7338			continue;
 7339
 7340		/* on kernels that don't yet support
 7341		 * bpf_probe_read_{kernel,user}[_str] helpers, fall back
 7342		 * to bpf_probe_read(), which works well on old kernels
 7343		 */
 7344		switch (func_id) {
 7345		case BPF_FUNC_probe_read_kernel:
 7346		case BPF_FUNC_probe_read_user:
 7347			if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
 7348				insn->imm = BPF_FUNC_probe_read;
 7349			break;
 7350		case BPF_FUNC_probe_read_kernel_str:
 7351		case BPF_FUNC_probe_read_user_str:
 7352			if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
 7353				insn->imm = BPF_FUNC_probe_read_str;
 7354			break;
 7355		default:
 7356			break;
 7357		}
 7358	}
 7359	return 0;
 7360}
 7361
 7362static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
 7363				     int *btf_obj_fd, int *btf_type_id);
 7364
 7365/* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
 7366static int libbpf_prepare_prog_load(struct bpf_program *prog,
 7367				    struct bpf_prog_load_opts *opts, long cookie)
 7368{
 7369	enum sec_def_flags def = cookie;
 7370
 7371	/* old kernels might not support specifying expected_attach_type */
 7372	if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
 7373		opts->expected_attach_type = 0;
 7374
 7375	if (def & SEC_SLEEPABLE)
 7376		opts->prog_flags |= BPF_F_SLEEPABLE;
 7377
 7378	if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
 7379		opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
 7380
 7381	/* special check for usdt to use uprobe_multi link */
 7382	if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK))
 7383		prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
 7384
 7385	if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
 7386		int btf_obj_fd = 0, btf_type_id = 0, err;
 7387		const char *attach_name;
 7388
 7389		attach_name = strchr(prog->sec_name, '/');
 7390		if (!attach_name) {
 7391			/* if BPF program is annotated with just SEC("fentry")
 7392			 * (or similar) without declaratively specifying
 7393			 * target, then it is expected that target will be
 7394			 * specified with bpf_program__set_attach_target() at
 7395			 * runtime before BPF object load step. If not, then
 7396			 * there is nothing to load into the kernel, as the BPF
 7397			 * verifier won't be able to validate BPF program
 7398			 * correctness anyway.
 7399			 */
 7400			pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
 7401				prog->name);
 7402			return -EINVAL;
 7403		}
 7404		attach_name++; /* skip over / */
 7405
 7406		err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
 7407		if (err)
 7408			return err;
 7409
 7410		/* cache resolved BTF FD and BTF type ID in the prog */
 7411		prog->attach_btf_obj_fd = btf_obj_fd;
 7412		prog->attach_btf_id = btf_type_id;
 7413
 7414		/* but by now libbpf's common logic no longer uses
 7415		 * prog->attach_btf_obj_fd/prog->attach_btf_id, because
 7416		 * this callback is called after opts were already populated by
 7417		 * libbpf, so this callback has to update opts explicitly here
 7418		 */
 7419		opts->attach_btf_obj_fd = btf_obj_fd;
 7420		opts->attach_btf_id = btf_type_id;
 7421	}
 7422	return 0;
 7423}
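
/*
 * Illustrative sketch (not part of libbpf): when a program is annotated with
 * just SEC("fentry") (or similar) and no declarative target, the caller is
 * expected to pick the target before load, roughly like this (object, program
 * and kernel function names are hypothetical):
 *
 *   struct bpf_object *obj = bpf_object__open_file("probe.bpf.o", NULL);
 *   struct bpf_program *p = bpf_object__find_program_by_name(obj, "handler");
 *
 *   bpf_program__set_attach_target(p, 0, "tcp_v4_connect");
 *   bpf_object__load(obj);
 */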
 7424
 7425static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
 7426
 7427static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
 7428				struct bpf_insn *insns, int insns_cnt,
 7429				const char *license, __u32 kern_version, int *prog_fd)
 7430{
 7431	LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
 7432	const char *prog_name = NULL;
 7433	char *cp, errmsg[STRERR_BUFSIZE];
 7434	size_t log_buf_size = 0;
 7435	char *log_buf = NULL, *tmp;
 7436	int btf_fd, ret, err;
 7437	bool own_log_buf = true;
 7438	__u32 log_level = prog->log_level;
 7439
 7440	if (prog->type == BPF_PROG_TYPE_UNSPEC) {
 7441		/*
 7442		 * The program type must be set.  Most likely we couldn't find a proper
 7443		 * section definition at load time, and thus we didn't infer the type.
 7444		 */
 7445		pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
 7446			prog->name, prog->sec_name);
 7447		return -EINVAL;
 7448	}
 7449
 7450	if (!insns || !insns_cnt)
 7451		return -EINVAL;
 7452
 7453	if (kernel_supports(obj, FEAT_PROG_NAME))
 7454		prog_name = prog->name;
 7455	load_attr.attach_prog_fd = prog->attach_prog_fd;
 7456	load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
 7457	load_attr.attach_btf_id = prog->attach_btf_id;
 7458	load_attr.kern_version = kern_version;
 7459	load_attr.prog_ifindex = prog->prog_ifindex;
 7460
 7461	/* specify func_info/line_info only if kernel supports them */
 7462	btf_fd = btf__fd(obj->btf);
 7463	if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
 7464		load_attr.prog_btf_fd = btf_fd;
 7465		load_attr.func_info = prog->func_info;
 7466		load_attr.func_info_rec_size = prog->func_info_rec_size;
 7467		load_attr.func_info_cnt = prog->func_info_cnt;
 7468		load_attr.line_info = prog->line_info;
 7469		load_attr.line_info_rec_size = prog->line_info_rec_size;
 7470		load_attr.line_info_cnt = prog->line_info_cnt;
 7471	}
 7472	load_attr.log_level = log_level;
 7473	load_attr.prog_flags = prog->prog_flags;
 7474	load_attr.fd_array = obj->fd_array;
 7475
 7476	/* adjust load_attr if sec_def provides custom preload callback */
 7477	if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
 7478		err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
 7479		if (err < 0) {
 7480			pr_warn("prog '%s': failed to prepare load attributes: %d\n",
 7481				prog->name, err);
 7482			return err;
 7483		}
 7484		insns = prog->insns;
 7485		insns_cnt = prog->insns_cnt;
 7486	}
 7487
 7488	/* allow prog_prepare_load_fn to change expected_attach_type */
 7489	load_attr.expected_attach_type = prog->expected_attach_type;
 7490
 7491	if (obj->gen_loader) {
 7492		bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
 7493				   license, insns, insns_cnt, &load_attr,
 7494				   prog - obj->programs);
 7495		*prog_fd = -1;
 7496		return 0;
 7497	}
 7498
 7499retry_load:
 7500	/* if log_level is zero, we don't request logs initially even if
 7501	 * custom log_buf is specified; if the program load fails, then we'll
 7502	 * bump log_level to 1, use either the custom log_buf or one we allocate
 7503	 * ourselves, and retry the load to get details on what failed
 7504	 */
 7505	if (log_level) {
 7506		if (prog->log_buf) {
 7507			log_buf = prog->log_buf;
 7508			log_buf_size = prog->log_size;
 7509			own_log_buf = false;
 7510		} else if (obj->log_buf) {
 7511			log_buf = obj->log_buf;
 7512			log_buf_size = obj->log_size;
 7513			own_log_buf = false;
 7514		} else {
 7515			log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
 7516			tmp = realloc(log_buf, log_buf_size);
 7517			if (!tmp) {
 7518				ret = -ENOMEM;
 7519				goto out;
 7520			}
 7521			log_buf = tmp;
 7522			log_buf[0] = '\0';
 7523			own_log_buf = true;
 7524		}
 7525	}
 7526
 7527	load_attr.log_buf = log_buf;
 7528	load_attr.log_size = log_buf_size;
 7529	load_attr.log_level = log_level;
 7530
 7531	ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
 7532	if (ret >= 0) {
 7533		if (log_level && own_log_buf) {
 7534			pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
 7535				 prog->name, log_buf);
 7536		}
 7537
 7538		if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
 7539			struct bpf_map *map;
 7540			int i;
 7541
 7542			for (i = 0; i < obj->nr_maps; i++) {
 7543				map = &prog->obj->maps[i];
 7544				if (map->libbpf_type != LIBBPF_MAP_RODATA)
 7545					continue;
 7546
 7547				if (bpf_prog_bind_map(ret, map->fd, NULL)) {
 7548					cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 7549					pr_warn("prog '%s': failed to bind map '%s': %s\n",
 7550						prog->name, map->real_name, cp);
 7551					/* Don't fail hard if can't bind rodata. */
 7552				}
 7553			}
 7554		}
 7555
 7556		*prog_fd = ret;
 7557		ret = 0;
 7558		goto out;
 7559	}
 7560
 7561	if (log_level == 0) {
 7562		log_level = 1;
 7563		goto retry_load;
 7564	}
 7565	/* On ENOSPC, increase log buffer size and retry, unless custom
 7566	 * log_buf is specified.
 7567	 * Be careful to not overflow u32, though. Kernel's log buf size limit
 7568	 * isn't part of UAPI so it can always be bumped to full 4GB. So don't
 7569	 * multiply by 2 unless we are sure we'll fit within 32 bits.
 7570	 * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
 7571	 */
 7572	if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
 7573		goto retry_load;
 7574
 7575	ret = -errno;
 7576
 7577	/* post-process verifier log to improve error descriptions */
 7578	fixup_verifier_log(prog, log_buf, log_buf_size);
 7579
 7580	cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 7581	pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);
 7582	pr_perm_msg(ret);
 7583
 7584	if (own_log_buf && log_buf && log_buf[0] != '\0') {
 7585		pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
 7586			prog->name, log_buf);
 7587	}
 7588
 7589out:
 7590	if (own_log_buf)
 7591		free(log_buf);
 7592	return ret;
 7593}
 7594
 7595static char *find_prev_line(char *buf, char *cur)
 7596{
 7597	char *p;
 7598
 7599	if (cur == buf) /* reached the start of the log buf, no previous line */
 7600		return NULL;
 7601
 7602	p = cur - 1;
 7603	while (p - 1 >= buf && *(p - 1) != '\n')
 7604		p--;
 7605
 7606	return p;
 7607}
 7608
 7609static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
 7610		      char *orig, size_t orig_sz, const char *patch)
 7611{
 7612	/* size of the remaining log content to the right from the to-be-replaced part */
 7613	size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
 7614	size_t patch_sz = strlen(patch);
 7615
 7616	if (patch_sz != orig_sz) {
 7617		/* If patch line(s) are longer than original piece of verifier log,
 7618		 * shift log contents by (patch_sz - orig_sz) bytes to the right
 7619		 * starting from after to-be-replaced part of the log.
 7620		 *
 7621		 * If patch line(s) are shorter than original piece of verifier log,
 7622		 * shift log contents by (orig_sz - patch_sz) bytes to the left
 7623		 * starting from after to-be-replaced part of the log
 7624		 *
 7625		 * We need to be careful about not overflowing available
 7626		 * buf_sz capacity. If that's the case, we'll truncate the end
 7627		 * of the original log, as necessary.
 7628		 */
 7629		if (patch_sz > orig_sz) {
 7630			if (orig + patch_sz >= buf + buf_sz) {
 7631				/* patch is big enough to cover remaining space completely */
 7632				patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1;
 7633				rem_sz = 0;
 7634			} else if (patch_sz - orig_sz > buf_sz - log_sz) {
 7635				/* patch causes part of remaining log to be truncated */
 7636				rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
 7637			}
 7638		}
 7639		/* shift remaining log to the right by calculated amount */
 7640		memmove(orig + patch_sz, orig + orig_sz, rem_sz);
 7641	}
 7642
 7643	memcpy(orig, patch, patch_sz);
 7644}
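
/*
 * For example, with buf = "1: foo\n2: bar\n3: baz\n" and orig pointing at
 * "2: bar\n" (orig_sz = 7), patching in "2: <longer text>\n" shifts the
 * trailing "3: baz\n" right by the size difference (truncating the tail if
 * buf_sz can't fit it) and then copies the patch over the original piece.
 */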
 7645
 7646static void fixup_log_failed_core_relo(struct bpf_program *prog,
 7647				       char *buf, size_t buf_sz, size_t log_sz,
 7648				       char *line1, char *line2, char *line3)
 7649{
 7650	/* Expected log for failed and not properly guarded CO-RE relocation:
 7651	 * line1 -> 123: (85) call unknown#195896080
 7652	 * line2 -> invalid func unknown#195896080
 7653	 * line3 -> <anything else or end of buffer>
 7654	 *
 7655	 * "123" is the index of the instruction that was poisoned. We extract
 7656	 * instruction index to find corresponding CO-RE relocation and
 7657	 * replace this part of the log with more relevant information about
 7658	 * failed CO-RE relocation.
 7659	 */
 7660	const struct bpf_core_relo *relo;
 7661	struct bpf_core_spec spec;
 7662	char patch[512], spec_buf[256];
 7663	int insn_idx, err, spec_len;
 7664
 7665	if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
 7666		return;
 7667
 7668	relo = find_relo_core(prog, insn_idx);
 7669	if (!relo)
 7670		return;
 7671
 7672	err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
 7673	if (err)
 7674		return;
 7675
 7676	spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
 7677	snprintf(patch, sizeof(patch),
 7678		 "%d: <invalid CO-RE relocation>\n"
 7679		 "failed to resolve CO-RE relocation %s%s\n",
 7680		 insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");
 7681
 7682	patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
 7683}
 7684
 7685static void fixup_log_missing_map_load(struct bpf_program *prog,
 7686				       char *buf, size_t buf_sz, size_t log_sz,
 7687				       char *line1, char *line2, char *line3)
 7688{
 7689	/* Expected log for failed and not properly guarded map reference:
 7690	 * line1 -> 123: (85) call unknown#2001000345
 7691	 * line2 -> invalid func unknown#2001000345
 7692	 * line3 -> <anything else or end of buffer>
 7693	 *
 7694	 * "123" is the index of the instruction that was poisoned.
 7695	 * "345" in "2001000345" is a map index in obj->maps to fetch map name.
 7696	 */
 7697	struct bpf_object *obj = prog->obj;
 7698	const struct bpf_map *map;
 7699	int insn_idx, map_idx;
 7700	char patch[128];
 7701
 7702	if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
 7703		return;
 7704
 7705	map_idx -= POISON_LDIMM64_MAP_BASE;
 7706	if (map_idx < 0 || map_idx >= obj->nr_maps)
 7707		return;
 7708	map = &obj->maps[map_idx];
 7709
 7710	snprintf(patch, sizeof(patch),
 7711		 "%d: <invalid BPF map reference>\n"
 7712		 "BPF map '%s' is referenced but wasn't created\n",
 7713		 insn_idx, map->name);
 7714
 7715	patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
 7716}
 7717
 7718static void fixup_log_missing_kfunc_call(struct bpf_program *prog,
 7719					 char *buf, size_t buf_sz, size_t log_sz,
 7720					 char *line1, char *line2, char *line3)
 7721{
 7722	/* Expected log for failed and not properly guarded kfunc call:
 7723	 * line1 -> 123: (85) call unknown#2002000345
 7724	 * line2 -> invalid func unknown#2002000345
 7725	 * line3 -> <anything else or end of buffer>
 7726	 *
 7727	 * "123" is the index of the instruction that was poisoned.
 7728	 * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name.
 7729	 */
 7730	struct bpf_object *obj = prog->obj;
 7731	const struct extern_desc *ext;
 7732	int insn_idx, ext_idx;
 7733	char patch[128];
 7734
 7735	if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2)
 7736		return;
 7737
 7738	ext_idx -= POISON_CALL_KFUNC_BASE;
 7739	if (ext_idx < 0 || ext_idx >= obj->nr_extern)
 7740		return;
 7741	ext = &obj->externs[ext_idx];
 7742
 7743	snprintf(patch, sizeof(patch),
 7744		 "%d: <invalid kfunc call>\n"
 7745		 "kfunc '%s' is referenced but wasn't resolved\n",
 7746		 insn_idx, ext->name);
 7747
 7748	patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
 7749}
 7750
 7751static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
 7752{
 7753	/* look for familiar error patterns in last N lines of the log */
 7754	const size_t max_last_line_cnt = 10;
 7755	char *prev_line, *cur_line, *next_line;
 7756	size_t log_sz;
 7757	int i;
 7758
 7759	if (!buf)
 7760		return;
 7761
 7762	log_sz = strlen(buf) + 1;
 7763	next_line = buf + log_sz - 1;
 7764
 7765	for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
 7766		cur_line = find_prev_line(buf, next_line);
 7767		if (!cur_line)
 7768			return;
 7769
 7770		if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
 7771			prev_line = find_prev_line(buf, cur_line);
 7772			if (!prev_line)
 7773				continue;
 7774
 7775			/* failed CO-RE relocation case */
 7776			fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
 7777						   prev_line, cur_line, next_line);
 7778			return;
 7779		} else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) {
 7780			prev_line = find_prev_line(buf, cur_line);
 7781			if (!prev_line)
 7782				continue;
 7783
 7784			/* reference to uncreated BPF map */
 7785			fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
 7786						   prev_line, cur_line, next_line);
 7787			return;
 7788		} else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) {
 7789			prev_line = find_prev_line(buf, cur_line);
 7790			if (!prev_line)
 7791				continue;
 7792
 7793			/* reference to unresolved kfunc */
 7794			fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz,
 7795						     prev_line, cur_line, next_line);
 7796			return;
 7797		}
 7798	}
 7799}
 7800
 7801static int bpf_program_record_relos(struct bpf_program *prog)
 7802{
 7803	struct bpf_object *obj = prog->obj;
 7804	int i;
 7805
 7806	for (i = 0; i < prog->nr_reloc; i++) {
 7807		struct reloc_desc *relo = &prog->reloc_desc[i];
 7808		struct extern_desc *ext = &obj->externs[relo->ext_idx];
 7809		int kind;
 7810
 7811		switch (relo->type) {
 7812		case RELO_EXTERN_LD64:
 7813			if (ext->type != EXT_KSYM)
 7814				continue;
 7815			kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ?
 7816				BTF_KIND_VAR : BTF_KIND_FUNC;
 7817			bpf_gen__record_extern(obj->gen_loader, ext->name,
 7818					       ext->is_weak, !ext->ksym.type_id,
 7819					       true, kind, relo->insn_idx);
 7820			break;
 7821		case RELO_EXTERN_CALL:
 7822			bpf_gen__record_extern(obj->gen_loader, ext->name,
 7823					       ext->is_weak, false, false, BTF_KIND_FUNC,
 7824					       relo->insn_idx);
 7825			break;
 7826		case RELO_CORE: {
 7827			struct bpf_core_relo cr = {
 7828				.insn_off = relo->insn_idx * 8,
 7829				.type_id = relo->core_relo->type_id,
 7830				.access_str_off = relo->core_relo->access_str_off,
 7831				.kind = relo->core_relo->kind,
 7832			};
 7833
 7834			bpf_gen__record_relo_core(obj->gen_loader, &cr);
 7835			break;
 7836		}
 7837		default:
 7838			continue;
 7839		}
 7840	}
 7841	return 0;
 7842}
 7843
 7844static int
 7845bpf_object__load_progs(struct bpf_object *obj, int log_level)
 7846{
 7847	struct bpf_program *prog;
 7848	size_t i;
 7849	int err;
 7850
 7851	for (i = 0; i < obj->nr_programs; i++) {
 7852		prog = &obj->programs[i];
 7853		err = bpf_object__sanitize_prog(obj, prog);
 7854		if (err)
 7855			return err;
 7856	}
 7857
 7858	for (i = 0; i < obj->nr_programs; i++) {
 7859		prog = &obj->programs[i];
 7860		if (prog_is_subprog(obj, prog))
 7861			continue;
 7862		if (!prog->autoload) {
 7863			pr_debug("prog '%s': skipped loading\n", prog->name);
 7864			continue;
 7865		}
 7866		prog->log_level |= log_level;
 7867
 7868		if (obj->gen_loader)
 7869			bpf_program_record_relos(prog);
 7870
 7871		err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
 7872					   obj->license, obj->kern_version, &prog->fd);
 7873		if (err) {
 7874			pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
 7875			return err;
 7876		}
 7877	}
 7878
 7879	bpf_object__free_relocs(obj);
 7880	return 0;
 7881}
 7882
 7883static const struct bpf_sec_def *find_sec_def(const char *sec_name);
 7884
 7885static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
 7886{
 7887	struct bpf_program *prog;
 7888	int err;
 7889
 7890	bpf_object__for_each_program(prog, obj) {
 7891		prog->sec_def = find_sec_def(prog->sec_name);
 7892		if (!prog->sec_def) {
 7893			/* couldn't guess, but user might manually specify */
 7894			pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
 7895				prog->name, prog->sec_name);
 7896			continue;
 7897		}
 7898
 7899		prog->type = prog->sec_def->prog_type;
 7900		prog->expected_attach_type = prog->sec_def->expected_attach_type;
 7901
 7902		/* sec_def can have custom callback which should be called
 7903		 * after bpf_program is initialized to adjust its properties
 7904		 */
 7905		if (prog->sec_def->prog_setup_fn) {
 7906			err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie);
 7907			if (err < 0) {
 7908				pr_warn("prog '%s': failed to initialize: %d\n",
 7909					prog->name, err);
 7910				return err;
 7911			}
 7912		}
 7913	}
 7914
 7915	return 0;
 7916}
 7917
 7918static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
 7919					  const struct bpf_object_open_opts *opts)
 7920{
 7921	const char *obj_name, *kconfig, *btf_tmp_path;
 7922	struct bpf_object *obj;
 7923	char tmp_name[64];
 7924	int err;
 7925	char *log_buf;
 7926	size_t log_size;
 7927	__u32 log_level;
 7928
 7929	if (elf_version(EV_CURRENT) == EV_NONE) {
 7930		pr_warn("failed to init libelf for %s\n",
 7931			path ? : "(mem buf)");
 7932		return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
 7933	}
 7934
 7935	if (!OPTS_VALID(opts, bpf_object_open_opts))
 7936		return ERR_PTR(-EINVAL);
 7937
 7938	obj_name = OPTS_GET(opts, object_name, NULL);
 7939	if (obj_buf) {
 7940		if (!obj_name) {
 7941			snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
 7942				 (unsigned long)obj_buf,
 7943				 (unsigned long)obj_buf_sz);
 7944			obj_name = tmp_name;
 7945		}
 7946		path = obj_name;
 7947		pr_debug("loading object '%s' from buffer\n", obj_name);
 7948	}
 7949
 7950	log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
 7951	log_size = OPTS_GET(opts, kernel_log_size, 0);
 7952	log_level = OPTS_GET(opts, kernel_log_level, 0);
 7953	if (log_size > UINT_MAX)
 7954		return ERR_PTR(-EINVAL);
 7955	if (log_size && !log_buf)
 7956		return ERR_PTR(-EINVAL);
 7957
 7958	obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
 7959	if (IS_ERR(obj))
 7960		return obj;
 7961
 7962	obj->log_buf = log_buf;
 7963	obj->log_size = log_size;
 7964	obj->log_level = log_level;
 7965
 7966	btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
 7967	if (btf_tmp_path) {
 7968		if (strlen(btf_tmp_path) >= PATH_MAX) {
 7969			err = -ENAMETOOLONG;
 7970			goto out;
 7971		}
 7972		obj->btf_custom_path = strdup(btf_tmp_path);
 7973		if (!obj->btf_custom_path) {
 7974			err = -ENOMEM;
 7975			goto out;
 7976		}
 7977	}
 7978
 7979	kconfig = OPTS_GET(opts, kconfig, NULL);
 7980	if (kconfig) {
 7981		obj->kconfig = strdup(kconfig);
 7982		if (!obj->kconfig) {
 7983			err = -ENOMEM;
 7984			goto out;
 7985		}
 7986	}
 7987
 7988	err = bpf_object__elf_init(obj);
 7989	err = err ? : bpf_object__check_endianness(obj);
 7990	err = err ? : bpf_object__elf_collect(obj);
 7991	err = err ? : bpf_object__collect_externs(obj);
 7992	err = err ? : bpf_object_fixup_btf(obj);
 7993	err = err ? : bpf_object__init_maps(obj, opts);
 7994	err = err ? : bpf_object_init_progs(obj, opts);
 7995	err = err ? : bpf_object__collect_relos(obj);
 7996	if (err)
 7997		goto out;
 7998
 7999	bpf_object__elf_finish(obj);
 8000
 8001	return obj;
 8002out:
 8003	bpf_object__close(obj);
 8004	return ERR_PTR(err);
 8005}
 8006
 8007struct bpf_object *
 8008bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
 8009{
 8010	if (!path)
 8011		return libbpf_err_ptr(-EINVAL);
 8012
 8013	pr_debug("loading %s\n", path);
 8014
 8015	return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));
 8016}
 8017
 8018struct bpf_object *bpf_object__open(const char *path)
 8019{
 8020	return bpf_object__open_file(path, NULL);
 8021}
 8022
 8023struct bpf_object *
 8024bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
 8025		     const struct bpf_object_open_opts *opts)
 8026{
 8027	if (!obj_buf || obj_buf_sz == 0)
 8028		return libbpf_err_ptr(-EINVAL);
 8029
 8030	return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));
 8031}
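
/*
 * Illustrative usage sketch (not part of libbpf): a minimal open/load flow
 * built on the entry points above (file name and option values are
 * hypothetical):
 *
 *   LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_level = 1);
 *   struct bpf_object *obj;
 *
 *   obj = bpf_object__open_file("prog.bpf.o", &opts);
 *   if (!obj)            // NULL + errno on error in libbpf 1.0 mode
 *           return -errno;
 *   if (bpf_object__load(obj)) {
 *           bpf_object__close(obj);
 *           return -1;
 *   }
 */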
 8032
 8033static int bpf_object_unload(struct bpf_object *obj)
 8034{
 8035	size_t i;
 8036
 8037	if (!obj)
 8038		return libbpf_err(-EINVAL);
 8039
 8040	for (i = 0; i < obj->nr_maps; i++) {
 8041		zclose(obj->maps[i].fd);
 8042		if (obj->maps[i].st_ops)
 8043			zfree(&obj->maps[i].st_ops->kern_vdata);
 8044	}
 8045
 8046	for (i = 0; i < obj->nr_programs; i++)
 8047		bpf_program__unload(&obj->programs[i]);
 8048
 8049	return 0;
 8050}
 8051
 8052static int bpf_object__sanitize_maps(struct bpf_object *obj)
 8053{
 8054	struct bpf_map *m;
 8055
 8056	bpf_object__for_each_map(m, obj) {
 8057		if (!bpf_map__is_internal(m))
 8058			continue;
 8059		if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
 8060			m->def.map_flags &= ~BPF_F_MMAPABLE;
 8061	}
 8062
 8063	return 0;
 8064}
 8065
 8066int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
 8067{
 8068	char sym_type, sym_name[500];
 8069	unsigned long long sym_addr;
 8070	int ret, err = 0;
 8071	FILE *f;
 8072
 8073	f = fopen("/proc/kallsyms", "re");
 8074	if (!f) {
 8075		err = -errno;
 8076		pr_warn("failed to open /proc/kallsyms: %d\n", err);
 8077		return err;
 8078	}
 8079
 8080	while (true) {
 8081		ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
 8082			     &sym_addr, &sym_type, sym_name);
 8083		if (ret == EOF && feof(f))
 8084			break;
 8085		if (ret != 3) {
 8086			pr_warn("failed to read kallsyms entry: %d\n", ret);
 8087			err = -EINVAL;
 8088			break;
 8089		}
 8090
 8091		err = cb(sym_addr, sym_type, sym_name, ctx);
 8092		if (err)
 8093			break;
 8094	}
 8095
 8096	fclose(f);
 8097	return err;
 8098}
 8099
 8100static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
 8101		       const char *sym_name, void *ctx)
 8102{
 8103	struct bpf_object *obj = ctx;
 8104	const struct btf_type *t;
 8105	struct extern_desc *ext;
 8106
 8107	ext = find_extern_by_name(obj, sym_name);
 8108	if (!ext || ext->type != EXT_KSYM)
 8109		return 0;
 8110
 8111	t = btf__type_by_id(obj->btf, ext->btf_id);
 8112	if (!btf_is_var(t))
 8113		return 0;
 8114
 8115	if (ext->is_set && ext->ksym.addr != sym_addr) {
 8116		pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
 8117			sym_name, ext->ksym.addr, sym_addr);
 8118		return -EINVAL;
 8119	}
 8120	if (!ext->is_set) {
 8121		ext->is_set = true;
 8122		ext->ksym.addr = sym_addr;
 8123		pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
 8124	}
 8125	return 0;
 8126}
 8127
 8128static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
 8129{
 8130	return libbpf_kallsyms_parse(kallsyms_cb, obj);
 8131}
 8132
 8133static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
 8134			    __u16 kind, struct btf **res_btf,
 8135			    struct module_btf **res_mod_btf)
 8136{
 8137	struct module_btf *mod_btf;
 8138	struct btf *btf;
 8139	int i, id, err;
 8140
 8141	btf = obj->btf_vmlinux;
 8142	mod_btf = NULL;
 8143	id = btf__find_by_name_kind(btf, ksym_name, kind);
 8144
 8145	if (id == -ENOENT) {
 8146		err = load_module_btfs(obj);
 8147		if (err)
 8148			return err;
 8149
 8150		for (i = 0; i < obj->btf_module_cnt; i++) {
 8151			/* we assume module_btf's BTF FD is always >0 */
 8152			mod_btf = &obj->btf_modules[i];
 8153			btf = mod_btf->btf;
 8154			id = btf__find_by_name_kind_own(btf, ksym_name, kind);
 8155			if (id != -ENOENT)
 8156				break;
 8157		}
 8158	}
 8159	if (id <= 0)
 8160		return -ESRCH;
 8161
 8162	*res_btf = btf;
 8163	*res_mod_btf = mod_btf;
 8164	return id;
 8165}
 8166
 8167static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
 8168					       struct extern_desc *ext)
 8169{
 8170	const struct btf_type *targ_var, *targ_type;
 8171	__u32 targ_type_id, local_type_id;
 8172	struct module_btf *mod_btf = NULL;
 8173	const char *targ_var_name;
 8174	struct btf *btf = NULL;
 8175	int id, err;
 8176
 8177	id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
 8178	if (id < 0) {
 8179		if (id == -ESRCH && ext->is_weak)
 8180			return 0;
 8181		pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
 8182			ext->name);
 8183		return id;
 8184	}
 8185
 8186	/* find local type_id */
 8187	local_type_id = ext->ksym.type_id;
 8188
 8189	/* find target type_id */
 8190	targ_var = btf__type_by_id(btf, id);
 8191	targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
 8192	targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
 8193
 8194	err = bpf_core_types_are_compat(obj->btf, local_type_id,
 8195					btf, targ_type_id);
 8196	if (err <= 0) {
 8197		const struct btf_type *local_type;
 8198		const char *targ_name, *local_name;
 8199
 8200		local_type = btf__type_by_id(obj->btf, local_type_id);
 8201		local_name = btf__name_by_offset(obj->btf, local_type->name_off);
 8202		targ_name = btf__name_by_offset(btf, targ_type->name_off);
 8203
 8204		pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
 8205			ext->name, local_type_id,
 8206			btf_kind_str(local_type), local_name, targ_type_id,
 8207			btf_kind_str(targ_type), targ_name);
 8208		return -EINVAL;
 8209	}
 8210
 8211	ext->is_set = true;
 8212	ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
 8213	ext->ksym.kernel_btf_id = id;
 8214	pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
 8215		 ext->name, id, btf_kind_str(targ_var), targ_var_name);
 8216
 8217	return 0;
 8218}
 8219
 8220static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
 8221						struct extern_desc *ext)
 8222{
 8223	int local_func_proto_id, kfunc_proto_id, kfunc_id;
 8224	struct module_btf *mod_btf = NULL;
 8225	const struct btf_type *kern_func;
 8226	struct btf *kern_btf = NULL;
 8227	int ret;
 8228
 8229	local_func_proto_id = ext->ksym.type_id;
 8230
 8231	kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf,
 8232				    &mod_btf);
 8233	if (kfunc_id < 0) {
 8234		if (kfunc_id == -ESRCH && ext->is_weak)
 8235			return 0;
 8236		pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
 8237			ext->name);
 8238		return kfunc_id;
 8239	}
 8240
 8241	kern_func = btf__type_by_id(kern_btf, kfunc_id);
 8242	kfunc_proto_id = kern_func->type;
 8243
 8244	ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
 8245					kern_btf, kfunc_proto_id);
 8246	if (ret <= 0) {
 8247		if (ext->is_weak)
 8248			return 0;
 8249
 8250		pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n",
 8251			ext->name, local_func_proto_id,
 8252			mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id);
 8253		return -EINVAL;
 8254	}
 8255
 8256	/* set index for module BTF fd in fd_array, if unset */
 8257	if (mod_btf && !mod_btf->fd_array_idx) {
 8258		/* insn->off is s16 */
 8259		if (obj->fd_array_cnt == INT16_MAX) {
 8260			pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
 8261				ext->name, mod_btf->fd_array_idx);
 8262			return -E2BIG;
 8263		}
 8264		/* Cannot use index 0 for module BTF fd */
 8265		if (!obj->fd_array_cnt)
 8266			obj->fd_array_cnt = 1;
 8267
 8268		ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
 8269					obj->fd_array_cnt + 1);
 8270		if (ret)
 8271			return ret;
 8272		mod_btf->fd_array_idx = obj->fd_array_cnt;
 8273		/* we assume module BTF FD is always >0 */
 8274		obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
 8275	}
 8276
 8277	ext->is_set = true;
 8278	ext->ksym.kernel_btf_id = kfunc_id;
 8279	ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
 8280	/* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data()
 8281	 * populates FD into ld_imm64 insn when it's used to point to kfunc.
 8282	 * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call.
 8283	 * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64.
 8284	 */
 8285	ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
 8286	pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n",
 8287		 ext->name, mod_btf ? mod_btf->name : "vmlinux", kfunc_id);
 8288
 8289	return 0;
 8290}
 8291
 8292static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
 8293{
 8294	const struct btf_type *t;
 8295	struct extern_desc *ext;
 8296	int i, err;
 8297
 8298	for (i = 0; i < obj->nr_extern; i++) {
 8299		ext = &obj->externs[i];
 8300		if (ext->type != EXT_KSYM || !ext->ksym.type_id)
 8301			continue;
 8302
 8303		if (obj->gen_loader) {
 8304			ext->is_set = true;
 8305			ext->ksym.kernel_btf_obj_fd = 0;
 8306			ext->ksym.kernel_btf_id = 0;
 8307			continue;
 8308		}
 8309		t = btf__type_by_id(obj->btf, ext->btf_id);
 8310		if (btf_is_var(t))
 8311			err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
 8312		else
 8313			err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
 8314		if (err)
 8315			return err;
 8316	}
 8317	return 0;
 8318}
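
/*
 * Illustrative sketch (not part of libbpf): the ksym externs resolved above
 * are declared on the BPF program side with the __ksym attribute from
 * bpf_helpers.h; the symbol choices below are just examples:
 *
 *   extern const int bpf_prog_active __ksym;        // variable ksym
 *   extern void bpf_rcu_read_lock(void) __ksym;     // kfunc ksym
 *
 * Typed variable ksyms are matched against kernel (or module) BTF VARs and
 * kfuncs against BTF FUNCs; typeless ksyms fall back to /proc/kallsyms.
 */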
 8319
 8320static int bpf_object__resolve_externs(struct bpf_object *obj,
 8321				       const char *extra_kconfig)
 8322{
 8323	bool need_config = false, need_kallsyms = false;
 8324	bool need_vmlinux_btf = false;
 8325	struct extern_desc *ext;
 8326	void *kcfg_data = NULL;
 8327	int err, i;
 8328
 8329	if (obj->nr_extern == 0)
 8330		return 0;
 8331
 8332	if (obj->kconfig_map_idx >= 0)
 8333		kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
 8334
 8335	for (i = 0; i < obj->nr_extern; i++) {
 8336		ext = &obj->externs[i];
 8337
 8338		if (ext->type == EXT_KSYM) {
 8339			if (ext->ksym.type_id)
 8340				need_vmlinux_btf = true;
 8341			else
 8342				need_kallsyms = true;
 8343			continue;
 8344		} else if (ext->type == EXT_KCFG) {
 8345			void *ext_ptr = kcfg_data + ext->kcfg.data_off;
 8346			__u64 value = 0;
 8347
 8348			/* Kconfig externs need actual /proc/config.gz */
 8349			if (str_has_pfx(ext->name, "CONFIG_")) {
 8350				need_config = true;
 8351				continue;
 8352			}
 8353
 8354			/* Virtual kcfg externs are handled specially by libbpf */
 8355			if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
 8356				value = get_kernel_version();
 8357				if (!value) {
 8358					pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
 8359					return -EINVAL;
 8360				}
 8361			} else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
 8362				value = kernel_supports(obj, FEAT_BPF_COOKIE);
 8363			} else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
 8364				value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
 8365			} else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
 8366				/* Currently libbpf supports only CONFIG_- and LINUX_-prefixed
 8367				 * __kconfig externs, where LINUX_ ones are virtual and filled out
 8368				 * specially by libbpf (their values don't come from Kconfig).
 8369				 * If a LINUX_xxx variable is not recognized by libbpf but is marked
 8370				 * __weak, it defaults to zero, just like CONFIG_xxx
 8371				 * externs.
 8372				 */
 8373				pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
 8374				return -EINVAL;
 8375			}
 8376
 8377			err = set_kcfg_value_num(ext, ext_ptr, value);
 8378			if (err)
 8379				return err;
 8380			pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
 8381				 ext->name, (long long)value);
 8382		} else {
 8383			pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
 8384			return -EINVAL;
 8385		}
 8386	}
 8387	if (need_config && extra_kconfig) {
 8388		err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
 8389		if (err)
 8390			return -EINVAL;
 8391		need_config = false;
 8392		for (i = 0; i < obj->nr_extern; i++) {
 8393			ext = &obj->externs[i];
 8394			if (ext->type == EXT_KCFG && !ext->is_set) {
 8395				need_config = true;
 8396				break;
 8397			}
 8398		}
 8399	}
 8400	if (need_config) {
 8401		err = bpf_object__read_kconfig_file(obj, kcfg_data);
 8402		if (err)
 8403			return -EINVAL;
 8404	}
 8405	if (need_kallsyms) {
 8406		err = bpf_object__read_kallsyms_file(obj);
 8407		if (err)
 8408			return -EINVAL;
 8409	}
 8410	if (need_vmlinux_btf) {
 8411		err = bpf_object__resolve_ksyms_btf_id(obj);
 8412		if (err)
 8413			return -EINVAL;
 8414	}
 8415	for (i = 0; i < obj->nr_extern; i++) {
 8416		ext = &obj->externs[i];
 8417
 8418		if (!ext->is_set && !ext->is_weak) {
 8419			pr_warn("extern '%s' (strong): not resolved\n", ext->name);
 8420			return -ESRCH;
 8421		} else if (!ext->is_set) {
 8422			pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
 8423				 ext->name);
 8424		}
 8425	}
 8426
 8427	return 0;
 8428}
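
/*
 * Illustrative sketch (not part of libbpf): the kcfg externs handled above
 * are declared on the BPF program side with the __kconfig attribute from
 * bpf_helpers.h; the exact knobs below are just examples:
 *
 *   extern unsigned int LINUX_KERNEL_VERSION __kconfig;        // virtual
 *   extern _Bool CONFIG_BPF_JIT_ALWAYS_ON __kconfig __weak;    // Kconfig
 *
 * CONFIG_* values come from /proc/config.gz (or the extra kconfig string),
 * while LINUX_* ones are synthesized by libbpf itself, as implemented above.
 */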
 8429
 8430static void bpf_map_prepare_vdata(const struct bpf_map *map)
 8431{
 8432	struct bpf_struct_ops *st_ops;
 8433	__u32 i;
 8434
 8435	st_ops = map->st_ops;
 8436	for (i = 0; i < btf_vlen(st_ops->type); i++) {
 8437		struct bpf_program *prog = st_ops->progs[i];
 8438		void *kern_data;
 8439		int prog_fd;
 8440
 8441		if (!prog)
 8442			continue;
 8443
 8444		prog_fd = bpf_program__fd(prog);
 8445		kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
 8446		*(unsigned long *)kern_data = prog_fd;
 8447	}
 8448}
 8449
 8450static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
 8451{
 8452	int i;
 8453
 8454	for (i = 0; i < obj->nr_maps; i++)
 8455		if (bpf_map__is_struct_ops(&obj->maps[i]))
 8456			bpf_map_prepare_vdata(&obj->maps[i]);
 8457
 8458	return 0;
 8459}
 8460
 8461static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
 8462{
 8463	int err, i;
 8464
 8465	if (!obj)
 8466		return libbpf_err(-EINVAL);
 8467
 8468	if (obj->loaded) {
 8469		pr_warn("object '%s': load can't be attempted twice\n", obj->name);
 8470		return libbpf_err(-EINVAL);
 8471	}
 8472
 8473	if (obj->gen_loader)
 8474		bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
 8475
 8476	err = bpf_object__probe_loading(obj);
 8477	err = err ? : bpf_object__load_vmlinux_btf(obj, false);
 8478	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
 8479	err = err ? : bpf_object__sanitize_maps(obj);
 8480	err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
 8481	err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
 8482	err = err ? : bpf_object__sanitize_and_load_btf(obj);
 8483	err = err ? : bpf_object__create_maps(obj);
 8484	err = err ? : bpf_object__load_progs(obj, extra_log_level);
 8485	err = err ? : bpf_object_init_prog_arrays(obj);
 8486	err = err ? : bpf_object_prepare_struct_ops(obj);
 8487
 8488	if (obj->gen_loader) {
 8489		/* reset FDs */
 8490		if (obj->btf)
 8491			btf__set_fd(obj->btf, -1);
 8492		if (!err)
 8493			err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
 8494	}
 8495
 8496	/* clean up fd_array */
 8497	zfree(&obj->fd_array);
 8498
 8499	/* clean up module BTFs */
 8500	for (i = 0; i < obj->btf_module_cnt; i++) {
 8501		close(obj->btf_modules[i].fd);
 8502		btf__free(obj->btf_modules[i].btf);
 8503		free(obj->btf_modules[i].name);
 8504	}
 8505	free(obj->btf_modules);
 8506
 8507	/* clean up vmlinux BTF */
 8508	btf__free(obj->btf_vmlinux);
 8509	obj->btf_vmlinux = NULL;
 8510
 8511	obj->loaded = true; /* whether successful or not */
 8512
 8513	if (err)
 8514		goto out;
 8515
 8516	return 0;
 8517out:
 8518	/* unpin any maps that were auto-pinned during load */
 8519	for (i = 0; i < obj->nr_maps; i++)
 8520		if (obj->maps[i].pinned && !obj->maps[i].reused)
 8521			bpf_map__unpin(&obj->maps[i], NULL);
 8522
 8523	bpf_object_unload(obj);
 8524	pr_warn("failed to load object '%s'\n", obj->path);
 8525	return libbpf_err(err);
 8526}
 8527
 8528int bpf_object__load(struct bpf_object *obj)
 8529{
 8530	return bpf_object_load(obj, 0, NULL);
 8531}
 8532
 8533static int make_parent_dir(const char *path)
 8534{
 8535	char *cp, errmsg[STRERR_BUFSIZE];
 8536	char *dname, *dir;
 8537	int err = 0;
 8538
 8539	dname = strdup(path);
 8540	if (dname == NULL)
 8541		return -ENOMEM;
 8542
 8543	dir = dirname(dname);
 8544	if (mkdir(dir, 0700) && errno != EEXIST)
 8545		err = -errno;
 8546
 8547	free(dname);
 8548	if (err) {
 8549		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
 8550		pr_warn("failed to mkdir %s: %s\n", path, cp);
 8551	}
 8552	return err;
 8553}
 8554
 8555static int check_path(const char *path)
 8556{
 8557	char *cp, errmsg[STRERR_BUFSIZE];
 8558	struct statfs st_fs;
 8559	char *dname, *dir;
 8560	int err = 0;
 8561
 8562	if (path == NULL)
 8563		return -EINVAL;
 8564
 8565	dname = strdup(path);
 8566	if (dname == NULL)
 8567		return -ENOMEM;
 8568
 8569	dir = dirname(dname);
 8570	if (statfs(dir, &st_fs)) {
 8571		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 8572		pr_warn("failed to statfs %s: %s\n", dir, cp);
 8573		err = -errno;
 8574	}
 8575	free(dname);
 8576
 8577	if (!err && st_fs.f_type != BPF_FS_MAGIC) {
 8578		pr_warn("specified path %s is not on BPF FS\n", path);
 8579		err = -EINVAL;
 8580	}
 8581
 8582	return err;
 8583}
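
/*
 * Note: check_path() requires the pin location to live on a BPF filesystem
 * mount. A sketch of setting one up in case it isn't already mounted (most
 * distros mount it at /sys/fs/bpf by default):
 *
 *   mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
 */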
 8584
 8585int bpf_program__pin(struct bpf_program *prog, const char *path)
 8586{
 8587	char *cp, errmsg[STRERR_BUFSIZE];
 8588	int err;
 8589
 8590	if (prog->fd < 0) {
 8591		pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
 8592		return libbpf_err(-EINVAL);
 8593	}
 8594
 8595	err = make_parent_dir(path);
 8596	if (err)
 8597		return libbpf_err(err);
 8598
 8599	err = check_path(path);
 8600	if (err)
 8601		return libbpf_err(err);
 8602
 8603	if (bpf_obj_pin(prog->fd, path)) {
 8604		err = -errno;
 8605		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 8606		pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
 8607		return libbpf_err(err);
 8608	}
 8609
 8610	pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
 8611	return 0;
 8612}
 8613
 8614int bpf_program__unpin(struct bpf_program *prog, const char *path)
 8615{
 8616	int err;
 8617
 8618	if (prog->fd < 0) {
 8619		pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
 8620		return libbpf_err(-EINVAL);
 8621	}
 8622
 8623	err = check_path(path);
 8624	if (err)
 8625		return libbpf_err(err);
 8626
 8627	err = unlink(path);
 8628	if (err)
 8629		return libbpf_err(-errno);
 8630
 8631	pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
 8632	return 0;
 8633}
 8634
 8635int bpf_map__pin(struct bpf_map *map, const char *path)
 8636{
 8637	char *cp, errmsg[STRERR_BUFSIZE];
 8638	int err;
 8639
 8640	if (map == NULL) {
 8641		pr_warn("invalid map pointer\n");
 8642		return libbpf_err(-EINVAL);
 8643	}
 8644
 8645	if (map->pin_path) {
 8646		if (path && strcmp(path, map->pin_path)) {
 8647			pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
 8648				bpf_map__name(map), map->pin_path, path);
 8649			return libbpf_err(-EINVAL);
 8650		} else if (map->pinned) {
 8651			pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
 8652				 bpf_map__name(map), map->pin_path);
 8653			return 0;
 8654		}
 8655	} else {
 8656		if (!path) {
 8657			pr_warn("missing a path to pin map '%s' at\n",
 8658				bpf_map__name(map));
 8659			return libbpf_err(-EINVAL);
 8660		} else if (map->pinned) {
 8661			pr_warn("map '%s' already pinned\n", bpf_map__name(map));
 8662			return libbpf_err(-EEXIST);
 8663		}
 8664
 8665		map->pin_path = strdup(path);
 8666		if (!map->pin_path) {
 8667			err = -errno;
 8668			goto out_err;
 8669		}
 8670	}
 8671
 8672	err = make_parent_dir(map->pin_path);
 8673	if (err)
 8674		return libbpf_err(err);
 8675
 8676	err = check_path(map->pin_path);
 8677	if (err)
 8678		return libbpf_err(err);
 8679
 8680	if (bpf_obj_pin(map->fd, map->pin_path)) {
 8681		err = -errno;
 8682		goto out_err;
 8683	}
 8684
 8685	map->pinned = true;
 8686	pr_debug("pinned map '%s'\n", map->pin_path);
 8687
 8688	return 0;
 8689
 8690out_err:
 8691	cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
 8692	pr_warn("failed to pin map: %s\n", cp);
 8693	return libbpf_err(err);
 8694}
 8695
 8696int bpf_map__unpin(struct bpf_map *map, const char *path)
 8697{
 8698	int err;
 8699
 8700	if (map == NULL) {
 8701		pr_warn("invalid map pointer\n");
 8702		return libbpf_err(-EINVAL);
 8703	}
 8704
 8705	if (map->pin_path) {
 8706		if (path && strcmp(path, map->pin_path)) {
 8707			pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
 8708				bpf_map__name(map), map->pin_path, path);
 8709			return libbpf_err(-EINVAL);
 8710		}
 8711		path = map->pin_path;
 8712	} else if (!path) {
 8713		pr_warn("no path to unpin map '%s' from\n",
 8714			bpf_map__name(map));
 8715		return libbpf_err(-EINVAL);
 8716	}
 8717
 8718	err = check_path(path);
 8719	if (err)
 8720		return libbpf_err(err);
 8721
 8722	err = unlink(path);
 8723	if (err != 0)
 8724		return libbpf_err(-errno);
 8725
 8726	map->pinned = false;
 8727	pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
 8728
 8729	return 0;
 8730}
 8731
 8732int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
 8733{
 8734	char *new = NULL;
 8735
 8736	if (path) {
 8737		new = strdup(path);
 8738		if (!new)
 8739			return libbpf_err(-errno);
 8740	}
 8741
 8742	free(map->pin_path);
 8743	map->pin_path = new;
 8744	return 0;
 8745}
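
/*
 * Usage sketch (editor's addition): setting a pin path before
 * bpf_object__load() makes libbpf reuse an already pinned map at that path,
 * or pin the newly created one there. Map name and path are hypothetical.
 *
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, "counters");
 *
 *	if (map)
 *		err = bpf_map__set_pin_path(map, "/sys/fs/bpf/tc/globals/counters");
 *	if (!err)
 *		err = bpf_object__load(obj);
 */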
 8746
 8747__alias(bpf_map__pin_path)
 8748const char *bpf_map__get_pin_path(const struct bpf_map *map);
 8749
 8750const char *bpf_map__pin_path(const struct bpf_map *map)
 8751{
 8752	return map->pin_path;
 8753}
 8754
 8755bool bpf_map__is_pinned(const struct bpf_map *map)
 8756{
 8757	return map->pinned;
 8758}
 8759
 8760static void sanitize_pin_path(char *s)
 8761{
 8762	/* bpffs disallows periods in path names */
 8763	while (*s) {
 8764		if (*s == '.')
 8765			*s = '_';
 8766		s++;
 8767	}
 8768}
 8769
 8770int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
 8771{
 8772	struct bpf_map *map;
 8773	int err;
 8774
 8775	if (!obj)
 8776		return libbpf_err(-ENOENT);
 8777
 8778	if (!obj->loaded) {
 8779		pr_warn("object not yet loaded; load it first\n");
 8780		return libbpf_err(-ENOENT);
 8781	}
 8782
 8783	bpf_object__for_each_map(map, obj) {
 8784		char *pin_path = NULL;
 8785		char buf[PATH_MAX];
 8786
 8787		if (!map->autocreate)
 8788			continue;
 8789
 8790		if (path) {
 8791			err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
 8792			if (err)
 8793				goto err_unpin_maps;
 8794			sanitize_pin_path(buf);
 8795			pin_path = buf;
 8796		} else if (!map->pin_path) {
 8797			continue;
 8798		}
 8799
 8800		err = bpf_map__pin(map, pin_path);
 8801		if (err)
 8802			goto err_unpin_maps;
 8803	}
 8804
 8805	return 0;
 8806
 8807err_unpin_maps:
 8808	while ((map = bpf_object__prev_map(obj, map))) {
 8809		if (!map->pin_path)
 8810			continue;
 8811
 8812		bpf_map__unpin(map, NULL);
 8813	}
 8814
 8815	return libbpf_err(err);
 8816}
 8817
 8818int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
 8819{
 8820	struct bpf_map *map;
 8821	int err;
 8822
 8823	if (!obj)
 8824		return libbpf_err(-ENOENT);
 8825
 8826	bpf_object__for_each_map(map, obj) {
 8827		char *pin_path = NULL;
 8828		char buf[PATH_MAX];
 8829
 8830		if (path) {
 8831			err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
 8832			if (err)
 8833				return libbpf_err(err);
 8834			sanitize_pin_path(buf);
 8835			pin_path = buf;
 8836		} else if (!map->pin_path) {
 8837			continue;
 8838		}
 8839
 8840		err = bpf_map__unpin(map, pin_path);
 8841		if (err)
 8842			return libbpf_err(err);
 8843	}
 8844
 8845	return 0;
 8846}
 8847
 8848int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
 8849{
 8850	struct bpf_program *prog;
 8851	char buf[PATH_MAX];
 8852	int err;
 8853
 8854	if (!obj)
 8855		return libbpf_err(-ENOENT);
 8856
 8857	if (!obj->loaded) {
 8858		pr_warn("object not yet loaded; load it first\n");
 8859		return libbpf_err(-ENOENT);
 8860	}
 8861
 8862	bpf_object__for_each_program(prog, obj) {
 8863		err = pathname_concat(buf, sizeof(buf), path, prog->name);
 8864		if (err)
 8865			goto err_unpin_programs;
 8866
 8867		err = bpf_program__pin(prog, buf);
 8868		if (err)
 8869			goto err_unpin_programs;
 8870	}
 8871
 8872	return 0;
 8873
 8874err_unpin_programs:
 8875	while ((prog = bpf_object__prev_program(obj, prog))) {
 8876		if (pathname_concat(buf, sizeof(buf), path, prog->name))
 8877			continue;
 8878
 8879		bpf_program__unpin(prog, buf);
 8880	}
 8881
 8882	return libbpf_err(err);
 8883}
 8884
 8885int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
 8886{
 8887	struct bpf_program *prog;
 8888	int err;
 8889
 8890	if (!obj)
 8891		return libbpf_err(-ENOENT);
 8892
 8893	bpf_object__for_each_program(prog, obj) {
 8894		char buf[PATH_MAX];
 8895
 8896		err = pathname_concat(buf, sizeof(buf), path, prog->name);
 8897		if (err)
 8898			return libbpf_err(err);
 8899
 8900		err = bpf_program__unpin(prog, buf);
 8901		if (err)
 8902			return libbpf_err(err);
 8903	}
 8904
 8905	return 0;
 8906}
 8907
 8908int bpf_object__pin(struct bpf_object *obj, const char *path)
 8909{
 8910	int err;
 8911
 8912	err = bpf_object__pin_maps(obj, path);
 8913	if (err)
 8914		return libbpf_err(err);
 8915
 8916	err = bpf_object__pin_programs(obj, path);
 8917	if (err) {
 8918		bpf_object__unpin_maps(obj, path);
 8919		return libbpf_err(err);
 8920	}
 8921
 8922	return 0;
 8923}
 8924
 8925int bpf_object__unpin(struct bpf_object *obj, const char *path)
 8926{
 8927	int err;
 8928
 8929	err = bpf_object__unpin_programs(obj, path);
 8930	if (err)
 8931		return libbpf_err(err);
 8932
 8933	err = bpf_object__unpin_maps(obj, path);
 8934	if (err)
 8935		return libbpf_err(err);
 8936
 8937	return 0;
 8938}
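
/*
 * Usage sketch (editor's addition): pinning all maps and programs of a
 * loaded object under one bpffs directory, and undoing it later. The
 * directory path is hypothetical.
 *
 *	err = bpf_object__load(obj);
 *	if (!err)
 *		err = bpf_object__pin(obj, "/sys/fs/bpf/myapp");
 *	...
 *	bpf_object__unpin(obj, "/sys/fs/bpf/myapp");
 */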
 8939
 8940static void bpf_map__destroy(struct bpf_map *map)
 8941{
 8942	if (map->inner_map) {
 8943		bpf_map__destroy(map->inner_map);
 8944		zfree(&map->inner_map);
 8945	}
 8946
 8947	zfree(&map->init_slots);
 8948	map->init_slots_sz = 0;
 8949
 8950	if (map->mmaped) {
 8951		size_t mmap_sz;
 8952
 8953		mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
 8954		munmap(map->mmaped, mmap_sz);
 8955		map->mmaped = NULL;
 8956	}
 8957
 8958	if (map->st_ops) {
 8959		zfree(&map->st_ops->data);
 8960		zfree(&map->st_ops->progs);
 8961		zfree(&map->st_ops->kern_func_off);
 8962		zfree(&map->st_ops);
 8963	}
 8964
 8965	zfree(&map->name);
 8966	zfree(&map->real_name);
 8967	zfree(&map->pin_path);
 8968
 8969	if (map->fd >= 0)
 8970		zclose(map->fd);
 8971}
 8972
 8973void bpf_object__close(struct bpf_object *obj)
 8974{
 8975	size_t i;
 8976
 8977	if (IS_ERR_OR_NULL(obj))
 8978		return;
 8979
 8980	usdt_manager_free(obj->usdt_man);
 8981	obj->usdt_man = NULL;
 8982
 8983	bpf_gen__free(obj->gen_loader);
 8984	bpf_object__elf_finish(obj);
 8985	bpf_object_unload(obj);
 8986	btf__free(obj->btf);
 8987	btf__free(obj->btf_vmlinux);
 8988	btf_ext__free(obj->btf_ext);
 8989
 8990	for (i = 0; i < obj->nr_maps; i++)
 8991		bpf_map__destroy(&obj->maps[i]);
 8992
 8993	zfree(&obj->btf_custom_path);
 8994	zfree(&obj->kconfig);
 8995
 8996	for (i = 0; i < obj->nr_extern; i++)
 8997		zfree(&obj->externs[i].essent_name);
 8998
 8999	zfree(&obj->externs);
 9000	obj->nr_extern = 0;
 9001
 9002	zfree(&obj->maps);
 9003	obj->nr_maps = 0;
 9004
 9005	if (obj->programs && obj->nr_programs) {
 9006		for (i = 0; i < obj->nr_programs; i++)
 9007			bpf_program__exit(&obj->programs[i]);
 9008	}
 9009	zfree(&obj->programs);
 9010
 9011	free(obj);
 9012}
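
/*
 * Usage sketch (editor's addition) of the typical object lifecycle around
 * bpf_object__close(). "prog.bpf.o" is a hypothetical object file.
 *
 *	struct bpf_object *obj = bpf_object__open("prog.bpf.o");
 *
 *	if (!obj)
 *		return -errno;
 *	if (bpf_object__load(obj)) {
 *		bpf_object__close(obj);
 *		return -1;
 *	}
 *	... attach programs, use maps ...
 *	bpf_object__close(obj); // releases maps, programs, BTF and FDs
 */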
 9013
 9014const char *bpf_object__name(const struct bpf_object *obj)
 9015{
 9016	return obj ? obj->name : libbpf_err_ptr(-EINVAL);
 9017}
 9018
 9019unsigned int bpf_object__kversion(const struct bpf_object *obj)
 9020{
 9021	return obj ? obj->kern_version : 0;
 9022}
 9023
 9024struct btf *bpf_object__btf(const struct bpf_object *obj)
 9025{
 9026	return obj ? obj->btf : NULL;
 9027}
 9028
 9029int bpf_object__btf_fd(const struct bpf_object *obj)
 9030{
 9031	return obj->btf ? btf__fd(obj->btf) : -1;
 9032}
 9033
 9034int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
 9035{
 9036	if (obj->loaded)
 9037		return libbpf_err(-EINVAL);
 9038
 9039	obj->kern_version = kern_version;
 9040
 9041	return 0;
 9042}
 9043
 9044int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
 9045{
 9046	struct bpf_gen *gen;
 9047
 9048	if (!opts)
 9049		return -EFAULT;
 9050	if (!OPTS_VALID(opts, gen_loader_opts))
 9051		return -EINVAL;
 9052	gen = calloc(sizeof(*gen), 1);
 9053	if (!gen)
 9054		return -ENOMEM;
 9055	gen->opts = opts;
 9056	obj->gen_loader = gen;
 9057	return 0;
 9058}
 9059
 9060static struct bpf_program *
 9061__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
 9062		    bool forward)
 9063{
 9064	size_t nr_programs = obj->nr_programs;
 9065	ssize_t idx;
 9066
 9067	if (!nr_programs)
 9068		return NULL;
 9069
 9070	if (!p)
 9071		/* Iter from the beginning */
 9072		return forward ? &obj->programs[0] :
 9073			&obj->programs[nr_programs - 1];
 9074
 9075	if (p->obj != obj) {
 9076		pr_warn("error: program handler doesn't match object\n");
 9077		return errno = EINVAL, NULL;
 9078	}
 9079
 9080	idx = (p - obj->programs) + (forward ? 1 : -1);
 9081	if (idx >= obj->nr_programs || idx < 0)
 9082		return NULL;
 9083	return &obj->programs[idx];
 9084}
 9085
 9086struct bpf_program *
 9087bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
 9088{
 9089	struct bpf_program *prog = prev;
 9090
 9091	do {
 9092		prog = __bpf_program__iter(prog, obj, true);
 9093	} while (prog && prog_is_subprog(obj, prog));
 9094
 9095	return prog;
 9096}
 9097
 9098struct bpf_program *
 9099bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
 9100{
 9101	struct bpf_program *prog = next;
 9102
 9103	do {
 9104		prog = __bpf_program__iter(prog, obj, false);
 9105	} while (prog && prog_is_subprog(obj, prog));
 9106
 9107	return prog;
 9108}
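
/*
 * Usage sketch (editor's addition): bpf_object__next_program() backs the
 * public iteration macro, which skips subprograms. A minimal walk over all
 * entry-point programs of an opened object:
 *
 *	struct bpf_program *prog;
 *
 *	bpf_object__for_each_program(prog, obj)
 *		printf("%s (section %s)\n", bpf_program__name(prog),
 *		       bpf_program__section_name(prog));
 */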
 9109
 9110void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
 9111{
 9112	prog->prog_ifindex = ifindex;
 9113}
 9114
 9115const char *bpf_program__name(const struct bpf_program *prog)
 9116{
 9117	return prog->name;
 9118}
 9119
 9120const char *bpf_program__section_name(const struct bpf_program *prog)
 9121{
 9122	return prog->sec_name;
 9123}
 9124
 9125bool bpf_program__autoload(const struct bpf_program *prog)
 9126{
 9127	return prog->autoload;
 9128}
 9129
 9130int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
 9131{
 9132	if (prog->obj->loaded)
 9133		return libbpf_err(-EINVAL);
 9134
 9135	prog->autoload = autoload;
 9136	return 0;
 9137}
 9138
 9139bool bpf_program__autoattach(const struct bpf_program *prog)
 9140{
 9141	return prog->autoattach;
 9142}
 9143
 9144void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach)
 9145{
 9146	prog->autoattach = autoattach;
 9147}
 9148
 9149const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
 9150{
 9151	return prog->insns;
 9152}
 9153
 9154size_t bpf_program__insn_cnt(const struct bpf_program *prog)
 9155{
 9156	return prog->insns_cnt;
 9157}
 9158
 9159int bpf_program__set_insns(struct bpf_program *prog,
 9160			   struct bpf_insn *new_insns, size_t new_insn_cnt)
 9161{
 9162	struct bpf_insn *insns;
 9163
 9164	if (prog->obj->loaded)
 9165		return -EBUSY;
 9166
 9167	insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
 9168	/* NULL is a valid return from reallocarray if the new count is zero */
 9169	if (!insns && new_insn_cnt) {
 9170		pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
 9171		return -ENOMEM;
 9172	}
 9173	memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
 9174
 9175	prog->insns = insns;
 9176	prog->insns_cnt = new_insn_cnt;
 9177	return 0;
 9178}
 9179
 9180int bpf_program__fd(const struct bpf_program *prog)
 9181{
 9182	if (!prog)
 9183		return libbpf_err(-EINVAL);
 9184
 9185	if (prog->fd < 0)
 9186		return libbpf_err(-ENOENT);
 9187
 9188	return prog->fd;
 9189}
 9190
 9191__alias(bpf_program__type)
 9192enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
 9193
 9194enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
 9195{
 9196	return prog->type;
 9197}
 9198
 9199static size_t custom_sec_def_cnt;
 9200static struct bpf_sec_def *custom_sec_defs;
 9201static struct bpf_sec_def custom_fallback_def;
 9202static bool has_custom_fallback_def;
 9203static int last_custom_sec_def_handler_id;
 9204
 9205int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
 9206{
 9207	if (prog->obj->loaded)
 9208		return libbpf_err(-EBUSY);
 9209
 9210	/* if type is not changed, do nothing */
 9211	if (prog->type == type)
 9212		return 0;
 9213
 9214	prog->type = type;
 9215
 9216	/* If a program type was changed, we need to reset associated SEC()
 9217	 * handler, as it will be invalid now. The only exception is a generic
 9218	 * fallback handler, which by definition is program type-agnostic and
 9219	 * is a catch-all custom handler, optionally set by the application,
 9220	 * so should be able to handle any type of BPF program.
 9221	 */
 9222	if (prog->sec_def != &custom_fallback_def)
 9223		prog->sec_def = NULL;
 9224	return 0;
 9225}
 9226
 9227__alias(bpf_program__expected_attach_type)
 9228enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
 9229
 9230enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
 9231{
 9232	return prog->expected_attach_type;
 9233}
 9234
 9235int bpf_program__set_expected_attach_type(struct bpf_program *prog,
 9236					   enum bpf_attach_type type)
 9237{
 9238	if (prog->obj->loaded)
 9239		return libbpf_err(-EBUSY);
 9240
 9241	prog->expected_attach_type = type;
 9242	return 0;
 9243}
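
/*
 * Usage sketch (editor's addition): both setters above must be called before
 * the object is loaded, otherwise they return -EBUSY. The program name is
 * hypothetical.
 *
 *	struct bpf_program *prog = bpf_object__find_program_by_name(obj, "ingress");
 *
 *	bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SKB);
 *	bpf_program__set_expected_attach_type(prog, BPF_CGROUP_INET_INGRESS);
 *	err = bpf_object__load(obj);
 */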
 9244
 9245__u32 bpf_program__flags(const struct bpf_program *prog)
 9246{
 9247	return prog->prog_flags;
 9248}
 9249
 9250int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
 9251{
 9252	if (prog->obj->loaded)
 9253		return libbpf_err(-EBUSY);
 9254
 9255	prog->prog_flags = flags;
 9256	return 0;
 9257}
 9258
 9259__u32 bpf_program__log_level(const struct bpf_program *prog)
 9260{
 9261	return prog->log_level;
 9262}
 9263
 9264int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
 9265{
 9266	if (prog->obj->loaded)
 9267		return libbpf_err(-EBUSY);
 9268
 9269	prog->log_level = log_level;
 9270	return 0;
 9271}
 9272
 9273const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
 9274{
 9275	*log_size = prog->log_size;
 9276	return prog->log_buf;
 9277}
 9278
 9279int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
 9280{
 9281	if (log_size && !log_buf)
 9282		return -EINVAL;
 9283	if (prog->log_size > UINT_MAX)
 9284		return -EINVAL;
 9285	if (prog->obj->loaded)
 9286		return -EBUSY;
 9287
 9288	prog->log_buf = log_buf;
 9289	prog->log_size = log_size;
 9290	return 0;
 9291}
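
/*
 * Usage sketch (editor's addition): supplying a caller-owned verifier log
 * buffer before load. The buffer size is arbitrary; log_level 2 requests
 * verbose, instruction-level verifier output.
 *
 *	static char vlog[1024 * 1024];
 *
 *	bpf_program__set_log_buf(prog, vlog, sizeof(vlog));
 *	bpf_program__set_log_level(prog, 2);
 *	if (bpf_object__load(obj))
 *		fprintf(stderr, "verifier log:\n%s\n", vlog);
 */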
 9292
 9293#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) {			    \
 9294	.sec = (char *)sec_pfx,						    \
 9295	.prog_type = BPF_PROG_TYPE_##ptype,				    \
 9296	.expected_attach_type = atype,					    \
 9297	.cookie = (long)(flags),					    \
 9298	.prog_prepare_load_fn = libbpf_prepare_prog_load,		    \
 9299	__VA_ARGS__							    \
 9300}
 9301
 9302static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 9303static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 9304static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 9305static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 9306static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 9307static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 9308static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 9309static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 9310static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 9311static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 9312static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 9313
 9314static const struct bpf_sec_def section_defs[] = {
 9315	SEC_DEF("socket",		SOCKET_FILTER, 0, SEC_NONE),
 9316	SEC_DEF("sk_reuseport/migrate",	SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
 9317	SEC_DEF("sk_reuseport",		SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
 9318	SEC_DEF("kprobe+",		KPROBE,	0, SEC_NONE, attach_kprobe),
 9319	SEC_DEF("uprobe+",		KPROBE,	0, SEC_NONE, attach_uprobe),
 9320	SEC_DEF("uprobe.s+",		KPROBE,	0, SEC_SLEEPABLE, attach_uprobe),
 9321	SEC_DEF("kretprobe+",		KPROBE, 0, SEC_NONE, attach_kprobe),
 9322	SEC_DEF("uretprobe+",		KPROBE, 0, SEC_NONE, attach_uprobe),
 9323	SEC_DEF("uretprobe.s+",		KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
 9324	SEC_DEF("kprobe.multi+",	KPROBE,	BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
 9325	SEC_DEF("kretprobe.multi+",	KPROBE,	BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
 9326	SEC_DEF("uprobe.multi+",	KPROBE,	BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
 9327	SEC_DEF("uretprobe.multi+",	KPROBE,	BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
 9328	SEC_DEF("uprobe.multi.s+",	KPROBE,	BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
 9329	SEC_DEF("uretprobe.multi.s+",	KPROBE,	BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
 9330	SEC_DEF("ksyscall+",		KPROBE,	0, SEC_NONE, attach_ksyscall),
 9331	SEC_DEF("kretsyscall+",		KPROBE, 0, SEC_NONE, attach_ksyscall),
 9332	SEC_DEF("usdt+",		KPROBE,	0, SEC_USDT, attach_usdt),
 9333	SEC_DEF("usdt.s+",		KPROBE,	0, SEC_USDT | SEC_SLEEPABLE, attach_usdt),
 9334	SEC_DEF("tc/ingress",		SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */
 9335	SEC_DEF("tc/egress",		SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE),  /* alias for tcx */
 9336	SEC_DEF("tcx/ingress",		SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE),
 9337	SEC_DEF("tcx/egress",		SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE),
 9338	SEC_DEF("tc",			SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
 9339	SEC_DEF("classifier",		SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
 9340	SEC_DEF("action",		SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */
 9341	SEC_DEF("netkit/primary",	SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE),
 9342	SEC_DEF("netkit/peer",		SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE),
 9343	SEC_DEF("tracepoint+",		TRACEPOINT, 0, SEC_NONE, attach_tp),
 9344	SEC_DEF("tp+",			TRACEPOINT, 0, SEC_NONE, attach_tp),
 9345	SEC_DEF("raw_tracepoint+",	RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
 9346	SEC_DEF("raw_tp+",		RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
 9347	SEC_DEF("raw_tracepoint.w+",	RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
 9348	SEC_DEF("raw_tp.w+",		RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
 9349	SEC_DEF("tp_btf+",		TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
 9350	SEC_DEF("fentry+",		TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
 9351	SEC_DEF("fmod_ret+",		TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
 9352	SEC_DEF("fexit+",		TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
 9353	SEC_DEF("fentry.s+",		TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
 9354	SEC_DEF("fmod_ret.s+",		TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
 9355	SEC_DEF("fexit.s+",		TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
 9356	SEC_DEF("freplace+",		EXT, 0, SEC_ATTACH_BTF, attach_trace),
 9357	SEC_DEF("lsm+",			LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
 9358	SEC_DEF("lsm.s+",		LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
 9359	SEC_DEF("lsm_cgroup+",		LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
 9360	SEC_DEF("iter+",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
 9361	SEC_DEF("iter.s+",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
 9362	SEC_DEF("syscall",		SYSCALL, 0, SEC_SLEEPABLE),
 9363	SEC_DEF("xdp.frags/devmap",	XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
 9364	SEC_DEF("xdp/devmap",		XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
 9365	SEC_DEF("xdp.frags/cpumap",	XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
 9366	SEC_DEF("xdp/cpumap",		XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
 9367	SEC_DEF("xdp.frags",		XDP, BPF_XDP, SEC_XDP_FRAGS),
 9368	SEC_DEF("xdp",			XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
 9369	SEC_DEF("perf_event",		PERF_EVENT, 0, SEC_NONE),
 9370	SEC_DEF("lwt_in",		LWT_IN, 0, SEC_NONE),
 9371	SEC_DEF("lwt_out",		LWT_OUT, 0, SEC_NONE),
 9372	SEC_DEF("lwt_xmit",		LWT_XMIT, 0, SEC_NONE),
 9373	SEC_DEF("lwt_seg6local",	LWT_SEG6LOCAL, 0, SEC_NONE),
 9374	SEC_DEF("sockops",		SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
 9375	SEC_DEF("sk_skb/stream_parser",	SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
 9376	SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
 9377	SEC_DEF("sk_skb",		SK_SKB, 0, SEC_NONE),
 9378	SEC_DEF("sk_msg",		SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
 9379	SEC_DEF("lirc_mode2",		LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
 9380	SEC_DEF("flow_dissector",	FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
 9381	SEC_DEF("cgroup_skb/ingress",	CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
 9382	SEC_DEF("cgroup_skb/egress",	CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
 9383	SEC_DEF("cgroup/skb",		CGROUP_SKB, 0, SEC_NONE),
 9384	SEC_DEF("cgroup/sock_create",	CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
 9385	SEC_DEF("cgroup/sock_release",	CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
 9386	SEC_DEF("cgroup/sock",		CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
 9387	SEC_DEF("cgroup/post_bind4",	CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
 9388	SEC_DEF("cgroup/post_bind6",	CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
 9389	SEC_DEF("cgroup/bind4",		CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
 9390	SEC_DEF("cgroup/bind6",		CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
 9391	SEC_DEF("cgroup/connect4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
 9392	SEC_DEF("cgroup/connect6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
 9393	SEC_DEF("cgroup/connect_unix",	CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE),
 9394	SEC_DEF("cgroup/sendmsg4",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
 9395	SEC_DEF("cgroup/sendmsg6",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
 9396	SEC_DEF("cgroup/sendmsg_unix",	CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE),
 9397	SEC_DEF("cgroup/recvmsg4",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
 9398	SEC_DEF("cgroup/recvmsg6",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
 9399	SEC_DEF("cgroup/recvmsg_unix",	CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE),
 9400	SEC_DEF("cgroup/getpeername4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
 9401	SEC_DEF("cgroup/getpeername6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
 9402	SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE),
 9403	SEC_DEF("cgroup/getsockname4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
 9404	SEC_DEF("cgroup/getsockname6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
 9405	SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE),
 9406	SEC_DEF("cgroup/sysctl",	CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
 9407	SEC_DEF("cgroup/getsockopt",	CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
 9408	SEC_DEF("cgroup/setsockopt",	CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
 9409	SEC_DEF("cgroup/dev",		CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
 9410	SEC_DEF("struct_ops+",		STRUCT_OPS, 0, SEC_NONE),
 9411	SEC_DEF("struct_ops.s+",	STRUCT_OPS, 0, SEC_SLEEPABLE),
 9412	SEC_DEF("sk_lookup",		SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
 9413	SEC_DEF("netfilter",		NETFILTER, BPF_NETFILTER, SEC_NONE),
 9414};
 9415
 9416int libbpf_register_prog_handler(const char *sec,
 9417				 enum bpf_prog_type prog_type,
 9418				 enum bpf_attach_type exp_attach_type,
 9419				 const struct libbpf_prog_handler_opts *opts)
 9420{
 9421	struct bpf_sec_def *sec_def;
 9422
 9423	if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
 9424		return libbpf_err(-EINVAL);
 9425
 9426	if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
 9427		return libbpf_err(-E2BIG);
 9428
 9429	if (sec) {
 9430		sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
 9431					      sizeof(*sec_def));
 9432		if (!sec_def)
 9433			return libbpf_err(-ENOMEM);
 9434
 9435		custom_sec_defs = sec_def;
 9436		sec_def = &custom_sec_defs[custom_sec_def_cnt];
 9437	} else {
 9438		if (has_custom_fallback_def)
 9439			return libbpf_err(-EBUSY);
 9440
 9441		sec_def = &custom_fallback_def;
 9442	}
 9443
 9444	sec_def->sec = sec ? strdup(sec) : NULL;
 9445	if (sec && !sec_def->sec)
 9446		return libbpf_err(-ENOMEM);
 9447
 9448	sec_def->prog_type = prog_type;
 9449	sec_def->expected_attach_type = exp_attach_type;
 9450	sec_def->cookie = OPTS_GET(opts, cookie, 0);
 9451
 9452	sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
 9453	sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
 9454	sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
 9455
 9456	sec_def->handler_id = ++last_custom_sec_def_handler_id;
 9457
 9458	if (sec)
 9459		custom_sec_def_cnt++;
 9460	else
 9461		has_custom_fallback_def = true;
 9462
 9463	return sec_def->handler_id;
 9464}
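
/*
 * Usage sketch (editor's addition): registering a custom SEC("my_xdp")
 * handler that maps to XDP programs. The section name is hypothetical; the
 * returned handler_id can later be passed to libbpf_unregister_prog_handler().
 *
 *	LIBBPF_OPTS(libbpf_prog_handler_opts, opts);
 *	int id;
 *
 *	id = libbpf_register_prog_handler("my_xdp", BPF_PROG_TYPE_XDP,
 *					  BPF_XDP, &opts);
 *	if (id < 0)
 *		fprintf(stderr, "failed to register handler: %d\n", id);
 *	...
 *	libbpf_unregister_prog_handler(id);
 */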
 9465
 9466int libbpf_unregister_prog_handler(int handler_id)
 9467{
 9468	struct bpf_sec_def *sec_defs;
 9469	int i;
 9470
 9471	if (handler_id <= 0)
 9472		return libbpf_err(-EINVAL);
 9473
 9474	if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
 9475		memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
 9476		has_custom_fallback_def = false;
 9477		return 0;
 9478	}
 9479
 9480	for (i = 0; i < custom_sec_def_cnt; i++) {
 9481		if (custom_sec_defs[i].handler_id == handler_id)
 9482			break;
 9483	}
 9484
 9485	if (i == custom_sec_def_cnt)
 9486		return libbpf_err(-ENOENT);
 9487
 9488	free(custom_sec_defs[i].sec);
 9489	for (i = i + 1; i < custom_sec_def_cnt; i++)
 9490		custom_sec_defs[i - 1] = custom_sec_defs[i];
 9491	custom_sec_def_cnt--;
 9492
 9493	/* try to shrink the array, but it's ok if we couldn't */
 9494	sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
 9495	/* if new count is zero, reallocarray can return a valid NULL result;
 9496	 * in this case the previous pointer will be freed, so we *have to*
 9497	 * reassign old pointer to the new value (even if it's NULL)
 9498	 */
 9499	if (sec_defs || custom_sec_def_cnt == 0)
 9500		custom_sec_defs = sec_defs;
 9501
 9502	return 0;
 9503}
 9504
 9505static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
 9506{
 9507	size_t len = strlen(sec_def->sec);
 9508
 9509	/* "type/" always has to have proper SEC("type/extras") form */
 9510	if (sec_def->sec[len - 1] == '/') {
 9511		if (str_has_pfx(sec_name, sec_def->sec))
 9512			return true;
 9513		return false;
 9514	}
 9515
 9516	/* "type+" means it can be either exact SEC("type") or
 9517	 * well-formed SEC("type/extras") with proper '/' separator
 9518	 */
 9519	if (sec_def->sec[len - 1] == '+') {
 9520		len--;
 9521		/* not even a prefix */
 9522		if (strncmp(sec_name, sec_def->sec, len) != 0)
 9523			return false;
 9524		/* exact match or has '/' separator */
 9525		if (sec_name[len] == '\0' || sec_name[len] == '/')
 9526			return true;
 9527		return false;
 9528	}
 9529
 9530	return strcmp(sec_name, sec_def->sec) == 0;
 9531}
 9532
 9533static const struct bpf_sec_def *find_sec_def(const char *sec_name)
 9534{
 9535	const struct bpf_sec_def *sec_def;
 9536	int i, n;
 9537
 9538	n = custom_sec_def_cnt;
 9539	for (i = 0; i < n; i++) {
 9540		sec_def = &custom_sec_defs[i];
 9541		if (sec_def_matches(sec_def, sec_name))
 9542			return sec_def;
 9543	}
 9544
 9545	n = ARRAY_SIZE(section_defs);
 9546	for (i = 0; i < n; i++) {
 9547		sec_def = &section_defs[i];
 9548		if (sec_def_matches(sec_def, sec_name))
 9549			return sec_def;
 9550	}
 9551
 9552	if (has_custom_fallback_def)
 9553		return &custom_fallback_def;
 9554
 9555	return NULL;
 9556}
 9557
 9558#define MAX_TYPE_NAME_SIZE 32
 9559
 9560static char *libbpf_get_type_names(bool attach_type)
 9561{
 9562	int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
 9563	char *buf;
 9564
 9565	buf = malloc(len);
 9566	if (!buf)
 9567		return NULL;
 9568
 9569	buf[0] = '\0';
 9570	/* Forge string buf with all available names */
 9571	for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
 9572		const struct bpf_sec_def *sec_def = &section_defs[i];
 9573
 9574		if (attach_type) {
 9575			if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
 9576				continue;
 9577
 9578			if (!(sec_def->cookie & SEC_ATTACHABLE))
 9579				continue;
 9580		}
 9581
 9582		if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
 9583			free(buf);
 9584			return NULL;
 9585		}
 9586		strcat(buf, " ");
 9587		strcat(buf, section_defs[i].sec);
 9588	}
 9589
 9590	return buf;
 9591}
 9592
 9593int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
 9594			     enum bpf_attach_type *expected_attach_type)
 9595{
 9596	const struct bpf_sec_def *sec_def;
 9597	char *type_names;
 9598
 9599	if (!name)
 9600		return libbpf_err(-EINVAL);
 9601
 9602	sec_def = find_sec_def(name);
 9603	if (sec_def) {
 9604		*prog_type = sec_def->prog_type;
 9605		*expected_attach_type = sec_def->expected_attach_type;
 9606		return 0;
 9607	}
 9608
 9609	pr_debug("failed to guess program type from ELF section '%s'\n", name);
 9610	type_names = libbpf_get_type_names(false);
 9611	if (type_names != NULL) {
 9612		pr_debug("supported section(type) names are:%s\n", type_names);
 9613		free(type_names);
 9614	}
 9615
 9616	return libbpf_err(-ESRCH);
 9617}
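
/*
 * Usage sketch (editor's addition): resolving program and attach types from
 * an ELF section name string.
 *
 *	enum bpf_prog_type ptype;
 *	enum bpf_attach_type atype;
 *
 *	if (!libbpf_prog_type_by_name("cgroup/connect4", &ptype, &atype))
 *		printf("prog type %s, attach type %s\n",
 *		       libbpf_bpf_prog_type_str(ptype),
 *		       libbpf_bpf_attach_type_str(atype));
 */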
 9618
 9619const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
 9620{
 9621	if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
 9622		return NULL;
 9623
 9624	return attach_type_name[t];
 9625}
 9626
 9627const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
 9628{
 9629	if (t < 0 || t >= ARRAY_SIZE(link_type_name))
 9630		return NULL;
 9631
 9632	return link_type_name[t];
 9633}
 9634
 9635const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
 9636{
 9637	if (t < 0 || t >= ARRAY_SIZE(map_type_name))
 9638		return NULL;
 9639
 9640	return map_type_name[t];
 9641}
 9642
 9643const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
 9644{
 9645	if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
 9646		return NULL;
 9647
 9648	return prog_type_name[t];
 9649}
 9650
 9651static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
 9652						     int sec_idx,
 9653						     size_t offset)
 9654{
 9655	struct bpf_map *map;
 9656	size_t i;
 9657
 9658	for (i = 0; i < obj->nr_maps; i++) {
 9659		map = &obj->maps[i];
 9660		if (!bpf_map__is_struct_ops(map))
 9661			continue;
 9662		if (map->sec_idx == sec_idx &&
 9663		    map->sec_offset <= offset &&
 9664		    offset - map->sec_offset < map->def.value_size)
 9665			return map;
 9666	}
 9667
 9668	return NULL;
 9669}
 9670
 9671/* Collect the reloc from ELF and populate the st_ops->progs[] */
 9672static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
 9673					    Elf64_Shdr *shdr, Elf_Data *data)
 9674{
 9675	const struct btf_member *member;
 9676	struct bpf_struct_ops *st_ops;
 9677	struct bpf_program *prog;
 9678	unsigned int shdr_idx;
 9679	const struct btf *btf;
 9680	struct bpf_map *map;
 9681	unsigned int moff, insn_idx;
 9682	const char *name;
 9683	__u32 member_idx;
 9684	Elf64_Sym *sym;
 9685	Elf64_Rel *rel;
 9686	int i, nrels;
 9687
 9688	btf = obj->btf;
 9689	nrels = shdr->sh_size / shdr->sh_entsize;
 9690	for (i = 0; i < nrels; i++) {
 9691		rel = elf_rel_by_idx(data, i);
 9692		if (!rel) {
 9693			pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
 9694			return -LIBBPF_ERRNO__FORMAT;
 9695		}
 9696
 9697		sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
 9698		if (!sym) {
 9699			pr_warn("struct_ops reloc: symbol %zx not found\n",
 9700				(size_t)ELF64_R_SYM(rel->r_info));
 9701			return -LIBBPF_ERRNO__FORMAT;
 9702		}
 9703
 9704		name = elf_sym_str(obj, sym->st_name) ?: "<?>";
 9705		map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset);
 9706		if (!map) {
 9707			pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
 9708				(size_t)rel->r_offset);
 9709			return -EINVAL;
 9710		}
 9711
 9712		moff = rel->r_offset - map->sec_offset;
 9713		shdr_idx = sym->st_shndx;
 9714		st_ops = map->st_ops;
 9715		pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
 9716			 map->name,
 9717			 (long long)(rel->r_info >> 32),
 9718			 (long long)sym->st_value,
 9719			 shdr_idx, (size_t)rel->r_offset,
 9720			 map->sec_offset, sym->st_name, name);
 9721
 9722		if (shdr_idx >= SHN_LORESERVE) {
 9723			pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
 9724				map->name, (size_t)rel->r_offset, shdr_idx);
 9725			return -LIBBPF_ERRNO__RELOC;
 9726		}
 9727		if (sym->st_value % BPF_INSN_SZ) {
 9728			pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
 9729				map->name, (unsigned long long)sym->st_value);
 9730			return -LIBBPF_ERRNO__FORMAT;
 9731		}
 9732		insn_idx = sym->st_value / BPF_INSN_SZ;
 9733
 9734		member = find_member_by_offset(st_ops->type, moff * 8);
 9735		if (!member) {
 9736			pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
 9737				map->name, moff);
 9738			return -EINVAL;
 9739		}
 9740		member_idx = member - btf_members(st_ops->type);
 9741		name = btf__name_by_offset(btf, member->name_off);
 9742
 9743		if (!resolve_func_ptr(btf, member->type, NULL)) {
 9744			pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
 9745				map->name, name);
 9746			return -EINVAL;
 9747		}
 9748
 9749		prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
 9750		if (!prog) {
 9751			pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
 9752				map->name, shdr_idx, name);
 9753			return -EINVAL;
 9754		}
 9755
 9756		/* prevent the use of BPF prog with invalid type */
 9757		if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
 9758			pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
 9759				map->name, prog->name);
 9760			return -EINVAL;
 9761		}
 9762
 9763		/* if we haven't yet processed this BPF program, record proper
 9764		 * attach_btf_id and member_idx
 9765		 */
 9766		if (!prog->attach_btf_id) {
 9767			prog->attach_btf_id = st_ops->type_id;
 9768			prog->expected_attach_type = member_idx;
 9769		}
 9770
 9771		/* struct_ops BPF prog can be re-used between multiple
 9772		 * .struct_ops & .struct_ops.link as long as it's the
 9773		 * same struct_ops struct definition and the same
 9774		 * function pointer field
 9775		 */
 9776		if (prog->attach_btf_id != st_ops->type_id ||
 9777		    prog->expected_attach_type != member_idx) {
 9778			pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
 9779				map->name, prog->name, prog->sec_name, prog->type,
 9780				prog->attach_btf_id, prog->expected_attach_type, name);
 9781			return -EINVAL;
 9782		}
 9783
 9784		st_ops->progs[member_idx] = prog;
 9785	}
 9786
 9787	return 0;
 9788}
 9789
 9790#define BTF_TRACE_PREFIX "btf_trace_"
 9791#define BTF_LSM_PREFIX "bpf_lsm_"
 9792#define BTF_ITER_PREFIX "bpf_iter_"
 9793#define BTF_MAX_NAME_SIZE 128
 9794
 9795void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
 9796				const char **prefix, int *kind)
 9797{
 9798	switch (attach_type) {
 9799	case BPF_TRACE_RAW_TP:
 9800		*prefix = BTF_TRACE_PREFIX;
 9801		*kind = BTF_KIND_TYPEDEF;
 9802		break;
 9803	case BPF_LSM_MAC:
 9804	case BPF_LSM_CGROUP:
 9805		*prefix = BTF_LSM_PREFIX;
 9806		*kind = BTF_KIND_FUNC;
 9807		break;
 9808	case BPF_TRACE_ITER:
 9809		*prefix = BTF_ITER_PREFIX;
 9810		*kind = BTF_KIND_FUNC;
 9811		break;
 9812	default:
 9813		*prefix = "";
 9814		*kind = BTF_KIND_FUNC;
 9815	}
 9816}
 9817
 9818static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
 9819				   const char *name, __u32 kind)
 9820{
 9821	char btf_type_name[BTF_MAX_NAME_SIZE];
 9822	int ret;
 9823
 9824	ret = snprintf(btf_type_name, sizeof(btf_type_name),
 9825		       "%s%s", prefix, name);
 9826	/* snprintf returns the number of characters written excluding the
 9827	 * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
 9828	 * indicates truncation.
 9829	 */
 9830	if (ret < 0 || ret >= sizeof(btf_type_name))
 9831		return -ENAMETOOLONG;
 9832	return btf__find_by_name_kind(btf, btf_type_name, kind);
 9833}
 9834
 9835static inline int find_attach_btf_id(struct btf *btf, const char *name,
 9836				     enum bpf_attach_type attach_type)
 9837{
 9838	const char *prefix;
 9839	int kind;
 9840
 9841	btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
 9842	return find_btf_by_prefix_kind(btf, prefix, name, kind);
 9843}
 9844
 9845int libbpf_find_vmlinux_btf_id(const char *name,
 9846			       enum bpf_attach_type attach_type)
 9847{
 9848	struct btf *btf;
 9849	int err;
 9850
 9851	btf = btf__load_vmlinux_btf();
 9852	err = libbpf_get_error(btf);
 9853	if (err) {
 9854		pr_warn("vmlinux BTF is not found\n");
 9855		return libbpf_err(err);
 9856	}
 9857
 9858	err = find_attach_btf_id(btf, name, attach_type);
 9859	if (err <= 0)
 9860		pr_warn("%s is not found in vmlinux BTF\n", name);
 9861
 9862	btf__free(btf);
 9863	return libbpf_err(err);
 9864}
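
/*
 * Usage sketch (editor's addition): looking up the vmlinux BTF ID for an
 * fentry/fexit attach target. The kernel function name is hypothetical.
 *
 *	int btf_id = libbpf_find_vmlinux_btf_id("do_unlinkat", BPF_TRACE_FENTRY);
 *
 *	if (btf_id < 0)
 *		fprintf(stderr, "no BTF ID for do_unlinkat: %d\n", btf_id);
 */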
 9865
 9866static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
 9867{
 9868	struct bpf_prog_info info;
 9869	__u32 info_len = sizeof(info);
 9870	struct btf *btf;
 9871	int err;
 9872
 9873	memset(&info, 0, info_len);
 9874	err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len);
 9875	if (err) {
 9876		pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n",
 9877			attach_prog_fd, err);
 9878		return err;
 9879	}
 9880
 9881	err = -EINVAL;
 9882	if (!info.btf_id) {
 9883		pr_warn("The target program doesn't have BTF\n");
 9884		goto out;
 9885	}
 9886	btf = btf__load_from_kernel_by_id(info.btf_id);
 9887	err = libbpf_get_error(btf);
 9888	if (err) {
 9889		pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err);
 9890		goto out;
 9891	}
 9892	err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
 9893	btf__free(btf);
 9894	if (err <= 0) {
 9895		pr_warn("%s is not found in prog's BTF\n", name);
 9896		goto out;
 9897	}
 9898out:
 9899	return err;
 9900}
 9901
 9902static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
 9903			      enum bpf_attach_type attach_type,
 9904			      int *btf_obj_fd, int *btf_type_id)
 9905{
 9906	int ret, i;
 9907
 9908	ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
 9909	if (ret > 0) {
 9910		*btf_obj_fd = 0; /* vmlinux BTF */
 9911		*btf_type_id = ret;
 9912		return 0;
 9913	}
 9914	if (ret != -ENOENT)
 9915		return ret;
 9916
 9917	ret = load_module_btfs(obj);
 9918	if (ret)
 9919		return ret;
 9920
 9921	for (i = 0; i < obj->btf_module_cnt; i++) {
 9922		const struct module_btf *mod = &obj->btf_modules[i];
 9923
 9924		ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
 9925		if (ret > 0) {
 9926			*btf_obj_fd = mod->fd;
 9927			*btf_type_id = ret;
 9928			return 0;
 9929		}
 9930		if (ret == -ENOENT)
 9931			continue;
 9932
 9933		return ret;
 9934	}
 9935
 9936	return -ESRCH;
 9937}
 9938
 9939static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
 9940				     int *btf_obj_fd, int *btf_type_id)
 9941{
 9942	enum bpf_attach_type attach_type = prog->expected_attach_type;
 9943	__u32 attach_prog_fd = prog->attach_prog_fd;
 9944	int err = 0;
 9945
 9946	/* BPF program's BTF ID */
 9947	if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
 9948		if (!attach_prog_fd) {
 9949			pr_warn("prog '%s': attach program FD is not set\n", prog->name);
 9950			return -EINVAL;
 9951		}
 9952		err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
 9953		if (err < 0) {
 9954			pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
 9955				 prog->name, attach_prog_fd, attach_name, err);
 9956			return err;
 9957		}
 9958		*btf_obj_fd = 0;
 9959		*btf_type_id = err;
 9960		return 0;
 9961	}
 9962
 9963	/* kernel/module BTF ID */
 9964	if (prog->obj->gen_loader) {
 9965		bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
 9966		*btf_obj_fd = 0;
 9967		*btf_type_id = 1;
 9968	} else {
 9969		err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
 9970	}
 9971	if (err) {
 9972		pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
 9973			prog->name, attach_name, err);
 9974		return err;
 9975	}
 9976	return 0;
 9977}
 9978
 9979int libbpf_attach_type_by_name(const char *name,
 9980			       enum bpf_attach_type *attach_type)
 9981{
 9982	char *type_names;
 9983	const struct bpf_sec_def *sec_def;
 9984
 9985	if (!name)
 9986		return libbpf_err(-EINVAL);
 9987
 9988	sec_def = find_sec_def(name);
 9989	if (!sec_def) {
 9990		pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
 9991		type_names = libbpf_get_type_names(true);
 9992		if (type_names != NULL) {
 9993			pr_debug("attachable section(type) names are:%s\n", type_names);
 9994			free(type_names);
 9995		}
 9996
 9997		return libbpf_err(-EINVAL);
 9998	}
 9999
10000	if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
10001		return libbpf_err(-EINVAL);
10002	if (!(sec_def->cookie & SEC_ATTACHABLE))
10003		return libbpf_err(-EINVAL);
10004
10005	*attach_type = sec_def->expected_attach_type;
10006	return 0;
10007}
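
/*
 * Usage sketch (editor's addition): mapping a section name to an attach type
 * for use with the low-level bpf_prog_attach() wrapper from bpf.h. prog_fd
 * and cgroup_fd are hypothetical, already opened file descriptors.
 *
 *	enum bpf_attach_type atype;
 *
 *	if (!libbpf_attach_type_by_name("cgroup/sock_create", &atype))
 *		err = bpf_prog_attach(prog_fd, cgroup_fd, atype, 0);
 */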
10008
10009int bpf_map__fd(const struct bpf_map *map)
10010{
10011	if (!map)
10012		return libbpf_err(-EINVAL);
10013	if (!map_is_created(map))
10014		return -1;
10015	return map->fd;
10016}
10017
10018static bool map_uses_real_name(const struct bpf_map *map)
10019{
10020	/* Since libbpf started to support custom .data.* and .rodata.* maps,
10021	 * their user-visible name differs from kernel-visible name. Users see
10022	 * such map's corresponding ELF section name as a map name.
10023	 * This check distinguishes .data/.rodata from .data.* and .rodata.*
10024	 * maps to know which name has to be returned to the user.
10025	 */
10026	if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
10027		return true;
10028	if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
10029		return true;
10030	return false;
10031}
10032
10033const char *bpf_map__name(const struct bpf_map *map)
10034{
10035	if (!map)
10036		return NULL;
10037
10038	if (map_uses_real_name(map))
10039		return map->real_name;
10040
10041	return map->name;
10042}
10043
10044enum bpf_map_type bpf_map__type(const struct bpf_map *map)
10045{
10046	return map->def.type;
10047}
10048
10049int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
10050{
10051	if (map_is_created(map))
10052		return libbpf_err(-EBUSY);
10053	map->def.type = type;
10054	return 0;
10055}
10056
10057__u32 bpf_map__map_flags(const struct bpf_map *map)
10058{
10059	return map->def.map_flags;
10060}
10061
10062int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
10063{
10064	if (map_is_created(map))
10065		return libbpf_err(-EBUSY);
10066	map->def.map_flags = flags;
10067	return 0;
10068}
10069
10070__u64 bpf_map__map_extra(const struct bpf_map *map)
10071{
10072	return map->map_extra;
10073}
10074
10075int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
10076{
10077	if (map_is_created(map))
10078		return libbpf_err(-EBUSY);
10079	map->map_extra = map_extra;
10080	return 0;
10081}
10082
10083__u32 bpf_map__numa_node(const struct bpf_map *map)
10084{
10085	return map->numa_node;
10086}
10087
10088int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
10089{
10090	if (map_is_created(map))
10091		return libbpf_err(-EBUSY);
10092	map->numa_node = numa_node;
10093	return 0;
10094}
10095
10096__u32 bpf_map__key_size(const struct bpf_map *map)
10097{
10098	return map->def.key_size;
10099}
10100
10101int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
10102{
10103	if (map_is_created(map))
10104		return libbpf_err(-EBUSY);
10105	map->def.key_size = size;
10106	return 0;
10107}
10108
10109__u32 bpf_map__value_size(const struct bpf_map *map)
10110{
10111	return map->def.value_size;
10112}
10113
10114static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
10115{
10116	struct btf *btf;
10117	struct btf_type *datasec_type, *var_type;
10118	struct btf_var_secinfo *var;
10119	const struct btf_type *array_type;
10120	const struct btf_array *array;
10121	int vlen, element_sz, new_array_id;
10122	__u32 nr_elements;
10123
10124	/* check btf existence */
10125	btf = bpf_object__btf(map->obj);
10126	if (!btf)
10127		return -ENOENT;
10128
10129	/* verify map is datasec */
10130	datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map));
10131	if (!btf_is_datasec(datasec_type)) {
10132		pr_warn("map '%s': cannot be resized, map value type is not a datasec\n",
10133			bpf_map__name(map));
10134		return -EINVAL;
10135	}
10136
10137	/* verify datasec has at least one var */
10138	vlen = btf_vlen(datasec_type);
10139	if (vlen == 0) {
10140		pr_warn("map '%s': cannot be resized, map value datasec is empty\n",
10141			bpf_map__name(map));
10142		return -EINVAL;
10143	}
10144
10145	/* verify last var in the datasec is an array */
10146	var = &btf_var_secinfos(datasec_type)[vlen - 1];
10147	var_type = btf_type_by_id(btf, var->type);
10148	array_type = skip_mods_and_typedefs(btf, var_type->type, NULL);
10149	if (!btf_is_array(array_type)) {
10150		pr_warn("map '%s': cannot be resized, last var must be an array\n",
10151			bpf_map__name(map));
10152		return -EINVAL;
10153	}
10154
10155	/* verify request size aligns with array */
10156	array = btf_array(array_type);
10157	element_sz = btf__resolve_size(btf, array->type);
10158	if (element_sz <= 0 || (size - var->offset) % element_sz != 0) {
10159		pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n",
10160			bpf_map__name(map), element_sz, size);
10161		return -EINVAL;
10162	}
10163
10164	/* create a new array based on the existing array, but with new length */
10165	nr_elements = (size - var->offset) / element_sz;
10166	new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements);
10167	if (new_array_id < 0)
10168		return new_array_id;
10169
10170	/* adding a new btf type invalidates existing pointers to btf objects,
10171	 * so refresh pointers before proceeding
10172	 */
10173	datasec_type = btf_type_by_id(btf, map->btf_value_type_id);
10174	var = &btf_var_secinfos(datasec_type)[vlen - 1];
10175	var_type = btf_type_by_id(btf, var->type);
10176
10177	/* finally update btf info */
10178	datasec_type->size = size;
10179	var->size = size - var->offset;
10180	var_type->type = new_array_id;
10181
10182	return 0;
10183}
10184
10185int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
10186{
10187	if (map->obj->loaded || map->reused)
10188		return libbpf_err(-EBUSY);
10189
10190	if (map->mmaped) {
10191		int err;
10192		size_t mmap_old_sz, mmap_new_sz;
10193
10194		mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
10195		mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries);
10196		err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
10197		if (err) {
10198			pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
10199				bpf_map__name(map), err);
10200			return err;
10201		}
10202		err = map_btf_datasec_resize(map, size);
10203		if (err && err != -ENOENT) {
10204			pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n",
10205				bpf_map__name(map), err);
10206			map->btf_value_type_id = 0;
10207			map->btf_key_type_id = 0;
10208		}
10209	}
10210
10211	map->def.value_size = size;
10212	return 0;
10213}
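
/*
 * Usage sketch (editor's addition): growing a global-data map before load,
 * assuming its value datasec ends in an array (e.g. a global buffer in .bss).
 * Map name and new size are hypothetical.
 *
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, ".bss");
 *
 *	if (map)
 *		err = bpf_map__set_value_size(map, 64 * 1024);
 */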
10214
10215__u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
10216{
10217	return map ? map->btf_key_type_id : 0;
10218}
10219
10220__u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
10221{
10222	return map ? map->btf_value_type_id : 0;
10223}
10224
10225int bpf_map__set_initial_value(struct bpf_map *map,
10226			       const void *data, size_t size)
10227{
10228	if (map->obj->loaded || map->reused)
10229		return libbpf_err(-EBUSY);
10230
10231	if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
10232	    size != map->def.value_size)
10233		return libbpf_err(-EINVAL);
10234
10235	memcpy(map->mmaped, data, size);
10236	return 0;
10237}
10238
10239void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
10240{
10241	if (!map->mmaped)
10242		return NULL;
10243	*psize = map->def.value_size;
10244	return map->mmaped;
10245}
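
/*
 * Usage sketch (editor's addition): seeding read-only global data before
 * load; the size must match the map's value size exactly. struct my_cfg is
 * a hypothetical layout mirroring the BPF side's .rodata variables.
 *
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, ".rodata");
 *	struct my_cfg cfg = { .debug = 1 };
 *
 *	if (map && bpf_map__value_size(map) == sizeof(cfg))
 *		err = bpf_map__set_initial_value(map, &cfg, sizeof(cfg));
 */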
10246
10247bool bpf_map__is_internal(const struct bpf_map *map)
10248{
10249	return map->libbpf_type != LIBBPF_MAP_UNSPEC;
10250}
10251
10252__u32 bpf_map__ifindex(const struct bpf_map *map)
10253{
10254	return map->map_ifindex;
10255}
10256
10257int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
10258{
10259	if (map_is_created(map))
10260		return libbpf_err(-EBUSY);
10261	map->map_ifindex = ifindex;
10262	return 0;
10263}
10264
10265int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
10266{
10267	if (!bpf_map_type__is_map_in_map(map->def.type)) {
10268		pr_warn("error: unsupported map type\n");
10269		return libbpf_err(-EINVAL);
10270	}
10271	if (map->inner_map_fd != -1) {
10272		pr_warn("error: inner_map_fd already specified\n");
10273		return libbpf_err(-EINVAL);
10274	}
10275	if (map->inner_map) {
10276		bpf_map__destroy(map->inner_map);
10277		zfree(&map->inner_map);
10278	}
10279	map->inner_map_fd = fd;
10280	return 0;
10281}
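
/*
 * Usage sketch (editor's addition): supplying an inner-map template for a
 * map-in-map before load. "outer" is assumed to be an ARRAY_OF_MAPS or
 * HASH_OF_MAPS map; bpf_map_create() is the low-level wrapper from bpf.h.
 *
 *	int inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "inner",
 *				      sizeof(__u32), sizeof(__u64), 1, NULL);
 *	struct bpf_map *outer = bpf_object__find_map_by_name(obj, "outer");
 *
 *	if (inner_fd >= 0 && outer)
 *		err = bpf_map__set_inner_map_fd(outer, inner_fd);
 */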
10282
10283static struct bpf_map *
10284__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
10285{
10286	ssize_t idx;
10287	struct bpf_map *s, *e;
10288
10289	if (!obj || !obj->maps)
10290		return errno = EINVAL, NULL;
10291
10292	s = obj->maps;
10293	e = obj->maps + obj->nr_maps;
10294
10295	if ((m < s) || (m >= e)) {
10296		pr_warn("error in %s: map handler doesn't belong to object\n",
10297			 __func__);
10298		return errno = EINVAL, NULL;
10299	}
10300
10301	idx = (m - obj->maps) + i;
10302	if (idx >= obj->nr_maps || idx < 0)
10303		return NULL;
10304	return &obj->maps[idx];
10305}
10306
10307struct bpf_map *
10308bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
10309{
10310	if (prev == NULL)
10311		return obj->maps;
10312
10313	return __bpf_map__iter(prev, obj, 1);
10314}
10315
10316struct bpf_map *
10317bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
10318{
10319	if (next == NULL) {
10320		if (!obj->nr_maps)
10321			return NULL;
10322		return obj->maps + obj->nr_maps - 1;
10323	}
10324
10325	return __bpf_map__iter(next, obj, -1);
10326}
10327
10328struct bpf_map *
10329bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
10330{
10331	struct bpf_map *pos;
10332
10333	bpf_object__for_each_map(pos, obj) {
10334		/* if it's a special internal map name (which always starts
10335		 * with dot) then check if that special name matches the
10336		 * real map name (ELF section name)
10337		 */
10338		if (name[0] == '.') {
10339			if (pos->real_name && strcmp(pos->real_name, name) == 0)
10340				return pos;
10341			continue;
10342		}
10343		/* otherwise map name has to be an exact match */
10344		if (map_uses_real_name(pos)) {
10345			if (strcmp(pos->real_name, name) == 0)
10346				return pos;
10347			continue;
10348		}
10349		if (strcmp(pos->name, name) == 0)
10350			return pos;
10351	}
10352	return errno = ENOENT, NULL;
10353}
10354
10355int
10356bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
10357{
10358	return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
10359}
10360
10361static int validate_map_op(const struct bpf_map *map, size_t key_sz,
10362			   size_t value_sz, bool check_value_sz)
10363{
10364	if (!map_is_created(map)) /* map is not yet created */
10365		return -ENOENT;
10366
10367	if (map->def.key_size != key_sz) {
10368		pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
10369			map->name, key_sz, map->def.key_size);
10370		return -EINVAL;
10371	}
10372
10373	if (!check_value_sz)
10374		return 0;
10375
10376	switch (map->def.type) {
10377	case BPF_MAP_TYPE_PERCPU_ARRAY:
10378	case BPF_MAP_TYPE_PERCPU_HASH:
10379	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
10380	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
10381		int num_cpu = libbpf_num_possible_cpus();
10382		size_t elem_sz = roundup(map->def.value_size, 8);
10383
10384		if (value_sz != num_cpu * elem_sz) {
10385			pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
10386				map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
10387			return -EINVAL;
10388		}
10389		break;
10390	}
10391	default:
10392		if (map->def.value_size != value_sz) {
10393			pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
10394				map->name, value_sz, map->def.value_size);
10395			return -EINVAL;
10396		}
10397		break;
10398	}
10399	return 0;
10400}
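
/*
 * Usage sketch (editor's addition): per-CPU maps expect value buffers of
 * libbpf_num_possible_cpus() * roundup(value_size, 8) bytes. Hypothetical
 * PERCPU_ARRAY map with __u64 values:
 *
 *	int ncpu = libbpf_num_possible_cpus();
 *	__u64 *vals = calloc(ncpu, sizeof(__u64));
 *	__u32 key = 0;
 *
 *	err = bpf_map__lookup_elem(map, &key, sizeof(key),
 *				   vals, ncpu * sizeof(__u64), 0);
 */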
10401
10402int bpf_map__lookup_elem(const struct bpf_map *map,
10403			 const void *key, size_t key_sz,
10404			 void *value, size_t value_sz, __u64 flags)
10405{
10406	int err;
10407
10408	err = validate_map_op(map, key_sz, value_sz, true);
10409	if (err)
10410		return libbpf_err(err);
10411
10412	return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
10413}
10414
10415int bpf_map__update_elem(const struct bpf_map *map,
10416			 const void *key, size_t key_sz,
10417			 const void *value, size_t value_sz, __u64 flags)
10418{
10419	int err;
10420
10421	err = validate_map_op(map, key_sz, value_sz, true);
10422	if (err)
10423		return libbpf_err(err);
10424
10425	return bpf_map_update_elem(map->fd, key, value, flags);
10426}
10427
10428int bpf_map__delete_elem(const struct bpf_map *map,
10429			 const void *key, size_t key_sz, __u64 flags)
10430{
10431	int err;
10432
10433	err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
10434	if (err)
10435		return libbpf_err(err);
10436
10437	return bpf_map_delete_elem_flags(map->fd, key, flags);
10438}
10439
10440int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
10441				    const void *key, size_t key_sz,
10442				    void *value, size_t value_sz, __u64 flags)
10443{
10444	int err;
10445
10446	err = validate_map_op(map, key_sz, value_sz, true);
10447	if (err)
10448		return libbpf_err(err);
10449
10450	return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
10451}
10452
10453int bpf_map__get_next_key(const struct bpf_map *map,
10454			  const void *cur_key, void *next_key, size_t key_sz)
10455{
10456	int err;
10457
10458	err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
10459	if (err)
10460		return libbpf_err(err);
10461
10462	return bpf_map_get_next_key(map->fd, cur_key, next_key);
10463}
10464
10465long libbpf_get_error(const void *ptr)
10466{
10467	if (!IS_ERR_OR_NULL(ptr))
10468		return 0;
10469
10470	if (IS_ERR(ptr))
10471		errno = -PTR_ERR(ptr);
10472
10473	/* If ptr == NULL, then errno should be already set by the failing
10474	 * API, because libbpf never returns NULL on success and it now always
10475	 * sets errno on error. So no extra errno handling for ptr == NULL
10476	 * case.
10477	 */
10478	return -errno;
10479}
10480
10481/* Replace link's underlying BPF program with the new one */
10482int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
10483{
10484	int ret;
10485
10486	ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
10487	return libbpf_err_errno(ret);
10488}
10489
10490/* Release "ownership" of the underlying BPF resource (typically, a BPF program
10491 * attached to some BPF hook, e.g., tracepoint, kprobe, etc). A disconnected
10492 * link, when destroyed through a bpf_link__destroy() call, won't attempt to
10493 * detach/unregister that BPF resource. This is useful in situations where,
10494 * say, the attached BPF program has to outlive the userspace program that
10495 * attached it. Depending on the type of BPF program, though, there might be
10496 * additional steps (like pinning the BPF program in BPF FS) necessary to ensure
10497 * that exit of the userspace program doesn't trigger automatic detachment and
10498 * cleanup inside the kernel.
10499 */
10500void bpf_link__disconnect(struct bpf_link *link)
10501{
10502	link->disconnected = true;
10503}
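
/* Example (a minimal sketch with a hypothetical pin path; error handling
 * omitted): keep an attachment alive after the attaching process exits by
 * pinning the link and disconnecting it before destroying the handle:
 *
 *	struct bpf_link *link = bpf_program__attach(prog);
 *
 *	bpf_link__pin(link, "/sys/fs/bpf/my_link");
 *	bpf_link__disconnect(link);
 *	bpf_link__destroy(link);	// frees the handle, does not detach
 */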
10504
10505int bpf_link__destroy(struct bpf_link *link)
10506{
10507	int err = 0;
10508
10509	if (IS_ERR_OR_NULL(link))
10510		return 0;
10511
10512	if (!link->disconnected && link->detach)
10513		err = link->detach(link);
10514	if (link->pin_path)
10515		free(link->pin_path);
10516	if (link->dealloc)
10517		link->dealloc(link);
10518	else
10519		free(link);
10520
10521	return libbpf_err(err);
10522}
10523
10524int bpf_link__fd(const struct bpf_link *link)
10525{
10526	return link->fd;
10527}
10528
10529const char *bpf_link__pin_path(const struct bpf_link *link)
10530{
10531	return link->pin_path;
10532}
10533
10534static int bpf_link__detach_fd(struct bpf_link *link)
10535{
10536	return libbpf_err_errno(close(link->fd));
10537}
10538
10539struct bpf_link *bpf_link__open(const char *path)
10540{
10541	struct bpf_link *link;
10542	int fd;
10543
10544	fd = bpf_obj_get(path);
10545	if (fd < 0) {
10546		fd = -errno;
10547		pr_warn("failed to open link at %s: %d\n", path, fd);
10548		return libbpf_err_ptr(fd);
10549	}
10550
10551	link = calloc(1, sizeof(*link));
10552	if (!link) {
10553		close(fd);
10554		return libbpf_err_ptr(-ENOMEM);
10555	}
10556	link->detach = &bpf_link__detach_fd;
10557	link->fd = fd;
10558
10559	link->pin_path = strdup(path);
10560	if (!link->pin_path) {
10561		bpf_link__destroy(link);
10562		return libbpf_err_ptr(-ENOMEM);
10563	}
10564
10565	return link;
10566}
10567
10568int bpf_link__detach(struct bpf_link *link)
10569{
10570	return bpf_link_detach(link->fd) ? -errno : 0;
10571}
10572
10573int bpf_link__pin(struct bpf_link *link, const char *path)
10574{
10575	int err;
10576
10577	if (link->pin_path)
10578		return libbpf_err(-EBUSY);
10579	err = make_parent_dir(path);
10580	if (err)
10581		return libbpf_err(err);
10582	err = check_path(path);
10583	if (err)
10584		return libbpf_err(err);
10585
10586	link->pin_path = strdup(path);
10587	if (!link->pin_path)
10588		return libbpf_err(-ENOMEM);
10589
10590	if (bpf_obj_pin(link->fd, link->pin_path)) {
10591		err = -errno;
10592		zfree(&link->pin_path);
10593		return libbpf_err(err);
10594	}
10595
10596	pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
10597	return 0;
10598}
10599
10600int bpf_link__unpin(struct bpf_link *link)
10601{
10602	int err;
10603
10604	if (!link->pin_path)
10605		return libbpf_err(-EINVAL);
10606
10607	err = unlink(link->pin_path);
10608	if (err != 0)
10609		return -errno;
10610
10611	pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
10612	zfree(&link->pin_path);
10613	return 0;
10614}
10615
10616struct bpf_link_perf {
10617	struct bpf_link link;
10618	int perf_event_fd;
10619	/* legacy kprobe support: keep track of probe identifier and type */
10620	char *legacy_probe_name;
10621	bool legacy_is_kprobe;
10622	bool legacy_is_retprobe;
10623};
10624
10625static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
10626static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);
10627
10628static int bpf_link_perf_detach(struct bpf_link *link)
10629{
10630	struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10631	int err = 0;
10632
10633	if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
10634		err = -errno;
10635
10636	if (perf_link->perf_event_fd != link->fd)
10637		close(perf_link->perf_event_fd);
10638	close(link->fd);
10639
10640	/* legacy uprobe/kprobe needs to be removed after perf event fd closure */
10641	if (perf_link->legacy_probe_name) {
10642		if (perf_link->legacy_is_kprobe) {
10643			err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
10644							 perf_link->legacy_is_retprobe);
10645		} else {
10646			err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
10647							 perf_link->legacy_is_retprobe);
10648		}
10649	}
10650
10651	return err;
10652}
10653
10654static void bpf_link_perf_dealloc(struct bpf_link *link)
10655{
10656	struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10657
10658	free(perf_link->legacy_probe_name);
10659	free(perf_link);
10660}
10661
10662struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
10663						     const struct bpf_perf_event_opts *opts)
10664{
10665	char errmsg[STRERR_BUFSIZE];
10666	struct bpf_link_perf *link;
10667	int prog_fd, link_fd = -1, err;
10668	bool force_ioctl_attach;
10669
10670	if (!OPTS_VALID(opts, bpf_perf_event_opts))
10671		return libbpf_err_ptr(-EINVAL);
10672
10673	if (pfd < 0) {
10674		pr_warn("prog '%s': invalid perf event FD %d\n",
10675			prog->name, pfd);
10676		return libbpf_err_ptr(-EINVAL);
10677	}
10678	prog_fd = bpf_program__fd(prog);
10679	if (prog_fd < 0) {
10680		pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
10681			prog->name);
10682		return libbpf_err_ptr(-EINVAL);
10683	}
10684
10685	link = calloc(1, sizeof(*link));
10686	if (!link)
10687		return libbpf_err_ptr(-ENOMEM);
10688	link->link.detach = &bpf_link_perf_detach;
10689	link->link.dealloc = &bpf_link_perf_dealloc;
10690	link->perf_event_fd = pfd;
10691
10692	force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false);
10693	if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) {
10694		DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
10695			.perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
10696
10697		link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
10698		if (link_fd < 0) {
10699			err = -errno;
10700			pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
10701				prog->name, pfd,
10702				err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10703			goto err_out;
10704		}
10705		link->link.fd = link_fd;
10706	} else {
10707		if (OPTS_GET(opts, bpf_cookie, 0)) {
10708			pr_warn("prog '%s': user context value is not supported\n", prog->name);
10709			err = -EOPNOTSUPP;
10710			goto err_out;
10711		}
10712
10713		if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
10714			err = -errno;
10715			pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
10716				prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10717			if (err == -EPROTO)
10718				pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
10719					prog->name, pfd);
10720			goto err_out;
10721		}
10722		link->link.fd = pfd;
10723	}
10724	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
10725		err = -errno;
10726		pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
10727			prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10728		goto err_out;
10729	}
10730
10731	return &link->link;
10732err_out:
10733	if (link_fd >= 0)
10734		close(link_fd);
10735	free(link);
10736	return libbpf_err_ptr(err);
10737}
10738
10739struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
10740{
10741	return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
10742}
10743
10744/*
10745 * this function is expected to parse an integer in the range of [0, 2^31-1]
10746 * from the given file using scanf format string fmt. If the actual parsed
10747 * value is negative, the result might be indistinguishable from an error
10748 */
10749static int parse_uint_from_file(const char *file, const char *fmt)
10750{
10751	char buf[STRERR_BUFSIZE];
10752	int err, ret;
10753	FILE *f;
10754
10755	f = fopen(file, "re");
10756	if (!f) {
10757		err = -errno;
10758		pr_debug("failed to open '%s': %s\n", file,
10759			 libbpf_strerror_r(err, buf, sizeof(buf)));
10760		return err;
10761	}
10762	err = fscanf(f, fmt, &ret);
10763	if (err != 1) {
10764		err = err == EOF ? -EIO : -errno;
10765		pr_debug("failed to parse '%s': %s\n", file,
10766			libbpf_strerror_r(err, buf, sizeof(buf)));
10767		fclose(f);
10768		return err;
10769	}
10770	fclose(f);
10771	return ret;
10772}
10773
10774static int determine_kprobe_perf_type(void)
10775{
10776	const char *file = "/sys/bus/event_source/devices/kprobe/type";
10777
10778	return parse_uint_from_file(file, "%d\n");
10779}
10780
10781static int determine_uprobe_perf_type(void)
10782{
10783	const char *file = "/sys/bus/event_source/devices/uprobe/type";
10784
10785	return parse_uint_from_file(file, "%d\n");
10786}
10787
10788static int determine_kprobe_retprobe_bit(void)
10789{
10790	const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
10791
10792	return parse_uint_from_file(file, "config:%d\n");
10793}
10794
10795static int determine_uprobe_retprobe_bit(void)
10796{
10797	const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
10798
10799	return parse_uint_from_file(file, "config:%d\n");
10800}
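
/* On a typical kernel, .../devices/kprobe/type contains a single small integer
 * (the dynamic PMU type; the exact value varies between kernels) and
 * .../devices/kprobe/format/retprobe contains "config:0", i.e. the retprobe
 * flag is bit 0 of perf_event_attr.config. The uprobe PMU exposes the same
 * layout, which is what the helpers above parse.
 */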
10801
10802#define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
10803#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
10804
10805static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
10806				 uint64_t offset, int pid, size_t ref_ctr_off)
10807{
10808	const size_t attr_sz = sizeof(struct perf_event_attr);
10809	struct perf_event_attr attr;
10810	char errmsg[STRERR_BUFSIZE];
10811	int type, pfd;
10812
10813	if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
10814		return -EINVAL;
10815
10816	memset(&attr, 0, attr_sz);
10817
10818	type = uprobe ? determine_uprobe_perf_type()
10819		      : determine_kprobe_perf_type();
10820	if (type < 0) {
10821		pr_warn("failed to determine %s perf type: %s\n",
10822			uprobe ? "uprobe" : "kprobe",
10823			libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
10824		return type;
10825	}
10826	if (retprobe) {
10827		int bit = uprobe ? determine_uprobe_retprobe_bit()
10828				 : determine_kprobe_retprobe_bit();
10829
10830		if (bit < 0) {
10831			pr_warn("failed to determine %s retprobe bit: %s\n",
10832				uprobe ? "uprobe" : "kprobe",
10833				libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
10834			return bit;
10835		}
10836		attr.config |= 1 << bit;
10837	}
10838	attr.size = attr_sz;
10839	attr.type = type;
10840	attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
10841	attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
10842	attr.config2 = offset;		 /* kprobe_addr or probe_offset */
10843
10844	/* pid filter is meaningful only for uprobes */
10845	pfd = syscall(__NR_perf_event_open, &attr,
10846		      pid < 0 ? -1 : pid /* pid */,
10847		      pid == -1 ? 0 : -1 /* cpu */,
10848		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10849	return pfd >= 0 ? pfd : -errno;
10850}
10851
10852static int append_to_file(const char *file, const char *fmt, ...)
10853{
10854	int fd, n, err = 0;
10855	va_list ap;
10856	char buf[1024];
10857
10858	va_start(ap, fmt);
10859	n = vsnprintf(buf, sizeof(buf), fmt, ap);
10860	va_end(ap);
10861
10862	if (n < 0 || n >= sizeof(buf))
10863		return -EINVAL;
10864
10865	fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
10866	if (fd < 0)
10867		return -errno;
10868
10869	if (write(fd, buf, n) < 0)
10870		err = -errno;
10871
10872	close(fd);
10873	return err;
10874}
10875
10876#define DEBUGFS "/sys/kernel/debug/tracing"
10877#define TRACEFS "/sys/kernel/tracing"
10878
10879static bool use_debugfs(void)
10880{
10881	static int has_debugfs = -1;
10882
10883	if (has_debugfs < 0)
10884		has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;
10885
10886	return has_debugfs == 1;
10887}
10888
10889static const char *tracefs_path(void)
10890{
10891	return use_debugfs() ? DEBUGFS : TRACEFS;
10892}
10893
10894static const char *tracefs_kprobe_events(void)
10895{
10896	return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
10897}
10898
10899static const char *tracefs_uprobe_events(void)
10900{
10901	return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
10902}
10903
10904static const char *tracefs_available_filter_functions(void)
10905{
10906	return use_debugfs() ? DEBUGFS"/available_filter_functions"
10907			     : TRACEFS"/available_filter_functions";
10908}
10909
10910static const char *tracefs_available_filter_functions_addrs(void)
10911{
10912	return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs"
10913			     : TRACEFS"/available_filter_functions_addrs";
10914}
10915
10916static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
10917					 const char *kfunc_name, size_t offset)
10918{
10919	static int index = 0;
10920	int i;
10921
10922	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
10923		 __sync_fetch_and_add(&index, 1));
10924
10925	/* sanitize kfunc_name in the probe name */
10926	for (i = 0; buf[i]; i++) {
10927		if (!isalnum(buf[i]))
10928			buf[i] = '_';
10929	}
10930}
10931
10932static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
10933				   const char *kfunc_name, size_t offset)
10934{
10935	return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
10936			      retprobe ? 'r' : 'p',
10937			      retprobe ? "kretprobes" : "kprobes",
10938			      probe_name, kfunc_name, offset);
10939}
10940
10941static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
10942{
10943	return append_to_file(tracefs_kprobe_events(), "-:%s/%s",
10944			      retprobe ? "kretprobes" : "kprobes", probe_name);
10945}
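
/* For illustration, with a probe_name of "libbpf_1234_do_unlinkat_0x0_0" (the
 * format produced by gen_kprobe_legacy_event_name() above), the lines appended
 * to <tracefs>/kprobe_events look roughly like (values are hypothetical):
 *
 *	p:kprobes/libbpf_1234_do_unlinkat_0x0_0 do_unlinkat+0x0	(kprobe)
 *	r:kretprobes/libbpf_1234_do_unlinkat_0x0_0 do_unlinkat+0x0	(kretprobe)
 *	-:kprobes/libbpf_1234_do_unlinkat_0x0_0			(removal)
 */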
10946
10947static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
10948{
10949	char file[256];
10950
10951	snprintf(file, sizeof(file), "%s/events/%s/%s/id",
10952		 tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name);
10953
10954	return parse_uint_from_file(file, "%d\n");
10955}
10956
10957static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
10958					 const char *kfunc_name, size_t offset, int pid)
10959{
10960	const size_t attr_sz = sizeof(struct perf_event_attr);
10961	struct perf_event_attr attr;
10962	char errmsg[STRERR_BUFSIZE];
10963	int type, pfd, err;
10964
10965	err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
10966	if (err < 0) {
10967		pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
10968			kfunc_name, offset,
10969			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10970		return err;
10971	}
10972	type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
10973	if (type < 0) {
10974		err = type;
10975		pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
10976			kfunc_name, offset,
10977			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10978		goto err_clean_legacy;
10979	}
10980
10981	memset(&attr, 0, attr_sz);
10982	attr.size = attr_sz;
10983	attr.config = type;
10984	attr.type = PERF_TYPE_TRACEPOINT;
10985
10986	pfd = syscall(__NR_perf_event_open, &attr,
10987		      pid < 0 ? -1 : pid, /* pid */
10988		      pid == -1 ? 0 : -1, /* cpu */
10989		      -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
10990	if (pfd < 0) {
10991		err = -errno;
10992		pr_warn("legacy kprobe perf_event_open() failed: %s\n",
10993			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10994		goto err_clean_legacy;
10995	}
10996	return pfd;
10997
10998err_clean_legacy:
10999	/* Clear the newly added legacy kprobe_event */
11000	remove_kprobe_event_legacy(probe_name, retprobe);
11001	return err;
11002}
11003
11004static const char *arch_specific_syscall_pfx(void)
11005{
11006#if defined(__x86_64__)
11007	return "x64";
11008#elif defined(__i386__)
11009	return "ia32";
11010#elif defined(__s390x__)
11011	return "s390x";
11012#elif defined(__s390__)
11013	return "s390";
11014#elif defined(__arm__)
11015	return "arm";
11016#elif defined(__aarch64__)
11017	return "arm64";
11018#elif defined(__mips__)
11019	return "mips";
11020#elif defined(__riscv)
11021	return "riscv";
11022#elif defined(__powerpc__)
11023	return "powerpc";
11024#elif defined(__powerpc64__)
11025	return "powerpc64";
11026#else
11027	return NULL;
11028#endif
11029}
11030
11031static int probe_kern_syscall_wrapper(void)
11032{
11033	char syscall_name[64];
11034	const char *ksys_pfx;
11035
11036	ksys_pfx = arch_specific_syscall_pfx();
11037	if (!ksys_pfx)
11038		return 0;
11039
11040	snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
11041
11042	if (determine_kprobe_perf_type() >= 0) {
11043		int pfd;
11044
11045		pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
11046		if (pfd >= 0)
11047			close(pfd);
11048
11049		return pfd >= 0 ? 1 : 0;
11050	} else { /* legacy mode */
11051		char probe_name[128];
11052
11053		gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
11054		if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
11055			return 0;
11056
11057		(void)remove_kprobe_event_legacy(probe_name, false);
11058		return 1;
11059	}
11060}
11061
11062struct bpf_link *
11063bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
11064				const char *func_name,
11065				const struct bpf_kprobe_opts *opts)
11066{
11067	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11068	enum probe_attach_mode attach_mode;
11069	char errmsg[STRERR_BUFSIZE];
11070	char *legacy_probe = NULL;
11071	struct bpf_link *link;
11072	size_t offset;
11073	bool retprobe, legacy;
11074	int pfd, err;
11075
11076	if (!OPTS_VALID(opts, bpf_kprobe_opts))
11077		return libbpf_err_ptr(-EINVAL);
11078
11079	attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
11080	retprobe = OPTS_GET(opts, retprobe, false);
11081	offset = OPTS_GET(opts, offset, 0);
11082	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11083
11084	legacy = determine_kprobe_perf_type() < 0;
11085	switch (attach_mode) {
11086	case PROBE_ATTACH_MODE_LEGACY:
11087		legacy = true;
11088		pe_opts.force_ioctl_attach = true;
11089		break;
11090	case PROBE_ATTACH_MODE_PERF:
11091		if (legacy)
11092			return libbpf_err_ptr(-ENOTSUP);
11093		pe_opts.force_ioctl_attach = true;
11094		break;
11095	case PROBE_ATTACH_MODE_LINK:
11096		if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
11097			return libbpf_err_ptr(-ENOTSUP);
11098		break;
11099	case PROBE_ATTACH_MODE_DEFAULT:
11100		break;
11101	default:
11102		return libbpf_err_ptr(-EINVAL);
11103	}
11104
11105	if (!legacy) {
11106		pfd = perf_event_open_probe(false /* uprobe */, retprobe,
11107					    func_name, offset,
11108					    -1 /* pid */, 0 /* ref_ctr_off */);
11109	} else {
11110		char probe_name[256];
11111
11112		gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
11113					     func_name, offset);
11114
11115		legacy_probe = strdup(probe_name);
11116		if (!legacy_probe)
11117			return libbpf_err_ptr(-ENOMEM);
11118
11119		pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
11120						    offset, -1 /* pid */);
11121	}
11122	if (pfd < 0) {
11123		err = -errno;
11124		pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
11125			prog->name, retprobe ? "kretprobe" : "kprobe",
11126			func_name, offset,
11127			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11128		goto err_out;
11129	}
11130	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
11131	err = libbpf_get_error(link);
11132	if (err) {
11133		close(pfd);
11134		pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
11135			prog->name, retprobe ? "kretprobe" : "kprobe",
11136			func_name, offset,
11137			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11138		goto err_clean_legacy;
11139	}
11140	if (legacy) {
11141		struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
11142
11143		perf_link->legacy_probe_name = legacy_probe;
11144		perf_link->legacy_is_kprobe = true;
11145		perf_link->legacy_is_retprobe = retprobe;
11146	}
11147
11148	return link;
11149
11150err_clean_legacy:
11151	if (legacy)
11152		remove_kprobe_event_legacy(legacy_probe, retprobe);
11153err_out:
11154	free(legacy_probe);
11155	return libbpf_err_ptr(err);
11156}
11157
11158struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
11159					    bool retprobe,
11160					    const char *func_name)
11161{
11162	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
11163		.retprobe = retprobe,
11164	);
11165
11166	return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
11167}
11168
11169struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
11170					      const char *syscall_name,
11171					      const struct bpf_ksyscall_opts *opts)
11172{
11173	LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
11174	char func_name[128];
11175
11176	if (!OPTS_VALID(opts, bpf_ksyscall_opts))
11177		return libbpf_err_ptr(-EINVAL);
11178
11179	if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
11180		/* arch_specific_syscall_pfx() should never return NULL here
11181		 * because it is guarded by kernel_supports(). However, since
11182		 * the compiler does not know that, we keep an explicit fallback
11183		 * conditional as well.
11184		 */
11185		snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
11186			 arch_specific_syscall_pfx() ? : "", syscall_name);
11187	} else {
11188		snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
11189	}
11190
11191	kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
11192	kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11193
11194	return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
11195}
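
/* Example (hypothetical caller code): auto-resolve the arch-specific syscall
 * symbol instead of hard-coding it in a plain kprobe:
 *
 *	// on x86-64 with syscall wrappers this kprobes "__x64_sys_unlinkat";
 *	// on kernels without wrappers it falls back to "__se_sys_unlinkat"
 *	link = bpf_program__attach_ksyscall(prog, "unlinkat", NULL);
 */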
11196
11197/* Adapted from perf/util/string.c */
11198bool glob_match(const char *str, const char *pat)
11199{
11200	while (*str && *pat && *pat != '*') {
11201		if (*pat == '?') {      /* Matches any single character */
11202			str++;
11203			pat++;
11204			continue;
11205		}
11206		if (*str != *pat)
11207			return false;
11208		str++;
11209		pat++;
11210	}
11211	/* Check wild card */
11212	if (*pat == '*') {
11213		while (*pat == '*')
11214			pat++;
11215		if (!*pat) /* Tail wild card matches all */
11216			return true;
11217		while (*str)
11218			if (glob_match(str++, pat))
11219				return true;
11220	}
11221	return !*str && !*pat;
11222}
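
/* A few illustrative matches for the semantics above ('*' matches any
 * sequence, '?' matches any single character):
 *
 *	glob_match("tcp_v4_connect", "tcp_*")		-> true
 *	glob_match("tcp_v4_connect", "tcp_v?_*")	-> true
 *	glob_match("udp_sendmsg", "tcp_*")		-> false
 */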
11223
11224struct kprobe_multi_resolve {
11225	const char *pattern;
11226	unsigned long *addrs;
11227	size_t cap;
11228	size_t cnt;
11229};
11230
11231struct avail_kallsyms_data {
11232	char **syms;
11233	size_t cnt;
11234	struct kprobe_multi_resolve *res;
11235};
11236
11237static int avail_func_cmp(const void *a, const void *b)
11238{
11239	return strcmp(*(const char **)a, *(const char **)b);
11240}
11241
11242static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
11243			     const char *sym_name, void *ctx)
11244{
11245	struct avail_kallsyms_data *data = ctx;
11246	struct kprobe_multi_resolve *res = data->res;
11247	int err;
11248
11249	if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp))
11250		return 0;
11251
11252	err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1);
11253	if (err)
11254		return err;
11255
11256	res->addrs[res->cnt++] = (unsigned long)sym_addr;
11257	return 0;
11258}
11259
11260static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res)
11261{
11262	const char *available_functions_file = tracefs_available_filter_functions();
11263	struct avail_kallsyms_data data;
11264	char sym_name[500];
11265	FILE *f;
11266	int err = 0, ret, i;
11267	char **syms = NULL;
11268	size_t cap = 0, cnt = 0;
11269
11270	f = fopen(available_functions_file, "re");
11271	if (!f) {
11272		err = -errno;
11273		pr_warn("failed to open %s: %d\n", available_functions_file, err);
11274		return err;
11275	}
11276
11277	while (true) {
11278		char *name;
11279
11280		ret = fscanf(f, "%499s%*[^\n]\n", sym_name);
11281		if (ret == EOF && feof(f))
11282			break;
11283
11284		if (ret != 1) {
11285			pr_warn("failed to parse available_filter_functions entry: %d\n", ret);
11286			err = -EINVAL;
11287			goto cleanup;
11288		}
11289
11290		if (!glob_match(sym_name, res->pattern))
11291			continue;
11292
11293		err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1);
11294		if (err)
11295			goto cleanup;
11296
11297		name = strdup(sym_name);
11298		if (!name) {
11299			err = -errno;
11300			goto cleanup;
11301		}
11302
11303		syms[cnt++] = name;
11304	}
11305
11306	/* no entries found, bail out */
11307	if (cnt == 0) {
11308		err = -ENOENT;
11309		goto cleanup;
11310	}
11311
11312	/* sort available functions */
11313	qsort(syms, cnt, sizeof(*syms), avail_func_cmp);
11314
11315	data.syms = syms;
11316	data.res = res;
11317	data.cnt = cnt;
11318	libbpf_kallsyms_parse(avail_kallsyms_cb, &data);
11319
11320	if (res->cnt == 0)
11321		err = -ENOENT;
11322
11323cleanup:
11324	for (i = 0; i < cnt; i++)
11325		free((char *)syms[i]);
11326	free(syms);
11327
11328	fclose(f);
11329	return err;
11330}
11331
11332static bool has_available_filter_functions_addrs(void)
11333{
11334	return access(tracefs_available_filter_functions_addrs(), R_OK) != -1;
11335}
11336
11337static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res)
11338{
11339	const char *available_path = tracefs_available_filter_functions_addrs();
11340	char sym_name[500];
11341	FILE *f;
11342	int ret, err = 0;
11343	unsigned long long sym_addr;
11344
11345	f = fopen(available_path, "re");
11346	if (!f) {
11347		err = -errno;
11348		pr_warn("failed to open %s: %d\n", available_path, err);
11349		return err;
11350	}
11351
11352	while (true) {
11353		ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name);
11354		if (ret == EOF && feof(f))
11355			break;
11356
11357		if (ret != 2) {
11358			pr_warn("failed to parse available_filter_functions_addrs entry: %d\n",
11359				ret);
11360			err = -EINVAL;
11361			goto cleanup;
11362		}
11363
11364		if (!glob_match(sym_name, res->pattern))
11365			continue;
11366
11367		err = libbpf_ensure_mem((void **)&res->addrs, &res->cap,
11368					sizeof(*res->addrs), res->cnt + 1);
11369		if (err)
11370			goto cleanup;
11371
11372		res->addrs[res->cnt++] = (unsigned long)sym_addr;
11373	}
11374
11375	if (res->cnt == 0)
11376		err = -ENOENT;
11377
11378cleanup:
11379	fclose(f);
11380	return err;
11381}
11382
11383struct bpf_link *
11384bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
11385				      const char *pattern,
11386				      const struct bpf_kprobe_multi_opts *opts)
11387{
11388	LIBBPF_OPTS(bpf_link_create_opts, lopts);
11389	struct kprobe_multi_resolve res = {
11390		.pattern = pattern,
11391	};
11392	struct bpf_link *link = NULL;
11393	char errmsg[STRERR_BUFSIZE];
11394	const unsigned long *addrs;
11395	int err, link_fd, prog_fd;
11396	const __u64 *cookies;
11397	const char **syms;
11398	bool retprobe;
11399	size_t cnt;
11400
11401	if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
11402		return libbpf_err_ptr(-EINVAL);
11403
11404	syms    = OPTS_GET(opts, syms, false);
11405	addrs   = OPTS_GET(opts, addrs, false);
11406	cnt     = OPTS_GET(opts, cnt, false);
11407	cookies = OPTS_GET(opts, cookies, false);
11408
11409	if (!pattern && !addrs && !syms)
11410		return libbpf_err_ptr(-EINVAL);
11411	if (pattern && (addrs || syms || cookies || cnt))
11412		return libbpf_err_ptr(-EINVAL);
11413	if (!pattern && !cnt)
11414		return libbpf_err_ptr(-EINVAL);
11415	if (addrs && syms)
11416		return libbpf_err_ptr(-EINVAL);
11417
11418	if (pattern) {
11419		if (has_available_filter_functions_addrs())
11420			err = libbpf_available_kprobes_parse(&res);
11421		else
11422			err = libbpf_available_kallsyms_parse(&res);
11423		if (err)
11424			goto error;
11425		addrs = res.addrs;
11426		cnt = res.cnt;
11427	}
11428
11429	retprobe = OPTS_GET(opts, retprobe, false);
11430
11431	lopts.kprobe_multi.syms = syms;
11432	lopts.kprobe_multi.addrs = addrs;
11433	lopts.kprobe_multi.cookies = cookies;
11434	lopts.kprobe_multi.cnt = cnt;
11435	lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;
11436
11437	link = calloc(1, sizeof(*link));
11438	if (!link) {
11439		err = -ENOMEM;
11440		goto error;
11441	}
11442	link->detach = &bpf_link__detach_fd;
11443
11444	prog_fd = bpf_program__fd(prog);
11445	link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts);
11446	if (link_fd < 0) {
11447		err = -errno;
11448		pr_warn("prog '%s': failed to attach: %s\n",
11449			prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11450		goto error;
11451	}
11452	link->fd = link_fd;
11453	free(res.addrs);
11454	return link;
11455
11456error:
11457	free(link);
11458	free(res.addrs);
11459	return libbpf_err_ptr(err);
11460}
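
/* Usage sketch (hypothetical caller code): attach one program to every kernel
 * function matching a glob pattern, resolved from the tracefs
 * available_filter_functions* files parsed above:
 *
 *	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .retprobe = false);
 *	struct bpf_link *link;
 *	long err;
 *
 *	link = bpf_program__attach_kprobe_multi_opts(prog, "tcp_*", &opts);
 *	err = libbpf_get_error(link);
 */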
11461
11462static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11463{
11464	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
11465	unsigned long offset = 0;
11466	const char *func_name;
11467	char *func;
11468	int n;
11469
11470	*link = NULL;
11471
11472	/* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
11473	if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
11474		return 0;
11475
11476	opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
11477	if (opts.retprobe)
11478		func_name = prog->sec_name + sizeof("kretprobe/") - 1;
11479	else
11480		func_name = prog->sec_name + sizeof("kprobe/") - 1;
11481
11482	n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
11483	if (n < 1) {
11484		pr_warn("kprobe name is invalid: %s\n", func_name);
11485		return -EINVAL;
11486	}
11487	if (opts.retprobe && offset != 0) {
11488		free(func);
11489		pr_warn("kretprobes do not support offset specification\n");
11490		return -EINVAL;
11491	}
11492
11493	opts.offset = offset;
11494	*link = bpf_program__attach_kprobe_opts(prog, func, &opts);
11495	free(func);
11496	return libbpf_get_error(*link);
11497}
11498
11499static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11500{
11501	LIBBPF_OPTS(bpf_ksyscall_opts, opts);
11502	const char *syscall_name;
11503
11504	*link = NULL;
11505
11506	/* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
11507	if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
11508		return 0;
11509
11510	opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
11511	if (opts.retprobe)
11512		syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
11513	else
11514		syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
11515
11516	*link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
11517	return *link ? 0 : -errno;
11518}
11519
11520static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11521{
11522	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
11523	const char *spec;
11524	char *pattern;
11525	int n;
11526
11527	*link = NULL;
11528
11529	/* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
11530	if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
11531	    strcmp(prog->sec_name, "kretprobe.multi") == 0)
11532		return 0;
11533
11534	opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
11535	if (opts.retprobe)
11536		spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
11537	else
11538		spec = prog->sec_name + sizeof("kprobe.multi/") - 1;
11539
11540	n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
11541	if (n < 1) {
11542		pr_warn("kprobe multi pattern is invalid: %s\n", spec);
11543		return -EINVAL;
11544	}
11545
11546	*link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
11547	free(pattern);
11548	return libbpf_get_error(*link);
11549}
11550
11551static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11552{
11553	char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
11554	LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
11555	int n, ret = -EINVAL;
11556
11557	*link = NULL;
11558
11559	n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
11560		   &probe_type, &binary_path, &func_name);
11561	switch (n) {
11562	case 1:
11563		/* handle SEC("u[ret]probe.multi") - format is valid, but auto-attach is impossible. */
11564		ret = 0;
11565		break;
11566	case 3:
11567		opts.retprobe = strcmp(probe_type, "uretprobe.multi") == 0;
11568		*link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts);
11569		ret = libbpf_get_error(*link);
11570		break;
11571	default:
11572		pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
11573			prog->sec_name);
11574		break;
11575	}
11576	free(probe_type);
11577	free(binary_path);
11578	free(func_name);
11579	return ret;
11580}
11581
11582static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
11583					 const char *binary_path, uint64_t offset)
11584{
11585	int i;
11586
11587	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);
11588
11589	/* sanitize binary_path in the probe name */
11590	for (i = 0; buf[i]; i++) {
11591		if (!isalnum(buf[i]))
11592			buf[i] = '_';
11593	}
11594}
11595
11596static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
11597					  const char *binary_path, size_t offset)
11598{
11599	return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
11600			      retprobe ? 'r' : 'p',
11601			      retprobe ? "uretprobes" : "uprobes",
11602			      probe_name, binary_path, offset);
11603}
11604
11605static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
11606{
11607	return append_to_file(tracefs_uprobe_events(), "-:%s/%s",
11608			      retprobe ? "uretprobes" : "uprobes", probe_name);
11609}
11610
11611static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
11612{
11613	char file[512];
11614
11615	snprintf(file, sizeof(file), "%s/events/%s/%s/id",
11616		 tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name);
11617
11618	return parse_uint_from_file(file, "%d\n");
11619}
11620
11621static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
11622					 const char *binary_path, size_t offset, int pid)
11623{
11624	const size_t attr_sz = sizeof(struct perf_event_attr);
11625	struct perf_event_attr attr;
11626	int type, pfd, err;
11627
11628	err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
11629	if (err < 0) {
11630		pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n",
11631			binary_path, (size_t)offset, err);
11632		return err;
11633	}
11634	type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
11635	if (type < 0) {
11636		err = type;
11637		pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
11638			binary_path, offset, err);
11639		goto err_clean_legacy;
11640	}
11641
11642	memset(&attr, 0, attr_sz);
11643	attr.size = attr_sz;
11644	attr.config = type;
11645	attr.type = PERF_TYPE_TRACEPOINT;
11646
11647	pfd = syscall(__NR_perf_event_open, &attr,
11648		      pid < 0 ? -1 : pid, /* pid */
11649		      pid == -1 ? 0 : -1, /* cpu */
11650		      -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
11651	if (pfd < 0) {
11652		err = -errno;
11653		pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
11654		goto err_clean_legacy;
11655	}
11656	return pfd;
11657
11658err_clean_legacy:
11659	/* Clear the newly added legacy uprobe_event */
11660	remove_uprobe_event_legacy(probe_name, retprobe);
11661	return err;
11662}
11663
11664/* Find the offset of a function name in the archive specified by path.
11665 * Currently supported are .zip files that do not compress their contents, as
11666 * used on Android in the form of APKs, for example. "file_name" is the name of
11667 * the ELF file inside the archive. "func_name" matches a symbol name or
11668 * name@@LIB for library functions.
11669 *
11670 * An overview of the APK format specifically is provided here:
11671 * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
11672 */
11673static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
11674					      const char *func_name)
11675{
11676	struct zip_archive *archive;
11677	struct zip_entry entry;
11678	long ret;
11679	Elf *elf;
11680
11681	archive = zip_archive_open(archive_path);
11682	if (IS_ERR(archive)) {
11683		ret = PTR_ERR(archive);
11684		pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
11685		return ret;
11686	}
11687
11688	ret = zip_archive_find_entry(archive, file_name, &entry);
11689	if (ret) {
11690		pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
11691			archive_path, ret);
11692		goto out;
11693	}
11694	pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
11695		 (unsigned long)entry.data_offset);
11696
11697	if (entry.compression) {
11698		pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
11699			archive_path);
11700		ret = -LIBBPF_ERRNO__FORMAT;
11701		goto out;
11702	}
11703
11704	elf = elf_memory((void *)entry.data, entry.data_length);
11705	if (!elf) {
11706		pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
11707			elf_errmsg(-1));
11708		ret = -LIBBPF_ERRNO__LIBELF;
11709		goto out;
11710	}
11711
11712	ret = elf_find_func_offset(elf, file_name, func_name);
11713	if (ret > 0) {
11714		pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
11715			 func_name, file_name, archive_path, entry.data_offset, ret,
11716			 ret + entry.data_offset);
11717		ret += entry.data_offset;
11718	}
11719	elf_end(elf);
11720
11721out:
11722	zip_archive_close(archive);
11723	return ret;
11724}
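
/* Such an archive is typically addressed by callers of
 * bpf_program__attach_uprobe_opts() below using the "!/" separator, e.g. a
 * hypothetical Android path like:
 *
 *	/data/app/com.example/base.apk!/lib/arm64-v8a/libexample.so
 *
 * where everything before "!/" is the archive and everything after it is the
 * file_name within the archive.
 */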
11725
11726static const char *arch_specific_lib_paths(void)
11727{
11728	/*
11729	 * Based on https://packages.debian.org/sid/libc6.
11730	 *
11731	 * Assume that the traced program is built for the same architecture
11732	 * as libbpf, which should cover the vast majority of cases.
11733	 */
11734#if defined(__x86_64__)
11735	return "/lib/x86_64-linux-gnu";
11736#elif defined(__i386__)
11737	return "/lib/i386-linux-gnu";
11738#elif defined(__s390x__)
11739	return "/lib/s390x-linux-gnu";
11740#elif defined(__s390__)
11741	return "/lib/s390-linux-gnu";
11742#elif defined(__arm__) && defined(__SOFTFP__)
11743	return "/lib/arm-linux-gnueabi";
11744#elif defined(__arm__) && !defined(__SOFTFP__)
11745	return "/lib/arm-linux-gnueabihf";
11746#elif defined(__aarch64__)
11747	return "/lib/aarch64-linux-gnu";
11748#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
11749	return "/lib/mips64el-linux-gnuabi64";
11750#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
11751	return "/lib/mipsel-linux-gnu";
11752#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
11753	return "/lib/powerpc64le-linux-gnu";
11754#elif defined(__sparc__) && defined(__arch64__)
11755	return "/lib/sparc64-linux-gnu";
11756#elif defined(__riscv) && __riscv_xlen == 64
11757	return "/lib/riscv64-linux-gnu";
11758#else
11759	return NULL;
11760#endif
11761}
11762
11763/* Get full path to program/shared library. */
11764static int resolve_full_path(const char *file, char *result, size_t result_sz)
11765{
11766	const char *search_paths[3] = {};
11767	int i, perm;
11768
11769	if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
11770		search_paths[0] = getenv("LD_LIBRARY_PATH");
11771		search_paths[1] = "/usr/lib64:/usr/lib";
11772		search_paths[2] = arch_specific_lib_paths();
11773		perm = R_OK;
11774	} else {
11775		search_paths[0] = getenv("PATH");
11776		search_paths[1] = "/usr/bin:/usr/sbin";
11777		perm = R_OK | X_OK;
11778	}
11779
11780	for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
11781		const char *s;
11782
11783		if (!search_paths[i])
11784			continue;
11785		for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
11786			char *next_path;
11787			int seg_len;
11788
11789			if (s[0] == ':')
11790				s++;
11791			next_path = strchr(s, ':');
11792			seg_len = next_path ? next_path - s : strlen(s);
11793			if (!seg_len)
11794				continue;
11795			snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
11796			/* ensure it has required permissions */
11797			if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
11798				continue;
11799			pr_debug("resolved '%s' to '%s'\n", file, result);
11800			return 0;
11801		}
11802	}
11803	return -ENOENT;
11804}
11805
11806struct bpf_link *
11807bpf_program__attach_uprobe_multi(const struct bpf_program *prog,
11808				 pid_t pid,
11809				 const char *path,
11810				 const char *func_pattern,
11811				 const struct bpf_uprobe_multi_opts *opts)
11812{
11813	const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL;
11814	LIBBPF_OPTS(bpf_link_create_opts, lopts);
11815	unsigned long *resolved_offsets = NULL;
11816	int err = 0, link_fd, prog_fd;
11817	struct bpf_link *link = NULL;
11818	char errmsg[STRERR_BUFSIZE];
11819	char full_path[PATH_MAX];
11820	const __u64 *cookies;
11821	const char **syms;
11822	size_t cnt;
11823
11824	if (!OPTS_VALID(opts, bpf_uprobe_multi_opts))
11825		return libbpf_err_ptr(-EINVAL);
11826
11827	syms = OPTS_GET(opts, syms, NULL);
11828	offsets = OPTS_GET(opts, offsets, NULL);
11829	ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL);
11830	cookies = OPTS_GET(opts, cookies, NULL);
11831	cnt = OPTS_GET(opts, cnt, 0);
11832
11833	/*
11834	 * The user can specify 2 mutually exclusive sets of inputs:
11835	 *
11836	 * 1) use only path/func_pattern/pid arguments
11837	 *
11838	 * 2) use path/pid with allowed combinations of:
11839	 *    syms/offsets/ref_ctr_offsets/cookies/cnt
11840	 *
11841	 *    - syms and offsets are mutually exclusive
11842	 *    - ref_ctr_offsets and cookies are optional
11843	 *
11844	 * Any other usage results in error.
11845	 */
11846
11847	if (!path)
11848		return libbpf_err_ptr(-EINVAL);
11849	if (!func_pattern && cnt == 0)
11850		return libbpf_err_ptr(-EINVAL);
11851
11852	if (func_pattern) {
11853		if (syms || offsets || ref_ctr_offsets || cookies || cnt)
11854			return libbpf_err_ptr(-EINVAL);
11855	} else {
11856		if (!!syms == !!offsets)
11857			return libbpf_err_ptr(-EINVAL);
11858	}
11859
11860	if (func_pattern) {
11861		if (!strchr(path, '/')) {
11862			err = resolve_full_path(path, full_path, sizeof(full_path));
11863			if (err) {
11864				pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
11865					prog->name, path, err);
11866				return libbpf_err_ptr(err);
11867			}
11868			path = full_path;
11869		}
11870
11871		err = elf_resolve_pattern_offsets(path, func_pattern,
11872						  &resolved_offsets, &cnt);
11873		if (err < 0)
11874			return libbpf_err_ptr(err);
11875		offsets = resolved_offsets;
11876	} else if (syms) {
11877		err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC);
11878		if (err < 0)
11879			return libbpf_err_ptr(err);
11880		offsets = resolved_offsets;
11881	}
11882
11883	lopts.uprobe_multi.path = path;
11884	lopts.uprobe_multi.offsets = offsets;
11885	lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets;
11886	lopts.uprobe_multi.cookies = cookies;
11887	lopts.uprobe_multi.cnt = cnt;
11888	lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? BPF_F_UPROBE_MULTI_RETURN : 0;
11889
11890	if (pid == 0)
11891		pid = getpid();
11892	if (pid > 0)
11893		lopts.uprobe_multi.pid = pid;
11894
11895	link = calloc(1, sizeof(*link));
11896	if (!link) {
11897		err = -ENOMEM;
11898		goto error;
11899	}
11900	link->detach = &bpf_link__detach_fd;
11901
11902	prog_fd = bpf_program__fd(prog);
11903	link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts);
11904	if (link_fd < 0) {
11905		err = -errno;
11906		pr_warn("prog '%s': failed to attach multi-uprobe: %s\n",
11907			prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11908		goto error;
11909	}
11910	link->fd = link_fd;
11911	free(resolved_offsets);
11912	return link;
11913
11914error:
11915	free(resolved_offsets);
11916	free(link);
11917	return libbpf_err_ptr(err);
11918}
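
/* Usage sketches for the two input modes accepted above (hypothetical caller
 * code; paths and symbol names are illustrative):
 *
 * 1) pattern-based, offsets resolved from the binary's ELF symbol table:
 *
 *	link = bpf_program__attach_uprobe_multi(prog, -1, "/usr/lib/libc.so.6",
 *						"pthread_mutex_*", NULL);
 *
 * 2) explicit symbol list via opts (syms and offsets are mutually exclusive):
 *
 *	const char *syms[] = { "malloc", "free" };
 *	LIBBPF_OPTS(bpf_uprobe_multi_opts, opts, .syms = syms, .cnt = 2);
 *
 *	link = bpf_program__attach_uprobe_multi(prog, -1, "/usr/lib/libc.so.6",
 *						NULL, &opts);
 */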
11919
11920LIBBPF_API struct bpf_link *
11921bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
11922				const char *binary_path, size_t func_offset,
11923				const struct bpf_uprobe_opts *opts)
11924{
11925	const char *archive_path = NULL, *archive_sep = NULL;
11926	char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
11927	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11928	enum probe_attach_mode attach_mode;
11929	char full_path[PATH_MAX];
11930	struct bpf_link *link;
11931	size_t ref_ctr_off;
11932	int pfd, err;
11933	bool retprobe, legacy;
11934	const char *func_name;
11935
11936	if (!OPTS_VALID(opts, bpf_uprobe_opts))
11937		return libbpf_err_ptr(-EINVAL);
11938
11939	attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
11940	retprobe = OPTS_GET(opts, retprobe, false);
11941	ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
11942	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11943
11944	if (!binary_path)
11945		return libbpf_err_ptr(-EINVAL);
11946
11947	/* Check if "binary_path" refers to an archive. */
11948	archive_sep = strstr(binary_path, "!/");
11949	if (archive_sep) {
11950		full_path[0] = '\0';
11951		libbpf_strlcpy(full_path, binary_path,
11952			       min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
11953		archive_path = full_path;
11954		binary_path = archive_sep + 2;
11955	} else if (!strchr(binary_path, '/')) {
11956		err = resolve_full_path(binary_path, full_path, sizeof(full_path));
11957		if (err) {
11958			pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
11959				prog->name, binary_path, err);
11960			return libbpf_err_ptr(err);
11961		}
11962		binary_path = full_path;
11963	}
11964	func_name = OPTS_GET(opts, func_name, NULL);
11965	if (func_name) {
11966		long sym_off;
11967
11968		if (archive_path) {
11969			sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
11970								    func_name);
11971			binary_path = archive_path;
11972		} else {
11973			sym_off = elf_find_func_offset_from_file(binary_path, func_name);
11974		}
11975		if (sym_off < 0)
11976			return libbpf_err_ptr(sym_off);
11977		func_offset += sym_off;
11978	}
11979
11980	legacy = determine_uprobe_perf_type() < 0;
11981	switch (attach_mode) {
11982	case PROBE_ATTACH_MODE_LEGACY:
11983		legacy = true;
11984		pe_opts.force_ioctl_attach = true;
11985		break;
11986	case PROBE_ATTACH_MODE_PERF:
11987		if (legacy)
11988			return libbpf_err_ptr(-ENOTSUP);
11989		pe_opts.force_ioctl_attach = true;
11990		break;
11991	case PROBE_ATTACH_MODE_LINK:
11992		if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
11993			return libbpf_err_ptr(-ENOTSUP);
11994		break;
11995	case PROBE_ATTACH_MODE_DEFAULT:
11996		break;
11997	default:
11998		return libbpf_err_ptr(-EINVAL);
11999	}
12000
12001	if (!legacy) {
12002		pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
12003					    func_offset, pid, ref_ctr_off);
12004	} else {
12005		char probe_name[PATH_MAX + 64];
12006
12007		if (ref_ctr_off)
12008			return libbpf_err_ptr(-EINVAL);
12009
12010		gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
12011					     binary_path, func_offset);
12012
12013		legacy_probe = strdup(probe_name);
12014		if (!legacy_probe)
12015			return libbpf_err_ptr(-ENOMEM);
12016
12017		pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
12018						    binary_path, func_offset, pid);
12019	}
12020	if (pfd < 0) {
12021		err = -errno;
12022		pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
12023			prog->name, retprobe ? "uretprobe" : "uprobe",
12024			binary_path, func_offset,
12025			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
12026		goto err_out;
12027	}
12028
12029	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
12030	err = libbpf_get_error(link);
12031	if (err) {
12032		close(pfd);
12033		pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
12034			prog->name, retprobe ? "uretprobe" : "uprobe",
12035			binary_path, func_offset,
12036			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
12037		goto err_clean_legacy;
12038	}
12039	if (legacy) {
12040		struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
12041
12042		perf_link->legacy_probe_name = legacy_probe;
12043		perf_link->legacy_is_kprobe = false;
12044		perf_link->legacy_is_retprobe = retprobe;
12045	}
12046	return link;
12047
12048err_clean_legacy:
12049	if (legacy)
12050		remove_uprobe_event_legacy(legacy_probe, retprobe);
12051err_out:
12052	free(legacy_probe);
12053	return libbpf_err_ptr(err);
12054}
12055
12056/* Format of u[ret]probe section definition supporting auto-attach:
12057 * u[ret]probe/binary:function[+offset]
12058 *
12059 * binary can be an absolute/relative path or a filename; the latter is resolved to a
12060 * full binary path via bpf_program__attach_uprobe_opts.
12061 *
12062 * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
12063 * specified (and auto-attach is not possible) or the above format is specified for
12064 * auto-attach.
12065 */
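/* For example (binary paths and symbols below are illustrative):
 *
 *	SEC("uprobe//usr/lib/libc.so.6:malloc")		auto-attach at malloc entry
 *	SEC("uprobe/./my_binary:compute+0x10")		entry probe at compute+0x10
 *	SEC("uretprobe//usr/lib/libc.so.6:free")	attach at return from free
 *	SEC("uprobe")					valid, but needs manual attach
 */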
12066static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12067{
12068	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
12069	char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off;
12070	int n, c, ret = -EINVAL;
12071	long offset = 0;
12072
12073	*link = NULL;
12074
12075	n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
12076		   &probe_type, &binary_path, &func_name);
12077	switch (n) {
12078	case 1:
12079		/* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
12080		ret = 0;
12081		break;
12082	case 2:
12083		pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
12084			prog->name, prog->sec_name);
12085		break;
12086	case 3:
12087		/* check if the user specified `+offset`; if so, it should be
12088		 * the last part of the string, so make sure sscanf read to EOL
12089		 */
12090		func_off = strrchr(func_name, '+');
12091		if (func_off) {
12092			n = sscanf(func_off, "+%li%n", &offset, &c);
12093			if (n == 1 && *(func_off + c) == '\0')
12094				func_off[0] = '\0';
12095			else
12096				offset = 0;
12097		}
12098		opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
12099				strcmp(probe_type, "uretprobe.s") == 0;
12100		if (opts.retprobe && offset != 0) {
12101			pr_warn("prog '%s': uretprobes do not support offset specification\n",
12102				prog->name);
12103			break;
12104		}
12105		opts.func_name = func_name;
12106		*link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
12107		ret = libbpf_get_error(*link);
12108		break;
12109	default:
12110		pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
12111			prog->sec_name);
12112		break;
12113	}
12114	free(probe_type);
12115	free(binary_path);
12116	free(func_name);
12117
12118	return ret;
12119}
12120
12121struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
12122					    bool retprobe, pid_t pid,
12123					    const char *binary_path,
12124					    size_t func_offset)
12125{
12126	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
12127
12128	return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
12129}
12130
12131struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
12132					  pid_t pid, const char *binary_path,
12133					  const char *usdt_provider, const char *usdt_name,
12134					  const struct bpf_usdt_opts *opts)
12135{
12136	char resolved_path[512];
12137	struct bpf_object *obj = prog->obj;
12138	struct bpf_link *link;
12139	__u64 usdt_cookie;
12140	int err;
12141
12142	if (!OPTS_VALID(opts, bpf_uprobe_opts))
12143		return libbpf_err_ptr(-EINVAL);
12144
12145	if (bpf_program__fd(prog) < 0) {
12146		pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
12147			prog->name);
12148		return libbpf_err_ptr(-EINVAL);
12149	}
12150
12151	if (!binary_path)
12152		return libbpf_err_ptr(-EINVAL);
12153
12154	if (!strchr(binary_path, '/')) {
12155		err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
12156		if (err) {
12157			pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
12158				prog->name, binary_path, err);
12159			return libbpf_err_ptr(err);
12160		}
12161		binary_path = resolved_path;
12162	}
12163
12164	/* USDT manager is instantiated lazily on first USDT attach. It will
12165	 * be destroyed together with BPF object in bpf_object__close().
12166	 */
12167	if (IS_ERR(obj->usdt_man))
12168		return libbpf_ptr(obj->usdt_man);
12169	if (!obj->usdt_man) {
12170		obj->usdt_man = usdt_manager_new(obj);
12171		if (IS_ERR(obj->usdt_man))
12172			return libbpf_ptr(obj->usdt_man);
12173	}
12174
12175	usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
12176	link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
12177					usdt_provider, usdt_name, usdt_cookie);
12178	err = libbpf_get_error(link);
12179	if (err)
12180		return libbpf_err_ptr(err);
12181	return link;
12182}
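
/* Usage sketch (hypothetical caller code; provider/name pairs depend on the
 * USDT notes present in the probed binary):
 *
 *	pid_t pid = -1;	// any process
 *	link = bpf_program__attach_usdt(prog, pid, "/usr/sbin/mysqld",
 *					"mysql", "query__start", NULL);
 *
 * The equivalent declarative form handled by attach_usdt() below is
 * SEC("usdt//usr/sbin/mysqld:mysql:query__start").
 */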
12183
12184static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12185{
12186	char *path = NULL, *provider = NULL, *name = NULL;
12187	const char *sec_name;
12188	int n, err;
12189
12190	sec_name = bpf_program__section_name(prog);
12191	if (strcmp(sec_name, "usdt") == 0) {
12192		/* no auto-attach for just SEC("usdt") */
12193		*link = NULL;
12194		return 0;
12195	}
12196
12197	n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
12198	if (n != 3) {
12199		pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
12200			sec_name);
12201		err = -EINVAL;
12202	} else {
12203		*link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
12204						 provider, name, NULL);
12205		err = libbpf_get_error(*link);
12206	}
12207	free(path);
12208	free(provider);
12209	free(name);
12210	return err;
12211}
12212
12213static int determine_tracepoint_id(const char *tp_category,
12214				   const char *tp_name)
12215{
12216	char file[PATH_MAX];
12217	int ret;
12218
12219	ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
12220		       tracefs_path(), tp_category, tp_name);
12221	if (ret < 0)
12222		return -errno;
12223	if (ret >= sizeof(file)) {
12224		pr_debug("tracepoint %s/%s path is too long\n",
12225			 tp_category, tp_name);
12226		return -E2BIG;
12227	}
12228	return parse_uint_from_file(file, "%d\n");
12229}
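/* The ID is read from tracefs, which tracefs_path() typically resolves to
 * /sys/kernel/tracing (or /sys/kernel/debug/tracing on older setups), e.g.
 * /sys/kernel/tracing/events/sched/sched_switch/id for the
 * "sched/sched_switch" tracepoint.
 */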
12230
12231static int perf_event_open_tracepoint(const char *tp_category,
12232				      const char *tp_name)
12233{
12234	const size_t attr_sz = sizeof(struct perf_event_attr);
12235	struct perf_event_attr attr;
12236	char errmsg[STRERR_BUFSIZE];
12237	int tp_id, pfd, err;
12238
12239	tp_id = determine_tracepoint_id(tp_category, tp_name);
12240	if (tp_id < 0) {
12241		pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
12242			tp_category, tp_name,
12243			libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
12244		return tp_id;
12245	}
12246
12247	memset(&attr, 0, attr_sz);
12248	attr.type = PERF_TYPE_TRACEPOINT;
12249	attr.size = attr_sz;
12250	attr.config = tp_id;
12251
12252	pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
12253		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
12254	if (pfd < 0) {
12255		err = -errno;
12256		pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
12257			tp_category, tp_name,
12258			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
12259		return err;
12260	}
12261	return pfd;
12262}
12263
12264struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
12265						     const char *tp_category,
12266						     const char *tp_name,
12267						     const struct bpf_tracepoint_opts *opts)
12268{
12269	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
12270	char errmsg[STRERR_BUFSIZE];
12271	struct bpf_link *link;
12272	int pfd, err;
12273
12274	if (!OPTS_VALID(opts, bpf_tracepoint_opts))
12275		return libbpf_err_ptr(-EINVAL);
12276
12277	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
12278
12279	pfd = perf_event_open_tracepoint(tp_category, tp_name);
12280	if (pfd < 0) {
12281		pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
12282			prog->name, tp_category, tp_name,
12283			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
12284		return libbpf_err_ptr(pfd);
12285	}
12286	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
12287	err = libbpf_get_error(link);
12288	if (err) {
12289		close(pfd);
12290		pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
12291			prog->name, tp_category, tp_name,
12292			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
12293		return libbpf_err_ptr(err);
12294	}
12295	return link;
12296}
12297
12298struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
12299						const char *tp_category,
12300						const char *tp_name)
12301{
12302	return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
12303}
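/* Illustrative sketch (the skeleton field name is hypothetical): attach to
 * the scheduler's sched_switch tracepoint once the object is loaded:
 *
 *	link = bpf_program__attach_tracepoint(skel->progs.on_switch,
 *					      "sched", "sched_switch");
 *	if (libbpf_get_error(link))
 *		goto cleanup;
 */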
12304
12305static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12306{
12307	char *sec_name, *tp_cat, *tp_name;
12308
12309	*link = NULL;
12310
12311	/* no auto-attach for SEC("tp") or SEC("tracepoint") */
12312	if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
12313		return 0;
12314
12315	sec_name = strdup(prog->sec_name);
12316	if (!sec_name)
12317		return -ENOMEM;
12318
12319	/* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
12320	if (str_has_pfx(prog->sec_name, "tp/"))
12321		tp_cat = sec_name + sizeof("tp/") - 1;
12322	else
12323		tp_cat = sec_name + sizeof("tracepoint/") - 1;
12324	tp_name = strchr(tp_cat, '/');
12325	if (!tp_name) {
12326		free(sec_name);
12327		return -EINVAL;
12328	}
12329	*tp_name = '\0';
12330	tp_name++;
12331
12332	*link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
12333	free(sec_name);
12334	return libbpf_get_error(*link);
12335}
12336
12337struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
12338						    const char *tp_name)
12339{
12340	char errmsg[STRERR_BUFSIZE];
12341	struct bpf_link *link;
12342	int prog_fd, pfd;
12343
12344	prog_fd = bpf_program__fd(prog);
12345	if (prog_fd < 0) {
12346		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12347		return libbpf_err_ptr(-EINVAL);
12348	}
12349
12350	link = calloc(1, sizeof(*link));
12351	if (!link)
12352		return libbpf_err_ptr(-ENOMEM);
12353	link->detach = &bpf_link__detach_fd;
12354
12355	pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
12356	if (pfd < 0) {
12357		pfd = -errno;
12358		free(link);
12359		pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
12360			prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
12361		return libbpf_err_ptr(pfd);
12362	}
12363	link->fd = pfd;
12364	return link;
12365}
12366
12367static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12368{
12369	static const char *const prefixes[] = {
12370		"raw_tp",
12371		"raw_tracepoint",
12372		"raw_tp.w",
12373		"raw_tracepoint.w",
12374	};
12375	size_t i;
12376	const char *tp_name = NULL;
12377
12378	*link = NULL;
12379
12380	for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
12381		size_t pfx_len;
12382
12383		if (!str_has_pfx(prog->sec_name, prefixes[i]))
12384			continue;
12385
12386		pfx_len = strlen(prefixes[i]);
12387		/* no auto-attach case, e.g., a bare SEC("raw_tp") */
12388		if (prog->sec_name[pfx_len] == '\0')
12389			return 0;
12390
12391		if (prog->sec_name[pfx_len] != '/')
12392			continue;
12393
12394		tp_name = prog->sec_name + pfx_len + 1;
12395		break;
12396	}
12397
12398	if (!tp_name) {
12399		pr_warn("prog '%s': invalid section name '%s'\n",
12400			prog->name, prog->sec_name);
12401		return -EINVAL;
12402	}
12403
12404	*link = bpf_program__attach_raw_tracepoint(prog, tp_name);
12405	return libbpf_get_error(*link);
12406}
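/* Auto-attach therefore expects section names like SEC("raw_tp/<name>") or
 * SEC("raw_tracepoint.w/<name>"). A bare SEC("raw_tp") program is left for
 * manual attachment via bpf_program__attach_raw_tracepoint().
 */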
12407
12408/* Common logic for all BPF program types that attach to a btf_id */
12409static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
12410						   const struct bpf_trace_opts *opts)
12411{
12412	LIBBPF_OPTS(bpf_link_create_opts, link_opts);
12413	char errmsg[STRERR_BUFSIZE];
12414	struct bpf_link *link;
12415	int prog_fd, pfd;
12416
12417	if (!OPTS_VALID(opts, bpf_trace_opts))
12418		return libbpf_err_ptr(-EINVAL);
12419
12420	prog_fd = bpf_program__fd(prog);
12421	if (prog_fd < 0) {
12422		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12423		return libbpf_err_ptr(-EINVAL);
12424	}
12425
12426	link = calloc(1, sizeof(*link));
12427	if (!link)
12428		return libbpf_err_ptr(-ENOMEM);
12429	link->detach = &bpf_link__detach_fd;
12430
12431	/* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
12432	link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
12433	pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
12434	if (pfd < 0) {
12435		pfd = -errno;
12436		free(link);
12437		pr_warn("prog '%s': failed to attach: %s\n",
12438			prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
12439		return libbpf_err_ptr(pfd);
12440	}
12441	link->fd = pfd;
12442	return link;
12443}
12444
12445struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
12446{
12447	return bpf_program__attach_btf_id(prog, NULL);
12448}
12449
12450struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
12451						const struct bpf_trace_opts *opts)
12452{
12453	return bpf_program__attach_btf_id(prog, opts);
12454}
12455
12456struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
12457{
12458	return bpf_program__attach_btf_id(prog, NULL);
12459}
12460
12461static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12462{
12463	*link = bpf_program__attach_trace(prog);
12464	return libbpf_get_error(*link);
12465}
12466
12467static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12468{
12469	*link = bpf_program__attach_lsm(prog);
12470	return libbpf_get_error(*link);
12471}
12472
12473static struct bpf_link *
12474bpf_program_attach_fd(const struct bpf_program *prog,
12475		      int target_fd, const char *target_name,
12476		      const struct bpf_link_create_opts *opts)
12477{
12478	enum bpf_attach_type attach_type;
12479	char errmsg[STRERR_BUFSIZE];
12480	struct bpf_link *link;
12481	int prog_fd, link_fd;
12482
12483	prog_fd = bpf_program__fd(prog);
12484	if (prog_fd < 0) {
12485		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12486		return libbpf_err_ptr(-EINVAL);
12487	}
12488
12489	link = calloc(1, sizeof(*link));
12490	if (!link)
12491		return libbpf_err_ptr(-ENOMEM);
12492	link->detach = &bpf_link__detach_fd;
12493
12494	attach_type = bpf_program__expected_attach_type(prog);
12495	link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts);
12496	if (link_fd < 0) {
12497		link_fd = -errno;
12498		free(link);
12499		pr_warn("prog '%s': failed to attach to %s: %s\n",
12500			prog->name, target_name,
12501			libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12502		return libbpf_err_ptr(link_fd);
12503	}
12504	link->fd = link_fd;
12505	return link;
12506}
12507
12508struct bpf_link *
12509bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
12510{
12511	return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL);
12512}
12513
12514struct bpf_link *
12515bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
12516{
12517	return bpf_program_attach_fd(prog, netns_fd, "netns", NULL);
12518}
12519
12520struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
12521{
12522	/* target_fd/target_ifindex use the same field in LINK_CREATE */
12523	return bpf_program_attach_fd(prog, ifindex, "xdp", NULL);
12524}
12525
12526struct bpf_link *
12527bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
12528			const struct bpf_tcx_opts *opts)
12529{
12530	LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12531	__u32 relative_id;
12532	int relative_fd;
12533
12534	if (!OPTS_VALID(opts, bpf_tcx_opts))
12535		return libbpf_err_ptr(-EINVAL);
12536
12537	relative_id = OPTS_GET(opts, relative_id, 0);
12538	relative_fd = OPTS_GET(opts, relative_fd, 0);
12539
12540	/* validate we don't have unexpected combinations of non-zero fields */
12541	if (!ifindex) {
12542		pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
12543			prog->name);
12544		return libbpf_err_ptr(-EINVAL);
12545	}
12546	if (relative_fd && relative_id) {
12547		pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
12548			prog->name);
12549		return libbpf_err_ptr(-EINVAL);
12550	}
12551
12552	link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0);
12553	link_create_opts.tcx.relative_fd = relative_fd;
12554	link_create_opts.tcx.relative_id = relative_id;
12555	link_create_opts.flags = OPTS_GET(opts, flags, 0);
12556
12557	/* target_fd/target_ifindex use the same field in LINK_CREATE */
12558	return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts);
12559}
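/* Illustrative sketch (the ifindex and skeleton field are hypothetical):
 * attach a tcx program to a netdevice with no explicit relative placement:
 *
 *	LIBBPF_OPTS(bpf_tcx_opts, tcx_opts);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_tcx(skel->progs.tc_ingress, 2, &tcx_opts);
 *	if (libbpf_get_error(link))
 *		goto cleanup;
 */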
12560
12561struct bpf_link *
12562bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex,
12563			   const struct bpf_netkit_opts *opts)
12564{
12565	LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12566	__u32 relative_id;
12567	int relative_fd;
12568
12569	if (!OPTS_VALID(opts, bpf_netkit_opts))
12570		return libbpf_err_ptr(-EINVAL);
12571
12572	relative_id = OPTS_GET(opts, relative_id, 0);
12573	relative_fd = OPTS_GET(opts, relative_fd, 0);
12574
12575	/* validate we don't have unexpected combinations of non-zero fields */
12576	if (!ifindex) {
12577		pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
12578			prog->name);
12579		return libbpf_err_ptr(-EINVAL);
12580	}
12581	if (relative_fd && relative_id) {
12582		pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
12583			prog->name);
12584		return libbpf_err_ptr(-EINVAL);
12585	}
12586
12587	link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0);
12588	link_create_opts.netkit.relative_fd = relative_fd;
12589	link_create_opts.netkit.relative_id = relative_id;
12590	link_create_opts.flags = OPTS_GET(opts, flags, 0);
12591
12592	return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts);
12593}
12594
12595struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
12596					      int target_fd,
12597					      const char *attach_func_name)
12598{
12599	int btf_id;
12600
12601	if (!!target_fd != !!attach_func_name) {
12602		pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
12603			prog->name);
12604		return libbpf_err_ptr(-EINVAL);
12605	}
12606
12607	if (prog->type != BPF_PROG_TYPE_EXT) {
12608		pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
12609			prog->name);
12610		return libbpf_err_ptr(-EINVAL);
12611	}
12612
12613	if (target_fd) {
12614		LIBBPF_OPTS(bpf_link_create_opts, target_opts);
12615
12616		btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
12617		if (btf_id < 0)
12618			return libbpf_err_ptr(btf_id);
12619
12620		target_opts.target_btf_id = btf_id;
12621
12622		return bpf_program_attach_fd(prog, target_fd, "freplace",
12623					     &target_opts);
12624	} else {
12625		/* no target, so use raw_tracepoint_open for compatibility
12626		 * with old kernels
12627		 */
12628		return bpf_program__attach_trace(prog);
12629	}
12630}
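/* Illustrative sketch (the FD, skeleton field and function name are
 * hypothetical): replace a function in an already loaded target BPF program:
 *
 *	link = bpf_program__attach_freplace(skel->progs.new_impl,
 *					    target_prog_fd, "xdp_dispatch");
 *	if (libbpf_get_error(link))
 *		goto cleanup;
 */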
12631
12632struct bpf_link *
12633bpf_program__attach_iter(const struct bpf_program *prog,
12634			 const struct bpf_iter_attach_opts *opts)
12635{
12636	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12637	char errmsg[STRERR_BUFSIZE];
12638	struct bpf_link *link;
12639	int prog_fd, link_fd;
12640	__u32 target_fd = 0;
12641
12642	if (!OPTS_VALID(opts, bpf_iter_attach_opts))
12643		return libbpf_err_ptr(-EINVAL);
12644
12645	link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
12646	link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
12647
12648	prog_fd = bpf_program__fd(prog);
12649	if (prog_fd < 0) {
12650		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12651		return libbpf_err_ptr(-EINVAL);
12652	}
12653
12654	link = calloc(1, sizeof(*link));
12655	if (!link)
12656		return libbpf_err_ptr(-ENOMEM);
12657	link->detach = &bpf_link__detach_fd;
12658
12659	link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
12660				  &link_create_opts);
12661	if (link_fd < 0) {
12662		link_fd = -errno;
12663		free(link);
12664		pr_warn("prog '%s': failed to attach to iterator: %s\n",
12665			prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12666		return libbpf_err_ptr(link_fd);
12667	}
12668	link->fd = link_fd;
12669	return link;
12670}
12671
12672static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12673{
12674	*link = bpf_program__attach_iter(prog, NULL);
12675	return libbpf_get_error(*link);
12676}
12677
12678struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog,
12679					       const struct bpf_netfilter_opts *opts)
12680{
12681	LIBBPF_OPTS(bpf_link_create_opts, lopts);
12682	struct bpf_link *link;
12683	int prog_fd, link_fd;
12684
12685	if (!OPTS_VALID(opts, bpf_netfilter_opts))
12686		return libbpf_err_ptr(-EINVAL);
12687
12688	prog_fd = bpf_program__fd(prog);
12689	if (prog_fd < 0) {
12690		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12691		return libbpf_err_ptr(-EINVAL);
12692	}
12693
12694	link = calloc(1, sizeof(*link));
12695	if (!link)
12696		return libbpf_err_ptr(-ENOMEM);
12697
12698	link->detach = &bpf_link__detach_fd;
12699
12700	lopts.netfilter.pf = OPTS_GET(opts, pf, 0);
12701	lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0);
12702	lopts.netfilter.priority = OPTS_GET(opts, priority, 0);
12703	lopts.netfilter.flags = OPTS_GET(opts, flags, 0);
12704
12705	link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts);
12706	if (link_fd < 0) {
12707		char errmsg[STRERR_BUFSIZE];
12708
12709		link_fd = -errno;
12710		free(link);
12711		pr_warn("prog '%s': failed to attach to netfilter: %s\n",
12712			prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12713		return libbpf_err_ptr(link_fd);
12714	}
12715	link->fd = link_fd;
12716
12717	return link;
12718}
12719
12720struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
12721{
12722	struct bpf_link *link = NULL;
12723	int err;
12724
12725	if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
12726		return libbpf_err_ptr(-EOPNOTSUPP);
12727
12728	err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
12729	if (err)
12730		return libbpf_err_ptr(err);
12731
12732	/* When calling bpf_program__attach() explicitly, auto-attach support
12733	 * is expected to work, so NULL returned link is considered an error.
12734	 * This is different for skeleton's attach, see comment in
12735	 * bpf_object__attach_skeleton().
12736	 */
12737	if (!link)
12738		return libbpf_err_ptr(-EOPNOTSUPP);
12739
12740	return link;
12741}
12742
12743struct bpf_link_struct_ops {
12744	struct bpf_link link;
12745	int map_fd;
12746};
12747
12748static int bpf_link__detach_struct_ops(struct bpf_link *link)
12749{
12750	struct bpf_link_struct_ops *st_link;
12751	__u32 zero = 0;
12752
12753	st_link = container_of(link, struct bpf_link_struct_ops, link);
12754
12755	if (st_link->map_fd < 0)
12756		/* w/o a real link */
12757		return bpf_map_delete_elem(link->fd, &zero);
12758
12759	return close(link->fd);
12760}
12761
12762struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
12763{
12764	struct bpf_link_struct_ops *link;
12765	__u32 zero = 0;
12766	int err, fd;
12767
12768	if (!bpf_map__is_struct_ops(map) || map->fd == -1)
12769		return libbpf_err_ptr(-EINVAL);
12770
12771	link = calloc(1, sizeof(*link));
12772	if (!link)
12773		return libbpf_err_ptr(-ENOMEM);
12774
12775	/* kern_vdata should be prepared during the loading phase. */
12776	err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
12777	/* It can be EBUSY if the map has been used to create or
12778	 * update a link before.  We don't allow updating the value of
12779	 * a struct_ops once it is set.  That ensures that the value
12780	 * never changes.  So, it is safe to skip EBUSY.
12781	 */
12782	if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
12783		free(link);
12784		return libbpf_err_ptr(err);
12785	}
12786
12787	link->link.detach = bpf_link__detach_struct_ops;
12788
12789	if (!(map->def.map_flags & BPF_F_LINK)) {
12790		/* w/o a real link */
12791		link->link.fd = map->fd;
12792		link->map_fd = -1;
12793		return &link->link;
12794	}
12795
12796	fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
12797	if (fd < 0) {
12798		free(link);
12799		return libbpf_err_ptr(fd);
12800	}
12801
12802	link->link.fd = fd;
12803	link->map_fd = map->fd;
12804
12805	return &link->link;
12806}
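/* Illustrative sketch (the map name is hypothetical): register a struct_ops
 * map, e.g. a custom TCP congestion control, after the object is loaded:
 *
 *	link = bpf_map__attach_struct_ops(skel->maps.my_cong_ops);
 *	if (libbpf_get_error(link))
 *		goto cleanup;
 */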
12807
12808/*
12809 * Swap the struct_ops map backing a link with a new struct_ops map.
12810 */
12811int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
12812{
12813	struct bpf_link_struct_ops *st_ops_link;
12814	__u32 zero = 0;
12815	int err;
12816
12817	if (!bpf_map__is_struct_ops(map) || !map_is_created(map))
12818		return -EINVAL;
12819
12820	st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
12821	/* Ensure the type of a link is correct */
12822	if (st_ops_link->map_fd < 0)
12823		return -EINVAL;
12824
12825	err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
12826	/* It can be EBUSY if the map has been used to create or
12827	 * update a link before.  We don't allow updating the value of
12828	 * a struct_ops once it is set.  That ensures that the value
12829	 * never changes.  So, it is safe to skip EBUSY.
12830	 */
12831	if (err && err != -EBUSY)
12832		return err;
12833
12834	err = bpf_link_update(link->fd, map->fd, NULL);
12835	if (err < 0)
12836		return err;
12837
12838	st_ops_link->map_fd = map->fd;
12839
12840	return 0;
12841}
12842
12843typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
12844							  void *private_data);
12845
12846static enum bpf_perf_event_ret
12847perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
12848		       void **copy_mem, size_t *copy_size,
12849		       bpf_perf_event_print_t fn, void *private_data)
12850{
12851	struct perf_event_mmap_page *header = mmap_mem;
12852	__u64 data_head = ring_buffer_read_head(header);
12853	__u64 data_tail = header->data_tail;
12854	void *base = ((__u8 *)header) + page_size;
12855	int ret = LIBBPF_PERF_EVENT_CONT;
12856	struct perf_event_header *ehdr;
12857	size_t ehdr_size;
12858
12859	while (data_head != data_tail) {
12860		ehdr = base + (data_tail & (mmap_size - 1));
12861		ehdr_size = ehdr->size;
12862
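		/* The record may wrap past the end of the mmap'ed ring. If so,
		 * stitch its two segments together into the *copy_mem scratch
		 * buffer (growing it if needed) before invoking the callback.
		 */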
12863		if (((void *)ehdr) + ehdr_size > base + mmap_size) {
12864			void *copy_start = ehdr;
12865			size_t len_first = base + mmap_size - copy_start;
12866			size_t len_secnd = ehdr_size - len_first;
12867
12868			if (*copy_size < ehdr_size) {
12869				free(*copy_mem);
12870				*copy_mem = malloc(ehdr_size);
12871				if (!*copy_mem) {
12872					*copy_size = 0;
12873					ret = LIBBPF_PERF_EVENT_ERROR;
12874					break;
12875				}
12876				*copy_size = ehdr_size;
12877			}
12878
12879			memcpy(*copy_mem, copy_start, len_first);
12880			memcpy(*copy_mem + len_first, base, len_secnd);
12881			ehdr = *copy_mem;
12882		}
12883
12884		ret = fn(ehdr, private_data);
12885		data_tail += ehdr_size;
12886		if (ret != LIBBPF_PERF_EVENT_CONT)
12887			break;
12888	}
12889
12890	ring_buffer_write_tail(header, data_tail);
12891	return libbpf_err(ret);
12892}
12893
12894struct perf_buffer;
12895
12896struct perf_buffer_params {
12897	struct perf_event_attr *attr;
12898	/* if event_cb is specified, it takes precedence */
12899	perf_buffer_event_fn event_cb;
12900	/* sample_cb and lost_cb are higher-level common-case callbacks */
12901	perf_buffer_sample_fn sample_cb;
12902	perf_buffer_lost_fn lost_cb;
12903	void *ctx;
12904	int cpu_cnt;
12905	int *cpus;
12906	int *map_keys;
12907};
12908
12909struct perf_cpu_buf {
12910	struct perf_buffer *pb;
12911	void *base; /* mmap()'ed memory */
12912	void *buf; /* for reconstructing segmented data */
12913	size_t buf_size;
12914	int fd;
12915	int cpu;
12916	int map_key;
12917};
12918
12919struct perf_buffer {
12920	perf_buffer_event_fn event_cb;
12921	perf_buffer_sample_fn sample_cb;
12922	perf_buffer_lost_fn lost_cb;
12923	void *ctx; /* passed into callbacks */
12924
12925	size_t page_size;
12926	size_t mmap_size;
12927	struct perf_cpu_buf **cpu_bufs;
12928	struct epoll_event *events;
12929	int cpu_cnt; /* number of allocated CPU buffers */
12930	int epoll_fd; /* epoll instance FD used to poll all per-CPU buffers */
12931	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
12932};
12933
12934static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
12935				      struct perf_cpu_buf *cpu_buf)
12936{
12937	if (!cpu_buf)
12938		return;
12939	if (cpu_buf->base &&
12940	    munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
12941		pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
12942	if (cpu_buf->fd >= 0) {
12943		ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
12944		close(cpu_buf->fd);
12945	}
12946	free(cpu_buf->buf);
12947	free(cpu_buf);
12948}
12949
12950void perf_buffer__free(struct perf_buffer *pb)
12951{
12952	int i;
12953
12954	if (IS_ERR_OR_NULL(pb))
12955		return;
12956	if (pb->cpu_bufs) {
12957		for (i = 0; i < pb->cpu_cnt; i++) {
12958			struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
12959
12960			if (!cpu_buf)
12961				continue;
12962
12963			bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
12964			perf_buffer__free_cpu_buf(pb, cpu_buf);
12965		}
12966		free(pb->cpu_bufs);
12967	}
12968	if (pb->epoll_fd >= 0)
12969		close(pb->epoll_fd);
12970	free(pb->events);
12971	free(pb);
12972}
12973
12974static struct perf_cpu_buf *
12975perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
12976			  int cpu, int map_key)
12977{
12978	struct perf_cpu_buf *cpu_buf;
12979	char msg[STRERR_BUFSIZE];
12980	int err;
12981
12982	cpu_buf = calloc(1, sizeof(*cpu_buf));
12983	if (!cpu_buf)
12984		return ERR_PTR(-ENOMEM);
12985
12986	cpu_buf->pb = pb;
12987	cpu_buf->cpu = cpu;
12988	cpu_buf->map_key = map_key;
12989
12990	cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
12991			      -1, PERF_FLAG_FD_CLOEXEC);
12992	if (cpu_buf->fd < 0) {
12993		err = -errno;
12994		pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
12995			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
12996		goto error;
12997	}
12998
12999	cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
13000			     PROT_READ | PROT_WRITE, MAP_SHARED,
13001			     cpu_buf->fd, 0);
13002	if (cpu_buf->base == MAP_FAILED) {
13003		cpu_buf->base = NULL;
13004		err = -errno;
13005		pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
13006			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
13007		goto error;
13008	}
13009
13010	if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
13011		err = -errno;
13012		pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
13013			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
13014		goto error;
13015	}
13016
13017	return cpu_buf;
13018
13019error:
13020	perf_buffer__free_cpu_buf(pb, cpu_buf);
13021	return (struct perf_cpu_buf *)ERR_PTR(err);
13022}
13023
13024static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
13025					      struct perf_buffer_params *p);
13026
13027struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
13028				     perf_buffer_sample_fn sample_cb,
13029				     perf_buffer_lost_fn lost_cb,
13030				     void *ctx,
13031				     const struct perf_buffer_opts *opts)
13032{
13033	const size_t attr_sz = sizeof(struct perf_event_attr);
13034	struct perf_buffer_params p = {};
13035	struct perf_event_attr attr;
13036	__u32 sample_period;
13037
13038	if (!OPTS_VALID(opts, perf_buffer_opts))
13039		return libbpf_err_ptr(-EINVAL);
13040
13041	sample_period = OPTS_GET(opts, sample_period, 1);
13042	if (!sample_period)
13043		sample_period = 1;
13044
13045	memset(&attr, 0, attr_sz);
13046	attr.size = attr_sz;
13047	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
13048	attr.type = PERF_TYPE_SOFTWARE;
13049	attr.sample_type = PERF_SAMPLE_RAW;
13050	attr.sample_period = sample_period;
13051	attr.wakeup_events = sample_period;
13052
13053	p.attr = &attr;
13054	p.sample_cb = sample_cb;
13055	p.lost_cb = lost_cb;
13056	p.ctx = ctx;
13057
13058	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
13059}
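/* Illustrative sketch (map and callback names are hypothetical): create a
 * perf buffer with 8 pages per CPU over a BPF_MAP_TYPE_PERF_EVENT_ARRAY map
 * and drain it in a loop:
 *
 *	static void on_sample(void *ctx, int cpu, void *data, __u32 size) { ... }
 *
 *	pb = perf_buffer__new(bpf_map__fd(skel->maps.events), 8,
 *			      on_sample, NULL, NULL, NULL);
 *	while (!exiting)
 *		perf_buffer__poll(pb, 100);
 *	perf_buffer__free(pb);
 */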
13060
13061struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
13062					 struct perf_event_attr *attr,
13063					 perf_buffer_event_fn event_cb, void *ctx,
13064					 const struct perf_buffer_raw_opts *opts)
13065{
13066	struct perf_buffer_params p = {};
13067
13068	if (!attr)
13069		return libbpf_err_ptr(-EINVAL);
13070
13071	if (!OPTS_VALID(opts, perf_buffer_raw_opts))
13072		return libbpf_err_ptr(-EINVAL);
13073
13074	p.attr = attr;
13075	p.event_cb = event_cb;
13076	p.ctx = ctx;
13077	p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
13078	p.cpus = OPTS_GET(opts, cpus, NULL);
13079	p.map_keys = OPTS_GET(opts, map_keys, NULL);
13080
13081	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
13082}
13083
13084static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
13085					      struct perf_buffer_params *p)
13086{
13087	const char *online_cpus_file = "/sys/devices/system/cpu/online";
13088	struct bpf_map_info map;
13089	char msg[STRERR_BUFSIZE];
13090	struct perf_buffer *pb;
13091	bool *online = NULL;
13092	__u32 map_info_len;
13093	int err, i, j, n;
13094
13095	if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
13096		pr_warn("page count should be a power of two, but is %zu\n",
13097			page_cnt);
13098		return ERR_PTR(-EINVAL);
13099	}
13100
13101	/* best-effort sanity checks */
13102	memset(&map, 0, sizeof(map));
13103	map_info_len = sizeof(map);
13104	err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len);
13105	if (err) {
13106		err = -errno;
13107		/* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
13108		 * -EBADFD, -EFAULT, or -E2BIG on real error
13109		 */
13110		if (err != -EINVAL) {
13111			pr_warn("failed to get map info for map FD %d: %s\n",
13112				map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
13113			return ERR_PTR(err);
13114		}
13115		pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
13116			 map_fd);
13117	} else {
13118		if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
13119			pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
13120				map.name);
13121			return ERR_PTR(-EINVAL);
13122		}
13123	}
13124
13125	pb = calloc(1, sizeof(*pb));
13126	if (!pb)
13127		return ERR_PTR(-ENOMEM);
13128
13129	pb->event_cb = p->event_cb;
13130	pb->sample_cb = p->sample_cb;
13131	pb->lost_cb = p->lost_cb;
13132	pb->ctx = p->ctx;
13133
13134	pb->page_size = getpagesize();
13135	pb->mmap_size = pb->page_size * page_cnt;
13136	pb->map_fd = map_fd;
13137
13138	pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
13139	if (pb->epoll_fd < 0) {
13140		err = -errno;
13141		pr_warn("failed to create epoll instance: %s\n",
13142			libbpf_strerror_r(err, msg, sizeof(msg)));
13143		goto error;
13144	}
13145
13146	if (p->cpu_cnt > 0) {
13147		pb->cpu_cnt = p->cpu_cnt;
13148	} else {
13149		pb->cpu_cnt = libbpf_num_possible_cpus();
13150		if (pb->cpu_cnt < 0) {
13151			err = pb->cpu_cnt;
13152			goto error;
13153		}
13154		if (map.max_entries && map.max_entries < pb->cpu_cnt)
13155			pb->cpu_cnt = map.max_entries;
13156	}
13157
13158	pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
13159	if (!pb->events) {
13160		err = -ENOMEM;
13161		pr_warn("failed to allocate events: out of memory\n");
13162		goto error;
13163	}
13164	pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
13165	if (!pb->cpu_bufs) {
13166		err = -ENOMEM;
13167		pr_warn("failed to allocate buffers: out of memory\n");
13168		goto error;
13169	}
13170
13171	err = parse_cpu_mask_file(online_cpus_file, &online, &n);
13172	if (err) {
13173		pr_warn("failed to get online CPU mask: %d\n", err);
13174		goto error;
13175	}
13176
13177	for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
13178		struct perf_cpu_buf *cpu_buf;
13179		int cpu, map_key;
13180
13181		cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
13182		map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
13183
13184		/* in case the user didn't explicitly request particular CPUs to
13185		 * be attached to, skip offline/not-present CPUs
13186		 */
13187		if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
13188			continue;
13189
13190		cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
13191		if (IS_ERR(cpu_buf)) {
13192			err = PTR_ERR(cpu_buf);
13193			goto error;
13194		}
13195
13196		pb->cpu_bufs[j] = cpu_buf;
13197
13198		err = bpf_map_update_elem(pb->map_fd, &map_key,
13199					  &cpu_buf->fd, 0);
13200		if (err) {
13201			err = -errno;
13202			pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
13203				cpu, map_key, cpu_buf->fd,
13204				libbpf_strerror_r(err, msg, sizeof(msg)));
13205			goto error;
13206		}
13207
13208		pb->events[j].events = EPOLLIN;
13209		pb->events[j].data.ptr = cpu_buf;
13210		if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
13211			      &pb->events[j]) < 0) {
13212			err = -errno;
13213			pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
13214				cpu, cpu_buf->fd,
13215				libbpf_strerror_r(err, msg, sizeof(msg)));
13216			goto error;
13217		}
13218		j++;
13219	}
13220	pb->cpu_cnt = j;
13221	free(online);
13222
13223	return pb;
13224
13225error:
13226	free(online);
13227	if (pb)
13228		perf_buffer__free(pb);
13229	return ERR_PTR(err);
13230}
13231
13232struct perf_sample_raw {
13233	struct perf_event_header header;
13234	uint32_t size;
13235	char data[];
13236};
13237
13238struct perf_sample_lost {
13239	struct perf_event_header header;
13240	uint64_t id;
13241	uint64_t lost;
13242	uint64_t sample_id;
13243};
13244
13245static enum bpf_perf_event_ret
13246perf_buffer__process_record(struct perf_event_header *e, void *ctx)
13247{
13248	struct perf_cpu_buf *cpu_buf = ctx;
13249	struct perf_buffer *pb = cpu_buf->pb;
13250	void *data = e;
13251
13252	/* user wants full control over parsing perf event */
13253	if (pb->event_cb)
13254		return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
13255
13256	switch (e->type) {
13257	case PERF_RECORD_SAMPLE: {
13258		struct perf_sample_raw *s = data;
13259
13260		if (pb->sample_cb)
13261			pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
13262		break;
13263	}
13264	case PERF_RECORD_LOST: {
13265		struct perf_sample_lost *s = data;
13266
13267		if (pb->lost_cb)
13268			pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
13269		break;
13270	}
13271	default:
13272		pr_warn("unknown perf sample type %d\n", e->type);
13273		return LIBBPF_PERF_EVENT_ERROR;
13274	}
13275	return LIBBPF_PERF_EVENT_CONT;
13276}
13277
13278static int perf_buffer__process_records(struct perf_buffer *pb,
13279					struct perf_cpu_buf *cpu_buf)
13280{
13281	enum bpf_perf_event_ret ret;
13282
13283	ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
13284				     pb->page_size, &cpu_buf->buf,
13285				     &cpu_buf->buf_size,
13286				     perf_buffer__process_record, cpu_buf);
13287	if (ret != LIBBPF_PERF_EVENT_CONT)
13288		return ret;
13289	return 0;
13290}
13291
13292int perf_buffer__epoll_fd(const struct perf_buffer *pb)
13293{
13294	return pb->epoll_fd;
13295}
13296
13297int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
13298{
13299	int i, cnt, err;
13300
13301	cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
13302	if (cnt < 0)
13303		return -errno;
13304
13305	for (i = 0; i < cnt; i++) {
13306		struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
13307
13308		err = perf_buffer__process_records(pb, cpu_buf);
13309		if (err) {
13310			pr_warn("error while processing records: %d\n", err);
13311			return libbpf_err(err);
13312		}
13313	}
13314	return cnt;
13315}
13316
13317/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
13318 * manager.
13319 */
13320size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
13321{
13322	return pb->cpu_cnt;
13323}
13324
13325/*
13326 * Return perf_event FD of a ring buffer in *buf_idx* slot of
13327 * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
13328 * select()/poll()/epoll() Linux syscalls.
13329 */
13330int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
13331{
13332	struct perf_cpu_buf *cpu_buf;
13333
13334	if (buf_idx >= pb->cpu_cnt)
13335		return libbpf_err(-EINVAL);
13336
13337	cpu_buf = pb->cpu_bufs[buf_idx];
13338	if (!cpu_buf)
13339		return libbpf_err(-ENOENT);
13340
13341	return cpu_buf->fd;
13342}
13343
13344int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
13345{
13346	struct perf_cpu_buf *cpu_buf;
13347
13348	if (buf_idx >= pb->cpu_cnt)
13349		return libbpf_err(-EINVAL);
13350
13351	cpu_buf = pb->cpu_bufs[buf_idx];
13352	if (!cpu_buf)
13353		return libbpf_err(-ENOENT);
13354
13355	*buf = cpu_buf->base;
13356	*buf_size = pb->mmap_size;
13357	return 0;
13358}
13359
13360/*
13361 * Consume data from perf ring buffer corresponding to slot *buf_idx* in
13362 * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
13363 * consume, do nothing and return success.
13364 * Returns:
13365 *   - 0 on success;
13366 *   - <0 on failure.
13367 */
13368int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
13369{
13370	struct perf_cpu_buf *cpu_buf;
13371
13372	if (buf_idx >= pb->cpu_cnt)
13373		return libbpf_err(-EINVAL);
13374
13375	cpu_buf = pb->cpu_bufs[buf_idx];
13376	if (!cpu_buf)
13377		return libbpf_err(-ENOENT);
13378
13379	return perf_buffer__process_records(pb, cpu_buf);
13380}
13381
13382int perf_buffer__consume(struct perf_buffer *pb)
13383{
13384	int i, err;
13385
13386	for (i = 0; i < pb->cpu_cnt; i++) {
13387		struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
13388
13389		if (!cpu_buf)
13390			continue;
13391
13392		err = perf_buffer__process_records(pb, cpu_buf);
13393		if (err) {
13394			pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
13395			return libbpf_err(err);
13396		}
13397	}
13398	return 0;
13399}
13400
13401int bpf_program__set_attach_target(struct bpf_program *prog,
13402				   int attach_prog_fd,
13403				   const char *attach_func_name)
13404{
13405	int btf_obj_fd = 0, btf_id = 0, err;
13406
13407	if (!prog || attach_prog_fd < 0)
13408		return libbpf_err(-EINVAL);
13409
13410	if (prog->obj->loaded)
13411		return libbpf_err(-EINVAL);
13412
13413	if (attach_prog_fd && !attach_func_name) {
13414		/* remember attach_prog_fd and let bpf_program__load() find
13415		 * BTF ID during the program load
13416		 */
13417		prog->attach_prog_fd = attach_prog_fd;
13418		return 0;
13419	}
13420
13421	if (attach_prog_fd) {
13422		btf_id = libbpf_find_prog_btf_id(attach_func_name,
13423						 attach_prog_fd);
13424		if (btf_id < 0)
13425			return libbpf_err(btf_id);
13426	} else {
13427		if (!attach_func_name)
13428			return libbpf_err(-EINVAL);
13429
13430		/* load btf_vmlinux, if not yet */
13431		err = bpf_object__load_vmlinux_btf(prog->obj, true);
13432		if (err)
13433			return libbpf_err(err);
13434		err = find_kernel_btf_id(prog->obj, attach_func_name,
13435					 prog->expected_attach_type,
13436					 &btf_obj_fd, &btf_id);
13437		if (err)
13438			return libbpf_err(err);
13439	}
13440
13441	prog->attach_btf_id = btf_id;
13442	prog->attach_btf_obj_fd = btf_obj_fd;
13443	prog->attach_prog_fd = attach_prog_fd;
13444	return 0;
13445}
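/* Illustrative sketch (the FD, skeleton field and function name are
 * hypothetical): retarget an fentry/freplace program before the object is
 * loaded:
 *
 *	err = bpf_program__set_attach_target(skel->progs.new_impl,
 *					     target_prog_fd, "xdp_dispatch");
 *	if (err)
 *		goto cleanup;
 *	... then load the object as usual ...
 */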
13446
13447int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
13448{
13449	int err = 0, n, len, start, end = -1;
13450	bool *tmp;
13451
13452	*mask = NULL;
13453	*mask_sz = 0;
13454
13455	/* Each substring separated by ',' has format \d+-\d+ or \d+, e.g. "0-3,5" */
13456	while (*s) {
13457		if (*s == ',' || *s == '\n') {
13458			s++;
13459			continue;
13460		}
13461		n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
13462		if (n <= 0 || n > 2) {
13463			pr_warn("Failed to get CPU range %s: %d\n", s, n);
13464			err = -EINVAL;
13465			goto cleanup;
13466		} else if (n == 1) {
13467			end = start;
13468		}
13469		if (start < 0 || start > end) {
13470			pr_warn("Invalid CPU range [%d,%d] in %s\n",
13471				start, end, s);
13472			err = -EINVAL;
13473			goto cleanup;
13474		}
13475		tmp = realloc(*mask, end + 1);
13476		if (!tmp) {
13477			err = -ENOMEM;
13478			goto cleanup;
13479		}
13480		*mask = tmp;
13481		memset(tmp + *mask_sz, 0, start - *mask_sz);
13482		memset(tmp + start, 1, end - start + 1);
13483		*mask_sz = end + 1;
13484		s += len;
13485	}
13486	if (!*mask_sz) {
13487		pr_warn("Empty CPU range\n");
13488		return -EINVAL;
13489	}
13490	return 0;
13491cleanup:
13492	free(*mask);
13493	*mask = NULL;
13494	return err;
13495}
13496
13497int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
13498{
13499	int fd, err = 0, len;
13500	char buf[128];
13501
13502	fd = open(fcpu, O_RDONLY | O_CLOEXEC);
13503	if (fd < 0) {
13504		err = -errno;
13505		pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
13506		return err;
13507	}
13508	len = read(fd, buf, sizeof(buf));
13509	close(fd);
13510	if (len <= 0) {
13511		err = len ? -errno : -EINVAL;
13512		pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
13513		return err;
13514	}
13515	if (len >= sizeof(buf)) {
13516		pr_warn("CPU mask is too big in file %s\n", fcpu);
13517		return -E2BIG;
13518	}
13519	buf[len] = '\0';
13520
13521	return parse_cpu_mask_str(buf, mask, mask_sz);
13522}
13523
13524int libbpf_num_possible_cpus(void)
13525{
13526	static const char *fcpu = "/sys/devices/system/cpu/possible";
13527	static int cpus;
13528	int err, n, i, tmp_cpus;
13529	bool *mask;
13530
13531	tmp_cpus = READ_ONCE(cpus);
13532	if (tmp_cpus > 0)
13533		return tmp_cpus;
13534
13535	err = parse_cpu_mask_file(fcpu, &mask, &n);
13536	if (err)
13537		return libbpf_err(err);
13538
13539	tmp_cpus = 0;
13540	for (i = 0; i < n; i++) {
13541		if (mask[i])
13542			tmp_cpus++;
13543	}
13544	free(mask);
13545
13546	WRITE_ONCE(cpus, tmp_cpus);
13547	return tmp_cpus;
13548}
13549
13550static int populate_skeleton_maps(const struct bpf_object *obj,
13551				  struct bpf_map_skeleton *maps,
13552				  size_t map_cnt)
13553{
13554	int i;
13555
13556	for (i = 0; i < map_cnt; i++) {
13557		struct bpf_map **map = maps[i].map;
13558		const char *name = maps[i].name;
13559		void **mmaped = maps[i].mmaped;
13560
13561		*map = bpf_object__find_map_by_name(obj, name);
13562		if (!*map) {
13563			pr_warn("failed to find skeleton map '%s'\n", name);
13564			return -ESRCH;
13565		}
13566
13567		/* externs shouldn't be pre-setup from user code */
13568		if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
13569			*mmaped = (*map)->mmaped;
13570	}
13571	return 0;
13572}
13573
13574static int populate_skeleton_progs(const struct bpf_object *obj,
13575				   struct bpf_prog_skeleton *progs,
13576				   size_t prog_cnt)
13577{
13578	int i;
13579
13580	for (i = 0; i < prog_cnt; i++) {
13581		struct bpf_program **prog = progs[i].prog;
13582		const char *name = progs[i].name;
13583
13584		*prog = bpf_object__find_program_by_name(obj, name);
13585		if (!*prog) {
13586			pr_warn("failed to find skeleton program '%s'\n", name);
13587			return -ESRCH;
13588		}
13589	}
13590	return 0;
13591}
13592
13593int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
13594			      const struct bpf_object_open_opts *opts)
13595{
13596	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
13597		.object_name = s->name,
13598	);
13599	struct bpf_object *obj;
13600	int err;
13601
13602	/* Attempt to preserve opts->object_name, unless overridden by user
13603	 * explicitly. Overwriting object name for skeletons is discouraged,
13604	 * as it breaks global data maps, because they contain object name
13605	 * prefix as their own map name prefix. When the skeleton is generated,
13606	 * bpftool makes the assumption that this name will stay the same.
13607	 */
13608	if (opts) {
13609		memcpy(&skel_opts, opts, sizeof(*opts));
13610		if (!opts->object_name)
13611			skel_opts.object_name = s->name;
13612	}
13613
13614	obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
13615	err = libbpf_get_error(obj);
13616	if (err) {
13617		pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
13618			s->name, err);
13619		return libbpf_err(err);
13620	}
13621
13622	*s->obj = obj;
13623	err = populate_skeleton_maps(obj, s->maps, s->map_cnt);
13624	if (err) {
13625		pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
13626		return libbpf_err(err);
13627	}
13628
13629	err = populate_skeleton_progs(obj, s->progs, s->prog_cnt);
13630	if (err) {
13631		pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
13632		return libbpf_err(err);
13633	}
13634
13635	return 0;
13636}
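/* For reference, bpftool-generated skeletons wrap this API: a hypothetical
 * <name>__open() calls bpf_object__open_skeleton(), <name>__load() calls
 * bpf_object__load_skeleton(), <name>__attach() calls
 * bpf_object__attach_skeleton() and <name>__destroy() calls
 * bpf_object__destroy_skeleton().
 */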
13637
13638int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
13639{
13640	int err, len, var_idx, i;
13641	const char *var_name;
13642	const struct bpf_map *map;
13643	struct btf *btf;
13644	__u32 map_type_id;
13645	const struct btf_type *map_type, *var_type;
13646	const struct bpf_var_skeleton *var_skel;
13647	struct btf_var_secinfo *var;
13648
13649	if (!s->obj)
13650		return libbpf_err(-EINVAL);
13651
13652	btf = bpf_object__btf(s->obj);
13653	if (!btf) {
13654		pr_warn("subskeletons require BTF at runtime (object %s)\n",
13655			bpf_object__name(s->obj));
13656		return libbpf_err(-EINVAL);
13657	}
13658
13659	err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt);
13660	if (err) {
13661		pr_warn("failed to populate subskeleton maps: %d\n", err);
13662		return libbpf_err(err);
13663	}
13664
13665	err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt);
13666	if (err) {
13667		pr_warn("failed to populate subskeleton progs: %d\n", err);
13668		return libbpf_err(err);
13669	}
13670
13671	for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
13672		var_skel = &s->vars[var_idx];
13673		map = *var_skel->map;
13674		map_type_id = bpf_map__btf_value_type_id(map);
13675		map_type = btf__type_by_id(btf, map_type_id);
13676
13677		if (!btf_is_datasec(map_type)) {
13678			pr_warn("type for map '%1$s' is not a datasec: %2$s\n",
13679				bpf_map__name(map),
13680				__btf_kind_str(btf_kind(map_type)));
13681			return libbpf_err(-EINVAL);
13682		}
13683
13684		len = btf_vlen(map_type);
13685		var = btf_var_secinfos(map_type);
13686		for (i = 0; i < len; i++, var++) {
13687			var_type = btf__type_by_id(btf, var->type);
13688			var_name = btf__name_by_offset(btf, var_type->name_off);
13689			if (strcmp(var_name, var_skel->name) == 0) {
13690				*var_skel->addr = map->mmaped + var->offset;
13691				break;
13692			}
13693		}
13694	}
13695	return 0;
13696}
13697
13698void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
13699{
13700	if (!s)
13701		return;
13702	free(s->maps);
13703	free(s->progs);
13704	free(s->vars);
13705	free(s);
13706}
13707
13708int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
13709{
13710	int i, err;
13711
13712	err = bpf_object__load(*s->obj);
13713	if (err) {
13714		pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
13715		return libbpf_err(err);
13716	}
13717
13718	for (i = 0; i < s->map_cnt; i++) {
13719		struct bpf_map *map = *s->maps[i].map;
13720		size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
13721		int prot, map_fd = map->fd;
13722		void **mmaped = s->maps[i].mmaped;
13723
13724		if (!mmaped)
13725			continue;
13726
13727		if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
13728			*mmaped = NULL;
13729			continue;
13730		}
13731
13732		if (map->def.map_flags & BPF_F_RDONLY_PROG)
13733			prot = PROT_READ;
13734		else
13735			prot = PROT_READ | PROT_WRITE;
13736
13737		/* Remap the anonymous mmap()-ed "map initialization image" as
13738		 * BPF map-backed mmap()-ed memory, preserving the same memory
13739		 * address. This causes the kernel to change the process' page
13740		 * table to point to a different piece of kernel memory, but
13741		 * from the userspace point of view the memory address (and its
13742		 * contents, which are identical at this point) stays the same.
13743		 * This mapping will be released by bpf_object__close() as part
13744		 * of the normal clean up procedure, so we don't need to worry
13745		 * about it from the skeleton's clean up perspective.
13746		 */
13747		*mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0);
13748		if (*mmaped == MAP_FAILED) {
13749			err = -errno;
13750			*mmaped = NULL;
13751			pr_warn("failed to re-mmap() map '%s': %d\n",
13752				 bpf_map__name(map), err);
13753			return libbpf_err(err);
13754		}
13755	}
13756
13757	return 0;
13758}
13759
13760int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
13761{
13762	int i, err;
13763
13764	for (i = 0; i < s->prog_cnt; i++) {
13765		struct bpf_program *prog = *s->progs[i].prog;
13766		struct bpf_link **link = s->progs[i].link;
13767
13768		if (!prog->autoload || !prog->autoattach)
13769			continue;
13770
13771		/* auto-attaching not supported for this program */
13772		if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
13773			continue;
13774
13775		/* if user already set the link manually, don't attempt auto-attach */
13776		if (*link)
13777			continue;
13778
13779		err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
13780		if (err) {
13781			pr_warn("prog '%s': failed to auto-attach: %d\n",
13782				bpf_program__name(prog), err);
13783			return libbpf_err(err);
13784		}
13785
13786		/* It's possible that for some SEC() definitions auto-attach
13787		 * is supported in some cases (e.g., if the definition completely
13788		 * specifies target information), but not in other cases.
13789		 * SEC("uprobe") is one such case. If the user specified the
13790		 * target binary and function name, such a BPF program can be
13791		 * auto-attached. But if not, it shouldn't cause the skeleton's
13792		 * attach to fail; it should just be skipped.
13793		 * attach_fn signals such a case by returning 0 (no error) and
13794		 * setting link to NULL.
13795		 */
13796	}
13797
13798	return 0;
13799}
13800
13801void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
13802{
13803	int i;
13804
13805	for (i = 0; i < s->prog_cnt; i++) {
13806		struct bpf_link **link = s->progs[i].link;
13807
13808		bpf_link__destroy(*link);
13809		*link = NULL;
13810	}
13811}
13812
13813void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
13814{
13815	if (!s)
13816		return;
13817
13818	if (s->progs)
13819		bpf_object__detach_skeleton(s);
13820	if (s->obj)
13821		bpf_object__close(*s->obj);
13822	free(s->maps);
13823	free(s->progs);
13824	free(s);
13825}